27611 lines
548 KiB
JSON
27611 lines
548 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.926099870528801,
|
|
"eval_steps": 800,
|
|
"global_step": 22800,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 8.097165991902834e-09,
|
|
"loss": 3.5744,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 4.048582995951417e-08,
|
|
"loss": 3.6858,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 8.097165991902834e-08,
|
|
"loss": 3.6628,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 1.214574898785425e-07,
|
|
"loss": 3.3928,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 1.6194331983805668e-07,
|
|
"loss": 3.0895,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 2.0242914979757083e-07,
|
|
"loss": 2.6585,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 2.42914979757085e-07,
|
|
"loss": 2.1214,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 2.8340080971659917e-07,
|
|
"loss": 1.7237,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 3.2388663967611335e-07,
|
|
"loss": 1.2235,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 3.6437246963562754e-07,
|
|
"loss": 1.1867,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 4.0485829959514166e-07,
|
|
"loss": 1.1259,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 4.4534412955465585e-07,
|
|
"loss": 1.0245,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 4.8582995951417e-07,
|
|
"loss": 0.977,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 5.263157894736842e-07,
|
|
"loss": 0.9554,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 5.668016194331983e-07,
|
|
"loss": 0.9017,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 6.072874493927125e-07,
|
|
"loss": 0.8987,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 6.477732793522267e-07,
|
|
"loss": 0.8863,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 6.882591093117408e-07,
|
|
"loss": 0.9425,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 7.287449392712551e-07,
|
|
"loss": 0.91,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 7.692307692307693e-07,
|
|
"loss": 0.848,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 8.097165991902833e-07,
|
|
"loss": 0.8213,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 8.502024291497975e-07,
|
|
"loss": 0.8434,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 8.906882591093117e-07,
|
|
"loss": 0.8409,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 9.311740890688259e-07,
|
|
"loss": 0.8398,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 9.7165991902834e-07,
|
|
"loss": 0.7942,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.0121457489878542e-06,
|
|
"loss": 0.8221,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.0526315789473683e-06,
|
|
"loss": 0.8038,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.0931174089068826e-06,
|
|
"loss": 0.803,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.1336032388663967e-06,
|
|
"loss": 0.7935,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.1740890688259108e-06,
|
|
"loss": 0.8251,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.214574898785425e-06,
|
|
"loss": 0.8082,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.2550607287449393e-06,
|
|
"loss": 0.7897,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.2955465587044534e-06,
|
|
"loss": 0.8286,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.3360323886639675e-06,
|
|
"loss": 0.7742,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.3765182186234816e-06,
|
|
"loss": 0.787,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.4170040485829959e-06,
|
|
"loss": 0.7862,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.4574898785425101e-06,
|
|
"loss": 0.7721,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.4979757085020242e-06,
|
|
"loss": 0.7554,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.5384615384615385e-06,
|
|
"loss": 0.7941,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.5789473684210526e-06,
|
|
"loss": 0.7759,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.6194331983805667e-06,
|
|
"loss": 0.7249,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.6599190283400807e-06,
|
|
"loss": 0.7673,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.700404858299595e-06,
|
|
"loss": 0.7922,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.7408906882591093e-06,
|
|
"loss": 0.7546,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.7813765182186234e-06,
|
|
"loss": 0.7709,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.8218623481781377e-06,
|
|
"loss": 0.7383,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.8623481781376518e-06,
|
|
"loss": 0.7608,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.9028340080971658e-06,
|
|
"loss": 0.7663,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.94331983805668e-06,
|
|
"loss": 0.7567,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.983805668016194e-06,
|
|
"loss": 0.751,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.9999999252295637e-06,
|
|
"loss": 0.8148,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.9999994682991603e-06,
|
|
"loss": 0.7634,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.999998595977674e-06,
|
|
"loss": 0.7448,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.999997308265467e-06,
|
|
"loss": 0.7508,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.999995605163075e-06,
|
|
"loss": 0.7696,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.9999934866712048e-06,
|
|
"loss": 0.7676,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.9999909527907367e-06,
|
|
"loss": 0.7601,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.9999880035227236e-06,
|
|
"loss": 0.7779,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.9999846388683895e-06,
|
|
"loss": 0.7768,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.9999808588291327e-06,
|
|
"loss": 0.7713,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.999976663406524e-06,
|
|
"loss": 0.7666,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.999972052602305e-06,
|
|
"loss": 0.742,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.999967026418392e-06,
|
|
"loss": 0.783,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.999961584856872e-06,
|
|
"loss": 0.7269,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.9999557279200056e-06,
|
|
"loss": 0.7336,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.9999494556102263e-06,
|
|
"loss": 0.7072,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.9999427679301387e-06,
|
|
"loss": 0.7709,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.999935664882522e-06,
|
|
"loss": 0.7237,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.9999281464703247e-06,
|
|
"loss": 0.719,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.999920212696672e-06,
|
|
"loss": 0.748,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.999911863564859e-06,
|
|
"loss": 0.7167,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.9999030990783527e-06,
|
|
"loss": 0.7151,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.999893919240795e-06,
|
|
"loss": 0.7095,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 1.9998843240559986e-06,
|
|
"loss": 0.7703,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9998743135279497e-06,
|
|
"loss": 0.7456,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999863887660806e-06,
|
|
"loss": 0.7532,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999853046458899e-06,
|
|
"loss": 0.7014,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9998417899267313e-06,
|
|
"loss": 0.7629,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999830118068979e-06,
|
|
"loss": 0.7329,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999818030890491e-06,
|
|
"loss": 0.723,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999805528396288e-06,
|
|
"loss": 0.7549,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9997926105915627e-06,
|
|
"loss": 0.7121,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999779277481682e-06,
|
|
"loss": 0.7506,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9997655290721834e-06,
|
|
"loss": 0.7284,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9997513653687786e-06,
|
|
"loss": 0.7344,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999736786377351e-06,
|
|
"loss": 0.7684,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9997217921039567e-06,
|
|
"loss": 0.7427,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9997063825548237e-06,
|
|
"loss": 0.7139,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9996905577363533e-06,
|
|
"loss": 0.761,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9996743176551186e-06,
|
|
"loss": 0.7545,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999657662317866e-06,
|
|
"loss": 0.7431,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999640591731515e-06,
|
|
"loss": 0.7225,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999623105903154e-06,
|
|
"loss": 0.7284,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999605204840049e-06,
|
|
"loss": 0.76,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9995868885496343e-06,
|
|
"loss": 0.7413,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9995681570395195e-06,
|
|
"loss": 0.7837,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9995490103174847e-06,
|
|
"loss": 0.7347,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999529448391483e-06,
|
|
"loss": 0.7576,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9995094712696413e-06,
|
|
"loss": 0.7665,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9994890789602576e-06,
|
|
"loss": 0.7353,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999468271471802e-06,
|
|
"loss": 0.7344,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9994470488129185e-06,
|
|
"loss": 0.7476,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9994254109924223e-06,
|
|
"loss": 0.7257,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9994033580193017e-06,
|
|
"loss": 0.7306,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999380889902718e-06,
|
|
"loss": 0.7115,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9993580066520034e-06,
|
|
"loss": 0.7452,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9993347082766636e-06,
|
|
"loss": 0.7523,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9993109947863764e-06,
|
|
"loss": 0.7091,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999286866190993e-06,
|
|
"loss": 0.7383,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999262322500535e-06,
|
|
"loss": 0.7043,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9992373637251982e-06,
|
|
"loss": 0.7098,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999211989875351e-06,
|
|
"loss": 0.7142,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999186200961532e-06,
|
|
"loss": 0.7424,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9991599969944552e-06,
|
|
"loss": 0.7348,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9991333779850043e-06,
|
|
"loss": 0.7126,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999106343944237e-06,
|
|
"loss": 0.7341,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9990788948833833e-06,
|
|
"loss": 0.7445,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999051030813845e-06,
|
|
"loss": 0.7181,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.999022751747197e-06,
|
|
"loss": 0.7295,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.998994057695185e-06,
|
|
"loss": 0.7159,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.99896494866973e-06,
|
|
"loss": 0.6844,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9989354246829222e-06,
|
|
"loss": 0.7511,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9989054857470267e-06,
|
|
"loss": 0.7322,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.9988751318744787e-06,
|
|
"loss": 0.7829,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.998844363077888e-06,
|
|
"loss": 0.7229,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.998813179370035e-06,
|
|
"loss": 0.738,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9987815807638733e-06,
|
|
"loss": 0.6934,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9987495672725294e-06,
|
|
"loss": 0.7005,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9987171389093e-06,
|
|
"loss": 0.7692,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.998684295687657e-06,
|
|
"loss": 0.7101,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.998651037621242e-06,
|
|
"loss": 0.7813,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9986173647238715e-06,
|
|
"loss": 0.7526,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9985832770095313e-06,
|
|
"loss": 0.7235,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.998548774492382e-06,
|
|
"loss": 0.7201,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9985138571867557e-06,
|
|
"loss": 0.7303,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.998478525107157e-06,
|
|
"loss": 0.7375,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.998442778268262e-06,
|
|
"loss": 0.7123,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.99840661668492e-06,
|
|
"loss": 0.7541,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.998370040372151e-06,
|
|
"loss": 0.7685,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.99833304934515e-06,
|
|
"loss": 0.7029,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9982956436192827e-06,
|
|
"loss": 0.7797,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9982578232100866e-06,
|
|
"loss": 0.7326,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9982195881332714e-06,
|
|
"loss": 0.773,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9981809384047207e-06,
|
|
"loss": 0.741,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9981418740404886e-06,
|
|
"loss": 0.7518,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.998102395056802e-06,
|
|
"loss": 0.7338,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.998062501470061e-06,
|
|
"loss": 0.7192,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.998022193296836e-06,
|
|
"loss": 0.7429,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9979814705538715e-06,
|
|
"loss": 0.6953,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.997940333258083e-06,
|
|
"loss": 0.7265,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9978987814265583e-06,
|
|
"loss": 0.7105,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.997856815076558e-06,
|
|
"loss": 0.6994,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9978144342255147e-06,
|
|
"loss": 0.7008,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9977716388910325e-06,
|
|
"loss": 0.7301,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.997728429090889e-06,
|
|
"loss": 0.7662,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9976848048430323e-06,
|
|
"loss": 0.7428,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9976407661655844e-06,
|
|
"loss": 0.706,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.997596313076838e-06,
|
|
"loss": 0.6853,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9975514455952584e-06,
|
|
"loss": 0.7363,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9975061637394834e-06,
|
|
"loss": 0.7217,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.997460467528323e-06,
|
|
"loss": 0.7161,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"eval_loss": 0.6896045207977295,
|
|
"eval_runtime": 140.4315,
|
|
"eval_samples_per_second": 16.848,
|
|
"eval_steps_per_second": 2.813,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.997414356980759e-06,
|
|
"loss": 0.7911,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9973678321159443e-06,
|
|
"loss": 0.7037,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9973208929532063e-06,
|
|
"loss": 0.7083,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9972735395120418e-06,
|
|
"loss": 0.7183,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.997225771812122e-06,
|
|
"loss": 0.7227,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9971775898732893e-06,
|
|
"loss": 0.7271,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9971289937155577e-06,
|
|
"loss": 0.7271,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.997079983359113e-06,
|
|
"loss": 0.7065,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9970305588243145e-06,
|
|
"loss": 0.706,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9969807201316925e-06,
|
|
"loss": 0.7,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.9969304673019494e-06,
|
|
"loss": 0.7165,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.99687980035596e-06,
|
|
"loss": 0.729,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.996828719314771e-06,
|
|
"loss": 0.7199,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.996777224199601e-06,
|
|
"loss": 0.7041,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.99672531503184e-06,
|
|
"loss": 0.735,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.996672991833051e-06,
|
|
"loss": 0.7153,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.996620254624969e-06,
|
|
"loss": 0.714,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9965671034295e-06,
|
|
"loss": 0.7309,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.996513538268723e-06,
|
|
"loss": 0.7808,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9964595591648883e-06,
|
|
"loss": 0.7407,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9964051661404185e-06,
|
|
"loss": 0.6831,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9963503592179078e-06,
|
|
"loss": 0.7178,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.996295138420122e-06,
|
|
"loss": 0.7607,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9962395037700007e-06,
|
|
"loss": 0.747,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.996183455290653e-06,
|
|
"loss": 0.6911,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.996126993005361e-06,
|
|
"loss": 0.7038,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.996070116937579e-06,
|
|
"loss": 0.7195,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9960128271109326e-06,
|
|
"loss": 0.6974,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9959551235492195e-06,
|
|
"loss": 0.7399,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9958970062764095e-06,
|
|
"loss": 0.7475,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9958384753166437e-06,
|
|
"loss": 0.7091,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.995779530694236e-06,
|
|
"loss": 0.6908,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9957201724336704e-06,
|
|
"loss": 0.7052,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9956604005596043e-06,
|
|
"loss": 0.6963,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9956002150968667e-06,
|
|
"loss": 0.7064,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9955396160704582e-06,
|
|
"loss": 0.6804,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.99547860350555e-06,
|
|
"loss": 0.6759,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.995417177427488e-06,
|
|
"loss": 0.7175,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9953553378617866e-06,
|
|
"loss": 0.6926,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.995293084834134e-06,
|
|
"loss": 0.7109,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9952304183703893e-06,
|
|
"loss": 0.7129,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9951673384965835e-06,
|
|
"loss": 0.7117,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.99510384523892e-06,
|
|
"loss": 0.7694,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.995039938623773e-06,
|
|
"loss": 0.7381,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9949756186776893e-06,
|
|
"loss": 0.722,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9949108854273855e-06,
|
|
"loss": 0.7288,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9948457388997528e-06,
|
|
"loss": 0.7045,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.994780179121851e-06,
|
|
"loss": 0.7623,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.994714206120914e-06,
|
|
"loss": 0.725,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9946478199243466e-06,
|
|
"loss": 0.7203,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9945810205597246e-06,
|
|
"loss": 0.7011,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9945138080547957e-06,
|
|
"loss": 0.6946,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.99444618243748e-06,
|
|
"loss": 0.7151,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.994378143735868e-06,
|
|
"loss": 0.7074,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9943096919782225e-06,
|
|
"loss": 0.7,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.994240827192978e-06,
|
|
"loss": 0.6957,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9941715494087408e-06,
|
|
"loss": 0.7348,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9941018586542866e-06,
|
|
"loss": 0.6984,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9940317549585665e-06,
|
|
"loss": 0.7252,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9939612383506993e-06,
|
|
"loss": 0.7706,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.993890308859978e-06,
|
|
"loss": 0.7261,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9938189665158654e-06,
|
|
"loss": 0.6879,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9937472113479966e-06,
|
|
"loss": 0.7088,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9936750433861787e-06,
|
|
"loss": 0.7428,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.993602462660389e-06,
|
|
"loss": 0.7111,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.993529469200777e-06,
|
|
"loss": 0.7027,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.993456063037664e-06,
|
|
"loss": 0.6969,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9933822442015416e-06,
|
|
"loss": 0.7343,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.993308012723074e-06,
|
|
"loss": 0.7174,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.993233368633096e-06,
|
|
"loss": 0.6997,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.993158311962614e-06,
|
|
"loss": 0.693,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9930828427428066e-06,
|
|
"loss": 0.7136,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9930069610050224e-06,
|
|
"loss": 0.7211,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9929306667807823e-06,
|
|
"loss": 0.7144,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.992853960101778e-06,
|
|
"loss": 0.6787,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9927768409998733e-06,
|
|
"loss": 0.7348,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.992699309507102e-06,
|
|
"loss": 0.6718,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.992621365655671e-06,
|
|
"loss": 0.7146,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9925430094779566e-06,
|
|
"loss": 0.6982,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9924642410065075e-06,
|
|
"loss": 0.7379,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.992385060274044e-06,
|
|
"loss": 0.6983,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9923054673134564e-06,
|
|
"loss": 0.7893,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.992225462157807e-06,
|
|
"loss": 0.714,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.99214504484033e-06,
|
|
"loss": 0.7394,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9920642153944288e-06,
|
|
"loss": 0.7238,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9919829738536806e-06,
|
|
"loss": 0.6847,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.991901320251831e-06,
|
|
"loss": 0.6936,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9918192546227995e-06,
|
|
"loss": 0.7271,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.991736777000675e-06,
|
|
"loss": 0.7416,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9916538874197176e-06,
|
|
"loss": 0.7637,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9915705859143594e-06,
|
|
"loss": 0.6722,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9914868725192025e-06,
|
|
"loss": 0.6943,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.991402747269022e-06,
|
|
"loss": 0.7433,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.991318210198761e-06,
|
|
"loss": 0.7015,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.991233261343537e-06,
|
|
"loss": 0.6772,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9911479007386364e-06,
|
|
"loss": 0.7278,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.991062128419517e-06,
|
|
"loss": 0.7471,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9909759444218085e-06,
|
|
"loss": 0.7234,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9908893487813106e-06,
|
|
"loss": 0.7118,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.990802341533994e-06,
|
|
"loss": 0.7351,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9907149227160016e-06,
|
|
"loss": 0.7084,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9906270923636457e-06,
|
|
"loss": 0.7174,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9905388505134107e-06,
|
|
"loss": 0.6935,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.990450197201951e-06,
|
|
"loss": 0.7004,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.990361132466093e-06,
|
|
"loss": 0.7077,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9902716563428335e-06,
|
|
"loss": 0.7226,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9901817688693395e-06,
|
|
"loss": 0.7025,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.99009147008295e-06,
|
|
"loss": 0.7139,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9900007600211735e-06,
|
|
"loss": 0.6609,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9899096387216914e-06,
|
|
"loss": 0.7452,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9898181062223536e-06,
|
|
"loss": 0.7111,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9897261625611822e-06,
|
|
"loss": 0.6925,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9896338077763704e-06,
|
|
"loss": 0.7097,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.989541041906281e-06,
|
|
"loss": 0.7146,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9894478649894484e-06,
|
|
"loss": 0.704,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.989354277064577e-06,
|
|
"loss": 0.7466,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9892602781705427e-06,
|
|
"loss": 0.6958,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9891658683463922e-06,
|
|
"loss": 0.7421,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.989071047631342e-06,
|
|
"loss": 0.6658,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.98897581606478e-06,
|
|
"loss": 0.6868,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.988880173686265e-06,
|
|
"loss": 0.7437,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.988784120535525e-06,
|
|
"loss": 0.7484,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.988687656652461e-06,
|
|
"loss": 0.7063,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9885907820771415e-06,
|
|
"loss": 0.713,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.988493496849809e-06,
|
|
"loss": 0.7313,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9883958010108736e-06,
|
|
"loss": 0.6987,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9882976946009186e-06,
|
|
"loss": 0.7089,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9881991776606956e-06,
|
|
"loss": 0.6492,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9881002502311285e-06,
|
|
"loss": 0.6538,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9880009123533095e-06,
|
|
"loss": 0.7096,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9879011640685043e-06,
|
|
"loss": 0.7329,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9878010054181463e-06,
|
|
"loss": 0.7414,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9877004364438414e-06,
|
|
"loss": 0.7089,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.987599457187365e-06,
|
|
"loss": 0.738,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9874980676906617e-06,
|
|
"loss": 0.7078,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9873962679958494e-06,
|
|
"loss": 0.6987,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.987294058145214e-06,
|
|
"loss": 0.7456,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.987191438181213e-06,
|
|
"loss": 0.7402,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.987088408146473e-06,
|
|
"loss": 0.7308,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.986984968083793e-06,
|
|
"loss": 0.7197,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9868811180361402e-06,
|
|
"loss": 0.7386,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9867768580466536e-06,
|
|
"loss": 0.7024,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.986672188158641e-06,
|
|
"loss": 0.7241,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9865671084155826e-06,
|
|
"loss": 0.7006,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.986461618861127e-06,
|
|
"loss": 0.7035,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.986355719539093e-06,
|
|
"loss": 0.723,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9862494104934717e-06,
|
|
"loss": 0.7184,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9861426917684214e-06,
|
|
"loss": 0.7018,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.986035563408273e-06,
|
|
"loss": 0.6943,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9859280254575268e-06,
|
|
"loss": 0.7434,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9858200779608526e-06,
|
|
"loss": 0.7122,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9857117209630913e-06,
|
|
"loss": 0.7187,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9856029545092536e-06,
|
|
"loss": 0.6825,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.985493778644519e-06,
|
|
"loss": 0.6964,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9853841934142396e-06,
|
|
"loss": 0.7437,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9852741988639356e-06,
|
|
"loss": 0.7125,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9851637950392974e-06,
|
|
"loss": 0.7241,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9850529819861863e-06,
|
|
"loss": 0.7113,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.984941759750633e-06,
|
|
"loss": 0.6725,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.984830128378838e-06,
|
|
"loss": 0.7166,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"eval_loss": 0.6776626706123352,
|
|
"eval_runtime": 140.3492,
|
|
"eval_samples_per_second": 16.858,
|
|
"eval_steps_per_second": 2.814,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9847180879171727e-06,
|
|
"loss": 0.7111,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9846056384121768e-06,
|
|
"loss": 0.7004,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9844927799105612e-06,
|
|
"loss": 0.7221,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.984379512459207e-06,
|
|
"loss": 0.7363,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.984265836105163e-06,
|
|
"loss": 0.7107,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9841517508956506e-06,
|
|
"loss": 0.7081,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9840372568780594e-06,
|
|
"loss": 0.6796,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9839223540999496e-06,
|
|
"loss": 0.7207,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9838070426090505e-06,
|
|
"loss": 0.716,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.983691322453261e-06,
|
|
"loss": 0.7306,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.983575193680651e-06,
|
|
"loss": 0.724,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.983458656339459e-06,
|
|
"loss": 0.7447,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9833417104780942e-06,
|
|
"loss": 0.6929,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9832243561451346e-06,
|
|
"loss": 0.7228,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9831065933893275e-06,
|
|
"loss": 0.6824,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.982988422259591e-06,
|
|
"loss": 0.7056,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9828698428050123e-06,
|
|
"loss": 0.6943,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.982750855074849e-06,
|
|
"loss": 0.7101,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9826314591185263e-06,
|
|
"loss": 0.6786,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9825116549856408e-06,
|
|
"loss": 0.6954,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9823914427259584e-06,
|
|
"loss": 0.7165,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.982270822389414e-06,
|
|
"loss": 0.7208,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9821497940261124e-06,
|
|
"loss": 0.6981,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.982028357686327e-06,
|
|
"loss": 0.6914,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9819065134205026e-06,
|
|
"loss": 0.7291,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9817842612792513e-06,
|
|
"loss": 0.6882,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.981661601313356e-06,
|
|
"loss": 0.685,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.981538533573768e-06,
|
|
"loss": 0.6954,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9814150581116093e-06,
|
|
"loss": 0.7104,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9812911749781705e-06,
|
|
"loss": 0.7026,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.981166884224911e-06,
|
|
"loss": 0.6907,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.981042185903461e-06,
|
|
"loss": 0.6988,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.980917080065618e-06,
|
|
"loss": 0.6894,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.98079156676335e-06,
|
|
"loss": 0.7308,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9806656460487955e-06,
|
|
"loss": 0.6688,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9805393179742596e-06,
|
|
"loss": 0.7028,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.980412582592218e-06,
|
|
"loss": 0.6982,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.980285439955316e-06,
|
|
"loss": 0.7326,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.980157890116367e-06,
|
|
"loss": 0.7204,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.980029933128354e-06,
|
|
"loss": 0.7016,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9799015690444302e-06,
|
|
"loss": 0.7076,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9797727979179156e-06,
|
|
"loss": 0.7121,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9796436198023016e-06,
|
|
"loss": 0.7204,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9795140347512472e-06,
|
|
"loss": 0.7178,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.979384042818581e-06,
|
|
"loss": 0.7223,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.979253644058301e-06,
|
|
"loss": 0.7066,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.979122838524573e-06,
|
|
"loss": 0.6873,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9789916262717328e-06,
|
|
"loss": 0.6822,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.9788600073542848e-06,
|
|
"loss": 0.6947,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.978727981826902e-06,
|
|
"loss": 0.7092,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.978595549744427e-06,
|
|
"loss": 0.7166,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9784627111618715e-06,
|
|
"loss": 0.6842,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9783294661344145e-06,
|
|
"loss": 0.7161,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.978195814717405e-06,
|
|
"loss": 0.6881,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.978061756966361e-06,
|
|
"loss": 0.7342,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.977927292936969e-06,
|
|
"loss": 0.6767,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9777924226850842e-06,
|
|
"loss": 0.7096,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.97765714626673e-06,
|
|
"loss": 0.694,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.977521463738099e-06,
|
|
"loss": 0.7152,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9773853751555537e-06,
|
|
"loss": 0.6618,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.977248880575623e-06,
|
|
"loss": 0.689,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9771119800550054e-06,
|
|
"loss": 0.6892,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9769746736505694e-06,
|
|
"loss": 0.7179,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.97683696141935e-06,
|
|
"loss": 0.6888,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9766988434185514e-06,
|
|
"loss": 0.7041,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.976560319705547e-06,
|
|
"loss": 0.6969,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9764213903378786e-06,
|
|
"loss": 0.7162,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9762820553732563e-06,
|
|
"loss": 0.7178,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.976142314869558e-06,
|
|
"loss": 0.7309,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.976002168884831e-06,
|
|
"loss": 0.7198,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.975861617477291e-06,
|
|
"loss": 0.7131,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9757206607053218e-06,
|
|
"loss": 0.7087,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9755792986274755e-06,
|
|
"loss": 0.6708,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.975437531302472e-06,
|
|
"loss": 0.7141,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.975295358789201e-06,
|
|
"loss": 0.7152,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9751527811467195e-06,
|
|
"loss": 0.7172,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9750097984342534e-06,
|
|
"loss": 0.7472,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9748664107111962e-06,
|
|
"loss": 0.7129,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9747226180371094e-06,
|
|
"loss": 0.7066,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.974578420471724e-06,
|
|
"loss": 0.7049,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9744338180749376e-06,
|
|
"loss": 0.7214,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9742888109068175e-06,
|
|
"loss": 0.7469,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9741433990275987e-06,
|
|
"loss": 0.7119,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.973997582497683e-06,
|
|
"loss": 0.7487,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.973851361377642e-06,
|
|
"loss": 0.7259,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9737047357282143e-06,
|
|
"loss": 0.7164,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9735577056103074e-06,
|
|
"loss": 0.7639,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9734102710849956e-06,
|
|
"loss": 0.7181,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.973262432213523e-06,
|
|
"loss": 0.6833,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.973114189057299e-06,
|
|
"loss": 0.7263,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9729655416779044e-06,
|
|
"loss": 0.6957,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.972816490137085e-06,
|
|
"loss": 0.716,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9726670344967554e-06,
|
|
"loss": 0.699,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9725171748189987e-06,
|
|
"loss": 0.6858,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9723669111660645e-06,
|
|
"loss": 0.7036,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9722162436003715e-06,
|
|
"loss": 0.6958,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.9720651721845062e-06,
|
|
"loss": 0.7303,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 1.971913696981222e-06,
|
|
"loss": 0.6836,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.97176181805344e-06,
|
|
"loss": 0.7349,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9716095354642493e-06,
|
|
"loss": 0.7105,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.971456849276907e-06,
|
|
"loss": 0.7126,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.971303759554838e-06,
|
|
"loss": 0.6959,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9711502663616344e-06,
|
|
"loss": 0.6699,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9709963697610554e-06,
|
|
"loss": 0.6671,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.970842069817029e-06,
|
|
"loss": 0.7101,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.970687366593649e-06,
|
|
"loss": 0.6985,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.970532260155179e-06,
|
|
"loss": 0.7175,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.970376750566048e-06,
|
|
"loss": 0.7251,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9702208378908537e-06,
|
|
"loss": 0.6949,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.970064522194361e-06,
|
|
"loss": 0.7573,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9699078035415014e-06,
|
|
"loss": 0.6922,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9697506819973753e-06,
|
|
"loss": 0.7215,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9695931576272493e-06,
|
|
"loss": 0.7252,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9694352304965572e-06,
|
|
"loss": 0.7202,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9692769006709013e-06,
|
|
"loss": 0.6932,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.96911816821605e-06,
|
|
"loss": 0.6522,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9689590331979394e-06,
|
|
"loss": 0.6933,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.968799495682673e-06,
|
|
"loss": 0.6825,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.968639555736521e-06,
|
|
"loss": 0.7037,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.968479213425922e-06,
|
|
"loss": 0.6911,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9683184688174795e-06,
|
|
"loss": 0.7048,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9681573219779658e-06,
|
|
"loss": 0.7273,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9679957729743204e-06,
|
|
"loss": 0.7105,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9678338218736486e-06,
|
|
"loss": 0.6832,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.967671468743224e-06,
|
|
"loss": 0.6829,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9675087136504865e-06,
|
|
"loss": 0.6971,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9673455566630437e-06,
|
|
"loss": 0.7298,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9671819978486688e-06,
|
|
"loss": 0.6841,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.967018037275303e-06,
|
|
"loss": 0.6838,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.966853675011054e-06,
|
|
"loss": 0.7053,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.966688911124197e-06,
|
|
"loss": 0.6886,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9665237456831725e-06,
|
|
"loss": 0.7032,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9663581787565898e-06,
|
|
"loss": 0.7159,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.966192210413223e-06,
|
|
"loss": 0.6896,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.966025840722015e-06,
|
|
"loss": 0.7004,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9658590697520735e-06,
|
|
"loss": 0.6726,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.965691897572674e-06,
|
|
"loss": 0.6911,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9655243242532584e-06,
|
|
"loss": 0.7235,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9653563498634347e-06,
|
|
"loss": 0.7014,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9651879744729786e-06,
|
|
"loss": 0.7192,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9650191981518317e-06,
|
|
"loss": 0.65,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.964850020970102e-06,
|
|
"loss": 0.7128,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9646804429980644e-06,
|
|
"loss": 0.7265,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.96451046430616e-06,
|
|
"loss": 0.7038,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.964340084964997e-06,
|
|
"loss": 0.7132,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.964169305045348e-06,
|
|
"loss": 0.6968,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.9639981246181555e-06,
|
|
"loss": 0.7456,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9638265437545247e-06,
|
|
"loss": 0.7403,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9636545625257297e-06,
|
|
"loss": 0.6893,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.96348218100321e-06,
|
|
"loss": 0.7341,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9633093992585706e-06,
|
|
"loss": 0.7459,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.963136217363585e-06,
|
|
"loss": 0.716,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9629626353901897e-06,
|
|
"loss": 0.7086,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9627886534104903e-06,
|
|
"loss": 0.7041,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.962614271496757e-06,
|
|
"loss": 0.7094,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.962439489721427e-06,
|
|
"loss": 0.6794,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.962264308157102e-06,
|
|
"loss": 0.6751,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9620887268765523e-06,
|
|
"loss": 0.7398,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.961912745952712e-06,
|
|
"loss": 0.6622,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.961736365458682e-06,
|
|
"loss": 0.7404,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"eval_loss": 0.6729753613471985,
|
|
"eval_runtime": 140.5864,
|
|
"eval_samples_per_second": 16.83,
|
|
"eval_steps_per_second": 2.81,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.96155958546773e-06,
|
|
"loss": 0.6928,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.961382406053288e-06,
|
|
"loss": 0.6832,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.961204827288955e-06,
|
|
"loss": 0.6775,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.961026849248496e-06,
|
|
"loss": 0.6995,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9608484720058416e-06,
|
|
"loss": 0.6508,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.960669695635087e-06,
|
|
"loss": 0.6865,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.960490520210496e-06,
|
|
"loss": 0.7273,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9603109458064955e-06,
|
|
"loss": 0.6513,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9601309724976795e-06,
|
|
"loss": 0.7051,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9599506003588068e-06,
|
|
"loss": 0.7164,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9597698294648034e-06,
|
|
"loss": 0.7269,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.959588659890759e-06,
|
|
"loss": 0.7082,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9594070917119306e-06,
|
|
"loss": 0.7164,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9592251250037394e-06,
|
|
"loss": 0.7156,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9590427598417733e-06,
|
|
"loss": 0.6603,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.958859996301785e-06,
|
|
"loss": 0.6811,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.958676834459693e-06,
|
|
"loss": 0.701,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9584932743915807e-06,
|
|
"loss": 0.6546,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9583093161736975e-06,
|
|
"loss": 0.7022,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9581249598824588e-06,
|
|
"loss": 0.7066,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.957940205594444e-06,
|
|
"loss": 0.6942,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.957755053386398e-06,
|
|
"loss": 0.6983,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.957569503335232e-06,
|
|
"loss": 0.7491,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.957383555518022e-06,
|
|
"loss": 0.6916,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9571972100120087e-06,
|
|
"loss": 0.7035,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9570104668945986e-06,
|
|
"loss": 0.6987,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.956823326243363e-06,
|
|
"loss": 0.7035,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.956635788136039e-06,
|
|
"loss": 0.6854,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9564478526505276e-06,
|
|
"loss": 0.7194,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9562595198648956e-06,
|
|
"loss": 0.7026,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9560707898573757e-06,
|
|
"loss": 0.6887,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9558816627063636e-06,
|
|
"loss": 0.7127,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9556921384904217e-06,
|
|
"loss": 0.7126,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9555022172882767e-06,
|
|
"loss": 0.671,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9553118991788203e-06,
|
|
"loss": 0.7004,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9551211842411083e-06,
|
|
"loss": 0.7303,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 1.9549300725543627e-06,
|
|
"loss": 0.7571,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9547385641979696e-06,
|
|
"loss": 0.6688,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9545466592514795e-06,
|
|
"loss": 0.7083,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9543543577946086e-06,
|
|
"loss": 0.7426,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9541616599072365e-06,
|
|
"loss": 0.6722,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.953968565669409e-06,
|
|
"loss": 0.7215,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9537750751613354e-06,
|
|
"loss": 0.7354,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.95358118846339e-06,
|
|
"loss": 0.7253,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9533869056561113e-06,
|
|
"loss": 0.6877,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9531922268202033e-06,
|
|
"loss": 0.72,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.952997152036534e-06,
|
|
"loss": 0.6894,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.952801681386135e-06,
|
|
"loss": 0.69,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9526058149502035e-06,
|
|
"loss": 0.689,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.952409552810101e-06,
|
|
"loss": 0.7495,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9522128950473525e-06,
|
|
"loss": 0.7378,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9520158417436486e-06,
|
|
"loss": 0.7041,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9518183929808433e-06,
|
|
"loss": 0.711,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9516205488409543e-06,
|
|
"loss": 0.6908,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9514223094061654e-06,
|
|
"loss": 0.7107,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.951223674758823e-06,
|
|
"loss": 0.7225,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9510246449814385e-06,
|
|
"loss": 0.7217,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.950825220156687e-06,
|
|
"loss": 0.7274,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.950625400367408e-06,
|
|
"loss": 0.6867,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9504251856966043e-06,
|
|
"loss": 0.7186,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.950224576227444e-06,
|
|
"loss": 0.6889,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9500235720432583e-06,
|
|
"loss": 0.7005,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9498221732275425e-06,
|
|
"loss": 0.6874,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9496203798639565e-06,
|
|
"loss": 0.7033,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9494181920363223e-06,
|
|
"loss": 0.707,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9492156098286283e-06,
|
|
"loss": 0.7348,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.949012633325024e-06,
|
|
"loss": 0.6636,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9488092626098254e-06,
|
|
"loss": 0.6975,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9486054977675103e-06,
|
|
"loss": 0.7124,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9484013388827204e-06,
|
|
"loss": 0.663,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9481967860402627e-06,
|
|
"loss": 0.6916,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9479918393251056e-06,
|
|
"loss": 0.7344,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9477864988223828e-06,
|
|
"loss": 0.6752,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9475807646173908e-06,
|
|
"loss": 0.6885,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9473746367955893e-06,
|
|
"loss": 0.6602,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.947168115442603e-06,
|
|
"loss": 0.6764,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.946961200644218e-06,
|
|
"loss": 0.6784,
|
|
"step": 2785
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.946753892486386e-06,
|
|
"loss": 0.7325,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.94654619105522e-06,
|
|
"loss": 0.6533,
|
|
"step": 2795
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.946338096436998e-06,
|
|
"loss": 0.6741,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9461296087181604e-06,
|
|
"loss": 0.7046,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9459207279853113e-06,
|
|
"loss": 0.6797,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.945711454325218e-06,
|
|
"loss": 0.7096,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9455017878248107e-06,
|
|
"loss": 0.698,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9452917285711834e-06,
|
|
"loss": 0.7036,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.945081276651593e-06,
|
|
"loss": 0.7205,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9448704321534588e-06,
|
|
"loss": 0.6583,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.944659195164364e-06,
|
|
"loss": 0.6757,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9444475657720545e-06,
|
|
"loss": 0.7033,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9442355440644394e-06,
|
|
"loss": 0.7029,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.944023130129591e-06,
|
|
"loss": 0.6993,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9438103240557446e-06,
|
|
"loss": 0.704,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9435971259312966e-06,
|
|
"loss": 0.7104,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9433835358448086e-06,
|
|
"loss": 0.7017,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.943169553885004e-06,
|
|
"loss": 0.7107,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9429551801407687e-06,
|
|
"loss": 0.6939,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.942740414701152e-06,
|
|
"loss": 0.6737,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9425252576553656e-06,
|
|
"loss": 0.7174,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.942309709092784e-06,
|
|
"loss": 0.7147,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9420937691029435e-06,
|
|
"loss": 0.7351,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9418774377755444e-06,
|
|
"loss": 0.6845,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9416607152004485e-06,
|
|
"loss": 0.7156,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9414436014676806e-06,
|
|
"loss": 0.7532,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.941226096667428e-06,
|
|
"loss": 0.7421,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.94100820089004e-06,
|
|
"loss": 0.7049,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.940789914226029e-06,
|
|
"loss": 0.7652,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.940571236766069e-06,
|
|
"loss": 0.6884,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9403521686009964e-06,
|
|
"loss": 0.6974,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.940132709821811e-06,
|
|
"loss": 0.7362,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9399128605196737e-06,
|
|
"loss": 0.7167,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9396926207859082e-06,
|
|
"loss": 0.6806,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.939471990712e-06,
|
|
"loss": 0.7069,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.939250970389597e-06,
|
|
"loss": 0.7185,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9390295599105085e-06,
|
|
"loss": 0.6996,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9388077593667075e-06,
|
|
"loss": 0.7304,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9385855688503276e-06,
|
|
"loss": 0.7209,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9383629884536644e-06,
|
|
"loss": 0.7077,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.938140018269176e-06,
|
|
"loss": 0.6838,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.937916658389483e-06,
|
|
"loss": 0.6784,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9376929089073665e-06,
|
|
"loss": 0.7185,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9374687699157703e-06,
|
|
"loss": 0.7146,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9372442415077994e-06,
|
|
"loss": 0.7131,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9370193237767213e-06,
|
|
"loss": 0.7014,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9367940168159648e-06,
|
|
"loss": 0.697,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9365683207191205e-06,
|
|
"loss": 0.7009,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9363422355799406e-06,
|
|
"loss": 0.7125,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.936115761492339e-06,
|
|
"loss": 0.6954,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.935888898550391e-06,
|
|
"loss": 0.7465,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.935661646848333e-06,
|
|
"loss": 0.7253,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.935434006480564e-06,
|
|
"loss": 0.7223,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.935205977541644e-06,
|
|
"loss": 0.7141,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9349775601262935e-06,
|
|
"loss": 0.732,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9347487543293958e-06,
|
|
"loss": 0.709,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.934519560245994e-06,
|
|
"loss": 0.724,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 1.9342899779712946e-06,
|
|
"loss": 0.6998,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.934060007600663e-06,
|
|
"loss": 0.6945,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9338296492296267e-06,
|
|
"loss": 0.7068,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9335989029538756e-06,
|
|
"loss": 0.691,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9333677688692595e-06,
|
|
"loss": 0.7063,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9331362470717886e-06,
|
|
"loss": 0.7518,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9329043376576357e-06,
|
|
"loss": 0.6806,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.932672040723134e-06,
|
|
"loss": 0.6878,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9324393563647772e-06,
|
|
"loss": 0.6961,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.932206284679221e-06,
|
|
"loss": 0.7125,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.931972825763281e-06,
|
|
"loss": 0.6893,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.931738979713934e-06,
|
|
"loss": 0.6821,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9315047466283177e-06,
|
|
"loss": 0.735,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9312701266037302e-06,
|
|
"loss": 0.6873,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.931035119737631e-06,
|
|
"loss": 0.6853,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9307997261276393e-06,
|
|
"loss": 0.7256,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9305639458715365e-06,
|
|
"loss": 0.7297,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.930327779067263e-06,
|
|
"loss": 0.7162,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9300912258129206e-06,
|
|
"loss": 0.7133,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9298542862067712e-06,
|
|
"loss": 0.7255,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9296169603472384e-06,
|
|
"loss": 0.6895,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.929379248332904e-06,
|
|
"loss": 0.729,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9291411502625123e-06,
|
|
"loss": 0.6971,
|
|
"step": 3185
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.928902666234967e-06,
|
|
"loss": 0.7232,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9286637963493323e-06,
|
|
"loss": 0.6653,
|
|
"step": 3195
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9284245407048323e-06,
|
|
"loss": 0.7309,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"eval_loss": 0.6701433062553406,
|
|
"eval_runtime": 140.1425,
|
|
"eval_samples_per_second": 16.883,
|
|
"eval_steps_per_second": 2.819,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.928184899400853e-06,
|
|
"loss": 0.7025,
|
|
"step": 3205
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9279448725369375e-06,
|
|
"loss": 0.711,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.927704460212792e-06,
|
|
"loss": 0.6813,
|
|
"step": 3215
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9274636625282816e-06,
|
|
"loss": 0.7209,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.927222479583431e-06,
|
|
"loss": 0.7279,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9269809114784265e-06,
|
|
"loss": 0.6779,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.926738958313612e-06,
|
|
"loss": 0.7237,
|
|
"step": 3235
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9264966201894945e-06,
|
|
"loss": 0.7123,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9262538972067375e-06,
|
|
"loss": 0.7147,
|
|
"step": 3245
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9260107894661666e-06,
|
|
"loss": 0.7104,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9257672970687673e-06,
|
|
"loss": 0.7059,
|
|
"step": 3255
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9255234201156834e-06,
|
|
"loss": 0.7259,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9252791587082195e-06,
|
|
"loss": 0.6778,
|
|
"step": 3265
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9250345129478396e-06,
|
|
"loss": 0.7456,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.924789482936168e-06,
|
|
"loss": 0.7254,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.924544068774987e-06,
|
|
"loss": 0.732,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.92429827056624e-06,
|
|
"loss": 0.6824,
|
|
"step": 3285
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9240520884120296e-06,
|
|
"loss": 0.6807,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.923805522414618e-06,
|
|
"loss": 0.6679,
|
|
"step": 3295
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.923558572676426e-06,
|
|
"loss": 0.6813,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9233112393000344e-06,
|
|
"loss": 0.6791,
|
|
"step": 3305
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9230635223881836e-06,
|
|
"loss": 0.6877,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.9228154220437733e-06,
|
|
"loss": 0.7023,
|
|
"step": 3315
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.922566938369861e-06,
|
|
"loss": 0.7001,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9223180714696664e-06,
|
|
"loss": 0.691,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.922068821446565e-06,
|
|
"loss": 0.7149,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9218191884040945e-06,
|
|
"loss": 0.6826,
|
|
"step": 3335
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9215691724459496e-06,
|
|
"loss": 0.7069,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9213187736759848e-06,
|
|
"loss": 0.7044,
|
|
"step": 3345
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9210679921982134e-06,
|
|
"loss": 0.7026,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9208168281168083e-06,
|
|
"loss": 0.7151,
|
|
"step": 3355
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9205652815361003e-06,
|
|
"loss": 0.7147,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.92031335256058e-06,
|
|
"loss": 0.6891,
|
|
"step": 3365
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9200610412948967e-06,
|
|
"loss": 0.7246,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9198083478438584e-06,
|
|
"loss": 0.6832,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.919555272312431e-06,
|
|
"loss": 0.6708,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.91930181480574e-06,
|
|
"loss": 0.7217,
|
|
"step": 3385
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9190479754290703e-06,
|
|
"loss": 0.7272,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.918793754287864e-06,
|
|
"loss": 0.6629,
|
|
"step": 3395
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.918539151487722e-06,
|
|
"loss": 0.6633,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9182841671344053e-06,
|
|
"loss": 0.7085,
|
|
"step": 3405
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.918028801333831e-06,
|
|
"loss": 0.7321,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9177730541920757e-06,
|
|
"loss": 0.7141,
|
|
"step": 3415
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9175169258153752e-06,
|
|
"loss": 0.6976,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9172604163101227e-06,
|
|
"loss": 0.7034,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9170035257828706e-06,
|
|
"loss": 0.6478,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9167462543403286e-06,
|
|
"loss": 0.6949,
|
|
"step": 3435
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9164886020893647e-06,
|
|
"loss": 0.7224,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9162305691370057e-06,
|
|
"loss": 0.7016,
|
|
"step": 3445
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9159721555904364e-06,
|
|
"loss": 0.7065,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9157133615569993e-06,
|
|
"loss": 0.6808,
|
|
"step": 3455
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9154541871441947e-06,
|
|
"loss": 0.7026,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9151946324596826e-06,
|
|
"loss": 0.7035,
|
|
"step": 3465
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9149346976112787e-06,
|
|
"loss": 0.6941,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9146743827069584e-06,
|
|
"loss": 0.6988,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9144136878548536e-06,
|
|
"loss": 0.6812,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.914152613163255e-06,
|
|
"loss": 0.6976,
|
|
"step": 3485
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.913891158740611e-06,
|
|
"loss": 0.7263,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.913629324695527e-06,
|
|
"loss": 0.6872,
|
|
"step": 3495
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.913367111136767e-06,
|
|
"loss": 0.6588,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.913104518173252e-06,
|
|
"loss": 0.6679,
|
|
"step": 3505
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.912841545914061e-06,
|
|
"loss": 0.7026,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9125781944684304e-06,
|
|
"loss": 0.6763,
|
|
"step": 3515
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9123144639457537e-06,
|
|
"loss": 0.7106,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9120503544555826e-06,
|
|
"loss": 0.7148,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.911785866107626e-06,
|
|
"loss": 0.7029,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9115209990117495e-06,
|
|
"loss": 0.7071,
|
|
"step": 3535
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9112557532779773e-06,
|
|
"loss": 0.6967,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9109901290164897e-06,
|
|
"loss": 0.6872,
|
|
"step": 3545
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9107241263376255e-06,
|
|
"loss": 0.6732,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9104577453518783e-06,
|
|
"loss": 0.7015,
|
|
"step": 3555
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.910190986169902e-06,
|
|
"loss": 0.6894,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 1.9099238489025054e-06,
|
|
"loss": 0.6748,
|
|
"step": 3565
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.909656333660655e-06,
|
|
"loss": 0.6691,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.909388440555474e-06,
|
|
"loss": 0.6763,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.909120169698244e-06,
|
|
"loss": 0.7212,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9088515212004006e-06,
|
|
"loss": 0.6999,
|
|
"step": 3585
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.90858249517354e-06,
|
|
"loss": 0.7089,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9083130917294116e-06,
|
|
"loss": 0.6819,
|
|
"step": 3595
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9080433109799243e-06,
|
|
"loss": 0.6902,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9077731530371425e-06,
|
|
"loss": 0.6825,
|
|
"step": 3605
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9075026180132873e-06,
|
|
"loss": 0.7008,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9072317060207364e-06,
|
|
"loss": 0.6789,
|
|
"step": 3615
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9069604171720243e-06,
|
|
"loss": 0.7221,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9066887515798426e-06,
|
|
"loss": 0.6761,
|
|
"step": 3625
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9064167093570382e-06,
|
|
"loss": 0.7134,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9061442906166154e-06,
|
|
"loss": 0.6633,
|
|
"step": 3635
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9058714954717345e-06,
|
|
"loss": 0.6869,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9055983240357123e-06,
|
|
"loss": 0.6967,
|
|
"step": 3645
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9053247764220218e-06,
|
|
"loss": 0.7272,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.905050852744292e-06,
|
|
"loss": 0.6696,
|
|
"step": 3655
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.904776553116309e-06,
|
|
"loss": 0.6606,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9045018776520138e-06,
|
|
"loss": 0.7055,
|
|
"step": 3665
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9042268264655048e-06,
|
|
"loss": 0.6879,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9039513996710357e-06,
|
|
"loss": 0.7322,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.903675597383016e-06,
|
|
"loss": 0.7115,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9033994197160124e-06,
|
|
"loss": 0.6892,
|
|
"step": 3685
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.903122866784746e-06,
|
|
"loss": 0.7278,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9028459387040944e-06,
|
|
"loss": 0.6844,
|
|
"step": 3695
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9025686355890916e-06,
|
|
"loss": 0.7143,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9022909575549265e-06,
|
|
"loss": 0.7161,
|
|
"step": 3705
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9020129047169443e-06,
|
|
"loss": 0.7013,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9017344771906463e-06,
|
|
"loss": 0.6588,
|
|
"step": 3715
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9014556750916879e-06,
|
|
"loss": 0.6451,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9011764985358817e-06,
|
|
"loss": 0.6991,
|
|
"step": 3725
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.900896947639195e-06,
|
|
"loss": 0.6901,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9006170225177508e-06,
|
|
"loss": 0.7236,
|
|
"step": 3735
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9003367232878273e-06,
|
|
"loss": 0.6827,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9000560500658592e-06,
|
|
"loss": 0.6791,
|
|
"step": 3745
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.8997750029684347e-06,
|
|
"loss": 0.6822,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.899493582112299e-06,
|
|
"loss": 0.7139,
|
|
"step": 3755
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.8992117876143516e-06,
|
|
"loss": 0.6901,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.8989296195916476e-06,
|
|
"loss": 0.7012,
|
|
"step": 3765
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.8986470781613973e-06,
|
|
"loss": 0.7305,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.8983641634409656e-06,
|
|
"loss": 0.6812,
|
|
"step": 3775
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.8980808755478726e-06,
|
|
"loss": 0.6781,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.8977972145997945e-06,
|
|
"loss": 0.7271,
|
|
"step": 3785
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.897513180714561e-06,
|
|
"loss": 0.7162,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.8972287740101572e-06,
|
|
"loss": 0.6669,
|
|
"step": 3795
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.8969439946047232e-06,
|
|
"loss": 0.7358,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.8966588426165544e-06,
|
|
"loss": 0.7315,
|
|
"step": 3805
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.8963733181640999e-06,
|
|
"loss": 0.7245,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.8960874213659643e-06,
|
|
"loss": 0.7233,
|
|
"step": 3815
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8958011523409067e-06,
|
|
"loss": 0.7128,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8955145112078408e-06,
|
|
"loss": 0.6579,
|
|
"step": 3825
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8952274980858344e-06,
|
|
"loss": 0.7155,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8949401130941109e-06,
|
|
"loss": 0.6825,
|
|
"step": 3835
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.894652356352047e-06,
|
|
"loss": 0.6838,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8943642279791747e-06,
|
|
"loss": 0.7339,
|
|
"step": 3845
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8940757280951799e-06,
|
|
"loss": 0.7334,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8937868568199026e-06,
|
|
"loss": 0.7113,
|
|
"step": 3855
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.893497614273338e-06,
|
|
"loss": 0.7085,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8932080005756346e-06,
|
|
"loss": 0.7179,
|
|
"step": 3865
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8929180158470953e-06,
|
|
"loss": 0.6847,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8926276602081777e-06,
|
|
"loss": 0.7149,
|
|
"step": 3875
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8923369337794926e-06,
|
|
"loss": 0.6732,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8920458366818055e-06,
|
|
"loss": 0.6769,
|
|
"step": 3885
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8917543690360351e-06,
|
|
"loss": 0.6755,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8914625309632552e-06,
|
|
"loss": 0.7257,
|
|
"step": 3895
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8911703225846921e-06,
|
|
"loss": 0.6711,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8908777440217274e-06,
|
|
"loss": 0.6978,
|
|
"step": 3905
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8905847953958951e-06,
|
|
"loss": 0.706,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8902914768288837e-06,
|
|
"loss": 0.7315,
|
|
"step": 3915
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8899977884425353e-06,
|
|
"loss": 0.7385,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8897037303588452e-06,
|
|
"loss": 0.6868,
|
|
"step": 3925
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.889409302699963e-06,
|
|
"loss": 0.6924,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8891145055881907e-06,
|
|
"loss": 0.6575,
|
|
"step": 3935
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8888193391459853e-06,
|
|
"loss": 0.7152,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8885238034959556e-06,
|
|
"loss": 0.6853,
|
|
"step": 3945
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8882278987608653e-06,
|
|
"loss": 0.6626,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8879316250636302e-06,
|
|
"loss": 0.6714,
|
|
"step": 3955
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8876349825273197e-06,
|
|
"loss": 0.6994,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8873379712751567e-06,
|
|
"loss": 0.7311,
|
|
"step": 3965
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8870405914305173e-06,
|
|
"loss": 0.7214,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.88674284311693e-06,
|
|
"loss": 0.7004,
|
|
"step": 3975
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8864447264580776e-06,
|
|
"loss": 0.6956,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8861462415777942e-06,
|
|
"loss": 0.7378,
|
|
"step": 3985
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.885847388600069e-06,
|
|
"loss": 0.7195,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8855481676490417e-06,
|
|
"loss": 0.6948,
|
|
"step": 3995
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.885248578849007e-06,
|
|
"loss": 0.7367,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"eval_loss": 0.666739821434021,
|
|
"eval_runtime": 138.8501,
|
|
"eval_samples_per_second": 17.04,
|
|
"eval_steps_per_second": 2.845,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.884948622324411e-06,
|
|
"loss": 0.6942,
|
|
"step": 4005
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.884648298199853e-06,
|
|
"loss": 0.6911,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8843476066000856e-06,
|
|
"loss": 0.7338,
|
|
"step": 4015
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.884046547650013e-06,
|
|
"loss": 0.6914,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8837451214746922e-06,
|
|
"loss": 0.6998,
|
|
"step": 4025
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8834433281993336e-06,
|
|
"loss": 0.7024,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8831411679492992e-06,
|
|
"loss": 0.6748,
|
|
"step": 4035
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.882838640850104e-06,
|
|
"loss": 0.687,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8825357470274148e-06,
|
|
"loss": 0.661,
|
|
"step": 4045
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8822324866070512e-06,
|
|
"loss": 0.7209,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.8819288597149846e-06,
|
|
"loss": 0.6597,
|
|
"step": 4055
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 1.88162486647734e-06,
|
|
"loss": 0.6923,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8813205070203924e-06,
|
|
"loss": 0.6875,
|
|
"step": 4065
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8810157814705705e-06,
|
|
"loss": 0.7085,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8807106899544547e-06,
|
|
"loss": 0.6786,
|
|
"step": 4075
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8804052325987775e-06,
|
|
"loss": 0.7078,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8800994095304227e-06,
|
|
"loss": 0.6945,
|
|
"step": 4085
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8797932208764273e-06,
|
|
"loss": 0.7232,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8794866667639791e-06,
|
|
"loss": 0.6876,
|
|
"step": 4095
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8791797473204176e-06,
|
|
"loss": 0.7299,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8788724626732347e-06,
|
|
"loss": 0.6689,
|
|
"step": 4105
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.878564812950074e-06,
|
|
"loss": 0.7345,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8782567982787302e-06,
|
|
"loss": 0.7271,
|
|
"step": 4115
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8779484187871504e-06,
|
|
"loss": 0.7328,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8776396746034324e-06,
|
|
"loss": 0.7043,
|
|
"step": 4125
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8773305658558258e-06,
|
|
"loss": 0.6841,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8770210926727316e-06,
|
|
"loss": 0.7154,
|
|
"step": 4135
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8767112551827027e-06,
|
|
"loss": 0.7329,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8764010535144426e-06,
|
|
"loss": 0.7226,
|
|
"step": 4145
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8760904877968065e-06,
|
|
"loss": 0.6958,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8757795581588005e-06,
|
|
"loss": 0.7373,
|
|
"step": 4155
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8754682647295822e-06,
|
|
"loss": 0.7313,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.87515660763846e-06,
|
|
"loss": 0.7333,
|
|
"step": 4165
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8748445870148941e-06,
|
|
"loss": 0.6882,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8745322029884946e-06,
|
|
"loss": 0.681,
|
|
"step": 4175
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8742194556890233e-06,
|
|
"loss": 0.7227,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8739063452463926e-06,
|
|
"loss": 0.7047,
|
|
"step": 4185
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.873592871790666e-06,
|
|
"loss": 0.6831,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8732790354520579e-06,
|
|
"loss": 0.6887,
|
|
"step": 4195
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8729648363609324e-06,
|
|
"loss": 0.6833,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8726502746478058e-06,
|
|
"loss": 0.6928,
|
|
"step": 4205
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.872335350443344e-06,
|
|
"loss": 0.7192,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.872020063878364e-06,
|
|
"loss": 0.6792,
|
|
"step": 4215
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8717044150838326e-06,
|
|
"loss": 0.6942,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.871388404190868e-06,
|
|
"loss": 0.733,
|
|
"step": 4225
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8710720313307382e-06,
|
|
"loss": 0.6587,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8707552966348618e-06,
|
|
"loss": 0.7068,
|
|
"step": 4235
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.870438200234808e-06,
|
|
"loss": 0.7121,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8701207422622951e-06,
|
|
"loss": 0.6572,
|
|
"step": 4245
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.869802922849193e-06,
|
|
"loss": 0.6988,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8694847421275206e-06,
|
|
"loss": 0.7009,
|
|
"step": 4255
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8691662002294481e-06,
|
|
"loss": 0.7128,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8688472972872947e-06,
|
|
"loss": 0.6869,
|
|
"step": 4265
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8685280334335296e-06,
|
|
"loss": 0.6775,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8682084088007728e-06,
|
|
"loss": 0.6787,
|
|
"step": 4275
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8678884235217928e-06,
|
|
"loss": 0.7285,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8675680777295097e-06,
|
|
"loss": 0.6764,
|
|
"step": 4285
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8672473715569916e-06,
|
|
"loss": 0.7242,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8669263051374572e-06,
|
|
"loss": 0.6771,
|
|
"step": 4295
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.8666048786042752e-06,
|
|
"loss": 0.7394,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.866283092090963e-06,
|
|
"loss": 0.6957,
|
|
"step": 4305
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8659609457311875e-06,
|
|
"loss": 0.6816,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.865638439658766e-06,
|
|
"loss": 0.6952,
|
|
"step": 4315
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8653155740076647e-06,
|
|
"loss": 0.6754,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8649923489119992e-06,
|
|
"loss": 0.7266,
|
|
"step": 4325
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8646687645060343e-06,
|
|
"loss": 0.7218,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8643448209241841e-06,
|
|
"loss": 0.6779,
|
|
"step": 4335
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.864020518301012e-06,
|
|
"loss": 0.7003,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8636958567712303e-06,
|
|
"loss": 0.7015,
|
|
"step": 4345
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8633708364697013e-06,
|
|
"loss": 0.6808,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8630454575314344e-06,
|
|
"loss": 0.6816,
|
|
"step": 4355
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8627197200915902e-06,
|
|
"loss": 0.7126,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.862393624285477e-06,
|
|
"loss": 0.6838,
|
|
"step": 4365
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8620671702485517e-06,
|
|
"loss": 0.6578,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.861740358116421e-06,
|
|
"loss": 0.7344,
|
|
"step": 4375
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8614131880248393e-06,
|
|
"loss": 0.7172,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8610856601097108e-06,
|
|
"loss": 0.7621,
|
|
"step": 4385
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8607577745070873e-06,
|
|
"loss": 0.7071,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8604295313531698e-06,
|
|
"loss": 0.6889,
|
|
"step": 4395
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8601009307843078e-06,
|
|
"loss": 0.6992,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8597719729369988e-06,
|
|
"loss": 0.6843,
|
|
"step": 4405
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8594426579478891e-06,
|
|
"loss": 0.6791,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8591129859537738e-06,
|
|
"loss": 0.7045,
|
|
"step": 4415
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.858782957091595e-06,
|
|
"loss": 0.6975,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.858452571498444e-06,
|
|
"loss": 0.7386,
|
|
"step": 4425
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8581218293115607e-06,
|
|
"loss": 0.6974,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8577907306683317e-06,
|
|
"loss": 0.7313,
|
|
"step": 4435
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.857459275706293e-06,
|
|
"loss": 0.6941,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8571274645631281e-06,
|
|
"loss": 0.7037,
|
|
"step": 4445
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8567952973766685e-06,
|
|
"loss": 0.655,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.856462774284893e-06,
|
|
"loss": 0.712,
|
|
"step": 4455
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8561298954259297e-06,
|
|
"loss": 0.68,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8557966609380528e-06,
|
|
"loss": 0.6917,
|
|
"step": 4465
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8554630709596855e-06,
|
|
"loss": 0.6719,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8551291256293977e-06,
|
|
"loss": 0.7117,
|
|
"step": 4475
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.854794825085908e-06,
|
|
"loss": 0.7157,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8544601694680814e-06,
|
|
"loss": 0.7147,
|
|
"step": 4485
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8541251589149313e-06,
|
|
"loss": 0.6964,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.853789793565618e-06,
|
|
"loss": 0.7083,
|
|
"step": 4495
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8534540735594492e-06,
|
|
"loss": 0.6735,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8531179990358806e-06,
|
|
"loss": 0.6719,
|
|
"step": 4505
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.852781570134514e-06,
|
|
"loss": 0.6729,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8524447869950995e-06,
|
|
"loss": 0.6988,
|
|
"step": 4515
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8521076497575335e-06,
|
|
"loss": 0.6907,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8517701585618602e-06,
|
|
"loss": 0.7268,
|
|
"step": 4525
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8514323135482704e-06,
|
|
"loss": 0.7261,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8510941148571018e-06,
|
|
"loss": 0.6763,
|
|
"step": 4535
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8507555626288397e-06,
|
|
"loss": 0.6801,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8504166570041152e-06,
|
|
"loss": 0.7502,
|
|
"step": 4545
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 1.8500773981237069e-06,
|
|
"loss": 0.6682,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8497377861285401e-06,
|
|
"loss": 0.709,
|
|
"step": 4555
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8493978211596865e-06,
|
|
"loss": 0.7001,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.849057503358365e-06,
|
|
"loss": 0.7111,
|
|
"step": 4565
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8487168328659403e-06,
|
|
"loss": 0.701,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8483758098239237e-06,
|
|
"loss": 0.691,
|
|
"step": 4575
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8480344343739738e-06,
|
|
"loss": 0.6979,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8476927066578946e-06,
|
|
"loss": 0.7074,
|
|
"step": 4585
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8473506268176372e-06,
|
|
"loss": 0.7229,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8470081949952982e-06,
|
|
"loss": 0.7015,
|
|
"step": 4595
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.846665411333121e-06,
|
|
"loss": 0.7329,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8463222759734948e-06,
|
|
"loss": 0.6908,
|
|
"step": 4605
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8459787890589554e-06,
|
|
"loss": 0.7381,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8456349507321836e-06,
|
|
"loss": 0.6663,
|
|
"step": 4615
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8452907611360076e-06,
|
|
"loss": 0.7021,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8449462204134002e-06,
|
|
"loss": 0.6826,
|
|
"step": 4625
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8446013287074811e-06,
|
|
"loss": 0.6959,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8442560861615148e-06,
|
|
"loss": 0.7348,
|
|
"step": 4635
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8439104929189124e-06,
|
|
"loss": 0.6699,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8435645491232307e-06,
|
|
"loss": 0.7354,
|
|
"step": 4645
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8432182549181707e-06,
|
|
"loss": 0.6721,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8428716104475806e-06,
|
|
"loss": 0.7104,
|
|
"step": 4655
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8425246158554537e-06,
|
|
"loss": 0.6808,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8421772712859282e-06,
|
|
"loss": 0.6941,
|
|
"step": 4665
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8418295768832883e-06,
|
|
"loss": 0.687,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.841481532791963e-06,
|
|
"loss": 0.6894,
|
|
"step": 4675
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.841133139156527e-06,
|
|
"loss": 0.6663,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8407843961216995e-06,
|
|
"loss": 0.6423,
|
|
"step": 4685
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8404353038323459e-06,
|
|
"loss": 0.7073,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8400858624334758e-06,
|
|
"loss": 0.7149,
|
|
"step": 4695
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8397360720702442e-06,
|
|
"loss": 0.7024,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8393859328879511e-06,
|
|
"loss": 0.7039,
|
|
"step": 4705
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.839035445032041e-06,
|
|
"loss": 0.6758,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8386846086481036e-06,
|
|
"loss": 0.6833,
|
|
"step": 4715
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8383334238818736e-06,
|
|
"loss": 0.6996,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8379818908792295e-06,
|
|
"loss": 0.692,
|
|
"step": 4725
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8376300097861953e-06,
|
|
"loss": 0.7099,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8372777807489396e-06,
|
|
"loss": 0.6859,
|
|
"step": 4735
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8369252039137753e-06,
|
|
"loss": 0.696,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8365722794271594e-06,
|
|
"loss": 0.6713,
|
|
"step": 4745
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8362190074356935e-06,
|
|
"loss": 0.7215,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8358653880861245e-06,
|
|
"loss": 0.7001,
|
|
"step": 4755
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.835511421525342e-06,
|
|
"loss": 0.727,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8351571079003812e-06,
|
|
"loss": 0.6764,
|
|
"step": 4765
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8348024473584208e-06,
|
|
"loss": 0.6667,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.834447440046783e-06,
|
|
"loss": 0.7291,
|
|
"step": 4775
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8340920861129358e-06,
|
|
"loss": 0.6699,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8337363857044894e-06,
|
|
"loss": 0.6825,
|
|
"step": 4785
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.833380338969199e-06,
|
|
"loss": 0.6954,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.833023946054963e-06,
|
|
"loss": 0.7002,
|
|
"step": 4795
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.8326672071098246e-06,
|
|
"loss": 0.699,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"eval_loss": 0.6643534302711487,
|
|
"eval_runtime": 138.4591,
|
|
"eval_samples_per_second": 17.088,
|
|
"eval_steps_per_second": 2.853,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8323101222819693e-06,
|
|
"loss": 0.677,
|
|
"step": 4805
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8319526917197275e-06,
|
|
"loss": 0.7381,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8315949155715722e-06,
|
|
"loss": 0.6681,
|
|
"step": 4815
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8312367939861214e-06,
|
|
"loss": 0.6696,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8308783271121346e-06,
|
|
"loss": 0.713,
|
|
"step": 4825
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.830519515098517e-06,
|
|
"loss": 0.7155,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8301603580943148e-06,
|
|
"loss": 0.694,
|
|
"step": 4835
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8298008562487195e-06,
|
|
"loss": 0.7343,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.829441009711065e-06,
|
|
"loss": 0.7173,
|
|
"step": 4845
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8290808186308276e-06,
|
|
"loss": 0.7167,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.828720283157629e-06,
|
|
"loss": 0.7265,
|
|
"step": 4855
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8283594034412313e-06,
|
|
"loss": 0.6793,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8279981796315412e-06,
|
|
"loss": 0.6807,
|
|
"step": 4865
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8276366118786078e-06,
|
|
"loss": 0.6737,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8272747003326235e-06,
|
|
"loss": 0.7202,
|
|
"step": 4875
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8269124451439231e-06,
|
|
"loss": 0.7226,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8265498464629837e-06,
|
|
"loss": 0.6772,
|
|
"step": 4885
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8261869044404265e-06,
|
|
"loss": 0.6868,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.825823619227014e-06,
|
|
"loss": 0.703,
|
|
"step": 4895
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.825459990973652e-06,
|
|
"loss": 0.7193,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8250960198313878e-06,
|
|
"loss": 0.7194,
|
|
"step": 4905
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8247317059514126e-06,
|
|
"loss": 0.6859,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8243670494850592e-06,
|
|
"loss": 0.6749,
|
|
"step": 4915
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.824002050583802e-06,
|
|
"loss": 0.6542,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8236367093992592e-06,
|
|
"loss": 0.6592,
|
|
"step": 4925
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.82327102608319e-06,
|
|
"loss": 0.6788,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8229050007874961e-06,
|
|
"loss": 0.6909,
|
|
"step": 4935
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.822538633664221e-06,
|
|
"loss": 0.7105,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8221719248655507e-06,
|
|
"loss": 0.6731,
|
|
"step": 4945
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.821804874543813e-06,
|
|
"loss": 0.685,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.821437482851477e-06,
|
|
"loss": 0.7251,
|
|
"step": 4955
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.821069749941154e-06,
|
|
"loss": 0.6517,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8207016759655973e-06,
|
|
"loss": 0.704,
|
|
"step": 4965
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8203332610777017e-06,
|
|
"loss": 0.7063,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8199645054305037e-06,
|
|
"loss": 0.7208,
|
|
"step": 4975
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8195954091771805e-06,
|
|
"loss": 0.7339,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8192259724710518e-06,
|
|
"loss": 0.6627,
|
|
"step": 4985
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.818856195465579e-06,
|
|
"loss": 0.7021,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8184860783143635e-06,
|
|
"loss": 0.7108,
|
|
"step": 4995
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8181156211711488e-06,
|
|
"loss": 0.639,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8177448241898196e-06,
|
|
"loss": 0.7256,
|
|
"step": 5005
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.817373687524402e-06,
|
|
"loss": 0.6977,
|
|
"step": 5010
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.817002211329063e-06,
|
|
"loss": 0.6984,
|
|
"step": 5015
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8166303957581103e-06,
|
|
"loss": 0.6875,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8162582409659932e-06,
|
|
"loss": 0.6602,
|
|
"step": 5025
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.815885747107301e-06,
|
|
"loss": 0.6819,
|
|
"step": 5030
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.815512914336765e-06,
|
|
"loss": 0.6843,
|
|
"step": 5035
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8151397428092563e-06,
|
|
"loss": 0.7079,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.8147662326797872e-06,
|
|
"loss": 0.7022,
|
|
"step": 5045
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8143923841035107e-06,
|
|
"loss": 0.6897,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8140181972357201e-06,
|
|
"loss": 0.6774,
|
|
"step": 5055
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8136436722318496e-06,
|
|
"loss": 0.6877,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.813268809247473e-06,
|
|
"loss": 0.6957,
|
|
"step": 5065
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8128936084383058e-06,
|
|
"loss": 0.7274,
|
|
"step": 5070
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.812518069960203e-06,
|
|
"loss": 0.6767,
|
|
"step": 5075
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8121421939691599e-06,
|
|
"loss": 0.6857,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8117659806213122e-06,
|
|
"loss": 0.6637,
|
|
"step": 5085
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8113894300729356e-06,
|
|
"loss": 0.6777,
|
|
"step": 5090
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8110125424804458e-06,
|
|
"loss": 0.6666,
|
|
"step": 5095
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8106353180003988e-06,
|
|
"loss": 0.6811,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8102577567894905e-06,
|
|
"loss": 0.6735,
|
|
"step": 5105
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8098798590045563e-06,
|
|
"loss": 0.7543,
|
|
"step": 5110
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8095016248025717e-06,
|
|
"loss": 0.6957,
|
|
"step": 5115
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.809123054340652e-06,
|
|
"loss": 0.6969,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8087441477760517e-06,
|
|
"loss": 0.6775,
|
|
"step": 5125
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8083649052661661e-06,
|
|
"loss": 0.6794,
|
|
"step": 5130
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8079853269685282e-06,
|
|
"loss": 0.6253,
|
|
"step": 5135
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8076054130408123e-06,
|
|
"loss": 0.7048,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8072251636408314e-06,
|
|
"loss": 0.681,
|
|
"step": 5145
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.806844578926537e-06,
|
|
"loss": 0.6644,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8064636590560215e-06,
|
|
"loss": 0.6972,
|
|
"step": 5155
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8060824041875154e-06,
|
|
"loss": 0.6831,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8057008144793882e-06,
|
|
"loss": 0.6738,
|
|
"step": 5165
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8053188900901497e-06,
|
|
"loss": 0.7293,
|
|
"step": 5170
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8049366311784477e-06,
|
|
"loss": 0.6975,
|
|
"step": 5175
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8045540379030691e-06,
|
|
"loss": 0.7275,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8041711104229401e-06,
|
|
"loss": 0.6867,
|
|
"step": 5185
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.803787848897125e-06,
|
|
"loss": 0.6857,
|
|
"step": 5190
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8034042534848277e-06,
|
|
"loss": 0.6861,
|
|
"step": 5195
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8030203243453901e-06,
|
|
"loss": 0.6732,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8026360616382933e-06,
|
|
"loss": 0.7133,
|
|
"step": 5205
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8022514655231565e-06,
|
|
"loss": 0.7196,
|
|
"step": 5210
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8018665361597373e-06,
|
|
"loss": 0.6929,
|
|
"step": 5215
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.801481273707933e-06,
|
|
"loss": 0.7227,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8010956783277772e-06,
|
|
"loss": 0.6655,
|
|
"step": 5225
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8007097501794435e-06,
|
|
"loss": 0.7454,
|
|
"step": 5230
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.8003234894232426e-06,
|
|
"loss": 0.6815,
|
|
"step": 5235
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.7999368962196243e-06,
|
|
"loss": 0.7456,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.7995499707291762e-06,
|
|
"loss": 0.6822,
|
|
"step": 5245
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.799162713112623e-06,
|
|
"loss": 0.7083,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.798775123530829e-06,
|
|
"loss": 0.6874,
|
|
"step": 5255
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.798387202144795e-06,
|
|
"loss": 0.6864,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.7979989491156603e-06,
|
|
"loss": 0.6991,
|
|
"step": 5265
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.797610364604702e-06,
|
|
"loss": 0.7077,
|
|
"step": 5270
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.7972214487733345e-06,
|
|
"loss": 0.6461,
|
|
"step": 5275
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.7968322017831102e-06,
|
|
"loss": 0.6968,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.7964426237957188e-06,
|
|
"loss": 0.6635,
|
|
"step": 5285
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 1.7960527149729878e-06,
|
|
"loss": 0.684,
|
|
"step": 5290
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7956624754768818e-06,
|
|
"loss": 0.6784,
|
|
"step": 5295
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.795271905469503e-06,
|
|
"loss": 0.7481,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7948810051130905e-06,
|
|
"loss": 0.683,
|
|
"step": 5305
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.794489774570021e-06,
|
|
"loss": 0.6661,
|
|
"step": 5310
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7940982140028087e-06,
|
|
"loss": 0.7184,
|
|
"step": 5315
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.793706323574104e-06,
|
|
"loss": 0.6612,
|
|
"step": 5320
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7933141034466948e-06,
|
|
"loss": 0.703,
|
|
"step": 5325
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.792921553783506e-06,
|
|
"loss": 0.7108,
|
|
"step": 5330
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7925286747475994e-06,
|
|
"loss": 0.7301,
|
|
"step": 5335
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7921354665021735e-06,
|
|
"loss": 0.6704,
|
|
"step": 5340
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7917419292105636e-06,
|
|
"loss": 0.7065,
|
|
"step": 5345
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7913480630362417e-06,
|
|
"loss": 0.7033,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.790953868142816e-06,
|
|
"loss": 0.7258,
|
|
"step": 5355
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.790559344694032e-06,
|
|
"loss": 0.6887,
|
|
"step": 5360
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7901644928537715e-06,
|
|
"loss": 0.7195,
|
|
"step": 5365
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7897693127860524e-06,
|
|
"loss": 0.7058,
|
|
"step": 5370
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7893738046550286e-06,
|
|
"loss": 0.7177,
|
|
"step": 5375
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7889779686249912e-06,
|
|
"loss": 0.6891,
|
|
"step": 5380
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.788581804860367e-06,
|
|
"loss": 0.6551,
|
|
"step": 5385
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7881853135257185e-06,
|
|
"loss": 0.7061,
|
|
"step": 5390
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7877884947857455e-06,
|
|
"loss": 0.6706,
|
|
"step": 5395
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7873913488052827e-06,
|
|
"loss": 0.6719,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7869938757493011e-06,
|
|
"loss": 0.7022,
|
|
"step": 5405
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7865960757829075e-06,
|
|
"loss": 0.676,
|
|
"step": 5410
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7861979490713445e-06,
|
|
"loss": 0.7427,
|
|
"step": 5415
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7857994957799906e-06,
|
|
"loss": 0.698,
|
|
"step": 5420
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.78540071607436e-06,
|
|
"loss": 0.7023,
|
|
"step": 5425
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.785001610120102e-06,
|
|
"loss": 0.6985,
|
|
"step": 5430
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.784602178083002e-06,
|
|
"loss": 0.7192,
|
|
"step": 5435
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7842024201289801e-06,
|
|
"loss": 0.6682,
|
|
"step": 5440
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7838023364240929e-06,
|
|
"loss": 0.694,
|
|
"step": 5445
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7834019271345313e-06,
|
|
"loss": 0.6484,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7830011924266221e-06,
|
|
"loss": 0.6921,
|
|
"step": 5455
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7826001324668267e-06,
|
|
"loss": 0.7135,
|
|
"step": 5460
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7821987474217424e-06,
|
|
"loss": 0.6631,
|
|
"step": 5465
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7817970374581006e-06,
|
|
"loss": 0.6572,
|
|
"step": 5470
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.781395002742768e-06,
|
|
"loss": 0.7068,
|
|
"step": 5475
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.780992643442747e-06,
|
|
"loss": 0.6417,
|
|
"step": 5480
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7805899597251735e-06,
|
|
"loss": 0.6847,
|
|
"step": 5485
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7801869517573191e-06,
|
|
"loss": 0.6862,
|
|
"step": 5490
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.77978361970659e-06,
|
|
"loss": 0.6696,
|
|
"step": 5495
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7793799637405265e-06,
|
|
"loss": 0.737,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7789759840268037e-06,
|
|
"loss": 0.6826,
|
|
"step": 5505
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7785716807332315e-06,
|
|
"loss": 0.7288,
|
|
"step": 5510
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.778167054027754e-06,
|
|
"loss": 0.6856,
|
|
"step": 5515
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7777621040784496e-06,
|
|
"loss": 0.6801,
|
|
"step": 5520
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.777356831053531e-06,
|
|
"loss": 0.6737,
|
|
"step": 5525
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7769512351213448e-06,
|
|
"loss": 0.6547,
|
|
"step": 5530
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.7765453164503722e-06,
|
|
"loss": 0.7255,
|
|
"step": 5535
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7761390752092284e-06,
|
|
"loss": 0.6757,
|
|
"step": 5540
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7757325115666624e-06,
|
|
"loss": 0.6752,
|
|
"step": 5545
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7753256256915572e-06,
|
|
"loss": 0.6901,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7749184177529294e-06,
|
|
"loss": 0.6747,
|
|
"step": 5555
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7745108879199302e-06,
|
|
"loss": 0.6852,
|
|
"step": 5560
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7741030363618437e-06,
|
|
"loss": 0.6804,
|
|
"step": 5565
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7736948632480874e-06,
|
|
"loss": 0.6783,
|
|
"step": 5570
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7732863687482138e-06,
|
|
"loss": 0.6799,
|
|
"step": 5575
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.772877553031907e-06,
|
|
"loss": 0.7125,
|
|
"step": 5580
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7724684162689863e-06,
|
|
"loss": 0.6851,
|
|
"step": 5585
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.772058958629403e-06,
|
|
"loss": 0.6906,
|
|
"step": 5590
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7716491802832425e-06,
|
|
"loss": 0.7292,
|
|
"step": 5595
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.771239081400723e-06,
|
|
"loss": 0.7028,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"eval_loss": 0.6604963541030884,
|
|
"eval_runtime": 138.4769,
|
|
"eval_samples_per_second": 17.086,
|
|
"eval_steps_per_second": 2.852,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7708286621521964e-06,
|
|
"loss": 0.6984,
|
|
"step": 5605
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7704179227081467e-06,
|
|
"loss": 0.7075,
|
|
"step": 5610
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7700068632391917e-06,
|
|
"loss": 0.7014,
|
|
"step": 5615
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7695954839160824e-06,
|
|
"loss": 0.6986,
|
|
"step": 5620
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7691837849097015e-06,
|
|
"loss": 0.7069,
|
|
"step": 5625
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7687717663910658e-06,
|
|
"loss": 0.6893,
|
|
"step": 5630
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7683594285313235e-06,
|
|
"loss": 0.6479,
|
|
"step": 5635
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7679467715017567e-06,
|
|
"loss": 0.7326,
|
|
"step": 5640
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7675337954737795e-06,
|
|
"loss": 0.7032,
|
|
"step": 5645
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7671205006189384e-06,
|
|
"loss": 0.6587,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7667068871089126e-06,
|
|
"loss": 0.6494,
|
|
"step": 5655
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7662929551155136e-06,
|
|
"loss": 0.7001,
|
|
"step": 5660
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7658787048106846e-06,
|
|
"loss": 0.7043,
|
|
"step": 5665
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7654641363665027e-06,
|
|
"loss": 0.6735,
|
|
"step": 5670
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.765049249955175e-06,
|
|
"loss": 0.706,
|
|
"step": 5675
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.764634045749042e-06,
|
|
"loss": 0.7178,
|
|
"step": 5680
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7642185239205763e-06,
|
|
"loss": 0.6817,
|
|
"step": 5685
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7638026846423813e-06,
|
|
"loss": 0.739,
|
|
"step": 5690
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7633865280871935e-06,
|
|
"loss": 0.704,
|
|
"step": 5695
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.762970054427881e-06,
|
|
"loss": 0.6859,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7625532638374429e-06,
|
|
"loss": 0.6645,
|
|
"step": 5705
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7621361564890104e-06,
|
|
"loss": 0.6885,
|
|
"step": 5710
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7617187325558463e-06,
|
|
"loss": 0.7211,
|
|
"step": 5715
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7613009922113449e-06,
|
|
"loss": 0.6563,
|
|
"step": 5720
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.760882935629032e-06,
|
|
"loss": 0.6872,
|
|
"step": 5725
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7604645629825647e-06,
|
|
"loss": 0.7238,
|
|
"step": 5730
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7600458744457312e-06,
|
|
"loss": 0.6829,
|
|
"step": 5735
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7596268701924513e-06,
|
|
"loss": 0.6752,
|
|
"step": 5740
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7592075503967751e-06,
|
|
"loss": 0.7009,
|
|
"step": 5745
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7587879152328852e-06,
|
|
"loss": 0.6987,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.758367964875094e-06,
|
|
"loss": 0.7154,
|
|
"step": 5755
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7579476994978454e-06,
|
|
"loss": 0.6551,
|
|
"step": 5760
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7575271192757138e-06,
|
|
"loss": 0.6711,
|
|
"step": 5765
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7571062243834046e-06,
|
|
"loss": 0.7064,
|
|
"step": 5770
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7566850149957536e-06,
|
|
"loss": 0.6889,
|
|
"step": 5775
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7562634912877282e-06,
|
|
"loss": 0.6998,
|
|
"step": 5780
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 1.7558416534344252e-06,
|
|
"loss": 0.6513,
|
|
"step": 5785
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7554195016110725e-06,
|
|
"loss": 0.7019,
|
|
"step": 5790
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7549970359930285e-06,
|
|
"loss": 0.7119,
|
|
"step": 5795
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7545742567557811e-06,
|
|
"loss": 0.6858,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7541511640749499e-06,
|
|
"loss": 0.6887,
|
|
"step": 5805
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.753727758126283e-06,
|
|
"loss": 0.6976,
|
|
"step": 5810
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7533040390856608e-06,
|
|
"loss": 0.6699,
|
|
"step": 5815
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7528800071290914e-06,
|
|
"loss": 0.6802,
|
|
"step": 5820
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7524556624327147e-06,
|
|
"loss": 0.6804,
|
|
"step": 5825
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7520310051727992e-06,
|
|
"loss": 0.6678,
|
|
"step": 5830
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7516060355257443e-06,
|
|
"loss": 0.6856,
|
|
"step": 5835
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7511807536680782e-06,
|
|
"loss": 0.6983,
|
|
"step": 5840
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.75075515977646e-06,
|
|
"loss": 0.7212,
|
|
"step": 5845
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7503292540276772e-06,
|
|
"loss": 0.6783,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7499030365986475e-06,
|
|
"loss": 0.6932,
|
|
"step": 5855
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.749476507666418e-06,
|
|
"loss": 0.7191,
|
|
"step": 5860
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7490496674081648e-06,
|
|
"loss": 0.6817,
|
|
"step": 5865
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.748622516001194e-06,
|
|
"loss": 0.6568,
|
|
"step": 5870
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7481950536229402e-06,
|
|
"loss": 0.6861,
|
|
"step": 5875
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7477672804509679e-06,
|
|
"loss": 0.6995,
|
|
"step": 5880
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7473391966629698e-06,
|
|
"loss": 0.72,
|
|
"step": 5885
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7469108024367687e-06,
|
|
"loss": 0.6983,
|
|
"step": 5890
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.746482097950316e-06,
|
|
"loss": 0.6961,
|
|
"step": 5895
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7460530833816915e-06,
|
|
"loss": 0.7183,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.745623758909104e-06,
|
|
"loss": 0.7194,
|
|
"step": 5905
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7451941247108914e-06,
|
|
"loss": 0.6699,
|
|
"step": 5910
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.74476418096552e-06,
|
|
"loss": 0.6807,
|
|
"step": 5915
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7443339278515846e-06,
|
|
"loss": 0.6835,
|
|
"step": 5920
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7439033655478084e-06,
|
|
"loss": 0.6874,
|
|
"step": 5925
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7434724942330436e-06,
|
|
"loss": 0.6832,
|
|
"step": 5930
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7430413140862703e-06,
|
|
"loss": 0.6661,
|
|
"step": 5935
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7426098252865968e-06,
|
|
"loss": 0.677,
|
|
"step": 5940
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.74217802801326e-06,
|
|
"loss": 0.6587,
|
|
"step": 5945
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7417459224456246e-06,
|
|
"loss": 0.6613,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.741313508763184e-06,
|
|
"loss": 0.7241,
|
|
"step": 5955
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.740880787145558e-06,
|
|
"loss": 0.7194,
|
|
"step": 5960
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7404477577724964e-06,
|
|
"loss": 0.6832,
|
|
"step": 5965
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7400144208238751e-06,
|
|
"loss": 0.6761,
|
|
"step": 5970
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7395807764796993e-06,
|
|
"loss": 0.7259,
|
|
"step": 5975
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7391468249201004e-06,
|
|
"loss": 0.6704,
|
|
"step": 5980
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7387125663253386e-06,
|
|
"loss": 0.6836,
|
|
"step": 5985
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7382780008758005e-06,
|
|
"loss": 0.6555,
|
|
"step": 5990
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7378431287520016e-06,
|
|
"loss": 0.7281,
|
|
"step": 5995
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7374079501345835e-06,
|
|
"loss": 0.6644,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7369724652043156e-06,
|
|
"loss": 0.6314,
|
|
"step": 6005
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7365366741420947e-06,
|
|
"loss": 0.6719,
|
|
"step": 6010
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.736100577128945e-06,
|
|
"loss": 0.6913,
|
|
"step": 6015
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7356641743460166e-06,
|
|
"loss": 0.6858,
|
|
"step": 6020
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7352274659745878e-06,
|
|
"loss": 0.6853,
|
|
"step": 6025
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.7347904521960635e-06,
|
|
"loss": 0.6921,
|
|
"step": 6030
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7343531331919756e-06,
|
|
"loss": 0.6898,
|
|
"step": 6035
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7339155091439823e-06,
|
|
"loss": 0.6996,
|
|
"step": 6040
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.733477580233869e-06,
|
|
"loss": 0.7071,
|
|
"step": 6045
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7330393466435474e-06,
|
|
"loss": 0.6903,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7326008085550564e-06,
|
|
"loss": 0.6929,
|
|
"step": 6055
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.73216196615056e-06,
|
|
"loss": 0.7195,
|
|
"step": 6060
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7317228196123504e-06,
|
|
"loss": 0.6737,
|
|
"step": 6065
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7312833691228445e-06,
|
|
"loss": 0.7118,
|
|
"step": 6070
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7308436148645871e-06,
|
|
"loss": 0.7004,
|
|
"step": 6075
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7304035570202476e-06,
|
|
"loss": 0.666,
|
|
"step": 6080
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7299631957726223e-06,
|
|
"loss": 0.6574,
|
|
"step": 6085
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7295225313046337e-06,
|
|
"loss": 0.7057,
|
|
"step": 6090
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.72908156379933e-06,
|
|
"loss": 0.6804,
|
|
"step": 6095
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7286402934398848e-06,
|
|
"loss": 0.7117,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7281987204095985e-06,
|
|
"loss": 0.7004,
|
|
"step": 6105
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7277568448918962e-06,
|
|
"loss": 0.6945,
|
|
"step": 6110
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7273146670703295e-06,
|
|
"loss": 0.6268,
|
|
"step": 6115
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7268721871285753e-06,
|
|
"loss": 0.6772,
|
|
"step": 6120
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7264294052504358e-06,
|
|
"loss": 0.7141,
|
|
"step": 6125
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7259863216198385e-06,
|
|
"loss": 0.6466,
|
|
"step": 6130
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7255429364208366e-06,
|
|
"loss": 0.6835,
|
|
"step": 6135
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7250992498376086e-06,
|
|
"loss": 0.6938,
|
|
"step": 6140
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7246552620544584e-06,
|
|
"loss": 0.7017,
|
|
"step": 6145
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7242109732558137e-06,
|
|
"loss": 0.7161,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7237663836262289e-06,
|
|
"loss": 0.6646,
|
|
"step": 6155
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7233214933503826e-06,
|
|
"loss": 0.6712,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7228763026130783e-06,
|
|
"loss": 0.6749,
|
|
"step": 6165
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7224308115992443e-06,
|
|
"loss": 0.647,
|
|
"step": 6170
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7219850204939338e-06,
|
|
"loss": 0.7009,
|
|
"step": 6175
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7215389294823243e-06,
|
|
"loss": 0.6568,
|
|
"step": 6180
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7210925387497186e-06,
|
|
"loss": 0.6501,
|
|
"step": 6185
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7206458484815429e-06,
|
|
"loss": 0.6926,
|
|
"step": 6190
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7201988588633489e-06,
|
|
"loss": 0.6973,
|
|
"step": 6195
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7197515700808124e-06,
|
|
"loss": 0.6979,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7193039823197323e-06,
|
|
"loss": 0.6634,
|
|
"step": 6205
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7188560957660338e-06,
|
|
"loss": 0.6958,
|
|
"step": 6210
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7184079106057645e-06,
|
|
"loss": 0.7073,
|
|
"step": 6215
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7179594270250965e-06,
|
|
"loss": 0.7147,
|
|
"step": 6220
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7175106452103268e-06,
|
|
"loss": 0.7319,
|
|
"step": 6225
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7170615653478742e-06,
|
|
"loss": 0.6786,
|
|
"step": 6230
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7166121876242837e-06,
|
|
"loss": 0.6643,
|
|
"step": 6235
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.716162512226222e-06,
|
|
"loss": 0.7524,
|
|
"step": 6240
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.715712539340481e-06,
|
|
"loss": 0.6517,
|
|
"step": 6245
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.715262269153975e-06,
|
|
"loss": 0.6999,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7148117018537432e-06,
|
|
"loss": 0.6756,
|
|
"step": 6255
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7143608376269462e-06,
|
|
"loss": 0.7208,
|
|
"step": 6260
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.71390967666087e-06,
|
|
"loss": 0.6793,
|
|
"step": 6265
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7134582191429223e-06,
|
|
"loss": 0.68,
|
|
"step": 6270
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 1.7130064652606352e-06,
|
|
"loss": 0.6666,
|
|
"step": 6275
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7125544152016627e-06,
|
|
"loss": 0.6714,
|
|
"step": 6280
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7121020691537831e-06,
|
|
"loss": 0.7034,
|
|
"step": 6285
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7116494273048966e-06,
|
|
"loss": 0.6819,
|
|
"step": 6290
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7111964898430266e-06,
|
|
"loss": 0.66,
|
|
"step": 6295
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.71074325695632e-06,
|
|
"loss": 0.7156,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7102897288330454e-06,
|
|
"loss": 0.6804,
|
|
"step": 6305
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7098359056615942e-06,
|
|
"loss": 0.6807,
|
|
"step": 6310
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7093817876304807e-06,
|
|
"loss": 0.6701,
|
|
"step": 6315
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7089273749283418e-06,
|
|
"loss": 0.6785,
|
|
"step": 6320
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7084726677439364e-06,
|
|
"loss": 0.6941,
|
|
"step": 6325
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7080176662661463e-06,
|
|
"loss": 0.6811,
|
|
"step": 6330
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7075623706839745e-06,
|
|
"loss": 0.6797,
|
|
"step": 6335
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7071067811865474e-06,
|
|
"loss": 0.6913,
|
|
"step": 6340
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7066508979631129e-06,
|
|
"loss": 0.6917,
|
|
"step": 6345
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7061947212030402e-06,
|
|
"loss": 0.6709,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.705738251095822e-06,
|
|
"loss": 0.7023,
|
|
"step": 6355
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7052814878310718e-06,
|
|
"loss": 0.7109,
|
|
"step": 6360
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7048244315985247e-06,
|
|
"loss": 0.6795,
|
|
"step": 6365
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7043670825880384e-06,
|
|
"loss": 0.7013,
|
|
"step": 6370
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7039094409895914e-06,
|
|
"loss": 0.703,
|
|
"step": 6375
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.703451506993284e-06,
|
|
"loss": 0.732,
|
|
"step": 6380
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7029932807893382e-06,
|
|
"loss": 0.6905,
|
|
"step": 6385
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.702534762568097e-06,
|
|
"loss": 0.7172,
|
|
"step": 6390
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7020759525200253e-06,
|
|
"loss": 0.6674,
|
|
"step": 6395
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.701616850835708e-06,
|
|
"loss": 0.6694,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"eval_loss": 0.6586260795593262,
|
|
"eval_runtime": 139.2072,
|
|
"eval_samples_per_second": 16.996,
|
|
"eval_steps_per_second": 2.837,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7011574577058525e-06,
|
|
"loss": 0.7282,
|
|
"step": 6405
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7006977733212867e-06,
|
|
"loss": 0.6736,
|
|
"step": 6410
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.7002377978729596e-06,
|
|
"loss": 0.6672,
|
|
"step": 6415
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6997775315519408e-06,
|
|
"loss": 0.712,
|
|
"step": 6420
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6993169745494209e-06,
|
|
"loss": 0.6949,
|
|
"step": 6425
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6988561270567115e-06,
|
|
"loss": 0.6951,
|
|
"step": 6430
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.698394989265244e-06,
|
|
"loss": 0.7019,
|
|
"step": 6435
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6979335613665717e-06,
|
|
"loss": 0.7143,
|
|
"step": 6440
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6974718435523678e-06,
|
|
"loss": 0.7253,
|
|
"step": 6445
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6970098360144253e-06,
|
|
"loss": 0.7103,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6965475389446586e-06,
|
|
"loss": 0.6953,
|
|
"step": 6455
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6960849525351018e-06,
|
|
"loss": 0.7247,
|
|
"step": 6460
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6956220769779088e-06,
|
|
"loss": 0.7015,
|
|
"step": 6465
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6951589124653547e-06,
|
|
"loss": 0.704,
|
|
"step": 6470
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6946954591898336e-06,
|
|
"loss": 0.7054,
|
|
"step": 6475
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6942317173438604e-06,
|
|
"loss": 0.6704,
|
|
"step": 6480
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.693767687120069e-06,
|
|
"loss": 0.6564,
|
|
"step": 6485
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6933033687112134e-06,
|
|
"loss": 0.7054,
|
|
"step": 6490
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6928387623101681e-06,
|
|
"loss": 0.676,
|
|
"step": 6495
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.692373868109926e-06,
|
|
"loss": 0.7034,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6919086863036003e-06,
|
|
"loss": 0.6781,
|
|
"step": 6505
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6914432170844233e-06,
|
|
"loss": 0.7057,
|
|
"step": 6510
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.690977460645747e-06,
|
|
"loss": 0.6929,
|
|
"step": 6515
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.6905114171810429e-06,
|
|
"loss": 0.6816,
|
|
"step": 6520
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6900450868839009e-06,
|
|
"loss": 0.6845,
|
|
"step": 6525
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6895784699480306e-06,
|
|
"loss": 0.6665,
|
|
"step": 6530
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6891115665672608e-06,
|
|
"loss": 0.6597,
|
|
"step": 6535
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6886443769355393e-06,
|
|
"loss": 0.6623,
|
|
"step": 6540
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.688176901246932e-06,
|
|
"loss": 0.6535,
|
|
"step": 6545
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6877091396956247e-06,
|
|
"loss": 0.6902,
|
|
"step": 6550
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6872410924759215e-06,
|
|
"loss": 0.6447,
|
|
"step": 6555
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.686772759782245e-06,
|
|
"loss": 0.7013,
|
|
"step": 6560
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6863041418091366e-06,
|
|
"loss": 0.7158,
|
|
"step": 6565
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.685835238751256e-06,
|
|
"loss": 0.7109,
|
|
"step": 6570
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6853660508033816e-06,
|
|
"loss": 0.7231,
|
|
"step": 6575
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6848965781604099e-06,
|
|
"loss": 0.7305,
|
|
"step": 6580
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6844268210173556e-06,
|
|
"loss": 0.7013,
|
|
"step": 6585
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6839567795693524e-06,
|
|
"loss": 0.6849,
|
|
"step": 6590
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6834864540116506e-06,
|
|
"loss": 0.7316,
|
|
"step": 6595
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6830158445396196e-06,
|
|
"loss": 0.7193,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6825449513487466e-06,
|
|
"loss": 0.7277,
|
|
"step": 6605
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6820737746346368e-06,
|
|
"loss": 0.7037,
|
|
"step": 6610
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6816023145930123e-06,
|
|
"loss": 0.7078,
|
|
"step": 6615
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.681130571419714e-06,
|
|
"loss": 0.6651,
|
|
"step": 6620
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6806585453106997e-06,
|
|
"loss": 0.6851,
|
|
"step": 6625
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.680186236462045e-06,
|
|
"loss": 0.6877,
|
|
"step": 6630
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6797136450699427e-06,
|
|
"loss": 0.6976,
|
|
"step": 6635
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6792407713307036e-06,
|
|
"loss": 0.7244,
|
|
"step": 6640
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.678767615440755e-06,
|
|
"loss": 0.6734,
|
|
"step": 6645
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6782941775966416e-06,
|
|
"loss": 0.6733,
|
|
"step": 6650
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6778204579950255e-06,
|
|
"loss": 0.6836,
|
|
"step": 6655
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6773464568326859e-06,
|
|
"loss": 0.7101,
|
|
"step": 6660
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6768721743065186e-06,
|
|
"loss": 0.695,
|
|
"step": 6665
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6763976106135366e-06,
|
|
"loss": 0.6774,
|
|
"step": 6670
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6759227659508692e-06,
|
|
"loss": 0.674,
|
|
"step": 6675
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6754476405157631e-06,
|
|
"loss": 0.6887,
|
|
"step": 6680
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.674972234505581e-06,
|
|
"loss": 0.689,
|
|
"step": 6685
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6744965481178026e-06,
|
|
"loss": 0.6877,
|
|
"step": 6690
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6740205815500236e-06,
|
|
"loss": 0.6872,
|
|
"step": 6695
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.673544334999957e-06,
|
|
"loss": 0.6999,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6730678086654306e-06,
|
|
"loss": 0.6983,
|
|
"step": 6705
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6725910027443902e-06,
|
|
"loss": 0.6488,
|
|
"step": 6710
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6721139174348964e-06,
|
|
"loss": 0.6741,
|
|
"step": 6715
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.671636552935126e-06,
|
|
"loss": 0.6809,
|
|
"step": 6720
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6711589094433725e-06,
|
|
"loss": 0.7326,
|
|
"step": 6725
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6706809871580446e-06,
|
|
"loss": 0.6757,
|
|
"step": 6730
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.670202786277667e-06,
|
|
"loss": 0.6515,
|
|
"step": 6735
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6697243070008805e-06,
|
|
"loss": 0.6747,
|
|
"step": 6740
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6692455495264413e-06,
|
|
"loss": 0.6816,
|
|
"step": 6745
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6687665140532209e-06,
|
|
"loss": 0.6918,
|
|
"step": 6750
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6682872007802062e-06,
|
|
"loss": 0.7191,
|
|
"step": 6755
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.6678076099064999e-06,
|
|
"loss": 0.6649,
|
|
"step": 6760
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.66732774163132e-06,
|
|
"loss": 0.6889,
|
|
"step": 6765
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 1.666847596154e-06,
|
|
"loss": 0.6917,
|
|
"step": 6770
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6663671736739874e-06,
|
|
"loss": 0.6717,
|
|
"step": 6775
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.665886474390846e-06,
|
|
"loss": 0.7177,
|
|
"step": 6780
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6654054985042538e-06,
|
|
"loss": 0.6676,
|
|
"step": 6785
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6649242462140044e-06,
|
|
"loss": 0.708,
|
|
"step": 6790
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6644427177200053e-06,
|
|
"loss": 0.6958,
|
|
"step": 6795
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.66396091322228e-06,
|
|
"loss": 0.6862,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.663478832920965e-06,
|
|
"loss": 0.6462,
|
|
"step": 6805
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6629964770163128e-06,
|
|
"loss": 0.6668,
|
|
"step": 6810
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6625138457086897e-06,
|
|
"loss": 0.6926,
|
|
"step": 6815
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6620309391985767e-06,
|
|
"loss": 0.6942,
|
|
"step": 6820
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.661547757686569e-06,
|
|
"loss": 0.6988,
|
|
"step": 6825
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6610643013733756e-06,
|
|
"loss": 0.6548,
|
|
"step": 6830
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6605805704598206e-06,
|
|
"loss": 0.6827,
|
|
"step": 6835
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.660096565146841e-06,
|
|
"loss": 0.6939,
|
|
"step": 6840
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6596122856354885e-06,
|
|
"loss": 0.728,
|
|
"step": 6845
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6591277321269295e-06,
|
|
"loss": 0.6762,
|
|
"step": 6850
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6586429048224422e-06,
|
|
"loss": 0.6697,
|
|
"step": 6855
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6581578039234203e-06,
|
|
"loss": 0.7086,
|
|
"step": 6860
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6576724296313697e-06,
|
|
"loss": 0.7378,
|
|
"step": 6865
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.657186782147912e-06,
|
|
"loss": 0.6861,
|
|
"step": 6870
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6567008616747797e-06,
|
|
"loss": 0.6749,
|
|
"step": 6875
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6562146684138205e-06,
|
|
"loss": 0.67,
|
|
"step": 6880
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.655728202566995e-06,
|
|
"loss": 0.6561,
|
|
"step": 6885
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6552414643363766e-06,
|
|
"loss": 0.7061,
|
|
"step": 6890
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6547544539241516e-06,
|
|
"loss": 0.6779,
|
|
"step": 6895
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6542671715326209e-06,
|
|
"loss": 0.6927,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.653779617364197e-06,
|
|
"loss": 0.6633,
|
|
"step": 6905
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6532917916214055e-06,
|
|
"loss": 0.6755,
|
|
"step": 6910
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6528036945068852e-06,
|
|
"loss": 0.6774,
|
|
"step": 6915
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.652315326223387e-06,
|
|
"loss": 0.7033,
|
|
"step": 6920
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6518266869737754e-06,
|
|
"loss": 0.6823,
|
|
"step": 6925
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6513377769610264e-06,
|
|
"loss": 0.6952,
|
|
"step": 6930
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6508485963882293e-06,
|
|
"loss": 0.676,
|
|
"step": 6935
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.650359145458585e-06,
|
|
"loss": 0.7026,
|
|
"step": 6940
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6498694243754075e-06,
|
|
"loss": 0.6872,
|
|
"step": 6945
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6493794333421228e-06,
|
|
"loss": 0.6846,
|
|
"step": 6950
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6488891725622688e-06,
|
|
"loss": 0.6825,
|
|
"step": 6955
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6483986422394955e-06,
|
|
"loss": 0.723,
|
|
"step": 6960
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6479078425775653e-06,
|
|
"loss": 0.7463,
|
|
"step": 6965
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6474167737803514e-06,
|
|
"loss": 0.7006,
|
|
"step": 6970
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.64692543605184e-06,
|
|
"loss": 0.6745,
|
|
"step": 6975
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6464338295961283e-06,
|
|
"loss": 0.6935,
|
|
"step": 6980
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6459419546174253e-06,
|
|
"loss": 0.7056,
|
|
"step": 6985
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6454498113200521e-06,
|
|
"loss": 0.6763,
|
|
"step": 6990
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6449573999084404e-06,
|
|
"loss": 0.68,
|
|
"step": 6995
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6444647205871332e-06,
|
|
"loss": 0.6595,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6439717735607856e-06,
|
|
"loss": 0.6996,
|
|
"step": 7005
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.643478559034164e-06,
|
|
"loss": 0.7008,
|
|
"step": 7010
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.6429850772121446e-06,
|
|
"loss": 0.6907,
|
|
"step": 7015
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6424913282997159e-06,
|
|
"loss": 0.6627,
|
|
"step": 7020
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.641997312501977e-06,
|
|
"loss": 0.6939,
|
|
"step": 7025
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6415030300241371e-06,
|
|
"loss": 0.7002,
|
|
"step": 7030
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6410084810715177e-06,
|
|
"loss": 0.7098,
|
|
"step": 7035
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6405136658495496e-06,
|
|
"loss": 0.7114,
|
|
"step": 7040
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.640018584563775e-06,
|
|
"loss": 0.6951,
|
|
"step": 7045
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.639523237419846e-06,
|
|
"loss": 0.7112,
|
|
"step": 7050
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6390276246235257e-06,
|
|
"loss": 0.6827,
|
|
"step": 7055
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6385317463806878e-06,
|
|
"loss": 0.7133,
|
|
"step": 7060
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6380356028973152e-06,
|
|
"loss": 0.6766,
|
|
"step": 7065
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6375391943795015e-06,
|
|
"loss": 0.6767,
|
|
"step": 7070
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6370425210334514e-06,
|
|
"loss": 0.6731,
|
|
"step": 7075
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6365455830654775e-06,
|
|
"loss": 0.6974,
|
|
"step": 7080
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6360483806820043e-06,
|
|
"loss": 0.6972,
|
|
"step": 7085
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.635550914089565e-06,
|
|
"loss": 0.6974,
|
|
"step": 7090
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.635053183494803e-06,
|
|
"loss": 0.6649,
|
|
"step": 7095
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6345551891044713e-06,
|
|
"loss": 0.6754,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6340569311254323e-06,
|
|
"loss": 0.6877,
|
|
"step": 7105
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6335584097646585e-06,
|
|
"loss": 0.6841,
|
|
"step": 7110
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6330596252292309e-06,
|
|
"loss": 0.6675,
|
|
"step": 7115
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.63256057772634e-06,
|
|
"loss": 0.6743,
|
|
"step": 7120
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6320612674632864e-06,
|
|
"loss": 0.7045,
|
|
"step": 7125
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.631561694647479e-06,
|
|
"loss": 0.6891,
|
|
"step": 7130
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6310618594864355e-06,
|
|
"loss": 0.6722,
|
|
"step": 7135
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6305617621877841e-06,
|
|
"loss": 0.7012,
|
|
"step": 7140
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6300614029592602e-06,
|
|
"loss": 0.665,
|
|
"step": 7145
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6295607820087084e-06,
|
|
"loss": 0.6967,
|
|
"step": 7150
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6290598995440835e-06,
|
|
"loss": 0.7089,
|
|
"step": 7155
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.628558755773446e-06,
|
|
"loss": 0.7176,
|
|
"step": 7160
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6280573509049679e-06,
|
|
"loss": 0.6847,
|
|
"step": 7165
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6275556851469284e-06,
|
|
"loss": 0.6968,
|
|
"step": 7170
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6270537587077145e-06,
|
|
"loss": 0.6586,
|
|
"step": 7175
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6265515717958222e-06,
|
|
"loss": 0.7039,
|
|
"step": 7180
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6260491246198563e-06,
|
|
"loss": 0.725,
|
|
"step": 7185
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6255464173885275e-06,
|
|
"loss": 0.7166,
|
|
"step": 7190
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6250434503106578e-06,
|
|
"loss": 0.67,
|
|
"step": 7195
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6245402235951742e-06,
|
|
"loss": 0.6697,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"eval_loss": 0.6546275019645691,
|
|
"eval_runtime": 138.8622,
|
|
"eval_samples_per_second": 17.038,
|
|
"eval_steps_per_second": 2.845,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.624036737451113e-06,
|
|
"loss": 0.6907,
|
|
"step": 7205
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.623532992087618e-06,
|
|
"loss": 0.7066,
|
|
"step": 7210
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6230289877139403e-06,
|
|
"loss": 0.7354,
|
|
"step": 7215
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6225247245394393e-06,
|
|
"loss": 0.672,
|
|
"step": 7220
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.622020202773582e-06,
|
|
"loss": 0.676,
|
|
"step": 7225
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6215154226259414e-06,
|
|
"loss": 0.6798,
|
|
"step": 7230
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.621010384306199e-06,
|
|
"loss": 0.689,
|
|
"step": 7235
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.620505088024144e-06,
|
|
"loss": 0.7003,
|
|
"step": 7240
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.619999533989671e-06,
|
|
"loss": 0.6669,
|
|
"step": 7245
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6194937224127837e-06,
|
|
"loss": 0.6849,
|
|
"step": 7250
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6189876535035919e-06,
|
|
"loss": 0.6997,
|
|
"step": 7255
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 1.6184813274723113e-06,
|
|
"loss": 0.7292,
|
|
"step": 7260
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6179747445292659e-06,
|
|
"loss": 0.6555,
|
|
"step": 7265
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6174679048848856e-06,
|
|
"loss": 0.672,
|
|
"step": 7270
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6169608087497077e-06,
|
|
"loss": 0.6803,
|
|
"step": 7275
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6164534563343752e-06,
|
|
"loss": 0.6701,
|
|
"step": 7280
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.615945847849638e-06,
|
|
"loss": 0.6493,
|
|
"step": 7285
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.615437983506352e-06,
|
|
"loss": 0.7276,
|
|
"step": 7290
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6149298635154795e-06,
|
|
"loss": 0.6832,
|
|
"step": 7295
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6144214880880895e-06,
|
|
"loss": 0.6981,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6139128574353568e-06,
|
|
"loss": 0.6715,
|
|
"step": 7305
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.613403971768562e-06,
|
|
"loss": 0.6732,
|
|
"step": 7310
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6128948312990916e-06,
|
|
"loss": 0.6906,
|
|
"step": 7315
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6123854362384384e-06,
|
|
"loss": 0.682,
|
|
"step": 7320
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6118757867982002e-06,
|
|
"loss": 0.701,
|
|
"step": 7325
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6113658831900816e-06,
|
|
"loss": 0.6969,
|
|
"step": 7330
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6108557256258916e-06,
|
|
"loss": 0.6774,
|
|
"step": 7335
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6103453143175458e-06,
|
|
"loss": 0.6849,
|
|
"step": 7340
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6098346494770642e-06,
|
|
"loss": 0.6714,
|
|
"step": 7345
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6093237313165722e-06,
|
|
"loss": 0.6781,
|
|
"step": 7350
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6088125600483014e-06,
|
|
"loss": 0.657,
|
|
"step": 7355
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6083011358845878e-06,
|
|
"loss": 0.6867,
|
|
"step": 7360
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6077894590378722e-06,
|
|
"loss": 0.6977,
|
|
"step": 7365
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.607277529720701e-06,
|
|
"loss": 0.6775,
|
|
"step": 7370
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6067653481457251e-06,
|
|
"loss": 0.6962,
|
|
"step": 7375
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6062529145257e-06,
|
|
"loss": 0.6801,
|
|
"step": 7380
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6057402290734867e-06,
|
|
"loss": 0.6917,
|
|
"step": 7385
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6052272920020502e-06,
|
|
"loss": 0.6858,
|
|
"step": 7390
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6047141035244596e-06,
|
|
"loss": 0.689,
|
|
"step": 7395
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6042006638538893e-06,
|
|
"loss": 0.684,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6036869732036175e-06,
|
|
"loss": 0.6685,
|
|
"step": 7405
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.603173031787027e-06,
|
|
"loss": 0.7001,
|
|
"step": 7410
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.602658839817605e-06,
|
|
"loss": 0.6947,
|
|
"step": 7415
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6021443975089415e-06,
|
|
"loss": 0.6384,
|
|
"step": 7420
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.601629705074732e-06,
|
|
"loss": 0.6828,
|
|
"step": 7425
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6011147627287746e-06,
|
|
"loss": 0.6856,
|
|
"step": 7430
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6005995706849726e-06,
|
|
"loss": 0.6655,
|
|
"step": 7435
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.6000841291573322e-06,
|
|
"loss": 0.7021,
|
|
"step": 7440
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.599568438359963e-06,
|
|
"loss": 0.6889,
|
|
"step": 7445
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.5990524985070785e-06,
|
|
"loss": 0.6917,
|
|
"step": 7450
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.598536309812996e-06,
|
|
"loss": 0.7008,
|
|
"step": 7455
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.598019872492135e-06,
|
|
"loss": 0.6614,
|
|
"step": 7460
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.59750318675902e-06,
|
|
"loss": 0.6887,
|
|
"step": 7465
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.5969862528282771e-06,
|
|
"loss": 0.6733,
|
|
"step": 7470
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.5964690709146367e-06,
|
|
"loss": 0.6935,
|
|
"step": 7475
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.5959516412329314e-06,
|
|
"loss": 0.6731,
|
|
"step": 7480
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.5954339639980967e-06,
|
|
"loss": 0.6846,
|
|
"step": 7485
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.5949160394251718e-06,
|
|
"loss": 0.7281,
|
|
"step": 7490
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.5943978677292976e-06,
|
|
"loss": 0.7238,
|
|
"step": 7495
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.593879449125718e-06,
|
|
"loss": 0.6682,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.5933607838297804e-06,
|
|
"loss": 0.6864,
|
|
"step": 7505
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5928418720569332e-06,
|
|
"loss": 0.6775,
|
|
"step": 7510
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5923227140227278e-06,
|
|
"loss": 0.7329,
|
|
"step": 7515
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5918033099428182e-06,
|
|
"loss": 0.6964,
|
|
"step": 7520
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.59128366003296e-06,
|
|
"loss": 0.6862,
|
|
"step": 7525
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5907637645090117e-06,
|
|
"loss": 0.6552,
|
|
"step": 7530
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5902436235869333e-06,
|
|
"loss": 0.6738,
|
|
"step": 7535
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5897232374827862e-06,
|
|
"loss": 0.7062,
|
|
"step": 7540
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.589202606412735e-06,
|
|
"loss": 0.6975,
|
|
"step": 7545
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5886817305930452e-06,
|
|
"loss": 0.6795,
|
|
"step": 7550
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5881606102400836e-06,
|
|
"loss": 0.6633,
|
|
"step": 7555
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5876392455703198e-06,
|
|
"loss": 0.6959,
|
|
"step": 7560
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5871176368003231e-06,
|
|
"loss": 0.6719,
|
|
"step": 7565
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5865957841467666e-06,
|
|
"loss": 0.7142,
|
|
"step": 7570
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5860736878264222e-06,
|
|
"loss": 0.6729,
|
|
"step": 7575
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5855513480561649e-06,
|
|
"loss": 0.6777,
|
|
"step": 7580
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5850287650529698e-06,
|
|
"loss": 0.6729,
|
|
"step": 7585
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5845059390339134e-06,
|
|
"loss": 0.7104,
|
|
"step": 7590
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5839828702161727e-06,
|
|
"loss": 0.6844,
|
|
"step": 7595
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5834595588170266e-06,
|
|
"loss": 0.6837,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5829360050538537e-06,
|
|
"loss": 0.7076,
|
|
"step": 7605
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5824122091441337e-06,
|
|
"loss": 0.6906,
|
|
"step": 7610
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5818881713054469e-06,
|
|
"loss": 0.7267,
|
|
"step": 7615
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5813638917554742e-06,
|
|
"loss": 0.7293,
|
|
"step": 7620
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5808393707119967e-06,
|
|
"loss": 0.693,
|
|
"step": 7625
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5803146083928956e-06,
|
|
"loss": 0.7122,
|
|
"step": 7630
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.579789605016153e-06,
|
|
"loss": 0.6895,
|
|
"step": 7635
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5792643607998506e-06,
|
|
"loss": 0.6916,
|
|
"step": 7640
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.57873887596217e-06,
|
|
"loss": 0.6745,
|
|
"step": 7645
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5782131507213934e-06,
|
|
"loss": 0.7242,
|
|
"step": 7650
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5776871852959026e-06,
|
|
"loss": 0.714,
|
|
"step": 7655
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5771609799041788e-06,
|
|
"loss": 0.7086,
|
|
"step": 7660
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.576634534764803e-06,
|
|
"loss": 0.738,
|
|
"step": 7665
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5761078500964562e-06,
|
|
"loss": 0.669,
|
|
"step": 7670
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5755809261179185e-06,
|
|
"loss": 0.6587,
|
|
"step": 7675
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5750537630480696e-06,
|
|
"loss": 0.676,
|
|
"step": 7680
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5745263611058886e-06,
|
|
"loss": 0.6838,
|
|
"step": 7685
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5739987205104535e-06,
|
|
"loss": 0.6182,
|
|
"step": 7690
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5734708414809415e-06,
|
|
"loss": 0.6497,
|
|
"step": 7695
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.572942724236629e-06,
|
|
"loss": 0.6681,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5724143689968915e-06,
|
|
"loss": 0.7043,
|
|
"step": 7705
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5718857759812033e-06,
|
|
"loss": 0.6924,
|
|
"step": 7710
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.571356945409137e-06,
|
|
"loss": 0.6852,
|
|
"step": 7715
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.570827877500364e-06,
|
|
"loss": 0.7008,
|
|
"step": 7720
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5702985724746552e-06,
|
|
"loss": 0.7029,
|
|
"step": 7725
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5697690305518787e-06,
|
|
"loss": 0.6684,
|
|
"step": 7730
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.569239251952002e-06,
|
|
"loss": 0.7089,
|
|
"step": 7735
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5687092368950908e-06,
|
|
"loss": 0.6756,
|
|
"step": 7740
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5681789856013076e-06,
|
|
"loss": 0.6874,
|
|
"step": 7745
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5676484982909154e-06,
|
|
"loss": 0.7055,
|
|
"step": 7750
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 1.5671177751842733e-06,
|
|
"loss": 0.7159,
|
|
"step": 7755
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5665868165018395e-06,
|
|
"loss": 0.6968,
|
|
"step": 7760
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.566055622464169e-06,
|
|
"loss": 0.6533,
|
|
"step": 7765
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.565524193291916e-06,
|
|
"loss": 0.6589,
|
|
"step": 7770
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.564992529205831e-06,
|
|
"loss": 0.7165,
|
|
"step": 7775
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5644606304267627e-06,
|
|
"loss": 0.6588,
|
|
"step": 7780
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5639284971756574e-06,
|
|
"loss": 0.7004,
|
|
"step": 7785
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5633961296735585e-06,
|
|
"loss": 0.7019,
|
|
"step": 7790
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.562863528141607e-06,
|
|
"loss": 0.6711,
|
|
"step": 7795
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5623306928010408e-06,
|
|
"loss": 0.6558,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.561797623873195e-06,
|
|
"loss": 0.7162,
|
|
"step": 7805
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5612643215795017e-06,
|
|
"loss": 0.6986,
|
|
"step": 7810
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5607307861414905e-06,
|
|
"loss": 0.717,
|
|
"step": 7815
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.560197017780787e-06,
|
|
"loss": 0.7249,
|
|
"step": 7820
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5596630167191138e-06,
|
|
"loss": 0.6965,
|
|
"step": 7825
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5591287831782908e-06,
|
|
"loss": 0.679,
|
|
"step": 7830
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5585943173802333e-06,
|
|
"loss": 0.6812,
|
|
"step": 7835
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5580596195469547e-06,
|
|
"loss": 0.6957,
|
|
"step": 7840
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5575246899005629e-06,
|
|
"loss": 0.6792,
|
|
"step": 7845
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5569895286632634e-06,
|
|
"loss": 0.6675,
|
|
"step": 7850
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5564541360573578e-06,
|
|
"loss": 0.6931,
|
|
"step": 7855
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5559185123052427e-06,
|
|
"loss": 0.7044,
|
|
"step": 7860
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5553826576294127e-06,
|
|
"loss": 0.6978,
|
|
"step": 7865
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5548465722524561e-06,
|
|
"loss": 0.693,
|
|
"step": 7870
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.554310256397059e-06,
|
|
"loss": 0.7044,
|
|
"step": 7875
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5537737102860015e-06,
|
|
"loss": 0.6841,
|
|
"step": 7880
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5532369341421609e-06,
|
|
"loss": 0.674,
|
|
"step": 7885
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5526999281885088e-06,
|
|
"loss": 0.6544,
|
|
"step": 7890
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.552162692648113e-06,
|
|
"loss": 0.6951,
|
|
"step": 7895
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.551625227744137e-06,
|
|
"loss": 0.6969,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5510875336998382e-06,
|
|
"loss": 0.6827,
|
|
"step": 7905
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5505496107385704e-06,
|
|
"loss": 0.6296,
|
|
"step": 7910
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.550011459083782e-06,
|
|
"loss": 0.6884,
|
|
"step": 7915
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.549473078959017e-06,
|
|
"loss": 0.6726,
|
|
"step": 7920
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.548934470587913e-06,
|
|
"loss": 0.6732,
|
|
"step": 7925
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.548395634194204e-06,
|
|
"loss": 0.6986,
|
|
"step": 7930
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5478565700017174e-06,
|
|
"loss": 0.7457,
|
|
"step": 7935
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.547317278234376e-06,
|
|
"loss": 0.713,
|
|
"step": 7940
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5467777591161973e-06,
|
|
"loss": 0.6842,
|
|
"step": 7945
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5462380128712921e-06,
|
|
"loss": 0.7125,
|
|
"step": 7950
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.545698039723867e-06,
|
|
"loss": 0.7007,
|
|
"step": 7955
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5451578398982216e-06,
|
|
"loss": 0.6896,
|
|
"step": 7960
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5446174136187503e-06,
|
|
"loss": 0.7371,
|
|
"step": 7965
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.544076761109942e-06,
|
|
"loss": 0.6922,
|
|
"step": 7970
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5435358825963784e-06,
|
|
"loss": 0.7154,
|
|
"step": 7975
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.542994778302736e-06,
|
|
"loss": 0.7194,
|
|
"step": 7980
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5424534484537847e-06,
|
|
"loss": 0.6813,
|
|
"step": 7985
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5419118932743883e-06,
|
|
"loss": 0.7203,
|
|
"step": 7990
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5413701129895045e-06,
|
|
"loss": 0.6675,
|
|
"step": 7995
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.5408281078241835e-06,
|
|
"loss": 0.7152,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"eval_loss": 0.6539024114608765,
|
|
"eval_runtime": 138.566,
|
|
"eval_samples_per_second": 17.075,
|
|
"eval_steps_per_second": 2.851,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5402858780035697e-06,
|
|
"loss": 0.6859,
|
|
"step": 8005
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5397434237529012e-06,
|
|
"loss": 0.7315,
|
|
"step": 8010
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5392007452975077e-06,
|
|
"loss": 0.7081,
|
|
"step": 8015
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5386578428628142e-06,
|
|
"loss": 0.682,
|
|
"step": 8020
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5381147166743369e-06,
|
|
"loss": 0.6741,
|
|
"step": 8025
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5375713669576857e-06,
|
|
"loss": 0.6979,
|
|
"step": 8030
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5370277939385644e-06,
|
|
"loss": 0.6927,
|
|
"step": 8035
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.536483997842767e-06,
|
|
"loss": 0.6856,
|
|
"step": 8040
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5359399788961826e-06,
|
|
"loss": 0.6683,
|
|
"step": 8045
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5353957373247917e-06,
|
|
"loss": 0.6646,
|
|
"step": 8050
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5348512733546674e-06,
|
|
"loss": 0.6684,
|
|
"step": 8055
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5343065872119759e-06,
|
|
"loss": 0.6741,
|
|
"step": 8060
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5337616791229744e-06,
|
|
"loss": 0.6936,
|
|
"step": 8065
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5332165493140133e-06,
|
|
"loss": 0.6646,
|
|
"step": 8070
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5326711980115343e-06,
|
|
"loss": 0.6936,
|
|
"step": 8075
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5321256254420724e-06,
|
|
"loss": 0.7009,
|
|
"step": 8080
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5315798318322532e-06,
|
|
"loss": 0.7183,
|
|
"step": 8085
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5310338174087946e-06,
|
|
"loss": 0.6853,
|
|
"step": 8090
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5304875823985066e-06,
|
|
"loss": 0.6851,
|
|
"step": 8095
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5299411270282898e-06,
|
|
"loss": 0.6607,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5293944515251376e-06,
|
|
"loss": 0.6715,
|
|
"step": 8105
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5288475561161342e-06,
|
|
"loss": 0.6525,
|
|
"step": 8110
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5283004410284549e-06,
|
|
"loss": 0.6958,
|
|
"step": 8115
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5277531064893669e-06,
|
|
"loss": 0.7071,
|
|
"step": 8120
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5272055527262278e-06,
|
|
"loss": 0.683,
|
|
"step": 8125
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.526657779966487e-06,
|
|
"loss": 0.71,
|
|
"step": 8130
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5261097884376848e-06,
|
|
"loss": 0.683,
|
|
"step": 8135
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5255615783674512e-06,
|
|
"loss": 0.6911,
|
|
"step": 8140
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5250131499835088e-06,
|
|
"loss": 0.7089,
|
|
"step": 8145
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5244645035136694e-06,
|
|
"loss": 0.6528,
|
|
"step": 8150
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5239156391858363e-06,
|
|
"loss": 0.6626,
|
|
"step": 8155
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.523366557228003e-06,
|
|
"loss": 0.6674,
|
|
"step": 8160
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5228172578682531e-06,
|
|
"loss": 0.6858,
|
|
"step": 8165
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5222677413347612e-06,
|
|
"loss": 0.6944,
|
|
"step": 8170
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.521718007855791e-06,
|
|
"loss": 0.6623,
|
|
"step": 8175
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5211680576596976e-06,
|
|
"loss": 0.6764,
|
|
"step": 8180
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5206178909749254e-06,
|
|
"loss": 0.7022,
|
|
"step": 8185
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5200675080300086e-06,
|
|
"loss": 0.6437,
|
|
"step": 8190
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.519516909053572e-06,
|
|
"loss": 0.6888,
|
|
"step": 8195
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.518966094274329e-06,
|
|
"loss": 0.6837,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.518415063921084e-06,
|
|
"loss": 0.7228,
|
|
"step": 8205
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5178638182227292e-06,
|
|
"loss": 0.6647,
|
|
"step": 8210
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5173123574082482e-06,
|
|
"loss": 0.6992,
|
|
"step": 8215
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5167606817067129e-06,
|
|
"loss": 0.6828,
|
|
"step": 8220
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5162087913472844e-06,
|
|
"loss": 0.7303,
|
|
"step": 8225
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5156566865592128e-06,
|
|
"loss": 0.6794,
|
|
"step": 8230
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5151043675718383e-06,
|
|
"loss": 0.6442,
|
|
"step": 8235
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5145518346145887e-06,
|
|
"loss": 0.6669,
|
|
"step": 8240
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.5139990879169822e-06,
|
|
"loss": 0.6902,
|
|
"step": 8245
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.513446127708624e-06,
|
|
"loss": 0.6503,
|
|
"step": 8250
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5128929542192102e-06,
|
|
"loss": 0.6971,
|
|
"step": 8255
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.512339567678523e-06,
|
|
"loss": 0.7052,
|
|
"step": 8260
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.511785968316435e-06,
|
|
"loss": 0.6673,
|
|
"step": 8265
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5112321563629066e-06,
|
|
"loss": 0.6816,
|
|
"step": 8270
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5106781320479862e-06,
|
|
"loss": 0.6708,
|
|
"step": 8275
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5101238956018109e-06,
|
|
"loss": 0.7032,
|
|
"step": 8280
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5095694472546058e-06,
|
|
"loss": 0.7008,
|
|
"step": 8285
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5090147872366835e-06,
|
|
"loss": 0.719,
|
|
"step": 8290
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5084599157784453e-06,
|
|
"loss": 0.7094,
|
|
"step": 8295
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5079048331103797e-06,
|
|
"loss": 0.7306,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5073495394630636e-06,
|
|
"loss": 0.7004,
|
|
"step": 8305
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5067940350671606e-06,
|
|
"loss": 0.7107,
|
|
"step": 8310
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.506238320153423e-06,
|
|
"loss": 0.7033,
|
|
"step": 8315
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5056823949526898e-06,
|
|
"loss": 0.6855,
|
|
"step": 8320
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5051262596958872e-06,
|
|
"loss": 0.7119,
|
|
"step": 8325
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5045699146140289e-06,
|
|
"loss": 0.6853,
|
|
"step": 8330
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5040133599382162e-06,
|
|
"loss": 0.71,
|
|
"step": 8335
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.503456595899637e-06,
|
|
"loss": 0.6527,
|
|
"step": 8340
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5028996227295664e-06,
|
|
"loss": 0.6692,
|
|
"step": 8345
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5023424406593654e-06,
|
|
"loss": 0.6568,
|
|
"step": 8350
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5017850499204835e-06,
|
|
"loss": 0.658,
|
|
"step": 8355
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.501227450744455e-06,
|
|
"loss": 0.6895,
|
|
"step": 8360
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5006696433629032e-06,
|
|
"loss": 0.7062,
|
|
"step": 8365
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.5001116280075353e-06,
|
|
"loss": 0.6883,
|
|
"step": 8370
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.499553404910146e-06,
|
|
"loss": 0.666,
|
|
"step": 8375
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4989949743026169e-06,
|
|
"loss": 0.709,
|
|
"step": 8380
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4984363364169145e-06,
|
|
"loss": 0.7225,
|
|
"step": 8385
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4978774914850933e-06,
|
|
"loss": 0.6657,
|
|
"step": 8390
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4973184397392915e-06,
|
|
"loss": 0.6774,
|
|
"step": 8395
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4967591814117347e-06,
|
|
"loss": 0.6617,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.496199716734734e-06,
|
|
"loss": 0.669,
|
|
"step": 8405
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4956400459406862e-06,
|
|
"loss": 0.6798,
|
|
"step": 8410
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4950801692620735e-06,
|
|
"loss": 0.6421,
|
|
"step": 8415
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.494520086931464e-06,
|
|
"loss": 0.6742,
|
|
"step": 8420
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4939597991815107e-06,
|
|
"loss": 0.6478,
|
|
"step": 8425
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.493399306244953e-06,
|
|
"loss": 0.6817,
|
|
"step": 8430
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.492838608354614e-06,
|
|
"loss": 0.7469,
|
|
"step": 8435
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4922777057434031e-06,
|
|
"loss": 0.6713,
|
|
"step": 8440
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4917165986443142e-06,
|
|
"loss": 0.7093,
|
|
"step": 8445
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4911552872904266e-06,
|
|
"loss": 0.6825,
|
|
"step": 8450
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4905937719149035e-06,
|
|
"loss": 0.664,
|
|
"step": 8455
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4900320527509942e-06,
|
|
"loss": 0.6827,
|
|
"step": 8460
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.489470130032032e-06,
|
|
"loss": 0.7093,
|
|
"step": 8465
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.488908003991434e-06,
|
|
"loss": 0.7162,
|
|
"step": 8470
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4883456748627032e-06,
|
|
"loss": 0.6738,
|
|
"step": 8475
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4877831428794258e-06,
|
|
"loss": 0.6977,
|
|
"step": 8480
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4872204082752728e-06,
|
|
"loss": 0.7097,
|
|
"step": 8485
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.4866574712839994e-06,
|
|
"loss": 0.6716,
|
|
"step": 8490
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4860943321394443e-06,
|
|
"loss": 0.6891,
|
|
"step": 8495
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4855309910755313e-06,
|
|
"loss": 0.6471,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4849674483262668e-06,
|
|
"loss": 0.6544,
|
|
"step": 8505
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4844037041257416e-06,
|
|
"loss": 0.6554,
|
|
"step": 8510
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4838397587081307e-06,
|
|
"loss": 0.694,
|
|
"step": 8515
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4832756123076912e-06,
|
|
"loss": 0.6488,
|
|
"step": 8520
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4827112651587656e-06,
|
|
"loss": 0.6885,
|
|
"step": 8525
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.482146717495778e-06,
|
|
"loss": 0.6827,
|
|
"step": 8530
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.481581969553237e-06,
|
|
"loss": 0.7017,
|
|
"step": 8535
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.481017021565734e-06,
|
|
"loss": 0.681,
|
|
"step": 8540
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4804518737679432e-06,
|
|
"loss": 0.6228,
|
|
"step": 8545
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4798865263946223e-06,
|
|
"loss": 0.7208,
|
|
"step": 8550
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4793209796806117e-06,
|
|
"loss": 0.6627,
|
|
"step": 8555
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4787552338608341e-06,
|
|
"loss": 0.6698,
|
|
"step": 8560
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4781892891702965e-06,
|
|
"loss": 0.6631,
|
|
"step": 8565
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4776231458440862e-06,
|
|
"loss": 0.6991,
|
|
"step": 8570
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.477056804117375e-06,
|
|
"loss": 0.65,
|
|
"step": 8575
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.476490264225416e-06,
|
|
"loss": 0.6846,
|
|
"step": 8580
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.475923526403545e-06,
|
|
"loss": 0.6916,
|
|
"step": 8585
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.47535659088718e-06,
|
|
"loss": 0.654,
|
|
"step": 8590
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4747894579118208e-06,
|
|
"loss": 0.7077,
|
|
"step": 8595
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.47422212771305e-06,
|
|
"loss": 0.6887,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4736546005265314e-06,
|
|
"loss": 0.6919,
|
|
"step": 8605
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4730868765880109e-06,
|
|
"loss": 0.6471,
|
|
"step": 8610
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4725189561333158e-06,
|
|
"loss": 0.706,
|
|
"step": 8615
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4719508393983555e-06,
|
|
"loss": 0.6916,
|
|
"step": 8620
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.471382526619121e-06,
|
|
"loss": 0.6624,
|
|
"step": 8625
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4708140180316843e-06,
|
|
"loss": 0.6888,
|
|
"step": 8630
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.470245313872199e-06,
|
|
"loss": 0.6796,
|
|
"step": 8635
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4696764143768997e-06,
|
|
"loss": 0.6776,
|
|
"step": 8640
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.469107319782102e-06,
|
|
"loss": 0.7217,
|
|
"step": 8645
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4685380303242037e-06,
|
|
"loss": 0.6858,
|
|
"step": 8650
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4679685462396817e-06,
|
|
"loss": 0.6937,
|
|
"step": 8655
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.467398867765096e-06,
|
|
"loss": 0.6747,
|
|
"step": 8660
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4668289951370848e-06,
|
|
"loss": 0.6699,
|
|
"step": 8665
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4662589285923686e-06,
|
|
"loss": 0.7111,
|
|
"step": 8670
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4656886683677486e-06,
|
|
"loss": 0.667,
|
|
"step": 8675
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4651182147001055e-06,
|
|
"loss": 0.6586,
|
|
"step": 8680
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.464547567826401e-06,
|
|
"loss": 0.6996,
|
|
"step": 8685
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4639767279836766e-06,
|
|
"loss": 0.7181,
|
|
"step": 8690
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.463405695409054e-06,
|
|
"loss": 0.6878,
|
|
"step": 8695
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.462834470339736e-06,
|
|
"loss": 0.6247,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4622630530130037e-06,
|
|
"loss": 0.7024,
|
|
"step": 8705
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4616914436662195e-06,
|
|
"loss": 0.6748,
|
|
"step": 8710
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4611196425368247e-06,
|
|
"loss": 0.6877,
|
|
"step": 8715
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.46054764986234e-06,
|
|
"loss": 0.663,
|
|
"step": 8720
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4599754658803671e-06,
|
|
"loss": 0.6649,
|
|
"step": 8725
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4594030908285858e-06,
|
|
"loss": 0.6701,
|
|
"step": 8730
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.4588305249447557e-06,
|
|
"loss": 0.6684,
|
|
"step": 8735
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4582577684667156e-06,
|
|
"loss": 0.7104,
|
|
"step": 8740
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.457684821632384e-06,
|
|
"loss": 0.7183,
|
|
"step": 8745
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.457111684679757e-06,
|
|
"loss": 0.7016,
|
|
"step": 8750
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4565383578469119e-06,
|
|
"loss": 0.6959,
|
|
"step": 8755
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4559648413720033e-06,
|
|
"loss": 0.7299,
|
|
"step": 8760
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4553911354932646e-06,
|
|
"loss": 0.6539,
|
|
"step": 8765
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4548172404490089e-06,
|
|
"loss": 0.685,
|
|
"step": 8770
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4542431564776265e-06,
|
|
"loss": 0.7004,
|
|
"step": 8775
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.453668883817587e-06,
|
|
"loss": 0.6742,
|
|
"step": 8780
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.453094422707439e-06,
|
|
"loss": 0.6744,
|
|
"step": 8785
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4525197733858077e-06,
|
|
"loss": 0.6871,
|
|
"step": 8790
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.451944936091398e-06,
|
|
"loss": 0.6567,
|
|
"step": 8795
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4513699110629921e-06,
|
|
"loss": 0.6838,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"eval_loss": 0.6491459012031555,
|
|
"eval_runtime": 138.8275,
|
|
"eval_samples_per_second": 17.043,
|
|
"eval_steps_per_second": 2.845,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.45079469853945e-06,
|
|
"loss": 0.6431,
|
|
"step": 8805
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4502192987597113e-06,
|
|
"loss": 0.6606,
|
|
"step": 8810
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4496437119627905e-06,
|
|
"loss": 0.6917,
|
|
"step": 8815
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4490679383877825e-06,
|
|
"loss": 0.6826,
|
|
"step": 8820
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4484919782738581e-06,
|
|
"loss": 0.6904,
|
|
"step": 8825
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4479158318602658e-06,
|
|
"loss": 0.6781,
|
|
"step": 8830
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4473394993863325e-06,
|
|
"loss": 0.7099,
|
|
"step": 8835
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4467629810914615e-06,
|
|
"loss": 0.6993,
|
|
"step": 8840
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4461862772151333e-06,
|
|
"loss": 0.6606,
|
|
"step": 8845
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4456093879969057e-06,
|
|
"loss": 0.7129,
|
|
"step": 8850
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4450323136764136e-06,
|
|
"loss": 0.6789,
|
|
"step": 8855
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4444550544933684e-06,
|
|
"loss": 0.6828,
|
|
"step": 8860
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.443877610687559e-06,
|
|
"loss": 0.701,
|
|
"step": 8865
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4432999824988503e-06,
|
|
"loss": 0.6962,
|
|
"step": 8870
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.442722170167184e-06,
|
|
"loss": 0.6399,
|
|
"step": 8875
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4421441739325782e-06,
|
|
"loss": 0.6943,
|
|
"step": 8880
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4415659940351275e-06,
|
|
"loss": 0.6932,
|
|
"step": 8885
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.440987630715003e-06,
|
|
"loss": 0.7096,
|
|
"step": 8890
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4404090842124519e-06,
|
|
"loss": 0.6652,
|
|
"step": 8895
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.439830354767797e-06,
|
|
"loss": 0.6988,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4392514426214378e-06,
|
|
"loss": 0.6862,
|
|
"step": 8905
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4386723480138491e-06,
|
|
"loss": 0.7313,
|
|
"step": 8910
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.438093071185582e-06,
|
|
"loss": 0.6876,
|
|
"step": 8915
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.437513612377263e-06,
|
|
"loss": 0.664,
|
|
"step": 8920
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.436933971829594e-06,
|
|
"loss": 0.6918,
|
|
"step": 8925
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4363541497833534e-06,
|
|
"loss": 0.7093,
|
|
"step": 8930
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4357741464793932e-06,
|
|
"loss": 0.7008,
|
|
"step": 8935
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4351939621586424e-06,
|
|
"loss": 0.6459,
|
|
"step": 8940
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4346135970621045e-06,
|
|
"loss": 0.7083,
|
|
"step": 8945
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4340330514308576e-06,
|
|
"loss": 0.6674,
|
|
"step": 8950
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4334523255060563e-06,
|
|
"loss": 0.6906,
|
|
"step": 8955
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.432871419528928e-06,
|
|
"loss": 0.6976,
|
|
"step": 8960
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.432290333740776e-06,
|
|
"loss": 0.6487,
|
|
"step": 8965
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4317090683829797e-06,
|
|
"loss": 0.6716,
|
|
"step": 8970
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.43112762369699e-06,
|
|
"loss": 0.6578,
|
|
"step": 8975
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.4305459999243353e-06,
|
|
"loss": 0.6437,
|
|
"step": 8980
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 1.429964197306616e-06,
|
|
"loss": 0.6527,
|
|
"step": 8985
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4293822160855083e-06,
|
|
"loss": 0.7384,
|
|
"step": 8990
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4288000565027623e-06,
|
|
"loss": 0.6817,
|
|
"step": 8995
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4282177188002016e-06,
|
|
"loss": 0.6805,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.427635203219725e-06,
|
|
"loss": 0.6908,
|
|
"step": 9005
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4270525100033036e-06,
|
|
"loss": 0.6745,
|
|
"step": 9010
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4264696393929832e-06,
|
|
"loss": 0.6801,
|
|
"step": 9015
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4258865916308834e-06,
|
|
"loss": 0.6856,
|
|
"step": 9020
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4253033669591971e-06,
|
|
"loss": 0.6676,
|
|
"step": 9025
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.424719965620191e-06,
|
|
"loss": 0.6524,
|
|
"step": 9030
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4241363878562046e-06,
|
|
"loss": 0.6867,
|
|
"step": 9035
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4235526339096514e-06,
|
|
"loss": 0.6674,
|
|
"step": 9040
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.422968704023017e-06,
|
|
"loss": 0.659,
|
|
"step": 9045
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4223845984388613e-06,
|
|
"loss": 0.6706,
|
|
"step": 9050
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.421800317399817e-06,
|
|
"loss": 0.6867,
|
|
"step": 9055
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.421215861148589e-06,
|
|
"loss": 0.7136,
|
|
"step": 9060
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.420631229927955e-06,
|
|
"loss": 0.7003,
|
|
"step": 9065
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4200464239807664e-06,
|
|
"loss": 0.6764,
|
|
"step": 9070
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4194614435499458e-06,
|
|
"loss": 0.6954,
|
|
"step": 9075
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4188762888784897e-06,
|
|
"loss": 0.6641,
|
|
"step": 9080
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4182909602094662e-06,
|
|
"loss": 0.6822,
|
|
"step": 9085
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.417705457786015e-06,
|
|
"loss": 0.7138,
|
|
"step": 9090
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4171197818513495e-06,
|
|
"loss": 0.7152,
|
|
"step": 9095
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4165339326487535e-06,
|
|
"loss": 0.6324,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4159479104215846e-06,
|
|
"loss": 0.6677,
|
|
"step": 9105
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4153617154132712e-06,
|
|
"loss": 0.6521,
|
|
"step": 9110
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.414775347867313e-06,
|
|
"loss": 0.7191,
|
|
"step": 9115
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4141888080272825e-06,
|
|
"loss": 0.6545,
|
|
"step": 9120
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4136020961368228e-06,
|
|
"loss": 0.7033,
|
|
"step": 9125
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4130152124396497e-06,
|
|
"loss": 0.6917,
|
|
"step": 9130
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.412428157179549e-06,
|
|
"loss": 0.7083,
|
|
"step": 9135
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4118409306003784e-06,
|
|
"loss": 0.6786,
|
|
"step": 9140
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4112535329460671e-06,
|
|
"loss": 0.677,
|
|
"step": 9145
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4106659644606139e-06,
|
|
"loss": 0.6903,
|
|
"step": 9150
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.410078225388091e-06,
|
|
"loss": 0.659,
|
|
"step": 9155
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.409490315972639e-06,
|
|
"loss": 0.7028,
|
|
"step": 9160
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4089022364584712e-06,
|
|
"loss": 0.6607,
|
|
"step": 9165
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4083139870898706e-06,
|
|
"loss": 0.6595,
|
|
"step": 9170
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4077255681111903e-06,
|
|
"loss": 0.6557,
|
|
"step": 9175
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4071369797668545e-06,
|
|
"loss": 0.659,
|
|
"step": 9180
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4065482223013585e-06,
|
|
"loss": 0.6879,
|
|
"step": 9185
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.405959295959266e-06,
|
|
"loss": 0.6708,
|
|
"step": 9190
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4053702009852128e-06,
|
|
"loss": 0.6839,
|
|
"step": 9195
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4047809376239034e-06,
|
|
"loss": 0.6835,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4041915061201122e-06,
|
|
"loss": 0.7229,
|
|
"step": 9205
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4036019067186843e-06,
|
|
"loss": 0.6944,
|
|
"step": 9210
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4030121396645349e-06,
|
|
"loss": 0.6232,
|
|
"step": 9215
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.402422205202647e-06,
|
|
"loss": 0.7058,
|
|
"step": 9220
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4018321035780747e-06,
|
|
"loss": 0.6815,
|
|
"step": 9225
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.4012418350359414e-06,
|
|
"loss": 0.6449,
|
|
"step": 9230
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.4006513998214386e-06,
|
|
"loss": 0.6611,
|
|
"step": 9235
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.4000607981798292e-06,
|
|
"loss": 0.6489,
|
|
"step": 9240
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3994700303564431e-06,
|
|
"loss": 0.6866,
|
|
"step": 9245
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3988790965966801e-06,
|
|
"loss": 0.6553,
|
|
"step": 9250
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3982879971460096e-06,
|
|
"loss": 0.6936,
|
|
"step": 9255
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3976967322499683e-06,
|
|
"loss": 0.6741,
|
|
"step": 9260
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3971053021541633e-06,
|
|
"loss": 0.6491,
|
|
"step": 9265
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.396513707104269e-06,
|
|
"loss": 0.6819,
|
|
"step": 9270
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.395921947346029e-06,
|
|
"loss": 0.6472,
|
|
"step": 9275
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3953300231252554e-06,
|
|
"loss": 0.6588,
|
|
"step": 9280
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.394737934687828e-06,
|
|
"loss": 0.6795,
|
|
"step": 9285
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3941456822796954e-06,
|
|
"loss": 0.6876,
|
|
"step": 9290
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.393553266146874e-06,
|
|
"loss": 0.6439,
|
|
"step": 9295
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3929606865354484e-06,
|
|
"loss": 0.6678,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3923679436915711e-06,
|
|
"loss": 0.6646,
|
|
"step": 9305
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3917750378614619e-06,
|
|
"loss": 0.6721,
|
|
"step": 9310
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.391181969291409e-06,
|
|
"loss": 0.6741,
|
|
"step": 9315
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3905887382277683e-06,
|
|
"loss": 0.6298,
|
|
"step": 9320
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3899953449169622e-06,
|
|
"loss": 0.6536,
|
|
"step": 9325
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3894017896054814e-06,
|
|
"loss": 0.7037,
|
|
"step": 9330
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3888080725398836e-06,
|
|
"loss": 0.6777,
|
|
"step": 9335
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3882141939667937e-06,
|
|
"loss": 0.6805,
|
|
"step": 9340
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3876201541329038e-06,
|
|
"loss": 0.666,
|
|
"step": 9345
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3870259532849726e-06,
|
|
"loss": 0.6619,
|
|
"step": 9350
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3864315916698264e-06,
|
|
"loss": 0.7109,
|
|
"step": 9355
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3858370695343573e-06,
|
|
"loss": 0.6914,
|
|
"step": 9360
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3852423871255252e-06,
|
|
"loss": 0.6847,
|
|
"step": 9365
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3846475446903555e-06,
|
|
"loss": 0.6819,
|
|
"step": 9370
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.384052542475941e-06,
|
|
"loss": 0.7004,
|
|
"step": 9375
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3834573807294404e-06,
|
|
"loss": 0.7206,
|
|
"step": 9380
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.382862059698078e-06,
|
|
"loss": 0.7003,
|
|
"step": 9385
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3822665796291459e-06,
|
|
"loss": 0.686,
|
|
"step": 9390
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3816709407700007e-06,
|
|
"loss": 0.682,
|
|
"step": 9395
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3810751433680662e-06,
|
|
"loss": 0.6414,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.380479187670831e-06,
|
|
"loss": 0.6494,
|
|
"step": 9405
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.37988307392585e-06,
|
|
"loss": 0.6997,
|
|
"step": 9410
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3792868023807441e-06,
|
|
"loss": 0.6835,
|
|
"step": 9415
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3786903732831982e-06,
|
|
"loss": 0.6819,
|
|
"step": 9420
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.378093786880965e-06,
|
|
"loss": 0.6568,
|
|
"step": 9425
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3774970434218609e-06,
|
|
"loss": 0.6811,
|
|
"step": 9430
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3769001431537675e-06,
|
|
"loss": 0.6692,
|
|
"step": 9435
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3763030863246326e-06,
|
|
"loss": 0.6732,
|
|
"step": 9440
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3757058731824679e-06,
|
|
"loss": 0.6602,
|
|
"step": 9445
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3751085039753508e-06,
|
|
"loss": 0.6597,
|
|
"step": 9450
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3745109789514233e-06,
|
|
"loss": 0.6906,
|
|
"step": 9455
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3739132983588921e-06,
|
|
"loss": 0.6466,
|
|
"step": 9460
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3733154624460283e-06,
|
|
"loss": 0.6613,
|
|
"step": 9465
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.372717471461168e-06,
|
|
"loss": 0.6821,
|
|
"step": 9470
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 1.3721193256527116e-06,
|
|
"loss": 0.6539,
|
|
"step": 9475
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3715210252691232e-06,
|
|
"loss": 0.6994,
|
|
"step": 9480
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3709225705589318e-06,
|
|
"loss": 0.6556,
|
|
"step": 9485
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3703239617707306e-06,
|
|
"loss": 0.7155,
|
|
"step": 9490
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3697251991531756e-06,
|
|
"loss": 0.6621,
|
|
"step": 9495
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3691262829549883e-06,
|
|
"loss": 0.6771,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3685272134249535e-06,
|
|
"loss": 0.7038,
|
|
"step": 9505
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3679279908119187e-06,
|
|
"loss": 0.6903,
|
|
"step": 9510
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3673286153647967e-06,
|
|
"loss": 0.7236,
|
|
"step": 9515
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3667290873325618e-06,
|
|
"loss": 0.6958,
|
|
"step": 9520
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3661294069642537e-06,
|
|
"loss": 0.653,
|
|
"step": 9525
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3655295745089736e-06,
|
|
"loss": 0.6925,
|
|
"step": 9530
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3649295902158873e-06,
|
|
"loss": 0.6607,
|
|
"step": 9535
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3643294543342228e-06,
|
|
"loss": 0.7378,
|
|
"step": 9540
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.363729167113271e-06,
|
|
"loss": 0.6789,
|
|
"step": 9545
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3631287288023866e-06,
|
|
"loss": 0.6862,
|
|
"step": 9550
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.362528139650986e-06,
|
|
"loss": 0.7204,
|
|
"step": 9555
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.361927399908549e-06,
|
|
"loss": 0.6474,
|
|
"step": 9560
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3613265098246176e-06,
|
|
"loss": 0.6564,
|
|
"step": 9565
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3607254696487962e-06,
|
|
"loss": 0.6762,
|
|
"step": 9570
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.360124279630752e-06,
|
|
"loss": 0.6903,
|
|
"step": 9575
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3595229400202134e-06,
|
|
"loss": 0.6778,
|
|
"step": 9580
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3589214510669722e-06,
|
|
"loss": 0.7117,
|
|
"step": 9585
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3583198130208819e-06,
|
|
"loss": 0.6531,
|
|
"step": 9590
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3577180261318569e-06,
|
|
"loss": 0.7066,
|
|
"step": 9595
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.357116090649875e-06,
|
|
"loss": 0.7132,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"eval_loss": 0.6456849575042725,
|
|
"eval_runtime": 138.6043,
|
|
"eval_samples_per_second": 17.07,
|
|
"eval_steps_per_second": 2.85,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3565140068249746e-06,
|
|
"loss": 0.6633,
|
|
"step": 9605
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3559117749072559e-06,
|
|
"loss": 0.6652,
|
|
"step": 9610
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3553093951468817e-06,
|
|
"loss": 0.6863,
|
|
"step": 9615
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.354706867794074e-06,
|
|
"loss": 0.6849,
|
|
"step": 9620
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3541041930991187e-06,
|
|
"loss": 0.6647,
|
|
"step": 9625
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3535013713123606e-06,
|
|
"loss": 0.6668,
|
|
"step": 9630
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.352898402684207e-06,
|
|
"loss": 0.6574,
|
|
"step": 9635
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3522952874651262e-06,
|
|
"loss": 0.6735,
|
|
"step": 9640
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3516920259056467e-06,
|
|
"loss": 0.688,
|
|
"step": 9645
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.351088618256358e-06,
|
|
"loss": 0.6553,
|
|
"step": 9650
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3504850647679104e-06,
|
|
"loss": 0.6818,
|
|
"step": 9655
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.349881365691015e-06,
|
|
"loss": 0.6596,
|
|
"step": 9660
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.349277521276443e-06,
|
|
"loss": 0.6655,
|
|
"step": 9665
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3486735317750257e-06,
|
|
"loss": 0.6465,
|
|
"step": 9670
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3480693974376561e-06,
|
|
"loss": 0.6647,
|
|
"step": 9675
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3474651185152854e-06,
|
|
"loss": 0.6955,
|
|
"step": 9680
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3468606952589265e-06,
|
|
"loss": 0.6851,
|
|
"step": 9685
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.346256127919651e-06,
|
|
"loss": 0.6865,
|
|
"step": 9690
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.345651416748591e-06,
|
|
"loss": 0.6518,
|
|
"step": 9695
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3450465619969386e-06,
|
|
"loss": 0.67,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.344441563915945e-06,
|
|
"loss": 0.7331,
|
|
"step": 9705
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3438364227569215e-06,
|
|
"loss": 0.7002,
|
|
"step": 9710
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3432311387712377e-06,
|
|
"loss": 0.6777,
|
|
"step": 9715
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.3426257122103237e-06,
|
|
"loss": 0.7075,
|
|
"step": 9720
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3420201433256689e-06,
|
|
"loss": 0.707,
|
|
"step": 9725
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3414144323688204e-06,
|
|
"loss": 0.67,
|
|
"step": 9730
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3408085795913862e-06,
|
|
"loss": 0.6853,
|
|
"step": 9735
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3402025852450317e-06,
|
|
"loss": 0.6859,
|
|
"step": 9740
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3395964495814815e-06,
|
|
"loss": 0.7109,
|
|
"step": 9745
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.33899017285252e-06,
|
|
"loss": 0.7126,
|
|
"step": 9750
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3383837553099882e-06,
|
|
"loss": 0.6782,
|
|
"step": 9755
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3377771972057876e-06,
|
|
"loss": 0.6577,
|
|
"step": 9760
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3371704987918763e-06,
|
|
"loss": 0.6859,
|
|
"step": 9765
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3365636603202724e-06,
|
|
"loss": 0.705,
|
|
"step": 9770
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3359566820430509e-06,
|
|
"loss": 0.6359,
|
|
"step": 9775
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.335349564212345e-06,
|
|
"loss": 0.6926,
|
|
"step": 9780
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3347423070803465e-06,
|
|
"loss": 0.6667,
|
|
"step": 9785
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3341349108993047e-06,
|
|
"loss": 0.7033,
|
|
"step": 9790
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3335273759215269e-06,
|
|
"loss": 0.6923,
|
|
"step": 9795
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3329197023993774e-06,
|
|
"loss": 0.706,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3323118905852789e-06,
|
|
"loss": 0.6059,
|
|
"step": 9805
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.331703940731711e-06,
|
|
"loss": 0.7087,
|
|
"step": 9810
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3310958530912106e-06,
|
|
"loss": 0.7124,
|
|
"step": 9815
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3304876279163725e-06,
|
|
"loss": 0.6984,
|
|
"step": 9820
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3298792654598474e-06,
|
|
"loss": 0.6993,
|
|
"step": 9825
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3292707659743442e-06,
|
|
"loss": 0.7156,
|
|
"step": 9830
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3286621297126285e-06,
|
|
"loss": 0.7062,
|
|
"step": 9835
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3280533569275223e-06,
|
|
"loss": 0.6437,
|
|
"step": 9840
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3274444478719048e-06,
|
|
"loss": 0.6651,
|
|
"step": 9845
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3268354027987108e-06,
|
|
"loss": 0.6647,
|
|
"step": 9850
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3262262219609329e-06,
|
|
"loss": 0.6911,
|
|
"step": 9855
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3256169056116195e-06,
|
|
"loss": 0.6931,
|
|
"step": 9860
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3250074540038751e-06,
|
|
"loss": 0.6979,
|
|
"step": 9865
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.324397867390861e-06,
|
|
"loss": 0.6642,
|
|
"step": 9870
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3237881460257933e-06,
|
|
"loss": 0.6457,
|
|
"step": 9875
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3231782901619458e-06,
|
|
"loss": 0.6684,
|
|
"step": 9880
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3225683000526474e-06,
|
|
"loss": 0.6744,
|
|
"step": 9885
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.321958175951282e-06,
|
|
"loss": 0.6504,
|
|
"step": 9890
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3213479181112903e-06,
|
|
"loss": 0.7047,
|
|
"step": 9895
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.320737526786168e-06,
|
|
"loss": 0.6802,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.320127002229466e-06,
|
|
"loss": 0.6668,
|
|
"step": 9905
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.319516344694792e-06,
|
|
"loss": 0.6759,
|
|
"step": 9910
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3189055544358065e-06,
|
|
"loss": 0.6614,
|
|
"step": 9915
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3182946317062271e-06,
|
|
"loss": 0.6613,
|
|
"step": 9920
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3176835767598259e-06,
|
|
"loss": 0.7015,
|
|
"step": 9925
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3170723898504293e-06,
|
|
"loss": 0.6607,
|
|
"step": 9930
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3164610712319197e-06,
|
|
"loss": 0.7008,
|
|
"step": 9935
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3158496211582328e-06,
|
|
"loss": 0.6727,
|
|
"step": 9940
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3152380398833604e-06,
|
|
"loss": 0.6481,
|
|
"step": 9945
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3146263276613474e-06,
|
|
"loss": 0.6803,
|
|
"step": 9950
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3140144847462943e-06,
|
|
"loss": 0.6999,
|
|
"step": 9955
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.313402511392355e-06,
|
|
"loss": 0.7099,
|
|
"step": 9960
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3127904078537378e-06,
|
|
"loss": 0.6695,
|
|
"step": 9965
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 1.3121781743847054e-06,
|
|
"loss": 0.6898,
|
|
"step": 9970
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.311565811239574e-06,
|
|
"loss": 0.6724,
|
|
"step": 9975
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3109533186727145e-06,
|
|
"loss": 0.6905,
|
|
"step": 9980
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3103406969385502e-06,
|
|
"loss": 0.6442,
|
|
"step": 9985
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3097279462915594e-06,
|
|
"loss": 0.6735,
|
|
"step": 9990
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3091150669862734e-06,
|
|
"loss": 0.6962,
|
|
"step": 9995
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3085020592772767e-06,
|
|
"loss": 0.7271,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3078889234192074e-06,
|
|
"loss": 0.7095,
|
|
"step": 10005
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3072756596667569e-06,
|
|
"loss": 0.6467,
|
|
"step": 10010
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3066622682746693e-06,
|
|
"loss": 0.6728,
|
|
"step": 10015
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3060487494977426e-06,
|
|
"loss": 0.6724,
|
|
"step": 10020
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3054351035908269e-06,
|
|
"loss": 0.6336,
|
|
"step": 10025
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3048213308088253e-06,
|
|
"loss": 0.677,
|
|
"step": 10030
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3042074314066937e-06,
|
|
"loss": 0.7052,
|
|
"step": 10035
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3035934056394404e-06,
|
|
"loss": 0.6877,
|
|
"step": 10040
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3029792537621269e-06,
|
|
"loss": 0.6824,
|
|
"step": 10045
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.302364976029866e-06,
|
|
"loss": 0.6759,
|
|
"step": 10050
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3017505726978239e-06,
|
|
"loss": 0.7549,
|
|
"step": 10055
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3011360440212178e-06,
|
|
"loss": 0.686,
|
|
"step": 10060
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.3005213902553177e-06,
|
|
"loss": 0.6835,
|
|
"step": 10065
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2999066116554457e-06,
|
|
"loss": 0.7057,
|
|
"step": 10070
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2992917084769755e-06,
|
|
"loss": 0.6839,
|
|
"step": 10075
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2986766809753322e-06,
|
|
"loss": 0.676,
|
|
"step": 10080
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.298061529405993e-06,
|
|
"loss": 0.7068,
|
|
"step": 10085
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2974462540244868e-06,
|
|
"loss": 0.7212,
|
|
"step": 10090
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2968308550863932e-06,
|
|
"loss": 0.6873,
|
|
"step": 10095
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2962153328473435e-06,
|
|
"loss": 0.6773,
|
|
"step": 10100
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.295599687563021e-06,
|
|
"loss": 0.6805,
|
|
"step": 10105
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2949839194891587e-06,
|
|
"loss": 0.6523,
|
|
"step": 10110
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2943680288815418e-06,
|
|
"loss": 0.6765,
|
|
"step": 10115
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2937520159960054e-06,
|
|
"loss": 0.6897,
|
|
"step": 10120
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2931358810884362e-06,
|
|
"loss": 0.6637,
|
|
"step": 10125
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2925196244147712e-06,
|
|
"loss": 0.6691,
|
|
"step": 10130
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.291903246230998e-06,
|
|
"loss": 0.6603,
|
|
"step": 10135
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2912867467931551e-06,
|
|
"loss": 0.7122,
|
|
"step": 10140
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2906701263573306e-06,
|
|
"loss": 0.6517,
|
|
"step": 10145
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2900533851796632e-06,
|
|
"loss": 0.7098,
|
|
"step": 10150
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2894365235163425e-06,
|
|
"loss": 0.6925,
|
|
"step": 10155
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2888195416236065e-06,
|
|
"loss": 0.6564,
|
|
"step": 10160
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.288202439757745e-06,
|
|
"loss": 0.6779,
|
|
"step": 10165
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2875852181750964e-06,
|
|
"loss": 0.7095,
|
|
"step": 10170
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2869678771320484e-06,
|
|
"loss": 0.6668,
|
|
"step": 10175
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2863504168850406e-06,
|
|
"loss": 0.6538,
|
|
"step": 10180
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2857328376905593e-06,
|
|
"loss": 0.663,
|
|
"step": 10185
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2851151398051427e-06,
|
|
"loss": 0.648,
|
|
"step": 10190
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2844973234853762e-06,
|
|
"loss": 0.6934,
|
|
"step": 10195
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2838793889878955e-06,
|
|
"loss": 0.6829,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2832613365693857e-06,
|
|
"loss": 0.6484,
|
|
"step": 10205
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2826431664865795e-06,
|
|
"loss": 0.6949,
|
|
"step": 10210
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.2820248789962605e-06,
|
|
"loss": 0.6975,
|
|
"step": 10215
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.281406474355259e-06,
|
|
"loss": 0.6907,
|
|
"step": 10220
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2807879528204554e-06,
|
|
"loss": 0.6599,
|
|
"step": 10225
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.280169314648778e-06,
|
|
"loss": 0.6655,
|
|
"step": 10230
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2795505600972036e-06,
|
|
"loss": 0.6827,
|
|
"step": 10235
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2789316894227582e-06,
|
|
"loss": 0.6679,
|
|
"step": 10240
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2783127028825143e-06,
|
|
"loss": 0.6862,
|
|
"step": 10245
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2776936007335942e-06,
|
|
"loss": 0.6659,
|
|
"step": 10250
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2770743832331667e-06,
|
|
"loss": 0.7268,
|
|
"step": 10255
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2764550506384504e-06,
|
|
"loss": 0.6597,
|
|
"step": 10260
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2758356032067103e-06,
|
|
"loss": 0.6416,
|
|
"step": 10265
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2752160411952592e-06,
|
|
"loss": 0.7008,
|
|
"step": 10270
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2745963648614583e-06,
|
|
"loss": 0.6704,
|
|
"step": 10275
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2739765744627154e-06,
|
|
"loss": 0.6444,
|
|
"step": 10280
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2733566702564855e-06,
|
|
"loss": 0.6594,
|
|
"step": 10285
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2727366525002728e-06,
|
|
"loss": 0.6597,
|
|
"step": 10290
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2721165214516259e-06,
|
|
"loss": 0.6624,
|
|
"step": 10295
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2714962773681427e-06,
|
|
"loss": 0.6726,
|
|
"step": 10300
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.270875920507467e-06,
|
|
"loss": 0.6924,
|
|
"step": 10305
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2702554511272894e-06,
|
|
"loss": 0.631,
|
|
"step": 10310
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2696348694853476e-06,
|
|
"loss": 0.6702,
|
|
"step": 10315
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2690141758394259e-06,
|
|
"loss": 0.6808,
|
|
"step": 10320
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2683933704473552e-06,
|
|
"loss": 0.7162,
|
|
"step": 10325
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2677724535670126e-06,
|
|
"loss": 0.6696,
|
|
"step": 10330
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.267151425456321e-06,
|
|
"loss": 0.6856,
|
|
"step": 10335
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2665302863732508e-06,
|
|
"loss": 0.6957,
|
|
"step": 10340
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2659090365758174e-06,
|
|
"loss": 0.6934,
|
|
"step": 10345
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2652876763220828e-06,
|
|
"loss": 0.6666,
|
|
"step": 10350
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2646662058701547e-06,
|
|
"loss": 0.7049,
|
|
"step": 10355
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2640446254781855e-06,
|
|
"loss": 0.685,
|
|
"step": 10360
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.263422935404376e-06,
|
|
"loss": 0.6696,
|
|
"step": 10365
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2628011359069694e-06,
|
|
"loss": 0.6905,
|
|
"step": 10370
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2621792272442569e-06,
|
|
"loss": 0.7034,
|
|
"step": 10375
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2615572096745732e-06,
|
|
"loss": 0.6746,
|
|
"step": 10380
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2609350834562992e-06,
|
|
"loss": 0.6676,
|
|
"step": 10385
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2603128488478605e-06,
|
|
"loss": 0.6716,
|
|
"step": 10390
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2596905061077283e-06,
|
|
"loss": 0.6554,
|
|
"step": 10395
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2590680554944184e-06,
|
|
"loss": 0.6564,
|
|
"step": 10400
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"eval_loss": 0.6432496309280396,
|
|
"eval_runtime": 144.6412,
|
|
"eval_samples_per_second": 16.358,
|
|
"eval_steps_per_second": 2.731,
|
|
"step": 10400
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2584454972664912e-06,
|
|
"loss": 0.6727,
|
|
"step": 10405
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.257822831682552e-06,
|
|
"loss": 0.6545,
|
|
"step": 10410
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2572000590012505e-06,
|
|
"loss": 0.6794,
|
|
"step": 10415
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2565771794812812e-06,
|
|
"loss": 0.682,
|
|
"step": 10420
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.255954193381383e-06,
|
|
"loss": 0.6566,
|
|
"step": 10425
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2553311009603389e-06,
|
|
"loss": 0.6396,
|
|
"step": 10430
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2547079024769756e-06,
|
|
"loss": 0.6685,
|
|
"step": 10435
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2540845981901648e-06,
|
|
"loss": 0.6409,
|
|
"step": 10440
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2534611883588213e-06,
|
|
"loss": 0.6928,
|
|
"step": 10445
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2528376732419048e-06,
|
|
"loss": 0.7162,
|
|
"step": 10450
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2522140530984173e-06,
|
|
"loss": 0.6575,
|
|
"step": 10455
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 1.2515903281874057e-06,
|
|
"loss": 0.7134,
|
|
"step": 10460
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2509664987679599e-06,
|
|
"loss": 0.6521,
|
|
"step": 10465
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2503425650992124e-06,
|
|
"loss": 0.6655,
|
|
"step": 10470
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2497185274403407e-06,
|
|
"loss": 0.6606,
|
|
"step": 10475
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2490943860505645e-06,
|
|
"loss": 0.6419,
|
|
"step": 10480
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2484701411891465e-06,
|
|
"loss": 0.6822,
|
|
"step": 10485
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2478457931153927e-06,
|
|
"loss": 0.6705,
|
|
"step": 10490
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2472213420886518e-06,
|
|
"loss": 0.638,
|
|
"step": 10495
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2465967883683152e-06,
|
|
"loss": 0.6684,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2459721322138173e-06,
|
|
"loss": 0.668,
|
|
"step": 10505
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.245347373884635e-06,
|
|
"loss": 0.6665,
|
|
"step": 10510
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.244722513640287e-06,
|
|
"loss": 0.6971,
|
|
"step": 10515
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2440975517403352e-06,
|
|
"loss": 0.6807,
|
|
"step": 10520
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.243472488444383e-06,
|
|
"loss": 0.6471,
|
|
"step": 10525
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2428473240120763e-06,
|
|
"loss": 0.658,
|
|
"step": 10530
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2422220587031033e-06,
|
|
"loss": 0.7214,
|
|
"step": 10535
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2415966927771938e-06,
|
|
"loss": 0.6445,
|
|
"step": 10540
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2409712264941189e-06,
|
|
"loss": 0.6899,
|
|
"step": 10545
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.240345660113692e-06,
|
|
"loss": 0.7066,
|
|
"step": 10550
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2397199938957678e-06,
|
|
"loss": 0.6361,
|
|
"step": 10555
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2390942281002435e-06,
|
|
"loss": 0.6871,
|
|
"step": 10560
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.238468362987056e-06,
|
|
"loss": 0.6544,
|
|
"step": 10565
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.237842398816184e-06,
|
|
"loss": 0.6198,
|
|
"step": 10570
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.237216335847648e-06,
|
|
"loss": 0.6907,
|
|
"step": 10575
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.236590174341509e-06,
|
|
"loss": 0.6411,
|
|
"step": 10580
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2359639145578693e-06,
|
|
"loss": 0.6724,
|
|
"step": 10585
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2353375567568715e-06,
|
|
"loss": 0.6606,
|
|
"step": 10590
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.234711101198699e-06,
|
|
"loss": 0.6745,
|
|
"step": 10595
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2340845481435764e-06,
|
|
"loss": 0.6417,
|
|
"step": 10600
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.233457897851768e-06,
|
|
"loss": 0.6824,
|
|
"step": 10605
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2328311505835794e-06,
|
|
"loss": 0.6726,
|
|
"step": 10610
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2322043065993554e-06,
|
|
"loss": 0.692,
|
|
"step": 10615
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2315773661594817e-06,
|
|
"loss": 0.7019,
|
|
"step": 10620
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2309503295243844e-06,
|
|
"loss": 0.6775,
|
|
"step": 10625
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.230323196954528e-06,
|
|
"loss": 0.6774,
|
|
"step": 10630
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.229695968710419e-06,
|
|
"loss": 0.7227,
|
|
"step": 10635
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2290686450526021e-06,
|
|
"loss": 0.6676,
|
|
"step": 10640
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2284412262416621e-06,
|
|
"loss": 0.6781,
|
|
"step": 10645
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2278137125382235e-06,
|
|
"loss": 0.666,
|
|
"step": 10650
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2271861042029496e-06,
|
|
"loss": 0.6926,
|
|
"step": 10655
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2265584014965439e-06,
|
|
"loss": 0.6458,
|
|
"step": 10660
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2259306046797486e-06,
|
|
"loss": 0.6518,
|
|
"step": 10665
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2253027140133444e-06,
|
|
"loss": 0.6911,
|
|
"step": 10670
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2246747297581527e-06,
|
|
"loss": 0.6738,
|
|
"step": 10675
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2240466521750318e-06,
|
|
"loss": 0.6601,
|
|
"step": 10680
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.22341848152488e-06,
|
|
"loss": 0.6561,
|
|
"step": 10685
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.222790218068634e-06,
|
|
"loss": 0.6899,
|
|
"step": 10690
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2221618620672689e-06,
|
|
"loss": 0.6685,
|
|
"step": 10695
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2215334137817985e-06,
|
|
"loss": 0.6926,
|
|
"step": 10700
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.2209048734732742e-06,
|
|
"loss": 0.6241,
|
|
"step": 10705
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2202762414027865e-06,
|
|
"loss": 0.6556,
|
|
"step": 10710
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.219647517831464e-06,
|
|
"loss": 0.6612,
|
|
"step": 10715
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2190187030204727e-06,
|
|
"loss": 0.6931,
|
|
"step": 10720
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2183897972310168e-06,
|
|
"loss": 0.6806,
|
|
"step": 10725
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2177608007243384e-06,
|
|
"loss": 0.7048,
|
|
"step": 10730
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2171317137617172e-06,
|
|
"loss": 0.6722,
|
|
"step": 10735
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2165025366044698e-06,
|
|
"loss": 0.7012,
|
|
"step": 10740
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2158732695139523e-06,
|
|
"loss": 0.6476,
|
|
"step": 10745
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2152439127515557e-06,
|
|
"loss": 0.661,
|
|
"step": 10750
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2146144665787094e-06,
|
|
"loss": 0.6862,
|
|
"step": 10755
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2139849312568803e-06,
|
|
"loss": 0.6949,
|
|
"step": 10760
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2133553070475714e-06,
|
|
"loss": 0.6565,
|
|
"step": 10765
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2127255942123236e-06,
|
|
"loss": 0.6803,
|
|
"step": 10770
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2120957930127139e-06,
|
|
"loss": 0.6909,
|
|
"step": 10775
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.211465903710356e-06,
|
|
"loss": 0.6885,
|
|
"step": 10780
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.210835926566901e-06,
|
|
"loss": 0.6837,
|
|
"step": 10785
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2102058618440354e-06,
|
|
"loss": 0.676,
|
|
"step": 10790
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2095757098034829e-06,
|
|
"loss": 0.6624,
|
|
"step": 10795
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2089454707070033e-06,
|
|
"loss": 0.6218,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.208315144816392e-06,
|
|
"loss": 0.6528,
|
|
"step": 10805
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2076847323934813e-06,
|
|
"loss": 0.6933,
|
|
"step": 10810
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2070542337001385e-06,
|
|
"loss": 0.6646,
|
|
"step": 10815
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.206423648998268e-06,
|
|
"loss": 0.6935,
|
|
"step": 10820
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.205792978549809e-06,
|
|
"loss": 0.6853,
|
|
"step": 10825
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2051622226167362e-06,
|
|
"loss": 0.6648,
|
|
"step": 10830
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2045313814610602e-06,
|
|
"loss": 0.6587,
|
|
"step": 10835
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.203900455344827e-06,
|
|
"loss": 0.6662,
|
|
"step": 10840
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2032694445301182e-06,
|
|
"loss": 0.6504,
|
|
"step": 10845
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2026383492790495e-06,
|
|
"loss": 0.7228,
|
|
"step": 10850
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2020071698537727e-06,
|
|
"loss": 0.6674,
|
|
"step": 10855
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2013759065164748e-06,
|
|
"loss": 0.7009,
|
|
"step": 10860
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.200744559529376e-06,
|
|
"loss": 0.6588,
|
|
"step": 10865
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.2001131291547335e-06,
|
|
"loss": 0.651,
|
|
"step": 10870
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.1994816156548368e-06,
|
|
"loss": 0.644,
|
|
"step": 10875
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.1988500192920122e-06,
|
|
"loss": 0.6669,
|
|
"step": 10880
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.1982183403286186e-06,
|
|
"loss": 0.6718,
|
|
"step": 10885
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.1975865790270503e-06,
|
|
"loss": 0.6594,
|
|
"step": 10890
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.1969547356497356e-06,
|
|
"loss": 0.6279,
|
|
"step": 10895
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.1963228104591362e-06,
|
|
"loss": 0.6866,
|
|
"step": 10900
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.1956908037177484e-06,
|
|
"loss": 0.6836,
|
|
"step": 10905
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.1950587156881032e-06,
|
|
"loss": 0.6323,
|
|
"step": 10910
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.1944265466327634e-06,
|
|
"loss": 0.6714,
|
|
"step": 10915
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.193794296814327e-06,
|
|
"loss": 0.6849,
|
|
"step": 10920
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.193161966495425e-06,
|
|
"loss": 0.6654,
|
|
"step": 10925
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.1925295559387221e-06,
|
|
"loss": 0.7011,
|
|
"step": 10930
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.191897065406916e-06,
|
|
"loss": 0.6452,
|
|
"step": 10935
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.1912644951627375e-06,
|
|
"loss": 0.6537,
|
|
"step": 10940
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.1906318454689512e-06,
|
|
"loss": 0.6852,
|
|
"step": 10945
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.189999116588354e-06,
|
|
"loss": 0.6316,
|
|
"step": 10950
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 1.189366308783776e-06,
|
|
"loss": 0.6368,
|
|
"step": 10955
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1887334223180804e-06,
|
|
"loss": 0.6621,
|
|
"step": 10960
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1881004574541625e-06,
|
|
"loss": 0.6432,
|
|
"step": 10965
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.18746741445495e-06,
|
|
"loss": 0.6771,
|
|
"step": 10970
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1868342935834041e-06,
|
|
"loss": 0.6967,
|
|
"step": 10975
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1862010951025175e-06,
|
|
"loss": 0.7035,
|
|
"step": 10980
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1855678192753151e-06,
|
|
"loss": 0.6717,
|
|
"step": 10985
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1849344663648543e-06,
|
|
"loss": 0.6407,
|
|
"step": 10990
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1843010366342248e-06,
|
|
"loss": 0.6802,
|
|
"step": 10995
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1836675303465472e-06,
|
|
"loss": 0.6332,
|
|
"step": 11000
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1830339477649749e-06,
|
|
"loss": 0.6221,
|
|
"step": 11005
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1824002891526923e-06,
|
|
"loss": 0.6445,
|
|
"step": 11010
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1817665547729157e-06,
|
|
"loss": 0.6818,
|
|
"step": 11015
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.181132744888893e-06,
|
|
"loss": 0.6772,
|
|
"step": 11020
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1804988597639034e-06,
|
|
"loss": 0.6692,
|
|
"step": 11025
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1798648996612572e-06,
|
|
"loss": 0.6727,
|
|
"step": 11030
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1792308648442958e-06,
|
|
"loss": 0.7129,
|
|
"step": 11035
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1785967555763915e-06,
|
|
"loss": 0.636,
|
|
"step": 11040
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1779625721209484e-06,
|
|
"loss": 0.6769,
|
|
"step": 11045
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1773283147414007e-06,
|
|
"loss": 0.663,
|
|
"step": 11050
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1766939837012128e-06,
|
|
"loss": 0.6988,
|
|
"step": 11055
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1760595792638807e-06,
|
|
"loss": 0.6399,
|
|
"step": 11060
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1754251016929307e-06,
|
|
"loss": 0.6709,
|
|
"step": 11065
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.174790551251919e-06,
|
|
"loss": 0.661,
|
|
"step": 11070
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1741559282044322e-06,
|
|
"loss": 0.6604,
|
|
"step": 11075
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1735212328140876e-06,
|
|
"loss": 0.6601,
|
|
"step": 11080
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1728864653445314e-06,
|
|
"loss": 0.6732,
|
|
"step": 11085
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1722516260594412e-06,
|
|
"loss": 0.6564,
|
|
"step": 11090
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1716167152225237e-06,
|
|
"loss": 0.6819,
|
|
"step": 11095
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.170981733097515e-06,
|
|
"loss": 0.6739,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1703466799481817e-06,
|
|
"loss": 0.6658,
|
|
"step": 11105
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1697115560383185e-06,
|
|
"loss": 0.6999,
|
|
"step": 11110
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1690763616317507e-06,
|
|
"loss": 0.6797,
|
|
"step": 11115
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.168441096992333e-06,
|
|
"loss": 0.6541,
|
|
"step": 11120
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1678057623839484e-06,
|
|
"loss": 0.6586,
|
|
"step": 11125
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1671703580705094e-06,
|
|
"loss": 0.6609,
|
|
"step": 11130
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1665348843159574e-06,
|
|
"loss": 0.6493,
|
|
"step": 11135
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1658993413842624e-06,
|
|
"loss": 0.6771,
|
|
"step": 11140
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1652637295394244e-06,
|
|
"loss": 0.6806,
|
|
"step": 11145
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1646280490454696e-06,
|
|
"loss": 0.6739,
|
|
"step": 11150
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1639923001664555e-06,
|
|
"loss": 0.6482,
|
|
"step": 11155
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1633564831664656e-06,
|
|
"loss": 0.6759,
|
|
"step": 11160
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1627205983096135e-06,
|
|
"loss": 0.6722,
|
|
"step": 11165
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.16208464586004e-06,
|
|
"loss": 0.6531,
|
|
"step": 11170
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1614486260819138e-06,
|
|
"loss": 0.6934,
|
|
"step": 11175
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1608125392394327e-06,
|
|
"loss": 0.6637,
|
|
"step": 11180
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1601763855968212e-06,
|
|
"loss": 0.6879,
|
|
"step": 11185
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1595401654183322e-06,
|
|
"loss": 0.6499,
|
|
"step": 11190
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1589038789682456e-06,
|
|
"loss": 0.7014,
|
|
"step": 11195
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.1582675265108699e-06,
|
|
"loss": 0.6136,
|
|
"step": 11200
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"eval_loss": 0.6401504874229431,
|
|
"eval_runtime": 144.3337,
|
|
"eval_samples_per_second": 16.393,
|
|
"eval_steps_per_second": 2.737,
|
|
"step": 11200
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.15763110831054e-06,
|
|
"loss": 0.6646,
|
|
"step": 11205
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1569946246316182e-06,
|
|
"loss": 0.7085,
|
|
"step": 11210
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.156358075738495e-06,
|
|
"loss": 0.6621,
|
|
"step": 11215
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1557214618955868e-06,
|
|
"loss": 0.6703,
|
|
"step": 11220
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1550847833673374e-06,
|
|
"loss": 0.7204,
|
|
"step": 11225
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.154448040418218e-06,
|
|
"loss": 0.6923,
|
|
"step": 11230
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1538112333127253e-06,
|
|
"loss": 0.6608,
|
|
"step": 11235
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1531743623153842e-06,
|
|
"loss": 0.6824,
|
|
"step": 11240
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1525374276907449e-06,
|
|
"loss": 0.7322,
|
|
"step": 11245
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1519004297033847e-06,
|
|
"loss": 0.6432,
|
|
"step": 11250
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1512633686179071e-06,
|
|
"loss": 0.6795,
|
|
"step": 11255
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1506262446989417e-06,
|
|
"loss": 0.7229,
|
|
"step": 11260
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.149989058211144e-06,
|
|
"loss": 0.6954,
|
|
"step": 11265
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.149351809419196e-06,
|
|
"loss": 0.6879,
|
|
"step": 11270
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.148714498587805e-06,
|
|
"loss": 0.6642,
|
|
"step": 11275
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1480771259817048e-06,
|
|
"loss": 0.7015,
|
|
"step": 11280
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.147439691865654e-06,
|
|
"loss": 0.6467,
|
|
"step": 11285
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1468021965044377e-06,
|
|
"loss": 0.7045,
|
|
"step": 11290
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1461646401628654e-06,
|
|
"loss": 0.6635,
|
|
"step": 11295
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1455270231057728e-06,
|
|
"loss": 0.6943,
|
|
"step": 11300
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.14488934559802e-06,
|
|
"loss": 0.6626,
|
|
"step": 11305
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1442516079044932e-06,
|
|
"loss": 0.6716,
|
|
"step": 11310
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1436138102901031e-06,
|
|
"loss": 0.6748,
|
|
"step": 11315
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.142975953019785e-06,
|
|
"loss": 0.7028,
|
|
"step": 11320
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1423380363584999e-06,
|
|
"loss": 0.6044,
|
|
"step": 11325
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1417000605712316e-06,
|
|
"loss": 0.6831,
|
|
"step": 11330
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1410620259229908e-06,
|
|
"loss": 0.6632,
|
|
"step": 11335
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1404239326788115e-06,
|
|
"loss": 0.6393,
|
|
"step": 11340
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1397857811037512e-06,
|
|
"loss": 0.6501,
|
|
"step": 11345
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1391475714628932e-06,
|
|
"loss": 0.6398,
|
|
"step": 11350
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.138509304021344e-06,
|
|
"loss": 0.6784,
|
|
"step": 11355
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1378709790442346e-06,
|
|
"loss": 0.7065,
|
|
"step": 11360
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1372325967967196e-06,
|
|
"loss": 0.6189,
|
|
"step": 11365
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1365941575439772e-06,
|
|
"loss": 0.6652,
|
|
"step": 11370
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1359556615512099e-06,
|
|
"loss": 0.6752,
|
|
"step": 11375
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1353171090836427e-06,
|
|
"loss": 0.6668,
|
|
"step": 11380
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.134678500406525e-06,
|
|
"loss": 0.6587,
|
|
"step": 11385
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.13403983578513e-06,
|
|
"loss": 0.6873,
|
|
"step": 11390
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1334011154847527e-06,
|
|
"loss": 0.6975,
|
|
"step": 11395
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1327623397707122e-06,
|
|
"loss": 0.6784,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1321235089083502e-06,
|
|
"loss": 0.6643,
|
|
"step": 11405
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1314846231630315e-06,
|
|
"loss": 0.6754,
|
|
"step": 11410
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1308456828001441e-06,
|
|
"loss": 0.6689,
|
|
"step": 11415
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1302066880850975e-06,
|
|
"loss": 0.6594,
|
|
"step": 11420
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1295676392833253e-06,
|
|
"loss": 0.6416,
|
|
"step": 11425
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1289285366602826e-06,
|
|
"loss": 0.7223,
|
|
"step": 11430
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1282893804814468e-06,
|
|
"loss": 0.6944,
|
|
"step": 11435
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.127650171012318e-06,
|
|
"loss": 0.6598,
|
|
"step": 11440
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 1.1270109085184182e-06,
|
|
"loss": 0.7102,
|
|
"step": 11445
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1263715932652919e-06,
|
|
"loss": 0.6803,
|
|
"step": 11450
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1257322255185044e-06,
|
|
"loss": 0.65,
|
|
"step": 11455
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1250928055436443e-06,
|
|
"loss": 0.7018,
|
|
"step": 11460
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.12445333360632e-06,
|
|
"loss": 0.6409,
|
|
"step": 11465
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1238138099721634e-06,
|
|
"loss": 0.6724,
|
|
"step": 11470
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1231742349068271e-06,
|
|
"loss": 0.6854,
|
|
"step": 11475
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1225346086759846e-06,
|
|
"loss": 0.7102,
|
|
"step": 11480
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1218949315453314e-06,
|
|
"loss": 0.6584,
|
|
"step": 11485
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1212552037805836e-06,
|
|
"loss": 0.6584,
|
|
"step": 11490
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1206154256474786e-06,
|
|
"loss": 0.6846,
|
|
"step": 11495
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.119975597411775e-06,
|
|
"loss": 0.7037,
|
|
"step": 11500
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1193357193392512e-06,
|
|
"loss": 0.6689,
|
|
"step": 11505
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1186957916957078e-06,
|
|
"loss": 0.6556,
|
|
"step": 11510
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1180558147469642e-06,
|
|
"loss": 0.6557,
|
|
"step": 11515
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1174157887588623e-06,
|
|
"loss": 0.6662,
|
|
"step": 11520
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1167757139972626e-06,
|
|
"loss": 0.6702,
|
|
"step": 11525
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.116135590728047e-06,
|
|
"loss": 0.6682,
|
|
"step": 11530
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.115495419217117e-06,
|
|
"loss": 0.6855,
|
|
"step": 11535
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.114855199730394e-06,
|
|
"loss": 0.6796,
|
|
"step": 11540
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1142149325338199e-06,
|
|
"loss": 0.6481,
|
|
"step": 11545
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1135746178933563e-06,
|
|
"loss": 0.7167,
|
|
"step": 11550
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.112934256074984e-06,
|
|
"loss": 0.6602,
|
|
"step": 11555
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1122938473447038e-06,
|
|
"loss": 0.6848,
|
|
"step": 11560
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1116533919685361e-06,
|
|
"loss": 0.6628,
|
|
"step": 11565
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1110128902125201e-06,
|
|
"loss": 0.6826,
|
|
"step": 11570
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1103723423427153e-06,
|
|
"loss": 0.6527,
|
|
"step": 11575
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1097317486251992e-06,
|
|
"loss": 0.6737,
|
|
"step": 11580
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.109091109326069e-06,
|
|
"loss": 0.6854,
|
|
"step": 11585
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1084504247114406e-06,
|
|
"loss": 0.7145,
|
|
"step": 11590
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.107809695047449e-06,
|
|
"loss": 0.6756,
|
|
"step": 11595
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1071689206002474e-06,
|
|
"loss": 0.6725,
|
|
"step": 11600
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1065281016360083e-06,
|
|
"loss": 0.7145,
|
|
"step": 11605
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1058872384209224e-06,
|
|
"loss": 0.6899,
|
|
"step": 11610
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1052463312211983e-06,
|
|
"loss": 0.6344,
|
|
"step": 11615
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1046053803030637e-06,
|
|
"loss": 0.654,
|
|
"step": 11620
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1039643859327635e-06,
|
|
"loss": 0.6741,
|
|
"step": 11625
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1033233483765615e-06,
|
|
"loss": 0.6814,
|
|
"step": 11630
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1026822679007395e-06,
|
|
"loss": 0.6565,
|
|
"step": 11635
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1020411447715961e-06,
|
|
"loss": 0.6916,
|
|
"step": 11640
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1013999792554486e-06,
|
|
"loss": 0.6894,
|
|
"step": 11645
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1007587716186317e-06,
|
|
"loss": 0.6698,
|
|
"step": 11650
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.1001175221274968e-06,
|
|
"loss": 0.7096,
|
|
"step": 11655
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.0994762310484142e-06,
|
|
"loss": 0.6887,
|
|
"step": 11660
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.0988348986477705e-06,
|
|
"loss": 0.671,
|
|
"step": 11665
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.0981935251919693e-06,
|
|
"loss": 0.6727,
|
|
"step": 11670
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.0975521109474318e-06,
|
|
"loss": 0.6777,
|
|
"step": 11675
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.0969106561805952e-06,
|
|
"loss": 0.6661,
|
|
"step": 11680
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.0962691611579154e-06,
|
|
"loss": 0.6576,
|
|
"step": 11685
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.0956276261458629e-06,
|
|
"loss": 0.6415,
|
|
"step": 11690
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0949860514109264e-06,
|
|
"loss": 0.6485,
|
|
"step": 11695
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.09434443721961e-06,
|
|
"loss": 0.6966,
|
|
"step": 11700
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0937027838384345e-06,
|
|
"loss": 0.6746,
|
|
"step": 11705
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.093061091533938e-06,
|
|
"loss": 0.6565,
|
|
"step": 11710
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0924193605726733e-06,
|
|
"loss": 0.7127,
|
|
"step": 11715
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0917775912212099e-06,
|
|
"loss": 0.6738,
|
|
"step": 11720
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0911357837461332e-06,
|
|
"loss": 0.6712,
|
|
"step": 11725
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0904939384140445e-06,
|
|
"loss": 0.6617,
|
|
"step": 11730
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0898520554915607e-06,
|
|
"loss": 0.6919,
|
|
"step": 11735
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0892101352453142e-06,
|
|
"loss": 0.6731,
|
|
"step": 11740
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0885681779419537e-06,
|
|
"loss": 0.6931,
|
|
"step": 11745
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0879261838481426e-06,
|
|
"loss": 0.631,
|
|
"step": 11750
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0872841532305587e-06,
|
|
"loss": 0.6515,
|
|
"step": 11755
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0866420863558969e-06,
|
|
"loss": 0.6674,
|
|
"step": 11760
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0859999834908657e-06,
|
|
"loss": 0.6514,
|
|
"step": 11765
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0853578449021896e-06,
|
|
"loss": 0.6393,
|
|
"step": 11770
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.084715670856607e-06,
|
|
"loss": 0.6941,
|
|
"step": 11775
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0840734616208712e-06,
|
|
"loss": 0.664,
|
|
"step": 11780
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0834312174617508e-06,
|
|
"loss": 0.6636,
|
|
"step": 11785
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0827889386460281e-06,
|
|
"loss": 0.6756,
|
|
"step": 11790
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0821466254405004e-06,
|
|
"loss": 0.7116,
|
|
"step": 11795
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0815042781119788e-06,
|
|
"loss": 0.6647,
|
|
"step": 11800
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0808618969272888e-06,
|
|
"loss": 0.6404,
|
|
"step": 11805
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0802194821532702e-06,
|
|
"loss": 0.6711,
|
|
"step": 11810
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.079577034056776e-06,
|
|
"loss": 0.6717,
|
|
"step": 11815
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.078934552904674e-06,
|
|
"loss": 0.6705,
|
|
"step": 11820
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0782920389638452e-06,
|
|
"loss": 0.6713,
|
|
"step": 11825
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0776494925011846e-06,
|
|
"loss": 0.6474,
|
|
"step": 11830
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0770069137836e-06,
|
|
"loss": 0.6745,
|
|
"step": 11835
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0763643030780126e-06,
|
|
"loss": 0.6773,
|
|
"step": 11840
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.075721660651358e-06,
|
|
"loss": 0.6767,
|
|
"step": 11845
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0750789867705843e-06,
|
|
"loss": 0.6758,
|
|
"step": 11850
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0744362817026524e-06,
|
|
"loss": 0.686,
|
|
"step": 11855
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0737935457145364e-06,
|
|
"loss": 0.6736,
|
|
"step": 11860
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.073150779073223e-06,
|
|
"loss": 0.668,
|
|
"step": 11865
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0725079820457123e-06,
|
|
"loss": 0.7003,
|
|
"step": 11870
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0718651548990163e-06,
|
|
"loss": 0.682,
|
|
"step": 11875
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0712222979001602e-06,
|
|
"loss": 0.6445,
|
|
"step": 11880
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0705794113161808e-06,
|
|
"loss": 0.6872,
|
|
"step": 11885
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0699364954141276e-06,
|
|
"loss": 0.6936,
|
|
"step": 11890
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0692935504610625e-06,
|
|
"loss": 0.6195,
|
|
"step": 11895
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.068650576724059e-06,
|
|
"loss": 0.6658,
|
|
"step": 11900
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0680075744702034e-06,
|
|
"loss": 0.6799,
|
|
"step": 11905
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0673645439665925e-06,
|
|
"loss": 0.6823,
|
|
"step": 11910
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0667214854803357e-06,
|
|
"loss": 0.6677,
|
|
"step": 11915
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0660783992785541e-06,
|
|
"loss": 0.6648,
|
|
"step": 11920
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.06543528562838e-06,
|
|
"loss": 0.6313,
|
|
"step": 11925
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0647921447969577e-06,
|
|
"loss": 0.667,
|
|
"step": 11930
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0641489770514418e-06,
|
|
"loss": 0.6567,
|
|
"step": 11935
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.0635057826589987e-06,
|
|
"loss": 0.6727,
|
|
"step": 11940
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0628625618868056e-06,
|
|
"loss": 0.6835,
|
|
"step": 11945
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.062219315002051e-06,
|
|
"loss": 0.6329,
|
|
"step": 11950
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.061576042271934e-06,
|
|
"loss": 0.6823,
|
|
"step": 11955
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0609327439636647e-06,
|
|
"loss": 0.6514,
|
|
"step": 11960
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0602894203444633e-06,
|
|
"loss": 0.716,
|
|
"step": 11965
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0596460716815612e-06,
|
|
"loss": 0.6778,
|
|
"step": 11970
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.059002698242199e-06,
|
|
"loss": 0.6671,
|
|
"step": 11975
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0583593002936298e-06,
|
|
"loss": 0.6936,
|
|
"step": 11980
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0577158781031147e-06,
|
|
"loss": 0.7211,
|
|
"step": 11985
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0570724319379254e-06,
|
|
"loss": 0.6667,
|
|
"step": 11990
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0564289620653446e-06,
|
|
"loss": 0.7181,
|
|
"step": 11995
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0557854687526632e-06,
|
|
"loss": 0.6431,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"eval_loss": 0.6379530429840088,
|
|
"eval_runtime": 144.8913,
|
|
"eval_samples_per_second": 16.329,
|
|
"eval_steps_per_second": 2.726,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0551419522671834e-06,
|
|
"loss": 0.6469,
|
|
"step": 12005
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0544984128762164e-06,
|
|
"loss": 0.6672,
|
|
"step": 12010
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0538548508470824e-06,
|
|
"loss": 0.6599,
|
|
"step": 12015
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.053211266447112e-06,
|
|
"loss": 0.6737,
|
|
"step": 12020
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.052567659943644e-06,
|
|
"loss": 0.6275,
|
|
"step": 12025
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0519240316040269e-06,
|
|
"loss": 0.6603,
|
|
"step": 12030
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0512803816956191e-06,
|
|
"loss": 0.6718,
|
|
"step": 12035
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0506367104857864e-06,
|
|
"loss": 0.7219,
|
|
"step": 12040
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.049993018241905e-06,
|
|
"loss": 0.6921,
|
|
"step": 12045
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0493493052313582e-06,
|
|
"loss": 0.6371,
|
|
"step": 12050
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.048705571721539e-06,
|
|
"loss": 0.6545,
|
|
"step": 12055
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0480618179798493e-06,
|
|
"loss": 0.6818,
|
|
"step": 12060
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.047418044273698e-06,
|
|
"loss": 0.6516,
|
|
"step": 12065
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0467742508705039e-06,
|
|
"loss": 0.649,
|
|
"step": 12070
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0461304380376924e-06,
|
|
"loss": 0.6821,
|
|
"step": 12075
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0454866060426986e-06,
|
|
"loss": 0.6675,
|
|
"step": 12080
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0448427551529635e-06,
|
|
"loss": 0.6436,
|
|
"step": 12085
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0441988856359385e-06,
|
|
"loss": 0.695,
|
|
"step": 12090
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0435549977590806e-06,
|
|
"loss": 0.662,
|
|
"step": 12095
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0429110917898552e-06,
|
|
"loss": 0.6499,
|
|
"step": 12100
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0422671679957357e-06,
|
|
"loss": 0.6987,
|
|
"step": 12105
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0416232266442017e-06,
|
|
"loss": 0.7055,
|
|
"step": 12110
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0409792680027419e-06,
|
|
"loss": 0.6261,
|
|
"step": 12115
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0403352923388504e-06,
|
|
"loss": 0.6435,
|
|
"step": 12120
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.039691299920029e-06,
|
|
"loss": 0.6984,
|
|
"step": 12125
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.039047291013787e-06,
|
|
"loss": 0.6501,
|
|
"step": 12130
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0384032658876397e-06,
|
|
"loss": 0.6991,
|
|
"step": 12135
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.03775922480911e-06,
|
|
"loss": 0.6751,
|
|
"step": 12140
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0371151680457266e-06,
|
|
"loss": 0.649,
|
|
"step": 12145
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0364710958650252e-06,
|
|
"loss": 0.6668,
|
|
"step": 12150
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0358270085345475e-06,
|
|
"loss": 0.6789,
|
|
"step": 12155
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0351829063218423e-06,
|
|
"loss": 0.6708,
|
|
"step": 12160
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0345387894944635e-06,
|
|
"loss": 0.6833,
|
|
"step": 12165
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.033894658319972e-06,
|
|
"loss": 0.6652,
|
|
"step": 12170
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0332505130659344e-06,
|
|
"loss": 0.7033,
|
|
"step": 12175
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.0326063539999228e-06,
|
|
"loss": 0.6458,
|
|
"step": 12180
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 1.031962181389515e-06,
|
|
"loss": 0.702,
|
|
"step": 12185
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0313179955022951e-06,
|
|
"loss": 0.6382,
|
|
"step": 12190
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0306737966058526e-06,
|
|
"loss": 0.6341,
|
|
"step": 12195
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0300295849677811e-06,
|
|
"loss": 0.6793,
|
|
"step": 12200
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0293853608556817e-06,
|
|
"loss": 0.6535,
|
|
"step": 12205
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.028741124537159e-06,
|
|
"loss": 0.661,
|
|
"step": 12210
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0280968762798227e-06,
|
|
"loss": 0.642,
|
|
"step": 12215
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0274526163512885e-06,
|
|
"loss": 0.6661,
|
|
"step": 12220
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0268083450191761e-06,
|
|
"loss": 0.6825,
|
|
"step": 12225
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0261640625511106e-06,
|
|
"loss": 0.6685,
|
|
"step": 12230
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0255197692147207e-06,
|
|
"loss": 0.6488,
|
|
"step": 12235
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.024875465277641e-06,
|
|
"loss": 0.646,
|
|
"step": 12240
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.024231151007509e-06,
|
|
"loss": 0.6769,
|
|
"step": 12245
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0235868266719679e-06,
|
|
"loss": 0.6548,
|
|
"step": 12250
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0229424925386638e-06,
|
|
"loss": 0.6572,
|
|
"step": 12255
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.022298148875248e-06,
|
|
"loss": 0.6832,
|
|
"step": 12260
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0216537959493752e-06,
|
|
"loss": 0.7031,
|
|
"step": 12265
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0210094340287036e-06,
|
|
"loss": 0.6427,
|
|
"step": 12270
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0203650633808957e-06,
|
|
"loss": 0.6727,
|
|
"step": 12275
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0197206842736181e-06,
|
|
"loss": 0.6859,
|
|
"step": 12280
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0190762969745395e-06,
|
|
"loss": 0.6617,
|
|
"step": 12285
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.018431901751333e-06,
|
|
"loss": 0.6733,
|
|
"step": 12290
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0177874988716746e-06,
|
|
"loss": 0.6554,
|
|
"step": 12295
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.017143088603244e-06,
|
|
"loss": 0.6412,
|
|
"step": 12300
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0164986712137239e-06,
|
|
"loss": 0.6979,
|
|
"step": 12305
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0158542469707984e-06,
|
|
"loss": 0.6387,
|
|
"step": 12310
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0152098161421574e-06,
|
|
"loss": 0.6824,
|
|
"step": 12315
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0145653789954907e-06,
|
|
"loss": 0.6187,
|
|
"step": 12320
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0139209357984922e-06,
|
|
"loss": 0.6574,
|
|
"step": 12325
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0132764868188582e-06,
|
|
"loss": 0.6943,
|
|
"step": 12330
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0126320323242868e-06,
|
|
"loss": 0.688,
|
|
"step": 12335
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0119875725824792e-06,
|
|
"loss": 0.7113,
|
|
"step": 12340
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0113431078611381e-06,
|
|
"loss": 0.6502,
|
|
"step": 12345
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0106986384279685e-06,
|
|
"loss": 0.6479,
|
|
"step": 12350
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.010054164550678e-06,
|
|
"loss": 0.6594,
|
|
"step": 12355
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0094096864969744e-06,
|
|
"loss": 0.6451,
|
|
"step": 12360
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0087652045345694e-06,
|
|
"loss": 0.6453,
|
|
"step": 12365
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0081207189311741e-06,
|
|
"loss": 0.6711,
|
|
"step": 12370
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0074762299545034e-06,
|
|
"loss": 0.6659,
|
|
"step": 12375
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0068317378722712e-06,
|
|
"loss": 0.6456,
|
|
"step": 12380
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.006187242952195e-06,
|
|
"loss": 0.6592,
|
|
"step": 12385
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0055427454619916e-06,
|
|
"loss": 0.6596,
|
|
"step": 12390
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.00489824566938e-06,
|
|
"loss": 0.6679,
|
|
"step": 12395
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.00425374384208e-06,
|
|
"loss": 0.6507,
|
|
"step": 12400
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0036092402478114e-06,
|
|
"loss": 0.6902,
|
|
"step": 12405
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0029647351542958e-06,
|
|
"loss": 0.6355,
|
|
"step": 12410
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0023202288292552e-06,
|
|
"loss": 0.6847,
|
|
"step": 12415
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.0016757215404117e-06,
|
|
"loss": 0.6544,
|
|
"step": 12420
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.001031213555488e-06,
|
|
"loss": 0.6403,
|
|
"step": 12425
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.000386705142207e-06,
|
|
"loss": 0.6718,
|
|
"step": 12430
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.997421965682923e-07,
|
|
"loss": 0.6819,
|
|
"step": 12435
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.99097688101467e-07,
|
|
"loss": 0.6521,
|
|
"step": 12440
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.984531800094538e-07,
|
|
"loss": 0.6456,
|
|
"step": 12445
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.978086725599764e-07,
|
|
"loss": 0.6862,
|
|
"step": 12450
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.971641660207574e-07,
|
|
"loss": 0.675,
|
|
"step": 12455
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.965196606595192e-07,
|
|
"loss": 0.6853,
|
|
"step": 12460
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.958751567439835e-07,
|
|
"loss": 0.7034,
|
|
"step": 12465
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.95230654541872e-07,
|
|
"loss": 0.6665,
|
|
"step": 12470
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.94586154320905e-07,
|
|
"loss": 0.7126,
|
|
"step": 12475
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.939416563488025e-07,
|
|
"loss": 0.6668,
|
|
"step": 12480
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.932971608932832e-07,
|
|
"loss": 0.7099,
|
|
"step": 12485
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.926526682220652e-07,
|
|
"loss": 0.6525,
|
|
"step": 12490
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.920081786028647e-07,
|
|
"loss": 0.6092,
|
|
"step": 12495
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.913636923033974e-07,
|
|
"loss": 0.6663,
|
|
"step": 12500
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.907192095913772e-07,
|
|
"loss": 0.6514,
|
|
"step": 12505
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.900747307345166e-07,
|
|
"loss": 0.676,
|
|
"step": 12510
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.894302560005265e-07,
|
|
"loss": 0.6726,
|
|
"step": 12515
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.88785785657116e-07,
|
|
"loss": 0.6882,
|
|
"step": 12520
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.88141319971993e-07,
|
|
"loss": 0.6961,
|
|
"step": 12525
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.874968592128624e-07,
|
|
"loss": 0.6772,
|
|
"step": 12530
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.868524036474276e-07,
|
|
"loss": 0.6945,
|
|
"step": 12535
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.862079535433902e-07,
|
|
"loss": 0.6479,
|
|
"step": 12540
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.855635091684488e-07,
|
|
"loss": 0.6826,
|
|
"step": 12545
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.849190707903004e-07,
|
|
"loss": 0.6593,
|
|
"step": 12550
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.842746386766385e-07,
|
|
"loss": 0.6512,
|
|
"step": 12555
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.836302130951548e-07,
|
|
"loss": 0.6938,
|
|
"step": 12560
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.829857943135386e-07,
|
|
"loss": 0.6384,
|
|
"step": 12565
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.823413825994754e-07,
|
|
"loss": 0.7146,
|
|
"step": 12570
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.816969782206486e-07,
|
|
"loss": 0.6924,
|
|
"step": 12575
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.810525814447372e-07,
|
|
"loss": 0.6643,
|
|
"step": 12580
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.80408192539419e-07,
|
|
"loss": 0.6833,
|
|
"step": 12585
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.797638117723675e-07,
|
|
"loss": 0.7397,
|
|
"step": 12590
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.791194394112523e-07,
|
|
"loss": 0.6674,
|
|
"step": 12595
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.784750757237405e-07,
|
|
"loss": 0.6753,
|
|
"step": 12600
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.77830720977495e-07,
|
|
"loss": 0.6833,
|
|
"step": 12605
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.77186375440175e-07,
|
|
"loss": 0.6847,
|
|
"step": 12610
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.76542039379437e-07,
|
|
"loss": 0.6786,
|
|
"step": 12615
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.75897713062931e-07,
|
|
"loss": 0.6696,
|
|
"step": 12620
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.75253396758306e-07,
|
|
"loss": 0.6623,
|
|
"step": 12625
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.746090907332043e-07,
|
|
"loss": 0.6847,
|
|
"step": 12630
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.739647952552654e-07,
|
|
"loss": 0.6885,
|
|
"step": 12635
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.733205105921247e-07,
|
|
"loss": 0.6954,
|
|
"step": 12640
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.726762370114116e-07,
|
|
"loss": 0.6695,
|
|
"step": 12645
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.720319747807521e-07,
|
|
"loss": 0.6837,
|
|
"step": 12650
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.71387724167767e-07,
|
|
"loss": 0.6701,
|
|
"step": 12655
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.707434854400723e-07,
|
|
"loss": 0.6297,
|
|
"step": 12660
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.700992588652796e-07,
|
|
"loss": 0.6432,
|
|
"step": 12665
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.694550447109946e-07,
|
|
"loss": 0.7152,
|
|
"step": 12670
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 9.688108432448186e-07,
|
|
"loss": 0.6285,
|
|
"step": 12675
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.681666547343467e-07,
|
|
"loss": 0.6382,
|
|
"step": 12680
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.6752247944717e-07,
|
|
"loss": 0.6702,
|
|
"step": 12685
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.668783176508724e-07,
|
|
"loss": 0.6962,
|
|
"step": 12690
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.662341696130339e-07,
|
|
"loss": 0.7102,
|
|
"step": 12695
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.655900356012279e-07,
|
|
"loss": 0.6731,
|
|
"step": 12700
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.649459158830216e-07,
|
|
"loss": 0.6251,
|
|
"step": 12705
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.643018107259774e-07,
|
|
"loss": 0.6723,
|
|
"step": 12710
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.636577203976497e-07,
|
|
"loss": 0.6475,
|
|
"step": 12715
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.630136451655894e-07,
|
|
"loss": 0.6977,
|
|
"step": 12720
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.623695852973395e-07,
|
|
"loss": 0.6807,
|
|
"step": 12725
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.617255410604363e-07,
|
|
"loss": 0.6594,
|
|
"step": 12730
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.61081512722411e-07,
|
|
"loss": 0.6928,
|
|
"step": 12735
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.604375005507862e-07,
|
|
"loss": 0.6592,
|
|
"step": 12740
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.597935048130797e-07,
|
|
"loss": 0.6674,
|
|
"step": 12745
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.591495257768019e-07,
|
|
"loss": 0.6501,
|
|
"step": 12750
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.585055637094557e-07,
|
|
"loss": 0.6283,
|
|
"step": 12755
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.578616188785378e-07,
|
|
"loss": 0.6404,
|
|
"step": 12760
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.572176915515364e-07,
|
|
"loss": 0.6723,
|
|
"step": 12765
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.56573781995934e-07,
|
|
"loss": 0.6595,
|
|
"step": 12770
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.559298904792053e-07,
|
|
"loss": 0.6458,
|
|
"step": 12775
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.552860172688165e-07,
|
|
"loss": 0.6258,
|
|
"step": 12780
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.54642162632228e-07,
|
|
"loss": 0.6791,
|
|
"step": 12785
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.539983268368897e-07,
|
|
"loss": 0.669,
|
|
"step": 12790
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.533545101502466e-07,
|
|
"loss": 0.6532,
|
|
"step": 12795
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.527107128397347e-07,
|
|
"loss": 0.6997,
|
|
"step": 12800
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"eval_loss": 0.6355204582214355,
|
|
"eval_runtime": 144.7041,
|
|
"eval_samples_per_second": 16.351,
|
|
"eval_steps_per_second": 2.73,
|
|
"step": 12800
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.520669351727811e-07,
|
|
"loss": 0.6886,
|
|
"step": 12805
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.514231774168063e-07,
|
|
"loss": 0.6541,
|
|
"step": 12810
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.507794398392205e-07,
|
|
"loss": 0.6731,
|
|
"step": 12815
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.501357227074279e-07,
|
|
"loss": 0.6141,
|
|
"step": 12820
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.49492026288822e-07,
|
|
"loss": 0.6469,
|
|
"step": 12825
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.488483508507892e-07,
|
|
"loss": 0.6434,
|
|
"step": 12830
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.482046966607071e-07,
|
|
"loss": 0.6507,
|
|
"step": 12835
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.475610639859428e-07,
|
|
"loss": 0.6604,
|
|
"step": 12840
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.469174530938573e-07,
|
|
"loss": 0.6497,
|
|
"step": 12845
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.462738642517995e-07,
|
|
"loss": 0.6627,
|
|
"step": 12850
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.456302977271114e-07,
|
|
"loss": 0.6689,
|
|
"step": 12855
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.449867537871251e-07,
|
|
"loss": 0.7033,
|
|
"step": 12860
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.443432326991626e-07,
|
|
"loss": 0.7198,
|
|
"step": 12865
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.436997347305377e-07,
|
|
"loss": 0.6576,
|
|
"step": 12870
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.430562601485527e-07,
|
|
"loss": 0.669,
|
|
"step": 12875
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.424128092205021e-07,
|
|
"loss": 0.6525,
|
|
"step": 12880
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.417693822136701e-07,
|
|
"loss": 0.6669,
|
|
"step": 12885
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.411259793953302e-07,
|
|
"loss": 0.6975,
|
|
"step": 12890
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.404826010327467e-07,
|
|
"loss": 0.6476,
|
|
"step": 12895
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.398392473931726e-07,
|
|
"loss": 0.6884,
|
|
"step": 12900
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.391959187438522e-07,
|
|
"loss": 0.6902,
|
|
"step": 12905
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.385526153520186e-07,
|
|
"loss": 0.6382,
|
|
"step": 12910
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.379093374848939e-07,
|
|
"loss": 0.6584,
|
|
"step": 12915
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.37266085409691e-07,
|
|
"loss": 0.6544,
|
|
"step": 12920
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 9.366228593936098e-07,
|
|
"loss": 0.6808,
|
|
"step": 12925
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.359796597038421e-07,
|
|
"loss": 0.7172,
|
|
"step": 12930
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.353364866075672e-07,
|
|
"loss": 0.6768,
|
|
"step": 12935
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.34693340371953e-07,
|
|
"loss": 0.6933,
|
|
"step": 12940
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.340502212641578e-07,
|
|
"loss": 0.6833,
|
|
"step": 12945
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.334071295513267e-07,
|
|
"loss": 0.6569,
|
|
"step": 12950
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.32764065500595e-07,
|
|
"loss": 0.645,
|
|
"step": 12955
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.321210293790859e-07,
|
|
"loss": 0.6581,
|
|
"step": 12960
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.314780214539107e-07,
|
|
"loss": 0.7163,
|
|
"step": 12965
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.308350419921699e-07,
|
|
"loss": 0.6527,
|
|
"step": 12970
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.301920912609505e-07,
|
|
"loss": 0.6956,
|
|
"step": 12975
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.295491695273301e-07,
|
|
"loss": 0.6763,
|
|
"step": 12980
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.289062770583712e-07,
|
|
"loss": 0.6666,
|
|
"step": 12985
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.282634141211269e-07,
|
|
"loss": 0.687,
|
|
"step": 12990
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.276205809826368e-07,
|
|
"loss": 0.6675,
|
|
"step": 12995
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.269777779099275e-07,
|
|
"loss": 0.6656,
|
|
"step": 13000
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.263350051700147e-07,
|
|
"loss": 0.6616,
|
|
"step": 13005
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.256922630298994e-07,
|
|
"loss": 0.6458,
|
|
"step": 13010
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.250495517565722e-07,
|
|
"loss": 0.6929,
|
|
"step": 13015
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.244068716170099e-07,
|
|
"loss": 0.6717,
|
|
"step": 13020
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.237642228781749e-07,
|
|
"loss": 0.6591,
|
|
"step": 13025
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.231216058070195e-07,
|
|
"loss": 0.6807,
|
|
"step": 13030
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.224790206704798e-07,
|
|
"loss": 0.6745,
|
|
"step": 13035
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.21836467735481e-07,
|
|
"loss": 0.6207,
|
|
"step": 13040
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.21193947268934e-07,
|
|
"loss": 0.6627,
|
|
"step": 13045
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.205514595377356e-07,
|
|
"loss": 0.6786,
|
|
"step": 13050
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.199090048087706e-07,
|
|
"loss": 0.6949,
|
|
"step": 13055
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.192665833489077e-07,
|
|
"loss": 0.665,
|
|
"step": 13060
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.186241954250043e-07,
|
|
"loss": 0.6553,
|
|
"step": 13065
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.179818413039028e-07,
|
|
"loss": 0.6508,
|
|
"step": 13070
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.173395212524306e-07,
|
|
"loss": 0.6393,
|
|
"step": 13075
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.166972355374031e-07,
|
|
"loss": 0.6319,
|
|
"step": 13080
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.160549844256187e-07,
|
|
"loss": 0.6485,
|
|
"step": 13085
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.154127681838642e-07,
|
|
"loss": 0.6805,
|
|
"step": 13090
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.147705870789104e-07,
|
|
"loss": 0.677,
|
|
"step": 13095
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.14128441377513e-07,
|
|
"loss": 0.6738,
|
|
"step": 13100
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.134863313464149e-07,
|
|
"loss": 0.6761,
|
|
"step": 13105
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.128442572523417e-07,
|
|
"loss": 0.6966,
|
|
"step": 13110
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.122022193620068e-07,
|
|
"loss": 0.6435,
|
|
"step": 13115
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.115602179421058e-07,
|
|
"loss": 0.656,
|
|
"step": 13120
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.109182532593213e-07,
|
|
"loss": 0.678,
|
|
"step": 13125
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.102763255803203e-07,
|
|
"loss": 0.6255,
|
|
"step": 13130
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.096344351717527e-07,
|
|
"loss": 0.6929,
|
|
"step": 13135
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.089925823002555e-07,
|
|
"loss": 0.6523,
|
|
"step": 13140
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.083507672324474e-07,
|
|
"loss": 0.6703,
|
|
"step": 13145
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.077089902349338e-07,
|
|
"loss": 0.6668,
|
|
"step": 13150
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.070672515743037e-07,
|
|
"loss": 0.6151,
|
|
"step": 13155
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.064255515171282e-07,
|
|
"loss": 0.6626,
|
|
"step": 13160
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.057838903299656e-07,
|
|
"loss": 0.6921,
|
|
"step": 13165
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 9.05142268279355e-07,
|
|
"loss": 0.6709,
|
|
"step": 13170
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 9.045006856318215e-07,
|
|
"loss": 0.6411,
|
|
"step": 13175
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 9.03859142653873e-07,
|
|
"loss": 0.6394,
|
|
"step": 13180
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 9.03217639612e-07,
|
|
"loss": 0.6713,
|
|
"step": 13185
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 9.025761767726784e-07,
|
|
"loss": 0.6492,
|
|
"step": 13190
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 9.019347544023651e-07,
|
|
"loss": 0.6249,
|
|
"step": 13195
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 9.012933727675023e-07,
|
|
"loss": 0.6649,
|
|
"step": 13200
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 9.006520321345143e-07,
|
|
"loss": 0.6133,
|
|
"step": 13205
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 9.000107327698078e-07,
|
|
"loss": 0.6659,
|
|
"step": 13210
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.993694749397738e-07,
|
|
"loss": 0.6644,
|
|
"step": 13215
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.987282589107842e-07,
|
|
"loss": 0.6778,
|
|
"step": 13220
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.980870849491954e-07,
|
|
"loss": 0.6317,
|
|
"step": 13225
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.974459533213457e-07,
|
|
"loss": 0.6583,
|
|
"step": 13230
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.968048642935544e-07,
|
|
"loss": 0.6837,
|
|
"step": 13235
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.961638181321257e-07,
|
|
"loss": 0.6398,
|
|
"step": 13240
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.955228151033432e-07,
|
|
"loss": 0.6956,
|
|
"step": 13245
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.948818554734756e-07,
|
|
"loss": 0.736,
|
|
"step": 13250
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.942409395087706e-07,
|
|
"loss": 0.653,
|
|
"step": 13255
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.936000674754592e-07,
|
|
"loss": 0.6775,
|
|
"step": 13260
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.929592396397553e-07,
|
|
"loss": 0.6798,
|
|
"step": 13265
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.923184562678517e-07,
|
|
"loss": 0.656,
|
|
"step": 13270
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.916777176259256e-07,
|
|
"loss": 0.6569,
|
|
"step": 13275
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.91037023980133e-07,
|
|
"loss": 0.6893,
|
|
"step": 13280
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.903963755966129e-07,
|
|
"loss": 0.6301,
|
|
"step": 13285
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.89755772741486e-07,
|
|
"loss": 0.6393,
|
|
"step": 13290
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.891152156808516e-07,
|
|
"loss": 0.6426,
|
|
"step": 13295
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.88474704680793e-07,
|
|
"loss": 0.6431,
|
|
"step": 13300
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.878342400073717e-07,
|
|
"loss": 0.6747,
|
|
"step": 13305
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.871938219266315e-07,
|
|
"loss": 0.6519,
|
|
"step": 13310
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.865534507045973e-07,
|
|
"loss": 0.6479,
|
|
"step": 13315
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.859131266072725e-07,
|
|
"loss": 0.6343,
|
|
"step": 13320
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.852728499006434e-07,
|
|
"loss": 0.7073,
|
|
"step": 13325
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.846326208506743e-07,
|
|
"loss": 0.6508,
|
|
"step": 13330
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.839924397233108e-07,
|
|
"loss": 0.6796,
|
|
"step": 13335
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.8335230678448e-07,
|
|
"loss": 0.6686,
|
|
"step": 13340
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.827122223000856e-07,
|
|
"loss": 0.6708,
|
|
"step": 13345
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.820721865360148e-07,
|
|
"loss": 0.6726,
|
|
"step": 13350
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.814321997581318e-07,
|
|
"loss": 0.6588,
|
|
"step": 13355
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.807922622322815e-07,
|
|
"loss": 0.6799,
|
|
"step": 13360
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.801523742242897e-07,
|
|
"loss": 0.653,
|
|
"step": 13365
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.795125359999586e-07,
|
|
"loss": 0.6911,
|
|
"step": 13370
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.78872747825073e-07,
|
|
"loss": 0.6704,
|
|
"step": 13375
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.782330099653937e-07,
|
|
"loss": 0.6585,
|
|
"step": 13380
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.775933226866636e-07,
|
|
"loss": 0.63,
|
|
"step": 13385
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.76953686254603e-07,
|
|
"loss": 0.6628,
|
|
"step": 13390
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.763141009349104e-07,
|
|
"loss": 0.723,
|
|
"step": 13395
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.756745669932655e-07,
|
|
"loss": 0.6833,
|
|
"step": 13400
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.750350846953234e-07,
|
|
"loss": 0.6724,
|
|
"step": 13405
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.743956543067213e-07,
|
|
"loss": 0.671,
|
|
"step": 13410
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 8.737562760930713e-07,
|
|
"loss": 0.6819,
|
|
"step": 13415
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.731169503199663e-07,
|
|
"loss": 0.6681,
|
|
"step": 13420
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.724776772529775e-07,
|
|
"loss": 0.6503,
|
|
"step": 13425
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.718384571576518e-07,
|
|
"loss": 0.684,
|
|
"step": 13430
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.711992902995171e-07,
|
|
"loss": 0.7102,
|
|
"step": 13435
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.705601769440767e-07,
|
|
"loss": 0.624,
|
|
"step": 13440
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.699211173568128e-07,
|
|
"loss": 0.6489,
|
|
"step": 13445
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.692821118031864e-07,
|
|
"loss": 0.6841,
|
|
"step": 13450
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.686431605486331e-07,
|
|
"loss": 0.6857,
|
|
"step": 13455
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.680042638585694e-07,
|
|
"loss": 0.7022,
|
|
"step": 13460
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.673654219983861e-07,
|
|
"loss": 0.6838,
|
|
"step": 13465
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.667266352334528e-07,
|
|
"loss": 0.7022,
|
|
"step": 13470
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.660879038291169e-07,
|
|
"loss": 0.6553,
|
|
"step": 13475
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.654492280507005e-07,
|
|
"loss": 0.6301,
|
|
"step": 13480
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.648106081635054e-07,
|
|
"loss": 0.6662,
|
|
"step": 13485
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.641720444328075e-07,
|
|
"loss": 0.6445,
|
|
"step": 13490
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.635335371238609e-07,
|
|
"loss": 0.6577,
|
|
"step": 13495
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.628950865018969e-07,
|
|
"loss": 0.647,
|
|
"step": 13500
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.622566928321209e-07,
|
|
"loss": 0.6851,
|
|
"step": 13505
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.616183563797177e-07,
|
|
"loss": 0.6431,
|
|
"step": 13510
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.609800774098452e-07,
|
|
"loss": 0.6909,
|
|
"step": 13515
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.603418561876394e-07,
|
|
"loss": 0.6711,
|
|
"step": 13520
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.597036929782127e-07,
|
|
"loss": 0.7302,
|
|
"step": 13525
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.590655880466511e-07,
|
|
"loss": 0.6841,
|
|
"step": 13530
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.584275416580194e-07,
|
|
"loss": 0.6897,
|
|
"step": 13535
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.577895540773552e-07,
|
|
"loss": 0.6478,
|
|
"step": 13540
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.571516255696738e-07,
|
|
"loss": 0.6627,
|
|
"step": 13545
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.565137563999646e-07,
|
|
"loss": 0.677,
|
|
"step": 13550
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.55875946833193e-07,
|
|
"loss": 0.6428,
|
|
"step": 13555
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.552381971343003e-07,
|
|
"loss": 0.6605,
|
|
"step": 13560
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.546005075682012e-07,
|
|
"loss": 0.6421,
|
|
"step": 13565
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.53962878399787e-07,
|
|
"loss": 0.6669,
|
|
"step": 13570
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.53325309893923e-07,
|
|
"loss": 0.6713,
|
|
"step": 13575
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.526878023154494e-07,
|
|
"loss": 0.696,
|
|
"step": 13580
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.520503559291823e-07,
|
|
"loss": 0.6294,
|
|
"step": 13585
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.514129709999103e-07,
|
|
"loss": 0.6732,
|
|
"step": 13590
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.507756477923982e-07,
|
|
"loss": 0.6502,
|
|
"step": 13595
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.501383865713839e-07,
|
|
"loss": 0.6475,
|
|
"step": 13600
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"eval_loss": 0.6325117349624634,
|
|
"eval_runtime": 139.0789,
|
|
"eval_samples_per_second": 17.012,
|
|
"eval_steps_per_second": 2.84,
|
|
"step": 13600
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.495011876015805e-07,
|
|
"loss": 0.6818,
|
|
"step": 13605
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.488640511476757e-07,
|
|
"loss": 0.6347,
|
|
"step": 13610
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.482269774743291e-07,
|
|
"loss": 0.653,
|
|
"step": 13615
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.475899668461765e-07,
|
|
"loss": 0.6532,
|
|
"step": 13620
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.469530195278261e-07,
|
|
"loss": 0.6745,
|
|
"step": 13625
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.4631613578386e-07,
|
|
"loss": 0.6486,
|
|
"step": 13630
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.456793158788354e-07,
|
|
"loss": 0.6595,
|
|
"step": 13635
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.450425600772802e-07,
|
|
"loss": 0.6835,
|
|
"step": 13640
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.444058686436987e-07,
|
|
"loss": 0.6539,
|
|
"step": 13645
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.437692418425657e-07,
|
|
"loss": 0.6733,
|
|
"step": 13650
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.431326799383309e-07,
|
|
"loss": 0.6533,
|
|
"step": 13655
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 8.424961831954174e-07,
|
|
"loss": 0.6535,
|
|
"step": 13660
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.418597518782189e-07,
|
|
"loss": 0.6637,
|
|
"step": 13665
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.412233862511048e-07,
|
|
"loss": 0.661,
|
|
"step": 13670
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.40587086578415e-07,
|
|
"loss": 0.6421,
|
|
"step": 13675
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.399508531244632e-07,
|
|
"loss": 0.715,
|
|
"step": 13680
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.39314686153535e-07,
|
|
"loss": 0.7132,
|
|
"step": 13685
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.386785859298885e-07,
|
|
"loss": 0.6643,
|
|
"step": 13690
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.380425527177551e-07,
|
|
"loss": 0.6568,
|
|
"step": 13695
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.374065867813365e-07,
|
|
"loss": 0.65,
|
|
"step": 13700
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.36770688384808e-07,
|
|
"loss": 0.691,
|
|
"step": 13705
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.361348577923158e-07,
|
|
"loss": 0.6494,
|
|
"step": 13710
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.354990952679784e-07,
|
|
"loss": 0.6998,
|
|
"step": 13715
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.348634010758869e-07,
|
|
"loss": 0.6635,
|
|
"step": 13720
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.342277754801021e-07,
|
|
"loss": 0.6726,
|
|
"step": 13725
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.33592218744658e-07,
|
|
"loss": 0.6538,
|
|
"step": 13730
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.329567311335588e-07,
|
|
"loss": 0.6708,
|
|
"step": 13735
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.323213129107805e-07,
|
|
"loss": 0.6403,
|
|
"step": 13740
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.316859643402714e-07,
|
|
"loss": 0.6683,
|
|
"step": 13745
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.310506856859485e-07,
|
|
"loss": 0.6569,
|
|
"step": 13750
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.304154772117017e-07,
|
|
"loss": 0.659,
|
|
"step": 13755
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.297803391813908e-07,
|
|
"loss": 0.6665,
|
|
"step": 13760
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.291452718588463e-07,
|
|
"loss": 0.6852,
|
|
"step": 13765
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.285102755078708e-07,
|
|
"loss": 0.625,
|
|
"step": 13770
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.278753503922351e-07,
|
|
"loss": 0.607,
|
|
"step": 13775
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.272404967756821e-07,
|
|
"loss": 0.6543,
|
|
"step": 13780
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.266057149219242e-07,
|
|
"loss": 0.6624,
|
|
"step": 13785
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.259710050946443e-07,
|
|
"loss": 0.6695,
|
|
"step": 13790
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.253363675574959e-07,
|
|
"loss": 0.6375,
|
|
"step": 13795
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.24701802574101e-07,
|
|
"loss": 0.6712,
|
|
"step": 13800
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.240673104080533e-07,
|
|
"loss": 0.6653,
|
|
"step": 13805
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.234328913229145e-07,
|
|
"loss": 0.6578,
|
|
"step": 13810
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.227985455822174e-07,
|
|
"loss": 0.697,
|
|
"step": 13815
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.221642734494631e-07,
|
|
"loss": 0.6273,
|
|
"step": 13820
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.215300751881233e-07,
|
|
"loss": 0.6841,
|
|
"step": 13825
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.208959510616384e-07,
|
|
"loss": 0.6758,
|
|
"step": 13830
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.202619013334176e-07,
|
|
"loss": 0.6531,
|
|
"step": 13835
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.1962792626684e-07,
|
|
"loss": 0.679,
|
|
"step": 13840
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.189940261252531e-07,
|
|
"loss": 0.6685,
|
|
"step": 13845
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.183602011719736e-07,
|
|
"loss": 0.6558,
|
|
"step": 13850
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.177264516702873e-07,
|
|
"loss": 0.6132,
|
|
"step": 13855
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.170927778834476e-07,
|
|
"loss": 0.6571,
|
|
"step": 13860
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.164591800746774e-07,
|
|
"loss": 0.685,
|
|
"step": 13865
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.158256585071674e-07,
|
|
"loss": 0.6806,
|
|
"step": 13870
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.151922134440774e-07,
|
|
"loss": 0.6552,
|
|
"step": 13875
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.145588451485347e-07,
|
|
"loss": 0.6927,
|
|
"step": 13880
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.139255538836351e-07,
|
|
"loss": 0.6594,
|
|
"step": 13885
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.132923399124424e-07,
|
|
"loss": 0.6392,
|
|
"step": 13890
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.126592034979878e-07,
|
|
"loss": 0.6697,
|
|
"step": 13895
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.120261449032706e-07,
|
|
"loss": 0.6549,
|
|
"step": 13900
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 8.113931643912589e-07,
|
|
"loss": 0.6544,
|
|
"step": 13905
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.107602622248859e-07,
|
|
"loss": 0.6528,
|
|
"step": 13910
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.101274386670544e-07,
|
|
"loss": 0.6406,
|
|
"step": 13915
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.094946939806336e-07,
|
|
"loss": 0.6755,
|
|
"step": 13920
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.0886202842846e-07,
|
|
"loss": 0.6685,
|
|
"step": 13925
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.082294422733382e-07,
|
|
"loss": 0.6375,
|
|
"step": 13930
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.075969357780379e-07,
|
|
"loss": 0.6974,
|
|
"step": 13935
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.069645092052975e-07,
|
|
"loss": 0.6379,
|
|
"step": 13940
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.06332162817821e-07,
|
|
"loss": 0.6283,
|
|
"step": 13945
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.056998968782797e-07,
|
|
"loss": 0.6546,
|
|
"step": 13950
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.050677116493121e-07,
|
|
"loss": 0.6807,
|
|
"step": 13955
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.044356073935214e-07,
|
|
"loss": 0.6607,
|
|
"step": 13960
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.038035843734791e-07,
|
|
"loss": 0.6562,
|
|
"step": 13965
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.031716428517212e-07,
|
|
"loss": 0.6787,
|
|
"step": 13970
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.025397830907515e-07,
|
|
"loss": 0.6575,
|
|
"step": 13975
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.019080053530385e-07,
|
|
"loss": 0.6418,
|
|
"step": 13980
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.012763099010172e-07,
|
|
"loss": 0.6619,
|
|
"step": 13985
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.00644696997089e-07,
|
|
"loss": 0.6564,
|
|
"step": 13990
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 8.000131669036196e-07,
|
|
"loss": 0.6619,
|
|
"step": 13995
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.993817198829419e-07,
|
|
"loss": 0.6472,
|
|
"step": 14000
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.987503561973527e-07,
|
|
"loss": 0.658,
|
|
"step": 14005
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.981190761091156e-07,
|
|
"loss": 0.6977,
|
|
"step": 14010
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.974878798804587e-07,
|
|
"loss": 0.6398,
|
|
"step": 14015
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.968567677735752e-07,
|
|
"loss": 0.6633,
|
|
"step": 14020
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.962257400506241e-07,
|
|
"loss": 0.6571,
|
|
"step": 14025
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.95594796973728e-07,
|
|
"loss": 0.6622,
|
|
"step": 14030
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.949639388049758e-07,
|
|
"loss": 0.6876,
|
|
"step": 14035
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.943331658064206e-07,
|
|
"loss": 0.6471,
|
|
"step": 14040
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.937024782400793e-07,
|
|
"loss": 0.6671,
|
|
"step": 14045
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.930718763679349e-07,
|
|
"loss": 0.6538,
|
|
"step": 14050
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.924413604519333e-07,
|
|
"loss": 0.6854,
|
|
"step": 14055
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.918109307539856e-07,
|
|
"loss": 0.7438,
|
|
"step": 14060
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.911805875359667e-07,
|
|
"loss": 0.6835,
|
|
"step": 14065
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.905503310597159e-07,
|
|
"loss": 0.6572,
|
|
"step": 14070
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.899201615870361e-07,
|
|
"loss": 0.6816,
|
|
"step": 14075
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.892900793796941e-07,
|
|
"loss": 0.6017,
|
|
"step": 14080
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.886600846994207e-07,
|
|
"loss": 0.6606,
|
|
"step": 14085
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.880301778079104e-07,
|
|
"loss": 0.6908,
|
|
"step": 14090
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.874003589668207e-07,
|
|
"loss": 0.6787,
|
|
"step": 14095
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.867706284377731e-07,
|
|
"loss": 0.6872,
|
|
"step": 14100
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.86140986482352e-07,
|
|
"loss": 0.6812,
|
|
"step": 14105
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.855114333621055e-07,
|
|
"loss": 0.6552,
|
|
"step": 14110
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.848819693385443e-07,
|
|
"loss": 0.6809,
|
|
"step": 14115
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.84252594673142e-07,
|
|
"loss": 0.6787,
|
|
"step": 14120
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.83623309627336e-07,
|
|
"loss": 0.6638,
|
|
"step": 14125
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.829941144625251e-07,
|
|
"loss": 0.6821,
|
|
"step": 14130
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.823650094400722e-07,
|
|
"loss": 0.6569,
|
|
"step": 14135
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.817359948213014e-07,
|
|
"loss": 0.6722,
|
|
"step": 14140
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.811070708675005e-07,
|
|
"loss": 0.6677,
|
|
"step": 14145
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.804782378399188e-07,
|
|
"loss": 0.6305,
|
|
"step": 14150
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 7.798494959997679e-07,
|
|
"loss": 0.6604,
|
|
"step": 14155
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.79220845608222e-07,
|
|
"loss": 0.6459,
|
|
"step": 14160
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.785922869264167e-07,
|
|
"loss": 0.6571,
|
|
"step": 14165
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.779638202154498e-07,
|
|
"loss": 0.6593,
|
|
"step": 14170
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.773354457363814e-07,
|
|
"loss": 0.672,
|
|
"step": 14175
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.767071637502323e-07,
|
|
"loss": 0.6653,
|
|
"step": 14180
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.760789745179857e-07,
|
|
"loss": 0.6623,
|
|
"step": 14185
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.754508783005855e-07,
|
|
"loss": 0.6597,
|
|
"step": 14190
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.748228753589377e-07,
|
|
"loss": 0.6594,
|
|
"step": 14195
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.741949659539094e-07,
|
|
"loss": 0.6507,
|
|
"step": 14200
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.735671503463283e-07,
|
|
"loss": 0.6891,
|
|
"step": 14205
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.729394287969842e-07,
|
|
"loss": 0.7177,
|
|
"step": 14210
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.723118015666264e-07,
|
|
"loss": 0.6398,
|
|
"step": 14215
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.716842689159662e-07,
|
|
"loss": 0.689,
|
|
"step": 14220
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.710568311056754e-07,
|
|
"loss": 0.6272,
|
|
"step": 14225
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.704294883963858e-07,
|
|
"loss": 0.6754,
|
|
"step": 14230
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.698022410486904e-07,
|
|
"loss": 0.6399,
|
|
"step": 14235
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.69175089323142e-07,
|
|
"loss": 0.6666,
|
|
"step": 14240
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.685480334802543e-07,
|
|
"loss": 0.6878,
|
|
"step": 14245
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.679210737805005e-07,
|
|
"loss": 0.6787,
|
|
"step": 14250
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.672942104843145e-07,
|
|
"loss": 0.6784,
|
|
"step": 14255
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.666674438520898e-07,
|
|
"loss": 0.657,
|
|
"step": 14260
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.660407741441797e-07,
|
|
"loss": 0.6817,
|
|
"step": 14265
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.654142016208976e-07,
|
|
"loss": 0.6439,
|
|
"step": 14270
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.647877265425157e-07,
|
|
"loss": 0.7055,
|
|
"step": 14275
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.641613491692669e-07,
|
|
"loss": 0.6893,
|
|
"step": 14280
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.635350697613429e-07,
|
|
"loss": 0.6435,
|
|
"step": 14285
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.629088885788942e-07,
|
|
"loss": 0.6568,
|
|
"step": 14290
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.622828058820315e-07,
|
|
"loss": 0.6865,
|
|
"step": 14295
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.616568219308239e-07,
|
|
"loss": 0.6944,
|
|
"step": 14300
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.610309369852996e-07,
|
|
"loss": 0.6978,
|
|
"step": 14305
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.604051513054462e-07,
|
|
"loss": 0.6665,
|
|
"step": 14310
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.597794651512092e-07,
|
|
"loss": 0.6395,
|
|
"step": 14315
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.591538787824933e-07,
|
|
"loss": 0.6756,
|
|
"step": 14320
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.585283924591616e-07,
|
|
"loss": 0.6442,
|
|
"step": 14325
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.579030064410357e-07,
|
|
"loss": 0.6175,
|
|
"step": 14330
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.572777209878958e-07,
|
|
"loss": 0.6561,
|
|
"step": 14335
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.566525363594796e-07,
|
|
"loss": 0.6895,
|
|
"step": 14340
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.560274528154836e-07,
|
|
"loss": 0.6491,
|
|
"step": 14345
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.55402470615562e-07,
|
|
"loss": 0.7044,
|
|
"step": 14350
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.547775900193267e-07,
|
|
"loss": 0.6438,
|
|
"step": 14355
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.541528112863484e-07,
|
|
"loss": 0.709,
|
|
"step": 14360
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.53528134676154e-07,
|
|
"loss": 0.6684,
|
|
"step": 14365
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.529035604482292e-07,
|
|
"loss": 0.6646,
|
|
"step": 14370
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.522790888620165e-07,
|
|
"loss": 0.6361,
|
|
"step": 14375
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.516547201769159e-07,
|
|
"loss": 0.6411,
|
|
"step": 14380
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.51030454652285e-07,
|
|
"loss": 0.6162,
|
|
"step": 14385
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.504062925474378e-07,
|
|
"loss": 0.6916,
|
|
"step": 14390
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.497822341216465e-07,
|
|
"loss": 0.6986,
|
|
"step": 14395
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 7.491582796341388e-07,
|
|
"loss": 0.6316,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"eval_loss": 0.629960834980011,
|
|
"eval_runtime": 143.0305,
|
|
"eval_samples_per_second": 16.542,
|
|
"eval_steps_per_second": 2.762,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.485344293441006e-07,
|
|
"loss": 0.7091,
|
|
"step": 14405
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.479106835106733e-07,
|
|
"loss": 0.6732,
|
|
"step": 14410
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.472870423929558e-07,
|
|
"loss": 0.6886,
|
|
"step": 14415
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.466635062500035e-07,
|
|
"loss": 0.6929,
|
|
"step": 14420
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.460400753408274e-07,
|
|
"loss": 0.6848,
|
|
"step": 14425
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.454167499243955e-07,
|
|
"loss": 0.6989,
|
|
"step": 14430
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.447935302596317e-07,
|
|
"loss": 0.656,
|
|
"step": 14435
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.441704166054159e-07,
|
|
"loss": 0.6596,
|
|
"step": 14440
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.435474092205844e-07,
|
|
"loss": 0.6879,
|
|
"step": 14445
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.429245083639288e-07,
|
|
"loss": 0.6734,
|
|
"step": 14450
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.42301714294197e-07,
|
|
"loss": 0.6389,
|
|
"step": 14455
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.416790272700919e-07,
|
|
"loss": 0.6627,
|
|
"step": 14460
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.410564475502723e-07,
|
|
"loss": 0.6592,
|
|
"step": 14465
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.404339753933526e-07,
|
|
"loss": 0.6102,
|
|
"step": 14470
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.398116110579022e-07,
|
|
"loss": 0.6535,
|
|
"step": 14475
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.39189354802446e-07,
|
|
"loss": 0.6776,
|
|
"step": 14480
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.385672068854635e-07,
|
|
"loss": 0.6815,
|
|
"step": 14485
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.379451675653897e-07,
|
|
"loss": 0.6762,
|
|
"step": 14490
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.373232371006146e-07,
|
|
"loss": 0.6405,
|
|
"step": 14495
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.367014157494822e-07,
|
|
"loss": 0.6582,
|
|
"step": 14500
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.360797037702922e-07,
|
|
"loss": 0.7052,
|
|
"step": 14505
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.354581014212978e-07,
|
|
"loss": 0.7036,
|
|
"step": 14510
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.348366089607077e-07,
|
|
"loss": 0.6619,
|
|
"step": 14515
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.342152266466842e-07,
|
|
"loss": 0.6511,
|
|
"step": 14520
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.335939547373441e-07,
|
|
"loss": 0.6736,
|
|
"step": 14525
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.329727934907586e-07,
|
|
"loss": 0.6553,
|
|
"step": 14530
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.323517431649524e-07,
|
|
"loss": 0.6332,
|
|
"step": 14535
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.317308040179045e-07,
|
|
"loss": 0.6236,
|
|
"step": 14540
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.311099763075477e-07,
|
|
"loss": 0.6322,
|
|
"step": 14545
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.304892602917681e-07,
|
|
"loss": 0.6703,
|
|
"step": 14550
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.298686562284064e-07,
|
|
"loss": 0.6805,
|
|
"step": 14555
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.292481643752553e-07,
|
|
"loss": 0.6194,
|
|
"step": 14560
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.286277849900626e-07,
|
|
"loss": 0.677,
|
|
"step": 14565
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.280075183305276e-07,
|
|
"loss": 0.655,
|
|
"step": 14570
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.273873646543043e-07,
|
|
"loss": 0.7065,
|
|
"step": 14575
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.267673242189991e-07,
|
|
"loss": 0.6416,
|
|
"step": 14580
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.261473972821712e-07,
|
|
"loss": 0.623,
|
|
"step": 14585
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.25527584101333e-07,
|
|
"loss": 0.6771,
|
|
"step": 14590
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.249078849339492e-07,
|
|
"loss": 0.6588,
|
|
"step": 14595
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.242883000374378e-07,
|
|
"loss": 0.7181,
|
|
"step": 14600
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.23668829669169e-07,
|
|
"loss": 0.7161,
|
|
"step": 14605
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.23049474086465e-07,
|
|
"loss": 0.6712,
|
|
"step": 14610
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.224302335466013e-07,
|
|
"loss": 0.6546,
|
|
"step": 14615
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.218111083068044e-07,
|
|
"loss": 0.6682,
|
|
"step": 14620
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.211920986242539e-07,
|
|
"loss": 0.6289,
|
|
"step": 14625
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.205732047560813e-07,
|
|
"loss": 0.6735,
|
|
"step": 14630
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.199544269593692e-07,
|
|
"loss": 0.6742,
|
|
"step": 14635
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.193357654911529e-07,
|
|
"loss": 0.663,
|
|
"step": 14640
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 7.187172206084186e-07,
|
|
"loss": 0.6653,
|
|
"step": 14645
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.180987925681047e-07,
|
|
"loss": 0.633,
|
|
"step": 14650
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.174804816271012e-07,
|
|
"loss": 0.6679,
|
|
"step": 14655
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.168622880422484e-07,
|
|
"loss": 0.6849,
|
|
"step": 14660
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.162442120703389e-07,
|
|
"loss": 0.6585,
|
|
"step": 14665
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.15626253968116e-07,
|
|
"loss": 0.6842,
|
|
"step": 14670
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.150084139922745e-07,
|
|
"loss": 0.6745,
|
|
"step": 14675
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.14390692399459e-07,
|
|
"loss": 0.6614,
|
|
"step": 14680
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.137730894462662e-07,
|
|
"loss": 0.6682,
|
|
"step": 14685
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.131556053892431e-07,
|
|
"loss": 0.6302,
|
|
"step": 14690
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.125382404848867e-07,
|
|
"loss": 0.6688,
|
|
"step": 14695
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.119209949896456e-07,
|
|
"loss": 0.6558,
|
|
"step": 14700
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.113038691599178e-07,
|
|
"loss": 0.6402,
|
|
"step": 14705
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.10686863252052e-07,
|
|
"loss": 0.6774,
|
|
"step": 14710
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.100699775223476e-07,
|
|
"loss": 0.6652,
|
|
"step": 14715
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.094532122270528e-07,
|
|
"loss": 0.6132,
|
|
"step": 14720
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.088365676223671e-07,
|
|
"loss": 0.6525,
|
|
"step": 14725
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.08220043964439e-07,
|
|
"loss": 0.6803,
|
|
"step": 14730
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.076036415093673e-07,
|
|
"loss": 0.6476,
|
|
"step": 14735
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.069873605132002e-07,
|
|
"loss": 0.6711,
|
|
"step": 14740
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.063712012319352e-07,
|
|
"loss": 0.6534,
|
|
"step": 14745
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.057551639215198e-07,
|
|
"loss": 0.6575,
|
|
"step": 14750
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.051392488378503e-07,
|
|
"loss": 0.6665,
|
|
"step": 14755
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.045234562367726e-07,
|
|
"loss": 0.6546,
|
|
"step": 14760
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.039077863740817e-07,
|
|
"loss": 0.6307,
|
|
"step": 14765
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.032922395055215e-07,
|
|
"loss": 0.6717,
|
|
"step": 14770
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.026768158867847e-07,
|
|
"loss": 0.7066,
|
|
"step": 14775
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.020615157735126e-07,
|
|
"loss": 0.646,
|
|
"step": 14780
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.014463394212959e-07,
|
|
"loss": 0.6559,
|
|
"step": 14785
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.008312870856738e-07,
|
|
"loss": 0.6847,
|
|
"step": 14790
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 7.00216359022133e-07,
|
|
"loss": 0.6717,
|
|
"step": 14795
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.9960155548611e-07,
|
|
"loss": 0.6857,
|
|
"step": 14800
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.989868767329882e-07,
|
|
"loss": 0.6655,
|
|
"step": 14805
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.983723230181e-07,
|
|
"loss": 0.6216,
|
|
"step": 14810
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.97757894596726e-07,
|
|
"loss": 0.7093,
|
|
"step": 14815
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.971435917240939e-07,
|
|
"loss": 0.6597,
|
|
"step": 14820
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.965294146553802e-07,
|
|
"loss": 0.6385,
|
|
"step": 14825
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.959153636457085e-07,
|
|
"loss": 0.6444,
|
|
"step": 14830
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.953014389501504e-07,
|
|
"loss": 0.6662,
|
|
"step": 14835
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.946876408237245e-07,
|
|
"loss": 0.6718,
|
|
"step": 14840
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.940739695213976e-07,
|
|
"loss": 0.6337,
|
|
"step": 14845
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.934604252980833e-07,
|
|
"loss": 0.6349,
|
|
"step": 14850
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.928470084086424e-07,
|
|
"loss": 0.6827,
|
|
"step": 14855
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.922337191078835e-07,
|
|
"loss": 0.6323,
|
|
"step": 14860
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.916205576505607e-07,
|
|
"loss": 0.6618,
|
|
"step": 14865
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.910075242913767e-07,
|
|
"loss": 0.7118,
|
|
"step": 14870
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.903946192849801e-07,
|
|
"loss": 0.6686,
|
|
"step": 14875
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.897818428859659e-07,
|
|
"loss": 0.6446,
|
|
"step": 14880
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.891691953488767e-07,
|
|
"loss": 0.7074,
|
|
"step": 14885
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 6.885566769282003e-07,
|
|
"loss": 0.6567,
|
|
"step": 14890
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.87944287878372e-07,
|
|
"loss": 0.6953,
|
|
"step": 14895
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.873320284537729e-07,
|
|
"loss": 0.644,
|
|
"step": 14900
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.8671989890873e-07,
|
|
"loss": 0.6672,
|
|
"step": 14905
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.861078994975167e-07,
|
|
"loss": 0.6495,
|
|
"step": 14910
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.85496030474352e-07,
|
|
"loss": 0.634,
|
|
"step": 14915
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.848842920934012e-07,
|
|
"loss": 0.6863,
|
|
"step": 14920
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.842726846087754e-07,
|
|
"loss": 0.688,
|
|
"step": 14925
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.836612082745305e-07,
|
|
"loss": 0.6858,
|
|
"step": 14930
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.830498633446687e-07,
|
|
"loss": 0.6644,
|
|
"step": 14935
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.824386500731373e-07,
|
|
"loss": 0.6457,
|
|
"step": 14940
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.81827568713829e-07,
|
|
"loss": 0.661,
|
|
"step": 14945
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.812166195205818e-07,
|
|
"loss": 0.6476,
|
|
"step": 14950
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.806058027471784e-07,
|
|
"loss": 0.6875,
|
|
"step": 14955
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.799951186473472e-07,
|
|
"loss": 0.6149,
|
|
"step": 14960
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.793845674747606e-07,
|
|
"loss": 0.6324,
|
|
"step": 14965
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.787741494830365e-07,
|
|
"loss": 0.6664,
|
|
"step": 14970
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.781638649257373e-07,
|
|
"loss": 0.6524,
|
|
"step": 14975
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.775537140563696e-07,
|
|
"loss": 0.6808,
|
|
"step": 14980
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.769436971283852e-07,
|
|
"loss": 0.6352,
|
|
"step": 14985
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.763338143951793e-07,
|
|
"loss": 0.6175,
|
|
"step": 14990
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.757240661100925e-07,
|
|
"loss": 0.6742,
|
|
"step": 14995
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.751144525264083e-07,
|
|
"loss": 0.6516,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.745049738973552e-07,
|
|
"loss": 0.6177,
|
|
"step": 15005
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.738956304761054e-07,
|
|
"loss": 0.6248,
|
|
"step": 15010
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.732864225157747e-07,
|
|
"loss": 0.6683,
|
|
"step": 15015
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.726773502694231e-07,
|
|
"loss": 0.6681,
|
|
"step": 15020
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.720684139900533e-07,
|
|
"loss": 0.6574,
|
|
"step": 15025
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.714596139306125e-07,
|
|
"loss": 0.6806,
|
|
"step": 15030
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.708509503439912e-07,
|
|
"loss": 0.6361,
|
|
"step": 15035
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.702424234830227e-07,
|
|
"loss": 0.6768,
|
|
"step": 15040
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.696340336004837e-07,
|
|
"loss": 0.6917,
|
|
"step": 15045
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.690257809490941e-07,
|
|
"loss": 0.6971,
|
|
"step": 15050
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.684176657815166e-07,
|
|
"loss": 0.6592,
|
|
"step": 15055
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.678096883503575e-07,
|
|
"loss": 0.6655,
|
|
"step": 15060
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.672018489081648e-07,
|
|
"loss": 0.6655,
|
|
"step": 15065
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.6659414770743e-07,
|
|
"loss": 0.6384,
|
|
"step": 15070
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.659865850005868e-07,
|
|
"loss": 0.6695,
|
|
"step": 15075
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.653791610400112e-07,
|
|
"loss": 0.6991,
|
|
"step": 15080
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.647718760780226e-07,
|
|
"loss": 0.655,
|
|
"step": 15085
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.641647303668809e-07,
|
|
"loss": 0.6917,
|
|
"step": 15090
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.6355772415879e-07,
|
|
"loss": 0.6641,
|
|
"step": 15095
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.629508577058945e-07,
|
|
"loss": 0.6691,
|
|
"step": 15100
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.623441312602817e-07,
|
|
"loss": 0.6896,
|
|
"step": 15105
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.617375450739802e-07,
|
|
"loss": 0.6577,
|
|
"step": 15110
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.611310993989607e-07,
|
|
"loss": 0.6675,
|
|
"step": 15115
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.60524794487136e-07,
|
|
"loss": 0.6525,
|
|
"step": 15120
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.59918630590359e-07,
|
|
"loss": 0.7091,
|
|
"step": 15125
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.593126079604256e-07,
|
|
"loss": 0.6151,
|
|
"step": 15130
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.587067268490721e-07,
|
|
"loss": 0.6501,
|
|
"step": 15135
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 6.581009875079759e-07,
|
|
"loss": 0.5938,
|
|
"step": 15140
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.574953901887567e-07,
|
|
"loss": 0.6959,
|
|
"step": 15145
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.568899351429736e-07,
|
|
"loss": 0.6624,
|
|
"step": 15150
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.562846226221276e-07,
|
|
"loss": 0.6503,
|
|
"step": 15155
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.556794528776601e-07,
|
|
"loss": 0.6596,
|
|
"step": 15160
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.550744261609538e-07,
|
|
"loss": 0.6702,
|
|
"step": 15165
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.544695427233312e-07,
|
|
"loss": 0.6532,
|
|
"step": 15170
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.538648028160556e-07,
|
|
"loss": 0.6611,
|
|
"step": 15175
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.532602066903309e-07,
|
|
"loss": 0.6825,
|
|
"step": 15180
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.526557545973007e-07,
|
|
"loss": 0.6508,
|
|
"step": 15185
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.520514467880495e-07,
|
|
"loss": 0.6423,
|
|
"step": 15190
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.514472835136015e-07,
|
|
"loss": 0.6711,
|
|
"step": 15195
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.508432650249206e-07,
|
|
"loss": 0.6649,
|
|
"step": 15200
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"eval_loss": 0.6282991766929626,
|
|
"eval_runtime": 145.3768,
|
|
"eval_samples_per_second": 16.275,
|
|
"eval_steps_per_second": 2.717,
|
|
"step": 15200
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.502393915729112e-07,
|
|
"loss": 0.6628,
|
|
"step": 15205
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.496356634084167e-07,
|
|
"loss": 0.6933,
|
|
"step": 15210
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.490320807822207e-07,
|
|
"loss": 0.6308,
|
|
"step": 15215
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.484286439450464e-07,
|
|
"loss": 0.6538,
|
|
"step": 15220
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.478253531475557e-07,
|
|
"loss": 0.6549,
|
|
"step": 15225
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.472222086403509e-07,
|
|
"loss": 0.6538,
|
|
"step": 15230
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.466192106739725e-07,
|
|
"loss": 0.6725,
|
|
"step": 15235
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.46016359498901e-07,
|
|
"loss": 0.6694,
|
|
"step": 15240
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.454136553655553e-07,
|
|
"loss": 0.6556,
|
|
"step": 15245
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.448110985242934e-07,
|
|
"loss": 0.6585,
|
|
"step": 15250
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.442086892254126e-07,
|
|
"loss": 0.6255,
|
|
"step": 15255
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.43606427719148e-07,
|
|
"loss": 0.6198,
|
|
"step": 15260
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.43004314255674e-07,
|
|
"loss": 0.647,
|
|
"step": 15265
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.424023490851031e-07,
|
|
"loss": 0.6542,
|
|
"step": 15270
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.418005324574867e-07,
|
|
"loss": 0.6648,
|
|
"step": 15275
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.411988646228142e-07,
|
|
"loss": 0.6717,
|
|
"step": 15280
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.40597345831013e-07,
|
|
"loss": 0.6903,
|
|
"step": 15285
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.399959763319487e-07,
|
|
"loss": 0.6067,
|
|
"step": 15290
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.393947563754251e-07,
|
|
"loss": 0.6792,
|
|
"step": 15295
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.387936862111838e-07,
|
|
"loss": 0.6527,
|
|
"step": 15300
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.381927660889042e-07,
|
|
"loss": 0.6708,
|
|
"step": 15305
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.375919962582028e-07,
|
|
"loss": 0.6397,
|
|
"step": 15310
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.369913769686349e-07,
|
|
"loss": 0.6297,
|
|
"step": 15315
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.363909084696916e-07,
|
|
"loss": 0.6562,
|
|
"step": 15320
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.35790591010803e-07,
|
|
"loss": 0.6346,
|
|
"step": 15325
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.351904248413359e-07,
|
|
"loss": 0.6526,
|
|
"step": 15330
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.345904102105934e-07,
|
|
"loss": 0.6721,
|
|
"step": 15335
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.339905473678171e-07,
|
|
"loss": 0.6165,
|
|
"step": 15340
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.333908365621842e-07,
|
|
"loss": 0.6632,
|
|
"step": 15345
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.327912780428095e-07,
|
|
"loss": 0.6743,
|
|
"step": 15350
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.321918720587449e-07,
|
|
"loss": 0.6507,
|
|
"step": 15355
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.315926188589776e-07,
|
|
"loss": 0.6477,
|
|
"step": 15360
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.309935186924329e-07,
|
|
"loss": 0.6308,
|
|
"step": 15365
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.303945718079713e-07,
|
|
"loss": 0.6621,
|
|
"step": 15370
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.297957784543903e-07,
|
|
"loss": 0.6398,
|
|
"step": 15375
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.291971388804238e-07,
|
|
"loss": 0.668,
|
|
"step": 15380
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 6.28598653334741e-07,
|
|
"loss": 0.6778,
|
|
"step": 15385
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.280003220659476e-07,
|
|
"loss": 0.6458,
|
|
"step": 15390
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.274021453225854e-07,
|
|
"loss": 0.6483,
|
|
"step": 15395
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.268041233531321e-07,
|
|
"loss": 0.6463,
|
|
"step": 15400
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.262062564059999e-07,
|
|
"loss": 0.7068,
|
|
"step": 15405
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.256085447295383e-07,
|
|
"loss": 0.645,
|
|
"step": 15410
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.250109885720316e-07,
|
|
"loss": 0.6908,
|
|
"step": 15415
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.24413588181699e-07,
|
|
"loss": 0.6441,
|
|
"step": 15420
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.238163438066959e-07,
|
|
"loss": 0.6636,
|
|
"step": 15425
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.232192556951116e-07,
|
|
"loss": 0.6329,
|
|
"step": 15430
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.22622324094972e-07,
|
|
"loss": 0.6181,
|
|
"step": 15435
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.220255492542374e-07,
|
|
"loss": 0.6664,
|
|
"step": 15440
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.214289314208023e-07,
|
|
"loss": 0.6294,
|
|
"step": 15445
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.208324708424975e-07,
|
|
"loss": 0.6518,
|
|
"step": 15450
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.202361677670861e-07,
|
|
"loss": 0.7155,
|
|
"step": 15455
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.196400224422682e-07,
|
|
"loss": 0.6748,
|
|
"step": 15460
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.190440351156776e-07,
|
|
"loss": 0.655,
|
|
"step": 15465
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.184482060348815e-07,
|
|
"loss": 0.7019,
|
|
"step": 15470
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.178525354473829e-07,
|
|
"loss": 0.6503,
|
|
"step": 15475
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.172570236006173e-07,
|
|
"loss": 0.6535,
|
|
"step": 15480
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.166616707419556e-07,
|
|
"loss": 0.6511,
|
|
"step": 15485
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.160664771187025e-07,
|
|
"loss": 0.672,
|
|
"step": 15490
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.154714429780958e-07,
|
|
"loss": 0.6731,
|
|
"step": 15495
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.148765685673078e-07,
|
|
"loss": 0.6348,
|
|
"step": 15500
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.142818541334438e-07,
|
|
"loss": 0.6413,
|
|
"step": 15505
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.136872999235431e-07,
|
|
"loss": 0.6691,
|
|
"step": 15510
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.130929061845789e-07,
|
|
"loss": 0.6727,
|
|
"step": 15515
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.124986731634565e-07,
|
|
"loss": 0.6383,
|
|
"step": 15520
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.119046011070156e-07,
|
|
"loss": 0.6822,
|
|
"step": 15525
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.113106902620282e-07,
|
|
"loss": 0.6817,
|
|
"step": 15530
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.107169408752003e-07,
|
|
"loss": 0.6315,
|
|
"step": 15535
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.101233531931688e-07,
|
|
"loss": 0.6421,
|
|
"step": 15540
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.095299274625064e-07,
|
|
"loss": 0.6542,
|
|
"step": 15545
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.089366639297166e-07,
|
|
"loss": 0.6401,
|
|
"step": 15550
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.083435628412355e-07,
|
|
"loss": 0.6219,
|
|
"step": 15555
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.077506244434328e-07,
|
|
"loss": 0.676,
|
|
"step": 15560
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.07157848982609e-07,
|
|
"loss": 0.6691,
|
|
"step": 15565
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.065652367049985e-07,
|
|
"loss": 0.6419,
|
|
"step": 15570
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.059727878567675e-07,
|
|
"loss": 0.6513,
|
|
"step": 15575
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.053805026840136e-07,
|
|
"loss": 0.6663,
|
|
"step": 15580
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.047883814327675e-07,
|
|
"loss": 0.6643,
|
|
"step": 15585
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.041964243489901e-07,
|
|
"loss": 0.6548,
|
|
"step": 15590
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.036046316785761e-07,
|
|
"loss": 0.6474,
|
|
"step": 15595
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.030130036673514e-07,
|
|
"loss": 0.6343,
|
|
"step": 15600
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.024215405610722e-07,
|
|
"loss": 0.6524,
|
|
"step": 15605
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.018302426054278e-07,
|
|
"loss": 0.6536,
|
|
"step": 15610
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.012391100460377e-07,
|
|
"loss": 0.6917,
|
|
"step": 15615
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.006481431284533e-07,
|
|
"loss": 0.6504,
|
|
"step": 15620
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 6.000573420981578e-07,
|
|
"loss": 0.6773,
|
|
"step": 15625
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 5.994667072005641e-07,
|
|
"loss": 0.6295,
|
|
"step": 15630
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.988762386810173e-07,
|
|
"loss": 0.6617,
|
|
"step": 15635
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.982859367847921e-07,
|
|
"loss": 0.642,
|
|
"step": 15640
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.976958017570954e-07,
|
|
"loss": 0.6194,
|
|
"step": 15645
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.971058338430643e-07,
|
|
"loss": 0.6599,
|
|
"step": 15650
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.965160332877661e-07,
|
|
"loss": 0.657,
|
|
"step": 15655
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.959264003361988e-07,
|
|
"loss": 0.6272,
|
|
"step": 15660
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.953369352332904e-07,
|
|
"loss": 0.66,
|
|
"step": 15665
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.947476382239007e-07,
|
|
"loss": 0.6354,
|
|
"step": 15670
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.94158509552817e-07,
|
|
"loss": 0.6648,
|
|
"step": 15675
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.935695494647595e-07,
|
|
"loss": 0.6793,
|
|
"step": 15680
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.929807582043768e-07,
|
|
"loss": 0.637,
|
|
"step": 15685
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.923921360162471e-07,
|
|
"loss": 0.6285,
|
|
"step": 15690
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.9180368314488e-07,
|
|
"loss": 0.6667,
|
|
"step": 15695
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.912153998347123e-07,
|
|
"loss": 0.6879,
|
|
"step": 15700
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.90627286330113e-07,
|
|
"loss": 0.6651,
|
|
"step": 15705
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.900393428753791e-07,
|
|
"loss": 0.6321,
|
|
"step": 15710
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.894515697147365e-07,
|
|
"loss": 0.6323,
|
|
"step": 15715
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.888639670923419e-07,
|
|
"loss": 0.662,
|
|
"step": 15720
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.882765352522795e-07,
|
|
"loss": 0.609,
|
|
"step": 15725
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.87689274438564e-07,
|
|
"loss": 0.6352,
|
|
"step": 15730
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.871021848951387e-07,
|
|
"loss": 0.6479,
|
|
"step": 15735
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.865152668658745e-07,
|
|
"loss": 0.6526,
|
|
"step": 15740
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.859285205945732e-07,
|
|
"loss": 0.6382,
|
|
"step": 15745
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.85341946324963e-07,
|
|
"loss": 0.6363,
|
|
"step": 15750
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.847555443007022e-07,
|
|
"loss": 0.6941,
|
|
"step": 15755
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.841693147653774e-07,
|
|
"loss": 0.6592,
|
|
"step": 15760
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.835832579625031e-07,
|
|
"loss": 0.6243,
|
|
"step": 15765
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.829973741355221e-07,
|
|
"loss": 0.664,
|
|
"step": 15770
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.824116635278052e-07,
|
|
"loss": 0.644,
|
|
"step": 15775
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.818261263826518e-07,
|
|
"loss": 0.6605,
|
|
"step": 15780
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.812407629432891e-07,
|
|
"loss": 0.6545,
|
|
"step": 15785
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.806555734528713e-07,
|
|
"loss": 0.6652,
|
|
"step": 15790
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.800705581544821e-07,
|
|
"loss": 0.6178,
|
|
"step": 15795
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.794857172911309e-07,
|
|
"loss": 0.6393,
|
|
"step": 15800
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.789010511057555e-07,
|
|
"loss": 0.6067,
|
|
"step": 15805
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.783165598412216e-07,
|
|
"loss": 0.6481,
|
|
"step": 15810
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.777322437403218e-07,
|
|
"loss": 0.6759,
|
|
"step": 15815
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.771481030457757e-07,
|
|
"loss": 0.6192,
|
|
"step": 15820
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.765641380002299e-07,
|
|
"loss": 0.6652,
|
|
"step": 15825
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.759803488462593e-07,
|
|
"loss": 0.6694,
|
|
"step": 15830
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.753967358263642e-07,
|
|
"loss": 0.6317,
|
|
"step": 15835
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.748132991829722e-07,
|
|
"loss": 0.6434,
|
|
"step": 15840
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.742300391584384e-07,
|
|
"loss": 0.6846,
|
|
"step": 15845
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.736469559950437e-07,
|
|
"loss": 0.6559,
|
|
"step": 15850
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.730640499349957e-07,
|
|
"loss": 0.6625,
|
|
"step": 15855
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.724813212204283e-07,
|
|
"loss": 0.633,
|
|
"step": 15860
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.718987700934024e-07,
|
|
"loss": 0.6763,
|
|
"step": 15865
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.713163967959045e-07,
|
|
"loss": 0.667,
|
|
"step": 15870
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 5.70734201569847e-07,
|
|
"loss": 0.6555,
|
|
"step": 15875
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.701521846570693e-07,
|
|
"loss": 0.6228,
|
|
"step": 15880
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.695703462993362e-07,
|
|
"loss": 0.6761,
|
|
"step": 15885
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.689886867383376e-07,
|
|
"loss": 0.6576,
|
|
"step": 15890
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.684072062156906e-07,
|
|
"loss": 0.6484,
|
|
"step": 15895
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.678259049729368e-07,
|
|
"loss": 0.6562,
|
|
"step": 15900
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.672447832515439e-07,
|
|
"loss": 0.667,
|
|
"step": 15905
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.66663841292904e-07,
|
|
"loss": 0.669,
|
|
"step": 15910
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.660830793383364e-07,
|
|
"loss": 0.6429,
|
|
"step": 15915
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.655024976290842e-07,
|
|
"loss": 0.6619,
|
|
"step": 15920
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.649220964063154e-07,
|
|
"loss": 0.662,
|
|
"step": 15925
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.643418759111247e-07,
|
|
"loss": 0.6673,
|
|
"step": 15930
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.637618363845299e-07,
|
|
"loss": 0.6804,
|
|
"step": 15935
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.631819780674742e-07,
|
|
"loss": 0.6447,
|
|
"step": 15940
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.626023012008265e-07,
|
|
"loss": 0.6468,
|
|
"step": 15945
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.620228060253791e-07,
|
|
"loss": 0.6499,
|
|
"step": 15950
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.614434927818493e-07,
|
|
"loss": 0.6622,
|
|
"step": 15955
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.608643617108781e-07,
|
|
"loss": 0.6222,
|
|
"step": 15960
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.602854130530326e-07,
|
|
"loss": 0.6698,
|
|
"step": 15965
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.597066470488026e-07,
|
|
"loss": 0.6493,
|
|
"step": 15970
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.591280639386021e-07,
|
|
"loss": 0.6689,
|
|
"step": 15975
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.585496639627702e-07,
|
|
"loss": 0.6526,
|
|
"step": 15980
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.579714473615689e-07,
|
|
"loss": 0.6491,
|
|
"step": 15985
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.573934143751841e-07,
|
|
"loss": 0.6763,
|
|
"step": 15990
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.568155652437257e-07,
|
|
"loss": 0.6604,
|
|
"step": 15995
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.562379002072277e-07,
|
|
"loss": 0.6458,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"eval_loss": 0.6262282133102417,
|
|
"eval_runtime": 144.7729,
|
|
"eval_samples_per_second": 16.343,
|
|
"eval_steps_per_second": 2.728,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.556604195056469e-07,
|
|
"loss": 0.6665,
|
|
"step": 16005
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.550831233788631e-07,
|
|
"loss": 0.6382,
|
|
"step": 16010
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.545060120666811e-07,
|
|
"loss": 0.6322,
|
|
"step": 16015
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.539290858088277e-07,
|
|
"loss": 0.6352,
|
|
"step": 16020
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.533523448449522e-07,
|
|
"loss": 0.6883,
|
|
"step": 16025
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.527757894146288e-07,
|
|
"loss": 0.6319,
|
|
"step": 16030
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.521994197573534e-07,
|
|
"loss": 0.6687,
|
|
"step": 16035
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.516232361125446e-07,
|
|
"loss": 0.6326,
|
|
"step": 16040
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.510472387195439e-07,
|
|
"loss": 0.6674,
|
|
"step": 16045
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.504714278176163e-07,
|
|
"loss": 0.6257,
|
|
"step": 16050
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.498958036459484e-07,
|
|
"loss": 0.7107,
|
|
"step": 16055
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.49320366443649e-07,
|
|
"loss": 0.6777,
|
|
"step": 16060
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.487451164497503e-07,
|
|
"loss": 0.6695,
|
|
"step": 16065
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.481700539032062e-07,
|
|
"loss": 0.6443,
|
|
"step": 16070
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.475951790428918e-07,
|
|
"loss": 0.6397,
|
|
"step": 16075
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.47020492107606e-07,
|
|
"loss": 0.6617,
|
|
"step": 16080
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.464459933360687e-07,
|
|
"loss": 0.6453,
|
|
"step": 16085
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.458716829669215e-07,
|
|
"loss": 0.6691,
|
|
"step": 16090
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.452975612387274e-07,
|
|
"loss": 0.6762,
|
|
"step": 16095
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.447236283899723e-07,
|
|
"loss": 0.6542,
|
|
"step": 16100
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.441498846590629e-07,
|
|
"loss": 0.6588,
|
|
"step": 16105
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.435763302843268e-07,
|
|
"loss": 0.666,
|
|
"step": 16110
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.43002965504014e-07,
|
|
"loss": 0.6717,
|
|
"step": 16115
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.424297905562953e-07,
|
|
"loss": 0.6642,
|
|
"step": 16120
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 5.418568056792624e-07,
|
|
"loss": 0.6994,
|
|
"step": 16125
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.412840111109277e-07,
|
|
"loss": 0.6606,
|
|
"step": 16130
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.40711407089226e-07,
|
|
"loss": 0.6669,
|
|
"step": 16135
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.401389938520118e-07,
|
|
"loss": 0.6357,
|
|
"step": 16140
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.395667716370598e-07,
|
|
"loss": 0.6473,
|
|
"step": 16145
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.389947406820672e-07,
|
|
"loss": 0.6316,
|
|
"step": 16150
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.384229012246501e-07,
|
|
"loss": 0.6444,
|
|
"step": 16155
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.378512535023457e-07,
|
|
"loss": 0.6439,
|
|
"step": 16160
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.372797977526115e-07,
|
|
"loss": 0.6778,
|
|
"step": 16165
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.367085342128257e-07,
|
|
"loss": 0.6545,
|
|
"step": 16170
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.361374631202857e-07,
|
|
"loss": 0.6666,
|
|
"step": 16175
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.35566584712209e-07,
|
|
"loss": 0.657,
|
|
"step": 16180
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.349958992257346e-07,
|
|
"loss": 0.6504,
|
|
"step": 16185
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.344254068979199e-07,
|
|
"loss": 0.6331,
|
|
"step": 16190
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.338551079657418e-07,
|
|
"loss": 0.6388,
|
|
"step": 16195
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.332850026660985e-07,
|
|
"loss": 0.6554,
|
|
"step": 16200
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.327150912358065e-07,
|
|
"loss": 0.6618,
|
|
"step": 16205
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.321453739116013e-07,
|
|
"loss": 0.6708,
|
|
"step": 16210
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.315758509301396e-07,
|
|
"loss": 0.6671,
|
|
"step": 16215
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.310065225279957e-07,
|
|
"loss": 0.6527,
|
|
"step": 16220
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.304373889416638e-07,
|
|
"loss": 0.7327,
|
|
"step": 16225
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.298684504075567e-07,
|
|
"loss": 0.6462,
|
|
"step": 16230
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.292997071620072e-07,
|
|
"loss": 0.6424,
|
|
"step": 16235
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.28731159441266e-07,
|
|
"loss": 0.6477,
|
|
"step": 16240
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.281628074815023e-07,
|
|
"loss": 0.6995,
|
|
"step": 16245
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.275946515188055e-07,
|
|
"loss": 0.6511,
|
|
"step": 16250
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.270266917891822e-07,
|
|
"loss": 0.6662,
|
|
"step": 16255
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.264589285285581e-07,
|
|
"loss": 0.6326,
|
|
"step": 16260
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.258913619727765e-07,
|
|
"loss": 0.6911,
|
|
"step": 16265
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.253239923576003e-07,
|
|
"loss": 0.6766,
|
|
"step": 16270
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.247568199187099e-07,
|
|
"loss": 0.665,
|
|
"step": 16275
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.241898448917032e-07,
|
|
"loss": 0.6645,
|
|
"step": 16280
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.236230675120974e-07,
|
|
"loss": 0.6634,
|
|
"step": 16285
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.230564880153266e-07,
|
|
"loss": 0.6747,
|
|
"step": 16290
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.224901066367426e-07,
|
|
"loss": 0.689,
|
|
"step": 16295
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.21923923611616e-07,
|
|
"loss": 0.6862,
|
|
"step": 16300
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.21357939175134e-07,
|
|
"loss": 0.6523,
|
|
"step": 16305
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.207921535624017e-07,
|
|
"loss": 0.6355,
|
|
"step": 16310
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.202265670084409e-07,
|
|
"loss": 0.6314,
|
|
"step": 16315
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.196611797481922e-07,
|
|
"loss": 0.6796,
|
|
"step": 16320
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.190959920165124e-07,
|
|
"loss": 0.7183,
|
|
"step": 16325
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.185310040481749e-07,
|
|
"loss": 0.6641,
|
|
"step": 16330
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.179662160778716e-07,
|
|
"loss": 0.659,
|
|
"step": 16335
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.174016283402101e-07,
|
|
"loss": 0.6464,
|
|
"step": 16340
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.16837241069715e-07,
|
|
"loss": 0.6658,
|
|
"step": 16345
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.162730545008287e-07,
|
|
"loss": 0.6686,
|
|
"step": 16350
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.157090688679087e-07,
|
|
"loss": 0.6493,
|
|
"step": 16355
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.1514528440523e-07,
|
|
"loss": 0.6698,
|
|
"step": 16360
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.14581701346983e-07,
|
|
"loss": 0.6519,
|
|
"step": 16365
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 5.140183199272765e-07,
|
|
"loss": 0.6418,
|
|
"step": 16370
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.134551403801336e-07,
|
|
"loss": 0.6559,
|
|
"step": 16375
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.128921629394937e-07,
|
|
"loss": 0.6312,
|
|
"step": 16380
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.123293878392136e-07,
|
|
"loss": 0.6356,
|
|
"step": 16385
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.11766815313065e-07,
|
|
"loss": 0.6799,
|
|
"step": 16390
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.112044455947352e-07,
|
|
"loss": 0.6431,
|
|
"step": 16395
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.106422789178275e-07,
|
|
"loss": 0.6758,
|
|
"step": 16400
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.100803155158621e-07,
|
|
"loss": 0.6414,
|
|
"step": 16405
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.09518555622273e-07,
|
|
"loss": 0.679,
|
|
"step": 16410
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.089569994704099e-07,
|
|
"loss": 0.6796,
|
|
"step": 16415
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.083956472935395e-07,
|
|
"loss": 0.6583,
|
|
"step": 16420
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.07834499324842e-07,
|
|
"loss": 0.6481,
|
|
"step": 16425
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.072735557974128e-07,
|
|
"loss": 0.6472,
|
|
"step": 16430
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.067128169442642e-07,
|
|
"loss": 0.6412,
|
|
"step": 16435
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.061522829983215e-07,
|
|
"loss": 0.6273,
|
|
"step": 16440
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.055919541924258e-07,
|
|
"loss": 0.6899,
|
|
"step": 16445
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.050318307593322e-07,
|
|
"loss": 0.6384,
|
|
"step": 16450
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.044719129317121e-07,
|
|
"loss": 0.6764,
|
|
"step": 16455
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.039122009421501e-07,
|
|
"loss": 0.635,
|
|
"step": 16460
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.033526950231452e-07,
|
|
"loss": 0.6379,
|
|
"step": 16465
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.027933954071121e-07,
|
|
"loss": 0.656,
|
|
"step": 16470
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.022343023263789e-07,
|
|
"loss": 0.667,
|
|
"step": 16475
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.016754160131871e-07,
|
|
"loss": 0.6475,
|
|
"step": 16480
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.011167366996942e-07,
|
|
"loss": 0.6578,
|
|
"step": 16485
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.005582646179705e-07,
|
|
"loss": 0.642,
|
|
"step": 16490
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 5.000000000000002e-07,
|
|
"loss": 0.655,
|
|
"step": 16495
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.994419430776813e-07,
|
|
"loss": 0.6781,
|
|
"step": 16500
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.988840940828266e-07,
|
|
"loss": 0.6668,
|
|
"step": 16505
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.983264532471613e-07,
|
|
"loss": 0.6634,
|
|
"step": 16510
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.977690208023243e-07,
|
|
"loss": 0.6537,
|
|
"step": 16515
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.972117969798687e-07,
|
|
"loss": 0.6613,
|
|
"step": 16520
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.966547820112602e-07,
|
|
"loss": 0.618,
|
|
"step": 16525
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.96097976127878e-07,
|
|
"loss": 0.6319,
|
|
"step": 16530
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.955413795610139e-07,
|
|
"loss": 0.63,
|
|
"step": 16535
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.949849925418741e-07,
|
|
"loss": 0.6434,
|
|
"step": 16540
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.944288153015767e-07,
|
|
"loss": 0.6651,
|
|
"step": 16545
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.938728480711523e-07,
|
|
"loss": 0.641,
|
|
"step": 16550
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.933170910815456e-07,
|
|
"loss": 0.6605,
|
|
"step": 16555
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.927615445636131e-07,
|
|
"loss": 0.658,
|
|
"step": 16560
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.922062087481233e-07,
|
|
"loss": 0.6616,
|
|
"step": 16565
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.916510838657589e-07,
|
|
"loss": 0.6089,
|
|
"step": 16570
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.910961701471135e-07,
|
|
"loss": 0.6732,
|
|
"step": 16575
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.905414678226932e-07,
|
|
"loss": 0.6555,
|
|
"step": 16580
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.899869771229164e-07,
|
|
"loss": 0.6712,
|
|
"step": 16585
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.894326982781144e-07,
|
|
"loss": 0.6536,
|
|
"step": 16590
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.888786315185295e-07,
|
|
"loss": 0.6191,
|
|
"step": 16595
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.883247770743156e-07,
|
|
"loss": 0.6292,
|
|
"step": 16600
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.8777113517554e-07,
|
|
"loss": 0.6654,
|
|
"step": 16605
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.872177060521803e-07,
|
|
"loss": 0.6175,
|
|
"step": 16610
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 4.866644899341258e-07,
|
|
"loss": 0.6802,
|
|
"step": 16615
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.861114870511783e-07,
|
|
"loss": 0.6134,
|
|
"step": 16620
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.855586976330501e-07,
|
|
"loss": 0.6653,
|
|
"step": 16625
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.850061219093651e-07,
|
|
"loss": 0.691,
|
|
"step": 16630
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.844537601096582e-07,
|
|
"loss": 0.6575,
|
|
"step": 16635
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.839016124633762e-07,
|
|
"loss": 0.6805,
|
|
"step": 16640
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.833496791998762e-07,
|
|
"loss": 0.6172,
|
|
"step": 16645
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.827979605484261e-07,
|
|
"loss": 0.647,
|
|
"step": 16650
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.822464567382056e-07,
|
|
"loss": 0.6365,
|
|
"step": 16655
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.816951679983046e-07,
|
|
"loss": 0.6272,
|
|
"step": 16660
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.811440945577232e-07,
|
|
"loss": 0.6359,
|
|
"step": 16665
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.805932366453725e-07,
|
|
"loss": 0.6765,
|
|
"step": 16670
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.800425944900747e-07,
|
|
"loss": 0.6651,
|
|
"step": 16675
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.794921683205613e-07,
|
|
"loss": 0.644,
|
|
"step": 16680
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.789419583654742e-07,
|
|
"loss": 0.6639,
|
|
"step": 16685
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.783919648533668e-07,
|
|
"loss": 0.6714,
|
|
"step": 16690
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.778421880127009e-07,
|
|
"loss": 0.6429,
|
|
"step": 16695
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.772926280718488e-07,
|
|
"loss": 0.6389,
|
|
"step": 16700
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.767432852590939e-07,
|
|
"loss": 0.6473,
|
|
"step": 16705
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.7619415980262765e-07,
|
|
"loss": 0.6766,
|
|
"step": 16710
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.756452519305523e-07,
|
|
"loss": 0.6198,
|
|
"step": 16715
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.7509656187087886e-07,
|
|
"loss": 0.6646,
|
|
"step": 16720
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.745480898515293e-07,
|
|
"loss": 0.6313,
|
|
"step": 16725
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.7399983610033346e-07,
|
|
"loss": 0.6982,
|
|
"step": 16730
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.734518008450311e-07,
|
|
"loss": 0.706,
|
|
"step": 16735
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.7290398431327207e-07,
|
|
"loss": 0.6684,
|
|
"step": 16740
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.7235638673261404e-07,
|
|
"loss": 0.6462,
|
|
"step": 16745
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.7180900833052394e-07,
|
|
"loss": 0.6975,
|
|
"step": 16750
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.7126184933437873e-07,
|
|
"loss": 0.6282,
|
|
"step": 16755
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.7071490997146323e-07,
|
|
"loss": 0.6415,
|
|
"step": 16760
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.7016819046897126e-07,
|
|
"loss": 0.6512,
|
|
"step": 16765
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.6962169105400495e-07,
|
|
"loss": 0.6839,
|
|
"step": 16770
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.6907541195357613e-07,
|
|
"loss": 0.6516,
|
|
"step": 16775
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.6852935339460407e-07,
|
|
"loss": 0.6369,
|
|
"step": 16780
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.6798351560391636e-07,
|
|
"loss": 0.6695,
|
|
"step": 16785
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.674378988082499e-07,
|
|
"loss": 0.6873,
|
|
"step": 16790
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.66892503234249e-07,
|
|
"loss": 0.6493,
|
|
"step": 16795
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.6634732910846563e-07,
|
|
"loss": 0.6268,
|
|
"step": 16800
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"eval_loss": 0.6238049268722534,
|
|
"eval_runtime": 144.7581,
|
|
"eval_samples_per_second": 16.345,
|
|
"eval_steps_per_second": 2.729,
|
|
"step": 16800
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.6580237665736135e-07,
|
|
"loss": 0.6724,
|
|
"step": 16805
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.652576461073042e-07,
|
|
"loss": 0.6617,
|
|
"step": 16810
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.6471313768457053e-07,
|
|
"loss": 0.6567,
|
|
"step": 16815
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.6416885161534394e-07,
|
|
"loss": 0.6866,
|
|
"step": 16820
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.636247881257174e-07,
|
|
"loss": 0.661,
|
|
"step": 16825
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.630809474416885e-07,
|
|
"loss": 0.6815,
|
|
"step": 16830
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.625373297891647e-07,
|
|
"loss": 0.5876,
|
|
"step": 16835
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.619939353939606e-07,
|
|
"loss": 0.7043,
|
|
"step": 16840
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.6145076448179696e-07,
|
|
"loss": 0.6551,
|
|
"step": 16845
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.609078172783023e-07,
|
|
"loss": 0.658,
|
|
"step": 16850
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.603650940090118e-07,
|
|
"loss": 0.6219,
|
|
"step": 16855
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 4.598225948993687e-07,
|
|
"loss": 0.6702,
|
|
"step": 16860
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.5928032017472216e-07,
|
|
"loss": 0.636,
|
|
"step": 16865
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.587382700603278e-07,
|
|
"loss": 0.6706,
|
|
"step": 16870
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.581964447813499e-07,
|
|
"loss": 0.6071,
|
|
"step": 16875
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.5765484456285617e-07,
|
|
"loss": 0.6504,
|
|
"step": 16880
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.5711346962982366e-07,
|
|
"loss": 0.6407,
|
|
"step": 16885
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.56572320207135e-07,
|
|
"loss": 0.6647,
|
|
"step": 16890
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.5603139651957866e-07,
|
|
"loss": 0.6111,
|
|
"step": 16895
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.5549069879184965e-07,
|
|
"loss": 0.6486,
|
|
"step": 16900
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.549502272485487e-07,
|
|
"loss": 0.638,
|
|
"step": 16905
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.544099821141838e-07,
|
|
"loss": 0.6168,
|
|
"step": 16910
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.538699636131675e-07,
|
|
"loss": 0.6655,
|
|
"step": 16915
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.533301719698187e-07,
|
|
"loss": 0.6584,
|
|
"step": 16920
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.5279060740836305e-07,
|
|
"loss": 0.6485,
|
|
"step": 16925
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.522512701529296e-07,
|
|
"loss": 0.6726,
|
|
"step": 16930
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.517121604275551e-07,
|
|
"loss": 0.6818,
|
|
"step": 16935
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.5117327845618136e-07,
|
|
"loss": 0.6737,
|
|
"step": 16940
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.50634624462655e-07,
|
|
"loss": 0.6375,
|
|
"step": 16945
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.5009619867072803e-07,
|
|
"loss": 0.6622,
|
|
"step": 16950
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.4955800130405763e-07,
|
|
"loss": 0.6516,
|
|
"step": 16955
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.4902003258620725e-07,
|
|
"loss": 0.6557,
|
|
"step": 16960
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.484822927406431e-07,
|
|
"loss": 0.6293,
|
|
"step": 16965
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.479447819907383e-07,
|
|
"loss": 0.6313,
|
|
"step": 16970
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.474075005597705e-07,
|
|
"loss": 0.6323,
|
|
"step": 16975
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.4687044867092147e-07,
|
|
"loss": 0.6483,
|
|
"step": 16980
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.463336265472778e-07,
|
|
"loss": 0.6485,
|
|
"step": 16985
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.4579703441183025e-07,
|
|
"loss": 0.6649,
|
|
"step": 16990
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.4526067248747557e-07,
|
|
"loss": 0.6729,
|
|
"step": 16995
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.447245409970132e-07,
|
|
"loss": 0.6805,
|
|
"step": 17000
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.441886401631472e-07,
|
|
"loss": 0.6739,
|
|
"step": 17005
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.4365297020848727e-07,
|
|
"loss": 0.6435,
|
|
"step": 17010
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.431175313555445e-07,
|
|
"loss": 0.6506,
|
|
"step": 17015
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.4258232382673634e-07,
|
|
"loss": 0.6641,
|
|
"step": 17020
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.4204734784438367e-07,
|
|
"loss": 0.6448,
|
|
"step": 17025
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.4151260363071064e-07,
|
|
"loss": 0.6651,
|
|
"step": 17030
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.40978091407845e-07,
|
|
"loss": 0.6762,
|
|
"step": 17035
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.404438113978185e-07,
|
|
"loss": 0.6604,
|
|
"step": 17040
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.3990976382256694e-07,
|
|
"loss": 0.6538,
|
|
"step": 17045
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.393759489039287e-07,
|
|
"loss": 0.631,
|
|
"step": 17050
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.388423668636455e-07,
|
|
"loss": 0.6838,
|
|
"step": 17055
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.3830901792336374e-07,
|
|
"loss": 0.6582,
|
|
"step": 17060
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.377759023046306e-07,
|
|
"loss": 0.6154,
|
|
"step": 17065
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.372430202288981e-07,
|
|
"loss": 0.646,
|
|
"step": 17070
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.3671037191752157e-07,
|
|
"loss": 0.6777,
|
|
"step": 17075
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.361779575917579e-07,
|
|
"loss": 0.6359,
|
|
"step": 17080
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.356457774727674e-07,
|
|
"loss": 0.6211,
|
|
"step": 17085
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.3511383178161263e-07,
|
|
"loss": 0.6075,
|
|
"step": 17090
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.345821207392605e-07,
|
|
"loss": 0.6124,
|
|
"step": 17095
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.340506445665775e-07,
|
|
"loss": 0.6473,
|
|
"step": 17100
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.33519403484335e-07,
|
|
"loss": 0.6244,
|
|
"step": 17105
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 4.3298839771320674e-07,
|
|
"loss": 0.6249,
|
|
"step": 17110
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.3245762747376635e-07,
|
|
"loss": 0.6282,
|
|
"step": 17115
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.3192709298649223e-07,
|
|
"loss": 0.6779,
|
|
"step": 17120
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.3139679447176313e-07,
|
|
"loss": 0.6534,
|
|
"step": 17125
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.3086673214986114e-07,
|
|
"loss": 0.6536,
|
|
"step": 17130
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.303369062409693e-07,
|
|
"loss": 0.6452,
|
|
"step": 17135
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.2980731696517203e-07,
|
|
"loss": 0.6267,
|
|
"step": 17140
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.292779645424576e-07,
|
|
"loss": 0.633,
|
|
"step": 17145
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.2874884919271257e-07,
|
|
"loss": 0.6706,
|
|
"step": 17150
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.282199711357277e-07,
|
|
"loss": 0.6658,
|
|
"step": 17155
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.276913305911952e-07,
|
|
"loss": 0.6152,
|
|
"step": 17160
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.2716292777870624e-07,
|
|
"loss": 0.6495,
|
|
"step": 17165
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.266347629177558e-07,
|
|
"loss": 0.6657,
|
|
"step": 17170
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.2610683622773815e-07,
|
|
"loss": 0.6627,
|
|
"step": 17175
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.255791479279504e-07,
|
|
"loss": 0.6713,
|
|
"step": 17180
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.250516982375891e-07,
|
|
"loss": 0.6723,
|
|
"step": 17185
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.245244873757521e-07,
|
|
"loss": 0.6523,
|
|
"step": 17190
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.2399751556143903e-07,
|
|
"loss": 0.6366,
|
|
"step": 17195
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.2347078301354824e-07,
|
|
"loss": 0.6277,
|
|
"step": 17200
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.229442899508804e-07,
|
|
"loss": 0.6854,
|
|
"step": 17205
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.224180365921366e-07,
|
|
"loss": 0.648,
|
|
"step": 17210
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.2189202315591744e-07,
|
|
"loss": 0.6133,
|
|
"step": 17215
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.2136624986072435e-07,
|
|
"loss": 0.6783,
|
|
"step": 17220
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.208407169249587e-07,
|
|
"loss": 0.6617,
|
|
"step": 17225
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.2031542456692303e-07,
|
|
"loss": 0.639,
|
|
"step": 17230
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.197903730048188e-07,
|
|
"loss": 0.6371,
|
|
"step": 17235
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.192655624567475e-07,
|
|
"loss": 0.636,
|
|
"step": 17240
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.18740993140712e-07,
|
|
"loss": 0.6729,
|
|
"step": 17245
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.182166652746123e-07,
|
|
"loss": 0.6432,
|
|
"step": 17250
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.1769257907625077e-07,
|
|
"loss": 0.6669,
|
|
"step": 17255
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.171687347633276e-07,
|
|
"loss": 0.6517,
|
|
"step": 17260
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.166451325534437e-07,
|
|
"loss": 0.6368,
|
|
"step": 17265
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.161217726640988e-07,
|
|
"loss": 0.6766,
|
|
"step": 17270
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.1559865531269135e-07,
|
|
"loss": 0.6816,
|
|
"step": 17275
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.150757807165212e-07,
|
|
"loss": 0.6367,
|
|
"step": 17280
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.1455314909278405e-07,
|
|
"loss": 0.6598,
|
|
"step": 17285
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.1403076065857755e-07,
|
|
"loss": 0.6619,
|
|
"step": 17290
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.13508615630898e-07,
|
|
"loss": 0.656,
|
|
"step": 17295
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.129867142266384e-07,
|
|
"loss": 0.6077,
|
|
"step": 17300
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.124650566625931e-07,
|
|
"loss": 0.6615,
|
|
"step": 17305
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.119436431554536e-07,
|
|
"loss": 0.6512,
|
|
"step": 17310
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.1142247392181096e-07,
|
|
"loss": 0.6808,
|
|
"step": 17315
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.1090154917815423e-07,
|
|
"loss": 0.667,
|
|
"step": 17320
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.1038086914087045e-07,
|
|
"loss": 0.655,
|
|
"step": 17325
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.0986043402624694e-07,
|
|
"loss": 0.645,
|
|
"step": 17330
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.093402440504662e-07,
|
|
"loss": 0.6364,
|
|
"step": 17335
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.0882029942961136e-07,
|
|
"loss": 0.6864,
|
|
"step": 17340
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.0830060037966386e-07,
|
|
"loss": 0.6425,
|
|
"step": 17345
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.077811471165005e-07,
|
|
"loss": 0.6822,
|
|
"step": 17350
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 4.0726193985589884e-07,
|
|
"loss": 0.6606,
|
|
"step": 17355
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.0674297881353227e-07,
|
|
"loss": 0.6377,
|
|
"step": 17360
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.0622426420497345e-07,
|
|
"loss": 0.7072,
|
|
"step": 17365
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.057057962456916e-07,
|
|
"loss": 0.6939,
|
|
"step": 17370
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.0518757515105327e-07,
|
|
"loss": 0.6147,
|
|
"step": 17375
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.046696011363243e-07,
|
|
"loss": 0.6398,
|
|
"step": 17380
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.041518744166651e-07,
|
|
"loss": 0.6739,
|
|
"step": 17385
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.0363439520713585e-07,
|
|
"loss": 0.6546,
|
|
"step": 17390
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.0311716372269243e-07,
|
|
"loss": 0.6565,
|
|
"step": 17395
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.026001801781883e-07,
|
|
"loss": 0.5946,
|
|
"step": 17400
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.020834447883743e-07,
|
|
"loss": 0.62,
|
|
"step": 17405
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.015669577678973e-07,
|
|
"loss": 0.689,
|
|
"step": 17410
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.010507193313025e-07,
|
|
"loss": 0.6858,
|
|
"step": 17415
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.005347296930295e-07,
|
|
"loss": 0.6892,
|
|
"step": 17420
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 4.000189890674166e-07,
|
|
"loss": 0.6309,
|
|
"step": 17425
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.995034976686986e-07,
|
|
"loss": 0.6863,
|
|
"step": 17430
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.9898825571100483e-07,
|
|
"loss": 0.621,
|
|
"step": 17435
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.984732634083633e-07,
|
|
"loss": 0.6637,
|
|
"step": 17440
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.9795852097469704e-07,
|
|
"loss": 0.6236,
|
|
"step": 17445
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.974440286238252e-07,
|
|
"loss": 0.698,
|
|
"step": 17450
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.969297865694641e-07,
|
|
"loss": 0.6756,
|
|
"step": 17455
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.9641579502522484e-07,
|
|
"loss": 0.6504,
|
|
"step": 17460
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.95902054204616e-07,
|
|
"loss": 0.6308,
|
|
"step": 17465
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.953885643210395e-07,
|
|
"loss": 0.6394,
|
|
"step": 17470
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.948753255877956e-07,
|
|
"loss": 0.635,
|
|
"step": 17475
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.9436233821807965e-07,
|
|
"loss": 0.6351,
|
|
"step": 17480
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.9384960242498076e-07,
|
|
"loss": 0.6077,
|
|
"step": 17485
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.933371184214862e-07,
|
|
"loss": 0.5905,
|
|
"step": 17490
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.928248864204764e-07,
|
|
"loss": 0.6507,
|
|
"step": 17495
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.923129066347288e-07,
|
|
"loss": 0.639,
|
|
"step": 17500
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.9180117927691523e-07,
|
|
"loss": 0.6503,
|
|
"step": 17505
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.912897045596022e-07,
|
|
"loss": 0.651,
|
|
"step": 17510
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.90778482695253e-07,
|
|
"loss": 0.6379,
|
|
"step": 17515
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.9026751389622337e-07,
|
|
"loss": 0.6312,
|
|
"step": 17520
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.897567983747663e-07,
|
|
"loss": 0.6215,
|
|
"step": 17525
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.892463363430283e-07,
|
|
"loss": 0.654,
|
|
"step": 17530
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.8873612801305035e-07,
|
|
"loss": 0.6402,
|
|
"step": 17535
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.8822617359676946e-07,
|
|
"loss": 0.6927,
|
|
"step": 17540
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.877164733060154e-07,
|
|
"loss": 0.6587,
|
|
"step": 17545
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.872070273525143e-07,
|
|
"loss": 0.6217,
|
|
"step": 17550
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.866978359478844e-07,
|
|
"loss": 0.6561,
|
|
"step": 17555
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.8618889930363984e-07,
|
|
"loss": 0.6448,
|
|
"step": 17560
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.856802176311892e-07,
|
|
"loss": 0.6062,
|
|
"step": 17565
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.8517179114183306e-07,
|
|
"loss": 0.6583,
|
|
"step": 17570
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.8466362004676845e-07,
|
|
"loss": 0.6847,
|
|
"step": 17575
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.841557045570849e-07,
|
|
"loss": 0.6257,
|
|
"step": 17580
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.8364804488376556e-07,
|
|
"loss": 0.6344,
|
|
"step": 17585
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.8314064123768885e-07,
|
|
"loss": 0.6658,
|
|
"step": 17590
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.8263349382962483e-07,
|
|
"loss": 0.6301,
|
|
"step": 17595
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 3.821266028702392e-07,
|
|
"loss": 0.6709,
|
|
"step": 17600
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"eval_loss": 0.6223161220550537,
|
|
"eval_runtime": 139.812,
|
|
"eval_samples_per_second": 16.923,
|
|
"eval_steps_per_second": 2.825,
|
|
"step": 17600
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.8161996857008895e-07,
|
|
"loss": 0.6462,
|
|
"step": 17605
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.811135911396259e-07,
|
|
"loss": 0.6432,
|
|
"step": 17610
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.8060747078919574e-07,
|
|
"loss": 0.6256,
|
|
"step": 17615
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.801016077290351e-07,
|
|
"loss": 0.6452,
|
|
"step": 17620
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.7959600216927603e-07,
|
|
"loss": 0.6282,
|
|
"step": 17625
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.7909065431994227e-07,
|
|
"loss": 0.6568,
|
|
"step": 17630
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.785855643909507e-07,
|
|
"loss": 0.6867,
|
|
"step": 17635
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.7808073259211194e-07,
|
|
"loss": 0.622,
|
|
"step": 17640
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.7757615913312813e-07,
|
|
"loss": 0.6691,
|
|
"step": 17645
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.770718442235955e-07,
|
|
"loss": 0.6732,
|
|
"step": 17650
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.765677880730009e-07,
|
|
"loss": 0.6415,
|
|
"step": 17655
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.7606399089072594e-07,
|
|
"loss": 0.7083,
|
|
"step": 17660
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.755604528860431e-07,
|
|
"loss": 0.6402,
|
|
"step": 17665
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.750571742681173e-07,
|
|
"loss": 0.648,
|
|
"step": 17670
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.7455415524600697e-07,
|
|
"loss": 0.6585,
|
|
"step": 17675
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.7405139602866145e-07,
|
|
"loss": 0.6325,
|
|
"step": 17680
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.735488968249224e-07,
|
|
"loss": 0.6709,
|
|
"step": 17685
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.730466578435235e-07,
|
|
"loss": 0.6228,
|
|
"step": 17690
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.725446792930905e-07,
|
|
"loss": 0.637,
|
|
"step": 17695
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.7204296138214197e-07,
|
|
"loss": 0.6506,
|
|
"step": 17700
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.715415043190856e-07,
|
|
"loss": 0.7209,
|
|
"step": 17705
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.710403083122232e-07,
|
|
"loss": 0.6668,
|
|
"step": 17710
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.7053937356974717e-07,
|
|
"loss": 0.6685,
|
|
"step": 17715
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.700387002997408e-07,
|
|
"loss": 0.6666,
|
|
"step": 17720
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.695382887101804e-07,
|
|
"loss": 0.6703,
|
|
"step": 17725
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.6903813900893175e-07,
|
|
"loss": 0.6361,
|
|
"step": 17730
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.685382514037537e-07,
|
|
"loss": 0.6159,
|
|
"step": 17735
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.6803862610229384e-07,
|
|
"loss": 0.6266,
|
|
"step": 17740
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.6753926331209294e-07,
|
|
"loss": 0.6465,
|
|
"step": 17745
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.670401632405826e-07,
|
|
"loss": 0.6146,
|
|
"step": 17750
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.6654132609508325e-07,
|
|
"loss": 0.6482,
|
|
"step": 17755
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.660427520828085e-07,
|
|
"loss": 0.6261,
|
|
"step": 17760
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.655444414108615e-07,
|
|
"loss": 0.6514,
|
|
"step": 17765
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.650463942862356e-07,
|
|
"loss": 0.6404,
|
|
"step": 17770
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.645486109158161e-07,
|
|
"loss": 0.6637,
|
|
"step": 17775
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.640510915063771e-07,
|
|
"loss": 0.6227,
|
|
"step": 17780
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.635538362645849e-07,
|
|
"loss": 0.6501,
|
|
"step": 17785
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.6305684539699366e-07,
|
|
"loss": 0.6559,
|
|
"step": 17790
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.625601191100496e-07,
|
|
"loss": 0.648,
|
|
"step": 17795
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.620636576100894e-07,
|
|
"loss": 0.672,
|
|
"step": 17800
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.6156746110333736e-07,
|
|
"loss": 0.6461,
|
|
"step": 17805
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.6107152979591015e-07,
|
|
"loss": 0.6711,
|
|
"step": 17810
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.6057586389381325e-07,
|
|
"loss": 0.6374,
|
|
"step": 17815
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.6008046360294163e-07,
|
|
"loss": 0.6321,
|
|
"step": 17820
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.5958532912908015e-07,
|
|
"loss": 0.607,
|
|
"step": 17825
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.590904606779035e-07,
|
|
"loss": 0.6357,
|
|
"step": 17830
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.5859585845497666e-07,
|
|
"loss": 0.6354,
|
|
"step": 17835
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.5810152266575167e-07,
|
|
"loss": 0.6476,
|
|
"step": 17840
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 3.576074535155722e-07,
|
|
"loss": 0.6301,
|
|
"step": 17845
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.5711365120966994e-07,
|
|
"loss": 0.6725,
|
|
"step": 17850
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.56620115953166e-07,
|
|
"loss": 0.6659,
|
|
"step": 17855
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.5612684795107104e-07,
|
|
"loss": 0.6825,
|
|
"step": 17860
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.5563384740828406e-07,
|
|
"loss": 0.6667,
|
|
"step": 17865
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.5514111452959317e-07,
|
|
"loss": 0.653,
|
|
"step": 17870
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.54648649519675e-07,
|
|
"loss": 0.6709,
|
|
"step": 17875
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.5415645258309556e-07,
|
|
"loss": 0.664,
|
|
"step": 17880
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.536645239243099e-07,
|
|
"loss": 0.624,
|
|
"step": 17885
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.531728637476594e-07,
|
|
"loss": 0.6278,
|
|
"step": 17890
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.526814722573767e-07,
|
|
"loss": 0.6323,
|
|
"step": 17895
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.52190349657581e-07,
|
|
"loss": 0.6911,
|
|
"step": 17900
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.5169949615228014e-07,
|
|
"loss": 0.6741,
|
|
"step": 17905
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.5120891194537093e-07,
|
|
"loss": 0.6731,
|
|
"step": 17910
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.5071859724063756e-07,
|
|
"loss": 0.6234,
|
|
"step": 17915
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.5022855224175254e-07,
|
|
"loss": 0.6702,
|
|
"step": 17920
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.4973877715227584e-07,
|
|
"loss": 0.6427,
|
|
"step": 17925
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.4924927217565614e-07,
|
|
"loss": 0.6306,
|
|
"step": 17930
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.487600375152303e-07,
|
|
"loss": 0.6943,
|
|
"step": 17935
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.482710733742207e-07,
|
|
"loss": 0.6889,
|
|
"step": 17940
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.477823799557399e-07,
|
|
"loss": 0.6084,
|
|
"step": 17945
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.4729395746278645e-07,
|
|
"loss": 0.6656,
|
|
"step": 17950
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.468058060982468e-07,
|
|
"loss": 0.658,
|
|
"step": 17955
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.463179260648946e-07,
|
|
"loss": 0.649,
|
|
"step": 17960
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.458303175653916e-07,
|
|
"loss": 0.6494,
|
|
"step": 17965
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.4534298080228563e-07,
|
|
"loss": 0.6672,
|
|
"step": 17970
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.44855915978012e-07,
|
|
"loss": 0.6541,
|
|
"step": 17975
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.443691232948938e-07,
|
|
"loss": 0.6382,
|
|
"step": 17980
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.438826029551402e-07,
|
|
"loss": 0.6651,
|
|
"step": 17985
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.43396355160847e-07,
|
|
"loss": 0.6019,
|
|
"step": 17990
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.429103801139981e-07,
|
|
"loss": 0.6356,
|
|
"step": 17995
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.4242467801646303e-07,
|
|
"loss": 0.6722,
|
|
"step": 18000
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.4193924906999804e-07,
|
|
"loss": 0.64,
|
|
"step": 18005
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.4145409347624597e-07,
|
|
"loss": 0.6232,
|
|
"step": 18010
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.409692114367365e-07,
|
|
"loss": 0.6801,
|
|
"step": 18015
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.4048460315288607e-07,
|
|
"loss": 0.6688,
|
|
"step": 18020
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.400002688259953e-07,
|
|
"loss": 0.6331,
|
|
"step": 18025
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.395162086572536e-07,
|
|
"loss": 0.6097,
|
|
"step": 18030
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.39032422847735e-07,
|
|
"loss": 0.6887,
|
|
"step": 18035
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.385489115983996e-07,
|
|
"loss": 0.6126,
|
|
"step": 18040
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.380656751100943e-07,
|
|
"loss": 0.6639,
|
|
"step": 18045
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.3758271358355115e-07,
|
|
"loss": 0.6329,
|
|
"step": 18050
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.371000272193881e-07,
|
|
"loss": 0.6381,
|
|
"step": 18055
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.366176162181086e-07,
|
|
"loss": 0.674,
|
|
"step": 18060
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.361354807801021e-07,
|
|
"loss": 0.6684,
|
|
"step": 18065
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.3565362110564446e-07,
|
|
"loss": 0.6402,
|
|
"step": 18070
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.351720373948945e-07,
|
|
"loss": 0.6506,
|
|
"step": 18075
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.346907298478989e-07,
|
|
"loss": 0.6486,
|
|
"step": 18080
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.3420969866458823e-07,
|
|
"loss": 0.6789,
|
|
"step": 18085
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.337289440447788e-07,
|
|
"loss": 0.6283,
|
|
"step": 18090
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 3.332484661881716e-07,
|
|
"loss": 0.6729,
|
|
"step": 18095
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.327682652943534e-07,
|
|
"loss": 0.6307,
|
|
"step": 18100
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.322883415627953e-07,
|
|
"loss": 0.6712,
|
|
"step": 18105
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.31808695192853e-07,
|
|
"loss": 0.68,
|
|
"step": 18110
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.313293263837683e-07,
|
|
"loss": 0.6577,
|
|
"step": 18115
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.308502353346663e-07,
|
|
"loss": 0.6393,
|
|
"step": 18120
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.3037142224455703e-07,
|
|
"loss": 0.6464,
|
|
"step": 18125
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.2989288731233587e-07,
|
|
"loss": 0.6654,
|
|
"step": 18130
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.2941463073678186e-07,
|
|
"loss": 0.6245,
|
|
"step": 18135
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.289366527165586e-07,
|
|
"loss": 0.6602,
|
|
"step": 18140
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.2845895345021345e-07,
|
|
"loss": 0.6258,
|
|
"step": 18145
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.279815331361795e-07,
|
|
"loss": 0.6558,
|
|
"step": 18150
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.275043919727725e-07,
|
|
"loss": 0.6414,
|
|
"step": 18155
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.2702753015819227e-07,
|
|
"loss": 0.6347,
|
|
"step": 18160
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.265509478905241e-07,
|
|
"loss": 0.6173,
|
|
"step": 18165
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.2607464536773546e-07,
|
|
"loss": 0.6816,
|
|
"step": 18170
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.2559862278767815e-07,
|
|
"loss": 0.6507,
|
|
"step": 18175
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.251228803480883e-07,
|
|
"loss": 0.6762,
|
|
"step": 18180
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.2464741824658504e-07,
|
|
"loss": 0.6747,
|
|
"step": 18185
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.2417223668067095e-07,
|
|
"loss": 0.6143,
|
|
"step": 18190
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.2369733584773216e-07,
|
|
"loss": 0.6404,
|
|
"step": 18195
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.23222715945039e-07,
|
|
"loss": 0.654,
|
|
"step": 18200
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.227483771697441e-07,
|
|
"loss": 0.6367,
|
|
"step": 18205
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.222743197188834e-07,
|
|
"loss": 0.6811,
|
|
"step": 18210
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.2180054378937673e-07,
|
|
"loss": 0.6155,
|
|
"step": 18215
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.2132704957802637e-07,
|
|
"loss": 0.6127,
|
|
"step": 18220
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.208538372815172e-07,
|
|
"loss": 0.648,
|
|
"step": 18225
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.203809070964181e-07,
|
|
"loss": 0.6523,
|
|
"step": 18230
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.199082592191801e-07,
|
|
"loss": 0.651,
|
|
"step": 18235
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1943589384613667e-07,
|
|
"loss": 0.6423,
|
|
"step": 18240
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.189638111735042e-07,
|
|
"loss": 0.6186,
|
|
"step": 18245
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1849201139738247e-07,
|
|
"loss": 0.6621,
|
|
"step": 18250
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1802049471375234e-07,
|
|
"loss": 0.6333,
|
|
"step": 18255
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.175492613184777e-07,
|
|
"loss": 0.6234,
|
|
"step": 18260
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1707831140730537e-07,
|
|
"loss": 0.6328,
|
|
"step": 18265
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1660764517586337e-07,
|
|
"loss": 0.6621,
|
|
"step": 18270
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1613726281966255e-07,
|
|
"loss": 0.6735,
|
|
"step": 18275
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1566716453409536e-07,
|
|
"loss": 0.6595,
|
|
"step": 18280
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.15197350514437e-07,
|
|
"loss": 0.6837,
|
|
"step": 18285
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1472782095584396e-07,
|
|
"loss": 0.66,
|
|
"step": 18290
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1425857605335427e-07,
|
|
"loss": 0.6552,
|
|
"step": 18295
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1378961600188903e-07,
|
|
"loss": 0.6419,
|
|
"step": 18300
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1332094099624963e-07,
|
|
"loss": 0.6407,
|
|
"step": 18305
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1285255123111944e-07,
|
|
"loss": 0.6473,
|
|
"step": 18310
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1238444690106394e-07,
|
|
"loss": 0.6624,
|
|
"step": 18315
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1191662820052954e-07,
|
|
"loss": 0.6523,
|
|
"step": 18320
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.114490953238438e-07,
|
|
"loss": 0.6311,
|
|
"step": 18325
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1098184846521567e-07,
|
|
"loss": 0.6484,
|
|
"step": 18330
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1051488781873605e-07,
|
|
"loss": 0.6783,
|
|
"step": 18335
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 3.1004821357837594e-07,
|
|
"loss": 0.6586,
|
|
"step": 18340
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.0958182593798745e-07,
|
|
"loss": 0.6285,
|
|
"step": 18345
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.0911572509130465e-07,
|
|
"loss": 0.6312,
|
|
"step": 18350
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.086499112319414e-07,
|
|
"loss": 0.6531,
|
|
"step": 18355
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.081843845533925e-07,
|
|
"loss": 0.6358,
|
|
"step": 18360
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.0771914524903417e-07,
|
|
"loss": 0.6551,
|
|
"step": 18365
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.0725419351212254e-07,
|
|
"loss": 0.6368,
|
|
"step": 18370
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.067895295357945e-07,
|
|
"loss": 0.6914,
|
|
"step": 18375
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.063251535130672e-07,
|
|
"loss": 0.6461,
|
|
"step": 18380
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.058610656368389e-07,
|
|
"loss": 0.6122,
|
|
"step": 18385
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.053972660998875e-07,
|
|
"loss": 0.6502,
|
|
"step": 18390
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.0493375509487074e-07,
|
|
"loss": 0.688,
|
|
"step": 18395
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.044705328143279e-07,
|
|
"loss": 0.6114,
|
|
"step": 18400
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"eval_loss": 0.6205956935882568,
|
|
"eval_runtime": 140.0377,
|
|
"eval_samples_per_second": 16.895,
|
|
"eval_steps_per_second": 2.821,
|
|
"step": 18400
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.040075994506771e-07,
|
|
"loss": 0.6604,
|
|
"step": 18405
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.0354495519621693e-07,
|
|
"loss": 0.6973,
|
|
"step": 18410
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.030826002431253e-07,
|
|
"loss": 0.6445,
|
|
"step": 18415
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.026205347834612e-07,
|
|
"loss": 0.6553,
|
|
"step": 18420
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.021587590091622e-07,
|
|
"loss": 0.6605,
|
|
"step": 18425
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.0169727311204564e-07,
|
|
"loss": 0.6498,
|
|
"step": 18430
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.0123607728380953e-07,
|
|
"loss": 0.6277,
|
|
"step": 18435
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.0077517171603007e-07,
|
|
"loss": 0.6164,
|
|
"step": 18440
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 3.0031455660016316e-07,
|
|
"loss": 0.6513,
|
|
"step": 18445
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.9985423212754503e-07,
|
|
"loss": 0.6511,
|
|
"step": 18450
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.993941984893902e-07,
|
|
"loss": 0.6822,
|
|
"step": 18455
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.9893445587679245e-07,
|
|
"loss": 0.6549,
|
|
"step": 18460
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.984750044807247e-07,
|
|
"loss": 0.6606,
|
|
"step": 18465
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.9801584449203967e-07,
|
|
"loss": 0.6564,
|
|
"step": 18470
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.97556976101468e-07,
|
|
"loss": 0.6355,
|
|
"step": 18475
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.9709839949961947e-07,
|
|
"loss": 0.6713,
|
|
"step": 18480
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.966401148769834e-07,
|
|
"loss": 0.6468,
|
|
"step": 18485
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.961821224239268e-07,
|
|
"loss": 0.6286,
|
|
"step": 18490
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.957244223306955e-07,
|
|
"loss": 0.6355,
|
|
"step": 18495
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.952670147874149e-07,
|
|
"loss": 0.6472,
|
|
"step": 18500
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.948098999840877e-07,
|
|
"loss": 0.6622,
|
|
"step": 18505
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.943530781105954e-07,
|
|
"loss": 0.6634,
|
|
"step": 18510
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.938965493566975e-07,
|
|
"loss": 0.6532,
|
|
"step": 18515
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.934403139120327e-07,
|
|
"loss": 0.6362,
|
|
"step": 18520
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.9298437196611704e-07,
|
|
"loss": 0.6463,
|
|
"step": 18525
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.925287237083445e-07,
|
|
"loss": 0.6824,
|
|
"step": 18530
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.9207336932798787e-07,
|
|
"loss": 0.6852,
|
|
"step": 18535
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.9161830901419714e-07,
|
|
"loss": 0.6914,
|
|
"step": 18540
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.9116354295600056e-07,
|
|
"loss": 0.6176,
|
|
"step": 18545
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.907090713423035e-07,
|
|
"loss": 0.644,
|
|
"step": 18550
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.9025489436189034e-07,
|
|
"loss": 0.6502,
|
|
"step": 18555
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.8980101220342167e-07,
|
|
"loss": 0.6321,
|
|
"step": 18560
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.893474250554362e-07,
|
|
"loss": 0.6527,
|
|
"step": 18565
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.8889413310635033e-07,
|
|
"loss": 0.6831,
|
|
"step": 18570
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.884411365444577e-07,
|
|
"loss": 0.6418,
|
|
"step": 18575
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.879884355579285e-07,
|
|
"loss": 0.6457,
|
|
"step": 18580
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 2.875360303348116e-07,
|
|
"loss": 0.622,
|
|
"step": 18585
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.870839210630317e-07,
|
|
"loss": 0.6198,
|
|
"step": 18590
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.8663210793039114e-07,
|
|
"loss": 0.6673,
|
|
"step": 18595
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.8618059112456907e-07,
|
|
"loss": 0.6307,
|
|
"step": 18600
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.8572937083312196e-07,
|
|
"loss": 0.6682,
|
|
"step": 18605
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.852784472434826e-07,
|
|
"loss": 0.6635,
|
|
"step": 18610
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.8482782054296037e-07,
|
|
"loss": 0.6525,
|
|
"step": 18615
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.843774909187425e-07,
|
|
"loss": 0.6445,
|
|
"step": 18620
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.8392745855789144e-07,
|
|
"loss": 0.629,
|
|
"step": 18625
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.8347772364734656e-07,
|
|
"loss": 0.6769,
|
|
"step": 18630
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.830282863739244e-07,
|
|
"loss": 0.6489,
|
|
"step": 18635
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.8257914692431703e-07,
|
|
"loss": 0.6738,
|
|
"step": 18640
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.8213030548509296e-07,
|
|
"loss": 0.6459,
|
|
"step": 18645
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.816817622426968e-07,
|
|
"loss": 0.6447,
|
|
"step": 18650
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.8123351738345016e-07,
|
|
"loss": 0.6607,
|
|
"step": 18655
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.807855710935496e-07,
|
|
"loss": 0.6474,
|
|
"step": 18660
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.80337923559068e-07,
|
|
"loss": 0.6493,
|
|
"step": 18665
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.798905749659548e-07,
|
|
"loss": 0.6176,
|
|
"step": 18670
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.7944352550003425e-07,
|
|
"loss": 0.6472,
|
|
"step": 18675
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.7899677534700706e-07,
|
|
"loss": 0.6353,
|
|
"step": 18680
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.785503246924489e-07,
|
|
"loss": 0.6279,
|
|
"step": 18685
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.781041737218121e-07,
|
|
"loss": 0.6039,
|
|
"step": 18690
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.7765832262042364e-07,
|
|
"loss": 0.6366,
|
|
"step": 18695
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.772127715734859e-07,
|
|
"loss": 0.664,
|
|
"step": 18700
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.7676752076607747e-07,
|
|
"loss": 0.6687,
|
|
"step": 18705
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.763225703831513e-07,
|
|
"loss": 0.6712,
|
|
"step": 18710
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.7587792060953585e-07,
|
|
"loss": 0.659,
|
|
"step": 18715
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.754335716299351e-07,
|
|
"loss": 0.6291,
|
|
"step": 18720
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.749895236289276e-07,
|
|
"loss": 0.6037,
|
|
"step": 18725
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.745457767909669e-07,
|
|
"loss": 0.625,
|
|
"step": 18730
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.741023313003814e-07,
|
|
"loss": 0.6479,
|
|
"step": 18735
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.736591873413751e-07,
|
|
"loss": 0.6515,
|
|
"step": 18740
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.7321634509802584e-07,
|
|
"loss": 0.647,
|
|
"step": 18745
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.72773804754286e-07,
|
|
"loss": 0.6699,
|
|
"step": 18750
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.723315664939838e-07,
|
|
"loss": 0.6759,
|
|
"step": 18755
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.718896305008207e-07,
|
|
"loss": 0.6816,
|
|
"step": 18760
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.7144799695837283e-07,
|
|
"loss": 0.6374,
|
|
"step": 18765
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.710066660500916e-07,
|
|
"loss": 0.6381,
|
|
"step": 18770
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.705656379593015e-07,
|
|
"loss": 0.6504,
|
|
"step": 18775
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.7012491286920215e-07,
|
|
"loss": 0.6652,
|
|
"step": 18780
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.696844909628662e-07,
|
|
"loss": 0.6412,
|
|
"step": 18785
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.6924437242324195e-07,
|
|
"loss": 0.6511,
|
|
"step": 18790
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.6880455743315045e-07,
|
|
"loss": 0.6489,
|
|
"step": 18795
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.683650461752867e-07,
|
|
"loss": 0.6671,
|
|
"step": 18800
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.6792583883222044e-07,
|
|
"loss": 0.6377,
|
|
"step": 18805
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.6748693558639426e-07,
|
|
"loss": 0.6314,
|
|
"step": 18810
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.6704833662012484e-07,
|
|
"loss": 0.6626,
|
|
"step": 18815
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.66610042115602e-07,
|
|
"loss": 0.6428,
|
|
"step": 18820
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.6617205225489015e-07,
|
|
"loss": 0.6139,
|
|
"step": 18825
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 2.65734367219926e-07,
|
|
"loss": 0.6466,
|
|
"step": 18830
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.6529698719252e-07,
|
|
"loss": 0.6723,
|
|
"step": 18835
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.6485991235435655e-07,
|
|
"loss": 0.6314,
|
|
"step": 18840
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.644231428869924e-07,
|
|
"loss": 0.68,
|
|
"step": 18845
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.6398667897185754e-07,
|
|
"loss": 0.6589,
|
|
"step": 18850
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.6355052079025573e-07,
|
|
"loss": 0.6587,
|
|
"step": 18855
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.6311466852336326e-07,
|
|
"loss": 0.6506,
|
|
"step": 18860
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.6267912235222913e-07,
|
|
"loss": 0.6344,
|
|
"step": 18865
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.622438824577753e-07,
|
|
"loss": 0.6729,
|
|
"step": 18870
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.61808949020797e-07,
|
|
"loss": 0.6573,
|
|
"step": 18875
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.613743222219618e-07,
|
|
"loss": 0.6228,
|
|
"step": 18880
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.6094000224180934e-07,
|
|
"loss": 0.636,
|
|
"step": 18885
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.6050598926075317e-07,
|
|
"loss": 0.6215,
|
|
"step": 18890
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.6007228345907804e-07,
|
|
"loss": 0.6473,
|
|
"step": 18895
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.596388850169413e-07,
|
|
"loss": 0.6789,
|
|
"step": 18900
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.592057941143736e-07,
|
|
"loss": 0.6479,
|
|
"step": 18905
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.5877301093127677e-07,
|
|
"loss": 0.7127,
|
|
"step": 18910
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.583405356474252e-07,
|
|
"loss": 0.6525,
|
|
"step": 18915
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.57908368442465e-07,
|
|
"loss": 0.6547,
|
|
"step": 18920
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.574765094959153e-07,
|
|
"loss": 0.6684,
|
|
"step": 18925
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.5704495898716615e-07,
|
|
"loss": 0.6369,
|
|
"step": 18930
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.5661371709547983e-07,
|
|
"loss": 0.6421,
|
|
"step": 18935
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.5618278399999094e-07,
|
|
"loss": 0.6399,
|
|
"step": 18940
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.557521598797049e-07,
|
|
"loss": 0.5765,
|
|
"step": 18945
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.553218449134994e-07,
|
|
"loss": 0.6161,
|
|
"step": 18950
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.5489183928012325e-07,
|
|
"loss": 0.6204,
|
|
"step": 18955
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.544621431581975e-07,
|
|
"loss": 0.6343,
|
|
"step": 18960
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.5403275672621407e-07,
|
|
"loss": 0.6635,
|
|
"step": 18965
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.536036801625359e-07,
|
|
"loss": 0.6453,
|
|
"step": 18970
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.531749136453982e-07,
|
|
"loss": 0.6791,
|
|
"step": 18975
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.527464573529068e-07,
|
|
"loss": 0.6195,
|
|
"step": 18980
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.5231831146303817e-07,
|
|
"loss": 0.6502,
|
|
"step": 18985
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.5189047615364124e-07,
|
|
"loss": 0.6623,
|
|
"step": 18990
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.5146295160243457e-07,
|
|
"loss": 0.6331,
|
|
"step": 18995
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.5103573798700816e-07,
|
|
"loss": 0.6164,
|
|
"step": 19000
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.506088354848226e-07,
|
|
"loss": 0.6363,
|
|
"step": 19005
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.5018224427321013e-07,
|
|
"loss": 0.6733,
|
|
"step": 19010
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.497559645293726e-07,
|
|
"loss": 0.6452,
|
|
"step": 19015
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.4932999643038264e-07,
|
|
"loss": 0.6344,
|
|
"step": 19020
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.489043401531844e-07,
|
|
"loss": 0.6154,
|
|
"step": 19025
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.484789958745913e-07,
|
|
"loss": 0.6564,
|
|
"step": 19030
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.4805396377128753e-07,
|
|
"loss": 0.6827,
|
|
"step": 19035
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.476292440198283e-07,
|
|
"loss": 0.6415,
|
|
"step": 19040
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.4720483679663815e-07,
|
|
"loss": 0.6611,
|
|
"step": 19045
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.4678074227801213e-07,
|
|
"loss": 0.6308,
|
|
"step": 19050
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.463569606401151e-07,
|
|
"loss": 0.6434,
|
|
"step": 19055
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.459334920589831e-07,
|
|
"loss": 0.6625,
|
|
"step": 19060
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.455103367105207e-07,
|
|
"loss": 0.6416,
|
|
"step": 19065
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.450874947705027e-07,
|
|
"loss": 0.6458,
|
|
"step": 19070
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.446649664145748e-07,
|
|
"loss": 0.6101,
|
|
"step": 19075
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 2.4424275181825103e-07,
|
|
"loss": 0.6629,
|
|
"step": 19080
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.4382085115691575e-07,
|
|
"loss": 0.6881,
|
|
"step": 19085
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.4339926460582263e-07,
|
|
"loss": 0.6869,
|
|
"step": 19090
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.429779923400955e-07,
|
|
"loss": 0.6448,
|
|
"step": 19095
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.42557034534727e-07,
|
|
"loss": 0.6401,
|
|
"step": 19100
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.42136391364579e-07,
|
|
"loss": 0.6611,
|
|
"step": 19105
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.4171606300438364e-07,
|
|
"loss": 0.6471,
|
|
"step": 19110
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.4129604962874127e-07,
|
|
"loss": 0.6922,
|
|
"step": 19115
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.4087635141212157e-07,
|
|
"loss": 0.6707,
|
|
"step": 19120
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.404569685288642e-07,
|
|
"loss": 0.6398,
|
|
"step": 19125
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.400379011531768e-07,
|
|
"loss": 0.6756,
|
|
"step": 19130
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.396191494591363e-07,
|
|
"loss": 0.6565,
|
|
"step": 19135
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.392007136206883e-07,
|
|
"loss": 0.6649,
|
|
"step": 19140
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.387825938116478e-07,
|
|
"loss": 0.6119,
|
|
"step": 19145
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.38364790205698e-07,
|
|
"loss": 0.6636,
|
|
"step": 19150
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3794730297639054e-07,
|
|
"loss": 0.6411,
|
|
"step": 19155
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3753013229714658e-07,
|
|
"loss": 0.6213,
|
|
"step": 19160
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3711327834125495e-07,
|
|
"loss": 0.6499,
|
|
"step": 19165
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3669674128187277e-07,
|
|
"loss": 0.6644,
|
|
"step": 19170
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3628052129202658e-07,
|
|
"loss": 0.6425,
|
|
"step": 19175
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3586461854461015e-07,
|
|
"loss": 0.6316,
|
|
"step": 19180
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3544903321238596e-07,
|
|
"loss": 0.6716,
|
|
"step": 19185
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3503376546798425e-07,
|
|
"loss": 0.6026,
|
|
"step": 19190
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3461881548390416e-07,
|
|
"loss": 0.6799,
|
|
"step": 19195
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3420418343251215e-07,
|
|
"loss": 0.6355,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"eval_loss": 0.6190813779830933,
|
|
"eval_runtime": 143.7787,
|
|
"eval_samples_per_second": 16.456,
|
|
"eval_steps_per_second": 2.747,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3378986948604217e-07,
|
|
"loss": 0.649,
|
|
"step": 19205
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3337587381659752e-07,
|
|
"loss": 0.653,
|
|
"step": 19210
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3296219659614802e-07,
|
|
"loss": 0.6588,
|
|
"step": 19215
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3254883799653124e-07,
|
|
"loss": 0.6292,
|
|
"step": 19220
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.321357981894534e-07,
|
|
"loss": 0.6651,
|
|
"step": 19225
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3172307734648722e-07,
|
|
"loss": 0.6464,
|
|
"step": 19230
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3131067563907359e-07,
|
|
"loss": 0.642,
|
|
"step": 19235
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3089859323852002e-07,
|
|
"loss": 0.6499,
|
|
"step": 19240
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.304868303160028e-07,
|
|
"loss": 0.6595,
|
|
"step": 19245
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.3007538704256412e-07,
|
|
"loss": 0.627,
|
|
"step": 19250
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.2966426358911384e-07,
|
|
"loss": 0.6531,
|
|
"step": 19255
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.2925346012642964e-07,
|
|
"loss": 0.6453,
|
|
"step": 19260
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.288429768251553e-07,
|
|
"loss": 0.6343,
|
|
"step": 19265
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.2843281385580215e-07,
|
|
"loss": 0.6788,
|
|
"step": 19270
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.2802297138874792e-07,
|
|
"loss": 0.7187,
|
|
"step": 19275
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.276134495942381e-07,
|
|
"loss": 0.6708,
|
|
"step": 19280
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.2720424864238452e-07,
|
|
"loss": 0.6434,
|
|
"step": 19285
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.2679536870316506e-07,
|
|
"loss": 0.6389,
|
|
"step": 19290
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.2638680994642567e-07,
|
|
"loss": 0.6507,
|
|
"step": 19295
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.259785725418778e-07,
|
|
"loss": 0.6413,
|
|
"step": 19300
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.2557065665909936e-07,
|
|
"loss": 0.6542,
|
|
"step": 19305
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.251630624675357e-07,
|
|
"loss": 0.6694,
|
|
"step": 19310
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.247557901364975e-07,
|
|
"loss": 0.6521,
|
|
"step": 19315
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.2434883983516217e-07,
|
|
"loss": 0.6519,
|
|
"step": 19320
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 2.239422117325732e-07,
|
|
"loss": 0.608,
|
|
"step": 19325
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.2353590599764083e-07,
|
|
"loss": 0.653,
|
|
"step": 19330
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.2312992279914067e-07,
|
|
"loss": 0.6787,
|
|
"step": 19335
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.2272426230571428e-07,
|
|
"loss": 0.5948,
|
|
"step": 19340
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.223189246858701e-07,
|
|
"loss": 0.6391,
|
|
"step": 19345
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.2191391010798143e-07,
|
|
"loss": 0.6242,
|
|
"step": 19350
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.2150921874028782e-07,
|
|
"loss": 0.6666,
|
|
"step": 19355
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.211048507508949e-07,
|
|
"loss": 0.672,
|
|
"step": 19360
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.2070080630777334e-07,
|
|
"loss": 0.6048,
|
|
"step": 19365
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.202970855787597e-07,
|
|
"loss": 0.6577,
|
|
"step": 19370
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.198936887315559e-07,
|
|
"loss": 0.6343,
|
|
"step": 19375
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1949061593372986e-07,
|
|
"loss": 0.661,
|
|
"step": 19380
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1908786735271434e-07,
|
|
"loss": 0.635,
|
|
"step": 19385
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1868544315580728e-07,
|
|
"loss": 0.661,
|
|
"step": 19390
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1828334351017286e-07,
|
|
"loss": 0.6658,
|
|
"step": 19395
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1788156858283923e-07,
|
|
"loss": 0.6291,
|
|
"step": 19400
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.174801185407006e-07,
|
|
"loss": 0.6638,
|
|
"step": 19405
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1707899355051518e-07,
|
|
"loss": 0.6319,
|
|
"step": 19410
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.166781937789075e-07,
|
|
"loss": 0.6331,
|
|
"step": 19415
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1627771939236606e-07,
|
|
"loss": 0.5873,
|
|
"step": 19420
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.158775705572441e-07,
|
|
"loss": 0.6696,
|
|
"step": 19425
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1547774743976055e-07,
|
|
"loss": 0.6585,
|
|
"step": 19430
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1507825020599822e-07,
|
|
"loss": 0.6483,
|
|
"step": 19435
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.146790790219044e-07,
|
|
"loss": 0.6506,
|
|
"step": 19440
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.14280234053292e-07,
|
|
"loss": 0.6411,
|
|
"step": 19445
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1388171546583745e-07,
|
|
"loss": 0.7025,
|
|
"step": 19450
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1348352342508181e-07,
|
|
"loss": 0.6499,
|
|
"step": 19455
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1308565809643042e-07,
|
|
"loss": 0.6437,
|
|
"step": 19460
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1268811964515354e-07,
|
|
"loss": 0.6432,
|
|
"step": 19465
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1229090823638507e-07,
|
|
"loss": 0.6752,
|
|
"step": 19470
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1189402403512268e-07,
|
|
"loss": 0.6625,
|
|
"step": 19475
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1149746720622929e-07,
|
|
"loss": 0.6549,
|
|
"step": 19480
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.111012379144309e-07,
|
|
"loss": 0.6326,
|
|
"step": 19485
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.107053363243174e-07,
|
|
"loss": 0.683,
|
|
"step": 19490
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.1030976260034338e-07,
|
|
"loss": 0.6442,
|
|
"step": 19495
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.099145169068266e-07,
|
|
"loss": 0.647,
|
|
"step": 19500
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.095195994079485e-07,
|
|
"loss": 0.6238,
|
|
"step": 19505
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.091250102677542e-07,
|
|
"loss": 0.6403,
|
|
"step": 19510
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.0873074965015335e-07,
|
|
"loss": 0.6408,
|
|
"step": 19515
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.0833681771891743e-07,
|
|
"loss": 0.6941,
|
|
"step": 19520
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.0794321463768273e-07,
|
|
"loss": 0.6483,
|
|
"step": 19525
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.075499405699489e-07,
|
|
"loss": 0.6438,
|
|
"step": 19530
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.071569956790782e-07,
|
|
"loss": 0.6856,
|
|
"step": 19535
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.0676438012829668e-07,
|
|
"loss": 0.6519,
|
|
"step": 19540
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.06372094080693e-07,
|
|
"loss": 0.6411,
|
|
"step": 19545
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.0598013769922008e-07,
|
|
"loss": 0.6776,
|
|
"step": 19550
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.0558851114669274e-07,
|
|
"loss": 0.6476,
|
|
"step": 19555
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.051972145857891e-07,
|
|
"loss": 0.6632,
|
|
"step": 19560
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.0480624817905113e-07,
|
|
"loss": 0.6584,
|
|
"step": 19565
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 2.044156120888818e-07,
|
|
"loss": 0.613,
|
|
"step": 19570
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.0402530647754844e-07,
|
|
"loss": 0.6221,
|
|
"step": 19575
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.0363533150718093e-07,
|
|
"loss": 0.6513,
|
|
"step": 19580
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.032456873397711e-07,
|
|
"loss": 0.641,
|
|
"step": 19585
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.0285637413717395e-07,
|
|
"loss": 0.6591,
|
|
"step": 19590
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.024673920611063e-07,
|
|
"loss": 0.674,
|
|
"step": 19595
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.0207874127314862e-07,
|
|
"loss": 0.668,
|
|
"step": 19600
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.0169042193474283e-07,
|
|
"loss": 0.6521,
|
|
"step": 19605
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.0130243420719294e-07,
|
|
"loss": 0.6629,
|
|
"step": 19610
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.0091477825166636e-07,
|
|
"loss": 0.6329,
|
|
"step": 19615
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.0052745422919183e-07,
|
|
"loss": 0.6239,
|
|
"step": 19620
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.0014046230065985e-07,
|
|
"loss": 0.6888,
|
|
"step": 19625
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9975380262682429e-07,
|
|
"loss": 0.6387,
|
|
"step": 19630
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.993674753682998e-07,
|
|
"loss": 0.6091,
|
|
"step": 19635
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9898148068556332e-07,
|
|
"loss": 0.6518,
|
|
"step": 19640
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.985958187389536e-07,
|
|
"loss": 0.6446,
|
|
"step": 19645
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.982104896886716e-07,
|
|
"loss": 0.6669,
|
|
"step": 19650
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9782549369477952e-07,
|
|
"loss": 0.6802,
|
|
"step": 19655
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.974408309172011e-07,
|
|
"loss": 0.6449,
|
|
"step": 19660
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.970565015157223e-07,
|
|
"loss": 0.5942,
|
|
"step": 19665
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9667250564999006e-07,
|
|
"loss": 0.68,
|
|
"step": 19670
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.962888434795129e-07,
|
|
"loss": 0.6198,
|
|
"step": 19675
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.959055151636605e-07,
|
|
"loss": 0.6153,
|
|
"step": 19680
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9552252086166465e-07,
|
|
"loss": 0.6893,
|
|
"step": 19685
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9513986073261757e-07,
|
|
"loss": 0.6095,
|
|
"step": 19690
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9475753493547254e-07,
|
|
"loss": 0.6591,
|
|
"step": 19695
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.943755436290454e-07,
|
|
"loss": 0.6112,
|
|
"step": 19700
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.939938869720108e-07,
|
|
"loss": 0.653,
|
|
"step": 19705
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9361256512290624e-07,
|
|
"loss": 0.6472,
|
|
"step": 19710
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.932315782401297e-07,
|
|
"loss": 0.6746,
|
|
"step": 19715
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9285092648193947e-07,
|
|
"loss": 0.6383,
|
|
"step": 19720
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9247061000645515e-07,
|
|
"loss": 0.6605,
|
|
"step": 19725
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.920906289716565e-07,
|
|
"loss": 0.6241,
|
|
"step": 19730
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9171098353538494e-07,
|
|
"loss": 0.6597,
|
|
"step": 19735
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9133167385534167e-07,
|
|
"loss": 0.6183,
|
|
"step": 19740
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9095270008908815e-07,
|
|
"loss": 0.6427,
|
|
"step": 19745
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9057406239404784e-07,
|
|
"loss": 0.645,
|
|
"step": 19750
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.9019576092750234e-07,
|
|
"loss": 0.6908,
|
|
"step": 19755
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.898177958465953e-07,
|
|
"loss": 0.5862,
|
|
"step": 19760
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.8944016730833045e-07,
|
|
"loss": 0.6414,
|
|
"step": 19765
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.8906287546957122e-07,
|
|
"loss": 0.6513,
|
|
"step": 19770
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.8868592048704125e-07,
|
|
"loss": 0.6557,
|
|
"step": 19775
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.8830930251732403e-07,
|
|
"loss": 0.6538,
|
|
"step": 19780
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.8793302171686398e-07,
|
|
"loss": 0.6498,
|
|
"step": 19785
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.8755707824196476e-07,
|
|
"loss": 0.6455,
|
|
"step": 19790
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.8718147224878954e-07,
|
|
"loss": 0.6459,
|
|
"step": 19795
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.8680620389336267e-07,
|
|
"loss": 0.626,
|
|
"step": 19800
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.8643127333156628e-07,
|
|
"loss": 0.6689,
|
|
"step": 19805
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.8605668071914404e-07,
|
|
"loss": 0.6425,
|
|
"step": 19810
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.8568242621169806e-07,
|
|
"loss": 0.6661,
|
|
"step": 19815
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8530850996469083e-07,
|
|
"loss": 0.6463,
|
|
"step": 19820
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8493493213344358e-07,
|
|
"loss": 0.6842,
|
|
"step": 19825
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8456169287313716e-07,
|
|
"loss": 0.6676,
|
|
"step": 19830
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8418879233881267e-07,
|
|
"loss": 0.6673,
|
|
"step": 19835
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8381623068536866e-07,
|
|
"loss": 0.6137,
|
|
"step": 19840
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8344400806756455e-07,
|
|
"loss": 0.6686,
|
|
"step": 19845
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8307212464001888e-07,
|
|
"loss": 0.6486,
|
|
"step": 19850
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.827005805572077e-07,
|
|
"loss": 0.6125,
|
|
"step": 19855
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.823293759734681e-07,
|
|
"loss": 0.6306,
|
|
"step": 19860
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8195851104299465e-07,
|
|
"loss": 0.6946,
|
|
"step": 19865
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8158798591984194e-07,
|
|
"loss": 0.6081,
|
|
"step": 19870
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8121780075792258e-07,
|
|
"loss": 0.6554,
|
|
"step": 19875
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8084795571100809e-07,
|
|
"loss": 0.6768,
|
|
"step": 19880
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8047845093272963e-07,
|
|
"loss": 0.6378,
|
|
"step": 19885
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8010928657657521e-07,
|
|
"loss": 0.6416,
|
|
"step": 19890
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7974046279589304e-07,
|
|
"loss": 0.6047,
|
|
"step": 19895
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.793719797438895e-07,
|
|
"loss": 0.6747,
|
|
"step": 19900
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7900383757362913e-07,
|
|
"loss": 0.6308,
|
|
"step": 19905
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7863603643803481e-07,
|
|
"loss": 0.6584,
|
|
"step": 19910
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.782685764898878e-07,
|
|
"loss": 0.6723,
|
|
"step": 19915
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.779014578818283e-07,
|
|
"loss": 0.6617,
|
|
"step": 19920
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.775346807663538e-07,
|
|
"loss": 0.6726,
|
|
"step": 19925
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.771682452958202e-07,
|
|
"loss": 0.6528,
|
|
"step": 19930
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7680215162244228e-07,
|
|
"loss": 0.6322,
|
|
"step": 19935
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7643639989829128e-07,
|
|
"loss": 0.6696,
|
|
"step": 19940
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7607099027529792e-07,
|
|
"loss": 0.6936,
|
|
"step": 19945
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7570592290524966e-07,
|
|
"loss": 0.6281,
|
|
"step": 19950
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7534119793979286e-07,
|
|
"loss": 0.6463,
|
|
"step": 19955
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7497681553043086e-07,
|
|
"loss": 0.6862,
|
|
"step": 19960
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7461277582852473e-07,
|
|
"loss": 0.643,
|
|
"step": 19965
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7424907898529406e-07,
|
|
"loss": 0.6482,
|
|
"step": 19970
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7388572515181444e-07,
|
|
"loss": 0.6563,
|
|
"step": 19975
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7352271447902033e-07,
|
|
"loss": 0.6614,
|
|
"step": 19980
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.731600471177037e-07,
|
|
"loss": 0.6491,
|
|
"step": 19985
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.727977232185125e-07,
|
|
"loss": 0.6552,
|
|
"step": 19990
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7243574293195363e-07,
|
|
"loss": 0.6329,
|
|
"step": 19995
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7207410640838992e-07,
|
|
"loss": 0.6362,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"eval_loss": 0.6180657744407654,
|
|
"eval_runtime": 139.6405,
|
|
"eval_samples_per_second": 16.944,
|
|
"eval_steps_per_second": 2.829,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7171281379804282e-07,
|
|
"loss": 0.6774,
|
|
"step": 20005
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7135186525098965e-07,
|
|
"loss": 0.6437,
|
|
"step": 20010
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.709912609171651e-07,
|
|
"loss": 0.6578,
|
|
"step": 20015
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7063100094636195e-07,
|
|
"loss": 0.661,
|
|
"step": 20020
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.7027108548822788e-07,
|
|
"loss": 0.6436,
|
|
"step": 20025
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.6991151469226928e-07,
|
|
"loss": 0.7003,
|
|
"step": 20030
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.695522887078491e-07,
|
|
"loss": 0.6573,
|
|
"step": 20035
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.6919340768418577e-07,
|
|
"loss": 0.6348,
|
|
"step": 20040
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.6883487177035616e-07,
|
|
"loss": 0.6514,
|
|
"step": 20045
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.6847668111529234e-07,
|
|
"loss": 0.6425,
|
|
"step": 20050
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.681188358677842e-07,
|
|
"loss": 0.6413,
|
|
"step": 20055
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.6776133617647724e-07,
|
|
"loss": 0.6331,
|
|
"step": 20060
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.674041821898735e-07,
|
|
"loss": 0.6977,
|
|
"step": 20065
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.670473740563323e-07,
|
|
"loss": 0.6412,
|
|
"step": 20070
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.666909119240678e-07,
|
|
"loss": 0.6476,
|
|
"step": 20075
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.6633479594115184e-07,
|
|
"loss": 0.6352,
|
|
"step": 20080
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.6597902625551185e-07,
|
|
"loss": 0.7082,
|
|
"step": 20085
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.6562360301493106e-07,
|
|
"loss": 0.6911,
|
|
"step": 20090
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.6526852636704968e-07,
|
|
"loss": 0.6373,
|
|
"step": 20095
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.6491379645936298e-07,
|
|
"loss": 0.6378,
|
|
"step": 20100
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.6455941343922354e-07,
|
|
"loss": 0.6746,
|
|
"step": 20105
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.642053774538379e-07,
|
|
"loss": 0.6449,
|
|
"step": 20110
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.6385168865027012e-07,
|
|
"loss": 0.6542,
|
|
"step": 20115
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.6349834717543975e-07,
|
|
"loss": 0.637,
|
|
"step": 20120
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.63145353176121e-07,
|
|
"loss": 0.6517,
|
|
"step": 20125
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.6279270679894507e-07,
|
|
"loss": 0.6613,
|
|
"step": 20130
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.6244040819039772e-07,
|
|
"loss": 0.6674,
|
|
"step": 20135
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.6208845749682144e-07,
|
|
"loss": 0.641,
|
|
"step": 20140
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.617368548644129e-07,
|
|
"loss": 0.6318,
|
|
"step": 20145
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.6138560043922488e-07,
|
|
"loss": 0.6804,
|
|
"step": 20150
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.6103469436716587e-07,
|
|
"loss": 0.6316,
|
|
"step": 20155
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.606841367939984e-07,
|
|
"loss": 0.6841,
|
|
"step": 20160
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.603339278653414e-07,
|
|
"loss": 0.6406,
|
|
"step": 20165
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5998406772666916e-07,
|
|
"loss": 0.6244,
|
|
"step": 20170
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.596345565233096e-07,
|
|
"loss": 0.6496,
|
|
"step": 20175
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.592853944004473e-07,
|
|
"loss": 0.6582,
|
|
"step": 20180
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5893658150312071e-07,
|
|
"loss": 0.6606,
|
|
"step": 20185
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5858811797622418e-07,
|
|
"loss": 0.6386,
|
|
"step": 20190
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.582400039645062e-07,
|
|
"loss": 0.6196,
|
|
"step": 20195
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5789223961257003e-07,
|
|
"loss": 0.6316,
|
|
"step": 20200
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5754482506487465e-07,
|
|
"loss": 0.6206,
|
|
"step": 20205
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5719776046573207e-07,
|
|
"loss": 0.6245,
|
|
"step": 20210
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5685104595931054e-07,
|
|
"loss": 0.663,
|
|
"step": 20215
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5650468168963249e-07,
|
|
"loss": 0.6784,
|
|
"step": 20220
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5615866780057385e-07,
|
|
"loss": 0.5968,
|
|
"step": 20225
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5581300443586643e-07,
|
|
"loss": 0.6721,
|
|
"step": 20230
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5546769173909534e-07,
|
|
"loss": 0.659,
|
|
"step": 20235
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.551227298537011e-07,
|
|
"loss": 0.6432,
|
|
"step": 20240
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.547781189229771e-07,
|
|
"loss": 0.6608,
|
|
"step": 20245
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.54433859090072e-07,
|
|
"loss": 0.665,
|
|
"step": 20250
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5408995049798888e-07,
|
|
"loss": 0.6538,
|
|
"step": 20255
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.537463932895836e-07,
|
|
"loss": 0.6964,
|
|
"step": 20260
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5340318760756731e-07,
|
|
"loss": 0.642,
|
|
"step": 20265
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5306033359450454e-07,
|
|
"loss": 0.6475,
|
|
"step": 20270
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5271783139281357e-07,
|
|
"loss": 0.6927,
|
|
"step": 20275
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.523756811447674e-07,
|
|
"loss": 0.6862,
|
|
"step": 20280
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5203388299249176e-07,
|
|
"loss": 0.6319,
|
|
"step": 20285
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.516924370779673e-07,
|
|
"loss": 0.649,
|
|
"step": 20290
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.513513435430267e-07,
|
|
"loss": 0.6765,
|
|
"step": 20295
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.5101060252935783e-07,
|
|
"loss": 0.6901,
|
|
"step": 20300
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.50670214178502e-07,
|
|
"loss": 0.6132,
|
|
"step": 20305
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.503301786318526e-07,
|
|
"loss": 0.6503,
|
|
"step": 20310
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4999049603065805e-07,
|
|
"loss": 0.6773,
|
|
"step": 20315
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.496511665160195e-07,
|
|
"loss": 0.6379,
|
|
"step": 20320
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4931219022889107e-07,
|
|
"loss": 0.6177,
|
|
"step": 20325
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4897356731008125e-07,
|
|
"loss": 0.6606,
|
|
"step": 20330
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.486352979002503e-07,
|
|
"loss": 0.6543,
|
|
"step": 20335
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4829738213991328e-07,
|
|
"loss": 0.6527,
|
|
"step": 20340
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4795982016943654e-07,
|
|
"loss": 0.6489,
|
|
"step": 20345
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.476226121290408e-07,
|
|
"loss": 0.6398,
|
|
"step": 20350
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4728575815879973e-07,
|
|
"loss": 0.6249,
|
|
"step": 20355
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.469492583986387e-07,
|
|
"loss": 0.6764,
|
|
"step": 20360
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4661311298833755e-07,
|
|
"loss": 0.6486,
|
|
"step": 20365
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4627732206752786e-07,
|
|
"loss": 0.6423,
|
|
"step": 20370
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4594188577569412e-07,
|
|
"loss": 0.6952,
|
|
"step": 20375
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4560680425217364e-07,
|
|
"loss": 0.684,
|
|
"step": 20380
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4527207763615647e-07,
|
|
"loss": 0.6607,
|
|
"step": 20385
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4493770606668565e-07,
|
|
"loss": 0.6086,
|
|
"step": 20390
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4460368968265524e-07,
|
|
"loss": 0.625,
|
|
"step": 20395
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4427002862281356e-07,
|
|
"loss": 0.6509,
|
|
"step": 20400
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.439367230257602e-07,
|
|
"loss": 0.6514,
|
|
"step": 20405
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4360377302994708e-07,
|
|
"loss": 0.6664,
|
|
"step": 20410
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4327117877367933e-07,
|
|
"loss": 0.6719,
|
|
"step": 20415
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4293894039511324e-07,
|
|
"loss": 0.6728,
|
|
"step": 20420
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4260705803225838e-07,
|
|
"loss": 0.617,
|
|
"step": 20425
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4227553182297492e-07,
|
|
"loss": 0.6401,
|
|
"step": 20430
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4194436190497638e-07,
|
|
"loss": 0.6788,
|
|
"step": 20435
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.416135484158284e-07,
|
|
"loss": 0.6974,
|
|
"step": 20440
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4128309149294694e-07,
|
|
"loss": 0.6263,
|
|
"step": 20445
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4095299127360183e-07,
|
|
"loss": 0.6399,
|
|
"step": 20450
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4062324789491352e-07,
|
|
"loss": 0.6801,
|
|
"step": 20455
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.4029386149385425e-07,
|
|
"loss": 0.6187,
|
|
"step": 20460
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3996483220724876e-07,
|
|
"loss": 0.63,
|
|
"step": 20465
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.396361601717726e-07,
|
|
"loss": 0.6627,
|
|
"step": 20470
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3930784552395381e-07,
|
|
"loss": 0.6438,
|
|
"step": 20475
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.389798884001706e-07,
|
|
"loss": 0.6468,
|
|
"step": 20480
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3865228893665393e-07,
|
|
"loss": 0.588,
|
|
"step": 20485
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3832504726948623e-07,
|
|
"loss": 0.6959,
|
|
"step": 20490
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3799816353460003e-07,
|
|
"loss": 0.658,
|
|
"step": 20495
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3767163786778046e-07,
|
|
"loss": 0.6279,
|
|
"step": 20500
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3734547040466348e-07,
|
|
"loss": 0.6533,
|
|
"step": 20505
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3701966128073605e-07,
|
|
"loss": 0.6677,
|
|
"step": 20510
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3669421063133623e-07,
|
|
"loss": 0.6677,
|
|
"step": 20515
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3636911859165357e-07,
|
|
"loss": 0.6375,
|
|
"step": 20520
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3604438529672913e-07,
|
|
"loss": 0.6312,
|
|
"step": 20525
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3572001088145312e-07,
|
|
"loss": 0.6548,
|
|
"step": 20530
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3539599548056879e-07,
|
|
"loss": 0.6392,
|
|
"step": 20535
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.350723392286689e-07,
|
|
"loss": 0.6157,
|
|
"step": 20540
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3474904226019736e-07,
|
|
"loss": 0.6478,
|
|
"step": 20545
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.3442610470944925e-07,
|
|
"loss": 0.6769,
|
|
"step": 20550
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 1.341035267105699e-07,
|
|
"loss": 0.6771,
|
|
"step": 20555
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.3378130839755532e-07,
|
|
"loss": 0.6579,
|
|
"step": 20560
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.3345944990425195e-07,
|
|
"loss": 0.6595,
|
|
"step": 20565
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.3313795136435736e-07,
|
|
"loss": 0.6163,
|
|
"step": 20570
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.3281681291141955e-07,
|
|
"loss": 0.6473,
|
|
"step": 20575
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.3249603467883586e-07,
|
|
"loss": 0.6347,
|
|
"step": 20580
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.3217561679985545e-07,
|
|
"loss": 0.6629,
|
|
"step": 20585
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.3185555940757674e-07,
|
|
"loss": 0.6555,
|
|
"step": 20590
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.3153586263494876e-07,
|
|
"loss": 0.6274,
|
|
"step": 20595
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.3121652661477112e-07,
|
|
"loss": 0.6048,
|
|
"step": 20600
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.3089755147969294e-07,
|
|
"loss": 0.6473,
|
|
"step": 20605
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.3057893736221392e-07,
|
|
"loss": 0.6855,
|
|
"step": 20610
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.3026068439468318e-07,
|
|
"loss": 0.6465,
|
|
"step": 20615
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2994279270930052e-07,
|
|
"loss": 0.6463,
|
|
"step": 20620
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2962526243811577e-07,
|
|
"loss": 0.6503,
|
|
"step": 20625
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2930809371302741e-07,
|
|
"loss": 0.5882,
|
|
"step": 20630
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.289912866657854e-07,
|
|
"loss": 0.6766,
|
|
"step": 20635
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2867484142798813e-07,
|
|
"loss": 0.6454,
|
|
"step": 20640
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.283587581310841e-07,
|
|
"loss": 0.6341,
|
|
"step": 20645
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2804303690637197e-07,
|
|
"loss": 0.6512,
|
|
"step": 20650
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2772767788499917e-07,
|
|
"loss": 0.6695,
|
|
"step": 20655
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.274126811979639e-07,
|
|
"loss": 0.6492,
|
|
"step": 20660
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2709804697611193e-07,
|
|
"loss": 0.6555,
|
|
"step": 20665
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.267837753501403e-07,
|
|
"loss": 0.616,
|
|
"step": 20670
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2646986645059454e-07,
|
|
"loss": 0.6314,
|
|
"step": 20675
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.261563204078695e-07,
|
|
"loss": 0.6754,
|
|
"step": 20680
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2584313735220987e-07,
|
|
"loss": 0.6445,
|
|
"step": 20685
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.255303174137089e-07,
|
|
"loss": 0.6318,
|
|
"step": 20690
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2521786072230933e-07,
|
|
"loss": 0.6744,
|
|
"step": 20695
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.249057674078028e-07,
|
|
"loss": 0.6755,
|
|
"step": 20700
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2459403759983023e-07,
|
|
"loss": 0.6621,
|
|
"step": 20705
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2428267142788195e-07,
|
|
"loss": 0.5975,
|
|
"step": 20710
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2397166902129595e-07,
|
|
"loss": 0.6464,
|
|
"step": 20715
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2366103050926057e-07,
|
|
"loss": 0.6505,
|
|
"step": 20720
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2335075602081202e-07,
|
|
"loss": 0.6541,
|
|
"step": 20725
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2304084568483552e-07,
|
|
"loss": 0.6424,
|
|
"step": 20730
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2273129963006558e-07,
|
|
"loss": 0.6359,
|
|
"step": 20735
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.224221179850846e-07,
|
|
"loss": 0.6211,
|
|
"step": 20740
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2211330087832404e-07,
|
|
"loss": 0.6651,
|
|
"step": 20745
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.218048484380636e-07,
|
|
"loss": 0.6969,
|
|
"step": 20750
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2149676079243198e-07,
|
|
"loss": 0.6476,
|
|
"step": 20755
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.211890380694065e-07,
|
|
"loss": 0.6507,
|
|
"step": 20760
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2088168039681168e-07,
|
|
"loss": 0.6329,
|
|
"step": 20765
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2057468790232195e-07,
|
|
"loss": 0.6363,
|
|
"step": 20770
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2026806071345885e-07,
|
|
"loss": 0.6098,
|
|
"step": 20775
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.1996179895759262e-07,
|
|
"loss": 0.5963,
|
|
"step": 20780
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.1965590276194215e-07,
|
|
"loss": 0.6654,
|
|
"step": 20785
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.1935037225357392e-07,
|
|
"loss": 0.6525,
|
|
"step": 20790
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.190452075594024e-07,
|
|
"loss": 0.6529,
|
|
"step": 20795
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.1874040880619041e-07,
|
|
"loss": 0.6519,
|
|
"step": 20800
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"eval_loss": 0.6171961426734924,
|
|
"eval_runtime": 139.723,
|
|
"eval_samples_per_second": 16.934,
|
|
"eval_steps_per_second": 2.827,
|
|
"step": 20800
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.184359761205489e-07,
|
|
"loss": 0.6229,
|
|
"step": 20805
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.181319096289366e-07,
|
|
"loss": 0.6511,
|
|
"step": 20810
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1782820945765958e-07,
|
|
"loss": 0.6513,
|
|
"step": 20815
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1752487573287296e-07,
|
|
"loss": 0.6551,
|
|
"step": 20820
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1722190858057846e-07,
|
|
"loss": 0.6649,
|
|
"step": 20825
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.169193081266262e-07,
|
|
"loss": 0.631,
|
|
"step": 20830
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1661707449671343e-07,
|
|
"loss": 0.6674,
|
|
"step": 20835
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1631520781638582e-07,
|
|
"loss": 0.649,
|
|
"step": 20840
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1601370821103607e-07,
|
|
"loss": 0.5991,
|
|
"step": 20845
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1571257580590421e-07,
|
|
"loss": 0.6397,
|
|
"step": 20850
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1541181072607831e-07,
|
|
"loss": 0.6698,
|
|
"step": 20855
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1511141309649364e-07,
|
|
"loss": 0.6566,
|
|
"step": 20860
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1481138304193228e-07,
|
|
"loss": 0.6261,
|
|
"step": 20865
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1451172068702464e-07,
|
|
"loss": 0.6449,
|
|
"step": 20870
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1421242615624771e-07,
|
|
"loss": 0.6521,
|
|
"step": 20875
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1391349957392571e-07,
|
|
"loss": 0.6329,
|
|
"step": 20880
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1361494106423008e-07,
|
|
"loss": 0.6462,
|
|
"step": 20885
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1331675075117963e-07,
|
|
"loss": 0.6567,
|
|
"step": 20890
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1301892875864005e-07,
|
|
"loss": 0.6515,
|
|
"step": 20895
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.127214752103236e-07,
|
|
"loss": 0.6775,
|
|
"step": 20900
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1242439022979055e-07,
|
|
"loss": 0.6874,
|
|
"step": 20905
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1212767394044697e-07,
|
|
"loss": 0.6659,
|
|
"step": 20910
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1183132646554605e-07,
|
|
"loss": 0.6481,
|
|
"step": 20915
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1153534792818852e-07,
|
|
"loss": 0.6514,
|
|
"step": 20920
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1123973845132095e-07,
|
|
"loss": 0.6444,
|
|
"step": 20925
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1094449815773699e-07,
|
|
"loss": 0.6424,
|
|
"step": 20930
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1064962717007675e-07,
|
|
"loss": 0.611,
|
|
"step": 20935
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1035512561082738e-07,
|
|
"loss": 0.6177,
|
|
"step": 20940
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.1006099360232212e-07,
|
|
"loss": 0.6323,
|
|
"step": 20945
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0976723126674059e-07,
|
|
"loss": 0.6681,
|
|
"step": 20950
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.094738387261096e-07,
|
|
"loss": 0.6665,
|
|
"step": 20955
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0918081610230157e-07,
|
|
"loss": 0.6414,
|
|
"step": 20960
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0888816351703555e-07,
|
|
"loss": 0.6165,
|
|
"step": 20965
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0859588109187678e-07,
|
|
"loss": 0.6792,
|
|
"step": 20970
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0830396894823712e-07,
|
|
"loss": 0.6507,
|
|
"step": 20975
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0801242720737425e-07,
|
|
"loss": 0.6051,
|
|
"step": 20980
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0772125599039183e-07,
|
|
"loss": 0.6421,
|
|
"step": 20985
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0743045541824015e-07,
|
|
"loss": 0.6227,
|
|
"step": 20990
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0714002561171521e-07,
|
|
"loss": 0.6296,
|
|
"step": 20995
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0684996669145874e-07,
|
|
"loss": 0.6638,
|
|
"step": 21000
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0656027877795904e-07,
|
|
"loss": 0.643,
|
|
"step": 21005
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0627096199154983e-07,
|
|
"loss": 0.6595,
|
|
"step": 21010
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0598201645241079e-07,
|
|
"loss": 0.6595,
|
|
"step": 21015
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0569344228056708e-07,
|
|
"loss": 0.6446,
|
|
"step": 21020
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0540523959589042e-07,
|
|
"loss": 0.6615,
|
|
"step": 21025
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0511740851809747e-07,
|
|
"loss": 0.6832,
|
|
"step": 21030
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0482994916675047e-07,
|
|
"loss": 0.6588,
|
|
"step": 21035
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0454286166125814e-07,
|
|
"loss": 0.6532,
|
|
"step": 21040
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 1.0425614612087363e-07,
|
|
"loss": 0.6882,
|
|
"step": 21045
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0396980266469623e-07,
|
|
"loss": 0.6266,
|
|
"step": 21050
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0368383141167059e-07,
|
|
"loss": 0.6535,
|
|
"step": 21055
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0339823248058677e-07,
|
|
"loss": 0.6547,
|
|
"step": 21060
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0311300599007988e-07,
|
|
"loss": 0.6387,
|
|
"step": 21065
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0282815205863038e-07,
|
|
"loss": 0.6395,
|
|
"step": 21070
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0254367080456449e-07,
|
|
"loss": 0.6433,
|
|
"step": 21075
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0225956234605316e-07,
|
|
"loss": 0.6727,
|
|
"step": 21080
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0197582680111228e-07,
|
|
"loss": 0.6174,
|
|
"step": 21085
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0169246428760359e-07,
|
|
"loss": 0.6452,
|
|
"step": 21090
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0140947492323315e-07,
|
|
"loss": 0.628,
|
|
"step": 21095
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0112685882555228e-07,
|
|
"loss": 0.6451,
|
|
"step": 21100
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0084461611195705e-07,
|
|
"loss": 0.6135,
|
|
"step": 21105
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0056274689968902e-07,
|
|
"loss": 0.6015,
|
|
"step": 21110
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0028125130583409e-07,
|
|
"loss": 0.6446,
|
|
"step": 21115
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.0000012944732284e-07,
|
|
"loss": 0.6519,
|
|
"step": 21120
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.971938144093129e-08,
|
|
"loss": 0.6359,
|
|
"step": 21125
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.943900740327937e-08,
|
|
"loss": 0.6572,
|
|
"step": 21130
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.915900745083194e-08,
|
|
"loss": 0.6668,
|
|
"step": 21135
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.887938169989896e-08,
|
|
"loss": 0.6255,
|
|
"step": 21140
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.860013026663428e-08,
|
|
"loss": 0.6186,
|
|
"step": 21145
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.832125326703644e-08,
|
|
"loss": 0.6505,
|
|
"step": 21150
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.804275081694846e-08,
|
|
"loss": 0.6368,
|
|
"step": 21155
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.776462303205824e-08,
|
|
"loss": 0.6271,
|
|
"step": 21160
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.748687002789734e-08,
|
|
"loss": 0.6784,
|
|
"step": 21165
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.720949191984185e-08,
|
|
"loss": 0.6309,
|
|
"step": 21170
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.693248882311256e-08,
|
|
"loss": 0.6185,
|
|
"step": 21175
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.665586085277388e-08,
|
|
"loss": 0.6288,
|
|
"step": 21180
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.637960812373457e-08,
|
|
"loss": 0.7016,
|
|
"step": 21185
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.610373075074806e-08,
|
|
"loss": 0.6614,
|
|
"step": 21190
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.582822884841101e-08,
|
|
"loss": 0.6505,
|
|
"step": 21195
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.555310253116467e-08,
|
|
"loss": 0.6784,
|
|
"step": 21200
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.527835191329392e-08,
|
|
"loss": 0.6565,
|
|
"step": 21205
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.500397710892816e-08,
|
|
"loss": 0.6305,
|
|
"step": 21210
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.472997823203999e-08,
|
|
"loss": 0.6524,
|
|
"step": 21215
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.445635539644615e-08,
|
|
"loss": 0.6717,
|
|
"step": 21220
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.418310871580737e-08,
|
|
"loss": 0.6429,
|
|
"step": 21225
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.391023830362799e-08,
|
|
"loss": 0.6434,
|
|
"step": 21230
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.363774427325577e-08,
|
|
"loss": 0.6648,
|
|
"step": 21235
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.336562673788228e-08,
|
|
"loss": 0.636,
|
|
"step": 21240
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.309388581054322e-08,
|
|
"loss": 0.6771,
|
|
"step": 21245
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.282252160411719e-08,
|
|
"loss": 0.6502,
|
|
"step": 21250
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.255153423132622e-08,
|
|
"loss": 0.6437,
|
|
"step": 21255
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.22809238047365e-08,
|
|
"loss": 0.6704,
|
|
"step": 21260
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.201069043675724e-08,
|
|
"loss": 0.6404,
|
|
"step": 21265
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.174083423964062e-08,
|
|
"loss": 0.6834,
|
|
"step": 21270
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.147135532548311e-08,
|
|
"loss": 0.6516,
|
|
"step": 21275
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.120225380622371e-08,
|
|
"loss": 0.671,
|
|
"step": 21280
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.093352979364466e-08,
|
|
"loss": 0.6583,
|
|
"step": 21285
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.066518339937157e-08,
|
|
"loss": 0.6467,
|
|
"step": 21290
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 9.03972147348735e-08,
|
|
"loss": 0.5999,
|
|
"step": 21295
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 9.012962391146217e-08,
|
|
"loss": 0.6589,
|
|
"step": 21300
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.986241104029224e-08,
|
|
"loss": 0.647,
|
|
"step": 21305
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.959557623236202e-08,
|
|
"loss": 0.6199,
|
|
"step": 21310
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.93291195985122e-08,
|
|
"loss": 0.6762,
|
|
"step": 21315
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.906304124942632e-08,
|
|
"loss": 0.6446,
|
|
"step": 21320
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.879734129563132e-08,
|
|
"loss": 0.6504,
|
|
"step": 21325
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.853201984749658e-08,
|
|
"loss": 0.6898,
|
|
"step": 21330
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.826707701523428e-08,
|
|
"loss": 0.6575,
|
|
"step": 21335
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.800251290889927e-08,
|
|
"loss": 0.6208,
|
|
"step": 21340
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.773832763838939e-08,
|
|
"loss": 0.6662,
|
|
"step": 21345
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.74745213134448e-08,
|
|
"loss": 0.6218,
|
|
"step": 21350
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.721109404364812e-08,
|
|
"loss": 0.6747,
|
|
"step": 21355
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.694804593842519e-08,
|
|
"loss": 0.693,
|
|
"step": 21360
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.668537710704371e-08,
|
|
"loss": 0.6482,
|
|
"step": 21365
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.642308765861406e-08,
|
|
"loss": 0.6946,
|
|
"step": 21370
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.616117770208864e-08,
|
|
"loss": 0.655,
|
|
"step": 21375
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.58996473462631e-08,
|
|
"loss": 0.6549,
|
|
"step": 21380
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.563849669977463e-08,
|
|
"loss": 0.6444,
|
|
"step": 21385
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.537772587110281e-08,
|
|
"loss": 0.646,
|
|
"step": 21390
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.511733496856999e-08,
|
|
"loss": 0.6792,
|
|
"step": 21395
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.485732410033985e-08,
|
|
"loss": 0.6037,
|
|
"step": 21400
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.459769337441868e-08,
|
|
"loss": 0.6055,
|
|
"step": 21405
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.433844289865521e-08,
|
|
"loss": 0.6427,
|
|
"step": 21410
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.407957278073952e-08,
|
|
"loss": 0.6628,
|
|
"step": 21415
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.382108312820401e-08,
|
|
"loss": 0.6569,
|
|
"step": 21420
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.356297404842305e-08,
|
|
"loss": 0.659,
|
|
"step": 21425
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.330524564861297e-08,
|
|
"loss": 0.6279,
|
|
"step": 21430
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.304789803583201e-08,
|
|
"loss": 0.6281,
|
|
"step": 21435
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.279093131697968e-08,
|
|
"loss": 0.6327,
|
|
"step": 21440
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.253434559879835e-08,
|
|
"loss": 0.6402,
|
|
"step": 21445
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.227814098787111e-08,
|
|
"loss": 0.6601,
|
|
"step": 21450
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.202231759062305e-08,
|
|
"loss": 0.6355,
|
|
"step": 21455
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.17668755133214e-08,
|
|
"loss": 0.663,
|
|
"step": 21460
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.151181486207414e-08,
|
|
"loss": 0.6715,
|
|
"step": 21465
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.125713574283155e-08,
|
|
"loss": 0.6456,
|
|
"step": 21470
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.100283826138477e-08,
|
|
"loss": 0.6243,
|
|
"step": 21475
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.074892252336718e-08,
|
|
"loss": 0.6273,
|
|
"step": 21480
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.049538863425298e-08,
|
|
"loss": 0.6379,
|
|
"step": 21485
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 8.024223669935782e-08,
|
|
"loss": 0.6303,
|
|
"step": 21490
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 7.9989466823839e-08,
|
|
"loss": 0.6826,
|
|
"step": 21495
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 7.973707911269489e-08,
|
|
"loss": 0.6236,
|
|
"step": 21500
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 7.948507367076518e-08,
|
|
"loss": 0.6341,
|
|
"step": 21505
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 7.923345060273046e-08,
|
|
"loss": 0.6677,
|
|
"step": 21510
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 7.898221001311312e-08,
|
|
"loss": 0.6299,
|
|
"step": 21515
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 7.873135200627623e-08,
|
|
"loss": 0.6272,
|
|
"step": 21520
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 7.848087668642377e-08,
|
|
"loss": 0.6455,
|
|
"step": 21525
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 7.823078415760143e-08,
|
|
"loss": 0.6406,
|
|
"step": 21530
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 7.798107452369517e-08,
|
|
"loss": 0.7099,
|
|
"step": 21535
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 7.773174788843218e-08,
|
|
"loss": 0.6831,
|
|
"step": 21540
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.74828043553808e-08,
|
|
"loss": 0.6205,
|
|
"step": 21545
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.723424402794998e-08,
|
|
"loss": 0.649,
|
|
"step": 21550
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.698606700938936e-08,
|
|
"loss": 0.6636,
|
|
"step": 21555
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.673827340278937e-08,
|
|
"loss": 0.6314,
|
|
"step": 21560
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.649086331108178e-08,
|
|
"loss": 0.6969,
|
|
"step": 21565
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.624383683703839e-08,
|
|
"loss": 0.6516,
|
|
"step": 21570
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.599719408327155e-08,
|
|
"loss": 0.7174,
|
|
"step": 21575
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.575093515223496e-08,
|
|
"loss": 0.6436,
|
|
"step": 21580
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.550506014622215e-08,
|
|
"loss": 0.6571,
|
|
"step": 21585
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.525956916736753e-08,
|
|
"loss": 0.6919,
|
|
"step": 21590
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.501446231764607e-08,
|
|
"loss": 0.6461,
|
|
"step": 21595
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.47697396988729e-08,
|
|
"loss": 0.6169,
|
|
"step": 21600
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"eval_loss": 0.6164625883102417,
|
|
"eval_runtime": 140.1585,
|
|
"eval_samples_per_second": 16.881,
|
|
"eval_steps_per_second": 2.818,
|
|
"step": 21600
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.452540141270358e-08,
|
|
"loss": 0.643,
|
|
"step": 21605
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.428144756063415e-08,
|
|
"loss": 0.6571,
|
|
"step": 21610
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.403787824400098e-08,
|
|
"loss": 0.6473,
|
|
"step": 21615
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.379469356398072e-08,
|
|
"loss": 0.6089,
|
|
"step": 21620
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.355189362158997e-08,
|
|
"loss": 0.6356,
|
|
"step": 21625
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.330947851768588e-08,
|
|
"loss": 0.6338,
|
|
"step": 21630
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.306744835296563e-08,
|
|
"loss": 0.6296,
|
|
"step": 21635
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.282580322796606e-08,
|
|
"loss": 0.6403,
|
|
"step": 21640
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.258454324306495e-08,
|
|
"loss": 0.649,
|
|
"step": 21645
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.23436684984794e-08,
|
|
"loss": 0.6433,
|
|
"step": 21650
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.210317909426656e-08,
|
|
"loss": 0.6741,
|
|
"step": 21655
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.186307513032364e-08,
|
|
"loss": 0.6607,
|
|
"step": 21660
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.162335670638797e-08,
|
|
"loss": 0.6845,
|
|
"step": 21665
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.138402392203646e-08,
|
|
"loss": 0.5908,
|
|
"step": 21670
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.114507687668559e-08,
|
|
"loss": 0.6756,
|
|
"step": 21675
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.090651566959216e-08,
|
|
"loss": 0.6435,
|
|
"step": 21680
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.066834039985237e-08,
|
|
"loss": 0.6275,
|
|
"step": 21685
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.043055116640206e-08,
|
|
"loss": 0.6286,
|
|
"step": 21690
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 7.019314806801679e-08,
|
|
"loss": 0.6561,
|
|
"step": 21695
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.99561312033119e-08,
|
|
"loss": 0.6653,
|
|
"step": 21700
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.971950067074206e-08,
|
|
"loss": 0.6333,
|
|
"step": 21705
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.948325656860143e-08,
|
|
"loss": 0.6574,
|
|
"step": 21710
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.924739899502396e-08,
|
|
"loss": 0.6581,
|
|
"step": 21715
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.901192804798272e-08,
|
|
"loss": 0.6574,
|
|
"step": 21720
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.877684382529025e-08,
|
|
"loss": 0.6292,
|
|
"step": 21725
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.854214642459855e-08,
|
|
"loss": 0.6288,
|
|
"step": 21730
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.830783594339895e-08,
|
|
"loss": 0.6242,
|
|
"step": 21735
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.807391247902195e-08,
|
|
"loss": 0.6551,
|
|
"step": 21740
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.784037612863702e-08,
|
|
"loss": 0.6485,
|
|
"step": 21745
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.760722698925358e-08,
|
|
"loss": 0.6398,
|
|
"step": 21750
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.737446515771961e-08,
|
|
"loss": 0.6063,
|
|
"step": 21755
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.714209073072218e-08,
|
|
"loss": 0.6095,
|
|
"step": 21760
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.691010380478779e-08,
|
|
"loss": 0.6306,
|
|
"step": 21765
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.667850447628175e-08,
|
|
"loss": 0.5991,
|
|
"step": 21770
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.644729284140826e-08,
|
|
"loss": 0.6475,
|
|
"step": 21775
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.621646899621091e-08,
|
|
"loss": 0.6737,
|
|
"step": 21780
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.598603303657179e-08,
|
|
"loss": 0.6395,
|
|
"step": 21785
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.5755985058212e-08,
|
|
"loss": 0.6428,
|
|
"step": 21790
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.552632515669121e-08,
|
|
"loss": 0.6312,
|
|
"step": 21795
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.529705342740843e-08,
|
|
"loss": 0.6315,
|
|
"step": 21800
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.506816996560127e-08,
|
|
"loss": 0.6268,
|
|
"step": 21805
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.483967486634546e-08,
|
|
"loss": 0.664,
|
|
"step": 21810
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.461156822455638e-08,
|
|
"loss": 0.6397,
|
|
"step": 21815
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.438385013498726e-08,
|
|
"loss": 0.6273,
|
|
"step": 21820
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.415652069223032e-08,
|
|
"loss": 0.6245,
|
|
"step": 21825
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.392957999071602e-08,
|
|
"loss": 0.6921,
|
|
"step": 21830
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.370302812471384e-08,
|
|
"loss": 0.6685,
|
|
"step": 21835
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.34768651883314e-08,
|
|
"loss": 0.6563,
|
|
"step": 21840
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.325109127551465e-08,
|
|
"loss": 0.6676,
|
|
"step": 21845
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.302570648004834e-08,
|
|
"loss": 0.682,
|
|
"step": 21850
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.280071089555516e-08,
|
|
"loss": 0.63,
|
|
"step": 21855
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.257610461549634e-08,
|
|
"loss": 0.6781,
|
|
"step": 21860
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.235188773317146e-08,
|
|
"loss": 0.6647,
|
|
"step": 21865
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.212806034171836e-08,
|
|
"loss": 0.6611,
|
|
"step": 21870
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.190462253411277e-08,
|
|
"loss": 0.658,
|
|
"step": 21875
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.16815744031688e-08,
|
|
"loss": 0.6362,
|
|
"step": 21880
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.145891604153886e-08,
|
|
"loss": 0.64,
|
|
"step": 21885
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.123664754171331e-08,
|
|
"loss": 0.6428,
|
|
"step": 21890
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.101476899602043e-08,
|
|
"loss": 0.6626,
|
|
"step": 21895
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.079328049662668e-08,
|
|
"loss": 0.6502,
|
|
"step": 21900
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.057218213553661e-08,
|
|
"loss": 0.6694,
|
|
"step": 21905
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.035147400459217e-08,
|
|
"loss": 0.642,
|
|
"step": 21910
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.013115619547404e-08,
|
|
"loss": 0.6864,
|
|
"step": 21915
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.991122879970012e-08,
|
|
"loss": 0.636,
|
|
"step": 21920
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.969169190862644e-08,
|
|
"loss": 0.6338,
|
|
"step": 21925
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.947254561344628e-08,
|
|
"loss": 0.6647,
|
|
"step": 21930
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.9253790005191705e-08,
|
|
"loss": 0.65,
|
|
"step": 21935
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.90354251747317e-08,
|
|
"loss": 0.6274,
|
|
"step": 21940
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.8817451212772815e-08,
|
|
"loss": 0.6559,
|
|
"step": 21945
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.859986820985985e-08,
|
|
"loss": 0.6318,
|
|
"step": 21950
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.838267625637494e-08,
|
|
"loss": 0.6755,
|
|
"step": 21955
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.8165875442537594e-08,
|
|
"loss": 0.6342,
|
|
"step": 21960
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.7949465858404766e-08,
|
|
"loss": 0.6707,
|
|
"step": 21965
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.773344759387155e-08,
|
|
"loss": 0.6339,
|
|
"step": 21970
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.751782073866984e-08,
|
|
"loss": 0.6389,
|
|
"step": 21975
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.730258538236909e-08,
|
|
"loss": 0.6287,
|
|
"step": 21980
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.708774161437635e-08,
|
|
"loss": 0.6844,
|
|
"step": 21985
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.6873289523935775e-08,
|
|
"loss": 0.6497,
|
|
"step": 21990
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.665922920012878e-08,
|
|
"loss": 0.7022,
|
|
"step": 21995
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.644556073187445e-08,
|
|
"loss": 0.6279,
|
|
"step": 22000
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.6232284207928584e-08,
|
|
"loss": 0.6533,
|
|
"step": 22005
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.601939971688452e-08,
|
|
"loss": 0.635,
|
|
"step": 22010
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.580690734717241e-08,
|
|
"loss": 0.6474,
|
|
"step": 22015
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.559480718706e-08,
|
|
"loss": 0.6732,
|
|
"step": 22020
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.5383099324651684e-08,
|
|
"loss": 0.6351,
|
|
"step": 22025
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.5171783847889006e-08,
|
|
"loss": 0.6832,
|
|
"step": 22030
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.496086084455087e-08,
|
|
"loss": 0.6403,
|
|
"step": 22035
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.475033040225274e-08,
|
|
"loss": 0.6231,
|
|
"step": 22040
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.454019260844678e-08,
|
|
"loss": 0.6585,
|
|
"step": 22045
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.433044755042293e-08,
|
|
"loss": 0.6478,
|
|
"step": 22050
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.4121095315307173e-08,
|
|
"loss": 0.627,
|
|
"step": 22055
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.3912135990062726e-08,
|
|
"loss": 0.6431,
|
|
"step": 22060
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.370356966148914e-08,
|
|
"loss": 0.6224,
|
|
"step": 22065
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.3495396416223584e-08,
|
|
"loss": 0.6851,
|
|
"step": 22070
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.3287616340739084e-08,
|
|
"loss": 0.6471,
|
|
"step": 22075
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.308022952134561e-08,
|
|
"loss": 0.6231,
|
|
"step": 22080
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.287323604419014e-08,
|
|
"loss": 0.6358,
|
|
"step": 22085
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.266663599525578e-08,
|
|
"loss": 0.6723,
|
|
"step": 22090
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.246042946036244e-08,
|
|
"loss": 0.674,
|
|
"step": 22095
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.225461652516639e-08,
|
|
"loss": 0.6099,
|
|
"step": 22100
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.204919727516066e-08,
|
|
"loss": 0.6175,
|
|
"step": 22105
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.184417179567468e-08,
|
|
"loss": 0.6383,
|
|
"step": 22110
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.163954017187399e-08,
|
|
"loss": 0.6796,
|
|
"step": 22115
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.143530248876116e-08,
|
|
"loss": 0.6428,
|
|
"step": 22120
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.123145883117452e-08,
|
|
"loss": 0.7068,
|
|
"step": 22125
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.102800928378881e-08,
|
|
"loss": 0.6485,
|
|
"step": 22130
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.082495393111563e-08,
|
|
"loss": 0.6488,
|
|
"step": 22135
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.062229285750208e-08,
|
|
"loss": 0.639,
|
|
"step": 22140
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.0420026147131925e-08,
|
|
"loss": 0.6629,
|
|
"step": 22145
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.021815388402473e-08,
|
|
"loss": 0.6624,
|
|
"step": 22150
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.0016676152036974e-08,
|
|
"loss": 0.6282,
|
|
"step": 22155
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.981559303486038e-08,
|
|
"loss": 0.6319,
|
|
"step": 22160
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.9614904616023134e-08,
|
|
"loss": 0.6655,
|
|
"step": 22165
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.941461097888966e-08,
|
|
"loss": 0.6915,
|
|
"step": 22170
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.921471220666018e-08,
|
|
"loss": 0.5924,
|
|
"step": 22175
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.901520838237061e-08,
|
|
"loss": 0.6699,
|
|
"step": 22180
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.8816099588893436e-08,
|
|
"loss": 0.635,
|
|
"step": 22185
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.86173859089366e-08,
|
|
"loss": 0.6572,
|
|
"step": 22190
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.8419067425044094e-08,
|
|
"loss": 0.6296,
|
|
"step": 22195
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.822114421959545e-08,
|
|
"loss": 0.6598,
|
|
"step": 22200
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.8023616374806564e-08,
|
|
"loss": 0.6451,
|
|
"step": 22205
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.782648397272859e-08,
|
|
"loss": 0.6616,
|
|
"step": 22210
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.762974709524858e-08,
|
|
"loss": 0.6583,
|
|
"step": 22215
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.743340582408961e-08,
|
|
"loss": 0.66,
|
|
"step": 22220
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.723746024080988e-08,
|
|
"loss": 0.6213,
|
|
"step": 22225
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.70419104268035e-08,
|
|
"loss": 0.6316,
|
|
"step": 22230
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.6846756463300054e-08,
|
|
"loss": 0.6505,
|
|
"step": 22235
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.665199843136513e-08,
|
|
"loss": 0.6645,
|
|
"step": 22240
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.645763641189937e-08,
|
|
"loss": 0.6523,
|
|
"step": 22245
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.626367048563884e-08,
|
|
"loss": 0.6516,
|
|
"step": 22250
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.607010073315565e-08,
|
|
"loss": 0.638,
|
|
"step": 22255
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.587692723485681e-08,
|
|
"loss": 0.5924,
|
|
"step": 22260
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.5684150070984804e-08,
|
|
"loss": 0.6316,
|
|
"step": 22265
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.549176932161791e-08,
|
|
"loss": 0.6585,
|
|
"step": 22270
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.5299785066669205e-08,
|
|
"loss": 0.6547,
|
|
"step": 22275
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 4.5108197385887335e-08,
|
|
"loss": 0.6432,
|
|
"step": 22280
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.491700635885598e-08,
|
|
"loss": 0.6075,
|
|
"step": 22285
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.4726212064994493e-08,
|
|
"loss": 0.5892,
|
|
"step": 22290
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.453581458355704e-08,
|
|
"loss": 0.6202,
|
|
"step": 22295
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.4345813993632905e-08,
|
|
"loss": 0.6405,
|
|
"step": 22300
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.4156210374147075e-08,
|
|
"loss": 0.6393,
|
|
"step": 22305
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.396700380385898e-08,
|
|
"loss": 0.6548,
|
|
"step": 22310
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.377819436136332e-08,
|
|
"loss": 0.6813,
|
|
"step": 22315
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.358978212509012e-08,
|
|
"loss": 0.6689,
|
|
"step": 22320
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.340176717330413e-08,
|
|
"loss": 0.6631,
|
|
"step": 22325
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.3214149584105076e-08,
|
|
"loss": 0.6586,
|
|
"step": 22330
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.3026929435427516e-08,
|
|
"loss": 0.6643,
|
|
"step": 22335
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.2840106805041354e-08,
|
|
"loss": 0.654,
|
|
"step": 22340
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.2653681770550955e-08,
|
|
"loss": 0.6688,
|
|
"step": 22345
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.2467654409395484e-08,
|
|
"loss": 0.6761,
|
|
"step": 22350
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.228202479884946e-08,
|
|
"loss": 0.6667,
|
|
"step": 22355
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.209679301602165e-08,
|
|
"loss": 0.6749,
|
|
"step": 22360
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.191195913785561e-08,
|
|
"loss": 0.6396,
|
|
"step": 22365
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.1727523241129606e-08,
|
|
"loss": 0.6696,
|
|
"step": 22370
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.154348540245711e-08,
|
|
"loss": 0.6364,
|
|
"step": 22375
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.135984569828566e-08,
|
|
"loss": 0.6495,
|
|
"step": 22380
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.1176604204897434e-08,
|
|
"loss": 0.6496,
|
|
"step": 22385
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.099376099840968e-08,
|
|
"loss": 0.6268,
|
|
"step": 22390
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.0811316154773515e-08,
|
|
"loss": 0.6527,
|
|
"step": 22395
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.06292697497751e-08,
|
|
"loss": 0.651,
|
|
"step": 22400
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"eval_loss": 0.6161190867424011,
|
|
"eval_runtime": 139.4449,
|
|
"eval_samples_per_second": 16.967,
|
|
"eval_steps_per_second": 2.833,
|
|
"step": 22400
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.044762185903494e-08,
|
|
"loss": 0.6551,
|
|
"step": 22405
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.026637255800813e-08,
|
|
"loss": 0.6677,
|
|
"step": 22410
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.008552192198378e-08,
|
|
"loss": 0.6738,
|
|
"step": 22415
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.9905070026085784e-08,
|
|
"loss": 0.6642,
|
|
"step": 22420
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.9725016945272416e-08,
|
|
"loss": 0.6399,
|
|
"step": 22425
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.9545362754335955e-08,
|
|
"loss": 0.6706,
|
|
"step": 22430
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.936610752790326e-08,
|
|
"loss": 0.686,
|
|
"step": 22435
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.9187251340435653e-08,
|
|
"loss": 0.5905,
|
|
"step": 22440
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.900879426622794e-08,
|
|
"loss": 0.5991,
|
|
"step": 22445
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.8830736379409814e-08,
|
|
"loss": 0.6265,
|
|
"step": 22450
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.865307775394533e-08,
|
|
"loss": 0.6525,
|
|
"step": 22455
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.84758184636319e-08,
|
|
"loss": 0.6228,
|
|
"step": 22460
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.829895858210186e-08,
|
|
"loss": 0.6736,
|
|
"step": 22465
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.812249818282076e-08,
|
|
"loss": 0.6303,
|
|
"step": 22470
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.79464373390892e-08,
|
|
"loss": 0.6367,
|
|
"step": 22475
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.777077612404123e-08,
|
|
"loss": 0.6334,
|
|
"step": 22480
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.75955146106447e-08,
|
|
"loss": 0.6403,
|
|
"step": 22485
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.742065287170215e-08,
|
|
"loss": 0.6089,
|
|
"step": 22490
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.724619097984916e-08,
|
|
"loss": 0.6471,
|
|
"step": 22495
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.707212900755608e-08,
|
|
"loss": 0.6479,
|
|
"step": 22500
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.689846702712651e-08,
|
|
"loss": 0.6429,
|
|
"step": 22505
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.672520511069821e-08,
|
|
"loss": 0.6175,
|
|
"step": 22510
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.655234333024271e-08,
|
|
"loss": 0.6745,
|
|
"step": 22515
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.637988175756512e-08,
|
|
"loss": 0.6604,
|
|
"step": 22520
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 3.6207820464304814e-08,
|
|
"loss": 0.6285,
|
|
"step": 22525
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.603615952193417e-08,
|
|
"loss": 0.6314,
|
|
"step": 22530
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.5864899001759706e-08,
|
|
"loss": 0.6703,
|
|
"step": 22535
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.569403897492185e-08,
|
|
"loss": 0.6586,
|
|
"step": 22540
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.552357951239427e-08,
|
|
"loss": 0.6588,
|
|
"step": 22545
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.5353520684984096e-08,
|
|
"loss": 0.6623,
|
|
"step": 22550
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.51838625633325e-08,
|
|
"loss": 0.6619,
|
|
"step": 22555
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.501460521791399e-08,
|
|
"loss": 0.7056,
|
|
"step": 22560
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.484574871903656e-08,
|
|
"loss": 0.6647,
|
|
"step": 22565
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.467729313684153e-08,
|
|
"loss": 0.6277,
|
|
"step": 22570
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.4509238541304384e-08,
|
|
"loss": 0.6705,
|
|
"step": 22575
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.4341585002232945e-08,
|
|
"loss": 0.654,
|
|
"step": 22580
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.4174332589269385e-08,
|
|
"loss": 0.6352,
|
|
"step": 22585
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.4007481371888915e-08,
|
|
"loss": 0.6517,
|
|
"step": 22590
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.384103141940009e-08,
|
|
"loss": 0.6898,
|
|
"step": 22595
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.3674982800944604e-08,
|
|
"loss": 0.6067,
|
|
"step": 22600
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.350933558549751e-08,
|
|
"loss": 0.6752,
|
|
"step": 22605
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.334408984186765e-08,
|
|
"loss": 0.664,
|
|
"step": 22610
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.317924563869634e-08,
|
|
"loss": 0.6458,
|
|
"step": 22615
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.301480304445836e-08,
|
|
"loss": 0.7108,
|
|
"step": 22620
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.2850762127462184e-08,
|
|
"loss": 0.6561,
|
|
"step": 22625
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.268712295584841e-08,
|
|
"loss": 0.7372,
|
|
"step": 22630
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.252388559759156e-08,
|
|
"loss": 0.6515,
|
|
"step": 22635
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.2361050120499275e-08,
|
|
"loss": 0.6417,
|
|
"step": 22640
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.219861659221168e-08,
|
|
"loss": 0.6537,
|
|
"step": 22645
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.203658508020235e-08,
|
|
"loss": 0.6385,
|
|
"step": 22650
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.1874955651777667e-08,
|
|
"loss": 0.6112,
|
|
"step": 22655
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.171372837407738e-08,
|
|
"loss": 0.6437,
|
|
"step": 22660
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.155290331407357e-08,
|
|
"loss": 0.6222,
|
|
"step": 22665
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.1392480538571574e-08,
|
|
"loss": 0.6439,
|
|
"step": 22670
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.123246011420999e-08,
|
|
"loss": 0.6528,
|
|
"step": 22675
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.107284210745953e-08,
|
|
"loss": 0.6654,
|
|
"step": 22680
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.0913626584624266e-08,
|
|
"loss": 0.6474,
|
|
"step": 22685
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.0754813611840846e-08,
|
|
"loss": 0.6528,
|
|
"step": 22690
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.0596403255078954e-08,
|
|
"loss": 0.6708,
|
|
"step": 22695
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.043839558014083e-08,
|
|
"loss": 0.6365,
|
|
"step": 22700
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.028079065266142e-08,
|
|
"loss": 0.6687,
|
|
"step": 22705
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 3.012358853810859e-08,
|
|
"loss": 0.6645,
|
|
"step": 22710
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 2.9966789301782535e-08,
|
|
"loss": 0.6366,
|
|
"step": 22715
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 2.981039300881627e-08,
|
|
"loss": 0.6575,
|
|
"step": 22720
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 2.9654399724175828e-08,
|
|
"loss": 0.6966,
|
|
"step": 22725
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 2.949880951265904e-08,
|
|
"loss": 0.6842,
|
|
"step": 22730
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 2.9343622438896875e-08,
|
|
"loss": 0.6586,
|
|
"step": 22735
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 2.918883856735277e-08,
|
|
"loss": 0.6629,
|
|
"step": 22740
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 2.9034457962322513e-08,
|
|
"loss": 0.6283,
|
|
"step": 22745
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 2.8880480687934473e-08,
|
|
"loss": 0.6272,
|
|
"step": 22750
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 2.8726906808149486e-08,
|
|
"loss": 0.616,
|
|
"step": 22755
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 2.857373638676097e-08,
|
|
"loss": 0.6605,
|
|
"step": 22760
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 2.8420969487394143e-08,
|
|
"loss": 0.6212,
|
|
"step": 22765
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 2.826860617350746e-08,
|
|
"loss": 0.6844,
|
|
"step": 22770
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 2.8116646508391183e-08,
|
|
"loss": 0.645,
|
|
"step": 22775
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 2.7965090555168047e-08,
|
|
"loss": 0.6497,
|
|
"step": 22780
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 2.7813938376793134e-08,
|
|
"loss": 0.6361,
|
|
"step": 22785
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 2.7663190036053552e-08,
|
|
"loss": 0.5859,
|
|
"step": 22790
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 2.75128455955691e-08,
|
|
"loss": 0.6026,
|
|
"step": 22795
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 2.7362905117791268e-08,
|
|
"loss": 0.6694,
|
|
"step": 22800
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 24619,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 400,
|
|
"total_flos": 3187222391169024.0,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|