Files
where-llambo-7b/trainer_state.json
ModelHub XC 9e5bcdea17 初始化项目,由ModelHub XC社区提供模型
Model: amazingvince/where-llambo-7b
Source: Original Platform
2026-05-30 00:31:06 +08:00

27611 lines
548 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.926099870528801,
"eval_steps": 800,
"global_step": 22800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 8.097165991902834e-09,
"loss": 3.5744,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 4.048582995951417e-08,
"loss": 3.6858,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 8.097165991902834e-08,
"loss": 3.6628,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 1.214574898785425e-07,
"loss": 3.3928,
"step": 15
},
{
"epoch": 0.0,
"learning_rate": 1.6194331983805668e-07,
"loss": 3.0895,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 2.0242914979757083e-07,
"loss": 2.6585,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 2.42914979757085e-07,
"loss": 2.1214,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 2.8340080971659917e-07,
"loss": 1.7237,
"step": 35
},
{
"epoch": 0.0,
"learning_rate": 3.2388663967611335e-07,
"loss": 1.2235,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 3.6437246963562754e-07,
"loss": 1.1867,
"step": 45
},
{
"epoch": 0.0,
"learning_rate": 4.0485829959514166e-07,
"loss": 1.1259,
"step": 50
},
{
"epoch": 0.0,
"learning_rate": 4.4534412955465585e-07,
"loss": 1.0245,
"step": 55
},
{
"epoch": 0.0,
"learning_rate": 4.8582995951417e-07,
"loss": 0.977,
"step": 60
},
{
"epoch": 0.0,
"learning_rate": 5.263157894736842e-07,
"loss": 0.9554,
"step": 65
},
{
"epoch": 0.0,
"learning_rate": 5.668016194331983e-07,
"loss": 0.9017,
"step": 70
},
{
"epoch": 0.0,
"learning_rate": 6.072874493927125e-07,
"loss": 0.8987,
"step": 75
},
{
"epoch": 0.0,
"learning_rate": 6.477732793522267e-07,
"loss": 0.8863,
"step": 80
},
{
"epoch": 0.0,
"learning_rate": 6.882591093117408e-07,
"loss": 0.9425,
"step": 85
},
{
"epoch": 0.0,
"learning_rate": 7.287449392712551e-07,
"loss": 0.91,
"step": 90
},
{
"epoch": 0.0,
"learning_rate": 7.692307692307693e-07,
"loss": 0.848,
"step": 95
},
{
"epoch": 0.0,
"learning_rate": 8.097165991902833e-07,
"loss": 0.8213,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 8.502024291497975e-07,
"loss": 0.8434,
"step": 105
},
{
"epoch": 0.0,
"learning_rate": 8.906882591093117e-07,
"loss": 0.8409,
"step": 110
},
{
"epoch": 0.0,
"learning_rate": 9.311740890688259e-07,
"loss": 0.8398,
"step": 115
},
{
"epoch": 0.0,
"learning_rate": 9.7165991902834e-07,
"loss": 0.7942,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 1.0121457489878542e-06,
"loss": 0.8221,
"step": 125
},
{
"epoch": 0.01,
"learning_rate": 1.0526315789473683e-06,
"loss": 0.8038,
"step": 130
},
{
"epoch": 0.01,
"learning_rate": 1.0931174089068826e-06,
"loss": 0.803,
"step": 135
},
{
"epoch": 0.01,
"learning_rate": 1.1336032388663967e-06,
"loss": 0.7935,
"step": 140
},
{
"epoch": 0.01,
"learning_rate": 1.1740890688259108e-06,
"loss": 0.8251,
"step": 145
},
{
"epoch": 0.01,
"learning_rate": 1.214574898785425e-06,
"loss": 0.8082,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 1.2550607287449393e-06,
"loss": 0.7897,
"step": 155
},
{
"epoch": 0.01,
"learning_rate": 1.2955465587044534e-06,
"loss": 0.8286,
"step": 160
},
{
"epoch": 0.01,
"learning_rate": 1.3360323886639675e-06,
"loss": 0.7742,
"step": 165
},
{
"epoch": 0.01,
"learning_rate": 1.3765182186234816e-06,
"loss": 0.787,
"step": 170
},
{
"epoch": 0.01,
"learning_rate": 1.4170040485829959e-06,
"loss": 0.7862,
"step": 175
},
{
"epoch": 0.01,
"learning_rate": 1.4574898785425101e-06,
"loss": 0.7721,
"step": 180
},
{
"epoch": 0.01,
"learning_rate": 1.4979757085020242e-06,
"loss": 0.7554,
"step": 185
},
{
"epoch": 0.01,
"learning_rate": 1.5384615384615385e-06,
"loss": 0.7941,
"step": 190
},
{
"epoch": 0.01,
"learning_rate": 1.5789473684210526e-06,
"loss": 0.7759,
"step": 195
},
{
"epoch": 0.01,
"learning_rate": 1.6194331983805667e-06,
"loss": 0.7249,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 1.6599190283400807e-06,
"loss": 0.7673,
"step": 205
},
{
"epoch": 0.01,
"learning_rate": 1.700404858299595e-06,
"loss": 0.7922,
"step": 210
},
{
"epoch": 0.01,
"learning_rate": 1.7408906882591093e-06,
"loss": 0.7546,
"step": 215
},
{
"epoch": 0.01,
"learning_rate": 1.7813765182186234e-06,
"loss": 0.7709,
"step": 220
},
{
"epoch": 0.01,
"learning_rate": 1.8218623481781377e-06,
"loss": 0.7383,
"step": 225
},
{
"epoch": 0.01,
"learning_rate": 1.8623481781376518e-06,
"loss": 0.7608,
"step": 230
},
{
"epoch": 0.01,
"learning_rate": 1.9028340080971658e-06,
"loss": 0.7663,
"step": 235
},
{
"epoch": 0.01,
"learning_rate": 1.94331983805668e-06,
"loss": 0.7567,
"step": 240
},
{
"epoch": 0.01,
"learning_rate": 1.983805668016194e-06,
"loss": 0.751,
"step": 245
},
{
"epoch": 0.01,
"learning_rate": 1.9999999252295637e-06,
"loss": 0.8148,
"step": 250
},
{
"epoch": 0.01,
"learning_rate": 1.9999994682991603e-06,
"loss": 0.7634,
"step": 255
},
{
"epoch": 0.01,
"learning_rate": 1.999998595977674e-06,
"loss": 0.7448,
"step": 260
},
{
"epoch": 0.01,
"learning_rate": 1.999997308265467e-06,
"loss": 0.7508,
"step": 265
},
{
"epoch": 0.01,
"learning_rate": 1.999995605163075e-06,
"loss": 0.7696,
"step": 270
},
{
"epoch": 0.01,
"learning_rate": 1.9999934866712048e-06,
"loss": 0.7676,
"step": 275
},
{
"epoch": 0.01,
"learning_rate": 1.9999909527907367e-06,
"loss": 0.7601,
"step": 280
},
{
"epoch": 0.01,
"learning_rate": 1.9999880035227236e-06,
"loss": 0.7779,
"step": 285
},
{
"epoch": 0.01,
"learning_rate": 1.9999846388683895e-06,
"loss": 0.7768,
"step": 290
},
{
"epoch": 0.01,
"learning_rate": 1.9999808588291327e-06,
"loss": 0.7713,
"step": 295
},
{
"epoch": 0.01,
"learning_rate": 1.999976663406524e-06,
"loss": 0.7666,
"step": 300
},
{
"epoch": 0.01,
"learning_rate": 1.999972052602305e-06,
"loss": 0.742,
"step": 305
},
{
"epoch": 0.01,
"learning_rate": 1.999967026418392e-06,
"loss": 0.783,
"step": 310
},
{
"epoch": 0.01,
"learning_rate": 1.999961584856872e-06,
"loss": 0.7269,
"step": 315
},
{
"epoch": 0.01,
"learning_rate": 1.9999557279200056e-06,
"loss": 0.7336,
"step": 320
},
{
"epoch": 0.01,
"learning_rate": 1.9999494556102263e-06,
"loss": 0.7072,
"step": 325
},
{
"epoch": 0.01,
"learning_rate": 1.9999427679301387e-06,
"loss": 0.7709,
"step": 330
},
{
"epoch": 0.01,
"learning_rate": 1.999935664882522e-06,
"loss": 0.7237,
"step": 335
},
{
"epoch": 0.01,
"learning_rate": 1.9999281464703247e-06,
"loss": 0.719,
"step": 340
},
{
"epoch": 0.01,
"learning_rate": 1.999920212696672e-06,
"loss": 0.748,
"step": 345
},
{
"epoch": 0.01,
"learning_rate": 1.999911863564859e-06,
"loss": 0.7167,
"step": 350
},
{
"epoch": 0.01,
"learning_rate": 1.9999030990783527e-06,
"loss": 0.7151,
"step": 355
},
{
"epoch": 0.01,
"learning_rate": 1.999893919240795e-06,
"loss": 0.7095,
"step": 360
},
{
"epoch": 0.01,
"learning_rate": 1.9998843240559986e-06,
"loss": 0.7703,
"step": 365
},
{
"epoch": 0.02,
"learning_rate": 1.9998743135279497e-06,
"loss": 0.7456,
"step": 370
},
{
"epoch": 0.02,
"learning_rate": 1.999863887660806e-06,
"loss": 0.7532,
"step": 375
},
{
"epoch": 0.02,
"learning_rate": 1.999853046458899e-06,
"loss": 0.7014,
"step": 380
},
{
"epoch": 0.02,
"learning_rate": 1.9998417899267313e-06,
"loss": 0.7629,
"step": 385
},
{
"epoch": 0.02,
"learning_rate": 1.999830118068979e-06,
"loss": 0.7329,
"step": 390
},
{
"epoch": 0.02,
"learning_rate": 1.999818030890491e-06,
"loss": 0.723,
"step": 395
},
{
"epoch": 0.02,
"learning_rate": 1.999805528396288e-06,
"loss": 0.7549,
"step": 400
},
{
"epoch": 0.02,
"learning_rate": 1.9997926105915627e-06,
"loss": 0.7121,
"step": 405
},
{
"epoch": 0.02,
"learning_rate": 1.999779277481682e-06,
"loss": 0.7506,
"step": 410
},
{
"epoch": 0.02,
"learning_rate": 1.9997655290721834e-06,
"loss": 0.7284,
"step": 415
},
{
"epoch": 0.02,
"learning_rate": 1.9997513653687786e-06,
"loss": 0.7344,
"step": 420
},
{
"epoch": 0.02,
"learning_rate": 1.999736786377351e-06,
"loss": 0.7684,
"step": 425
},
{
"epoch": 0.02,
"learning_rate": 1.9997217921039567e-06,
"loss": 0.7427,
"step": 430
},
{
"epoch": 0.02,
"learning_rate": 1.9997063825548237e-06,
"loss": 0.7139,
"step": 435
},
{
"epoch": 0.02,
"learning_rate": 1.9996905577363533e-06,
"loss": 0.761,
"step": 440
},
{
"epoch": 0.02,
"learning_rate": 1.9996743176551186e-06,
"loss": 0.7545,
"step": 445
},
{
"epoch": 0.02,
"learning_rate": 1.999657662317866e-06,
"loss": 0.7431,
"step": 450
},
{
"epoch": 0.02,
"learning_rate": 1.999640591731515e-06,
"loss": 0.7225,
"step": 455
},
{
"epoch": 0.02,
"learning_rate": 1.999623105903154e-06,
"loss": 0.7284,
"step": 460
},
{
"epoch": 0.02,
"learning_rate": 1.999605204840049e-06,
"loss": 0.76,
"step": 465
},
{
"epoch": 0.02,
"learning_rate": 1.9995868885496343e-06,
"loss": 0.7413,
"step": 470
},
{
"epoch": 0.02,
"learning_rate": 1.9995681570395195e-06,
"loss": 0.7837,
"step": 475
},
{
"epoch": 0.02,
"learning_rate": 1.9995490103174847e-06,
"loss": 0.7347,
"step": 480
},
{
"epoch": 0.02,
"learning_rate": 1.999529448391483e-06,
"loss": 0.7576,
"step": 485
},
{
"epoch": 0.02,
"learning_rate": 1.9995094712696413e-06,
"loss": 0.7665,
"step": 490
},
{
"epoch": 0.02,
"learning_rate": 1.9994890789602576e-06,
"loss": 0.7353,
"step": 495
},
{
"epoch": 0.02,
"learning_rate": 1.999468271471802e-06,
"loss": 0.7344,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 1.9994470488129185e-06,
"loss": 0.7476,
"step": 505
},
{
"epoch": 0.02,
"learning_rate": 1.9994254109924223e-06,
"loss": 0.7257,
"step": 510
},
{
"epoch": 0.02,
"learning_rate": 1.9994033580193017e-06,
"loss": 0.7306,
"step": 515
},
{
"epoch": 0.02,
"learning_rate": 1.999380889902718e-06,
"loss": 0.7115,
"step": 520
},
{
"epoch": 0.02,
"learning_rate": 1.9993580066520034e-06,
"loss": 0.7452,
"step": 525
},
{
"epoch": 0.02,
"learning_rate": 1.9993347082766636e-06,
"loss": 0.7523,
"step": 530
},
{
"epoch": 0.02,
"learning_rate": 1.9993109947863764e-06,
"loss": 0.7091,
"step": 535
},
{
"epoch": 0.02,
"learning_rate": 1.999286866190993e-06,
"loss": 0.7383,
"step": 540
},
{
"epoch": 0.02,
"learning_rate": 1.999262322500535e-06,
"loss": 0.7043,
"step": 545
},
{
"epoch": 0.02,
"learning_rate": 1.9992373637251982e-06,
"loss": 0.7098,
"step": 550
},
{
"epoch": 0.02,
"learning_rate": 1.999211989875351e-06,
"loss": 0.7142,
"step": 555
},
{
"epoch": 0.02,
"learning_rate": 1.999186200961532e-06,
"loss": 0.7424,
"step": 560
},
{
"epoch": 0.02,
"learning_rate": 1.9991599969944552e-06,
"loss": 0.7348,
"step": 565
},
{
"epoch": 0.02,
"learning_rate": 1.9991333779850043e-06,
"loss": 0.7126,
"step": 570
},
{
"epoch": 0.02,
"learning_rate": 1.999106343944237e-06,
"loss": 0.7341,
"step": 575
},
{
"epoch": 0.02,
"learning_rate": 1.9990788948833833e-06,
"loss": 0.7445,
"step": 580
},
{
"epoch": 0.02,
"learning_rate": 1.999051030813845e-06,
"loss": 0.7181,
"step": 585
},
{
"epoch": 0.02,
"learning_rate": 1.999022751747197e-06,
"loss": 0.7295,
"step": 590
},
{
"epoch": 0.02,
"learning_rate": 1.998994057695185e-06,
"loss": 0.7159,
"step": 595
},
{
"epoch": 0.02,
"learning_rate": 1.99896494866973e-06,
"loss": 0.6844,
"step": 600
},
{
"epoch": 0.02,
"learning_rate": 1.9989354246829222e-06,
"loss": 0.7511,
"step": 605
},
{
"epoch": 0.02,
"learning_rate": 1.9989054857470267e-06,
"loss": 0.7322,
"step": 610
},
{
"epoch": 0.02,
"learning_rate": 1.9988751318744787e-06,
"loss": 0.7829,
"step": 615
},
{
"epoch": 0.03,
"learning_rate": 1.998844363077888e-06,
"loss": 0.7229,
"step": 620
},
{
"epoch": 0.03,
"learning_rate": 1.998813179370035e-06,
"loss": 0.738,
"step": 625
},
{
"epoch": 0.03,
"learning_rate": 1.9987815807638733e-06,
"loss": 0.6934,
"step": 630
},
{
"epoch": 0.03,
"learning_rate": 1.9987495672725294e-06,
"loss": 0.7005,
"step": 635
},
{
"epoch": 0.03,
"learning_rate": 1.9987171389093e-06,
"loss": 0.7692,
"step": 640
},
{
"epoch": 0.03,
"learning_rate": 1.998684295687657e-06,
"loss": 0.7101,
"step": 645
},
{
"epoch": 0.03,
"learning_rate": 1.998651037621242e-06,
"loss": 0.7813,
"step": 650
},
{
"epoch": 0.03,
"learning_rate": 1.9986173647238715e-06,
"loss": 0.7526,
"step": 655
},
{
"epoch": 0.03,
"learning_rate": 1.9985832770095313e-06,
"loss": 0.7235,
"step": 660
},
{
"epoch": 0.03,
"learning_rate": 1.998548774492382e-06,
"loss": 0.7201,
"step": 665
},
{
"epoch": 0.03,
"learning_rate": 1.9985138571867557e-06,
"loss": 0.7303,
"step": 670
},
{
"epoch": 0.03,
"learning_rate": 1.998478525107157e-06,
"loss": 0.7375,
"step": 675
},
{
"epoch": 0.03,
"learning_rate": 1.998442778268262e-06,
"loss": 0.7123,
"step": 680
},
{
"epoch": 0.03,
"learning_rate": 1.99840661668492e-06,
"loss": 0.7541,
"step": 685
},
{
"epoch": 0.03,
"learning_rate": 1.998370040372151e-06,
"loss": 0.7685,
"step": 690
},
{
"epoch": 0.03,
"learning_rate": 1.99833304934515e-06,
"loss": 0.7029,
"step": 695
},
{
"epoch": 0.03,
"learning_rate": 1.9982956436192827e-06,
"loss": 0.7797,
"step": 700
},
{
"epoch": 0.03,
"learning_rate": 1.9982578232100866e-06,
"loss": 0.7326,
"step": 705
},
{
"epoch": 0.03,
"learning_rate": 1.9982195881332714e-06,
"loss": 0.773,
"step": 710
},
{
"epoch": 0.03,
"learning_rate": 1.9981809384047207e-06,
"loss": 0.741,
"step": 715
},
{
"epoch": 0.03,
"learning_rate": 1.9981418740404886e-06,
"loss": 0.7518,
"step": 720
},
{
"epoch": 0.03,
"learning_rate": 1.998102395056802e-06,
"loss": 0.7338,
"step": 725
},
{
"epoch": 0.03,
"learning_rate": 1.998062501470061e-06,
"loss": 0.7192,
"step": 730
},
{
"epoch": 0.03,
"learning_rate": 1.998022193296836e-06,
"loss": 0.7429,
"step": 735
},
{
"epoch": 0.03,
"learning_rate": 1.9979814705538715e-06,
"loss": 0.6953,
"step": 740
},
{
"epoch": 0.03,
"learning_rate": 1.997940333258083e-06,
"loss": 0.7265,
"step": 745
},
{
"epoch": 0.03,
"learning_rate": 1.9978987814265583e-06,
"loss": 0.7105,
"step": 750
},
{
"epoch": 0.03,
"learning_rate": 1.997856815076558e-06,
"loss": 0.6994,
"step": 755
},
{
"epoch": 0.03,
"learning_rate": 1.9978144342255147e-06,
"loss": 0.7008,
"step": 760
},
{
"epoch": 0.03,
"learning_rate": 1.9977716388910325e-06,
"loss": 0.7301,
"step": 765
},
{
"epoch": 0.03,
"learning_rate": 1.997728429090889e-06,
"loss": 0.7662,
"step": 770
},
{
"epoch": 0.03,
"learning_rate": 1.9976848048430323e-06,
"loss": 0.7428,
"step": 775
},
{
"epoch": 0.03,
"learning_rate": 1.9976407661655844e-06,
"loss": 0.706,
"step": 780
},
{
"epoch": 0.03,
"learning_rate": 1.997596313076838e-06,
"loss": 0.6853,
"step": 785
},
{
"epoch": 0.03,
"learning_rate": 1.9975514455952584e-06,
"loss": 0.7363,
"step": 790
},
{
"epoch": 0.03,
"learning_rate": 1.9975061637394834e-06,
"loss": 0.7217,
"step": 795
},
{
"epoch": 0.03,
"learning_rate": 1.997460467528323e-06,
"loss": 0.7161,
"step": 800
},
{
"epoch": 0.03,
"eval_loss": 0.6896045207977295,
"eval_runtime": 140.4315,
"eval_samples_per_second": 16.848,
"eval_steps_per_second": 2.813,
"step": 800
},
{
"epoch": 0.03,
"learning_rate": 1.997414356980759e-06,
"loss": 0.7911,
"step": 805
},
{
"epoch": 0.03,
"learning_rate": 1.9973678321159443e-06,
"loss": 0.7037,
"step": 810
},
{
"epoch": 0.03,
"learning_rate": 1.9973208929532063e-06,
"loss": 0.7083,
"step": 815
},
{
"epoch": 0.03,
"learning_rate": 1.9972735395120418e-06,
"loss": 0.7183,
"step": 820
},
{
"epoch": 0.03,
"learning_rate": 1.997225771812122e-06,
"loss": 0.7227,
"step": 825
},
{
"epoch": 0.03,
"learning_rate": 1.9971775898732893e-06,
"loss": 0.7271,
"step": 830
},
{
"epoch": 0.03,
"learning_rate": 1.9971289937155577e-06,
"loss": 0.7271,
"step": 835
},
{
"epoch": 0.03,
"learning_rate": 1.997079983359113e-06,
"loss": 0.7065,
"step": 840
},
{
"epoch": 0.03,
"learning_rate": 1.9970305588243145e-06,
"loss": 0.706,
"step": 845
},
{
"epoch": 0.03,
"learning_rate": 1.9969807201316925e-06,
"loss": 0.7,
"step": 850
},
{
"epoch": 0.03,
"learning_rate": 1.9969304673019494e-06,
"loss": 0.7165,
"step": 855
},
{
"epoch": 0.03,
"learning_rate": 1.99687980035596e-06,
"loss": 0.729,
"step": 860
},
{
"epoch": 0.04,
"learning_rate": 1.996828719314771e-06,
"loss": 0.7199,
"step": 865
},
{
"epoch": 0.04,
"learning_rate": 1.996777224199601e-06,
"loss": 0.7041,
"step": 870
},
{
"epoch": 0.04,
"learning_rate": 1.99672531503184e-06,
"loss": 0.735,
"step": 875
},
{
"epoch": 0.04,
"learning_rate": 1.996672991833051e-06,
"loss": 0.7153,
"step": 880
},
{
"epoch": 0.04,
"learning_rate": 1.996620254624969e-06,
"loss": 0.714,
"step": 885
},
{
"epoch": 0.04,
"learning_rate": 1.9965671034295e-06,
"loss": 0.7309,
"step": 890
},
{
"epoch": 0.04,
"learning_rate": 1.996513538268723e-06,
"loss": 0.7808,
"step": 895
},
{
"epoch": 0.04,
"learning_rate": 1.9964595591648883e-06,
"loss": 0.7407,
"step": 900
},
{
"epoch": 0.04,
"learning_rate": 1.9964051661404185e-06,
"loss": 0.6831,
"step": 905
},
{
"epoch": 0.04,
"learning_rate": 1.9963503592179078e-06,
"loss": 0.7178,
"step": 910
},
{
"epoch": 0.04,
"learning_rate": 1.996295138420122e-06,
"loss": 0.7607,
"step": 915
},
{
"epoch": 0.04,
"learning_rate": 1.9962395037700007e-06,
"loss": 0.747,
"step": 920
},
{
"epoch": 0.04,
"learning_rate": 1.996183455290653e-06,
"loss": 0.6911,
"step": 925
},
{
"epoch": 0.04,
"learning_rate": 1.996126993005361e-06,
"loss": 0.7038,
"step": 930
},
{
"epoch": 0.04,
"learning_rate": 1.996070116937579e-06,
"loss": 0.7195,
"step": 935
},
{
"epoch": 0.04,
"learning_rate": 1.9960128271109326e-06,
"loss": 0.6974,
"step": 940
},
{
"epoch": 0.04,
"learning_rate": 1.9959551235492195e-06,
"loss": 0.7399,
"step": 945
},
{
"epoch": 0.04,
"learning_rate": 1.9958970062764095e-06,
"loss": 0.7475,
"step": 950
},
{
"epoch": 0.04,
"learning_rate": 1.9958384753166437e-06,
"loss": 0.7091,
"step": 955
},
{
"epoch": 0.04,
"learning_rate": 1.995779530694236e-06,
"loss": 0.6908,
"step": 960
},
{
"epoch": 0.04,
"learning_rate": 1.9957201724336704e-06,
"loss": 0.7052,
"step": 965
},
{
"epoch": 0.04,
"learning_rate": 1.9956604005596043e-06,
"loss": 0.6963,
"step": 970
},
{
"epoch": 0.04,
"learning_rate": 1.9956002150968667e-06,
"loss": 0.7064,
"step": 975
},
{
"epoch": 0.04,
"learning_rate": 1.9955396160704582e-06,
"loss": 0.6804,
"step": 980
},
{
"epoch": 0.04,
"learning_rate": 1.99547860350555e-06,
"loss": 0.6759,
"step": 985
},
{
"epoch": 0.04,
"learning_rate": 1.995417177427488e-06,
"loss": 0.7175,
"step": 990
},
{
"epoch": 0.04,
"learning_rate": 1.9953553378617866e-06,
"loss": 0.6926,
"step": 995
},
{
"epoch": 0.04,
"learning_rate": 1.995293084834134e-06,
"loss": 0.7109,
"step": 1000
},
{
"epoch": 0.04,
"learning_rate": 1.9952304183703893e-06,
"loss": 0.7129,
"step": 1005
},
{
"epoch": 0.04,
"learning_rate": 1.9951673384965835e-06,
"loss": 0.7117,
"step": 1010
},
{
"epoch": 0.04,
"learning_rate": 1.99510384523892e-06,
"loss": 0.7694,
"step": 1015
},
{
"epoch": 0.04,
"learning_rate": 1.995039938623773e-06,
"loss": 0.7381,
"step": 1020
},
{
"epoch": 0.04,
"learning_rate": 1.9949756186776893e-06,
"loss": 0.722,
"step": 1025
},
{
"epoch": 0.04,
"learning_rate": 1.9949108854273855e-06,
"loss": 0.7288,
"step": 1030
},
{
"epoch": 0.04,
"learning_rate": 1.9948457388997528e-06,
"loss": 0.7045,
"step": 1035
},
{
"epoch": 0.04,
"learning_rate": 1.994780179121851e-06,
"loss": 0.7623,
"step": 1040
},
{
"epoch": 0.04,
"learning_rate": 1.994714206120914e-06,
"loss": 0.725,
"step": 1045
},
{
"epoch": 0.04,
"learning_rate": 1.9946478199243466e-06,
"loss": 0.7203,
"step": 1050
},
{
"epoch": 0.04,
"learning_rate": 1.9945810205597246e-06,
"loss": 0.7011,
"step": 1055
},
{
"epoch": 0.04,
"learning_rate": 1.9945138080547957e-06,
"loss": 0.6946,
"step": 1060
},
{
"epoch": 0.04,
"learning_rate": 1.99444618243748e-06,
"loss": 0.7151,
"step": 1065
},
{
"epoch": 0.04,
"learning_rate": 1.994378143735868e-06,
"loss": 0.7074,
"step": 1070
},
{
"epoch": 0.04,
"learning_rate": 1.9943096919782225e-06,
"loss": 0.7,
"step": 1075
},
{
"epoch": 0.04,
"learning_rate": 1.994240827192978e-06,
"loss": 0.6957,
"step": 1080
},
{
"epoch": 0.04,
"learning_rate": 1.9941715494087408e-06,
"loss": 0.7348,
"step": 1085
},
{
"epoch": 0.04,
"learning_rate": 1.9941018586542866e-06,
"loss": 0.6984,
"step": 1090
},
{
"epoch": 0.04,
"learning_rate": 1.9940317549585665e-06,
"loss": 0.7252,
"step": 1095
},
{
"epoch": 0.04,
"learning_rate": 1.9939612383506993e-06,
"loss": 0.7706,
"step": 1100
},
{
"epoch": 0.04,
"learning_rate": 1.993890308859978e-06,
"loss": 0.7261,
"step": 1105
},
{
"epoch": 0.05,
"learning_rate": 1.9938189665158654e-06,
"loss": 0.6879,
"step": 1110
},
{
"epoch": 0.05,
"learning_rate": 1.9937472113479966e-06,
"loss": 0.7088,
"step": 1115
},
{
"epoch": 0.05,
"learning_rate": 1.9936750433861787e-06,
"loss": 0.7428,
"step": 1120
},
{
"epoch": 0.05,
"learning_rate": 1.993602462660389e-06,
"loss": 0.7111,
"step": 1125
},
{
"epoch": 0.05,
"learning_rate": 1.993529469200777e-06,
"loss": 0.7027,
"step": 1130
},
{
"epoch": 0.05,
"learning_rate": 1.993456063037664e-06,
"loss": 0.6969,
"step": 1135
},
{
"epoch": 0.05,
"learning_rate": 1.9933822442015416e-06,
"loss": 0.7343,
"step": 1140
},
{
"epoch": 0.05,
"learning_rate": 1.993308012723074e-06,
"loss": 0.7174,
"step": 1145
},
{
"epoch": 0.05,
"learning_rate": 1.993233368633096e-06,
"loss": 0.6997,
"step": 1150
},
{
"epoch": 0.05,
"learning_rate": 1.993158311962614e-06,
"loss": 0.693,
"step": 1155
},
{
"epoch": 0.05,
"learning_rate": 1.9930828427428066e-06,
"loss": 0.7136,
"step": 1160
},
{
"epoch": 0.05,
"learning_rate": 1.9930069610050224e-06,
"loss": 0.7211,
"step": 1165
},
{
"epoch": 0.05,
"learning_rate": 1.9929306667807823e-06,
"loss": 0.7144,
"step": 1170
},
{
"epoch": 0.05,
"learning_rate": 1.992853960101778e-06,
"loss": 0.6787,
"step": 1175
},
{
"epoch": 0.05,
"learning_rate": 1.9927768409998733e-06,
"loss": 0.7348,
"step": 1180
},
{
"epoch": 0.05,
"learning_rate": 1.992699309507102e-06,
"loss": 0.6718,
"step": 1185
},
{
"epoch": 0.05,
"learning_rate": 1.992621365655671e-06,
"loss": 0.7146,
"step": 1190
},
{
"epoch": 0.05,
"learning_rate": 1.9925430094779566e-06,
"loss": 0.6982,
"step": 1195
},
{
"epoch": 0.05,
"learning_rate": 1.9924642410065075e-06,
"loss": 0.7379,
"step": 1200
},
{
"epoch": 0.05,
"learning_rate": 1.992385060274044e-06,
"loss": 0.6983,
"step": 1205
},
{
"epoch": 0.05,
"learning_rate": 1.9923054673134564e-06,
"loss": 0.7893,
"step": 1210
},
{
"epoch": 0.05,
"learning_rate": 1.992225462157807e-06,
"loss": 0.714,
"step": 1215
},
{
"epoch": 0.05,
"learning_rate": 1.99214504484033e-06,
"loss": 0.7394,
"step": 1220
},
{
"epoch": 0.05,
"learning_rate": 1.9920642153944288e-06,
"loss": 0.7238,
"step": 1225
},
{
"epoch": 0.05,
"learning_rate": 1.9919829738536806e-06,
"loss": 0.6847,
"step": 1230
},
{
"epoch": 0.05,
"learning_rate": 1.991901320251831e-06,
"loss": 0.6936,
"step": 1235
},
{
"epoch": 0.05,
"learning_rate": 1.9918192546227995e-06,
"loss": 0.7271,
"step": 1240
},
{
"epoch": 0.05,
"learning_rate": 1.991736777000675e-06,
"loss": 0.7416,
"step": 1245
},
{
"epoch": 0.05,
"learning_rate": 1.9916538874197176e-06,
"loss": 0.7637,
"step": 1250
},
{
"epoch": 0.05,
"learning_rate": 1.9915705859143594e-06,
"loss": 0.6722,
"step": 1255
},
{
"epoch": 0.05,
"learning_rate": 1.9914868725192025e-06,
"loss": 0.6943,
"step": 1260
},
{
"epoch": 0.05,
"learning_rate": 1.991402747269022e-06,
"loss": 0.7433,
"step": 1265
},
{
"epoch": 0.05,
"learning_rate": 1.991318210198761e-06,
"loss": 0.7015,
"step": 1270
},
{
"epoch": 0.05,
"learning_rate": 1.991233261343537e-06,
"loss": 0.6772,
"step": 1275
},
{
"epoch": 0.05,
"learning_rate": 1.9911479007386364e-06,
"loss": 0.7278,
"step": 1280
},
{
"epoch": 0.05,
"learning_rate": 1.991062128419517e-06,
"loss": 0.7471,
"step": 1285
},
{
"epoch": 0.05,
"learning_rate": 1.9909759444218085e-06,
"loss": 0.7234,
"step": 1290
},
{
"epoch": 0.05,
"learning_rate": 1.9908893487813106e-06,
"loss": 0.7118,
"step": 1295
},
{
"epoch": 0.05,
"learning_rate": 1.990802341533994e-06,
"loss": 0.7351,
"step": 1300
},
{
"epoch": 0.05,
"learning_rate": 1.9907149227160016e-06,
"loss": 0.7084,
"step": 1305
},
{
"epoch": 0.05,
"learning_rate": 1.9906270923636457e-06,
"loss": 0.7174,
"step": 1310
},
{
"epoch": 0.05,
"learning_rate": 1.9905388505134107e-06,
"loss": 0.6935,
"step": 1315
},
{
"epoch": 0.05,
"learning_rate": 1.990450197201951e-06,
"loss": 0.7004,
"step": 1320
},
{
"epoch": 0.05,
"learning_rate": 1.990361132466093e-06,
"loss": 0.7077,
"step": 1325
},
{
"epoch": 0.05,
"learning_rate": 1.9902716563428335e-06,
"loss": 0.7226,
"step": 1330
},
{
"epoch": 0.05,
"learning_rate": 1.9901817688693395e-06,
"loss": 0.7025,
"step": 1335
},
{
"epoch": 0.05,
"learning_rate": 1.99009147008295e-06,
"loss": 0.7139,
"step": 1340
},
{
"epoch": 0.05,
"learning_rate": 1.9900007600211735e-06,
"loss": 0.6609,
"step": 1345
},
{
"epoch": 0.05,
"learning_rate": 1.9899096387216914e-06,
"loss": 0.7452,
"step": 1350
},
{
"epoch": 0.06,
"learning_rate": 1.9898181062223536e-06,
"loss": 0.7111,
"step": 1355
},
{
"epoch": 0.06,
"learning_rate": 1.9897261625611822e-06,
"loss": 0.6925,
"step": 1360
},
{
"epoch": 0.06,
"learning_rate": 1.9896338077763704e-06,
"loss": 0.7097,
"step": 1365
},
{
"epoch": 0.06,
"learning_rate": 1.989541041906281e-06,
"loss": 0.7146,
"step": 1370
},
{
"epoch": 0.06,
"learning_rate": 1.9894478649894484e-06,
"loss": 0.704,
"step": 1375
},
{
"epoch": 0.06,
"learning_rate": 1.989354277064577e-06,
"loss": 0.7466,
"step": 1380
},
{
"epoch": 0.06,
"learning_rate": 1.9892602781705427e-06,
"loss": 0.6958,
"step": 1385
},
{
"epoch": 0.06,
"learning_rate": 1.9891658683463922e-06,
"loss": 0.7421,
"step": 1390
},
{
"epoch": 0.06,
"learning_rate": 1.989071047631342e-06,
"loss": 0.6658,
"step": 1395
},
{
"epoch": 0.06,
"learning_rate": 1.98897581606478e-06,
"loss": 0.6868,
"step": 1400
},
{
"epoch": 0.06,
"learning_rate": 1.988880173686265e-06,
"loss": 0.7437,
"step": 1405
},
{
"epoch": 0.06,
"learning_rate": 1.988784120535525e-06,
"loss": 0.7484,
"step": 1410
},
{
"epoch": 0.06,
"learning_rate": 1.988687656652461e-06,
"loss": 0.7063,
"step": 1415
},
{
"epoch": 0.06,
"learning_rate": 1.9885907820771415e-06,
"loss": 0.713,
"step": 1420
},
{
"epoch": 0.06,
"learning_rate": 1.988493496849809e-06,
"loss": 0.7313,
"step": 1425
},
{
"epoch": 0.06,
"learning_rate": 1.9883958010108736e-06,
"loss": 0.6987,
"step": 1430
},
{
"epoch": 0.06,
"learning_rate": 1.9882976946009186e-06,
"loss": 0.7089,
"step": 1435
},
{
"epoch": 0.06,
"learning_rate": 1.9881991776606956e-06,
"loss": 0.6492,
"step": 1440
},
{
"epoch": 0.06,
"learning_rate": 1.9881002502311285e-06,
"loss": 0.6538,
"step": 1445
},
{
"epoch": 0.06,
"learning_rate": 1.9880009123533095e-06,
"loss": 0.7096,
"step": 1450
},
{
"epoch": 0.06,
"learning_rate": 1.9879011640685043e-06,
"loss": 0.7329,
"step": 1455
},
{
"epoch": 0.06,
"learning_rate": 1.9878010054181463e-06,
"loss": 0.7414,
"step": 1460
},
{
"epoch": 0.06,
"learning_rate": 1.9877004364438414e-06,
"loss": 0.7089,
"step": 1465
},
{
"epoch": 0.06,
"learning_rate": 1.987599457187365e-06,
"loss": 0.738,
"step": 1470
},
{
"epoch": 0.06,
"learning_rate": 1.9874980676906617e-06,
"loss": 0.7078,
"step": 1475
},
{
"epoch": 0.06,
"learning_rate": 1.9873962679958494e-06,
"loss": 0.6987,
"step": 1480
},
{
"epoch": 0.06,
"learning_rate": 1.987294058145214e-06,
"loss": 0.7456,
"step": 1485
},
{
"epoch": 0.06,
"learning_rate": 1.987191438181213e-06,
"loss": 0.7402,
"step": 1490
},
{
"epoch": 0.06,
"learning_rate": 1.987088408146473e-06,
"loss": 0.7308,
"step": 1495
},
{
"epoch": 0.06,
"learning_rate": 1.986984968083793e-06,
"loss": 0.7197,
"step": 1500
},
{
"epoch": 0.06,
"learning_rate": 1.9868811180361402e-06,
"loss": 0.7386,
"step": 1505
},
{
"epoch": 0.06,
"learning_rate": 1.9867768580466536e-06,
"loss": 0.7024,
"step": 1510
},
{
"epoch": 0.06,
"learning_rate": 1.986672188158641e-06,
"loss": 0.7241,
"step": 1515
},
{
"epoch": 0.06,
"learning_rate": 1.9865671084155826e-06,
"loss": 0.7006,
"step": 1520
},
{
"epoch": 0.06,
"learning_rate": 1.986461618861127e-06,
"loss": 0.7035,
"step": 1525
},
{
"epoch": 0.06,
"learning_rate": 1.986355719539093e-06,
"loss": 0.723,
"step": 1530
},
{
"epoch": 0.06,
"learning_rate": 1.9862494104934717e-06,
"loss": 0.7184,
"step": 1535
},
{
"epoch": 0.06,
"learning_rate": 1.9861426917684214e-06,
"loss": 0.7018,
"step": 1540
},
{
"epoch": 0.06,
"learning_rate": 1.986035563408273e-06,
"loss": 0.6943,
"step": 1545
},
{
"epoch": 0.06,
"learning_rate": 1.9859280254575268e-06,
"loss": 0.7434,
"step": 1550
},
{
"epoch": 0.06,
"learning_rate": 1.9858200779608526e-06,
"loss": 0.7122,
"step": 1555
},
{
"epoch": 0.06,
"learning_rate": 1.9857117209630913e-06,
"loss": 0.7187,
"step": 1560
},
{
"epoch": 0.06,
"learning_rate": 1.9856029545092536e-06,
"loss": 0.6825,
"step": 1565
},
{
"epoch": 0.06,
"learning_rate": 1.985493778644519e-06,
"loss": 0.6964,
"step": 1570
},
{
"epoch": 0.06,
"learning_rate": 1.9853841934142396e-06,
"loss": 0.7437,
"step": 1575
},
{
"epoch": 0.06,
"learning_rate": 1.9852741988639356e-06,
"loss": 0.7125,
"step": 1580
},
{
"epoch": 0.06,
"learning_rate": 1.9851637950392974e-06,
"loss": 0.7241,
"step": 1585
},
{
"epoch": 0.06,
"learning_rate": 1.9850529819861863e-06,
"loss": 0.7113,
"step": 1590
},
{
"epoch": 0.06,
"learning_rate": 1.984941759750633e-06,
"loss": 0.6725,
"step": 1595
},
{
"epoch": 0.06,
"learning_rate": 1.984830128378838e-06,
"loss": 0.7166,
"step": 1600
},
{
"epoch": 0.06,
"eval_loss": 0.6776626706123352,
"eval_runtime": 140.3492,
"eval_samples_per_second": 16.858,
"eval_steps_per_second": 2.814,
"step": 1600
},
{
"epoch": 0.07,
"learning_rate": 1.9847180879171727e-06,
"loss": 0.7111,
"step": 1605
},
{
"epoch": 0.07,
"learning_rate": 1.9846056384121768e-06,
"loss": 0.7004,
"step": 1610
},
{
"epoch": 0.07,
"learning_rate": 1.9844927799105612e-06,
"loss": 0.7221,
"step": 1615
},
{
"epoch": 0.07,
"learning_rate": 1.984379512459207e-06,
"loss": 0.7363,
"step": 1620
},
{
"epoch": 0.07,
"learning_rate": 1.984265836105163e-06,
"loss": 0.7107,
"step": 1625
},
{
"epoch": 0.07,
"learning_rate": 1.9841517508956506e-06,
"loss": 0.7081,
"step": 1630
},
{
"epoch": 0.07,
"learning_rate": 1.9840372568780594e-06,
"loss": 0.6796,
"step": 1635
},
{
"epoch": 0.07,
"learning_rate": 1.9839223540999496e-06,
"loss": 0.7207,
"step": 1640
},
{
"epoch": 0.07,
"learning_rate": 1.9838070426090505e-06,
"loss": 0.716,
"step": 1645
},
{
"epoch": 0.07,
"learning_rate": 1.983691322453261e-06,
"loss": 0.7306,
"step": 1650
},
{
"epoch": 0.07,
"learning_rate": 1.983575193680651e-06,
"loss": 0.724,
"step": 1655
},
{
"epoch": 0.07,
"learning_rate": 1.983458656339459e-06,
"loss": 0.7447,
"step": 1660
},
{
"epoch": 0.07,
"learning_rate": 1.9833417104780942e-06,
"loss": 0.6929,
"step": 1665
},
{
"epoch": 0.07,
"learning_rate": 1.9832243561451346e-06,
"loss": 0.7228,
"step": 1670
},
{
"epoch": 0.07,
"learning_rate": 1.9831065933893275e-06,
"loss": 0.6824,
"step": 1675
},
{
"epoch": 0.07,
"learning_rate": 1.982988422259591e-06,
"loss": 0.7056,
"step": 1680
},
{
"epoch": 0.07,
"learning_rate": 1.9828698428050123e-06,
"loss": 0.6943,
"step": 1685
},
{
"epoch": 0.07,
"learning_rate": 1.982750855074849e-06,
"loss": 0.7101,
"step": 1690
},
{
"epoch": 0.07,
"learning_rate": 1.9826314591185263e-06,
"loss": 0.6786,
"step": 1695
},
{
"epoch": 0.07,
"learning_rate": 1.9825116549856408e-06,
"loss": 0.6954,
"step": 1700
},
{
"epoch": 0.07,
"learning_rate": 1.9823914427259584e-06,
"loss": 0.7165,
"step": 1705
},
{
"epoch": 0.07,
"learning_rate": 1.982270822389414e-06,
"loss": 0.7208,
"step": 1710
},
{
"epoch": 0.07,
"learning_rate": 1.9821497940261124e-06,
"loss": 0.6981,
"step": 1715
},
{
"epoch": 0.07,
"learning_rate": 1.982028357686327e-06,
"loss": 0.6914,
"step": 1720
},
{
"epoch": 0.07,
"learning_rate": 1.9819065134205026e-06,
"loss": 0.7291,
"step": 1725
},
{
"epoch": 0.07,
"learning_rate": 1.9817842612792513e-06,
"loss": 0.6882,
"step": 1730
},
{
"epoch": 0.07,
"learning_rate": 1.981661601313356e-06,
"loss": 0.685,
"step": 1735
},
{
"epoch": 0.07,
"learning_rate": 1.981538533573768e-06,
"loss": 0.6954,
"step": 1740
},
{
"epoch": 0.07,
"learning_rate": 1.9814150581116093e-06,
"loss": 0.7104,
"step": 1745
},
{
"epoch": 0.07,
"learning_rate": 1.9812911749781705e-06,
"loss": 0.7026,
"step": 1750
},
{
"epoch": 0.07,
"learning_rate": 1.981166884224911e-06,
"loss": 0.6907,
"step": 1755
},
{
"epoch": 0.07,
"learning_rate": 1.981042185903461e-06,
"loss": 0.6988,
"step": 1760
},
{
"epoch": 0.07,
"learning_rate": 1.980917080065618e-06,
"loss": 0.6894,
"step": 1765
},
{
"epoch": 0.07,
"learning_rate": 1.98079156676335e-06,
"loss": 0.7308,
"step": 1770
},
{
"epoch": 0.07,
"learning_rate": 1.9806656460487955e-06,
"loss": 0.6688,
"step": 1775
},
{
"epoch": 0.07,
"learning_rate": 1.9805393179742596e-06,
"loss": 0.7028,
"step": 1780
},
{
"epoch": 0.07,
"learning_rate": 1.980412582592218e-06,
"loss": 0.6982,
"step": 1785
},
{
"epoch": 0.07,
"learning_rate": 1.980285439955316e-06,
"loss": 0.7326,
"step": 1790
},
{
"epoch": 0.07,
"learning_rate": 1.980157890116367e-06,
"loss": 0.7204,
"step": 1795
},
{
"epoch": 0.07,
"learning_rate": 1.980029933128354e-06,
"loss": 0.7016,
"step": 1800
},
{
"epoch": 0.07,
"learning_rate": 1.9799015690444302e-06,
"loss": 0.7076,
"step": 1805
},
{
"epoch": 0.07,
"learning_rate": 1.9797727979179156e-06,
"loss": 0.7121,
"step": 1810
},
{
"epoch": 0.07,
"learning_rate": 1.9796436198023016e-06,
"loss": 0.7204,
"step": 1815
},
{
"epoch": 0.07,
"learning_rate": 1.9795140347512472e-06,
"loss": 0.7178,
"step": 1820
},
{
"epoch": 0.07,
"learning_rate": 1.979384042818581e-06,
"loss": 0.7223,
"step": 1825
},
{
"epoch": 0.07,
"learning_rate": 1.979253644058301e-06,
"loss": 0.7066,
"step": 1830
},
{
"epoch": 0.07,
"learning_rate": 1.979122838524573e-06,
"loss": 0.6873,
"step": 1835
},
{
"epoch": 0.07,
"learning_rate": 1.9789916262717328e-06,
"loss": 0.6822,
"step": 1840
},
{
"epoch": 0.07,
"learning_rate": 1.9788600073542848e-06,
"loss": 0.6947,
"step": 1845
},
{
"epoch": 0.08,
"learning_rate": 1.978727981826902e-06,
"loss": 0.7092,
"step": 1850
},
{
"epoch": 0.08,
"learning_rate": 1.978595549744427e-06,
"loss": 0.7166,
"step": 1855
},
{
"epoch": 0.08,
"learning_rate": 1.9784627111618715e-06,
"loss": 0.6842,
"step": 1860
},
{
"epoch": 0.08,
"learning_rate": 1.9783294661344145e-06,
"loss": 0.7161,
"step": 1865
},
{
"epoch": 0.08,
"learning_rate": 1.978195814717405e-06,
"loss": 0.6881,
"step": 1870
},
{
"epoch": 0.08,
"learning_rate": 1.978061756966361e-06,
"loss": 0.7342,
"step": 1875
},
{
"epoch": 0.08,
"learning_rate": 1.977927292936969e-06,
"loss": 0.6767,
"step": 1880
},
{
"epoch": 0.08,
"learning_rate": 1.9777924226850842e-06,
"loss": 0.7096,
"step": 1885
},
{
"epoch": 0.08,
"learning_rate": 1.97765714626673e-06,
"loss": 0.694,
"step": 1890
},
{
"epoch": 0.08,
"learning_rate": 1.977521463738099e-06,
"loss": 0.7152,
"step": 1895
},
{
"epoch": 0.08,
"learning_rate": 1.9773853751555537e-06,
"loss": 0.6618,
"step": 1900
},
{
"epoch": 0.08,
"learning_rate": 1.977248880575623e-06,
"loss": 0.689,
"step": 1905
},
{
"epoch": 0.08,
"learning_rate": 1.9771119800550054e-06,
"loss": 0.6892,
"step": 1910
},
{
"epoch": 0.08,
"learning_rate": 1.9769746736505694e-06,
"loss": 0.7179,
"step": 1915
},
{
"epoch": 0.08,
"learning_rate": 1.97683696141935e-06,
"loss": 0.6888,
"step": 1920
},
{
"epoch": 0.08,
"learning_rate": 1.9766988434185514e-06,
"loss": 0.7041,
"step": 1925
},
{
"epoch": 0.08,
"learning_rate": 1.976560319705547e-06,
"loss": 0.6969,
"step": 1930
},
{
"epoch": 0.08,
"learning_rate": 1.9764213903378786e-06,
"loss": 0.7162,
"step": 1935
},
{
"epoch": 0.08,
"learning_rate": 1.9762820553732563e-06,
"loss": 0.7178,
"step": 1940
},
{
"epoch": 0.08,
"learning_rate": 1.976142314869558e-06,
"loss": 0.7309,
"step": 1945
},
{
"epoch": 0.08,
"learning_rate": 1.976002168884831e-06,
"loss": 0.7198,
"step": 1950
},
{
"epoch": 0.08,
"learning_rate": 1.975861617477291e-06,
"loss": 0.7131,
"step": 1955
},
{
"epoch": 0.08,
"learning_rate": 1.9757206607053218e-06,
"loss": 0.7087,
"step": 1960
},
{
"epoch": 0.08,
"learning_rate": 1.9755792986274755e-06,
"loss": 0.6708,
"step": 1965
},
{
"epoch": 0.08,
"learning_rate": 1.975437531302472e-06,
"loss": 0.7141,
"step": 1970
},
{
"epoch": 0.08,
"learning_rate": 1.975295358789201e-06,
"loss": 0.7152,
"step": 1975
},
{
"epoch": 0.08,
"learning_rate": 1.9751527811467195e-06,
"loss": 0.7172,
"step": 1980
},
{
"epoch": 0.08,
"learning_rate": 1.9750097984342534e-06,
"loss": 0.7472,
"step": 1985
},
{
"epoch": 0.08,
"learning_rate": 1.9748664107111962e-06,
"loss": 0.7129,
"step": 1990
},
{
"epoch": 0.08,
"learning_rate": 1.9747226180371094e-06,
"loss": 0.7066,
"step": 1995
},
{
"epoch": 0.08,
"learning_rate": 1.974578420471724e-06,
"loss": 0.7049,
"step": 2000
},
{
"epoch": 0.08,
"learning_rate": 1.9744338180749376e-06,
"loss": 0.7214,
"step": 2005
},
{
"epoch": 0.08,
"learning_rate": 1.9742888109068175e-06,
"loss": 0.7469,
"step": 2010
},
{
"epoch": 0.08,
"learning_rate": 1.9741433990275987e-06,
"loss": 0.7119,
"step": 2015
},
{
"epoch": 0.08,
"learning_rate": 1.973997582497683e-06,
"loss": 0.7487,
"step": 2020
},
{
"epoch": 0.08,
"learning_rate": 1.973851361377642e-06,
"loss": 0.7259,
"step": 2025
},
{
"epoch": 0.08,
"learning_rate": 1.9737047357282143e-06,
"loss": 0.7164,
"step": 2030
},
{
"epoch": 0.08,
"learning_rate": 1.9735577056103074e-06,
"loss": 0.7639,
"step": 2035
},
{
"epoch": 0.08,
"learning_rate": 1.9734102710849956e-06,
"loss": 0.7181,
"step": 2040
},
{
"epoch": 0.08,
"learning_rate": 1.973262432213523e-06,
"loss": 0.6833,
"step": 2045
},
{
"epoch": 0.08,
"learning_rate": 1.973114189057299e-06,
"loss": 0.7263,
"step": 2050
},
{
"epoch": 0.08,
"learning_rate": 1.9729655416779044e-06,
"loss": 0.6957,
"step": 2055
},
{
"epoch": 0.08,
"learning_rate": 1.972816490137085e-06,
"loss": 0.716,
"step": 2060
},
{
"epoch": 0.08,
"learning_rate": 1.9726670344967554e-06,
"loss": 0.699,
"step": 2065
},
{
"epoch": 0.08,
"learning_rate": 1.9725171748189987e-06,
"loss": 0.6858,
"step": 2070
},
{
"epoch": 0.08,
"learning_rate": 1.9723669111660645e-06,
"loss": 0.7036,
"step": 2075
},
{
"epoch": 0.08,
"learning_rate": 1.9722162436003715e-06,
"loss": 0.6958,
"step": 2080
},
{
"epoch": 0.08,
"learning_rate": 1.9720651721845062e-06,
"loss": 0.7303,
"step": 2085
},
{
"epoch": 0.08,
"learning_rate": 1.971913696981222e-06,
"loss": 0.6836,
"step": 2090
},
{
"epoch": 0.09,
"learning_rate": 1.97176181805344e-06,
"loss": 0.7349,
"step": 2095
},
{
"epoch": 0.09,
"learning_rate": 1.9716095354642493e-06,
"loss": 0.7105,
"step": 2100
},
{
"epoch": 0.09,
"learning_rate": 1.971456849276907e-06,
"loss": 0.7126,
"step": 2105
},
{
"epoch": 0.09,
"learning_rate": 1.971303759554838e-06,
"loss": 0.6959,
"step": 2110
},
{
"epoch": 0.09,
"learning_rate": 1.9711502663616344e-06,
"loss": 0.6699,
"step": 2115
},
{
"epoch": 0.09,
"learning_rate": 1.9709963697610554e-06,
"loss": 0.6671,
"step": 2120
},
{
"epoch": 0.09,
"learning_rate": 1.970842069817029e-06,
"loss": 0.7101,
"step": 2125
},
{
"epoch": 0.09,
"learning_rate": 1.970687366593649e-06,
"loss": 0.6985,
"step": 2130
},
{
"epoch": 0.09,
"learning_rate": 1.970532260155179e-06,
"loss": 0.7175,
"step": 2135
},
{
"epoch": 0.09,
"learning_rate": 1.970376750566048e-06,
"loss": 0.7251,
"step": 2140
},
{
"epoch": 0.09,
"learning_rate": 1.9702208378908537e-06,
"loss": 0.6949,
"step": 2145
},
{
"epoch": 0.09,
"learning_rate": 1.970064522194361e-06,
"loss": 0.7573,
"step": 2150
},
{
"epoch": 0.09,
"learning_rate": 1.9699078035415014e-06,
"loss": 0.6922,
"step": 2155
},
{
"epoch": 0.09,
"learning_rate": 1.9697506819973753e-06,
"loss": 0.7215,
"step": 2160
},
{
"epoch": 0.09,
"learning_rate": 1.9695931576272493e-06,
"loss": 0.7252,
"step": 2165
},
{
"epoch": 0.09,
"learning_rate": 1.9694352304965572e-06,
"loss": 0.7202,
"step": 2170
},
{
"epoch": 0.09,
"learning_rate": 1.9692769006709013e-06,
"loss": 0.6932,
"step": 2175
},
{
"epoch": 0.09,
"learning_rate": 1.96911816821605e-06,
"loss": 0.6522,
"step": 2180
},
{
"epoch": 0.09,
"learning_rate": 1.9689590331979394e-06,
"loss": 0.6933,
"step": 2185
},
{
"epoch": 0.09,
"learning_rate": 1.968799495682673e-06,
"loss": 0.6825,
"step": 2190
},
{
"epoch": 0.09,
"learning_rate": 1.968639555736521e-06,
"loss": 0.7037,
"step": 2195
},
{
"epoch": 0.09,
"learning_rate": 1.968479213425922e-06,
"loss": 0.6911,
"step": 2200
},
{
"epoch": 0.09,
"learning_rate": 1.9683184688174795e-06,
"loss": 0.7048,
"step": 2205
},
{
"epoch": 0.09,
"learning_rate": 1.9681573219779658e-06,
"loss": 0.7273,
"step": 2210
},
{
"epoch": 0.09,
"learning_rate": 1.9679957729743204e-06,
"loss": 0.7105,
"step": 2215
},
{
"epoch": 0.09,
"learning_rate": 1.9678338218736486e-06,
"loss": 0.6832,
"step": 2220
},
{
"epoch": 0.09,
"learning_rate": 1.967671468743224e-06,
"loss": 0.6829,
"step": 2225
},
{
"epoch": 0.09,
"learning_rate": 1.9675087136504865e-06,
"loss": 0.6971,
"step": 2230
},
{
"epoch": 0.09,
"learning_rate": 1.9673455566630437e-06,
"loss": 0.7298,
"step": 2235
},
{
"epoch": 0.09,
"learning_rate": 1.9671819978486688e-06,
"loss": 0.6841,
"step": 2240
},
{
"epoch": 0.09,
"learning_rate": 1.967018037275303e-06,
"loss": 0.6838,
"step": 2245
},
{
"epoch": 0.09,
"learning_rate": 1.966853675011054e-06,
"loss": 0.7053,
"step": 2250
},
{
"epoch": 0.09,
"learning_rate": 1.966688911124197e-06,
"loss": 0.6886,
"step": 2255
},
{
"epoch": 0.09,
"learning_rate": 1.9665237456831725e-06,
"loss": 0.7032,
"step": 2260
},
{
"epoch": 0.09,
"learning_rate": 1.9663581787565898e-06,
"loss": 0.7159,
"step": 2265
},
{
"epoch": 0.09,
"learning_rate": 1.966192210413223e-06,
"loss": 0.6896,
"step": 2270
},
{
"epoch": 0.09,
"learning_rate": 1.966025840722015e-06,
"loss": 0.7004,
"step": 2275
},
{
"epoch": 0.09,
"learning_rate": 1.9658590697520735e-06,
"loss": 0.6726,
"step": 2280
},
{
"epoch": 0.09,
"learning_rate": 1.965691897572674e-06,
"loss": 0.6911,
"step": 2285
},
{
"epoch": 0.09,
"learning_rate": 1.9655243242532584e-06,
"loss": 0.7235,
"step": 2290
},
{
"epoch": 0.09,
"learning_rate": 1.9653563498634347e-06,
"loss": 0.7014,
"step": 2295
},
{
"epoch": 0.09,
"learning_rate": 1.9651879744729786e-06,
"loss": 0.7192,
"step": 2300
},
{
"epoch": 0.09,
"learning_rate": 1.9650191981518317e-06,
"loss": 0.65,
"step": 2305
},
{
"epoch": 0.09,
"learning_rate": 1.964850020970102e-06,
"loss": 0.7128,
"step": 2310
},
{
"epoch": 0.09,
"learning_rate": 1.9646804429980644e-06,
"loss": 0.7265,
"step": 2315
},
{
"epoch": 0.09,
"learning_rate": 1.96451046430616e-06,
"loss": 0.7038,
"step": 2320
},
{
"epoch": 0.09,
"learning_rate": 1.964340084964997e-06,
"loss": 0.7132,
"step": 2325
},
{
"epoch": 0.09,
"learning_rate": 1.964169305045348e-06,
"loss": 0.6968,
"step": 2330
},
{
"epoch": 0.09,
"learning_rate": 1.9639981246181555e-06,
"loss": 0.7456,
"step": 2335
},
{
"epoch": 0.1,
"learning_rate": 1.9638265437545247e-06,
"loss": 0.7403,
"step": 2340
},
{
"epoch": 0.1,
"learning_rate": 1.9636545625257297e-06,
"loss": 0.6893,
"step": 2345
},
{
"epoch": 0.1,
"learning_rate": 1.96348218100321e-06,
"loss": 0.7341,
"step": 2350
},
{
"epoch": 0.1,
"learning_rate": 1.9633093992585706e-06,
"loss": 0.7459,
"step": 2355
},
{
"epoch": 0.1,
"learning_rate": 1.963136217363585e-06,
"loss": 0.716,
"step": 2360
},
{
"epoch": 0.1,
"learning_rate": 1.9629626353901897e-06,
"loss": 0.7086,
"step": 2365
},
{
"epoch": 0.1,
"learning_rate": 1.9627886534104903e-06,
"loss": 0.7041,
"step": 2370
},
{
"epoch": 0.1,
"learning_rate": 1.962614271496757e-06,
"loss": 0.7094,
"step": 2375
},
{
"epoch": 0.1,
"learning_rate": 1.962439489721427e-06,
"loss": 0.6794,
"step": 2380
},
{
"epoch": 0.1,
"learning_rate": 1.962264308157102e-06,
"loss": 0.6751,
"step": 2385
},
{
"epoch": 0.1,
"learning_rate": 1.9620887268765523e-06,
"loss": 0.7398,
"step": 2390
},
{
"epoch": 0.1,
"learning_rate": 1.961912745952712e-06,
"loss": 0.6622,
"step": 2395
},
{
"epoch": 0.1,
"learning_rate": 1.961736365458682e-06,
"loss": 0.7404,
"step": 2400
},
{
"epoch": 0.1,
"eval_loss": 0.6729753613471985,
"eval_runtime": 140.5864,
"eval_samples_per_second": 16.83,
"eval_steps_per_second": 2.81,
"step": 2400
},
{
"epoch": 0.1,
"learning_rate": 1.96155958546773e-06,
"loss": 0.6928,
"step": 2405
},
{
"epoch": 0.1,
"learning_rate": 1.961382406053288e-06,
"loss": 0.6832,
"step": 2410
},
{
"epoch": 0.1,
"learning_rate": 1.961204827288955e-06,
"loss": 0.6775,
"step": 2415
},
{
"epoch": 0.1,
"learning_rate": 1.961026849248496e-06,
"loss": 0.6995,
"step": 2420
},
{
"epoch": 0.1,
"learning_rate": 1.9608484720058416e-06,
"loss": 0.6508,
"step": 2425
},
{
"epoch": 0.1,
"learning_rate": 1.960669695635087e-06,
"loss": 0.6865,
"step": 2430
},
{
"epoch": 0.1,
"learning_rate": 1.960490520210496e-06,
"loss": 0.7273,
"step": 2435
},
{
"epoch": 0.1,
"learning_rate": 1.9603109458064955e-06,
"loss": 0.6513,
"step": 2440
},
{
"epoch": 0.1,
"learning_rate": 1.9601309724976795e-06,
"loss": 0.7051,
"step": 2445
},
{
"epoch": 0.1,
"learning_rate": 1.9599506003588068e-06,
"loss": 0.7164,
"step": 2450
},
{
"epoch": 0.1,
"learning_rate": 1.9597698294648034e-06,
"loss": 0.7269,
"step": 2455
},
{
"epoch": 0.1,
"learning_rate": 1.959588659890759e-06,
"loss": 0.7082,
"step": 2460
},
{
"epoch": 0.1,
"learning_rate": 1.9594070917119306e-06,
"loss": 0.7164,
"step": 2465
},
{
"epoch": 0.1,
"learning_rate": 1.9592251250037394e-06,
"loss": 0.7156,
"step": 2470
},
{
"epoch": 0.1,
"learning_rate": 1.9590427598417733e-06,
"loss": 0.6603,
"step": 2475
},
{
"epoch": 0.1,
"learning_rate": 1.958859996301785e-06,
"loss": 0.6811,
"step": 2480
},
{
"epoch": 0.1,
"learning_rate": 1.958676834459693e-06,
"loss": 0.701,
"step": 2485
},
{
"epoch": 0.1,
"learning_rate": 1.9584932743915807e-06,
"loss": 0.6546,
"step": 2490
},
{
"epoch": 0.1,
"learning_rate": 1.9583093161736975e-06,
"loss": 0.7022,
"step": 2495
},
{
"epoch": 0.1,
"learning_rate": 1.9581249598824588e-06,
"loss": 0.7066,
"step": 2500
},
{
"epoch": 0.1,
"learning_rate": 1.957940205594444e-06,
"loss": 0.6942,
"step": 2505
},
{
"epoch": 0.1,
"learning_rate": 1.957755053386398e-06,
"loss": 0.6983,
"step": 2510
},
{
"epoch": 0.1,
"learning_rate": 1.957569503335232e-06,
"loss": 0.7491,
"step": 2515
},
{
"epoch": 0.1,
"learning_rate": 1.957383555518022e-06,
"loss": 0.6916,
"step": 2520
},
{
"epoch": 0.1,
"learning_rate": 1.9571972100120087e-06,
"loss": 0.7035,
"step": 2525
},
{
"epoch": 0.1,
"learning_rate": 1.9570104668945986e-06,
"loss": 0.6987,
"step": 2530
},
{
"epoch": 0.1,
"learning_rate": 1.956823326243363e-06,
"loss": 0.7035,
"step": 2535
},
{
"epoch": 0.1,
"learning_rate": 1.956635788136039e-06,
"loss": 0.6854,
"step": 2540
},
{
"epoch": 0.1,
"learning_rate": 1.9564478526505276e-06,
"loss": 0.7194,
"step": 2545
},
{
"epoch": 0.1,
"learning_rate": 1.9562595198648956e-06,
"loss": 0.7026,
"step": 2550
},
{
"epoch": 0.1,
"learning_rate": 1.9560707898573757e-06,
"loss": 0.6887,
"step": 2555
},
{
"epoch": 0.1,
"learning_rate": 1.9558816627063636e-06,
"loss": 0.7127,
"step": 2560
},
{
"epoch": 0.1,
"learning_rate": 1.9556921384904217e-06,
"loss": 0.7126,
"step": 2565
},
{
"epoch": 0.1,
"learning_rate": 1.9555022172882767e-06,
"loss": 0.671,
"step": 2570
},
{
"epoch": 0.1,
"learning_rate": 1.9553118991788203e-06,
"loss": 0.7004,
"step": 2575
},
{
"epoch": 0.1,
"learning_rate": 1.9551211842411083e-06,
"loss": 0.7303,
"step": 2580
},
{
"epoch": 0.1,
"learning_rate": 1.9549300725543627e-06,
"loss": 0.7571,
"step": 2585
},
{
"epoch": 0.11,
"learning_rate": 1.9547385641979696e-06,
"loss": 0.6688,
"step": 2590
},
{
"epoch": 0.11,
"learning_rate": 1.9545466592514795e-06,
"loss": 0.7083,
"step": 2595
},
{
"epoch": 0.11,
"learning_rate": 1.9543543577946086e-06,
"loss": 0.7426,
"step": 2600
},
{
"epoch": 0.11,
"learning_rate": 1.9541616599072365e-06,
"loss": 0.6722,
"step": 2605
},
{
"epoch": 0.11,
"learning_rate": 1.953968565669409e-06,
"loss": 0.7215,
"step": 2610
},
{
"epoch": 0.11,
"learning_rate": 1.9537750751613354e-06,
"loss": 0.7354,
"step": 2615
},
{
"epoch": 0.11,
"learning_rate": 1.95358118846339e-06,
"loss": 0.7253,
"step": 2620
},
{
"epoch": 0.11,
"learning_rate": 1.9533869056561113e-06,
"loss": 0.6877,
"step": 2625
},
{
"epoch": 0.11,
"learning_rate": 1.9531922268202033e-06,
"loss": 0.72,
"step": 2630
},
{
"epoch": 0.11,
"learning_rate": 1.952997152036534e-06,
"loss": 0.6894,
"step": 2635
},
{
"epoch": 0.11,
"learning_rate": 1.952801681386135e-06,
"loss": 0.69,
"step": 2640
},
{
"epoch": 0.11,
"learning_rate": 1.9526058149502035e-06,
"loss": 0.689,
"step": 2645
},
{
"epoch": 0.11,
"learning_rate": 1.952409552810101e-06,
"loss": 0.7495,
"step": 2650
},
{
"epoch": 0.11,
"learning_rate": 1.9522128950473525e-06,
"loss": 0.7378,
"step": 2655
},
{
"epoch": 0.11,
"learning_rate": 1.9520158417436486e-06,
"loss": 0.7041,
"step": 2660
},
{
"epoch": 0.11,
"learning_rate": 1.9518183929808433e-06,
"loss": 0.711,
"step": 2665
},
{
"epoch": 0.11,
"learning_rate": 1.9516205488409543e-06,
"loss": 0.6908,
"step": 2670
},
{
"epoch": 0.11,
"learning_rate": 1.9514223094061654e-06,
"loss": 0.7107,
"step": 2675
},
{
"epoch": 0.11,
"learning_rate": 1.951223674758823e-06,
"loss": 0.7225,
"step": 2680
},
{
"epoch": 0.11,
"learning_rate": 1.9510246449814385e-06,
"loss": 0.7217,
"step": 2685
},
{
"epoch": 0.11,
"learning_rate": 1.950825220156687e-06,
"loss": 0.7274,
"step": 2690
},
{
"epoch": 0.11,
"learning_rate": 1.950625400367408e-06,
"loss": 0.6867,
"step": 2695
},
{
"epoch": 0.11,
"learning_rate": 1.9504251856966043e-06,
"loss": 0.7186,
"step": 2700
},
{
"epoch": 0.11,
"learning_rate": 1.950224576227444e-06,
"loss": 0.6889,
"step": 2705
},
{
"epoch": 0.11,
"learning_rate": 1.9500235720432583e-06,
"loss": 0.7005,
"step": 2710
},
{
"epoch": 0.11,
"learning_rate": 1.9498221732275425e-06,
"loss": 0.6874,
"step": 2715
},
{
"epoch": 0.11,
"learning_rate": 1.9496203798639565e-06,
"loss": 0.7033,
"step": 2720
},
{
"epoch": 0.11,
"learning_rate": 1.9494181920363223e-06,
"loss": 0.707,
"step": 2725
},
{
"epoch": 0.11,
"learning_rate": 1.9492156098286283e-06,
"loss": 0.7348,
"step": 2730
},
{
"epoch": 0.11,
"learning_rate": 1.949012633325024e-06,
"loss": 0.6636,
"step": 2735
},
{
"epoch": 0.11,
"learning_rate": 1.9488092626098254e-06,
"loss": 0.6975,
"step": 2740
},
{
"epoch": 0.11,
"learning_rate": 1.9486054977675103e-06,
"loss": 0.7124,
"step": 2745
},
{
"epoch": 0.11,
"learning_rate": 1.9484013388827204e-06,
"loss": 0.663,
"step": 2750
},
{
"epoch": 0.11,
"learning_rate": 1.9481967860402627e-06,
"loss": 0.6916,
"step": 2755
},
{
"epoch": 0.11,
"learning_rate": 1.9479918393251056e-06,
"loss": 0.7344,
"step": 2760
},
{
"epoch": 0.11,
"learning_rate": 1.9477864988223828e-06,
"loss": 0.6752,
"step": 2765
},
{
"epoch": 0.11,
"learning_rate": 1.9475807646173908e-06,
"loss": 0.6885,
"step": 2770
},
{
"epoch": 0.11,
"learning_rate": 1.9473746367955893e-06,
"loss": 0.6602,
"step": 2775
},
{
"epoch": 0.11,
"learning_rate": 1.947168115442603e-06,
"loss": 0.6764,
"step": 2780
},
{
"epoch": 0.11,
"learning_rate": 1.946961200644218e-06,
"loss": 0.6784,
"step": 2785
},
{
"epoch": 0.11,
"learning_rate": 1.946753892486386e-06,
"loss": 0.7325,
"step": 2790
},
{
"epoch": 0.11,
"learning_rate": 1.94654619105522e-06,
"loss": 0.6533,
"step": 2795
},
{
"epoch": 0.11,
"learning_rate": 1.946338096436998e-06,
"loss": 0.6741,
"step": 2800
},
{
"epoch": 0.11,
"learning_rate": 1.9461296087181604e-06,
"loss": 0.7046,
"step": 2805
},
{
"epoch": 0.11,
"learning_rate": 1.9459207279853113e-06,
"loss": 0.6797,
"step": 2810
},
{
"epoch": 0.11,
"learning_rate": 1.945711454325218e-06,
"loss": 0.7096,
"step": 2815
},
{
"epoch": 0.11,
"learning_rate": 1.9455017878248107e-06,
"loss": 0.698,
"step": 2820
},
{
"epoch": 0.11,
"learning_rate": 1.9452917285711834e-06,
"loss": 0.7036,
"step": 2825
},
{
"epoch": 0.11,
"learning_rate": 1.945081276651593e-06,
"loss": 0.7205,
"step": 2830
},
{
"epoch": 0.12,
"learning_rate": 1.9448704321534588e-06,
"loss": 0.6583,
"step": 2835
},
{
"epoch": 0.12,
"learning_rate": 1.944659195164364e-06,
"loss": 0.6757,
"step": 2840
},
{
"epoch": 0.12,
"learning_rate": 1.9444475657720545e-06,
"loss": 0.7033,
"step": 2845
},
{
"epoch": 0.12,
"learning_rate": 1.9442355440644394e-06,
"loss": 0.7029,
"step": 2850
},
{
"epoch": 0.12,
"learning_rate": 1.944023130129591e-06,
"loss": 0.6993,
"step": 2855
},
{
"epoch": 0.12,
"learning_rate": 1.9438103240557446e-06,
"loss": 0.704,
"step": 2860
},
{
"epoch": 0.12,
"learning_rate": 1.9435971259312966e-06,
"loss": 0.7104,
"step": 2865
},
{
"epoch": 0.12,
"learning_rate": 1.9433835358448086e-06,
"loss": 0.7017,
"step": 2870
},
{
"epoch": 0.12,
"learning_rate": 1.943169553885004e-06,
"loss": 0.7107,
"step": 2875
},
{
"epoch": 0.12,
"learning_rate": 1.9429551801407687e-06,
"loss": 0.6939,
"step": 2880
},
{
"epoch": 0.12,
"learning_rate": 1.942740414701152e-06,
"loss": 0.6737,
"step": 2885
},
{
"epoch": 0.12,
"learning_rate": 1.9425252576553656e-06,
"loss": 0.7174,
"step": 2890
},
{
"epoch": 0.12,
"learning_rate": 1.942309709092784e-06,
"loss": 0.7147,
"step": 2895
},
{
"epoch": 0.12,
"learning_rate": 1.9420937691029435e-06,
"loss": 0.7351,
"step": 2900
},
{
"epoch": 0.12,
"learning_rate": 1.9418774377755444e-06,
"loss": 0.6845,
"step": 2905
},
{
"epoch": 0.12,
"learning_rate": 1.9416607152004485e-06,
"loss": 0.7156,
"step": 2910
},
{
"epoch": 0.12,
"learning_rate": 1.9414436014676806e-06,
"loss": 0.7532,
"step": 2915
},
{
"epoch": 0.12,
"learning_rate": 1.941226096667428e-06,
"loss": 0.7421,
"step": 2920
},
{
"epoch": 0.12,
"learning_rate": 1.94100820089004e-06,
"loss": 0.7049,
"step": 2925
},
{
"epoch": 0.12,
"learning_rate": 1.940789914226029e-06,
"loss": 0.7652,
"step": 2930
},
{
"epoch": 0.12,
"learning_rate": 1.940571236766069e-06,
"loss": 0.6884,
"step": 2935
},
{
"epoch": 0.12,
"learning_rate": 1.9403521686009964e-06,
"loss": 0.6974,
"step": 2940
},
{
"epoch": 0.12,
"learning_rate": 1.940132709821811e-06,
"loss": 0.7362,
"step": 2945
},
{
"epoch": 0.12,
"learning_rate": 1.9399128605196737e-06,
"loss": 0.7167,
"step": 2950
},
{
"epoch": 0.12,
"learning_rate": 1.9396926207859082e-06,
"loss": 0.6806,
"step": 2955
},
{
"epoch": 0.12,
"learning_rate": 1.939471990712e-06,
"loss": 0.7069,
"step": 2960
},
{
"epoch": 0.12,
"learning_rate": 1.939250970389597e-06,
"loss": 0.7185,
"step": 2965
},
{
"epoch": 0.12,
"learning_rate": 1.9390295599105085e-06,
"loss": 0.6996,
"step": 2970
},
{
"epoch": 0.12,
"learning_rate": 1.9388077593667075e-06,
"loss": 0.7304,
"step": 2975
},
{
"epoch": 0.12,
"learning_rate": 1.9385855688503276e-06,
"loss": 0.7209,
"step": 2980
},
{
"epoch": 0.12,
"learning_rate": 1.9383629884536644e-06,
"loss": 0.7077,
"step": 2985
},
{
"epoch": 0.12,
"learning_rate": 1.938140018269176e-06,
"loss": 0.6838,
"step": 2990
},
{
"epoch": 0.12,
"learning_rate": 1.937916658389483e-06,
"loss": 0.6784,
"step": 2995
},
{
"epoch": 0.12,
"learning_rate": 1.9376929089073665e-06,
"loss": 0.7185,
"step": 3000
},
{
"epoch": 0.12,
"learning_rate": 1.9374687699157703e-06,
"loss": 0.7146,
"step": 3005
},
{
"epoch": 0.12,
"learning_rate": 1.9372442415077994e-06,
"loss": 0.7131,
"step": 3010
},
{
"epoch": 0.12,
"learning_rate": 1.9370193237767213e-06,
"loss": 0.7014,
"step": 3015
},
{
"epoch": 0.12,
"learning_rate": 1.9367940168159648e-06,
"loss": 0.697,
"step": 3020
},
{
"epoch": 0.12,
"learning_rate": 1.9365683207191205e-06,
"loss": 0.7009,
"step": 3025
},
{
"epoch": 0.12,
"learning_rate": 1.9363422355799406e-06,
"loss": 0.7125,
"step": 3030
},
{
"epoch": 0.12,
"learning_rate": 1.936115761492339e-06,
"loss": 0.6954,
"step": 3035
},
{
"epoch": 0.12,
"learning_rate": 1.935888898550391e-06,
"loss": 0.7465,
"step": 3040
},
{
"epoch": 0.12,
"learning_rate": 1.935661646848333e-06,
"loss": 0.7253,
"step": 3045
},
{
"epoch": 0.12,
"learning_rate": 1.935434006480564e-06,
"loss": 0.7223,
"step": 3050
},
{
"epoch": 0.12,
"learning_rate": 1.935205977541644e-06,
"loss": 0.7141,
"step": 3055
},
{
"epoch": 0.12,
"learning_rate": 1.9349775601262935e-06,
"loss": 0.732,
"step": 3060
},
{
"epoch": 0.12,
"learning_rate": 1.9347487543293958e-06,
"loss": 0.709,
"step": 3065
},
{
"epoch": 0.12,
"learning_rate": 1.934519560245994e-06,
"loss": 0.724,
"step": 3070
},
{
"epoch": 0.12,
"learning_rate": 1.9342899779712946e-06,
"loss": 0.6998,
"step": 3075
},
{
"epoch": 0.13,
"learning_rate": 1.934060007600663e-06,
"loss": 0.6945,
"step": 3080
},
{
"epoch": 0.13,
"learning_rate": 1.9338296492296267e-06,
"loss": 0.7068,
"step": 3085
},
{
"epoch": 0.13,
"learning_rate": 1.9335989029538756e-06,
"loss": 0.691,
"step": 3090
},
{
"epoch": 0.13,
"learning_rate": 1.9333677688692595e-06,
"loss": 0.7063,
"step": 3095
},
{
"epoch": 0.13,
"learning_rate": 1.9331362470717886e-06,
"loss": 0.7518,
"step": 3100
},
{
"epoch": 0.13,
"learning_rate": 1.9329043376576357e-06,
"loss": 0.6806,
"step": 3105
},
{
"epoch": 0.13,
"learning_rate": 1.932672040723134e-06,
"loss": 0.6878,
"step": 3110
},
{
"epoch": 0.13,
"learning_rate": 1.9324393563647772e-06,
"loss": 0.6961,
"step": 3115
},
{
"epoch": 0.13,
"learning_rate": 1.932206284679221e-06,
"loss": 0.7125,
"step": 3120
},
{
"epoch": 0.13,
"learning_rate": 1.931972825763281e-06,
"loss": 0.6893,
"step": 3125
},
{
"epoch": 0.13,
"learning_rate": 1.931738979713934e-06,
"loss": 0.6821,
"step": 3130
},
{
"epoch": 0.13,
"learning_rate": 1.9315047466283177e-06,
"loss": 0.735,
"step": 3135
},
{
"epoch": 0.13,
"learning_rate": 1.9312701266037302e-06,
"loss": 0.6873,
"step": 3140
},
{
"epoch": 0.13,
"learning_rate": 1.931035119737631e-06,
"loss": 0.6853,
"step": 3145
},
{
"epoch": 0.13,
"learning_rate": 1.9307997261276393e-06,
"loss": 0.7256,
"step": 3150
},
{
"epoch": 0.13,
"learning_rate": 1.9305639458715365e-06,
"loss": 0.7297,
"step": 3155
},
{
"epoch": 0.13,
"learning_rate": 1.930327779067263e-06,
"loss": 0.7162,
"step": 3160
},
{
"epoch": 0.13,
"learning_rate": 1.9300912258129206e-06,
"loss": 0.7133,
"step": 3165
},
{
"epoch": 0.13,
"learning_rate": 1.9298542862067712e-06,
"loss": 0.7255,
"step": 3170
},
{
"epoch": 0.13,
"learning_rate": 1.9296169603472384e-06,
"loss": 0.6895,
"step": 3175
},
{
"epoch": 0.13,
"learning_rate": 1.929379248332904e-06,
"loss": 0.729,
"step": 3180
},
{
"epoch": 0.13,
"learning_rate": 1.9291411502625123e-06,
"loss": 0.6971,
"step": 3185
},
{
"epoch": 0.13,
"learning_rate": 1.928902666234967e-06,
"loss": 0.7232,
"step": 3190
},
{
"epoch": 0.13,
"learning_rate": 1.9286637963493323e-06,
"loss": 0.6653,
"step": 3195
},
{
"epoch": 0.13,
"learning_rate": 1.9284245407048323e-06,
"loss": 0.7309,
"step": 3200
},
{
"epoch": 0.13,
"eval_loss": 0.6701433062553406,
"eval_runtime": 140.1425,
"eval_samples_per_second": 16.883,
"eval_steps_per_second": 2.819,
"step": 3200
},
{
"epoch": 0.13,
"learning_rate": 1.928184899400853e-06,
"loss": 0.7025,
"step": 3205
},
{
"epoch": 0.13,
"learning_rate": 1.9279448725369375e-06,
"loss": 0.711,
"step": 3210
},
{
"epoch": 0.13,
"learning_rate": 1.927704460212792e-06,
"loss": 0.6813,
"step": 3215
},
{
"epoch": 0.13,
"learning_rate": 1.9274636625282816e-06,
"loss": 0.7209,
"step": 3220
},
{
"epoch": 0.13,
"learning_rate": 1.927222479583431e-06,
"loss": 0.7279,
"step": 3225
},
{
"epoch": 0.13,
"learning_rate": 1.9269809114784265e-06,
"loss": 0.6779,
"step": 3230
},
{
"epoch": 0.13,
"learning_rate": 1.926738958313612e-06,
"loss": 0.7237,
"step": 3235
},
{
"epoch": 0.13,
"learning_rate": 1.9264966201894945e-06,
"loss": 0.7123,
"step": 3240
},
{
"epoch": 0.13,
"learning_rate": 1.9262538972067375e-06,
"loss": 0.7147,
"step": 3245
},
{
"epoch": 0.13,
"learning_rate": 1.9260107894661666e-06,
"loss": 0.7104,
"step": 3250
},
{
"epoch": 0.13,
"learning_rate": 1.9257672970687673e-06,
"loss": 0.7059,
"step": 3255
},
{
"epoch": 0.13,
"learning_rate": 1.9255234201156834e-06,
"loss": 0.7259,
"step": 3260
},
{
"epoch": 0.13,
"learning_rate": 1.9252791587082195e-06,
"loss": 0.6778,
"step": 3265
},
{
"epoch": 0.13,
"learning_rate": 1.9250345129478396e-06,
"loss": 0.7456,
"step": 3270
},
{
"epoch": 0.13,
"learning_rate": 1.924789482936168e-06,
"loss": 0.7254,
"step": 3275
},
{
"epoch": 0.13,
"learning_rate": 1.924544068774987e-06,
"loss": 0.732,
"step": 3280
},
{
"epoch": 0.13,
"learning_rate": 1.92429827056624e-06,
"loss": 0.6824,
"step": 3285
},
{
"epoch": 0.13,
"learning_rate": 1.9240520884120296e-06,
"loss": 0.6807,
"step": 3290
},
{
"epoch": 0.13,
"learning_rate": 1.923805522414618e-06,
"loss": 0.6679,
"step": 3295
},
{
"epoch": 0.13,
"learning_rate": 1.923558572676426e-06,
"loss": 0.6813,
"step": 3300
},
{
"epoch": 0.13,
"learning_rate": 1.9233112393000344e-06,
"loss": 0.6791,
"step": 3305
},
{
"epoch": 0.13,
"learning_rate": 1.9230635223881836e-06,
"loss": 0.6877,
"step": 3310
},
{
"epoch": 0.13,
"learning_rate": 1.9228154220437733e-06,
"loss": 0.7023,
"step": 3315
},
{
"epoch": 0.13,
"learning_rate": 1.922566938369861e-06,
"loss": 0.7001,
"step": 3320
},
{
"epoch": 0.14,
"learning_rate": 1.9223180714696664e-06,
"loss": 0.691,
"step": 3325
},
{
"epoch": 0.14,
"learning_rate": 1.922068821446565e-06,
"loss": 0.7149,
"step": 3330
},
{
"epoch": 0.14,
"learning_rate": 1.9218191884040945e-06,
"loss": 0.6826,
"step": 3335
},
{
"epoch": 0.14,
"learning_rate": 1.9215691724459496e-06,
"loss": 0.7069,
"step": 3340
},
{
"epoch": 0.14,
"learning_rate": 1.9213187736759848e-06,
"loss": 0.7044,
"step": 3345
},
{
"epoch": 0.14,
"learning_rate": 1.9210679921982134e-06,
"loss": 0.7026,
"step": 3350
},
{
"epoch": 0.14,
"learning_rate": 1.9208168281168083e-06,
"loss": 0.7151,
"step": 3355
},
{
"epoch": 0.14,
"learning_rate": 1.9205652815361003e-06,
"loss": 0.7147,
"step": 3360
},
{
"epoch": 0.14,
"learning_rate": 1.92031335256058e-06,
"loss": 0.6891,
"step": 3365
},
{
"epoch": 0.14,
"learning_rate": 1.9200610412948967e-06,
"loss": 0.7246,
"step": 3370
},
{
"epoch": 0.14,
"learning_rate": 1.9198083478438584e-06,
"loss": 0.6832,
"step": 3375
},
{
"epoch": 0.14,
"learning_rate": 1.919555272312431e-06,
"loss": 0.6708,
"step": 3380
},
{
"epoch": 0.14,
"learning_rate": 1.91930181480574e-06,
"loss": 0.7217,
"step": 3385
},
{
"epoch": 0.14,
"learning_rate": 1.9190479754290703e-06,
"loss": 0.7272,
"step": 3390
},
{
"epoch": 0.14,
"learning_rate": 1.918793754287864e-06,
"loss": 0.6629,
"step": 3395
},
{
"epoch": 0.14,
"learning_rate": 1.918539151487722e-06,
"loss": 0.6633,
"step": 3400
},
{
"epoch": 0.14,
"learning_rate": 1.9182841671344053e-06,
"loss": 0.7085,
"step": 3405
},
{
"epoch": 0.14,
"learning_rate": 1.918028801333831e-06,
"loss": 0.7321,
"step": 3410
},
{
"epoch": 0.14,
"learning_rate": 1.9177730541920757e-06,
"loss": 0.7141,
"step": 3415
},
{
"epoch": 0.14,
"learning_rate": 1.9175169258153752e-06,
"loss": 0.6976,
"step": 3420
},
{
"epoch": 0.14,
"learning_rate": 1.9172604163101227e-06,
"loss": 0.7034,
"step": 3425
},
{
"epoch": 0.14,
"learning_rate": 1.9170035257828706e-06,
"loss": 0.6478,
"step": 3430
},
{
"epoch": 0.14,
"learning_rate": 1.9167462543403286e-06,
"loss": 0.6949,
"step": 3435
},
{
"epoch": 0.14,
"learning_rate": 1.9164886020893647e-06,
"loss": 0.7224,
"step": 3440
},
{
"epoch": 0.14,
"learning_rate": 1.9162305691370057e-06,
"loss": 0.7016,
"step": 3445
},
{
"epoch": 0.14,
"learning_rate": 1.9159721555904364e-06,
"loss": 0.7065,
"step": 3450
},
{
"epoch": 0.14,
"learning_rate": 1.9157133615569993e-06,
"loss": 0.6808,
"step": 3455
},
{
"epoch": 0.14,
"learning_rate": 1.9154541871441947e-06,
"loss": 0.7026,
"step": 3460
},
{
"epoch": 0.14,
"learning_rate": 1.9151946324596826e-06,
"loss": 0.7035,
"step": 3465
},
{
"epoch": 0.14,
"learning_rate": 1.9149346976112787e-06,
"loss": 0.6941,
"step": 3470
},
{
"epoch": 0.14,
"learning_rate": 1.9146743827069584e-06,
"loss": 0.6988,
"step": 3475
},
{
"epoch": 0.14,
"learning_rate": 1.9144136878548536e-06,
"loss": 0.6812,
"step": 3480
},
{
"epoch": 0.14,
"learning_rate": 1.914152613163255e-06,
"loss": 0.6976,
"step": 3485
},
{
"epoch": 0.14,
"learning_rate": 1.913891158740611e-06,
"loss": 0.7263,
"step": 3490
},
{
"epoch": 0.14,
"learning_rate": 1.913629324695527e-06,
"loss": 0.6872,
"step": 3495
},
{
"epoch": 0.14,
"learning_rate": 1.913367111136767e-06,
"loss": 0.6588,
"step": 3500
},
{
"epoch": 0.14,
"learning_rate": 1.913104518173252e-06,
"loss": 0.6679,
"step": 3505
},
{
"epoch": 0.14,
"learning_rate": 1.912841545914061e-06,
"loss": 0.7026,
"step": 3510
},
{
"epoch": 0.14,
"learning_rate": 1.9125781944684304e-06,
"loss": 0.6763,
"step": 3515
},
{
"epoch": 0.14,
"learning_rate": 1.9123144639457537e-06,
"loss": 0.7106,
"step": 3520
},
{
"epoch": 0.14,
"learning_rate": 1.9120503544555826e-06,
"loss": 0.7148,
"step": 3525
},
{
"epoch": 0.14,
"learning_rate": 1.911785866107626e-06,
"loss": 0.7029,
"step": 3530
},
{
"epoch": 0.14,
"learning_rate": 1.9115209990117495e-06,
"loss": 0.7071,
"step": 3535
},
{
"epoch": 0.14,
"learning_rate": 1.9112557532779773e-06,
"loss": 0.6967,
"step": 3540
},
{
"epoch": 0.14,
"learning_rate": 1.9109901290164897e-06,
"loss": 0.6872,
"step": 3545
},
{
"epoch": 0.14,
"learning_rate": 1.9107241263376255e-06,
"loss": 0.6732,
"step": 3550
},
{
"epoch": 0.14,
"learning_rate": 1.9104577453518783e-06,
"loss": 0.7015,
"step": 3555
},
{
"epoch": 0.14,
"learning_rate": 1.910190986169902e-06,
"loss": 0.6894,
"step": 3560
},
{
"epoch": 0.14,
"learning_rate": 1.9099238489025054e-06,
"loss": 0.6748,
"step": 3565
},
{
"epoch": 0.15,
"learning_rate": 1.909656333660655e-06,
"loss": 0.6691,
"step": 3570
},
{
"epoch": 0.15,
"learning_rate": 1.909388440555474e-06,
"loss": 0.6763,
"step": 3575
},
{
"epoch": 0.15,
"learning_rate": 1.909120169698244e-06,
"loss": 0.7212,
"step": 3580
},
{
"epoch": 0.15,
"learning_rate": 1.9088515212004006e-06,
"loss": 0.6999,
"step": 3585
},
{
"epoch": 0.15,
"learning_rate": 1.90858249517354e-06,
"loss": 0.7089,
"step": 3590
},
{
"epoch": 0.15,
"learning_rate": 1.9083130917294116e-06,
"loss": 0.6819,
"step": 3595
},
{
"epoch": 0.15,
"learning_rate": 1.9080433109799243e-06,
"loss": 0.6902,
"step": 3600
},
{
"epoch": 0.15,
"learning_rate": 1.9077731530371425e-06,
"loss": 0.6825,
"step": 3605
},
{
"epoch": 0.15,
"learning_rate": 1.9075026180132873e-06,
"loss": 0.7008,
"step": 3610
},
{
"epoch": 0.15,
"learning_rate": 1.9072317060207364e-06,
"loss": 0.6789,
"step": 3615
},
{
"epoch": 0.15,
"learning_rate": 1.9069604171720243e-06,
"loss": 0.7221,
"step": 3620
},
{
"epoch": 0.15,
"learning_rate": 1.9066887515798426e-06,
"loss": 0.6761,
"step": 3625
},
{
"epoch": 0.15,
"learning_rate": 1.9064167093570382e-06,
"loss": 0.7134,
"step": 3630
},
{
"epoch": 0.15,
"learning_rate": 1.9061442906166154e-06,
"loss": 0.6633,
"step": 3635
},
{
"epoch": 0.15,
"learning_rate": 1.9058714954717345e-06,
"loss": 0.6869,
"step": 3640
},
{
"epoch": 0.15,
"learning_rate": 1.9055983240357123e-06,
"loss": 0.6967,
"step": 3645
},
{
"epoch": 0.15,
"learning_rate": 1.9053247764220218e-06,
"loss": 0.7272,
"step": 3650
},
{
"epoch": 0.15,
"learning_rate": 1.905050852744292e-06,
"loss": 0.6696,
"step": 3655
},
{
"epoch": 0.15,
"learning_rate": 1.904776553116309e-06,
"loss": 0.6606,
"step": 3660
},
{
"epoch": 0.15,
"learning_rate": 1.9045018776520138e-06,
"loss": 0.7055,
"step": 3665
},
{
"epoch": 0.15,
"learning_rate": 1.9042268264655048e-06,
"loss": 0.6879,
"step": 3670
},
{
"epoch": 0.15,
"learning_rate": 1.9039513996710357e-06,
"loss": 0.7322,
"step": 3675
},
{
"epoch": 0.15,
"learning_rate": 1.903675597383016e-06,
"loss": 0.7115,
"step": 3680
},
{
"epoch": 0.15,
"learning_rate": 1.9033994197160124e-06,
"loss": 0.6892,
"step": 3685
},
{
"epoch": 0.15,
"learning_rate": 1.903122866784746e-06,
"loss": 0.7278,
"step": 3690
},
{
"epoch": 0.15,
"learning_rate": 1.9028459387040944e-06,
"loss": 0.6844,
"step": 3695
},
{
"epoch": 0.15,
"learning_rate": 1.9025686355890916e-06,
"loss": 0.7143,
"step": 3700
},
{
"epoch": 0.15,
"learning_rate": 1.9022909575549265e-06,
"loss": 0.7161,
"step": 3705
},
{
"epoch": 0.15,
"learning_rate": 1.9020129047169443e-06,
"loss": 0.7013,
"step": 3710
},
{
"epoch": 0.15,
"learning_rate": 1.9017344771906463e-06,
"loss": 0.6588,
"step": 3715
},
{
"epoch": 0.15,
"learning_rate": 1.9014556750916879e-06,
"loss": 0.6451,
"step": 3720
},
{
"epoch": 0.15,
"learning_rate": 1.9011764985358817e-06,
"loss": 0.6991,
"step": 3725
},
{
"epoch": 0.15,
"learning_rate": 1.900896947639195e-06,
"loss": 0.6901,
"step": 3730
},
{
"epoch": 0.15,
"learning_rate": 1.9006170225177508e-06,
"loss": 0.7236,
"step": 3735
},
{
"epoch": 0.15,
"learning_rate": 1.9003367232878273e-06,
"loss": 0.6827,
"step": 3740
},
{
"epoch": 0.15,
"learning_rate": 1.9000560500658592e-06,
"loss": 0.6791,
"step": 3745
},
{
"epoch": 0.15,
"learning_rate": 1.8997750029684347e-06,
"loss": 0.6822,
"step": 3750
},
{
"epoch": 0.15,
"learning_rate": 1.899493582112299e-06,
"loss": 0.7139,
"step": 3755
},
{
"epoch": 0.15,
"learning_rate": 1.8992117876143516e-06,
"loss": 0.6901,
"step": 3760
},
{
"epoch": 0.15,
"learning_rate": 1.8989296195916476e-06,
"loss": 0.7012,
"step": 3765
},
{
"epoch": 0.15,
"learning_rate": 1.8986470781613973e-06,
"loss": 0.7305,
"step": 3770
},
{
"epoch": 0.15,
"learning_rate": 1.8983641634409656e-06,
"loss": 0.6812,
"step": 3775
},
{
"epoch": 0.15,
"learning_rate": 1.8980808755478726e-06,
"loss": 0.6781,
"step": 3780
},
{
"epoch": 0.15,
"learning_rate": 1.8977972145997945e-06,
"loss": 0.7271,
"step": 3785
},
{
"epoch": 0.15,
"learning_rate": 1.897513180714561e-06,
"loss": 0.7162,
"step": 3790
},
{
"epoch": 0.15,
"learning_rate": 1.8972287740101572e-06,
"loss": 0.6669,
"step": 3795
},
{
"epoch": 0.15,
"learning_rate": 1.8969439946047232e-06,
"loss": 0.7358,
"step": 3800
},
{
"epoch": 0.15,
"learning_rate": 1.8966588426165544e-06,
"loss": 0.7315,
"step": 3805
},
{
"epoch": 0.15,
"learning_rate": 1.8963733181640999e-06,
"loss": 0.7245,
"step": 3810
},
{
"epoch": 0.15,
"learning_rate": 1.8960874213659643e-06,
"loss": 0.7233,
"step": 3815
},
{
"epoch": 0.16,
"learning_rate": 1.8958011523409067e-06,
"loss": 0.7128,
"step": 3820
},
{
"epoch": 0.16,
"learning_rate": 1.8955145112078408e-06,
"loss": 0.6579,
"step": 3825
},
{
"epoch": 0.16,
"learning_rate": 1.8952274980858344e-06,
"loss": 0.7155,
"step": 3830
},
{
"epoch": 0.16,
"learning_rate": 1.8949401130941109e-06,
"loss": 0.6825,
"step": 3835
},
{
"epoch": 0.16,
"learning_rate": 1.894652356352047e-06,
"loss": 0.6838,
"step": 3840
},
{
"epoch": 0.16,
"learning_rate": 1.8943642279791747e-06,
"loss": 0.7339,
"step": 3845
},
{
"epoch": 0.16,
"learning_rate": 1.8940757280951799e-06,
"loss": 0.7334,
"step": 3850
},
{
"epoch": 0.16,
"learning_rate": 1.8937868568199026e-06,
"loss": 0.7113,
"step": 3855
},
{
"epoch": 0.16,
"learning_rate": 1.893497614273338e-06,
"loss": 0.7085,
"step": 3860
},
{
"epoch": 0.16,
"learning_rate": 1.8932080005756346e-06,
"loss": 0.7179,
"step": 3865
},
{
"epoch": 0.16,
"learning_rate": 1.8929180158470953e-06,
"loss": 0.6847,
"step": 3870
},
{
"epoch": 0.16,
"learning_rate": 1.8926276602081777e-06,
"loss": 0.7149,
"step": 3875
},
{
"epoch": 0.16,
"learning_rate": 1.8923369337794926e-06,
"loss": 0.6732,
"step": 3880
},
{
"epoch": 0.16,
"learning_rate": 1.8920458366818055e-06,
"loss": 0.6769,
"step": 3885
},
{
"epoch": 0.16,
"learning_rate": 1.8917543690360351e-06,
"loss": 0.6755,
"step": 3890
},
{
"epoch": 0.16,
"learning_rate": 1.8914625309632552e-06,
"loss": 0.7257,
"step": 3895
},
{
"epoch": 0.16,
"learning_rate": 1.8911703225846921e-06,
"loss": 0.6711,
"step": 3900
},
{
"epoch": 0.16,
"learning_rate": 1.8908777440217274e-06,
"loss": 0.6978,
"step": 3905
},
{
"epoch": 0.16,
"learning_rate": 1.8905847953958951e-06,
"loss": 0.706,
"step": 3910
},
{
"epoch": 0.16,
"learning_rate": 1.8902914768288837e-06,
"loss": 0.7315,
"step": 3915
},
{
"epoch": 0.16,
"learning_rate": 1.8899977884425353e-06,
"loss": 0.7385,
"step": 3920
},
{
"epoch": 0.16,
"learning_rate": 1.8897037303588452e-06,
"loss": 0.6868,
"step": 3925
},
{
"epoch": 0.16,
"learning_rate": 1.889409302699963e-06,
"loss": 0.6924,
"step": 3930
},
{
"epoch": 0.16,
"learning_rate": 1.8891145055881907e-06,
"loss": 0.6575,
"step": 3935
},
{
"epoch": 0.16,
"learning_rate": 1.8888193391459853e-06,
"loss": 0.7152,
"step": 3940
},
{
"epoch": 0.16,
"learning_rate": 1.8885238034959556e-06,
"loss": 0.6853,
"step": 3945
},
{
"epoch": 0.16,
"learning_rate": 1.8882278987608653e-06,
"loss": 0.6626,
"step": 3950
},
{
"epoch": 0.16,
"learning_rate": 1.8879316250636302e-06,
"loss": 0.6714,
"step": 3955
},
{
"epoch": 0.16,
"learning_rate": 1.8876349825273197e-06,
"loss": 0.6994,
"step": 3960
},
{
"epoch": 0.16,
"learning_rate": 1.8873379712751567e-06,
"loss": 0.7311,
"step": 3965
},
{
"epoch": 0.16,
"learning_rate": 1.8870405914305173e-06,
"loss": 0.7214,
"step": 3970
},
{
"epoch": 0.16,
"learning_rate": 1.88674284311693e-06,
"loss": 0.7004,
"step": 3975
},
{
"epoch": 0.16,
"learning_rate": 1.8864447264580776e-06,
"loss": 0.6956,
"step": 3980
},
{
"epoch": 0.16,
"learning_rate": 1.8861462415777942e-06,
"loss": 0.7378,
"step": 3985
},
{
"epoch": 0.16,
"learning_rate": 1.885847388600069e-06,
"loss": 0.7195,
"step": 3990
},
{
"epoch": 0.16,
"learning_rate": 1.8855481676490417e-06,
"loss": 0.6948,
"step": 3995
},
{
"epoch": 0.16,
"learning_rate": 1.885248578849007e-06,
"loss": 0.7367,
"step": 4000
},
{
"epoch": 0.16,
"eval_loss": 0.666739821434021,
"eval_runtime": 138.8501,
"eval_samples_per_second": 17.04,
"eval_steps_per_second": 2.845,
"step": 4000
},
{
"epoch": 0.16,
"learning_rate": 1.884948622324411e-06,
"loss": 0.6942,
"step": 4005
},
{
"epoch": 0.16,
"learning_rate": 1.884648298199853e-06,
"loss": 0.6911,
"step": 4010
},
{
"epoch": 0.16,
"learning_rate": 1.8843476066000856e-06,
"loss": 0.7338,
"step": 4015
},
{
"epoch": 0.16,
"learning_rate": 1.884046547650013e-06,
"loss": 0.6914,
"step": 4020
},
{
"epoch": 0.16,
"learning_rate": 1.8837451214746922e-06,
"loss": 0.6998,
"step": 4025
},
{
"epoch": 0.16,
"learning_rate": 1.8834433281993336e-06,
"loss": 0.7024,
"step": 4030
},
{
"epoch": 0.16,
"learning_rate": 1.8831411679492992e-06,
"loss": 0.6748,
"step": 4035
},
{
"epoch": 0.16,
"learning_rate": 1.882838640850104e-06,
"loss": 0.687,
"step": 4040
},
{
"epoch": 0.16,
"learning_rate": 1.8825357470274148e-06,
"loss": 0.661,
"step": 4045
},
{
"epoch": 0.16,
"learning_rate": 1.8822324866070512e-06,
"loss": 0.7209,
"step": 4050
},
{
"epoch": 0.16,
"learning_rate": 1.8819288597149846e-06,
"loss": 0.6597,
"step": 4055
},
{
"epoch": 0.16,
"learning_rate": 1.88162486647734e-06,
"loss": 0.6923,
"step": 4060
},
{
"epoch": 0.17,
"learning_rate": 1.8813205070203924e-06,
"loss": 0.6875,
"step": 4065
},
{
"epoch": 0.17,
"learning_rate": 1.8810157814705705e-06,
"loss": 0.7085,
"step": 4070
},
{
"epoch": 0.17,
"learning_rate": 1.8807106899544547e-06,
"loss": 0.6786,
"step": 4075
},
{
"epoch": 0.17,
"learning_rate": 1.8804052325987775e-06,
"loss": 0.7078,
"step": 4080
},
{
"epoch": 0.17,
"learning_rate": 1.8800994095304227e-06,
"loss": 0.6945,
"step": 4085
},
{
"epoch": 0.17,
"learning_rate": 1.8797932208764273e-06,
"loss": 0.7232,
"step": 4090
},
{
"epoch": 0.17,
"learning_rate": 1.8794866667639791e-06,
"loss": 0.6876,
"step": 4095
},
{
"epoch": 0.17,
"learning_rate": 1.8791797473204176e-06,
"loss": 0.7299,
"step": 4100
},
{
"epoch": 0.17,
"learning_rate": 1.8788724626732347e-06,
"loss": 0.6689,
"step": 4105
},
{
"epoch": 0.17,
"learning_rate": 1.878564812950074e-06,
"loss": 0.7345,
"step": 4110
},
{
"epoch": 0.17,
"learning_rate": 1.8782567982787302e-06,
"loss": 0.7271,
"step": 4115
},
{
"epoch": 0.17,
"learning_rate": 1.8779484187871504e-06,
"loss": 0.7328,
"step": 4120
},
{
"epoch": 0.17,
"learning_rate": 1.8776396746034324e-06,
"loss": 0.7043,
"step": 4125
},
{
"epoch": 0.17,
"learning_rate": 1.8773305658558258e-06,
"loss": 0.6841,
"step": 4130
},
{
"epoch": 0.17,
"learning_rate": 1.8770210926727316e-06,
"loss": 0.7154,
"step": 4135
},
{
"epoch": 0.17,
"learning_rate": 1.8767112551827027e-06,
"loss": 0.7329,
"step": 4140
},
{
"epoch": 0.17,
"learning_rate": 1.8764010535144426e-06,
"loss": 0.7226,
"step": 4145
},
{
"epoch": 0.17,
"learning_rate": 1.8760904877968065e-06,
"loss": 0.6958,
"step": 4150
},
{
"epoch": 0.17,
"learning_rate": 1.8757795581588005e-06,
"loss": 0.7373,
"step": 4155
},
{
"epoch": 0.17,
"learning_rate": 1.8754682647295822e-06,
"loss": 0.7313,
"step": 4160
},
{
"epoch": 0.17,
"learning_rate": 1.87515660763846e-06,
"loss": 0.7333,
"step": 4165
},
{
"epoch": 0.17,
"learning_rate": 1.8748445870148941e-06,
"loss": 0.6882,
"step": 4170
},
{
"epoch": 0.17,
"learning_rate": 1.8745322029884946e-06,
"loss": 0.681,
"step": 4175
},
{
"epoch": 0.17,
"learning_rate": 1.8742194556890233e-06,
"loss": 0.7227,
"step": 4180
},
{
"epoch": 0.17,
"learning_rate": 1.8739063452463926e-06,
"loss": 0.7047,
"step": 4185
},
{
"epoch": 0.17,
"learning_rate": 1.873592871790666e-06,
"loss": 0.6831,
"step": 4190
},
{
"epoch": 0.17,
"learning_rate": 1.8732790354520579e-06,
"loss": 0.6887,
"step": 4195
},
{
"epoch": 0.17,
"learning_rate": 1.8729648363609324e-06,
"loss": 0.6833,
"step": 4200
},
{
"epoch": 0.17,
"learning_rate": 1.8726502746478058e-06,
"loss": 0.6928,
"step": 4205
},
{
"epoch": 0.17,
"learning_rate": 1.872335350443344e-06,
"loss": 0.7192,
"step": 4210
},
{
"epoch": 0.17,
"learning_rate": 1.872020063878364e-06,
"loss": 0.6792,
"step": 4215
},
{
"epoch": 0.17,
"learning_rate": 1.8717044150838326e-06,
"loss": 0.6942,
"step": 4220
},
{
"epoch": 0.17,
"learning_rate": 1.871388404190868e-06,
"loss": 0.733,
"step": 4225
},
{
"epoch": 0.17,
"learning_rate": 1.8710720313307382e-06,
"loss": 0.6587,
"step": 4230
},
{
"epoch": 0.17,
"learning_rate": 1.8707552966348618e-06,
"loss": 0.7068,
"step": 4235
},
{
"epoch": 0.17,
"learning_rate": 1.870438200234808e-06,
"loss": 0.7121,
"step": 4240
},
{
"epoch": 0.17,
"learning_rate": 1.8701207422622951e-06,
"loss": 0.6572,
"step": 4245
},
{
"epoch": 0.17,
"learning_rate": 1.869802922849193e-06,
"loss": 0.6988,
"step": 4250
},
{
"epoch": 0.17,
"learning_rate": 1.8694847421275206e-06,
"loss": 0.7009,
"step": 4255
},
{
"epoch": 0.17,
"learning_rate": 1.8691662002294481e-06,
"loss": 0.7128,
"step": 4260
},
{
"epoch": 0.17,
"learning_rate": 1.8688472972872947e-06,
"loss": 0.6869,
"step": 4265
},
{
"epoch": 0.17,
"learning_rate": 1.8685280334335296e-06,
"loss": 0.6775,
"step": 4270
},
{
"epoch": 0.17,
"learning_rate": 1.8682084088007728e-06,
"loss": 0.6787,
"step": 4275
},
{
"epoch": 0.17,
"learning_rate": 1.8678884235217928e-06,
"loss": 0.7285,
"step": 4280
},
{
"epoch": 0.17,
"learning_rate": 1.8675680777295097e-06,
"loss": 0.6764,
"step": 4285
},
{
"epoch": 0.17,
"learning_rate": 1.8672473715569916e-06,
"loss": 0.7242,
"step": 4290
},
{
"epoch": 0.17,
"learning_rate": 1.8669263051374572e-06,
"loss": 0.6771,
"step": 4295
},
{
"epoch": 0.17,
"learning_rate": 1.8666048786042752e-06,
"loss": 0.7394,
"step": 4300
},
{
"epoch": 0.17,
"learning_rate": 1.866283092090963e-06,
"loss": 0.6957,
"step": 4305
},
{
"epoch": 0.18,
"learning_rate": 1.8659609457311875e-06,
"loss": 0.6816,
"step": 4310
},
{
"epoch": 0.18,
"learning_rate": 1.865638439658766e-06,
"loss": 0.6952,
"step": 4315
},
{
"epoch": 0.18,
"learning_rate": 1.8653155740076647e-06,
"loss": 0.6754,
"step": 4320
},
{
"epoch": 0.18,
"learning_rate": 1.8649923489119992e-06,
"loss": 0.7266,
"step": 4325
},
{
"epoch": 0.18,
"learning_rate": 1.8646687645060343e-06,
"loss": 0.7218,
"step": 4330
},
{
"epoch": 0.18,
"learning_rate": 1.8643448209241841e-06,
"loss": 0.6779,
"step": 4335
},
{
"epoch": 0.18,
"learning_rate": 1.864020518301012e-06,
"loss": 0.7003,
"step": 4340
},
{
"epoch": 0.18,
"learning_rate": 1.8636958567712303e-06,
"loss": 0.7015,
"step": 4345
},
{
"epoch": 0.18,
"learning_rate": 1.8633708364697013e-06,
"loss": 0.6808,
"step": 4350
},
{
"epoch": 0.18,
"learning_rate": 1.8630454575314344e-06,
"loss": 0.6816,
"step": 4355
},
{
"epoch": 0.18,
"learning_rate": 1.8627197200915902e-06,
"loss": 0.7126,
"step": 4360
},
{
"epoch": 0.18,
"learning_rate": 1.862393624285477e-06,
"loss": 0.6838,
"step": 4365
},
{
"epoch": 0.18,
"learning_rate": 1.8620671702485517e-06,
"loss": 0.6578,
"step": 4370
},
{
"epoch": 0.18,
"learning_rate": 1.861740358116421e-06,
"loss": 0.7344,
"step": 4375
},
{
"epoch": 0.18,
"learning_rate": 1.8614131880248393e-06,
"loss": 0.7172,
"step": 4380
},
{
"epoch": 0.18,
"learning_rate": 1.8610856601097108e-06,
"loss": 0.7621,
"step": 4385
},
{
"epoch": 0.18,
"learning_rate": 1.8607577745070873e-06,
"loss": 0.7071,
"step": 4390
},
{
"epoch": 0.18,
"learning_rate": 1.8604295313531698e-06,
"loss": 0.6889,
"step": 4395
},
{
"epoch": 0.18,
"learning_rate": 1.8601009307843078e-06,
"loss": 0.6992,
"step": 4400
},
{
"epoch": 0.18,
"learning_rate": 1.8597719729369988e-06,
"loss": 0.6843,
"step": 4405
},
{
"epoch": 0.18,
"learning_rate": 1.8594426579478891e-06,
"loss": 0.6791,
"step": 4410
},
{
"epoch": 0.18,
"learning_rate": 1.8591129859537738e-06,
"loss": 0.7045,
"step": 4415
},
{
"epoch": 0.18,
"learning_rate": 1.858782957091595e-06,
"loss": 0.6975,
"step": 4420
},
{
"epoch": 0.18,
"learning_rate": 1.858452571498444e-06,
"loss": 0.7386,
"step": 4425
},
{
"epoch": 0.18,
"learning_rate": 1.8581218293115607e-06,
"loss": 0.6974,
"step": 4430
},
{
"epoch": 0.18,
"learning_rate": 1.8577907306683317e-06,
"loss": 0.7313,
"step": 4435
},
{
"epoch": 0.18,
"learning_rate": 1.857459275706293e-06,
"loss": 0.6941,
"step": 4440
},
{
"epoch": 0.18,
"learning_rate": 1.8571274645631281e-06,
"loss": 0.7037,
"step": 4445
},
{
"epoch": 0.18,
"learning_rate": 1.8567952973766685e-06,
"loss": 0.655,
"step": 4450
},
{
"epoch": 0.18,
"learning_rate": 1.856462774284893e-06,
"loss": 0.712,
"step": 4455
},
{
"epoch": 0.18,
"learning_rate": 1.8561298954259297e-06,
"loss": 0.68,
"step": 4460
},
{
"epoch": 0.18,
"learning_rate": 1.8557966609380528e-06,
"loss": 0.6917,
"step": 4465
},
{
"epoch": 0.18,
"learning_rate": 1.8554630709596855e-06,
"loss": 0.6719,
"step": 4470
},
{
"epoch": 0.18,
"learning_rate": 1.8551291256293977e-06,
"loss": 0.7117,
"step": 4475
},
{
"epoch": 0.18,
"learning_rate": 1.854794825085908e-06,
"loss": 0.7157,
"step": 4480
},
{
"epoch": 0.18,
"learning_rate": 1.8544601694680814e-06,
"loss": 0.7147,
"step": 4485
},
{
"epoch": 0.18,
"learning_rate": 1.8541251589149313e-06,
"loss": 0.6964,
"step": 4490
},
{
"epoch": 0.18,
"learning_rate": 1.853789793565618e-06,
"loss": 0.7083,
"step": 4495
},
{
"epoch": 0.18,
"learning_rate": 1.8534540735594492e-06,
"loss": 0.6735,
"step": 4500
},
{
"epoch": 0.18,
"learning_rate": 1.8531179990358806e-06,
"loss": 0.6719,
"step": 4505
},
{
"epoch": 0.18,
"learning_rate": 1.852781570134514e-06,
"loss": 0.6729,
"step": 4510
},
{
"epoch": 0.18,
"learning_rate": 1.8524447869950995e-06,
"loss": 0.6988,
"step": 4515
},
{
"epoch": 0.18,
"learning_rate": 1.8521076497575335e-06,
"loss": 0.6907,
"step": 4520
},
{
"epoch": 0.18,
"learning_rate": 1.8517701585618602e-06,
"loss": 0.7268,
"step": 4525
},
{
"epoch": 0.18,
"learning_rate": 1.8514323135482704e-06,
"loss": 0.7261,
"step": 4530
},
{
"epoch": 0.18,
"learning_rate": 1.8510941148571018e-06,
"loss": 0.6763,
"step": 4535
},
{
"epoch": 0.18,
"learning_rate": 1.8507555626288397e-06,
"loss": 0.6801,
"step": 4540
},
{
"epoch": 0.18,
"learning_rate": 1.8504166570041152e-06,
"loss": 0.7502,
"step": 4545
},
{
"epoch": 0.18,
"learning_rate": 1.8500773981237069e-06,
"loss": 0.6682,
"step": 4550
},
{
"epoch": 0.19,
"learning_rate": 1.8497377861285401e-06,
"loss": 0.709,
"step": 4555
},
{
"epoch": 0.19,
"learning_rate": 1.8493978211596865e-06,
"loss": 0.7001,
"step": 4560
},
{
"epoch": 0.19,
"learning_rate": 1.849057503358365e-06,
"loss": 0.7111,
"step": 4565
},
{
"epoch": 0.19,
"learning_rate": 1.8487168328659403e-06,
"loss": 0.701,
"step": 4570
},
{
"epoch": 0.19,
"learning_rate": 1.8483758098239237e-06,
"loss": 0.691,
"step": 4575
},
{
"epoch": 0.19,
"learning_rate": 1.8480344343739738e-06,
"loss": 0.6979,
"step": 4580
},
{
"epoch": 0.19,
"learning_rate": 1.8476927066578946e-06,
"loss": 0.7074,
"step": 4585
},
{
"epoch": 0.19,
"learning_rate": 1.8473506268176372e-06,
"loss": 0.7229,
"step": 4590
},
{
"epoch": 0.19,
"learning_rate": 1.8470081949952982e-06,
"loss": 0.7015,
"step": 4595
},
{
"epoch": 0.19,
"learning_rate": 1.846665411333121e-06,
"loss": 0.7329,
"step": 4600
},
{
"epoch": 0.19,
"learning_rate": 1.8463222759734948e-06,
"loss": 0.6908,
"step": 4605
},
{
"epoch": 0.19,
"learning_rate": 1.8459787890589554e-06,
"loss": 0.7381,
"step": 4610
},
{
"epoch": 0.19,
"learning_rate": 1.8456349507321836e-06,
"loss": 0.6663,
"step": 4615
},
{
"epoch": 0.19,
"learning_rate": 1.8452907611360076e-06,
"loss": 0.7021,
"step": 4620
},
{
"epoch": 0.19,
"learning_rate": 1.8449462204134002e-06,
"loss": 0.6826,
"step": 4625
},
{
"epoch": 0.19,
"learning_rate": 1.8446013287074811e-06,
"loss": 0.6959,
"step": 4630
},
{
"epoch": 0.19,
"learning_rate": 1.8442560861615148e-06,
"loss": 0.7348,
"step": 4635
},
{
"epoch": 0.19,
"learning_rate": 1.8439104929189124e-06,
"loss": 0.6699,
"step": 4640
},
{
"epoch": 0.19,
"learning_rate": 1.8435645491232307e-06,
"loss": 0.7354,
"step": 4645
},
{
"epoch": 0.19,
"learning_rate": 1.8432182549181707e-06,
"loss": 0.6721,
"step": 4650
},
{
"epoch": 0.19,
"learning_rate": 1.8428716104475806e-06,
"loss": 0.7104,
"step": 4655
},
{
"epoch": 0.19,
"learning_rate": 1.8425246158554537e-06,
"loss": 0.6808,
"step": 4660
},
{
"epoch": 0.19,
"learning_rate": 1.8421772712859282e-06,
"loss": 0.6941,
"step": 4665
},
{
"epoch": 0.19,
"learning_rate": 1.8418295768832883e-06,
"loss": 0.687,
"step": 4670
},
{
"epoch": 0.19,
"learning_rate": 1.841481532791963e-06,
"loss": 0.6894,
"step": 4675
},
{
"epoch": 0.19,
"learning_rate": 1.841133139156527e-06,
"loss": 0.6663,
"step": 4680
},
{
"epoch": 0.19,
"learning_rate": 1.8407843961216995e-06,
"loss": 0.6423,
"step": 4685
},
{
"epoch": 0.19,
"learning_rate": 1.8404353038323459e-06,
"loss": 0.7073,
"step": 4690
},
{
"epoch": 0.19,
"learning_rate": 1.8400858624334758e-06,
"loss": 0.7149,
"step": 4695
},
{
"epoch": 0.19,
"learning_rate": 1.8397360720702442e-06,
"loss": 0.7024,
"step": 4700
},
{
"epoch": 0.19,
"learning_rate": 1.8393859328879511e-06,
"loss": 0.7039,
"step": 4705
},
{
"epoch": 0.19,
"learning_rate": 1.839035445032041e-06,
"loss": 0.6758,
"step": 4710
},
{
"epoch": 0.19,
"learning_rate": 1.8386846086481036e-06,
"loss": 0.6833,
"step": 4715
},
{
"epoch": 0.19,
"learning_rate": 1.8383334238818736e-06,
"loss": 0.6996,
"step": 4720
},
{
"epoch": 0.19,
"learning_rate": 1.8379818908792295e-06,
"loss": 0.692,
"step": 4725
},
{
"epoch": 0.19,
"learning_rate": 1.8376300097861953e-06,
"loss": 0.7099,
"step": 4730
},
{
"epoch": 0.19,
"learning_rate": 1.8372777807489396e-06,
"loss": 0.6859,
"step": 4735
},
{
"epoch": 0.19,
"learning_rate": 1.8369252039137753e-06,
"loss": 0.696,
"step": 4740
},
{
"epoch": 0.19,
"learning_rate": 1.8365722794271594e-06,
"loss": 0.6713,
"step": 4745
},
{
"epoch": 0.19,
"learning_rate": 1.8362190074356935e-06,
"loss": 0.7215,
"step": 4750
},
{
"epoch": 0.19,
"learning_rate": 1.8358653880861245e-06,
"loss": 0.7001,
"step": 4755
},
{
"epoch": 0.19,
"learning_rate": 1.835511421525342e-06,
"loss": 0.727,
"step": 4760
},
{
"epoch": 0.19,
"learning_rate": 1.8351571079003812e-06,
"loss": 0.6764,
"step": 4765
},
{
"epoch": 0.19,
"learning_rate": 1.8348024473584208e-06,
"loss": 0.6667,
"step": 4770
},
{
"epoch": 0.19,
"learning_rate": 1.834447440046783e-06,
"loss": 0.7291,
"step": 4775
},
{
"epoch": 0.19,
"learning_rate": 1.8340920861129358e-06,
"loss": 0.6699,
"step": 4780
},
{
"epoch": 0.19,
"learning_rate": 1.8337363857044894e-06,
"loss": 0.6825,
"step": 4785
},
{
"epoch": 0.19,
"learning_rate": 1.833380338969199e-06,
"loss": 0.6954,
"step": 4790
},
{
"epoch": 0.19,
"learning_rate": 1.833023946054963e-06,
"loss": 0.7002,
"step": 4795
},
{
"epoch": 0.19,
"learning_rate": 1.8326672071098246e-06,
"loss": 0.699,
"step": 4800
},
{
"epoch": 0.19,
"eval_loss": 0.6643534302711487,
"eval_runtime": 138.4591,
"eval_samples_per_second": 17.088,
"eval_steps_per_second": 2.853,
"step": 4800
},
{
"epoch": 0.2,
"learning_rate": 1.8323101222819693e-06,
"loss": 0.677,
"step": 4805
},
{
"epoch": 0.2,
"learning_rate": 1.8319526917197275e-06,
"loss": 0.7381,
"step": 4810
},
{
"epoch": 0.2,
"learning_rate": 1.8315949155715722e-06,
"loss": 0.6681,
"step": 4815
},
{
"epoch": 0.2,
"learning_rate": 1.8312367939861214e-06,
"loss": 0.6696,
"step": 4820
},
{
"epoch": 0.2,
"learning_rate": 1.8308783271121346e-06,
"loss": 0.713,
"step": 4825
},
{
"epoch": 0.2,
"learning_rate": 1.830519515098517e-06,
"loss": 0.7155,
"step": 4830
},
{
"epoch": 0.2,
"learning_rate": 1.8301603580943148e-06,
"loss": 0.694,
"step": 4835
},
{
"epoch": 0.2,
"learning_rate": 1.8298008562487195e-06,
"loss": 0.7343,
"step": 4840
},
{
"epoch": 0.2,
"learning_rate": 1.829441009711065e-06,
"loss": 0.7173,
"step": 4845
},
{
"epoch": 0.2,
"learning_rate": 1.8290808186308276e-06,
"loss": 0.7167,
"step": 4850
},
{
"epoch": 0.2,
"learning_rate": 1.828720283157629e-06,
"loss": 0.7265,
"step": 4855
},
{
"epoch": 0.2,
"learning_rate": 1.8283594034412313e-06,
"loss": 0.6793,
"step": 4860
},
{
"epoch": 0.2,
"learning_rate": 1.8279981796315412e-06,
"loss": 0.6807,
"step": 4865
},
{
"epoch": 0.2,
"learning_rate": 1.8276366118786078e-06,
"loss": 0.6737,
"step": 4870
},
{
"epoch": 0.2,
"learning_rate": 1.8272747003326235e-06,
"loss": 0.7202,
"step": 4875
},
{
"epoch": 0.2,
"learning_rate": 1.8269124451439231e-06,
"loss": 0.7226,
"step": 4880
},
{
"epoch": 0.2,
"learning_rate": 1.8265498464629837e-06,
"loss": 0.6772,
"step": 4885
},
{
"epoch": 0.2,
"learning_rate": 1.8261869044404265e-06,
"loss": 0.6868,
"step": 4890
},
{
"epoch": 0.2,
"learning_rate": 1.825823619227014e-06,
"loss": 0.703,
"step": 4895
},
{
"epoch": 0.2,
"learning_rate": 1.825459990973652e-06,
"loss": 0.7193,
"step": 4900
},
{
"epoch": 0.2,
"learning_rate": 1.8250960198313878e-06,
"loss": 0.7194,
"step": 4905
},
{
"epoch": 0.2,
"learning_rate": 1.8247317059514126e-06,
"loss": 0.6859,
"step": 4910
},
{
"epoch": 0.2,
"learning_rate": 1.8243670494850592e-06,
"loss": 0.6749,
"step": 4915
},
{
"epoch": 0.2,
"learning_rate": 1.824002050583802e-06,
"loss": 0.6542,
"step": 4920
},
{
"epoch": 0.2,
"learning_rate": 1.8236367093992592e-06,
"loss": 0.6592,
"step": 4925
},
{
"epoch": 0.2,
"learning_rate": 1.82327102608319e-06,
"loss": 0.6788,
"step": 4930
},
{
"epoch": 0.2,
"learning_rate": 1.8229050007874961e-06,
"loss": 0.6909,
"step": 4935
},
{
"epoch": 0.2,
"learning_rate": 1.822538633664221e-06,
"loss": 0.7105,
"step": 4940
},
{
"epoch": 0.2,
"learning_rate": 1.8221719248655507e-06,
"loss": 0.6731,
"step": 4945
},
{
"epoch": 0.2,
"learning_rate": 1.821804874543813e-06,
"loss": 0.685,
"step": 4950
},
{
"epoch": 0.2,
"learning_rate": 1.821437482851477e-06,
"loss": 0.7251,
"step": 4955
},
{
"epoch": 0.2,
"learning_rate": 1.821069749941154e-06,
"loss": 0.6517,
"step": 4960
},
{
"epoch": 0.2,
"learning_rate": 1.8207016759655973e-06,
"loss": 0.704,
"step": 4965
},
{
"epoch": 0.2,
"learning_rate": 1.8203332610777017e-06,
"loss": 0.7063,
"step": 4970
},
{
"epoch": 0.2,
"learning_rate": 1.8199645054305037e-06,
"loss": 0.7208,
"step": 4975
},
{
"epoch": 0.2,
"learning_rate": 1.8195954091771805e-06,
"loss": 0.7339,
"step": 4980
},
{
"epoch": 0.2,
"learning_rate": 1.8192259724710518e-06,
"loss": 0.6627,
"step": 4985
},
{
"epoch": 0.2,
"learning_rate": 1.818856195465579e-06,
"loss": 0.7021,
"step": 4990
},
{
"epoch": 0.2,
"learning_rate": 1.8184860783143635e-06,
"loss": 0.7108,
"step": 4995
},
{
"epoch": 0.2,
"learning_rate": 1.8181156211711488e-06,
"loss": 0.639,
"step": 5000
},
{
"epoch": 0.2,
"learning_rate": 1.8177448241898196e-06,
"loss": 0.7256,
"step": 5005
},
{
"epoch": 0.2,
"learning_rate": 1.817373687524402e-06,
"loss": 0.6977,
"step": 5010
},
{
"epoch": 0.2,
"learning_rate": 1.817002211329063e-06,
"loss": 0.6984,
"step": 5015
},
{
"epoch": 0.2,
"learning_rate": 1.8166303957581103e-06,
"loss": 0.6875,
"step": 5020
},
{
"epoch": 0.2,
"learning_rate": 1.8162582409659932e-06,
"loss": 0.6602,
"step": 5025
},
{
"epoch": 0.2,
"learning_rate": 1.815885747107301e-06,
"loss": 0.6819,
"step": 5030
},
{
"epoch": 0.2,
"learning_rate": 1.815512914336765e-06,
"loss": 0.6843,
"step": 5035
},
{
"epoch": 0.2,
"learning_rate": 1.8151397428092563e-06,
"loss": 0.7079,
"step": 5040
},
{
"epoch": 0.2,
"learning_rate": 1.8147662326797872e-06,
"loss": 0.7022,
"step": 5045
},
{
"epoch": 0.21,
"learning_rate": 1.8143923841035107e-06,
"loss": 0.6897,
"step": 5050
},
{
"epoch": 0.21,
"learning_rate": 1.8140181972357201e-06,
"loss": 0.6774,
"step": 5055
},
{
"epoch": 0.21,
"learning_rate": 1.8136436722318496e-06,
"loss": 0.6877,
"step": 5060
},
{
"epoch": 0.21,
"learning_rate": 1.813268809247473e-06,
"loss": 0.6957,
"step": 5065
},
{
"epoch": 0.21,
"learning_rate": 1.8128936084383058e-06,
"loss": 0.7274,
"step": 5070
},
{
"epoch": 0.21,
"learning_rate": 1.812518069960203e-06,
"loss": 0.6767,
"step": 5075
},
{
"epoch": 0.21,
"learning_rate": 1.8121421939691599e-06,
"loss": 0.6857,
"step": 5080
},
{
"epoch": 0.21,
"learning_rate": 1.8117659806213122e-06,
"loss": 0.6637,
"step": 5085
},
{
"epoch": 0.21,
"learning_rate": 1.8113894300729356e-06,
"loss": 0.6777,
"step": 5090
},
{
"epoch": 0.21,
"learning_rate": 1.8110125424804458e-06,
"loss": 0.6666,
"step": 5095
},
{
"epoch": 0.21,
"learning_rate": 1.8106353180003988e-06,
"loss": 0.6811,
"step": 5100
},
{
"epoch": 0.21,
"learning_rate": 1.8102577567894905e-06,
"loss": 0.6735,
"step": 5105
},
{
"epoch": 0.21,
"learning_rate": 1.8098798590045563e-06,
"loss": 0.7543,
"step": 5110
},
{
"epoch": 0.21,
"learning_rate": 1.8095016248025717e-06,
"loss": 0.6957,
"step": 5115
},
{
"epoch": 0.21,
"learning_rate": 1.809123054340652e-06,
"loss": 0.6969,
"step": 5120
},
{
"epoch": 0.21,
"learning_rate": 1.8087441477760517e-06,
"loss": 0.6775,
"step": 5125
},
{
"epoch": 0.21,
"learning_rate": 1.8083649052661661e-06,
"loss": 0.6794,
"step": 5130
},
{
"epoch": 0.21,
"learning_rate": 1.8079853269685282e-06,
"loss": 0.6253,
"step": 5135
},
{
"epoch": 0.21,
"learning_rate": 1.8076054130408123e-06,
"loss": 0.7048,
"step": 5140
},
{
"epoch": 0.21,
"learning_rate": 1.8072251636408314e-06,
"loss": 0.681,
"step": 5145
},
{
"epoch": 0.21,
"learning_rate": 1.806844578926537e-06,
"loss": 0.6644,
"step": 5150
},
{
"epoch": 0.21,
"learning_rate": 1.8064636590560215e-06,
"loss": 0.6972,
"step": 5155
},
{
"epoch": 0.21,
"learning_rate": 1.8060824041875154e-06,
"loss": 0.6831,
"step": 5160
},
{
"epoch": 0.21,
"learning_rate": 1.8057008144793882e-06,
"loss": 0.6738,
"step": 5165
},
{
"epoch": 0.21,
"learning_rate": 1.8053188900901497e-06,
"loss": 0.7293,
"step": 5170
},
{
"epoch": 0.21,
"learning_rate": 1.8049366311784477e-06,
"loss": 0.6975,
"step": 5175
},
{
"epoch": 0.21,
"learning_rate": 1.8045540379030691e-06,
"loss": 0.7275,
"step": 5180
},
{
"epoch": 0.21,
"learning_rate": 1.8041711104229401e-06,
"loss": 0.6867,
"step": 5185
},
{
"epoch": 0.21,
"learning_rate": 1.803787848897125e-06,
"loss": 0.6857,
"step": 5190
},
{
"epoch": 0.21,
"learning_rate": 1.8034042534848277e-06,
"loss": 0.6861,
"step": 5195
},
{
"epoch": 0.21,
"learning_rate": 1.8030203243453901e-06,
"loss": 0.6732,
"step": 5200
},
{
"epoch": 0.21,
"learning_rate": 1.8026360616382933e-06,
"loss": 0.7133,
"step": 5205
},
{
"epoch": 0.21,
"learning_rate": 1.8022514655231565e-06,
"loss": 0.7196,
"step": 5210
},
{
"epoch": 0.21,
"learning_rate": 1.8018665361597373e-06,
"loss": 0.6929,
"step": 5215
},
{
"epoch": 0.21,
"learning_rate": 1.801481273707933e-06,
"loss": 0.7227,
"step": 5220
},
{
"epoch": 0.21,
"learning_rate": 1.8010956783277772e-06,
"loss": 0.6655,
"step": 5225
},
{
"epoch": 0.21,
"learning_rate": 1.8007097501794435e-06,
"loss": 0.7454,
"step": 5230
},
{
"epoch": 0.21,
"learning_rate": 1.8003234894232426e-06,
"loss": 0.6815,
"step": 5235
},
{
"epoch": 0.21,
"learning_rate": 1.7999368962196243e-06,
"loss": 0.7456,
"step": 5240
},
{
"epoch": 0.21,
"learning_rate": 1.7995499707291762e-06,
"loss": 0.6822,
"step": 5245
},
{
"epoch": 0.21,
"learning_rate": 1.799162713112623e-06,
"loss": 0.7083,
"step": 5250
},
{
"epoch": 0.21,
"learning_rate": 1.798775123530829e-06,
"loss": 0.6874,
"step": 5255
},
{
"epoch": 0.21,
"learning_rate": 1.798387202144795e-06,
"loss": 0.6864,
"step": 5260
},
{
"epoch": 0.21,
"learning_rate": 1.7979989491156603e-06,
"loss": 0.6991,
"step": 5265
},
{
"epoch": 0.21,
"learning_rate": 1.797610364604702e-06,
"loss": 0.7077,
"step": 5270
},
{
"epoch": 0.21,
"learning_rate": 1.7972214487733345e-06,
"loss": 0.6461,
"step": 5275
},
{
"epoch": 0.21,
"learning_rate": 1.7968322017831102e-06,
"loss": 0.6968,
"step": 5280
},
{
"epoch": 0.21,
"learning_rate": 1.7964426237957188e-06,
"loss": 0.6635,
"step": 5285
},
{
"epoch": 0.21,
"learning_rate": 1.7960527149729878e-06,
"loss": 0.684,
"step": 5290
},
{
"epoch": 0.22,
"learning_rate": 1.7956624754768818e-06,
"loss": 0.6784,
"step": 5295
},
{
"epoch": 0.22,
"learning_rate": 1.795271905469503e-06,
"loss": 0.7481,
"step": 5300
},
{
"epoch": 0.22,
"learning_rate": 1.7948810051130905e-06,
"loss": 0.683,
"step": 5305
},
{
"epoch": 0.22,
"learning_rate": 1.794489774570021e-06,
"loss": 0.6661,
"step": 5310
},
{
"epoch": 0.22,
"learning_rate": 1.7940982140028087e-06,
"loss": 0.7184,
"step": 5315
},
{
"epoch": 0.22,
"learning_rate": 1.793706323574104e-06,
"loss": 0.6612,
"step": 5320
},
{
"epoch": 0.22,
"learning_rate": 1.7933141034466948e-06,
"loss": 0.703,
"step": 5325
},
{
"epoch": 0.22,
"learning_rate": 1.792921553783506e-06,
"loss": 0.7108,
"step": 5330
},
{
"epoch": 0.22,
"learning_rate": 1.7925286747475994e-06,
"loss": 0.7301,
"step": 5335
},
{
"epoch": 0.22,
"learning_rate": 1.7921354665021735e-06,
"loss": 0.6704,
"step": 5340
},
{
"epoch": 0.22,
"learning_rate": 1.7917419292105636e-06,
"loss": 0.7065,
"step": 5345
},
{
"epoch": 0.22,
"learning_rate": 1.7913480630362417e-06,
"loss": 0.7033,
"step": 5350
},
{
"epoch": 0.22,
"learning_rate": 1.790953868142816e-06,
"loss": 0.7258,
"step": 5355
},
{
"epoch": 0.22,
"learning_rate": 1.790559344694032e-06,
"loss": 0.6887,
"step": 5360
},
{
"epoch": 0.22,
"learning_rate": 1.7901644928537715e-06,
"loss": 0.7195,
"step": 5365
},
{
"epoch": 0.22,
"learning_rate": 1.7897693127860524e-06,
"loss": 0.7058,
"step": 5370
},
{
"epoch": 0.22,
"learning_rate": 1.7893738046550286e-06,
"loss": 0.7177,
"step": 5375
},
{
"epoch": 0.22,
"learning_rate": 1.7889779686249912e-06,
"loss": 0.6891,
"step": 5380
},
{
"epoch": 0.22,
"learning_rate": 1.788581804860367e-06,
"loss": 0.6551,
"step": 5385
},
{
"epoch": 0.22,
"learning_rate": 1.7881853135257185e-06,
"loss": 0.7061,
"step": 5390
},
{
"epoch": 0.22,
"learning_rate": 1.7877884947857455e-06,
"loss": 0.6706,
"step": 5395
},
{
"epoch": 0.22,
"learning_rate": 1.7873913488052827e-06,
"loss": 0.6719,
"step": 5400
},
{
"epoch": 0.22,
"learning_rate": 1.7869938757493011e-06,
"loss": 0.7022,
"step": 5405
},
{
"epoch": 0.22,
"learning_rate": 1.7865960757829075e-06,
"loss": 0.676,
"step": 5410
},
{
"epoch": 0.22,
"learning_rate": 1.7861979490713445e-06,
"loss": 0.7427,
"step": 5415
},
{
"epoch": 0.22,
"learning_rate": 1.7857994957799906e-06,
"loss": 0.698,
"step": 5420
},
{
"epoch": 0.22,
"learning_rate": 1.78540071607436e-06,
"loss": 0.7023,
"step": 5425
},
{
"epoch": 0.22,
"learning_rate": 1.785001610120102e-06,
"loss": 0.6985,
"step": 5430
},
{
"epoch": 0.22,
"learning_rate": 1.784602178083002e-06,
"loss": 0.7192,
"step": 5435
},
{
"epoch": 0.22,
"learning_rate": 1.7842024201289801e-06,
"loss": 0.6682,
"step": 5440
},
{
"epoch": 0.22,
"learning_rate": 1.7838023364240929e-06,
"loss": 0.694,
"step": 5445
},
{
"epoch": 0.22,
"learning_rate": 1.7834019271345313e-06,
"loss": 0.6484,
"step": 5450
},
{
"epoch": 0.22,
"learning_rate": 1.7830011924266221e-06,
"loss": 0.6921,
"step": 5455
},
{
"epoch": 0.22,
"learning_rate": 1.7826001324668267e-06,
"loss": 0.7135,
"step": 5460
},
{
"epoch": 0.22,
"learning_rate": 1.7821987474217424e-06,
"loss": 0.6631,
"step": 5465
},
{
"epoch": 0.22,
"learning_rate": 1.7817970374581006e-06,
"loss": 0.6572,
"step": 5470
},
{
"epoch": 0.22,
"learning_rate": 1.781395002742768e-06,
"loss": 0.7068,
"step": 5475
},
{
"epoch": 0.22,
"learning_rate": 1.780992643442747e-06,
"loss": 0.6417,
"step": 5480
},
{
"epoch": 0.22,
"learning_rate": 1.7805899597251735e-06,
"loss": 0.6847,
"step": 5485
},
{
"epoch": 0.22,
"learning_rate": 1.7801869517573191e-06,
"loss": 0.6862,
"step": 5490
},
{
"epoch": 0.22,
"learning_rate": 1.77978361970659e-06,
"loss": 0.6696,
"step": 5495
},
{
"epoch": 0.22,
"learning_rate": 1.7793799637405265e-06,
"loss": 0.737,
"step": 5500
},
{
"epoch": 0.22,
"learning_rate": 1.7789759840268037e-06,
"loss": 0.6826,
"step": 5505
},
{
"epoch": 0.22,
"learning_rate": 1.7785716807332315e-06,
"loss": 0.7288,
"step": 5510
},
{
"epoch": 0.22,
"learning_rate": 1.778167054027754e-06,
"loss": 0.6856,
"step": 5515
},
{
"epoch": 0.22,
"learning_rate": 1.7777621040784496e-06,
"loss": 0.6801,
"step": 5520
},
{
"epoch": 0.22,
"learning_rate": 1.777356831053531e-06,
"loss": 0.6737,
"step": 5525
},
{
"epoch": 0.22,
"learning_rate": 1.7769512351213448e-06,
"loss": 0.6547,
"step": 5530
},
{
"epoch": 0.22,
"learning_rate": 1.7765453164503722e-06,
"loss": 0.7255,
"step": 5535
},
{
"epoch": 0.23,
"learning_rate": 1.7761390752092284e-06,
"loss": 0.6757,
"step": 5540
},
{
"epoch": 0.23,
"learning_rate": 1.7757325115666624e-06,
"loss": 0.6752,
"step": 5545
},
{
"epoch": 0.23,
"learning_rate": 1.7753256256915572e-06,
"loss": 0.6901,
"step": 5550
},
{
"epoch": 0.23,
"learning_rate": 1.7749184177529294e-06,
"loss": 0.6747,
"step": 5555
},
{
"epoch": 0.23,
"learning_rate": 1.7745108879199302e-06,
"loss": 0.6852,
"step": 5560
},
{
"epoch": 0.23,
"learning_rate": 1.7741030363618437e-06,
"loss": 0.6804,
"step": 5565
},
{
"epoch": 0.23,
"learning_rate": 1.7736948632480874e-06,
"loss": 0.6783,
"step": 5570
},
{
"epoch": 0.23,
"learning_rate": 1.7732863687482138e-06,
"loss": 0.6799,
"step": 5575
},
{
"epoch": 0.23,
"learning_rate": 1.772877553031907e-06,
"loss": 0.7125,
"step": 5580
},
{
"epoch": 0.23,
"learning_rate": 1.7724684162689863e-06,
"loss": 0.6851,
"step": 5585
},
{
"epoch": 0.23,
"learning_rate": 1.772058958629403e-06,
"loss": 0.6906,
"step": 5590
},
{
"epoch": 0.23,
"learning_rate": 1.7716491802832425e-06,
"loss": 0.7292,
"step": 5595
},
{
"epoch": 0.23,
"learning_rate": 1.771239081400723e-06,
"loss": 0.7028,
"step": 5600
},
{
"epoch": 0.23,
"eval_loss": 0.6604963541030884,
"eval_runtime": 138.4769,
"eval_samples_per_second": 17.086,
"eval_steps_per_second": 2.852,
"step": 5600
},
{
"epoch": 0.23,
"learning_rate": 1.7708286621521964e-06,
"loss": 0.6984,
"step": 5605
},
{
"epoch": 0.23,
"learning_rate": 1.7704179227081467e-06,
"loss": 0.7075,
"step": 5610
},
{
"epoch": 0.23,
"learning_rate": 1.7700068632391917e-06,
"loss": 0.7014,
"step": 5615
},
{
"epoch": 0.23,
"learning_rate": 1.7695954839160824e-06,
"loss": 0.6986,
"step": 5620
},
{
"epoch": 0.23,
"learning_rate": 1.7691837849097015e-06,
"loss": 0.7069,
"step": 5625
},
{
"epoch": 0.23,
"learning_rate": 1.7687717663910658e-06,
"loss": 0.6893,
"step": 5630
},
{
"epoch": 0.23,
"learning_rate": 1.7683594285313235e-06,
"loss": 0.6479,
"step": 5635
},
{
"epoch": 0.23,
"learning_rate": 1.7679467715017567e-06,
"loss": 0.7326,
"step": 5640
},
{
"epoch": 0.23,
"learning_rate": 1.7675337954737795e-06,
"loss": 0.7032,
"step": 5645
},
{
"epoch": 0.23,
"learning_rate": 1.7671205006189384e-06,
"loss": 0.6587,
"step": 5650
},
{
"epoch": 0.23,
"learning_rate": 1.7667068871089126e-06,
"loss": 0.6494,
"step": 5655
},
{
"epoch": 0.23,
"learning_rate": 1.7662929551155136e-06,
"loss": 0.7001,
"step": 5660
},
{
"epoch": 0.23,
"learning_rate": 1.7658787048106846e-06,
"loss": 0.7043,
"step": 5665
},
{
"epoch": 0.23,
"learning_rate": 1.7654641363665027e-06,
"loss": 0.6735,
"step": 5670
},
{
"epoch": 0.23,
"learning_rate": 1.765049249955175e-06,
"loss": 0.706,
"step": 5675
},
{
"epoch": 0.23,
"learning_rate": 1.764634045749042e-06,
"loss": 0.7178,
"step": 5680
},
{
"epoch": 0.23,
"learning_rate": 1.7642185239205763e-06,
"loss": 0.6817,
"step": 5685
},
{
"epoch": 0.23,
"learning_rate": 1.7638026846423813e-06,
"loss": 0.739,
"step": 5690
},
{
"epoch": 0.23,
"learning_rate": 1.7633865280871935e-06,
"loss": 0.704,
"step": 5695
},
{
"epoch": 0.23,
"learning_rate": 1.762970054427881e-06,
"loss": 0.6859,
"step": 5700
},
{
"epoch": 0.23,
"learning_rate": 1.7625532638374429e-06,
"loss": 0.6645,
"step": 5705
},
{
"epoch": 0.23,
"learning_rate": 1.7621361564890104e-06,
"loss": 0.6885,
"step": 5710
},
{
"epoch": 0.23,
"learning_rate": 1.7617187325558463e-06,
"loss": 0.7211,
"step": 5715
},
{
"epoch": 0.23,
"learning_rate": 1.7613009922113449e-06,
"loss": 0.6563,
"step": 5720
},
{
"epoch": 0.23,
"learning_rate": 1.760882935629032e-06,
"loss": 0.6872,
"step": 5725
},
{
"epoch": 0.23,
"learning_rate": 1.7604645629825647e-06,
"loss": 0.7238,
"step": 5730
},
{
"epoch": 0.23,
"learning_rate": 1.7600458744457312e-06,
"loss": 0.6829,
"step": 5735
},
{
"epoch": 0.23,
"learning_rate": 1.7596268701924513e-06,
"loss": 0.6752,
"step": 5740
},
{
"epoch": 0.23,
"learning_rate": 1.7592075503967751e-06,
"loss": 0.7009,
"step": 5745
},
{
"epoch": 0.23,
"learning_rate": 1.7587879152328852e-06,
"loss": 0.6987,
"step": 5750
},
{
"epoch": 0.23,
"learning_rate": 1.758367964875094e-06,
"loss": 0.7154,
"step": 5755
},
{
"epoch": 0.23,
"learning_rate": 1.7579476994978454e-06,
"loss": 0.6551,
"step": 5760
},
{
"epoch": 0.23,
"learning_rate": 1.7575271192757138e-06,
"loss": 0.6711,
"step": 5765
},
{
"epoch": 0.23,
"learning_rate": 1.7571062243834046e-06,
"loss": 0.7064,
"step": 5770
},
{
"epoch": 0.23,
"learning_rate": 1.7566850149957536e-06,
"loss": 0.6889,
"step": 5775
},
{
"epoch": 0.23,
"learning_rate": 1.7562634912877282e-06,
"loss": 0.6998,
"step": 5780
},
{
"epoch": 0.23,
"learning_rate": 1.7558416534344252e-06,
"loss": 0.6513,
"step": 5785
},
{
"epoch": 0.24,
"learning_rate": 1.7554195016110725e-06,
"loss": 0.7019,
"step": 5790
},
{
"epoch": 0.24,
"learning_rate": 1.7549970359930285e-06,
"loss": 0.7119,
"step": 5795
},
{
"epoch": 0.24,
"learning_rate": 1.7545742567557811e-06,
"loss": 0.6858,
"step": 5800
},
{
"epoch": 0.24,
"learning_rate": 1.7541511640749499e-06,
"loss": 0.6887,
"step": 5805
},
{
"epoch": 0.24,
"learning_rate": 1.753727758126283e-06,
"loss": 0.6976,
"step": 5810
},
{
"epoch": 0.24,
"learning_rate": 1.7533040390856608e-06,
"loss": 0.6699,
"step": 5815
},
{
"epoch": 0.24,
"learning_rate": 1.7528800071290914e-06,
"loss": 0.6802,
"step": 5820
},
{
"epoch": 0.24,
"learning_rate": 1.7524556624327147e-06,
"loss": 0.6804,
"step": 5825
},
{
"epoch": 0.24,
"learning_rate": 1.7520310051727992e-06,
"loss": 0.6678,
"step": 5830
},
{
"epoch": 0.24,
"learning_rate": 1.7516060355257443e-06,
"loss": 0.6856,
"step": 5835
},
{
"epoch": 0.24,
"learning_rate": 1.7511807536680782e-06,
"loss": 0.6983,
"step": 5840
},
{
"epoch": 0.24,
"learning_rate": 1.75075515977646e-06,
"loss": 0.7212,
"step": 5845
},
{
"epoch": 0.24,
"learning_rate": 1.7503292540276772e-06,
"loss": 0.6783,
"step": 5850
},
{
"epoch": 0.24,
"learning_rate": 1.7499030365986475e-06,
"loss": 0.6932,
"step": 5855
},
{
"epoch": 0.24,
"learning_rate": 1.749476507666418e-06,
"loss": 0.7191,
"step": 5860
},
{
"epoch": 0.24,
"learning_rate": 1.7490496674081648e-06,
"loss": 0.6817,
"step": 5865
},
{
"epoch": 0.24,
"learning_rate": 1.748622516001194e-06,
"loss": 0.6568,
"step": 5870
},
{
"epoch": 0.24,
"learning_rate": 1.7481950536229402e-06,
"loss": 0.6861,
"step": 5875
},
{
"epoch": 0.24,
"learning_rate": 1.7477672804509679e-06,
"loss": 0.6995,
"step": 5880
},
{
"epoch": 0.24,
"learning_rate": 1.7473391966629698e-06,
"loss": 0.72,
"step": 5885
},
{
"epoch": 0.24,
"learning_rate": 1.7469108024367687e-06,
"loss": 0.6983,
"step": 5890
},
{
"epoch": 0.24,
"learning_rate": 1.746482097950316e-06,
"loss": 0.6961,
"step": 5895
},
{
"epoch": 0.24,
"learning_rate": 1.7460530833816915e-06,
"loss": 0.7183,
"step": 5900
},
{
"epoch": 0.24,
"learning_rate": 1.745623758909104e-06,
"loss": 0.7194,
"step": 5905
},
{
"epoch": 0.24,
"learning_rate": 1.7451941247108914e-06,
"loss": 0.6699,
"step": 5910
},
{
"epoch": 0.24,
"learning_rate": 1.74476418096552e-06,
"loss": 0.6807,
"step": 5915
},
{
"epoch": 0.24,
"learning_rate": 1.7443339278515846e-06,
"loss": 0.6835,
"step": 5920
},
{
"epoch": 0.24,
"learning_rate": 1.7439033655478084e-06,
"loss": 0.6874,
"step": 5925
},
{
"epoch": 0.24,
"learning_rate": 1.7434724942330436e-06,
"loss": 0.6832,
"step": 5930
},
{
"epoch": 0.24,
"learning_rate": 1.7430413140862703e-06,
"loss": 0.6661,
"step": 5935
},
{
"epoch": 0.24,
"learning_rate": 1.7426098252865968e-06,
"loss": 0.677,
"step": 5940
},
{
"epoch": 0.24,
"learning_rate": 1.74217802801326e-06,
"loss": 0.6587,
"step": 5945
},
{
"epoch": 0.24,
"learning_rate": 1.7417459224456246e-06,
"loss": 0.6613,
"step": 5950
},
{
"epoch": 0.24,
"learning_rate": 1.741313508763184e-06,
"loss": 0.7241,
"step": 5955
},
{
"epoch": 0.24,
"learning_rate": 1.740880787145558e-06,
"loss": 0.7194,
"step": 5960
},
{
"epoch": 0.24,
"learning_rate": 1.7404477577724964e-06,
"loss": 0.6832,
"step": 5965
},
{
"epoch": 0.24,
"learning_rate": 1.7400144208238751e-06,
"loss": 0.6761,
"step": 5970
},
{
"epoch": 0.24,
"learning_rate": 1.7395807764796993e-06,
"loss": 0.7259,
"step": 5975
},
{
"epoch": 0.24,
"learning_rate": 1.7391468249201004e-06,
"loss": 0.6704,
"step": 5980
},
{
"epoch": 0.24,
"learning_rate": 1.7387125663253386e-06,
"loss": 0.6836,
"step": 5985
},
{
"epoch": 0.24,
"learning_rate": 1.7382780008758005e-06,
"loss": 0.6555,
"step": 5990
},
{
"epoch": 0.24,
"learning_rate": 1.7378431287520016e-06,
"loss": 0.7281,
"step": 5995
},
{
"epoch": 0.24,
"learning_rate": 1.7374079501345835e-06,
"loss": 0.6644,
"step": 6000
},
{
"epoch": 0.24,
"learning_rate": 1.7369724652043156e-06,
"loss": 0.6314,
"step": 6005
},
{
"epoch": 0.24,
"learning_rate": 1.7365366741420947e-06,
"loss": 0.6719,
"step": 6010
},
{
"epoch": 0.24,
"learning_rate": 1.736100577128945e-06,
"loss": 0.6913,
"step": 6015
},
{
"epoch": 0.24,
"learning_rate": 1.7356641743460166e-06,
"loss": 0.6858,
"step": 6020
},
{
"epoch": 0.24,
"learning_rate": 1.7352274659745878e-06,
"loss": 0.6853,
"step": 6025
},
{
"epoch": 0.24,
"learning_rate": 1.7347904521960635e-06,
"loss": 0.6921,
"step": 6030
},
{
"epoch": 0.25,
"learning_rate": 1.7343531331919756e-06,
"loss": 0.6898,
"step": 6035
},
{
"epoch": 0.25,
"learning_rate": 1.7339155091439823e-06,
"loss": 0.6996,
"step": 6040
},
{
"epoch": 0.25,
"learning_rate": 1.733477580233869e-06,
"loss": 0.7071,
"step": 6045
},
{
"epoch": 0.25,
"learning_rate": 1.7330393466435474e-06,
"loss": 0.6903,
"step": 6050
},
{
"epoch": 0.25,
"learning_rate": 1.7326008085550564e-06,
"loss": 0.6929,
"step": 6055
},
{
"epoch": 0.25,
"learning_rate": 1.73216196615056e-06,
"loss": 0.7195,
"step": 6060
},
{
"epoch": 0.25,
"learning_rate": 1.7317228196123504e-06,
"loss": 0.6737,
"step": 6065
},
{
"epoch": 0.25,
"learning_rate": 1.7312833691228445e-06,
"loss": 0.7118,
"step": 6070
},
{
"epoch": 0.25,
"learning_rate": 1.7308436148645871e-06,
"loss": 0.7004,
"step": 6075
},
{
"epoch": 0.25,
"learning_rate": 1.7304035570202476e-06,
"loss": 0.666,
"step": 6080
},
{
"epoch": 0.25,
"learning_rate": 1.7299631957726223e-06,
"loss": 0.6574,
"step": 6085
},
{
"epoch": 0.25,
"learning_rate": 1.7295225313046337e-06,
"loss": 0.7057,
"step": 6090
},
{
"epoch": 0.25,
"learning_rate": 1.72908156379933e-06,
"loss": 0.6804,
"step": 6095
},
{
"epoch": 0.25,
"learning_rate": 1.7286402934398848e-06,
"loss": 0.7117,
"step": 6100
},
{
"epoch": 0.25,
"learning_rate": 1.7281987204095985e-06,
"loss": 0.7004,
"step": 6105
},
{
"epoch": 0.25,
"learning_rate": 1.7277568448918962e-06,
"loss": 0.6945,
"step": 6110
},
{
"epoch": 0.25,
"learning_rate": 1.7273146670703295e-06,
"loss": 0.6268,
"step": 6115
},
{
"epoch": 0.25,
"learning_rate": 1.7268721871285753e-06,
"loss": 0.6772,
"step": 6120
},
{
"epoch": 0.25,
"learning_rate": 1.7264294052504358e-06,
"loss": 0.7141,
"step": 6125
},
{
"epoch": 0.25,
"learning_rate": 1.7259863216198385e-06,
"loss": 0.6466,
"step": 6130
},
{
"epoch": 0.25,
"learning_rate": 1.7255429364208366e-06,
"loss": 0.6835,
"step": 6135
},
{
"epoch": 0.25,
"learning_rate": 1.7250992498376086e-06,
"loss": 0.6938,
"step": 6140
},
{
"epoch": 0.25,
"learning_rate": 1.7246552620544584e-06,
"loss": 0.7017,
"step": 6145
},
{
"epoch": 0.25,
"learning_rate": 1.7242109732558137e-06,
"loss": 0.7161,
"step": 6150
},
{
"epoch": 0.25,
"learning_rate": 1.7237663836262289e-06,
"loss": 0.6646,
"step": 6155
},
{
"epoch": 0.25,
"learning_rate": 1.7233214933503826e-06,
"loss": 0.6712,
"step": 6160
},
{
"epoch": 0.25,
"learning_rate": 1.7228763026130783e-06,
"loss": 0.6749,
"step": 6165
},
{
"epoch": 0.25,
"learning_rate": 1.7224308115992443e-06,
"loss": 0.647,
"step": 6170
},
{
"epoch": 0.25,
"learning_rate": 1.7219850204939338e-06,
"loss": 0.7009,
"step": 6175
},
{
"epoch": 0.25,
"learning_rate": 1.7215389294823243e-06,
"loss": 0.6568,
"step": 6180
},
{
"epoch": 0.25,
"learning_rate": 1.7210925387497186e-06,
"loss": 0.6501,
"step": 6185
},
{
"epoch": 0.25,
"learning_rate": 1.7206458484815429e-06,
"loss": 0.6926,
"step": 6190
},
{
"epoch": 0.25,
"learning_rate": 1.7201988588633489e-06,
"loss": 0.6973,
"step": 6195
},
{
"epoch": 0.25,
"learning_rate": 1.7197515700808124e-06,
"loss": 0.6979,
"step": 6200
},
{
"epoch": 0.25,
"learning_rate": 1.7193039823197323e-06,
"loss": 0.6634,
"step": 6205
},
{
"epoch": 0.25,
"learning_rate": 1.7188560957660338e-06,
"loss": 0.6958,
"step": 6210
},
{
"epoch": 0.25,
"learning_rate": 1.7184079106057645e-06,
"loss": 0.7073,
"step": 6215
},
{
"epoch": 0.25,
"learning_rate": 1.7179594270250965e-06,
"loss": 0.7147,
"step": 6220
},
{
"epoch": 0.25,
"learning_rate": 1.7175106452103268e-06,
"loss": 0.7319,
"step": 6225
},
{
"epoch": 0.25,
"learning_rate": 1.7170615653478742e-06,
"loss": 0.6786,
"step": 6230
},
{
"epoch": 0.25,
"learning_rate": 1.7166121876242837e-06,
"loss": 0.6643,
"step": 6235
},
{
"epoch": 0.25,
"learning_rate": 1.716162512226222e-06,
"loss": 0.7524,
"step": 6240
},
{
"epoch": 0.25,
"learning_rate": 1.715712539340481e-06,
"loss": 0.6517,
"step": 6245
},
{
"epoch": 0.25,
"learning_rate": 1.715262269153975e-06,
"loss": 0.6999,
"step": 6250
},
{
"epoch": 0.25,
"learning_rate": 1.7148117018537432e-06,
"loss": 0.6756,
"step": 6255
},
{
"epoch": 0.25,
"learning_rate": 1.7143608376269462e-06,
"loss": 0.7208,
"step": 6260
},
{
"epoch": 0.25,
"learning_rate": 1.71390967666087e-06,
"loss": 0.6793,
"step": 6265
},
{
"epoch": 0.25,
"learning_rate": 1.7134582191429223e-06,
"loss": 0.68,
"step": 6270
},
{
"epoch": 0.25,
"learning_rate": 1.7130064652606352e-06,
"loss": 0.6666,
"step": 6275
},
{
"epoch": 0.26,
"learning_rate": 1.7125544152016627e-06,
"loss": 0.6714,
"step": 6280
},
{
"epoch": 0.26,
"learning_rate": 1.7121020691537831e-06,
"loss": 0.7034,
"step": 6285
},
{
"epoch": 0.26,
"learning_rate": 1.7116494273048966e-06,
"loss": 0.6819,
"step": 6290
},
{
"epoch": 0.26,
"learning_rate": 1.7111964898430266e-06,
"loss": 0.66,
"step": 6295
},
{
"epoch": 0.26,
"learning_rate": 1.71074325695632e-06,
"loss": 0.7156,
"step": 6300
},
{
"epoch": 0.26,
"learning_rate": 1.7102897288330454e-06,
"loss": 0.6804,
"step": 6305
},
{
"epoch": 0.26,
"learning_rate": 1.7098359056615942e-06,
"loss": 0.6807,
"step": 6310
},
{
"epoch": 0.26,
"learning_rate": 1.7093817876304807e-06,
"loss": 0.6701,
"step": 6315
},
{
"epoch": 0.26,
"learning_rate": 1.7089273749283418e-06,
"loss": 0.6785,
"step": 6320
},
{
"epoch": 0.26,
"learning_rate": 1.7084726677439364e-06,
"loss": 0.6941,
"step": 6325
},
{
"epoch": 0.26,
"learning_rate": 1.7080176662661463e-06,
"loss": 0.6811,
"step": 6330
},
{
"epoch": 0.26,
"learning_rate": 1.7075623706839745e-06,
"loss": 0.6797,
"step": 6335
},
{
"epoch": 0.26,
"learning_rate": 1.7071067811865474e-06,
"loss": 0.6913,
"step": 6340
},
{
"epoch": 0.26,
"learning_rate": 1.7066508979631129e-06,
"loss": 0.6917,
"step": 6345
},
{
"epoch": 0.26,
"learning_rate": 1.7061947212030402e-06,
"loss": 0.6709,
"step": 6350
},
{
"epoch": 0.26,
"learning_rate": 1.705738251095822e-06,
"loss": 0.7023,
"step": 6355
},
{
"epoch": 0.26,
"learning_rate": 1.7052814878310718e-06,
"loss": 0.7109,
"step": 6360
},
{
"epoch": 0.26,
"learning_rate": 1.7048244315985247e-06,
"loss": 0.6795,
"step": 6365
},
{
"epoch": 0.26,
"learning_rate": 1.7043670825880384e-06,
"loss": 0.7013,
"step": 6370
},
{
"epoch": 0.26,
"learning_rate": 1.7039094409895914e-06,
"loss": 0.703,
"step": 6375
},
{
"epoch": 0.26,
"learning_rate": 1.703451506993284e-06,
"loss": 0.732,
"step": 6380
},
{
"epoch": 0.26,
"learning_rate": 1.7029932807893382e-06,
"loss": 0.6905,
"step": 6385
},
{
"epoch": 0.26,
"learning_rate": 1.702534762568097e-06,
"loss": 0.7172,
"step": 6390
},
{
"epoch": 0.26,
"learning_rate": 1.7020759525200253e-06,
"loss": 0.6674,
"step": 6395
},
{
"epoch": 0.26,
"learning_rate": 1.701616850835708e-06,
"loss": 0.6694,
"step": 6400
},
{
"epoch": 0.26,
"eval_loss": 0.6586260795593262,
"eval_runtime": 139.2072,
"eval_samples_per_second": 16.996,
"eval_steps_per_second": 2.837,
"step": 6400
},
{
"epoch": 0.26,
"learning_rate": 1.7011574577058525e-06,
"loss": 0.7282,
"step": 6405
},
{
"epoch": 0.26,
"learning_rate": 1.7006977733212867e-06,
"loss": 0.6736,
"step": 6410
},
{
"epoch": 0.26,
"learning_rate": 1.7002377978729596e-06,
"loss": 0.6672,
"step": 6415
},
{
"epoch": 0.26,
"learning_rate": 1.6997775315519408e-06,
"loss": 0.712,
"step": 6420
},
{
"epoch": 0.26,
"learning_rate": 1.6993169745494209e-06,
"loss": 0.6949,
"step": 6425
},
{
"epoch": 0.26,
"learning_rate": 1.6988561270567115e-06,
"loss": 0.6951,
"step": 6430
},
{
"epoch": 0.26,
"learning_rate": 1.698394989265244e-06,
"loss": 0.7019,
"step": 6435
},
{
"epoch": 0.26,
"learning_rate": 1.6979335613665717e-06,
"loss": 0.7143,
"step": 6440
},
{
"epoch": 0.26,
"learning_rate": 1.6974718435523678e-06,
"loss": 0.7253,
"step": 6445
},
{
"epoch": 0.26,
"learning_rate": 1.6970098360144253e-06,
"loss": 0.7103,
"step": 6450
},
{
"epoch": 0.26,
"learning_rate": 1.6965475389446586e-06,
"loss": 0.6953,
"step": 6455
},
{
"epoch": 0.26,
"learning_rate": 1.6960849525351018e-06,
"loss": 0.7247,
"step": 6460
},
{
"epoch": 0.26,
"learning_rate": 1.6956220769779088e-06,
"loss": 0.7015,
"step": 6465
},
{
"epoch": 0.26,
"learning_rate": 1.6951589124653547e-06,
"loss": 0.704,
"step": 6470
},
{
"epoch": 0.26,
"learning_rate": 1.6946954591898336e-06,
"loss": 0.7054,
"step": 6475
},
{
"epoch": 0.26,
"learning_rate": 1.6942317173438604e-06,
"loss": 0.6704,
"step": 6480
},
{
"epoch": 0.26,
"learning_rate": 1.693767687120069e-06,
"loss": 0.6564,
"step": 6485
},
{
"epoch": 0.26,
"learning_rate": 1.6933033687112134e-06,
"loss": 0.7054,
"step": 6490
},
{
"epoch": 0.26,
"learning_rate": 1.6928387623101681e-06,
"loss": 0.676,
"step": 6495
},
{
"epoch": 0.26,
"learning_rate": 1.692373868109926e-06,
"loss": 0.7034,
"step": 6500
},
{
"epoch": 0.26,
"learning_rate": 1.6919086863036003e-06,
"loss": 0.6781,
"step": 6505
},
{
"epoch": 0.26,
"learning_rate": 1.6914432170844233e-06,
"loss": 0.7057,
"step": 6510
},
{
"epoch": 0.26,
"learning_rate": 1.690977460645747e-06,
"loss": 0.6929,
"step": 6515
},
{
"epoch": 0.26,
"learning_rate": 1.6905114171810429e-06,
"loss": 0.6816,
"step": 6520
},
{
"epoch": 0.27,
"learning_rate": 1.6900450868839009e-06,
"loss": 0.6845,
"step": 6525
},
{
"epoch": 0.27,
"learning_rate": 1.6895784699480306e-06,
"loss": 0.6665,
"step": 6530
},
{
"epoch": 0.27,
"learning_rate": 1.6891115665672608e-06,
"loss": 0.6597,
"step": 6535
},
{
"epoch": 0.27,
"learning_rate": 1.6886443769355393e-06,
"loss": 0.6623,
"step": 6540
},
{
"epoch": 0.27,
"learning_rate": 1.688176901246932e-06,
"loss": 0.6535,
"step": 6545
},
{
"epoch": 0.27,
"learning_rate": 1.6877091396956247e-06,
"loss": 0.6902,
"step": 6550
},
{
"epoch": 0.27,
"learning_rate": 1.6872410924759215e-06,
"loss": 0.6447,
"step": 6555
},
{
"epoch": 0.27,
"learning_rate": 1.686772759782245e-06,
"loss": 0.7013,
"step": 6560
},
{
"epoch": 0.27,
"learning_rate": 1.6863041418091366e-06,
"loss": 0.7158,
"step": 6565
},
{
"epoch": 0.27,
"learning_rate": 1.685835238751256e-06,
"loss": 0.7109,
"step": 6570
},
{
"epoch": 0.27,
"learning_rate": 1.6853660508033816e-06,
"loss": 0.7231,
"step": 6575
},
{
"epoch": 0.27,
"learning_rate": 1.6848965781604099e-06,
"loss": 0.7305,
"step": 6580
},
{
"epoch": 0.27,
"learning_rate": 1.6844268210173556e-06,
"loss": 0.7013,
"step": 6585
},
{
"epoch": 0.27,
"learning_rate": 1.6839567795693524e-06,
"loss": 0.6849,
"step": 6590
},
{
"epoch": 0.27,
"learning_rate": 1.6834864540116506e-06,
"loss": 0.7316,
"step": 6595
},
{
"epoch": 0.27,
"learning_rate": 1.6830158445396196e-06,
"loss": 0.7193,
"step": 6600
},
{
"epoch": 0.27,
"learning_rate": 1.6825449513487466e-06,
"loss": 0.7277,
"step": 6605
},
{
"epoch": 0.27,
"learning_rate": 1.6820737746346368e-06,
"loss": 0.7037,
"step": 6610
},
{
"epoch": 0.27,
"learning_rate": 1.6816023145930123e-06,
"loss": 0.7078,
"step": 6615
},
{
"epoch": 0.27,
"learning_rate": 1.681130571419714e-06,
"loss": 0.6651,
"step": 6620
},
{
"epoch": 0.27,
"learning_rate": 1.6806585453106997e-06,
"loss": 0.6851,
"step": 6625
},
{
"epoch": 0.27,
"learning_rate": 1.680186236462045e-06,
"loss": 0.6877,
"step": 6630
},
{
"epoch": 0.27,
"learning_rate": 1.6797136450699427e-06,
"loss": 0.6976,
"step": 6635
},
{
"epoch": 0.27,
"learning_rate": 1.6792407713307036e-06,
"loss": 0.7244,
"step": 6640
},
{
"epoch": 0.27,
"learning_rate": 1.678767615440755e-06,
"loss": 0.6734,
"step": 6645
},
{
"epoch": 0.27,
"learning_rate": 1.6782941775966416e-06,
"loss": 0.6733,
"step": 6650
},
{
"epoch": 0.27,
"learning_rate": 1.6778204579950255e-06,
"loss": 0.6836,
"step": 6655
},
{
"epoch": 0.27,
"learning_rate": 1.6773464568326859e-06,
"loss": 0.7101,
"step": 6660
},
{
"epoch": 0.27,
"learning_rate": 1.6768721743065186e-06,
"loss": 0.695,
"step": 6665
},
{
"epoch": 0.27,
"learning_rate": 1.6763976106135366e-06,
"loss": 0.6774,
"step": 6670
},
{
"epoch": 0.27,
"learning_rate": 1.6759227659508692e-06,
"loss": 0.674,
"step": 6675
},
{
"epoch": 0.27,
"learning_rate": 1.6754476405157631e-06,
"loss": 0.6887,
"step": 6680
},
{
"epoch": 0.27,
"learning_rate": 1.674972234505581e-06,
"loss": 0.689,
"step": 6685
},
{
"epoch": 0.27,
"learning_rate": 1.6744965481178026e-06,
"loss": 0.6877,
"step": 6690
},
{
"epoch": 0.27,
"learning_rate": 1.6740205815500236e-06,
"loss": 0.6872,
"step": 6695
},
{
"epoch": 0.27,
"learning_rate": 1.673544334999957e-06,
"loss": 0.6999,
"step": 6700
},
{
"epoch": 0.27,
"learning_rate": 1.6730678086654306e-06,
"loss": 0.6983,
"step": 6705
},
{
"epoch": 0.27,
"learning_rate": 1.6725910027443902e-06,
"loss": 0.6488,
"step": 6710
},
{
"epoch": 0.27,
"learning_rate": 1.6721139174348964e-06,
"loss": 0.6741,
"step": 6715
},
{
"epoch": 0.27,
"learning_rate": 1.671636552935126e-06,
"loss": 0.6809,
"step": 6720
},
{
"epoch": 0.27,
"learning_rate": 1.6711589094433725e-06,
"loss": 0.7326,
"step": 6725
},
{
"epoch": 0.27,
"learning_rate": 1.6706809871580446e-06,
"loss": 0.6757,
"step": 6730
},
{
"epoch": 0.27,
"learning_rate": 1.670202786277667e-06,
"loss": 0.6515,
"step": 6735
},
{
"epoch": 0.27,
"learning_rate": 1.6697243070008805e-06,
"loss": 0.6747,
"step": 6740
},
{
"epoch": 0.27,
"learning_rate": 1.6692455495264413e-06,
"loss": 0.6816,
"step": 6745
},
{
"epoch": 0.27,
"learning_rate": 1.6687665140532209e-06,
"loss": 0.6918,
"step": 6750
},
{
"epoch": 0.27,
"learning_rate": 1.6682872007802062e-06,
"loss": 0.7191,
"step": 6755
},
{
"epoch": 0.27,
"learning_rate": 1.6678076099064999e-06,
"loss": 0.6649,
"step": 6760
},
{
"epoch": 0.27,
"learning_rate": 1.66732774163132e-06,
"loss": 0.6889,
"step": 6765
},
{
"epoch": 0.27,
"learning_rate": 1.666847596154e-06,
"loss": 0.6917,
"step": 6770
},
{
"epoch": 0.28,
"learning_rate": 1.6663671736739874e-06,
"loss": 0.6717,
"step": 6775
},
{
"epoch": 0.28,
"learning_rate": 1.665886474390846e-06,
"loss": 0.7177,
"step": 6780
},
{
"epoch": 0.28,
"learning_rate": 1.6654054985042538e-06,
"loss": 0.6676,
"step": 6785
},
{
"epoch": 0.28,
"learning_rate": 1.6649242462140044e-06,
"loss": 0.708,
"step": 6790
},
{
"epoch": 0.28,
"learning_rate": 1.6644427177200053e-06,
"loss": 0.6958,
"step": 6795
},
{
"epoch": 0.28,
"learning_rate": 1.66396091322228e-06,
"loss": 0.6862,
"step": 6800
},
{
"epoch": 0.28,
"learning_rate": 1.663478832920965e-06,
"loss": 0.6462,
"step": 6805
},
{
"epoch": 0.28,
"learning_rate": 1.6629964770163128e-06,
"loss": 0.6668,
"step": 6810
},
{
"epoch": 0.28,
"learning_rate": 1.6625138457086897e-06,
"loss": 0.6926,
"step": 6815
},
{
"epoch": 0.28,
"learning_rate": 1.6620309391985767e-06,
"loss": 0.6942,
"step": 6820
},
{
"epoch": 0.28,
"learning_rate": 1.661547757686569e-06,
"loss": 0.6988,
"step": 6825
},
{
"epoch": 0.28,
"learning_rate": 1.6610643013733756e-06,
"loss": 0.6548,
"step": 6830
},
{
"epoch": 0.28,
"learning_rate": 1.6605805704598206e-06,
"loss": 0.6827,
"step": 6835
},
{
"epoch": 0.28,
"learning_rate": 1.660096565146841e-06,
"loss": 0.6939,
"step": 6840
},
{
"epoch": 0.28,
"learning_rate": 1.6596122856354885e-06,
"loss": 0.728,
"step": 6845
},
{
"epoch": 0.28,
"learning_rate": 1.6591277321269295e-06,
"loss": 0.6762,
"step": 6850
},
{
"epoch": 0.28,
"learning_rate": 1.6586429048224422e-06,
"loss": 0.6697,
"step": 6855
},
{
"epoch": 0.28,
"learning_rate": 1.6581578039234203e-06,
"loss": 0.7086,
"step": 6860
},
{
"epoch": 0.28,
"learning_rate": 1.6576724296313697e-06,
"loss": 0.7378,
"step": 6865
},
{
"epoch": 0.28,
"learning_rate": 1.657186782147912e-06,
"loss": 0.6861,
"step": 6870
},
{
"epoch": 0.28,
"learning_rate": 1.6567008616747797e-06,
"loss": 0.6749,
"step": 6875
},
{
"epoch": 0.28,
"learning_rate": 1.6562146684138205e-06,
"loss": 0.67,
"step": 6880
},
{
"epoch": 0.28,
"learning_rate": 1.655728202566995e-06,
"loss": 0.6561,
"step": 6885
},
{
"epoch": 0.28,
"learning_rate": 1.6552414643363766e-06,
"loss": 0.7061,
"step": 6890
},
{
"epoch": 0.28,
"learning_rate": 1.6547544539241516e-06,
"loss": 0.6779,
"step": 6895
},
{
"epoch": 0.28,
"learning_rate": 1.6542671715326209e-06,
"loss": 0.6927,
"step": 6900
},
{
"epoch": 0.28,
"learning_rate": 1.653779617364197e-06,
"loss": 0.6633,
"step": 6905
},
{
"epoch": 0.28,
"learning_rate": 1.6532917916214055e-06,
"loss": 0.6755,
"step": 6910
},
{
"epoch": 0.28,
"learning_rate": 1.6528036945068852e-06,
"loss": 0.6774,
"step": 6915
},
{
"epoch": 0.28,
"learning_rate": 1.652315326223387e-06,
"loss": 0.7033,
"step": 6920
},
{
"epoch": 0.28,
"learning_rate": 1.6518266869737754e-06,
"loss": 0.6823,
"step": 6925
},
{
"epoch": 0.28,
"learning_rate": 1.6513377769610264e-06,
"loss": 0.6952,
"step": 6930
},
{
"epoch": 0.28,
"learning_rate": 1.6508485963882293e-06,
"loss": 0.676,
"step": 6935
},
{
"epoch": 0.28,
"learning_rate": 1.650359145458585e-06,
"loss": 0.7026,
"step": 6940
},
{
"epoch": 0.28,
"learning_rate": 1.6498694243754075e-06,
"loss": 0.6872,
"step": 6945
},
{
"epoch": 0.28,
"learning_rate": 1.6493794333421228e-06,
"loss": 0.6846,
"step": 6950
},
{
"epoch": 0.28,
"learning_rate": 1.6488891725622688e-06,
"loss": 0.6825,
"step": 6955
},
{
"epoch": 0.28,
"learning_rate": 1.6483986422394955e-06,
"loss": 0.723,
"step": 6960
},
{
"epoch": 0.28,
"learning_rate": 1.6479078425775653e-06,
"loss": 0.7463,
"step": 6965
},
{
"epoch": 0.28,
"learning_rate": 1.6474167737803514e-06,
"loss": 0.7006,
"step": 6970
},
{
"epoch": 0.28,
"learning_rate": 1.64692543605184e-06,
"loss": 0.6745,
"step": 6975
},
{
"epoch": 0.28,
"learning_rate": 1.6464338295961283e-06,
"loss": 0.6935,
"step": 6980
},
{
"epoch": 0.28,
"learning_rate": 1.6459419546174253e-06,
"loss": 0.7056,
"step": 6985
},
{
"epoch": 0.28,
"learning_rate": 1.6454498113200521e-06,
"loss": 0.6763,
"step": 6990
},
{
"epoch": 0.28,
"learning_rate": 1.6449573999084404e-06,
"loss": 0.68,
"step": 6995
},
{
"epoch": 0.28,
"learning_rate": 1.6444647205871332e-06,
"loss": 0.6595,
"step": 7000
},
{
"epoch": 0.28,
"learning_rate": 1.6439717735607856e-06,
"loss": 0.6996,
"step": 7005
},
{
"epoch": 0.28,
"learning_rate": 1.643478559034164e-06,
"loss": 0.7008,
"step": 7010
},
{
"epoch": 0.28,
"learning_rate": 1.6429850772121446e-06,
"loss": 0.6907,
"step": 7015
},
{
"epoch": 0.29,
"learning_rate": 1.6424913282997159e-06,
"loss": 0.6627,
"step": 7020
},
{
"epoch": 0.29,
"learning_rate": 1.641997312501977e-06,
"loss": 0.6939,
"step": 7025
},
{
"epoch": 0.29,
"learning_rate": 1.6415030300241371e-06,
"loss": 0.7002,
"step": 7030
},
{
"epoch": 0.29,
"learning_rate": 1.6410084810715177e-06,
"loss": 0.7098,
"step": 7035
},
{
"epoch": 0.29,
"learning_rate": 1.6405136658495496e-06,
"loss": 0.7114,
"step": 7040
},
{
"epoch": 0.29,
"learning_rate": 1.640018584563775e-06,
"loss": 0.6951,
"step": 7045
},
{
"epoch": 0.29,
"learning_rate": 1.639523237419846e-06,
"loss": 0.7112,
"step": 7050
},
{
"epoch": 0.29,
"learning_rate": 1.6390276246235257e-06,
"loss": 0.6827,
"step": 7055
},
{
"epoch": 0.29,
"learning_rate": 1.6385317463806878e-06,
"loss": 0.7133,
"step": 7060
},
{
"epoch": 0.29,
"learning_rate": 1.6380356028973152e-06,
"loss": 0.6766,
"step": 7065
},
{
"epoch": 0.29,
"learning_rate": 1.6375391943795015e-06,
"loss": 0.6767,
"step": 7070
},
{
"epoch": 0.29,
"learning_rate": 1.6370425210334514e-06,
"loss": 0.6731,
"step": 7075
},
{
"epoch": 0.29,
"learning_rate": 1.6365455830654775e-06,
"loss": 0.6974,
"step": 7080
},
{
"epoch": 0.29,
"learning_rate": 1.6360483806820043e-06,
"loss": 0.6972,
"step": 7085
},
{
"epoch": 0.29,
"learning_rate": 1.635550914089565e-06,
"loss": 0.6974,
"step": 7090
},
{
"epoch": 0.29,
"learning_rate": 1.635053183494803e-06,
"loss": 0.6649,
"step": 7095
},
{
"epoch": 0.29,
"learning_rate": 1.6345551891044713e-06,
"loss": 0.6754,
"step": 7100
},
{
"epoch": 0.29,
"learning_rate": 1.6340569311254323e-06,
"loss": 0.6877,
"step": 7105
},
{
"epoch": 0.29,
"learning_rate": 1.6335584097646585e-06,
"loss": 0.6841,
"step": 7110
},
{
"epoch": 0.29,
"learning_rate": 1.6330596252292309e-06,
"loss": 0.6675,
"step": 7115
},
{
"epoch": 0.29,
"learning_rate": 1.63256057772634e-06,
"loss": 0.6743,
"step": 7120
},
{
"epoch": 0.29,
"learning_rate": 1.6320612674632864e-06,
"loss": 0.7045,
"step": 7125
},
{
"epoch": 0.29,
"learning_rate": 1.631561694647479e-06,
"loss": 0.6891,
"step": 7130
},
{
"epoch": 0.29,
"learning_rate": 1.6310618594864355e-06,
"loss": 0.6722,
"step": 7135
},
{
"epoch": 0.29,
"learning_rate": 1.6305617621877841e-06,
"loss": 0.7012,
"step": 7140
},
{
"epoch": 0.29,
"learning_rate": 1.6300614029592602e-06,
"loss": 0.665,
"step": 7145
},
{
"epoch": 0.29,
"learning_rate": 1.6295607820087084e-06,
"loss": 0.6967,
"step": 7150
},
{
"epoch": 0.29,
"learning_rate": 1.6290598995440835e-06,
"loss": 0.7089,
"step": 7155
},
{
"epoch": 0.29,
"learning_rate": 1.628558755773446e-06,
"loss": 0.7176,
"step": 7160
},
{
"epoch": 0.29,
"learning_rate": 1.6280573509049679e-06,
"loss": 0.6847,
"step": 7165
},
{
"epoch": 0.29,
"learning_rate": 1.6275556851469284e-06,
"loss": 0.6968,
"step": 7170
},
{
"epoch": 0.29,
"learning_rate": 1.6270537587077145e-06,
"loss": 0.6586,
"step": 7175
},
{
"epoch": 0.29,
"learning_rate": 1.6265515717958222e-06,
"loss": 0.7039,
"step": 7180
},
{
"epoch": 0.29,
"learning_rate": 1.6260491246198563e-06,
"loss": 0.725,
"step": 7185
},
{
"epoch": 0.29,
"learning_rate": 1.6255464173885275e-06,
"loss": 0.7166,
"step": 7190
},
{
"epoch": 0.29,
"learning_rate": 1.6250434503106578e-06,
"loss": 0.67,
"step": 7195
},
{
"epoch": 0.29,
"learning_rate": 1.6245402235951742e-06,
"loss": 0.6697,
"step": 7200
},
{
"epoch": 0.29,
"eval_loss": 0.6546275019645691,
"eval_runtime": 138.8622,
"eval_samples_per_second": 17.038,
"eval_steps_per_second": 2.845,
"step": 7200
},
{
"epoch": 0.29,
"learning_rate": 1.624036737451113e-06,
"loss": 0.6907,
"step": 7205
},
{
"epoch": 0.29,
"learning_rate": 1.623532992087618e-06,
"loss": 0.7066,
"step": 7210
},
{
"epoch": 0.29,
"learning_rate": 1.6230289877139403e-06,
"loss": 0.7354,
"step": 7215
},
{
"epoch": 0.29,
"learning_rate": 1.6225247245394393e-06,
"loss": 0.672,
"step": 7220
},
{
"epoch": 0.29,
"learning_rate": 1.622020202773582e-06,
"loss": 0.676,
"step": 7225
},
{
"epoch": 0.29,
"learning_rate": 1.6215154226259414e-06,
"loss": 0.6798,
"step": 7230
},
{
"epoch": 0.29,
"learning_rate": 1.621010384306199e-06,
"loss": 0.689,
"step": 7235
},
{
"epoch": 0.29,
"learning_rate": 1.620505088024144e-06,
"loss": 0.7003,
"step": 7240
},
{
"epoch": 0.29,
"learning_rate": 1.619999533989671e-06,
"loss": 0.6669,
"step": 7245
},
{
"epoch": 0.29,
"learning_rate": 1.6194937224127837e-06,
"loss": 0.6849,
"step": 7250
},
{
"epoch": 0.29,
"learning_rate": 1.6189876535035919e-06,
"loss": 0.6997,
"step": 7255
},
{
"epoch": 0.29,
"learning_rate": 1.6184813274723113e-06,
"loss": 0.7292,
"step": 7260
},
{
"epoch": 0.3,
"learning_rate": 1.6179747445292659e-06,
"loss": 0.6555,
"step": 7265
},
{
"epoch": 0.3,
"learning_rate": 1.6174679048848856e-06,
"loss": 0.672,
"step": 7270
},
{
"epoch": 0.3,
"learning_rate": 1.6169608087497077e-06,
"loss": 0.6803,
"step": 7275
},
{
"epoch": 0.3,
"learning_rate": 1.6164534563343752e-06,
"loss": 0.6701,
"step": 7280
},
{
"epoch": 0.3,
"learning_rate": 1.615945847849638e-06,
"loss": 0.6493,
"step": 7285
},
{
"epoch": 0.3,
"learning_rate": 1.615437983506352e-06,
"loss": 0.7276,
"step": 7290
},
{
"epoch": 0.3,
"learning_rate": 1.6149298635154795e-06,
"loss": 0.6832,
"step": 7295
},
{
"epoch": 0.3,
"learning_rate": 1.6144214880880895e-06,
"loss": 0.6981,
"step": 7300
},
{
"epoch": 0.3,
"learning_rate": 1.6139128574353568e-06,
"loss": 0.6715,
"step": 7305
},
{
"epoch": 0.3,
"learning_rate": 1.613403971768562e-06,
"loss": 0.6732,
"step": 7310
},
{
"epoch": 0.3,
"learning_rate": 1.6128948312990916e-06,
"loss": 0.6906,
"step": 7315
},
{
"epoch": 0.3,
"learning_rate": 1.6123854362384384e-06,
"loss": 0.682,
"step": 7320
},
{
"epoch": 0.3,
"learning_rate": 1.6118757867982002e-06,
"loss": 0.701,
"step": 7325
},
{
"epoch": 0.3,
"learning_rate": 1.6113658831900816e-06,
"loss": 0.6969,
"step": 7330
},
{
"epoch": 0.3,
"learning_rate": 1.6108557256258916e-06,
"loss": 0.6774,
"step": 7335
},
{
"epoch": 0.3,
"learning_rate": 1.6103453143175458e-06,
"loss": 0.6849,
"step": 7340
},
{
"epoch": 0.3,
"learning_rate": 1.6098346494770642e-06,
"loss": 0.6714,
"step": 7345
},
{
"epoch": 0.3,
"learning_rate": 1.6093237313165722e-06,
"loss": 0.6781,
"step": 7350
},
{
"epoch": 0.3,
"learning_rate": 1.6088125600483014e-06,
"loss": 0.657,
"step": 7355
},
{
"epoch": 0.3,
"learning_rate": 1.6083011358845878e-06,
"loss": 0.6867,
"step": 7360
},
{
"epoch": 0.3,
"learning_rate": 1.6077894590378722e-06,
"loss": 0.6977,
"step": 7365
},
{
"epoch": 0.3,
"learning_rate": 1.607277529720701e-06,
"loss": 0.6775,
"step": 7370
},
{
"epoch": 0.3,
"learning_rate": 1.6067653481457251e-06,
"loss": 0.6962,
"step": 7375
},
{
"epoch": 0.3,
"learning_rate": 1.6062529145257e-06,
"loss": 0.6801,
"step": 7380
},
{
"epoch": 0.3,
"learning_rate": 1.6057402290734867e-06,
"loss": 0.6917,
"step": 7385
},
{
"epoch": 0.3,
"learning_rate": 1.6052272920020502e-06,
"loss": 0.6858,
"step": 7390
},
{
"epoch": 0.3,
"learning_rate": 1.6047141035244596e-06,
"loss": 0.689,
"step": 7395
},
{
"epoch": 0.3,
"learning_rate": 1.6042006638538893e-06,
"loss": 0.684,
"step": 7400
},
{
"epoch": 0.3,
"learning_rate": 1.6036869732036175e-06,
"loss": 0.6685,
"step": 7405
},
{
"epoch": 0.3,
"learning_rate": 1.603173031787027e-06,
"loss": 0.7001,
"step": 7410
},
{
"epoch": 0.3,
"learning_rate": 1.602658839817605e-06,
"loss": 0.6947,
"step": 7415
},
{
"epoch": 0.3,
"learning_rate": 1.6021443975089415e-06,
"loss": 0.6384,
"step": 7420
},
{
"epoch": 0.3,
"learning_rate": 1.601629705074732e-06,
"loss": 0.6828,
"step": 7425
},
{
"epoch": 0.3,
"learning_rate": 1.6011147627287746e-06,
"loss": 0.6856,
"step": 7430
},
{
"epoch": 0.3,
"learning_rate": 1.6005995706849726e-06,
"loss": 0.6655,
"step": 7435
},
{
"epoch": 0.3,
"learning_rate": 1.6000841291573322e-06,
"loss": 0.7021,
"step": 7440
},
{
"epoch": 0.3,
"learning_rate": 1.599568438359963e-06,
"loss": 0.6889,
"step": 7445
},
{
"epoch": 0.3,
"learning_rate": 1.5990524985070785e-06,
"loss": 0.6917,
"step": 7450
},
{
"epoch": 0.3,
"learning_rate": 1.598536309812996e-06,
"loss": 0.7008,
"step": 7455
},
{
"epoch": 0.3,
"learning_rate": 1.598019872492135e-06,
"loss": 0.6614,
"step": 7460
},
{
"epoch": 0.3,
"learning_rate": 1.59750318675902e-06,
"loss": 0.6887,
"step": 7465
},
{
"epoch": 0.3,
"learning_rate": 1.5969862528282771e-06,
"loss": 0.6733,
"step": 7470
},
{
"epoch": 0.3,
"learning_rate": 1.5964690709146367e-06,
"loss": 0.6935,
"step": 7475
},
{
"epoch": 0.3,
"learning_rate": 1.5959516412329314e-06,
"loss": 0.6731,
"step": 7480
},
{
"epoch": 0.3,
"learning_rate": 1.5954339639980967e-06,
"loss": 0.6846,
"step": 7485
},
{
"epoch": 0.3,
"learning_rate": 1.5949160394251718e-06,
"loss": 0.7281,
"step": 7490
},
{
"epoch": 0.3,
"learning_rate": 1.5943978677292976e-06,
"loss": 0.7238,
"step": 7495
},
{
"epoch": 0.3,
"learning_rate": 1.593879449125718e-06,
"loss": 0.6682,
"step": 7500
},
{
"epoch": 0.3,
"learning_rate": 1.5933607838297804e-06,
"loss": 0.6864,
"step": 7505
},
{
"epoch": 0.31,
"learning_rate": 1.5928418720569332e-06,
"loss": 0.6775,
"step": 7510
},
{
"epoch": 0.31,
"learning_rate": 1.5923227140227278e-06,
"loss": 0.7329,
"step": 7515
},
{
"epoch": 0.31,
"learning_rate": 1.5918033099428182e-06,
"loss": 0.6964,
"step": 7520
},
{
"epoch": 0.31,
"learning_rate": 1.59128366003296e-06,
"loss": 0.6862,
"step": 7525
},
{
"epoch": 0.31,
"learning_rate": 1.5907637645090117e-06,
"loss": 0.6552,
"step": 7530
},
{
"epoch": 0.31,
"learning_rate": 1.5902436235869333e-06,
"loss": 0.6738,
"step": 7535
},
{
"epoch": 0.31,
"learning_rate": 1.5897232374827862e-06,
"loss": 0.7062,
"step": 7540
},
{
"epoch": 0.31,
"learning_rate": 1.589202606412735e-06,
"loss": 0.6975,
"step": 7545
},
{
"epoch": 0.31,
"learning_rate": 1.5886817305930452e-06,
"loss": 0.6795,
"step": 7550
},
{
"epoch": 0.31,
"learning_rate": 1.5881606102400836e-06,
"loss": 0.6633,
"step": 7555
},
{
"epoch": 0.31,
"learning_rate": 1.5876392455703198e-06,
"loss": 0.6959,
"step": 7560
},
{
"epoch": 0.31,
"learning_rate": 1.5871176368003231e-06,
"loss": 0.6719,
"step": 7565
},
{
"epoch": 0.31,
"learning_rate": 1.5865957841467666e-06,
"loss": 0.7142,
"step": 7570
},
{
"epoch": 0.31,
"learning_rate": 1.5860736878264222e-06,
"loss": 0.6729,
"step": 7575
},
{
"epoch": 0.31,
"learning_rate": 1.5855513480561649e-06,
"loss": 0.6777,
"step": 7580
},
{
"epoch": 0.31,
"learning_rate": 1.5850287650529698e-06,
"loss": 0.6729,
"step": 7585
},
{
"epoch": 0.31,
"learning_rate": 1.5845059390339134e-06,
"loss": 0.7104,
"step": 7590
},
{
"epoch": 0.31,
"learning_rate": 1.5839828702161727e-06,
"loss": 0.6844,
"step": 7595
},
{
"epoch": 0.31,
"learning_rate": 1.5834595588170266e-06,
"loss": 0.6837,
"step": 7600
},
{
"epoch": 0.31,
"learning_rate": 1.5829360050538537e-06,
"loss": 0.7076,
"step": 7605
},
{
"epoch": 0.31,
"learning_rate": 1.5824122091441337e-06,
"loss": 0.6906,
"step": 7610
},
{
"epoch": 0.31,
"learning_rate": 1.5818881713054469e-06,
"loss": 0.7267,
"step": 7615
},
{
"epoch": 0.31,
"learning_rate": 1.5813638917554742e-06,
"loss": 0.7293,
"step": 7620
},
{
"epoch": 0.31,
"learning_rate": 1.5808393707119967e-06,
"loss": 0.693,
"step": 7625
},
{
"epoch": 0.31,
"learning_rate": 1.5803146083928956e-06,
"loss": 0.7122,
"step": 7630
},
{
"epoch": 0.31,
"learning_rate": 1.579789605016153e-06,
"loss": 0.6895,
"step": 7635
},
{
"epoch": 0.31,
"learning_rate": 1.5792643607998506e-06,
"loss": 0.6916,
"step": 7640
},
{
"epoch": 0.31,
"learning_rate": 1.57873887596217e-06,
"loss": 0.6745,
"step": 7645
},
{
"epoch": 0.31,
"learning_rate": 1.5782131507213934e-06,
"loss": 0.7242,
"step": 7650
},
{
"epoch": 0.31,
"learning_rate": 1.5776871852959026e-06,
"loss": 0.714,
"step": 7655
},
{
"epoch": 0.31,
"learning_rate": 1.5771609799041788e-06,
"loss": 0.7086,
"step": 7660
},
{
"epoch": 0.31,
"learning_rate": 1.576634534764803e-06,
"loss": 0.738,
"step": 7665
},
{
"epoch": 0.31,
"learning_rate": 1.5761078500964562e-06,
"loss": 0.669,
"step": 7670
},
{
"epoch": 0.31,
"learning_rate": 1.5755809261179185e-06,
"loss": 0.6587,
"step": 7675
},
{
"epoch": 0.31,
"learning_rate": 1.5750537630480696e-06,
"loss": 0.676,
"step": 7680
},
{
"epoch": 0.31,
"learning_rate": 1.5745263611058886e-06,
"loss": 0.6838,
"step": 7685
},
{
"epoch": 0.31,
"learning_rate": 1.5739987205104535e-06,
"loss": 0.6182,
"step": 7690
},
{
"epoch": 0.31,
"learning_rate": 1.5734708414809415e-06,
"loss": 0.6497,
"step": 7695
},
{
"epoch": 0.31,
"learning_rate": 1.572942724236629e-06,
"loss": 0.6681,
"step": 7700
},
{
"epoch": 0.31,
"learning_rate": 1.5724143689968915e-06,
"loss": 0.7043,
"step": 7705
},
{
"epoch": 0.31,
"learning_rate": 1.5718857759812033e-06,
"loss": 0.6924,
"step": 7710
},
{
"epoch": 0.31,
"learning_rate": 1.571356945409137e-06,
"loss": 0.6852,
"step": 7715
},
{
"epoch": 0.31,
"learning_rate": 1.570827877500364e-06,
"loss": 0.7008,
"step": 7720
},
{
"epoch": 0.31,
"learning_rate": 1.5702985724746552e-06,
"loss": 0.7029,
"step": 7725
},
{
"epoch": 0.31,
"learning_rate": 1.5697690305518787e-06,
"loss": 0.6684,
"step": 7730
},
{
"epoch": 0.31,
"learning_rate": 1.569239251952002e-06,
"loss": 0.7089,
"step": 7735
},
{
"epoch": 0.31,
"learning_rate": 1.5687092368950908e-06,
"loss": 0.6756,
"step": 7740
},
{
"epoch": 0.31,
"learning_rate": 1.5681789856013076e-06,
"loss": 0.6874,
"step": 7745
},
{
"epoch": 0.31,
"learning_rate": 1.5676484982909154e-06,
"loss": 0.7055,
"step": 7750
},
{
"epoch": 0.31,
"learning_rate": 1.5671177751842733e-06,
"loss": 0.7159,
"step": 7755
},
{
"epoch": 0.32,
"learning_rate": 1.5665868165018395e-06,
"loss": 0.6968,
"step": 7760
},
{
"epoch": 0.32,
"learning_rate": 1.566055622464169e-06,
"loss": 0.6533,
"step": 7765
},
{
"epoch": 0.32,
"learning_rate": 1.565524193291916e-06,
"loss": 0.6589,
"step": 7770
},
{
"epoch": 0.32,
"learning_rate": 1.564992529205831e-06,
"loss": 0.7165,
"step": 7775
},
{
"epoch": 0.32,
"learning_rate": 1.5644606304267627e-06,
"loss": 0.6588,
"step": 7780
},
{
"epoch": 0.32,
"learning_rate": 1.5639284971756574e-06,
"loss": 0.7004,
"step": 7785
},
{
"epoch": 0.32,
"learning_rate": 1.5633961296735585e-06,
"loss": 0.7019,
"step": 7790
},
{
"epoch": 0.32,
"learning_rate": 1.562863528141607e-06,
"loss": 0.6711,
"step": 7795
},
{
"epoch": 0.32,
"learning_rate": 1.5623306928010408e-06,
"loss": 0.6558,
"step": 7800
},
{
"epoch": 0.32,
"learning_rate": 1.561797623873195e-06,
"loss": 0.7162,
"step": 7805
},
{
"epoch": 0.32,
"learning_rate": 1.5612643215795017e-06,
"loss": 0.6986,
"step": 7810
},
{
"epoch": 0.32,
"learning_rate": 1.5607307861414905e-06,
"loss": 0.717,
"step": 7815
},
{
"epoch": 0.32,
"learning_rate": 1.560197017780787e-06,
"loss": 0.7249,
"step": 7820
},
{
"epoch": 0.32,
"learning_rate": 1.5596630167191138e-06,
"loss": 0.6965,
"step": 7825
},
{
"epoch": 0.32,
"learning_rate": 1.5591287831782908e-06,
"loss": 0.679,
"step": 7830
},
{
"epoch": 0.32,
"learning_rate": 1.5585943173802333e-06,
"loss": 0.6812,
"step": 7835
},
{
"epoch": 0.32,
"learning_rate": 1.5580596195469547e-06,
"loss": 0.6957,
"step": 7840
},
{
"epoch": 0.32,
"learning_rate": 1.5575246899005629e-06,
"loss": 0.6792,
"step": 7845
},
{
"epoch": 0.32,
"learning_rate": 1.5569895286632634e-06,
"loss": 0.6675,
"step": 7850
},
{
"epoch": 0.32,
"learning_rate": 1.5564541360573578e-06,
"loss": 0.6931,
"step": 7855
},
{
"epoch": 0.32,
"learning_rate": 1.5559185123052427e-06,
"loss": 0.7044,
"step": 7860
},
{
"epoch": 0.32,
"learning_rate": 1.5553826576294127e-06,
"loss": 0.6978,
"step": 7865
},
{
"epoch": 0.32,
"learning_rate": 1.5548465722524561e-06,
"loss": 0.693,
"step": 7870
},
{
"epoch": 0.32,
"learning_rate": 1.554310256397059e-06,
"loss": 0.7044,
"step": 7875
},
{
"epoch": 0.32,
"learning_rate": 1.5537737102860015e-06,
"loss": 0.6841,
"step": 7880
},
{
"epoch": 0.32,
"learning_rate": 1.5532369341421609e-06,
"loss": 0.674,
"step": 7885
},
{
"epoch": 0.32,
"learning_rate": 1.5526999281885088e-06,
"loss": 0.6544,
"step": 7890
},
{
"epoch": 0.32,
"learning_rate": 1.552162692648113e-06,
"loss": 0.6951,
"step": 7895
},
{
"epoch": 0.32,
"learning_rate": 1.551625227744137e-06,
"loss": 0.6969,
"step": 7900
},
{
"epoch": 0.32,
"learning_rate": 1.5510875336998382e-06,
"loss": 0.6827,
"step": 7905
},
{
"epoch": 0.32,
"learning_rate": 1.5505496107385704e-06,
"loss": 0.6296,
"step": 7910
},
{
"epoch": 0.32,
"learning_rate": 1.550011459083782e-06,
"loss": 0.6884,
"step": 7915
},
{
"epoch": 0.32,
"learning_rate": 1.549473078959017e-06,
"loss": 0.6726,
"step": 7920
},
{
"epoch": 0.32,
"learning_rate": 1.548934470587913e-06,
"loss": 0.6732,
"step": 7925
},
{
"epoch": 0.32,
"learning_rate": 1.548395634194204e-06,
"loss": 0.6986,
"step": 7930
},
{
"epoch": 0.32,
"learning_rate": 1.5478565700017174e-06,
"loss": 0.7457,
"step": 7935
},
{
"epoch": 0.32,
"learning_rate": 1.547317278234376e-06,
"loss": 0.713,
"step": 7940
},
{
"epoch": 0.32,
"learning_rate": 1.5467777591161973e-06,
"loss": 0.6842,
"step": 7945
},
{
"epoch": 0.32,
"learning_rate": 1.5462380128712921e-06,
"loss": 0.7125,
"step": 7950
},
{
"epoch": 0.32,
"learning_rate": 1.545698039723867e-06,
"loss": 0.7007,
"step": 7955
},
{
"epoch": 0.32,
"learning_rate": 1.5451578398982216e-06,
"loss": 0.6896,
"step": 7960
},
{
"epoch": 0.32,
"learning_rate": 1.5446174136187503e-06,
"loss": 0.7371,
"step": 7965
},
{
"epoch": 0.32,
"learning_rate": 1.544076761109942e-06,
"loss": 0.6922,
"step": 7970
},
{
"epoch": 0.32,
"learning_rate": 1.5435358825963784e-06,
"loss": 0.7154,
"step": 7975
},
{
"epoch": 0.32,
"learning_rate": 1.542994778302736e-06,
"loss": 0.7194,
"step": 7980
},
{
"epoch": 0.32,
"learning_rate": 1.5424534484537847e-06,
"loss": 0.6813,
"step": 7985
},
{
"epoch": 0.32,
"learning_rate": 1.5419118932743883e-06,
"loss": 0.7203,
"step": 7990
},
{
"epoch": 0.32,
"learning_rate": 1.5413701129895045e-06,
"loss": 0.6675,
"step": 7995
},
{
"epoch": 0.32,
"learning_rate": 1.5408281078241835e-06,
"loss": 0.7152,
"step": 8000
},
{
"epoch": 0.32,
"eval_loss": 0.6539024114608765,
"eval_runtime": 138.566,
"eval_samples_per_second": 17.075,
"eval_steps_per_second": 2.851,
"step": 8000
},
{
"epoch": 0.33,
"learning_rate": 1.5402858780035697e-06,
"loss": 0.6859,
"step": 8005
},
{
"epoch": 0.33,
"learning_rate": 1.5397434237529012e-06,
"loss": 0.7315,
"step": 8010
},
{
"epoch": 0.33,
"learning_rate": 1.5392007452975077e-06,
"loss": 0.7081,
"step": 8015
},
{
"epoch": 0.33,
"learning_rate": 1.5386578428628142e-06,
"loss": 0.682,
"step": 8020
},
{
"epoch": 0.33,
"learning_rate": 1.5381147166743369e-06,
"loss": 0.6741,
"step": 8025
},
{
"epoch": 0.33,
"learning_rate": 1.5375713669576857e-06,
"loss": 0.6979,
"step": 8030
},
{
"epoch": 0.33,
"learning_rate": 1.5370277939385644e-06,
"loss": 0.6927,
"step": 8035
},
{
"epoch": 0.33,
"learning_rate": 1.536483997842767e-06,
"loss": 0.6856,
"step": 8040
},
{
"epoch": 0.33,
"learning_rate": 1.5359399788961826e-06,
"loss": 0.6683,
"step": 8045
},
{
"epoch": 0.33,
"learning_rate": 1.5353957373247917e-06,
"loss": 0.6646,
"step": 8050
},
{
"epoch": 0.33,
"learning_rate": 1.5348512733546674e-06,
"loss": 0.6684,
"step": 8055
},
{
"epoch": 0.33,
"learning_rate": 1.5343065872119759e-06,
"loss": 0.6741,
"step": 8060
},
{
"epoch": 0.33,
"learning_rate": 1.5337616791229744e-06,
"loss": 0.6936,
"step": 8065
},
{
"epoch": 0.33,
"learning_rate": 1.5332165493140133e-06,
"loss": 0.6646,
"step": 8070
},
{
"epoch": 0.33,
"learning_rate": 1.5326711980115343e-06,
"loss": 0.6936,
"step": 8075
},
{
"epoch": 0.33,
"learning_rate": 1.5321256254420724e-06,
"loss": 0.7009,
"step": 8080
},
{
"epoch": 0.33,
"learning_rate": 1.5315798318322532e-06,
"loss": 0.7183,
"step": 8085
},
{
"epoch": 0.33,
"learning_rate": 1.5310338174087946e-06,
"loss": 0.6853,
"step": 8090
},
{
"epoch": 0.33,
"learning_rate": 1.5304875823985066e-06,
"loss": 0.6851,
"step": 8095
},
{
"epoch": 0.33,
"learning_rate": 1.5299411270282898e-06,
"loss": 0.6607,
"step": 8100
},
{
"epoch": 0.33,
"learning_rate": 1.5293944515251376e-06,
"loss": 0.6715,
"step": 8105
},
{
"epoch": 0.33,
"learning_rate": 1.5288475561161342e-06,
"loss": 0.6525,
"step": 8110
},
{
"epoch": 0.33,
"learning_rate": 1.5283004410284549e-06,
"loss": 0.6958,
"step": 8115
},
{
"epoch": 0.33,
"learning_rate": 1.5277531064893669e-06,
"loss": 0.7071,
"step": 8120
},
{
"epoch": 0.33,
"learning_rate": 1.5272055527262278e-06,
"loss": 0.683,
"step": 8125
},
{
"epoch": 0.33,
"learning_rate": 1.526657779966487e-06,
"loss": 0.71,
"step": 8130
},
{
"epoch": 0.33,
"learning_rate": 1.5261097884376848e-06,
"loss": 0.683,
"step": 8135
},
{
"epoch": 0.33,
"learning_rate": 1.5255615783674512e-06,
"loss": 0.6911,
"step": 8140
},
{
"epoch": 0.33,
"learning_rate": 1.5250131499835088e-06,
"loss": 0.7089,
"step": 8145
},
{
"epoch": 0.33,
"learning_rate": 1.5244645035136694e-06,
"loss": 0.6528,
"step": 8150
},
{
"epoch": 0.33,
"learning_rate": 1.5239156391858363e-06,
"loss": 0.6626,
"step": 8155
},
{
"epoch": 0.33,
"learning_rate": 1.523366557228003e-06,
"loss": 0.6674,
"step": 8160
},
{
"epoch": 0.33,
"learning_rate": 1.5228172578682531e-06,
"loss": 0.6858,
"step": 8165
},
{
"epoch": 0.33,
"learning_rate": 1.5222677413347612e-06,
"loss": 0.6944,
"step": 8170
},
{
"epoch": 0.33,
"learning_rate": 1.521718007855791e-06,
"loss": 0.6623,
"step": 8175
},
{
"epoch": 0.33,
"learning_rate": 1.5211680576596976e-06,
"loss": 0.6764,
"step": 8180
},
{
"epoch": 0.33,
"learning_rate": 1.5206178909749254e-06,
"loss": 0.7022,
"step": 8185
},
{
"epoch": 0.33,
"learning_rate": 1.5200675080300086e-06,
"loss": 0.6437,
"step": 8190
},
{
"epoch": 0.33,
"learning_rate": 1.519516909053572e-06,
"loss": 0.6888,
"step": 8195
},
{
"epoch": 0.33,
"learning_rate": 1.518966094274329e-06,
"loss": 0.6837,
"step": 8200
},
{
"epoch": 0.33,
"learning_rate": 1.518415063921084e-06,
"loss": 0.7228,
"step": 8205
},
{
"epoch": 0.33,
"learning_rate": 1.5178638182227292e-06,
"loss": 0.6647,
"step": 8210
},
{
"epoch": 0.33,
"learning_rate": 1.5173123574082482e-06,
"loss": 0.6992,
"step": 8215
},
{
"epoch": 0.33,
"learning_rate": 1.5167606817067129e-06,
"loss": 0.6828,
"step": 8220
},
{
"epoch": 0.33,
"learning_rate": 1.5162087913472844e-06,
"loss": 0.7303,
"step": 8225
},
{
"epoch": 0.33,
"learning_rate": 1.5156566865592128e-06,
"loss": 0.6794,
"step": 8230
},
{
"epoch": 0.33,
"learning_rate": 1.5151043675718383e-06,
"loss": 0.6442,
"step": 8235
},
{
"epoch": 0.33,
"learning_rate": 1.5145518346145887e-06,
"loss": 0.6669,
"step": 8240
},
{
"epoch": 0.33,
"learning_rate": 1.5139990879169822e-06,
"loss": 0.6902,
"step": 8245
},
{
"epoch": 0.34,
"learning_rate": 1.513446127708624e-06,
"loss": 0.6503,
"step": 8250
},
{
"epoch": 0.34,
"learning_rate": 1.5128929542192102e-06,
"loss": 0.6971,
"step": 8255
},
{
"epoch": 0.34,
"learning_rate": 1.512339567678523e-06,
"loss": 0.7052,
"step": 8260
},
{
"epoch": 0.34,
"learning_rate": 1.511785968316435e-06,
"loss": 0.6673,
"step": 8265
},
{
"epoch": 0.34,
"learning_rate": 1.5112321563629066e-06,
"loss": 0.6816,
"step": 8270
},
{
"epoch": 0.34,
"learning_rate": 1.5106781320479862e-06,
"loss": 0.6708,
"step": 8275
},
{
"epoch": 0.34,
"learning_rate": 1.5101238956018109e-06,
"loss": 0.7032,
"step": 8280
},
{
"epoch": 0.34,
"learning_rate": 1.5095694472546058e-06,
"loss": 0.7008,
"step": 8285
},
{
"epoch": 0.34,
"learning_rate": 1.5090147872366835e-06,
"loss": 0.719,
"step": 8290
},
{
"epoch": 0.34,
"learning_rate": 1.5084599157784453e-06,
"loss": 0.7094,
"step": 8295
},
{
"epoch": 0.34,
"learning_rate": 1.5079048331103797e-06,
"loss": 0.7306,
"step": 8300
},
{
"epoch": 0.34,
"learning_rate": 1.5073495394630636e-06,
"loss": 0.7004,
"step": 8305
},
{
"epoch": 0.34,
"learning_rate": 1.5067940350671606e-06,
"loss": 0.7107,
"step": 8310
},
{
"epoch": 0.34,
"learning_rate": 1.506238320153423e-06,
"loss": 0.7033,
"step": 8315
},
{
"epoch": 0.34,
"learning_rate": 1.5056823949526898e-06,
"loss": 0.6855,
"step": 8320
},
{
"epoch": 0.34,
"learning_rate": 1.5051262596958872e-06,
"loss": 0.7119,
"step": 8325
},
{
"epoch": 0.34,
"learning_rate": 1.5045699146140289e-06,
"loss": 0.6853,
"step": 8330
},
{
"epoch": 0.34,
"learning_rate": 1.5040133599382162e-06,
"loss": 0.71,
"step": 8335
},
{
"epoch": 0.34,
"learning_rate": 1.503456595899637e-06,
"loss": 0.6527,
"step": 8340
},
{
"epoch": 0.34,
"learning_rate": 1.5028996227295664e-06,
"loss": 0.6692,
"step": 8345
},
{
"epoch": 0.34,
"learning_rate": 1.5023424406593654e-06,
"loss": 0.6568,
"step": 8350
},
{
"epoch": 0.34,
"learning_rate": 1.5017850499204835e-06,
"loss": 0.658,
"step": 8355
},
{
"epoch": 0.34,
"learning_rate": 1.501227450744455e-06,
"loss": 0.6895,
"step": 8360
},
{
"epoch": 0.34,
"learning_rate": 1.5006696433629032e-06,
"loss": 0.7062,
"step": 8365
},
{
"epoch": 0.34,
"learning_rate": 1.5001116280075353e-06,
"loss": 0.6883,
"step": 8370
},
{
"epoch": 0.34,
"learning_rate": 1.499553404910146e-06,
"loss": 0.666,
"step": 8375
},
{
"epoch": 0.34,
"learning_rate": 1.4989949743026169e-06,
"loss": 0.709,
"step": 8380
},
{
"epoch": 0.34,
"learning_rate": 1.4984363364169145e-06,
"loss": 0.7225,
"step": 8385
},
{
"epoch": 0.34,
"learning_rate": 1.4978774914850933e-06,
"loss": 0.6657,
"step": 8390
},
{
"epoch": 0.34,
"learning_rate": 1.4973184397392915e-06,
"loss": 0.6774,
"step": 8395
},
{
"epoch": 0.34,
"learning_rate": 1.4967591814117347e-06,
"loss": 0.6617,
"step": 8400
},
{
"epoch": 0.34,
"learning_rate": 1.496199716734734e-06,
"loss": 0.669,
"step": 8405
},
{
"epoch": 0.34,
"learning_rate": 1.4956400459406862e-06,
"loss": 0.6798,
"step": 8410
},
{
"epoch": 0.34,
"learning_rate": 1.4950801692620735e-06,
"loss": 0.6421,
"step": 8415
},
{
"epoch": 0.34,
"learning_rate": 1.494520086931464e-06,
"loss": 0.6742,
"step": 8420
},
{
"epoch": 0.34,
"learning_rate": 1.4939597991815107e-06,
"loss": 0.6478,
"step": 8425
},
{
"epoch": 0.34,
"learning_rate": 1.493399306244953e-06,
"loss": 0.6817,
"step": 8430
},
{
"epoch": 0.34,
"learning_rate": 1.492838608354614e-06,
"loss": 0.7469,
"step": 8435
},
{
"epoch": 0.34,
"learning_rate": 1.4922777057434031e-06,
"loss": 0.6713,
"step": 8440
},
{
"epoch": 0.34,
"learning_rate": 1.4917165986443142e-06,
"loss": 0.7093,
"step": 8445
},
{
"epoch": 0.34,
"learning_rate": 1.4911552872904266e-06,
"loss": 0.6825,
"step": 8450
},
{
"epoch": 0.34,
"learning_rate": 1.4905937719149035e-06,
"loss": 0.664,
"step": 8455
},
{
"epoch": 0.34,
"learning_rate": 1.4900320527509942e-06,
"loss": 0.6827,
"step": 8460
},
{
"epoch": 0.34,
"learning_rate": 1.489470130032032e-06,
"loss": 0.7093,
"step": 8465
},
{
"epoch": 0.34,
"learning_rate": 1.488908003991434e-06,
"loss": 0.7162,
"step": 8470
},
{
"epoch": 0.34,
"learning_rate": 1.4883456748627032e-06,
"loss": 0.6738,
"step": 8475
},
{
"epoch": 0.34,
"learning_rate": 1.4877831428794258e-06,
"loss": 0.6977,
"step": 8480
},
{
"epoch": 0.34,
"learning_rate": 1.4872204082752728e-06,
"loss": 0.7097,
"step": 8485
},
{
"epoch": 0.34,
"learning_rate": 1.4866574712839994e-06,
"loss": 0.6716,
"step": 8490
},
{
"epoch": 0.35,
"learning_rate": 1.4860943321394443e-06,
"loss": 0.6891,
"step": 8495
},
{
"epoch": 0.35,
"learning_rate": 1.4855309910755313e-06,
"loss": 0.6471,
"step": 8500
},
{
"epoch": 0.35,
"learning_rate": 1.4849674483262668e-06,
"loss": 0.6544,
"step": 8505
},
{
"epoch": 0.35,
"learning_rate": 1.4844037041257416e-06,
"loss": 0.6554,
"step": 8510
},
{
"epoch": 0.35,
"learning_rate": 1.4838397587081307e-06,
"loss": 0.694,
"step": 8515
},
{
"epoch": 0.35,
"learning_rate": 1.4832756123076912e-06,
"loss": 0.6488,
"step": 8520
},
{
"epoch": 0.35,
"learning_rate": 1.4827112651587656e-06,
"loss": 0.6885,
"step": 8525
},
{
"epoch": 0.35,
"learning_rate": 1.482146717495778e-06,
"loss": 0.6827,
"step": 8530
},
{
"epoch": 0.35,
"learning_rate": 1.481581969553237e-06,
"loss": 0.7017,
"step": 8535
},
{
"epoch": 0.35,
"learning_rate": 1.481017021565734e-06,
"loss": 0.681,
"step": 8540
},
{
"epoch": 0.35,
"learning_rate": 1.4804518737679432e-06,
"loss": 0.6228,
"step": 8545
},
{
"epoch": 0.35,
"learning_rate": 1.4798865263946223e-06,
"loss": 0.7208,
"step": 8550
},
{
"epoch": 0.35,
"learning_rate": 1.4793209796806117e-06,
"loss": 0.6627,
"step": 8555
},
{
"epoch": 0.35,
"learning_rate": 1.4787552338608341e-06,
"loss": 0.6698,
"step": 8560
},
{
"epoch": 0.35,
"learning_rate": 1.4781892891702965e-06,
"loss": 0.6631,
"step": 8565
},
{
"epoch": 0.35,
"learning_rate": 1.4776231458440862e-06,
"loss": 0.6991,
"step": 8570
},
{
"epoch": 0.35,
"learning_rate": 1.477056804117375e-06,
"loss": 0.65,
"step": 8575
},
{
"epoch": 0.35,
"learning_rate": 1.476490264225416e-06,
"loss": 0.6846,
"step": 8580
},
{
"epoch": 0.35,
"learning_rate": 1.475923526403545e-06,
"loss": 0.6916,
"step": 8585
},
{
"epoch": 0.35,
"learning_rate": 1.47535659088718e-06,
"loss": 0.654,
"step": 8590
},
{
"epoch": 0.35,
"learning_rate": 1.4747894579118208e-06,
"loss": 0.7077,
"step": 8595
},
{
"epoch": 0.35,
"learning_rate": 1.47422212771305e-06,
"loss": 0.6887,
"step": 8600
},
{
"epoch": 0.35,
"learning_rate": 1.4736546005265314e-06,
"loss": 0.6919,
"step": 8605
},
{
"epoch": 0.35,
"learning_rate": 1.4730868765880109e-06,
"loss": 0.6471,
"step": 8610
},
{
"epoch": 0.35,
"learning_rate": 1.4725189561333158e-06,
"loss": 0.706,
"step": 8615
},
{
"epoch": 0.35,
"learning_rate": 1.4719508393983555e-06,
"loss": 0.6916,
"step": 8620
},
{
"epoch": 0.35,
"learning_rate": 1.471382526619121e-06,
"loss": 0.6624,
"step": 8625
},
{
"epoch": 0.35,
"learning_rate": 1.4708140180316843e-06,
"loss": 0.6888,
"step": 8630
},
{
"epoch": 0.35,
"learning_rate": 1.470245313872199e-06,
"loss": 0.6796,
"step": 8635
},
{
"epoch": 0.35,
"learning_rate": 1.4696764143768997e-06,
"loss": 0.6776,
"step": 8640
},
{
"epoch": 0.35,
"learning_rate": 1.469107319782102e-06,
"loss": 0.7217,
"step": 8645
},
{
"epoch": 0.35,
"learning_rate": 1.4685380303242037e-06,
"loss": 0.6858,
"step": 8650
},
{
"epoch": 0.35,
"learning_rate": 1.4679685462396817e-06,
"loss": 0.6937,
"step": 8655
},
{
"epoch": 0.35,
"learning_rate": 1.467398867765096e-06,
"loss": 0.6747,
"step": 8660
},
{
"epoch": 0.35,
"learning_rate": 1.4668289951370848e-06,
"loss": 0.6699,
"step": 8665
},
{
"epoch": 0.35,
"learning_rate": 1.4662589285923686e-06,
"loss": 0.7111,
"step": 8670
},
{
"epoch": 0.35,
"learning_rate": 1.4656886683677486e-06,
"loss": 0.667,
"step": 8675
},
{
"epoch": 0.35,
"learning_rate": 1.4651182147001055e-06,
"loss": 0.6586,
"step": 8680
},
{
"epoch": 0.35,
"learning_rate": 1.464547567826401e-06,
"loss": 0.6996,
"step": 8685
},
{
"epoch": 0.35,
"learning_rate": 1.4639767279836766e-06,
"loss": 0.7181,
"step": 8690
},
{
"epoch": 0.35,
"learning_rate": 1.463405695409054e-06,
"loss": 0.6878,
"step": 8695
},
{
"epoch": 0.35,
"learning_rate": 1.462834470339736e-06,
"loss": 0.6247,
"step": 8700
},
{
"epoch": 0.35,
"learning_rate": 1.4622630530130037e-06,
"loss": 0.7024,
"step": 8705
},
{
"epoch": 0.35,
"learning_rate": 1.4616914436662195e-06,
"loss": 0.6748,
"step": 8710
},
{
"epoch": 0.35,
"learning_rate": 1.4611196425368247e-06,
"loss": 0.6877,
"step": 8715
},
{
"epoch": 0.35,
"learning_rate": 1.46054764986234e-06,
"loss": 0.663,
"step": 8720
},
{
"epoch": 0.35,
"learning_rate": 1.4599754658803671e-06,
"loss": 0.6649,
"step": 8725
},
{
"epoch": 0.35,
"learning_rate": 1.4594030908285858e-06,
"loss": 0.6701,
"step": 8730
},
{
"epoch": 0.35,
"learning_rate": 1.4588305249447557e-06,
"loss": 0.6684,
"step": 8735
},
{
"epoch": 0.36,
"learning_rate": 1.4582577684667156e-06,
"loss": 0.7104,
"step": 8740
},
{
"epoch": 0.36,
"learning_rate": 1.457684821632384e-06,
"loss": 0.7183,
"step": 8745
},
{
"epoch": 0.36,
"learning_rate": 1.457111684679757e-06,
"loss": 0.7016,
"step": 8750
},
{
"epoch": 0.36,
"learning_rate": 1.4565383578469119e-06,
"loss": 0.6959,
"step": 8755
},
{
"epoch": 0.36,
"learning_rate": 1.4559648413720033e-06,
"loss": 0.7299,
"step": 8760
},
{
"epoch": 0.36,
"learning_rate": 1.4553911354932646e-06,
"loss": 0.6539,
"step": 8765
},
{
"epoch": 0.36,
"learning_rate": 1.4548172404490089e-06,
"loss": 0.685,
"step": 8770
},
{
"epoch": 0.36,
"learning_rate": 1.4542431564776265e-06,
"loss": 0.7004,
"step": 8775
},
{
"epoch": 0.36,
"learning_rate": 1.453668883817587e-06,
"loss": 0.6742,
"step": 8780
},
{
"epoch": 0.36,
"learning_rate": 1.453094422707439e-06,
"loss": 0.6744,
"step": 8785
},
{
"epoch": 0.36,
"learning_rate": 1.4525197733858077e-06,
"loss": 0.6871,
"step": 8790
},
{
"epoch": 0.36,
"learning_rate": 1.451944936091398e-06,
"loss": 0.6567,
"step": 8795
},
{
"epoch": 0.36,
"learning_rate": 1.4513699110629921e-06,
"loss": 0.6838,
"step": 8800
},
{
"epoch": 0.36,
"eval_loss": 0.6491459012031555,
"eval_runtime": 138.8275,
"eval_samples_per_second": 17.043,
"eval_steps_per_second": 2.845,
"step": 8800
},
{
"epoch": 0.36,
"learning_rate": 1.45079469853945e-06,
"loss": 0.6431,
"step": 8805
},
{
"epoch": 0.36,
"learning_rate": 1.4502192987597113e-06,
"loss": 0.6606,
"step": 8810
},
{
"epoch": 0.36,
"learning_rate": 1.4496437119627905e-06,
"loss": 0.6917,
"step": 8815
},
{
"epoch": 0.36,
"learning_rate": 1.4490679383877825e-06,
"loss": 0.6826,
"step": 8820
},
{
"epoch": 0.36,
"learning_rate": 1.4484919782738581e-06,
"loss": 0.6904,
"step": 8825
},
{
"epoch": 0.36,
"learning_rate": 1.4479158318602658e-06,
"loss": 0.6781,
"step": 8830
},
{
"epoch": 0.36,
"learning_rate": 1.4473394993863325e-06,
"loss": 0.7099,
"step": 8835
},
{
"epoch": 0.36,
"learning_rate": 1.4467629810914615e-06,
"loss": 0.6993,
"step": 8840
},
{
"epoch": 0.36,
"learning_rate": 1.4461862772151333e-06,
"loss": 0.6606,
"step": 8845
},
{
"epoch": 0.36,
"learning_rate": 1.4456093879969057e-06,
"loss": 0.7129,
"step": 8850
},
{
"epoch": 0.36,
"learning_rate": 1.4450323136764136e-06,
"loss": 0.6789,
"step": 8855
},
{
"epoch": 0.36,
"learning_rate": 1.4444550544933684e-06,
"loss": 0.6828,
"step": 8860
},
{
"epoch": 0.36,
"learning_rate": 1.443877610687559e-06,
"loss": 0.701,
"step": 8865
},
{
"epoch": 0.36,
"learning_rate": 1.4432999824988503e-06,
"loss": 0.6962,
"step": 8870
},
{
"epoch": 0.36,
"learning_rate": 1.442722170167184e-06,
"loss": 0.6399,
"step": 8875
},
{
"epoch": 0.36,
"learning_rate": 1.4421441739325782e-06,
"loss": 0.6943,
"step": 8880
},
{
"epoch": 0.36,
"learning_rate": 1.4415659940351275e-06,
"loss": 0.6932,
"step": 8885
},
{
"epoch": 0.36,
"learning_rate": 1.440987630715003e-06,
"loss": 0.7096,
"step": 8890
},
{
"epoch": 0.36,
"learning_rate": 1.4404090842124519e-06,
"loss": 0.6652,
"step": 8895
},
{
"epoch": 0.36,
"learning_rate": 1.439830354767797e-06,
"loss": 0.6988,
"step": 8900
},
{
"epoch": 0.36,
"learning_rate": 1.4392514426214378e-06,
"loss": 0.6862,
"step": 8905
},
{
"epoch": 0.36,
"learning_rate": 1.4386723480138491e-06,
"loss": 0.7313,
"step": 8910
},
{
"epoch": 0.36,
"learning_rate": 1.438093071185582e-06,
"loss": 0.6876,
"step": 8915
},
{
"epoch": 0.36,
"learning_rate": 1.437513612377263e-06,
"loss": 0.664,
"step": 8920
},
{
"epoch": 0.36,
"learning_rate": 1.436933971829594e-06,
"loss": 0.6918,
"step": 8925
},
{
"epoch": 0.36,
"learning_rate": 1.4363541497833534e-06,
"loss": 0.7093,
"step": 8930
},
{
"epoch": 0.36,
"learning_rate": 1.4357741464793932e-06,
"loss": 0.7008,
"step": 8935
},
{
"epoch": 0.36,
"learning_rate": 1.4351939621586424e-06,
"loss": 0.6459,
"step": 8940
},
{
"epoch": 0.36,
"learning_rate": 1.4346135970621045e-06,
"loss": 0.7083,
"step": 8945
},
{
"epoch": 0.36,
"learning_rate": 1.4340330514308576e-06,
"loss": 0.6674,
"step": 8950
},
{
"epoch": 0.36,
"learning_rate": 1.4334523255060563e-06,
"loss": 0.6906,
"step": 8955
},
{
"epoch": 0.36,
"learning_rate": 1.432871419528928e-06,
"loss": 0.6976,
"step": 8960
},
{
"epoch": 0.36,
"learning_rate": 1.432290333740776e-06,
"loss": 0.6487,
"step": 8965
},
{
"epoch": 0.36,
"learning_rate": 1.4317090683829797e-06,
"loss": 0.6716,
"step": 8970
},
{
"epoch": 0.36,
"learning_rate": 1.43112762369699e-06,
"loss": 0.6578,
"step": 8975
},
{
"epoch": 0.36,
"learning_rate": 1.4305459999243353e-06,
"loss": 0.6437,
"step": 8980
},
{
"epoch": 0.36,
"learning_rate": 1.429964197306616e-06,
"loss": 0.6527,
"step": 8985
},
{
"epoch": 0.37,
"learning_rate": 1.4293822160855083e-06,
"loss": 0.7384,
"step": 8990
},
{
"epoch": 0.37,
"learning_rate": 1.4288000565027623e-06,
"loss": 0.6817,
"step": 8995
},
{
"epoch": 0.37,
"learning_rate": 1.4282177188002016e-06,
"loss": 0.6805,
"step": 9000
},
{
"epoch": 0.37,
"learning_rate": 1.427635203219725e-06,
"loss": 0.6908,
"step": 9005
},
{
"epoch": 0.37,
"learning_rate": 1.4270525100033036e-06,
"loss": 0.6745,
"step": 9010
},
{
"epoch": 0.37,
"learning_rate": 1.4264696393929832e-06,
"loss": 0.6801,
"step": 9015
},
{
"epoch": 0.37,
"learning_rate": 1.4258865916308834e-06,
"loss": 0.6856,
"step": 9020
},
{
"epoch": 0.37,
"learning_rate": 1.4253033669591971e-06,
"loss": 0.6676,
"step": 9025
},
{
"epoch": 0.37,
"learning_rate": 1.424719965620191e-06,
"loss": 0.6524,
"step": 9030
},
{
"epoch": 0.37,
"learning_rate": 1.4241363878562046e-06,
"loss": 0.6867,
"step": 9035
},
{
"epoch": 0.37,
"learning_rate": 1.4235526339096514e-06,
"loss": 0.6674,
"step": 9040
},
{
"epoch": 0.37,
"learning_rate": 1.422968704023017e-06,
"loss": 0.659,
"step": 9045
},
{
"epoch": 0.37,
"learning_rate": 1.4223845984388613e-06,
"loss": 0.6706,
"step": 9050
},
{
"epoch": 0.37,
"learning_rate": 1.421800317399817e-06,
"loss": 0.6867,
"step": 9055
},
{
"epoch": 0.37,
"learning_rate": 1.421215861148589e-06,
"loss": 0.7136,
"step": 9060
},
{
"epoch": 0.37,
"learning_rate": 1.420631229927955e-06,
"loss": 0.7003,
"step": 9065
},
{
"epoch": 0.37,
"learning_rate": 1.4200464239807664e-06,
"loss": 0.6764,
"step": 9070
},
{
"epoch": 0.37,
"learning_rate": 1.4194614435499458e-06,
"loss": 0.6954,
"step": 9075
},
{
"epoch": 0.37,
"learning_rate": 1.4188762888784897e-06,
"loss": 0.6641,
"step": 9080
},
{
"epoch": 0.37,
"learning_rate": 1.4182909602094662e-06,
"loss": 0.6822,
"step": 9085
},
{
"epoch": 0.37,
"learning_rate": 1.417705457786015e-06,
"loss": 0.7138,
"step": 9090
},
{
"epoch": 0.37,
"learning_rate": 1.4171197818513495e-06,
"loss": 0.7152,
"step": 9095
},
{
"epoch": 0.37,
"learning_rate": 1.4165339326487535e-06,
"loss": 0.6324,
"step": 9100
},
{
"epoch": 0.37,
"learning_rate": 1.4159479104215846e-06,
"loss": 0.6677,
"step": 9105
},
{
"epoch": 0.37,
"learning_rate": 1.4153617154132712e-06,
"loss": 0.6521,
"step": 9110
},
{
"epoch": 0.37,
"learning_rate": 1.414775347867313e-06,
"loss": 0.7191,
"step": 9115
},
{
"epoch": 0.37,
"learning_rate": 1.4141888080272825e-06,
"loss": 0.6545,
"step": 9120
},
{
"epoch": 0.37,
"learning_rate": 1.4136020961368228e-06,
"loss": 0.7033,
"step": 9125
},
{
"epoch": 0.37,
"learning_rate": 1.4130152124396497e-06,
"loss": 0.6917,
"step": 9130
},
{
"epoch": 0.37,
"learning_rate": 1.412428157179549e-06,
"loss": 0.7083,
"step": 9135
},
{
"epoch": 0.37,
"learning_rate": 1.4118409306003784e-06,
"loss": 0.6786,
"step": 9140
},
{
"epoch": 0.37,
"learning_rate": 1.4112535329460671e-06,
"loss": 0.677,
"step": 9145
},
{
"epoch": 0.37,
"learning_rate": 1.4106659644606139e-06,
"loss": 0.6903,
"step": 9150
},
{
"epoch": 0.37,
"learning_rate": 1.410078225388091e-06,
"loss": 0.659,
"step": 9155
},
{
"epoch": 0.37,
"learning_rate": 1.409490315972639e-06,
"loss": 0.7028,
"step": 9160
},
{
"epoch": 0.37,
"learning_rate": 1.4089022364584712e-06,
"loss": 0.6607,
"step": 9165
},
{
"epoch": 0.37,
"learning_rate": 1.4083139870898706e-06,
"loss": 0.6595,
"step": 9170
},
{
"epoch": 0.37,
"learning_rate": 1.4077255681111903e-06,
"loss": 0.6557,
"step": 9175
},
{
"epoch": 0.37,
"learning_rate": 1.4071369797668545e-06,
"loss": 0.659,
"step": 9180
},
{
"epoch": 0.37,
"learning_rate": 1.4065482223013585e-06,
"loss": 0.6879,
"step": 9185
},
{
"epoch": 0.37,
"learning_rate": 1.405959295959266e-06,
"loss": 0.6708,
"step": 9190
},
{
"epoch": 0.37,
"learning_rate": 1.4053702009852128e-06,
"loss": 0.6839,
"step": 9195
},
{
"epoch": 0.37,
"learning_rate": 1.4047809376239034e-06,
"loss": 0.6835,
"step": 9200
},
{
"epoch": 0.37,
"learning_rate": 1.4041915061201122e-06,
"loss": 0.7229,
"step": 9205
},
{
"epoch": 0.37,
"learning_rate": 1.4036019067186843e-06,
"loss": 0.6944,
"step": 9210
},
{
"epoch": 0.37,
"learning_rate": 1.4030121396645349e-06,
"loss": 0.6232,
"step": 9215
},
{
"epoch": 0.37,
"learning_rate": 1.402422205202647e-06,
"loss": 0.7058,
"step": 9220
},
{
"epoch": 0.37,
"learning_rate": 1.4018321035780747e-06,
"loss": 0.6815,
"step": 9225
},
{
"epoch": 0.37,
"learning_rate": 1.4012418350359414e-06,
"loss": 0.6449,
"step": 9230
},
{
"epoch": 0.38,
"learning_rate": 1.4006513998214386e-06,
"loss": 0.6611,
"step": 9235
},
{
"epoch": 0.38,
"learning_rate": 1.4000607981798292e-06,
"loss": 0.6489,
"step": 9240
},
{
"epoch": 0.38,
"learning_rate": 1.3994700303564431e-06,
"loss": 0.6866,
"step": 9245
},
{
"epoch": 0.38,
"learning_rate": 1.3988790965966801e-06,
"loss": 0.6553,
"step": 9250
},
{
"epoch": 0.38,
"learning_rate": 1.3982879971460096e-06,
"loss": 0.6936,
"step": 9255
},
{
"epoch": 0.38,
"learning_rate": 1.3976967322499683e-06,
"loss": 0.6741,
"step": 9260
},
{
"epoch": 0.38,
"learning_rate": 1.3971053021541633e-06,
"loss": 0.6491,
"step": 9265
},
{
"epoch": 0.38,
"learning_rate": 1.396513707104269e-06,
"loss": 0.6819,
"step": 9270
},
{
"epoch": 0.38,
"learning_rate": 1.395921947346029e-06,
"loss": 0.6472,
"step": 9275
},
{
"epoch": 0.38,
"learning_rate": 1.3953300231252554e-06,
"loss": 0.6588,
"step": 9280
},
{
"epoch": 0.38,
"learning_rate": 1.394737934687828e-06,
"loss": 0.6795,
"step": 9285
},
{
"epoch": 0.38,
"learning_rate": 1.3941456822796954e-06,
"loss": 0.6876,
"step": 9290
},
{
"epoch": 0.38,
"learning_rate": 1.393553266146874e-06,
"loss": 0.6439,
"step": 9295
},
{
"epoch": 0.38,
"learning_rate": 1.3929606865354484e-06,
"loss": 0.6678,
"step": 9300
},
{
"epoch": 0.38,
"learning_rate": 1.3923679436915711e-06,
"loss": 0.6646,
"step": 9305
},
{
"epoch": 0.38,
"learning_rate": 1.3917750378614619e-06,
"loss": 0.6721,
"step": 9310
},
{
"epoch": 0.38,
"learning_rate": 1.391181969291409e-06,
"loss": 0.6741,
"step": 9315
},
{
"epoch": 0.38,
"learning_rate": 1.3905887382277683e-06,
"loss": 0.6298,
"step": 9320
},
{
"epoch": 0.38,
"learning_rate": 1.3899953449169622e-06,
"loss": 0.6536,
"step": 9325
},
{
"epoch": 0.38,
"learning_rate": 1.3894017896054814e-06,
"loss": 0.7037,
"step": 9330
},
{
"epoch": 0.38,
"learning_rate": 1.3888080725398836e-06,
"loss": 0.6777,
"step": 9335
},
{
"epoch": 0.38,
"learning_rate": 1.3882141939667937e-06,
"loss": 0.6805,
"step": 9340
},
{
"epoch": 0.38,
"learning_rate": 1.3876201541329038e-06,
"loss": 0.666,
"step": 9345
},
{
"epoch": 0.38,
"learning_rate": 1.3870259532849726e-06,
"loss": 0.6619,
"step": 9350
},
{
"epoch": 0.38,
"learning_rate": 1.3864315916698264e-06,
"loss": 0.7109,
"step": 9355
},
{
"epoch": 0.38,
"learning_rate": 1.3858370695343573e-06,
"loss": 0.6914,
"step": 9360
},
{
"epoch": 0.38,
"learning_rate": 1.3852423871255252e-06,
"loss": 0.6847,
"step": 9365
},
{
"epoch": 0.38,
"learning_rate": 1.3846475446903555e-06,
"loss": 0.6819,
"step": 9370
},
{
"epoch": 0.38,
"learning_rate": 1.384052542475941e-06,
"loss": 0.7004,
"step": 9375
},
{
"epoch": 0.38,
"learning_rate": 1.3834573807294404e-06,
"loss": 0.7206,
"step": 9380
},
{
"epoch": 0.38,
"learning_rate": 1.382862059698078e-06,
"loss": 0.7003,
"step": 9385
},
{
"epoch": 0.38,
"learning_rate": 1.3822665796291459e-06,
"loss": 0.686,
"step": 9390
},
{
"epoch": 0.38,
"learning_rate": 1.3816709407700007e-06,
"loss": 0.682,
"step": 9395
},
{
"epoch": 0.38,
"learning_rate": 1.3810751433680662e-06,
"loss": 0.6414,
"step": 9400
},
{
"epoch": 0.38,
"learning_rate": 1.380479187670831e-06,
"loss": 0.6494,
"step": 9405
},
{
"epoch": 0.38,
"learning_rate": 1.37988307392585e-06,
"loss": 0.6997,
"step": 9410
},
{
"epoch": 0.38,
"learning_rate": 1.3792868023807441e-06,
"loss": 0.6835,
"step": 9415
},
{
"epoch": 0.38,
"learning_rate": 1.3786903732831982e-06,
"loss": 0.6819,
"step": 9420
},
{
"epoch": 0.38,
"learning_rate": 1.378093786880965e-06,
"loss": 0.6568,
"step": 9425
},
{
"epoch": 0.38,
"learning_rate": 1.3774970434218609e-06,
"loss": 0.6811,
"step": 9430
},
{
"epoch": 0.38,
"learning_rate": 1.3769001431537675e-06,
"loss": 0.6692,
"step": 9435
},
{
"epoch": 0.38,
"learning_rate": 1.3763030863246326e-06,
"loss": 0.6732,
"step": 9440
},
{
"epoch": 0.38,
"learning_rate": 1.3757058731824679e-06,
"loss": 0.6602,
"step": 9445
},
{
"epoch": 0.38,
"learning_rate": 1.3751085039753508e-06,
"loss": 0.6597,
"step": 9450
},
{
"epoch": 0.38,
"learning_rate": 1.3745109789514233e-06,
"loss": 0.6906,
"step": 9455
},
{
"epoch": 0.38,
"learning_rate": 1.3739132983588921e-06,
"loss": 0.6466,
"step": 9460
},
{
"epoch": 0.38,
"learning_rate": 1.3733154624460283e-06,
"loss": 0.6613,
"step": 9465
},
{
"epoch": 0.38,
"learning_rate": 1.372717471461168e-06,
"loss": 0.6821,
"step": 9470
},
{
"epoch": 0.38,
"learning_rate": 1.3721193256527116e-06,
"loss": 0.6539,
"step": 9475
},
{
"epoch": 0.39,
"learning_rate": 1.3715210252691232e-06,
"loss": 0.6994,
"step": 9480
},
{
"epoch": 0.39,
"learning_rate": 1.3709225705589318e-06,
"loss": 0.6556,
"step": 9485
},
{
"epoch": 0.39,
"learning_rate": 1.3703239617707306e-06,
"loss": 0.7155,
"step": 9490
},
{
"epoch": 0.39,
"learning_rate": 1.3697251991531756e-06,
"loss": 0.6621,
"step": 9495
},
{
"epoch": 0.39,
"learning_rate": 1.3691262829549883e-06,
"loss": 0.6771,
"step": 9500
},
{
"epoch": 0.39,
"learning_rate": 1.3685272134249535e-06,
"loss": 0.7038,
"step": 9505
},
{
"epoch": 0.39,
"learning_rate": 1.3679279908119187e-06,
"loss": 0.6903,
"step": 9510
},
{
"epoch": 0.39,
"learning_rate": 1.3673286153647967e-06,
"loss": 0.7236,
"step": 9515
},
{
"epoch": 0.39,
"learning_rate": 1.3667290873325618e-06,
"loss": 0.6958,
"step": 9520
},
{
"epoch": 0.39,
"learning_rate": 1.3661294069642537e-06,
"loss": 0.653,
"step": 9525
},
{
"epoch": 0.39,
"learning_rate": 1.3655295745089736e-06,
"loss": 0.6925,
"step": 9530
},
{
"epoch": 0.39,
"learning_rate": 1.3649295902158873e-06,
"loss": 0.6607,
"step": 9535
},
{
"epoch": 0.39,
"learning_rate": 1.3643294543342228e-06,
"loss": 0.7378,
"step": 9540
},
{
"epoch": 0.39,
"learning_rate": 1.363729167113271e-06,
"loss": 0.6789,
"step": 9545
},
{
"epoch": 0.39,
"learning_rate": 1.3631287288023866e-06,
"loss": 0.6862,
"step": 9550
},
{
"epoch": 0.39,
"learning_rate": 1.362528139650986e-06,
"loss": 0.7204,
"step": 9555
},
{
"epoch": 0.39,
"learning_rate": 1.361927399908549e-06,
"loss": 0.6474,
"step": 9560
},
{
"epoch": 0.39,
"learning_rate": 1.3613265098246176e-06,
"loss": 0.6564,
"step": 9565
},
{
"epoch": 0.39,
"learning_rate": 1.3607254696487962e-06,
"loss": 0.6762,
"step": 9570
},
{
"epoch": 0.39,
"learning_rate": 1.360124279630752e-06,
"loss": 0.6903,
"step": 9575
},
{
"epoch": 0.39,
"learning_rate": 1.3595229400202134e-06,
"loss": 0.6778,
"step": 9580
},
{
"epoch": 0.39,
"learning_rate": 1.3589214510669722e-06,
"loss": 0.7117,
"step": 9585
},
{
"epoch": 0.39,
"learning_rate": 1.3583198130208819e-06,
"loss": 0.6531,
"step": 9590
},
{
"epoch": 0.39,
"learning_rate": 1.3577180261318569e-06,
"loss": 0.7066,
"step": 9595
},
{
"epoch": 0.39,
"learning_rate": 1.357116090649875e-06,
"loss": 0.7132,
"step": 9600
},
{
"epoch": 0.39,
"eval_loss": 0.6456849575042725,
"eval_runtime": 138.6043,
"eval_samples_per_second": 17.07,
"eval_steps_per_second": 2.85,
"step": 9600
},
{
"epoch": 0.39,
"learning_rate": 1.3565140068249746e-06,
"loss": 0.6633,
"step": 9605
},
{
"epoch": 0.39,
"learning_rate": 1.3559117749072559e-06,
"loss": 0.6652,
"step": 9610
},
{
"epoch": 0.39,
"learning_rate": 1.3553093951468817e-06,
"loss": 0.6863,
"step": 9615
},
{
"epoch": 0.39,
"learning_rate": 1.354706867794074e-06,
"loss": 0.6849,
"step": 9620
},
{
"epoch": 0.39,
"learning_rate": 1.3541041930991187e-06,
"loss": 0.6647,
"step": 9625
},
{
"epoch": 0.39,
"learning_rate": 1.3535013713123606e-06,
"loss": 0.6668,
"step": 9630
},
{
"epoch": 0.39,
"learning_rate": 1.352898402684207e-06,
"loss": 0.6574,
"step": 9635
},
{
"epoch": 0.39,
"learning_rate": 1.3522952874651262e-06,
"loss": 0.6735,
"step": 9640
},
{
"epoch": 0.39,
"learning_rate": 1.3516920259056467e-06,
"loss": 0.688,
"step": 9645
},
{
"epoch": 0.39,
"learning_rate": 1.351088618256358e-06,
"loss": 0.6553,
"step": 9650
},
{
"epoch": 0.39,
"learning_rate": 1.3504850647679104e-06,
"loss": 0.6818,
"step": 9655
},
{
"epoch": 0.39,
"learning_rate": 1.349881365691015e-06,
"loss": 0.6596,
"step": 9660
},
{
"epoch": 0.39,
"learning_rate": 1.349277521276443e-06,
"loss": 0.6655,
"step": 9665
},
{
"epoch": 0.39,
"learning_rate": 1.3486735317750257e-06,
"loss": 0.6465,
"step": 9670
},
{
"epoch": 0.39,
"learning_rate": 1.3480693974376561e-06,
"loss": 0.6647,
"step": 9675
},
{
"epoch": 0.39,
"learning_rate": 1.3474651185152854e-06,
"loss": 0.6955,
"step": 9680
},
{
"epoch": 0.39,
"learning_rate": 1.3468606952589265e-06,
"loss": 0.6851,
"step": 9685
},
{
"epoch": 0.39,
"learning_rate": 1.346256127919651e-06,
"loss": 0.6865,
"step": 9690
},
{
"epoch": 0.39,
"learning_rate": 1.345651416748591e-06,
"loss": 0.6518,
"step": 9695
},
{
"epoch": 0.39,
"learning_rate": 1.3450465619969386e-06,
"loss": 0.67,
"step": 9700
},
{
"epoch": 0.39,
"learning_rate": 1.344441563915945e-06,
"loss": 0.7331,
"step": 9705
},
{
"epoch": 0.39,
"learning_rate": 1.3438364227569215e-06,
"loss": 0.7002,
"step": 9710
},
{
"epoch": 0.39,
"learning_rate": 1.3432311387712377e-06,
"loss": 0.6777,
"step": 9715
},
{
"epoch": 0.39,
"learning_rate": 1.3426257122103237e-06,
"loss": 0.7075,
"step": 9720
},
{
"epoch": 0.4,
"learning_rate": 1.3420201433256689e-06,
"loss": 0.707,
"step": 9725
},
{
"epoch": 0.4,
"learning_rate": 1.3414144323688204e-06,
"loss": 0.67,
"step": 9730
},
{
"epoch": 0.4,
"learning_rate": 1.3408085795913862e-06,
"loss": 0.6853,
"step": 9735
},
{
"epoch": 0.4,
"learning_rate": 1.3402025852450317e-06,
"loss": 0.6859,
"step": 9740
},
{
"epoch": 0.4,
"learning_rate": 1.3395964495814815e-06,
"loss": 0.7109,
"step": 9745
},
{
"epoch": 0.4,
"learning_rate": 1.33899017285252e-06,
"loss": 0.7126,
"step": 9750
},
{
"epoch": 0.4,
"learning_rate": 1.3383837553099882e-06,
"loss": 0.6782,
"step": 9755
},
{
"epoch": 0.4,
"learning_rate": 1.3377771972057876e-06,
"loss": 0.6577,
"step": 9760
},
{
"epoch": 0.4,
"learning_rate": 1.3371704987918763e-06,
"loss": 0.6859,
"step": 9765
},
{
"epoch": 0.4,
"learning_rate": 1.3365636603202724e-06,
"loss": 0.705,
"step": 9770
},
{
"epoch": 0.4,
"learning_rate": 1.3359566820430509e-06,
"loss": 0.6359,
"step": 9775
},
{
"epoch": 0.4,
"learning_rate": 1.335349564212345e-06,
"loss": 0.6926,
"step": 9780
},
{
"epoch": 0.4,
"learning_rate": 1.3347423070803465e-06,
"loss": 0.6667,
"step": 9785
},
{
"epoch": 0.4,
"learning_rate": 1.3341349108993047e-06,
"loss": 0.7033,
"step": 9790
},
{
"epoch": 0.4,
"learning_rate": 1.3335273759215269e-06,
"loss": 0.6923,
"step": 9795
},
{
"epoch": 0.4,
"learning_rate": 1.3329197023993774e-06,
"loss": 0.706,
"step": 9800
},
{
"epoch": 0.4,
"learning_rate": 1.3323118905852789e-06,
"loss": 0.6059,
"step": 9805
},
{
"epoch": 0.4,
"learning_rate": 1.331703940731711e-06,
"loss": 0.7087,
"step": 9810
},
{
"epoch": 0.4,
"learning_rate": 1.3310958530912106e-06,
"loss": 0.7124,
"step": 9815
},
{
"epoch": 0.4,
"learning_rate": 1.3304876279163725e-06,
"loss": 0.6984,
"step": 9820
},
{
"epoch": 0.4,
"learning_rate": 1.3298792654598474e-06,
"loss": 0.6993,
"step": 9825
},
{
"epoch": 0.4,
"learning_rate": 1.3292707659743442e-06,
"loss": 0.7156,
"step": 9830
},
{
"epoch": 0.4,
"learning_rate": 1.3286621297126285e-06,
"loss": 0.7062,
"step": 9835
},
{
"epoch": 0.4,
"learning_rate": 1.3280533569275223e-06,
"loss": 0.6437,
"step": 9840
},
{
"epoch": 0.4,
"learning_rate": 1.3274444478719048e-06,
"loss": 0.6651,
"step": 9845
},
{
"epoch": 0.4,
"learning_rate": 1.3268354027987108e-06,
"loss": 0.6647,
"step": 9850
},
{
"epoch": 0.4,
"learning_rate": 1.3262262219609329e-06,
"loss": 0.6911,
"step": 9855
},
{
"epoch": 0.4,
"learning_rate": 1.3256169056116195e-06,
"loss": 0.6931,
"step": 9860
},
{
"epoch": 0.4,
"learning_rate": 1.3250074540038751e-06,
"loss": 0.6979,
"step": 9865
},
{
"epoch": 0.4,
"learning_rate": 1.324397867390861e-06,
"loss": 0.6642,
"step": 9870
},
{
"epoch": 0.4,
"learning_rate": 1.3237881460257933e-06,
"loss": 0.6457,
"step": 9875
},
{
"epoch": 0.4,
"learning_rate": 1.3231782901619458e-06,
"loss": 0.6684,
"step": 9880
},
{
"epoch": 0.4,
"learning_rate": 1.3225683000526474e-06,
"loss": 0.6744,
"step": 9885
},
{
"epoch": 0.4,
"learning_rate": 1.321958175951282e-06,
"loss": 0.6504,
"step": 9890
},
{
"epoch": 0.4,
"learning_rate": 1.3213479181112903e-06,
"loss": 0.7047,
"step": 9895
},
{
"epoch": 0.4,
"learning_rate": 1.320737526786168e-06,
"loss": 0.6802,
"step": 9900
},
{
"epoch": 0.4,
"learning_rate": 1.320127002229466e-06,
"loss": 0.6668,
"step": 9905
},
{
"epoch": 0.4,
"learning_rate": 1.319516344694792e-06,
"loss": 0.6759,
"step": 9910
},
{
"epoch": 0.4,
"learning_rate": 1.3189055544358065e-06,
"loss": 0.6614,
"step": 9915
},
{
"epoch": 0.4,
"learning_rate": 1.3182946317062271e-06,
"loss": 0.6613,
"step": 9920
},
{
"epoch": 0.4,
"learning_rate": 1.3176835767598259e-06,
"loss": 0.7015,
"step": 9925
},
{
"epoch": 0.4,
"learning_rate": 1.3170723898504293e-06,
"loss": 0.6607,
"step": 9930
},
{
"epoch": 0.4,
"learning_rate": 1.3164610712319197e-06,
"loss": 0.7008,
"step": 9935
},
{
"epoch": 0.4,
"learning_rate": 1.3158496211582328e-06,
"loss": 0.6727,
"step": 9940
},
{
"epoch": 0.4,
"learning_rate": 1.3152380398833604e-06,
"loss": 0.6481,
"step": 9945
},
{
"epoch": 0.4,
"learning_rate": 1.3146263276613474e-06,
"loss": 0.6803,
"step": 9950
},
{
"epoch": 0.4,
"learning_rate": 1.3140144847462943e-06,
"loss": 0.6999,
"step": 9955
},
{
"epoch": 0.4,
"learning_rate": 1.313402511392355e-06,
"loss": 0.7099,
"step": 9960
},
{
"epoch": 0.4,
"learning_rate": 1.3127904078537378e-06,
"loss": 0.6695,
"step": 9965
},
{
"epoch": 0.4,
"learning_rate": 1.3121781743847054e-06,
"loss": 0.6898,
"step": 9970
},
{
"epoch": 0.41,
"learning_rate": 1.311565811239574e-06,
"loss": 0.6724,
"step": 9975
},
{
"epoch": 0.41,
"learning_rate": 1.3109533186727145e-06,
"loss": 0.6905,
"step": 9980
},
{
"epoch": 0.41,
"learning_rate": 1.3103406969385502e-06,
"loss": 0.6442,
"step": 9985
},
{
"epoch": 0.41,
"learning_rate": 1.3097279462915594e-06,
"loss": 0.6735,
"step": 9990
},
{
"epoch": 0.41,
"learning_rate": 1.3091150669862734e-06,
"loss": 0.6962,
"step": 9995
},
{
"epoch": 0.41,
"learning_rate": 1.3085020592772767e-06,
"loss": 0.7271,
"step": 10000
},
{
"epoch": 0.41,
"learning_rate": 1.3078889234192074e-06,
"loss": 0.7095,
"step": 10005
},
{
"epoch": 0.41,
"learning_rate": 1.3072756596667569e-06,
"loss": 0.6467,
"step": 10010
},
{
"epoch": 0.41,
"learning_rate": 1.3066622682746693e-06,
"loss": 0.6728,
"step": 10015
},
{
"epoch": 0.41,
"learning_rate": 1.3060487494977426e-06,
"loss": 0.6724,
"step": 10020
},
{
"epoch": 0.41,
"learning_rate": 1.3054351035908269e-06,
"loss": 0.6336,
"step": 10025
},
{
"epoch": 0.41,
"learning_rate": 1.3048213308088253e-06,
"loss": 0.677,
"step": 10030
},
{
"epoch": 0.41,
"learning_rate": 1.3042074314066937e-06,
"loss": 0.7052,
"step": 10035
},
{
"epoch": 0.41,
"learning_rate": 1.3035934056394404e-06,
"loss": 0.6877,
"step": 10040
},
{
"epoch": 0.41,
"learning_rate": 1.3029792537621269e-06,
"loss": 0.6824,
"step": 10045
},
{
"epoch": 0.41,
"learning_rate": 1.302364976029866e-06,
"loss": 0.6759,
"step": 10050
},
{
"epoch": 0.41,
"learning_rate": 1.3017505726978239e-06,
"loss": 0.7549,
"step": 10055
},
{
"epoch": 0.41,
"learning_rate": 1.3011360440212178e-06,
"loss": 0.686,
"step": 10060
},
{
"epoch": 0.41,
"learning_rate": 1.3005213902553177e-06,
"loss": 0.6835,
"step": 10065
},
{
"epoch": 0.41,
"learning_rate": 1.2999066116554457e-06,
"loss": 0.7057,
"step": 10070
},
{
"epoch": 0.41,
"learning_rate": 1.2992917084769755e-06,
"loss": 0.6839,
"step": 10075
},
{
"epoch": 0.41,
"learning_rate": 1.2986766809753322e-06,
"loss": 0.676,
"step": 10080
},
{
"epoch": 0.41,
"learning_rate": 1.298061529405993e-06,
"loss": 0.7068,
"step": 10085
},
{
"epoch": 0.41,
"learning_rate": 1.2974462540244868e-06,
"loss": 0.7212,
"step": 10090
},
{
"epoch": 0.41,
"learning_rate": 1.2968308550863932e-06,
"loss": 0.6873,
"step": 10095
},
{
"epoch": 0.41,
"learning_rate": 1.2962153328473435e-06,
"loss": 0.6773,
"step": 10100
},
{
"epoch": 0.41,
"learning_rate": 1.295599687563021e-06,
"loss": 0.6805,
"step": 10105
},
{
"epoch": 0.41,
"learning_rate": 1.2949839194891587e-06,
"loss": 0.6523,
"step": 10110
},
{
"epoch": 0.41,
"learning_rate": 1.2943680288815418e-06,
"loss": 0.6765,
"step": 10115
},
{
"epoch": 0.41,
"learning_rate": 1.2937520159960054e-06,
"loss": 0.6897,
"step": 10120
},
{
"epoch": 0.41,
"learning_rate": 1.2931358810884362e-06,
"loss": 0.6637,
"step": 10125
},
{
"epoch": 0.41,
"learning_rate": 1.2925196244147712e-06,
"loss": 0.6691,
"step": 10130
},
{
"epoch": 0.41,
"learning_rate": 1.291903246230998e-06,
"loss": 0.6603,
"step": 10135
},
{
"epoch": 0.41,
"learning_rate": 1.2912867467931551e-06,
"loss": 0.7122,
"step": 10140
},
{
"epoch": 0.41,
"learning_rate": 1.2906701263573306e-06,
"loss": 0.6517,
"step": 10145
},
{
"epoch": 0.41,
"learning_rate": 1.2900533851796632e-06,
"loss": 0.7098,
"step": 10150
},
{
"epoch": 0.41,
"learning_rate": 1.2894365235163425e-06,
"loss": 0.6925,
"step": 10155
},
{
"epoch": 0.41,
"learning_rate": 1.2888195416236065e-06,
"loss": 0.6564,
"step": 10160
},
{
"epoch": 0.41,
"learning_rate": 1.288202439757745e-06,
"loss": 0.6779,
"step": 10165
},
{
"epoch": 0.41,
"learning_rate": 1.2875852181750964e-06,
"loss": 0.7095,
"step": 10170
},
{
"epoch": 0.41,
"learning_rate": 1.2869678771320484e-06,
"loss": 0.6668,
"step": 10175
},
{
"epoch": 0.41,
"learning_rate": 1.2863504168850406e-06,
"loss": 0.6538,
"step": 10180
},
{
"epoch": 0.41,
"learning_rate": 1.2857328376905593e-06,
"loss": 0.663,
"step": 10185
},
{
"epoch": 0.41,
"learning_rate": 1.2851151398051427e-06,
"loss": 0.648,
"step": 10190
},
{
"epoch": 0.41,
"learning_rate": 1.2844973234853762e-06,
"loss": 0.6934,
"step": 10195
},
{
"epoch": 0.41,
"learning_rate": 1.2838793889878955e-06,
"loss": 0.6829,
"step": 10200
},
{
"epoch": 0.41,
"learning_rate": 1.2832613365693857e-06,
"loss": 0.6484,
"step": 10205
},
{
"epoch": 0.41,
"learning_rate": 1.2826431664865795e-06,
"loss": 0.6949,
"step": 10210
},
{
"epoch": 0.41,
"learning_rate": 1.2820248789962605e-06,
"loss": 0.6975,
"step": 10215
},
{
"epoch": 0.42,
"learning_rate": 1.281406474355259e-06,
"loss": 0.6907,
"step": 10220
},
{
"epoch": 0.42,
"learning_rate": 1.2807879528204554e-06,
"loss": 0.6599,
"step": 10225
},
{
"epoch": 0.42,
"learning_rate": 1.280169314648778e-06,
"loss": 0.6655,
"step": 10230
},
{
"epoch": 0.42,
"learning_rate": 1.2795505600972036e-06,
"loss": 0.6827,
"step": 10235
},
{
"epoch": 0.42,
"learning_rate": 1.2789316894227582e-06,
"loss": 0.6679,
"step": 10240
},
{
"epoch": 0.42,
"learning_rate": 1.2783127028825143e-06,
"loss": 0.6862,
"step": 10245
},
{
"epoch": 0.42,
"learning_rate": 1.2776936007335942e-06,
"loss": 0.6659,
"step": 10250
},
{
"epoch": 0.42,
"learning_rate": 1.2770743832331667e-06,
"loss": 0.7268,
"step": 10255
},
{
"epoch": 0.42,
"learning_rate": 1.2764550506384504e-06,
"loss": 0.6597,
"step": 10260
},
{
"epoch": 0.42,
"learning_rate": 1.2758356032067103e-06,
"loss": 0.6416,
"step": 10265
},
{
"epoch": 0.42,
"learning_rate": 1.2752160411952592e-06,
"loss": 0.7008,
"step": 10270
},
{
"epoch": 0.42,
"learning_rate": 1.2745963648614583e-06,
"loss": 0.6704,
"step": 10275
},
{
"epoch": 0.42,
"learning_rate": 1.2739765744627154e-06,
"loss": 0.6444,
"step": 10280
},
{
"epoch": 0.42,
"learning_rate": 1.2733566702564855e-06,
"loss": 0.6594,
"step": 10285
},
{
"epoch": 0.42,
"learning_rate": 1.2727366525002728e-06,
"loss": 0.6597,
"step": 10290
},
{
"epoch": 0.42,
"learning_rate": 1.2721165214516259e-06,
"loss": 0.6624,
"step": 10295
},
{
"epoch": 0.42,
"learning_rate": 1.2714962773681427e-06,
"loss": 0.6726,
"step": 10300
},
{
"epoch": 0.42,
"learning_rate": 1.270875920507467e-06,
"loss": 0.6924,
"step": 10305
},
{
"epoch": 0.42,
"learning_rate": 1.2702554511272894e-06,
"loss": 0.631,
"step": 10310
},
{
"epoch": 0.42,
"learning_rate": 1.2696348694853476e-06,
"loss": 0.6702,
"step": 10315
},
{
"epoch": 0.42,
"learning_rate": 1.2690141758394259e-06,
"loss": 0.6808,
"step": 10320
},
{
"epoch": 0.42,
"learning_rate": 1.2683933704473552e-06,
"loss": 0.7162,
"step": 10325
},
{
"epoch": 0.42,
"learning_rate": 1.2677724535670126e-06,
"loss": 0.6696,
"step": 10330
},
{
"epoch": 0.42,
"learning_rate": 1.267151425456321e-06,
"loss": 0.6856,
"step": 10335
},
{
"epoch": 0.42,
"learning_rate": 1.2665302863732508e-06,
"loss": 0.6957,
"step": 10340
},
{
"epoch": 0.42,
"learning_rate": 1.2659090365758174e-06,
"loss": 0.6934,
"step": 10345
},
{
"epoch": 0.42,
"learning_rate": 1.2652876763220828e-06,
"loss": 0.6666,
"step": 10350
},
{
"epoch": 0.42,
"learning_rate": 1.2646662058701547e-06,
"loss": 0.7049,
"step": 10355
},
{
"epoch": 0.42,
"learning_rate": 1.2640446254781855e-06,
"loss": 0.685,
"step": 10360
},
{
"epoch": 0.42,
"learning_rate": 1.263422935404376e-06,
"loss": 0.6696,
"step": 10365
},
{
"epoch": 0.42,
"learning_rate": 1.2628011359069694e-06,
"loss": 0.6905,
"step": 10370
},
{
"epoch": 0.42,
"learning_rate": 1.2621792272442569e-06,
"loss": 0.7034,
"step": 10375
},
{
"epoch": 0.42,
"learning_rate": 1.2615572096745732e-06,
"loss": 0.6746,
"step": 10380
},
{
"epoch": 0.42,
"learning_rate": 1.2609350834562992e-06,
"loss": 0.6676,
"step": 10385
},
{
"epoch": 0.42,
"learning_rate": 1.2603128488478605e-06,
"loss": 0.6716,
"step": 10390
},
{
"epoch": 0.42,
"learning_rate": 1.2596905061077283e-06,
"loss": 0.6554,
"step": 10395
},
{
"epoch": 0.42,
"learning_rate": 1.2590680554944184e-06,
"loss": 0.6564,
"step": 10400
},
{
"epoch": 0.42,
"eval_loss": 0.6432496309280396,
"eval_runtime": 144.6412,
"eval_samples_per_second": 16.358,
"eval_steps_per_second": 2.731,
"step": 10400
},
{
"epoch": 0.42,
"learning_rate": 1.2584454972664912e-06,
"loss": 0.6727,
"step": 10405
},
{
"epoch": 0.42,
"learning_rate": 1.257822831682552e-06,
"loss": 0.6545,
"step": 10410
},
{
"epoch": 0.42,
"learning_rate": 1.2572000590012505e-06,
"loss": 0.6794,
"step": 10415
},
{
"epoch": 0.42,
"learning_rate": 1.2565771794812812e-06,
"loss": 0.682,
"step": 10420
},
{
"epoch": 0.42,
"learning_rate": 1.255954193381383e-06,
"loss": 0.6566,
"step": 10425
},
{
"epoch": 0.42,
"learning_rate": 1.2553311009603389e-06,
"loss": 0.6396,
"step": 10430
},
{
"epoch": 0.42,
"learning_rate": 1.2547079024769756e-06,
"loss": 0.6685,
"step": 10435
},
{
"epoch": 0.42,
"learning_rate": 1.2540845981901648e-06,
"loss": 0.6409,
"step": 10440
},
{
"epoch": 0.42,
"learning_rate": 1.2534611883588213e-06,
"loss": 0.6928,
"step": 10445
},
{
"epoch": 0.42,
"learning_rate": 1.2528376732419048e-06,
"loss": 0.7162,
"step": 10450
},
{
"epoch": 0.42,
"learning_rate": 1.2522140530984173e-06,
"loss": 0.6575,
"step": 10455
},
{
"epoch": 0.42,
"learning_rate": 1.2515903281874057e-06,
"loss": 0.7134,
"step": 10460
},
{
"epoch": 0.43,
"learning_rate": 1.2509664987679599e-06,
"loss": 0.6521,
"step": 10465
},
{
"epoch": 0.43,
"learning_rate": 1.2503425650992124e-06,
"loss": 0.6655,
"step": 10470
},
{
"epoch": 0.43,
"learning_rate": 1.2497185274403407e-06,
"loss": 0.6606,
"step": 10475
},
{
"epoch": 0.43,
"learning_rate": 1.2490943860505645e-06,
"loss": 0.6419,
"step": 10480
},
{
"epoch": 0.43,
"learning_rate": 1.2484701411891465e-06,
"loss": 0.6822,
"step": 10485
},
{
"epoch": 0.43,
"learning_rate": 1.2478457931153927e-06,
"loss": 0.6705,
"step": 10490
},
{
"epoch": 0.43,
"learning_rate": 1.2472213420886518e-06,
"loss": 0.638,
"step": 10495
},
{
"epoch": 0.43,
"learning_rate": 1.2465967883683152e-06,
"loss": 0.6684,
"step": 10500
},
{
"epoch": 0.43,
"learning_rate": 1.2459721322138173e-06,
"loss": 0.668,
"step": 10505
},
{
"epoch": 0.43,
"learning_rate": 1.245347373884635e-06,
"loss": 0.6665,
"step": 10510
},
{
"epoch": 0.43,
"learning_rate": 1.244722513640287e-06,
"loss": 0.6971,
"step": 10515
},
{
"epoch": 0.43,
"learning_rate": 1.2440975517403352e-06,
"loss": 0.6807,
"step": 10520
},
{
"epoch": 0.43,
"learning_rate": 1.243472488444383e-06,
"loss": 0.6471,
"step": 10525
},
{
"epoch": 0.43,
"learning_rate": 1.2428473240120763e-06,
"loss": 0.658,
"step": 10530
},
{
"epoch": 0.43,
"learning_rate": 1.2422220587031033e-06,
"loss": 0.7214,
"step": 10535
},
{
"epoch": 0.43,
"learning_rate": 1.2415966927771938e-06,
"loss": 0.6445,
"step": 10540
},
{
"epoch": 0.43,
"learning_rate": 1.2409712264941189e-06,
"loss": 0.6899,
"step": 10545
},
{
"epoch": 0.43,
"learning_rate": 1.240345660113692e-06,
"loss": 0.7066,
"step": 10550
},
{
"epoch": 0.43,
"learning_rate": 1.2397199938957678e-06,
"loss": 0.6361,
"step": 10555
},
{
"epoch": 0.43,
"learning_rate": 1.2390942281002435e-06,
"loss": 0.6871,
"step": 10560
},
{
"epoch": 0.43,
"learning_rate": 1.238468362987056e-06,
"loss": 0.6544,
"step": 10565
},
{
"epoch": 0.43,
"learning_rate": 1.237842398816184e-06,
"loss": 0.6198,
"step": 10570
},
{
"epoch": 0.43,
"learning_rate": 1.237216335847648e-06,
"loss": 0.6907,
"step": 10575
},
{
"epoch": 0.43,
"learning_rate": 1.236590174341509e-06,
"loss": 0.6411,
"step": 10580
},
{
"epoch": 0.43,
"learning_rate": 1.2359639145578693e-06,
"loss": 0.6724,
"step": 10585
},
{
"epoch": 0.43,
"learning_rate": 1.2353375567568715e-06,
"loss": 0.6606,
"step": 10590
},
{
"epoch": 0.43,
"learning_rate": 1.234711101198699e-06,
"loss": 0.6745,
"step": 10595
},
{
"epoch": 0.43,
"learning_rate": 1.2340845481435764e-06,
"loss": 0.6417,
"step": 10600
},
{
"epoch": 0.43,
"learning_rate": 1.233457897851768e-06,
"loss": 0.6824,
"step": 10605
},
{
"epoch": 0.43,
"learning_rate": 1.2328311505835794e-06,
"loss": 0.6726,
"step": 10610
},
{
"epoch": 0.43,
"learning_rate": 1.2322043065993554e-06,
"loss": 0.692,
"step": 10615
},
{
"epoch": 0.43,
"learning_rate": 1.2315773661594817e-06,
"loss": 0.7019,
"step": 10620
},
{
"epoch": 0.43,
"learning_rate": 1.2309503295243844e-06,
"loss": 0.6775,
"step": 10625
},
{
"epoch": 0.43,
"learning_rate": 1.230323196954528e-06,
"loss": 0.6774,
"step": 10630
},
{
"epoch": 0.43,
"learning_rate": 1.229695968710419e-06,
"loss": 0.7227,
"step": 10635
},
{
"epoch": 0.43,
"learning_rate": 1.2290686450526021e-06,
"loss": 0.6676,
"step": 10640
},
{
"epoch": 0.43,
"learning_rate": 1.2284412262416621e-06,
"loss": 0.6781,
"step": 10645
},
{
"epoch": 0.43,
"learning_rate": 1.2278137125382235e-06,
"loss": 0.666,
"step": 10650
},
{
"epoch": 0.43,
"learning_rate": 1.2271861042029496e-06,
"loss": 0.6926,
"step": 10655
},
{
"epoch": 0.43,
"learning_rate": 1.2265584014965439e-06,
"loss": 0.6458,
"step": 10660
},
{
"epoch": 0.43,
"learning_rate": 1.2259306046797486e-06,
"loss": 0.6518,
"step": 10665
},
{
"epoch": 0.43,
"learning_rate": 1.2253027140133444e-06,
"loss": 0.6911,
"step": 10670
},
{
"epoch": 0.43,
"learning_rate": 1.2246747297581527e-06,
"loss": 0.6738,
"step": 10675
},
{
"epoch": 0.43,
"learning_rate": 1.2240466521750318e-06,
"loss": 0.6601,
"step": 10680
},
{
"epoch": 0.43,
"learning_rate": 1.22341848152488e-06,
"loss": 0.6561,
"step": 10685
},
{
"epoch": 0.43,
"learning_rate": 1.222790218068634e-06,
"loss": 0.6899,
"step": 10690
},
{
"epoch": 0.43,
"learning_rate": 1.2221618620672689e-06,
"loss": 0.6685,
"step": 10695
},
{
"epoch": 0.43,
"learning_rate": 1.2215334137817985e-06,
"loss": 0.6926,
"step": 10700
},
{
"epoch": 0.43,
"learning_rate": 1.2209048734732742e-06,
"loss": 0.6241,
"step": 10705
},
{
"epoch": 0.44,
"learning_rate": 1.2202762414027865e-06,
"loss": 0.6556,
"step": 10710
},
{
"epoch": 0.44,
"learning_rate": 1.219647517831464e-06,
"loss": 0.6612,
"step": 10715
},
{
"epoch": 0.44,
"learning_rate": 1.2190187030204727e-06,
"loss": 0.6931,
"step": 10720
},
{
"epoch": 0.44,
"learning_rate": 1.2183897972310168e-06,
"loss": 0.6806,
"step": 10725
},
{
"epoch": 0.44,
"learning_rate": 1.2177608007243384e-06,
"loss": 0.7048,
"step": 10730
},
{
"epoch": 0.44,
"learning_rate": 1.2171317137617172e-06,
"loss": 0.6722,
"step": 10735
},
{
"epoch": 0.44,
"learning_rate": 1.2165025366044698e-06,
"loss": 0.7012,
"step": 10740
},
{
"epoch": 0.44,
"learning_rate": 1.2158732695139523e-06,
"loss": 0.6476,
"step": 10745
},
{
"epoch": 0.44,
"learning_rate": 1.2152439127515557e-06,
"loss": 0.661,
"step": 10750
},
{
"epoch": 0.44,
"learning_rate": 1.2146144665787094e-06,
"loss": 0.6862,
"step": 10755
},
{
"epoch": 0.44,
"learning_rate": 1.2139849312568803e-06,
"loss": 0.6949,
"step": 10760
},
{
"epoch": 0.44,
"learning_rate": 1.2133553070475714e-06,
"loss": 0.6565,
"step": 10765
},
{
"epoch": 0.44,
"learning_rate": 1.2127255942123236e-06,
"loss": 0.6803,
"step": 10770
},
{
"epoch": 0.44,
"learning_rate": 1.2120957930127139e-06,
"loss": 0.6909,
"step": 10775
},
{
"epoch": 0.44,
"learning_rate": 1.211465903710356e-06,
"loss": 0.6885,
"step": 10780
},
{
"epoch": 0.44,
"learning_rate": 1.210835926566901e-06,
"loss": 0.6837,
"step": 10785
},
{
"epoch": 0.44,
"learning_rate": 1.2102058618440354e-06,
"loss": 0.676,
"step": 10790
},
{
"epoch": 0.44,
"learning_rate": 1.2095757098034829e-06,
"loss": 0.6624,
"step": 10795
},
{
"epoch": 0.44,
"learning_rate": 1.2089454707070033e-06,
"loss": 0.6218,
"step": 10800
},
{
"epoch": 0.44,
"learning_rate": 1.208315144816392e-06,
"loss": 0.6528,
"step": 10805
},
{
"epoch": 0.44,
"learning_rate": 1.2076847323934813e-06,
"loss": 0.6933,
"step": 10810
},
{
"epoch": 0.44,
"learning_rate": 1.2070542337001385e-06,
"loss": 0.6646,
"step": 10815
},
{
"epoch": 0.44,
"learning_rate": 1.206423648998268e-06,
"loss": 0.6935,
"step": 10820
},
{
"epoch": 0.44,
"learning_rate": 1.205792978549809e-06,
"loss": 0.6853,
"step": 10825
},
{
"epoch": 0.44,
"learning_rate": 1.2051622226167362e-06,
"loss": 0.6648,
"step": 10830
},
{
"epoch": 0.44,
"learning_rate": 1.2045313814610602e-06,
"loss": 0.6587,
"step": 10835
},
{
"epoch": 0.44,
"learning_rate": 1.203900455344827e-06,
"loss": 0.6662,
"step": 10840
},
{
"epoch": 0.44,
"learning_rate": 1.2032694445301182e-06,
"loss": 0.6504,
"step": 10845
},
{
"epoch": 0.44,
"learning_rate": 1.2026383492790495e-06,
"loss": 0.7228,
"step": 10850
},
{
"epoch": 0.44,
"learning_rate": 1.2020071698537727e-06,
"loss": 0.6674,
"step": 10855
},
{
"epoch": 0.44,
"learning_rate": 1.2013759065164748e-06,
"loss": 0.7009,
"step": 10860
},
{
"epoch": 0.44,
"learning_rate": 1.200744559529376e-06,
"loss": 0.6588,
"step": 10865
},
{
"epoch": 0.44,
"learning_rate": 1.2001131291547335e-06,
"loss": 0.651,
"step": 10870
},
{
"epoch": 0.44,
"learning_rate": 1.1994816156548368e-06,
"loss": 0.644,
"step": 10875
},
{
"epoch": 0.44,
"learning_rate": 1.1988500192920122e-06,
"loss": 0.6669,
"step": 10880
},
{
"epoch": 0.44,
"learning_rate": 1.1982183403286186e-06,
"loss": 0.6718,
"step": 10885
},
{
"epoch": 0.44,
"learning_rate": 1.1975865790270503e-06,
"loss": 0.6594,
"step": 10890
},
{
"epoch": 0.44,
"learning_rate": 1.1969547356497356e-06,
"loss": 0.6279,
"step": 10895
},
{
"epoch": 0.44,
"learning_rate": 1.1963228104591362e-06,
"loss": 0.6866,
"step": 10900
},
{
"epoch": 0.44,
"learning_rate": 1.1956908037177484e-06,
"loss": 0.6836,
"step": 10905
},
{
"epoch": 0.44,
"learning_rate": 1.1950587156881032e-06,
"loss": 0.6323,
"step": 10910
},
{
"epoch": 0.44,
"learning_rate": 1.1944265466327634e-06,
"loss": 0.6714,
"step": 10915
},
{
"epoch": 0.44,
"learning_rate": 1.193794296814327e-06,
"loss": 0.6849,
"step": 10920
},
{
"epoch": 0.44,
"learning_rate": 1.193161966495425e-06,
"loss": 0.6654,
"step": 10925
},
{
"epoch": 0.44,
"learning_rate": 1.1925295559387221e-06,
"loss": 0.7011,
"step": 10930
},
{
"epoch": 0.44,
"learning_rate": 1.191897065406916e-06,
"loss": 0.6452,
"step": 10935
},
{
"epoch": 0.44,
"learning_rate": 1.1912644951627375e-06,
"loss": 0.6537,
"step": 10940
},
{
"epoch": 0.44,
"learning_rate": 1.1906318454689512e-06,
"loss": 0.6852,
"step": 10945
},
{
"epoch": 0.44,
"learning_rate": 1.189999116588354e-06,
"loss": 0.6316,
"step": 10950
},
{
"epoch": 0.44,
"learning_rate": 1.189366308783776e-06,
"loss": 0.6368,
"step": 10955
},
{
"epoch": 0.45,
"learning_rate": 1.1887334223180804e-06,
"loss": 0.6621,
"step": 10960
},
{
"epoch": 0.45,
"learning_rate": 1.1881004574541625e-06,
"loss": 0.6432,
"step": 10965
},
{
"epoch": 0.45,
"learning_rate": 1.18746741445495e-06,
"loss": 0.6771,
"step": 10970
},
{
"epoch": 0.45,
"learning_rate": 1.1868342935834041e-06,
"loss": 0.6967,
"step": 10975
},
{
"epoch": 0.45,
"learning_rate": 1.1862010951025175e-06,
"loss": 0.7035,
"step": 10980
},
{
"epoch": 0.45,
"learning_rate": 1.1855678192753151e-06,
"loss": 0.6717,
"step": 10985
},
{
"epoch": 0.45,
"learning_rate": 1.1849344663648543e-06,
"loss": 0.6407,
"step": 10990
},
{
"epoch": 0.45,
"learning_rate": 1.1843010366342248e-06,
"loss": 0.6802,
"step": 10995
},
{
"epoch": 0.45,
"learning_rate": 1.1836675303465472e-06,
"loss": 0.6332,
"step": 11000
},
{
"epoch": 0.45,
"learning_rate": 1.1830339477649749e-06,
"loss": 0.6221,
"step": 11005
},
{
"epoch": 0.45,
"learning_rate": 1.1824002891526923e-06,
"loss": 0.6445,
"step": 11010
},
{
"epoch": 0.45,
"learning_rate": 1.1817665547729157e-06,
"loss": 0.6818,
"step": 11015
},
{
"epoch": 0.45,
"learning_rate": 1.181132744888893e-06,
"loss": 0.6772,
"step": 11020
},
{
"epoch": 0.45,
"learning_rate": 1.1804988597639034e-06,
"loss": 0.6692,
"step": 11025
},
{
"epoch": 0.45,
"learning_rate": 1.1798648996612572e-06,
"loss": 0.6727,
"step": 11030
},
{
"epoch": 0.45,
"learning_rate": 1.1792308648442958e-06,
"loss": 0.7129,
"step": 11035
},
{
"epoch": 0.45,
"learning_rate": 1.1785967555763915e-06,
"loss": 0.636,
"step": 11040
},
{
"epoch": 0.45,
"learning_rate": 1.1779625721209484e-06,
"loss": 0.6769,
"step": 11045
},
{
"epoch": 0.45,
"learning_rate": 1.1773283147414007e-06,
"loss": 0.663,
"step": 11050
},
{
"epoch": 0.45,
"learning_rate": 1.1766939837012128e-06,
"loss": 0.6988,
"step": 11055
},
{
"epoch": 0.45,
"learning_rate": 1.1760595792638807e-06,
"loss": 0.6399,
"step": 11060
},
{
"epoch": 0.45,
"learning_rate": 1.1754251016929307e-06,
"loss": 0.6709,
"step": 11065
},
{
"epoch": 0.45,
"learning_rate": 1.174790551251919e-06,
"loss": 0.661,
"step": 11070
},
{
"epoch": 0.45,
"learning_rate": 1.1741559282044322e-06,
"loss": 0.6604,
"step": 11075
},
{
"epoch": 0.45,
"learning_rate": 1.1735212328140876e-06,
"loss": 0.6601,
"step": 11080
},
{
"epoch": 0.45,
"learning_rate": 1.1728864653445314e-06,
"loss": 0.6732,
"step": 11085
},
{
"epoch": 0.45,
"learning_rate": 1.1722516260594412e-06,
"loss": 0.6564,
"step": 11090
},
{
"epoch": 0.45,
"learning_rate": 1.1716167152225237e-06,
"loss": 0.6819,
"step": 11095
},
{
"epoch": 0.45,
"learning_rate": 1.170981733097515e-06,
"loss": 0.6739,
"step": 11100
},
{
"epoch": 0.45,
"learning_rate": 1.1703466799481817e-06,
"loss": 0.6658,
"step": 11105
},
{
"epoch": 0.45,
"learning_rate": 1.1697115560383185e-06,
"loss": 0.6999,
"step": 11110
},
{
"epoch": 0.45,
"learning_rate": 1.1690763616317507e-06,
"loss": 0.6797,
"step": 11115
},
{
"epoch": 0.45,
"learning_rate": 1.168441096992333e-06,
"loss": 0.6541,
"step": 11120
},
{
"epoch": 0.45,
"learning_rate": 1.1678057623839484e-06,
"loss": 0.6586,
"step": 11125
},
{
"epoch": 0.45,
"learning_rate": 1.1671703580705094e-06,
"loss": 0.6609,
"step": 11130
},
{
"epoch": 0.45,
"learning_rate": 1.1665348843159574e-06,
"loss": 0.6493,
"step": 11135
},
{
"epoch": 0.45,
"learning_rate": 1.1658993413842624e-06,
"loss": 0.6771,
"step": 11140
},
{
"epoch": 0.45,
"learning_rate": 1.1652637295394244e-06,
"loss": 0.6806,
"step": 11145
},
{
"epoch": 0.45,
"learning_rate": 1.1646280490454696e-06,
"loss": 0.6739,
"step": 11150
},
{
"epoch": 0.45,
"learning_rate": 1.1639923001664555e-06,
"loss": 0.6482,
"step": 11155
},
{
"epoch": 0.45,
"learning_rate": 1.1633564831664656e-06,
"loss": 0.6759,
"step": 11160
},
{
"epoch": 0.45,
"learning_rate": 1.1627205983096135e-06,
"loss": 0.6722,
"step": 11165
},
{
"epoch": 0.45,
"learning_rate": 1.16208464586004e-06,
"loss": 0.6531,
"step": 11170
},
{
"epoch": 0.45,
"learning_rate": 1.1614486260819138e-06,
"loss": 0.6934,
"step": 11175
},
{
"epoch": 0.45,
"learning_rate": 1.1608125392394327e-06,
"loss": 0.6637,
"step": 11180
},
{
"epoch": 0.45,
"learning_rate": 1.1601763855968212e-06,
"loss": 0.6879,
"step": 11185
},
{
"epoch": 0.45,
"learning_rate": 1.1595401654183322e-06,
"loss": 0.6499,
"step": 11190
},
{
"epoch": 0.45,
"learning_rate": 1.1589038789682456e-06,
"loss": 0.7014,
"step": 11195
},
{
"epoch": 0.45,
"learning_rate": 1.1582675265108699e-06,
"loss": 0.6136,
"step": 11200
},
{
"epoch": 0.45,
"eval_loss": 0.6401504874229431,
"eval_runtime": 144.3337,
"eval_samples_per_second": 16.393,
"eval_steps_per_second": 2.737,
"step": 11200
},
{
"epoch": 0.46,
"learning_rate": 1.15763110831054e-06,
"loss": 0.6646,
"step": 11205
},
{
"epoch": 0.46,
"learning_rate": 1.1569946246316182e-06,
"loss": 0.7085,
"step": 11210
},
{
"epoch": 0.46,
"learning_rate": 1.156358075738495e-06,
"loss": 0.6621,
"step": 11215
},
{
"epoch": 0.46,
"learning_rate": 1.1557214618955868e-06,
"loss": 0.6703,
"step": 11220
},
{
"epoch": 0.46,
"learning_rate": 1.1550847833673374e-06,
"loss": 0.7204,
"step": 11225
},
{
"epoch": 0.46,
"learning_rate": 1.154448040418218e-06,
"loss": 0.6923,
"step": 11230
},
{
"epoch": 0.46,
"learning_rate": 1.1538112333127253e-06,
"loss": 0.6608,
"step": 11235
},
{
"epoch": 0.46,
"learning_rate": 1.1531743623153842e-06,
"loss": 0.6824,
"step": 11240
},
{
"epoch": 0.46,
"learning_rate": 1.1525374276907449e-06,
"loss": 0.7322,
"step": 11245
},
{
"epoch": 0.46,
"learning_rate": 1.1519004297033847e-06,
"loss": 0.6432,
"step": 11250
},
{
"epoch": 0.46,
"learning_rate": 1.1512633686179071e-06,
"loss": 0.6795,
"step": 11255
},
{
"epoch": 0.46,
"learning_rate": 1.1506262446989417e-06,
"loss": 0.7229,
"step": 11260
},
{
"epoch": 0.46,
"learning_rate": 1.149989058211144e-06,
"loss": 0.6954,
"step": 11265
},
{
"epoch": 0.46,
"learning_rate": 1.149351809419196e-06,
"loss": 0.6879,
"step": 11270
},
{
"epoch": 0.46,
"learning_rate": 1.148714498587805e-06,
"loss": 0.6642,
"step": 11275
},
{
"epoch": 0.46,
"learning_rate": 1.1480771259817048e-06,
"loss": 0.7015,
"step": 11280
},
{
"epoch": 0.46,
"learning_rate": 1.147439691865654e-06,
"loss": 0.6467,
"step": 11285
},
{
"epoch": 0.46,
"learning_rate": 1.1468021965044377e-06,
"loss": 0.7045,
"step": 11290
},
{
"epoch": 0.46,
"learning_rate": 1.1461646401628654e-06,
"loss": 0.6635,
"step": 11295
},
{
"epoch": 0.46,
"learning_rate": 1.1455270231057728e-06,
"loss": 0.6943,
"step": 11300
},
{
"epoch": 0.46,
"learning_rate": 1.14488934559802e-06,
"loss": 0.6626,
"step": 11305
},
{
"epoch": 0.46,
"learning_rate": 1.1442516079044932e-06,
"loss": 0.6716,
"step": 11310
},
{
"epoch": 0.46,
"learning_rate": 1.1436138102901031e-06,
"loss": 0.6748,
"step": 11315
},
{
"epoch": 0.46,
"learning_rate": 1.142975953019785e-06,
"loss": 0.7028,
"step": 11320
},
{
"epoch": 0.46,
"learning_rate": 1.1423380363584999e-06,
"loss": 0.6044,
"step": 11325
},
{
"epoch": 0.46,
"learning_rate": 1.1417000605712316e-06,
"loss": 0.6831,
"step": 11330
},
{
"epoch": 0.46,
"learning_rate": 1.1410620259229908e-06,
"loss": 0.6632,
"step": 11335
},
{
"epoch": 0.46,
"learning_rate": 1.1404239326788115e-06,
"loss": 0.6393,
"step": 11340
},
{
"epoch": 0.46,
"learning_rate": 1.1397857811037512e-06,
"loss": 0.6501,
"step": 11345
},
{
"epoch": 0.46,
"learning_rate": 1.1391475714628932e-06,
"loss": 0.6398,
"step": 11350
},
{
"epoch": 0.46,
"learning_rate": 1.138509304021344e-06,
"loss": 0.6784,
"step": 11355
},
{
"epoch": 0.46,
"learning_rate": 1.1378709790442346e-06,
"loss": 0.7065,
"step": 11360
},
{
"epoch": 0.46,
"learning_rate": 1.1372325967967196e-06,
"loss": 0.6189,
"step": 11365
},
{
"epoch": 0.46,
"learning_rate": 1.1365941575439772e-06,
"loss": 0.6652,
"step": 11370
},
{
"epoch": 0.46,
"learning_rate": 1.1359556615512099e-06,
"loss": 0.6752,
"step": 11375
},
{
"epoch": 0.46,
"learning_rate": 1.1353171090836427e-06,
"loss": 0.6668,
"step": 11380
},
{
"epoch": 0.46,
"learning_rate": 1.134678500406525e-06,
"loss": 0.6587,
"step": 11385
},
{
"epoch": 0.46,
"learning_rate": 1.13403983578513e-06,
"loss": 0.6873,
"step": 11390
},
{
"epoch": 0.46,
"learning_rate": 1.1334011154847527e-06,
"loss": 0.6975,
"step": 11395
},
{
"epoch": 0.46,
"learning_rate": 1.1327623397707122e-06,
"loss": 0.6784,
"step": 11400
},
{
"epoch": 0.46,
"learning_rate": 1.1321235089083502e-06,
"loss": 0.6643,
"step": 11405
},
{
"epoch": 0.46,
"learning_rate": 1.1314846231630315e-06,
"loss": 0.6754,
"step": 11410
},
{
"epoch": 0.46,
"learning_rate": 1.1308456828001441e-06,
"loss": 0.6689,
"step": 11415
},
{
"epoch": 0.46,
"learning_rate": 1.1302066880850975e-06,
"loss": 0.6594,
"step": 11420
},
{
"epoch": 0.46,
"learning_rate": 1.1295676392833253e-06,
"loss": 0.6416,
"step": 11425
},
{
"epoch": 0.46,
"learning_rate": 1.1289285366602826e-06,
"loss": 0.7223,
"step": 11430
},
{
"epoch": 0.46,
"learning_rate": 1.1282893804814468e-06,
"loss": 0.6944,
"step": 11435
},
{
"epoch": 0.46,
"learning_rate": 1.127650171012318e-06,
"loss": 0.6598,
"step": 11440
},
{
"epoch": 0.46,
"learning_rate": 1.1270109085184182e-06,
"loss": 0.7102,
"step": 11445
},
{
"epoch": 0.47,
"learning_rate": 1.1263715932652919e-06,
"loss": 0.6803,
"step": 11450
},
{
"epoch": 0.47,
"learning_rate": 1.1257322255185044e-06,
"loss": 0.65,
"step": 11455
},
{
"epoch": 0.47,
"learning_rate": 1.1250928055436443e-06,
"loss": 0.7018,
"step": 11460
},
{
"epoch": 0.47,
"learning_rate": 1.12445333360632e-06,
"loss": 0.6409,
"step": 11465
},
{
"epoch": 0.47,
"learning_rate": 1.1238138099721634e-06,
"loss": 0.6724,
"step": 11470
},
{
"epoch": 0.47,
"learning_rate": 1.1231742349068271e-06,
"loss": 0.6854,
"step": 11475
},
{
"epoch": 0.47,
"learning_rate": 1.1225346086759846e-06,
"loss": 0.7102,
"step": 11480
},
{
"epoch": 0.47,
"learning_rate": 1.1218949315453314e-06,
"loss": 0.6584,
"step": 11485
},
{
"epoch": 0.47,
"learning_rate": 1.1212552037805836e-06,
"loss": 0.6584,
"step": 11490
},
{
"epoch": 0.47,
"learning_rate": 1.1206154256474786e-06,
"loss": 0.6846,
"step": 11495
},
{
"epoch": 0.47,
"learning_rate": 1.119975597411775e-06,
"loss": 0.7037,
"step": 11500
},
{
"epoch": 0.47,
"learning_rate": 1.1193357193392512e-06,
"loss": 0.6689,
"step": 11505
},
{
"epoch": 0.47,
"learning_rate": 1.1186957916957078e-06,
"loss": 0.6556,
"step": 11510
},
{
"epoch": 0.47,
"learning_rate": 1.1180558147469642e-06,
"loss": 0.6557,
"step": 11515
},
{
"epoch": 0.47,
"learning_rate": 1.1174157887588623e-06,
"loss": 0.6662,
"step": 11520
},
{
"epoch": 0.47,
"learning_rate": 1.1167757139972626e-06,
"loss": 0.6702,
"step": 11525
},
{
"epoch": 0.47,
"learning_rate": 1.116135590728047e-06,
"loss": 0.6682,
"step": 11530
},
{
"epoch": 0.47,
"learning_rate": 1.115495419217117e-06,
"loss": 0.6855,
"step": 11535
},
{
"epoch": 0.47,
"learning_rate": 1.114855199730394e-06,
"loss": 0.6796,
"step": 11540
},
{
"epoch": 0.47,
"learning_rate": 1.1142149325338199e-06,
"loss": 0.6481,
"step": 11545
},
{
"epoch": 0.47,
"learning_rate": 1.1135746178933563e-06,
"loss": 0.7167,
"step": 11550
},
{
"epoch": 0.47,
"learning_rate": 1.112934256074984e-06,
"loss": 0.6602,
"step": 11555
},
{
"epoch": 0.47,
"learning_rate": 1.1122938473447038e-06,
"loss": 0.6848,
"step": 11560
},
{
"epoch": 0.47,
"learning_rate": 1.1116533919685361e-06,
"loss": 0.6628,
"step": 11565
},
{
"epoch": 0.47,
"learning_rate": 1.1110128902125201e-06,
"loss": 0.6826,
"step": 11570
},
{
"epoch": 0.47,
"learning_rate": 1.1103723423427153e-06,
"loss": 0.6527,
"step": 11575
},
{
"epoch": 0.47,
"learning_rate": 1.1097317486251992e-06,
"loss": 0.6737,
"step": 11580
},
{
"epoch": 0.47,
"learning_rate": 1.109091109326069e-06,
"loss": 0.6854,
"step": 11585
},
{
"epoch": 0.47,
"learning_rate": 1.1084504247114406e-06,
"loss": 0.7145,
"step": 11590
},
{
"epoch": 0.47,
"learning_rate": 1.107809695047449e-06,
"loss": 0.6756,
"step": 11595
},
{
"epoch": 0.47,
"learning_rate": 1.1071689206002474e-06,
"loss": 0.6725,
"step": 11600
},
{
"epoch": 0.47,
"learning_rate": 1.1065281016360083e-06,
"loss": 0.7145,
"step": 11605
},
{
"epoch": 0.47,
"learning_rate": 1.1058872384209224e-06,
"loss": 0.6899,
"step": 11610
},
{
"epoch": 0.47,
"learning_rate": 1.1052463312211983e-06,
"loss": 0.6344,
"step": 11615
},
{
"epoch": 0.47,
"learning_rate": 1.1046053803030637e-06,
"loss": 0.654,
"step": 11620
},
{
"epoch": 0.47,
"learning_rate": 1.1039643859327635e-06,
"loss": 0.6741,
"step": 11625
},
{
"epoch": 0.47,
"learning_rate": 1.1033233483765615e-06,
"loss": 0.6814,
"step": 11630
},
{
"epoch": 0.47,
"learning_rate": 1.1026822679007395e-06,
"loss": 0.6565,
"step": 11635
},
{
"epoch": 0.47,
"learning_rate": 1.1020411447715961e-06,
"loss": 0.6916,
"step": 11640
},
{
"epoch": 0.47,
"learning_rate": 1.1013999792554486e-06,
"loss": 0.6894,
"step": 11645
},
{
"epoch": 0.47,
"learning_rate": 1.1007587716186317e-06,
"loss": 0.6698,
"step": 11650
},
{
"epoch": 0.47,
"learning_rate": 1.1001175221274968e-06,
"loss": 0.7096,
"step": 11655
},
{
"epoch": 0.47,
"learning_rate": 1.0994762310484142e-06,
"loss": 0.6887,
"step": 11660
},
{
"epoch": 0.47,
"learning_rate": 1.0988348986477705e-06,
"loss": 0.671,
"step": 11665
},
{
"epoch": 0.47,
"learning_rate": 1.0981935251919693e-06,
"loss": 0.6727,
"step": 11670
},
{
"epoch": 0.47,
"learning_rate": 1.0975521109474318e-06,
"loss": 0.6777,
"step": 11675
},
{
"epoch": 0.47,
"learning_rate": 1.0969106561805952e-06,
"loss": 0.6661,
"step": 11680
},
{
"epoch": 0.47,
"learning_rate": 1.0962691611579154e-06,
"loss": 0.6576,
"step": 11685
},
{
"epoch": 0.47,
"learning_rate": 1.0956276261458629e-06,
"loss": 0.6415,
"step": 11690
},
{
"epoch": 0.48,
"learning_rate": 1.0949860514109264e-06,
"loss": 0.6485,
"step": 11695
},
{
"epoch": 0.48,
"learning_rate": 1.09434443721961e-06,
"loss": 0.6966,
"step": 11700
},
{
"epoch": 0.48,
"learning_rate": 1.0937027838384345e-06,
"loss": 0.6746,
"step": 11705
},
{
"epoch": 0.48,
"learning_rate": 1.093061091533938e-06,
"loss": 0.6565,
"step": 11710
},
{
"epoch": 0.48,
"learning_rate": 1.0924193605726733e-06,
"loss": 0.7127,
"step": 11715
},
{
"epoch": 0.48,
"learning_rate": 1.0917775912212099e-06,
"loss": 0.6738,
"step": 11720
},
{
"epoch": 0.48,
"learning_rate": 1.0911357837461332e-06,
"loss": 0.6712,
"step": 11725
},
{
"epoch": 0.48,
"learning_rate": 1.0904939384140445e-06,
"loss": 0.6617,
"step": 11730
},
{
"epoch": 0.48,
"learning_rate": 1.0898520554915607e-06,
"loss": 0.6919,
"step": 11735
},
{
"epoch": 0.48,
"learning_rate": 1.0892101352453142e-06,
"loss": 0.6731,
"step": 11740
},
{
"epoch": 0.48,
"learning_rate": 1.0885681779419537e-06,
"loss": 0.6931,
"step": 11745
},
{
"epoch": 0.48,
"learning_rate": 1.0879261838481426e-06,
"loss": 0.631,
"step": 11750
},
{
"epoch": 0.48,
"learning_rate": 1.0872841532305587e-06,
"loss": 0.6515,
"step": 11755
},
{
"epoch": 0.48,
"learning_rate": 1.0866420863558969e-06,
"loss": 0.6674,
"step": 11760
},
{
"epoch": 0.48,
"learning_rate": 1.0859999834908657e-06,
"loss": 0.6514,
"step": 11765
},
{
"epoch": 0.48,
"learning_rate": 1.0853578449021896e-06,
"loss": 0.6393,
"step": 11770
},
{
"epoch": 0.48,
"learning_rate": 1.084715670856607e-06,
"loss": 0.6941,
"step": 11775
},
{
"epoch": 0.48,
"learning_rate": 1.0840734616208712e-06,
"loss": 0.664,
"step": 11780
},
{
"epoch": 0.48,
"learning_rate": 1.0834312174617508e-06,
"loss": 0.6636,
"step": 11785
},
{
"epoch": 0.48,
"learning_rate": 1.0827889386460281e-06,
"loss": 0.6756,
"step": 11790
},
{
"epoch": 0.48,
"learning_rate": 1.0821466254405004e-06,
"loss": 0.7116,
"step": 11795
},
{
"epoch": 0.48,
"learning_rate": 1.0815042781119788e-06,
"loss": 0.6647,
"step": 11800
},
{
"epoch": 0.48,
"learning_rate": 1.0808618969272888e-06,
"loss": 0.6404,
"step": 11805
},
{
"epoch": 0.48,
"learning_rate": 1.0802194821532702e-06,
"loss": 0.6711,
"step": 11810
},
{
"epoch": 0.48,
"learning_rate": 1.079577034056776e-06,
"loss": 0.6717,
"step": 11815
},
{
"epoch": 0.48,
"learning_rate": 1.078934552904674e-06,
"loss": 0.6705,
"step": 11820
},
{
"epoch": 0.48,
"learning_rate": 1.0782920389638452e-06,
"loss": 0.6713,
"step": 11825
},
{
"epoch": 0.48,
"learning_rate": 1.0776494925011846e-06,
"loss": 0.6474,
"step": 11830
},
{
"epoch": 0.48,
"learning_rate": 1.0770069137836e-06,
"loss": 0.6745,
"step": 11835
},
{
"epoch": 0.48,
"learning_rate": 1.0763643030780126e-06,
"loss": 0.6773,
"step": 11840
},
{
"epoch": 0.48,
"learning_rate": 1.075721660651358e-06,
"loss": 0.6767,
"step": 11845
},
{
"epoch": 0.48,
"learning_rate": 1.0750789867705843e-06,
"loss": 0.6758,
"step": 11850
},
{
"epoch": 0.48,
"learning_rate": 1.0744362817026524e-06,
"loss": 0.686,
"step": 11855
},
{
"epoch": 0.48,
"learning_rate": 1.0737935457145364e-06,
"loss": 0.6736,
"step": 11860
},
{
"epoch": 0.48,
"learning_rate": 1.073150779073223e-06,
"loss": 0.668,
"step": 11865
},
{
"epoch": 0.48,
"learning_rate": 1.0725079820457123e-06,
"loss": 0.7003,
"step": 11870
},
{
"epoch": 0.48,
"learning_rate": 1.0718651548990163e-06,
"loss": 0.682,
"step": 11875
},
{
"epoch": 0.48,
"learning_rate": 1.0712222979001602e-06,
"loss": 0.6445,
"step": 11880
},
{
"epoch": 0.48,
"learning_rate": 1.0705794113161808e-06,
"loss": 0.6872,
"step": 11885
},
{
"epoch": 0.48,
"learning_rate": 1.0699364954141276e-06,
"loss": 0.6936,
"step": 11890
},
{
"epoch": 0.48,
"learning_rate": 1.0692935504610625e-06,
"loss": 0.6195,
"step": 11895
},
{
"epoch": 0.48,
"learning_rate": 1.068650576724059e-06,
"loss": 0.6658,
"step": 11900
},
{
"epoch": 0.48,
"learning_rate": 1.0680075744702034e-06,
"loss": 0.6799,
"step": 11905
},
{
"epoch": 0.48,
"learning_rate": 1.0673645439665925e-06,
"loss": 0.6823,
"step": 11910
},
{
"epoch": 0.48,
"learning_rate": 1.0667214854803357e-06,
"loss": 0.6677,
"step": 11915
},
{
"epoch": 0.48,
"learning_rate": 1.0660783992785541e-06,
"loss": 0.6648,
"step": 11920
},
{
"epoch": 0.48,
"learning_rate": 1.06543528562838e-06,
"loss": 0.6313,
"step": 11925
},
{
"epoch": 0.48,
"learning_rate": 1.0647921447969577e-06,
"loss": 0.667,
"step": 11930
},
{
"epoch": 0.48,
"learning_rate": 1.0641489770514418e-06,
"loss": 0.6567,
"step": 11935
},
{
"epoch": 0.48,
"learning_rate": 1.0635057826589987e-06,
"loss": 0.6727,
"step": 11940
},
{
"epoch": 0.49,
"learning_rate": 1.0628625618868056e-06,
"loss": 0.6835,
"step": 11945
},
{
"epoch": 0.49,
"learning_rate": 1.062219315002051e-06,
"loss": 0.6329,
"step": 11950
},
{
"epoch": 0.49,
"learning_rate": 1.061576042271934e-06,
"loss": 0.6823,
"step": 11955
},
{
"epoch": 0.49,
"learning_rate": 1.0609327439636647e-06,
"loss": 0.6514,
"step": 11960
},
{
"epoch": 0.49,
"learning_rate": 1.0602894203444633e-06,
"loss": 0.716,
"step": 11965
},
{
"epoch": 0.49,
"learning_rate": 1.0596460716815612e-06,
"loss": 0.6778,
"step": 11970
},
{
"epoch": 0.49,
"learning_rate": 1.059002698242199e-06,
"loss": 0.6671,
"step": 11975
},
{
"epoch": 0.49,
"learning_rate": 1.0583593002936298e-06,
"loss": 0.6936,
"step": 11980
},
{
"epoch": 0.49,
"learning_rate": 1.0577158781031147e-06,
"loss": 0.7211,
"step": 11985
},
{
"epoch": 0.49,
"learning_rate": 1.0570724319379254e-06,
"loss": 0.6667,
"step": 11990
},
{
"epoch": 0.49,
"learning_rate": 1.0564289620653446e-06,
"loss": 0.7181,
"step": 11995
},
{
"epoch": 0.49,
"learning_rate": 1.0557854687526632e-06,
"loss": 0.6431,
"step": 12000
},
{
"epoch": 0.49,
"eval_loss": 0.6379530429840088,
"eval_runtime": 144.8913,
"eval_samples_per_second": 16.329,
"eval_steps_per_second": 2.726,
"step": 12000
},
{
"epoch": 0.49,
"learning_rate": 1.0551419522671834e-06,
"loss": 0.6469,
"step": 12005
},
{
"epoch": 0.49,
"learning_rate": 1.0544984128762164e-06,
"loss": 0.6672,
"step": 12010
},
{
"epoch": 0.49,
"learning_rate": 1.0538548508470824e-06,
"loss": 0.6599,
"step": 12015
},
{
"epoch": 0.49,
"learning_rate": 1.053211266447112e-06,
"loss": 0.6737,
"step": 12020
},
{
"epoch": 0.49,
"learning_rate": 1.052567659943644e-06,
"loss": 0.6275,
"step": 12025
},
{
"epoch": 0.49,
"learning_rate": 1.0519240316040269e-06,
"loss": 0.6603,
"step": 12030
},
{
"epoch": 0.49,
"learning_rate": 1.0512803816956191e-06,
"loss": 0.6718,
"step": 12035
},
{
"epoch": 0.49,
"learning_rate": 1.0506367104857864e-06,
"loss": 0.7219,
"step": 12040
},
{
"epoch": 0.49,
"learning_rate": 1.049993018241905e-06,
"loss": 0.6921,
"step": 12045
},
{
"epoch": 0.49,
"learning_rate": 1.0493493052313582e-06,
"loss": 0.6371,
"step": 12050
},
{
"epoch": 0.49,
"learning_rate": 1.048705571721539e-06,
"loss": 0.6545,
"step": 12055
},
{
"epoch": 0.49,
"learning_rate": 1.0480618179798493e-06,
"loss": 0.6818,
"step": 12060
},
{
"epoch": 0.49,
"learning_rate": 1.047418044273698e-06,
"loss": 0.6516,
"step": 12065
},
{
"epoch": 0.49,
"learning_rate": 1.0467742508705039e-06,
"loss": 0.649,
"step": 12070
},
{
"epoch": 0.49,
"learning_rate": 1.0461304380376924e-06,
"loss": 0.6821,
"step": 12075
},
{
"epoch": 0.49,
"learning_rate": 1.0454866060426986e-06,
"loss": 0.6675,
"step": 12080
},
{
"epoch": 0.49,
"learning_rate": 1.0448427551529635e-06,
"loss": 0.6436,
"step": 12085
},
{
"epoch": 0.49,
"learning_rate": 1.0441988856359385e-06,
"loss": 0.695,
"step": 12090
},
{
"epoch": 0.49,
"learning_rate": 1.0435549977590806e-06,
"loss": 0.662,
"step": 12095
},
{
"epoch": 0.49,
"learning_rate": 1.0429110917898552e-06,
"loss": 0.6499,
"step": 12100
},
{
"epoch": 0.49,
"learning_rate": 1.0422671679957357e-06,
"loss": 0.6987,
"step": 12105
},
{
"epoch": 0.49,
"learning_rate": 1.0416232266442017e-06,
"loss": 0.7055,
"step": 12110
},
{
"epoch": 0.49,
"learning_rate": 1.0409792680027419e-06,
"loss": 0.6261,
"step": 12115
},
{
"epoch": 0.49,
"learning_rate": 1.0403352923388504e-06,
"loss": 0.6435,
"step": 12120
},
{
"epoch": 0.49,
"learning_rate": 1.039691299920029e-06,
"loss": 0.6984,
"step": 12125
},
{
"epoch": 0.49,
"learning_rate": 1.039047291013787e-06,
"loss": 0.6501,
"step": 12130
},
{
"epoch": 0.49,
"learning_rate": 1.0384032658876397e-06,
"loss": 0.6991,
"step": 12135
},
{
"epoch": 0.49,
"learning_rate": 1.03775922480911e-06,
"loss": 0.6751,
"step": 12140
},
{
"epoch": 0.49,
"learning_rate": 1.0371151680457266e-06,
"loss": 0.649,
"step": 12145
},
{
"epoch": 0.49,
"learning_rate": 1.0364710958650252e-06,
"loss": 0.6668,
"step": 12150
},
{
"epoch": 0.49,
"learning_rate": 1.0358270085345475e-06,
"loss": 0.6789,
"step": 12155
},
{
"epoch": 0.49,
"learning_rate": 1.0351829063218423e-06,
"loss": 0.6708,
"step": 12160
},
{
"epoch": 0.49,
"learning_rate": 1.0345387894944635e-06,
"loss": 0.6833,
"step": 12165
},
{
"epoch": 0.49,
"learning_rate": 1.033894658319972e-06,
"loss": 0.6652,
"step": 12170
},
{
"epoch": 0.49,
"learning_rate": 1.0332505130659344e-06,
"loss": 0.7033,
"step": 12175
},
{
"epoch": 0.49,
"learning_rate": 1.0326063539999228e-06,
"loss": 0.6458,
"step": 12180
},
{
"epoch": 0.49,
"learning_rate": 1.031962181389515e-06,
"loss": 0.702,
"step": 12185
},
{
"epoch": 0.5,
"learning_rate": 1.0313179955022951e-06,
"loss": 0.6382,
"step": 12190
},
{
"epoch": 0.5,
"learning_rate": 1.0306737966058526e-06,
"loss": 0.6341,
"step": 12195
},
{
"epoch": 0.5,
"learning_rate": 1.0300295849677811e-06,
"loss": 0.6793,
"step": 12200
},
{
"epoch": 0.5,
"learning_rate": 1.0293853608556817e-06,
"loss": 0.6535,
"step": 12205
},
{
"epoch": 0.5,
"learning_rate": 1.028741124537159e-06,
"loss": 0.661,
"step": 12210
},
{
"epoch": 0.5,
"learning_rate": 1.0280968762798227e-06,
"loss": 0.642,
"step": 12215
},
{
"epoch": 0.5,
"learning_rate": 1.0274526163512885e-06,
"loss": 0.6661,
"step": 12220
},
{
"epoch": 0.5,
"learning_rate": 1.0268083450191761e-06,
"loss": 0.6825,
"step": 12225
},
{
"epoch": 0.5,
"learning_rate": 1.0261640625511106e-06,
"loss": 0.6685,
"step": 12230
},
{
"epoch": 0.5,
"learning_rate": 1.0255197692147207e-06,
"loss": 0.6488,
"step": 12235
},
{
"epoch": 0.5,
"learning_rate": 1.024875465277641e-06,
"loss": 0.646,
"step": 12240
},
{
"epoch": 0.5,
"learning_rate": 1.024231151007509e-06,
"loss": 0.6769,
"step": 12245
},
{
"epoch": 0.5,
"learning_rate": 1.0235868266719679e-06,
"loss": 0.6548,
"step": 12250
},
{
"epoch": 0.5,
"learning_rate": 1.0229424925386638e-06,
"loss": 0.6572,
"step": 12255
},
{
"epoch": 0.5,
"learning_rate": 1.022298148875248e-06,
"loss": 0.6832,
"step": 12260
},
{
"epoch": 0.5,
"learning_rate": 1.0216537959493752e-06,
"loss": 0.7031,
"step": 12265
},
{
"epoch": 0.5,
"learning_rate": 1.0210094340287036e-06,
"loss": 0.6427,
"step": 12270
},
{
"epoch": 0.5,
"learning_rate": 1.0203650633808957e-06,
"loss": 0.6727,
"step": 12275
},
{
"epoch": 0.5,
"learning_rate": 1.0197206842736181e-06,
"loss": 0.6859,
"step": 12280
},
{
"epoch": 0.5,
"learning_rate": 1.0190762969745395e-06,
"loss": 0.6617,
"step": 12285
},
{
"epoch": 0.5,
"learning_rate": 1.018431901751333e-06,
"loss": 0.6733,
"step": 12290
},
{
"epoch": 0.5,
"learning_rate": 1.0177874988716746e-06,
"loss": 0.6554,
"step": 12295
},
{
"epoch": 0.5,
"learning_rate": 1.017143088603244e-06,
"loss": 0.6412,
"step": 12300
},
{
"epoch": 0.5,
"learning_rate": 1.0164986712137239e-06,
"loss": 0.6979,
"step": 12305
},
{
"epoch": 0.5,
"learning_rate": 1.0158542469707984e-06,
"loss": 0.6387,
"step": 12310
},
{
"epoch": 0.5,
"learning_rate": 1.0152098161421574e-06,
"loss": 0.6824,
"step": 12315
},
{
"epoch": 0.5,
"learning_rate": 1.0145653789954907e-06,
"loss": 0.6187,
"step": 12320
},
{
"epoch": 0.5,
"learning_rate": 1.0139209357984922e-06,
"loss": 0.6574,
"step": 12325
},
{
"epoch": 0.5,
"learning_rate": 1.0132764868188582e-06,
"loss": 0.6943,
"step": 12330
},
{
"epoch": 0.5,
"learning_rate": 1.0126320323242868e-06,
"loss": 0.688,
"step": 12335
},
{
"epoch": 0.5,
"learning_rate": 1.0119875725824792e-06,
"loss": 0.7113,
"step": 12340
},
{
"epoch": 0.5,
"learning_rate": 1.0113431078611381e-06,
"loss": 0.6502,
"step": 12345
},
{
"epoch": 0.5,
"learning_rate": 1.0106986384279685e-06,
"loss": 0.6479,
"step": 12350
},
{
"epoch": 0.5,
"learning_rate": 1.010054164550678e-06,
"loss": 0.6594,
"step": 12355
},
{
"epoch": 0.5,
"learning_rate": 1.0094096864969744e-06,
"loss": 0.6451,
"step": 12360
},
{
"epoch": 0.5,
"learning_rate": 1.0087652045345694e-06,
"loss": 0.6453,
"step": 12365
},
{
"epoch": 0.5,
"learning_rate": 1.0081207189311741e-06,
"loss": 0.6711,
"step": 12370
},
{
"epoch": 0.5,
"learning_rate": 1.0074762299545034e-06,
"loss": 0.6659,
"step": 12375
},
{
"epoch": 0.5,
"learning_rate": 1.0068317378722712e-06,
"loss": 0.6456,
"step": 12380
},
{
"epoch": 0.5,
"learning_rate": 1.006187242952195e-06,
"loss": 0.6592,
"step": 12385
},
{
"epoch": 0.5,
"learning_rate": 1.0055427454619916e-06,
"loss": 0.6596,
"step": 12390
},
{
"epoch": 0.5,
"learning_rate": 1.00489824566938e-06,
"loss": 0.6679,
"step": 12395
},
{
"epoch": 0.5,
"learning_rate": 1.00425374384208e-06,
"loss": 0.6507,
"step": 12400
},
{
"epoch": 0.5,
"learning_rate": 1.0036092402478114e-06,
"loss": 0.6902,
"step": 12405
},
{
"epoch": 0.5,
"learning_rate": 1.0029647351542958e-06,
"loss": 0.6355,
"step": 12410
},
{
"epoch": 0.5,
"learning_rate": 1.0023202288292552e-06,
"loss": 0.6847,
"step": 12415
},
{
"epoch": 0.5,
"learning_rate": 1.0016757215404117e-06,
"loss": 0.6544,
"step": 12420
},
{
"epoch": 0.5,
"learning_rate": 1.001031213555488e-06,
"loss": 0.6403,
"step": 12425
},
{
"epoch": 0.5,
"learning_rate": 1.000386705142207e-06,
"loss": 0.6718,
"step": 12430
},
{
"epoch": 0.51,
"learning_rate": 9.997421965682923e-07,
"loss": 0.6819,
"step": 12435
},
{
"epoch": 0.51,
"learning_rate": 9.99097688101467e-07,
"loss": 0.6521,
"step": 12440
},
{
"epoch": 0.51,
"learning_rate": 9.984531800094538e-07,
"loss": 0.6456,
"step": 12445
},
{
"epoch": 0.51,
"learning_rate": 9.978086725599764e-07,
"loss": 0.6862,
"step": 12450
},
{
"epoch": 0.51,
"learning_rate": 9.971641660207574e-07,
"loss": 0.675,
"step": 12455
},
{
"epoch": 0.51,
"learning_rate": 9.965196606595192e-07,
"loss": 0.6853,
"step": 12460
},
{
"epoch": 0.51,
"learning_rate": 9.958751567439835e-07,
"loss": 0.7034,
"step": 12465
},
{
"epoch": 0.51,
"learning_rate": 9.95230654541872e-07,
"loss": 0.6665,
"step": 12470
},
{
"epoch": 0.51,
"learning_rate": 9.94586154320905e-07,
"loss": 0.7126,
"step": 12475
},
{
"epoch": 0.51,
"learning_rate": 9.939416563488025e-07,
"loss": 0.6668,
"step": 12480
},
{
"epoch": 0.51,
"learning_rate": 9.932971608932832e-07,
"loss": 0.7099,
"step": 12485
},
{
"epoch": 0.51,
"learning_rate": 9.926526682220652e-07,
"loss": 0.6525,
"step": 12490
},
{
"epoch": 0.51,
"learning_rate": 9.920081786028647e-07,
"loss": 0.6092,
"step": 12495
},
{
"epoch": 0.51,
"learning_rate": 9.913636923033974e-07,
"loss": 0.6663,
"step": 12500
},
{
"epoch": 0.51,
"learning_rate": 9.907192095913772e-07,
"loss": 0.6514,
"step": 12505
},
{
"epoch": 0.51,
"learning_rate": 9.900747307345166e-07,
"loss": 0.676,
"step": 12510
},
{
"epoch": 0.51,
"learning_rate": 9.894302560005265e-07,
"loss": 0.6726,
"step": 12515
},
{
"epoch": 0.51,
"learning_rate": 9.88785785657116e-07,
"loss": 0.6882,
"step": 12520
},
{
"epoch": 0.51,
"learning_rate": 9.88141319971993e-07,
"loss": 0.6961,
"step": 12525
},
{
"epoch": 0.51,
"learning_rate": 9.874968592128624e-07,
"loss": 0.6772,
"step": 12530
},
{
"epoch": 0.51,
"learning_rate": 9.868524036474276e-07,
"loss": 0.6945,
"step": 12535
},
{
"epoch": 0.51,
"learning_rate": 9.862079535433902e-07,
"loss": 0.6479,
"step": 12540
},
{
"epoch": 0.51,
"learning_rate": 9.855635091684488e-07,
"loss": 0.6826,
"step": 12545
},
{
"epoch": 0.51,
"learning_rate": 9.849190707903004e-07,
"loss": 0.6593,
"step": 12550
},
{
"epoch": 0.51,
"learning_rate": 9.842746386766385e-07,
"loss": 0.6512,
"step": 12555
},
{
"epoch": 0.51,
"learning_rate": 9.836302130951548e-07,
"loss": 0.6938,
"step": 12560
},
{
"epoch": 0.51,
"learning_rate": 9.829857943135386e-07,
"loss": 0.6384,
"step": 12565
},
{
"epoch": 0.51,
"learning_rate": 9.823413825994754e-07,
"loss": 0.7146,
"step": 12570
},
{
"epoch": 0.51,
"learning_rate": 9.816969782206486e-07,
"loss": 0.6924,
"step": 12575
},
{
"epoch": 0.51,
"learning_rate": 9.810525814447372e-07,
"loss": 0.6643,
"step": 12580
},
{
"epoch": 0.51,
"learning_rate": 9.80408192539419e-07,
"loss": 0.6833,
"step": 12585
},
{
"epoch": 0.51,
"learning_rate": 9.797638117723675e-07,
"loss": 0.7397,
"step": 12590
},
{
"epoch": 0.51,
"learning_rate": 9.791194394112523e-07,
"loss": 0.6674,
"step": 12595
},
{
"epoch": 0.51,
"learning_rate": 9.784750757237405e-07,
"loss": 0.6753,
"step": 12600
},
{
"epoch": 0.51,
"learning_rate": 9.77830720977495e-07,
"loss": 0.6833,
"step": 12605
},
{
"epoch": 0.51,
"learning_rate": 9.77186375440175e-07,
"loss": 0.6847,
"step": 12610
},
{
"epoch": 0.51,
"learning_rate": 9.76542039379437e-07,
"loss": 0.6786,
"step": 12615
},
{
"epoch": 0.51,
"learning_rate": 9.75897713062931e-07,
"loss": 0.6696,
"step": 12620
},
{
"epoch": 0.51,
"learning_rate": 9.75253396758306e-07,
"loss": 0.6623,
"step": 12625
},
{
"epoch": 0.51,
"learning_rate": 9.746090907332043e-07,
"loss": 0.6847,
"step": 12630
},
{
"epoch": 0.51,
"learning_rate": 9.739647952552654e-07,
"loss": 0.6885,
"step": 12635
},
{
"epoch": 0.51,
"learning_rate": 9.733205105921247e-07,
"loss": 0.6954,
"step": 12640
},
{
"epoch": 0.51,
"learning_rate": 9.726762370114116e-07,
"loss": 0.6695,
"step": 12645
},
{
"epoch": 0.51,
"learning_rate": 9.720319747807521e-07,
"loss": 0.6837,
"step": 12650
},
{
"epoch": 0.51,
"learning_rate": 9.71387724167767e-07,
"loss": 0.6701,
"step": 12655
},
{
"epoch": 0.51,
"learning_rate": 9.707434854400723e-07,
"loss": 0.6297,
"step": 12660
},
{
"epoch": 0.51,
"learning_rate": 9.700992588652796e-07,
"loss": 0.6432,
"step": 12665
},
{
"epoch": 0.51,
"learning_rate": 9.694550447109946e-07,
"loss": 0.7152,
"step": 12670
},
{
"epoch": 0.51,
"learning_rate": 9.688108432448186e-07,
"loss": 0.6285,
"step": 12675
},
{
"epoch": 0.52,
"learning_rate": 9.681666547343467e-07,
"loss": 0.6382,
"step": 12680
},
{
"epoch": 0.52,
"learning_rate": 9.6752247944717e-07,
"loss": 0.6702,
"step": 12685
},
{
"epoch": 0.52,
"learning_rate": 9.668783176508724e-07,
"loss": 0.6962,
"step": 12690
},
{
"epoch": 0.52,
"learning_rate": 9.662341696130339e-07,
"loss": 0.7102,
"step": 12695
},
{
"epoch": 0.52,
"learning_rate": 9.655900356012279e-07,
"loss": 0.6731,
"step": 12700
},
{
"epoch": 0.52,
"learning_rate": 9.649459158830216e-07,
"loss": 0.6251,
"step": 12705
},
{
"epoch": 0.52,
"learning_rate": 9.643018107259774e-07,
"loss": 0.6723,
"step": 12710
},
{
"epoch": 0.52,
"learning_rate": 9.636577203976497e-07,
"loss": 0.6475,
"step": 12715
},
{
"epoch": 0.52,
"learning_rate": 9.630136451655894e-07,
"loss": 0.6977,
"step": 12720
},
{
"epoch": 0.52,
"learning_rate": 9.623695852973395e-07,
"loss": 0.6807,
"step": 12725
},
{
"epoch": 0.52,
"learning_rate": 9.617255410604363e-07,
"loss": 0.6594,
"step": 12730
},
{
"epoch": 0.52,
"learning_rate": 9.61081512722411e-07,
"loss": 0.6928,
"step": 12735
},
{
"epoch": 0.52,
"learning_rate": 9.604375005507862e-07,
"loss": 0.6592,
"step": 12740
},
{
"epoch": 0.52,
"learning_rate": 9.597935048130797e-07,
"loss": 0.6674,
"step": 12745
},
{
"epoch": 0.52,
"learning_rate": 9.591495257768019e-07,
"loss": 0.6501,
"step": 12750
},
{
"epoch": 0.52,
"learning_rate": 9.585055637094557e-07,
"loss": 0.6283,
"step": 12755
},
{
"epoch": 0.52,
"learning_rate": 9.578616188785378e-07,
"loss": 0.6404,
"step": 12760
},
{
"epoch": 0.52,
"learning_rate": 9.572176915515364e-07,
"loss": 0.6723,
"step": 12765
},
{
"epoch": 0.52,
"learning_rate": 9.56573781995934e-07,
"loss": 0.6595,
"step": 12770
},
{
"epoch": 0.52,
"learning_rate": 9.559298904792053e-07,
"loss": 0.6458,
"step": 12775
},
{
"epoch": 0.52,
"learning_rate": 9.552860172688165e-07,
"loss": 0.6258,
"step": 12780
},
{
"epoch": 0.52,
"learning_rate": 9.54642162632228e-07,
"loss": 0.6791,
"step": 12785
},
{
"epoch": 0.52,
"learning_rate": 9.539983268368897e-07,
"loss": 0.669,
"step": 12790
},
{
"epoch": 0.52,
"learning_rate": 9.533545101502466e-07,
"loss": 0.6532,
"step": 12795
},
{
"epoch": 0.52,
"learning_rate": 9.527107128397347e-07,
"loss": 0.6997,
"step": 12800
},
{
"epoch": 0.52,
"eval_loss": 0.6355204582214355,
"eval_runtime": 144.7041,
"eval_samples_per_second": 16.351,
"eval_steps_per_second": 2.73,
"step": 12800
},
{
"epoch": 0.52,
"learning_rate": 9.520669351727811e-07,
"loss": 0.6886,
"step": 12805
},
{
"epoch": 0.52,
"learning_rate": 9.514231774168063e-07,
"loss": 0.6541,
"step": 12810
},
{
"epoch": 0.52,
"learning_rate": 9.507794398392205e-07,
"loss": 0.6731,
"step": 12815
},
{
"epoch": 0.52,
"learning_rate": 9.501357227074279e-07,
"loss": 0.6141,
"step": 12820
},
{
"epoch": 0.52,
"learning_rate": 9.49492026288822e-07,
"loss": 0.6469,
"step": 12825
},
{
"epoch": 0.52,
"learning_rate": 9.488483508507892e-07,
"loss": 0.6434,
"step": 12830
},
{
"epoch": 0.52,
"learning_rate": 9.482046966607071e-07,
"loss": 0.6507,
"step": 12835
},
{
"epoch": 0.52,
"learning_rate": 9.475610639859428e-07,
"loss": 0.6604,
"step": 12840
},
{
"epoch": 0.52,
"learning_rate": 9.469174530938573e-07,
"loss": 0.6497,
"step": 12845
},
{
"epoch": 0.52,
"learning_rate": 9.462738642517995e-07,
"loss": 0.6627,
"step": 12850
},
{
"epoch": 0.52,
"learning_rate": 9.456302977271114e-07,
"loss": 0.6689,
"step": 12855
},
{
"epoch": 0.52,
"learning_rate": 9.449867537871251e-07,
"loss": 0.7033,
"step": 12860
},
{
"epoch": 0.52,
"learning_rate": 9.443432326991626e-07,
"loss": 0.7198,
"step": 12865
},
{
"epoch": 0.52,
"learning_rate": 9.436997347305377e-07,
"loss": 0.6576,
"step": 12870
},
{
"epoch": 0.52,
"learning_rate": 9.430562601485527e-07,
"loss": 0.669,
"step": 12875
},
{
"epoch": 0.52,
"learning_rate": 9.424128092205021e-07,
"loss": 0.6525,
"step": 12880
},
{
"epoch": 0.52,
"learning_rate": 9.417693822136701e-07,
"loss": 0.6669,
"step": 12885
},
{
"epoch": 0.52,
"learning_rate": 9.411259793953302e-07,
"loss": 0.6975,
"step": 12890
},
{
"epoch": 0.52,
"learning_rate": 9.404826010327467e-07,
"loss": 0.6476,
"step": 12895
},
{
"epoch": 0.52,
"learning_rate": 9.398392473931726e-07,
"loss": 0.6884,
"step": 12900
},
{
"epoch": 0.52,
"learning_rate": 9.391959187438522e-07,
"loss": 0.6902,
"step": 12905
},
{
"epoch": 0.52,
"learning_rate": 9.385526153520186e-07,
"loss": 0.6382,
"step": 12910
},
{
"epoch": 0.52,
"learning_rate": 9.379093374848939e-07,
"loss": 0.6584,
"step": 12915
},
{
"epoch": 0.52,
"learning_rate": 9.37266085409691e-07,
"loss": 0.6544,
"step": 12920
},
{
"epoch": 0.52,
"learning_rate": 9.366228593936098e-07,
"loss": 0.6808,
"step": 12925
},
{
"epoch": 0.53,
"learning_rate": 9.359796597038421e-07,
"loss": 0.7172,
"step": 12930
},
{
"epoch": 0.53,
"learning_rate": 9.353364866075672e-07,
"loss": 0.6768,
"step": 12935
},
{
"epoch": 0.53,
"learning_rate": 9.34693340371953e-07,
"loss": 0.6933,
"step": 12940
},
{
"epoch": 0.53,
"learning_rate": 9.340502212641578e-07,
"loss": 0.6833,
"step": 12945
},
{
"epoch": 0.53,
"learning_rate": 9.334071295513267e-07,
"loss": 0.6569,
"step": 12950
},
{
"epoch": 0.53,
"learning_rate": 9.32764065500595e-07,
"loss": 0.645,
"step": 12955
},
{
"epoch": 0.53,
"learning_rate": 9.321210293790859e-07,
"loss": 0.6581,
"step": 12960
},
{
"epoch": 0.53,
"learning_rate": 9.314780214539107e-07,
"loss": 0.7163,
"step": 12965
},
{
"epoch": 0.53,
"learning_rate": 9.308350419921699e-07,
"loss": 0.6527,
"step": 12970
},
{
"epoch": 0.53,
"learning_rate": 9.301920912609505e-07,
"loss": 0.6956,
"step": 12975
},
{
"epoch": 0.53,
"learning_rate": 9.295491695273301e-07,
"loss": 0.6763,
"step": 12980
},
{
"epoch": 0.53,
"learning_rate": 9.289062770583712e-07,
"loss": 0.6666,
"step": 12985
},
{
"epoch": 0.53,
"learning_rate": 9.282634141211269e-07,
"loss": 0.687,
"step": 12990
},
{
"epoch": 0.53,
"learning_rate": 9.276205809826368e-07,
"loss": 0.6675,
"step": 12995
},
{
"epoch": 0.53,
"learning_rate": 9.269777779099275e-07,
"loss": 0.6656,
"step": 13000
},
{
"epoch": 0.53,
"learning_rate": 9.263350051700147e-07,
"loss": 0.6616,
"step": 13005
},
{
"epoch": 0.53,
"learning_rate": 9.256922630298994e-07,
"loss": 0.6458,
"step": 13010
},
{
"epoch": 0.53,
"learning_rate": 9.250495517565722e-07,
"loss": 0.6929,
"step": 13015
},
{
"epoch": 0.53,
"learning_rate": 9.244068716170099e-07,
"loss": 0.6717,
"step": 13020
},
{
"epoch": 0.53,
"learning_rate": 9.237642228781749e-07,
"loss": 0.6591,
"step": 13025
},
{
"epoch": 0.53,
"learning_rate": 9.231216058070195e-07,
"loss": 0.6807,
"step": 13030
},
{
"epoch": 0.53,
"learning_rate": 9.224790206704798e-07,
"loss": 0.6745,
"step": 13035
},
{
"epoch": 0.53,
"learning_rate": 9.21836467735481e-07,
"loss": 0.6207,
"step": 13040
},
{
"epoch": 0.53,
"learning_rate": 9.21193947268934e-07,
"loss": 0.6627,
"step": 13045
},
{
"epoch": 0.53,
"learning_rate": 9.205514595377356e-07,
"loss": 0.6786,
"step": 13050
},
{
"epoch": 0.53,
"learning_rate": 9.199090048087706e-07,
"loss": 0.6949,
"step": 13055
},
{
"epoch": 0.53,
"learning_rate": 9.192665833489077e-07,
"loss": 0.665,
"step": 13060
},
{
"epoch": 0.53,
"learning_rate": 9.186241954250043e-07,
"loss": 0.6553,
"step": 13065
},
{
"epoch": 0.53,
"learning_rate": 9.179818413039028e-07,
"loss": 0.6508,
"step": 13070
},
{
"epoch": 0.53,
"learning_rate": 9.173395212524306e-07,
"loss": 0.6393,
"step": 13075
},
{
"epoch": 0.53,
"learning_rate": 9.166972355374031e-07,
"loss": 0.6319,
"step": 13080
},
{
"epoch": 0.53,
"learning_rate": 9.160549844256187e-07,
"loss": 0.6485,
"step": 13085
},
{
"epoch": 0.53,
"learning_rate": 9.154127681838642e-07,
"loss": 0.6805,
"step": 13090
},
{
"epoch": 0.53,
"learning_rate": 9.147705870789104e-07,
"loss": 0.677,
"step": 13095
},
{
"epoch": 0.53,
"learning_rate": 9.14128441377513e-07,
"loss": 0.6738,
"step": 13100
},
{
"epoch": 0.53,
"learning_rate": 9.134863313464149e-07,
"loss": 0.6761,
"step": 13105
},
{
"epoch": 0.53,
"learning_rate": 9.128442572523417e-07,
"loss": 0.6966,
"step": 13110
},
{
"epoch": 0.53,
"learning_rate": 9.122022193620068e-07,
"loss": 0.6435,
"step": 13115
},
{
"epoch": 0.53,
"learning_rate": 9.115602179421058e-07,
"loss": 0.656,
"step": 13120
},
{
"epoch": 0.53,
"learning_rate": 9.109182532593213e-07,
"loss": 0.678,
"step": 13125
},
{
"epoch": 0.53,
"learning_rate": 9.102763255803203e-07,
"loss": 0.6255,
"step": 13130
},
{
"epoch": 0.53,
"learning_rate": 9.096344351717527e-07,
"loss": 0.6929,
"step": 13135
},
{
"epoch": 0.53,
"learning_rate": 9.089925823002555e-07,
"loss": 0.6523,
"step": 13140
},
{
"epoch": 0.53,
"learning_rate": 9.083507672324474e-07,
"loss": 0.6703,
"step": 13145
},
{
"epoch": 0.53,
"learning_rate": 9.077089902349338e-07,
"loss": 0.6668,
"step": 13150
},
{
"epoch": 0.53,
"learning_rate": 9.070672515743037e-07,
"loss": 0.6151,
"step": 13155
},
{
"epoch": 0.53,
"learning_rate": 9.064255515171282e-07,
"loss": 0.6626,
"step": 13160
},
{
"epoch": 0.53,
"learning_rate": 9.057838903299656e-07,
"loss": 0.6921,
"step": 13165
},
{
"epoch": 0.53,
"learning_rate": 9.05142268279355e-07,
"loss": 0.6709,
"step": 13170
},
{
"epoch": 0.54,
"learning_rate": 9.045006856318215e-07,
"loss": 0.6411,
"step": 13175
},
{
"epoch": 0.54,
"learning_rate": 9.03859142653873e-07,
"loss": 0.6394,
"step": 13180
},
{
"epoch": 0.54,
"learning_rate": 9.03217639612e-07,
"loss": 0.6713,
"step": 13185
},
{
"epoch": 0.54,
"learning_rate": 9.025761767726784e-07,
"loss": 0.6492,
"step": 13190
},
{
"epoch": 0.54,
"learning_rate": 9.019347544023651e-07,
"loss": 0.6249,
"step": 13195
},
{
"epoch": 0.54,
"learning_rate": 9.012933727675023e-07,
"loss": 0.6649,
"step": 13200
},
{
"epoch": 0.54,
"learning_rate": 9.006520321345143e-07,
"loss": 0.6133,
"step": 13205
},
{
"epoch": 0.54,
"learning_rate": 9.000107327698078e-07,
"loss": 0.6659,
"step": 13210
},
{
"epoch": 0.54,
"learning_rate": 8.993694749397738e-07,
"loss": 0.6644,
"step": 13215
},
{
"epoch": 0.54,
"learning_rate": 8.987282589107842e-07,
"loss": 0.6778,
"step": 13220
},
{
"epoch": 0.54,
"learning_rate": 8.980870849491954e-07,
"loss": 0.6317,
"step": 13225
},
{
"epoch": 0.54,
"learning_rate": 8.974459533213457e-07,
"loss": 0.6583,
"step": 13230
},
{
"epoch": 0.54,
"learning_rate": 8.968048642935544e-07,
"loss": 0.6837,
"step": 13235
},
{
"epoch": 0.54,
"learning_rate": 8.961638181321257e-07,
"loss": 0.6398,
"step": 13240
},
{
"epoch": 0.54,
"learning_rate": 8.955228151033432e-07,
"loss": 0.6956,
"step": 13245
},
{
"epoch": 0.54,
"learning_rate": 8.948818554734756e-07,
"loss": 0.736,
"step": 13250
},
{
"epoch": 0.54,
"learning_rate": 8.942409395087706e-07,
"loss": 0.653,
"step": 13255
},
{
"epoch": 0.54,
"learning_rate": 8.936000674754592e-07,
"loss": 0.6775,
"step": 13260
},
{
"epoch": 0.54,
"learning_rate": 8.929592396397553e-07,
"loss": 0.6798,
"step": 13265
},
{
"epoch": 0.54,
"learning_rate": 8.923184562678517e-07,
"loss": 0.656,
"step": 13270
},
{
"epoch": 0.54,
"learning_rate": 8.916777176259256e-07,
"loss": 0.6569,
"step": 13275
},
{
"epoch": 0.54,
"learning_rate": 8.91037023980133e-07,
"loss": 0.6893,
"step": 13280
},
{
"epoch": 0.54,
"learning_rate": 8.903963755966129e-07,
"loss": 0.6301,
"step": 13285
},
{
"epoch": 0.54,
"learning_rate": 8.89755772741486e-07,
"loss": 0.6393,
"step": 13290
},
{
"epoch": 0.54,
"learning_rate": 8.891152156808516e-07,
"loss": 0.6426,
"step": 13295
},
{
"epoch": 0.54,
"learning_rate": 8.88474704680793e-07,
"loss": 0.6431,
"step": 13300
},
{
"epoch": 0.54,
"learning_rate": 8.878342400073717e-07,
"loss": 0.6747,
"step": 13305
},
{
"epoch": 0.54,
"learning_rate": 8.871938219266315e-07,
"loss": 0.6519,
"step": 13310
},
{
"epoch": 0.54,
"learning_rate": 8.865534507045973e-07,
"loss": 0.6479,
"step": 13315
},
{
"epoch": 0.54,
"learning_rate": 8.859131266072725e-07,
"loss": 0.6343,
"step": 13320
},
{
"epoch": 0.54,
"learning_rate": 8.852728499006434e-07,
"loss": 0.7073,
"step": 13325
},
{
"epoch": 0.54,
"learning_rate": 8.846326208506743e-07,
"loss": 0.6508,
"step": 13330
},
{
"epoch": 0.54,
"learning_rate": 8.839924397233108e-07,
"loss": 0.6796,
"step": 13335
},
{
"epoch": 0.54,
"learning_rate": 8.8335230678448e-07,
"loss": 0.6686,
"step": 13340
},
{
"epoch": 0.54,
"learning_rate": 8.827122223000856e-07,
"loss": 0.6708,
"step": 13345
},
{
"epoch": 0.54,
"learning_rate": 8.820721865360148e-07,
"loss": 0.6726,
"step": 13350
},
{
"epoch": 0.54,
"learning_rate": 8.814321997581318e-07,
"loss": 0.6588,
"step": 13355
},
{
"epoch": 0.54,
"learning_rate": 8.807922622322815e-07,
"loss": 0.6799,
"step": 13360
},
{
"epoch": 0.54,
"learning_rate": 8.801523742242897e-07,
"loss": 0.653,
"step": 13365
},
{
"epoch": 0.54,
"learning_rate": 8.795125359999586e-07,
"loss": 0.6911,
"step": 13370
},
{
"epoch": 0.54,
"learning_rate": 8.78872747825073e-07,
"loss": 0.6704,
"step": 13375
},
{
"epoch": 0.54,
"learning_rate": 8.782330099653937e-07,
"loss": 0.6585,
"step": 13380
},
{
"epoch": 0.54,
"learning_rate": 8.775933226866636e-07,
"loss": 0.63,
"step": 13385
},
{
"epoch": 0.54,
"learning_rate": 8.76953686254603e-07,
"loss": 0.6628,
"step": 13390
},
{
"epoch": 0.54,
"learning_rate": 8.763141009349104e-07,
"loss": 0.723,
"step": 13395
},
{
"epoch": 0.54,
"learning_rate": 8.756745669932655e-07,
"loss": 0.6833,
"step": 13400
},
{
"epoch": 0.54,
"learning_rate": 8.750350846953234e-07,
"loss": 0.6724,
"step": 13405
},
{
"epoch": 0.54,
"learning_rate": 8.743956543067213e-07,
"loss": 0.671,
"step": 13410
},
{
"epoch": 0.54,
"learning_rate": 8.737562760930713e-07,
"loss": 0.6819,
"step": 13415
},
{
"epoch": 0.55,
"learning_rate": 8.731169503199663e-07,
"loss": 0.6681,
"step": 13420
},
{
"epoch": 0.55,
"learning_rate": 8.724776772529775e-07,
"loss": 0.6503,
"step": 13425
},
{
"epoch": 0.55,
"learning_rate": 8.718384571576518e-07,
"loss": 0.684,
"step": 13430
},
{
"epoch": 0.55,
"learning_rate": 8.711992902995171e-07,
"loss": 0.7102,
"step": 13435
},
{
"epoch": 0.55,
"learning_rate": 8.705601769440767e-07,
"loss": 0.624,
"step": 13440
},
{
"epoch": 0.55,
"learning_rate": 8.699211173568128e-07,
"loss": 0.6489,
"step": 13445
},
{
"epoch": 0.55,
"learning_rate": 8.692821118031864e-07,
"loss": 0.6841,
"step": 13450
},
{
"epoch": 0.55,
"learning_rate": 8.686431605486331e-07,
"loss": 0.6857,
"step": 13455
},
{
"epoch": 0.55,
"learning_rate": 8.680042638585694e-07,
"loss": 0.7022,
"step": 13460
},
{
"epoch": 0.55,
"learning_rate": 8.673654219983861e-07,
"loss": 0.6838,
"step": 13465
},
{
"epoch": 0.55,
"learning_rate": 8.667266352334528e-07,
"loss": 0.7022,
"step": 13470
},
{
"epoch": 0.55,
"learning_rate": 8.660879038291169e-07,
"loss": 0.6553,
"step": 13475
},
{
"epoch": 0.55,
"learning_rate": 8.654492280507005e-07,
"loss": 0.6301,
"step": 13480
},
{
"epoch": 0.55,
"learning_rate": 8.648106081635054e-07,
"loss": 0.6662,
"step": 13485
},
{
"epoch": 0.55,
"learning_rate": 8.641720444328075e-07,
"loss": 0.6445,
"step": 13490
},
{
"epoch": 0.55,
"learning_rate": 8.635335371238609e-07,
"loss": 0.6577,
"step": 13495
},
{
"epoch": 0.55,
"learning_rate": 8.628950865018969e-07,
"loss": 0.647,
"step": 13500
},
{
"epoch": 0.55,
"learning_rate": 8.622566928321209e-07,
"loss": 0.6851,
"step": 13505
},
{
"epoch": 0.55,
"learning_rate": 8.616183563797177e-07,
"loss": 0.6431,
"step": 13510
},
{
"epoch": 0.55,
"learning_rate": 8.609800774098452e-07,
"loss": 0.6909,
"step": 13515
},
{
"epoch": 0.55,
"learning_rate": 8.603418561876394e-07,
"loss": 0.6711,
"step": 13520
},
{
"epoch": 0.55,
"learning_rate": 8.597036929782127e-07,
"loss": 0.7302,
"step": 13525
},
{
"epoch": 0.55,
"learning_rate": 8.590655880466511e-07,
"loss": 0.6841,
"step": 13530
},
{
"epoch": 0.55,
"learning_rate": 8.584275416580194e-07,
"loss": 0.6897,
"step": 13535
},
{
"epoch": 0.55,
"learning_rate": 8.577895540773552e-07,
"loss": 0.6478,
"step": 13540
},
{
"epoch": 0.55,
"learning_rate": 8.571516255696738e-07,
"loss": 0.6627,
"step": 13545
},
{
"epoch": 0.55,
"learning_rate": 8.565137563999646e-07,
"loss": 0.677,
"step": 13550
},
{
"epoch": 0.55,
"learning_rate": 8.55875946833193e-07,
"loss": 0.6428,
"step": 13555
},
{
"epoch": 0.55,
"learning_rate": 8.552381971343003e-07,
"loss": 0.6605,
"step": 13560
},
{
"epoch": 0.55,
"learning_rate": 8.546005075682012e-07,
"loss": 0.6421,
"step": 13565
},
{
"epoch": 0.55,
"learning_rate": 8.53962878399787e-07,
"loss": 0.6669,
"step": 13570
},
{
"epoch": 0.55,
"learning_rate": 8.53325309893923e-07,
"loss": 0.6713,
"step": 13575
},
{
"epoch": 0.55,
"learning_rate": 8.526878023154494e-07,
"loss": 0.696,
"step": 13580
},
{
"epoch": 0.55,
"learning_rate": 8.520503559291823e-07,
"loss": 0.6294,
"step": 13585
},
{
"epoch": 0.55,
"learning_rate": 8.514129709999103e-07,
"loss": 0.6732,
"step": 13590
},
{
"epoch": 0.55,
"learning_rate": 8.507756477923982e-07,
"loss": 0.6502,
"step": 13595
},
{
"epoch": 0.55,
"learning_rate": 8.501383865713839e-07,
"loss": 0.6475,
"step": 13600
},
{
"epoch": 0.55,
"eval_loss": 0.6325117349624634,
"eval_runtime": 139.0789,
"eval_samples_per_second": 17.012,
"eval_steps_per_second": 2.84,
"step": 13600
},
{
"epoch": 0.55,
"learning_rate": 8.495011876015805e-07,
"loss": 0.6818,
"step": 13605
},
{
"epoch": 0.55,
"learning_rate": 8.488640511476757e-07,
"loss": 0.6347,
"step": 13610
},
{
"epoch": 0.55,
"learning_rate": 8.482269774743291e-07,
"loss": 0.653,
"step": 13615
},
{
"epoch": 0.55,
"learning_rate": 8.475899668461765e-07,
"loss": 0.6532,
"step": 13620
},
{
"epoch": 0.55,
"learning_rate": 8.469530195278261e-07,
"loss": 0.6745,
"step": 13625
},
{
"epoch": 0.55,
"learning_rate": 8.4631613578386e-07,
"loss": 0.6486,
"step": 13630
},
{
"epoch": 0.55,
"learning_rate": 8.456793158788354e-07,
"loss": 0.6595,
"step": 13635
},
{
"epoch": 0.55,
"learning_rate": 8.450425600772802e-07,
"loss": 0.6835,
"step": 13640
},
{
"epoch": 0.55,
"learning_rate": 8.444058686436987e-07,
"loss": 0.6539,
"step": 13645
},
{
"epoch": 0.55,
"learning_rate": 8.437692418425657e-07,
"loss": 0.6733,
"step": 13650
},
{
"epoch": 0.55,
"learning_rate": 8.431326799383309e-07,
"loss": 0.6533,
"step": 13655
},
{
"epoch": 0.55,
"learning_rate": 8.424961831954174e-07,
"loss": 0.6535,
"step": 13660
},
{
"epoch": 0.56,
"learning_rate": 8.418597518782189e-07,
"loss": 0.6637,
"step": 13665
},
{
"epoch": 0.56,
"learning_rate": 8.412233862511048e-07,
"loss": 0.661,
"step": 13670
},
{
"epoch": 0.56,
"learning_rate": 8.40587086578415e-07,
"loss": 0.6421,
"step": 13675
},
{
"epoch": 0.56,
"learning_rate": 8.399508531244632e-07,
"loss": 0.715,
"step": 13680
},
{
"epoch": 0.56,
"learning_rate": 8.39314686153535e-07,
"loss": 0.7132,
"step": 13685
},
{
"epoch": 0.56,
"learning_rate": 8.386785859298885e-07,
"loss": 0.6643,
"step": 13690
},
{
"epoch": 0.56,
"learning_rate": 8.380425527177551e-07,
"loss": 0.6568,
"step": 13695
},
{
"epoch": 0.56,
"learning_rate": 8.374065867813365e-07,
"loss": 0.65,
"step": 13700
},
{
"epoch": 0.56,
"learning_rate": 8.36770688384808e-07,
"loss": 0.691,
"step": 13705
},
{
"epoch": 0.56,
"learning_rate": 8.361348577923158e-07,
"loss": 0.6494,
"step": 13710
},
{
"epoch": 0.56,
"learning_rate": 8.354990952679784e-07,
"loss": 0.6998,
"step": 13715
},
{
"epoch": 0.56,
"learning_rate": 8.348634010758869e-07,
"loss": 0.6635,
"step": 13720
},
{
"epoch": 0.56,
"learning_rate": 8.342277754801021e-07,
"loss": 0.6726,
"step": 13725
},
{
"epoch": 0.56,
"learning_rate": 8.33592218744658e-07,
"loss": 0.6538,
"step": 13730
},
{
"epoch": 0.56,
"learning_rate": 8.329567311335588e-07,
"loss": 0.6708,
"step": 13735
},
{
"epoch": 0.56,
"learning_rate": 8.323213129107805e-07,
"loss": 0.6403,
"step": 13740
},
{
"epoch": 0.56,
"learning_rate": 8.316859643402714e-07,
"loss": 0.6683,
"step": 13745
},
{
"epoch": 0.56,
"learning_rate": 8.310506856859485e-07,
"loss": 0.6569,
"step": 13750
},
{
"epoch": 0.56,
"learning_rate": 8.304154772117017e-07,
"loss": 0.659,
"step": 13755
},
{
"epoch": 0.56,
"learning_rate": 8.297803391813908e-07,
"loss": 0.6665,
"step": 13760
},
{
"epoch": 0.56,
"learning_rate": 8.291452718588463e-07,
"loss": 0.6852,
"step": 13765
},
{
"epoch": 0.56,
"learning_rate": 8.285102755078708e-07,
"loss": 0.625,
"step": 13770
},
{
"epoch": 0.56,
"learning_rate": 8.278753503922351e-07,
"loss": 0.607,
"step": 13775
},
{
"epoch": 0.56,
"learning_rate": 8.272404967756821e-07,
"loss": 0.6543,
"step": 13780
},
{
"epoch": 0.56,
"learning_rate": 8.266057149219242e-07,
"loss": 0.6624,
"step": 13785
},
{
"epoch": 0.56,
"learning_rate": 8.259710050946443e-07,
"loss": 0.6695,
"step": 13790
},
{
"epoch": 0.56,
"learning_rate": 8.253363675574959e-07,
"loss": 0.6375,
"step": 13795
},
{
"epoch": 0.56,
"learning_rate": 8.24701802574101e-07,
"loss": 0.6712,
"step": 13800
},
{
"epoch": 0.56,
"learning_rate": 8.240673104080533e-07,
"loss": 0.6653,
"step": 13805
},
{
"epoch": 0.56,
"learning_rate": 8.234328913229145e-07,
"loss": 0.6578,
"step": 13810
},
{
"epoch": 0.56,
"learning_rate": 8.227985455822174e-07,
"loss": 0.697,
"step": 13815
},
{
"epoch": 0.56,
"learning_rate": 8.221642734494631e-07,
"loss": 0.6273,
"step": 13820
},
{
"epoch": 0.56,
"learning_rate": 8.215300751881233e-07,
"loss": 0.6841,
"step": 13825
},
{
"epoch": 0.56,
"learning_rate": 8.208959510616384e-07,
"loss": 0.6758,
"step": 13830
},
{
"epoch": 0.56,
"learning_rate": 8.202619013334176e-07,
"loss": 0.6531,
"step": 13835
},
{
"epoch": 0.56,
"learning_rate": 8.1962792626684e-07,
"loss": 0.679,
"step": 13840
},
{
"epoch": 0.56,
"learning_rate": 8.189940261252531e-07,
"loss": 0.6685,
"step": 13845
},
{
"epoch": 0.56,
"learning_rate": 8.183602011719736e-07,
"loss": 0.6558,
"step": 13850
},
{
"epoch": 0.56,
"learning_rate": 8.177264516702873e-07,
"loss": 0.6132,
"step": 13855
},
{
"epoch": 0.56,
"learning_rate": 8.170927778834476e-07,
"loss": 0.6571,
"step": 13860
},
{
"epoch": 0.56,
"learning_rate": 8.164591800746774e-07,
"loss": 0.685,
"step": 13865
},
{
"epoch": 0.56,
"learning_rate": 8.158256585071674e-07,
"loss": 0.6806,
"step": 13870
},
{
"epoch": 0.56,
"learning_rate": 8.151922134440774e-07,
"loss": 0.6552,
"step": 13875
},
{
"epoch": 0.56,
"learning_rate": 8.145588451485347e-07,
"loss": 0.6927,
"step": 13880
},
{
"epoch": 0.56,
"learning_rate": 8.139255538836351e-07,
"loss": 0.6594,
"step": 13885
},
{
"epoch": 0.56,
"learning_rate": 8.132923399124424e-07,
"loss": 0.6392,
"step": 13890
},
{
"epoch": 0.56,
"learning_rate": 8.126592034979878e-07,
"loss": 0.6697,
"step": 13895
},
{
"epoch": 0.56,
"learning_rate": 8.120261449032706e-07,
"loss": 0.6549,
"step": 13900
},
{
"epoch": 0.56,
"learning_rate": 8.113931643912589e-07,
"loss": 0.6544,
"step": 13905
},
{
"epoch": 0.57,
"learning_rate": 8.107602622248859e-07,
"loss": 0.6528,
"step": 13910
},
{
"epoch": 0.57,
"learning_rate": 8.101274386670544e-07,
"loss": 0.6406,
"step": 13915
},
{
"epoch": 0.57,
"learning_rate": 8.094946939806336e-07,
"loss": 0.6755,
"step": 13920
},
{
"epoch": 0.57,
"learning_rate": 8.0886202842846e-07,
"loss": 0.6685,
"step": 13925
},
{
"epoch": 0.57,
"learning_rate": 8.082294422733382e-07,
"loss": 0.6375,
"step": 13930
},
{
"epoch": 0.57,
"learning_rate": 8.075969357780379e-07,
"loss": 0.6974,
"step": 13935
},
{
"epoch": 0.57,
"learning_rate": 8.069645092052975e-07,
"loss": 0.6379,
"step": 13940
},
{
"epoch": 0.57,
"learning_rate": 8.06332162817821e-07,
"loss": 0.6283,
"step": 13945
},
{
"epoch": 0.57,
"learning_rate": 8.056998968782797e-07,
"loss": 0.6546,
"step": 13950
},
{
"epoch": 0.57,
"learning_rate": 8.050677116493121e-07,
"loss": 0.6807,
"step": 13955
},
{
"epoch": 0.57,
"learning_rate": 8.044356073935214e-07,
"loss": 0.6607,
"step": 13960
},
{
"epoch": 0.57,
"learning_rate": 8.038035843734791e-07,
"loss": 0.6562,
"step": 13965
},
{
"epoch": 0.57,
"learning_rate": 8.031716428517212e-07,
"loss": 0.6787,
"step": 13970
},
{
"epoch": 0.57,
"learning_rate": 8.025397830907515e-07,
"loss": 0.6575,
"step": 13975
},
{
"epoch": 0.57,
"learning_rate": 8.019080053530385e-07,
"loss": 0.6418,
"step": 13980
},
{
"epoch": 0.57,
"learning_rate": 8.012763099010172e-07,
"loss": 0.6619,
"step": 13985
},
{
"epoch": 0.57,
"learning_rate": 8.00644696997089e-07,
"loss": 0.6564,
"step": 13990
},
{
"epoch": 0.57,
"learning_rate": 8.000131669036196e-07,
"loss": 0.6619,
"step": 13995
},
{
"epoch": 0.57,
"learning_rate": 7.993817198829419e-07,
"loss": 0.6472,
"step": 14000
},
{
"epoch": 0.57,
"learning_rate": 7.987503561973527e-07,
"loss": 0.658,
"step": 14005
},
{
"epoch": 0.57,
"learning_rate": 7.981190761091156e-07,
"loss": 0.6977,
"step": 14010
},
{
"epoch": 0.57,
"learning_rate": 7.974878798804587e-07,
"loss": 0.6398,
"step": 14015
},
{
"epoch": 0.57,
"learning_rate": 7.968567677735752e-07,
"loss": 0.6633,
"step": 14020
},
{
"epoch": 0.57,
"learning_rate": 7.962257400506241e-07,
"loss": 0.6571,
"step": 14025
},
{
"epoch": 0.57,
"learning_rate": 7.95594796973728e-07,
"loss": 0.6622,
"step": 14030
},
{
"epoch": 0.57,
"learning_rate": 7.949639388049758e-07,
"loss": 0.6876,
"step": 14035
},
{
"epoch": 0.57,
"learning_rate": 7.943331658064206e-07,
"loss": 0.6471,
"step": 14040
},
{
"epoch": 0.57,
"learning_rate": 7.937024782400793e-07,
"loss": 0.6671,
"step": 14045
},
{
"epoch": 0.57,
"learning_rate": 7.930718763679349e-07,
"loss": 0.6538,
"step": 14050
},
{
"epoch": 0.57,
"learning_rate": 7.924413604519333e-07,
"loss": 0.6854,
"step": 14055
},
{
"epoch": 0.57,
"learning_rate": 7.918109307539856e-07,
"loss": 0.7438,
"step": 14060
},
{
"epoch": 0.57,
"learning_rate": 7.911805875359667e-07,
"loss": 0.6835,
"step": 14065
},
{
"epoch": 0.57,
"learning_rate": 7.905503310597159e-07,
"loss": 0.6572,
"step": 14070
},
{
"epoch": 0.57,
"learning_rate": 7.899201615870361e-07,
"loss": 0.6816,
"step": 14075
},
{
"epoch": 0.57,
"learning_rate": 7.892900793796941e-07,
"loss": 0.6017,
"step": 14080
},
{
"epoch": 0.57,
"learning_rate": 7.886600846994207e-07,
"loss": 0.6606,
"step": 14085
},
{
"epoch": 0.57,
"learning_rate": 7.880301778079104e-07,
"loss": 0.6908,
"step": 14090
},
{
"epoch": 0.57,
"learning_rate": 7.874003589668207e-07,
"loss": 0.6787,
"step": 14095
},
{
"epoch": 0.57,
"learning_rate": 7.867706284377731e-07,
"loss": 0.6872,
"step": 14100
},
{
"epoch": 0.57,
"learning_rate": 7.86140986482352e-07,
"loss": 0.6812,
"step": 14105
},
{
"epoch": 0.57,
"learning_rate": 7.855114333621055e-07,
"loss": 0.6552,
"step": 14110
},
{
"epoch": 0.57,
"learning_rate": 7.848819693385443e-07,
"loss": 0.6809,
"step": 14115
},
{
"epoch": 0.57,
"learning_rate": 7.84252594673142e-07,
"loss": 0.6787,
"step": 14120
},
{
"epoch": 0.57,
"learning_rate": 7.83623309627336e-07,
"loss": 0.6638,
"step": 14125
},
{
"epoch": 0.57,
"learning_rate": 7.829941144625251e-07,
"loss": 0.6821,
"step": 14130
},
{
"epoch": 0.57,
"learning_rate": 7.823650094400722e-07,
"loss": 0.6569,
"step": 14135
},
{
"epoch": 0.57,
"learning_rate": 7.817359948213014e-07,
"loss": 0.6722,
"step": 14140
},
{
"epoch": 0.57,
"learning_rate": 7.811070708675005e-07,
"loss": 0.6677,
"step": 14145
},
{
"epoch": 0.57,
"learning_rate": 7.804782378399188e-07,
"loss": 0.6305,
"step": 14150
},
{
"epoch": 0.57,
"learning_rate": 7.798494959997679e-07,
"loss": 0.6604,
"step": 14155
},
{
"epoch": 0.58,
"learning_rate": 7.79220845608222e-07,
"loss": 0.6459,
"step": 14160
},
{
"epoch": 0.58,
"learning_rate": 7.785922869264167e-07,
"loss": 0.6571,
"step": 14165
},
{
"epoch": 0.58,
"learning_rate": 7.779638202154498e-07,
"loss": 0.6593,
"step": 14170
},
{
"epoch": 0.58,
"learning_rate": 7.773354457363814e-07,
"loss": 0.672,
"step": 14175
},
{
"epoch": 0.58,
"learning_rate": 7.767071637502323e-07,
"loss": 0.6653,
"step": 14180
},
{
"epoch": 0.58,
"learning_rate": 7.760789745179857e-07,
"loss": 0.6623,
"step": 14185
},
{
"epoch": 0.58,
"learning_rate": 7.754508783005855e-07,
"loss": 0.6597,
"step": 14190
},
{
"epoch": 0.58,
"learning_rate": 7.748228753589377e-07,
"loss": 0.6594,
"step": 14195
},
{
"epoch": 0.58,
"learning_rate": 7.741949659539094e-07,
"loss": 0.6507,
"step": 14200
},
{
"epoch": 0.58,
"learning_rate": 7.735671503463283e-07,
"loss": 0.6891,
"step": 14205
},
{
"epoch": 0.58,
"learning_rate": 7.729394287969842e-07,
"loss": 0.7177,
"step": 14210
},
{
"epoch": 0.58,
"learning_rate": 7.723118015666264e-07,
"loss": 0.6398,
"step": 14215
},
{
"epoch": 0.58,
"learning_rate": 7.716842689159662e-07,
"loss": 0.689,
"step": 14220
},
{
"epoch": 0.58,
"learning_rate": 7.710568311056754e-07,
"loss": 0.6272,
"step": 14225
},
{
"epoch": 0.58,
"learning_rate": 7.704294883963858e-07,
"loss": 0.6754,
"step": 14230
},
{
"epoch": 0.58,
"learning_rate": 7.698022410486904e-07,
"loss": 0.6399,
"step": 14235
},
{
"epoch": 0.58,
"learning_rate": 7.69175089323142e-07,
"loss": 0.6666,
"step": 14240
},
{
"epoch": 0.58,
"learning_rate": 7.685480334802543e-07,
"loss": 0.6878,
"step": 14245
},
{
"epoch": 0.58,
"learning_rate": 7.679210737805005e-07,
"loss": 0.6787,
"step": 14250
},
{
"epoch": 0.58,
"learning_rate": 7.672942104843145e-07,
"loss": 0.6784,
"step": 14255
},
{
"epoch": 0.58,
"learning_rate": 7.666674438520898e-07,
"loss": 0.657,
"step": 14260
},
{
"epoch": 0.58,
"learning_rate": 7.660407741441797e-07,
"loss": 0.6817,
"step": 14265
},
{
"epoch": 0.58,
"learning_rate": 7.654142016208976e-07,
"loss": 0.6439,
"step": 14270
},
{
"epoch": 0.58,
"learning_rate": 7.647877265425157e-07,
"loss": 0.7055,
"step": 14275
},
{
"epoch": 0.58,
"learning_rate": 7.641613491692669e-07,
"loss": 0.6893,
"step": 14280
},
{
"epoch": 0.58,
"learning_rate": 7.635350697613429e-07,
"loss": 0.6435,
"step": 14285
},
{
"epoch": 0.58,
"learning_rate": 7.629088885788942e-07,
"loss": 0.6568,
"step": 14290
},
{
"epoch": 0.58,
"learning_rate": 7.622828058820315e-07,
"loss": 0.6865,
"step": 14295
},
{
"epoch": 0.58,
"learning_rate": 7.616568219308239e-07,
"loss": 0.6944,
"step": 14300
},
{
"epoch": 0.58,
"learning_rate": 7.610309369852996e-07,
"loss": 0.6978,
"step": 14305
},
{
"epoch": 0.58,
"learning_rate": 7.604051513054462e-07,
"loss": 0.6665,
"step": 14310
},
{
"epoch": 0.58,
"learning_rate": 7.597794651512092e-07,
"loss": 0.6395,
"step": 14315
},
{
"epoch": 0.58,
"learning_rate": 7.591538787824933e-07,
"loss": 0.6756,
"step": 14320
},
{
"epoch": 0.58,
"learning_rate": 7.585283924591616e-07,
"loss": 0.6442,
"step": 14325
},
{
"epoch": 0.58,
"learning_rate": 7.579030064410357e-07,
"loss": 0.6175,
"step": 14330
},
{
"epoch": 0.58,
"learning_rate": 7.572777209878958e-07,
"loss": 0.6561,
"step": 14335
},
{
"epoch": 0.58,
"learning_rate": 7.566525363594796e-07,
"loss": 0.6895,
"step": 14340
},
{
"epoch": 0.58,
"learning_rate": 7.560274528154836e-07,
"loss": 0.6491,
"step": 14345
},
{
"epoch": 0.58,
"learning_rate": 7.55402470615562e-07,
"loss": 0.7044,
"step": 14350
},
{
"epoch": 0.58,
"learning_rate": 7.547775900193267e-07,
"loss": 0.6438,
"step": 14355
},
{
"epoch": 0.58,
"learning_rate": 7.541528112863484e-07,
"loss": 0.709,
"step": 14360
},
{
"epoch": 0.58,
"learning_rate": 7.53528134676154e-07,
"loss": 0.6684,
"step": 14365
},
{
"epoch": 0.58,
"learning_rate": 7.529035604482292e-07,
"loss": 0.6646,
"step": 14370
},
{
"epoch": 0.58,
"learning_rate": 7.522790888620165e-07,
"loss": 0.6361,
"step": 14375
},
{
"epoch": 0.58,
"learning_rate": 7.516547201769159e-07,
"loss": 0.6411,
"step": 14380
},
{
"epoch": 0.58,
"learning_rate": 7.51030454652285e-07,
"loss": 0.6162,
"step": 14385
},
{
"epoch": 0.58,
"learning_rate": 7.504062925474378e-07,
"loss": 0.6916,
"step": 14390
},
{
"epoch": 0.58,
"learning_rate": 7.497822341216465e-07,
"loss": 0.6986,
"step": 14395
},
{
"epoch": 0.58,
"learning_rate": 7.491582796341388e-07,
"loss": 0.6316,
"step": 14400
},
{
"epoch": 0.58,
"eval_loss": 0.629960834980011,
"eval_runtime": 143.0305,
"eval_samples_per_second": 16.542,
"eval_steps_per_second": 2.762,
"step": 14400
},
{
"epoch": 0.59,
"learning_rate": 7.485344293441006e-07,
"loss": 0.7091,
"step": 14405
},
{
"epoch": 0.59,
"learning_rate": 7.479106835106733e-07,
"loss": 0.6732,
"step": 14410
},
{
"epoch": 0.59,
"learning_rate": 7.472870423929558e-07,
"loss": 0.6886,
"step": 14415
},
{
"epoch": 0.59,
"learning_rate": 7.466635062500035e-07,
"loss": 0.6929,
"step": 14420
},
{
"epoch": 0.59,
"learning_rate": 7.460400753408274e-07,
"loss": 0.6848,
"step": 14425
},
{
"epoch": 0.59,
"learning_rate": 7.454167499243955e-07,
"loss": 0.6989,
"step": 14430
},
{
"epoch": 0.59,
"learning_rate": 7.447935302596317e-07,
"loss": 0.656,
"step": 14435
},
{
"epoch": 0.59,
"learning_rate": 7.441704166054159e-07,
"loss": 0.6596,
"step": 14440
},
{
"epoch": 0.59,
"learning_rate": 7.435474092205844e-07,
"loss": 0.6879,
"step": 14445
},
{
"epoch": 0.59,
"learning_rate": 7.429245083639288e-07,
"loss": 0.6734,
"step": 14450
},
{
"epoch": 0.59,
"learning_rate": 7.42301714294197e-07,
"loss": 0.6389,
"step": 14455
},
{
"epoch": 0.59,
"learning_rate": 7.416790272700919e-07,
"loss": 0.6627,
"step": 14460
},
{
"epoch": 0.59,
"learning_rate": 7.410564475502723e-07,
"loss": 0.6592,
"step": 14465
},
{
"epoch": 0.59,
"learning_rate": 7.404339753933526e-07,
"loss": 0.6102,
"step": 14470
},
{
"epoch": 0.59,
"learning_rate": 7.398116110579022e-07,
"loss": 0.6535,
"step": 14475
},
{
"epoch": 0.59,
"learning_rate": 7.39189354802446e-07,
"loss": 0.6776,
"step": 14480
},
{
"epoch": 0.59,
"learning_rate": 7.385672068854635e-07,
"loss": 0.6815,
"step": 14485
},
{
"epoch": 0.59,
"learning_rate": 7.379451675653897e-07,
"loss": 0.6762,
"step": 14490
},
{
"epoch": 0.59,
"learning_rate": 7.373232371006146e-07,
"loss": 0.6405,
"step": 14495
},
{
"epoch": 0.59,
"learning_rate": 7.367014157494822e-07,
"loss": 0.6582,
"step": 14500
},
{
"epoch": 0.59,
"learning_rate": 7.360797037702922e-07,
"loss": 0.7052,
"step": 14505
},
{
"epoch": 0.59,
"learning_rate": 7.354581014212978e-07,
"loss": 0.7036,
"step": 14510
},
{
"epoch": 0.59,
"learning_rate": 7.348366089607077e-07,
"loss": 0.6619,
"step": 14515
},
{
"epoch": 0.59,
"learning_rate": 7.342152266466842e-07,
"loss": 0.6511,
"step": 14520
},
{
"epoch": 0.59,
"learning_rate": 7.335939547373441e-07,
"loss": 0.6736,
"step": 14525
},
{
"epoch": 0.59,
"learning_rate": 7.329727934907586e-07,
"loss": 0.6553,
"step": 14530
},
{
"epoch": 0.59,
"learning_rate": 7.323517431649524e-07,
"loss": 0.6332,
"step": 14535
},
{
"epoch": 0.59,
"learning_rate": 7.317308040179045e-07,
"loss": 0.6236,
"step": 14540
},
{
"epoch": 0.59,
"learning_rate": 7.311099763075477e-07,
"loss": 0.6322,
"step": 14545
},
{
"epoch": 0.59,
"learning_rate": 7.304892602917681e-07,
"loss": 0.6703,
"step": 14550
},
{
"epoch": 0.59,
"learning_rate": 7.298686562284064e-07,
"loss": 0.6805,
"step": 14555
},
{
"epoch": 0.59,
"learning_rate": 7.292481643752553e-07,
"loss": 0.6194,
"step": 14560
},
{
"epoch": 0.59,
"learning_rate": 7.286277849900626e-07,
"loss": 0.677,
"step": 14565
},
{
"epoch": 0.59,
"learning_rate": 7.280075183305276e-07,
"loss": 0.655,
"step": 14570
},
{
"epoch": 0.59,
"learning_rate": 7.273873646543043e-07,
"loss": 0.7065,
"step": 14575
},
{
"epoch": 0.59,
"learning_rate": 7.267673242189991e-07,
"loss": 0.6416,
"step": 14580
},
{
"epoch": 0.59,
"learning_rate": 7.261473972821712e-07,
"loss": 0.623,
"step": 14585
},
{
"epoch": 0.59,
"learning_rate": 7.25527584101333e-07,
"loss": 0.6771,
"step": 14590
},
{
"epoch": 0.59,
"learning_rate": 7.249078849339492e-07,
"loss": 0.6588,
"step": 14595
},
{
"epoch": 0.59,
"learning_rate": 7.242883000374378e-07,
"loss": 0.7181,
"step": 14600
},
{
"epoch": 0.59,
"learning_rate": 7.23668829669169e-07,
"loss": 0.7161,
"step": 14605
},
{
"epoch": 0.59,
"learning_rate": 7.23049474086465e-07,
"loss": 0.6712,
"step": 14610
},
{
"epoch": 0.59,
"learning_rate": 7.224302335466013e-07,
"loss": 0.6546,
"step": 14615
},
{
"epoch": 0.59,
"learning_rate": 7.218111083068044e-07,
"loss": 0.6682,
"step": 14620
},
{
"epoch": 0.59,
"learning_rate": 7.211920986242539e-07,
"loss": 0.6289,
"step": 14625
},
{
"epoch": 0.59,
"learning_rate": 7.205732047560813e-07,
"loss": 0.6735,
"step": 14630
},
{
"epoch": 0.59,
"learning_rate": 7.199544269593692e-07,
"loss": 0.6742,
"step": 14635
},
{
"epoch": 0.59,
"learning_rate": 7.193357654911529e-07,
"loss": 0.663,
"step": 14640
},
{
"epoch": 0.59,
"learning_rate": 7.187172206084186e-07,
"loss": 0.6653,
"step": 14645
},
{
"epoch": 0.6,
"learning_rate": 7.180987925681047e-07,
"loss": 0.633,
"step": 14650
},
{
"epoch": 0.6,
"learning_rate": 7.174804816271012e-07,
"loss": 0.6679,
"step": 14655
},
{
"epoch": 0.6,
"learning_rate": 7.168622880422484e-07,
"loss": 0.6849,
"step": 14660
},
{
"epoch": 0.6,
"learning_rate": 7.162442120703389e-07,
"loss": 0.6585,
"step": 14665
},
{
"epoch": 0.6,
"learning_rate": 7.15626253968116e-07,
"loss": 0.6842,
"step": 14670
},
{
"epoch": 0.6,
"learning_rate": 7.150084139922745e-07,
"loss": 0.6745,
"step": 14675
},
{
"epoch": 0.6,
"learning_rate": 7.14390692399459e-07,
"loss": 0.6614,
"step": 14680
},
{
"epoch": 0.6,
"learning_rate": 7.137730894462662e-07,
"loss": 0.6682,
"step": 14685
},
{
"epoch": 0.6,
"learning_rate": 7.131556053892431e-07,
"loss": 0.6302,
"step": 14690
},
{
"epoch": 0.6,
"learning_rate": 7.125382404848867e-07,
"loss": 0.6688,
"step": 14695
},
{
"epoch": 0.6,
"learning_rate": 7.119209949896456e-07,
"loss": 0.6558,
"step": 14700
},
{
"epoch": 0.6,
"learning_rate": 7.113038691599178e-07,
"loss": 0.6402,
"step": 14705
},
{
"epoch": 0.6,
"learning_rate": 7.10686863252052e-07,
"loss": 0.6774,
"step": 14710
},
{
"epoch": 0.6,
"learning_rate": 7.100699775223476e-07,
"loss": 0.6652,
"step": 14715
},
{
"epoch": 0.6,
"learning_rate": 7.094532122270528e-07,
"loss": 0.6132,
"step": 14720
},
{
"epoch": 0.6,
"learning_rate": 7.088365676223671e-07,
"loss": 0.6525,
"step": 14725
},
{
"epoch": 0.6,
"learning_rate": 7.08220043964439e-07,
"loss": 0.6803,
"step": 14730
},
{
"epoch": 0.6,
"learning_rate": 7.076036415093673e-07,
"loss": 0.6476,
"step": 14735
},
{
"epoch": 0.6,
"learning_rate": 7.069873605132002e-07,
"loss": 0.6711,
"step": 14740
},
{
"epoch": 0.6,
"learning_rate": 7.063712012319352e-07,
"loss": 0.6534,
"step": 14745
},
{
"epoch": 0.6,
"learning_rate": 7.057551639215198e-07,
"loss": 0.6575,
"step": 14750
},
{
"epoch": 0.6,
"learning_rate": 7.051392488378503e-07,
"loss": 0.6665,
"step": 14755
},
{
"epoch": 0.6,
"learning_rate": 7.045234562367726e-07,
"loss": 0.6546,
"step": 14760
},
{
"epoch": 0.6,
"learning_rate": 7.039077863740817e-07,
"loss": 0.6307,
"step": 14765
},
{
"epoch": 0.6,
"learning_rate": 7.032922395055215e-07,
"loss": 0.6717,
"step": 14770
},
{
"epoch": 0.6,
"learning_rate": 7.026768158867847e-07,
"loss": 0.7066,
"step": 14775
},
{
"epoch": 0.6,
"learning_rate": 7.020615157735126e-07,
"loss": 0.646,
"step": 14780
},
{
"epoch": 0.6,
"learning_rate": 7.014463394212959e-07,
"loss": 0.6559,
"step": 14785
},
{
"epoch": 0.6,
"learning_rate": 7.008312870856738e-07,
"loss": 0.6847,
"step": 14790
},
{
"epoch": 0.6,
"learning_rate": 7.00216359022133e-07,
"loss": 0.6717,
"step": 14795
},
{
"epoch": 0.6,
"learning_rate": 6.9960155548611e-07,
"loss": 0.6857,
"step": 14800
},
{
"epoch": 0.6,
"learning_rate": 6.989868767329882e-07,
"loss": 0.6655,
"step": 14805
},
{
"epoch": 0.6,
"learning_rate": 6.983723230181e-07,
"loss": 0.6216,
"step": 14810
},
{
"epoch": 0.6,
"learning_rate": 6.97757894596726e-07,
"loss": 0.7093,
"step": 14815
},
{
"epoch": 0.6,
"learning_rate": 6.971435917240939e-07,
"loss": 0.6597,
"step": 14820
},
{
"epoch": 0.6,
"learning_rate": 6.965294146553802e-07,
"loss": 0.6385,
"step": 14825
},
{
"epoch": 0.6,
"learning_rate": 6.959153636457085e-07,
"loss": 0.6444,
"step": 14830
},
{
"epoch": 0.6,
"learning_rate": 6.953014389501504e-07,
"loss": 0.6662,
"step": 14835
},
{
"epoch": 0.6,
"learning_rate": 6.946876408237245e-07,
"loss": 0.6718,
"step": 14840
},
{
"epoch": 0.6,
"learning_rate": 6.940739695213976e-07,
"loss": 0.6337,
"step": 14845
},
{
"epoch": 0.6,
"learning_rate": 6.934604252980833e-07,
"loss": 0.6349,
"step": 14850
},
{
"epoch": 0.6,
"learning_rate": 6.928470084086424e-07,
"loss": 0.6827,
"step": 14855
},
{
"epoch": 0.6,
"learning_rate": 6.922337191078835e-07,
"loss": 0.6323,
"step": 14860
},
{
"epoch": 0.6,
"learning_rate": 6.916205576505607e-07,
"loss": 0.6618,
"step": 14865
},
{
"epoch": 0.6,
"learning_rate": 6.910075242913767e-07,
"loss": 0.7118,
"step": 14870
},
{
"epoch": 0.6,
"learning_rate": 6.903946192849801e-07,
"loss": 0.6686,
"step": 14875
},
{
"epoch": 0.6,
"learning_rate": 6.897818428859659e-07,
"loss": 0.6446,
"step": 14880
},
{
"epoch": 0.6,
"learning_rate": 6.891691953488767e-07,
"loss": 0.7074,
"step": 14885
},
{
"epoch": 0.6,
"learning_rate": 6.885566769282003e-07,
"loss": 0.6567,
"step": 14890
},
{
"epoch": 0.61,
"learning_rate": 6.87944287878372e-07,
"loss": 0.6953,
"step": 14895
},
{
"epoch": 0.61,
"learning_rate": 6.873320284537729e-07,
"loss": 0.644,
"step": 14900
},
{
"epoch": 0.61,
"learning_rate": 6.8671989890873e-07,
"loss": 0.6672,
"step": 14905
},
{
"epoch": 0.61,
"learning_rate": 6.861078994975167e-07,
"loss": 0.6495,
"step": 14910
},
{
"epoch": 0.61,
"learning_rate": 6.85496030474352e-07,
"loss": 0.634,
"step": 14915
},
{
"epoch": 0.61,
"learning_rate": 6.848842920934012e-07,
"loss": 0.6863,
"step": 14920
},
{
"epoch": 0.61,
"learning_rate": 6.842726846087754e-07,
"loss": 0.688,
"step": 14925
},
{
"epoch": 0.61,
"learning_rate": 6.836612082745305e-07,
"loss": 0.6858,
"step": 14930
},
{
"epoch": 0.61,
"learning_rate": 6.830498633446687e-07,
"loss": 0.6644,
"step": 14935
},
{
"epoch": 0.61,
"learning_rate": 6.824386500731373e-07,
"loss": 0.6457,
"step": 14940
},
{
"epoch": 0.61,
"learning_rate": 6.81827568713829e-07,
"loss": 0.661,
"step": 14945
},
{
"epoch": 0.61,
"learning_rate": 6.812166195205818e-07,
"loss": 0.6476,
"step": 14950
},
{
"epoch": 0.61,
"learning_rate": 6.806058027471784e-07,
"loss": 0.6875,
"step": 14955
},
{
"epoch": 0.61,
"learning_rate": 6.799951186473472e-07,
"loss": 0.6149,
"step": 14960
},
{
"epoch": 0.61,
"learning_rate": 6.793845674747606e-07,
"loss": 0.6324,
"step": 14965
},
{
"epoch": 0.61,
"learning_rate": 6.787741494830365e-07,
"loss": 0.6664,
"step": 14970
},
{
"epoch": 0.61,
"learning_rate": 6.781638649257373e-07,
"loss": 0.6524,
"step": 14975
},
{
"epoch": 0.61,
"learning_rate": 6.775537140563696e-07,
"loss": 0.6808,
"step": 14980
},
{
"epoch": 0.61,
"learning_rate": 6.769436971283852e-07,
"loss": 0.6352,
"step": 14985
},
{
"epoch": 0.61,
"learning_rate": 6.763338143951793e-07,
"loss": 0.6175,
"step": 14990
},
{
"epoch": 0.61,
"learning_rate": 6.757240661100925e-07,
"loss": 0.6742,
"step": 14995
},
{
"epoch": 0.61,
"learning_rate": 6.751144525264083e-07,
"loss": 0.6516,
"step": 15000
},
{
"epoch": 0.61,
"learning_rate": 6.745049738973552e-07,
"loss": 0.6177,
"step": 15005
},
{
"epoch": 0.61,
"learning_rate": 6.738956304761054e-07,
"loss": 0.6248,
"step": 15010
},
{
"epoch": 0.61,
"learning_rate": 6.732864225157747e-07,
"loss": 0.6683,
"step": 15015
},
{
"epoch": 0.61,
"learning_rate": 6.726773502694231e-07,
"loss": 0.6681,
"step": 15020
},
{
"epoch": 0.61,
"learning_rate": 6.720684139900533e-07,
"loss": 0.6574,
"step": 15025
},
{
"epoch": 0.61,
"learning_rate": 6.714596139306125e-07,
"loss": 0.6806,
"step": 15030
},
{
"epoch": 0.61,
"learning_rate": 6.708509503439912e-07,
"loss": 0.6361,
"step": 15035
},
{
"epoch": 0.61,
"learning_rate": 6.702424234830227e-07,
"loss": 0.6768,
"step": 15040
},
{
"epoch": 0.61,
"learning_rate": 6.696340336004837e-07,
"loss": 0.6917,
"step": 15045
},
{
"epoch": 0.61,
"learning_rate": 6.690257809490941e-07,
"loss": 0.6971,
"step": 15050
},
{
"epoch": 0.61,
"learning_rate": 6.684176657815166e-07,
"loss": 0.6592,
"step": 15055
},
{
"epoch": 0.61,
"learning_rate": 6.678096883503575e-07,
"loss": 0.6655,
"step": 15060
},
{
"epoch": 0.61,
"learning_rate": 6.672018489081648e-07,
"loss": 0.6655,
"step": 15065
},
{
"epoch": 0.61,
"learning_rate": 6.6659414770743e-07,
"loss": 0.6384,
"step": 15070
},
{
"epoch": 0.61,
"learning_rate": 6.659865850005868e-07,
"loss": 0.6695,
"step": 15075
},
{
"epoch": 0.61,
"learning_rate": 6.653791610400112e-07,
"loss": 0.6991,
"step": 15080
},
{
"epoch": 0.61,
"learning_rate": 6.647718760780226e-07,
"loss": 0.655,
"step": 15085
},
{
"epoch": 0.61,
"learning_rate": 6.641647303668809e-07,
"loss": 0.6917,
"step": 15090
},
{
"epoch": 0.61,
"learning_rate": 6.6355772415879e-07,
"loss": 0.6641,
"step": 15095
},
{
"epoch": 0.61,
"learning_rate": 6.629508577058945e-07,
"loss": 0.6691,
"step": 15100
},
{
"epoch": 0.61,
"learning_rate": 6.623441312602817e-07,
"loss": 0.6896,
"step": 15105
},
{
"epoch": 0.61,
"learning_rate": 6.617375450739802e-07,
"loss": 0.6577,
"step": 15110
},
{
"epoch": 0.61,
"learning_rate": 6.611310993989607e-07,
"loss": 0.6675,
"step": 15115
},
{
"epoch": 0.61,
"learning_rate": 6.60524794487136e-07,
"loss": 0.6525,
"step": 15120
},
{
"epoch": 0.61,
"learning_rate": 6.59918630590359e-07,
"loss": 0.7091,
"step": 15125
},
{
"epoch": 0.61,
"learning_rate": 6.593126079604256e-07,
"loss": 0.6151,
"step": 15130
},
{
"epoch": 0.61,
"learning_rate": 6.587067268490721e-07,
"loss": 0.6501,
"step": 15135
},
{
"epoch": 0.61,
"learning_rate": 6.581009875079759e-07,
"loss": 0.5938,
"step": 15140
},
{
"epoch": 0.62,
"learning_rate": 6.574953901887567e-07,
"loss": 0.6959,
"step": 15145
},
{
"epoch": 0.62,
"learning_rate": 6.568899351429736e-07,
"loss": 0.6624,
"step": 15150
},
{
"epoch": 0.62,
"learning_rate": 6.562846226221276e-07,
"loss": 0.6503,
"step": 15155
},
{
"epoch": 0.62,
"learning_rate": 6.556794528776601e-07,
"loss": 0.6596,
"step": 15160
},
{
"epoch": 0.62,
"learning_rate": 6.550744261609538e-07,
"loss": 0.6702,
"step": 15165
},
{
"epoch": 0.62,
"learning_rate": 6.544695427233312e-07,
"loss": 0.6532,
"step": 15170
},
{
"epoch": 0.62,
"learning_rate": 6.538648028160556e-07,
"loss": 0.6611,
"step": 15175
},
{
"epoch": 0.62,
"learning_rate": 6.532602066903309e-07,
"loss": 0.6825,
"step": 15180
},
{
"epoch": 0.62,
"learning_rate": 6.526557545973007e-07,
"loss": 0.6508,
"step": 15185
},
{
"epoch": 0.62,
"learning_rate": 6.520514467880495e-07,
"loss": 0.6423,
"step": 15190
},
{
"epoch": 0.62,
"learning_rate": 6.514472835136015e-07,
"loss": 0.6711,
"step": 15195
},
{
"epoch": 0.62,
"learning_rate": 6.508432650249206e-07,
"loss": 0.6649,
"step": 15200
},
{
"epoch": 0.62,
"eval_loss": 0.6282991766929626,
"eval_runtime": 145.3768,
"eval_samples_per_second": 16.275,
"eval_steps_per_second": 2.717,
"step": 15200
},
{
"epoch": 0.62,
"learning_rate": 6.502393915729112e-07,
"loss": 0.6628,
"step": 15205
},
{
"epoch": 0.62,
"learning_rate": 6.496356634084167e-07,
"loss": 0.6933,
"step": 15210
},
{
"epoch": 0.62,
"learning_rate": 6.490320807822207e-07,
"loss": 0.6308,
"step": 15215
},
{
"epoch": 0.62,
"learning_rate": 6.484286439450464e-07,
"loss": 0.6538,
"step": 15220
},
{
"epoch": 0.62,
"learning_rate": 6.478253531475557e-07,
"loss": 0.6549,
"step": 15225
},
{
"epoch": 0.62,
"learning_rate": 6.472222086403509e-07,
"loss": 0.6538,
"step": 15230
},
{
"epoch": 0.62,
"learning_rate": 6.466192106739725e-07,
"loss": 0.6725,
"step": 15235
},
{
"epoch": 0.62,
"learning_rate": 6.46016359498901e-07,
"loss": 0.6694,
"step": 15240
},
{
"epoch": 0.62,
"learning_rate": 6.454136553655553e-07,
"loss": 0.6556,
"step": 15245
},
{
"epoch": 0.62,
"learning_rate": 6.448110985242934e-07,
"loss": 0.6585,
"step": 15250
},
{
"epoch": 0.62,
"learning_rate": 6.442086892254126e-07,
"loss": 0.6255,
"step": 15255
},
{
"epoch": 0.62,
"learning_rate": 6.43606427719148e-07,
"loss": 0.6198,
"step": 15260
},
{
"epoch": 0.62,
"learning_rate": 6.43004314255674e-07,
"loss": 0.647,
"step": 15265
},
{
"epoch": 0.62,
"learning_rate": 6.424023490851031e-07,
"loss": 0.6542,
"step": 15270
},
{
"epoch": 0.62,
"learning_rate": 6.418005324574867e-07,
"loss": 0.6648,
"step": 15275
},
{
"epoch": 0.62,
"learning_rate": 6.411988646228142e-07,
"loss": 0.6717,
"step": 15280
},
{
"epoch": 0.62,
"learning_rate": 6.40597345831013e-07,
"loss": 0.6903,
"step": 15285
},
{
"epoch": 0.62,
"learning_rate": 6.399959763319487e-07,
"loss": 0.6067,
"step": 15290
},
{
"epoch": 0.62,
"learning_rate": 6.393947563754251e-07,
"loss": 0.6792,
"step": 15295
},
{
"epoch": 0.62,
"learning_rate": 6.387936862111838e-07,
"loss": 0.6527,
"step": 15300
},
{
"epoch": 0.62,
"learning_rate": 6.381927660889042e-07,
"loss": 0.6708,
"step": 15305
},
{
"epoch": 0.62,
"learning_rate": 6.375919962582028e-07,
"loss": 0.6397,
"step": 15310
},
{
"epoch": 0.62,
"learning_rate": 6.369913769686349e-07,
"loss": 0.6297,
"step": 15315
},
{
"epoch": 0.62,
"learning_rate": 6.363909084696916e-07,
"loss": 0.6562,
"step": 15320
},
{
"epoch": 0.62,
"learning_rate": 6.35790591010803e-07,
"loss": 0.6346,
"step": 15325
},
{
"epoch": 0.62,
"learning_rate": 6.351904248413359e-07,
"loss": 0.6526,
"step": 15330
},
{
"epoch": 0.62,
"learning_rate": 6.345904102105934e-07,
"loss": 0.6721,
"step": 15335
},
{
"epoch": 0.62,
"learning_rate": 6.339905473678171e-07,
"loss": 0.6165,
"step": 15340
},
{
"epoch": 0.62,
"learning_rate": 6.333908365621842e-07,
"loss": 0.6632,
"step": 15345
},
{
"epoch": 0.62,
"learning_rate": 6.327912780428095e-07,
"loss": 0.6743,
"step": 15350
},
{
"epoch": 0.62,
"learning_rate": 6.321918720587449e-07,
"loss": 0.6507,
"step": 15355
},
{
"epoch": 0.62,
"learning_rate": 6.315926188589776e-07,
"loss": 0.6477,
"step": 15360
},
{
"epoch": 0.62,
"learning_rate": 6.309935186924329e-07,
"loss": 0.6308,
"step": 15365
},
{
"epoch": 0.62,
"learning_rate": 6.303945718079713e-07,
"loss": 0.6621,
"step": 15370
},
{
"epoch": 0.62,
"learning_rate": 6.297957784543903e-07,
"loss": 0.6398,
"step": 15375
},
{
"epoch": 0.62,
"learning_rate": 6.291971388804238e-07,
"loss": 0.668,
"step": 15380
},
{
"epoch": 0.62,
"learning_rate": 6.28598653334741e-07,
"loss": 0.6778,
"step": 15385
},
{
"epoch": 0.63,
"learning_rate": 6.280003220659476e-07,
"loss": 0.6458,
"step": 15390
},
{
"epoch": 0.63,
"learning_rate": 6.274021453225854e-07,
"loss": 0.6483,
"step": 15395
},
{
"epoch": 0.63,
"learning_rate": 6.268041233531321e-07,
"loss": 0.6463,
"step": 15400
},
{
"epoch": 0.63,
"learning_rate": 6.262062564059999e-07,
"loss": 0.7068,
"step": 15405
},
{
"epoch": 0.63,
"learning_rate": 6.256085447295383e-07,
"loss": 0.645,
"step": 15410
},
{
"epoch": 0.63,
"learning_rate": 6.250109885720316e-07,
"loss": 0.6908,
"step": 15415
},
{
"epoch": 0.63,
"learning_rate": 6.24413588181699e-07,
"loss": 0.6441,
"step": 15420
},
{
"epoch": 0.63,
"learning_rate": 6.238163438066959e-07,
"loss": 0.6636,
"step": 15425
},
{
"epoch": 0.63,
"learning_rate": 6.232192556951116e-07,
"loss": 0.6329,
"step": 15430
},
{
"epoch": 0.63,
"learning_rate": 6.22622324094972e-07,
"loss": 0.6181,
"step": 15435
},
{
"epoch": 0.63,
"learning_rate": 6.220255492542374e-07,
"loss": 0.6664,
"step": 15440
},
{
"epoch": 0.63,
"learning_rate": 6.214289314208023e-07,
"loss": 0.6294,
"step": 15445
},
{
"epoch": 0.63,
"learning_rate": 6.208324708424975e-07,
"loss": 0.6518,
"step": 15450
},
{
"epoch": 0.63,
"learning_rate": 6.202361677670861e-07,
"loss": 0.7155,
"step": 15455
},
{
"epoch": 0.63,
"learning_rate": 6.196400224422682e-07,
"loss": 0.6748,
"step": 15460
},
{
"epoch": 0.63,
"learning_rate": 6.190440351156776e-07,
"loss": 0.655,
"step": 15465
},
{
"epoch": 0.63,
"learning_rate": 6.184482060348815e-07,
"loss": 0.7019,
"step": 15470
},
{
"epoch": 0.63,
"learning_rate": 6.178525354473829e-07,
"loss": 0.6503,
"step": 15475
},
{
"epoch": 0.63,
"learning_rate": 6.172570236006173e-07,
"loss": 0.6535,
"step": 15480
},
{
"epoch": 0.63,
"learning_rate": 6.166616707419556e-07,
"loss": 0.6511,
"step": 15485
},
{
"epoch": 0.63,
"learning_rate": 6.160664771187025e-07,
"loss": 0.672,
"step": 15490
},
{
"epoch": 0.63,
"learning_rate": 6.154714429780958e-07,
"loss": 0.6731,
"step": 15495
},
{
"epoch": 0.63,
"learning_rate": 6.148765685673078e-07,
"loss": 0.6348,
"step": 15500
},
{
"epoch": 0.63,
"learning_rate": 6.142818541334438e-07,
"loss": 0.6413,
"step": 15505
},
{
"epoch": 0.63,
"learning_rate": 6.136872999235431e-07,
"loss": 0.6691,
"step": 15510
},
{
"epoch": 0.63,
"learning_rate": 6.130929061845789e-07,
"loss": 0.6727,
"step": 15515
},
{
"epoch": 0.63,
"learning_rate": 6.124986731634565e-07,
"loss": 0.6383,
"step": 15520
},
{
"epoch": 0.63,
"learning_rate": 6.119046011070156e-07,
"loss": 0.6822,
"step": 15525
},
{
"epoch": 0.63,
"learning_rate": 6.113106902620282e-07,
"loss": 0.6817,
"step": 15530
},
{
"epoch": 0.63,
"learning_rate": 6.107169408752003e-07,
"loss": 0.6315,
"step": 15535
},
{
"epoch": 0.63,
"learning_rate": 6.101233531931688e-07,
"loss": 0.6421,
"step": 15540
},
{
"epoch": 0.63,
"learning_rate": 6.095299274625064e-07,
"loss": 0.6542,
"step": 15545
},
{
"epoch": 0.63,
"learning_rate": 6.089366639297166e-07,
"loss": 0.6401,
"step": 15550
},
{
"epoch": 0.63,
"learning_rate": 6.083435628412355e-07,
"loss": 0.6219,
"step": 15555
},
{
"epoch": 0.63,
"learning_rate": 6.077506244434328e-07,
"loss": 0.676,
"step": 15560
},
{
"epoch": 0.63,
"learning_rate": 6.07157848982609e-07,
"loss": 0.6691,
"step": 15565
},
{
"epoch": 0.63,
"learning_rate": 6.065652367049985e-07,
"loss": 0.6419,
"step": 15570
},
{
"epoch": 0.63,
"learning_rate": 6.059727878567675e-07,
"loss": 0.6513,
"step": 15575
},
{
"epoch": 0.63,
"learning_rate": 6.053805026840136e-07,
"loss": 0.6663,
"step": 15580
},
{
"epoch": 0.63,
"learning_rate": 6.047883814327675e-07,
"loss": 0.6643,
"step": 15585
},
{
"epoch": 0.63,
"learning_rate": 6.041964243489901e-07,
"loss": 0.6548,
"step": 15590
},
{
"epoch": 0.63,
"learning_rate": 6.036046316785761e-07,
"loss": 0.6474,
"step": 15595
},
{
"epoch": 0.63,
"learning_rate": 6.030130036673514e-07,
"loss": 0.6343,
"step": 15600
},
{
"epoch": 0.63,
"learning_rate": 6.024215405610722e-07,
"loss": 0.6524,
"step": 15605
},
{
"epoch": 0.63,
"learning_rate": 6.018302426054278e-07,
"loss": 0.6536,
"step": 15610
},
{
"epoch": 0.63,
"learning_rate": 6.012391100460377e-07,
"loss": 0.6917,
"step": 15615
},
{
"epoch": 0.63,
"learning_rate": 6.006481431284533e-07,
"loss": 0.6504,
"step": 15620
},
{
"epoch": 0.63,
"learning_rate": 6.000573420981578e-07,
"loss": 0.6773,
"step": 15625
},
{
"epoch": 0.63,
"learning_rate": 5.994667072005641e-07,
"loss": 0.6295,
"step": 15630
},
{
"epoch": 0.64,
"learning_rate": 5.988762386810173e-07,
"loss": 0.6617,
"step": 15635
},
{
"epoch": 0.64,
"learning_rate": 5.982859367847921e-07,
"loss": 0.642,
"step": 15640
},
{
"epoch": 0.64,
"learning_rate": 5.976958017570954e-07,
"loss": 0.6194,
"step": 15645
},
{
"epoch": 0.64,
"learning_rate": 5.971058338430643e-07,
"loss": 0.6599,
"step": 15650
},
{
"epoch": 0.64,
"learning_rate": 5.965160332877661e-07,
"loss": 0.657,
"step": 15655
},
{
"epoch": 0.64,
"learning_rate": 5.959264003361988e-07,
"loss": 0.6272,
"step": 15660
},
{
"epoch": 0.64,
"learning_rate": 5.953369352332904e-07,
"loss": 0.66,
"step": 15665
},
{
"epoch": 0.64,
"learning_rate": 5.947476382239007e-07,
"loss": 0.6354,
"step": 15670
},
{
"epoch": 0.64,
"learning_rate": 5.94158509552817e-07,
"loss": 0.6648,
"step": 15675
},
{
"epoch": 0.64,
"learning_rate": 5.935695494647595e-07,
"loss": 0.6793,
"step": 15680
},
{
"epoch": 0.64,
"learning_rate": 5.929807582043768e-07,
"loss": 0.637,
"step": 15685
},
{
"epoch": 0.64,
"learning_rate": 5.923921360162471e-07,
"loss": 0.6285,
"step": 15690
},
{
"epoch": 0.64,
"learning_rate": 5.9180368314488e-07,
"loss": 0.6667,
"step": 15695
},
{
"epoch": 0.64,
"learning_rate": 5.912153998347123e-07,
"loss": 0.6879,
"step": 15700
},
{
"epoch": 0.64,
"learning_rate": 5.90627286330113e-07,
"loss": 0.6651,
"step": 15705
},
{
"epoch": 0.64,
"learning_rate": 5.900393428753791e-07,
"loss": 0.6321,
"step": 15710
},
{
"epoch": 0.64,
"learning_rate": 5.894515697147365e-07,
"loss": 0.6323,
"step": 15715
},
{
"epoch": 0.64,
"learning_rate": 5.888639670923419e-07,
"loss": 0.662,
"step": 15720
},
{
"epoch": 0.64,
"learning_rate": 5.882765352522795e-07,
"loss": 0.609,
"step": 15725
},
{
"epoch": 0.64,
"learning_rate": 5.87689274438564e-07,
"loss": 0.6352,
"step": 15730
},
{
"epoch": 0.64,
"learning_rate": 5.871021848951387e-07,
"loss": 0.6479,
"step": 15735
},
{
"epoch": 0.64,
"learning_rate": 5.865152668658745e-07,
"loss": 0.6526,
"step": 15740
},
{
"epoch": 0.64,
"learning_rate": 5.859285205945732e-07,
"loss": 0.6382,
"step": 15745
},
{
"epoch": 0.64,
"learning_rate": 5.85341946324963e-07,
"loss": 0.6363,
"step": 15750
},
{
"epoch": 0.64,
"learning_rate": 5.847555443007022e-07,
"loss": 0.6941,
"step": 15755
},
{
"epoch": 0.64,
"learning_rate": 5.841693147653774e-07,
"loss": 0.6592,
"step": 15760
},
{
"epoch": 0.64,
"learning_rate": 5.835832579625031e-07,
"loss": 0.6243,
"step": 15765
},
{
"epoch": 0.64,
"learning_rate": 5.829973741355221e-07,
"loss": 0.664,
"step": 15770
},
{
"epoch": 0.64,
"learning_rate": 5.824116635278052e-07,
"loss": 0.644,
"step": 15775
},
{
"epoch": 0.64,
"learning_rate": 5.818261263826518e-07,
"loss": 0.6605,
"step": 15780
},
{
"epoch": 0.64,
"learning_rate": 5.812407629432891e-07,
"loss": 0.6545,
"step": 15785
},
{
"epoch": 0.64,
"learning_rate": 5.806555734528713e-07,
"loss": 0.6652,
"step": 15790
},
{
"epoch": 0.64,
"learning_rate": 5.800705581544821e-07,
"loss": 0.6178,
"step": 15795
},
{
"epoch": 0.64,
"learning_rate": 5.794857172911309e-07,
"loss": 0.6393,
"step": 15800
},
{
"epoch": 0.64,
"learning_rate": 5.789010511057555e-07,
"loss": 0.6067,
"step": 15805
},
{
"epoch": 0.64,
"learning_rate": 5.783165598412216e-07,
"loss": 0.6481,
"step": 15810
},
{
"epoch": 0.64,
"learning_rate": 5.777322437403218e-07,
"loss": 0.6759,
"step": 15815
},
{
"epoch": 0.64,
"learning_rate": 5.771481030457757e-07,
"loss": 0.6192,
"step": 15820
},
{
"epoch": 0.64,
"learning_rate": 5.765641380002299e-07,
"loss": 0.6652,
"step": 15825
},
{
"epoch": 0.64,
"learning_rate": 5.759803488462593e-07,
"loss": 0.6694,
"step": 15830
},
{
"epoch": 0.64,
"learning_rate": 5.753967358263642e-07,
"loss": 0.6317,
"step": 15835
},
{
"epoch": 0.64,
"learning_rate": 5.748132991829722e-07,
"loss": 0.6434,
"step": 15840
},
{
"epoch": 0.64,
"learning_rate": 5.742300391584384e-07,
"loss": 0.6846,
"step": 15845
},
{
"epoch": 0.64,
"learning_rate": 5.736469559950437e-07,
"loss": 0.6559,
"step": 15850
},
{
"epoch": 0.64,
"learning_rate": 5.730640499349957e-07,
"loss": 0.6625,
"step": 15855
},
{
"epoch": 0.64,
"learning_rate": 5.724813212204283e-07,
"loss": 0.633,
"step": 15860
},
{
"epoch": 0.64,
"learning_rate": 5.718987700934024e-07,
"loss": 0.6763,
"step": 15865
},
{
"epoch": 0.64,
"learning_rate": 5.713163967959045e-07,
"loss": 0.667,
"step": 15870
},
{
"epoch": 0.64,
"learning_rate": 5.70734201569847e-07,
"loss": 0.6555,
"step": 15875
},
{
"epoch": 0.65,
"learning_rate": 5.701521846570693e-07,
"loss": 0.6228,
"step": 15880
},
{
"epoch": 0.65,
"learning_rate": 5.695703462993362e-07,
"loss": 0.6761,
"step": 15885
},
{
"epoch": 0.65,
"learning_rate": 5.689886867383376e-07,
"loss": 0.6576,
"step": 15890
},
{
"epoch": 0.65,
"learning_rate": 5.684072062156906e-07,
"loss": 0.6484,
"step": 15895
},
{
"epoch": 0.65,
"learning_rate": 5.678259049729368e-07,
"loss": 0.6562,
"step": 15900
},
{
"epoch": 0.65,
"learning_rate": 5.672447832515439e-07,
"loss": 0.667,
"step": 15905
},
{
"epoch": 0.65,
"learning_rate": 5.66663841292904e-07,
"loss": 0.669,
"step": 15910
},
{
"epoch": 0.65,
"learning_rate": 5.660830793383364e-07,
"loss": 0.6429,
"step": 15915
},
{
"epoch": 0.65,
"learning_rate": 5.655024976290842e-07,
"loss": 0.6619,
"step": 15920
},
{
"epoch": 0.65,
"learning_rate": 5.649220964063154e-07,
"loss": 0.662,
"step": 15925
},
{
"epoch": 0.65,
"learning_rate": 5.643418759111247e-07,
"loss": 0.6673,
"step": 15930
},
{
"epoch": 0.65,
"learning_rate": 5.637618363845299e-07,
"loss": 0.6804,
"step": 15935
},
{
"epoch": 0.65,
"learning_rate": 5.631819780674742e-07,
"loss": 0.6447,
"step": 15940
},
{
"epoch": 0.65,
"learning_rate": 5.626023012008265e-07,
"loss": 0.6468,
"step": 15945
},
{
"epoch": 0.65,
"learning_rate": 5.620228060253791e-07,
"loss": 0.6499,
"step": 15950
},
{
"epoch": 0.65,
"learning_rate": 5.614434927818493e-07,
"loss": 0.6622,
"step": 15955
},
{
"epoch": 0.65,
"learning_rate": 5.608643617108781e-07,
"loss": 0.6222,
"step": 15960
},
{
"epoch": 0.65,
"learning_rate": 5.602854130530326e-07,
"loss": 0.6698,
"step": 15965
},
{
"epoch": 0.65,
"learning_rate": 5.597066470488026e-07,
"loss": 0.6493,
"step": 15970
},
{
"epoch": 0.65,
"learning_rate": 5.591280639386021e-07,
"loss": 0.6689,
"step": 15975
},
{
"epoch": 0.65,
"learning_rate": 5.585496639627702e-07,
"loss": 0.6526,
"step": 15980
},
{
"epoch": 0.65,
"learning_rate": 5.579714473615689e-07,
"loss": 0.6491,
"step": 15985
},
{
"epoch": 0.65,
"learning_rate": 5.573934143751841e-07,
"loss": 0.6763,
"step": 15990
},
{
"epoch": 0.65,
"learning_rate": 5.568155652437257e-07,
"loss": 0.6604,
"step": 15995
},
{
"epoch": 0.65,
"learning_rate": 5.562379002072277e-07,
"loss": 0.6458,
"step": 16000
},
{
"epoch": 0.65,
"eval_loss": 0.6262282133102417,
"eval_runtime": 144.7729,
"eval_samples_per_second": 16.343,
"eval_steps_per_second": 2.728,
"step": 16000
},
{
"epoch": 0.65,
"learning_rate": 5.556604195056469e-07,
"loss": 0.6665,
"step": 16005
},
{
"epoch": 0.65,
"learning_rate": 5.550831233788631e-07,
"loss": 0.6382,
"step": 16010
},
{
"epoch": 0.65,
"learning_rate": 5.545060120666811e-07,
"loss": 0.6322,
"step": 16015
},
{
"epoch": 0.65,
"learning_rate": 5.539290858088277e-07,
"loss": 0.6352,
"step": 16020
},
{
"epoch": 0.65,
"learning_rate": 5.533523448449522e-07,
"loss": 0.6883,
"step": 16025
},
{
"epoch": 0.65,
"learning_rate": 5.527757894146288e-07,
"loss": 0.6319,
"step": 16030
},
{
"epoch": 0.65,
"learning_rate": 5.521994197573534e-07,
"loss": 0.6687,
"step": 16035
},
{
"epoch": 0.65,
"learning_rate": 5.516232361125446e-07,
"loss": 0.6326,
"step": 16040
},
{
"epoch": 0.65,
"learning_rate": 5.510472387195439e-07,
"loss": 0.6674,
"step": 16045
},
{
"epoch": 0.65,
"learning_rate": 5.504714278176163e-07,
"loss": 0.6257,
"step": 16050
},
{
"epoch": 0.65,
"learning_rate": 5.498958036459484e-07,
"loss": 0.7107,
"step": 16055
},
{
"epoch": 0.65,
"learning_rate": 5.49320366443649e-07,
"loss": 0.6777,
"step": 16060
},
{
"epoch": 0.65,
"learning_rate": 5.487451164497503e-07,
"loss": 0.6695,
"step": 16065
},
{
"epoch": 0.65,
"learning_rate": 5.481700539032062e-07,
"loss": 0.6443,
"step": 16070
},
{
"epoch": 0.65,
"learning_rate": 5.475951790428918e-07,
"loss": 0.6397,
"step": 16075
},
{
"epoch": 0.65,
"learning_rate": 5.47020492107606e-07,
"loss": 0.6617,
"step": 16080
},
{
"epoch": 0.65,
"learning_rate": 5.464459933360687e-07,
"loss": 0.6453,
"step": 16085
},
{
"epoch": 0.65,
"learning_rate": 5.458716829669215e-07,
"loss": 0.6691,
"step": 16090
},
{
"epoch": 0.65,
"learning_rate": 5.452975612387274e-07,
"loss": 0.6762,
"step": 16095
},
{
"epoch": 0.65,
"learning_rate": 5.447236283899723e-07,
"loss": 0.6542,
"step": 16100
},
{
"epoch": 0.65,
"learning_rate": 5.441498846590629e-07,
"loss": 0.6588,
"step": 16105
},
{
"epoch": 0.65,
"learning_rate": 5.435763302843268e-07,
"loss": 0.666,
"step": 16110
},
{
"epoch": 0.65,
"learning_rate": 5.43002965504014e-07,
"loss": 0.6717,
"step": 16115
},
{
"epoch": 0.65,
"learning_rate": 5.424297905562953e-07,
"loss": 0.6642,
"step": 16120
},
{
"epoch": 0.65,
"learning_rate": 5.418568056792624e-07,
"loss": 0.6994,
"step": 16125
},
{
"epoch": 0.66,
"learning_rate": 5.412840111109277e-07,
"loss": 0.6606,
"step": 16130
},
{
"epoch": 0.66,
"learning_rate": 5.40711407089226e-07,
"loss": 0.6669,
"step": 16135
},
{
"epoch": 0.66,
"learning_rate": 5.401389938520118e-07,
"loss": 0.6357,
"step": 16140
},
{
"epoch": 0.66,
"learning_rate": 5.395667716370598e-07,
"loss": 0.6473,
"step": 16145
},
{
"epoch": 0.66,
"learning_rate": 5.389947406820672e-07,
"loss": 0.6316,
"step": 16150
},
{
"epoch": 0.66,
"learning_rate": 5.384229012246501e-07,
"loss": 0.6444,
"step": 16155
},
{
"epoch": 0.66,
"learning_rate": 5.378512535023457e-07,
"loss": 0.6439,
"step": 16160
},
{
"epoch": 0.66,
"learning_rate": 5.372797977526115e-07,
"loss": 0.6778,
"step": 16165
},
{
"epoch": 0.66,
"learning_rate": 5.367085342128257e-07,
"loss": 0.6545,
"step": 16170
},
{
"epoch": 0.66,
"learning_rate": 5.361374631202857e-07,
"loss": 0.6666,
"step": 16175
},
{
"epoch": 0.66,
"learning_rate": 5.35566584712209e-07,
"loss": 0.657,
"step": 16180
},
{
"epoch": 0.66,
"learning_rate": 5.349958992257346e-07,
"loss": 0.6504,
"step": 16185
},
{
"epoch": 0.66,
"learning_rate": 5.344254068979199e-07,
"loss": 0.6331,
"step": 16190
},
{
"epoch": 0.66,
"learning_rate": 5.338551079657418e-07,
"loss": 0.6388,
"step": 16195
},
{
"epoch": 0.66,
"learning_rate": 5.332850026660985e-07,
"loss": 0.6554,
"step": 16200
},
{
"epoch": 0.66,
"learning_rate": 5.327150912358065e-07,
"loss": 0.6618,
"step": 16205
},
{
"epoch": 0.66,
"learning_rate": 5.321453739116013e-07,
"loss": 0.6708,
"step": 16210
},
{
"epoch": 0.66,
"learning_rate": 5.315758509301396e-07,
"loss": 0.6671,
"step": 16215
},
{
"epoch": 0.66,
"learning_rate": 5.310065225279957e-07,
"loss": 0.6527,
"step": 16220
},
{
"epoch": 0.66,
"learning_rate": 5.304373889416638e-07,
"loss": 0.7327,
"step": 16225
},
{
"epoch": 0.66,
"learning_rate": 5.298684504075567e-07,
"loss": 0.6462,
"step": 16230
},
{
"epoch": 0.66,
"learning_rate": 5.292997071620072e-07,
"loss": 0.6424,
"step": 16235
},
{
"epoch": 0.66,
"learning_rate": 5.28731159441266e-07,
"loss": 0.6477,
"step": 16240
},
{
"epoch": 0.66,
"learning_rate": 5.281628074815023e-07,
"loss": 0.6995,
"step": 16245
},
{
"epoch": 0.66,
"learning_rate": 5.275946515188055e-07,
"loss": 0.6511,
"step": 16250
},
{
"epoch": 0.66,
"learning_rate": 5.270266917891822e-07,
"loss": 0.6662,
"step": 16255
},
{
"epoch": 0.66,
"learning_rate": 5.264589285285581e-07,
"loss": 0.6326,
"step": 16260
},
{
"epoch": 0.66,
"learning_rate": 5.258913619727765e-07,
"loss": 0.6911,
"step": 16265
},
{
"epoch": 0.66,
"learning_rate": 5.253239923576003e-07,
"loss": 0.6766,
"step": 16270
},
{
"epoch": 0.66,
"learning_rate": 5.247568199187099e-07,
"loss": 0.665,
"step": 16275
},
{
"epoch": 0.66,
"learning_rate": 5.241898448917032e-07,
"loss": 0.6645,
"step": 16280
},
{
"epoch": 0.66,
"learning_rate": 5.236230675120974e-07,
"loss": 0.6634,
"step": 16285
},
{
"epoch": 0.66,
"learning_rate": 5.230564880153266e-07,
"loss": 0.6747,
"step": 16290
},
{
"epoch": 0.66,
"learning_rate": 5.224901066367426e-07,
"loss": 0.689,
"step": 16295
},
{
"epoch": 0.66,
"learning_rate": 5.21923923611616e-07,
"loss": 0.6862,
"step": 16300
},
{
"epoch": 0.66,
"learning_rate": 5.21357939175134e-07,
"loss": 0.6523,
"step": 16305
},
{
"epoch": 0.66,
"learning_rate": 5.207921535624017e-07,
"loss": 0.6355,
"step": 16310
},
{
"epoch": 0.66,
"learning_rate": 5.202265670084409e-07,
"loss": 0.6314,
"step": 16315
},
{
"epoch": 0.66,
"learning_rate": 5.196611797481922e-07,
"loss": 0.6796,
"step": 16320
},
{
"epoch": 0.66,
"learning_rate": 5.190959920165124e-07,
"loss": 0.7183,
"step": 16325
},
{
"epoch": 0.66,
"learning_rate": 5.185310040481749e-07,
"loss": 0.6641,
"step": 16330
},
{
"epoch": 0.66,
"learning_rate": 5.179662160778716e-07,
"loss": 0.659,
"step": 16335
},
{
"epoch": 0.66,
"learning_rate": 5.174016283402101e-07,
"loss": 0.6464,
"step": 16340
},
{
"epoch": 0.66,
"learning_rate": 5.16837241069715e-07,
"loss": 0.6658,
"step": 16345
},
{
"epoch": 0.66,
"learning_rate": 5.162730545008287e-07,
"loss": 0.6686,
"step": 16350
},
{
"epoch": 0.66,
"learning_rate": 5.157090688679087e-07,
"loss": 0.6493,
"step": 16355
},
{
"epoch": 0.66,
"learning_rate": 5.1514528440523e-07,
"loss": 0.6698,
"step": 16360
},
{
"epoch": 0.66,
"learning_rate": 5.14581701346983e-07,
"loss": 0.6519,
"step": 16365
},
{
"epoch": 0.66,
"learning_rate": 5.140183199272765e-07,
"loss": 0.6418,
"step": 16370
},
{
"epoch": 0.67,
"learning_rate": 5.134551403801336e-07,
"loss": 0.6559,
"step": 16375
},
{
"epoch": 0.67,
"learning_rate": 5.128921629394937e-07,
"loss": 0.6312,
"step": 16380
},
{
"epoch": 0.67,
"learning_rate": 5.123293878392136e-07,
"loss": 0.6356,
"step": 16385
},
{
"epoch": 0.67,
"learning_rate": 5.11766815313065e-07,
"loss": 0.6799,
"step": 16390
},
{
"epoch": 0.67,
"learning_rate": 5.112044455947352e-07,
"loss": 0.6431,
"step": 16395
},
{
"epoch": 0.67,
"learning_rate": 5.106422789178275e-07,
"loss": 0.6758,
"step": 16400
},
{
"epoch": 0.67,
"learning_rate": 5.100803155158621e-07,
"loss": 0.6414,
"step": 16405
},
{
"epoch": 0.67,
"learning_rate": 5.09518555622273e-07,
"loss": 0.679,
"step": 16410
},
{
"epoch": 0.67,
"learning_rate": 5.089569994704099e-07,
"loss": 0.6796,
"step": 16415
},
{
"epoch": 0.67,
"learning_rate": 5.083956472935395e-07,
"loss": 0.6583,
"step": 16420
},
{
"epoch": 0.67,
"learning_rate": 5.07834499324842e-07,
"loss": 0.6481,
"step": 16425
},
{
"epoch": 0.67,
"learning_rate": 5.072735557974128e-07,
"loss": 0.6472,
"step": 16430
},
{
"epoch": 0.67,
"learning_rate": 5.067128169442642e-07,
"loss": 0.6412,
"step": 16435
},
{
"epoch": 0.67,
"learning_rate": 5.061522829983215e-07,
"loss": 0.6273,
"step": 16440
},
{
"epoch": 0.67,
"learning_rate": 5.055919541924258e-07,
"loss": 0.6899,
"step": 16445
},
{
"epoch": 0.67,
"learning_rate": 5.050318307593322e-07,
"loss": 0.6384,
"step": 16450
},
{
"epoch": 0.67,
"learning_rate": 5.044719129317121e-07,
"loss": 0.6764,
"step": 16455
},
{
"epoch": 0.67,
"learning_rate": 5.039122009421501e-07,
"loss": 0.635,
"step": 16460
},
{
"epoch": 0.67,
"learning_rate": 5.033526950231452e-07,
"loss": 0.6379,
"step": 16465
},
{
"epoch": 0.67,
"learning_rate": 5.027933954071121e-07,
"loss": 0.656,
"step": 16470
},
{
"epoch": 0.67,
"learning_rate": 5.022343023263789e-07,
"loss": 0.667,
"step": 16475
},
{
"epoch": 0.67,
"learning_rate": 5.016754160131871e-07,
"loss": 0.6475,
"step": 16480
},
{
"epoch": 0.67,
"learning_rate": 5.011167366996942e-07,
"loss": 0.6578,
"step": 16485
},
{
"epoch": 0.67,
"learning_rate": 5.005582646179705e-07,
"loss": 0.642,
"step": 16490
},
{
"epoch": 0.67,
"learning_rate": 5.000000000000002e-07,
"loss": 0.655,
"step": 16495
},
{
"epoch": 0.67,
"learning_rate": 4.994419430776813e-07,
"loss": 0.6781,
"step": 16500
},
{
"epoch": 0.67,
"learning_rate": 4.988840940828266e-07,
"loss": 0.6668,
"step": 16505
},
{
"epoch": 0.67,
"learning_rate": 4.983264532471613e-07,
"loss": 0.6634,
"step": 16510
},
{
"epoch": 0.67,
"learning_rate": 4.977690208023243e-07,
"loss": 0.6537,
"step": 16515
},
{
"epoch": 0.67,
"learning_rate": 4.972117969798687e-07,
"loss": 0.6613,
"step": 16520
},
{
"epoch": 0.67,
"learning_rate": 4.966547820112602e-07,
"loss": 0.618,
"step": 16525
},
{
"epoch": 0.67,
"learning_rate": 4.96097976127878e-07,
"loss": 0.6319,
"step": 16530
},
{
"epoch": 0.67,
"learning_rate": 4.955413795610139e-07,
"loss": 0.63,
"step": 16535
},
{
"epoch": 0.67,
"learning_rate": 4.949849925418741e-07,
"loss": 0.6434,
"step": 16540
},
{
"epoch": 0.67,
"learning_rate": 4.944288153015767e-07,
"loss": 0.6651,
"step": 16545
},
{
"epoch": 0.67,
"learning_rate": 4.938728480711523e-07,
"loss": 0.641,
"step": 16550
},
{
"epoch": 0.67,
"learning_rate": 4.933170910815456e-07,
"loss": 0.6605,
"step": 16555
},
{
"epoch": 0.67,
"learning_rate": 4.927615445636131e-07,
"loss": 0.658,
"step": 16560
},
{
"epoch": 0.67,
"learning_rate": 4.922062087481233e-07,
"loss": 0.6616,
"step": 16565
},
{
"epoch": 0.67,
"learning_rate": 4.916510838657589e-07,
"loss": 0.6089,
"step": 16570
},
{
"epoch": 0.67,
"learning_rate": 4.910961701471135e-07,
"loss": 0.6732,
"step": 16575
},
{
"epoch": 0.67,
"learning_rate": 4.905414678226932e-07,
"loss": 0.6555,
"step": 16580
},
{
"epoch": 0.67,
"learning_rate": 4.899869771229164e-07,
"loss": 0.6712,
"step": 16585
},
{
"epoch": 0.67,
"learning_rate": 4.894326982781144e-07,
"loss": 0.6536,
"step": 16590
},
{
"epoch": 0.67,
"learning_rate": 4.888786315185295e-07,
"loss": 0.6191,
"step": 16595
},
{
"epoch": 0.67,
"learning_rate": 4.883247770743156e-07,
"loss": 0.6292,
"step": 16600
},
{
"epoch": 0.67,
"learning_rate": 4.8777113517554e-07,
"loss": 0.6654,
"step": 16605
},
{
"epoch": 0.67,
"learning_rate": 4.872177060521803e-07,
"loss": 0.6175,
"step": 16610
},
{
"epoch": 0.67,
"learning_rate": 4.866644899341258e-07,
"loss": 0.6802,
"step": 16615
},
{
"epoch": 0.68,
"learning_rate": 4.861114870511783e-07,
"loss": 0.6134,
"step": 16620
},
{
"epoch": 0.68,
"learning_rate": 4.855586976330501e-07,
"loss": 0.6653,
"step": 16625
},
{
"epoch": 0.68,
"learning_rate": 4.850061219093651e-07,
"loss": 0.691,
"step": 16630
},
{
"epoch": 0.68,
"learning_rate": 4.844537601096582e-07,
"loss": 0.6575,
"step": 16635
},
{
"epoch": 0.68,
"learning_rate": 4.839016124633762e-07,
"loss": 0.6805,
"step": 16640
},
{
"epoch": 0.68,
"learning_rate": 4.833496791998762e-07,
"loss": 0.6172,
"step": 16645
},
{
"epoch": 0.68,
"learning_rate": 4.827979605484261e-07,
"loss": 0.647,
"step": 16650
},
{
"epoch": 0.68,
"learning_rate": 4.822464567382056e-07,
"loss": 0.6365,
"step": 16655
},
{
"epoch": 0.68,
"learning_rate": 4.816951679983046e-07,
"loss": 0.6272,
"step": 16660
},
{
"epoch": 0.68,
"learning_rate": 4.811440945577232e-07,
"loss": 0.6359,
"step": 16665
},
{
"epoch": 0.68,
"learning_rate": 4.805932366453725e-07,
"loss": 0.6765,
"step": 16670
},
{
"epoch": 0.68,
"learning_rate": 4.800425944900747e-07,
"loss": 0.6651,
"step": 16675
},
{
"epoch": 0.68,
"learning_rate": 4.794921683205613e-07,
"loss": 0.644,
"step": 16680
},
{
"epoch": 0.68,
"learning_rate": 4.789419583654742e-07,
"loss": 0.6639,
"step": 16685
},
{
"epoch": 0.68,
"learning_rate": 4.783919648533668e-07,
"loss": 0.6714,
"step": 16690
},
{
"epoch": 0.68,
"learning_rate": 4.778421880127009e-07,
"loss": 0.6429,
"step": 16695
},
{
"epoch": 0.68,
"learning_rate": 4.772926280718488e-07,
"loss": 0.6389,
"step": 16700
},
{
"epoch": 0.68,
"learning_rate": 4.767432852590939e-07,
"loss": 0.6473,
"step": 16705
},
{
"epoch": 0.68,
"learning_rate": 4.7619415980262765e-07,
"loss": 0.6766,
"step": 16710
},
{
"epoch": 0.68,
"learning_rate": 4.756452519305523e-07,
"loss": 0.6198,
"step": 16715
},
{
"epoch": 0.68,
"learning_rate": 4.7509656187087886e-07,
"loss": 0.6646,
"step": 16720
},
{
"epoch": 0.68,
"learning_rate": 4.745480898515293e-07,
"loss": 0.6313,
"step": 16725
},
{
"epoch": 0.68,
"learning_rate": 4.7399983610033346e-07,
"loss": 0.6982,
"step": 16730
},
{
"epoch": 0.68,
"learning_rate": 4.734518008450311e-07,
"loss": 0.706,
"step": 16735
},
{
"epoch": 0.68,
"learning_rate": 4.7290398431327207e-07,
"loss": 0.6684,
"step": 16740
},
{
"epoch": 0.68,
"learning_rate": 4.7235638673261404e-07,
"loss": 0.6462,
"step": 16745
},
{
"epoch": 0.68,
"learning_rate": 4.7180900833052394e-07,
"loss": 0.6975,
"step": 16750
},
{
"epoch": 0.68,
"learning_rate": 4.7126184933437873e-07,
"loss": 0.6282,
"step": 16755
},
{
"epoch": 0.68,
"learning_rate": 4.7071490997146323e-07,
"loss": 0.6415,
"step": 16760
},
{
"epoch": 0.68,
"learning_rate": 4.7016819046897126e-07,
"loss": 0.6512,
"step": 16765
},
{
"epoch": 0.68,
"learning_rate": 4.6962169105400495e-07,
"loss": 0.6839,
"step": 16770
},
{
"epoch": 0.68,
"learning_rate": 4.6907541195357613e-07,
"loss": 0.6516,
"step": 16775
},
{
"epoch": 0.68,
"learning_rate": 4.6852935339460407e-07,
"loss": 0.6369,
"step": 16780
},
{
"epoch": 0.68,
"learning_rate": 4.6798351560391636e-07,
"loss": 0.6695,
"step": 16785
},
{
"epoch": 0.68,
"learning_rate": 4.674378988082499e-07,
"loss": 0.6873,
"step": 16790
},
{
"epoch": 0.68,
"learning_rate": 4.66892503234249e-07,
"loss": 0.6493,
"step": 16795
},
{
"epoch": 0.68,
"learning_rate": 4.6634732910846563e-07,
"loss": 0.6268,
"step": 16800
},
{
"epoch": 0.68,
"eval_loss": 0.6238049268722534,
"eval_runtime": 144.7581,
"eval_samples_per_second": 16.345,
"eval_steps_per_second": 2.729,
"step": 16800
},
{
"epoch": 0.68,
"learning_rate": 4.6580237665736135e-07,
"loss": 0.6724,
"step": 16805
},
{
"epoch": 0.68,
"learning_rate": 4.652576461073042e-07,
"loss": 0.6617,
"step": 16810
},
{
"epoch": 0.68,
"learning_rate": 4.6471313768457053e-07,
"loss": 0.6567,
"step": 16815
},
{
"epoch": 0.68,
"learning_rate": 4.6416885161534394e-07,
"loss": 0.6866,
"step": 16820
},
{
"epoch": 0.68,
"learning_rate": 4.636247881257174e-07,
"loss": 0.661,
"step": 16825
},
{
"epoch": 0.68,
"learning_rate": 4.630809474416885e-07,
"loss": 0.6815,
"step": 16830
},
{
"epoch": 0.68,
"learning_rate": 4.625373297891647e-07,
"loss": 0.5876,
"step": 16835
},
{
"epoch": 0.68,
"learning_rate": 4.619939353939606e-07,
"loss": 0.7043,
"step": 16840
},
{
"epoch": 0.68,
"learning_rate": 4.6145076448179696e-07,
"loss": 0.6551,
"step": 16845
},
{
"epoch": 0.68,
"learning_rate": 4.609078172783023e-07,
"loss": 0.658,
"step": 16850
},
{
"epoch": 0.68,
"learning_rate": 4.603650940090118e-07,
"loss": 0.6219,
"step": 16855
},
{
"epoch": 0.68,
"learning_rate": 4.598225948993687e-07,
"loss": 0.6702,
"step": 16860
},
{
"epoch": 0.69,
"learning_rate": 4.5928032017472216e-07,
"loss": 0.636,
"step": 16865
},
{
"epoch": 0.69,
"learning_rate": 4.587382700603278e-07,
"loss": 0.6706,
"step": 16870
},
{
"epoch": 0.69,
"learning_rate": 4.581964447813499e-07,
"loss": 0.6071,
"step": 16875
},
{
"epoch": 0.69,
"learning_rate": 4.5765484456285617e-07,
"loss": 0.6504,
"step": 16880
},
{
"epoch": 0.69,
"learning_rate": 4.5711346962982366e-07,
"loss": 0.6407,
"step": 16885
},
{
"epoch": 0.69,
"learning_rate": 4.56572320207135e-07,
"loss": 0.6647,
"step": 16890
},
{
"epoch": 0.69,
"learning_rate": 4.5603139651957866e-07,
"loss": 0.6111,
"step": 16895
},
{
"epoch": 0.69,
"learning_rate": 4.5549069879184965e-07,
"loss": 0.6486,
"step": 16900
},
{
"epoch": 0.69,
"learning_rate": 4.549502272485487e-07,
"loss": 0.638,
"step": 16905
},
{
"epoch": 0.69,
"learning_rate": 4.544099821141838e-07,
"loss": 0.6168,
"step": 16910
},
{
"epoch": 0.69,
"learning_rate": 4.538699636131675e-07,
"loss": 0.6655,
"step": 16915
},
{
"epoch": 0.69,
"learning_rate": 4.533301719698187e-07,
"loss": 0.6584,
"step": 16920
},
{
"epoch": 0.69,
"learning_rate": 4.5279060740836305e-07,
"loss": 0.6485,
"step": 16925
},
{
"epoch": 0.69,
"learning_rate": 4.522512701529296e-07,
"loss": 0.6726,
"step": 16930
},
{
"epoch": 0.69,
"learning_rate": 4.517121604275551e-07,
"loss": 0.6818,
"step": 16935
},
{
"epoch": 0.69,
"learning_rate": 4.5117327845618136e-07,
"loss": 0.6737,
"step": 16940
},
{
"epoch": 0.69,
"learning_rate": 4.50634624462655e-07,
"loss": 0.6375,
"step": 16945
},
{
"epoch": 0.69,
"learning_rate": 4.5009619867072803e-07,
"loss": 0.6622,
"step": 16950
},
{
"epoch": 0.69,
"learning_rate": 4.4955800130405763e-07,
"loss": 0.6516,
"step": 16955
},
{
"epoch": 0.69,
"learning_rate": 4.4902003258620725e-07,
"loss": 0.6557,
"step": 16960
},
{
"epoch": 0.69,
"learning_rate": 4.484822927406431e-07,
"loss": 0.6293,
"step": 16965
},
{
"epoch": 0.69,
"learning_rate": 4.479447819907383e-07,
"loss": 0.6313,
"step": 16970
},
{
"epoch": 0.69,
"learning_rate": 4.474075005597705e-07,
"loss": 0.6323,
"step": 16975
},
{
"epoch": 0.69,
"learning_rate": 4.4687044867092147e-07,
"loss": 0.6483,
"step": 16980
},
{
"epoch": 0.69,
"learning_rate": 4.463336265472778e-07,
"loss": 0.6485,
"step": 16985
},
{
"epoch": 0.69,
"learning_rate": 4.4579703441183025e-07,
"loss": 0.6649,
"step": 16990
},
{
"epoch": 0.69,
"learning_rate": 4.4526067248747557e-07,
"loss": 0.6729,
"step": 16995
},
{
"epoch": 0.69,
"learning_rate": 4.447245409970132e-07,
"loss": 0.6805,
"step": 17000
},
{
"epoch": 0.69,
"learning_rate": 4.441886401631472e-07,
"loss": 0.6739,
"step": 17005
},
{
"epoch": 0.69,
"learning_rate": 4.4365297020848727e-07,
"loss": 0.6435,
"step": 17010
},
{
"epoch": 0.69,
"learning_rate": 4.431175313555445e-07,
"loss": 0.6506,
"step": 17015
},
{
"epoch": 0.69,
"learning_rate": 4.4258232382673634e-07,
"loss": 0.6641,
"step": 17020
},
{
"epoch": 0.69,
"learning_rate": 4.4204734784438367e-07,
"loss": 0.6448,
"step": 17025
},
{
"epoch": 0.69,
"learning_rate": 4.4151260363071064e-07,
"loss": 0.6651,
"step": 17030
},
{
"epoch": 0.69,
"learning_rate": 4.40978091407845e-07,
"loss": 0.6762,
"step": 17035
},
{
"epoch": 0.69,
"learning_rate": 4.404438113978185e-07,
"loss": 0.6604,
"step": 17040
},
{
"epoch": 0.69,
"learning_rate": 4.3990976382256694e-07,
"loss": 0.6538,
"step": 17045
},
{
"epoch": 0.69,
"learning_rate": 4.393759489039287e-07,
"loss": 0.631,
"step": 17050
},
{
"epoch": 0.69,
"learning_rate": 4.388423668636455e-07,
"loss": 0.6838,
"step": 17055
},
{
"epoch": 0.69,
"learning_rate": 4.3830901792336374e-07,
"loss": 0.6582,
"step": 17060
},
{
"epoch": 0.69,
"learning_rate": 4.377759023046306e-07,
"loss": 0.6154,
"step": 17065
},
{
"epoch": 0.69,
"learning_rate": 4.372430202288981e-07,
"loss": 0.646,
"step": 17070
},
{
"epoch": 0.69,
"learning_rate": 4.3671037191752157e-07,
"loss": 0.6777,
"step": 17075
},
{
"epoch": 0.69,
"learning_rate": 4.361779575917579e-07,
"loss": 0.6359,
"step": 17080
},
{
"epoch": 0.69,
"learning_rate": 4.356457774727674e-07,
"loss": 0.6211,
"step": 17085
},
{
"epoch": 0.69,
"learning_rate": 4.3511383178161263e-07,
"loss": 0.6075,
"step": 17090
},
{
"epoch": 0.69,
"learning_rate": 4.345821207392605e-07,
"loss": 0.6124,
"step": 17095
},
{
"epoch": 0.69,
"learning_rate": 4.340506445665775e-07,
"loss": 0.6473,
"step": 17100
},
{
"epoch": 0.69,
"learning_rate": 4.33519403484335e-07,
"loss": 0.6244,
"step": 17105
},
{
"epoch": 0.69,
"learning_rate": 4.3298839771320674e-07,
"loss": 0.6249,
"step": 17110
},
{
"epoch": 0.7,
"learning_rate": 4.3245762747376635e-07,
"loss": 0.6282,
"step": 17115
},
{
"epoch": 0.7,
"learning_rate": 4.3192709298649223e-07,
"loss": 0.6779,
"step": 17120
},
{
"epoch": 0.7,
"learning_rate": 4.3139679447176313e-07,
"loss": 0.6534,
"step": 17125
},
{
"epoch": 0.7,
"learning_rate": 4.3086673214986114e-07,
"loss": 0.6536,
"step": 17130
},
{
"epoch": 0.7,
"learning_rate": 4.303369062409693e-07,
"loss": 0.6452,
"step": 17135
},
{
"epoch": 0.7,
"learning_rate": 4.2980731696517203e-07,
"loss": 0.6267,
"step": 17140
},
{
"epoch": 0.7,
"learning_rate": 4.292779645424576e-07,
"loss": 0.633,
"step": 17145
},
{
"epoch": 0.7,
"learning_rate": 4.2874884919271257e-07,
"loss": 0.6706,
"step": 17150
},
{
"epoch": 0.7,
"learning_rate": 4.282199711357277e-07,
"loss": 0.6658,
"step": 17155
},
{
"epoch": 0.7,
"learning_rate": 4.276913305911952e-07,
"loss": 0.6152,
"step": 17160
},
{
"epoch": 0.7,
"learning_rate": 4.2716292777870624e-07,
"loss": 0.6495,
"step": 17165
},
{
"epoch": 0.7,
"learning_rate": 4.266347629177558e-07,
"loss": 0.6657,
"step": 17170
},
{
"epoch": 0.7,
"learning_rate": 4.2610683622773815e-07,
"loss": 0.6627,
"step": 17175
},
{
"epoch": 0.7,
"learning_rate": 4.255791479279504e-07,
"loss": 0.6713,
"step": 17180
},
{
"epoch": 0.7,
"learning_rate": 4.250516982375891e-07,
"loss": 0.6723,
"step": 17185
},
{
"epoch": 0.7,
"learning_rate": 4.245244873757521e-07,
"loss": 0.6523,
"step": 17190
},
{
"epoch": 0.7,
"learning_rate": 4.2399751556143903e-07,
"loss": 0.6366,
"step": 17195
},
{
"epoch": 0.7,
"learning_rate": 4.2347078301354824e-07,
"loss": 0.6277,
"step": 17200
},
{
"epoch": 0.7,
"learning_rate": 4.229442899508804e-07,
"loss": 0.6854,
"step": 17205
},
{
"epoch": 0.7,
"learning_rate": 4.224180365921366e-07,
"loss": 0.648,
"step": 17210
},
{
"epoch": 0.7,
"learning_rate": 4.2189202315591744e-07,
"loss": 0.6133,
"step": 17215
},
{
"epoch": 0.7,
"learning_rate": 4.2136624986072435e-07,
"loss": 0.6783,
"step": 17220
},
{
"epoch": 0.7,
"learning_rate": 4.208407169249587e-07,
"loss": 0.6617,
"step": 17225
},
{
"epoch": 0.7,
"learning_rate": 4.2031542456692303e-07,
"loss": 0.639,
"step": 17230
},
{
"epoch": 0.7,
"learning_rate": 4.197903730048188e-07,
"loss": 0.6371,
"step": 17235
},
{
"epoch": 0.7,
"learning_rate": 4.192655624567475e-07,
"loss": 0.636,
"step": 17240
},
{
"epoch": 0.7,
"learning_rate": 4.18740993140712e-07,
"loss": 0.6729,
"step": 17245
},
{
"epoch": 0.7,
"learning_rate": 4.182166652746123e-07,
"loss": 0.6432,
"step": 17250
},
{
"epoch": 0.7,
"learning_rate": 4.1769257907625077e-07,
"loss": 0.6669,
"step": 17255
},
{
"epoch": 0.7,
"learning_rate": 4.171687347633276e-07,
"loss": 0.6517,
"step": 17260
},
{
"epoch": 0.7,
"learning_rate": 4.166451325534437e-07,
"loss": 0.6368,
"step": 17265
},
{
"epoch": 0.7,
"learning_rate": 4.161217726640988e-07,
"loss": 0.6766,
"step": 17270
},
{
"epoch": 0.7,
"learning_rate": 4.1559865531269135e-07,
"loss": 0.6816,
"step": 17275
},
{
"epoch": 0.7,
"learning_rate": 4.150757807165212e-07,
"loss": 0.6367,
"step": 17280
},
{
"epoch": 0.7,
"learning_rate": 4.1455314909278405e-07,
"loss": 0.6598,
"step": 17285
},
{
"epoch": 0.7,
"learning_rate": 4.1403076065857755e-07,
"loss": 0.6619,
"step": 17290
},
{
"epoch": 0.7,
"learning_rate": 4.13508615630898e-07,
"loss": 0.656,
"step": 17295
},
{
"epoch": 0.7,
"learning_rate": 4.129867142266384e-07,
"loss": 0.6077,
"step": 17300
},
{
"epoch": 0.7,
"learning_rate": 4.124650566625931e-07,
"loss": 0.6615,
"step": 17305
},
{
"epoch": 0.7,
"learning_rate": 4.119436431554536e-07,
"loss": 0.6512,
"step": 17310
},
{
"epoch": 0.7,
"learning_rate": 4.1142247392181096e-07,
"loss": 0.6808,
"step": 17315
},
{
"epoch": 0.7,
"learning_rate": 4.1090154917815423e-07,
"loss": 0.667,
"step": 17320
},
{
"epoch": 0.7,
"learning_rate": 4.1038086914087045e-07,
"loss": 0.655,
"step": 17325
},
{
"epoch": 0.7,
"learning_rate": 4.0986043402624694e-07,
"loss": 0.645,
"step": 17330
},
{
"epoch": 0.7,
"learning_rate": 4.093402440504662e-07,
"loss": 0.6364,
"step": 17335
},
{
"epoch": 0.7,
"learning_rate": 4.0882029942961136e-07,
"loss": 0.6864,
"step": 17340
},
{
"epoch": 0.7,
"learning_rate": 4.0830060037966386e-07,
"loss": 0.6425,
"step": 17345
},
{
"epoch": 0.7,
"learning_rate": 4.077811471165005e-07,
"loss": 0.6822,
"step": 17350
},
{
"epoch": 0.7,
"learning_rate": 4.0726193985589884e-07,
"loss": 0.6606,
"step": 17355
},
{
"epoch": 0.71,
"learning_rate": 4.0674297881353227e-07,
"loss": 0.6377,
"step": 17360
},
{
"epoch": 0.71,
"learning_rate": 4.0622426420497345e-07,
"loss": 0.7072,
"step": 17365
},
{
"epoch": 0.71,
"learning_rate": 4.057057962456916e-07,
"loss": 0.6939,
"step": 17370
},
{
"epoch": 0.71,
"learning_rate": 4.0518757515105327e-07,
"loss": 0.6147,
"step": 17375
},
{
"epoch": 0.71,
"learning_rate": 4.046696011363243e-07,
"loss": 0.6398,
"step": 17380
},
{
"epoch": 0.71,
"learning_rate": 4.041518744166651e-07,
"loss": 0.6739,
"step": 17385
},
{
"epoch": 0.71,
"learning_rate": 4.0363439520713585e-07,
"loss": 0.6546,
"step": 17390
},
{
"epoch": 0.71,
"learning_rate": 4.0311716372269243e-07,
"loss": 0.6565,
"step": 17395
},
{
"epoch": 0.71,
"learning_rate": 4.026001801781883e-07,
"loss": 0.5946,
"step": 17400
},
{
"epoch": 0.71,
"learning_rate": 4.020834447883743e-07,
"loss": 0.62,
"step": 17405
},
{
"epoch": 0.71,
"learning_rate": 4.015669577678973e-07,
"loss": 0.689,
"step": 17410
},
{
"epoch": 0.71,
"learning_rate": 4.010507193313025e-07,
"loss": 0.6858,
"step": 17415
},
{
"epoch": 0.71,
"learning_rate": 4.005347296930295e-07,
"loss": 0.6892,
"step": 17420
},
{
"epoch": 0.71,
"learning_rate": 4.000189890674166e-07,
"loss": 0.6309,
"step": 17425
},
{
"epoch": 0.71,
"learning_rate": 3.995034976686986e-07,
"loss": 0.6863,
"step": 17430
},
{
"epoch": 0.71,
"learning_rate": 3.9898825571100483e-07,
"loss": 0.621,
"step": 17435
},
{
"epoch": 0.71,
"learning_rate": 3.984732634083633e-07,
"loss": 0.6637,
"step": 17440
},
{
"epoch": 0.71,
"learning_rate": 3.9795852097469704e-07,
"loss": 0.6236,
"step": 17445
},
{
"epoch": 0.71,
"learning_rate": 3.974440286238252e-07,
"loss": 0.698,
"step": 17450
},
{
"epoch": 0.71,
"learning_rate": 3.969297865694641e-07,
"loss": 0.6756,
"step": 17455
},
{
"epoch": 0.71,
"learning_rate": 3.9641579502522484e-07,
"loss": 0.6504,
"step": 17460
},
{
"epoch": 0.71,
"learning_rate": 3.95902054204616e-07,
"loss": 0.6308,
"step": 17465
},
{
"epoch": 0.71,
"learning_rate": 3.953885643210395e-07,
"loss": 0.6394,
"step": 17470
},
{
"epoch": 0.71,
"learning_rate": 3.948753255877956e-07,
"loss": 0.635,
"step": 17475
},
{
"epoch": 0.71,
"learning_rate": 3.9436233821807965e-07,
"loss": 0.6351,
"step": 17480
},
{
"epoch": 0.71,
"learning_rate": 3.9384960242498076e-07,
"loss": 0.6077,
"step": 17485
},
{
"epoch": 0.71,
"learning_rate": 3.933371184214862e-07,
"loss": 0.5905,
"step": 17490
},
{
"epoch": 0.71,
"learning_rate": 3.928248864204764e-07,
"loss": 0.6507,
"step": 17495
},
{
"epoch": 0.71,
"learning_rate": 3.923129066347288e-07,
"loss": 0.639,
"step": 17500
},
{
"epoch": 0.71,
"learning_rate": 3.9180117927691523e-07,
"loss": 0.6503,
"step": 17505
},
{
"epoch": 0.71,
"learning_rate": 3.912897045596022e-07,
"loss": 0.651,
"step": 17510
},
{
"epoch": 0.71,
"learning_rate": 3.90778482695253e-07,
"loss": 0.6379,
"step": 17515
},
{
"epoch": 0.71,
"learning_rate": 3.9026751389622337e-07,
"loss": 0.6312,
"step": 17520
},
{
"epoch": 0.71,
"learning_rate": 3.897567983747663e-07,
"loss": 0.6215,
"step": 17525
},
{
"epoch": 0.71,
"learning_rate": 3.892463363430283e-07,
"loss": 0.654,
"step": 17530
},
{
"epoch": 0.71,
"learning_rate": 3.8873612801305035e-07,
"loss": 0.6402,
"step": 17535
},
{
"epoch": 0.71,
"learning_rate": 3.8822617359676946e-07,
"loss": 0.6927,
"step": 17540
},
{
"epoch": 0.71,
"learning_rate": 3.877164733060154e-07,
"loss": 0.6587,
"step": 17545
},
{
"epoch": 0.71,
"learning_rate": 3.872070273525143e-07,
"loss": 0.6217,
"step": 17550
},
{
"epoch": 0.71,
"learning_rate": 3.866978359478844e-07,
"loss": 0.6561,
"step": 17555
},
{
"epoch": 0.71,
"learning_rate": 3.8618889930363984e-07,
"loss": 0.6448,
"step": 17560
},
{
"epoch": 0.71,
"learning_rate": 3.856802176311892e-07,
"loss": 0.6062,
"step": 17565
},
{
"epoch": 0.71,
"learning_rate": 3.8517179114183306e-07,
"loss": 0.6583,
"step": 17570
},
{
"epoch": 0.71,
"learning_rate": 3.8466362004676845e-07,
"loss": 0.6847,
"step": 17575
},
{
"epoch": 0.71,
"learning_rate": 3.841557045570849e-07,
"loss": 0.6257,
"step": 17580
},
{
"epoch": 0.71,
"learning_rate": 3.8364804488376556e-07,
"loss": 0.6344,
"step": 17585
},
{
"epoch": 0.71,
"learning_rate": 3.8314064123768885e-07,
"loss": 0.6658,
"step": 17590
},
{
"epoch": 0.71,
"learning_rate": 3.8263349382962483e-07,
"loss": 0.6301,
"step": 17595
},
{
"epoch": 0.71,
"learning_rate": 3.821266028702392e-07,
"loss": 0.6709,
"step": 17600
},
{
"epoch": 0.71,
"eval_loss": 0.6223161220550537,
"eval_runtime": 139.812,
"eval_samples_per_second": 16.923,
"eval_steps_per_second": 2.825,
"step": 17600
},
{
"epoch": 0.72,
"learning_rate": 3.8161996857008895e-07,
"loss": 0.6462,
"step": 17605
},
{
"epoch": 0.72,
"learning_rate": 3.811135911396259e-07,
"loss": 0.6432,
"step": 17610
},
{
"epoch": 0.72,
"learning_rate": 3.8060747078919574e-07,
"loss": 0.6256,
"step": 17615
},
{
"epoch": 0.72,
"learning_rate": 3.801016077290351e-07,
"loss": 0.6452,
"step": 17620
},
{
"epoch": 0.72,
"learning_rate": 3.7959600216927603e-07,
"loss": 0.6282,
"step": 17625
},
{
"epoch": 0.72,
"learning_rate": 3.7909065431994227e-07,
"loss": 0.6568,
"step": 17630
},
{
"epoch": 0.72,
"learning_rate": 3.785855643909507e-07,
"loss": 0.6867,
"step": 17635
},
{
"epoch": 0.72,
"learning_rate": 3.7808073259211194e-07,
"loss": 0.622,
"step": 17640
},
{
"epoch": 0.72,
"learning_rate": 3.7757615913312813e-07,
"loss": 0.6691,
"step": 17645
},
{
"epoch": 0.72,
"learning_rate": 3.770718442235955e-07,
"loss": 0.6732,
"step": 17650
},
{
"epoch": 0.72,
"learning_rate": 3.765677880730009e-07,
"loss": 0.6415,
"step": 17655
},
{
"epoch": 0.72,
"learning_rate": 3.7606399089072594e-07,
"loss": 0.7083,
"step": 17660
},
{
"epoch": 0.72,
"learning_rate": 3.755604528860431e-07,
"loss": 0.6402,
"step": 17665
},
{
"epoch": 0.72,
"learning_rate": 3.750571742681173e-07,
"loss": 0.648,
"step": 17670
},
{
"epoch": 0.72,
"learning_rate": 3.7455415524600697e-07,
"loss": 0.6585,
"step": 17675
},
{
"epoch": 0.72,
"learning_rate": 3.7405139602866145e-07,
"loss": 0.6325,
"step": 17680
},
{
"epoch": 0.72,
"learning_rate": 3.735488968249224e-07,
"loss": 0.6709,
"step": 17685
},
{
"epoch": 0.72,
"learning_rate": 3.730466578435235e-07,
"loss": 0.6228,
"step": 17690
},
{
"epoch": 0.72,
"learning_rate": 3.725446792930905e-07,
"loss": 0.637,
"step": 17695
},
{
"epoch": 0.72,
"learning_rate": 3.7204296138214197e-07,
"loss": 0.6506,
"step": 17700
},
{
"epoch": 0.72,
"learning_rate": 3.715415043190856e-07,
"loss": 0.7209,
"step": 17705
},
{
"epoch": 0.72,
"learning_rate": 3.710403083122232e-07,
"loss": 0.6668,
"step": 17710
},
{
"epoch": 0.72,
"learning_rate": 3.7053937356974717e-07,
"loss": 0.6685,
"step": 17715
},
{
"epoch": 0.72,
"learning_rate": 3.700387002997408e-07,
"loss": 0.6666,
"step": 17720
},
{
"epoch": 0.72,
"learning_rate": 3.695382887101804e-07,
"loss": 0.6703,
"step": 17725
},
{
"epoch": 0.72,
"learning_rate": 3.6903813900893175e-07,
"loss": 0.6361,
"step": 17730
},
{
"epoch": 0.72,
"learning_rate": 3.685382514037537e-07,
"loss": 0.6159,
"step": 17735
},
{
"epoch": 0.72,
"learning_rate": 3.6803862610229384e-07,
"loss": 0.6266,
"step": 17740
},
{
"epoch": 0.72,
"learning_rate": 3.6753926331209294e-07,
"loss": 0.6465,
"step": 17745
},
{
"epoch": 0.72,
"learning_rate": 3.670401632405826e-07,
"loss": 0.6146,
"step": 17750
},
{
"epoch": 0.72,
"learning_rate": 3.6654132609508325e-07,
"loss": 0.6482,
"step": 17755
},
{
"epoch": 0.72,
"learning_rate": 3.660427520828085e-07,
"loss": 0.6261,
"step": 17760
},
{
"epoch": 0.72,
"learning_rate": 3.655444414108615e-07,
"loss": 0.6514,
"step": 17765
},
{
"epoch": 0.72,
"learning_rate": 3.650463942862356e-07,
"loss": 0.6404,
"step": 17770
},
{
"epoch": 0.72,
"learning_rate": 3.645486109158161e-07,
"loss": 0.6637,
"step": 17775
},
{
"epoch": 0.72,
"learning_rate": 3.640510915063771e-07,
"loss": 0.6227,
"step": 17780
},
{
"epoch": 0.72,
"learning_rate": 3.635538362645849e-07,
"loss": 0.6501,
"step": 17785
},
{
"epoch": 0.72,
"learning_rate": 3.6305684539699366e-07,
"loss": 0.6559,
"step": 17790
},
{
"epoch": 0.72,
"learning_rate": 3.625601191100496e-07,
"loss": 0.648,
"step": 17795
},
{
"epoch": 0.72,
"learning_rate": 3.620636576100894e-07,
"loss": 0.672,
"step": 17800
},
{
"epoch": 0.72,
"learning_rate": 3.6156746110333736e-07,
"loss": 0.6461,
"step": 17805
},
{
"epoch": 0.72,
"learning_rate": 3.6107152979591015e-07,
"loss": 0.6711,
"step": 17810
},
{
"epoch": 0.72,
"learning_rate": 3.6057586389381325e-07,
"loss": 0.6374,
"step": 17815
},
{
"epoch": 0.72,
"learning_rate": 3.6008046360294163e-07,
"loss": 0.6321,
"step": 17820
},
{
"epoch": 0.72,
"learning_rate": 3.5958532912908015e-07,
"loss": 0.607,
"step": 17825
},
{
"epoch": 0.72,
"learning_rate": 3.590904606779035e-07,
"loss": 0.6357,
"step": 17830
},
{
"epoch": 0.72,
"learning_rate": 3.5859585845497666e-07,
"loss": 0.6354,
"step": 17835
},
{
"epoch": 0.72,
"learning_rate": 3.5810152266575167e-07,
"loss": 0.6476,
"step": 17840
},
{
"epoch": 0.72,
"learning_rate": 3.576074535155722e-07,
"loss": 0.6301,
"step": 17845
},
{
"epoch": 0.73,
"learning_rate": 3.5711365120966994e-07,
"loss": 0.6725,
"step": 17850
},
{
"epoch": 0.73,
"learning_rate": 3.56620115953166e-07,
"loss": 0.6659,
"step": 17855
},
{
"epoch": 0.73,
"learning_rate": 3.5612684795107104e-07,
"loss": 0.6825,
"step": 17860
},
{
"epoch": 0.73,
"learning_rate": 3.5563384740828406e-07,
"loss": 0.6667,
"step": 17865
},
{
"epoch": 0.73,
"learning_rate": 3.5514111452959317e-07,
"loss": 0.653,
"step": 17870
},
{
"epoch": 0.73,
"learning_rate": 3.54648649519675e-07,
"loss": 0.6709,
"step": 17875
},
{
"epoch": 0.73,
"learning_rate": 3.5415645258309556e-07,
"loss": 0.664,
"step": 17880
},
{
"epoch": 0.73,
"learning_rate": 3.536645239243099e-07,
"loss": 0.624,
"step": 17885
},
{
"epoch": 0.73,
"learning_rate": 3.531728637476594e-07,
"loss": 0.6278,
"step": 17890
},
{
"epoch": 0.73,
"learning_rate": 3.526814722573767e-07,
"loss": 0.6323,
"step": 17895
},
{
"epoch": 0.73,
"learning_rate": 3.52190349657581e-07,
"loss": 0.6911,
"step": 17900
},
{
"epoch": 0.73,
"learning_rate": 3.5169949615228014e-07,
"loss": 0.6741,
"step": 17905
},
{
"epoch": 0.73,
"learning_rate": 3.5120891194537093e-07,
"loss": 0.6731,
"step": 17910
},
{
"epoch": 0.73,
"learning_rate": 3.5071859724063756e-07,
"loss": 0.6234,
"step": 17915
},
{
"epoch": 0.73,
"learning_rate": 3.5022855224175254e-07,
"loss": 0.6702,
"step": 17920
},
{
"epoch": 0.73,
"learning_rate": 3.4973877715227584e-07,
"loss": 0.6427,
"step": 17925
},
{
"epoch": 0.73,
"learning_rate": 3.4924927217565614e-07,
"loss": 0.6306,
"step": 17930
},
{
"epoch": 0.73,
"learning_rate": 3.487600375152303e-07,
"loss": 0.6943,
"step": 17935
},
{
"epoch": 0.73,
"learning_rate": 3.482710733742207e-07,
"loss": 0.6889,
"step": 17940
},
{
"epoch": 0.73,
"learning_rate": 3.477823799557399e-07,
"loss": 0.6084,
"step": 17945
},
{
"epoch": 0.73,
"learning_rate": 3.4729395746278645e-07,
"loss": 0.6656,
"step": 17950
},
{
"epoch": 0.73,
"learning_rate": 3.468058060982468e-07,
"loss": 0.658,
"step": 17955
},
{
"epoch": 0.73,
"learning_rate": 3.463179260648946e-07,
"loss": 0.649,
"step": 17960
},
{
"epoch": 0.73,
"learning_rate": 3.458303175653916e-07,
"loss": 0.6494,
"step": 17965
},
{
"epoch": 0.73,
"learning_rate": 3.4534298080228563e-07,
"loss": 0.6672,
"step": 17970
},
{
"epoch": 0.73,
"learning_rate": 3.44855915978012e-07,
"loss": 0.6541,
"step": 17975
},
{
"epoch": 0.73,
"learning_rate": 3.443691232948938e-07,
"loss": 0.6382,
"step": 17980
},
{
"epoch": 0.73,
"learning_rate": 3.438826029551402e-07,
"loss": 0.6651,
"step": 17985
},
{
"epoch": 0.73,
"learning_rate": 3.43396355160847e-07,
"loss": 0.6019,
"step": 17990
},
{
"epoch": 0.73,
"learning_rate": 3.429103801139981e-07,
"loss": 0.6356,
"step": 17995
},
{
"epoch": 0.73,
"learning_rate": 3.4242467801646303e-07,
"loss": 0.6722,
"step": 18000
},
{
"epoch": 0.73,
"learning_rate": 3.4193924906999804e-07,
"loss": 0.64,
"step": 18005
},
{
"epoch": 0.73,
"learning_rate": 3.4145409347624597e-07,
"loss": 0.6232,
"step": 18010
},
{
"epoch": 0.73,
"learning_rate": 3.409692114367365e-07,
"loss": 0.6801,
"step": 18015
},
{
"epoch": 0.73,
"learning_rate": 3.4048460315288607e-07,
"loss": 0.6688,
"step": 18020
},
{
"epoch": 0.73,
"learning_rate": 3.400002688259953e-07,
"loss": 0.6331,
"step": 18025
},
{
"epoch": 0.73,
"learning_rate": 3.395162086572536e-07,
"loss": 0.6097,
"step": 18030
},
{
"epoch": 0.73,
"learning_rate": 3.39032422847735e-07,
"loss": 0.6887,
"step": 18035
},
{
"epoch": 0.73,
"learning_rate": 3.385489115983996e-07,
"loss": 0.6126,
"step": 18040
},
{
"epoch": 0.73,
"learning_rate": 3.380656751100943e-07,
"loss": 0.6639,
"step": 18045
},
{
"epoch": 0.73,
"learning_rate": 3.3758271358355115e-07,
"loss": 0.6329,
"step": 18050
},
{
"epoch": 0.73,
"learning_rate": 3.371000272193881e-07,
"loss": 0.6381,
"step": 18055
},
{
"epoch": 0.73,
"learning_rate": 3.366176162181086e-07,
"loss": 0.674,
"step": 18060
},
{
"epoch": 0.73,
"learning_rate": 3.361354807801021e-07,
"loss": 0.6684,
"step": 18065
},
{
"epoch": 0.73,
"learning_rate": 3.3565362110564446e-07,
"loss": 0.6402,
"step": 18070
},
{
"epoch": 0.73,
"learning_rate": 3.351720373948945e-07,
"loss": 0.6506,
"step": 18075
},
{
"epoch": 0.73,
"learning_rate": 3.346907298478989e-07,
"loss": 0.6486,
"step": 18080
},
{
"epoch": 0.73,
"learning_rate": 3.3420969866458823e-07,
"loss": 0.6789,
"step": 18085
},
{
"epoch": 0.73,
"learning_rate": 3.337289440447788e-07,
"loss": 0.6283,
"step": 18090
},
{
"epoch": 0.73,
"learning_rate": 3.332484661881716e-07,
"loss": 0.6729,
"step": 18095
},
{
"epoch": 0.74,
"learning_rate": 3.327682652943534e-07,
"loss": 0.6307,
"step": 18100
},
{
"epoch": 0.74,
"learning_rate": 3.322883415627953e-07,
"loss": 0.6712,
"step": 18105
},
{
"epoch": 0.74,
"learning_rate": 3.31808695192853e-07,
"loss": 0.68,
"step": 18110
},
{
"epoch": 0.74,
"learning_rate": 3.313293263837683e-07,
"loss": 0.6577,
"step": 18115
},
{
"epoch": 0.74,
"learning_rate": 3.308502353346663e-07,
"loss": 0.6393,
"step": 18120
},
{
"epoch": 0.74,
"learning_rate": 3.3037142224455703e-07,
"loss": 0.6464,
"step": 18125
},
{
"epoch": 0.74,
"learning_rate": 3.2989288731233587e-07,
"loss": 0.6654,
"step": 18130
},
{
"epoch": 0.74,
"learning_rate": 3.2941463073678186e-07,
"loss": 0.6245,
"step": 18135
},
{
"epoch": 0.74,
"learning_rate": 3.289366527165586e-07,
"loss": 0.6602,
"step": 18140
},
{
"epoch": 0.74,
"learning_rate": 3.2845895345021345e-07,
"loss": 0.6258,
"step": 18145
},
{
"epoch": 0.74,
"learning_rate": 3.279815331361795e-07,
"loss": 0.6558,
"step": 18150
},
{
"epoch": 0.74,
"learning_rate": 3.275043919727725e-07,
"loss": 0.6414,
"step": 18155
},
{
"epoch": 0.74,
"learning_rate": 3.2702753015819227e-07,
"loss": 0.6347,
"step": 18160
},
{
"epoch": 0.74,
"learning_rate": 3.265509478905241e-07,
"loss": 0.6173,
"step": 18165
},
{
"epoch": 0.74,
"learning_rate": 3.2607464536773546e-07,
"loss": 0.6816,
"step": 18170
},
{
"epoch": 0.74,
"learning_rate": 3.2559862278767815e-07,
"loss": 0.6507,
"step": 18175
},
{
"epoch": 0.74,
"learning_rate": 3.251228803480883e-07,
"loss": 0.6762,
"step": 18180
},
{
"epoch": 0.74,
"learning_rate": 3.2464741824658504e-07,
"loss": 0.6747,
"step": 18185
},
{
"epoch": 0.74,
"learning_rate": 3.2417223668067095e-07,
"loss": 0.6143,
"step": 18190
},
{
"epoch": 0.74,
"learning_rate": 3.2369733584773216e-07,
"loss": 0.6404,
"step": 18195
},
{
"epoch": 0.74,
"learning_rate": 3.23222715945039e-07,
"loss": 0.654,
"step": 18200
},
{
"epoch": 0.74,
"learning_rate": 3.227483771697441e-07,
"loss": 0.6367,
"step": 18205
},
{
"epoch": 0.74,
"learning_rate": 3.222743197188834e-07,
"loss": 0.6811,
"step": 18210
},
{
"epoch": 0.74,
"learning_rate": 3.2180054378937673e-07,
"loss": 0.6155,
"step": 18215
},
{
"epoch": 0.74,
"learning_rate": 3.2132704957802637e-07,
"loss": 0.6127,
"step": 18220
},
{
"epoch": 0.74,
"learning_rate": 3.208538372815172e-07,
"loss": 0.648,
"step": 18225
},
{
"epoch": 0.74,
"learning_rate": 3.203809070964181e-07,
"loss": 0.6523,
"step": 18230
},
{
"epoch": 0.74,
"learning_rate": 3.199082592191801e-07,
"loss": 0.651,
"step": 18235
},
{
"epoch": 0.74,
"learning_rate": 3.1943589384613667e-07,
"loss": 0.6423,
"step": 18240
},
{
"epoch": 0.74,
"learning_rate": 3.189638111735042e-07,
"loss": 0.6186,
"step": 18245
},
{
"epoch": 0.74,
"learning_rate": 3.1849201139738247e-07,
"loss": 0.6621,
"step": 18250
},
{
"epoch": 0.74,
"learning_rate": 3.1802049471375234e-07,
"loss": 0.6333,
"step": 18255
},
{
"epoch": 0.74,
"learning_rate": 3.175492613184777e-07,
"loss": 0.6234,
"step": 18260
},
{
"epoch": 0.74,
"learning_rate": 3.1707831140730537e-07,
"loss": 0.6328,
"step": 18265
},
{
"epoch": 0.74,
"learning_rate": 3.1660764517586337e-07,
"loss": 0.6621,
"step": 18270
},
{
"epoch": 0.74,
"learning_rate": 3.1613726281966255e-07,
"loss": 0.6735,
"step": 18275
},
{
"epoch": 0.74,
"learning_rate": 3.1566716453409536e-07,
"loss": 0.6595,
"step": 18280
},
{
"epoch": 0.74,
"learning_rate": 3.15197350514437e-07,
"loss": 0.6837,
"step": 18285
},
{
"epoch": 0.74,
"learning_rate": 3.1472782095584396e-07,
"loss": 0.66,
"step": 18290
},
{
"epoch": 0.74,
"learning_rate": 3.1425857605335427e-07,
"loss": 0.6552,
"step": 18295
},
{
"epoch": 0.74,
"learning_rate": 3.1378961600188903e-07,
"loss": 0.6419,
"step": 18300
},
{
"epoch": 0.74,
"learning_rate": 3.1332094099624963e-07,
"loss": 0.6407,
"step": 18305
},
{
"epoch": 0.74,
"learning_rate": 3.1285255123111944e-07,
"loss": 0.6473,
"step": 18310
},
{
"epoch": 0.74,
"learning_rate": 3.1238444690106394e-07,
"loss": 0.6624,
"step": 18315
},
{
"epoch": 0.74,
"learning_rate": 3.1191662820052954e-07,
"loss": 0.6523,
"step": 18320
},
{
"epoch": 0.74,
"learning_rate": 3.114490953238438e-07,
"loss": 0.6311,
"step": 18325
},
{
"epoch": 0.74,
"learning_rate": 3.1098184846521567e-07,
"loss": 0.6484,
"step": 18330
},
{
"epoch": 0.74,
"learning_rate": 3.1051488781873605e-07,
"loss": 0.6783,
"step": 18335
},
{
"epoch": 0.74,
"learning_rate": 3.1004821357837594e-07,
"loss": 0.6586,
"step": 18340
},
{
"epoch": 0.75,
"learning_rate": 3.0958182593798745e-07,
"loss": 0.6285,
"step": 18345
},
{
"epoch": 0.75,
"learning_rate": 3.0911572509130465e-07,
"loss": 0.6312,
"step": 18350
},
{
"epoch": 0.75,
"learning_rate": 3.086499112319414e-07,
"loss": 0.6531,
"step": 18355
},
{
"epoch": 0.75,
"learning_rate": 3.081843845533925e-07,
"loss": 0.6358,
"step": 18360
},
{
"epoch": 0.75,
"learning_rate": 3.0771914524903417e-07,
"loss": 0.6551,
"step": 18365
},
{
"epoch": 0.75,
"learning_rate": 3.0725419351212254e-07,
"loss": 0.6368,
"step": 18370
},
{
"epoch": 0.75,
"learning_rate": 3.067895295357945e-07,
"loss": 0.6914,
"step": 18375
},
{
"epoch": 0.75,
"learning_rate": 3.063251535130672e-07,
"loss": 0.6461,
"step": 18380
},
{
"epoch": 0.75,
"learning_rate": 3.058610656368389e-07,
"loss": 0.6122,
"step": 18385
},
{
"epoch": 0.75,
"learning_rate": 3.053972660998875e-07,
"loss": 0.6502,
"step": 18390
},
{
"epoch": 0.75,
"learning_rate": 3.0493375509487074e-07,
"loss": 0.688,
"step": 18395
},
{
"epoch": 0.75,
"learning_rate": 3.044705328143279e-07,
"loss": 0.6114,
"step": 18400
},
{
"epoch": 0.75,
"eval_loss": 0.6205956935882568,
"eval_runtime": 140.0377,
"eval_samples_per_second": 16.895,
"eval_steps_per_second": 2.821,
"step": 18400
},
{
"epoch": 0.75,
"learning_rate": 3.040075994506771e-07,
"loss": 0.6604,
"step": 18405
},
{
"epoch": 0.75,
"learning_rate": 3.0354495519621693e-07,
"loss": 0.6973,
"step": 18410
},
{
"epoch": 0.75,
"learning_rate": 3.030826002431253e-07,
"loss": 0.6445,
"step": 18415
},
{
"epoch": 0.75,
"learning_rate": 3.026205347834612e-07,
"loss": 0.6553,
"step": 18420
},
{
"epoch": 0.75,
"learning_rate": 3.021587590091622e-07,
"loss": 0.6605,
"step": 18425
},
{
"epoch": 0.75,
"learning_rate": 3.0169727311204564e-07,
"loss": 0.6498,
"step": 18430
},
{
"epoch": 0.75,
"learning_rate": 3.0123607728380953e-07,
"loss": 0.6277,
"step": 18435
},
{
"epoch": 0.75,
"learning_rate": 3.0077517171603007e-07,
"loss": 0.6164,
"step": 18440
},
{
"epoch": 0.75,
"learning_rate": 3.0031455660016316e-07,
"loss": 0.6513,
"step": 18445
},
{
"epoch": 0.75,
"learning_rate": 2.9985423212754503e-07,
"loss": 0.6511,
"step": 18450
},
{
"epoch": 0.75,
"learning_rate": 2.993941984893902e-07,
"loss": 0.6822,
"step": 18455
},
{
"epoch": 0.75,
"learning_rate": 2.9893445587679245e-07,
"loss": 0.6549,
"step": 18460
},
{
"epoch": 0.75,
"learning_rate": 2.984750044807247e-07,
"loss": 0.6606,
"step": 18465
},
{
"epoch": 0.75,
"learning_rate": 2.9801584449203967e-07,
"loss": 0.6564,
"step": 18470
},
{
"epoch": 0.75,
"learning_rate": 2.97556976101468e-07,
"loss": 0.6355,
"step": 18475
},
{
"epoch": 0.75,
"learning_rate": 2.9709839949961947e-07,
"loss": 0.6713,
"step": 18480
},
{
"epoch": 0.75,
"learning_rate": 2.966401148769834e-07,
"loss": 0.6468,
"step": 18485
},
{
"epoch": 0.75,
"learning_rate": 2.961821224239268e-07,
"loss": 0.6286,
"step": 18490
},
{
"epoch": 0.75,
"learning_rate": 2.957244223306955e-07,
"loss": 0.6355,
"step": 18495
},
{
"epoch": 0.75,
"learning_rate": 2.952670147874149e-07,
"loss": 0.6472,
"step": 18500
},
{
"epoch": 0.75,
"learning_rate": 2.948098999840877e-07,
"loss": 0.6622,
"step": 18505
},
{
"epoch": 0.75,
"learning_rate": 2.943530781105954e-07,
"loss": 0.6634,
"step": 18510
},
{
"epoch": 0.75,
"learning_rate": 2.938965493566975e-07,
"loss": 0.6532,
"step": 18515
},
{
"epoch": 0.75,
"learning_rate": 2.934403139120327e-07,
"loss": 0.6362,
"step": 18520
},
{
"epoch": 0.75,
"learning_rate": 2.9298437196611704e-07,
"loss": 0.6463,
"step": 18525
},
{
"epoch": 0.75,
"learning_rate": 2.925287237083445e-07,
"loss": 0.6824,
"step": 18530
},
{
"epoch": 0.75,
"learning_rate": 2.9207336932798787e-07,
"loss": 0.6852,
"step": 18535
},
{
"epoch": 0.75,
"learning_rate": 2.9161830901419714e-07,
"loss": 0.6914,
"step": 18540
},
{
"epoch": 0.75,
"learning_rate": 2.9116354295600056e-07,
"loss": 0.6176,
"step": 18545
},
{
"epoch": 0.75,
"learning_rate": 2.907090713423035e-07,
"loss": 0.644,
"step": 18550
},
{
"epoch": 0.75,
"learning_rate": 2.9025489436189034e-07,
"loss": 0.6502,
"step": 18555
},
{
"epoch": 0.75,
"learning_rate": 2.8980101220342167e-07,
"loss": 0.6321,
"step": 18560
},
{
"epoch": 0.75,
"learning_rate": 2.893474250554362e-07,
"loss": 0.6527,
"step": 18565
},
{
"epoch": 0.75,
"learning_rate": 2.8889413310635033e-07,
"loss": 0.6831,
"step": 18570
},
{
"epoch": 0.75,
"learning_rate": 2.884411365444577e-07,
"loss": 0.6418,
"step": 18575
},
{
"epoch": 0.75,
"learning_rate": 2.879884355579285e-07,
"loss": 0.6457,
"step": 18580
},
{
"epoch": 0.75,
"learning_rate": 2.875360303348116e-07,
"loss": 0.622,
"step": 18585
},
{
"epoch": 0.76,
"learning_rate": 2.870839210630317e-07,
"loss": 0.6198,
"step": 18590
},
{
"epoch": 0.76,
"learning_rate": 2.8663210793039114e-07,
"loss": 0.6673,
"step": 18595
},
{
"epoch": 0.76,
"learning_rate": 2.8618059112456907e-07,
"loss": 0.6307,
"step": 18600
},
{
"epoch": 0.76,
"learning_rate": 2.8572937083312196e-07,
"loss": 0.6682,
"step": 18605
},
{
"epoch": 0.76,
"learning_rate": 2.852784472434826e-07,
"loss": 0.6635,
"step": 18610
},
{
"epoch": 0.76,
"learning_rate": 2.8482782054296037e-07,
"loss": 0.6525,
"step": 18615
},
{
"epoch": 0.76,
"learning_rate": 2.843774909187425e-07,
"loss": 0.6445,
"step": 18620
},
{
"epoch": 0.76,
"learning_rate": 2.8392745855789144e-07,
"loss": 0.629,
"step": 18625
},
{
"epoch": 0.76,
"learning_rate": 2.8347772364734656e-07,
"loss": 0.6769,
"step": 18630
},
{
"epoch": 0.76,
"learning_rate": 2.830282863739244e-07,
"loss": 0.6489,
"step": 18635
},
{
"epoch": 0.76,
"learning_rate": 2.8257914692431703e-07,
"loss": 0.6738,
"step": 18640
},
{
"epoch": 0.76,
"learning_rate": 2.8213030548509296e-07,
"loss": 0.6459,
"step": 18645
},
{
"epoch": 0.76,
"learning_rate": 2.816817622426968e-07,
"loss": 0.6447,
"step": 18650
},
{
"epoch": 0.76,
"learning_rate": 2.8123351738345016e-07,
"loss": 0.6607,
"step": 18655
},
{
"epoch": 0.76,
"learning_rate": 2.807855710935496e-07,
"loss": 0.6474,
"step": 18660
},
{
"epoch": 0.76,
"learning_rate": 2.80337923559068e-07,
"loss": 0.6493,
"step": 18665
},
{
"epoch": 0.76,
"learning_rate": 2.798905749659548e-07,
"loss": 0.6176,
"step": 18670
},
{
"epoch": 0.76,
"learning_rate": 2.7944352550003425e-07,
"loss": 0.6472,
"step": 18675
},
{
"epoch": 0.76,
"learning_rate": 2.7899677534700706e-07,
"loss": 0.6353,
"step": 18680
},
{
"epoch": 0.76,
"learning_rate": 2.785503246924489e-07,
"loss": 0.6279,
"step": 18685
},
{
"epoch": 0.76,
"learning_rate": 2.781041737218121e-07,
"loss": 0.6039,
"step": 18690
},
{
"epoch": 0.76,
"learning_rate": 2.7765832262042364e-07,
"loss": 0.6366,
"step": 18695
},
{
"epoch": 0.76,
"learning_rate": 2.772127715734859e-07,
"loss": 0.664,
"step": 18700
},
{
"epoch": 0.76,
"learning_rate": 2.7676752076607747e-07,
"loss": 0.6687,
"step": 18705
},
{
"epoch": 0.76,
"learning_rate": 2.763225703831513e-07,
"loss": 0.6712,
"step": 18710
},
{
"epoch": 0.76,
"learning_rate": 2.7587792060953585e-07,
"loss": 0.659,
"step": 18715
},
{
"epoch": 0.76,
"learning_rate": 2.754335716299351e-07,
"loss": 0.6291,
"step": 18720
},
{
"epoch": 0.76,
"learning_rate": 2.749895236289276e-07,
"loss": 0.6037,
"step": 18725
},
{
"epoch": 0.76,
"learning_rate": 2.745457767909669e-07,
"loss": 0.625,
"step": 18730
},
{
"epoch": 0.76,
"learning_rate": 2.741023313003814e-07,
"loss": 0.6479,
"step": 18735
},
{
"epoch": 0.76,
"learning_rate": 2.736591873413751e-07,
"loss": 0.6515,
"step": 18740
},
{
"epoch": 0.76,
"learning_rate": 2.7321634509802584e-07,
"loss": 0.647,
"step": 18745
},
{
"epoch": 0.76,
"learning_rate": 2.72773804754286e-07,
"loss": 0.6699,
"step": 18750
},
{
"epoch": 0.76,
"learning_rate": 2.723315664939838e-07,
"loss": 0.6759,
"step": 18755
},
{
"epoch": 0.76,
"learning_rate": 2.718896305008207e-07,
"loss": 0.6816,
"step": 18760
},
{
"epoch": 0.76,
"learning_rate": 2.7144799695837283e-07,
"loss": 0.6374,
"step": 18765
},
{
"epoch": 0.76,
"learning_rate": 2.710066660500916e-07,
"loss": 0.6381,
"step": 18770
},
{
"epoch": 0.76,
"learning_rate": 2.705656379593015e-07,
"loss": 0.6504,
"step": 18775
},
{
"epoch": 0.76,
"learning_rate": 2.7012491286920215e-07,
"loss": 0.6652,
"step": 18780
},
{
"epoch": 0.76,
"learning_rate": 2.696844909628662e-07,
"loss": 0.6412,
"step": 18785
},
{
"epoch": 0.76,
"learning_rate": 2.6924437242324195e-07,
"loss": 0.6511,
"step": 18790
},
{
"epoch": 0.76,
"learning_rate": 2.6880455743315045e-07,
"loss": 0.6489,
"step": 18795
},
{
"epoch": 0.76,
"learning_rate": 2.683650461752867e-07,
"loss": 0.6671,
"step": 18800
},
{
"epoch": 0.76,
"learning_rate": 2.6792583883222044e-07,
"loss": 0.6377,
"step": 18805
},
{
"epoch": 0.76,
"learning_rate": 2.6748693558639426e-07,
"loss": 0.6314,
"step": 18810
},
{
"epoch": 0.76,
"learning_rate": 2.6704833662012484e-07,
"loss": 0.6626,
"step": 18815
},
{
"epoch": 0.76,
"learning_rate": 2.66610042115602e-07,
"loss": 0.6428,
"step": 18820
},
{
"epoch": 0.76,
"learning_rate": 2.6617205225489015e-07,
"loss": 0.6139,
"step": 18825
},
{
"epoch": 0.76,
"learning_rate": 2.65734367219926e-07,
"loss": 0.6466,
"step": 18830
},
{
"epoch": 0.77,
"learning_rate": 2.6529698719252e-07,
"loss": 0.6723,
"step": 18835
},
{
"epoch": 0.77,
"learning_rate": 2.6485991235435655e-07,
"loss": 0.6314,
"step": 18840
},
{
"epoch": 0.77,
"learning_rate": 2.644231428869924e-07,
"loss": 0.68,
"step": 18845
},
{
"epoch": 0.77,
"learning_rate": 2.6398667897185754e-07,
"loss": 0.6589,
"step": 18850
},
{
"epoch": 0.77,
"learning_rate": 2.6355052079025573e-07,
"loss": 0.6587,
"step": 18855
},
{
"epoch": 0.77,
"learning_rate": 2.6311466852336326e-07,
"loss": 0.6506,
"step": 18860
},
{
"epoch": 0.77,
"learning_rate": 2.6267912235222913e-07,
"loss": 0.6344,
"step": 18865
},
{
"epoch": 0.77,
"learning_rate": 2.622438824577753e-07,
"loss": 0.6729,
"step": 18870
},
{
"epoch": 0.77,
"learning_rate": 2.61808949020797e-07,
"loss": 0.6573,
"step": 18875
},
{
"epoch": 0.77,
"learning_rate": 2.613743222219618e-07,
"loss": 0.6228,
"step": 18880
},
{
"epoch": 0.77,
"learning_rate": 2.6094000224180934e-07,
"loss": 0.636,
"step": 18885
},
{
"epoch": 0.77,
"learning_rate": 2.6050598926075317e-07,
"loss": 0.6215,
"step": 18890
},
{
"epoch": 0.77,
"learning_rate": 2.6007228345907804e-07,
"loss": 0.6473,
"step": 18895
},
{
"epoch": 0.77,
"learning_rate": 2.596388850169413e-07,
"loss": 0.6789,
"step": 18900
},
{
"epoch": 0.77,
"learning_rate": 2.592057941143736e-07,
"loss": 0.6479,
"step": 18905
},
{
"epoch": 0.77,
"learning_rate": 2.5877301093127677e-07,
"loss": 0.7127,
"step": 18910
},
{
"epoch": 0.77,
"learning_rate": 2.583405356474252e-07,
"loss": 0.6525,
"step": 18915
},
{
"epoch": 0.77,
"learning_rate": 2.57908368442465e-07,
"loss": 0.6547,
"step": 18920
},
{
"epoch": 0.77,
"learning_rate": 2.574765094959153e-07,
"loss": 0.6684,
"step": 18925
},
{
"epoch": 0.77,
"learning_rate": 2.5704495898716615e-07,
"loss": 0.6369,
"step": 18930
},
{
"epoch": 0.77,
"learning_rate": 2.5661371709547983e-07,
"loss": 0.6421,
"step": 18935
},
{
"epoch": 0.77,
"learning_rate": 2.5618278399999094e-07,
"loss": 0.6399,
"step": 18940
},
{
"epoch": 0.77,
"learning_rate": 2.557521598797049e-07,
"loss": 0.5765,
"step": 18945
},
{
"epoch": 0.77,
"learning_rate": 2.553218449134994e-07,
"loss": 0.6161,
"step": 18950
},
{
"epoch": 0.77,
"learning_rate": 2.5489183928012325e-07,
"loss": 0.6204,
"step": 18955
},
{
"epoch": 0.77,
"learning_rate": 2.544621431581975e-07,
"loss": 0.6343,
"step": 18960
},
{
"epoch": 0.77,
"learning_rate": 2.5403275672621407e-07,
"loss": 0.6635,
"step": 18965
},
{
"epoch": 0.77,
"learning_rate": 2.536036801625359e-07,
"loss": 0.6453,
"step": 18970
},
{
"epoch": 0.77,
"learning_rate": 2.531749136453982e-07,
"loss": 0.6791,
"step": 18975
},
{
"epoch": 0.77,
"learning_rate": 2.527464573529068e-07,
"loss": 0.6195,
"step": 18980
},
{
"epoch": 0.77,
"learning_rate": 2.5231831146303817e-07,
"loss": 0.6502,
"step": 18985
},
{
"epoch": 0.77,
"learning_rate": 2.5189047615364124e-07,
"loss": 0.6623,
"step": 18990
},
{
"epoch": 0.77,
"learning_rate": 2.5146295160243457e-07,
"loss": 0.6331,
"step": 18995
},
{
"epoch": 0.77,
"learning_rate": 2.5103573798700816e-07,
"loss": 0.6164,
"step": 19000
},
{
"epoch": 0.77,
"learning_rate": 2.506088354848226e-07,
"loss": 0.6363,
"step": 19005
},
{
"epoch": 0.77,
"learning_rate": 2.5018224427321013e-07,
"loss": 0.6733,
"step": 19010
},
{
"epoch": 0.77,
"learning_rate": 2.497559645293726e-07,
"loss": 0.6452,
"step": 19015
},
{
"epoch": 0.77,
"learning_rate": 2.4932999643038264e-07,
"loss": 0.6344,
"step": 19020
},
{
"epoch": 0.77,
"learning_rate": 2.489043401531844e-07,
"loss": 0.6154,
"step": 19025
},
{
"epoch": 0.77,
"learning_rate": 2.484789958745913e-07,
"loss": 0.6564,
"step": 19030
},
{
"epoch": 0.77,
"learning_rate": 2.4805396377128753e-07,
"loss": 0.6827,
"step": 19035
},
{
"epoch": 0.77,
"learning_rate": 2.476292440198283e-07,
"loss": 0.6415,
"step": 19040
},
{
"epoch": 0.77,
"learning_rate": 2.4720483679663815e-07,
"loss": 0.6611,
"step": 19045
},
{
"epoch": 0.77,
"learning_rate": 2.4678074227801213e-07,
"loss": 0.6308,
"step": 19050
},
{
"epoch": 0.77,
"learning_rate": 2.463569606401151e-07,
"loss": 0.6434,
"step": 19055
},
{
"epoch": 0.77,
"learning_rate": 2.459334920589831e-07,
"loss": 0.6625,
"step": 19060
},
{
"epoch": 0.77,
"learning_rate": 2.455103367105207e-07,
"loss": 0.6416,
"step": 19065
},
{
"epoch": 0.77,
"learning_rate": 2.450874947705027e-07,
"loss": 0.6458,
"step": 19070
},
{
"epoch": 0.77,
"learning_rate": 2.446649664145748e-07,
"loss": 0.6101,
"step": 19075
},
{
"epoch": 0.77,
"learning_rate": 2.4424275181825103e-07,
"loss": 0.6629,
"step": 19080
},
{
"epoch": 0.78,
"learning_rate": 2.4382085115691575e-07,
"loss": 0.6881,
"step": 19085
},
{
"epoch": 0.78,
"learning_rate": 2.4339926460582263e-07,
"loss": 0.6869,
"step": 19090
},
{
"epoch": 0.78,
"learning_rate": 2.429779923400955e-07,
"loss": 0.6448,
"step": 19095
},
{
"epoch": 0.78,
"learning_rate": 2.42557034534727e-07,
"loss": 0.6401,
"step": 19100
},
{
"epoch": 0.78,
"learning_rate": 2.42136391364579e-07,
"loss": 0.6611,
"step": 19105
},
{
"epoch": 0.78,
"learning_rate": 2.4171606300438364e-07,
"loss": 0.6471,
"step": 19110
},
{
"epoch": 0.78,
"learning_rate": 2.4129604962874127e-07,
"loss": 0.6922,
"step": 19115
},
{
"epoch": 0.78,
"learning_rate": 2.4087635141212157e-07,
"loss": 0.6707,
"step": 19120
},
{
"epoch": 0.78,
"learning_rate": 2.404569685288642e-07,
"loss": 0.6398,
"step": 19125
},
{
"epoch": 0.78,
"learning_rate": 2.400379011531768e-07,
"loss": 0.6756,
"step": 19130
},
{
"epoch": 0.78,
"learning_rate": 2.396191494591363e-07,
"loss": 0.6565,
"step": 19135
},
{
"epoch": 0.78,
"learning_rate": 2.392007136206883e-07,
"loss": 0.6649,
"step": 19140
},
{
"epoch": 0.78,
"learning_rate": 2.387825938116478e-07,
"loss": 0.6119,
"step": 19145
},
{
"epoch": 0.78,
"learning_rate": 2.38364790205698e-07,
"loss": 0.6636,
"step": 19150
},
{
"epoch": 0.78,
"learning_rate": 2.3794730297639054e-07,
"loss": 0.6411,
"step": 19155
},
{
"epoch": 0.78,
"learning_rate": 2.3753013229714658e-07,
"loss": 0.6213,
"step": 19160
},
{
"epoch": 0.78,
"learning_rate": 2.3711327834125495e-07,
"loss": 0.6499,
"step": 19165
},
{
"epoch": 0.78,
"learning_rate": 2.3669674128187277e-07,
"loss": 0.6644,
"step": 19170
},
{
"epoch": 0.78,
"learning_rate": 2.3628052129202658e-07,
"loss": 0.6425,
"step": 19175
},
{
"epoch": 0.78,
"learning_rate": 2.3586461854461015e-07,
"loss": 0.6316,
"step": 19180
},
{
"epoch": 0.78,
"learning_rate": 2.3544903321238596e-07,
"loss": 0.6716,
"step": 19185
},
{
"epoch": 0.78,
"learning_rate": 2.3503376546798425e-07,
"loss": 0.6026,
"step": 19190
},
{
"epoch": 0.78,
"learning_rate": 2.3461881548390416e-07,
"loss": 0.6799,
"step": 19195
},
{
"epoch": 0.78,
"learning_rate": 2.3420418343251215e-07,
"loss": 0.6355,
"step": 19200
},
{
"epoch": 0.78,
"eval_loss": 0.6190813779830933,
"eval_runtime": 143.7787,
"eval_samples_per_second": 16.456,
"eval_steps_per_second": 2.747,
"step": 19200
},
{
"epoch": 0.78,
"learning_rate": 2.3378986948604217e-07,
"loss": 0.649,
"step": 19205
},
{
"epoch": 0.78,
"learning_rate": 2.3337587381659752e-07,
"loss": 0.653,
"step": 19210
},
{
"epoch": 0.78,
"learning_rate": 2.3296219659614802e-07,
"loss": 0.6588,
"step": 19215
},
{
"epoch": 0.78,
"learning_rate": 2.3254883799653124e-07,
"loss": 0.6292,
"step": 19220
},
{
"epoch": 0.78,
"learning_rate": 2.321357981894534e-07,
"loss": 0.6651,
"step": 19225
},
{
"epoch": 0.78,
"learning_rate": 2.3172307734648722e-07,
"loss": 0.6464,
"step": 19230
},
{
"epoch": 0.78,
"learning_rate": 2.3131067563907359e-07,
"loss": 0.642,
"step": 19235
},
{
"epoch": 0.78,
"learning_rate": 2.3089859323852002e-07,
"loss": 0.6499,
"step": 19240
},
{
"epoch": 0.78,
"learning_rate": 2.304868303160028e-07,
"loss": 0.6595,
"step": 19245
},
{
"epoch": 0.78,
"learning_rate": 2.3007538704256412e-07,
"loss": 0.627,
"step": 19250
},
{
"epoch": 0.78,
"learning_rate": 2.2966426358911384e-07,
"loss": 0.6531,
"step": 19255
},
{
"epoch": 0.78,
"learning_rate": 2.2925346012642964e-07,
"loss": 0.6453,
"step": 19260
},
{
"epoch": 0.78,
"learning_rate": 2.288429768251553e-07,
"loss": 0.6343,
"step": 19265
},
{
"epoch": 0.78,
"learning_rate": 2.2843281385580215e-07,
"loss": 0.6788,
"step": 19270
},
{
"epoch": 0.78,
"learning_rate": 2.2802297138874792e-07,
"loss": 0.7187,
"step": 19275
},
{
"epoch": 0.78,
"learning_rate": 2.276134495942381e-07,
"loss": 0.6708,
"step": 19280
},
{
"epoch": 0.78,
"learning_rate": 2.2720424864238452e-07,
"loss": 0.6434,
"step": 19285
},
{
"epoch": 0.78,
"learning_rate": 2.2679536870316506e-07,
"loss": 0.6389,
"step": 19290
},
{
"epoch": 0.78,
"learning_rate": 2.2638680994642567e-07,
"loss": 0.6507,
"step": 19295
},
{
"epoch": 0.78,
"learning_rate": 2.259785725418778e-07,
"loss": 0.6413,
"step": 19300
},
{
"epoch": 0.78,
"learning_rate": 2.2557065665909936e-07,
"loss": 0.6542,
"step": 19305
},
{
"epoch": 0.78,
"learning_rate": 2.251630624675357e-07,
"loss": 0.6694,
"step": 19310
},
{
"epoch": 0.78,
"learning_rate": 2.247557901364975e-07,
"loss": 0.6521,
"step": 19315
},
{
"epoch": 0.78,
"learning_rate": 2.2434883983516217e-07,
"loss": 0.6519,
"step": 19320
},
{
"epoch": 0.78,
"learning_rate": 2.239422117325732e-07,
"loss": 0.608,
"step": 19325
},
{
"epoch": 0.79,
"learning_rate": 2.2353590599764083e-07,
"loss": 0.653,
"step": 19330
},
{
"epoch": 0.79,
"learning_rate": 2.2312992279914067e-07,
"loss": 0.6787,
"step": 19335
},
{
"epoch": 0.79,
"learning_rate": 2.2272426230571428e-07,
"loss": 0.5948,
"step": 19340
},
{
"epoch": 0.79,
"learning_rate": 2.223189246858701e-07,
"loss": 0.6391,
"step": 19345
},
{
"epoch": 0.79,
"learning_rate": 2.2191391010798143e-07,
"loss": 0.6242,
"step": 19350
},
{
"epoch": 0.79,
"learning_rate": 2.2150921874028782e-07,
"loss": 0.6666,
"step": 19355
},
{
"epoch": 0.79,
"learning_rate": 2.211048507508949e-07,
"loss": 0.672,
"step": 19360
},
{
"epoch": 0.79,
"learning_rate": 2.2070080630777334e-07,
"loss": 0.6048,
"step": 19365
},
{
"epoch": 0.79,
"learning_rate": 2.202970855787597e-07,
"loss": 0.6577,
"step": 19370
},
{
"epoch": 0.79,
"learning_rate": 2.198936887315559e-07,
"loss": 0.6343,
"step": 19375
},
{
"epoch": 0.79,
"learning_rate": 2.1949061593372986e-07,
"loss": 0.661,
"step": 19380
},
{
"epoch": 0.79,
"learning_rate": 2.1908786735271434e-07,
"loss": 0.635,
"step": 19385
},
{
"epoch": 0.79,
"learning_rate": 2.1868544315580728e-07,
"loss": 0.661,
"step": 19390
},
{
"epoch": 0.79,
"learning_rate": 2.1828334351017286e-07,
"loss": 0.6658,
"step": 19395
},
{
"epoch": 0.79,
"learning_rate": 2.1788156858283923e-07,
"loss": 0.6291,
"step": 19400
},
{
"epoch": 0.79,
"learning_rate": 2.174801185407006e-07,
"loss": 0.6638,
"step": 19405
},
{
"epoch": 0.79,
"learning_rate": 2.1707899355051518e-07,
"loss": 0.6319,
"step": 19410
},
{
"epoch": 0.79,
"learning_rate": 2.166781937789075e-07,
"loss": 0.6331,
"step": 19415
},
{
"epoch": 0.79,
"learning_rate": 2.1627771939236606e-07,
"loss": 0.5873,
"step": 19420
},
{
"epoch": 0.79,
"learning_rate": 2.158775705572441e-07,
"loss": 0.6696,
"step": 19425
},
{
"epoch": 0.79,
"learning_rate": 2.1547774743976055e-07,
"loss": 0.6585,
"step": 19430
},
{
"epoch": 0.79,
"learning_rate": 2.1507825020599822e-07,
"loss": 0.6483,
"step": 19435
},
{
"epoch": 0.79,
"learning_rate": 2.146790790219044e-07,
"loss": 0.6506,
"step": 19440
},
{
"epoch": 0.79,
"learning_rate": 2.14280234053292e-07,
"loss": 0.6411,
"step": 19445
},
{
"epoch": 0.79,
"learning_rate": 2.1388171546583745e-07,
"loss": 0.7025,
"step": 19450
},
{
"epoch": 0.79,
"learning_rate": 2.1348352342508181e-07,
"loss": 0.6499,
"step": 19455
},
{
"epoch": 0.79,
"learning_rate": 2.1308565809643042e-07,
"loss": 0.6437,
"step": 19460
},
{
"epoch": 0.79,
"learning_rate": 2.1268811964515354e-07,
"loss": 0.6432,
"step": 19465
},
{
"epoch": 0.79,
"learning_rate": 2.1229090823638507e-07,
"loss": 0.6752,
"step": 19470
},
{
"epoch": 0.79,
"learning_rate": 2.1189402403512268e-07,
"loss": 0.6625,
"step": 19475
},
{
"epoch": 0.79,
"learning_rate": 2.1149746720622929e-07,
"loss": 0.6549,
"step": 19480
},
{
"epoch": 0.79,
"learning_rate": 2.111012379144309e-07,
"loss": 0.6326,
"step": 19485
},
{
"epoch": 0.79,
"learning_rate": 2.107053363243174e-07,
"loss": 0.683,
"step": 19490
},
{
"epoch": 0.79,
"learning_rate": 2.1030976260034338e-07,
"loss": 0.6442,
"step": 19495
},
{
"epoch": 0.79,
"learning_rate": 2.099145169068266e-07,
"loss": 0.647,
"step": 19500
},
{
"epoch": 0.79,
"learning_rate": 2.095195994079485e-07,
"loss": 0.6238,
"step": 19505
},
{
"epoch": 0.79,
"learning_rate": 2.091250102677542e-07,
"loss": 0.6403,
"step": 19510
},
{
"epoch": 0.79,
"learning_rate": 2.0873074965015335e-07,
"loss": 0.6408,
"step": 19515
},
{
"epoch": 0.79,
"learning_rate": 2.0833681771891743e-07,
"loss": 0.6941,
"step": 19520
},
{
"epoch": 0.79,
"learning_rate": 2.0794321463768273e-07,
"loss": 0.6483,
"step": 19525
},
{
"epoch": 0.79,
"learning_rate": 2.075499405699489e-07,
"loss": 0.6438,
"step": 19530
},
{
"epoch": 0.79,
"learning_rate": 2.071569956790782e-07,
"loss": 0.6856,
"step": 19535
},
{
"epoch": 0.79,
"learning_rate": 2.0676438012829668e-07,
"loss": 0.6519,
"step": 19540
},
{
"epoch": 0.79,
"learning_rate": 2.06372094080693e-07,
"loss": 0.6411,
"step": 19545
},
{
"epoch": 0.79,
"learning_rate": 2.0598013769922008e-07,
"loss": 0.6776,
"step": 19550
},
{
"epoch": 0.79,
"learning_rate": 2.0558851114669274e-07,
"loss": 0.6476,
"step": 19555
},
{
"epoch": 0.79,
"learning_rate": 2.051972145857891e-07,
"loss": 0.6632,
"step": 19560
},
{
"epoch": 0.79,
"learning_rate": 2.0480624817905113e-07,
"loss": 0.6584,
"step": 19565
},
{
"epoch": 0.79,
"learning_rate": 2.044156120888818e-07,
"loss": 0.613,
"step": 19570
},
{
"epoch": 0.8,
"learning_rate": 2.0402530647754844e-07,
"loss": 0.6221,
"step": 19575
},
{
"epoch": 0.8,
"learning_rate": 2.0363533150718093e-07,
"loss": 0.6513,
"step": 19580
},
{
"epoch": 0.8,
"learning_rate": 2.032456873397711e-07,
"loss": 0.641,
"step": 19585
},
{
"epoch": 0.8,
"learning_rate": 2.0285637413717395e-07,
"loss": 0.6591,
"step": 19590
},
{
"epoch": 0.8,
"learning_rate": 2.024673920611063e-07,
"loss": 0.674,
"step": 19595
},
{
"epoch": 0.8,
"learning_rate": 2.0207874127314862e-07,
"loss": 0.668,
"step": 19600
},
{
"epoch": 0.8,
"learning_rate": 2.0169042193474283e-07,
"loss": 0.6521,
"step": 19605
},
{
"epoch": 0.8,
"learning_rate": 2.0130243420719294e-07,
"loss": 0.6629,
"step": 19610
},
{
"epoch": 0.8,
"learning_rate": 2.0091477825166636e-07,
"loss": 0.6329,
"step": 19615
},
{
"epoch": 0.8,
"learning_rate": 2.0052745422919183e-07,
"loss": 0.6239,
"step": 19620
},
{
"epoch": 0.8,
"learning_rate": 2.0014046230065985e-07,
"loss": 0.6888,
"step": 19625
},
{
"epoch": 0.8,
"learning_rate": 1.9975380262682429e-07,
"loss": 0.6387,
"step": 19630
},
{
"epoch": 0.8,
"learning_rate": 1.993674753682998e-07,
"loss": 0.6091,
"step": 19635
},
{
"epoch": 0.8,
"learning_rate": 1.9898148068556332e-07,
"loss": 0.6518,
"step": 19640
},
{
"epoch": 0.8,
"learning_rate": 1.985958187389536e-07,
"loss": 0.6446,
"step": 19645
},
{
"epoch": 0.8,
"learning_rate": 1.982104896886716e-07,
"loss": 0.6669,
"step": 19650
},
{
"epoch": 0.8,
"learning_rate": 1.9782549369477952e-07,
"loss": 0.6802,
"step": 19655
},
{
"epoch": 0.8,
"learning_rate": 1.974408309172011e-07,
"loss": 0.6449,
"step": 19660
},
{
"epoch": 0.8,
"learning_rate": 1.970565015157223e-07,
"loss": 0.5942,
"step": 19665
},
{
"epoch": 0.8,
"learning_rate": 1.9667250564999006e-07,
"loss": 0.68,
"step": 19670
},
{
"epoch": 0.8,
"learning_rate": 1.962888434795129e-07,
"loss": 0.6198,
"step": 19675
},
{
"epoch": 0.8,
"learning_rate": 1.959055151636605e-07,
"loss": 0.6153,
"step": 19680
},
{
"epoch": 0.8,
"learning_rate": 1.9552252086166465e-07,
"loss": 0.6893,
"step": 19685
},
{
"epoch": 0.8,
"learning_rate": 1.9513986073261757e-07,
"loss": 0.6095,
"step": 19690
},
{
"epoch": 0.8,
"learning_rate": 1.9475753493547254e-07,
"loss": 0.6591,
"step": 19695
},
{
"epoch": 0.8,
"learning_rate": 1.943755436290454e-07,
"loss": 0.6112,
"step": 19700
},
{
"epoch": 0.8,
"learning_rate": 1.939938869720108e-07,
"loss": 0.653,
"step": 19705
},
{
"epoch": 0.8,
"learning_rate": 1.9361256512290624e-07,
"loss": 0.6472,
"step": 19710
},
{
"epoch": 0.8,
"learning_rate": 1.932315782401297e-07,
"loss": 0.6746,
"step": 19715
},
{
"epoch": 0.8,
"learning_rate": 1.9285092648193947e-07,
"loss": 0.6383,
"step": 19720
},
{
"epoch": 0.8,
"learning_rate": 1.9247061000645515e-07,
"loss": 0.6605,
"step": 19725
},
{
"epoch": 0.8,
"learning_rate": 1.920906289716565e-07,
"loss": 0.6241,
"step": 19730
},
{
"epoch": 0.8,
"learning_rate": 1.9171098353538494e-07,
"loss": 0.6597,
"step": 19735
},
{
"epoch": 0.8,
"learning_rate": 1.9133167385534167e-07,
"loss": 0.6183,
"step": 19740
},
{
"epoch": 0.8,
"learning_rate": 1.9095270008908815e-07,
"loss": 0.6427,
"step": 19745
},
{
"epoch": 0.8,
"learning_rate": 1.9057406239404784e-07,
"loss": 0.645,
"step": 19750
},
{
"epoch": 0.8,
"learning_rate": 1.9019576092750234e-07,
"loss": 0.6908,
"step": 19755
},
{
"epoch": 0.8,
"learning_rate": 1.898177958465953e-07,
"loss": 0.5862,
"step": 19760
},
{
"epoch": 0.8,
"learning_rate": 1.8944016730833045e-07,
"loss": 0.6414,
"step": 19765
},
{
"epoch": 0.8,
"learning_rate": 1.8906287546957122e-07,
"loss": 0.6513,
"step": 19770
},
{
"epoch": 0.8,
"learning_rate": 1.8868592048704125e-07,
"loss": 0.6557,
"step": 19775
},
{
"epoch": 0.8,
"learning_rate": 1.8830930251732403e-07,
"loss": 0.6538,
"step": 19780
},
{
"epoch": 0.8,
"learning_rate": 1.8793302171686398e-07,
"loss": 0.6498,
"step": 19785
},
{
"epoch": 0.8,
"learning_rate": 1.8755707824196476e-07,
"loss": 0.6455,
"step": 19790
},
{
"epoch": 0.8,
"learning_rate": 1.8718147224878954e-07,
"loss": 0.6459,
"step": 19795
},
{
"epoch": 0.8,
"learning_rate": 1.8680620389336267e-07,
"loss": 0.626,
"step": 19800
},
{
"epoch": 0.8,
"learning_rate": 1.8643127333156628e-07,
"loss": 0.6689,
"step": 19805
},
{
"epoch": 0.8,
"learning_rate": 1.8605668071914404e-07,
"loss": 0.6425,
"step": 19810
},
{
"epoch": 0.8,
"learning_rate": 1.8568242621169806e-07,
"loss": 0.6661,
"step": 19815
},
{
"epoch": 0.81,
"learning_rate": 1.8530850996469083e-07,
"loss": 0.6463,
"step": 19820
},
{
"epoch": 0.81,
"learning_rate": 1.8493493213344358e-07,
"loss": 0.6842,
"step": 19825
},
{
"epoch": 0.81,
"learning_rate": 1.8456169287313716e-07,
"loss": 0.6676,
"step": 19830
},
{
"epoch": 0.81,
"learning_rate": 1.8418879233881267e-07,
"loss": 0.6673,
"step": 19835
},
{
"epoch": 0.81,
"learning_rate": 1.8381623068536866e-07,
"loss": 0.6137,
"step": 19840
},
{
"epoch": 0.81,
"learning_rate": 1.8344400806756455e-07,
"loss": 0.6686,
"step": 19845
},
{
"epoch": 0.81,
"learning_rate": 1.8307212464001888e-07,
"loss": 0.6486,
"step": 19850
},
{
"epoch": 0.81,
"learning_rate": 1.827005805572077e-07,
"loss": 0.6125,
"step": 19855
},
{
"epoch": 0.81,
"learning_rate": 1.823293759734681e-07,
"loss": 0.6306,
"step": 19860
},
{
"epoch": 0.81,
"learning_rate": 1.8195851104299465e-07,
"loss": 0.6946,
"step": 19865
},
{
"epoch": 0.81,
"learning_rate": 1.8158798591984194e-07,
"loss": 0.6081,
"step": 19870
},
{
"epoch": 0.81,
"learning_rate": 1.8121780075792258e-07,
"loss": 0.6554,
"step": 19875
},
{
"epoch": 0.81,
"learning_rate": 1.8084795571100809e-07,
"loss": 0.6768,
"step": 19880
},
{
"epoch": 0.81,
"learning_rate": 1.8047845093272963e-07,
"loss": 0.6378,
"step": 19885
},
{
"epoch": 0.81,
"learning_rate": 1.8010928657657521e-07,
"loss": 0.6416,
"step": 19890
},
{
"epoch": 0.81,
"learning_rate": 1.7974046279589304e-07,
"loss": 0.6047,
"step": 19895
},
{
"epoch": 0.81,
"learning_rate": 1.793719797438895e-07,
"loss": 0.6747,
"step": 19900
},
{
"epoch": 0.81,
"learning_rate": 1.7900383757362913e-07,
"loss": 0.6308,
"step": 19905
},
{
"epoch": 0.81,
"learning_rate": 1.7863603643803481e-07,
"loss": 0.6584,
"step": 19910
},
{
"epoch": 0.81,
"learning_rate": 1.782685764898878e-07,
"loss": 0.6723,
"step": 19915
},
{
"epoch": 0.81,
"learning_rate": 1.779014578818283e-07,
"loss": 0.6617,
"step": 19920
},
{
"epoch": 0.81,
"learning_rate": 1.775346807663538e-07,
"loss": 0.6726,
"step": 19925
},
{
"epoch": 0.81,
"learning_rate": 1.771682452958202e-07,
"loss": 0.6528,
"step": 19930
},
{
"epoch": 0.81,
"learning_rate": 1.7680215162244228e-07,
"loss": 0.6322,
"step": 19935
},
{
"epoch": 0.81,
"learning_rate": 1.7643639989829128e-07,
"loss": 0.6696,
"step": 19940
},
{
"epoch": 0.81,
"learning_rate": 1.7607099027529792e-07,
"loss": 0.6936,
"step": 19945
},
{
"epoch": 0.81,
"learning_rate": 1.7570592290524966e-07,
"loss": 0.6281,
"step": 19950
},
{
"epoch": 0.81,
"learning_rate": 1.7534119793979286e-07,
"loss": 0.6463,
"step": 19955
},
{
"epoch": 0.81,
"learning_rate": 1.7497681553043086e-07,
"loss": 0.6862,
"step": 19960
},
{
"epoch": 0.81,
"learning_rate": 1.7461277582852473e-07,
"loss": 0.643,
"step": 19965
},
{
"epoch": 0.81,
"learning_rate": 1.7424907898529406e-07,
"loss": 0.6482,
"step": 19970
},
{
"epoch": 0.81,
"learning_rate": 1.7388572515181444e-07,
"loss": 0.6563,
"step": 19975
},
{
"epoch": 0.81,
"learning_rate": 1.7352271447902033e-07,
"loss": 0.6614,
"step": 19980
},
{
"epoch": 0.81,
"learning_rate": 1.731600471177037e-07,
"loss": 0.6491,
"step": 19985
},
{
"epoch": 0.81,
"learning_rate": 1.727977232185125e-07,
"loss": 0.6552,
"step": 19990
},
{
"epoch": 0.81,
"learning_rate": 1.7243574293195363e-07,
"loss": 0.6329,
"step": 19995
},
{
"epoch": 0.81,
"learning_rate": 1.7207410640838992e-07,
"loss": 0.6362,
"step": 20000
},
{
"epoch": 0.81,
"eval_loss": 0.6180657744407654,
"eval_runtime": 139.6405,
"eval_samples_per_second": 16.944,
"eval_steps_per_second": 2.829,
"step": 20000
},
{
"epoch": 0.81,
"learning_rate": 1.7171281379804282e-07,
"loss": 0.6774,
"step": 20005
},
{
"epoch": 0.81,
"learning_rate": 1.7135186525098965e-07,
"loss": 0.6437,
"step": 20010
},
{
"epoch": 0.81,
"learning_rate": 1.709912609171651e-07,
"loss": 0.6578,
"step": 20015
},
{
"epoch": 0.81,
"learning_rate": 1.7063100094636195e-07,
"loss": 0.661,
"step": 20020
},
{
"epoch": 0.81,
"learning_rate": 1.7027108548822788e-07,
"loss": 0.6436,
"step": 20025
},
{
"epoch": 0.81,
"learning_rate": 1.6991151469226928e-07,
"loss": 0.7003,
"step": 20030
},
{
"epoch": 0.81,
"learning_rate": 1.695522887078491e-07,
"loss": 0.6573,
"step": 20035
},
{
"epoch": 0.81,
"learning_rate": 1.6919340768418577e-07,
"loss": 0.6348,
"step": 20040
},
{
"epoch": 0.81,
"learning_rate": 1.6883487177035616e-07,
"loss": 0.6514,
"step": 20045
},
{
"epoch": 0.81,
"learning_rate": 1.6847668111529234e-07,
"loss": 0.6425,
"step": 20050
},
{
"epoch": 0.81,
"learning_rate": 1.681188358677842e-07,
"loss": 0.6413,
"step": 20055
},
{
"epoch": 0.81,
"learning_rate": 1.6776133617647724e-07,
"loss": 0.6331,
"step": 20060
},
{
"epoch": 0.82,
"learning_rate": 1.674041821898735e-07,
"loss": 0.6977,
"step": 20065
},
{
"epoch": 0.82,
"learning_rate": 1.670473740563323e-07,
"loss": 0.6412,
"step": 20070
},
{
"epoch": 0.82,
"learning_rate": 1.666909119240678e-07,
"loss": 0.6476,
"step": 20075
},
{
"epoch": 0.82,
"learning_rate": 1.6633479594115184e-07,
"loss": 0.6352,
"step": 20080
},
{
"epoch": 0.82,
"learning_rate": 1.6597902625551185e-07,
"loss": 0.7082,
"step": 20085
},
{
"epoch": 0.82,
"learning_rate": 1.6562360301493106e-07,
"loss": 0.6911,
"step": 20090
},
{
"epoch": 0.82,
"learning_rate": 1.6526852636704968e-07,
"loss": 0.6373,
"step": 20095
},
{
"epoch": 0.82,
"learning_rate": 1.6491379645936298e-07,
"loss": 0.6378,
"step": 20100
},
{
"epoch": 0.82,
"learning_rate": 1.6455941343922354e-07,
"loss": 0.6746,
"step": 20105
},
{
"epoch": 0.82,
"learning_rate": 1.642053774538379e-07,
"loss": 0.6449,
"step": 20110
},
{
"epoch": 0.82,
"learning_rate": 1.6385168865027012e-07,
"loss": 0.6542,
"step": 20115
},
{
"epoch": 0.82,
"learning_rate": 1.6349834717543975e-07,
"loss": 0.637,
"step": 20120
},
{
"epoch": 0.82,
"learning_rate": 1.63145353176121e-07,
"loss": 0.6517,
"step": 20125
},
{
"epoch": 0.82,
"learning_rate": 1.6279270679894507e-07,
"loss": 0.6613,
"step": 20130
},
{
"epoch": 0.82,
"learning_rate": 1.6244040819039772e-07,
"loss": 0.6674,
"step": 20135
},
{
"epoch": 0.82,
"learning_rate": 1.6208845749682144e-07,
"loss": 0.641,
"step": 20140
},
{
"epoch": 0.82,
"learning_rate": 1.617368548644129e-07,
"loss": 0.6318,
"step": 20145
},
{
"epoch": 0.82,
"learning_rate": 1.6138560043922488e-07,
"loss": 0.6804,
"step": 20150
},
{
"epoch": 0.82,
"learning_rate": 1.6103469436716587e-07,
"loss": 0.6316,
"step": 20155
},
{
"epoch": 0.82,
"learning_rate": 1.606841367939984e-07,
"loss": 0.6841,
"step": 20160
},
{
"epoch": 0.82,
"learning_rate": 1.603339278653414e-07,
"loss": 0.6406,
"step": 20165
},
{
"epoch": 0.82,
"learning_rate": 1.5998406772666916e-07,
"loss": 0.6244,
"step": 20170
},
{
"epoch": 0.82,
"learning_rate": 1.596345565233096e-07,
"loss": 0.6496,
"step": 20175
},
{
"epoch": 0.82,
"learning_rate": 1.592853944004473e-07,
"loss": 0.6582,
"step": 20180
},
{
"epoch": 0.82,
"learning_rate": 1.5893658150312071e-07,
"loss": 0.6606,
"step": 20185
},
{
"epoch": 0.82,
"learning_rate": 1.5858811797622418e-07,
"loss": 0.6386,
"step": 20190
},
{
"epoch": 0.82,
"learning_rate": 1.582400039645062e-07,
"loss": 0.6196,
"step": 20195
},
{
"epoch": 0.82,
"learning_rate": 1.5789223961257003e-07,
"loss": 0.6316,
"step": 20200
},
{
"epoch": 0.82,
"learning_rate": 1.5754482506487465e-07,
"loss": 0.6206,
"step": 20205
},
{
"epoch": 0.82,
"learning_rate": 1.5719776046573207e-07,
"loss": 0.6245,
"step": 20210
},
{
"epoch": 0.82,
"learning_rate": 1.5685104595931054e-07,
"loss": 0.663,
"step": 20215
},
{
"epoch": 0.82,
"learning_rate": 1.5650468168963249e-07,
"loss": 0.6784,
"step": 20220
},
{
"epoch": 0.82,
"learning_rate": 1.5615866780057385e-07,
"loss": 0.5968,
"step": 20225
},
{
"epoch": 0.82,
"learning_rate": 1.5581300443586643e-07,
"loss": 0.6721,
"step": 20230
},
{
"epoch": 0.82,
"learning_rate": 1.5546769173909534e-07,
"loss": 0.659,
"step": 20235
},
{
"epoch": 0.82,
"learning_rate": 1.551227298537011e-07,
"loss": 0.6432,
"step": 20240
},
{
"epoch": 0.82,
"learning_rate": 1.547781189229771e-07,
"loss": 0.6608,
"step": 20245
},
{
"epoch": 0.82,
"learning_rate": 1.54433859090072e-07,
"loss": 0.665,
"step": 20250
},
{
"epoch": 0.82,
"learning_rate": 1.5408995049798888e-07,
"loss": 0.6538,
"step": 20255
},
{
"epoch": 0.82,
"learning_rate": 1.537463932895836e-07,
"loss": 0.6964,
"step": 20260
},
{
"epoch": 0.82,
"learning_rate": 1.5340318760756731e-07,
"loss": 0.642,
"step": 20265
},
{
"epoch": 0.82,
"learning_rate": 1.5306033359450454e-07,
"loss": 0.6475,
"step": 20270
},
{
"epoch": 0.82,
"learning_rate": 1.5271783139281357e-07,
"loss": 0.6927,
"step": 20275
},
{
"epoch": 0.82,
"learning_rate": 1.523756811447674e-07,
"loss": 0.6862,
"step": 20280
},
{
"epoch": 0.82,
"learning_rate": 1.5203388299249176e-07,
"loss": 0.6319,
"step": 20285
},
{
"epoch": 0.82,
"learning_rate": 1.516924370779673e-07,
"loss": 0.649,
"step": 20290
},
{
"epoch": 0.82,
"learning_rate": 1.513513435430267e-07,
"loss": 0.6765,
"step": 20295
},
{
"epoch": 0.82,
"learning_rate": 1.5101060252935783e-07,
"loss": 0.6901,
"step": 20300
},
{
"epoch": 0.82,
"learning_rate": 1.50670214178502e-07,
"loss": 0.6132,
"step": 20305
},
{
"epoch": 0.82,
"learning_rate": 1.503301786318526e-07,
"loss": 0.6503,
"step": 20310
},
{
"epoch": 0.83,
"learning_rate": 1.4999049603065805e-07,
"loss": 0.6773,
"step": 20315
},
{
"epoch": 0.83,
"learning_rate": 1.496511665160195e-07,
"loss": 0.6379,
"step": 20320
},
{
"epoch": 0.83,
"learning_rate": 1.4931219022889107e-07,
"loss": 0.6177,
"step": 20325
},
{
"epoch": 0.83,
"learning_rate": 1.4897356731008125e-07,
"loss": 0.6606,
"step": 20330
},
{
"epoch": 0.83,
"learning_rate": 1.486352979002503e-07,
"loss": 0.6543,
"step": 20335
},
{
"epoch": 0.83,
"learning_rate": 1.4829738213991328e-07,
"loss": 0.6527,
"step": 20340
},
{
"epoch": 0.83,
"learning_rate": 1.4795982016943654e-07,
"loss": 0.6489,
"step": 20345
},
{
"epoch": 0.83,
"learning_rate": 1.476226121290408e-07,
"loss": 0.6398,
"step": 20350
},
{
"epoch": 0.83,
"learning_rate": 1.4728575815879973e-07,
"loss": 0.6249,
"step": 20355
},
{
"epoch": 0.83,
"learning_rate": 1.469492583986387e-07,
"loss": 0.6764,
"step": 20360
},
{
"epoch": 0.83,
"learning_rate": 1.4661311298833755e-07,
"loss": 0.6486,
"step": 20365
},
{
"epoch": 0.83,
"learning_rate": 1.4627732206752786e-07,
"loss": 0.6423,
"step": 20370
},
{
"epoch": 0.83,
"learning_rate": 1.4594188577569412e-07,
"loss": 0.6952,
"step": 20375
},
{
"epoch": 0.83,
"learning_rate": 1.4560680425217364e-07,
"loss": 0.684,
"step": 20380
},
{
"epoch": 0.83,
"learning_rate": 1.4527207763615647e-07,
"loss": 0.6607,
"step": 20385
},
{
"epoch": 0.83,
"learning_rate": 1.4493770606668565e-07,
"loss": 0.6086,
"step": 20390
},
{
"epoch": 0.83,
"learning_rate": 1.4460368968265524e-07,
"loss": 0.625,
"step": 20395
},
{
"epoch": 0.83,
"learning_rate": 1.4427002862281356e-07,
"loss": 0.6509,
"step": 20400
},
{
"epoch": 0.83,
"learning_rate": 1.439367230257602e-07,
"loss": 0.6514,
"step": 20405
},
{
"epoch": 0.83,
"learning_rate": 1.4360377302994708e-07,
"loss": 0.6664,
"step": 20410
},
{
"epoch": 0.83,
"learning_rate": 1.4327117877367933e-07,
"loss": 0.6719,
"step": 20415
},
{
"epoch": 0.83,
"learning_rate": 1.4293894039511324e-07,
"loss": 0.6728,
"step": 20420
},
{
"epoch": 0.83,
"learning_rate": 1.4260705803225838e-07,
"loss": 0.617,
"step": 20425
},
{
"epoch": 0.83,
"learning_rate": 1.4227553182297492e-07,
"loss": 0.6401,
"step": 20430
},
{
"epoch": 0.83,
"learning_rate": 1.4194436190497638e-07,
"loss": 0.6788,
"step": 20435
},
{
"epoch": 0.83,
"learning_rate": 1.416135484158284e-07,
"loss": 0.6974,
"step": 20440
},
{
"epoch": 0.83,
"learning_rate": 1.4128309149294694e-07,
"loss": 0.6263,
"step": 20445
},
{
"epoch": 0.83,
"learning_rate": 1.4095299127360183e-07,
"loss": 0.6399,
"step": 20450
},
{
"epoch": 0.83,
"learning_rate": 1.4062324789491352e-07,
"loss": 0.6801,
"step": 20455
},
{
"epoch": 0.83,
"learning_rate": 1.4029386149385425e-07,
"loss": 0.6187,
"step": 20460
},
{
"epoch": 0.83,
"learning_rate": 1.3996483220724876e-07,
"loss": 0.63,
"step": 20465
},
{
"epoch": 0.83,
"learning_rate": 1.396361601717726e-07,
"loss": 0.6627,
"step": 20470
},
{
"epoch": 0.83,
"learning_rate": 1.3930784552395381e-07,
"loss": 0.6438,
"step": 20475
},
{
"epoch": 0.83,
"learning_rate": 1.389798884001706e-07,
"loss": 0.6468,
"step": 20480
},
{
"epoch": 0.83,
"learning_rate": 1.3865228893665393e-07,
"loss": 0.588,
"step": 20485
},
{
"epoch": 0.83,
"learning_rate": 1.3832504726948623e-07,
"loss": 0.6959,
"step": 20490
},
{
"epoch": 0.83,
"learning_rate": 1.3799816353460003e-07,
"loss": 0.658,
"step": 20495
},
{
"epoch": 0.83,
"learning_rate": 1.3767163786778046e-07,
"loss": 0.6279,
"step": 20500
},
{
"epoch": 0.83,
"learning_rate": 1.3734547040466348e-07,
"loss": 0.6533,
"step": 20505
},
{
"epoch": 0.83,
"learning_rate": 1.3701966128073605e-07,
"loss": 0.6677,
"step": 20510
},
{
"epoch": 0.83,
"learning_rate": 1.3669421063133623e-07,
"loss": 0.6677,
"step": 20515
},
{
"epoch": 0.83,
"learning_rate": 1.3636911859165357e-07,
"loss": 0.6375,
"step": 20520
},
{
"epoch": 0.83,
"learning_rate": 1.3604438529672913e-07,
"loss": 0.6312,
"step": 20525
},
{
"epoch": 0.83,
"learning_rate": 1.3572001088145312e-07,
"loss": 0.6548,
"step": 20530
},
{
"epoch": 0.83,
"learning_rate": 1.3539599548056879e-07,
"loss": 0.6392,
"step": 20535
},
{
"epoch": 0.83,
"learning_rate": 1.350723392286689e-07,
"loss": 0.6157,
"step": 20540
},
{
"epoch": 0.83,
"learning_rate": 1.3474904226019736e-07,
"loss": 0.6478,
"step": 20545
},
{
"epoch": 0.83,
"learning_rate": 1.3442610470944925e-07,
"loss": 0.6769,
"step": 20550
},
{
"epoch": 0.83,
"learning_rate": 1.341035267105699e-07,
"loss": 0.6771,
"step": 20555
},
{
"epoch": 0.84,
"learning_rate": 1.3378130839755532e-07,
"loss": 0.6579,
"step": 20560
},
{
"epoch": 0.84,
"learning_rate": 1.3345944990425195e-07,
"loss": 0.6595,
"step": 20565
},
{
"epoch": 0.84,
"learning_rate": 1.3313795136435736e-07,
"loss": 0.6163,
"step": 20570
},
{
"epoch": 0.84,
"learning_rate": 1.3281681291141955e-07,
"loss": 0.6473,
"step": 20575
},
{
"epoch": 0.84,
"learning_rate": 1.3249603467883586e-07,
"loss": 0.6347,
"step": 20580
},
{
"epoch": 0.84,
"learning_rate": 1.3217561679985545e-07,
"loss": 0.6629,
"step": 20585
},
{
"epoch": 0.84,
"learning_rate": 1.3185555940757674e-07,
"loss": 0.6555,
"step": 20590
},
{
"epoch": 0.84,
"learning_rate": 1.3153586263494876e-07,
"loss": 0.6274,
"step": 20595
},
{
"epoch": 0.84,
"learning_rate": 1.3121652661477112e-07,
"loss": 0.6048,
"step": 20600
},
{
"epoch": 0.84,
"learning_rate": 1.3089755147969294e-07,
"loss": 0.6473,
"step": 20605
},
{
"epoch": 0.84,
"learning_rate": 1.3057893736221392e-07,
"loss": 0.6855,
"step": 20610
},
{
"epoch": 0.84,
"learning_rate": 1.3026068439468318e-07,
"loss": 0.6465,
"step": 20615
},
{
"epoch": 0.84,
"learning_rate": 1.2994279270930052e-07,
"loss": 0.6463,
"step": 20620
},
{
"epoch": 0.84,
"learning_rate": 1.2962526243811577e-07,
"loss": 0.6503,
"step": 20625
},
{
"epoch": 0.84,
"learning_rate": 1.2930809371302741e-07,
"loss": 0.5882,
"step": 20630
},
{
"epoch": 0.84,
"learning_rate": 1.289912866657854e-07,
"loss": 0.6766,
"step": 20635
},
{
"epoch": 0.84,
"learning_rate": 1.2867484142798813e-07,
"loss": 0.6454,
"step": 20640
},
{
"epoch": 0.84,
"learning_rate": 1.283587581310841e-07,
"loss": 0.6341,
"step": 20645
},
{
"epoch": 0.84,
"learning_rate": 1.2804303690637197e-07,
"loss": 0.6512,
"step": 20650
},
{
"epoch": 0.84,
"learning_rate": 1.2772767788499917e-07,
"loss": 0.6695,
"step": 20655
},
{
"epoch": 0.84,
"learning_rate": 1.274126811979639e-07,
"loss": 0.6492,
"step": 20660
},
{
"epoch": 0.84,
"learning_rate": 1.2709804697611193e-07,
"loss": 0.6555,
"step": 20665
},
{
"epoch": 0.84,
"learning_rate": 1.267837753501403e-07,
"loss": 0.616,
"step": 20670
},
{
"epoch": 0.84,
"learning_rate": 1.2646986645059454e-07,
"loss": 0.6314,
"step": 20675
},
{
"epoch": 0.84,
"learning_rate": 1.261563204078695e-07,
"loss": 0.6754,
"step": 20680
},
{
"epoch": 0.84,
"learning_rate": 1.2584313735220987e-07,
"loss": 0.6445,
"step": 20685
},
{
"epoch": 0.84,
"learning_rate": 1.255303174137089e-07,
"loss": 0.6318,
"step": 20690
},
{
"epoch": 0.84,
"learning_rate": 1.2521786072230933e-07,
"loss": 0.6744,
"step": 20695
},
{
"epoch": 0.84,
"learning_rate": 1.249057674078028e-07,
"loss": 0.6755,
"step": 20700
},
{
"epoch": 0.84,
"learning_rate": 1.2459403759983023e-07,
"loss": 0.6621,
"step": 20705
},
{
"epoch": 0.84,
"learning_rate": 1.2428267142788195e-07,
"loss": 0.5975,
"step": 20710
},
{
"epoch": 0.84,
"learning_rate": 1.2397166902129595e-07,
"loss": 0.6464,
"step": 20715
},
{
"epoch": 0.84,
"learning_rate": 1.2366103050926057e-07,
"loss": 0.6505,
"step": 20720
},
{
"epoch": 0.84,
"learning_rate": 1.2335075602081202e-07,
"loss": 0.6541,
"step": 20725
},
{
"epoch": 0.84,
"learning_rate": 1.2304084568483552e-07,
"loss": 0.6424,
"step": 20730
},
{
"epoch": 0.84,
"learning_rate": 1.2273129963006558e-07,
"loss": 0.6359,
"step": 20735
},
{
"epoch": 0.84,
"learning_rate": 1.224221179850846e-07,
"loss": 0.6211,
"step": 20740
},
{
"epoch": 0.84,
"learning_rate": 1.2211330087832404e-07,
"loss": 0.6651,
"step": 20745
},
{
"epoch": 0.84,
"learning_rate": 1.218048484380636e-07,
"loss": 0.6969,
"step": 20750
},
{
"epoch": 0.84,
"learning_rate": 1.2149676079243198e-07,
"loss": 0.6476,
"step": 20755
},
{
"epoch": 0.84,
"learning_rate": 1.211890380694065e-07,
"loss": 0.6507,
"step": 20760
},
{
"epoch": 0.84,
"learning_rate": 1.2088168039681168e-07,
"loss": 0.6329,
"step": 20765
},
{
"epoch": 0.84,
"learning_rate": 1.2057468790232195e-07,
"loss": 0.6363,
"step": 20770
},
{
"epoch": 0.84,
"learning_rate": 1.2026806071345885e-07,
"loss": 0.6098,
"step": 20775
},
{
"epoch": 0.84,
"learning_rate": 1.1996179895759262e-07,
"loss": 0.5963,
"step": 20780
},
{
"epoch": 0.84,
"learning_rate": 1.1965590276194215e-07,
"loss": 0.6654,
"step": 20785
},
{
"epoch": 0.84,
"learning_rate": 1.1935037225357392e-07,
"loss": 0.6525,
"step": 20790
},
{
"epoch": 0.84,
"learning_rate": 1.190452075594024e-07,
"loss": 0.6529,
"step": 20795
},
{
"epoch": 0.84,
"learning_rate": 1.1874040880619041e-07,
"loss": 0.6519,
"step": 20800
},
{
"epoch": 0.84,
"eval_loss": 0.6171961426734924,
"eval_runtime": 139.723,
"eval_samples_per_second": 16.934,
"eval_steps_per_second": 2.827,
"step": 20800
},
{
"epoch": 0.85,
"learning_rate": 1.184359761205489e-07,
"loss": 0.6229,
"step": 20805
},
{
"epoch": 0.85,
"learning_rate": 1.181319096289366e-07,
"loss": 0.6511,
"step": 20810
},
{
"epoch": 0.85,
"learning_rate": 1.1782820945765958e-07,
"loss": 0.6513,
"step": 20815
},
{
"epoch": 0.85,
"learning_rate": 1.1752487573287296e-07,
"loss": 0.6551,
"step": 20820
},
{
"epoch": 0.85,
"learning_rate": 1.1722190858057846e-07,
"loss": 0.6649,
"step": 20825
},
{
"epoch": 0.85,
"learning_rate": 1.169193081266262e-07,
"loss": 0.631,
"step": 20830
},
{
"epoch": 0.85,
"learning_rate": 1.1661707449671343e-07,
"loss": 0.6674,
"step": 20835
},
{
"epoch": 0.85,
"learning_rate": 1.1631520781638582e-07,
"loss": 0.649,
"step": 20840
},
{
"epoch": 0.85,
"learning_rate": 1.1601370821103607e-07,
"loss": 0.5991,
"step": 20845
},
{
"epoch": 0.85,
"learning_rate": 1.1571257580590421e-07,
"loss": 0.6397,
"step": 20850
},
{
"epoch": 0.85,
"learning_rate": 1.1541181072607831e-07,
"loss": 0.6698,
"step": 20855
},
{
"epoch": 0.85,
"learning_rate": 1.1511141309649364e-07,
"loss": 0.6566,
"step": 20860
},
{
"epoch": 0.85,
"learning_rate": 1.1481138304193228e-07,
"loss": 0.6261,
"step": 20865
},
{
"epoch": 0.85,
"learning_rate": 1.1451172068702464e-07,
"loss": 0.6449,
"step": 20870
},
{
"epoch": 0.85,
"learning_rate": 1.1421242615624771e-07,
"loss": 0.6521,
"step": 20875
},
{
"epoch": 0.85,
"learning_rate": 1.1391349957392571e-07,
"loss": 0.6329,
"step": 20880
},
{
"epoch": 0.85,
"learning_rate": 1.1361494106423008e-07,
"loss": 0.6462,
"step": 20885
},
{
"epoch": 0.85,
"learning_rate": 1.1331675075117963e-07,
"loss": 0.6567,
"step": 20890
},
{
"epoch": 0.85,
"learning_rate": 1.1301892875864005e-07,
"loss": 0.6515,
"step": 20895
},
{
"epoch": 0.85,
"learning_rate": 1.127214752103236e-07,
"loss": 0.6775,
"step": 20900
},
{
"epoch": 0.85,
"learning_rate": 1.1242439022979055e-07,
"loss": 0.6874,
"step": 20905
},
{
"epoch": 0.85,
"learning_rate": 1.1212767394044697e-07,
"loss": 0.6659,
"step": 20910
},
{
"epoch": 0.85,
"learning_rate": 1.1183132646554605e-07,
"loss": 0.6481,
"step": 20915
},
{
"epoch": 0.85,
"learning_rate": 1.1153534792818852e-07,
"loss": 0.6514,
"step": 20920
},
{
"epoch": 0.85,
"learning_rate": 1.1123973845132095e-07,
"loss": 0.6444,
"step": 20925
},
{
"epoch": 0.85,
"learning_rate": 1.1094449815773699e-07,
"loss": 0.6424,
"step": 20930
},
{
"epoch": 0.85,
"learning_rate": 1.1064962717007675e-07,
"loss": 0.611,
"step": 20935
},
{
"epoch": 0.85,
"learning_rate": 1.1035512561082738e-07,
"loss": 0.6177,
"step": 20940
},
{
"epoch": 0.85,
"learning_rate": 1.1006099360232212e-07,
"loss": 0.6323,
"step": 20945
},
{
"epoch": 0.85,
"learning_rate": 1.0976723126674059e-07,
"loss": 0.6681,
"step": 20950
},
{
"epoch": 0.85,
"learning_rate": 1.094738387261096e-07,
"loss": 0.6665,
"step": 20955
},
{
"epoch": 0.85,
"learning_rate": 1.0918081610230157e-07,
"loss": 0.6414,
"step": 20960
},
{
"epoch": 0.85,
"learning_rate": 1.0888816351703555e-07,
"loss": 0.6165,
"step": 20965
},
{
"epoch": 0.85,
"learning_rate": 1.0859588109187678e-07,
"loss": 0.6792,
"step": 20970
},
{
"epoch": 0.85,
"learning_rate": 1.0830396894823712e-07,
"loss": 0.6507,
"step": 20975
},
{
"epoch": 0.85,
"learning_rate": 1.0801242720737425e-07,
"loss": 0.6051,
"step": 20980
},
{
"epoch": 0.85,
"learning_rate": 1.0772125599039183e-07,
"loss": 0.6421,
"step": 20985
},
{
"epoch": 0.85,
"learning_rate": 1.0743045541824015e-07,
"loss": 0.6227,
"step": 20990
},
{
"epoch": 0.85,
"learning_rate": 1.0714002561171521e-07,
"loss": 0.6296,
"step": 20995
},
{
"epoch": 0.85,
"learning_rate": 1.0684996669145874e-07,
"loss": 0.6638,
"step": 21000
},
{
"epoch": 0.85,
"learning_rate": 1.0656027877795904e-07,
"loss": 0.643,
"step": 21005
},
{
"epoch": 0.85,
"learning_rate": 1.0627096199154983e-07,
"loss": 0.6595,
"step": 21010
},
{
"epoch": 0.85,
"learning_rate": 1.0598201645241079e-07,
"loss": 0.6595,
"step": 21015
},
{
"epoch": 0.85,
"learning_rate": 1.0569344228056708e-07,
"loss": 0.6446,
"step": 21020
},
{
"epoch": 0.85,
"learning_rate": 1.0540523959589042e-07,
"loss": 0.6615,
"step": 21025
},
{
"epoch": 0.85,
"learning_rate": 1.0511740851809747e-07,
"loss": 0.6832,
"step": 21030
},
{
"epoch": 0.85,
"learning_rate": 1.0482994916675047e-07,
"loss": 0.6588,
"step": 21035
},
{
"epoch": 0.85,
"learning_rate": 1.0454286166125814e-07,
"loss": 0.6532,
"step": 21040
},
{
"epoch": 0.85,
"learning_rate": 1.0425614612087363e-07,
"loss": 0.6882,
"step": 21045
},
{
"epoch": 0.86,
"learning_rate": 1.0396980266469623e-07,
"loss": 0.6266,
"step": 21050
},
{
"epoch": 0.86,
"learning_rate": 1.0368383141167059e-07,
"loss": 0.6535,
"step": 21055
},
{
"epoch": 0.86,
"learning_rate": 1.0339823248058677e-07,
"loss": 0.6547,
"step": 21060
},
{
"epoch": 0.86,
"learning_rate": 1.0311300599007988e-07,
"loss": 0.6387,
"step": 21065
},
{
"epoch": 0.86,
"learning_rate": 1.0282815205863038e-07,
"loss": 0.6395,
"step": 21070
},
{
"epoch": 0.86,
"learning_rate": 1.0254367080456449e-07,
"loss": 0.6433,
"step": 21075
},
{
"epoch": 0.86,
"learning_rate": 1.0225956234605316e-07,
"loss": 0.6727,
"step": 21080
},
{
"epoch": 0.86,
"learning_rate": 1.0197582680111228e-07,
"loss": 0.6174,
"step": 21085
},
{
"epoch": 0.86,
"learning_rate": 1.0169246428760359e-07,
"loss": 0.6452,
"step": 21090
},
{
"epoch": 0.86,
"learning_rate": 1.0140947492323315e-07,
"loss": 0.628,
"step": 21095
},
{
"epoch": 0.86,
"learning_rate": 1.0112685882555228e-07,
"loss": 0.6451,
"step": 21100
},
{
"epoch": 0.86,
"learning_rate": 1.0084461611195705e-07,
"loss": 0.6135,
"step": 21105
},
{
"epoch": 0.86,
"learning_rate": 1.0056274689968902e-07,
"loss": 0.6015,
"step": 21110
},
{
"epoch": 0.86,
"learning_rate": 1.0028125130583409e-07,
"loss": 0.6446,
"step": 21115
},
{
"epoch": 0.86,
"learning_rate": 1.0000012944732284e-07,
"loss": 0.6519,
"step": 21120
},
{
"epoch": 0.86,
"learning_rate": 9.971938144093129e-08,
"loss": 0.6359,
"step": 21125
},
{
"epoch": 0.86,
"learning_rate": 9.943900740327937e-08,
"loss": 0.6572,
"step": 21130
},
{
"epoch": 0.86,
"learning_rate": 9.915900745083194e-08,
"loss": 0.6668,
"step": 21135
},
{
"epoch": 0.86,
"learning_rate": 9.887938169989896e-08,
"loss": 0.6255,
"step": 21140
},
{
"epoch": 0.86,
"learning_rate": 9.860013026663428e-08,
"loss": 0.6186,
"step": 21145
},
{
"epoch": 0.86,
"learning_rate": 9.832125326703644e-08,
"loss": 0.6505,
"step": 21150
},
{
"epoch": 0.86,
"learning_rate": 9.804275081694846e-08,
"loss": 0.6368,
"step": 21155
},
{
"epoch": 0.86,
"learning_rate": 9.776462303205824e-08,
"loss": 0.6271,
"step": 21160
},
{
"epoch": 0.86,
"learning_rate": 9.748687002789734e-08,
"loss": 0.6784,
"step": 21165
},
{
"epoch": 0.86,
"learning_rate": 9.720949191984185e-08,
"loss": 0.6309,
"step": 21170
},
{
"epoch": 0.86,
"learning_rate": 9.693248882311256e-08,
"loss": 0.6185,
"step": 21175
},
{
"epoch": 0.86,
"learning_rate": 9.665586085277388e-08,
"loss": 0.6288,
"step": 21180
},
{
"epoch": 0.86,
"learning_rate": 9.637960812373457e-08,
"loss": 0.7016,
"step": 21185
},
{
"epoch": 0.86,
"learning_rate": 9.610373075074806e-08,
"loss": 0.6614,
"step": 21190
},
{
"epoch": 0.86,
"learning_rate": 9.582822884841101e-08,
"loss": 0.6505,
"step": 21195
},
{
"epoch": 0.86,
"learning_rate": 9.555310253116467e-08,
"loss": 0.6784,
"step": 21200
},
{
"epoch": 0.86,
"learning_rate": 9.527835191329392e-08,
"loss": 0.6565,
"step": 21205
},
{
"epoch": 0.86,
"learning_rate": 9.500397710892816e-08,
"loss": 0.6305,
"step": 21210
},
{
"epoch": 0.86,
"learning_rate": 9.472997823203999e-08,
"loss": 0.6524,
"step": 21215
},
{
"epoch": 0.86,
"learning_rate": 9.445635539644615e-08,
"loss": 0.6717,
"step": 21220
},
{
"epoch": 0.86,
"learning_rate": 9.418310871580737e-08,
"loss": 0.6429,
"step": 21225
},
{
"epoch": 0.86,
"learning_rate": 9.391023830362799e-08,
"loss": 0.6434,
"step": 21230
},
{
"epoch": 0.86,
"learning_rate": 9.363774427325577e-08,
"loss": 0.6648,
"step": 21235
},
{
"epoch": 0.86,
"learning_rate": 9.336562673788228e-08,
"loss": 0.636,
"step": 21240
},
{
"epoch": 0.86,
"learning_rate": 9.309388581054322e-08,
"loss": 0.6771,
"step": 21245
},
{
"epoch": 0.86,
"learning_rate": 9.282252160411719e-08,
"loss": 0.6502,
"step": 21250
},
{
"epoch": 0.86,
"learning_rate": 9.255153423132622e-08,
"loss": 0.6437,
"step": 21255
},
{
"epoch": 0.86,
"learning_rate": 9.22809238047365e-08,
"loss": 0.6704,
"step": 21260
},
{
"epoch": 0.86,
"learning_rate": 9.201069043675724e-08,
"loss": 0.6404,
"step": 21265
},
{
"epoch": 0.86,
"learning_rate": 9.174083423964062e-08,
"loss": 0.6834,
"step": 21270
},
{
"epoch": 0.86,
"learning_rate": 9.147135532548311e-08,
"loss": 0.6516,
"step": 21275
},
{
"epoch": 0.86,
"learning_rate": 9.120225380622371e-08,
"loss": 0.671,
"step": 21280
},
{
"epoch": 0.86,
"learning_rate": 9.093352979364466e-08,
"loss": 0.6583,
"step": 21285
},
{
"epoch": 0.86,
"learning_rate": 9.066518339937157e-08,
"loss": 0.6467,
"step": 21290
},
{
"epoch": 0.86,
"learning_rate": 9.03972147348735e-08,
"loss": 0.5999,
"step": 21295
},
{
"epoch": 0.87,
"learning_rate": 9.012962391146217e-08,
"loss": 0.6589,
"step": 21300
},
{
"epoch": 0.87,
"learning_rate": 8.986241104029224e-08,
"loss": 0.647,
"step": 21305
},
{
"epoch": 0.87,
"learning_rate": 8.959557623236202e-08,
"loss": 0.6199,
"step": 21310
},
{
"epoch": 0.87,
"learning_rate": 8.93291195985122e-08,
"loss": 0.6762,
"step": 21315
},
{
"epoch": 0.87,
"learning_rate": 8.906304124942632e-08,
"loss": 0.6446,
"step": 21320
},
{
"epoch": 0.87,
"learning_rate": 8.879734129563132e-08,
"loss": 0.6504,
"step": 21325
},
{
"epoch": 0.87,
"learning_rate": 8.853201984749658e-08,
"loss": 0.6898,
"step": 21330
},
{
"epoch": 0.87,
"learning_rate": 8.826707701523428e-08,
"loss": 0.6575,
"step": 21335
},
{
"epoch": 0.87,
"learning_rate": 8.800251290889927e-08,
"loss": 0.6208,
"step": 21340
},
{
"epoch": 0.87,
"learning_rate": 8.773832763838939e-08,
"loss": 0.6662,
"step": 21345
},
{
"epoch": 0.87,
"learning_rate": 8.74745213134448e-08,
"loss": 0.6218,
"step": 21350
},
{
"epoch": 0.87,
"learning_rate": 8.721109404364812e-08,
"loss": 0.6747,
"step": 21355
},
{
"epoch": 0.87,
"learning_rate": 8.694804593842519e-08,
"loss": 0.693,
"step": 21360
},
{
"epoch": 0.87,
"learning_rate": 8.668537710704371e-08,
"loss": 0.6482,
"step": 21365
},
{
"epoch": 0.87,
"learning_rate": 8.642308765861406e-08,
"loss": 0.6946,
"step": 21370
},
{
"epoch": 0.87,
"learning_rate": 8.616117770208864e-08,
"loss": 0.655,
"step": 21375
},
{
"epoch": 0.87,
"learning_rate": 8.58996473462631e-08,
"loss": 0.6549,
"step": 21380
},
{
"epoch": 0.87,
"learning_rate": 8.563849669977463e-08,
"loss": 0.6444,
"step": 21385
},
{
"epoch": 0.87,
"learning_rate": 8.537772587110281e-08,
"loss": 0.646,
"step": 21390
},
{
"epoch": 0.87,
"learning_rate": 8.511733496856999e-08,
"loss": 0.6792,
"step": 21395
},
{
"epoch": 0.87,
"learning_rate": 8.485732410033985e-08,
"loss": 0.6037,
"step": 21400
},
{
"epoch": 0.87,
"learning_rate": 8.459769337441868e-08,
"loss": 0.6055,
"step": 21405
},
{
"epoch": 0.87,
"learning_rate": 8.433844289865521e-08,
"loss": 0.6427,
"step": 21410
},
{
"epoch": 0.87,
"learning_rate": 8.407957278073952e-08,
"loss": 0.6628,
"step": 21415
},
{
"epoch": 0.87,
"learning_rate": 8.382108312820401e-08,
"loss": 0.6569,
"step": 21420
},
{
"epoch": 0.87,
"learning_rate": 8.356297404842305e-08,
"loss": 0.659,
"step": 21425
},
{
"epoch": 0.87,
"learning_rate": 8.330524564861297e-08,
"loss": 0.6279,
"step": 21430
},
{
"epoch": 0.87,
"learning_rate": 8.304789803583201e-08,
"loss": 0.6281,
"step": 21435
},
{
"epoch": 0.87,
"learning_rate": 8.279093131697968e-08,
"loss": 0.6327,
"step": 21440
},
{
"epoch": 0.87,
"learning_rate": 8.253434559879835e-08,
"loss": 0.6402,
"step": 21445
},
{
"epoch": 0.87,
"learning_rate": 8.227814098787111e-08,
"loss": 0.6601,
"step": 21450
},
{
"epoch": 0.87,
"learning_rate": 8.202231759062305e-08,
"loss": 0.6355,
"step": 21455
},
{
"epoch": 0.87,
"learning_rate": 8.17668755133214e-08,
"loss": 0.663,
"step": 21460
},
{
"epoch": 0.87,
"learning_rate": 8.151181486207414e-08,
"loss": 0.6715,
"step": 21465
},
{
"epoch": 0.87,
"learning_rate": 8.125713574283155e-08,
"loss": 0.6456,
"step": 21470
},
{
"epoch": 0.87,
"learning_rate": 8.100283826138477e-08,
"loss": 0.6243,
"step": 21475
},
{
"epoch": 0.87,
"learning_rate": 8.074892252336718e-08,
"loss": 0.6273,
"step": 21480
},
{
"epoch": 0.87,
"learning_rate": 8.049538863425298e-08,
"loss": 0.6379,
"step": 21485
},
{
"epoch": 0.87,
"learning_rate": 8.024223669935782e-08,
"loss": 0.6303,
"step": 21490
},
{
"epoch": 0.87,
"learning_rate": 7.9989466823839e-08,
"loss": 0.6826,
"step": 21495
},
{
"epoch": 0.87,
"learning_rate": 7.973707911269489e-08,
"loss": 0.6236,
"step": 21500
},
{
"epoch": 0.87,
"learning_rate": 7.948507367076518e-08,
"loss": 0.6341,
"step": 21505
},
{
"epoch": 0.87,
"learning_rate": 7.923345060273046e-08,
"loss": 0.6677,
"step": 21510
},
{
"epoch": 0.87,
"learning_rate": 7.898221001311312e-08,
"loss": 0.6299,
"step": 21515
},
{
"epoch": 0.87,
"learning_rate": 7.873135200627623e-08,
"loss": 0.6272,
"step": 21520
},
{
"epoch": 0.87,
"learning_rate": 7.848087668642377e-08,
"loss": 0.6455,
"step": 21525
},
{
"epoch": 0.87,
"learning_rate": 7.823078415760143e-08,
"loss": 0.6406,
"step": 21530
},
{
"epoch": 0.87,
"learning_rate": 7.798107452369517e-08,
"loss": 0.7099,
"step": 21535
},
{
"epoch": 0.87,
"learning_rate": 7.773174788843218e-08,
"loss": 0.6831,
"step": 21540
},
{
"epoch": 0.88,
"learning_rate": 7.74828043553808e-08,
"loss": 0.6205,
"step": 21545
},
{
"epoch": 0.88,
"learning_rate": 7.723424402794998e-08,
"loss": 0.649,
"step": 21550
},
{
"epoch": 0.88,
"learning_rate": 7.698606700938936e-08,
"loss": 0.6636,
"step": 21555
},
{
"epoch": 0.88,
"learning_rate": 7.673827340278937e-08,
"loss": 0.6314,
"step": 21560
},
{
"epoch": 0.88,
"learning_rate": 7.649086331108178e-08,
"loss": 0.6969,
"step": 21565
},
{
"epoch": 0.88,
"learning_rate": 7.624383683703839e-08,
"loss": 0.6516,
"step": 21570
},
{
"epoch": 0.88,
"learning_rate": 7.599719408327155e-08,
"loss": 0.7174,
"step": 21575
},
{
"epoch": 0.88,
"learning_rate": 7.575093515223496e-08,
"loss": 0.6436,
"step": 21580
},
{
"epoch": 0.88,
"learning_rate": 7.550506014622215e-08,
"loss": 0.6571,
"step": 21585
},
{
"epoch": 0.88,
"learning_rate": 7.525956916736753e-08,
"loss": 0.6919,
"step": 21590
},
{
"epoch": 0.88,
"learning_rate": 7.501446231764607e-08,
"loss": 0.6461,
"step": 21595
},
{
"epoch": 0.88,
"learning_rate": 7.47697396988729e-08,
"loss": 0.6169,
"step": 21600
},
{
"epoch": 0.88,
"eval_loss": 0.6164625883102417,
"eval_runtime": 140.1585,
"eval_samples_per_second": 16.881,
"eval_steps_per_second": 2.818,
"step": 21600
},
{
"epoch": 0.88,
"learning_rate": 7.452540141270358e-08,
"loss": 0.643,
"step": 21605
},
{
"epoch": 0.88,
"learning_rate": 7.428144756063415e-08,
"loss": 0.6571,
"step": 21610
},
{
"epoch": 0.88,
"learning_rate": 7.403787824400098e-08,
"loss": 0.6473,
"step": 21615
},
{
"epoch": 0.88,
"learning_rate": 7.379469356398072e-08,
"loss": 0.6089,
"step": 21620
},
{
"epoch": 0.88,
"learning_rate": 7.355189362158997e-08,
"loss": 0.6356,
"step": 21625
},
{
"epoch": 0.88,
"learning_rate": 7.330947851768588e-08,
"loss": 0.6338,
"step": 21630
},
{
"epoch": 0.88,
"learning_rate": 7.306744835296563e-08,
"loss": 0.6296,
"step": 21635
},
{
"epoch": 0.88,
"learning_rate": 7.282580322796606e-08,
"loss": 0.6403,
"step": 21640
},
{
"epoch": 0.88,
"learning_rate": 7.258454324306495e-08,
"loss": 0.649,
"step": 21645
},
{
"epoch": 0.88,
"learning_rate": 7.23436684984794e-08,
"loss": 0.6433,
"step": 21650
},
{
"epoch": 0.88,
"learning_rate": 7.210317909426656e-08,
"loss": 0.6741,
"step": 21655
},
{
"epoch": 0.88,
"learning_rate": 7.186307513032364e-08,
"loss": 0.6607,
"step": 21660
},
{
"epoch": 0.88,
"learning_rate": 7.162335670638797e-08,
"loss": 0.6845,
"step": 21665
},
{
"epoch": 0.88,
"learning_rate": 7.138402392203646e-08,
"loss": 0.5908,
"step": 21670
},
{
"epoch": 0.88,
"learning_rate": 7.114507687668559e-08,
"loss": 0.6756,
"step": 21675
},
{
"epoch": 0.88,
"learning_rate": 7.090651566959216e-08,
"loss": 0.6435,
"step": 21680
},
{
"epoch": 0.88,
"learning_rate": 7.066834039985237e-08,
"loss": 0.6275,
"step": 21685
},
{
"epoch": 0.88,
"learning_rate": 7.043055116640206e-08,
"loss": 0.6286,
"step": 21690
},
{
"epoch": 0.88,
"learning_rate": 7.019314806801679e-08,
"loss": 0.6561,
"step": 21695
},
{
"epoch": 0.88,
"learning_rate": 6.99561312033119e-08,
"loss": 0.6653,
"step": 21700
},
{
"epoch": 0.88,
"learning_rate": 6.971950067074206e-08,
"loss": 0.6333,
"step": 21705
},
{
"epoch": 0.88,
"learning_rate": 6.948325656860143e-08,
"loss": 0.6574,
"step": 21710
},
{
"epoch": 0.88,
"learning_rate": 6.924739899502396e-08,
"loss": 0.6581,
"step": 21715
},
{
"epoch": 0.88,
"learning_rate": 6.901192804798272e-08,
"loss": 0.6574,
"step": 21720
},
{
"epoch": 0.88,
"learning_rate": 6.877684382529025e-08,
"loss": 0.6292,
"step": 21725
},
{
"epoch": 0.88,
"learning_rate": 6.854214642459855e-08,
"loss": 0.6288,
"step": 21730
},
{
"epoch": 0.88,
"learning_rate": 6.830783594339895e-08,
"loss": 0.6242,
"step": 21735
},
{
"epoch": 0.88,
"learning_rate": 6.807391247902195e-08,
"loss": 0.6551,
"step": 21740
},
{
"epoch": 0.88,
"learning_rate": 6.784037612863702e-08,
"loss": 0.6485,
"step": 21745
},
{
"epoch": 0.88,
"learning_rate": 6.760722698925358e-08,
"loss": 0.6398,
"step": 21750
},
{
"epoch": 0.88,
"learning_rate": 6.737446515771961e-08,
"loss": 0.6063,
"step": 21755
},
{
"epoch": 0.88,
"learning_rate": 6.714209073072218e-08,
"loss": 0.6095,
"step": 21760
},
{
"epoch": 0.88,
"learning_rate": 6.691010380478779e-08,
"loss": 0.6306,
"step": 21765
},
{
"epoch": 0.88,
"learning_rate": 6.667850447628175e-08,
"loss": 0.5991,
"step": 21770
},
{
"epoch": 0.88,
"learning_rate": 6.644729284140826e-08,
"loss": 0.6475,
"step": 21775
},
{
"epoch": 0.88,
"learning_rate": 6.621646899621091e-08,
"loss": 0.6737,
"step": 21780
},
{
"epoch": 0.88,
"learning_rate": 6.598603303657179e-08,
"loss": 0.6395,
"step": 21785
},
{
"epoch": 0.89,
"learning_rate": 6.5755985058212e-08,
"loss": 0.6428,
"step": 21790
},
{
"epoch": 0.89,
"learning_rate": 6.552632515669121e-08,
"loss": 0.6312,
"step": 21795
},
{
"epoch": 0.89,
"learning_rate": 6.529705342740843e-08,
"loss": 0.6315,
"step": 21800
},
{
"epoch": 0.89,
"learning_rate": 6.506816996560127e-08,
"loss": 0.6268,
"step": 21805
},
{
"epoch": 0.89,
"learning_rate": 6.483967486634546e-08,
"loss": 0.664,
"step": 21810
},
{
"epoch": 0.89,
"learning_rate": 6.461156822455638e-08,
"loss": 0.6397,
"step": 21815
},
{
"epoch": 0.89,
"learning_rate": 6.438385013498726e-08,
"loss": 0.6273,
"step": 21820
},
{
"epoch": 0.89,
"learning_rate": 6.415652069223032e-08,
"loss": 0.6245,
"step": 21825
},
{
"epoch": 0.89,
"learning_rate": 6.392957999071602e-08,
"loss": 0.6921,
"step": 21830
},
{
"epoch": 0.89,
"learning_rate": 6.370302812471384e-08,
"loss": 0.6685,
"step": 21835
},
{
"epoch": 0.89,
"learning_rate": 6.34768651883314e-08,
"loss": 0.6563,
"step": 21840
},
{
"epoch": 0.89,
"learning_rate": 6.325109127551465e-08,
"loss": 0.6676,
"step": 21845
},
{
"epoch": 0.89,
"learning_rate": 6.302570648004834e-08,
"loss": 0.682,
"step": 21850
},
{
"epoch": 0.89,
"learning_rate": 6.280071089555516e-08,
"loss": 0.63,
"step": 21855
},
{
"epoch": 0.89,
"learning_rate": 6.257610461549634e-08,
"loss": 0.6781,
"step": 21860
},
{
"epoch": 0.89,
"learning_rate": 6.235188773317146e-08,
"loss": 0.6647,
"step": 21865
},
{
"epoch": 0.89,
"learning_rate": 6.212806034171836e-08,
"loss": 0.6611,
"step": 21870
},
{
"epoch": 0.89,
"learning_rate": 6.190462253411277e-08,
"loss": 0.658,
"step": 21875
},
{
"epoch": 0.89,
"learning_rate": 6.16815744031688e-08,
"loss": 0.6362,
"step": 21880
},
{
"epoch": 0.89,
"learning_rate": 6.145891604153886e-08,
"loss": 0.64,
"step": 21885
},
{
"epoch": 0.89,
"learning_rate": 6.123664754171331e-08,
"loss": 0.6428,
"step": 21890
},
{
"epoch": 0.89,
"learning_rate": 6.101476899602043e-08,
"loss": 0.6626,
"step": 21895
},
{
"epoch": 0.89,
"learning_rate": 6.079328049662668e-08,
"loss": 0.6502,
"step": 21900
},
{
"epoch": 0.89,
"learning_rate": 6.057218213553661e-08,
"loss": 0.6694,
"step": 21905
},
{
"epoch": 0.89,
"learning_rate": 6.035147400459217e-08,
"loss": 0.642,
"step": 21910
},
{
"epoch": 0.89,
"learning_rate": 6.013115619547404e-08,
"loss": 0.6864,
"step": 21915
},
{
"epoch": 0.89,
"learning_rate": 5.991122879970012e-08,
"loss": 0.636,
"step": 21920
},
{
"epoch": 0.89,
"learning_rate": 5.969169190862644e-08,
"loss": 0.6338,
"step": 21925
},
{
"epoch": 0.89,
"learning_rate": 5.947254561344628e-08,
"loss": 0.6647,
"step": 21930
},
{
"epoch": 0.89,
"learning_rate": 5.9253790005191705e-08,
"loss": 0.65,
"step": 21935
},
{
"epoch": 0.89,
"learning_rate": 5.90354251747317e-08,
"loss": 0.6274,
"step": 21940
},
{
"epoch": 0.89,
"learning_rate": 5.8817451212772815e-08,
"loss": 0.6559,
"step": 21945
},
{
"epoch": 0.89,
"learning_rate": 5.859986820985985e-08,
"loss": 0.6318,
"step": 21950
},
{
"epoch": 0.89,
"learning_rate": 5.838267625637494e-08,
"loss": 0.6755,
"step": 21955
},
{
"epoch": 0.89,
"learning_rate": 5.8165875442537594e-08,
"loss": 0.6342,
"step": 21960
},
{
"epoch": 0.89,
"learning_rate": 5.7949465858404766e-08,
"loss": 0.6707,
"step": 21965
},
{
"epoch": 0.89,
"learning_rate": 5.773344759387155e-08,
"loss": 0.6339,
"step": 21970
},
{
"epoch": 0.89,
"learning_rate": 5.751782073866984e-08,
"loss": 0.6389,
"step": 21975
},
{
"epoch": 0.89,
"learning_rate": 5.730258538236909e-08,
"loss": 0.6287,
"step": 21980
},
{
"epoch": 0.89,
"learning_rate": 5.708774161437635e-08,
"loss": 0.6844,
"step": 21985
},
{
"epoch": 0.89,
"learning_rate": 5.6873289523935775e-08,
"loss": 0.6497,
"step": 21990
},
{
"epoch": 0.89,
"learning_rate": 5.665922920012878e-08,
"loss": 0.7022,
"step": 21995
},
{
"epoch": 0.89,
"learning_rate": 5.644556073187445e-08,
"loss": 0.6279,
"step": 22000
},
{
"epoch": 0.89,
"learning_rate": 5.6232284207928584e-08,
"loss": 0.6533,
"step": 22005
},
{
"epoch": 0.89,
"learning_rate": 5.601939971688452e-08,
"loss": 0.635,
"step": 22010
},
{
"epoch": 0.89,
"learning_rate": 5.580690734717241e-08,
"loss": 0.6474,
"step": 22015
},
{
"epoch": 0.89,
"learning_rate": 5.559480718706e-08,
"loss": 0.6732,
"step": 22020
},
{
"epoch": 0.89,
"learning_rate": 5.5383099324651684e-08,
"loss": 0.6351,
"step": 22025
},
{
"epoch": 0.89,
"learning_rate": 5.5171783847889006e-08,
"loss": 0.6832,
"step": 22030
},
{
"epoch": 0.9,
"learning_rate": 5.496086084455087e-08,
"loss": 0.6403,
"step": 22035
},
{
"epoch": 0.9,
"learning_rate": 5.475033040225274e-08,
"loss": 0.6231,
"step": 22040
},
{
"epoch": 0.9,
"learning_rate": 5.454019260844678e-08,
"loss": 0.6585,
"step": 22045
},
{
"epoch": 0.9,
"learning_rate": 5.433044755042293e-08,
"loss": 0.6478,
"step": 22050
},
{
"epoch": 0.9,
"learning_rate": 5.4121095315307173e-08,
"loss": 0.627,
"step": 22055
},
{
"epoch": 0.9,
"learning_rate": 5.3912135990062726e-08,
"loss": 0.6431,
"step": 22060
},
{
"epoch": 0.9,
"learning_rate": 5.370356966148914e-08,
"loss": 0.6224,
"step": 22065
},
{
"epoch": 0.9,
"learning_rate": 5.3495396416223584e-08,
"loss": 0.6851,
"step": 22070
},
{
"epoch": 0.9,
"learning_rate": 5.3287616340739084e-08,
"loss": 0.6471,
"step": 22075
},
{
"epoch": 0.9,
"learning_rate": 5.308022952134561e-08,
"loss": 0.6231,
"step": 22080
},
{
"epoch": 0.9,
"learning_rate": 5.287323604419014e-08,
"loss": 0.6358,
"step": 22085
},
{
"epoch": 0.9,
"learning_rate": 5.266663599525578e-08,
"loss": 0.6723,
"step": 22090
},
{
"epoch": 0.9,
"learning_rate": 5.246042946036244e-08,
"loss": 0.674,
"step": 22095
},
{
"epoch": 0.9,
"learning_rate": 5.225461652516639e-08,
"loss": 0.6099,
"step": 22100
},
{
"epoch": 0.9,
"learning_rate": 5.204919727516066e-08,
"loss": 0.6175,
"step": 22105
},
{
"epoch": 0.9,
"learning_rate": 5.184417179567468e-08,
"loss": 0.6383,
"step": 22110
},
{
"epoch": 0.9,
"learning_rate": 5.163954017187399e-08,
"loss": 0.6796,
"step": 22115
},
{
"epoch": 0.9,
"learning_rate": 5.143530248876116e-08,
"loss": 0.6428,
"step": 22120
},
{
"epoch": 0.9,
"learning_rate": 5.123145883117452e-08,
"loss": 0.7068,
"step": 22125
},
{
"epoch": 0.9,
"learning_rate": 5.102800928378881e-08,
"loss": 0.6485,
"step": 22130
},
{
"epoch": 0.9,
"learning_rate": 5.082495393111563e-08,
"loss": 0.6488,
"step": 22135
},
{
"epoch": 0.9,
"learning_rate": 5.062229285750208e-08,
"loss": 0.639,
"step": 22140
},
{
"epoch": 0.9,
"learning_rate": 5.0420026147131925e-08,
"loss": 0.6629,
"step": 22145
},
{
"epoch": 0.9,
"learning_rate": 5.021815388402473e-08,
"loss": 0.6624,
"step": 22150
},
{
"epoch": 0.9,
"learning_rate": 5.0016676152036974e-08,
"loss": 0.6282,
"step": 22155
},
{
"epoch": 0.9,
"learning_rate": 4.981559303486038e-08,
"loss": 0.6319,
"step": 22160
},
{
"epoch": 0.9,
"learning_rate": 4.9614904616023134e-08,
"loss": 0.6655,
"step": 22165
},
{
"epoch": 0.9,
"learning_rate": 4.941461097888966e-08,
"loss": 0.6915,
"step": 22170
},
{
"epoch": 0.9,
"learning_rate": 4.921471220666018e-08,
"loss": 0.5924,
"step": 22175
},
{
"epoch": 0.9,
"learning_rate": 4.901520838237061e-08,
"loss": 0.6699,
"step": 22180
},
{
"epoch": 0.9,
"learning_rate": 4.8816099588893436e-08,
"loss": 0.635,
"step": 22185
},
{
"epoch": 0.9,
"learning_rate": 4.86173859089366e-08,
"loss": 0.6572,
"step": 22190
},
{
"epoch": 0.9,
"learning_rate": 4.8419067425044094e-08,
"loss": 0.6296,
"step": 22195
},
{
"epoch": 0.9,
"learning_rate": 4.822114421959545e-08,
"loss": 0.6598,
"step": 22200
},
{
"epoch": 0.9,
"learning_rate": 4.8023616374806564e-08,
"loss": 0.6451,
"step": 22205
},
{
"epoch": 0.9,
"learning_rate": 4.782648397272859e-08,
"loss": 0.6616,
"step": 22210
},
{
"epoch": 0.9,
"learning_rate": 4.762974709524858e-08,
"loss": 0.6583,
"step": 22215
},
{
"epoch": 0.9,
"learning_rate": 4.743340582408961e-08,
"loss": 0.66,
"step": 22220
},
{
"epoch": 0.9,
"learning_rate": 4.723746024080988e-08,
"loss": 0.6213,
"step": 22225
},
{
"epoch": 0.9,
"learning_rate": 4.70419104268035e-08,
"loss": 0.6316,
"step": 22230
},
{
"epoch": 0.9,
"learning_rate": 4.6846756463300054e-08,
"loss": 0.6505,
"step": 22235
},
{
"epoch": 0.9,
"learning_rate": 4.665199843136513e-08,
"loss": 0.6645,
"step": 22240
},
{
"epoch": 0.9,
"learning_rate": 4.645763641189937e-08,
"loss": 0.6523,
"step": 22245
},
{
"epoch": 0.9,
"learning_rate": 4.626367048563884e-08,
"loss": 0.6516,
"step": 22250
},
{
"epoch": 0.9,
"learning_rate": 4.607010073315565e-08,
"loss": 0.638,
"step": 22255
},
{
"epoch": 0.9,
"learning_rate": 4.587692723485681e-08,
"loss": 0.5924,
"step": 22260
},
{
"epoch": 0.9,
"learning_rate": 4.5684150070984804e-08,
"loss": 0.6316,
"step": 22265
},
{
"epoch": 0.9,
"learning_rate": 4.549176932161791e-08,
"loss": 0.6585,
"step": 22270
},
{
"epoch": 0.9,
"learning_rate": 4.5299785066669205e-08,
"loss": 0.6547,
"step": 22275
},
{
"epoch": 0.9,
"learning_rate": 4.5108197385887335e-08,
"loss": 0.6432,
"step": 22280
},
{
"epoch": 0.91,
"learning_rate": 4.491700635885598e-08,
"loss": 0.6075,
"step": 22285
},
{
"epoch": 0.91,
"learning_rate": 4.4726212064994493e-08,
"loss": 0.5892,
"step": 22290
},
{
"epoch": 0.91,
"learning_rate": 4.453581458355704e-08,
"loss": 0.6202,
"step": 22295
},
{
"epoch": 0.91,
"learning_rate": 4.4345813993632905e-08,
"loss": 0.6405,
"step": 22300
},
{
"epoch": 0.91,
"learning_rate": 4.4156210374147075e-08,
"loss": 0.6393,
"step": 22305
},
{
"epoch": 0.91,
"learning_rate": 4.396700380385898e-08,
"loss": 0.6548,
"step": 22310
},
{
"epoch": 0.91,
"learning_rate": 4.377819436136332e-08,
"loss": 0.6813,
"step": 22315
},
{
"epoch": 0.91,
"learning_rate": 4.358978212509012e-08,
"loss": 0.6689,
"step": 22320
},
{
"epoch": 0.91,
"learning_rate": 4.340176717330413e-08,
"loss": 0.6631,
"step": 22325
},
{
"epoch": 0.91,
"learning_rate": 4.3214149584105076e-08,
"loss": 0.6586,
"step": 22330
},
{
"epoch": 0.91,
"learning_rate": 4.3026929435427516e-08,
"loss": 0.6643,
"step": 22335
},
{
"epoch": 0.91,
"learning_rate": 4.2840106805041354e-08,
"loss": 0.654,
"step": 22340
},
{
"epoch": 0.91,
"learning_rate": 4.2653681770550955e-08,
"loss": 0.6688,
"step": 22345
},
{
"epoch": 0.91,
"learning_rate": 4.2467654409395484e-08,
"loss": 0.6761,
"step": 22350
},
{
"epoch": 0.91,
"learning_rate": 4.228202479884946e-08,
"loss": 0.6667,
"step": 22355
},
{
"epoch": 0.91,
"learning_rate": 4.209679301602165e-08,
"loss": 0.6749,
"step": 22360
},
{
"epoch": 0.91,
"learning_rate": 4.191195913785561e-08,
"loss": 0.6396,
"step": 22365
},
{
"epoch": 0.91,
"learning_rate": 4.1727523241129606e-08,
"loss": 0.6696,
"step": 22370
},
{
"epoch": 0.91,
"learning_rate": 4.154348540245711e-08,
"loss": 0.6364,
"step": 22375
},
{
"epoch": 0.91,
"learning_rate": 4.135984569828566e-08,
"loss": 0.6495,
"step": 22380
},
{
"epoch": 0.91,
"learning_rate": 4.1176604204897434e-08,
"loss": 0.6496,
"step": 22385
},
{
"epoch": 0.91,
"learning_rate": 4.099376099840968e-08,
"loss": 0.6268,
"step": 22390
},
{
"epoch": 0.91,
"learning_rate": 4.0811316154773515e-08,
"loss": 0.6527,
"step": 22395
},
{
"epoch": 0.91,
"learning_rate": 4.06292697497751e-08,
"loss": 0.651,
"step": 22400
},
{
"epoch": 0.91,
"eval_loss": 0.6161190867424011,
"eval_runtime": 139.4449,
"eval_samples_per_second": 16.967,
"eval_steps_per_second": 2.833,
"step": 22400
},
{
"epoch": 0.91,
"learning_rate": 4.044762185903494e-08,
"loss": 0.6551,
"step": 22405
},
{
"epoch": 0.91,
"learning_rate": 4.026637255800813e-08,
"loss": 0.6677,
"step": 22410
},
{
"epoch": 0.91,
"learning_rate": 4.008552192198378e-08,
"loss": 0.6738,
"step": 22415
},
{
"epoch": 0.91,
"learning_rate": 3.9905070026085784e-08,
"loss": 0.6642,
"step": 22420
},
{
"epoch": 0.91,
"learning_rate": 3.9725016945272416e-08,
"loss": 0.6399,
"step": 22425
},
{
"epoch": 0.91,
"learning_rate": 3.9545362754335955e-08,
"loss": 0.6706,
"step": 22430
},
{
"epoch": 0.91,
"learning_rate": 3.936610752790326e-08,
"loss": 0.686,
"step": 22435
},
{
"epoch": 0.91,
"learning_rate": 3.9187251340435653e-08,
"loss": 0.5905,
"step": 22440
},
{
"epoch": 0.91,
"learning_rate": 3.900879426622794e-08,
"loss": 0.5991,
"step": 22445
},
{
"epoch": 0.91,
"learning_rate": 3.8830736379409814e-08,
"loss": 0.6265,
"step": 22450
},
{
"epoch": 0.91,
"learning_rate": 3.865307775394533e-08,
"loss": 0.6525,
"step": 22455
},
{
"epoch": 0.91,
"learning_rate": 3.84758184636319e-08,
"loss": 0.6228,
"step": 22460
},
{
"epoch": 0.91,
"learning_rate": 3.829895858210186e-08,
"loss": 0.6736,
"step": 22465
},
{
"epoch": 0.91,
"learning_rate": 3.812249818282076e-08,
"loss": 0.6303,
"step": 22470
},
{
"epoch": 0.91,
"learning_rate": 3.79464373390892e-08,
"loss": 0.6367,
"step": 22475
},
{
"epoch": 0.91,
"learning_rate": 3.777077612404123e-08,
"loss": 0.6334,
"step": 22480
},
{
"epoch": 0.91,
"learning_rate": 3.75955146106447e-08,
"loss": 0.6403,
"step": 22485
},
{
"epoch": 0.91,
"learning_rate": 3.742065287170215e-08,
"loss": 0.6089,
"step": 22490
},
{
"epoch": 0.91,
"learning_rate": 3.724619097984916e-08,
"loss": 0.6471,
"step": 22495
},
{
"epoch": 0.91,
"learning_rate": 3.707212900755608e-08,
"loss": 0.6479,
"step": 22500
},
{
"epoch": 0.91,
"learning_rate": 3.689846702712651e-08,
"loss": 0.6429,
"step": 22505
},
{
"epoch": 0.91,
"learning_rate": 3.672520511069821e-08,
"loss": 0.6175,
"step": 22510
},
{
"epoch": 0.91,
"learning_rate": 3.655234333024271e-08,
"loss": 0.6745,
"step": 22515
},
{
"epoch": 0.91,
"learning_rate": 3.637988175756512e-08,
"loss": 0.6604,
"step": 22520
},
{
"epoch": 0.91,
"learning_rate": 3.6207820464304814e-08,
"loss": 0.6285,
"step": 22525
},
{
"epoch": 0.92,
"learning_rate": 3.603615952193417e-08,
"loss": 0.6314,
"step": 22530
},
{
"epoch": 0.92,
"learning_rate": 3.5864899001759706e-08,
"loss": 0.6703,
"step": 22535
},
{
"epoch": 0.92,
"learning_rate": 3.569403897492185e-08,
"loss": 0.6586,
"step": 22540
},
{
"epoch": 0.92,
"learning_rate": 3.552357951239427e-08,
"loss": 0.6588,
"step": 22545
},
{
"epoch": 0.92,
"learning_rate": 3.5353520684984096e-08,
"loss": 0.6623,
"step": 22550
},
{
"epoch": 0.92,
"learning_rate": 3.51838625633325e-08,
"loss": 0.6619,
"step": 22555
},
{
"epoch": 0.92,
"learning_rate": 3.501460521791399e-08,
"loss": 0.7056,
"step": 22560
},
{
"epoch": 0.92,
"learning_rate": 3.484574871903656e-08,
"loss": 0.6647,
"step": 22565
},
{
"epoch": 0.92,
"learning_rate": 3.467729313684153e-08,
"loss": 0.6277,
"step": 22570
},
{
"epoch": 0.92,
"learning_rate": 3.4509238541304384e-08,
"loss": 0.6705,
"step": 22575
},
{
"epoch": 0.92,
"learning_rate": 3.4341585002232945e-08,
"loss": 0.654,
"step": 22580
},
{
"epoch": 0.92,
"learning_rate": 3.4174332589269385e-08,
"loss": 0.6352,
"step": 22585
},
{
"epoch": 0.92,
"learning_rate": 3.4007481371888915e-08,
"loss": 0.6517,
"step": 22590
},
{
"epoch": 0.92,
"learning_rate": 3.384103141940009e-08,
"loss": 0.6898,
"step": 22595
},
{
"epoch": 0.92,
"learning_rate": 3.3674982800944604e-08,
"loss": 0.6067,
"step": 22600
},
{
"epoch": 0.92,
"learning_rate": 3.350933558549751e-08,
"loss": 0.6752,
"step": 22605
},
{
"epoch": 0.92,
"learning_rate": 3.334408984186765e-08,
"loss": 0.664,
"step": 22610
},
{
"epoch": 0.92,
"learning_rate": 3.317924563869634e-08,
"loss": 0.6458,
"step": 22615
},
{
"epoch": 0.92,
"learning_rate": 3.301480304445836e-08,
"loss": 0.7108,
"step": 22620
},
{
"epoch": 0.92,
"learning_rate": 3.2850762127462184e-08,
"loss": 0.6561,
"step": 22625
},
{
"epoch": 0.92,
"learning_rate": 3.268712295584841e-08,
"loss": 0.7372,
"step": 22630
},
{
"epoch": 0.92,
"learning_rate": 3.252388559759156e-08,
"loss": 0.6515,
"step": 22635
},
{
"epoch": 0.92,
"learning_rate": 3.2361050120499275e-08,
"loss": 0.6417,
"step": 22640
},
{
"epoch": 0.92,
"learning_rate": 3.219861659221168e-08,
"loss": 0.6537,
"step": 22645
},
{
"epoch": 0.92,
"learning_rate": 3.203658508020235e-08,
"loss": 0.6385,
"step": 22650
},
{
"epoch": 0.92,
"learning_rate": 3.1874955651777667e-08,
"loss": 0.6112,
"step": 22655
},
{
"epoch": 0.92,
"learning_rate": 3.171372837407738e-08,
"loss": 0.6437,
"step": 22660
},
{
"epoch": 0.92,
"learning_rate": 3.155290331407357e-08,
"loss": 0.6222,
"step": 22665
},
{
"epoch": 0.92,
"learning_rate": 3.1392480538571574e-08,
"loss": 0.6439,
"step": 22670
},
{
"epoch": 0.92,
"learning_rate": 3.123246011420999e-08,
"loss": 0.6528,
"step": 22675
},
{
"epoch": 0.92,
"learning_rate": 3.107284210745953e-08,
"loss": 0.6654,
"step": 22680
},
{
"epoch": 0.92,
"learning_rate": 3.0913626584624266e-08,
"loss": 0.6474,
"step": 22685
},
{
"epoch": 0.92,
"learning_rate": 3.0754813611840846e-08,
"loss": 0.6528,
"step": 22690
},
{
"epoch": 0.92,
"learning_rate": 3.0596403255078954e-08,
"loss": 0.6708,
"step": 22695
},
{
"epoch": 0.92,
"learning_rate": 3.043839558014083e-08,
"loss": 0.6365,
"step": 22700
},
{
"epoch": 0.92,
"learning_rate": 3.028079065266142e-08,
"loss": 0.6687,
"step": 22705
},
{
"epoch": 0.92,
"learning_rate": 3.012358853810859e-08,
"loss": 0.6645,
"step": 22710
},
{
"epoch": 0.92,
"learning_rate": 2.9966789301782535e-08,
"loss": 0.6366,
"step": 22715
},
{
"epoch": 0.92,
"learning_rate": 2.981039300881627e-08,
"loss": 0.6575,
"step": 22720
},
{
"epoch": 0.92,
"learning_rate": 2.9654399724175828e-08,
"loss": 0.6966,
"step": 22725
},
{
"epoch": 0.92,
"learning_rate": 2.949880951265904e-08,
"loss": 0.6842,
"step": 22730
},
{
"epoch": 0.92,
"learning_rate": 2.9343622438896875e-08,
"loss": 0.6586,
"step": 22735
},
{
"epoch": 0.92,
"learning_rate": 2.918883856735277e-08,
"loss": 0.6629,
"step": 22740
},
{
"epoch": 0.92,
"learning_rate": 2.9034457962322513e-08,
"loss": 0.6283,
"step": 22745
},
{
"epoch": 0.92,
"learning_rate": 2.8880480687934473e-08,
"loss": 0.6272,
"step": 22750
},
{
"epoch": 0.92,
"learning_rate": 2.8726906808149486e-08,
"loss": 0.616,
"step": 22755
},
{
"epoch": 0.92,
"learning_rate": 2.857373638676097e-08,
"loss": 0.6605,
"step": 22760
},
{
"epoch": 0.92,
"learning_rate": 2.8420969487394143e-08,
"loss": 0.6212,
"step": 22765
},
{
"epoch": 0.92,
"learning_rate": 2.826860617350746e-08,
"loss": 0.6844,
"step": 22770
},
{
"epoch": 0.93,
"learning_rate": 2.8116646508391183e-08,
"loss": 0.645,
"step": 22775
},
{
"epoch": 0.93,
"learning_rate": 2.7965090555168047e-08,
"loss": 0.6497,
"step": 22780
},
{
"epoch": 0.93,
"learning_rate": 2.7813938376793134e-08,
"loss": 0.6361,
"step": 22785
},
{
"epoch": 0.93,
"learning_rate": 2.7663190036053552e-08,
"loss": 0.5859,
"step": 22790
},
{
"epoch": 0.93,
"learning_rate": 2.75128455955691e-08,
"loss": 0.6026,
"step": 22795
},
{
"epoch": 0.93,
"learning_rate": 2.7362905117791268e-08,
"loss": 0.6694,
"step": 22800
}
],
"logging_steps": 5,
"max_steps": 24619,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 400,
"total_flos": 3187222391169024.0,
"trial_name": null,
"trial_params": null
}