44440 lines
1.1 MiB
44440 lines
1.1 MiB
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 9.996920161099265,
|
|
"eval_steps": 500,
|
|
"global_step": 31650,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0003158809128958383,
|
|
"grad_norm": 6.2127704289584536,
|
|
"learning_rate": 6.31911532385466e-07,
|
|
"loss": 8.5382,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0015794045644791914,
|
|
"grad_norm": 5.441632582092334,
|
|
"learning_rate": 3.1595576619273302e-06,
|
|
"loss": 8.5222,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.003158809128958383,
|
|
"grad_norm": 6.223853805333185,
|
|
"learning_rate": 6.3191153238546605e-06,
|
|
"loss": 8.5644,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.004738213693437574,
|
|
"grad_norm": 4.555097857317313,
|
|
"learning_rate": 9.478672985781992e-06,
|
|
"loss": 8.4694,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.006317618257916766,
|
|
"grad_norm": 3.7490552824171934,
|
|
"learning_rate": 1.2638230647709321e-05,
|
|
"loss": 8.384,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.007897022822395957,
|
|
"grad_norm": 1.017431068195726,
|
|
"learning_rate": 1.579778830963665e-05,
|
|
"loss": 8.2984,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.009476427386875147,
|
|
"grad_norm": 1.4010697485352104,
|
|
"learning_rate": 1.8957345971563984e-05,
|
|
"loss": 8.2458,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.01105583195135434,
|
|
"grad_norm": 1.4454078805201958,
|
|
"learning_rate": 2.2116903633491313e-05,
|
|
"loss": 8.1732,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.012635236515833531,
|
|
"grad_norm": 0.5841055333237101,
|
|
"learning_rate": 2.5276461295418642e-05,
|
|
"loss": 8.1082,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.014214641080312722,
|
|
"grad_norm": 0.8082771952036348,
|
|
"learning_rate": 2.843601895734597e-05,
|
|
"loss": 8.0705,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.015794045644791914,
|
|
"grad_norm": 0.48868551813287026,
|
|
"learning_rate": 3.15955766192733e-05,
|
|
"loss": 8.0232,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.017373450209271106,
|
|
"grad_norm": 0.5452976654604186,
|
|
"learning_rate": 3.4755134281200636e-05,
|
|
"loss": 7.9518,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.018952854773750295,
|
|
"grad_norm": 0.40190214186715384,
|
|
"learning_rate": 3.791469194312797e-05,
|
|
"loss": 7.8716,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.020532259338229487,
|
|
"grad_norm": 0.49313779491880694,
|
|
"learning_rate": 4.1074249605055293e-05,
|
|
"loss": 7.8674,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.02211166390270868,
|
|
"grad_norm": 0.34274541171590306,
|
|
"learning_rate": 4.4233807266982626e-05,
|
|
"loss": 7.8531,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.02369106846718787,
|
|
"grad_norm": 0.670783515834389,
|
|
"learning_rate": 4.739336492890995e-05,
|
|
"loss": 7.7934,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.025270473031667063,
|
|
"grad_norm": 0.3035557533772571,
|
|
"learning_rate": 5.0552922590837284e-05,
|
|
"loss": 7.7468,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.02684987759614625,
|
|
"grad_norm": 0.26164246283361264,
|
|
"learning_rate": 5.3712480252764616e-05,
|
|
"loss": 7.7665,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.028429282160625444,
|
|
"grad_norm": 0.25633175815508613,
|
|
"learning_rate": 5.687203791469194e-05,
|
|
"loss": 7.7145,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.030008686725104636,
|
|
"grad_norm": 0.6842750393577121,
|
|
"learning_rate": 6.0031595576619274e-05,
|
|
"loss": 7.6732,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.03158809128958383,
|
|
"grad_norm": 0.23328824707618634,
|
|
"learning_rate": 6.31911532385466e-05,
|
|
"loss": 7.652,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.03316749585406302,
|
|
"grad_norm": 0.25073553627568257,
|
|
"learning_rate": 6.635071090047395e-05,
|
|
"loss": 7.6357,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.03474690041854221,
|
|
"grad_norm": 0.20089019017365053,
|
|
"learning_rate": 6.951026856240127e-05,
|
|
"loss": 7.5837,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.036326304983021404,
|
|
"grad_norm": 0.3824165564128397,
|
|
"learning_rate": 7.26698262243286e-05,
|
|
"loss": 7.509,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.03790570954750059,
|
|
"grad_norm": 0.54684949274883,
|
|
"learning_rate": 7.582938388625594e-05,
|
|
"loss": 7.5753,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.03948511411197978,
|
|
"grad_norm": 0.4828149930331908,
|
|
"learning_rate": 7.898894154818326e-05,
|
|
"loss": 7.5584,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.04106451867645897,
|
|
"grad_norm": 0.4319280294615604,
|
|
"learning_rate": 8.214849921011059e-05,
|
|
"loss": 7.4968,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.042643923240938165,
|
|
"grad_norm": 0.14907634526526667,
|
|
"learning_rate": 8.530805687203793e-05,
|
|
"loss": 7.469,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.04422332780541736,
|
|
"grad_norm": 0.4441920832206972,
|
|
"learning_rate": 8.846761453396525e-05,
|
|
"loss": 7.4897,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.04580273236989655,
|
|
"grad_norm": 0.7131383198303115,
|
|
"learning_rate": 9.162717219589258e-05,
|
|
"loss": 7.4244,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.04738213693437574,
|
|
"grad_norm": 0.5231467236556445,
|
|
"learning_rate": 9.47867298578199e-05,
|
|
"loss": 7.4496,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.048961541498854934,
|
|
"grad_norm": 0.4218883424696015,
|
|
"learning_rate": 9.794628751974724e-05,
|
|
"loss": 7.4454,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.050540946063334126,
|
|
"grad_norm": 0.3258725866439942,
|
|
"learning_rate": 0.00010110584518167457,
|
|
"loss": 7.3195,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.05212035062781331,
|
|
"grad_norm": 0.37490805186704756,
|
|
"learning_rate": 0.00010426540284360189,
|
|
"loss": 7.3362,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.0536997551922925,
|
|
"grad_norm": 0.8090856015720164,
|
|
"learning_rate": 0.00010742496050552923,
|
|
"loss": 7.3105,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.055279159756771695,
|
|
"grad_norm": 0.9628260997280141,
|
|
"learning_rate": 0.00011058451816745656,
|
|
"loss": 7.2631,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.05685856432125089,
|
|
"grad_norm": 0.789911407688136,
|
|
"learning_rate": 0.00011374407582938388,
|
|
"loss": 7.2927,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.05843796888573008,
|
|
"grad_norm": 0.43613037350148726,
|
|
"learning_rate": 0.00011690363349131122,
|
|
"loss": 7.3392,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.06001737345020927,
|
|
"grad_norm": 0.43320105305691636,
|
|
"learning_rate": 0.00012006319115323855,
|
|
"loss": 7.3604,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.061596778014688464,
|
|
"grad_norm": 0.4073340900692099,
|
|
"learning_rate": 0.0001232227488151659,
|
|
"loss": 7.3057,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.06317618257916766,
|
|
"grad_norm": 1.064883590443732,
|
|
"learning_rate": 0.0001263823064770932,
|
|
"loss": 7.2429,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.06475558714364685,
|
|
"grad_norm": 0.7109842164173309,
|
|
"learning_rate": 0.00012954186413902054,
|
|
"loss": 7.2668,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.06633499170812604,
|
|
"grad_norm": 1.1073366600163383,
|
|
"learning_rate": 0.0001327014218009479,
|
|
"loss": 7.2269,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.06791439627260523,
|
|
"grad_norm": 1.060947638396007,
|
|
"learning_rate": 0.00013586097946287522,
|
|
"loss": 7.2282,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.06949380083708442,
|
|
"grad_norm": 0.8011757396098579,
|
|
"learning_rate": 0.00013902053712480254,
|
|
"loss": 7.1831,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.07107320540156362,
|
|
"grad_norm": 0.3302248285814246,
|
|
"learning_rate": 0.00014218009478672987,
|
|
"loss": 7.1975,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.07265260996604281,
|
|
"grad_norm": 0.3438470557506677,
|
|
"learning_rate": 0.0001453396524486572,
|
|
"loss": 7.2585,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.07423201453052199,
|
|
"grad_norm": 0.4087012242932606,
|
|
"learning_rate": 0.00014849921011058452,
|
|
"loss": 7.1477,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.07581141909500118,
|
|
"grad_norm": 0.36475666254042266,
|
|
"learning_rate": 0.00015165876777251187,
|
|
"loss": 7.1608,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.07739082365948037,
|
|
"grad_norm": 0.37842710558734155,
|
|
"learning_rate": 0.0001548183254344392,
|
|
"loss": 7.1792,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.07897022822395956,
|
|
"grad_norm": 0.28097584942692855,
|
|
"learning_rate": 0.00015797788309636652,
|
|
"loss": 7.1124,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.08054963278843875,
|
|
"grad_norm": 0.30296611890024583,
|
|
"learning_rate": 0.00016113744075829385,
|
|
"loss": 7.0683,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.08212903735291795,
|
|
"grad_norm": 0.26822958956440557,
|
|
"learning_rate": 0.00016429699842022117,
|
|
"loss": 7.1174,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.08370844191739714,
|
|
"grad_norm": 0.35292758498440635,
|
|
"learning_rate": 0.0001674565560821485,
|
|
"loss": 7.1111,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.08528784648187633,
|
|
"grad_norm": 0.5036068687898875,
|
|
"learning_rate": 0.00017061611374407585,
|
|
"loss": 7.1127,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.08686725104635552,
|
|
"grad_norm": 0.5445505627154091,
|
|
"learning_rate": 0.00017377567140600318,
|
|
"loss": 7.1624,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.08844665561083472,
|
|
"grad_norm": 0.46005020223145754,
|
|
"learning_rate": 0.0001769352290679305,
|
|
"loss": 7.1547,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.09002606017531391,
|
|
"grad_norm": 0.5670663056900972,
|
|
"learning_rate": 0.00018009478672985783,
|
|
"loss": 7.1144,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.0916054647397931,
|
|
"grad_norm": 0.15859858120807763,
|
|
"learning_rate": 0.00018325434439178515,
|
|
"loss": 7.0902,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.09318486930427229,
|
|
"grad_norm": 1.051807672856598,
|
|
"learning_rate": 0.00018641390205371248,
|
|
"loss": 7.0528,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.09476427386875148,
|
|
"grad_norm": 0.29508482169156447,
|
|
"learning_rate": 0.0001895734597156398,
|
|
"loss": 7.1428,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.09634367843323068,
|
|
"grad_norm": 0.16647468794562667,
|
|
"learning_rate": 0.00019273301737756716,
|
|
"loss": 7.0804,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.09792308299770987,
|
|
"grad_norm": 0.11022893704335947,
|
|
"learning_rate": 0.00019589257503949448,
|
|
"loss": 7.0636,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.09950248756218906,
|
|
"grad_norm": 0.22812614469648151,
|
|
"learning_rate": 0.0001990521327014218,
|
|
"loss": 7.0417,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.10108189212666825,
|
|
"grad_norm": 0.2046112457147709,
|
|
"learning_rate": 0.00020221169036334913,
|
|
"loss": 6.9876,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.10266129669114744,
|
|
"grad_norm": 0.2534026181514207,
|
|
"learning_rate": 0.00020537124802527646,
|
|
"loss": 7.0649,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.10424070125562662,
|
|
"grad_norm": 0.6621399961798852,
|
|
"learning_rate": 0.00020853080568720379,
|
|
"loss": 7.0734,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.10582010582010581,
|
|
"grad_norm": 0.43723259742782006,
|
|
"learning_rate": 0.00021169036334913114,
|
|
"loss": 7.0412,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.107399510384585,
|
|
"grad_norm": 1.2826871012932044,
|
|
"learning_rate": 0.00021484992101105846,
|
|
"loss": 7.0748,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.1089789149490642,
|
|
"grad_norm": 0.849524470966563,
|
|
"learning_rate": 0.0002180094786729858,
|
|
"loss": 7.084,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.11055831951354339,
|
|
"grad_norm": 0.40236389663315125,
|
|
"learning_rate": 0.00022116903633491312,
|
|
"loss": 7.0698,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.11213772407802258,
|
|
"grad_norm": 0.27246238834316205,
|
|
"learning_rate": 0.00022432859399684044,
|
|
"loss": 7.1011,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.11371712864250177,
|
|
"grad_norm": 0.30332445638169414,
|
|
"learning_rate": 0.00022748815165876777,
|
|
"loss": 7.0469,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.11529653320698097,
|
|
"grad_norm": 0.27851972139163145,
|
|
"learning_rate": 0.00023064770932069512,
|
|
"loss": 7.0559,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.11687593777146016,
|
|
"grad_norm": 2.470542225348509,
|
|
"learning_rate": 0.00023380726698262244,
|
|
"loss": 7.057,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.11845534233593935,
|
|
"grad_norm": 1.1816037580172367,
|
|
"learning_rate": 0.00023696682464454977,
|
|
"loss": 7.0296,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.12003474690041854,
|
|
"grad_norm": 0.9194065752229377,
|
|
"learning_rate": 0.0002401263823064771,
|
|
"loss": 7.0419,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.12161415146489774,
|
|
"grad_norm": 0.785676558788233,
|
|
"learning_rate": 0.00024328593996840442,
|
|
"loss": 7.0803,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.12319355602937693,
|
|
"grad_norm": 0.3542536042131285,
|
|
"learning_rate": 0.0002464454976303318,
|
|
"loss": 7.0036,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.12477296059385612,
|
|
"grad_norm": 0.30928544139513303,
|
|
"learning_rate": 0.00024960505529225907,
|
|
"loss": 7.0368,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.1263523651583353,
|
|
"grad_norm": 0.6049027454231509,
|
|
"learning_rate": 0.0002527646129541864,
|
|
"loss": 7.0539,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.1279317697228145,
|
|
"grad_norm": 0.2633532003308791,
|
|
"learning_rate": 0.0002559241706161137,
|
|
"loss": 7.0535,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.1295111742872937,
|
|
"grad_norm": 0.6449761311297018,
|
|
"learning_rate": 0.0002590837282780411,
|
|
"loss": 7.0584,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.1310905788517729,
|
|
"grad_norm": 0.8176988911712082,
|
|
"learning_rate": 0.00026224328593996843,
|
|
"loss": 7.0499,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.13266998341625208,
|
|
"grad_norm": 1.0392572109016087,
|
|
"learning_rate": 0.0002654028436018958,
|
|
"loss": 7.0109,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.13424938798073127,
|
|
"grad_norm": 0.2934325636821357,
|
|
"learning_rate": 0.0002685624012638231,
|
|
"loss": 7.0422,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.13582879254521046,
|
|
"grad_norm": 0.35119595809367254,
|
|
"learning_rate": 0.00027172195892575043,
|
|
"loss": 6.9462,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.13740819710968966,
|
|
"grad_norm": 0.6074223601119809,
|
|
"learning_rate": 0.00027488151658767773,
|
|
"loss": 6.9556,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.13898760167416885,
|
|
"grad_norm": 0.7676878495406972,
|
|
"learning_rate": 0.0002780410742496051,
|
|
"loss": 6.9101,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.14056700623864804,
|
|
"grad_norm": 0.9692798944709154,
|
|
"learning_rate": 0.0002812006319115324,
|
|
"loss": 6.9324,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.14214641080312723,
|
|
"grad_norm": 0.44757701778278786,
|
|
"learning_rate": 0.00028436018957345974,
|
|
"loss": 6.938,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.14372581536760642,
|
|
"grad_norm": 0.7851047359753064,
|
|
"learning_rate": 0.00028751974723538703,
|
|
"loss": 6.9853,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.14530521993208562,
|
|
"grad_norm": 0.39761848688780116,
|
|
"learning_rate": 0.0002906793048973144,
|
|
"loss": 6.9224,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.1468846244965648,
|
|
"grad_norm": 0.29640850780132755,
|
|
"learning_rate": 0.0002938388625592417,
|
|
"loss": 6.9306,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.14846402906104397,
|
|
"grad_norm": 1.0532977019610126,
|
|
"learning_rate": 0.00029699842022116904,
|
|
"loss": 6.8812,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.15004343362552316,
|
|
"grad_norm": 1.7205774722673408,
|
|
"learning_rate": 0.0003001579778830964,
|
|
"loss": 6.8913,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.15162283819000236,
|
|
"grad_norm": 0.5666132924088377,
|
|
"learning_rate": 0.00030331753554502374,
|
|
"loss": 6.974,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.15320224275448155,
|
|
"grad_norm": 1.6059207113992533,
|
|
"learning_rate": 0.00030647709320695104,
|
|
"loss": 6.9365,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.15478164731896074,
|
|
"grad_norm": 0.8633831039844193,
|
|
"learning_rate": 0.0003096366508688784,
|
|
"loss": 6.9206,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.15636105188343993,
|
|
"grad_norm": 0.6843817707668207,
|
|
"learning_rate": 0.0003127962085308057,
|
|
"loss": 6.9343,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.15794045644791913,
|
|
"grad_norm": 1.154498970933288,
|
|
"learning_rate": 0.00031595576619273305,
|
|
"loss": 6.8535,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.15951986101239832,
|
|
"grad_norm": 1.044221506843542,
|
|
"learning_rate": 0.00031911532385466034,
|
|
"loss": 6.8448,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.1610992655768775,
|
|
"grad_norm": 0.9026359138753273,
|
|
"learning_rate": 0.0003222748815165877,
|
|
"loss": 6.883,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.1626786701413567,
|
|
"grad_norm": 0.7818431127658798,
|
|
"learning_rate": 0.000325434439178515,
|
|
"loss": 6.8274,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.1642580747058359,
|
|
"grad_norm": 0.3038148153132765,
|
|
"learning_rate": 0.00032859399684044235,
|
|
"loss": 6.8275,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.16583747927031509,
|
|
"grad_norm": 0.5514862335019367,
|
|
"learning_rate": 0.00033175355450236965,
|
|
"loss": 6.7956,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.16741688383479428,
|
|
"grad_norm": 0.30623416454852226,
|
|
"learning_rate": 0.000334913112164297,
|
|
"loss": 6.7252,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.16899628839927347,
|
|
"grad_norm": 0.2304880515847455,
|
|
"learning_rate": 0.0003380726698262243,
|
|
"loss": 6.7551,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.17057569296375266,
|
|
"grad_norm": 0.3433961894230557,
|
|
"learning_rate": 0.0003412322274881517,
|
|
"loss": 6.7272,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.17215509752823185,
|
|
"grad_norm": 0.2939590285931192,
|
|
"learning_rate": 0.000344391785150079,
|
|
"loss": 6.7187,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.17373450209271105,
|
|
"grad_norm": 0.5364191748296011,
|
|
"learning_rate": 0.00034755134281200636,
|
|
"loss": 6.7198,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.17531390665719024,
|
|
"grad_norm": 0.8371082121345393,
|
|
"learning_rate": 0.00035071090047393365,
|
|
"loss": 6.7018,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.17689331122166943,
|
|
"grad_norm": 1.976191367544714,
|
|
"learning_rate": 0.000353870458135861,
|
|
"loss": 6.7703,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.17847271578614862,
|
|
"grad_norm": 0.9401625695151546,
|
|
"learning_rate": 0.0003570300157977883,
|
|
"loss": 6.6485,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.18005212035062781,
|
|
"grad_norm": 0.549662511039272,
|
|
"learning_rate": 0.00036018957345971566,
|
|
"loss": 6.5869,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.181631524915107,
|
|
"grad_norm": 0.6162453796965656,
|
|
"learning_rate": 0.00036334913112164296,
|
|
"loss": 6.5924,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.1832109294795862,
|
|
"grad_norm": 0.7924029515967493,
|
|
"learning_rate": 0.0003665086887835703,
|
|
"loss": 6.5347,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.1847903340440654,
|
|
"grad_norm": 0.6839719475751052,
|
|
"learning_rate": 0.0003696682464454976,
|
|
"loss": 6.4657,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.18636973860854458,
|
|
"grad_norm": 0.4801564500944708,
|
|
"learning_rate": 0.00037282780410742496,
|
|
"loss": 6.4492,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.18794914317302377,
|
|
"grad_norm": 0.33876622201501777,
|
|
"learning_rate": 0.00037598736176935226,
|
|
"loss": 6.362,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.18952854773750297,
|
|
"grad_norm": 0.9890962221430388,
|
|
"learning_rate": 0.0003791469194312796,
|
|
"loss": 6.3882,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.19110795230198216,
|
|
"grad_norm": 1.7599819035026254,
|
|
"learning_rate": 0.00038230647709320696,
|
|
"loss": 6.4227,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.19268735686646135,
|
|
"grad_norm": 0.5869579130945853,
|
|
"learning_rate": 0.0003854660347551343,
|
|
"loss": 6.4053,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.19426676143094054,
|
|
"grad_norm": 0.45551072874042825,
|
|
"learning_rate": 0.0003886255924170616,
|
|
"loss": 6.2954,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.19584616599541974,
|
|
"grad_norm": 0.309190315359063,
|
|
"learning_rate": 0.00039178515007898897,
|
|
"loss": 6.1529,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.19742557055989893,
|
|
"grad_norm": 0.5257212216446493,
|
|
"learning_rate": 0.00039494470774091627,
|
|
"loss": 6.2696,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.19900497512437812,
|
|
"grad_norm": 0.36743671772426867,
|
|
"learning_rate": 0.0003981042654028436,
|
|
"loss": 6.256,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.2005843796888573,
|
|
"grad_norm": 0.4249015310170307,
|
|
"learning_rate": 0.0004012638230647709,
|
|
"loss": 6.1581,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.2021637842533365,
|
|
"grad_norm": 0.3543328965539145,
|
|
"learning_rate": 0.00040442338072669827,
|
|
"loss": 6.1331,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.2037431888178157,
|
|
"grad_norm": 0.3595495143147937,
|
|
"learning_rate": 0.00040758293838862557,
|
|
"loss": 6.0677,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.2053225933822949,
|
|
"grad_norm": 0.318573179267392,
|
|
"learning_rate": 0.0004107424960505529,
|
|
"loss": 6.126,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.20690199794677408,
|
|
"grad_norm": 1.4164004013583242,
|
|
"learning_rate": 0.0004139020537124802,
|
|
"loss": 6.0727,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.20848140251125324,
|
|
"grad_norm": 0.8532063740196797,
|
|
"learning_rate": 0.00041706161137440757,
|
|
"loss": 6.1032,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.21006080707573244,
|
|
"grad_norm": 0.6889050112886497,
|
|
"learning_rate": 0.0004202211690363349,
|
|
"loss": 5.9814,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.21164021164021163,
|
|
"grad_norm": 0.6321713467797891,
|
|
"learning_rate": 0.0004233807266982623,
|
|
"loss": 6.1128,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.21321961620469082,
|
|
"grad_norm": 0.6434857024537683,
|
|
"learning_rate": 0.0004265402843601896,
|
|
"loss": 5.93,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.21479902076917,
|
|
"grad_norm": 0.6137569995098859,
|
|
"learning_rate": 0.00042969984202211693,
|
|
"loss": 5.8829,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.2163784253336492,
|
|
"grad_norm": 0.38408385420051877,
|
|
"learning_rate": 0.0004328593996840442,
|
|
"loss": 5.8576,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.2179578298981284,
|
|
"grad_norm": 0.634119528990219,
|
|
"learning_rate": 0.0004360189573459716,
|
|
"loss": 5.8313,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.2195372344626076,
|
|
"grad_norm": 0.918482316143869,
|
|
"learning_rate": 0.0004391785150078989,
|
|
"loss": 5.8946,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.22111663902708678,
|
|
"grad_norm": 0.7907809934880858,
|
|
"learning_rate": 0.00044233807266982623,
|
|
"loss": 5.834,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.22269604359156597,
|
|
"grad_norm": 0.46370630571448646,
|
|
"learning_rate": 0.00044549763033175353,
|
|
"loss": 5.7829,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.22427544815604517,
|
|
"grad_norm": 0.5823074487510479,
|
|
"learning_rate": 0.0004486571879936809,
|
|
"loss": 5.7058,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.22585485272052436,
|
|
"grad_norm": 0.48774049732384694,
|
|
"learning_rate": 0.00045181674565560823,
|
|
"loss": 5.686,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.22743425728500355,
|
|
"grad_norm": 0.35212117236149904,
|
|
"learning_rate": 0.00045497630331753553,
|
|
"loss": 5.7281,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.22901366184948274,
|
|
"grad_norm": 0.43483984405404036,
|
|
"learning_rate": 0.0004581358609794629,
|
|
"loss": 5.6102,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.23059306641396193,
|
|
"grad_norm": 0.6591526873658319,
|
|
"learning_rate": 0.00046129541864139024,
|
|
"loss": 5.739,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.23217247097844113,
|
|
"grad_norm": 0.49546415150281264,
|
|
"learning_rate": 0.0004644549763033176,
|
|
"loss": 5.6577,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.23375187554292032,
|
|
"grad_norm": 0.5496305910361383,
|
|
"learning_rate": 0.0004676145339652449,
|
|
"loss": 5.6056,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.2353312801073995,
|
|
"grad_norm": 0.3473999093942096,
|
|
"learning_rate": 0.00047077409162717224,
|
|
"loss": 5.5325,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.2369106846718787,
|
|
"grad_norm": 0.5652797885699842,
|
|
"learning_rate": 0.00047393364928909954,
|
|
"loss": 5.5865,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.2384900892363579,
|
|
"grad_norm": 0.1869666229929509,
|
|
"learning_rate": 0.0004770932069510269,
|
|
"loss": 5.5449,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.24006949380083709,
|
|
"grad_norm": 0.8352438525883608,
|
|
"learning_rate": 0.0004802527646129542,
|
|
"loss": 5.5177,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.24164889836531628,
|
|
"grad_norm": 0.4945844539827825,
|
|
"learning_rate": 0.00048341232227488154,
|
|
"loss": 5.525,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.24322830292979547,
|
|
"grad_norm": 0.44884298966390257,
|
|
"learning_rate": 0.00048657187993680884,
|
|
"loss": 5.5426,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.24480770749427466,
|
|
"grad_norm": 0.4629116737725799,
|
|
"learning_rate": 0.0004897314375987362,
|
|
"loss": 5.5327,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.24638711205875385,
|
|
"grad_norm": 0.46472967641352214,
|
|
"learning_rate": 0.0004928909952606635,
|
|
"loss": 5.4677,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.24796651662323305,
|
|
"grad_norm": 0.44982438719700835,
|
|
"learning_rate": 0.0004960505529225908,
|
|
"loss": 5.4653,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.24954592118771224,
|
|
"grad_norm": 0.3514112193707262,
|
|
"learning_rate": 0.0004992101105845181,
|
|
"loss": 5.4494,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.25112532575219143,
|
|
"grad_norm": 0.5242266351187057,
|
|
"learning_rate": 0.0005023696682464455,
|
|
"loss": 5.4762,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.2527047303166706,
|
|
"grad_norm": 0.5584823092530848,
|
|
"learning_rate": 0.0005055292259083729,
|
|
"loss": 5.3685,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.2542841348811498,
|
|
"grad_norm": 0.33757365754502244,
|
|
"learning_rate": 0.0005086887835703001,
|
|
"loss": 5.3335,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.255863539445629,
|
|
"grad_norm": 0.5451094689520501,
|
|
"learning_rate": 0.0005118483412322274,
|
|
"loss": 5.3352,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.2574429440101082,
|
|
"grad_norm": 0.6200864058068326,
|
|
"learning_rate": 0.0005150078988941548,
|
|
"loss": 5.2385,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.2590223485745874,
|
|
"grad_norm": 0.4764076847440562,
|
|
"learning_rate": 0.0005181674565560822,
|
|
"loss": 5.2983,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.2606017531390666,
|
|
"grad_norm": 0.5899460488837064,
|
|
"learning_rate": 0.0005213270142180095,
|
|
"loss": 5.316,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.2621811577035458,
|
|
"grad_norm": 0.3984879097129385,
|
|
"learning_rate": 0.0005244865718799369,
|
|
"loss": 5.2552,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.26376056226802497,
|
|
"grad_norm": 0.35376936484079524,
|
|
"learning_rate": 0.0005276461295418642,
|
|
"loss": 5.1943,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.26533996683250416,
|
|
"grad_norm": 0.24093631193083778,
|
|
"learning_rate": 0.0005308056872037916,
|
|
"loss": 5.2637,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.26691937139698335,
|
|
"grad_norm": 0.49391606311149877,
|
|
"learning_rate": 0.0005339652448657188,
|
|
"loss": 5.4132,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.26849877596146254,
|
|
"grad_norm": 0.43395300336607834,
|
|
"learning_rate": 0.0005371248025276462,
|
|
"loss": 5.2483,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.27007818052594174,
|
|
"grad_norm": 0.46447905290775454,
|
|
"learning_rate": 0.0005402843601895735,
|
|
"loss": 5.1954,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.2716575850904209,
|
|
"grad_norm": 0.2886589835076223,
|
|
"learning_rate": 0.0005434439178515009,
|
|
"loss": 5.2153,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.2732369896549001,
|
|
"grad_norm": 0.36161004026508903,
|
|
"learning_rate": 0.0005466034755134281,
|
|
"loss": 5.2296,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.2748163942193793,
|
|
"grad_norm": 0.36114207768892564,
|
|
"learning_rate": 0.0005497630331753555,
|
|
"loss": 5.2661,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.2763957987838585,
|
|
"grad_norm": 0.4311436524552363,
|
|
"learning_rate": 0.0005529225908372828,
|
|
"loss": 5.2284,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.2779752033483377,
|
|
"grad_norm": 0.5724727528073159,
|
|
"learning_rate": 0.0005560821484992102,
|
|
"loss": 5.279,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.2795546079128169,
|
|
"grad_norm": 0.4022955239127195,
|
|
"learning_rate": 0.0005592417061611374,
|
|
"loss": 5.1918,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.2811340124772961,
|
|
"grad_norm": 0.3832205002594842,
|
|
"learning_rate": 0.0005624012638230648,
|
|
"loss": 5.0748,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.2827134170417753,
|
|
"grad_norm": 0.42672311680939823,
|
|
"learning_rate": 0.0005655608214849921,
|
|
"loss": 5.1771,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.28429282160625446,
|
|
"grad_norm": 0.3416870055878534,
|
|
"learning_rate": 0.0005687203791469195,
|
|
"loss": 5.0845,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.28587222617073366,
|
|
"grad_norm": 0.42324854308877585,
|
|
"learning_rate": 0.0005718799368088467,
|
|
"loss": 5.0715,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.28745163073521285,
|
|
"grad_norm": 0.6012027315352575,
|
|
"learning_rate": 0.0005750394944707741,
|
|
"loss": 5.1826,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.28903103529969204,
|
|
"grad_norm": 0.5218406433884802,
|
|
"learning_rate": 0.0005781990521327014,
|
|
"loss": 5.1133,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.29061043986417123,
|
|
"grad_norm": 0.4149110767186936,
|
|
"learning_rate": 0.0005813586097946288,
|
|
"loss": 5.0224,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.2921898444286504,
|
|
"grad_norm": 0.42354283670841264,
|
|
"learning_rate": 0.000584518167456556,
|
|
"loss": 5.1543,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.2937692489931296,
|
|
"grad_norm": 0.5334352394067117,
|
|
"learning_rate": 0.0005876777251184834,
|
|
"loss": 5.0157,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.2953486535576088,
|
|
"grad_norm": 0.4450401762543175,
|
|
"learning_rate": 0.0005908372827804107,
|
|
"loss": 5.0368,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.29692805812208795,
|
|
"grad_norm": 0.45932586952217247,
|
|
"learning_rate": 0.0005939968404423381,
|
|
"loss": 5.0463,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.29850746268656714,
|
|
"grad_norm": 0.42588394451324696,
|
|
"learning_rate": 0.0005971563981042653,
|
|
"loss": 5.0819,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.30008686725104633,
|
|
"grad_norm": 0.6656788426154296,
|
|
"learning_rate": 0.0006003159557661928,
|
|
"loss": 5.1731,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.3016662718155255,
|
|
"grad_norm": 0.47719556791409956,
|
|
"learning_rate": 0.0006034755134281201,
|
|
"loss": 5.0238,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.3032456763800047,
|
|
"grad_norm": 0.5611432111600015,
|
|
"learning_rate": 0.0006066350710900475,
|
|
"loss": 4.9896,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.3048250809444839,
|
|
"grad_norm": 0.6143519827313882,
|
|
"learning_rate": 0.0006097946287519747,
|
|
"loss": 5.0388,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.3064044855089631,
|
|
"grad_norm": 0.4117370891557286,
|
|
"learning_rate": 0.0006129541864139021,
|
|
"loss": 4.9773,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.3079838900734423,
|
|
"grad_norm": 0.4040255243863166,
|
|
"learning_rate": 0.0006161137440758294,
|
|
"loss": 4.9743,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.3095632946379215,
|
|
"grad_norm": 0.3685024774727212,
|
|
"learning_rate": 0.0006192733017377568,
|
|
"loss": 4.9448,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.3111426992024007,
|
|
"grad_norm": 0.5079257507748496,
|
|
"learning_rate": 0.000622432859399684,
|
|
"loss": 4.9968,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.31272210376687987,
|
|
"grad_norm": 0.41938029073526095,
|
|
"learning_rate": 0.0006255924170616114,
|
|
"loss": 4.8603,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.31430150833135906,
|
|
"grad_norm": 0.40494473054595437,
|
|
"learning_rate": 0.0006287519747235387,
|
|
"loss": 5.0043,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.31588091289583825,
|
|
"grad_norm": 0.38506688078551043,
|
|
"learning_rate": 0.0006319115323854661,
|
|
"loss": 4.8982,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.31746031746031744,
|
|
"grad_norm": 0.39620636828274935,
|
|
"learning_rate": 0.0006350710900473933,
|
|
"loss": 4.9752,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.31903972202479663,
|
|
"grad_norm": 0.4173437016756873,
|
|
"learning_rate": 0.0006382306477093207,
|
|
"loss": 4.88,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.3206191265892758,
|
|
"grad_norm": 0.512317814062129,
|
|
"learning_rate": 0.000641390205371248,
|
|
"loss": 4.9377,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.322198531153755,
|
|
"grad_norm": 0.3942980764517883,
|
|
"learning_rate": 0.0006445497630331754,
|
|
"loss": 4.7866,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.3237779357182342,
|
|
"grad_norm": 0.39301676577873923,
|
|
"learning_rate": 0.0006477093206951026,
|
|
"loss": 4.8813,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.3253573402827134,
|
|
"grad_norm": 0.39096154480289047,
|
|
"learning_rate": 0.00065086887835703,
|
|
"loss": 4.927,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.3269367448471926,
|
|
"grad_norm": 0.5660222446285443,
|
|
"learning_rate": 0.0006540284360189573,
|
|
"loss": 4.9219,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.3285161494116718,
|
|
"grad_norm": 0.2968407231102146,
|
|
"learning_rate": 0.0006571879936808847,
|
|
"loss": 4.8572,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.330095553976151,
|
|
"grad_norm": 0.33852019934484984,
|
|
"learning_rate": 0.0006603475513428119,
|
|
"loss": 4.8026,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.33167495854063017,
|
|
"grad_norm": 0.275317165863335,
|
|
"learning_rate": 0.0006635071090047393,
|
|
"loss": 4.7798,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.33325436310510936,
|
|
"grad_norm": 0.36017928511621944,
|
|
"learning_rate": 0.0006666666666666666,
|
|
"loss": 4.9384,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.33483376766958856,
|
|
"grad_norm": 0.37632772082071475,
|
|
"learning_rate": 0.000669826224328594,
|
|
"loss": 4.7871,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.33641317223406775,
|
|
"grad_norm": 0.37278569781126064,
|
|
"learning_rate": 0.0006729857819905212,
|
|
"loss": 4.8438,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.33799257679854694,
|
|
"grad_norm": 0.48509481516036007,
|
|
"learning_rate": 0.0006761453396524486,
|
|
"loss": 4.7854,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.33957198136302613,
|
|
"grad_norm": 0.3689752437680978,
|
|
"learning_rate": 0.000679304897314376,
|
|
"loss": 4.7908,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.3411513859275053,
|
|
"grad_norm": 0.42509360378032895,
|
|
"learning_rate": 0.0006824644549763034,
|
|
"loss": 4.8053,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.3427307904919845,
|
|
"grad_norm": 0.3531227024868157,
|
|
"learning_rate": 0.0006856240126382308,
|
|
"loss": 4.7319,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.3443101950564637,
|
|
"grad_norm": 0.4319768745377021,
|
|
"learning_rate": 0.000688783570300158,
|
|
"loss": 4.771,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.3458895996209429,
|
|
"grad_norm": 0.5218994616666175,
|
|
"learning_rate": 0.0006919431279620854,
|
|
"loss": 4.6886,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.3474690041854221,
|
|
"grad_norm": 0.40081190957648,
|
|
"learning_rate": 0.0006951026856240127,
|
|
"loss": 4.8512,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.3490484087499013,
|
|
"grad_norm": 0.40384089733876094,
|
|
"learning_rate": 0.0006982622432859401,
|
|
"loss": 4.7655,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.3506278133143805,
|
|
"grad_norm": 0.3990224997671872,
|
|
"learning_rate": 0.0007014218009478673,
|
|
"loss": 4.7179,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.35220721787885967,
|
|
"grad_norm": 0.3671177954599768,
|
|
"learning_rate": 0.0007045813586097947,
|
|
"loss": 4.7594,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 0.35378662244333886,
|
|
"grad_norm": 0.32592358544879374,
|
|
"learning_rate": 0.000707740916271722,
|
|
"loss": 4.8065,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.35536602700781805,
|
|
"grad_norm": 0.27989667046997696,
|
|
"learning_rate": 0.0007109004739336494,
|
|
"loss": 4.7348,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.35694543157229724,
|
|
"grad_norm": 0.2726805249398657,
|
|
"learning_rate": 0.0007140600315955766,
|
|
"loss": 4.7251,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.35852483613677644,
|
|
"grad_norm": 0.26271169229037056,
|
|
"learning_rate": 0.000717219589257504,
|
|
"loss": 4.6697,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 0.36010424070125563,
|
|
"grad_norm": 0.24966337615323878,
|
|
"learning_rate": 0.0007203791469194313,
|
|
"loss": 4.6451,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.3616836452657348,
|
|
"grad_norm": 0.34651257210740116,
|
|
"learning_rate": 0.0007235387045813587,
|
|
"loss": 4.743,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 0.363263049830214,
|
|
"grad_norm": 0.30529045148203326,
|
|
"learning_rate": 0.0007266982622432859,
|
|
"loss": 4.6544,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.3648424543946932,
|
|
"grad_norm": 0.29516957256046145,
|
|
"learning_rate": 0.0007298578199052133,
|
|
"loss": 4.6314,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 0.3664218589591724,
|
|
"grad_norm": 0.37641801638716,
|
|
"learning_rate": 0.0007330173775671406,
|
|
"loss": 4.6799,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.3680012635236516,
|
|
"grad_norm": 0.581499758105289,
|
|
"learning_rate": 0.000736176935229068,
|
|
"loss": 4.6155,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 0.3695806680881308,
|
|
"grad_norm": 0.3303116674122958,
|
|
"learning_rate": 0.0007393364928909952,
|
|
"loss": 4.6535,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.37116007265261,
|
|
"grad_norm": 0.4152780156861754,
|
|
"learning_rate": 0.0007424960505529226,
|
|
"loss": 4.6205,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.37273947721708917,
|
|
"grad_norm": 0.5075025229078507,
|
|
"learning_rate": 0.0007456556082148499,
|
|
"loss": 4.7402,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.37431888178156836,
|
|
"grad_norm": 0.4168452318847694,
|
|
"learning_rate": 0.0007488151658767773,
|
|
"loss": 4.6322,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 0.37589828634604755,
|
|
"grad_norm": 0.35699756058795973,
|
|
"learning_rate": 0.0007519747235387045,
|
|
"loss": 4.5663,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.37747769091052674,
|
|
"grad_norm": 0.43478301194689534,
|
|
"learning_rate": 0.0007551342812006319,
|
|
"loss": 4.6439,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 0.37905709547500593,
|
|
"grad_norm": 0.44362147672336705,
|
|
"learning_rate": 0.0007582938388625592,
|
|
"loss": 4.6466,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3806365000394851,
|
|
"grad_norm": 0.5273983527555247,
|
|
"learning_rate": 0.0007614533965244867,
|
|
"loss": 4.5934,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 0.3822159046039643,
|
|
"grad_norm": 0.4386346074087536,
|
|
"learning_rate": 0.0007646129541864139,
|
|
"loss": 4.5789,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.3837953091684435,
|
|
"grad_norm": 0.525664691716359,
|
|
"learning_rate": 0.0007677725118483413,
|
|
"loss": 4.6282,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 0.3853747137329227,
|
|
"grad_norm": 0.5991296460212412,
|
|
"learning_rate": 0.0007709320695102686,
|
|
"loss": 4.5133,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.3869541182974019,
|
|
"grad_norm": 0.3885180787223512,
|
|
"learning_rate": 0.000774091627172196,
|
|
"loss": 4.5911,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.3885335228618811,
|
|
"grad_norm": 0.2773505625109938,
|
|
"learning_rate": 0.0007772511848341232,
|
|
"loss": 4.543,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.3901129274263603,
|
|
"grad_norm": 0.27789170229758464,
|
|
"learning_rate": 0.0007804107424960506,
|
|
"loss": 4.6806,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 0.39169233199083947,
|
|
"grad_norm": 0.3647966903511207,
|
|
"learning_rate": 0.0007835703001579779,
|
|
"loss": 4.5042,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.39327173655531866,
|
|
"grad_norm": 0.3325733552089913,
|
|
"learning_rate": 0.0007867298578199053,
|
|
"loss": 4.6133,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 0.39485114111979785,
|
|
"grad_norm": 0.29407283332852896,
|
|
"learning_rate": 0.0007898894154818325,
|
|
"loss": 4.546,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.39643054568427705,
|
|
"grad_norm": 0.3224036612986526,
|
|
"learning_rate": 0.0007930489731437599,
|
|
"loss": 4.4823,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 0.39800995024875624,
|
|
"grad_norm": 0.4396548377930201,
|
|
"learning_rate": 0.0007962085308056872,
|
|
"loss": 4.4521,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.39958935481323543,
|
|
"grad_norm": 0.4520622366982296,
|
|
"learning_rate": 0.0007993680884676146,
|
|
"loss": 4.4983,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 0.4011687593777146,
|
|
"grad_norm": 0.5067471099692891,
|
|
"learning_rate": 0.0008025276461295418,
|
|
"loss": 4.6509,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.4027481639421938,
|
|
"grad_norm": 0.44983652286607784,
|
|
"learning_rate": 0.0008056872037914692,
|
|
"loss": 4.5551,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 0.404327568506673,
|
|
"grad_norm": 0.2744857789721565,
|
|
"learning_rate": 0.0008088467614533965,
|
|
"loss": 4.4722,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.4059069730711522,
|
|
"grad_norm": 0.3090823230246799,
|
|
"learning_rate": 0.0008120063191153239,
|
|
"loss": 4.5588,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 0.4074863776356314,
|
|
"grad_norm": 0.22056016603549802,
|
|
"learning_rate": 0.0008151658767772511,
|
|
"loss": 4.5044,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.4090657822001106,
|
|
"grad_norm": 0.22668335200552153,
|
|
"learning_rate": 0.0008183254344391785,
|
|
"loss": 4.4704,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 0.4106451867645898,
|
|
"grad_norm": 0.2528435325303638,
|
|
"learning_rate": 0.0008214849921011058,
|
|
"loss": 4.4797,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.41222459132906897,
|
|
"grad_norm": 0.3199211131575819,
|
|
"learning_rate": 0.0008246445497630332,
|
|
"loss": 4.4488,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 0.41380399589354816,
|
|
"grad_norm": 0.2841232699773147,
|
|
"learning_rate": 0.0008278041074249604,
|
|
"loss": 4.386,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.4153834004580273,
|
|
"grad_norm": 0.3289313223734094,
|
|
"learning_rate": 0.0008309636650868878,
|
|
"loss": 4.3735,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 0.4169628050225065,
|
|
"grad_norm": 0.5061174703186038,
|
|
"learning_rate": 0.0008341232227488151,
|
|
"loss": 4.501,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.4185422095869857,
|
|
"grad_norm": 0.31956130549869527,
|
|
"learning_rate": 0.0008372827804107425,
|
|
"loss": 4.4453,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 0.4201216141514649,
|
|
"grad_norm": 0.37003410488030913,
|
|
"learning_rate": 0.0008404423380726698,
|
|
"loss": 4.4463,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.42170101871594406,
|
|
"grad_norm": 0.3001295975880741,
|
|
"learning_rate": 0.0008436018957345972,
|
|
"loss": 4.4065,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 0.42328042328042326,
|
|
"grad_norm": 0.3309371435836036,
|
|
"learning_rate": 0.0008467614533965246,
|
|
"loss": 4.3952,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.42485982784490245,
|
|
"grad_norm": 0.3401308584471871,
|
|
"learning_rate": 0.0008499210110584519,
|
|
"loss": 4.3779,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 0.42643923240938164,
|
|
"grad_norm": 0.3730128820839676,
|
|
"learning_rate": 0.0008530805687203792,
|
|
"loss": 4.4364,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.42801863697386083,
|
|
"grad_norm": 0.40635210718596704,
|
|
"learning_rate": 0.0008562401263823065,
|
|
"loss": 4.4198,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 0.42959804153834,
|
|
"grad_norm": 0.3104368315190389,
|
|
"learning_rate": 0.0008593996840442339,
|
|
"loss": 4.442,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.4311774461028192,
|
|
"grad_norm": 0.2756437108872531,
|
|
"learning_rate": 0.0008625592417061612,
|
|
"loss": 4.3521,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 0.4327568506672984,
|
|
"grad_norm": 0.3209507950932325,
|
|
"learning_rate": 0.0008657187993680885,
|
|
"loss": 4.3763,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.4343362552317776,
|
|
"grad_norm": 0.3513469599362552,
|
|
"learning_rate": 0.0008688783570300158,
|
|
"loss": 4.4113,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 0.4359156597962568,
|
|
"grad_norm": 0.4580040169664537,
|
|
"learning_rate": 0.0008720379146919432,
|
|
"loss": 4.3868,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.437495064360736,
|
|
"grad_norm": 0.29184114117723914,
|
|
"learning_rate": 0.0008751974723538705,
|
|
"loss": 4.3205,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 0.4390744689252152,
|
|
"grad_norm": 0.3255059541417882,
|
|
"learning_rate": 0.0008783570300157978,
|
|
"loss": 4.4277,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.44065387348969437,
|
|
"grad_norm": 0.23731895358980856,
|
|
"learning_rate": 0.0008815165876777251,
|
|
"loss": 4.3787,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 0.44223327805417356,
|
|
"grad_norm": 0.25517686293566755,
|
|
"learning_rate": 0.0008846761453396525,
|
|
"loss": 4.3998,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.44381268261865275,
|
|
"grad_norm": 0.38653219284891066,
|
|
"learning_rate": 0.0008878357030015798,
|
|
"loss": 4.3319,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 0.44539208718313195,
|
|
"grad_norm": 0.4941047718072808,
|
|
"learning_rate": 0.0008909952606635071,
|
|
"loss": 4.3752,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.44697149174761114,
|
|
"grad_norm": 0.27392813184040077,
|
|
"learning_rate": 0.0008941548183254344,
|
|
"loss": 4.3326,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 0.44855089631209033,
|
|
"grad_norm": 0.5891785129930696,
|
|
"learning_rate": 0.0008973143759873618,
|
|
"loss": 4.4047,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.4501303008765695,
|
|
"grad_norm": 0.3774727501393342,
|
|
"learning_rate": 0.0009004739336492891,
|
|
"loss": 4.3729,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 0.4517097054410487,
|
|
"grad_norm": 0.3695157989637981,
|
|
"learning_rate": 0.0009036334913112165,
|
|
"loss": 4.3076,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.4532891100055279,
|
|
"grad_norm": 0.24729477197612165,
|
|
"learning_rate": 0.0009067930489731437,
|
|
"loss": 4.3055,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 0.4548685145700071,
|
|
"grad_norm": 0.2856405701879058,
|
|
"learning_rate": 0.0009099526066350711,
|
|
"loss": 4.2279,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.4564479191344863,
|
|
"grad_norm": 0.3946323745321831,
|
|
"learning_rate": 0.0009131121642969984,
|
|
"loss": 4.3059,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 0.4580273236989655,
|
|
"grad_norm": 0.2239737721505975,
|
|
"learning_rate": 0.0009162717219589258,
|
|
"loss": 4.3098,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.4596067282634447,
|
|
"grad_norm": 0.28758656187518616,
|
|
"learning_rate": 0.000919431279620853,
|
|
"loss": 4.3616,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 0.46118613282792387,
|
|
"grad_norm": 0.4034440744665382,
|
|
"learning_rate": 0.0009225908372827805,
|
|
"loss": 4.2732,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.46276553739240306,
|
|
"grad_norm": 0.3297059692259955,
|
|
"learning_rate": 0.0009257503949447078,
|
|
"loss": 4.3945,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 0.46434494195688225,
|
|
"grad_norm": 0.3748529466334708,
|
|
"learning_rate": 0.0009289099526066352,
|
|
"loss": 4.303,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.46592434652136144,
|
|
"grad_norm": 0.42040543622175475,
|
|
"learning_rate": 0.0009320695102685624,
|
|
"loss": 4.2861,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 0.46750375108584064,
|
|
"grad_norm": 0.27875315582903953,
|
|
"learning_rate": 0.0009352290679304898,
|
|
"loss": 4.2853,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.4690831556503198,
|
|
"grad_norm": 0.3086495849332195,
|
|
"learning_rate": 0.0009383886255924171,
|
|
"loss": 4.232,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 0.470662560214799,
|
|
"grad_norm": 0.2553168340779991,
|
|
"learning_rate": 0.0009415481832543445,
|
|
"loss": 4.2786,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.4722419647792782,
|
|
"grad_norm": 0.4119881398779856,
|
|
"learning_rate": 0.0009447077409162717,
|
|
"loss": 4.3109,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 0.4738213693437574,
|
|
"grad_norm": 0.24709135378680736,
|
|
"learning_rate": 0.0009478672985781991,
|
|
"loss": 4.2491,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.4754007739082366,
|
|
"grad_norm": 0.25124585986886755,
|
|
"learning_rate": 0.0009510268562401264,
|
|
"loss": 4.2701,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 0.4769801784727158,
|
|
"grad_norm": 0.2542704936390731,
|
|
"learning_rate": 0.0009541864139020538,
|
|
"loss": 4.2497,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.478559583037195,
|
|
"grad_norm": 0.5057204600813832,
|
|
"learning_rate": 0.000957345971563981,
|
|
"loss": 4.252,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 0.48013898760167417,
|
|
"grad_norm": 0.3214147081649884,
|
|
"learning_rate": 0.0009605055292259084,
|
|
"loss": 4.3211,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.48171839216615336,
|
|
"grad_norm": 0.3325568221215968,
|
|
"learning_rate": 0.0009636650868878357,
|
|
"loss": 4.3235,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 0.48329779673063256,
|
|
"grad_norm": 0.35194382502241867,
|
|
"learning_rate": 0.0009668246445497631,
|
|
"loss": 4.2069,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.48487720129511175,
|
|
"grad_norm": 0.3232562671913541,
|
|
"learning_rate": 0.0009699842022116903,
|
|
"loss": 4.315,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 0.48645660585959094,
|
|
"grad_norm": 0.3917725392029616,
|
|
"learning_rate": 0.0009731437598736177,
|
|
"loss": 4.2047,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.48803601042407013,
|
|
"grad_norm": 0.3564362217383263,
|
|
"learning_rate": 0.000976303317535545,
|
|
"loss": 4.1988,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 0.4896154149885493,
|
|
"grad_norm": 0.43236466336771057,
|
|
"learning_rate": 0.0009794628751974724,
|
|
"loss": 4.2329,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.4911948195530285,
|
|
"grad_norm": 0.2502583684842727,
|
|
"learning_rate": 0.0009826224328593996,
|
|
"loss": 4.1544,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 0.4927742241175077,
|
|
"grad_norm": 0.2500902436658623,
|
|
"learning_rate": 0.000985781990521327,
|
|
"loss": 4.3171,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.4943536286819869,
|
|
"grad_norm": 0.2544078332384059,
|
|
"learning_rate": 0.0009889415481832543,
|
|
"loss": 4.1461,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 0.4959330332464661,
|
|
"grad_norm": 0.3040688173532611,
|
|
"learning_rate": 0.0009921011058451816,
|
|
"loss": 4.2662,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.4975124378109453,
|
|
"grad_norm": 0.35895041445570003,
|
|
"learning_rate": 0.000995260663507109,
|
|
"loss": 4.2931,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 0.4990918423754245,
|
|
"grad_norm": 0.32434784743319817,
|
|
"learning_rate": 0.0009984202211690363,
|
|
"loss": 4.1844,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.5006712469399036,
|
|
"grad_norm": 0.24574931413980025,
|
|
"learning_rate": 0.0010015797788309638,
|
|
"loss": 4.2135,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 0.5022506515043829,
|
|
"grad_norm": 0.4906220353757956,
|
|
"learning_rate": 0.001004739336492891,
|
|
"loss": 4.1729,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.503830056068862,
|
|
"grad_norm": 0.292823648070113,
|
|
"learning_rate": 0.0010078988941548185,
|
|
"loss": 4.2662,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 0.5054094606333412,
|
|
"grad_norm": 0.30934621640685955,
|
|
"learning_rate": 0.0010110584518167457,
|
|
"loss": 4.1053,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.5069888651978204,
|
|
"grad_norm": 0.3114064964705933,
|
|
"learning_rate": 0.001014218009478673,
|
|
"loss": 4.2144,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 0.5085682697622996,
|
|
"grad_norm": 0.36860508283438886,
|
|
"learning_rate": 0.0010173775671406002,
|
|
"loss": 4.1171,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.5101476743267788,
|
|
"grad_norm": 0.259770472981659,
|
|
"learning_rate": 0.0010205371248025277,
|
|
"loss": 4.1222,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 0.511727078891258,
|
|
"grad_norm": 0.34669740677241034,
|
|
"learning_rate": 0.001023696682464455,
|
|
"loss": 4.1779,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.5133064834557372,
|
|
"grad_norm": 0.26776977534742985,
|
|
"learning_rate": 0.0010268562401263824,
|
|
"loss": 4.1824,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 0.5148858880202164,
|
|
"grad_norm": 0.33482765454958535,
|
|
"learning_rate": 0.0010300157977883096,
|
|
"loss": 4.2453,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.5164652925846955,
|
|
"grad_norm": 0.41188740885185754,
|
|
"learning_rate": 0.001033175355450237,
|
|
"loss": 4.1147,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 0.5180446971491748,
|
|
"grad_norm": 0.2964291035835029,
|
|
"learning_rate": 0.0010363349131121643,
|
|
"loss": 4.2193,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.5196241017136539,
|
|
"grad_norm": 0.2793833405669084,
|
|
"learning_rate": 0.0010394944707740915,
|
|
"loss": 4.212,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 0.5212035062781332,
|
|
"grad_norm": 0.29713774307604923,
|
|
"learning_rate": 0.001042654028436019,
|
|
"loss": 4.0438,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.5227829108426123,
|
|
"grad_norm": 0.26956554558522977,
|
|
"learning_rate": 0.0010458135860979463,
|
|
"loss": 4.106,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 0.5243623154070916,
|
|
"grad_norm": 0.24092181432598472,
|
|
"learning_rate": 0.0010489731437598737,
|
|
"loss": 4.1685,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.5259417199715707,
|
|
"grad_norm": 0.26555033371413345,
|
|
"learning_rate": 0.001052132701421801,
|
|
"loss": 4.0912,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 0.5275211245360499,
|
|
"grad_norm": 0.25132501142979297,
|
|
"learning_rate": 0.0010552922590837284,
|
|
"loss": 4.0463,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.5291005291005291,
|
|
"grad_norm": 0.25984688233738684,
|
|
"learning_rate": 0.0010584518167456557,
|
|
"loss": 4.0156,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 0.5306799336650083,
|
|
"grad_norm": 0.29518646888723116,
|
|
"learning_rate": 0.0010616113744075831,
|
|
"loss": 4.1824,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.5322593382294875,
|
|
"grad_norm": 0.28830409655868694,
|
|
"learning_rate": 0.0010647709320695102,
|
|
"loss": 4.0853,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 0.5338387427939667,
|
|
"grad_norm": 0.3629932150134809,
|
|
"learning_rate": 0.0010679304897314376,
|
|
"loss": 4.1343,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.5354181473584458,
|
|
"grad_norm": 0.3206407737910774,
|
|
"learning_rate": 0.0010710900473933649,
|
|
"loss": 4.0968,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 0.5369975519229251,
|
|
"grad_norm": 0.3976703032902267,
|
|
"learning_rate": 0.0010742496050552923,
|
|
"loss": 4.1574,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.5385769564874042,
|
|
"grad_norm": 0.3410882250123248,
|
|
"learning_rate": 0.0010774091627172196,
|
|
"loss": 3.9865,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 0.5401563610518835,
|
|
"grad_norm": 0.24914445572445618,
|
|
"learning_rate": 0.001080568720379147,
|
|
"loss": 4.1298,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.5417357656163626,
|
|
"grad_norm": 0.3586153475100128,
|
|
"learning_rate": 0.0010837282780410743,
|
|
"loss": 4.0494,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 0.5433151701808419,
|
|
"grad_norm": 0.21271739910302082,
|
|
"learning_rate": 0.0010868878357030017,
|
|
"loss": 4.0796,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.544894574745321,
|
|
"grad_norm": 0.25098095308870794,
|
|
"learning_rate": 0.0010900473933649288,
|
|
"loss": 3.9703,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 0.5464739793098002,
|
|
"grad_norm": 0.22375556358935217,
|
|
"learning_rate": 0.0010932069510268562,
|
|
"loss": 4.1399,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.5480533838742794,
|
|
"grad_norm": 0.24930977346518232,
|
|
"learning_rate": 0.0010963665086887835,
|
|
"loss": 4.1181,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 0.5496327884387586,
|
|
"grad_norm": 0.332755640979972,
|
|
"learning_rate": 0.001099526066350711,
|
|
"loss": 4.0582,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.5512121930032378,
|
|
"grad_norm": 0.25641546202357435,
|
|
"learning_rate": 0.0011026856240126382,
|
|
"loss": 4.0961,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 0.552791597567717,
|
|
"grad_norm": 0.20369911360555534,
|
|
"learning_rate": 0.0011058451816745656,
|
|
"loss": 4.0879,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.5543710021321961,
|
|
"grad_norm": 0.20533893742270176,
|
|
"learning_rate": 0.0011090047393364929,
|
|
"loss": 4.1274,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 0.5559504066966754,
|
|
"grad_norm": 0.25026975391684464,
|
|
"learning_rate": 0.0011121642969984203,
|
|
"loss": 4.0035,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.5575298112611545,
|
|
"grad_norm": 0.30671183509078215,
|
|
"learning_rate": 0.0011153238546603474,
|
|
"loss": 4.0045,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 0.5591092158256338,
|
|
"grad_norm": 0.39359685055416405,
|
|
"learning_rate": 0.0011184834123222748,
|
|
"loss": 4.0696,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.5606886203901129,
|
|
"grad_norm": 0.4238119417095488,
|
|
"learning_rate": 0.0011216429699842023,
|
|
"loss": 4.1661,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 0.5622680249545922,
|
|
"grad_norm": 0.3295988445367429,
|
|
"learning_rate": 0.0011248025276461295,
|
|
"loss": 3.9719,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.5638474295190713,
|
|
"grad_norm": 0.3060613640788937,
|
|
"learning_rate": 0.001127962085308057,
|
|
"loss": 4.1499,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 0.5654268340835505,
|
|
"grad_norm": 0.3952683637011821,
|
|
"learning_rate": 0.0011311216429699842,
|
|
"loss": 4.0395,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.5670062386480297,
|
|
"grad_norm": 0.3734181377848123,
|
|
"learning_rate": 0.0011342812006319117,
|
|
"loss": 4.0731,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 0.5685856432125089,
|
|
"grad_norm": 0.38077178575661774,
|
|
"learning_rate": 0.001137440758293839,
|
|
"loss": 4.155,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.5701650477769881,
|
|
"grad_norm": 0.2690074343662603,
|
|
"learning_rate": 0.0011406003159557664,
|
|
"loss": 4.1339,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 0.5717444523414673,
|
|
"grad_norm": 0.3244752552450406,
|
|
"learning_rate": 0.0011437598736176934,
|
|
"loss": 4.0103,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.5733238569059464,
|
|
"grad_norm": 0.3249793558401697,
|
|
"learning_rate": 0.0011469194312796209,
|
|
"loss": 4.0671,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 0.5749032614704257,
|
|
"grad_norm": 0.37198248417206886,
|
|
"learning_rate": 0.0011500789889415481,
|
|
"loss": 4.0761,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.5764826660349048,
|
|
"grad_norm": 0.23416762438568905,
|
|
"learning_rate": 0.0011532385466034756,
|
|
"loss": 3.9817,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 0.5780620705993841,
|
|
"grad_norm": 0.2718063712033073,
|
|
"learning_rate": 0.0011563981042654028,
|
|
"loss": 4.0381,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.5796414751638632,
|
|
"grad_norm": 0.24530966492867137,
|
|
"learning_rate": 0.0011595576619273303,
|
|
"loss": 3.9687,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 0.5812208797283425,
|
|
"grad_norm": 0.3106480588010222,
|
|
"learning_rate": 0.0011627172195892575,
|
|
"loss": 3.9962,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.5828002842928216,
|
|
"grad_norm": 0.31913403219710973,
|
|
"learning_rate": 0.001165876777251185,
|
|
"loss": 4.0452,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 0.5843796888573008,
|
|
"grad_norm": 0.3275477776859289,
|
|
"learning_rate": 0.001169036334913112,
|
|
"loss": 4.0008,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.58595909342178,
|
|
"grad_norm": 0.25119136619450627,
|
|
"learning_rate": 0.0011721958925750395,
|
|
"loss": 4.0961,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 0.5875384979862592,
|
|
"grad_norm": 0.22397156772013765,
|
|
"learning_rate": 0.0011753554502369667,
|
|
"loss": 4.0451,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.5891179025507384,
|
|
"grad_norm": 0.20479994245596356,
|
|
"learning_rate": 0.0011785150078988942,
|
|
"loss": 4.0457,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 0.5906973071152176,
|
|
"grad_norm": 0.394527139585928,
|
|
"learning_rate": 0.0011816745655608214,
|
|
"loss": 4.0876,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.5922767116796968,
|
|
"grad_norm": 0.23435702222052282,
|
|
"learning_rate": 0.001184834123222749,
|
|
"loss": 3.9747,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 0.5938561162441759,
|
|
"grad_norm": 0.28634780566431706,
|
|
"learning_rate": 0.0011879936808846761,
|
|
"loss": 4.0351,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.5954355208086551,
|
|
"grad_norm": 0.23259288695977196,
|
|
"learning_rate": 0.0011911532385466036,
|
|
"loss": 4.0478,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 0.5970149253731343,
|
|
"grad_norm": 0.2740802794408343,
|
|
"learning_rate": 0.0011943127962085306,
|
|
"loss": 3.965,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.5985943299376135,
|
|
"grad_norm": 0.2968405906789927,
|
|
"learning_rate": 0.001197472353870458,
|
|
"loss": 3.9978,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 0.6001737345020927,
|
|
"grad_norm": 0.3510632332441351,
|
|
"learning_rate": 0.0012006319115323856,
|
|
"loss": 3.8756,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.6017531390665719,
|
|
"grad_norm": 0.33933157997666646,
|
|
"learning_rate": 0.0012037914691943128,
|
|
"loss": 4.1102,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 0.603332543631051,
|
|
"grad_norm": 0.2662117361891833,
|
|
"learning_rate": 0.0012069510268562403,
|
|
"loss": 3.9772,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.6049119481955303,
|
|
"grad_norm": 0.19808953368357718,
|
|
"learning_rate": 0.0012101105845181675,
|
|
"loss": 3.9335,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 0.6064913527600094,
|
|
"grad_norm": 0.1787972286211566,
|
|
"learning_rate": 0.001213270142180095,
|
|
"loss": 3.9334,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.6080707573244887,
|
|
"grad_norm": 0.22756804055302363,
|
|
"learning_rate": 0.0012164296998420222,
|
|
"loss": 4.0282,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 0.6096501618889678,
|
|
"grad_norm": 0.20739934559933562,
|
|
"learning_rate": 0.0012195892575039495,
|
|
"loss": 4.0244,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.6112295664534471,
|
|
"grad_norm": 0.25709062488029105,
|
|
"learning_rate": 0.0012227488151658767,
|
|
"loss": 3.9838,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 0.6128089710179262,
|
|
"grad_norm": 0.2859655649994034,
|
|
"learning_rate": 0.0012259083728278042,
|
|
"loss": 4.1079,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.6143883755824054,
|
|
"grad_norm": 0.2728350138342544,
|
|
"learning_rate": 0.0012290679304897314,
|
|
"loss": 3.9119,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 0.6159677801468846,
|
|
"grad_norm": 0.2575910181962937,
|
|
"learning_rate": 0.0012322274881516589,
|
|
"loss": 3.8685,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.6175471847113638,
|
|
"grad_norm": 0.20895739009488526,
|
|
"learning_rate": 0.0012353870458135861,
|
|
"loss": 3.8788,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 0.619126589275843,
|
|
"grad_norm": 0.24459107325549784,
|
|
"learning_rate": 0.0012385466034755136,
|
|
"loss": 3.8308,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.6207059938403222,
|
|
"grad_norm": 0.2507588464193189,
|
|
"learning_rate": 0.0012417061611374408,
|
|
"loss": 3.9587,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 0.6222853984048013,
|
|
"grad_norm": 0.29513467922086983,
|
|
"learning_rate": 0.001244865718799368,
|
|
"loss": 3.9315,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.6238648029692806,
|
|
"grad_norm": 0.20497026627483445,
|
|
"learning_rate": 0.0012480252764612953,
|
|
"loss": 3.9813,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 0.6254442075337597,
|
|
"grad_norm": 0.2936777035834914,
|
|
"learning_rate": 0.0012511848341232228,
|
|
"loss": 3.9606,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.627023612098239,
|
|
"grad_norm": 0.3422658240468666,
|
|
"learning_rate": 0.00125434439178515,
|
|
"loss": 3.948,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 0.6286030166627181,
|
|
"grad_norm": 0.31350047253232793,
|
|
"learning_rate": 0.0012575039494470775,
|
|
"loss": 3.9702,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.6301824212271974,
|
|
"grad_norm": 0.23473831158349892,
|
|
"learning_rate": 0.0012606635071090047,
|
|
"loss": 3.9025,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 0.6317618257916765,
|
|
"grad_norm": 0.23122760516885937,
|
|
"learning_rate": 0.0012638230647709322,
|
|
"loss": 3.8854,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.6333412303561557,
|
|
"grad_norm": 0.24819213985468946,
|
|
"learning_rate": 0.0012669826224328594,
|
|
"loss": 3.8488,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 0.6349206349206349,
|
|
"grad_norm": 0.2814044069474459,
|
|
"learning_rate": 0.0012701421800947867,
|
|
"loss": 3.8787,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.6365000394851141,
|
|
"grad_norm": 0.22554367699900693,
|
|
"learning_rate": 0.001273301737756714,
|
|
"loss": 3.839,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 0.6380794440495933,
|
|
"grad_norm": 0.24327030764405488,
|
|
"learning_rate": 0.0012764612954186414,
|
|
"loss": 3.9206,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.6396588486140725,
|
|
"grad_norm": 0.16093750563445822,
|
|
"learning_rate": 0.0012796208530805686,
|
|
"loss": 3.8874,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 0.6412382531785517,
|
|
"grad_norm": 0.194489762604418,
|
|
"learning_rate": 0.001282780410742496,
|
|
"loss": 3.9038,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.6428176577430309,
|
|
"grad_norm": 0.20599428439287257,
|
|
"learning_rate": 0.0012859399684044235,
|
|
"loss": 3.8576,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 0.64439706230751,
|
|
"grad_norm": 0.205011134486772,
|
|
"learning_rate": 0.0012890995260663508,
|
|
"loss": 3.9374,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.6459764668719893,
|
|
"grad_norm": 0.2377028741469135,
|
|
"learning_rate": 0.0012922590837282782,
|
|
"loss": 3.9984,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 0.6475558714364684,
|
|
"grad_norm": 0.24304609434521304,
|
|
"learning_rate": 0.0012954186413902053,
|
|
"loss": 3.9969,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.6491352760009477,
|
|
"grad_norm": 0.2623718421955022,
|
|
"learning_rate": 0.0012985781990521327,
|
|
"loss": 3.819,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 0.6507146805654268,
|
|
"grad_norm": 0.2745914540804823,
|
|
"learning_rate": 0.00130173775671406,
|
|
"loss": 3.9895,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.652294085129906,
|
|
"grad_norm": 0.22452179168678846,
|
|
"learning_rate": 0.0013048973143759874,
|
|
"loss": 3.8138,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 0.6538734896943852,
|
|
"grad_norm": 0.24802787851682156,
|
|
"learning_rate": 0.0013080568720379147,
|
|
"loss": 3.8979,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.6554528942588644,
|
|
"grad_norm": 0.2641386277150738,
|
|
"learning_rate": 0.0013112164296998421,
|
|
"loss": 3.9054,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 0.6570322988233436,
|
|
"grad_norm": 0.3221795475676082,
|
|
"learning_rate": 0.0013143759873617694,
|
|
"loss": 3.8572,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.6586117033878228,
|
|
"grad_norm": 0.19839742765982213,
|
|
"learning_rate": 0.0013175355450236969,
|
|
"loss": 3.8053,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 0.660191107952302,
|
|
"grad_norm": 0.26138504970498594,
|
|
"learning_rate": 0.0013206951026856239,
|
|
"loss": 3.9861,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.6617705125167812,
|
|
"grad_norm": 0.24992309356917045,
|
|
"learning_rate": 0.0013238546603475513,
|
|
"loss": 3.9771,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 0.6633499170812603,
|
|
"grad_norm": 0.30730598918197166,
|
|
"learning_rate": 0.0013270142180094786,
|
|
"loss": 3.8441,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.6649293216457396,
|
|
"grad_norm": 0.34181324929065954,
|
|
"learning_rate": 0.001330173775671406,
|
|
"loss": 3.9048,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 0.6665087262102187,
|
|
"grad_norm": 0.3703962744185399,
|
|
"learning_rate": 0.0013333333333333333,
|
|
"loss": 3.9084,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.668088130774698,
|
|
"grad_norm": 0.21963841409456603,
|
|
"learning_rate": 0.0013364928909952607,
|
|
"loss": 3.7848,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 0.6696675353391771,
|
|
"grad_norm": 0.3456682458725276,
|
|
"learning_rate": 0.001339652448657188,
|
|
"loss": 4.0472,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.6712469399036564,
|
|
"grad_norm": 0.1987812841934723,
|
|
"learning_rate": 0.0013428120063191155,
|
|
"loss": 3.96,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 0.6728263444681355,
|
|
"grad_norm": 0.19326508591069674,
|
|
"learning_rate": 0.0013459715639810425,
|
|
"loss": 3.9734,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.6744057490326147,
|
|
"grad_norm": 0.2520202968946282,
|
|
"learning_rate": 0.00134913112164297,
|
|
"loss": 3.8678,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 0.6759851535970939,
|
|
"grad_norm": 0.1832954776535829,
|
|
"learning_rate": 0.0013522906793048972,
|
|
"loss": 3.8302,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.6775645581615731,
|
|
"grad_norm": 0.20427163527235054,
|
|
"learning_rate": 0.0013554502369668246,
|
|
"loss": 3.7359,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 0.6791439627260523,
|
|
"grad_norm": 0.2014346803725015,
|
|
"learning_rate": 0.001358609794628752,
|
|
"loss": 3.8759,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.6807233672905315,
|
|
"grad_norm": 0.2330789181691575,
|
|
"learning_rate": 0.0013617693522906794,
|
|
"loss": 3.774,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 0.6823027718550106,
|
|
"grad_norm": 0.2053101704551176,
|
|
"learning_rate": 0.0013649289099526068,
|
|
"loss": 3.8485,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.6838821764194899,
|
|
"grad_norm": 0.2156812272229568,
|
|
"learning_rate": 0.001368088467614534,
|
|
"loss": 3.8498,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 0.685461580983969,
|
|
"grad_norm": 0.3474900523050622,
|
|
"learning_rate": 0.0013712480252764615,
|
|
"loss": 3.8156,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.6870409855484483,
|
|
"grad_norm": 0.20300679624217857,
|
|
"learning_rate": 0.0013744075829383885,
|
|
"loss": 3.8208,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 0.6886203901129274,
|
|
"grad_norm": 0.22758535553370787,
|
|
"learning_rate": 0.001377567140600316,
|
|
"loss": 3.7393,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.6901997946774067,
|
|
"grad_norm": 0.2913296112206454,
|
|
"learning_rate": 0.0013807266982622433,
|
|
"loss": 3.8639,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 0.6917791992418858,
|
|
"grad_norm": 0.22344429229234122,
|
|
"learning_rate": 0.0013838862559241707,
|
|
"loss": 3.8483,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.693358603806365,
|
|
"grad_norm": 0.24781341095554865,
|
|
"learning_rate": 0.001387045813586098,
|
|
"loss": 3.7865,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 0.6949380083708442,
|
|
"grad_norm": 0.24311562658045918,
|
|
"learning_rate": 0.0013902053712480254,
|
|
"loss": 3.8346,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6965174129353234,
|
|
"grad_norm": 0.2977627052415685,
|
|
"learning_rate": 0.0013933649289099527,
|
|
"loss": 3.8134,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 0.6980968174998026,
|
|
"grad_norm": 0.40561638489455504,
|
|
"learning_rate": 0.0013965244865718801,
|
|
"loss": 3.9838,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.6996762220642818,
|
|
"grad_norm": 0.3162312925055,
|
|
"learning_rate": 0.0013996840442338072,
|
|
"loss": 3.8218,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 0.701255626628761,
|
|
"grad_norm": 0.20741807322760966,
|
|
"learning_rate": 0.0014028436018957346,
|
|
"loss": 3.8041,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.7028350311932402,
|
|
"grad_norm": 0.20987061820283978,
|
|
"learning_rate": 0.0014060031595576619,
|
|
"loss": 3.8393,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 0.7044144357577193,
|
|
"grad_norm": 0.1911108057821915,
|
|
"learning_rate": 0.0014091627172195893,
|
|
"loss": 3.7089,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.7059938403221986,
|
|
"grad_norm": 0.1768747480315818,
|
|
"learning_rate": 0.0014123222748815166,
|
|
"loss": 3.6769,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 0.7075732448866777,
|
|
"grad_norm": 0.2004499176891643,
|
|
"learning_rate": 0.001415481832543444,
|
|
"loss": 3.7227,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.709152649451157,
|
|
"grad_norm": 0.16218360545834662,
|
|
"learning_rate": 0.0014186413902053713,
|
|
"loss": 3.7493,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 0.7107320540156361,
|
|
"grad_norm": 0.2051019817661303,
|
|
"learning_rate": 0.0014218009478672987,
|
|
"loss": 3.8015,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.7123114585801152,
|
|
"grad_norm": 0.1693775898547751,
|
|
"learning_rate": 0.0014249605055292258,
|
|
"loss": 3.7405,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 0.7138908631445945,
|
|
"grad_norm": 0.23008194178880942,
|
|
"learning_rate": 0.0014281200631911532,
|
|
"loss": 3.7961,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.7154702677090736,
|
|
"grad_norm": 0.22627752956761354,
|
|
"learning_rate": 0.0014312796208530805,
|
|
"loss": 3.8194,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 0.7170496722735529,
|
|
"grad_norm": 0.16456172442202183,
|
|
"learning_rate": 0.001434439178515008,
|
|
"loss": 3.7835,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.718629076838032,
|
|
"grad_norm": 0.25085943817371653,
|
|
"learning_rate": 0.0014375987361769352,
|
|
"loss": 3.7775,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 0.7202084814025113,
|
|
"grad_norm": 0.19390913955357955,
|
|
"learning_rate": 0.0014407582938388626,
|
|
"loss": 3.8324,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.7217878859669904,
|
|
"grad_norm": 0.21262144885010617,
|
|
"learning_rate": 0.00144391785150079,
|
|
"loss": 3.8632,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 0.7233672905314696,
|
|
"grad_norm": 0.20753404886485088,
|
|
"learning_rate": 0.0014470774091627173,
|
|
"loss": 3.7361,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.7249466950959488,
|
|
"grad_norm": 0.18330521721784593,
|
|
"learning_rate": 0.0014502369668246446,
|
|
"loss": 3.6117,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 0.726526099660428,
|
|
"grad_norm": 0.28928349085054245,
|
|
"learning_rate": 0.0014533965244865718,
|
|
"loss": 3.7541,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.7281055042249072,
|
|
"grad_norm": 0.16420443253956293,
|
|
"learning_rate": 0.0014565560821484993,
|
|
"loss": 3.7508,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 0.7296849087893864,
|
|
"grad_norm": 0.21614784039850257,
|
|
"learning_rate": 0.0014597156398104265,
|
|
"loss": 3.7169,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.7312643133538655,
|
|
"grad_norm": 0.17984849327994049,
|
|
"learning_rate": 0.001462875197472354,
|
|
"loss": 3.7626,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 0.7328437179183448,
|
|
"grad_norm": 0.29654569082302534,
|
|
"learning_rate": 0.0014660347551342812,
|
|
"loss": 3.7716,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.7344231224828239,
|
|
"grad_norm": 0.2423639271628589,
|
|
"learning_rate": 0.0014691943127962087,
|
|
"loss": 3.8041,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 0.7360025270473032,
|
|
"grad_norm": 0.19251162129527793,
|
|
"learning_rate": 0.001472353870458136,
|
|
"loss": 3.6773,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.7375819316117823,
|
|
"grad_norm": 0.17987437526330385,
|
|
"learning_rate": 0.0014755134281200632,
|
|
"loss": 3.7863,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 0.7391613361762616,
|
|
"grad_norm": 0.28475822855536115,
|
|
"learning_rate": 0.0014786729857819904,
|
|
"loss": 3.8109,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.7407407407407407,
|
|
"grad_norm": 0.24249112320758767,
|
|
"learning_rate": 0.0014818325434439179,
|
|
"loss": 3.7797,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 0.74232014530522,
|
|
"grad_norm": 0.2326020797307572,
|
|
"learning_rate": 0.0014849921011058451,
|
|
"loss": 3.6954,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.7438995498696991,
|
|
"grad_norm": 0.22051402603596704,
|
|
"learning_rate": 0.0014881516587677726,
|
|
"loss": 3.8969,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 0.7454789544341783,
|
|
"grad_norm": 0.26421968183562905,
|
|
"learning_rate": 0.0014913112164296998,
|
|
"loss": 3.7278,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.7470583589986575,
|
|
"grad_norm": 0.20101343478698072,
|
|
"learning_rate": 0.0014944707740916273,
|
|
"loss": 3.6197,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 0.7486377635631367,
|
|
"grad_norm": 0.2079706232832599,
|
|
"learning_rate": 0.0014976303317535545,
|
|
"loss": 3.8934,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.7502171681276159,
|
|
"grad_norm": 0.2049265396867882,
|
|
"learning_rate": 0.0015007898894154818,
|
|
"loss": 3.6839,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 0.7517965726920951,
|
|
"grad_norm": 0.28063519759341937,
|
|
"learning_rate": 0.001503949447077409,
|
|
"loss": 3.8475,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.7533759772565742,
|
|
"grad_norm": 0.255893966061907,
|
|
"learning_rate": 0.0015071090047393365,
|
|
"loss": 3.7594,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 0.7549553818210535,
|
|
"grad_norm": 0.2258992783417018,
|
|
"learning_rate": 0.0015102685624012637,
|
|
"loss": 3.6973,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.7565347863855326,
|
|
"grad_norm": 0.28679321830822124,
|
|
"learning_rate": 0.0015134281200631912,
|
|
"loss": 3.7164,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 0.7581141909500119,
|
|
"grad_norm": 0.20356661517477284,
|
|
"learning_rate": 0.0015165876777251184,
|
|
"loss": 3.7652,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.759693595514491,
|
|
"grad_norm": 0.22262758807790112,
|
|
"learning_rate": 0.001519747235387046,
|
|
"loss": 3.6552,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 0.7612730000789703,
|
|
"grad_norm": 0.2249703886837002,
|
|
"learning_rate": 0.0015229067930489734,
|
|
"loss": 3.7424,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.7628524046434494,
|
|
"grad_norm": 0.16232152509783948,
|
|
"learning_rate": 0.0015260663507109004,
|
|
"loss": 3.6885,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 0.7644318092079286,
|
|
"grad_norm": 0.21140514606548705,
|
|
"learning_rate": 0.0015292259083728279,
|
|
"loss": 3.7257,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.7660112137724078,
|
|
"grad_norm": 0.2738777752868707,
|
|
"learning_rate": 0.001532385466034755,
|
|
"loss": 3.6921,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 0.767590618336887,
|
|
"grad_norm": 0.21449459870750393,
|
|
"learning_rate": 0.0015355450236966826,
|
|
"loss": 3.6344,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.7691700229013662,
|
|
"grad_norm": 0.23002450286814663,
|
|
"learning_rate": 0.0015387045813586098,
|
|
"loss": 3.712,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 0.7707494274658454,
|
|
"grad_norm": 0.22351746355041202,
|
|
"learning_rate": 0.0015418641390205373,
|
|
"loss": 3.7744,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.7723288320303245,
|
|
"grad_norm": 0.21500062869290557,
|
|
"learning_rate": 0.0015450236966824645,
|
|
"loss": 3.6787,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 0.7739082365948038,
|
|
"grad_norm": 0.22563787125139578,
|
|
"learning_rate": 0.001548183254344392,
|
|
"loss": 3.725,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.7754876411592829,
|
|
"grad_norm": 0.22153051124094789,
|
|
"learning_rate": 0.001551342812006319,
|
|
"loss": 3.7059,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 0.7770670457237622,
|
|
"grad_norm": 0.18970355960004148,
|
|
"learning_rate": 0.0015545023696682465,
|
|
"loss": 3.7338,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.7786464502882413,
|
|
"grad_norm": 0.1742134853025178,
|
|
"learning_rate": 0.0015576619273301737,
|
|
"loss": 3.6984,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 0.7802258548527206,
|
|
"grad_norm": 0.23660512962689312,
|
|
"learning_rate": 0.0015608214849921012,
|
|
"loss": 3.6406,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.7818052594171997,
|
|
"grad_norm": 0.3272784229892744,
|
|
"learning_rate": 0.0015639810426540284,
|
|
"loss": 3.709,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 0.7833846639816789,
|
|
"grad_norm": 0.20833361715866924,
|
|
"learning_rate": 0.0015671406003159559,
|
|
"loss": 3.6663,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.7849640685461581,
|
|
"grad_norm": 0.2748114142491958,
|
|
"learning_rate": 0.0015703001579778831,
|
|
"loss": 3.6892,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 0.7865434731106373,
|
|
"grad_norm": 0.19890328555853415,
|
|
"learning_rate": 0.0015734597156398106,
|
|
"loss": 3.7667,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.7881228776751165,
|
|
"grad_norm": 0.22412302917861454,
|
|
"learning_rate": 0.0015766192733017378,
|
|
"loss": 3.6875,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 0.7897022822395957,
|
|
"grad_norm": 0.19236289981188603,
|
|
"learning_rate": 0.001579778830963665,
|
|
"loss": 3.6889,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.7912816868040748,
|
|
"grad_norm": 0.1656764441178424,
|
|
"learning_rate": 0.0015829383886255923,
|
|
"loss": 3.7048,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 0.7928610913685541,
|
|
"grad_norm": 0.22914169581464922,
|
|
"learning_rate": 0.0015860979462875198,
|
|
"loss": 3.7057,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.7944404959330332,
|
|
"grad_norm": 0.19880632678862692,
|
|
"learning_rate": 0.001589257503949447,
|
|
"loss": 3.6536,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 0.7960199004975125,
|
|
"grad_norm": 0.20670459783742656,
|
|
"learning_rate": 0.0015924170616113745,
|
|
"loss": 3.6888,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.7975993050619916,
|
|
"grad_norm": 0.20556512892047715,
|
|
"learning_rate": 0.0015955766192733017,
|
|
"loss": 3.6812,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 0.7991787096264709,
|
|
"grad_norm": 0.2561386862908798,
|
|
"learning_rate": 0.0015987361769352292,
|
|
"loss": 3.8281,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.80075811419095,
|
|
"grad_norm": 0.21114926453470764,
|
|
"learning_rate": 0.0016018957345971566,
|
|
"loss": 3.8423,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 0.8023375187554292,
|
|
"grad_norm": 0.17098012197547188,
|
|
"learning_rate": 0.0016050552922590837,
|
|
"loss": 3.6149,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.8039169233199084,
|
|
"grad_norm": 0.19508734119650264,
|
|
"learning_rate": 0.0016082148499210111,
|
|
"loss": 3.6576,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 0.8054963278843876,
|
|
"grad_norm": 0.21765802709335372,
|
|
"learning_rate": 0.0016113744075829384,
|
|
"loss": 3.7115,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.8070757324488668,
|
|
"grad_norm": 0.24314664406509764,
|
|
"learning_rate": 0.0016145339652448658,
|
|
"loss": 3.7214,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 0.808655137013346,
|
|
"grad_norm": 0.29035553782387197,
|
|
"learning_rate": 0.001617693522906793,
|
|
"loss": 3.6752,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.8102345415778252,
|
|
"grad_norm": 0.16294563361366568,
|
|
"learning_rate": 0.0016208530805687205,
|
|
"loss": 3.562,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 0.8118139461423044,
|
|
"grad_norm": 0.17538885167621077,
|
|
"learning_rate": 0.0016240126382306478,
|
|
"loss": 3.653,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.8133933507067835,
|
|
"grad_norm": 0.246490697783557,
|
|
"learning_rate": 0.0016271721958925752,
|
|
"loss": 3.6845,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 0.8149727552712628,
|
|
"grad_norm": 0.17459066139539578,
|
|
"learning_rate": 0.0016303317535545023,
|
|
"loss": 3.7182,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.8165521598357419,
|
|
"grad_norm": 0.18265041543861538,
|
|
"learning_rate": 0.0016334913112164297,
|
|
"loss": 3.5774,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 0.8181315644002212,
|
|
"grad_norm": 0.23776280417043189,
|
|
"learning_rate": 0.001636650868878357,
|
|
"loss": 3.8624,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.8197109689647003,
|
|
"grad_norm": 0.2207643913126606,
|
|
"learning_rate": 0.0016398104265402844,
|
|
"loss": 3.7317,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 0.8212903735291796,
|
|
"grad_norm": 0.17566153909957044,
|
|
"learning_rate": 0.0016429699842022117,
|
|
"loss": 3.6867,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.8228697780936587,
|
|
"grad_norm": 0.2588250019337268,
|
|
"learning_rate": 0.0016461295418641391,
|
|
"loss": 3.6366,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 0.8244491826581379,
|
|
"grad_norm": 0.17498639531811824,
|
|
"learning_rate": 0.0016492890995260664,
|
|
"loss": 3.6697,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.8260285872226171,
|
|
"grad_norm": 0.19205844355571372,
|
|
"learning_rate": 0.0016524486571879938,
|
|
"loss": 3.6052,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 0.8276079917870963,
|
|
"grad_norm": 0.27220693405931584,
|
|
"learning_rate": 0.0016556082148499209,
|
|
"loss": 3.635,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.8291873963515755,
|
|
"grad_norm": 0.19674637897684247,
|
|
"learning_rate": 0.0016587677725118483,
|
|
"loss": 3.7254,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 0.8307668009160546,
|
|
"grad_norm": 0.2186697394730908,
|
|
"learning_rate": 0.0016619273301737756,
|
|
"loss": 3.6222,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.8323462054805338,
|
|
"grad_norm": 0.2960355405417273,
|
|
"learning_rate": 0.001665086887835703,
|
|
"loss": 3.6387,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 0.833925610045013,
|
|
"grad_norm": 0.2596808362060048,
|
|
"learning_rate": 0.0016682464454976303,
|
|
"loss": 3.5618,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.8355050146094922,
|
|
"grad_norm": 0.17733754261557175,
|
|
"learning_rate": 0.0016714060031595577,
|
|
"loss": 3.5618,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 0.8370844191739714,
|
|
"grad_norm": 0.1774345542731582,
|
|
"learning_rate": 0.001674565560821485,
|
|
"loss": 3.6559,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.8386638237384506,
|
|
"grad_norm": 0.20816053295200482,
|
|
"learning_rate": 0.0016777251184834125,
|
|
"loss": 3.6909,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 0.8402432283029297,
|
|
"grad_norm": 0.246077125438171,
|
|
"learning_rate": 0.0016808846761453397,
|
|
"loss": 3.5316,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.841822632867409,
|
|
"grad_norm": 0.2318326708694067,
|
|
"learning_rate": 0.001684044233807267,
|
|
"loss": 3.5794,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 0.8434020374318881,
|
|
"grad_norm": 0.24319493923572003,
|
|
"learning_rate": 0.0016872037914691944,
|
|
"loss": 3.645,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.8449814419963674,
|
|
"grad_norm": 0.17052776048586668,
|
|
"learning_rate": 0.0016903633491311216,
|
|
"loss": 3.5473,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 0.8465608465608465,
|
|
"grad_norm": 0.17919509242747675,
|
|
"learning_rate": 0.0016935229067930491,
|
|
"loss": 3.6732,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.8481402511253258,
|
|
"grad_norm": 0.24689978205175545,
|
|
"learning_rate": 0.0016966824644549764,
|
|
"loss": 3.6452,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 0.8497196556898049,
|
|
"grad_norm": 0.1985879167972585,
|
|
"learning_rate": 0.0016998420221169038,
|
|
"loss": 3.6603,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.8512990602542841,
|
|
"grad_norm": 0.17505379214501765,
|
|
"learning_rate": 0.001703001579778831,
|
|
"loss": 3.5913,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 0.8528784648187633,
|
|
"grad_norm": 0.17049655856229104,
|
|
"learning_rate": 0.0017061611374407583,
|
|
"loss": 3.5962,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.8544578693832425,
|
|
"grad_norm": 0.20477616815014238,
|
|
"learning_rate": 0.0017093206951026855,
|
|
"loss": 3.6277,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 0.8560372739477217,
|
|
"grad_norm": 0.18238352329571159,
|
|
"learning_rate": 0.001712480252764613,
|
|
"loss": 3.5996,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.8576166785122009,
|
|
"grad_norm": 0.1657684244805557,
|
|
"learning_rate": 0.0017156398104265403,
|
|
"loss": 3.6448,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 0.85919608307668,
|
|
"grad_norm": 0.18352367622580032,
|
|
"learning_rate": 0.0017187993680884677,
|
|
"loss": 3.6484,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.8607754876411593,
|
|
"grad_norm": 0.18498777399508617,
|
|
"learning_rate": 0.001721958925750395,
|
|
"loss": 3.6914,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 0.8623548922056384,
|
|
"grad_norm": 0.2006685637489181,
|
|
"learning_rate": 0.0017251184834123224,
|
|
"loss": 3.7236,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.8639342967701177,
|
|
"grad_norm": 0.1532842182126188,
|
|
"learning_rate": 0.0017282780410742497,
|
|
"loss": 3.6976,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 0.8655137013345968,
|
|
"grad_norm": 0.20769688280185736,
|
|
"learning_rate": 0.001731437598736177,
|
|
"loss": 3.5407,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.8670931058990761,
|
|
"grad_norm": 0.17160714406064806,
|
|
"learning_rate": 0.0017345971563981042,
|
|
"loss": 3.5158,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 0.8686725104635552,
|
|
"grad_norm": 0.13808832646048677,
|
|
"learning_rate": 0.0017377567140600316,
|
|
"loss": 3.5806,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.8702519150280345,
|
|
"grad_norm": 0.13578753863052162,
|
|
"learning_rate": 0.0017409162717219589,
|
|
"loss": 3.5781,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 0.8718313195925136,
|
|
"grad_norm": 0.15163041772953603,
|
|
"learning_rate": 0.0017440758293838863,
|
|
"loss": 3.5626,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.8734107241569928,
|
|
"grad_norm": 0.23873184566352487,
|
|
"learning_rate": 0.0017472353870458136,
|
|
"loss": 3.6502,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 0.874990128721472,
|
|
"grad_norm": 0.16021501896526982,
|
|
"learning_rate": 0.001750394944707741,
|
|
"loss": 3.5902,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.8765695332859512,
|
|
"grad_norm": 0.18029266140185582,
|
|
"learning_rate": 0.0017535545023696683,
|
|
"loss": 3.5893,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 0.8781489378504304,
|
|
"grad_norm": 0.14301734885212128,
|
|
"learning_rate": 0.0017567140600315955,
|
|
"loss": 3.6548,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.8797283424149096,
|
|
"grad_norm": 0.17731407703210675,
|
|
"learning_rate": 0.0017598736176935228,
|
|
"loss": 3.559,
|
|
"step": 2785
|
|
},
|
|
{
|
|
"epoch": 0.8813077469793887,
|
|
"grad_norm": 0.1954449258645855,
|
|
"learning_rate": 0.0017630331753554502,
|
|
"loss": 3.583,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.882887151543868,
|
|
"grad_norm": 0.18652326225135574,
|
|
"learning_rate": 0.0017661927330173777,
|
|
"loss": 3.5913,
|
|
"step": 2795
|
|
},
|
|
{
|
|
"epoch": 0.8844665561083471,
|
|
"grad_norm": 0.18624709754027133,
|
|
"learning_rate": 0.001769352290679305,
|
|
"loss": 3.5689,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.8860459606728264,
|
|
"grad_norm": 0.17838643662517628,
|
|
"learning_rate": 0.0017725118483412324,
|
|
"loss": 3.5603,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 0.8876253652373055,
|
|
"grad_norm": 0.17720919106270716,
|
|
"learning_rate": 0.0017756714060031596,
|
|
"loss": 3.5922,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.8892047698017848,
|
|
"grad_norm": 0.15319410065129782,
|
|
"learning_rate": 0.001778830963665087,
|
|
"loss": 3.5142,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 0.8907841743662639,
|
|
"grad_norm": 0.14049804512703198,
|
|
"learning_rate": 0.0017819905213270141,
|
|
"loss": 3.5348,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.8923635789307431,
|
|
"grad_norm": 0.13540223274555183,
|
|
"learning_rate": 0.0017851500789889416,
|
|
"loss": 3.6603,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 0.8939429834952223,
|
|
"grad_norm": 0.16060841249757002,
|
|
"learning_rate": 0.0017883096366508688,
|
|
"loss": 3.5376,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.8955223880597015,
|
|
"grad_norm": 0.1509093235271957,
|
|
"learning_rate": 0.0017914691943127963,
|
|
"loss": 3.5386,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 0.8971017926241807,
|
|
"grad_norm": 0.17837636380553173,
|
|
"learning_rate": 0.0017946287519747235,
|
|
"loss": 3.5374,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.8986811971886599,
|
|
"grad_norm": 0.16174607673705266,
|
|
"learning_rate": 0.001797788309636651,
|
|
"loss": 3.52,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 0.900260601753139,
|
|
"grad_norm": 0.14319096673100043,
|
|
"learning_rate": 0.0018009478672985782,
|
|
"loss": 3.5615,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.9018400063176183,
|
|
"grad_norm": 0.16168911340122266,
|
|
"learning_rate": 0.0018041074249605057,
|
|
"loss": 3.5071,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 0.9034194108820974,
|
|
"grad_norm": 0.26405265054409266,
|
|
"learning_rate": 0.001807266982622433,
|
|
"loss": 3.5178,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.9049988154465767,
|
|
"grad_norm": 0.19667628698985987,
|
|
"learning_rate": 0.0018104265402843602,
|
|
"loss": 3.586,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 0.9065782200110558,
|
|
"grad_norm": 0.18103313500074777,
|
|
"learning_rate": 0.0018135860979462874,
|
|
"loss": 3.4582,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.9081576245755351,
|
|
"grad_norm": 0.16618866533472787,
|
|
"learning_rate": 0.0018167456556082149,
|
|
"loss": 3.5747,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 0.9097370291400142,
|
|
"grad_norm": 0.1439781651238869,
|
|
"learning_rate": 0.0018199052132701421,
|
|
"loss": 3.5228,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.9113164337044934,
|
|
"grad_norm": 0.22829688021990502,
|
|
"learning_rate": 0.0018230647709320696,
|
|
"loss": 3.5557,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 0.9128958382689726,
|
|
"grad_norm": 0.20775781246057518,
|
|
"learning_rate": 0.0018262243285939968,
|
|
"loss": 3.466,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.9144752428334518,
|
|
"grad_norm": 0.18957199697528926,
|
|
"learning_rate": 0.0018293838862559243,
|
|
"loss": 3.5275,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 0.916054647397931,
|
|
"grad_norm": 0.15566648720294615,
|
|
"learning_rate": 0.0018325434439178515,
|
|
"loss": 3.5882,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.9176340519624102,
|
|
"grad_norm": 0.2422542117254597,
|
|
"learning_rate": 0.0018357030015797788,
|
|
"loss": 3.7308,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 0.9192134565268893,
|
|
"grad_norm": 0.15264299874862852,
|
|
"learning_rate": 0.001838862559241706,
|
|
"loss": 3.5069,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.9207928610913686,
|
|
"grad_norm": 0.3010264670148029,
|
|
"learning_rate": 0.0018420221169036335,
|
|
"loss": 3.5984,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 0.9223722656558477,
|
|
"grad_norm": 0.18844393665997056,
|
|
"learning_rate": 0.001845181674565561,
|
|
"loss": 3.5524,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.923951670220327,
|
|
"grad_norm": 0.21577593436034362,
|
|
"learning_rate": 0.0018483412322274882,
|
|
"loss": 3.542,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 0.9255310747848061,
|
|
"grad_norm": 0.2003079822459777,
|
|
"learning_rate": 0.0018515007898894157,
|
|
"loss": 3.5052,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.9271104793492854,
|
|
"grad_norm": 0.21144213220190206,
|
|
"learning_rate": 0.001854660347551343,
|
|
"loss": 3.4866,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 0.9286898839137645,
|
|
"grad_norm": 0.23696618462111668,
|
|
"learning_rate": 0.0018578199052132704,
|
|
"loss": 3.5692,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.9302692884782437,
|
|
"grad_norm": 0.18040894099227053,
|
|
"learning_rate": 0.0018609794628751974,
|
|
"loss": 3.556,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 0.9318486930427229,
|
|
"grad_norm": 0.18777757184774668,
|
|
"learning_rate": 0.0018641390205371249,
|
|
"loss": 3.5738,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.9334280976072021,
|
|
"grad_norm": 0.15996203979182136,
|
|
"learning_rate": 0.001867298578199052,
|
|
"loss": 3.4611,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 0.9350075021716813,
|
|
"grad_norm": 0.12437471578621428,
|
|
"learning_rate": 0.0018704581358609796,
|
|
"loss": 3.5311,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.9365869067361605,
|
|
"grad_norm": 0.20779657848924238,
|
|
"learning_rate": 0.0018736176935229068,
|
|
"loss": 3.6051,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 0.9381663113006397,
|
|
"grad_norm": 0.22622371621134083,
|
|
"learning_rate": 0.0018767772511848343,
|
|
"loss": 3.5416,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.9397457158651189,
|
|
"grad_norm": 0.15610415052535734,
|
|
"learning_rate": 0.0018799368088467615,
|
|
"loss": 3.487,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 0.941325120429598,
|
|
"grad_norm": 0.1670956525886456,
|
|
"learning_rate": 0.001883096366508689,
|
|
"loss": 3.6928,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.9429045249940773,
|
|
"grad_norm": 0.14071066099400464,
|
|
"learning_rate": 0.001886255924170616,
|
|
"loss": 3.4163,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 0.9444839295585564,
|
|
"grad_norm": 0.16270580906780718,
|
|
"learning_rate": 0.0018894154818325435,
|
|
"loss": 3.5102,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.9460633341230357,
|
|
"grad_norm": 0.16650126568655202,
|
|
"learning_rate": 0.0018925750394944707,
|
|
"loss": 3.5833,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 0.9476427386875148,
|
|
"grad_norm": 0.16986031593446013,
|
|
"learning_rate": 0.0018957345971563982,
|
|
"loss": 3.4545,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.9492221432519939,
|
|
"grad_norm": 0.19076878616947124,
|
|
"learning_rate": 0.0018988941548183254,
|
|
"loss": 3.6737,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 0.9508015478164732,
|
|
"grad_norm": 0.22207278004599146,
|
|
"learning_rate": 0.0019020537124802529,
|
|
"loss": 3.5761,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"grad_norm": 0.17734953021756708,
|
|
"learning_rate": 0.0019052132701421801,
|
|
"loss": 3.5675,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 0.9539603569454316,
|
|
"grad_norm": 0.14197629022383074,
|
|
"learning_rate": 0.0019083728278041076,
|
|
"loss": 3.5852,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.9555397615099107,
|
|
"grad_norm": 0.152062693283501,
|
|
"learning_rate": 0.0019115323854660346,
|
|
"loss": 3.5915,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 0.95711916607439,
|
|
"grad_norm": 0.12267072229882017,
|
|
"learning_rate": 0.001914691943127962,
|
|
"loss": 3.5183,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.9586985706388691,
|
|
"grad_norm": 0.20550673167545833,
|
|
"learning_rate": 0.0019178515007898893,
|
|
"loss": 3.5475,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 0.9602779752033483,
|
|
"grad_norm": 0.17184158826150192,
|
|
"learning_rate": 0.0019210110584518168,
|
|
"loss": 3.531,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.9618573797678275,
|
|
"grad_norm": 0.16610392459471085,
|
|
"learning_rate": 0.0019241706161137442,
|
|
"loss": 3.611,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 0.9634367843323067,
|
|
"grad_norm": 0.15981880011297245,
|
|
"learning_rate": 0.0019273301737756715,
|
|
"loss": 3.593,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.9650161888967859,
|
|
"grad_norm": 0.15164121985073623,
|
|
"learning_rate": 0.001930489731437599,
|
|
"loss": 3.4119,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 0.9665955934612651,
|
|
"grad_norm": 0.15778640574581523,
|
|
"learning_rate": 0.0019336492890995262,
|
|
"loss": 3.5711,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.9681749980257442,
|
|
"grad_norm": 0.147167482910992,
|
|
"learning_rate": 0.0019368088467614534,
|
|
"loss": 3.4438,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 0.9697544025902235,
|
|
"grad_norm": 0.1901184378323275,
|
|
"learning_rate": 0.0019399684044233807,
|
|
"loss": 3.4494,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.9713338071547026,
|
|
"grad_norm": 0.1569729727319038,
|
|
"learning_rate": 0.0019431279620853081,
|
|
"loss": 3.4788,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 0.9729132117191819,
|
|
"grad_norm": 0.16207010652889833,
|
|
"learning_rate": 0.0019462875197472354,
|
|
"loss": 3.5848,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.974492616283661,
|
|
"grad_norm": 0.15032096081658786,
|
|
"learning_rate": 0.0019494470774091628,
|
|
"loss": 3.5918,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 0.9760720208481403,
|
|
"grad_norm": 0.19629734856584571,
|
|
"learning_rate": 0.00195260663507109,
|
|
"loss": 3.4911,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.9776514254126194,
|
|
"grad_norm": 0.15469401910746663,
|
|
"learning_rate": 0.0019557661927330173,
|
|
"loss": 3.6936,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 0.9792308299770986,
|
|
"grad_norm": 0.13113406422755078,
|
|
"learning_rate": 0.0019589257503949448,
|
|
"loss": 3.5594,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.9808102345415778,
|
|
"grad_norm": 0.15268161125345475,
|
|
"learning_rate": 0.0019620853080568722,
|
|
"loss": 3.4451,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 0.982389639106057,
|
|
"grad_norm": 0.13078473029692392,
|
|
"learning_rate": 0.0019652448657187993,
|
|
"loss": 3.5146,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.9839690436705362,
|
|
"grad_norm": 0.1342097775561928,
|
|
"learning_rate": 0.0019684044233807267,
|
|
"loss": 3.4701,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 0.9855484482350154,
|
|
"grad_norm": 0.17754966693141605,
|
|
"learning_rate": 0.001971563981042654,
|
|
"loss": 3.533,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.9871278527994946,
|
|
"grad_norm": 0.11166476841442985,
|
|
"learning_rate": 0.0019747235387045812,
|
|
"loss": 3.42,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 0.9887072573639738,
|
|
"grad_norm": 0.19911097415765053,
|
|
"learning_rate": 0.0019778830963665087,
|
|
"loss": 3.4671,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.9902866619284529,
|
|
"grad_norm": 0.19387690893246035,
|
|
"learning_rate": 0.001981042654028436,
|
|
"loss": 3.4164,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 0.9918660664929322,
|
|
"grad_norm": 0.19911109695195278,
|
|
"learning_rate": 0.001984202211690363,
|
|
"loss": 3.4661,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.9934454710574113,
|
|
"grad_norm": 0.22858221565607198,
|
|
"learning_rate": 0.0019873617693522906,
|
|
"loss": 3.3476,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 0.9950248756218906,
|
|
"grad_norm": 0.19136657300717996,
|
|
"learning_rate": 0.001990521327014218,
|
|
"loss": 3.4011,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.9966042801863697,
|
|
"grad_norm": 0.13796662021891073,
|
|
"learning_rate": 0.0019936808846761456,
|
|
"loss": 3.6024,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 0.998183684750849,
|
|
"grad_norm": 0.1759449134781156,
|
|
"learning_rate": 0.0019968404423380726,
|
|
"loss": 3.4973,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.9997630893153281,
|
|
"grad_norm": 0.17244156032139696,
|
|
"learning_rate": 0.002,
|
|
"loss": 3.4933,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"eval_loss": 3.453442096710205,
|
|
"eval_runtime": 118.4568,
|
|
"eval_samples_per_second": 22.363,
|
|
"eval_steps_per_second": 5.597,
|
|
"step": 3166
|
|
},
|
|
{
|
|
"epoch": 1.0012635236515834,
|
|
"grad_norm": 0.17419869669927196,
|
|
"learning_rate": 0.0019999998479531948,
|
|
"loss": 3.4613,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 1.0028429282160625,
|
|
"grad_norm": 0.1336509815659276,
|
|
"learning_rate": 0.001999999391812825,
|
|
"loss": 3.4318,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 1.0044223327805417,
|
|
"grad_norm": 0.16614666612624818,
|
|
"learning_rate": 0.001999998631579029,
|
|
"loss": 3.3616,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 1.006001737345021,
|
|
"grad_norm": 0.18577345941657952,
|
|
"learning_rate": 0.001999997567252038,
|
|
"loss": 3.387,
|
|
"step": 3185
|
|
},
|
|
{
|
|
"epoch": 1.0075811419095002,
|
|
"grad_norm": 0.14956674408580217,
|
|
"learning_rate": 0.0019999961988321764,
|
|
"loss": 3.4243,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 1.0091605464739792,
|
|
"grad_norm": 0.15122355799473372,
|
|
"learning_rate": 0.00199999452631986,
|
|
"loss": 3.3774,
|
|
"step": 3195
|
|
},
|
|
{
|
|
"epoch": 1.0107399510384585,
|
|
"grad_norm": 0.14460949065708573,
|
|
"learning_rate": 0.001999992549715597,
|
|
"loss": 3.4325,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 1.0123193556029377,
|
|
"grad_norm": 0.14847452402585345,
|
|
"learning_rate": 0.001999990269019989,
|
|
"loss": 3.4322,
|
|
"step": 3205
|
|
},
|
|
{
|
|
"epoch": 1.013898760167417,
|
|
"grad_norm": 0.13700971578846127,
|
|
"learning_rate": 0.001999987684233729,
|
|
"loss": 3.4382,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 1.015478164731896,
|
|
"grad_norm": 0.17659161113643015,
|
|
"learning_rate": 0.001999984795357604,
|
|
"loss": 3.5208,
|
|
"step": 3215
|
|
},
|
|
{
|
|
"epoch": 1.0170575692963753,
|
|
"grad_norm": 0.257228503826398,
|
|
"learning_rate": 0.0019999816023924914,
|
|
"loss": 3.483,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 1.0186369738608545,
|
|
"grad_norm": 0.1734175654718477,
|
|
"learning_rate": 0.0019999781053393626,
|
|
"loss": 3.4846,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 1.0202163784253337,
|
|
"grad_norm": 0.19417425336656402,
|
|
"learning_rate": 0.0019999743041992806,
|
|
"loss": 3.539,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 1.0217957829898128,
|
|
"grad_norm": 0.15962020613799605,
|
|
"learning_rate": 0.001999970198973402,
|
|
"loss": 3.4282,
|
|
"step": 3235
|
|
},
|
|
{
|
|
"epoch": 1.023375187554292,
|
|
"grad_norm": 0.14722064841836466,
|
|
"learning_rate": 0.001999965789662975,
|
|
"loss": 3.5164,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 1.0249545921187713,
|
|
"grad_norm": 0.16675736592182736,
|
|
"learning_rate": 0.0019999610762693404,
|
|
"loss": 3.4058,
|
|
"step": 3245
|
|
},
|
|
{
|
|
"epoch": 1.0265339966832505,
|
|
"grad_norm": 0.19849715389813696,
|
|
"learning_rate": 0.0019999560587939313,
|
|
"loss": 3.4989,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 1.0281134012477295,
|
|
"grad_norm": 0.18020905462463116,
|
|
"learning_rate": 0.001999950737238274,
|
|
"loss": 3.4392,
|
|
"step": 3255
|
|
},
|
|
{
|
|
"epoch": 1.0296928058122088,
|
|
"grad_norm": 0.12088354646913968,
|
|
"learning_rate": 0.0019999451116039855,
|
|
"loss": 3.39,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 1.031272210376688,
|
|
"grad_norm": 0.14689232257108842,
|
|
"learning_rate": 0.0019999391818927782,
|
|
"loss": 3.4552,
|
|
"step": 3265
|
|
},
|
|
{
|
|
"epoch": 1.0328516149411673,
|
|
"grad_norm": 0.2071823895016292,
|
|
"learning_rate": 0.001999932948106454,
|
|
"loss": 3.4981,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 1.0344310195056463,
|
|
"grad_norm": 0.13533866016746368,
|
|
"learning_rate": 0.0019999264102469093,
|
|
"loss": 3.3788,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 1.0360104240701256,
|
|
"grad_norm": 0.15821544540898988,
|
|
"learning_rate": 0.0019999195683161317,
|
|
"loss": 3.399,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 1.0375898286346048,
|
|
"grad_norm": 0.1299490903409468,
|
|
"learning_rate": 0.0019999124223162024,
|
|
"loss": 3.4135,
|
|
"step": 3285
|
|
},
|
|
{
|
|
"epoch": 1.039169233199084,
|
|
"grad_norm": 0.12444550525295087,
|
|
"learning_rate": 0.0019999049722492935,
|
|
"loss": 3.4348,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 1.040748637763563,
|
|
"grad_norm": 0.13764088108526956,
|
|
"learning_rate": 0.0019998972181176715,
|
|
"loss": 3.3488,
|
|
"step": 3295
|
|
},
|
|
{
|
|
"epoch": 1.0423280423280423,
|
|
"grad_norm": 0.1044556743209048,
|
|
"learning_rate": 0.001999889159923694,
|
|
"loss": 3.3857,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 1.0439074468925216,
|
|
"grad_norm": 0.11802247972167725,
|
|
"learning_rate": 0.001999880797669811,
|
|
"loss": 3.3374,
|
|
"step": 3305
|
|
},
|
|
{
|
|
"epoch": 1.0454868514570008,
|
|
"grad_norm": 0.13697976635150552,
|
|
"learning_rate": 0.0019998721313585666,
|
|
"loss": 3.4261,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 1.0470662560214798,
|
|
"grad_norm": 0.14888758048381356,
|
|
"learning_rate": 0.001999863160992595,
|
|
"loss": 3.4024,
|
|
"step": 3315
|
|
},
|
|
{
|
|
"epoch": 1.048645660585959,
|
|
"grad_norm": 0.14427856736666778,
|
|
"learning_rate": 0.0019998538865746243,
|
|
"loss": 3.4188,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 1.0502250651504383,
|
|
"grad_norm": 0.1808211556860934,
|
|
"learning_rate": 0.0019998443081074755,
|
|
"loss": 3.5465,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 1.0518044697149174,
|
|
"grad_norm": 0.13795293901986216,
|
|
"learning_rate": 0.0019998344255940602,
|
|
"loss": 3.4072,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 1.0533838742793966,
|
|
"grad_norm": 0.14265201579643738,
|
|
"learning_rate": 0.0019998242390373844,
|
|
"loss": 3.3855,
|
|
"step": 3335
|
|
},
|
|
{
|
|
"epoch": 1.0549632788438759,
|
|
"grad_norm": 0.12748239513513887,
|
|
"learning_rate": 0.0019998137484405457,
|
|
"loss": 3.3582,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 1.056542683408355,
|
|
"grad_norm": 0.16161872794834853,
|
|
"learning_rate": 0.001999802953806734,
|
|
"loss": 3.3484,
|
|
"step": 3345
|
|
},
|
|
{
|
|
"epoch": 1.0581220879728341,
|
|
"grad_norm": 0.1892452477657128,
|
|
"learning_rate": 0.001999791855139232,
|
|
"loss": 3.4889,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 1.0597014925373134,
|
|
"grad_norm": 0.17436112643159674,
|
|
"learning_rate": 0.0019997804524414147,
|
|
"loss": 3.4394,
|
|
"step": 3355
|
|
},
|
|
{
|
|
"epoch": 1.0612808971017926,
|
|
"grad_norm": 0.14612103988932162,
|
|
"learning_rate": 0.001999768745716749,
|
|
"loss": 3.4021,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 1.0628603016662719,
|
|
"grad_norm": 0.15180816088108712,
|
|
"learning_rate": 0.001999756734968796,
|
|
"loss": 3.31,
|
|
"step": 3365
|
|
},
|
|
{
|
|
"epoch": 1.064439706230751,
|
|
"grad_norm": 0.12107289850375573,
|
|
"learning_rate": 0.0019997444202012075,
|
|
"loss": 3.3731,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 1.0660191107952302,
|
|
"grad_norm": 0.11237567710696524,
|
|
"learning_rate": 0.0019997318014177284,
|
|
"loss": 3.3806,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 1.0675985153597094,
|
|
"grad_norm": 0.14292253124066845,
|
|
"learning_rate": 0.001999718878622196,
|
|
"loss": 3.3619,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 1.0691779199241886,
|
|
"grad_norm": 0.14054299799410475,
|
|
"learning_rate": 0.0019997056518185397,
|
|
"loss": 3.4553,
|
|
"step": 3385
|
|
},
|
|
{
|
|
"epoch": 1.0707573244886677,
|
|
"grad_norm": 0.13396428088016363,
|
|
"learning_rate": 0.001999692121010782,
|
|
"loss": 3.3671,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 1.072336729053147,
|
|
"grad_norm": 0.16443477530304862,
|
|
"learning_rate": 0.001999678286203038,
|
|
"loss": 3.4458,
|
|
"step": 3395
|
|
},
|
|
{
|
|
"epoch": 1.0739161336176262,
|
|
"grad_norm": 0.1550207567605643,
|
|
"learning_rate": 0.0019996641473995136,
|
|
"loss": 3.4284,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 1.0754955381821054,
|
|
"grad_norm": 0.1642689908902813,
|
|
"learning_rate": 0.0019996497046045093,
|
|
"loss": 3.4561,
|
|
"step": 3405
|
|
},
|
|
{
|
|
"epoch": 1.0770749427465844,
|
|
"grad_norm": 0.1362356053231775,
|
|
"learning_rate": 0.001999634957822417,
|
|
"loss": 3.3014,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 1.0786543473110637,
|
|
"grad_norm": 0.1331477986694049,
|
|
"learning_rate": 0.0019996199070577204,
|
|
"loss": 3.3593,
|
|
"step": 3415
|
|
},
|
|
{
|
|
"epoch": 1.080233751875543,
|
|
"grad_norm": 0.10846175011905092,
|
|
"learning_rate": 0.0019996045523149974,
|
|
"loss": 3.3759,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 1.0818131564400222,
|
|
"grad_norm": 0.15117569072212125,
|
|
"learning_rate": 0.0019995888935989163,
|
|
"loss": 3.38,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 1.0833925610045012,
|
|
"grad_norm": 0.21982954923773612,
|
|
"learning_rate": 0.0019995729309142396,
|
|
"loss": 3.459,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 1.0849719655689805,
|
|
"grad_norm": 0.1277491243359439,
|
|
"learning_rate": 0.0019995566642658203,
|
|
"loss": 3.3356,
|
|
"step": 3435
|
|
},
|
|
{
|
|
"epoch": 1.0865513701334597,
|
|
"grad_norm": 0.10597539270733282,
|
|
"learning_rate": 0.001999540093658606,
|
|
"loss": 3.3145,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 1.088130774697939,
|
|
"grad_norm": 0.14743827645774274,
|
|
"learning_rate": 0.001999523219097636,
|
|
"loss": 3.4258,
|
|
"step": 3445
|
|
},
|
|
{
|
|
"epoch": 1.089710179262418,
|
|
"grad_norm": 0.1279665344640135,
|
|
"learning_rate": 0.001999506040588041,
|
|
"loss": 3.5508,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 1.0912895838268972,
|
|
"grad_norm": 0.16499800131667175,
|
|
"learning_rate": 0.001999488558135045,
|
|
"loss": 3.4194,
|
|
"step": 3455
|
|
},
|
|
{
|
|
"epoch": 1.0928689883913765,
|
|
"grad_norm": 0.15359201841946143,
|
|
"learning_rate": 0.001999470771743964,
|
|
"loss": 3.2986,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 1.0944483929558557,
|
|
"grad_norm": 0.13265460036428217,
|
|
"learning_rate": 0.0019994526814202077,
|
|
"loss": 3.3454,
|
|
"step": 3465
|
|
},
|
|
{
|
|
"epoch": 1.0960277975203347,
|
|
"grad_norm": 0.1620045489947208,
|
|
"learning_rate": 0.0019994342871692762,
|
|
"loss": 3.3475,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 1.097607202084814,
|
|
"grad_norm": 0.20594824306793155,
|
|
"learning_rate": 0.0019994155889967637,
|
|
"loss": 3.2884,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 1.0991866066492932,
|
|
"grad_norm": 0.21057978013237635,
|
|
"learning_rate": 0.001999396586908356,
|
|
"loss": 3.4672,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 1.1007660112137725,
|
|
"grad_norm": 0.18108584882641593,
|
|
"learning_rate": 0.001999377280909832,
|
|
"loss": 3.3923,
|
|
"step": 3485
|
|
},
|
|
{
|
|
"epoch": 1.1023454157782515,
|
|
"grad_norm": 0.17909932454295607,
|
|
"learning_rate": 0.0019993576710070613,
|
|
"loss": 3.4725,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 1.1039248203427308,
|
|
"grad_norm": 0.14020488452965366,
|
|
"learning_rate": 0.0019993377572060083,
|
|
"loss": 3.4036,
|
|
"step": 3495
|
|
},
|
|
{
|
|
"epoch": 1.10550422490721,
|
|
"grad_norm": 0.13843283876669563,
|
|
"learning_rate": 0.0019993175395127284,
|
|
"loss": 3.4219,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 1.1070836294716893,
|
|
"grad_norm": 0.13070152265216842,
|
|
"learning_rate": 0.0019992970179333693,
|
|
"loss": 3.2739,
|
|
"step": 3505
|
|
},
|
|
{
|
|
"epoch": 1.1086630340361683,
|
|
"grad_norm": 0.11474143922408123,
|
|
"learning_rate": 0.001999276192474172,
|
|
"loss": 3.4454,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 1.1102424386006475,
|
|
"grad_norm": 0.12351976907394624,
|
|
"learning_rate": 0.0019992550631414687,
|
|
"loss": 3.3727,
|
|
"step": 3515
|
|
},
|
|
{
|
|
"epoch": 1.1118218431651268,
|
|
"grad_norm": 0.1236748122633322,
|
|
"learning_rate": 0.0019992336299416856,
|
|
"loss": 3.3862,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 1.113401247729606,
|
|
"grad_norm": 0.1248956099278761,
|
|
"learning_rate": 0.00199921189288134,
|
|
"loss": 3.3038,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 1.114980652294085,
|
|
"grad_norm": 0.12812904370726583,
|
|
"learning_rate": 0.0019991898519670414,
|
|
"loss": 3.4662,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 1.1165600568585643,
|
|
"grad_norm": 0.12789536088777056,
|
|
"learning_rate": 0.001999167507205493,
|
|
"loss": 3.3488,
|
|
"step": 3535
|
|
},
|
|
{
|
|
"epoch": 1.1181394614230435,
|
|
"grad_norm": 0.1279786622030309,
|
|
"learning_rate": 0.0019991448586034895,
|
|
"loss": 3.3606,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 1.1197188659875228,
|
|
"grad_norm": 0.13026742250652543,
|
|
"learning_rate": 0.001999121906167918,
|
|
"loss": 3.2312,
|
|
"step": 3545
|
|
},
|
|
{
|
|
"epoch": 1.1212982705520018,
|
|
"grad_norm": 0.11605126778184269,
|
|
"learning_rate": 0.001999098649905759,
|
|
"loss": 3.1845,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 1.122877675116481,
|
|
"grad_norm": 0.1579503573003752,
|
|
"learning_rate": 0.001999075089824084,
|
|
"loss": 3.3747,
|
|
"step": 3555
|
|
},
|
|
{
|
|
"epoch": 1.1244570796809603,
|
|
"grad_norm": 0.11793767104036577,
|
|
"learning_rate": 0.0019990512259300567,
|
|
"loss": 3.3676,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 1.1260364842454396,
|
|
"grad_norm": 0.10291434075214628,
|
|
"learning_rate": 0.0019990270582309353,
|
|
"loss": 3.3204,
|
|
"step": 3565
|
|
},
|
|
{
|
|
"epoch": 1.1276158888099186,
|
|
"grad_norm": 0.1277837851461023,
|
|
"learning_rate": 0.001999002586734068,
|
|
"loss": 3.4178,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 1.1291952933743978,
|
|
"grad_norm": 0.11724537011509982,
|
|
"learning_rate": 0.0019989778114468974,
|
|
"loss": 3.2551,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 1.130774697938877,
|
|
"grad_norm": 0.13588269554424826,
|
|
"learning_rate": 0.0019989527323769564,
|
|
"loss": 3.3492,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 1.1323541025033563,
|
|
"grad_norm": 0.14909668675411505,
|
|
"learning_rate": 0.0019989273495318724,
|
|
"loss": 3.3557,
|
|
"step": 3585
|
|
},
|
|
{
|
|
"epoch": 1.1339335070678354,
|
|
"grad_norm": 0.26584167932220104,
|
|
"learning_rate": 0.001998901662919364,
|
|
"loss": 3.381,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 1.1355129116323146,
|
|
"grad_norm": 0.2140440086183858,
|
|
"learning_rate": 0.0019988756725472416,
|
|
"loss": 3.3428,
|
|
"step": 3595
|
|
},
|
|
{
|
|
"epoch": 1.1370923161967939,
|
|
"grad_norm": 0.1938956532318173,
|
|
"learning_rate": 0.001998849378423409,
|
|
"loss": 3.2431,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 1.1386717207612729,
|
|
"grad_norm": 0.13215716093456611,
|
|
"learning_rate": 0.001998822780555863,
|
|
"loss": 3.3249,
|
|
"step": 3605
|
|
},
|
|
{
|
|
"epoch": 1.1402511253257521,
|
|
"grad_norm": 0.1332396552503134,
|
|
"learning_rate": 0.00199879587895269,
|
|
"loss": 3.2683,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 1.1418305298902314,
|
|
"grad_norm": 0.1376765381345977,
|
|
"learning_rate": 0.0019987686736220723,
|
|
"loss": 3.4012,
|
|
"step": 3615
|
|
},
|
|
{
|
|
"epoch": 1.1434099344547106,
|
|
"grad_norm": 0.10991614005842894,
|
|
"learning_rate": 0.0019987411645722825,
|
|
"loss": 3.262,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 1.1449893390191899,
|
|
"grad_norm": 0.20587342578391718,
|
|
"learning_rate": 0.0019987133518116857,
|
|
"loss": 3.3281,
|
|
"step": 3625
|
|
},
|
|
{
|
|
"epoch": 1.146568743583669,
|
|
"grad_norm": 0.16029495756721407,
|
|
"learning_rate": 0.0019986852353487392,
|
|
"loss": 3.179,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 1.1481481481481481,
|
|
"grad_norm": 0.17119484920176706,
|
|
"learning_rate": 0.0019986568151919935,
|
|
"loss": 3.3625,
|
|
"step": 3635
|
|
},
|
|
{
|
|
"epoch": 1.1497275527126274,
|
|
"grad_norm": 0.18007771763834388,
|
|
"learning_rate": 0.001998628091350091,
|
|
"loss": 3.2947,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 1.1513069572771064,
|
|
"grad_norm": 0.133112890117228,
|
|
"learning_rate": 0.001998599063831766,
|
|
"loss": 3.3428,
|
|
"step": 3645
|
|
},
|
|
{
|
|
"epoch": 1.1528863618415857,
|
|
"grad_norm": 0.11548842626474974,
|
|
"learning_rate": 0.0019985697326458463,
|
|
"loss": 3.2769,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 1.154465766406065,
|
|
"grad_norm": 0.14524782223820165,
|
|
"learning_rate": 0.0019985400978012506,
|
|
"loss": 3.3381,
|
|
"step": 3655
|
|
},
|
|
{
|
|
"epoch": 1.1560451709705442,
|
|
"grad_norm": 0.17275599053916738,
|
|
"learning_rate": 0.001998510159306991,
|
|
"loss": 3.3258,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 1.1576245755350234,
|
|
"grad_norm": 0.15188261477993478,
|
|
"learning_rate": 0.001998479917172172,
|
|
"loss": 3.2279,
|
|
"step": 3665
|
|
},
|
|
{
|
|
"epoch": 1.1592039800995024,
|
|
"grad_norm": 0.11594055773246185,
|
|
"learning_rate": 0.0019984493714059895,
|
|
"loss": 3.2907,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 1.1607833846639817,
|
|
"grad_norm": 0.16472747694603998,
|
|
"learning_rate": 0.0019984185220177325,
|
|
"loss": 3.3045,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 1.162362789228461,
|
|
"grad_norm": 0.12695428028045702,
|
|
"learning_rate": 0.001998387369016782,
|
|
"loss": 3.2843,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 1.16394219379294,
|
|
"grad_norm": 0.1383779290181551,
|
|
"learning_rate": 0.0019983559124126114,
|
|
"loss": 3.3356,
|
|
"step": 3685
|
|
},
|
|
{
|
|
"epoch": 1.1655215983574192,
|
|
"grad_norm": 0.1178710544115228,
|
|
"learning_rate": 0.0019983241522147865,
|
|
"loss": 3.1765,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 1.1671010029218984,
|
|
"grad_norm": 0.1074282742115371,
|
|
"learning_rate": 0.0019982920884329654,
|
|
"loss": 3.4055,
|
|
"step": 3695
|
|
},
|
|
{
|
|
"epoch": 1.1686804074863777,
|
|
"grad_norm": 0.1536495034869951,
|
|
"learning_rate": 0.0019982597210768983,
|
|
"loss": 3.2441,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 1.170259812050857,
|
|
"grad_norm": 0.1255113710506922,
|
|
"learning_rate": 0.0019982270501564285,
|
|
"loss": 3.2343,
|
|
"step": 3705
|
|
},
|
|
{
|
|
"epoch": 1.171839216615336,
|
|
"grad_norm": 0.14783407849501978,
|
|
"learning_rate": 0.00199819407568149,
|
|
"loss": 3.3616,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 1.1734186211798152,
|
|
"grad_norm": 0.14370786416564313,
|
|
"learning_rate": 0.0019981607976621114,
|
|
"loss": 3.1636,
|
|
"step": 3715
|
|
},
|
|
{
|
|
"epoch": 1.1749980257442945,
|
|
"grad_norm": 0.1645304933045862,
|
|
"learning_rate": 0.0019981272161084113,
|
|
"loss": 3.2503,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 1.1765774303087735,
|
|
"grad_norm": 0.11846710353311589,
|
|
"learning_rate": 0.001998093331030602,
|
|
"loss": 3.2984,
|
|
"step": 3725
|
|
},
|
|
{
|
|
"epoch": 1.1781568348732527,
|
|
"grad_norm": 0.34162094556522477,
|
|
"learning_rate": 0.0019980591424389876,
|
|
"loss": 3.3242,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 1.179736239437732,
|
|
"grad_norm": 0.18324615960578552,
|
|
"learning_rate": 0.001998024650343965,
|
|
"loss": 3.3071,
|
|
"step": 3735
|
|
},
|
|
{
|
|
"epoch": 1.1813156440022112,
|
|
"grad_norm": 0.12879840752783278,
|
|
"learning_rate": 0.001997989854756023,
|
|
"loss": 3.3383,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 1.1828950485666905,
|
|
"grad_norm": 0.17522825779942647,
|
|
"learning_rate": 0.001997954755685742,
|
|
"loss": 3.2078,
|
|
"step": 3745
|
|
},
|
|
{
|
|
"epoch": 1.1844744531311695,
|
|
"grad_norm": 0.14932656564575525,
|
|
"learning_rate": 0.0019979193531437962,
|
|
"loss": 3.2501,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 1.1860538576956487,
|
|
"grad_norm": 0.11865123535977866,
|
|
"learning_rate": 0.0019978836471409504,
|
|
"loss": 3.372,
|
|
"step": 3755
|
|
},
|
|
{
|
|
"epoch": 1.187633262260128,
|
|
"grad_norm": 0.11106273613599128,
|
|
"learning_rate": 0.001997847637688064,
|
|
"loss": 3.3333,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 1.189212666824607,
|
|
"grad_norm": 0.11783759839988478,
|
|
"learning_rate": 0.0019978113247960862,
|
|
"loss": 3.2367,
|
|
"step": 3765
|
|
},
|
|
{
|
|
"epoch": 1.1907920713890863,
|
|
"grad_norm": 0.12384146060700155,
|
|
"learning_rate": 0.0019977747084760594,
|
|
"loss": 3.1183,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 1.1923714759535655,
|
|
"grad_norm": 0.12503161294868623,
|
|
"learning_rate": 0.001997737788739119,
|
|
"loss": 3.3296,
|
|
"step": 3775
|
|
},
|
|
{
|
|
"epoch": 1.1939508805180448,
|
|
"grad_norm": 0.11667544977862357,
|
|
"learning_rate": 0.0019977005655964913,
|
|
"loss": 3.2352,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 1.1955302850825238,
|
|
"grad_norm": 0.17936254357422665,
|
|
"learning_rate": 0.0019976630390594967,
|
|
"loss": 3.236,
|
|
"step": 3785
|
|
},
|
|
{
|
|
"epoch": 1.197109689647003,
|
|
"grad_norm": 0.12000465722482047,
|
|
"learning_rate": 0.001997625209139546,
|
|
"loss": 3.1179,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 1.1986890942114823,
|
|
"grad_norm": 0.14110759430170178,
|
|
"learning_rate": 0.0019975870758481428,
|
|
"loss": 3.2982,
|
|
"step": 3795
|
|
},
|
|
{
|
|
"epoch": 1.2002684987759615,
|
|
"grad_norm": 0.11537924004492343,
|
|
"learning_rate": 0.001997548639196884,
|
|
"loss": 3.2035,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 1.2018479033404406,
|
|
"grad_norm": 0.15342351191916725,
|
|
"learning_rate": 0.0019975098991974576,
|
|
"loss": 3.2441,
|
|
"step": 3805
|
|
},
|
|
{
|
|
"epoch": 1.2034273079049198,
|
|
"grad_norm": 0.1517303082175663,
|
|
"learning_rate": 0.0019974708558616436,
|
|
"loss": 3.1546,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 1.205006712469399,
|
|
"grad_norm": 0.14179565122989465,
|
|
"learning_rate": 0.001997431509201316,
|
|
"loss": 3.3172,
|
|
"step": 3815
|
|
},
|
|
{
|
|
"epoch": 1.2065861170338783,
|
|
"grad_norm": 0.13118393817648813,
|
|
"learning_rate": 0.001997391859228439,
|
|
"loss": 3.1891,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 1.2081655215983573,
|
|
"grad_norm": 0.1341617483608506,
|
|
"learning_rate": 0.0019973519059550697,
|
|
"loss": 3.109,
|
|
"step": 3825
|
|
},
|
|
{
|
|
"epoch": 1.2097449261628366,
|
|
"grad_norm": 0.13620070052167957,
|
|
"learning_rate": 0.0019973116493933584,
|
|
"loss": 3.1534,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 1.2113243307273158,
|
|
"grad_norm": 0.12939284342206667,
|
|
"learning_rate": 0.0019972710895555467,
|
|
"loss": 3.1814,
|
|
"step": 3835
|
|
},
|
|
{
|
|
"epoch": 1.212903735291795,
|
|
"grad_norm": 0.13498974475198405,
|
|
"learning_rate": 0.0019972302264539684,
|
|
"loss": 3.1666,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 1.214483139856274,
|
|
"grad_norm": 0.13367835010491091,
|
|
"learning_rate": 0.0019971890601010495,
|
|
"loss": 3.289,
|
|
"step": 3845
|
|
},
|
|
{
|
|
"epoch": 1.2160625444207533,
|
|
"grad_norm": 0.13877130837928153,
|
|
"learning_rate": 0.0019971475905093084,
|
|
"loss": 3.2157,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 1.2176419489852326,
|
|
"grad_norm": 0.1384877772812798,
|
|
"learning_rate": 0.001997105817691357,
|
|
"loss": 3.2791,
|
|
"step": 3855
|
|
},
|
|
{
|
|
"epoch": 1.2192213535497118,
|
|
"grad_norm": 0.12456354108198894,
|
|
"learning_rate": 0.001997063741659896,
|
|
"loss": 3.2563,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 1.2208007581141909,
|
|
"grad_norm": 0.13081867644203868,
|
|
"learning_rate": 0.001997021362427722,
|
|
"loss": 3.119,
|
|
"step": 3865
|
|
},
|
|
{
|
|
"epoch": 1.2223801626786701,
|
|
"grad_norm": 0.12928400341888488,
|
|
"learning_rate": 0.0019969786800077215,
|
|
"loss": 3.2369,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 1.2239595672431494,
|
|
"grad_norm": 0.10358618908421555,
|
|
"learning_rate": 0.001996935694412875,
|
|
"loss": 3.2927,
|
|
"step": 3875
|
|
},
|
|
{
|
|
"epoch": 1.2255389718076286,
|
|
"grad_norm": 0.0966325015324056,
|
|
"learning_rate": 0.001996892405656253,
|
|
"loss": 3.2167,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 1.2271183763721076,
|
|
"grad_norm": 0.10711532367579371,
|
|
"learning_rate": 0.0019968488137510195,
|
|
"loss": 3.252,
|
|
"step": 3885
|
|
},
|
|
{
|
|
"epoch": 1.2286977809365869,
|
|
"grad_norm": 0.10983836977588884,
|
|
"learning_rate": 0.0019968049187104315,
|
|
"loss": 3.0567,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 1.2302771855010661,
|
|
"grad_norm": 0.10952845946022985,
|
|
"learning_rate": 0.0019967607205478356,
|
|
"loss": 3.0549,
|
|
"step": 3895
|
|
},
|
|
{
|
|
"epoch": 1.2318565900655454,
|
|
"grad_norm": 0.1238381423941252,
|
|
"learning_rate": 0.0019967162192766736,
|
|
"loss": 3.1814,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 1.2334359946300244,
|
|
"grad_norm": 0.13420597612755072,
|
|
"learning_rate": 0.0019966714149104777,
|
|
"loss": 3.1823,
|
|
"step": 3905
|
|
},
|
|
{
|
|
"epoch": 1.2350153991945036,
|
|
"grad_norm": 0.10329111557583345,
|
|
"learning_rate": 0.001996626307462872,
|
|
"loss": 3.187,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 1.236594803758983,
|
|
"grad_norm": 0.10633064709263741,
|
|
"learning_rate": 0.001996580896947574,
|
|
"loss": 3.0926,
|
|
"step": 3915
|
|
},
|
|
{
|
|
"epoch": 1.2381742083234621,
|
|
"grad_norm": 0.1404412639659265,
|
|
"learning_rate": 0.0019965351833783926,
|
|
"loss": 3.1486,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 1.2397536128879412,
|
|
"grad_norm": 0.08542198459750817,
|
|
"learning_rate": 0.0019964891667692292,
|
|
"loss": 3.1266,
|
|
"step": 3925
|
|
},
|
|
{
|
|
"epoch": 1.2413330174524204,
|
|
"grad_norm": 0.09777778835577029,
|
|
"learning_rate": 0.001996442847134076,
|
|
"loss": 3.1231,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 1.2429124220168997,
|
|
"grad_norm": 0.11792785325465897,
|
|
"learning_rate": 0.0019963962244870202,
|
|
"loss": 3.1933,
|
|
"step": 3935
|
|
},
|
|
{
|
|
"epoch": 1.244491826581379,
|
|
"grad_norm": 0.1097854930910212,
|
|
"learning_rate": 0.001996349298842239,
|
|
"loss": 3.1978,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 1.246071231145858,
|
|
"grad_norm": 0.13829225491817454,
|
|
"learning_rate": 0.0019963020702140014,
|
|
"loss": 3.1931,
|
|
"step": 3945
|
|
},
|
|
{
|
|
"epoch": 1.2476506357103372,
|
|
"grad_norm": 0.09611777306619898,
|
|
"learning_rate": 0.0019962545386166698,
|
|
"loss": 3.1986,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 1.2492300402748164,
|
|
"grad_norm": 0.11954156274404722,
|
|
"learning_rate": 0.0019962067040646984,
|
|
"loss": 3.1796,
|
|
"step": 3955
|
|
},
|
|
{
|
|
"epoch": 1.2508094448392955,
|
|
"grad_norm": 0.11600798040388846,
|
|
"learning_rate": 0.001996158566572633,
|
|
"loss": 3.1458,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 1.2523888494037747,
|
|
"grad_norm": 0.09983825050457865,
|
|
"learning_rate": 0.0019961101261551126,
|
|
"loss": 3.0764,
|
|
"step": 3965
|
|
},
|
|
{
|
|
"epoch": 1.253968253968254,
|
|
"grad_norm": 0.12089312291063067,
|
|
"learning_rate": 0.001996061382826867,
|
|
"loss": 3.1573,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 1.2555476585327332,
|
|
"grad_norm": 0.1365955307509055,
|
|
"learning_rate": 0.0019960123366027185,
|
|
"loss": 3.2639,
|
|
"step": 3975
|
|
},
|
|
{
|
|
"epoch": 1.2571270630972124,
|
|
"grad_norm": 0.15924451338159068,
|
|
"learning_rate": 0.0019959629874975824,
|
|
"loss": 3.2309,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 1.2587064676616915,
|
|
"grad_norm": 0.1485695640107557,
|
|
"learning_rate": 0.0019959133355264653,
|
|
"loss": 3.0889,
|
|
"step": 3985
|
|
},
|
|
{
|
|
"epoch": 1.2602858722261707,
|
|
"grad_norm": 0.12865335238484993,
|
|
"learning_rate": 0.0019958633807044654,
|
|
"loss": 3.1314,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 1.26186527679065,
|
|
"grad_norm": 0.12968582736289083,
|
|
"learning_rate": 0.0019958131230467745,
|
|
"loss": 3.0844,
|
|
"step": 3995
|
|
},
|
|
{
|
|
"epoch": 1.263444681355129,
|
|
"grad_norm": 0.10831091445134017,
|
|
"learning_rate": 0.0019957625625686756,
|
|
"loss": 3.1432,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 1.2650240859196082,
|
|
"grad_norm": 0.10875440072652662,
|
|
"learning_rate": 0.0019957116992855434,
|
|
"loss": 3.0823,
|
|
"step": 4005
|
|
},
|
|
{
|
|
"epoch": 1.2666034904840875,
|
|
"grad_norm": 0.1391997700234881,
|
|
"learning_rate": 0.001995660533212845,
|
|
"loss": 3.1656,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 1.2681828950485667,
|
|
"grad_norm": 0.12772909277929304,
|
|
"learning_rate": 0.0019956090643661398,
|
|
"loss": 3.08,
|
|
"step": 4015
|
|
},
|
|
{
|
|
"epoch": 1.269762299613046,
|
|
"grad_norm": 0.1267454766007706,
|
|
"learning_rate": 0.0019955572927610795,
|
|
"loss": 3.1432,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 1.271341704177525,
|
|
"grad_norm": 0.11164960171247067,
|
|
"learning_rate": 0.0019955052184134074,
|
|
"loss": 3.1631,
|
|
"step": 4025
|
|
},
|
|
{
|
|
"epoch": 1.2729211087420043,
|
|
"grad_norm": 0.10158383886480729,
|
|
"learning_rate": 0.0019954528413389586,
|
|
"loss": 3.0912,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 1.2745005133064835,
|
|
"grad_norm": 0.1072604836685963,
|
|
"learning_rate": 0.001995400161553661,
|
|
"loss": 3.1724,
|
|
"step": 4035
|
|
},
|
|
{
|
|
"epoch": 1.2760799178709625,
|
|
"grad_norm": 0.10241192413829599,
|
|
"learning_rate": 0.0019953471790735344,
|
|
"loss": 3.2245,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 1.2776593224354418,
|
|
"grad_norm": 0.12048005165040691,
|
|
"learning_rate": 0.0019952938939146896,
|
|
"loss": 3.165,
|
|
"step": 4045
|
|
},
|
|
{
|
|
"epoch": 1.279238726999921,
|
|
"grad_norm": 0.13777460457101334,
|
|
"learning_rate": 0.001995240306093331,
|
|
"loss": 3.1549,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 1.2808181315644003,
|
|
"grad_norm": 0.14104721610668222,
|
|
"learning_rate": 0.001995186415625754,
|
|
"loss": 3.0806,
|
|
"step": 4055
|
|
},
|
|
{
|
|
"epoch": 1.2823975361288795,
|
|
"grad_norm": 0.15189782154441522,
|
|
"learning_rate": 0.001995132222528346,
|
|
"loss": 3.1992,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 1.2839769406933585,
|
|
"grad_norm": 0.09726047026710698,
|
|
"learning_rate": 0.0019950777268175875,
|
|
"loss": 3.0581,
|
|
"step": 4065
|
|
},
|
|
{
|
|
"epoch": 1.2855563452578378,
|
|
"grad_norm": 0.11930065052202582,
|
|
"learning_rate": 0.0019950229285100505,
|
|
"loss": 3.049,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 1.287135749822317,
|
|
"grad_norm": 0.15432859275743474,
|
|
"learning_rate": 0.0019949678276223975,
|
|
"loss": 3.1533,
|
|
"step": 4075
|
|
},
|
|
{
|
|
"epoch": 1.288715154386796,
|
|
"grad_norm": 0.13762216779820105,
|
|
"learning_rate": 0.0019949124241713857,
|
|
"loss": 3.063,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 1.2902945589512753,
|
|
"grad_norm": 0.11435516872439676,
|
|
"learning_rate": 0.0019948567181738625,
|
|
"loss": 3.1914,
|
|
"step": 4085
|
|
},
|
|
{
|
|
"epoch": 1.2918739635157546,
|
|
"grad_norm": 0.11364780853900598,
|
|
"learning_rate": 0.0019948007096467673,
|
|
"loss": 3.0757,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 1.2934533680802338,
|
|
"grad_norm": 0.13025643217942512,
|
|
"learning_rate": 0.0019947443986071327,
|
|
"loss": 3.1264,
|
|
"step": 4095
|
|
},
|
|
{
|
|
"epoch": 1.295032772644713,
|
|
"grad_norm": 0.10444186688171346,
|
|
"learning_rate": 0.0019946877850720813,
|
|
"loss": 3.0088,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 1.296612177209192,
|
|
"grad_norm": 0.10515226058000508,
|
|
"learning_rate": 0.0019946308690588304,
|
|
"loss": 3.1422,
|
|
"step": 4105
|
|
},
|
|
{
|
|
"epoch": 1.2981915817736713,
|
|
"grad_norm": 0.09955106249820891,
|
|
"learning_rate": 0.0019945736505846867,
|
|
"loss": 3.0543,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 1.2997709863381506,
|
|
"grad_norm": 0.11936337099156746,
|
|
"learning_rate": 0.0019945161296670505,
|
|
"loss": 3.1021,
|
|
"step": 4115
|
|
},
|
|
{
|
|
"epoch": 1.3013503909026296,
|
|
"grad_norm": 0.11200407436331895,
|
|
"learning_rate": 0.001994458306323413,
|
|
"loss": 3.1028,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 1.3029297954671089,
|
|
"grad_norm": 0.11340792686905848,
|
|
"learning_rate": 0.001994400180571359,
|
|
"loss": 3.0485,
|
|
"step": 4125
|
|
},
|
|
{
|
|
"epoch": 1.304509200031588,
|
|
"grad_norm": 0.10922500132746676,
|
|
"learning_rate": 0.0019943417524285628,
|
|
"loss": 3.1952,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 1.3060886045960673,
|
|
"grad_norm": 0.17111466112095622,
|
|
"learning_rate": 0.0019942830219127935,
|
|
"loss": 3.0573,
|
|
"step": 4135
|
|
},
|
|
{
|
|
"epoch": 1.3076680091605466,
|
|
"grad_norm": 0.12758696103189704,
|
|
"learning_rate": 0.0019942239890419094,
|
|
"loss": 3.0893,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 1.3092474137250256,
|
|
"grad_norm": 0.1225924821208543,
|
|
"learning_rate": 0.0019941646538338626,
|
|
"loss": 3.1334,
|
|
"step": 4145
|
|
},
|
|
{
|
|
"epoch": 1.3108268182895049,
|
|
"grad_norm": 0.09912067745879768,
|
|
"learning_rate": 0.0019941050163066964,
|
|
"loss": 3.1031,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 1.3124062228539841,
|
|
"grad_norm": 0.13833974443279878,
|
|
"learning_rate": 0.0019940450764785464,
|
|
"loss": 3.102,
|
|
"step": 4155
|
|
},
|
|
{
|
|
"epoch": 1.3139856274184631,
|
|
"grad_norm": 0.10815876254157507,
|
|
"learning_rate": 0.0019939848343676395,
|
|
"loss": 3.118,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 1.3155650319829424,
|
|
"grad_norm": 0.11804236405004581,
|
|
"learning_rate": 0.001993924289992295,
|
|
"loss": 3.1192,
|
|
"step": 4165
|
|
},
|
|
{
|
|
"epoch": 1.3171444365474216,
|
|
"grad_norm": 0.13162961380042495,
|
|
"learning_rate": 0.0019938634433709253,
|
|
"loss": 3.1547,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 1.3187238411119009,
|
|
"grad_norm": 0.12597257223968147,
|
|
"learning_rate": 0.0019938022945220316,
|
|
"loss": 3.1191,
|
|
"step": 4175
|
|
},
|
|
{
|
|
"epoch": 1.3203032456763801,
|
|
"grad_norm": 0.14620891462000044,
|
|
"learning_rate": 0.00199374084346421,
|
|
"loss": 3.1462,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 1.3218826502408592,
|
|
"grad_norm": 0.14662006556760473,
|
|
"learning_rate": 0.001993679090216147,
|
|
"loss": 3.0915,
|
|
"step": 4185
|
|
},
|
|
{
|
|
"epoch": 1.3234620548053384,
|
|
"grad_norm": 0.1355029139391511,
|
|
"learning_rate": 0.0019936170347966214,
|
|
"loss": 3.0743,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 1.3250414593698177,
|
|
"grad_norm": 0.09561133746027113,
|
|
"learning_rate": 0.001993554677224504,
|
|
"loss": 3.0205,
|
|
"step": 4195
|
|
},
|
|
{
|
|
"epoch": 1.3266208639342967,
|
|
"grad_norm": 0.12680139091132397,
|
|
"learning_rate": 0.001993492017518757,
|
|
"loss": 3.0777,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 1.328200268498776,
|
|
"grad_norm": 0.10167135455161361,
|
|
"learning_rate": 0.0019934290556984356,
|
|
"loss": 3.0413,
|
|
"step": 4205
|
|
},
|
|
{
|
|
"epoch": 1.3297796730632552,
|
|
"grad_norm": 0.09893384227573258,
|
|
"learning_rate": 0.001993365791782685,
|
|
"loss": 3.0541,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 1.3313590776277344,
|
|
"grad_norm": 0.12806959547374103,
|
|
"learning_rate": 0.0019933022257907444,
|
|
"loss": 3.1291,
|
|
"step": 4215
|
|
},
|
|
{
|
|
"epoch": 1.3329384821922137,
|
|
"grad_norm": 0.10631455656766883,
|
|
"learning_rate": 0.001993238357741943,
|
|
"loss": 3.066,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 1.3345178867566927,
|
|
"grad_norm": 0.09020847358227843,
|
|
"learning_rate": 0.0019931741876557034,
|
|
"loss": 2.9794,
|
|
"step": 4225
|
|
},
|
|
{
|
|
"epoch": 1.336097291321172,
|
|
"grad_norm": 0.11716990851001001,
|
|
"learning_rate": 0.0019931097155515384,
|
|
"loss": 3.087,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 1.3376766958856512,
|
|
"grad_norm": 0.1559309966590921,
|
|
"learning_rate": 0.001993044941449054,
|
|
"loss": 3.0382,
|
|
"step": 4235
|
|
},
|
|
{
|
|
"epoch": 1.3392561004501302,
|
|
"grad_norm": 0.18501164183590774,
|
|
"learning_rate": 0.001992979865367948,
|
|
"loss": 3.1347,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 1.3408355050146095,
|
|
"grad_norm": 0.13204163189060986,
|
|
"learning_rate": 0.001992914487328009,
|
|
"loss": 3.1074,
|
|
"step": 4245
|
|
},
|
|
{
|
|
"epoch": 1.3424149095790887,
|
|
"grad_norm": 0.13187411270146468,
|
|
"learning_rate": 0.0019928488073491187,
|
|
"loss": 3.0863,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 1.343994314143568,
|
|
"grad_norm": 0.14665195514889143,
|
|
"learning_rate": 0.0019927828254512493,
|
|
"loss": 3.083,
|
|
"step": 4255
|
|
},
|
|
{
|
|
"epoch": 1.345573718708047,
|
|
"grad_norm": 0.11771818576434624,
|
|
"learning_rate": 0.0019927165416544655,
|
|
"loss": 2.9603,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 1.3471531232725262,
|
|
"grad_norm": 0.0999785437643928,
|
|
"learning_rate": 0.0019926499559789245,
|
|
"loss": 3.0477,
|
|
"step": 4265
|
|
},
|
|
{
|
|
"epoch": 1.3487325278370055,
|
|
"grad_norm": 0.09901228786007872,
|
|
"learning_rate": 0.001992583068444874,
|
|
"loss": 3.0167,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 1.3503119324014845,
|
|
"grad_norm": 0.08862562160808428,
|
|
"learning_rate": 0.001992515879072654,
|
|
"loss": 3.0456,
|
|
"step": 4275
|
|
},
|
|
{
|
|
"epoch": 1.3518913369659638,
|
|
"grad_norm": 0.10845634985741569,
|
|
"learning_rate": 0.0019924483878826964,
|
|
"loss": 2.9824,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 1.353470741530443,
|
|
"grad_norm": 0.0995199133515051,
|
|
"learning_rate": 0.001992380594895525,
|
|
"loss": 3.084,
|
|
"step": 4285
|
|
},
|
|
{
|
|
"epoch": 1.3550501460949222,
|
|
"grad_norm": 0.1329601121160114,
|
|
"learning_rate": 0.001992312500131756,
|
|
"loss": 3.0172,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 1.3566295506594015,
|
|
"grad_norm": 0.1140598576669714,
|
|
"learning_rate": 0.001992244103612095,
|
|
"loss": 3.0689,
|
|
"step": 4295
|
|
},
|
|
{
|
|
"epoch": 1.3582089552238805,
|
|
"grad_norm": 0.12755509737439705,
|
|
"learning_rate": 0.0019921754053573416,
|
|
"loss": 3.0067,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 1.3597883597883598,
|
|
"grad_norm": 0.09952094527330028,
|
|
"learning_rate": 0.001992106405388387,
|
|
"loss": 3.0083,
|
|
"step": 4305
|
|
},
|
|
{
|
|
"epoch": 1.361367764352839,
|
|
"grad_norm": 0.11698793022306847,
|
|
"learning_rate": 0.001992037103726213,
|
|
"loss": 3.0387,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 1.362947168917318,
|
|
"grad_norm": 0.1363797347516853,
|
|
"learning_rate": 0.001991967500391894,
|
|
"loss": 3.0957,
|
|
"step": 4315
|
|
},
|
|
{
|
|
"epoch": 1.3645265734817973,
|
|
"grad_norm": 0.10233191377919058,
|
|
"learning_rate": 0.0019918975954065963,
|
|
"loss": 3.0116,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 1.3661059780462765,
|
|
"grad_norm": 0.1308041636276131,
|
|
"learning_rate": 0.0019918273887915773,
|
|
"loss": 3.1729,
|
|
"step": 4325
|
|
},
|
|
{
|
|
"epoch": 1.3676853826107558,
|
|
"grad_norm": 0.11065995577696967,
|
|
"learning_rate": 0.001991756880568186,
|
|
"loss": 3.118,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 1.369264787175235,
|
|
"grad_norm": 0.13714608654227864,
|
|
"learning_rate": 0.0019916860707578643,
|
|
"loss": 3.0521,
|
|
"step": 4335
|
|
},
|
|
{
|
|
"epoch": 1.370844191739714,
|
|
"grad_norm": 0.11194789482141944,
|
|
"learning_rate": 0.001991614959382144,
|
|
"loss": 2.9706,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 1.3724235963041933,
|
|
"grad_norm": 0.1637143852040583,
|
|
"learning_rate": 0.0019915435464626504,
|
|
"loss": 2.9616,
|
|
"step": 4345
|
|
},
|
|
{
|
|
"epoch": 1.3740030008686726,
|
|
"grad_norm": 0.12488036224199506,
|
|
"learning_rate": 0.0019914718320210995,
|
|
"loss": 3.0638,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 1.3755824054331516,
|
|
"grad_norm": 0.11984278163621372,
|
|
"learning_rate": 0.001991399816079299,
|
|
"loss": 3.0649,
|
|
"step": 4355
|
|
},
|
|
{
|
|
"epoch": 1.3771618099976308,
|
|
"grad_norm": 0.10466430162726809,
|
|
"learning_rate": 0.001991327498659149,
|
|
"loss": 2.9393,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 1.37874121456211,
|
|
"grad_norm": 0.1185557598443159,
|
|
"learning_rate": 0.00199125487978264,
|
|
"loss": 3.0532,
|
|
"step": 4365
|
|
},
|
|
{
|
|
"epoch": 1.3803206191265893,
|
|
"grad_norm": 0.10203033131461912,
|
|
"learning_rate": 0.0019911819594718556,
|
|
"loss": 2.9857,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 1.3819000236910686,
|
|
"grad_norm": 0.0987878808276104,
|
|
"learning_rate": 0.00199110873774897,
|
|
"loss": 3.0392,
|
|
"step": 4375
|
|
},
|
|
{
|
|
"epoch": 1.3834794282555476,
|
|
"grad_norm": 0.11449697953789201,
|
|
"learning_rate": 0.0019910352146362497,
|
|
"loss": 3.059,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 1.3850588328200268,
|
|
"grad_norm": 0.1545129904912003,
|
|
"learning_rate": 0.0019909613901560527,
|
|
"loss": 3.0935,
|
|
"step": 4385
|
|
},
|
|
{
|
|
"epoch": 1.386638237384506,
|
|
"grad_norm": 0.17034858880784884,
|
|
"learning_rate": 0.0019908872643308283,
|
|
"loss": 3.0646,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 1.3882176419489851,
|
|
"grad_norm": 0.11078244673665372,
|
|
"learning_rate": 0.0019908128371831178,
|
|
"loss": 3.1438,
|
|
"step": 4395
|
|
},
|
|
{
|
|
"epoch": 1.3897970465134644,
|
|
"grad_norm": 0.13774166764383755,
|
|
"learning_rate": 0.0019907381087355537,
|
|
"loss": 3.0062,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 1.3913764510779436,
|
|
"grad_norm": 0.10786798709984459,
|
|
"learning_rate": 0.001990663079010861,
|
|
"loss": 3.0725,
|
|
"step": 4405
|
|
},
|
|
{
|
|
"epoch": 1.3929558556424229,
|
|
"grad_norm": 0.12162781102145018,
|
|
"learning_rate": 0.0019905877480318555,
|
|
"loss": 3.0076,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 1.394535260206902,
|
|
"grad_norm": 0.10185279989897872,
|
|
"learning_rate": 0.0019905121158214447,
|
|
"loss": 3.0459,
|
|
"step": 4415
|
|
},
|
|
{
|
|
"epoch": 1.3961146647713811,
|
|
"grad_norm": 0.1437979177483079,
|
|
"learning_rate": 0.001990436182402628,
|
|
"loss": 3.0439,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 1.3976940693358604,
|
|
"grad_norm": 0.13528480927930733,
|
|
"learning_rate": 0.001990359947798497,
|
|
"loss": 2.9768,
|
|
"step": 4425
|
|
},
|
|
{
|
|
"epoch": 1.3992734739003396,
|
|
"grad_norm": 0.10152004278416521,
|
|
"learning_rate": 0.001990283412032233,
|
|
"loss": 3.0177,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 1.4008528784648187,
|
|
"grad_norm": 0.10698464459247188,
|
|
"learning_rate": 0.00199020657512711,
|
|
"loss": 2.9564,
|
|
"step": 4435
|
|
},
|
|
{
|
|
"epoch": 1.402432283029298,
|
|
"grad_norm": 0.12627145665399941,
|
|
"learning_rate": 0.0019901294371064944,
|
|
"loss": 2.9423,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 1.4040116875937771,
|
|
"grad_norm": 0.13409019326545243,
|
|
"learning_rate": 0.0019900519979938434,
|
|
"loss": 3.0,
|
|
"step": 4445
|
|
},
|
|
{
|
|
"epoch": 1.4055910921582564,
|
|
"grad_norm": 0.10552324075089604,
|
|
"learning_rate": 0.001989974257812705,
|
|
"loss": 2.9409,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 1.4071704967227356,
|
|
"grad_norm": 0.122826319671673,
|
|
"learning_rate": 0.0019898962165867205,
|
|
"loss": 3.0135,
|
|
"step": 4455
|
|
},
|
|
{
|
|
"epoch": 1.4087499012872147,
|
|
"grad_norm": 0.11354791157697926,
|
|
"learning_rate": 0.0019898178743396207,
|
|
"loss": 3.0774,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 1.410329305851694,
|
|
"grad_norm": 0.15224792619213673,
|
|
"learning_rate": 0.0019897392310952292,
|
|
"loss": 3.0452,
|
|
"step": 4465
|
|
},
|
|
{
|
|
"epoch": 1.4119087104161732,
|
|
"grad_norm": 0.10087355471090065,
|
|
"learning_rate": 0.0019896602868774618,
|
|
"loss": 2.9939,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 1.4134881149806522,
|
|
"grad_norm": 0.08272242215342274,
|
|
"learning_rate": 0.001989581041710324,
|
|
"loss": 3.0465,
|
|
"step": 4475
|
|
},
|
|
{
|
|
"epoch": 1.4150675195451314,
|
|
"grad_norm": 0.11210873003897295,
|
|
"learning_rate": 0.001989501495617914,
|
|
"loss": 3.1138,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 1.4166469241096107,
|
|
"grad_norm": 0.09534871752668661,
|
|
"learning_rate": 0.001989421648624421,
|
|
"loss": 2.9726,
|
|
"step": 4485
|
|
},
|
|
{
|
|
"epoch": 1.41822632867409,
|
|
"grad_norm": 0.10770938407123477,
|
|
"learning_rate": 0.0019893415007541265,
|
|
"loss": 3.1972,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 1.4198057332385692,
|
|
"grad_norm": 0.09610267990196253,
|
|
"learning_rate": 0.001989261052031403,
|
|
"loss": 3.13,
|
|
"step": 4495
|
|
},
|
|
{
|
|
"epoch": 1.4213851378030482,
|
|
"grad_norm": 0.13605706757511912,
|
|
"learning_rate": 0.0019891803024807138,
|
|
"loss": 3.068,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 1.4229645423675275,
|
|
"grad_norm": 0.12325106578155333,
|
|
"learning_rate": 0.0019890992521266145,
|
|
"loss": 2.9774,
|
|
"step": 4505
|
|
},
|
|
{
|
|
"epoch": 1.4245439469320067,
|
|
"grad_norm": 0.14455830288755464,
|
|
"learning_rate": 0.0019890179009937527,
|
|
"loss": 3.0241,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 1.4261233514964857,
|
|
"grad_norm": 0.1052669832362689,
|
|
"learning_rate": 0.0019889362491068655,
|
|
"loss": 2.9926,
|
|
"step": 4515
|
|
},
|
|
{
|
|
"epoch": 1.427702756060965,
|
|
"grad_norm": 0.1642757068083475,
|
|
"learning_rate": 0.001988854296490784,
|
|
"loss": 3.0745,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 1.4292821606254442,
|
|
"grad_norm": 0.20946325027308604,
|
|
"learning_rate": 0.001988772043170429,
|
|
"loss": 3.0698,
|
|
"step": 4525
|
|
},
|
|
{
|
|
"epoch": 1.4308615651899235,
|
|
"grad_norm": 0.13936163585211253,
|
|
"learning_rate": 0.001988689489170813,
|
|
"loss": 2.999,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 1.4324409697544027,
|
|
"grad_norm": 0.13050964476816038,
|
|
"learning_rate": 0.0019886066345170396,
|
|
"loss": 3.004,
|
|
"step": 4535
|
|
},
|
|
{
|
|
"epoch": 1.4340203743188817,
|
|
"grad_norm": 0.125075763003334,
|
|
"learning_rate": 0.0019885234792343057,
|
|
"loss": 3.0632,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 1.435599778883361,
|
|
"grad_norm": 0.08896244058788337,
|
|
"learning_rate": 0.0019884400233478976,
|
|
"loss": 2.9419,
|
|
"step": 4545
|
|
},
|
|
{
|
|
"epoch": 1.4371791834478402,
|
|
"grad_norm": 0.11862506407549697,
|
|
"learning_rate": 0.001988356266883193,
|
|
"loss": 2.9654,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 1.4387585880123193,
|
|
"grad_norm": 0.12431900571134885,
|
|
"learning_rate": 0.001988272209865663,
|
|
"loss": 2.8973,
|
|
"step": 4555
|
|
},
|
|
{
|
|
"epoch": 1.4403379925767985,
|
|
"grad_norm": 0.1088210197989654,
|
|
"learning_rate": 0.0019881878523208686,
|
|
"loss": 3.0581,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 1.4419173971412778,
|
|
"grad_norm": 0.10081325955650519,
|
|
"learning_rate": 0.0019881031942744617,
|
|
"loss": 2.9722,
|
|
"step": 4565
|
|
},
|
|
{
|
|
"epoch": 1.443496801705757,
|
|
"grad_norm": 0.09884952322328694,
|
|
"learning_rate": 0.0019880182357521867,
|
|
"loss": 3.1218,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 1.4450762062702363,
|
|
"grad_norm": 0.08478558540659335,
|
|
"learning_rate": 0.0019879329767798787,
|
|
"loss": 2.9096,
|
|
"step": 4575
|
|
},
|
|
{
|
|
"epoch": 1.4466556108347153,
|
|
"grad_norm": 0.10555755370610553,
|
|
"learning_rate": 0.001987847417383464,
|
|
"loss": 2.9623,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 1.4482350153991945,
|
|
"grad_norm": 0.08622649112731266,
|
|
"learning_rate": 0.001987761557588962,
|
|
"loss": 3.0071,
|
|
"step": 4585
|
|
},
|
|
{
|
|
"epoch": 1.4498144199636738,
|
|
"grad_norm": 0.09445114381187443,
|
|
"learning_rate": 0.001987675397422481,
|
|
"loss": 2.9472,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 1.4513938245281528,
|
|
"grad_norm": 0.08645520160432053,
|
|
"learning_rate": 0.001987588936910222,
|
|
"loss": 3.0327,
|
|
"step": 4595
|
|
},
|
|
{
|
|
"epoch": 1.452973229092632,
|
|
"grad_norm": 0.09266557898694067,
|
|
"learning_rate": 0.0019875021760784773,
|
|
"loss": 2.9679,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 1.4545526336571113,
|
|
"grad_norm": 0.09510456931243706,
|
|
"learning_rate": 0.00198741511495363,
|
|
"loss": 3.0081,
|
|
"step": 4605
|
|
},
|
|
{
|
|
"epoch": 1.4561320382215905,
|
|
"grad_norm": 0.10882985029674294,
|
|
"learning_rate": 0.0019873277535621555,
|
|
"loss": 2.9785,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 1.4577114427860698,
|
|
"grad_norm": 0.09723739191160814,
|
|
"learning_rate": 0.001987240091930619,
|
|
"loss": 2.858,
|
|
"step": 4615
|
|
},
|
|
{
|
|
"epoch": 1.4592908473505488,
|
|
"grad_norm": 0.08482317791765791,
|
|
"learning_rate": 0.001987152130085678,
|
|
"loss": 2.9225,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 1.460870251915028,
|
|
"grad_norm": 0.09570859232600319,
|
|
"learning_rate": 0.0019870638680540816,
|
|
"loss": 3.0148,
|
|
"step": 4625
|
|
},
|
|
{
|
|
"epoch": 1.462449656479507,
|
|
"grad_norm": 0.09277192471107024,
|
|
"learning_rate": 0.0019869753058626696,
|
|
"loss": 3.0133,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 1.4640290610439863,
|
|
"grad_norm": 0.11133410369142042,
|
|
"learning_rate": 0.0019868864435383725,
|
|
"loss": 2.8989,
|
|
"step": 4635
|
|
},
|
|
{
|
|
"epoch": 1.4656084656084656,
|
|
"grad_norm": 0.07727152770646367,
|
|
"learning_rate": 0.0019867972811082137,
|
|
"loss": 2.9846,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 1.4671878701729448,
|
|
"grad_norm": 0.097514775027411,
|
|
"learning_rate": 0.0019867078185993067,
|
|
"loss": 2.9711,
|
|
"step": 4645
|
|
},
|
|
{
|
|
"epoch": 1.468767274737424,
|
|
"grad_norm": 0.09621032301747534,
|
|
"learning_rate": 0.0019866180560388557,
|
|
"loss": 3.0579,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 1.470346679301903,
|
|
"grad_norm": 0.09648128346327675,
|
|
"learning_rate": 0.0019865279934541584,
|
|
"loss": 3.0346,
|
|
"step": 4655
|
|
},
|
|
{
|
|
"epoch": 1.4719260838663824,
|
|
"grad_norm": 0.11365142360663955,
|
|
"learning_rate": 0.0019864376308726004,
|
|
"loss": 2.9421,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 1.4735054884308616,
|
|
"grad_norm": 0.091741612081307,
|
|
"learning_rate": 0.0019863469683216624,
|
|
"loss": 2.9495,
|
|
"step": 4665
|
|
},
|
|
{
|
|
"epoch": 1.4750848929953406,
|
|
"grad_norm": 0.10035727620230114,
|
|
"learning_rate": 0.0019862560058289125,
|
|
"loss": 2.9744,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 1.4766642975598199,
|
|
"grad_norm": 0.09735171737874418,
|
|
"learning_rate": 0.001986164743422013,
|
|
"loss": 2.967,
|
|
"step": 4675
|
|
},
|
|
{
|
|
"epoch": 1.4782437021242991,
|
|
"grad_norm": 0.11441091058772467,
|
|
"learning_rate": 0.0019860731811287154,
|
|
"loss": 3.024,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 1.4798231066887784,
|
|
"grad_norm": 0.09619007273414427,
|
|
"learning_rate": 0.0019859813189768644,
|
|
"loss": 2.9589,
|
|
"step": 4685
|
|
},
|
|
{
|
|
"epoch": 1.4814025112532576,
|
|
"grad_norm": 0.1296771374184954,
|
|
"learning_rate": 0.0019858891569943934,
|
|
"loss": 2.9977,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 1.4829819158177366,
|
|
"grad_norm": 0.11256784358801805,
|
|
"learning_rate": 0.0019857966952093286,
|
|
"loss": 2.9332,
|
|
"step": 4695
|
|
},
|
|
{
|
|
"epoch": 1.4845613203822159,
|
|
"grad_norm": 0.11178265985854806,
|
|
"learning_rate": 0.0019857039336497874,
|
|
"loss": 2.9517,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 1.4861407249466951,
|
|
"grad_norm": 0.11370303447286977,
|
|
"learning_rate": 0.001985610872343978,
|
|
"loss": 2.9207,
|
|
"step": 4705
|
|
},
|
|
{
|
|
"epoch": 1.4877201295111742,
|
|
"grad_norm": 0.10863653182760975,
|
|
"learning_rate": 0.0019855175113201993,
|
|
"loss": 2.8895,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 1.4892995340756534,
|
|
"grad_norm": 0.09663215015277192,
|
|
"learning_rate": 0.001985423850606842,
|
|
"loss": 2.9217,
|
|
"step": 4715
|
|
},
|
|
{
|
|
"epoch": 1.4908789386401327,
|
|
"grad_norm": 0.13809403189713054,
|
|
"learning_rate": 0.001985329890232388,
|
|
"loss": 3.0305,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 1.492458343204612,
|
|
"grad_norm": 0.11821789782684203,
|
|
"learning_rate": 0.0019852356302254097,
|
|
"loss": 2.9642,
|
|
"step": 4725
|
|
},
|
|
{
|
|
"epoch": 1.4940377477690912,
|
|
"grad_norm": 0.11090497998614943,
|
|
"learning_rate": 0.001985141070614571,
|
|
"loss": 2.9144,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 1.4956171523335702,
|
|
"grad_norm": 0.08742000966752303,
|
|
"learning_rate": 0.001985046211428627,
|
|
"loss": 2.9174,
|
|
"step": 4735
|
|
},
|
|
{
|
|
"epoch": 1.4971965568980494,
|
|
"grad_norm": 0.09589758068562293,
|
|
"learning_rate": 0.001984951052696424,
|
|
"loss": 3.0086,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 1.4987759614625287,
|
|
"grad_norm": 0.09592446840177818,
|
|
"learning_rate": 0.001984855594446899,
|
|
"loss": 3.017,
|
|
"step": 4745
|
|
},
|
|
{
|
|
"epoch": 1.5003553660270077,
|
|
"grad_norm": 0.10689358313731816,
|
|
"learning_rate": 0.0019847598367090796,
|
|
"loss": 2.9822,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 1.501934770591487,
|
|
"grad_norm": 0.09503783883425908,
|
|
"learning_rate": 0.0019846637795120857,
|
|
"loss": 2.9189,
|
|
"step": 4755
|
|
},
|
|
{
|
|
"epoch": 1.5035141751559662,
|
|
"grad_norm": 0.09938516992928695,
|
|
"learning_rate": 0.001984567422885128,
|
|
"loss": 3.0234,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 1.5050935797204454,
|
|
"grad_norm": 0.0883607481998521,
|
|
"learning_rate": 0.0019844707668575075,
|
|
"loss": 2.9138,
|
|
"step": 4765
|
|
},
|
|
{
|
|
"epoch": 1.5066729842849247,
|
|
"grad_norm": 0.1508418282651358,
|
|
"learning_rate": 0.001984373811458617,
|
|
"loss": 2.9186,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 1.508252388849404,
|
|
"grad_norm": 0.09830299547741146,
|
|
"learning_rate": 0.001984276556717939,
|
|
"loss": 2.9431,
|
|
"step": 4775
|
|
},
|
|
{
|
|
"epoch": 1.509831793413883,
|
|
"grad_norm": 0.10920442240888395,
|
|
"learning_rate": 0.0019841790026650496,
|
|
"loss": 3.0,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 1.5114111979783622,
|
|
"grad_norm": 0.12541008593529593,
|
|
"learning_rate": 0.0019840811493296133,
|
|
"loss": 2.9093,
|
|
"step": 4785
|
|
},
|
|
{
|
|
"epoch": 1.5129906025428412,
|
|
"grad_norm": 0.12553700549411517,
|
|
"learning_rate": 0.001983982996741387,
|
|
"loss": 2.94,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 1.5145700071073205,
|
|
"grad_norm": 0.10866989638304686,
|
|
"learning_rate": 0.001983884544930218,
|
|
"loss": 2.8466,
|
|
"step": 4795
|
|
},
|
|
{
|
|
"epoch": 1.5161494116717997,
|
|
"grad_norm": 0.09874987150780433,
|
|
"learning_rate": 0.0019837857939260456,
|
|
"loss": 2.8555,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 1.517728816236279,
|
|
"grad_norm": 0.09699900376639088,
|
|
"learning_rate": 0.0019836867437588988,
|
|
"loss": 2.9514,
|
|
"step": 4805
|
|
},
|
|
{
|
|
"epoch": 1.5193082208007582,
|
|
"grad_norm": 0.09793401970835895,
|
|
"learning_rate": 0.0019835873944588976,
|
|
"loss": 2.9401,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 1.5208876253652373,
|
|
"grad_norm": 0.08558765289450597,
|
|
"learning_rate": 0.0019834877460562545,
|
|
"loss": 2.9612,
|
|
"step": 4815
|
|
},
|
|
{
|
|
"epoch": 1.5224670299297165,
|
|
"grad_norm": 0.11862913842448683,
|
|
"learning_rate": 0.0019833877985812715,
|
|
"loss": 2.9126,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 1.5240464344941955,
|
|
"grad_norm": 0.0985856306095801,
|
|
"learning_rate": 0.0019832875520643415,
|
|
"loss": 2.966,
|
|
"step": 4825
|
|
},
|
|
{
|
|
"epoch": 1.5256258390586748,
|
|
"grad_norm": 0.08714163509947663,
|
|
"learning_rate": 0.0019831870065359497,
|
|
"loss": 3.0231,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 1.527205243623154,
|
|
"grad_norm": 0.08236307357637031,
|
|
"learning_rate": 0.001983086162026671,
|
|
"loss": 2.8827,
|
|
"step": 4835
|
|
},
|
|
{
|
|
"epoch": 1.5287846481876333,
|
|
"grad_norm": 0.07833304673352745,
|
|
"learning_rate": 0.0019829850185671717,
|
|
"loss": 2.9736,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 1.5303640527521125,
|
|
"grad_norm": 0.0953787909476679,
|
|
"learning_rate": 0.0019828835761882086,
|
|
"loss": 2.9241,
|
|
"step": 4845
|
|
},
|
|
{
|
|
"epoch": 1.5319434573165918,
|
|
"grad_norm": 0.08773467195247037,
|
|
"learning_rate": 0.0019827818349206295,
|
|
"loss": 2.996,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 1.5335228618810708,
|
|
"grad_norm": 0.10556485908717649,
|
|
"learning_rate": 0.001982679794795374,
|
|
"loss": 2.9428,
|
|
"step": 4855
|
|
},
|
|
{
|
|
"epoch": 1.53510226644555,
|
|
"grad_norm": 0.11706965366371103,
|
|
"learning_rate": 0.001982577455843471,
|
|
"loss": 3.0093,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 1.536681671010029,
|
|
"grad_norm": 0.11575180262595218,
|
|
"learning_rate": 0.0019824748180960416,
|
|
"loss": 2.9668,
|
|
"step": 4865
|
|
},
|
|
{
|
|
"epoch": 1.5382610755745083,
|
|
"grad_norm": 0.09629159206346939,
|
|
"learning_rate": 0.0019823718815842974,
|
|
"loss": 2.9568,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 1.5398404801389876,
|
|
"grad_norm": 0.13118955864125037,
|
|
"learning_rate": 0.0019822686463395406,
|
|
"loss": 2.9814,
|
|
"step": 4875
|
|
},
|
|
{
|
|
"epoch": 1.5414198847034668,
|
|
"grad_norm": 0.11129128119414668,
|
|
"learning_rate": 0.001982165112393164,
|
|
"loss": 2.8452,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 1.542999289267946,
|
|
"grad_norm": 0.11644279357587702,
|
|
"learning_rate": 0.0019820612797766526,
|
|
"loss": 2.9676,
|
|
"step": 4885
|
|
},
|
|
{
|
|
"epoch": 1.5445786938324253,
|
|
"grad_norm": 0.09994823334524525,
|
|
"learning_rate": 0.00198195714852158,
|
|
"loss": 2.8952,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 1.5461580983969043,
|
|
"grad_norm": 0.10139353655854862,
|
|
"learning_rate": 0.0019818527186596124,
|
|
"loss": 2.9744,
|
|
"step": 4895
|
|
},
|
|
{
|
|
"epoch": 1.5477375029613836,
|
|
"grad_norm": 0.1039446792162674,
|
|
"learning_rate": 0.0019817479902225067,
|
|
"loss": 2.9435,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 1.5493169075258626,
|
|
"grad_norm": 0.0796025536388445,
|
|
"learning_rate": 0.0019816429632421094,
|
|
"loss": 2.8935,
|
|
"step": 4905
|
|
},
|
|
{
|
|
"epoch": 1.5508963120903418,
|
|
"grad_norm": 0.10209172127058755,
|
|
"learning_rate": 0.0019815376377503593,
|
|
"loss": 2.9229,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 1.552475716654821,
|
|
"grad_norm": 0.10910718583932719,
|
|
"learning_rate": 0.001981432013779284,
|
|
"loss": 3.0488,
|
|
"step": 4915
|
|
},
|
|
{
|
|
"epoch": 1.5540551212193003,
|
|
"grad_norm": 0.1521828551806453,
|
|
"learning_rate": 0.0019813260913610045,
|
|
"loss": 2.9689,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 1.5556345257837796,
|
|
"grad_norm": 0.10994423634599962,
|
|
"learning_rate": 0.0019812198705277304,
|
|
"loss": 2.9151,
|
|
"step": 4925
|
|
},
|
|
{
|
|
"epoch": 1.5572139303482588,
|
|
"grad_norm": 0.09818489043815783,
|
|
"learning_rate": 0.0019811133513117627,
|
|
"loss": 2.8832,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 1.5587933349127379,
|
|
"grad_norm": 0.11140774818652234,
|
|
"learning_rate": 0.0019810065337454935,
|
|
"loss": 3.035,
|
|
"step": 4935
|
|
},
|
|
{
|
|
"epoch": 1.560372739477217,
|
|
"grad_norm": 0.1280699733503442,
|
|
"learning_rate": 0.001980899417861405,
|
|
"loss": 2.9458,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 1.5619521440416961,
|
|
"grad_norm": 0.1331934725393578,
|
|
"learning_rate": 0.001980792003692071,
|
|
"loss": 3.0324,
|
|
"step": 4945
|
|
},
|
|
{
|
|
"epoch": 1.5635315486061754,
|
|
"grad_norm": 0.11489890170242362,
|
|
"learning_rate": 0.001980684291270155,
|
|
"loss": 2.8935,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 1.5651109531706546,
|
|
"grad_norm": 0.09146807750801558,
|
|
"learning_rate": 0.001980576280628412,
|
|
"loss": 2.9042,
|
|
"step": 4955
|
|
},
|
|
{
|
|
"epoch": 1.5666903577351339,
|
|
"grad_norm": 0.10620129749069569,
|
|
"learning_rate": 0.0019804679717996864,
|
|
"loss": 3.0244,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 1.5682697622996131,
|
|
"grad_norm": 0.11571456540243834,
|
|
"learning_rate": 0.001980359364816916,
|
|
"loss": 2.8789,
|
|
"step": 4965
|
|
},
|
|
{
|
|
"epoch": 1.5698491668640924,
|
|
"grad_norm": 0.10809737964437593,
|
|
"learning_rate": 0.001980250459713126,
|
|
"loss": 2.9525,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 1.5714285714285714,
|
|
"grad_norm": 0.09186481444865727,
|
|
"learning_rate": 0.001980141256521434,
|
|
"loss": 2.9055,
|
|
"step": 4975
|
|
},
|
|
{
|
|
"epoch": 1.5730079759930506,
|
|
"grad_norm": 0.10354895162682684,
|
|
"learning_rate": 0.001980031755275048,
|
|
"loss": 2.9214,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 1.5745873805575297,
|
|
"grad_norm": 0.10228779853102214,
|
|
"learning_rate": 0.001979921956007267,
|
|
"loss": 2.9981,
|
|
"step": 4985
|
|
},
|
|
{
|
|
"epoch": 1.576166785122009,
|
|
"grad_norm": 0.11682083874165497,
|
|
"learning_rate": 0.0019798118587514802,
|
|
"loss": 3.0015,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 1.5777461896864882,
|
|
"grad_norm": 0.12403310979186538,
|
|
"learning_rate": 0.0019797014635411676,
|
|
"loss": 2.9444,
|
|
"step": 4995
|
|
},
|
|
{
|
|
"epoch": 1.5793255942509674,
|
|
"grad_norm": 0.11390491651356927,
|
|
"learning_rate": 0.001979590770409899,
|
|
"loss": 2.9445,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 1.5809049988154467,
|
|
"grad_norm": 0.10930893827752045,
|
|
"learning_rate": 0.001979479779391336,
|
|
"loss": 2.8585,
|
|
"step": 5005
|
|
},
|
|
{
|
|
"epoch": 1.582484403379926,
|
|
"grad_norm": 0.11160804717514534,
|
|
"learning_rate": 0.0019793684905192303,
|
|
"loss": 2.9065,
|
|
"step": 5010
|
|
},
|
|
{
|
|
"epoch": 1.584063807944405,
|
|
"grad_norm": 0.10915732890128317,
|
|
"learning_rate": 0.001979256903827424,
|
|
"loss": 2.993,
|
|
"step": 5015
|
|
},
|
|
{
|
|
"epoch": 1.5856432125088842,
|
|
"grad_norm": 0.07314647716296047,
|
|
"learning_rate": 0.0019791450193498497,
|
|
"loss": 2.849,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 1.5872226170733632,
|
|
"grad_norm": 0.07912808490967632,
|
|
"learning_rate": 0.001979032837120531,
|
|
"loss": 2.9493,
|
|
"step": 5025
|
|
},
|
|
{
|
|
"epoch": 1.5888020216378425,
|
|
"grad_norm": 0.14762568424607497,
|
|
"learning_rate": 0.001978920357173582,
|
|
"loss": 2.9197,
|
|
"step": 5030
|
|
},
|
|
{
|
|
"epoch": 1.5903814262023217,
|
|
"grad_norm": 0.15722179817288173,
|
|
"learning_rate": 0.0019788075795432064,
|
|
"loss": 2.9489,
|
|
"step": 5035
|
|
},
|
|
{
|
|
"epoch": 1.591960830766801,
|
|
"grad_norm": 0.1389110492503963,
|
|
"learning_rate": 0.0019786945042637,
|
|
"loss": 2.9015,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 1.5935402353312802,
|
|
"grad_norm": 0.11722287293596434,
|
|
"learning_rate": 0.0019785811313694475,
|
|
"loss": 2.9693,
|
|
"step": 5045
|
|
},
|
|
{
|
|
"epoch": 1.5951196398957594,
|
|
"grad_norm": 0.1176910685832534,
|
|
"learning_rate": 0.0019784674608949258,
|
|
"loss": 2.9582,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 1.5966990444602385,
|
|
"grad_norm": 0.1076042621246041,
|
|
"learning_rate": 0.0019783534928747007,
|
|
"loss": 3.05,
|
|
"step": 5055
|
|
},
|
|
{
|
|
"epoch": 1.5982784490247177,
|
|
"grad_norm": 0.08073816464558037,
|
|
"learning_rate": 0.001978239227343429,
|
|
"loss": 2.8257,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 1.5998578535891967,
|
|
"grad_norm": 0.09398261646040879,
|
|
"learning_rate": 0.0019781246643358584,
|
|
"loss": 2.9379,
|
|
"step": 5065
|
|
},
|
|
{
|
|
"epoch": 1.601437258153676,
|
|
"grad_norm": 0.15458833787235945,
|
|
"learning_rate": 0.001978009803886827,
|
|
"loss": 2.8705,
|
|
"step": 5070
|
|
},
|
|
{
|
|
"epoch": 1.6030166627181552,
|
|
"grad_norm": 0.09455723991841981,
|
|
"learning_rate": 0.001977894646031263,
|
|
"loss": 2.9589,
|
|
"step": 5075
|
|
},
|
|
{
|
|
"epoch": 1.6045960672826345,
|
|
"grad_norm": 0.12283382903989729,
|
|
"learning_rate": 0.001977779190804185,
|
|
"loss": 2.9334,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 1.6061754718471137,
|
|
"grad_norm": 0.09996430272852855,
|
|
"learning_rate": 0.0019776634382407026,
|
|
"loss": 2.9807,
|
|
"step": 5085
|
|
},
|
|
{
|
|
"epoch": 1.607754876411593,
|
|
"grad_norm": 0.1110137840520642,
|
|
"learning_rate": 0.0019775473883760146,
|
|
"loss": 2.8746,
|
|
"step": 5090
|
|
},
|
|
{
|
|
"epoch": 1.609334280976072,
|
|
"grad_norm": 0.09264037749590708,
|
|
"learning_rate": 0.0019774310412454116,
|
|
"loss": 2.9106,
|
|
"step": 5095
|
|
},
|
|
{
|
|
"epoch": 1.6109136855405513,
|
|
"grad_norm": 0.09037511086608532,
|
|
"learning_rate": 0.001977314396884274,
|
|
"loss": 2.9531,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 1.6124930901050303,
|
|
"grad_norm": 0.0975055802153603,
|
|
"learning_rate": 0.0019771974553280725,
|
|
"loss": 2.8963,
|
|
"step": 5105
|
|
},
|
|
{
|
|
"epoch": 1.6140724946695095,
|
|
"grad_norm": 0.11115116771807802,
|
|
"learning_rate": 0.0019770802166123687,
|
|
"loss": 2.8541,
|
|
"step": 5110
|
|
},
|
|
{
|
|
"epoch": 1.6156518992339888,
|
|
"grad_norm": 0.10695337184066091,
|
|
"learning_rate": 0.001976962680772813,
|
|
"loss": 2.9809,
|
|
"step": 5115
|
|
},
|
|
{
|
|
"epoch": 1.617231303798468,
|
|
"grad_norm": 0.104160743986913,
|
|
"learning_rate": 0.001976844847845149,
|
|
"loss": 3.0279,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 1.6188107083629473,
|
|
"grad_norm": 0.12343849835439115,
|
|
"learning_rate": 0.0019767267178652076,
|
|
"loss": 2.8222,
|
|
"step": 5125
|
|
},
|
|
{
|
|
"epoch": 1.6203901129274265,
|
|
"grad_norm": 0.0985309741194177,
|
|
"learning_rate": 0.0019766082908689118,
|
|
"loss": 2.9535,
|
|
"step": 5130
|
|
},
|
|
{
|
|
"epoch": 1.6219695174919055,
|
|
"grad_norm": 0.1007765759478301,
|
|
"learning_rate": 0.001976489566892274,
|
|
"loss": 2.9793,
|
|
"step": 5135
|
|
},
|
|
{
|
|
"epoch": 1.6235489220563848,
|
|
"grad_norm": 0.12202324187509646,
|
|
"learning_rate": 0.0019763705459713986,
|
|
"loss": 2.9642,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 1.6251283266208638,
|
|
"grad_norm": 0.09713677698438208,
|
|
"learning_rate": 0.0019762512281424776,
|
|
"loss": 2.9162,
|
|
"step": 5145
|
|
},
|
|
{
|
|
"epoch": 1.626707731185343,
|
|
"grad_norm": 0.08602069645790571,
|
|
"learning_rate": 0.001976131613441796,
|
|
"loss": 2.9203,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 1.6282871357498223,
|
|
"grad_norm": 0.1145716985651162,
|
|
"learning_rate": 0.0019760117019057277,
|
|
"loss": 2.9748,
|
|
"step": 5155
|
|
},
|
|
{
|
|
"epoch": 1.6298665403143016,
|
|
"grad_norm": 0.10787362431996148,
|
|
"learning_rate": 0.001975891493570737,
|
|
"loss": 3.0134,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 1.6314459448787808,
|
|
"grad_norm": 0.11064743389167223,
|
|
"learning_rate": 0.0019757709884733773,
|
|
"loss": 2.9397,
|
|
"step": 5165
|
|
},
|
|
{
|
|
"epoch": 1.63302534944326,
|
|
"grad_norm": 0.08614236606541124,
|
|
"learning_rate": 0.001975650186650295,
|
|
"loss": 2.9192,
|
|
"step": 5170
|
|
},
|
|
{
|
|
"epoch": 1.634604754007739,
|
|
"grad_norm": 0.09366852847388188,
|
|
"learning_rate": 0.0019755290881382243,
|
|
"loss": 2.9402,
|
|
"step": 5175
|
|
},
|
|
{
|
|
"epoch": 1.6361841585722183,
|
|
"grad_norm": 0.09730626420222427,
|
|
"learning_rate": 0.0019754076929739905,
|
|
"loss": 2.8907,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 1.6377635631366974,
|
|
"grad_norm": 0.10083090191566357,
|
|
"learning_rate": 0.00197528600119451,
|
|
"loss": 2.9226,
|
|
"step": 5185
|
|
},
|
|
{
|
|
"epoch": 1.6393429677011766,
|
|
"grad_norm": 0.1050001786747672,
|
|
"learning_rate": 0.0019751640128367874,
|
|
"loss": 2.9151,
|
|
"step": 5190
|
|
},
|
|
{
|
|
"epoch": 1.6409223722656558,
|
|
"grad_norm": 0.1020005922841891,
|
|
"learning_rate": 0.001975041727937919,
|
|
"loss": 2.8825,
|
|
"step": 5195
|
|
},
|
|
{
|
|
"epoch": 1.642501776830135,
|
|
"grad_norm": 0.09371317149531022,
|
|
"learning_rate": 0.001974919146535091,
|
|
"loss": 2.9128,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 1.6440811813946143,
|
|
"grad_norm": 0.10562421951491806,
|
|
"learning_rate": 0.001974796268665579,
|
|
"loss": 2.8204,
|
|
"step": 5205
|
|
},
|
|
{
|
|
"epoch": 1.6456605859590934,
|
|
"grad_norm": 0.11761558427266451,
|
|
"learning_rate": 0.0019746730943667502,
|
|
"loss": 2.9857,
|
|
"step": 5210
|
|
},
|
|
{
|
|
"epoch": 1.6472399905235726,
|
|
"grad_norm": 0.10865044339281003,
|
|
"learning_rate": 0.001974549623676061,
|
|
"loss": 2.8921,
|
|
"step": 5215
|
|
},
|
|
{
|
|
"epoch": 1.6488193950880516,
|
|
"grad_norm": 0.11581069329966616,
|
|
"learning_rate": 0.0019744258566310575,
|
|
"loss": 2.878,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 1.650398799652531,
|
|
"grad_norm": 0.1170453726056059,
|
|
"learning_rate": 0.0019743017932693763,
|
|
"loss": 2.8566,
|
|
"step": 5225
|
|
},
|
|
{
|
|
"epoch": 1.6519782042170101,
|
|
"grad_norm": 0.10845987251851028,
|
|
"learning_rate": 0.0019741774336287455,
|
|
"loss": 2.9,
|
|
"step": 5230
|
|
},
|
|
{
|
|
"epoch": 1.6535576087814894,
|
|
"grad_norm": 0.1024077422432529,
|
|
"learning_rate": 0.001974052777746981,
|
|
"loss": 2.9584,
|
|
"step": 5235
|
|
},
|
|
{
|
|
"epoch": 1.6551370133459686,
|
|
"grad_norm": 0.09824210313823284,
|
|
"learning_rate": 0.0019739278256619905,
|
|
"loss": 2.8304,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 1.6567164179104479,
|
|
"grad_norm": 0.09766997021888332,
|
|
"learning_rate": 0.0019738025774117705,
|
|
"loss": 2.9197,
|
|
"step": 5245
|
|
},
|
|
{
|
|
"epoch": 1.658295822474927,
|
|
"grad_norm": 0.11670891039271215,
|
|
"learning_rate": 0.0019736770330344086,
|
|
"loss": 2.829,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 1.6598752270394062,
|
|
"grad_norm": 0.10093571661443894,
|
|
"learning_rate": 0.001973551192568082,
|
|
"loss": 2.8732,
|
|
"step": 5255
|
|
},
|
|
{
|
|
"epoch": 1.6614546316038852,
|
|
"grad_norm": 0.10502890462616174,
|
|
"learning_rate": 0.001973425056051058,
|
|
"loss": 2.8835,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 1.6630340361683644,
|
|
"grad_norm": 0.08830167834623308,
|
|
"learning_rate": 0.0019732986235216935,
|
|
"loss": 2.7945,
|
|
"step": 5265
|
|
},
|
|
{
|
|
"epoch": 1.6646134407328437,
|
|
"grad_norm": 0.08185027792051866,
|
|
"learning_rate": 0.0019731718950184367,
|
|
"loss": 2.8968,
|
|
"step": 5270
|
|
},
|
|
{
|
|
"epoch": 1.666192845297323,
|
|
"grad_norm": 0.110446561845335,
|
|
"learning_rate": 0.0019730448705798237,
|
|
"loss": 2.8783,
|
|
"step": 5275
|
|
},
|
|
{
|
|
"epoch": 1.6677722498618022,
|
|
"grad_norm": 0.08209502578334943,
|
|
"learning_rate": 0.001972917550244483,
|
|
"loss": 2.9148,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 1.6693516544262814,
|
|
"grad_norm": 0.11105492789083406,
|
|
"learning_rate": 0.001972789934051131,
|
|
"loss": 2.8242,
|
|
"step": 5285
|
|
},
|
|
{
|
|
"epoch": 1.6709310589907604,
|
|
"grad_norm": 0.10894083448820191,
|
|
"learning_rate": 0.001972662022038576,
|
|
"loss": 2.8603,
|
|
"step": 5290
|
|
},
|
|
{
|
|
"epoch": 1.6725104635552397,
|
|
"grad_norm": 0.11154559870326458,
|
|
"learning_rate": 0.0019725338142457145,
|
|
"loss": 2.8131,
|
|
"step": 5295
|
|
},
|
|
{
|
|
"epoch": 1.6740898681197187,
|
|
"grad_norm": 0.10610140342224161,
|
|
"learning_rate": 0.0019724053107115338,
|
|
"loss": 2.8424,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 1.675669272684198,
|
|
"grad_norm": 0.10471075913756912,
|
|
"learning_rate": 0.0019722765114751103,
|
|
"loss": 2.9517,
|
|
"step": 5305
|
|
},
|
|
{
|
|
"epoch": 1.6772486772486772,
|
|
"grad_norm": 0.10961610964173045,
|
|
"learning_rate": 0.001972147416575612,
|
|
"loss": 2.9874,
|
|
"step": 5310
|
|
},
|
|
{
|
|
"epoch": 1.6788280818131565,
|
|
"grad_norm": 0.11590908074547357,
|
|
"learning_rate": 0.001972018026052296,
|
|
"loss": 2.9816,
|
|
"step": 5315
|
|
},
|
|
{
|
|
"epoch": 1.6804074863776357,
|
|
"grad_norm": 0.09014877820325726,
|
|
"learning_rate": 0.0019718883399445085,
|
|
"loss": 2.8988,
|
|
"step": 5320
|
|
},
|
|
{
|
|
"epoch": 1.681986890942115,
|
|
"grad_norm": 0.08843261743895112,
|
|
"learning_rate": 0.001971758358291686,
|
|
"loss": 2.8956,
|
|
"step": 5325
|
|
},
|
|
{
|
|
"epoch": 1.683566295506594,
|
|
"grad_norm": 0.13524407413650683,
|
|
"learning_rate": 0.001971628081133356,
|
|
"loss": 2.8855,
|
|
"step": 5330
|
|
},
|
|
{
|
|
"epoch": 1.6851457000710732,
|
|
"grad_norm": 0.125877431534522,
|
|
"learning_rate": 0.001971497508509134,
|
|
"loss": 2.8863,
|
|
"step": 5335
|
|
},
|
|
{
|
|
"epoch": 1.6867251046355523,
|
|
"grad_norm": 0.08065617976051762,
|
|
"learning_rate": 0.0019713666404587273,
|
|
"loss": 2.8977,
|
|
"step": 5340
|
|
},
|
|
{
|
|
"epoch": 1.6883045092000315,
|
|
"grad_norm": 0.08766136947767611,
|
|
"learning_rate": 0.001971235477021931,
|
|
"loss": 2.9427,
|
|
"step": 5345
|
|
},
|
|
{
|
|
"epoch": 1.6898839137645107,
|
|
"grad_norm": 0.09373146559188211,
|
|
"learning_rate": 0.0019711040182386315,
|
|
"loss": 2.9298,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 1.69146331832899,
|
|
"grad_norm": 0.09378089597874405,
|
|
"learning_rate": 0.001970972264148805,
|
|
"loss": 2.8447,
|
|
"step": 5355
|
|
},
|
|
{
|
|
"epoch": 1.6930427228934692,
|
|
"grad_norm": 0.08863466546773448,
|
|
"learning_rate": 0.001970840214792516,
|
|
"loss": 2.9024,
|
|
"step": 5360
|
|
},
|
|
{
|
|
"epoch": 1.6946221274579485,
|
|
"grad_norm": 0.09604705858875463,
|
|
"learning_rate": 0.001970707870209921,
|
|
"loss": 2.8971,
|
|
"step": 5365
|
|
},
|
|
{
|
|
"epoch": 1.6962015320224275,
|
|
"grad_norm": 0.08691993851387995,
|
|
"learning_rate": 0.0019705752304412646,
|
|
"loss": 2.8304,
|
|
"step": 5370
|
|
},
|
|
{
|
|
"epoch": 1.6977809365869068,
|
|
"grad_norm": 0.08637512290496353,
|
|
"learning_rate": 0.001970442295526882,
|
|
"loss": 2.863,
|
|
"step": 5375
|
|
},
|
|
{
|
|
"epoch": 1.6993603411513858,
|
|
"grad_norm": 0.11610905002376368,
|
|
"learning_rate": 0.0019703090655071977,
|
|
"loss": 2.9165,
|
|
"step": 5380
|
|
},
|
|
{
|
|
"epoch": 1.700939745715865,
|
|
"grad_norm": 0.0974746503636226,
|
|
"learning_rate": 0.001970175540422726,
|
|
"loss": 2.8237,
|
|
"step": 5385
|
|
},
|
|
{
|
|
"epoch": 1.7025191502803443,
|
|
"grad_norm": 0.08653648143193664,
|
|
"learning_rate": 0.0019700417203140706,
|
|
"loss": 2.8013,
|
|
"step": 5390
|
|
},
|
|
{
|
|
"epoch": 1.7040985548448235,
|
|
"grad_norm": 0.10331561155385749,
|
|
"learning_rate": 0.0019699076052219263,
|
|
"loss": 2.8358,
|
|
"step": 5395
|
|
},
|
|
{
|
|
"epoch": 1.7056779594093028,
|
|
"grad_norm": 0.10734105048757829,
|
|
"learning_rate": 0.001969773195187076,
|
|
"loss": 2.8952,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 1.707257363973782,
|
|
"grad_norm": 0.10187960767446501,
|
|
"learning_rate": 0.001969638490250393,
|
|
"loss": 2.8426,
|
|
"step": 5405
|
|
},
|
|
{
|
|
"epoch": 1.708836768538261,
|
|
"grad_norm": 0.10952454351464175,
|
|
"learning_rate": 0.0019695034904528407,
|
|
"loss": 2.8591,
|
|
"step": 5410
|
|
},
|
|
{
|
|
"epoch": 1.7104161731027403,
|
|
"grad_norm": 0.11794184071738911,
|
|
"learning_rate": 0.0019693681958354707,
|
|
"loss": 3.0401,
|
|
"step": 5415
|
|
},
|
|
{
|
|
"epoch": 1.7119955776672193,
|
|
"grad_norm": 0.11416879924650811,
|
|
"learning_rate": 0.0019692326064394265,
|
|
"loss": 2.8389,
|
|
"step": 5420
|
|
},
|
|
{
|
|
"epoch": 1.7135749822316986,
|
|
"grad_norm": 0.11238489422970098,
|
|
"learning_rate": 0.0019690967223059386,
|
|
"loss": 2.8062,
|
|
"step": 5425
|
|
},
|
|
{
|
|
"epoch": 1.7151543867961778,
|
|
"grad_norm": 0.11617189166521032,
|
|
"learning_rate": 0.00196896054347633,
|
|
"loss": 2.851,
|
|
"step": 5430
|
|
},
|
|
{
|
|
"epoch": 1.716733791360657,
|
|
"grad_norm": 0.11380891130531372,
|
|
"learning_rate": 0.00196882406999201,
|
|
"loss": 2.8799,
|
|
"step": 5435
|
|
},
|
|
{
|
|
"epoch": 1.7183131959251363,
|
|
"grad_norm": 0.0892821522527821,
|
|
"learning_rate": 0.001968687301894481,
|
|
"loss": 2.826,
|
|
"step": 5440
|
|
},
|
|
{
|
|
"epoch": 1.7198926004896156,
|
|
"grad_norm": 0.129963031390663,
|
|
"learning_rate": 0.0019685502392253326,
|
|
"loss": 2.7768,
|
|
"step": 5445
|
|
},
|
|
{
|
|
"epoch": 1.7214720050540946,
|
|
"grad_norm": 0.14752416451669453,
|
|
"learning_rate": 0.0019684128820262443,
|
|
"loss": 2.9128,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 1.7230514096185738,
|
|
"grad_norm": 0.10287349342814446,
|
|
"learning_rate": 0.001968275230338986,
|
|
"loss": 2.9499,
|
|
"step": 5455
|
|
},
|
|
{
|
|
"epoch": 1.7246308141830529,
|
|
"grad_norm": 0.09967919811875595,
|
|
"learning_rate": 0.001968137284205417,
|
|
"loss": 2.777,
|
|
"step": 5460
|
|
},
|
|
{
|
|
"epoch": 1.726210218747532,
|
|
"grad_norm": 0.10413546356378635,
|
|
"learning_rate": 0.001967999043667485,
|
|
"loss": 2.8979,
|
|
"step": 5465
|
|
},
|
|
{
|
|
"epoch": 1.7277896233120114,
|
|
"grad_norm": 0.1072432492489738,
|
|
"learning_rate": 0.001967860508767229,
|
|
"loss": 2.8283,
|
|
"step": 5470
|
|
},
|
|
{
|
|
"epoch": 1.7293690278764906,
|
|
"grad_norm": 0.09655611693814051,
|
|
"learning_rate": 0.001967721679546776,
|
|
"loss": 2.7948,
|
|
"step": 5475
|
|
},
|
|
{
|
|
"epoch": 1.7309484324409699,
|
|
"grad_norm": 0.08712375046387344,
|
|
"learning_rate": 0.001967582556048343,
|
|
"loss": 2.7723,
|
|
"step": 5480
|
|
},
|
|
{
|
|
"epoch": 1.732527837005449,
|
|
"grad_norm": 0.09225677250022102,
|
|
"learning_rate": 0.001967443138314237,
|
|
"loss": 2.8686,
|
|
"step": 5485
|
|
},
|
|
{
|
|
"epoch": 1.7341072415699281,
|
|
"grad_norm": 0.10393698580518194,
|
|
"learning_rate": 0.001967303426386853,
|
|
"loss": 2.9854,
|
|
"step": 5490
|
|
},
|
|
{
|
|
"epoch": 1.7356866461344074,
|
|
"grad_norm": 0.09691227894220021,
|
|
"learning_rate": 0.0019671634203086786,
|
|
"loss": 2.8644,
|
|
"step": 5495
|
|
},
|
|
{
|
|
"epoch": 1.7372660506988864,
|
|
"grad_norm": 0.08534322592873014,
|
|
"learning_rate": 0.0019670231201222867,
|
|
"loss": 2.814,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 1.7388454552633656,
|
|
"grad_norm": 0.11043464541805863,
|
|
"learning_rate": 0.0019668825258703426,
|
|
"loss": 2.9104,
|
|
"step": 5505
|
|
},
|
|
{
|
|
"epoch": 1.740424859827845,
|
|
"grad_norm": 0.11923335025448265,
|
|
"learning_rate": 0.0019667416375955997,
|
|
"loss": 2.8873,
|
|
"step": 5510
|
|
},
|
|
{
|
|
"epoch": 1.7420042643923241,
|
|
"grad_norm": 0.0944140055846587,
|
|
"learning_rate": 0.001966600455340902,
|
|
"loss": 2.8354,
|
|
"step": 5515
|
|
},
|
|
{
|
|
"epoch": 1.7435836689568034,
|
|
"grad_norm": 0.09289305803071748,
|
|
"learning_rate": 0.0019664589791491814,
|
|
"loss": 2.8526,
|
|
"step": 5520
|
|
},
|
|
{
|
|
"epoch": 1.7451630735212826,
|
|
"grad_norm": 0.10652799839126612,
|
|
"learning_rate": 0.00196631720906346,
|
|
"loss": 2.7941,
|
|
"step": 5525
|
|
},
|
|
{
|
|
"epoch": 1.7467424780857617,
|
|
"grad_norm": 0.08877425547680012,
|
|
"learning_rate": 0.0019661751451268495,
|
|
"loss": 2.7967,
|
|
"step": 5530
|
|
},
|
|
{
|
|
"epoch": 1.748321882650241,
|
|
"grad_norm": 0.09920541060889011,
|
|
"learning_rate": 0.00196603278738255,
|
|
"loss": 2.8083,
|
|
"step": 5535
|
|
},
|
|
{
|
|
"epoch": 1.74990128721472,
|
|
"grad_norm": 0.11134150673030374,
|
|
"learning_rate": 0.001965890135873852,
|
|
"loss": 2.7912,
|
|
"step": 5540
|
|
},
|
|
{
|
|
"epoch": 1.7514806917791992,
|
|
"grad_norm": 0.08671812361927851,
|
|
"learning_rate": 0.0019657471906441354,
|
|
"loss": 2.8215,
|
|
"step": 5545
|
|
},
|
|
{
|
|
"epoch": 1.7530600963436784,
|
|
"grad_norm": 0.09189414759023765,
|
|
"learning_rate": 0.0019656039517368684,
|
|
"loss": 2.848,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 1.7546395009081577,
|
|
"grad_norm": 0.08457665537333102,
|
|
"learning_rate": 0.0019654604191956093,
|
|
"loss": 2.8594,
|
|
"step": 5555
|
|
},
|
|
{
|
|
"epoch": 1.756218905472637,
|
|
"grad_norm": 0.08088442370962645,
|
|
"learning_rate": 0.0019653165930640045,
|
|
"loss": 2.858,
|
|
"step": 5560
|
|
},
|
|
{
|
|
"epoch": 1.757798310037116,
|
|
"grad_norm": 0.08221707893249937,
|
|
"learning_rate": 0.0019651724733857918,
|
|
"loss": 2.7932,
|
|
"step": 5565
|
|
},
|
|
{
|
|
"epoch": 1.7593777146015952,
|
|
"grad_norm": 0.10413482648930877,
|
|
"learning_rate": 0.0019650280602047966,
|
|
"loss": 2.8853,
|
|
"step": 5570
|
|
},
|
|
{
|
|
"epoch": 1.7609571191660742,
|
|
"grad_norm": 0.09223354313728577,
|
|
"learning_rate": 0.001964883353564934,
|
|
"loss": 2.8474,
|
|
"step": 5575
|
|
},
|
|
{
|
|
"epoch": 1.7625365237305535,
|
|
"grad_norm": 0.11170715489075345,
|
|
"learning_rate": 0.0019647383535102082,
|
|
"loss": 2.7924,
|
|
"step": 5580
|
|
},
|
|
{
|
|
"epoch": 1.7641159282950327,
|
|
"grad_norm": 0.10085002954995868,
|
|
"learning_rate": 0.0019645930600847134,
|
|
"loss": 2.8615,
|
|
"step": 5585
|
|
},
|
|
{
|
|
"epoch": 1.765695332859512,
|
|
"grad_norm": 0.13033848574238788,
|
|
"learning_rate": 0.0019644474733326316,
|
|
"loss": 2.8704,
|
|
"step": 5590
|
|
},
|
|
{
|
|
"epoch": 1.7672747374239912,
|
|
"grad_norm": 0.10626223537711284,
|
|
"learning_rate": 0.0019643015932982355,
|
|
"loss": 2.8016,
|
|
"step": 5595
|
|
},
|
|
{
|
|
"epoch": 1.7688541419884705,
|
|
"grad_norm": 0.09109802005589106,
|
|
"learning_rate": 0.0019641554200258856,
|
|
"loss": 2.8567,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 1.7704335465529495,
|
|
"grad_norm": 0.105574279290575,
|
|
"learning_rate": 0.0019640089535600327,
|
|
"loss": 2.8931,
|
|
"step": 5605
|
|
},
|
|
{
|
|
"epoch": 1.7720129511174287,
|
|
"grad_norm": 0.09271871358737761,
|
|
"learning_rate": 0.0019638621939452165,
|
|
"loss": 2.8455,
|
|
"step": 5610
|
|
},
|
|
{
|
|
"epoch": 1.7735923556819078,
|
|
"grad_norm": 0.08940779528973208,
|
|
"learning_rate": 0.001963715141226065,
|
|
"loss": 2.8497,
|
|
"step": 5615
|
|
},
|
|
{
|
|
"epoch": 1.775171760246387,
|
|
"grad_norm": 0.09151765966803481,
|
|
"learning_rate": 0.001963567795447297,
|
|
"loss": 2.7759,
|
|
"step": 5620
|
|
},
|
|
{
|
|
"epoch": 1.7767511648108663,
|
|
"grad_norm": 0.12790289749531383,
|
|
"learning_rate": 0.0019634201566537182,
|
|
"loss": 2.8067,
|
|
"step": 5625
|
|
},
|
|
{
|
|
"epoch": 1.7783305693753455,
|
|
"grad_norm": 0.09309013128028407,
|
|
"learning_rate": 0.0019632722248902256,
|
|
"loss": 2.8082,
|
|
"step": 5630
|
|
},
|
|
{
|
|
"epoch": 1.7799099739398248,
|
|
"grad_norm": 0.08581911812029219,
|
|
"learning_rate": 0.0019631240002018035,
|
|
"loss": 2.8794,
|
|
"step": 5635
|
|
},
|
|
{
|
|
"epoch": 1.781489378504304,
|
|
"grad_norm": 0.09431687919055587,
|
|
"learning_rate": 0.001962975482633527,
|
|
"loss": 2.8182,
|
|
"step": 5640
|
|
},
|
|
{
|
|
"epoch": 1.783068783068783,
|
|
"grad_norm": 0.10147779341824549,
|
|
"learning_rate": 0.001962826672230559,
|
|
"loss": 2.823,
|
|
"step": 5645
|
|
},
|
|
{
|
|
"epoch": 1.7846481876332623,
|
|
"grad_norm": 0.10002615650609308,
|
|
"learning_rate": 0.001962677569038151,
|
|
"loss": 2.8715,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 1.7862275921977413,
|
|
"grad_norm": 0.07803919999016076,
|
|
"learning_rate": 0.0019625281731016453,
|
|
"loss": 2.8076,
|
|
"step": 5655
|
|
},
|
|
{
|
|
"epoch": 1.7878069967622205,
|
|
"grad_norm": 0.09534400601107142,
|
|
"learning_rate": 0.001962378484466472,
|
|
"loss": 2.7778,
|
|
"step": 5660
|
|
},
|
|
{
|
|
"epoch": 1.7893864013266998,
|
|
"grad_norm": 0.0671038402986734,
|
|
"learning_rate": 0.0019622285031781505,
|
|
"loss": 2.8857,
|
|
"step": 5665
|
|
},
|
|
{
|
|
"epoch": 1.790965805891179,
|
|
"grad_norm": 0.07972258446393975,
|
|
"learning_rate": 0.001962078229282289,
|
|
"loss": 2.8196,
|
|
"step": 5670
|
|
},
|
|
{
|
|
"epoch": 1.7925452104556583,
|
|
"grad_norm": 0.09681179345298509,
|
|
"learning_rate": 0.0019619276628245843,
|
|
"loss": 2.825,
|
|
"step": 5675
|
|
},
|
|
{
|
|
"epoch": 1.7941246150201375,
|
|
"grad_norm": 0.10801872450890859,
|
|
"learning_rate": 0.0019617768038508237,
|
|
"loss": 2.903,
|
|
"step": 5680
|
|
},
|
|
{
|
|
"epoch": 1.7957040195846166,
|
|
"grad_norm": 0.09544098038024276,
|
|
"learning_rate": 0.0019616256524068823,
|
|
"loss": 2.8387,
|
|
"step": 5685
|
|
},
|
|
{
|
|
"epoch": 1.7972834241490958,
|
|
"grad_norm": 0.1320662539458633,
|
|
"learning_rate": 0.001961474208538723,
|
|
"loss": 2.9244,
|
|
"step": 5690
|
|
},
|
|
{
|
|
"epoch": 1.7988628287135748,
|
|
"grad_norm": 0.12796798547022245,
|
|
"learning_rate": 0.0019613224722924007,
|
|
"loss": 2.9441,
|
|
"step": 5695
|
|
},
|
|
{
|
|
"epoch": 1.800442233278054,
|
|
"grad_norm": 0.14013468102503435,
|
|
"learning_rate": 0.0019611704437140567,
|
|
"loss": 2.9045,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 1.8020216378425333,
|
|
"grad_norm": 0.10693907084566522,
|
|
"learning_rate": 0.0019610181228499218,
|
|
"loss": 2.8957,
|
|
"step": 5705
|
|
},
|
|
{
|
|
"epoch": 1.8036010424070126,
|
|
"grad_norm": 0.10501285453729153,
|
|
"learning_rate": 0.0019608655097463155,
|
|
"loss": 2.8295,
|
|
"step": 5710
|
|
},
|
|
{
|
|
"epoch": 1.8051804469714918,
|
|
"grad_norm": 0.08292288367824334,
|
|
"learning_rate": 0.0019607126044496473,
|
|
"loss": 2.8416,
|
|
"step": 5715
|
|
},
|
|
{
|
|
"epoch": 1.806759851535971,
|
|
"grad_norm": 0.08426166408478367,
|
|
"learning_rate": 0.001960559407006414,
|
|
"loss": 2.8757,
|
|
"step": 5720
|
|
},
|
|
{
|
|
"epoch": 1.80833925610045,
|
|
"grad_norm": 0.12618186204150836,
|
|
"learning_rate": 0.001960405917463202,
|
|
"loss": 2.8091,
|
|
"step": 5725
|
|
},
|
|
{
|
|
"epoch": 1.8099186606649293,
|
|
"grad_norm": 0.0979784528685163,
|
|
"learning_rate": 0.001960252135866687,
|
|
"loss": 2.7894,
|
|
"step": 5730
|
|
},
|
|
{
|
|
"epoch": 1.8114980652294084,
|
|
"grad_norm": 0.09225963447119595,
|
|
"learning_rate": 0.0019600980622636326,
|
|
"loss": 2.8949,
|
|
"step": 5735
|
|
},
|
|
{
|
|
"epoch": 1.8130774697938876,
|
|
"grad_norm": 0.0957347564504516,
|
|
"learning_rate": 0.001959943696700892,
|
|
"loss": 2.792,
|
|
"step": 5740
|
|
},
|
|
{
|
|
"epoch": 1.8146568743583669,
|
|
"grad_norm": 0.09816516235276534,
|
|
"learning_rate": 0.001959789039225406,
|
|
"loss": 2.7762,
|
|
"step": 5745
|
|
},
|
|
{
|
|
"epoch": 1.8162362789228461,
|
|
"grad_norm": 0.14832825727107027,
|
|
"learning_rate": 0.0019596340898842056,
|
|
"loss": 2.8905,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 1.8178156834873254,
|
|
"grad_norm": 0.10127269862435555,
|
|
"learning_rate": 0.00195947884872441,
|
|
"loss": 2.9231,
|
|
"step": 5755
|
|
},
|
|
{
|
|
"epoch": 1.8193950880518046,
|
|
"grad_norm": 0.10596226828499104,
|
|
"learning_rate": 0.0019593233157932264,
|
|
"loss": 2.8561,
|
|
"step": 5760
|
|
},
|
|
{
|
|
"epoch": 1.8209744926162836,
|
|
"grad_norm": 0.08437118924922375,
|
|
"learning_rate": 0.001959167491137952,
|
|
"loss": 2.934,
|
|
"step": 5765
|
|
},
|
|
{
|
|
"epoch": 1.8225538971807629,
|
|
"grad_norm": 0.084182086109871,
|
|
"learning_rate": 0.0019590113748059715,
|
|
"loss": 2.8071,
|
|
"step": 5770
|
|
},
|
|
{
|
|
"epoch": 1.824133301745242,
|
|
"grad_norm": 0.08334964947133998,
|
|
"learning_rate": 0.0019588549668447595,
|
|
"loss": 2.8028,
|
|
"step": 5775
|
|
},
|
|
{
|
|
"epoch": 1.8257127063097212,
|
|
"grad_norm": 0.10502427527644426,
|
|
"learning_rate": 0.0019586982673018786,
|
|
"loss": 2.8621,
|
|
"step": 5780
|
|
},
|
|
{
|
|
"epoch": 1.8272921108742004,
|
|
"grad_norm": 0.08792831410856203,
|
|
"learning_rate": 0.00195854127622498,
|
|
"loss": 2.7531,
|
|
"step": 5785
|
|
},
|
|
{
|
|
"epoch": 1.8288715154386797,
|
|
"grad_norm": 0.0879256838351816,
|
|
"learning_rate": 0.0019583839936618028,
|
|
"loss": 2.848,
|
|
"step": 5790
|
|
},
|
|
{
|
|
"epoch": 1.830450920003159,
|
|
"grad_norm": 0.08223569631478067,
|
|
"learning_rate": 0.001958226419660177,
|
|
"loss": 2.7847,
|
|
"step": 5795
|
|
},
|
|
{
|
|
"epoch": 1.8320303245676381,
|
|
"grad_norm": 0.08954888379371354,
|
|
"learning_rate": 0.001958068554268019,
|
|
"loss": 2.8953,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 1.8336097291321172,
|
|
"grad_norm": 0.08809234715104736,
|
|
"learning_rate": 0.001957910397533335,
|
|
"loss": 2.9025,
|
|
"step": 5805
|
|
},
|
|
{
|
|
"epoch": 1.8351891336965964,
|
|
"grad_norm": 0.09182459314591035,
|
|
"learning_rate": 0.0019577519495042194,
|
|
"loss": 2.8099,
|
|
"step": 5810
|
|
},
|
|
{
|
|
"epoch": 1.8367685382610754,
|
|
"grad_norm": 0.0802234998272935,
|
|
"learning_rate": 0.0019575932102288553,
|
|
"loss": 2.8088,
|
|
"step": 5815
|
|
},
|
|
{
|
|
"epoch": 1.8383479428255547,
|
|
"grad_norm": 0.08500552989609395,
|
|
"learning_rate": 0.0019574341797555144,
|
|
"loss": 2.8726,
|
|
"step": 5820
|
|
},
|
|
{
|
|
"epoch": 1.839927347390034,
|
|
"grad_norm": 0.08692276443581538,
|
|
"learning_rate": 0.001957274858132556,
|
|
"loss": 2.8033,
|
|
"step": 5825
|
|
},
|
|
{
|
|
"epoch": 1.8415067519545132,
|
|
"grad_norm": 0.08476049437607958,
|
|
"learning_rate": 0.00195711524540843,
|
|
"loss": 2.8139,
|
|
"step": 5830
|
|
},
|
|
{
|
|
"epoch": 1.8430861565189924,
|
|
"grad_norm": 0.09529065856387889,
|
|
"learning_rate": 0.0019569553416316724,
|
|
"loss": 2.7915,
|
|
"step": 5835
|
|
},
|
|
{
|
|
"epoch": 1.8446655610834717,
|
|
"grad_norm": 0.10095333087758838,
|
|
"learning_rate": 0.0019567951468509102,
|
|
"loss": 2.8123,
|
|
"step": 5840
|
|
},
|
|
{
|
|
"epoch": 1.8462449656479507,
|
|
"grad_norm": 0.10951937044106012,
|
|
"learning_rate": 0.001956634661114857,
|
|
"loss": 2.7474,
|
|
"step": 5845
|
|
},
|
|
{
|
|
"epoch": 1.84782437021243,
|
|
"grad_norm": 0.12812453264923507,
|
|
"learning_rate": 0.001956473884472315,
|
|
"loss": 2.8927,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 1.849403774776909,
|
|
"grad_norm": 0.08917260558020511,
|
|
"learning_rate": 0.001956312816972176,
|
|
"loss": 2.767,
|
|
"step": 5855
|
|
},
|
|
{
|
|
"epoch": 1.8509831793413882,
|
|
"grad_norm": 0.09192768008047074,
|
|
"learning_rate": 0.00195615145866342,
|
|
"loss": 2.8303,
|
|
"step": 5860
|
|
},
|
|
{
|
|
"epoch": 1.8525625839058675,
|
|
"grad_norm": 0.08903746231326805,
|
|
"learning_rate": 0.0019559898095951136,
|
|
"loss": 2.7969,
|
|
"step": 5865
|
|
},
|
|
{
|
|
"epoch": 1.8541419884703467,
|
|
"grad_norm": 0.08198358367157664,
|
|
"learning_rate": 0.0019558278698164145,
|
|
"loss": 2.8563,
|
|
"step": 5870
|
|
},
|
|
{
|
|
"epoch": 1.855721393034826,
|
|
"grad_norm": 0.09826949164597297,
|
|
"learning_rate": 0.001955665639376567,
|
|
"loss": 2.9417,
|
|
"step": 5875
|
|
},
|
|
{
|
|
"epoch": 1.8573007975993052,
|
|
"grad_norm": 0.097023198766505,
|
|
"learning_rate": 0.0019555031183249045,
|
|
"loss": 2.9257,
|
|
"step": 5880
|
|
},
|
|
{
|
|
"epoch": 1.8588802021637842,
|
|
"grad_norm": 0.09128928830765581,
|
|
"learning_rate": 0.001955340306710849,
|
|
"loss": 2.8324,
|
|
"step": 5885
|
|
},
|
|
{
|
|
"epoch": 1.8604596067282635,
|
|
"grad_norm": 0.08445857191549251,
|
|
"learning_rate": 0.0019551772045839095,
|
|
"loss": 2.6883,
|
|
"step": 5890
|
|
},
|
|
{
|
|
"epoch": 1.8620390112927425,
|
|
"grad_norm": 0.07992959206633901,
|
|
"learning_rate": 0.0019550138119936848,
|
|
"loss": 2.876,
|
|
"step": 5895
|
|
},
|
|
{
|
|
"epoch": 1.8636184158572218,
|
|
"grad_norm": 0.0718896788646997,
|
|
"learning_rate": 0.001954850128989862,
|
|
"loss": 2.7931,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 1.865197820421701,
|
|
"grad_norm": 0.06519170978053945,
|
|
"learning_rate": 0.001954686155622216,
|
|
"loss": 2.8115,
|
|
"step": 5905
|
|
},
|
|
{
|
|
"epoch": 1.8667772249861803,
|
|
"grad_norm": 0.09720596950849911,
|
|
"learning_rate": 0.0019545218919406093,
|
|
"loss": 2.7797,
|
|
"step": 5910
|
|
},
|
|
{
|
|
"epoch": 1.8683566295506595,
|
|
"grad_norm": 0.12678939451280705,
|
|
"learning_rate": 0.001954357337994994,
|
|
"loss": 2.9284,
|
|
"step": 5915
|
|
},
|
|
{
|
|
"epoch": 1.8699360341151388,
|
|
"grad_norm": 0.12164911465261,
|
|
"learning_rate": 0.0019541924938354096,
|
|
"loss": 2.7888,
|
|
"step": 5920
|
|
},
|
|
{
|
|
"epoch": 1.8715154386796178,
|
|
"grad_norm": 0.12786708775383082,
|
|
"learning_rate": 0.001954027359511984,
|
|
"loss": 2.8952,
|
|
"step": 5925
|
|
},
|
|
{
|
|
"epoch": 1.873094843244097,
|
|
"grad_norm": 0.08646118211188386,
|
|
"learning_rate": 0.0019538619350749345,
|
|
"loss": 2.7687,
|
|
"step": 5930
|
|
},
|
|
{
|
|
"epoch": 1.874674247808576,
|
|
"grad_norm": 0.08618728690802949,
|
|
"learning_rate": 0.0019536962205745647,
|
|
"loss": 2.7424,
|
|
"step": 5935
|
|
},
|
|
{
|
|
"epoch": 1.8762536523730553,
|
|
"grad_norm": 0.07380398698166094,
|
|
"learning_rate": 0.001953530216061267,
|
|
"loss": 2.7674,
|
|
"step": 5940
|
|
},
|
|
{
|
|
"epoch": 1.8778330569375346,
|
|
"grad_norm": 0.13229423018200095,
|
|
"learning_rate": 0.0019533639215855237,
|
|
"loss": 2.7864,
|
|
"step": 5945
|
|
},
|
|
{
|
|
"epoch": 1.8794124615020138,
|
|
"grad_norm": 0.11409026363699672,
|
|
"learning_rate": 0.0019531973371979027,
|
|
"loss": 2.9098,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 1.880991866066493,
|
|
"grad_norm": 0.10927764439517655,
|
|
"learning_rate": 0.0019530304629490618,
|
|
"loss": 2.784,
|
|
"step": 5955
|
|
},
|
|
{
|
|
"epoch": 1.882571270630972,
|
|
"grad_norm": 0.08758344666216031,
|
|
"learning_rate": 0.0019528632988897458,
|
|
"loss": 2.7787,
|
|
"step": 5960
|
|
},
|
|
{
|
|
"epoch": 1.8841506751954513,
|
|
"grad_norm": 0.08989382431986725,
|
|
"learning_rate": 0.001952695845070789,
|
|
"loss": 2.8563,
|
|
"step": 5965
|
|
},
|
|
{
|
|
"epoch": 1.8857300797599303,
|
|
"grad_norm": 0.11095653749941818,
|
|
"learning_rate": 0.0019525281015431127,
|
|
"loss": 2.8349,
|
|
"step": 5970
|
|
},
|
|
{
|
|
"epoch": 1.8873094843244096,
|
|
"grad_norm": 0.09356354029199736,
|
|
"learning_rate": 0.0019523600683577264,
|
|
"loss": 2.772,
|
|
"step": 5975
|
|
},
|
|
{
|
|
"epoch": 1.8888888888888888,
|
|
"grad_norm": 0.09802457615928181,
|
|
"learning_rate": 0.001952191745565728,
|
|
"loss": 2.8127,
|
|
"step": 5980
|
|
},
|
|
{
|
|
"epoch": 1.890468293453368,
|
|
"grad_norm": 0.09130936925810829,
|
|
"learning_rate": 0.0019520231332183036,
|
|
"loss": 2.8453,
|
|
"step": 5985
|
|
},
|
|
{
|
|
"epoch": 1.8920476980178473,
|
|
"grad_norm": 0.08809660646950714,
|
|
"learning_rate": 0.001951854231366727,
|
|
"loss": 2.8222,
|
|
"step": 5990
|
|
},
|
|
{
|
|
"epoch": 1.8936271025823266,
|
|
"grad_norm": 0.11070882861672758,
|
|
"learning_rate": 0.0019516850400623604,
|
|
"loss": 2.7529,
|
|
"step": 5995
|
|
},
|
|
{
|
|
"epoch": 1.8952065071468056,
|
|
"grad_norm": 0.14618290547433194,
|
|
"learning_rate": 0.0019515155593566535,
|
|
"loss": 2.7643,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 1.8967859117112849,
|
|
"grad_norm": 0.1279958619359713,
|
|
"learning_rate": 0.0019513457893011444,
|
|
"loss": 2.7829,
|
|
"step": 6005
|
|
},
|
|
{
|
|
"epoch": 1.8983653162757639,
|
|
"grad_norm": 0.10863296125619862,
|
|
"learning_rate": 0.0019511757299474591,
|
|
"loss": 2.861,
|
|
"step": 6010
|
|
},
|
|
{
|
|
"epoch": 1.8999447208402431,
|
|
"grad_norm": 0.07358786598532395,
|
|
"learning_rate": 0.0019510053813473114,
|
|
"loss": 2.8414,
|
|
"step": 6015
|
|
},
|
|
{
|
|
"epoch": 1.9015241254047224,
|
|
"grad_norm": 0.07582738144151509,
|
|
"learning_rate": 0.0019508347435525037,
|
|
"loss": 2.8013,
|
|
"step": 6020
|
|
},
|
|
{
|
|
"epoch": 1.9031035299692016,
|
|
"grad_norm": 0.09336565960832502,
|
|
"learning_rate": 0.001950663816614925,
|
|
"loss": 2.9326,
|
|
"step": 6025
|
|
},
|
|
{
|
|
"epoch": 1.9046829345336809,
|
|
"grad_norm": 0.10824071803830503,
|
|
"learning_rate": 0.001950492600586554,
|
|
"loss": 2.8029,
|
|
"step": 6030
|
|
},
|
|
{
|
|
"epoch": 1.9062623390981601,
|
|
"grad_norm": 0.08575271033474953,
|
|
"learning_rate": 0.001950321095519456,
|
|
"loss": 2.8342,
|
|
"step": 6035
|
|
},
|
|
{
|
|
"epoch": 1.9078417436626391,
|
|
"grad_norm": 0.1018277175039707,
|
|
"learning_rate": 0.0019501493014657846,
|
|
"loss": 2.8839,
|
|
"step": 6040
|
|
},
|
|
{
|
|
"epoch": 1.9094211482271184,
|
|
"grad_norm": 0.09328347992445929,
|
|
"learning_rate": 0.0019499772184777813,
|
|
"loss": 2.8144,
|
|
"step": 6045
|
|
},
|
|
{
|
|
"epoch": 1.9110005527915974,
|
|
"grad_norm": 0.10485085743520843,
|
|
"learning_rate": 0.0019498048466077753,
|
|
"loss": 2.86,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 1.9125799573560767,
|
|
"grad_norm": 0.0955680047571786,
|
|
"learning_rate": 0.0019496321859081842,
|
|
"loss": 2.8175,
|
|
"step": 6055
|
|
},
|
|
{
|
|
"epoch": 1.914159361920556,
|
|
"grad_norm": 0.09273907882866253,
|
|
"learning_rate": 0.0019494592364315126,
|
|
"loss": 2.8755,
|
|
"step": 6060
|
|
},
|
|
{
|
|
"epoch": 1.9157387664850352,
|
|
"grad_norm": 0.07407064110854321,
|
|
"learning_rate": 0.0019492859982303532,
|
|
"loss": 2.9163,
|
|
"step": 6065
|
|
},
|
|
{
|
|
"epoch": 1.9173181710495144,
|
|
"grad_norm": 0.0772395616577009,
|
|
"learning_rate": 0.0019491124713573874,
|
|
"loss": 2.7888,
|
|
"step": 6070
|
|
},
|
|
{
|
|
"epoch": 1.9188975756139937,
|
|
"grad_norm": 0.07922545919001302,
|
|
"learning_rate": 0.0019489386558653827,
|
|
"loss": 2.7501,
|
|
"step": 6075
|
|
},
|
|
{
|
|
"epoch": 1.9204769801784727,
|
|
"grad_norm": 0.08135294434296457,
|
|
"learning_rate": 0.0019487645518071958,
|
|
"loss": 2.8935,
|
|
"step": 6080
|
|
},
|
|
{
|
|
"epoch": 1.922056384742952,
|
|
"grad_norm": 0.08030592271812116,
|
|
"learning_rate": 0.0019485901592357707,
|
|
"loss": 2.8497,
|
|
"step": 6085
|
|
},
|
|
{
|
|
"epoch": 1.923635789307431,
|
|
"grad_norm": 0.08200182929395594,
|
|
"learning_rate": 0.0019484154782041388,
|
|
"loss": 2.6893,
|
|
"step": 6090
|
|
},
|
|
{
|
|
"epoch": 1.9252151938719102,
|
|
"grad_norm": 0.08130310335267442,
|
|
"learning_rate": 0.0019482405087654193,
|
|
"loss": 2.8332,
|
|
"step": 6095
|
|
},
|
|
{
|
|
"epoch": 1.9267945984363894,
|
|
"grad_norm": 0.07548347105029962,
|
|
"learning_rate": 0.00194806525097282,
|
|
"loss": 2.7958,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 1.9283740030008687,
|
|
"grad_norm": 0.09933339001977781,
|
|
"learning_rate": 0.0019478897048796349,
|
|
"loss": 2.8233,
|
|
"step": 6105
|
|
},
|
|
{
|
|
"epoch": 1.929953407565348,
|
|
"grad_norm": 0.08574779037700413,
|
|
"learning_rate": 0.0019477138705392468,
|
|
"loss": 2.7458,
|
|
"step": 6110
|
|
},
|
|
{
|
|
"epoch": 1.9315328121298272,
|
|
"grad_norm": 0.08066906047062622,
|
|
"learning_rate": 0.001947537748005126,
|
|
"loss": 2.8612,
|
|
"step": 6115
|
|
},
|
|
{
|
|
"epoch": 1.9331122166943062,
|
|
"grad_norm": 0.09598145548735948,
|
|
"learning_rate": 0.0019473613373308298,
|
|
"loss": 2.7525,
|
|
"step": 6120
|
|
},
|
|
{
|
|
"epoch": 1.9346916212587855,
|
|
"grad_norm": 0.07458977511454752,
|
|
"learning_rate": 0.001947184638570004,
|
|
"loss": 2.8079,
|
|
"step": 6125
|
|
},
|
|
{
|
|
"epoch": 1.9362710258232645,
|
|
"grad_norm": 0.09282378084023518,
|
|
"learning_rate": 0.001947007651776381,
|
|
"loss": 2.7308,
|
|
"step": 6130
|
|
},
|
|
{
|
|
"epoch": 1.9378504303877437,
|
|
"grad_norm": 0.08151088064882638,
|
|
"learning_rate": 0.001946830377003782,
|
|
"loss": 2.7715,
|
|
"step": 6135
|
|
},
|
|
{
|
|
"epoch": 1.939429834952223,
|
|
"grad_norm": 0.08967279639477138,
|
|
"learning_rate": 0.0019466528143061148,
|
|
"loss": 2.8798,
|
|
"step": 6140
|
|
},
|
|
{
|
|
"epoch": 1.9410092395167022,
|
|
"grad_norm": 0.08014834943720231,
|
|
"learning_rate": 0.0019464749637373752,
|
|
"loss": 2.8762,
|
|
"step": 6145
|
|
},
|
|
{
|
|
"epoch": 1.9425886440811815,
|
|
"grad_norm": 0.1015957088000604,
|
|
"learning_rate": 0.0019462968253516459,
|
|
"loss": 2.7706,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 1.9441680486456607,
|
|
"grad_norm": 0.08424020119718752,
|
|
"learning_rate": 0.0019461183992030985,
|
|
"loss": 2.8594,
|
|
"step": 6155
|
|
},
|
|
{
|
|
"epoch": 1.9457474532101398,
|
|
"grad_norm": 0.07337585037431,
|
|
"learning_rate": 0.0019459396853459905,
|
|
"loss": 2.8301,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 1.947326857774619,
|
|
"grad_norm": 0.0832795576496703,
|
|
"learning_rate": 0.001945760683834668,
|
|
"loss": 2.818,
|
|
"step": 6165
|
|
},
|
|
{
|
|
"epoch": 1.948906262339098,
|
|
"grad_norm": 0.0857588105421658,
|
|
"learning_rate": 0.0019455813947235644,
|
|
"loss": 2.8158,
|
|
"step": 6170
|
|
},
|
|
{
|
|
"epoch": 1.9504856669035773,
|
|
"grad_norm": 0.0784707318258361,
|
|
"learning_rate": 0.0019454018180672002,
|
|
"loss": 2.8325,
|
|
"step": 6175
|
|
},
|
|
{
|
|
"epoch": 1.9520650714680565,
|
|
"grad_norm": 0.08506714452247786,
|
|
"learning_rate": 0.0019452219539201829,
|
|
"loss": 2.8065,
|
|
"step": 6180
|
|
},
|
|
{
|
|
"epoch": 1.9536444760325358,
|
|
"grad_norm": 0.1189752375053285,
|
|
"learning_rate": 0.0019450418023372093,
|
|
"loss": 2.8073,
|
|
"step": 6185
|
|
},
|
|
{
|
|
"epoch": 1.955223880597015,
|
|
"grad_norm": 0.0997349870460688,
|
|
"learning_rate": 0.0019448613633730614,
|
|
"loss": 2.7286,
|
|
"step": 6190
|
|
},
|
|
{
|
|
"epoch": 1.9568032851614943,
|
|
"grad_norm": 0.09809781879864145,
|
|
"learning_rate": 0.0019446806370826098,
|
|
"loss": 2.8428,
|
|
"step": 6195
|
|
},
|
|
{
|
|
"epoch": 1.9583826897259733,
|
|
"grad_norm": 0.09740558269055588,
|
|
"learning_rate": 0.001944499623520812,
|
|
"loss": 2.7367,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 1.9599620942904525,
|
|
"grad_norm": 0.09927752259372388,
|
|
"learning_rate": 0.0019443183227427134,
|
|
"loss": 2.8624,
|
|
"step": 6205
|
|
},
|
|
{
|
|
"epoch": 1.9615414988549316,
|
|
"grad_norm": 0.09389302280465255,
|
|
"learning_rate": 0.0019441367348034461,
|
|
"loss": 2.8001,
|
|
"step": 6210
|
|
},
|
|
{
|
|
"epoch": 1.9631209034194108,
|
|
"grad_norm": 0.1420223065796786,
|
|
"learning_rate": 0.0019439548597582302,
|
|
"loss": 2.8459,
|
|
"step": 6215
|
|
},
|
|
{
|
|
"epoch": 1.96470030798389,
|
|
"grad_norm": 0.09125430607116715,
|
|
"learning_rate": 0.0019437726976623726,
|
|
"loss": 2.7651,
|
|
"step": 6220
|
|
},
|
|
{
|
|
"epoch": 1.9662797125483693,
|
|
"grad_norm": 0.09225743396647654,
|
|
"learning_rate": 0.0019435902485712676,
|
|
"loss": 2.7648,
|
|
"step": 6225
|
|
},
|
|
{
|
|
"epoch": 1.9678591171128486,
|
|
"grad_norm": 0.10098194753518079,
|
|
"learning_rate": 0.0019434075125403965,
|
|
"loss": 2.8095,
|
|
"step": 6230
|
|
},
|
|
{
|
|
"epoch": 1.9694385216773278,
|
|
"grad_norm": 0.08996164948245026,
|
|
"learning_rate": 0.0019432244896253287,
|
|
"loss": 2.8509,
|
|
"step": 6235
|
|
},
|
|
{
|
|
"epoch": 1.9710179262418068,
|
|
"grad_norm": 0.08739058653092374,
|
|
"learning_rate": 0.0019430411798817197,
|
|
"loss": 2.733,
|
|
"step": 6240
|
|
},
|
|
{
|
|
"epoch": 1.972597330806286,
|
|
"grad_norm": 0.08466777415280602,
|
|
"learning_rate": 0.0019428575833653134,
|
|
"loss": 2.7618,
|
|
"step": 6245
|
|
},
|
|
{
|
|
"epoch": 1.974176735370765,
|
|
"grad_norm": 0.0879874293503665,
|
|
"learning_rate": 0.00194267370013194,
|
|
"loss": 2.8032,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 1.9757561399352443,
|
|
"grad_norm": 0.08491973521692751,
|
|
"learning_rate": 0.0019424895302375177,
|
|
"loss": 2.8203,
|
|
"step": 6255
|
|
},
|
|
{
|
|
"epoch": 1.9773355444997236,
|
|
"grad_norm": 0.09233997552168374,
|
|
"learning_rate": 0.0019423050737380505,
|
|
"loss": 2.7658,
|
|
"step": 6260
|
|
},
|
|
{
|
|
"epoch": 1.9789149490642028,
|
|
"grad_norm": 0.08341466289063551,
|
|
"learning_rate": 0.0019421203306896311,
|
|
"loss": 2.8405,
|
|
"step": 6265
|
|
},
|
|
{
|
|
"epoch": 1.980494353628682,
|
|
"grad_norm": 0.09675894067841306,
|
|
"learning_rate": 0.0019419353011484385,
|
|
"loss": 2.8052,
|
|
"step": 6270
|
|
},
|
|
{
|
|
"epoch": 1.9820737581931613,
|
|
"grad_norm": 0.13064086882653447,
|
|
"learning_rate": 0.001941749985170739,
|
|
"loss": 2.8245,
|
|
"step": 6275
|
|
},
|
|
{
|
|
"epoch": 1.9836531627576404,
|
|
"grad_norm": 0.09444034799577457,
|
|
"learning_rate": 0.001941564382812886,
|
|
"loss": 2.8968,
|
|
"step": 6280
|
|
},
|
|
{
|
|
"epoch": 1.9852325673221196,
|
|
"grad_norm": 0.0874316229946107,
|
|
"learning_rate": 0.0019413784941313202,
|
|
"loss": 2.7382,
|
|
"step": 6285
|
|
},
|
|
{
|
|
"epoch": 1.9868119718865986,
|
|
"grad_norm": 0.09345391189400902,
|
|
"learning_rate": 0.0019411923191825686,
|
|
"loss": 2.829,
|
|
"step": 6290
|
|
},
|
|
{
|
|
"epoch": 1.9883913764510779,
|
|
"grad_norm": 0.09191899691273726,
|
|
"learning_rate": 0.0019410058580232464,
|
|
"loss": 2.8228,
|
|
"step": 6295
|
|
},
|
|
{
|
|
"epoch": 1.9899707810155571,
|
|
"grad_norm": 0.08165945666933225,
|
|
"learning_rate": 0.0019408191107100552,
|
|
"loss": 2.726,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 1.9915501855800364,
|
|
"grad_norm": 0.0881495686780152,
|
|
"learning_rate": 0.0019406320772997832,
|
|
"loss": 2.7863,
|
|
"step": 6305
|
|
},
|
|
{
|
|
"epoch": 1.9931295901445156,
|
|
"grad_norm": 0.07429023971824186,
|
|
"learning_rate": 0.0019404447578493062,
|
|
"loss": 2.7083,
|
|
"step": 6310
|
|
},
|
|
{
|
|
"epoch": 1.9947089947089947,
|
|
"grad_norm": 0.08992601670792755,
|
|
"learning_rate": 0.0019402571524155877,
|
|
"loss": 2.7003,
|
|
"step": 6315
|
|
},
|
|
{
|
|
"epoch": 1.996288399273474,
|
|
"grad_norm": 0.09666646843109974,
|
|
"learning_rate": 0.001940069261055676,
|
|
"loss": 2.8079,
|
|
"step": 6320
|
|
},
|
|
{
|
|
"epoch": 1.997867803837953,
|
|
"grad_norm": 0.08191896132889992,
|
|
"learning_rate": 0.0019398810838267084,
|
|
"loss": 2.8366,
|
|
"step": 6325
|
|
},
|
|
{
|
|
"epoch": 1.9994472084024322,
|
|
"grad_norm": 0.08391348675660519,
|
|
"learning_rate": 0.0019396926207859084,
|
|
"loss": 2.825,
|
|
"step": 6330
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"eval_loss": 2.780498743057251,
|
|
"eval_runtime": 118.3809,
|
|
"eval_samples_per_second": 22.377,
|
|
"eval_steps_per_second": 5.601,
|
|
"step": 6332
|
|
},
|
|
{
|
|
"epoch": 2.0009476427386876,
|
|
"grad_norm": 0.09575742921571473,
|
|
"learning_rate": 0.0019395038719905863,
|
|
"loss": 2.8089,
|
|
"step": 6335
|
|
},
|
|
{
|
|
"epoch": 2.002527047303167,
|
|
"grad_norm": 0.09475021327498588,
|
|
"learning_rate": 0.0019393148374981393,
|
|
"loss": 2.7499,
|
|
"step": 6340
|
|
},
|
|
{
|
|
"epoch": 2.0041064518676457,
|
|
"grad_norm": 0.09332000534038609,
|
|
"learning_rate": 0.0019391255173660516,
|
|
"loss": 2.8389,
|
|
"step": 6345
|
|
},
|
|
{
|
|
"epoch": 2.005685856432125,
|
|
"grad_norm": 0.10298028032627798,
|
|
"learning_rate": 0.0019389359116518943,
|
|
"loss": 2.748,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 2.007265260996604,
|
|
"grad_norm": 0.08145562501910214,
|
|
"learning_rate": 0.0019387460204133254,
|
|
"loss": 2.6825,
|
|
"step": 6355
|
|
},
|
|
{
|
|
"epoch": 2.0088446655610834,
|
|
"grad_norm": 0.07447178719663891,
|
|
"learning_rate": 0.0019385558437080897,
|
|
"loss": 2.778,
|
|
"step": 6360
|
|
},
|
|
{
|
|
"epoch": 2.0104240701255627,
|
|
"grad_norm": 0.0927632838786091,
|
|
"learning_rate": 0.0019383653815940184,
|
|
"loss": 2.7132,
|
|
"step": 6365
|
|
},
|
|
{
|
|
"epoch": 2.012003474690042,
|
|
"grad_norm": 0.09299068690281939,
|
|
"learning_rate": 0.0019381746341290299,
|
|
"loss": 2.7509,
|
|
"step": 6370
|
|
},
|
|
{
|
|
"epoch": 2.013582879254521,
|
|
"grad_norm": 0.07410954144318185,
|
|
"learning_rate": 0.001937983601371129,
|
|
"loss": 2.7203,
|
|
"step": 6375
|
|
},
|
|
{
|
|
"epoch": 2.0151622838190004,
|
|
"grad_norm": 0.08505430947007388,
|
|
"learning_rate": 0.0019377922833784082,
|
|
"loss": 2.885,
|
|
"step": 6380
|
|
},
|
|
{
|
|
"epoch": 2.0167416883834792,
|
|
"grad_norm": 0.07506113878222667,
|
|
"learning_rate": 0.0019376006802090458,
|
|
"loss": 2.7818,
|
|
"step": 6385
|
|
},
|
|
{
|
|
"epoch": 2.0183210929479585,
|
|
"grad_norm": 0.07410981032228242,
|
|
"learning_rate": 0.0019374087919213068,
|
|
"loss": 2.8087,
|
|
"step": 6390
|
|
},
|
|
{
|
|
"epoch": 2.0199004975124377,
|
|
"grad_norm": 0.07533333285267724,
|
|
"learning_rate": 0.0019372166185735436,
|
|
"loss": 2.9084,
|
|
"step": 6395
|
|
},
|
|
{
|
|
"epoch": 2.021479902076917,
|
|
"grad_norm": 0.08729901810896008,
|
|
"learning_rate": 0.0019370241602241949,
|
|
"loss": 2.7977,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 2.023059306641396,
|
|
"grad_norm": 0.06778399060340229,
|
|
"learning_rate": 0.0019368314169317855,
|
|
"loss": 2.7203,
|
|
"step": 6405
|
|
},
|
|
{
|
|
"epoch": 2.0246387112058755,
|
|
"grad_norm": 0.07716673460749471,
|
|
"learning_rate": 0.001936638388754928,
|
|
"loss": 2.7924,
|
|
"step": 6410
|
|
},
|
|
{
|
|
"epoch": 2.0262181157703547,
|
|
"grad_norm": 0.08371038858709097,
|
|
"learning_rate": 0.0019364450757523208,
|
|
"loss": 2.7855,
|
|
"step": 6415
|
|
},
|
|
{
|
|
"epoch": 2.027797520334834,
|
|
"grad_norm": 0.10523104303606545,
|
|
"learning_rate": 0.0019362514779827495,
|
|
"loss": 2.7961,
|
|
"step": 6420
|
|
},
|
|
{
|
|
"epoch": 2.0293769248993128,
|
|
"grad_norm": 0.09169356093760239,
|
|
"learning_rate": 0.0019360575955050853,
|
|
"loss": 2.7656,
|
|
"step": 6425
|
|
},
|
|
{
|
|
"epoch": 2.030956329463792,
|
|
"grad_norm": 0.08391763073861802,
|
|
"learning_rate": 0.0019358634283782867,
|
|
"loss": 2.7048,
|
|
"step": 6430
|
|
},
|
|
{
|
|
"epoch": 2.0325357340282713,
|
|
"grad_norm": 0.0739510810514508,
|
|
"learning_rate": 0.0019356689766613993,
|
|
"loss": 2.7977,
|
|
"step": 6435
|
|
},
|
|
{
|
|
"epoch": 2.0341151385927505,
|
|
"grad_norm": 0.10007494348550869,
|
|
"learning_rate": 0.001935474240413554,
|
|
"loss": 2.8089,
|
|
"step": 6440
|
|
},
|
|
{
|
|
"epoch": 2.0356945431572298,
|
|
"grad_norm": 0.09409379273580709,
|
|
"learning_rate": 0.0019352792196939694,
|
|
"loss": 2.7812,
|
|
"step": 6445
|
|
},
|
|
{
|
|
"epoch": 2.037273947721709,
|
|
"grad_norm": 0.08934297438992823,
|
|
"learning_rate": 0.0019350839145619496,
|
|
"loss": 2.8069,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 2.0388533522861882,
|
|
"grad_norm": 0.08802671161638972,
|
|
"learning_rate": 0.0019348883250768858,
|
|
"loss": 2.7853,
|
|
"step": 6455
|
|
},
|
|
{
|
|
"epoch": 2.0404327568506675,
|
|
"grad_norm": 0.0865003277963274,
|
|
"learning_rate": 0.0019346924512982555,
|
|
"loss": 2.7491,
|
|
"step": 6460
|
|
},
|
|
{
|
|
"epoch": 2.0420121614151463,
|
|
"grad_norm": 0.07928593456326218,
|
|
"learning_rate": 0.0019344962932856227,
|
|
"loss": 2.7655,
|
|
"step": 6465
|
|
},
|
|
{
|
|
"epoch": 2.0435915659796255,
|
|
"grad_norm": 0.0830289002304728,
|
|
"learning_rate": 0.0019342998510986377,
|
|
"loss": 2.7275,
|
|
"step": 6470
|
|
},
|
|
{
|
|
"epoch": 2.045170970544105,
|
|
"grad_norm": 0.07527174943423653,
|
|
"learning_rate": 0.0019341031247970375,
|
|
"loss": 2.7265,
|
|
"step": 6475
|
|
},
|
|
{
|
|
"epoch": 2.046750375108584,
|
|
"grad_norm": 0.07589622127882759,
|
|
"learning_rate": 0.0019339061144406453,
|
|
"loss": 2.863,
|
|
"step": 6480
|
|
},
|
|
{
|
|
"epoch": 2.0483297796730633,
|
|
"grad_norm": 0.08128329962885099,
|
|
"learning_rate": 0.0019337088200893705,
|
|
"loss": 2.7377,
|
|
"step": 6485
|
|
},
|
|
{
|
|
"epoch": 2.0499091842375425,
|
|
"grad_norm": 0.09961372451168636,
|
|
"learning_rate": 0.0019335112418032091,
|
|
"loss": 2.8205,
|
|
"step": 6490
|
|
},
|
|
{
|
|
"epoch": 2.051488588802022,
|
|
"grad_norm": 0.06696727578829288,
|
|
"learning_rate": 0.0019333133796422435,
|
|
"loss": 2.7783,
|
|
"step": 6495
|
|
},
|
|
{
|
|
"epoch": 2.053067993366501,
|
|
"grad_norm": 0.08682450943383599,
|
|
"learning_rate": 0.001933115233666642,
|
|
"loss": 2.9212,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 2.05464739793098,
|
|
"grad_norm": 0.09446946892027293,
|
|
"learning_rate": 0.00193291680393666,
|
|
"loss": 2.7808,
|
|
"step": 6505
|
|
},
|
|
{
|
|
"epoch": 2.056226802495459,
|
|
"grad_norm": 0.10711069375479783,
|
|
"learning_rate": 0.0019327180905126386,
|
|
"loss": 2.7313,
|
|
"step": 6510
|
|
},
|
|
{
|
|
"epoch": 2.0578062070599383,
|
|
"grad_norm": 0.09875559420662403,
|
|
"learning_rate": 0.0019325190934550047,
|
|
"loss": 2.7998,
|
|
"step": 6515
|
|
},
|
|
{
|
|
"epoch": 2.0593856116244176,
|
|
"grad_norm": 0.10038130097656887,
|
|
"learning_rate": 0.001932319812824273,
|
|
"loss": 2.7825,
|
|
"step": 6520
|
|
},
|
|
{
|
|
"epoch": 2.060965016188897,
|
|
"grad_norm": 0.08287967698391464,
|
|
"learning_rate": 0.0019321202486810428,
|
|
"loss": 2.7155,
|
|
"step": 6525
|
|
},
|
|
{
|
|
"epoch": 2.062544420753376,
|
|
"grad_norm": 0.07994162592288757,
|
|
"learning_rate": 0.0019319204010860005,
|
|
"loss": 2.7948,
|
|
"step": 6530
|
|
},
|
|
{
|
|
"epoch": 2.0641238253178553,
|
|
"grad_norm": 0.08453083899950711,
|
|
"learning_rate": 0.0019317202700999184,
|
|
"loss": 2.8841,
|
|
"step": 6535
|
|
},
|
|
{
|
|
"epoch": 2.0657032298823346,
|
|
"grad_norm": 0.10125131581986914,
|
|
"learning_rate": 0.0019315198557836553,
|
|
"loss": 2.8616,
|
|
"step": 6540
|
|
},
|
|
{
|
|
"epoch": 2.0672826344468134,
|
|
"grad_norm": 0.08479114783412135,
|
|
"learning_rate": 0.0019313191581981552,
|
|
"loss": 2.8251,
|
|
"step": 6545
|
|
},
|
|
{
|
|
"epoch": 2.0688620390112926,
|
|
"grad_norm": 0.0843902017175401,
|
|
"learning_rate": 0.00193111817740445,
|
|
"loss": 2.7163,
|
|
"step": 6550
|
|
},
|
|
{
|
|
"epoch": 2.070441443575772,
|
|
"grad_norm": 0.08435827895842664,
|
|
"learning_rate": 0.0019309169134636558,
|
|
"loss": 2.7285,
|
|
"step": 6555
|
|
},
|
|
{
|
|
"epoch": 2.072020848140251,
|
|
"grad_norm": 0.08523910283562491,
|
|
"learning_rate": 0.0019307153664369762,
|
|
"loss": 2.8544,
|
|
"step": 6560
|
|
},
|
|
{
|
|
"epoch": 2.0736002527047304,
|
|
"grad_norm": 0.09297632793559693,
|
|
"learning_rate": 0.0019305135363857,
|
|
"loss": 2.807,
|
|
"step": 6565
|
|
},
|
|
{
|
|
"epoch": 2.0751796572692096,
|
|
"grad_norm": 0.08500085833146576,
|
|
"learning_rate": 0.0019303114233712028,
|
|
"loss": 2.8476,
|
|
"step": 6570
|
|
},
|
|
{
|
|
"epoch": 2.076759061833689,
|
|
"grad_norm": 0.08830857153484742,
|
|
"learning_rate": 0.0019301090274549454,
|
|
"loss": 2.7331,
|
|
"step": 6575
|
|
},
|
|
{
|
|
"epoch": 2.078338466398168,
|
|
"grad_norm": 0.09386122618388641,
|
|
"learning_rate": 0.0019299063486984756,
|
|
"loss": 2.811,
|
|
"step": 6580
|
|
},
|
|
{
|
|
"epoch": 2.079917870962647,
|
|
"grad_norm": 0.08350414162315721,
|
|
"learning_rate": 0.0019297033871634264,
|
|
"loss": 2.857,
|
|
"step": 6585
|
|
},
|
|
{
|
|
"epoch": 2.081497275527126,
|
|
"grad_norm": 0.08295216253089162,
|
|
"learning_rate": 0.0019295001429115173,
|
|
"loss": 2.7419,
|
|
"step": 6590
|
|
},
|
|
{
|
|
"epoch": 2.0830766800916054,
|
|
"grad_norm": 0.0869171272650609,
|
|
"learning_rate": 0.0019292966160045536,
|
|
"loss": 2.7898,
|
|
"step": 6595
|
|
},
|
|
{
|
|
"epoch": 2.0846560846560847,
|
|
"grad_norm": 0.09241253732508625,
|
|
"learning_rate": 0.001929092806504426,
|
|
"loss": 2.6644,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 2.086235489220564,
|
|
"grad_norm": 0.08196169328886187,
|
|
"learning_rate": 0.0019288887144731125,
|
|
"loss": 2.7326,
|
|
"step": 6605
|
|
},
|
|
{
|
|
"epoch": 2.087814893785043,
|
|
"grad_norm": 0.09264153484001544,
|
|
"learning_rate": 0.0019286843399726754,
|
|
"loss": 2.7017,
|
|
"step": 6610
|
|
},
|
|
{
|
|
"epoch": 2.0893942983495224,
|
|
"grad_norm": 0.08433629884904234,
|
|
"learning_rate": 0.0019284796830652642,
|
|
"loss": 2.7395,
|
|
"step": 6615
|
|
},
|
|
{
|
|
"epoch": 2.0909737029140016,
|
|
"grad_norm": 0.08668778961859433,
|
|
"learning_rate": 0.0019282747438131135,
|
|
"loss": 2.7928,
|
|
"step": 6620
|
|
},
|
|
{
|
|
"epoch": 2.0925531074784804,
|
|
"grad_norm": 0.08001395369320938,
|
|
"learning_rate": 0.0019280695222785443,
|
|
"loss": 2.7409,
|
|
"step": 6625
|
|
},
|
|
{
|
|
"epoch": 2.0941325120429597,
|
|
"grad_norm": 0.11753845536111973,
|
|
"learning_rate": 0.0019278640185239628,
|
|
"loss": 2.7326,
|
|
"step": 6630
|
|
},
|
|
{
|
|
"epoch": 2.095711916607439,
|
|
"grad_norm": 0.11852435357562266,
|
|
"learning_rate": 0.001927658232611862,
|
|
"loss": 2.8578,
|
|
"step": 6635
|
|
},
|
|
{
|
|
"epoch": 2.097291321171918,
|
|
"grad_norm": 0.09027187736266508,
|
|
"learning_rate": 0.001927452164604819,
|
|
"loss": 2.7912,
|
|
"step": 6640
|
|
},
|
|
{
|
|
"epoch": 2.0988707257363974,
|
|
"grad_norm": 0.09429557482319734,
|
|
"learning_rate": 0.0019272458145654988,
|
|
"loss": 2.7154,
|
|
"step": 6645
|
|
},
|
|
{
|
|
"epoch": 2.1004501303008767,
|
|
"grad_norm": 0.09542397456612339,
|
|
"learning_rate": 0.0019270391825566508,
|
|
"loss": 2.7675,
|
|
"step": 6650
|
|
},
|
|
{
|
|
"epoch": 2.102029534865356,
|
|
"grad_norm": 0.1113955713106657,
|
|
"learning_rate": 0.0019268322686411099,
|
|
"loss": 2.7859,
|
|
"step": 6655
|
|
},
|
|
{
|
|
"epoch": 2.1036089394298347,
|
|
"grad_norm": 0.11053959264725198,
|
|
"learning_rate": 0.0019266250728817984,
|
|
"loss": 2.8418,
|
|
"step": 6660
|
|
},
|
|
{
|
|
"epoch": 2.105188343994314,
|
|
"grad_norm": 0.10237308436161732,
|
|
"learning_rate": 0.0019264175953417222,
|
|
"loss": 2.8229,
|
|
"step": 6665
|
|
},
|
|
{
|
|
"epoch": 2.1067677485587932,
|
|
"grad_norm": 0.11394409500405601,
|
|
"learning_rate": 0.0019262098360839745,
|
|
"loss": 2.8163,
|
|
"step": 6670
|
|
},
|
|
{
|
|
"epoch": 2.1083471531232725,
|
|
"grad_norm": 0.08096007436211594,
|
|
"learning_rate": 0.0019260017951717332,
|
|
"loss": 2.7214,
|
|
"step": 6675
|
|
},
|
|
{
|
|
"epoch": 2.1099265576877517,
|
|
"grad_norm": 0.07284434295650646,
|
|
"learning_rate": 0.0019257934726682627,
|
|
"loss": 2.8227,
|
|
"step": 6680
|
|
},
|
|
{
|
|
"epoch": 2.111505962252231,
|
|
"grad_norm": 0.08167810722420875,
|
|
"learning_rate": 0.001925584868636912,
|
|
"loss": 2.6956,
|
|
"step": 6685
|
|
},
|
|
{
|
|
"epoch": 2.11308536681671,
|
|
"grad_norm": 0.09417870744998465,
|
|
"learning_rate": 0.0019253759831411165,
|
|
"loss": 2.7186,
|
|
"step": 6690
|
|
},
|
|
{
|
|
"epoch": 2.1146647713811895,
|
|
"grad_norm": 0.11213015574372565,
|
|
"learning_rate": 0.001925166816244397,
|
|
"loss": 2.7526,
|
|
"step": 6695
|
|
},
|
|
{
|
|
"epoch": 2.1162441759456683,
|
|
"grad_norm": 0.10756824240366811,
|
|
"learning_rate": 0.0019249573680103595,
|
|
"loss": 2.8488,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 2.1178235805101475,
|
|
"grad_norm": 0.08711495340656947,
|
|
"learning_rate": 0.0019247476385026961,
|
|
"loss": 2.7033,
|
|
"step": 6705
|
|
},
|
|
{
|
|
"epoch": 2.1194029850746268,
|
|
"grad_norm": 0.07179842865878266,
|
|
"learning_rate": 0.0019245376277851846,
|
|
"loss": 2.7082,
|
|
"step": 6710
|
|
},
|
|
{
|
|
"epoch": 2.120982389639106,
|
|
"grad_norm": 0.09467353597491755,
|
|
"learning_rate": 0.0019243273359216872,
|
|
"loss": 2.695,
|
|
"step": 6715
|
|
},
|
|
{
|
|
"epoch": 2.1225617942035853,
|
|
"grad_norm": 0.08034724022561057,
|
|
"learning_rate": 0.0019241167629761528,
|
|
"loss": 2.7169,
|
|
"step": 6720
|
|
},
|
|
{
|
|
"epoch": 2.1241411987680645,
|
|
"grad_norm": 0.08242354846761851,
|
|
"learning_rate": 0.001923905909012615,
|
|
"loss": 2.8581,
|
|
"step": 6725
|
|
},
|
|
{
|
|
"epoch": 2.1257206033325438,
|
|
"grad_norm": 0.07521641516870188,
|
|
"learning_rate": 0.0019236947740951932,
|
|
"loss": 2.7248,
|
|
"step": 6730
|
|
},
|
|
{
|
|
"epoch": 2.127300007897023,
|
|
"grad_norm": 0.09292490715429318,
|
|
"learning_rate": 0.0019234833582880923,
|
|
"loss": 2.761,
|
|
"step": 6735
|
|
},
|
|
{
|
|
"epoch": 2.128879412461502,
|
|
"grad_norm": 0.08571654691757924,
|
|
"learning_rate": 0.0019232716616556025,
|
|
"loss": 2.7784,
|
|
"step": 6740
|
|
},
|
|
{
|
|
"epoch": 2.130458817025981,
|
|
"grad_norm": 0.09267628020872272,
|
|
"learning_rate": 0.0019230596842620994,
|
|
"loss": 2.6894,
|
|
"step": 6745
|
|
},
|
|
{
|
|
"epoch": 2.1320382215904603,
|
|
"grad_norm": 0.09786426971579432,
|
|
"learning_rate": 0.001922847426172044,
|
|
"loss": 2.8413,
|
|
"step": 6750
|
|
},
|
|
{
|
|
"epoch": 2.1336176261549396,
|
|
"grad_norm": 0.07894207352301423,
|
|
"learning_rate": 0.001922634887449982,
|
|
"loss": 2.767,
|
|
"step": 6755
|
|
},
|
|
{
|
|
"epoch": 2.135197030719419,
|
|
"grad_norm": 0.07560782993568409,
|
|
"learning_rate": 0.0019224220681605462,
|
|
"loss": 2.7604,
|
|
"step": 6760
|
|
},
|
|
{
|
|
"epoch": 2.136776435283898,
|
|
"grad_norm": 0.08052217048798144,
|
|
"learning_rate": 0.0019222089683684528,
|
|
"loss": 2.742,
|
|
"step": 6765
|
|
},
|
|
{
|
|
"epoch": 2.1383558398483773,
|
|
"grad_norm": 0.0766530719173359,
|
|
"learning_rate": 0.001921995588138504,
|
|
"loss": 2.785,
|
|
"step": 6770
|
|
},
|
|
{
|
|
"epoch": 2.1399352444128565,
|
|
"grad_norm": 0.08226433057004057,
|
|
"learning_rate": 0.001921781927535588,
|
|
"loss": 2.8312,
|
|
"step": 6775
|
|
},
|
|
{
|
|
"epoch": 2.1415146489773353,
|
|
"grad_norm": 0.08154192567298578,
|
|
"learning_rate": 0.001921567986624677,
|
|
"loss": 2.6801,
|
|
"step": 6780
|
|
},
|
|
{
|
|
"epoch": 2.1430940535418146,
|
|
"grad_norm": 0.09302973546737621,
|
|
"learning_rate": 0.0019213537654708297,
|
|
"loss": 2.7382,
|
|
"step": 6785
|
|
},
|
|
{
|
|
"epoch": 2.144673458106294,
|
|
"grad_norm": 0.09581085650749972,
|
|
"learning_rate": 0.001921139264139189,
|
|
"loss": 2.7129,
|
|
"step": 6790
|
|
},
|
|
{
|
|
"epoch": 2.146252862670773,
|
|
"grad_norm": 0.07855147333701644,
|
|
"learning_rate": 0.001920924482694983,
|
|
"loss": 2.7476,
|
|
"step": 6795
|
|
},
|
|
{
|
|
"epoch": 2.1478322672352523,
|
|
"grad_norm": 0.07584347975072561,
|
|
"learning_rate": 0.0019207094212035259,
|
|
"loss": 2.7737,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 2.1494116717997316,
|
|
"grad_norm": 0.07390332512105707,
|
|
"learning_rate": 0.0019204940797302164,
|
|
"loss": 2.6535,
|
|
"step": 6805
|
|
},
|
|
{
|
|
"epoch": 2.150991076364211,
|
|
"grad_norm": 0.08542053475529204,
|
|
"learning_rate": 0.0019202784583405386,
|
|
"loss": 2.79,
|
|
"step": 6810
|
|
},
|
|
{
|
|
"epoch": 2.15257048092869,
|
|
"grad_norm": 0.0924217776712588,
|
|
"learning_rate": 0.0019200625571000613,
|
|
"loss": 2.7868,
|
|
"step": 6815
|
|
},
|
|
{
|
|
"epoch": 2.154149885493169,
|
|
"grad_norm": 0.0744042476621044,
|
|
"learning_rate": 0.001919846376074439,
|
|
"loss": 2.7307,
|
|
"step": 6820
|
|
},
|
|
{
|
|
"epoch": 2.155729290057648,
|
|
"grad_norm": 0.10510057195379835,
|
|
"learning_rate": 0.0019196299153294105,
|
|
"loss": 2.7757,
|
|
"step": 6825
|
|
},
|
|
{
|
|
"epoch": 2.1573086946221274,
|
|
"grad_norm": 0.08019805135179159,
|
|
"learning_rate": 0.0019194131749308006,
|
|
"loss": 2.7172,
|
|
"step": 6830
|
|
},
|
|
{
|
|
"epoch": 2.1588880991866066,
|
|
"grad_norm": 0.0941289115865038,
|
|
"learning_rate": 0.0019191961549445186,
|
|
"loss": 2.7177,
|
|
"step": 6835
|
|
},
|
|
{
|
|
"epoch": 2.160467503751086,
|
|
"grad_norm": 0.09756375773672207,
|
|
"learning_rate": 0.0019189788554365586,
|
|
"loss": 2.7795,
|
|
"step": 6840
|
|
},
|
|
{
|
|
"epoch": 2.162046908315565,
|
|
"grad_norm": 0.10057056244741966,
|
|
"learning_rate": 0.0019187612764730003,
|
|
"loss": 2.742,
|
|
"step": 6845
|
|
},
|
|
{
|
|
"epoch": 2.1636263128800444,
|
|
"grad_norm": 0.08929203686667446,
|
|
"learning_rate": 0.0019185434181200078,
|
|
"loss": 2.6888,
|
|
"step": 6850
|
|
},
|
|
{
|
|
"epoch": 2.1652057174445236,
|
|
"grad_norm": 0.0809877539288576,
|
|
"learning_rate": 0.0019183252804438307,
|
|
"loss": 2.7447,
|
|
"step": 6855
|
|
},
|
|
{
|
|
"epoch": 2.1667851220090024,
|
|
"grad_norm": 0.08171483838903393,
|
|
"learning_rate": 0.0019181068635108032,
|
|
"loss": 2.7844,
|
|
"step": 6860
|
|
},
|
|
{
|
|
"epoch": 2.1683645265734817,
|
|
"grad_norm": 0.08029439443597056,
|
|
"learning_rate": 0.0019178881673873444,
|
|
"loss": 2.8377,
|
|
"step": 6865
|
|
},
|
|
{
|
|
"epoch": 2.169943931137961,
|
|
"grad_norm": 0.0869907621724929,
|
|
"learning_rate": 0.0019176691921399586,
|
|
"loss": 2.8235,
|
|
"step": 6870
|
|
},
|
|
{
|
|
"epoch": 2.17152333570244,
|
|
"grad_norm": 0.09465182406104862,
|
|
"learning_rate": 0.0019174499378352343,
|
|
"loss": 2.6915,
|
|
"step": 6875
|
|
},
|
|
{
|
|
"epoch": 2.1731027402669194,
|
|
"grad_norm": 0.08379838935165305,
|
|
"learning_rate": 0.0019172304045398459,
|
|
"loss": 2.8486,
|
|
"step": 6880
|
|
},
|
|
{
|
|
"epoch": 2.1746821448313987,
|
|
"grad_norm": 0.11732839131285723,
|
|
"learning_rate": 0.0019170105923205516,
|
|
"loss": 2.8215,
|
|
"step": 6885
|
|
},
|
|
{
|
|
"epoch": 2.176261549395878,
|
|
"grad_norm": 0.09292157119205179,
|
|
"learning_rate": 0.0019167905012441953,
|
|
"loss": 2.7212,
|
|
"step": 6890
|
|
},
|
|
{
|
|
"epoch": 2.177840953960357,
|
|
"grad_norm": 0.09627096033681211,
|
|
"learning_rate": 0.0019165701313777054,
|
|
"loss": 2.7765,
|
|
"step": 6895
|
|
},
|
|
{
|
|
"epoch": 2.179420358524836,
|
|
"grad_norm": 0.0696154891978351,
|
|
"learning_rate": 0.0019163494827880944,
|
|
"loss": 2.7463,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 2.180999763089315,
|
|
"grad_norm": 0.09306287488799425,
|
|
"learning_rate": 0.0019161285555424601,
|
|
"loss": 2.8591,
|
|
"step": 6905
|
|
},
|
|
{
|
|
"epoch": 2.1825791676537944,
|
|
"grad_norm": 0.09150579519712532,
|
|
"learning_rate": 0.0019159073497079856,
|
|
"loss": 2.7563,
|
|
"step": 6910
|
|
},
|
|
{
|
|
"epoch": 2.1841585722182737,
|
|
"grad_norm": 0.09795962987423949,
|
|
"learning_rate": 0.001915685865351938,
|
|
"loss": 2.7914,
|
|
"step": 6915
|
|
},
|
|
{
|
|
"epoch": 2.185737976782753,
|
|
"grad_norm": 0.1017505867967586,
|
|
"learning_rate": 0.0019154641025416694,
|
|
"loss": 2.7037,
|
|
"step": 6920
|
|
},
|
|
{
|
|
"epoch": 2.187317381347232,
|
|
"grad_norm": 0.10253439906939013,
|
|
"learning_rate": 0.001915242061344616,
|
|
"loss": 2.6436,
|
|
"step": 6925
|
|
},
|
|
{
|
|
"epoch": 2.1888967859117114,
|
|
"grad_norm": 0.0802442919313515,
|
|
"learning_rate": 0.0019150197418282993,
|
|
"loss": 2.6696,
|
|
"step": 6930
|
|
},
|
|
{
|
|
"epoch": 2.1904761904761907,
|
|
"grad_norm": 0.09520857188664072,
|
|
"learning_rate": 0.0019147971440603255,
|
|
"loss": 2.7685,
|
|
"step": 6935
|
|
},
|
|
{
|
|
"epoch": 2.1920555950406695,
|
|
"grad_norm": 0.08125953302885813,
|
|
"learning_rate": 0.0019145742681083852,
|
|
"loss": 2.7746,
|
|
"step": 6940
|
|
},
|
|
{
|
|
"epoch": 2.1936349996051487,
|
|
"grad_norm": 0.08665968783848398,
|
|
"learning_rate": 0.0019143511140402533,
|
|
"loss": 2.8095,
|
|
"step": 6945
|
|
},
|
|
{
|
|
"epoch": 2.195214404169628,
|
|
"grad_norm": 0.08960842367347455,
|
|
"learning_rate": 0.0019141276819237892,
|
|
"loss": 2.7944,
|
|
"step": 6950
|
|
},
|
|
{
|
|
"epoch": 2.1967938087341072,
|
|
"grad_norm": 0.08881407853420388,
|
|
"learning_rate": 0.0019139039718269377,
|
|
"loss": 2.7121,
|
|
"step": 6955
|
|
},
|
|
{
|
|
"epoch": 2.1983732132985865,
|
|
"grad_norm": 0.0731255303402879,
|
|
"learning_rate": 0.0019136799838177277,
|
|
"loss": 2.719,
|
|
"step": 6960
|
|
},
|
|
{
|
|
"epoch": 2.1999526178630657,
|
|
"grad_norm": 0.0848000016657035,
|
|
"learning_rate": 0.001913455717964272,
|
|
"loss": 2.719,
|
|
"step": 6965
|
|
},
|
|
{
|
|
"epoch": 2.201532022427545,
|
|
"grad_norm": 0.12587471348602663,
|
|
"learning_rate": 0.001913231174334769,
|
|
"loss": 2.7345,
|
|
"step": 6970
|
|
},
|
|
{
|
|
"epoch": 2.203111426992024,
|
|
"grad_norm": 0.10317234672447549,
|
|
"learning_rate": 0.0019130063529975005,
|
|
"loss": 2.8799,
|
|
"step": 6975
|
|
},
|
|
{
|
|
"epoch": 2.204690831556503,
|
|
"grad_norm": 0.09287084065190662,
|
|
"learning_rate": 0.0019127812540208331,
|
|
"loss": 2.6778,
|
|
"step": 6980
|
|
},
|
|
{
|
|
"epoch": 2.2062702361209823,
|
|
"grad_norm": 0.10572359558412954,
|
|
"learning_rate": 0.001912555877473219,
|
|
"loss": 2.9066,
|
|
"step": 6985
|
|
},
|
|
{
|
|
"epoch": 2.2078496406854615,
|
|
"grad_norm": 0.09676134765470186,
|
|
"learning_rate": 0.0019123302234231923,
|
|
"loss": 2.6953,
|
|
"step": 6990
|
|
},
|
|
{
|
|
"epoch": 2.2094290452499408,
|
|
"grad_norm": 0.09201142855993154,
|
|
"learning_rate": 0.0019121042919393741,
|
|
"loss": 2.8742,
|
|
"step": 6995
|
|
},
|
|
{
|
|
"epoch": 2.21100844981442,
|
|
"grad_norm": 0.11246187851139017,
|
|
"learning_rate": 0.001911878083090468,
|
|
"loss": 2.679,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 2.2125878543788993,
|
|
"grad_norm": 0.09040985497435061,
|
|
"learning_rate": 0.0019116515969452635,
|
|
"loss": 2.9079,
|
|
"step": 7005
|
|
},
|
|
{
|
|
"epoch": 2.2141672589433785,
|
|
"grad_norm": 0.11929576764954936,
|
|
"learning_rate": 0.0019114248335726327,
|
|
"loss": 2.7648,
|
|
"step": 7010
|
|
},
|
|
{
|
|
"epoch": 2.2157466635078578,
|
|
"grad_norm": 0.09500460087605361,
|
|
"learning_rate": 0.0019111977930415334,
|
|
"loss": 2.6747,
|
|
"step": 7015
|
|
},
|
|
{
|
|
"epoch": 2.2173260680723366,
|
|
"grad_norm": 0.07765834823398761,
|
|
"learning_rate": 0.001910970475421007,
|
|
"loss": 2.7504,
|
|
"step": 7020
|
|
},
|
|
{
|
|
"epoch": 2.218905472636816,
|
|
"grad_norm": 0.09131955370795612,
|
|
"learning_rate": 0.0019107428807801795,
|
|
"loss": 2.6785,
|
|
"step": 7025
|
|
},
|
|
{
|
|
"epoch": 2.220484877201295,
|
|
"grad_norm": 0.08552501793151021,
|
|
"learning_rate": 0.0019105150091882606,
|
|
"loss": 2.7142,
|
|
"step": 7030
|
|
},
|
|
{
|
|
"epoch": 2.2220642817657743,
|
|
"grad_norm": 0.07197777044252249,
|
|
"learning_rate": 0.001910286860714545,
|
|
"loss": 2.707,
|
|
"step": 7035
|
|
},
|
|
{
|
|
"epoch": 2.2236436863302536,
|
|
"grad_norm": 0.08719897653729854,
|
|
"learning_rate": 0.001910058435428411,
|
|
"loss": 2.7893,
|
|
"step": 7040
|
|
},
|
|
{
|
|
"epoch": 2.225223090894733,
|
|
"grad_norm": 0.08806121479772976,
|
|
"learning_rate": 0.0019098297333993213,
|
|
"loss": 2.7701,
|
|
"step": 7045
|
|
},
|
|
{
|
|
"epoch": 2.226802495459212,
|
|
"grad_norm": 0.09407117990776794,
|
|
"learning_rate": 0.0019096007546968228,
|
|
"loss": 2.7902,
|
|
"step": 7050
|
|
},
|
|
{
|
|
"epoch": 2.228381900023691,
|
|
"grad_norm": 0.1111393281263788,
|
|
"learning_rate": 0.0019093714993905465,
|
|
"loss": 2.722,
|
|
"step": 7055
|
|
},
|
|
{
|
|
"epoch": 2.22996130458817,
|
|
"grad_norm": 0.08640788224102358,
|
|
"learning_rate": 0.001909141967550207,
|
|
"loss": 2.7669,
|
|
"step": 7060
|
|
},
|
|
{
|
|
"epoch": 2.2315407091526493,
|
|
"grad_norm": 0.07853172068323665,
|
|
"learning_rate": 0.0019089121592456041,
|
|
"loss": 2.6776,
|
|
"step": 7065
|
|
},
|
|
{
|
|
"epoch": 2.2331201137171286,
|
|
"grad_norm": 0.09115129421219734,
|
|
"learning_rate": 0.0019086820745466207,
|
|
"loss": 2.7785,
|
|
"step": 7070
|
|
},
|
|
{
|
|
"epoch": 2.234699518281608,
|
|
"grad_norm": 0.07666777840195774,
|
|
"learning_rate": 0.0019084517135232245,
|
|
"loss": 2.822,
|
|
"step": 7075
|
|
},
|
|
{
|
|
"epoch": 2.236278922846087,
|
|
"grad_norm": 0.09210271949346142,
|
|
"learning_rate": 0.001908221076245466,
|
|
"loss": 2.8014,
|
|
"step": 7080
|
|
},
|
|
{
|
|
"epoch": 2.2378583274105663,
|
|
"grad_norm": 0.06827990396702065,
|
|
"learning_rate": 0.0019079901627834812,
|
|
"loss": 2.8159,
|
|
"step": 7085
|
|
},
|
|
{
|
|
"epoch": 2.2394377319750456,
|
|
"grad_norm": 0.0839761250511822,
|
|
"learning_rate": 0.001907758973207489,
|
|
"loss": 2.7452,
|
|
"step": 7090
|
|
},
|
|
{
|
|
"epoch": 2.241017136539525,
|
|
"grad_norm": 0.09989425447281891,
|
|
"learning_rate": 0.0019075275075877932,
|
|
"loss": 2.7432,
|
|
"step": 7095
|
|
},
|
|
{
|
|
"epoch": 2.2425965411040036,
|
|
"grad_norm": 0.07219217006340295,
|
|
"learning_rate": 0.0019072957659947804,
|
|
"loss": 2.739,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 2.244175945668483,
|
|
"grad_norm": 0.0820885686734087,
|
|
"learning_rate": 0.0019070637484989224,
|
|
"loss": 2.7551,
|
|
"step": 7105
|
|
},
|
|
{
|
|
"epoch": 2.245755350232962,
|
|
"grad_norm": 0.09874155721156277,
|
|
"learning_rate": 0.0019068314551707736,
|
|
"loss": 2.7175,
|
|
"step": 7110
|
|
},
|
|
{
|
|
"epoch": 2.2473347547974414,
|
|
"grad_norm": 0.11703533790943776,
|
|
"learning_rate": 0.0019065988860809734,
|
|
"loss": 2.7516,
|
|
"step": 7115
|
|
},
|
|
{
|
|
"epoch": 2.2489141593619206,
|
|
"grad_norm": 0.08032580616927876,
|
|
"learning_rate": 0.001906366041300244,
|
|
"loss": 2.7831,
|
|
"step": 7120
|
|
},
|
|
{
|
|
"epoch": 2.2504935639264,
|
|
"grad_norm": 0.09921541096208267,
|
|
"learning_rate": 0.0019061329208993928,
|
|
"loss": 2.8153,
|
|
"step": 7125
|
|
},
|
|
{
|
|
"epoch": 2.252072968490879,
|
|
"grad_norm": 0.091975480333108,
|
|
"learning_rate": 0.0019058995249493097,
|
|
"loss": 2.7338,
|
|
"step": 7130
|
|
},
|
|
{
|
|
"epoch": 2.253652373055358,
|
|
"grad_norm": 0.08328090621779681,
|
|
"learning_rate": 0.0019056658535209687,
|
|
"loss": 2.7909,
|
|
"step": 7135
|
|
},
|
|
{
|
|
"epoch": 2.255231777619837,
|
|
"grad_norm": 0.09282748988959821,
|
|
"learning_rate": 0.0019054319066854283,
|
|
"loss": 2.7228,
|
|
"step": 7140
|
|
},
|
|
{
|
|
"epoch": 2.2568111821843164,
|
|
"grad_norm": 0.10177668917275927,
|
|
"learning_rate": 0.0019051976845138301,
|
|
"loss": 2.7497,
|
|
"step": 7145
|
|
},
|
|
{
|
|
"epoch": 2.2583905867487957,
|
|
"grad_norm": 0.0724734455676322,
|
|
"learning_rate": 0.0019049631870773993,
|
|
"loss": 2.7286,
|
|
"step": 7150
|
|
},
|
|
{
|
|
"epoch": 2.259969991313275,
|
|
"grad_norm": 0.10467079801447612,
|
|
"learning_rate": 0.0019047284144474456,
|
|
"loss": 2.6547,
|
|
"step": 7155
|
|
},
|
|
{
|
|
"epoch": 2.261549395877754,
|
|
"grad_norm": 0.08612703666372605,
|
|
"learning_rate": 0.0019044933666953615,
|
|
"loss": 2.7056,
|
|
"step": 7160
|
|
},
|
|
{
|
|
"epoch": 2.2631288004422334,
|
|
"grad_norm": 0.09418909454140281,
|
|
"learning_rate": 0.0019042580438926233,
|
|
"loss": 2.7448,
|
|
"step": 7165
|
|
},
|
|
{
|
|
"epoch": 2.2647082050067127,
|
|
"grad_norm": 0.07778155554833255,
|
|
"learning_rate": 0.0019040224461107915,
|
|
"loss": 2.608,
|
|
"step": 7170
|
|
},
|
|
{
|
|
"epoch": 2.266287609571192,
|
|
"grad_norm": 0.09163630244228571,
|
|
"learning_rate": 0.0019037865734215101,
|
|
"loss": 2.6925,
|
|
"step": 7175
|
|
},
|
|
{
|
|
"epoch": 2.2678670141356707,
|
|
"grad_norm": 0.09184719746656586,
|
|
"learning_rate": 0.0019035504258965057,
|
|
"loss": 2.7412,
|
|
"step": 7180
|
|
},
|
|
{
|
|
"epoch": 2.26944641870015,
|
|
"grad_norm": 0.07598524804116423,
|
|
"learning_rate": 0.00190331400360759,
|
|
"loss": 2.7431,
|
|
"step": 7185
|
|
},
|
|
{
|
|
"epoch": 2.271025823264629,
|
|
"grad_norm": 0.07840403354867737,
|
|
"learning_rate": 0.0019030773066266572,
|
|
"loss": 2.6449,
|
|
"step": 7190
|
|
},
|
|
{
|
|
"epoch": 2.2726052278291085,
|
|
"grad_norm": 0.0703405315681963,
|
|
"learning_rate": 0.0019028403350256854,
|
|
"loss": 2.7094,
|
|
"step": 7195
|
|
},
|
|
{
|
|
"epoch": 2.2741846323935877,
|
|
"grad_norm": 0.08088970583186511,
|
|
"learning_rate": 0.0019026030888767364,
|
|
"loss": 2.7412,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 2.275764036958067,
|
|
"grad_norm": 0.09736156750218407,
|
|
"learning_rate": 0.0019023655682519544,
|
|
"loss": 2.7812,
|
|
"step": 7205
|
|
},
|
|
{
|
|
"epoch": 2.2773434415225458,
|
|
"grad_norm": 0.09153884777790493,
|
|
"learning_rate": 0.0019021277732235687,
|
|
"loss": 2.7142,
|
|
"step": 7210
|
|
},
|
|
{
|
|
"epoch": 2.278922846087025,
|
|
"grad_norm": 0.09744944882721748,
|
|
"learning_rate": 0.001901889703863891,
|
|
"loss": 2.763,
|
|
"step": 7215
|
|
},
|
|
{
|
|
"epoch": 2.2805022506515042,
|
|
"grad_norm": 0.08586046564916572,
|
|
"learning_rate": 0.001901651360245317,
|
|
"loss": 2.6857,
|
|
"step": 7220
|
|
},
|
|
{
|
|
"epoch": 2.2820816552159835,
|
|
"grad_norm": 0.09087609130878502,
|
|
"learning_rate": 0.0019014127424403246,
|
|
"loss": 2.6869,
|
|
"step": 7225
|
|
},
|
|
{
|
|
"epoch": 2.2836610597804627,
|
|
"grad_norm": 0.08836849990028749,
|
|
"learning_rate": 0.0019011738505214767,
|
|
"loss": 2.7596,
|
|
"step": 7230
|
|
},
|
|
{
|
|
"epoch": 2.285240464344942,
|
|
"grad_norm": 0.08747104858505658,
|
|
"learning_rate": 0.001900934684561419,
|
|
"loss": 2.7931,
|
|
"step": 7235
|
|
},
|
|
{
|
|
"epoch": 2.2868198689094212,
|
|
"grad_norm": 0.08755416213412862,
|
|
"learning_rate": 0.0019006952446328795,
|
|
"loss": 2.7607,
|
|
"step": 7240
|
|
},
|
|
{
|
|
"epoch": 2.2883992734739005,
|
|
"grad_norm": 0.07536655725479138,
|
|
"learning_rate": 0.001900455530808671,
|
|
"loss": 2.8365,
|
|
"step": 7245
|
|
},
|
|
{
|
|
"epoch": 2.2899786780383797,
|
|
"grad_norm": 0.07865625517548534,
|
|
"learning_rate": 0.0019002155431616888,
|
|
"loss": 2.7188,
|
|
"step": 7250
|
|
},
|
|
{
|
|
"epoch": 2.2915580826028585,
|
|
"grad_norm": 0.07071226651998248,
|
|
"learning_rate": 0.0018999752817649115,
|
|
"loss": 2.6903,
|
|
"step": 7255
|
|
},
|
|
{
|
|
"epoch": 2.293137487167338,
|
|
"grad_norm": 0.07225608791148679,
|
|
"learning_rate": 0.0018997347466914011,
|
|
"loss": 2.7235,
|
|
"step": 7260
|
|
},
|
|
{
|
|
"epoch": 2.294716891731817,
|
|
"grad_norm": 0.07348591764373122,
|
|
"learning_rate": 0.0018994939380143029,
|
|
"loss": 2.7162,
|
|
"step": 7265
|
|
},
|
|
{
|
|
"epoch": 2.2962962962962963,
|
|
"grad_norm": 0.08593372411968381,
|
|
"learning_rate": 0.0018992528558068452,
|
|
"loss": 2.7526,
|
|
"step": 7270
|
|
},
|
|
{
|
|
"epoch": 2.2978757008607755,
|
|
"grad_norm": 0.09704319575792537,
|
|
"learning_rate": 0.0018990115001423394,
|
|
"loss": 2.8124,
|
|
"step": 7275
|
|
},
|
|
{
|
|
"epoch": 2.2994551054252548,
|
|
"grad_norm": 0.08275708015065528,
|
|
"learning_rate": 0.00189876987109418,
|
|
"loss": 2.6462,
|
|
"step": 7280
|
|
},
|
|
{
|
|
"epoch": 2.301034509989734,
|
|
"grad_norm": 0.10017074009024984,
|
|
"learning_rate": 0.0018985279687358458,
|
|
"loss": 2.6671,
|
|
"step": 7285
|
|
},
|
|
{
|
|
"epoch": 2.302613914554213,
|
|
"grad_norm": 0.09120856152822093,
|
|
"learning_rate": 0.001898285793140897,
|
|
"loss": 2.7268,
|
|
"step": 7290
|
|
},
|
|
{
|
|
"epoch": 2.304193319118692,
|
|
"grad_norm": 0.07112627860712405,
|
|
"learning_rate": 0.0018980433443829777,
|
|
"loss": 2.6372,
|
|
"step": 7295
|
|
},
|
|
{
|
|
"epoch": 2.3057727236831713,
|
|
"grad_norm": 0.07453676740025032,
|
|
"learning_rate": 0.001897800622535815,
|
|
"loss": 2.7637,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 2.3073521282476506,
|
|
"grad_norm": 0.09143889265627127,
|
|
"learning_rate": 0.0018975576276732196,
|
|
"loss": 2.7296,
|
|
"step": 7305
|
|
},
|
|
{
|
|
"epoch": 2.30893153281213,
|
|
"grad_norm": 0.07443627857352321,
|
|
"learning_rate": 0.0018973143598690842,
|
|
"loss": 2.8072,
|
|
"step": 7310
|
|
},
|
|
{
|
|
"epoch": 2.310510937376609,
|
|
"grad_norm": 0.08305278027683252,
|
|
"learning_rate": 0.0018970708191973847,
|
|
"loss": 2.6825,
|
|
"step": 7315
|
|
},
|
|
{
|
|
"epoch": 2.3120903419410883,
|
|
"grad_norm": 0.09507987362363093,
|
|
"learning_rate": 0.0018968270057321808,
|
|
"loss": 2.6737,
|
|
"step": 7320
|
|
},
|
|
{
|
|
"epoch": 2.3136697465055676,
|
|
"grad_norm": 0.10136716170746057,
|
|
"learning_rate": 0.0018965829195476144,
|
|
"loss": 2.8396,
|
|
"step": 7325
|
|
},
|
|
{
|
|
"epoch": 2.315249151070047,
|
|
"grad_norm": 0.07136962176276128,
|
|
"learning_rate": 0.001896338560717911,
|
|
"loss": 2.6985,
|
|
"step": 7330
|
|
},
|
|
{
|
|
"epoch": 2.3168285556345256,
|
|
"grad_norm": 0.09289629626468898,
|
|
"learning_rate": 0.0018960939293173776,
|
|
"loss": 2.7599,
|
|
"step": 7335
|
|
},
|
|
{
|
|
"epoch": 2.318407960199005,
|
|
"grad_norm": 0.066454845519932,
|
|
"learning_rate": 0.001895849025420406,
|
|
"loss": 2.6878,
|
|
"step": 7340
|
|
},
|
|
{
|
|
"epoch": 2.319987364763484,
|
|
"grad_norm": 0.08965211928543629,
|
|
"learning_rate": 0.001895603849101469,
|
|
"loss": 2.7131,
|
|
"step": 7345
|
|
},
|
|
{
|
|
"epoch": 2.3215667693279634,
|
|
"grad_norm": 0.06976491835977368,
|
|
"learning_rate": 0.001895358400435124,
|
|
"loss": 2.7597,
|
|
"step": 7350
|
|
},
|
|
{
|
|
"epoch": 2.3231461738924426,
|
|
"grad_norm": 0.08732959121801767,
|
|
"learning_rate": 0.0018951126794960103,
|
|
"loss": 2.6749,
|
|
"step": 7355
|
|
},
|
|
{
|
|
"epoch": 2.324725578456922,
|
|
"grad_norm": 0.07146480125302472,
|
|
"learning_rate": 0.0018948666863588494,
|
|
"loss": 2.7254,
|
|
"step": 7360
|
|
},
|
|
{
|
|
"epoch": 2.326304983021401,
|
|
"grad_norm": 0.08655105891480916,
|
|
"learning_rate": 0.0018946204210984468,
|
|
"loss": 2.7201,
|
|
"step": 7365
|
|
},
|
|
{
|
|
"epoch": 2.32788438758588,
|
|
"grad_norm": 0.09835250777514457,
|
|
"learning_rate": 0.00189437388378969,
|
|
"loss": 2.8441,
|
|
"step": 7370
|
|
},
|
|
{
|
|
"epoch": 2.329463792150359,
|
|
"grad_norm": 0.06363496418764451,
|
|
"learning_rate": 0.0018941270745075497,
|
|
"loss": 2.6174,
|
|
"step": 7375
|
|
},
|
|
{
|
|
"epoch": 2.3310431967148384,
|
|
"grad_norm": 0.07929593580818732,
|
|
"learning_rate": 0.0018938799933270784,
|
|
"loss": 2.6466,
|
|
"step": 7380
|
|
},
|
|
{
|
|
"epoch": 2.3326226012793176,
|
|
"grad_norm": 0.07900756004679405,
|
|
"learning_rate": 0.0018936326403234123,
|
|
"loss": 2.76,
|
|
"step": 7385
|
|
},
|
|
{
|
|
"epoch": 2.334202005843797,
|
|
"grad_norm": 0.09717551544912385,
|
|
"learning_rate": 0.00189338501557177,
|
|
"loss": 2.6163,
|
|
"step": 7390
|
|
},
|
|
{
|
|
"epoch": 2.335781410408276,
|
|
"grad_norm": 0.08719728186567895,
|
|
"learning_rate": 0.0018931371191474524,
|
|
"loss": 2.6595,
|
|
"step": 7395
|
|
},
|
|
{
|
|
"epoch": 2.3373608149727554,
|
|
"grad_norm": 0.07114676790454187,
|
|
"learning_rate": 0.0018928889511258431,
|
|
"loss": 2.6956,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 2.3389402195372346,
|
|
"grad_norm": 0.07869140529911982,
|
|
"learning_rate": 0.001892640511582409,
|
|
"loss": 2.6961,
|
|
"step": 7405
|
|
},
|
|
{
|
|
"epoch": 2.340519624101714,
|
|
"grad_norm": 0.07186879629277657,
|
|
"learning_rate": 0.0018923918005926983,
|
|
"loss": 2.6958,
|
|
"step": 7410
|
|
},
|
|
{
|
|
"epoch": 2.3420990286661927,
|
|
"grad_norm": 0.0718933979053246,
|
|
"learning_rate": 0.0018921428182323429,
|
|
"loss": 2.6829,
|
|
"step": 7415
|
|
},
|
|
{
|
|
"epoch": 2.343678433230672,
|
|
"grad_norm": 0.07859991062129688,
|
|
"learning_rate": 0.0018918935645770563,
|
|
"loss": 2.7591,
|
|
"step": 7420
|
|
},
|
|
{
|
|
"epoch": 2.345257837795151,
|
|
"grad_norm": 0.07024473445387007,
|
|
"learning_rate": 0.0018916440397026353,
|
|
"loss": 2.9005,
|
|
"step": 7425
|
|
},
|
|
{
|
|
"epoch": 2.3468372423596304,
|
|
"grad_norm": 0.06626110340336555,
|
|
"learning_rate": 0.0018913942436849587,
|
|
"loss": 2.734,
|
|
"step": 7430
|
|
},
|
|
{
|
|
"epoch": 2.3484166469241097,
|
|
"grad_norm": 0.07754610033325414,
|
|
"learning_rate": 0.0018911441765999877,
|
|
"loss": 2.7525,
|
|
"step": 7435
|
|
},
|
|
{
|
|
"epoch": 2.349996051488589,
|
|
"grad_norm": 0.08855254824958801,
|
|
"learning_rate": 0.0018908938385237665,
|
|
"loss": 2.7487,
|
|
"step": 7440
|
|
},
|
|
{
|
|
"epoch": 2.351575456053068,
|
|
"grad_norm": 0.07971196533610837,
|
|
"learning_rate": 0.0018906432295324209,
|
|
"loss": 2.6746,
|
|
"step": 7445
|
|
},
|
|
{
|
|
"epoch": 2.353154860617547,
|
|
"grad_norm": 0.09318710563334719,
|
|
"learning_rate": 0.00189039234970216,
|
|
"loss": 2.7336,
|
|
"step": 7450
|
|
},
|
|
{
|
|
"epoch": 2.354734265182026,
|
|
"grad_norm": 0.08034857917011158,
|
|
"learning_rate": 0.0018901411991092741,
|
|
"loss": 2.6857,
|
|
"step": 7455
|
|
},
|
|
{
|
|
"epoch": 2.3563136697465055,
|
|
"grad_norm": 0.08624688466455536,
|
|
"learning_rate": 0.001889889777830137,
|
|
"loss": 2.6624,
|
|
"step": 7460
|
|
},
|
|
{
|
|
"epoch": 2.3578930743109847,
|
|
"grad_norm": 0.07256184186888504,
|
|
"learning_rate": 0.001889638085941204,
|
|
"loss": 2.679,
|
|
"step": 7465
|
|
},
|
|
{
|
|
"epoch": 2.359472478875464,
|
|
"grad_norm": 0.0735825503294827,
|
|
"learning_rate": 0.001889386123519013,
|
|
"loss": 2.7518,
|
|
"step": 7470
|
|
},
|
|
{
|
|
"epoch": 2.361051883439943,
|
|
"grad_norm": 0.09243129527436082,
|
|
"learning_rate": 0.0018891338906401845,
|
|
"loss": 2.6232,
|
|
"step": 7475
|
|
},
|
|
{
|
|
"epoch": 2.3626312880044225,
|
|
"grad_norm": 0.07493880585978271,
|
|
"learning_rate": 0.0018888813873814208,
|
|
"loss": 2.7287,
|
|
"step": 7480
|
|
},
|
|
{
|
|
"epoch": 2.3642106925689017,
|
|
"grad_norm": 0.08015136572695773,
|
|
"learning_rate": 0.0018886286138195061,
|
|
"loss": 2.6657,
|
|
"step": 7485
|
|
},
|
|
{
|
|
"epoch": 2.365790097133381,
|
|
"grad_norm": 0.08648966776424792,
|
|
"learning_rate": 0.0018883755700313078,
|
|
"loss": 2.6228,
|
|
"step": 7490
|
|
},
|
|
{
|
|
"epoch": 2.3673695016978598,
|
|
"grad_norm": 0.07390479939905129,
|
|
"learning_rate": 0.0018881222560937745,
|
|
"loss": 2.6354,
|
|
"step": 7495
|
|
},
|
|
{
|
|
"epoch": 2.368948906262339,
|
|
"grad_norm": 0.07441542053721882,
|
|
"learning_rate": 0.0018878686720839376,
|
|
"loss": 2.6607,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 2.3705283108268183,
|
|
"grad_norm": 0.07216972032624433,
|
|
"learning_rate": 0.00188761481807891,
|
|
"loss": 2.6969,
|
|
"step": 7505
|
|
},
|
|
{
|
|
"epoch": 2.3721077153912975,
|
|
"grad_norm": 0.06369129438154124,
|
|
"learning_rate": 0.0018873606941558875,
|
|
"loss": 2.7562,
|
|
"step": 7510
|
|
},
|
|
{
|
|
"epoch": 2.3736871199557767,
|
|
"grad_norm": 0.0777730619367833,
|
|
"learning_rate": 0.0018871063003921477,
|
|
"loss": 2.7014,
|
|
"step": 7515
|
|
},
|
|
{
|
|
"epoch": 2.375266524520256,
|
|
"grad_norm": 0.08640642863482138,
|
|
"learning_rate": 0.0018868516368650498,
|
|
"loss": 2.7399,
|
|
"step": 7520
|
|
},
|
|
{
|
|
"epoch": 2.3768459290847352,
|
|
"grad_norm": 0.0846546261392458,
|
|
"learning_rate": 0.0018865967036520348,
|
|
"loss": 2.7098,
|
|
"step": 7525
|
|
},
|
|
{
|
|
"epoch": 2.378425333649214,
|
|
"grad_norm": 0.07491805680460785,
|
|
"learning_rate": 0.0018863415008306276,
|
|
"loss": 2.6934,
|
|
"step": 7530
|
|
},
|
|
{
|
|
"epoch": 2.3800047382136933,
|
|
"grad_norm": 0.07523148122995321,
|
|
"learning_rate": 0.0018860860284784322,
|
|
"loss": 2.6807,
|
|
"step": 7535
|
|
},
|
|
{
|
|
"epoch": 2.3815841427781725,
|
|
"grad_norm": 0.10958219801763429,
|
|
"learning_rate": 0.0018858302866731375,
|
|
"loss": 2.7444,
|
|
"step": 7540
|
|
},
|
|
{
|
|
"epoch": 2.383163547342652,
|
|
"grad_norm": 0.08805880564158441,
|
|
"learning_rate": 0.001885574275492512,
|
|
"loss": 2.7311,
|
|
"step": 7545
|
|
},
|
|
{
|
|
"epoch": 2.384742951907131,
|
|
"grad_norm": 0.07842191253583962,
|
|
"learning_rate": 0.0018853179950144077,
|
|
"loss": 2.6944,
|
|
"step": 7550
|
|
},
|
|
{
|
|
"epoch": 2.3863223564716103,
|
|
"grad_norm": 0.08057227108054174,
|
|
"learning_rate": 0.0018850614453167576,
|
|
"loss": 2.7204,
|
|
"step": 7555
|
|
},
|
|
{
|
|
"epoch": 2.3879017610360895,
|
|
"grad_norm": 0.07942781307655891,
|
|
"learning_rate": 0.0018848046264775765,
|
|
"loss": 2.689,
|
|
"step": 7560
|
|
},
|
|
{
|
|
"epoch": 2.389481165600569,
|
|
"grad_norm": 0.07497938925568602,
|
|
"learning_rate": 0.001884547538574962,
|
|
"loss": 2.7696,
|
|
"step": 7565
|
|
},
|
|
{
|
|
"epoch": 2.3910605701650476,
|
|
"grad_norm": 0.08612690456510577,
|
|
"learning_rate": 0.001884290181687092,
|
|
"loss": 2.8199,
|
|
"step": 7570
|
|
},
|
|
{
|
|
"epoch": 2.392639974729527,
|
|
"grad_norm": 0.08797114556231153,
|
|
"learning_rate": 0.0018840325558922282,
|
|
"loss": 2.7724,
|
|
"step": 7575
|
|
},
|
|
{
|
|
"epoch": 2.394219379294006,
|
|
"grad_norm": 0.09420516265260621,
|
|
"learning_rate": 0.001883774661268712,
|
|
"loss": 2.7078,
|
|
"step": 7580
|
|
},
|
|
{
|
|
"epoch": 2.3957987838584853,
|
|
"grad_norm": 0.08453869336872923,
|
|
"learning_rate": 0.001883516497894968,
|
|
"loss": 2.8102,
|
|
"step": 7585
|
|
},
|
|
{
|
|
"epoch": 2.3973781884229646,
|
|
"grad_norm": 0.07032148456996877,
|
|
"learning_rate": 0.0018832580658495024,
|
|
"loss": 2.7734,
|
|
"step": 7590
|
|
},
|
|
{
|
|
"epoch": 2.398957592987444,
|
|
"grad_norm": 0.09374154890695309,
|
|
"learning_rate": 0.0018829993652109019,
|
|
"loss": 2.7003,
|
|
"step": 7595
|
|
},
|
|
{
|
|
"epoch": 2.400536997551923,
|
|
"grad_norm": 0.08401160942707464,
|
|
"learning_rate": 0.001882740396057836,
|
|
"loss": 2.7927,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 2.402116402116402,
|
|
"grad_norm": 0.06927986287686265,
|
|
"learning_rate": 0.0018824811584690555,
|
|
"loss": 2.6737,
|
|
"step": 7605
|
|
},
|
|
{
|
|
"epoch": 2.403695806680881,
|
|
"grad_norm": 0.07529877322546603,
|
|
"learning_rate": 0.0018822216525233935,
|
|
"loss": 2.7634,
|
|
"step": 7610
|
|
},
|
|
{
|
|
"epoch": 2.4052752112453604,
|
|
"grad_norm": 0.07469206529645643,
|
|
"learning_rate": 0.0018819618782997631,
|
|
"loss": 2.7092,
|
|
"step": 7615
|
|
},
|
|
{
|
|
"epoch": 2.4068546158098396,
|
|
"grad_norm": 0.07470263412071848,
|
|
"learning_rate": 0.0018817018358771608,
|
|
"loss": 2.6249,
|
|
"step": 7620
|
|
},
|
|
{
|
|
"epoch": 2.408434020374319,
|
|
"grad_norm": 0.0800266080961645,
|
|
"learning_rate": 0.0018814415253346638,
|
|
"loss": 2.7841,
|
|
"step": 7625
|
|
},
|
|
{
|
|
"epoch": 2.410013424938798,
|
|
"grad_norm": 0.07779178979769348,
|
|
"learning_rate": 0.0018811809467514302,
|
|
"loss": 2.7671,
|
|
"step": 7630
|
|
},
|
|
{
|
|
"epoch": 2.4115928295032774,
|
|
"grad_norm": 0.0688955490712083,
|
|
"learning_rate": 0.001880920100206701,
|
|
"loss": 2.7498,
|
|
"step": 7635
|
|
},
|
|
{
|
|
"epoch": 2.4131722340677566,
|
|
"grad_norm": 0.08367516993109311,
|
|
"learning_rate": 0.0018806589857797977,
|
|
"loss": 2.6605,
|
|
"step": 7640
|
|
},
|
|
{
|
|
"epoch": 2.414751638632236,
|
|
"grad_norm": 0.08742889444589272,
|
|
"learning_rate": 0.0018803976035501233,
|
|
"loss": 2.6678,
|
|
"step": 7645
|
|
},
|
|
{
|
|
"epoch": 2.4163310431967147,
|
|
"grad_norm": 0.0790738567939181,
|
|
"learning_rate": 0.0018801359535971626,
|
|
"loss": 2.669,
|
|
"step": 7650
|
|
},
|
|
{
|
|
"epoch": 2.417910447761194,
|
|
"grad_norm": 0.07121469436260072,
|
|
"learning_rate": 0.0018798740360004822,
|
|
"loss": 2.6622,
|
|
"step": 7655
|
|
},
|
|
{
|
|
"epoch": 2.419489852325673,
|
|
"grad_norm": 0.06891514623432604,
|
|
"learning_rate": 0.0018796118508397287,
|
|
"loss": 2.7473,
|
|
"step": 7660
|
|
},
|
|
{
|
|
"epoch": 2.4210692568901524,
|
|
"grad_norm": 0.08026374467155266,
|
|
"learning_rate": 0.0018793493981946318,
|
|
"loss": 2.6967,
|
|
"step": 7665
|
|
},
|
|
{
|
|
"epoch": 2.4226486614546316,
|
|
"grad_norm": 0.07066077070523862,
|
|
"learning_rate": 0.0018790866781450007,
|
|
"loss": 2.6993,
|
|
"step": 7670
|
|
},
|
|
{
|
|
"epoch": 2.424228066019111,
|
|
"grad_norm": 0.07868215840609977,
|
|
"learning_rate": 0.001878823690770728,
|
|
"loss": 2.739,
|
|
"step": 7675
|
|
},
|
|
{
|
|
"epoch": 2.42580747058359,
|
|
"grad_norm": 0.06467538125762497,
|
|
"learning_rate": 0.001878560436151785,
|
|
"loss": 2.6918,
|
|
"step": 7680
|
|
},
|
|
{
|
|
"epoch": 2.427386875148069,
|
|
"grad_norm": 0.09734857618605057,
|
|
"learning_rate": 0.0018782969143682276,
|
|
"loss": 2.7678,
|
|
"step": 7685
|
|
},
|
|
{
|
|
"epoch": 2.428966279712548,
|
|
"grad_norm": 0.07731364571259443,
|
|
"learning_rate": 0.0018780331255001898,
|
|
"loss": 2.677,
|
|
"step": 7690
|
|
},
|
|
{
|
|
"epoch": 2.4305456842770274,
|
|
"grad_norm": 0.07767700316172481,
|
|
"learning_rate": 0.0018777690696278881,
|
|
"loss": 2.7618,
|
|
"step": 7695
|
|
},
|
|
{
|
|
"epoch": 2.4321250888415067,
|
|
"grad_norm": 0.1037623342084868,
|
|
"learning_rate": 0.0018775047468316212,
|
|
"loss": 2.7872,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 2.433704493405986,
|
|
"grad_norm": 0.1065049396723975,
|
|
"learning_rate": 0.0018772401571917668,
|
|
"loss": 2.7372,
|
|
"step": 7705
|
|
},
|
|
{
|
|
"epoch": 2.435283897970465,
|
|
"grad_norm": 0.07938619491090596,
|
|
"learning_rate": 0.0018769753007887855,
|
|
"loss": 2.5384,
|
|
"step": 7710
|
|
},
|
|
{
|
|
"epoch": 2.4368633025349444,
|
|
"grad_norm": 0.08631045408098625,
|
|
"learning_rate": 0.0018767101777032184,
|
|
"loss": 2.6442,
|
|
"step": 7715
|
|
},
|
|
{
|
|
"epoch": 2.4384427070994237,
|
|
"grad_norm": 0.08323949226651153,
|
|
"learning_rate": 0.0018764447880156878,
|
|
"loss": 2.6652,
|
|
"step": 7720
|
|
},
|
|
{
|
|
"epoch": 2.440022111663903,
|
|
"grad_norm": 0.0847747509987669,
|
|
"learning_rate": 0.001876179131806897,
|
|
"loss": 2.653,
|
|
"step": 7725
|
|
},
|
|
{
|
|
"epoch": 2.4416015162283817,
|
|
"grad_norm": 0.08543114070870507,
|
|
"learning_rate": 0.0018759132091576301,
|
|
"loss": 2.6623,
|
|
"step": 7730
|
|
},
|
|
{
|
|
"epoch": 2.443180920792861,
|
|
"grad_norm": 0.08083946579402632,
|
|
"learning_rate": 0.0018756470201487527,
|
|
"loss": 2.6318,
|
|
"step": 7735
|
|
},
|
|
{
|
|
"epoch": 2.4447603253573402,
|
|
"grad_norm": 0.0725618021471989,
|
|
"learning_rate": 0.0018753805648612115,
|
|
"loss": 2.6657,
|
|
"step": 7740
|
|
},
|
|
{
|
|
"epoch": 2.4463397299218195,
|
|
"grad_norm": 0.08341020006084542,
|
|
"learning_rate": 0.001875113843376033,
|
|
"loss": 2.678,
|
|
"step": 7745
|
|
},
|
|
{
|
|
"epoch": 2.4479191344862987,
|
|
"grad_norm": 0.07192478514855673,
|
|
"learning_rate": 0.0018748468557743263,
|
|
"loss": 2.6607,
|
|
"step": 7750
|
|
},
|
|
{
|
|
"epoch": 2.449498539050778,
|
|
"grad_norm": 0.07975965263261633,
|
|
"learning_rate": 0.00187457960213728,
|
|
"loss": 2.7002,
|
|
"step": 7755
|
|
},
|
|
{
|
|
"epoch": 2.451077943615257,
|
|
"grad_norm": 0.0721191524169921,
|
|
"learning_rate": 0.0018743120825461647,
|
|
"loss": 2.7017,
|
|
"step": 7760
|
|
},
|
|
{
|
|
"epoch": 2.452657348179736,
|
|
"grad_norm": 0.08858243755028575,
|
|
"learning_rate": 0.0018740442970823312,
|
|
"loss": 2.697,
|
|
"step": 7765
|
|
},
|
|
{
|
|
"epoch": 2.4542367527442153,
|
|
"grad_norm": 0.08699423256790373,
|
|
"learning_rate": 0.0018737762458272114,
|
|
"loss": 2.7567,
|
|
"step": 7770
|
|
},
|
|
{
|
|
"epoch": 2.4558161573086945,
|
|
"grad_norm": 0.07730088922028397,
|
|
"learning_rate": 0.0018735079288623182,
|
|
"loss": 2.7256,
|
|
"step": 7775
|
|
},
|
|
{
|
|
"epoch": 2.4573955618731738,
|
|
"grad_norm": 0.08158156654649575,
|
|
"learning_rate": 0.0018732393462692445,
|
|
"loss": 2.7248,
|
|
"step": 7780
|
|
},
|
|
{
|
|
"epoch": 2.458974966437653,
|
|
"grad_norm": 0.08136571604368042,
|
|
"learning_rate": 0.0018729704981296652,
|
|
"loss": 2.6473,
|
|
"step": 7785
|
|
},
|
|
{
|
|
"epoch": 2.4605543710021323,
|
|
"grad_norm": 0.08344350173638025,
|
|
"learning_rate": 0.0018727013845253344,
|
|
"loss": 2.678,
|
|
"step": 7790
|
|
},
|
|
{
|
|
"epoch": 2.4621337755666115,
|
|
"grad_norm": 0.08222325140799036,
|
|
"learning_rate": 0.001872432005538089,
|
|
"loss": 2.6582,
|
|
"step": 7795
|
|
},
|
|
{
|
|
"epoch": 2.4637131801310908,
|
|
"grad_norm": 0.07101265264728349,
|
|
"learning_rate": 0.0018721623612498446,
|
|
"loss": 2.6721,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 2.46529258469557,
|
|
"grad_norm": 0.0719783248403732,
|
|
"learning_rate": 0.0018718924517425986,
|
|
"loss": 2.669,
|
|
"step": 7805
|
|
},
|
|
{
|
|
"epoch": 2.466871989260049,
|
|
"grad_norm": 0.08021611335702594,
|
|
"learning_rate": 0.0018716222770984285,
|
|
"loss": 2.634,
|
|
"step": 7810
|
|
},
|
|
{
|
|
"epoch": 2.468451393824528,
|
|
"grad_norm": 0.0935973449220456,
|
|
"learning_rate": 0.0018713518373994931,
|
|
"loss": 2.6396,
|
|
"step": 7815
|
|
},
|
|
{
|
|
"epoch": 2.4700307983890073,
|
|
"grad_norm": 0.0873982163416268,
|
|
"learning_rate": 0.0018710811327280312,
|
|
"loss": 2.5957,
|
|
"step": 7820
|
|
},
|
|
{
|
|
"epoch": 2.4716102029534865,
|
|
"grad_norm": 0.06000719379053246,
|
|
"learning_rate": 0.0018708101631663622,
|
|
"loss": 2.7188,
|
|
"step": 7825
|
|
},
|
|
{
|
|
"epoch": 2.473189607517966,
|
|
"grad_norm": 0.0882888251028582,
|
|
"learning_rate": 0.0018705389287968863,
|
|
"loss": 2.6632,
|
|
"step": 7830
|
|
},
|
|
{
|
|
"epoch": 2.474769012082445,
|
|
"grad_norm": 0.09869515337928537,
|
|
"learning_rate": 0.0018702674297020844,
|
|
"loss": 2.6711,
|
|
"step": 7835
|
|
},
|
|
{
|
|
"epoch": 2.4763484166469243,
|
|
"grad_norm": 0.08240705547146975,
|
|
"learning_rate": 0.0018699956659645172,
|
|
"loss": 2.7613,
|
|
"step": 7840
|
|
},
|
|
{
|
|
"epoch": 2.477927821211403,
|
|
"grad_norm": 0.07248905164425486,
|
|
"learning_rate": 0.0018697236376668267,
|
|
"loss": 2.696,
|
|
"step": 7845
|
|
},
|
|
{
|
|
"epoch": 2.4795072257758823,
|
|
"grad_norm": 0.0737655102966124,
|
|
"learning_rate": 0.0018694513448917348,
|
|
"loss": 2.7168,
|
|
"step": 7850
|
|
},
|
|
{
|
|
"epoch": 2.4810866303403616,
|
|
"grad_norm": 0.07535024042252804,
|
|
"learning_rate": 0.0018691787877220438,
|
|
"loss": 2.7605,
|
|
"step": 7855
|
|
},
|
|
{
|
|
"epoch": 2.482666034904841,
|
|
"grad_norm": 0.0771969438222813,
|
|
"learning_rate": 0.0018689059662406371,
|
|
"loss": 2.6679,
|
|
"step": 7860
|
|
},
|
|
{
|
|
"epoch": 2.48424543946932,
|
|
"grad_norm": 0.07142066911663333,
|
|
"learning_rate": 0.0018686328805304774,
|
|
"loss": 2.7337,
|
|
"step": 7865
|
|
},
|
|
{
|
|
"epoch": 2.4858248440337993,
|
|
"grad_norm": 0.07640651552592222,
|
|
"learning_rate": 0.0018683595306746086,
|
|
"loss": 2.6871,
|
|
"step": 7870
|
|
},
|
|
{
|
|
"epoch": 2.4874042485982786,
|
|
"grad_norm": 0.07816469379115588,
|
|
"learning_rate": 0.0018680859167561547,
|
|
"loss": 2.712,
|
|
"step": 7875
|
|
},
|
|
{
|
|
"epoch": 2.488983653162758,
|
|
"grad_norm": 0.07092137707362926,
|
|
"learning_rate": 0.00186781203885832,
|
|
"loss": 2.6838,
|
|
"step": 7880
|
|
},
|
|
{
|
|
"epoch": 2.490563057727237,
|
|
"grad_norm": 0.08452392581027952,
|
|
"learning_rate": 0.0018675378970643885,
|
|
"loss": 2.7528,
|
|
"step": 7885
|
|
},
|
|
{
|
|
"epoch": 2.492142462291716,
|
|
"grad_norm": 0.07700920122144246,
|
|
"learning_rate": 0.0018672634914577257,
|
|
"loss": 2.7425,
|
|
"step": 7890
|
|
},
|
|
{
|
|
"epoch": 2.493721866856195,
|
|
"grad_norm": 0.0725194849975088,
|
|
"learning_rate": 0.001866988822121776,
|
|
"loss": 2.72,
|
|
"step": 7895
|
|
},
|
|
{
|
|
"epoch": 2.4953012714206744,
|
|
"grad_norm": 0.08647124238367777,
|
|
"learning_rate": 0.0018667138891400653,
|
|
"loss": 2.7387,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 2.4968806759851536,
|
|
"grad_norm": 0.09775103044838994,
|
|
"learning_rate": 0.001866438692596198,
|
|
"loss": 2.7273,
|
|
"step": 7905
|
|
},
|
|
{
|
|
"epoch": 2.498460080549633,
|
|
"grad_norm": 0.06664858654174687,
|
|
"learning_rate": 0.0018661632325738605,
|
|
"loss": 2.7252,
|
|
"step": 7910
|
|
},
|
|
{
|
|
"epoch": 2.500039485114112,
|
|
"grad_norm": 0.08104867842322877,
|
|
"learning_rate": 0.0018658875091568177,
|
|
"loss": 2.648,
|
|
"step": 7915
|
|
},
|
|
{
|
|
"epoch": 2.501618889678591,
|
|
"grad_norm": 0.06850407967810379,
|
|
"learning_rate": 0.0018656115224289158,
|
|
"loss": 2.6029,
|
|
"step": 7920
|
|
},
|
|
{
|
|
"epoch": 2.50319829424307,
|
|
"grad_norm": 0.07774145402202129,
|
|
"learning_rate": 0.0018653352724740807,
|
|
"loss": 2.7816,
|
|
"step": 7925
|
|
},
|
|
{
|
|
"epoch": 2.5047776988075494,
|
|
"grad_norm": 0.067705446229846,
|
|
"learning_rate": 0.0018650587593763179,
|
|
"loss": 2.6936,
|
|
"step": 7930
|
|
},
|
|
{
|
|
"epoch": 2.5063571033720287,
|
|
"grad_norm": 0.0730342806916298,
|
|
"learning_rate": 0.0018647819832197131,
|
|
"loss": 2.6904,
|
|
"step": 7935
|
|
},
|
|
{
|
|
"epoch": 2.507936507936508,
|
|
"grad_norm": 0.07501734056288688,
|
|
"learning_rate": 0.0018645049440884325,
|
|
"loss": 2.6693,
|
|
"step": 7940
|
|
},
|
|
{
|
|
"epoch": 2.509515912500987,
|
|
"grad_norm": 0.07247920381952802,
|
|
"learning_rate": 0.001864227642066722,
|
|
"loss": 2.6361,
|
|
"step": 7945
|
|
},
|
|
{
|
|
"epoch": 2.5110953170654664,
|
|
"grad_norm": 0.07729876869468007,
|
|
"learning_rate": 0.0018639500772389074,
|
|
"loss": 2.7373,
|
|
"step": 7950
|
|
},
|
|
{
|
|
"epoch": 2.5126747216299457,
|
|
"grad_norm": 0.05958962188110759,
|
|
"learning_rate": 0.0018636722496893942,
|
|
"loss": 2.6341,
|
|
"step": 7955
|
|
},
|
|
{
|
|
"epoch": 2.514254126194425,
|
|
"grad_norm": 0.08290918057113816,
|
|
"learning_rate": 0.001863394159502668,
|
|
"loss": 2.662,
|
|
"step": 7960
|
|
},
|
|
{
|
|
"epoch": 2.515833530758904,
|
|
"grad_norm": 0.07605813821249847,
|
|
"learning_rate": 0.001863115806763294,
|
|
"loss": 2.7292,
|
|
"step": 7965
|
|
},
|
|
{
|
|
"epoch": 2.517412935323383,
|
|
"grad_norm": 0.08339138369725956,
|
|
"learning_rate": 0.001862837191555918,
|
|
"loss": 2.7135,
|
|
"step": 7970
|
|
},
|
|
{
|
|
"epoch": 2.518992339887862,
|
|
"grad_norm": 0.08347322719724491,
|
|
"learning_rate": 0.0018625583139652649,
|
|
"loss": 2.6136,
|
|
"step": 7975
|
|
},
|
|
{
|
|
"epoch": 2.5205717444523414,
|
|
"grad_norm": 0.08893567182034597,
|
|
"learning_rate": 0.0018622791740761395,
|
|
"loss": 2.8399,
|
|
"step": 7980
|
|
},
|
|
{
|
|
"epoch": 2.5221511490168207,
|
|
"grad_norm": 0.09214698048473759,
|
|
"learning_rate": 0.0018619997719734266,
|
|
"loss": 2.6203,
|
|
"step": 7985
|
|
},
|
|
{
|
|
"epoch": 2.5237305535813,
|
|
"grad_norm": 0.07526186136361122,
|
|
"learning_rate": 0.0018617201077420905,
|
|
"loss": 2.6775,
|
|
"step": 7990
|
|
},
|
|
{
|
|
"epoch": 2.525309958145779,
|
|
"grad_norm": 0.09091886358113357,
|
|
"learning_rate": 0.001861440181467175,
|
|
"loss": 2.7433,
|
|
"step": 7995
|
|
},
|
|
{
|
|
"epoch": 2.526889362710258,
|
|
"grad_norm": 0.0796869768273692,
|
|
"learning_rate": 0.0018611599932338045,
|
|
"loss": 2.6444,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 2.5284687672747372,
|
|
"grad_norm": 0.07939729630626526,
|
|
"learning_rate": 0.001860879543127182,
|
|
"loss": 2.6763,
|
|
"step": 8005
|
|
},
|
|
{
|
|
"epoch": 2.5300481718392165,
|
|
"grad_norm": 0.08612245528516328,
|
|
"learning_rate": 0.0018605988312325912,
|
|
"loss": 2.5781,
|
|
"step": 8010
|
|
},
|
|
{
|
|
"epoch": 2.5316275764036957,
|
|
"grad_norm": 0.05992673706296889,
|
|
"learning_rate": 0.0018603178576353941,
|
|
"loss": 2.6827,
|
|
"step": 8015
|
|
},
|
|
{
|
|
"epoch": 2.533206980968175,
|
|
"grad_norm": 0.08097528211699864,
|
|
"learning_rate": 0.001860036622421033,
|
|
"loss": 2.6819,
|
|
"step": 8020
|
|
},
|
|
{
|
|
"epoch": 2.5347863855326542,
|
|
"grad_norm": 0.08482703413986586,
|
|
"learning_rate": 0.00185975512567503,
|
|
"loss": 2.7946,
|
|
"step": 8025
|
|
},
|
|
{
|
|
"epoch": 2.5363657900971335,
|
|
"grad_norm": 0.08872520065295834,
|
|
"learning_rate": 0.0018594733674829867,
|
|
"loss": 2.7496,
|
|
"step": 8030
|
|
},
|
|
{
|
|
"epoch": 2.5379451946616127,
|
|
"grad_norm": 0.09288288886907606,
|
|
"learning_rate": 0.0018591913479305833,
|
|
"loss": 2.6849,
|
|
"step": 8035
|
|
},
|
|
{
|
|
"epoch": 2.539524599226092,
|
|
"grad_norm": 0.08531654651171618,
|
|
"learning_rate": 0.0018589090671035807,
|
|
"loss": 2.7099,
|
|
"step": 8040
|
|
},
|
|
{
|
|
"epoch": 2.541104003790571,
|
|
"grad_norm": 0.0916481767628531,
|
|
"learning_rate": 0.0018586265250878184,
|
|
"loss": 2.6628,
|
|
"step": 8045
|
|
},
|
|
{
|
|
"epoch": 2.54268340835505,
|
|
"grad_norm": 0.07175547103052027,
|
|
"learning_rate": 0.0018583437219692161,
|
|
"loss": 2.6703,
|
|
"step": 8050
|
|
},
|
|
{
|
|
"epoch": 2.5442628129195293,
|
|
"grad_norm": 0.06774903006142224,
|
|
"learning_rate": 0.0018580606578337715,
|
|
"loss": 2.6817,
|
|
"step": 8055
|
|
},
|
|
{
|
|
"epoch": 2.5458422174840085,
|
|
"grad_norm": 0.0826462540723309,
|
|
"learning_rate": 0.0018577773327675638,
|
|
"loss": 2.7343,
|
|
"step": 8060
|
|
},
|
|
{
|
|
"epoch": 2.5474216220484878,
|
|
"grad_norm": 0.08344610696981357,
|
|
"learning_rate": 0.0018574937468567492,
|
|
"loss": 2.6898,
|
|
"step": 8065
|
|
},
|
|
{
|
|
"epoch": 2.549001026612967,
|
|
"grad_norm": 0.0772110804875115,
|
|
"learning_rate": 0.0018572099001875652,
|
|
"loss": 2.6706,
|
|
"step": 8070
|
|
},
|
|
{
|
|
"epoch": 2.5505804311774463,
|
|
"grad_norm": 0.07610845646960257,
|
|
"learning_rate": 0.001856925792846327,
|
|
"loss": 2.7425,
|
|
"step": 8075
|
|
},
|
|
{
|
|
"epoch": 2.552159835741925,
|
|
"grad_norm": 0.06543900145379539,
|
|
"learning_rate": 0.0018566414249194306,
|
|
"loss": 2.6781,
|
|
"step": 8080
|
|
},
|
|
{
|
|
"epoch": 2.5537392403064043,
|
|
"grad_norm": 0.07079232505612955,
|
|
"learning_rate": 0.0018563567964933498,
|
|
"loss": 2.5946,
|
|
"step": 8085
|
|
},
|
|
{
|
|
"epoch": 2.5553186448708836,
|
|
"grad_norm": 0.10262069972175505,
|
|
"learning_rate": 0.0018560719076546389,
|
|
"loss": 2.7899,
|
|
"step": 8090
|
|
},
|
|
{
|
|
"epoch": 2.556898049435363,
|
|
"grad_norm": 0.08666390464029669,
|
|
"learning_rate": 0.0018557867584899305,
|
|
"loss": 2.6093,
|
|
"step": 8095
|
|
},
|
|
{
|
|
"epoch": 2.558477453999842,
|
|
"grad_norm": 0.07262218443994439,
|
|
"learning_rate": 0.0018555013490859364,
|
|
"loss": 2.6317,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 2.5600568585643213,
|
|
"grad_norm": 0.08203991908098497,
|
|
"learning_rate": 0.0018552156795294482,
|
|
"loss": 2.5987,
|
|
"step": 8105
|
|
},
|
|
{
|
|
"epoch": 2.5616362631288006,
|
|
"grad_norm": 0.07864166237147685,
|
|
"learning_rate": 0.0018549297499073356,
|
|
"loss": 2.7561,
|
|
"step": 8110
|
|
},
|
|
{
|
|
"epoch": 2.56321566769328,
|
|
"grad_norm": 0.0802474496306534,
|
|
"learning_rate": 0.0018546435603065486,
|
|
"loss": 2.7016,
|
|
"step": 8115
|
|
},
|
|
{
|
|
"epoch": 2.564795072257759,
|
|
"grad_norm": 0.07907611783306082,
|
|
"learning_rate": 0.0018543571108141155,
|
|
"loss": 2.6933,
|
|
"step": 8120
|
|
},
|
|
{
|
|
"epoch": 2.5663744768222383,
|
|
"grad_norm": 0.07617577072615397,
|
|
"learning_rate": 0.0018540704015171437,
|
|
"loss": 2.7774,
|
|
"step": 8125
|
|
},
|
|
{
|
|
"epoch": 2.567953881386717,
|
|
"grad_norm": 0.07745229149181031,
|
|
"learning_rate": 0.0018537834325028193,
|
|
"loss": 2.709,
|
|
"step": 8130
|
|
},
|
|
{
|
|
"epoch": 2.5695332859511963,
|
|
"grad_norm": 0.05920274025904176,
|
|
"learning_rate": 0.0018534962038584083,
|
|
"loss": 2.6502,
|
|
"step": 8135
|
|
},
|
|
{
|
|
"epoch": 2.5711126905156756,
|
|
"grad_norm": 0.07804375423712556,
|
|
"learning_rate": 0.0018532087156712547,
|
|
"loss": 2.7016,
|
|
"step": 8140
|
|
},
|
|
{
|
|
"epoch": 2.572692095080155,
|
|
"grad_norm": 0.07157365496401365,
|
|
"learning_rate": 0.001852920968028782,
|
|
"loss": 2.7592,
|
|
"step": 8145
|
|
},
|
|
{
|
|
"epoch": 2.574271499644634,
|
|
"grad_norm": 0.07040408906894956,
|
|
"learning_rate": 0.001852632961018492,
|
|
"loss": 2.7072,
|
|
"step": 8150
|
|
},
|
|
{
|
|
"epoch": 2.575850904209113,
|
|
"grad_norm": 0.08606791875324905,
|
|
"learning_rate": 0.0018523446947279667,
|
|
"loss": 2.7338,
|
|
"step": 8155
|
|
},
|
|
{
|
|
"epoch": 2.577430308773592,
|
|
"grad_norm": 0.06450312373755046,
|
|
"learning_rate": 0.0018520561692448654,
|
|
"loss": 2.6513,
|
|
"step": 8160
|
|
},
|
|
{
|
|
"epoch": 2.5790097133380714,
|
|
"grad_norm": 0.08074620572550516,
|
|
"learning_rate": 0.001851767384656927,
|
|
"loss": 2.6686,
|
|
"step": 8165
|
|
},
|
|
{
|
|
"epoch": 2.5805891179025506,
|
|
"grad_norm": 0.07786908704366824,
|
|
"learning_rate": 0.0018514783410519692,
|
|
"loss": 2.6336,
|
|
"step": 8170
|
|
},
|
|
{
|
|
"epoch": 2.58216852246703,
|
|
"grad_norm": 0.07287148075741615,
|
|
"learning_rate": 0.0018511890385178877,
|
|
"loss": 2.7318,
|
|
"step": 8175
|
|
},
|
|
{
|
|
"epoch": 2.583747927031509,
|
|
"grad_norm": 0.07682255875376442,
|
|
"learning_rate": 0.0018508994771426583,
|
|
"loss": 2.7095,
|
|
"step": 8180
|
|
},
|
|
{
|
|
"epoch": 2.5853273315959884,
|
|
"grad_norm": 0.06738640096293755,
|
|
"learning_rate": 0.0018506096570143342,
|
|
"loss": 2.7102,
|
|
"step": 8185
|
|
},
|
|
{
|
|
"epoch": 2.5869067361604676,
|
|
"grad_norm": 0.08277153940086601,
|
|
"learning_rate": 0.0018503195782210483,
|
|
"loss": 2.6777,
|
|
"step": 8190
|
|
},
|
|
{
|
|
"epoch": 2.588486140724947,
|
|
"grad_norm": 0.07075781475437158,
|
|
"learning_rate": 0.0018500292408510112,
|
|
"loss": 2.6732,
|
|
"step": 8195
|
|
},
|
|
{
|
|
"epoch": 2.590065545289426,
|
|
"grad_norm": 0.0891710854698283,
|
|
"learning_rate": 0.0018497386449925135,
|
|
"loss": 2.6126,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 2.591644949853905,
|
|
"grad_norm": 0.07448667313538068,
|
|
"learning_rate": 0.0018494477907339225,
|
|
"loss": 2.6267,
|
|
"step": 8205
|
|
},
|
|
{
|
|
"epoch": 2.593224354418384,
|
|
"grad_norm": 0.07955598196227132,
|
|
"learning_rate": 0.001849156678163686,
|
|
"loss": 2.6357,
|
|
"step": 8210
|
|
},
|
|
{
|
|
"epoch": 2.5948037589828634,
|
|
"grad_norm": 0.057716126963121936,
|
|
"learning_rate": 0.0018488653073703287,
|
|
"loss": 2.6709,
|
|
"step": 8215
|
|
},
|
|
{
|
|
"epoch": 2.5963831635473427,
|
|
"grad_norm": 0.24803273908784967,
|
|
"learning_rate": 0.0018485736784424553,
|
|
"loss": 2.7145,
|
|
"step": 8220
|
|
},
|
|
{
|
|
"epoch": 2.597962568111822,
|
|
"grad_norm": 0.13060863770251266,
|
|
"learning_rate": 0.0018482817914687478,
|
|
"loss": 2.7761,
|
|
"step": 8225
|
|
},
|
|
{
|
|
"epoch": 2.599541972676301,
|
|
"grad_norm": 0.09447300673293828,
|
|
"learning_rate": 0.0018479896465379672,
|
|
"loss": 2.6685,
|
|
"step": 8230
|
|
},
|
|
{
|
|
"epoch": 2.60112137724078,
|
|
"grad_norm": 0.07751070583791715,
|
|
"learning_rate": 0.0018476972437389532,
|
|
"loss": 2.7205,
|
|
"step": 8235
|
|
},
|
|
{
|
|
"epoch": 2.602700781805259,
|
|
"grad_norm": 0.08781007878460902,
|
|
"learning_rate": 0.0018474045831606235,
|
|
"loss": 2.6261,
|
|
"step": 8240
|
|
},
|
|
{
|
|
"epoch": 2.6042801863697385,
|
|
"grad_norm": 0.08697823868726591,
|
|
"learning_rate": 0.0018471116648919744,
|
|
"loss": 2.7048,
|
|
"step": 8245
|
|
},
|
|
{
|
|
"epoch": 2.6058595909342177,
|
|
"grad_norm": 0.09962612079922767,
|
|
"learning_rate": 0.00184681848902208,
|
|
"loss": 2.7425,
|
|
"step": 8250
|
|
},
|
|
{
|
|
"epoch": 2.607438995498697,
|
|
"grad_norm": 0.09497362891723997,
|
|
"learning_rate": 0.0018465250556400936,
|
|
"loss": 2.7101,
|
|
"step": 8255
|
|
},
|
|
{
|
|
"epoch": 2.609018400063176,
|
|
"grad_norm": 0.1095961760999583,
|
|
"learning_rate": 0.001846231364835247,
|
|
"loss": 2.749,
|
|
"step": 8260
|
|
},
|
|
{
|
|
"epoch": 2.6105978046276554,
|
|
"grad_norm": 0.07792276572908069,
|
|
"learning_rate": 0.0018459374166968484,
|
|
"loss": 2.6432,
|
|
"step": 8265
|
|
},
|
|
{
|
|
"epoch": 2.6121772091921347,
|
|
"grad_norm": 0.07934151624028277,
|
|
"learning_rate": 0.0018456432113142865,
|
|
"loss": 2.7084,
|
|
"step": 8270
|
|
},
|
|
{
|
|
"epoch": 2.613756613756614,
|
|
"grad_norm": 0.09765015908766372,
|
|
"learning_rate": 0.0018453487487770268,
|
|
"loss": 2.6978,
|
|
"step": 8275
|
|
},
|
|
{
|
|
"epoch": 2.615336018321093,
|
|
"grad_norm": 0.07424207466262638,
|
|
"learning_rate": 0.001845054029174614,
|
|
"loss": 2.7314,
|
|
"step": 8280
|
|
},
|
|
{
|
|
"epoch": 2.616915422885572,
|
|
"grad_norm": 0.06654401533251812,
|
|
"learning_rate": 0.0018447590525966697,
|
|
"loss": 2.6611,
|
|
"step": 8285
|
|
},
|
|
{
|
|
"epoch": 2.6184948274500512,
|
|
"grad_norm": 0.07343882122515961,
|
|
"learning_rate": 0.0018444638191328952,
|
|
"loss": 2.679,
|
|
"step": 8290
|
|
},
|
|
{
|
|
"epoch": 2.6200742320145305,
|
|
"grad_norm": 0.07141272681091779,
|
|
"learning_rate": 0.0018441683288730687,
|
|
"loss": 2.7138,
|
|
"step": 8295
|
|
},
|
|
{
|
|
"epoch": 2.6216536365790097,
|
|
"grad_norm": 0.08065369323647667,
|
|
"learning_rate": 0.0018438725819070467,
|
|
"loss": 2.6659,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 2.623233041143489,
|
|
"grad_norm": 0.06394475792669799,
|
|
"learning_rate": 0.0018435765783247641,
|
|
"loss": 2.5876,
|
|
"step": 8305
|
|
},
|
|
{
|
|
"epoch": 2.6248124457079682,
|
|
"grad_norm": 0.07213582091096707,
|
|
"learning_rate": 0.0018432803182162343,
|
|
"loss": 2.6576,
|
|
"step": 8310
|
|
},
|
|
{
|
|
"epoch": 2.626391850272447,
|
|
"grad_norm": 0.06569845411375552,
|
|
"learning_rate": 0.0018429838016715471,
|
|
"loss": 2.6349,
|
|
"step": 8315
|
|
},
|
|
{
|
|
"epoch": 2.6279712548369263,
|
|
"grad_norm": 0.062004370999434975,
|
|
"learning_rate": 0.0018426870287808722,
|
|
"loss": 2.4996,
|
|
"step": 8320
|
|
},
|
|
{
|
|
"epoch": 2.6295506594014055,
|
|
"grad_norm": 0.07777343451046,
|
|
"learning_rate": 0.0018423899996344558,
|
|
"loss": 2.7423,
|
|
"step": 8325
|
|
},
|
|
{
|
|
"epoch": 2.631130063965885,
|
|
"grad_norm": 0.07268488245099335,
|
|
"learning_rate": 0.0018420927143226226,
|
|
"loss": 2.7066,
|
|
"step": 8330
|
|
},
|
|
{
|
|
"epoch": 2.632709468530364,
|
|
"grad_norm": 0.09713553113440274,
|
|
"learning_rate": 0.001841795172935775,
|
|
"loss": 2.6784,
|
|
"step": 8335
|
|
},
|
|
{
|
|
"epoch": 2.6342888730948433,
|
|
"grad_norm": 0.07579561403529118,
|
|
"learning_rate": 0.0018414973755643941,
|
|
"loss": 2.6956,
|
|
"step": 8340
|
|
},
|
|
{
|
|
"epoch": 2.6358682776593225,
|
|
"grad_norm": 0.07593637072975637,
|
|
"learning_rate": 0.0018411993222990377,
|
|
"loss": 2.5793,
|
|
"step": 8345
|
|
},
|
|
{
|
|
"epoch": 2.6374476822238018,
|
|
"grad_norm": 0.07902721671937,
|
|
"learning_rate": 0.0018409010132303418,
|
|
"loss": 2.6508,
|
|
"step": 8350
|
|
},
|
|
{
|
|
"epoch": 2.639027086788281,
|
|
"grad_norm": 0.06976273290655018,
|
|
"learning_rate": 0.0018406024484490207,
|
|
"loss": 2.7403,
|
|
"step": 8355
|
|
},
|
|
{
|
|
"epoch": 2.6406064913527603,
|
|
"grad_norm": 0.06631988590855742,
|
|
"learning_rate": 0.0018403036280458657,
|
|
"loss": 2.5342,
|
|
"step": 8360
|
|
},
|
|
{
|
|
"epoch": 2.642185895917239,
|
|
"grad_norm": 0.07970795771258071,
|
|
"learning_rate": 0.0018400045521117462,
|
|
"loss": 2.6565,
|
|
"step": 8365
|
|
},
|
|
{
|
|
"epoch": 2.6437653004817183,
|
|
"grad_norm": 0.09089034026053808,
|
|
"learning_rate": 0.001839705220737609,
|
|
"loss": 2.6178,
|
|
"step": 8370
|
|
},
|
|
{
|
|
"epoch": 2.6453447050461976,
|
|
"grad_norm": 0.0873756080233504,
|
|
"learning_rate": 0.0018394056340144795,
|
|
"loss": 2.6376,
|
|
"step": 8375
|
|
},
|
|
{
|
|
"epoch": 2.646924109610677,
|
|
"grad_norm": 0.07068153933211149,
|
|
"learning_rate": 0.00183910579203346,
|
|
"loss": 2.6346,
|
|
"step": 8380
|
|
},
|
|
{
|
|
"epoch": 2.648503514175156,
|
|
"grad_norm": 0.08184173420274206,
|
|
"learning_rate": 0.0018388056948857301,
|
|
"loss": 2.7169,
|
|
"step": 8385
|
|
},
|
|
{
|
|
"epoch": 2.6500829187396353,
|
|
"grad_norm": 0.09004242599016954,
|
|
"learning_rate": 0.0018385053426625477,
|
|
"loss": 2.6163,
|
|
"step": 8390
|
|
},
|
|
{
|
|
"epoch": 2.651662323304114,
|
|
"grad_norm": 0.07791768928388704,
|
|
"learning_rate": 0.001838204735455248,
|
|
"loss": 2.6018,
|
|
"step": 8395
|
|
},
|
|
{
|
|
"epoch": 2.6532417278685934,
|
|
"grad_norm": 0.06776848053010266,
|
|
"learning_rate": 0.0018379038733552435,
|
|
"loss": 2.7123,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 2.6548211324330726,
|
|
"grad_norm": 0.08293504619586714,
|
|
"learning_rate": 0.0018376027564540249,
|
|
"loss": 2.7125,
|
|
"step": 8405
|
|
},
|
|
{
|
|
"epoch": 2.656400536997552,
|
|
"grad_norm": 0.08305143011589787,
|
|
"learning_rate": 0.0018373013848431597,
|
|
"loss": 2.6436,
|
|
"step": 8410
|
|
},
|
|
{
|
|
"epoch": 2.657979941562031,
|
|
"grad_norm": 0.1003820323837237,
|
|
"learning_rate": 0.0018369997586142929,
|
|
"loss": 2.6058,
|
|
"step": 8415
|
|
},
|
|
{
|
|
"epoch": 2.6595593461265103,
|
|
"grad_norm": 0.07876546071996927,
|
|
"learning_rate": 0.0018366978778591471,
|
|
"loss": 2.7217,
|
|
"step": 8420
|
|
},
|
|
{
|
|
"epoch": 2.6611387506909896,
|
|
"grad_norm": 0.0737061663083282,
|
|
"learning_rate": 0.0018363957426695227,
|
|
"loss": 2.6446,
|
|
"step": 8425
|
|
},
|
|
{
|
|
"epoch": 2.662718155255469,
|
|
"grad_norm": 0.06658529844569622,
|
|
"learning_rate": 0.0018360933531372968,
|
|
"loss": 2.6969,
|
|
"step": 8430
|
|
},
|
|
{
|
|
"epoch": 2.664297559819948,
|
|
"grad_norm": 0.07403289532509451,
|
|
"learning_rate": 0.0018357907093544238,
|
|
"loss": 2.5625,
|
|
"step": 8435
|
|
},
|
|
{
|
|
"epoch": 2.6658769643844273,
|
|
"grad_norm": 0.06490303091705928,
|
|
"learning_rate": 0.0018354878114129364,
|
|
"loss": 2.6345,
|
|
"step": 8440
|
|
},
|
|
{
|
|
"epoch": 2.667456368948906,
|
|
"grad_norm": 0.0690962464048344,
|
|
"learning_rate": 0.0018351846594049437,
|
|
"loss": 2.5917,
|
|
"step": 8445
|
|
},
|
|
{
|
|
"epoch": 2.6690357735133854,
|
|
"grad_norm": 0.07191291298335026,
|
|
"learning_rate": 0.001834881253422632,
|
|
"loss": 2.6261,
|
|
"step": 8450
|
|
},
|
|
{
|
|
"epoch": 2.6706151780778646,
|
|
"grad_norm": 0.09168957592632122,
|
|
"learning_rate": 0.0018345775935582657,
|
|
"loss": 2.6488,
|
|
"step": 8455
|
|
},
|
|
{
|
|
"epoch": 2.672194582642344,
|
|
"grad_norm": 0.08380802121900667,
|
|
"learning_rate": 0.001834273679904185,
|
|
"loss": 2.6766,
|
|
"step": 8460
|
|
},
|
|
{
|
|
"epoch": 2.673773987206823,
|
|
"grad_norm": 0.07923024055346786,
|
|
"learning_rate": 0.0018339695125528088,
|
|
"loss": 2.6511,
|
|
"step": 8465
|
|
},
|
|
{
|
|
"epoch": 2.6753533917713024,
|
|
"grad_norm": 0.07735194440863055,
|
|
"learning_rate": 0.0018336650915966324,
|
|
"loss": 2.6581,
|
|
"step": 8470
|
|
},
|
|
{
|
|
"epoch": 2.676932796335781,
|
|
"grad_norm": 0.07714028781489839,
|
|
"learning_rate": 0.0018333604171282278,
|
|
"loss": 2.6848,
|
|
"step": 8475
|
|
},
|
|
{
|
|
"epoch": 2.6785122009002604,
|
|
"grad_norm": 0.08332223079564574,
|
|
"learning_rate": 0.001833055489240245,
|
|
"loss": 2.668,
|
|
"step": 8480
|
|
},
|
|
{
|
|
"epoch": 2.6800916054647397,
|
|
"grad_norm": 0.07460803289578619,
|
|
"learning_rate": 0.0018327503080254105,
|
|
"loss": 2.8177,
|
|
"step": 8485
|
|
},
|
|
{
|
|
"epoch": 2.681671010029219,
|
|
"grad_norm": 0.08270742539127242,
|
|
"learning_rate": 0.0018324448735765277,
|
|
"loss": 2.6497,
|
|
"step": 8490
|
|
},
|
|
{
|
|
"epoch": 2.683250414593698,
|
|
"grad_norm": 0.059781586839295804,
|
|
"learning_rate": 0.0018321391859864775,
|
|
"loss": 2.7406,
|
|
"step": 8495
|
|
},
|
|
{
|
|
"epoch": 2.6848298191581774,
|
|
"grad_norm": 0.08694086473733448,
|
|
"learning_rate": 0.0018318332453482176,
|
|
"loss": 2.6342,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 2.6864092237226567,
|
|
"grad_norm": 0.08834612488705565,
|
|
"learning_rate": 0.0018315270517547826,
|
|
"loss": 2.6524,
|
|
"step": 8505
|
|
},
|
|
{
|
|
"epoch": 2.687988628287136,
|
|
"grad_norm": 0.08160955341364691,
|
|
"learning_rate": 0.0018312206052992837,
|
|
"loss": 2.6655,
|
|
"step": 8510
|
|
},
|
|
{
|
|
"epoch": 2.689568032851615,
|
|
"grad_norm": 0.08101212710873497,
|
|
"learning_rate": 0.0018309139060749097,
|
|
"loss": 2.7838,
|
|
"step": 8515
|
|
},
|
|
{
|
|
"epoch": 2.691147437416094,
|
|
"grad_norm": 0.08096363371817464,
|
|
"learning_rate": 0.0018306069541749257,
|
|
"loss": 2.7462,
|
|
"step": 8520
|
|
},
|
|
{
|
|
"epoch": 2.692726841980573,
|
|
"grad_norm": 0.06736595463146286,
|
|
"learning_rate": 0.001830299749692674,
|
|
"loss": 2.6882,
|
|
"step": 8525
|
|
},
|
|
{
|
|
"epoch": 2.6943062465450525,
|
|
"grad_norm": 0.07318768228212218,
|
|
"learning_rate": 0.001829992292721573,
|
|
"loss": 2.6515,
|
|
"step": 8530
|
|
},
|
|
{
|
|
"epoch": 2.6958856511095317,
|
|
"grad_norm": 0.0841948725662524,
|
|
"learning_rate": 0.0018296845833551192,
|
|
"loss": 2.7602,
|
|
"step": 8535
|
|
},
|
|
{
|
|
"epoch": 2.697465055674011,
|
|
"grad_norm": 0.07570074708905208,
|
|
"learning_rate": 0.0018293766216868842,
|
|
"loss": 2.758,
|
|
"step": 8540
|
|
},
|
|
{
|
|
"epoch": 2.69904446023849,
|
|
"grad_norm": 0.08026041820741821,
|
|
"learning_rate": 0.0018290684078105177,
|
|
"loss": 2.5792,
|
|
"step": 8545
|
|
},
|
|
{
|
|
"epoch": 2.700623864802969,
|
|
"grad_norm": 0.07746096583186972,
|
|
"learning_rate": 0.0018287599418197456,
|
|
"loss": 2.6286,
|
|
"step": 8550
|
|
},
|
|
{
|
|
"epoch": 2.7022032693674483,
|
|
"grad_norm": 0.07472450813156946,
|
|
"learning_rate": 0.0018284512238083703,
|
|
"loss": 2.7215,
|
|
"step": 8555
|
|
},
|
|
{
|
|
"epoch": 2.7037826739319275,
|
|
"grad_norm": 0.07941333908553108,
|
|
"learning_rate": 0.0018281422538702708,
|
|
"loss": 2.6878,
|
|
"step": 8560
|
|
},
|
|
{
|
|
"epoch": 2.7053620784964068,
|
|
"grad_norm": 0.07397615560057773,
|
|
"learning_rate": 0.0018278330320994033,
|
|
"loss": 2.6282,
|
|
"step": 8565
|
|
},
|
|
{
|
|
"epoch": 2.706941483060886,
|
|
"grad_norm": 0.08761091382284485,
|
|
"learning_rate": 0.0018275235585897996,
|
|
"loss": 2.767,
|
|
"step": 8570
|
|
},
|
|
{
|
|
"epoch": 2.7085208876253652,
|
|
"grad_norm": 0.07762415547466953,
|
|
"learning_rate": 0.0018272138334355689,
|
|
"loss": 2.6113,
|
|
"step": 8575
|
|
},
|
|
{
|
|
"epoch": 2.7101002921898445,
|
|
"grad_norm": 0.06892300386844659,
|
|
"learning_rate": 0.0018269038567308967,
|
|
"loss": 2.6718,
|
|
"step": 8580
|
|
},
|
|
{
|
|
"epoch": 2.7116796967543237,
|
|
"grad_norm": 0.08692744814530534,
|
|
"learning_rate": 0.001826593628570045,
|
|
"loss": 2.748,
|
|
"step": 8585
|
|
},
|
|
{
|
|
"epoch": 2.713259101318803,
|
|
"grad_norm": 0.06708209311079495,
|
|
"learning_rate": 0.001826283149047352,
|
|
"loss": 2.5921,
|
|
"step": 8590
|
|
},
|
|
{
|
|
"epoch": 2.7148385058832822,
|
|
"grad_norm": 0.0638740190394845,
|
|
"learning_rate": 0.001825972418257233,
|
|
"loss": 2.6677,
|
|
"step": 8595
|
|
},
|
|
{
|
|
"epoch": 2.716417910447761,
|
|
"grad_norm": 0.07573045477360121,
|
|
"learning_rate": 0.0018256614362941786,
|
|
"loss": 2.7767,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 2.7179973150122403,
|
|
"grad_norm": 0.07837415095835595,
|
|
"learning_rate": 0.0018253502032527567,
|
|
"loss": 2.6454,
|
|
"step": 8605
|
|
},
|
|
{
|
|
"epoch": 2.7195767195767195,
|
|
"grad_norm": 0.07259376544303658,
|
|
"learning_rate": 0.0018250387192276115,
|
|
"loss": 2.6446,
|
|
"step": 8610
|
|
},
|
|
{
|
|
"epoch": 2.721156124141199,
|
|
"grad_norm": 0.09368952879952992,
|
|
"learning_rate": 0.0018247269843134628,
|
|
"loss": 2.6191,
|
|
"step": 8615
|
|
},
|
|
{
|
|
"epoch": 2.722735528705678,
|
|
"grad_norm": 0.08013335502408653,
|
|
"learning_rate": 0.0018244149986051076,
|
|
"loss": 2.614,
|
|
"step": 8620
|
|
},
|
|
{
|
|
"epoch": 2.7243149332701573,
|
|
"grad_norm": 0.07343312294788908,
|
|
"learning_rate": 0.0018241027621974189,
|
|
"loss": 2.6262,
|
|
"step": 8625
|
|
},
|
|
{
|
|
"epoch": 2.725894337834636,
|
|
"grad_norm": 0.08713602436595895,
|
|
"learning_rate": 0.0018237902751853453,
|
|
"loss": 2.6984,
|
|
"step": 8630
|
|
},
|
|
{
|
|
"epoch": 2.7274737423991153,
|
|
"grad_norm": 0.07572530467226961,
|
|
"learning_rate": 0.0018234775376639125,
|
|
"loss": 2.5924,
|
|
"step": 8635
|
|
},
|
|
{
|
|
"epoch": 2.7290531469635946,
|
|
"grad_norm": 0.0651715649995805,
|
|
"learning_rate": 0.0018231645497282217,
|
|
"loss": 2.6392,
|
|
"step": 8640
|
|
},
|
|
{
|
|
"epoch": 2.730632551528074,
|
|
"grad_norm": 0.07046431160450398,
|
|
"learning_rate": 0.0018228513114734507,
|
|
"loss": 2.6397,
|
|
"step": 8645
|
|
},
|
|
{
|
|
"epoch": 2.732211956092553,
|
|
"grad_norm": 0.07222286406847223,
|
|
"learning_rate": 0.0018225378229948532,
|
|
"loss": 2.687,
|
|
"step": 8650
|
|
},
|
|
{
|
|
"epoch": 2.7337913606570323,
|
|
"grad_norm": 0.06309111384469673,
|
|
"learning_rate": 0.0018222240843877593,
|
|
"loss": 2.5812,
|
|
"step": 8655
|
|
},
|
|
{
|
|
"epoch": 2.7353707652215116,
|
|
"grad_norm": 0.07938025900094192,
|
|
"learning_rate": 0.0018219100957475745,
|
|
"loss": 2.5991,
|
|
"step": 8660
|
|
},
|
|
{
|
|
"epoch": 2.736950169785991,
|
|
"grad_norm": 0.07870940461239452,
|
|
"learning_rate": 0.0018215958571697808,
|
|
"loss": 2.6641,
|
|
"step": 8665
|
|
},
|
|
{
|
|
"epoch": 2.73852957435047,
|
|
"grad_norm": 0.06813435830119051,
|
|
"learning_rate": 0.0018212813687499363,
|
|
"loss": 2.7464,
|
|
"step": 8670
|
|
},
|
|
{
|
|
"epoch": 2.7401089789149493,
|
|
"grad_norm": 0.07370358186967162,
|
|
"learning_rate": 0.001820966630583675,
|
|
"loss": 2.5753,
|
|
"step": 8675
|
|
},
|
|
{
|
|
"epoch": 2.741688383479428,
|
|
"grad_norm": 0.09256574504652948,
|
|
"learning_rate": 0.0018206516427667068,
|
|
"loss": 2.7527,
|
|
"step": 8680
|
|
},
|
|
{
|
|
"epoch": 2.7432677880439074,
|
|
"grad_norm": 0.09791825478747279,
|
|
"learning_rate": 0.001820336405394817,
|
|
"loss": 2.6537,
|
|
"step": 8685
|
|
},
|
|
{
|
|
"epoch": 2.7448471926083866,
|
|
"grad_norm": 0.0711977245700122,
|
|
"learning_rate": 0.0018200209185638676,
|
|
"loss": 2.7593,
|
|
"step": 8690
|
|
},
|
|
{
|
|
"epoch": 2.746426597172866,
|
|
"grad_norm": 0.06319159971095922,
|
|
"learning_rate": 0.0018197051823697964,
|
|
"loss": 2.6357,
|
|
"step": 8695
|
|
},
|
|
{
|
|
"epoch": 2.748006001737345,
|
|
"grad_norm": 0.07053879924636965,
|
|
"learning_rate": 0.0018193891969086162,
|
|
"loss": 2.7059,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 2.7495854063018244,
|
|
"grad_norm": 0.0728205579126769,
|
|
"learning_rate": 0.0018190729622764167,
|
|
"loss": 2.6518,
|
|
"step": 8705
|
|
},
|
|
{
|
|
"epoch": 2.751164810866303,
|
|
"grad_norm": 0.057084351505856216,
|
|
"learning_rate": 0.0018187564785693625,
|
|
"loss": 2.5939,
|
|
"step": 8710
|
|
},
|
|
{
|
|
"epoch": 2.7527442154307824,
|
|
"grad_norm": 0.06874940758099113,
|
|
"learning_rate": 0.001818439745883694,
|
|
"loss": 2.6319,
|
|
"step": 8715
|
|
},
|
|
{
|
|
"epoch": 2.7543236199952617,
|
|
"grad_norm": 0.07274610037668357,
|
|
"learning_rate": 0.0018181227643157283,
|
|
"loss": 2.5699,
|
|
"step": 8720
|
|
},
|
|
{
|
|
"epoch": 2.755903024559741,
|
|
"grad_norm": 0.06424607912088136,
|
|
"learning_rate": 0.001817805533961857,
|
|
"loss": 2.6361,
|
|
"step": 8725
|
|
},
|
|
{
|
|
"epoch": 2.75748242912422,
|
|
"grad_norm": 0.07664431543647424,
|
|
"learning_rate": 0.001817488054918548,
|
|
"loss": 2.6116,
|
|
"step": 8730
|
|
},
|
|
{
|
|
"epoch": 2.7590618336886994,
|
|
"grad_norm": 0.07680904439386883,
|
|
"learning_rate": 0.0018171703272823444,
|
|
"loss": 2.6897,
|
|
"step": 8735
|
|
},
|
|
{
|
|
"epoch": 2.7606412382531786,
|
|
"grad_norm": 0.08070368351555539,
|
|
"learning_rate": 0.0018168523511498656,
|
|
"loss": 2.5728,
|
|
"step": 8740
|
|
},
|
|
{
|
|
"epoch": 2.762220642817658,
|
|
"grad_norm": 0.10305902984751368,
|
|
"learning_rate": 0.0018165341266178055,
|
|
"loss": 2.6039,
|
|
"step": 8745
|
|
},
|
|
{
|
|
"epoch": 2.763800047382137,
|
|
"grad_norm": 0.08180323592745962,
|
|
"learning_rate": 0.0018162156537829346,
|
|
"loss": 2.6779,
|
|
"step": 8750
|
|
},
|
|
{
|
|
"epoch": 2.7653794519466164,
|
|
"grad_norm": 0.08138935595180714,
|
|
"learning_rate": 0.0018158969327420984,
|
|
"loss": 2.7306,
|
|
"step": 8755
|
|
},
|
|
{
|
|
"epoch": 2.766958856511095,
|
|
"grad_norm": 0.07257660977943017,
|
|
"learning_rate": 0.0018155779635922178,
|
|
"loss": 2.642,
|
|
"step": 8760
|
|
},
|
|
{
|
|
"epoch": 2.7685382610755744,
|
|
"grad_norm": 0.07180881891544696,
|
|
"learning_rate": 0.0018152587464302897,
|
|
"loss": 2.7033,
|
|
"step": 8765
|
|
},
|
|
{
|
|
"epoch": 2.7701176656400537,
|
|
"grad_norm": 0.06433718516880173,
|
|
"learning_rate": 0.0018149392813533853,
|
|
"loss": 2.6953,
|
|
"step": 8770
|
|
},
|
|
{
|
|
"epoch": 2.771697070204533,
|
|
"grad_norm": 0.07729242651671715,
|
|
"learning_rate": 0.001814619568458652,
|
|
"loss": 2.7412,
|
|
"step": 8775
|
|
},
|
|
{
|
|
"epoch": 2.773276474769012,
|
|
"grad_norm": 0.07921295884974441,
|
|
"learning_rate": 0.0018142996078433131,
|
|
"loss": 2.6721,
|
|
"step": 8780
|
|
},
|
|
{
|
|
"epoch": 2.7748558793334914,
|
|
"grad_norm": 0.07152778542092869,
|
|
"learning_rate": 0.001813979399604666,
|
|
"loss": 2.7206,
|
|
"step": 8785
|
|
},
|
|
{
|
|
"epoch": 2.7764352838979702,
|
|
"grad_norm": 0.0733619109588905,
|
|
"learning_rate": 0.001813658943840084,
|
|
"loss": 2.6843,
|
|
"step": 8790
|
|
},
|
|
{
|
|
"epoch": 2.7780146884624495,
|
|
"grad_norm": 0.07316169248525446,
|
|
"learning_rate": 0.001813338240647016,
|
|
"loss": 2.7021,
|
|
"step": 8795
|
|
},
|
|
{
|
|
"epoch": 2.7795940930269287,
|
|
"grad_norm": 0.08060596316716877,
|
|
"learning_rate": 0.0018130172901229856,
|
|
"loss": 2.7007,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 2.781173497591408,
|
|
"grad_norm": 0.11422921262794959,
|
|
"learning_rate": 0.0018126960923655914,
|
|
"loss": 2.6103,
|
|
"step": 8805
|
|
},
|
|
{
|
|
"epoch": 2.782752902155887,
|
|
"grad_norm": 0.06882021473600086,
|
|
"learning_rate": 0.0018123746474725084,
|
|
"loss": 2.6624,
|
|
"step": 8810
|
|
},
|
|
{
|
|
"epoch": 2.7843323067203665,
|
|
"grad_norm": 0.08117965393169584,
|
|
"learning_rate": 0.0018120529555414855,
|
|
"loss": 2.6246,
|
|
"step": 8815
|
|
},
|
|
{
|
|
"epoch": 2.7859117112848457,
|
|
"grad_norm": 0.06802721481144403,
|
|
"learning_rate": 0.001811731016670347,
|
|
"loss": 2.7133,
|
|
"step": 8820
|
|
},
|
|
{
|
|
"epoch": 2.787491115849325,
|
|
"grad_norm": 0.0712690638293664,
|
|
"learning_rate": 0.0018114088309569927,
|
|
"loss": 2.6747,
|
|
"step": 8825
|
|
},
|
|
{
|
|
"epoch": 2.789070520413804,
|
|
"grad_norm": 0.09285584313170489,
|
|
"learning_rate": 0.001811086398499397,
|
|
"loss": 2.6563,
|
|
"step": 8830
|
|
},
|
|
{
|
|
"epoch": 2.7906499249782835,
|
|
"grad_norm": 0.07566449189012191,
|
|
"learning_rate": 0.0018107637193956099,
|
|
"loss": 2.6572,
|
|
"step": 8835
|
|
},
|
|
{
|
|
"epoch": 2.7922293295427623,
|
|
"grad_norm": 0.07798799454761576,
|
|
"learning_rate": 0.0018104407937437558,
|
|
"loss": 2.7,
|
|
"step": 8840
|
|
},
|
|
{
|
|
"epoch": 2.7938087341072415,
|
|
"grad_norm": 0.07913274316285812,
|
|
"learning_rate": 0.0018101176216420343,
|
|
"loss": 2.7162,
|
|
"step": 8845
|
|
},
|
|
{
|
|
"epoch": 2.7953881386717208,
|
|
"grad_norm": 0.08250947208597523,
|
|
"learning_rate": 0.0018097942031887197,
|
|
"loss": 2.5854,
|
|
"step": 8850
|
|
},
|
|
{
|
|
"epoch": 2.7969675432362,
|
|
"grad_norm": 0.0725336504766197,
|
|
"learning_rate": 0.0018094705384821626,
|
|
"loss": 2.6304,
|
|
"step": 8855
|
|
},
|
|
{
|
|
"epoch": 2.7985469478006793,
|
|
"grad_norm": 0.07030194627786095,
|
|
"learning_rate": 0.0018091466276207863,
|
|
"loss": 2.6804,
|
|
"step": 8860
|
|
},
|
|
{
|
|
"epoch": 2.8001263523651585,
|
|
"grad_norm": 0.07505722120062171,
|
|
"learning_rate": 0.00180882247070309,
|
|
"loss": 2.626,
|
|
"step": 8865
|
|
},
|
|
{
|
|
"epoch": 2.8017057569296373,
|
|
"grad_norm": 0.06820303064495715,
|
|
"learning_rate": 0.0018084980678276482,
|
|
"loss": 2.7694,
|
|
"step": 8870
|
|
},
|
|
{
|
|
"epoch": 2.8032851614941166,
|
|
"grad_norm": 0.056961786091470915,
|
|
"learning_rate": 0.0018081734190931096,
|
|
"loss": 2.5769,
|
|
"step": 8875
|
|
},
|
|
{
|
|
"epoch": 2.804864566058596,
|
|
"grad_norm": 0.06909140931913867,
|
|
"learning_rate": 0.001807848524598198,
|
|
"loss": 2.6962,
|
|
"step": 8880
|
|
},
|
|
{
|
|
"epoch": 2.806443970623075,
|
|
"grad_norm": 0.0648444583829478,
|
|
"learning_rate": 0.0018075233844417117,
|
|
"loss": 2.7129,
|
|
"step": 8885
|
|
},
|
|
{
|
|
"epoch": 2.8080233751875543,
|
|
"grad_norm": 0.07674247498206403,
|
|
"learning_rate": 0.001807197998722523,
|
|
"loss": 2.6899,
|
|
"step": 8890
|
|
},
|
|
{
|
|
"epoch": 2.8096027797520335,
|
|
"grad_norm": 0.08273112738453625,
|
|
"learning_rate": 0.0018068723675395807,
|
|
"loss": 2.6684,
|
|
"step": 8895
|
|
},
|
|
{
|
|
"epoch": 2.811182184316513,
|
|
"grad_norm": 0.08260356529408745,
|
|
"learning_rate": 0.0018065464909919067,
|
|
"loss": 2.692,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 2.812761588880992,
|
|
"grad_norm": 0.07402764354032675,
|
|
"learning_rate": 0.0018062203691785977,
|
|
"loss": 2.6813,
|
|
"step": 8905
|
|
},
|
|
{
|
|
"epoch": 2.8143409934454713,
|
|
"grad_norm": 0.07010201292401957,
|
|
"learning_rate": 0.0018058940021988257,
|
|
"loss": 2.6113,
|
|
"step": 8910
|
|
},
|
|
{
|
|
"epoch": 2.81592039800995,
|
|
"grad_norm": 0.06834416411554649,
|
|
"learning_rate": 0.0018055673901518365,
|
|
"loss": 2.6313,
|
|
"step": 8915
|
|
},
|
|
{
|
|
"epoch": 2.8174998025744293,
|
|
"grad_norm": 0.0805976545965805,
|
|
"learning_rate": 0.001805240533136951,
|
|
"loss": 2.5651,
|
|
"step": 8920
|
|
},
|
|
{
|
|
"epoch": 2.8190792071389086,
|
|
"grad_norm": 0.08638320793892658,
|
|
"learning_rate": 0.001804913431253564,
|
|
"loss": 2.5595,
|
|
"step": 8925
|
|
},
|
|
{
|
|
"epoch": 2.820658611703388,
|
|
"grad_norm": 0.08947587288989334,
|
|
"learning_rate": 0.0018045860846011455,
|
|
"loss": 2.7886,
|
|
"step": 8930
|
|
},
|
|
{
|
|
"epoch": 2.822238016267867,
|
|
"grad_norm": 0.07867181893362114,
|
|
"learning_rate": 0.0018042584932792393,
|
|
"loss": 2.6616,
|
|
"step": 8935
|
|
},
|
|
{
|
|
"epoch": 2.8238174208323463,
|
|
"grad_norm": 0.08242063971099742,
|
|
"learning_rate": 0.001803930657387464,
|
|
"loss": 2.6616,
|
|
"step": 8940
|
|
},
|
|
{
|
|
"epoch": 2.825396825396825,
|
|
"grad_norm": 0.06935681434474747,
|
|
"learning_rate": 0.0018036025770255119,
|
|
"loss": 2.6493,
|
|
"step": 8945
|
|
},
|
|
{
|
|
"epoch": 2.8269762299613044,
|
|
"grad_norm": 0.07899255108632972,
|
|
"learning_rate": 0.0018032742522931505,
|
|
"loss": 2.704,
|
|
"step": 8950
|
|
},
|
|
{
|
|
"epoch": 2.8285556345257836,
|
|
"grad_norm": 0.08103020399055681,
|
|
"learning_rate": 0.0018029456832902213,
|
|
"loss": 2.5642,
|
|
"step": 8955
|
|
},
|
|
{
|
|
"epoch": 2.830135039090263,
|
|
"grad_norm": 0.06181469755310813,
|
|
"learning_rate": 0.0018026168701166401,
|
|
"loss": 2.6621,
|
|
"step": 8960
|
|
},
|
|
{
|
|
"epoch": 2.831714443654742,
|
|
"grad_norm": 0.06706026030234262,
|
|
"learning_rate": 0.0018022878128723966,
|
|
"loss": 2.5971,
|
|
"step": 8965
|
|
},
|
|
{
|
|
"epoch": 2.8332938482192214,
|
|
"grad_norm": 0.06883385907004572,
|
|
"learning_rate": 0.0018019585116575555,
|
|
"loss": 2.5211,
|
|
"step": 8970
|
|
},
|
|
{
|
|
"epoch": 2.8348732527837006,
|
|
"grad_norm": 0.10706360603552771,
|
|
"learning_rate": 0.0018016289665722543,
|
|
"loss": 2.6044,
|
|
"step": 8975
|
|
},
|
|
{
|
|
"epoch": 2.83645265734818,
|
|
"grad_norm": 0.08541241786364799,
|
|
"learning_rate": 0.0018012991777167065,
|
|
"loss": 2.6751,
|
|
"step": 8980
|
|
},
|
|
{
|
|
"epoch": 2.838032061912659,
|
|
"grad_norm": 0.08674184408950136,
|
|
"learning_rate": 0.0018009691451911984,
|
|
"loss": 2.5867,
|
|
"step": 8985
|
|
},
|
|
{
|
|
"epoch": 2.8396114664771384,
|
|
"grad_norm": 0.07121169031771728,
|
|
"learning_rate": 0.001800638869096091,
|
|
"loss": 2.6856,
|
|
"step": 8990
|
|
},
|
|
{
|
|
"epoch": 2.841190871041617,
|
|
"grad_norm": 0.08691352314313025,
|
|
"learning_rate": 0.0018003083495318184,
|
|
"loss": 2.6154,
|
|
"step": 8995
|
|
},
|
|
{
|
|
"epoch": 2.8427702756060964,
|
|
"grad_norm": 0.0695792691516324,
|
|
"learning_rate": 0.00179997758659889,
|
|
"loss": 2.6422,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 2.8443496801705757,
|
|
"grad_norm": 0.06266454082852482,
|
|
"learning_rate": 0.0017996465803978893,
|
|
"loss": 2.5924,
|
|
"step": 9005
|
|
},
|
|
{
|
|
"epoch": 2.845929084735055,
|
|
"grad_norm": 0.06347254201154383,
|
|
"learning_rate": 0.0017993153310294722,
|
|
"loss": 2.6595,
|
|
"step": 9010
|
|
},
|
|
{
|
|
"epoch": 2.847508489299534,
|
|
"grad_norm": 0.07694282442243464,
|
|
"learning_rate": 0.0017989838385943698,
|
|
"loss": 2.6099,
|
|
"step": 9015
|
|
},
|
|
{
|
|
"epoch": 2.8490878938640134,
|
|
"grad_norm": 0.0721336693151254,
|
|
"learning_rate": 0.0017986521031933874,
|
|
"loss": 2.5332,
|
|
"step": 9020
|
|
},
|
|
{
|
|
"epoch": 2.850667298428492,
|
|
"grad_norm": 0.08170449455876193,
|
|
"learning_rate": 0.0017983201249274026,
|
|
"loss": 2.6325,
|
|
"step": 9025
|
|
},
|
|
{
|
|
"epoch": 2.8522467029929714,
|
|
"grad_norm": 0.08908686952938166,
|
|
"learning_rate": 0.0017979879038973687,
|
|
"loss": 2.7075,
|
|
"step": 9030
|
|
},
|
|
{
|
|
"epoch": 2.8538261075574507,
|
|
"grad_norm": 0.06949885801379575,
|
|
"learning_rate": 0.0017976554402043116,
|
|
"loss": 2.7515,
|
|
"step": 9035
|
|
},
|
|
{
|
|
"epoch": 2.85540551212193,
|
|
"grad_norm": 0.07438198238236672,
|
|
"learning_rate": 0.0017973227339493317,
|
|
"loss": 2.6265,
|
|
"step": 9040
|
|
},
|
|
{
|
|
"epoch": 2.856984916686409,
|
|
"grad_norm": 0.07955118978605713,
|
|
"learning_rate": 0.0017969897852336027,
|
|
"loss": 2.6064,
|
|
"step": 9045
|
|
},
|
|
{
|
|
"epoch": 2.8585643212508884,
|
|
"grad_norm": 0.07190618121590747,
|
|
"learning_rate": 0.0017966565941583716,
|
|
"loss": 2.6345,
|
|
"step": 9050
|
|
},
|
|
{
|
|
"epoch": 2.8601437258153677,
|
|
"grad_norm": 0.08231651950371446,
|
|
"learning_rate": 0.0017963231608249606,
|
|
"loss": 2.5874,
|
|
"step": 9055
|
|
},
|
|
{
|
|
"epoch": 2.861723130379847,
|
|
"grad_norm": 0.06309285425461614,
|
|
"learning_rate": 0.0017959894853347641,
|
|
"loss": 2.7133,
|
|
"step": 9060
|
|
},
|
|
{
|
|
"epoch": 2.863302534944326,
|
|
"grad_norm": 0.07904453688390618,
|
|
"learning_rate": 0.001795655567789251,
|
|
"loss": 2.6427,
|
|
"step": 9065
|
|
},
|
|
{
|
|
"epoch": 2.8648819395088054,
|
|
"grad_norm": 0.07721599679986656,
|
|
"learning_rate": 0.0017953214082899631,
|
|
"loss": 2.6347,
|
|
"step": 9070
|
|
},
|
|
{
|
|
"epoch": 2.8664613440732842,
|
|
"grad_norm": 0.06904436553479418,
|
|
"learning_rate": 0.0017949870069385167,
|
|
"loss": 2.5924,
|
|
"step": 9075
|
|
},
|
|
{
|
|
"epoch": 2.8680407486377635,
|
|
"grad_norm": 0.08396225233471166,
|
|
"learning_rate": 0.0017946523638366005,
|
|
"loss": 2.655,
|
|
"step": 9080
|
|
},
|
|
{
|
|
"epoch": 2.8696201532022427,
|
|
"grad_norm": 0.07898738662235477,
|
|
"learning_rate": 0.0017943174790859778,
|
|
"loss": 2.6799,
|
|
"step": 9085
|
|
},
|
|
{
|
|
"epoch": 2.871199557766722,
|
|
"grad_norm": 0.08239123848848463,
|
|
"learning_rate": 0.0017939823527884844,
|
|
"loss": 2.5772,
|
|
"step": 9090
|
|
},
|
|
{
|
|
"epoch": 2.8727789623312012,
|
|
"grad_norm": 0.07204567013237982,
|
|
"learning_rate": 0.001793646985046031,
|
|
"loss": 2.6602,
|
|
"step": 9095
|
|
},
|
|
{
|
|
"epoch": 2.8743583668956805,
|
|
"grad_norm": 0.0704621218942999,
|
|
"learning_rate": 0.0017933113759605996,
|
|
"loss": 2.781,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 2.8759377714601593,
|
|
"grad_norm": 0.07728139646563548,
|
|
"learning_rate": 0.001792975525634248,
|
|
"loss": 2.6204,
|
|
"step": 9105
|
|
},
|
|
{
|
|
"epoch": 2.8775171760246385,
|
|
"grad_norm": 0.07943593620001566,
|
|
"learning_rate": 0.001792639434169105,
|
|
"loss": 2.5765,
|
|
"step": 9110
|
|
},
|
|
{
|
|
"epoch": 2.8790965805891178,
|
|
"grad_norm": 0.09213001247403269,
|
|
"learning_rate": 0.0017923031016673745,
|
|
"loss": 2.5809,
|
|
"step": 9115
|
|
},
|
|
{
|
|
"epoch": 2.880675985153597,
|
|
"grad_norm": 0.08413854923337333,
|
|
"learning_rate": 0.0017919665282313333,
|
|
"loss": 2.5438,
|
|
"step": 9120
|
|
},
|
|
{
|
|
"epoch": 2.8822553897180763,
|
|
"grad_norm": 0.07453017380711009,
|
|
"learning_rate": 0.0017916297139633304,
|
|
"loss": 2.6411,
|
|
"step": 9125
|
|
},
|
|
{
|
|
"epoch": 2.8838347942825555,
|
|
"grad_norm": 0.07573358551384778,
|
|
"learning_rate": 0.0017912926589657896,
|
|
"loss": 2.6865,
|
|
"step": 9130
|
|
},
|
|
{
|
|
"epoch": 2.8854141988470348,
|
|
"grad_norm": 0.08215839866502005,
|
|
"learning_rate": 0.0017909553633412068,
|
|
"loss": 2.6374,
|
|
"step": 9135
|
|
},
|
|
{
|
|
"epoch": 2.886993603411514,
|
|
"grad_norm": 0.06722700205323821,
|
|
"learning_rate": 0.0017906178271921518,
|
|
"loss": 2.6867,
|
|
"step": 9140
|
|
},
|
|
{
|
|
"epoch": 2.8885730079759933,
|
|
"grad_norm": 0.08422569175308589,
|
|
"learning_rate": 0.0017902800506212667,
|
|
"loss": 2.6698,
|
|
"step": 9145
|
|
},
|
|
{
|
|
"epoch": 2.8901524125404725,
|
|
"grad_norm": 0.07714249389406545,
|
|
"learning_rate": 0.0017899420337312674,
|
|
"loss": 2.6391,
|
|
"step": 9150
|
|
},
|
|
{
|
|
"epoch": 2.8917318171049513,
|
|
"grad_norm": 0.07899679303828389,
|
|
"learning_rate": 0.0017896037766249428,
|
|
"loss": 2.6281,
|
|
"step": 9155
|
|
},
|
|
{
|
|
"epoch": 2.8933112216694306,
|
|
"grad_norm": 0.065691455655413,
|
|
"learning_rate": 0.0017892652794051548,
|
|
"loss": 2.5724,
|
|
"step": 9160
|
|
},
|
|
{
|
|
"epoch": 2.89489062623391,
|
|
"grad_norm": 0.07336744027738036,
|
|
"learning_rate": 0.001788926542174838,
|
|
"loss": 2.6424,
|
|
"step": 9165
|
|
},
|
|
{
|
|
"epoch": 2.896470030798389,
|
|
"grad_norm": 0.0744732310866987,
|
|
"learning_rate": 0.0017885875650370002,
|
|
"loss": 2.6277,
|
|
"step": 9170
|
|
},
|
|
{
|
|
"epoch": 2.8980494353628683,
|
|
"grad_norm": 0.06972848194547832,
|
|
"learning_rate": 0.0017882483480947224,
|
|
"loss": 2.5853,
|
|
"step": 9175
|
|
},
|
|
{
|
|
"epoch": 2.8996288399273475,
|
|
"grad_norm": 0.08866138334123327,
|
|
"learning_rate": 0.001787908891451158,
|
|
"loss": 2.631,
|
|
"step": 9180
|
|
},
|
|
{
|
|
"epoch": 2.9012082444918263,
|
|
"grad_norm": 0.06284868660280536,
|
|
"learning_rate": 0.0017875691952095342,
|
|
"loss": 2.5575,
|
|
"step": 9185
|
|
},
|
|
{
|
|
"epoch": 2.9027876490563056,
|
|
"grad_norm": 0.06891932325044023,
|
|
"learning_rate": 0.0017872292594731498,
|
|
"loss": 2.6453,
|
|
"step": 9190
|
|
},
|
|
{
|
|
"epoch": 2.904367053620785,
|
|
"grad_norm": 0.09066742526387073,
|
|
"learning_rate": 0.0017868890843453773,
|
|
"loss": 2.6731,
|
|
"step": 9195
|
|
},
|
|
{
|
|
"epoch": 2.905946458185264,
|
|
"grad_norm": 0.06442962526556177,
|
|
"learning_rate": 0.0017865486699296623,
|
|
"loss": 2.5985,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 2.9075258627497433,
|
|
"grad_norm": 0.08971001588641436,
|
|
"learning_rate": 0.0017862080163295216,
|
|
"loss": 2.6238,
|
|
"step": 9205
|
|
},
|
|
{
|
|
"epoch": 2.9091052673142226,
|
|
"grad_norm": 0.08715457828992919,
|
|
"learning_rate": 0.0017858671236485467,
|
|
"loss": 2.6166,
|
|
"step": 9210
|
|
},
|
|
{
|
|
"epoch": 2.910684671878702,
|
|
"grad_norm": 0.07841888929423384,
|
|
"learning_rate": 0.0017855259919904002,
|
|
"loss": 2.6162,
|
|
"step": 9215
|
|
},
|
|
{
|
|
"epoch": 2.912264076443181,
|
|
"grad_norm": 0.09556274750492062,
|
|
"learning_rate": 0.0017851846214588189,
|
|
"loss": 2.6934,
|
|
"step": 9220
|
|
},
|
|
{
|
|
"epoch": 2.9138434810076603,
|
|
"grad_norm": 0.08687540070929045,
|
|
"learning_rate": 0.0017848430121576101,
|
|
"loss": 2.6846,
|
|
"step": 9225
|
|
},
|
|
{
|
|
"epoch": 2.9154228855721396,
|
|
"grad_norm": 0.0794664687643129,
|
|
"learning_rate": 0.0017845011641906563,
|
|
"loss": 2.7429,
|
|
"step": 9230
|
|
},
|
|
{
|
|
"epoch": 2.9170022901366184,
|
|
"grad_norm": 0.0564704948366244,
|
|
"learning_rate": 0.001784159077661911,
|
|
"loss": 2.6262,
|
|
"step": 9235
|
|
},
|
|
{
|
|
"epoch": 2.9185816947010976,
|
|
"grad_norm": 0.06430940252174853,
|
|
"learning_rate": 0.0017838167526754,
|
|
"loss": 2.694,
|
|
"step": 9240
|
|
},
|
|
{
|
|
"epoch": 2.920161099265577,
|
|
"grad_norm": 0.0739805659056966,
|
|
"learning_rate": 0.0017834741893352226,
|
|
"loss": 2.7582,
|
|
"step": 9245
|
|
},
|
|
{
|
|
"epoch": 2.921740503830056,
|
|
"grad_norm": 0.07103193701377211,
|
|
"learning_rate": 0.00178313138774555,
|
|
"loss": 2.651,
|
|
"step": 9250
|
|
},
|
|
{
|
|
"epoch": 2.9233199083945354,
|
|
"grad_norm": 0.07525436923618634,
|
|
"learning_rate": 0.0017827883480106257,
|
|
"loss": 2.8208,
|
|
"step": 9255
|
|
},
|
|
{
|
|
"epoch": 2.924899312959014,
|
|
"grad_norm": 0.06882539320170787,
|
|
"learning_rate": 0.0017824450702347663,
|
|
"loss": 2.6875,
|
|
"step": 9260
|
|
},
|
|
{
|
|
"epoch": 2.9264787175234934,
|
|
"grad_norm": 0.0605989026210221,
|
|
"learning_rate": 0.0017821015545223604,
|
|
"loss": 2.7123,
|
|
"step": 9265
|
|
},
|
|
{
|
|
"epoch": 2.9280581220879727,
|
|
"grad_norm": 0.06663742358876884,
|
|
"learning_rate": 0.0017817578009778686,
|
|
"loss": 2.6781,
|
|
"step": 9270
|
|
},
|
|
{
|
|
"epoch": 2.929637526652452,
|
|
"grad_norm": 0.09360076561750917,
|
|
"learning_rate": 0.0017814138097058244,
|
|
"loss": 2.6964,
|
|
"step": 9275
|
|
},
|
|
{
|
|
"epoch": 2.931216931216931,
|
|
"grad_norm": 0.07721356789273802,
|
|
"learning_rate": 0.001781069580810833,
|
|
"loss": 2.7239,
|
|
"step": 9280
|
|
},
|
|
{
|
|
"epoch": 2.9327963357814104,
|
|
"grad_norm": 0.0791584728486138,
|
|
"learning_rate": 0.0017807251143975727,
|
|
"loss": 2.6406,
|
|
"step": 9285
|
|
},
|
|
{
|
|
"epoch": 2.9343757403458897,
|
|
"grad_norm": 0.08301309547051534,
|
|
"learning_rate": 0.0017803804105707933,
|
|
"loss": 2.6048,
|
|
"step": 9290
|
|
},
|
|
{
|
|
"epoch": 2.935955144910369,
|
|
"grad_norm": 0.08859445122320836,
|
|
"learning_rate": 0.0017800354694353167,
|
|
"loss": 2.5487,
|
|
"step": 9295
|
|
},
|
|
{
|
|
"epoch": 2.937534549474848,
|
|
"grad_norm": 0.0708367658010825,
|
|
"learning_rate": 0.001779690291096038,
|
|
"loss": 2.7331,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 2.9391139540393274,
|
|
"grad_norm": 0.08041620724686906,
|
|
"learning_rate": 0.001779344875657923,
|
|
"loss": 2.7053,
|
|
"step": 9305
|
|
},
|
|
{
|
|
"epoch": 2.940693358603806,
|
|
"grad_norm": 0.07920350812926849,
|
|
"learning_rate": 0.0017789992232260113,
|
|
"loss": 2.5801,
|
|
"step": 9310
|
|
},
|
|
{
|
|
"epoch": 2.9422727631682855,
|
|
"grad_norm": 0.0679019390245231,
|
|
"learning_rate": 0.0017786533339054125,
|
|
"loss": 2.637,
|
|
"step": 9315
|
|
},
|
|
{
|
|
"epoch": 2.9438521677327647,
|
|
"grad_norm": 0.08024512137613557,
|
|
"learning_rate": 0.00177830720780131,
|
|
"loss": 2.717,
|
|
"step": 9320
|
|
},
|
|
{
|
|
"epoch": 2.945431572297244,
|
|
"grad_norm": 0.06745817346604058,
|
|
"learning_rate": 0.001777960845018958,
|
|
"loss": 2.6298,
|
|
"step": 9325
|
|
},
|
|
{
|
|
"epoch": 2.947010976861723,
|
|
"grad_norm": 0.07363382024958783,
|
|
"learning_rate": 0.0017776142456636843,
|
|
"loss": 2.7999,
|
|
"step": 9330
|
|
},
|
|
{
|
|
"epoch": 2.9485903814262024,
|
|
"grad_norm": 0.08057562652277658,
|
|
"learning_rate": 0.0017772674098408864,
|
|
"loss": 2.6378,
|
|
"step": 9335
|
|
},
|
|
{
|
|
"epoch": 2.9501697859906812,
|
|
"grad_norm": 0.0824127445696666,
|
|
"learning_rate": 0.0017769203376560353,
|
|
"loss": 2.5429,
|
|
"step": 9340
|
|
},
|
|
{
|
|
"epoch": 2.9517491905551605,
|
|
"grad_norm": 0.0678009430109042,
|
|
"learning_rate": 0.0017765730292146728,
|
|
"loss": 2.6524,
|
|
"step": 9345
|
|
},
|
|
{
|
|
"epoch": 2.9533285951196397,
|
|
"grad_norm": 0.06532565401449761,
|
|
"learning_rate": 0.0017762254846224144,
|
|
"loss": 2.5821,
|
|
"step": 9350
|
|
},
|
|
{
|
|
"epoch": 2.954907999684119,
|
|
"grad_norm": 0.10240606684967388,
|
|
"learning_rate": 0.0017758777039849456,
|
|
"loss": 2.6531,
|
|
"step": 9355
|
|
},
|
|
{
|
|
"epoch": 2.9564874042485982,
|
|
"grad_norm": 0.06945713608409096,
|
|
"learning_rate": 0.001775529687408024,
|
|
"loss": 2.5384,
|
|
"step": 9360
|
|
},
|
|
{
|
|
"epoch": 2.9580668088130775,
|
|
"grad_norm": 0.06926524796204718,
|
|
"learning_rate": 0.0017751814349974797,
|
|
"loss": 2.6369,
|
|
"step": 9365
|
|
},
|
|
{
|
|
"epoch": 2.9596462133775567,
|
|
"grad_norm": 0.07910183159512352,
|
|
"learning_rate": 0.0017748329468592137,
|
|
"loss": 2.6389,
|
|
"step": 9370
|
|
},
|
|
{
|
|
"epoch": 2.961225617942036,
|
|
"grad_norm": 0.07519712410902045,
|
|
"learning_rate": 0.001774484223099199,
|
|
"loss": 2.6558,
|
|
"step": 9375
|
|
},
|
|
{
|
|
"epoch": 2.9628050225065152,
|
|
"grad_norm": 0.09726346801188548,
|
|
"learning_rate": 0.0017741352638234807,
|
|
"loss": 2.6349,
|
|
"step": 9380
|
|
},
|
|
{
|
|
"epoch": 2.9643844270709945,
|
|
"grad_norm": 0.07907053145441649,
|
|
"learning_rate": 0.0017737860691381742,
|
|
"loss": 2.5493,
|
|
"step": 9385
|
|
},
|
|
{
|
|
"epoch": 2.9659638316354733,
|
|
"grad_norm": 0.08620631358894117,
|
|
"learning_rate": 0.0017734366391494684,
|
|
"loss": 2.5902,
|
|
"step": 9390
|
|
},
|
|
{
|
|
"epoch": 2.9675432361999525,
|
|
"grad_norm": 0.09907659889237273,
|
|
"learning_rate": 0.0017730869739636219,
|
|
"loss": 2.5682,
|
|
"step": 9395
|
|
},
|
|
{
|
|
"epoch": 2.9691226407644318,
|
|
"grad_norm": 0.10191989416827558,
|
|
"learning_rate": 0.0017727370736869662,
|
|
"loss": 2.6995,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 2.970702045328911,
|
|
"grad_norm": 0.08278402092617577,
|
|
"learning_rate": 0.0017723869384259038,
|
|
"loss": 2.6782,
|
|
"step": 9405
|
|
},
|
|
{
|
|
"epoch": 2.9722814498933903,
|
|
"grad_norm": 0.062115181425159635,
|
|
"learning_rate": 0.0017720365682869078,
|
|
"loss": 2.631,
|
|
"step": 9410
|
|
},
|
|
{
|
|
"epoch": 2.9738608544578695,
|
|
"grad_norm": 0.07399639042405928,
|
|
"learning_rate": 0.0017716859633765244,
|
|
"loss": 2.5604,
|
|
"step": 9415
|
|
},
|
|
{
|
|
"epoch": 2.9754402590223483,
|
|
"grad_norm": 0.07863036800465888,
|
|
"learning_rate": 0.00177133512380137,
|
|
"loss": 2.6059,
|
|
"step": 9420
|
|
},
|
|
{
|
|
"epoch": 2.9770196635868276,
|
|
"grad_norm": 0.08014900090250153,
|
|
"learning_rate": 0.0017709840496681324,
|
|
"loss": 2.5692,
|
|
"step": 9425
|
|
},
|
|
{
|
|
"epoch": 2.978599068151307,
|
|
"grad_norm": 0.07315791918630918,
|
|
"learning_rate": 0.0017706327410835713,
|
|
"loss": 2.5941,
|
|
"step": 9430
|
|
},
|
|
{
|
|
"epoch": 2.980178472715786,
|
|
"grad_norm": 0.08420232912319141,
|
|
"learning_rate": 0.0017702811981545174,
|
|
"loss": 2.6569,
|
|
"step": 9435
|
|
},
|
|
{
|
|
"epoch": 2.9817578772802653,
|
|
"grad_norm": 0.07675128706356427,
|
|
"learning_rate": 0.001769929420987873,
|
|
"loss": 2.6167,
|
|
"step": 9440
|
|
},
|
|
{
|
|
"epoch": 2.9833372818447446,
|
|
"grad_norm": 0.07447243945199063,
|
|
"learning_rate": 0.0017695774096906103,
|
|
"loss": 2.6945,
|
|
"step": 9445
|
|
},
|
|
{
|
|
"epoch": 2.984916686409224,
|
|
"grad_norm": 0.06591328173763196,
|
|
"learning_rate": 0.0017692251643697747,
|
|
"loss": 2.6763,
|
|
"step": 9450
|
|
},
|
|
{
|
|
"epoch": 2.986496090973703,
|
|
"grad_norm": 0.06352381774469144,
|
|
"learning_rate": 0.0017688726851324812,
|
|
"loss": 2.5987,
|
|
"step": 9455
|
|
},
|
|
{
|
|
"epoch": 2.9880754955381823,
|
|
"grad_norm": 0.0725897617533654,
|
|
"learning_rate": 0.0017685199720859166,
|
|
"loss": 2.7126,
|
|
"step": 9460
|
|
},
|
|
{
|
|
"epoch": 2.9896549001026615,
|
|
"grad_norm": 0.06655104738152724,
|
|
"learning_rate": 0.0017681670253373385,
|
|
"loss": 2.6709,
|
|
"step": 9465
|
|
},
|
|
{
|
|
"epoch": 2.9912343046671404,
|
|
"grad_norm": 0.06338658405519165,
|
|
"learning_rate": 0.0017678138449940765,
|
|
"loss": 2.6405,
|
|
"step": 9470
|
|
},
|
|
{
|
|
"epoch": 2.9928137092316196,
|
|
"grad_norm": 0.07272869654997406,
|
|
"learning_rate": 0.0017674604311635294,
|
|
"loss": 2.691,
|
|
"step": 9475
|
|
},
|
|
{
|
|
"epoch": 2.994393113796099,
|
|
"grad_norm": 0.06869160712622453,
|
|
"learning_rate": 0.0017671067839531687,
|
|
"loss": 2.6887,
|
|
"step": 9480
|
|
},
|
|
{
|
|
"epoch": 2.995972518360578,
|
|
"grad_norm": 0.08164852981872348,
|
|
"learning_rate": 0.0017667529034705364,
|
|
"loss": 2.6771,
|
|
"step": 9485
|
|
},
|
|
{
|
|
"epoch": 2.9975519229250573,
|
|
"grad_norm": 0.08074893728400574,
|
|
"learning_rate": 0.0017663987898232448,
|
|
"loss": 2.6561,
|
|
"step": 9490
|
|
},
|
|
{
|
|
"epoch": 2.9991313274895366,
|
|
"grad_norm": 0.062375815361901135,
|
|
"learning_rate": 0.001766044443118978,
|
|
"loss": 2.5912,
|
|
"step": 9495
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"eval_loss": 2.620439052581787,
|
|
"eval_runtime": 118.3599,
|
|
"eval_samples_per_second": 22.381,
|
|
"eval_steps_per_second": 5.602,
|
|
"step": 9498
|
|
},
|
|
{
|
|
"epoch": 3.0006317618257916,
|
|
"grad_norm": 0.06853007086830248,
|
|
"learning_rate": 0.0017656898634654905,
|
|
"loss": 2.5767,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 3.002211166390271,
|
|
"grad_norm": 0.07075166572388598,
|
|
"learning_rate": 0.0017653350509706075,
|
|
"loss": 2.7542,
|
|
"step": 9505
|
|
},
|
|
{
|
|
"epoch": 3.00379057095475,
|
|
"grad_norm": 0.07412174784052934,
|
|
"learning_rate": 0.0017649800057422257,
|
|
"loss": 2.6384,
|
|
"step": 9510
|
|
},
|
|
{
|
|
"epoch": 3.0053699755192294,
|
|
"grad_norm": 0.07560914597403712,
|
|
"learning_rate": 0.0017646247278883115,
|
|
"loss": 2.5841,
|
|
"step": 9515
|
|
},
|
|
{
|
|
"epoch": 3.0069493800837086,
|
|
"grad_norm": 0.060978546234931104,
|
|
"learning_rate": 0.0017642692175169029,
|
|
"loss": 2.5974,
|
|
"step": 9520
|
|
},
|
|
{
|
|
"epoch": 3.008528784648188,
|
|
"grad_norm": 0.06719904776452261,
|
|
"learning_rate": 0.0017639134747361083,
|
|
"loss": 2.604,
|
|
"step": 9525
|
|
},
|
|
{
|
|
"epoch": 3.0101081892126667,
|
|
"grad_norm": 0.06949796822599907,
|
|
"learning_rate": 0.0017635574996541065,
|
|
"loss": 2.5313,
|
|
"step": 9530
|
|
},
|
|
{
|
|
"epoch": 3.011687593777146,
|
|
"grad_norm": 0.09536273280100076,
|
|
"learning_rate": 0.001763201292379148,
|
|
"loss": 2.5393,
|
|
"step": 9535
|
|
},
|
|
{
|
|
"epoch": 3.013266998341625,
|
|
"grad_norm": 0.07227009941124116,
|
|
"learning_rate": 0.0017628448530195527,
|
|
"loss": 2.6122,
|
|
"step": 9540
|
|
},
|
|
{
|
|
"epoch": 3.0148464029061044,
|
|
"grad_norm": 0.07996626556678565,
|
|
"learning_rate": 0.0017624881816837115,
|
|
"loss": 2.5837,
|
|
"step": 9545
|
|
},
|
|
{
|
|
"epoch": 3.0164258074705836,
|
|
"grad_norm": 0.07043989340248183,
|
|
"learning_rate": 0.001762131278480086,
|
|
"loss": 2.6142,
|
|
"step": 9550
|
|
},
|
|
{
|
|
"epoch": 3.018005212035063,
|
|
"grad_norm": 0.07238655665106472,
|
|
"learning_rate": 0.001761774143517208,
|
|
"loss": 2.6761,
|
|
"step": 9555
|
|
},
|
|
{
|
|
"epoch": 3.019584616599542,
|
|
"grad_norm": 0.07953742761959369,
|
|
"learning_rate": 0.0017614167769036797,
|
|
"loss": 2.6247,
|
|
"step": 9560
|
|
},
|
|
{
|
|
"epoch": 3.0211640211640214,
|
|
"grad_norm": 0.07777915965850513,
|
|
"learning_rate": 0.0017610591787481748,
|
|
"loss": 2.5844,
|
|
"step": 9565
|
|
},
|
|
{
|
|
"epoch": 3.0227434257285,
|
|
"grad_norm": 0.07908379772586491,
|
|
"learning_rate": 0.001760701349159436,
|
|
"loss": 2.6966,
|
|
"step": 9570
|
|
},
|
|
{
|
|
"epoch": 3.0243228302929794,
|
|
"grad_norm": 0.08185851953480823,
|
|
"learning_rate": 0.0017603432882462773,
|
|
"loss": 2.5849,
|
|
"step": 9575
|
|
},
|
|
{
|
|
"epoch": 3.0259022348574587,
|
|
"grad_norm": 0.0770538911908877,
|
|
"learning_rate": 0.0017599849961175825,
|
|
"loss": 2.572,
|
|
"step": 9580
|
|
},
|
|
{
|
|
"epoch": 3.027481639421938,
|
|
"grad_norm": 0.08170033315514708,
|
|
"learning_rate": 0.0017596264728823063,
|
|
"loss": 2.635,
|
|
"step": 9585
|
|
},
|
|
{
|
|
"epoch": 3.029061043986417,
|
|
"grad_norm": 0.07094356873902435,
|
|
"learning_rate": 0.0017592677186494727,
|
|
"loss": 2.6195,
|
|
"step": 9590
|
|
},
|
|
{
|
|
"epoch": 3.0306404485508964,
|
|
"grad_norm": 0.07146743501629951,
|
|
"learning_rate": 0.0017589087335281772,
|
|
"loss": 2.5883,
|
|
"step": 9595
|
|
},
|
|
{
|
|
"epoch": 3.0322198531153757,
|
|
"grad_norm": 0.06500767908445167,
|
|
"learning_rate": 0.0017585495176275848,
|
|
"loss": 2.6091,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 3.0337992576798545,
|
|
"grad_norm": 0.08646040532750637,
|
|
"learning_rate": 0.00175819007105693,
|
|
"loss": 2.6357,
|
|
"step": 9605
|
|
},
|
|
{
|
|
"epoch": 3.0353786622443337,
|
|
"grad_norm": 0.07956158839233514,
|
|
"learning_rate": 0.0017578303939255195,
|
|
"loss": 2.6634,
|
|
"step": 9610
|
|
},
|
|
{
|
|
"epoch": 3.036958066808813,
|
|
"grad_norm": 0.077271325539974,
|
|
"learning_rate": 0.0017574704863427277,
|
|
"loss": 2.5939,
|
|
"step": 9615
|
|
},
|
|
{
|
|
"epoch": 3.038537471373292,
|
|
"grad_norm": 0.06878782348149351,
|
|
"learning_rate": 0.0017571103484180007,
|
|
"loss": 2.6799,
|
|
"step": 9620
|
|
},
|
|
{
|
|
"epoch": 3.0401168759377715,
|
|
"grad_norm": 0.07725957018593155,
|
|
"learning_rate": 0.0017567499802608542,
|
|
"loss": 2.6557,
|
|
"step": 9625
|
|
},
|
|
{
|
|
"epoch": 3.0416962805022507,
|
|
"grad_norm": 0.06834187968092709,
|
|
"learning_rate": 0.0017563893819808737,
|
|
"loss": 2.6065,
|
|
"step": 9630
|
|
},
|
|
{
|
|
"epoch": 3.04327568506673,
|
|
"grad_norm": 0.06522637545595603,
|
|
"learning_rate": 0.0017560285536877148,
|
|
"loss": 2.649,
|
|
"step": 9635
|
|
},
|
|
{
|
|
"epoch": 3.044855089631209,
|
|
"grad_norm": 0.07050536765462488,
|
|
"learning_rate": 0.001755667495491103,
|
|
"loss": 2.6291,
|
|
"step": 9640
|
|
},
|
|
{
|
|
"epoch": 3.046434494195688,
|
|
"grad_norm": 0.06027223804064083,
|
|
"learning_rate": 0.0017553062075008339,
|
|
"loss": 2.5298,
|
|
"step": 9645
|
|
},
|
|
{
|
|
"epoch": 3.0480138987601673,
|
|
"grad_norm": 0.06690320540753218,
|
|
"learning_rate": 0.0017549446898267732,
|
|
"loss": 2.6128,
|
|
"step": 9650
|
|
},
|
|
{
|
|
"epoch": 3.0495933033246465,
|
|
"grad_norm": 0.06605666511918341,
|
|
"learning_rate": 0.0017545829425788554,
|
|
"loss": 2.6637,
|
|
"step": 9655
|
|
},
|
|
{
|
|
"epoch": 3.0511727078891258,
|
|
"grad_norm": 0.08318070058406675,
|
|
"learning_rate": 0.001754220965867086,
|
|
"loss": 2.5553,
|
|
"step": 9660
|
|
},
|
|
{
|
|
"epoch": 3.052752112453605,
|
|
"grad_norm": 0.062722653781602,
|
|
"learning_rate": 0.0017538587598015401,
|
|
"loss": 2.5263,
|
|
"step": 9665
|
|
},
|
|
{
|
|
"epoch": 3.0543315170180843,
|
|
"grad_norm": 0.07799851012916846,
|
|
"learning_rate": 0.0017534963244923616,
|
|
"loss": 2.6617,
|
|
"step": 9670
|
|
},
|
|
{
|
|
"epoch": 3.0559109215825635,
|
|
"grad_norm": 0.08255384317691386,
|
|
"learning_rate": 0.0017531336600497647,
|
|
"loss": 2.6013,
|
|
"step": 9675
|
|
},
|
|
{
|
|
"epoch": 3.0574903261470427,
|
|
"grad_norm": 0.08425274192174091,
|
|
"learning_rate": 0.0017527707665840344,
|
|
"loss": 2.5305,
|
|
"step": 9680
|
|
},
|
|
{
|
|
"epoch": 3.0590697307115216,
|
|
"grad_norm": 0.07106324766354837,
|
|
"learning_rate": 0.001752407644205523,
|
|
"loss": 2.5293,
|
|
"step": 9685
|
|
},
|
|
{
|
|
"epoch": 3.060649135276001,
|
|
"grad_norm": 0.07181770959936766,
|
|
"learning_rate": 0.0017520442930246546,
|
|
"loss": 2.7556,
|
|
"step": 9690
|
|
},
|
|
{
|
|
"epoch": 3.06222853984048,
|
|
"grad_norm": 0.07549862141267126,
|
|
"learning_rate": 0.0017516807131519214,
|
|
"loss": 2.5865,
|
|
"step": 9695
|
|
},
|
|
{
|
|
"epoch": 3.0638079444049593,
|
|
"grad_norm": 0.07924017400028194,
|
|
"learning_rate": 0.001751316904697886,
|
|
"loss": 2.5501,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 3.0653873489694385,
|
|
"grad_norm": 0.07144201821865072,
|
|
"learning_rate": 0.0017509528677731802,
|
|
"loss": 2.6066,
|
|
"step": 9705
|
|
},
|
|
{
|
|
"epoch": 3.066966753533918,
|
|
"grad_norm": 0.08845565636076341,
|
|
"learning_rate": 0.0017505886024885055,
|
|
"loss": 2.6793,
|
|
"step": 9710
|
|
},
|
|
{
|
|
"epoch": 3.068546158098397,
|
|
"grad_norm": 0.07287890828079492,
|
|
"learning_rate": 0.0017502241089546323,
|
|
"loss": 2.7102,
|
|
"step": 9715
|
|
},
|
|
{
|
|
"epoch": 3.0701255626628763,
|
|
"grad_norm": 0.06424521906054119,
|
|
"learning_rate": 0.0017498593872824007,
|
|
"loss": 2.5869,
|
|
"step": 9720
|
|
},
|
|
{
|
|
"epoch": 3.071704967227355,
|
|
"grad_norm": 0.07195688620873095,
|
|
"learning_rate": 0.0017494944375827206,
|
|
"loss": 2.5623,
|
|
"step": 9725
|
|
},
|
|
{
|
|
"epoch": 3.0732843717918343,
|
|
"grad_norm": 0.08059766034889969,
|
|
"learning_rate": 0.0017491292599665705,
|
|
"loss": 2.5604,
|
|
"step": 9730
|
|
},
|
|
{
|
|
"epoch": 3.0748637763563136,
|
|
"grad_norm": 0.06236888181816163,
|
|
"learning_rate": 0.0017487638545449992,
|
|
"loss": 2.6125,
|
|
"step": 9735
|
|
},
|
|
{
|
|
"epoch": 3.076443180920793,
|
|
"grad_norm": 0.0673023550989496,
|
|
"learning_rate": 0.0017483982214291233,
|
|
"loss": 2.6745,
|
|
"step": 9740
|
|
},
|
|
{
|
|
"epoch": 3.078022585485272,
|
|
"grad_norm": 0.07204762804360407,
|
|
"learning_rate": 0.00174803236073013,
|
|
"loss": 2.5546,
|
|
"step": 9745
|
|
},
|
|
{
|
|
"epoch": 3.0796019900497513,
|
|
"grad_norm": 0.06487939861273825,
|
|
"learning_rate": 0.001747666272559275,
|
|
"loss": 2.5708,
|
|
"step": 9750
|
|
},
|
|
{
|
|
"epoch": 3.0811813946142306,
|
|
"grad_norm": 0.06301181261958937,
|
|
"learning_rate": 0.0017472999570278835,
|
|
"loss": 2.502,
|
|
"step": 9755
|
|
},
|
|
{
|
|
"epoch": 3.08276079917871,
|
|
"grad_norm": 0.07313450032879039,
|
|
"learning_rate": 0.0017469334142473502,
|
|
"loss": 2.5558,
|
|
"step": 9760
|
|
},
|
|
{
|
|
"epoch": 3.0843402037431886,
|
|
"grad_norm": 0.06526967263517186,
|
|
"learning_rate": 0.0017465666443291373,
|
|
"loss": 2.6304,
|
|
"step": 9765
|
|
},
|
|
{
|
|
"epoch": 3.085919608307668,
|
|
"grad_norm": 0.09200477440986868,
|
|
"learning_rate": 0.001746199647384778,
|
|
"loss": 2.6975,
|
|
"step": 9770
|
|
},
|
|
{
|
|
"epoch": 3.087499012872147,
|
|
"grad_norm": 0.06902130842790342,
|
|
"learning_rate": 0.0017458324235258736,
|
|
"loss": 2.6248,
|
|
"step": 9775
|
|
},
|
|
{
|
|
"epoch": 3.0890784174366264,
|
|
"grad_norm": 0.09976036882145513,
|
|
"learning_rate": 0.0017454649728640944,
|
|
"loss": 2.7803,
|
|
"step": 9780
|
|
},
|
|
{
|
|
"epoch": 3.0906578220011056,
|
|
"grad_norm": 0.07615180829761749,
|
|
"learning_rate": 0.00174509729551118,
|
|
"loss": 2.5904,
|
|
"step": 9785
|
|
},
|
|
{
|
|
"epoch": 3.092237226565585,
|
|
"grad_norm": 0.07064698067596042,
|
|
"learning_rate": 0.0017447293915789385,
|
|
"loss": 2.7008,
|
|
"step": 9790
|
|
},
|
|
{
|
|
"epoch": 3.093816631130064,
|
|
"grad_norm": 0.06806186891626341,
|
|
"learning_rate": 0.0017443612611792471,
|
|
"loss": 2.6445,
|
|
"step": 9795
|
|
},
|
|
{
|
|
"epoch": 3.0953960356945434,
|
|
"grad_norm": 0.07944195857211085,
|
|
"learning_rate": 0.0017439929044240521,
|
|
"loss": 2.5441,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 3.096975440259022,
|
|
"grad_norm": 0.06674703110757672,
|
|
"learning_rate": 0.0017436243214253686,
|
|
"loss": 2.6004,
|
|
"step": 9805
|
|
},
|
|
{
|
|
"epoch": 3.0985548448235014,
|
|
"grad_norm": 0.06689002583984435,
|
|
"learning_rate": 0.0017432555122952797,
|
|
"loss": 2.5689,
|
|
"step": 9810
|
|
},
|
|
{
|
|
"epoch": 3.1001342493879807,
|
|
"grad_norm": 0.06592068441572836,
|
|
"learning_rate": 0.0017428864771459388,
|
|
"loss": 2.6173,
|
|
"step": 9815
|
|
},
|
|
{
|
|
"epoch": 3.10171365395246,
|
|
"grad_norm": 0.07615257679874533,
|
|
"learning_rate": 0.0017425172160895662,
|
|
"loss": 2.639,
|
|
"step": 9820
|
|
},
|
|
{
|
|
"epoch": 3.103293058516939,
|
|
"grad_norm": 0.06105335137406268,
|
|
"learning_rate": 0.0017421477292384525,
|
|
"loss": 2.6122,
|
|
"step": 9825
|
|
},
|
|
{
|
|
"epoch": 3.1048724630814184,
|
|
"grad_norm": 0.09219718184998302,
|
|
"learning_rate": 0.001741778016704956,
|
|
"loss": 2.6857,
|
|
"step": 9830
|
|
},
|
|
{
|
|
"epoch": 3.1064518676458976,
|
|
"grad_norm": 0.07152092051204469,
|
|
"learning_rate": 0.0017414080786015038,
|
|
"loss": 2.5573,
|
|
"step": 9835
|
|
},
|
|
{
|
|
"epoch": 3.108031272210377,
|
|
"grad_norm": 0.09511116323096488,
|
|
"learning_rate": 0.001741037915040592,
|
|
"loss": 2.5658,
|
|
"step": 9840
|
|
},
|
|
{
|
|
"epoch": 3.1096106767748557,
|
|
"grad_norm": 0.06827500668300775,
|
|
"learning_rate": 0.0017406675261347848,
|
|
"loss": 2.5764,
|
|
"step": 9845
|
|
},
|
|
{
|
|
"epoch": 3.111190081339335,
|
|
"grad_norm": 0.06580983086010492,
|
|
"learning_rate": 0.0017402969119967155,
|
|
"loss": 2.6376,
|
|
"step": 9850
|
|
},
|
|
{
|
|
"epoch": 3.112769485903814,
|
|
"grad_norm": 0.06724198233210324,
|
|
"learning_rate": 0.0017399260727390847,
|
|
"loss": 2.6765,
|
|
"step": 9855
|
|
},
|
|
{
|
|
"epoch": 3.1143488904682934,
|
|
"grad_norm": 0.08960721837846888,
|
|
"learning_rate": 0.0017395550084746629,
|
|
"loss": 2.5794,
|
|
"step": 9860
|
|
},
|
|
{
|
|
"epoch": 3.1159282950327727,
|
|
"grad_norm": 0.0702518321175971,
|
|
"learning_rate": 0.0017391837193162882,
|
|
"loss": 2.5468,
|
|
"step": 9865
|
|
},
|
|
{
|
|
"epoch": 3.117507699597252,
|
|
"grad_norm": 0.060236678991715406,
|
|
"learning_rate": 0.0017388122053768674,
|
|
"loss": 2.6592,
|
|
"step": 9870
|
|
},
|
|
{
|
|
"epoch": 3.119087104161731,
|
|
"grad_norm": 0.07673494445082678,
|
|
"learning_rate": 0.001738440466769375,
|
|
"loss": 2.6642,
|
|
"step": 9875
|
|
},
|
|
{
|
|
"epoch": 3.1206665087262104,
|
|
"grad_norm": 0.0920573903652007,
|
|
"learning_rate": 0.001738068503606855,
|
|
"loss": 2.614,
|
|
"step": 9880
|
|
},
|
|
{
|
|
"epoch": 3.1222459132906892,
|
|
"grad_norm": 0.08801492578726929,
|
|
"learning_rate": 0.0017376963160024184,
|
|
"loss": 2.595,
|
|
"step": 9885
|
|
},
|
|
{
|
|
"epoch": 3.1238253178551685,
|
|
"grad_norm": 0.07984682108598687,
|
|
"learning_rate": 0.0017373239040692455,
|
|
"loss": 2.6862,
|
|
"step": 9890
|
|
},
|
|
{
|
|
"epoch": 3.1254047224196477,
|
|
"grad_norm": 0.06869423990760569,
|
|
"learning_rate": 0.0017369512679205844,
|
|
"loss": 2.6215,
|
|
"step": 9895
|
|
},
|
|
{
|
|
"epoch": 3.126984126984127,
|
|
"grad_norm": 0.07429979311671929,
|
|
"learning_rate": 0.0017365784076697512,
|
|
"loss": 2.6688,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 3.1285635315486062,
|
|
"grad_norm": 0.06488201514452846,
|
|
"learning_rate": 0.00173620532343013,
|
|
"loss": 2.5465,
|
|
"step": 9905
|
|
},
|
|
{
|
|
"epoch": 3.1301429361130855,
|
|
"grad_norm": 0.06599819169160161,
|
|
"learning_rate": 0.001735832015315174,
|
|
"loss": 2.5547,
|
|
"step": 9910
|
|
},
|
|
{
|
|
"epoch": 3.1317223406775647,
|
|
"grad_norm": 0.07495722665220762,
|
|
"learning_rate": 0.0017354584834384035,
|
|
"loss": 2.5515,
|
|
"step": 9915
|
|
},
|
|
{
|
|
"epoch": 3.1333017452420435,
|
|
"grad_norm": 0.10720551807822579,
|
|
"learning_rate": 0.001735084727913407,
|
|
"loss": 2.6747,
|
|
"step": 9920
|
|
},
|
|
{
|
|
"epoch": 3.1348811498065228,
|
|
"grad_norm": 0.07199510657860425,
|
|
"learning_rate": 0.0017347107488538413,
|
|
"loss": 2.6041,
|
|
"step": 9925
|
|
},
|
|
{
|
|
"epoch": 3.136460554371002,
|
|
"grad_norm": 0.07082328358473082,
|
|
"learning_rate": 0.0017343365463734313,
|
|
"loss": 2.6154,
|
|
"step": 9930
|
|
},
|
|
{
|
|
"epoch": 3.1380399589354813,
|
|
"grad_norm": 0.06337705531192898,
|
|
"learning_rate": 0.0017339621205859693,
|
|
"loss": 2.6622,
|
|
"step": 9935
|
|
},
|
|
{
|
|
"epoch": 3.1396193634999605,
|
|
"grad_norm": 0.07163483374589369,
|
|
"learning_rate": 0.0017335874716053158,
|
|
"loss": 2.6712,
|
|
"step": 9940
|
|
},
|
|
{
|
|
"epoch": 3.1411987680644398,
|
|
"grad_norm": 0.0688206120903354,
|
|
"learning_rate": 0.001733212599545399,
|
|
"loss": 2.5605,
|
|
"step": 9945
|
|
},
|
|
{
|
|
"epoch": 3.142778172628919,
|
|
"grad_norm": 0.07385893720045704,
|
|
"learning_rate": 0.0017328375045202158,
|
|
"loss": 2.6157,
|
|
"step": 9950
|
|
},
|
|
{
|
|
"epoch": 3.1443575771933983,
|
|
"grad_norm": 0.07979459656789621,
|
|
"learning_rate": 0.0017324621866438294,
|
|
"loss": 2.6269,
|
|
"step": 9955
|
|
},
|
|
{
|
|
"epoch": 3.1459369817578775,
|
|
"grad_norm": 0.06655834052180862,
|
|
"learning_rate": 0.0017320866460303719,
|
|
"loss": 2.532,
|
|
"step": 9960
|
|
},
|
|
{
|
|
"epoch": 3.1475163863223563,
|
|
"grad_norm": 0.08016158513606657,
|
|
"learning_rate": 0.001731710882794043,
|
|
"loss": 2.6182,
|
|
"step": 9965
|
|
},
|
|
{
|
|
"epoch": 3.1490957908868356,
|
|
"grad_norm": 0.07361139147390075,
|
|
"learning_rate": 0.0017313348970491092,
|
|
"loss": 2.5775,
|
|
"step": 9970
|
|
},
|
|
{
|
|
"epoch": 3.150675195451315,
|
|
"grad_norm": 0.07320547421945703,
|
|
"learning_rate": 0.0017309586889099062,
|
|
"loss": 2.6028,
|
|
"step": 9975
|
|
},
|
|
{
|
|
"epoch": 3.152254600015794,
|
|
"grad_norm": 0.06761582611132382,
|
|
"learning_rate": 0.001730582258490836,
|
|
"loss": 2.6096,
|
|
"step": 9980
|
|
},
|
|
{
|
|
"epoch": 3.1538340045802733,
|
|
"grad_norm": 0.061185146073342085,
|
|
"learning_rate": 0.001730205605906369,
|
|
"loss": 2.6569,
|
|
"step": 9985
|
|
},
|
|
{
|
|
"epoch": 3.1554134091447525,
|
|
"grad_norm": 0.07883684609795155,
|
|
"learning_rate": 0.0017298287312710423,
|
|
"loss": 2.628,
|
|
"step": 9990
|
|
},
|
|
{
|
|
"epoch": 3.156992813709232,
|
|
"grad_norm": 0.06254523129730534,
|
|
"learning_rate": 0.0017294516346994615,
|
|
"loss": 2.5979,
|
|
"step": 9995
|
|
},
|
|
{
|
|
"epoch": 3.1585722182737106,
|
|
"grad_norm": 0.06917664553913802,
|
|
"learning_rate": 0.0017290743163062994,
|
|
"loss": 2.6045,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 3.16015162283819,
|
|
"grad_norm": 0.07158404060336618,
|
|
"learning_rate": 0.0017286967762062957,
|
|
"loss": 2.5623,
|
|
"step": 10005
|
|
},
|
|
{
|
|
"epoch": 3.161731027402669,
|
|
"grad_norm": 0.06991889347493886,
|
|
"learning_rate": 0.0017283190145142581,
|
|
"loss": 2.6436,
|
|
"step": 10010
|
|
},
|
|
{
|
|
"epoch": 3.1633104319671483,
|
|
"grad_norm": 0.05855646545913392,
|
|
"learning_rate": 0.001727941031345062,
|
|
"loss": 2.6896,
|
|
"step": 10015
|
|
},
|
|
{
|
|
"epoch": 3.1648898365316276,
|
|
"grad_norm": 0.05698565849999702,
|
|
"learning_rate": 0.0017275628268136486,
|
|
"loss": 2.603,
|
|
"step": 10020
|
|
},
|
|
{
|
|
"epoch": 3.166469241096107,
|
|
"grad_norm": 0.0766093223960529,
|
|
"learning_rate": 0.0017271844010350286,
|
|
"loss": 2.5193,
|
|
"step": 10025
|
|
},
|
|
{
|
|
"epoch": 3.168048645660586,
|
|
"grad_norm": 0.08252746996753374,
|
|
"learning_rate": 0.0017268057541242779,
|
|
"loss": 2.6378,
|
|
"step": 10030
|
|
},
|
|
{
|
|
"epoch": 3.1696280502250653,
|
|
"grad_norm": 0.07373629477286828,
|
|
"learning_rate": 0.0017264268861965414,
|
|
"loss": 2.566,
|
|
"step": 10035
|
|
},
|
|
{
|
|
"epoch": 3.1712074547895446,
|
|
"grad_norm": 0.060221071508953124,
|
|
"learning_rate": 0.0017260477973670301,
|
|
"loss": 2.5316,
|
|
"step": 10040
|
|
},
|
|
{
|
|
"epoch": 3.1727868593540234,
|
|
"grad_norm": 0.07124905911273843,
|
|
"learning_rate": 0.001725668487751022,
|
|
"loss": 2.651,
|
|
"step": 10045
|
|
},
|
|
{
|
|
"epoch": 3.1743662639185026,
|
|
"grad_norm": 0.05897673333679736,
|
|
"learning_rate": 0.0017252889574638638,
|
|
"loss": 2.6663,
|
|
"step": 10050
|
|
},
|
|
{
|
|
"epoch": 3.175945668482982,
|
|
"grad_norm": 0.06628163393976025,
|
|
"learning_rate": 0.0017249092066209672,
|
|
"loss": 2.4943,
|
|
"step": 10055
|
|
},
|
|
{
|
|
"epoch": 3.177525073047461,
|
|
"grad_norm": 0.07978408642895472,
|
|
"learning_rate": 0.0017245292353378129,
|
|
"loss": 2.5568,
|
|
"step": 10060
|
|
},
|
|
{
|
|
"epoch": 3.1791044776119404,
|
|
"grad_norm": 0.08663555240063688,
|
|
"learning_rate": 0.0017241490437299467,
|
|
"loss": 2.6161,
|
|
"step": 10065
|
|
},
|
|
{
|
|
"epoch": 3.1806838821764196,
|
|
"grad_norm": 0.08172943902726323,
|
|
"learning_rate": 0.0017237686319129834,
|
|
"loss": 2.5907,
|
|
"step": 10070
|
|
},
|
|
{
|
|
"epoch": 3.182263286740899,
|
|
"grad_norm": 0.08483170153181023,
|
|
"learning_rate": 0.0017233880000026031,
|
|
"loss": 2.5362,
|
|
"step": 10075
|
|
},
|
|
{
|
|
"epoch": 3.1838426913053777,
|
|
"grad_norm": 0.07807691290483945,
|
|
"learning_rate": 0.0017230071481145544,
|
|
"loss": 2.5229,
|
|
"step": 10080
|
|
},
|
|
{
|
|
"epoch": 3.185422095869857,
|
|
"grad_norm": 0.07373477776597755,
|
|
"learning_rate": 0.001722626076364651,
|
|
"loss": 2.599,
|
|
"step": 10085
|
|
},
|
|
{
|
|
"epoch": 3.187001500434336,
|
|
"grad_norm": 0.0776405119123419,
|
|
"learning_rate": 0.0017222447848687747,
|
|
"loss": 2.5785,
|
|
"step": 10090
|
|
},
|
|
{
|
|
"epoch": 3.1885809049988154,
|
|
"grad_norm": 0.06815474505145369,
|
|
"learning_rate": 0.0017218632737428742,
|
|
"loss": 2.6602,
|
|
"step": 10095
|
|
},
|
|
{
|
|
"epoch": 3.1901603095632947,
|
|
"grad_norm": 0.07314480378904138,
|
|
"learning_rate": 0.0017214815431029638,
|
|
"loss": 2.6064,
|
|
"step": 10100
|
|
},
|
|
{
|
|
"epoch": 3.191739714127774,
|
|
"grad_norm": 0.09008509637089876,
|
|
"learning_rate": 0.0017210995930651261,
|
|
"loss": 2.528,
|
|
"step": 10105
|
|
},
|
|
{
|
|
"epoch": 3.193319118692253,
|
|
"grad_norm": 0.07379312421095183,
|
|
"learning_rate": 0.0017207174237455095,
|
|
"loss": 2.5804,
|
|
"step": 10110
|
|
},
|
|
{
|
|
"epoch": 3.1948985232567324,
|
|
"grad_norm": 0.0639290039333819,
|
|
"learning_rate": 0.0017203350352603289,
|
|
"loss": 2.5763,
|
|
"step": 10115
|
|
},
|
|
{
|
|
"epoch": 3.196477927821211,
|
|
"grad_norm": 0.07728896943521313,
|
|
"learning_rate": 0.0017199524277258665,
|
|
"loss": 2.5719,
|
|
"step": 10120
|
|
},
|
|
{
|
|
"epoch": 3.1980573323856905,
|
|
"grad_norm": 0.06501244129816812,
|
|
"learning_rate": 0.0017195696012584707,
|
|
"loss": 2.6149,
|
|
"step": 10125
|
|
},
|
|
{
|
|
"epoch": 3.1996367369501697,
|
|
"grad_norm": 0.07450112316148066,
|
|
"learning_rate": 0.0017191865559745567,
|
|
"loss": 2.5726,
|
|
"step": 10130
|
|
},
|
|
{
|
|
"epoch": 3.201216141514649,
|
|
"grad_norm": 0.06735309656942795,
|
|
"learning_rate": 0.001718803291990606,
|
|
"loss": 2.5708,
|
|
"step": 10135
|
|
},
|
|
{
|
|
"epoch": 3.202795546079128,
|
|
"grad_norm": 0.06066504577737473,
|
|
"learning_rate": 0.0017184198094231666,
|
|
"loss": 2.552,
|
|
"step": 10140
|
|
},
|
|
{
|
|
"epoch": 3.2043749506436074,
|
|
"grad_norm": 0.07540293640624653,
|
|
"learning_rate": 0.0017180361083888537,
|
|
"loss": 2.5849,
|
|
"step": 10145
|
|
},
|
|
{
|
|
"epoch": 3.2059543552080867,
|
|
"grad_norm": 0.08359737734603907,
|
|
"learning_rate": 0.0017176521890043474,
|
|
"loss": 2.6413,
|
|
"step": 10150
|
|
},
|
|
{
|
|
"epoch": 3.207533759772566,
|
|
"grad_norm": 0.07467973947266096,
|
|
"learning_rate": 0.0017172680513863959,
|
|
"loss": 2.516,
|
|
"step": 10155
|
|
},
|
|
{
|
|
"epoch": 3.2091131643370447,
|
|
"grad_norm": 0.07418893020745522,
|
|
"learning_rate": 0.0017168836956518128,
|
|
"loss": 2.6784,
|
|
"step": 10160
|
|
},
|
|
{
|
|
"epoch": 3.210692568901524,
|
|
"grad_norm": 0.06447644926826153,
|
|
"learning_rate": 0.001716499121917478,
|
|
"loss": 2.5746,
|
|
"step": 10165
|
|
},
|
|
{
|
|
"epoch": 3.2122719734660032,
|
|
"grad_norm": 0.07266055401505396,
|
|
"learning_rate": 0.0017161143303003382,
|
|
"loss": 2.5416,
|
|
"step": 10170
|
|
},
|
|
{
|
|
"epoch": 3.2138513780304825,
|
|
"grad_norm": 0.059584364082995236,
|
|
"learning_rate": 0.0017157293209174055,
|
|
"loss": 2.6751,
|
|
"step": 10175
|
|
},
|
|
{
|
|
"epoch": 3.2154307825949617,
|
|
"grad_norm": 0.077773574299169,
|
|
"learning_rate": 0.0017153440938857598,
|
|
"loss": 2.7405,
|
|
"step": 10180
|
|
},
|
|
{
|
|
"epoch": 3.217010187159441,
|
|
"grad_norm": 0.07093017068348874,
|
|
"learning_rate": 0.0017149586493225453,
|
|
"loss": 2.6437,
|
|
"step": 10185
|
|
},
|
|
{
|
|
"epoch": 3.2185895917239202,
|
|
"grad_norm": 0.06809763786519872,
|
|
"learning_rate": 0.0017145729873449737,
|
|
"loss": 2.5746,
|
|
"step": 10190
|
|
},
|
|
{
|
|
"epoch": 3.2201689962883995,
|
|
"grad_norm": 0.07279951057709282,
|
|
"learning_rate": 0.0017141871080703223,
|
|
"loss": 2.621,
|
|
"step": 10195
|
|
},
|
|
{
|
|
"epoch": 3.2217484008528783,
|
|
"grad_norm": 0.06777881865146256,
|
|
"learning_rate": 0.0017138010116159342,
|
|
"loss": 2.6139,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 3.2233278054173575,
|
|
"grad_norm": 0.08188752813516113,
|
|
"learning_rate": 0.001713414698099219,
|
|
"loss": 2.5855,
|
|
"step": 10205
|
|
},
|
|
{
|
|
"epoch": 3.2249072099818368,
|
|
"grad_norm": 0.07265247848277875,
|
|
"learning_rate": 0.0017130281676376521,
|
|
"loss": 2.6521,
|
|
"step": 10210
|
|
},
|
|
{
|
|
"epoch": 3.226486614546316,
|
|
"grad_norm": 0.08157050567560481,
|
|
"learning_rate": 0.0017126414203487755,
|
|
"loss": 2.6133,
|
|
"step": 10215
|
|
},
|
|
{
|
|
"epoch": 3.2280660191107953,
|
|
"grad_norm": 0.09223512463456594,
|
|
"learning_rate": 0.001712254456350196,
|
|
"loss": 2.7141,
|
|
"step": 10220
|
|
},
|
|
{
|
|
"epoch": 3.2296454236752745,
|
|
"grad_norm": 0.09052288762989194,
|
|
"learning_rate": 0.001711867275759587,
|
|
"loss": 2.6932,
|
|
"step": 10225
|
|
},
|
|
{
|
|
"epoch": 3.2312248282397538,
|
|
"grad_norm": 0.09452848600741288,
|
|
"learning_rate": 0.0017114798786946874,
|
|
"loss": 2.5789,
|
|
"step": 10230
|
|
},
|
|
{
|
|
"epoch": 3.2328042328042326,
|
|
"grad_norm": 0.07348911010047247,
|
|
"learning_rate": 0.0017110922652733027,
|
|
"loss": 2.6713,
|
|
"step": 10235
|
|
},
|
|
{
|
|
"epoch": 3.234383637368712,
|
|
"grad_norm": 0.07595309187595187,
|
|
"learning_rate": 0.0017107044356133036,
|
|
"loss": 2.6233,
|
|
"step": 10240
|
|
},
|
|
{
|
|
"epoch": 3.235963041933191,
|
|
"grad_norm": 0.07837850464064633,
|
|
"learning_rate": 0.0017103163898326264,
|
|
"loss": 2.5859,
|
|
"step": 10245
|
|
},
|
|
{
|
|
"epoch": 3.2375424464976703,
|
|
"grad_norm": 0.05919589780375687,
|
|
"learning_rate": 0.0017099281280492733,
|
|
"loss": 2.6239,
|
|
"step": 10250
|
|
},
|
|
{
|
|
"epoch": 3.2391218510621496,
|
|
"grad_norm": 0.06421315218708729,
|
|
"learning_rate": 0.0017095396503813123,
|
|
"loss": 2.563,
|
|
"step": 10255
|
|
},
|
|
{
|
|
"epoch": 3.240701255626629,
|
|
"grad_norm": 0.06286673187959461,
|
|
"learning_rate": 0.001709150956946877,
|
|
"loss": 2.5648,
|
|
"step": 10260
|
|
},
|
|
{
|
|
"epoch": 3.242280660191108,
|
|
"grad_norm": 0.06614146390357328,
|
|
"learning_rate": 0.0017087620478641668,
|
|
"loss": 2.6656,
|
|
"step": 10265
|
|
},
|
|
{
|
|
"epoch": 3.2438600647555873,
|
|
"grad_norm": 0.06331427324863376,
|
|
"learning_rate": 0.001708372923251446,
|
|
"loss": 2.6365,
|
|
"step": 10270
|
|
},
|
|
{
|
|
"epoch": 3.2454394693200666,
|
|
"grad_norm": 0.058382455520564966,
|
|
"learning_rate": 0.0017079835832270454,
|
|
"loss": 2.6919,
|
|
"step": 10275
|
|
},
|
|
{
|
|
"epoch": 3.2470188738845454,
|
|
"grad_norm": 0.059389669999683835,
|
|
"learning_rate": 0.00170759402790936,
|
|
"loss": 2.4577,
|
|
"step": 10280
|
|
},
|
|
{
|
|
"epoch": 3.2485982784490246,
|
|
"grad_norm": 0.062319863197590405,
|
|
"learning_rate": 0.0017072042574168523,
|
|
"loss": 2.6714,
|
|
"step": 10285
|
|
},
|
|
{
|
|
"epoch": 3.250177683013504,
|
|
"grad_norm": 0.07209810097155507,
|
|
"learning_rate": 0.0017068142718680481,
|
|
"loss": 2.5454,
|
|
"step": 10290
|
|
},
|
|
{
|
|
"epoch": 3.251757087577983,
|
|
"grad_norm": 0.06465098796584488,
|
|
"learning_rate": 0.00170642407138154,
|
|
"loss": 2.5554,
|
|
"step": 10295
|
|
},
|
|
{
|
|
"epoch": 3.2533364921424623,
|
|
"grad_norm": 0.07919298086200059,
|
|
"learning_rate": 0.0017060336560759848,
|
|
"loss": 2.6169,
|
|
"step": 10300
|
|
},
|
|
{
|
|
"epoch": 3.2549158967069416,
|
|
"grad_norm": 0.07458149730812416,
|
|
"learning_rate": 0.001705643026070106,
|
|
"loss": 2.5148,
|
|
"step": 10305
|
|
},
|
|
{
|
|
"epoch": 3.256495301271421,
|
|
"grad_norm": 0.06209413237348719,
|
|
"learning_rate": 0.0017052521814826913,
|
|
"loss": 2.4766,
|
|
"step": 10310
|
|
},
|
|
{
|
|
"epoch": 3.2580747058358996,
|
|
"grad_norm": 0.06263559867605274,
|
|
"learning_rate": 0.0017048611224325945,
|
|
"loss": 2.6141,
|
|
"step": 10315
|
|
},
|
|
{
|
|
"epoch": 3.259654110400379,
|
|
"grad_norm": 0.08859222068557153,
|
|
"learning_rate": 0.001704469849038734,
|
|
"loss": 2.5519,
|
|
"step": 10320
|
|
},
|
|
{
|
|
"epoch": 3.261233514964858,
|
|
"grad_norm": 0.13700798939206166,
|
|
"learning_rate": 0.001704078361420093,
|
|
"loss": 2.5719,
|
|
"step": 10325
|
|
},
|
|
{
|
|
"epoch": 3.2628129195293374,
|
|
"grad_norm": 0.08242422054130628,
|
|
"learning_rate": 0.0017036866596957208,
|
|
"loss": 2.543,
|
|
"step": 10330
|
|
},
|
|
{
|
|
"epoch": 3.2643923240938166,
|
|
"grad_norm": 0.07118911228005752,
|
|
"learning_rate": 0.0017032947439847314,
|
|
"loss": 2.6253,
|
|
"step": 10335
|
|
},
|
|
{
|
|
"epoch": 3.265971728658296,
|
|
"grad_norm": 0.06521260116723082,
|
|
"learning_rate": 0.001702902614406304,
|
|
"loss": 2.6389,
|
|
"step": 10340
|
|
},
|
|
{
|
|
"epoch": 3.267551133222775,
|
|
"grad_norm": 0.06794795762837166,
|
|
"learning_rate": 0.0017025102710796825,
|
|
"loss": 2.7081,
|
|
"step": 10345
|
|
},
|
|
{
|
|
"epoch": 3.2691305377872544,
|
|
"grad_norm": 0.06462409310767205,
|
|
"learning_rate": 0.0017021177141241758,
|
|
"loss": 2.5127,
|
|
"step": 10350
|
|
},
|
|
{
|
|
"epoch": 3.2707099423517336,
|
|
"grad_norm": 0.0642066617491224,
|
|
"learning_rate": 0.0017017249436591584,
|
|
"loss": 2.5866,
|
|
"step": 10355
|
|
},
|
|
{
|
|
"epoch": 3.2722893469162124,
|
|
"grad_norm": 0.07839870321314671,
|
|
"learning_rate": 0.0017013319598040688,
|
|
"loss": 2.5108,
|
|
"step": 10360
|
|
},
|
|
{
|
|
"epoch": 3.2738687514806917,
|
|
"grad_norm": 0.0798891891470443,
|
|
"learning_rate": 0.0017009387626784117,
|
|
"loss": 2.6293,
|
|
"step": 10365
|
|
},
|
|
{
|
|
"epoch": 3.275448156045171,
|
|
"grad_norm": 0.08280526365973821,
|
|
"learning_rate": 0.0017005453524017548,
|
|
"loss": 2.6433,
|
|
"step": 10370
|
|
},
|
|
{
|
|
"epoch": 3.27702756060965,
|
|
"grad_norm": 0.0722954043793966,
|
|
"learning_rate": 0.0017001517290937322,
|
|
"loss": 2.5975,
|
|
"step": 10375
|
|
},
|
|
{
|
|
"epoch": 3.2786069651741294,
|
|
"grad_norm": 0.06287903361816885,
|
|
"learning_rate": 0.0016997578928740422,
|
|
"loss": 2.5392,
|
|
"step": 10380
|
|
},
|
|
{
|
|
"epoch": 3.2801863697386087,
|
|
"grad_norm": 0.06314306170320469,
|
|
"learning_rate": 0.0016993638438624484,
|
|
"loss": 2.6261,
|
|
"step": 10385
|
|
},
|
|
{
|
|
"epoch": 3.281765774303088,
|
|
"grad_norm": 0.0874231602460201,
|
|
"learning_rate": 0.0016989695821787772,
|
|
"loss": 2.5358,
|
|
"step": 10390
|
|
},
|
|
{
|
|
"epoch": 3.2833451788675667,
|
|
"grad_norm": 0.07048809674755248,
|
|
"learning_rate": 0.0016985751079429223,
|
|
"loss": 2.7399,
|
|
"step": 10395
|
|
},
|
|
{
|
|
"epoch": 3.284924583432046,
|
|
"grad_norm": 0.08750781590069255,
|
|
"learning_rate": 0.0016981804212748404,
|
|
"loss": 2.5259,
|
|
"step": 10400
|
|
},
|
|
{
|
|
"epoch": 3.286503987996525,
|
|
"grad_norm": 0.07232745531335175,
|
|
"learning_rate": 0.0016977855222945531,
|
|
"loss": 2.6078,
|
|
"step": 10405
|
|
},
|
|
{
|
|
"epoch": 3.2880833925610045,
|
|
"grad_norm": 0.07351994308267305,
|
|
"learning_rate": 0.001697390411122147,
|
|
"loss": 2.527,
|
|
"step": 10410
|
|
},
|
|
{
|
|
"epoch": 3.2896627971254837,
|
|
"grad_norm": 0.06933416120054373,
|
|
"learning_rate": 0.0016969950878777723,
|
|
"loss": 2.5771,
|
|
"step": 10415
|
|
},
|
|
{
|
|
"epoch": 3.291242201689963,
|
|
"grad_norm": 0.06861156254584327,
|
|
"learning_rate": 0.0016965995526816446,
|
|
"loss": 2.5522,
|
|
"step": 10420
|
|
},
|
|
{
|
|
"epoch": 3.292821606254442,
|
|
"grad_norm": 0.08451993948302478,
|
|
"learning_rate": 0.0016962038056540438,
|
|
"loss": 2.5522,
|
|
"step": 10425
|
|
},
|
|
{
|
|
"epoch": 3.2944010108189214,
|
|
"grad_norm": 0.07272459006293112,
|
|
"learning_rate": 0.001695807846915314,
|
|
"loss": 2.5529,
|
|
"step": 10430
|
|
},
|
|
{
|
|
"epoch": 3.2959804153834007,
|
|
"grad_norm": 0.06768282259573886,
|
|
"learning_rate": 0.0016954116765858635,
|
|
"loss": 2.6157,
|
|
"step": 10435
|
|
},
|
|
{
|
|
"epoch": 3.2975598199478795,
|
|
"grad_norm": 0.053463358594411925,
|
|
"learning_rate": 0.001695015294786165,
|
|
"loss": 2.649,
|
|
"step": 10440
|
|
},
|
|
{
|
|
"epoch": 3.2991392245123587,
|
|
"grad_norm": 0.09274796837286527,
|
|
"learning_rate": 0.001694618701636756,
|
|
"loss": 2.6481,
|
|
"step": 10445
|
|
},
|
|
{
|
|
"epoch": 3.300718629076838,
|
|
"grad_norm": 0.0772108488909983,
|
|
"learning_rate": 0.001694221897258238,
|
|
"loss": 2.5709,
|
|
"step": 10450
|
|
},
|
|
{
|
|
"epoch": 3.3022980336413172,
|
|
"grad_norm": 0.08472081158875007,
|
|
"learning_rate": 0.0016938248817712767,
|
|
"loss": 2.5986,
|
|
"step": 10455
|
|
},
|
|
{
|
|
"epoch": 3.3038774382057965,
|
|
"grad_norm": 0.07749119429930267,
|
|
"learning_rate": 0.0016934276552966017,
|
|
"loss": 2.6093,
|
|
"step": 10460
|
|
},
|
|
{
|
|
"epoch": 3.3054568427702757,
|
|
"grad_norm": 0.07045324083306663,
|
|
"learning_rate": 0.001693030217955007,
|
|
"loss": 2.547,
|
|
"step": 10465
|
|
},
|
|
{
|
|
"epoch": 3.307036247334755,
|
|
"grad_norm": 0.06514536706521994,
|
|
"learning_rate": 0.0016926325698673511,
|
|
"loss": 2.5417,
|
|
"step": 10470
|
|
},
|
|
{
|
|
"epoch": 3.308615651899234,
|
|
"grad_norm": 0.07034685763804152,
|
|
"learning_rate": 0.0016922347111545557,
|
|
"loss": 2.6036,
|
|
"step": 10475
|
|
},
|
|
{
|
|
"epoch": 3.310195056463713,
|
|
"grad_norm": 0.09796385162305873,
|
|
"learning_rate": 0.0016918366419376078,
|
|
"loss": 2.5858,
|
|
"step": 10480
|
|
},
|
|
{
|
|
"epoch": 3.3117744610281923,
|
|
"grad_norm": 0.0742290614688897,
|
|
"learning_rate": 0.0016914383623375575,
|
|
"loss": 2.745,
|
|
"step": 10485
|
|
},
|
|
{
|
|
"epoch": 3.3133538655926715,
|
|
"grad_norm": 0.06899769885657452,
|
|
"learning_rate": 0.0016910398724755186,
|
|
"loss": 2.6352,
|
|
"step": 10490
|
|
},
|
|
{
|
|
"epoch": 3.314933270157151,
|
|
"grad_norm": 0.06790020533599436,
|
|
"learning_rate": 0.0016906411724726697,
|
|
"loss": 2.5656,
|
|
"step": 10495
|
|
},
|
|
{
|
|
"epoch": 3.31651267472163,
|
|
"grad_norm": 0.05885065461528016,
|
|
"learning_rate": 0.0016902422624502532,
|
|
"loss": 2.5416,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 3.3180920792861093,
|
|
"grad_norm": 0.07103136246491495,
|
|
"learning_rate": 0.0016898431425295744,
|
|
"loss": 2.6551,
|
|
"step": 10505
|
|
},
|
|
{
|
|
"epoch": 3.3196714838505885,
|
|
"grad_norm": 0.08179324963044199,
|
|
"learning_rate": 0.0016894438128320039,
|
|
"loss": 2.5598,
|
|
"step": 10510
|
|
},
|
|
{
|
|
"epoch": 3.3212508884150673,
|
|
"grad_norm": 0.06249717252547698,
|
|
"learning_rate": 0.0016890442734789743,
|
|
"loss": 2.5719,
|
|
"step": 10515
|
|
},
|
|
{
|
|
"epoch": 3.3228302929795466,
|
|
"grad_norm": 0.07490536043214126,
|
|
"learning_rate": 0.0016886445245919838,
|
|
"loss": 2.5096,
|
|
"step": 10520
|
|
},
|
|
{
|
|
"epoch": 3.324409697544026,
|
|
"grad_norm": 0.09144476189116073,
|
|
"learning_rate": 0.0016882445662925933,
|
|
"loss": 2.5459,
|
|
"step": 10525
|
|
},
|
|
{
|
|
"epoch": 3.325989102108505,
|
|
"grad_norm": 0.08519644228840297,
|
|
"learning_rate": 0.0016878443987024276,
|
|
"loss": 2.6242,
|
|
"step": 10530
|
|
},
|
|
{
|
|
"epoch": 3.3275685066729843,
|
|
"grad_norm": 0.07492565924347942,
|
|
"learning_rate": 0.001687444021943175,
|
|
"loss": 2.6675,
|
|
"step": 10535
|
|
},
|
|
{
|
|
"epoch": 3.3291479112374636,
|
|
"grad_norm": 0.07682330886863895,
|
|
"learning_rate": 0.0016870434361365874,
|
|
"loss": 2.6064,
|
|
"step": 10540
|
|
},
|
|
{
|
|
"epoch": 3.330727315801943,
|
|
"grad_norm": 0.06508504218064855,
|
|
"learning_rate": 0.0016866426414044807,
|
|
"loss": 2.4953,
|
|
"step": 10545
|
|
},
|
|
{
|
|
"epoch": 3.3323067203664216,
|
|
"grad_norm": 0.07424031145732456,
|
|
"learning_rate": 0.0016862416378687337,
|
|
"loss": 2.601,
|
|
"step": 10550
|
|
},
|
|
{
|
|
"epoch": 3.333886124930901,
|
|
"grad_norm": 0.07137816057188225,
|
|
"learning_rate": 0.001685840425651289,
|
|
"loss": 2.6436,
|
|
"step": 10555
|
|
},
|
|
{
|
|
"epoch": 3.33546552949538,
|
|
"grad_norm": 0.07731783109421575,
|
|
"learning_rate": 0.0016854390048741531,
|
|
"loss": 2.6154,
|
|
"step": 10560
|
|
},
|
|
{
|
|
"epoch": 3.3370449340598594,
|
|
"grad_norm": 0.08805264397542543,
|
|
"learning_rate": 0.001685037375659395,
|
|
"loss": 2.5765,
|
|
"step": 10565
|
|
},
|
|
{
|
|
"epoch": 3.3386243386243386,
|
|
"grad_norm": 0.07101122837198678,
|
|
"learning_rate": 0.001684635538129148,
|
|
"loss": 2.6236,
|
|
"step": 10570
|
|
},
|
|
{
|
|
"epoch": 3.340203743188818,
|
|
"grad_norm": 0.061219029182904686,
|
|
"learning_rate": 0.0016842334924056079,
|
|
"loss": 2.6292,
|
|
"step": 10575
|
|
},
|
|
{
|
|
"epoch": 3.341783147753297,
|
|
"grad_norm": 0.0739208555181634,
|
|
"learning_rate": 0.0016838312386110346,
|
|
"loss": 2.5738,
|
|
"step": 10580
|
|
},
|
|
{
|
|
"epoch": 3.3433625523177763,
|
|
"grad_norm": 0.06174029187954557,
|
|
"learning_rate": 0.0016834287768677505,
|
|
"loss": 2.6446,
|
|
"step": 10585
|
|
},
|
|
{
|
|
"epoch": 3.3449419568822556,
|
|
"grad_norm": 0.06501510056615152,
|
|
"learning_rate": 0.0016830261072981422,
|
|
"loss": 2.621,
|
|
"step": 10590
|
|
},
|
|
{
|
|
"epoch": 3.3465213614467344,
|
|
"grad_norm": 0.07129318306001188,
|
|
"learning_rate": 0.0016826232300246585,
|
|
"loss": 2.5013,
|
|
"step": 10595
|
|
},
|
|
{
|
|
"epoch": 3.3481007660112136,
|
|
"grad_norm": 0.060346700412604246,
|
|
"learning_rate": 0.001682220145169812,
|
|
"loss": 2.5445,
|
|
"step": 10600
|
|
},
|
|
{
|
|
"epoch": 3.349680170575693,
|
|
"grad_norm": 0.08314925435533438,
|
|
"learning_rate": 0.001681816852856178,
|
|
"loss": 2.7018,
|
|
"step": 10605
|
|
},
|
|
{
|
|
"epoch": 3.351259575140172,
|
|
"grad_norm": 0.07371136983426413,
|
|
"learning_rate": 0.0016814133532063956,
|
|
"loss": 2.5518,
|
|
"step": 10610
|
|
},
|
|
{
|
|
"epoch": 3.3528389797046514,
|
|
"grad_norm": 0.07034160049593934,
|
|
"learning_rate": 0.001681009646343166,
|
|
"loss": 2.4817,
|
|
"step": 10615
|
|
},
|
|
{
|
|
"epoch": 3.3544183842691306,
|
|
"grad_norm": 0.07883534743806464,
|
|
"learning_rate": 0.001680605732389254,
|
|
"loss": 2.6884,
|
|
"step": 10620
|
|
},
|
|
{
|
|
"epoch": 3.35599778883361,
|
|
"grad_norm": 0.06775784286490502,
|
|
"learning_rate": 0.0016802016114674874,
|
|
"loss": 2.5764,
|
|
"step": 10625
|
|
},
|
|
{
|
|
"epoch": 3.3575771933980887,
|
|
"grad_norm": 0.08366321506658525,
|
|
"learning_rate": 0.0016797972837007567,
|
|
"loss": 2.6764,
|
|
"step": 10630
|
|
},
|
|
{
|
|
"epoch": 3.359156597962568,
|
|
"grad_norm": 0.06664698592119082,
|
|
"learning_rate": 0.0016793927492120152,
|
|
"loss": 2.6409,
|
|
"step": 10635
|
|
},
|
|
{
|
|
"epoch": 3.360736002527047,
|
|
"grad_norm": 0.07823045449252466,
|
|
"learning_rate": 0.0016789880081242794,
|
|
"loss": 2.6773,
|
|
"step": 10640
|
|
},
|
|
{
|
|
"epoch": 3.3623154070915264,
|
|
"grad_norm": 0.06796072751134481,
|
|
"learning_rate": 0.0016785830605606288,
|
|
"loss": 2.5749,
|
|
"step": 10645
|
|
},
|
|
{
|
|
"epoch": 3.3638948116560057,
|
|
"grad_norm": 0.07521011042377342,
|
|
"learning_rate": 0.001678177906644205,
|
|
"loss": 2.603,
|
|
"step": 10650
|
|
},
|
|
{
|
|
"epoch": 3.365474216220485,
|
|
"grad_norm": 0.07736604538769924,
|
|
"learning_rate": 0.0016777725464982125,
|
|
"loss": 2.6039,
|
|
"step": 10655
|
|
},
|
|
{
|
|
"epoch": 3.367053620784964,
|
|
"grad_norm": 0.05241740355470881,
|
|
"learning_rate": 0.0016773669802459192,
|
|
"loss": 2.5906,
|
|
"step": 10660
|
|
},
|
|
{
|
|
"epoch": 3.3686330253494434,
|
|
"grad_norm": 0.06374080163158191,
|
|
"learning_rate": 0.0016769612080106554,
|
|
"loss": 2.6656,
|
|
"step": 10665
|
|
},
|
|
{
|
|
"epoch": 3.3702124299139227,
|
|
"grad_norm": 0.0721017802522469,
|
|
"learning_rate": 0.0016765552299158127,
|
|
"loss": 2.7058,
|
|
"step": 10670
|
|
},
|
|
{
|
|
"epoch": 3.3717918344784015,
|
|
"grad_norm": 0.06657094661342346,
|
|
"learning_rate": 0.0016761490460848476,
|
|
"loss": 2.4888,
|
|
"step": 10675
|
|
},
|
|
{
|
|
"epoch": 3.3733712390428807,
|
|
"grad_norm": 0.07017677258309017,
|
|
"learning_rate": 0.0016757426566412776,
|
|
"loss": 2.6002,
|
|
"step": 10680
|
|
},
|
|
{
|
|
"epoch": 3.37495064360736,
|
|
"grad_norm": 0.07685429307755572,
|
|
"learning_rate": 0.0016753360617086832,
|
|
"loss": 2.673,
|
|
"step": 10685
|
|
},
|
|
{
|
|
"epoch": 3.376530048171839,
|
|
"grad_norm": 0.07435876185097813,
|
|
"learning_rate": 0.0016749292614107074,
|
|
"loss": 2.5722,
|
|
"step": 10690
|
|
},
|
|
{
|
|
"epoch": 3.3781094527363185,
|
|
"grad_norm": 0.07399145117339155,
|
|
"learning_rate": 0.0016745222558710554,
|
|
"loss": 2.5617,
|
|
"step": 10695
|
|
},
|
|
{
|
|
"epoch": 3.3796888573007977,
|
|
"grad_norm": 0.08075039423060379,
|
|
"learning_rate": 0.0016741150452134947,
|
|
"loss": 2.5131,
|
|
"step": 10700
|
|
},
|
|
{
|
|
"epoch": 3.381268261865277,
|
|
"grad_norm": 0.06768543805520227,
|
|
"learning_rate": 0.0016737076295618564,
|
|
"loss": 2.6348,
|
|
"step": 10705
|
|
},
|
|
{
|
|
"epoch": 3.3828476664297558,
|
|
"grad_norm": 0.06413509406418867,
|
|
"learning_rate": 0.001673300009040032,
|
|
"loss": 2.5227,
|
|
"step": 10710
|
|
},
|
|
{
|
|
"epoch": 3.384427070994235,
|
|
"grad_norm": 0.09965748601411166,
|
|
"learning_rate": 0.0016728921837719766,
|
|
"loss": 2.5946,
|
|
"step": 10715
|
|
},
|
|
{
|
|
"epoch": 3.3860064755587143,
|
|
"grad_norm": 0.08948513305462186,
|
|
"learning_rate": 0.0016724841538817072,
|
|
"loss": 2.4781,
|
|
"step": 10720
|
|
},
|
|
{
|
|
"epoch": 3.3875858801231935,
|
|
"grad_norm": 0.06768464720263621,
|
|
"learning_rate": 0.0016720759194933036,
|
|
"loss": 2.5584,
|
|
"step": 10725
|
|
},
|
|
{
|
|
"epoch": 3.3891652846876728,
|
|
"grad_norm": 0.06731032519326456,
|
|
"learning_rate": 0.0016716674807309068,
|
|
"loss": 2.501,
|
|
"step": 10730
|
|
},
|
|
{
|
|
"epoch": 3.390744689252152,
|
|
"grad_norm": 0.07611234865803017,
|
|
"learning_rate": 0.0016712588377187205,
|
|
"loss": 2.7178,
|
|
"step": 10735
|
|
},
|
|
{
|
|
"epoch": 3.3923240938166312,
|
|
"grad_norm": 0.07191634935692151,
|
|
"learning_rate": 0.0016708499905810105,
|
|
"loss": 2.6405,
|
|
"step": 10740
|
|
},
|
|
{
|
|
"epoch": 3.3939034983811105,
|
|
"grad_norm": 0.059784831832480775,
|
|
"learning_rate": 0.0016704409394421042,
|
|
"loss": 2.5167,
|
|
"step": 10745
|
|
},
|
|
{
|
|
"epoch": 3.3954829029455897,
|
|
"grad_norm": 0.06378049802286767,
|
|
"learning_rate": 0.0016700316844263923,
|
|
"loss": 2.558,
|
|
"step": 10750
|
|
},
|
|
{
|
|
"epoch": 3.3970623075100685,
|
|
"grad_norm": 0.0729254696514964,
|
|
"learning_rate": 0.0016696222256583257,
|
|
"loss": 2.6993,
|
|
"step": 10755
|
|
},
|
|
{
|
|
"epoch": 3.398641712074548,
|
|
"grad_norm": 0.06783132950569057,
|
|
"learning_rate": 0.001669212563262419,
|
|
"loss": 2.6441,
|
|
"step": 10760
|
|
},
|
|
{
|
|
"epoch": 3.400221116639027,
|
|
"grad_norm": 0.06881595096036265,
|
|
"learning_rate": 0.0016688026973632473,
|
|
"loss": 2.5529,
|
|
"step": 10765
|
|
},
|
|
{
|
|
"epoch": 3.4018005212035063,
|
|
"grad_norm": 0.0660612489397916,
|
|
"learning_rate": 0.0016683926280854485,
|
|
"loss": 2.5967,
|
|
"step": 10770
|
|
},
|
|
{
|
|
"epoch": 3.4033799257679855,
|
|
"grad_norm": 0.07434614804003171,
|
|
"learning_rate": 0.0016679823555537218,
|
|
"loss": 2.4464,
|
|
"step": 10775
|
|
},
|
|
{
|
|
"epoch": 3.404959330332465,
|
|
"grad_norm": 0.07179597580664843,
|
|
"learning_rate": 0.0016675718798928288,
|
|
"loss": 2.6128,
|
|
"step": 10780
|
|
},
|
|
{
|
|
"epoch": 3.406538734896944,
|
|
"grad_norm": 0.07487229182490646,
|
|
"learning_rate": 0.0016671612012275922,
|
|
"loss": 2.6535,
|
|
"step": 10785
|
|
},
|
|
{
|
|
"epoch": 3.408118139461423,
|
|
"grad_norm": 0.06590376440721327,
|
|
"learning_rate": 0.001666750319682897,
|
|
"loss": 2.5712,
|
|
"step": 10790
|
|
},
|
|
{
|
|
"epoch": 3.409697544025902,
|
|
"grad_norm": 0.07018738374191079,
|
|
"learning_rate": 0.0016663392353836897,
|
|
"loss": 2.6166,
|
|
"step": 10795
|
|
},
|
|
{
|
|
"epoch": 3.4112769485903813,
|
|
"grad_norm": 0.058437229908067226,
|
|
"learning_rate": 0.0016659279484549784,
|
|
"loss": 2.5166,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 3.4128563531548606,
|
|
"grad_norm": 0.06823378284757801,
|
|
"learning_rate": 0.0016655164590218324,
|
|
"loss": 2.5792,
|
|
"step": 10805
|
|
},
|
|
{
|
|
"epoch": 3.41443575771934,
|
|
"grad_norm": 0.08623700445295854,
|
|
"learning_rate": 0.0016651047672093834,
|
|
"loss": 2.5792,
|
|
"step": 10810
|
|
},
|
|
{
|
|
"epoch": 3.416015162283819,
|
|
"grad_norm": 0.06236875375960552,
|
|
"learning_rate": 0.0016646928731428238,
|
|
"loss": 2.587,
|
|
"step": 10815
|
|
},
|
|
{
|
|
"epoch": 3.4175945668482983,
|
|
"grad_norm": 0.0732907310475096,
|
|
"learning_rate": 0.001664280776947409,
|
|
"loss": 2.6817,
|
|
"step": 10820
|
|
},
|
|
{
|
|
"epoch": 3.4191739714127776,
|
|
"grad_norm": 0.07650204052590727,
|
|
"learning_rate": 0.0016638684787484536,
|
|
"loss": 2.6649,
|
|
"step": 10825
|
|
},
|
|
{
|
|
"epoch": 3.420753375977257,
|
|
"grad_norm": 0.06819073406905447,
|
|
"learning_rate": 0.001663455978671336,
|
|
"loss": 2.5444,
|
|
"step": 10830
|
|
},
|
|
{
|
|
"epoch": 3.4223327805417356,
|
|
"grad_norm": 0.06486268882268804,
|
|
"learning_rate": 0.0016630432768414936,
|
|
"loss": 2.5283,
|
|
"step": 10835
|
|
},
|
|
{
|
|
"epoch": 3.423912185106215,
|
|
"grad_norm": 0.08334601379334415,
|
|
"learning_rate": 0.0016626303733844273,
|
|
"loss": 2.638,
|
|
"step": 10840
|
|
},
|
|
{
|
|
"epoch": 3.425491589670694,
|
|
"grad_norm": 0.07005603968188574,
|
|
"learning_rate": 0.0016622172684256982,
|
|
"loss": 2.5851,
|
|
"step": 10845
|
|
},
|
|
{
|
|
"epoch": 3.4270709942351734,
|
|
"grad_norm": 0.07198131768784445,
|
|
"learning_rate": 0.0016618039620909285,
|
|
"loss": 2.5739,
|
|
"step": 10850
|
|
},
|
|
{
|
|
"epoch": 3.4286503987996526,
|
|
"grad_norm": 0.07960782279570881,
|
|
"learning_rate": 0.0016613904545058024,
|
|
"loss": 2.4816,
|
|
"step": 10855
|
|
},
|
|
{
|
|
"epoch": 3.430229803364132,
|
|
"grad_norm": 0.07389425043887975,
|
|
"learning_rate": 0.0016609767457960647,
|
|
"loss": 2.5397,
|
|
"step": 10860
|
|
},
|
|
{
|
|
"epoch": 3.431809207928611,
|
|
"grad_norm": 0.07297687012791151,
|
|
"learning_rate": 0.001660562836087522,
|
|
"loss": 2.6118,
|
|
"step": 10865
|
|
},
|
|
{
|
|
"epoch": 3.43338861249309,
|
|
"grad_norm": 0.06729323343472277,
|
|
"learning_rate": 0.0016601487255060415,
|
|
"loss": 2.5716,
|
|
"step": 10870
|
|
},
|
|
{
|
|
"epoch": 3.434968017057569,
|
|
"grad_norm": 0.07466788627005895,
|
|
"learning_rate": 0.0016597344141775507,
|
|
"loss": 2.5055,
|
|
"step": 10875
|
|
},
|
|
{
|
|
"epoch": 3.4365474216220484,
|
|
"grad_norm": 0.0669564025857904,
|
|
"learning_rate": 0.0016593199022280404,
|
|
"loss": 2.5711,
|
|
"step": 10880
|
|
},
|
|
{
|
|
"epoch": 3.4381268261865277,
|
|
"grad_norm": 0.0715612878194584,
|
|
"learning_rate": 0.0016589051897835598,
|
|
"loss": 2.5906,
|
|
"step": 10885
|
|
},
|
|
{
|
|
"epoch": 3.439706230751007,
|
|
"grad_norm": 0.06104033332557915,
|
|
"learning_rate": 0.0016584902769702212,
|
|
"loss": 2.5908,
|
|
"step": 10890
|
|
},
|
|
{
|
|
"epoch": 3.441285635315486,
|
|
"grad_norm": 0.09763031493678595,
|
|
"learning_rate": 0.0016580751639141964,
|
|
"loss": 2.6466,
|
|
"step": 10895
|
|
},
|
|
{
|
|
"epoch": 3.4428650398799654,
|
|
"grad_norm": 0.08888846981736682,
|
|
"learning_rate": 0.001657659850741719,
|
|
"loss": 2.6081,
|
|
"step": 10900
|
|
},
|
|
{
|
|
"epoch": 3.4444444444444446,
|
|
"grad_norm": 0.07025842112272405,
|
|
"learning_rate": 0.0016572443375790825,
|
|
"loss": 2.5426,
|
|
"step": 10905
|
|
},
|
|
{
|
|
"epoch": 3.4460238490089234,
|
|
"grad_norm": 0.07725833505892749,
|
|
"learning_rate": 0.0016568286245526424,
|
|
"loss": 2.6115,
|
|
"step": 10910
|
|
},
|
|
{
|
|
"epoch": 3.4476032535734027,
|
|
"grad_norm": 0.07330908357772506,
|
|
"learning_rate": 0.0016564127117888146,
|
|
"loss": 2.6395,
|
|
"step": 10915
|
|
},
|
|
{
|
|
"epoch": 3.449182658137882,
|
|
"grad_norm": 0.06651234838698582,
|
|
"learning_rate": 0.0016559965994140747,
|
|
"loss": 2.5544,
|
|
"step": 10920
|
|
},
|
|
{
|
|
"epoch": 3.450762062702361,
|
|
"grad_norm": 0.06484930161153007,
|
|
"learning_rate": 0.00165558028755496,
|
|
"loss": 2.5822,
|
|
"step": 10925
|
|
},
|
|
{
|
|
"epoch": 3.4523414672668404,
|
|
"grad_norm": 0.06158066171770069,
|
|
"learning_rate": 0.0016551637763380688,
|
|
"loss": 2.5769,
|
|
"step": 10930
|
|
},
|
|
{
|
|
"epoch": 3.4539208718313197,
|
|
"grad_norm": 0.06584894085517716,
|
|
"learning_rate": 0.0016547470658900593,
|
|
"loss": 2.5717,
|
|
"step": 10935
|
|
},
|
|
{
|
|
"epoch": 3.455500276395799,
|
|
"grad_norm": 0.07184458522706526,
|
|
"learning_rate": 0.0016543301563376497,
|
|
"loss": 2.4779,
|
|
"step": 10940
|
|
},
|
|
{
|
|
"epoch": 3.4570796809602777,
|
|
"grad_norm": 0.06801420016492464,
|
|
"learning_rate": 0.0016539130478076208,
|
|
"loss": 2.6033,
|
|
"step": 10945
|
|
},
|
|
{
|
|
"epoch": 3.458659085524757,
|
|
"grad_norm": 0.08292466353061236,
|
|
"learning_rate": 0.001653495740426812,
|
|
"loss": 2.5496,
|
|
"step": 10950
|
|
},
|
|
{
|
|
"epoch": 3.4602384900892362,
|
|
"grad_norm": 0.09120479222013012,
|
|
"learning_rate": 0.0016530782343221234,
|
|
"loss": 2.5863,
|
|
"step": 10955
|
|
},
|
|
{
|
|
"epoch": 3.4618178946537155,
|
|
"grad_norm": 0.07435860546314733,
|
|
"learning_rate": 0.0016526605296205167,
|
|
"loss": 2.6012,
|
|
"step": 10960
|
|
},
|
|
{
|
|
"epoch": 3.4633972992181947,
|
|
"grad_norm": 0.09409409753292039,
|
|
"learning_rate": 0.0016522426264490128,
|
|
"loss": 2.5539,
|
|
"step": 10965
|
|
},
|
|
{
|
|
"epoch": 3.464976703782674,
|
|
"grad_norm": 0.09574905329209961,
|
|
"learning_rate": 0.0016518245249346935,
|
|
"loss": 2.5819,
|
|
"step": 10970
|
|
},
|
|
{
|
|
"epoch": 3.466556108347153,
|
|
"grad_norm": 0.05757058226828307,
|
|
"learning_rate": 0.0016514062252047008,
|
|
"loss": 2.6066,
|
|
"step": 10975
|
|
},
|
|
{
|
|
"epoch": 3.4681355129116325,
|
|
"grad_norm": 0.06550947535422515,
|
|
"learning_rate": 0.0016509877273862368,
|
|
"loss": 2.5726,
|
|
"step": 10980
|
|
},
|
|
{
|
|
"epoch": 3.4697149174761117,
|
|
"grad_norm": 0.05960405730646586,
|
|
"learning_rate": 0.0016505690316065645,
|
|
"loss": 2.6718,
|
|
"step": 10985
|
|
},
|
|
{
|
|
"epoch": 3.4712943220405905,
|
|
"grad_norm": 0.08540369280925722,
|
|
"learning_rate": 0.0016501501379930063,
|
|
"loss": 2.657,
|
|
"step": 10990
|
|
},
|
|
{
|
|
"epoch": 3.4728737266050698,
|
|
"grad_norm": 0.07302920777857468,
|
|
"learning_rate": 0.0016497310466729448,
|
|
"loss": 2.643,
|
|
"step": 10995
|
|
},
|
|
{
|
|
"epoch": 3.474453131169549,
|
|
"grad_norm": 0.06878816207071971,
|
|
"learning_rate": 0.0016493117577738232,
|
|
"loss": 2.6026,
|
|
"step": 11000
|
|
},
|
|
{
|
|
"epoch": 3.4760325357340283,
|
|
"grad_norm": 0.05880351233861769,
|
|
"learning_rate": 0.0016488922714231451,
|
|
"loss": 2.7324,
|
|
"step": 11005
|
|
},
|
|
{
|
|
"epoch": 3.4776119402985075,
|
|
"grad_norm": 0.0753234680927505,
|
|
"learning_rate": 0.001648472587748473,
|
|
"loss": 2.4972,
|
|
"step": 11010
|
|
},
|
|
{
|
|
"epoch": 3.4791913448629868,
|
|
"grad_norm": 0.05936749176702982,
|
|
"learning_rate": 0.0016480527068774297,
|
|
"loss": 2.607,
|
|
"step": 11015
|
|
},
|
|
{
|
|
"epoch": 3.480770749427466,
|
|
"grad_norm": 0.06152356191409516,
|
|
"learning_rate": 0.001647632628937699,
|
|
"loss": 2.5759,
|
|
"step": 11020
|
|
},
|
|
{
|
|
"epoch": 3.482350153991945,
|
|
"grad_norm": 0.07671879144110781,
|
|
"learning_rate": 0.0016472123540570238,
|
|
"loss": 2.5654,
|
|
"step": 11025
|
|
},
|
|
{
|
|
"epoch": 3.483929558556424,
|
|
"grad_norm": 0.06140593598243999,
|
|
"learning_rate": 0.0016467918823632071,
|
|
"loss": 2.6347,
|
|
"step": 11030
|
|
},
|
|
{
|
|
"epoch": 3.4855089631209033,
|
|
"grad_norm": 0.08460096728720515,
|
|
"learning_rate": 0.0016463712139841112,
|
|
"loss": 2.6307,
|
|
"step": 11035
|
|
},
|
|
{
|
|
"epoch": 3.4870883676853826,
|
|
"grad_norm": 0.06913864655333987,
|
|
"learning_rate": 0.0016459503490476588,
|
|
"loss": 2.5789,
|
|
"step": 11040
|
|
},
|
|
{
|
|
"epoch": 3.488667772249862,
|
|
"grad_norm": 0.06560244455602209,
|
|
"learning_rate": 0.0016455292876818323,
|
|
"loss": 2.5676,
|
|
"step": 11045
|
|
},
|
|
{
|
|
"epoch": 3.490247176814341,
|
|
"grad_norm": 0.07174425583587295,
|
|
"learning_rate": 0.0016451080300146743,
|
|
"loss": 2.6417,
|
|
"step": 11050
|
|
},
|
|
{
|
|
"epoch": 3.4918265813788203,
|
|
"grad_norm": 0.07854517982881039,
|
|
"learning_rate": 0.0016446865761742858,
|
|
"loss": 2.5639,
|
|
"step": 11055
|
|
},
|
|
{
|
|
"epoch": 3.4934059859432995,
|
|
"grad_norm": 0.10798333983779836,
|
|
"learning_rate": 0.001644264926288828,
|
|
"loss": 2.5174,
|
|
"step": 11060
|
|
},
|
|
{
|
|
"epoch": 3.494985390507779,
|
|
"grad_norm": 0.07640438316275776,
|
|
"learning_rate": 0.0016438430804865231,
|
|
"loss": 2.5181,
|
|
"step": 11065
|
|
},
|
|
{
|
|
"epoch": 3.4965647950722576,
|
|
"grad_norm": 0.0661772313075206,
|
|
"learning_rate": 0.0016434210388956508,
|
|
"loss": 2.7018,
|
|
"step": 11070
|
|
},
|
|
{
|
|
"epoch": 3.498144199636737,
|
|
"grad_norm": 0.075491851589846,
|
|
"learning_rate": 0.0016429988016445516,
|
|
"loss": 2.6088,
|
|
"step": 11075
|
|
},
|
|
{
|
|
"epoch": 3.499723604201216,
|
|
"grad_norm": 0.06394737278526207,
|
|
"learning_rate": 0.0016425763688616248,
|
|
"loss": 2.6242,
|
|
"step": 11080
|
|
},
|
|
{
|
|
"epoch": 3.5013030087656953,
|
|
"grad_norm": 0.07171320380234876,
|
|
"learning_rate": 0.00164215374067533,
|
|
"loss": 2.5235,
|
|
"step": 11085
|
|
},
|
|
{
|
|
"epoch": 3.5028824133301746,
|
|
"grad_norm": 0.05872080150096808,
|
|
"learning_rate": 0.0016417309172141853,
|
|
"loss": 2.5743,
|
|
"step": 11090
|
|
},
|
|
{
|
|
"epoch": 3.504461817894654,
|
|
"grad_norm": 0.06144897925333228,
|
|
"learning_rate": 0.0016413078986067691,
|
|
"loss": 2.5211,
|
|
"step": 11095
|
|
},
|
|
{
|
|
"epoch": 3.5060412224591326,
|
|
"grad_norm": 0.0787162807272008,
|
|
"learning_rate": 0.0016408846849817183,
|
|
"loss": 2.6094,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 3.507620627023612,
|
|
"grad_norm": 0.07024488444314682,
|
|
"learning_rate": 0.0016404612764677293,
|
|
"loss": 2.5469,
|
|
"step": 11105
|
|
},
|
|
{
|
|
"epoch": 3.509200031588091,
|
|
"grad_norm": 0.06570148242570677,
|
|
"learning_rate": 0.0016400376731935584,
|
|
"loss": 2.5928,
|
|
"step": 11110
|
|
},
|
|
{
|
|
"epoch": 3.5107794361525704,
|
|
"grad_norm": 0.07249968344951528,
|
|
"learning_rate": 0.0016396138752880203,
|
|
"loss": 2.5755,
|
|
"step": 11115
|
|
},
|
|
{
|
|
"epoch": 3.5123588407170496,
|
|
"grad_norm": 0.07077072471171038,
|
|
"learning_rate": 0.0016391898828799895,
|
|
"loss": 2.6563,
|
|
"step": 11120
|
|
},
|
|
{
|
|
"epoch": 3.513938245281529,
|
|
"grad_norm": 0.06685448261888306,
|
|
"learning_rate": 0.001638765696098399,
|
|
"loss": 2.6498,
|
|
"step": 11125
|
|
},
|
|
{
|
|
"epoch": 3.515517649846008,
|
|
"grad_norm": 0.07346445807897378,
|
|
"learning_rate": 0.0016383413150722415,
|
|
"loss": 2.5116,
|
|
"step": 11130
|
|
},
|
|
{
|
|
"epoch": 3.5170970544104874,
|
|
"grad_norm": 0.07151114604498575,
|
|
"learning_rate": 0.0016379167399305685,
|
|
"loss": 2.6605,
|
|
"step": 11135
|
|
},
|
|
{
|
|
"epoch": 3.5186764589749666,
|
|
"grad_norm": 0.07658864985072805,
|
|
"learning_rate": 0.0016374919708024907,
|
|
"loss": 2.623,
|
|
"step": 11140
|
|
},
|
|
{
|
|
"epoch": 3.520255863539446,
|
|
"grad_norm": 0.06544775756343006,
|
|
"learning_rate": 0.001637067007817178,
|
|
"loss": 2.5293,
|
|
"step": 11145
|
|
},
|
|
{
|
|
"epoch": 3.5218352681039247,
|
|
"grad_norm": 0.09329441798346816,
|
|
"learning_rate": 0.001636641851103858,
|
|
"loss": 2.5974,
|
|
"step": 11150
|
|
},
|
|
{
|
|
"epoch": 3.523414672668404,
|
|
"grad_norm": 0.07482084483418168,
|
|
"learning_rate": 0.0016362165007918188,
|
|
"loss": 2.6311,
|
|
"step": 11155
|
|
},
|
|
{
|
|
"epoch": 3.524994077232883,
|
|
"grad_norm": 0.08007411148052147,
|
|
"learning_rate": 0.0016357909570104067,
|
|
"loss": 2.5713,
|
|
"step": 11160
|
|
},
|
|
{
|
|
"epoch": 3.5265734817973624,
|
|
"grad_norm": 0.07112811037320547,
|
|
"learning_rate": 0.001635365219889027,
|
|
"loss": 2.5326,
|
|
"step": 11165
|
|
},
|
|
{
|
|
"epoch": 3.5281528863618417,
|
|
"grad_norm": 0.06863240009007542,
|
|
"learning_rate": 0.0016349392895571434,
|
|
"loss": 2.477,
|
|
"step": 11170
|
|
},
|
|
{
|
|
"epoch": 3.529732290926321,
|
|
"grad_norm": 0.06905806940530339,
|
|
"learning_rate": 0.001634513166144278,
|
|
"loss": 2.6037,
|
|
"step": 11175
|
|
},
|
|
{
|
|
"epoch": 3.5313116954907997,
|
|
"grad_norm": 0.05760631608345864,
|
|
"learning_rate": 0.0016340868497800134,
|
|
"loss": 2.6011,
|
|
"step": 11180
|
|
},
|
|
{
|
|
"epoch": 3.532891100055279,
|
|
"grad_norm": 0.0759003904957396,
|
|
"learning_rate": 0.0016336603405939887,
|
|
"loss": 2.5307,
|
|
"step": 11185
|
|
},
|
|
{
|
|
"epoch": 3.534470504619758,
|
|
"grad_norm": 0.09208394390989348,
|
|
"learning_rate": 0.0016332336387159033,
|
|
"loss": 2.6075,
|
|
"step": 11190
|
|
},
|
|
{
|
|
"epoch": 3.5360499091842374,
|
|
"grad_norm": 0.08444672769151976,
|
|
"learning_rate": 0.001632806744275514,
|
|
"loss": 2.5832,
|
|
"step": 11195
|
|
},
|
|
{
|
|
"epoch": 3.5376293137487167,
|
|
"grad_norm": 0.07808974855886736,
|
|
"learning_rate": 0.0016323796574026369,
|
|
"loss": 2.5501,
|
|
"step": 11200
|
|
},
|
|
{
|
|
"epoch": 3.539208718313196,
|
|
"grad_norm": 0.06424278269039783,
|
|
"learning_rate": 0.001631952378227146,
|
|
"loss": 2.6595,
|
|
"step": 11205
|
|
},
|
|
{
|
|
"epoch": 3.540788122877675,
|
|
"grad_norm": 0.06820610119172915,
|
|
"learning_rate": 0.0016315249068789752,
|
|
"loss": 2.5796,
|
|
"step": 11210
|
|
},
|
|
{
|
|
"epoch": 3.5423675274421544,
|
|
"grad_norm": 0.06912948934517808,
|
|
"learning_rate": 0.001631097243488115,
|
|
"loss": 2.613,
|
|
"step": 11215
|
|
},
|
|
{
|
|
"epoch": 3.5439469320066337,
|
|
"grad_norm": 0.06574909061024409,
|
|
"learning_rate": 0.001630669388184615,
|
|
"loss": 2.5504,
|
|
"step": 11220
|
|
},
|
|
{
|
|
"epoch": 3.545526336571113,
|
|
"grad_norm": 0.07357098575986265,
|
|
"learning_rate": 0.0016302413410985838,
|
|
"loss": 2.5472,
|
|
"step": 11225
|
|
},
|
|
{
|
|
"epoch": 3.5471057411355917,
|
|
"grad_norm": 0.060739519425697566,
|
|
"learning_rate": 0.001629813102360187,
|
|
"loss": 2.6525,
|
|
"step": 11230
|
|
},
|
|
{
|
|
"epoch": 3.548685145700071,
|
|
"grad_norm": 0.07460214721162602,
|
|
"learning_rate": 0.0016293846720996505,
|
|
"loss": 2.5912,
|
|
"step": 11235
|
|
},
|
|
{
|
|
"epoch": 3.5502645502645502,
|
|
"grad_norm": 0.07731981043931724,
|
|
"learning_rate": 0.0016289560504472557,
|
|
"loss": 2.5973,
|
|
"step": 11240
|
|
},
|
|
{
|
|
"epoch": 3.5518439548290295,
|
|
"grad_norm": 0.06102617164948744,
|
|
"learning_rate": 0.001628527237533345,
|
|
"loss": 2.5308,
|
|
"step": 11245
|
|
},
|
|
{
|
|
"epoch": 3.5534233593935087,
|
|
"grad_norm": 0.075526968146759,
|
|
"learning_rate": 0.0016280982334883167,
|
|
"loss": 2.6218,
|
|
"step": 11250
|
|
},
|
|
{
|
|
"epoch": 3.555002763957988,
|
|
"grad_norm": 0.0739731983245525,
|
|
"learning_rate": 0.001627669038442629,
|
|
"loss": 2.6824,
|
|
"step": 11255
|
|
},
|
|
{
|
|
"epoch": 3.556582168522467,
|
|
"grad_norm": 0.06741919304797549,
|
|
"learning_rate": 0.0016272396525267969,
|
|
"loss": 2.5657,
|
|
"step": 11260
|
|
},
|
|
{
|
|
"epoch": 3.558161573086946,
|
|
"grad_norm": 0.07405767062357331,
|
|
"learning_rate": 0.001626810075871394,
|
|
"loss": 2.6026,
|
|
"step": 11265
|
|
},
|
|
{
|
|
"epoch": 3.5597409776514253,
|
|
"grad_norm": 0.06368924285850194,
|
|
"learning_rate": 0.0016263803086070522,
|
|
"loss": 2.5798,
|
|
"step": 11270
|
|
},
|
|
{
|
|
"epoch": 3.5613203822159045,
|
|
"grad_norm": 0.0636071014304687,
|
|
"learning_rate": 0.0016259503508644598,
|
|
"loss": 2.5691,
|
|
"step": 11275
|
|
},
|
|
{
|
|
"epoch": 3.5628997867803838,
|
|
"grad_norm": 0.06086408409466744,
|
|
"learning_rate": 0.0016255202027743655,
|
|
"loss": 2.5419,
|
|
"step": 11280
|
|
},
|
|
{
|
|
"epoch": 3.564479191344863,
|
|
"grad_norm": 0.07759136034435625,
|
|
"learning_rate": 0.0016250898644675743,
|
|
"loss": 2.5078,
|
|
"step": 11285
|
|
},
|
|
{
|
|
"epoch": 3.5660585959093423,
|
|
"grad_norm": 0.09729816531848431,
|
|
"learning_rate": 0.0016246593360749486,
|
|
"loss": 2.5457,
|
|
"step": 11290
|
|
},
|
|
{
|
|
"epoch": 3.5676380004738215,
|
|
"grad_norm": 0.074411179609676,
|
|
"learning_rate": 0.0016242286177274102,
|
|
"loss": 2.5908,
|
|
"step": 11295
|
|
},
|
|
{
|
|
"epoch": 3.5692174050383008,
|
|
"grad_norm": 0.06271173103153893,
|
|
"learning_rate": 0.0016237977095559374,
|
|
"loss": 2.6156,
|
|
"step": 11300
|
|
},
|
|
{
|
|
"epoch": 3.57079680960278,
|
|
"grad_norm": 0.07243411302383965,
|
|
"learning_rate": 0.0016233666116915665,
|
|
"loss": 2.555,
|
|
"step": 11305
|
|
},
|
|
{
|
|
"epoch": 3.572376214167259,
|
|
"grad_norm": 0.06233727605201847,
|
|
"learning_rate": 0.0016229353242653921,
|
|
"loss": 2.5773,
|
|
"step": 11310
|
|
},
|
|
{
|
|
"epoch": 3.573955618731738,
|
|
"grad_norm": 0.08847979897029239,
|
|
"learning_rate": 0.0016225038474085656,
|
|
"loss": 2.4884,
|
|
"step": 11315
|
|
},
|
|
{
|
|
"epoch": 3.5755350232962173,
|
|
"grad_norm": 0.0701888861826865,
|
|
"learning_rate": 0.001622072181252296,
|
|
"loss": 2.4875,
|
|
"step": 11320
|
|
},
|
|
{
|
|
"epoch": 3.5771144278606966,
|
|
"grad_norm": 0.06670567324457778,
|
|
"learning_rate": 0.0016216403259278513,
|
|
"loss": 2.5978,
|
|
"step": 11325
|
|
},
|
|
{
|
|
"epoch": 3.578693832425176,
|
|
"grad_norm": 0.06900210003160186,
|
|
"learning_rate": 0.0016212082815665549,
|
|
"loss": 2.5214,
|
|
"step": 11330
|
|
},
|
|
{
|
|
"epoch": 3.580273236989655,
|
|
"grad_norm": 0.056985066817711455,
|
|
"learning_rate": 0.0016207760482997889,
|
|
"loss": 2.6671,
|
|
"step": 11335
|
|
},
|
|
{
|
|
"epoch": 3.581852641554134,
|
|
"grad_norm": 0.07224028193115203,
|
|
"learning_rate": 0.0016203436262589928,
|
|
"loss": 2.52,
|
|
"step": 11340
|
|
},
|
|
{
|
|
"epoch": 3.583432046118613,
|
|
"grad_norm": 0.07025491795002735,
|
|
"learning_rate": 0.0016199110155756635,
|
|
"loss": 2.5668,
|
|
"step": 11345
|
|
},
|
|
{
|
|
"epoch": 3.5850114506830923,
|
|
"grad_norm": 0.08806917660000195,
|
|
"learning_rate": 0.0016194782163813555,
|
|
"loss": 2.5575,
|
|
"step": 11350
|
|
},
|
|
{
|
|
"epoch": 3.5865908552475716,
|
|
"grad_norm": 0.06733194550829404,
|
|
"learning_rate": 0.0016190452288076793,
|
|
"loss": 2.5797,
|
|
"step": 11355
|
|
},
|
|
{
|
|
"epoch": 3.588170259812051,
|
|
"grad_norm": 0.07468759375418589,
|
|
"learning_rate": 0.0016186120529863043,
|
|
"loss": 2.6095,
|
|
"step": 11360
|
|
},
|
|
{
|
|
"epoch": 3.58974966437653,
|
|
"grad_norm": 0.0682787771741682,
|
|
"learning_rate": 0.0016181786890489566,
|
|
"loss": 2.6025,
|
|
"step": 11365
|
|
},
|
|
{
|
|
"epoch": 3.5913290689410093,
|
|
"grad_norm": 0.09009403905876984,
|
|
"learning_rate": 0.0016177451371274195,
|
|
"loss": 2.5497,
|
|
"step": 11370
|
|
},
|
|
{
|
|
"epoch": 3.5929084735054886,
|
|
"grad_norm": 0.07012466714141395,
|
|
"learning_rate": 0.0016173113973535326,
|
|
"loss": 2.6256,
|
|
"step": 11375
|
|
},
|
|
{
|
|
"epoch": 3.594487878069968,
|
|
"grad_norm": 0.08249534596518228,
|
|
"learning_rate": 0.0016168774698591942,
|
|
"loss": 2.589,
|
|
"step": 11380
|
|
},
|
|
{
|
|
"epoch": 3.596067282634447,
|
|
"grad_norm": 0.0771951952443618,
|
|
"learning_rate": 0.0016164433547763584,
|
|
"loss": 2.6929,
|
|
"step": 11385
|
|
},
|
|
{
|
|
"epoch": 3.597646687198926,
|
|
"grad_norm": 0.07089184148912944,
|
|
"learning_rate": 0.001616009052237037,
|
|
"loss": 2.5692,
|
|
"step": 11390
|
|
},
|
|
{
|
|
"epoch": 3.599226091763405,
|
|
"grad_norm": 0.06721528818867126,
|
|
"learning_rate": 0.0016155745623732988,
|
|
"loss": 2.6388,
|
|
"step": 11395
|
|
},
|
|
{
|
|
"epoch": 3.6008054963278844,
|
|
"grad_norm": 0.08228419386883479,
|
|
"learning_rate": 0.0016151398853172687,
|
|
"loss": 2.6275,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 3.6023849008923636,
|
|
"grad_norm": 0.0655671463462765,
|
|
"learning_rate": 0.00161470502120113,
|
|
"loss": 2.6346,
|
|
"step": 11405
|
|
},
|
|
{
|
|
"epoch": 3.603964305456843,
|
|
"grad_norm": 0.06417851556288011,
|
|
"learning_rate": 0.0016142699701571217,
|
|
"loss": 2.5587,
|
|
"step": 11410
|
|
},
|
|
{
|
|
"epoch": 3.605543710021322,
|
|
"grad_norm": 0.07935220802349033,
|
|
"learning_rate": 0.0016138347323175401,
|
|
"loss": 2.5931,
|
|
"step": 11415
|
|
},
|
|
{
|
|
"epoch": 3.607123114585801,
|
|
"grad_norm": 0.06373656925396162,
|
|
"learning_rate": 0.001613399307814739,
|
|
"loss": 2.5363,
|
|
"step": 11420
|
|
},
|
|
{
|
|
"epoch": 3.60870251915028,
|
|
"grad_norm": 0.07720732300365005,
|
|
"learning_rate": 0.0016129636967811267,
|
|
"loss": 2.5865,
|
|
"step": 11425
|
|
},
|
|
{
|
|
"epoch": 3.6102819237147594,
|
|
"grad_norm": 0.061986669130409316,
|
|
"learning_rate": 0.0016125278993491708,
|
|
"loss": 2.585,
|
|
"step": 11430
|
|
},
|
|
{
|
|
"epoch": 3.6118613282792387,
|
|
"grad_norm": 0.07635466273186765,
|
|
"learning_rate": 0.0016120919156513943,
|
|
"loss": 2.6706,
|
|
"step": 11435
|
|
},
|
|
{
|
|
"epoch": 3.613440732843718,
|
|
"grad_norm": 0.07586918538839484,
|
|
"learning_rate": 0.001611655745820377,
|
|
"loss": 2.6386,
|
|
"step": 11440
|
|
},
|
|
{
|
|
"epoch": 3.615020137408197,
|
|
"grad_norm": 0.06798853097959028,
|
|
"learning_rate": 0.0016112193899887554,
|
|
"loss": 2.6089,
|
|
"step": 11445
|
|
},
|
|
{
|
|
"epoch": 3.6165995419726764,
|
|
"grad_norm": 0.08951585142582336,
|
|
"learning_rate": 0.0016107828482892223,
|
|
"loss": 2.5687,
|
|
"step": 11450
|
|
},
|
|
{
|
|
"epoch": 3.6181789465371557,
|
|
"grad_norm": 0.07872066117084035,
|
|
"learning_rate": 0.0016103461208545277,
|
|
"loss": 2.6284,
|
|
"step": 11455
|
|
},
|
|
{
|
|
"epoch": 3.619758351101635,
|
|
"grad_norm": 0.07150456315801824,
|
|
"learning_rate": 0.001609909207817477,
|
|
"loss": 2.473,
|
|
"step": 11460
|
|
},
|
|
{
|
|
"epoch": 3.6213377556661137,
|
|
"grad_norm": 0.08774636947086427,
|
|
"learning_rate": 0.0016094721093109334,
|
|
"loss": 2.5884,
|
|
"step": 11465
|
|
},
|
|
{
|
|
"epoch": 3.622917160230593,
|
|
"grad_norm": 0.07516109178896002,
|
|
"learning_rate": 0.0016090348254678153,
|
|
"loss": 2.5736,
|
|
"step": 11470
|
|
},
|
|
{
|
|
"epoch": 3.624496564795072,
|
|
"grad_norm": 0.0639443580037544,
|
|
"learning_rate": 0.001608597356421098,
|
|
"loss": 2.5588,
|
|
"step": 11475
|
|
},
|
|
{
|
|
"epoch": 3.6260759693595515,
|
|
"grad_norm": 0.0620553637240486,
|
|
"learning_rate": 0.001608159702303813,
|
|
"loss": 2.5642,
|
|
"step": 11480
|
|
},
|
|
{
|
|
"epoch": 3.6276553739240307,
|
|
"grad_norm": 0.06705275148777172,
|
|
"learning_rate": 0.0016077218632490483,
|
|
"loss": 2.6038,
|
|
"step": 11485
|
|
},
|
|
{
|
|
"epoch": 3.62923477848851,
|
|
"grad_norm": 0.0573434384133038,
|
|
"learning_rate": 0.0016072838393899477,
|
|
"loss": 2.5603,
|
|
"step": 11490
|
|
},
|
|
{
|
|
"epoch": 3.6308141830529888,
|
|
"grad_norm": 0.09390910657840079,
|
|
"learning_rate": 0.0016068456308597115,
|
|
"loss": 2.5916,
|
|
"step": 11495
|
|
},
|
|
{
|
|
"epoch": 3.632393587617468,
|
|
"grad_norm": 0.07769507200891466,
|
|
"learning_rate": 0.0016064072377915963,
|
|
"loss": 2.665,
|
|
"step": 11500
|
|
},
|
|
{
|
|
"epoch": 3.6339729921819472,
|
|
"grad_norm": 0.08015931577181488,
|
|
"learning_rate": 0.0016059686603189145,
|
|
"loss": 2.618,
|
|
"step": 11505
|
|
},
|
|
{
|
|
"epoch": 3.6355523967464265,
|
|
"grad_norm": 0.061839171520963104,
|
|
"learning_rate": 0.001605529898575035,
|
|
"loss": 2.648,
|
|
"step": 11510
|
|
},
|
|
{
|
|
"epoch": 3.6371318013109057,
|
|
"grad_norm": 0.06849901337162981,
|
|
"learning_rate": 0.0016050909526933819,
|
|
"loss": 2.6146,
|
|
"step": 11515
|
|
},
|
|
{
|
|
"epoch": 3.638711205875385,
|
|
"grad_norm": 0.06096462347936403,
|
|
"learning_rate": 0.001604651822807436,
|
|
"loss": 2.5761,
|
|
"step": 11520
|
|
},
|
|
{
|
|
"epoch": 3.6402906104398642,
|
|
"grad_norm": 0.07735773794461735,
|
|
"learning_rate": 0.0016042125090507343,
|
|
"loss": 2.6231,
|
|
"step": 11525
|
|
},
|
|
{
|
|
"epoch": 3.6418700150043435,
|
|
"grad_norm": 0.06445663947834945,
|
|
"learning_rate": 0.0016037730115568687,
|
|
"loss": 2.6138,
|
|
"step": 11530
|
|
},
|
|
{
|
|
"epoch": 3.6434494195688227,
|
|
"grad_norm": 0.06481551326652253,
|
|
"learning_rate": 0.0016033333304594883,
|
|
"loss": 2.4989,
|
|
"step": 11535
|
|
},
|
|
{
|
|
"epoch": 3.645028824133302,
|
|
"grad_norm": 0.07463445354677864,
|
|
"learning_rate": 0.0016028934658922967,
|
|
"loss": 2.5576,
|
|
"step": 11540
|
|
},
|
|
{
|
|
"epoch": 3.646608228697781,
|
|
"grad_norm": 0.05973603973950257,
|
|
"learning_rate": 0.001602453417989054,
|
|
"loss": 2.6742,
|
|
"step": 11545
|
|
},
|
|
{
|
|
"epoch": 3.64818763326226,
|
|
"grad_norm": 0.06700117854462165,
|
|
"learning_rate": 0.0016020131868835761,
|
|
"loss": 2.6284,
|
|
"step": 11550
|
|
},
|
|
{
|
|
"epoch": 3.6497670378267393,
|
|
"grad_norm": 0.07029568334101637,
|
|
"learning_rate": 0.0016015727727097348,
|
|
"loss": 2.562,
|
|
"step": 11555
|
|
},
|
|
{
|
|
"epoch": 3.6513464423912185,
|
|
"grad_norm": 0.07551743367060475,
|
|
"learning_rate": 0.0016011321756014565,
|
|
"loss": 2.612,
|
|
"step": 11560
|
|
},
|
|
{
|
|
"epoch": 3.6529258469556978,
|
|
"grad_norm": 0.07940481504751765,
|
|
"learning_rate": 0.0016006913956927243,
|
|
"loss": 2.5675,
|
|
"step": 11565
|
|
},
|
|
{
|
|
"epoch": 3.654505251520177,
|
|
"grad_norm": 0.06675841086201434,
|
|
"learning_rate": 0.0016002504331175769,
|
|
"loss": 2.5539,
|
|
"step": 11570
|
|
},
|
|
{
|
|
"epoch": 3.656084656084656,
|
|
"grad_norm": 0.08992331141368655,
|
|
"learning_rate": 0.0015998092880101075,
|
|
"loss": 2.6729,
|
|
"step": 11575
|
|
},
|
|
{
|
|
"epoch": 3.657664060649135,
|
|
"grad_norm": 0.08040146383919725,
|
|
"learning_rate": 0.0015993679605044663,
|
|
"loss": 2.6544,
|
|
"step": 11580
|
|
},
|
|
{
|
|
"epoch": 3.6592434652136143,
|
|
"grad_norm": 0.05876739234585907,
|
|
"learning_rate": 0.0015989264507348575,
|
|
"loss": 2.5403,
|
|
"step": 11585
|
|
},
|
|
{
|
|
"epoch": 3.6608228697780936,
|
|
"grad_norm": 0.07351498356217907,
|
|
"learning_rate": 0.001598484758835542,
|
|
"loss": 2.5323,
|
|
"step": 11590
|
|
},
|
|
{
|
|
"epoch": 3.662402274342573,
|
|
"grad_norm": 0.05913681391507762,
|
|
"learning_rate": 0.0015980428849408348,
|
|
"loss": 2.6195,
|
|
"step": 11595
|
|
},
|
|
{
|
|
"epoch": 3.663981678907052,
|
|
"grad_norm": 0.0581271070416612,
|
|
"learning_rate": 0.0015976008291851075,
|
|
"loss": 2.4767,
|
|
"step": 11600
|
|
},
|
|
{
|
|
"epoch": 3.6655610834715313,
|
|
"grad_norm": 0.07273962694824715,
|
|
"learning_rate": 0.0015971585917027862,
|
|
"loss": 2.5916,
|
|
"step": 11605
|
|
},
|
|
{
|
|
"epoch": 3.6671404880360106,
|
|
"grad_norm": 0.06637742106479563,
|
|
"learning_rate": 0.0015967161726283526,
|
|
"loss": 2.6021,
|
|
"step": 11610
|
|
},
|
|
{
|
|
"epoch": 3.66871989260049,
|
|
"grad_norm": 0.06949695544434134,
|
|
"learning_rate": 0.0015962735720963432,
|
|
"loss": 2.4988,
|
|
"step": 11615
|
|
},
|
|
{
|
|
"epoch": 3.670299297164969,
|
|
"grad_norm": 0.07110169966769231,
|
|
"learning_rate": 0.0015958307902413503,
|
|
"loss": 2.5254,
|
|
"step": 11620
|
|
},
|
|
{
|
|
"epoch": 3.671878701729448,
|
|
"grad_norm": 0.08016657262784661,
|
|
"learning_rate": 0.0015953878271980212,
|
|
"loss": 2.6278,
|
|
"step": 11625
|
|
},
|
|
{
|
|
"epoch": 3.673458106293927,
|
|
"grad_norm": 0.06057413148965004,
|
|
"learning_rate": 0.0015949446831010575,
|
|
"loss": 2.6439,
|
|
"step": 11630
|
|
},
|
|
{
|
|
"epoch": 3.6750375108584064,
|
|
"grad_norm": 0.07240177167192563,
|
|
"learning_rate": 0.001594501358085217,
|
|
"loss": 2.5542,
|
|
"step": 11635
|
|
},
|
|
{
|
|
"epoch": 3.6766169154228856,
|
|
"grad_norm": 0.060843434450803834,
|
|
"learning_rate": 0.001594057852285312,
|
|
"loss": 2.5908,
|
|
"step": 11640
|
|
},
|
|
{
|
|
"epoch": 3.678196319987365,
|
|
"grad_norm": 0.06525810592005284,
|
|
"learning_rate": 0.0015936141658362097,
|
|
"loss": 2.6601,
|
|
"step": 11645
|
|
},
|
|
{
|
|
"epoch": 3.679775724551844,
|
|
"grad_norm": 0.06321377368586852,
|
|
"learning_rate": 0.001593170298872832,
|
|
"loss": 2.6012,
|
|
"step": 11650
|
|
},
|
|
{
|
|
"epoch": 3.681355129116323,
|
|
"grad_norm": 0.06628932292665589,
|
|
"learning_rate": 0.0015927262515301565,
|
|
"loss": 2.5134,
|
|
"step": 11655
|
|
},
|
|
{
|
|
"epoch": 3.682934533680802,
|
|
"grad_norm": 0.06559326144054903,
|
|
"learning_rate": 0.001592282023943215,
|
|
"loss": 2.6767,
|
|
"step": 11660
|
|
},
|
|
{
|
|
"epoch": 3.6845139382452814,
|
|
"grad_norm": 0.05987102260069441,
|
|
"learning_rate": 0.001591837616247094,
|
|
"loss": 2.6319,
|
|
"step": 11665
|
|
},
|
|
{
|
|
"epoch": 3.6860933428097606,
|
|
"grad_norm": 0.06934301401894631,
|
|
"learning_rate": 0.0015913930285769355,
|
|
"loss": 2.5198,
|
|
"step": 11670
|
|
},
|
|
{
|
|
"epoch": 3.68767274737424,
|
|
"grad_norm": 0.10831757254750395,
|
|
"learning_rate": 0.0015909482610679353,
|
|
"loss": 2.5457,
|
|
"step": 11675
|
|
},
|
|
{
|
|
"epoch": 3.689252151938719,
|
|
"grad_norm": 0.08218091726770442,
|
|
"learning_rate": 0.0015905033138553448,
|
|
"loss": 2.524,
|
|
"step": 11680
|
|
},
|
|
{
|
|
"epoch": 3.6908315565031984,
|
|
"grad_norm": 0.06376451212701367,
|
|
"learning_rate": 0.0015900581870744693,
|
|
"loss": 2.4758,
|
|
"step": 11685
|
|
},
|
|
{
|
|
"epoch": 3.6924109610676776,
|
|
"grad_norm": 0.06430972855977783,
|
|
"learning_rate": 0.001589612880860669,
|
|
"loss": 2.5077,
|
|
"step": 11690
|
|
},
|
|
{
|
|
"epoch": 3.693990365632157,
|
|
"grad_norm": 0.06695416071184843,
|
|
"learning_rate": 0.0015891673953493588,
|
|
"loss": 2.568,
|
|
"step": 11695
|
|
},
|
|
{
|
|
"epoch": 3.695569770196636,
|
|
"grad_norm": 0.061729629801812624,
|
|
"learning_rate": 0.001588721730676008,
|
|
"loss": 2.5588,
|
|
"step": 11700
|
|
},
|
|
{
|
|
"epoch": 3.697149174761115,
|
|
"grad_norm": 0.05946372003089128,
|
|
"learning_rate": 0.0015882758869761404,
|
|
"loss": 2.4831,
|
|
"step": 11705
|
|
},
|
|
{
|
|
"epoch": 3.698728579325594,
|
|
"grad_norm": 0.0630251412675826,
|
|
"learning_rate": 0.001587829864385334,
|
|
"loss": 2.5141,
|
|
"step": 11710
|
|
},
|
|
{
|
|
"epoch": 3.7003079838900734,
|
|
"grad_norm": 0.05948929391037606,
|
|
"learning_rate": 0.0015873836630392218,
|
|
"loss": 2.5658,
|
|
"step": 11715
|
|
},
|
|
{
|
|
"epoch": 3.7018873884545527,
|
|
"grad_norm": 0.06547513854729227,
|
|
"learning_rate": 0.0015869372830734905,
|
|
"loss": 2.5818,
|
|
"step": 11720
|
|
},
|
|
{
|
|
"epoch": 3.703466793019032,
|
|
"grad_norm": 0.06746569379680042,
|
|
"learning_rate": 0.0015864907246238814,
|
|
"loss": 2.5168,
|
|
"step": 11725
|
|
},
|
|
{
|
|
"epoch": 3.705046197583511,
|
|
"grad_norm": 0.0774919132426451,
|
|
"learning_rate": 0.0015860439878261903,
|
|
"loss": 2.5841,
|
|
"step": 11730
|
|
},
|
|
{
|
|
"epoch": 3.70662560214799,
|
|
"grad_norm": 0.06471795811247055,
|
|
"learning_rate": 0.0015855970728162665,
|
|
"loss": 2.586,
|
|
"step": 11735
|
|
},
|
|
{
|
|
"epoch": 3.708205006712469,
|
|
"grad_norm": 0.08173911913035221,
|
|
"learning_rate": 0.0015851499797300149,
|
|
"loss": 2.5915,
|
|
"step": 11740
|
|
},
|
|
{
|
|
"epoch": 3.7097844112769485,
|
|
"grad_norm": 0.0698748086397385,
|
|
"learning_rate": 0.0015847027087033925,
|
|
"loss": 2.6078,
|
|
"step": 11745
|
|
},
|
|
{
|
|
"epoch": 3.7113638158414277,
|
|
"grad_norm": 0.058986179766953396,
|
|
"learning_rate": 0.0015842552598724123,
|
|
"loss": 2.6095,
|
|
"step": 11750
|
|
},
|
|
{
|
|
"epoch": 3.712943220405907,
|
|
"grad_norm": 0.06443992991199661,
|
|
"learning_rate": 0.0015838076333731406,
|
|
"loss": 2.6793,
|
|
"step": 11755
|
|
},
|
|
{
|
|
"epoch": 3.714522624970386,
|
|
"grad_norm": 0.06707207521817622,
|
|
"learning_rate": 0.0015833598293416979,
|
|
"loss": 2.5904,
|
|
"step": 11760
|
|
},
|
|
{
|
|
"epoch": 3.7161020295348655,
|
|
"grad_norm": 0.06908462552250887,
|
|
"learning_rate": 0.001582911847914258,
|
|
"loss": 2.6395,
|
|
"step": 11765
|
|
},
|
|
{
|
|
"epoch": 3.7176814340993447,
|
|
"grad_norm": 0.07953988614999913,
|
|
"learning_rate": 0.0015824636892270494,
|
|
"loss": 2.6607,
|
|
"step": 11770
|
|
},
|
|
{
|
|
"epoch": 3.719260838663824,
|
|
"grad_norm": 0.07122295263792892,
|
|
"learning_rate": 0.0015820153534163543,
|
|
"loss": 2.4373,
|
|
"step": 11775
|
|
},
|
|
{
|
|
"epoch": 3.720840243228303,
|
|
"grad_norm": 0.0791227253596731,
|
|
"learning_rate": 0.001581566840618509,
|
|
"loss": 2.5887,
|
|
"step": 11780
|
|
},
|
|
{
|
|
"epoch": 3.722419647792782,
|
|
"grad_norm": 0.08491488490909872,
|
|
"learning_rate": 0.0015811181509699033,
|
|
"loss": 2.5077,
|
|
"step": 11785
|
|
},
|
|
{
|
|
"epoch": 3.7239990523572613,
|
|
"grad_norm": 0.06909500785675694,
|
|
"learning_rate": 0.0015806692846069806,
|
|
"loss": 2.5626,
|
|
"step": 11790
|
|
},
|
|
{
|
|
"epoch": 3.7255784569217405,
|
|
"grad_norm": 0.08871884583658181,
|
|
"learning_rate": 0.0015802202416662383,
|
|
"loss": 2.4927,
|
|
"step": 11795
|
|
},
|
|
{
|
|
"epoch": 3.7271578614862197,
|
|
"grad_norm": 0.06983612739579836,
|
|
"learning_rate": 0.0015797710222842278,
|
|
"loss": 2.574,
|
|
"step": 11800
|
|
},
|
|
{
|
|
"epoch": 3.728737266050699,
|
|
"grad_norm": 0.07178637063925711,
|
|
"learning_rate": 0.0015793216265975539,
|
|
"loss": 2.5315,
|
|
"step": 11805
|
|
},
|
|
{
|
|
"epoch": 3.7303166706151782,
|
|
"grad_norm": 0.07198552612664164,
|
|
"learning_rate": 0.0015788720547428748,
|
|
"loss": 2.6055,
|
|
"step": 11810
|
|
},
|
|
{
|
|
"epoch": 3.731896075179657,
|
|
"grad_norm": 0.0834213905388001,
|
|
"learning_rate": 0.001578422306856902,
|
|
"loss": 2.5838,
|
|
"step": 11815
|
|
},
|
|
{
|
|
"epoch": 3.7334754797441363,
|
|
"grad_norm": 0.06497027284645868,
|
|
"learning_rate": 0.0015779723830764013,
|
|
"loss": 2.5466,
|
|
"step": 11820
|
|
},
|
|
{
|
|
"epoch": 3.7350548843086155,
|
|
"grad_norm": 0.08205936413753118,
|
|
"learning_rate": 0.0015775222835381917,
|
|
"loss": 2.4887,
|
|
"step": 11825
|
|
},
|
|
{
|
|
"epoch": 3.736634288873095,
|
|
"grad_norm": 0.07645597101149956,
|
|
"learning_rate": 0.001577072008379146,
|
|
"loss": 2.611,
|
|
"step": 11830
|
|
},
|
|
{
|
|
"epoch": 3.738213693437574,
|
|
"grad_norm": 0.06915056001015991,
|
|
"learning_rate": 0.001576621557736189,
|
|
"loss": 2.6185,
|
|
"step": 11835
|
|
},
|
|
{
|
|
"epoch": 3.7397930980020533,
|
|
"grad_norm": 0.06742031156171105,
|
|
"learning_rate": 0.001576170931746301,
|
|
"loss": 2.6044,
|
|
"step": 11840
|
|
},
|
|
{
|
|
"epoch": 3.7413725025665325,
|
|
"grad_norm": 0.08832787082109252,
|
|
"learning_rate": 0.0015757201305465133,
|
|
"loss": 2.6052,
|
|
"step": 11845
|
|
},
|
|
{
|
|
"epoch": 3.742951907131012,
|
|
"grad_norm": 0.06210399759957026,
|
|
"learning_rate": 0.0015752691542739129,
|
|
"loss": 2.551,
|
|
"step": 11850
|
|
},
|
|
{
|
|
"epoch": 3.744531311695491,
|
|
"grad_norm": 0.07501130509152855,
|
|
"learning_rate": 0.0015748180030656376,
|
|
"loss": 2.6258,
|
|
"step": 11855
|
|
},
|
|
{
|
|
"epoch": 3.74611071625997,
|
|
"grad_norm": 0.06848979870150981,
|
|
"learning_rate": 0.0015743666770588805,
|
|
"loss": 2.5335,
|
|
"step": 11860
|
|
},
|
|
{
|
|
"epoch": 3.747690120824449,
|
|
"grad_norm": 0.07729824208655418,
|
|
"learning_rate": 0.0015739151763908867,
|
|
"loss": 2.5208,
|
|
"step": 11865
|
|
},
|
|
{
|
|
"epoch": 3.7492695253889283,
|
|
"grad_norm": 0.06169126876014135,
|
|
"learning_rate": 0.0015734635011989545,
|
|
"loss": 2.5912,
|
|
"step": 11870
|
|
},
|
|
{
|
|
"epoch": 3.7508489299534076,
|
|
"grad_norm": 0.06117099440365994,
|
|
"learning_rate": 0.0015730116516204354,
|
|
"loss": 2.5647,
|
|
"step": 11875
|
|
},
|
|
{
|
|
"epoch": 3.752428334517887,
|
|
"grad_norm": 0.07141550473164313,
|
|
"learning_rate": 0.0015725596277927343,
|
|
"loss": 2.5888,
|
|
"step": 11880
|
|
},
|
|
{
|
|
"epoch": 3.754007739082366,
|
|
"grad_norm": 0.06802374034691881,
|
|
"learning_rate": 0.0015721074298533084,
|
|
"loss": 2.6387,
|
|
"step": 11885
|
|
},
|
|
{
|
|
"epoch": 3.755587143646845,
|
|
"grad_norm": 0.07395997757949198,
|
|
"learning_rate": 0.0015716550579396684,
|
|
"loss": 2.6458,
|
|
"step": 11890
|
|
},
|
|
{
|
|
"epoch": 3.757166548211324,
|
|
"grad_norm": 0.07221727705366908,
|
|
"learning_rate": 0.001571202512189378,
|
|
"loss": 2.587,
|
|
"step": 11895
|
|
},
|
|
{
|
|
"epoch": 3.7587459527758034,
|
|
"grad_norm": 0.06499442066371568,
|
|
"learning_rate": 0.0015707497927400528,
|
|
"loss": 2.5284,
|
|
"step": 11900
|
|
},
|
|
{
|
|
"epoch": 3.7603253573402826,
|
|
"grad_norm": 0.0763606951031882,
|
|
"learning_rate": 0.0015702968997293625,
|
|
"loss": 2.6571,
|
|
"step": 11905
|
|
},
|
|
{
|
|
"epoch": 3.761904761904762,
|
|
"grad_norm": 0.07570530757186843,
|
|
"learning_rate": 0.0015698438332950287,
|
|
"loss": 2.5307,
|
|
"step": 11910
|
|
},
|
|
{
|
|
"epoch": 3.763484166469241,
|
|
"grad_norm": 0.11453605716363023,
|
|
"learning_rate": 0.0015693905935748262,
|
|
"loss": 2.4929,
|
|
"step": 11915
|
|
},
|
|
{
|
|
"epoch": 3.7650635710337204,
|
|
"grad_norm": 0.07181105280729862,
|
|
"learning_rate": 0.0015689371807065815,
|
|
"loss": 2.6071,
|
|
"step": 11920
|
|
},
|
|
{
|
|
"epoch": 3.7666429755981996,
|
|
"grad_norm": 0.08144796201140979,
|
|
"learning_rate": 0.0015684835948281757,
|
|
"loss": 2.5995,
|
|
"step": 11925
|
|
},
|
|
{
|
|
"epoch": 3.768222380162679,
|
|
"grad_norm": 0.06860324974122771,
|
|
"learning_rate": 0.0015680298360775406,
|
|
"loss": 2.6382,
|
|
"step": 11930
|
|
},
|
|
{
|
|
"epoch": 3.769801784727158,
|
|
"grad_norm": 0.07704279863985637,
|
|
"learning_rate": 0.001567575904592662,
|
|
"loss": 2.6116,
|
|
"step": 11935
|
|
},
|
|
{
|
|
"epoch": 3.771381189291637,
|
|
"grad_norm": 0.06734152458036485,
|
|
"learning_rate": 0.0015671218005115766,
|
|
"loss": 2.4517,
|
|
"step": 11940
|
|
},
|
|
{
|
|
"epoch": 3.772960593856116,
|
|
"grad_norm": 0.06861923074644795,
|
|
"learning_rate": 0.0015666675239723756,
|
|
"loss": 2.5049,
|
|
"step": 11945
|
|
},
|
|
{
|
|
"epoch": 3.7745399984205954,
|
|
"grad_norm": 0.06391851461061915,
|
|
"learning_rate": 0.0015662130751132007,
|
|
"loss": 2.509,
|
|
"step": 11950
|
|
},
|
|
{
|
|
"epoch": 3.7761194029850746,
|
|
"grad_norm": 0.08905207752274513,
|
|
"learning_rate": 0.0015657584540722477,
|
|
"loss": 2.591,
|
|
"step": 11955
|
|
},
|
|
{
|
|
"epoch": 3.777698807549554,
|
|
"grad_norm": 0.07245118648464396,
|
|
"learning_rate": 0.001565303660987763,
|
|
"loss": 2.5828,
|
|
"step": 11960
|
|
},
|
|
{
|
|
"epoch": 3.779278212114033,
|
|
"grad_norm": 0.054955350922944506,
|
|
"learning_rate": 0.0015648486959980471,
|
|
"loss": 2.4764,
|
|
"step": 11965
|
|
},
|
|
{
|
|
"epoch": 3.780857616678512,
|
|
"grad_norm": 0.0701877737144574,
|
|
"learning_rate": 0.0015643935592414518,
|
|
"loss": 2.5812,
|
|
"step": 11970
|
|
},
|
|
{
|
|
"epoch": 3.782437021242991,
|
|
"grad_norm": 0.06911041937893157,
|
|
"learning_rate": 0.001563938250856381,
|
|
"loss": 2.5525,
|
|
"step": 11975
|
|
},
|
|
{
|
|
"epoch": 3.7840164258074704,
|
|
"grad_norm": 0.08435506406574682,
|
|
"learning_rate": 0.0015634827709812913,
|
|
"loss": 2.6127,
|
|
"step": 11980
|
|
},
|
|
{
|
|
"epoch": 3.7855958303719497,
|
|
"grad_norm": 0.06339153276038305,
|
|
"learning_rate": 0.001563027119754691,
|
|
"loss": 2.4767,
|
|
"step": 11985
|
|
},
|
|
{
|
|
"epoch": 3.787175234936429,
|
|
"grad_norm": 0.07001504227452558,
|
|
"learning_rate": 0.0015625712973151408,
|
|
"loss": 2.5805,
|
|
"step": 11990
|
|
},
|
|
{
|
|
"epoch": 3.788754639500908,
|
|
"grad_norm": 0.0809836856598099,
|
|
"learning_rate": 0.0015621153038012539,
|
|
"loss": 2.5752,
|
|
"step": 11995
|
|
},
|
|
{
|
|
"epoch": 3.7903340440653874,
|
|
"grad_norm": 0.08150940153250924,
|
|
"learning_rate": 0.0015616591393516944,
|
|
"loss": 2.6091,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 3.7919134486298667,
|
|
"grad_norm": 0.07727453120069605,
|
|
"learning_rate": 0.001561202804105179,
|
|
"loss": 2.5872,
|
|
"step": 12005
|
|
},
|
|
{
|
|
"epoch": 3.793492853194346,
|
|
"grad_norm": 0.08644028462606336,
|
|
"learning_rate": 0.0015607462982004763,
|
|
"loss": 2.5823,
|
|
"step": 12010
|
|
},
|
|
{
|
|
"epoch": 3.795072257758825,
|
|
"grad_norm": 0.06867645950982325,
|
|
"learning_rate": 0.0015602896217764073,
|
|
"loss": 2.5743,
|
|
"step": 12015
|
|
},
|
|
{
|
|
"epoch": 3.796651662323304,
|
|
"grad_norm": 0.06724634687778647,
|
|
"learning_rate": 0.0015598327749718442,
|
|
"loss": 2.5859,
|
|
"step": 12020
|
|
},
|
|
{
|
|
"epoch": 3.7982310668877832,
|
|
"grad_norm": 0.08950770334640352,
|
|
"learning_rate": 0.001559375757925711,
|
|
"loss": 2.587,
|
|
"step": 12025
|
|
},
|
|
{
|
|
"epoch": 3.7998104714522625,
|
|
"grad_norm": 0.0637875610038583,
|
|
"learning_rate": 0.0015589185707769837,
|
|
"loss": 2.5481,
|
|
"step": 12030
|
|
},
|
|
{
|
|
"epoch": 3.8013898760167417,
|
|
"grad_norm": 0.06560210334509306,
|
|
"learning_rate": 0.0015584612136646898,
|
|
"loss": 2.5254,
|
|
"step": 12035
|
|
},
|
|
{
|
|
"epoch": 3.802969280581221,
|
|
"grad_norm": 0.06735440160501718,
|
|
"learning_rate": 0.0015580036867279094,
|
|
"loss": 2.5454,
|
|
"step": 12040
|
|
},
|
|
{
|
|
"epoch": 3.8045486851457,
|
|
"grad_norm": 0.07069421391081696,
|
|
"learning_rate": 0.001557545990105773,
|
|
"loss": 2.5071,
|
|
"step": 12045
|
|
},
|
|
{
|
|
"epoch": 3.806128089710179,
|
|
"grad_norm": 0.062099328451962206,
|
|
"learning_rate": 0.0015570881239374632,
|
|
"loss": 2.5911,
|
|
"step": 12050
|
|
},
|
|
{
|
|
"epoch": 3.8077074942746583,
|
|
"grad_norm": 0.06056417644853614,
|
|
"learning_rate": 0.001556630088362214,
|
|
"loss": 2.628,
|
|
"step": 12055
|
|
},
|
|
{
|
|
"epoch": 3.8092868988391375,
|
|
"grad_norm": 0.06895650617867606,
|
|
"learning_rate": 0.0015561718835193118,
|
|
"loss": 2.6498,
|
|
"step": 12060
|
|
},
|
|
{
|
|
"epoch": 3.8108663034036168,
|
|
"grad_norm": 0.06309650874937076,
|
|
"learning_rate": 0.001555713509548093,
|
|
"loss": 2.5595,
|
|
"step": 12065
|
|
},
|
|
{
|
|
"epoch": 3.812445707968096,
|
|
"grad_norm": 0.06385696409028277,
|
|
"learning_rate": 0.0015552549665879462,
|
|
"loss": 2.5541,
|
|
"step": 12070
|
|
},
|
|
{
|
|
"epoch": 3.8140251125325753,
|
|
"grad_norm": 0.08318600724534689,
|
|
"learning_rate": 0.0015547962547783124,
|
|
"loss": 2.605,
|
|
"step": 12075
|
|
},
|
|
{
|
|
"epoch": 3.8156045170970545,
|
|
"grad_norm": 0.0644446170959427,
|
|
"learning_rate": 0.0015543373742586816,
|
|
"loss": 2.5562,
|
|
"step": 12080
|
|
},
|
|
{
|
|
"epoch": 3.8171839216615338,
|
|
"grad_norm": 0.06254069992465562,
|
|
"learning_rate": 0.0015538783251685972,
|
|
"loss": 2.4745,
|
|
"step": 12085
|
|
},
|
|
{
|
|
"epoch": 3.818763326226013,
|
|
"grad_norm": 0.0627387511682279,
|
|
"learning_rate": 0.001553419107647653,
|
|
"loss": 2.6228,
|
|
"step": 12090
|
|
},
|
|
{
|
|
"epoch": 3.8203427307904922,
|
|
"grad_norm": 0.05981773066384552,
|
|
"learning_rate": 0.001552959721835494,
|
|
"loss": 2.5832,
|
|
"step": 12095
|
|
},
|
|
{
|
|
"epoch": 3.821922135354971,
|
|
"grad_norm": 0.0692513869855841,
|
|
"learning_rate": 0.0015525001678718168,
|
|
"loss": 2.5631,
|
|
"step": 12100
|
|
},
|
|
{
|
|
"epoch": 3.8235015399194503,
|
|
"grad_norm": 0.074388575687594,
|
|
"learning_rate": 0.0015520404458963684,
|
|
"loss": 2.6013,
|
|
"step": 12105
|
|
},
|
|
{
|
|
"epoch": 3.8250809444839295,
|
|
"grad_norm": 0.06664639223608652,
|
|
"learning_rate": 0.0015515805560489474,
|
|
"loss": 2.4846,
|
|
"step": 12110
|
|
},
|
|
{
|
|
"epoch": 3.826660349048409,
|
|
"grad_norm": 0.07384557080674342,
|
|
"learning_rate": 0.0015511204984694036,
|
|
"loss": 2.7205,
|
|
"step": 12115
|
|
},
|
|
{
|
|
"epoch": 3.828239753612888,
|
|
"grad_norm": 0.07225928430492333,
|
|
"learning_rate": 0.0015506602732976373,
|
|
"loss": 2.5327,
|
|
"step": 12120
|
|
},
|
|
{
|
|
"epoch": 3.8298191581773673,
|
|
"grad_norm": 0.06574503198115073,
|
|
"learning_rate": 0.0015501998806736002,
|
|
"loss": 2.5849,
|
|
"step": 12125
|
|
},
|
|
{
|
|
"epoch": 3.831398562741846,
|
|
"grad_norm": 0.08582669309979607,
|
|
"learning_rate": 0.0015497393207372946,
|
|
"loss": 2.5385,
|
|
"step": 12130
|
|
},
|
|
{
|
|
"epoch": 3.8329779673063253,
|
|
"grad_norm": 0.06837292773786788,
|
|
"learning_rate": 0.0015492785936287742,
|
|
"loss": 2.5878,
|
|
"step": 12135
|
|
},
|
|
{
|
|
"epoch": 3.8345573718708046,
|
|
"grad_norm": 0.07273565814600343,
|
|
"learning_rate": 0.0015488176994881428,
|
|
"loss": 2.5865,
|
|
"step": 12140
|
|
},
|
|
{
|
|
"epoch": 3.836136776435284,
|
|
"grad_norm": 0.07224334715060578,
|
|
"learning_rate": 0.0015483566384555556,
|
|
"loss": 2.5363,
|
|
"step": 12145
|
|
},
|
|
{
|
|
"epoch": 3.837716180999763,
|
|
"grad_norm": 0.0845990886678342,
|
|
"learning_rate": 0.001547895410671218,
|
|
"loss": 2.5515,
|
|
"step": 12150
|
|
},
|
|
{
|
|
"epoch": 3.8392955855642423,
|
|
"grad_norm": 0.06549368814306274,
|
|
"learning_rate": 0.0015474340162753867,
|
|
"loss": 2.5401,
|
|
"step": 12155
|
|
},
|
|
{
|
|
"epoch": 3.8408749901287216,
|
|
"grad_norm": 0.06806248391684777,
|
|
"learning_rate": 0.0015469724554083685,
|
|
"loss": 2.5636,
|
|
"step": 12160
|
|
},
|
|
{
|
|
"epoch": 3.842454394693201,
|
|
"grad_norm": 0.06279417772461118,
|
|
"learning_rate": 0.0015465107282105217,
|
|
"loss": 2.5114,
|
|
"step": 12165
|
|
},
|
|
{
|
|
"epoch": 3.84403379925768,
|
|
"grad_norm": 0.0655147853109739,
|
|
"learning_rate": 0.0015460488348222538,
|
|
"loss": 2.5706,
|
|
"step": 12170
|
|
},
|
|
{
|
|
"epoch": 3.8456132038221593,
|
|
"grad_norm": 0.06907197386454811,
|
|
"learning_rate": 0.0015455867753840242,
|
|
"loss": 2.5016,
|
|
"step": 12175
|
|
},
|
|
{
|
|
"epoch": 3.847192608386638,
|
|
"grad_norm": 0.0736423521161705,
|
|
"learning_rate": 0.0015451245500363421,
|
|
"loss": 2.5645,
|
|
"step": 12180
|
|
},
|
|
{
|
|
"epoch": 3.8487720129511174,
|
|
"grad_norm": 0.07886210741275858,
|
|
"learning_rate": 0.0015446621589197674,
|
|
"loss": 2.5423,
|
|
"step": 12185
|
|
},
|
|
{
|
|
"epoch": 3.8503514175155966,
|
|
"grad_norm": 0.08844057753505022,
|
|
"learning_rate": 0.0015441996021749098,
|
|
"loss": 2.5896,
|
|
"step": 12190
|
|
},
|
|
{
|
|
"epoch": 3.851930822080076,
|
|
"grad_norm": 0.0719920887267598,
|
|
"learning_rate": 0.0015437368799424305,
|
|
"loss": 2.5224,
|
|
"step": 12195
|
|
},
|
|
{
|
|
"epoch": 3.853510226644555,
|
|
"grad_norm": 0.07033026347955103,
|
|
"learning_rate": 0.0015432739923630398,
|
|
"loss": 2.6151,
|
|
"step": 12200
|
|
},
|
|
{
|
|
"epoch": 3.855089631209034,
|
|
"grad_norm": 0.06221780697286292,
|
|
"learning_rate": 0.0015428109395774993,
|
|
"loss": 2.5101,
|
|
"step": 12205
|
|
},
|
|
{
|
|
"epoch": 3.856669035773513,
|
|
"grad_norm": 0.07062749021279034,
|
|
"learning_rate": 0.0015423477217266198,
|
|
"loss": 2.6095,
|
|
"step": 12210
|
|
},
|
|
{
|
|
"epoch": 3.8582484403379924,
|
|
"grad_norm": 0.06299103067926018,
|
|
"learning_rate": 0.0015418843389512636,
|
|
"loss": 2.5005,
|
|
"step": 12215
|
|
},
|
|
{
|
|
"epoch": 3.8598278449024717,
|
|
"grad_norm": 0.06589410137986898,
|
|
"learning_rate": 0.001541420791392342,
|
|
"loss": 2.5212,
|
|
"step": 12220
|
|
},
|
|
{
|
|
"epoch": 3.861407249466951,
|
|
"grad_norm": 0.06971836408575503,
|
|
"learning_rate": 0.001540957079190817,
|
|
"loss": 2.5891,
|
|
"step": 12225
|
|
},
|
|
{
|
|
"epoch": 3.86298665403143,
|
|
"grad_norm": 0.06266695473473892,
|
|
"learning_rate": 0.0015404932024877006,
|
|
"loss": 2.5676,
|
|
"step": 12230
|
|
},
|
|
{
|
|
"epoch": 3.8645660585959094,
|
|
"grad_norm": 0.059653893509713715,
|
|
"learning_rate": 0.0015400291614240543,
|
|
"loss": 2.5693,
|
|
"step": 12235
|
|
},
|
|
{
|
|
"epoch": 3.8661454631603887,
|
|
"grad_norm": 0.06475486572029057,
|
|
"learning_rate": 0.0015395649561409904,
|
|
"loss": 2.5376,
|
|
"step": 12240
|
|
},
|
|
{
|
|
"epoch": 3.867724867724868,
|
|
"grad_norm": 0.0706062062003792,
|
|
"learning_rate": 0.001539100586779671,
|
|
"loss": 2.5812,
|
|
"step": 12245
|
|
},
|
|
{
|
|
"epoch": 3.869304272289347,
|
|
"grad_norm": 0.06582491956953788,
|
|
"learning_rate": 0.0015386360534813078,
|
|
"loss": 2.5246,
|
|
"step": 12250
|
|
},
|
|
{
|
|
"epoch": 3.870883676853826,
|
|
"grad_norm": 0.06774194262370492,
|
|
"learning_rate": 0.0015381713563871616,
|
|
"loss": 2.6078,
|
|
"step": 12255
|
|
},
|
|
{
|
|
"epoch": 3.872463081418305,
|
|
"grad_norm": 0.08581647240071012,
|
|
"learning_rate": 0.0015377064956385445,
|
|
"loss": 2.5637,
|
|
"step": 12260
|
|
},
|
|
{
|
|
"epoch": 3.8740424859827844,
|
|
"grad_norm": 0.07227433739993022,
|
|
"learning_rate": 0.0015372414713768175,
|
|
"loss": 2.507,
|
|
"step": 12265
|
|
},
|
|
{
|
|
"epoch": 3.8756218905472637,
|
|
"grad_norm": 0.07068266880914315,
|
|
"learning_rate": 0.001536776283743392,
|
|
"loss": 2.5054,
|
|
"step": 12270
|
|
},
|
|
{
|
|
"epoch": 3.877201295111743,
|
|
"grad_norm": 0.06252227417494192,
|
|
"learning_rate": 0.001536310932879728,
|
|
"loss": 2.5033,
|
|
"step": 12275
|
|
},
|
|
{
|
|
"epoch": 3.878780699676222,
|
|
"grad_norm": 0.06688104956127731,
|
|
"learning_rate": 0.0015358454189273358,
|
|
"loss": 2.7985,
|
|
"step": 12280
|
|
},
|
|
{
|
|
"epoch": 3.880360104240701,
|
|
"grad_norm": 0.05763098920186217,
|
|
"learning_rate": 0.0015353797420277753,
|
|
"loss": 2.5103,
|
|
"step": 12285
|
|
},
|
|
{
|
|
"epoch": 3.8819395088051802,
|
|
"grad_norm": 0.06888282622425737,
|
|
"learning_rate": 0.0015349139023226562,
|
|
"loss": 2.5494,
|
|
"step": 12290
|
|
},
|
|
{
|
|
"epoch": 3.8835189133696595,
|
|
"grad_norm": 0.06682425490074745,
|
|
"learning_rate": 0.0015344478999536366,
|
|
"loss": 2.5212,
|
|
"step": 12295
|
|
},
|
|
{
|
|
"epoch": 3.8850983179341387,
|
|
"grad_norm": 0.05722987794976806,
|
|
"learning_rate": 0.0015339817350624257,
|
|
"loss": 2.6008,
|
|
"step": 12300
|
|
},
|
|
{
|
|
"epoch": 3.886677722498618,
|
|
"grad_norm": 0.0573207391101277,
|
|
"learning_rate": 0.0015335154077907808,
|
|
"loss": 2.5186,
|
|
"step": 12305
|
|
},
|
|
{
|
|
"epoch": 3.8882571270630972,
|
|
"grad_norm": 0.06763657629014941,
|
|
"learning_rate": 0.0015330489182805087,
|
|
"loss": 2.5378,
|
|
"step": 12310
|
|
},
|
|
{
|
|
"epoch": 3.8898365316275765,
|
|
"grad_norm": 0.07187086440873063,
|
|
"learning_rate": 0.001532582266673467,
|
|
"loss": 2.5675,
|
|
"step": 12315
|
|
},
|
|
{
|
|
"epoch": 3.8914159361920557,
|
|
"grad_norm": 0.07420773877218477,
|
|
"learning_rate": 0.0015321154531115601,
|
|
"loss": 2.6274,
|
|
"step": 12320
|
|
},
|
|
{
|
|
"epoch": 3.892995340756535,
|
|
"grad_norm": 0.07927209871341649,
|
|
"learning_rate": 0.001531648477736744,
|
|
"loss": 2.5448,
|
|
"step": 12325
|
|
},
|
|
{
|
|
"epoch": 3.894574745321014,
|
|
"grad_norm": 0.08244381956489737,
|
|
"learning_rate": 0.0015311813406910224,
|
|
"loss": 2.4763,
|
|
"step": 12330
|
|
},
|
|
{
|
|
"epoch": 3.896154149885493,
|
|
"grad_norm": 0.0899563297138929,
|
|
"learning_rate": 0.001530714042116449,
|
|
"loss": 2.6179,
|
|
"step": 12335
|
|
},
|
|
{
|
|
"epoch": 3.8977335544499723,
|
|
"grad_norm": 0.07584948909618364,
|
|
"learning_rate": 0.0015302465821551267,
|
|
"loss": 2.546,
|
|
"step": 12340
|
|
},
|
|
{
|
|
"epoch": 3.8993129590144515,
|
|
"grad_norm": 0.06545533095786493,
|
|
"learning_rate": 0.0015297789609492061,
|
|
"loss": 2.5659,
|
|
"step": 12345
|
|
},
|
|
{
|
|
"epoch": 3.9008923635789308,
|
|
"grad_norm": 0.07894341187855289,
|
|
"learning_rate": 0.0015293111786408883,
|
|
"loss": 2.5599,
|
|
"step": 12350
|
|
},
|
|
{
|
|
"epoch": 3.90247176814341,
|
|
"grad_norm": 0.062169262244883804,
|
|
"learning_rate": 0.0015288432353724232,
|
|
"loss": 2.4791,
|
|
"step": 12355
|
|
},
|
|
{
|
|
"epoch": 3.9040511727078893,
|
|
"grad_norm": 0.06556902494812732,
|
|
"learning_rate": 0.0015283751312861092,
|
|
"loss": 2.6097,
|
|
"step": 12360
|
|
},
|
|
{
|
|
"epoch": 3.905630577272368,
|
|
"grad_norm": 0.06491974805319319,
|
|
"learning_rate": 0.0015279068665242934,
|
|
"loss": 2.6049,
|
|
"step": 12365
|
|
},
|
|
{
|
|
"epoch": 3.9072099818368473,
|
|
"grad_norm": 0.07259381531482005,
|
|
"learning_rate": 0.0015274384412293722,
|
|
"loss": 2.5192,
|
|
"step": 12370
|
|
},
|
|
{
|
|
"epoch": 3.9087893864013266,
|
|
"grad_norm": 0.08063144546357277,
|
|
"learning_rate": 0.0015269698555437912,
|
|
"loss": 2.5991,
|
|
"step": 12375
|
|
},
|
|
{
|
|
"epoch": 3.910368790965806,
|
|
"grad_norm": 0.07107690741998134,
|
|
"learning_rate": 0.001526501109610044,
|
|
"loss": 2.5397,
|
|
"step": 12380
|
|
},
|
|
{
|
|
"epoch": 3.911948195530285,
|
|
"grad_norm": 0.05530165601623058,
|
|
"learning_rate": 0.0015260322035706732,
|
|
"loss": 2.5563,
|
|
"step": 12385
|
|
},
|
|
{
|
|
"epoch": 3.9135276000947643,
|
|
"grad_norm": 0.06966379636017452,
|
|
"learning_rate": 0.00152556313756827,
|
|
"loss": 2.5813,
|
|
"step": 12390
|
|
},
|
|
{
|
|
"epoch": 3.9151070046592436,
|
|
"grad_norm": 0.06841110650186709,
|
|
"learning_rate": 0.001525093911745475,
|
|
"loss": 2.5373,
|
|
"step": 12395
|
|
},
|
|
{
|
|
"epoch": 3.916686409223723,
|
|
"grad_norm": 0.062219731428798705,
|
|
"learning_rate": 0.0015246245262449762,
|
|
"loss": 2.4471,
|
|
"step": 12400
|
|
},
|
|
{
|
|
"epoch": 3.918265813788202,
|
|
"grad_norm": 0.06941377270222482,
|
|
"learning_rate": 0.0015241549812095112,
|
|
"loss": 2.5232,
|
|
"step": 12405
|
|
},
|
|
{
|
|
"epoch": 3.9198452183526813,
|
|
"grad_norm": 0.060912115315366694,
|
|
"learning_rate": 0.0015236852767818649,
|
|
"loss": 2.6102,
|
|
"step": 12410
|
|
},
|
|
{
|
|
"epoch": 3.92142462291716,
|
|
"grad_norm": 0.053355889741235285,
|
|
"learning_rate": 0.0015232154131048716,
|
|
"loss": 2.5226,
|
|
"step": 12415
|
|
},
|
|
{
|
|
"epoch": 3.9230040274816393,
|
|
"grad_norm": 0.06741970546593662,
|
|
"learning_rate": 0.0015227453903214146,
|
|
"loss": 2.5365,
|
|
"step": 12420
|
|
},
|
|
{
|
|
"epoch": 3.9245834320461186,
|
|
"grad_norm": 0.06898743897733144,
|
|
"learning_rate": 0.0015222752085744242,
|
|
"loss": 2.5389,
|
|
"step": 12425
|
|
},
|
|
{
|
|
"epoch": 3.926162836610598,
|
|
"grad_norm": 0.07828821379776077,
|
|
"learning_rate": 0.00152180486800688,
|
|
"loss": 2.4833,
|
|
"step": 12430
|
|
},
|
|
{
|
|
"epoch": 3.927742241175077,
|
|
"grad_norm": 0.07412821727493593,
|
|
"learning_rate": 0.001521334368761809,
|
|
"loss": 2.573,
|
|
"step": 12435
|
|
},
|
|
{
|
|
"epoch": 3.9293216457395563,
|
|
"grad_norm": 0.0732937633568354,
|
|
"learning_rate": 0.001520863710982287,
|
|
"loss": 2.514,
|
|
"step": 12440
|
|
},
|
|
{
|
|
"epoch": 3.930901050304035,
|
|
"grad_norm": 0.06489639713929875,
|
|
"learning_rate": 0.0015203928948114389,
|
|
"loss": 2.6071,
|
|
"step": 12445
|
|
},
|
|
{
|
|
"epoch": 3.9324804548685144,
|
|
"grad_norm": 0.06300824225072883,
|
|
"learning_rate": 0.0015199219203924366,
|
|
"loss": 2.6405,
|
|
"step": 12450
|
|
},
|
|
{
|
|
"epoch": 3.9340598594329936,
|
|
"grad_norm": 0.06632129995715534,
|
|
"learning_rate": 0.0015194507878684997,
|
|
"loss": 2.5978,
|
|
"step": 12455
|
|
},
|
|
{
|
|
"epoch": 3.935639263997473,
|
|
"grad_norm": 0.06366258558967011,
|
|
"learning_rate": 0.0015189794973828974,
|
|
"loss": 2.4811,
|
|
"step": 12460
|
|
},
|
|
{
|
|
"epoch": 3.937218668561952,
|
|
"grad_norm": 0.057852439284190545,
|
|
"learning_rate": 0.0015185080490789456,
|
|
"loss": 2.4908,
|
|
"step": 12465
|
|
},
|
|
{
|
|
"epoch": 3.9387980731264314,
|
|
"grad_norm": 0.06935274072833637,
|
|
"learning_rate": 0.0015180364431000091,
|
|
"loss": 2.6038,
|
|
"step": 12470
|
|
},
|
|
{
|
|
"epoch": 3.9403774776909106,
|
|
"grad_norm": 0.06447527949901698,
|
|
"learning_rate": 0.0015175646795895,
|
|
"loss": 2.4986,
|
|
"step": 12475
|
|
},
|
|
{
|
|
"epoch": 3.94195688225539,
|
|
"grad_norm": 0.07718286416506806,
|
|
"learning_rate": 0.0015170927586908784,
|
|
"loss": 2.5036,
|
|
"step": 12480
|
|
},
|
|
{
|
|
"epoch": 3.943536286819869,
|
|
"grad_norm": 0.09905668690842306,
|
|
"learning_rate": 0.001516620680547653,
|
|
"loss": 2.5555,
|
|
"step": 12485
|
|
},
|
|
{
|
|
"epoch": 3.9451156913843484,
|
|
"grad_norm": 0.0729132030800347,
|
|
"learning_rate": 0.001516148445303379,
|
|
"loss": 2.5573,
|
|
"step": 12490
|
|
},
|
|
{
|
|
"epoch": 3.946695095948827,
|
|
"grad_norm": 0.0828373214953166,
|
|
"learning_rate": 0.001515676053101661,
|
|
"loss": 2.5675,
|
|
"step": 12495
|
|
},
|
|
{
|
|
"epoch": 3.9482745005133064,
|
|
"grad_norm": 0.07913317264070925,
|
|
"learning_rate": 0.00151520350408615,
|
|
"loss": 2.5286,
|
|
"step": 12500
|
|
},
|
|
{
|
|
"epoch": 3.9498539050777857,
|
|
"grad_norm": 0.08777782985578693,
|
|
"learning_rate": 0.001514730798400545,
|
|
"loss": 2.5353,
|
|
"step": 12505
|
|
},
|
|
{
|
|
"epoch": 3.951433309642265,
|
|
"grad_norm": 0.06957346647678062,
|
|
"learning_rate": 0.0015142579361885926,
|
|
"loss": 2.6194,
|
|
"step": 12510
|
|
},
|
|
{
|
|
"epoch": 3.953012714206744,
|
|
"grad_norm": 0.06560414609700341,
|
|
"learning_rate": 0.0015137849175940882,
|
|
"loss": 2.5488,
|
|
"step": 12515
|
|
},
|
|
{
|
|
"epoch": 3.9545921187712234,
|
|
"grad_norm": 0.060347021480123796,
|
|
"learning_rate": 0.0015133117427608724,
|
|
"loss": 2.4683,
|
|
"step": 12520
|
|
},
|
|
{
|
|
"epoch": 3.956171523335702,
|
|
"grad_norm": 0.07389638212333845,
|
|
"learning_rate": 0.0015128384118328353,
|
|
"loss": 2.5106,
|
|
"step": 12525
|
|
},
|
|
{
|
|
"epoch": 3.9577509279001815,
|
|
"grad_norm": 0.06385706669645118,
|
|
"learning_rate": 0.001512364924953914,
|
|
"loss": 2.4939,
|
|
"step": 12530
|
|
},
|
|
{
|
|
"epoch": 3.9593303324646607,
|
|
"grad_norm": 0.0655373849546735,
|
|
"learning_rate": 0.0015118912822680924,
|
|
"loss": 2.5059,
|
|
"step": 12535
|
|
},
|
|
{
|
|
"epoch": 3.96090973702914,
|
|
"grad_norm": 0.06035286538094596,
|
|
"learning_rate": 0.0015114174839194027,
|
|
"loss": 2.5216,
|
|
"step": 12540
|
|
},
|
|
{
|
|
"epoch": 3.962489141593619,
|
|
"grad_norm": 0.07314909754776168,
|
|
"learning_rate": 0.0015109435300519238,
|
|
"loss": 2.5976,
|
|
"step": 12545
|
|
},
|
|
{
|
|
"epoch": 3.9640685461580984,
|
|
"grad_norm": 0.06358442436359428,
|
|
"learning_rate": 0.0015104694208097815,
|
|
"loss": 2.5821,
|
|
"step": 12550
|
|
},
|
|
{
|
|
"epoch": 3.9656479507225777,
|
|
"grad_norm": 0.05627148872134842,
|
|
"learning_rate": 0.00150999515633715,
|
|
"loss": 2.5797,
|
|
"step": 12555
|
|
},
|
|
{
|
|
"epoch": 3.967227355287057,
|
|
"grad_norm": 0.055213419124257625,
|
|
"learning_rate": 0.00150952073677825,
|
|
"loss": 2.514,
|
|
"step": 12560
|
|
},
|
|
{
|
|
"epoch": 3.968806759851536,
|
|
"grad_norm": 0.06548271078868903,
|
|
"learning_rate": 0.0015090461622773495,
|
|
"loss": 2.6508,
|
|
"step": 12565
|
|
},
|
|
{
|
|
"epoch": 3.9703861644160154,
|
|
"grad_norm": 0.05439279521404275,
|
|
"learning_rate": 0.001508571432978763,
|
|
"loss": 2.5444,
|
|
"step": 12570
|
|
},
|
|
{
|
|
"epoch": 3.9719655689804942,
|
|
"grad_norm": 0.05597006250704407,
|
|
"learning_rate": 0.0015080965490268533,
|
|
"loss": 2.5818,
|
|
"step": 12575
|
|
},
|
|
{
|
|
"epoch": 3.9735449735449735,
|
|
"grad_norm": 0.06480900428913591,
|
|
"learning_rate": 0.0015076215105660291,
|
|
"loss": 2.503,
|
|
"step": 12580
|
|
},
|
|
{
|
|
"epoch": 3.9751243781094527,
|
|
"grad_norm": 0.0602210100028421,
|
|
"learning_rate": 0.0015071463177407471,
|
|
"loss": 2.488,
|
|
"step": 12585
|
|
},
|
|
{
|
|
"epoch": 3.976703782673932,
|
|
"grad_norm": 0.07181151731195233,
|
|
"learning_rate": 0.0015066709706955104,
|
|
"loss": 2.5144,
|
|
"step": 12590
|
|
},
|
|
{
|
|
"epoch": 3.9782831872384112,
|
|
"grad_norm": 0.061514167955377494,
|
|
"learning_rate": 0.0015061954695748682,
|
|
"loss": 2.4351,
|
|
"step": 12595
|
|
},
|
|
{
|
|
"epoch": 3.97986259180289,
|
|
"grad_norm": 0.08114429713093464,
|
|
"learning_rate": 0.001505719814523418,
|
|
"loss": 2.598,
|
|
"step": 12600
|
|
},
|
|
{
|
|
"epoch": 3.9814419963673693,
|
|
"grad_norm": 0.06643483669026658,
|
|
"learning_rate": 0.0015052440056858036,
|
|
"loss": 2.5068,
|
|
"step": 12605
|
|
},
|
|
{
|
|
"epoch": 3.9830214009318485,
|
|
"grad_norm": 0.0621066900073868,
|
|
"learning_rate": 0.001504768043206715,
|
|
"loss": 2.6134,
|
|
"step": 12610
|
|
},
|
|
{
|
|
"epoch": 3.984600805496328,
|
|
"grad_norm": 0.10306918624677071,
|
|
"learning_rate": 0.0015042919272308896,
|
|
"loss": 2.5018,
|
|
"step": 12615
|
|
},
|
|
{
|
|
"epoch": 3.986180210060807,
|
|
"grad_norm": 0.07294016120820981,
|
|
"learning_rate": 0.0015038156579031108,
|
|
"loss": 2.602,
|
|
"step": 12620
|
|
},
|
|
{
|
|
"epoch": 3.9877596146252863,
|
|
"grad_norm": 0.06777722117317442,
|
|
"learning_rate": 0.0015033392353682095,
|
|
"loss": 2.5236,
|
|
"step": 12625
|
|
},
|
|
{
|
|
"epoch": 3.9893390191897655,
|
|
"grad_norm": 0.06556144755742266,
|
|
"learning_rate": 0.001502862659771063,
|
|
"loss": 2.5473,
|
|
"step": 12630
|
|
},
|
|
{
|
|
"epoch": 3.9909184237542448,
|
|
"grad_norm": 0.0767011021148724,
|
|
"learning_rate": 0.0015023859312565944,
|
|
"loss": 2.6054,
|
|
"step": 12635
|
|
},
|
|
{
|
|
"epoch": 3.992497828318724,
|
|
"grad_norm": 0.05778768006153058,
|
|
"learning_rate": 0.0015019090499697738,
|
|
"loss": 2.5015,
|
|
"step": 12640
|
|
},
|
|
{
|
|
"epoch": 3.9940772328832033,
|
|
"grad_norm": 0.08519303735460436,
|
|
"learning_rate": 0.001501432016055618,
|
|
"loss": 2.6381,
|
|
"step": 12645
|
|
},
|
|
{
|
|
"epoch": 3.995656637447682,
|
|
"grad_norm": 0.06211552456567722,
|
|
"learning_rate": 0.00150095482965919,
|
|
"loss": 2.5302,
|
|
"step": 12650
|
|
},
|
|
{
|
|
"epoch": 3.9972360420121613,
|
|
"grad_norm": 0.0674676598184494,
|
|
"learning_rate": 0.0015004774909255984,
|
|
"loss": 2.546,
|
|
"step": 12655
|
|
},
|
|
{
|
|
"epoch": 3.9988154465766406,
|
|
"grad_norm": 0.06318447337573145,
|
|
"learning_rate": 0.0015,
|
|
"loss": 2.6011,
|
|
"step": 12660
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"eval_loss": 2.5386574268341064,
|
|
"eval_runtime": 118.4604,
|
|
"eval_samples_per_second": 22.362,
|
|
"eval_steps_per_second": 5.597,
|
|
"step": 12664
|
|
},
|
|
{
|
|
"epoch": 4.000315880912896,
|
|
"grad_norm": 0.06197262595124461,
|
|
"learning_rate": 0.0014995223570275962,
|
|
"loss": 2.5741,
|
|
"step": 12665
|
|
},
|
|
{
|
|
"epoch": 4.001895285477375,
|
|
"grad_norm": 0.10585904612167955,
|
|
"learning_rate": 0.0014990445621536348,
|
|
"loss": 2.5206,
|
|
"step": 12670
|
|
},
|
|
{
|
|
"epoch": 4.0034746900418545,
|
|
"grad_norm": 0.07447969829220162,
|
|
"learning_rate": 0.0014985666155234107,
|
|
"loss": 2.5209,
|
|
"step": 12675
|
|
},
|
|
{
|
|
"epoch": 4.005054094606334,
|
|
"grad_norm": 0.0720136198965427,
|
|
"learning_rate": 0.0014980885172822646,
|
|
"loss": 2.5258,
|
|
"step": 12680
|
|
},
|
|
{
|
|
"epoch": 4.006633499170813,
|
|
"grad_norm": 0.06614382437037156,
|
|
"learning_rate": 0.0014976102675755823,
|
|
"loss": 2.5181,
|
|
"step": 12685
|
|
},
|
|
{
|
|
"epoch": 4.008212903735291,
|
|
"grad_norm": 0.08568424376359589,
|
|
"learning_rate": 0.0014971318665487972,
|
|
"loss": 2.5327,
|
|
"step": 12690
|
|
},
|
|
{
|
|
"epoch": 4.009792308299771,
|
|
"grad_norm": 0.06507721023920351,
|
|
"learning_rate": 0.0014966533143473874,
|
|
"loss": 2.5715,
|
|
"step": 12695
|
|
},
|
|
{
|
|
"epoch": 4.01137171286425,
|
|
"grad_norm": 0.06424893564735171,
|
|
"learning_rate": 0.0014961746111168783,
|
|
"loss": 2.4817,
|
|
"step": 12700
|
|
},
|
|
{
|
|
"epoch": 4.012951117428729,
|
|
"grad_norm": 0.06417979275404782,
|
|
"learning_rate": 0.00149569575700284,
|
|
"loss": 2.5266,
|
|
"step": 12705
|
|
},
|
|
{
|
|
"epoch": 4.014530521993208,
|
|
"grad_norm": 0.06326836264220499,
|
|
"learning_rate": 0.001495216752150889,
|
|
"loss": 2.5385,
|
|
"step": 12710
|
|
},
|
|
{
|
|
"epoch": 4.016109926557688,
|
|
"grad_norm": 0.0796902692938263,
|
|
"learning_rate": 0.0014947375967066879,
|
|
"loss": 2.556,
|
|
"step": 12715
|
|
},
|
|
{
|
|
"epoch": 4.017689331122167,
|
|
"grad_norm": 0.06999205525685467,
|
|
"learning_rate": 0.0014942582908159445,
|
|
"loss": 2.4686,
|
|
"step": 12720
|
|
},
|
|
{
|
|
"epoch": 4.019268735686646,
|
|
"grad_norm": 0.058974463155499354,
|
|
"learning_rate": 0.0014937788346244126,
|
|
"loss": 2.485,
|
|
"step": 12725
|
|
},
|
|
{
|
|
"epoch": 4.020848140251125,
|
|
"grad_norm": 0.07187170821335045,
|
|
"learning_rate": 0.001493299228277892,
|
|
"loss": 2.6368,
|
|
"step": 12730
|
|
},
|
|
{
|
|
"epoch": 4.022427544815605,
|
|
"grad_norm": 0.0670071291341169,
|
|
"learning_rate": 0.001492819471922228,
|
|
"loss": 2.5562,
|
|
"step": 12735
|
|
},
|
|
{
|
|
"epoch": 4.024006949380084,
|
|
"grad_norm": 0.060418395902372614,
|
|
"learning_rate": 0.001492339565703311,
|
|
"loss": 2.5904,
|
|
"step": 12740
|
|
},
|
|
{
|
|
"epoch": 4.025586353944563,
|
|
"grad_norm": 0.07524606530892788,
|
|
"learning_rate": 0.0014918595097670783,
|
|
"loss": 2.5773,
|
|
"step": 12745
|
|
},
|
|
{
|
|
"epoch": 4.027165758509042,
|
|
"grad_norm": 0.0723928195941418,
|
|
"learning_rate": 0.0014913793042595107,
|
|
"loss": 2.535,
|
|
"step": 12750
|
|
},
|
|
{
|
|
"epoch": 4.028745163073522,
|
|
"grad_norm": 0.06762103752752954,
|
|
"learning_rate": 0.0014908989493266364,
|
|
"loss": 2.4715,
|
|
"step": 12755
|
|
},
|
|
{
|
|
"epoch": 4.030324567638001,
|
|
"grad_norm": 0.0595953998093573,
|
|
"learning_rate": 0.001490418445114528,
|
|
"loss": 2.4335,
|
|
"step": 12760
|
|
},
|
|
{
|
|
"epoch": 4.03190397220248,
|
|
"grad_norm": 0.07426039937453528,
|
|
"learning_rate": 0.001489937791769304,
|
|
"loss": 2.515,
|
|
"step": 12765
|
|
},
|
|
{
|
|
"epoch": 4.0334833767669585,
|
|
"grad_norm": 0.058581189485131954,
|
|
"learning_rate": 0.0014894569894371274,
|
|
"loss": 2.5054,
|
|
"step": 12770
|
|
},
|
|
{
|
|
"epoch": 4.035062781331438,
|
|
"grad_norm": 0.06680693812756389,
|
|
"learning_rate": 0.001488976038264208,
|
|
"loss": 2.5582,
|
|
"step": 12775
|
|
},
|
|
{
|
|
"epoch": 4.036642185895917,
|
|
"grad_norm": 0.06508799516512898,
|
|
"learning_rate": 0.0014884949383967992,
|
|
"loss": 2.4895,
|
|
"step": 12780
|
|
},
|
|
{
|
|
"epoch": 4.038221590460396,
|
|
"grad_norm": 0.056520400454428035,
|
|
"learning_rate": 0.0014880136899812011,
|
|
"loss": 2.515,
|
|
"step": 12785
|
|
},
|
|
{
|
|
"epoch": 4.039800995024875,
|
|
"grad_norm": 0.06301408957681141,
|
|
"learning_rate": 0.0014875322931637573,
|
|
"loss": 2.5243,
|
|
"step": 12790
|
|
},
|
|
{
|
|
"epoch": 4.041380399589355,
|
|
"grad_norm": 0.05950046682220469,
|
|
"learning_rate": 0.0014870507480908585,
|
|
"loss": 2.6026,
|
|
"step": 12795
|
|
},
|
|
{
|
|
"epoch": 4.042959804153834,
|
|
"grad_norm": 0.07726584626318818,
|
|
"learning_rate": 0.001486569054908939,
|
|
"loss": 2.5222,
|
|
"step": 12800
|
|
},
|
|
{
|
|
"epoch": 4.044539208718313,
|
|
"grad_norm": 0.08585312308095734,
|
|
"learning_rate": 0.0014860872137644784,
|
|
"loss": 2.5153,
|
|
"step": 12805
|
|
},
|
|
{
|
|
"epoch": 4.046118613282792,
|
|
"grad_norm": 0.0778424441713349,
|
|
"learning_rate": 0.001485605224804002,
|
|
"loss": 2.5472,
|
|
"step": 12810
|
|
},
|
|
{
|
|
"epoch": 4.047698017847272,
|
|
"grad_norm": 0.07639674589051161,
|
|
"learning_rate": 0.0014851230881740797,
|
|
"loss": 2.536,
|
|
"step": 12815
|
|
},
|
|
{
|
|
"epoch": 4.049277422411751,
|
|
"grad_norm": 0.07177264548867932,
|
|
"learning_rate": 0.0014846408040213256,
|
|
"loss": 2.4346,
|
|
"step": 12820
|
|
},
|
|
{
|
|
"epoch": 4.05085682697623,
|
|
"grad_norm": 0.07111026091809729,
|
|
"learning_rate": 0.0014841583724923993,
|
|
"loss": 2.4786,
|
|
"step": 12825
|
|
},
|
|
{
|
|
"epoch": 4.052436231540709,
|
|
"grad_norm": 0.0673086220881294,
|
|
"learning_rate": 0.0014836757937340052,
|
|
"loss": 2.4916,
|
|
"step": 12830
|
|
},
|
|
{
|
|
"epoch": 4.054015636105189,
|
|
"grad_norm": 0.0730411181213416,
|
|
"learning_rate": 0.0014831930678928928,
|
|
"loss": 2.4575,
|
|
"step": 12835
|
|
},
|
|
{
|
|
"epoch": 4.055595040669668,
|
|
"grad_norm": 0.08106042561681988,
|
|
"learning_rate": 0.0014827101951158555,
|
|
"loss": 2.5814,
|
|
"step": 12840
|
|
},
|
|
{
|
|
"epoch": 4.057174445234147,
|
|
"grad_norm": 0.06488877308697392,
|
|
"learning_rate": 0.0014822271755497321,
|
|
"loss": 2.5211,
|
|
"step": 12845
|
|
},
|
|
{
|
|
"epoch": 4.0587538497986255,
|
|
"grad_norm": 0.07530413806657588,
|
|
"learning_rate": 0.0014817440093414054,
|
|
"loss": 2.5002,
|
|
"step": 12850
|
|
},
|
|
{
|
|
"epoch": 4.060333254363105,
|
|
"grad_norm": 0.0789122215794093,
|
|
"learning_rate": 0.0014812606966378037,
|
|
"loss": 2.5722,
|
|
"step": 12855
|
|
},
|
|
{
|
|
"epoch": 4.061912658927584,
|
|
"grad_norm": 0.06851900387567693,
|
|
"learning_rate": 0.0014807772375858988,
|
|
"loss": 2.4766,
|
|
"step": 12860
|
|
},
|
|
{
|
|
"epoch": 4.063492063492063,
|
|
"grad_norm": 0.06498106802399249,
|
|
"learning_rate": 0.0014802936323327078,
|
|
"loss": 2.4923,
|
|
"step": 12865
|
|
},
|
|
{
|
|
"epoch": 4.0650714680565425,
|
|
"grad_norm": 0.06676788724094512,
|
|
"learning_rate": 0.001479809881025292,
|
|
"loss": 2.6196,
|
|
"step": 12870
|
|
},
|
|
{
|
|
"epoch": 4.066650872621022,
|
|
"grad_norm": 0.06230390401692597,
|
|
"learning_rate": 0.001479325983810757,
|
|
"loss": 2.592,
|
|
"step": 12875
|
|
},
|
|
{
|
|
"epoch": 4.068230277185501,
|
|
"grad_norm": 0.06662636637151956,
|
|
"learning_rate": 0.0014788419408362525,
|
|
"loss": 2.5958,
|
|
"step": 12880
|
|
},
|
|
{
|
|
"epoch": 4.06980968174998,
|
|
"grad_norm": 0.0531409949938066,
|
|
"learning_rate": 0.0014783577522489732,
|
|
"loss": 2.5631,
|
|
"step": 12885
|
|
},
|
|
{
|
|
"epoch": 4.0713890863144595,
|
|
"grad_norm": 0.07482519940011333,
|
|
"learning_rate": 0.0014778734181961582,
|
|
"loss": 2.5397,
|
|
"step": 12890
|
|
},
|
|
{
|
|
"epoch": 4.072968490878939,
|
|
"grad_norm": 0.06921277307702906,
|
|
"learning_rate": 0.0014773889388250896,
|
|
"loss": 2.542,
|
|
"step": 12895
|
|
},
|
|
{
|
|
"epoch": 4.074547895443418,
|
|
"grad_norm": 0.06772080251241064,
|
|
"learning_rate": 0.001476904314283095,
|
|
"loss": 2.5194,
|
|
"step": 12900
|
|
},
|
|
{
|
|
"epoch": 4.076127300007897,
|
|
"grad_norm": 0.06356953667371293,
|
|
"learning_rate": 0.0014764195447175452,
|
|
"loss": 2.4988,
|
|
"step": 12905
|
|
},
|
|
{
|
|
"epoch": 4.0777067045723765,
|
|
"grad_norm": 0.06883769891653696,
|
|
"learning_rate": 0.001475934630275856,
|
|
"loss": 2.5617,
|
|
"step": 12910
|
|
},
|
|
{
|
|
"epoch": 4.079286109136856,
|
|
"grad_norm": 0.06831757524638417,
|
|
"learning_rate": 0.0014754495711054865,
|
|
"loss": 2.5954,
|
|
"step": 12915
|
|
},
|
|
{
|
|
"epoch": 4.080865513701335,
|
|
"grad_norm": 0.06404279817913411,
|
|
"learning_rate": 0.0014749643673539403,
|
|
"loss": 2.5586,
|
|
"step": 12920
|
|
},
|
|
{
|
|
"epoch": 4.082444918265814,
|
|
"grad_norm": 0.0644981951967708,
|
|
"learning_rate": 0.0014744790191687646,
|
|
"loss": 2.5238,
|
|
"step": 12925
|
|
},
|
|
{
|
|
"epoch": 4.084024322830293,
|
|
"grad_norm": 0.07938321994404159,
|
|
"learning_rate": 0.0014739935266975502,
|
|
"loss": 2.5267,
|
|
"step": 12930
|
|
},
|
|
{
|
|
"epoch": 4.085603727394772,
|
|
"grad_norm": 0.07415739623629887,
|
|
"learning_rate": 0.0014735078900879332,
|
|
"loss": 2.466,
|
|
"step": 12935
|
|
},
|
|
{
|
|
"epoch": 4.087183131959251,
|
|
"grad_norm": 0.0614255488188257,
|
|
"learning_rate": 0.0014730221094875922,
|
|
"loss": 2.476,
|
|
"step": 12940
|
|
},
|
|
{
|
|
"epoch": 4.08876253652373,
|
|
"grad_norm": 0.08282163856306972,
|
|
"learning_rate": 0.00147253618504425,
|
|
"loss": 2.4782,
|
|
"step": 12945
|
|
},
|
|
{
|
|
"epoch": 4.09034194108821,
|
|
"grad_norm": 0.06277116984145156,
|
|
"learning_rate": 0.0014720501169056726,
|
|
"loss": 2.5513,
|
|
"step": 12950
|
|
},
|
|
{
|
|
"epoch": 4.091921345652689,
|
|
"grad_norm": 0.06684679135522835,
|
|
"learning_rate": 0.001471563905219671,
|
|
"loss": 2.4652,
|
|
"step": 12955
|
|
},
|
|
{
|
|
"epoch": 4.093500750217168,
|
|
"grad_norm": 0.08479893518892212,
|
|
"learning_rate": 0.0014710775501340988,
|
|
"loss": 2.6456,
|
|
"step": 12960
|
|
},
|
|
{
|
|
"epoch": 4.095080154781647,
|
|
"grad_norm": 0.06645816817928275,
|
|
"learning_rate": 0.0014705910517968533,
|
|
"loss": 2.5211,
|
|
"step": 12965
|
|
},
|
|
{
|
|
"epoch": 4.096659559346127,
|
|
"grad_norm": 0.07848580235716644,
|
|
"learning_rate": 0.0014701044103558757,
|
|
"loss": 2.4402,
|
|
"step": 12970
|
|
},
|
|
{
|
|
"epoch": 4.098238963910606,
|
|
"grad_norm": 0.07708904246857005,
|
|
"learning_rate": 0.00146961762595915,
|
|
"loss": 2.6356,
|
|
"step": 12975
|
|
},
|
|
{
|
|
"epoch": 4.099818368475085,
|
|
"grad_norm": 0.07140746362743702,
|
|
"learning_rate": 0.0014691306987547053,
|
|
"loss": 2.4781,
|
|
"step": 12980
|
|
},
|
|
{
|
|
"epoch": 4.101397773039564,
|
|
"grad_norm": 0.07707031926981767,
|
|
"learning_rate": 0.0014686436288906123,
|
|
"loss": 2.5186,
|
|
"step": 12985
|
|
},
|
|
{
|
|
"epoch": 4.102977177604044,
|
|
"grad_norm": 0.07229371248956278,
|
|
"learning_rate": 0.001468156416514986,
|
|
"loss": 2.4977,
|
|
"step": 12990
|
|
},
|
|
{
|
|
"epoch": 4.104556582168523,
|
|
"grad_norm": 0.06754261510286533,
|
|
"learning_rate": 0.0014676690617759845,
|
|
"loss": 2.5047,
|
|
"step": 12995
|
|
},
|
|
{
|
|
"epoch": 4.106135986733002,
|
|
"grad_norm": 0.06257340617672764,
|
|
"learning_rate": 0.0014671815648218092,
|
|
"loss": 2.4436,
|
|
"step": 13000
|
|
},
|
|
{
|
|
"epoch": 4.10771539129748,
|
|
"grad_norm": 0.06069847036077875,
|
|
"learning_rate": 0.0014666939258007052,
|
|
"loss": 2.4951,
|
|
"step": 13005
|
|
},
|
|
{
|
|
"epoch": 4.10929479586196,
|
|
"grad_norm": 0.07510772935293217,
|
|
"learning_rate": 0.0014662061448609603,
|
|
"loss": 2.5079,
|
|
"step": 13010
|
|
},
|
|
{
|
|
"epoch": 4.110874200426439,
|
|
"grad_norm": 0.07199220345004556,
|
|
"learning_rate": 0.001465718222150905,
|
|
"loss": 2.5082,
|
|
"step": 13015
|
|
},
|
|
{
|
|
"epoch": 4.112453604990918,
|
|
"grad_norm": 0.062254051740544276,
|
|
"learning_rate": 0.001465230157818914,
|
|
"loss": 2.5309,
|
|
"step": 13020
|
|
},
|
|
{
|
|
"epoch": 4.114033009555397,
|
|
"grad_norm": 0.06925424720492623,
|
|
"learning_rate": 0.0014647419520134046,
|
|
"loss": 2.6036,
|
|
"step": 13025
|
|
},
|
|
{
|
|
"epoch": 4.115612414119877,
|
|
"grad_norm": 0.06289672611745836,
|
|
"learning_rate": 0.001464253604882837,
|
|
"loss": 2.5908,
|
|
"step": 13030
|
|
},
|
|
{
|
|
"epoch": 4.117191818684356,
|
|
"grad_norm": 0.07510195324152907,
|
|
"learning_rate": 0.0014637651165757143,
|
|
"loss": 2.5165,
|
|
"step": 13035
|
|
},
|
|
{
|
|
"epoch": 4.118771223248835,
|
|
"grad_norm": 0.061998057020966786,
|
|
"learning_rate": 0.0014632764872405826,
|
|
"loss": 2.5964,
|
|
"step": 13040
|
|
},
|
|
{
|
|
"epoch": 4.120350627813314,
|
|
"grad_norm": 0.07622673341832373,
|
|
"learning_rate": 0.001462787717026031,
|
|
"loss": 2.549,
|
|
"step": 13045
|
|
},
|
|
{
|
|
"epoch": 4.121930032377794,
|
|
"grad_norm": 0.061580797687959134,
|
|
"learning_rate": 0.0014622988060806917,
|
|
"loss": 2.5137,
|
|
"step": 13050
|
|
},
|
|
{
|
|
"epoch": 4.123509436942273,
|
|
"grad_norm": 0.05307281433618267,
|
|
"learning_rate": 0.0014618097545532392,
|
|
"loss": 2.5621,
|
|
"step": 13055
|
|
},
|
|
{
|
|
"epoch": 4.125088841506752,
|
|
"grad_norm": 0.06578261530618441,
|
|
"learning_rate": 0.0014613205625923908,
|
|
"loss": 2.5674,
|
|
"step": 13060
|
|
},
|
|
{
|
|
"epoch": 4.126668246071231,
|
|
"grad_norm": 0.06614447582109592,
|
|
"learning_rate": 0.0014608312303469066,
|
|
"loss": 2.6642,
|
|
"step": 13065
|
|
},
|
|
{
|
|
"epoch": 4.128247650635711,
|
|
"grad_norm": 0.07080643368952194,
|
|
"learning_rate": 0.00146034175796559,
|
|
"loss": 2.612,
|
|
"step": 13070
|
|
},
|
|
{
|
|
"epoch": 4.12982705520019,
|
|
"grad_norm": 0.06637894811759577,
|
|
"learning_rate": 0.0014598521455972855,
|
|
"loss": 2.5063,
|
|
"step": 13075
|
|
},
|
|
{
|
|
"epoch": 4.131406459764669,
|
|
"grad_norm": 0.0657513334100105,
|
|
"learning_rate": 0.0014593623933908822,
|
|
"loss": 2.5591,
|
|
"step": 13080
|
|
},
|
|
{
|
|
"epoch": 4.1329858643291475,
|
|
"grad_norm": 0.07458588668215285,
|
|
"learning_rate": 0.0014588725014953094,
|
|
"loss": 2.611,
|
|
"step": 13085
|
|
},
|
|
{
|
|
"epoch": 4.134565268893627,
|
|
"grad_norm": 0.09211346308075136,
|
|
"learning_rate": 0.001458382470059541,
|
|
"loss": 2.5932,
|
|
"step": 13090
|
|
},
|
|
{
|
|
"epoch": 4.136144673458106,
|
|
"grad_norm": 0.06169403975962911,
|
|
"learning_rate": 0.0014578922992325922,
|
|
"loss": 2.4608,
|
|
"step": 13095
|
|
},
|
|
{
|
|
"epoch": 4.137724078022585,
|
|
"grad_norm": 0.0686759567747662,
|
|
"learning_rate": 0.001457401989163521,
|
|
"loss": 2.5128,
|
|
"step": 13100
|
|
},
|
|
{
|
|
"epoch": 4.1393034825870645,
|
|
"grad_norm": 0.1079703337793503,
|
|
"learning_rate": 0.0014569115400014268,
|
|
"loss": 2.5853,
|
|
"step": 13105
|
|
},
|
|
{
|
|
"epoch": 4.140882887151544,
|
|
"grad_norm": 0.0718356218996724,
|
|
"learning_rate": 0.0014564209518954528,
|
|
"loss": 2.4582,
|
|
"step": 13110
|
|
},
|
|
{
|
|
"epoch": 4.142462291716023,
|
|
"grad_norm": 0.07375396962599524,
|
|
"learning_rate": 0.0014559302249947832,
|
|
"loss": 2.5638,
|
|
"step": 13115
|
|
},
|
|
{
|
|
"epoch": 4.144041696280502,
|
|
"grad_norm": 0.0710460434497697,
|
|
"learning_rate": 0.0014554393594486458,
|
|
"loss": 2.5623,
|
|
"step": 13120
|
|
},
|
|
{
|
|
"epoch": 4.1456211008449815,
|
|
"grad_norm": 0.05547904233441889,
|
|
"learning_rate": 0.0014549483554063087,
|
|
"loss": 2.4681,
|
|
"step": 13125
|
|
},
|
|
{
|
|
"epoch": 4.147200505409461,
|
|
"grad_norm": 0.07601044775167944,
|
|
"learning_rate": 0.0014544572130170837,
|
|
"loss": 2.5175,
|
|
"step": 13130
|
|
},
|
|
{
|
|
"epoch": 4.14877990997394,
|
|
"grad_norm": 0.07610178491642125,
|
|
"learning_rate": 0.0014539659324303235,
|
|
"loss": 2.5246,
|
|
"step": 13135
|
|
},
|
|
{
|
|
"epoch": 4.150359314538419,
|
|
"grad_norm": 0.07703159150351786,
|
|
"learning_rate": 0.001453474513795424,
|
|
"loss": 2.5618,
|
|
"step": 13140
|
|
},
|
|
{
|
|
"epoch": 4.1519387191028985,
|
|
"grad_norm": 0.06607553225197815,
|
|
"learning_rate": 0.0014529829572618221,
|
|
"loss": 2.5774,
|
|
"step": 13145
|
|
},
|
|
{
|
|
"epoch": 4.153518123667378,
|
|
"grad_norm": 0.06340095814054024,
|
|
"learning_rate": 0.001452491262978997,
|
|
"loss": 2.6844,
|
|
"step": 13150
|
|
},
|
|
{
|
|
"epoch": 4.155097528231857,
|
|
"grad_norm": 0.06394311855675062,
|
|
"learning_rate": 0.0014519994310964698,
|
|
"loss": 2.5379,
|
|
"step": 13155
|
|
},
|
|
{
|
|
"epoch": 4.156676932796336,
|
|
"grad_norm": 0.06475478450956448,
|
|
"learning_rate": 0.0014515074617638035,
|
|
"loss": 2.5873,
|
|
"step": 13160
|
|
},
|
|
{
|
|
"epoch": 4.158256337360815,
|
|
"grad_norm": 0.06510203917360406,
|
|
"learning_rate": 0.001451015355130603,
|
|
"loss": 2.5246,
|
|
"step": 13165
|
|
},
|
|
{
|
|
"epoch": 4.159835741925294,
|
|
"grad_norm": 0.07346007569585081,
|
|
"learning_rate": 0.0014505231113465147,
|
|
"loss": 2.625,
|
|
"step": 13170
|
|
},
|
|
{
|
|
"epoch": 4.161415146489773,
|
|
"grad_norm": 0.07519606396868049,
|
|
"learning_rate": 0.0014500307305612267,
|
|
"loss": 2.5642,
|
|
"step": 13175
|
|
},
|
|
{
|
|
"epoch": 4.162994551054252,
|
|
"grad_norm": 0.06468656469001448,
|
|
"learning_rate": 0.0014495382129244684,
|
|
"loss": 2.5287,
|
|
"step": 13180
|
|
},
|
|
{
|
|
"epoch": 4.164573955618732,
|
|
"grad_norm": 0.06414418265386593,
|
|
"learning_rate": 0.0014490455585860122,
|
|
"loss": 2.6108,
|
|
"step": 13185
|
|
},
|
|
{
|
|
"epoch": 4.166153360183211,
|
|
"grad_norm": 0.05953847764779467,
|
|
"learning_rate": 0.001448552767695671,
|
|
"loss": 2.4915,
|
|
"step": 13190
|
|
},
|
|
{
|
|
"epoch": 4.16773276474769,
|
|
"grad_norm": 0.0639508017646886,
|
|
"learning_rate": 0.0014480598404032984,
|
|
"loss": 2.5515,
|
|
"step": 13195
|
|
},
|
|
{
|
|
"epoch": 4.169312169312169,
|
|
"grad_norm": 0.0694933632681381,
|
|
"learning_rate": 0.001447566776858791,
|
|
"loss": 2.4187,
|
|
"step": 13200
|
|
},
|
|
{
|
|
"epoch": 4.1708915738766486,
|
|
"grad_norm": 0.06501064281565713,
|
|
"learning_rate": 0.0014470735772120866,
|
|
"loss": 2.5036,
|
|
"step": 13205
|
|
},
|
|
{
|
|
"epoch": 4.172470978441128,
|
|
"grad_norm": 0.07636337677703987,
|
|
"learning_rate": 0.001446580241613164,
|
|
"loss": 2.5329,
|
|
"step": 13210
|
|
},
|
|
{
|
|
"epoch": 4.174050383005607,
|
|
"grad_norm": 0.06685853958905062,
|
|
"learning_rate": 0.001446086770212043,
|
|
"loss": 2.4654,
|
|
"step": 13215
|
|
},
|
|
{
|
|
"epoch": 4.175629787570086,
|
|
"grad_norm": 0.07065294814870929,
|
|
"learning_rate": 0.0014455931631587853,
|
|
"loss": 2.6656,
|
|
"step": 13220
|
|
},
|
|
{
|
|
"epoch": 4.1772091921345655,
|
|
"grad_norm": 0.059600183399475946,
|
|
"learning_rate": 0.0014450994206034935,
|
|
"loss": 2.4968,
|
|
"step": 13225
|
|
},
|
|
{
|
|
"epoch": 4.178788596699045,
|
|
"grad_norm": 0.07724022069731407,
|
|
"learning_rate": 0.001444605542696312,
|
|
"loss": 2.4847,
|
|
"step": 13230
|
|
},
|
|
{
|
|
"epoch": 4.180368001263524,
|
|
"grad_norm": 0.0606023615353993,
|
|
"learning_rate": 0.0014441115295874254,
|
|
"loss": 2.4797,
|
|
"step": 13235
|
|
},
|
|
{
|
|
"epoch": 4.181947405828003,
|
|
"grad_norm": 0.0756569105829631,
|
|
"learning_rate": 0.0014436173814270604,
|
|
"loss": 2.643,
|
|
"step": 13240
|
|
},
|
|
{
|
|
"epoch": 4.183526810392482,
|
|
"grad_norm": 0.06713550346731194,
|
|
"learning_rate": 0.0014431230983654837,
|
|
"loss": 2.4975,
|
|
"step": 13245
|
|
},
|
|
{
|
|
"epoch": 4.185106214956961,
|
|
"grad_norm": 0.0663594090047207,
|
|
"learning_rate": 0.0014426286805530042,
|
|
"loss": 2.5824,
|
|
"step": 13250
|
|
},
|
|
{
|
|
"epoch": 4.18668561952144,
|
|
"grad_norm": 0.07060822775944986,
|
|
"learning_rate": 0.0014421341281399712,
|
|
"loss": 2.5178,
|
|
"step": 13255
|
|
},
|
|
{
|
|
"epoch": 4.188265024085919,
|
|
"grad_norm": 0.05985464631605249,
|
|
"learning_rate": 0.0014416394412767747,
|
|
"loss": 2.5905,
|
|
"step": 13260
|
|
},
|
|
{
|
|
"epoch": 4.189844428650399,
|
|
"grad_norm": 0.07221561707389895,
|
|
"learning_rate": 0.0014411446201138451,
|
|
"loss": 2.567,
|
|
"step": 13265
|
|
},
|
|
{
|
|
"epoch": 4.191423833214878,
|
|
"grad_norm": 0.06578663378996884,
|
|
"learning_rate": 0.0014406496648016556,
|
|
"loss": 2.5081,
|
|
"step": 13270
|
|
},
|
|
{
|
|
"epoch": 4.193003237779357,
|
|
"grad_norm": 0.06648435586133757,
|
|
"learning_rate": 0.0014401545754907186,
|
|
"loss": 2.467,
|
|
"step": 13275
|
|
},
|
|
{
|
|
"epoch": 4.194582642343836,
|
|
"grad_norm": 0.08036769567181871,
|
|
"learning_rate": 0.0014396593523315873,
|
|
"loss": 2.5222,
|
|
"step": 13280
|
|
},
|
|
{
|
|
"epoch": 4.196162046908316,
|
|
"grad_norm": 0.07546337618188616,
|
|
"learning_rate": 0.0014391639954748558,
|
|
"loss": 2.5664,
|
|
"step": 13285
|
|
},
|
|
{
|
|
"epoch": 4.197741451472795,
|
|
"grad_norm": 0.0689660209289397,
|
|
"learning_rate": 0.0014386685050711593,
|
|
"loss": 2.5394,
|
|
"step": 13290
|
|
},
|
|
{
|
|
"epoch": 4.199320856037274,
|
|
"grad_norm": 0.061357672883435126,
|
|
"learning_rate": 0.0014381728812711732,
|
|
"loss": 2.6156,
|
|
"step": 13295
|
|
},
|
|
{
|
|
"epoch": 4.200900260601753,
|
|
"grad_norm": 0.07927976534483275,
|
|
"learning_rate": 0.0014376771242256134,
|
|
"loss": 2.5732,
|
|
"step": 13300
|
|
},
|
|
{
|
|
"epoch": 4.202479665166233,
|
|
"grad_norm": 0.06605757125068425,
|
|
"learning_rate": 0.0014371812340852367,
|
|
"loss": 2.5047,
|
|
"step": 13305
|
|
},
|
|
{
|
|
"epoch": 4.204059069730712,
|
|
"grad_norm": 0.065185810579207,
|
|
"learning_rate": 0.0014366852110008397,
|
|
"loss": 2.5204,
|
|
"step": 13310
|
|
},
|
|
{
|
|
"epoch": 4.205638474295191,
|
|
"grad_norm": 0.0752369655656418,
|
|
"learning_rate": 0.00143618905512326,
|
|
"loss": 2.5801,
|
|
"step": 13315
|
|
},
|
|
{
|
|
"epoch": 4.2072178788596695,
|
|
"grad_norm": 0.08200102491569435,
|
|
"learning_rate": 0.001435692766603376,
|
|
"loss": 2.5413,
|
|
"step": 13320
|
|
},
|
|
{
|
|
"epoch": 4.208797283424149,
|
|
"grad_norm": 0.06954876890972278,
|
|
"learning_rate": 0.0014351963455921052,
|
|
"loss": 2.5658,
|
|
"step": 13325
|
|
},
|
|
{
|
|
"epoch": 4.210376687988628,
|
|
"grad_norm": 0.07493534613014184,
|
|
"learning_rate": 0.0014346997922404059,
|
|
"loss": 2.4482,
|
|
"step": 13330
|
|
},
|
|
{
|
|
"epoch": 4.211956092553107,
|
|
"grad_norm": 0.0643238145688426,
|
|
"learning_rate": 0.0014342031066992772,
|
|
"loss": 2.4539,
|
|
"step": 13335
|
|
},
|
|
{
|
|
"epoch": 4.2135354971175865,
|
|
"grad_norm": 0.07058738633050943,
|
|
"learning_rate": 0.0014337062891197582,
|
|
"loss": 2.5635,
|
|
"step": 13340
|
|
},
|
|
{
|
|
"epoch": 4.215114901682066,
|
|
"grad_norm": 0.07637776100324138,
|
|
"learning_rate": 0.0014332093396529277,
|
|
"loss": 2.5817,
|
|
"step": 13345
|
|
},
|
|
{
|
|
"epoch": 4.216694306246545,
|
|
"grad_norm": 0.06836462407951979,
|
|
"learning_rate": 0.001432712258449905,
|
|
"loss": 2.5264,
|
|
"step": 13350
|
|
},
|
|
{
|
|
"epoch": 4.218273710811024,
|
|
"grad_norm": 0.0645167368572073,
|
|
"learning_rate": 0.0014322150456618488,
|
|
"loss": 2.6583,
|
|
"step": 13355
|
|
},
|
|
{
|
|
"epoch": 4.2198531153755034,
|
|
"grad_norm": 0.08201628889960703,
|
|
"learning_rate": 0.001431717701439959,
|
|
"loss": 2.4969,
|
|
"step": 13360
|
|
},
|
|
{
|
|
"epoch": 4.221432519939983,
|
|
"grad_norm": 0.08074439771233276,
|
|
"learning_rate": 0.0014312202259354745,
|
|
"loss": 2.3899,
|
|
"step": 13365
|
|
},
|
|
{
|
|
"epoch": 4.223011924504462,
|
|
"grad_norm": 0.07422919481985707,
|
|
"learning_rate": 0.0014307226192996744,
|
|
"loss": 2.5588,
|
|
"step": 13370
|
|
},
|
|
{
|
|
"epoch": 4.224591329068941,
|
|
"grad_norm": 0.08057286421521241,
|
|
"learning_rate": 0.0014302248816838777,
|
|
"loss": 2.5392,
|
|
"step": 13375
|
|
},
|
|
{
|
|
"epoch": 4.22617073363342,
|
|
"grad_norm": 0.06877191225129876,
|
|
"learning_rate": 0.0014297270132394432,
|
|
"loss": 2.6999,
|
|
"step": 13380
|
|
},
|
|
{
|
|
"epoch": 4.2277501381979,
|
|
"grad_norm": 0.07327500934807407,
|
|
"learning_rate": 0.0014292290141177694,
|
|
"loss": 2.4763,
|
|
"step": 13385
|
|
},
|
|
{
|
|
"epoch": 4.229329542762379,
|
|
"grad_norm": 0.07953535741393597,
|
|
"learning_rate": 0.0014287308844702954,
|
|
"loss": 2.5633,
|
|
"step": 13390
|
|
},
|
|
{
|
|
"epoch": 4.230908947326858,
|
|
"grad_norm": 0.09265386453796612,
|
|
"learning_rate": 0.0014282326244484983,
|
|
"loss": 2.5173,
|
|
"step": 13395
|
|
},
|
|
{
|
|
"epoch": 4.2324883518913365,
|
|
"grad_norm": 0.06626723087506727,
|
|
"learning_rate": 0.0014277342342038962,
|
|
"loss": 2.4999,
|
|
"step": 13400
|
|
},
|
|
{
|
|
"epoch": 4.234067756455816,
|
|
"grad_norm": 0.08258546988481656,
|
|
"learning_rate": 0.0014272357138880461,
|
|
"loss": 2.523,
|
|
"step": 13405
|
|
},
|
|
{
|
|
"epoch": 4.235647161020295,
|
|
"grad_norm": 0.06070972825243939,
|
|
"learning_rate": 0.0014267370636525457,
|
|
"loss": 2.4885,
|
|
"step": 13410
|
|
},
|
|
{
|
|
"epoch": 4.237226565584774,
|
|
"grad_norm": 0.06550670579870467,
|
|
"learning_rate": 0.0014262382836490303,
|
|
"loss": 2.5208,
|
|
"step": 13415
|
|
},
|
|
{
|
|
"epoch": 4.2388059701492535,
|
|
"grad_norm": 0.06939688767864273,
|
|
"learning_rate": 0.0014257393740291762,
|
|
"loss": 2.7158,
|
|
"step": 13420
|
|
},
|
|
{
|
|
"epoch": 4.240385374713733,
|
|
"grad_norm": 0.059725013485192754,
|
|
"learning_rate": 0.0014252403349446984,
|
|
"loss": 2.5409,
|
|
"step": 13425
|
|
},
|
|
{
|
|
"epoch": 4.241964779278212,
|
|
"grad_norm": 0.0674800569829641,
|
|
"learning_rate": 0.001424741166547352,
|
|
"loss": 2.4904,
|
|
"step": 13430
|
|
},
|
|
{
|
|
"epoch": 4.243544183842691,
|
|
"grad_norm": 0.07642877842093784,
|
|
"learning_rate": 0.0014242418689889304,
|
|
"loss": 2.4923,
|
|
"step": 13435
|
|
},
|
|
{
|
|
"epoch": 4.2451235884071705,
|
|
"grad_norm": 0.06707539009249994,
|
|
"learning_rate": 0.0014237424424212673,
|
|
"loss": 2.5957,
|
|
"step": 13440
|
|
},
|
|
{
|
|
"epoch": 4.24670299297165,
|
|
"grad_norm": 0.05478056652010952,
|
|
"learning_rate": 0.0014232428869962344,
|
|
"loss": 2.4415,
|
|
"step": 13445
|
|
},
|
|
{
|
|
"epoch": 4.248282397536129,
|
|
"grad_norm": 0.06647832348332867,
|
|
"learning_rate": 0.001422743202865744,
|
|
"loss": 2.5179,
|
|
"step": 13450
|
|
},
|
|
{
|
|
"epoch": 4.249861802100608,
|
|
"grad_norm": 0.07599209964906246,
|
|
"learning_rate": 0.0014222433901817466,
|
|
"loss": 2.5496,
|
|
"step": 13455
|
|
},
|
|
{
|
|
"epoch": 4.2514412066650875,
|
|
"grad_norm": 0.07508252444522776,
|
|
"learning_rate": 0.001421743449096232,
|
|
"loss": 2.566,
|
|
"step": 13460
|
|
},
|
|
{
|
|
"epoch": 4.253020611229567,
|
|
"grad_norm": 0.07034251601863514,
|
|
"learning_rate": 0.0014212433797612292,
|
|
"loss": 2.4595,
|
|
"step": 13465
|
|
},
|
|
{
|
|
"epoch": 4.254600015794046,
|
|
"grad_norm": 0.07715542862621001,
|
|
"learning_rate": 0.0014207431823288058,
|
|
"loss": 2.4418,
|
|
"step": 13470
|
|
},
|
|
{
|
|
"epoch": 4.256179420358524,
|
|
"grad_norm": 0.09268138219206884,
|
|
"learning_rate": 0.0014202428569510689,
|
|
"loss": 2.5466,
|
|
"step": 13475
|
|
},
|
|
{
|
|
"epoch": 4.257758824923004,
|
|
"grad_norm": 0.05898184787647431,
|
|
"learning_rate": 0.0014197424037801643,
|
|
"loss": 2.5578,
|
|
"step": 13480
|
|
},
|
|
{
|
|
"epoch": 4.259338229487483,
|
|
"grad_norm": 0.07989363701752655,
|
|
"learning_rate": 0.0014192418229682765,
|
|
"loss": 2.5104,
|
|
"step": 13485
|
|
},
|
|
{
|
|
"epoch": 4.260917634051962,
|
|
"grad_norm": 0.05928033650949228,
|
|
"learning_rate": 0.001418741114667629,
|
|
"loss": 2.5307,
|
|
"step": 13490
|
|
},
|
|
{
|
|
"epoch": 4.262497038616441,
|
|
"grad_norm": 0.07230013353506216,
|
|
"learning_rate": 0.0014182402790304837,
|
|
"loss": 2.6218,
|
|
"step": 13495
|
|
},
|
|
{
|
|
"epoch": 4.264076443180921,
|
|
"grad_norm": 0.0699251003611797,
|
|
"learning_rate": 0.001417739316209142,
|
|
"loss": 2.5339,
|
|
"step": 13500
|
|
},
|
|
{
|
|
"epoch": 4.2656558477454,
|
|
"grad_norm": 0.0661603496513996,
|
|
"learning_rate": 0.001417238226355943,
|
|
"loss": 2.5763,
|
|
"step": 13505
|
|
},
|
|
{
|
|
"epoch": 4.267235252309879,
|
|
"grad_norm": 0.0657835755499118,
|
|
"learning_rate": 0.0014167370096232657,
|
|
"loss": 2.4802,
|
|
"step": 13510
|
|
},
|
|
{
|
|
"epoch": 4.268814656874358,
|
|
"grad_norm": 0.06717937981458183,
|
|
"learning_rate": 0.001416235666163526,
|
|
"loss": 2.5466,
|
|
"step": 13515
|
|
},
|
|
{
|
|
"epoch": 4.270394061438838,
|
|
"grad_norm": 0.061219877946853664,
|
|
"learning_rate": 0.0014157341961291796,
|
|
"loss": 2.4778,
|
|
"step": 13520
|
|
},
|
|
{
|
|
"epoch": 4.271973466003317,
|
|
"grad_norm": 0.05999685422441346,
|
|
"learning_rate": 0.0014152325996727205,
|
|
"loss": 2.4325,
|
|
"step": 13525
|
|
},
|
|
{
|
|
"epoch": 4.273552870567796,
|
|
"grad_norm": 0.06287209203260331,
|
|
"learning_rate": 0.001414730876946681,
|
|
"loss": 2.4705,
|
|
"step": 13530
|
|
},
|
|
{
|
|
"epoch": 4.275132275132275,
|
|
"grad_norm": 0.08434710498720295,
|
|
"learning_rate": 0.001414229028103631,
|
|
"loss": 2.4909,
|
|
"step": 13535
|
|
},
|
|
{
|
|
"epoch": 4.276711679696755,
|
|
"grad_norm": 0.09622578602626065,
|
|
"learning_rate": 0.0014137270532961807,
|
|
"loss": 2.5129,
|
|
"step": 13540
|
|
},
|
|
{
|
|
"epoch": 4.278291084261234,
|
|
"grad_norm": 0.09314345440254598,
|
|
"learning_rate": 0.0014132249526769764,
|
|
"loss": 2.508,
|
|
"step": 13545
|
|
},
|
|
{
|
|
"epoch": 4.279870488825713,
|
|
"grad_norm": 0.06649245162780568,
|
|
"learning_rate": 0.0014127227263987046,
|
|
"loss": 2.6429,
|
|
"step": 13550
|
|
},
|
|
{
|
|
"epoch": 4.281449893390192,
|
|
"grad_norm": 0.06377941458760608,
|
|
"learning_rate": 0.0014122203746140885,
|
|
"loss": 2.5423,
|
|
"step": 13555
|
|
},
|
|
{
|
|
"epoch": 4.283029297954671,
|
|
"grad_norm": 0.07387226422492497,
|
|
"learning_rate": 0.00141171789747589,
|
|
"loss": 2.6465,
|
|
"step": 13560
|
|
},
|
|
{
|
|
"epoch": 4.28460870251915,
|
|
"grad_norm": 0.0657952911027925,
|
|
"learning_rate": 0.0014112152951369097,
|
|
"loss": 2.5298,
|
|
"step": 13565
|
|
},
|
|
{
|
|
"epoch": 4.286188107083629,
|
|
"grad_norm": 0.07001044405093522,
|
|
"learning_rate": 0.0014107125677499854,
|
|
"loss": 2.532,
|
|
"step": 13570
|
|
},
|
|
{
|
|
"epoch": 4.287767511648108,
|
|
"grad_norm": 0.0670565039718826,
|
|
"learning_rate": 0.0014102097154679936,
|
|
"loss": 2.4576,
|
|
"step": 13575
|
|
},
|
|
{
|
|
"epoch": 4.289346916212588,
|
|
"grad_norm": 0.05471371081590369,
|
|
"learning_rate": 0.001409706738443848,
|
|
"loss": 2.5445,
|
|
"step": 13580
|
|
},
|
|
{
|
|
"epoch": 4.290926320777067,
|
|
"grad_norm": 0.058653601582677364,
|
|
"learning_rate": 0.0014092036368305008,
|
|
"loss": 2.5299,
|
|
"step": 13585
|
|
},
|
|
{
|
|
"epoch": 4.292505725341546,
|
|
"grad_norm": 0.06909500250533289,
|
|
"learning_rate": 0.0014087004107809422,
|
|
"loss": 2.5063,
|
|
"step": 13590
|
|
},
|
|
{
|
|
"epoch": 4.294085129906025,
|
|
"grad_norm": 0.0653758506917844,
|
|
"learning_rate": 0.0014081970604482002,
|
|
"loss": 2.4977,
|
|
"step": 13595
|
|
},
|
|
{
|
|
"epoch": 4.295664534470505,
|
|
"grad_norm": 0.062247214562781795,
|
|
"learning_rate": 0.00140769358598534,
|
|
"loss": 2.4858,
|
|
"step": 13600
|
|
},
|
|
{
|
|
"epoch": 4.297243939034984,
|
|
"grad_norm": 0.06125582463343084,
|
|
"learning_rate": 0.001407189987545465,
|
|
"loss": 2.4963,
|
|
"step": 13605
|
|
},
|
|
{
|
|
"epoch": 4.298823343599463,
|
|
"grad_norm": 0.05558309017172807,
|
|
"learning_rate": 0.0014066862652817164,
|
|
"loss": 2.5758,
|
|
"step": 13610
|
|
},
|
|
{
|
|
"epoch": 4.300402748163942,
|
|
"grad_norm": 0.063779260157762,
|
|
"learning_rate": 0.001406182419347273,
|
|
"loss": 2.4866,
|
|
"step": 13615
|
|
},
|
|
{
|
|
"epoch": 4.301982152728422,
|
|
"grad_norm": 0.07980311631193766,
|
|
"learning_rate": 0.001405678449895351,
|
|
"loss": 2.6886,
|
|
"step": 13620
|
|
},
|
|
{
|
|
"epoch": 4.303561557292901,
|
|
"grad_norm": 0.06793474841846672,
|
|
"learning_rate": 0.0014051743570792047,
|
|
"loss": 2.5536,
|
|
"step": 13625
|
|
},
|
|
{
|
|
"epoch": 4.30514096185738,
|
|
"grad_norm": 0.07823166306740174,
|
|
"learning_rate": 0.0014046701410521246,
|
|
"loss": 2.5734,
|
|
"step": 13630
|
|
},
|
|
{
|
|
"epoch": 4.3067203664218585,
|
|
"grad_norm": 0.0696554159561388,
|
|
"learning_rate": 0.0014041658019674403,
|
|
"loss": 2.6522,
|
|
"step": 13635
|
|
},
|
|
{
|
|
"epoch": 4.308299770986338,
|
|
"grad_norm": 0.0767424290972511,
|
|
"learning_rate": 0.0014036613399785178,
|
|
"loss": 2.5352,
|
|
"step": 13640
|
|
},
|
|
{
|
|
"epoch": 4.309879175550817,
|
|
"grad_norm": 0.06860946097200132,
|
|
"learning_rate": 0.001403156755238761,
|
|
"loss": 2.5275,
|
|
"step": 13645
|
|
},
|
|
{
|
|
"epoch": 4.311458580115296,
|
|
"grad_norm": 0.058943783543938116,
|
|
"learning_rate": 0.001402652047901611,
|
|
"loss": 2.4936,
|
|
"step": 13650
|
|
},
|
|
{
|
|
"epoch": 4.3130379846797755,
|
|
"grad_norm": 0.07162936228619683,
|
|
"learning_rate": 0.0014021472181205456,
|
|
"loss": 2.5556,
|
|
"step": 13655
|
|
},
|
|
{
|
|
"epoch": 4.314617389244255,
|
|
"grad_norm": 0.05559098577268416,
|
|
"learning_rate": 0.0014016422660490806,
|
|
"loss": 2.5328,
|
|
"step": 13660
|
|
},
|
|
{
|
|
"epoch": 4.316196793808734,
|
|
"grad_norm": 0.06925217836926897,
|
|
"learning_rate": 0.0014011371918407685,
|
|
"loss": 2.501,
|
|
"step": 13665
|
|
},
|
|
{
|
|
"epoch": 4.317776198373213,
|
|
"grad_norm": 0.06574442097202614,
|
|
"learning_rate": 0.0014006319956491996,
|
|
"loss": 2.4949,
|
|
"step": 13670
|
|
},
|
|
{
|
|
"epoch": 4.3193556029376925,
|
|
"grad_norm": 0.06163159097364715,
|
|
"learning_rate": 0.0014001266776280004,
|
|
"loss": 2.4374,
|
|
"step": 13675
|
|
},
|
|
{
|
|
"epoch": 4.320935007502172,
|
|
"grad_norm": 0.07527784022922095,
|
|
"learning_rate": 0.0013996212379308352,
|
|
"loss": 2.5651,
|
|
"step": 13680
|
|
},
|
|
{
|
|
"epoch": 4.322514412066651,
|
|
"grad_norm": 0.06016455600422493,
|
|
"learning_rate": 0.0013991156767114044,
|
|
"loss": 2.489,
|
|
"step": 13685
|
|
},
|
|
{
|
|
"epoch": 4.32409381663113,
|
|
"grad_norm": 0.06711337906775994,
|
|
"learning_rate": 0.0013986099941234466,
|
|
"loss": 2.4754,
|
|
"step": 13690
|
|
},
|
|
{
|
|
"epoch": 4.3256732211956095,
|
|
"grad_norm": 0.07728217337140471,
|
|
"learning_rate": 0.0013981041903207362,
|
|
"loss": 2.6236,
|
|
"step": 13695
|
|
},
|
|
{
|
|
"epoch": 4.327252625760089,
|
|
"grad_norm": 0.09202475709476805,
|
|
"learning_rate": 0.001397598265457085,
|
|
"loss": 2.559,
|
|
"step": 13700
|
|
},
|
|
{
|
|
"epoch": 4.328832030324568,
|
|
"grad_norm": 0.06573246850646428,
|
|
"learning_rate": 0.001397092219686342,
|
|
"loss": 2.4991,
|
|
"step": 13705
|
|
},
|
|
{
|
|
"epoch": 4.330411434889047,
|
|
"grad_norm": 0.06543036372936917,
|
|
"learning_rate": 0.001396586053162392,
|
|
"loss": 2.5055,
|
|
"step": 13710
|
|
},
|
|
{
|
|
"epoch": 4.3319908394535265,
|
|
"grad_norm": 0.0807115846713941,
|
|
"learning_rate": 0.001396079766039157,
|
|
"loss": 2.5171,
|
|
"step": 13715
|
|
},
|
|
{
|
|
"epoch": 4.333570244018005,
|
|
"grad_norm": 0.0809838594844694,
|
|
"learning_rate": 0.0013955733584705957,
|
|
"loss": 2.5522,
|
|
"step": 13720
|
|
},
|
|
{
|
|
"epoch": 4.335149648582484,
|
|
"grad_norm": 0.07523546962854005,
|
|
"learning_rate": 0.0013950668306107034,
|
|
"loss": 2.5382,
|
|
"step": 13725
|
|
},
|
|
{
|
|
"epoch": 4.336729053146963,
|
|
"grad_norm": 0.08278129700893977,
|
|
"learning_rate": 0.0013945601826135122,
|
|
"loss": 2.519,
|
|
"step": 13730
|
|
},
|
|
{
|
|
"epoch": 4.338308457711443,
|
|
"grad_norm": 0.05914581025615629,
|
|
"learning_rate": 0.0013940534146330906,
|
|
"loss": 2.5308,
|
|
"step": 13735
|
|
},
|
|
{
|
|
"epoch": 4.339887862275922,
|
|
"grad_norm": 0.08798118929940507,
|
|
"learning_rate": 0.0013935465268235428,
|
|
"loss": 2.5847,
|
|
"step": 13740
|
|
},
|
|
{
|
|
"epoch": 4.341467266840401,
|
|
"grad_norm": 0.07153656693636579,
|
|
"learning_rate": 0.0013930395193390108,
|
|
"loss": 2.5399,
|
|
"step": 13745
|
|
},
|
|
{
|
|
"epoch": 4.34304667140488,
|
|
"grad_norm": 0.08199461753960192,
|
|
"learning_rate": 0.0013925323923336724,
|
|
"loss": 2.5723,
|
|
"step": 13750
|
|
},
|
|
{
|
|
"epoch": 4.34462607596936,
|
|
"grad_norm": 0.05920696830235281,
|
|
"learning_rate": 0.0013920251459617413,
|
|
"loss": 2.5572,
|
|
"step": 13755
|
|
},
|
|
{
|
|
"epoch": 4.346205480533839,
|
|
"grad_norm": 0.06898148636456203,
|
|
"learning_rate": 0.001391517780377468,
|
|
"loss": 2.5598,
|
|
"step": 13760
|
|
},
|
|
{
|
|
"epoch": 4.347784885098318,
|
|
"grad_norm": 0.056436110889154144,
|
|
"learning_rate": 0.001391010295735139,
|
|
"loss": 2.6277,
|
|
"step": 13765
|
|
},
|
|
{
|
|
"epoch": 4.349364289662797,
|
|
"grad_norm": 0.06356611277294899,
|
|
"learning_rate": 0.0013905026921890778,
|
|
"loss": 2.5147,
|
|
"step": 13770
|
|
},
|
|
{
|
|
"epoch": 4.350943694227277,
|
|
"grad_norm": 0.056308671341577105,
|
|
"learning_rate": 0.0013899949698936425,
|
|
"loss": 2.5154,
|
|
"step": 13775
|
|
},
|
|
{
|
|
"epoch": 4.352523098791756,
|
|
"grad_norm": 0.06676695503706351,
|
|
"learning_rate": 0.0013894871290032285,
|
|
"loss": 2.5106,
|
|
"step": 13780
|
|
},
|
|
{
|
|
"epoch": 4.354102503356235,
|
|
"grad_norm": 0.0859017385397016,
|
|
"learning_rate": 0.0013889791696722676,
|
|
"loss": 2.5461,
|
|
"step": 13785
|
|
},
|
|
{
|
|
"epoch": 4.355681907920714,
|
|
"grad_norm": 0.0641269088382948,
|
|
"learning_rate": 0.001388471092055226,
|
|
"loss": 2.5252,
|
|
"step": 13790
|
|
},
|
|
{
|
|
"epoch": 4.357261312485193,
|
|
"grad_norm": 0.07454163088702107,
|
|
"learning_rate": 0.0013879628963066075,
|
|
"loss": 2.448,
|
|
"step": 13795
|
|
},
|
|
{
|
|
"epoch": 4.358840717049672,
|
|
"grad_norm": 0.09356299937022537,
|
|
"learning_rate": 0.001387454582580951,
|
|
"loss": 2.5333,
|
|
"step": 13800
|
|
},
|
|
{
|
|
"epoch": 4.360420121614151,
|
|
"grad_norm": 0.06864964432028661,
|
|
"learning_rate": 0.0013869461510328314,
|
|
"loss": 2.5283,
|
|
"step": 13805
|
|
},
|
|
{
|
|
"epoch": 4.36199952617863,
|
|
"grad_norm": 0.07310291256869031,
|
|
"learning_rate": 0.0013864376018168595,
|
|
"loss": 2.5727,
|
|
"step": 13810
|
|
},
|
|
{
|
|
"epoch": 4.36357893074311,
|
|
"grad_norm": 0.07242835709121473,
|
|
"learning_rate": 0.001385928935087682,
|
|
"loss": 2.5139,
|
|
"step": 13815
|
|
},
|
|
{
|
|
"epoch": 4.365158335307589,
|
|
"grad_norm": 0.05428627981157836,
|
|
"learning_rate": 0.0013854201509999808,
|
|
"loss": 2.5324,
|
|
"step": 13820
|
|
},
|
|
{
|
|
"epoch": 4.366737739872068,
|
|
"grad_norm": 0.06937838622486542,
|
|
"learning_rate": 0.0013849112497084746,
|
|
"loss": 2.5905,
|
|
"step": 13825
|
|
},
|
|
{
|
|
"epoch": 4.368317144436547,
|
|
"grad_norm": 0.06755586765132361,
|
|
"learning_rate": 0.0013844022313679167,
|
|
"loss": 2.4154,
|
|
"step": 13830
|
|
},
|
|
{
|
|
"epoch": 4.369896549001027,
|
|
"grad_norm": 0.05777380943619152,
|
|
"learning_rate": 0.0013838930961330958,
|
|
"loss": 2.4106,
|
|
"step": 13835
|
|
},
|
|
{
|
|
"epoch": 4.371475953565506,
|
|
"grad_norm": 0.06083955833853959,
|
|
"learning_rate": 0.0013833838441588374,
|
|
"loss": 2.6462,
|
|
"step": 13840
|
|
},
|
|
{
|
|
"epoch": 4.373055358129985,
|
|
"grad_norm": 0.07551156041609827,
|
|
"learning_rate": 0.0013828744756000013,
|
|
"loss": 2.4989,
|
|
"step": 13845
|
|
},
|
|
{
|
|
"epoch": 4.374634762694464,
|
|
"grad_norm": 0.07542939198719763,
|
|
"learning_rate": 0.0013823649906114838,
|
|
"loss": 2.4391,
|
|
"step": 13850
|
|
},
|
|
{
|
|
"epoch": 4.376214167258944,
|
|
"grad_norm": 0.06506167630590956,
|
|
"learning_rate": 0.0013818553893482153,
|
|
"loss": 2.5238,
|
|
"step": 13855
|
|
},
|
|
{
|
|
"epoch": 4.377793571823423,
|
|
"grad_norm": 0.0597584835306194,
|
|
"learning_rate": 0.001381345671965163,
|
|
"loss": 2.5894,
|
|
"step": 13860
|
|
},
|
|
{
|
|
"epoch": 4.379372976387902,
|
|
"grad_norm": 0.06378945860954864,
|
|
"learning_rate": 0.0013808358386173279,
|
|
"loss": 2.484,
|
|
"step": 13865
|
|
},
|
|
{
|
|
"epoch": 4.380952380952381,
|
|
"grad_norm": 0.058409645223530246,
|
|
"learning_rate": 0.0013803258894597478,
|
|
"loss": 2.5072,
|
|
"step": 13870
|
|
},
|
|
{
|
|
"epoch": 4.38253178551686,
|
|
"grad_norm": 0.07616177601855426,
|
|
"learning_rate": 0.0013798158246474946,
|
|
"loss": 2.5044,
|
|
"step": 13875
|
|
},
|
|
{
|
|
"epoch": 4.384111190081339,
|
|
"grad_norm": 0.06595676858821979,
|
|
"learning_rate": 0.0013793056443356757,
|
|
"loss": 2.5473,
|
|
"step": 13880
|
|
},
|
|
{
|
|
"epoch": 4.385690594645818,
|
|
"grad_norm": 0.06969982767798963,
|
|
"learning_rate": 0.001378795348679434,
|
|
"loss": 2.5624,
|
|
"step": 13885
|
|
},
|
|
{
|
|
"epoch": 4.3872699992102975,
|
|
"grad_norm": 0.06868063538537598,
|
|
"learning_rate": 0.0013782849378339468,
|
|
"loss": 2.4366,
|
|
"step": 13890
|
|
},
|
|
{
|
|
"epoch": 4.388849403774777,
|
|
"grad_norm": 0.060562599314615474,
|
|
"learning_rate": 0.0013777744119544272,
|
|
"loss": 2.4932,
|
|
"step": 13895
|
|
},
|
|
{
|
|
"epoch": 4.390428808339256,
|
|
"grad_norm": 0.05862464836495264,
|
|
"learning_rate": 0.0013772637711961223,
|
|
"loss": 2.6184,
|
|
"step": 13900
|
|
},
|
|
{
|
|
"epoch": 4.392008212903735,
|
|
"grad_norm": 0.06286076947517723,
|
|
"learning_rate": 0.0013767530157143154,
|
|
"loss": 2.6207,
|
|
"step": 13905
|
|
},
|
|
{
|
|
"epoch": 4.3935876174682145,
|
|
"grad_norm": 0.07489822376604878,
|
|
"learning_rate": 0.001376242145664323,
|
|
"loss": 2.4922,
|
|
"step": 13910
|
|
},
|
|
{
|
|
"epoch": 4.395167022032694,
|
|
"grad_norm": 0.07289292764396378,
|
|
"learning_rate": 0.0013757311612014982,
|
|
"loss": 2.5087,
|
|
"step": 13915
|
|
},
|
|
{
|
|
"epoch": 4.396746426597173,
|
|
"grad_norm": 0.06649281228468891,
|
|
"learning_rate": 0.001375220062481228,
|
|
"loss": 2.4794,
|
|
"step": 13920
|
|
},
|
|
{
|
|
"epoch": 4.398325831161652,
|
|
"grad_norm": 0.084598796226276,
|
|
"learning_rate": 0.0013747088496589342,
|
|
"loss": 2.5119,
|
|
"step": 13925
|
|
},
|
|
{
|
|
"epoch": 4.3999052357261315,
|
|
"grad_norm": 0.07204598530837651,
|
|
"learning_rate": 0.0013741975228900732,
|
|
"loss": 2.452,
|
|
"step": 13930
|
|
},
|
|
{
|
|
"epoch": 4.401484640290611,
|
|
"grad_norm": 0.07094799318360079,
|
|
"learning_rate": 0.0013736860823301362,
|
|
"loss": 2.5321,
|
|
"step": 13935
|
|
},
|
|
{
|
|
"epoch": 4.40306404485509,
|
|
"grad_norm": 0.08547681655392879,
|
|
"learning_rate": 0.001373174528134649,
|
|
"loss": 2.4505,
|
|
"step": 13940
|
|
},
|
|
{
|
|
"epoch": 4.404643449419569,
|
|
"grad_norm": 0.07430050047282415,
|
|
"learning_rate": 0.0013726628604591724,
|
|
"loss": 2.5687,
|
|
"step": 13945
|
|
},
|
|
{
|
|
"epoch": 4.406222853984048,
|
|
"grad_norm": 0.0767070217749806,
|
|
"learning_rate": 0.001372151079459301,
|
|
"loss": 2.5587,
|
|
"step": 13950
|
|
},
|
|
{
|
|
"epoch": 4.407802258548527,
|
|
"grad_norm": 0.083602479556262,
|
|
"learning_rate": 0.0013716391852906637,
|
|
"loss": 2.4874,
|
|
"step": 13955
|
|
},
|
|
{
|
|
"epoch": 4.409381663113006,
|
|
"grad_norm": 0.08026159327952571,
|
|
"learning_rate": 0.001371127178108925,
|
|
"loss": 2.4348,
|
|
"step": 13960
|
|
},
|
|
{
|
|
"epoch": 4.410961067677485,
|
|
"grad_norm": 0.07921769390500488,
|
|
"learning_rate": 0.0013706150580697824,
|
|
"loss": 2.6105,
|
|
"step": 13965
|
|
},
|
|
{
|
|
"epoch": 4.4125404722419646,
|
|
"grad_norm": 0.06758457402712514,
|
|
"learning_rate": 0.0013701028253289686,
|
|
"loss": 2.4873,
|
|
"step": 13970
|
|
},
|
|
{
|
|
"epoch": 4.414119876806444,
|
|
"grad_norm": 0.052999579791167505,
|
|
"learning_rate": 0.0013695904800422505,
|
|
"loss": 2.5906,
|
|
"step": 13975
|
|
},
|
|
{
|
|
"epoch": 4.415699281370923,
|
|
"grad_norm": 0.057999719672230304,
|
|
"learning_rate": 0.0013690780223654284,
|
|
"loss": 2.5637,
|
|
"step": 13980
|
|
},
|
|
{
|
|
"epoch": 4.417278685935402,
|
|
"grad_norm": 0.0923377865259392,
|
|
"learning_rate": 0.0013685654524543379,
|
|
"loss": 2.5586,
|
|
"step": 13985
|
|
},
|
|
{
|
|
"epoch": 4.4188580904998815,
|
|
"grad_norm": 0.07699622829811727,
|
|
"learning_rate": 0.0013680527704648484,
|
|
"loss": 2.5492,
|
|
"step": 13990
|
|
},
|
|
{
|
|
"epoch": 4.420437495064361,
|
|
"grad_norm": 0.059955279849050386,
|
|
"learning_rate": 0.001367539976552863,
|
|
"loss": 2.4955,
|
|
"step": 13995
|
|
},
|
|
{
|
|
"epoch": 4.42201689962884,
|
|
"grad_norm": 0.06040037504820496,
|
|
"learning_rate": 0.0013670270708743186,
|
|
"loss": 2.5267,
|
|
"step": 14000
|
|
},
|
|
{
|
|
"epoch": 4.423596304193319,
|
|
"grad_norm": 0.08281551864708046,
|
|
"learning_rate": 0.001366514053585187,
|
|
"loss": 2.4655,
|
|
"step": 14005
|
|
},
|
|
{
|
|
"epoch": 4.4251757087577985,
|
|
"grad_norm": 0.05768791872116924,
|
|
"learning_rate": 0.0013660009248414736,
|
|
"loss": 2.5257,
|
|
"step": 14010
|
|
},
|
|
{
|
|
"epoch": 4.426755113322278,
|
|
"grad_norm": 0.06502174163101598,
|
|
"learning_rate": 0.0013654876847992174,
|
|
"loss": 2.5056,
|
|
"step": 14015
|
|
},
|
|
{
|
|
"epoch": 4.428334517886757,
|
|
"grad_norm": 0.05408102748420643,
|
|
"learning_rate": 0.0013649743336144914,
|
|
"loss": 2.5622,
|
|
"step": 14020
|
|
},
|
|
{
|
|
"epoch": 4.429913922451236,
|
|
"grad_norm": 0.06350269471766899,
|
|
"learning_rate": 0.0013644608714434025,
|
|
"loss": 2.5551,
|
|
"step": 14025
|
|
},
|
|
{
|
|
"epoch": 4.4314933270157155,
|
|
"grad_norm": 0.06237235890280388,
|
|
"learning_rate": 0.001363947298442091,
|
|
"loss": 2.4809,
|
|
"step": 14030
|
|
},
|
|
{
|
|
"epoch": 4.433072731580194,
|
|
"grad_norm": 0.060069035801989634,
|
|
"learning_rate": 0.0013634336147667317,
|
|
"loss": 2.4924,
|
|
"step": 14035
|
|
},
|
|
{
|
|
"epoch": 4.434652136144673,
|
|
"grad_norm": 0.06472959109812577,
|
|
"learning_rate": 0.001362919820573532,
|
|
"loss": 2.4054,
|
|
"step": 14040
|
|
},
|
|
{
|
|
"epoch": 4.436231540709152,
|
|
"grad_norm": 0.05414354543169224,
|
|
"learning_rate": 0.0013624059160187336,
|
|
"loss": 2.6368,
|
|
"step": 14045
|
|
},
|
|
{
|
|
"epoch": 4.437810945273632,
|
|
"grad_norm": 0.07275754959987431,
|
|
"learning_rate": 0.0013618919012586114,
|
|
"loss": 2.4423,
|
|
"step": 14050
|
|
},
|
|
{
|
|
"epoch": 4.439390349838111,
|
|
"grad_norm": 0.05630187983993725,
|
|
"learning_rate": 0.0013613777764494746,
|
|
"loss": 2.4456,
|
|
"step": 14055
|
|
},
|
|
{
|
|
"epoch": 4.44096975440259,
|
|
"grad_norm": 0.059532361130487516,
|
|
"learning_rate": 0.0013608635417476647,
|
|
"loss": 2.4708,
|
|
"step": 14060
|
|
},
|
|
{
|
|
"epoch": 4.442549158967069,
|
|
"grad_norm": 0.07262488825091169,
|
|
"learning_rate": 0.0013603491973095574,
|
|
"loss": 2.5457,
|
|
"step": 14065
|
|
},
|
|
{
|
|
"epoch": 4.444128563531549,
|
|
"grad_norm": 0.06222639433243145,
|
|
"learning_rate": 0.0013598347432915616,
|
|
"loss": 2.623,
|
|
"step": 14070
|
|
},
|
|
{
|
|
"epoch": 4.445707968096028,
|
|
"grad_norm": 0.06217185296538233,
|
|
"learning_rate": 0.0013593201798501192,
|
|
"loss": 2.4725,
|
|
"step": 14075
|
|
},
|
|
{
|
|
"epoch": 4.447287372660507,
|
|
"grad_norm": 0.06983882051728081,
|
|
"learning_rate": 0.0013588055071417063,
|
|
"loss": 2.6554,
|
|
"step": 14080
|
|
},
|
|
{
|
|
"epoch": 4.448866777224986,
|
|
"grad_norm": 0.07225143823696022,
|
|
"learning_rate": 0.001358290725322831,
|
|
"loss": 2.5879,
|
|
"step": 14085
|
|
},
|
|
{
|
|
"epoch": 4.450446181789466,
|
|
"grad_norm": 0.04946280398519253,
|
|
"learning_rate": 0.001357775834550035,
|
|
"loss": 2.504,
|
|
"step": 14090
|
|
},
|
|
{
|
|
"epoch": 4.452025586353945,
|
|
"grad_norm": 0.05885832040929403,
|
|
"learning_rate": 0.0013572608349798937,
|
|
"loss": 2.5389,
|
|
"step": 14095
|
|
},
|
|
{
|
|
"epoch": 4.453604990918424,
|
|
"grad_norm": 0.08591016654882142,
|
|
"learning_rate": 0.001356745726769015,
|
|
"loss": 2.5392,
|
|
"step": 14100
|
|
},
|
|
{
|
|
"epoch": 4.455184395482903,
|
|
"grad_norm": 0.07319105728497587,
|
|
"learning_rate": 0.0013562305100740404,
|
|
"loss": 2.5936,
|
|
"step": 14105
|
|
},
|
|
{
|
|
"epoch": 4.456763800047382,
|
|
"grad_norm": 0.0578954510511244,
|
|
"learning_rate": 0.0013557151850516439,
|
|
"loss": 2.452,
|
|
"step": 14110
|
|
},
|
|
{
|
|
"epoch": 4.458343204611861,
|
|
"grad_norm": 0.06653209710843927,
|
|
"learning_rate": 0.0013551997518585317,
|
|
"loss": 2.4913,
|
|
"step": 14115
|
|
},
|
|
{
|
|
"epoch": 4.45992260917634,
|
|
"grad_norm": 0.07802101976031472,
|
|
"learning_rate": 0.0013546842106514447,
|
|
"loss": 2.4905,
|
|
"step": 14120
|
|
},
|
|
{
|
|
"epoch": 4.4615020137408195,
|
|
"grad_norm": 0.06717026553531497,
|
|
"learning_rate": 0.0013541685615871555,
|
|
"loss": 2.5445,
|
|
"step": 14125
|
|
},
|
|
{
|
|
"epoch": 4.463081418305299,
|
|
"grad_norm": 0.0817827723762572,
|
|
"learning_rate": 0.0013536528048224696,
|
|
"loss": 2.5179,
|
|
"step": 14130
|
|
},
|
|
{
|
|
"epoch": 4.464660822869778,
|
|
"grad_norm": 0.06288364664081973,
|
|
"learning_rate": 0.001353136940514225,
|
|
"loss": 2.5217,
|
|
"step": 14135
|
|
},
|
|
{
|
|
"epoch": 4.466240227434257,
|
|
"grad_norm": 0.06522979915748774,
|
|
"learning_rate": 0.0013526209688192931,
|
|
"loss": 2.5435,
|
|
"step": 14140
|
|
},
|
|
{
|
|
"epoch": 4.467819631998736,
|
|
"grad_norm": 0.06396210312239645,
|
|
"learning_rate": 0.0013521048898945778,
|
|
"loss": 2.5306,
|
|
"step": 14145
|
|
},
|
|
{
|
|
"epoch": 4.469399036563216,
|
|
"grad_norm": 0.08620056053629571,
|
|
"learning_rate": 0.001351588703897015,
|
|
"loss": 2.6416,
|
|
"step": 14150
|
|
},
|
|
{
|
|
"epoch": 4.470978441127695,
|
|
"grad_norm": 0.07435867299037283,
|
|
"learning_rate": 0.0013510724109835738,
|
|
"loss": 2.4844,
|
|
"step": 14155
|
|
},
|
|
{
|
|
"epoch": 4.472557845692174,
|
|
"grad_norm": 0.07096473189861796,
|
|
"learning_rate": 0.0013505560113112555,
|
|
"loss": 2.5824,
|
|
"step": 14160
|
|
},
|
|
{
|
|
"epoch": 4.474137250256653,
|
|
"grad_norm": 0.0913140577177938,
|
|
"learning_rate": 0.0013500395050370937,
|
|
"loss": 2.4796,
|
|
"step": 14165
|
|
},
|
|
{
|
|
"epoch": 4.475716654821133,
|
|
"grad_norm": 0.062042548534127946,
|
|
"learning_rate": 0.001349522892318155,
|
|
"loss": 2.4454,
|
|
"step": 14170
|
|
},
|
|
{
|
|
"epoch": 4.477296059385612,
|
|
"grad_norm": 0.07373986458147358,
|
|
"learning_rate": 0.0013490061733115381,
|
|
"loss": 2.5725,
|
|
"step": 14175
|
|
},
|
|
{
|
|
"epoch": 4.478875463950091,
|
|
"grad_norm": 0.055063662338057706,
|
|
"learning_rate": 0.0013484893481743735,
|
|
"loss": 2.4965,
|
|
"step": 14180
|
|
},
|
|
{
|
|
"epoch": 4.48045486851457,
|
|
"grad_norm": 0.0665836744697159,
|
|
"learning_rate": 0.0013479724170638247,
|
|
"loss": 2.4605,
|
|
"step": 14185
|
|
},
|
|
{
|
|
"epoch": 4.48203427307905,
|
|
"grad_norm": 0.06170549589690821,
|
|
"learning_rate": 0.001347455380137087,
|
|
"loss": 2.5759,
|
|
"step": 14190
|
|
},
|
|
{
|
|
"epoch": 4.483613677643528,
|
|
"grad_norm": 0.06027017881844297,
|
|
"learning_rate": 0.0013469382375513885,
|
|
"loss": 2.4885,
|
|
"step": 14195
|
|
},
|
|
{
|
|
"epoch": 4.485193082208007,
|
|
"grad_norm": 0.06967962993886911,
|
|
"learning_rate": 0.0013464209894639885,
|
|
"loss": 2.5976,
|
|
"step": 14200
|
|
},
|
|
{
|
|
"epoch": 4.4867724867724865,
|
|
"grad_norm": 0.07882222499929464,
|
|
"learning_rate": 0.0013459036360321788,
|
|
"loss": 2.4848,
|
|
"step": 14205
|
|
},
|
|
{
|
|
"epoch": 4.488351891336966,
|
|
"grad_norm": 0.08115387174683329,
|
|
"learning_rate": 0.0013453861774132836,
|
|
"loss": 2.5393,
|
|
"step": 14210
|
|
},
|
|
{
|
|
"epoch": 4.489931295901445,
|
|
"grad_norm": 0.07249459700051057,
|
|
"learning_rate": 0.0013448686137646586,
|
|
"loss": 2.6291,
|
|
"step": 14215
|
|
},
|
|
{
|
|
"epoch": 4.491510700465924,
|
|
"grad_norm": 0.05952391465582586,
|
|
"learning_rate": 0.0013443509452436915,
|
|
"loss": 2.3992,
|
|
"step": 14220
|
|
},
|
|
{
|
|
"epoch": 4.4930901050304035,
|
|
"grad_norm": 0.06351042392140363,
|
|
"learning_rate": 0.0013438331720078019,
|
|
"loss": 2.6077,
|
|
"step": 14225
|
|
},
|
|
{
|
|
"epoch": 4.494669509594883,
|
|
"grad_norm": 0.09179406626814433,
|
|
"learning_rate": 0.0013433152942144417,
|
|
"loss": 2.5609,
|
|
"step": 14230
|
|
},
|
|
{
|
|
"epoch": 4.496248914159362,
|
|
"grad_norm": 0.06948199961598737,
|
|
"learning_rate": 0.0013427973120210938,
|
|
"loss": 2.4387,
|
|
"step": 14235
|
|
},
|
|
{
|
|
"epoch": 4.497828318723841,
|
|
"grad_norm": 0.06807742010409668,
|
|
"learning_rate": 0.0013422792255852738,
|
|
"loss": 2.5461,
|
|
"step": 14240
|
|
},
|
|
{
|
|
"epoch": 4.4994077232883205,
|
|
"grad_norm": 0.05565111213818792,
|
|
"learning_rate": 0.0013417610350645282,
|
|
"loss": 2.4781,
|
|
"step": 14245
|
|
},
|
|
{
|
|
"epoch": 4.5009871278528,
|
|
"grad_norm": 0.06018365938532872,
|
|
"learning_rate": 0.0013412427406164352,
|
|
"loss": 2.6829,
|
|
"step": 14250
|
|
},
|
|
{
|
|
"epoch": 4.502566532417279,
|
|
"grad_norm": 0.07509777643215441,
|
|
"learning_rate": 0.001340724342398605,
|
|
"loss": 2.629,
|
|
"step": 14255
|
|
},
|
|
{
|
|
"epoch": 4.504145936981758,
|
|
"grad_norm": 0.07356464439759725,
|
|
"learning_rate": 0.0013402058405686797,
|
|
"loss": 2.5301,
|
|
"step": 14260
|
|
},
|
|
{
|
|
"epoch": 4.505725341546237,
|
|
"grad_norm": 0.06645424332119493,
|
|
"learning_rate": 0.0013396872352843317,
|
|
"loss": 2.5281,
|
|
"step": 14265
|
|
},
|
|
{
|
|
"epoch": 4.507304746110716,
|
|
"grad_norm": 0.07558749040618944,
|
|
"learning_rate": 0.0013391685267032654,
|
|
"loss": 2.5062,
|
|
"step": 14270
|
|
},
|
|
{
|
|
"epoch": 4.508884150675195,
|
|
"grad_norm": 0.05613251020843608,
|
|
"learning_rate": 0.0013386497149832173,
|
|
"loss": 2.428,
|
|
"step": 14275
|
|
},
|
|
{
|
|
"epoch": 4.510463555239674,
|
|
"grad_norm": 0.06177974682298049,
|
|
"learning_rate": 0.0013381308002819545,
|
|
"loss": 2.4758,
|
|
"step": 14280
|
|
},
|
|
{
|
|
"epoch": 4.512042959804154,
|
|
"grad_norm": 0.0681251274627974,
|
|
"learning_rate": 0.001337611782757276,
|
|
"loss": 2.5083,
|
|
"step": 14285
|
|
},
|
|
{
|
|
"epoch": 4.513622364368633,
|
|
"grad_norm": 0.0692859935576207,
|
|
"learning_rate": 0.0013370926625670115,
|
|
"loss": 2.4362,
|
|
"step": 14290
|
|
},
|
|
{
|
|
"epoch": 4.515201768933112,
|
|
"grad_norm": 0.06808243851966223,
|
|
"learning_rate": 0.0013365734398690216,
|
|
"loss": 2.592,
|
|
"step": 14295
|
|
},
|
|
{
|
|
"epoch": 4.516781173497591,
|
|
"grad_norm": 0.05856059595516205,
|
|
"learning_rate": 0.0013360541148211994,
|
|
"loss": 2.5013,
|
|
"step": 14300
|
|
},
|
|
{
|
|
"epoch": 4.518360578062071,
|
|
"grad_norm": 0.0686920741590947,
|
|
"learning_rate": 0.0013355346875814679,
|
|
"loss": 2.529,
|
|
"step": 14305
|
|
},
|
|
{
|
|
"epoch": 4.51993998262655,
|
|
"grad_norm": 0.06560875792670096,
|
|
"learning_rate": 0.0013350151583077818,
|
|
"loss": 2.4386,
|
|
"step": 14310
|
|
},
|
|
{
|
|
"epoch": 4.521519387191029,
|
|
"grad_norm": 0.07705004270678456,
|
|
"learning_rate": 0.0013344955271581262,
|
|
"loss": 2.5246,
|
|
"step": 14315
|
|
},
|
|
{
|
|
"epoch": 4.523098791755508,
|
|
"grad_norm": 0.0694705060803293,
|
|
"learning_rate": 0.0013339757942905182,
|
|
"loss": 2.5614,
|
|
"step": 14320
|
|
},
|
|
{
|
|
"epoch": 4.524678196319988,
|
|
"grad_norm": 0.06658893824711391,
|
|
"learning_rate": 0.001333455959863005,
|
|
"loss": 2.5895,
|
|
"step": 14325
|
|
},
|
|
{
|
|
"epoch": 4.526257600884467,
|
|
"grad_norm": 0.06252886108747881,
|
|
"learning_rate": 0.001332936024033665,
|
|
"loss": 2.5424,
|
|
"step": 14330
|
|
},
|
|
{
|
|
"epoch": 4.527837005448946,
|
|
"grad_norm": 0.06417874942970586,
|
|
"learning_rate": 0.0013324159869606072,
|
|
"loss": 2.5635,
|
|
"step": 14335
|
|
},
|
|
{
|
|
"epoch": 4.529416410013425,
|
|
"grad_norm": 0.07692020820283244,
|
|
"learning_rate": 0.0013318958488019715,
|
|
"loss": 2.6134,
|
|
"step": 14340
|
|
},
|
|
{
|
|
"epoch": 4.530995814577905,
|
|
"grad_norm": 0.06719147959402327,
|
|
"learning_rate": 0.0013313756097159287,
|
|
"loss": 2.4861,
|
|
"step": 14345
|
|
},
|
|
{
|
|
"epoch": 4.532575219142384,
|
|
"grad_norm": 0.07078783998658239,
|
|
"learning_rate": 0.0013308552698606804,
|
|
"loss": 2.6335,
|
|
"step": 14350
|
|
},
|
|
{
|
|
"epoch": 4.534154623706862,
|
|
"grad_norm": 0.06913700433344214,
|
|
"learning_rate": 0.0013303348293944584,
|
|
"loss": 2.4,
|
|
"step": 14355
|
|
},
|
|
{
|
|
"epoch": 4.535734028271341,
|
|
"grad_norm": 0.0631198430913686,
|
|
"learning_rate": 0.001329814288475525,
|
|
"loss": 2.5384,
|
|
"step": 14360
|
|
},
|
|
{
|
|
"epoch": 4.537313432835821,
|
|
"grad_norm": 0.06690628991153875,
|
|
"learning_rate": 0.001329293647262174,
|
|
"loss": 2.4874,
|
|
"step": 14365
|
|
},
|
|
{
|
|
"epoch": 4.5388928374003,
|
|
"grad_norm": 0.061245030936560724,
|
|
"learning_rate": 0.0013287729059127287,
|
|
"loss": 2.579,
|
|
"step": 14370
|
|
},
|
|
{
|
|
"epoch": 4.540472241964779,
|
|
"grad_norm": 0.07265244960888212,
|
|
"learning_rate": 0.0013282520645855435,
|
|
"loss": 2.6145,
|
|
"step": 14375
|
|
},
|
|
{
|
|
"epoch": 4.542051646529258,
|
|
"grad_norm": 0.05562980129409219,
|
|
"learning_rate": 0.001327731123439003,
|
|
"loss": 2.5371,
|
|
"step": 14380
|
|
},
|
|
{
|
|
"epoch": 4.543631051093738,
|
|
"grad_norm": 0.07501342212715247,
|
|
"learning_rate": 0.001327210082631521,
|
|
"loss": 2.5584,
|
|
"step": 14385
|
|
},
|
|
{
|
|
"epoch": 4.545210455658217,
|
|
"grad_norm": 0.05621475207377457,
|
|
"learning_rate": 0.0013266889423215438,
|
|
"loss": 2.5589,
|
|
"step": 14390
|
|
},
|
|
{
|
|
"epoch": 4.546789860222696,
|
|
"grad_norm": 0.07727632887718167,
|
|
"learning_rate": 0.0013261677026675468,
|
|
"loss": 2.5207,
|
|
"step": 14395
|
|
},
|
|
{
|
|
"epoch": 4.548369264787175,
|
|
"grad_norm": 0.06575705110727952,
|
|
"learning_rate": 0.001325646363828035,
|
|
"loss": 2.5446,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 4.549948669351655,
|
|
"grad_norm": 0.0726735090024027,
|
|
"learning_rate": 0.0013251249259615449,
|
|
"loss": 2.494,
|
|
"step": 14405
|
|
},
|
|
{
|
|
"epoch": 4.551528073916134,
|
|
"grad_norm": 0.06988209331509379,
|
|
"learning_rate": 0.0013246033892266417,
|
|
"loss": 2.4647,
|
|
"step": 14410
|
|
},
|
|
{
|
|
"epoch": 4.553107478480613,
|
|
"grad_norm": 0.07207187368145113,
|
|
"learning_rate": 0.0013240817537819218,
|
|
"loss": 2.5596,
|
|
"step": 14415
|
|
},
|
|
{
|
|
"epoch": 4.5546868830450915,
|
|
"grad_norm": 0.059501193560655585,
|
|
"learning_rate": 0.0013235600197860117,
|
|
"loss": 2.4478,
|
|
"step": 14420
|
|
},
|
|
{
|
|
"epoch": 4.556266287609571,
|
|
"grad_norm": 0.06572261116435447,
|
|
"learning_rate": 0.0013230381873975666,
|
|
"loss": 2.5149,
|
|
"step": 14425
|
|
},
|
|
{
|
|
"epoch": 4.55784569217405,
|
|
"grad_norm": 0.06339768919779079,
|
|
"learning_rate": 0.0013225162567752724,
|
|
"loss": 2.4566,
|
|
"step": 14430
|
|
},
|
|
{
|
|
"epoch": 4.559425096738529,
|
|
"grad_norm": 0.0540220761112062,
|
|
"learning_rate": 0.0013219942280778454,
|
|
"loss": 2.5326,
|
|
"step": 14435
|
|
},
|
|
{
|
|
"epoch": 4.5610045013030085,
|
|
"grad_norm": 0.07110542106791799,
|
|
"learning_rate": 0.001321472101464031,
|
|
"loss": 2.4537,
|
|
"step": 14440
|
|
},
|
|
{
|
|
"epoch": 4.562583905867488,
|
|
"grad_norm": 0.06603071822844594,
|
|
"learning_rate": 0.0013209498770926044,
|
|
"loss": 2.5216,
|
|
"step": 14445
|
|
},
|
|
{
|
|
"epoch": 4.564163310431967,
|
|
"grad_norm": 0.06210510436605857,
|
|
"learning_rate": 0.0013204275551223707,
|
|
"loss": 2.4913,
|
|
"step": 14450
|
|
},
|
|
{
|
|
"epoch": 4.565742714996446,
|
|
"grad_norm": 0.06696826280590645,
|
|
"learning_rate": 0.0013199051357121645,
|
|
"loss": 2.5407,
|
|
"step": 14455
|
|
},
|
|
{
|
|
"epoch": 4.5673221195609255,
|
|
"grad_norm": 0.06457040402972691,
|
|
"learning_rate": 0.0013193826190208507,
|
|
"loss": 2.6159,
|
|
"step": 14460
|
|
},
|
|
{
|
|
"epoch": 4.568901524125405,
|
|
"grad_norm": 0.08403683367973355,
|
|
"learning_rate": 0.0013188600052073233,
|
|
"loss": 2.523,
|
|
"step": 14465
|
|
},
|
|
{
|
|
"epoch": 4.570480928689884,
|
|
"grad_norm": 0.09516774150664915,
|
|
"learning_rate": 0.0013183372944305055,
|
|
"loss": 2.5409,
|
|
"step": 14470
|
|
},
|
|
{
|
|
"epoch": 4.572060333254363,
|
|
"grad_norm": 0.06953612060835043,
|
|
"learning_rate": 0.00131781448684935,
|
|
"loss": 2.5325,
|
|
"step": 14475
|
|
},
|
|
{
|
|
"epoch": 4.5736397378188425,
|
|
"grad_norm": 0.06073184199451849,
|
|
"learning_rate": 0.0013172915826228397,
|
|
"loss": 2.537,
|
|
"step": 14480
|
|
},
|
|
{
|
|
"epoch": 4.575219142383322,
|
|
"grad_norm": 0.06047110009531498,
|
|
"learning_rate": 0.0013167685819099868,
|
|
"loss": 2.4982,
|
|
"step": 14485
|
|
},
|
|
{
|
|
"epoch": 4.576798546947801,
|
|
"grad_norm": 0.05516820416705633,
|
|
"learning_rate": 0.0013162454848698317,
|
|
"loss": 2.5509,
|
|
"step": 14490
|
|
},
|
|
{
|
|
"epoch": 4.57837795151228,
|
|
"grad_norm": 0.08413960458826529,
|
|
"learning_rate": 0.0013157222916614453,
|
|
"loss": 2.5301,
|
|
"step": 14495
|
|
},
|
|
{
|
|
"epoch": 4.5799573560767595,
|
|
"grad_norm": 0.061943460655179555,
|
|
"learning_rate": 0.0013151990024439272,
|
|
"loss": 2.5253,
|
|
"step": 14500
|
|
},
|
|
{
|
|
"epoch": 4.581536760641239,
|
|
"grad_norm": 0.07448364722954148,
|
|
"learning_rate": 0.001314675617376406,
|
|
"loss": 2.4996,
|
|
"step": 14505
|
|
},
|
|
{
|
|
"epoch": 4.583116165205717,
|
|
"grad_norm": 0.07394426552927498,
|
|
"learning_rate": 0.0013141521366180407,
|
|
"loss": 2.4658,
|
|
"step": 14510
|
|
},
|
|
{
|
|
"epoch": 4.584695569770196,
|
|
"grad_norm": 0.07090188960059321,
|
|
"learning_rate": 0.0013136285603280173,
|
|
"loss": 2.6264,
|
|
"step": 14515
|
|
},
|
|
{
|
|
"epoch": 4.586274974334676,
|
|
"grad_norm": 0.07835259049130651,
|
|
"learning_rate": 0.0013131048886655529,
|
|
"loss": 2.4878,
|
|
"step": 14520
|
|
},
|
|
{
|
|
"epoch": 4.587854378899155,
|
|
"grad_norm": 0.06973864384128355,
|
|
"learning_rate": 0.001312581121789892,
|
|
"loss": 2.5461,
|
|
"step": 14525
|
|
},
|
|
{
|
|
"epoch": 4.589433783463634,
|
|
"grad_norm": 0.05713278782611401,
|
|
"learning_rate": 0.0013120572598603094,
|
|
"loss": 2.6036,
|
|
"step": 14530
|
|
},
|
|
{
|
|
"epoch": 4.591013188028113,
|
|
"grad_norm": 0.06448552012510772,
|
|
"learning_rate": 0.0013115333030361076,
|
|
"loss": 2.6607,
|
|
"step": 14535
|
|
},
|
|
{
|
|
"epoch": 4.592592592592593,
|
|
"grad_norm": 0.06146515545218298,
|
|
"learning_rate": 0.001311009251476619,
|
|
"loss": 2.4407,
|
|
"step": 14540
|
|
},
|
|
{
|
|
"epoch": 4.594171997157072,
|
|
"grad_norm": 0.06085502924025703,
|
|
"learning_rate": 0.001310485105341204,
|
|
"loss": 2.5601,
|
|
"step": 14545
|
|
},
|
|
{
|
|
"epoch": 4.595751401721551,
|
|
"grad_norm": 0.06121460588993167,
|
|
"learning_rate": 0.0013099608647892521,
|
|
"loss": 2.3711,
|
|
"step": 14550
|
|
},
|
|
{
|
|
"epoch": 4.59733080628603,
|
|
"grad_norm": 0.06140176313807265,
|
|
"learning_rate": 0.001309436529980182,
|
|
"loss": 2.4567,
|
|
"step": 14555
|
|
},
|
|
{
|
|
"epoch": 4.5989102108505096,
|
|
"grad_norm": 0.06233078186399792,
|
|
"learning_rate": 0.0013089121010734397,
|
|
"loss": 2.4931,
|
|
"step": 14560
|
|
},
|
|
{
|
|
"epoch": 4.600489615414989,
|
|
"grad_norm": 0.07457805665337602,
|
|
"learning_rate": 0.0013083875782285016,
|
|
"loss": 2.4842,
|
|
"step": 14565
|
|
},
|
|
{
|
|
"epoch": 4.602069019979468,
|
|
"grad_norm": 0.07665835344387141,
|
|
"learning_rate": 0.001307862961604871,
|
|
"loss": 2.5222,
|
|
"step": 14570
|
|
},
|
|
{
|
|
"epoch": 4.603648424543947,
|
|
"grad_norm": 0.06806119545622995,
|
|
"learning_rate": 0.0013073382513620808,
|
|
"loss": 2.4976,
|
|
"step": 14575
|
|
},
|
|
{
|
|
"epoch": 4.605227829108426,
|
|
"grad_norm": 0.0555807009259313,
|
|
"learning_rate": 0.001306813447659692,
|
|
"loss": 2.4158,
|
|
"step": 14580
|
|
},
|
|
{
|
|
"epoch": 4.606807233672905,
|
|
"grad_norm": 0.05942410575287233,
|
|
"learning_rate": 0.0013062885506572944,
|
|
"loss": 2.4696,
|
|
"step": 14585
|
|
},
|
|
{
|
|
"epoch": 4.608386638237384,
|
|
"grad_norm": 0.05378692212406273,
|
|
"learning_rate": 0.0013057635605145048,
|
|
"loss": 2.5466,
|
|
"step": 14590
|
|
},
|
|
{
|
|
"epoch": 4.609966042801863,
|
|
"grad_norm": 0.05445137241232568,
|
|
"learning_rate": 0.0013052384773909705,
|
|
"loss": 2.4467,
|
|
"step": 14595
|
|
},
|
|
{
|
|
"epoch": 4.611545447366343,
|
|
"grad_norm": 0.06240141748299297,
|
|
"learning_rate": 0.0013047133014463654,
|
|
"loss": 2.5343,
|
|
"step": 14600
|
|
},
|
|
{
|
|
"epoch": 4.613124851930822,
|
|
"grad_norm": 0.062130909008503925,
|
|
"learning_rate": 0.001304188032840392,
|
|
"loss": 2.5451,
|
|
"step": 14605
|
|
},
|
|
{
|
|
"epoch": 4.614704256495301,
|
|
"grad_norm": 0.05765365607352223,
|
|
"learning_rate": 0.0013036626717327817,
|
|
"loss": 2.5551,
|
|
"step": 14610
|
|
},
|
|
{
|
|
"epoch": 4.61628366105978,
|
|
"grad_norm": 0.05839180617057512,
|
|
"learning_rate": 0.0013031372182832927,
|
|
"loss": 2.5071,
|
|
"step": 14615
|
|
},
|
|
{
|
|
"epoch": 4.61786306562426,
|
|
"grad_norm": 0.05267195123274345,
|
|
"learning_rate": 0.0013026116726517127,
|
|
"loss": 2.441,
|
|
"step": 14620
|
|
},
|
|
{
|
|
"epoch": 4.619442470188739,
|
|
"grad_norm": 0.05737658725604644,
|
|
"learning_rate": 0.0013020860349978562,
|
|
"loss": 2.5407,
|
|
"step": 14625
|
|
},
|
|
{
|
|
"epoch": 4.621021874753218,
|
|
"grad_norm": 0.05957690197529863,
|
|
"learning_rate": 0.0013015603054815667,
|
|
"loss": 2.4947,
|
|
"step": 14630
|
|
},
|
|
{
|
|
"epoch": 4.622601279317697,
|
|
"grad_norm": 0.05908030966268051,
|
|
"learning_rate": 0.0013010344842627154,
|
|
"loss": 2.6356,
|
|
"step": 14635
|
|
},
|
|
{
|
|
"epoch": 4.624180683882177,
|
|
"grad_norm": 0.06633542156038126,
|
|
"learning_rate": 0.0013005085715012002,
|
|
"loss": 2.5547,
|
|
"step": 14640
|
|
},
|
|
{
|
|
"epoch": 4.625760088446656,
|
|
"grad_norm": 0.06864905776747134,
|
|
"learning_rate": 0.0012999825673569488,
|
|
"loss": 2.6052,
|
|
"step": 14645
|
|
},
|
|
{
|
|
"epoch": 4.627339493011135,
|
|
"grad_norm": 0.07436619834506039,
|
|
"learning_rate": 0.0012994564719899149,
|
|
"loss": 2.5173,
|
|
"step": 14650
|
|
},
|
|
{
|
|
"epoch": 4.628918897575614,
|
|
"grad_norm": 0.05911882126334825,
|
|
"learning_rate": 0.0012989302855600814,
|
|
"loss": 2.4682,
|
|
"step": 14655
|
|
},
|
|
{
|
|
"epoch": 4.630498302140094,
|
|
"grad_norm": 0.06081199141600818,
|
|
"learning_rate": 0.001298404008227458,
|
|
"loss": 2.452,
|
|
"step": 14660
|
|
},
|
|
{
|
|
"epoch": 4.632077706704573,
|
|
"grad_norm": 0.060944164412464016,
|
|
"learning_rate": 0.0012978776401520824,
|
|
"loss": 2.5599,
|
|
"step": 14665
|
|
},
|
|
{
|
|
"epoch": 4.633657111269051,
|
|
"grad_norm": 0.07119386080046132,
|
|
"learning_rate": 0.0012973511814940192,
|
|
"loss": 2.5004,
|
|
"step": 14670
|
|
},
|
|
{
|
|
"epoch": 4.6352365158335305,
|
|
"grad_norm": 0.05575195182785549,
|
|
"learning_rate": 0.001296824632413362,
|
|
"loss": 2.5344,
|
|
"step": 14675
|
|
},
|
|
{
|
|
"epoch": 4.63681592039801,
|
|
"grad_norm": 0.062358567930570354,
|
|
"learning_rate": 0.0012962979930702303,
|
|
"loss": 2.518,
|
|
"step": 14680
|
|
},
|
|
{
|
|
"epoch": 4.638395324962489,
|
|
"grad_norm": 0.06279756886633003,
|
|
"learning_rate": 0.001295771263624772,
|
|
"loss": 2.5308,
|
|
"step": 14685
|
|
},
|
|
{
|
|
"epoch": 4.639974729526968,
|
|
"grad_norm": 0.06673592524557007,
|
|
"learning_rate": 0.0012952444442371623,
|
|
"loss": 2.4142,
|
|
"step": 14690
|
|
},
|
|
{
|
|
"epoch": 4.6415541340914475,
|
|
"grad_norm": 0.06218592477734958,
|
|
"learning_rate": 0.0012947175350676032,
|
|
"loss": 2.4998,
|
|
"step": 14695
|
|
},
|
|
{
|
|
"epoch": 4.643133538655927,
|
|
"grad_norm": 0.07279936719488737,
|
|
"learning_rate": 0.0012941905362763252,
|
|
"loss": 2.5579,
|
|
"step": 14700
|
|
},
|
|
{
|
|
"epoch": 4.644712943220406,
|
|
"grad_norm": 0.07435222246168129,
|
|
"learning_rate": 0.0012936634480235842,
|
|
"loss": 2.4673,
|
|
"step": 14705
|
|
},
|
|
{
|
|
"epoch": 4.646292347784885,
|
|
"grad_norm": 0.0632941669230261,
|
|
"learning_rate": 0.0012931362704696652,
|
|
"loss": 2.5094,
|
|
"step": 14710
|
|
},
|
|
{
|
|
"epoch": 4.6478717523493644,
|
|
"grad_norm": 0.07212217279803702,
|
|
"learning_rate": 0.0012926090037748792,
|
|
"loss": 2.5115,
|
|
"step": 14715
|
|
},
|
|
{
|
|
"epoch": 4.649451156913844,
|
|
"grad_norm": 0.06989878259512469,
|
|
"learning_rate": 0.0012920816480995645,
|
|
"loss": 2.5446,
|
|
"step": 14720
|
|
},
|
|
{
|
|
"epoch": 4.651030561478323,
|
|
"grad_norm": 0.06845073031606655,
|
|
"learning_rate": 0.001291554203604087,
|
|
"loss": 2.4885,
|
|
"step": 14725
|
|
},
|
|
{
|
|
"epoch": 4.652609966042802,
|
|
"grad_norm": 0.07007950023259411,
|
|
"learning_rate": 0.0012910266704488388,
|
|
"loss": 2.4828,
|
|
"step": 14730
|
|
},
|
|
{
|
|
"epoch": 4.654189370607281,
|
|
"grad_norm": 0.06702450252188678,
|
|
"learning_rate": 0.0012904990487942398,
|
|
"loss": 2.5228,
|
|
"step": 14735
|
|
},
|
|
{
|
|
"epoch": 4.65576877517176,
|
|
"grad_norm": 0.06533141251356811,
|
|
"learning_rate": 0.0012899713388007362,
|
|
"loss": 2.4774,
|
|
"step": 14740
|
|
},
|
|
{
|
|
"epoch": 4.657348179736239,
|
|
"grad_norm": 0.06062891469610465,
|
|
"learning_rate": 0.001289443540628801,
|
|
"loss": 2.4858,
|
|
"step": 14745
|
|
},
|
|
{
|
|
"epoch": 4.658927584300718,
|
|
"grad_norm": 0.05986021154478703,
|
|
"learning_rate": 0.0012889156544389343,
|
|
"loss": 2.5261,
|
|
"step": 14750
|
|
},
|
|
{
|
|
"epoch": 4.6605069888651975,
|
|
"grad_norm": 0.08522016594476302,
|
|
"learning_rate": 0.001288387680391663,
|
|
"loss": 2.519,
|
|
"step": 14755
|
|
},
|
|
{
|
|
"epoch": 4.662086393429677,
|
|
"grad_norm": 0.0731687262962632,
|
|
"learning_rate": 0.0012878596186475407,
|
|
"loss": 2.4651,
|
|
"step": 14760
|
|
},
|
|
{
|
|
"epoch": 4.663665797994156,
|
|
"grad_norm": 0.06355692861166644,
|
|
"learning_rate": 0.0012873314693671474,
|
|
"loss": 2.4626,
|
|
"step": 14765
|
|
},
|
|
{
|
|
"epoch": 4.665245202558635,
|
|
"grad_norm": 0.07824357408268898,
|
|
"learning_rate": 0.0012868032327110904,
|
|
"loss": 2.498,
|
|
"step": 14770
|
|
},
|
|
{
|
|
"epoch": 4.6668246071231145,
|
|
"grad_norm": 0.06509502457557328,
|
|
"learning_rate": 0.0012862749088400026,
|
|
"loss": 2.4943,
|
|
"step": 14775
|
|
},
|
|
{
|
|
"epoch": 4.668404011687594,
|
|
"grad_norm": 0.07135187141990396,
|
|
"learning_rate": 0.0012857464979145442,
|
|
"loss": 2.5456,
|
|
"step": 14780
|
|
},
|
|
{
|
|
"epoch": 4.669983416252073,
|
|
"grad_norm": 0.07224918585527369,
|
|
"learning_rate": 0.001285218000095401,
|
|
"loss": 2.4876,
|
|
"step": 14785
|
|
},
|
|
{
|
|
"epoch": 4.671562820816552,
|
|
"grad_norm": 0.07418437782732842,
|
|
"learning_rate": 0.0012846894155432867,
|
|
"loss": 2.5208,
|
|
"step": 14790
|
|
},
|
|
{
|
|
"epoch": 4.6731422253810315,
|
|
"grad_norm": 0.0709939637700474,
|
|
"learning_rate": 0.00128416074441894,
|
|
"loss": 2.4447,
|
|
"step": 14795
|
|
},
|
|
{
|
|
"epoch": 4.674721629945511,
|
|
"grad_norm": 0.06160707757163909,
|
|
"learning_rate": 0.0012836319868831268,
|
|
"loss": 2.5425,
|
|
"step": 14800
|
|
},
|
|
{
|
|
"epoch": 4.67630103450999,
|
|
"grad_norm": 0.0713384844027798,
|
|
"learning_rate": 0.001283103143096638,
|
|
"loss": 2.5299,
|
|
"step": 14805
|
|
},
|
|
{
|
|
"epoch": 4.677880439074469,
|
|
"grad_norm": 0.06761132129526373,
|
|
"learning_rate": 0.0012825742132202924,
|
|
"loss": 2.4755,
|
|
"step": 14810
|
|
},
|
|
{
|
|
"epoch": 4.6794598436389485,
|
|
"grad_norm": 0.058001357479072355,
|
|
"learning_rate": 0.0012820451974149341,
|
|
"loss": 2.3942,
|
|
"step": 14815
|
|
},
|
|
{
|
|
"epoch": 4.681039248203428,
|
|
"grad_norm": 0.07610367614504171,
|
|
"learning_rate": 0.0012815160958414332,
|
|
"loss": 2.49,
|
|
"step": 14820
|
|
},
|
|
{
|
|
"epoch": 4.682618652767906,
|
|
"grad_norm": 0.058818872817078344,
|
|
"learning_rate": 0.0012809869086606862,
|
|
"loss": 2.5079,
|
|
"step": 14825
|
|
},
|
|
{
|
|
"epoch": 4.684198057332385,
|
|
"grad_norm": 0.061137964717995257,
|
|
"learning_rate": 0.0012804576360336156,
|
|
"loss": 2.4274,
|
|
"step": 14830
|
|
},
|
|
{
|
|
"epoch": 4.685777461896865,
|
|
"grad_norm": 0.06570390088117294,
|
|
"learning_rate": 0.0012799282781211696,
|
|
"loss": 2.5274,
|
|
"step": 14835
|
|
},
|
|
{
|
|
"epoch": 4.687356866461344,
|
|
"grad_norm": 0.06073055469705799,
|
|
"learning_rate": 0.001279398835084323,
|
|
"loss": 2.4647,
|
|
"step": 14840
|
|
},
|
|
{
|
|
"epoch": 4.688936271025823,
|
|
"grad_norm": 0.060531944183211055,
|
|
"learning_rate": 0.0012788693070840758,
|
|
"loss": 2.5147,
|
|
"step": 14845
|
|
},
|
|
{
|
|
"epoch": 4.690515675590302,
|
|
"grad_norm": 0.06494266452306915,
|
|
"learning_rate": 0.0012783396942814538,
|
|
"loss": 2.5203,
|
|
"step": 14850
|
|
},
|
|
{
|
|
"epoch": 4.692095080154782,
|
|
"grad_norm": 0.07470619420782981,
|
|
"learning_rate": 0.0012778099968375092,
|
|
"loss": 2.4989,
|
|
"step": 14855
|
|
},
|
|
{
|
|
"epoch": 4.693674484719261,
|
|
"grad_norm": 0.07292076646736712,
|
|
"learning_rate": 0.0012772802149133196,
|
|
"loss": 2.4739,
|
|
"step": 14860
|
|
},
|
|
{
|
|
"epoch": 4.69525388928374,
|
|
"grad_norm": 0.07413512363981896,
|
|
"learning_rate": 0.0012767503486699884,
|
|
"loss": 2.546,
|
|
"step": 14865
|
|
},
|
|
{
|
|
"epoch": 4.696833293848219,
|
|
"grad_norm": 0.07548964649732978,
|
|
"learning_rate": 0.001276220398268644,
|
|
"loss": 2.4961,
|
|
"step": 14870
|
|
},
|
|
{
|
|
"epoch": 4.698412698412699,
|
|
"grad_norm": 0.07620750111593708,
|
|
"learning_rate": 0.0012756903638704413,
|
|
"loss": 2.4796,
|
|
"step": 14875
|
|
},
|
|
{
|
|
"epoch": 4.699992102977178,
|
|
"grad_norm": 0.0685213813223197,
|
|
"learning_rate": 0.0012751602456365608,
|
|
"loss": 2.5224,
|
|
"step": 14880
|
|
},
|
|
{
|
|
"epoch": 4.701571507541657,
|
|
"grad_norm": 0.07549811567322803,
|
|
"learning_rate": 0.0012746300437282074,
|
|
"loss": 2.4686,
|
|
"step": 14885
|
|
},
|
|
{
|
|
"epoch": 4.703150912106136,
|
|
"grad_norm": 0.07007093001892828,
|
|
"learning_rate": 0.0012740997583066125,
|
|
"loss": 2.5181,
|
|
"step": 14890
|
|
},
|
|
{
|
|
"epoch": 4.704730316670615,
|
|
"grad_norm": 0.06992912149096699,
|
|
"learning_rate": 0.0012735693895330324,
|
|
"loss": 2.4495,
|
|
"step": 14895
|
|
},
|
|
{
|
|
"epoch": 4.706309721235094,
|
|
"grad_norm": 0.07049608013635306,
|
|
"learning_rate": 0.0012730389375687485,
|
|
"loss": 2.5377,
|
|
"step": 14900
|
|
},
|
|
{
|
|
"epoch": 4.707889125799573,
|
|
"grad_norm": 0.07458682983718855,
|
|
"learning_rate": 0.0012725084025750682,
|
|
"loss": 2.5174,
|
|
"step": 14905
|
|
},
|
|
{
|
|
"epoch": 4.709468530364052,
|
|
"grad_norm": 0.07688364261040968,
|
|
"learning_rate": 0.0012719777847133241,
|
|
"loss": 2.5228,
|
|
"step": 14910
|
|
},
|
|
{
|
|
"epoch": 4.711047934928532,
|
|
"grad_norm": 0.05854700594985617,
|
|
"learning_rate": 0.0012714470841448733,
|
|
"loss": 2.4756,
|
|
"step": 14915
|
|
},
|
|
{
|
|
"epoch": 4.712627339493011,
|
|
"grad_norm": 0.06417359188466547,
|
|
"learning_rate": 0.0012709163010310985,
|
|
"loss": 2.4729,
|
|
"step": 14920
|
|
},
|
|
{
|
|
"epoch": 4.71420674405749,
|
|
"grad_norm": 0.06532722612555884,
|
|
"learning_rate": 0.0012703854355334073,
|
|
"loss": 2.5088,
|
|
"step": 14925
|
|
},
|
|
{
|
|
"epoch": 4.715786148621969,
|
|
"grad_norm": 0.06393029628497983,
|
|
"learning_rate": 0.001269854487813233,
|
|
"loss": 2.606,
|
|
"step": 14930
|
|
},
|
|
{
|
|
"epoch": 4.717365553186449,
|
|
"grad_norm": 0.075638289548756,
|
|
"learning_rate": 0.0012693234580320332,
|
|
"loss": 2.5032,
|
|
"step": 14935
|
|
},
|
|
{
|
|
"epoch": 4.718944957750928,
|
|
"grad_norm": 0.06346267592489585,
|
|
"learning_rate": 0.00126879234635129,
|
|
"loss": 2.426,
|
|
"step": 14940
|
|
},
|
|
{
|
|
"epoch": 4.720524362315407,
|
|
"grad_norm": 0.0753710407317556,
|
|
"learning_rate": 0.0012682611529325118,
|
|
"loss": 2.5639,
|
|
"step": 14945
|
|
},
|
|
{
|
|
"epoch": 4.722103766879886,
|
|
"grad_norm": 0.07113730530575336,
|
|
"learning_rate": 0.0012677298779372314,
|
|
"loss": 2.5784,
|
|
"step": 14950
|
|
},
|
|
{
|
|
"epoch": 4.723683171444366,
|
|
"grad_norm": 0.07059577267826218,
|
|
"learning_rate": 0.0012671985215270054,
|
|
"loss": 2.6693,
|
|
"step": 14955
|
|
},
|
|
{
|
|
"epoch": 4.725262576008845,
|
|
"grad_norm": 0.06972960409937519,
|
|
"learning_rate": 0.0012666670838634162,
|
|
"loss": 2.4031,
|
|
"step": 14960
|
|
},
|
|
{
|
|
"epoch": 4.726841980573324,
|
|
"grad_norm": 0.08810771753828298,
|
|
"learning_rate": 0.0012661355651080706,
|
|
"loss": 2.5473,
|
|
"step": 14965
|
|
},
|
|
{
|
|
"epoch": 4.728421385137803,
|
|
"grad_norm": 0.08301596809598058,
|
|
"learning_rate": 0.0012656039654225998,
|
|
"loss": 2.5494,
|
|
"step": 14970
|
|
},
|
|
{
|
|
"epoch": 4.730000789702283,
|
|
"grad_norm": 0.06278329136880133,
|
|
"learning_rate": 0.0012650722849686608,
|
|
"loss": 2.476,
|
|
"step": 14975
|
|
},
|
|
{
|
|
"epoch": 4.731580194266762,
|
|
"grad_norm": 0.06671055745535334,
|
|
"learning_rate": 0.0012645405239079329,
|
|
"loss": 2.5091,
|
|
"step": 14980
|
|
},
|
|
{
|
|
"epoch": 4.73315959883124,
|
|
"grad_norm": 0.061039369127283666,
|
|
"learning_rate": 0.001264008682402122,
|
|
"loss": 2.4996,
|
|
"step": 14985
|
|
},
|
|
{
|
|
"epoch": 4.7347390033957195,
|
|
"grad_norm": 0.08628748772802185,
|
|
"learning_rate": 0.0012634767606129575,
|
|
"loss": 2.5259,
|
|
"step": 14990
|
|
},
|
|
{
|
|
"epoch": 4.736318407960199,
|
|
"grad_norm": 0.07475897519624002,
|
|
"learning_rate": 0.0012629447587021935,
|
|
"loss": 2.4534,
|
|
"step": 14995
|
|
},
|
|
{
|
|
"epoch": 4.737897812524678,
|
|
"grad_norm": 0.0705938424551623,
|
|
"learning_rate": 0.0012624126768316086,
|
|
"loss": 2.5447,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 4.739477217089157,
|
|
"grad_norm": 0.0651629087395383,
|
|
"learning_rate": 0.0012618805151630053,
|
|
"loss": 2.5074,
|
|
"step": 15005
|
|
},
|
|
{
|
|
"epoch": 4.7410566216536365,
|
|
"grad_norm": 0.08525149958053733,
|
|
"learning_rate": 0.0012613482738582102,
|
|
"loss": 2.4931,
|
|
"step": 15010
|
|
},
|
|
{
|
|
"epoch": 4.742636026218116,
|
|
"grad_norm": 0.07045853188635649,
|
|
"learning_rate": 0.001260815953079075,
|
|
"loss": 2.4835,
|
|
"step": 15015
|
|
},
|
|
{
|
|
"epoch": 4.744215430782595,
|
|
"grad_norm": 0.06038041471515805,
|
|
"learning_rate": 0.0012602835529874749,
|
|
"loss": 2.5318,
|
|
"step": 15020
|
|
},
|
|
{
|
|
"epoch": 4.745794835347074,
|
|
"grad_norm": 0.059304704263382704,
|
|
"learning_rate": 0.0012597510737453097,
|
|
"loss": 2.4827,
|
|
"step": 15025
|
|
},
|
|
{
|
|
"epoch": 4.7473742399115535,
|
|
"grad_norm": 0.06122825638178242,
|
|
"learning_rate": 0.0012592185155145023,
|
|
"loss": 2.5176,
|
|
"step": 15030
|
|
},
|
|
{
|
|
"epoch": 4.748953644476033,
|
|
"grad_norm": 0.06576469785246933,
|
|
"learning_rate": 0.0012586858784570001,
|
|
"loss": 2.5614,
|
|
"step": 15035
|
|
},
|
|
{
|
|
"epoch": 4.750533049040512,
|
|
"grad_norm": 0.05216499044386397,
|
|
"learning_rate": 0.0012581531627347752,
|
|
"loss": 2.5391,
|
|
"step": 15040
|
|
},
|
|
{
|
|
"epoch": 4.752112453604991,
|
|
"grad_norm": 0.05816811441187202,
|
|
"learning_rate": 0.0012576203685098232,
|
|
"loss": 2.3904,
|
|
"step": 15045
|
|
},
|
|
{
|
|
"epoch": 4.7536918581694705,
|
|
"grad_norm": 0.06327988859865588,
|
|
"learning_rate": 0.0012570874959441634,
|
|
"loss": 2.4144,
|
|
"step": 15050
|
|
},
|
|
{
|
|
"epoch": 4.755271262733949,
|
|
"grad_norm": 0.05954686621291838,
|
|
"learning_rate": 0.0012565545451998382,
|
|
"loss": 2.5184,
|
|
"step": 15055
|
|
},
|
|
{
|
|
"epoch": 4.756850667298428,
|
|
"grad_norm": 0.08534564677467046,
|
|
"learning_rate": 0.0012560215164389148,
|
|
"loss": 2.485,
|
|
"step": 15060
|
|
},
|
|
{
|
|
"epoch": 4.758430071862907,
|
|
"grad_norm": 0.09411002400331668,
|
|
"learning_rate": 0.0012554884098234843,
|
|
"loss": 2.5018,
|
|
"step": 15065
|
|
},
|
|
{
|
|
"epoch": 4.760009476427387,
|
|
"grad_norm": 0.05829188110744158,
|
|
"learning_rate": 0.001254955225515661,
|
|
"loss": 2.4612,
|
|
"step": 15070
|
|
},
|
|
{
|
|
"epoch": 4.761588880991866,
|
|
"grad_norm": 0.06260111830916247,
|
|
"learning_rate": 0.0012544219636775819,
|
|
"loss": 2.4897,
|
|
"step": 15075
|
|
},
|
|
{
|
|
"epoch": 4.763168285556345,
|
|
"grad_norm": 0.09311945584816322,
|
|
"learning_rate": 0.0012538886244714096,
|
|
"loss": 2.3988,
|
|
"step": 15080
|
|
},
|
|
{
|
|
"epoch": 4.764747690120824,
|
|
"grad_norm": 0.09055351042402385,
|
|
"learning_rate": 0.0012533552080593285,
|
|
"loss": 2.466,
|
|
"step": 15085
|
|
},
|
|
{
|
|
"epoch": 4.766327094685304,
|
|
"grad_norm": 0.06199831535615724,
|
|
"learning_rate": 0.0012528217146035477,
|
|
"loss": 2.4885,
|
|
"step": 15090
|
|
},
|
|
{
|
|
"epoch": 4.767906499249783,
|
|
"grad_norm": 0.06763264055913053,
|
|
"learning_rate": 0.0012522881442662988,
|
|
"loss": 2.5595,
|
|
"step": 15095
|
|
},
|
|
{
|
|
"epoch": 4.769485903814262,
|
|
"grad_norm": 0.05847901990048384,
|
|
"learning_rate": 0.001251754497209837,
|
|
"loss": 2.4948,
|
|
"step": 15100
|
|
},
|
|
{
|
|
"epoch": 4.771065308378741,
|
|
"grad_norm": 0.052060540215396786,
|
|
"learning_rate": 0.001251220773596441,
|
|
"loss": 2.4376,
|
|
"step": 15105
|
|
},
|
|
{
|
|
"epoch": 4.772644712943221,
|
|
"grad_norm": 0.05157419424925148,
|
|
"learning_rate": 0.0012506869735884128,
|
|
"loss": 2.4315,
|
|
"step": 15110
|
|
},
|
|
{
|
|
"epoch": 4.7742241175077,
|
|
"grad_norm": 0.06833545413268356,
|
|
"learning_rate": 0.001250153097348078,
|
|
"loss": 2.5331,
|
|
"step": 15115
|
|
},
|
|
{
|
|
"epoch": 4.775803522072179,
|
|
"grad_norm": 0.06807438736130686,
|
|
"learning_rate": 0.0012496191450377843,
|
|
"loss": 2.5571,
|
|
"step": 15120
|
|
},
|
|
{
|
|
"epoch": 4.777382926636658,
|
|
"grad_norm": 0.08947326196212828,
|
|
"learning_rate": 0.0012490851168199036,
|
|
"loss": 2.5824,
|
|
"step": 15125
|
|
},
|
|
{
|
|
"epoch": 4.778962331201138,
|
|
"grad_norm": 0.07719919698525221,
|
|
"learning_rate": 0.00124855101285683,
|
|
"loss": 2.5316,
|
|
"step": 15130
|
|
},
|
|
{
|
|
"epoch": 4.780541735765617,
|
|
"grad_norm": 0.08025552743432796,
|
|
"learning_rate": 0.0012480168333109819,
|
|
"loss": 2.4447,
|
|
"step": 15135
|
|
},
|
|
{
|
|
"epoch": 4.782121140330095,
|
|
"grad_norm": 0.05261629415497841,
|
|
"learning_rate": 0.0012474825783447992,
|
|
"loss": 2.5032,
|
|
"step": 15140
|
|
},
|
|
{
|
|
"epoch": 4.783700544894574,
|
|
"grad_norm": 0.0757771632437342,
|
|
"learning_rate": 0.0012469482481207454,
|
|
"loss": 2.5653,
|
|
"step": 15145
|
|
},
|
|
{
|
|
"epoch": 4.785279949459054,
|
|
"grad_norm": 0.05623537967074141,
|
|
"learning_rate": 0.0012464138428013073,
|
|
"loss": 2.4591,
|
|
"step": 15150
|
|
},
|
|
{
|
|
"epoch": 4.786859354023533,
|
|
"grad_norm": 0.08262533487856788,
|
|
"learning_rate": 0.001245879362548994,
|
|
"loss": 2.4795,
|
|
"step": 15155
|
|
},
|
|
{
|
|
"epoch": 4.788438758588012,
|
|
"grad_norm": 0.06902115051220888,
|
|
"learning_rate": 0.001245344807526338,
|
|
"loss": 2.5178,
|
|
"step": 15160
|
|
},
|
|
{
|
|
"epoch": 4.790018163152491,
|
|
"grad_norm": 0.059367330068955296,
|
|
"learning_rate": 0.001244810177895893,
|
|
"loss": 2.4574,
|
|
"step": 15165
|
|
},
|
|
{
|
|
"epoch": 4.791597567716971,
|
|
"grad_norm": 0.05293086324890953,
|
|
"learning_rate": 0.001244275473820237,
|
|
"loss": 2.4589,
|
|
"step": 15170
|
|
},
|
|
{
|
|
"epoch": 4.79317697228145,
|
|
"grad_norm": 0.062351455497511706,
|
|
"learning_rate": 0.00124374069546197,
|
|
"loss": 2.518,
|
|
"step": 15175
|
|
},
|
|
{
|
|
"epoch": 4.794756376845929,
|
|
"grad_norm": 0.06960504551811326,
|
|
"learning_rate": 0.0012432058429837152,
|
|
"loss": 2.5104,
|
|
"step": 15180
|
|
},
|
|
{
|
|
"epoch": 4.796335781410408,
|
|
"grad_norm": 0.0785582773003688,
|
|
"learning_rate": 0.0012426709165481175,
|
|
"loss": 2.5301,
|
|
"step": 15185
|
|
},
|
|
{
|
|
"epoch": 4.797915185974888,
|
|
"grad_norm": 0.09538357866394954,
|
|
"learning_rate": 0.0012421359163178442,
|
|
"loss": 2.4908,
|
|
"step": 15190
|
|
},
|
|
{
|
|
"epoch": 4.799494590539367,
|
|
"grad_norm": 0.060926918043546184,
|
|
"learning_rate": 0.001241600842455586,
|
|
"loss": 2.5181,
|
|
"step": 15195
|
|
},
|
|
{
|
|
"epoch": 4.801073995103846,
|
|
"grad_norm": 0.06927795277655609,
|
|
"learning_rate": 0.001241065695124055,
|
|
"loss": 2.5287,
|
|
"step": 15200
|
|
},
|
|
{
|
|
"epoch": 4.802653399668325,
|
|
"grad_norm": 0.057548110512531184,
|
|
"learning_rate": 0.001240530474485987,
|
|
"loss": 2.4556,
|
|
"step": 15205
|
|
},
|
|
{
|
|
"epoch": 4.804232804232804,
|
|
"grad_norm": 0.06710418212774051,
|
|
"learning_rate": 0.0012399951807041379,
|
|
"loss": 2.5648,
|
|
"step": 15210
|
|
},
|
|
{
|
|
"epoch": 4.805812208797283,
|
|
"grad_norm": 0.06517248472080968,
|
|
"learning_rate": 0.001239459813941288,
|
|
"loss": 2.5899,
|
|
"step": 15215
|
|
},
|
|
{
|
|
"epoch": 4.807391613361762,
|
|
"grad_norm": 0.0671403527288489,
|
|
"learning_rate": 0.0012389243743602383,
|
|
"loss": 2.5127,
|
|
"step": 15220
|
|
},
|
|
{
|
|
"epoch": 4.8089710179262415,
|
|
"grad_norm": 0.07095804854043555,
|
|
"learning_rate": 0.001238388862123813,
|
|
"loss": 2.5935,
|
|
"step": 15225
|
|
},
|
|
{
|
|
"epoch": 4.810550422490721,
|
|
"grad_norm": 0.07413226220034597,
|
|
"learning_rate": 0.001237853277394858,
|
|
"loss": 2.5664,
|
|
"step": 15230
|
|
},
|
|
{
|
|
"epoch": 4.8121298270552,
|
|
"grad_norm": 0.06326029739527476,
|
|
"learning_rate": 0.001237317620336241,
|
|
"loss": 2.5517,
|
|
"step": 15235
|
|
},
|
|
{
|
|
"epoch": 4.813709231619679,
|
|
"grad_norm": 0.07277779313715153,
|
|
"learning_rate": 0.0012367818911108517,
|
|
"loss": 2.4122,
|
|
"step": 15240
|
|
},
|
|
{
|
|
"epoch": 4.8152886361841585,
|
|
"grad_norm": 0.06869560434543806,
|
|
"learning_rate": 0.0012362460898816025,
|
|
"loss": 2.5078,
|
|
"step": 15245
|
|
},
|
|
{
|
|
"epoch": 4.816868040748638,
|
|
"grad_norm": 0.0643629530844895,
|
|
"learning_rate": 0.0012357102168114268,
|
|
"loss": 2.5108,
|
|
"step": 15250
|
|
},
|
|
{
|
|
"epoch": 4.818447445313117,
|
|
"grad_norm": 0.06015315343653677,
|
|
"learning_rate": 0.0012351742720632798,
|
|
"loss": 2.4923,
|
|
"step": 15255
|
|
},
|
|
{
|
|
"epoch": 4.820026849877596,
|
|
"grad_norm": 0.05775598319572417,
|
|
"learning_rate": 0.0012346382558001392,
|
|
"loss": 2.468,
|
|
"step": 15260
|
|
},
|
|
{
|
|
"epoch": 4.8216062544420755,
|
|
"grad_norm": 0.06140546049655784,
|
|
"learning_rate": 0.0012341021681850045,
|
|
"loss": 2.5224,
|
|
"step": 15265
|
|
},
|
|
{
|
|
"epoch": 4.823185659006555,
|
|
"grad_norm": 0.0692881651243686,
|
|
"learning_rate": 0.001233566009380896,
|
|
"loss": 2.4801,
|
|
"step": 15270
|
|
},
|
|
{
|
|
"epoch": 4.824765063571034,
|
|
"grad_norm": 0.06165949337963385,
|
|
"learning_rate": 0.0012330297795508564,
|
|
"loss": 2.5157,
|
|
"step": 15275
|
|
},
|
|
{
|
|
"epoch": 4.826344468135513,
|
|
"grad_norm": 0.0682195228399358,
|
|
"learning_rate": 0.00123249347885795,
|
|
"loss": 2.5445,
|
|
"step": 15280
|
|
},
|
|
{
|
|
"epoch": 4.8279238726999925,
|
|
"grad_norm": 0.06226870586096772,
|
|
"learning_rate": 0.0012319571074652614,
|
|
"loss": 2.4146,
|
|
"step": 15285
|
|
},
|
|
{
|
|
"epoch": 4.829503277264472,
|
|
"grad_norm": 0.06679105092346499,
|
|
"learning_rate": 0.0012314206655358987,
|
|
"loss": 2.5325,
|
|
"step": 15290
|
|
},
|
|
{
|
|
"epoch": 4.831082681828951,
|
|
"grad_norm": 0.06162451599728066,
|
|
"learning_rate": 0.0012308841532329905,
|
|
"loss": 2.4682,
|
|
"step": 15295
|
|
},
|
|
{
|
|
"epoch": 4.832662086393429,
|
|
"grad_norm": 0.05503800445837147,
|
|
"learning_rate": 0.0012303475707196865,
|
|
"loss": 2.3985,
|
|
"step": 15300
|
|
},
|
|
{
|
|
"epoch": 4.834241490957909,
|
|
"grad_norm": 0.07987179045443299,
|
|
"learning_rate": 0.0012298109181591577,
|
|
"loss": 2.3869,
|
|
"step": 15305
|
|
},
|
|
{
|
|
"epoch": 4.835820895522388,
|
|
"grad_norm": 0.05973667980788249,
|
|
"learning_rate": 0.001229274195714597,
|
|
"loss": 2.466,
|
|
"step": 15310
|
|
},
|
|
{
|
|
"epoch": 4.837400300086867,
|
|
"grad_norm": 0.06080607906192062,
|
|
"learning_rate": 0.0012287374035492183,
|
|
"loss": 2.4171,
|
|
"step": 15315
|
|
},
|
|
{
|
|
"epoch": 4.838979704651346,
|
|
"grad_norm": 0.0929911394564496,
|
|
"learning_rate": 0.0012282005418262569,
|
|
"loss": 2.4351,
|
|
"step": 15320
|
|
},
|
|
{
|
|
"epoch": 4.8405591092158256,
|
|
"grad_norm": 0.07042123203050357,
|
|
"learning_rate": 0.0012276636107089684,
|
|
"loss": 2.4744,
|
|
"step": 15325
|
|
},
|
|
{
|
|
"epoch": 4.842138513780305,
|
|
"grad_norm": 0.061855793739237726,
|
|
"learning_rate": 0.0012271266103606304,
|
|
"loss": 2.6037,
|
|
"step": 15330
|
|
},
|
|
{
|
|
"epoch": 4.843717918344784,
|
|
"grad_norm": 0.06073958843808026,
|
|
"learning_rate": 0.0012265895409445413,
|
|
"loss": 2.4685,
|
|
"step": 15335
|
|
},
|
|
{
|
|
"epoch": 4.845297322909263,
|
|
"grad_norm": 0.06711465508022725,
|
|
"learning_rate": 0.001226052402624021,
|
|
"loss": 2.511,
|
|
"step": 15340
|
|
},
|
|
{
|
|
"epoch": 4.8468767274737425,
|
|
"grad_norm": 0.0808742963943968,
|
|
"learning_rate": 0.001225515195562409,
|
|
"loss": 2.518,
|
|
"step": 15345
|
|
},
|
|
{
|
|
"epoch": 4.848456132038222,
|
|
"grad_norm": 0.06842571928423273,
|
|
"learning_rate": 0.0012249779199230671,
|
|
"loss": 2.4423,
|
|
"step": 15350
|
|
},
|
|
{
|
|
"epoch": 4.850035536602701,
|
|
"grad_norm": 0.05912567812868452,
|
|
"learning_rate": 0.001224440575869377,
|
|
"loss": 2.4587,
|
|
"step": 15355
|
|
},
|
|
{
|
|
"epoch": 4.85161494116718,
|
|
"grad_norm": 0.0681131395124921,
|
|
"learning_rate": 0.0012239031635647418,
|
|
"loss": 2.5492,
|
|
"step": 15360
|
|
},
|
|
{
|
|
"epoch": 4.8531943457316595,
|
|
"grad_norm": 0.07921069271484028,
|
|
"learning_rate": 0.0012233656831725853,
|
|
"loss": 2.5395,
|
|
"step": 15365
|
|
},
|
|
{
|
|
"epoch": 4.854773750296138,
|
|
"grad_norm": 0.06666648532781783,
|
|
"learning_rate": 0.0012228281348563512,
|
|
"loss": 2.447,
|
|
"step": 15370
|
|
},
|
|
{
|
|
"epoch": 4.856353154860617,
|
|
"grad_norm": 0.06854146074690375,
|
|
"learning_rate": 0.0012222905187795053,
|
|
"loss": 2.4339,
|
|
"step": 15375
|
|
},
|
|
{
|
|
"epoch": 4.857932559425096,
|
|
"grad_norm": 0.0711578400136005,
|
|
"learning_rate": 0.0012217528351055327,
|
|
"loss": 2.5131,
|
|
"step": 15380
|
|
},
|
|
{
|
|
"epoch": 4.859511963989576,
|
|
"grad_norm": 0.06374172026154729,
|
|
"learning_rate": 0.0012212150839979402,
|
|
"loss": 2.4771,
|
|
"step": 15385
|
|
},
|
|
{
|
|
"epoch": 4.861091368554055,
|
|
"grad_norm": 0.06380930678887842,
|
|
"learning_rate": 0.0012206772656202537,
|
|
"loss": 2.5038,
|
|
"step": 15390
|
|
},
|
|
{
|
|
"epoch": 4.862670773118534,
|
|
"grad_norm": 0.053049498300829126,
|
|
"learning_rate": 0.0012201393801360208,
|
|
"loss": 2.5873,
|
|
"step": 15395
|
|
},
|
|
{
|
|
"epoch": 4.864250177683013,
|
|
"grad_norm": 0.060002911633182825,
|
|
"learning_rate": 0.0012196014277088088,
|
|
"loss": 2.5473,
|
|
"step": 15400
|
|
},
|
|
{
|
|
"epoch": 4.865829582247493,
|
|
"grad_norm": 0.06974729192567168,
|
|
"learning_rate": 0.0012190634085022056,
|
|
"loss": 2.551,
|
|
"step": 15405
|
|
},
|
|
{
|
|
"epoch": 4.867408986811972,
|
|
"grad_norm": 0.06401003903677191,
|
|
"learning_rate": 0.0012185253226798195,
|
|
"loss": 2.4325,
|
|
"step": 15410
|
|
},
|
|
{
|
|
"epoch": 4.868988391376451,
|
|
"grad_norm": 0.05765104930964299,
|
|
"learning_rate": 0.0012179871704052793,
|
|
"loss": 2.4814,
|
|
"step": 15415
|
|
},
|
|
{
|
|
"epoch": 4.87056779594093,
|
|
"grad_norm": 0.09917967750843776,
|
|
"learning_rate": 0.0012174489518422332,
|
|
"loss": 2.4686,
|
|
"step": 15420
|
|
},
|
|
{
|
|
"epoch": 4.87214720050541,
|
|
"grad_norm": 0.0790620977932766,
|
|
"learning_rate": 0.0012169106671543499,
|
|
"loss": 2.4991,
|
|
"step": 15425
|
|
},
|
|
{
|
|
"epoch": 4.873726605069889,
|
|
"grad_norm": 0.0746542162946544,
|
|
"learning_rate": 0.0012163723165053192,
|
|
"loss": 2.4402,
|
|
"step": 15430
|
|
},
|
|
{
|
|
"epoch": 4.875306009634368,
|
|
"grad_norm": 0.0688687534963303,
|
|
"learning_rate": 0.0012158339000588492,
|
|
"loss": 2.4895,
|
|
"step": 15435
|
|
},
|
|
{
|
|
"epoch": 4.876885414198847,
|
|
"grad_norm": 0.06709837817692976,
|
|
"learning_rate": 0.001215295417978669,
|
|
"loss": 2.5206,
|
|
"step": 15440
|
|
},
|
|
{
|
|
"epoch": 4.878464818763327,
|
|
"grad_norm": 0.0726724345962881,
|
|
"learning_rate": 0.0012147568704285276,
|
|
"loss": 2.5356,
|
|
"step": 15445
|
|
},
|
|
{
|
|
"epoch": 4.880044223327806,
|
|
"grad_norm": 0.07535157295869183,
|
|
"learning_rate": 0.0012142182575721945,
|
|
"loss": 2.4808,
|
|
"step": 15450
|
|
},
|
|
{
|
|
"epoch": 4.881623627892285,
|
|
"grad_norm": 0.08290435815113797,
|
|
"learning_rate": 0.0012136795795734576,
|
|
"loss": 2.4617,
|
|
"step": 15455
|
|
},
|
|
{
|
|
"epoch": 4.8832030324567635,
|
|
"grad_norm": 0.060912276319679534,
|
|
"learning_rate": 0.0012131408365961263,
|
|
"loss": 2.49,
|
|
"step": 15460
|
|
},
|
|
{
|
|
"epoch": 4.884782437021243,
|
|
"grad_norm": 0.0654902783875126,
|
|
"learning_rate": 0.0012126020288040279,
|
|
"loss": 2.3803,
|
|
"step": 15465
|
|
},
|
|
{
|
|
"epoch": 4.886361841585722,
|
|
"grad_norm": 0.05976371183664185,
|
|
"learning_rate": 0.0012120631563610107,
|
|
"loss": 2.4806,
|
|
"step": 15470
|
|
},
|
|
{
|
|
"epoch": 4.887941246150201,
|
|
"grad_norm": 0.0643486484882794,
|
|
"learning_rate": 0.001211524219430943,
|
|
"loss": 2.5788,
|
|
"step": 15475
|
|
},
|
|
{
|
|
"epoch": 4.8895206507146804,
|
|
"grad_norm": 0.0622585687911665,
|
|
"learning_rate": 0.0012109852181777117,
|
|
"loss": 2.5253,
|
|
"step": 15480
|
|
},
|
|
{
|
|
"epoch": 4.89110005527916,
|
|
"grad_norm": 0.07622978591804572,
|
|
"learning_rate": 0.0012104461527652232,
|
|
"loss": 2.4864,
|
|
"step": 15485
|
|
},
|
|
{
|
|
"epoch": 4.892679459843639,
|
|
"grad_norm": 0.058348614197018894,
|
|
"learning_rate": 0.0012099070233574044,
|
|
"loss": 2.4738,
|
|
"step": 15490
|
|
},
|
|
{
|
|
"epoch": 4.894258864408118,
|
|
"grad_norm": 0.0664664094650849,
|
|
"learning_rate": 0.0012093678301182012,
|
|
"loss": 2.6012,
|
|
"step": 15495
|
|
},
|
|
{
|
|
"epoch": 4.895838268972597,
|
|
"grad_norm": 0.07200468939704618,
|
|
"learning_rate": 0.001208828573211578,
|
|
"loss": 2.497,
|
|
"step": 15500
|
|
},
|
|
{
|
|
"epoch": 4.897417673537077,
|
|
"grad_norm": 0.08119284123858826,
|
|
"learning_rate": 0.0012082892528015204,
|
|
"loss": 2.544,
|
|
"step": 15505
|
|
},
|
|
{
|
|
"epoch": 4.898997078101556,
|
|
"grad_norm": 0.06423832044178142,
|
|
"learning_rate": 0.0012077498690520314,
|
|
"loss": 2.5031,
|
|
"step": 15510
|
|
},
|
|
{
|
|
"epoch": 4.900576482666035,
|
|
"grad_norm": 0.07950608792266346,
|
|
"learning_rate": 0.001207210422127135,
|
|
"loss": 2.4954,
|
|
"step": 15515
|
|
},
|
|
{
|
|
"epoch": 4.902155887230514,
|
|
"grad_norm": 0.08852436333106962,
|
|
"learning_rate": 0.001206670912190873,
|
|
"loss": 2.3703,
|
|
"step": 15520
|
|
},
|
|
{
|
|
"epoch": 4.903735291794993,
|
|
"grad_norm": 0.05748637559319089,
|
|
"learning_rate": 0.0012061313394073068,
|
|
"loss": 2.4484,
|
|
"step": 15525
|
|
},
|
|
{
|
|
"epoch": 4.905314696359472,
|
|
"grad_norm": 0.05847823169655169,
|
|
"learning_rate": 0.0012055917039405176,
|
|
"loss": 2.497,
|
|
"step": 15530
|
|
},
|
|
{
|
|
"epoch": 4.906894100923951,
|
|
"grad_norm": 0.058178719585947246,
|
|
"learning_rate": 0.0012050520059546047,
|
|
"loss": 2.4362,
|
|
"step": 15535
|
|
},
|
|
{
|
|
"epoch": 4.9084735054884305,
|
|
"grad_norm": 0.06385032415775303,
|
|
"learning_rate": 0.001204512245613687,
|
|
"loss": 2.4866,
|
|
"step": 15540
|
|
},
|
|
{
|
|
"epoch": 4.91005291005291,
|
|
"grad_norm": 0.07410022568643862,
|
|
"learning_rate": 0.0012039724230819017,
|
|
"loss": 2.4655,
|
|
"step": 15545
|
|
},
|
|
{
|
|
"epoch": 4.911632314617389,
|
|
"grad_norm": 0.06652306615010183,
|
|
"learning_rate": 0.0012034325385234061,
|
|
"loss": 2.4223,
|
|
"step": 15550
|
|
},
|
|
{
|
|
"epoch": 4.913211719181868,
|
|
"grad_norm": 0.05840175803667608,
|
|
"learning_rate": 0.0012028925921023753,
|
|
"loss": 2.4936,
|
|
"step": 15555
|
|
},
|
|
{
|
|
"epoch": 4.9147911237463475,
|
|
"grad_norm": 0.06306712385491223,
|
|
"learning_rate": 0.0012023525839830037,
|
|
"loss": 2.4684,
|
|
"step": 15560
|
|
},
|
|
{
|
|
"epoch": 4.916370528310827,
|
|
"grad_norm": 0.07921764574952989,
|
|
"learning_rate": 0.0012018125143295037,
|
|
"loss": 2.4417,
|
|
"step": 15565
|
|
},
|
|
{
|
|
"epoch": 4.917949932875306,
|
|
"grad_norm": 0.06100750172891688,
|
|
"learning_rate": 0.0012012723833061077,
|
|
"loss": 2.4098,
|
|
"step": 15570
|
|
},
|
|
{
|
|
"epoch": 4.919529337439785,
|
|
"grad_norm": 0.06070684128663812,
|
|
"learning_rate": 0.0012007321910770662,
|
|
"loss": 2.4733,
|
|
"step": 15575
|
|
},
|
|
{
|
|
"epoch": 4.9211087420042645,
|
|
"grad_norm": 0.05769476252807484,
|
|
"learning_rate": 0.0012001919378066474,
|
|
"loss": 2.4538,
|
|
"step": 15580
|
|
},
|
|
{
|
|
"epoch": 4.922688146568744,
|
|
"grad_norm": 0.06108438140943986,
|
|
"learning_rate": 0.0011996516236591397,
|
|
"loss": 2.4146,
|
|
"step": 15585
|
|
},
|
|
{
|
|
"epoch": 4.924267551133223,
|
|
"grad_norm": 0.06125485985860159,
|
|
"learning_rate": 0.0011991112487988488,
|
|
"loss": 2.4685,
|
|
"step": 15590
|
|
},
|
|
{
|
|
"epoch": 4.925846955697702,
|
|
"grad_norm": 0.0874930175823073,
|
|
"learning_rate": 0.0011985708133900993,
|
|
"loss": 2.5056,
|
|
"step": 15595
|
|
},
|
|
{
|
|
"epoch": 4.9274263602621815,
|
|
"grad_norm": 0.06731369921187405,
|
|
"learning_rate": 0.0011980303175972342,
|
|
"loss": 2.4626,
|
|
"step": 15600
|
|
},
|
|
{
|
|
"epoch": 4.929005764826661,
|
|
"grad_norm": 0.07144118656733887,
|
|
"learning_rate": 0.0011974897615846147,
|
|
"loss": 2.4938,
|
|
"step": 15605
|
|
},
|
|
{
|
|
"epoch": 4.93058516939114,
|
|
"grad_norm": 0.07594671817521159,
|
|
"learning_rate": 0.0011969491455166206,
|
|
"loss": 2.4213,
|
|
"step": 15610
|
|
},
|
|
{
|
|
"epoch": 4.932164573955618,
|
|
"grad_norm": 0.06292956355831127,
|
|
"learning_rate": 0.0011964084695576496,
|
|
"loss": 2.4995,
|
|
"step": 15615
|
|
},
|
|
{
|
|
"epoch": 4.933743978520098,
|
|
"grad_norm": 0.05708705811745932,
|
|
"learning_rate": 0.001195867733872118,
|
|
"loss": 2.5237,
|
|
"step": 15620
|
|
},
|
|
{
|
|
"epoch": 4.935323383084577,
|
|
"grad_norm": 0.07187282633092404,
|
|
"learning_rate": 0.0011953269386244597,
|
|
"loss": 2.4704,
|
|
"step": 15625
|
|
},
|
|
{
|
|
"epoch": 4.936902787649056,
|
|
"grad_norm": 0.06076469305755183,
|
|
"learning_rate": 0.0011947860839791277,
|
|
"loss": 2.4851,
|
|
"step": 15630
|
|
},
|
|
{
|
|
"epoch": 4.938482192213535,
|
|
"grad_norm": 0.05770019943142122,
|
|
"learning_rate": 0.0011942451701005918,
|
|
"loss": 2.528,
|
|
"step": 15635
|
|
},
|
|
{
|
|
"epoch": 4.940061596778015,
|
|
"grad_norm": 0.08398343738666424,
|
|
"learning_rate": 0.0011937041971533406,
|
|
"loss": 2.4163,
|
|
"step": 15640
|
|
},
|
|
{
|
|
"epoch": 4.941641001342494,
|
|
"grad_norm": 0.07544733825597935,
|
|
"learning_rate": 0.001193163165301881,
|
|
"loss": 2.4659,
|
|
"step": 15645
|
|
},
|
|
{
|
|
"epoch": 4.943220405906973,
|
|
"grad_norm": 0.07023987207599723,
|
|
"learning_rate": 0.0011926220747107371,
|
|
"loss": 2.4363,
|
|
"step": 15650
|
|
},
|
|
{
|
|
"epoch": 4.944799810471452,
|
|
"grad_norm": 0.07764060150434646,
|
|
"learning_rate": 0.0011920809255444506,
|
|
"loss": 2.5797,
|
|
"step": 15655
|
|
},
|
|
{
|
|
"epoch": 4.946379215035932,
|
|
"grad_norm": 0.0659107035007278,
|
|
"learning_rate": 0.001191539717967582,
|
|
"loss": 2.4964,
|
|
"step": 15660
|
|
},
|
|
{
|
|
"epoch": 4.947958619600411,
|
|
"grad_norm": 0.06660798481623668,
|
|
"learning_rate": 0.001190998452144709,
|
|
"loss": 2.3891,
|
|
"step": 15665
|
|
},
|
|
{
|
|
"epoch": 4.94953802416489,
|
|
"grad_norm": 0.062241320891403946,
|
|
"learning_rate": 0.001190457128240427,
|
|
"loss": 2.4861,
|
|
"step": 15670
|
|
},
|
|
{
|
|
"epoch": 4.951117428729369,
|
|
"grad_norm": 0.07985351613599452,
|
|
"learning_rate": 0.0011899157464193492,
|
|
"loss": 2.6429,
|
|
"step": 15675
|
|
},
|
|
{
|
|
"epoch": 4.952696833293849,
|
|
"grad_norm": 0.05954129453508578,
|
|
"learning_rate": 0.0011893743068461062,
|
|
"loss": 2.5111,
|
|
"step": 15680
|
|
},
|
|
{
|
|
"epoch": 4.954276237858327,
|
|
"grad_norm": 0.06007797509115445,
|
|
"learning_rate": 0.0011888328096853465,
|
|
"loss": 2.5346,
|
|
"step": 15685
|
|
},
|
|
{
|
|
"epoch": 4.955855642422806,
|
|
"grad_norm": 0.05823114187959376,
|
|
"learning_rate": 0.0011882912551017361,
|
|
"loss": 2.5305,
|
|
"step": 15690
|
|
},
|
|
{
|
|
"epoch": 4.957435046987285,
|
|
"grad_norm": 0.05736393358705801,
|
|
"learning_rate": 0.001187749643259958,
|
|
"loss": 2.4991,
|
|
"step": 15695
|
|
},
|
|
{
|
|
"epoch": 4.959014451551765,
|
|
"grad_norm": 0.08238671258192874,
|
|
"learning_rate": 0.0011872079743247125,
|
|
"loss": 2.4749,
|
|
"step": 15700
|
|
},
|
|
{
|
|
"epoch": 4.960593856116244,
|
|
"grad_norm": 0.06298808861703957,
|
|
"learning_rate": 0.0011866662484607184,
|
|
"loss": 2.4865,
|
|
"step": 15705
|
|
},
|
|
{
|
|
"epoch": 4.962173260680723,
|
|
"grad_norm": 0.05941530849476449,
|
|
"learning_rate": 0.0011861244658327112,
|
|
"loss": 2.4975,
|
|
"step": 15710
|
|
},
|
|
{
|
|
"epoch": 4.963752665245202,
|
|
"grad_norm": 0.0666754246041905,
|
|
"learning_rate": 0.0011855826266054424,
|
|
"loss": 2.4319,
|
|
"step": 15715
|
|
},
|
|
{
|
|
"epoch": 4.965332069809682,
|
|
"grad_norm": 0.05855819542081744,
|
|
"learning_rate": 0.001185040730943683,
|
|
"loss": 2.4817,
|
|
"step": 15720
|
|
},
|
|
{
|
|
"epoch": 4.966911474374161,
|
|
"grad_norm": 0.062468705971845444,
|
|
"learning_rate": 0.0011844987790122195,
|
|
"loss": 2.4615,
|
|
"step": 15725
|
|
},
|
|
{
|
|
"epoch": 4.96849087893864,
|
|
"grad_norm": 0.06404754778845122,
|
|
"learning_rate": 0.0011839567709758558,
|
|
"loss": 2.4788,
|
|
"step": 15730
|
|
},
|
|
{
|
|
"epoch": 4.970070283503119,
|
|
"grad_norm": 0.058087083590399054,
|
|
"learning_rate": 0.001183414706999414,
|
|
"loss": 2.5381,
|
|
"step": 15735
|
|
},
|
|
{
|
|
"epoch": 4.971649688067599,
|
|
"grad_norm": 0.05669964896838692,
|
|
"learning_rate": 0.0011828725872477313,
|
|
"loss": 2.5212,
|
|
"step": 15740
|
|
},
|
|
{
|
|
"epoch": 4.973229092632078,
|
|
"grad_norm": 0.061419590892372596,
|
|
"learning_rate": 0.001182330411885663,
|
|
"loss": 2.5256,
|
|
"step": 15745
|
|
},
|
|
{
|
|
"epoch": 4.974808497196557,
|
|
"grad_norm": 0.06465431498544376,
|
|
"learning_rate": 0.0011817881810780816,
|
|
"loss": 2.513,
|
|
"step": 15750
|
|
},
|
|
{
|
|
"epoch": 4.976387901761036,
|
|
"grad_norm": 0.07385204635840029,
|
|
"learning_rate": 0.0011812458949898759,
|
|
"loss": 2.4556,
|
|
"step": 15755
|
|
},
|
|
{
|
|
"epoch": 4.977967306325516,
|
|
"grad_norm": 0.05803605018835446,
|
|
"learning_rate": 0.0011807035537859513,
|
|
"loss": 2.4638,
|
|
"step": 15760
|
|
},
|
|
{
|
|
"epoch": 4.979546710889995,
|
|
"grad_norm": 0.06856048210261467,
|
|
"learning_rate": 0.001180161157631231,
|
|
"loss": 2.4593,
|
|
"step": 15765
|
|
},
|
|
{
|
|
"epoch": 4.981126115454474,
|
|
"grad_norm": 0.052510067991917486,
|
|
"learning_rate": 0.0011796187066906534,
|
|
"loss": 2.5206,
|
|
"step": 15770
|
|
},
|
|
{
|
|
"epoch": 4.9827055200189525,
|
|
"grad_norm": 0.06025897027411644,
|
|
"learning_rate": 0.0011790762011291748,
|
|
"loss": 2.4054,
|
|
"step": 15775
|
|
},
|
|
{
|
|
"epoch": 4.984284924583432,
|
|
"grad_norm": 0.06761575243212609,
|
|
"learning_rate": 0.0011785336411117675,
|
|
"loss": 2.5053,
|
|
"step": 15780
|
|
},
|
|
{
|
|
"epoch": 4.985864329147911,
|
|
"grad_norm": 0.057767353934116694,
|
|
"learning_rate": 0.0011779910268034208,
|
|
"loss": 2.5243,
|
|
"step": 15785
|
|
},
|
|
{
|
|
"epoch": 4.98744373371239,
|
|
"grad_norm": 0.06466723147966426,
|
|
"learning_rate": 0.0011774483583691397,
|
|
"loss": 2.4687,
|
|
"step": 15790
|
|
},
|
|
{
|
|
"epoch": 4.9890231382768695,
|
|
"grad_norm": 0.060452145161182434,
|
|
"learning_rate": 0.001176905635973947,
|
|
"loss": 2.4447,
|
|
"step": 15795
|
|
},
|
|
{
|
|
"epoch": 4.990602542841349,
|
|
"grad_norm": 0.06364205478369672,
|
|
"learning_rate": 0.0011763628597828803,
|
|
"loss": 2.6067,
|
|
"step": 15800
|
|
},
|
|
{
|
|
"epoch": 4.992181947405828,
|
|
"grad_norm": 0.060599709328997095,
|
|
"learning_rate": 0.0011758200299609952,
|
|
"loss": 2.3731,
|
|
"step": 15805
|
|
},
|
|
{
|
|
"epoch": 4.993761351970307,
|
|
"grad_norm": 0.06830949840866171,
|
|
"learning_rate": 0.001175277146673362,
|
|
"loss": 2.4716,
|
|
"step": 15810
|
|
},
|
|
{
|
|
"epoch": 4.9953407565347865,
|
|
"grad_norm": 0.09082852984432674,
|
|
"learning_rate": 0.0011747342100850685,
|
|
"loss": 2.4882,
|
|
"step": 15815
|
|
},
|
|
{
|
|
"epoch": 4.996920161099266,
|
|
"grad_norm": 0.06611274814656555,
|
|
"learning_rate": 0.001174191220361218,
|
|
"loss": 2.4938,
|
|
"step": 15820
|
|
},
|
|
{
|
|
"epoch": 4.998499565663745,
|
|
"grad_norm": 0.0651948980122603,
|
|
"learning_rate": 0.0011736481776669307,
|
|
"loss": 2.4455,
|
|
"step": 15825
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.10218245823941818,
|
|
"learning_rate": 0.0011731050821673417,
|
|
"loss": 2.5349,
|
|
"step": 15830
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"eval_loss": 2.4879767894744873,
|
|
"eval_runtime": 118.4334,
|
|
"eval_samples_per_second": 22.367,
|
|
"eval_steps_per_second": 5.598,
|
|
"step": 15830
|
|
},
|
|
{
|
|
"epoch": 5.001579404564479,
|
|
"grad_norm": 0.05936455869948697,
|
|
"learning_rate": 0.001172561934027603,
|
|
"loss": 2.4368,
|
|
"step": 15835
|
|
},
|
|
{
|
|
"epoch": 5.0031588091289585,
|
|
"grad_norm": 0.05178263702404628,
|
|
"learning_rate": 0.0011720187334128829,
|
|
"loss": 2.5044,
|
|
"step": 15840
|
|
},
|
|
{
|
|
"epoch": 5.004738213693438,
|
|
"grad_norm": 0.05428143758672858,
|
|
"learning_rate": 0.001171475480488365,
|
|
"loss": 2.4597,
|
|
"step": 15845
|
|
},
|
|
{
|
|
"epoch": 5.006317618257917,
|
|
"grad_norm": 0.07558504457842352,
|
|
"learning_rate": 0.0011709321754192492,
|
|
"loss": 2.4644,
|
|
"step": 15850
|
|
},
|
|
{
|
|
"epoch": 5.007897022822396,
|
|
"grad_norm": 0.05747418106633769,
|
|
"learning_rate": 0.0011703888183707512,
|
|
"loss": 2.4854,
|
|
"step": 15855
|
|
},
|
|
{
|
|
"epoch": 5.0094764273868755,
|
|
"grad_norm": 0.08243436868767362,
|
|
"learning_rate": 0.0011698454095081018,
|
|
"loss": 2.4989,
|
|
"step": 15860
|
|
},
|
|
{
|
|
"epoch": 5.011055831951355,
|
|
"grad_norm": 0.05952542577324716,
|
|
"learning_rate": 0.0011693019489965484,
|
|
"loss": 2.4299,
|
|
"step": 15865
|
|
},
|
|
{
|
|
"epoch": 5.012635236515833,
|
|
"grad_norm": 0.07313668146611062,
|
|
"learning_rate": 0.0011687584370013544,
|
|
"loss": 2.475,
|
|
"step": 15870
|
|
},
|
|
{
|
|
"epoch": 5.014214641080312,
|
|
"grad_norm": 0.06373345839897392,
|
|
"learning_rate": 0.001168214873687798,
|
|
"loss": 2.4881,
|
|
"step": 15875
|
|
},
|
|
{
|
|
"epoch": 5.015794045644792,
|
|
"grad_norm": 0.07053297624719784,
|
|
"learning_rate": 0.0011676712592211729,
|
|
"loss": 2.3989,
|
|
"step": 15880
|
|
},
|
|
{
|
|
"epoch": 5.017373450209271,
|
|
"grad_norm": 0.06959343635310479,
|
|
"learning_rate": 0.0011671275937667894,
|
|
"loss": 2.4974,
|
|
"step": 15885
|
|
},
|
|
{
|
|
"epoch": 5.01895285477375,
|
|
"grad_norm": 0.05298602620521121,
|
|
"learning_rate": 0.0011665838774899719,
|
|
"loss": 2.4695,
|
|
"step": 15890
|
|
},
|
|
{
|
|
"epoch": 5.020532259338229,
|
|
"grad_norm": 0.06081738360944861,
|
|
"learning_rate": 0.0011660401105560623,
|
|
"loss": 2.4367,
|
|
"step": 15895
|
|
},
|
|
{
|
|
"epoch": 5.022111663902709,
|
|
"grad_norm": 0.05957421304787897,
|
|
"learning_rate": 0.0011654962931304158,
|
|
"loss": 2.607,
|
|
"step": 15900
|
|
},
|
|
{
|
|
"epoch": 5.023691068467188,
|
|
"grad_norm": 0.058059781037168315,
|
|
"learning_rate": 0.0011649524253784036,
|
|
"loss": 2.4989,
|
|
"step": 15905
|
|
},
|
|
{
|
|
"epoch": 5.025270473031667,
|
|
"grad_norm": 0.06481902623330911,
|
|
"learning_rate": 0.001164408507465413,
|
|
"loss": 2.4182,
|
|
"step": 15910
|
|
},
|
|
{
|
|
"epoch": 5.026849877596146,
|
|
"grad_norm": 0.06644572366396971,
|
|
"learning_rate": 0.0011638645395568457,
|
|
"loss": 2.4966,
|
|
"step": 15915
|
|
},
|
|
{
|
|
"epoch": 5.028429282160626,
|
|
"grad_norm": 0.09413712656237916,
|
|
"learning_rate": 0.0011633205218181191,
|
|
"loss": 2.4376,
|
|
"step": 15920
|
|
},
|
|
{
|
|
"epoch": 5.030008686725105,
|
|
"grad_norm": 0.09649674809369083,
|
|
"learning_rate": 0.001162776454414665,
|
|
"loss": 2.4057,
|
|
"step": 15925
|
|
},
|
|
{
|
|
"epoch": 5.031588091289584,
|
|
"grad_norm": 0.06840301674179984,
|
|
"learning_rate": 0.001162232337511931,
|
|
"loss": 2.4578,
|
|
"step": 15930
|
|
},
|
|
{
|
|
"epoch": 5.033167495854063,
|
|
"grad_norm": 0.06298972187905075,
|
|
"learning_rate": 0.0011616881712753799,
|
|
"loss": 2.5106,
|
|
"step": 15935
|
|
},
|
|
{
|
|
"epoch": 5.034746900418543,
|
|
"grad_norm": 0.08500448183013605,
|
|
"learning_rate": 0.001161143955870489,
|
|
"loss": 2.4424,
|
|
"step": 15940
|
|
},
|
|
{
|
|
"epoch": 5.036326304983022,
|
|
"grad_norm": 0.0666700102574268,
|
|
"learning_rate": 0.0011605996914627508,
|
|
"loss": 2.5119,
|
|
"step": 15945
|
|
},
|
|
{
|
|
"epoch": 5.0379057095475,
|
|
"grad_norm": 0.059598529645478314,
|
|
"learning_rate": 0.0011600553782176724,
|
|
"loss": 2.4163,
|
|
"step": 15950
|
|
},
|
|
{
|
|
"epoch": 5.039485114111979,
|
|
"grad_norm": 0.07605493015186099,
|
|
"learning_rate": 0.0011595110163007758,
|
|
"loss": 2.5509,
|
|
"step": 15955
|
|
},
|
|
{
|
|
"epoch": 5.041064518676459,
|
|
"grad_norm": 0.0648202884729895,
|
|
"learning_rate": 0.0011589666058775985,
|
|
"loss": 2.4685,
|
|
"step": 15960
|
|
},
|
|
{
|
|
"epoch": 5.042643923240938,
|
|
"grad_norm": 0.08600988911396924,
|
|
"learning_rate": 0.0011584221471136924,
|
|
"loss": 2.5668,
|
|
"step": 15965
|
|
},
|
|
{
|
|
"epoch": 5.044223327805417,
|
|
"grad_norm": 0.07635689265264523,
|
|
"learning_rate": 0.0011578776401746232,
|
|
"loss": 2.5022,
|
|
"step": 15970
|
|
},
|
|
{
|
|
"epoch": 5.045802732369896,
|
|
"grad_norm": 0.06792785847659778,
|
|
"learning_rate": 0.0011573330852259723,
|
|
"loss": 2.5037,
|
|
"step": 15975
|
|
},
|
|
{
|
|
"epoch": 5.047382136934376,
|
|
"grad_norm": 0.0792531105851889,
|
|
"learning_rate": 0.0011567884824333352,
|
|
"loss": 2.4579,
|
|
"step": 15980
|
|
},
|
|
{
|
|
"epoch": 5.048961541498855,
|
|
"grad_norm": 0.05561313486502166,
|
|
"learning_rate": 0.001156243831962323,
|
|
"loss": 2.4794,
|
|
"step": 15985
|
|
},
|
|
{
|
|
"epoch": 5.050540946063334,
|
|
"grad_norm": 0.060426396256911145,
|
|
"learning_rate": 0.0011556991339785594,
|
|
"loss": 2.4779,
|
|
"step": 15990
|
|
},
|
|
{
|
|
"epoch": 5.052120350627813,
|
|
"grad_norm": 0.0743286410045955,
|
|
"learning_rate": 0.001155154388647684,
|
|
"loss": 2.5053,
|
|
"step": 15995
|
|
},
|
|
{
|
|
"epoch": 5.053699755192293,
|
|
"grad_norm": 0.0642410054621103,
|
|
"learning_rate": 0.00115460959613535,
|
|
"loss": 2.4671,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 5.055279159756772,
|
|
"grad_norm": 0.08813869602163044,
|
|
"learning_rate": 0.0011540647566072257,
|
|
"loss": 2.5639,
|
|
"step": 16005
|
|
},
|
|
{
|
|
"epoch": 5.056858564321251,
|
|
"grad_norm": 0.06600521276012898,
|
|
"learning_rate": 0.0011535198702289939,
|
|
"loss": 2.437,
|
|
"step": 16010
|
|
},
|
|
{
|
|
"epoch": 5.05843796888573,
|
|
"grad_norm": 0.07586319419867858,
|
|
"learning_rate": 0.00115297493716635,
|
|
"loss": 2.4102,
|
|
"step": 16015
|
|
},
|
|
{
|
|
"epoch": 5.06001737345021,
|
|
"grad_norm": 0.05790710792159861,
|
|
"learning_rate": 0.0011524299575850047,
|
|
"loss": 2.5159,
|
|
"step": 16020
|
|
},
|
|
{
|
|
"epoch": 5.061596778014689,
|
|
"grad_norm": 0.06365084586078266,
|
|
"learning_rate": 0.0011518849316506836,
|
|
"loss": 2.432,
|
|
"step": 16025
|
|
},
|
|
{
|
|
"epoch": 5.063176182579167,
|
|
"grad_norm": 0.08387513396665641,
|
|
"learning_rate": 0.0011513398595291253,
|
|
"loss": 2.4663,
|
|
"step": 16030
|
|
},
|
|
{
|
|
"epoch": 5.0647555871436465,
|
|
"grad_norm": 0.07357946696135872,
|
|
"learning_rate": 0.0011507947413860826,
|
|
"loss": 2.6234,
|
|
"step": 16035
|
|
},
|
|
{
|
|
"epoch": 5.066334991708126,
|
|
"grad_norm": 0.08477276604753632,
|
|
"learning_rate": 0.0011502495773873225,
|
|
"loss": 2.525,
|
|
"step": 16040
|
|
},
|
|
{
|
|
"epoch": 5.067914396272605,
|
|
"grad_norm": 0.06536941662454575,
|
|
"learning_rate": 0.0011497043676986255,
|
|
"loss": 2.5088,
|
|
"step": 16045
|
|
},
|
|
{
|
|
"epoch": 5.069493800837084,
|
|
"grad_norm": 0.06999318385254447,
|
|
"learning_rate": 0.0011491591124857873,
|
|
"loss": 2.5295,
|
|
"step": 16050
|
|
},
|
|
{
|
|
"epoch": 5.0710732054015635,
|
|
"grad_norm": 0.0785145953519047,
|
|
"learning_rate": 0.0011486138119146162,
|
|
"loss": 2.6605,
|
|
"step": 16055
|
|
},
|
|
{
|
|
"epoch": 5.072652609966043,
|
|
"grad_norm": 0.05697907119882617,
|
|
"learning_rate": 0.0011480684661509337,
|
|
"loss": 2.4912,
|
|
"step": 16060
|
|
},
|
|
{
|
|
"epoch": 5.074232014530522,
|
|
"grad_norm": 0.058279369150634495,
|
|
"learning_rate": 0.001147523075360577,
|
|
"loss": 2.478,
|
|
"step": 16065
|
|
},
|
|
{
|
|
"epoch": 5.075811419095001,
|
|
"grad_norm": 0.09506533540972528,
|
|
"learning_rate": 0.0011469776397093955,
|
|
"loss": 2.6287,
|
|
"step": 16070
|
|
},
|
|
{
|
|
"epoch": 5.0773908236594805,
|
|
"grad_norm": 0.05870646987675661,
|
|
"learning_rate": 0.0011464321593632532,
|
|
"loss": 2.4934,
|
|
"step": 16075
|
|
},
|
|
{
|
|
"epoch": 5.07897022822396,
|
|
"grad_norm": 0.0751172381201278,
|
|
"learning_rate": 0.0011458866344880266,
|
|
"loss": 2.4517,
|
|
"step": 16080
|
|
},
|
|
{
|
|
"epoch": 5.080549632788439,
|
|
"grad_norm": 0.04860545935891463,
|
|
"learning_rate": 0.0011453410652496063,
|
|
"loss": 2.5204,
|
|
"step": 16085
|
|
},
|
|
{
|
|
"epoch": 5.082129037352918,
|
|
"grad_norm": 0.113731797770735,
|
|
"learning_rate": 0.001144795451813897,
|
|
"loss": 2.4957,
|
|
"step": 16090
|
|
},
|
|
{
|
|
"epoch": 5.0837084419173975,
|
|
"grad_norm": 0.06105826352877905,
|
|
"learning_rate": 0.0011442497943468157,
|
|
"loss": 2.5007,
|
|
"step": 16095
|
|
},
|
|
{
|
|
"epoch": 5.085287846481877,
|
|
"grad_norm": 0.06997079522607595,
|
|
"learning_rate": 0.001143704093014294,
|
|
"loss": 2.4797,
|
|
"step": 16100
|
|
},
|
|
{
|
|
"epoch": 5.086867251046356,
|
|
"grad_norm": 0.07272291158991666,
|
|
"learning_rate": 0.0011431583479822754,
|
|
"loss": 2.449,
|
|
"step": 16105
|
|
},
|
|
{
|
|
"epoch": 5.088446655610834,
|
|
"grad_norm": 0.06902835792256261,
|
|
"learning_rate": 0.001142612559416718,
|
|
"loss": 2.5247,
|
|
"step": 16110
|
|
},
|
|
{
|
|
"epoch": 5.090026060175314,
|
|
"grad_norm": 0.07561989736986836,
|
|
"learning_rate": 0.001142066727483592,
|
|
"loss": 2.5351,
|
|
"step": 16115
|
|
},
|
|
{
|
|
"epoch": 5.091605464739793,
|
|
"grad_norm": 0.09065227667689367,
|
|
"learning_rate": 0.0011415208523488825,
|
|
"loss": 2.4434,
|
|
"step": 16120
|
|
},
|
|
{
|
|
"epoch": 5.093184869304272,
|
|
"grad_norm": 0.054130632758493794,
|
|
"learning_rate": 0.0011409749341785857,
|
|
"loss": 2.4761,
|
|
"step": 16125
|
|
},
|
|
{
|
|
"epoch": 5.094764273868751,
|
|
"grad_norm": 0.05406569880209989,
|
|
"learning_rate": 0.0011404289731387122,
|
|
"loss": 2.5257,
|
|
"step": 16130
|
|
},
|
|
{
|
|
"epoch": 5.0963436784332306,
|
|
"grad_norm": 0.07165719721614144,
|
|
"learning_rate": 0.001139882969395285,
|
|
"loss": 2.5085,
|
|
"step": 16135
|
|
},
|
|
{
|
|
"epoch": 5.09792308299771,
|
|
"grad_norm": 0.061909141866988184,
|
|
"learning_rate": 0.0011393369231143405,
|
|
"loss": 2.4395,
|
|
"step": 16140
|
|
},
|
|
{
|
|
"epoch": 5.099502487562189,
|
|
"grad_norm": 0.08023843002538841,
|
|
"learning_rate": 0.0011387908344619281,
|
|
"loss": 2.5965,
|
|
"step": 16145
|
|
},
|
|
{
|
|
"epoch": 5.101081892126668,
|
|
"grad_norm": 0.07634235626975495,
|
|
"learning_rate": 0.00113824470360411,
|
|
"loss": 2.5738,
|
|
"step": 16150
|
|
},
|
|
{
|
|
"epoch": 5.1026612966911475,
|
|
"grad_norm": 0.0604585853738121,
|
|
"learning_rate": 0.0011376985307069605,
|
|
"loss": 2.4193,
|
|
"step": 16155
|
|
},
|
|
{
|
|
"epoch": 5.104240701255627,
|
|
"grad_norm": 0.057772266673428295,
|
|
"learning_rate": 0.0011371523159365675,
|
|
"loss": 2.4257,
|
|
"step": 16160
|
|
},
|
|
{
|
|
"epoch": 5.105820105820106,
|
|
"grad_norm": 0.0718835525811546,
|
|
"learning_rate": 0.0011366060594590317,
|
|
"loss": 2.5369,
|
|
"step": 16165
|
|
},
|
|
{
|
|
"epoch": 5.107399510384585,
|
|
"grad_norm": 0.06032148212252233,
|
|
"learning_rate": 0.0011360597614404663,
|
|
"loss": 2.4418,
|
|
"step": 16170
|
|
},
|
|
{
|
|
"epoch": 5.1089789149490645,
|
|
"grad_norm": 0.06628365602520163,
|
|
"learning_rate": 0.001135513422046996,
|
|
"loss": 2.5456,
|
|
"step": 16175
|
|
},
|
|
{
|
|
"epoch": 5.110558319513544,
|
|
"grad_norm": 0.061422640210239805,
|
|
"learning_rate": 0.0011349670414447603,
|
|
"loss": 2.4793,
|
|
"step": 16180
|
|
},
|
|
{
|
|
"epoch": 5.112137724078023,
|
|
"grad_norm": 0.05771049193758285,
|
|
"learning_rate": 0.0011344206197999094,
|
|
"loss": 2.4786,
|
|
"step": 16185
|
|
},
|
|
{
|
|
"epoch": 5.113717128642501,
|
|
"grad_norm": 0.06091115608367704,
|
|
"learning_rate": 0.0011338741572786072,
|
|
"loss": 2.5138,
|
|
"step": 16190
|
|
},
|
|
{
|
|
"epoch": 5.115296533206981,
|
|
"grad_norm": 0.06074604808714699,
|
|
"learning_rate": 0.0011333276540470292,
|
|
"loss": 2.3742,
|
|
"step": 16195
|
|
},
|
|
{
|
|
"epoch": 5.11687593777146,
|
|
"grad_norm": 0.05479575421173105,
|
|
"learning_rate": 0.0011327811102713632,
|
|
"loss": 2.534,
|
|
"step": 16200
|
|
},
|
|
{
|
|
"epoch": 5.118455342335939,
|
|
"grad_norm": 0.06195142036541382,
|
|
"learning_rate": 0.0011322345261178097,
|
|
"loss": 2.5255,
|
|
"step": 16205
|
|
},
|
|
{
|
|
"epoch": 5.120034746900418,
|
|
"grad_norm": 0.06262974702071916,
|
|
"learning_rate": 0.001131687901752582,
|
|
"loss": 2.4125,
|
|
"step": 16210
|
|
},
|
|
{
|
|
"epoch": 5.121614151464898,
|
|
"grad_norm": 0.0633672990276845,
|
|
"learning_rate": 0.001131141237341905,
|
|
"loss": 2.4914,
|
|
"step": 16215
|
|
},
|
|
{
|
|
"epoch": 5.123193556029377,
|
|
"grad_norm": 0.0643598624522212,
|
|
"learning_rate": 0.0011305945330520152,
|
|
"loss": 2.4156,
|
|
"step": 16220
|
|
},
|
|
{
|
|
"epoch": 5.124772960593856,
|
|
"grad_norm": 0.061094639303892535,
|
|
"learning_rate": 0.0011300477890491623,
|
|
"loss": 2.4471,
|
|
"step": 16225
|
|
},
|
|
{
|
|
"epoch": 5.126352365158335,
|
|
"grad_norm": 0.052171533316898965,
|
|
"learning_rate": 0.0011295010054996077,
|
|
"loss": 2.4441,
|
|
"step": 16230
|
|
},
|
|
{
|
|
"epoch": 5.127931769722815,
|
|
"grad_norm": 0.056466913910174894,
|
|
"learning_rate": 0.0011289541825696247,
|
|
"loss": 2.485,
|
|
"step": 16235
|
|
},
|
|
{
|
|
"epoch": 5.129511174287294,
|
|
"grad_norm": 0.07572727322827039,
|
|
"learning_rate": 0.001128407320425499,
|
|
"loss": 2.484,
|
|
"step": 16240
|
|
},
|
|
{
|
|
"epoch": 5.131090578851773,
|
|
"grad_norm": 0.06482262428381254,
|
|
"learning_rate": 0.0011278604192335273,
|
|
"loss": 2.4612,
|
|
"step": 16245
|
|
},
|
|
{
|
|
"epoch": 5.132669983416252,
|
|
"grad_norm": 0.06568181975286745,
|
|
"learning_rate": 0.001127313479160019,
|
|
"loss": 2.377,
|
|
"step": 16250
|
|
},
|
|
{
|
|
"epoch": 5.134249387980732,
|
|
"grad_norm": 0.06519208053816444,
|
|
"learning_rate": 0.0011267665003712951,
|
|
"loss": 2.4845,
|
|
"step": 16255
|
|
},
|
|
{
|
|
"epoch": 5.135828792545211,
|
|
"grad_norm": 0.056410431836072604,
|
|
"learning_rate": 0.0011262194830336887,
|
|
"loss": 2.5283,
|
|
"step": 16260
|
|
},
|
|
{
|
|
"epoch": 5.137408197109689,
|
|
"grad_norm": 0.06500325338501298,
|
|
"learning_rate": 0.0011256724273135438,
|
|
"loss": 2.4445,
|
|
"step": 16265
|
|
},
|
|
{
|
|
"epoch": 5.1389876016741685,
|
|
"grad_norm": 0.06452541716306441,
|
|
"learning_rate": 0.0011251253333772165,
|
|
"loss": 2.3963,
|
|
"step": 16270
|
|
},
|
|
{
|
|
"epoch": 5.140567006238648,
|
|
"grad_norm": 0.05911542687700847,
|
|
"learning_rate": 0.0011245782013910748,
|
|
"loss": 2.4702,
|
|
"step": 16275
|
|
},
|
|
{
|
|
"epoch": 5.142146410803127,
|
|
"grad_norm": 0.05073585115407527,
|
|
"learning_rate": 0.001124031031521498,
|
|
"loss": 2.3791,
|
|
"step": 16280
|
|
},
|
|
{
|
|
"epoch": 5.143725815367606,
|
|
"grad_norm": 0.05977816183764443,
|
|
"learning_rate": 0.0011234838239348773,
|
|
"loss": 2.6144,
|
|
"step": 16285
|
|
},
|
|
{
|
|
"epoch": 5.1453052199320854,
|
|
"grad_norm": 0.11924068568162381,
|
|
"learning_rate": 0.0011229365787976144,
|
|
"loss": 2.5134,
|
|
"step": 16290
|
|
},
|
|
{
|
|
"epoch": 5.146884624496565,
|
|
"grad_norm": 0.0734698954709752,
|
|
"learning_rate": 0.0011223892962761233,
|
|
"loss": 2.5135,
|
|
"step": 16295
|
|
},
|
|
{
|
|
"epoch": 5.148464029061044,
|
|
"grad_norm": 0.06039434476566112,
|
|
"learning_rate": 0.0011218419765368294,
|
|
"loss": 2.5427,
|
|
"step": 16300
|
|
},
|
|
{
|
|
"epoch": 5.150043433625523,
|
|
"grad_norm": 0.06217142680674239,
|
|
"learning_rate": 0.0011212946197461686,
|
|
"loss": 2.5361,
|
|
"step": 16305
|
|
},
|
|
{
|
|
"epoch": 5.151622838190002,
|
|
"grad_norm": 0.06420585743081299,
|
|
"learning_rate": 0.0011207472260705894,
|
|
"loss": 2.3974,
|
|
"step": 16310
|
|
},
|
|
{
|
|
"epoch": 5.153202242754482,
|
|
"grad_norm": 0.07112681958076077,
|
|
"learning_rate": 0.0011201997956765497,
|
|
"loss": 2.3866,
|
|
"step": 16315
|
|
},
|
|
{
|
|
"epoch": 5.154781647318961,
|
|
"grad_norm": 0.06579358055731502,
|
|
"learning_rate": 0.0011196523287305203,
|
|
"loss": 2.4988,
|
|
"step": 16320
|
|
},
|
|
{
|
|
"epoch": 5.15636105188344,
|
|
"grad_norm": 0.06422583481040922,
|
|
"learning_rate": 0.0011191048253989823,
|
|
"loss": 2.5402,
|
|
"step": 16325
|
|
},
|
|
{
|
|
"epoch": 5.157940456447919,
|
|
"grad_norm": 0.05614613307542216,
|
|
"learning_rate": 0.001118557285848428,
|
|
"loss": 2.5435,
|
|
"step": 16330
|
|
},
|
|
{
|
|
"epoch": 5.159519861012399,
|
|
"grad_norm": 0.054122449335019904,
|
|
"learning_rate": 0.0011180097102453605,
|
|
"loss": 2.4795,
|
|
"step": 16335
|
|
},
|
|
{
|
|
"epoch": 5.161099265576878,
|
|
"grad_norm": 0.07874484568863568,
|
|
"learning_rate": 0.0011174620987562936,
|
|
"loss": 2.5427,
|
|
"step": 16340
|
|
},
|
|
{
|
|
"epoch": 5.162678670141356,
|
|
"grad_norm": 0.07263009788499131,
|
|
"learning_rate": 0.0011169144515477537,
|
|
"loss": 2.4887,
|
|
"step": 16345
|
|
},
|
|
{
|
|
"epoch": 5.1642580747058355,
|
|
"grad_norm": 0.06027699366074942,
|
|
"learning_rate": 0.0011163667687862755,
|
|
"loss": 2.4958,
|
|
"step": 16350
|
|
},
|
|
{
|
|
"epoch": 5.165837479270315,
|
|
"grad_norm": 0.05915559937051019,
|
|
"learning_rate": 0.0011158190506384068,
|
|
"loss": 2.5644,
|
|
"step": 16355
|
|
},
|
|
{
|
|
"epoch": 5.167416883834794,
|
|
"grad_norm": 0.059476055705571074,
|
|
"learning_rate": 0.0011152712972707045,
|
|
"loss": 2.6209,
|
|
"step": 16360
|
|
},
|
|
{
|
|
"epoch": 5.168996288399273,
|
|
"grad_norm": 0.06808222938767634,
|
|
"learning_rate": 0.001114723508849737,
|
|
"loss": 2.5696,
|
|
"step": 16365
|
|
},
|
|
{
|
|
"epoch": 5.1705756929637525,
|
|
"grad_norm": 0.06521035300158985,
|
|
"learning_rate": 0.0011141756855420838,
|
|
"loss": 2.4274,
|
|
"step": 16370
|
|
},
|
|
{
|
|
"epoch": 5.172155097528232,
|
|
"grad_norm": 0.06339165426714281,
|
|
"learning_rate": 0.0011136278275143342,
|
|
"loss": 2.6055,
|
|
"step": 16375
|
|
},
|
|
{
|
|
"epoch": 5.173734502092711,
|
|
"grad_norm": 0.05411371687477742,
|
|
"learning_rate": 0.001113079934933088,
|
|
"loss": 2.4679,
|
|
"step": 16380
|
|
},
|
|
{
|
|
"epoch": 5.17531390665719,
|
|
"grad_norm": 0.06111465209279007,
|
|
"learning_rate": 0.0011125320079649562,
|
|
"loss": 2.4941,
|
|
"step": 16385
|
|
},
|
|
{
|
|
"epoch": 5.1768933112216695,
|
|
"grad_norm": 0.058314264656633696,
|
|
"learning_rate": 0.00111198404677656,
|
|
"loss": 2.4186,
|
|
"step": 16390
|
|
},
|
|
{
|
|
"epoch": 5.178472715786149,
|
|
"grad_norm": 0.06611542635281509,
|
|
"learning_rate": 0.00111143605153453,
|
|
"loss": 2.4831,
|
|
"step": 16395
|
|
},
|
|
{
|
|
"epoch": 5.180052120350628,
|
|
"grad_norm": 0.06365065892450729,
|
|
"learning_rate": 0.0011108880224055093,
|
|
"loss": 2.5002,
|
|
"step": 16400
|
|
},
|
|
{
|
|
"epoch": 5.181631524915107,
|
|
"grad_norm": 0.05149579258538051,
|
|
"learning_rate": 0.0011103399595561493,
|
|
"loss": 2.4647,
|
|
"step": 16405
|
|
},
|
|
{
|
|
"epoch": 5.1832109294795865,
|
|
"grad_norm": 0.06531925817820286,
|
|
"learning_rate": 0.0011097918631531123,
|
|
"loss": 2.4938,
|
|
"step": 16410
|
|
},
|
|
{
|
|
"epoch": 5.184790334044066,
|
|
"grad_norm": 0.05606313145169872,
|
|
"learning_rate": 0.0011092437333630716,
|
|
"loss": 2.5484,
|
|
"step": 16415
|
|
},
|
|
{
|
|
"epoch": 5.186369738608545,
|
|
"grad_norm": 0.061918571489501605,
|
|
"learning_rate": 0.0011086955703527093,
|
|
"loss": 2.5153,
|
|
"step": 16420
|
|
},
|
|
{
|
|
"epoch": 5.187949143173023,
|
|
"grad_norm": 0.06796321611167673,
|
|
"learning_rate": 0.001108147374288719,
|
|
"loss": 2.5846,
|
|
"step": 16425
|
|
},
|
|
{
|
|
"epoch": 5.189528547737503,
|
|
"grad_norm": 0.06654686032721133,
|
|
"learning_rate": 0.0011075991453378025,
|
|
"loss": 2.5631,
|
|
"step": 16430
|
|
},
|
|
{
|
|
"epoch": 5.191107952301982,
|
|
"grad_norm": 0.060261990753233845,
|
|
"learning_rate": 0.0011070508836666737,
|
|
"loss": 2.5073,
|
|
"step": 16435
|
|
},
|
|
{
|
|
"epoch": 5.192687356866461,
|
|
"grad_norm": 0.06296014676202179,
|
|
"learning_rate": 0.0011065025894420552,
|
|
"loss": 2.4725,
|
|
"step": 16440
|
|
},
|
|
{
|
|
"epoch": 5.19426676143094,
|
|
"grad_norm": 0.07877531687828125,
|
|
"learning_rate": 0.0011059542628306797,
|
|
"loss": 2.5068,
|
|
"step": 16445
|
|
},
|
|
{
|
|
"epoch": 5.19584616599542,
|
|
"grad_norm": 0.0687432738343893,
|
|
"learning_rate": 0.0011054059039992895,
|
|
"loss": 2.4373,
|
|
"step": 16450
|
|
},
|
|
{
|
|
"epoch": 5.197425570559899,
|
|
"grad_norm": 0.06456809106330956,
|
|
"learning_rate": 0.0011048575131146377,
|
|
"loss": 2.4645,
|
|
"step": 16455
|
|
},
|
|
{
|
|
"epoch": 5.199004975124378,
|
|
"grad_norm": 0.06532940947281059,
|
|
"learning_rate": 0.001104309090343486,
|
|
"loss": 2.4395,
|
|
"step": 16460
|
|
},
|
|
{
|
|
"epoch": 5.200584379688857,
|
|
"grad_norm": 0.06951168067612123,
|
|
"learning_rate": 0.0011037606358526065,
|
|
"loss": 2.5234,
|
|
"step": 16465
|
|
},
|
|
{
|
|
"epoch": 5.202163784253337,
|
|
"grad_norm": 0.06605425908188053,
|
|
"learning_rate": 0.0011032121498087805,
|
|
"loss": 2.3909,
|
|
"step": 16470
|
|
},
|
|
{
|
|
"epoch": 5.203743188817816,
|
|
"grad_norm": 0.06139388448666948,
|
|
"learning_rate": 0.001102663632378799,
|
|
"loss": 2.4513,
|
|
"step": 16475
|
|
},
|
|
{
|
|
"epoch": 5.205322593382295,
|
|
"grad_norm": 0.05321250714202924,
|
|
"learning_rate": 0.0011021150837294631,
|
|
"loss": 2.4193,
|
|
"step": 16480
|
|
},
|
|
{
|
|
"epoch": 5.206901997946774,
|
|
"grad_norm": 0.0670144536976953,
|
|
"learning_rate": 0.0011015665040275827,
|
|
"loss": 2.466,
|
|
"step": 16485
|
|
},
|
|
{
|
|
"epoch": 5.208481402511254,
|
|
"grad_norm": 0.0640672036717467,
|
|
"learning_rate": 0.0011010178934399773,
|
|
"loss": 2.4192,
|
|
"step": 16490
|
|
},
|
|
{
|
|
"epoch": 5.210060807075733,
|
|
"grad_norm": 0.06885519764125651,
|
|
"learning_rate": 0.0011004692521334755,
|
|
"loss": 2.4949,
|
|
"step": 16495
|
|
},
|
|
{
|
|
"epoch": 5.211640211640212,
|
|
"grad_norm": 0.06479142893373602,
|
|
"learning_rate": 0.0010999205802749163,
|
|
"loss": 2.519,
|
|
"step": 16500
|
|
},
|
|
{
|
|
"epoch": 5.21321961620469,
|
|
"grad_norm": 0.06367553196577055,
|
|
"learning_rate": 0.0010993718780311474,
|
|
"loss": 2.4375,
|
|
"step": 16505
|
|
},
|
|
{
|
|
"epoch": 5.21479902076917,
|
|
"grad_norm": 0.07410082618851017,
|
|
"learning_rate": 0.001098823145569025,
|
|
"loss": 2.398,
|
|
"step": 16510
|
|
},
|
|
{
|
|
"epoch": 5.216378425333649,
|
|
"grad_norm": 0.06301957247302468,
|
|
"learning_rate": 0.0010982743830554155,
|
|
"loss": 2.5229,
|
|
"step": 16515
|
|
},
|
|
{
|
|
"epoch": 5.217957829898128,
|
|
"grad_norm": 0.06421095569993603,
|
|
"learning_rate": 0.0010977255906571939,
|
|
"loss": 2.4504,
|
|
"step": 16520
|
|
},
|
|
{
|
|
"epoch": 5.219537234462607,
|
|
"grad_norm": 0.06917348371322869,
|
|
"learning_rate": 0.0010971767685412448,
|
|
"loss": 2.4585,
|
|
"step": 16525
|
|
},
|
|
{
|
|
"epoch": 5.221116639027087,
|
|
"grad_norm": 0.0762081917098472,
|
|
"learning_rate": 0.001096627916874461,
|
|
"loss": 2.4095,
|
|
"step": 16530
|
|
},
|
|
{
|
|
"epoch": 5.222696043591566,
|
|
"grad_norm": 0.06776761418065072,
|
|
"learning_rate": 0.0010960790358237448,
|
|
"loss": 2.6017,
|
|
"step": 16535
|
|
},
|
|
{
|
|
"epoch": 5.224275448156045,
|
|
"grad_norm": 0.054990387693461554,
|
|
"learning_rate": 0.0010955301255560085,
|
|
"loss": 2.4561,
|
|
"step": 16540
|
|
},
|
|
{
|
|
"epoch": 5.225854852720524,
|
|
"grad_norm": 0.052096822769913995,
|
|
"learning_rate": 0.0010949811862381706,
|
|
"loss": 2.5755,
|
|
"step": 16545
|
|
},
|
|
{
|
|
"epoch": 5.227434257285004,
|
|
"grad_norm": 0.05360652247727314,
|
|
"learning_rate": 0.0010944322180371612,
|
|
"loss": 2.5952,
|
|
"step": 16550
|
|
},
|
|
{
|
|
"epoch": 5.229013661849483,
|
|
"grad_norm": 0.06870679333822231,
|
|
"learning_rate": 0.0010938832211199177,
|
|
"loss": 2.4554,
|
|
"step": 16555
|
|
},
|
|
{
|
|
"epoch": 5.230593066413962,
|
|
"grad_norm": 0.0708017886233242,
|
|
"learning_rate": 0.0010933341956533863,
|
|
"loss": 2.5121,
|
|
"step": 16560
|
|
},
|
|
{
|
|
"epoch": 5.232172470978441,
|
|
"grad_norm": 0.0888433051481054,
|
|
"learning_rate": 0.0010927851418045223,
|
|
"loss": 2.3919,
|
|
"step": 16565
|
|
},
|
|
{
|
|
"epoch": 5.233751875542921,
|
|
"grad_norm": 0.05876750386387108,
|
|
"learning_rate": 0.0010922360597402899,
|
|
"loss": 2.5121,
|
|
"step": 16570
|
|
},
|
|
{
|
|
"epoch": 5.2353312801074,
|
|
"grad_norm": 0.07072961142914727,
|
|
"learning_rate": 0.0010916869496276605,
|
|
"loss": 2.4312,
|
|
"step": 16575
|
|
},
|
|
{
|
|
"epoch": 5.236910684671878,
|
|
"grad_norm": 0.06580191859037744,
|
|
"learning_rate": 0.0010911378116336156,
|
|
"loss": 2.4287,
|
|
"step": 16580
|
|
},
|
|
{
|
|
"epoch": 5.2384900892363575,
|
|
"grad_norm": 0.06248045185532465,
|
|
"learning_rate": 0.001090588645925145,
|
|
"loss": 2.3803,
|
|
"step": 16585
|
|
},
|
|
{
|
|
"epoch": 5.240069493800837,
|
|
"grad_norm": 0.059890358250749606,
|
|
"learning_rate": 0.0010900394526692453,
|
|
"loss": 2.5089,
|
|
"step": 16590
|
|
},
|
|
{
|
|
"epoch": 5.241648898365316,
|
|
"grad_norm": 0.0585610711878174,
|
|
"learning_rate": 0.0010894902320329237,
|
|
"loss": 2.4623,
|
|
"step": 16595
|
|
},
|
|
{
|
|
"epoch": 5.243228302929795,
|
|
"grad_norm": 0.05983185438216206,
|
|
"learning_rate": 0.0010889409841831942,
|
|
"loss": 2.3765,
|
|
"step": 16600
|
|
},
|
|
{
|
|
"epoch": 5.2448077074942745,
|
|
"grad_norm": 0.060187315671030875,
|
|
"learning_rate": 0.0010883917092870796,
|
|
"loss": 2.4824,
|
|
"step": 16605
|
|
},
|
|
{
|
|
"epoch": 5.246387112058754,
|
|
"grad_norm": 0.0585721389921002,
|
|
"learning_rate": 0.0010878424075116112,
|
|
"loss": 2.5855,
|
|
"step": 16610
|
|
},
|
|
{
|
|
"epoch": 5.247966516623233,
|
|
"grad_norm": 0.05396930882771886,
|
|
"learning_rate": 0.0010872930790238279,
|
|
"loss": 2.4659,
|
|
"step": 16615
|
|
},
|
|
{
|
|
"epoch": 5.249545921187712,
|
|
"grad_norm": 0.058200285764090955,
|
|
"learning_rate": 0.0010867437239907764,
|
|
"loss": 2.5575,
|
|
"step": 16620
|
|
},
|
|
{
|
|
"epoch": 5.2511253257521915,
|
|
"grad_norm": 0.07066251591128414,
|
|
"learning_rate": 0.0010861943425795131,
|
|
"loss": 2.4811,
|
|
"step": 16625
|
|
},
|
|
{
|
|
"epoch": 5.252704730316671,
|
|
"grad_norm": 0.06454918154698588,
|
|
"learning_rate": 0.001085644934957101,
|
|
"loss": 2.5084,
|
|
"step": 16630
|
|
},
|
|
{
|
|
"epoch": 5.25428413488115,
|
|
"grad_norm": 0.06916896566986358,
|
|
"learning_rate": 0.0010850955012906113,
|
|
"loss": 2.467,
|
|
"step": 16635
|
|
},
|
|
{
|
|
"epoch": 5.255863539445629,
|
|
"grad_norm": 0.07621274124652579,
|
|
"learning_rate": 0.0010845460417471236,
|
|
"loss": 2.4866,
|
|
"step": 16640
|
|
},
|
|
{
|
|
"epoch": 5.2574429440101085,
|
|
"grad_norm": 0.058369653895255,
|
|
"learning_rate": 0.0010839965564937244,
|
|
"loss": 2.4595,
|
|
"step": 16645
|
|
},
|
|
{
|
|
"epoch": 5.259022348574588,
|
|
"grad_norm": 0.060666486896924936,
|
|
"learning_rate": 0.0010834470456975091,
|
|
"loss": 2.4964,
|
|
"step": 16650
|
|
},
|
|
{
|
|
"epoch": 5.260601753139067,
|
|
"grad_norm": 0.05894951924037973,
|
|
"learning_rate": 0.0010828975095255806,
|
|
"loss": 2.4953,
|
|
"step": 16655
|
|
},
|
|
{
|
|
"epoch": 5.262181157703546,
|
|
"grad_norm": 0.06684114577856183,
|
|
"learning_rate": 0.001082347948145049,
|
|
"loss": 2.5224,
|
|
"step": 16660
|
|
},
|
|
{
|
|
"epoch": 5.263760562268025,
|
|
"grad_norm": 0.07270527632123917,
|
|
"learning_rate": 0.0010817983617230325,
|
|
"loss": 2.5415,
|
|
"step": 16665
|
|
},
|
|
{
|
|
"epoch": 5.265339966832504,
|
|
"grad_norm": 0.05330178952705446,
|
|
"learning_rate": 0.0010812487504266565,
|
|
"loss": 2.5755,
|
|
"step": 16670
|
|
},
|
|
{
|
|
"epoch": 5.266919371396983,
|
|
"grad_norm": 0.059499155784585894,
|
|
"learning_rate": 0.001080699114423055,
|
|
"loss": 2.4563,
|
|
"step": 16675
|
|
},
|
|
{
|
|
"epoch": 5.268498775961462,
|
|
"grad_norm": 0.06704501210732458,
|
|
"learning_rate": 0.0010801494538793684,
|
|
"loss": 2.6167,
|
|
"step": 16680
|
|
},
|
|
{
|
|
"epoch": 5.270078180525942,
|
|
"grad_norm": 0.05814805570784973,
|
|
"learning_rate": 0.0010795997689627451,
|
|
"loss": 2.4289,
|
|
"step": 16685
|
|
},
|
|
{
|
|
"epoch": 5.271657585090421,
|
|
"grad_norm": 0.0669517010977098,
|
|
"learning_rate": 0.0010790500598403402,
|
|
"loss": 2.4127,
|
|
"step": 16690
|
|
},
|
|
{
|
|
"epoch": 5.2732369896549,
|
|
"grad_norm": 0.06309378267967428,
|
|
"learning_rate": 0.001078500326679317,
|
|
"loss": 2.4365,
|
|
"step": 16695
|
|
},
|
|
{
|
|
"epoch": 5.274816394219379,
|
|
"grad_norm": 0.06584312219840473,
|
|
"learning_rate": 0.0010779505696468469,
|
|
"loss": 2.4437,
|
|
"step": 16700
|
|
},
|
|
{
|
|
"epoch": 5.276395798783859,
|
|
"grad_norm": 0.0748847383545806,
|
|
"learning_rate": 0.0010774007889101061,
|
|
"loss": 2.5373,
|
|
"step": 16705
|
|
},
|
|
{
|
|
"epoch": 5.277975203348338,
|
|
"grad_norm": 0.05230783420322393,
|
|
"learning_rate": 0.0010768509846362797,
|
|
"loss": 2.5201,
|
|
"step": 16710
|
|
},
|
|
{
|
|
"epoch": 5.279554607912817,
|
|
"grad_norm": 0.061470445691836156,
|
|
"learning_rate": 0.00107630115699256,
|
|
"loss": 2.4913,
|
|
"step": 16715
|
|
},
|
|
{
|
|
"epoch": 5.281134012477296,
|
|
"grad_norm": 0.056911010107714786,
|
|
"learning_rate": 0.0010757513061461462,
|
|
"loss": 2.4145,
|
|
"step": 16720
|
|
},
|
|
{
|
|
"epoch": 5.2827134170417755,
|
|
"grad_norm": 0.06249572583749286,
|
|
"learning_rate": 0.001075201432264244,
|
|
"loss": 2.5379,
|
|
"step": 16725
|
|
},
|
|
{
|
|
"epoch": 5.284292821606255,
|
|
"grad_norm": 0.05916696895884777,
|
|
"learning_rate": 0.001074651535514067,
|
|
"loss": 2.5474,
|
|
"step": 16730
|
|
},
|
|
{
|
|
"epoch": 5.285872226170734,
|
|
"grad_norm": 0.07074457981481334,
|
|
"learning_rate": 0.0010741016160628345,
|
|
"loss": 2.5206,
|
|
"step": 16735
|
|
},
|
|
{
|
|
"epoch": 5.287451630735212,
|
|
"grad_norm": 0.07527858926282108,
|
|
"learning_rate": 0.0010735516740777741,
|
|
"loss": 2.5098,
|
|
"step": 16740
|
|
},
|
|
{
|
|
"epoch": 5.289031035299692,
|
|
"grad_norm": 0.06153733507179991,
|
|
"learning_rate": 0.00107300170972612,
|
|
"loss": 2.4103,
|
|
"step": 16745
|
|
},
|
|
{
|
|
"epoch": 5.290610439864171,
|
|
"grad_norm": 0.06310848763976958,
|
|
"learning_rate": 0.0010724517231751123,
|
|
"loss": 2.4628,
|
|
"step": 16750
|
|
},
|
|
{
|
|
"epoch": 5.29218984442865,
|
|
"grad_norm": 0.05508495712248767,
|
|
"learning_rate": 0.0010719017145919983,
|
|
"loss": 2.4478,
|
|
"step": 16755
|
|
},
|
|
{
|
|
"epoch": 5.293769248993129,
|
|
"grad_norm": 0.06435719349082726,
|
|
"learning_rate": 0.0010713516841440321,
|
|
"loss": 2.5433,
|
|
"step": 16760
|
|
},
|
|
{
|
|
"epoch": 5.295348653557609,
|
|
"grad_norm": 0.0798223299576822,
|
|
"learning_rate": 0.001070801631998475,
|
|
"loss": 2.447,
|
|
"step": 16765
|
|
},
|
|
{
|
|
"epoch": 5.296928058122088,
|
|
"grad_norm": 0.060059753102617686,
|
|
"learning_rate": 0.0010702515583225936,
|
|
"loss": 2.4276,
|
|
"step": 16770
|
|
},
|
|
{
|
|
"epoch": 5.298507462686567,
|
|
"grad_norm": 0.05718298632033585,
|
|
"learning_rate": 0.0010697014632836627,
|
|
"loss": 2.4071,
|
|
"step": 16775
|
|
},
|
|
{
|
|
"epoch": 5.300086867251046,
|
|
"grad_norm": 0.06008953594279999,
|
|
"learning_rate": 0.0010691513470489616,
|
|
"loss": 2.488,
|
|
"step": 16780
|
|
},
|
|
{
|
|
"epoch": 5.301666271815526,
|
|
"grad_norm": 0.08472308898028438,
|
|
"learning_rate": 0.0010686012097857777,
|
|
"loss": 2.4654,
|
|
"step": 16785
|
|
},
|
|
{
|
|
"epoch": 5.303245676380005,
|
|
"grad_norm": 0.07276775293262801,
|
|
"learning_rate": 0.0010680510516614045,
|
|
"loss": 2.4008,
|
|
"step": 16790
|
|
},
|
|
{
|
|
"epoch": 5.304825080944484,
|
|
"grad_norm": 0.08578372585501848,
|
|
"learning_rate": 0.0010675008728431414,
|
|
"loss": 2.4863,
|
|
"step": 16795
|
|
},
|
|
{
|
|
"epoch": 5.306404485508963,
|
|
"grad_norm": 0.076242387761487,
|
|
"learning_rate": 0.001066950673498294,
|
|
"loss": 2.5704,
|
|
"step": 16800
|
|
},
|
|
{
|
|
"epoch": 5.307983890073443,
|
|
"grad_norm": 0.06923296405260501,
|
|
"learning_rate": 0.0010664004537941742,
|
|
"loss": 2.5675,
|
|
"step": 16805
|
|
},
|
|
{
|
|
"epoch": 5.309563294637922,
|
|
"grad_norm": 0.07172642450659165,
|
|
"learning_rate": 0.0010658502138981008,
|
|
"loss": 2.5564,
|
|
"step": 16810
|
|
},
|
|
{
|
|
"epoch": 5.311142699202401,
|
|
"grad_norm": 0.07214633031703806,
|
|
"learning_rate": 0.0010652999539773984,
|
|
"loss": 2.52,
|
|
"step": 16815
|
|
},
|
|
{
|
|
"epoch": 5.3127221037668795,
|
|
"grad_norm": 0.0673649488725695,
|
|
"learning_rate": 0.001064749674199397,
|
|
"loss": 2.4927,
|
|
"step": 16820
|
|
},
|
|
{
|
|
"epoch": 5.314301508331359,
|
|
"grad_norm": 0.08050938669838298,
|
|
"learning_rate": 0.0010641993747314334,
|
|
"loss": 2.5102,
|
|
"step": 16825
|
|
},
|
|
{
|
|
"epoch": 5.315880912895838,
|
|
"grad_norm": 0.07195246233541937,
|
|
"learning_rate": 0.00106364905574085,
|
|
"loss": 2.4801,
|
|
"step": 16830
|
|
},
|
|
{
|
|
"epoch": 5.317460317460317,
|
|
"grad_norm": 0.05612902846451449,
|
|
"learning_rate": 0.0010630987173949958,
|
|
"loss": 2.4484,
|
|
"step": 16835
|
|
},
|
|
{
|
|
"epoch": 5.3190397220247965,
|
|
"grad_norm": 0.0496078995641674,
|
|
"learning_rate": 0.0010625483598612246,
|
|
"loss": 2.5132,
|
|
"step": 16840
|
|
},
|
|
{
|
|
"epoch": 5.320619126589276,
|
|
"grad_norm": 0.06237915696779509,
|
|
"learning_rate": 0.0010619979833068965,
|
|
"loss": 2.4461,
|
|
"step": 16845
|
|
},
|
|
{
|
|
"epoch": 5.322198531153755,
|
|
"grad_norm": 0.05606620687503389,
|
|
"learning_rate": 0.001061447587899378,
|
|
"loss": 2.4537,
|
|
"step": 16850
|
|
},
|
|
{
|
|
"epoch": 5.323777935718234,
|
|
"grad_norm": 0.05437527681426904,
|
|
"learning_rate": 0.0010608971738060404,
|
|
"loss": 2.5118,
|
|
"step": 16855
|
|
},
|
|
{
|
|
"epoch": 5.3253573402827135,
|
|
"grad_norm": 0.06628402939707274,
|
|
"learning_rate": 0.0010603467411942618,
|
|
"loss": 2.4164,
|
|
"step": 16860
|
|
},
|
|
{
|
|
"epoch": 5.326936744847193,
|
|
"grad_norm": 0.06435070608088721,
|
|
"learning_rate": 0.0010597962902314246,
|
|
"loss": 2.52,
|
|
"step": 16865
|
|
},
|
|
{
|
|
"epoch": 5.328516149411672,
|
|
"grad_norm": 0.07830690204343072,
|
|
"learning_rate": 0.0010592458210849174,
|
|
"loss": 2.4917,
|
|
"step": 16870
|
|
},
|
|
{
|
|
"epoch": 5.330095553976151,
|
|
"grad_norm": 0.08642147176922639,
|
|
"learning_rate": 0.0010586953339221346,
|
|
"loss": 2.5402,
|
|
"step": 16875
|
|
},
|
|
{
|
|
"epoch": 5.3316749585406304,
|
|
"grad_norm": 0.09187424195378634,
|
|
"learning_rate": 0.0010581448289104759,
|
|
"loss": 2.3867,
|
|
"step": 16880
|
|
},
|
|
{
|
|
"epoch": 5.33325436310511,
|
|
"grad_norm": 0.14183700498762744,
|
|
"learning_rate": 0.0010575943062173462,
|
|
"loss": 2.4946,
|
|
"step": 16885
|
|
},
|
|
{
|
|
"epoch": 5.334833767669589,
|
|
"grad_norm": 0.0643752778203694,
|
|
"learning_rate": 0.001057043766010156,
|
|
"loss": 2.4688,
|
|
"step": 16890
|
|
},
|
|
{
|
|
"epoch": 5.336413172234067,
|
|
"grad_norm": 0.05696581393811108,
|
|
"learning_rate": 0.0010564932084563207,
|
|
"loss": 2.5694,
|
|
"step": 16895
|
|
},
|
|
{
|
|
"epoch": 5.3379925767985466,
|
|
"grad_norm": 0.06514340707364054,
|
|
"learning_rate": 0.0010559426337232618,
|
|
"loss": 2.459,
|
|
"step": 16900
|
|
},
|
|
{
|
|
"epoch": 5.339571981363026,
|
|
"grad_norm": 0.06423140329868181,
|
|
"learning_rate": 0.0010553920419784056,
|
|
"loss": 2.3664,
|
|
"step": 16905
|
|
},
|
|
{
|
|
"epoch": 5.341151385927505,
|
|
"grad_norm": 0.05756396111491676,
|
|
"learning_rate": 0.0010548414333891834,
|
|
"loss": 2.4964,
|
|
"step": 16910
|
|
},
|
|
{
|
|
"epoch": 5.342730790491984,
|
|
"grad_norm": 0.06258406731521991,
|
|
"learning_rate": 0.0010542908081230314,
|
|
"loss": 2.3496,
|
|
"step": 16915
|
|
},
|
|
{
|
|
"epoch": 5.3443101950564635,
|
|
"grad_norm": 0.05578908244242506,
|
|
"learning_rate": 0.0010537401663473916,
|
|
"loss": 2.4955,
|
|
"step": 16920
|
|
},
|
|
{
|
|
"epoch": 5.345889599620943,
|
|
"grad_norm": 0.05874669174522487,
|
|
"learning_rate": 0.0010531895082297107,
|
|
"loss": 2.5043,
|
|
"step": 16925
|
|
},
|
|
{
|
|
"epoch": 5.347469004185422,
|
|
"grad_norm": 0.05615463876654649,
|
|
"learning_rate": 0.0010526388339374402,
|
|
"loss": 2.4556,
|
|
"step": 16930
|
|
},
|
|
{
|
|
"epoch": 5.349048408749901,
|
|
"grad_norm": 0.0717665194726691,
|
|
"learning_rate": 0.0010520881436380364,
|
|
"loss": 2.434,
|
|
"step": 16935
|
|
},
|
|
{
|
|
"epoch": 5.3506278133143805,
|
|
"grad_norm": 0.06025021098987067,
|
|
"learning_rate": 0.001051537437498961,
|
|
"loss": 2.4606,
|
|
"step": 16940
|
|
},
|
|
{
|
|
"epoch": 5.35220721787886,
|
|
"grad_norm": 0.0723709397590929,
|
|
"learning_rate": 0.0010509867156876802,
|
|
"loss": 2.529,
|
|
"step": 16945
|
|
},
|
|
{
|
|
"epoch": 5.353786622443339,
|
|
"grad_norm": 0.07275350241449625,
|
|
"learning_rate": 0.001050435978371665,
|
|
"loss": 2.4838,
|
|
"step": 16950
|
|
},
|
|
{
|
|
"epoch": 5.355366027007818,
|
|
"grad_norm": 0.06785690614428286,
|
|
"learning_rate": 0.001049885225718391,
|
|
"loss": 2.463,
|
|
"step": 16955
|
|
},
|
|
{
|
|
"epoch": 5.3569454315722975,
|
|
"grad_norm": 0.0666591076212561,
|
|
"learning_rate": 0.0010493344578953385,
|
|
"loss": 2.3827,
|
|
"step": 16960
|
|
},
|
|
{
|
|
"epoch": 5.358524836136777,
|
|
"grad_norm": 0.06319052158087238,
|
|
"learning_rate": 0.0010487836750699925,
|
|
"loss": 2.5529,
|
|
"step": 16965
|
|
},
|
|
{
|
|
"epoch": 5.360104240701256,
|
|
"grad_norm": 0.054779998660577985,
|
|
"learning_rate": 0.0010482328774098428,
|
|
"loss": 2.4198,
|
|
"step": 16970
|
|
},
|
|
{
|
|
"epoch": 5.361683645265735,
|
|
"grad_norm": 0.06595481664767047,
|
|
"learning_rate": 0.0010476820650823834,
|
|
"loss": 2.5699,
|
|
"step": 16975
|
|
},
|
|
{
|
|
"epoch": 5.363263049830214,
|
|
"grad_norm": 0.05765694032285817,
|
|
"learning_rate": 0.0010471312382551122,
|
|
"loss": 2.5256,
|
|
"step": 16980
|
|
},
|
|
{
|
|
"epoch": 5.364842454394693,
|
|
"grad_norm": 0.06406264763633265,
|
|
"learning_rate": 0.0010465803970955325,
|
|
"loss": 2.4457,
|
|
"step": 16985
|
|
},
|
|
{
|
|
"epoch": 5.366421858959172,
|
|
"grad_norm": 0.058716196334157277,
|
|
"learning_rate": 0.0010460295417711518,
|
|
"loss": 2.4894,
|
|
"step": 16990
|
|
},
|
|
{
|
|
"epoch": 5.368001263523651,
|
|
"grad_norm": 0.055264350923520805,
|
|
"learning_rate": 0.0010454786724494818,
|
|
"loss": 2.5555,
|
|
"step": 16995
|
|
},
|
|
{
|
|
"epoch": 5.369580668088131,
|
|
"grad_norm": 0.0779942238783775,
|
|
"learning_rate": 0.0010449277892980381,
|
|
"loss": 2.4478,
|
|
"step": 17000
|
|
},
|
|
{
|
|
"epoch": 5.37116007265261,
|
|
"grad_norm": 0.05737792547478927,
|
|
"learning_rate": 0.0010443768924843404,
|
|
"loss": 2.4918,
|
|
"step": 17005
|
|
},
|
|
{
|
|
"epoch": 5.372739477217089,
|
|
"grad_norm": 0.07708306947393631,
|
|
"learning_rate": 0.0010438259821759133,
|
|
"loss": 2.3815,
|
|
"step": 17010
|
|
},
|
|
{
|
|
"epoch": 5.374318881781568,
|
|
"grad_norm": 0.07054007734865732,
|
|
"learning_rate": 0.0010432750585402852,
|
|
"loss": 2.424,
|
|
"step": 17015
|
|
},
|
|
{
|
|
"epoch": 5.375898286346048,
|
|
"grad_norm": 0.0706022313990559,
|
|
"learning_rate": 0.0010427241217449885,
|
|
"loss": 2.4761,
|
|
"step": 17020
|
|
},
|
|
{
|
|
"epoch": 5.377477690910527,
|
|
"grad_norm": 0.06813243778394985,
|
|
"learning_rate": 0.0010421731719575588,
|
|
"loss": 2.4499,
|
|
"step": 17025
|
|
},
|
|
{
|
|
"epoch": 5.379057095475006,
|
|
"grad_norm": 0.05391903605438501,
|
|
"learning_rate": 0.0010416222093455373,
|
|
"loss": 2.4157,
|
|
"step": 17030
|
|
},
|
|
{
|
|
"epoch": 5.380636500039485,
|
|
"grad_norm": 0.059391745910537766,
|
|
"learning_rate": 0.0010410712340764676,
|
|
"loss": 2.5278,
|
|
"step": 17035
|
|
},
|
|
{
|
|
"epoch": 5.382215904603965,
|
|
"grad_norm": 0.06332603147895359,
|
|
"learning_rate": 0.0010405202463178984,
|
|
"loss": 2.4639,
|
|
"step": 17040
|
|
},
|
|
{
|
|
"epoch": 5.383795309168444,
|
|
"grad_norm": 0.0711124646613565,
|
|
"learning_rate": 0.0010399692462373811,
|
|
"loss": 2.4097,
|
|
"step": 17045
|
|
},
|
|
{
|
|
"epoch": 5.385374713732923,
|
|
"grad_norm": 0.0637810374649417,
|
|
"learning_rate": 0.0010394182340024711,
|
|
"loss": 2.434,
|
|
"step": 17050
|
|
},
|
|
{
|
|
"epoch": 5.3869541182974015,
|
|
"grad_norm": 0.055547887293987096,
|
|
"learning_rate": 0.0010388672097807281,
|
|
"loss": 2.4207,
|
|
"step": 17055
|
|
},
|
|
{
|
|
"epoch": 5.388533522861881,
|
|
"grad_norm": 0.06417690698464526,
|
|
"learning_rate": 0.0010383161737397154,
|
|
"loss": 2.4728,
|
|
"step": 17060
|
|
},
|
|
{
|
|
"epoch": 5.39011292742636,
|
|
"grad_norm": 0.06593119769528422,
|
|
"learning_rate": 0.0010377651260469987,
|
|
"loss": 2.4172,
|
|
"step": 17065
|
|
},
|
|
{
|
|
"epoch": 5.391692331990839,
|
|
"grad_norm": 0.06845694761919878,
|
|
"learning_rate": 0.0010372140668701482,
|
|
"loss": 2.4343,
|
|
"step": 17070
|
|
},
|
|
{
|
|
"epoch": 5.393271736555318,
|
|
"grad_norm": 0.05436828003384813,
|
|
"learning_rate": 0.001036662996376738,
|
|
"loss": 2.4925,
|
|
"step": 17075
|
|
},
|
|
{
|
|
"epoch": 5.394851141119798,
|
|
"grad_norm": 0.061883791335234604,
|
|
"learning_rate": 0.0010361119147343448,
|
|
"loss": 2.5331,
|
|
"step": 17080
|
|
},
|
|
{
|
|
"epoch": 5.396430545684277,
|
|
"grad_norm": 0.06348242440565674,
|
|
"learning_rate": 0.001035560822110549,
|
|
"loss": 2.5279,
|
|
"step": 17085
|
|
},
|
|
{
|
|
"epoch": 5.398009950248756,
|
|
"grad_norm": 0.0672056420447635,
|
|
"learning_rate": 0.001035009718672935,
|
|
"loss": 2.4785,
|
|
"step": 17090
|
|
},
|
|
{
|
|
"epoch": 5.399589354813235,
|
|
"grad_norm": 0.06675203584026605,
|
|
"learning_rate": 0.0010344586045890882,
|
|
"loss": 2.52,
|
|
"step": 17095
|
|
},
|
|
{
|
|
"epoch": 5.401168759377715,
|
|
"grad_norm": 0.06551572667439134,
|
|
"learning_rate": 0.0010339074800266004,
|
|
"loss": 2.5402,
|
|
"step": 17100
|
|
},
|
|
{
|
|
"epoch": 5.402748163942194,
|
|
"grad_norm": 0.06334548154625982,
|
|
"learning_rate": 0.0010333563451530648,
|
|
"loss": 2.5497,
|
|
"step": 17105
|
|
},
|
|
{
|
|
"epoch": 5.404327568506673,
|
|
"grad_norm": 0.10586308533135143,
|
|
"learning_rate": 0.0010328052001360778,
|
|
"loss": 2.5112,
|
|
"step": 17110
|
|
},
|
|
{
|
|
"epoch": 5.405906973071152,
|
|
"grad_norm": 0.07921321629017972,
|
|
"learning_rate": 0.0010322540451432386,
|
|
"loss": 2.5719,
|
|
"step": 17115
|
|
},
|
|
{
|
|
"epoch": 5.407486377635632,
|
|
"grad_norm": 0.07478108021776518,
|
|
"learning_rate": 0.0010317028803421505,
|
|
"loss": 2.4436,
|
|
"step": 17120
|
|
},
|
|
{
|
|
"epoch": 5.409065782200111,
|
|
"grad_norm": 0.06353594190898273,
|
|
"learning_rate": 0.001031151705900419,
|
|
"loss": 2.4052,
|
|
"step": 17125
|
|
},
|
|
{
|
|
"epoch": 5.41064518676459,
|
|
"grad_norm": 0.06858545588224939,
|
|
"learning_rate": 0.0010306005219856528,
|
|
"loss": 2.5134,
|
|
"step": 17130
|
|
},
|
|
{
|
|
"epoch": 5.412224591329069,
|
|
"grad_norm": 0.11161139739640971,
|
|
"learning_rate": 0.0010300493287654635,
|
|
"loss": 2.4786,
|
|
"step": 17135
|
|
},
|
|
{
|
|
"epoch": 5.413803995893548,
|
|
"grad_norm": 0.07029261166704179,
|
|
"learning_rate": 0.0010294981264074652,
|
|
"loss": 2.5099,
|
|
"step": 17140
|
|
},
|
|
{
|
|
"epoch": 5.415383400458027,
|
|
"grad_norm": 0.07834550948404823,
|
|
"learning_rate": 0.0010289469150792751,
|
|
"loss": 2.3872,
|
|
"step": 17145
|
|
},
|
|
{
|
|
"epoch": 5.416962805022506,
|
|
"grad_norm": 0.12133522402306104,
|
|
"learning_rate": 0.001028395694948513,
|
|
"loss": 2.4681,
|
|
"step": 17150
|
|
},
|
|
{
|
|
"epoch": 5.4185422095869855,
|
|
"grad_norm": 0.07107773751852352,
|
|
"learning_rate": 0.0010278444661828018,
|
|
"loss": 2.5221,
|
|
"step": 17155
|
|
},
|
|
{
|
|
"epoch": 5.420121614151465,
|
|
"grad_norm": 0.06698901319417577,
|
|
"learning_rate": 0.0010272932289497663,
|
|
"loss": 2.5352,
|
|
"step": 17160
|
|
},
|
|
{
|
|
"epoch": 5.421701018715944,
|
|
"grad_norm": 0.07078386952715568,
|
|
"learning_rate": 0.0010267419834170339,
|
|
"loss": 2.4919,
|
|
"step": 17165
|
|
},
|
|
{
|
|
"epoch": 5.423280423280423,
|
|
"grad_norm": 0.06088678320734555,
|
|
"learning_rate": 0.0010261907297522354,
|
|
"loss": 2.4718,
|
|
"step": 17170
|
|
},
|
|
{
|
|
"epoch": 5.4248598278449025,
|
|
"grad_norm": 0.06240042207023365,
|
|
"learning_rate": 0.0010256394681230035,
|
|
"loss": 2.5504,
|
|
"step": 17175
|
|
},
|
|
{
|
|
"epoch": 5.426439232409382,
|
|
"grad_norm": 0.07121927254555442,
|
|
"learning_rate": 0.0010250881986969731,
|
|
"loss": 2.4199,
|
|
"step": 17180
|
|
},
|
|
{
|
|
"epoch": 5.428018636973861,
|
|
"grad_norm": 0.05428715785274347,
|
|
"learning_rate": 0.0010245369216417817,
|
|
"loss": 2.4463,
|
|
"step": 17185
|
|
},
|
|
{
|
|
"epoch": 5.42959804153834,
|
|
"grad_norm": 0.08288380913410646,
|
|
"learning_rate": 0.001023985637125069,
|
|
"loss": 2.4475,
|
|
"step": 17190
|
|
},
|
|
{
|
|
"epoch": 5.4311774461028195,
|
|
"grad_norm": 0.06595287674137955,
|
|
"learning_rate": 0.0010234343453144777,
|
|
"loss": 2.4012,
|
|
"step": 17195
|
|
},
|
|
{
|
|
"epoch": 5.432756850667299,
|
|
"grad_norm": 0.06925202479359453,
|
|
"learning_rate": 0.0010228830463776513,
|
|
"loss": 2.4934,
|
|
"step": 17200
|
|
},
|
|
{
|
|
"epoch": 5.434336255231778,
|
|
"grad_norm": 0.06264816824230261,
|
|
"learning_rate": 0.001022331740482237,
|
|
"loss": 2.4051,
|
|
"step": 17205
|
|
},
|
|
{
|
|
"epoch": 5.435915659796256,
|
|
"grad_norm": 0.06786139302297989,
|
|
"learning_rate": 0.0010217804277958828,
|
|
"loss": 2.479,
|
|
"step": 17210
|
|
},
|
|
{
|
|
"epoch": 5.437495064360736,
|
|
"grad_norm": 0.07021697171462576,
|
|
"learning_rate": 0.0010212291084862398,
|
|
"loss": 2.3518,
|
|
"step": 17215
|
|
},
|
|
{
|
|
"epoch": 5.439074468925215,
|
|
"grad_norm": 0.06656502176587634,
|
|
"learning_rate": 0.0010206777827209607,
|
|
"loss": 2.5177,
|
|
"step": 17220
|
|
},
|
|
{
|
|
"epoch": 5.440653873489694,
|
|
"grad_norm": 0.062355951345659966,
|
|
"learning_rate": 0.0010201264506676999,
|
|
"loss": 2.3527,
|
|
"step": 17225
|
|
},
|
|
{
|
|
"epoch": 5.442233278054173,
|
|
"grad_norm": 0.05410415444862466,
|
|
"learning_rate": 0.001019575112494114,
|
|
"loss": 2.4348,
|
|
"step": 17230
|
|
},
|
|
{
|
|
"epoch": 5.443812682618653,
|
|
"grad_norm": 0.0749066429064683,
|
|
"learning_rate": 0.0010190237683678613,
|
|
"loss": 2.5108,
|
|
"step": 17235
|
|
},
|
|
{
|
|
"epoch": 5.445392087183132,
|
|
"grad_norm": 0.08340716169128037,
|
|
"learning_rate": 0.0010184724184566028,
|
|
"loss": 2.4268,
|
|
"step": 17240
|
|
},
|
|
{
|
|
"epoch": 5.446971491747611,
|
|
"grad_norm": 0.0740558184922576,
|
|
"learning_rate": 0.0010179210629279992,
|
|
"loss": 2.4082,
|
|
"step": 17245
|
|
},
|
|
{
|
|
"epoch": 5.44855089631209,
|
|
"grad_norm": 0.06154226785203741,
|
|
"learning_rate": 0.0010173697019497153,
|
|
"loss": 2.5026,
|
|
"step": 17250
|
|
},
|
|
{
|
|
"epoch": 5.45013030087657,
|
|
"grad_norm": 0.061934212314750915,
|
|
"learning_rate": 0.0010168183356894156,
|
|
"loss": 2.4739,
|
|
"step": 17255
|
|
},
|
|
{
|
|
"epoch": 5.451709705441049,
|
|
"grad_norm": 0.11389227639836863,
|
|
"learning_rate": 0.0010162669643147676,
|
|
"loss": 2.5456,
|
|
"step": 17260
|
|
},
|
|
{
|
|
"epoch": 5.453289110005528,
|
|
"grad_norm": 0.06718105948625795,
|
|
"learning_rate": 0.00101571558799344,
|
|
"loss": 2.4807,
|
|
"step": 17265
|
|
},
|
|
{
|
|
"epoch": 5.454868514570007,
|
|
"grad_norm": 0.07172866049825512,
|
|
"learning_rate": 0.0010151642068931023,
|
|
"loss": 2.5509,
|
|
"step": 17270
|
|
},
|
|
{
|
|
"epoch": 5.456447919134487,
|
|
"grad_norm": 0.05455984207106459,
|
|
"learning_rate": 0.001014612821181426,
|
|
"loss": 2.3514,
|
|
"step": 17275
|
|
},
|
|
{
|
|
"epoch": 5.458027323698966,
|
|
"grad_norm": 0.058908508581679266,
|
|
"learning_rate": 0.0010140614310260843,
|
|
"loss": 2.4898,
|
|
"step": 17280
|
|
},
|
|
{
|
|
"epoch": 5.459606728263445,
|
|
"grad_norm": 0.0609450518634079,
|
|
"learning_rate": 0.0010135100365947513,
|
|
"loss": 2.412,
|
|
"step": 17285
|
|
},
|
|
{
|
|
"epoch": 5.461186132827924,
|
|
"grad_norm": 0.060831067041835656,
|
|
"learning_rate": 0.0010129586380551027,
|
|
"loss": 2.4479,
|
|
"step": 17290
|
|
},
|
|
{
|
|
"epoch": 5.462765537392403,
|
|
"grad_norm": 0.06320656444773806,
|
|
"learning_rate": 0.0010124072355748148,
|
|
"loss": 2.3886,
|
|
"step": 17295
|
|
},
|
|
{
|
|
"epoch": 5.464344941956882,
|
|
"grad_norm": 0.06503437000744444,
|
|
"learning_rate": 0.0010118558293215657,
|
|
"loss": 2.5205,
|
|
"step": 17300
|
|
},
|
|
{
|
|
"epoch": 5.465924346521361,
|
|
"grad_norm": 0.059692468617380576,
|
|
"learning_rate": 0.0010113044194630348,
|
|
"loss": 2.5019,
|
|
"step": 17305
|
|
},
|
|
{
|
|
"epoch": 5.46750375108584,
|
|
"grad_norm": 0.05487462808337112,
|
|
"learning_rate": 0.0010107530061669021,
|
|
"loss": 2.4946,
|
|
"step": 17310
|
|
},
|
|
{
|
|
"epoch": 5.46908315565032,
|
|
"grad_norm": 0.06649084437197074,
|
|
"learning_rate": 0.001010201589600849,
|
|
"loss": 2.4836,
|
|
"step": 17315
|
|
},
|
|
{
|
|
"epoch": 5.470662560214799,
|
|
"grad_norm": 0.07901367919834779,
|
|
"learning_rate": 0.0010096501699325578,
|
|
"loss": 2.5324,
|
|
"step": 17320
|
|
},
|
|
{
|
|
"epoch": 5.472241964779278,
|
|
"grad_norm": 0.0720978856191932,
|
|
"learning_rate": 0.0010090987473297113,
|
|
"loss": 2.4307,
|
|
"step": 17325
|
|
},
|
|
{
|
|
"epoch": 5.473821369343757,
|
|
"grad_norm": 0.07054417946975411,
|
|
"learning_rate": 0.001008547321959994,
|
|
"loss": 2.465,
|
|
"step": 17330
|
|
},
|
|
{
|
|
"epoch": 5.475400773908237,
|
|
"grad_norm": 0.05523354163994097,
|
|
"learning_rate": 0.001007995893991091,
|
|
"loss": 2.4587,
|
|
"step": 17335
|
|
},
|
|
{
|
|
"epoch": 5.476980178472716,
|
|
"grad_norm": 0.07469524334146507,
|
|
"learning_rate": 0.0010074444635906875,
|
|
"loss": 2.4247,
|
|
"step": 17340
|
|
},
|
|
{
|
|
"epoch": 5.478559583037195,
|
|
"grad_norm": 0.05889256875418852,
|
|
"learning_rate": 0.00100689303092647,
|
|
"loss": 2.4254,
|
|
"step": 17345
|
|
},
|
|
{
|
|
"epoch": 5.480138987601674,
|
|
"grad_norm": 0.0579479853221772,
|
|
"learning_rate": 0.0010063415961661258,
|
|
"loss": 2.5711,
|
|
"step": 17350
|
|
},
|
|
{
|
|
"epoch": 5.481718392166154,
|
|
"grad_norm": 0.07167989050379651,
|
|
"learning_rate": 0.0010057901594773431,
|
|
"loss": 2.5187,
|
|
"step": 17355
|
|
},
|
|
{
|
|
"epoch": 5.483297796730633,
|
|
"grad_norm": 0.06150262610378773,
|
|
"learning_rate": 0.0010052387210278096,
|
|
"loss": 2.4176,
|
|
"step": 17360
|
|
},
|
|
{
|
|
"epoch": 5.484877201295112,
|
|
"grad_norm": 0.060729870908633486,
|
|
"learning_rate": 0.0010046872809852147,
|
|
"loss": 2.4863,
|
|
"step": 17365
|
|
},
|
|
{
|
|
"epoch": 5.4864566058595905,
|
|
"grad_norm": 0.060531187020126144,
|
|
"learning_rate": 0.0010041358395172474,
|
|
"loss": 2.4736,
|
|
"step": 17370
|
|
},
|
|
{
|
|
"epoch": 5.48803601042407,
|
|
"grad_norm": 0.058692155895619304,
|
|
"learning_rate": 0.001003584396791598,
|
|
"loss": 2.4325,
|
|
"step": 17375
|
|
},
|
|
{
|
|
"epoch": 5.489615414988549,
|
|
"grad_norm": 0.06928908884049095,
|
|
"learning_rate": 0.001003032952975956,
|
|
"loss": 2.5205,
|
|
"step": 17380
|
|
},
|
|
{
|
|
"epoch": 5.491194819553028,
|
|
"grad_norm": 0.06234210873800841,
|
|
"learning_rate": 0.001002481508238013,
|
|
"loss": 2.4895,
|
|
"step": 17385
|
|
},
|
|
{
|
|
"epoch": 5.4927742241175075,
|
|
"grad_norm": 0.06461681068296442,
|
|
"learning_rate": 0.0010019300627454586,
|
|
"loss": 2.4935,
|
|
"step": 17390
|
|
},
|
|
{
|
|
"epoch": 5.494353628681987,
|
|
"grad_norm": 0.06541091432144396,
|
|
"learning_rate": 0.0010013786166659846,
|
|
"loss": 2.4708,
|
|
"step": 17395
|
|
},
|
|
{
|
|
"epoch": 5.495933033246466,
|
|
"grad_norm": 0.06942867472925346,
|
|
"learning_rate": 0.0010008271701672823,
|
|
"loss": 2.5403,
|
|
"step": 17400
|
|
},
|
|
{
|
|
"epoch": 5.497512437810945,
|
|
"grad_norm": 0.05600043018162233,
|
|
"learning_rate": 0.0010002757234170428,
|
|
"loss": 2.4785,
|
|
"step": 17405
|
|
},
|
|
{
|
|
"epoch": 5.4990918423754245,
|
|
"grad_norm": 0.0682744087396039,
|
|
"learning_rate": 0.0009997242765829575,
|
|
"loss": 2.453,
|
|
"step": 17410
|
|
},
|
|
{
|
|
"epoch": 5.500671246939904,
|
|
"grad_norm": 0.053749717340257915,
|
|
"learning_rate": 0.000999172829832718,
|
|
"loss": 2.3701,
|
|
"step": 17415
|
|
},
|
|
{
|
|
"epoch": 5.502250651504383,
|
|
"grad_norm": 0.06969763757322571,
|
|
"learning_rate": 0.0009986213833340155,
|
|
"loss": 2.4837,
|
|
"step": 17420
|
|
},
|
|
{
|
|
"epoch": 5.503830056068862,
|
|
"grad_norm": 0.06641338974341425,
|
|
"learning_rate": 0.0009980699372545419,
|
|
"loss": 2.4402,
|
|
"step": 17425
|
|
},
|
|
{
|
|
"epoch": 5.5054094606333415,
|
|
"grad_norm": 0.07644320259996816,
|
|
"learning_rate": 0.0009975184917619872,
|
|
"loss": 2.564,
|
|
"step": 17430
|
|
},
|
|
{
|
|
"epoch": 5.506988865197821,
|
|
"grad_norm": 0.07194674371193889,
|
|
"learning_rate": 0.000996967047024044,
|
|
"loss": 2.4611,
|
|
"step": 17435
|
|
},
|
|
{
|
|
"epoch": 5.5085682697623,
|
|
"grad_norm": 0.061514768644023464,
|
|
"learning_rate": 0.0009964156032084021,
|
|
"loss": 2.4047,
|
|
"step": 17440
|
|
},
|
|
{
|
|
"epoch": 5.510147674326779,
|
|
"grad_norm": 0.06052995303564927,
|
|
"learning_rate": 0.0009958641604827527,
|
|
"loss": 2.4338,
|
|
"step": 17445
|
|
},
|
|
{
|
|
"epoch": 5.5117270788912585,
|
|
"grad_norm": 0.07290696842185718,
|
|
"learning_rate": 0.0009953127190147858,
|
|
"loss": 2.4264,
|
|
"step": 17450
|
|
},
|
|
{
|
|
"epoch": 5.513306483455737,
|
|
"grad_norm": 0.06714044171667788,
|
|
"learning_rate": 0.0009947612789721904,
|
|
"loss": 2.535,
|
|
"step": 17455
|
|
},
|
|
{
|
|
"epoch": 5.514885888020216,
|
|
"grad_norm": 0.07210098271810879,
|
|
"learning_rate": 0.0009942098405226571,
|
|
"loss": 2.5275,
|
|
"step": 17460
|
|
},
|
|
{
|
|
"epoch": 5.516465292584695,
|
|
"grad_norm": 0.08397579566338907,
|
|
"learning_rate": 0.0009936584038338742,
|
|
"loss": 2.4639,
|
|
"step": 17465
|
|
},
|
|
{
|
|
"epoch": 5.518044697149175,
|
|
"grad_norm": 0.08134901166923597,
|
|
"learning_rate": 0.00099310696907353,
|
|
"loss": 2.4538,
|
|
"step": 17470
|
|
},
|
|
{
|
|
"epoch": 5.519624101713654,
|
|
"grad_norm": 0.06500796543368963,
|
|
"learning_rate": 0.000992555536409313,
|
|
"loss": 2.4564,
|
|
"step": 17475
|
|
},
|
|
{
|
|
"epoch": 5.521203506278133,
|
|
"grad_norm": 0.07085037792573211,
|
|
"learning_rate": 0.000992004106008909,
|
|
"loss": 2.4866,
|
|
"step": 17480
|
|
},
|
|
{
|
|
"epoch": 5.522782910842612,
|
|
"grad_norm": 0.0870294260785915,
|
|
"learning_rate": 0.000991452678040006,
|
|
"loss": 2.5225,
|
|
"step": 17485
|
|
},
|
|
{
|
|
"epoch": 5.5243623154070916,
|
|
"grad_norm": 0.07228562028116617,
|
|
"learning_rate": 0.0009909012526702887,
|
|
"loss": 2.5377,
|
|
"step": 17490
|
|
},
|
|
{
|
|
"epoch": 5.525941719971571,
|
|
"grad_norm": 0.08333109488041902,
|
|
"learning_rate": 0.0009903498300674425,
|
|
"loss": 2.4739,
|
|
"step": 17495
|
|
},
|
|
{
|
|
"epoch": 5.52752112453605,
|
|
"grad_norm": 0.06330541636085896,
|
|
"learning_rate": 0.0009897984103991511,
|
|
"loss": 2.4587,
|
|
"step": 17500
|
|
},
|
|
{
|
|
"epoch": 5.529100529100529,
|
|
"grad_norm": 0.056951745501475054,
|
|
"learning_rate": 0.0009892469938330981,
|
|
"loss": 2.5458,
|
|
"step": 17505
|
|
},
|
|
{
|
|
"epoch": 5.5306799336650085,
|
|
"grad_norm": 0.05321501357293479,
|
|
"learning_rate": 0.0009886955805369654,
|
|
"loss": 2.4727,
|
|
"step": 17510
|
|
},
|
|
{
|
|
"epoch": 5.532259338229488,
|
|
"grad_norm": 0.05821182962415963,
|
|
"learning_rate": 0.0009881441706784348,
|
|
"loss": 2.488,
|
|
"step": 17515
|
|
},
|
|
{
|
|
"epoch": 5.533838742793967,
|
|
"grad_norm": 0.06260039710575123,
|
|
"learning_rate": 0.0009875927644251855,
|
|
"loss": 2.548,
|
|
"step": 17520
|
|
},
|
|
{
|
|
"epoch": 5.535418147358445,
|
|
"grad_norm": 0.0781565614405411,
|
|
"learning_rate": 0.0009870413619448976,
|
|
"loss": 2.4291,
|
|
"step": 17525
|
|
},
|
|
{
|
|
"epoch": 5.536997551922925,
|
|
"grad_norm": 0.08756626675054255,
|
|
"learning_rate": 0.0009864899634052487,
|
|
"loss": 2.4711,
|
|
"step": 17530
|
|
},
|
|
{
|
|
"epoch": 5.538576956487404,
|
|
"grad_norm": 0.06891074352458357,
|
|
"learning_rate": 0.0009859385689739157,
|
|
"loss": 2.5283,
|
|
"step": 17535
|
|
},
|
|
{
|
|
"epoch": 5.540156361051883,
|
|
"grad_norm": 0.09753062283035774,
|
|
"learning_rate": 0.0009853871788185742,
|
|
"loss": 2.5116,
|
|
"step": 17540
|
|
},
|
|
{
|
|
"epoch": 5.541735765616362,
|
|
"grad_norm": 0.07846124506397864,
|
|
"learning_rate": 0.0009848357931068977,
|
|
"loss": 2.4321,
|
|
"step": 17545
|
|
},
|
|
{
|
|
"epoch": 5.543315170180842,
|
|
"grad_norm": 0.06235575304439498,
|
|
"learning_rate": 0.0009842844120065601,
|
|
"loss": 2.4801,
|
|
"step": 17550
|
|
},
|
|
{
|
|
"epoch": 5.544894574745321,
|
|
"grad_norm": 0.06137945296351546,
|
|
"learning_rate": 0.0009837330356852324,
|
|
"loss": 2.5149,
|
|
"step": 17555
|
|
},
|
|
{
|
|
"epoch": 5.5464739793098,
|
|
"grad_norm": 0.06121198130376255,
|
|
"learning_rate": 0.0009831816643105845,
|
|
"loss": 2.4464,
|
|
"step": 17560
|
|
},
|
|
{
|
|
"epoch": 5.548053383874279,
|
|
"grad_norm": 0.06269174896054455,
|
|
"learning_rate": 0.0009826302980502852,
|
|
"loss": 2.4166,
|
|
"step": 17565
|
|
},
|
|
{
|
|
"epoch": 5.549632788438759,
|
|
"grad_norm": 0.05876432486725242,
|
|
"learning_rate": 0.0009820789370720007,
|
|
"loss": 2.4987,
|
|
"step": 17570
|
|
},
|
|
{
|
|
"epoch": 5.551212193003238,
|
|
"grad_norm": 0.06401956237757549,
|
|
"learning_rate": 0.0009815275815433975,
|
|
"loss": 2.469,
|
|
"step": 17575
|
|
},
|
|
{
|
|
"epoch": 5.552791597567717,
|
|
"grad_norm": 0.060370186784724854,
|
|
"learning_rate": 0.0009809762316321388,
|
|
"loss": 2.6298,
|
|
"step": 17580
|
|
},
|
|
{
|
|
"epoch": 5.554371002132196,
|
|
"grad_norm": 0.0738976588830285,
|
|
"learning_rate": 0.0009804248875058862,
|
|
"loss": 2.4293,
|
|
"step": 17585
|
|
},
|
|
{
|
|
"epoch": 5.555950406696676,
|
|
"grad_norm": 0.05452476583067413,
|
|
"learning_rate": 0.0009798735493323004,
|
|
"loss": 2.4461,
|
|
"step": 17590
|
|
},
|
|
{
|
|
"epoch": 5.557529811261155,
|
|
"grad_norm": 0.0642538767603594,
|
|
"learning_rate": 0.0009793222172790395,
|
|
"loss": 2.4149,
|
|
"step": 17595
|
|
},
|
|
{
|
|
"epoch": 5.559109215825634,
|
|
"grad_norm": 0.06065766885817183,
|
|
"learning_rate": 0.0009787708915137603,
|
|
"loss": 2.4054,
|
|
"step": 17600
|
|
},
|
|
{
|
|
"epoch": 5.560688620390113,
|
|
"grad_norm": 0.06602400522764282,
|
|
"learning_rate": 0.0009782195722041174,
|
|
"loss": 2.5549,
|
|
"step": 17605
|
|
},
|
|
{
|
|
"epoch": 5.562268024954593,
|
|
"grad_norm": 0.06339566592751954,
|
|
"learning_rate": 0.000977668259517763,
|
|
"loss": 2.5543,
|
|
"step": 17610
|
|
},
|
|
{
|
|
"epoch": 5.563847429519071,
|
|
"grad_norm": 0.062141568435096384,
|
|
"learning_rate": 0.000977116953622349,
|
|
"loss": 2.5201,
|
|
"step": 17615
|
|
},
|
|
{
|
|
"epoch": 5.56542683408355,
|
|
"grad_norm": 0.05748031048120429,
|
|
"learning_rate": 0.0009765656546855226,
|
|
"loss": 2.4949,
|
|
"step": 17620
|
|
},
|
|
{
|
|
"epoch": 5.5670062386480295,
|
|
"grad_norm": 0.05976838766209523,
|
|
"learning_rate": 0.0009760143628749312,
|
|
"loss": 2.4857,
|
|
"step": 17625
|
|
},
|
|
{
|
|
"epoch": 5.568585643212509,
|
|
"grad_norm": 0.052765602810347645,
|
|
"learning_rate": 0.0009754630783582188,
|
|
"loss": 2.5324,
|
|
"step": 17630
|
|
},
|
|
{
|
|
"epoch": 5.570165047776988,
|
|
"grad_norm": 0.05883039123763326,
|
|
"learning_rate": 0.000974911801303027,
|
|
"loss": 2.4809,
|
|
"step": 17635
|
|
},
|
|
{
|
|
"epoch": 5.571744452341467,
|
|
"grad_norm": 0.08782768304510996,
|
|
"learning_rate": 0.0009743605318769967,
|
|
"loss": 2.4721,
|
|
"step": 17640
|
|
},
|
|
{
|
|
"epoch": 5.5733238569059464,
|
|
"grad_norm": 0.05907416732092804,
|
|
"learning_rate": 0.0009738092702477646,
|
|
"loss": 2.4432,
|
|
"step": 17645
|
|
},
|
|
{
|
|
"epoch": 5.574903261470426,
|
|
"grad_norm": 0.0685264295057429,
|
|
"learning_rate": 0.0009732580165829662,
|
|
"loss": 2.4322,
|
|
"step": 17650
|
|
},
|
|
{
|
|
"epoch": 5.576482666034905,
|
|
"grad_norm": 0.05612377288970562,
|
|
"learning_rate": 0.0009727067710502341,
|
|
"loss": 2.5235,
|
|
"step": 17655
|
|
},
|
|
{
|
|
"epoch": 5.578062070599384,
|
|
"grad_norm": 0.05649579334506815,
|
|
"learning_rate": 0.0009721555338171982,
|
|
"loss": 2.5793,
|
|
"step": 17660
|
|
},
|
|
{
|
|
"epoch": 5.579641475163863,
|
|
"grad_norm": 0.0651808114232073,
|
|
"learning_rate": 0.0009716043050514869,
|
|
"loss": 2.4136,
|
|
"step": 17665
|
|
},
|
|
{
|
|
"epoch": 5.581220879728343,
|
|
"grad_norm": 0.06795332757210341,
|
|
"learning_rate": 0.0009710530849207249,
|
|
"loss": 2.4253,
|
|
"step": 17670
|
|
},
|
|
{
|
|
"epoch": 5.582800284292822,
|
|
"grad_norm": 0.07165329380615267,
|
|
"learning_rate": 0.0009705018735925349,
|
|
"loss": 2.4546,
|
|
"step": 17675
|
|
},
|
|
{
|
|
"epoch": 5.584379688857301,
|
|
"grad_norm": 0.08186143079488234,
|
|
"learning_rate": 0.0009699506712345368,
|
|
"loss": 2.5271,
|
|
"step": 17680
|
|
},
|
|
{
|
|
"epoch": 5.5859590934217795,
|
|
"grad_norm": 0.06961513474365384,
|
|
"learning_rate": 0.0009693994780143473,
|
|
"loss": 2.4857,
|
|
"step": 17685
|
|
},
|
|
{
|
|
"epoch": 5.587538497986259,
|
|
"grad_norm": 0.07662382334266588,
|
|
"learning_rate": 0.0009688482940995813,
|
|
"loss": 2.4929,
|
|
"step": 17690
|
|
},
|
|
{
|
|
"epoch": 5.589117902550738,
|
|
"grad_norm": 0.07356845960705671,
|
|
"learning_rate": 0.00096829711965785,
|
|
"loss": 2.4273,
|
|
"step": 17695
|
|
},
|
|
{
|
|
"epoch": 5.590697307115217,
|
|
"grad_norm": 0.060151403946030704,
|
|
"learning_rate": 0.0009677459548567617,
|
|
"loss": 2.4488,
|
|
"step": 17700
|
|
},
|
|
{
|
|
"epoch": 5.5922767116796965,
|
|
"grad_norm": 0.07285569534142783,
|
|
"learning_rate": 0.0009671947998639228,
|
|
"loss": 2.4341,
|
|
"step": 17705
|
|
},
|
|
{
|
|
"epoch": 5.593856116244176,
|
|
"grad_norm": 0.08477060368378804,
|
|
"learning_rate": 0.0009666436548469354,
|
|
"loss": 2.4338,
|
|
"step": 17710
|
|
},
|
|
{
|
|
"epoch": 5.595435520808655,
|
|
"grad_norm": 0.08023375374829808,
|
|
"learning_rate": 0.0009660925199733996,
|
|
"loss": 2.4462,
|
|
"step": 17715
|
|
},
|
|
{
|
|
"epoch": 5.597014925373134,
|
|
"grad_norm": 0.055123349431664234,
|
|
"learning_rate": 0.000965541395410912,
|
|
"loss": 2.4051,
|
|
"step": 17720
|
|
},
|
|
{
|
|
"epoch": 5.5985943299376135,
|
|
"grad_norm": 0.06205980066098349,
|
|
"learning_rate": 0.0009649902813270655,
|
|
"loss": 2.3988,
|
|
"step": 17725
|
|
},
|
|
{
|
|
"epoch": 5.600173734502093,
|
|
"grad_norm": 0.05587189952928736,
|
|
"learning_rate": 0.000964439177889451,
|
|
"loss": 2.4597,
|
|
"step": 17730
|
|
},
|
|
{
|
|
"epoch": 5.601753139066572,
|
|
"grad_norm": 0.057582250498120324,
|
|
"learning_rate": 0.0009638880852656552,
|
|
"loss": 2.5108,
|
|
"step": 17735
|
|
},
|
|
{
|
|
"epoch": 5.603332543631051,
|
|
"grad_norm": 0.06339550672783038,
|
|
"learning_rate": 0.0009633370036232622,
|
|
"loss": 2.5168,
|
|
"step": 17740
|
|
},
|
|
{
|
|
"epoch": 5.6049119481955305,
|
|
"grad_norm": 0.057733986727466144,
|
|
"learning_rate": 0.0009627859331298521,
|
|
"loss": 2.4336,
|
|
"step": 17745
|
|
},
|
|
{
|
|
"epoch": 5.60649135276001,
|
|
"grad_norm": 0.06390094290114952,
|
|
"learning_rate": 0.0009622348739530016,
|
|
"loss": 2.5104,
|
|
"step": 17750
|
|
},
|
|
{
|
|
"epoch": 5.608070757324489,
|
|
"grad_norm": 0.08318532157702971,
|
|
"learning_rate": 0.000961683826260285,
|
|
"loss": 2.3577,
|
|
"step": 17755
|
|
},
|
|
{
|
|
"epoch": 5.609650161888968,
|
|
"grad_norm": 0.07191966723564622,
|
|
"learning_rate": 0.0009611327902192718,
|
|
"loss": 2.5667,
|
|
"step": 17760
|
|
},
|
|
{
|
|
"epoch": 5.6112295664534475,
|
|
"grad_norm": 0.06433803397010941,
|
|
"learning_rate": 0.000960581765997529,
|
|
"loss": 2.4523,
|
|
"step": 17765
|
|
},
|
|
{
|
|
"epoch": 5.612808971017926,
|
|
"grad_norm": 0.0702119826574869,
|
|
"learning_rate": 0.0009600307537626193,
|
|
"loss": 2.4192,
|
|
"step": 17770
|
|
},
|
|
{
|
|
"epoch": 5.614388375582405,
|
|
"grad_norm": 0.07572920151883997,
|
|
"learning_rate": 0.0009594797536821018,
|
|
"loss": 2.406,
|
|
"step": 17775
|
|
},
|
|
{
|
|
"epoch": 5.615967780146884,
|
|
"grad_norm": 0.06286998282516897,
|
|
"learning_rate": 0.0009589287659235326,
|
|
"loss": 2.4889,
|
|
"step": 17780
|
|
},
|
|
{
|
|
"epoch": 5.617547184711364,
|
|
"grad_norm": 0.06618826119811236,
|
|
"learning_rate": 0.0009583777906544627,
|
|
"loss": 2.5588,
|
|
"step": 17785
|
|
},
|
|
{
|
|
"epoch": 5.619126589275843,
|
|
"grad_norm": 0.0859103412752953,
|
|
"learning_rate": 0.0009578268280424413,
|
|
"loss": 2.3832,
|
|
"step": 17790
|
|
},
|
|
{
|
|
"epoch": 5.620705993840322,
|
|
"grad_norm": 0.07137853429429822,
|
|
"learning_rate": 0.000957275878255012,
|
|
"loss": 2.5166,
|
|
"step": 17795
|
|
},
|
|
{
|
|
"epoch": 5.622285398404801,
|
|
"grad_norm": 0.06758649234475232,
|
|
"learning_rate": 0.0009567249414597148,
|
|
"loss": 2.4675,
|
|
"step": 17800
|
|
},
|
|
{
|
|
"epoch": 5.623864802969281,
|
|
"grad_norm": 0.059088756328290484,
|
|
"learning_rate": 0.0009561740178240868,
|
|
"loss": 2.4409,
|
|
"step": 17805
|
|
},
|
|
{
|
|
"epoch": 5.62544420753376,
|
|
"grad_norm": 0.08041593622491673,
|
|
"learning_rate": 0.0009556231075156598,
|
|
"loss": 2.4645,
|
|
"step": 17810
|
|
},
|
|
{
|
|
"epoch": 5.627023612098239,
|
|
"grad_norm": 0.05685364383901346,
|
|
"learning_rate": 0.000955072210701962,
|
|
"loss": 2.5194,
|
|
"step": 17815
|
|
},
|
|
{
|
|
"epoch": 5.628603016662718,
|
|
"grad_norm": 0.06871867375902899,
|
|
"learning_rate": 0.0009545213275505182,
|
|
"loss": 2.4839,
|
|
"step": 17820
|
|
},
|
|
{
|
|
"epoch": 5.630182421227198,
|
|
"grad_norm": 0.08478486893781242,
|
|
"learning_rate": 0.0009539704582288479,
|
|
"loss": 2.4521,
|
|
"step": 17825
|
|
},
|
|
{
|
|
"epoch": 5.631761825791677,
|
|
"grad_norm": 0.06389375572876728,
|
|
"learning_rate": 0.0009534196029044676,
|
|
"loss": 2.4364,
|
|
"step": 17830
|
|
},
|
|
{
|
|
"epoch": 5.633341230356156,
|
|
"grad_norm": 0.06457230801716601,
|
|
"learning_rate": 0.0009528687617448882,
|
|
"loss": 2.4594,
|
|
"step": 17835
|
|
},
|
|
{
|
|
"epoch": 5.634920634920634,
|
|
"grad_norm": 0.058486007512611106,
|
|
"learning_rate": 0.0009523179349176169,
|
|
"loss": 2.4242,
|
|
"step": 17840
|
|
},
|
|
{
|
|
"epoch": 5.636500039485114,
|
|
"grad_norm": 0.0594000856430798,
|
|
"learning_rate": 0.0009517671225901574,
|
|
"loss": 2.307,
|
|
"step": 17845
|
|
},
|
|
{
|
|
"epoch": 5.638079444049593,
|
|
"grad_norm": 0.05988223693010041,
|
|
"learning_rate": 0.0009512163249300074,
|
|
"loss": 2.4774,
|
|
"step": 17850
|
|
},
|
|
{
|
|
"epoch": 5.639658848614072,
|
|
"grad_norm": 0.06761927429607896,
|
|
"learning_rate": 0.0009506655421046616,
|
|
"loss": 2.5086,
|
|
"step": 17855
|
|
},
|
|
{
|
|
"epoch": 5.641238253178551,
|
|
"grad_norm": 0.06698682955923135,
|
|
"learning_rate": 0.0009501147742816093,
|
|
"loss": 2.4664,
|
|
"step": 17860
|
|
},
|
|
{
|
|
"epoch": 5.642817657743031,
|
|
"grad_norm": 0.06694964877617479,
|
|
"learning_rate": 0.0009495640216283352,
|
|
"loss": 2.4891,
|
|
"step": 17865
|
|
},
|
|
{
|
|
"epoch": 5.64439706230751,
|
|
"grad_norm": 0.06878856138544699,
|
|
"learning_rate": 0.0009490132843123201,
|
|
"loss": 2.5097,
|
|
"step": 17870
|
|
},
|
|
{
|
|
"epoch": 5.645976466871989,
|
|
"grad_norm": 0.060872884646230065,
|
|
"learning_rate": 0.0009484625625010388,
|
|
"loss": 2.4395,
|
|
"step": 17875
|
|
},
|
|
{
|
|
"epoch": 5.647555871436468,
|
|
"grad_norm": 0.06669403635612096,
|
|
"learning_rate": 0.0009479118563619636,
|
|
"loss": 2.4011,
|
|
"step": 17880
|
|
},
|
|
{
|
|
"epoch": 5.649135276000948,
|
|
"grad_norm": 0.06080545042600969,
|
|
"learning_rate": 0.00094736116606256,
|
|
"loss": 2.4199,
|
|
"step": 17885
|
|
},
|
|
{
|
|
"epoch": 5.650714680565427,
|
|
"grad_norm": 0.05368013530662863,
|
|
"learning_rate": 0.0009468104917702894,
|
|
"loss": 2.4389,
|
|
"step": 17890
|
|
},
|
|
{
|
|
"epoch": 5.652294085129906,
|
|
"grad_norm": 0.06946420227942268,
|
|
"learning_rate": 0.0009462598336526086,
|
|
"loss": 2.5133,
|
|
"step": 17895
|
|
},
|
|
{
|
|
"epoch": 5.653873489694385,
|
|
"grad_norm": 0.06366908445615306,
|
|
"learning_rate": 0.0009457091918769685,
|
|
"loss": 2.4904,
|
|
"step": 17900
|
|
},
|
|
{
|
|
"epoch": 5.655452894258865,
|
|
"grad_norm": 0.07362315822428503,
|
|
"learning_rate": 0.0009451585666108167,
|
|
"loss": 2.5063,
|
|
"step": 17905
|
|
},
|
|
{
|
|
"epoch": 5.657032298823344,
|
|
"grad_norm": 0.09533561009131834,
|
|
"learning_rate": 0.0009446079580215945,
|
|
"loss": 2.4746,
|
|
"step": 17910
|
|
},
|
|
{
|
|
"epoch": 5.658611703387823,
|
|
"grad_norm": 0.06077729252304897,
|
|
"learning_rate": 0.0009440573662767381,
|
|
"loss": 2.5094,
|
|
"step": 17915
|
|
},
|
|
{
|
|
"epoch": 5.660191107952302,
|
|
"grad_norm": 0.0662968301309586,
|
|
"learning_rate": 0.0009435067915436794,
|
|
"loss": 2.3658,
|
|
"step": 17920
|
|
},
|
|
{
|
|
"epoch": 5.661770512516782,
|
|
"grad_norm": 0.0769241530947481,
|
|
"learning_rate": 0.0009429562339898445,
|
|
"loss": 2.5339,
|
|
"step": 17925
|
|
},
|
|
{
|
|
"epoch": 5.66334991708126,
|
|
"grad_norm": 0.06592696376918891,
|
|
"learning_rate": 0.0009424056937826538,
|
|
"loss": 2.5045,
|
|
"step": 17930
|
|
},
|
|
{
|
|
"epoch": 5.664929321645739,
|
|
"grad_norm": 0.08464207015232636,
|
|
"learning_rate": 0.0009418551710895242,
|
|
"loss": 2.5892,
|
|
"step": 17935
|
|
},
|
|
{
|
|
"epoch": 5.6665087262102185,
|
|
"grad_norm": 0.07033232531022446,
|
|
"learning_rate": 0.0009413046660778654,
|
|
"loss": 2.4835,
|
|
"step": 17940
|
|
},
|
|
{
|
|
"epoch": 5.668088130774698,
|
|
"grad_norm": 0.06718718776882328,
|
|
"learning_rate": 0.0009407541789150828,
|
|
"loss": 2.455,
|
|
"step": 17945
|
|
},
|
|
{
|
|
"epoch": 5.669667535339177,
|
|
"grad_norm": 0.07220635578147377,
|
|
"learning_rate": 0.0009402037097685759,
|
|
"loss": 2.4157,
|
|
"step": 17950
|
|
},
|
|
{
|
|
"epoch": 5.671246939903656,
|
|
"grad_norm": 0.07669643712845939,
|
|
"learning_rate": 0.0009396532588057384,
|
|
"loss": 2.5414,
|
|
"step": 17955
|
|
},
|
|
{
|
|
"epoch": 5.6728263444681355,
|
|
"grad_norm": 0.06481064584496879,
|
|
"learning_rate": 0.0009391028261939597,
|
|
"loss": 2.4669,
|
|
"step": 17960
|
|
},
|
|
{
|
|
"epoch": 5.674405749032615,
|
|
"grad_norm": 0.05562313944754461,
|
|
"learning_rate": 0.000938552412100622,
|
|
"loss": 2.428,
|
|
"step": 17965
|
|
},
|
|
{
|
|
"epoch": 5.675985153597094,
|
|
"grad_norm": 0.060502393132632275,
|
|
"learning_rate": 0.0009380020166931036,
|
|
"loss": 2.4722,
|
|
"step": 17970
|
|
},
|
|
{
|
|
"epoch": 5.677564558161573,
|
|
"grad_norm": 0.06509747628332405,
|
|
"learning_rate": 0.0009374516401387759,
|
|
"loss": 2.5836,
|
|
"step": 17975
|
|
},
|
|
{
|
|
"epoch": 5.6791439627260525,
|
|
"grad_norm": 0.05353349416140083,
|
|
"learning_rate": 0.0009369012826050045,
|
|
"loss": 2.4497,
|
|
"step": 17980
|
|
},
|
|
{
|
|
"epoch": 5.680723367290532,
|
|
"grad_norm": 0.08962014160900758,
|
|
"learning_rate": 0.0009363509442591501,
|
|
"loss": 2.4346,
|
|
"step": 17985
|
|
},
|
|
{
|
|
"epoch": 5.682302771855011,
|
|
"grad_norm": 0.0562749201074952,
|
|
"learning_rate": 0.0009358006252685666,
|
|
"loss": 2.4374,
|
|
"step": 17990
|
|
},
|
|
{
|
|
"epoch": 5.68388217641949,
|
|
"grad_norm": 0.05772870332111024,
|
|
"learning_rate": 0.0009352503258006031,
|
|
"loss": 2.4886,
|
|
"step": 17995
|
|
},
|
|
{
|
|
"epoch": 5.685461580983969,
|
|
"grad_norm": 0.056923818553177526,
|
|
"learning_rate": 0.0009347000460226019,
|
|
"loss": 2.3856,
|
|
"step": 18000
|
|
},
|
|
{
|
|
"epoch": 5.687040985548448,
|
|
"grad_norm": 0.05729033792146132,
|
|
"learning_rate": 0.0009341497861018992,
|
|
"loss": 2.48,
|
|
"step": 18005
|
|
},
|
|
{
|
|
"epoch": 5.688620390112927,
|
|
"grad_norm": 0.06981635363934481,
|
|
"learning_rate": 0.000933599546205826,
|
|
"loss": 2.4828,
|
|
"step": 18010
|
|
},
|
|
{
|
|
"epoch": 5.690199794677406,
|
|
"grad_norm": 0.05164015352664017,
|
|
"learning_rate": 0.0009330493265017061,
|
|
"loss": 2.4892,
|
|
"step": 18015
|
|
},
|
|
{
|
|
"epoch": 5.691779199241886,
|
|
"grad_norm": 0.057934213648398486,
|
|
"learning_rate": 0.0009324991271568588,
|
|
"loss": 2.4577,
|
|
"step": 18020
|
|
},
|
|
{
|
|
"epoch": 5.693358603806365,
|
|
"grad_norm": 0.05694147473041426,
|
|
"learning_rate": 0.0009319489483385955,
|
|
"loss": 2.4117,
|
|
"step": 18025
|
|
},
|
|
{
|
|
"epoch": 5.694938008370844,
|
|
"grad_norm": 0.05096467033027819,
|
|
"learning_rate": 0.0009313987902142222,
|
|
"loss": 2.4195,
|
|
"step": 18030
|
|
},
|
|
{
|
|
"epoch": 5.696517412935323,
|
|
"grad_norm": 0.057477673552068995,
|
|
"learning_rate": 0.0009308486529510386,
|
|
"loss": 2.3689,
|
|
"step": 18035
|
|
},
|
|
{
|
|
"epoch": 5.698096817499803,
|
|
"grad_norm": 0.06798699127392135,
|
|
"learning_rate": 0.0009302985367163379,
|
|
"loss": 2.4072,
|
|
"step": 18040
|
|
},
|
|
{
|
|
"epoch": 5.699676222064282,
|
|
"grad_norm": 0.05563695899204988,
|
|
"learning_rate": 0.0009297484416774066,
|
|
"loss": 2.4454,
|
|
"step": 18045
|
|
},
|
|
{
|
|
"epoch": 5.701255626628761,
|
|
"grad_norm": 0.09863742171806429,
|
|
"learning_rate": 0.0009291983680015254,
|
|
"loss": 2.4635,
|
|
"step": 18050
|
|
},
|
|
{
|
|
"epoch": 5.70283503119324,
|
|
"grad_norm": 0.0628812994958859,
|
|
"learning_rate": 0.0009286483158559679,
|
|
"loss": 2.3879,
|
|
"step": 18055
|
|
},
|
|
{
|
|
"epoch": 5.70441443575772,
|
|
"grad_norm": 0.0638576863793566,
|
|
"learning_rate": 0.0009280982854080021,
|
|
"loss": 2.4721,
|
|
"step": 18060
|
|
},
|
|
{
|
|
"epoch": 5.705993840322199,
|
|
"grad_norm": 0.05476592238086014,
|
|
"learning_rate": 0.0009275482768248881,
|
|
"loss": 2.4724,
|
|
"step": 18065
|
|
},
|
|
{
|
|
"epoch": 5.707573244886678,
|
|
"grad_norm": 0.0725197139200165,
|
|
"learning_rate": 0.0009269982902738802,
|
|
"loss": 2.6608,
|
|
"step": 18070
|
|
},
|
|
{
|
|
"epoch": 5.709152649451157,
|
|
"grad_norm": 0.06572976807566827,
|
|
"learning_rate": 0.0009264483259222259,
|
|
"loss": 2.4306,
|
|
"step": 18075
|
|
},
|
|
{
|
|
"epoch": 5.7107320540156365,
|
|
"grad_norm": 0.0681474422557826,
|
|
"learning_rate": 0.0009258983839371655,
|
|
"loss": 2.4726,
|
|
"step": 18080
|
|
},
|
|
{
|
|
"epoch": 5.712311458580115,
|
|
"grad_norm": 0.06744277166816749,
|
|
"learning_rate": 0.0009253484644859332,
|
|
"loss": 2.4281,
|
|
"step": 18085
|
|
},
|
|
{
|
|
"epoch": 5.713890863144594,
|
|
"grad_norm": 0.07761470290397195,
|
|
"learning_rate": 0.0009247985677357562,
|
|
"loss": 2.4706,
|
|
"step": 18090
|
|
},
|
|
{
|
|
"epoch": 5.715470267709073,
|
|
"grad_norm": 0.057970051991407166,
|
|
"learning_rate": 0.000924248693853854,
|
|
"loss": 2.4427,
|
|
"step": 18095
|
|
},
|
|
{
|
|
"epoch": 5.717049672273553,
|
|
"grad_norm": 0.08015945925405381,
|
|
"learning_rate": 0.0009236988430074401,
|
|
"loss": 2.4668,
|
|
"step": 18100
|
|
},
|
|
{
|
|
"epoch": 5.718629076838032,
|
|
"grad_norm": 0.06550079456203349,
|
|
"learning_rate": 0.0009231490153637202,
|
|
"loss": 2.5061,
|
|
"step": 18105
|
|
},
|
|
{
|
|
"epoch": 5.720208481402511,
|
|
"grad_norm": 0.0668134312212946,
|
|
"learning_rate": 0.0009225992110898941,
|
|
"loss": 2.411,
|
|
"step": 18110
|
|
},
|
|
{
|
|
"epoch": 5.72178788596699,
|
|
"grad_norm": 0.10618086040739694,
|
|
"learning_rate": 0.0009220494303531534,
|
|
"loss": 2.4976,
|
|
"step": 18115
|
|
},
|
|
{
|
|
"epoch": 5.72336729053147,
|
|
"grad_norm": 0.10057209914764205,
|
|
"learning_rate": 0.0009214996733206826,
|
|
"loss": 2.4485,
|
|
"step": 18120
|
|
},
|
|
{
|
|
"epoch": 5.724946695095949,
|
|
"grad_norm": 0.09196840925031985,
|
|
"learning_rate": 0.00092094994015966,
|
|
"loss": 2.4374,
|
|
"step": 18125
|
|
},
|
|
{
|
|
"epoch": 5.726526099660428,
|
|
"grad_norm": 0.08473209885560502,
|
|
"learning_rate": 0.000920400231037255,
|
|
"loss": 2.5291,
|
|
"step": 18130
|
|
},
|
|
{
|
|
"epoch": 5.728105504224907,
|
|
"grad_norm": 0.07124496206061115,
|
|
"learning_rate": 0.0009198505461206318,
|
|
"loss": 2.4709,
|
|
"step": 18135
|
|
},
|
|
{
|
|
"epoch": 5.729684908789387,
|
|
"grad_norm": 0.06020663786883897,
|
|
"learning_rate": 0.0009193008855769452,
|
|
"loss": 2.4731,
|
|
"step": 18140
|
|
},
|
|
{
|
|
"epoch": 5.731264313353866,
|
|
"grad_norm": 0.04977407449166733,
|
|
"learning_rate": 0.0009187512495733432,
|
|
"loss": 2.4211,
|
|
"step": 18145
|
|
},
|
|
{
|
|
"epoch": 5.732843717918345,
|
|
"grad_norm": 0.0545026125366248,
|
|
"learning_rate": 0.0009182016382769676,
|
|
"loss": 2.5419,
|
|
"step": 18150
|
|
},
|
|
{
|
|
"epoch": 5.7344231224828235,
|
|
"grad_norm": 0.05529547977282229,
|
|
"learning_rate": 0.0009176520518549512,
|
|
"loss": 2.4584,
|
|
"step": 18155
|
|
},
|
|
{
|
|
"epoch": 5.736002527047303,
|
|
"grad_norm": 0.053943009477869834,
|
|
"learning_rate": 0.0009171024904744195,
|
|
"loss": 2.3883,
|
|
"step": 18160
|
|
},
|
|
{
|
|
"epoch": 5.737581931611782,
|
|
"grad_norm": 0.0785572884148887,
|
|
"learning_rate": 0.0009165529543024909,
|
|
"loss": 2.4702,
|
|
"step": 18165
|
|
},
|
|
{
|
|
"epoch": 5.739161336176261,
|
|
"grad_norm": 0.06983162962876663,
|
|
"learning_rate": 0.0009160034435062755,
|
|
"loss": 2.5775,
|
|
"step": 18170
|
|
},
|
|
{
|
|
"epoch": 5.7407407407407405,
|
|
"grad_norm": 0.07317556418866439,
|
|
"learning_rate": 0.0009154539582528766,
|
|
"loss": 2.4661,
|
|
"step": 18175
|
|
},
|
|
{
|
|
"epoch": 5.74232014530522,
|
|
"grad_norm": 0.05152038341836128,
|
|
"learning_rate": 0.0009149044987093887,
|
|
"loss": 2.4231,
|
|
"step": 18180
|
|
},
|
|
{
|
|
"epoch": 5.743899549869699,
|
|
"grad_norm": 0.06470217308342688,
|
|
"learning_rate": 0.000914355065042899,
|
|
"loss": 2.3834,
|
|
"step": 18185
|
|
},
|
|
{
|
|
"epoch": 5.745478954434178,
|
|
"grad_norm": 0.05801791502540421,
|
|
"learning_rate": 0.0009138056574204869,
|
|
"loss": 2.4993,
|
|
"step": 18190
|
|
},
|
|
{
|
|
"epoch": 5.7470583589986575,
|
|
"grad_norm": 0.049807297826352216,
|
|
"learning_rate": 0.0009132562760092234,
|
|
"loss": 2.3345,
|
|
"step": 18195
|
|
},
|
|
{
|
|
"epoch": 5.748637763563137,
|
|
"grad_norm": 0.05176149164337185,
|
|
"learning_rate": 0.0009127069209761725,
|
|
"loss": 2.3969,
|
|
"step": 18200
|
|
},
|
|
{
|
|
"epoch": 5.750217168127616,
|
|
"grad_norm": 0.05540983496219784,
|
|
"learning_rate": 0.0009121575924883891,
|
|
"loss": 2.5172,
|
|
"step": 18205
|
|
},
|
|
{
|
|
"epoch": 5.751796572692095,
|
|
"grad_norm": 0.07062737019904822,
|
|
"learning_rate": 0.0009116082907129204,
|
|
"loss": 2.4267,
|
|
"step": 18210
|
|
},
|
|
{
|
|
"epoch": 5.7533759772565745,
|
|
"grad_norm": 0.05713448525959423,
|
|
"learning_rate": 0.0009110590158168061,
|
|
"loss": 2.4438,
|
|
"step": 18215
|
|
},
|
|
{
|
|
"epoch": 5.754955381821054,
|
|
"grad_norm": 0.07565938761972303,
|
|
"learning_rate": 0.0009105097679670763,
|
|
"loss": 2.5628,
|
|
"step": 18220
|
|
},
|
|
{
|
|
"epoch": 5.756534786385533,
|
|
"grad_norm": 0.07144974280431203,
|
|
"learning_rate": 0.000909960547330755,
|
|
"loss": 2.4112,
|
|
"step": 18225
|
|
},
|
|
{
|
|
"epoch": 5.758114190950012,
|
|
"grad_norm": 0.08004658227388475,
|
|
"learning_rate": 0.0009094113540748556,
|
|
"loss": 2.4569,
|
|
"step": 18230
|
|
},
|
|
{
|
|
"epoch": 5.7596935955144914,
|
|
"grad_norm": 0.06325848747050856,
|
|
"learning_rate": 0.0009088621883663843,
|
|
"loss": 2.473,
|
|
"step": 18235
|
|
},
|
|
{
|
|
"epoch": 5.761273000078971,
|
|
"grad_norm": 0.0658128971018369,
|
|
"learning_rate": 0.0009083130503723397,
|
|
"loss": 2.4052,
|
|
"step": 18240
|
|
},
|
|
{
|
|
"epoch": 5.762852404643449,
|
|
"grad_norm": 0.07131593128764628,
|
|
"learning_rate": 0.0009077639402597104,
|
|
"loss": 2.4612,
|
|
"step": 18245
|
|
},
|
|
{
|
|
"epoch": 5.764431809207928,
|
|
"grad_norm": 0.05815984418376559,
|
|
"learning_rate": 0.0009072148581954777,
|
|
"loss": 2.445,
|
|
"step": 18250
|
|
},
|
|
{
|
|
"epoch": 5.7660112137724076,
|
|
"grad_norm": 0.06696390988086089,
|
|
"learning_rate": 0.000906665804346614,
|
|
"loss": 2.3816,
|
|
"step": 18255
|
|
},
|
|
{
|
|
"epoch": 5.767590618336887,
|
|
"grad_norm": 0.06072912908978546,
|
|
"learning_rate": 0.0009061167788800824,
|
|
"loss": 2.4931,
|
|
"step": 18260
|
|
},
|
|
{
|
|
"epoch": 5.769170022901366,
|
|
"grad_norm": 0.054937370178212874,
|
|
"learning_rate": 0.0009055677819628388,
|
|
"loss": 2.4928,
|
|
"step": 18265
|
|
},
|
|
{
|
|
"epoch": 5.770749427465845,
|
|
"grad_norm": 0.06507034650605066,
|
|
"learning_rate": 0.0009050188137618295,
|
|
"loss": 2.3613,
|
|
"step": 18270
|
|
},
|
|
{
|
|
"epoch": 5.7723288320303245,
|
|
"grad_norm": 0.05901245015000263,
|
|
"learning_rate": 0.0009044698744439918,
|
|
"loss": 2.4248,
|
|
"step": 18275
|
|
},
|
|
{
|
|
"epoch": 5.773908236594804,
|
|
"grad_norm": 0.05660425661902344,
|
|
"learning_rate": 0.0009039209641762551,
|
|
"loss": 2.4165,
|
|
"step": 18280
|
|
},
|
|
{
|
|
"epoch": 5.775487641159283,
|
|
"grad_norm": 0.06049866366270378,
|
|
"learning_rate": 0.000903372083125539,
|
|
"loss": 2.4223,
|
|
"step": 18285
|
|
},
|
|
{
|
|
"epoch": 5.777067045723762,
|
|
"grad_norm": 0.060957914090694364,
|
|
"learning_rate": 0.0009028232314587555,
|
|
"loss": 2.4436,
|
|
"step": 18290
|
|
},
|
|
{
|
|
"epoch": 5.7786464502882415,
|
|
"grad_norm": 0.053613679381113895,
|
|
"learning_rate": 0.0009022744093428063,
|
|
"loss": 2.5136,
|
|
"step": 18295
|
|
},
|
|
{
|
|
"epoch": 5.780225854852721,
|
|
"grad_norm": 0.05835608721294925,
|
|
"learning_rate": 0.0009017256169445846,
|
|
"loss": 2.4515,
|
|
"step": 18300
|
|
},
|
|
{
|
|
"epoch": 5.7818052594172,
|
|
"grad_norm": 0.07079845036186541,
|
|
"learning_rate": 0.0009011768544309751,
|
|
"loss": 2.3503,
|
|
"step": 18305
|
|
},
|
|
{
|
|
"epoch": 5.783384663981679,
|
|
"grad_norm": 0.0652389566633875,
|
|
"learning_rate": 0.0009006281219688525,
|
|
"loss": 2.4271,
|
|
"step": 18310
|
|
},
|
|
{
|
|
"epoch": 5.784964068546158,
|
|
"grad_norm": 0.06330923838643326,
|
|
"learning_rate": 0.0009000794197250837,
|
|
"loss": 2.376,
|
|
"step": 18315
|
|
},
|
|
{
|
|
"epoch": 5.786543473110637,
|
|
"grad_norm": 0.06052683005905966,
|
|
"learning_rate": 0.0008995307478665246,
|
|
"loss": 2.4239,
|
|
"step": 18320
|
|
},
|
|
{
|
|
"epoch": 5.788122877675116,
|
|
"grad_norm": 0.06390808341534668,
|
|
"learning_rate": 0.000898982106560023,
|
|
"loss": 2.3723,
|
|
"step": 18325
|
|
},
|
|
{
|
|
"epoch": 5.789702282239595,
|
|
"grad_norm": 0.06987840184368298,
|
|
"learning_rate": 0.0008984334959724177,
|
|
"loss": 2.5108,
|
|
"step": 18330
|
|
},
|
|
{
|
|
"epoch": 5.791281686804075,
|
|
"grad_norm": 0.05559648943533013,
|
|
"learning_rate": 0.0008978849162705369,
|
|
"loss": 2.4836,
|
|
"step": 18335
|
|
},
|
|
{
|
|
"epoch": 5.792861091368554,
|
|
"grad_norm": 0.059576872584581095,
|
|
"learning_rate": 0.000897336367621201,
|
|
"loss": 2.5455,
|
|
"step": 18340
|
|
},
|
|
{
|
|
"epoch": 5.794440495933033,
|
|
"grad_norm": 0.06256260950604808,
|
|
"learning_rate": 0.0008967878501912199,
|
|
"loss": 2.4057,
|
|
"step": 18345
|
|
},
|
|
{
|
|
"epoch": 5.796019900497512,
|
|
"grad_norm": 0.0634177189700817,
|
|
"learning_rate": 0.0008962393641473935,
|
|
"loss": 2.4772,
|
|
"step": 18350
|
|
},
|
|
{
|
|
"epoch": 5.797599305061992,
|
|
"grad_norm": 0.061569379281350534,
|
|
"learning_rate": 0.000895690909656514,
|
|
"loss": 2.4593,
|
|
"step": 18355
|
|
},
|
|
{
|
|
"epoch": 5.799178709626471,
|
|
"grad_norm": 0.07243432736085856,
|
|
"learning_rate": 0.0008951424868853622,
|
|
"loss": 2.4282,
|
|
"step": 18360
|
|
},
|
|
{
|
|
"epoch": 5.80075811419095,
|
|
"grad_norm": 0.08064467427424583,
|
|
"learning_rate": 0.0008945940960007105,
|
|
"loss": 2.4573,
|
|
"step": 18365
|
|
},
|
|
{
|
|
"epoch": 5.802337518755429,
|
|
"grad_norm": 0.05480200407215235,
|
|
"learning_rate": 0.0008940457371693207,
|
|
"loss": 2.4848,
|
|
"step": 18370
|
|
},
|
|
{
|
|
"epoch": 5.803916923319909,
|
|
"grad_norm": 0.05697400727478003,
|
|
"learning_rate": 0.0008934974105579448,
|
|
"loss": 2.3826,
|
|
"step": 18375
|
|
},
|
|
{
|
|
"epoch": 5.805496327884388,
|
|
"grad_norm": 0.060818124238185944,
|
|
"learning_rate": 0.0008929491163333263,
|
|
"loss": 2.4257,
|
|
"step": 18380
|
|
},
|
|
{
|
|
"epoch": 5.807075732448867,
|
|
"grad_norm": 0.05427907215517275,
|
|
"learning_rate": 0.0008924008546621977,
|
|
"loss": 2.5639,
|
|
"step": 18385
|
|
},
|
|
{
|
|
"epoch": 5.808655137013346,
|
|
"grad_norm": 0.06286923950584318,
|
|
"learning_rate": 0.0008918526257112813,
|
|
"loss": 2.4455,
|
|
"step": 18390
|
|
},
|
|
{
|
|
"epoch": 5.810234541577826,
|
|
"grad_norm": 0.05475356012061112,
|
|
"learning_rate": 0.0008913044296472907,
|
|
"loss": 2.3673,
|
|
"step": 18395
|
|
},
|
|
{
|
|
"epoch": 5.811813946142305,
|
|
"grad_norm": 0.09123960529197869,
|
|
"learning_rate": 0.0008907562666369283,
|
|
"loss": 2.4212,
|
|
"step": 18400
|
|
},
|
|
{
|
|
"epoch": 5.813393350706783,
|
|
"grad_norm": 0.06160750325735122,
|
|
"learning_rate": 0.0008902081368468877,
|
|
"loss": 2.4737,
|
|
"step": 18405
|
|
},
|
|
{
|
|
"epoch": 5.8149727552712625,
|
|
"grad_norm": 0.06042864285383974,
|
|
"learning_rate": 0.0008896600404438512,
|
|
"loss": 2.4076,
|
|
"step": 18410
|
|
},
|
|
{
|
|
"epoch": 5.816552159835742,
|
|
"grad_norm": 0.062130405319900335,
|
|
"learning_rate": 0.0008891119775944908,
|
|
"loss": 2.4252,
|
|
"step": 18415
|
|
},
|
|
{
|
|
"epoch": 5.818131564400221,
|
|
"grad_norm": 0.052148374032821625,
|
|
"learning_rate": 0.0008885639484654701,
|
|
"loss": 2.3184,
|
|
"step": 18420
|
|
},
|
|
{
|
|
"epoch": 5.8197109689647,
|
|
"grad_norm": 0.07799748157479257,
|
|
"learning_rate": 0.0008880159532234403,
|
|
"loss": 2.4437,
|
|
"step": 18425
|
|
},
|
|
{
|
|
"epoch": 5.821290373529179,
|
|
"grad_norm": 0.06354087937293497,
|
|
"learning_rate": 0.000887467992035044,
|
|
"loss": 2.3482,
|
|
"step": 18430
|
|
},
|
|
{
|
|
"epoch": 5.822869778093659,
|
|
"grad_norm": 0.06776266791706592,
|
|
"learning_rate": 0.0008869200650669123,
|
|
"loss": 2.4484,
|
|
"step": 18435
|
|
},
|
|
{
|
|
"epoch": 5.824449182658138,
|
|
"grad_norm": 0.06986149822627638,
|
|
"learning_rate": 0.0008863721724856658,
|
|
"loss": 2.4601,
|
|
"step": 18440
|
|
},
|
|
{
|
|
"epoch": 5.826028587222617,
|
|
"grad_norm": 0.0636207961069725,
|
|
"learning_rate": 0.0008858243144579162,
|
|
"loss": 2.5333,
|
|
"step": 18445
|
|
},
|
|
{
|
|
"epoch": 5.827607991787096,
|
|
"grad_norm": 0.05633739964135409,
|
|
"learning_rate": 0.0008852764911502629,
|
|
"loss": 2.4579,
|
|
"step": 18450
|
|
},
|
|
{
|
|
"epoch": 5.829187396351576,
|
|
"grad_norm": 0.07580112598710743,
|
|
"learning_rate": 0.0008847287027292959,
|
|
"loss": 2.4581,
|
|
"step": 18455
|
|
},
|
|
{
|
|
"epoch": 5.830766800916055,
|
|
"grad_norm": 0.0584897564914947,
|
|
"learning_rate": 0.0008841809493615937,
|
|
"loss": 2.4064,
|
|
"step": 18460
|
|
},
|
|
{
|
|
"epoch": 5.832346205480534,
|
|
"grad_norm": 0.05670195848256627,
|
|
"learning_rate": 0.0008836332312137245,
|
|
"loss": 2.4902,
|
|
"step": 18465
|
|
},
|
|
{
|
|
"epoch": 5.8339256100450125,
|
|
"grad_norm": 0.06261669554059426,
|
|
"learning_rate": 0.0008830855484522467,
|
|
"loss": 2.3873,
|
|
"step": 18470
|
|
},
|
|
{
|
|
"epoch": 5.835505014609492,
|
|
"grad_norm": 0.05497956371246285,
|
|
"learning_rate": 0.0008825379012437065,
|
|
"loss": 2.3732,
|
|
"step": 18475
|
|
},
|
|
{
|
|
"epoch": 5.837084419173971,
|
|
"grad_norm": 0.058994243345202174,
|
|
"learning_rate": 0.0008819902897546399,
|
|
"loss": 2.44,
|
|
"step": 18480
|
|
},
|
|
{
|
|
"epoch": 5.83866382373845,
|
|
"grad_norm": 0.05702003258862989,
|
|
"learning_rate": 0.0008814427141515724,
|
|
"loss": 2.4532,
|
|
"step": 18485
|
|
},
|
|
{
|
|
"epoch": 5.8402432283029295,
|
|
"grad_norm": 0.054639243278408696,
|
|
"learning_rate": 0.0008808951746010176,
|
|
"loss": 2.4148,
|
|
"step": 18490
|
|
},
|
|
{
|
|
"epoch": 5.841822632867409,
|
|
"grad_norm": 0.06417166303687594,
|
|
"learning_rate": 0.0008803476712694799,
|
|
"loss": 2.5821,
|
|
"step": 18495
|
|
},
|
|
{
|
|
"epoch": 5.843402037431888,
|
|
"grad_norm": 0.06281475530121881,
|
|
"learning_rate": 0.0008798002043234507,
|
|
"loss": 2.4995,
|
|
"step": 18500
|
|
},
|
|
{
|
|
"epoch": 5.844981441996367,
|
|
"grad_norm": 0.05850353517807507,
|
|
"learning_rate": 0.0008792527739294109,
|
|
"loss": 2.5369,
|
|
"step": 18505
|
|
},
|
|
{
|
|
"epoch": 5.8465608465608465,
|
|
"grad_norm": 0.06933387569229595,
|
|
"learning_rate": 0.0008787053802538315,
|
|
"loss": 2.5295,
|
|
"step": 18510
|
|
},
|
|
{
|
|
"epoch": 5.848140251125326,
|
|
"grad_norm": 0.05310284438947429,
|
|
"learning_rate": 0.0008781580234631707,
|
|
"loss": 2.436,
|
|
"step": 18515
|
|
},
|
|
{
|
|
"epoch": 5.849719655689805,
|
|
"grad_norm": 0.05225047738231209,
|
|
"learning_rate": 0.0008776107037238768,
|
|
"loss": 2.4643,
|
|
"step": 18520
|
|
},
|
|
{
|
|
"epoch": 5.851299060254284,
|
|
"grad_norm": 0.06069573506210318,
|
|
"learning_rate": 0.000877063421202386,
|
|
"loss": 2.5021,
|
|
"step": 18525
|
|
},
|
|
{
|
|
"epoch": 5.8528784648187635,
|
|
"grad_norm": 0.06725734461704204,
|
|
"learning_rate": 0.0008765161760651228,
|
|
"loss": 2.448,
|
|
"step": 18530
|
|
},
|
|
{
|
|
"epoch": 5.854457869383243,
|
|
"grad_norm": 0.06705519985775174,
|
|
"learning_rate": 0.000875968968478502,
|
|
"loss": 2.4865,
|
|
"step": 18535
|
|
},
|
|
{
|
|
"epoch": 5.856037273947722,
|
|
"grad_norm": 0.06879518724372008,
|
|
"learning_rate": 0.0008754217986089252,
|
|
"loss": 2.544,
|
|
"step": 18540
|
|
},
|
|
{
|
|
"epoch": 5.857616678512201,
|
|
"grad_norm": 0.06783341102034925,
|
|
"learning_rate": 0.0008748746666227837,
|
|
"loss": 2.4572,
|
|
"step": 18545
|
|
},
|
|
{
|
|
"epoch": 5.8591960830766805,
|
|
"grad_norm": 0.06202947189676426,
|
|
"learning_rate": 0.0008743275726864567,
|
|
"loss": 2.5136,
|
|
"step": 18550
|
|
},
|
|
{
|
|
"epoch": 5.86077548764116,
|
|
"grad_norm": 0.06858077927005182,
|
|
"learning_rate": 0.0008737805169663113,
|
|
"loss": 2.5023,
|
|
"step": 18555
|
|
},
|
|
{
|
|
"epoch": 5.862354892205638,
|
|
"grad_norm": 0.07833625418904053,
|
|
"learning_rate": 0.0008732334996287048,
|
|
"loss": 2.4177,
|
|
"step": 18560
|
|
},
|
|
{
|
|
"epoch": 5.863934296770117,
|
|
"grad_norm": 0.06708193322427108,
|
|
"learning_rate": 0.000872686520839981,
|
|
"loss": 2.4835,
|
|
"step": 18565
|
|
},
|
|
{
|
|
"epoch": 5.865513701334597,
|
|
"grad_norm": 0.06777483951535948,
|
|
"learning_rate": 0.000872139580766473,
|
|
"loss": 2.3961,
|
|
"step": 18570
|
|
},
|
|
{
|
|
"epoch": 5.867093105899076,
|
|
"grad_norm": 0.05897782997028614,
|
|
"learning_rate": 0.0008715926795745013,
|
|
"loss": 2.4551,
|
|
"step": 18575
|
|
},
|
|
{
|
|
"epoch": 5.868672510463555,
|
|
"grad_norm": 0.05648603893901046,
|
|
"learning_rate": 0.000871045817430375,
|
|
"loss": 2.4382,
|
|
"step": 18580
|
|
},
|
|
{
|
|
"epoch": 5.870251915028034,
|
|
"grad_norm": 0.058115341169490256,
|
|
"learning_rate": 0.0008704989945003925,
|
|
"loss": 2.521,
|
|
"step": 18585
|
|
},
|
|
{
|
|
"epoch": 5.871831319592514,
|
|
"grad_norm": 0.058398666016500775,
|
|
"learning_rate": 0.0008699522109508381,
|
|
"loss": 2.5097,
|
|
"step": 18590
|
|
},
|
|
{
|
|
"epoch": 5.873410724156993,
|
|
"grad_norm": 0.05148611445163005,
|
|
"learning_rate": 0.0008694054669479849,
|
|
"loss": 2.4309,
|
|
"step": 18595
|
|
},
|
|
{
|
|
"epoch": 5.874990128721472,
|
|
"grad_norm": 0.05600237584439396,
|
|
"learning_rate": 0.0008688587626580953,
|
|
"loss": 2.4497,
|
|
"step": 18600
|
|
},
|
|
{
|
|
"epoch": 5.876569533285951,
|
|
"grad_norm": 0.055734415645769995,
|
|
"learning_rate": 0.000868312098247418,
|
|
"loss": 2.4417,
|
|
"step": 18605
|
|
},
|
|
{
|
|
"epoch": 5.878148937850431,
|
|
"grad_norm": 0.049748014401075,
|
|
"learning_rate": 0.0008677654738821904,
|
|
"loss": 2.4085,
|
|
"step": 18610
|
|
},
|
|
{
|
|
"epoch": 5.87972834241491,
|
|
"grad_norm": 0.05825774563921929,
|
|
"learning_rate": 0.0008672188897286372,
|
|
"loss": 2.4668,
|
|
"step": 18615
|
|
},
|
|
{
|
|
"epoch": 5.881307746979389,
|
|
"grad_norm": 0.057123010762611504,
|
|
"learning_rate": 0.000866672345952971,
|
|
"loss": 2.4945,
|
|
"step": 18620
|
|
},
|
|
{
|
|
"epoch": 5.882887151543868,
|
|
"grad_norm": 0.07302755947184687,
|
|
"learning_rate": 0.0008661258427213929,
|
|
"loss": 2.5048,
|
|
"step": 18625
|
|
},
|
|
{
|
|
"epoch": 5.884466556108347,
|
|
"grad_norm": 0.06749009836319973,
|
|
"learning_rate": 0.0008655793802000904,
|
|
"loss": 2.4409,
|
|
"step": 18630
|
|
},
|
|
{
|
|
"epoch": 5.886045960672826,
|
|
"grad_norm": 0.060230827309487846,
|
|
"learning_rate": 0.0008650329585552399,
|
|
"loss": 2.4291,
|
|
"step": 18635
|
|
},
|
|
{
|
|
"epoch": 5.887625365237305,
|
|
"grad_norm": 0.07520235279964461,
|
|
"learning_rate": 0.0008644865779530043,
|
|
"loss": 2.4758,
|
|
"step": 18640
|
|
},
|
|
{
|
|
"epoch": 5.889204769801784,
|
|
"grad_norm": 0.06257962555596458,
|
|
"learning_rate": 0.0008639402385595341,
|
|
"loss": 2.3305,
|
|
"step": 18645
|
|
},
|
|
{
|
|
"epoch": 5.890784174366264,
|
|
"grad_norm": 0.05010892685311434,
|
|
"learning_rate": 0.0008633939405409684,
|
|
"loss": 2.4512,
|
|
"step": 18650
|
|
},
|
|
{
|
|
"epoch": 5.892363578930743,
|
|
"grad_norm": 0.05780345477019086,
|
|
"learning_rate": 0.0008628476840634326,
|
|
"loss": 2.3788,
|
|
"step": 18655
|
|
},
|
|
{
|
|
"epoch": 5.893942983495222,
|
|
"grad_norm": 0.07516486107590714,
|
|
"learning_rate": 0.0008623014692930398,
|
|
"loss": 2.4626,
|
|
"step": 18660
|
|
},
|
|
{
|
|
"epoch": 5.895522388059701,
|
|
"grad_norm": 0.07086214642004705,
|
|
"learning_rate": 0.0008617552963958903,
|
|
"loss": 2.4828,
|
|
"step": 18665
|
|
},
|
|
{
|
|
"epoch": 5.897101792624181,
|
|
"grad_norm": 0.06525854181400412,
|
|
"learning_rate": 0.0008612091655380717,
|
|
"loss": 2.4876,
|
|
"step": 18670
|
|
},
|
|
{
|
|
"epoch": 5.89868119718866,
|
|
"grad_norm": 0.06473701514578614,
|
|
"learning_rate": 0.0008606630768856596,
|
|
"loss": 2.4663,
|
|
"step": 18675
|
|
},
|
|
{
|
|
"epoch": 5.900260601753139,
|
|
"grad_norm": 0.11143040114895895,
|
|
"learning_rate": 0.000860117030604715,
|
|
"loss": 2.4441,
|
|
"step": 18680
|
|
},
|
|
{
|
|
"epoch": 5.901840006317618,
|
|
"grad_norm": 0.08641539861758633,
|
|
"learning_rate": 0.0008595710268612881,
|
|
"loss": 2.4274,
|
|
"step": 18685
|
|
},
|
|
{
|
|
"epoch": 5.903419410882098,
|
|
"grad_norm": 0.07521524967995398,
|
|
"learning_rate": 0.0008590250658214147,
|
|
"loss": 2.5591,
|
|
"step": 18690
|
|
},
|
|
{
|
|
"epoch": 5.904998815446577,
|
|
"grad_norm": 0.0536980553622514,
|
|
"learning_rate": 0.0008584791476511178,
|
|
"loss": 2.4091,
|
|
"step": 18695
|
|
},
|
|
{
|
|
"epoch": 5.906578220011056,
|
|
"grad_norm": 0.05573623517051879,
|
|
"learning_rate": 0.0008579332725164082,
|
|
"loss": 2.4902,
|
|
"step": 18700
|
|
},
|
|
{
|
|
"epoch": 5.908157624575535,
|
|
"grad_norm": 0.05927694066712838,
|
|
"learning_rate": 0.0008573874405832827,
|
|
"loss": 2.6475,
|
|
"step": 18705
|
|
},
|
|
{
|
|
"epoch": 5.909737029140015,
|
|
"grad_norm": 0.07787533712544467,
|
|
"learning_rate": 0.0008568416520177248,
|
|
"loss": 2.444,
|
|
"step": 18710
|
|
},
|
|
{
|
|
"epoch": 5.911316433704494,
|
|
"grad_norm": 0.07580753073276318,
|
|
"learning_rate": 0.0008562959069857063,
|
|
"loss": 2.4167,
|
|
"step": 18715
|
|
},
|
|
{
|
|
"epoch": 5.912895838268972,
|
|
"grad_norm": 0.08533835749634594,
|
|
"learning_rate": 0.0008557502056531843,
|
|
"loss": 2.3843,
|
|
"step": 18720
|
|
},
|
|
{
|
|
"epoch": 5.9144752428334515,
|
|
"grad_norm": 0.06919443483491658,
|
|
"learning_rate": 0.0008552045481861033,
|
|
"loss": 2.438,
|
|
"step": 18725
|
|
},
|
|
{
|
|
"epoch": 5.916054647397931,
|
|
"grad_norm": 0.08056489336824202,
|
|
"learning_rate": 0.000854658934750394,
|
|
"loss": 2.4123,
|
|
"step": 18730
|
|
},
|
|
{
|
|
"epoch": 5.91763405196241,
|
|
"grad_norm": 0.05431615132944134,
|
|
"learning_rate": 0.0008541133655119736,
|
|
"loss": 2.4532,
|
|
"step": 18735
|
|
},
|
|
{
|
|
"epoch": 5.919213456526889,
|
|
"grad_norm": 0.07022673657018344,
|
|
"learning_rate": 0.0008535678406367471,
|
|
"loss": 2.6008,
|
|
"step": 18740
|
|
},
|
|
{
|
|
"epoch": 5.9207928610913685,
|
|
"grad_norm": 0.08312006482231832,
|
|
"learning_rate": 0.0008530223602906045,
|
|
"loss": 2.3474,
|
|
"step": 18745
|
|
},
|
|
{
|
|
"epoch": 5.922372265655848,
|
|
"grad_norm": 0.07106942527735756,
|
|
"learning_rate": 0.0008524769246394232,
|
|
"loss": 2.5367,
|
|
"step": 18750
|
|
},
|
|
{
|
|
"epoch": 5.923951670220327,
|
|
"grad_norm": 0.054139765634404945,
|
|
"learning_rate": 0.0008519315338490666,
|
|
"loss": 2.4478,
|
|
"step": 18755
|
|
},
|
|
{
|
|
"epoch": 5.925531074784806,
|
|
"grad_norm": 0.05964575416082492,
|
|
"learning_rate": 0.0008513861880853842,
|
|
"loss": 2.4661,
|
|
"step": 18760
|
|
},
|
|
{
|
|
"epoch": 5.9271104793492855,
|
|
"grad_norm": 0.06845435248458885,
|
|
"learning_rate": 0.000850840887514213,
|
|
"loss": 2.4567,
|
|
"step": 18765
|
|
},
|
|
{
|
|
"epoch": 5.928689883913765,
|
|
"grad_norm": 0.06691190090607119,
|
|
"learning_rate": 0.0008502956323013742,
|
|
"loss": 2.4561,
|
|
"step": 18770
|
|
},
|
|
{
|
|
"epoch": 5.930269288478244,
|
|
"grad_norm": 0.05742529746381336,
|
|
"learning_rate": 0.0008497504226126776,
|
|
"loss": 2.334,
|
|
"step": 18775
|
|
},
|
|
{
|
|
"epoch": 5.931848693042723,
|
|
"grad_norm": 0.053918374898314594,
|
|
"learning_rate": 0.0008492052586139176,
|
|
"loss": 2.414,
|
|
"step": 18780
|
|
},
|
|
{
|
|
"epoch": 5.9334280976072025,
|
|
"grad_norm": 0.06588285090390467,
|
|
"learning_rate": 0.0008486601404708748,
|
|
"loss": 2.4861,
|
|
"step": 18785
|
|
},
|
|
{
|
|
"epoch": 5.935007502171681,
|
|
"grad_norm": 0.06592636654504706,
|
|
"learning_rate": 0.0008481150683493165,
|
|
"loss": 2.5156,
|
|
"step": 18790
|
|
},
|
|
{
|
|
"epoch": 5.93658690673616,
|
|
"grad_norm": 0.06268926063718763,
|
|
"learning_rate": 0.000847570042414995,
|
|
"loss": 2.4163,
|
|
"step": 18795
|
|
},
|
|
{
|
|
"epoch": 5.938166311300639,
|
|
"grad_norm": 0.05884941027164353,
|
|
"learning_rate": 0.0008470250628336502,
|
|
"loss": 2.5446,
|
|
"step": 18800
|
|
},
|
|
{
|
|
"epoch": 5.939745715865119,
|
|
"grad_norm": 0.05866739879678812,
|
|
"learning_rate": 0.0008464801297710065,
|
|
"loss": 2.4007,
|
|
"step": 18805
|
|
},
|
|
{
|
|
"epoch": 5.941325120429598,
|
|
"grad_norm": 0.05844027310580414,
|
|
"learning_rate": 0.0008459352433927742,
|
|
"loss": 2.4184,
|
|
"step": 18810
|
|
},
|
|
{
|
|
"epoch": 5.942904524994077,
|
|
"grad_norm": 0.061943910450837116,
|
|
"learning_rate": 0.0008453904038646502,
|
|
"loss": 2.4625,
|
|
"step": 18815
|
|
},
|
|
{
|
|
"epoch": 5.944483929558556,
|
|
"grad_norm": 0.10225139082580605,
|
|
"learning_rate": 0.0008448456113523165,
|
|
"loss": 2.4255,
|
|
"step": 18820
|
|
},
|
|
{
|
|
"epoch": 5.946063334123036,
|
|
"grad_norm": 0.06259875674488143,
|
|
"learning_rate": 0.0008443008660214407,
|
|
"loss": 2.4698,
|
|
"step": 18825
|
|
},
|
|
{
|
|
"epoch": 5.947642738687515,
|
|
"grad_norm": 0.07473013511729105,
|
|
"learning_rate": 0.0008437561680376773,
|
|
"loss": 2.4987,
|
|
"step": 18830
|
|
},
|
|
{
|
|
"epoch": 5.949222143251994,
|
|
"grad_norm": 0.10129259076352053,
|
|
"learning_rate": 0.0008432115175666646,
|
|
"loss": 2.4721,
|
|
"step": 18835
|
|
},
|
|
{
|
|
"epoch": 5.950801547816473,
|
|
"grad_norm": 0.0699598712898237,
|
|
"learning_rate": 0.000842666914774028,
|
|
"loss": 2.4591,
|
|
"step": 18840
|
|
},
|
|
{
|
|
"epoch": 5.9523809523809526,
|
|
"grad_norm": 0.0997115556126127,
|
|
"learning_rate": 0.0008421223598253772,
|
|
"loss": 2.4802,
|
|
"step": 18845
|
|
},
|
|
{
|
|
"epoch": 5.953960356945432,
|
|
"grad_norm": 0.05240629597794155,
|
|
"learning_rate": 0.0008415778528863077,
|
|
"loss": 2.4607,
|
|
"step": 18850
|
|
},
|
|
{
|
|
"epoch": 5.955539761509911,
|
|
"grad_norm": 0.0600795497830552,
|
|
"learning_rate": 0.0008410333941224016,
|
|
"loss": 2.4358,
|
|
"step": 18855
|
|
},
|
|
{
|
|
"epoch": 5.95711916607439,
|
|
"grad_norm": 0.07904368495608866,
|
|
"learning_rate": 0.0008404889836992241,
|
|
"loss": 2.4552,
|
|
"step": 18860
|
|
},
|
|
{
|
|
"epoch": 5.9586985706388695,
|
|
"grad_norm": 0.07204247109410876,
|
|
"learning_rate": 0.0008399446217823279,
|
|
"loss": 2.3968,
|
|
"step": 18865
|
|
},
|
|
{
|
|
"epoch": 5.960277975203349,
|
|
"grad_norm": 0.05683872136202642,
|
|
"learning_rate": 0.0008394003085372496,
|
|
"loss": 2.4381,
|
|
"step": 18870
|
|
},
|
|
{
|
|
"epoch": 5.961857379767827,
|
|
"grad_norm": 0.08522386567809322,
|
|
"learning_rate": 0.0008388560441295112,
|
|
"loss": 2.4795,
|
|
"step": 18875
|
|
},
|
|
{
|
|
"epoch": 5.963436784332306,
|
|
"grad_norm": 0.05579606099491831,
|
|
"learning_rate": 0.0008383118287246203,
|
|
"loss": 2.4209,
|
|
"step": 18880
|
|
},
|
|
{
|
|
"epoch": 5.965016188896786,
|
|
"grad_norm": 0.07102608009357568,
|
|
"learning_rate": 0.0008377676624880687,
|
|
"loss": 2.3678,
|
|
"step": 18885
|
|
},
|
|
{
|
|
"epoch": 5.966595593461265,
|
|
"grad_norm": 0.053367495741105525,
|
|
"learning_rate": 0.0008372235455853352,
|
|
"loss": 2.4579,
|
|
"step": 18890
|
|
},
|
|
{
|
|
"epoch": 5.968174998025744,
|
|
"grad_norm": 0.0604002108347439,
|
|
"learning_rate": 0.0008366794781818812,
|
|
"loss": 2.4009,
|
|
"step": 18895
|
|
},
|
|
{
|
|
"epoch": 5.969754402590223,
|
|
"grad_norm": 0.07157847356370406,
|
|
"learning_rate": 0.0008361354604431543,
|
|
"loss": 2.5228,
|
|
"step": 18900
|
|
},
|
|
{
|
|
"epoch": 5.971333807154703,
|
|
"grad_norm": 0.059942760889739315,
|
|
"learning_rate": 0.0008355914925345871,
|
|
"loss": 2.3524,
|
|
"step": 18905
|
|
},
|
|
{
|
|
"epoch": 5.972913211719182,
|
|
"grad_norm": 0.066784733945027,
|
|
"learning_rate": 0.0008350475746215962,
|
|
"loss": 2.4295,
|
|
"step": 18910
|
|
},
|
|
{
|
|
"epoch": 5.974492616283661,
|
|
"grad_norm": 0.07602771233408008,
|
|
"learning_rate": 0.0008345037068695844,
|
|
"loss": 2.3876,
|
|
"step": 18915
|
|
},
|
|
{
|
|
"epoch": 5.97607202084814,
|
|
"grad_norm": 0.06951439257665362,
|
|
"learning_rate": 0.0008339598894439379,
|
|
"loss": 2.4417,
|
|
"step": 18920
|
|
},
|
|
{
|
|
"epoch": 5.97765142541262,
|
|
"grad_norm": 0.07068684262286003,
|
|
"learning_rate": 0.0008334161225100279,
|
|
"loss": 2.4353,
|
|
"step": 18925
|
|
},
|
|
{
|
|
"epoch": 5.979230829977099,
|
|
"grad_norm": 0.05926446479911698,
|
|
"learning_rate": 0.0008328724062332109,
|
|
"loss": 2.4387,
|
|
"step": 18930
|
|
},
|
|
{
|
|
"epoch": 5.980810234541578,
|
|
"grad_norm": 0.05714291646293212,
|
|
"learning_rate": 0.0008323287407788275,
|
|
"loss": 2.4758,
|
|
"step": 18935
|
|
},
|
|
{
|
|
"epoch": 5.982389639106057,
|
|
"grad_norm": 0.06065052122055392,
|
|
"learning_rate": 0.0008317851263122023,
|
|
"loss": 2.4815,
|
|
"step": 18940
|
|
},
|
|
{
|
|
"epoch": 5.983969043670536,
|
|
"grad_norm": 0.050265879107395724,
|
|
"learning_rate": 0.000831241562998646,
|
|
"loss": 2.4395,
|
|
"step": 18945
|
|
},
|
|
{
|
|
"epoch": 5.985548448235015,
|
|
"grad_norm": 0.08877890773804512,
|
|
"learning_rate": 0.0008306980510034514,
|
|
"loss": 2.4496,
|
|
"step": 18950
|
|
},
|
|
{
|
|
"epoch": 5.987127852799494,
|
|
"grad_norm": 0.09088066285243378,
|
|
"learning_rate": 0.0008301545904918985,
|
|
"loss": 2.4562,
|
|
"step": 18955
|
|
},
|
|
{
|
|
"epoch": 5.9887072573639735,
|
|
"grad_norm": 0.07212537493855416,
|
|
"learning_rate": 0.0008296111816292494,
|
|
"loss": 2.3848,
|
|
"step": 18960
|
|
},
|
|
{
|
|
"epoch": 5.990286661928453,
|
|
"grad_norm": 0.09530438007539307,
|
|
"learning_rate": 0.0008290678245807509,
|
|
"loss": 2.4474,
|
|
"step": 18965
|
|
},
|
|
{
|
|
"epoch": 5.991866066492932,
|
|
"grad_norm": 0.06269179023551791,
|
|
"learning_rate": 0.0008285245195116351,
|
|
"loss": 2.4097,
|
|
"step": 18970
|
|
},
|
|
{
|
|
"epoch": 5.993445471057411,
|
|
"grad_norm": 0.05876585534017766,
|
|
"learning_rate": 0.0008279812665871171,
|
|
"loss": 2.4617,
|
|
"step": 18975
|
|
},
|
|
{
|
|
"epoch": 5.9950248756218905,
|
|
"grad_norm": 0.05600082094942641,
|
|
"learning_rate": 0.000827438065972397,
|
|
"loss": 2.5398,
|
|
"step": 18980
|
|
},
|
|
{
|
|
"epoch": 5.99660428018637,
|
|
"grad_norm": 0.06387783912326771,
|
|
"learning_rate": 0.0008268949178326588,
|
|
"loss": 2.4397,
|
|
"step": 18985
|
|
},
|
|
{
|
|
"epoch": 5.998183684750849,
|
|
"grad_norm": 0.0661590272942373,
|
|
"learning_rate": 0.0008263518223330697,
|
|
"loss": 2.4274,
|
|
"step": 18990
|
|
},
|
|
{
|
|
"epoch": 5.999763089315328,
|
|
"grad_norm": 0.07063174926051552,
|
|
"learning_rate": 0.0008258087796387822,
|
|
"loss": 2.5648,
|
|
"step": 18995
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"eval_loss": 2.452611207962036,
|
|
"eval_runtime": 118.4455,
|
|
"eval_samples_per_second": 22.365,
|
|
"eval_steps_per_second": 5.598,
|
|
"step": 18996
|
|
},
|
|
{
|
|
"epoch": 6.001263523651583,
|
|
"grad_norm": 0.06755485745393365,
|
|
"learning_rate": 0.0008252657899149315,
|
|
"loss": 2.4559,
|
|
"step": 19000
|
|
},
|
|
{
|
|
"epoch": 6.0028429282160625,
|
|
"grad_norm": 0.06995856223474835,
|
|
"learning_rate": 0.0008247228533266381,
|
|
"loss": 2.4591,
|
|
"step": 19005
|
|
},
|
|
{
|
|
"epoch": 6.004422332780542,
|
|
"grad_norm": 0.06036922504256234,
|
|
"learning_rate": 0.0008241799700390051,
|
|
"loss": 2.457,
|
|
"step": 19010
|
|
},
|
|
{
|
|
"epoch": 6.006001737345021,
|
|
"grad_norm": 0.07337472446882752,
|
|
"learning_rate": 0.0008236371402171197,
|
|
"loss": 2.4634,
|
|
"step": 19015
|
|
},
|
|
{
|
|
"epoch": 6.0075811419095,
|
|
"grad_norm": 0.057428936378157186,
|
|
"learning_rate": 0.0008230943640260534,
|
|
"loss": 2.5075,
|
|
"step": 19020
|
|
},
|
|
{
|
|
"epoch": 6.0091605464739795,
|
|
"grad_norm": 0.05892214975458761,
|
|
"learning_rate": 0.00082255164163086,
|
|
"loss": 2.4597,
|
|
"step": 19025
|
|
},
|
|
{
|
|
"epoch": 6.010739951038459,
|
|
"grad_norm": 0.05256753445267508,
|
|
"learning_rate": 0.0008220089731965794,
|
|
"loss": 2.4061,
|
|
"step": 19030
|
|
},
|
|
{
|
|
"epoch": 6.012319355602938,
|
|
"grad_norm": 0.06046687041545794,
|
|
"learning_rate": 0.0008214663588882328,
|
|
"loss": 2.4363,
|
|
"step": 19035
|
|
},
|
|
{
|
|
"epoch": 6.013898760167417,
|
|
"grad_norm": 0.05863844764285577,
|
|
"learning_rate": 0.0008209237988708254,
|
|
"loss": 2.392,
|
|
"step": 19040
|
|
},
|
|
{
|
|
"epoch": 6.0154781647318964,
|
|
"grad_norm": 0.05603381098865268,
|
|
"learning_rate": 0.0008203812933093469,
|
|
"loss": 2.511,
|
|
"step": 19045
|
|
},
|
|
{
|
|
"epoch": 6.017057569296376,
|
|
"grad_norm": 0.0536891131990583,
|
|
"learning_rate": 0.0008198388423687694,
|
|
"loss": 2.4732,
|
|
"step": 19050
|
|
},
|
|
{
|
|
"epoch": 6.018636973860854,
|
|
"grad_norm": 0.05725119713649916,
|
|
"learning_rate": 0.0008192964462140487,
|
|
"loss": 2.4728,
|
|
"step": 19055
|
|
},
|
|
{
|
|
"epoch": 6.020216378425333,
|
|
"grad_norm": 0.06486729711490877,
|
|
"learning_rate": 0.0008187541050101244,
|
|
"loss": 2.4254,
|
|
"step": 19060
|
|
},
|
|
{
|
|
"epoch": 6.0217957829898126,
|
|
"grad_norm": 0.05603893699604805,
|
|
"learning_rate": 0.0008182118189219183,
|
|
"loss": 2.3838,
|
|
"step": 19065
|
|
},
|
|
{
|
|
"epoch": 6.023375187554292,
|
|
"grad_norm": 0.059224961336993114,
|
|
"learning_rate": 0.0008176695881143371,
|
|
"loss": 2.4123,
|
|
"step": 19070
|
|
},
|
|
{
|
|
"epoch": 6.024954592118771,
|
|
"grad_norm": 0.05398166783902062,
|
|
"learning_rate": 0.0008171274127522692,
|
|
"loss": 2.4329,
|
|
"step": 19075
|
|
},
|
|
{
|
|
"epoch": 6.02653399668325,
|
|
"grad_norm": 0.0507008907302069,
|
|
"learning_rate": 0.0008165852930005863,
|
|
"loss": 2.459,
|
|
"step": 19080
|
|
},
|
|
{
|
|
"epoch": 6.0281134012477295,
|
|
"grad_norm": 0.04947190101267776,
|
|
"learning_rate": 0.0008160432290241443,
|
|
"loss": 2.496,
|
|
"step": 19085
|
|
},
|
|
{
|
|
"epoch": 6.029692805812209,
|
|
"grad_norm": 0.06868271629697459,
|
|
"learning_rate": 0.0008155012209877805,
|
|
"loss": 2.4549,
|
|
"step": 19090
|
|
},
|
|
{
|
|
"epoch": 6.031272210376688,
|
|
"grad_norm": 0.05516426059948859,
|
|
"learning_rate": 0.0008149592690563171,
|
|
"loss": 2.4232,
|
|
"step": 19095
|
|
},
|
|
{
|
|
"epoch": 6.032851614941167,
|
|
"grad_norm": 0.06117699149357592,
|
|
"learning_rate": 0.0008144173733945578,
|
|
"loss": 2.4359,
|
|
"step": 19100
|
|
},
|
|
{
|
|
"epoch": 6.0344310195056465,
|
|
"grad_norm": 0.05511416994441555,
|
|
"learning_rate": 0.0008138755341672892,
|
|
"loss": 2.4632,
|
|
"step": 19105
|
|
},
|
|
{
|
|
"epoch": 6.036010424070126,
|
|
"grad_norm": 0.07941509896758685,
|
|
"learning_rate": 0.0008133337515392817,
|
|
"loss": 2.3763,
|
|
"step": 19110
|
|
},
|
|
{
|
|
"epoch": 6.037589828634605,
|
|
"grad_norm": 0.08006670063910017,
|
|
"learning_rate": 0.0008127920256752873,
|
|
"loss": 2.4321,
|
|
"step": 19115
|
|
},
|
|
{
|
|
"epoch": 6.039169233199084,
|
|
"grad_norm": 0.057388186750708,
|
|
"learning_rate": 0.0008122503567400422,
|
|
"loss": 2.4903,
|
|
"step": 19120
|
|
},
|
|
{
|
|
"epoch": 6.0407486377635635,
|
|
"grad_norm": 0.058150501456731375,
|
|
"learning_rate": 0.0008117087448982643,
|
|
"loss": 2.4013,
|
|
"step": 19125
|
|
},
|
|
{
|
|
"epoch": 6.042328042328043,
|
|
"grad_norm": 0.057680383693818144,
|
|
"learning_rate": 0.0008111671903146534,
|
|
"loss": 2.4884,
|
|
"step": 19130
|
|
},
|
|
{
|
|
"epoch": 6.043907446892521,
|
|
"grad_norm": 0.06935938361387563,
|
|
"learning_rate": 0.0008106256931538938,
|
|
"loss": 2.4594,
|
|
"step": 19135
|
|
},
|
|
{
|
|
"epoch": 6.045486851457,
|
|
"grad_norm": 0.0700477907794558,
|
|
"learning_rate": 0.0008100842535806508,
|
|
"loss": 2.4168,
|
|
"step": 19140
|
|
},
|
|
{
|
|
"epoch": 6.04706625602148,
|
|
"grad_norm": 0.07296062388623109,
|
|
"learning_rate": 0.0008095428717595731,
|
|
"loss": 2.5121,
|
|
"step": 19145
|
|
},
|
|
{
|
|
"epoch": 6.048645660585959,
|
|
"grad_norm": 0.05502508806338148,
|
|
"learning_rate": 0.0008090015478552912,
|
|
"loss": 2.3913,
|
|
"step": 19150
|
|
},
|
|
{
|
|
"epoch": 6.050225065150438,
|
|
"grad_norm": 0.05750008280959948,
|
|
"learning_rate": 0.0008084602820324179,
|
|
"loss": 2.3773,
|
|
"step": 19155
|
|
},
|
|
{
|
|
"epoch": 6.051804469714917,
|
|
"grad_norm": 0.067930747346504,
|
|
"learning_rate": 0.0008079190744555495,
|
|
"loss": 2.4459,
|
|
"step": 19160
|
|
},
|
|
{
|
|
"epoch": 6.053383874279397,
|
|
"grad_norm": 0.05830080957083264,
|
|
"learning_rate": 0.0008073779252892633,
|
|
"loss": 2.3981,
|
|
"step": 19165
|
|
},
|
|
{
|
|
"epoch": 6.054963278843876,
|
|
"grad_norm": 0.07333606080282067,
|
|
"learning_rate": 0.0008068368346981191,
|
|
"loss": 2.4511,
|
|
"step": 19170
|
|
},
|
|
{
|
|
"epoch": 6.056542683408355,
|
|
"grad_norm": 0.056744436786467525,
|
|
"learning_rate": 0.0008062958028466594,
|
|
"loss": 2.4713,
|
|
"step": 19175
|
|
},
|
|
{
|
|
"epoch": 6.058122087972834,
|
|
"grad_norm": 0.057703650483882755,
|
|
"learning_rate": 0.0008057548298994082,
|
|
"loss": 2.3564,
|
|
"step": 19180
|
|
},
|
|
{
|
|
"epoch": 6.059701492537314,
|
|
"grad_norm": 0.0502145706242276,
|
|
"learning_rate": 0.0008052139160208725,
|
|
"loss": 2.4217,
|
|
"step": 19185
|
|
},
|
|
{
|
|
"epoch": 6.061280897101793,
|
|
"grad_norm": 0.053239458975068536,
|
|
"learning_rate": 0.0008046730613755404,
|
|
"loss": 2.4373,
|
|
"step": 19190
|
|
},
|
|
{
|
|
"epoch": 6.062860301666272,
|
|
"grad_norm": 0.06313006413371712,
|
|
"learning_rate": 0.0008041322661278823,
|
|
"loss": 2.5361,
|
|
"step": 19195
|
|
},
|
|
{
|
|
"epoch": 6.064439706230751,
|
|
"grad_norm": 0.0817887604251127,
|
|
"learning_rate": 0.0008035915304423506,
|
|
"loss": 2.3477,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 6.066019110795231,
|
|
"grad_norm": 0.07800824773512383,
|
|
"learning_rate": 0.0008030508544833794,
|
|
"loss": 2.4476,
|
|
"step": 19205
|
|
},
|
|
{
|
|
"epoch": 6.067598515359709,
|
|
"grad_norm": 0.05161799834997871,
|
|
"learning_rate": 0.0008025102384153853,
|
|
"loss": 2.4375,
|
|
"step": 19210
|
|
},
|
|
{
|
|
"epoch": 6.069177919924188,
|
|
"grad_norm": 0.06052198038533024,
|
|
"learning_rate": 0.0008019696824027663,
|
|
"loss": 2.4775,
|
|
"step": 19215
|
|
},
|
|
{
|
|
"epoch": 6.0707573244886675,
|
|
"grad_norm": 0.05953805064958084,
|
|
"learning_rate": 0.0008014291866099007,
|
|
"loss": 2.4548,
|
|
"step": 19220
|
|
},
|
|
{
|
|
"epoch": 6.072336729053147,
|
|
"grad_norm": 0.05988878487502156,
|
|
"learning_rate": 0.0008008887512011513,
|
|
"loss": 2.4502,
|
|
"step": 19225
|
|
},
|
|
{
|
|
"epoch": 6.073916133617626,
|
|
"grad_norm": 0.06897539510277752,
|
|
"learning_rate": 0.0008003483763408604,
|
|
"loss": 2.4402,
|
|
"step": 19230
|
|
},
|
|
{
|
|
"epoch": 6.075495538182105,
|
|
"grad_norm": 0.060776502782774085,
|
|
"learning_rate": 0.0007998080621933527,
|
|
"loss": 2.4286,
|
|
"step": 19235
|
|
},
|
|
{
|
|
"epoch": 6.077074942746584,
|
|
"grad_norm": 0.05985056424299122,
|
|
"learning_rate": 0.0007992678089229344,
|
|
"loss": 2.4727,
|
|
"step": 19240
|
|
},
|
|
{
|
|
"epoch": 6.078654347311064,
|
|
"grad_norm": 0.054963308123563426,
|
|
"learning_rate": 0.0007987276166938923,
|
|
"loss": 2.4795,
|
|
"step": 19245
|
|
},
|
|
{
|
|
"epoch": 6.080233751875543,
|
|
"grad_norm": 0.0541319804727999,
|
|
"learning_rate": 0.0007981874856704964,
|
|
"loss": 2.4764,
|
|
"step": 19250
|
|
},
|
|
{
|
|
"epoch": 6.081813156440022,
|
|
"grad_norm": 0.06476768923529816,
|
|
"learning_rate": 0.0007976474160169966,
|
|
"loss": 2.4733,
|
|
"step": 19255
|
|
},
|
|
{
|
|
"epoch": 6.083392561004501,
|
|
"grad_norm": 0.06039699006882269,
|
|
"learning_rate": 0.0007971074078976249,
|
|
"loss": 2.4292,
|
|
"step": 19260
|
|
},
|
|
{
|
|
"epoch": 6.084971965568981,
|
|
"grad_norm": 0.054339609303525015,
|
|
"learning_rate": 0.0007965674614765942,
|
|
"loss": 2.4195,
|
|
"step": 19265
|
|
},
|
|
{
|
|
"epoch": 6.08655137013346,
|
|
"grad_norm": 0.07341912919361576,
|
|
"learning_rate": 0.0007960275769180982,
|
|
"loss": 2.3928,
|
|
"step": 19270
|
|
},
|
|
{
|
|
"epoch": 6.088130774697939,
|
|
"grad_norm": 0.07088938821426911,
|
|
"learning_rate": 0.0007954877543863133,
|
|
"loss": 2.4505,
|
|
"step": 19275
|
|
},
|
|
{
|
|
"epoch": 6.089710179262418,
|
|
"grad_norm": 0.0755487703825606,
|
|
"learning_rate": 0.0007949479940453956,
|
|
"loss": 2.4253,
|
|
"step": 19280
|
|
},
|
|
{
|
|
"epoch": 6.091289583826898,
|
|
"grad_norm": 0.07742352758165552,
|
|
"learning_rate": 0.0007944082960594825,
|
|
"loss": 2.4225,
|
|
"step": 19285
|
|
},
|
|
{
|
|
"epoch": 6.092868988391376,
|
|
"grad_norm": 0.06083384088162834,
|
|
"learning_rate": 0.0007938686605926934,
|
|
"loss": 2.4465,
|
|
"step": 19290
|
|
},
|
|
{
|
|
"epoch": 6.094448392955855,
|
|
"grad_norm": 0.08586216693868441,
|
|
"learning_rate": 0.000793329087809127,
|
|
"loss": 2.461,
|
|
"step": 19295
|
|
},
|
|
{
|
|
"epoch": 6.0960277975203345,
|
|
"grad_norm": 0.10215266319373814,
|
|
"learning_rate": 0.0007927895778728651,
|
|
"loss": 2.4836,
|
|
"step": 19300
|
|
},
|
|
{
|
|
"epoch": 6.097607202084814,
|
|
"grad_norm": 0.07178187717373394,
|
|
"learning_rate": 0.0007922501309479688,
|
|
"loss": 2.5602,
|
|
"step": 19305
|
|
},
|
|
{
|
|
"epoch": 6.099186606649293,
|
|
"grad_norm": 0.054302517877230484,
|
|
"learning_rate": 0.0007917107471984798,
|
|
"loss": 2.5433,
|
|
"step": 19310
|
|
},
|
|
{
|
|
"epoch": 6.100766011213772,
|
|
"grad_norm": 0.061096824637253154,
|
|
"learning_rate": 0.0007911714267884221,
|
|
"loss": 2.4354,
|
|
"step": 19315
|
|
},
|
|
{
|
|
"epoch": 6.1023454157782515,
|
|
"grad_norm": 0.06577886495946768,
|
|
"learning_rate": 0.000790632169881799,
|
|
"loss": 2.3632,
|
|
"step": 19320
|
|
},
|
|
{
|
|
"epoch": 6.103924820342731,
|
|
"grad_norm": 0.06098322790980453,
|
|
"learning_rate": 0.0007900929766425957,
|
|
"loss": 2.5382,
|
|
"step": 19325
|
|
},
|
|
{
|
|
"epoch": 6.10550422490721,
|
|
"grad_norm": 0.05641490027858137,
|
|
"learning_rate": 0.000789553847234777,
|
|
"loss": 2.5491,
|
|
"step": 19330
|
|
},
|
|
{
|
|
"epoch": 6.107083629471689,
|
|
"grad_norm": 0.08712990033275116,
|
|
"learning_rate": 0.0007890147818222884,
|
|
"loss": 2.432,
|
|
"step": 19335
|
|
},
|
|
{
|
|
"epoch": 6.1086630340361685,
|
|
"grad_norm": 0.05542758442883438,
|
|
"learning_rate": 0.0007884757805690572,
|
|
"loss": 2.5538,
|
|
"step": 19340
|
|
},
|
|
{
|
|
"epoch": 6.110242438600648,
|
|
"grad_norm": 0.06324436303455877,
|
|
"learning_rate": 0.0007879368436389891,
|
|
"loss": 2.4013,
|
|
"step": 19345
|
|
},
|
|
{
|
|
"epoch": 6.111821843165127,
|
|
"grad_norm": 0.07341142734295356,
|
|
"learning_rate": 0.0007873979711959723,
|
|
"loss": 2.4703,
|
|
"step": 19350
|
|
},
|
|
{
|
|
"epoch": 6.113401247729606,
|
|
"grad_norm": 0.0544160751976481,
|
|
"learning_rate": 0.0007868591634038742,
|
|
"loss": 2.4309,
|
|
"step": 19355
|
|
},
|
|
{
|
|
"epoch": 6.1149806522940855,
|
|
"grad_norm": 0.08064278246626877,
|
|
"learning_rate": 0.0007863204204265422,
|
|
"loss": 2.4925,
|
|
"step": 19360
|
|
},
|
|
{
|
|
"epoch": 6.116560056858565,
|
|
"grad_norm": 0.06617168976754821,
|
|
"learning_rate": 0.0007857817424278056,
|
|
"loss": 2.409,
|
|
"step": 19365
|
|
},
|
|
{
|
|
"epoch": 6.118139461423043,
|
|
"grad_norm": 0.052907287990285504,
|
|
"learning_rate": 0.0007852431295714722,
|
|
"loss": 2.4569,
|
|
"step": 19370
|
|
},
|
|
{
|
|
"epoch": 6.119718865987522,
|
|
"grad_norm": 0.05681403097049076,
|
|
"learning_rate": 0.0007847045820213312,
|
|
"loss": 2.4856,
|
|
"step": 19375
|
|
},
|
|
{
|
|
"epoch": 6.121298270552002,
|
|
"grad_norm": 0.05434375282784585,
|
|
"learning_rate": 0.0007841660999411513,
|
|
"loss": 2.4537,
|
|
"step": 19380
|
|
},
|
|
{
|
|
"epoch": 6.122877675116481,
|
|
"grad_norm": 0.05946687736962094,
|
|
"learning_rate": 0.0007836276834946808,
|
|
"loss": 2.3871,
|
|
"step": 19385
|
|
},
|
|
{
|
|
"epoch": 6.12445707968096,
|
|
"grad_norm": 0.06053216424873867,
|
|
"learning_rate": 0.0007830893328456501,
|
|
"loss": 2.432,
|
|
"step": 19390
|
|
},
|
|
{
|
|
"epoch": 6.126036484245439,
|
|
"grad_norm": 0.07109957967941444,
|
|
"learning_rate": 0.0007825510481577671,
|
|
"loss": 2.5123,
|
|
"step": 19395
|
|
},
|
|
{
|
|
"epoch": 6.127615888809919,
|
|
"grad_norm": 0.058225125492934855,
|
|
"learning_rate": 0.0007820128295947206,
|
|
"loss": 2.4557,
|
|
"step": 19400
|
|
},
|
|
{
|
|
"epoch": 6.129195293374398,
|
|
"grad_norm": 0.052451647177555134,
|
|
"learning_rate": 0.0007814746773201804,
|
|
"loss": 2.4639,
|
|
"step": 19405
|
|
},
|
|
{
|
|
"epoch": 6.130774697938877,
|
|
"grad_norm": 0.06406265660562299,
|
|
"learning_rate": 0.0007809365914977944,
|
|
"loss": 2.4812,
|
|
"step": 19410
|
|
},
|
|
{
|
|
"epoch": 6.132354102503356,
|
|
"grad_norm": 0.06349179734314664,
|
|
"learning_rate": 0.0007803985722911915,
|
|
"loss": 2.3316,
|
|
"step": 19415
|
|
},
|
|
{
|
|
"epoch": 6.133933507067836,
|
|
"grad_norm": 0.06851849340426248,
|
|
"learning_rate": 0.0007798606198639798,
|
|
"loss": 2.4043,
|
|
"step": 19420
|
|
},
|
|
{
|
|
"epoch": 6.135512911632315,
|
|
"grad_norm": 0.05910445645432784,
|
|
"learning_rate": 0.0007793227343797464,
|
|
"loss": 2.4166,
|
|
"step": 19425
|
|
},
|
|
{
|
|
"epoch": 6.137092316196794,
|
|
"grad_norm": 0.05322964382562131,
|
|
"learning_rate": 0.00077878491600206,
|
|
"loss": 2.4909,
|
|
"step": 19430
|
|
},
|
|
{
|
|
"epoch": 6.138671720761273,
|
|
"grad_norm": 0.0568388791717399,
|
|
"learning_rate": 0.0007782471648944673,
|
|
"loss": 2.5081,
|
|
"step": 19435
|
|
},
|
|
{
|
|
"epoch": 6.140251125325753,
|
|
"grad_norm": 0.05336020271954184,
|
|
"learning_rate": 0.0007777094812204949,
|
|
"loss": 2.4632,
|
|
"step": 19440
|
|
},
|
|
{
|
|
"epoch": 6.141830529890232,
|
|
"grad_norm": 0.06311225934248536,
|
|
"learning_rate": 0.000777171865143649,
|
|
"loss": 2.4742,
|
|
"step": 19445
|
|
},
|
|
{
|
|
"epoch": 6.14340993445471,
|
|
"grad_norm": 0.08329850738087803,
|
|
"learning_rate": 0.0007766343168274149,
|
|
"loss": 2.4213,
|
|
"step": 19450
|
|
},
|
|
{
|
|
"epoch": 6.144989339019189,
|
|
"grad_norm": 0.06946732510040121,
|
|
"learning_rate": 0.0007760968364352584,
|
|
"loss": 2.4884,
|
|
"step": 19455
|
|
},
|
|
{
|
|
"epoch": 6.146568743583669,
|
|
"grad_norm": 0.08711521025253743,
|
|
"learning_rate": 0.0007755594241306231,
|
|
"loss": 2.4959,
|
|
"step": 19460
|
|
},
|
|
{
|
|
"epoch": 6.148148148148148,
|
|
"grad_norm": 0.07855381937421685,
|
|
"learning_rate": 0.0007750220800769333,
|
|
"loss": 2.3949,
|
|
"step": 19465
|
|
},
|
|
{
|
|
"epoch": 6.149727552712627,
|
|
"grad_norm": 0.06113392449692607,
|
|
"learning_rate": 0.0007744848044375912,
|
|
"loss": 2.4374,
|
|
"step": 19470
|
|
},
|
|
{
|
|
"epoch": 6.151306957277106,
|
|
"grad_norm": 0.06288587860316672,
|
|
"learning_rate": 0.000773947597375979,
|
|
"loss": 2.4297,
|
|
"step": 19475
|
|
},
|
|
{
|
|
"epoch": 6.152886361841586,
|
|
"grad_norm": 0.0653770187253421,
|
|
"learning_rate": 0.0007734104590554587,
|
|
"loss": 2.4974,
|
|
"step": 19480
|
|
},
|
|
{
|
|
"epoch": 6.154465766406065,
|
|
"grad_norm": 0.05761192000773616,
|
|
"learning_rate": 0.0007728733896393699,
|
|
"loss": 2.4836,
|
|
"step": 19485
|
|
},
|
|
{
|
|
"epoch": 6.156045170970544,
|
|
"grad_norm": 0.07031759093297552,
|
|
"learning_rate": 0.0007723363892910318,
|
|
"loss": 2.3831,
|
|
"step": 19490
|
|
},
|
|
{
|
|
"epoch": 6.157624575535023,
|
|
"grad_norm": 0.06353387610842912,
|
|
"learning_rate": 0.0007717994581737435,
|
|
"loss": 2.4409,
|
|
"step": 19495
|
|
},
|
|
{
|
|
"epoch": 6.159203980099503,
|
|
"grad_norm": 0.05250149475425236,
|
|
"learning_rate": 0.0007712625964507818,
|
|
"loss": 2.3626,
|
|
"step": 19500
|
|
},
|
|
{
|
|
"epoch": 6.160783384663982,
|
|
"grad_norm": 0.0507851420947074,
|
|
"learning_rate": 0.0007707258042854032,
|
|
"loss": 2.502,
|
|
"step": 19505
|
|
},
|
|
{
|
|
"epoch": 6.162362789228461,
|
|
"grad_norm": 0.053488995679231016,
|
|
"learning_rate": 0.0007701890818408427,
|
|
"loss": 2.4677,
|
|
"step": 19510
|
|
},
|
|
{
|
|
"epoch": 6.16394219379294,
|
|
"grad_norm": 0.08382190036423479,
|
|
"learning_rate": 0.0007696524292803137,
|
|
"loss": 2.5104,
|
|
"step": 19515
|
|
},
|
|
{
|
|
"epoch": 6.16552159835742,
|
|
"grad_norm": 0.06078629464656898,
|
|
"learning_rate": 0.0007691158467670096,
|
|
"loss": 2.3855,
|
|
"step": 19520
|
|
},
|
|
{
|
|
"epoch": 6.167101002921898,
|
|
"grad_norm": 0.06442201467040917,
|
|
"learning_rate": 0.0007685793344641012,
|
|
"loss": 2.2965,
|
|
"step": 19525
|
|
},
|
|
{
|
|
"epoch": 6.168680407486377,
|
|
"grad_norm": 0.04867253028066429,
|
|
"learning_rate": 0.0007680428925347386,
|
|
"loss": 2.464,
|
|
"step": 19530
|
|
},
|
|
{
|
|
"epoch": 6.1702598120508565,
|
|
"grad_norm": 0.07103714043573228,
|
|
"learning_rate": 0.0007675065211420507,
|
|
"loss": 2.4888,
|
|
"step": 19535
|
|
},
|
|
{
|
|
"epoch": 6.171839216615336,
|
|
"grad_norm": 0.06499738123502725,
|
|
"learning_rate": 0.0007669702204491436,
|
|
"loss": 2.4052,
|
|
"step": 19540
|
|
},
|
|
{
|
|
"epoch": 6.173418621179815,
|
|
"grad_norm": 0.058453433265348194,
|
|
"learning_rate": 0.0007664339906191042,
|
|
"loss": 2.3744,
|
|
"step": 19545
|
|
},
|
|
{
|
|
"epoch": 6.174998025744294,
|
|
"grad_norm": 0.06112806537988322,
|
|
"learning_rate": 0.0007658978318149957,
|
|
"loss": 2.4018,
|
|
"step": 19550
|
|
},
|
|
{
|
|
"epoch": 6.1765774303087735,
|
|
"grad_norm": 0.06365063449863004,
|
|
"learning_rate": 0.0007653617441998608,
|
|
"loss": 2.3682,
|
|
"step": 19555
|
|
},
|
|
{
|
|
"epoch": 6.178156834873253,
|
|
"grad_norm": 0.06203737080784095,
|
|
"learning_rate": 0.0007648257279367206,
|
|
"loss": 2.4238,
|
|
"step": 19560
|
|
},
|
|
{
|
|
"epoch": 6.179736239437732,
|
|
"grad_norm": 0.07088864858319288,
|
|
"learning_rate": 0.0007642897831885735,
|
|
"loss": 2.4125,
|
|
"step": 19565
|
|
},
|
|
{
|
|
"epoch": 6.181315644002211,
|
|
"grad_norm": 0.07057223116169063,
|
|
"learning_rate": 0.0007637539101183979,
|
|
"loss": 2.3309,
|
|
"step": 19570
|
|
},
|
|
{
|
|
"epoch": 6.1828950485666905,
|
|
"grad_norm": 0.06290781688397977,
|
|
"learning_rate": 0.0007632181088891482,
|
|
"loss": 2.46,
|
|
"step": 19575
|
|
},
|
|
{
|
|
"epoch": 6.18447445313117,
|
|
"grad_norm": 0.06570377591134992,
|
|
"learning_rate": 0.0007626823796637592,
|
|
"loss": 2.4906,
|
|
"step": 19580
|
|
},
|
|
{
|
|
"epoch": 6.186053857695649,
|
|
"grad_norm": 0.060461208411296756,
|
|
"learning_rate": 0.0007621467226051422,
|
|
"loss": 2.4814,
|
|
"step": 19585
|
|
},
|
|
{
|
|
"epoch": 6.187633262260128,
|
|
"grad_norm": 0.10724116753160037,
|
|
"learning_rate": 0.0007616111378761871,
|
|
"loss": 2.3867,
|
|
"step": 19590
|
|
},
|
|
{
|
|
"epoch": 6.1892126668246075,
|
|
"grad_norm": 0.06311108522890622,
|
|
"learning_rate": 0.000761075625639762,
|
|
"loss": 2.512,
|
|
"step": 19595
|
|
},
|
|
{
|
|
"epoch": 6.190792071389087,
|
|
"grad_norm": 0.05713061134094679,
|
|
"learning_rate": 0.0007605401860587126,
|
|
"loss": 2.4465,
|
|
"step": 19600
|
|
},
|
|
{
|
|
"epoch": 6.192371475953566,
|
|
"grad_norm": 0.06088272053471607,
|
|
"learning_rate": 0.0007600048192958622,
|
|
"loss": 2.4067,
|
|
"step": 19605
|
|
},
|
|
{
|
|
"epoch": 6.193950880518044,
|
|
"grad_norm": 0.05431585064737753,
|
|
"learning_rate": 0.0007594695255140134,
|
|
"loss": 2.3904,
|
|
"step": 19610
|
|
},
|
|
{
|
|
"epoch": 6.195530285082524,
|
|
"grad_norm": 0.06016840675239864,
|
|
"learning_rate": 0.0007589343048759449,
|
|
"loss": 2.5428,
|
|
"step": 19615
|
|
},
|
|
{
|
|
"epoch": 6.197109689647003,
|
|
"grad_norm": 0.05380648367219518,
|
|
"learning_rate": 0.0007583991575444142,
|
|
"loss": 2.4529,
|
|
"step": 19620
|
|
},
|
|
{
|
|
"epoch": 6.198689094211482,
|
|
"grad_norm": 0.0775100601412376,
|
|
"learning_rate": 0.0007578640836821561,
|
|
"loss": 2.3903,
|
|
"step": 19625
|
|
},
|
|
{
|
|
"epoch": 6.200268498775961,
|
|
"grad_norm": 0.08526232121545854,
|
|
"learning_rate": 0.0007573290834518827,
|
|
"loss": 2.445,
|
|
"step": 19630
|
|
},
|
|
{
|
|
"epoch": 6.201847903340441,
|
|
"grad_norm": 0.05991259904388278,
|
|
"learning_rate": 0.0007567941570162848,
|
|
"loss": 2.525,
|
|
"step": 19635
|
|
},
|
|
{
|
|
"epoch": 6.20342730790492,
|
|
"grad_norm": 0.05185741511355774,
|
|
"learning_rate": 0.0007562593045380299,
|
|
"loss": 2.429,
|
|
"step": 19640
|
|
},
|
|
{
|
|
"epoch": 6.205006712469399,
|
|
"grad_norm": 0.08706659397624288,
|
|
"learning_rate": 0.0007557245261797633,
|
|
"loss": 2.4428,
|
|
"step": 19645
|
|
},
|
|
{
|
|
"epoch": 6.206586117033878,
|
|
"grad_norm": 0.06709441806701733,
|
|
"learning_rate": 0.0007551898221041076,
|
|
"loss": 2.4473,
|
|
"step": 19650
|
|
},
|
|
{
|
|
"epoch": 6.2081655215983576,
|
|
"grad_norm": 0.05821670970822972,
|
|
"learning_rate": 0.0007546551924736625,
|
|
"loss": 2.4678,
|
|
"step": 19655
|
|
},
|
|
{
|
|
"epoch": 6.209744926162837,
|
|
"grad_norm": 0.062191125073990654,
|
|
"learning_rate": 0.0007541206374510062,
|
|
"loss": 2.4385,
|
|
"step": 19660
|
|
},
|
|
{
|
|
"epoch": 6.211324330727316,
|
|
"grad_norm": 0.0625137100432733,
|
|
"learning_rate": 0.0007535861571986926,
|
|
"loss": 2.4372,
|
|
"step": 19665
|
|
},
|
|
{
|
|
"epoch": 6.212903735291795,
|
|
"grad_norm": 0.061725845909325915,
|
|
"learning_rate": 0.0007530517518792547,
|
|
"loss": 2.4223,
|
|
"step": 19670
|
|
},
|
|
{
|
|
"epoch": 6.2144831398562745,
|
|
"grad_norm": 0.06026508757658309,
|
|
"learning_rate": 0.0007525174216552013,
|
|
"loss": 2.4125,
|
|
"step": 19675
|
|
},
|
|
{
|
|
"epoch": 6.216062544420754,
|
|
"grad_norm": 0.0801349638909046,
|
|
"learning_rate": 0.0007519831666890184,
|
|
"loss": 2.4602,
|
|
"step": 19680
|
|
},
|
|
{
|
|
"epoch": 6.217641948985232,
|
|
"grad_norm": 0.0748061969276911,
|
|
"learning_rate": 0.0007514489871431702,
|
|
"loss": 2.3577,
|
|
"step": 19685
|
|
},
|
|
{
|
|
"epoch": 6.219221353549711,
|
|
"grad_norm": 0.06703059419819289,
|
|
"learning_rate": 0.0007509148831800965,
|
|
"loss": 2.4924,
|
|
"step": 19690
|
|
},
|
|
{
|
|
"epoch": 6.220800758114191,
|
|
"grad_norm": 0.06602078693858335,
|
|
"learning_rate": 0.0007503808549622158,
|
|
"loss": 2.4308,
|
|
"step": 19695
|
|
},
|
|
{
|
|
"epoch": 6.22238016267867,
|
|
"grad_norm": 0.06339145900477593,
|
|
"learning_rate": 0.0007498469026519223,
|
|
"loss": 2.3763,
|
|
"step": 19700
|
|
},
|
|
{
|
|
"epoch": 6.223959567243149,
|
|
"grad_norm": 0.05530531986340535,
|
|
"learning_rate": 0.000749313026411587,
|
|
"loss": 2.451,
|
|
"step": 19705
|
|
},
|
|
{
|
|
"epoch": 6.225538971807628,
|
|
"grad_norm": 0.07442021183242264,
|
|
"learning_rate": 0.0007487792264035592,
|
|
"loss": 2.4233,
|
|
"step": 19710
|
|
},
|
|
{
|
|
"epoch": 6.227118376372108,
|
|
"grad_norm": 0.052265710521129105,
|
|
"learning_rate": 0.0007482455027901635,
|
|
"loss": 2.4508,
|
|
"step": 19715
|
|
},
|
|
{
|
|
"epoch": 6.228697780936587,
|
|
"grad_norm": 0.05920679774013846,
|
|
"learning_rate": 0.0007477118557337012,
|
|
"loss": 2.4679,
|
|
"step": 19720
|
|
},
|
|
{
|
|
"epoch": 6.230277185501066,
|
|
"grad_norm": 0.05938237683838213,
|
|
"learning_rate": 0.0007471782853964524,
|
|
"loss": 2.5072,
|
|
"step": 19725
|
|
},
|
|
{
|
|
"epoch": 6.231856590065545,
|
|
"grad_norm": 0.05694161128480649,
|
|
"learning_rate": 0.0007466447919406713,
|
|
"loss": 2.4131,
|
|
"step": 19730
|
|
},
|
|
{
|
|
"epoch": 6.233435994630025,
|
|
"grad_norm": 0.06333470851146386,
|
|
"learning_rate": 0.0007461113755285907,
|
|
"loss": 2.455,
|
|
"step": 19735
|
|
},
|
|
{
|
|
"epoch": 6.235015399194504,
|
|
"grad_norm": 0.05424511788662894,
|
|
"learning_rate": 0.0007455780363224184,
|
|
"loss": 2.4136,
|
|
"step": 19740
|
|
},
|
|
{
|
|
"epoch": 6.236594803758983,
|
|
"grad_norm": 0.07083251242734387,
|
|
"learning_rate": 0.0007450447744843393,
|
|
"loss": 2.4706,
|
|
"step": 19745
|
|
},
|
|
{
|
|
"epoch": 6.238174208323462,
|
|
"grad_norm": 0.05102356920787792,
|
|
"learning_rate": 0.0007445115901765161,
|
|
"loss": 2.4865,
|
|
"step": 19750
|
|
},
|
|
{
|
|
"epoch": 6.239753612887942,
|
|
"grad_norm": 0.05770373850241559,
|
|
"learning_rate": 0.0007439784835610852,
|
|
"loss": 2.4086,
|
|
"step": 19755
|
|
},
|
|
{
|
|
"epoch": 6.241333017452421,
|
|
"grad_norm": 0.054102109406934856,
|
|
"learning_rate": 0.0007434454548001621,
|
|
"loss": 2.4218,
|
|
"step": 19760
|
|
},
|
|
{
|
|
"epoch": 6.242912422016899,
|
|
"grad_norm": 0.06613804076706183,
|
|
"learning_rate": 0.0007429125040558371,
|
|
"loss": 2.4291,
|
|
"step": 19765
|
|
},
|
|
{
|
|
"epoch": 6.2444918265813785,
|
|
"grad_norm": 0.052500542600787924,
|
|
"learning_rate": 0.0007423796314901768,
|
|
"loss": 2.4256,
|
|
"step": 19770
|
|
},
|
|
{
|
|
"epoch": 6.246071231145858,
|
|
"grad_norm": 0.06909005559887729,
|
|
"learning_rate": 0.0007418468372652248,
|
|
"loss": 2.4589,
|
|
"step": 19775
|
|
},
|
|
{
|
|
"epoch": 6.247650635710337,
|
|
"grad_norm": 0.06303785575072766,
|
|
"learning_rate": 0.0007413141215429998,
|
|
"loss": 2.4335,
|
|
"step": 19780
|
|
},
|
|
{
|
|
"epoch": 6.249230040274816,
|
|
"grad_norm": 0.06056915510263313,
|
|
"learning_rate": 0.0007407814844854981,
|
|
"loss": 2.454,
|
|
"step": 19785
|
|
},
|
|
{
|
|
"epoch": 6.2508094448392955,
|
|
"grad_norm": 0.06941117388350045,
|
|
"learning_rate": 0.0007402489262546908,
|
|
"loss": 2.4128,
|
|
"step": 19790
|
|
},
|
|
{
|
|
"epoch": 6.252388849403775,
|
|
"grad_norm": 0.05504060593786002,
|
|
"learning_rate": 0.000739716447012525,
|
|
"loss": 2.3883,
|
|
"step": 19795
|
|
},
|
|
{
|
|
"epoch": 6.253968253968254,
|
|
"grad_norm": 0.061787067926771924,
|
|
"learning_rate": 0.000739184046920925,
|
|
"loss": 2.4231,
|
|
"step": 19800
|
|
},
|
|
{
|
|
"epoch": 6.255547658532733,
|
|
"grad_norm": 0.07911758040593335,
|
|
"learning_rate": 0.0007386517261417896,
|
|
"loss": 2.5322,
|
|
"step": 19805
|
|
},
|
|
{
|
|
"epoch": 6.2571270630972124,
|
|
"grad_norm": 0.07059955428855016,
|
|
"learning_rate": 0.0007381194848369947,
|
|
"loss": 2.5197,
|
|
"step": 19810
|
|
},
|
|
{
|
|
"epoch": 6.258706467661692,
|
|
"grad_norm": 0.05793352587725986,
|
|
"learning_rate": 0.0007375873231683915,
|
|
"loss": 2.4764,
|
|
"step": 19815
|
|
},
|
|
{
|
|
"epoch": 6.260285872226171,
|
|
"grad_norm": 0.06790131801420428,
|
|
"learning_rate": 0.0007370552412978064,
|
|
"loss": 2.5465,
|
|
"step": 19820
|
|
},
|
|
{
|
|
"epoch": 6.26186527679065,
|
|
"grad_norm": 0.07514729263157002,
|
|
"learning_rate": 0.0007365232393870427,
|
|
"loss": 2.5099,
|
|
"step": 19825
|
|
},
|
|
{
|
|
"epoch": 6.263444681355129,
|
|
"grad_norm": 0.06619360340136478,
|
|
"learning_rate": 0.0007359913175978783,
|
|
"loss": 2.4732,
|
|
"step": 19830
|
|
},
|
|
{
|
|
"epoch": 6.265024085919609,
|
|
"grad_norm": 0.05833907471011041,
|
|
"learning_rate": 0.0007354594760920672,
|
|
"loss": 2.4916,
|
|
"step": 19835
|
|
},
|
|
{
|
|
"epoch": 6.266603490484087,
|
|
"grad_norm": 0.05677736917333003,
|
|
"learning_rate": 0.0007349277150313398,
|
|
"loss": 2.4314,
|
|
"step": 19840
|
|
},
|
|
{
|
|
"epoch": 6.268182895048566,
|
|
"grad_norm": 0.0558191327993684,
|
|
"learning_rate": 0.0007343960345774,
|
|
"loss": 2.3966,
|
|
"step": 19845
|
|
},
|
|
{
|
|
"epoch": 6.2697622996130455,
|
|
"grad_norm": 0.05193655545478352,
|
|
"learning_rate": 0.0007338644348919295,
|
|
"loss": 2.4396,
|
|
"step": 19850
|
|
},
|
|
{
|
|
"epoch": 6.271341704177525,
|
|
"grad_norm": 0.057162696128829744,
|
|
"learning_rate": 0.0007333329161365841,
|
|
"loss": 2.4253,
|
|
"step": 19855
|
|
},
|
|
{
|
|
"epoch": 6.272921108742004,
|
|
"grad_norm": 0.055770002545731084,
|
|
"learning_rate": 0.0007328014784729948,
|
|
"loss": 2.3843,
|
|
"step": 19860
|
|
},
|
|
{
|
|
"epoch": 6.274500513306483,
|
|
"grad_norm": 0.05727410758040963,
|
|
"learning_rate": 0.000732270122062769,
|
|
"loss": 2.4839,
|
|
"step": 19865
|
|
},
|
|
{
|
|
"epoch": 6.2760799178709625,
|
|
"grad_norm": 0.057229089242360455,
|
|
"learning_rate": 0.000731738847067488,
|
|
"loss": 2.4468,
|
|
"step": 19870
|
|
},
|
|
{
|
|
"epoch": 6.277659322435442,
|
|
"grad_norm": 0.05769316358896857,
|
|
"learning_rate": 0.00073120765364871,
|
|
"loss": 2.5816,
|
|
"step": 19875
|
|
},
|
|
{
|
|
"epoch": 6.279238726999921,
|
|
"grad_norm": 0.0571768306769889,
|
|
"learning_rate": 0.0007306765419679673,
|
|
"loss": 2.4462,
|
|
"step": 19880
|
|
},
|
|
{
|
|
"epoch": 6.2808181315644,
|
|
"grad_norm": 0.053190686271972334,
|
|
"learning_rate": 0.0007301455121867671,
|
|
"loss": 2.5108,
|
|
"step": 19885
|
|
},
|
|
{
|
|
"epoch": 6.2823975361288795,
|
|
"grad_norm": 0.0713570291442539,
|
|
"learning_rate": 0.0007296145644665928,
|
|
"loss": 2.4145,
|
|
"step": 19890
|
|
},
|
|
{
|
|
"epoch": 6.283976940693359,
|
|
"grad_norm": 0.05917801670607476,
|
|
"learning_rate": 0.0007290836989689015,
|
|
"loss": 2.4244,
|
|
"step": 19895
|
|
},
|
|
{
|
|
"epoch": 6.285556345257838,
|
|
"grad_norm": 0.054512218469362134,
|
|
"learning_rate": 0.0007285529158551267,
|
|
"loss": 2.5235,
|
|
"step": 19900
|
|
},
|
|
{
|
|
"epoch": 6.287135749822317,
|
|
"grad_norm": 0.05601623728749516,
|
|
"learning_rate": 0.000728022215286676,
|
|
"loss": 2.3833,
|
|
"step": 19905
|
|
},
|
|
{
|
|
"epoch": 6.2887151543867965,
|
|
"grad_norm": 0.06088975945160917,
|
|
"learning_rate": 0.0007274915974249316,
|
|
"loss": 2.4772,
|
|
"step": 19910
|
|
},
|
|
{
|
|
"epoch": 6.290294558951276,
|
|
"grad_norm": 0.0536131533215133,
|
|
"learning_rate": 0.0007269610624312517,
|
|
"loss": 2.4868,
|
|
"step": 19915
|
|
},
|
|
{
|
|
"epoch": 6.291873963515755,
|
|
"grad_norm": 0.05750908257419262,
|
|
"learning_rate": 0.0007264306104669678,
|
|
"loss": 2.4313,
|
|
"step": 19920
|
|
},
|
|
{
|
|
"epoch": 6.293453368080233,
|
|
"grad_norm": 0.059299770139471,
|
|
"learning_rate": 0.0007259002416933876,
|
|
"loss": 2.3861,
|
|
"step": 19925
|
|
},
|
|
{
|
|
"epoch": 6.295032772644713,
|
|
"grad_norm": 0.06457815583700187,
|
|
"learning_rate": 0.0007253699562717929,
|
|
"loss": 2.3659,
|
|
"step": 19930
|
|
},
|
|
{
|
|
"epoch": 6.296612177209192,
|
|
"grad_norm": 0.07102289405919454,
|
|
"learning_rate": 0.0007248397543634392,
|
|
"loss": 2.4526,
|
|
"step": 19935
|
|
},
|
|
{
|
|
"epoch": 6.298191581773671,
|
|
"grad_norm": 0.058997874582867565,
|
|
"learning_rate": 0.0007243096361295587,
|
|
"loss": 2.4705,
|
|
"step": 19940
|
|
},
|
|
{
|
|
"epoch": 6.29977098633815,
|
|
"grad_norm": 0.06266048043881087,
|
|
"learning_rate": 0.0007237796017313563,
|
|
"loss": 2.5197,
|
|
"step": 19945
|
|
},
|
|
{
|
|
"epoch": 6.30135039090263,
|
|
"grad_norm": 0.06221028004346952,
|
|
"learning_rate": 0.000723249651330012,
|
|
"loss": 2.491,
|
|
"step": 19950
|
|
},
|
|
{
|
|
"epoch": 6.302929795467109,
|
|
"grad_norm": 0.06018431461748382,
|
|
"learning_rate": 0.0007227197850866807,
|
|
"loss": 2.4182,
|
|
"step": 19955
|
|
},
|
|
{
|
|
"epoch": 6.304509200031588,
|
|
"grad_norm": 0.059088934210273426,
|
|
"learning_rate": 0.0007221900031624908,
|
|
"loss": 2.4473,
|
|
"step": 19960
|
|
},
|
|
{
|
|
"epoch": 6.306088604596067,
|
|
"grad_norm": 0.06053952326016882,
|
|
"learning_rate": 0.0007216603057185465,
|
|
"loss": 2.3726,
|
|
"step": 19965
|
|
},
|
|
{
|
|
"epoch": 6.307668009160547,
|
|
"grad_norm": 0.05734903425797904,
|
|
"learning_rate": 0.0007211306929159247,
|
|
"loss": 2.4523,
|
|
"step": 19970
|
|
},
|
|
{
|
|
"epoch": 6.309247413725026,
|
|
"grad_norm": 0.059347340635324056,
|
|
"learning_rate": 0.0007206011649156772,
|
|
"loss": 2.4244,
|
|
"step": 19975
|
|
},
|
|
{
|
|
"epoch": 6.310826818289505,
|
|
"grad_norm": 0.057641619829113484,
|
|
"learning_rate": 0.0007200717218788307,
|
|
"loss": 2.4737,
|
|
"step": 19980
|
|
},
|
|
{
|
|
"epoch": 6.312406222853984,
|
|
"grad_norm": 0.07366682908731811,
|
|
"learning_rate": 0.0007195423639663844,
|
|
"loss": 2.4987,
|
|
"step": 19985
|
|
},
|
|
{
|
|
"epoch": 6.313985627418464,
|
|
"grad_norm": 0.08410507754995093,
|
|
"learning_rate": 0.0007190130913393139,
|
|
"loss": 2.4592,
|
|
"step": 19990
|
|
},
|
|
{
|
|
"epoch": 6.315565031982943,
|
|
"grad_norm": 0.08943630780959341,
|
|
"learning_rate": 0.000718483904158567,
|
|
"loss": 2.4048,
|
|
"step": 19995
|
|
},
|
|
{
|
|
"epoch": 6.317144436547421,
|
|
"grad_norm": 0.06330160880031208,
|
|
"learning_rate": 0.0007179548025850659,
|
|
"loss": 2.4437,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 6.3187238411119,
|
|
"grad_norm": 0.06236266066667164,
|
|
"learning_rate": 0.0007174257867797078,
|
|
"loss": 2.4187,
|
|
"step": 20005
|
|
},
|
|
{
|
|
"epoch": 6.32030324567638,
|
|
"grad_norm": 0.06880313643879914,
|
|
"learning_rate": 0.0007168968569033618,
|
|
"loss": 2.397,
|
|
"step": 20010
|
|
},
|
|
{
|
|
"epoch": 6.321882650240859,
|
|
"grad_norm": 0.0796116848627419,
|
|
"learning_rate": 0.0007163680131168735,
|
|
"loss": 2.4144,
|
|
"step": 20015
|
|
},
|
|
{
|
|
"epoch": 6.323462054805338,
|
|
"grad_norm": 0.06082918554738977,
|
|
"learning_rate": 0.0007158392555810602,
|
|
"loss": 2.4489,
|
|
"step": 20020
|
|
},
|
|
{
|
|
"epoch": 6.325041459369817,
|
|
"grad_norm": 0.05528001497547358,
|
|
"learning_rate": 0.0007153105844567133,
|
|
"loss": 2.3929,
|
|
"step": 20025
|
|
},
|
|
{
|
|
"epoch": 6.326620863934297,
|
|
"grad_norm": 0.06956583597351566,
|
|
"learning_rate": 0.0007147819999045991,
|
|
"loss": 2.4127,
|
|
"step": 20030
|
|
},
|
|
{
|
|
"epoch": 6.328200268498776,
|
|
"grad_norm": 0.05928270622597066,
|
|
"learning_rate": 0.0007142535020854561,
|
|
"loss": 2.4676,
|
|
"step": 20035
|
|
},
|
|
{
|
|
"epoch": 6.329779673063255,
|
|
"grad_norm": 0.060333763608973995,
|
|
"learning_rate": 0.0007137250911599978,
|
|
"loss": 2.4146,
|
|
"step": 20040
|
|
},
|
|
{
|
|
"epoch": 6.331359077627734,
|
|
"grad_norm": 0.05991178940603649,
|
|
"learning_rate": 0.0007131967672889101,
|
|
"loss": 2.3577,
|
|
"step": 20045
|
|
},
|
|
{
|
|
"epoch": 6.332938482192214,
|
|
"grad_norm": 0.05558170961877607,
|
|
"learning_rate": 0.0007126685306328525,
|
|
"loss": 2.4753,
|
|
"step": 20050
|
|
},
|
|
{
|
|
"epoch": 6.334517886756693,
|
|
"grad_norm": 0.05785550291586159,
|
|
"learning_rate": 0.0007121403813524595,
|
|
"loss": 2.392,
|
|
"step": 20055
|
|
},
|
|
{
|
|
"epoch": 6.336097291321172,
|
|
"grad_norm": 0.05491722701683667,
|
|
"learning_rate": 0.0007116123196083373,
|
|
"loss": 2.3809,
|
|
"step": 20060
|
|
},
|
|
{
|
|
"epoch": 6.337676695885651,
|
|
"grad_norm": 0.06068570278142025,
|
|
"learning_rate": 0.000711084345561066,
|
|
"loss": 2.3726,
|
|
"step": 20065
|
|
},
|
|
{
|
|
"epoch": 6.339256100450131,
|
|
"grad_norm": 0.055891629425172874,
|
|
"learning_rate": 0.0007105564593711995,
|
|
"loss": 2.4416,
|
|
"step": 20070
|
|
},
|
|
{
|
|
"epoch": 6.34083550501461,
|
|
"grad_norm": 0.05462412335867579,
|
|
"learning_rate": 0.0007100286611992639,
|
|
"loss": 2.2954,
|
|
"step": 20075
|
|
},
|
|
{
|
|
"epoch": 6.342414909579089,
|
|
"grad_norm": 0.05393312209152095,
|
|
"learning_rate": 0.0007095009512057602,
|
|
"loss": 2.5259,
|
|
"step": 20080
|
|
},
|
|
{
|
|
"epoch": 6.3439943141435675,
|
|
"grad_norm": 0.06552752866128936,
|
|
"learning_rate": 0.0007089733295511611,
|
|
"loss": 2.4799,
|
|
"step": 20085
|
|
},
|
|
{
|
|
"epoch": 6.345573718708047,
|
|
"grad_norm": 0.1118648236173462,
|
|
"learning_rate": 0.000708445796395913,
|
|
"loss": 2.4729,
|
|
"step": 20090
|
|
},
|
|
{
|
|
"epoch": 6.347153123272526,
|
|
"grad_norm": 0.06886127615642178,
|
|
"learning_rate": 0.0007079183519004355,
|
|
"loss": 2.5257,
|
|
"step": 20095
|
|
},
|
|
{
|
|
"epoch": 6.348732527837005,
|
|
"grad_norm": 0.0703385290289815,
|
|
"learning_rate": 0.0007073909962251209,
|
|
"loss": 2.4716,
|
|
"step": 20100
|
|
},
|
|
{
|
|
"epoch": 6.3503119324014845,
|
|
"grad_norm": 0.07464735487394716,
|
|
"learning_rate": 0.0007068637295303349,
|
|
"loss": 2.4992,
|
|
"step": 20105
|
|
},
|
|
{
|
|
"epoch": 6.351891336965964,
|
|
"grad_norm": 0.06789224630431206,
|
|
"learning_rate": 0.0007063365519764162,
|
|
"loss": 2.422,
|
|
"step": 20110
|
|
},
|
|
{
|
|
"epoch": 6.353470741530443,
|
|
"grad_norm": 0.059206546098195346,
|
|
"learning_rate": 0.0007058094637236752,
|
|
"loss": 2.4291,
|
|
"step": 20115
|
|
},
|
|
{
|
|
"epoch": 6.355050146094922,
|
|
"grad_norm": 0.05399741185812805,
|
|
"learning_rate": 0.0007052824649323969,
|
|
"loss": 2.3992,
|
|
"step": 20120
|
|
},
|
|
{
|
|
"epoch": 6.3566295506594015,
|
|
"grad_norm": 0.06147701896292227,
|
|
"learning_rate": 0.0007047555557628379,
|
|
"loss": 2.4161,
|
|
"step": 20125
|
|
},
|
|
{
|
|
"epoch": 6.358208955223881,
|
|
"grad_norm": 0.05166087438320311,
|
|
"learning_rate": 0.0007042287363752283,
|
|
"loss": 2.5049,
|
|
"step": 20130
|
|
},
|
|
{
|
|
"epoch": 6.35978835978836,
|
|
"grad_norm": 0.06354875474376977,
|
|
"learning_rate": 0.0007037020069297702,
|
|
"loss": 2.4589,
|
|
"step": 20135
|
|
},
|
|
{
|
|
"epoch": 6.361367764352839,
|
|
"grad_norm": 0.05775554305567858,
|
|
"learning_rate": 0.0007031753675866381,
|
|
"loss": 2.3615,
|
|
"step": 20140
|
|
},
|
|
{
|
|
"epoch": 6.3629471689173185,
|
|
"grad_norm": 0.06717018758488477,
|
|
"learning_rate": 0.0007026488185059808,
|
|
"loss": 2.5662,
|
|
"step": 20145
|
|
},
|
|
{
|
|
"epoch": 6.364526573481798,
|
|
"grad_norm": 0.07125756890253258,
|
|
"learning_rate": 0.0007021223598479179,
|
|
"loss": 2.4711,
|
|
"step": 20150
|
|
},
|
|
{
|
|
"epoch": 6.366105978046276,
|
|
"grad_norm": 0.0552105505115895,
|
|
"learning_rate": 0.0007015959917725421,
|
|
"loss": 2.3321,
|
|
"step": 20155
|
|
},
|
|
{
|
|
"epoch": 6.367685382610755,
|
|
"grad_norm": 0.0711688256270678,
|
|
"learning_rate": 0.0007010697144399187,
|
|
"loss": 2.3948,
|
|
"step": 20160
|
|
},
|
|
{
|
|
"epoch": 6.369264787175235,
|
|
"grad_norm": 0.05391905398869705,
|
|
"learning_rate": 0.000700543528010085,
|
|
"loss": 2.3398,
|
|
"step": 20165
|
|
},
|
|
{
|
|
"epoch": 6.370844191739714,
|
|
"grad_norm": 0.06534707008655423,
|
|
"learning_rate": 0.0007000174326430515,
|
|
"loss": 2.4308,
|
|
"step": 20170
|
|
},
|
|
{
|
|
"epoch": 6.372423596304193,
|
|
"grad_norm": 0.07938743461028204,
|
|
"learning_rate": 0.0006994914284988001,
|
|
"loss": 2.4693,
|
|
"step": 20175
|
|
},
|
|
{
|
|
"epoch": 6.374003000868672,
|
|
"grad_norm": 0.06811162638520542,
|
|
"learning_rate": 0.000698965515737285,
|
|
"loss": 2.4052,
|
|
"step": 20180
|
|
},
|
|
{
|
|
"epoch": 6.375582405433152,
|
|
"grad_norm": 0.06482877963590887,
|
|
"learning_rate": 0.0006984396945184335,
|
|
"loss": 2.5106,
|
|
"step": 20185
|
|
},
|
|
{
|
|
"epoch": 6.377161809997631,
|
|
"grad_norm": 0.0621878075448932,
|
|
"learning_rate": 0.0006979139650021435,
|
|
"loss": 2.4139,
|
|
"step": 20190
|
|
},
|
|
{
|
|
"epoch": 6.37874121456211,
|
|
"grad_norm": 0.05717168618430722,
|
|
"learning_rate": 0.0006973883273482874,
|
|
"loss": 2.5794,
|
|
"step": 20195
|
|
},
|
|
{
|
|
"epoch": 6.380320619126589,
|
|
"grad_norm": 0.058384350907489155,
|
|
"learning_rate": 0.0006968627817167076,
|
|
"loss": 2.4317,
|
|
"step": 20200
|
|
},
|
|
{
|
|
"epoch": 6.381900023691069,
|
|
"grad_norm": 0.06208971981672258,
|
|
"learning_rate": 0.0006963373282672185,
|
|
"loss": 2.4561,
|
|
"step": 20205
|
|
},
|
|
{
|
|
"epoch": 6.383479428255548,
|
|
"grad_norm": 0.06027034578128326,
|
|
"learning_rate": 0.000695811967159608,
|
|
"loss": 2.4484,
|
|
"step": 20210
|
|
},
|
|
{
|
|
"epoch": 6.385058832820027,
|
|
"grad_norm": 0.05957450555656316,
|
|
"learning_rate": 0.0006952866985536347,
|
|
"loss": 2.4317,
|
|
"step": 20215
|
|
},
|
|
{
|
|
"epoch": 6.386638237384506,
|
|
"grad_norm": 0.05822686780947575,
|
|
"learning_rate": 0.0006947615226090297,
|
|
"loss": 2.5867,
|
|
"step": 20220
|
|
},
|
|
{
|
|
"epoch": 6.388217641948986,
|
|
"grad_norm": 0.06484165103161406,
|
|
"learning_rate": 0.0006942364394854954,
|
|
"loss": 2.3842,
|
|
"step": 20225
|
|
},
|
|
{
|
|
"epoch": 6.389797046513465,
|
|
"grad_norm": 0.05559231605792102,
|
|
"learning_rate": 0.0006937114493427059,
|
|
"loss": 2.4256,
|
|
"step": 20230
|
|
},
|
|
{
|
|
"epoch": 6.391376451077944,
|
|
"grad_norm": 0.06433577974811837,
|
|
"learning_rate": 0.0006931865523403082,
|
|
"loss": 2.4834,
|
|
"step": 20235
|
|
},
|
|
{
|
|
"epoch": 6.392955855642422,
|
|
"grad_norm": 0.05649681627652346,
|
|
"learning_rate": 0.0006926617486379194,
|
|
"loss": 2.4661,
|
|
"step": 20240
|
|
},
|
|
{
|
|
"epoch": 6.394535260206902,
|
|
"grad_norm": 0.06293263126241654,
|
|
"learning_rate": 0.0006921370383951293,
|
|
"loss": 2.4375,
|
|
"step": 20245
|
|
},
|
|
{
|
|
"epoch": 6.396114664771381,
|
|
"grad_norm": 0.051549970176822474,
|
|
"learning_rate": 0.0006916124217714989,
|
|
"loss": 2.4145,
|
|
"step": 20250
|
|
},
|
|
{
|
|
"epoch": 6.39769406933586,
|
|
"grad_norm": 0.06901527332210579,
|
|
"learning_rate": 0.0006910878989265603,
|
|
"loss": 2.4099,
|
|
"step": 20255
|
|
},
|
|
{
|
|
"epoch": 6.399273473900339,
|
|
"grad_norm": 0.0797135766335866,
|
|
"learning_rate": 0.0006905634700198183,
|
|
"loss": 2.4296,
|
|
"step": 20260
|
|
},
|
|
{
|
|
"epoch": 6.400852878464819,
|
|
"grad_norm": 0.05621303030996471,
|
|
"learning_rate": 0.0006900391352107478,
|
|
"loss": 2.3952,
|
|
"step": 20265
|
|
},
|
|
{
|
|
"epoch": 6.402432283029298,
|
|
"grad_norm": 0.05646658463741475,
|
|
"learning_rate": 0.0006895148946587962,
|
|
"loss": 2.3726,
|
|
"step": 20270
|
|
},
|
|
{
|
|
"epoch": 6.404011687593777,
|
|
"grad_norm": 0.06780048965410568,
|
|
"learning_rate": 0.0006889907485233813,
|
|
"loss": 2.4438,
|
|
"step": 20275
|
|
},
|
|
{
|
|
"epoch": 6.405591092158256,
|
|
"grad_norm": 0.056144378783592644,
|
|
"learning_rate": 0.0006884666969638924,
|
|
"loss": 2.381,
|
|
"step": 20280
|
|
},
|
|
{
|
|
"epoch": 6.407170496722736,
|
|
"grad_norm": 0.07509907298138832,
|
|
"learning_rate": 0.0006879427401396908,
|
|
"loss": 2.4764,
|
|
"step": 20285
|
|
},
|
|
{
|
|
"epoch": 6.408749901287215,
|
|
"grad_norm": 0.059945601070473024,
|
|
"learning_rate": 0.0006874188782101084,
|
|
"loss": 2.5221,
|
|
"step": 20290
|
|
},
|
|
{
|
|
"epoch": 6.410329305851694,
|
|
"grad_norm": 0.054707228885569524,
|
|
"learning_rate": 0.0006868951113344473,
|
|
"loss": 2.5151,
|
|
"step": 20295
|
|
},
|
|
{
|
|
"epoch": 6.411908710416173,
|
|
"grad_norm": 0.06278686032168498,
|
|
"learning_rate": 0.0006863714396719829,
|
|
"loss": 2.4056,
|
|
"step": 20300
|
|
},
|
|
{
|
|
"epoch": 6.413488114980653,
|
|
"grad_norm": 0.07822803264136353,
|
|
"learning_rate": 0.0006858478633819596,
|
|
"loss": 2.3835,
|
|
"step": 20305
|
|
},
|
|
{
|
|
"epoch": 6.415067519545132,
|
|
"grad_norm": 0.05451919846700328,
|
|
"learning_rate": 0.000685324382623594,
|
|
"loss": 2.4672,
|
|
"step": 20310
|
|
},
|
|
{
|
|
"epoch": 6.41664692410961,
|
|
"grad_norm": 0.06343652940984232,
|
|
"learning_rate": 0.0006848009975560732,
|
|
"loss": 2.3666,
|
|
"step": 20315
|
|
},
|
|
{
|
|
"epoch": 6.4182263286740895,
|
|
"grad_norm": 0.06640771837812848,
|
|
"learning_rate": 0.0006842777083385548,
|
|
"loss": 2.4597,
|
|
"step": 20320
|
|
},
|
|
{
|
|
"epoch": 6.419805733238569,
|
|
"grad_norm": 0.059272823868647025,
|
|
"learning_rate": 0.0006837545151301685,
|
|
"loss": 2.4158,
|
|
"step": 20325
|
|
},
|
|
{
|
|
"epoch": 6.421385137803048,
|
|
"grad_norm": 0.06747797623613917,
|
|
"learning_rate": 0.0006832314180900133,
|
|
"loss": 2.4111,
|
|
"step": 20330
|
|
},
|
|
{
|
|
"epoch": 6.422964542367527,
|
|
"grad_norm": 0.05532566188652506,
|
|
"learning_rate": 0.0006827084173771603,
|
|
"loss": 2.4859,
|
|
"step": 20335
|
|
},
|
|
{
|
|
"epoch": 6.4245439469320065,
|
|
"grad_norm": 0.05718193786836203,
|
|
"learning_rate": 0.0006821855131506502,
|
|
"loss": 2.4076,
|
|
"step": 20340
|
|
},
|
|
{
|
|
"epoch": 6.426123351496486,
|
|
"grad_norm": 0.08859672081989957,
|
|
"learning_rate": 0.0006816627055694946,
|
|
"loss": 2.4557,
|
|
"step": 20345
|
|
},
|
|
{
|
|
"epoch": 6.427702756060965,
|
|
"grad_norm": 0.05611112160445568,
|
|
"learning_rate": 0.0006811399947926768,
|
|
"loss": 2.5086,
|
|
"step": 20350
|
|
},
|
|
{
|
|
"epoch": 6.429282160625444,
|
|
"grad_norm": 0.0765003452427302,
|
|
"learning_rate": 0.0006806173809791492,
|
|
"loss": 2.5024,
|
|
"step": 20355
|
|
},
|
|
{
|
|
"epoch": 6.4308615651899235,
|
|
"grad_norm": 0.06173073387982078,
|
|
"learning_rate": 0.0006800948642878355,
|
|
"loss": 2.409,
|
|
"step": 20360
|
|
},
|
|
{
|
|
"epoch": 6.432440969754403,
|
|
"grad_norm": 0.07566938586979603,
|
|
"learning_rate": 0.0006795724448776297,
|
|
"loss": 2.4258,
|
|
"step": 20365
|
|
},
|
|
{
|
|
"epoch": 6.434020374318882,
|
|
"grad_norm": 0.06657405835538575,
|
|
"learning_rate": 0.0006790501229073958,
|
|
"loss": 2.4711,
|
|
"step": 20370
|
|
},
|
|
{
|
|
"epoch": 6.435599778883361,
|
|
"grad_norm": 0.05398736598825769,
|
|
"learning_rate": 0.0006785278985359692,
|
|
"loss": 2.3043,
|
|
"step": 20375
|
|
},
|
|
{
|
|
"epoch": 6.4371791834478405,
|
|
"grad_norm": 0.0590280207460199,
|
|
"learning_rate": 0.0006780057719221551,
|
|
"loss": 2.3536,
|
|
"step": 20380
|
|
},
|
|
{
|
|
"epoch": 6.43875858801232,
|
|
"grad_norm": 0.05536512813414772,
|
|
"learning_rate": 0.0006774837432247276,
|
|
"loss": 2.3979,
|
|
"step": 20385
|
|
},
|
|
{
|
|
"epoch": 6.440337992576799,
|
|
"grad_norm": 0.06999585369846131,
|
|
"learning_rate": 0.0006769618126024337,
|
|
"loss": 2.4737,
|
|
"step": 20390
|
|
},
|
|
{
|
|
"epoch": 6.441917397141278,
|
|
"grad_norm": 0.0935108775167,
|
|
"learning_rate": 0.0006764399802139885,
|
|
"loss": 2.4879,
|
|
"step": 20395
|
|
},
|
|
{
|
|
"epoch": 6.443496801705757,
|
|
"grad_norm": 0.07381233374689976,
|
|
"learning_rate": 0.0006759182462180782,
|
|
"loss": 2.3666,
|
|
"step": 20400
|
|
},
|
|
{
|
|
"epoch": 6.445076206270236,
|
|
"grad_norm": 0.05131499052796765,
|
|
"learning_rate": 0.0006753966107733586,
|
|
"loss": 2.3764,
|
|
"step": 20405
|
|
},
|
|
{
|
|
"epoch": 6.446655610834715,
|
|
"grad_norm": 0.05533009728866976,
|
|
"learning_rate": 0.0006748750740384553,
|
|
"loss": 2.3707,
|
|
"step": 20410
|
|
},
|
|
{
|
|
"epoch": 6.448235015399194,
|
|
"grad_norm": 0.05390546171598726,
|
|
"learning_rate": 0.0006743536361719651,
|
|
"loss": 2.4821,
|
|
"step": 20415
|
|
},
|
|
{
|
|
"epoch": 6.4498144199636736,
|
|
"grad_norm": 0.07512206993558714,
|
|
"learning_rate": 0.0006738322973324534,
|
|
"loss": 2.436,
|
|
"step": 20420
|
|
},
|
|
{
|
|
"epoch": 6.451393824528153,
|
|
"grad_norm": 0.054634898457488874,
|
|
"learning_rate": 0.0006733110576784563,
|
|
"loss": 2.3275,
|
|
"step": 20425
|
|
},
|
|
{
|
|
"epoch": 6.452973229092632,
|
|
"grad_norm": 0.05450585017125078,
|
|
"learning_rate": 0.0006727899173684793,
|
|
"loss": 2.437,
|
|
"step": 20430
|
|
},
|
|
{
|
|
"epoch": 6.454552633657111,
|
|
"grad_norm": 0.06061471159834075,
|
|
"learning_rate": 0.0006722688765609975,
|
|
"loss": 2.3199,
|
|
"step": 20435
|
|
},
|
|
{
|
|
"epoch": 6.4561320382215905,
|
|
"grad_norm": 0.052256092507753815,
|
|
"learning_rate": 0.0006717479354144567,
|
|
"loss": 2.2832,
|
|
"step": 20440
|
|
},
|
|
{
|
|
"epoch": 6.45771144278607,
|
|
"grad_norm": 0.06333135818279083,
|
|
"learning_rate": 0.0006712270940872712,
|
|
"loss": 2.4067,
|
|
"step": 20445
|
|
},
|
|
{
|
|
"epoch": 6.459290847350549,
|
|
"grad_norm": 0.06693645700514088,
|
|
"learning_rate": 0.0006707063527378261,
|
|
"loss": 2.4187,
|
|
"step": 20450
|
|
},
|
|
{
|
|
"epoch": 6.460870251915028,
|
|
"grad_norm": 0.09742855878142472,
|
|
"learning_rate": 0.0006701857115244752,
|
|
"loss": 2.3825,
|
|
"step": 20455
|
|
},
|
|
{
|
|
"epoch": 6.4624496564795075,
|
|
"grad_norm": 0.06080532997515874,
|
|
"learning_rate": 0.0006696651706055418,
|
|
"loss": 2.5544,
|
|
"step": 20460
|
|
},
|
|
{
|
|
"epoch": 6.464029061043987,
|
|
"grad_norm": 0.06693902176632435,
|
|
"learning_rate": 0.0006691447301393199,
|
|
"loss": 2.4168,
|
|
"step": 20465
|
|
},
|
|
{
|
|
"epoch": 6.465608465608465,
|
|
"grad_norm": 0.0809218694358045,
|
|
"learning_rate": 0.0006686243902840714,
|
|
"loss": 2.5445,
|
|
"step": 20470
|
|
},
|
|
{
|
|
"epoch": 6.467187870172944,
|
|
"grad_norm": 0.06069332355264493,
|
|
"learning_rate": 0.0006681041511980288,
|
|
"loss": 2.3824,
|
|
"step": 20475
|
|
},
|
|
{
|
|
"epoch": 6.468767274737424,
|
|
"grad_norm": 0.06571379404356267,
|
|
"learning_rate": 0.0006675840130393933,
|
|
"loss": 2.4237,
|
|
"step": 20480
|
|
},
|
|
{
|
|
"epoch": 6.470346679301903,
|
|
"grad_norm": 0.07572609411052877,
|
|
"learning_rate": 0.0006670639759663353,
|
|
"loss": 2.4618,
|
|
"step": 20485
|
|
},
|
|
{
|
|
"epoch": 6.471926083866382,
|
|
"grad_norm": 0.05471074733766293,
|
|
"learning_rate": 0.0006665440401369953,
|
|
"loss": 2.5023,
|
|
"step": 20490
|
|
},
|
|
{
|
|
"epoch": 6.473505488430861,
|
|
"grad_norm": 0.0712791647410548,
|
|
"learning_rate": 0.0006660242057094821,
|
|
"loss": 2.4129,
|
|
"step": 20495
|
|
},
|
|
{
|
|
"epoch": 6.475084892995341,
|
|
"grad_norm": 0.07005922454887745,
|
|
"learning_rate": 0.0006655044728418738,
|
|
"loss": 2.4285,
|
|
"step": 20500
|
|
},
|
|
{
|
|
"epoch": 6.47666429755982,
|
|
"grad_norm": 0.06958646111624173,
|
|
"learning_rate": 0.0006649848416922186,
|
|
"loss": 2.3992,
|
|
"step": 20505
|
|
},
|
|
{
|
|
"epoch": 6.478243702124299,
|
|
"grad_norm": 0.07334876732527172,
|
|
"learning_rate": 0.0006644653124185323,
|
|
"loss": 2.4153,
|
|
"step": 20510
|
|
},
|
|
{
|
|
"epoch": 6.479823106688778,
|
|
"grad_norm": 0.07258404844749798,
|
|
"learning_rate": 0.0006639458851788009,
|
|
"loss": 2.3949,
|
|
"step": 20515
|
|
},
|
|
{
|
|
"epoch": 6.481402511253258,
|
|
"grad_norm": 0.10411797321323364,
|
|
"learning_rate": 0.0006634265601309787,
|
|
"loss": 2.3937,
|
|
"step": 20520
|
|
},
|
|
{
|
|
"epoch": 6.482981915817737,
|
|
"grad_norm": 0.09113918435988624,
|
|
"learning_rate": 0.0006629073374329888,
|
|
"loss": 2.3915,
|
|
"step": 20525
|
|
},
|
|
{
|
|
"epoch": 6.484561320382216,
|
|
"grad_norm": 0.05540007410136057,
|
|
"learning_rate": 0.0006623882172427241,
|
|
"loss": 2.4174,
|
|
"step": 20530
|
|
},
|
|
{
|
|
"epoch": 6.486140724946695,
|
|
"grad_norm": 0.08202788053405924,
|
|
"learning_rate": 0.0006618691997180455,
|
|
"loss": 2.4115,
|
|
"step": 20535
|
|
},
|
|
{
|
|
"epoch": 6.487720129511175,
|
|
"grad_norm": 0.09924470624914969,
|
|
"learning_rate": 0.0006613502850167829,
|
|
"loss": 2.502,
|
|
"step": 20540
|
|
},
|
|
{
|
|
"epoch": 6.489299534075654,
|
|
"grad_norm": 0.07684790883534742,
|
|
"learning_rate": 0.000660831473296735,
|
|
"loss": 2.3873,
|
|
"step": 20545
|
|
},
|
|
{
|
|
"epoch": 6.490878938640133,
|
|
"grad_norm": 0.05693514419709399,
|
|
"learning_rate": 0.0006603127647156686,
|
|
"loss": 2.4283,
|
|
"step": 20550
|
|
},
|
|
{
|
|
"epoch": 6.4924583432046115,
|
|
"grad_norm": 0.06553061754463374,
|
|
"learning_rate": 0.0006597941594313206,
|
|
"loss": 2.4539,
|
|
"step": 20555
|
|
},
|
|
{
|
|
"epoch": 6.494037747769091,
|
|
"grad_norm": 0.06355261773818827,
|
|
"learning_rate": 0.0006592756576013949,
|
|
"loss": 2.3769,
|
|
"step": 20560
|
|
},
|
|
{
|
|
"epoch": 6.49561715233357,
|
|
"grad_norm": 0.05852829833587037,
|
|
"learning_rate": 0.0006587572593835649,
|
|
"loss": 2.2922,
|
|
"step": 20565
|
|
},
|
|
{
|
|
"epoch": 6.497196556898049,
|
|
"grad_norm": 0.06057719520523879,
|
|
"learning_rate": 0.0006582389649354721,
|
|
"loss": 2.4873,
|
|
"step": 20570
|
|
},
|
|
{
|
|
"epoch": 6.4987759614625284,
|
|
"grad_norm": 0.06404783121293027,
|
|
"learning_rate": 0.0006577207744147262,
|
|
"loss": 2.5823,
|
|
"step": 20575
|
|
},
|
|
{
|
|
"epoch": 6.500355366027008,
|
|
"grad_norm": 0.06238517706108964,
|
|
"learning_rate": 0.0006572026879789063,
|
|
"loss": 2.4048,
|
|
"step": 20580
|
|
},
|
|
{
|
|
"epoch": 6.501934770591487,
|
|
"grad_norm": 0.084566544252628,
|
|
"learning_rate": 0.0006566847057855583,
|
|
"loss": 2.495,
|
|
"step": 20585
|
|
},
|
|
{
|
|
"epoch": 6.503514175155966,
|
|
"grad_norm": 0.08866824050321336,
|
|
"learning_rate": 0.0006561668279921982,
|
|
"loss": 2.4044,
|
|
"step": 20590
|
|
},
|
|
{
|
|
"epoch": 6.505093579720445,
|
|
"grad_norm": 0.06242580125310291,
|
|
"learning_rate": 0.0006556490547563089,
|
|
"loss": 2.5171,
|
|
"step": 20595
|
|
},
|
|
{
|
|
"epoch": 6.506672984284925,
|
|
"grad_norm": 0.06005794753994994,
|
|
"learning_rate": 0.0006551313862353417,
|
|
"loss": 2.3504,
|
|
"step": 20600
|
|
},
|
|
{
|
|
"epoch": 6.508252388849404,
|
|
"grad_norm": 0.06397329256961332,
|
|
"learning_rate": 0.0006546138225867167,
|
|
"loss": 2.3979,
|
|
"step": 20605
|
|
},
|
|
{
|
|
"epoch": 6.509831793413883,
|
|
"grad_norm": 0.07405986725381314,
|
|
"learning_rate": 0.0006540963639678214,
|
|
"loss": 2.3973,
|
|
"step": 20610
|
|
},
|
|
{
|
|
"epoch": 6.511411197978362,
|
|
"grad_norm": 0.06530724972921835,
|
|
"learning_rate": 0.0006535790105360116,
|
|
"loss": 2.4029,
|
|
"step": 20615
|
|
},
|
|
{
|
|
"epoch": 6.512990602542842,
|
|
"grad_norm": 0.05498352465599929,
|
|
"learning_rate": 0.0006530617624486118,
|
|
"loss": 2.4322,
|
|
"step": 20620
|
|
},
|
|
{
|
|
"epoch": 6.514570007107321,
|
|
"grad_norm": 0.05581188949448493,
|
|
"learning_rate": 0.0006525446198629129,
|
|
"loss": 2.4059,
|
|
"step": 20625
|
|
},
|
|
{
|
|
"epoch": 6.516149411671799,
|
|
"grad_norm": 0.08415959336897978,
|
|
"learning_rate": 0.0006520275829361755,
|
|
"loss": 2.4613,
|
|
"step": 20630
|
|
},
|
|
{
|
|
"epoch": 6.5177288162362785,
|
|
"grad_norm": 0.06862904566833473,
|
|
"learning_rate": 0.0006515106518256269,
|
|
"loss": 2.4917,
|
|
"step": 20635
|
|
},
|
|
{
|
|
"epoch": 6.519308220800758,
|
|
"grad_norm": 0.09169453611375598,
|
|
"learning_rate": 0.000650993826688462,
|
|
"loss": 2.3704,
|
|
"step": 20640
|
|
},
|
|
{
|
|
"epoch": 6.520887625365237,
|
|
"grad_norm": 0.06956139951956909,
|
|
"learning_rate": 0.0006504771076818451,
|
|
"loss": 2.3782,
|
|
"step": 20645
|
|
},
|
|
{
|
|
"epoch": 6.522467029929716,
|
|
"grad_norm": 0.06277690395283149,
|
|
"learning_rate": 0.0006499604949629064,
|
|
"loss": 2.4124,
|
|
"step": 20650
|
|
},
|
|
{
|
|
"epoch": 6.5240464344941955,
|
|
"grad_norm": 0.08322856565190066,
|
|
"learning_rate": 0.0006494439886887448,
|
|
"loss": 2.444,
|
|
"step": 20655
|
|
},
|
|
{
|
|
"epoch": 6.525625839058675,
|
|
"grad_norm": 0.05389248672929697,
|
|
"learning_rate": 0.0006489275890164264,
|
|
"loss": 2.4816,
|
|
"step": 20660
|
|
},
|
|
{
|
|
"epoch": 6.527205243623154,
|
|
"grad_norm": 0.05018136361957522,
|
|
"learning_rate": 0.0006484112961029851,
|
|
"loss": 2.4618,
|
|
"step": 20665
|
|
},
|
|
{
|
|
"epoch": 6.528784648187633,
|
|
"grad_norm": 0.07115480752235714,
|
|
"learning_rate": 0.0006478951101054225,
|
|
"loss": 2.4771,
|
|
"step": 20670
|
|
},
|
|
{
|
|
"epoch": 6.5303640527521125,
|
|
"grad_norm": 0.06140523510481152,
|
|
"learning_rate": 0.0006473790311807066,
|
|
"loss": 2.4204,
|
|
"step": 20675
|
|
},
|
|
{
|
|
"epoch": 6.531943457316592,
|
|
"grad_norm": 0.056264873222821726,
|
|
"learning_rate": 0.0006468630594857749,
|
|
"loss": 2.4072,
|
|
"step": 20680
|
|
},
|
|
{
|
|
"epoch": 6.533522861881071,
|
|
"grad_norm": 0.04836118315341504,
|
|
"learning_rate": 0.0006463471951775307,
|
|
"loss": 2.444,
|
|
"step": 20685
|
|
},
|
|
{
|
|
"epoch": 6.53510226644555,
|
|
"grad_norm": 0.057506584093042185,
|
|
"learning_rate": 0.0006458314384128447,
|
|
"loss": 2.5414,
|
|
"step": 20690
|
|
},
|
|
{
|
|
"epoch": 6.5366816710100295,
|
|
"grad_norm": 0.05276356453937048,
|
|
"learning_rate": 0.0006453157893485555,
|
|
"loss": 2.4246,
|
|
"step": 20695
|
|
},
|
|
{
|
|
"epoch": 6.538261075574509,
|
|
"grad_norm": 0.054430250916809086,
|
|
"learning_rate": 0.000644800248141468,
|
|
"loss": 2.4457,
|
|
"step": 20700
|
|
},
|
|
{
|
|
"epoch": 6.539840480138988,
|
|
"grad_norm": 0.07702321509104175,
|
|
"learning_rate": 0.0006442848149483565,
|
|
"loss": 2.3848,
|
|
"step": 20705
|
|
},
|
|
{
|
|
"epoch": 6.541419884703467,
|
|
"grad_norm": 0.0669765829276087,
|
|
"learning_rate": 0.0006437694899259597,
|
|
"loss": 2.4353,
|
|
"step": 20710
|
|
},
|
|
{
|
|
"epoch": 6.542999289267946,
|
|
"grad_norm": 0.06911662331662145,
|
|
"learning_rate": 0.0006432542732309849,
|
|
"loss": 2.434,
|
|
"step": 20715
|
|
},
|
|
{
|
|
"epoch": 6.544578693832425,
|
|
"grad_norm": 0.04670729422642143,
|
|
"learning_rate": 0.0006427391650201064,
|
|
"loss": 2.3938,
|
|
"step": 20720
|
|
},
|
|
{
|
|
"epoch": 6.546158098396904,
|
|
"grad_norm": 0.05420350055764316,
|
|
"learning_rate": 0.0006422241654499654,
|
|
"loss": 2.4174,
|
|
"step": 20725
|
|
},
|
|
{
|
|
"epoch": 6.547737502961383,
|
|
"grad_norm": 0.06064627205328386,
|
|
"learning_rate": 0.0006417092746771693,
|
|
"loss": 2.3583,
|
|
"step": 20730
|
|
},
|
|
{
|
|
"epoch": 6.549316907525863,
|
|
"grad_norm": 0.0724627474102502,
|
|
"learning_rate": 0.000641194492858294,
|
|
"loss": 2.4769,
|
|
"step": 20735
|
|
},
|
|
{
|
|
"epoch": 6.550896312090342,
|
|
"grad_norm": 0.06145175058664715,
|
|
"learning_rate": 0.0006406798201498806,
|
|
"loss": 2.5493,
|
|
"step": 20740
|
|
},
|
|
{
|
|
"epoch": 6.552475716654821,
|
|
"grad_norm": 0.07168946019155933,
|
|
"learning_rate": 0.0006401652567084386,
|
|
"loss": 2.4802,
|
|
"step": 20745
|
|
},
|
|
{
|
|
"epoch": 6.5540551212193,
|
|
"grad_norm": 0.05391704872919303,
|
|
"learning_rate": 0.0006396508026904428,
|
|
"loss": 2.4745,
|
|
"step": 20750
|
|
},
|
|
{
|
|
"epoch": 6.55563452578378,
|
|
"grad_norm": 0.05155171999881381,
|
|
"learning_rate": 0.0006391364582523355,
|
|
"loss": 2.383,
|
|
"step": 20755
|
|
},
|
|
{
|
|
"epoch": 6.557213930348259,
|
|
"grad_norm": 0.07862364403967002,
|
|
"learning_rate": 0.0006386222235505257,
|
|
"loss": 2.4155,
|
|
"step": 20760
|
|
},
|
|
{
|
|
"epoch": 6.558793334912738,
|
|
"grad_norm": 0.06171940669513623,
|
|
"learning_rate": 0.0006381080987413884,
|
|
"loss": 2.3414,
|
|
"step": 20765
|
|
},
|
|
{
|
|
"epoch": 6.560372739477217,
|
|
"grad_norm": 0.05391005538952901,
|
|
"learning_rate": 0.0006375940839812666,
|
|
"loss": 2.4023,
|
|
"step": 20770
|
|
},
|
|
{
|
|
"epoch": 6.561952144041697,
|
|
"grad_norm": 0.059746498294211754,
|
|
"learning_rate": 0.0006370801794264682,
|
|
"loss": 2.4768,
|
|
"step": 20775
|
|
},
|
|
{
|
|
"epoch": 6.563531548606176,
|
|
"grad_norm": 0.059891714288636426,
|
|
"learning_rate": 0.0006365663852332684,
|
|
"loss": 2.3974,
|
|
"step": 20780
|
|
},
|
|
{
|
|
"epoch": 6.565110953170654,
|
|
"grad_norm": 0.06020696840420814,
|
|
"learning_rate": 0.0006360527015579092,
|
|
"loss": 2.5119,
|
|
"step": 20785
|
|
},
|
|
{
|
|
"epoch": 6.566690357735133,
|
|
"grad_norm": 0.06637291578503648,
|
|
"learning_rate": 0.0006355391285565974,
|
|
"loss": 2.5168,
|
|
"step": 20790
|
|
},
|
|
{
|
|
"epoch": 6.568269762299613,
|
|
"grad_norm": 0.057836322139409785,
|
|
"learning_rate": 0.0006350256663855085,
|
|
"loss": 2.388,
|
|
"step": 20795
|
|
},
|
|
{
|
|
"epoch": 6.569849166864092,
|
|
"grad_norm": 0.059381685790223855,
|
|
"learning_rate": 0.0006345123152007826,
|
|
"loss": 2.4761,
|
|
"step": 20800
|
|
},
|
|
{
|
|
"epoch": 6.571428571428571,
|
|
"grad_norm": 0.07339162811532468,
|
|
"learning_rate": 0.0006339990751585264,
|
|
"loss": 2.485,
|
|
"step": 20805
|
|
},
|
|
{
|
|
"epoch": 6.57300797599305,
|
|
"grad_norm": 0.053332487819192885,
|
|
"learning_rate": 0.0006334859464148131,
|
|
"loss": 2.3739,
|
|
"step": 20810
|
|
},
|
|
{
|
|
"epoch": 6.57458738055753,
|
|
"grad_norm": 0.058735428240274216,
|
|
"learning_rate": 0.0006329729291256814,
|
|
"loss": 2.425,
|
|
"step": 20815
|
|
},
|
|
{
|
|
"epoch": 6.576166785122009,
|
|
"grad_norm": 0.05697964987032508,
|
|
"learning_rate": 0.0006324600234471372,
|
|
"loss": 2.4277,
|
|
"step": 20820
|
|
},
|
|
{
|
|
"epoch": 6.577746189686488,
|
|
"grad_norm": 0.05514067686993358,
|
|
"learning_rate": 0.0006319472295351517,
|
|
"loss": 2.4361,
|
|
"step": 20825
|
|
},
|
|
{
|
|
"epoch": 6.579325594250967,
|
|
"grad_norm": 0.06102788490633586,
|
|
"learning_rate": 0.000631434547545662,
|
|
"loss": 2.5325,
|
|
"step": 20830
|
|
},
|
|
{
|
|
"epoch": 6.580904998815447,
|
|
"grad_norm": 0.05897696248779253,
|
|
"learning_rate": 0.0006309219776345717,
|
|
"loss": 2.4727,
|
|
"step": 20835
|
|
},
|
|
{
|
|
"epoch": 6.582484403379926,
|
|
"grad_norm": 0.06727456644683324,
|
|
"learning_rate": 0.00063040951995775,
|
|
"loss": 2.3821,
|
|
"step": 20840
|
|
},
|
|
{
|
|
"epoch": 6.584063807944405,
|
|
"grad_norm": 0.04967440222945126,
|
|
"learning_rate": 0.0006298971746710316,
|
|
"loss": 2.2977,
|
|
"step": 20845
|
|
},
|
|
{
|
|
"epoch": 6.585643212508884,
|
|
"grad_norm": 0.05384296029749339,
|
|
"learning_rate": 0.0006293849419302178,
|
|
"loss": 2.4543,
|
|
"step": 20850
|
|
},
|
|
{
|
|
"epoch": 6.587222617073364,
|
|
"grad_norm": 0.06125422766949562,
|
|
"learning_rate": 0.0006288728218910751,
|
|
"loss": 2.4878,
|
|
"step": 20855
|
|
},
|
|
{
|
|
"epoch": 6.588802021637843,
|
|
"grad_norm": 0.07072270452560224,
|
|
"learning_rate": 0.0006283608147093362,
|
|
"loss": 2.4066,
|
|
"step": 20860
|
|
},
|
|
{
|
|
"epoch": 6.590381426202322,
|
|
"grad_norm": 0.08089157778628371,
|
|
"learning_rate": 0.0006278489205406992,
|
|
"loss": 2.4273,
|
|
"step": 20865
|
|
},
|
|
{
|
|
"epoch": 6.591960830766801,
|
|
"grad_norm": 0.06807327668370637,
|
|
"learning_rate": 0.0006273371395408276,
|
|
"loss": 2.4643,
|
|
"step": 20870
|
|
},
|
|
{
|
|
"epoch": 6.59354023533128,
|
|
"grad_norm": 0.051338173826827205,
|
|
"learning_rate": 0.000626825471865351,
|
|
"loss": 2.4302,
|
|
"step": 20875
|
|
},
|
|
{
|
|
"epoch": 6.595119639895759,
|
|
"grad_norm": 0.06388132555176677,
|
|
"learning_rate": 0.0006263139176698638,
|
|
"loss": 2.4533,
|
|
"step": 20880
|
|
},
|
|
{
|
|
"epoch": 6.596699044460238,
|
|
"grad_norm": 0.06100198666281842,
|
|
"learning_rate": 0.0006258024771099269,
|
|
"loss": 2.3827,
|
|
"step": 20885
|
|
},
|
|
{
|
|
"epoch": 6.5982784490247175,
|
|
"grad_norm": 0.0615362542419615,
|
|
"learning_rate": 0.0006252911503410661,
|
|
"loss": 2.3859,
|
|
"step": 20890
|
|
},
|
|
{
|
|
"epoch": 6.599857853589197,
|
|
"grad_norm": 0.05506183334065402,
|
|
"learning_rate": 0.000624779937518772,
|
|
"loss": 2.3833,
|
|
"step": 20895
|
|
},
|
|
{
|
|
"epoch": 6.601437258153676,
|
|
"grad_norm": 0.06586044247997229,
|
|
"learning_rate": 0.000624268838798502,
|
|
"loss": 2.4417,
|
|
"step": 20900
|
|
},
|
|
{
|
|
"epoch": 6.603016662718155,
|
|
"grad_norm": 0.05622418928177649,
|
|
"learning_rate": 0.0006237578543356769,
|
|
"loss": 2.4432,
|
|
"step": 20905
|
|
},
|
|
{
|
|
"epoch": 6.6045960672826345,
|
|
"grad_norm": 0.054937089771462236,
|
|
"learning_rate": 0.0006232469842856849,
|
|
"loss": 2.4022,
|
|
"step": 20910
|
|
},
|
|
{
|
|
"epoch": 6.606175471847114,
|
|
"grad_norm": 0.0585384939232035,
|
|
"learning_rate": 0.0006227362288038778,
|
|
"loss": 2.3998,
|
|
"step": 20915
|
|
},
|
|
{
|
|
"epoch": 6.607754876411593,
|
|
"grad_norm": 0.0694134000321541,
|
|
"learning_rate": 0.000622225588045573,
|
|
"loss": 2.4481,
|
|
"step": 20920
|
|
},
|
|
{
|
|
"epoch": 6.609334280976072,
|
|
"grad_norm": 0.05339212873887325,
|
|
"learning_rate": 0.0006217150621660532,
|
|
"loss": 2.372,
|
|
"step": 20925
|
|
},
|
|
{
|
|
"epoch": 6.6109136855405515,
|
|
"grad_norm": 0.059635604148924506,
|
|
"learning_rate": 0.0006212046513205661,
|
|
"loss": 2.3724,
|
|
"step": 20930
|
|
},
|
|
{
|
|
"epoch": 6.612493090105031,
|
|
"grad_norm": 0.05840989832969823,
|
|
"learning_rate": 0.0006206943556643246,
|
|
"loss": 2.4719,
|
|
"step": 20935
|
|
},
|
|
{
|
|
"epoch": 6.61407249466951,
|
|
"grad_norm": 0.05419546777536966,
|
|
"learning_rate": 0.0006201841753525058,
|
|
"loss": 2.3878,
|
|
"step": 20940
|
|
},
|
|
{
|
|
"epoch": 6.615651899233988,
|
|
"grad_norm": 0.0694169198910676,
|
|
"learning_rate": 0.0006196741105402524,
|
|
"loss": 2.5015,
|
|
"step": 20945
|
|
},
|
|
{
|
|
"epoch": 6.617231303798468,
|
|
"grad_norm": 0.05556344170108016,
|
|
"learning_rate": 0.0006191641613826723,
|
|
"loss": 2.3889,
|
|
"step": 20950
|
|
},
|
|
{
|
|
"epoch": 6.618810708362947,
|
|
"grad_norm": 0.052620835978537304,
|
|
"learning_rate": 0.0006186543280348375,
|
|
"loss": 2.3962,
|
|
"step": 20955
|
|
},
|
|
{
|
|
"epoch": 6.620390112927426,
|
|
"grad_norm": 0.06339890729547158,
|
|
"learning_rate": 0.0006181446106517849,
|
|
"loss": 2.455,
|
|
"step": 20960
|
|
},
|
|
{
|
|
"epoch": 6.621969517491905,
|
|
"grad_norm": 0.05505793978721506,
|
|
"learning_rate": 0.0006176350093885166,
|
|
"loss": 2.3367,
|
|
"step": 20965
|
|
},
|
|
{
|
|
"epoch": 6.623548922056385,
|
|
"grad_norm": 0.057843051055069136,
|
|
"learning_rate": 0.0006171255243999987,
|
|
"loss": 2.5125,
|
|
"step": 20970
|
|
},
|
|
{
|
|
"epoch": 6.625128326620864,
|
|
"grad_norm": 0.06214587643757465,
|
|
"learning_rate": 0.0006166161558411627,
|
|
"loss": 2.4258,
|
|
"step": 20975
|
|
},
|
|
{
|
|
"epoch": 6.626707731185343,
|
|
"grad_norm": 0.05623495978859743,
|
|
"learning_rate": 0.0006161069038669044,
|
|
"loss": 2.4656,
|
|
"step": 20980
|
|
},
|
|
{
|
|
"epoch": 6.628287135749822,
|
|
"grad_norm": 0.09347695348850525,
|
|
"learning_rate": 0.0006155977686320837,
|
|
"loss": 2.4758,
|
|
"step": 20985
|
|
},
|
|
{
|
|
"epoch": 6.629866540314302,
|
|
"grad_norm": 0.08204971148501496,
|
|
"learning_rate": 0.0006150887502915257,
|
|
"loss": 2.5468,
|
|
"step": 20990
|
|
},
|
|
{
|
|
"epoch": 6.631445944878781,
|
|
"grad_norm": 0.061384071592050546,
|
|
"learning_rate": 0.000614579849000019,
|
|
"loss": 2.424,
|
|
"step": 20995
|
|
},
|
|
{
|
|
"epoch": 6.63302534944326,
|
|
"grad_norm": 0.0758429337434268,
|
|
"learning_rate": 0.0006140710649123182,
|
|
"loss": 2.4577,
|
|
"step": 21000
|
|
},
|
|
{
|
|
"epoch": 6.634604754007739,
|
|
"grad_norm": 0.06750234585741496,
|
|
"learning_rate": 0.0006135623981831408,
|
|
"loss": 2.4611,
|
|
"step": 21005
|
|
},
|
|
{
|
|
"epoch": 6.6361841585722185,
|
|
"grad_norm": 0.05212622446955658,
|
|
"learning_rate": 0.0006130538489671688,
|
|
"loss": 2.4119,
|
|
"step": 21010
|
|
},
|
|
{
|
|
"epoch": 6.637763563136698,
|
|
"grad_norm": 0.05171825318302362,
|
|
"learning_rate": 0.0006125454174190492,
|
|
"loss": 2.4197,
|
|
"step": 21015
|
|
},
|
|
{
|
|
"epoch": 6.639342967701177,
|
|
"grad_norm": 0.04999694587867551,
|
|
"learning_rate": 0.0006120371036933927,
|
|
"loss": 2.4476,
|
|
"step": 21020
|
|
},
|
|
{
|
|
"epoch": 6.640922372265656,
|
|
"grad_norm": 0.05063313641490577,
|
|
"learning_rate": 0.0006115289079447742,
|
|
"loss": 2.5165,
|
|
"step": 21025
|
|
},
|
|
{
|
|
"epoch": 6.642501776830135,
|
|
"grad_norm": 0.0644335498574247,
|
|
"learning_rate": 0.0006110208303277329,
|
|
"loss": 2.3849,
|
|
"step": 21030
|
|
},
|
|
{
|
|
"epoch": 6.644081181394614,
|
|
"grad_norm": 0.06294764797698174,
|
|
"learning_rate": 0.0006105128709967714,
|
|
"loss": 2.4369,
|
|
"step": 21035
|
|
},
|
|
{
|
|
"epoch": 6.645660585959093,
|
|
"grad_norm": 0.055502495101548276,
|
|
"learning_rate": 0.0006100050301063577,
|
|
"loss": 2.4349,
|
|
"step": 21040
|
|
},
|
|
{
|
|
"epoch": 6.647239990523572,
|
|
"grad_norm": 0.06655289353912996,
|
|
"learning_rate": 0.0006094973078109222,
|
|
"loss": 2.4072,
|
|
"step": 21045
|
|
},
|
|
{
|
|
"epoch": 6.648819395088052,
|
|
"grad_norm": 0.05201586001744827,
|
|
"learning_rate": 0.0006089897042648609,
|
|
"loss": 2.39,
|
|
"step": 21050
|
|
},
|
|
{
|
|
"epoch": 6.650398799652531,
|
|
"grad_norm": 0.053748384847827874,
|
|
"learning_rate": 0.0006084822196225322,
|
|
"loss": 2.4438,
|
|
"step": 21055
|
|
},
|
|
{
|
|
"epoch": 6.65197820421701,
|
|
"grad_norm": 0.051300648982207465,
|
|
"learning_rate": 0.0006079748540382587,
|
|
"loss": 2.3912,
|
|
"step": 21060
|
|
},
|
|
{
|
|
"epoch": 6.653557608781489,
|
|
"grad_norm": 0.04687662341409184,
|
|
"learning_rate": 0.0006074676076663277,
|
|
"loss": 2.4639,
|
|
"step": 21065
|
|
},
|
|
{
|
|
"epoch": 6.655137013345969,
|
|
"grad_norm": 0.07092567169247159,
|
|
"learning_rate": 0.0006069604806609893,
|
|
"loss": 2.4718,
|
|
"step": 21070
|
|
},
|
|
{
|
|
"epoch": 6.656716417910448,
|
|
"grad_norm": 0.05984170026138167,
|
|
"learning_rate": 0.0006064534731764573,
|
|
"loss": 2.4754,
|
|
"step": 21075
|
|
},
|
|
{
|
|
"epoch": 6.658295822474927,
|
|
"grad_norm": 0.0635421823482248,
|
|
"learning_rate": 0.0006059465853669098,
|
|
"loss": 2.429,
|
|
"step": 21080
|
|
},
|
|
{
|
|
"epoch": 6.659875227039406,
|
|
"grad_norm": 0.06890486031509065,
|
|
"learning_rate": 0.0006054398173864876,
|
|
"loss": 2.4638,
|
|
"step": 21085
|
|
},
|
|
{
|
|
"epoch": 6.661454631603886,
|
|
"grad_norm": 0.0690713561702384,
|
|
"learning_rate": 0.0006049331693892965,
|
|
"loss": 2.3999,
|
|
"step": 21090
|
|
},
|
|
{
|
|
"epoch": 6.663034036168365,
|
|
"grad_norm": 0.06404351030765236,
|
|
"learning_rate": 0.0006044266415294046,
|
|
"loss": 2.3991,
|
|
"step": 21095
|
|
},
|
|
{
|
|
"epoch": 6.664613440732843,
|
|
"grad_norm": 0.061888142256296186,
|
|
"learning_rate": 0.0006039202339608432,
|
|
"loss": 2.5241,
|
|
"step": 21100
|
|
},
|
|
{
|
|
"epoch": 6.6661928452973225,
|
|
"grad_norm": 0.0729176166350399,
|
|
"learning_rate": 0.0006034139468376083,
|
|
"loss": 2.3942,
|
|
"step": 21105
|
|
},
|
|
{
|
|
"epoch": 6.667772249861802,
|
|
"grad_norm": 0.04947800128694307,
|
|
"learning_rate": 0.0006029077803136581,
|
|
"loss": 2.419,
|
|
"step": 21110
|
|
},
|
|
{
|
|
"epoch": 6.669351654426281,
|
|
"grad_norm": 0.06841577856807288,
|
|
"learning_rate": 0.0006024017345429149,
|
|
"loss": 2.4606,
|
|
"step": 21115
|
|
},
|
|
{
|
|
"epoch": 6.67093105899076,
|
|
"grad_norm": 0.05770427309484664,
|
|
"learning_rate": 0.0006018958096792641,
|
|
"loss": 2.439,
|
|
"step": 21120
|
|
},
|
|
{
|
|
"epoch": 6.6725104635552395,
|
|
"grad_norm": 0.05522581275167986,
|
|
"learning_rate": 0.0006013900058765535,
|
|
"loss": 2.4211,
|
|
"step": 21125
|
|
},
|
|
{
|
|
"epoch": 6.674089868119719,
|
|
"grad_norm": 0.0600817695752141,
|
|
"learning_rate": 0.0006008843232885958,
|
|
"loss": 2.4781,
|
|
"step": 21130
|
|
},
|
|
{
|
|
"epoch": 6.675669272684198,
|
|
"grad_norm": 0.07080242556859254,
|
|
"learning_rate": 0.0006003787620691651,
|
|
"loss": 2.4881,
|
|
"step": 21135
|
|
},
|
|
{
|
|
"epoch": 6.677248677248677,
|
|
"grad_norm": 0.06125436041243013,
|
|
"learning_rate": 0.0005998733223719998,
|
|
"loss": 2.4683,
|
|
"step": 21140
|
|
},
|
|
{
|
|
"epoch": 6.6788280818131565,
|
|
"grad_norm": 0.08281825486541163,
|
|
"learning_rate": 0.0005993680043508007,
|
|
"loss": 2.4371,
|
|
"step": 21145
|
|
},
|
|
{
|
|
"epoch": 6.680407486377636,
|
|
"grad_norm": 0.07741584389134457,
|
|
"learning_rate": 0.0005988628081592313,
|
|
"loss": 2.4019,
|
|
"step": 21150
|
|
},
|
|
{
|
|
"epoch": 6.681986890942115,
|
|
"grad_norm": 0.08786921274591279,
|
|
"learning_rate": 0.0005983577339509196,
|
|
"loss": 2.4488,
|
|
"step": 21155
|
|
},
|
|
{
|
|
"epoch": 6.683566295506594,
|
|
"grad_norm": 0.06565792219426568,
|
|
"learning_rate": 0.0005978527818794545,
|
|
"loss": 2.4433,
|
|
"step": 21160
|
|
},
|
|
{
|
|
"epoch": 6.6851457000710734,
|
|
"grad_norm": 0.05681726799483692,
|
|
"learning_rate": 0.0005973479520983892,
|
|
"loss": 2.4619,
|
|
"step": 21165
|
|
},
|
|
{
|
|
"epoch": 6.686725104635553,
|
|
"grad_norm": 0.0679320979214565,
|
|
"learning_rate": 0.0005968432447612391,
|
|
"loss": 2.4669,
|
|
"step": 21170
|
|
},
|
|
{
|
|
"epoch": 6.688304509200032,
|
|
"grad_norm": 0.05167237326549837,
|
|
"learning_rate": 0.000596338660021482,
|
|
"loss": 2.53,
|
|
"step": 21175
|
|
},
|
|
{
|
|
"epoch": 6.689883913764511,
|
|
"grad_norm": 0.052642134871684294,
|
|
"learning_rate": 0.0005958341980325598,
|
|
"loss": 2.3834,
|
|
"step": 21180
|
|
},
|
|
{
|
|
"epoch": 6.69146331832899,
|
|
"grad_norm": 0.05183197534273639,
|
|
"learning_rate": 0.0005953298589478757,
|
|
"loss": 2.5494,
|
|
"step": 21185
|
|
},
|
|
{
|
|
"epoch": 6.693042722893469,
|
|
"grad_norm": 0.058016467201555635,
|
|
"learning_rate": 0.0005948256429207957,
|
|
"loss": 2.4402,
|
|
"step": 21190
|
|
},
|
|
{
|
|
"epoch": 6.694622127457948,
|
|
"grad_norm": 0.05504376462993496,
|
|
"learning_rate": 0.0005943215501046492,
|
|
"loss": 2.4724,
|
|
"step": 21195
|
|
},
|
|
{
|
|
"epoch": 6.696201532022427,
|
|
"grad_norm": 0.04805213505710295,
|
|
"learning_rate": 0.000593817580652727,
|
|
"loss": 2.432,
|
|
"step": 21200
|
|
},
|
|
{
|
|
"epoch": 6.6977809365869065,
|
|
"grad_norm": 0.05438841893764982,
|
|
"learning_rate": 0.0005933137347182838,
|
|
"loss": 2.4917,
|
|
"step": 21205
|
|
},
|
|
{
|
|
"epoch": 6.699360341151386,
|
|
"grad_norm": 0.06253126037078709,
|
|
"learning_rate": 0.0005928100124545355,
|
|
"loss": 2.3982,
|
|
"step": 21210
|
|
},
|
|
{
|
|
"epoch": 6.700939745715865,
|
|
"grad_norm": 0.06946817209182686,
|
|
"learning_rate": 0.0005923064140146602,
|
|
"loss": 2.3622,
|
|
"step": 21215
|
|
},
|
|
{
|
|
"epoch": 6.702519150280344,
|
|
"grad_norm": 0.055943700526094424,
|
|
"learning_rate": 0.0005918029395518001,
|
|
"loss": 2.432,
|
|
"step": 21220
|
|
},
|
|
{
|
|
"epoch": 6.7040985548448235,
|
|
"grad_norm": 0.0625940926110543,
|
|
"learning_rate": 0.0005912995892190578,
|
|
"loss": 2.4415,
|
|
"step": 21225
|
|
},
|
|
{
|
|
"epoch": 6.705677959409303,
|
|
"grad_norm": 0.05202327389180163,
|
|
"learning_rate": 0.0005907963631694993,
|
|
"loss": 2.4042,
|
|
"step": 21230
|
|
},
|
|
{
|
|
"epoch": 6.707257363973782,
|
|
"grad_norm": 0.07192885151510676,
|
|
"learning_rate": 0.0005902932615561524,
|
|
"loss": 2.4617,
|
|
"step": 21235
|
|
},
|
|
{
|
|
"epoch": 6.708836768538261,
|
|
"grad_norm": 0.05317858526574907,
|
|
"learning_rate": 0.0005897902845320064,
|
|
"loss": 2.4618,
|
|
"step": 21240
|
|
},
|
|
{
|
|
"epoch": 6.7104161731027405,
|
|
"grad_norm": 0.05493955034968431,
|
|
"learning_rate": 0.0005892874322500146,
|
|
"loss": 2.4495,
|
|
"step": 21245
|
|
},
|
|
{
|
|
"epoch": 6.71199557766722,
|
|
"grad_norm": 0.057507254717356474,
|
|
"learning_rate": 0.0005887847048630902,
|
|
"loss": 2.4012,
|
|
"step": 21250
|
|
},
|
|
{
|
|
"epoch": 6.713574982231699,
|
|
"grad_norm": 0.06348054251390015,
|
|
"learning_rate": 0.00058828210252411,
|
|
"loss": 2.4883,
|
|
"step": 21255
|
|
},
|
|
{
|
|
"epoch": 6.715154386796177,
|
|
"grad_norm": 0.05405229919959808,
|
|
"learning_rate": 0.0005877796253859118,
|
|
"loss": 2.4362,
|
|
"step": 21260
|
|
},
|
|
{
|
|
"epoch": 6.716733791360657,
|
|
"grad_norm": 0.06359025309672207,
|
|
"learning_rate": 0.0005872772736012955,
|
|
"loss": 2.4992,
|
|
"step": 21265
|
|
},
|
|
{
|
|
"epoch": 6.718313195925136,
|
|
"grad_norm": 0.05766892929360303,
|
|
"learning_rate": 0.0005867750473230235,
|
|
"loss": 2.5039,
|
|
"step": 21270
|
|
},
|
|
{
|
|
"epoch": 6.719892600489615,
|
|
"grad_norm": 0.05185065731909846,
|
|
"learning_rate": 0.0005862729467038195,
|
|
"loss": 2.4521,
|
|
"step": 21275
|
|
},
|
|
{
|
|
"epoch": 6.721472005054094,
|
|
"grad_norm": 0.0638969724670669,
|
|
"learning_rate": 0.000585770971896369,
|
|
"loss": 2.4547,
|
|
"step": 21280
|
|
},
|
|
{
|
|
"epoch": 6.723051409618574,
|
|
"grad_norm": 0.07557132807061367,
|
|
"learning_rate": 0.0005852691230533196,
|
|
"loss": 2.5275,
|
|
"step": 21285
|
|
},
|
|
{
|
|
"epoch": 6.724630814183053,
|
|
"grad_norm": 0.06444976114625953,
|
|
"learning_rate": 0.0005847674003272797,
|
|
"loss": 2.4865,
|
|
"step": 21290
|
|
},
|
|
{
|
|
"epoch": 6.726210218747532,
|
|
"grad_norm": 0.048931507948122496,
|
|
"learning_rate": 0.0005842658038708206,
|
|
"loss": 2.418,
|
|
"step": 21295
|
|
},
|
|
{
|
|
"epoch": 6.727789623312011,
|
|
"grad_norm": 0.059369553415906454,
|
|
"learning_rate": 0.0005837643338364744,
|
|
"loss": 2.3805,
|
|
"step": 21300
|
|
},
|
|
{
|
|
"epoch": 6.729369027876491,
|
|
"grad_norm": 0.05844950933586259,
|
|
"learning_rate": 0.0005832629903767345,
|
|
"loss": 2.402,
|
|
"step": 21305
|
|
},
|
|
{
|
|
"epoch": 6.73094843244097,
|
|
"grad_norm": 0.05893507399240853,
|
|
"learning_rate": 0.0005827617736440569,
|
|
"loss": 2.4748,
|
|
"step": 21310
|
|
},
|
|
{
|
|
"epoch": 6.732527837005449,
|
|
"grad_norm": 0.05084658590116852,
|
|
"learning_rate": 0.0005822606837908578,
|
|
"loss": 2.3668,
|
|
"step": 21315
|
|
},
|
|
{
|
|
"epoch": 6.734107241569928,
|
|
"grad_norm": 0.049818689273461586,
|
|
"learning_rate": 0.0005817597209695162,
|
|
"loss": 2.4421,
|
|
"step": 21320
|
|
},
|
|
{
|
|
"epoch": 6.735686646134408,
|
|
"grad_norm": 0.06560679436585973,
|
|
"learning_rate": 0.0005812588853323713,
|
|
"loss": 2.4364,
|
|
"step": 21325
|
|
},
|
|
{
|
|
"epoch": 6.737266050698887,
|
|
"grad_norm": 0.05177158420238849,
|
|
"learning_rate": 0.0005807581770317237,
|
|
"loss": 2.3318,
|
|
"step": 21330
|
|
},
|
|
{
|
|
"epoch": 6.738845455263366,
|
|
"grad_norm": 0.05844167575968277,
|
|
"learning_rate": 0.000580257596219836,
|
|
"loss": 2.5628,
|
|
"step": 21335
|
|
},
|
|
{
|
|
"epoch": 6.740424859827845,
|
|
"grad_norm": 0.05560042809757776,
|
|
"learning_rate": 0.0005797571430489311,
|
|
"loss": 2.4057,
|
|
"step": 21340
|
|
},
|
|
{
|
|
"epoch": 6.742004264392325,
|
|
"grad_norm": 0.055891772567053834,
|
|
"learning_rate": 0.0005792568176711944,
|
|
"loss": 2.4307,
|
|
"step": 21345
|
|
},
|
|
{
|
|
"epoch": 6.743583668956803,
|
|
"grad_norm": 0.05737511434738402,
|
|
"learning_rate": 0.0005787566202387713,
|
|
"loss": 2.4589,
|
|
"step": 21350
|
|
},
|
|
{
|
|
"epoch": 6.745163073521282,
|
|
"grad_norm": 0.0631938128075815,
|
|
"learning_rate": 0.000578256550903768,
|
|
"loss": 2.4153,
|
|
"step": 21355
|
|
},
|
|
{
|
|
"epoch": 6.746742478085761,
|
|
"grad_norm": 0.060357085037496414,
|
|
"learning_rate": 0.0005777566098182536,
|
|
"loss": 2.4025,
|
|
"step": 21360
|
|
},
|
|
{
|
|
"epoch": 6.748321882650241,
|
|
"grad_norm": 0.04874718966699242,
|
|
"learning_rate": 0.0005772567971342557,
|
|
"loss": 2.4402,
|
|
"step": 21365
|
|
},
|
|
{
|
|
"epoch": 6.74990128721472,
|
|
"grad_norm": 0.06512903170789627,
|
|
"learning_rate": 0.0005767571130037654,
|
|
"loss": 2.4672,
|
|
"step": 21370
|
|
},
|
|
{
|
|
"epoch": 6.751480691779199,
|
|
"grad_norm": 0.053667384264513535,
|
|
"learning_rate": 0.0005762575575787332,
|
|
"loss": 2.4265,
|
|
"step": 21375
|
|
},
|
|
{
|
|
"epoch": 6.753060096343678,
|
|
"grad_norm": 0.07297903970161429,
|
|
"learning_rate": 0.0005757581310110696,
|
|
"loss": 2.4725,
|
|
"step": 21380
|
|
},
|
|
{
|
|
"epoch": 6.754639500908158,
|
|
"grad_norm": 0.05872227870883123,
|
|
"learning_rate": 0.0005752588334526483,
|
|
"loss": 2.373,
|
|
"step": 21385
|
|
},
|
|
{
|
|
"epoch": 6.756218905472637,
|
|
"grad_norm": 0.06336969603677607,
|
|
"learning_rate": 0.0005747596650553019,
|
|
"loss": 2.3794,
|
|
"step": 21390
|
|
},
|
|
{
|
|
"epoch": 6.757798310037116,
|
|
"grad_norm": 0.06503276039389301,
|
|
"learning_rate": 0.000574260625970824,
|
|
"loss": 2.5187,
|
|
"step": 21395
|
|
},
|
|
{
|
|
"epoch": 6.759377714601595,
|
|
"grad_norm": 0.060933732085715314,
|
|
"learning_rate": 0.0005737617163509701,
|
|
"loss": 2.3795,
|
|
"step": 21400
|
|
},
|
|
{
|
|
"epoch": 6.760957119166075,
|
|
"grad_norm": 0.0605127703286891,
|
|
"learning_rate": 0.0005732629363474544,
|
|
"loss": 2.4924,
|
|
"step": 21405
|
|
},
|
|
{
|
|
"epoch": 6.762536523730554,
|
|
"grad_norm": 0.07433786962800859,
|
|
"learning_rate": 0.0005727642861119537,
|
|
"loss": 2.4789,
|
|
"step": 21410
|
|
},
|
|
{
|
|
"epoch": 6.764115928295032,
|
|
"grad_norm": 0.05739265169637819,
|
|
"learning_rate": 0.0005722657657961041,
|
|
"loss": 2.3773,
|
|
"step": 21415
|
|
},
|
|
{
|
|
"epoch": 6.7656953328595115,
|
|
"grad_norm": 0.05821728913037662,
|
|
"learning_rate": 0.000571767375551502,
|
|
"loss": 2.4953,
|
|
"step": 21420
|
|
},
|
|
{
|
|
"epoch": 6.767274737423991,
|
|
"grad_norm": 0.05726282173331931,
|
|
"learning_rate": 0.0005712691155297052,
|
|
"loss": 2.4107,
|
|
"step": 21425
|
|
},
|
|
{
|
|
"epoch": 6.76885414198847,
|
|
"grad_norm": 0.060644972908402456,
|
|
"learning_rate": 0.0005707709858822305,
|
|
"loss": 2.4309,
|
|
"step": 21430
|
|
},
|
|
{
|
|
"epoch": 6.770433546552949,
|
|
"grad_norm": 0.05875530299857965,
|
|
"learning_rate": 0.0005702729867605571,
|
|
"loss": 2.4803,
|
|
"step": 21435
|
|
},
|
|
{
|
|
"epoch": 6.7720129511174285,
|
|
"grad_norm": 0.07191803996274608,
|
|
"learning_rate": 0.0005697751183161228,
|
|
"loss": 2.4641,
|
|
"step": 21440
|
|
},
|
|
{
|
|
"epoch": 6.773592355681908,
|
|
"grad_norm": 0.05775390158910223,
|
|
"learning_rate": 0.0005692773807003257,
|
|
"loss": 2.4321,
|
|
"step": 21445
|
|
},
|
|
{
|
|
"epoch": 6.775171760246387,
|
|
"grad_norm": 0.05510099252873888,
|
|
"learning_rate": 0.0005687797740645257,
|
|
"loss": 2.3841,
|
|
"step": 21450
|
|
},
|
|
{
|
|
"epoch": 6.776751164810866,
|
|
"grad_norm": 0.05358811307984855,
|
|
"learning_rate": 0.0005682822985600409,
|
|
"loss": 2.4162,
|
|
"step": 21455
|
|
},
|
|
{
|
|
"epoch": 6.7783305693753455,
|
|
"grad_norm": 0.05802284171031527,
|
|
"learning_rate": 0.000567784954338151,
|
|
"loss": 2.4454,
|
|
"step": 21460
|
|
},
|
|
{
|
|
"epoch": 6.779909973939825,
|
|
"grad_norm": 0.0505949926437704,
|
|
"learning_rate": 0.0005672877415500956,
|
|
"loss": 2.3944,
|
|
"step": 21465
|
|
},
|
|
{
|
|
"epoch": 6.781489378504304,
|
|
"grad_norm": 0.05866143634554086,
|
|
"learning_rate": 0.0005667906603470723,
|
|
"loss": 2.3932,
|
|
"step": 21470
|
|
},
|
|
{
|
|
"epoch": 6.783068783068783,
|
|
"grad_norm": 0.05793737809646166,
|
|
"learning_rate": 0.000566293710880242,
|
|
"loss": 2.4064,
|
|
"step": 21475
|
|
},
|
|
{
|
|
"epoch": 6.7846481876332625,
|
|
"grad_norm": 0.0519726652750793,
|
|
"learning_rate": 0.0005657968933007227,
|
|
"loss": 2.4746,
|
|
"step": 21480
|
|
},
|
|
{
|
|
"epoch": 6.786227592197742,
|
|
"grad_norm": 0.058300041427298825,
|
|
"learning_rate": 0.0005653002077595944,
|
|
"loss": 2.4135,
|
|
"step": 21485
|
|
},
|
|
{
|
|
"epoch": 6.787806996762221,
|
|
"grad_norm": 0.05234486328538095,
|
|
"learning_rate": 0.0005648036544078954,
|
|
"loss": 2.3928,
|
|
"step": 21490
|
|
},
|
|
{
|
|
"epoch": 6.7893864013267,
|
|
"grad_norm": 0.055602872160708736,
|
|
"learning_rate": 0.0005643072333966242,
|
|
"loss": 2.4309,
|
|
"step": 21495
|
|
},
|
|
{
|
|
"epoch": 6.7909658058911795,
|
|
"grad_norm": 0.05173743221141718,
|
|
"learning_rate": 0.0005638109448767399,
|
|
"loss": 2.4736,
|
|
"step": 21500
|
|
},
|
|
{
|
|
"epoch": 6.792545210455658,
|
|
"grad_norm": 0.05907175588242061,
|
|
"learning_rate": 0.0005633147889991606,
|
|
"loss": 2.6019,
|
|
"step": 21505
|
|
},
|
|
{
|
|
"epoch": 6.794124615020137,
|
|
"grad_norm": 0.0615331667397914,
|
|
"learning_rate": 0.0005628187659147637,
|
|
"loss": 2.5359,
|
|
"step": 21510
|
|
},
|
|
{
|
|
"epoch": 6.795704019584616,
|
|
"grad_norm": 0.06127667387678013,
|
|
"learning_rate": 0.000562322875774387,
|
|
"loss": 2.4743,
|
|
"step": 21515
|
|
},
|
|
{
|
|
"epoch": 6.797283424149096,
|
|
"grad_norm": 0.10062733082611985,
|
|
"learning_rate": 0.0005618271187288269,
|
|
"loss": 2.3991,
|
|
"step": 21520
|
|
},
|
|
{
|
|
"epoch": 6.798862828713575,
|
|
"grad_norm": 0.0649846458859756,
|
|
"learning_rate": 0.0005613314949288408,
|
|
"loss": 2.4433,
|
|
"step": 21525
|
|
},
|
|
{
|
|
"epoch": 6.800442233278054,
|
|
"grad_norm": 0.05435369272893038,
|
|
"learning_rate": 0.0005608360045251445,
|
|
"loss": 2.4667,
|
|
"step": 21530
|
|
},
|
|
{
|
|
"epoch": 6.802021637842533,
|
|
"grad_norm": 0.05969015357945196,
|
|
"learning_rate": 0.0005603406476684128,
|
|
"loss": 2.4626,
|
|
"step": 21535
|
|
},
|
|
{
|
|
"epoch": 6.803601042407013,
|
|
"grad_norm": 0.05751701647723755,
|
|
"learning_rate": 0.0005598454245092816,
|
|
"loss": 2.4328,
|
|
"step": 21540
|
|
},
|
|
{
|
|
"epoch": 6.805180446971492,
|
|
"grad_norm": 0.04962348059479472,
|
|
"learning_rate": 0.0005593503351983441,
|
|
"loss": 2.36,
|
|
"step": 21545
|
|
},
|
|
{
|
|
"epoch": 6.806759851535971,
|
|
"grad_norm": 0.057983305152976485,
|
|
"learning_rate": 0.0005588553798861547,
|
|
"loss": 2.379,
|
|
"step": 21550
|
|
},
|
|
{
|
|
"epoch": 6.80833925610045,
|
|
"grad_norm": 0.049851774150790285,
|
|
"learning_rate": 0.0005583605587232261,
|
|
"loss": 2.3591,
|
|
"step": 21555
|
|
},
|
|
{
|
|
"epoch": 6.80991866066493,
|
|
"grad_norm": 0.06148293087382803,
|
|
"learning_rate": 0.0005578658718600291,
|
|
"loss": 2.346,
|
|
"step": 21560
|
|
},
|
|
{
|
|
"epoch": 6.811498065229409,
|
|
"grad_norm": 0.0607279741352406,
|
|
"learning_rate": 0.0005573713194469961,
|
|
"loss": 2.4491,
|
|
"step": 21565
|
|
},
|
|
{
|
|
"epoch": 6.813077469793888,
|
|
"grad_norm": 0.0611488176613782,
|
|
"learning_rate": 0.0005568769016345162,
|
|
"loss": 2.4978,
|
|
"step": 21570
|
|
},
|
|
{
|
|
"epoch": 6.814656874358366,
|
|
"grad_norm": 0.06097889717255455,
|
|
"learning_rate": 0.0005563826185729398,
|
|
"loss": 2.3682,
|
|
"step": 21575
|
|
},
|
|
{
|
|
"epoch": 6.816236278922846,
|
|
"grad_norm": 0.05115123269234018,
|
|
"learning_rate": 0.0005558884704125748,
|
|
"loss": 2.3969,
|
|
"step": 21580
|
|
},
|
|
{
|
|
"epoch": 6.817815683487325,
|
|
"grad_norm": 0.07194913074065468,
|
|
"learning_rate": 0.0005553944573036879,
|
|
"loss": 2.3681,
|
|
"step": 21585
|
|
},
|
|
{
|
|
"epoch": 6.819395088051804,
|
|
"grad_norm": 0.06114575712306162,
|
|
"learning_rate": 0.0005549005793965065,
|
|
"loss": 2.4123,
|
|
"step": 21590
|
|
},
|
|
{
|
|
"epoch": 6.820974492616283,
|
|
"grad_norm": 0.05394160080735589,
|
|
"learning_rate": 0.0005544068368412149,
|
|
"loss": 2.3875,
|
|
"step": 21595
|
|
},
|
|
{
|
|
"epoch": 6.822553897180763,
|
|
"grad_norm": 0.06470531739214352,
|
|
"learning_rate": 0.0005539132297879574,
|
|
"loss": 2.497,
|
|
"step": 21600
|
|
},
|
|
{
|
|
"epoch": 6.824133301745242,
|
|
"grad_norm": 0.051509753550531084,
|
|
"learning_rate": 0.0005534197583868366,
|
|
"loss": 2.417,
|
|
"step": 21605
|
|
},
|
|
{
|
|
"epoch": 6.825712706309721,
|
|
"grad_norm": 0.051051927926556565,
|
|
"learning_rate": 0.0005529264227879134,
|
|
"loss": 2.4835,
|
|
"step": 21610
|
|
},
|
|
{
|
|
"epoch": 6.8272921108742,
|
|
"grad_norm": 0.047564532608101465,
|
|
"learning_rate": 0.000552433223141209,
|
|
"loss": 2.4317,
|
|
"step": 21615
|
|
},
|
|
{
|
|
"epoch": 6.82887151543868,
|
|
"grad_norm": 0.05734664728392672,
|
|
"learning_rate": 0.0005519401595967021,
|
|
"loss": 2.3542,
|
|
"step": 21620
|
|
},
|
|
{
|
|
"epoch": 6.830450920003159,
|
|
"grad_norm": 0.0672852984035047,
|
|
"learning_rate": 0.0005514472323043294,
|
|
"loss": 2.466,
|
|
"step": 21625
|
|
},
|
|
{
|
|
"epoch": 6.832030324567638,
|
|
"grad_norm": 0.05435036293623683,
|
|
"learning_rate": 0.0005509544414139878,
|
|
"loss": 2.3773,
|
|
"step": 21630
|
|
},
|
|
{
|
|
"epoch": 6.833609729132117,
|
|
"grad_norm": 0.06367650525566754,
|
|
"learning_rate": 0.0005504617870755313,
|
|
"loss": 2.4725,
|
|
"step": 21635
|
|
},
|
|
{
|
|
"epoch": 6.835189133696597,
|
|
"grad_norm": 0.06673691388756689,
|
|
"learning_rate": 0.0005499692694387735,
|
|
"loss": 2.4828,
|
|
"step": 21640
|
|
},
|
|
{
|
|
"epoch": 6.836768538261076,
|
|
"grad_norm": 0.05827313896785369,
|
|
"learning_rate": 0.0005494768886534858,
|
|
"loss": 2.3298,
|
|
"step": 21645
|
|
},
|
|
{
|
|
"epoch": 6.838347942825555,
|
|
"grad_norm": 0.0536158853716576,
|
|
"learning_rate": 0.0005489846448693971,
|
|
"loss": 2.4936,
|
|
"step": 21650
|
|
},
|
|
{
|
|
"epoch": 6.839927347390034,
|
|
"grad_norm": 0.05404584286993024,
|
|
"learning_rate": 0.0005484925382361967,
|
|
"loss": 2.3928,
|
|
"step": 21655
|
|
},
|
|
{
|
|
"epoch": 6.841506751954514,
|
|
"grad_norm": 0.05579403311119814,
|
|
"learning_rate": 0.0005480005689035303,
|
|
"loss": 2.3985,
|
|
"step": 21660
|
|
},
|
|
{
|
|
"epoch": 6.843086156518992,
|
|
"grad_norm": 0.04999571171378245,
|
|
"learning_rate": 0.0005475087370210032,
|
|
"loss": 2.4438,
|
|
"step": 21665
|
|
},
|
|
{
|
|
"epoch": 6.844665561083471,
|
|
"grad_norm": 0.05422807783375237,
|
|
"learning_rate": 0.0005470170427381782,
|
|
"loss": 2.4761,
|
|
"step": 21670
|
|
},
|
|
{
|
|
"epoch": 6.8462449656479505,
|
|
"grad_norm": 0.05942475947479741,
|
|
"learning_rate": 0.0005465254862045761,
|
|
"loss": 2.4057,
|
|
"step": 21675
|
|
},
|
|
{
|
|
"epoch": 6.84782437021243,
|
|
"grad_norm": 0.06514406946071337,
|
|
"learning_rate": 0.0005460340675696766,
|
|
"loss": 2.4166,
|
|
"step": 21680
|
|
},
|
|
{
|
|
"epoch": 6.849403774776909,
|
|
"grad_norm": 0.05360057779921819,
|
|
"learning_rate": 0.0005455427869829166,
|
|
"loss": 2.5401,
|
|
"step": 21685
|
|
},
|
|
{
|
|
"epoch": 6.850983179341388,
|
|
"grad_norm": 0.06089804877013015,
|
|
"learning_rate": 0.0005450516445936915,
|
|
"loss": 2.3797,
|
|
"step": 21690
|
|
},
|
|
{
|
|
"epoch": 6.8525625839058675,
|
|
"grad_norm": 0.05693908928925304,
|
|
"learning_rate": 0.0005445606405513546,
|
|
"loss": 2.4941,
|
|
"step": 21695
|
|
},
|
|
{
|
|
"epoch": 6.854141988470347,
|
|
"grad_norm": 0.0707655381654669,
|
|
"learning_rate": 0.0005440697750052166,
|
|
"loss": 2.357,
|
|
"step": 21700
|
|
},
|
|
{
|
|
"epoch": 6.855721393034826,
|
|
"grad_norm": 0.05577309405341037,
|
|
"learning_rate": 0.0005435790481045473,
|
|
"loss": 2.5022,
|
|
"step": 21705
|
|
},
|
|
{
|
|
"epoch": 6.857300797599305,
|
|
"grad_norm": 0.05945130649107957,
|
|
"learning_rate": 0.0005430884599985731,
|
|
"loss": 2.4243,
|
|
"step": 21710
|
|
},
|
|
{
|
|
"epoch": 6.8588802021637845,
|
|
"grad_norm": 0.059499421774360954,
|
|
"learning_rate": 0.0005425980108364793,
|
|
"loss": 2.4342,
|
|
"step": 21715
|
|
},
|
|
{
|
|
"epoch": 6.860459606728264,
|
|
"grad_norm": 0.06518694486381847,
|
|
"learning_rate": 0.0005421077007674079,
|
|
"loss": 2.4241,
|
|
"step": 21720
|
|
},
|
|
{
|
|
"epoch": 6.862039011292743,
|
|
"grad_norm": 0.051394730635343784,
|
|
"learning_rate": 0.0005416175299404588,
|
|
"loss": 2.4587,
|
|
"step": 21725
|
|
},
|
|
{
|
|
"epoch": 6.863618415857222,
|
|
"grad_norm": 0.045408359711962304,
|
|
"learning_rate": 0.0005411274985046905,
|
|
"loss": 2.3355,
|
|
"step": 21730
|
|
},
|
|
{
|
|
"epoch": 6.865197820421701,
|
|
"grad_norm": 0.0513470116736704,
|
|
"learning_rate": 0.0005406376066091186,
|
|
"loss": 2.4024,
|
|
"step": 21735
|
|
},
|
|
{
|
|
"epoch": 6.86677722498618,
|
|
"grad_norm": 0.05063925633979523,
|
|
"learning_rate": 0.0005401478544027145,
|
|
"loss": 2.4745,
|
|
"step": 21740
|
|
},
|
|
{
|
|
"epoch": 6.868356629550659,
|
|
"grad_norm": 0.07104281738141084,
|
|
"learning_rate": 0.0005396582420344105,
|
|
"loss": 2.343,
|
|
"step": 21745
|
|
},
|
|
{
|
|
"epoch": 6.869936034115138,
|
|
"grad_norm": 0.059455373567313806,
|
|
"learning_rate": 0.0005391687696530933,
|
|
"loss": 2.4954,
|
|
"step": 21750
|
|
},
|
|
{
|
|
"epoch": 6.871515438679618,
|
|
"grad_norm": 0.06096793387466837,
|
|
"learning_rate": 0.0005386794374076095,
|
|
"loss": 2.3438,
|
|
"step": 21755
|
|
},
|
|
{
|
|
"epoch": 6.873094843244097,
|
|
"grad_norm": 0.054379783576155465,
|
|
"learning_rate": 0.0005381902454467612,
|
|
"loss": 2.4693,
|
|
"step": 21760
|
|
},
|
|
{
|
|
"epoch": 6.874674247808576,
|
|
"grad_norm": 0.05652207840644949,
|
|
"learning_rate": 0.0005377011939193084,
|
|
"loss": 2.5034,
|
|
"step": 21765
|
|
},
|
|
{
|
|
"epoch": 6.876253652373055,
|
|
"grad_norm": 0.06233177406755634,
|
|
"learning_rate": 0.0005372122829739689,
|
|
"loss": 2.3965,
|
|
"step": 21770
|
|
},
|
|
{
|
|
"epoch": 6.8778330569375346,
|
|
"grad_norm": 0.06609802159000204,
|
|
"learning_rate": 0.0005367235127594176,
|
|
"loss": 2.4501,
|
|
"step": 21775
|
|
},
|
|
{
|
|
"epoch": 6.879412461502014,
|
|
"grad_norm": 0.06125662370101596,
|
|
"learning_rate": 0.0005362348834242861,
|
|
"loss": 2.3855,
|
|
"step": 21780
|
|
},
|
|
{
|
|
"epoch": 6.880991866066493,
|
|
"grad_norm": 0.06099320925783692,
|
|
"learning_rate": 0.0005357463951171635,
|
|
"loss": 2.3108,
|
|
"step": 21785
|
|
},
|
|
{
|
|
"epoch": 6.882571270630972,
|
|
"grad_norm": 0.058472540308738864,
|
|
"learning_rate": 0.0005352580479865954,
|
|
"loss": 2.3968,
|
|
"step": 21790
|
|
},
|
|
{
|
|
"epoch": 6.8841506751954515,
|
|
"grad_norm": 0.06232962054138482,
|
|
"learning_rate": 0.0005347698421810861,
|
|
"loss": 2.3888,
|
|
"step": 21795
|
|
},
|
|
{
|
|
"epoch": 6.885730079759931,
|
|
"grad_norm": 0.06477288485131953,
|
|
"learning_rate": 0.000534281777849095,
|
|
"loss": 2.4474,
|
|
"step": 21800
|
|
},
|
|
{
|
|
"epoch": 6.88730948432441,
|
|
"grad_norm": 0.05873390973067887,
|
|
"learning_rate": 0.0005337938551390398,
|
|
"loss": 2.4788,
|
|
"step": 21805
|
|
},
|
|
{
|
|
"epoch": 6.888888888888889,
|
|
"grad_norm": 0.060654604539577196,
|
|
"learning_rate": 0.0005333060741992949,
|
|
"loss": 2.4519,
|
|
"step": 21810
|
|
},
|
|
{
|
|
"epoch": 6.8904682934533685,
|
|
"grad_norm": 0.05639692992085628,
|
|
"learning_rate": 0.0005328184351781905,
|
|
"loss": 2.3953,
|
|
"step": 21815
|
|
},
|
|
{
|
|
"epoch": 6.892047698017847,
|
|
"grad_norm": 0.053266466338713084,
|
|
"learning_rate": 0.0005323309382240155,
|
|
"loss": 2.3888,
|
|
"step": 21820
|
|
},
|
|
{
|
|
"epoch": 6.893627102582326,
|
|
"grad_norm": 0.05800361122482948,
|
|
"learning_rate": 0.0005318435834850142,
|
|
"loss": 2.3813,
|
|
"step": 21825
|
|
},
|
|
{
|
|
"epoch": 6.895206507146805,
|
|
"grad_norm": 0.06586588164894695,
|
|
"learning_rate": 0.000531356371109388,
|
|
"loss": 2.3707,
|
|
"step": 21830
|
|
},
|
|
{
|
|
"epoch": 6.896785911711285,
|
|
"grad_norm": 0.05851260838880374,
|
|
"learning_rate": 0.000530869301245295,
|
|
"loss": 2.424,
|
|
"step": 21835
|
|
},
|
|
{
|
|
"epoch": 6.898365316275764,
|
|
"grad_norm": 0.06209200802669711,
|
|
"learning_rate": 0.0005303823740408499,
|
|
"loss": 2.3685,
|
|
"step": 21840
|
|
},
|
|
{
|
|
"epoch": 6.899944720840243,
|
|
"grad_norm": 0.05286685692262011,
|
|
"learning_rate": 0.0005298955896441246,
|
|
"loss": 2.2661,
|
|
"step": 21845
|
|
},
|
|
{
|
|
"epoch": 6.901524125404722,
|
|
"grad_norm": 0.05713877322038269,
|
|
"learning_rate": 0.0005294089482031471,
|
|
"loss": 2.3312,
|
|
"step": 21850
|
|
},
|
|
{
|
|
"epoch": 6.903103529969202,
|
|
"grad_norm": 0.05528321789315224,
|
|
"learning_rate": 0.0005289224498659013,
|
|
"loss": 2.4055,
|
|
"step": 21855
|
|
},
|
|
{
|
|
"epoch": 6.904682934533681,
|
|
"grad_norm": 0.06110530953581327,
|
|
"learning_rate": 0.0005284360947803291,
|
|
"loss": 2.3902,
|
|
"step": 21860
|
|
},
|
|
{
|
|
"epoch": 6.90626233909816,
|
|
"grad_norm": 0.05159472647040717,
|
|
"learning_rate": 0.0005279498830943275,
|
|
"loss": 2.3989,
|
|
"step": 21865
|
|
},
|
|
{
|
|
"epoch": 6.907841743662639,
|
|
"grad_norm": 0.05335318785652728,
|
|
"learning_rate": 0.0005274638149557505,
|
|
"loss": 2.5193,
|
|
"step": 21870
|
|
},
|
|
{
|
|
"epoch": 6.909421148227119,
|
|
"grad_norm": 0.056920508642124584,
|
|
"learning_rate": 0.0005269778905124082,
|
|
"loss": 2.485,
|
|
"step": 21875
|
|
},
|
|
{
|
|
"epoch": 6.911000552791598,
|
|
"grad_norm": 0.06303317804323633,
|
|
"learning_rate": 0.0005264921099120668,
|
|
"loss": 2.3776,
|
|
"step": 21880
|
|
},
|
|
{
|
|
"epoch": 6.912579957356077,
|
|
"grad_norm": 0.059305948269867445,
|
|
"learning_rate": 0.0005260064733024498,
|
|
"loss": 2.4129,
|
|
"step": 21885
|
|
},
|
|
{
|
|
"epoch": 6.9141593619205555,
|
|
"grad_norm": 0.058581759702154644,
|
|
"learning_rate": 0.0005255209808312356,
|
|
"loss": 2.4401,
|
|
"step": 21890
|
|
},
|
|
{
|
|
"epoch": 6.915738766485035,
|
|
"grad_norm": 0.05312168898445783,
|
|
"learning_rate": 0.0005250356326460599,
|
|
"loss": 2.4257,
|
|
"step": 21895
|
|
},
|
|
{
|
|
"epoch": 6.917318171049514,
|
|
"grad_norm": 0.05630831885265081,
|
|
"learning_rate": 0.0005245504288945137,
|
|
"loss": 2.4652,
|
|
"step": 21900
|
|
},
|
|
{
|
|
"epoch": 6.918897575613993,
|
|
"grad_norm": 0.06559146434257056,
|
|
"learning_rate": 0.0005240653697241439,
|
|
"loss": 2.4492,
|
|
"step": 21905
|
|
},
|
|
{
|
|
"epoch": 6.9204769801784725,
|
|
"grad_norm": 0.06018418584465903,
|
|
"learning_rate": 0.0005235804552824548,
|
|
"loss": 2.3373,
|
|
"step": 21910
|
|
},
|
|
{
|
|
"epoch": 6.922056384742952,
|
|
"grad_norm": 0.05290805452225749,
|
|
"learning_rate": 0.0005230956857169051,
|
|
"loss": 2.3979,
|
|
"step": 21915
|
|
},
|
|
{
|
|
"epoch": 6.923635789307431,
|
|
"grad_norm": 0.05688352863619254,
|
|
"learning_rate": 0.0005226110611749106,
|
|
"loss": 2.4009,
|
|
"step": 21920
|
|
},
|
|
{
|
|
"epoch": 6.92521519387191,
|
|
"grad_norm": 0.062045953503586715,
|
|
"learning_rate": 0.0005221265818038422,
|
|
"loss": 2.5034,
|
|
"step": 21925
|
|
},
|
|
{
|
|
"epoch": 6.9267945984363894,
|
|
"grad_norm": 0.06701488553948802,
|
|
"learning_rate": 0.0005216422477510266,
|
|
"loss": 2.4982,
|
|
"step": 21930
|
|
},
|
|
{
|
|
"epoch": 6.928374003000869,
|
|
"grad_norm": 0.06656038450198463,
|
|
"learning_rate": 0.0005211580591637477,
|
|
"loss": 2.4535,
|
|
"step": 21935
|
|
},
|
|
{
|
|
"epoch": 6.929953407565348,
|
|
"grad_norm": 0.05927357694315514,
|
|
"learning_rate": 0.0005206740161892431,
|
|
"loss": 2.4145,
|
|
"step": 21940
|
|
},
|
|
{
|
|
"epoch": 6.931532812129827,
|
|
"grad_norm": 0.06324530362062546,
|
|
"learning_rate": 0.000520190118974708,
|
|
"loss": 2.4619,
|
|
"step": 21945
|
|
},
|
|
{
|
|
"epoch": 6.933112216694306,
|
|
"grad_norm": 0.06475746568052063,
|
|
"learning_rate": 0.0005197063676672922,
|
|
"loss": 2.4084,
|
|
"step": 21950
|
|
},
|
|
{
|
|
"epoch": 6.934691621258786,
|
|
"grad_norm": 0.05339050369592582,
|
|
"learning_rate": 0.0005192227624141014,
|
|
"loss": 2.4382,
|
|
"step": 21955
|
|
},
|
|
{
|
|
"epoch": 6.936271025823265,
|
|
"grad_norm": 0.05058452813668629,
|
|
"learning_rate": 0.0005187393033621966,
|
|
"loss": 2.4678,
|
|
"step": 21960
|
|
},
|
|
{
|
|
"epoch": 6.937850430387744,
|
|
"grad_norm": 0.06376407480731965,
|
|
"learning_rate": 0.000518255990658595,
|
|
"loss": 2.5248,
|
|
"step": 21965
|
|
},
|
|
{
|
|
"epoch": 6.939429834952223,
|
|
"grad_norm": 0.06208593674054495,
|
|
"learning_rate": 0.0005177728244502681,
|
|
"loss": 2.3976,
|
|
"step": 21970
|
|
},
|
|
{
|
|
"epoch": 6.941009239516703,
|
|
"grad_norm": 0.05444347232961063,
|
|
"learning_rate": 0.0005172898048841448,
|
|
"loss": 2.4416,
|
|
"step": 21975
|
|
},
|
|
{
|
|
"epoch": 6.942588644081181,
|
|
"grad_norm": 0.05412349409442446,
|
|
"learning_rate": 0.0005168069321071072,
|
|
"loss": 2.4072,
|
|
"step": 21980
|
|
},
|
|
{
|
|
"epoch": 6.94416804864566,
|
|
"grad_norm": 0.061800130782726656,
|
|
"learning_rate": 0.0005163242062659947,
|
|
"loss": 2.4059,
|
|
"step": 21985
|
|
},
|
|
{
|
|
"epoch": 6.9457474532101395,
|
|
"grad_norm": 0.06225518916268915,
|
|
"learning_rate": 0.000515841627507601,
|
|
"loss": 2.4214,
|
|
"step": 21990
|
|
},
|
|
{
|
|
"epoch": 6.947326857774619,
|
|
"grad_norm": 0.05028043840864827,
|
|
"learning_rate": 0.0005153591959786744,
|
|
"loss": 2.3807,
|
|
"step": 21995
|
|
},
|
|
{
|
|
"epoch": 6.948906262339098,
|
|
"grad_norm": 0.06280098731901271,
|
|
"learning_rate": 0.0005148769118259204,
|
|
"loss": 2.4558,
|
|
"step": 22000
|
|
},
|
|
{
|
|
"epoch": 6.950485666903577,
|
|
"grad_norm": 0.0545790632896203,
|
|
"learning_rate": 0.0005143947751959978,
|
|
"loss": 2.3941,
|
|
"step": 22005
|
|
},
|
|
{
|
|
"epoch": 6.9520650714680565,
|
|
"grad_norm": 0.05423863822796326,
|
|
"learning_rate": 0.0005139127862355215,
|
|
"loss": 2.4085,
|
|
"step": 22010
|
|
},
|
|
{
|
|
"epoch": 6.953644476032536,
|
|
"grad_norm": 0.05854777490309802,
|
|
"learning_rate": 0.0005134309450910612,
|
|
"loss": 2.3779,
|
|
"step": 22015
|
|
},
|
|
{
|
|
"epoch": 6.955223880597015,
|
|
"grad_norm": 0.06203241290016519,
|
|
"learning_rate": 0.0005129492519091414,
|
|
"loss": 2.3778,
|
|
"step": 22020
|
|
},
|
|
{
|
|
"epoch": 6.956803285161494,
|
|
"grad_norm": 0.06108661630820922,
|
|
"learning_rate": 0.0005124677068362427,
|
|
"loss": 2.3823,
|
|
"step": 22025
|
|
},
|
|
{
|
|
"epoch": 6.9583826897259735,
|
|
"grad_norm": 0.051174743117047304,
|
|
"learning_rate": 0.0005119863100187989,
|
|
"loss": 2.3984,
|
|
"step": 22030
|
|
},
|
|
{
|
|
"epoch": 6.959962094290453,
|
|
"grad_norm": 0.0583678329545391,
|
|
"learning_rate": 0.0005115050616032006,
|
|
"loss": 2.4062,
|
|
"step": 22035
|
|
},
|
|
{
|
|
"epoch": 6.961541498854932,
|
|
"grad_norm": 0.050732585539663234,
|
|
"learning_rate": 0.0005110239617357921,
|
|
"loss": 2.4515,
|
|
"step": 22040
|
|
},
|
|
{
|
|
"epoch": 6.963120903419411,
|
|
"grad_norm": 0.06316805828329164,
|
|
"learning_rate": 0.0005105430105628725,
|
|
"loss": 2.415,
|
|
"step": 22045
|
|
},
|
|
{
|
|
"epoch": 6.96470030798389,
|
|
"grad_norm": 0.05826522378664329,
|
|
"learning_rate": 0.0005100622082306964,
|
|
"loss": 2.4071,
|
|
"step": 22050
|
|
},
|
|
{
|
|
"epoch": 6.966279712548369,
|
|
"grad_norm": 0.06024407186568456,
|
|
"learning_rate": 0.0005095815548854718,
|
|
"loss": 2.4042,
|
|
"step": 22055
|
|
},
|
|
{
|
|
"epoch": 6.967859117112848,
|
|
"grad_norm": 0.05240525392795767,
|
|
"learning_rate": 0.0005091010506733637,
|
|
"loss": 2.3804,
|
|
"step": 22060
|
|
},
|
|
{
|
|
"epoch": 6.969438521677327,
|
|
"grad_norm": 0.050460358002704216,
|
|
"learning_rate": 0.0005086206957404895,
|
|
"loss": 2.3951,
|
|
"step": 22065
|
|
},
|
|
{
|
|
"epoch": 6.971017926241807,
|
|
"grad_norm": 0.059198426627816145,
|
|
"learning_rate": 0.0005081404902329219,
|
|
"loss": 2.4172,
|
|
"step": 22070
|
|
},
|
|
{
|
|
"epoch": 6.972597330806286,
|
|
"grad_norm": 0.07203729921772287,
|
|
"learning_rate": 0.0005076604342966888,
|
|
"loss": 2.4817,
|
|
"step": 22075
|
|
},
|
|
{
|
|
"epoch": 6.974176735370765,
|
|
"grad_norm": 0.054764474869055016,
|
|
"learning_rate": 0.0005071805280777721,
|
|
"loss": 2.4372,
|
|
"step": 22080
|
|
},
|
|
{
|
|
"epoch": 6.975756139935244,
|
|
"grad_norm": 0.06859694147601073,
|
|
"learning_rate": 0.0005067007717221078,
|
|
"loss": 2.4975,
|
|
"step": 22085
|
|
},
|
|
{
|
|
"epoch": 6.977335544499724,
|
|
"grad_norm": 0.05521051080090544,
|
|
"learning_rate": 0.0005062211653755874,
|
|
"loss": 2.4881,
|
|
"step": 22090
|
|
},
|
|
{
|
|
"epoch": 6.978914949064203,
|
|
"grad_norm": 0.0640812527980952,
|
|
"learning_rate": 0.0005057417091840558,
|
|
"loss": 2.405,
|
|
"step": 22095
|
|
},
|
|
{
|
|
"epoch": 6.980494353628682,
|
|
"grad_norm": 0.0802579141078725,
|
|
"learning_rate": 0.0005052624032933124,
|
|
"loss": 2.3667,
|
|
"step": 22100
|
|
},
|
|
{
|
|
"epoch": 6.982073758193161,
|
|
"grad_norm": 0.0676106499145331,
|
|
"learning_rate": 0.0005047832478491112,
|
|
"loss": 2.4645,
|
|
"step": 22105
|
|
},
|
|
{
|
|
"epoch": 6.983653162757641,
|
|
"grad_norm": 0.057582598854102246,
|
|
"learning_rate": 0.0005043042429971601,
|
|
"loss": 2.4041,
|
|
"step": 22110
|
|
},
|
|
{
|
|
"epoch": 6.98523256732212,
|
|
"grad_norm": 0.0642687758267521,
|
|
"learning_rate": 0.000503825388883122,
|
|
"loss": 2.5206,
|
|
"step": 22115
|
|
},
|
|
{
|
|
"epoch": 6.986811971886599,
|
|
"grad_norm": 0.05943948002624905,
|
|
"learning_rate": 0.0005033466856526123,
|
|
"loss": 2.5057,
|
|
"step": 22120
|
|
},
|
|
{
|
|
"epoch": 6.988391376451078,
|
|
"grad_norm": 0.061296324638803454,
|
|
"learning_rate": 0.0005028681334512028,
|
|
"loss": 2.5147,
|
|
"step": 22125
|
|
},
|
|
{
|
|
"epoch": 6.989970781015558,
|
|
"grad_norm": 0.06645640402884086,
|
|
"learning_rate": 0.0005023897324244178,
|
|
"loss": 2.4797,
|
|
"step": 22130
|
|
},
|
|
{
|
|
"epoch": 6.991550185580037,
|
|
"grad_norm": 0.05908400342848221,
|
|
"learning_rate": 0.0005019114827177358,
|
|
"loss": 2.3979,
|
|
"step": 22135
|
|
},
|
|
{
|
|
"epoch": 6.993129590144515,
|
|
"grad_norm": 0.051582964000261286,
|
|
"learning_rate": 0.0005014333844765895,
|
|
"loss": 2.4004,
|
|
"step": 22140
|
|
},
|
|
{
|
|
"epoch": 6.994708994708994,
|
|
"grad_norm": 0.058922640607268637,
|
|
"learning_rate": 0.0005009554378463653,
|
|
"loss": 2.4946,
|
|
"step": 22145
|
|
},
|
|
{
|
|
"epoch": 6.996288399273474,
|
|
"grad_norm": 0.05257449063779885,
|
|
"learning_rate": 0.0005004776429724041,
|
|
"loss": 2.5856,
|
|
"step": 22150
|
|
},
|
|
{
|
|
"epoch": 6.997867803837953,
|
|
"grad_norm": 0.05202237087799441,
|
|
"learning_rate": 0.0005000000000000002,
|
|
"loss": 2.4001,
|
|
"step": 22155
|
|
},
|
|
{
|
|
"epoch": 6.999447208402432,
|
|
"grad_norm": 0.06504574622504392,
|
|
"learning_rate": 0.0004995225090744013,
|
|
"loss": 2.3885,
|
|
"step": 22160
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"eval_loss": 2.4251391887664795,
|
|
"eval_runtime": 118.8395,
|
|
"eval_samples_per_second": 22.291,
|
|
"eval_steps_per_second": 5.579,
|
|
"step": 22162
|
|
},
|
|
{
|
|
"epoch": 7.000947642738687,
|
|
"grad_norm": 0.062346395048555915,
|
|
"learning_rate": 0.0004990451703408103,
|
|
"loss": 2.4199,
|
|
"step": 22165
|
|
},
|
|
{
|
|
"epoch": 7.002527047303166,
|
|
"grad_norm": 0.07037473965441808,
|
|
"learning_rate": 0.0004985679839443818,
|
|
"loss": 2.4788,
|
|
"step": 22170
|
|
},
|
|
{
|
|
"epoch": 7.004106451867646,
|
|
"grad_norm": 0.06285878195523269,
|
|
"learning_rate": 0.0004980909500302261,
|
|
"loss": 2.4508,
|
|
"step": 22175
|
|
},
|
|
{
|
|
"epoch": 7.005685856432125,
|
|
"grad_norm": 0.05917472863936181,
|
|
"learning_rate": 0.0004976140687434057,
|
|
"loss": 2.3731,
|
|
"step": 22180
|
|
},
|
|
{
|
|
"epoch": 7.007265260996604,
|
|
"grad_norm": 0.048792163797312715,
|
|
"learning_rate": 0.0004971373402289371,
|
|
"loss": 2.3495,
|
|
"step": 22185
|
|
},
|
|
{
|
|
"epoch": 7.008844665561083,
|
|
"grad_norm": 0.05213109528685697,
|
|
"learning_rate": 0.0004966607646317905,
|
|
"loss": 2.3474,
|
|
"step": 22190
|
|
},
|
|
{
|
|
"epoch": 7.010424070125563,
|
|
"grad_norm": 0.0781050242157072,
|
|
"learning_rate": 0.0004961843420968894,
|
|
"loss": 2.4575,
|
|
"step": 22195
|
|
},
|
|
{
|
|
"epoch": 7.012003474690042,
|
|
"grad_norm": 0.06626379813972594,
|
|
"learning_rate": 0.0004957080727691107,
|
|
"loss": 2.4007,
|
|
"step": 22200
|
|
},
|
|
{
|
|
"epoch": 7.013582879254521,
|
|
"grad_norm": 0.06270721111555298,
|
|
"learning_rate": 0.0004952319567932853,
|
|
"loss": 2.3663,
|
|
"step": 22205
|
|
},
|
|
{
|
|
"epoch": 7.015162283819,
|
|
"grad_norm": 0.06311121786415576,
|
|
"learning_rate": 0.0004947559943141963,
|
|
"loss": 2.4322,
|
|
"step": 22210
|
|
},
|
|
{
|
|
"epoch": 7.01674168838348,
|
|
"grad_norm": 0.06659528685910747,
|
|
"learning_rate": 0.000494280185476582,
|
|
"loss": 2.4943,
|
|
"step": 22215
|
|
},
|
|
{
|
|
"epoch": 7.018321092947959,
|
|
"grad_norm": 0.06299817520987319,
|
|
"learning_rate": 0.0004938045304251318,
|
|
"loss": 2.4519,
|
|
"step": 22220
|
|
},
|
|
{
|
|
"epoch": 7.019900497512438,
|
|
"grad_norm": 0.05606478423175714,
|
|
"learning_rate": 0.00049332902930449,
|
|
"loss": 2.4653,
|
|
"step": 22225
|
|
},
|
|
{
|
|
"epoch": 7.021479902076917,
|
|
"grad_norm": 0.07214472192138412,
|
|
"learning_rate": 0.0004928536822592531,
|
|
"loss": 2.3667,
|
|
"step": 22230
|
|
},
|
|
{
|
|
"epoch": 7.023059306641396,
|
|
"grad_norm": 0.07541747642426318,
|
|
"learning_rate": 0.0004923784894339708,
|
|
"loss": 2.3852,
|
|
"step": 22235
|
|
},
|
|
{
|
|
"epoch": 7.024638711205875,
|
|
"grad_norm": 0.08278790926232245,
|
|
"learning_rate": 0.000491903450973147,
|
|
"loss": 2.4173,
|
|
"step": 22240
|
|
},
|
|
{
|
|
"epoch": 7.026218115770354,
|
|
"grad_norm": 0.058936614736605816,
|
|
"learning_rate": 0.0004914285670212374,
|
|
"loss": 2.447,
|
|
"step": 22245
|
|
},
|
|
{
|
|
"epoch": 7.0277975203348335,
|
|
"grad_norm": 0.06837255690973654,
|
|
"learning_rate": 0.0004909538377226508,
|
|
"loss": 2.4236,
|
|
"step": 22250
|
|
},
|
|
{
|
|
"epoch": 7.029376924899313,
|
|
"grad_norm": 0.0650063841790395,
|
|
"learning_rate": 0.0004904792632217502,
|
|
"loss": 2.3878,
|
|
"step": 22255
|
|
},
|
|
{
|
|
"epoch": 7.030956329463792,
|
|
"grad_norm": 0.06832521717430635,
|
|
"learning_rate": 0.0004900048436628498,
|
|
"loss": 2.371,
|
|
"step": 22260
|
|
},
|
|
{
|
|
"epoch": 7.032535734028271,
|
|
"grad_norm": 0.06550638970535379,
|
|
"learning_rate": 0.0004895305791902184,
|
|
"loss": 2.4162,
|
|
"step": 22265
|
|
},
|
|
{
|
|
"epoch": 7.0341151385927505,
|
|
"grad_norm": 0.051004928798066757,
|
|
"learning_rate": 0.0004890564699480764,
|
|
"loss": 2.3918,
|
|
"step": 22270
|
|
},
|
|
{
|
|
"epoch": 7.03569454315723,
|
|
"grad_norm": 0.0599442831633633,
|
|
"learning_rate": 0.0004885825160805973,
|
|
"loss": 2.4745,
|
|
"step": 22275
|
|
},
|
|
{
|
|
"epoch": 7.037273947721709,
|
|
"grad_norm": 0.05159778302570732,
|
|
"learning_rate": 0.00048810871773190766,
|
|
"loss": 2.4326,
|
|
"step": 22280
|
|
},
|
|
{
|
|
"epoch": 7.038853352286188,
|
|
"grad_norm": 0.054053460100690724,
|
|
"learning_rate": 0.0004876350750460859,
|
|
"loss": 2.3592,
|
|
"step": 22285
|
|
},
|
|
{
|
|
"epoch": 7.0404327568506675,
|
|
"grad_norm": 0.06618818877064908,
|
|
"learning_rate": 0.0004871615881671647,
|
|
"loss": 2.4047,
|
|
"step": 22290
|
|
},
|
|
{
|
|
"epoch": 7.042012161415147,
|
|
"grad_norm": 0.08335182568401928,
|
|
"learning_rate": 0.00048668825723912793,
|
|
"loss": 2.3465,
|
|
"step": 22295
|
|
},
|
|
{
|
|
"epoch": 7.043591565979626,
|
|
"grad_norm": 0.07955512465075823,
|
|
"learning_rate": 0.0004862150824059119,
|
|
"loss": 2.4591,
|
|
"step": 22300
|
|
},
|
|
{
|
|
"epoch": 7.045170970544105,
|
|
"grad_norm": 0.053631802016637725,
|
|
"learning_rate": 0.0004857420638114073,
|
|
"loss": 2.3582,
|
|
"step": 22305
|
|
},
|
|
{
|
|
"epoch": 7.0467503751085845,
|
|
"grad_norm": 0.05721351045966907,
|
|
"learning_rate": 0.0004852692015994553,
|
|
"loss": 2.33,
|
|
"step": 22310
|
|
},
|
|
{
|
|
"epoch": 7.048329779673063,
|
|
"grad_norm": 0.04913175962272203,
|
|
"learning_rate": 0.0004847964959138503,
|
|
"loss": 2.4515,
|
|
"step": 22315
|
|
},
|
|
{
|
|
"epoch": 7.049909184237542,
|
|
"grad_norm": 0.060385583661990504,
|
|
"learning_rate": 0.00048432394689833935,
|
|
"loss": 2.3377,
|
|
"step": 22320
|
|
},
|
|
{
|
|
"epoch": 7.051488588802021,
|
|
"grad_norm": 0.053631885417950824,
|
|
"learning_rate": 0.0004838515546966209,
|
|
"loss": 2.3595,
|
|
"step": 22325
|
|
},
|
|
{
|
|
"epoch": 7.053067993366501,
|
|
"grad_norm": 0.06077489506721232,
|
|
"learning_rate": 0.00048337931945234726,
|
|
"loss": 2.3561,
|
|
"step": 22330
|
|
},
|
|
{
|
|
"epoch": 7.05464739793098,
|
|
"grad_norm": 0.06373677010752224,
|
|
"learning_rate": 0.0004829072413091219,
|
|
"loss": 2.4001,
|
|
"step": 22335
|
|
},
|
|
{
|
|
"epoch": 7.056226802495459,
|
|
"grad_norm": 0.061147150243221884,
|
|
"learning_rate": 0.0004824353204105002,
|
|
"loss": 2.4548,
|
|
"step": 22340
|
|
},
|
|
{
|
|
"epoch": 7.057806207059938,
|
|
"grad_norm": 0.05918448000188891,
|
|
"learning_rate": 0.00048196355689999115,
|
|
"loss": 2.3816,
|
|
"step": 22345
|
|
},
|
|
{
|
|
"epoch": 7.059385611624418,
|
|
"grad_norm": 0.05668442100891457,
|
|
"learning_rate": 0.00048149195092105426,
|
|
"loss": 2.4345,
|
|
"step": 22350
|
|
},
|
|
{
|
|
"epoch": 7.060965016188897,
|
|
"grad_norm": 0.05821659007163679,
|
|
"learning_rate": 0.00048102050261710264,
|
|
"loss": 2.4405,
|
|
"step": 22355
|
|
},
|
|
{
|
|
"epoch": 7.062544420753376,
|
|
"grad_norm": 0.05508250643037957,
|
|
"learning_rate": 0.0004805492121315003,
|
|
"loss": 2.3619,
|
|
"step": 22360
|
|
},
|
|
{
|
|
"epoch": 7.064123825317855,
|
|
"grad_norm": 0.05474253066945977,
|
|
"learning_rate": 0.00048007807960756364,
|
|
"loss": 2.488,
|
|
"step": 22365
|
|
},
|
|
{
|
|
"epoch": 7.065703229882335,
|
|
"grad_norm": 0.05590081357760849,
|
|
"learning_rate": 0.0004796071051885611,
|
|
"loss": 2.3588,
|
|
"step": 22370
|
|
},
|
|
{
|
|
"epoch": 7.067282634446814,
|
|
"grad_norm": 0.05252395755875348,
|
|
"learning_rate": 0.00047913628901771266,
|
|
"loss": 2.3175,
|
|
"step": 22375
|
|
},
|
|
{
|
|
"epoch": 7.068862039011293,
|
|
"grad_norm": 0.05640428770084165,
|
|
"learning_rate": 0.0004786656312381913,
|
|
"loss": 2.31,
|
|
"step": 22380
|
|
},
|
|
{
|
|
"epoch": 7.070441443575772,
|
|
"grad_norm": 0.059343346775562744,
|
|
"learning_rate": 0.0004781951319931205,
|
|
"loss": 2.5275,
|
|
"step": 22385
|
|
},
|
|
{
|
|
"epoch": 7.072020848140252,
|
|
"grad_norm": 0.04973075828705303,
|
|
"learning_rate": 0.0004777247914255757,
|
|
"loss": 2.4719,
|
|
"step": 22390
|
|
},
|
|
{
|
|
"epoch": 7.07360025270473,
|
|
"grad_norm": 0.05497162427367808,
|
|
"learning_rate": 0.0004772546096785854,
|
|
"loss": 2.4166,
|
|
"step": 22395
|
|
},
|
|
{
|
|
"epoch": 7.075179657269209,
|
|
"grad_norm": 0.04481162000314791,
|
|
"learning_rate": 0.00047678458689512837,
|
|
"loss": 2.3846,
|
|
"step": 22400
|
|
},
|
|
{
|
|
"epoch": 7.076759061833688,
|
|
"grad_norm": 0.0672072530987208,
|
|
"learning_rate": 0.00047631472321813553,
|
|
"loss": 2.3995,
|
|
"step": 22405
|
|
},
|
|
{
|
|
"epoch": 7.078338466398168,
|
|
"grad_norm": 0.06484850193073702,
|
|
"learning_rate": 0.0004758450187904895,
|
|
"loss": 2.3144,
|
|
"step": 22410
|
|
},
|
|
{
|
|
"epoch": 7.079917870962647,
|
|
"grad_norm": 0.05459059841054011,
|
|
"learning_rate": 0.00047537547375502387,
|
|
"loss": 2.3782,
|
|
"step": 22415
|
|
},
|
|
{
|
|
"epoch": 7.081497275527126,
|
|
"grad_norm": 0.05610420016170933,
|
|
"learning_rate": 0.0004749060882545251,
|
|
"loss": 2.4242,
|
|
"step": 22420
|
|
},
|
|
{
|
|
"epoch": 7.083076680091605,
|
|
"grad_norm": 0.05104265391982312,
|
|
"learning_rate": 0.0004744368624317301,
|
|
"loss": 2.4518,
|
|
"step": 22425
|
|
},
|
|
{
|
|
"epoch": 7.084656084656085,
|
|
"grad_norm": 0.056969806267277344,
|
|
"learning_rate": 0.00047396779642932684,
|
|
"loss": 2.3904,
|
|
"step": 22430
|
|
},
|
|
{
|
|
"epoch": 7.086235489220564,
|
|
"grad_norm": 0.04882437057050647,
|
|
"learning_rate": 0.0004734988903899562,
|
|
"loss": 2.4577,
|
|
"step": 22435
|
|
},
|
|
{
|
|
"epoch": 7.087814893785043,
|
|
"grad_norm": 0.06609464678169245,
|
|
"learning_rate": 0.00047303014445620876,
|
|
"loss": 2.5109,
|
|
"step": 22440
|
|
},
|
|
{
|
|
"epoch": 7.089394298349522,
|
|
"grad_norm": 0.05103370170970565,
|
|
"learning_rate": 0.0004725615587706278,
|
|
"loss": 2.3865,
|
|
"step": 22445
|
|
},
|
|
{
|
|
"epoch": 7.090973702914002,
|
|
"grad_norm": 0.048548993258957965,
|
|
"learning_rate": 0.0004720931334757068,
|
|
"loss": 2.3628,
|
|
"step": 22450
|
|
},
|
|
{
|
|
"epoch": 7.092553107478481,
|
|
"grad_norm": 0.0461957856742399,
|
|
"learning_rate": 0.0004716248687138912,
|
|
"loss": 2.5196,
|
|
"step": 22455
|
|
},
|
|
{
|
|
"epoch": 7.09413251204296,
|
|
"grad_norm": 0.047518760666497656,
|
|
"learning_rate": 0.00047115676462757705,
|
|
"loss": 2.3432,
|
|
"step": 22460
|
|
},
|
|
{
|
|
"epoch": 7.095711916607439,
|
|
"grad_norm": 0.053828768359855776,
|
|
"learning_rate": 0.0004706888213591116,
|
|
"loss": 2.5968,
|
|
"step": 22465
|
|
},
|
|
{
|
|
"epoch": 7.097291321171918,
|
|
"grad_norm": 0.058677777840975644,
|
|
"learning_rate": 0.00047022103905079406,
|
|
"loss": 2.4534,
|
|
"step": 22470
|
|
},
|
|
{
|
|
"epoch": 7.098870725736397,
|
|
"grad_norm": 0.04953095577710977,
|
|
"learning_rate": 0.00046975341784487366,
|
|
"loss": 2.3614,
|
|
"step": 22475
|
|
},
|
|
{
|
|
"epoch": 7.100450130300876,
|
|
"grad_norm": 0.04926369173320298,
|
|
"learning_rate": 0.00046928595788355064,
|
|
"loss": 2.4417,
|
|
"step": 22480
|
|
},
|
|
{
|
|
"epoch": 7.1020295348653555,
|
|
"grad_norm": 0.05298044492282592,
|
|
"learning_rate": 0.0004688186593089775,
|
|
"loss": 2.396,
|
|
"step": 22485
|
|
},
|
|
{
|
|
"epoch": 7.103608939429835,
|
|
"grad_norm": 0.06768544449259989,
|
|
"learning_rate": 0.0004683515222632562,
|
|
"loss": 2.3874,
|
|
"step": 22490
|
|
},
|
|
{
|
|
"epoch": 7.105188343994314,
|
|
"grad_norm": 0.053853648723050415,
|
|
"learning_rate": 0.0004678845468884402,
|
|
"loss": 2.4692,
|
|
"step": 22495
|
|
},
|
|
{
|
|
"epoch": 7.106767748558793,
|
|
"grad_norm": 0.04628813353443334,
|
|
"learning_rate": 0.0004674177333265336,
|
|
"loss": 2.3937,
|
|
"step": 22500
|
|
},
|
|
{
|
|
"epoch": 7.1083471531232725,
|
|
"grad_norm": 0.05178483320090351,
|
|
"learning_rate": 0.0004669510817194913,
|
|
"loss": 2.4853,
|
|
"step": 22505
|
|
},
|
|
{
|
|
"epoch": 7.109926557687752,
|
|
"grad_norm": 0.0561434246840523,
|
|
"learning_rate": 0.00046648459220921957,
|
|
"loss": 2.4582,
|
|
"step": 22510
|
|
},
|
|
{
|
|
"epoch": 7.111505962252231,
|
|
"grad_norm": 0.07180099332547474,
|
|
"learning_rate": 0.0004660182649375747,
|
|
"loss": 2.3706,
|
|
"step": 22515
|
|
},
|
|
{
|
|
"epoch": 7.11308536681671,
|
|
"grad_norm": 0.054949950862126395,
|
|
"learning_rate": 0.0004655521000463633,
|
|
"loss": 2.5135,
|
|
"step": 22520
|
|
},
|
|
{
|
|
"epoch": 7.1146647713811895,
|
|
"grad_norm": 0.06792203229744449,
|
|
"learning_rate": 0.0004650860976773441,
|
|
"loss": 2.4077,
|
|
"step": 22525
|
|
},
|
|
{
|
|
"epoch": 7.116244175945669,
|
|
"grad_norm": 0.061213820334894885,
|
|
"learning_rate": 0.0004646202579722244,
|
|
"loss": 2.438,
|
|
"step": 22530
|
|
},
|
|
{
|
|
"epoch": 7.117823580510148,
|
|
"grad_norm": 0.05984185122345596,
|
|
"learning_rate": 0.00046415458107266415,
|
|
"loss": 2.4789,
|
|
"step": 22535
|
|
},
|
|
{
|
|
"epoch": 7.119402985074627,
|
|
"grad_norm": 0.057070456339471824,
|
|
"learning_rate": 0.0004636890671202725,
|
|
"loss": 2.411,
|
|
"step": 22540
|
|
},
|
|
{
|
|
"epoch": 7.1209823896391065,
|
|
"grad_norm": 0.05791212251947189,
|
|
"learning_rate": 0.0004632237162566082,
|
|
"loss": 2.3306,
|
|
"step": 22545
|
|
},
|
|
{
|
|
"epoch": 7.122561794203585,
|
|
"grad_norm": 0.046891382606143776,
|
|
"learning_rate": 0.00046275852862318257,
|
|
"loss": 2.4943,
|
|
"step": 22550
|
|
},
|
|
{
|
|
"epoch": 7.124141198768064,
|
|
"grad_norm": 0.051774975874388104,
|
|
"learning_rate": 0.00046229350436145545,
|
|
"loss": 2.3686,
|
|
"step": 22555
|
|
},
|
|
{
|
|
"epoch": 7.125720603332543,
|
|
"grad_norm": 0.04929627229554973,
|
|
"learning_rate": 0.0004618286436128386,
|
|
"loss": 2.4466,
|
|
"step": 22560
|
|
},
|
|
{
|
|
"epoch": 7.127300007897023,
|
|
"grad_norm": 0.046790097185457694,
|
|
"learning_rate": 0.00046136394651869275,
|
|
"loss": 2.3244,
|
|
"step": 22565
|
|
},
|
|
{
|
|
"epoch": 7.128879412461502,
|
|
"grad_norm": 0.05793253072507003,
|
|
"learning_rate": 0.0004608994132203289,
|
|
"loss": 2.3569,
|
|
"step": 22570
|
|
},
|
|
{
|
|
"epoch": 7.130458817025981,
|
|
"grad_norm": 0.05394842571129561,
|
|
"learning_rate": 0.00046043504385900945,
|
|
"loss": 2.4881,
|
|
"step": 22575
|
|
},
|
|
{
|
|
"epoch": 7.13203822159046,
|
|
"grad_norm": 0.05816959393361919,
|
|
"learning_rate": 0.0004599708385759459,
|
|
"loss": 2.4091,
|
|
"step": 22580
|
|
},
|
|
{
|
|
"epoch": 7.1336176261549396,
|
|
"grad_norm": 0.054256171347107855,
|
|
"learning_rate": 0.00045950679751229984,
|
|
"loss": 2.5072,
|
|
"step": 22585
|
|
},
|
|
{
|
|
"epoch": 7.135197030719419,
|
|
"grad_norm": 0.057231813913574105,
|
|
"learning_rate": 0.0004590429208091835,
|
|
"loss": 2.4793,
|
|
"step": 22590
|
|
},
|
|
{
|
|
"epoch": 7.136776435283898,
|
|
"grad_norm": 0.05707810619562981,
|
|
"learning_rate": 0.00045857920860765825,
|
|
"loss": 2.4329,
|
|
"step": 22595
|
|
},
|
|
{
|
|
"epoch": 7.138355839848377,
|
|
"grad_norm": 0.06690668435662243,
|
|
"learning_rate": 0.0004581156610487367,
|
|
"loss": 2.4606,
|
|
"step": 22600
|
|
},
|
|
{
|
|
"epoch": 7.1399352444128565,
|
|
"grad_norm": 0.056773576894885235,
|
|
"learning_rate": 0.0004576522782733802,
|
|
"loss": 2.4883,
|
|
"step": 22605
|
|
},
|
|
{
|
|
"epoch": 7.141514648977336,
|
|
"grad_norm": 0.06528025298970015,
|
|
"learning_rate": 0.000457189060422501,
|
|
"loss": 2.4328,
|
|
"step": 22610
|
|
},
|
|
{
|
|
"epoch": 7.143094053541815,
|
|
"grad_norm": 0.054281522678375084,
|
|
"learning_rate": 0.00045672600763696047,
|
|
"loss": 2.399,
|
|
"step": 22615
|
|
},
|
|
{
|
|
"epoch": 7.144673458106294,
|
|
"grad_norm": 0.05406865499378434,
|
|
"learning_rate": 0.0004562631200575695,
|
|
"loss": 2.5182,
|
|
"step": 22620
|
|
},
|
|
{
|
|
"epoch": 7.1462528626707735,
|
|
"grad_norm": 0.054878683283738044,
|
|
"learning_rate": 0.0004558003978250901,
|
|
"loss": 2.4772,
|
|
"step": 22625
|
|
},
|
|
{
|
|
"epoch": 7.147832267235252,
|
|
"grad_norm": 0.05291823031041038,
|
|
"learning_rate": 0.0004553378410802331,
|
|
"loss": 2.4818,
|
|
"step": 22630
|
|
},
|
|
{
|
|
"epoch": 7.149411671799731,
|
|
"grad_norm": 0.05799495413724737,
|
|
"learning_rate": 0.00045487544996365795,
|
|
"loss": 2.3644,
|
|
"step": 22635
|
|
},
|
|
{
|
|
"epoch": 7.15099107636421,
|
|
"grad_norm": 0.05379204462085772,
|
|
"learning_rate": 0.000454413224615976,
|
|
"loss": 2.3865,
|
|
"step": 22640
|
|
},
|
|
{
|
|
"epoch": 7.15257048092869,
|
|
"grad_norm": 0.05035893136911191,
|
|
"learning_rate": 0.0004539511651777462,
|
|
"loss": 2.4255,
|
|
"step": 22645
|
|
},
|
|
{
|
|
"epoch": 7.154149885493169,
|
|
"grad_norm": 0.0535544461589526,
|
|
"learning_rate": 0.0004534892717894785,
|
|
"loss": 2.4239,
|
|
"step": 22650
|
|
},
|
|
{
|
|
"epoch": 7.155729290057648,
|
|
"grad_norm": 0.049231465561476694,
|
|
"learning_rate": 0.00045302754459163166,
|
|
"loss": 2.4034,
|
|
"step": 22655
|
|
},
|
|
{
|
|
"epoch": 7.157308694622127,
|
|
"grad_norm": 0.05450897661295551,
|
|
"learning_rate": 0.0004525659837246133,
|
|
"loss": 2.4025,
|
|
"step": 22660
|
|
},
|
|
{
|
|
"epoch": 7.158888099186607,
|
|
"grad_norm": 0.0463752158817942,
|
|
"learning_rate": 0.00045210458932878206,
|
|
"loss": 2.4036,
|
|
"step": 22665
|
|
},
|
|
{
|
|
"epoch": 7.160467503751086,
|
|
"grad_norm": 0.05032980803941639,
|
|
"learning_rate": 0.0004516433615444446,
|
|
"loss": 2.3412,
|
|
"step": 22670
|
|
},
|
|
{
|
|
"epoch": 7.162046908315565,
|
|
"grad_norm": 0.05756411469882938,
|
|
"learning_rate": 0.0004511823005118574,
|
|
"loss": 2.3714,
|
|
"step": 22675
|
|
},
|
|
{
|
|
"epoch": 7.163626312880044,
|
|
"grad_norm": 0.04763318582796287,
|
|
"learning_rate": 0.0004507214063712262,
|
|
"loss": 2.3937,
|
|
"step": 22680
|
|
},
|
|
{
|
|
"epoch": 7.165205717444524,
|
|
"grad_norm": 0.04743784396114025,
|
|
"learning_rate": 0.0004502606792627053,
|
|
"loss": 2.4057,
|
|
"step": 22685
|
|
},
|
|
{
|
|
"epoch": 7.166785122009003,
|
|
"grad_norm": 0.05082335261559461,
|
|
"learning_rate": 0.0004498001193264,
|
|
"loss": 2.3993,
|
|
"step": 22690
|
|
},
|
|
{
|
|
"epoch": 7.168364526573482,
|
|
"grad_norm": 0.0668975121732276,
|
|
"learning_rate": 0.00044933972670236255,
|
|
"loss": 2.354,
|
|
"step": 22695
|
|
},
|
|
{
|
|
"epoch": 7.169943931137961,
|
|
"grad_norm": 0.0627127176365516,
|
|
"learning_rate": 0.0004488795015305964,
|
|
"loss": 2.4189,
|
|
"step": 22700
|
|
},
|
|
{
|
|
"epoch": 7.171523335702441,
|
|
"grad_norm": 0.0530348548292881,
|
|
"learning_rate": 0.0004484194439510527,
|
|
"loss": 2.4288,
|
|
"step": 22705
|
|
},
|
|
{
|
|
"epoch": 7.173102740266919,
|
|
"grad_norm": 0.05489002488624982,
|
|
"learning_rate": 0.0004479595541036315,
|
|
"loss": 2.4122,
|
|
"step": 22710
|
|
},
|
|
{
|
|
"epoch": 7.174682144831398,
|
|
"grad_norm": 0.0554180249954323,
|
|
"learning_rate": 0.0004474998321281832,
|
|
"loss": 2.5061,
|
|
"step": 22715
|
|
},
|
|
{
|
|
"epoch": 7.1762615493958775,
|
|
"grad_norm": 0.05960033631635437,
|
|
"learning_rate": 0.00044704027816450586,
|
|
"loss": 2.4847,
|
|
"step": 22720
|
|
},
|
|
{
|
|
"epoch": 7.177840953960357,
|
|
"grad_norm": 0.061594201159924485,
|
|
"learning_rate": 0.0004465808923523471,
|
|
"loss": 2.4432,
|
|
"step": 22725
|
|
},
|
|
{
|
|
"epoch": 7.179420358524836,
|
|
"grad_norm": 0.06446780684025662,
|
|
"learning_rate": 0.000446121674831403,
|
|
"loss": 2.4772,
|
|
"step": 22730
|
|
},
|
|
{
|
|
"epoch": 7.180999763089315,
|
|
"grad_norm": 0.046521565333944,
|
|
"learning_rate": 0.00044566262574131845,
|
|
"loss": 2.3783,
|
|
"step": 22735
|
|
},
|
|
{
|
|
"epoch": 7.1825791676537944,
|
|
"grad_norm": 0.058597223164450145,
|
|
"learning_rate": 0.00044520374522168793,
|
|
"loss": 2.3534,
|
|
"step": 22740
|
|
},
|
|
{
|
|
"epoch": 7.184158572218274,
|
|
"grad_norm": 0.05327155937517266,
|
|
"learning_rate": 0.00044474503341205386,
|
|
"loss": 2.3408,
|
|
"step": 22745
|
|
},
|
|
{
|
|
"epoch": 7.185737976782753,
|
|
"grad_norm": 0.06332922236815533,
|
|
"learning_rate": 0.0004442864904519072,
|
|
"loss": 2.3159,
|
|
"step": 22750
|
|
},
|
|
{
|
|
"epoch": 7.187317381347232,
|
|
"grad_norm": 0.0537193747896481,
|
|
"learning_rate": 0.00044382811648068844,
|
|
"loss": 2.4244,
|
|
"step": 22755
|
|
},
|
|
{
|
|
"epoch": 7.188896785911711,
|
|
"grad_norm": 0.054447157903122496,
|
|
"learning_rate": 0.0004433699116377861,
|
|
"loss": 2.443,
|
|
"step": 22760
|
|
},
|
|
{
|
|
"epoch": 7.190476190476191,
|
|
"grad_norm": 0.07686127379391965,
|
|
"learning_rate": 0.0004429118760625372,
|
|
"loss": 2.4494,
|
|
"step": 22765
|
|
},
|
|
{
|
|
"epoch": 7.19205559504067,
|
|
"grad_norm": 0.06038874503558769,
|
|
"learning_rate": 0.0004424540098942275,
|
|
"loss": 2.4226,
|
|
"step": 22770
|
|
},
|
|
{
|
|
"epoch": 7.193634999605149,
|
|
"grad_norm": 0.048657210149132386,
|
|
"learning_rate": 0.00044199631327209067,
|
|
"loss": 2.5111,
|
|
"step": 22775
|
|
},
|
|
{
|
|
"epoch": 7.195214404169628,
|
|
"grad_norm": 0.05548145875073198,
|
|
"learning_rate": 0.0004415387863353102,
|
|
"loss": 2.3155,
|
|
"step": 22780
|
|
},
|
|
{
|
|
"epoch": 7.196793808734107,
|
|
"grad_norm": 0.05473862703122515,
|
|
"learning_rate": 0.0004410814292230163,
|
|
"loss": 2.3674,
|
|
"step": 22785
|
|
},
|
|
{
|
|
"epoch": 7.198373213298586,
|
|
"grad_norm": 0.05505864233866162,
|
|
"learning_rate": 0.0004406242420742892,
|
|
"loss": 2.4172,
|
|
"step": 22790
|
|
},
|
|
{
|
|
"epoch": 7.199952617863065,
|
|
"grad_norm": 0.05150335407360049,
|
|
"learning_rate": 0.0004401672250281561,
|
|
"loss": 2.3785,
|
|
"step": 22795
|
|
},
|
|
{
|
|
"epoch": 7.2015320224275445,
|
|
"grad_norm": 0.055842220396361636,
|
|
"learning_rate": 0.0004397103782235925,
|
|
"loss": 2.441,
|
|
"step": 22800
|
|
},
|
|
{
|
|
"epoch": 7.203111426992024,
|
|
"grad_norm": 0.06516738120800779,
|
|
"learning_rate": 0.0004392537017995236,
|
|
"loss": 2.4836,
|
|
"step": 22805
|
|
},
|
|
{
|
|
"epoch": 7.204690831556503,
|
|
"grad_norm": 0.05294395080085389,
|
|
"learning_rate": 0.00043879719589482125,
|
|
"loss": 2.541,
|
|
"step": 22810
|
|
},
|
|
{
|
|
"epoch": 7.206270236120982,
|
|
"grad_norm": 0.05353233354000687,
|
|
"learning_rate": 0.00043834086064830605,
|
|
"loss": 2.4319,
|
|
"step": 22815
|
|
},
|
|
{
|
|
"epoch": 7.2078496406854615,
|
|
"grad_norm": 0.05492647814378925,
|
|
"learning_rate": 0.0004378846961987465,
|
|
"loss": 2.4915,
|
|
"step": 22820
|
|
},
|
|
{
|
|
"epoch": 7.209429045249941,
|
|
"grad_norm": 0.05632475820237248,
|
|
"learning_rate": 0.000437428702684859,
|
|
"loss": 2.472,
|
|
"step": 22825
|
|
},
|
|
{
|
|
"epoch": 7.21100844981442,
|
|
"grad_norm": 0.05901269744287303,
|
|
"learning_rate": 0.00043697288024530914,
|
|
"loss": 2.3883,
|
|
"step": 22830
|
|
},
|
|
{
|
|
"epoch": 7.212587854378899,
|
|
"grad_norm": 0.052741292663270635,
|
|
"learning_rate": 0.0004365172290187086,
|
|
"loss": 2.3636,
|
|
"step": 22835
|
|
},
|
|
{
|
|
"epoch": 7.2141672589433785,
|
|
"grad_norm": 0.05175073096790014,
|
|
"learning_rate": 0.00043606174914361895,
|
|
"loss": 2.4367,
|
|
"step": 22840
|
|
},
|
|
{
|
|
"epoch": 7.215746663507858,
|
|
"grad_norm": 0.05257913321958104,
|
|
"learning_rate": 0.00043560644075854837,
|
|
"loss": 2.3495,
|
|
"step": 22845
|
|
},
|
|
{
|
|
"epoch": 7.217326068072337,
|
|
"grad_norm": 0.0541049786157875,
|
|
"learning_rate": 0.000435151304001953,
|
|
"loss": 2.3664,
|
|
"step": 22850
|
|
},
|
|
{
|
|
"epoch": 7.218905472636816,
|
|
"grad_norm": 0.05929024156226885,
|
|
"learning_rate": 0.00043469633901223727,
|
|
"loss": 2.4205,
|
|
"step": 22855
|
|
},
|
|
{
|
|
"epoch": 7.2204848772012955,
|
|
"grad_norm": 0.05692855564895376,
|
|
"learning_rate": 0.000434241545927753,
|
|
"loss": 2.45,
|
|
"step": 22860
|
|
},
|
|
{
|
|
"epoch": 7.222064281765775,
|
|
"grad_norm": 0.0529476590454288,
|
|
"learning_rate": 0.0004337869248867995,
|
|
"loss": 2.4011,
|
|
"step": 22865
|
|
},
|
|
{
|
|
"epoch": 7.223643686330253,
|
|
"grad_norm": 0.05387521337145956,
|
|
"learning_rate": 0.00043333247602762485,
|
|
"loss": 2.4672,
|
|
"step": 22870
|
|
},
|
|
{
|
|
"epoch": 7.225223090894732,
|
|
"grad_norm": 0.05035042347303634,
|
|
"learning_rate": 0.0004328781994884233,
|
|
"loss": 2.4697,
|
|
"step": 22875
|
|
},
|
|
{
|
|
"epoch": 7.226802495459212,
|
|
"grad_norm": 0.06711635883800816,
|
|
"learning_rate": 0.00043242409540733827,
|
|
"loss": 2.3636,
|
|
"step": 22880
|
|
},
|
|
{
|
|
"epoch": 7.228381900023691,
|
|
"grad_norm": 0.050208177847224374,
|
|
"learning_rate": 0.0004319701639224596,
|
|
"loss": 2.4149,
|
|
"step": 22885
|
|
},
|
|
{
|
|
"epoch": 7.22996130458817,
|
|
"grad_norm": 0.057904178053437116,
|
|
"learning_rate": 0.0004315164051718243,
|
|
"loss": 2.5331,
|
|
"step": 22890
|
|
},
|
|
{
|
|
"epoch": 7.231540709152649,
|
|
"grad_norm": 0.050249222969918045,
|
|
"learning_rate": 0.0004310628192934185,
|
|
"loss": 2.3641,
|
|
"step": 22895
|
|
},
|
|
{
|
|
"epoch": 7.233120113717129,
|
|
"grad_norm": 0.06476074688114011,
|
|
"learning_rate": 0.0004306094064251742,
|
|
"loss": 2.4901,
|
|
"step": 22900
|
|
},
|
|
{
|
|
"epoch": 7.234699518281608,
|
|
"grad_norm": 0.056320733281801316,
|
|
"learning_rate": 0.0004301561667049716,
|
|
"loss": 2.3834,
|
|
"step": 22905
|
|
},
|
|
{
|
|
"epoch": 7.236278922846087,
|
|
"grad_norm": 0.05354949264797148,
|
|
"learning_rate": 0.00042970310027063774,
|
|
"loss": 2.3642,
|
|
"step": 22910
|
|
},
|
|
{
|
|
"epoch": 7.237858327410566,
|
|
"grad_norm": 0.06256453961923768,
|
|
"learning_rate": 0.0004292502072599471,
|
|
"loss": 2.492,
|
|
"step": 22915
|
|
},
|
|
{
|
|
"epoch": 7.239437731975046,
|
|
"grad_norm": 0.08469470347265333,
|
|
"learning_rate": 0.0004287974878106222,
|
|
"loss": 2.425,
|
|
"step": 22920
|
|
},
|
|
{
|
|
"epoch": 7.241017136539525,
|
|
"grad_norm": 0.0585997124074025,
|
|
"learning_rate": 0.00042834494206033126,
|
|
"loss": 2.3965,
|
|
"step": 22925
|
|
},
|
|
{
|
|
"epoch": 7.242596541104004,
|
|
"grad_norm": 0.06056931894800767,
|
|
"learning_rate": 0.0004278925701466915,
|
|
"loss": 2.4136,
|
|
"step": 22930
|
|
},
|
|
{
|
|
"epoch": 7.244175945668483,
|
|
"grad_norm": 0.057340188750008424,
|
|
"learning_rate": 0.00042744037220726584,
|
|
"loss": 2.4731,
|
|
"step": 22935
|
|
},
|
|
{
|
|
"epoch": 7.245755350232963,
|
|
"grad_norm": 0.05359526035066487,
|
|
"learning_rate": 0.0004269883483795648,
|
|
"loss": 2.4574,
|
|
"step": 22940
|
|
},
|
|
{
|
|
"epoch": 7.247334754797441,
|
|
"grad_norm": 0.05859497430126532,
|
|
"learning_rate": 0.00042653649880104597,
|
|
"loss": 2.4454,
|
|
"step": 22945
|
|
},
|
|
{
|
|
"epoch": 7.24891415936192,
|
|
"grad_norm": 0.05675932201342108,
|
|
"learning_rate": 0.0004260848236091135,
|
|
"loss": 2.2968,
|
|
"step": 22950
|
|
},
|
|
{
|
|
"epoch": 7.250493563926399,
|
|
"grad_norm": 0.05368356817280291,
|
|
"learning_rate": 0.00042563332294111967,
|
|
"loss": 2.4265,
|
|
"step": 22955
|
|
},
|
|
{
|
|
"epoch": 7.252072968490879,
|
|
"grad_norm": 0.06035120629280444,
|
|
"learning_rate": 0.00042518199693436254,
|
|
"loss": 2.4724,
|
|
"step": 22960
|
|
},
|
|
{
|
|
"epoch": 7.253652373055358,
|
|
"grad_norm": 0.05070637372168603,
|
|
"learning_rate": 0.0004247308457260873,
|
|
"loss": 2.4114,
|
|
"step": 22965
|
|
},
|
|
{
|
|
"epoch": 7.255231777619837,
|
|
"grad_norm": 0.06691780641000095,
|
|
"learning_rate": 0.00042427986945348665,
|
|
"loss": 2.438,
|
|
"step": 22970
|
|
},
|
|
{
|
|
"epoch": 7.256811182184316,
|
|
"grad_norm": 0.07254358776207415,
|
|
"learning_rate": 0.0004238290682536994,
|
|
"loss": 2.3952,
|
|
"step": 22975
|
|
},
|
|
{
|
|
"epoch": 7.258390586748796,
|
|
"grad_norm": 0.045487663001971275,
|
|
"learning_rate": 0.00042337844226381083,
|
|
"loss": 2.4653,
|
|
"step": 22980
|
|
},
|
|
{
|
|
"epoch": 7.259969991313275,
|
|
"grad_norm": 0.06331602162173694,
|
|
"learning_rate": 0.00042292799162085414,
|
|
"loss": 2.3607,
|
|
"step": 22985
|
|
},
|
|
{
|
|
"epoch": 7.261549395877754,
|
|
"grad_norm": 0.06316826413889758,
|
|
"learning_rate": 0.0004224777164618083,
|
|
"loss": 2.4936,
|
|
"step": 22990
|
|
},
|
|
{
|
|
"epoch": 7.263128800442233,
|
|
"grad_norm": 0.056114953613693536,
|
|
"learning_rate": 0.0004220276169235989,
|
|
"loss": 2.4358,
|
|
"step": 22995
|
|
},
|
|
{
|
|
"epoch": 7.264708205006713,
|
|
"grad_norm": 0.05009092501700465,
|
|
"learning_rate": 0.00042157769314309844,
|
|
"loss": 2.3407,
|
|
"step": 23000
|
|
},
|
|
{
|
|
"epoch": 7.266287609571192,
|
|
"grad_norm": 0.05042828197747411,
|
|
"learning_rate": 0.0004211279452571255,
|
|
"loss": 2.4556,
|
|
"step": 23005
|
|
},
|
|
{
|
|
"epoch": 7.267867014135671,
|
|
"grad_norm": 0.04901947494880478,
|
|
"learning_rate": 0.0004206783734024463,
|
|
"loss": 2.4552,
|
|
"step": 23010
|
|
},
|
|
{
|
|
"epoch": 7.26944641870015,
|
|
"grad_norm": 0.11856151065206234,
|
|
"learning_rate": 0.000420228977715772,
|
|
"loss": 2.459,
|
|
"step": 23015
|
|
},
|
|
{
|
|
"epoch": 7.27102582326463,
|
|
"grad_norm": 0.10180627702415497,
|
|
"learning_rate": 0.00041977975833376157,
|
|
"loss": 2.4152,
|
|
"step": 23020
|
|
},
|
|
{
|
|
"epoch": 7.272605227829108,
|
|
"grad_norm": 0.07026541127932902,
|
|
"learning_rate": 0.0004193307153930196,
|
|
"loss": 2.5014,
|
|
"step": 23025
|
|
},
|
|
{
|
|
"epoch": 7.274184632393587,
|
|
"grad_norm": 0.07214665623560222,
|
|
"learning_rate": 0.00041888184903009695,
|
|
"loss": 2.5648,
|
|
"step": 23030
|
|
},
|
|
{
|
|
"epoch": 7.2757640369580665,
|
|
"grad_norm": 0.06659079098589442,
|
|
"learning_rate": 0.0004184331593814913,
|
|
"loss": 2.3869,
|
|
"step": 23035
|
|
},
|
|
{
|
|
"epoch": 7.277343441522546,
|
|
"grad_norm": 0.07462660970105818,
|
|
"learning_rate": 0.00041798464658364566,
|
|
"loss": 2.3619,
|
|
"step": 23040
|
|
},
|
|
{
|
|
"epoch": 7.278922846087025,
|
|
"grad_norm": 0.05553604278264133,
|
|
"learning_rate": 0.00041753631077295087,
|
|
"loss": 2.4243,
|
|
"step": 23045
|
|
},
|
|
{
|
|
"epoch": 7.280502250651504,
|
|
"grad_norm": 0.053494717136432286,
|
|
"learning_rate": 0.00041708815208574247,
|
|
"loss": 2.4105,
|
|
"step": 23050
|
|
},
|
|
{
|
|
"epoch": 7.2820816552159835,
|
|
"grad_norm": 0.04740050351874312,
|
|
"learning_rate": 0.0004166401706583023,
|
|
"loss": 2.3979,
|
|
"step": 23055
|
|
},
|
|
{
|
|
"epoch": 7.283661059780463,
|
|
"grad_norm": 0.06554420156354972,
|
|
"learning_rate": 0.0004161923666268594,
|
|
"loss": 2.3907,
|
|
"step": 23060
|
|
},
|
|
{
|
|
"epoch": 7.285240464344942,
|
|
"grad_norm": 0.051215205180764174,
|
|
"learning_rate": 0.00041574474012758743,
|
|
"loss": 2.5091,
|
|
"step": 23065
|
|
},
|
|
{
|
|
"epoch": 7.286819868909421,
|
|
"grad_norm": 0.08415768441672569,
|
|
"learning_rate": 0.0004152972912966074,
|
|
"loss": 2.3129,
|
|
"step": 23070
|
|
},
|
|
{
|
|
"epoch": 7.2883992734739005,
|
|
"grad_norm": 0.07255028857627768,
|
|
"learning_rate": 0.0004148500202699854,
|
|
"loss": 2.4459,
|
|
"step": 23075
|
|
},
|
|
{
|
|
"epoch": 7.28997867803838,
|
|
"grad_norm": 0.06586873948536655,
|
|
"learning_rate": 0.0004144029271837336,
|
|
"loss": 2.3013,
|
|
"step": 23080
|
|
},
|
|
{
|
|
"epoch": 7.291558082602859,
|
|
"grad_norm": 0.07231511332265027,
|
|
"learning_rate": 0.0004139560121738101,
|
|
"loss": 2.4013,
|
|
"step": 23085
|
|
},
|
|
{
|
|
"epoch": 7.293137487167338,
|
|
"grad_norm": 0.04599805937834209,
|
|
"learning_rate": 0.00041350927537611894,
|
|
"loss": 2.3289,
|
|
"step": 23090
|
|
},
|
|
{
|
|
"epoch": 7.2947168917318175,
|
|
"grad_norm": 0.04781180375675834,
|
|
"learning_rate": 0.00041306271692650965,
|
|
"loss": 2.4017,
|
|
"step": 23095
|
|
},
|
|
{
|
|
"epoch": 7.296296296296296,
|
|
"grad_norm": 0.057149503805875405,
|
|
"learning_rate": 0.0004126163369607784,
|
|
"loss": 2.4368,
|
|
"step": 23100
|
|
},
|
|
{
|
|
"epoch": 7.297875700860775,
|
|
"grad_norm": 0.062140652199933,
|
|
"learning_rate": 0.0004121701356146659,
|
|
"loss": 2.4677,
|
|
"step": 23105
|
|
},
|
|
{
|
|
"epoch": 7.299455105425254,
|
|
"grad_norm": 0.07945387282626987,
|
|
"learning_rate": 0.0004117241130238597,
|
|
"loss": 2.2999,
|
|
"step": 23110
|
|
},
|
|
{
|
|
"epoch": 7.301034509989734,
|
|
"grad_norm": 0.05837365773633515,
|
|
"learning_rate": 0.00041127826932399215,
|
|
"loss": 2.4515,
|
|
"step": 23115
|
|
},
|
|
{
|
|
"epoch": 7.302613914554213,
|
|
"grad_norm": 0.04855213009161075,
|
|
"learning_rate": 0.00041083260465064143,
|
|
"loss": 2.4669,
|
|
"step": 23120
|
|
},
|
|
{
|
|
"epoch": 7.304193319118692,
|
|
"grad_norm": 0.06528173832108752,
|
|
"learning_rate": 0.00041038711913933133,
|
|
"loss": 2.4565,
|
|
"step": 23125
|
|
},
|
|
{
|
|
"epoch": 7.305772723683171,
|
|
"grad_norm": 0.06008851527567248,
|
|
"learning_rate": 0.0004099418129255309,
|
|
"loss": 2.4633,
|
|
"step": 23130
|
|
},
|
|
{
|
|
"epoch": 7.307352128247651,
|
|
"grad_norm": 0.05146277616116244,
|
|
"learning_rate": 0.0004094966861446554,
|
|
"loss": 2.4149,
|
|
"step": 23135
|
|
},
|
|
{
|
|
"epoch": 7.30893153281213,
|
|
"grad_norm": 0.06195290842146711,
|
|
"learning_rate": 0.0004090517389320649,
|
|
"loss": 2.4968,
|
|
"step": 23140
|
|
},
|
|
{
|
|
"epoch": 7.310510937376609,
|
|
"grad_norm": 0.07761875245487002,
|
|
"learning_rate": 0.0004086069714230646,
|
|
"loss": 2.3864,
|
|
"step": 23145
|
|
},
|
|
{
|
|
"epoch": 7.312090341941088,
|
|
"grad_norm": 0.06865512619791067,
|
|
"learning_rate": 0.000408162383752906,
|
|
"loss": 2.4866,
|
|
"step": 23150
|
|
},
|
|
{
|
|
"epoch": 7.313669746505568,
|
|
"grad_norm": 0.06956410081498506,
|
|
"learning_rate": 0.00040771797605678486,
|
|
"loss": 2.4979,
|
|
"step": 23155
|
|
},
|
|
{
|
|
"epoch": 7.315249151070047,
|
|
"grad_norm": 0.07317747601786594,
|
|
"learning_rate": 0.00040727374846984344,
|
|
"loss": 2.4307,
|
|
"step": 23160
|
|
},
|
|
{
|
|
"epoch": 7.316828555634526,
|
|
"grad_norm": 0.08954038886989313,
|
|
"learning_rate": 0.000406829701127168,
|
|
"loss": 2.3915,
|
|
"step": 23165
|
|
},
|
|
{
|
|
"epoch": 7.318407960199005,
|
|
"grad_norm": 0.060376807629495,
|
|
"learning_rate": 0.0004063858341637905,
|
|
"loss": 2.4324,
|
|
"step": 23170
|
|
},
|
|
{
|
|
"epoch": 7.3199873647634845,
|
|
"grad_norm": 0.06896215610563826,
|
|
"learning_rate": 0.0004059421477146882,
|
|
"loss": 2.3749,
|
|
"step": 23175
|
|
},
|
|
{
|
|
"epoch": 7.321566769327964,
|
|
"grad_norm": 0.059302264793621884,
|
|
"learning_rate": 0.0004054986419147829,
|
|
"loss": 2.4397,
|
|
"step": 23180
|
|
},
|
|
{
|
|
"epoch": 7.323146173892442,
|
|
"grad_norm": 0.07616637023410781,
|
|
"learning_rate": 0.0004050553168989426,
|
|
"loss": 2.4457,
|
|
"step": 23185
|
|
},
|
|
{
|
|
"epoch": 7.324725578456921,
|
|
"grad_norm": 0.08542210106270082,
|
|
"learning_rate": 0.00040461217280197915,
|
|
"loss": 2.4341,
|
|
"step": 23190
|
|
},
|
|
{
|
|
"epoch": 7.326304983021401,
|
|
"grad_norm": 0.0676050006964208,
|
|
"learning_rate": 0.0004041692097586496,
|
|
"loss": 2.3246,
|
|
"step": 23195
|
|
},
|
|
{
|
|
"epoch": 7.32788438758588,
|
|
"grad_norm": 0.06667913623813965,
|
|
"learning_rate": 0.00040372642790365677,
|
|
"loss": 2.424,
|
|
"step": 23200
|
|
},
|
|
{
|
|
"epoch": 7.329463792150359,
|
|
"grad_norm": 0.06937850763951874,
|
|
"learning_rate": 0.0004032838273716476,
|
|
"loss": 2.3171,
|
|
"step": 23205
|
|
},
|
|
{
|
|
"epoch": 7.331043196714838,
|
|
"grad_norm": 0.07332911567740688,
|
|
"learning_rate": 0.00040284140829721405,
|
|
"loss": 2.3978,
|
|
"step": 23210
|
|
},
|
|
{
|
|
"epoch": 7.332622601279318,
|
|
"grad_norm": 0.08111694144155136,
|
|
"learning_rate": 0.00040239917081489273,
|
|
"loss": 2.3357,
|
|
"step": 23215
|
|
},
|
|
{
|
|
"epoch": 7.334202005843797,
|
|
"grad_norm": 0.083413955118876,
|
|
"learning_rate": 0.0004019571150591652,
|
|
"loss": 2.4407,
|
|
"step": 23220
|
|
},
|
|
{
|
|
"epoch": 7.335781410408276,
|
|
"grad_norm": 0.05697293108711861,
|
|
"learning_rate": 0.00040151524116445827,
|
|
"loss": 2.3873,
|
|
"step": 23225
|
|
},
|
|
{
|
|
"epoch": 7.337360814972755,
|
|
"grad_norm": 0.05309836851489041,
|
|
"learning_rate": 0.0004010735492651426,
|
|
"loss": 2.3351,
|
|
"step": 23230
|
|
},
|
|
{
|
|
"epoch": 7.338940219537235,
|
|
"grad_norm": 0.07439624739886878,
|
|
"learning_rate": 0.00040063203949553374,
|
|
"loss": 2.4385,
|
|
"step": 23235
|
|
},
|
|
{
|
|
"epoch": 7.340519624101714,
|
|
"grad_norm": 0.07690879262708751,
|
|
"learning_rate": 0.0004001907119898924,
|
|
"loss": 2.4637,
|
|
"step": 23240
|
|
},
|
|
{
|
|
"epoch": 7.342099028666193,
|
|
"grad_norm": 0.08223749279640362,
|
|
"learning_rate": 0.000399749566882423,
|
|
"loss": 2.418,
|
|
"step": 23245
|
|
},
|
|
{
|
|
"epoch": 7.343678433230672,
|
|
"grad_norm": 0.06549252347156347,
|
|
"learning_rate": 0.00039930860430727557,
|
|
"loss": 2.4514,
|
|
"step": 23250
|
|
},
|
|
{
|
|
"epoch": 7.345257837795152,
|
|
"grad_norm": 0.0636998425829546,
|
|
"learning_rate": 0.00039886782439854364,
|
|
"loss": 2.376,
|
|
"step": 23255
|
|
},
|
|
{
|
|
"epoch": 7.34683724235963,
|
|
"grad_norm": 0.07918039158244089,
|
|
"learning_rate": 0.00039842722729026546,
|
|
"loss": 2.4957,
|
|
"step": 23260
|
|
},
|
|
{
|
|
"epoch": 7.348416646924109,
|
|
"grad_norm": 0.07465679171250583,
|
|
"learning_rate": 0.00039798681311642404,
|
|
"loss": 2.4803,
|
|
"step": 23265
|
|
},
|
|
{
|
|
"epoch": 7.3499960514885885,
|
|
"grad_norm": 0.06743300010437357,
|
|
"learning_rate": 0.000397546582010946,
|
|
"loss": 2.4106,
|
|
"step": 23270
|
|
},
|
|
{
|
|
"epoch": 7.351575456053068,
|
|
"grad_norm": 0.06227504892752251,
|
|
"learning_rate": 0.0003971065341077035,
|
|
"loss": 2.4441,
|
|
"step": 23275
|
|
},
|
|
{
|
|
"epoch": 7.353154860617547,
|
|
"grad_norm": 0.0487941241422176,
|
|
"learning_rate": 0.000396666669540512,
|
|
"loss": 2.4093,
|
|
"step": 23280
|
|
},
|
|
{
|
|
"epoch": 7.354734265182026,
|
|
"grad_norm": 0.064214288880335,
|
|
"learning_rate": 0.0003962269884431311,
|
|
"loss": 2.3302,
|
|
"step": 23285
|
|
},
|
|
{
|
|
"epoch": 7.3563136697465055,
|
|
"grad_norm": 0.058324830021285086,
|
|
"learning_rate": 0.0003957874909492658,
|
|
"loss": 2.4446,
|
|
"step": 23290
|
|
},
|
|
{
|
|
"epoch": 7.357893074310985,
|
|
"grad_norm": 0.07221663354459125,
|
|
"learning_rate": 0.0003953481771925641,
|
|
"loss": 2.3911,
|
|
"step": 23295
|
|
},
|
|
{
|
|
"epoch": 7.359472478875464,
|
|
"grad_norm": 0.060054117637791414,
|
|
"learning_rate": 0.00039490904730661846,
|
|
"loss": 2.4816,
|
|
"step": 23300
|
|
},
|
|
{
|
|
"epoch": 7.361051883439943,
|
|
"grad_norm": 0.0818000514125703,
|
|
"learning_rate": 0.00039447010142496555,
|
|
"loss": 2.4044,
|
|
"step": 23305
|
|
},
|
|
{
|
|
"epoch": 7.3626312880044225,
|
|
"grad_norm": 0.0568783557612923,
|
|
"learning_rate": 0.0003940313396810855,
|
|
"loss": 2.3786,
|
|
"step": 23310
|
|
},
|
|
{
|
|
"epoch": 7.364210692568902,
|
|
"grad_norm": 0.05672571981004285,
|
|
"learning_rate": 0.00039359276220840377,
|
|
"loss": 2.4056,
|
|
"step": 23315
|
|
},
|
|
{
|
|
"epoch": 7.365790097133381,
|
|
"grad_norm": 0.06653408816348595,
|
|
"learning_rate": 0.0003931543691402887,
|
|
"loss": 2.411,
|
|
"step": 23320
|
|
},
|
|
{
|
|
"epoch": 7.36736950169786,
|
|
"grad_norm": 0.06543603909140704,
|
|
"learning_rate": 0.0003927161606100523,
|
|
"loss": 2.4427,
|
|
"step": 23325
|
|
},
|
|
{
|
|
"epoch": 7.3689489062623394,
|
|
"grad_norm": 0.05664764631069919,
|
|
"learning_rate": 0.0003922781367509519,
|
|
"loss": 2.462,
|
|
"step": 23330
|
|
},
|
|
{
|
|
"epoch": 7.370528310826819,
|
|
"grad_norm": 0.06610941691709837,
|
|
"learning_rate": 0.0003918402976961868,
|
|
"loss": 2.3808,
|
|
"step": 23335
|
|
},
|
|
{
|
|
"epoch": 7.372107715391298,
|
|
"grad_norm": 0.08353822829197813,
|
|
"learning_rate": 0.00039140264357890187,
|
|
"loss": 2.4553,
|
|
"step": 23340
|
|
},
|
|
{
|
|
"epoch": 7.373687119955776,
|
|
"grad_norm": 0.06463230908761662,
|
|
"learning_rate": 0.0003909651745321847,
|
|
"loss": 2.3985,
|
|
"step": 23345
|
|
},
|
|
{
|
|
"epoch": 7.3752665245202556,
|
|
"grad_norm": 0.059977760365744925,
|
|
"learning_rate": 0.00039052789068906655,
|
|
"loss": 2.3647,
|
|
"step": 23350
|
|
},
|
|
{
|
|
"epoch": 7.376845929084735,
|
|
"grad_norm": 0.051553343673477116,
|
|
"learning_rate": 0.000390090792182523,
|
|
"loss": 2.4085,
|
|
"step": 23355
|
|
},
|
|
{
|
|
"epoch": 7.378425333649214,
|
|
"grad_norm": 0.05368864357172616,
|
|
"learning_rate": 0.00038965387914547235,
|
|
"loss": 2.3228,
|
|
"step": 23360
|
|
},
|
|
{
|
|
"epoch": 7.380004738213693,
|
|
"grad_norm": 0.07794239984230016,
|
|
"learning_rate": 0.00038921715171077765,
|
|
"loss": 2.4189,
|
|
"step": 23365
|
|
},
|
|
{
|
|
"epoch": 7.3815841427781725,
|
|
"grad_norm": 0.061574805112702094,
|
|
"learning_rate": 0.0003887806100112449,
|
|
"loss": 2.4475,
|
|
"step": 23370
|
|
},
|
|
{
|
|
"epoch": 7.383163547342652,
|
|
"grad_norm": 0.05258619548552006,
|
|
"learning_rate": 0.0003883442541796229,
|
|
"loss": 2.4548,
|
|
"step": 23375
|
|
},
|
|
{
|
|
"epoch": 7.384742951907131,
|
|
"grad_norm": 0.06368643485947272,
|
|
"learning_rate": 0.0003879080843486057,
|
|
"loss": 2.5035,
|
|
"step": 23380
|
|
},
|
|
{
|
|
"epoch": 7.38632235647161,
|
|
"grad_norm": 0.04948048503103337,
|
|
"learning_rate": 0.0003874721006508293,
|
|
"loss": 2.3889,
|
|
"step": 23385
|
|
},
|
|
{
|
|
"epoch": 7.3879017610360895,
|
|
"grad_norm": 0.049377374469387274,
|
|
"learning_rate": 0.0003870363032188735,
|
|
"loss": 2.3956,
|
|
"step": 23390
|
|
},
|
|
{
|
|
"epoch": 7.389481165600569,
|
|
"grad_norm": 0.055281587869871605,
|
|
"learning_rate": 0.0003866006921852616,
|
|
"loss": 2.4195,
|
|
"step": 23395
|
|
},
|
|
{
|
|
"epoch": 7.391060570165048,
|
|
"grad_norm": 0.058033724803978066,
|
|
"learning_rate": 0.00038616526768245975,
|
|
"loss": 2.4196,
|
|
"step": 23400
|
|
},
|
|
{
|
|
"epoch": 7.392639974729527,
|
|
"grad_norm": 0.05428967106481442,
|
|
"learning_rate": 0.0003857300298428784,
|
|
"loss": 2.4819,
|
|
"step": 23405
|
|
},
|
|
{
|
|
"epoch": 7.3942193792940065,
|
|
"grad_norm": 0.04879265886815176,
|
|
"learning_rate": 0.0003852949787988703,
|
|
"loss": 2.2827,
|
|
"step": 23410
|
|
},
|
|
{
|
|
"epoch": 7.395798783858485,
|
|
"grad_norm": 0.048160402312052424,
|
|
"learning_rate": 0.0003848601146827314,
|
|
"loss": 2.4231,
|
|
"step": 23415
|
|
},
|
|
{
|
|
"epoch": 7.397378188422964,
|
|
"grad_norm": 0.051898838198475634,
|
|
"learning_rate": 0.0003844254376267017,
|
|
"loss": 2.4422,
|
|
"step": 23420
|
|
},
|
|
{
|
|
"epoch": 7.398957592987443,
|
|
"grad_norm": 0.05260687555507956,
|
|
"learning_rate": 0.00038399094776296296,
|
|
"loss": 2.3719,
|
|
"step": 23425
|
|
},
|
|
{
|
|
"epoch": 7.400536997551923,
|
|
"grad_norm": 0.053159802639681904,
|
|
"learning_rate": 0.0003835566452236416,
|
|
"loss": 2.3793,
|
|
"step": 23430
|
|
},
|
|
{
|
|
"epoch": 7.402116402116402,
|
|
"grad_norm": 0.05620518586086027,
|
|
"learning_rate": 0.000383122530140806,
|
|
"loss": 2.4135,
|
|
"step": 23435
|
|
},
|
|
{
|
|
"epoch": 7.403695806680881,
|
|
"grad_norm": 0.06692891799390799,
|
|
"learning_rate": 0.00038268860264646757,
|
|
"loss": 2.4662,
|
|
"step": 23440
|
|
},
|
|
{
|
|
"epoch": 7.40527521124536,
|
|
"grad_norm": 0.05887652203823664,
|
|
"learning_rate": 0.00038225486287258095,
|
|
"loss": 2.3633,
|
|
"step": 23445
|
|
},
|
|
{
|
|
"epoch": 7.40685461580984,
|
|
"grad_norm": 0.05943560239549316,
|
|
"learning_rate": 0.0003818213109510432,
|
|
"loss": 2.4586,
|
|
"step": 23450
|
|
},
|
|
{
|
|
"epoch": 7.408434020374319,
|
|
"grad_norm": 0.0472467899939956,
|
|
"learning_rate": 0.0003813879470136956,
|
|
"loss": 2.3515,
|
|
"step": 23455
|
|
},
|
|
{
|
|
"epoch": 7.410013424938798,
|
|
"grad_norm": 0.053760128516914814,
|
|
"learning_rate": 0.0003809547711923209,
|
|
"loss": 2.3773,
|
|
"step": 23460
|
|
},
|
|
{
|
|
"epoch": 7.411592829503277,
|
|
"grad_norm": 0.051162254755005174,
|
|
"learning_rate": 0.0003805217836186446,
|
|
"loss": 2.4469,
|
|
"step": 23465
|
|
},
|
|
{
|
|
"epoch": 7.413172234067757,
|
|
"grad_norm": 0.05668412599755234,
|
|
"learning_rate": 0.0003800889844243365,
|
|
"loss": 2.4433,
|
|
"step": 23470
|
|
},
|
|
{
|
|
"epoch": 7.414751638632236,
|
|
"grad_norm": 0.05112718426244128,
|
|
"learning_rate": 0.00037965637374100735,
|
|
"loss": 2.4079,
|
|
"step": 23475
|
|
},
|
|
{
|
|
"epoch": 7.416331043196715,
|
|
"grad_norm": 0.05007729851939131,
|
|
"learning_rate": 0.0003792239517002116,
|
|
"loss": 2.3804,
|
|
"step": 23480
|
|
},
|
|
{
|
|
"epoch": 7.417910447761194,
|
|
"grad_norm": 0.057342576874044066,
|
|
"learning_rate": 0.0003787917184334457,
|
|
"loss": 2.4456,
|
|
"step": 23485
|
|
},
|
|
{
|
|
"epoch": 7.419489852325674,
|
|
"grad_norm": 0.04923906008979115,
|
|
"learning_rate": 0.0003783596740721491,
|
|
"loss": 2.3102,
|
|
"step": 23490
|
|
},
|
|
{
|
|
"epoch": 7.421069256890153,
|
|
"grad_norm": 0.05895303400065207,
|
|
"learning_rate": 0.000377927818747704,
|
|
"loss": 2.4249,
|
|
"step": 23495
|
|
},
|
|
{
|
|
"epoch": 7.422648661454631,
|
|
"grad_norm": 0.05242232639144367,
|
|
"learning_rate": 0.00037749615259143445,
|
|
"loss": 2.3691,
|
|
"step": 23500
|
|
},
|
|
{
|
|
"epoch": 7.4242280660191105,
|
|
"grad_norm": 0.053844882394087655,
|
|
"learning_rate": 0.0003770646757346079,
|
|
"loss": 2.3882,
|
|
"step": 23505
|
|
},
|
|
{
|
|
"epoch": 7.42580747058359,
|
|
"grad_norm": 0.05970724681811083,
|
|
"learning_rate": 0.0003766333883084335,
|
|
"loss": 2.4231,
|
|
"step": 23510
|
|
},
|
|
{
|
|
"epoch": 7.427386875148069,
|
|
"grad_norm": 0.058663310720548756,
|
|
"learning_rate": 0.00037620229044406253,
|
|
"loss": 2.3752,
|
|
"step": 23515
|
|
},
|
|
{
|
|
"epoch": 7.428966279712548,
|
|
"grad_norm": 0.06418413673406441,
|
|
"learning_rate": 0.0003757713822725898,
|
|
"loss": 2.4039,
|
|
"step": 23520
|
|
},
|
|
{
|
|
"epoch": 7.430545684277027,
|
|
"grad_norm": 0.06134913787884254,
|
|
"learning_rate": 0.0003753406639250514,
|
|
"loss": 2.4186,
|
|
"step": 23525
|
|
},
|
|
{
|
|
"epoch": 7.432125088841507,
|
|
"grad_norm": 0.058718245540661414,
|
|
"learning_rate": 0.00037491013553242605,
|
|
"loss": 2.3649,
|
|
"step": 23530
|
|
},
|
|
{
|
|
"epoch": 7.433704493405986,
|
|
"grad_norm": 0.06665366330747048,
|
|
"learning_rate": 0.0003744797972256346,
|
|
"loss": 2.4242,
|
|
"step": 23535
|
|
},
|
|
{
|
|
"epoch": 7.435283897970465,
|
|
"grad_norm": 0.06256570473124856,
|
|
"learning_rate": 0.0003740496491355401,
|
|
"loss": 2.4853,
|
|
"step": 23540
|
|
},
|
|
{
|
|
"epoch": 7.436863302534944,
|
|
"grad_norm": 0.07020851693596936,
|
|
"learning_rate": 0.00037361969139294816,
|
|
"loss": 2.4186,
|
|
"step": 23545
|
|
},
|
|
{
|
|
"epoch": 7.438442707099424,
|
|
"grad_norm": 0.057256852093966595,
|
|
"learning_rate": 0.00037318992412860606,
|
|
"loss": 2.4861,
|
|
"step": 23550
|
|
},
|
|
{
|
|
"epoch": 7.440022111663903,
|
|
"grad_norm": 0.04847339601890821,
|
|
"learning_rate": 0.00037276034747320296,
|
|
"loss": 2.3127,
|
|
"step": 23555
|
|
},
|
|
{
|
|
"epoch": 7.441601516228382,
|
|
"grad_norm": 0.06457386012177314,
|
|
"learning_rate": 0.00037233096155737087,
|
|
"loss": 2.4756,
|
|
"step": 23560
|
|
},
|
|
{
|
|
"epoch": 7.443180920792861,
|
|
"grad_norm": 0.05290248046859747,
|
|
"learning_rate": 0.0003719017665116833,
|
|
"loss": 2.3679,
|
|
"step": 23565
|
|
},
|
|
{
|
|
"epoch": 7.444760325357341,
|
|
"grad_norm": 0.047604859231034254,
|
|
"learning_rate": 0.00037147276246665527,
|
|
"loss": 2.3873,
|
|
"step": 23570
|
|
},
|
|
{
|
|
"epoch": 7.446339729921819,
|
|
"grad_norm": 0.06168118905027415,
|
|
"learning_rate": 0.0003710439495527446,
|
|
"loss": 2.4328,
|
|
"step": 23575
|
|
},
|
|
{
|
|
"epoch": 7.447919134486298,
|
|
"grad_norm": 0.051595459108080534,
|
|
"learning_rate": 0.0003706153279003498,
|
|
"loss": 2.3076,
|
|
"step": 23580
|
|
},
|
|
{
|
|
"epoch": 7.4494985390507775,
|
|
"grad_norm": 0.05230454156519212,
|
|
"learning_rate": 0.00037018689763981295,
|
|
"loss": 2.4704,
|
|
"step": 23585
|
|
},
|
|
{
|
|
"epoch": 7.451077943615257,
|
|
"grad_norm": 0.04841876288423717,
|
|
"learning_rate": 0.00036975865890141626,
|
|
"loss": 2.3298,
|
|
"step": 23590
|
|
},
|
|
{
|
|
"epoch": 7.452657348179736,
|
|
"grad_norm": 0.0652579610408843,
|
|
"learning_rate": 0.000369330611815385,
|
|
"loss": 2.5174,
|
|
"step": 23595
|
|
},
|
|
{
|
|
"epoch": 7.454236752744215,
|
|
"grad_norm": 0.0634467381622313,
|
|
"learning_rate": 0.0003689027565118852,
|
|
"loss": 2.3438,
|
|
"step": 23600
|
|
},
|
|
{
|
|
"epoch": 7.4558161573086945,
|
|
"grad_norm": 0.04628284636995493,
|
|
"learning_rate": 0.00036847509312102467,
|
|
"loss": 2.5966,
|
|
"step": 23605
|
|
},
|
|
{
|
|
"epoch": 7.457395561873174,
|
|
"grad_norm": 0.05251325655453334,
|
|
"learning_rate": 0.00036804762177285367,
|
|
"loss": 2.3418,
|
|
"step": 23610
|
|
},
|
|
{
|
|
"epoch": 7.458974966437653,
|
|
"grad_norm": 0.07722348403670708,
|
|
"learning_rate": 0.0003676203425973632,
|
|
"loss": 2.4356,
|
|
"step": 23615
|
|
},
|
|
{
|
|
"epoch": 7.460554371002132,
|
|
"grad_norm": 0.04889864050952568,
|
|
"learning_rate": 0.00036719325572448627,
|
|
"loss": 2.3873,
|
|
"step": 23620
|
|
},
|
|
{
|
|
"epoch": 7.4621337755666115,
|
|
"grad_norm": 0.06170912673919736,
|
|
"learning_rate": 0.0003667663612840971,
|
|
"loss": 2.3894,
|
|
"step": 23625
|
|
},
|
|
{
|
|
"epoch": 7.463713180131091,
|
|
"grad_norm": 0.059139982586257196,
|
|
"learning_rate": 0.0003663396594060113,
|
|
"loss": 2.4052,
|
|
"step": 23630
|
|
},
|
|
{
|
|
"epoch": 7.46529258469557,
|
|
"grad_norm": 0.050779417722003305,
|
|
"learning_rate": 0.00036591315021998683,
|
|
"loss": 2.3199,
|
|
"step": 23635
|
|
},
|
|
{
|
|
"epoch": 7.466871989260049,
|
|
"grad_norm": 0.052582832535869733,
|
|
"learning_rate": 0.00036548683385572215,
|
|
"loss": 2.3601,
|
|
"step": 23640
|
|
},
|
|
{
|
|
"epoch": 7.4684513938245285,
|
|
"grad_norm": 0.05032154022496104,
|
|
"learning_rate": 0.00036506071044285684,
|
|
"loss": 2.4505,
|
|
"step": 23645
|
|
},
|
|
{
|
|
"epoch": 7.470030798389008,
|
|
"grad_norm": 0.05777376740018471,
|
|
"learning_rate": 0.00036463478011097307,
|
|
"loss": 2.3916,
|
|
"step": 23650
|
|
},
|
|
{
|
|
"epoch": 7.471610202953487,
|
|
"grad_norm": 0.0602882474269405,
|
|
"learning_rate": 0.0003642090429895933,
|
|
"loss": 2.4156,
|
|
"step": 23655
|
|
},
|
|
{
|
|
"epoch": 7.473189607517965,
|
|
"grad_norm": 0.056972774488076666,
|
|
"learning_rate": 0.0003637834992081813,
|
|
"loss": 2.4732,
|
|
"step": 23660
|
|
},
|
|
{
|
|
"epoch": 7.474769012082445,
|
|
"grad_norm": 0.06404474027264716,
|
|
"learning_rate": 0.00036335814889614236,
|
|
"loss": 2.3455,
|
|
"step": 23665
|
|
},
|
|
{
|
|
"epoch": 7.476348416646924,
|
|
"grad_norm": 0.053151870447874665,
|
|
"learning_rate": 0.0003629329921828224,
|
|
"loss": 2.3919,
|
|
"step": 23670
|
|
},
|
|
{
|
|
"epoch": 7.477927821211403,
|
|
"grad_norm": 0.050179990744870624,
|
|
"learning_rate": 0.0003625080291975095,
|
|
"loss": 2.3638,
|
|
"step": 23675
|
|
},
|
|
{
|
|
"epoch": 7.479507225775882,
|
|
"grad_norm": 0.05344170008854404,
|
|
"learning_rate": 0.0003620832600694314,
|
|
"loss": 2.4184,
|
|
"step": 23680
|
|
},
|
|
{
|
|
"epoch": 7.481086630340362,
|
|
"grad_norm": 0.05880039006582616,
|
|
"learning_rate": 0.00036165868492775866,
|
|
"loss": 2.5056,
|
|
"step": 23685
|
|
},
|
|
{
|
|
"epoch": 7.482666034904841,
|
|
"grad_norm": 0.0555886248543761,
|
|
"learning_rate": 0.0003612343039016013,
|
|
"loss": 2.4771,
|
|
"step": 23690
|
|
},
|
|
{
|
|
"epoch": 7.48424543946932,
|
|
"grad_norm": 0.053690628524088346,
|
|
"learning_rate": 0.00036081011712001056,
|
|
"loss": 2.429,
|
|
"step": 23695
|
|
},
|
|
{
|
|
"epoch": 7.485824844033799,
|
|
"grad_norm": 0.04431564004406769,
|
|
"learning_rate": 0.00036038612471197965,
|
|
"loss": 2.389,
|
|
"step": 23700
|
|
},
|
|
{
|
|
"epoch": 7.487404248598279,
|
|
"grad_norm": 0.04878347140061997,
|
|
"learning_rate": 0.0003599623268064416,
|
|
"loss": 2.4411,
|
|
"step": 23705
|
|
},
|
|
{
|
|
"epoch": 7.488983653162758,
|
|
"grad_norm": 0.05236608966606008,
|
|
"learning_rate": 0.0003595387235322707,
|
|
"loss": 2.5116,
|
|
"step": 23710
|
|
},
|
|
{
|
|
"epoch": 7.490563057727237,
|
|
"grad_norm": 0.05100673243720879,
|
|
"learning_rate": 0.000359115315018282,
|
|
"loss": 2.4688,
|
|
"step": 23715
|
|
},
|
|
{
|
|
"epoch": 7.492142462291716,
|
|
"grad_norm": 0.05411154070919953,
|
|
"learning_rate": 0.0003586921013932308,
|
|
"loss": 2.4296,
|
|
"step": 23720
|
|
},
|
|
{
|
|
"epoch": 7.493721866856196,
|
|
"grad_norm": 0.052071339596594705,
|
|
"learning_rate": 0.0003582690827858146,
|
|
"loss": 2.3595,
|
|
"step": 23725
|
|
},
|
|
{
|
|
"epoch": 7.495301271420675,
|
|
"grad_norm": 0.05677860791346205,
|
|
"learning_rate": 0.00035784625932466975,
|
|
"loss": 2.4711,
|
|
"step": 23730
|
|
},
|
|
{
|
|
"epoch": 7.496880675985153,
|
|
"grad_norm": 0.04987279541605942,
|
|
"learning_rate": 0.00035742363113837507,
|
|
"loss": 2.4433,
|
|
"step": 23735
|
|
},
|
|
{
|
|
"epoch": 7.498460080549632,
|
|
"grad_norm": 0.050881308693398304,
|
|
"learning_rate": 0.0003570011983554485,
|
|
"loss": 2.3913,
|
|
"step": 23740
|
|
},
|
|
{
|
|
"epoch": 7.500039485114112,
|
|
"grad_norm": 0.050137608515738584,
|
|
"learning_rate": 0.0003565789611043494,
|
|
"loss": 2.451,
|
|
"step": 23745
|
|
},
|
|
{
|
|
"epoch": 7.501618889678591,
|
|
"grad_norm": 0.055709883463010215,
|
|
"learning_rate": 0.0003561569195134772,
|
|
"loss": 2.4377,
|
|
"step": 23750
|
|
},
|
|
{
|
|
"epoch": 7.50319829424307,
|
|
"grad_norm": 0.05561732872868846,
|
|
"learning_rate": 0.0003557350737111722,
|
|
"loss": 2.4005,
|
|
"step": 23755
|
|
},
|
|
{
|
|
"epoch": 7.504777698807549,
|
|
"grad_norm": 0.0528833894614215,
|
|
"learning_rate": 0.00035531342382571465,
|
|
"loss": 2.3904,
|
|
"step": 23760
|
|
},
|
|
{
|
|
"epoch": 7.506357103372029,
|
|
"grad_norm": 0.06357337200410884,
|
|
"learning_rate": 0.00035489196998532614,
|
|
"loss": 2.3614,
|
|
"step": 23765
|
|
},
|
|
{
|
|
"epoch": 7.507936507936508,
|
|
"grad_norm": 0.05040209838574259,
|
|
"learning_rate": 0.0003544707123181675,
|
|
"loss": 2.3819,
|
|
"step": 23770
|
|
},
|
|
{
|
|
"epoch": 7.509515912500987,
|
|
"grad_norm": 0.06469235887929314,
|
|
"learning_rate": 0.00035404965095234134,
|
|
"loss": 2.4137,
|
|
"step": 23775
|
|
},
|
|
{
|
|
"epoch": 7.511095317065466,
|
|
"grad_norm": 0.058756076336860794,
|
|
"learning_rate": 0.0003536287860158891,
|
|
"loss": 2.4377,
|
|
"step": 23780
|
|
},
|
|
{
|
|
"epoch": 7.512674721629946,
|
|
"grad_norm": 0.05427048344633684,
|
|
"learning_rate": 0.0003532081176367929,
|
|
"loss": 2.4334,
|
|
"step": 23785
|
|
},
|
|
{
|
|
"epoch": 7.514254126194425,
|
|
"grad_norm": 0.05445023053867659,
|
|
"learning_rate": 0.00035278764594297605,
|
|
"loss": 2.402,
|
|
"step": 23790
|
|
},
|
|
{
|
|
"epoch": 7.515833530758904,
|
|
"grad_norm": 0.05572921923027817,
|
|
"learning_rate": 0.00035236737106230086,
|
|
"loss": 2.3982,
|
|
"step": 23795
|
|
},
|
|
{
|
|
"epoch": 7.517412935323383,
|
|
"grad_norm": 0.05097853575965763,
|
|
"learning_rate": 0.00035194729312257035,
|
|
"loss": 2.3,
|
|
"step": 23800
|
|
},
|
|
{
|
|
"epoch": 7.518992339887863,
|
|
"grad_norm": 0.05610154877951178,
|
|
"learning_rate": 0.00035152741225152754,
|
|
"loss": 2.4078,
|
|
"step": 23805
|
|
},
|
|
{
|
|
"epoch": 7.520571744452342,
|
|
"grad_norm": 0.054313889130966415,
|
|
"learning_rate": 0.000351107728576855,
|
|
"loss": 2.492,
|
|
"step": 23810
|
|
},
|
|
{
|
|
"epoch": 7.522151149016821,
|
|
"grad_norm": 0.05448783726055358,
|
|
"learning_rate": 0.00035068824222617666,
|
|
"loss": 2.4035,
|
|
"step": 23815
|
|
},
|
|
{
|
|
"epoch": 7.5237305535812995,
|
|
"grad_norm": 0.05203520227656932,
|
|
"learning_rate": 0.000350268953327055,
|
|
"loss": 2.3478,
|
|
"step": 23820
|
|
},
|
|
{
|
|
"epoch": 7.525309958145779,
|
|
"grad_norm": 0.05930874635571445,
|
|
"learning_rate": 0.0003498498620069938,
|
|
"loss": 2.3964,
|
|
"step": 23825
|
|
},
|
|
{
|
|
"epoch": 7.526889362710258,
|
|
"grad_norm": 0.058976632389174895,
|
|
"learning_rate": 0.00034943096839343545,
|
|
"loss": 2.4033,
|
|
"step": 23830
|
|
},
|
|
{
|
|
"epoch": 7.528468767274737,
|
|
"grad_norm": 0.05484249479120815,
|
|
"learning_rate": 0.0003490122726137632,
|
|
"loss": 2.4029,
|
|
"step": 23835
|
|
},
|
|
{
|
|
"epoch": 7.5300481718392165,
|
|
"grad_norm": 0.05927657326723422,
|
|
"learning_rate": 0.0003485937747952994,
|
|
"loss": 2.3705,
|
|
"step": 23840
|
|
},
|
|
{
|
|
"epoch": 7.531627576403696,
|
|
"grad_norm": 0.0510950825555915,
|
|
"learning_rate": 0.00034817547506530653,
|
|
"loss": 2.4689,
|
|
"step": 23845
|
|
},
|
|
{
|
|
"epoch": 7.533206980968175,
|
|
"grad_norm": 0.0582258861901962,
|
|
"learning_rate": 0.00034775737355098737,
|
|
"loss": 2.3584,
|
|
"step": 23850
|
|
},
|
|
{
|
|
"epoch": 7.534786385532654,
|
|
"grad_norm": 0.06160566988929809,
|
|
"learning_rate": 0.0003473394703794837,
|
|
"loss": 2.4319,
|
|
"step": 23855
|
|
},
|
|
{
|
|
"epoch": 7.5363657900971335,
|
|
"grad_norm": 0.05308567369648612,
|
|
"learning_rate": 0.0003469217656778766,
|
|
"loss": 2.4249,
|
|
"step": 23860
|
|
},
|
|
{
|
|
"epoch": 7.537945194661613,
|
|
"grad_norm": 0.06369662464028052,
|
|
"learning_rate": 0.00034650425957318844,
|
|
"loss": 2.4027,
|
|
"step": 23865
|
|
},
|
|
{
|
|
"epoch": 7.539524599226092,
|
|
"grad_norm": 0.06550185521725853,
|
|
"learning_rate": 0.00034608695219237953,
|
|
"loss": 2.4746,
|
|
"step": 23870
|
|
},
|
|
{
|
|
"epoch": 7.541104003790571,
|
|
"grad_norm": 0.04797082265967074,
|
|
"learning_rate": 0.0003456698436623502,
|
|
"loss": 2.339,
|
|
"step": 23875
|
|
},
|
|
{
|
|
"epoch": 7.5426834083550505,
|
|
"grad_norm": 0.051517689382158276,
|
|
"learning_rate": 0.0003452529341099411,
|
|
"loss": 2.355,
|
|
"step": 23880
|
|
},
|
|
{
|
|
"epoch": 7.54426281291953,
|
|
"grad_norm": 0.05197438536779773,
|
|
"learning_rate": 0.0003448362236619315,
|
|
"loss": 2.3605,
|
|
"step": 23885
|
|
},
|
|
{
|
|
"epoch": 7.545842217484008,
|
|
"grad_norm": 0.05083741389939934,
|
|
"learning_rate": 0.00034441971244504024,
|
|
"loss": 2.4584,
|
|
"step": 23890
|
|
},
|
|
{
|
|
"epoch": 7.547421622048487,
|
|
"grad_norm": 0.05823221757407544,
|
|
"learning_rate": 0.0003440034005859258,
|
|
"loss": 2.3936,
|
|
"step": 23895
|
|
},
|
|
{
|
|
"epoch": 7.549001026612967,
|
|
"grad_norm": 0.06091531540814351,
|
|
"learning_rate": 0.0003435872882111857,
|
|
"loss": 2.4418,
|
|
"step": 23900
|
|
},
|
|
{
|
|
"epoch": 7.550580431177446,
|
|
"grad_norm": 0.04708894240854059,
|
|
"learning_rate": 0.00034317137544735753,
|
|
"loss": 2.3962,
|
|
"step": 23905
|
|
},
|
|
{
|
|
"epoch": 7.552159835741925,
|
|
"grad_norm": 0.07520886685119499,
|
|
"learning_rate": 0.00034275566242091725,
|
|
"loss": 2.3792,
|
|
"step": 23910
|
|
},
|
|
{
|
|
"epoch": 7.553739240306404,
|
|
"grad_norm": 0.0847747471839153,
|
|
"learning_rate": 0.00034234014925828114,
|
|
"loss": 2.5207,
|
|
"step": 23915
|
|
},
|
|
{
|
|
"epoch": 7.555318644870884,
|
|
"grad_norm": 0.04947878947689636,
|
|
"learning_rate": 0.00034192483608580374,
|
|
"loss": 2.429,
|
|
"step": 23920
|
|
},
|
|
{
|
|
"epoch": 7.556898049435363,
|
|
"grad_norm": 0.05002392750223998,
|
|
"learning_rate": 0.0003415097230297791,
|
|
"loss": 2.3927,
|
|
"step": 23925
|
|
},
|
|
{
|
|
"epoch": 7.558477453999842,
|
|
"grad_norm": 0.049763788116632214,
|
|
"learning_rate": 0.0003410948102164404,
|
|
"loss": 2.4146,
|
|
"step": 23930
|
|
},
|
|
{
|
|
"epoch": 7.560056858564321,
|
|
"grad_norm": 0.06408432685825667,
|
|
"learning_rate": 0.00034068009777195985,
|
|
"loss": 2.4863,
|
|
"step": 23935
|
|
},
|
|
{
|
|
"epoch": 7.5616362631288006,
|
|
"grad_norm": 0.04508639165527899,
|
|
"learning_rate": 0.0003402655858224493,
|
|
"loss": 2.371,
|
|
"step": 23940
|
|
},
|
|
{
|
|
"epoch": 7.56321566769328,
|
|
"grad_norm": 0.05422277716588939,
|
|
"learning_rate": 0.00033985127449395893,
|
|
"loss": 2.441,
|
|
"step": 23945
|
|
},
|
|
{
|
|
"epoch": 7.564795072257759,
|
|
"grad_norm": 0.06150907765634088,
|
|
"learning_rate": 0.00033943716391247793,
|
|
"loss": 2.4413,
|
|
"step": 23950
|
|
},
|
|
{
|
|
"epoch": 7.566374476822238,
|
|
"grad_norm": 0.05127590635874652,
|
|
"learning_rate": 0.00033902325420393523,
|
|
"loss": 2.3932,
|
|
"step": 23955
|
|
},
|
|
{
|
|
"epoch": 7.5679538813867175,
|
|
"grad_norm": 0.057672367901127446,
|
|
"learning_rate": 0.0003386095454941974,
|
|
"loss": 2.3839,
|
|
"step": 23960
|
|
},
|
|
{
|
|
"epoch": 7.569533285951197,
|
|
"grad_norm": 0.05146168485962654,
|
|
"learning_rate": 0.00033819603790907147,
|
|
"loss": 2.4863,
|
|
"step": 23965
|
|
},
|
|
{
|
|
"epoch": 7.571112690515676,
|
|
"grad_norm": 0.0535378903359561,
|
|
"learning_rate": 0.00033778273157430207,
|
|
"loss": 2.339,
|
|
"step": 23970
|
|
},
|
|
{
|
|
"epoch": 7.572692095080154,
|
|
"grad_norm": 0.05588125048196558,
|
|
"learning_rate": 0.0003373696266155729,
|
|
"loss": 2.4713,
|
|
"step": 23975
|
|
},
|
|
{
|
|
"epoch": 7.574271499644634,
|
|
"grad_norm": 0.053273495265892444,
|
|
"learning_rate": 0.0003369567231585067,
|
|
"loss": 2.3334,
|
|
"step": 23980
|
|
},
|
|
{
|
|
"epoch": 7.575850904209113,
|
|
"grad_norm": 0.052236979555313555,
|
|
"learning_rate": 0.00033654402132866456,
|
|
"loss": 2.3322,
|
|
"step": 23985
|
|
},
|
|
{
|
|
"epoch": 7.577430308773592,
|
|
"grad_norm": 0.04650182802860371,
|
|
"learning_rate": 0.00033613152125154636,
|
|
"loss": 2.3798,
|
|
"step": 23990
|
|
},
|
|
{
|
|
"epoch": 7.579009713338071,
|
|
"grad_norm": 0.05176251861636686,
|
|
"learning_rate": 0.00033571922305259126,
|
|
"loss": 2.4699,
|
|
"step": 23995
|
|
},
|
|
{
|
|
"epoch": 7.580589117902551,
|
|
"grad_norm": 0.05006150490263106,
|
|
"learning_rate": 0.0003353071268571759,
|
|
"loss": 2.2849,
|
|
"step": 24000
|
|
},
|
|
{
|
|
"epoch": 7.58216852246703,
|
|
"grad_norm": 0.047048389474652993,
|
|
"learning_rate": 0.00033489523279061674,
|
|
"loss": 2.3827,
|
|
"step": 24005
|
|
},
|
|
{
|
|
"epoch": 7.583747927031509,
|
|
"grad_norm": 0.0557043619859469,
|
|
"learning_rate": 0.0003344835409781679,
|
|
"loss": 2.3429,
|
|
"step": 24010
|
|
},
|
|
{
|
|
"epoch": 7.585327331595988,
|
|
"grad_norm": 0.05172011245331398,
|
|
"learning_rate": 0.0003340720515450221,
|
|
"loss": 2.3527,
|
|
"step": 24015
|
|
},
|
|
{
|
|
"epoch": 7.586906736160468,
|
|
"grad_norm": 0.0491888915760231,
|
|
"learning_rate": 0.0003336607646163106,
|
|
"loss": 2.4425,
|
|
"step": 24020
|
|
},
|
|
{
|
|
"epoch": 7.588486140724947,
|
|
"grad_norm": 0.05737364984818568,
|
|
"learning_rate": 0.00033324968031710303,
|
|
"loss": 2.3395,
|
|
"step": 24025
|
|
},
|
|
{
|
|
"epoch": 7.590065545289426,
|
|
"grad_norm": 0.05841923422913503,
|
|
"learning_rate": 0.0003328387987724079,
|
|
"loss": 2.3915,
|
|
"step": 24030
|
|
},
|
|
{
|
|
"epoch": 7.591644949853905,
|
|
"grad_norm": 0.05107266580284771,
|
|
"learning_rate": 0.0003324281201071715,
|
|
"loss": 2.4611,
|
|
"step": 24035
|
|
},
|
|
{
|
|
"epoch": 7.593224354418385,
|
|
"grad_norm": 0.05129232000800214,
|
|
"learning_rate": 0.00033201764444627823,
|
|
"loss": 2.4628,
|
|
"step": 24040
|
|
},
|
|
{
|
|
"epoch": 7.594803758982863,
|
|
"grad_norm": 0.07500145964807096,
|
|
"learning_rate": 0.0003316073719145517,
|
|
"loss": 2.3735,
|
|
"step": 24045
|
|
},
|
|
{
|
|
"epoch": 7.596383163547342,
|
|
"grad_norm": 0.06367210474236257,
|
|
"learning_rate": 0.0003311973026367526,
|
|
"loss": 2.4049,
|
|
"step": 24050
|
|
},
|
|
{
|
|
"epoch": 7.5979625681118215,
|
|
"grad_norm": 0.05791689448404274,
|
|
"learning_rate": 0.000330787436737581,
|
|
"loss": 2.4212,
|
|
"step": 24055
|
|
},
|
|
{
|
|
"epoch": 7.599541972676301,
|
|
"grad_norm": 0.06148429526004428,
|
|
"learning_rate": 0.00033037777434167414,
|
|
"loss": 2.3598,
|
|
"step": 24060
|
|
},
|
|
{
|
|
"epoch": 7.60112137724078,
|
|
"grad_norm": 0.0632806955715157,
|
|
"learning_rate": 0.00032996831557360786,
|
|
"loss": 2.3784,
|
|
"step": 24065
|
|
},
|
|
{
|
|
"epoch": 7.602700781805259,
|
|
"grad_norm": 0.04967935712466749,
|
|
"learning_rate": 0.0003295590605578959,
|
|
"loss": 2.4446,
|
|
"step": 24070
|
|
},
|
|
{
|
|
"epoch": 7.6042801863697385,
|
|
"grad_norm": 0.053187262168445174,
|
|
"learning_rate": 0.0003291500094189895,
|
|
"loss": 2.3772,
|
|
"step": 24075
|
|
},
|
|
{
|
|
"epoch": 7.605859590934218,
|
|
"grad_norm": 0.051516797227960875,
|
|
"learning_rate": 0.0003287411622812796,
|
|
"loss": 2.4098,
|
|
"step": 24080
|
|
},
|
|
{
|
|
"epoch": 7.607438995498697,
|
|
"grad_norm": 0.04717329615090927,
|
|
"learning_rate": 0.00032833251926909335,
|
|
"loss": 2.388,
|
|
"step": 24085
|
|
},
|
|
{
|
|
"epoch": 7.609018400063176,
|
|
"grad_norm": 0.04961962009053395,
|
|
"learning_rate": 0.0003279240805066963,
|
|
"loss": 2.4496,
|
|
"step": 24090
|
|
},
|
|
{
|
|
"epoch": 7.6105978046276554,
|
|
"grad_norm": 0.0508932227695937,
|
|
"learning_rate": 0.0003275158461182927,
|
|
"loss": 2.3906,
|
|
"step": 24095
|
|
},
|
|
{
|
|
"epoch": 7.612177209192135,
|
|
"grad_norm": 0.047386740219747786,
|
|
"learning_rate": 0.0003271078162280235,
|
|
"loss": 2.3996,
|
|
"step": 24100
|
|
},
|
|
{
|
|
"epoch": 7.613756613756614,
|
|
"grad_norm": 0.051347605182556225,
|
|
"learning_rate": 0.0003266999909599684,
|
|
"loss": 2.3488,
|
|
"step": 24105
|
|
},
|
|
{
|
|
"epoch": 7.615336018321093,
|
|
"grad_norm": 0.05207196085327182,
|
|
"learning_rate": 0.0003262923704381441,
|
|
"loss": 2.4164,
|
|
"step": 24110
|
|
},
|
|
{
|
|
"epoch": 7.616915422885572,
|
|
"grad_norm": 0.051807947784807226,
|
|
"learning_rate": 0.00032588495478650515,
|
|
"loss": 2.3612,
|
|
"step": 24115
|
|
},
|
|
{
|
|
"epoch": 7.618494827450052,
|
|
"grad_norm": 0.05301360880767392,
|
|
"learning_rate": 0.00032547774412894484,
|
|
"loss": 2.4818,
|
|
"step": 24120
|
|
},
|
|
{
|
|
"epoch": 7.620074232014531,
|
|
"grad_norm": 0.058436238645943585,
|
|
"learning_rate": 0.0003250707385892928,
|
|
"loss": 2.4381,
|
|
"step": 24125
|
|
},
|
|
{
|
|
"epoch": 7.62165363657901,
|
|
"grad_norm": 0.05822939906037066,
|
|
"learning_rate": 0.0003246639382913167,
|
|
"loss": 2.4283,
|
|
"step": 24130
|
|
},
|
|
{
|
|
"epoch": 7.6232330411434885,
|
|
"grad_norm": 0.0479691770704822,
|
|
"learning_rate": 0.00032425734335872236,
|
|
"loss": 2.4451,
|
|
"step": 24135
|
|
},
|
|
{
|
|
"epoch": 7.624812445707968,
|
|
"grad_norm": 0.059090968265937245,
|
|
"learning_rate": 0.0003238509539151522,
|
|
"loss": 2.5042,
|
|
"step": 24140
|
|
},
|
|
{
|
|
"epoch": 7.626391850272447,
|
|
"grad_norm": 0.07224711007320013,
|
|
"learning_rate": 0.00032344477008418716,
|
|
"loss": 2.3966,
|
|
"step": 24145
|
|
},
|
|
{
|
|
"epoch": 7.627971254836926,
|
|
"grad_norm": 0.051380954135107085,
|
|
"learning_rate": 0.0003230387919893449,
|
|
"loss": 2.4038,
|
|
"step": 24150
|
|
},
|
|
{
|
|
"epoch": 7.6295506594014055,
|
|
"grad_norm": 0.06859372282080024,
|
|
"learning_rate": 0.00032263301975408087,
|
|
"loss": 2.4316,
|
|
"step": 24155
|
|
},
|
|
{
|
|
"epoch": 7.631130063965885,
|
|
"grad_norm": 0.05318137975594264,
|
|
"learning_rate": 0.00032222745350178773,
|
|
"loss": 2.366,
|
|
"step": 24160
|
|
},
|
|
{
|
|
"epoch": 7.632709468530364,
|
|
"grad_norm": 0.05478988503412728,
|
|
"learning_rate": 0.00032182209335579514,
|
|
"loss": 2.4576,
|
|
"step": 24165
|
|
},
|
|
{
|
|
"epoch": 7.634288873094843,
|
|
"grad_norm": 0.05459819331696542,
|
|
"learning_rate": 0.00032141693943937133,
|
|
"loss": 2.3714,
|
|
"step": 24170
|
|
},
|
|
{
|
|
"epoch": 7.6358682776593225,
|
|
"grad_norm": 0.0590688189521774,
|
|
"learning_rate": 0.0003210119918757206,
|
|
"loss": 2.3416,
|
|
"step": 24175
|
|
},
|
|
{
|
|
"epoch": 7.637447682223802,
|
|
"grad_norm": 0.057447038920899106,
|
|
"learning_rate": 0.0003206072507879847,
|
|
"loss": 2.4195,
|
|
"step": 24180
|
|
},
|
|
{
|
|
"epoch": 7.639027086788281,
|
|
"grad_norm": 0.04830826723505675,
|
|
"learning_rate": 0.00032020271629924345,
|
|
"loss": 2.4337,
|
|
"step": 24185
|
|
},
|
|
{
|
|
"epoch": 7.64060649135276,
|
|
"grad_norm": 0.062425324790732116,
|
|
"learning_rate": 0.00031979838853251274,
|
|
"loss": 2.4195,
|
|
"step": 24190
|
|
},
|
|
{
|
|
"epoch": 7.6421858959172395,
|
|
"grad_norm": 0.05325699262898386,
|
|
"learning_rate": 0.0003193942676107462,
|
|
"loss": 2.3648,
|
|
"step": 24195
|
|
},
|
|
{
|
|
"epoch": 7.643765300481719,
|
|
"grad_norm": 0.05871712109832676,
|
|
"learning_rate": 0.00031899035365683424,
|
|
"loss": 2.4513,
|
|
"step": 24200
|
|
},
|
|
{
|
|
"epoch": 7.645344705046197,
|
|
"grad_norm": 0.060790001080061655,
|
|
"learning_rate": 0.0003185866467936045,
|
|
"loss": 2.3598,
|
|
"step": 24205
|
|
},
|
|
{
|
|
"epoch": 7.646924109610676,
|
|
"grad_norm": 0.056202444771544735,
|
|
"learning_rate": 0.000318183147143822,
|
|
"loss": 2.3693,
|
|
"step": 24210
|
|
},
|
|
{
|
|
"epoch": 7.648503514175156,
|
|
"grad_norm": 0.0644776336201835,
|
|
"learning_rate": 0.0003177798548301883,
|
|
"loss": 2.4006,
|
|
"step": 24215
|
|
},
|
|
{
|
|
"epoch": 7.650082918739635,
|
|
"grad_norm": 0.04997202841729112,
|
|
"learning_rate": 0.0003173767699753416,
|
|
"loss": 2.4625,
|
|
"step": 24220
|
|
},
|
|
{
|
|
"epoch": 7.651662323304114,
|
|
"grad_norm": 0.051209189224240705,
|
|
"learning_rate": 0.0003169738927018579,
|
|
"loss": 2.4392,
|
|
"step": 24225
|
|
},
|
|
{
|
|
"epoch": 7.653241727868593,
|
|
"grad_norm": 0.05000245242432989,
|
|
"learning_rate": 0.0003165712231322493,
|
|
"loss": 2.4012,
|
|
"step": 24230
|
|
},
|
|
{
|
|
"epoch": 7.654821132433073,
|
|
"grad_norm": 0.05428174890846462,
|
|
"learning_rate": 0.00031616876138896547,
|
|
"loss": 2.4287,
|
|
"step": 24235
|
|
},
|
|
{
|
|
"epoch": 7.656400536997552,
|
|
"grad_norm": 0.05278200777192172,
|
|
"learning_rate": 0.0003157665075943922,
|
|
"loss": 2.4078,
|
|
"step": 24240
|
|
},
|
|
{
|
|
"epoch": 7.657979941562031,
|
|
"grad_norm": 0.051568768887383196,
|
|
"learning_rate": 0.0003153644618708523,
|
|
"loss": 2.4421,
|
|
"step": 24245
|
|
},
|
|
{
|
|
"epoch": 7.65955934612651,
|
|
"grad_norm": 0.054085679211000194,
|
|
"learning_rate": 0.00031496262434060516,
|
|
"loss": 2.3234,
|
|
"step": 24250
|
|
},
|
|
{
|
|
"epoch": 7.66113875069099,
|
|
"grad_norm": 0.05481383232982134,
|
|
"learning_rate": 0.00031456099512584704,
|
|
"loss": 2.5246,
|
|
"step": 24255
|
|
},
|
|
{
|
|
"epoch": 7.662718155255469,
|
|
"grad_norm": 0.059426566868512096,
|
|
"learning_rate": 0.000314159574348711,
|
|
"loss": 2.5082,
|
|
"step": 24260
|
|
},
|
|
{
|
|
"epoch": 7.664297559819948,
|
|
"grad_norm": 0.04820355582047649,
|
|
"learning_rate": 0.0003137583621312665,
|
|
"loss": 2.4273,
|
|
"step": 24265
|
|
},
|
|
{
|
|
"epoch": 7.665876964384427,
|
|
"grad_norm": 0.05263363368035431,
|
|
"learning_rate": 0.0003133573585955194,
|
|
"loss": 2.4454,
|
|
"step": 24270
|
|
},
|
|
{
|
|
"epoch": 7.667456368948907,
|
|
"grad_norm": 0.06668088972823961,
|
|
"learning_rate": 0.00031295656386341264,
|
|
"loss": 2.3927,
|
|
"step": 24275
|
|
},
|
|
{
|
|
"epoch": 7.669035773513386,
|
|
"grad_norm": 0.049805678787756505,
|
|
"learning_rate": 0.0003125559780568251,
|
|
"loss": 2.4756,
|
|
"step": 24280
|
|
},
|
|
{
|
|
"epoch": 7.670615178077865,
|
|
"grad_norm": 0.05144318724607636,
|
|
"learning_rate": 0.0003121556012975726,
|
|
"loss": 2.4335,
|
|
"step": 24285
|
|
},
|
|
{
|
|
"epoch": 7.672194582642344,
|
|
"grad_norm": 0.06315689642117019,
|
|
"learning_rate": 0.0003117554337074069,
|
|
"loss": 2.5088,
|
|
"step": 24290
|
|
},
|
|
{
|
|
"epoch": 7.673773987206823,
|
|
"grad_norm": 0.044208247141836916,
|
|
"learning_rate": 0.0003113554754080162,
|
|
"loss": 2.3341,
|
|
"step": 24295
|
|
},
|
|
{
|
|
"epoch": 7.675353391771302,
|
|
"grad_norm": 0.05554919550651169,
|
|
"learning_rate": 0.00031095572652102587,
|
|
"loss": 2.4467,
|
|
"step": 24300
|
|
},
|
|
{
|
|
"epoch": 7.676932796335781,
|
|
"grad_norm": 0.059080134993804094,
|
|
"learning_rate": 0.0003105561871679966,
|
|
"loss": 2.5429,
|
|
"step": 24305
|
|
},
|
|
{
|
|
"epoch": 7.67851220090026,
|
|
"grad_norm": 0.04417541045997235,
|
|
"learning_rate": 0.0003101568574704257,
|
|
"loss": 2.3606,
|
|
"step": 24310
|
|
},
|
|
{
|
|
"epoch": 7.68009160546474,
|
|
"grad_norm": 0.05869619463869087,
|
|
"learning_rate": 0.000309757737549747,
|
|
"loss": 2.3615,
|
|
"step": 24315
|
|
},
|
|
{
|
|
"epoch": 7.681671010029219,
|
|
"grad_norm": 0.050441644302818466,
|
|
"learning_rate": 0.00030935882752733,
|
|
"loss": 2.4206,
|
|
"step": 24320
|
|
},
|
|
{
|
|
"epoch": 7.683250414593698,
|
|
"grad_norm": 0.05476799175365138,
|
|
"learning_rate": 0.0003089601275244813,
|
|
"loss": 2.3986,
|
|
"step": 24325
|
|
},
|
|
{
|
|
"epoch": 7.684829819158177,
|
|
"grad_norm": 0.05157746693117757,
|
|
"learning_rate": 0.0003085616376624426,
|
|
"loss": 2.4136,
|
|
"step": 24330
|
|
},
|
|
{
|
|
"epoch": 7.686409223722657,
|
|
"grad_norm": 0.05541127015732504,
|
|
"learning_rate": 0.00030816335806239226,
|
|
"loss": 2.4145,
|
|
"step": 24335
|
|
},
|
|
{
|
|
"epoch": 7.687988628287136,
|
|
"grad_norm": 0.053786340179167384,
|
|
"learning_rate": 0.0003077652888454443,
|
|
"loss": 2.4215,
|
|
"step": 24340
|
|
},
|
|
{
|
|
"epoch": 7.689568032851615,
|
|
"grad_norm": 0.053708516642199,
|
|
"learning_rate": 0.000307367430132649,
|
|
"loss": 2.4234,
|
|
"step": 24345
|
|
},
|
|
{
|
|
"epoch": 7.691147437416094,
|
|
"grad_norm": 0.04735017320728578,
|
|
"learning_rate": 0.00030696978204499314,
|
|
"loss": 2.422,
|
|
"step": 24350
|
|
},
|
|
{
|
|
"epoch": 7.692726841980574,
|
|
"grad_norm": 0.05885430027941452,
|
|
"learning_rate": 0.00030657234470339866,
|
|
"loss": 2.4398,
|
|
"step": 24355
|
|
},
|
|
{
|
|
"epoch": 7.694306246545052,
|
|
"grad_norm": 0.05120891673299278,
|
|
"learning_rate": 0.00030617511822872336,
|
|
"loss": 2.4177,
|
|
"step": 24360
|
|
},
|
|
{
|
|
"epoch": 7.695885651109531,
|
|
"grad_norm": 0.04667274468962752,
|
|
"learning_rate": 0.00030577810274176197,
|
|
"loss": 2.3767,
|
|
"step": 24365
|
|
},
|
|
{
|
|
"epoch": 7.6974650556740105,
|
|
"grad_norm": 0.05273872070151963,
|
|
"learning_rate": 0.000305381298363244,
|
|
"loss": 2.4298,
|
|
"step": 24370
|
|
},
|
|
{
|
|
"epoch": 7.69904446023849,
|
|
"grad_norm": 0.05372805449575675,
|
|
"learning_rate": 0.00030498470521383525,
|
|
"loss": 2.3437,
|
|
"step": 24375
|
|
},
|
|
{
|
|
"epoch": 7.700623864802969,
|
|
"grad_norm": 0.05486851348986952,
|
|
"learning_rate": 0.000304588323414137,
|
|
"loss": 2.428,
|
|
"step": 24380
|
|
},
|
|
{
|
|
"epoch": 7.702203269367448,
|
|
"grad_norm": 0.061315818652699604,
|
|
"learning_rate": 0.00030419215308468615,
|
|
"loss": 2.468,
|
|
"step": 24385
|
|
},
|
|
{
|
|
"epoch": 7.7037826739319275,
|
|
"grad_norm": 0.05713437637146786,
|
|
"learning_rate": 0.00030379619434595627,
|
|
"loss": 2.4225,
|
|
"step": 24390
|
|
},
|
|
{
|
|
"epoch": 7.705362078496407,
|
|
"grad_norm": 0.04373827826896389,
|
|
"learning_rate": 0.00030340044731835526,
|
|
"loss": 2.3179,
|
|
"step": 24395
|
|
},
|
|
{
|
|
"epoch": 7.706941483060886,
|
|
"grad_norm": 0.05433521324153791,
|
|
"learning_rate": 0.0003030049121222278,
|
|
"loss": 2.4587,
|
|
"step": 24400
|
|
},
|
|
{
|
|
"epoch": 7.708520887625365,
|
|
"grad_norm": 0.0503021980471803,
|
|
"learning_rate": 0.0003026095888778533,
|
|
"loss": 2.4756,
|
|
"step": 24405
|
|
},
|
|
{
|
|
"epoch": 7.7101002921898445,
|
|
"grad_norm": 0.04998658543639774,
|
|
"learning_rate": 0.00030221447770544674,
|
|
"loss": 2.326,
|
|
"step": 24410
|
|
},
|
|
{
|
|
"epoch": 7.711679696754324,
|
|
"grad_norm": 0.05355613379832616,
|
|
"learning_rate": 0.00030181957872515964,
|
|
"loss": 2.3546,
|
|
"step": 24415
|
|
},
|
|
{
|
|
"epoch": 7.713259101318803,
|
|
"grad_norm": 0.04639426212008117,
|
|
"learning_rate": 0.0003014248920570778,
|
|
"loss": 2.4354,
|
|
"step": 24420
|
|
},
|
|
{
|
|
"epoch": 7.714838505883282,
|
|
"grad_norm": 0.054239389335720374,
|
|
"learning_rate": 0.00030103041782122286,
|
|
"loss": 2.3484,
|
|
"step": 24425
|
|
},
|
|
{
|
|
"epoch": 7.7164179104477615,
|
|
"grad_norm": 0.053889348363275934,
|
|
"learning_rate": 0.0003006361561375521,
|
|
"loss": 2.3737,
|
|
"step": 24430
|
|
},
|
|
{
|
|
"epoch": 7.717997315012241,
|
|
"grad_norm": 0.05224091854696833,
|
|
"learning_rate": 0.00030024210712595767,
|
|
"loss": 2.4265,
|
|
"step": 24435
|
|
},
|
|
{
|
|
"epoch": 7.71957671957672,
|
|
"grad_norm": 0.050474388133450517,
|
|
"learning_rate": 0.00029984827090626787,
|
|
"loss": 2.4149,
|
|
"step": 24440
|
|
},
|
|
{
|
|
"epoch": 7.721156124141199,
|
|
"grad_norm": 0.05000111897800184,
|
|
"learning_rate": 0.0002994546475982455,
|
|
"loss": 2.4561,
|
|
"step": 24445
|
|
},
|
|
{
|
|
"epoch": 7.722735528705678,
|
|
"grad_norm": 0.059446531671769685,
|
|
"learning_rate": 0.0002990612373215884,
|
|
"loss": 2.3574,
|
|
"step": 24450
|
|
},
|
|
{
|
|
"epoch": 7.724314933270157,
|
|
"grad_norm": 0.05997617920522711,
|
|
"learning_rate": 0.0002986680401959311,
|
|
"loss": 2.416,
|
|
"step": 24455
|
|
},
|
|
{
|
|
"epoch": 7.725894337834636,
|
|
"grad_norm": 0.050078519265076,
|
|
"learning_rate": 0.00029827505634084185,
|
|
"loss": 2.3241,
|
|
"step": 24460
|
|
},
|
|
{
|
|
"epoch": 7.727473742399115,
|
|
"grad_norm": 0.07124440721657235,
|
|
"learning_rate": 0.00029788228587582444,
|
|
"loss": 2.4418,
|
|
"step": 24465
|
|
},
|
|
{
|
|
"epoch": 7.729053146963595,
|
|
"grad_norm": 0.05454655571950861,
|
|
"learning_rate": 0.000297489728920318,
|
|
"loss": 2.4175,
|
|
"step": 24470
|
|
},
|
|
{
|
|
"epoch": 7.730632551528074,
|
|
"grad_norm": 0.05623786767915883,
|
|
"learning_rate": 0.00029709738559369615,
|
|
"loss": 2.3985,
|
|
"step": 24475
|
|
},
|
|
{
|
|
"epoch": 7.732211956092553,
|
|
"grad_norm": 0.05884938855594215,
|
|
"learning_rate": 0.00029670525601526864,
|
|
"loss": 2.4452,
|
|
"step": 24480
|
|
},
|
|
{
|
|
"epoch": 7.733791360657032,
|
|
"grad_norm": 0.05042583690627841,
|
|
"learning_rate": 0.00029631334030427915,
|
|
"loss": 2.4225,
|
|
"step": 24485
|
|
},
|
|
{
|
|
"epoch": 7.735370765221512,
|
|
"grad_norm": 0.04991076638423713,
|
|
"learning_rate": 0.00029592163857990704,
|
|
"loss": 2.3464,
|
|
"step": 24490
|
|
},
|
|
{
|
|
"epoch": 7.736950169785991,
|
|
"grad_norm": 0.062727062149512,
|
|
"learning_rate": 0.00029553015096126634,
|
|
"loss": 2.4083,
|
|
"step": 24495
|
|
},
|
|
{
|
|
"epoch": 7.73852957435047,
|
|
"grad_norm": 0.04802178819470241,
|
|
"learning_rate": 0.0002951388775674053,
|
|
"loss": 2.3909,
|
|
"step": 24500
|
|
},
|
|
{
|
|
"epoch": 7.740108978914949,
|
|
"grad_norm": 0.05606591642278618,
|
|
"learning_rate": 0.0002947478185173085,
|
|
"loss": 2.438,
|
|
"step": 24505
|
|
},
|
|
{
|
|
"epoch": 7.741688383479429,
|
|
"grad_norm": 0.04736506859581747,
|
|
"learning_rate": 0.00029435697392989405,
|
|
"loss": 2.4206,
|
|
"step": 24510
|
|
},
|
|
{
|
|
"epoch": 7.743267788043908,
|
|
"grad_norm": 0.051938138115853495,
|
|
"learning_rate": 0.00029396634392401535,
|
|
"loss": 2.4396,
|
|
"step": 24515
|
|
},
|
|
{
|
|
"epoch": 7.744847192608386,
|
|
"grad_norm": 0.049781245083747594,
|
|
"learning_rate": 0.0002935759286184605,
|
|
"loss": 2.29,
|
|
"step": 24520
|
|
},
|
|
{
|
|
"epoch": 7.746426597172865,
|
|
"grad_norm": 0.04703249847453405,
|
|
"learning_rate": 0.000293185728131952,
|
|
"loss": 2.3294,
|
|
"step": 24525
|
|
},
|
|
{
|
|
"epoch": 7.748006001737345,
|
|
"grad_norm": 0.04840887139950567,
|
|
"learning_rate": 0.0002927957425831479,
|
|
"loss": 2.4352,
|
|
"step": 24530
|
|
},
|
|
{
|
|
"epoch": 7.749585406301824,
|
|
"grad_norm": 0.05414582291796758,
|
|
"learning_rate": 0.00029240597209064,
|
|
"loss": 2.4122,
|
|
"step": 24535
|
|
},
|
|
{
|
|
"epoch": 7.751164810866303,
|
|
"grad_norm": 0.05452155431449084,
|
|
"learning_rate": 0.0002920164167729548,
|
|
"loss": 2.4029,
|
|
"step": 24540
|
|
},
|
|
{
|
|
"epoch": 7.752744215430782,
|
|
"grad_norm": 0.0537875525333651,
|
|
"learning_rate": 0.00029162707674855416,
|
|
"loss": 2.3363,
|
|
"step": 24545
|
|
},
|
|
{
|
|
"epoch": 7.754323619995262,
|
|
"grad_norm": 0.057189373852712784,
|
|
"learning_rate": 0.00029123795213583346,
|
|
"loss": 2.4146,
|
|
"step": 24550
|
|
},
|
|
{
|
|
"epoch": 7.755903024559741,
|
|
"grad_norm": 0.04624352059819483,
|
|
"learning_rate": 0.0002908490430531232,
|
|
"loss": 2.5916,
|
|
"step": 24555
|
|
},
|
|
{
|
|
"epoch": 7.75748242912422,
|
|
"grad_norm": 0.04924444013165075,
|
|
"learning_rate": 0.00029046034961868793,
|
|
"loss": 2.3289,
|
|
"step": 24560
|
|
},
|
|
{
|
|
"epoch": 7.759061833688699,
|
|
"grad_norm": 0.0494404459697529,
|
|
"learning_rate": 0.0002900718719507268,
|
|
"loss": 2.4075,
|
|
"step": 24565
|
|
},
|
|
{
|
|
"epoch": 7.760641238253179,
|
|
"grad_norm": 0.049554309353999876,
|
|
"learning_rate": 0.00028968361016737376,
|
|
"loss": 2.3909,
|
|
"step": 24570
|
|
},
|
|
{
|
|
"epoch": 7.762220642817658,
|
|
"grad_norm": 0.050877082667088167,
|
|
"learning_rate": 0.00028929556438669625,
|
|
"loss": 2.5629,
|
|
"step": 24575
|
|
},
|
|
{
|
|
"epoch": 7.763800047382137,
|
|
"grad_norm": 0.052260925315823034,
|
|
"learning_rate": 0.00028890773472669716,
|
|
"loss": 2.3817,
|
|
"step": 24580
|
|
},
|
|
{
|
|
"epoch": 7.765379451946616,
|
|
"grad_norm": 0.05061621254808778,
|
|
"learning_rate": 0.0002885201213053126,
|
|
"loss": 2.4837,
|
|
"step": 24585
|
|
},
|
|
{
|
|
"epoch": 7.766958856511096,
|
|
"grad_norm": 0.04983143637760769,
|
|
"learning_rate": 0.00028813272424041306,
|
|
"loss": 2.4145,
|
|
"step": 24590
|
|
},
|
|
{
|
|
"epoch": 7.768538261075575,
|
|
"grad_norm": 0.04506057127475669,
|
|
"learning_rate": 0.0002877455436498041,
|
|
"loss": 2.4351,
|
|
"step": 24595
|
|
},
|
|
{
|
|
"epoch": 7.770117665640054,
|
|
"grad_norm": 0.05247532758561306,
|
|
"learning_rate": 0.0002873585796512247,
|
|
"loss": 2.3988,
|
|
"step": 24600
|
|
},
|
|
{
|
|
"epoch": 7.771697070204533,
|
|
"grad_norm": 0.05439760141095109,
|
|
"learning_rate": 0.000286971832362348,
|
|
"loss": 2.3268,
|
|
"step": 24605
|
|
},
|
|
{
|
|
"epoch": 7.773276474769012,
|
|
"grad_norm": 0.05627223068340628,
|
|
"learning_rate": 0.00028658530190078135,
|
|
"loss": 2.3374,
|
|
"step": 24610
|
|
},
|
|
{
|
|
"epoch": 7.774855879333491,
|
|
"grad_norm": 0.062242832355261256,
|
|
"learning_rate": 0.000286198988384066,
|
|
"loss": 2.5214,
|
|
"step": 24615
|
|
},
|
|
{
|
|
"epoch": 7.77643528389797,
|
|
"grad_norm": 0.05252140392419768,
|
|
"learning_rate": 0.0002858128919296781,
|
|
"loss": 2.4114,
|
|
"step": 24620
|
|
},
|
|
{
|
|
"epoch": 7.7780146884624495,
|
|
"grad_norm": 0.04564201244557298,
|
|
"learning_rate": 0.00028542701265502627,
|
|
"loss": 2.3817,
|
|
"step": 24625
|
|
},
|
|
{
|
|
"epoch": 7.779594093026929,
|
|
"grad_norm": 0.06126100422801149,
|
|
"learning_rate": 0.0002850413506774546,
|
|
"loss": 2.4307,
|
|
"step": 24630
|
|
},
|
|
{
|
|
"epoch": 7.781173497591408,
|
|
"grad_norm": 0.04501233507567427,
|
|
"learning_rate": 0.0002846559061142403,
|
|
"loss": 2.3552,
|
|
"step": 24635
|
|
},
|
|
{
|
|
"epoch": 7.782752902155887,
|
|
"grad_norm": 0.045411464782780374,
|
|
"learning_rate": 0.0002842706790825944,
|
|
"loss": 2.3638,
|
|
"step": 24640
|
|
},
|
|
{
|
|
"epoch": 7.7843323067203665,
|
|
"grad_norm": 0.05683693980555025,
|
|
"learning_rate": 0.0002838856696996621,
|
|
"loss": 2.429,
|
|
"step": 24645
|
|
},
|
|
{
|
|
"epoch": 7.785911711284846,
|
|
"grad_norm": 0.05565246689756234,
|
|
"learning_rate": 0.00028350087808252234,
|
|
"loss": 2.4186,
|
|
"step": 24650
|
|
},
|
|
{
|
|
"epoch": 7.787491115849325,
|
|
"grad_norm": 0.06675077757834104,
|
|
"learning_rate": 0.00028311630434818736,
|
|
"loss": 2.4561,
|
|
"step": 24655
|
|
},
|
|
{
|
|
"epoch": 7.789070520413804,
|
|
"grad_norm": 0.06868642711368238,
|
|
"learning_rate": 0.00028273194861360416,
|
|
"loss": 2.4313,
|
|
"step": 24660
|
|
},
|
|
{
|
|
"epoch": 7.7906499249782835,
|
|
"grad_norm": 0.06557858906849585,
|
|
"learning_rate": 0.00028234781099565243,
|
|
"loss": 2.3944,
|
|
"step": 24665
|
|
},
|
|
{
|
|
"epoch": 7.792229329542763,
|
|
"grad_norm": 0.04551304143974813,
|
|
"learning_rate": 0.00028196389161114644,
|
|
"loss": 2.4348,
|
|
"step": 24670
|
|
},
|
|
{
|
|
"epoch": 7.793808734107242,
|
|
"grad_norm": 0.04838277501568106,
|
|
"learning_rate": 0.00028158019057683336,
|
|
"loss": 2.4776,
|
|
"step": 24675
|
|
},
|
|
{
|
|
"epoch": 7.79538813867172,
|
|
"grad_norm": 0.053328847150971935,
|
|
"learning_rate": 0.0002811967080093939,
|
|
"loss": 2.34,
|
|
"step": 24680
|
|
},
|
|
{
|
|
"epoch": 7.7969675432362,
|
|
"grad_norm": 0.05287452436354166,
|
|
"learning_rate": 0.0002808134440254433,
|
|
"loss": 2.4273,
|
|
"step": 24685
|
|
},
|
|
{
|
|
"epoch": 7.798546947800679,
|
|
"grad_norm": 0.047372276973564276,
|
|
"learning_rate": 0.0002804303987415294,
|
|
"loss": 2.4362,
|
|
"step": 24690
|
|
},
|
|
{
|
|
"epoch": 7.800126352365158,
|
|
"grad_norm": 0.053263680184324806,
|
|
"learning_rate": 0.0002800475722741337,
|
|
"loss": 2.4867,
|
|
"step": 24695
|
|
},
|
|
{
|
|
"epoch": 7.801705756929637,
|
|
"grad_norm": 0.058508417205338135,
|
|
"learning_rate": 0.0002796649647396714,
|
|
"loss": 2.4478,
|
|
"step": 24700
|
|
},
|
|
{
|
|
"epoch": 7.8032851614941166,
|
|
"grad_norm": 0.06229616365010954,
|
|
"learning_rate": 0.00027928257625449074,
|
|
"loss": 2.3928,
|
|
"step": 24705
|
|
},
|
|
{
|
|
"epoch": 7.804864566058596,
|
|
"grad_norm": 0.06458054579349871,
|
|
"learning_rate": 0.00027890040693487404,
|
|
"loss": 2.356,
|
|
"step": 24710
|
|
},
|
|
{
|
|
"epoch": 7.806443970623075,
|
|
"grad_norm": 0.05575505674396327,
|
|
"learning_rate": 0.00027851845689703605,
|
|
"loss": 2.4082,
|
|
"step": 24715
|
|
},
|
|
{
|
|
"epoch": 7.808023375187554,
|
|
"grad_norm": 0.06741414204191258,
|
|
"learning_rate": 0.00027813672625712606,
|
|
"loss": 2.507,
|
|
"step": 24720
|
|
},
|
|
{
|
|
"epoch": 7.8096027797520335,
|
|
"grad_norm": 0.05717411663408411,
|
|
"learning_rate": 0.00027775521513122536,
|
|
"loss": 2.4304,
|
|
"step": 24725
|
|
},
|
|
{
|
|
"epoch": 7.811182184316513,
|
|
"grad_norm": 0.06218825335938903,
|
|
"learning_rate": 0.0002773739236353493,
|
|
"loss": 2.4569,
|
|
"step": 24730
|
|
},
|
|
{
|
|
"epoch": 7.812761588880992,
|
|
"grad_norm": 0.06296445268481235,
|
|
"learning_rate": 0.00027699285188544597,
|
|
"loss": 2.4216,
|
|
"step": 24735
|
|
},
|
|
{
|
|
"epoch": 7.814340993445471,
|
|
"grad_norm": 0.049533919825785235,
|
|
"learning_rate": 0.00027661199999739686,
|
|
"loss": 2.389,
|
|
"step": 24740
|
|
},
|
|
{
|
|
"epoch": 7.8159203980099505,
|
|
"grad_norm": 0.0484112010932451,
|
|
"learning_rate": 0.00027623136808701675,
|
|
"loss": 2.4212,
|
|
"step": 24745
|
|
},
|
|
{
|
|
"epoch": 7.81749980257443,
|
|
"grad_norm": 0.04682500088046376,
|
|
"learning_rate": 0.0002758509562700535,
|
|
"loss": 2.4025,
|
|
"step": 24750
|
|
},
|
|
{
|
|
"epoch": 7.819079207138909,
|
|
"grad_norm": 0.06106433282246892,
|
|
"learning_rate": 0.00027547076466218735,
|
|
"loss": 2.4387,
|
|
"step": 24755
|
|
},
|
|
{
|
|
"epoch": 7.820658611703388,
|
|
"grad_norm": 0.08171965748716478,
|
|
"learning_rate": 0.00027509079337903285,
|
|
"loss": 2.4323,
|
|
"step": 24760
|
|
},
|
|
{
|
|
"epoch": 7.822238016267867,
|
|
"grad_norm": 0.05081140541838431,
|
|
"learning_rate": 0.0002747110425361364,
|
|
"loss": 2.4346,
|
|
"step": 24765
|
|
},
|
|
{
|
|
"epoch": 7.823817420832346,
|
|
"grad_norm": 0.06443489431328071,
|
|
"learning_rate": 0.00027433151224897777,
|
|
"loss": 2.4403,
|
|
"step": 24770
|
|
},
|
|
{
|
|
"epoch": 7.825396825396825,
|
|
"grad_norm": 0.05517541734537136,
|
|
"learning_rate": 0.0002739522026329702,
|
|
"loss": 2.4178,
|
|
"step": 24775
|
|
},
|
|
{
|
|
"epoch": 7.826976229961304,
|
|
"grad_norm": 0.05212450955006681,
|
|
"learning_rate": 0.0002735731138034587,
|
|
"loss": 2.3864,
|
|
"step": 24780
|
|
},
|
|
{
|
|
"epoch": 7.828555634525784,
|
|
"grad_norm": 0.05516649716867154,
|
|
"learning_rate": 0.0002731942458757223,
|
|
"loss": 2.3397,
|
|
"step": 24785
|
|
},
|
|
{
|
|
"epoch": 7.830135039090263,
|
|
"grad_norm": 0.058408999564183946,
|
|
"learning_rate": 0.0002728155989649719,
|
|
"loss": 2.5221,
|
|
"step": 24790
|
|
},
|
|
{
|
|
"epoch": 7.831714443654742,
|
|
"grad_norm": 0.05498459475623652,
|
|
"learning_rate": 0.00027243717318635143,
|
|
"loss": 2.4406,
|
|
"step": 24795
|
|
},
|
|
{
|
|
"epoch": 7.833293848219221,
|
|
"grad_norm": 0.057419232996713934,
|
|
"learning_rate": 0.0002720589686549383,
|
|
"loss": 2.3542,
|
|
"step": 24800
|
|
},
|
|
{
|
|
"epoch": 7.834873252783701,
|
|
"grad_norm": 0.05003476615573564,
|
|
"learning_rate": 0.00027168098548574173,
|
|
"loss": 2.4578,
|
|
"step": 24805
|
|
},
|
|
{
|
|
"epoch": 7.83645265734818,
|
|
"grad_norm": 0.05293468218628365,
|
|
"learning_rate": 0.0002713032237937043,
|
|
"loss": 2.2924,
|
|
"step": 24810
|
|
},
|
|
{
|
|
"epoch": 7.838032061912659,
|
|
"grad_norm": 0.04996153521631309,
|
|
"learning_rate": 0.00027092568369370076,
|
|
"loss": 2.3176,
|
|
"step": 24815
|
|
},
|
|
{
|
|
"epoch": 7.839611466477138,
|
|
"grad_norm": 0.04739933322752104,
|
|
"learning_rate": 0.00027054836530053864,
|
|
"loss": 2.4019,
|
|
"step": 24820
|
|
},
|
|
{
|
|
"epoch": 7.841190871041618,
|
|
"grad_norm": 0.04479465097747017,
|
|
"learning_rate": 0.000270171268728958,
|
|
"loss": 2.4003,
|
|
"step": 24825
|
|
},
|
|
{
|
|
"epoch": 7.842770275606097,
|
|
"grad_norm": 0.05562015652094908,
|
|
"learning_rate": 0.0002697943940936313,
|
|
"loss": 2.3836,
|
|
"step": 24830
|
|
},
|
|
{
|
|
"epoch": 7.844349680170575,
|
|
"grad_norm": 0.04564832353498086,
|
|
"learning_rate": 0.0002694177415091642,
|
|
"loss": 2.2812,
|
|
"step": 24835
|
|
},
|
|
{
|
|
"epoch": 7.8459290847350545,
|
|
"grad_norm": 0.04795302510671516,
|
|
"learning_rate": 0.0002690413110900941,
|
|
"loss": 2.3891,
|
|
"step": 24840
|
|
},
|
|
{
|
|
"epoch": 7.847508489299534,
|
|
"grad_norm": 0.04774434859499396,
|
|
"learning_rate": 0.0002686651029508908,
|
|
"loss": 2.461,
|
|
"step": 24845
|
|
},
|
|
{
|
|
"epoch": 7.849087893864013,
|
|
"grad_norm": 0.054342846436838,
|
|
"learning_rate": 0.0002682891172059573,
|
|
"loss": 2.3694,
|
|
"step": 24850
|
|
},
|
|
{
|
|
"epoch": 7.850667298428492,
|
|
"grad_norm": 0.051858819495668396,
|
|
"learning_rate": 0.0002679133539696279,
|
|
"loss": 2.4877,
|
|
"step": 24855
|
|
},
|
|
{
|
|
"epoch": 7.8522467029929714,
|
|
"grad_norm": 0.04923559555748598,
|
|
"learning_rate": 0.00026753781335617054,
|
|
"loss": 2.4505,
|
|
"step": 24860
|
|
},
|
|
{
|
|
"epoch": 7.853826107557451,
|
|
"grad_norm": 0.053118258304910675,
|
|
"learning_rate": 0.0002671624954797842,
|
|
"loss": 2.3934,
|
|
"step": 24865
|
|
},
|
|
{
|
|
"epoch": 7.85540551212193,
|
|
"grad_norm": 0.055486172400068534,
|
|
"learning_rate": 0.00026678740045460084,
|
|
"loss": 2.3927,
|
|
"step": 24870
|
|
},
|
|
{
|
|
"epoch": 7.856984916686409,
|
|
"grad_norm": 0.0451498304811942,
|
|
"learning_rate": 0.00026641252839468434,
|
|
"loss": 2.4313,
|
|
"step": 24875
|
|
},
|
|
{
|
|
"epoch": 7.858564321250888,
|
|
"grad_norm": 0.0578150705886433,
|
|
"learning_rate": 0.0002660378794140309,
|
|
"loss": 2.3502,
|
|
"step": 24880
|
|
},
|
|
{
|
|
"epoch": 7.860143725815368,
|
|
"grad_norm": 0.05501558259689232,
|
|
"learning_rate": 0.00026566345362656873,
|
|
"loss": 2.4126,
|
|
"step": 24885
|
|
},
|
|
{
|
|
"epoch": 7.861723130379847,
|
|
"grad_norm": 0.04988050009494289,
|
|
"learning_rate": 0.00026528925114615876,
|
|
"loss": 2.3797,
|
|
"step": 24890
|
|
},
|
|
{
|
|
"epoch": 7.863302534944326,
|
|
"grad_norm": 0.04668678481382195,
|
|
"learning_rate": 0.00026491527208659296,
|
|
"loss": 2.4605,
|
|
"step": 24895
|
|
},
|
|
{
|
|
"epoch": 7.864881939508805,
|
|
"grad_norm": 0.05149872000751331,
|
|
"learning_rate": 0.00026454151656159664,
|
|
"loss": 2.4877,
|
|
"step": 24900
|
|
},
|
|
{
|
|
"epoch": 7.866461344073285,
|
|
"grad_norm": 0.060029637050715186,
|
|
"learning_rate": 0.0002641679846848262,
|
|
"loss": 2.4654,
|
|
"step": 24905
|
|
},
|
|
{
|
|
"epoch": 7.868040748637764,
|
|
"grad_norm": 0.05459942097404522,
|
|
"learning_rate": 0.0002637946765698702,
|
|
"loss": 2.4399,
|
|
"step": 24910
|
|
},
|
|
{
|
|
"epoch": 7.869620153202243,
|
|
"grad_norm": 0.05018770004469524,
|
|
"learning_rate": 0.0002634215923302494,
|
|
"loss": 2.361,
|
|
"step": 24915
|
|
},
|
|
{
|
|
"epoch": 7.871199557766722,
|
|
"grad_norm": 0.049283660791538106,
|
|
"learning_rate": 0.0002630487320794158,
|
|
"loss": 2.5835,
|
|
"step": 24920
|
|
},
|
|
{
|
|
"epoch": 7.872778962331201,
|
|
"grad_norm": 0.04853385403241942,
|
|
"learning_rate": 0.0002626760959307547,
|
|
"loss": 2.3399,
|
|
"step": 24925
|
|
},
|
|
{
|
|
"epoch": 7.87435836689568,
|
|
"grad_norm": 0.05341415347819443,
|
|
"learning_rate": 0.00026230368399758185,
|
|
"loss": 2.3583,
|
|
"step": 24930
|
|
},
|
|
{
|
|
"epoch": 7.875937771460159,
|
|
"grad_norm": 0.050805421265106745,
|
|
"learning_rate": 0.0002619314963931452,
|
|
"loss": 2.3894,
|
|
"step": 24935
|
|
},
|
|
{
|
|
"epoch": 7.8775171760246385,
|
|
"grad_norm": 0.05226530326416026,
|
|
"learning_rate": 0.0002615595332306251,
|
|
"loss": 2.3752,
|
|
"step": 24940
|
|
},
|
|
{
|
|
"epoch": 7.879096580589118,
|
|
"grad_norm": 0.055823263173015,
|
|
"learning_rate": 0.00026118779462313267,
|
|
"loss": 2.4041,
|
|
"step": 24945
|
|
},
|
|
{
|
|
"epoch": 7.880675985153597,
|
|
"grad_norm": 0.04994407446763293,
|
|
"learning_rate": 0.00026081628068371176,
|
|
"loss": 2.3953,
|
|
"step": 24950
|
|
},
|
|
{
|
|
"epoch": 7.882255389718076,
|
|
"grad_norm": 0.053867741991867404,
|
|
"learning_rate": 0.00026044499152533707,
|
|
"loss": 2.4296,
|
|
"step": 24955
|
|
},
|
|
{
|
|
"epoch": 7.8838347942825555,
|
|
"grad_norm": 0.05339463529783473,
|
|
"learning_rate": 0.0002600739272609154,
|
|
"loss": 2.3963,
|
|
"step": 24960
|
|
},
|
|
{
|
|
"epoch": 7.885414198847035,
|
|
"grad_norm": 0.04730043850167995,
|
|
"learning_rate": 0.0002597030880032848,
|
|
"loss": 2.4159,
|
|
"step": 24965
|
|
},
|
|
{
|
|
"epoch": 7.886993603411514,
|
|
"grad_norm": 0.04781844530164919,
|
|
"learning_rate": 0.00025933247386521506,
|
|
"loss": 2.4254,
|
|
"step": 24970
|
|
},
|
|
{
|
|
"epoch": 7.888573007975993,
|
|
"grad_norm": 0.04725599956693539,
|
|
"learning_rate": 0.000258962084959408,
|
|
"loss": 2.3778,
|
|
"step": 24975
|
|
},
|
|
{
|
|
"epoch": 7.8901524125404725,
|
|
"grad_norm": 0.04833537867243059,
|
|
"learning_rate": 0.0002585919213984963,
|
|
"loss": 2.4309,
|
|
"step": 24980
|
|
},
|
|
{
|
|
"epoch": 7.891731817104952,
|
|
"grad_norm": 0.05215626198944377,
|
|
"learning_rate": 0.00025822198329504407,
|
|
"loss": 2.347,
|
|
"step": 24985
|
|
},
|
|
{
|
|
"epoch": 7.893311221669431,
|
|
"grad_norm": 0.06425961455609468,
|
|
"learning_rate": 0.0002578522707615476,
|
|
"loss": 2.4576,
|
|
"step": 24990
|
|
},
|
|
{
|
|
"epoch": 7.894890626233909,
|
|
"grad_norm": 0.04813500797824529,
|
|
"learning_rate": 0.0002574827839104339,
|
|
"loss": 2.4278,
|
|
"step": 24995
|
|
},
|
|
{
|
|
"epoch": 7.896470030798389,
|
|
"grad_norm": 0.08809223004983643,
|
|
"learning_rate": 0.00025711352285406154,
|
|
"loss": 2.3338,
|
|
"step": 25000
|
|
},
|
|
{
|
|
"epoch": 7.898049435362868,
|
|
"grad_norm": 0.05086475811689399,
|
|
"learning_rate": 0.00025674448770472046,
|
|
"loss": 2.4051,
|
|
"step": 25005
|
|
},
|
|
{
|
|
"epoch": 7.899628839927347,
|
|
"grad_norm": 0.05044024640626117,
|
|
"learning_rate": 0.00025637567857463153,
|
|
"loss": 2.4012,
|
|
"step": 25010
|
|
},
|
|
{
|
|
"epoch": 7.901208244491826,
|
|
"grad_norm": 0.06111290308917735,
|
|
"learning_rate": 0.0002560070955759479,
|
|
"loss": 2.4507,
|
|
"step": 25015
|
|
},
|
|
{
|
|
"epoch": 7.902787649056306,
|
|
"grad_norm": 0.04877300072310503,
|
|
"learning_rate": 0.00025563873882075304,
|
|
"loss": 2.4372,
|
|
"step": 25020
|
|
},
|
|
{
|
|
"epoch": 7.904367053620785,
|
|
"grad_norm": 0.05316787985740273,
|
|
"learning_rate": 0.0002552706084210615,
|
|
"loss": 2.4281,
|
|
"step": 25025
|
|
},
|
|
{
|
|
"epoch": 7.905946458185264,
|
|
"grad_norm": 0.05566724684639508,
|
|
"learning_rate": 0.00025490270448882014,
|
|
"loss": 2.3855,
|
|
"step": 25030
|
|
},
|
|
{
|
|
"epoch": 7.907525862749743,
|
|
"grad_norm": 0.04554384966620676,
|
|
"learning_rate": 0.00025453502713590546,
|
|
"loss": 2.3956,
|
|
"step": 25035
|
|
},
|
|
{
|
|
"epoch": 7.909105267314223,
|
|
"grad_norm": 0.052073954247616266,
|
|
"learning_rate": 0.0002541675764741264,
|
|
"loss": 2.2434,
|
|
"step": 25040
|
|
},
|
|
{
|
|
"epoch": 7.910684671878702,
|
|
"grad_norm": 0.05025299714211612,
|
|
"learning_rate": 0.00025380035261522206,
|
|
"loss": 2.3467,
|
|
"step": 25045
|
|
},
|
|
{
|
|
"epoch": 7.912264076443181,
|
|
"grad_norm": 0.04376413793424298,
|
|
"learning_rate": 0.0002534333556708628,
|
|
"loss": 2.5498,
|
|
"step": 25050
|
|
},
|
|
{
|
|
"epoch": 7.91384348100766,
|
|
"grad_norm": 0.05257378214001336,
|
|
"learning_rate": 0.0002530665857526503,
|
|
"loss": 2.3848,
|
|
"step": 25055
|
|
},
|
|
{
|
|
"epoch": 7.91542288557214,
|
|
"grad_norm": 0.050520794839413734,
|
|
"learning_rate": 0.00025270004297211633,
|
|
"loss": 2.5111,
|
|
"step": 25060
|
|
},
|
|
{
|
|
"epoch": 7.917002290136619,
|
|
"grad_norm": 0.05498350326076211,
|
|
"learning_rate": 0.00025233372744072505,
|
|
"loss": 2.419,
|
|
"step": 25065
|
|
},
|
|
{
|
|
"epoch": 7.918581694701098,
|
|
"grad_norm": 0.05073618067020773,
|
|
"learning_rate": 0.0002519676392698703,
|
|
"loss": 2.3899,
|
|
"step": 25070
|
|
},
|
|
{
|
|
"epoch": 7.920161099265577,
|
|
"grad_norm": 0.06477949618644933,
|
|
"learning_rate": 0.0002516017785708767,
|
|
"loss": 2.5233,
|
|
"step": 25075
|
|
},
|
|
{
|
|
"epoch": 7.921740503830057,
|
|
"grad_norm": 0.04964483217453487,
|
|
"learning_rate": 0.0002512361454550011,
|
|
"loss": 2.3623,
|
|
"step": 25080
|
|
},
|
|
{
|
|
"epoch": 7.923319908394535,
|
|
"grad_norm": 0.05638298112505006,
|
|
"learning_rate": 0.0002508707400334296,
|
|
"loss": 2.4291,
|
|
"step": 25085
|
|
},
|
|
{
|
|
"epoch": 7.924899312959014,
|
|
"grad_norm": 0.04650636475469773,
|
|
"learning_rate": 0.0002505055624172796,
|
|
"loss": 2.3508,
|
|
"step": 25090
|
|
},
|
|
{
|
|
"epoch": 7.926478717523493,
|
|
"grad_norm": 0.04639131336961897,
|
|
"learning_rate": 0.00025014061271759957,
|
|
"loss": 2.3451,
|
|
"step": 25095
|
|
},
|
|
{
|
|
"epoch": 7.928058122087973,
|
|
"grad_norm": 0.05580224658566145,
|
|
"learning_rate": 0.0002497758910453679,
|
|
"loss": 2.3823,
|
|
"step": 25100
|
|
},
|
|
{
|
|
"epoch": 7.929637526652452,
|
|
"grad_norm": 0.049781739134590074,
|
|
"learning_rate": 0.00024941139751149464,
|
|
"loss": 2.3503,
|
|
"step": 25105
|
|
},
|
|
{
|
|
"epoch": 7.931216931216931,
|
|
"grad_norm": 0.044625547079489805,
|
|
"learning_rate": 0.00024904713222681995,
|
|
"loss": 2.3956,
|
|
"step": 25110
|
|
},
|
|
{
|
|
"epoch": 7.93279633578141,
|
|
"grad_norm": 0.05238646039736108,
|
|
"learning_rate": 0.000248683095302114,
|
|
"loss": 2.4294,
|
|
"step": 25115
|
|
},
|
|
{
|
|
"epoch": 7.93437574034589,
|
|
"grad_norm": 0.044664781114429127,
|
|
"learning_rate": 0.0002483192868480787,
|
|
"loss": 2.3869,
|
|
"step": 25120
|
|
},
|
|
{
|
|
"epoch": 7.935955144910369,
|
|
"grad_norm": 0.04908312164203827,
|
|
"learning_rate": 0.0002479557069753454,
|
|
"loss": 2.3733,
|
|
"step": 25125
|
|
},
|
|
{
|
|
"epoch": 7.937534549474848,
|
|
"grad_norm": 0.052091218263155596,
|
|
"learning_rate": 0.0002475923557944769,
|
|
"loss": 2.4041,
|
|
"step": 25130
|
|
},
|
|
{
|
|
"epoch": 7.939113954039327,
|
|
"grad_norm": 0.04951036878663111,
|
|
"learning_rate": 0.0002472292334159658,
|
|
"loss": 2.2851,
|
|
"step": 25135
|
|
},
|
|
{
|
|
"epoch": 7.940693358603807,
|
|
"grad_norm": 0.04310144923803069,
|
|
"learning_rate": 0.0002468663399502352,
|
|
"loss": 2.3429,
|
|
"step": 25140
|
|
},
|
|
{
|
|
"epoch": 7.942272763168286,
|
|
"grad_norm": 0.0797289602218377,
|
|
"learning_rate": 0.0002465036755076387,
|
|
"loss": 2.4893,
|
|
"step": 25145
|
|
},
|
|
{
|
|
"epoch": 7.943852167732764,
|
|
"grad_norm": 0.052700644932645854,
|
|
"learning_rate": 0.0002461412401984601,
|
|
"loss": 2.3395,
|
|
"step": 25150
|
|
},
|
|
{
|
|
"epoch": 7.9454315722972435,
|
|
"grad_norm": 0.051567074343615994,
|
|
"learning_rate": 0.000245779034132914,
|
|
"loss": 2.3475,
|
|
"step": 25155
|
|
},
|
|
{
|
|
"epoch": 7.947010976861723,
|
|
"grad_norm": 0.04728199495954336,
|
|
"learning_rate": 0.0002454170574211448,
|
|
"loss": 2.4107,
|
|
"step": 25160
|
|
},
|
|
{
|
|
"epoch": 7.948590381426202,
|
|
"grad_norm": 0.04754404914104074,
|
|
"learning_rate": 0.00024505531017322705,
|
|
"loss": 2.3486,
|
|
"step": 25165
|
|
},
|
|
{
|
|
"epoch": 7.950169785990681,
|
|
"grad_norm": 0.05207347438832702,
|
|
"learning_rate": 0.0002446937924991661,
|
|
"loss": 2.3652,
|
|
"step": 25170
|
|
},
|
|
{
|
|
"epoch": 7.9517491905551605,
|
|
"grad_norm": 0.05252867657084638,
|
|
"learning_rate": 0.0002443325045088972,
|
|
"loss": 2.425,
|
|
"step": 25175
|
|
},
|
|
{
|
|
"epoch": 7.95332859511964,
|
|
"grad_norm": 0.05328886315468783,
|
|
"learning_rate": 0.00024397144631228552,
|
|
"loss": 2.4787,
|
|
"step": 25180
|
|
},
|
|
{
|
|
"epoch": 7.954907999684119,
|
|
"grad_norm": 0.06955063626861216,
|
|
"learning_rate": 0.00024361061801912666,
|
|
"loss": 2.5098,
|
|
"step": 25185
|
|
},
|
|
{
|
|
"epoch": 7.956487404248598,
|
|
"grad_norm": 0.050174054653189894,
|
|
"learning_rate": 0.00024325001973914584,
|
|
"loss": 2.4256,
|
|
"step": 25190
|
|
},
|
|
{
|
|
"epoch": 7.9580668088130775,
|
|
"grad_norm": 0.061649303236535384,
|
|
"learning_rate": 0.00024288965158199939,
|
|
"loss": 2.3466,
|
|
"step": 25195
|
|
},
|
|
{
|
|
"epoch": 7.959646213377557,
|
|
"grad_norm": 0.058243615639442824,
|
|
"learning_rate": 0.00024252951365727216,
|
|
"loss": 2.4067,
|
|
"step": 25200
|
|
},
|
|
{
|
|
"epoch": 7.961225617942036,
|
|
"grad_norm": 0.0539283275757395,
|
|
"learning_rate": 0.00024216960607448057,
|
|
"loss": 2.4036,
|
|
"step": 25205
|
|
},
|
|
{
|
|
"epoch": 7.962805022506515,
|
|
"grad_norm": 0.05536339290923945,
|
|
"learning_rate": 0.00024180992894306985,
|
|
"loss": 2.4155,
|
|
"step": 25210
|
|
},
|
|
{
|
|
"epoch": 7.9643844270709945,
|
|
"grad_norm": 0.047689861652926116,
|
|
"learning_rate": 0.0002414504823724153,
|
|
"loss": 2.44,
|
|
"step": 25215
|
|
},
|
|
{
|
|
"epoch": 7.965963831635474,
|
|
"grad_norm": 0.04345815695402367,
|
|
"learning_rate": 0.00024109126647182277,
|
|
"loss": 2.3307,
|
|
"step": 25220
|
|
},
|
|
{
|
|
"epoch": 7.967543236199953,
|
|
"grad_norm": 0.04900328132371538,
|
|
"learning_rate": 0.00024073228135052728,
|
|
"loss": 2.4016,
|
|
"step": 25225
|
|
},
|
|
{
|
|
"epoch": 7.969122640764432,
|
|
"grad_norm": 0.05385073368305318,
|
|
"learning_rate": 0.000240373527117694,
|
|
"loss": 2.4529,
|
|
"step": 25230
|
|
},
|
|
{
|
|
"epoch": 7.9707020453289115,
|
|
"grad_norm": 0.0456740677467157,
|
|
"learning_rate": 0.00024001500388241771,
|
|
"loss": 2.3638,
|
|
"step": 25235
|
|
},
|
|
{
|
|
"epoch": 7.97228144989339,
|
|
"grad_norm": 0.04980594664486062,
|
|
"learning_rate": 0.00023965671175372273,
|
|
"loss": 2.3583,
|
|
"step": 25240
|
|
},
|
|
{
|
|
"epoch": 7.973860854457869,
|
|
"grad_norm": 0.05093312227061897,
|
|
"learning_rate": 0.00023929865084056413,
|
|
"loss": 2.4135,
|
|
"step": 25245
|
|
},
|
|
{
|
|
"epoch": 7.975440259022348,
|
|
"grad_norm": 0.04249090971671767,
|
|
"learning_rate": 0.00023894082125182548,
|
|
"loss": 2.302,
|
|
"step": 25250
|
|
},
|
|
{
|
|
"epoch": 7.977019663586828,
|
|
"grad_norm": 0.05758831539328352,
|
|
"learning_rate": 0.0002385832230963203,
|
|
"loss": 2.4207,
|
|
"step": 25255
|
|
},
|
|
{
|
|
"epoch": 7.978599068151307,
|
|
"grad_norm": 0.051999097443805445,
|
|
"learning_rate": 0.00023822585648279238,
|
|
"loss": 2.3582,
|
|
"step": 25260
|
|
},
|
|
{
|
|
"epoch": 7.980178472715786,
|
|
"grad_norm": 0.048981107909608225,
|
|
"learning_rate": 0.00023786872151991434,
|
|
"loss": 2.3321,
|
|
"step": 25265
|
|
},
|
|
{
|
|
"epoch": 7.981757877280265,
|
|
"grad_norm": 0.06360018661785359,
|
|
"learning_rate": 0.00023751181831628887,
|
|
"loss": 2.4279,
|
|
"step": 25270
|
|
},
|
|
{
|
|
"epoch": 7.983337281844745,
|
|
"grad_norm": 0.051195794290684445,
|
|
"learning_rate": 0.0002371551469804476,
|
|
"loss": 2.3816,
|
|
"step": 25275
|
|
},
|
|
{
|
|
"epoch": 7.984916686409224,
|
|
"grad_norm": 0.04329785290808799,
|
|
"learning_rate": 0.00023679870762085197,
|
|
"loss": 2.4835,
|
|
"step": 25280
|
|
},
|
|
{
|
|
"epoch": 7.986496090973703,
|
|
"grad_norm": 0.05308426307057489,
|
|
"learning_rate": 0.00023644250034589342,
|
|
"loss": 2.3839,
|
|
"step": 25285
|
|
},
|
|
{
|
|
"epoch": 7.988075495538182,
|
|
"grad_norm": 0.051379493670288746,
|
|
"learning_rate": 0.00023608652526389175,
|
|
"loss": 2.3268,
|
|
"step": 25290
|
|
},
|
|
{
|
|
"epoch": 7.9896549001026615,
|
|
"grad_norm": 0.05996063664513099,
|
|
"learning_rate": 0.00023573078248309722,
|
|
"loss": 2.3677,
|
|
"step": 25295
|
|
},
|
|
{
|
|
"epoch": 7.991234304667141,
|
|
"grad_norm": 0.05129741989969305,
|
|
"learning_rate": 0.00023537527211168875,
|
|
"loss": 2.3758,
|
|
"step": 25300
|
|
},
|
|
{
|
|
"epoch": 7.99281370923162,
|
|
"grad_norm": 0.055209849143472586,
|
|
"learning_rate": 0.0002350199942577743,
|
|
"loss": 2.4039,
|
|
"step": 25305
|
|
},
|
|
{
|
|
"epoch": 7.994393113796098,
|
|
"grad_norm": 0.05022509106975794,
|
|
"learning_rate": 0.00023466494902939239,
|
|
"loss": 2.47,
|
|
"step": 25310
|
|
},
|
|
{
|
|
"epoch": 7.995972518360578,
|
|
"grad_norm": 0.05338175583422317,
|
|
"learning_rate": 0.0002343101365345095,
|
|
"loss": 2.3362,
|
|
"step": 25315
|
|
},
|
|
{
|
|
"epoch": 7.997551922925057,
|
|
"grad_norm": 0.049694422459919,
|
|
"learning_rate": 0.0002339555568810221,
|
|
"loss": 2.4244,
|
|
"step": 25320
|
|
},
|
|
{
|
|
"epoch": 7.999131327489536,
|
|
"grad_norm": 0.05052937977693835,
|
|
"learning_rate": 0.0002336012101767554,
|
|
"loss": 2.3527,
|
|
"step": 25325
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"eval_loss": 2.408498525619507,
|
|
"eval_runtime": 118.6136,
|
|
"eval_samples_per_second": 22.333,
|
|
"eval_steps_per_second": 5.59,
|
|
"step": 25328
|
|
},
|
|
{
|
|
"epoch": 8.000631761825792,
|
|
"grad_norm": 0.05432150860401315,
|
|
"learning_rate": 0.00023324709652946374,
|
|
"loss": 2.4273,
|
|
"step": 25330
|
|
},
|
|
{
|
|
"epoch": 8.002211166390271,
|
|
"grad_norm": 0.06070758772782719,
|
|
"learning_rate": 0.00023289321604683133,
|
|
"loss": 2.4141,
|
|
"step": 25335
|
|
},
|
|
{
|
|
"epoch": 8.00379057095475,
|
|
"grad_norm": 0.053364434963253105,
|
|
"learning_rate": 0.00023253956883647088,
|
|
"loss": 2.3626,
|
|
"step": 25340
|
|
},
|
|
{
|
|
"epoch": 8.00536997551923,
|
|
"grad_norm": 0.04694053188876907,
|
|
"learning_rate": 0.00023218615500592376,
|
|
"loss": 2.3654,
|
|
"step": 25345
|
|
},
|
|
{
|
|
"epoch": 8.006949380083709,
|
|
"grad_norm": 0.05142106576940761,
|
|
"learning_rate": 0.0002318329746626614,
|
|
"loss": 2.41,
|
|
"step": 25350
|
|
},
|
|
{
|
|
"epoch": 8.008528784648188,
|
|
"grad_norm": 0.058656523389992464,
|
|
"learning_rate": 0.00023148002791408361,
|
|
"loss": 2.391,
|
|
"step": 25355
|
|
},
|
|
{
|
|
"epoch": 8.010108189212668,
|
|
"grad_norm": 0.05123322096394153,
|
|
"learning_rate": 0.00023112731486751905,
|
|
"loss": 2.361,
|
|
"step": 25360
|
|
},
|
|
{
|
|
"epoch": 8.011687593777147,
|
|
"grad_norm": 0.06560000338501384,
|
|
"learning_rate": 0.0002307748356302256,
|
|
"loss": 2.3499,
|
|
"step": 25365
|
|
},
|
|
{
|
|
"epoch": 8.013266998341626,
|
|
"grad_norm": 0.05016785247315961,
|
|
"learning_rate": 0.00023042259030938962,
|
|
"loss": 2.4791,
|
|
"step": 25370
|
|
},
|
|
{
|
|
"epoch": 8.014846402906105,
|
|
"grad_norm": 0.04777312402978052,
|
|
"learning_rate": 0.00023007057901212725,
|
|
"loss": 2.3446,
|
|
"step": 25375
|
|
},
|
|
{
|
|
"epoch": 8.016425807470583,
|
|
"grad_norm": 0.05049010453414375,
|
|
"learning_rate": 0.00022971880184548233,
|
|
"loss": 2.4549,
|
|
"step": 25380
|
|
},
|
|
{
|
|
"epoch": 8.018005212035062,
|
|
"grad_norm": 0.055384014176131036,
|
|
"learning_rate": 0.00022936725891642862,
|
|
"loss": 2.3611,
|
|
"step": 25385
|
|
},
|
|
{
|
|
"epoch": 8.019584616599541,
|
|
"grad_norm": 0.055221474558915615,
|
|
"learning_rate": 0.00022901595033186762,
|
|
"loss": 2.4506,
|
|
"step": 25390
|
|
},
|
|
{
|
|
"epoch": 8.02116402116402,
|
|
"grad_norm": 0.0463917278982015,
|
|
"learning_rate": 0.00022866487619862996,
|
|
"loss": 2.3758,
|
|
"step": 25395
|
|
},
|
|
{
|
|
"epoch": 8.0227434257285,
|
|
"grad_norm": 0.05158858897446619,
|
|
"learning_rate": 0.0002283140366234756,
|
|
"loss": 2.4999,
|
|
"step": 25400
|
|
},
|
|
{
|
|
"epoch": 8.024322830292979,
|
|
"grad_norm": 0.04720736132104705,
|
|
"learning_rate": 0.0002279634317130922,
|
|
"loss": 2.3788,
|
|
"step": 25405
|
|
},
|
|
{
|
|
"epoch": 8.025902234857458,
|
|
"grad_norm": 0.04877113220273165,
|
|
"learning_rate": 0.00022761306157409656,
|
|
"loss": 2.4509,
|
|
"step": 25410
|
|
},
|
|
{
|
|
"epoch": 8.027481639421937,
|
|
"grad_norm": 0.04710731190705331,
|
|
"learning_rate": 0.00022726292631303403,
|
|
"loss": 2.4476,
|
|
"step": 25415
|
|
},
|
|
{
|
|
"epoch": 8.029061043986417,
|
|
"grad_norm": 0.049172626607559375,
|
|
"learning_rate": 0.00022691302603637808,
|
|
"loss": 2.3511,
|
|
"step": 25420
|
|
},
|
|
{
|
|
"epoch": 8.030640448550896,
|
|
"grad_norm": 0.06282722754478427,
|
|
"learning_rate": 0.00022656336085053187,
|
|
"loss": 2.4699,
|
|
"step": 25425
|
|
},
|
|
{
|
|
"epoch": 8.032219853115375,
|
|
"grad_norm": 0.04587951545285191,
|
|
"learning_rate": 0.00022621393086182595,
|
|
"loss": 2.3529,
|
|
"step": 25430
|
|
},
|
|
{
|
|
"epoch": 8.033799257679854,
|
|
"grad_norm": 0.05540063205346335,
|
|
"learning_rate": 0.00022586473617651958,
|
|
"loss": 2.4312,
|
|
"step": 25435
|
|
},
|
|
{
|
|
"epoch": 8.035378662244334,
|
|
"grad_norm": 0.05586037835539668,
|
|
"learning_rate": 0.00022551577690080104,
|
|
"loss": 2.3557,
|
|
"step": 25440
|
|
},
|
|
{
|
|
"epoch": 8.036958066808813,
|
|
"grad_norm": 0.04641694018306748,
|
|
"learning_rate": 0.00022516705314078644,
|
|
"loss": 2.3362,
|
|
"step": 25445
|
|
},
|
|
{
|
|
"epoch": 8.038537471373292,
|
|
"grad_norm": 0.0499358415566031,
|
|
"learning_rate": 0.00022481856500252052,
|
|
"loss": 2.3701,
|
|
"step": 25450
|
|
},
|
|
{
|
|
"epoch": 8.040116875937771,
|
|
"grad_norm": 0.06463046342707425,
|
|
"learning_rate": 0.00022447031259197615,
|
|
"loss": 2.3637,
|
|
"step": 25455
|
|
},
|
|
{
|
|
"epoch": 8.04169628050225,
|
|
"grad_norm": 0.05002064721781355,
|
|
"learning_rate": 0.00022412229601505453,
|
|
"loss": 2.3874,
|
|
"step": 25460
|
|
},
|
|
{
|
|
"epoch": 8.04327568506673,
|
|
"grad_norm": 0.05458736400973159,
|
|
"learning_rate": 0.00022377451537758565,
|
|
"loss": 2.3834,
|
|
"step": 25465
|
|
},
|
|
{
|
|
"epoch": 8.04485508963121,
|
|
"grad_norm": 0.050519490608758155,
|
|
"learning_rate": 0.00022342697078532692,
|
|
"loss": 2.4172,
|
|
"step": 25470
|
|
},
|
|
{
|
|
"epoch": 8.046434494195688,
|
|
"grad_norm": 0.053622718118899106,
|
|
"learning_rate": 0.00022307966234396504,
|
|
"loss": 2.3652,
|
|
"step": 25475
|
|
},
|
|
{
|
|
"epoch": 8.048013898760168,
|
|
"grad_norm": 0.06334614813487868,
|
|
"learning_rate": 0.00022273259015911397,
|
|
"loss": 2.4569,
|
|
"step": 25480
|
|
},
|
|
{
|
|
"epoch": 8.049593303324647,
|
|
"grad_norm": 0.06045506352015708,
|
|
"learning_rate": 0.00022238575433631582,
|
|
"loss": 2.4382,
|
|
"step": 25485
|
|
},
|
|
{
|
|
"epoch": 8.051172707889126,
|
|
"grad_norm": 0.044523799208584534,
|
|
"learning_rate": 0.00022203915498104177,
|
|
"loss": 2.386,
|
|
"step": 25490
|
|
},
|
|
{
|
|
"epoch": 8.052752112453605,
|
|
"grad_norm": 0.043731210286509156,
|
|
"learning_rate": 0.0002216927921986901,
|
|
"loss": 2.3943,
|
|
"step": 25495
|
|
},
|
|
{
|
|
"epoch": 8.054331517018085,
|
|
"grad_norm": 0.049095155104685415,
|
|
"learning_rate": 0.00022134666609458764,
|
|
"loss": 2.3977,
|
|
"step": 25500
|
|
},
|
|
{
|
|
"epoch": 8.055910921582564,
|
|
"grad_norm": 0.047218796790150126,
|
|
"learning_rate": 0.000221000776773989,
|
|
"loss": 2.3586,
|
|
"step": 25505
|
|
},
|
|
{
|
|
"epoch": 8.057490326147043,
|
|
"grad_norm": 0.04712898469271991,
|
|
"learning_rate": 0.0002206551243420768,
|
|
"loss": 2.4194,
|
|
"step": 25510
|
|
},
|
|
{
|
|
"epoch": 8.059069730711522,
|
|
"grad_norm": 0.05288130067659473,
|
|
"learning_rate": 0.00022030970890396206,
|
|
"loss": 2.467,
|
|
"step": 25515
|
|
},
|
|
{
|
|
"epoch": 8.060649135276002,
|
|
"grad_norm": 0.0467928302266336,
|
|
"learning_rate": 0.00021996453056468313,
|
|
"loss": 2.4048,
|
|
"step": 25520
|
|
},
|
|
{
|
|
"epoch": 8.062228539840481,
|
|
"grad_norm": 0.04743439795377022,
|
|
"learning_rate": 0.00021961958942920678,
|
|
"loss": 2.4559,
|
|
"step": 25525
|
|
},
|
|
{
|
|
"epoch": 8.06380794440496,
|
|
"grad_norm": 0.05871105138921706,
|
|
"learning_rate": 0.00021927488560242748,
|
|
"loss": 2.3977,
|
|
"step": 25530
|
|
},
|
|
{
|
|
"epoch": 8.065387348969438,
|
|
"grad_norm": 0.04618683270318365,
|
|
"learning_rate": 0.00021893041918916712,
|
|
"loss": 2.3794,
|
|
"step": 25535
|
|
},
|
|
{
|
|
"epoch": 8.066966753533917,
|
|
"grad_norm": 0.060264806794645034,
|
|
"learning_rate": 0.00021858619029417603,
|
|
"loss": 2.5053,
|
|
"step": 25540
|
|
},
|
|
{
|
|
"epoch": 8.068546158098396,
|
|
"grad_norm": 0.05920399847235902,
|
|
"learning_rate": 0.00021824219902213184,
|
|
"loss": 2.3823,
|
|
"step": 25545
|
|
},
|
|
{
|
|
"epoch": 8.070125562662875,
|
|
"grad_norm": 0.05817741487690848,
|
|
"learning_rate": 0.0002178984454776398,
|
|
"loss": 2.4137,
|
|
"step": 25550
|
|
},
|
|
{
|
|
"epoch": 8.071704967227355,
|
|
"grad_norm": 0.058443901562272596,
|
|
"learning_rate": 0.00021755492976523384,
|
|
"loss": 2.4134,
|
|
"step": 25555
|
|
},
|
|
{
|
|
"epoch": 8.073284371791834,
|
|
"grad_norm": 0.06539243429105443,
|
|
"learning_rate": 0.0002172116519893742,
|
|
"loss": 2.3694,
|
|
"step": 25560
|
|
},
|
|
{
|
|
"epoch": 8.074863776356313,
|
|
"grad_norm": 0.056757002916980895,
|
|
"learning_rate": 0.0002168686122544502,
|
|
"loss": 2.3464,
|
|
"step": 25565
|
|
},
|
|
{
|
|
"epoch": 8.076443180920792,
|
|
"grad_norm": 0.048507458667812024,
|
|
"learning_rate": 0.00021652581066477762,
|
|
"loss": 2.4113,
|
|
"step": 25570
|
|
},
|
|
{
|
|
"epoch": 8.078022585485272,
|
|
"grad_norm": 0.04977853610079591,
|
|
"learning_rate": 0.00021618324732459993,
|
|
"loss": 2.3516,
|
|
"step": 25575
|
|
},
|
|
{
|
|
"epoch": 8.07960199004975,
|
|
"grad_norm": 0.05706451027447465,
|
|
"learning_rate": 0.00021584092233808906,
|
|
"loss": 2.4199,
|
|
"step": 25580
|
|
},
|
|
{
|
|
"epoch": 8.08118139461423,
|
|
"grad_norm": 0.04735352241412206,
|
|
"learning_rate": 0.0002154988358093437,
|
|
"loss": 2.423,
|
|
"step": 25585
|
|
},
|
|
{
|
|
"epoch": 8.08276079917871,
|
|
"grad_norm": 0.04404759919677923,
|
|
"learning_rate": 0.0002151569878423899,
|
|
"loss": 2.3359,
|
|
"step": 25590
|
|
},
|
|
{
|
|
"epoch": 8.084340203743189,
|
|
"grad_norm": 0.05186351457297536,
|
|
"learning_rate": 0.00021481537854118173,
|
|
"loss": 2.412,
|
|
"step": 25595
|
|
},
|
|
{
|
|
"epoch": 8.085919608307668,
|
|
"grad_norm": 0.04819742089184298,
|
|
"learning_rate": 0.00021447400800959993,
|
|
"loss": 2.4001,
|
|
"step": 25600
|
|
},
|
|
{
|
|
"epoch": 8.087499012872147,
|
|
"grad_norm": 0.05038219497467566,
|
|
"learning_rate": 0.00021413287635145363,
|
|
"loss": 2.444,
|
|
"step": 25605
|
|
},
|
|
{
|
|
"epoch": 8.089078417436626,
|
|
"grad_norm": 0.05904454470900991,
|
|
"learning_rate": 0.00021379198367047836,
|
|
"loss": 2.3723,
|
|
"step": 25610
|
|
},
|
|
{
|
|
"epoch": 8.090657822001106,
|
|
"grad_norm": 0.04749538015256275,
|
|
"learning_rate": 0.0002134513300703379,
|
|
"loss": 2.5017,
|
|
"step": 25615
|
|
},
|
|
{
|
|
"epoch": 8.092237226565585,
|
|
"grad_norm": 0.050329947502558675,
|
|
"learning_rate": 0.0002131109156546226,
|
|
"loss": 2.4175,
|
|
"step": 25620
|
|
},
|
|
{
|
|
"epoch": 8.093816631130064,
|
|
"grad_norm": 0.043354768292465345,
|
|
"learning_rate": 0.0002127707405268503,
|
|
"loss": 2.3622,
|
|
"step": 25625
|
|
},
|
|
{
|
|
"epoch": 8.095396035694543,
|
|
"grad_norm": 0.05339115726289078,
|
|
"learning_rate": 0.00021243080479046606,
|
|
"loss": 2.3798,
|
|
"step": 25630
|
|
},
|
|
{
|
|
"epoch": 8.096975440259023,
|
|
"grad_norm": 0.04280629673605756,
|
|
"learning_rate": 0.00021209110854884184,
|
|
"loss": 2.4827,
|
|
"step": 25635
|
|
},
|
|
{
|
|
"epoch": 8.098554844823502,
|
|
"grad_norm": 0.046150900293078764,
|
|
"learning_rate": 0.00021175165190527768,
|
|
"loss": 2.4778,
|
|
"step": 25640
|
|
},
|
|
{
|
|
"epoch": 8.100134249387981,
|
|
"grad_norm": 0.04435862477833089,
|
|
"learning_rate": 0.000211412434963,
|
|
"loss": 2.4524,
|
|
"step": 25645
|
|
},
|
|
{
|
|
"epoch": 8.10171365395246,
|
|
"grad_norm": 0.06701759717447257,
|
|
"learning_rate": 0.00021107345782516208,
|
|
"loss": 2.4495,
|
|
"step": 25650
|
|
},
|
|
{
|
|
"epoch": 8.10329305851694,
|
|
"grad_norm": 0.05576458550765147,
|
|
"learning_rate": 0.00021073472059484534,
|
|
"loss": 2.4342,
|
|
"step": 25655
|
|
},
|
|
{
|
|
"epoch": 8.104872463081419,
|
|
"grad_norm": 0.053718621623195476,
|
|
"learning_rate": 0.00021039622337505726,
|
|
"loss": 2.4701,
|
|
"step": 25660
|
|
},
|
|
{
|
|
"epoch": 8.106451867645898,
|
|
"grad_norm": 0.04767905473302163,
|
|
"learning_rate": 0.00021005796626873252,
|
|
"loss": 2.4262,
|
|
"step": 25665
|
|
},
|
|
{
|
|
"epoch": 8.108031272210377,
|
|
"grad_norm": 0.05318583148470195,
|
|
"learning_rate": 0.0002097199493787334,
|
|
"loss": 2.4528,
|
|
"step": 25670
|
|
},
|
|
{
|
|
"epoch": 8.109610676774857,
|
|
"grad_norm": 0.05349608089719971,
|
|
"learning_rate": 0.00020938217280784844,
|
|
"loss": 2.4751,
|
|
"step": 25675
|
|
},
|
|
{
|
|
"epoch": 8.111190081339336,
|
|
"grad_norm": 0.052268340128226724,
|
|
"learning_rate": 0.00020904463665879337,
|
|
"loss": 2.4085,
|
|
"step": 25680
|
|
},
|
|
{
|
|
"epoch": 8.112769485903815,
|
|
"grad_norm": 0.04659946604710442,
|
|
"learning_rate": 0.00020870734103421075,
|
|
"loss": 2.4501,
|
|
"step": 25685
|
|
},
|
|
{
|
|
"epoch": 8.114348890468294,
|
|
"grad_norm": 0.04613703886202508,
|
|
"learning_rate": 0.00020837028603666962,
|
|
"loss": 2.3732,
|
|
"step": 25690
|
|
},
|
|
{
|
|
"epoch": 8.115928295032772,
|
|
"grad_norm": 0.04325557682887605,
|
|
"learning_rate": 0.00020803347176866704,
|
|
"loss": 2.3152,
|
|
"step": 25695
|
|
},
|
|
{
|
|
"epoch": 8.117507699597251,
|
|
"grad_norm": 0.04412483063282996,
|
|
"learning_rate": 0.00020769689833262527,
|
|
"loss": 2.4872,
|
|
"step": 25700
|
|
},
|
|
{
|
|
"epoch": 8.11908710416173,
|
|
"grad_norm": 0.05533062549270656,
|
|
"learning_rate": 0.00020736056583089502,
|
|
"loss": 2.4282,
|
|
"step": 25705
|
|
},
|
|
{
|
|
"epoch": 8.12066650872621,
|
|
"grad_norm": 0.049017971929669675,
|
|
"learning_rate": 0.00020702447436575223,
|
|
"loss": 2.4769,
|
|
"step": 25710
|
|
},
|
|
{
|
|
"epoch": 8.122245913290689,
|
|
"grad_norm": 0.0457970432883676,
|
|
"learning_rate": 0.00020668862403940035,
|
|
"loss": 2.3733,
|
|
"step": 25715
|
|
},
|
|
{
|
|
"epoch": 8.123825317855168,
|
|
"grad_norm": 0.05341815120981854,
|
|
"learning_rate": 0.0002063530149539694,
|
|
"loss": 2.4267,
|
|
"step": 25720
|
|
},
|
|
{
|
|
"epoch": 8.125404722419647,
|
|
"grad_norm": 0.04123828694903196,
|
|
"learning_rate": 0.0002060176472115155,
|
|
"loss": 2.2966,
|
|
"step": 25725
|
|
},
|
|
{
|
|
"epoch": 8.126984126984127,
|
|
"grad_norm": 0.04893357789625497,
|
|
"learning_rate": 0.0002056825209140224,
|
|
"loss": 2.4087,
|
|
"step": 25730
|
|
},
|
|
{
|
|
"epoch": 8.128563531548606,
|
|
"grad_norm": 0.053472140914025544,
|
|
"learning_rate": 0.0002053476361633997,
|
|
"loss": 2.3427,
|
|
"step": 25735
|
|
},
|
|
{
|
|
"epoch": 8.130142936113085,
|
|
"grad_norm": 0.048952037330800856,
|
|
"learning_rate": 0.00020501299306148346,
|
|
"loss": 2.4165,
|
|
"step": 25740
|
|
},
|
|
{
|
|
"epoch": 8.131722340677564,
|
|
"grad_norm": 0.06182886325902322,
|
|
"learning_rate": 0.0002046785917100369,
|
|
"loss": 2.5146,
|
|
"step": 25745
|
|
},
|
|
{
|
|
"epoch": 8.133301745242044,
|
|
"grad_norm": 0.04157143444076313,
|
|
"learning_rate": 0.00020434443221074896,
|
|
"loss": 2.3853,
|
|
"step": 25750
|
|
},
|
|
{
|
|
"epoch": 8.134881149806523,
|
|
"grad_norm": 0.0698682218208614,
|
|
"learning_rate": 0.0002040105146652358,
|
|
"loss": 2.4076,
|
|
"step": 25755
|
|
},
|
|
{
|
|
"epoch": 8.136460554371002,
|
|
"grad_norm": 0.0451942558429116,
|
|
"learning_rate": 0.00020367683917503943,
|
|
"loss": 2.3913,
|
|
"step": 25760
|
|
},
|
|
{
|
|
"epoch": 8.138039958935481,
|
|
"grad_norm": 0.06147551527667643,
|
|
"learning_rate": 0.00020334340584162846,
|
|
"loss": 2.3917,
|
|
"step": 25765
|
|
},
|
|
{
|
|
"epoch": 8.13961936349996,
|
|
"grad_norm": 0.05748052007054688,
|
|
"learning_rate": 0.0002030102147663978,
|
|
"loss": 2.3703,
|
|
"step": 25770
|
|
},
|
|
{
|
|
"epoch": 8.14119876806444,
|
|
"grad_norm": 0.0501296109555413,
|
|
"learning_rate": 0.0002026772660506686,
|
|
"loss": 2.3558,
|
|
"step": 25775
|
|
},
|
|
{
|
|
"epoch": 8.142778172628919,
|
|
"grad_norm": 0.04664935132387147,
|
|
"learning_rate": 0.00020234455979568845,
|
|
"loss": 2.3723,
|
|
"step": 25780
|
|
},
|
|
{
|
|
"epoch": 8.144357577193398,
|
|
"grad_norm": 0.05187910422285325,
|
|
"learning_rate": 0.0002020120961026315,
|
|
"loss": 2.288,
|
|
"step": 25785
|
|
},
|
|
{
|
|
"epoch": 8.145936981757878,
|
|
"grad_norm": 0.05553071987652418,
|
|
"learning_rate": 0.00020167987507259733,
|
|
"loss": 2.4434,
|
|
"step": 25790
|
|
},
|
|
{
|
|
"epoch": 8.147516386322357,
|
|
"grad_norm": 0.04495139111319613,
|
|
"learning_rate": 0.0002013478968066128,
|
|
"loss": 2.4154,
|
|
"step": 25795
|
|
},
|
|
{
|
|
"epoch": 8.149095790886836,
|
|
"grad_norm": 0.052812069657348096,
|
|
"learning_rate": 0.00020101616140563017,
|
|
"loss": 2.4956,
|
|
"step": 25800
|
|
},
|
|
{
|
|
"epoch": 8.150675195451315,
|
|
"grad_norm": 0.04455981980348406,
|
|
"learning_rate": 0.00020068466897052805,
|
|
"loss": 2.4061,
|
|
"step": 25805
|
|
},
|
|
{
|
|
"epoch": 8.152254600015794,
|
|
"grad_norm": 0.044133623429162515,
|
|
"learning_rate": 0.00020035341960211107,
|
|
"loss": 2.3777,
|
|
"step": 25810
|
|
},
|
|
{
|
|
"epoch": 8.153834004580274,
|
|
"grad_norm": 0.052513258384821355,
|
|
"learning_rate": 0.00020002241340110982,
|
|
"loss": 2.3417,
|
|
"step": 25815
|
|
},
|
|
{
|
|
"epoch": 8.155413409144753,
|
|
"grad_norm": 0.05292469001695762,
|
|
"learning_rate": 0.00019969165046818184,
|
|
"loss": 2.3839,
|
|
"step": 25820
|
|
},
|
|
{
|
|
"epoch": 8.156992813709232,
|
|
"grad_norm": 0.0544564283114912,
|
|
"learning_rate": 0.00019936113090390952,
|
|
"loss": 2.4386,
|
|
"step": 25825
|
|
},
|
|
{
|
|
"epoch": 8.158572218273711,
|
|
"grad_norm": 0.053988393971699265,
|
|
"learning_rate": 0.00019903085480880167,
|
|
"loss": 2.4039,
|
|
"step": 25830
|
|
},
|
|
{
|
|
"epoch": 8.16015162283819,
|
|
"grad_norm": 0.057185268861558815,
|
|
"learning_rate": 0.00019870082228329357,
|
|
"loss": 2.3546,
|
|
"step": 25835
|
|
},
|
|
{
|
|
"epoch": 8.16173102740267,
|
|
"grad_norm": 0.04791400379030568,
|
|
"learning_rate": 0.00019837103342774544,
|
|
"loss": 2.4683,
|
|
"step": 25840
|
|
},
|
|
{
|
|
"epoch": 8.16331043196715,
|
|
"grad_norm": 0.05226965451943714,
|
|
"learning_rate": 0.00019804148834244462,
|
|
"loss": 2.4214,
|
|
"step": 25845
|
|
},
|
|
{
|
|
"epoch": 8.164889836531628,
|
|
"grad_norm": 0.04203611427973354,
|
|
"learning_rate": 0.0001977121871276034,
|
|
"loss": 2.4867,
|
|
"step": 25850
|
|
},
|
|
{
|
|
"epoch": 8.166469241096106,
|
|
"grad_norm": 0.04737734236925939,
|
|
"learning_rate": 0.00019738312988336004,
|
|
"loss": 2.4974,
|
|
"step": 25855
|
|
},
|
|
{
|
|
"epoch": 8.168048645660585,
|
|
"grad_norm": 0.044843187250530374,
|
|
"learning_rate": 0.0001970543167097789,
|
|
"loss": 2.3521,
|
|
"step": 25860
|
|
},
|
|
{
|
|
"epoch": 8.169628050225064,
|
|
"grad_norm": 0.0451266796780657,
|
|
"learning_rate": 0.00019672574770684948,
|
|
"loss": 2.4727,
|
|
"step": 25865
|
|
},
|
|
{
|
|
"epoch": 8.171207454789544,
|
|
"grad_norm": 0.04518404767506238,
|
|
"learning_rate": 0.00019639742297448837,
|
|
"loss": 2.4196,
|
|
"step": 25870
|
|
},
|
|
{
|
|
"epoch": 8.172786859354023,
|
|
"grad_norm": 0.049498519036167025,
|
|
"learning_rate": 0.0001960693426125364,
|
|
"loss": 2.4917,
|
|
"step": 25875
|
|
},
|
|
{
|
|
"epoch": 8.174366263918502,
|
|
"grad_norm": 0.05481636143947064,
|
|
"learning_rate": 0.00019574150672076074,
|
|
"loss": 2.3747,
|
|
"step": 25880
|
|
},
|
|
{
|
|
"epoch": 8.175945668482981,
|
|
"grad_norm": 0.048659749608224355,
|
|
"learning_rate": 0.00019541391539885456,
|
|
"loss": 2.3617,
|
|
"step": 25885
|
|
},
|
|
{
|
|
"epoch": 8.17752507304746,
|
|
"grad_norm": 0.04726710591130207,
|
|
"learning_rate": 0.00019508656874643604,
|
|
"loss": 2.3334,
|
|
"step": 25890
|
|
},
|
|
{
|
|
"epoch": 8.17910447761194,
|
|
"grad_norm": 0.060859310735491265,
|
|
"learning_rate": 0.00019475946686304925,
|
|
"loss": 2.3829,
|
|
"step": 25895
|
|
},
|
|
{
|
|
"epoch": 8.18068388217642,
|
|
"grad_norm": 0.05521704128431342,
|
|
"learning_rate": 0.0001944326098481638,
|
|
"loss": 2.3431,
|
|
"step": 25900
|
|
},
|
|
{
|
|
"epoch": 8.182263286740898,
|
|
"grad_norm": 0.05295496588973379,
|
|
"learning_rate": 0.00019410599780117445,
|
|
"loss": 2.3398,
|
|
"step": 25905
|
|
},
|
|
{
|
|
"epoch": 8.183842691305378,
|
|
"grad_norm": 0.05094092691924014,
|
|
"learning_rate": 0.00019377963082140248,
|
|
"loss": 2.4341,
|
|
"step": 25910
|
|
},
|
|
{
|
|
"epoch": 8.185422095869857,
|
|
"grad_norm": 0.05532205320005589,
|
|
"learning_rate": 0.00019345350900809366,
|
|
"loss": 2.4228,
|
|
"step": 25915
|
|
},
|
|
{
|
|
"epoch": 8.187001500434336,
|
|
"grad_norm": 0.05141666122768391,
|
|
"learning_rate": 0.00019312763246041932,
|
|
"loss": 2.4069,
|
|
"step": 25920
|
|
},
|
|
{
|
|
"epoch": 8.188580904998815,
|
|
"grad_norm": 0.050508472337427286,
|
|
"learning_rate": 0.00019280200127747704,
|
|
"loss": 2.3717,
|
|
"step": 25925
|
|
},
|
|
{
|
|
"epoch": 8.190160309563295,
|
|
"grad_norm": 0.046726584367368186,
|
|
"learning_rate": 0.00019247661555828844,
|
|
"loss": 2.3031,
|
|
"step": 25930
|
|
},
|
|
{
|
|
"epoch": 8.191739714127774,
|
|
"grad_norm": 0.056439828014490004,
|
|
"learning_rate": 0.000192151475401802,
|
|
"loss": 2.4581,
|
|
"step": 25935
|
|
},
|
|
{
|
|
"epoch": 8.193319118692253,
|
|
"grad_norm": 0.053176267180560934,
|
|
"learning_rate": 0.00019182658090689044,
|
|
"loss": 2.3222,
|
|
"step": 25940
|
|
},
|
|
{
|
|
"epoch": 8.194898523256732,
|
|
"grad_norm": 0.05059043390210798,
|
|
"learning_rate": 0.0001915019321723519,
|
|
"loss": 2.3839,
|
|
"step": 25945
|
|
},
|
|
{
|
|
"epoch": 8.196477927821212,
|
|
"grad_norm": 0.05642321753023156,
|
|
"learning_rate": 0.00019117752929691034,
|
|
"loss": 2.3712,
|
|
"step": 25950
|
|
},
|
|
{
|
|
"epoch": 8.198057332385691,
|
|
"grad_norm": 0.05491345197222136,
|
|
"learning_rate": 0.00019085337237921397,
|
|
"loss": 2.3938,
|
|
"step": 25955
|
|
},
|
|
{
|
|
"epoch": 8.19963673695017,
|
|
"grad_norm": 0.056572885344928915,
|
|
"learning_rate": 0.00019052946151783766,
|
|
"loss": 2.5006,
|
|
"step": 25960
|
|
},
|
|
{
|
|
"epoch": 8.20121614151465,
|
|
"grad_norm": 0.04636740498907759,
|
|
"learning_rate": 0.00019020579681128025,
|
|
"loss": 2.3715,
|
|
"step": 25965
|
|
},
|
|
{
|
|
"epoch": 8.202795546079129,
|
|
"grad_norm": 0.04934308888482995,
|
|
"learning_rate": 0.00018988237835796586,
|
|
"loss": 2.3747,
|
|
"step": 25970
|
|
},
|
|
{
|
|
"epoch": 8.204374950643608,
|
|
"grad_norm": 0.04696054508788191,
|
|
"learning_rate": 0.00018955920625624435,
|
|
"loss": 2.3561,
|
|
"step": 25975
|
|
},
|
|
{
|
|
"epoch": 8.205954355208087,
|
|
"grad_norm": 0.04097225071526321,
|
|
"learning_rate": 0.00018923628060439035,
|
|
"loss": 2.3523,
|
|
"step": 25980
|
|
},
|
|
{
|
|
"epoch": 8.207533759772566,
|
|
"grad_norm": 0.04850041083520627,
|
|
"learning_rate": 0.0001889136015006032,
|
|
"loss": 2.3739,
|
|
"step": 25985
|
|
},
|
|
{
|
|
"epoch": 8.209113164337046,
|
|
"grad_norm": 0.04378582333150282,
|
|
"learning_rate": 0.00018859116904300767,
|
|
"loss": 2.3547,
|
|
"step": 25990
|
|
},
|
|
{
|
|
"epoch": 8.210692568901525,
|
|
"grad_norm": 0.048930957978835087,
|
|
"learning_rate": 0.00018826898332965314,
|
|
"loss": 2.452,
|
|
"step": 25995
|
|
},
|
|
{
|
|
"epoch": 8.212271973466004,
|
|
"grad_norm": 0.048049063084902095,
|
|
"learning_rate": 0.00018794704445851475,
|
|
"loss": 2.3665,
|
|
"step": 26000
|
|
},
|
|
{
|
|
"epoch": 8.213851378030483,
|
|
"grad_norm": 0.06494982882472712,
|
|
"learning_rate": 0.0001876253525274918,
|
|
"loss": 2.3977,
|
|
"step": 26005
|
|
},
|
|
{
|
|
"epoch": 8.21543078259496,
|
|
"grad_norm": 0.04749173325529246,
|
|
"learning_rate": 0.00018730390763440851,
|
|
"loss": 2.457,
|
|
"step": 26010
|
|
},
|
|
{
|
|
"epoch": 8.21701018715944,
|
|
"grad_norm": 0.05242256228335583,
|
|
"learning_rate": 0.0001869827098770146,
|
|
"loss": 2.3897,
|
|
"step": 26015
|
|
},
|
|
{
|
|
"epoch": 8.21858959172392,
|
|
"grad_norm": 0.05671602284570124,
|
|
"learning_rate": 0.00018666175935298391,
|
|
"loss": 2.4221,
|
|
"step": 26020
|
|
},
|
|
{
|
|
"epoch": 8.220168996288399,
|
|
"grad_norm": 0.05300174129660964,
|
|
"learning_rate": 0.00018634105615991593,
|
|
"loss": 2.3662,
|
|
"step": 26025
|
|
},
|
|
{
|
|
"epoch": 8.221748400852878,
|
|
"grad_norm": 0.049436010634950366,
|
|
"learning_rate": 0.00018602060039533418,
|
|
"loss": 2.3879,
|
|
"step": 26030
|
|
},
|
|
{
|
|
"epoch": 8.223327805417357,
|
|
"grad_norm": 0.053023278772629315,
|
|
"learning_rate": 0.0001857003921566871,
|
|
"loss": 2.4243,
|
|
"step": 26035
|
|
},
|
|
{
|
|
"epoch": 8.224907209981836,
|
|
"grad_norm": 0.0468439537196731,
|
|
"learning_rate": 0.00018538043154134808,
|
|
"loss": 2.4004,
|
|
"step": 26040
|
|
},
|
|
{
|
|
"epoch": 8.226486614546316,
|
|
"grad_norm": 0.052684392610237045,
|
|
"learning_rate": 0.0001850607186466149,
|
|
"loss": 2.4019,
|
|
"step": 26045
|
|
},
|
|
{
|
|
"epoch": 8.228066019110795,
|
|
"grad_norm": 0.05042324780054584,
|
|
"learning_rate": 0.0001847412535697106,
|
|
"loss": 2.4142,
|
|
"step": 26050
|
|
},
|
|
{
|
|
"epoch": 8.229645423675274,
|
|
"grad_norm": 0.05305724449911546,
|
|
"learning_rate": 0.0001844220364077822,
|
|
"loss": 2.3344,
|
|
"step": 26055
|
|
},
|
|
{
|
|
"epoch": 8.231224828239753,
|
|
"grad_norm": 0.06049144424858691,
|
|
"learning_rate": 0.0001841030672579015,
|
|
"loss": 2.3801,
|
|
"step": 26060
|
|
},
|
|
{
|
|
"epoch": 8.232804232804233,
|
|
"grad_norm": 0.06369255413270646,
|
|
"learning_rate": 0.00018378434621706542,
|
|
"loss": 2.4561,
|
|
"step": 26065
|
|
},
|
|
{
|
|
"epoch": 8.234383637368712,
|
|
"grad_norm": 0.056604618448458786,
|
|
"learning_rate": 0.00018346587338219456,
|
|
"loss": 2.4035,
|
|
"step": 26070
|
|
},
|
|
{
|
|
"epoch": 8.235963041933191,
|
|
"grad_norm": 0.050889888783988424,
|
|
"learning_rate": 0.00018314764885013469,
|
|
"loss": 2.3944,
|
|
"step": 26075
|
|
},
|
|
{
|
|
"epoch": 8.23754244649767,
|
|
"grad_norm": 0.055367532026650776,
|
|
"learning_rate": 0.00018282967271765583,
|
|
"loss": 2.43,
|
|
"step": 26080
|
|
},
|
|
{
|
|
"epoch": 8.23912185106215,
|
|
"grad_norm": 0.05550179993088914,
|
|
"learning_rate": 0.0001825119450814522,
|
|
"loss": 2.4264,
|
|
"step": 26085
|
|
},
|
|
{
|
|
"epoch": 8.240701255626629,
|
|
"grad_norm": 0.06828840646708353,
|
|
"learning_rate": 0.00018219446603814316,
|
|
"loss": 2.3253,
|
|
"step": 26090
|
|
},
|
|
{
|
|
"epoch": 8.242280660191108,
|
|
"grad_norm": 0.0471356896770975,
|
|
"learning_rate": 0.00018187723568427173,
|
|
"loss": 2.2869,
|
|
"step": 26095
|
|
},
|
|
{
|
|
"epoch": 8.243860064755587,
|
|
"grad_norm": 0.043796165252091464,
|
|
"learning_rate": 0.00018156025411630595,
|
|
"loss": 2.4641,
|
|
"step": 26100
|
|
},
|
|
{
|
|
"epoch": 8.245439469320067,
|
|
"grad_norm": 0.04534146642314097,
|
|
"learning_rate": 0.00018124352143063783,
|
|
"loss": 2.3954,
|
|
"step": 26105
|
|
},
|
|
{
|
|
"epoch": 8.247018873884546,
|
|
"grad_norm": 0.0480378942889025,
|
|
"learning_rate": 0.00018092703772358342,
|
|
"loss": 2.3197,
|
|
"step": 26110
|
|
},
|
|
{
|
|
"epoch": 8.248598278449025,
|
|
"grad_norm": 0.054109348837150635,
|
|
"learning_rate": 0.00018061080309138378,
|
|
"loss": 2.4383,
|
|
"step": 26115
|
|
},
|
|
{
|
|
"epoch": 8.250177683013504,
|
|
"grad_norm": 0.057228962014287385,
|
|
"learning_rate": 0.00018029481763020384,
|
|
"loss": 2.356,
|
|
"step": 26120
|
|
},
|
|
{
|
|
"epoch": 8.251757087577984,
|
|
"grad_norm": 0.048818000998402444,
|
|
"learning_rate": 0.0001799790814361325,
|
|
"loss": 2.4628,
|
|
"step": 26125
|
|
},
|
|
{
|
|
"epoch": 8.253336492142463,
|
|
"grad_norm": 0.04608977632661902,
|
|
"learning_rate": 0.00017966359460518322,
|
|
"loss": 2.333,
|
|
"step": 26130
|
|
},
|
|
{
|
|
"epoch": 8.254915896706942,
|
|
"grad_norm": 0.052682672464735784,
|
|
"learning_rate": 0.00017934835723329345,
|
|
"loss": 2.4297,
|
|
"step": 26135
|
|
},
|
|
{
|
|
"epoch": 8.256495301271421,
|
|
"grad_norm": 0.04704706442557295,
|
|
"learning_rate": 0.00017903336941632508,
|
|
"loss": 2.4017,
|
|
"step": 26140
|
|
},
|
|
{
|
|
"epoch": 8.2580747058359,
|
|
"grad_norm": 0.048189627762573205,
|
|
"learning_rate": 0.00017871863125006382,
|
|
"loss": 2.3934,
|
|
"step": 26145
|
|
},
|
|
{
|
|
"epoch": 8.25965411040038,
|
|
"grad_norm": 0.06717330151488347,
|
|
"learning_rate": 0.00017840414283021923,
|
|
"loss": 2.3163,
|
|
"step": 26150
|
|
},
|
|
{
|
|
"epoch": 8.261233514964859,
|
|
"grad_norm": 0.05286030927555884,
|
|
"learning_rate": 0.00017808990425242566,
|
|
"loss": 2.4227,
|
|
"step": 26155
|
|
},
|
|
{
|
|
"epoch": 8.262812919529338,
|
|
"grad_norm": 0.0505713728802993,
|
|
"learning_rate": 0.00017777591561224094,
|
|
"loss": 2.4652,
|
|
"step": 26160
|
|
},
|
|
{
|
|
"epoch": 8.264392324093816,
|
|
"grad_norm": 0.05267430455837137,
|
|
"learning_rate": 0.000177462177005147,
|
|
"loss": 2.4026,
|
|
"step": 26165
|
|
},
|
|
{
|
|
"epoch": 8.265971728658295,
|
|
"grad_norm": 0.0979418781490772,
|
|
"learning_rate": 0.00017714868852654954,
|
|
"loss": 2.4923,
|
|
"step": 26170
|
|
},
|
|
{
|
|
"epoch": 8.267551133222774,
|
|
"grad_norm": 0.05397587881496513,
|
|
"learning_rate": 0.00017683545027177838,
|
|
"loss": 2.3589,
|
|
"step": 26175
|
|
},
|
|
{
|
|
"epoch": 8.269130537787253,
|
|
"grad_norm": 0.04991240018750876,
|
|
"learning_rate": 0.00017652246233608782,
|
|
"loss": 2.3441,
|
|
"step": 26180
|
|
},
|
|
{
|
|
"epoch": 8.270709942351733,
|
|
"grad_norm": 0.051440642685498895,
|
|
"learning_rate": 0.0001762097248146547,
|
|
"loss": 2.3838,
|
|
"step": 26185
|
|
},
|
|
{
|
|
"epoch": 8.272289346916212,
|
|
"grad_norm": 0.05483117147502908,
|
|
"learning_rate": 0.00017589723780258126,
|
|
"loss": 2.3475,
|
|
"step": 26190
|
|
},
|
|
{
|
|
"epoch": 8.273868751480691,
|
|
"grad_norm": 0.05169302339631226,
|
|
"learning_rate": 0.00017558500139489241,
|
|
"loss": 2.4143,
|
|
"step": 26195
|
|
},
|
|
{
|
|
"epoch": 8.27544815604517,
|
|
"grad_norm": 0.047668104522567596,
|
|
"learning_rate": 0.0001752730156865371,
|
|
"loss": 2.361,
|
|
"step": 26200
|
|
},
|
|
{
|
|
"epoch": 8.27702756060965,
|
|
"grad_norm": 0.04749377812354923,
|
|
"learning_rate": 0.00017496128077238872,
|
|
"loss": 2.3622,
|
|
"step": 26205
|
|
},
|
|
{
|
|
"epoch": 8.278606965174129,
|
|
"grad_norm": 0.04503358060922985,
|
|
"learning_rate": 0.00017464979674724335,
|
|
"loss": 2.3464,
|
|
"step": 26210
|
|
},
|
|
{
|
|
"epoch": 8.280186369738608,
|
|
"grad_norm": 0.04909581956112759,
|
|
"learning_rate": 0.00017433856370582156,
|
|
"loss": 2.4573,
|
|
"step": 26215
|
|
},
|
|
{
|
|
"epoch": 8.281765774303087,
|
|
"grad_norm": 0.04132899506214487,
|
|
"learning_rate": 0.00017402758174276734,
|
|
"loss": 2.4487,
|
|
"step": 26220
|
|
},
|
|
{
|
|
"epoch": 8.283345178867567,
|
|
"grad_norm": 0.047925626721803186,
|
|
"learning_rate": 0.0001737168509526479,
|
|
"loss": 2.416,
|
|
"step": 26225
|
|
},
|
|
{
|
|
"epoch": 8.284924583432046,
|
|
"grad_norm": 0.053581647556568454,
|
|
"learning_rate": 0.00017340637142995507,
|
|
"loss": 2.4093,
|
|
"step": 26230
|
|
},
|
|
{
|
|
"epoch": 8.286503987996525,
|
|
"grad_norm": 0.043515914265809134,
|
|
"learning_rate": 0.0001730961432691034,
|
|
"loss": 2.3549,
|
|
"step": 26235
|
|
},
|
|
{
|
|
"epoch": 8.288083392561004,
|
|
"grad_norm": 0.04861740291279733,
|
|
"learning_rate": 0.00017278616656443113,
|
|
"loss": 2.3499,
|
|
"step": 26240
|
|
},
|
|
{
|
|
"epoch": 8.289662797125484,
|
|
"grad_norm": 0.054873344786989145,
|
|
"learning_rate": 0.0001724764414102007,
|
|
"loss": 2.4345,
|
|
"step": 26245
|
|
},
|
|
{
|
|
"epoch": 8.291242201689963,
|
|
"grad_norm": 0.050911740028379296,
|
|
"learning_rate": 0.00017216696790059717,
|
|
"loss": 2.4058,
|
|
"step": 26250
|
|
},
|
|
{
|
|
"epoch": 8.292821606254442,
|
|
"grad_norm": 0.049689682996031846,
|
|
"learning_rate": 0.00017185774612972948,
|
|
"loss": 2.3661,
|
|
"step": 26255
|
|
},
|
|
{
|
|
"epoch": 8.294401010818921,
|
|
"grad_norm": 0.04654305378222502,
|
|
"learning_rate": 0.0001715487761916301,
|
|
"loss": 2.4523,
|
|
"step": 26260
|
|
},
|
|
{
|
|
"epoch": 8.2959804153834,
|
|
"grad_norm": 0.05244234404666711,
|
|
"learning_rate": 0.00017124005818025444,
|
|
"loss": 2.3811,
|
|
"step": 26265
|
|
},
|
|
{
|
|
"epoch": 8.29755981994788,
|
|
"grad_norm": 0.054568257048876516,
|
|
"learning_rate": 0.0001709315921894823,
|
|
"loss": 2.4299,
|
|
"step": 26270
|
|
},
|
|
{
|
|
"epoch": 8.29913922451236,
|
|
"grad_norm": 0.05397607350166474,
|
|
"learning_rate": 0.0001706233783131157,
|
|
"loss": 2.3978,
|
|
"step": 26275
|
|
},
|
|
{
|
|
"epoch": 8.300718629076838,
|
|
"grad_norm": 0.04837043789766192,
|
|
"learning_rate": 0.00017031541664488093,
|
|
"loss": 2.3639,
|
|
"step": 26280
|
|
},
|
|
{
|
|
"epoch": 8.302298033641318,
|
|
"grad_norm": 0.043583664778450076,
|
|
"learning_rate": 0.00017000770727842695,
|
|
"loss": 2.3316,
|
|
"step": 26285
|
|
},
|
|
{
|
|
"epoch": 8.303877438205797,
|
|
"grad_norm": 0.04609596817627926,
|
|
"learning_rate": 0.00016970025030732606,
|
|
"loss": 2.2548,
|
|
"step": 26290
|
|
},
|
|
{
|
|
"epoch": 8.305456842770276,
|
|
"grad_norm": 0.04325355512380385,
|
|
"learning_rate": 0.0001693930458250742,
|
|
"loss": 2.4832,
|
|
"step": 26295
|
|
},
|
|
{
|
|
"epoch": 8.307036247334755,
|
|
"grad_norm": 0.044709319564823075,
|
|
"learning_rate": 0.00016908609392509032,
|
|
"loss": 2.4069,
|
|
"step": 26300
|
|
},
|
|
{
|
|
"epoch": 8.308615651899235,
|
|
"grad_norm": 0.049610329987342446,
|
|
"learning_rate": 0.00016877939470071645,
|
|
"loss": 2.5391,
|
|
"step": 26305
|
|
},
|
|
{
|
|
"epoch": 8.310195056463714,
|
|
"grad_norm": 0.06240617850116144,
|
|
"learning_rate": 0.00016847294824521775,
|
|
"loss": 2.3962,
|
|
"step": 26310
|
|
},
|
|
{
|
|
"epoch": 8.311774461028193,
|
|
"grad_norm": 0.05002596125229355,
|
|
"learning_rate": 0.00016816675465178255,
|
|
"loss": 2.4464,
|
|
"step": 26315
|
|
},
|
|
{
|
|
"epoch": 8.313353865592672,
|
|
"grad_norm": 0.052546192058887144,
|
|
"learning_rate": 0.00016786081401352272,
|
|
"loss": 2.3425,
|
|
"step": 26320
|
|
},
|
|
{
|
|
"epoch": 8.314933270157152,
|
|
"grad_norm": 0.04967183298894618,
|
|
"learning_rate": 0.00016755512642347258,
|
|
"loss": 2.4558,
|
|
"step": 26325
|
|
},
|
|
{
|
|
"epoch": 8.31651267472163,
|
|
"grad_norm": 0.04421521494190073,
|
|
"learning_rate": 0.00016724969197458973,
|
|
"loss": 2.4106,
|
|
"step": 26330
|
|
},
|
|
{
|
|
"epoch": 8.318092079286108,
|
|
"grad_norm": 0.04687791030191425,
|
|
"learning_rate": 0.00016694451075975524,
|
|
"loss": 2.4344,
|
|
"step": 26335
|
|
},
|
|
{
|
|
"epoch": 8.319671483850588,
|
|
"grad_norm": 0.049095717835493755,
|
|
"learning_rate": 0.0001666395828717724,
|
|
"loss": 2.464,
|
|
"step": 26340
|
|
},
|
|
{
|
|
"epoch": 8.321250888415067,
|
|
"grad_norm": 0.04378104564721021,
|
|
"learning_rate": 0.00016633490840336796,
|
|
"loss": 2.3461,
|
|
"step": 26345
|
|
},
|
|
{
|
|
"epoch": 8.322830292979546,
|
|
"grad_norm": 0.047078014371276226,
|
|
"learning_rate": 0.0001660304874471914,
|
|
"loss": 2.4314,
|
|
"step": 26350
|
|
},
|
|
{
|
|
"epoch": 8.324409697544025,
|
|
"grad_norm": 0.0518549059272024,
|
|
"learning_rate": 0.000165726320095815,
|
|
"loss": 2.4218,
|
|
"step": 26355
|
|
},
|
|
{
|
|
"epoch": 8.325989102108505,
|
|
"grad_norm": 0.04456247783804972,
|
|
"learning_rate": 0.00016542240644173468,
|
|
"loss": 2.4152,
|
|
"step": 26360
|
|
},
|
|
{
|
|
"epoch": 8.327568506672984,
|
|
"grad_norm": 0.0490036060868725,
|
|
"learning_rate": 0.00016511874657736792,
|
|
"loss": 2.4491,
|
|
"step": 26365
|
|
},
|
|
{
|
|
"epoch": 8.329147911237463,
|
|
"grad_norm": 0.049685845519469106,
|
|
"learning_rate": 0.00016481534059505643,
|
|
"loss": 2.314,
|
|
"step": 26370
|
|
},
|
|
{
|
|
"epoch": 8.330727315801942,
|
|
"grad_norm": 0.0486307703124003,
|
|
"learning_rate": 0.00016451218858706373,
|
|
"loss": 2.5204,
|
|
"step": 26375
|
|
},
|
|
{
|
|
"epoch": 8.332306720366422,
|
|
"grad_norm": 0.05557163707955135,
|
|
"learning_rate": 0.00016420929064557611,
|
|
"loss": 2.4001,
|
|
"step": 26380
|
|
},
|
|
{
|
|
"epoch": 8.3338861249309,
|
|
"grad_norm": 0.04972702754574487,
|
|
"learning_rate": 0.0001639066468627034,
|
|
"loss": 2.2729,
|
|
"step": 26385
|
|
},
|
|
{
|
|
"epoch": 8.33546552949538,
|
|
"grad_norm": 0.046247847588770534,
|
|
"learning_rate": 0.00016360425733047757,
|
|
"loss": 2.2791,
|
|
"step": 26390
|
|
},
|
|
{
|
|
"epoch": 8.33704493405986,
|
|
"grad_norm": 0.0545684220382701,
|
|
"learning_rate": 0.00016330212214085306,
|
|
"loss": 2.3244,
|
|
"step": 26395
|
|
},
|
|
{
|
|
"epoch": 8.338624338624339,
|
|
"grad_norm": 0.0462319555311167,
|
|
"learning_rate": 0.00016300024138570746,
|
|
"loss": 2.4117,
|
|
"step": 26400
|
|
},
|
|
{
|
|
"epoch": 8.340203743188818,
|
|
"grad_norm": 0.04850344264398392,
|
|
"learning_rate": 0.00016269861515684047,
|
|
"loss": 2.3817,
|
|
"step": 26405
|
|
},
|
|
{
|
|
"epoch": 8.341783147753297,
|
|
"grad_norm": 0.05035687863061325,
|
|
"learning_rate": 0.00016239724354597519,
|
|
"loss": 2.3797,
|
|
"step": 26410
|
|
},
|
|
{
|
|
"epoch": 8.343362552317776,
|
|
"grad_norm": 0.05707521474873975,
|
|
"learning_rate": 0.00016209612664475637,
|
|
"loss": 2.461,
|
|
"step": 26415
|
|
},
|
|
{
|
|
"epoch": 8.344941956882256,
|
|
"grad_norm": 0.04699302116060703,
|
|
"learning_rate": 0.00016179526454475202,
|
|
"loss": 2.4187,
|
|
"step": 26420
|
|
},
|
|
{
|
|
"epoch": 8.346521361446735,
|
|
"grad_norm": 0.053523996291635755,
|
|
"learning_rate": 0.00016149465733745238,
|
|
"loss": 2.3255,
|
|
"step": 26425
|
|
},
|
|
{
|
|
"epoch": 8.348100766011214,
|
|
"grad_norm": 0.05194690202934393,
|
|
"learning_rate": 0.00016119430511427014,
|
|
"loss": 2.4033,
|
|
"step": 26430
|
|
},
|
|
{
|
|
"epoch": 8.349680170575693,
|
|
"grad_norm": 0.054657962419011275,
|
|
"learning_rate": 0.0001608942079665403,
|
|
"loss": 2.4114,
|
|
"step": 26435
|
|
},
|
|
{
|
|
"epoch": 8.351259575140173,
|
|
"grad_norm": 0.055225413042288445,
|
|
"learning_rate": 0.00016059436598552069,
|
|
"loss": 2.3767,
|
|
"step": 26440
|
|
},
|
|
{
|
|
"epoch": 8.352838979704652,
|
|
"grad_norm": 0.05268677918775418,
|
|
"learning_rate": 0.000160294779262391,
|
|
"loss": 2.3768,
|
|
"step": 26445
|
|
},
|
|
{
|
|
"epoch": 8.354418384269131,
|
|
"grad_norm": 0.05632519112731206,
|
|
"learning_rate": 0.00015999544788825425,
|
|
"loss": 2.413,
|
|
"step": 26450
|
|
},
|
|
{
|
|
"epoch": 8.35599778883361,
|
|
"grad_norm": 0.049034219555129196,
|
|
"learning_rate": 0.00015969637195413456,
|
|
"loss": 2.3094,
|
|
"step": 26455
|
|
},
|
|
{
|
|
"epoch": 8.35757719339809,
|
|
"grad_norm": 0.05535212452768547,
|
|
"learning_rate": 0.00015939755155097945,
|
|
"loss": 2.3666,
|
|
"step": 26460
|
|
},
|
|
{
|
|
"epoch": 8.359156597962569,
|
|
"grad_norm": 0.04734654037385947,
|
|
"learning_rate": 0.0001590989867696583,
|
|
"loss": 2.3512,
|
|
"step": 26465
|
|
},
|
|
{
|
|
"epoch": 8.360736002527048,
|
|
"grad_norm": 0.0435967203440791,
|
|
"learning_rate": 0.00015880067770096228,
|
|
"loss": 2.3269,
|
|
"step": 26470
|
|
},
|
|
{
|
|
"epoch": 8.362315407091527,
|
|
"grad_norm": 0.04425811481071514,
|
|
"learning_rate": 0.00015850262443560593,
|
|
"loss": 2.3649,
|
|
"step": 26475
|
|
},
|
|
{
|
|
"epoch": 8.363894811656007,
|
|
"grad_norm": 0.04438345363301554,
|
|
"learning_rate": 0.0001582048270642249,
|
|
"loss": 2.357,
|
|
"step": 26480
|
|
},
|
|
{
|
|
"epoch": 8.365474216220484,
|
|
"grad_norm": 0.048318944753731725,
|
|
"learning_rate": 0.00015790728567737766,
|
|
"loss": 2.3863,
|
|
"step": 26485
|
|
},
|
|
{
|
|
"epoch": 8.367053620784963,
|
|
"grad_norm": 0.05420293787173252,
|
|
"learning_rate": 0.0001576100003655445,
|
|
"loss": 2.3693,
|
|
"step": 26490
|
|
},
|
|
{
|
|
"epoch": 8.368633025349443,
|
|
"grad_norm": 0.04559022718920658,
|
|
"learning_rate": 0.0001573129712191279,
|
|
"loss": 2.3431,
|
|
"step": 26495
|
|
},
|
|
{
|
|
"epoch": 8.370212429913922,
|
|
"grad_norm": 0.04762563613401282,
|
|
"learning_rate": 0.0001570161983284528,
|
|
"loss": 2.3648,
|
|
"step": 26500
|
|
},
|
|
{
|
|
"epoch": 8.371791834478401,
|
|
"grad_norm": 0.05777111844522066,
|
|
"learning_rate": 0.00015671968178376572,
|
|
"loss": 2.3841,
|
|
"step": 26505
|
|
},
|
|
{
|
|
"epoch": 8.37337123904288,
|
|
"grad_norm": 0.049095515126832724,
|
|
"learning_rate": 0.0001564234216752357,
|
|
"loss": 2.4302,
|
|
"step": 26510
|
|
},
|
|
{
|
|
"epoch": 8.37495064360736,
|
|
"grad_norm": 0.04579683805403143,
|
|
"learning_rate": 0.0001561274180929534,
|
|
"loss": 2.3392,
|
|
"step": 26515
|
|
},
|
|
{
|
|
"epoch": 8.376530048171839,
|
|
"grad_norm": 0.048248477670352924,
|
|
"learning_rate": 0.00015583167112693153,
|
|
"loss": 2.4126,
|
|
"step": 26520
|
|
},
|
|
{
|
|
"epoch": 8.378109452736318,
|
|
"grad_norm": 0.04552189737279042,
|
|
"learning_rate": 0.00015553618086710508,
|
|
"loss": 2.4136,
|
|
"step": 26525
|
|
},
|
|
{
|
|
"epoch": 8.379688857300797,
|
|
"grad_norm": 0.050940319294448574,
|
|
"learning_rate": 0.00015524094740333028,
|
|
"loss": 2.2568,
|
|
"step": 26530
|
|
},
|
|
{
|
|
"epoch": 8.381268261865277,
|
|
"grad_norm": 0.05291216628105431,
|
|
"learning_rate": 0.0001549459708253863,
|
|
"loss": 2.3679,
|
|
"step": 26535
|
|
},
|
|
{
|
|
"epoch": 8.382847666429756,
|
|
"grad_norm": 0.04821052566882871,
|
|
"learning_rate": 0.00015465125122297342,
|
|
"loss": 2.3571,
|
|
"step": 26540
|
|
},
|
|
{
|
|
"epoch": 8.384427070994235,
|
|
"grad_norm": 0.04638352697588534,
|
|
"learning_rate": 0.00015435678868571369,
|
|
"loss": 2.3476,
|
|
"step": 26545
|
|
},
|
|
{
|
|
"epoch": 8.386006475558714,
|
|
"grad_norm": 0.046550515327761256,
|
|
"learning_rate": 0.00015406258330315171,
|
|
"loss": 2.3701,
|
|
"step": 26550
|
|
},
|
|
{
|
|
"epoch": 8.387585880123194,
|
|
"grad_norm": 0.04523603637288149,
|
|
"learning_rate": 0.00015376863516475338,
|
|
"loss": 2.4565,
|
|
"step": 26555
|
|
},
|
|
{
|
|
"epoch": 8.389165284687673,
|
|
"grad_norm": 0.04638226636861543,
|
|
"learning_rate": 0.00015347494435990615,
|
|
"loss": 2.3883,
|
|
"step": 26560
|
|
},
|
|
{
|
|
"epoch": 8.390744689252152,
|
|
"grad_norm": 0.055369001795188055,
|
|
"learning_rate": 0.00015318151097791998,
|
|
"loss": 2.4628,
|
|
"step": 26565
|
|
},
|
|
{
|
|
"epoch": 8.392324093816631,
|
|
"grad_norm": 0.05640221039565341,
|
|
"learning_rate": 0.0001528883351080259,
|
|
"loss": 2.4476,
|
|
"step": 26570
|
|
},
|
|
{
|
|
"epoch": 8.39390349838111,
|
|
"grad_norm": 0.05197482558497645,
|
|
"learning_rate": 0.00015259541683937673,
|
|
"loss": 2.3666,
|
|
"step": 26575
|
|
},
|
|
{
|
|
"epoch": 8.39548290294559,
|
|
"grad_norm": 0.046953061317044895,
|
|
"learning_rate": 0.00015230275626104705,
|
|
"loss": 2.4272,
|
|
"step": 26580
|
|
},
|
|
{
|
|
"epoch": 8.397062307510069,
|
|
"grad_norm": 0.04310952477961555,
|
|
"learning_rate": 0.00015201035346203284,
|
|
"loss": 2.3836,
|
|
"step": 26585
|
|
},
|
|
{
|
|
"epoch": 8.398641712074548,
|
|
"grad_norm": 0.05835831448667237,
|
|
"learning_rate": 0.0001517182085312524,
|
|
"loss": 2.3522,
|
|
"step": 26590
|
|
},
|
|
{
|
|
"epoch": 8.400221116639027,
|
|
"grad_norm": 0.048975691541835814,
|
|
"learning_rate": 0.00015142632155754478,
|
|
"loss": 2.3551,
|
|
"step": 26595
|
|
},
|
|
{
|
|
"epoch": 8.401800521203507,
|
|
"grad_norm": 0.056033320488535765,
|
|
"learning_rate": 0.0001511346926296713,
|
|
"loss": 2.4217,
|
|
"step": 26600
|
|
},
|
|
{
|
|
"epoch": 8.403379925767986,
|
|
"grad_norm": 0.06094107723061698,
|
|
"learning_rate": 0.00015084332183631422,
|
|
"loss": 2.4006,
|
|
"step": 26605
|
|
},
|
|
{
|
|
"epoch": 8.404959330332465,
|
|
"grad_norm": 0.05229380726945292,
|
|
"learning_rate": 0.0001505522092660776,
|
|
"loss": 2.4538,
|
|
"step": 26610
|
|
},
|
|
{
|
|
"epoch": 8.406538734896944,
|
|
"grad_norm": 0.05219638857585323,
|
|
"learning_rate": 0.00015026135500748684,
|
|
"loss": 2.427,
|
|
"step": 26615
|
|
},
|
|
{
|
|
"epoch": 8.408118139461424,
|
|
"grad_norm": 0.04864854277850766,
|
|
"learning_rate": 0.0001499707591489886,
|
|
"loss": 2.3032,
|
|
"step": 26620
|
|
},
|
|
{
|
|
"epoch": 8.409697544025903,
|
|
"grad_norm": 0.05130633586960696,
|
|
"learning_rate": 0.00014968042177895182,
|
|
"loss": 2.3415,
|
|
"step": 26625
|
|
},
|
|
{
|
|
"epoch": 8.411276948590382,
|
|
"grad_norm": 0.04961022287215968,
|
|
"learning_rate": 0.0001493903429856659,
|
|
"loss": 2.3595,
|
|
"step": 26630
|
|
},
|
|
{
|
|
"epoch": 8.412856353154861,
|
|
"grad_norm": 0.05268348341405079,
|
|
"learning_rate": 0.00014910052285734178,
|
|
"loss": 2.388,
|
|
"step": 26635
|
|
},
|
|
{
|
|
"epoch": 8.414435757719339,
|
|
"grad_norm": 0.05251257408511266,
|
|
"learning_rate": 0.00014881096148211239,
|
|
"loss": 2.367,
|
|
"step": 26640
|
|
},
|
|
{
|
|
"epoch": 8.416015162283818,
|
|
"grad_norm": 0.04519350339044876,
|
|
"learning_rate": 0.00014852165894803083,
|
|
"loss": 2.5289,
|
|
"step": 26645
|
|
},
|
|
{
|
|
"epoch": 8.417594566848297,
|
|
"grad_norm": 0.052478266597039615,
|
|
"learning_rate": 0.00014823261534307287,
|
|
"loss": 2.3678,
|
|
"step": 26650
|
|
},
|
|
{
|
|
"epoch": 8.419173971412777,
|
|
"grad_norm": 0.045857352778371906,
|
|
"learning_rate": 0.00014794383075513451,
|
|
"loss": 2.377,
|
|
"step": 26655
|
|
},
|
|
{
|
|
"epoch": 8.420753375977256,
|
|
"grad_norm": 0.05592977801746524,
|
|
"learning_rate": 0.0001476553052720333,
|
|
"loss": 2.4182,
|
|
"step": 26660
|
|
},
|
|
{
|
|
"epoch": 8.422332780541735,
|
|
"grad_norm": 0.04213014073549877,
|
|
"learning_rate": 0.00014736703898150794,
|
|
"loss": 2.4226,
|
|
"step": 26665
|
|
},
|
|
{
|
|
"epoch": 8.423912185106214,
|
|
"grad_norm": 0.04737225206610175,
|
|
"learning_rate": 0.0001470790319712183,
|
|
"loss": 2.3809,
|
|
"step": 26670
|
|
},
|
|
{
|
|
"epoch": 8.425491589670694,
|
|
"grad_norm": 0.04591930059151109,
|
|
"learning_rate": 0.00014679128432874546,
|
|
"loss": 2.3618,
|
|
"step": 26675
|
|
},
|
|
{
|
|
"epoch": 8.427070994235173,
|
|
"grad_norm": 0.0490800680706064,
|
|
"learning_rate": 0.00014650379614159192,
|
|
"loss": 2.3649,
|
|
"step": 26680
|
|
},
|
|
{
|
|
"epoch": 8.428650398799652,
|
|
"grad_norm": 0.04682291738342209,
|
|
"learning_rate": 0.00014621656749718071,
|
|
"loss": 2.3481,
|
|
"step": 26685
|
|
},
|
|
{
|
|
"epoch": 8.430229803364131,
|
|
"grad_norm": 0.04462638581380862,
|
|
"learning_rate": 0.00014592959848285647,
|
|
"loss": 2.3209,
|
|
"step": 26690
|
|
},
|
|
{
|
|
"epoch": 8.43180920792861,
|
|
"grad_norm": 0.050556366675289864,
|
|
"learning_rate": 0.00014564288918588464,
|
|
"loss": 2.3737,
|
|
"step": 26695
|
|
},
|
|
{
|
|
"epoch": 8.43338861249309,
|
|
"grad_norm": 0.04678957148115946,
|
|
"learning_rate": 0.00014535643969345146,
|
|
"loss": 2.4973,
|
|
"step": 26700
|
|
},
|
|
{
|
|
"epoch": 8.43496801705757,
|
|
"grad_norm": 0.0850072426060503,
|
|
"learning_rate": 0.0001450702500926645,
|
|
"loss": 2.4133,
|
|
"step": 26705
|
|
},
|
|
{
|
|
"epoch": 8.436547421622048,
|
|
"grad_norm": 0.04691358996576342,
|
|
"learning_rate": 0.00014478432047055202,
|
|
"loss": 2.4485,
|
|
"step": 26710
|
|
},
|
|
{
|
|
"epoch": 8.438126826186528,
|
|
"grad_norm": 0.046276320227182303,
|
|
"learning_rate": 0.0001444986509140638,
|
|
"loss": 2.3055,
|
|
"step": 26715
|
|
},
|
|
{
|
|
"epoch": 8.439706230751007,
|
|
"grad_norm": 0.04898850587615209,
|
|
"learning_rate": 0.00014421324151006986,
|
|
"loss": 2.3767,
|
|
"step": 26720
|
|
},
|
|
{
|
|
"epoch": 8.441285635315486,
|
|
"grad_norm": 0.04747495705557107,
|
|
"learning_rate": 0.00014392809234536118,
|
|
"loss": 2.331,
|
|
"step": 26725
|
|
},
|
|
{
|
|
"epoch": 8.442865039879965,
|
|
"grad_norm": 0.05078741604035361,
|
|
"learning_rate": 0.00014364320350665016,
|
|
"loss": 2.3616,
|
|
"step": 26730
|
|
},
|
|
{
|
|
"epoch": 8.444444444444445,
|
|
"grad_norm": 0.04340690204052605,
|
|
"learning_rate": 0.0001433585750805695,
|
|
"loss": 2.4084,
|
|
"step": 26735
|
|
},
|
|
{
|
|
"epoch": 8.446023849008924,
|
|
"grad_norm": 0.04199834629885642,
|
|
"learning_rate": 0.00014307420715367302,
|
|
"loss": 2.3922,
|
|
"step": 26740
|
|
},
|
|
{
|
|
"epoch": 8.447603253573403,
|
|
"grad_norm": 0.044490710119333865,
|
|
"learning_rate": 0.00014279009981243507,
|
|
"loss": 2.3083,
|
|
"step": 26745
|
|
},
|
|
{
|
|
"epoch": 8.449182658137882,
|
|
"grad_norm": 0.040045586116287835,
|
|
"learning_rate": 0.00014250625314325094,
|
|
"loss": 2.3888,
|
|
"step": 26750
|
|
},
|
|
{
|
|
"epoch": 8.450762062702362,
|
|
"grad_norm": 0.04978781860839173,
|
|
"learning_rate": 0.0001422226672324366,
|
|
"loss": 2.3644,
|
|
"step": 26755
|
|
},
|
|
{
|
|
"epoch": 8.45234146726684,
|
|
"grad_norm": 0.04652874495738621,
|
|
"learning_rate": 0.0001419393421662284,
|
|
"loss": 2.4269,
|
|
"step": 26760
|
|
},
|
|
{
|
|
"epoch": 8.45392087183132,
|
|
"grad_norm": 0.050196459481046826,
|
|
"learning_rate": 0.00014165627803078417,
|
|
"loss": 2.4805,
|
|
"step": 26765
|
|
},
|
|
{
|
|
"epoch": 8.4555002763958,
|
|
"grad_norm": 0.04059194596613595,
|
|
"learning_rate": 0.00014137347491218166,
|
|
"loss": 2.385,
|
|
"step": 26770
|
|
},
|
|
{
|
|
"epoch": 8.457079680960279,
|
|
"grad_norm": 0.052167719274323225,
|
|
"learning_rate": 0.0001410909328964193,
|
|
"loss": 2.3675,
|
|
"step": 26775
|
|
},
|
|
{
|
|
"epoch": 8.458659085524758,
|
|
"grad_norm": 0.043088470669438834,
|
|
"learning_rate": 0.00014080865206941674,
|
|
"loss": 2.3721,
|
|
"step": 26780
|
|
},
|
|
{
|
|
"epoch": 8.460238490089237,
|
|
"grad_norm": 0.043145268513182636,
|
|
"learning_rate": 0.0001405266325170136,
|
|
"loss": 2.3419,
|
|
"step": 26785
|
|
},
|
|
{
|
|
"epoch": 8.461817894653716,
|
|
"grad_norm": 0.04893717640972426,
|
|
"learning_rate": 0.00014024487432497012,
|
|
"loss": 2.4063,
|
|
"step": 26790
|
|
},
|
|
{
|
|
"epoch": 8.463397299218194,
|
|
"grad_norm": 0.046200764762835744,
|
|
"learning_rate": 0.00013996337757896725,
|
|
"loss": 2.3645,
|
|
"step": 26795
|
|
},
|
|
{
|
|
"epoch": 8.464976703782673,
|
|
"grad_norm": 0.05843755466813624,
|
|
"learning_rate": 0.00013968214236460618,
|
|
"loss": 2.3927,
|
|
"step": 26800
|
|
},
|
|
{
|
|
"epoch": 8.466556108347152,
|
|
"grad_norm": 0.05640577883199117,
|
|
"learning_rate": 0.00013940116876740905,
|
|
"loss": 2.4421,
|
|
"step": 26805
|
|
},
|
|
{
|
|
"epoch": 8.468135512911632,
|
|
"grad_norm": 0.04517329373689541,
|
|
"learning_rate": 0.00013912045687281793,
|
|
"loss": 2.2847,
|
|
"step": 26810
|
|
},
|
|
{
|
|
"epoch": 8.46971491747611,
|
|
"grad_norm": 0.04739957033206674,
|
|
"learning_rate": 0.00013884000676619545,
|
|
"loss": 2.3114,
|
|
"step": 26815
|
|
},
|
|
{
|
|
"epoch": 8.47129432204059,
|
|
"grad_norm": 0.043223701891898524,
|
|
"learning_rate": 0.00013855981853282495,
|
|
"loss": 2.3103,
|
|
"step": 26820
|
|
},
|
|
{
|
|
"epoch": 8.47287372660507,
|
|
"grad_norm": 0.04433846046078752,
|
|
"learning_rate": 0.0001382798922579096,
|
|
"loss": 2.3672,
|
|
"step": 26825
|
|
},
|
|
{
|
|
"epoch": 8.474453131169549,
|
|
"grad_norm": 0.04629210415232134,
|
|
"learning_rate": 0.00013800022802657342,
|
|
"loss": 2.553,
|
|
"step": 26830
|
|
},
|
|
{
|
|
"epoch": 8.476032535734028,
|
|
"grad_norm": 0.05006461672492424,
|
|
"learning_rate": 0.00013772082592386058,
|
|
"loss": 2.4208,
|
|
"step": 26835
|
|
},
|
|
{
|
|
"epoch": 8.477611940298507,
|
|
"grad_norm": 0.05271271109362467,
|
|
"learning_rate": 0.00013744168603473518,
|
|
"loss": 2.3943,
|
|
"step": 26840
|
|
},
|
|
{
|
|
"epoch": 8.479191344862986,
|
|
"grad_norm": 0.04604968648561785,
|
|
"learning_rate": 0.00013716280844408213,
|
|
"loss": 2.4582,
|
|
"step": 26845
|
|
},
|
|
{
|
|
"epoch": 8.480770749427466,
|
|
"grad_norm": 0.04884660446778068,
|
|
"learning_rate": 0.00013688419323670597,
|
|
"loss": 2.4209,
|
|
"step": 26850
|
|
},
|
|
{
|
|
"epoch": 8.482350153991945,
|
|
"grad_norm": 0.05080444628439175,
|
|
"learning_rate": 0.00013660584049733228,
|
|
"loss": 2.3041,
|
|
"step": 26855
|
|
},
|
|
{
|
|
"epoch": 8.483929558556424,
|
|
"grad_norm": 0.04919265879582246,
|
|
"learning_rate": 0.00013632775031060607,
|
|
"loss": 2.4452,
|
|
"step": 26860
|
|
},
|
|
{
|
|
"epoch": 8.485508963120903,
|
|
"grad_norm": 0.045609570661221,
|
|
"learning_rate": 0.00013604992276109262,
|
|
"loss": 2.2835,
|
|
"step": 26865
|
|
},
|
|
{
|
|
"epoch": 8.487088367685383,
|
|
"grad_norm": 0.05316964828168353,
|
|
"learning_rate": 0.00013577235793327792,
|
|
"loss": 2.4214,
|
|
"step": 26870
|
|
},
|
|
{
|
|
"epoch": 8.488667772249862,
|
|
"grad_norm": 0.063021037748091,
|
|
"learning_rate": 0.0001354950559115673,
|
|
"loss": 2.4789,
|
|
"step": 26875
|
|
},
|
|
{
|
|
"epoch": 8.490247176814341,
|
|
"grad_norm": 0.04980120465793191,
|
|
"learning_rate": 0.0001352180167802871,
|
|
"loss": 2.4388,
|
|
"step": 26880
|
|
},
|
|
{
|
|
"epoch": 8.49182658137882,
|
|
"grad_norm": 0.05399731781967924,
|
|
"learning_rate": 0.00013494124062368262,
|
|
"loss": 2.3671,
|
|
"step": 26885
|
|
},
|
|
{
|
|
"epoch": 8.4934059859433,
|
|
"grad_norm": 0.04841454391886285,
|
|
"learning_rate": 0.00013466472752591952,
|
|
"loss": 2.3689,
|
|
"step": 26890
|
|
},
|
|
{
|
|
"epoch": 8.494985390507779,
|
|
"grad_norm": 0.03998517085400193,
|
|
"learning_rate": 0.0001343884775710843,
|
|
"loss": 2.3471,
|
|
"step": 26895
|
|
},
|
|
{
|
|
"epoch": 8.496564795072258,
|
|
"grad_norm": 0.04565734698974977,
|
|
"learning_rate": 0.00013411249084318246,
|
|
"loss": 2.3439,
|
|
"step": 26900
|
|
},
|
|
{
|
|
"epoch": 8.498144199636737,
|
|
"grad_norm": 0.04601453715272313,
|
|
"learning_rate": 0.0001338367674261397,
|
|
"loss": 2.3804,
|
|
"step": 26905
|
|
},
|
|
{
|
|
"epoch": 8.499723604201217,
|
|
"grad_norm": 0.11609018795950421,
|
|
"learning_rate": 0.00013356130740380202,
|
|
"loss": 2.439,
|
|
"step": 26910
|
|
},
|
|
{
|
|
"epoch": 8.501303008765696,
|
|
"grad_norm": 0.045572490502842455,
|
|
"learning_rate": 0.0001332861108599348,
|
|
"loss": 2.3025,
|
|
"step": 26915
|
|
},
|
|
{
|
|
"epoch": 8.502882413330175,
|
|
"grad_norm": 0.05861871484665386,
|
|
"learning_rate": 0.0001330111778782238,
|
|
"loss": 2.4143,
|
|
"step": 26920
|
|
},
|
|
{
|
|
"epoch": 8.504461817894654,
|
|
"grad_norm": 0.04635822578408009,
|
|
"learning_rate": 0.00013273650854227437,
|
|
"loss": 2.4446,
|
|
"step": 26925
|
|
},
|
|
{
|
|
"epoch": 8.506041222459134,
|
|
"grad_norm": 0.053073309477083,
|
|
"learning_rate": 0.00013246210293561144,
|
|
"loss": 2.4323,
|
|
"step": 26930
|
|
},
|
|
{
|
|
"epoch": 8.507620627023613,
|
|
"grad_norm": 0.04600629854742416,
|
|
"learning_rate": 0.0001321879611416803,
|
|
"loss": 2.4094,
|
|
"step": 26935
|
|
},
|
|
{
|
|
"epoch": 8.509200031588092,
|
|
"grad_norm": 0.05531048033616932,
|
|
"learning_rate": 0.00013191408324384523,
|
|
"loss": 2.4388,
|
|
"step": 26940
|
|
},
|
|
{
|
|
"epoch": 8.510779436152571,
|
|
"grad_norm": 0.04913565330415894,
|
|
"learning_rate": 0.0001316404693253914,
|
|
"loss": 2.4353,
|
|
"step": 26945
|
|
},
|
|
{
|
|
"epoch": 8.512358840717049,
|
|
"grad_norm": 0.05304389817746647,
|
|
"learning_rate": 0.00013136711946952273,
|
|
"loss": 2.4191,
|
|
"step": 26950
|
|
},
|
|
{
|
|
"epoch": 8.51393824528153,
|
|
"grad_norm": 0.04397066264998692,
|
|
"learning_rate": 0.000131094033759363,
|
|
"loss": 2.3114,
|
|
"step": 26955
|
|
},
|
|
{
|
|
"epoch": 8.515517649846007,
|
|
"grad_norm": 0.04816854374254131,
|
|
"learning_rate": 0.00013082121227795619,
|
|
"loss": 2.3708,
|
|
"step": 26960
|
|
},
|
|
{
|
|
"epoch": 8.517097054410486,
|
|
"grad_norm": 0.04685898216009314,
|
|
"learning_rate": 0.00013054865510826508,
|
|
"loss": 2.4369,
|
|
"step": 26965
|
|
},
|
|
{
|
|
"epoch": 8.518676458974966,
|
|
"grad_norm": 0.05330126054735967,
|
|
"learning_rate": 0.00013027636233317342,
|
|
"loss": 2.3684,
|
|
"step": 26970
|
|
},
|
|
{
|
|
"epoch": 8.520255863539445,
|
|
"grad_norm": 0.04876095077472937,
|
|
"learning_rate": 0.00013000433403548295,
|
|
"loss": 2.4019,
|
|
"step": 26975
|
|
},
|
|
{
|
|
"epoch": 8.521835268103924,
|
|
"grad_norm": 0.047721082130814954,
|
|
"learning_rate": 0.00012973257029791563,
|
|
"loss": 2.4544,
|
|
"step": 26980
|
|
},
|
|
{
|
|
"epoch": 8.523414672668403,
|
|
"grad_norm": 0.05133114655764637,
|
|
"learning_rate": 0.00012946107120311368,
|
|
"loss": 2.4207,
|
|
"step": 26985
|
|
},
|
|
{
|
|
"epoch": 8.524994077232883,
|
|
"grad_norm": 0.04997269340629122,
|
|
"learning_rate": 0.00012918983683363772,
|
|
"loss": 2.46,
|
|
"step": 26990
|
|
},
|
|
{
|
|
"epoch": 8.526573481797362,
|
|
"grad_norm": 0.048481489112942415,
|
|
"learning_rate": 0.0001289188672719689,
|
|
"loss": 2.3359,
|
|
"step": 26995
|
|
},
|
|
{
|
|
"epoch": 8.528152886361841,
|
|
"grad_norm": 0.0496135719248878,
|
|
"learning_rate": 0.00012864816260050693,
|
|
"loss": 2.3773,
|
|
"step": 27000
|
|
},
|
|
{
|
|
"epoch": 8.52973229092632,
|
|
"grad_norm": 0.04479886542411035,
|
|
"learning_rate": 0.00012837772290157133,
|
|
"loss": 2.4574,
|
|
"step": 27005
|
|
},
|
|
{
|
|
"epoch": 8.5313116954908,
|
|
"grad_norm": 0.07687684708137763,
|
|
"learning_rate": 0.00012810754825740144,
|
|
"loss": 2.3684,
|
|
"step": 27010
|
|
},
|
|
{
|
|
"epoch": 8.532891100055279,
|
|
"grad_norm": 0.04561187302439505,
|
|
"learning_rate": 0.00012783763875015542,
|
|
"loss": 2.3255,
|
|
"step": 27015
|
|
},
|
|
{
|
|
"epoch": 8.534470504619758,
|
|
"grad_norm": 0.04867719139249393,
|
|
"learning_rate": 0.00012756799446191113,
|
|
"loss": 2.515,
|
|
"step": 27020
|
|
},
|
|
{
|
|
"epoch": 8.536049909184237,
|
|
"grad_norm": 0.04745768862703271,
|
|
"learning_rate": 0.0001272986154746656,
|
|
"loss": 2.3639,
|
|
"step": 27025
|
|
},
|
|
{
|
|
"epoch": 8.537629313748717,
|
|
"grad_norm": 0.049244802139073084,
|
|
"learning_rate": 0.00012702950187033502,
|
|
"loss": 2.3926,
|
|
"step": 27030
|
|
},
|
|
{
|
|
"epoch": 8.539208718313196,
|
|
"grad_norm": 0.04750145665638371,
|
|
"learning_rate": 0.00012676065373075552,
|
|
"loss": 2.4717,
|
|
"step": 27035
|
|
},
|
|
{
|
|
"epoch": 8.540788122877675,
|
|
"grad_norm": 0.04806728344499754,
|
|
"learning_rate": 0.00012649207113768203,
|
|
"loss": 2.363,
|
|
"step": 27040
|
|
},
|
|
{
|
|
"epoch": 8.542367527442154,
|
|
"grad_norm": 0.04496497615356363,
|
|
"learning_rate": 0.00012622375417278842,
|
|
"loss": 2.3572,
|
|
"step": 27045
|
|
},
|
|
{
|
|
"epoch": 8.543946932006634,
|
|
"grad_norm": 0.04310426247596021,
|
|
"learning_rate": 0.00012595570291766878,
|
|
"loss": 2.36,
|
|
"step": 27050
|
|
},
|
|
{
|
|
"epoch": 8.545526336571113,
|
|
"grad_norm": 0.04670895850589334,
|
|
"learning_rate": 0.00012568791745383513,
|
|
"loss": 2.3836,
|
|
"step": 27055
|
|
},
|
|
{
|
|
"epoch": 8.547105741135592,
|
|
"grad_norm": 0.04226904425227385,
|
|
"learning_rate": 0.0001254203978627201,
|
|
"loss": 2.3702,
|
|
"step": 27060
|
|
},
|
|
{
|
|
"epoch": 8.548685145700071,
|
|
"grad_norm": 0.04512397132344977,
|
|
"learning_rate": 0.00012515314422567402,
|
|
"loss": 2.3634,
|
|
"step": 27065
|
|
},
|
|
{
|
|
"epoch": 8.55026455026455,
|
|
"grad_norm": 0.04799706807324663,
|
|
"learning_rate": 0.00012488615662396707,
|
|
"loss": 2.3858,
|
|
"step": 27070
|
|
},
|
|
{
|
|
"epoch": 8.55184395482903,
|
|
"grad_norm": 0.055867285713436535,
|
|
"learning_rate": 0.00012461943513878882,
|
|
"loss": 2.5932,
|
|
"step": 27075
|
|
},
|
|
{
|
|
"epoch": 8.55342335939351,
|
|
"grad_norm": 0.043694439654936905,
|
|
"learning_rate": 0.00012435297985124717,
|
|
"loss": 2.5261,
|
|
"step": 27080
|
|
},
|
|
{
|
|
"epoch": 8.555002763957988,
|
|
"grad_norm": 0.05598575873544264,
|
|
"learning_rate": 0.00012408679084236984,
|
|
"loss": 2.4595,
|
|
"step": 27085
|
|
},
|
|
{
|
|
"epoch": 8.556582168522468,
|
|
"grad_norm": 0.047626173937539366,
|
|
"learning_rate": 0.00012382086819310312,
|
|
"loss": 2.3446,
|
|
"step": 27090
|
|
},
|
|
{
|
|
"epoch": 8.558161573086947,
|
|
"grad_norm": 0.04451372643455489,
|
|
"learning_rate": 0.00012355521198431207,
|
|
"loss": 2.4342,
|
|
"step": 27095
|
|
},
|
|
{
|
|
"epoch": 8.559740977651426,
|
|
"grad_norm": 0.046612085781107815,
|
|
"learning_rate": 0.00012328982229678153,
|
|
"loss": 2.4306,
|
|
"step": 27100
|
|
},
|
|
{
|
|
"epoch": 8.561320382215905,
|
|
"grad_norm": 0.05063744861099408,
|
|
"learning_rate": 0.00012302469921121462,
|
|
"loss": 2.4076,
|
|
"step": 27105
|
|
},
|
|
{
|
|
"epoch": 8.562899786780385,
|
|
"grad_norm": 0.04704547000760339,
|
|
"learning_rate": 0.0001227598428082335,
|
|
"loss": 2.359,
|
|
"step": 27110
|
|
},
|
|
{
|
|
"epoch": 8.564479191344862,
|
|
"grad_norm": 0.054329836868169466,
|
|
"learning_rate": 0.00012249525316837927,
|
|
"loss": 2.4404,
|
|
"step": 27115
|
|
},
|
|
{
|
|
"epoch": 8.566058595909341,
|
|
"grad_norm": 0.05317607530238786,
|
|
"learning_rate": 0.00012223093037211187,
|
|
"loss": 2.3496,
|
|
"step": 27120
|
|
},
|
|
{
|
|
"epoch": 8.56763800047382,
|
|
"grad_norm": 0.04136473952865533,
|
|
"learning_rate": 0.00012196687449981047,
|
|
"loss": 2.431,
|
|
"step": 27125
|
|
},
|
|
{
|
|
"epoch": 8.5692174050383,
|
|
"grad_norm": 0.043318270853819764,
|
|
"learning_rate": 0.00012170308563177268,
|
|
"loss": 2.3985,
|
|
"step": 27130
|
|
},
|
|
{
|
|
"epoch": 8.57079680960278,
|
|
"grad_norm": 0.04751826112051017,
|
|
"learning_rate": 0.00012143956384821476,
|
|
"loss": 2.4557,
|
|
"step": 27135
|
|
},
|
|
{
|
|
"epoch": 8.572376214167258,
|
|
"grad_norm": 0.042756837797052694,
|
|
"learning_rate": 0.00012117630922927236,
|
|
"loss": 2.3584,
|
|
"step": 27140
|
|
},
|
|
{
|
|
"epoch": 8.573955618731738,
|
|
"grad_norm": 0.04847605148089578,
|
|
"learning_rate": 0.00012091332185499915,
|
|
"loss": 2.3624,
|
|
"step": 27145
|
|
},
|
|
{
|
|
"epoch": 8.575535023296217,
|
|
"grad_norm": 0.04483130882326042,
|
|
"learning_rate": 0.00012065060180536858,
|
|
"loss": 2.5897,
|
|
"step": 27150
|
|
},
|
|
{
|
|
"epoch": 8.577114427860696,
|
|
"grad_norm": 0.048787954003955754,
|
|
"learning_rate": 0.00012038814916027141,
|
|
"loss": 2.4003,
|
|
"step": 27155
|
|
},
|
|
{
|
|
"epoch": 8.578693832425175,
|
|
"grad_norm": 0.05199760419375112,
|
|
"learning_rate": 0.00012012596399951791,
|
|
"loss": 2.3965,
|
|
"step": 27160
|
|
},
|
|
{
|
|
"epoch": 8.580273236989655,
|
|
"grad_norm": 0.044039090468451725,
|
|
"learning_rate": 0.00011986404640283732,
|
|
"loss": 2.365,
|
|
"step": 27165
|
|
},
|
|
{
|
|
"epoch": 8.581852641554134,
|
|
"grad_norm": 0.052250016311700974,
|
|
"learning_rate": 0.0001196023964498767,
|
|
"loss": 2.3715,
|
|
"step": 27170
|
|
},
|
|
{
|
|
"epoch": 8.583432046118613,
|
|
"grad_norm": 0.04163668584358668,
|
|
"learning_rate": 0.00011934101422020238,
|
|
"loss": 2.3493,
|
|
"step": 27175
|
|
},
|
|
{
|
|
"epoch": 8.585011450683092,
|
|
"grad_norm": 0.04326699889371583,
|
|
"learning_rate": 0.00011907989979329904,
|
|
"loss": 2.3631,
|
|
"step": 27180
|
|
},
|
|
{
|
|
"epoch": 8.586590855247572,
|
|
"grad_norm": 0.04830817975399211,
|
|
"learning_rate": 0.00011881905324856967,
|
|
"loss": 2.3557,
|
|
"step": 27185
|
|
},
|
|
{
|
|
"epoch": 8.58817025981205,
|
|
"grad_norm": 0.042318808544985625,
|
|
"learning_rate": 0.00011855847466533632,
|
|
"loss": 2.353,
|
|
"step": 27190
|
|
},
|
|
{
|
|
"epoch": 8.58974966437653,
|
|
"grad_norm": 0.049810077206891894,
|
|
"learning_rate": 0.00011829816412283911,
|
|
"loss": 2.3398,
|
|
"step": 27195
|
|
},
|
|
{
|
|
"epoch": 8.59132906894101,
|
|
"grad_norm": 0.04226567704517314,
|
|
"learning_rate": 0.00011803812170023687,
|
|
"loss": 2.4034,
|
|
"step": 27200
|
|
},
|
|
{
|
|
"epoch": 8.592908473505489,
|
|
"grad_norm": 0.058126044736796,
|
|
"learning_rate": 0.00011777834747660676,
|
|
"loss": 2.4602,
|
|
"step": 27205
|
|
},
|
|
{
|
|
"epoch": 8.594487878069968,
|
|
"grad_norm": 0.04136207040910559,
|
|
"learning_rate": 0.00011751884153094438,
|
|
"loss": 2.3869,
|
|
"step": 27210
|
|
},
|
|
{
|
|
"epoch": 8.596067282634447,
|
|
"grad_norm": 0.053233027417794544,
|
|
"learning_rate": 0.00011725960394216418,
|
|
"loss": 2.4308,
|
|
"step": 27215
|
|
},
|
|
{
|
|
"epoch": 8.597646687198926,
|
|
"grad_norm": 0.04607223624915819,
|
|
"learning_rate": 0.00011700063478909817,
|
|
"loss": 2.2557,
|
|
"step": 27220
|
|
},
|
|
{
|
|
"epoch": 8.599226091763406,
|
|
"grad_norm": 0.04288248737709212,
|
|
"learning_rate": 0.00011674193415049772,
|
|
"loss": 2.443,
|
|
"step": 27225
|
|
},
|
|
{
|
|
"epoch": 8.600805496327885,
|
|
"grad_norm": 0.057320515749170395,
|
|
"learning_rate": 0.00011648350210503178,
|
|
"loss": 2.4144,
|
|
"step": 27230
|
|
},
|
|
{
|
|
"epoch": 8.602384900892364,
|
|
"grad_norm": 0.04733134712064031,
|
|
"learning_rate": 0.00011622533873128771,
|
|
"loss": 2.344,
|
|
"step": 27235
|
|
},
|
|
{
|
|
"epoch": 8.603964305456843,
|
|
"grad_norm": 0.04635166920196346,
|
|
"learning_rate": 0.00011596744410777205,
|
|
"loss": 2.3624,
|
|
"step": 27240
|
|
},
|
|
{
|
|
"epoch": 8.605543710021323,
|
|
"grad_norm": 0.0441457813955413,
|
|
"learning_rate": 0.00011570981831290805,
|
|
"loss": 2.4046,
|
|
"step": 27245
|
|
},
|
|
{
|
|
"epoch": 8.607123114585802,
|
|
"grad_norm": 0.05363622936563738,
|
|
"learning_rate": 0.0001154524614250383,
|
|
"loss": 2.3747,
|
|
"step": 27250
|
|
},
|
|
{
|
|
"epoch": 8.608702519150281,
|
|
"grad_norm": 0.0578032377866284,
|
|
"learning_rate": 0.0001151953735224236,
|
|
"loss": 2.4532,
|
|
"step": 27255
|
|
},
|
|
{
|
|
"epoch": 8.61028192371476,
|
|
"grad_norm": 0.04490790333327239,
|
|
"learning_rate": 0.00011493855468324255,
|
|
"loss": 2.5246,
|
|
"step": 27260
|
|
},
|
|
{
|
|
"epoch": 8.61186132827924,
|
|
"grad_norm": 0.05386919061929367,
|
|
"learning_rate": 0.00011468200498559234,
|
|
"loss": 2.3122,
|
|
"step": 27265
|
|
},
|
|
{
|
|
"epoch": 8.613440732843717,
|
|
"grad_norm": 0.04117088259884821,
|
|
"learning_rate": 0.00011442572450748801,
|
|
"loss": 2.3539,
|
|
"step": 27270
|
|
},
|
|
{
|
|
"epoch": 8.615020137408198,
|
|
"grad_norm": 0.04948064248550165,
|
|
"learning_rate": 0.00011416971332686243,
|
|
"loss": 2.4723,
|
|
"step": 27275
|
|
},
|
|
{
|
|
"epoch": 8.616599541972676,
|
|
"grad_norm": 0.04401771619064978,
|
|
"learning_rate": 0.00011391397152156768,
|
|
"loss": 2.3534,
|
|
"step": 27280
|
|
},
|
|
{
|
|
"epoch": 8.618178946537155,
|
|
"grad_norm": 0.07132376617845432,
|
|
"learning_rate": 0.00011365849916937276,
|
|
"loss": 2.4067,
|
|
"step": 27285
|
|
},
|
|
{
|
|
"epoch": 8.619758351101634,
|
|
"grad_norm": 0.052906899995070006,
|
|
"learning_rate": 0.0001134032963479652,
|
|
"loss": 2.3107,
|
|
"step": 27290
|
|
},
|
|
{
|
|
"epoch": 8.621337755666113,
|
|
"grad_norm": 0.04631271081941863,
|
|
"learning_rate": 0.00011314836313495069,
|
|
"loss": 2.4194,
|
|
"step": 27295
|
|
},
|
|
{
|
|
"epoch": 8.622917160230593,
|
|
"grad_norm": 0.055879728433552676,
|
|
"learning_rate": 0.00011289369960785234,
|
|
"loss": 2.422,
|
|
"step": 27300
|
|
},
|
|
{
|
|
"epoch": 8.624496564795072,
|
|
"grad_norm": 0.045166759824474614,
|
|
"learning_rate": 0.00011263930584411242,
|
|
"loss": 2.3003,
|
|
"step": 27305
|
|
},
|
|
{
|
|
"epoch": 8.626075969359551,
|
|
"grad_norm": 0.0438466195263903,
|
|
"learning_rate": 0.00011238518192108982,
|
|
"loss": 2.437,
|
|
"step": 27310
|
|
},
|
|
{
|
|
"epoch": 8.62765537392403,
|
|
"grad_norm": 0.050246720491075134,
|
|
"learning_rate": 0.00011213132791606251,
|
|
"loss": 2.3956,
|
|
"step": 27315
|
|
},
|
|
{
|
|
"epoch": 8.62923477848851,
|
|
"grad_norm": 0.04891591637877597,
|
|
"learning_rate": 0.00011187774390622563,
|
|
"loss": 2.4418,
|
|
"step": 27320
|
|
},
|
|
{
|
|
"epoch": 8.630814183052989,
|
|
"grad_norm": 0.05314097459143172,
|
|
"learning_rate": 0.00011162442996869215,
|
|
"loss": 2.3765,
|
|
"step": 27325
|
|
},
|
|
{
|
|
"epoch": 8.632393587617468,
|
|
"grad_norm": 0.04941419266303433,
|
|
"learning_rate": 0.00011137138618049402,
|
|
"loss": 2.4047,
|
|
"step": 27330
|
|
},
|
|
{
|
|
"epoch": 8.633972992181947,
|
|
"grad_norm": 0.04342886656314699,
|
|
"learning_rate": 0.00011111861261857958,
|
|
"loss": 2.3687,
|
|
"step": 27335
|
|
},
|
|
{
|
|
"epoch": 8.635552396746426,
|
|
"grad_norm": 0.042425583545525086,
|
|
"learning_rate": 0.00011086610935981556,
|
|
"loss": 2.3883,
|
|
"step": 27340
|
|
},
|
|
{
|
|
"epoch": 8.637131801310906,
|
|
"grad_norm": 0.048788257732269476,
|
|
"learning_rate": 0.00011061387648098708,
|
|
"loss": 2.426,
|
|
"step": 27345
|
|
},
|
|
{
|
|
"epoch": 8.638711205875385,
|
|
"grad_norm": 0.054371736603684495,
|
|
"learning_rate": 0.00011036191405879614,
|
|
"loss": 2.4437,
|
|
"step": 27350
|
|
},
|
|
{
|
|
"epoch": 8.640290610439864,
|
|
"grad_norm": 0.046904955329134176,
|
|
"learning_rate": 0.00011011022216986322,
|
|
"loss": 2.5424,
|
|
"step": 27355
|
|
},
|
|
{
|
|
"epoch": 8.641870015004343,
|
|
"grad_norm": 0.0442531954142581,
|
|
"learning_rate": 0.00010985880089072608,
|
|
"loss": 2.3574,
|
|
"step": 27360
|
|
},
|
|
{
|
|
"epoch": 8.643449419568823,
|
|
"grad_norm": 0.04775785352655346,
|
|
"learning_rate": 0.00010960765029784015,
|
|
"loss": 2.382,
|
|
"step": 27365
|
|
},
|
|
{
|
|
"epoch": 8.645028824133302,
|
|
"grad_norm": 0.04706402749975755,
|
|
"learning_rate": 0.00010935677046757907,
|
|
"loss": 2.3971,
|
|
"step": 27370
|
|
},
|
|
{
|
|
"epoch": 8.646608228697781,
|
|
"grad_norm": 0.05756185716173598,
|
|
"learning_rate": 0.00010910616147623365,
|
|
"loss": 2.3354,
|
|
"step": 27375
|
|
},
|
|
{
|
|
"epoch": 8.64818763326226,
|
|
"grad_norm": 0.04188256178005546,
|
|
"learning_rate": 0.00010885582340001243,
|
|
"loss": 2.3628,
|
|
"step": 27380
|
|
},
|
|
{
|
|
"epoch": 8.64976703782674,
|
|
"grad_norm": 0.04607030317523363,
|
|
"learning_rate": 0.00010860575631504155,
|
|
"loss": 2.3739,
|
|
"step": 27385
|
|
},
|
|
{
|
|
"epoch": 8.651346442391219,
|
|
"grad_norm": 0.043954050005782654,
|
|
"learning_rate": 0.00010835596029736484,
|
|
"loss": 2.3688,
|
|
"step": 27390
|
|
},
|
|
{
|
|
"epoch": 8.652925846955698,
|
|
"grad_norm": 0.047380854160769884,
|
|
"learning_rate": 0.00010810643542294385,
|
|
"loss": 2.3996,
|
|
"step": 27395
|
|
},
|
|
{
|
|
"epoch": 8.654505251520177,
|
|
"grad_norm": 0.05130458026043142,
|
|
"learning_rate": 0.00010785718176765713,
|
|
"loss": 2.3342,
|
|
"step": 27400
|
|
},
|
|
{
|
|
"epoch": 8.656084656084657,
|
|
"grad_norm": 0.04409781789315218,
|
|
"learning_rate": 0.00010760819940730171,
|
|
"loss": 2.317,
|
|
"step": 27405
|
|
},
|
|
{
|
|
"epoch": 8.657664060649136,
|
|
"grad_norm": 0.04764750498622033,
|
|
"learning_rate": 0.00010735948841759113,
|
|
"loss": 2.4534,
|
|
"step": 27410
|
|
},
|
|
{
|
|
"epoch": 8.659243465213615,
|
|
"grad_norm": 0.04451771256488658,
|
|
"learning_rate": 0.00010711104887415669,
|
|
"loss": 2.3672,
|
|
"step": 27415
|
|
},
|
|
{
|
|
"epoch": 8.660822869778094,
|
|
"grad_norm": 0.039178296466845196,
|
|
"learning_rate": 0.00010686288085254781,
|
|
"loss": 2.2947,
|
|
"step": 27420
|
|
},
|
|
{
|
|
"epoch": 8.662402274342572,
|
|
"grad_norm": 0.054099678943141875,
|
|
"learning_rate": 0.00010661498442823014,
|
|
"loss": 2.5264,
|
|
"step": 27425
|
|
},
|
|
{
|
|
"epoch": 8.663981678907053,
|
|
"grad_norm": 0.04901490624167286,
|
|
"learning_rate": 0.00010636735967658784,
|
|
"loss": 2.3274,
|
|
"step": 27430
|
|
},
|
|
{
|
|
"epoch": 8.66556108347153,
|
|
"grad_norm": 0.044666537972616924,
|
|
"learning_rate": 0.00010612000667292188,
|
|
"loss": 2.3824,
|
|
"step": 27435
|
|
},
|
|
{
|
|
"epoch": 8.66714048803601,
|
|
"grad_norm": 0.04843337262954962,
|
|
"learning_rate": 0.00010587292549245064,
|
|
"loss": 2.3741,
|
|
"step": 27440
|
|
},
|
|
{
|
|
"epoch": 8.668719892600489,
|
|
"grad_norm": 0.04634279589793078,
|
|
"learning_rate": 0.00010562611621031015,
|
|
"loss": 2.3499,
|
|
"step": 27445
|
|
},
|
|
{
|
|
"epoch": 8.670299297164968,
|
|
"grad_norm": 0.04298136264392165,
|
|
"learning_rate": 0.00010537957890155336,
|
|
"loss": 2.4299,
|
|
"step": 27450
|
|
},
|
|
{
|
|
"epoch": 8.671878701729447,
|
|
"grad_norm": 0.04636239404178666,
|
|
"learning_rate": 0.00010513331364115052,
|
|
"loss": 2.4993,
|
|
"step": 27455
|
|
},
|
|
{
|
|
"epoch": 8.673458106293927,
|
|
"grad_norm": 0.041895038972217795,
|
|
"learning_rate": 0.00010488732050398986,
|
|
"loss": 2.3574,
|
|
"step": 27460
|
|
},
|
|
{
|
|
"epoch": 8.675037510858406,
|
|
"grad_norm": 0.06384282552993045,
|
|
"learning_rate": 0.00010464159956487595,
|
|
"loss": 2.46,
|
|
"step": 27465
|
|
},
|
|
{
|
|
"epoch": 8.676616915422885,
|
|
"grad_norm": 0.04789644020746819,
|
|
"learning_rate": 0.00010439615089853094,
|
|
"loss": 2.4163,
|
|
"step": 27470
|
|
},
|
|
{
|
|
"epoch": 8.678196319987364,
|
|
"grad_norm": 0.04561918769867514,
|
|
"learning_rate": 0.00010415097457959432,
|
|
"loss": 2.5047,
|
|
"step": 27475
|
|
},
|
|
{
|
|
"epoch": 8.679775724551844,
|
|
"grad_norm": 0.05114824193321859,
|
|
"learning_rate": 0.00010390607068262248,
|
|
"loss": 2.4126,
|
|
"step": 27480
|
|
},
|
|
{
|
|
"epoch": 8.681355129116323,
|
|
"grad_norm": 0.05406059632662854,
|
|
"learning_rate": 0.00010366143928208938,
|
|
"loss": 2.3904,
|
|
"step": 27485
|
|
},
|
|
{
|
|
"epoch": 8.682934533680802,
|
|
"grad_norm": 0.049908841207682936,
|
|
"learning_rate": 0.00010341708045238552,
|
|
"loss": 2.3692,
|
|
"step": 27490
|
|
},
|
|
{
|
|
"epoch": 8.684513938245281,
|
|
"grad_norm": 0.04737961789122741,
|
|
"learning_rate": 0.00010317299426781923,
|
|
"loss": 2.3752,
|
|
"step": 27495
|
|
},
|
|
{
|
|
"epoch": 8.68609334280976,
|
|
"grad_norm": 0.04405464190125366,
|
|
"learning_rate": 0.00010292918080261537,
|
|
"loss": 2.4539,
|
|
"step": 27500
|
|
},
|
|
{
|
|
"epoch": 8.68767274737424,
|
|
"grad_norm": 0.04905899822998163,
|
|
"learning_rate": 0.00010268564013091596,
|
|
"loss": 2.4646,
|
|
"step": 27505
|
|
},
|
|
{
|
|
"epoch": 8.68925215193872,
|
|
"grad_norm": 0.06104583394465677,
|
|
"learning_rate": 0.00010244237232678066,
|
|
"loss": 2.4228,
|
|
"step": 27510
|
|
},
|
|
{
|
|
"epoch": 8.690831556503198,
|
|
"grad_norm": 0.0513316924495129,
|
|
"learning_rate": 0.00010219937746418495,
|
|
"loss": 2.4072,
|
|
"step": 27515
|
|
},
|
|
{
|
|
"epoch": 8.692410961067678,
|
|
"grad_norm": 0.04766972525437872,
|
|
"learning_rate": 0.0001019566556170225,
|
|
"loss": 2.4783,
|
|
"step": 27520
|
|
},
|
|
{
|
|
"epoch": 8.693990365632157,
|
|
"grad_norm": 0.054019712486918475,
|
|
"learning_rate": 0.00010171420685910326,
|
|
"loss": 2.4326,
|
|
"step": 27525
|
|
},
|
|
{
|
|
"epoch": 8.695569770196636,
|
|
"grad_norm": 0.041842413914295176,
|
|
"learning_rate": 0.00010147203126415428,
|
|
"loss": 2.329,
|
|
"step": 27530
|
|
},
|
|
{
|
|
"epoch": 8.697149174761115,
|
|
"grad_norm": 0.049855000006414546,
|
|
"learning_rate": 0.00010123012890581983,
|
|
"loss": 2.4639,
|
|
"step": 27535
|
|
},
|
|
{
|
|
"epoch": 8.698728579325595,
|
|
"grad_norm": 0.05850871822413731,
|
|
"learning_rate": 0.00010098849985766068,
|
|
"loss": 2.3149,
|
|
"step": 27540
|
|
},
|
|
{
|
|
"epoch": 8.700307983890074,
|
|
"grad_norm": 0.05523500580028637,
|
|
"learning_rate": 0.000100747144193155,
|
|
"loss": 2.4291,
|
|
"step": 27545
|
|
},
|
|
{
|
|
"epoch": 8.701887388454553,
|
|
"grad_norm": 0.04170201774340248,
|
|
"learning_rate": 0.00010050606198569723,
|
|
"loss": 2.4053,
|
|
"step": 27550
|
|
},
|
|
{
|
|
"epoch": 8.703466793019032,
|
|
"grad_norm": 0.05083914755449448,
|
|
"learning_rate": 0.00010026525330859903,
|
|
"loss": 2.3439,
|
|
"step": 27555
|
|
},
|
|
{
|
|
"epoch": 8.705046197583512,
|
|
"grad_norm": 0.047049094641974425,
|
|
"learning_rate": 0.00010002471823508864,
|
|
"loss": 2.441,
|
|
"step": 27560
|
|
},
|
|
{
|
|
"epoch": 8.70662560214799,
|
|
"grad_norm": 0.049698552700892414,
|
|
"learning_rate": 9.97844568383114e-05,
|
|
"loss": 2.3274,
|
|
"step": 27565
|
|
},
|
|
{
|
|
"epoch": 8.70820500671247,
|
|
"grad_norm": 0.04427067432949184,
|
|
"learning_rate": 9.954446919132899e-05,
|
|
"loss": 2.429,
|
|
"step": 27570
|
|
},
|
|
{
|
|
"epoch": 8.70978441127695,
|
|
"grad_norm": 0.04320417937956996,
|
|
"learning_rate": 9.930475536712057e-05,
|
|
"loss": 2.4198,
|
|
"step": 27575
|
|
},
|
|
{
|
|
"epoch": 8.711363815841429,
|
|
"grad_norm": 0.041329497032012934,
|
|
"learning_rate": 9.90653154385811e-05,
|
|
"loss": 2.4125,
|
|
"step": 27580
|
|
},
|
|
{
|
|
"epoch": 8.712943220405908,
|
|
"grad_norm": 0.04029998989493044,
|
|
"learning_rate": 9.882614947852319e-05,
|
|
"loss": 2.3716,
|
|
"step": 27585
|
|
},
|
|
{
|
|
"epoch": 8.714522624970385,
|
|
"grad_norm": 0.04127556580376296,
|
|
"learning_rate": 9.858725755967546e-05,
|
|
"loss": 2.3834,
|
|
"step": 27590
|
|
},
|
|
{
|
|
"epoch": 8.716102029534865,
|
|
"grad_norm": 0.04684041591857787,
|
|
"learning_rate": 9.834863975468322e-05,
|
|
"loss": 2.5174,
|
|
"step": 27595
|
|
},
|
|
{
|
|
"epoch": 8.717681434099344,
|
|
"grad_norm": 0.04554218645399579,
|
|
"learning_rate": 9.811029613610912e-05,
|
|
"loss": 2.4288,
|
|
"step": 27600
|
|
},
|
|
{
|
|
"epoch": 8.719260838663823,
|
|
"grad_norm": 0.044368508354788026,
|
|
"learning_rate": 9.787222677643137e-05,
|
|
"loss": 2.3462,
|
|
"step": 27605
|
|
},
|
|
{
|
|
"epoch": 8.720840243228302,
|
|
"grad_norm": 0.05312336467108467,
|
|
"learning_rate": 9.763443174804576e-05,
|
|
"loss": 2.411,
|
|
"step": 27610
|
|
},
|
|
{
|
|
"epoch": 8.722419647792782,
|
|
"grad_norm": 0.041609962188932766,
|
|
"learning_rate": 9.73969111232641e-05,
|
|
"loss": 2.4051,
|
|
"step": 27615
|
|
},
|
|
{
|
|
"epoch": 8.72399905235726,
|
|
"grad_norm": 0.04773125756521095,
|
|
"learning_rate": 9.715966497431461e-05,
|
|
"loss": 2.2886,
|
|
"step": 27620
|
|
},
|
|
{
|
|
"epoch": 8.72557845692174,
|
|
"grad_norm": 0.04503554984633216,
|
|
"learning_rate": 9.692269337334281e-05,
|
|
"loss": 2.3965,
|
|
"step": 27625
|
|
},
|
|
{
|
|
"epoch": 8.72715786148622,
|
|
"grad_norm": 0.05132758840218605,
|
|
"learning_rate": 9.668599639240993e-05,
|
|
"loss": 2.3531,
|
|
"step": 27630
|
|
},
|
|
{
|
|
"epoch": 8.728737266050699,
|
|
"grad_norm": 0.04774843040892377,
|
|
"learning_rate": 9.64495741034942e-05,
|
|
"loss": 2.502,
|
|
"step": 27635
|
|
},
|
|
{
|
|
"epoch": 8.730316670615178,
|
|
"grad_norm": 0.04329382389998516,
|
|
"learning_rate": 9.621342657849008e-05,
|
|
"loss": 2.3854,
|
|
"step": 27640
|
|
},
|
|
{
|
|
"epoch": 8.731896075179657,
|
|
"grad_norm": 0.04246665809880522,
|
|
"learning_rate": 9.597755388920849e-05,
|
|
"loss": 2.3339,
|
|
"step": 27645
|
|
},
|
|
{
|
|
"epoch": 8.733475479744136,
|
|
"grad_norm": 0.043336216890366264,
|
|
"learning_rate": 9.574195610737679e-05,
|
|
"loss": 2.3294,
|
|
"step": 27650
|
|
},
|
|
{
|
|
"epoch": 8.735054884308616,
|
|
"grad_norm": 0.04949076809099762,
|
|
"learning_rate": 9.55066333046386e-05,
|
|
"loss": 2.42,
|
|
"step": 27655
|
|
},
|
|
{
|
|
"epoch": 8.736634288873095,
|
|
"grad_norm": 0.044252555737269855,
|
|
"learning_rate": 9.527158555255445e-05,
|
|
"loss": 2.3788,
|
|
"step": 27660
|
|
},
|
|
{
|
|
"epoch": 8.738213693437574,
|
|
"grad_norm": 0.04390254514457231,
|
|
"learning_rate": 9.503681292260068e-05,
|
|
"loss": 2.4204,
|
|
"step": 27665
|
|
},
|
|
{
|
|
"epoch": 8.739793098002053,
|
|
"grad_norm": 0.039219814813633305,
|
|
"learning_rate": 9.480231548616991e-05,
|
|
"loss": 2.4288,
|
|
"step": 27670
|
|
},
|
|
{
|
|
"epoch": 8.741372502566533,
|
|
"grad_norm": 0.039131792602412445,
|
|
"learning_rate": 9.456809331457172e-05,
|
|
"loss": 2.3258,
|
|
"step": 27675
|
|
},
|
|
{
|
|
"epoch": 8.742951907131012,
|
|
"grad_norm": 0.04887100846216087,
|
|
"learning_rate": 9.433414647903137e-05,
|
|
"loss": 2.4431,
|
|
"step": 27680
|
|
},
|
|
{
|
|
"epoch": 8.744531311695491,
|
|
"grad_norm": 0.04164886010131534,
|
|
"learning_rate": 9.410047505069042e-05,
|
|
"loss": 2.386,
|
|
"step": 27685
|
|
},
|
|
{
|
|
"epoch": 8.74611071625997,
|
|
"grad_norm": 0.048611792433111846,
|
|
"learning_rate": 9.386707910060755e-05,
|
|
"loss": 2.3307,
|
|
"step": 27690
|
|
},
|
|
{
|
|
"epoch": 8.74769012082445,
|
|
"grad_norm": 0.04580083384677626,
|
|
"learning_rate": 9.363395869975599e-05,
|
|
"loss": 2.3363,
|
|
"step": 27695
|
|
},
|
|
{
|
|
"epoch": 8.749269525388929,
|
|
"grad_norm": 0.04694393619135054,
|
|
"learning_rate": 9.340111391902684e-05,
|
|
"loss": 2.3043,
|
|
"step": 27700
|
|
},
|
|
{
|
|
"epoch": 8.750848929953408,
|
|
"grad_norm": 0.047667781871471394,
|
|
"learning_rate": 9.316854482922655e-05,
|
|
"loss": 2.3623,
|
|
"step": 27705
|
|
},
|
|
{
|
|
"epoch": 8.752428334517887,
|
|
"grad_norm": 0.04368853260378372,
|
|
"learning_rate": 9.293625150107765e-05,
|
|
"loss": 2.4077,
|
|
"step": 27710
|
|
},
|
|
{
|
|
"epoch": 8.754007739082367,
|
|
"grad_norm": 0.046375364667146195,
|
|
"learning_rate": 9.270423400521955e-05,
|
|
"loss": 2.358,
|
|
"step": 27715
|
|
},
|
|
{
|
|
"epoch": 8.755587143646846,
|
|
"grad_norm": 0.044457278881758974,
|
|
"learning_rate": 9.247249241220679e-05,
|
|
"loss": 2.4747,
|
|
"step": 27720
|
|
},
|
|
{
|
|
"epoch": 8.757166548211325,
|
|
"grad_norm": 0.046833651263446595,
|
|
"learning_rate": 9.224102679251089e-05,
|
|
"loss": 2.3934,
|
|
"step": 27725
|
|
},
|
|
{
|
|
"epoch": 8.758745952775804,
|
|
"grad_norm": 0.050935973942569567,
|
|
"learning_rate": 9.20098372165189e-05,
|
|
"loss": 2.3344,
|
|
"step": 27730
|
|
},
|
|
{
|
|
"epoch": 8.760325357340284,
|
|
"grad_norm": 0.04390985059357109,
|
|
"learning_rate": 9.177892375453412e-05,
|
|
"loss": 2.3983,
|
|
"step": 27735
|
|
},
|
|
{
|
|
"epoch": 8.761904761904763,
|
|
"grad_norm": 0.04954885808261568,
|
|
"learning_rate": 9.154828647677593e-05,
|
|
"loss": 2.4317,
|
|
"step": 27740
|
|
},
|
|
{
|
|
"epoch": 8.76348416646924,
|
|
"grad_norm": 0.05344633123620102,
|
|
"learning_rate": 9.131792545337925e-05,
|
|
"loss": 2.3044,
|
|
"step": 27745
|
|
},
|
|
{
|
|
"epoch": 8.76506357103372,
|
|
"grad_norm": 0.047837756495592146,
|
|
"learning_rate": 9.108784075439603e-05,
|
|
"loss": 2.4257,
|
|
"step": 27750
|
|
},
|
|
{
|
|
"epoch": 8.766642975598199,
|
|
"grad_norm": 0.04092377494775665,
|
|
"learning_rate": 9.085803244979307e-05,
|
|
"loss": 2.387,
|
|
"step": 27755
|
|
},
|
|
{
|
|
"epoch": 8.768222380162678,
|
|
"grad_norm": 0.047610008045174436,
|
|
"learning_rate": 9.062850060945371e-05,
|
|
"loss": 2.4132,
|
|
"step": 27760
|
|
},
|
|
{
|
|
"epoch": 8.769801784727157,
|
|
"grad_norm": 0.05302693388087465,
|
|
"learning_rate": 9.039924530317733e-05,
|
|
"loss": 2.341,
|
|
"step": 27765
|
|
},
|
|
{
|
|
"epoch": 8.771381189291636,
|
|
"grad_norm": 0.04229024510916769,
|
|
"learning_rate": 9.017026660067863e-05,
|
|
"loss": 2.4872,
|
|
"step": 27770
|
|
},
|
|
{
|
|
"epoch": 8.772960593856116,
|
|
"grad_norm": 0.041842382305429464,
|
|
"learning_rate": 8.994156457158897e-05,
|
|
"loss": 2.317,
|
|
"step": 27775
|
|
},
|
|
{
|
|
"epoch": 8.774539998420595,
|
|
"grad_norm": 0.05620726232188397,
|
|
"learning_rate": 8.971313928545521e-05,
|
|
"loss": 2.4053,
|
|
"step": 27780
|
|
},
|
|
{
|
|
"epoch": 8.776119402985074,
|
|
"grad_norm": 0.04416747257295702,
|
|
"learning_rate": 8.948499081173955e-05,
|
|
"loss": 2.4268,
|
|
"step": 27785
|
|
},
|
|
{
|
|
"epoch": 8.777698807549553,
|
|
"grad_norm": 0.043428373042804086,
|
|
"learning_rate": 8.925711921982083e-05,
|
|
"loss": 2.3584,
|
|
"step": 27790
|
|
},
|
|
{
|
|
"epoch": 8.779278212114033,
|
|
"grad_norm": 0.04610945382687839,
|
|
"learning_rate": 8.902952457899316e-05,
|
|
"loss": 2.314,
|
|
"step": 27795
|
|
},
|
|
{
|
|
"epoch": 8.780857616678512,
|
|
"grad_norm": 0.046303902360458664,
|
|
"learning_rate": 8.880220695846663e-05,
|
|
"loss": 2.381,
|
|
"step": 27800
|
|
},
|
|
{
|
|
"epoch": 8.782437021242991,
|
|
"grad_norm": 0.04338916205988012,
|
|
"learning_rate": 8.857516642736741e-05,
|
|
"loss": 2.3796,
|
|
"step": 27805
|
|
},
|
|
{
|
|
"epoch": 8.78401642580747,
|
|
"grad_norm": 0.04301418542589073,
|
|
"learning_rate": 8.834840305473657e-05,
|
|
"loss": 2.2911,
|
|
"step": 27810
|
|
},
|
|
{
|
|
"epoch": 8.78559583037195,
|
|
"grad_norm": 0.043566278340293474,
|
|
"learning_rate": 8.812191690953187e-05,
|
|
"loss": 2.3303,
|
|
"step": 27815
|
|
},
|
|
{
|
|
"epoch": 8.787175234936429,
|
|
"grad_norm": 0.04314661109931861,
|
|
"learning_rate": 8.789570806062597e-05,
|
|
"loss": 2.3549,
|
|
"step": 27820
|
|
},
|
|
{
|
|
"epoch": 8.788754639500908,
|
|
"grad_norm": 0.04198275517691545,
|
|
"learning_rate": 8.766977657680775e-05,
|
|
"loss": 2.4167,
|
|
"step": 27825
|
|
},
|
|
{
|
|
"epoch": 8.790334044065387,
|
|
"grad_norm": 0.04406552289381544,
|
|
"learning_rate": 8.744412252678147e-05,
|
|
"loss": 2.6015,
|
|
"step": 27830
|
|
},
|
|
{
|
|
"epoch": 8.791913448629867,
|
|
"grad_norm": 0.053622602101489454,
|
|
"learning_rate": 8.721874597916679e-05,
|
|
"loss": 2.4954,
|
|
"step": 27835
|
|
},
|
|
{
|
|
"epoch": 8.793492853194346,
|
|
"grad_norm": 0.052635623372226624,
|
|
"learning_rate": 8.699364700249979e-05,
|
|
"loss": 2.3696,
|
|
"step": 27840
|
|
},
|
|
{
|
|
"epoch": 8.795072257758825,
|
|
"grad_norm": 0.045562967529298753,
|
|
"learning_rate": 8.676882566523137e-05,
|
|
"loss": 2.4215,
|
|
"step": 27845
|
|
},
|
|
{
|
|
"epoch": 8.796651662323304,
|
|
"grad_norm": 0.043320603795194146,
|
|
"learning_rate": 8.654428203572795e-05,
|
|
"loss": 2.4264,
|
|
"step": 27850
|
|
},
|
|
{
|
|
"epoch": 8.798231066887784,
|
|
"grad_norm": 0.04302390073292323,
|
|
"learning_rate": 8.632001618227248e-05,
|
|
"loss": 2.4053,
|
|
"step": 27855
|
|
},
|
|
{
|
|
"epoch": 8.799810471452263,
|
|
"grad_norm": 0.0418577135978061,
|
|
"learning_rate": 8.609602817306217e-05,
|
|
"loss": 2.3199,
|
|
"step": 27860
|
|
},
|
|
{
|
|
"epoch": 8.801389876016742,
|
|
"grad_norm": 0.04727817390643634,
|
|
"learning_rate": 8.587231807621098e-05,
|
|
"loss": 2.3408,
|
|
"step": 27865
|
|
},
|
|
{
|
|
"epoch": 8.802969280581221,
|
|
"grad_norm": 0.0423515591106379,
|
|
"learning_rate": 8.564888595974718e-05,
|
|
"loss": 2.4128,
|
|
"step": 27870
|
|
},
|
|
{
|
|
"epoch": 8.8045486851457,
|
|
"grad_norm": 0.052276271649834685,
|
|
"learning_rate": 8.542573189161496e-05,
|
|
"loss": 2.3171,
|
|
"step": 27875
|
|
},
|
|
{
|
|
"epoch": 8.80612808971018,
|
|
"grad_norm": 0.05429324170374112,
|
|
"learning_rate": 8.520285593967447e-05,
|
|
"loss": 2.4736,
|
|
"step": 27880
|
|
},
|
|
{
|
|
"epoch": 8.80770749427466,
|
|
"grad_norm": 0.053387068558818716,
|
|
"learning_rate": 8.498025817170063e-05,
|
|
"loss": 2.4531,
|
|
"step": 27885
|
|
},
|
|
{
|
|
"epoch": 8.809286898839138,
|
|
"grad_norm": 0.046713980444878955,
|
|
"learning_rate": 8.475793865538417e-05,
|
|
"loss": 2.4184,
|
|
"step": 27890
|
|
},
|
|
{
|
|
"epoch": 8.810866303403618,
|
|
"grad_norm": 0.044394572356236055,
|
|
"learning_rate": 8.45358974583309e-05,
|
|
"loss": 2.4898,
|
|
"step": 27895
|
|
},
|
|
{
|
|
"epoch": 8.812445707968095,
|
|
"grad_norm": 0.047160557226061856,
|
|
"learning_rate": 8.431413464806193e-05,
|
|
"loss": 2.4044,
|
|
"step": 27900
|
|
},
|
|
{
|
|
"epoch": 8.814025112532576,
|
|
"grad_norm": 0.04377217525018816,
|
|
"learning_rate": 8.40926502920144e-05,
|
|
"loss": 2.4242,
|
|
"step": 27905
|
|
},
|
|
{
|
|
"epoch": 8.815604517097054,
|
|
"grad_norm": 0.050799956400435595,
|
|
"learning_rate": 8.387144445753992e-05,
|
|
"loss": 2.373,
|
|
"step": 27910
|
|
},
|
|
{
|
|
"epoch": 8.817183921661533,
|
|
"grad_norm": 0.04693361828675467,
|
|
"learning_rate": 8.365051721190598e-05,
|
|
"loss": 2.4543,
|
|
"step": 27915
|
|
},
|
|
{
|
|
"epoch": 8.818763326226012,
|
|
"grad_norm": 0.04940313629484455,
|
|
"learning_rate": 8.342986862229496e-05,
|
|
"loss": 2.4479,
|
|
"step": 27920
|
|
},
|
|
{
|
|
"epoch": 8.820342730790491,
|
|
"grad_norm": 0.03822568584026386,
|
|
"learning_rate": 8.320949875580464e-05,
|
|
"loss": 2.4455,
|
|
"step": 27925
|
|
},
|
|
{
|
|
"epoch": 8.82192213535497,
|
|
"grad_norm": 0.040691554610554286,
|
|
"learning_rate": 8.29894076794484e-05,
|
|
"loss": 2.393,
|
|
"step": 27930
|
|
},
|
|
{
|
|
"epoch": 8.82350153991945,
|
|
"grad_norm": 0.044593275922070776,
|
|
"learning_rate": 8.276959546015428e-05,
|
|
"loss": 2.4724,
|
|
"step": 27935
|
|
},
|
|
{
|
|
"epoch": 8.825080944483929,
|
|
"grad_norm": 0.04400811097919304,
|
|
"learning_rate": 8.255006216476569e-05,
|
|
"loss": 2.4454,
|
|
"step": 27940
|
|
},
|
|
{
|
|
"epoch": 8.826660349048408,
|
|
"grad_norm": 0.05607438124327106,
|
|
"learning_rate": 8.233080786004166e-05,
|
|
"loss": 2.4234,
|
|
"step": 27945
|
|
},
|
|
{
|
|
"epoch": 8.828239753612888,
|
|
"grad_norm": 0.04947045921122129,
|
|
"learning_rate": 8.211183261265554e-05,
|
|
"loss": 2.3173,
|
|
"step": 27950
|
|
},
|
|
{
|
|
"epoch": 8.829819158177367,
|
|
"grad_norm": 0.04898501933533458,
|
|
"learning_rate": 8.189313648919694e-05,
|
|
"loss": 2.3083,
|
|
"step": 27955
|
|
},
|
|
{
|
|
"epoch": 8.831398562741846,
|
|
"grad_norm": 0.0503493787919402,
|
|
"learning_rate": 8.167471955616945e-05,
|
|
"loss": 2.3711,
|
|
"step": 27960
|
|
},
|
|
{
|
|
"epoch": 8.832977967306325,
|
|
"grad_norm": 0.04810358661506592,
|
|
"learning_rate": 8.145658187999227e-05,
|
|
"loss": 2.3185,
|
|
"step": 27965
|
|
},
|
|
{
|
|
"epoch": 8.834557371870805,
|
|
"grad_norm": 0.04761398760448387,
|
|
"learning_rate": 8.12387235269999e-05,
|
|
"loss": 2.3879,
|
|
"step": 27970
|
|
},
|
|
{
|
|
"epoch": 8.836136776435284,
|
|
"grad_norm": 0.04509133436808405,
|
|
"learning_rate": 8.102114456344145e-05,
|
|
"loss": 2.4121,
|
|
"step": 27975
|
|
},
|
|
{
|
|
"epoch": 8.837716180999763,
|
|
"grad_norm": 0.046277123819228494,
|
|
"learning_rate": 8.080384505548156e-05,
|
|
"loss": 2.4586,
|
|
"step": 27980
|
|
},
|
|
{
|
|
"epoch": 8.839295585564242,
|
|
"grad_norm": 0.04643289229968356,
|
|
"learning_rate": 8.058682506919945e-05,
|
|
"loss": 2.3555,
|
|
"step": 27985
|
|
},
|
|
{
|
|
"epoch": 8.840874990128722,
|
|
"grad_norm": 0.043840976748217575,
|
|
"learning_rate": 8.037008467058949e-05,
|
|
"loss": 2.389,
|
|
"step": 27990
|
|
},
|
|
{
|
|
"epoch": 8.8424543946932,
|
|
"grad_norm": 0.04452475438728594,
|
|
"learning_rate": 8.015362392556114e-05,
|
|
"loss": 2.4102,
|
|
"step": 27995
|
|
},
|
|
{
|
|
"epoch": 8.84403379925768,
|
|
"grad_norm": 0.04296832445148172,
|
|
"learning_rate": 7.993744289993876e-05,
|
|
"loss": 2.399,
|
|
"step": 28000
|
|
},
|
|
{
|
|
"epoch": 8.84561320382216,
|
|
"grad_norm": 0.042633473559069476,
|
|
"learning_rate": 7.972154165946155e-05,
|
|
"loss": 2.3855,
|
|
"step": 28005
|
|
},
|
|
{
|
|
"epoch": 8.847192608386639,
|
|
"grad_norm": 0.04437673113217061,
|
|
"learning_rate": 7.950592026978376e-05,
|
|
"loss": 2.2616,
|
|
"step": 28010
|
|
},
|
|
{
|
|
"epoch": 8.848772012951118,
|
|
"grad_norm": 0.04398465278826395,
|
|
"learning_rate": 7.929057879647416e-05,
|
|
"loss": 2.3888,
|
|
"step": 28015
|
|
},
|
|
{
|
|
"epoch": 8.850351417515597,
|
|
"grad_norm": 0.042152276865596444,
|
|
"learning_rate": 7.907551730501717e-05,
|
|
"loss": 2.4197,
|
|
"step": 28020
|
|
},
|
|
{
|
|
"epoch": 8.851930822080076,
|
|
"grad_norm": 0.046600110064270915,
|
|
"learning_rate": 7.886073586081133e-05,
|
|
"loss": 2.3751,
|
|
"step": 28025
|
|
},
|
|
{
|
|
"epoch": 8.853510226644556,
|
|
"grad_norm": 0.04043326347735244,
|
|
"learning_rate": 7.86462345291703e-05,
|
|
"loss": 2.3082,
|
|
"step": 28030
|
|
},
|
|
{
|
|
"epoch": 8.855089631209035,
|
|
"grad_norm": 0.041788048853306614,
|
|
"learning_rate": 7.843201337532291e-05,
|
|
"loss": 2.3259,
|
|
"step": 28035
|
|
},
|
|
{
|
|
"epoch": 8.856669035773514,
|
|
"grad_norm": 0.047200211239369405,
|
|
"learning_rate": 7.821807246441193e-05,
|
|
"loss": 2.3561,
|
|
"step": 28040
|
|
},
|
|
{
|
|
"epoch": 8.858248440337993,
|
|
"grad_norm": 0.05771240570091253,
|
|
"learning_rate": 7.800441186149598e-05,
|
|
"loss": 2.467,
|
|
"step": 28045
|
|
},
|
|
{
|
|
"epoch": 8.859827844902473,
|
|
"grad_norm": 0.0444103647842252,
|
|
"learning_rate": 7.779103163154755e-05,
|
|
"loss": 2.3739,
|
|
"step": 28050
|
|
},
|
|
{
|
|
"epoch": 8.86140724946695,
|
|
"grad_norm": 0.048050966507214264,
|
|
"learning_rate": 7.757793183945394e-05,
|
|
"loss": 2.3261,
|
|
"step": 28055
|
|
},
|
|
{
|
|
"epoch": 8.862986654031431,
|
|
"grad_norm": 0.053344791542482436,
|
|
"learning_rate": 7.736511255001799e-05,
|
|
"loss": 2.3812,
|
|
"step": 28060
|
|
},
|
|
{
|
|
"epoch": 8.864566058595909,
|
|
"grad_norm": 0.04263266640138856,
|
|
"learning_rate": 7.715257382795626e-05,
|
|
"loss": 2.4577,
|
|
"step": 28065
|
|
},
|
|
{
|
|
"epoch": 8.866145463160388,
|
|
"grad_norm": 0.04611157933668599,
|
|
"learning_rate": 7.694031573790073e-05,
|
|
"loss": 2.3725,
|
|
"step": 28070
|
|
},
|
|
{
|
|
"epoch": 8.867724867724867,
|
|
"grad_norm": 0.04356758179219666,
|
|
"learning_rate": 7.672833834439763e-05,
|
|
"loss": 2.3687,
|
|
"step": 28075
|
|
},
|
|
{
|
|
"epoch": 8.869304272289346,
|
|
"grad_norm": 0.04474931155565027,
|
|
"learning_rate": 7.651664171190764e-05,
|
|
"loss": 2.3559,
|
|
"step": 28080
|
|
},
|
|
{
|
|
"epoch": 8.870883676853826,
|
|
"grad_norm": 0.0457923641571006,
|
|
"learning_rate": 7.630522590480693e-05,
|
|
"loss": 2.4139,
|
|
"step": 28085
|
|
},
|
|
{
|
|
"epoch": 8.872463081418305,
|
|
"grad_norm": 0.04370572522376028,
|
|
"learning_rate": 7.609409098738518e-05,
|
|
"loss": 2.3599,
|
|
"step": 28090
|
|
},
|
|
{
|
|
"epoch": 8.874042485982784,
|
|
"grad_norm": 0.042834939875349386,
|
|
"learning_rate": 7.588323702384747e-05,
|
|
"loss": 2.3194,
|
|
"step": 28095
|
|
},
|
|
{
|
|
"epoch": 8.875621890547263,
|
|
"grad_norm": 0.045218227911846244,
|
|
"learning_rate": 7.567266407831308e-05,
|
|
"loss": 2.3007,
|
|
"step": 28100
|
|
},
|
|
{
|
|
"epoch": 8.877201295111742,
|
|
"grad_norm": 0.04422452601325681,
|
|
"learning_rate": 7.546237221481567e-05,
|
|
"loss": 2.324,
|
|
"step": 28105
|
|
},
|
|
{
|
|
"epoch": 8.878780699676222,
|
|
"grad_norm": 0.041829744755636056,
|
|
"learning_rate": 7.525236149730396e-05,
|
|
"loss": 2.459,
|
|
"step": 28110
|
|
},
|
|
{
|
|
"epoch": 8.880360104240701,
|
|
"grad_norm": 0.04451605381016142,
|
|
"learning_rate": 7.504263198964057e-05,
|
|
"loss": 2.4524,
|
|
"step": 28115
|
|
},
|
|
{
|
|
"epoch": 8.88193950880518,
|
|
"grad_norm": 0.04376098243257381,
|
|
"learning_rate": 7.483318375560322e-05,
|
|
"loss": 2.3056,
|
|
"step": 28120
|
|
},
|
|
{
|
|
"epoch": 8.88351891336966,
|
|
"grad_norm": 0.04964806600970924,
|
|
"learning_rate": 7.462401685888364e-05,
|
|
"loss": 2.4674,
|
|
"step": 28125
|
|
},
|
|
{
|
|
"epoch": 8.885098317934139,
|
|
"grad_norm": 0.04661754849539569,
|
|
"learning_rate": 7.441513136308809e-05,
|
|
"loss": 2.4871,
|
|
"step": 28130
|
|
},
|
|
{
|
|
"epoch": 8.886677722498618,
|
|
"grad_norm": 0.05142264274006399,
|
|
"learning_rate": 7.42065273317376e-05,
|
|
"loss": 2.3739,
|
|
"step": 28135
|
|
},
|
|
{
|
|
"epoch": 8.888257127063097,
|
|
"grad_norm": 0.04710796769977028,
|
|
"learning_rate": 7.399820482826692e-05,
|
|
"loss": 2.3984,
|
|
"step": 28140
|
|
},
|
|
{
|
|
"epoch": 8.889836531627576,
|
|
"grad_norm": 0.039886449765171465,
|
|
"learning_rate": 7.379016391602555e-05,
|
|
"loss": 2.3704,
|
|
"step": 28145
|
|
},
|
|
{
|
|
"epoch": 8.891415936192056,
|
|
"grad_norm": 0.04349712073760303,
|
|
"learning_rate": 7.358240465827793e-05,
|
|
"loss": 2.4033,
|
|
"step": 28150
|
|
},
|
|
{
|
|
"epoch": 8.892995340756535,
|
|
"grad_norm": 0.044988138574963435,
|
|
"learning_rate": 7.33749271182017e-05,
|
|
"loss": 2.3507,
|
|
"step": 28155
|
|
},
|
|
{
|
|
"epoch": 8.894574745321014,
|
|
"grad_norm": 0.05141158532945691,
|
|
"learning_rate": 7.316773135888999e-05,
|
|
"loss": 2.448,
|
|
"step": 28160
|
|
},
|
|
{
|
|
"epoch": 8.896154149885493,
|
|
"grad_norm": 0.04263475877598905,
|
|
"learning_rate": 7.296081744334948e-05,
|
|
"loss": 2.354,
|
|
"step": 28165
|
|
},
|
|
{
|
|
"epoch": 8.897733554449973,
|
|
"grad_norm": 0.04428641248965364,
|
|
"learning_rate": 7.275418543450118e-05,
|
|
"loss": 2.4402,
|
|
"step": 28170
|
|
},
|
|
{
|
|
"epoch": 8.899312959014452,
|
|
"grad_norm": 0.0430606727234692,
|
|
"learning_rate": 7.254783539518095e-05,
|
|
"loss": 2.3231,
|
|
"step": 28175
|
|
},
|
|
{
|
|
"epoch": 8.900892363578931,
|
|
"grad_norm": 0.04384537450340839,
|
|
"learning_rate": 7.234176738813824e-05,
|
|
"loss": 2.4522,
|
|
"step": 28180
|
|
},
|
|
{
|
|
"epoch": 8.90247176814341,
|
|
"grad_norm": 0.04473594645809847,
|
|
"learning_rate": 7.213598147603717e-05,
|
|
"loss": 2.3859,
|
|
"step": 28185
|
|
},
|
|
{
|
|
"epoch": 8.90405117270789,
|
|
"grad_norm": 0.047074046223589794,
|
|
"learning_rate": 7.193047772145588e-05,
|
|
"loss": 2.3877,
|
|
"step": 28190
|
|
},
|
|
{
|
|
"epoch": 8.905630577272369,
|
|
"grad_norm": 0.04426029183417958,
|
|
"learning_rate": 7.172525618688641e-05,
|
|
"loss": 2.4467,
|
|
"step": 28195
|
|
},
|
|
{
|
|
"epoch": 8.907209981836848,
|
|
"grad_norm": 0.049948508073831714,
|
|
"learning_rate": 7.152031693473594e-05,
|
|
"loss": 2.3481,
|
|
"step": 28200
|
|
},
|
|
{
|
|
"epoch": 8.908789386401327,
|
|
"grad_norm": 0.03971916438871055,
|
|
"learning_rate": 7.131566002732459e-05,
|
|
"loss": 2.4169,
|
|
"step": 28205
|
|
},
|
|
{
|
|
"epoch": 8.910368790965807,
|
|
"grad_norm": 0.04655908358862637,
|
|
"learning_rate": 7.111128552688773e-05,
|
|
"loss": 2.3672,
|
|
"step": 28210
|
|
},
|
|
{
|
|
"epoch": 8.911948195530286,
|
|
"grad_norm": 0.041045892957907094,
|
|
"learning_rate": 7.090719349557406e-05,
|
|
"loss": 2.5533,
|
|
"step": 28215
|
|
},
|
|
{
|
|
"epoch": 8.913527600094763,
|
|
"grad_norm": 0.04753375969385832,
|
|
"learning_rate": 7.070338399544662e-05,
|
|
"loss": 2.3752,
|
|
"step": 28220
|
|
},
|
|
{
|
|
"epoch": 8.915107004659243,
|
|
"grad_norm": 0.04521840246370948,
|
|
"learning_rate": 7.049985708848294e-05,
|
|
"loss": 2.3799,
|
|
"step": 28225
|
|
},
|
|
{
|
|
"epoch": 8.916686409223722,
|
|
"grad_norm": 0.04107290690489171,
|
|
"learning_rate": 7.029661283657385e-05,
|
|
"loss": 2.3567,
|
|
"step": 28230
|
|
},
|
|
{
|
|
"epoch": 8.918265813788201,
|
|
"grad_norm": 0.039786480821288654,
|
|
"learning_rate": 7.009365130152456e-05,
|
|
"loss": 2.3903,
|
|
"step": 28235
|
|
},
|
|
{
|
|
"epoch": 8.91984521835268,
|
|
"grad_norm": 0.04278558574535202,
|
|
"learning_rate": 6.989097254505473e-05,
|
|
"loss": 2.3218,
|
|
"step": 28240
|
|
},
|
|
{
|
|
"epoch": 8.92142462291716,
|
|
"grad_norm": 0.04404789760974957,
|
|
"learning_rate": 6.968857662879735e-05,
|
|
"loss": 2.34,
|
|
"step": 28245
|
|
},
|
|
{
|
|
"epoch": 8.923004027481639,
|
|
"grad_norm": 0.04698598034068341,
|
|
"learning_rate": 6.948646361430011e-05,
|
|
"loss": 2.4482,
|
|
"step": 28250
|
|
},
|
|
{
|
|
"epoch": 8.924583432046118,
|
|
"grad_norm": 0.04197192304493057,
|
|
"learning_rate": 6.928463356302395e-05,
|
|
"loss": 2.4799,
|
|
"step": 28255
|
|
},
|
|
{
|
|
"epoch": 8.926162836610597,
|
|
"grad_norm": 0.04842250969939549,
|
|
"learning_rate": 6.908308653634421e-05,
|
|
"loss": 2.3636,
|
|
"step": 28260
|
|
},
|
|
{
|
|
"epoch": 8.927742241175077,
|
|
"grad_norm": 0.04541276885376044,
|
|
"learning_rate": 6.888182259555009e-05,
|
|
"loss": 2.3819,
|
|
"step": 28265
|
|
},
|
|
{
|
|
"epoch": 8.929321645739556,
|
|
"grad_norm": 0.0468123822660515,
|
|
"learning_rate": 6.868084180184476e-05,
|
|
"loss": 2.3585,
|
|
"step": 28270
|
|
},
|
|
{
|
|
"epoch": 8.930901050304035,
|
|
"grad_norm": 0.0465213220810895,
|
|
"learning_rate": 6.848014421634497e-05,
|
|
"loss": 2.4015,
|
|
"step": 28275
|
|
},
|
|
{
|
|
"epoch": 8.932480454868514,
|
|
"grad_norm": 0.043897267262942054,
|
|
"learning_rate": 6.827972990008169e-05,
|
|
"loss": 2.4203,
|
|
"step": 28280
|
|
},
|
|
{
|
|
"epoch": 8.934059859432994,
|
|
"grad_norm": 0.040792885485446226,
|
|
"learning_rate": 6.807959891399951e-05,
|
|
"loss": 2.266,
|
|
"step": 28285
|
|
},
|
|
{
|
|
"epoch": 8.935639263997473,
|
|
"grad_norm": 0.042875993149876494,
|
|
"learning_rate": 6.787975131895718e-05,
|
|
"loss": 2.3575,
|
|
"step": 28290
|
|
},
|
|
{
|
|
"epoch": 8.937218668561952,
|
|
"grad_norm": 0.045745682590700325,
|
|
"learning_rate": 6.768018717572699e-05,
|
|
"loss": 2.4595,
|
|
"step": 28295
|
|
},
|
|
{
|
|
"epoch": 8.938798073126431,
|
|
"grad_norm": 0.04342424753009981,
|
|
"learning_rate": 6.748090654499517e-05,
|
|
"loss": 2.5196,
|
|
"step": 28300
|
|
},
|
|
{
|
|
"epoch": 8.94037747769091,
|
|
"grad_norm": 0.04334245849721706,
|
|
"learning_rate": 6.728190948736157e-05,
|
|
"loss": 2.408,
|
|
"step": 28305
|
|
},
|
|
{
|
|
"epoch": 8.94195688225539,
|
|
"grad_norm": 0.05159437317808072,
|
|
"learning_rate": 6.708319606334001e-05,
|
|
"loss": 2.4762,
|
|
"step": 28310
|
|
},
|
|
{
|
|
"epoch": 8.94353628681987,
|
|
"grad_norm": 0.043809806637989064,
|
|
"learning_rate": 6.688476633335816e-05,
|
|
"loss": 2.4506,
|
|
"step": 28315
|
|
},
|
|
{
|
|
"epoch": 8.945115691384348,
|
|
"grad_norm": 0.04417766741026222,
|
|
"learning_rate": 6.668662035775675e-05,
|
|
"loss": 2.4299,
|
|
"step": 28320
|
|
},
|
|
{
|
|
"epoch": 8.946695095948828,
|
|
"grad_norm": 0.04224320060040361,
|
|
"learning_rate": 6.648875819679112e-05,
|
|
"loss": 2.4565,
|
|
"step": 28325
|
|
},
|
|
{
|
|
"epoch": 8.948274500513307,
|
|
"grad_norm": 0.060494669613537645,
|
|
"learning_rate": 6.629117991062972e-05,
|
|
"loss": 2.4756,
|
|
"step": 28330
|
|
},
|
|
{
|
|
"epoch": 8.949853905077786,
|
|
"grad_norm": 0.047902803502330005,
|
|
"learning_rate": 6.60938855593548e-05,
|
|
"loss": 2.4002,
|
|
"step": 28335
|
|
},
|
|
{
|
|
"epoch": 8.951433309642265,
|
|
"grad_norm": 0.047155739020529386,
|
|
"learning_rate": 6.58968752029625e-05,
|
|
"loss": 2.3659,
|
|
"step": 28340
|
|
},
|
|
{
|
|
"epoch": 8.953012714206745,
|
|
"grad_norm": 0.04418604212975609,
|
|
"learning_rate": 6.570014890136223e-05,
|
|
"loss": 2.4233,
|
|
"step": 28345
|
|
},
|
|
{
|
|
"epoch": 8.954592118771224,
|
|
"grad_norm": 0.04267468340802014,
|
|
"learning_rate": 6.550370671437722e-05,
|
|
"loss": 2.3731,
|
|
"step": 28350
|
|
},
|
|
{
|
|
"epoch": 8.956171523335703,
|
|
"grad_norm": 0.04434042419174162,
|
|
"learning_rate": 6.530754870174448e-05,
|
|
"loss": 2.4518,
|
|
"step": 28355
|
|
},
|
|
{
|
|
"epoch": 8.957750927900182,
|
|
"grad_norm": 0.03954702637722497,
|
|
"learning_rate": 6.51116749231142e-05,
|
|
"loss": 2.3943,
|
|
"step": 28360
|
|
},
|
|
{
|
|
"epoch": 8.959330332464662,
|
|
"grad_norm": 0.04657808267518389,
|
|
"learning_rate": 6.49160854380505e-05,
|
|
"loss": 2.3881,
|
|
"step": 28365
|
|
},
|
|
{
|
|
"epoch": 8.96090973702914,
|
|
"grad_norm": 0.04487094095565116,
|
|
"learning_rate": 6.472078030603079e-05,
|
|
"loss": 2.3503,
|
|
"step": 28370
|
|
},
|
|
{
|
|
"epoch": 8.962489141593618,
|
|
"grad_norm": 0.043540370969267606,
|
|
"learning_rate": 6.45257595864459e-05,
|
|
"loss": 2.3871,
|
|
"step": 28375
|
|
},
|
|
{
|
|
"epoch": 8.9640685461581,
|
|
"grad_norm": 0.04566018129093167,
|
|
"learning_rate": 6.433102333860075e-05,
|
|
"loss": 2.4074,
|
|
"step": 28380
|
|
},
|
|
{
|
|
"epoch": 8.965647950722577,
|
|
"grad_norm": 0.04158628696039653,
|
|
"learning_rate": 6.413657162171316e-05,
|
|
"loss": 2.3664,
|
|
"step": 28385
|
|
},
|
|
{
|
|
"epoch": 8.967227355287056,
|
|
"grad_norm": 0.055477619760494895,
|
|
"learning_rate": 6.394240449491496e-05,
|
|
"loss": 2.3784,
|
|
"step": 28390
|
|
},
|
|
{
|
|
"epoch": 8.968806759851535,
|
|
"grad_norm": 0.041267318230141375,
|
|
"learning_rate": 6.374852201725078e-05,
|
|
"loss": 2.451,
|
|
"step": 28395
|
|
},
|
|
{
|
|
"epoch": 8.970386164416015,
|
|
"grad_norm": 0.04433084927731949,
|
|
"learning_rate": 6.355492424767906e-05,
|
|
"loss": 2.3834,
|
|
"step": 28400
|
|
},
|
|
{
|
|
"epoch": 8.971965568980494,
|
|
"grad_norm": 0.042990007247864004,
|
|
"learning_rate": 6.336161124507211e-05,
|
|
"loss": 2.3596,
|
|
"step": 28405
|
|
},
|
|
{
|
|
"epoch": 8.973544973544973,
|
|
"grad_norm": 0.04170281731718802,
|
|
"learning_rate": 6.31685830682145e-05,
|
|
"loss": 2.3039,
|
|
"step": 28410
|
|
},
|
|
{
|
|
"epoch": 8.975124378109452,
|
|
"grad_norm": 0.04574421635384774,
|
|
"learning_rate": 6.297583977580534e-05,
|
|
"loss": 2.3449,
|
|
"step": 28415
|
|
},
|
|
{
|
|
"epoch": 8.976703782673932,
|
|
"grad_norm": 0.04389535434415798,
|
|
"learning_rate": 6.278338142645657e-05,
|
|
"loss": 2.4148,
|
|
"step": 28420
|
|
},
|
|
{
|
|
"epoch": 8.97828318723841,
|
|
"grad_norm": 0.04570332578078057,
|
|
"learning_rate": 6.259120807869323e-05,
|
|
"loss": 2.408,
|
|
"step": 28425
|
|
},
|
|
{
|
|
"epoch": 8.97986259180289,
|
|
"grad_norm": 0.03850813327755201,
|
|
"learning_rate": 6.239931979095436e-05,
|
|
"loss": 2.3837,
|
|
"step": 28430
|
|
},
|
|
{
|
|
"epoch": 8.98144199636737,
|
|
"grad_norm": 0.053194674215291324,
|
|
"learning_rate": 6.220771662159175e-05,
|
|
"loss": 2.4011,
|
|
"step": 28435
|
|
},
|
|
{
|
|
"epoch": 8.983021400931849,
|
|
"grad_norm": 0.04013136467677291,
|
|
"learning_rate": 6.201639862887098e-05,
|
|
"loss": 2.3831,
|
|
"step": 28440
|
|
},
|
|
{
|
|
"epoch": 8.984600805496328,
|
|
"grad_norm": 0.05028889901043422,
|
|
"learning_rate": 6.182536587097043e-05,
|
|
"loss": 2.3901,
|
|
"step": 28445
|
|
},
|
|
{
|
|
"epoch": 8.986180210060807,
|
|
"grad_norm": 0.04601368636372297,
|
|
"learning_rate": 6.163461840598183e-05,
|
|
"loss": 2.4495,
|
|
"step": 28450
|
|
},
|
|
{
|
|
"epoch": 8.987759614625286,
|
|
"grad_norm": 0.04871419340899612,
|
|
"learning_rate": 6.144415629191058e-05,
|
|
"loss": 2.4751,
|
|
"step": 28455
|
|
},
|
|
{
|
|
"epoch": 8.989339019189766,
|
|
"grad_norm": 0.04013116759332213,
|
|
"learning_rate": 6.125397958667467e-05,
|
|
"loss": 2.3162,
|
|
"step": 28460
|
|
},
|
|
{
|
|
"epoch": 8.990918423754245,
|
|
"grad_norm": 0.043904232486768474,
|
|
"learning_rate": 6.106408834810562e-05,
|
|
"loss": 2.3792,
|
|
"step": 28465
|
|
},
|
|
{
|
|
"epoch": 8.992497828318724,
|
|
"grad_norm": 0.04647239091746081,
|
|
"learning_rate": 6.087448263394846e-05,
|
|
"loss": 2.3962,
|
|
"step": 28470
|
|
},
|
|
{
|
|
"epoch": 8.994077232883203,
|
|
"grad_norm": 0.04164440679482371,
|
|
"learning_rate": 6.0685162501860735e-05,
|
|
"loss": 2.3796,
|
|
"step": 28475
|
|
},
|
|
{
|
|
"epoch": 8.995656637447683,
|
|
"grad_norm": 0.04287185734386562,
|
|
"learning_rate": 6.0496128009413845e-05,
|
|
"loss": 2.3395,
|
|
"step": 28480
|
|
},
|
|
{
|
|
"epoch": 8.997236042012162,
|
|
"grad_norm": 0.04134370498230083,
|
|
"learning_rate": 6.0307379214091684e-05,
|
|
"loss": 2.3704,
|
|
"step": 28485
|
|
},
|
|
{
|
|
"epoch": 8.998815446576641,
|
|
"grad_norm": 0.041497695897780615,
|
|
"learning_rate": 6.011891617329146e-05,
|
|
"loss": 2.3183,
|
|
"step": 28490
|
|
},
|
|
{
|
|
"epoch": 9.0,
|
|
"eval_loss": 2.398218870162964,
|
|
"eval_runtime": 118.6226,
|
|
"eval_samples_per_second": 22.331,
|
|
"eval_steps_per_second": 5.589,
|
|
"step": 28494
|
|
},
|
|
{
|
|
"epoch": 9.000315880912895,
|
|
"grad_norm": 0.05539895210854539,
|
|
"learning_rate": 5.993073894432421e-05,
|
|
"loss": 2.3754,
|
|
"step": 28495
|
|
},
|
|
{
|
|
"epoch": 9.001895285477374,
|
|
"grad_norm": 0.04123702262548509,
|
|
"learning_rate": 5.9742847584412505e-05,
|
|
"loss": 2.3419,
|
|
"step": 28500
|
|
},
|
|
{
|
|
"epoch": 9.003474690041854,
|
|
"grad_norm": 0.04271805382790392,
|
|
"learning_rate": 5.9555242150693636e-05,
|
|
"loss": 2.4621,
|
|
"step": 28505
|
|
},
|
|
{
|
|
"epoch": 9.005054094606333,
|
|
"grad_norm": 0.040627830008919785,
|
|
"learning_rate": 5.936792270021696e-05,
|
|
"loss": 2.4283,
|
|
"step": 28510
|
|
},
|
|
{
|
|
"epoch": 9.006633499170812,
|
|
"grad_norm": 0.04332750417326001,
|
|
"learning_rate": 5.918088928994492e-05,
|
|
"loss": 2.3038,
|
|
"step": 28515
|
|
},
|
|
{
|
|
"epoch": 9.008212903735291,
|
|
"grad_norm": 0.04885440830855184,
|
|
"learning_rate": 5.899414197675357e-05,
|
|
"loss": 2.4495,
|
|
"step": 28520
|
|
},
|
|
{
|
|
"epoch": 9.00979230829977,
|
|
"grad_norm": 0.044867524974600684,
|
|
"learning_rate": 5.880768081743126e-05,
|
|
"loss": 2.4239,
|
|
"step": 28525
|
|
},
|
|
{
|
|
"epoch": 9.01137171286425,
|
|
"grad_norm": 0.04523618610822175,
|
|
"learning_rate": 5.862150586867998e-05,
|
|
"loss": 2.3923,
|
|
"step": 28530
|
|
},
|
|
{
|
|
"epoch": 9.01295111742873,
|
|
"grad_norm": 0.04445363859267257,
|
|
"learning_rate": 5.843561718711399e-05,
|
|
"loss": 2.4436,
|
|
"step": 28535
|
|
},
|
|
{
|
|
"epoch": 9.014530521993208,
|
|
"grad_norm": 0.03926001759058027,
|
|
"learning_rate": 5.825001482926107e-05,
|
|
"loss": 2.3301,
|
|
"step": 28540
|
|
},
|
|
{
|
|
"epoch": 9.016109926557688,
|
|
"grad_norm": 0.04244760575594229,
|
|
"learning_rate": 5.806469885156163e-05,
|
|
"loss": 2.367,
|
|
"step": 28545
|
|
},
|
|
{
|
|
"epoch": 9.017689331122167,
|
|
"grad_norm": 0.043163584829729325,
|
|
"learning_rate": 5.787966931036892e-05,
|
|
"loss": 2.4273,
|
|
"step": 28550
|
|
},
|
|
{
|
|
"epoch": 9.019268735686646,
|
|
"grad_norm": 0.04168211336664651,
|
|
"learning_rate": 5.76949262619495e-05,
|
|
"loss": 2.3911,
|
|
"step": 28555
|
|
},
|
|
{
|
|
"epoch": 9.020848140251125,
|
|
"grad_norm": 0.04123821904881108,
|
|
"learning_rate": 5.751046976248253e-05,
|
|
"loss": 2.4215,
|
|
"step": 28560
|
|
},
|
|
{
|
|
"epoch": 9.022427544815605,
|
|
"grad_norm": 0.0478115894593452,
|
|
"learning_rate": 5.732629986805982e-05,
|
|
"loss": 2.3722,
|
|
"step": 28565
|
|
},
|
|
{
|
|
"epoch": 9.024006949380084,
|
|
"grad_norm": 0.049719193458654574,
|
|
"learning_rate": 5.7142416634686443e-05,
|
|
"loss": 2.3503,
|
|
"step": 28570
|
|
},
|
|
{
|
|
"epoch": 9.025586353944563,
|
|
"grad_norm": 0.03781963748589037,
|
|
"learning_rate": 5.695882011828024e-05,
|
|
"loss": 2.4532,
|
|
"step": 28575
|
|
},
|
|
{
|
|
"epoch": 9.027165758509042,
|
|
"grad_norm": 0.03952041594418926,
|
|
"learning_rate": 5.677551037467132e-05,
|
|
"loss": 2.3267,
|
|
"step": 28580
|
|
},
|
|
{
|
|
"epoch": 9.028745163073522,
|
|
"grad_norm": 0.042776444291402475,
|
|
"learning_rate": 5.659248745960366e-05,
|
|
"loss": 2.4848,
|
|
"step": 28585
|
|
},
|
|
{
|
|
"epoch": 9.030324567638,
|
|
"grad_norm": 0.04458128838563757,
|
|
"learning_rate": 5.6409751428732613e-05,
|
|
"loss": 2.362,
|
|
"step": 28590
|
|
},
|
|
{
|
|
"epoch": 9.03190397220248,
|
|
"grad_norm": 0.040324798745327996,
|
|
"learning_rate": 5.622730233762752e-05,
|
|
"loss": 2.375,
|
|
"step": 28595
|
|
},
|
|
{
|
|
"epoch": 9.03348337676696,
|
|
"grad_norm": 0.03821938744796752,
|
|
"learning_rate": 5.6045140241769874e-05,
|
|
"loss": 2.446,
|
|
"step": 28600
|
|
},
|
|
{
|
|
"epoch": 9.035062781331439,
|
|
"grad_norm": 0.04090154449247851,
|
|
"learning_rate": 5.586326519655383e-05,
|
|
"loss": 2.401,
|
|
"step": 28605
|
|
},
|
|
{
|
|
"epoch": 9.036642185895918,
|
|
"grad_norm": 0.04416521736060769,
|
|
"learning_rate": 5.568167725728679e-05,
|
|
"loss": 2.3723,
|
|
"step": 28610
|
|
},
|
|
{
|
|
"epoch": 9.038221590460397,
|
|
"grad_norm": 0.03951361589992286,
|
|
"learning_rate": 5.550037647918804e-05,
|
|
"loss": 2.2889,
|
|
"step": 28615
|
|
},
|
|
{
|
|
"epoch": 9.039800995024876,
|
|
"grad_norm": 0.04508963694263608,
|
|
"learning_rate": 5.531936291739037e-05,
|
|
"loss": 2.3452,
|
|
"step": 28620
|
|
},
|
|
{
|
|
"epoch": 9.041380399589356,
|
|
"grad_norm": 0.04172239277099709,
|
|
"learning_rate": 5.513863662693874e-05,
|
|
"loss": 2.4078,
|
|
"step": 28625
|
|
},
|
|
{
|
|
"epoch": 9.042959804153835,
|
|
"grad_norm": 0.04164906108837666,
|
|
"learning_rate": 5.4958197662790864e-05,
|
|
"loss": 2.3829,
|
|
"step": 28630
|
|
},
|
|
{
|
|
"epoch": 9.044539208718314,
|
|
"grad_norm": 0.04207127374846404,
|
|
"learning_rate": 5.477804607981707e-05,
|
|
"loss": 2.3591,
|
|
"step": 28635
|
|
},
|
|
{
|
|
"epoch": 9.046118613282792,
|
|
"grad_norm": 0.03840138408108005,
|
|
"learning_rate": 5.4598181932799976e-05,
|
|
"loss": 2.4897,
|
|
"step": 28640
|
|
},
|
|
{
|
|
"epoch": 9.04769801784727,
|
|
"grad_norm": 0.0426254394099393,
|
|
"learning_rate": 5.4418605276435716e-05,
|
|
"loss": 2.3705,
|
|
"step": 28645
|
|
},
|
|
{
|
|
"epoch": 9.04927742241175,
|
|
"grad_norm": 0.04431027410454712,
|
|
"learning_rate": 5.423931616533207e-05,
|
|
"loss": 2.3392,
|
|
"step": 28650
|
|
},
|
|
{
|
|
"epoch": 9.05085682697623,
|
|
"grad_norm": 0.0400975127964107,
|
|
"learning_rate": 5.4060314654009514e-05,
|
|
"loss": 2.3702,
|
|
"step": 28655
|
|
},
|
|
{
|
|
"epoch": 9.052436231540709,
|
|
"grad_norm": 0.049244319688171286,
|
|
"learning_rate": 5.388160079690174e-05,
|
|
"loss": 2.4115,
|
|
"step": 28660
|
|
},
|
|
{
|
|
"epoch": 9.054015636105188,
|
|
"grad_norm": 0.042487722963328124,
|
|
"learning_rate": 5.370317464835406e-05,
|
|
"loss": 2.3407,
|
|
"step": 28665
|
|
},
|
|
{
|
|
"epoch": 9.055595040669667,
|
|
"grad_norm": 0.0418498746620096,
|
|
"learning_rate": 5.352503626262506e-05,
|
|
"loss": 2.4438,
|
|
"step": 28670
|
|
},
|
|
{
|
|
"epoch": 9.057174445234146,
|
|
"grad_norm": 0.03910125985395159,
|
|
"learning_rate": 5.3347185693885415e-05,
|
|
"loss": 2.3736,
|
|
"step": 28675
|
|
},
|
|
{
|
|
"epoch": 9.058753849798626,
|
|
"grad_norm": 0.04285573909389075,
|
|
"learning_rate": 5.316962299621808e-05,
|
|
"loss": 2.3383,
|
|
"step": 28680
|
|
},
|
|
{
|
|
"epoch": 9.060333254363105,
|
|
"grad_norm": 0.04284186987031212,
|
|
"learning_rate": 5.299234822361898e-05,
|
|
"loss": 2.3175,
|
|
"step": 28685
|
|
},
|
|
{
|
|
"epoch": 9.061912658927584,
|
|
"grad_norm": 0.04475347292548298,
|
|
"learning_rate": 5.281536142999621e-05,
|
|
"loss": 2.3799,
|
|
"step": 28690
|
|
},
|
|
{
|
|
"epoch": 9.063492063492063,
|
|
"grad_norm": 0.05419028033988098,
|
|
"learning_rate": 5.2638662669170276e-05,
|
|
"loss": 2.4104,
|
|
"step": 28695
|
|
},
|
|
{
|
|
"epoch": 9.065071468056543,
|
|
"grad_norm": 0.04581772934670987,
|
|
"learning_rate": 5.24622519948742e-05,
|
|
"loss": 2.3238,
|
|
"step": 28700
|
|
},
|
|
{
|
|
"epoch": 9.066650872621022,
|
|
"grad_norm": 0.044180637089044926,
|
|
"learning_rate": 5.2286129460753174e-05,
|
|
"loss": 2.4376,
|
|
"step": 28705
|
|
},
|
|
{
|
|
"epoch": 9.068230277185501,
|
|
"grad_norm": 0.04129274745161341,
|
|
"learning_rate": 5.211029512036514e-05,
|
|
"loss": 2.3849,
|
|
"step": 28710
|
|
},
|
|
{
|
|
"epoch": 9.06980968174998,
|
|
"grad_norm": 0.04136017192879712,
|
|
"learning_rate": 5.1934749027180206e-05,
|
|
"loss": 2.3897,
|
|
"step": 28715
|
|
},
|
|
{
|
|
"epoch": 9.07138908631446,
|
|
"grad_norm": 0.04171840858660408,
|
|
"learning_rate": 5.1759491234580794e-05,
|
|
"loss": 2.4476,
|
|
"step": 28720
|
|
},
|
|
{
|
|
"epoch": 9.072968490878939,
|
|
"grad_norm": 0.04140480744152659,
|
|
"learning_rate": 5.158452179586148e-05,
|
|
"loss": 2.3982,
|
|
"step": 28725
|
|
},
|
|
{
|
|
"epoch": 9.074547895443418,
|
|
"grad_norm": 0.046919517800492944,
|
|
"learning_rate": 5.1409840764229385e-05,
|
|
"loss": 2.3654,
|
|
"step": 28730
|
|
},
|
|
{
|
|
"epoch": 9.076127300007897,
|
|
"grad_norm": 0.045881106496820825,
|
|
"learning_rate": 5.1235448192804233e-05,
|
|
"loss": 2.4369,
|
|
"step": 28735
|
|
},
|
|
{
|
|
"epoch": 9.077706704572376,
|
|
"grad_norm": 0.04218174046617552,
|
|
"learning_rate": 5.106134413461738e-05,
|
|
"loss": 2.3918,
|
|
"step": 28740
|
|
},
|
|
{
|
|
"epoch": 9.079286109136856,
|
|
"grad_norm": 0.03948596879165074,
|
|
"learning_rate": 5.08875286426127e-05,
|
|
"loss": 2.3829,
|
|
"step": 28745
|
|
},
|
|
{
|
|
"epoch": 9.080865513701335,
|
|
"grad_norm": 0.054065235496467405,
|
|
"learning_rate": 5.071400176964669e-05,
|
|
"loss": 2.3362,
|
|
"step": 28750
|
|
},
|
|
{
|
|
"epoch": 9.082444918265814,
|
|
"grad_norm": 0.050348006713228734,
|
|
"learning_rate": 5.054076356848747e-05,
|
|
"loss": 2.2797,
|
|
"step": 28755
|
|
},
|
|
{
|
|
"epoch": 9.084024322830293,
|
|
"grad_norm": 0.039850146641120986,
|
|
"learning_rate": 5.03678140918159e-05,
|
|
"loss": 2.3972,
|
|
"step": 28760
|
|
},
|
|
{
|
|
"epoch": 9.085603727394773,
|
|
"grad_norm": 0.04094970802271905,
|
|
"learning_rate": 5.01951533922248e-05,
|
|
"loss": 2.3674,
|
|
"step": 28765
|
|
},
|
|
{
|
|
"epoch": 9.087183131959252,
|
|
"grad_norm": 0.044132106892893985,
|
|
"learning_rate": 5.0022781522218844e-05,
|
|
"loss": 2.432,
|
|
"step": 28770
|
|
},
|
|
{
|
|
"epoch": 9.088762536523731,
|
|
"grad_norm": 0.042469894371006324,
|
|
"learning_rate": 4.985069853421553e-05,
|
|
"loss": 2.4111,
|
|
"step": 28775
|
|
},
|
|
{
|
|
"epoch": 9.09034194108821,
|
|
"grad_norm": 0.04143817516641654,
|
|
"learning_rate": 4.9678904480544126e-05,
|
|
"loss": 2.3979,
|
|
"step": 28780
|
|
},
|
|
{
|
|
"epoch": 9.09192134565269,
|
|
"grad_norm": 0.04923096244123163,
|
|
"learning_rate": 4.950739941344606e-05,
|
|
"loss": 2.3604,
|
|
"step": 28785
|
|
},
|
|
{
|
|
"epoch": 9.093500750217169,
|
|
"grad_norm": 0.06095904883104004,
|
|
"learning_rate": 4.933618338507506e-05,
|
|
"loss": 2.3528,
|
|
"step": 28790
|
|
},
|
|
{
|
|
"epoch": 9.095080154781646,
|
|
"grad_norm": 0.04419883584848066,
|
|
"learning_rate": 4.916525644749659e-05,
|
|
"loss": 2.455,
|
|
"step": 28795
|
|
},
|
|
{
|
|
"epoch": 9.096659559346126,
|
|
"grad_norm": 0.04314787308438026,
|
|
"learning_rate": 4.899461865268873e-05,
|
|
"loss": 2.3835,
|
|
"step": 28800
|
|
},
|
|
{
|
|
"epoch": 9.098238963910605,
|
|
"grad_norm": 0.042573988537507504,
|
|
"learning_rate": 4.88242700525412e-05,
|
|
"loss": 2.3775,
|
|
"step": 28805
|
|
},
|
|
{
|
|
"epoch": 9.099818368475084,
|
|
"grad_norm": 0.04207029225332564,
|
|
"learning_rate": 4.86542106988559e-05,
|
|
"loss": 2.3409,
|
|
"step": 28810
|
|
},
|
|
{
|
|
"epoch": 9.101397773039563,
|
|
"grad_norm": 0.0471923165839834,
|
|
"learning_rate": 4.848444064334678e-05,
|
|
"loss": 2.3384,
|
|
"step": 28815
|
|
},
|
|
{
|
|
"epoch": 9.102977177604043,
|
|
"grad_norm": 0.04401213089781764,
|
|
"learning_rate": 4.831495993763968e-05,
|
|
"loss": 2.4127,
|
|
"step": 28820
|
|
},
|
|
{
|
|
"epoch": 9.104556582168522,
|
|
"grad_norm": 0.049263828477959626,
|
|
"learning_rate": 4.8145768633273024e-05,
|
|
"loss": 2.3605,
|
|
"step": 28825
|
|
},
|
|
{
|
|
"epoch": 9.106135986733001,
|
|
"grad_norm": 0.03914847392450638,
|
|
"learning_rate": 4.797686678169655e-05,
|
|
"loss": 2.4509,
|
|
"step": 28830
|
|
},
|
|
{
|
|
"epoch": 9.10771539129748,
|
|
"grad_norm": 0.044789539216387265,
|
|
"learning_rate": 4.780825443427206e-05,
|
|
"loss": 2.3187,
|
|
"step": 28835
|
|
},
|
|
{
|
|
"epoch": 9.10929479586196,
|
|
"grad_norm": 0.047710012261827764,
|
|
"learning_rate": 4.763993164227387e-05,
|
|
"loss": 2.3875,
|
|
"step": 28840
|
|
},
|
|
{
|
|
"epoch": 9.110874200426439,
|
|
"grad_norm": 0.042945195674350796,
|
|
"learning_rate": 4.747189845688749e-05,
|
|
"loss": 2.3657,
|
|
"step": 28845
|
|
},
|
|
{
|
|
"epoch": 9.112453604990918,
|
|
"grad_norm": 0.03912386907019885,
|
|
"learning_rate": 4.730415492921103e-05,
|
|
"loss": 2.4348,
|
|
"step": 28850
|
|
},
|
|
{
|
|
"epoch": 9.114033009555397,
|
|
"grad_norm": 0.047292153040255004,
|
|
"learning_rate": 4.7136701110254255e-05,
|
|
"loss": 2.3042,
|
|
"step": 28855
|
|
},
|
|
{
|
|
"epoch": 9.115612414119877,
|
|
"grad_norm": 0.04365479795989776,
|
|
"learning_rate": 4.6969537050938426e-05,
|
|
"loss": 2.3098,
|
|
"step": 28860
|
|
},
|
|
{
|
|
"epoch": 9.117191818684356,
|
|
"grad_norm": 0.040473229546279604,
|
|
"learning_rate": 4.680266280209744e-05,
|
|
"loss": 2.3938,
|
|
"step": 28865
|
|
},
|
|
{
|
|
"epoch": 9.118771223248835,
|
|
"grad_norm": 0.04031988130048521,
|
|
"learning_rate": 4.663607841447637e-05,
|
|
"loss": 2.3472,
|
|
"step": 28870
|
|
},
|
|
{
|
|
"epoch": 9.120350627813314,
|
|
"grad_norm": 0.040675601722479415,
|
|
"learning_rate": 4.64697839387328e-05,
|
|
"loss": 2.493,
|
|
"step": 28875
|
|
},
|
|
{
|
|
"epoch": 9.121930032377794,
|
|
"grad_norm": 0.04107817503894481,
|
|
"learning_rate": 4.6303779425435625e-05,
|
|
"loss": 2.3484,
|
|
"step": 28880
|
|
},
|
|
{
|
|
"epoch": 9.123509436942273,
|
|
"grad_norm": 0.04263272398412405,
|
|
"learning_rate": 4.613806492506567e-05,
|
|
"loss": 2.3788,
|
|
"step": 28885
|
|
},
|
|
{
|
|
"epoch": 9.125088841506752,
|
|
"grad_norm": 0.04711185197888553,
|
|
"learning_rate": 4.597264048801597e-05,
|
|
"loss": 2.5292,
|
|
"step": 28890
|
|
},
|
|
{
|
|
"epoch": 9.126668246071231,
|
|
"grad_norm": 0.0535459189393864,
|
|
"learning_rate": 4.580750616459084e-05,
|
|
"loss": 2.5002,
|
|
"step": 28895
|
|
},
|
|
{
|
|
"epoch": 9.12824765063571,
|
|
"grad_norm": 0.052533700604335064,
|
|
"learning_rate": 4.564266200500655e-05,
|
|
"loss": 2.3703,
|
|
"step": 28900
|
|
},
|
|
{
|
|
"epoch": 9.12982705520019,
|
|
"grad_norm": 0.05082432610328768,
|
|
"learning_rate": 4.547810805939112e-05,
|
|
"loss": 2.3579,
|
|
"step": 28905
|
|
},
|
|
{
|
|
"epoch": 9.13140645976467,
|
|
"grad_norm": 0.04111080342238679,
|
|
"learning_rate": 4.5313844377784406e-05,
|
|
"loss": 2.382,
|
|
"step": 28910
|
|
},
|
|
{
|
|
"epoch": 9.132985864329148,
|
|
"grad_norm": 0.03809082206329682,
|
|
"learning_rate": 4.514987101013801e-05,
|
|
"loss": 2.3631,
|
|
"step": 28915
|
|
},
|
|
{
|
|
"epoch": 9.134565268893628,
|
|
"grad_norm": 0.043926550953605356,
|
|
"learning_rate": 4.498618800631515e-05,
|
|
"loss": 2.3854,
|
|
"step": 28920
|
|
},
|
|
{
|
|
"epoch": 9.136144673458107,
|
|
"grad_norm": 0.045335341408915215,
|
|
"learning_rate": 4.482279541609069e-05,
|
|
"loss": 2.4797,
|
|
"step": 28925
|
|
},
|
|
{
|
|
"epoch": 9.137724078022586,
|
|
"grad_norm": 0.0399260035243842,
|
|
"learning_rate": 4.465969328915142e-05,
|
|
"loss": 2.3646,
|
|
"step": 28930
|
|
},
|
|
{
|
|
"epoch": 9.139303482587065,
|
|
"grad_norm": 0.0453549885770251,
|
|
"learning_rate": 4.449688167509547e-05,
|
|
"loss": 2.4305,
|
|
"step": 28935
|
|
},
|
|
{
|
|
"epoch": 9.140882887151545,
|
|
"grad_norm": 0.048491122344655156,
|
|
"learning_rate": 4.433436062343299e-05,
|
|
"loss": 2.3452,
|
|
"step": 28940
|
|
},
|
|
{
|
|
"epoch": 9.142462291716024,
|
|
"grad_norm": 0.10868317438250054,
|
|
"learning_rate": 4.417213018358579e-05,
|
|
"loss": 2.3084,
|
|
"step": 28945
|
|
},
|
|
{
|
|
"epoch": 9.144041696280503,
|
|
"grad_norm": 0.050474003875059764,
|
|
"learning_rate": 4.401019040488652e-05,
|
|
"loss": 2.3653,
|
|
"step": 28950
|
|
},
|
|
{
|
|
"epoch": 9.14562110084498,
|
|
"grad_norm": 0.043321815901163206,
|
|
"learning_rate": 4.384854133658045e-05,
|
|
"loss": 2.3629,
|
|
"step": 28955
|
|
},
|
|
{
|
|
"epoch": 9.14720050540946,
|
|
"grad_norm": 0.05295842618436736,
|
|
"learning_rate": 4.368718302782382e-05,
|
|
"loss": 2.4341,
|
|
"step": 28960
|
|
},
|
|
{
|
|
"epoch": 9.148779909973939,
|
|
"grad_norm": 0.0497329088476317,
|
|
"learning_rate": 4.352611552768493e-05,
|
|
"loss": 2.3811,
|
|
"step": 28965
|
|
},
|
|
{
|
|
"epoch": 9.150359314538418,
|
|
"grad_norm": 0.06417418384325152,
|
|
"learning_rate": 4.336533888514327e-05,
|
|
"loss": 2.3073,
|
|
"step": 28970
|
|
},
|
|
{
|
|
"epoch": 9.151938719102898,
|
|
"grad_norm": 0.04820864210283732,
|
|
"learning_rate": 4.320485314908973e-05,
|
|
"loss": 2.3611,
|
|
"step": 28975
|
|
},
|
|
{
|
|
"epoch": 9.153518123667377,
|
|
"grad_norm": 0.04434132639117964,
|
|
"learning_rate": 4.304465836832738e-05,
|
|
"loss": 2.3873,
|
|
"step": 28980
|
|
},
|
|
{
|
|
"epoch": 9.155097528231856,
|
|
"grad_norm": 0.04215533152988047,
|
|
"learning_rate": 4.2884754591570264e-05,
|
|
"loss": 2.3061,
|
|
"step": 28985
|
|
},
|
|
{
|
|
"epoch": 9.156676932796335,
|
|
"grad_norm": 0.04633299675860233,
|
|
"learning_rate": 4.272514186744414e-05,
|
|
"loss": 2.4238,
|
|
"step": 28990
|
|
},
|
|
{
|
|
"epoch": 9.158256337360815,
|
|
"grad_norm": 0.04476402273132824,
|
|
"learning_rate": 4.256582024448608e-05,
|
|
"loss": 2.5064,
|
|
"step": 28995
|
|
},
|
|
{
|
|
"epoch": 9.159835741925294,
|
|
"grad_norm": 0.04526344305115905,
|
|
"learning_rate": 4.240678977114487e-05,
|
|
"loss": 2.3035,
|
|
"step": 29000
|
|
},
|
|
{
|
|
"epoch": 9.161415146489773,
|
|
"grad_norm": 0.041130237054635116,
|
|
"learning_rate": 4.224805049578073e-05,
|
|
"loss": 2.4096,
|
|
"step": 29005
|
|
},
|
|
{
|
|
"epoch": 9.162994551054252,
|
|
"grad_norm": 0.04590940137847429,
|
|
"learning_rate": 4.208960246666505e-05,
|
|
"loss": 2.3711,
|
|
"step": 29010
|
|
},
|
|
{
|
|
"epoch": 9.164573955618732,
|
|
"grad_norm": 0.044620038541742534,
|
|
"learning_rate": 4.1931445731981044e-05,
|
|
"loss": 2.4342,
|
|
"step": 29015
|
|
},
|
|
{
|
|
"epoch": 9.16615336018321,
|
|
"grad_norm": 0.04449040968754132,
|
|
"learning_rate": 4.177358033982326e-05,
|
|
"loss": 2.3963,
|
|
"step": 29020
|
|
},
|
|
{
|
|
"epoch": 9.16773276474769,
|
|
"grad_norm": 0.04514262158130095,
|
|
"learning_rate": 4.1616006338197175e-05,
|
|
"loss": 2.4084,
|
|
"step": 29025
|
|
},
|
|
{
|
|
"epoch": 9.16931216931217,
|
|
"grad_norm": 0.04054822044176869,
|
|
"learning_rate": 4.145872377502047e-05,
|
|
"loss": 2.3895,
|
|
"step": 29030
|
|
},
|
|
{
|
|
"epoch": 9.170891573876649,
|
|
"grad_norm": 0.039526620752448054,
|
|
"learning_rate": 4.1301732698121654e-05,
|
|
"loss": 2.3139,
|
|
"step": 29035
|
|
},
|
|
{
|
|
"epoch": 9.172470978441128,
|
|
"grad_norm": 0.03928310439499334,
|
|
"learning_rate": 4.114503315524043e-05,
|
|
"loss": 2.4264,
|
|
"step": 29040
|
|
},
|
|
{
|
|
"epoch": 9.174050383005607,
|
|
"grad_norm": 0.045551796789566175,
|
|
"learning_rate": 4.098862519402846e-05,
|
|
"loss": 2.2744,
|
|
"step": 29045
|
|
},
|
|
{
|
|
"epoch": 9.175629787570086,
|
|
"grad_norm": 0.0399422783979937,
|
|
"learning_rate": 4.0832508862048145e-05,
|
|
"loss": 2.3409,
|
|
"step": 29050
|
|
},
|
|
{
|
|
"epoch": 9.177209192134566,
|
|
"grad_norm": 0.04924591331646857,
|
|
"learning_rate": 4.067668420677373e-05,
|
|
"loss": 2.3838,
|
|
"step": 29055
|
|
},
|
|
{
|
|
"epoch": 9.178788596699045,
|
|
"grad_norm": 0.044755165729311416,
|
|
"learning_rate": 4.052115127559031e-05,
|
|
"loss": 2.3797,
|
|
"step": 29060
|
|
},
|
|
{
|
|
"epoch": 9.180368001263524,
|
|
"grad_norm": 0.04045754005435543,
|
|
"learning_rate": 4.036591011579438e-05,
|
|
"loss": 2.3068,
|
|
"step": 29065
|
|
},
|
|
{
|
|
"epoch": 9.181947405828003,
|
|
"grad_norm": 0.04275806283767011,
|
|
"learning_rate": 4.0210960774594075e-05,
|
|
"loss": 2.4139,
|
|
"step": 29070
|
|
},
|
|
{
|
|
"epoch": 9.183526810392483,
|
|
"grad_norm": 0.04585594079134387,
|
|
"learning_rate": 4.005630329910825e-05,
|
|
"loss": 2.4098,
|
|
"step": 29075
|
|
},
|
|
{
|
|
"epoch": 9.185106214956962,
|
|
"grad_norm": 0.0638290729672531,
|
|
"learning_rate": 3.990193773636752e-05,
|
|
"loss": 2.3453,
|
|
"step": 29080
|
|
},
|
|
{
|
|
"epoch": 9.186685619521441,
|
|
"grad_norm": 0.04527464135500821,
|
|
"learning_rate": 3.974786413331311e-05,
|
|
"loss": 2.4506,
|
|
"step": 29085
|
|
},
|
|
{
|
|
"epoch": 9.18826502408592,
|
|
"grad_norm": 0.039989221818523775,
|
|
"learning_rate": 3.9594082536797974e-05,
|
|
"loss": 2.3661,
|
|
"step": 29090
|
|
},
|
|
{
|
|
"epoch": 9.1898444286504,
|
|
"grad_norm": 0.043090513322247545,
|
|
"learning_rate": 3.9440592993586264e-05,
|
|
"loss": 2.336,
|
|
"step": 29095
|
|
},
|
|
{
|
|
"epoch": 9.191423833214879,
|
|
"grad_norm": 0.04200432774320849,
|
|
"learning_rate": 3.928739555035288e-05,
|
|
"loss": 2.3503,
|
|
"step": 29100
|
|
},
|
|
{
|
|
"epoch": 9.193003237779358,
|
|
"grad_norm": 0.03999808765273191,
|
|
"learning_rate": 3.913449025368443e-05,
|
|
"loss": 2.3326,
|
|
"step": 29105
|
|
},
|
|
{
|
|
"epoch": 9.194582642343835,
|
|
"grad_norm": 0.03907144195784045,
|
|
"learning_rate": 3.898187715007839e-05,
|
|
"loss": 2.3648,
|
|
"step": 29110
|
|
},
|
|
{
|
|
"epoch": 9.196162046908315,
|
|
"grad_norm": 0.04386079230797623,
|
|
"learning_rate": 3.882955628594331e-05,
|
|
"loss": 2.4088,
|
|
"step": 29115
|
|
},
|
|
{
|
|
"epoch": 9.197741451472794,
|
|
"grad_norm": 0.053581209390653715,
|
|
"learning_rate": 3.867752770759914e-05,
|
|
"loss": 2.3855,
|
|
"step": 29120
|
|
},
|
|
{
|
|
"epoch": 9.199320856037273,
|
|
"grad_norm": 0.05346550104521183,
|
|
"learning_rate": 3.8525791461276774e-05,
|
|
"loss": 2.4107,
|
|
"step": 29125
|
|
},
|
|
{
|
|
"epoch": 9.200900260601752,
|
|
"grad_norm": 0.04310942027466358,
|
|
"learning_rate": 3.837434759311809e-05,
|
|
"loss": 2.3684,
|
|
"step": 29130
|
|
},
|
|
{
|
|
"epoch": 9.202479665166232,
|
|
"grad_norm": 0.04058309001451409,
|
|
"learning_rate": 3.822319614917647e-05,
|
|
"loss": 2.3326,
|
|
"step": 29135
|
|
},
|
|
{
|
|
"epoch": 9.204059069730711,
|
|
"grad_norm": 0.04102981134601642,
|
|
"learning_rate": 3.807233717541569e-05,
|
|
"loss": 2.3551,
|
|
"step": 29140
|
|
},
|
|
{
|
|
"epoch": 9.20563847429519,
|
|
"grad_norm": 0.047155817064088196,
|
|
"learning_rate": 3.792177071771141e-05,
|
|
"loss": 2.318,
|
|
"step": 29145
|
|
},
|
|
{
|
|
"epoch": 9.20721787885967,
|
|
"grad_norm": 0.04273745785676265,
|
|
"learning_rate": 3.777149682184977e-05,
|
|
"loss": 2.3615,
|
|
"step": 29150
|
|
},
|
|
{
|
|
"epoch": 9.208797283424149,
|
|
"grad_norm": 0.0430157506013178,
|
|
"learning_rate": 3.7621515533527995e-05,
|
|
"loss": 2.3555,
|
|
"step": 29155
|
|
},
|
|
{
|
|
"epoch": 9.210376687988628,
|
|
"grad_norm": 0.05222183607969487,
|
|
"learning_rate": 3.747182689835471e-05,
|
|
"loss": 2.3054,
|
|
"step": 29160
|
|
},
|
|
{
|
|
"epoch": 9.211956092553107,
|
|
"grad_norm": 0.04307376017354491,
|
|
"learning_rate": 3.732243096184895e-05,
|
|
"loss": 2.272,
|
|
"step": 29165
|
|
},
|
|
{
|
|
"epoch": 9.213535497117586,
|
|
"grad_norm": 0.044373358610198824,
|
|
"learning_rate": 3.717332776944138e-05,
|
|
"loss": 2.3317,
|
|
"step": 29170
|
|
},
|
|
{
|
|
"epoch": 9.215114901682066,
|
|
"grad_norm": 0.039858621168155556,
|
|
"learning_rate": 3.702451736647305e-05,
|
|
"loss": 2.4248,
|
|
"step": 29175
|
|
},
|
|
{
|
|
"epoch": 9.216694306246545,
|
|
"grad_norm": 0.04569387957148044,
|
|
"learning_rate": 3.6875999798196336e-05,
|
|
"loss": 2.4086,
|
|
"step": 29180
|
|
},
|
|
{
|
|
"epoch": 9.218273710811024,
|
|
"grad_norm": 0.03971019782674811,
|
|
"learning_rate": 3.6727775109774544e-05,
|
|
"loss": 2.3534,
|
|
"step": 29185
|
|
},
|
|
{
|
|
"epoch": 9.219853115375503,
|
|
"grad_norm": 0.04636364959204572,
|
|
"learning_rate": 3.657984334628173e-05,
|
|
"loss": 2.4084,
|
|
"step": 29190
|
|
},
|
|
{
|
|
"epoch": 9.221432519939983,
|
|
"grad_norm": 0.04392572368215338,
|
|
"learning_rate": 3.643220455270324e-05,
|
|
"loss": 2.5082,
|
|
"step": 29195
|
|
},
|
|
{
|
|
"epoch": 9.223011924504462,
|
|
"grad_norm": 0.03935989812368449,
|
|
"learning_rate": 3.6284858773934946e-05,
|
|
"loss": 2.3977,
|
|
"step": 29200
|
|
},
|
|
{
|
|
"epoch": 9.224591329068941,
|
|
"grad_norm": 0.04332327640570466,
|
|
"learning_rate": 3.613780605478367e-05,
|
|
"loss": 2.2912,
|
|
"step": 29205
|
|
},
|
|
{
|
|
"epoch": 9.22617073363342,
|
|
"grad_norm": 0.04855215380149932,
|
|
"learning_rate": 3.599104643996731e-05,
|
|
"loss": 2.389,
|
|
"step": 29210
|
|
},
|
|
{
|
|
"epoch": 9.2277501381979,
|
|
"grad_norm": 0.03795310592202636,
|
|
"learning_rate": 3.58445799741145e-05,
|
|
"loss": 2.3961,
|
|
"step": 29215
|
|
},
|
|
{
|
|
"epoch": 9.229329542762379,
|
|
"grad_norm": 0.05191156694604205,
|
|
"learning_rate": 3.569840670176483e-05,
|
|
"loss": 2.4241,
|
|
"step": 29220
|
|
},
|
|
{
|
|
"epoch": 9.230908947326858,
|
|
"grad_norm": 0.038684792584600135,
|
|
"learning_rate": 3.555252666736863e-05,
|
|
"loss": 2.4226,
|
|
"step": 29225
|
|
},
|
|
{
|
|
"epoch": 9.232488351891337,
|
|
"grad_norm": 0.04123486576408677,
|
|
"learning_rate": 3.540693991528676e-05,
|
|
"loss": 2.4001,
|
|
"step": 29230
|
|
},
|
|
{
|
|
"epoch": 9.234067756455817,
|
|
"grad_norm": 0.04405148701945233,
|
|
"learning_rate": 3.52616464897918e-05,
|
|
"loss": 2.4158,
|
|
"step": 29235
|
|
},
|
|
{
|
|
"epoch": 9.235647161020296,
|
|
"grad_norm": 0.04464129511567758,
|
|
"learning_rate": 3.511664643506618e-05,
|
|
"loss": 2.4193,
|
|
"step": 29240
|
|
},
|
|
{
|
|
"epoch": 9.237226565584775,
|
|
"grad_norm": 0.03677218014352938,
|
|
"learning_rate": 3.497193979520341e-05,
|
|
"loss": 2.3093,
|
|
"step": 29245
|
|
},
|
|
{
|
|
"epoch": 9.238805970149254,
|
|
"grad_norm": 0.04171266312585504,
|
|
"learning_rate": 3.4827526614208184e-05,
|
|
"loss": 2.4566,
|
|
"step": 29250
|
|
},
|
|
{
|
|
"epoch": 9.240385374713734,
|
|
"grad_norm": 0.04169852827156441,
|
|
"learning_rate": 3.468340693599547e-05,
|
|
"loss": 2.3478,
|
|
"step": 29255
|
|
},
|
|
{
|
|
"epoch": 9.241964779278213,
|
|
"grad_norm": 0.04334946368855338,
|
|
"learning_rate": 3.453958080439112e-05,
|
|
"loss": 2.3112,
|
|
"step": 29260
|
|
},
|
|
{
|
|
"epoch": 9.243544183842692,
|
|
"grad_norm": 0.03922094500882477,
|
|
"learning_rate": 3.43960482631317e-05,
|
|
"loss": 2.3882,
|
|
"step": 29265
|
|
},
|
|
{
|
|
"epoch": 9.24512358840717,
|
|
"grad_norm": 0.04519921017609258,
|
|
"learning_rate": 3.42528093558645e-05,
|
|
"loss": 2.316,
|
|
"step": 29270
|
|
},
|
|
{
|
|
"epoch": 9.246702992971649,
|
|
"grad_norm": 0.04997435216087838,
|
|
"learning_rate": 3.41098641261478e-05,
|
|
"loss": 2.3285,
|
|
"step": 29275
|
|
},
|
|
{
|
|
"epoch": 9.248282397536128,
|
|
"grad_norm": 0.039028010205086464,
|
|
"learning_rate": 3.396721261744995e-05,
|
|
"loss": 2.3983,
|
|
"step": 29280
|
|
},
|
|
{
|
|
"epoch": 9.249861802100607,
|
|
"grad_norm": 0.05505879845239007,
|
|
"learning_rate": 3.382485487315079e-05,
|
|
"loss": 2.4632,
|
|
"step": 29285
|
|
},
|
|
{
|
|
"epoch": 9.251441206665087,
|
|
"grad_norm": 0.04380356142493805,
|
|
"learning_rate": 3.3682790936540255e-05,
|
|
"loss": 2.3833,
|
|
"step": 29290
|
|
},
|
|
{
|
|
"epoch": 9.253020611229566,
|
|
"grad_norm": 0.04071937099385057,
|
|
"learning_rate": 3.354102085081878e-05,
|
|
"loss": 2.3134,
|
|
"step": 29295
|
|
},
|
|
{
|
|
"epoch": 9.254600015794045,
|
|
"grad_norm": 0.04483331538338372,
|
|
"learning_rate": 3.339954465909822e-05,
|
|
"loss": 2.4023,
|
|
"step": 29300
|
|
},
|
|
{
|
|
"epoch": 9.256179420358524,
|
|
"grad_norm": 0.05433343268438282,
|
|
"learning_rate": 3.325836240440028e-05,
|
|
"loss": 2.3841,
|
|
"step": 29305
|
|
},
|
|
{
|
|
"epoch": 9.257758824923004,
|
|
"grad_norm": 0.06271809829545318,
|
|
"learning_rate": 3.31174741296576e-05,
|
|
"loss": 2.3909,
|
|
"step": 29310
|
|
},
|
|
{
|
|
"epoch": 9.259338229487483,
|
|
"grad_norm": 0.055331123105307564,
|
|
"learning_rate": 3.297687987771359e-05,
|
|
"loss": 2.3095,
|
|
"step": 29315
|
|
},
|
|
{
|
|
"epoch": 9.260917634051962,
|
|
"grad_norm": 0.06144956409274985,
|
|
"learning_rate": 3.28365796913217e-05,
|
|
"loss": 2.41,
|
|
"step": 29320
|
|
},
|
|
{
|
|
"epoch": 9.262497038616441,
|
|
"grad_norm": 0.049214960201155206,
|
|
"learning_rate": 3.269657361314671e-05,
|
|
"loss": 2.4013,
|
|
"step": 29325
|
|
},
|
|
{
|
|
"epoch": 9.26407644318092,
|
|
"grad_norm": 0.042137070485659386,
|
|
"learning_rate": 3.255686168576333e-05,
|
|
"loss": 2.4245,
|
|
"step": 29330
|
|
},
|
|
{
|
|
"epoch": 9.2656558477454,
|
|
"grad_norm": 0.03725081325722173,
|
|
"learning_rate": 3.241744395165713e-05,
|
|
"loss": 2.3898,
|
|
"step": 29335
|
|
},
|
|
{
|
|
"epoch": 9.267235252309879,
|
|
"grad_norm": 0.045664163663368236,
|
|
"learning_rate": 3.227832045322432e-05,
|
|
"loss": 2.3903,
|
|
"step": 29340
|
|
},
|
|
{
|
|
"epoch": 9.268814656874358,
|
|
"grad_norm": 0.04055654534671299,
|
|
"learning_rate": 3.213949123277127e-05,
|
|
"loss": 2.4562,
|
|
"step": 29345
|
|
},
|
|
{
|
|
"epoch": 9.270394061438838,
|
|
"grad_norm": 0.0421780986676158,
|
|
"learning_rate": 3.200095633251499e-05,
|
|
"loss": 2.4214,
|
|
"step": 29350
|
|
},
|
|
{
|
|
"epoch": 9.271973466003317,
|
|
"grad_norm": 0.040088304665845904,
|
|
"learning_rate": 3.186271579458333e-05,
|
|
"loss": 2.3567,
|
|
"step": 29355
|
|
},
|
|
{
|
|
"epoch": 9.273552870567796,
|
|
"grad_norm": 0.03988299510997113,
|
|
"learning_rate": 3.172476966101401e-05,
|
|
"loss": 2.4537,
|
|
"step": 29360
|
|
},
|
|
{
|
|
"epoch": 9.275132275132275,
|
|
"grad_norm": 0.03951477475300885,
|
|
"learning_rate": 3.15871179737558e-05,
|
|
"loss": 2.4638,
|
|
"step": 29365
|
|
},
|
|
{
|
|
"epoch": 9.276711679696755,
|
|
"grad_norm": 0.03652875104171294,
|
|
"learning_rate": 3.144976077466766e-05,
|
|
"loss": 2.3528,
|
|
"step": 29370
|
|
},
|
|
{
|
|
"epoch": 9.278291084261234,
|
|
"grad_norm": 0.042400024464408444,
|
|
"learning_rate": 3.1312698105519065e-05,
|
|
"loss": 2.4876,
|
|
"step": 29375
|
|
},
|
|
{
|
|
"epoch": 9.279870488825713,
|
|
"grad_norm": 0.06535280970340411,
|
|
"learning_rate": 3.117593000798991e-05,
|
|
"loss": 2.4615,
|
|
"step": 29380
|
|
},
|
|
{
|
|
"epoch": 9.281449893390192,
|
|
"grad_norm": 0.06965542532897992,
|
|
"learning_rate": 3.1039456523670354e-05,
|
|
"loss": 2.394,
|
|
"step": 29385
|
|
},
|
|
{
|
|
"epoch": 9.283029297954672,
|
|
"grad_norm": 0.04933183325778829,
|
|
"learning_rate": 3.0903277694061206e-05,
|
|
"loss": 2.5109,
|
|
"step": 29390
|
|
},
|
|
{
|
|
"epoch": 9.28460870251915,
|
|
"grad_norm": 0.07329883949860261,
|
|
"learning_rate": 3.0767393560573676e-05,
|
|
"loss": 2.4093,
|
|
"step": 29395
|
|
},
|
|
{
|
|
"epoch": 9.28618810708363,
|
|
"grad_norm": 0.043019979665248294,
|
|
"learning_rate": 3.0631804164529155e-05,
|
|
"loss": 2.3237,
|
|
"step": 29400
|
|
},
|
|
{
|
|
"epoch": 9.28776751164811,
|
|
"grad_norm": 0.04286309010098627,
|
|
"learning_rate": 3.0496509547159546e-05,
|
|
"loss": 2.3888,
|
|
"step": 29405
|
|
},
|
|
{
|
|
"epoch": 9.289346916212589,
|
|
"grad_norm": 0.05463231724209813,
|
|
"learning_rate": 3.0361509749606942e-05,
|
|
"loss": 2.4998,
|
|
"step": 29410
|
|
},
|
|
{
|
|
"epoch": 9.290926320777068,
|
|
"grad_norm": 0.04574377988326568,
|
|
"learning_rate": 3.022680481292406e-05,
|
|
"loss": 2.5221,
|
|
"step": 29415
|
|
},
|
|
{
|
|
"epoch": 9.292505725341547,
|
|
"grad_norm": 0.055234541516584426,
|
|
"learning_rate": 3.0092394778073796e-05,
|
|
"loss": 2.361,
|
|
"step": 29420
|
|
},
|
|
{
|
|
"epoch": 9.294085129906026,
|
|
"grad_norm": 0.04377149950342692,
|
|
"learning_rate": 2.9958279685929347e-05,
|
|
"loss": 2.3823,
|
|
"step": 29425
|
|
},
|
|
{
|
|
"epoch": 9.295664534470504,
|
|
"grad_norm": 0.04641116740681584,
|
|
"learning_rate": 2.982445957727431e-05,
|
|
"loss": 2.3346,
|
|
"step": 29430
|
|
},
|
|
{
|
|
"epoch": 9.297243939034983,
|
|
"grad_norm": 0.056468153318240545,
|
|
"learning_rate": 2.969093449280258e-05,
|
|
"loss": 2.4519,
|
|
"step": 29435
|
|
},
|
|
{
|
|
"epoch": 9.298823343599462,
|
|
"grad_norm": 0.05573138933765895,
|
|
"learning_rate": 2.9557704473118117e-05,
|
|
"loss": 2.4091,
|
|
"step": 29440
|
|
},
|
|
{
|
|
"epoch": 9.300402748163942,
|
|
"grad_norm": 0.04510163972553918,
|
|
"learning_rate": 2.9424769558735297e-05,
|
|
"loss": 2.3855,
|
|
"step": 29445
|
|
},
|
|
{
|
|
"epoch": 9.30198215272842,
|
|
"grad_norm": 0.040779117496225474,
|
|
"learning_rate": 2.9292129790079004e-05,
|
|
"loss": 2.3881,
|
|
"step": 29450
|
|
},
|
|
{
|
|
"epoch": 9.3035615572929,
|
|
"grad_norm": 0.05090618230006451,
|
|
"learning_rate": 2.915978520748397e-05,
|
|
"loss": 2.3817,
|
|
"step": 29455
|
|
},
|
|
{
|
|
"epoch": 9.30514096185738,
|
|
"grad_norm": 0.07942568741459298,
|
|
"learning_rate": 2.9027735851195337e-05,
|
|
"loss": 2.3227,
|
|
"step": 29460
|
|
},
|
|
{
|
|
"epoch": 9.306720366421859,
|
|
"grad_norm": 0.04441821545230185,
|
|
"learning_rate": 2.889598176136865e-05,
|
|
"loss": 2.3473,
|
|
"step": 29465
|
|
},
|
|
{
|
|
"epoch": 9.308299770986338,
|
|
"grad_norm": 0.0642102432950859,
|
|
"learning_rate": 2.8764522978069197e-05,
|
|
"loss": 2.3395,
|
|
"step": 29470
|
|
},
|
|
{
|
|
"epoch": 9.309879175550817,
|
|
"grad_norm": 0.04734207881890283,
|
|
"learning_rate": 2.8633359541272997e-05,
|
|
"loss": 2.4116,
|
|
"step": 29475
|
|
},
|
|
{
|
|
"epoch": 9.311458580115296,
|
|
"grad_norm": 0.06115612128620437,
|
|
"learning_rate": 2.8502491490865922e-05,
|
|
"loss": 2.4584,
|
|
"step": 29480
|
|
},
|
|
{
|
|
"epoch": 9.313037984679776,
|
|
"grad_norm": 0.060420910808885335,
|
|
"learning_rate": 2.8371918866644143e-05,
|
|
"loss": 2.4345,
|
|
"step": 29485
|
|
},
|
|
{
|
|
"epoch": 9.314617389244255,
|
|
"grad_norm": 0.05433250871228697,
|
|
"learning_rate": 2.8241641708313894e-05,
|
|
"loss": 2.3294,
|
|
"step": 29490
|
|
},
|
|
{
|
|
"epoch": 9.316196793808734,
|
|
"grad_norm": 0.03906077732226694,
|
|
"learning_rate": 2.8111660055491705e-05,
|
|
"loss": 2.4747,
|
|
"step": 29495
|
|
},
|
|
{
|
|
"epoch": 9.317776198373213,
|
|
"grad_norm": 0.04805529604126894,
|
|
"learning_rate": 2.7981973947704077e-05,
|
|
"loss": 2.2703,
|
|
"step": 29500
|
|
},
|
|
{
|
|
"epoch": 9.319355602937692,
|
|
"grad_norm": 0.039849712288317914,
|
|
"learning_rate": 2.785258342438779e-05,
|
|
"loss": 2.3317,
|
|
"step": 29505
|
|
},
|
|
{
|
|
"epoch": 9.320935007502172,
|
|
"grad_norm": 0.04634429218917224,
|
|
"learning_rate": 2.7723488524889594e-05,
|
|
"loss": 2.3926,
|
|
"step": 29510
|
|
},
|
|
{
|
|
"epoch": 9.322514412066651,
|
|
"grad_norm": 0.03917190970641901,
|
|
"learning_rate": 2.7594689288466535e-05,
|
|
"loss": 2.4679,
|
|
"step": 29515
|
|
},
|
|
{
|
|
"epoch": 9.32409381663113,
|
|
"grad_norm": 0.06671558758633714,
|
|
"learning_rate": 2.7466185754285723e-05,
|
|
"loss": 2.4398,
|
|
"step": 29520
|
|
},
|
|
{
|
|
"epoch": 9.32567322119561,
|
|
"grad_norm": 0.04498801475803708,
|
|
"learning_rate": 2.733797796142401e-05,
|
|
"loss": 2.428,
|
|
"step": 29525
|
|
},
|
|
{
|
|
"epoch": 9.327252625760089,
|
|
"grad_norm": 0.045553744363332226,
|
|
"learning_rate": 2.7210065948868767e-05,
|
|
"loss": 2.3575,
|
|
"step": 29530
|
|
},
|
|
{
|
|
"epoch": 9.328832030324568,
|
|
"grad_norm": 0.04375299099707137,
|
|
"learning_rate": 2.708244975551699e-05,
|
|
"loss": 2.5259,
|
|
"step": 29535
|
|
},
|
|
{
|
|
"epoch": 9.330411434889047,
|
|
"grad_norm": 0.05064363637638876,
|
|
"learning_rate": 2.6955129420176194e-05,
|
|
"loss": 2.3011,
|
|
"step": 29540
|
|
},
|
|
{
|
|
"epoch": 9.331990839453526,
|
|
"grad_norm": 0.04416609196413431,
|
|
"learning_rate": 2.682810498156363e-05,
|
|
"loss": 2.3921,
|
|
"step": 29545
|
|
},
|
|
{
|
|
"epoch": 9.333570244018006,
|
|
"grad_norm": 0.09097704405726886,
|
|
"learning_rate": 2.6701376478306392e-05,
|
|
"loss": 2.4306,
|
|
"step": 29550
|
|
},
|
|
{
|
|
"epoch": 9.335149648582485,
|
|
"grad_norm": 0.04292206993219541,
|
|
"learning_rate": 2.6574943948942222e-05,
|
|
"loss": 2.3514,
|
|
"step": 29555
|
|
},
|
|
{
|
|
"epoch": 9.336729053146964,
|
|
"grad_norm": 0.04708392703843058,
|
|
"learning_rate": 2.644880743191802e-05,
|
|
"loss": 2.3165,
|
|
"step": 29560
|
|
},
|
|
{
|
|
"epoch": 9.338308457711443,
|
|
"grad_norm": 0.049826577755894406,
|
|
"learning_rate": 2.6322966965591443e-05,
|
|
"loss": 2.4197,
|
|
"step": 29565
|
|
},
|
|
{
|
|
"epoch": 9.339887862275923,
|
|
"grad_norm": 0.041587359271011315,
|
|
"learning_rate": 2.6197422588229546e-05,
|
|
"loss": 2.3115,
|
|
"step": 29570
|
|
},
|
|
{
|
|
"epoch": 9.341467266840402,
|
|
"grad_norm": 0.044478761158850064,
|
|
"learning_rate": 2.607217433800968e-05,
|
|
"loss": 2.3174,
|
|
"step": 29575
|
|
},
|
|
{
|
|
"epoch": 9.343046671404881,
|
|
"grad_norm": 0.05454092939613832,
|
|
"learning_rate": 2.594722225301893e-05,
|
|
"loss": 2.3909,
|
|
"step": 29580
|
|
},
|
|
{
|
|
"epoch": 9.344626075969359,
|
|
"grad_norm": 0.045706366906300894,
|
|
"learning_rate": 2.5822566371254574e-05,
|
|
"loss": 2.3744,
|
|
"step": 29585
|
|
},
|
|
{
|
|
"epoch": 9.346205480533838,
|
|
"grad_norm": 0.05969173282860256,
|
|
"learning_rate": 2.569820673062351e-05,
|
|
"loss": 2.3694,
|
|
"step": 29590
|
|
},
|
|
{
|
|
"epoch": 9.347784885098317,
|
|
"grad_norm": 0.04288615088936334,
|
|
"learning_rate": 2.5574143368942816e-05,
|
|
"loss": 2.4734,
|
|
"step": 29595
|
|
},
|
|
{
|
|
"epoch": 9.349364289662796,
|
|
"grad_norm": 0.04541704867615316,
|
|
"learning_rate": 2.5450376323939318e-05,
|
|
"loss": 2.4614,
|
|
"step": 29600
|
|
},
|
|
{
|
|
"epoch": 9.350943694227276,
|
|
"grad_norm": 0.056044125697114734,
|
|
"learning_rate": 2.532690563324991e-05,
|
|
"loss": 2.325,
|
|
"step": 29605
|
|
},
|
|
{
|
|
"epoch": 9.352523098791755,
|
|
"grad_norm": 0.04623682450196402,
|
|
"learning_rate": 2.520373133442111e-05,
|
|
"loss": 2.2482,
|
|
"step": 29610
|
|
},
|
|
{
|
|
"epoch": 9.354102503356234,
|
|
"grad_norm": 0.04593035270856986,
|
|
"learning_rate": 2.5080853464909514e-05,
|
|
"loss": 2.3643,
|
|
"step": 29615
|
|
},
|
|
{
|
|
"epoch": 9.355681907920713,
|
|
"grad_norm": 0.05682904007841541,
|
|
"learning_rate": 2.4958272062081343e-05,
|
|
"loss": 2.3752,
|
|
"step": 29620
|
|
},
|
|
{
|
|
"epoch": 9.357261312485193,
|
|
"grad_norm": 0.046275985250694325,
|
|
"learning_rate": 2.4835987163212893e-05,
|
|
"loss": 2.3326,
|
|
"step": 29625
|
|
},
|
|
{
|
|
"epoch": 9.358840717049672,
|
|
"grad_norm": 0.0429234631915372,
|
|
"learning_rate": 2.47139988054903e-05,
|
|
"loss": 2.3498,
|
|
"step": 29630
|
|
},
|
|
{
|
|
"epoch": 9.360420121614151,
|
|
"grad_norm": 0.04821878012848691,
|
|
"learning_rate": 2.4592307026009452e-05,
|
|
"loss": 2.3521,
|
|
"step": 29635
|
|
},
|
|
{
|
|
"epoch": 9.36199952617863,
|
|
"grad_norm": 0.054843210641088026,
|
|
"learning_rate": 2.4470911861775857e-05,
|
|
"loss": 2.4585,
|
|
"step": 29640
|
|
},
|
|
{
|
|
"epoch": 9.36357893074311,
|
|
"grad_norm": 0.057313655789485635,
|
|
"learning_rate": 2.434981334970532e-05,
|
|
"loss": 2.3287,
|
|
"step": 29645
|
|
},
|
|
{
|
|
"epoch": 9.365158335307589,
|
|
"grad_norm": 0.04453559734930796,
|
|
"learning_rate": 2.4229011526622712e-05,
|
|
"loss": 2.3613,
|
|
"step": 29650
|
|
},
|
|
{
|
|
"epoch": 9.366737739872068,
|
|
"grad_norm": 0.05319782786638664,
|
|
"learning_rate": 2.4108506429263542e-05,
|
|
"loss": 2.394,
|
|
"step": 29655
|
|
},
|
|
{
|
|
"epoch": 9.368317144436547,
|
|
"grad_norm": 0.04118834226833598,
|
|
"learning_rate": 2.3988298094272277e-05,
|
|
"loss": 2.3939,
|
|
"step": 29660
|
|
},
|
|
{
|
|
"epoch": 9.369896549001027,
|
|
"grad_norm": 0.08576726726639043,
|
|
"learning_rate": 2.386838655820378e-05,
|
|
"loss": 2.3874,
|
|
"step": 29665
|
|
},
|
|
{
|
|
"epoch": 9.371475953565506,
|
|
"grad_norm": 0.044162918232095824,
|
|
"learning_rate": 2.3748771857522223e-05,
|
|
"loss": 2.4165,
|
|
"step": 29670
|
|
},
|
|
{
|
|
"epoch": 9.373055358129985,
|
|
"grad_norm": 0.04094173941087508,
|
|
"learning_rate": 2.3629454028601615e-05,
|
|
"loss": 2.3924,
|
|
"step": 29675
|
|
},
|
|
{
|
|
"epoch": 9.374634762694464,
|
|
"grad_norm": 0.05818302960770649,
|
|
"learning_rate": 2.3510433107725824e-05,
|
|
"loss": 2.362,
|
|
"step": 29680
|
|
},
|
|
{
|
|
"epoch": 9.376214167258944,
|
|
"grad_norm": 0.042235338314351285,
|
|
"learning_rate": 2.3391709131088455e-05,
|
|
"loss": 2.3209,
|
|
"step": 29685
|
|
},
|
|
{
|
|
"epoch": 9.377793571823423,
|
|
"grad_norm": 0.04954815858018991,
|
|
"learning_rate": 2.3273282134792517e-05,
|
|
"loss": 2.431,
|
|
"step": 29690
|
|
},
|
|
{
|
|
"epoch": 9.379372976387902,
|
|
"grad_norm": 0.03960092325835107,
|
|
"learning_rate": 2.3155152154851087e-05,
|
|
"loss": 2.2968,
|
|
"step": 29695
|
|
},
|
|
{
|
|
"epoch": 9.380952380952381,
|
|
"grad_norm": 0.05437119338882939,
|
|
"learning_rate": 2.303731922718666e-05,
|
|
"loss": 2.4298,
|
|
"step": 29700
|
|
},
|
|
{
|
|
"epoch": 9.38253178551686,
|
|
"grad_norm": 0.043076222301554494,
|
|
"learning_rate": 2.2919783387631456e-05,
|
|
"loss": 2.4017,
|
|
"step": 29705
|
|
},
|
|
{
|
|
"epoch": 9.38411119008134,
|
|
"grad_norm": 0.04124777112591901,
|
|
"learning_rate": 2.280254467192744e-05,
|
|
"loss": 2.4128,
|
|
"step": 29710
|
|
},
|
|
{
|
|
"epoch": 9.38569059464582,
|
|
"grad_norm": 0.04907795423437361,
|
|
"learning_rate": 2.2685603115725873e-05,
|
|
"loss": 2.4163,
|
|
"step": 29715
|
|
},
|
|
{
|
|
"epoch": 9.387269999210298,
|
|
"grad_norm": 0.06137536486453631,
|
|
"learning_rate": 2.256895875458831e-05,
|
|
"loss": 2.3463,
|
|
"step": 29720
|
|
},
|
|
{
|
|
"epoch": 9.388849403774778,
|
|
"grad_norm": 0.05648848069899127,
|
|
"learning_rate": 2.2452611623985485e-05,
|
|
"loss": 2.3269,
|
|
"step": 29725
|
|
},
|
|
{
|
|
"epoch": 9.390428808339257,
|
|
"grad_norm": 0.045017933640088825,
|
|
"learning_rate": 2.2336561759297656e-05,
|
|
"loss": 2.3745,
|
|
"step": 29730
|
|
},
|
|
{
|
|
"epoch": 9.392008212903736,
|
|
"grad_norm": 0.05507048607068748,
|
|
"learning_rate": 2.222080919581493e-05,
|
|
"loss": 2.3204,
|
|
"step": 29735
|
|
},
|
|
{
|
|
"epoch": 9.393587617468214,
|
|
"grad_norm": 0.043162768313010934,
|
|
"learning_rate": 2.2105353968736808e-05,
|
|
"loss": 2.3629,
|
|
"step": 29740
|
|
},
|
|
{
|
|
"epoch": 9.395167022032693,
|
|
"grad_norm": 0.043935727744640586,
|
|
"learning_rate": 2.1990196113172767e-05,
|
|
"loss": 2.446,
|
|
"step": 29745
|
|
},
|
|
{
|
|
"epoch": 9.396746426597172,
|
|
"grad_norm": 0.048094083771353345,
|
|
"learning_rate": 2.187533566414146e-05,
|
|
"loss": 2.3781,
|
|
"step": 29750
|
|
},
|
|
{
|
|
"epoch": 9.398325831161651,
|
|
"grad_norm": 0.04365144321456516,
|
|
"learning_rate": 2.176077265657106e-05,
|
|
"loss": 2.3454,
|
|
"step": 29755
|
|
},
|
|
{
|
|
"epoch": 9.39990523572613,
|
|
"grad_norm": 0.045399127811420394,
|
|
"learning_rate": 2.1646507125299588e-05,
|
|
"loss": 2.2808,
|
|
"step": 29760
|
|
},
|
|
{
|
|
"epoch": 9.40148464029061,
|
|
"grad_norm": 0.04207657274447779,
|
|
"learning_rate": 2.1532539105074357e-05,
|
|
"loss": 2.3258,
|
|
"step": 29765
|
|
},
|
|
{
|
|
"epoch": 9.403064044855089,
|
|
"grad_norm": 0.051074072509045616,
|
|
"learning_rate": 2.1418868630552426e-05,
|
|
"loss": 2.3527,
|
|
"step": 29770
|
|
},
|
|
{
|
|
"epoch": 9.404643449419568,
|
|
"grad_norm": 0.04342401351013079,
|
|
"learning_rate": 2.130549573630025e-05,
|
|
"loss": 2.3091,
|
|
"step": 29775
|
|
},
|
|
{
|
|
"epoch": 9.406222853984048,
|
|
"grad_norm": 0.044606635309908146,
|
|
"learning_rate": 2.1192420456793703e-05,
|
|
"loss": 2.3136,
|
|
"step": 29780
|
|
},
|
|
{
|
|
"epoch": 9.407802258548527,
|
|
"grad_norm": 0.04151635614695825,
|
|
"learning_rate": 2.1079642826418387e-05,
|
|
"loss": 2.4504,
|
|
"step": 29785
|
|
},
|
|
{
|
|
"epoch": 9.409381663113006,
|
|
"grad_norm": 0.04797609619965564,
|
|
"learning_rate": 2.0967162879469204e-05,
|
|
"loss": 2.452,
|
|
"step": 29790
|
|
},
|
|
{
|
|
"epoch": 9.410961067677485,
|
|
"grad_norm": 0.03965412747029897,
|
|
"learning_rate": 2.085498065015057e-05,
|
|
"loss": 2.473,
|
|
"step": 29795
|
|
},
|
|
{
|
|
"epoch": 9.412540472241965,
|
|
"grad_norm": 0.04644627128071307,
|
|
"learning_rate": 2.0743096172576414e-05,
|
|
"loss": 2.3485,
|
|
"step": 29800
|
|
},
|
|
{
|
|
"epoch": 9.414119876806444,
|
|
"grad_norm": 0.04676942187692444,
|
|
"learning_rate": 2.0631509480769862e-05,
|
|
"loss": 2.3853,
|
|
"step": 29805
|
|
},
|
|
{
|
|
"epoch": 9.415699281370923,
|
|
"grad_norm": 0.04158178213914722,
|
|
"learning_rate": 2.0520220608664098e-05,
|
|
"loss": 2.3735,
|
|
"step": 29810
|
|
},
|
|
{
|
|
"epoch": 9.417278685935402,
|
|
"grad_norm": 0.03858380553527941,
|
|
"learning_rate": 2.0409229590101163e-05,
|
|
"loss": 2.3907,
|
|
"step": 29815
|
|
},
|
|
{
|
|
"epoch": 9.418858090499882,
|
|
"grad_norm": 0.0731203280795725,
|
|
"learning_rate": 2.029853645883262e-05,
|
|
"loss": 2.5312,
|
|
"step": 29820
|
|
},
|
|
{
|
|
"epoch": 9.42043749506436,
|
|
"grad_norm": 0.03981733529130424,
|
|
"learning_rate": 2.0188141248519754e-05,
|
|
"loss": 2.2781,
|
|
"step": 29825
|
|
},
|
|
{
|
|
"epoch": 9.42201689962884,
|
|
"grad_norm": 0.04781066089100794,
|
|
"learning_rate": 2.0078043992732942e-05,
|
|
"loss": 2.458,
|
|
"step": 29830
|
|
},
|
|
{
|
|
"epoch": 9.42359630419332,
|
|
"grad_norm": 0.056305636259483774,
|
|
"learning_rate": 1.9968244724952067e-05,
|
|
"loss": 2.4044,
|
|
"step": 29835
|
|
},
|
|
{
|
|
"epoch": 9.425175708757799,
|
|
"grad_norm": 0.04500037877631552,
|
|
"learning_rate": 1.985874347856631e-05,
|
|
"loss": 2.3375,
|
|
"step": 29840
|
|
},
|
|
{
|
|
"epoch": 9.426755113322278,
|
|
"grad_norm": 0.03801018917288479,
|
|
"learning_rate": 1.9749540286874478e-05,
|
|
"loss": 2.4163,
|
|
"step": 29845
|
|
},
|
|
{
|
|
"epoch": 9.428334517886757,
|
|
"grad_norm": 0.03964796905422109,
|
|
"learning_rate": 1.9640635183084344e-05,
|
|
"loss": 2.3131,
|
|
"step": 29850
|
|
},
|
|
{
|
|
"epoch": 9.429913922451236,
|
|
"grad_norm": 0.04935019034574686,
|
|
"learning_rate": 1.953202820031341e-05,
|
|
"loss": 2.4528,
|
|
"step": 29855
|
|
},
|
|
{
|
|
"epoch": 9.431493327015716,
|
|
"grad_norm": 0.04734727391081135,
|
|
"learning_rate": 1.9423719371588265e-05,
|
|
"loss": 2.4603,
|
|
"step": 29860
|
|
},
|
|
{
|
|
"epoch": 9.433072731580195,
|
|
"grad_norm": 0.04823994120007205,
|
|
"learning_rate": 1.9315708729845116e-05,
|
|
"loss": 2.2735,
|
|
"step": 29865
|
|
},
|
|
{
|
|
"epoch": 9.434652136144674,
|
|
"grad_norm": 0.05545676647956338,
|
|
"learning_rate": 1.920799630792902e-05,
|
|
"loss": 2.3343,
|
|
"step": 29870
|
|
},
|
|
{
|
|
"epoch": 9.436231540709153,
|
|
"grad_norm": 0.04708308106014793,
|
|
"learning_rate": 1.910058213859489e-05,
|
|
"loss": 2.3726,
|
|
"step": 29875
|
|
},
|
|
{
|
|
"epoch": 9.437810945273633,
|
|
"grad_norm": 0.051773996714407994,
|
|
"learning_rate": 1.8993466254506486e-05,
|
|
"loss": 2.4484,
|
|
"step": 29880
|
|
},
|
|
{
|
|
"epoch": 9.439390349838112,
|
|
"grad_norm": 0.055360447135438084,
|
|
"learning_rate": 1.8886648688237307e-05,
|
|
"loss": 2.5286,
|
|
"step": 29885
|
|
},
|
|
{
|
|
"epoch": 9.440969754402591,
|
|
"grad_norm": 0.04011192475212031,
|
|
"learning_rate": 1.8780129472269704e-05,
|
|
"loss": 2.3998,
|
|
"step": 29890
|
|
},
|
|
{
|
|
"epoch": 9.44254915896707,
|
|
"grad_norm": 0.040613211830941014,
|
|
"learning_rate": 1.867390863899543e-05,
|
|
"loss": 2.3915,
|
|
"step": 29895
|
|
},
|
|
{
|
|
"epoch": 9.44412856353155,
|
|
"grad_norm": 0.050149446685664095,
|
|
"learning_rate": 1.8567986220715872e-05,
|
|
"loss": 2.3799,
|
|
"step": 29900
|
|
},
|
|
{
|
|
"epoch": 9.445707968096027,
|
|
"grad_norm": 0.04017383635390211,
|
|
"learning_rate": 1.846236224964093e-05,
|
|
"loss": 2.4791,
|
|
"step": 29905
|
|
},
|
|
{
|
|
"epoch": 9.447287372660506,
|
|
"grad_norm": 0.04618957395879647,
|
|
"learning_rate": 1.835703675789058e-05,
|
|
"loss": 2.4316,
|
|
"step": 29910
|
|
},
|
|
{
|
|
"epoch": 9.448866777224985,
|
|
"grad_norm": 0.039771449300195334,
|
|
"learning_rate": 1.8252009777493418e-05,
|
|
"loss": 2.3222,
|
|
"step": 29915
|
|
},
|
|
{
|
|
"epoch": 9.450446181789465,
|
|
"grad_norm": 0.04138208104877937,
|
|
"learning_rate": 1.8147281340387457e-05,
|
|
"loss": 2.3547,
|
|
"step": 29920
|
|
},
|
|
{
|
|
"epoch": 9.452025586353944,
|
|
"grad_norm": 0.04225855510943169,
|
|
"learning_rate": 1.8042851478420108e-05,
|
|
"loss": 2.4435,
|
|
"step": 29925
|
|
},
|
|
{
|
|
"epoch": 9.453604990918423,
|
|
"grad_norm": 0.042701158580847276,
|
|
"learning_rate": 1.7938720223347748e-05,
|
|
"loss": 2.4467,
|
|
"step": 29930
|
|
},
|
|
{
|
|
"epoch": 9.455184395482902,
|
|
"grad_norm": 0.05047940706705218,
|
|
"learning_rate": 1.7834887606835937e-05,
|
|
"loss": 2.4121,
|
|
"step": 29935
|
|
},
|
|
{
|
|
"epoch": 9.456763800047382,
|
|
"grad_norm": 0.044368724827039276,
|
|
"learning_rate": 1.773135366045964e-05,
|
|
"loss": 2.356,
|
|
"step": 29940
|
|
},
|
|
{
|
|
"epoch": 9.458343204611861,
|
|
"grad_norm": 0.04474235502172375,
|
|
"learning_rate": 1.7628118415702667e-05,
|
|
"loss": 2.2713,
|
|
"step": 29945
|
|
},
|
|
{
|
|
"epoch": 9.45992260917634,
|
|
"grad_norm": 0.04537155012918435,
|
|
"learning_rate": 1.7525181903958465e-05,
|
|
"loss": 2.3115,
|
|
"step": 29950
|
|
},
|
|
{
|
|
"epoch": 9.46150201374082,
|
|
"grad_norm": 0.045396921077155275,
|
|
"learning_rate": 1.7422544156529217e-05,
|
|
"loss": 2.3988,
|
|
"step": 29955
|
|
},
|
|
{
|
|
"epoch": 9.463081418305299,
|
|
"grad_norm": 0.04569536491545306,
|
|
"learning_rate": 1.7320205204626295e-05,
|
|
"loss": 2.4154,
|
|
"step": 29960
|
|
},
|
|
{
|
|
"epoch": 9.464660822869778,
|
|
"grad_norm": 0.04503139905374363,
|
|
"learning_rate": 1.7218165079370573e-05,
|
|
"loss": 2.3998,
|
|
"step": 29965
|
|
},
|
|
{
|
|
"epoch": 9.466240227434257,
|
|
"grad_norm": 0.04357632442196037,
|
|
"learning_rate": 1.7116423811791793e-05,
|
|
"loss": 2.3596,
|
|
"step": 29970
|
|
},
|
|
{
|
|
"epoch": 9.467819631998736,
|
|
"grad_norm": 0.0477858930960453,
|
|
"learning_rate": 1.7014981432828537e-05,
|
|
"loss": 2.4124,
|
|
"step": 29975
|
|
},
|
|
{
|
|
"epoch": 9.469399036563216,
|
|
"grad_norm": 0.050867574355163044,
|
|
"learning_rate": 1.6913837973329126e-05,
|
|
"loss": 2.5338,
|
|
"step": 29980
|
|
},
|
|
{
|
|
"epoch": 9.470978441127695,
|
|
"grad_norm": 0.04198565363155485,
|
|
"learning_rate": 1.6812993464050297e-05,
|
|
"loss": 2.3959,
|
|
"step": 29985
|
|
},
|
|
{
|
|
"epoch": 9.472557845692174,
|
|
"grad_norm": 0.041113056332670775,
|
|
"learning_rate": 1.6712447935658514e-05,
|
|
"loss": 2.4635,
|
|
"step": 29990
|
|
},
|
|
{
|
|
"epoch": 9.474137250256653,
|
|
"grad_norm": 0.041309350679928164,
|
|
"learning_rate": 1.661220141872877e-05,
|
|
"loss": 2.3399,
|
|
"step": 29995
|
|
},
|
|
{
|
|
"epoch": 9.475716654821133,
|
|
"grad_norm": 0.05830406789875503,
|
|
"learning_rate": 1.651225394374567e-05,
|
|
"loss": 2.4523,
|
|
"step": 30000
|
|
},
|
|
{
|
|
"epoch": 9.477296059385612,
|
|
"grad_norm": 0.05305332294843227,
|
|
"learning_rate": 1.6412605541102465e-05,
|
|
"loss": 2.3259,
|
|
"step": 30005
|
|
},
|
|
{
|
|
"epoch": 9.478875463950091,
|
|
"grad_norm": 0.0456174472825058,
|
|
"learning_rate": 1.631325624110158e-05,
|
|
"loss": 2.5383,
|
|
"step": 30010
|
|
},
|
|
{
|
|
"epoch": 9.48045486851457,
|
|
"grad_norm": 0.04176300384571954,
|
|
"learning_rate": 1.621420607395452e-05,
|
|
"loss": 2.4388,
|
|
"step": 30015
|
|
},
|
|
{
|
|
"epoch": 9.48203427307905,
|
|
"grad_norm": 0.04386143968655976,
|
|
"learning_rate": 1.611545506978185e-05,
|
|
"loss": 2.3695,
|
|
"step": 30020
|
|
},
|
|
{
|
|
"epoch": 9.483613677643529,
|
|
"grad_norm": 0.05561908224519766,
|
|
"learning_rate": 1.6017003258612993e-05,
|
|
"loss": 2.4921,
|
|
"step": 30025
|
|
},
|
|
{
|
|
"epoch": 9.485193082208008,
|
|
"grad_norm": 0.05537260677822884,
|
|
"learning_rate": 1.5918850670386677e-05,
|
|
"loss": 2.3573,
|
|
"step": 30030
|
|
},
|
|
{
|
|
"epoch": 9.486772486772487,
|
|
"grad_norm": 0.04378169445858685,
|
|
"learning_rate": 1.5820997334950348e-05,
|
|
"loss": 2.3794,
|
|
"step": 30035
|
|
},
|
|
{
|
|
"epoch": 9.488351891336967,
|
|
"grad_norm": 0.04942199234857001,
|
|
"learning_rate": 1.5723443282060657e-05,
|
|
"loss": 2.4008,
|
|
"step": 30040
|
|
},
|
|
{
|
|
"epoch": 9.489931295901446,
|
|
"grad_norm": 0.04756229203870128,
|
|
"learning_rate": 1.5626188541383202e-05,
|
|
"loss": 2.4094,
|
|
"step": 30045
|
|
},
|
|
{
|
|
"epoch": 9.491510700465925,
|
|
"grad_norm": 0.04332502222276905,
|
|
"learning_rate": 1.5529233142492437e-05,
|
|
"loss": 2.4333,
|
|
"step": 30050
|
|
},
|
|
{
|
|
"epoch": 9.493090105030404,
|
|
"grad_norm": 0.04305266382009812,
|
|
"learning_rate": 1.5432577114871893e-05,
|
|
"loss": 2.3677,
|
|
"step": 30055
|
|
},
|
|
{
|
|
"epoch": 9.494669509594882,
|
|
"grad_norm": 0.04441467026906564,
|
|
"learning_rate": 1.5336220487914053e-05,
|
|
"loss": 2.4181,
|
|
"step": 30060
|
|
},
|
|
{
|
|
"epoch": 9.496248914159361,
|
|
"grad_norm": 0.05774432747502679,
|
|
"learning_rate": 1.5240163290920483e-05,
|
|
"loss": 2.4699,
|
|
"step": 30065
|
|
},
|
|
{
|
|
"epoch": 9.49782831872384,
|
|
"grad_norm": 0.04548534785304647,
|
|
"learning_rate": 1.514440555310137e-05,
|
|
"loss": 2.4159,
|
|
"step": 30070
|
|
},
|
|
{
|
|
"epoch": 9.49940772328832,
|
|
"grad_norm": 0.04934305339107333,
|
|
"learning_rate": 1.5048947303576088e-05,
|
|
"loss": 2.2676,
|
|
"step": 30075
|
|
},
|
|
{
|
|
"epoch": 9.500987127852799,
|
|
"grad_norm": 0.04429559617729792,
|
|
"learning_rate": 1.4953788571372862e-05,
|
|
"loss": 2.4057,
|
|
"step": 30080
|
|
},
|
|
{
|
|
"epoch": 9.502566532417278,
|
|
"grad_norm": 0.03998151373318868,
|
|
"learning_rate": 1.4858929385428987e-05,
|
|
"loss": 2.4556,
|
|
"step": 30085
|
|
},
|
|
{
|
|
"epoch": 9.504145936981757,
|
|
"grad_norm": 0.05214201915349131,
|
|
"learning_rate": 1.476436977459039e-05,
|
|
"loss": 2.4301,
|
|
"step": 30090
|
|
},
|
|
{
|
|
"epoch": 9.505725341546237,
|
|
"grad_norm": 0.039208856656101625,
|
|
"learning_rate": 1.4670109767612184e-05,
|
|
"loss": 2.4322,
|
|
"step": 30095
|
|
},
|
|
{
|
|
"epoch": 9.507304746110716,
|
|
"grad_norm": 0.04334137454919302,
|
|
"learning_rate": 1.457614939315799e-05,
|
|
"loss": 2.3992,
|
|
"step": 30100
|
|
},
|
|
{
|
|
"epoch": 9.508884150675195,
|
|
"grad_norm": 0.041758109790759806,
|
|
"learning_rate": 1.4482488679800843e-05,
|
|
"loss": 2.3598,
|
|
"step": 30105
|
|
},
|
|
{
|
|
"epoch": 9.510463555239674,
|
|
"grad_norm": 0.042498191826742984,
|
|
"learning_rate": 1.4389127656022294e-05,
|
|
"loss": 2.4085,
|
|
"step": 30110
|
|
},
|
|
{
|
|
"epoch": 9.512042959804154,
|
|
"grad_norm": 0.0507269881752584,
|
|
"learning_rate": 1.4296066350212744e-05,
|
|
"loss": 2.4762,
|
|
"step": 30115
|
|
},
|
|
{
|
|
"epoch": 9.513622364368633,
|
|
"grad_norm": 0.03973664673653034,
|
|
"learning_rate": 1.4203304790671556e-05,
|
|
"loss": 2.3195,
|
|
"step": 30120
|
|
},
|
|
{
|
|
"epoch": 9.515201768933112,
|
|
"grad_norm": 0.039955040633668434,
|
|
"learning_rate": 1.4110843005606833e-05,
|
|
"loss": 2.3229,
|
|
"step": 30125
|
|
},
|
|
{
|
|
"epoch": 9.516781173497591,
|
|
"grad_norm": 0.05790493422855318,
|
|
"learning_rate": 1.401868102313586e-05,
|
|
"loss": 2.396,
|
|
"step": 30130
|
|
},
|
|
{
|
|
"epoch": 9.51836057806207,
|
|
"grad_norm": 0.0601991017071839,
|
|
"learning_rate": 1.392681887128433e-05,
|
|
"loss": 2.4457,
|
|
"step": 30135
|
|
},
|
|
{
|
|
"epoch": 9.51993998262655,
|
|
"grad_norm": 0.05021823124552835,
|
|
"learning_rate": 1.383525657798701e-05,
|
|
"loss": 2.3837,
|
|
"step": 30140
|
|
},
|
|
{
|
|
"epoch": 9.521519387191029,
|
|
"grad_norm": 0.04775413593498285,
|
|
"learning_rate": 1.3743994171087404e-05,
|
|
"loss": 2.4212,
|
|
"step": 30145
|
|
},
|
|
{
|
|
"epoch": 9.523098791755508,
|
|
"grad_norm": 0.04553946922627249,
|
|
"learning_rate": 1.3653031678337868e-05,
|
|
"loss": 2.2845,
|
|
"step": 30150
|
|
},
|
|
{
|
|
"epoch": 9.524678196319988,
|
|
"grad_norm": 0.05062828533969865,
|
|
"learning_rate": 1.3562369127399387e-05,
|
|
"loss": 2.4671,
|
|
"step": 30155
|
|
},
|
|
{
|
|
"epoch": 9.526257600884467,
|
|
"grad_norm": 0.046298147695143774,
|
|
"learning_rate": 1.3472006545841908e-05,
|
|
"loss": 2.4893,
|
|
"step": 30160
|
|
},
|
|
{
|
|
"epoch": 9.527837005448946,
|
|
"grad_norm": 0.04729088554594134,
|
|
"learning_rate": 1.3381943961144117e-05,
|
|
"loss": 2.4226,
|
|
"step": 30165
|
|
},
|
|
{
|
|
"epoch": 9.529416410013425,
|
|
"grad_norm": 0.048787859332196835,
|
|
"learning_rate": 1.3292181400693548e-05,
|
|
"loss": 2.3341,
|
|
"step": 30170
|
|
},
|
|
{
|
|
"epoch": 9.530995814577905,
|
|
"grad_norm": 0.04796829554852364,
|
|
"learning_rate": 1.3202718891786259e-05,
|
|
"loss": 2.2858,
|
|
"step": 30175
|
|
},
|
|
{
|
|
"epoch": 9.532575219142384,
|
|
"grad_norm": 0.041277624080704595,
|
|
"learning_rate": 1.3113556461627485e-05,
|
|
"loss": 2.3621,
|
|
"step": 30180
|
|
},
|
|
{
|
|
"epoch": 9.534154623706863,
|
|
"grad_norm": 0.03873505231492884,
|
|
"learning_rate": 1.302469413733065e-05,
|
|
"loss": 2.4327,
|
|
"step": 30185
|
|
},
|
|
{
|
|
"epoch": 9.535734028271342,
|
|
"grad_norm": 0.045494772722792585,
|
|
"learning_rate": 1.2936131945918472e-05,
|
|
"loss": 2.3949,
|
|
"step": 30190
|
|
},
|
|
{
|
|
"epoch": 9.537313432835822,
|
|
"grad_norm": 0.04382388795010529,
|
|
"learning_rate": 1.2847869914321964e-05,
|
|
"loss": 2.3093,
|
|
"step": 30195
|
|
},
|
|
{
|
|
"epoch": 9.5388928374003,
|
|
"grad_norm": 0.04484146571483914,
|
|
"learning_rate": 1.275990806938121e-05,
|
|
"loss": 2.3497,
|
|
"step": 30200
|
|
},
|
|
{
|
|
"epoch": 9.54047224196478,
|
|
"grad_norm": 0.039862199131951076,
|
|
"learning_rate": 1.2672246437844703e-05,
|
|
"loss": 2.4191,
|
|
"step": 30205
|
|
},
|
|
{
|
|
"epoch": 9.54205164652926,
|
|
"grad_norm": 0.04359029508207695,
|
|
"learning_rate": 1.2584885046369898e-05,
|
|
"loss": 2.417,
|
|
"step": 30210
|
|
},
|
|
{
|
|
"epoch": 9.543631051093737,
|
|
"grad_norm": 0.04438023129957551,
|
|
"learning_rate": 1.2497823921522767e-05,
|
|
"loss": 2.286,
|
|
"step": 30215
|
|
},
|
|
{
|
|
"epoch": 9.545210455658216,
|
|
"grad_norm": 0.04096978438925655,
|
|
"learning_rate": 1.2411063089778019e-05,
|
|
"loss": 2.3608,
|
|
"step": 30220
|
|
},
|
|
{
|
|
"epoch": 9.546789860222695,
|
|
"grad_norm": 0.05192133645429705,
|
|
"learning_rate": 1.2324602577518996e-05,
|
|
"loss": 2.3916,
|
|
"step": 30225
|
|
},
|
|
{
|
|
"epoch": 9.548369264787175,
|
|
"grad_norm": 0.0514436329176969,
|
|
"learning_rate": 1.2238442411038109e-05,
|
|
"loss": 2.4901,
|
|
"step": 30230
|
|
},
|
|
{
|
|
"epoch": 9.549948669351654,
|
|
"grad_norm": 0.050780672753172856,
|
|
"learning_rate": 1.2152582616535845e-05,
|
|
"loss": 2.3917,
|
|
"step": 30235
|
|
},
|
|
{
|
|
"epoch": 9.551528073916133,
|
|
"grad_norm": 0.05512754865263124,
|
|
"learning_rate": 1.2067023220121653e-05,
|
|
"loss": 2.4749,
|
|
"step": 30240
|
|
},
|
|
{
|
|
"epoch": 9.553107478480612,
|
|
"grad_norm": 0.04586744410439057,
|
|
"learning_rate": 1.198176424781361e-05,
|
|
"loss": 2.3557,
|
|
"step": 30245
|
|
},
|
|
{
|
|
"epoch": 9.554686883045092,
|
|
"grad_norm": 0.04129686388116584,
|
|
"learning_rate": 1.1896805725538417e-05,
|
|
"loss": 2.3429,
|
|
"step": 30250
|
|
},
|
|
{
|
|
"epoch": 9.55626628760957,
|
|
"grad_norm": 0.04261799093173591,
|
|
"learning_rate": 1.1812147679131414e-05,
|
|
"loss": 2.4092,
|
|
"step": 30255
|
|
},
|
|
{
|
|
"epoch": 9.55784569217405,
|
|
"grad_norm": 0.03968786044092743,
|
|
"learning_rate": 1.1727790134336668e-05,
|
|
"loss": 2.3737,
|
|
"step": 30260
|
|
},
|
|
{
|
|
"epoch": 9.55942509673853,
|
|
"grad_norm": 0.03949974284061107,
|
|
"learning_rate": 1.1643733116806554e-05,
|
|
"loss": 2.3981,
|
|
"step": 30265
|
|
},
|
|
{
|
|
"epoch": 9.561004501303008,
|
|
"grad_norm": 0.05787206772249031,
|
|
"learning_rate": 1.1559976652102621e-05,
|
|
"loss": 2.3627,
|
|
"step": 30270
|
|
},
|
|
{
|
|
"epoch": 9.562583905867488,
|
|
"grad_norm": 0.04210911987426291,
|
|
"learning_rate": 1.1476520765694387e-05,
|
|
"loss": 2.3844,
|
|
"step": 30275
|
|
},
|
|
{
|
|
"epoch": 9.564163310431967,
|
|
"grad_norm": 0.03923730653481024,
|
|
"learning_rate": 1.1393365482960217e-05,
|
|
"loss": 2.4123,
|
|
"step": 30280
|
|
},
|
|
{
|
|
"epoch": 9.565742714996446,
|
|
"grad_norm": 0.04447443451239888,
|
|
"learning_rate": 1.1310510829187325e-05,
|
|
"loss": 2.3568,
|
|
"step": 30285
|
|
},
|
|
{
|
|
"epoch": 9.567322119560925,
|
|
"grad_norm": 0.04632371230474168,
|
|
"learning_rate": 1.1227956829571229e-05,
|
|
"loss": 2.4679,
|
|
"step": 30290
|
|
},
|
|
{
|
|
"epoch": 9.568901524125405,
|
|
"grad_norm": 0.04289668157296784,
|
|
"learning_rate": 1.1145703509215954e-05,
|
|
"loss": 2.3484,
|
|
"step": 30295
|
|
},
|
|
{
|
|
"epoch": 9.570480928689884,
|
|
"grad_norm": 0.04683699577472539,
|
|
"learning_rate": 1.1063750893134273e-05,
|
|
"loss": 2.4411,
|
|
"step": 30300
|
|
},
|
|
{
|
|
"epoch": 9.572060333254363,
|
|
"grad_norm": 0.04195129324963458,
|
|
"learning_rate": 1.098209900624747e-05,
|
|
"loss": 2.3848,
|
|
"step": 30305
|
|
},
|
|
{
|
|
"epoch": 9.573639737818842,
|
|
"grad_norm": 0.043437481591911235,
|
|
"learning_rate": 1.0900747873385353e-05,
|
|
"loss": 2.3015,
|
|
"step": 30310
|
|
},
|
|
{
|
|
"epoch": 9.575219142383322,
|
|
"grad_norm": 0.04696243927648497,
|
|
"learning_rate": 1.0819697519286243e-05,
|
|
"loss": 2.4201,
|
|
"step": 30315
|
|
},
|
|
{
|
|
"epoch": 9.576798546947801,
|
|
"grad_norm": 0.054747008086861344,
|
|
"learning_rate": 1.073894796859709e-05,
|
|
"loss": 2.3748,
|
|
"step": 30320
|
|
},
|
|
{
|
|
"epoch": 9.57837795151228,
|
|
"grad_norm": 0.045182127280545464,
|
|
"learning_rate": 1.0658499245873365e-05,
|
|
"loss": 2.2968,
|
|
"step": 30325
|
|
},
|
|
{
|
|
"epoch": 9.57995735607676,
|
|
"grad_norm": 0.03951587981848332,
|
|
"learning_rate": 1.0578351375578943e-05,
|
|
"loss": 2.4699,
|
|
"step": 30330
|
|
},
|
|
{
|
|
"epoch": 9.581536760641239,
|
|
"grad_norm": 0.05834058521159146,
|
|
"learning_rate": 1.0498504382086216e-05,
|
|
"loss": 2.4349,
|
|
"step": 30335
|
|
},
|
|
{
|
|
"epoch": 9.583116165205718,
|
|
"grad_norm": 0.041251491689309985,
|
|
"learning_rate": 1.0418958289676094e-05,
|
|
"loss": 2.4767,
|
|
"step": 30340
|
|
},
|
|
{
|
|
"epoch": 9.584695569770197,
|
|
"grad_norm": 0.07170533104500992,
|
|
"learning_rate": 1.0339713122538341e-05,
|
|
"loss": 2.3626,
|
|
"step": 30345
|
|
},
|
|
{
|
|
"epoch": 9.586274974334676,
|
|
"grad_norm": 0.04260019416686983,
|
|
"learning_rate": 1.0260768904770678e-05,
|
|
"loss": 2.3644,
|
|
"step": 30350
|
|
},
|
|
{
|
|
"epoch": 9.587854378899156,
|
|
"grad_norm": 0.0487208252713447,
|
|
"learning_rate": 1.018212566037946e-05,
|
|
"loss": 2.3559,
|
|
"step": 30355
|
|
},
|
|
{
|
|
"epoch": 9.589433783463635,
|
|
"grad_norm": 0.040201645318378686,
|
|
"learning_rate": 1.0103783413279777e-05,
|
|
"loss": 2.4521,
|
|
"step": 30360
|
|
},
|
|
{
|
|
"epoch": 9.591013188028114,
|
|
"grad_norm": 0.04248018611799858,
|
|
"learning_rate": 1.0025742187294907e-05,
|
|
"loss": 2.362,
|
|
"step": 30365
|
|
},
|
|
{
|
|
"epoch": 9.592592592592592,
|
|
"grad_norm": 0.0421978492946494,
|
|
"learning_rate": 9.948002006156753e-06,
|
|
"loss": 2.3562,
|
|
"step": 30370
|
|
},
|
|
{
|
|
"epoch": 9.594171997157073,
|
|
"grad_norm": 0.04136724537884408,
|
|
"learning_rate": 9.87056289350552e-06,
|
|
"loss": 2.4004,
|
|
"step": 30375
|
|
},
|
|
{
|
|
"epoch": 9.59575140172155,
|
|
"grad_norm": 0.03960262386614813,
|
|
"learning_rate": 9.793424872890033e-06,
|
|
"loss": 2.4496,
|
|
"step": 30380
|
|
},
|
|
{
|
|
"epoch": 9.59733080628603,
|
|
"grad_norm": 0.04137355502171963,
|
|
"learning_rate": 9.716587967767532e-06,
|
|
"loss": 2.3325,
|
|
"step": 30385
|
|
},
|
|
{
|
|
"epoch": 9.598910210850509,
|
|
"grad_norm": 0.04095316967018649,
|
|
"learning_rate": 9.640052201503436e-06,
|
|
"loss": 2.3791,
|
|
"step": 30390
|
|
},
|
|
{
|
|
"epoch": 9.600489615414988,
|
|
"grad_norm": 0.039896460435274506,
|
|
"learning_rate": 9.563817597371793e-06,
|
|
"loss": 2.4157,
|
|
"step": 30395
|
|
},
|
|
{
|
|
"epoch": 9.602069019979467,
|
|
"grad_norm": 0.04526125496782704,
|
|
"learning_rate": 9.487884178555285e-06,
|
|
"loss": 2.311,
|
|
"step": 30400
|
|
},
|
|
{
|
|
"epoch": 9.603648424543946,
|
|
"grad_norm": 0.056007224336220955,
|
|
"learning_rate": 9.412251968144548e-06,
|
|
"loss": 2.3221,
|
|
"step": 30405
|
|
},
|
|
{
|
|
"epoch": 9.605227829108426,
|
|
"grad_norm": 0.04247609138890829,
|
|
"learning_rate": 9.336920989139075e-06,
|
|
"loss": 2.385,
|
|
"step": 30410
|
|
},
|
|
{
|
|
"epoch": 9.606807233672905,
|
|
"grad_norm": 0.04838922797912883,
|
|
"learning_rate": 9.261891264446321e-06,
|
|
"loss": 2.3418,
|
|
"step": 30415
|
|
},
|
|
{
|
|
"epoch": 9.608386638237384,
|
|
"grad_norm": 0.04463021386680935,
|
|
"learning_rate": 9.187162816882478e-06,
|
|
"loss": 2.3256,
|
|
"step": 30420
|
|
},
|
|
{
|
|
"epoch": 9.609966042801863,
|
|
"grad_norm": 0.04252370965679977,
|
|
"learning_rate": 9.112735669171923e-06,
|
|
"loss": 2.3774,
|
|
"step": 30425
|
|
},
|
|
{
|
|
"epoch": 9.611545447366343,
|
|
"grad_norm": 0.041372974750041105,
|
|
"learning_rate": 9.038609843947331e-06,
|
|
"loss": 2.3604,
|
|
"step": 30430
|
|
},
|
|
{
|
|
"epoch": 9.613124851930822,
|
|
"grad_norm": 0.03918200730070356,
|
|
"learning_rate": 8.964785363750227e-06,
|
|
"loss": 2.3614,
|
|
"step": 30435
|
|
},
|
|
{
|
|
"epoch": 9.614704256495301,
|
|
"grad_norm": 0.04867686562063659,
|
|
"learning_rate": 8.891262251029986e-06,
|
|
"loss": 2.3443,
|
|
"step": 30440
|
|
},
|
|
{
|
|
"epoch": 9.61628366105978,
|
|
"grad_norm": 0.04765697256733076,
|
|
"learning_rate": 8.8180405281445e-06,
|
|
"loss": 2.4513,
|
|
"step": 30445
|
|
},
|
|
{
|
|
"epoch": 9.61786306562426,
|
|
"grad_norm": 0.04711140899166385,
|
|
"learning_rate": 8.745120217360069e-06,
|
|
"loss": 2.302,
|
|
"step": 30450
|
|
},
|
|
{
|
|
"epoch": 9.619442470188739,
|
|
"grad_norm": 0.06165867220520249,
|
|
"learning_rate": 8.67250134085129e-06,
|
|
"loss": 2.3276,
|
|
"step": 30455
|
|
},
|
|
{
|
|
"epoch": 9.621021874753218,
|
|
"grad_norm": 0.04080824588056334,
|
|
"learning_rate": 8.600183920701054e-06,
|
|
"loss": 2.4288,
|
|
"step": 30460
|
|
},
|
|
{
|
|
"epoch": 9.622601279317697,
|
|
"grad_norm": 0.0589845625137612,
|
|
"learning_rate": 8.528167978900658e-06,
|
|
"loss": 2.3658,
|
|
"step": 30465
|
|
},
|
|
{
|
|
"epoch": 9.624180683882177,
|
|
"grad_norm": 0.040971728425958566,
|
|
"learning_rate": 8.456453537349695e-06,
|
|
"loss": 2.4291,
|
|
"step": 30470
|
|
},
|
|
{
|
|
"epoch": 9.625760088446656,
|
|
"grad_norm": 0.04055824535769639,
|
|
"learning_rate": 8.385040617856165e-06,
|
|
"loss": 2.4538,
|
|
"step": 30475
|
|
},
|
|
{
|
|
"epoch": 9.627339493011135,
|
|
"grad_norm": 0.03890212954797521,
|
|
"learning_rate": 8.313929242136031e-06,
|
|
"loss": 2.3181,
|
|
"step": 30480
|
|
},
|
|
{
|
|
"epoch": 9.628918897575614,
|
|
"grad_norm": 0.04510643757841533,
|
|
"learning_rate": 8.243119431813994e-06,
|
|
"loss": 2.3916,
|
|
"step": 30485
|
|
},
|
|
{
|
|
"epoch": 9.630498302140094,
|
|
"grad_norm": 0.0605976137075534,
|
|
"learning_rate": 8.172611208422832e-06,
|
|
"loss": 2.3935,
|
|
"step": 30490
|
|
},
|
|
{
|
|
"epoch": 9.632077706704573,
|
|
"grad_norm": 0.04555784366649664,
|
|
"learning_rate": 8.102404593403612e-06,
|
|
"loss": 2.3827,
|
|
"step": 30495
|
|
},
|
|
{
|
|
"epoch": 9.633657111269052,
|
|
"grad_norm": 0.05622759311961614,
|
|
"learning_rate": 8.032499608105814e-06,
|
|
"loss": 2.3199,
|
|
"step": 30500
|
|
},
|
|
{
|
|
"epoch": 9.635236515833531,
|
|
"grad_norm": 0.04127894921791548,
|
|
"learning_rate": 7.962896273787102e-06,
|
|
"loss": 2.3836,
|
|
"step": 30505
|
|
},
|
|
{
|
|
"epoch": 9.63681592039801,
|
|
"grad_norm": 0.04587006613023309,
|
|
"learning_rate": 7.893594611613208e-06,
|
|
"loss": 2.3174,
|
|
"step": 30510
|
|
},
|
|
{
|
|
"epoch": 9.63839532496249,
|
|
"grad_norm": 0.04783809377117116,
|
|
"learning_rate": 7.8245946426585e-06,
|
|
"loss": 2.3802,
|
|
"step": 30515
|
|
},
|
|
{
|
|
"epoch": 9.639974729526969,
|
|
"grad_norm": 0.05573266032683132,
|
|
"learning_rate": 7.755896387905303e-06,
|
|
"loss": 2.3265,
|
|
"step": 30520
|
|
},
|
|
{
|
|
"epoch": 9.641554134091448,
|
|
"grad_norm": 0.038286832964746505,
|
|
"learning_rate": 7.687499868244463e-06,
|
|
"loss": 2.4079,
|
|
"step": 30525
|
|
},
|
|
{
|
|
"epoch": 9.643133538655928,
|
|
"grad_norm": 0.04762025727828804,
|
|
"learning_rate": 7.619405104474786e-06,
|
|
"loss": 2.337,
|
|
"step": 30530
|
|
},
|
|
{
|
|
"epoch": 9.644712943220405,
|
|
"grad_norm": 0.0470953542275122,
|
|
"learning_rate": 7.5516121173035966e-06,
|
|
"loss": 2.427,
|
|
"step": 30535
|
|
},
|
|
{
|
|
"epoch": 9.646292347784884,
|
|
"grad_norm": 0.044777400364591644,
|
|
"learning_rate": 7.484120927346183e-06,
|
|
"loss": 2.3778,
|
|
"step": 30540
|
|
},
|
|
{
|
|
"epoch": 9.647871752349364,
|
|
"grad_norm": 0.04723592826455098,
|
|
"learning_rate": 7.416931555126239e-06,
|
|
"loss": 2.4023,
|
|
"step": 30545
|
|
},
|
|
{
|
|
"epoch": 9.649451156913843,
|
|
"grad_norm": 0.045076040442776764,
|
|
"learning_rate": 7.350044021075641e-06,
|
|
"loss": 2.3968,
|
|
"step": 30550
|
|
},
|
|
{
|
|
"epoch": 9.651030561478322,
|
|
"grad_norm": 0.04036003268200752,
|
|
"learning_rate": 7.283458345534455e-06,
|
|
"loss": 2.3877,
|
|
"step": 30555
|
|
},
|
|
{
|
|
"epoch": 9.652609966042801,
|
|
"grad_norm": 0.04560351252702661,
|
|
"learning_rate": 7.217174548750927e-06,
|
|
"loss": 2.4267,
|
|
"step": 30560
|
|
},
|
|
{
|
|
"epoch": 9.65418937060728,
|
|
"grad_norm": 0.04756462549138177,
|
|
"learning_rate": 7.151192650881488e-06,
|
|
"loss": 2.4844,
|
|
"step": 30565
|
|
},
|
|
{
|
|
"epoch": 9.65576877517176,
|
|
"grad_norm": 0.046416280920711504,
|
|
"learning_rate": 7.085512671990979e-06,
|
|
"loss": 2.4185,
|
|
"step": 30570
|
|
},
|
|
{
|
|
"epoch": 9.657348179736239,
|
|
"grad_norm": 0.04094277725135964,
|
|
"learning_rate": 7.0201346320520885e-06,
|
|
"loss": 2.3085,
|
|
"step": 30575
|
|
},
|
|
{
|
|
"epoch": 9.658927584300718,
|
|
"grad_norm": 0.0421059556137951,
|
|
"learning_rate": 6.955058550945914e-06,
|
|
"loss": 2.4972,
|
|
"step": 30580
|
|
},
|
|
{
|
|
"epoch": 9.660506988865198,
|
|
"grad_norm": 0.051743655208623236,
|
|
"learning_rate": 6.8902844484617365e-06,
|
|
"loss": 2.3755,
|
|
"step": 30585
|
|
},
|
|
{
|
|
"epoch": 9.662086393429677,
|
|
"grad_norm": 0.05207865617791107,
|
|
"learning_rate": 6.825812344296911e-06,
|
|
"loss": 2.3845,
|
|
"step": 30590
|
|
},
|
|
{
|
|
"epoch": 9.663665797994156,
|
|
"grad_norm": 0.04238329717910449,
|
|
"learning_rate": 6.761642258056977e-06,
|
|
"loss": 2.4376,
|
|
"step": 30595
|
|
},
|
|
{
|
|
"epoch": 9.665245202558635,
|
|
"grad_norm": 0.03890907294829127,
|
|
"learning_rate": 6.697774209255769e-06,
|
|
"loss": 2.5243,
|
|
"step": 30600
|
|
},
|
|
{
|
|
"epoch": 9.666824607123115,
|
|
"grad_norm": 0.040397506187010615,
|
|
"learning_rate": 6.634208217314863e-06,
|
|
"loss": 2.4171,
|
|
"step": 30605
|
|
},
|
|
{
|
|
"epoch": 9.668404011687594,
|
|
"grad_norm": 0.05460175974014248,
|
|
"learning_rate": 6.570944301564574e-06,
|
|
"loss": 2.3557,
|
|
"step": 30610
|
|
},
|
|
{
|
|
"epoch": 9.669983416252073,
|
|
"grad_norm": 0.04497588646677044,
|
|
"learning_rate": 6.5079824812428465e-06,
|
|
"loss": 2.3133,
|
|
"step": 30615
|
|
},
|
|
{
|
|
"epoch": 9.671562820816552,
|
|
"grad_norm": 0.04670290589313647,
|
|
"learning_rate": 6.445322775496032e-06,
|
|
"loss": 2.3479,
|
|
"step": 30620
|
|
},
|
|
{
|
|
"epoch": 9.673142225381032,
|
|
"grad_norm": 0.04177047213439708,
|
|
"learning_rate": 6.382965203378666e-06,
|
|
"loss": 2.3274,
|
|
"step": 30625
|
|
},
|
|
{
|
|
"epoch": 9.67472162994551,
|
|
"grad_norm": 0.052684775403997894,
|
|
"learning_rate": 6.3209097838531345e-06,
|
|
"loss": 2.4518,
|
|
"step": 30630
|
|
},
|
|
{
|
|
"epoch": 9.67630103450999,
|
|
"grad_norm": 0.04117237377462446,
|
|
"learning_rate": 6.259156535790011e-06,
|
|
"loss": 2.379,
|
|
"step": 30635
|
|
},
|
|
{
|
|
"epoch": 9.67788043907447,
|
|
"grad_norm": 0.04181974896568244,
|
|
"learning_rate": 6.197705477968385e-06,
|
|
"loss": 2.4248,
|
|
"step": 30640
|
|
},
|
|
{
|
|
"epoch": 9.679459843638949,
|
|
"grad_norm": 0.03963486459269917,
|
|
"learning_rate": 6.136556629074863e-06,
|
|
"loss": 2.3532,
|
|
"step": 30645
|
|
},
|
|
{
|
|
"epoch": 9.681039248203428,
|
|
"grad_norm": 0.03986513963167819,
|
|
"learning_rate": 6.075710007704571e-06,
|
|
"loss": 2.3489,
|
|
"step": 30650
|
|
},
|
|
{
|
|
"epoch": 9.682618652767907,
|
|
"grad_norm": 0.03910156824584804,
|
|
"learning_rate": 6.0151656323604865e-06,
|
|
"loss": 2.3829,
|
|
"step": 30655
|
|
},
|
|
{
|
|
"epoch": 9.684198057332386,
|
|
"grad_norm": 0.042672288156665183,
|
|
"learning_rate": 5.95492352145377e-06,
|
|
"loss": 2.2549,
|
|
"step": 30660
|
|
},
|
|
{
|
|
"epoch": 9.685777461896866,
|
|
"grad_norm": 0.047864101614561595,
|
|
"learning_rate": 5.894983693303657e-06,
|
|
"loss": 2.3353,
|
|
"step": 30665
|
|
},
|
|
{
|
|
"epoch": 9.687356866461345,
|
|
"grad_norm": 0.04677235295553442,
|
|
"learning_rate": 5.835346166137456e-06,
|
|
"loss": 2.5169,
|
|
"step": 30670
|
|
},
|
|
{
|
|
"epoch": 9.688936271025824,
|
|
"grad_norm": 0.0393518213132439,
|
|
"learning_rate": 5.776010958090661e-06,
|
|
"loss": 2.3498,
|
|
"step": 30675
|
|
},
|
|
{
|
|
"epoch": 9.690515675590303,
|
|
"grad_norm": 0.04166323858330401,
|
|
"learning_rate": 5.7169780872066145e-06,
|
|
"loss": 2.3622,
|
|
"step": 30680
|
|
},
|
|
{
|
|
"epoch": 9.692095080154782,
|
|
"grad_norm": 0.04751055801597481,
|
|
"learning_rate": 5.658247571436958e-06,
|
|
"loss": 2.4624,
|
|
"step": 30685
|
|
},
|
|
{
|
|
"epoch": 9.69367448471926,
|
|
"grad_norm": 0.048265634956542285,
|
|
"learning_rate": 5.599819428641073e-06,
|
|
"loss": 2.4059,
|
|
"step": 30690
|
|
},
|
|
{
|
|
"epoch": 9.69525388928374,
|
|
"grad_norm": 0.04300194900217398,
|
|
"learning_rate": 5.541693676586857e-06,
|
|
"loss": 2.4031,
|
|
"step": 30695
|
|
},
|
|
{
|
|
"epoch": 9.696833293848218,
|
|
"grad_norm": 0.05287579478685596,
|
|
"learning_rate": 5.483870332949614e-06,
|
|
"loss": 2.3722,
|
|
"step": 30700
|
|
},
|
|
{
|
|
"epoch": 9.698412698412698,
|
|
"grad_norm": 0.04379440634829869,
|
|
"learning_rate": 5.426349415313503e-06,
|
|
"loss": 2.3225,
|
|
"step": 30705
|
|
},
|
|
{
|
|
"epoch": 9.699992102977177,
|
|
"grad_norm": 0.04480391987726241,
|
|
"learning_rate": 5.369130941169864e-06,
|
|
"loss": 2.498,
|
|
"step": 30710
|
|
},
|
|
{
|
|
"epoch": 9.701571507541656,
|
|
"grad_norm": 0.055956011611355945,
|
|
"learning_rate": 5.312214927918668e-06,
|
|
"loss": 2.4603,
|
|
"step": 30715
|
|
},
|
|
{
|
|
"epoch": 9.703150912106135,
|
|
"grad_norm": 0.05239746357954682,
|
|
"learning_rate": 5.255601392867626e-06,
|
|
"loss": 2.3886,
|
|
"step": 30720
|
|
},
|
|
{
|
|
"epoch": 9.704730316670615,
|
|
"grad_norm": 0.04521987865346676,
|
|
"learning_rate": 5.199290353232633e-06,
|
|
"loss": 2.4192,
|
|
"step": 30725
|
|
},
|
|
{
|
|
"epoch": 9.706309721235094,
|
|
"grad_norm": 0.0385121905506996,
|
|
"learning_rate": 5.143281826137547e-06,
|
|
"loss": 2.4245,
|
|
"step": 30730
|
|
},
|
|
{
|
|
"epoch": 9.707889125799573,
|
|
"grad_norm": 0.05024761315841477,
|
|
"learning_rate": 5.087575828614077e-06,
|
|
"loss": 2.3304,
|
|
"step": 30735
|
|
},
|
|
{
|
|
"epoch": 9.709468530364052,
|
|
"grad_norm": 0.04971828992090415,
|
|
"learning_rate": 5.0321723776022285e-06,
|
|
"loss": 2.4155,
|
|
"step": 30740
|
|
},
|
|
{
|
|
"epoch": 9.711047934928532,
|
|
"grad_norm": 0.05178697599173527,
|
|
"learning_rate": 4.977071489949636e-06,
|
|
"loss": 2.3765,
|
|
"step": 30745
|
|
},
|
|
{
|
|
"epoch": 9.712627339493011,
|
|
"grad_norm": 0.04105378489204273,
|
|
"learning_rate": 4.922273182412229e-06,
|
|
"loss": 2.3604,
|
|
"step": 30750
|
|
},
|
|
{
|
|
"epoch": 9.71420674405749,
|
|
"grad_norm": 0.04731059855443261,
|
|
"learning_rate": 4.8677774716539005e-06,
|
|
"loss": 2.3253,
|
|
"step": 30755
|
|
},
|
|
{
|
|
"epoch": 9.71578614862197,
|
|
"grad_norm": 0.044619941817862,
|
|
"learning_rate": 4.813584374246283e-06,
|
|
"loss": 2.3247,
|
|
"step": 30760
|
|
},
|
|
{
|
|
"epoch": 9.717365553186449,
|
|
"grad_norm": 0.049710979675962795,
|
|
"learning_rate": 4.759693906669193e-06,
|
|
"loss": 2.4843,
|
|
"step": 30765
|
|
},
|
|
{
|
|
"epoch": 9.718944957750928,
|
|
"grad_norm": 0.04589773917863417,
|
|
"learning_rate": 4.7061060853105245e-06,
|
|
"loss": 2.4386,
|
|
"step": 30770
|
|
},
|
|
{
|
|
"epoch": 9.720524362315407,
|
|
"grad_norm": 0.04962276990711766,
|
|
"learning_rate": 4.652820926465795e-06,
|
|
"loss": 2.4673,
|
|
"step": 30775
|
|
},
|
|
{
|
|
"epoch": 9.722103766879886,
|
|
"grad_norm": 0.06204472796634575,
|
|
"learning_rate": 4.599838446338933e-06,
|
|
"loss": 2.4417,
|
|
"step": 30780
|
|
},
|
|
{
|
|
"epoch": 9.723683171444366,
|
|
"grad_norm": 0.04288187894005779,
|
|
"learning_rate": 4.547158661041273e-06,
|
|
"loss": 2.3675,
|
|
"step": 30785
|
|
},
|
|
{
|
|
"epoch": 9.725262576008845,
|
|
"grad_norm": 0.04264232488693888,
|
|
"learning_rate": 4.494781586592556e-06,
|
|
"loss": 2.3589,
|
|
"step": 30790
|
|
},
|
|
{
|
|
"epoch": 9.726841980573324,
|
|
"grad_norm": 0.04045280768756304,
|
|
"learning_rate": 4.442707238920262e-06,
|
|
"loss": 2.4392,
|
|
"step": 30795
|
|
},
|
|
{
|
|
"epoch": 9.728421385137803,
|
|
"grad_norm": 0.044577534247766414,
|
|
"learning_rate": 4.390935633859949e-06,
|
|
"loss": 2.3925,
|
|
"step": 30800
|
|
},
|
|
{
|
|
"epoch": 9.730000789702283,
|
|
"grad_norm": 0.0628512056528852,
|
|
"learning_rate": 4.339466787155022e-06,
|
|
"loss": 2.3964,
|
|
"step": 30805
|
|
},
|
|
{
|
|
"epoch": 9.731580194266762,
|
|
"grad_norm": 0.04678420169862776,
|
|
"learning_rate": 4.288300714456739e-06,
|
|
"loss": 2.4355,
|
|
"step": 30810
|
|
},
|
|
{
|
|
"epoch": 9.733159598831241,
|
|
"grad_norm": 0.042966489138362214,
|
|
"learning_rate": 4.237437431324432e-06,
|
|
"loss": 2.3571,
|
|
"step": 30815
|
|
},
|
|
{
|
|
"epoch": 9.73473900339572,
|
|
"grad_norm": 0.041981582567709584,
|
|
"learning_rate": 4.186876953225282e-06,
|
|
"loss": 2.3353,
|
|
"step": 30820
|
|
},
|
|
{
|
|
"epoch": 9.7363184079602,
|
|
"grad_norm": 0.04462294311148982,
|
|
"learning_rate": 4.1366192955345495e-06,
|
|
"loss": 2.4082,
|
|
"step": 30825
|
|
},
|
|
{
|
|
"epoch": 9.737897812524679,
|
|
"grad_norm": 0.05296942511870504,
|
|
"learning_rate": 4.086664473535007e-06,
|
|
"loss": 2.3646,
|
|
"step": 30830
|
|
},
|
|
{
|
|
"epoch": 9.739477217089158,
|
|
"grad_norm": 0.061021263036657995,
|
|
"learning_rate": 4.037012502417836e-06,
|
|
"loss": 2.365,
|
|
"step": 30835
|
|
},
|
|
{
|
|
"epoch": 9.741056621653637,
|
|
"grad_norm": 0.042408455896456566,
|
|
"learning_rate": 3.987663397281627e-06,
|
|
"loss": 2.3529,
|
|
"step": 30840
|
|
},
|
|
{
|
|
"epoch": 9.742636026218115,
|
|
"grad_norm": 0.05110007897643741,
|
|
"learning_rate": 3.938617173133485e-06,
|
|
"loss": 2.4308,
|
|
"step": 30845
|
|
},
|
|
{
|
|
"epoch": 9.744215430782596,
|
|
"grad_norm": 0.042637042104907055,
|
|
"learning_rate": 3.8898738448877035e-06,
|
|
"loss": 2.248,
|
|
"step": 30850
|
|
},
|
|
{
|
|
"epoch": 9.745794835347073,
|
|
"grad_norm": 0.05276895688387875,
|
|
"learning_rate": 3.841433427366981e-06,
|
|
"loss": 2.3986,
|
|
"step": 30855
|
|
},
|
|
{
|
|
"epoch": 9.747374239911553,
|
|
"grad_norm": 0.04629343277981927,
|
|
"learning_rate": 3.793295935301755e-06,
|
|
"loss": 2.3809,
|
|
"step": 30860
|
|
},
|
|
{
|
|
"epoch": 9.748953644476032,
|
|
"grad_norm": 0.05131672840523233,
|
|
"learning_rate": 3.7454613833302067e-06,
|
|
"loss": 2.3555,
|
|
"step": 30865
|
|
},
|
|
{
|
|
"epoch": 9.750533049040511,
|
|
"grad_norm": 0.04295634906659519,
|
|
"learning_rate": 3.6979297859986994e-06,
|
|
"loss": 2.3327,
|
|
"step": 30870
|
|
},
|
|
{
|
|
"epoch": 9.75211245360499,
|
|
"grad_norm": 0.051091952606107854,
|
|
"learning_rate": 3.650701157761227e-06,
|
|
"loss": 2.4104,
|
|
"step": 30875
|
|
},
|
|
{
|
|
"epoch": 9.75369185816947,
|
|
"grad_norm": 0.04170818109947054,
|
|
"learning_rate": 3.6037755129795235e-06,
|
|
"loss": 2.308,
|
|
"step": 30880
|
|
},
|
|
{
|
|
"epoch": 9.755271262733949,
|
|
"grad_norm": 0.03961119134473402,
|
|
"learning_rate": 3.5571528659236187e-06,
|
|
"loss": 2.3481,
|
|
"step": 30885
|
|
},
|
|
{
|
|
"epoch": 9.756850667298428,
|
|
"grad_norm": 0.06305211933680803,
|
|
"learning_rate": 3.51083323077106e-06,
|
|
"loss": 2.3825,
|
|
"step": 30890
|
|
},
|
|
{
|
|
"epoch": 9.758430071862907,
|
|
"grad_norm": 0.04611465862398895,
|
|
"learning_rate": 3.4648166216074695e-06,
|
|
"loss": 2.4526,
|
|
"step": 30895
|
|
},
|
|
{
|
|
"epoch": 9.760009476427387,
|
|
"grad_norm": 0.0477775299579714,
|
|
"learning_rate": 3.419103052425987e-06,
|
|
"loss": 2.3879,
|
|
"step": 30900
|
|
},
|
|
{
|
|
"epoch": 9.761588880991866,
|
|
"grad_norm": 0.047779903939825376,
|
|
"learning_rate": 3.373692537127937e-06,
|
|
"loss": 2.5331,
|
|
"step": 30905
|
|
},
|
|
{
|
|
"epoch": 9.763168285556345,
|
|
"grad_norm": 0.04679318402033712,
|
|
"learning_rate": 3.3285850895224955e-06,
|
|
"loss": 2.3771,
|
|
"step": 30910
|
|
},
|
|
{
|
|
"epoch": 9.764747690120824,
|
|
"grad_norm": 0.045154703815724034,
|
|
"learning_rate": 3.2837807233263574e-06,
|
|
"loss": 2.3445,
|
|
"step": 30915
|
|
},
|
|
{
|
|
"epoch": 9.766327094685304,
|
|
"grad_norm": 0.04264748461729115,
|
|
"learning_rate": 3.2392794521642897e-06,
|
|
"loss": 2.382,
|
|
"step": 30920
|
|
},
|
|
{
|
|
"epoch": 9.767906499249783,
|
|
"grad_norm": 0.04750089500426993,
|
|
"learning_rate": 3.195081289568802e-06,
|
|
"loss": 2.2917,
|
|
"step": 30925
|
|
},
|
|
{
|
|
"epoch": 9.769485903814262,
|
|
"grad_norm": 0.0426232785587374,
|
|
"learning_rate": 3.1511862489803645e-06,
|
|
"loss": 2.3385,
|
|
"step": 30930
|
|
},
|
|
{
|
|
"epoch": 9.771065308378741,
|
|
"grad_norm": 0.04052656479849263,
|
|
"learning_rate": 3.1075943437471885e-06,
|
|
"loss": 2.3917,
|
|
"step": 30935
|
|
},
|
|
{
|
|
"epoch": 9.77264471294322,
|
|
"grad_norm": 0.039327189772560396,
|
|
"learning_rate": 3.0643055871252267e-06,
|
|
"loss": 2.4122,
|
|
"step": 30940
|
|
},
|
|
{
|
|
"epoch": 9.7742241175077,
|
|
"grad_norm": 0.04677095681220249,
|
|
"learning_rate": 3.021319992278282e-06,
|
|
"loss": 2.3975,
|
|
"step": 30945
|
|
},
|
|
{
|
|
"epoch": 9.775803522072179,
|
|
"grad_norm": 0.05579730291696965,
|
|
"learning_rate": 2.97863757227812e-06,
|
|
"loss": 2.3975,
|
|
"step": 30950
|
|
},
|
|
{
|
|
"epoch": 9.777382926636658,
|
|
"grad_norm": 0.04784428512811323,
|
|
"learning_rate": 2.9362583401041366e-06,
|
|
"loss": 2.3602,
|
|
"step": 30955
|
|
},
|
|
{
|
|
"epoch": 9.778962331201138,
|
|
"grad_norm": 0.0411858140902529,
|
|
"learning_rate": 2.894182308643467e-06,
|
|
"loss": 2.3335,
|
|
"step": 30960
|
|
},
|
|
{
|
|
"epoch": 9.780541735765617,
|
|
"grad_norm": 0.050145954867967774,
|
|
"learning_rate": 2.852409490691432e-06,
|
|
"loss": 2.3699,
|
|
"step": 30965
|
|
},
|
|
{
|
|
"epoch": 9.782121140330096,
|
|
"grad_norm": 0.04583264587347306,
|
|
"learning_rate": 2.8109398989505376e-06,
|
|
"loss": 2.4688,
|
|
"step": 30970
|
|
},
|
|
{
|
|
"epoch": 9.783700544894575,
|
|
"grad_norm": 0.04245562266316667,
|
|
"learning_rate": 2.7697735460316952e-06,
|
|
"loss": 2.4882,
|
|
"step": 30975
|
|
},
|
|
{
|
|
"epoch": 9.785279949459055,
|
|
"grad_norm": 0.04696733045462945,
|
|
"learning_rate": 2.7289104444532253e-06,
|
|
"loss": 2.3811,
|
|
"step": 30980
|
|
},
|
|
{
|
|
"epoch": 9.786859354023534,
|
|
"grad_norm": 0.0464987441356757,
|
|
"learning_rate": 2.68835060664141e-06,
|
|
"loss": 2.4104,
|
|
"step": 30985
|
|
},
|
|
{
|
|
"epoch": 9.788438758588013,
|
|
"grad_norm": 0.045702405023384705,
|
|
"learning_rate": 2.6480940449301604e-06,
|
|
"loss": 2.3994,
|
|
"step": 30990
|
|
},
|
|
{
|
|
"epoch": 9.790018163152492,
|
|
"grad_norm": 0.04612368902511887,
|
|
"learning_rate": 2.6081407715611295e-06,
|
|
"loss": 2.3026,
|
|
"step": 30995
|
|
},
|
|
{
|
|
"epoch": 9.79159756771697,
|
|
"grad_norm": 0.05515527290172604,
|
|
"learning_rate": 2.568490798684153e-06,
|
|
"loss": 2.3169,
|
|
"step": 31000
|
|
},
|
|
{
|
|
"epoch": 9.79317697228145,
|
|
"grad_norm": 0.053000123638092964,
|
|
"learning_rate": 2.5291441383562543e-06,
|
|
"loss": 2.4969,
|
|
"step": 31005
|
|
},
|
|
{
|
|
"epoch": 9.794756376845928,
|
|
"grad_norm": 0.040632902763189684,
|
|
"learning_rate": 2.4901008025426388e-06,
|
|
"loss": 2.3309,
|
|
"step": 31010
|
|
},
|
|
{
|
|
"epoch": 9.796335781410408,
|
|
"grad_norm": 0.043100027052276765,
|
|
"learning_rate": 2.451360803116032e-06,
|
|
"loss": 2.3358,
|
|
"step": 31015
|
|
},
|
|
{
|
|
"epoch": 9.797915185974887,
|
|
"grad_norm": 0.04365306395795135,
|
|
"learning_rate": 2.412924151857121e-06,
|
|
"loss": 2.3802,
|
|
"step": 31020
|
|
},
|
|
{
|
|
"epoch": 9.799494590539366,
|
|
"grad_norm": 0.048728724479073275,
|
|
"learning_rate": 2.3747908604542235e-06,
|
|
"loss": 2.4144,
|
|
"step": 31025
|
|
},
|
|
{
|
|
"epoch": 9.801073995103845,
|
|
"grad_norm": 0.04094146611112561,
|
|
"learning_rate": 2.3369609405035073e-06,
|
|
"loss": 2.4692,
|
|
"step": 31030
|
|
},
|
|
{
|
|
"epoch": 9.802653399668324,
|
|
"grad_norm": 0.04158612571434427,
|
|
"learning_rate": 2.29943440350866e-06,
|
|
"loss": 2.4477,
|
|
"step": 31035
|
|
},
|
|
{
|
|
"epoch": 9.804232804232804,
|
|
"grad_norm": 0.05301400544320318,
|
|
"learning_rate": 2.2622112608813305e-06,
|
|
"loss": 2.3161,
|
|
"step": 31040
|
|
},
|
|
{
|
|
"epoch": 9.805812208797283,
|
|
"grad_norm": 0.04697171401422431,
|
|
"learning_rate": 2.2252915239407978e-06,
|
|
"loss": 2.3835,
|
|
"step": 31045
|
|
},
|
|
{
|
|
"epoch": 9.807391613361762,
|
|
"grad_norm": 0.0430581547166588,
|
|
"learning_rate": 2.1886752039141923e-06,
|
|
"loss": 2.375,
|
|
"step": 31050
|
|
},
|
|
{
|
|
"epoch": 9.808971017926241,
|
|
"grad_norm": 0.04091095510825859,
|
|
"learning_rate": 2.1523623119361625e-06,
|
|
"loss": 2.3768,
|
|
"step": 31055
|
|
},
|
|
{
|
|
"epoch": 9.81055042249072,
|
|
"grad_norm": 0.04723473573002013,
|
|
"learning_rate": 2.1163528590494307e-06,
|
|
"loss": 2.3879,
|
|
"step": 31060
|
|
},
|
|
{
|
|
"epoch": 9.8121298270552,
|
|
"grad_norm": 0.04752032893714477,
|
|
"learning_rate": 2.080646856204127e-06,
|
|
"loss": 2.3903,
|
|
"step": 31065
|
|
},
|
|
{
|
|
"epoch": 9.81370923161968,
|
|
"grad_norm": 0.040396078273481684,
|
|
"learning_rate": 2.0452443142582323e-06,
|
|
"loss": 2.4456,
|
|
"step": 31070
|
|
},
|
|
{
|
|
"epoch": 9.815288636184158,
|
|
"grad_norm": 0.04194883118868972,
|
|
"learning_rate": 2.010145243977357e-06,
|
|
"loss": 2.4235,
|
|
"step": 31075
|
|
},
|
|
{
|
|
"epoch": 9.816868040748638,
|
|
"grad_norm": 0.03963730955798123,
|
|
"learning_rate": 1.975349656035075e-06,
|
|
"loss": 2.3923,
|
|
"step": 31080
|
|
},
|
|
{
|
|
"epoch": 9.818447445313117,
|
|
"grad_norm": 0.03779142611215144,
|
|
"learning_rate": 1.940857561012366e-06,
|
|
"loss": 2.3656,
|
|
"step": 31085
|
|
},
|
|
{
|
|
"epoch": 9.820026849877596,
|
|
"grad_norm": 0.05077742388623736,
|
|
"learning_rate": 1.9066689693981731e-06,
|
|
"loss": 2.3962,
|
|
"step": 31090
|
|
},
|
|
{
|
|
"epoch": 9.821606254442075,
|
|
"grad_norm": 0.04634979034819079,
|
|
"learning_rate": 1.8727838915888474e-06,
|
|
"loss": 2.3981,
|
|
"step": 31095
|
|
},
|
|
{
|
|
"epoch": 9.823185659006555,
|
|
"grad_norm": 0.0510649134998081,
|
|
"learning_rate": 1.8392023378888122e-06,
|
|
"loss": 2.2927,
|
|
"step": 31100
|
|
},
|
|
{
|
|
"epoch": 9.824765063571034,
|
|
"grad_norm": 0.055070622203620925,
|
|
"learning_rate": 1.8059243185097885e-06,
|
|
"loss": 2.4004,
|
|
"step": 31105
|
|
},
|
|
{
|
|
"epoch": 9.826344468135513,
|
|
"grad_norm": 0.04031151443518212,
|
|
"learning_rate": 1.7729498435716806e-06,
|
|
"loss": 2.4404,
|
|
"step": 31110
|
|
},
|
|
{
|
|
"epoch": 9.827923872699992,
|
|
"grad_norm": 0.057769974945689806,
|
|
"learning_rate": 1.7402789231015791e-06,
|
|
"loss": 2.4039,
|
|
"step": 31115
|
|
},
|
|
{
|
|
"epoch": 9.829503277264472,
|
|
"grad_norm": 0.044190660673983126,
|
|
"learning_rate": 1.7079115670346478e-06,
|
|
"loss": 2.373,
|
|
"step": 31120
|
|
},
|
|
{
|
|
"epoch": 9.831082681828951,
|
|
"grad_norm": 0.04084854306165555,
|
|
"learning_rate": 1.675847785213569e-06,
|
|
"loss": 2.3918,
|
|
"step": 31125
|
|
},
|
|
{
|
|
"epoch": 9.83266208639343,
|
|
"grad_norm": 0.04859205539272279,
|
|
"learning_rate": 1.6440875873886541e-06,
|
|
"loss": 2.4677,
|
|
"step": 31130
|
|
},
|
|
{
|
|
"epoch": 9.83424149095791,
|
|
"grad_norm": 0.04216777782005341,
|
|
"learning_rate": 1.6126309832180664e-06,
|
|
"loss": 2.4853,
|
|
"step": 31135
|
|
},
|
|
{
|
|
"epoch": 9.835820895522389,
|
|
"grad_norm": 0.04953110205193987,
|
|
"learning_rate": 1.5814779822674875e-06,
|
|
"loss": 2.4103,
|
|
"step": 31140
|
|
},
|
|
{
|
|
"epoch": 9.837400300086868,
|
|
"grad_norm": 0.04180295744217038,
|
|
"learning_rate": 1.5506285940103393e-06,
|
|
"loss": 2.3793,
|
|
"step": 31145
|
|
},
|
|
{
|
|
"epoch": 9.838979704651347,
|
|
"grad_norm": 0.047837444378521964,
|
|
"learning_rate": 1.5200828278278954e-06,
|
|
"loss": 2.4945,
|
|
"step": 31150
|
|
},
|
|
{
|
|
"epoch": 9.840559109215826,
|
|
"grad_norm": 0.04530582592693608,
|
|
"learning_rate": 1.489840693008726e-06,
|
|
"loss": 2.4009,
|
|
"step": 31155
|
|
},
|
|
{
|
|
"epoch": 9.842138513780306,
|
|
"grad_norm": 0.04291881654761109,
|
|
"learning_rate": 1.4599021987493632e-06,
|
|
"loss": 2.3659,
|
|
"step": 31160
|
|
},
|
|
{
|
|
"epoch": 9.843717918344783,
|
|
"grad_norm": 0.04355167250909392,
|
|
"learning_rate": 1.430267354153858e-06,
|
|
"loss": 2.4755,
|
|
"step": 31165
|
|
},
|
|
{
|
|
"epoch": 9.845297322909262,
|
|
"grad_norm": 0.0427130805628063,
|
|
"learning_rate": 1.4009361682340017e-06,
|
|
"loss": 2.3501,
|
|
"step": 31170
|
|
},
|
|
{
|
|
"epoch": 9.846876727473742,
|
|
"grad_norm": 0.040811771130643736,
|
|
"learning_rate": 1.3719086499092148e-06,
|
|
"loss": 2.3141,
|
|
"step": 31175
|
|
},
|
|
{
|
|
"epoch": 9.848456132038221,
|
|
"grad_norm": 0.040310308608363724,
|
|
"learning_rate": 1.3431848080066588e-06,
|
|
"loss": 2.3714,
|
|
"step": 31180
|
|
},
|
|
{
|
|
"epoch": 9.8500355366027,
|
|
"grad_norm": 0.045963168756802425,
|
|
"learning_rate": 1.3147646512610135e-06,
|
|
"loss": 2.3644,
|
|
"step": 31185
|
|
},
|
|
{
|
|
"epoch": 9.85161494116718,
|
|
"grad_norm": 0.03729718237169882,
|
|
"learning_rate": 1.2866481883146986e-06,
|
|
"loss": 2.347,
|
|
"step": 31190
|
|
},
|
|
{
|
|
"epoch": 9.853194345731659,
|
|
"grad_norm": 0.052023903170032054,
|
|
"learning_rate": 1.258835427717653e-06,
|
|
"loss": 2.4908,
|
|
"step": 31195
|
|
},
|
|
{
|
|
"epoch": 9.854773750296138,
|
|
"grad_norm": 0.04285587877456899,
|
|
"learning_rate": 1.2313263779275551e-06,
|
|
"loss": 2.3909,
|
|
"step": 31200
|
|
},
|
|
{
|
|
"epoch": 9.856353154860617,
|
|
"grad_norm": 0.043623908820302676,
|
|
"learning_rate": 1.2041210473098252e-06,
|
|
"loss": 2.3754,
|
|
"step": 31205
|
|
},
|
|
{
|
|
"epoch": 9.857932559425096,
|
|
"grad_norm": 0.0449197725350227,
|
|
"learning_rate": 1.1772194441374008e-06,
|
|
"loss": 2.4296,
|
|
"step": 31210
|
|
},
|
|
{
|
|
"epoch": 9.859511963989576,
|
|
"grad_norm": 0.040065117368542624,
|
|
"learning_rate": 1.15062157659096e-06,
|
|
"loss": 2.3674,
|
|
"step": 31215
|
|
},
|
|
{
|
|
"epoch": 9.861091368554055,
|
|
"grad_norm": 0.04012162349533012,
|
|
"learning_rate": 1.1243274527587e-06,
|
|
"loss": 2.3932,
|
|
"step": 31220
|
|
},
|
|
{
|
|
"epoch": 9.862670773118534,
|
|
"grad_norm": 0.048506253143626714,
|
|
"learning_rate": 1.0983370806363358e-06,
|
|
"loss": 2.4138,
|
|
"step": 31225
|
|
},
|
|
{
|
|
"epoch": 9.864250177683013,
|
|
"grad_norm": 0.041801382362332766,
|
|
"learning_rate": 1.0726504681275452e-06,
|
|
"loss": 2.4828,
|
|
"step": 31230
|
|
},
|
|
{
|
|
"epoch": 9.865829582247493,
|
|
"grad_norm": 0.04437419891825854,
|
|
"learning_rate": 1.047267623043524e-06,
|
|
"loss": 2.3569,
|
|
"step": 31235
|
|
},
|
|
{
|
|
"epoch": 9.867408986811972,
|
|
"grad_norm": 0.048059431546183196,
|
|
"learning_rate": 1.0221885531027652e-06,
|
|
"loss": 2.4038,
|
|
"step": 31240
|
|
},
|
|
{
|
|
"epoch": 9.868988391376451,
|
|
"grad_norm": 0.04839714228324445,
|
|
"learning_rate": 9.974132659319458e-07,
|
|
"loss": 2.4167,
|
|
"step": 31245
|
|
},
|
|
{
|
|
"epoch": 9.87056779594093,
|
|
"grad_norm": 0.0499803154894237,
|
|
"learning_rate": 9.729417690649279e-07,
|
|
"loss": 2.4503,
|
|
"step": 31250
|
|
},
|
|
{
|
|
"epoch": 9.87214720050541,
|
|
"grad_norm": 0.04442938220004997,
|
|
"learning_rate": 9.487740699433145e-07,
|
|
"loss": 2.4478,
|
|
"step": 31255
|
|
},
|
|
{
|
|
"epoch": 9.873726605069889,
|
|
"grad_norm": 0.03881726819260687,
|
|
"learning_rate": 9.249101759164491e-07,
|
|
"loss": 2.3652,
|
|
"step": 31260
|
|
},
|
|
{
|
|
"epoch": 9.875306009634368,
|
|
"grad_norm": 0.044093565278199146,
|
|
"learning_rate": 9.013500942410824e-07,
|
|
"loss": 2.3736,
|
|
"step": 31265
|
|
},
|
|
{
|
|
"epoch": 9.876885414198847,
|
|
"grad_norm": 0.05108213400375329,
|
|
"learning_rate": 8.780938320817056e-07,
|
|
"loss": 2.4016,
|
|
"step": 31270
|
|
},
|
|
{
|
|
"epoch": 9.878464818763327,
|
|
"grad_norm": 0.043450172850814976,
|
|
"learning_rate": 8.551413965105504e-07,
|
|
"loss": 2.4129,
|
|
"step": 31275
|
|
},
|
|
{
|
|
"epoch": 9.880044223327806,
|
|
"grad_norm": 0.04571056199368923,
|
|
"learning_rate": 8.324927945070337e-07,
|
|
"loss": 2.3449,
|
|
"step": 31280
|
|
},
|
|
{
|
|
"epoch": 9.881623627892285,
|
|
"grad_norm": 0.03929514397498466,
|
|
"learning_rate": 8.101480329587574e-07,
|
|
"loss": 2.4344,
|
|
"step": 31285
|
|
},
|
|
{
|
|
"epoch": 9.883203032456764,
|
|
"grad_norm": 0.05605053636602669,
|
|
"learning_rate": 7.881071186602861e-07,
|
|
"loss": 2.3708,
|
|
"step": 31290
|
|
},
|
|
{
|
|
"epoch": 9.884782437021244,
|
|
"grad_norm": 0.040096833215611215,
|
|
"learning_rate": 7.663700583144806e-07,
|
|
"loss": 2.4076,
|
|
"step": 31295
|
|
},
|
|
{
|
|
"epoch": 9.886361841585723,
|
|
"grad_norm": 0.04414612578297414,
|
|
"learning_rate": 7.449368585311644e-07,
|
|
"loss": 2.4094,
|
|
"step": 31300
|
|
},
|
|
{
|
|
"epoch": 9.887941246150202,
|
|
"grad_norm": 0.04143465975472764,
|
|
"learning_rate": 7.23807525828124e-07,
|
|
"loss": 2.3948,
|
|
"step": 31305
|
|
},
|
|
{
|
|
"epoch": 9.889520650714681,
|
|
"grad_norm": 0.03961622831463739,
|
|
"learning_rate": 7.029820666306641e-07,
|
|
"loss": 2.402,
|
|
"step": 31310
|
|
},
|
|
{
|
|
"epoch": 9.89110005527916,
|
|
"grad_norm": 0.05701225564541441,
|
|
"learning_rate": 6.824604872717188e-07,
|
|
"loss": 2.4436,
|
|
"step": 31315
|
|
},
|
|
{
|
|
"epoch": 9.892679459843638,
|
|
"grad_norm": 0.04003804695594368,
|
|
"learning_rate": 6.622427939916298e-07,
|
|
"loss": 2.263,
|
|
"step": 31320
|
|
},
|
|
{
|
|
"epoch": 9.894258864408119,
|
|
"grad_norm": 0.05036660745155673,
|
|
"learning_rate": 6.4232899293859e-07,
|
|
"loss": 2.4116,
|
|
"step": 31325
|
|
},
|
|
{
|
|
"epoch": 9.895838268972597,
|
|
"grad_norm": 0.04904166864564292,
|
|
"learning_rate": 6.22719090168311e-07,
|
|
"loss": 2.37,
|
|
"step": 31330
|
|
},
|
|
{
|
|
"epoch": 9.897417673537076,
|
|
"grad_norm": 0.053753816783488964,
|
|
"learning_rate": 6.034130916439118e-07,
|
|
"loss": 2.3696,
|
|
"step": 31335
|
|
},
|
|
{
|
|
"epoch": 9.898997078101555,
|
|
"grad_norm": 0.03768381693534451,
|
|
"learning_rate": 5.844110032362515e-07,
|
|
"loss": 2.416,
|
|
"step": 31340
|
|
},
|
|
{
|
|
"epoch": 9.900576482666034,
|
|
"grad_norm": 0.03941877065191387,
|
|
"learning_rate": 5.65712830723708e-07,
|
|
"loss": 2.4877,
|
|
"step": 31345
|
|
},
|
|
{
|
|
"epoch": 9.902155887230514,
|
|
"grad_norm": 0.060231101967682533,
|
|
"learning_rate": 5.473185797923996e-07,
|
|
"loss": 2.4678,
|
|
"step": 31350
|
|
},
|
|
{
|
|
"epoch": 9.903735291794993,
|
|
"grad_norm": 0.05347136728291984,
|
|
"learning_rate": 5.292282560358519e-07,
|
|
"loss": 2.4145,
|
|
"step": 31355
|
|
},
|
|
{
|
|
"epoch": 9.905314696359472,
|
|
"grad_norm": 0.039082639537203874,
|
|
"learning_rate": 5.114418649552199e-07,
|
|
"loss": 2.3073,
|
|
"step": 31360
|
|
},
|
|
{
|
|
"epoch": 9.906894100923951,
|
|
"grad_norm": 0.044346146786145166,
|
|
"learning_rate": 4.939594119590663e-07,
|
|
"loss": 2.3193,
|
|
"step": 31365
|
|
},
|
|
{
|
|
"epoch": 9.90847350548843,
|
|
"grad_norm": 0.040929624910032235,
|
|
"learning_rate": 4.767809023639158e-07,
|
|
"loss": 2.4079,
|
|
"step": 31370
|
|
},
|
|
{
|
|
"epoch": 9.91005291005291,
|
|
"grad_norm": 0.04329208176770207,
|
|
"learning_rate": 4.5990634139359e-07,
|
|
"loss": 2.4446,
|
|
"step": 31375
|
|
},
|
|
{
|
|
"epoch": 9.911632314617389,
|
|
"grad_norm": 0.07861902074514227,
|
|
"learning_rate": 4.433357341795396e-07,
|
|
"loss": 2.4424,
|
|
"step": 31380
|
|
},
|
|
{
|
|
"epoch": 9.913211719181868,
|
|
"grad_norm": 0.047631633008037584,
|
|
"learning_rate": 4.27069085760623e-07,
|
|
"loss": 2.3565,
|
|
"step": 31385
|
|
},
|
|
{
|
|
"epoch": 9.914791123746348,
|
|
"grad_norm": 0.03730679415803507,
|
|
"learning_rate": 4.111064010836607e-07,
|
|
"loss": 2.4453,
|
|
"step": 31390
|
|
},
|
|
{
|
|
"epoch": 9.916370528310827,
|
|
"grad_norm": 0.03886844888904772,
|
|
"learning_rate": 3.954476850026589e-07,
|
|
"loss": 2.2789,
|
|
"step": 31395
|
|
},
|
|
{
|
|
"epoch": 9.917949932875306,
|
|
"grad_norm": 0.04317889692874003,
|
|
"learning_rate": 3.800929422793642e-07,
|
|
"loss": 2.257,
|
|
"step": 31400
|
|
},
|
|
{
|
|
"epoch": 9.919529337439785,
|
|
"grad_norm": 0.040823580863975674,
|
|
"learning_rate": 3.6504217758304147e-07,
|
|
"loss": 2.3185,
|
|
"step": 31405
|
|
},
|
|
{
|
|
"epoch": 9.921108742004265,
|
|
"grad_norm": 0.039782695619364115,
|
|
"learning_rate": 3.502953954905852e-07,
|
|
"loss": 2.2989,
|
|
"step": 31410
|
|
},
|
|
{
|
|
"epoch": 9.922688146568744,
|
|
"grad_norm": 0.04409438948650292,
|
|
"learning_rate": 3.3585260048629717e-07,
|
|
"loss": 2.3599,
|
|
"step": 31415
|
|
},
|
|
{
|
|
"epoch": 9.924267551133223,
|
|
"grad_norm": 0.05089332354941331,
|
|
"learning_rate": 3.217137969622197e-07,
|
|
"loss": 2.3699,
|
|
"step": 31420
|
|
},
|
|
{
|
|
"epoch": 9.925846955697702,
|
|
"grad_norm": 0.041525466589508155,
|
|
"learning_rate": 3.078789892179135e-07,
|
|
"loss": 2.3521,
|
|
"step": 31425
|
|
},
|
|
{
|
|
"epoch": 9.927426360262182,
|
|
"grad_norm": 0.05122548282281933,
|
|
"learning_rate": 2.943481814603466e-07,
|
|
"loss": 2.3181,
|
|
"step": 31430
|
|
},
|
|
{
|
|
"epoch": 9.92900576482666,
|
|
"grad_norm": 0.037858106490544545,
|
|
"learning_rate": 2.8112137780422765e-07,
|
|
"loss": 2.3224,
|
|
"step": 31435
|
|
},
|
|
{
|
|
"epoch": 9.93058516939114,
|
|
"grad_norm": 0.04483147270983839,
|
|
"learning_rate": 2.681985822716726e-07,
|
|
"loss": 2.44,
|
|
"step": 31440
|
|
},
|
|
{
|
|
"epoch": 9.93216457395562,
|
|
"grad_norm": 0.0419017418322362,
|
|
"learning_rate": 2.555797987924269e-07,
|
|
"loss": 2.35,
|
|
"step": 31445
|
|
},
|
|
{
|
|
"epoch": 9.933743978520098,
|
|
"grad_norm": 0.04684765994945669,
|
|
"learning_rate": 2.4326503120397634e-07,
|
|
"loss": 2.3613,
|
|
"step": 31450
|
|
},
|
|
{
|
|
"epoch": 9.935323383084578,
|
|
"grad_norm": 0.04900510243746249,
|
|
"learning_rate": 2.3125428325088127e-07,
|
|
"loss": 2.391,
|
|
"step": 31455
|
|
},
|
|
{
|
|
"epoch": 9.936902787649057,
|
|
"grad_norm": 0.05155968176817936,
|
|
"learning_rate": 2.1954755858566432e-07,
|
|
"loss": 2.3253,
|
|
"step": 31460
|
|
},
|
|
{
|
|
"epoch": 9.938482192213536,
|
|
"grad_norm": 0.04528411011174494,
|
|
"learning_rate": 2.0814486076825566e-07,
|
|
"loss": 2.4523,
|
|
"step": 31465
|
|
},
|
|
{
|
|
"epoch": 9.940061596778015,
|
|
"grad_norm": 0.04943254274441148,
|
|
"learning_rate": 1.9704619326621487e-07,
|
|
"loss": 2.389,
|
|
"step": 31470
|
|
},
|
|
{
|
|
"epoch": 9.941641001342493,
|
|
"grad_norm": 0.042564905701143275,
|
|
"learning_rate": 1.862515594545089e-07,
|
|
"loss": 2.3866,
|
|
"step": 31475
|
|
},
|
|
{
|
|
"epoch": 9.943220405906974,
|
|
"grad_norm": 0.04164976415564028,
|
|
"learning_rate": 1.7576096261562313e-07,
|
|
"loss": 2.3793,
|
|
"step": 31480
|
|
},
|
|
{
|
|
"epoch": 9.944799810471451,
|
|
"grad_norm": 0.0452706836490186,
|
|
"learning_rate": 1.6557440593989448e-07,
|
|
"loss": 2.3188,
|
|
"step": 31485
|
|
},
|
|
{
|
|
"epoch": 9.94637921503593,
|
|
"grad_norm": 0.04956588186054781,
|
|
"learning_rate": 1.5569189252473415e-07,
|
|
"loss": 2.3585,
|
|
"step": 31490
|
|
},
|
|
{
|
|
"epoch": 9.94795861960041,
|
|
"grad_norm": 0.04468823103013206,
|
|
"learning_rate": 1.4611342537562688e-07,
|
|
"loss": 2.4117,
|
|
"step": 31495
|
|
},
|
|
{
|
|
"epoch": 9.94953802416489,
|
|
"grad_norm": 0.03982867230890297,
|
|
"learning_rate": 1.3683900740513178e-07,
|
|
"loss": 2.3792,
|
|
"step": 31500
|
|
},
|
|
{
|
|
"epoch": 9.951117428729368,
|
|
"grad_norm": 0.04261144088253554,
|
|
"learning_rate": 1.2786864143354837e-07,
|
|
"loss": 2.4035,
|
|
"step": 31505
|
|
},
|
|
{
|
|
"epoch": 9.952696833293848,
|
|
"grad_norm": 0.05113283763969754,
|
|
"learning_rate": 1.1920233018880566e-07,
|
|
"loss": 2.4228,
|
|
"step": 31510
|
|
},
|
|
{
|
|
"epoch": 9.954276237858327,
|
|
"grad_norm": 0.04560790509600287,
|
|
"learning_rate": 1.1084007630612903e-07,
|
|
"loss": 2.3877,
|
|
"step": 31515
|
|
},
|
|
{
|
|
"epoch": 9.955855642422806,
|
|
"grad_norm": 0.05556904301586004,
|
|
"learning_rate": 1.0278188232859531e-07,
|
|
"loss": 2.3437,
|
|
"step": 31520
|
|
},
|
|
{
|
|
"epoch": 9.957435046987285,
|
|
"grad_norm": 0.04668718987766734,
|
|
"learning_rate": 9.502775070657776e-08,
|
|
"loss": 2.5098,
|
|
"step": 31525
|
|
},
|
|
{
|
|
"epoch": 9.959014451551765,
|
|
"grad_norm": 0.046887195489042646,
|
|
"learning_rate": 8.757768379796804e-08,
|
|
"loss": 2.4029,
|
|
"step": 31530
|
|
},
|
|
{
|
|
"epoch": 9.960593856116244,
|
|
"grad_norm": 0.041706676304071585,
|
|
"learning_rate": 8.043168386839827e-08,
|
|
"loss": 2.3795,
|
|
"step": 31535
|
|
},
|
|
{
|
|
"epoch": 9.962173260680723,
|
|
"grad_norm": 0.04488075360311965,
|
|
"learning_rate": 7.358975309090799e-08,
|
|
"loss": 2.3347,
|
|
"step": 31540
|
|
},
|
|
{
|
|
"epoch": 9.963752665245202,
|
|
"grad_norm": 0.04196027227225851,
|
|
"learning_rate": 6.705189354616615e-08,
|
|
"loss": 2.4042,
|
|
"step": 31545
|
|
},
|
|
{
|
|
"epoch": 9.965332069809682,
|
|
"grad_norm": 0.045232793995175886,
|
|
"learning_rate": 6.081810722202707e-08,
|
|
"loss": 2.3868,
|
|
"step": 31550
|
|
},
|
|
{
|
|
"epoch": 9.966911474374161,
|
|
"grad_norm": 0.041741344406426024,
|
|
"learning_rate": 5.488839601441864e-08,
|
|
"loss": 2.3922,
|
|
"step": 31555
|
|
},
|
|
{
|
|
"epoch": 9.96849087893864,
|
|
"grad_norm": 0.04742214796670987,
|
|
"learning_rate": 4.926276172645405e-08,
|
|
"loss": 2.4131,
|
|
"step": 31560
|
|
},
|
|
{
|
|
"epoch": 9.97007028350312,
|
|
"grad_norm": 0.042601983893819445,
|
|
"learning_rate": 4.394120606876495e-08,
|
|
"loss": 2.3363,
|
|
"step": 31565
|
|
},
|
|
{
|
|
"epoch": 9.971649688067599,
|
|
"grad_norm": 0.04327385252232404,
|
|
"learning_rate": 3.8923730659612414e-08,
|
|
"loss": 2.484,
|
|
"step": 31570
|
|
},
|
|
{
|
|
"epoch": 9.973229092632078,
|
|
"grad_norm": 0.04202420797597011,
|
|
"learning_rate": 3.4210337024886964e-08,
|
|
"loss": 2.2765,
|
|
"step": 31575
|
|
},
|
|
{
|
|
"epoch": 9.974808497196557,
|
|
"grad_norm": 0.041801096069587484,
|
|
"learning_rate": 2.9801026597775505e-08,
|
|
"loss": 2.4922,
|
|
"step": 31580
|
|
},
|
|
{
|
|
"epoch": 9.976387901761036,
|
|
"grad_norm": 0.0391322209059303,
|
|
"learning_rate": 2.5695800719205408e-08,
|
|
"loss": 2.4308,
|
|
"step": 31585
|
|
},
|
|
{
|
|
"epoch": 9.977967306325516,
|
|
"grad_norm": 0.044431361724413,
|
|
"learning_rate": 2.1894660637622467e-08,
|
|
"loss": 2.3047,
|
|
"step": 31590
|
|
},
|
|
{
|
|
"epoch": 9.979546710889995,
|
|
"grad_norm": 0.044143168004319534,
|
|
"learning_rate": 1.8397607508768842e-08,
|
|
"loss": 2.3034,
|
|
"step": 31595
|
|
},
|
|
{
|
|
"epoch": 9.981126115454474,
|
|
"grad_norm": 0.04273657060682564,
|
|
"learning_rate": 1.5204642396127178e-08,
|
|
"loss": 2.3248,
|
|
"step": 31600
|
|
},
|
|
{
|
|
"epoch": 9.982705520018953,
|
|
"grad_norm": 0.03915895438328436,
|
|
"learning_rate": 1.2315766270698526e-08,
|
|
"loss": 2.428,
|
|
"step": 31605
|
|
},
|
|
{
|
|
"epoch": 9.984284924583433,
|
|
"grad_norm": 0.054851881731455776,
|
|
"learning_rate": 9.730980010891343e-09,
|
|
"loss": 2.4559,
|
|
"step": 31610
|
|
},
|
|
{
|
|
"epoch": 9.985864329147912,
|
|
"grad_norm": 0.03955674353438591,
|
|
"learning_rate": 7.450284402854557e-09,
|
|
"loss": 2.3912,
|
|
"step": 31615
|
|
},
|
|
{
|
|
"epoch": 9.987443733712391,
|
|
"grad_norm": 0.03918676660618922,
|
|
"learning_rate": 5.473680140033466e-09,
|
|
"loss": 2.3536,
|
|
"step": 31620
|
|
},
|
|
{
|
|
"epoch": 9.98902313827687,
|
|
"grad_norm": 0.038162441992950706,
|
|
"learning_rate": 3.801167823502816e-09,
|
|
"loss": 2.3491,
|
|
"step": 31625
|
|
},
|
|
{
|
|
"epoch": 9.99060254284135,
|
|
"grad_norm": 0.04205200312824167,
|
|
"learning_rate": 2.4327479618557746e-09,
|
|
"loss": 2.4484,
|
|
"step": 31630
|
|
},
|
|
{
|
|
"epoch": 9.992181947405829,
|
|
"grad_norm": 0.04447096819750325,
|
|
"learning_rate": 1.3684209713149542e-09,
|
|
"loss": 2.4543,
|
|
"step": 31635
|
|
},
|
|
{
|
|
"epoch": 9.993761351970306,
|
|
"grad_norm": 0.0503482955066234,
|
|
"learning_rate": 6.08187175399344e-10,
|
|
"loss": 2.3356,
|
|
"step": 31640
|
|
},
|
|
{
|
|
"epoch": 9.995340756534786,
|
|
"grad_norm": 0.04672093855240827,
|
|
"learning_rate": 1.5204680536839987e-10,
|
|
"loss": 2.4031,
|
|
"step": 31645
|
|
},
|
|
{
|
|
"epoch": 9.996920161099265,
|
|
"grad_norm": 0.03833379789275092,
|
|
"learning_rate": 0.0,
|
|
"loss": 2.4458,
|
|
"step": 31650
|
|
},
|
|
{
|
|
"epoch": 9.996920161099265,
|
|
"eval_loss": 2.395866870880127,
|
|
"eval_runtime": 118.7072,
|
|
"eval_samples_per_second": 22.315,
|
|
"eval_steps_per_second": 5.585,
|
|
"step": 31650
|
|
},
|
|
{
|
|
"epoch": 9.996920161099265,
|
|
"step": 31650,
|
|
"total_flos": 9.148115283266765e+16,
|
|
"train_loss": 2.7709856205679606,
|
|
"train_runtime": 79261.052,
|
|
"train_samples_per_second": 6.39,
|
|
"train_steps_per_second": 0.399
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 31650,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 10,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 9.148115283266765e+16,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|