7443 lines
182 KiB
JSON
7443 lines
182 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 500,
|
|
"global_step": 5280,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.000946969696969697,
|
|
"grad_norm": 49.75650598702225,
|
|
"learning_rate": 2.8409090909090907e-06,
|
|
"loss": 3.9178,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.001893939393939394,
|
|
"grad_norm": 39.27361859039198,
|
|
"learning_rate": 5.6818181818181815e-06,
|
|
"loss": 3.7038,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.002840909090909091,
|
|
"grad_norm": 22.806730377223552,
|
|
"learning_rate": 8.522727272727271e-06,
|
|
"loss": 2.8608,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.003787878787878788,
|
|
"grad_norm": 9.34674382037142,
|
|
"learning_rate": 1.1363636363636363e-05,
|
|
"loss": 2.207,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.004734848484848485,
|
|
"grad_norm": 4.014713370693969,
|
|
"learning_rate": 1.4204545454545453e-05,
|
|
"loss": 1.74,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.005681818181818182,
|
|
"grad_norm": 2.1906667080696756,
|
|
"learning_rate": 1.7045454545454543e-05,
|
|
"loss": 1.4441,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.006628787878787879,
|
|
"grad_norm": 1.0752279902100068,
|
|
"learning_rate": 1.9886363636363634e-05,
|
|
"loss": 1.2429,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.007575757575757576,
|
|
"grad_norm": 0.7187175898896523,
|
|
"learning_rate": 2.2727272727272726e-05,
|
|
"loss": 1.1147,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.008522727272727272,
|
|
"grad_norm": 0.49281093084047145,
|
|
"learning_rate": 2.5568181818181814e-05,
|
|
"loss": 1.1034,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.00946969696969697,
|
|
"grad_norm": 0.3269633043103454,
|
|
"learning_rate": 2.8409090909090906e-05,
|
|
"loss": 1.058,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.010416666666666666,
|
|
"grad_norm": 0.33257710673863067,
|
|
"learning_rate": 3.125e-05,
|
|
"loss": 1.0722,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.011363636363636364,
|
|
"grad_norm": 0.25938022961715196,
|
|
"learning_rate": 3.4090909090909085e-05,
|
|
"loss": 1.0246,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.01231060606060606,
|
|
"grad_norm": 0.23813343449521693,
|
|
"learning_rate": 3.693181818181818e-05,
|
|
"loss": 1.0074,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.013257575757575758,
|
|
"grad_norm": 0.2200079499271612,
|
|
"learning_rate": 3.977272727272727e-05,
|
|
"loss": 1.0025,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.014204545454545454,
|
|
"grad_norm": 0.20277353873378182,
|
|
"learning_rate": 4.261363636363637e-05,
|
|
"loss": 0.9877,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.015151515151515152,
|
|
"grad_norm": 0.20338107431083782,
|
|
"learning_rate": 4.545454545454545e-05,
|
|
"loss": 0.9579,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.016098484848484848,
|
|
"grad_norm": 0.20166600929973375,
|
|
"learning_rate": 4.8295454545454537e-05,
|
|
"loss": 1.0121,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.017045454545454544,
|
|
"grad_norm": 0.14601915731061527,
|
|
"learning_rate": 5.113636363636363e-05,
|
|
"loss": 0.9809,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.017992424242424244,
|
|
"grad_norm": 0.1910575503845415,
|
|
"learning_rate": 5.3977272727272727e-05,
|
|
"loss": 0.9687,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.01893939393939394,
|
|
"grad_norm": 0.1710125261741899,
|
|
"learning_rate": 5.681818181818181e-05,
|
|
"loss": 0.9759,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.019886363636363636,
|
|
"grad_norm": 0.1546310725877226,
|
|
"learning_rate": 5.96590909090909e-05,
|
|
"loss": 0.9449,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.020833333333333332,
|
|
"grad_norm": 0.15669899681499375,
|
|
"learning_rate": 6.25e-05,
|
|
"loss": 0.9379,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.021780303030303032,
|
|
"grad_norm": 0.10549598423376465,
|
|
"learning_rate": 6.534090909090909e-05,
|
|
"loss": 0.9521,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.022727272727272728,
|
|
"grad_norm": 0.09265444874772286,
|
|
"learning_rate": 6.818181818181817e-05,
|
|
"loss": 0.9402,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.023674242424242424,
|
|
"grad_norm": 0.10012187642369699,
|
|
"learning_rate": 7.102272727272727e-05,
|
|
"loss": 0.9509,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.02462121212121212,
|
|
"grad_norm": 0.10405267547853224,
|
|
"learning_rate": 7.386363636363635e-05,
|
|
"loss": 0.943,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.02556818181818182,
|
|
"grad_norm": 0.0932862459532729,
|
|
"learning_rate": 7.670454545454545e-05,
|
|
"loss": 0.923,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.026515151515151516,
|
|
"grad_norm": 0.08799539522039788,
|
|
"learning_rate": 7.954545454545454e-05,
|
|
"loss": 0.9221,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.027462121212121212,
|
|
"grad_norm": 0.07936971459661492,
|
|
"learning_rate": 8.238636363636362e-05,
|
|
"loss": 0.9385,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.028409090909090908,
|
|
"grad_norm": 0.09395245331857886,
|
|
"learning_rate": 8.522727272727273e-05,
|
|
"loss": 0.9098,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.029356060606060608,
|
|
"grad_norm": 0.08346381824724323,
|
|
"learning_rate": 8.806818181818182e-05,
|
|
"loss": 0.9592,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.030303030303030304,
|
|
"grad_norm": 0.0683897969731906,
|
|
"learning_rate": 9.09090909090909e-05,
|
|
"loss": 0.9162,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.03125,
|
|
"grad_norm": 0.07364507428375824,
|
|
"learning_rate": 9.374999999999999e-05,
|
|
"loss": 0.905,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.032196969696969696,
|
|
"grad_norm": 0.06957507038154116,
|
|
"learning_rate": 9.659090909090907e-05,
|
|
"loss": 0.9277,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.03314393939393939,
|
|
"grad_norm": 0.07231783801209996,
|
|
"learning_rate": 9.943181818181817e-05,
|
|
"loss": 0.8865,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.03409090909090909,
|
|
"grad_norm": 0.08108886238015861,
|
|
"learning_rate": 0.00010227272727272726,
|
|
"loss": 0.9221,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.035037878787878785,
|
|
"grad_norm": 0.06746334152643936,
|
|
"learning_rate": 0.00010511363636363635,
|
|
"loss": 0.8921,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.03598484848484849,
|
|
"grad_norm": 0.07474975292416153,
|
|
"learning_rate": 0.00010795454545454545,
|
|
"loss": 0.9067,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.036931818181818184,
|
|
"grad_norm": 0.06954632694727424,
|
|
"learning_rate": 0.00011079545454545454,
|
|
"loss": 0.9274,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.03787878787878788,
|
|
"grad_norm": 0.1071194914420164,
|
|
"learning_rate": 0.00011363636363636362,
|
|
"loss": 0.9174,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.038825757575757576,
|
|
"grad_norm": 0.08047063933324308,
|
|
"learning_rate": 0.00011647727272727271,
|
|
"loss": 0.8853,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.03977272727272727,
|
|
"grad_norm": 0.06720262982444847,
|
|
"learning_rate": 0.0001193181818181818,
|
|
"loss": 0.8936,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.04071969696969697,
|
|
"grad_norm": 0.06874990083102131,
|
|
"learning_rate": 0.0001221590909090909,
|
|
"loss": 0.8966,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.041666666666666664,
|
|
"grad_norm": 0.09031821629007566,
|
|
"learning_rate": 0.000125,
|
|
"loss": 0.9002,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.04261363636363636,
|
|
"grad_norm": 0.08250031333079004,
|
|
"learning_rate": 0.00012784090909090907,
|
|
"loss": 0.9314,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.043560606060606064,
|
|
"grad_norm": 0.06517851552105172,
|
|
"learning_rate": 0.00013068181818181817,
|
|
"loss": 0.9264,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.04450757575757576,
|
|
"grad_norm": 0.0682659812110987,
|
|
"learning_rate": 0.00013352272727272727,
|
|
"loss": 0.8933,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.045454545454545456,
|
|
"grad_norm": 0.07147589587336683,
|
|
"learning_rate": 0.00013636363636363634,
|
|
"loss": 0.9181,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.04640151515151515,
|
|
"grad_norm": 0.06876166962712452,
|
|
"learning_rate": 0.00013920454545454544,
|
|
"loss": 0.9221,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.04734848484848485,
|
|
"grad_norm": 0.06370293403172177,
|
|
"learning_rate": 0.00014204545454545454,
|
|
"loss": 0.8685,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.048295454545454544,
|
|
"grad_norm": 0.06615942930120759,
|
|
"learning_rate": 0.00014488636363636364,
|
|
"loss": 0.8895,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.04924242424242424,
|
|
"grad_norm": 0.06664909205706883,
|
|
"learning_rate": 0.0001477272727272727,
|
|
"loss": 0.9207,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.050189393939393936,
|
|
"grad_norm": 0.07595494362406784,
|
|
"learning_rate": 0.00015056818181818183,
|
|
"loss": 0.8644,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.05113636363636364,
|
|
"grad_norm": 0.07408826723348173,
|
|
"learning_rate": 0.0001534090909090909,
|
|
"loss": 0.9062,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.052083333333333336,
|
|
"grad_norm": 0.06808841073565555,
|
|
"learning_rate": 0.00015625,
|
|
"loss": 0.9151,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.05303030303030303,
|
|
"grad_norm": 0.07953899365614112,
|
|
"learning_rate": 0.00015909090909090907,
|
|
"loss": 0.8909,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.05397727272727273,
|
|
"grad_norm": 0.07167080509602292,
|
|
"learning_rate": 0.00016193181818181817,
|
|
"loss": 0.9009,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.054924242424242424,
|
|
"grad_norm": 0.07064676898002652,
|
|
"learning_rate": 0.00016477272727272724,
|
|
"loss": 0.8908,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.05587121212121212,
|
|
"grad_norm": 0.07214540293164669,
|
|
"learning_rate": 0.00016761363636363634,
|
|
"loss": 0.9244,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.056818181818181816,
|
|
"grad_norm": 0.06617148307042509,
|
|
"learning_rate": 0.00017045454545454547,
|
|
"loss": 0.8841,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.05776515151515151,
|
|
"grad_norm": 0.06536482732681019,
|
|
"learning_rate": 0.00017329545454545454,
|
|
"loss": 0.8926,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.058712121212121215,
|
|
"grad_norm": 0.0770022545469697,
|
|
"learning_rate": 0.00017613636363636364,
|
|
"loss": 0.8918,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.05965909090909091,
|
|
"grad_norm": 0.07796650440153677,
|
|
"learning_rate": 0.0001789772727272727,
|
|
"loss": 0.8861,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.06060606060606061,
|
|
"grad_norm": 0.06664140681353005,
|
|
"learning_rate": 0.0001818181818181818,
|
|
"loss": 0.883,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.061553030303030304,
|
|
"grad_norm": 0.06505572579245275,
|
|
"learning_rate": 0.00018465909090909088,
|
|
"loss": 0.9046,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.0625,
|
|
"grad_norm": 0.07411769991572527,
|
|
"learning_rate": 0.00018749999999999998,
|
|
"loss": 0.8935,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.0634469696969697,
|
|
"grad_norm": 0.06839084238999557,
|
|
"learning_rate": 0.00019034090909090908,
|
|
"loss": 0.8994,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.06439393939393939,
|
|
"grad_norm": 0.06991741218579048,
|
|
"learning_rate": 0.00019318181818181815,
|
|
"loss": 0.9011,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.06534090909090909,
|
|
"grad_norm": 0.06765382117363,
|
|
"learning_rate": 0.00019602272727272727,
|
|
"loss": 0.8757,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.06628787878787878,
|
|
"grad_norm": 0.07394479834842242,
|
|
"learning_rate": 0.00019886363636363634,
|
|
"loss": 0.8869,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.06723484848484848,
|
|
"grad_norm": 0.07779852408072253,
|
|
"learning_rate": 0.00020170454545454544,
|
|
"loss": 0.8721,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.06818181818181818,
|
|
"grad_norm": 0.07182147114935328,
|
|
"learning_rate": 0.0002045454545454545,
|
|
"loss": 0.9053,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.06912878787878787,
|
|
"grad_norm": 0.07763475442947491,
|
|
"learning_rate": 0.0002073863636363636,
|
|
"loss": 0.8886,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.07007575757575757,
|
|
"grad_norm": 0.06639383299470691,
|
|
"learning_rate": 0.0002102272727272727,
|
|
"loss": 0.9216,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.07102272727272728,
|
|
"grad_norm": 0.07582408978067692,
|
|
"learning_rate": 0.00021306818181818178,
|
|
"loss": 0.9187,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.07196969696969698,
|
|
"grad_norm": 0.067889778321114,
|
|
"learning_rate": 0.0002159090909090909,
|
|
"loss": 0.8848,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.07291666666666667,
|
|
"grad_norm": 0.06350237430320019,
|
|
"learning_rate": 0.00021874999999999998,
|
|
"loss": 0.8991,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.07386363636363637,
|
|
"grad_norm": 0.06463105152473327,
|
|
"learning_rate": 0.00022159090909090908,
|
|
"loss": 0.8993,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.07481060606060606,
|
|
"grad_norm": 0.06289812753072489,
|
|
"learning_rate": 0.00022443181818181815,
|
|
"loss": 0.8977,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.07575757575757576,
|
|
"grad_norm": 0.06451182368963407,
|
|
"learning_rate": 0.00022727272727272725,
|
|
"loss": 0.9109,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.07670454545454546,
|
|
"grad_norm": 0.06417545375628221,
|
|
"learning_rate": 0.00023011363636363634,
|
|
"loss": 0.8689,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.07765151515151515,
|
|
"grad_norm": 0.06624677302997224,
|
|
"learning_rate": 0.00023295454545454542,
|
|
"loss": 0.9096,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.07859848484848485,
|
|
"grad_norm": 0.06713767944662469,
|
|
"learning_rate": 0.00023579545454545454,
|
|
"loss": 0.9128,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.07954545454545454,
|
|
"grad_norm": 0.06632474263833514,
|
|
"learning_rate": 0.0002386363636363636,
|
|
"loss": 0.8992,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.08049242424242424,
|
|
"grad_norm": 0.06326893641363093,
|
|
"learning_rate": 0.0002414772727272727,
|
|
"loss": 0.8838,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.08143939393939394,
|
|
"grad_norm": 0.05270584817938461,
|
|
"learning_rate": 0.0002443181818181818,
|
|
"loss": 0.8604,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.08238636363636363,
|
|
"grad_norm": 0.06950851335464077,
|
|
"learning_rate": 0.0002471590909090909,
|
|
"loss": 0.8928,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.08333333333333333,
|
|
"grad_norm": 0.06031142221337703,
|
|
"learning_rate": 0.00025,
|
|
"loss": 0.8997,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.08428030303030302,
|
|
"grad_norm": 0.0598802579058441,
|
|
"learning_rate": 0.00025284090909090905,
|
|
"loss": 0.888,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.08522727272727272,
|
|
"grad_norm": 0.05979293999494916,
|
|
"learning_rate": 0.00025568181818181815,
|
|
"loss": 0.914,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.08617424242424243,
|
|
"grad_norm": 0.06332115337692762,
|
|
"learning_rate": 0.00025852272727272725,
|
|
"loss": 0.8897,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.08712121212121213,
|
|
"grad_norm": 0.05664133712393486,
|
|
"learning_rate": 0.00026136363636363634,
|
|
"loss": 0.8958,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.08806818181818182,
|
|
"grad_norm": 0.06262104837726735,
|
|
"learning_rate": 0.00026420454545454544,
|
|
"loss": 0.8773,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.08901515151515152,
|
|
"grad_norm": 0.06325434933754956,
|
|
"learning_rate": 0.00026704545454545454,
|
|
"loss": 0.8941,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.08996212121212122,
|
|
"grad_norm": 0.06454144975644246,
|
|
"learning_rate": 0.00026988636363636364,
|
|
"loss": 0.9055,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.09090909090909091,
|
|
"grad_norm": 0.058848918660389354,
|
|
"learning_rate": 0.0002727272727272727,
|
|
"loss": 0.9066,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.09185606060606061,
|
|
"grad_norm": 0.0643339517437263,
|
|
"learning_rate": 0.0002755681818181818,
|
|
"loss": 0.9207,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.0928030303030303,
|
|
"grad_norm": 0.06062790165341026,
|
|
"learning_rate": 0.0002784090909090909,
|
|
"loss": 0.9096,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.09375,
|
|
"grad_norm": 0.06483851920476219,
|
|
"learning_rate": 0.00028125,
|
|
"loss": 0.8924,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.0946969696969697,
|
|
"grad_norm": 0.06599789444637924,
|
|
"learning_rate": 0.0002840909090909091,
|
|
"loss": 0.9052,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.09564393939393939,
|
|
"grad_norm": 0.06622053779375818,
|
|
"learning_rate": 0.0002869318181818182,
|
|
"loss": 0.9261,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.09659090909090909,
|
|
"grad_norm": 0.06986338841915192,
|
|
"learning_rate": 0.0002897727272727273,
|
|
"loss": 0.9147,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.09753787878787878,
|
|
"grad_norm": 0.0566839679645091,
|
|
"learning_rate": 0.0002926136363636363,
|
|
"loss": 0.8702,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.09848484848484848,
|
|
"grad_norm": 0.06286540817635865,
|
|
"learning_rate": 0.0002954545454545454,
|
|
"loss": 0.9081,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.09943181818181818,
|
|
"grad_norm": 0.1319474057131131,
|
|
"learning_rate": 0.0002982954545454545,
|
|
"loss": 0.9121,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.10037878787878787,
|
|
"grad_norm": 0.05848327672137334,
|
|
"learning_rate": 0.0002999998688802619,
|
|
"loss": 0.9124,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.10132575757575757,
|
|
"grad_norm": 0.06169209923879713,
|
|
"learning_rate": 0.0002999983937858416,
|
|
"loss": 0.9065,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.10227272727272728,
|
|
"grad_norm": 0.06107286390796437,
|
|
"learning_rate": 0.0002999952797134999,
|
|
"loss": 0.9061,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.10321969696969698,
|
|
"grad_norm": 0.051714218670872523,
|
|
"learning_rate": 0.00029999052669726326,
|
|
"loss": 0.9188,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.10416666666666667,
|
|
"grad_norm": 0.05186568461682868,
|
|
"learning_rate": 0.00029998413478906613,
|
|
"loss": 0.8956,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.10511363636363637,
|
|
"grad_norm": 0.06237137033014081,
|
|
"learning_rate": 0.00029997610405875047,
|
|
"loss": 0.913,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.10606060606060606,
|
|
"grad_norm": 0.0627745121471118,
|
|
"learning_rate": 0.00029996643459406525,
|
|
"loss": 0.8781,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.10700757575757576,
|
|
"grad_norm": 0.05775548150329091,
|
|
"learning_rate": 0.00029995512650066516,
|
|
"loss": 0.8961,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.10795454545454546,
|
|
"grad_norm": 0.058742015004762956,
|
|
"learning_rate": 0.0002999421799021097,
|
|
"loss": 0.9081,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.10890151515151515,
|
|
"grad_norm": 0.06278642260383162,
|
|
"learning_rate": 0.00029992759493986144,
|
|
"loss": 0.9065,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.10984848484848485,
|
|
"grad_norm": 0.05368202960228582,
|
|
"learning_rate": 0.0002999113717732852,
|
|
"loss": 0.8793,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.11079545454545454,
|
|
"grad_norm": 0.06412640518837653,
|
|
"learning_rate": 0.0002998935105796455,
|
|
"loss": 0.8537,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.11174242424242424,
|
|
"grad_norm": 0.06196513917098741,
|
|
"learning_rate": 0.00029987401155410516,
|
|
"loss": 0.8954,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.11268939393939394,
|
|
"grad_norm": 0.0605833197123053,
|
|
"learning_rate": 0.00029985287490972293,
|
|
"loss": 0.8945,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.11363636363636363,
|
|
"grad_norm": 0.06504637583052113,
|
|
"learning_rate": 0.0002998301008774512,
|
|
"loss": 0.9008,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.11458333333333333,
|
|
"grad_norm": 0.05164915733505111,
|
|
"learning_rate": 0.0002998056897061335,
|
|
"loss": 0.9051,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.11553030303030302,
|
|
"grad_norm": 0.051685554793978426,
|
|
"learning_rate": 0.000299779641662502,
|
|
"loss": 0.8529,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.11647727272727272,
|
|
"grad_norm": 0.062362055131338696,
|
|
"learning_rate": 0.00029975195703117405,
|
|
"loss": 0.8691,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.11742424242424243,
|
|
"grad_norm": 0.06117736368428696,
|
|
"learning_rate": 0.00029972263611464966,
|
|
"loss": 0.8849,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.11837121212121213,
|
|
"grad_norm": 0.050904058331399764,
|
|
"learning_rate": 0.00029969167923330766,
|
|
"loss": 0.8576,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.11931818181818182,
|
|
"grad_norm": 0.053607823287624916,
|
|
"learning_rate": 0.0002996590867254028,
|
|
"loss": 0.9272,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.12026515151515152,
|
|
"grad_norm": 0.06037742366776271,
|
|
"learning_rate": 0.00029962485894706155,
|
|
"loss": 0.882,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.12121212121212122,
|
|
"grad_norm": 0.05143566912627675,
|
|
"learning_rate": 0.00029958899627227837,
|
|
"loss": 0.8828,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.12215909090909091,
|
|
"grad_norm": 0.05625636209624713,
|
|
"learning_rate": 0.00029955149909291154,
|
|
"loss": 0.9344,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.12310606060606061,
|
|
"grad_norm": 0.056332501852780985,
|
|
"learning_rate": 0.00029951236781867937,
|
|
"loss": 0.8857,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.1240530303030303,
|
|
"grad_norm": 0.05582242562634511,
|
|
"learning_rate": 0.0002994716028771549,
|
|
"loss": 0.8911,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.125,
|
|
"grad_norm": 0.05726658160474268,
|
|
"learning_rate": 0.0002994292047137618,
|
|
"loss": 0.9116,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.1259469696969697,
|
|
"grad_norm": 0.06041167916510802,
|
|
"learning_rate": 0.0002993851737917695,
|
|
"loss": 0.8898,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.1268939393939394,
|
|
"grad_norm": 0.05471735073845254,
|
|
"learning_rate": 0.00029933951059228777,
|
|
"loss": 0.8831,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.1278409090909091,
|
|
"grad_norm": 0.05947780307997745,
|
|
"learning_rate": 0.0002992922156142619,
|
|
"loss": 0.8745,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.12878787878787878,
|
|
"grad_norm": 0.06439001370901883,
|
|
"learning_rate": 0.00029924328937446686,
|
|
"loss": 0.8786,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.12973484848484848,
|
|
"grad_norm": 0.05448875777529848,
|
|
"learning_rate": 0.0002991927324075019,
|
|
"loss": 0.8619,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.13068181818181818,
|
|
"grad_norm": 0.05664358337517303,
|
|
"learning_rate": 0.0002991405452657846,
|
|
"loss": 0.8997,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.13162878787878787,
|
|
"grad_norm": 0.06476323651067244,
|
|
"learning_rate": 0.0002990867285195449,
|
|
"loss": 0.8965,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.13257575757575757,
|
|
"grad_norm": 0.052072472586110224,
|
|
"learning_rate": 0.0002990312827568188,
|
|
"loss": 0.9026,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.13352272727272727,
|
|
"grad_norm": 0.058261512452499706,
|
|
"learning_rate": 0.00029897420858344205,
|
|
"loss": 0.8927,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.13446969696969696,
|
|
"grad_norm": 0.048984858535432614,
|
|
"learning_rate": 0.0002989155066230433,
|
|
"loss": 0.8755,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.13541666666666666,
|
|
"grad_norm": 0.05290095759260302,
|
|
"learning_rate": 0.0002988551775170377,
|
|
"loss": 0.8848,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.13636363636363635,
|
|
"grad_norm": 0.05687350078562719,
|
|
"learning_rate": 0.00029879322192461925,
|
|
"loss": 0.8539,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.13731060606060605,
|
|
"grad_norm": 0.05103479492140091,
|
|
"learning_rate": 0.0002987296405227543,
|
|
"loss": 0.8953,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.13825757575757575,
|
|
"grad_norm": 0.06301978431499398,
|
|
"learning_rate": 0.0002986644340061738,
|
|
"loss": 0.8679,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.13920454545454544,
|
|
"grad_norm": 0.06819363260699743,
|
|
"learning_rate": 0.0002985976030873655,
|
|
"loss": 0.8767,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.14015151515151514,
|
|
"grad_norm": 0.06229636016637017,
|
|
"learning_rate": 0.0002985291484965666,
|
|
"loss": 0.8764,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.14109848484848486,
|
|
"grad_norm": 0.05610530939029699,
|
|
"learning_rate": 0.0002984590709817555,
|
|
"loss": 0.9009,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.14204545454545456,
|
|
"grad_norm": 0.04941941919339848,
|
|
"learning_rate": 0.0002983873713086439,
|
|
"loss": 0.8986,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.14299242424242425,
|
|
"grad_norm": 0.054105335971047615,
|
|
"learning_rate": 0.00029831405026066785,
|
|
"loss": 0.9131,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.14393939393939395,
|
|
"grad_norm": 0.055466909315876986,
|
|
"learning_rate": 0.0002982391086389799,
|
|
"loss": 0.8663,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.14488636363636365,
|
|
"grad_norm": 0.05043550339837371,
|
|
"learning_rate": 0.00029816254726243983,
|
|
"loss": 0.8959,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.14583333333333334,
|
|
"grad_norm": 0.05417610489948402,
|
|
"learning_rate": 0.0002980843669676061,
|
|
"loss": 0.8616,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.14678030303030304,
|
|
"grad_norm": 0.05041852142450034,
|
|
"learning_rate": 0.0002980045686087262,
|
|
"loss": 0.8855,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.14772727272727273,
|
|
"grad_norm": 0.06901010647152613,
|
|
"learning_rate": 0.00029792315305772796,
|
|
"loss": 0.9032,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.14867424242424243,
|
|
"grad_norm": 2.738390350230735,
|
|
"learning_rate": 0.00029784012120420944,
|
|
"loss": 0.888,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.14962121212121213,
|
|
"grad_norm": 0.1937615852009521,
|
|
"learning_rate": 0.0002977554739554294,
|
|
"loss": 1.0592,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.15056818181818182,
|
|
"grad_norm": 0.11234392360677802,
|
|
"learning_rate": 0.00029766921223629774,
|
|
"loss": 0.9652,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.15151515151515152,
|
|
"grad_norm": 0.12436979475241608,
|
|
"learning_rate": 0.00029758133698936485,
|
|
"loss": 0.9394,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.15246212121212122,
|
|
"grad_norm": 0.06657911701516095,
|
|
"learning_rate": 0.00029749184917481157,
|
|
"loss": 0.9099,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.1534090909090909,
|
|
"grad_norm": 0.0824368743598765,
|
|
"learning_rate": 0.00029740074977043873,
|
|
"loss": 0.8753,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.1543560606060606,
|
|
"grad_norm": 0.059895919738978086,
|
|
"learning_rate": 0.00029730803977165643,
|
|
"loss": 0.9159,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.1553030303030303,
|
|
"grad_norm": 0.04706104790187168,
|
|
"learning_rate": 0.00029721372019147314,
|
|
"loss": 0.9117,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.15625,
|
|
"grad_norm": 0.05202762617344226,
|
|
"learning_rate": 0.00029711779206048454,
|
|
"loss": 0.8807,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.1571969696969697,
|
|
"grad_norm": 0.050944436154957536,
|
|
"learning_rate": 0.0002970202564268625,
|
|
"loss": 0.8665,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.1581439393939394,
|
|
"grad_norm": 0.050530106988524406,
|
|
"learning_rate": 0.00029692111435634347,
|
|
"loss": 0.853,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.1590909090909091,
|
|
"grad_norm": 0.047944409481566634,
|
|
"learning_rate": 0.0002968203669322168,
|
|
"loss": 0.8719,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.16003787878787878,
|
|
"grad_norm": 0.05621479623321537,
|
|
"learning_rate": 0.0002967180152553129,
|
|
"loss": 0.8602,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.16098484848484848,
|
|
"grad_norm": 0.053970797332012134,
|
|
"learning_rate": 0.0002966140604439914,
|
|
"loss": 0.8804,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.16193181818181818,
|
|
"grad_norm": 0.05575444277143619,
|
|
"learning_rate": 0.0002965085036341287,
|
|
"loss": 0.8672,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.16287878787878787,
|
|
"grad_norm": 0.054447528875280926,
|
|
"learning_rate": 0.0002964013459791057,
|
|
"loss": 0.8705,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.16382575757575757,
|
|
"grad_norm": 0.04819722412795564,
|
|
"learning_rate": 0.0002962925886497952,
|
|
"loss": 0.885,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.16477272727272727,
|
|
"grad_norm": 0.04927047066910389,
|
|
"learning_rate": 0.00029618223283454893,
|
|
"loss": 0.8793,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.16571969696969696,
|
|
"grad_norm": 0.05156180447905288,
|
|
"learning_rate": 0.0002960702797391848,
|
|
"loss": 0.8697,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.16666666666666666,
|
|
"grad_norm": 0.047498663975991506,
|
|
"learning_rate": 0.00029595673058697357,
|
|
"loss": 0.8944,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.16761363636363635,
|
|
"grad_norm": 0.04931121452183828,
|
|
"learning_rate": 0.0002958415866186255,
|
|
"loss": 0.8708,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.16856060606060605,
|
|
"grad_norm": 0.04971806922102403,
|
|
"learning_rate": 0.000295724849092277,
|
|
"loss": 0.886,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.16950757575757575,
|
|
"grad_norm": 0.04782190553843517,
|
|
"learning_rate": 0.0002956065192834765,
|
|
"loss": 0.8625,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.17045454545454544,
|
|
"grad_norm": 0.05985135374827048,
|
|
"learning_rate": 0.00029548659848517073,
|
|
"loss": 0.8572,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.17140151515151514,
|
|
"grad_norm": 0.052563040632905934,
|
|
"learning_rate": 0.00029536508800769083,
|
|
"loss": 0.8527,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.17234848484848486,
|
|
"grad_norm": 0.04857882149280908,
|
|
"learning_rate": 0.0002952419891787375,
|
|
"loss": 0.8739,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.17329545454545456,
|
|
"grad_norm": 0.0479060897399876,
|
|
"learning_rate": 0.00029511730334336693,
|
|
"loss": 0.8905,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.17424242424242425,
|
|
"grad_norm": 0.05040950791010248,
|
|
"learning_rate": 0.00029499103186397596,
|
|
"loss": 0.8738,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.17518939393939395,
|
|
"grad_norm": 0.04549261460480799,
|
|
"learning_rate": 0.00029486317612028705,
|
|
"loss": 0.8697,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.17613636363636365,
|
|
"grad_norm": 0.04640647025353498,
|
|
"learning_rate": 0.00029473373750933354,
|
|
"loss": 0.8697,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.17708333333333334,
|
|
"grad_norm": 0.04813238779352585,
|
|
"learning_rate": 0.0002946027174454439,
|
|
"loss": 0.8691,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.17803030303030304,
|
|
"grad_norm": 0.04735624926206103,
|
|
"learning_rate": 0.0002944701173602269,
|
|
"loss": 0.8785,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.17897727272727273,
|
|
"grad_norm": 0.04918343048516045,
|
|
"learning_rate": 0.00029433593870255547,
|
|
"loss": 0.8832,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.17992424242424243,
|
|
"grad_norm": 0.05187324555019995,
|
|
"learning_rate": 0.00029420018293855097,
|
|
"loss": 0.8931,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.18087121212121213,
|
|
"grad_norm": 0.051833428541254264,
|
|
"learning_rate": 0.0002940628515515673,
|
|
"loss": 0.8505,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.18181818181818182,
|
|
"grad_norm": 0.04842441750953903,
|
|
"learning_rate": 0.0002939239460421746,
|
|
"loss": 0.8619,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.18276515151515152,
|
|
"grad_norm": 0.04450318082424258,
|
|
"learning_rate": 0.00029378346792814284,
|
|
"loss": 0.8935,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.18371212121212122,
|
|
"grad_norm": 0.050265508339128746,
|
|
"learning_rate": 0.00029364141874442534,
|
|
"loss": 0.8875,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.1846590909090909,
|
|
"grad_norm": 0.0489312969341679,
|
|
"learning_rate": 0.00029349780004314196,
|
|
"loss": 0.8707,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.1856060606060606,
|
|
"grad_norm": 0.0448678465223849,
|
|
"learning_rate": 0.0002933526133935619,
|
|
"loss": 0.8759,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.1865530303030303,
|
|
"grad_norm": 0.04933677179150076,
|
|
"learning_rate": 0.000293205860382087,
|
|
"loss": 0.8761,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.1875,
|
|
"grad_norm": 0.04538600611093541,
|
|
"learning_rate": 0.000293057542612234,
|
|
"loss": 0.8683,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.1884469696969697,
|
|
"grad_norm": 0.04816654163118486,
|
|
"learning_rate": 0.00029290766170461733,
|
|
"loss": 0.8575,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.1893939393939394,
|
|
"grad_norm": 0.05162840473736974,
|
|
"learning_rate": 0.0002927562192969312,
|
|
"loss": 0.8788,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.1903409090909091,
|
|
"grad_norm": 0.0526139634089819,
|
|
"learning_rate": 0.00029260321704393166,
|
|
"loss": 0.8842,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.19128787878787878,
|
|
"grad_norm": 0.06020706080827499,
|
|
"learning_rate": 0.0002924486566174187,
|
|
"loss": 0.8873,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.19223484848484848,
|
|
"grad_norm": 0.05225939758166581,
|
|
"learning_rate": 0.00029229253970621796,
|
|
"loss": 0.8354,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.19318181818181818,
|
|
"grad_norm": 0.05284878294440371,
|
|
"learning_rate": 0.0002921348680161622,
|
|
"loss": 0.9025,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.19412878787878787,
|
|
"grad_norm": 0.059173148849731974,
|
|
"learning_rate": 0.00029197564327007266,
|
|
"loss": 0.8405,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.19507575757575757,
|
|
"grad_norm": 0.05723447002828778,
|
|
"learning_rate": 0.00029181486720774024,
|
|
"loss": 0.9033,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.19602272727272727,
|
|
"grad_norm": 0.04961291522370079,
|
|
"learning_rate": 0.0002916525415859065,
|
|
"loss": 0.8517,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.19696969696969696,
|
|
"grad_norm": 0.04405850577071398,
|
|
"learning_rate": 0.0002914886681782445,
|
|
"loss": 0.8605,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.19791666666666666,
|
|
"grad_norm": 0.052549109623340674,
|
|
"learning_rate": 0.00029132324877533943,
|
|
"loss": 0.8903,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.19886363636363635,
|
|
"grad_norm": 0.052002448553744814,
|
|
"learning_rate": 0.000291156285184669,
|
|
"loss": 0.8673,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.19981060606060605,
|
|
"grad_norm": 0.057528545450935206,
|
|
"learning_rate": 0.0002909877792305836,
|
|
"loss": 0.8693,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.20075757575757575,
|
|
"grad_norm": 0.05849975352441284,
|
|
"learning_rate": 0.0002908177327542866,
|
|
"loss": 0.8806,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.20170454545454544,
|
|
"grad_norm": 0.0668330093981767,
|
|
"learning_rate": 0.00029064614761381395,
|
|
"loss": 0.8573,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.20265151515151514,
|
|
"grad_norm": 0.08085163002687007,
|
|
"learning_rate": 0.0002904730256840142,
|
|
"loss": 0.8588,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.20359848484848486,
|
|
"grad_norm": 0.06115289967256575,
|
|
"learning_rate": 0.0002902983688565276,
|
|
"loss": 0.8489,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.20454545454545456,
|
|
"grad_norm": 0.04578661935010964,
|
|
"learning_rate": 0.000290122179039766,
|
|
"loss": 0.8647,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.20549242424242425,
|
|
"grad_norm": 0.04738987289722607,
|
|
"learning_rate": 0.00028994445815889135,
|
|
"loss": 0.8928,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.20643939393939395,
|
|
"grad_norm": 0.048922711283470234,
|
|
"learning_rate": 0.00028976520815579516,
|
|
"loss": 0.8571,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.20738636363636365,
|
|
"grad_norm": 0.042842101095754544,
|
|
"learning_rate": 0.000289584430989077,
|
|
"loss": 0.8994,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.20833333333333334,
|
|
"grad_norm": 0.049240088817609676,
|
|
"learning_rate": 0.0002894021286340233,
|
|
"loss": 0.8703,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.20928030303030304,
|
|
"grad_norm": 0.04405475980914324,
|
|
"learning_rate": 0.0002892183030825857,
|
|
"loss": 0.8697,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.21022727272727273,
|
|
"grad_norm": 0.052077127621998655,
|
|
"learning_rate": 0.00028903295634335904,
|
|
"loss": 0.8995,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.21117424242424243,
|
|
"grad_norm": 0.059590406683339404,
|
|
"learning_rate": 0.00028884609044155983,
|
|
"loss": 0.8798,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 0.21212121212121213,
|
|
"grad_norm": 0.044865973163293216,
|
|
"learning_rate": 0.0002886577074190038,
|
|
"loss": 0.8965,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.21306818181818182,
|
|
"grad_norm": 0.05169860969332574,
|
|
"learning_rate": 0.0002884678093340838,
|
|
"loss": 0.8554,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.21401515151515152,
|
|
"grad_norm": 0.05156894746619203,
|
|
"learning_rate": 0.00028827639826174716,
|
|
"loss": 0.8727,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.21496212121212122,
|
|
"grad_norm": 0.04493193351896756,
|
|
"learning_rate": 0.0002880834762934731,
|
|
"loss": 0.8659,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 0.2159090909090909,
|
|
"grad_norm": 0.04709600714027476,
|
|
"learning_rate": 0.0002878890455372498,
|
|
"loss": 0.8494,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.2168560606060606,
|
|
"grad_norm": 0.040578982776887075,
|
|
"learning_rate": 0.00028769310811755153,
|
|
"loss": 0.8345,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 0.2178030303030303,
|
|
"grad_norm": 0.0454120547462,
|
|
"learning_rate": 0.0002874956661753152,
|
|
"loss": 0.893,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.21875,
|
|
"grad_norm": 0.0458718950734734,
|
|
"learning_rate": 0.00028729672186791704,
|
|
"loss": 0.8453,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 0.2196969696969697,
|
|
"grad_norm": 0.04315005418940203,
|
|
"learning_rate": 0.0002870962773691493,
|
|
"loss": 0.8389,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.2206439393939394,
|
|
"grad_norm": 0.04752379784930806,
|
|
"learning_rate": 0.00028689433486919617,
|
|
"loss": 0.8673,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 0.2215909090909091,
|
|
"grad_norm": 0.05684552525580732,
|
|
"learning_rate": 0.00028669089657460984,
|
|
"loss": 0.867,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.22253787878787878,
|
|
"grad_norm": 0.050983860484211566,
|
|
"learning_rate": 0.00028648596470828673,
|
|
"loss": 0.8647,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.22348484848484848,
|
|
"grad_norm": 0.046503504621597226,
|
|
"learning_rate": 0.0002862795415094427,
|
|
"loss": 0.8697,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.22443181818181818,
|
|
"grad_norm": 0.0450753851181475,
|
|
"learning_rate": 0.0002860716292335891,
|
|
"loss": 0.8249,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 0.22537878787878787,
|
|
"grad_norm": 0.04485421555009402,
|
|
"learning_rate": 0.0002858622301525078,
|
|
"loss": 0.8637,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.22632575757575757,
|
|
"grad_norm": 0.04710476072245473,
|
|
"learning_rate": 0.0002856513465542263,
|
|
"loss": 0.8712,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 0.22727272727272727,
|
|
"grad_norm": 0.04590515210754422,
|
|
"learning_rate": 0.00028543898074299317,
|
|
"loss": 0.8899,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.22821969696969696,
|
|
"grad_norm": 0.046677806922824555,
|
|
"learning_rate": 0.00028522513503925236,
|
|
"loss": 0.8331,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 0.22916666666666666,
|
|
"grad_norm": 0.05326891301520586,
|
|
"learning_rate": 0.00028500981177961816,
|
|
"loss": 0.8506,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.23011363636363635,
|
|
"grad_norm": 0.04687694995176648,
|
|
"learning_rate": 0.0002847930133168495,
|
|
"loss": 0.8718,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 0.23106060606060605,
|
|
"grad_norm": 0.043766596329374095,
|
|
"learning_rate": 0.0002845747420198245,
|
|
"loss": 0.8355,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.23200757575757575,
|
|
"grad_norm": 0.051852335183307466,
|
|
"learning_rate": 0.00028435500027351415,
|
|
"loss": 0.9018,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.23295454545454544,
|
|
"grad_norm": 0.046871786280526524,
|
|
"learning_rate": 0.00028413379047895665,
|
|
"loss": 0.8773,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.23390151515151514,
|
|
"grad_norm": 0.05319047340562311,
|
|
"learning_rate": 0.0002839111150532311,
|
|
"loss": 0.8744,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 0.23484848484848486,
|
|
"grad_norm": 0.0480557659328724,
|
|
"learning_rate": 0.0002836869764294308,
|
|
"loss": 0.8543,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.23579545454545456,
|
|
"grad_norm": 0.045235971797863456,
|
|
"learning_rate": 0.0002834613770566371,
|
|
"loss": 0.8811,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 0.23674242424242425,
|
|
"grad_norm": 0.051086084811488776,
|
|
"learning_rate": 0.0002832343193998923,
|
|
"loss": 0.8688,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.23768939393939395,
|
|
"grad_norm": 0.04921720651133015,
|
|
"learning_rate": 0.00028300580594017296,
|
|
"loss": 0.8556,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 0.23863636363636365,
|
|
"grad_norm": 0.046260991109867027,
|
|
"learning_rate": 0.00028277583917436246,
|
|
"loss": 0.8536,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.23958333333333334,
|
|
"grad_norm": 0.04507558602085682,
|
|
"learning_rate": 0.00028254442161522415,
|
|
"loss": 0.8606,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 0.24053030303030304,
|
|
"grad_norm": 0.04347528822105258,
|
|
"learning_rate": 0.00028231155579137347,
|
|
"loss": 0.8224,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.24147727272727273,
|
|
"grad_norm": 0.044645406213969646,
|
|
"learning_rate": 0.00028207724424725067,
|
|
"loss": 0.8103,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 0.24242424242424243,
|
|
"grad_norm": 0.04886991006139869,
|
|
"learning_rate": 0.0002818414895430929,
|
|
"loss": 0.8681,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.24337121212121213,
|
|
"grad_norm": 0.050211499173204034,
|
|
"learning_rate": 0.000281604294254906,
|
|
"loss": 0.8465,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 0.24431818181818182,
|
|
"grad_norm": 0.04407450456467826,
|
|
"learning_rate": 0.0002813656609744367,
|
|
"loss": 0.8587,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.24526515151515152,
|
|
"grad_norm": 0.046396078514299374,
|
|
"learning_rate": 0.00028112559230914413,
|
|
"loss": 0.8836,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 0.24621212121212122,
|
|
"grad_norm": 0.04992457772876899,
|
|
"learning_rate": 0.0002808840908821713,
|
|
"loss": 0.847,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.2471590909090909,
|
|
"grad_norm": 0.06899088573327124,
|
|
"learning_rate": 0.00028064115933231653,
|
|
"loss": 0.8284,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 0.2481060606060606,
|
|
"grad_norm": 0.047887893856181384,
|
|
"learning_rate": 0.00028039680031400455,
|
|
"loss": 0.8428,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.2490530303030303,
|
|
"grad_norm": 0.04582410718511601,
|
|
"learning_rate": 0.00028015101649725747,
|
|
"loss": 0.8384,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"grad_norm": 0.045636060323846026,
|
|
"learning_rate": 0.0002799038105676658,
|
|
"loss": 0.843,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.2509469696969697,
|
|
"grad_norm": 0.04519189300518443,
|
|
"learning_rate": 0.0002796551852263588,
|
|
"loss": 0.8908,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 0.2518939393939394,
|
|
"grad_norm": 0.045578564236409914,
|
|
"learning_rate": 0.00027940514318997516,
|
|
"loss": 0.8572,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.2528409090909091,
|
|
"grad_norm": 0.051686507213315025,
|
|
"learning_rate": 0.0002791536871906334,
|
|
"loss": 0.8619,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 0.2537878787878788,
|
|
"grad_norm": 0.04345687899653502,
|
|
"learning_rate": 0.0002789008199759018,
|
|
"loss": 0.8459,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.2547348484848485,
|
|
"grad_norm": 0.046507137153713074,
|
|
"learning_rate": 0.0002786465443087685,
|
|
"loss": 0.8607,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 0.2556818181818182,
|
|
"grad_norm": 0.042657023216453836,
|
|
"learning_rate": 0.0002783908629676112,
|
|
"loss": 0.8548,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.2566287878787879,
|
|
"grad_norm": 0.040538053747547015,
|
|
"learning_rate": 0.00027813377874616707,
|
|
"loss": 0.8389,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 0.25757575757575757,
|
|
"grad_norm": 0.04525382597081455,
|
|
"learning_rate": 0.0002778752944535019,
|
|
"loss": 0.8372,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.2585227272727273,
|
|
"grad_norm": 0.04234205633912051,
|
|
"learning_rate": 0.00027761541291397964,
|
|
"loss": 0.8426,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 0.25946969696969696,
|
|
"grad_norm": 0.04757806675030589,
|
|
"learning_rate": 0.00027735413696723123,
|
|
"loss": 0.8459,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.2604166666666667,
|
|
"grad_norm": 0.043206314979838206,
|
|
"learning_rate": 0.00027709146946812413,
|
|
"loss": 0.8384,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 0.26136363636363635,
|
|
"grad_norm": 0.04812617395176995,
|
|
"learning_rate": 0.00027682741328673063,
|
|
"loss": 0.83,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.2623106060606061,
|
|
"grad_norm": 0.04595880004567738,
|
|
"learning_rate": 0.0002765619713082965,
|
|
"loss": 0.8704,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 0.26325757575757575,
|
|
"grad_norm": 0.04342248392045638,
|
|
"learning_rate": 0.0002762951464332098,
|
|
"loss": 0.8545,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.26420454545454547,
|
|
"grad_norm": 0.0468715942875786,
|
|
"learning_rate": 0.0002760269415769691,
|
|
"loss": 0.854,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 0.26515151515151514,
|
|
"grad_norm": 0.045557679264808,
|
|
"learning_rate": 0.0002757573596701511,
|
|
"loss": 0.8543,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.26609848484848486,
|
|
"grad_norm": 0.045277257320571196,
|
|
"learning_rate": 0.0002754864036583795,
|
|
"loss": 0.8519,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 0.26704545454545453,
|
|
"grad_norm": 0.04446480155061431,
|
|
"learning_rate": 0.000275214076502292,
|
|
"loss": 0.852,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.26799242424242425,
|
|
"grad_norm": 0.04286461978075112,
|
|
"learning_rate": 0.00027494038117750855,
|
|
"loss": 0.873,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 0.2689393939393939,
|
|
"grad_norm": 0.04635120751169076,
|
|
"learning_rate": 0.0002746653206745984,
|
|
"loss": 0.8675,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.26988636363636365,
|
|
"grad_norm": 0.04897795119080024,
|
|
"learning_rate": 0.0002743888979990477,
|
|
"loss": 0.8489,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 0.2708333333333333,
|
|
"grad_norm": 0.045351523544172836,
|
|
"learning_rate": 0.00027411111617122656,
|
|
"loss": 0.8815,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.27178030303030304,
|
|
"grad_norm": 0.046888003925487816,
|
|
"learning_rate": 0.00027383197822635597,
|
|
"loss": 0.8619,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 0.2727272727272727,
|
|
"grad_norm": 0.050747409625094775,
|
|
"learning_rate": 0.0002735514872144749,
|
|
"loss": 0.877,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.27367424242424243,
|
|
"grad_norm": 0.04867163124702627,
|
|
"learning_rate": 0.0002732696462004066,
|
|
"loss": 0.86,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 0.2746212121212121,
|
|
"grad_norm": 0.053312130647735934,
|
|
"learning_rate": 0.00027298645826372527,
|
|
"loss": 0.8609,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.2755681818181818,
|
|
"grad_norm": 0.04023418732684079,
|
|
"learning_rate": 0.0002727019264987227,
|
|
"loss": 0.8598,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 0.2765151515151515,
|
|
"grad_norm": 0.04347366104067643,
|
|
"learning_rate": 0.000272416054014374,
|
|
"loss": 0.8443,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.2774621212121212,
|
|
"grad_norm": 0.042854675811405736,
|
|
"learning_rate": 0.00027212884393430396,
|
|
"loss": 0.8632,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 0.2784090909090909,
|
|
"grad_norm": 0.04461599878281101,
|
|
"learning_rate": 0.0002718402993967526,
|
|
"loss": 0.8469,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.2793560606060606,
|
|
"grad_norm": 0.0458799502796299,
|
|
"learning_rate": 0.0002715504235545412,
|
|
"loss": 0.8675,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 0.2803030303030303,
|
|
"grad_norm": 0.041761756053765885,
|
|
"learning_rate": 0.0002712592195750378,
|
|
"loss": 0.8751,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.28125,
|
|
"grad_norm": 0.04293009223271159,
|
|
"learning_rate": 0.0002709666906401224,
|
|
"loss": 0.8591,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 0.2821969696969697,
|
|
"grad_norm": 0.042628404150602366,
|
|
"learning_rate": 0.00027067283994615225,
|
|
"loss": 0.8314,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.2831439393939394,
|
|
"grad_norm": 0.043803929434188336,
|
|
"learning_rate": 0.0002703776707039271,
|
|
"loss": 0.8515,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 0.2840909090909091,
|
|
"grad_norm": 0.047256485311155456,
|
|
"learning_rate": 0.00027008118613865406,
|
|
"loss": 0.8376,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.2850378787878788,
|
|
"grad_norm": 0.046926959946348615,
|
|
"learning_rate": 0.00026978338948991206,
|
|
"loss": 0.8423,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 0.2859848484848485,
|
|
"grad_norm": 0.04941952831110132,
|
|
"learning_rate": 0.0002694842840116169,
|
|
"loss": 0.8564,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.2869318181818182,
|
|
"grad_norm": 0.04638823285342314,
|
|
"learning_rate": 0.0002691838729719854,
|
|
"loss": 0.851,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 0.2878787878787879,
|
|
"grad_norm": 0.051062848616744594,
|
|
"learning_rate": 0.0002688821596534997,
|
|
"loss": 0.8592,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.28882575757575757,
|
|
"grad_norm": 0.048642765971924094,
|
|
"learning_rate": 0.00026857914735287173,
|
|
"loss": 0.8651,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 0.2897727272727273,
|
|
"grad_norm": 0.041614396540575575,
|
|
"learning_rate": 0.0002682748393810066,
|
|
"loss": 0.853,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.29071969696969696,
|
|
"grad_norm": 0.04037850703898104,
|
|
"learning_rate": 0.0002679692390629669,
|
|
"loss": 0.8714,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 0.2916666666666667,
|
|
"grad_norm": 0.045919213909734996,
|
|
"learning_rate": 0.0002676623497379363,
|
|
"loss": 0.8526,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.29261363636363635,
|
|
"grad_norm": 0.0435916558717206,
|
|
"learning_rate": 0.00026735417475918285,
|
|
"loss": 0.8474,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 0.2935606060606061,
|
|
"grad_norm": 0.04829847525687287,
|
|
"learning_rate": 0.00026704471749402256,
|
|
"loss": 0.8548,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.29450757575757575,
|
|
"grad_norm": 0.04805727825764429,
|
|
"learning_rate": 0.0002667339813237824,
|
|
"loss": 0.8453,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 0.29545454545454547,
|
|
"grad_norm": 0.05010295983510741,
|
|
"learning_rate": 0.0002664219696437635,
|
|
"loss": 0.8416,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.29640151515151514,
|
|
"grad_norm": 0.04388851666661931,
|
|
"learning_rate": 0.00026610868586320416,
|
|
"loss": 0.8341,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 0.29734848484848486,
|
|
"grad_norm": 0.045305461318018866,
|
|
"learning_rate": 0.00026579413340524233,
|
|
"loss": 0.8322,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.29829545454545453,
|
|
"grad_norm": 0.04178070112825466,
|
|
"learning_rate": 0.0002654783157068785,
|
|
"loss": 0.8798,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 0.29924242424242425,
|
|
"grad_norm": 0.039660241288771,
|
|
"learning_rate": 0.00026516123621893756,
|
|
"loss": 0.8512,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.3001893939393939,
|
|
"grad_norm": 0.04956139252725399,
|
|
"learning_rate": 0.0002648428984060321,
|
|
"loss": 0.8531,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 0.30113636363636365,
|
|
"grad_norm": 0.04050711765679051,
|
|
"learning_rate": 0.0002645233057465235,
|
|
"loss": 0.8714,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.3020833333333333,
|
|
"grad_norm": 0.044882658138218526,
|
|
"learning_rate": 0.00026420246173248466,
|
|
"loss": 0.8576,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 0.30303030303030304,
|
|
"grad_norm": 0.0443601837928335,
|
|
"learning_rate": 0.00026388036986966146,
|
|
"loss": 0.8458,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.3039772727272727,
|
|
"grad_norm": 0.04445201847842639,
|
|
"learning_rate": 0.00026355703367743463,
|
|
"loss": 0.8262,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 0.30492424242424243,
|
|
"grad_norm": 0.04296295815749959,
|
|
"learning_rate": 0.0002632324566887811,
|
|
"loss": 0.852,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.3058712121212121,
|
|
"grad_norm": 0.04595484323544366,
|
|
"learning_rate": 0.0002629066424502358,
|
|
"loss": 0.8712,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 0.3068181818181818,
|
|
"grad_norm": 0.05135389183761192,
|
|
"learning_rate": 0.0002625795945218523,
|
|
"loss": 0.8686,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.3077651515151515,
|
|
"grad_norm": 0.05402260665782284,
|
|
"learning_rate": 0.00026225131647716454,
|
|
"loss": 0.8705,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 0.3087121212121212,
|
|
"grad_norm": 0.04647458435053281,
|
|
"learning_rate": 0.00026192181190314734,
|
|
"loss": 0.8497,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.3096590909090909,
|
|
"grad_norm": 0.04501320267899854,
|
|
"learning_rate": 0.0002615910844001774,
|
|
"loss": 0.8699,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 0.3106060606060606,
|
|
"grad_norm": 0.044016650809303635,
|
|
"learning_rate": 0.0002612591375819939,
|
|
"loss": 0.8451,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.3115530303030303,
|
|
"grad_norm": 0.04270641863707379,
|
|
"learning_rate": 0.0002609259750756591,
|
|
"loss": 0.8264,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 0.3125,
|
|
"grad_norm": 0.0454677296060317,
|
|
"learning_rate": 0.0002605916005215186,
|
|
"loss": 0.8344,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.3134469696969697,
|
|
"grad_norm": 0.04056658550140151,
|
|
"learning_rate": 0.0002602560175731615,
|
|
"loss": 0.8187,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 0.3143939393939394,
|
|
"grad_norm": 0.047795906369322495,
|
|
"learning_rate": 0.0002599192298973808,
|
|
"loss": 0.8596,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.3153409090909091,
|
|
"grad_norm": 0.04746275776015859,
|
|
"learning_rate": 0.00025958124117413296,
|
|
"loss": 0.8373,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 0.3162878787878788,
|
|
"grad_norm": 0.0490167919610274,
|
|
"learning_rate": 0.0002592420550964979,
|
|
"loss": 0.8605,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.3172348484848485,
|
|
"grad_norm": 0.0425174441426416,
|
|
"learning_rate": 0.00025890167537063856,
|
|
"loss": 0.8466,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 0.3181818181818182,
|
|
"grad_norm": 0.04266217832901279,
|
|
"learning_rate": 0.0002585601057157605,
|
|
"loss": 0.853,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.3191287878787879,
|
|
"grad_norm": 0.042298923569418494,
|
|
"learning_rate": 0.00025821734986407113,
|
|
"loss": 0.852,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 0.32007575757575757,
|
|
"grad_norm": 0.03839109699626175,
|
|
"learning_rate": 0.00025787341156073915,
|
|
"loss": 0.8079,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.3210227272727273,
|
|
"grad_norm": 0.046090433170696436,
|
|
"learning_rate": 0.0002575282945638532,
|
|
"loss": 0.8622,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 0.32196969696969696,
|
|
"grad_norm": 0.043394806500603955,
|
|
"learning_rate": 0.0002571820026443814,
|
|
"loss": 0.8569,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.3229166666666667,
|
|
"grad_norm": 0.04364474268105156,
|
|
"learning_rate": 0.00025683453958612963,
|
|
"loss": 0.859,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 0.32386363636363635,
|
|
"grad_norm": 0.04738452037799788,
|
|
"learning_rate": 0.0002564859091857004,
|
|
"loss": 0.8639,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.3248106060606061,
|
|
"grad_norm": 0.046706389086655475,
|
|
"learning_rate": 0.0002561361152524513,
|
|
"loss": 0.8685,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 0.32575757575757575,
|
|
"grad_norm": 0.04065493744655141,
|
|
"learning_rate": 0.0002557851616084536,
|
|
"loss": 0.8287,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.32670454545454547,
|
|
"grad_norm": 0.042170889184096456,
|
|
"learning_rate": 0.00025543305208845015,
|
|
"loss": 0.8397,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 0.32765151515151514,
|
|
"grad_norm": 0.057009989738631466,
|
|
"learning_rate": 0.0002550797905398136,
|
|
"loss": 0.8424,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.32859848484848486,
|
|
"grad_norm": 0.054999335357357654,
|
|
"learning_rate": 0.0002547253808225045,
|
|
"loss": 0.8493,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 0.32954545454545453,
|
|
"grad_norm": 0.04878510537743201,
|
|
"learning_rate": 0.0002543698268090291,
|
|
"loss": 0.8687,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.33049242424242425,
|
|
"grad_norm": 0.04555832298125919,
|
|
"learning_rate": 0.0002540131323843968,
|
|
"loss": 0.848,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 0.3314393939393939,
|
|
"grad_norm": 0.04559121786828398,
|
|
"learning_rate": 0.0002536553014460778,
|
|
"loss": 0.8422,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.33238636363636365,
|
|
"grad_norm": 0.0412750415651045,
|
|
"learning_rate": 0.00025329633790396086,
|
|
"loss": 0.8528,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 0.3333333333333333,
|
|
"grad_norm": 0.041659064100662745,
|
|
"learning_rate": 0.00025293624568031,
|
|
"loss": 0.8587,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.33428030303030304,
|
|
"grad_norm": 0.04017536408178578,
|
|
"learning_rate": 0.0002525750287097221,
|
|
"loss": 0.8273,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 0.3352272727272727,
|
|
"grad_norm": 0.043594334725810896,
|
|
"learning_rate": 0.00025221269093908365,
|
|
"loss": 0.8344,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.33617424242424243,
|
|
"grad_norm": 0.03975857698007793,
|
|
"learning_rate": 0.00025184923632752776,
|
|
"loss": 0.8312,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 0.3371212121212121,
|
|
"grad_norm": 0.043307661902050605,
|
|
"learning_rate": 0.0002514846688463909,
|
|
"loss": 0.8384,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.3380681818181818,
|
|
"grad_norm": 0.041752317967921206,
|
|
"learning_rate": 0.00025111899247916926,
|
|
"loss": 0.8407,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 0.3390151515151515,
|
|
"grad_norm": 0.042776187451381745,
|
|
"learning_rate": 0.0002507522112214758,
|
|
"loss": 0.8217,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.3399621212121212,
|
|
"grad_norm": 0.04588631965260685,
|
|
"learning_rate": 0.0002503843290809958,
|
|
"loss": 0.8546,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 0.3409090909090909,
|
|
"grad_norm": 0.043083702336913526,
|
|
"learning_rate": 0.00025001535007744373,
|
|
"loss": 0.8378,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.3418560606060606,
|
|
"grad_norm": 0.04134116740011592,
|
|
"learning_rate": 0.00024964527824251903,
|
|
"loss": 0.8525,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 0.3428030303030303,
|
|
"grad_norm": 0.04016855550268295,
|
|
"learning_rate": 0.00024927411761986216,
|
|
"loss": 0.8114,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.34375,
|
|
"grad_norm": 0.04440153926149279,
|
|
"learning_rate": 0.0002489018722650103,
|
|
"loss": 0.8502,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 0.3446969696969697,
|
|
"grad_norm": 0.056819456110476556,
|
|
"learning_rate": 0.00024852854624535307,
|
|
"loss": 0.8235,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.3456439393939394,
|
|
"grad_norm": 0.05226275677923263,
|
|
"learning_rate": 0.00024815414364008826,
|
|
"loss": 0.8361,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 0.3465909090909091,
|
|
"grad_norm": 0.044209574304730354,
|
|
"learning_rate": 0.0002477786685401769,
|
|
"loss": 0.8408,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.3475378787878788,
|
|
"grad_norm": 0.04524443871766083,
|
|
"learning_rate": 0.0002474021250482991,
|
|
"loss": 0.837,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 0.3484848484848485,
|
|
"grad_norm": 0.045412892342884655,
|
|
"learning_rate": 0.0002470245172788086,
|
|
"loss": 0.8386,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.3494318181818182,
|
|
"grad_norm": 0.04952839628457116,
|
|
"learning_rate": 0.0002466458493576882,
|
|
"loss": 0.8396,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 0.3503787878787879,
|
|
"grad_norm": 0.05660754811136995,
|
|
"learning_rate": 0.0002462661254225047,
|
|
"loss": 0.881,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.35132575757575757,
|
|
"grad_norm": 0.04478496122656017,
|
|
"learning_rate": 0.00024588534962236344,
|
|
"loss": 0.8725,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 0.3522727272727273,
|
|
"grad_norm": 0.047218982842698924,
|
|
"learning_rate": 0.0002455035261178632,
|
|
"loss": 0.8637,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.35321969696969696,
|
|
"grad_norm": 0.041481601312663416,
|
|
"learning_rate": 0.0002451206590810506,
|
|
"loss": 0.8217,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 0.3541666666666667,
|
|
"grad_norm": 0.04270379380461237,
|
|
"learning_rate": 0.0002447367526953746,
|
|
"loss": 0.8779,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.35511363636363635,
|
|
"grad_norm": 0.03961220993813286,
|
|
"learning_rate": 0.0002443518111556407,
|
|
"loss": 0.8625,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 0.3560606060606061,
|
|
"grad_norm": 0.042995120300460946,
|
|
"learning_rate": 0.00024396583866796517,
|
|
"loss": 0.8335,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.35700757575757575,
|
|
"grad_norm": 0.04563005281967126,
|
|
"learning_rate": 0.00024357883944972904,
|
|
"loss": 0.8734,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 0.35795454545454547,
|
|
"grad_norm": 0.046070346094399604,
|
|
"learning_rate": 0.00024319081772953213,
|
|
"loss": 0.8503,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.35890151515151514,
|
|
"grad_norm": 0.046399508495104796,
|
|
"learning_rate": 0.0002428017777471467,
|
|
"loss": 0.8468,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 0.35984848484848486,
|
|
"grad_norm": 0.04153432929534662,
|
|
"learning_rate": 0.0002424117237534712,
|
|
"loss": 0.8511,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.36079545454545453,
|
|
"grad_norm": 0.04277554441903786,
|
|
"learning_rate": 0.0002420206600104839,
|
|
"loss": 0.8517,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 0.36174242424242425,
|
|
"grad_norm": 0.041758108039158866,
|
|
"learning_rate": 0.0002416285907911961,
|
|
"loss": 0.8114,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.3626893939393939,
|
|
"grad_norm": 0.05362310318010687,
|
|
"learning_rate": 0.0002412355203796056,
|
|
"loss": 0.8584,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 0.36363636363636365,
|
|
"grad_norm": 0.0415342758524878,
|
|
"learning_rate": 0.00024084145307064997,
|
|
"loss": 0.8338,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.3645833333333333,
|
|
"grad_norm": 0.04660724457587357,
|
|
"learning_rate": 0.00024044639317015942,
|
|
"loss": 0.8458,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 0.36553030303030304,
|
|
"grad_norm": 0.0420525971980156,
|
|
"learning_rate": 0.00024005034499480983,
|
|
"loss": 0.8127,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.3664772727272727,
|
|
"grad_norm": 0.04142360417103535,
|
|
"learning_rate": 0.0002396533128720757,
|
|
"loss": 0.8255,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 0.36742424242424243,
|
|
"grad_norm": 0.04258598086280745,
|
|
"learning_rate": 0.0002392553011401827,
|
|
"loss": 0.8083,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.3683712121212121,
|
|
"grad_norm": 0.04277812754465849,
|
|
"learning_rate": 0.00023885631414806026,
|
|
"loss": 0.8093,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 0.3693181818181818,
|
|
"grad_norm": 0.048152407374910465,
|
|
"learning_rate": 0.0002384563562552943,
|
|
"loss": 0.8265,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.3702651515151515,
|
|
"grad_norm": 0.047713607129726394,
|
|
"learning_rate": 0.00023805543183207927,
|
|
"loss": 0.8302,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 0.3712121212121212,
|
|
"grad_norm": 0.04316835360005878,
|
|
"learning_rate": 0.00023765354525917063,
|
|
"loss": 0.8699,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.3721590909090909,
|
|
"grad_norm": 0.04318616029335284,
|
|
"learning_rate": 0.0002372507009278368,
|
|
"loss": 0.8369,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 0.3731060606060606,
|
|
"grad_norm": 0.04234533104631891,
|
|
"learning_rate": 0.00023684690323981142,
|
|
"loss": 0.8252,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.3740530303030303,
|
|
"grad_norm": 0.03731733862303514,
|
|
"learning_rate": 0.00023644215660724503,
|
|
"loss": 0.8043,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 0.375,
|
|
"grad_norm": 0.04858576668810662,
|
|
"learning_rate": 0.00023603646545265687,
|
|
"loss": 0.8011,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.3759469696969697,
|
|
"grad_norm": 0.03947493260183539,
|
|
"learning_rate": 0.00023562983420888684,
|
|
"loss": 0.8456,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 0.3768939393939394,
|
|
"grad_norm": 0.03987615045295327,
|
|
"learning_rate": 0.00023522226731904664,
|
|
"loss": 0.8081,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.3778409090909091,
|
|
"grad_norm": 0.04467453055781031,
|
|
"learning_rate": 0.0002348137692364715,
|
|
"loss": 0.8196,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 0.3787878787878788,
|
|
"grad_norm": 0.04289778568763919,
|
|
"learning_rate": 0.00023440434442467152,
|
|
"loss": 0.8242,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.3797348484848485,
|
|
"grad_norm": 0.03957134697373444,
|
|
"learning_rate": 0.00023399399735728277,
|
|
"loss": 0.8271,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 0.3806818181818182,
|
|
"grad_norm": 0.04057581593775104,
|
|
"learning_rate": 0.00023358273251801847,
|
|
"loss": 0.7991,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.3816287878787879,
|
|
"grad_norm": 0.044395062218471865,
|
|
"learning_rate": 0.00023317055440062,
|
|
"loss": 0.8398,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 0.38257575757575757,
|
|
"grad_norm": 0.04570817682106231,
|
|
"learning_rate": 0.00023275746750880784,
|
|
"loss": 0.8499,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.3835227272727273,
|
|
"grad_norm": 0.04410857496355408,
|
|
"learning_rate": 0.00023234347635623233,
|
|
"loss": 0.8344,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 0.38446969696969696,
|
|
"grad_norm": 0.044494824681638484,
|
|
"learning_rate": 0.0002319285854664242,
|
|
"loss": 0.8177,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.3854166666666667,
|
|
"grad_norm": 0.05291529187135586,
|
|
"learning_rate": 0.00023151279937274548,
|
|
"loss": 0.8162,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 0.38636363636363635,
|
|
"grad_norm": 0.0475425548888116,
|
|
"learning_rate": 0.00023109612261833963,
|
|
"loss": 0.836,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.3873106060606061,
|
|
"grad_norm": 0.04795570932520818,
|
|
"learning_rate": 0.00023067855975608204,
|
|
"loss": 0.8017,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 0.38825757575757575,
|
|
"grad_norm": 0.04351083558421903,
|
|
"learning_rate": 0.0002302601153485304,
|
|
"loss": 0.8304,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.38920454545454547,
|
|
"grad_norm": 0.03951679906520293,
|
|
"learning_rate": 0.00022984079396787453,
|
|
"loss": 0.8141,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 0.39015151515151514,
|
|
"grad_norm": 0.04196816207750975,
|
|
"learning_rate": 0.00022942060019588681,
|
|
"loss": 0.8152,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.39109848484848486,
|
|
"grad_norm": 0.04118592006087123,
|
|
"learning_rate": 0.00022899953862387182,
|
|
"loss": 0.8221,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 0.39204545454545453,
|
|
"grad_norm": 0.043398944942405406,
|
|
"learning_rate": 0.00022857761385261624,
|
|
"loss": 0.8784,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.39299242424242425,
|
|
"grad_norm": 0.048877313557472005,
|
|
"learning_rate": 0.0002281548304923387,
|
|
"loss": 0.8301,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 0.3939393939393939,
|
|
"grad_norm": 0.04660152034578749,
|
|
"learning_rate": 0.0002277311931626393,
|
|
"loss": 0.8383,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.39488636363636365,
|
|
"grad_norm": 0.043844576323543855,
|
|
"learning_rate": 0.00022730670649244913,
|
|
"loss": 0.8598,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 0.3958333333333333,
|
|
"grad_norm": 0.04598334857841716,
|
|
"learning_rate": 0.00022688137511997977,
|
|
"loss": 0.8339,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.39678030303030304,
|
|
"grad_norm": 0.044197199133415584,
|
|
"learning_rate": 0.00022645520369267246,
|
|
"loss": 0.8444,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 0.3977272727272727,
|
|
"grad_norm": 0.04324972778637147,
|
|
"learning_rate": 0.00022602819686714745,
|
|
"loss": 0.8347,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.39867424242424243,
|
|
"grad_norm": 0.07113871411866793,
|
|
"learning_rate": 0.00022560035930915308,
|
|
"loss": 0.8084,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 0.3996212121212121,
|
|
"grad_norm": 0.04394138759860652,
|
|
"learning_rate": 0.0002251716956935149,
|
|
"loss": 0.7981,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.4005681818181818,
|
|
"grad_norm": 0.0418228499942872,
|
|
"learning_rate": 0.00022474221070408436,
|
|
"loss": 0.8289,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 0.4015151515151515,
|
|
"grad_norm": 0.04161882179160139,
|
|
"learning_rate": 0.00022431190903368786,
|
|
"loss": 0.847,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.4024621212121212,
|
|
"grad_norm": 0.043508618425632965,
|
|
"learning_rate": 0.00022388079538407523,
|
|
"loss": 0.8437,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 0.4034090909090909,
|
|
"grad_norm": 0.0411877941963478,
|
|
"learning_rate": 0.00022344887446586865,
|
|
"loss": 0.8397,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.4043560606060606,
|
|
"grad_norm": 0.044653146921684504,
|
|
"learning_rate": 0.00022301615099851104,
|
|
"loss": 0.8387,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 0.4053030303030303,
|
|
"grad_norm": 0.039794410056582685,
|
|
"learning_rate": 0.00022258262971021437,
|
|
"loss": 0.8602,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.40625,
|
|
"grad_norm": 0.04159725529141693,
|
|
"learning_rate": 0.00022214831533790813,
|
|
"loss": 0.8418,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 0.4071969696969697,
|
|
"grad_norm": 0.04333570277742372,
|
|
"learning_rate": 0.00022171321262718765,
|
|
"loss": 0.8405,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.4081439393939394,
|
|
"grad_norm": 0.041080619204016525,
|
|
"learning_rate": 0.00022127732633226205,
|
|
"loss": 0.812,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 0.4090909090909091,
|
|
"grad_norm": 0.040348407175793716,
|
|
"learning_rate": 0.0002208406612159024,
|
|
"loss": 0.814,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.4100378787878788,
|
|
"grad_norm": 0.04031482700237761,
|
|
"learning_rate": 0.0002204032220493897,
|
|
"loss": 0.8147,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 0.4109848484848485,
|
|
"grad_norm": 0.039485799909013825,
|
|
"learning_rate": 0.00021996501361246277,
|
|
"loss": 0.8176,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.4119318181818182,
|
|
"grad_norm": 0.04055755183348957,
|
|
"learning_rate": 0.00021952604069326579,
|
|
"loss": 0.7957,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 0.4128787878787879,
|
|
"grad_norm": 0.042423918520902,
|
|
"learning_rate": 0.0002190863080882964,
|
|
"loss": 0.8233,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.41382575757575757,
|
|
"grad_norm": 0.0425176465118192,
|
|
"learning_rate": 0.00021864582060235278,
|
|
"loss": 0.8248,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 0.4147727272727273,
|
|
"grad_norm": 0.04617541469982274,
|
|
"learning_rate": 0.00021820458304848165,
|
|
"loss": 0.8517,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.41571969696969696,
|
|
"grad_norm": 0.04672199768475857,
|
|
"learning_rate": 0.0002177626002479254,
|
|
"loss": 0.8431,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 0.4166666666666667,
|
|
"grad_norm": 0.04438315570807549,
|
|
"learning_rate": 0.00021731987703006933,
|
|
"loss": 0.8259,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.41761363636363635,
|
|
"grad_norm": 0.04427543226414618,
|
|
"learning_rate": 0.00021687641823238914,
|
|
"loss": 0.8297,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 0.4185606060606061,
|
|
"grad_norm": 0.046143213680102727,
|
|
"learning_rate": 0.00021643222870039788,
|
|
"loss": 0.8183,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.41950757575757575,
|
|
"grad_norm": 0.05133388935241887,
|
|
"learning_rate": 0.00021598731328759316,
|
|
"loss": 0.8433,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 0.42045454545454547,
|
|
"grad_norm": 0.04409942894634961,
|
|
"learning_rate": 0.0002155416768554039,
|
|
"loss": 0.8341,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.42140151515151514,
|
|
"grad_norm": 0.040874633772368184,
|
|
"learning_rate": 0.00021509532427313745,
|
|
"loss": 0.8257,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 0.42234848484848486,
|
|
"grad_norm": 0.04536497602517255,
|
|
"learning_rate": 0.00021464826041792616,
|
|
"loss": 0.8265,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.42329545454545453,
|
|
"grad_norm": 0.04063288891442798,
|
|
"learning_rate": 0.0002142004901746743,
|
|
"loss": 0.8157,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 0.42424242424242425,
|
|
"grad_norm": 0.04304936310139827,
|
|
"learning_rate": 0.00021375201843600448,
|
|
"loss": 0.8154,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.4251893939393939,
|
|
"grad_norm": 0.03886996255738611,
|
|
"learning_rate": 0.00021330285010220444,
|
|
"loss": 0.8064,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 0.42613636363636365,
|
|
"grad_norm": 0.0439951415809052,
|
|
"learning_rate": 0.00021285299008117327,
|
|
"loss": 0.8189,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.4270833333333333,
|
|
"grad_norm": 0.04105867295635526,
|
|
"learning_rate": 0.00021240244328836786,
|
|
"loss": 0.8042,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 0.42803030303030304,
|
|
"grad_norm": 0.0413161086382512,
|
|
"learning_rate": 0.0002119512146467492,
|
|
"loss": 0.8416,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.4289772727272727,
|
|
"grad_norm": 0.04014206208769556,
|
|
"learning_rate": 0.00021149930908672868,
|
|
"loss": 0.8185,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 0.42992424242424243,
|
|
"grad_norm": 0.04829740014687004,
|
|
"learning_rate": 0.00021104673154611408,
|
|
"loss": 0.8361,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.4308712121212121,
|
|
"grad_norm": 0.03905046798702236,
|
|
"learning_rate": 0.0002105934869700556,
|
|
"loss": 0.8242,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 0.4318181818181818,
|
|
"grad_norm": 0.053526559819707935,
|
|
"learning_rate": 0.00021013958031099205,
|
|
"loss": 0.8426,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.4327651515151515,
|
|
"grad_norm": 0.04462350019901078,
|
|
"learning_rate": 0.0002096850165285964,
|
|
"loss": 0.8408,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 0.4337121212121212,
|
|
"grad_norm": 0.04177938781732431,
|
|
"learning_rate": 0.00020922980058972194,
|
|
"loss": 0.8295,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.4346590909090909,
|
|
"grad_norm": 0.03988703975583103,
|
|
"learning_rate": 0.00020877393746834768,
|
|
"loss": 0.8324,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 0.4356060606060606,
|
|
"grad_norm": 0.04147526586188008,
|
|
"learning_rate": 0.0002083174321455243,
|
|
"loss": 0.8388,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.4365530303030303,
|
|
"grad_norm": 0.038044695307941134,
|
|
"learning_rate": 0.0002078602896093194,
|
|
"loss": 0.7954,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 0.4375,
|
|
"grad_norm": 0.045559472118838124,
|
|
"learning_rate": 0.00020740251485476345,
|
|
"loss": 0.8813,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.4384469696969697,
|
|
"grad_norm": 0.06279101091973911,
|
|
"learning_rate": 0.0002069441128837947,
|
|
"loss": 0.839,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 0.4393939393939394,
|
|
"grad_norm": 0.04692715704201686,
|
|
"learning_rate": 0.00020648508870520476,
|
|
"loss": 0.8352,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.4403409090909091,
|
|
"grad_norm": 0.051468594729952064,
|
|
"learning_rate": 0.00020602544733458418,
|
|
"loss": 0.839,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 0.4412878787878788,
|
|
"grad_norm": 0.049853534068296984,
|
|
"learning_rate": 0.00020556519379426693,
|
|
"loss": 0.8457,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.4422348484848485,
|
|
"grad_norm": 0.043981249031014996,
|
|
"learning_rate": 0.0002051043331132762,
|
|
"loss": 0.8371,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 0.4431818181818182,
|
|
"grad_norm": 0.044420240602267035,
|
|
"learning_rate": 0.00020464287032726913,
|
|
"loss": 0.889,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.4441287878787879,
|
|
"grad_norm": 0.0386102436252357,
|
|
"learning_rate": 0.00020418081047848187,
|
|
"loss": 0.8372,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 0.44507575757575757,
|
|
"grad_norm": 0.04140201905259435,
|
|
"learning_rate": 0.00020371815861567428,
|
|
"loss": 0.8336,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.4460227272727273,
|
|
"grad_norm": 0.04249364186602932,
|
|
"learning_rate": 0.00020325491979407523,
|
|
"loss": 0.8116,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 0.44696969696969696,
|
|
"grad_norm": 0.04505263140825889,
|
|
"learning_rate": 0.00020279109907532693,
|
|
"loss": 0.8089,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.4479166666666667,
|
|
"grad_norm": 0.04905451772451616,
|
|
"learning_rate": 0.0002023267015274296,
|
|
"loss": 0.8161,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 0.44886363636363635,
|
|
"grad_norm": 0.044083131589713095,
|
|
"learning_rate": 0.0002018617322246866,
|
|
"loss": 0.7928,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.4498106060606061,
|
|
"grad_norm": 0.04087182152366722,
|
|
"learning_rate": 0.0002013961962476484,
|
|
"loss": 0.8176,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 0.45075757575757575,
|
|
"grad_norm": 0.03992981513666836,
|
|
"learning_rate": 0.0002009300986830574,
|
|
"loss": 0.8202,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.45170454545454547,
|
|
"grad_norm": 0.04118122404075756,
|
|
"learning_rate": 0.00020046344462379222,
|
|
"loss": 0.8084,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 0.45265151515151514,
|
|
"grad_norm": 0.03836846591943074,
|
|
"learning_rate": 0.00019999623916881217,
|
|
"loss": 0.7813,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.45359848484848486,
|
|
"grad_norm": 0.042256616559667545,
|
|
"learning_rate": 0.0001995284874231014,
|
|
"loss": 0.8405,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 0.45454545454545453,
|
|
"grad_norm": 0.041816536986922656,
|
|
"learning_rate": 0.00019906019449761325,
|
|
"loss": 0.8265,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.45549242424242425,
|
|
"grad_norm": 0.03818514732175339,
|
|
"learning_rate": 0.0001985913655092142,
|
|
"loss": 0.829,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 0.4564393939393939,
|
|
"grad_norm": 0.040830601356944725,
|
|
"learning_rate": 0.00019812200558062817,
|
|
"loss": 0.833,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.45738636363636365,
|
|
"grad_norm": 0.04011921388896301,
|
|
"learning_rate": 0.0001976521198403806,
|
|
"loss": 0.7861,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 0.4583333333333333,
|
|
"grad_norm": 0.044386214010181065,
|
|
"learning_rate": 0.00019718171342274205,
|
|
"loss": 0.8065,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.45928030303030304,
|
|
"grad_norm": 0.039421528176134195,
|
|
"learning_rate": 0.00019671079146767244,
|
|
"loss": 0.8064,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 0.4602272727272727,
|
|
"grad_norm": 0.039658502246534034,
|
|
"learning_rate": 0.00019623935912076488,
|
|
"loss": 0.8319,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.46117424242424243,
|
|
"grad_norm": 0.04350429112923609,
|
|
"learning_rate": 0.00019576742153318914,
|
|
"loss": 0.7962,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 0.4621212121212121,
|
|
"grad_norm": 0.042692790817445286,
|
|
"learning_rate": 0.0001952949838616357,
|
|
"loss": 0.8373,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.4630681818181818,
|
|
"grad_norm": 0.04339398756235945,
|
|
"learning_rate": 0.00019482205126825937,
|
|
"loss": 0.8022,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 0.4640151515151515,
|
|
"grad_norm": 0.04304166274280883,
|
|
"learning_rate": 0.0001943486289206225,
|
|
"loss": 0.8106,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.4649621212121212,
|
|
"grad_norm": 0.043750220078402964,
|
|
"learning_rate": 0.0001938747219916391,
|
|
"loss": 0.8435,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 0.4659090909090909,
|
|
"grad_norm": 0.04664680007453336,
|
|
"learning_rate": 0.0001934003356595179,
|
|
"loss": 0.8472,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.4668560606060606,
|
|
"grad_norm": 0.041222110220423554,
|
|
"learning_rate": 0.00019292547510770585,
|
|
"loss": 0.7787,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 0.4678030303030303,
|
|
"grad_norm": 0.044722804250566385,
|
|
"learning_rate": 0.00019245014552483162,
|
|
"loss": 0.8394,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.46875,
|
|
"grad_norm": 0.0404146031607715,
|
|
"learning_rate": 0.00019197435210464882,
|
|
"loss": 0.8154,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 0.4696969696969697,
|
|
"grad_norm": 0.04023149404763966,
|
|
"learning_rate": 0.00019149810004597903,
|
|
"loss": 0.8191,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.4706439393939394,
|
|
"grad_norm": 0.042874304030485456,
|
|
"learning_rate": 0.00019102139455265556,
|
|
"loss": 0.815,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 0.4715909090909091,
|
|
"grad_norm": 0.042901399276201645,
|
|
"learning_rate": 0.00019054424083346592,
|
|
"loss": 0.8254,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.4725378787878788,
|
|
"grad_norm": 0.048218277435419815,
|
|
"learning_rate": 0.00019006664410209533,
|
|
"loss": 0.8005,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 0.4734848484848485,
|
|
"grad_norm": 0.04801481036784791,
|
|
"learning_rate": 0.00018958860957706973,
|
|
"loss": 0.7971,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.4744318181818182,
|
|
"grad_norm": 0.045807113572995425,
|
|
"learning_rate": 0.00018911014248169862,
|
|
"loss": 0.8308,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 0.4753787878787879,
|
|
"grad_norm": 0.04157428170316455,
|
|
"learning_rate": 0.00018863124804401792,
|
|
"loss": 0.7937,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.47632575757575757,
|
|
"grad_norm": 0.044060198787386526,
|
|
"learning_rate": 0.0001881519314967331,
|
|
"loss": 0.8345,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 0.4772727272727273,
|
|
"grad_norm": 0.04229775014082669,
|
|
"learning_rate": 0.00018767219807716185,
|
|
"loss": 0.7952,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.47821969696969696,
|
|
"grad_norm": 0.038797572756557264,
|
|
"learning_rate": 0.00018719205302717687,
|
|
"loss": 0.8176,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 0.4791666666666667,
|
|
"grad_norm": 0.03699768912795355,
|
|
"learning_rate": 0.00018671150159314855,
|
|
"loss": 0.8063,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.48011363636363635,
|
|
"grad_norm": 0.044197153844287275,
|
|
"learning_rate": 0.00018623054902588775,
|
|
"loss": 0.8083,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 0.4810606060606061,
|
|
"grad_norm": 0.04091887354099648,
|
|
"learning_rate": 0.00018574920058058824,
|
|
"loss": 0.807,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.48200757575757575,
|
|
"grad_norm": 0.48718224838979884,
|
|
"learning_rate": 0.0001852674615167696,
|
|
"loss": 0.8124,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 0.48295454545454547,
|
|
"grad_norm": 0.08791099719174246,
|
|
"learning_rate": 0.00018478533709821946,
|
|
"loss": 0.8227,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.48390151515151514,
|
|
"grad_norm": 0.049480601037611455,
|
|
"learning_rate": 0.000184302832592936,
|
|
"loss": 0.8321,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 0.48484848484848486,
|
|
"grad_norm": 0.0429607961370786,
|
|
"learning_rate": 0.00018381995327307067,
|
|
"loss": 0.8178,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.48579545454545453,
|
|
"grad_norm": 0.0432423766004113,
|
|
"learning_rate": 0.0001833367044148701,
|
|
"loss": 0.7845,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 0.48674242424242425,
|
|
"grad_norm": 0.04315753569890211,
|
|
"learning_rate": 0.00018285309129861905,
|
|
"loss": 0.8346,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.4876893939393939,
|
|
"grad_norm": 0.04241261149768791,
|
|
"learning_rate": 0.00018236911920858215,
|
|
"loss": 0.8322,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 0.48863636363636365,
|
|
"grad_norm": 0.042058874287831324,
|
|
"learning_rate": 0.00018188479343294648,
|
|
"loss": 0.8246,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.4895833333333333,
|
|
"grad_norm": 0.04305771883252671,
|
|
"learning_rate": 0.0001814001192637638,
|
|
"loss": 0.826,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 0.49053030303030304,
|
|
"grad_norm": 0.041842967176783166,
|
|
"learning_rate": 0.0001809151019968925,
|
|
"loss": 0.7911,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.4914772727272727,
|
|
"grad_norm": 0.04061701352923131,
|
|
"learning_rate": 0.00018042974693193998,
|
|
"loss": 0.797,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 0.49242424242424243,
|
|
"grad_norm": 0.04647922786872379,
|
|
"learning_rate": 0.0001799440593722046,
|
|
"loss": 0.7946,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.4933712121212121,
|
|
"grad_norm": 0.04039980658232629,
|
|
"learning_rate": 0.00017945804462461776,
|
|
"loss": 0.8,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 0.4943181818181818,
|
|
"grad_norm": 0.03886277509185924,
|
|
"learning_rate": 0.00017897170799968583,
|
|
"loss": 0.7849,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.4952651515151515,
|
|
"grad_norm": 0.038833022410046734,
|
|
"learning_rate": 0.00017848505481143253,
|
|
"loss": 0.844,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 0.4962121212121212,
|
|
"grad_norm": 0.03944433422520882,
|
|
"learning_rate": 0.00017799809037734017,
|
|
"loss": 0.8163,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.4971590909090909,
|
|
"grad_norm": 0.043008540505396285,
|
|
"learning_rate": 0.00017751082001829215,
|
|
"loss": 0.8258,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 0.4981060606060606,
|
|
"grad_norm": 0.040199050500284966,
|
|
"learning_rate": 0.00017702324905851456,
|
|
"loss": 0.8315,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.4990530303030303,
|
|
"grad_norm": 0.040791783573880774,
|
|
"learning_rate": 0.00017653538282551805,
|
|
"loss": 0.7863,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 0.0442708357721425,
|
|
"learning_rate": 0.00017604722665003956,
|
|
"loss": 0.8213,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.5009469696969697,
|
|
"grad_norm": 0.04301849579782737,
|
|
"learning_rate": 0.00017555878586598413,
|
|
"loss": 0.8236,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 0.5018939393939394,
|
|
"grad_norm": 0.04101785865093793,
|
|
"learning_rate": 0.00017507006581036678,
|
|
"loss": 0.8062,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.5028409090909091,
|
|
"grad_norm": 0.04131104487003713,
|
|
"learning_rate": 0.00017458107182325374,
|
|
"loss": 0.8257,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 0.5037878787878788,
|
|
"grad_norm": 0.043009445132835604,
|
|
"learning_rate": 0.00017409180924770468,
|
|
"loss": 0.8165,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.5047348484848485,
|
|
"grad_norm": 0.03958181194344823,
|
|
"learning_rate": 0.00017360228342971383,
|
|
"loss": 0.8325,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 0.5056818181818182,
|
|
"grad_norm": 0.0397542558833111,
|
|
"learning_rate": 0.00017311249971815185,
|
|
"loss": 0.798,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.5066287878787878,
|
|
"grad_norm": 0.04623083615061935,
|
|
"learning_rate": 0.00017262246346470733,
|
|
"loss": 0.8354,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 0.5075757575757576,
|
|
"grad_norm": 0.04071727047371299,
|
|
"learning_rate": 0.0001721321800238283,
|
|
"loss": 0.7985,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.5085227272727273,
|
|
"grad_norm": 0.03863023788708821,
|
|
"learning_rate": 0.00017164165475266362,
|
|
"loss": 0.8162,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 0.509469696969697,
|
|
"grad_norm": 0.03916331940406132,
|
|
"learning_rate": 0.0001711508930110047,
|
|
"loss": 0.7845,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.5104166666666666,
|
|
"grad_norm": 0.03931132854415474,
|
|
"learning_rate": 0.0001706599001612266,
|
|
"loss": 0.7776,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 0.5113636363636364,
|
|
"grad_norm": 0.043368245986302636,
|
|
"learning_rate": 0.00017016868156822978,
|
|
"loss": 0.8054,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.5123106060606061,
|
|
"grad_norm": 0.03992828095880944,
|
|
"learning_rate": 0.00016967724259938123,
|
|
"loss": 0.7988,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 0.5132575757575758,
|
|
"grad_norm": 0.04739921469866324,
|
|
"learning_rate": 0.00016918558862445582,
|
|
"loss": 0.7943,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.5142045454545454,
|
|
"grad_norm": 0.04332958886748095,
|
|
"learning_rate": 0.00016869372501557788,
|
|
"loss": 0.819,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 0.5151515151515151,
|
|
"grad_norm": 0.04003282248966938,
|
|
"learning_rate": 0.00016820165714716227,
|
|
"loss": 0.8292,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.5160984848484849,
|
|
"grad_norm": 0.04862073540042466,
|
|
"learning_rate": 0.00016770939039585571,
|
|
"loss": 0.827,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 0.5170454545454546,
|
|
"grad_norm": 0.04320764468844852,
|
|
"learning_rate": 0.00016721693014047805,
|
|
"loss": 0.804,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.5179924242424242,
|
|
"grad_norm": 0.041252462956353965,
|
|
"learning_rate": 0.00016672428176196344,
|
|
"loss": 0.7767,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 0.5189393939393939,
|
|
"grad_norm": 0.040985992228162016,
|
|
"learning_rate": 0.00016623145064330162,
|
|
"loss": 0.8092,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.5198863636363636,
|
|
"grad_norm": 0.042258864981091555,
|
|
"learning_rate": 0.0001657384421694791,
|
|
"loss": 0.7994,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 0.5208333333333334,
|
|
"grad_norm": 0.04021897972485824,
|
|
"learning_rate": 0.00016524526172742026,
|
|
"loss": 0.784,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.521780303030303,
|
|
"grad_norm": 0.04326415978694196,
|
|
"learning_rate": 0.0001647519147059285,
|
|
"loss": 0.8047,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 0.5227272727272727,
|
|
"grad_norm": 0.04410213043970681,
|
|
"learning_rate": 0.00016425840649562736,
|
|
"loss": 0.8126,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.5236742424242424,
|
|
"grad_norm": 0.04353103158360417,
|
|
"learning_rate": 0.00016376474248890171,
|
|
"loss": 0.8286,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 0.5246212121212122,
|
|
"grad_norm": 0.040750168292872986,
|
|
"learning_rate": 0.00016327092807983865,
|
|
"loss": 0.808,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.5255681818181818,
|
|
"grad_norm": 0.03802927883237263,
|
|
"learning_rate": 0.0001627769686641687,
|
|
"loss": 0.8053,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 0.5265151515151515,
|
|
"grad_norm": 0.04157749031417506,
|
|
"learning_rate": 0.0001622828696392069,
|
|
"loss": 0.8244,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.5274621212121212,
|
|
"grad_norm": 0.03901280596306352,
|
|
"learning_rate": 0.00016178863640379357,
|
|
"loss": 0.8057,
|
|
"step": 2785
|
|
},
|
|
{
|
|
"epoch": 0.5284090909090909,
|
|
"grad_norm": 0.04452601323911491,
|
|
"learning_rate": 0.0001612942743582357,
|
|
"loss": 0.8382,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.5293560606060606,
|
|
"grad_norm": 0.05541519438754282,
|
|
"learning_rate": 0.0001607997889042476,
|
|
"loss": 0.841,
|
|
"step": 2795
|
|
},
|
|
{
|
|
"epoch": 0.5303030303030303,
|
|
"grad_norm": 0.058989969809454576,
|
|
"learning_rate": 0.00016030518544489213,
|
|
"loss": 0.8176,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.53125,
|
|
"grad_norm": 0.05002098578052214,
|
|
"learning_rate": 0.00015981046938452146,
|
|
"loss": 0.8002,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 0.5321969696969697,
|
|
"grad_norm": 0.08021694387476866,
|
|
"learning_rate": 0.00015931564612871812,
|
|
"loss": 0.81,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.5331439393939394,
|
|
"grad_norm": 0.045649438932203974,
|
|
"learning_rate": 0.00015882072108423594,
|
|
"loss": 0.7931,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 0.5340909090909091,
|
|
"grad_norm": 0.042928318104272216,
|
|
"learning_rate": 0.000158325699658941,
|
|
"loss": 0.8097,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.5350378787878788,
|
|
"grad_norm": 0.041290327628681206,
|
|
"learning_rate": 0.0001578305872617525,
|
|
"loss": 0.8009,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 0.5359848484848485,
|
|
"grad_norm": 0.043370278921034656,
|
|
"learning_rate": 0.0001573353893025835,
|
|
"loss": 0.8072,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.5369318181818182,
|
|
"grad_norm": 0.04062057924038298,
|
|
"learning_rate": 0.00015684011119228224,
|
|
"loss": 0.8135,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 0.5378787878787878,
|
|
"grad_norm": 0.03956842259978757,
|
|
"learning_rate": 0.00015634475834257246,
|
|
"loss": 0.8083,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.5388257575757576,
|
|
"grad_norm": 0.04065029502341851,
|
|
"learning_rate": 0.00015584933616599473,
|
|
"loss": 0.8252,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 0.5397727272727273,
|
|
"grad_norm": 0.0406473529931244,
|
|
"learning_rate": 0.00015535385007584706,
|
|
"loss": 0.788,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.540719696969697,
|
|
"grad_norm": 0.040142414490938486,
|
|
"learning_rate": 0.0001548583054861259,
|
|
"loss": 0.7869,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 0.5416666666666666,
|
|
"grad_norm": 0.03672349497309872,
|
|
"learning_rate": 0.0001543627078114667,
|
|
"loss": 0.7999,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.5426136363636364,
|
|
"grad_norm": 0.0430040050376934,
|
|
"learning_rate": 0.00015386706246708524,
|
|
"loss": 0.8061,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 0.5435606060606061,
|
|
"grad_norm": 0.0395889490877207,
|
|
"learning_rate": 0.00015337137486871796,
|
|
"loss": 0.7938,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.5445075757575758,
|
|
"grad_norm": 0.04178894808034304,
|
|
"learning_rate": 0.00015287565043256302,
|
|
"loss": 0.7898,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 0.5454545454545454,
|
|
"grad_norm": 0.04229269873654704,
|
|
"learning_rate": 0.00015237989457522118,
|
|
"loss": 0.8025,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.5464015151515151,
|
|
"grad_norm": 0.04274569352435655,
|
|
"learning_rate": 0.00015188411271363646,
|
|
"loss": 0.8477,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 0.5473484848484849,
|
|
"grad_norm": 0.04018997606928589,
|
|
"learning_rate": 0.00015138831026503702,
|
|
"loss": 0.8121,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.5482954545454546,
|
|
"grad_norm": 0.04487654715701671,
|
|
"learning_rate": 0.00015089249264687603,
|
|
"loss": 0.7961,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 0.5492424242424242,
|
|
"grad_norm": 0.03928757645472925,
|
|
"learning_rate": 0.00015039666527677233,
|
|
"loss": 0.8406,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.5501893939393939,
|
|
"grad_norm": 0.042611840013764696,
|
|
"learning_rate": 0.00014990083357245128,
|
|
"loss": 0.7913,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 0.5511363636363636,
|
|
"grad_norm": 0.04292928983412645,
|
|
"learning_rate": 0.0001494050029516858,
|
|
"loss": 0.7977,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.5520833333333334,
|
|
"grad_norm": 0.038369849549461224,
|
|
"learning_rate": 0.00014890917883223677,
|
|
"loss": 0.8199,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 0.553030303030303,
|
|
"grad_norm": 0.04541317576278365,
|
|
"learning_rate": 0.00014841336663179406,
|
|
"loss": 0.8091,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.5539772727272727,
|
|
"grad_norm": 0.03684570730569431,
|
|
"learning_rate": 0.00014791757176791742,
|
|
"loss": 0.8195,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 0.5549242424242424,
|
|
"grad_norm": 0.03872377757151605,
|
|
"learning_rate": 0.00014742179965797705,
|
|
"loss": 0.8107,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.5558712121212122,
|
|
"grad_norm": 0.0395990381746492,
|
|
"learning_rate": 0.00014692605571909462,
|
|
"loss": 0.8034,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 0.5568181818181818,
|
|
"grad_norm": 0.03886126907028109,
|
|
"learning_rate": 0.00014643034536808387,
|
|
"loss": 0.7968,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.5577651515151515,
|
|
"grad_norm": 0.039986116602809194,
|
|
"learning_rate": 0.00014593467402139164,
|
|
"loss": 0.7946,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 0.5587121212121212,
|
|
"grad_norm": 0.03812573349649333,
|
|
"learning_rate": 0.00014543904709503854,
|
|
"loss": 0.7866,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.5596590909090909,
|
|
"grad_norm": 0.03932184467330868,
|
|
"learning_rate": 0.0001449434700045599,
|
|
"loss": 0.8019,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 0.5606060606060606,
|
|
"grad_norm": 0.04445798754887963,
|
|
"learning_rate": 0.00014444794816494626,
|
|
"loss": 0.825,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.5615530303030303,
|
|
"grad_norm": 0.04248501207056126,
|
|
"learning_rate": 0.0001439524869905848,
|
|
"loss": 0.8226,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 0.5625,
|
|
"grad_norm": 0.040603736504944546,
|
|
"learning_rate": 0.0001434570918951996,
|
|
"loss": 0.8263,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.5634469696969697,
|
|
"grad_norm": 0.044713609887935456,
|
|
"learning_rate": 0.00014296176829179275,
|
|
"loss": 0.7915,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 0.5643939393939394,
|
|
"grad_norm": 0.042449864997604524,
|
|
"learning_rate": 0.00014246652159258526,
|
|
"loss": 0.7896,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.5653409090909091,
|
|
"grad_norm": 0.03798962681468922,
|
|
"learning_rate": 0.0001419713572089577,
|
|
"loss": 0.9055,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 0.5662878787878788,
|
|
"grad_norm": 0.04786757520604103,
|
|
"learning_rate": 0.0001414762805513914,
|
|
"loss": 0.8006,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.5672348484848485,
|
|
"grad_norm": 0.04252660263305811,
|
|
"learning_rate": 0.00014098129702940892,
|
|
"loss": 0.7907,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 0.5681818181818182,
|
|
"grad_norm": 0.043829106135149745,
|
|
"learning_rate": 0.00014048641205151533,
|
|
"loss": 0.7872,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.5691287878787878,
|
|
"grad_norm": 0.03916395834671137,
|
|
"learning_rate": 0.0001399916310251388,
|
|
"loss": 0.7761,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 0.5700757575757576,
|
|
"grad_norm": 0.03967852059796232,
|
|
"learning_rate": 0.00013949695935657193,
|
|
"loss": 0.7951,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.5710227272727273,
|
|
"grad_norm": 0.04226287650595886,
|
|
"learning_rate": 0.00013900240245091203,
|
|
"loss": 0.7765,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 0.571969696969697,
|
|
"grad_norm": 0.04195996621258936,
|
|
"learning_rate": 0.00013850796571200264,
|
|
"loss": 0.8174,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.5729166666666666,
|
|
"grad_norm": 0.04343356999935917,
|
|
"learning_rate": 0.00013801365454237444,
|
|
"loss": 0.8048,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 0.5738636363636364,
|
|
"grad_norm": 0.038284495114294666,
|
|
"learning_rate": 0.00013751947434318564,
|
|
"loss": 0.7818,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.5748106060606061,
|
|
"grad_norm": 0.04286762969801166,
|
|
"learning_rate": 0.00013702543051416383,
|
|
"loss": 0.7904,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 0.5757575757575758,
|
|
"grad_norm": 0.03931256543968111,
|
|
"learning_rate": 0.00013653152845354623,
|
|
"loss": 0.8209,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.5767045454545454,
|
|
"grad_norm": 0.04052581793415016,
|
|
"learning_rate": 0.0001360377735580212,
|
|
"loss": 0.7895,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 0.5776515151515151,
|
|
"grad_norm": 0.04013847290742192,
|
|
"learning_rate": 0.00013554417122266888,
|
|
"loss": 0.7997,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.5785984848484849,
|
|
"grad_norm": 0.04225466886048973,
|
|
"learning_rate": 0.00013505072684090263,
|
|
"loss": 0.8018,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 0.5795454545454546,
|
|
"grad_norm": 0.04063423310025803,
|
|
"learning_rate": 0.00013455744580440982,
|
|
"loss": 0.8103,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.5804924242424242,
|
|
"grad_norm": 0.041112600968304276,
|
|
"learning_rate": 0.00013406433350309304,
|
|
"loss": 0.771,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 0.5814393939393939,
|
|
"grad_norm": 0.043166425761174104,
|
|
"learning_rate": 0.0001335713953250111,
|
|
"loss": 0.7813,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.5823863636363636,
|
|
"grad_norm": 0.042559737261154675,
|
|
"learning_rate": 0.0001330786366563203,
|
|
"loss": 0.7795,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 0.5833333333333334,
|
|
"grad_norm": 0.039294962688869624,
|
|
"learning_rate": 0.00013258606288121542,
|
|
"loss": 0.7852,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.584280303030303,
|
|
"grad_norm": 0.039625489168065825,
|
|
"learning_rate": 0.00013209367938187125,
|
|
"loss": 0.7602,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 0.5852272727272727,
|
|
"grad_norm": 0.038997900760427306,
|
|
"learning_rate": 0.000131601491538383,
|
|
"loss": 0.78,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.5861742424242424,
|
|
"grad_norm": 0.040146555417594515,
|
|
"learning_rate": 0.00013110950472870853,
|
|
"loss": 0.8004,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 0.5871212121212122,
|
|
"grad_norm": 0.039349933500868364,
|
|
"learning_rate": 0.00013061772432860886,
|
|
"loss": 0.8254,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.5880681818181818,
|
|
"grad_norm": 0.040521068783339456,
|
|
"learning_rate": 0.0001301261557115895,
|
|
"loss": 0.7688,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 0.5890151515151515,
|
|
"grad_norm": 0.04280174220822872,
|
|
"learning_rate": 0.00012963480424884214,
|
|
"loss": 0.7883,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.5899621212121212,
|
|
"grad_norm": 0.04000965172907218,
|
|
"learning_rate": 0.00012914367530918557,
|
|
"loss": 0.7733,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 0.5909090909090909,
|
|
"grad_norm": 0.04058021099794167,
|
|
"learning_rate": 0.00012865277425900724,
|
|
"loss": 0.7816,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.5918560606060606,
|
|
"grad_norm": 0.044091148466870456,
|
|
"learning_rate": 0.00012816210646220437,
|
|
"loss": 0.7797,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 0.5928030303030303,
|
|
"grad_norm": 0.042915659449647994,
|
|
"learning_rate": 0.00012767167728012566,
|
|
"loss": 0.787,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.59375,
|
|
"grad_norm": 0.04044950460083324,
|
|
"learning_rate": 0.00012718149207151247,
|
|
"loss": 0.8153,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 0.5946969696969697,
|
|
"grad_norm": 0.035974336074393466,
|
|
"learning_rate": 0.00012669155619244048,
|
|
"loss": 0.7665,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.5956439393939394,
|
|
"grad_norm": 0.03990766970124255,
|
|
"learning_rate": 0.00012620187499626082,
|
|
"loss": 0.7814,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 0.5965909090909091,
|
|
"grad_norm": 0.04063094274983586,
|
|
"learning_rate": 0.00012571245383354192,
|
|
"loss": 0.8079,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.5975378787878788,
|
|
"grad_norm": 0.04192680886047405,
|
|
"learning_rate": 0.00012522329805201104,
|
|
"loss": 0.7851,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 0.5984848484848485,
|
|
"grad_norm": 0.0405476068701757,
|
|
"learning_rate": 0.00012473441299649544,
|
|
"loss": 0.8231,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.5994318181818182,
|
|
"grad_norm": 0.040569633945997545,
|
|
"learning_rate": 0.0001242458040088644,
|
|
"loss": 0.7737,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 0.6003787878787878,
|
|
"grad_norm": 0.038360447057587385,
|
|
"learning_rate": 0.00012375747642797083,
|
|
"loss": 0.7874,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 0.6013257575757576,
|
|
"grad_norm": 0.04006212810733869,
|
|
"learning_rate": 0.00012326943558959265,
|
|
"loss": 0.7899,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 0.6022727272727273,
|
|
"grad_norm": 0.04200526076077111,
|
|
"learning_rate": 0.0001227816868263746,
|
|
"loss": 0.8006,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.603219696969697,
|
|
"grad_norm": 0.04132950189249958,
|
|
"learning_rate": 0.0001222942354677702,
|
|
"loss": 0.7927,
|
|
"step": 3185
|
|
},
|
|
{
|
|
"epoch": 0.6041666666666666,
|
|
"grad_norm": 0.039846023645240154,
|
|
"learning_rate": 0.00012180708683998321,
|
|
"loss": 0.8127,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 0.6051136363636364,
|
|
"grad_norm": 0.0370381211582106,
|
|
"learning_rate": 0.00012132024626590963,
|
|
"loss": 0.7977,
|
|
"step": 3195
|
|
},
|
|
{
|
|
"epoch": 0.6060606060606061,
|
|
"grad_norm": 0.03637545691166675,
|
|
"learning_rate": 0.00012083371906507937,
|
|
"loss": 0.7972,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.6070075757575758,
|
|
"grad_norm": 0.03719020082784945,
|
|
"learning_rate": 0.00012034751055359836,
|
|
"loss": 0.7944,
|
|
"step": 3205
|
|
},
|
|
{
|
|
"epoch": 0.6079545454545454,
|
|
"grad_norm": 0.04061887180440516,
|
|
"learning_rate": 0.00011986162604409015,
|
|
"loss": 0.8207,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 0.6089015151515151,
|
|
"grad_norm": 0.03857442410511439,
|
|
"learning_rate": 0.00011937607084563836,
|
|
"loss": 0.7841,
|
|
"step": 3215
|
|
},
|
|
{
|
|
"epoch": 0.6098484848484849,
|
|
"grad_norm": 0.03544743527411389,
|
|
"learning_rate": 0.00011889085026372792,
|
|
"loss": 0.7499,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.6107954545454546,
|
|
"grad_norm": 0.040036064198766305,
|
|
"learning_rate": 0.00011840596960018779,
|
|
"loss": 0.7856,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 0.6117424242424242,
|
|
"grad_norm": 0.03717988537059713,
|
|
"learning_rate": 0.00011792143415313285,
|
|
"loss": 0.7884,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 0.6126893939393939,
|
|
"grad_norm": 0.038107331310669845,
|
|
"learning_rate": 0.00011743724921690557,
|
|
"loss": 0.8106,
|
|
"step": 3235
|
|
},
|
|
{
|
|
"epoch": 0.6136363636363636,
|
|
"grad_norm": 0.04482871942046956,
|
|
"learning_rate": 0.00011695342008201888,
|
|
"loss": 0.7865,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.6145833333333334,
|
|
"grad_norm": 0.04317100302292292,
|
|
"learning_rate": 0.00011646995203509786,
|
|
"loss": 0.7826,
|
|
"step": 3245
|
|
},
|
|
{
|
|
"epoch": 0.615530303030303,
|
|
"grad_norm": 0.039413143234785654,
|
|
"learning_rate": 0.00011598685035882209,
|
|
"loss": 0.8101,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.6164772727272727,
|
|
"grad_norm": 0.04160937673240829,
|
|
"learning_rate": 0.00011550412033186792,
|
|
"loss": 0.8075,
|
|
"step": 3255
|
|
},
|
|
{
|
|
"epoch": 0.6174242424242424,
|
|
"grad_norm": 0.04265804651359686,
|
|
"learning_rate": 0.00011502176722885092,
|
|
"loss": 0.7775,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.6183712121212122,
|
|
"grad_norm": 0.03635722191647411,
|
|
"learning_rate": 0.00011453979632026809,
|
|
"loss": 0.791,
|
|
"step": 3265
|
|
},
|
|
{
|
|
"epoch": 0.6193181818181818,
|
|
"grad_norm": 0.03705221554060922,
|
|
"learning_rate": 0.00011405821287244035,
|
|
"loss": 0.8008,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 0.6202651515151515,
|
|
"grad_norm": 0.042403810547206766,
|
|
"learning_rate": 0.00011357702214745493,
|
|
"loss": 0.7652,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 0.6212121212121212,
|
|
"grad_norm": 0.03979753737480537,
|
|
"learning_rate": 0.00011309622940310798,
|
|
"loss": 0.7991,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.6221590909090909,
|
|
"grad_norm": 0.03836363646080294,
|
|
"learning_rate": 0.00011261583989284712,
|
|
"loss": 0.803,
|
|
"step": 3285
|
|
},
|
|
{
|
|
"epoch": 0.6231060606060606,
|
|
"grad_norm": 0.04244615364903799,
|
|
"learning_rate": 0.00011213585886571376,
|
|
"loss": 0.8072,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 0.6240530303030303,
|
|
"grad_norm": 0.04283582850640676,
|
|
"learning_rate": 0.00011165629156628613,
|
|
"loss": 0.7861,
|
|
"step": 3295
|
|
},
|
|
{
|
|
"epoch": 0.625,
|
|
"grad_norm": 0.038461780382639685,
|
|
"learning_rate": 0.00011117714323462186,
|
|
"loss": 0.7835,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.6259469696969697,
|
|
"grad_norm": 0.03744497970084062,
|
|
"learning_rate": 0.00011069841910620057,
|
|
"loss": 0.8062,
|
|
"step": 3305
|
|
},
|
|
{
|
|
"epoch": 0.6268939393939394,
|
|
"grad_norm": 0.04483128738934721,
|
|
"learning_rate": 0.00011022012441186671,
|
|
"loss": 0.7961,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 0.6278409090909091,
|
|
"grad_norm": 0.04424941740338033,
|
|
"learning_rate": 0.00010974226437777261,
|
|
"loss": 0.7949,
|
|
"step": 3315
|
|
},
|
|
{
|
|
"epoch": 0.6287878787878788,
|
|
"grad_norm": 0.04251454254286352,
|
|
"learning_rate": 0.0001092648442253211,
|
|
"loss": 0.7725,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.6297348484848485,
|
|
"grad_norm": 0.040105374119544505,
|
|
"learning_rate": 0.0001087878691711087,
|
|
"loss": 0.8147,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 0.6306818181818182,
|
|
"grad_norm": 0.04164828498994665,
|
|
"learning_rate": 0.00010831134442686835,
|
|
"loss": 0.8076,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 0.6316287878787878,
|
|
"grad_norm": 0.04194724008380887,
|
|
"learning_rate": 0.00010783527519941272,
|
|
"loss": 0.7514,
|
|
"step": 3335
|
|
},
|
|
{
|
|
"epoch": 0.6325757575757576,
|
|
"grad_norm": 0.04282010148959667,
|
|
"learning_rate": 0.00010735966669057723,
|
|
"loss": 0.8084,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.6335227272727273,
|
|
"grad_norm": 0.037751992950868556,
|
|
"learning_rate": 0.00010688452409716325,
|
|
"loss": 0.7971,
|
|
"step": 3345
|
|
},
|
|
{
|
|
"epoch": 0.634469696969697,
|
|
"grad_norm": 0.040981833047628674,
|
|
"learning_rate": 0.00010640985261088102,
|
|
"loss": 0.8259,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.6354166666666666,
|
|
"grad_norm": 0.03623074593719334,
|
|
"learning_rate": 0.00010593565741829331,
|
|
"loss": 0.7584,
|
|
"step": 3355
|
|
},
|
|
{
|
|
"epoch": 0.6363636363636364,
|
|
"grad_norm": 0.04085407578588483,
|
|
"learning_rate": 0.00010546194370075881,
|
|
"loss": 0.7941,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.6373106060606061,
|
|
"grad_norm": 0.04107679689904555,
|
|
"learning_rate": 0.00010498871663437485,
|
|
"loss": 0.7985,
|
|
"step": 3365
|
|
},
|
|
{
|
|
"epoch": 0.6382575757575758,
|
|
"grad_norm": 0.03850210602630568,
|
|
"learning_rate": 0.00010451598138992173,
|
|
"loss": 0.7737,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 0.6392045454545454,
|
|
"grad_norm": 0.0375973308222491,
|
|
"learning_rate": 0.00010404374313280557,
|
|
"loss": 0.7849,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 0.6401515151515151,
|
|
"grad_norm": 0.03545282006804828,
|
|
"learning_rate": 0.00010357200702300214,
|
|
"loss": 0.7993,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.6410984848484849,
|
|
"grad_norm": 0.04099405038912456,
|
|
"learning_rate": 0.0001031007782150004,
|
|
"loss": 0.7879,
|
|
"step": 3385
|
|
},
|
|
{
|
|
"epoch": 0.6420454545454546,
|
|
"grad_norm": 0.04269554474417421,
|
|
"learning_rate": 0.00010263006185774627,
|
|
"loss": 0.7559,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 0.6429924242424242,
|
|
"grad_norm": 0.039655024113479126,
|
|
"learning_rate": 0.00010215986309458622,
|
|
"loss": 0.7633,
|
|
"step": 3395
|
|
},
|
|
{
|
|
"epoch": 0.6439393939393939,
|
|
"grad_norm": 0.040202236041103546,
|
|
"learning_rate": 0.0001016901870632113,
|
|
"loss": 0.7795,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.6448863636363636,
|
|
"grad_norm": 0.038440162083217946,
|
|
"learning_rate": 0.00010122103889560066,
|
|
"loss": 0.788,
|
|
"step": 3405
|
|
},
|
|
{
|
|
"epoch": 0.6458333333333334,
|
|
"grad_norm": 0.0380210653037665,
|
|
"learning_rate": 0.00010075242371796585,
|
|
"loss": 0.7796,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 0.646780303030303,
|
|
"grad_norm": 0.038714184645298265,
|
|
"learning_rate": 0.00010028434665069456,
|
|
"loss": 0.7505,
|
|
"step": 3415
|
|
},
|
|
{
|
|
"epoch": 0.6477272727272727,
|
|
"grad_norm": 0.036301784575765876,
|
|
"learning_rate": 9.981681280829472e-05,
|
|
"loss": 0.7863,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.6486742424242424,
|
|
"grad_norm": 0.04273246901454883,
|
|
"learning_rate": 9.934982729933864e-05,
|
|
"loss": 0.7936,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 0.6496212121212122,
|
|
"grad_norm": 0.04096752213327176,
|
|
"learning_rate": 9.888339522640727e-05,
|
|
"loss": 0.7848,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 0.6505681818181818,
|
|
"grad_norm": 0.03654932535140771,
|
|
"learning_rate": 9.84175216860344e-05,
|
|
"loss": 0.801,
|
|
"step": 3435
|
|
},
|
|
{
|
|
"epoch": 0.6515151515151515,
|
|
"grad_norm": 0.03977277870704206,
|
|
"learning_rate": 9.795221176865064e-05,
|
|
"loss": 0.7817,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.6524621212121212,
|
|
"grad_norm": 0.03945778648109342,
|
|
"learning_rate": 9.748747055852845e-05,
|
|
"loss": 0.8034,
|
|
"step": 3445
|
|
},
|
|
{
|
|
"epoch": 0.6534090909090909,
|
|
"grad_norm": 0.03750951937019652,
|
|
"learning_rate": 9.702330313372607e-05,
|
|
"loss": 0.8047,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.6543560606060606,
|
|
"grad_norm": 0.04224253753307829,
|
|
"learning_rate": 9.655971456603222e-05,
|
|
"loss": 0.7741,
|
|
"step": 3455
|
|
},
|
|
{
|
|
"epoch": 0.6553030303030303,
|
|
"grad_norm": 0.04193635128292089,
|
|
"learning_rate": 9.609670992091063e-05,
|
|
"loss": 0.7686,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.65625,
|
|
"grad_norm": 0.0383707593111435,
|
|
"learning_rate": 9.563429425744476e-05,
|
|
"loss": 0.7937,
|
|
"step": 3465
|
|
},
|
|
{
|
|
"epoch": 0.6571969696969697,
|
|
"grad_norm": 0.04221940705987869,
|
|
"learning_rate": 9.517247262828245e-05,
|
|
"loss": 0.7589,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 0.6581439393939394,
|
|
"grad_norm": 0.03796604736644861,
|
|
"learning_rate": 9.47112500795808e-05,
|
|
"loss": 0.7673,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 0.6590909090909091,
|
|
"grad_norm": 0.03883212329330115,
|
|
"learning_rate": 9.425063165095088e-05,
|
|
"loss": 0.7899,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.6600378787878788,
|
|
"grad_norm": 0.03775017282994837,
|
|
"learning_rate": 9.379062237540282e-05,
|
|
"loss": 0.7824,
|
|
"step": 3485
|
|
},
|
|
{
|
|
"epoch": 0.6609848484848485,
|
|
"grad_norm": 0.040969682549424714,
|
|
"learning_rate": 9.333122727929086e-05,
|
|
"loss": 0.7744,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 0.6619318181818182,
|
|
"grad_norm": 0.043909710244610795,
|
|
"learning_rate": 9.287245138225807e-05,
|
|
"loss": 0.7844,
|
|
"step": 3495
|
|
},
|
|
{
|
|
"epoch": 0.6628787878787878,
|
|
"grad_norm": 0.03979263612757763,
|
|
"learning_rate": 9.241429969718193e-05,
|
|
"loss": 0.7771,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.6638257575757576,
|
|
"grad_norm": 0.036720996927973024,
|
|
"learning_rate": 9.195677723011943e-05,
|
|
"loss": 0.7787,
|
|
"step": 3505
|
|
},
|
|
{
|
|
"epoch": 0.6647727272727273,
|
|
"grad_norm": 0.03931047020350426,
|
|
"learning_rate": 9.149988898025224e-05,
|
|
"loss": 0.7924,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 0.665719696969697,
|
|
"grad_norm": 0.037239986158338824,
|
|
"learning_rate": 9.10436399398321e-05,
|
|
"loss": 0.763,
|
|
"step": 3515
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"grad_norm": 0.03903031387121002,
|
|
"learning_rate": 9.058803509412646e-05,
|
|
"loss": 0.7948,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.6676136363636364,
|
|
"grad_norm": 0.037974042366075905,
|
|
"learning_rate": 9.013307942136387e-05,
|
|
"loss": 0.7958,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 0.6685606060606061,
|
|
"grad_norm": 0.03996518480569968,
|
|
"learning_rate": 8.967877789267957e-05,
|
|
"loss": 0.7961,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 0.6695075757575758,
|
|
"grad_norm": 0.04156879401863253,
|
|
"learning_rate": 8.92251354720612e-05,
|
|
"loss": 0.7805,
|
|
"step": 3535
|
|
},
|
|
{
|
|
"epoch": 0.6704545454545454,
|
|
"grad_norm": 0.0379764100996147,
|
|
"learning_rate": 8.877215711629457e-05,
|
|
"loss": 0.776,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.6714015151515151,
|
|
"grad_norm": 0.03967109810939677,
|
|
"learning_rate": 8.831984777490954e-05,
|
|
"loss": 0.7884,
|
|
"step": 3545
|
|
},
|
|
{
|
|
"epoch": 0.6723484848484849,
|
|
"grad_norm": 0.04222761106798767,
|
|
"learning_rate": 8.786821239012582e-05,
|
|
"loss": 0.7714,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.6732954545454546,
|
|
"grad_norm": 0.04051009224725267,
|
|
"learning_rate": 8.741725589679912e-05,
|
|
"loss": 0.7656,
|
|
"step": 3555
|
|
},
|
|
{
|
|
"epoch": 0.6742424242424242,
|
|
"grad_norm": 0.03816042826696097,
|
|
"learning_rate": 8.696698322236706e-05,
|
|
"loss": 0.7609,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.6751893939393939,
|
|
"grad_norm": 0.03985241002351074,
|
|
"learning_rate": 8.651739928679556e-05,
|
|
"loss": 0.7982,
|
|
"step": 3565
|
|
},
|
|
{
|
|
"epoch": 0.6761363636363636,
|
|
"grad_norm": 0.039265226899704256,
|
|
"learning_rate": 8.606850900252478e-05,
|
|
"loss": 0.7886,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 0.6770833333333334,
|
|
"grad_norm": 0.03925154055754282,
|
|
"learning_rate": 8.562031727441567e-05,
|
|
"loss": 0.7963,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 0.678030303030303,
|
|
"grad_norm": 0.039792050085331696,
|
|
"learning_rate": 8.517282899969629e-05,
|
|
"loss": 0.8051,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.6789772727272727,
|
|
"grad_norm": 0.03879414602215694,
|
|
"learning_rate": 8.472604906790852e-05,
|
|
"loss": 0.8024,
|
|
"step": 3585
|
|
},
|
|
{
|
|
"epoch": 0.6799242424242424,
|
|
"grad_norm": 0.0418000610573599,
|
|
"learning_rate": 8.427998236085404e-05,
|
|
"loss": 0.762,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 0.6808712121212122,
|
|
"grad_norm": 0.045760675836821356,
|
|
"learning_rate": 8.38346337525417e-05,
|
|
"loss": 0.7923,
|
|
"step": 3595
|
|
},
|
|
{
|
|
"epoch": 0.6818181818181818,
|
|
"grad_norm": 0.040543801463440665,
|
|
"learning_rate": 8.339000810913386e-05,
|
|
"loss": 0.7809,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.6827651515151515,
|
|
"grad_norm": 0.03937397555590882,
|
|
"learning_rate": 8.294611028889332e-05,
|
|
"loss": 0.7985,
|
|
"step": 3605
|
|
},
|
|
{
|
|
"epoch": 0.6837121212121212,
|
|
"grad_norm": 0.04054190343244338,
|
|
"learning_rate": 8.250294514213009e-05,
|
|
"loss": 0.8063,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 0.6846590909090909,
|
|
"grad_norm": 0.039503064995133216,
|
|
"learning_rate": 8.206051751114875e-05,
|
|
"loss": 0.8033,
|
|
"step": 3615
|
|
},
|
|
{
|
|
"epoch": 0.6856060606060606,
|
|
"grad_norm": 0.03961793432281865,
|
|
"learning_rate": 8.161883223019513e-05,
|
|
"loss": 0.7841,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.6865530303030303,
|
|
"grad_norm": 0.03964303335275124,
|
|
"learning_rate": 8.11778941254037e-05,
|
|
"loss": 0.793,
|
|
"step": 3625
|
|
},
|
|
{
|
|
"epoch": 0.6875,
|
|
"grad_norm": 0.03665153936722208,
|
|
"learning_rate": 8.073770801474495e-05,
|
|
"loss": 0.776,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.6884469696969697,
|
|
"grad_norm": 0.04064557439554845,
|
|
"learning_rate": 8.029827870797233e-05,
|
|
"loss": 0.7622,
|
|
"step": 3635
|
|
},
|
|
{
|
|
"epoch": 0.6893939393939394,
|
|
"grad_norm": 0.038999462198328914,
|
|
"learning_rate": 7.985961100657029e-05,
|
|
"loss": 0.7945,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.6903409090909091,
|
|
"grad_norm": 0.03814629462651061,
|
|
"learning_rate": 7.942170970370128e-05,
|
|
"loss": 0.7907,
|
|
"step": 3645
|
|
},
|
|
{
|
|
"epoch": 0.6912878787878788,
|
|
"grad_norm": 0.03936834359810894,
|
|
"learning_rate": 7.898457958415362e-05,
|
|
"loss": 0.8105,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.6922348484848485,
|
|
"grad_norm": 0.043208861683073814,
|
|
"learning_rate": 7.854822542428923e-05,
|
|
"loss": 0.7829,
|
|
"step": 3655
|
|
},
|
|
{
|
|
"epoch": 0.6931818181818182,
|
|
"grad_norm": 0.04118046552241357,
|
|
"learning_rate": 7.811265199199152e-05,
|
|
"loss": 0.7881,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.6941287878787878,
|
|
"grad_norm": 0.04008925612177105,
|
|
"learning_rate": 7.76778640466128e-05,
|
|
"loss": 0.7898,
|
|
"step": 3665
|
|
},
|
|
{
|
|
"epoch": 0.6950757575757576,
|
|
"grad_norm": 0.03889735909259863,
|
|
"learning_rate": 7.724386633892306e-05,
|
|
"loss": 0.7829,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.6960227272727273,
|
|
"grad_norm": 0.04151816317577747,
|
|
"learning_rate": 7.681066361105756e-05,
|
|
"loss": 0.7767,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 0.696969696969697,
|
|
"grad_norm": 0.036944282946496376,
|
|
"learning_rate": 7.63782605964648e-05,
|
|
"loss": 0.7765,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.6979166666666666,
|
|
"grad_norm": 0.03615917598734965,
|
|
"learning_rate": 7.594666201985545e-05,
|
|
"loss": 0.7861,
|
|
"step": 3685
|
|
},
|
|
{
|
|
"epoch": 0.6988636363636364,
|
|
"grad_norm": 0.04067200248262229,
|
|
"learning_rate": 7.551587259715034e-05,
|
|
"loss": 0.8289,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.6998106060606061,
|
|
"grad_norm": 0.037365461143322884,
|
|
"learning_rate": 7.508589703542878e-05,
|
|
"loss": 0.811,
|
|
"step": 3695
|
|
},
|
|
{
|
|
"epoch": 0.7007575757575758,
|
|
"grad_norm": 0.04185500665647231,
|
|
"learning_rate": 7.465674003287745e-05,
|
|
"loss": 0.7682,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.7017045454545454,
|
|
"grad_norm": 0.040446504718946015,
|
|
"learning_rate": 7.422840627873897e-05,
|
|
"loss": 0.795,
|
|
"step": 3705
|
|
},
|
|
{
|
|
"epoch": 0.7026515151515151,
|
|
"grad_norm": 0.03757009814629214,
|
|
"learning_rate": 7.380090045326045e-05,
|
|
"loss": 0.7504,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.7035984848484849,
|
|
"grad_norm": 0.038548413203444785,
|
|
"learning_rate": 7.337422722764275e-05,
|
|
"loss": 0.8075,
|
|
"step": 3715
|
|
},
|
|
{
|
|
"epoch": 0.7045454545454546,
|
|
"grad_norm": 0.03999887548841091,
|
|
"learning_rate": 7.294839126398908e-05,
|
|
"loss": 0.774,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.7054924242424242,
|
|
"grad_norm": 0.04141191338877119,
|
|
"learning_rate": 7.252339721525412e-05,
|
|
"loss": 0.8107,
|
|
"step": 3725
|
|
},
|
|
{
|
|
"epoch": 0.7064393939393939,
|
|
"grad_norm": 0.0427189690829545,
|
|
"learning_rate": 7.209924972519343e-05,
|
|
"loss": 0.783,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.7073863636363636,
|
|
"grad_norm": 0.041829790074471566,
|
|
"learning_rate": 7.167595342831253e-05,
|
|
"loss": 0.8037,
|
|
"step": 3735
|
|
},
|
|
{
|
|
"epoch": 0.7083333333333334,
|
|
"grad_norm": 0.03932038001837439,
|
|
"learning_rate": 7.125351294981598e-05,
|
|
"loss": 0.7577,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.709280303030303,
|
|
"grad_norm": 0.044979053856984176,
|
|
"learning_rate": 7.083193290555744e-05,
|
|
"loss": 0.7623,
|
|
"step": 3745
|
|
},
|
|
{
|
|
"epoch": 0.7102272727272727,
|
|
"grad_norm": 0.040516807472682444,
|
|
"learning_rate": 7.041121790198881e-05,
|
|
"loss": 0.7796,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.7111742424242424,
|
|
"grad_norm": 0.04076058081578708,
|
|
"learning_rate": 6.999137253611e-05,
|
|
"loss": 0.789,
|
|
"step": 3755
|
|
},
|
|
{
|
|
"epoch": 0.7121212121212122,
|
|
"grad_norm": 0.03822915371523041,
|
|
"learning_rate": 6.95724013954186e-05,
|
|
"loss": 0.784,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 0.7130681818181818,
|
|
"grad_norm": 0.04000916349581932,
|
|
"learning_rate": 6.91543090578601e-05,
|
|
"loss": 0.7722,
|
|
"step": 3765
|
|
},
|
|
{
|
|
"epoch": 0.7140151515151515,
|
|
"grad_norm": 0.044726470002604886,
|
|
"learning_rate": 6.87371000917774e-05,
|
|
"loss": 0.7575,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 0.7149621212121212,
|
|
"grad_norm": 0.04471754304335626,
|
|
"learning_rate": 6.832077905586119e-05,
|
|
"loss": 0.7691,
|
|
"step": 3775
|
|
},
|
|
{
|
|
"epoch": 0.7159090909090909,
|
|
"grad_norm": 0.03949943632638639,
|
|
"learning_rate": 6.790535049910017e-05,
|
|
"loss": 0.784,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 0.7168560606060606,
|
|
"grad_norm": 0.038373759541872915,
|
|
"learning_rate": 6.749081896073106e-05,
|
|
"loss": 0.7601,
|
|
"step": 3785
|
|
},
|
|
{
|
|
"epoch": 0.7178030303030303,
|
|
"grad_norm": 0.03616627695182055,
|
|
"learning_rate": 6.707718897018941e-05,
|
|
"loss": 0.7591,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 0.71875,
|
|
"grad_norm": 0.04327639838927876,
|
|
"learning_rate": 6.66644650470597e-05,
|
|
"loss": 0.7846,
|
|
"step": 3795
|
|
},
|
|
{
|
|
"epoch": 0.7196969696969697,
|
|
"grad_norm": 0.043231180710510686,
|
|
"learning_rate": 6.625265170102615e-05,
|
|
"loss": 0.752,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.7206439393939394,
|
|
"grad_norm": 0.039624521674453655,
|
|
"learning_rate": 6.584175343182359e-05,
|
|
"loss": 0.7995,
|
|
"step": 3805
|
|
},
|
|
{
|
|
"epoch": 0.7215909090909091,
|
|
"grad_norm": 0.04268727987190514,
|
|
"learning_rate": 6.543177472918794e-05,
|
|
"loss": 0.7877,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 0.7225378787878788,
|
|
"grad_norm": 0.0402914394741491,
|
|
"learning_rate": 6.502272007280755e-05,
|
|
"loss": 0.7539,
|
|
"step": 3815
|
|
},
|
|
{
|
|
"epoch": 0.7234848484848485,
|
|
"grad_norm": 0.03869463859402182,
|
|
"learning_rate": 6.461459393227385e-05,
|
|
"loss": 0.7583,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 0.7244318181818182,
|
|
"grad_norm": 0.03726113086293714,
|
|
"learning_rate": 6.420740076703291e-05,
|
|
"loss": 0.7435,
|
|
"step": 3825
|
|
},
|
|
{
|
|
"epoch": 0.7253787878787878,
|
|
"grad_norm": 0.04242697385724998,
|
|
"learning_rate": 6.38011450263364e-05,
|
|
"loss": 0.7909,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 0.7263257575757576,
|
|
"grad_norm": 0.041072190514661835,
|
|
"learning_rate": 6.339583114919301e-05,
|
|
"loss": 0.7938,
|
|
"step": 3835
|
|
},
|
|
{
|
|
"epoch": 0.7272727272727273,
|
|
"grad_norm": 0.04140101649993429,
|
|
"learning_rate": 6.299146356432029e-05,
|
|
"loss": 0.7724,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 0.728219696969697,
|
|
"grad_norm": 0.04276312404745283,
|
|
"learning_rate": 6.258804669009575e-05,
|
|
"loss": 0.8042,
|
|
"step": 3845
|
|
},
|
|
{
|
|
"epoch": 0.7291666666666666,
|
|
"grad_norm": 0.03951731344689689,
|
|
"learning_rate": 6.218558493450893e-05,
|
|
"loss": 0.7555,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.7301136363636364,
|
|
"grad_norm": 0.03936898937096199,
|
|
"learning_rate": 6.178408269511312e-05,
|
|
"loss": 0.7863,
|
|
"step": 3855
|
|
},
|
|
{
|
|
"epoch": 0.7310606060606061,
|
|
"grad_norm": 0.038716693149565,
|
|
"learning_rate": 6.138354435897748e-05,
|
|
"loss": 0.7745,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 0.7320075757575758,
|
|
"grad_norm": 0.04072421020095559,
|
|
"learning_rate": 6.098397430263858e-05,
|
|
"loss": 0.7956,
|
|
"step": 3865
|
|
},
|
|
{
|
|
"epoch": 0.7329545454545454,
|
|
"grad_norm": 0.04100101910580714,
|
|
"learning_rate": 6.058537689205328e-05,
|
|
"loss": 0.7578,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 0.7339015151515151,
|
|
"grad_norm": 0.039583105949787575,
|
|
"learning_rate": 6.0187756482550645e-05,
|
|
"loss": 0.796,
|
|
"step": 3875
|
|
},
|
|
{
|
|
"epoch": 0.7348484848484849,
|
|
"grad_norm": 0.0398070275621675,
|
|
"learning_rate": 5.9791117418784274e-05,
|
|
"loss": 0.7667,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 0.7357954545454546,
|
|
"grad_norm": 0.03854944447288334,
|
|
"learning_rate": 5.939546403468501e-05,
|
|
"loss": 0.7499,
|
|
"step": 3885
|
|
},
|
|
{
|
|
"epoch": 0.7367424242424242,
|
|
"grad_norm": 0.04368614364362331,
|
|
"learning_rate": 5.900080065341363e-05,
|
|
"loss": 0.78,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 0.7376893939393939,
|
|
"grad_norm": 0.042603936273429066,
|
|
"learning_rate": 5.860713158731333e-05,
|
|
"loss": 0.7636,
|
|
"step": 3895
|
|
},
|
|
{
|
|
"epoch": 0.7386363636363636,
|
|
"grad_norm": 0.043008266779781215,
|
|
"learning_rate": 5.821446113786302e-05,
|
|
"loss": 0.7631,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.7395833333333334,
|
|
"grad_norm": 0.03910221102626979,
|
|
"learning_rate": 5.782279359562988e-05,
|
|
"loss": 0.7691,
|
|
"step": 3905
|
|
},
|
|
{
|
|
"epoch": 0.740530303030303,
|
|
"grad_norm": 0.042051023569034784,
|
|
"learning_rate": 5.743213324022272e-05,
|
|
"loss": 0.7905,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 0.7414772727272727,
|
|
"grad_norm": 0.044919920953571925,
|
|
"learning_rate": 5.7042484340245265e-05,
|
|
"loss": 0.7715,
|
|
"step": 3915
|
|
},
|
|
{
|
|
"epoch": 0.7424242424242424,
|
|
"grad_norm": 0.041073163710179876,
|
|
"learning_rate": 5.665385115324953e-05,
|
|
"loss": 0.7468,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 0.7433712121212122,
|
|
"grad_norm": 0.03567748888143746,
|
|
"learning_rate": 5.626623792568885e-05,
|
|
"loss": 0.7902,
|
|
"step": 3925
|
|
},
|
|
{
|
|
"epoch": 0.7443181818181818,
|
|
"grad_norm": 0.040223394497797826,
|
|
"learning_rate": 5.587964889287218e-05,
|
|
"loss": 0.8142,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 0.7452651515151515,
|
|
"grad_norm": 0.03937628241815354,
|
|
"learning_rate": 5.5494088278917434e-05,
|
|
"loss": 0.7561,
|
|
"step": 3935
|
|
},
|
|
{
|
|
"epoch": 0.7462121212121212,
|
|
"grad_norm": 0.039022205324506364,
|
|
"learning_rate": 5.5109560296705066e-05,
|
|
"loss": 0.7761,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 0.7471590909090909,
|
|
"grad_norm": 0.037705226948508606,
|
|
"learning_rate": 5.472606914783266e-05,
|
|
"loss": 0.7697,
|
|
"step": 3945
|
|
},
|
|
{
|
|
"epoch": 0.7481060606060606,
|
|
"grad_norm": 0.03949092410910686,
|
|
"learning_rate": 5.434361902256868e-05,
|
|
"loss": 0.7804,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.7490530303030303,
|
|
"grad_norm": 0.040225291682004415,
|
|
"learning_rate": 5.396221409980653e-05,
|
|
"loss": 0.7895,
|
|
"step": 3955
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"grad_norm": 0.03696261298169589,
|
|
"learning_rate": 5.358185854701909e-05,
|
|
"loss": 0.7715,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 0.7509469696969697,
|
|
"grad_norm": 0.03863523973389968,
|
|
"learning_rate": 5.320255652021336e-05,
|
|
"loss": 0.7748,
|
|
"step": 3965
|
|
},
|
|
{
|
|
"epoch": 0.7518939393939394,
|
|
"grad_norm": 0.041539206419021424,
|
|
"learning_rate": 5.282431216388457e-05,
|
|
"loss": 0.7556,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 0.7528409090909091,
|
|
"grad_norm": 0.040538971828186623,
|
|
"learning_rate": 5.244712961097142e-05,
|
|
"loss": 0.7843,
|
|
"step": 3975
|
|
},
|
|
{
|
|
"epoch": 0.7537878787878788,
|
|
"grad_norm": 0.042618390094256595,
|
|
"learning_rate": 5.207101298281049e-05,
|
|
"loss": 0.7666,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 0.7547348484848485,
|
|
"grad_norm": 0.03893190636534372,
|
|
"learning_rate": 5.1695966389091396e-05,
|
|
"loss": 0.7793,
|
|
"step": 3985
|
|
},
|
|
{
|
|
"epoch": 0.7556818181818182,
|
|
"grad_norm": 0.03755845511933422,
|
|
"learning_rate": 5.132199392781205e-05,
|
|
"loss": 0.77,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 0.7566287878787878,
|
|
"grad_norm": 0.04022540043473745,
|
|
"learning_rate": 5.094909968523351e-05,
|
|
"loss": 0.78,
|
|
"step": 3995
|
|
},
|
|
{
|
|
"epoch": 0.7575757575757576,
|
|
"grad_norm": 0.04253171860319729,
|
|
"learning_rate": 5.057728773583559e-05,
|
|
"loss": 0.7478,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.7585227272727273,
|
|
"grad_norm": 0.042102270133092194,
|
|
"learning_rate": 5.0206562142272334e-05,
|
|
"loss": 0.7817,
|
|
"step": 4005
|
|
},
|
|
{
|
|
"epoch": 0.759469696969697,
|
|
"grad_norm": 0.04424433693890534,
|
|
"learning_rate": 4.9836926955327656e-05,
|
|
"loss": 0.7774,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 0.7604166666666666,
|
|
"grad_norm": 0.03727474161719155,
|
|
"learning_rate": 4.946838621387063e-05,
|
|
"loss": 0.7548,
|
|
"step": 4015
|
|
},
|
|
{
|
|
"epoch": 0.7613636363636364,
|
|
"grad_norm": 0.038729203633207226,
|
|
"learning_rate": 4.9100943944812114e-05,
|
|
"loss": 0.7723,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 0.7623106060606061,
|
|
"grad_norm": 0.04033403314672209,
|
|
"learning_rate": 4.873460416306023e-05,
|
|
"loss": 0.7815,
|
|
"step": 4025
|
|
},
|
|
{
|
|
"epoch": 0.7632575757575758,
|
|
"grad_norm": 0.03663235670257645,
|
|
"learning_rate": 4.836937087147655e-05,
|
|
"loss": 0.7968,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 0.7642045454545454,
|
|
"grad_norm": 0.038393805155331855,
|
|
"learning_rate": 4.8005248060832446e-05,
|
|
"loss": 0.7572,
|
|
"step": 4035
|
|
},
|
|
{
|
|
"epoch": 0.7651515151515151,
|
|
"grad_norm": 0.03692754409484954,
|
|
"learning_rate": 4.7642239709765596e-05,
|
|
"loss": 0.7707,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 0.7660984848484849,
|
|
"grad_norm": 0.03765360322249641,
|
|
"learning_rate": 4.728034978473621e-05,
|
|
"loss": 0.7886,
|
|
"step": 4045
|
|
},
|
|
{
|
|
"epoch": 0.7670454545454546,
|
|
"grad_norm": 0.04124091795917703,
|
|
"learning_rate": 4.691958223998401e-05,
|
|
"loss": 0.7693,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.7679924242424242,
|
|
"grad_norm": 0.038689870405394365,
|
|
"learning_rate": 4.655994101748477e-05,
|
|
"loss": 0.7921,
|
|
"step": 4055
|
|
},
|
|
{
|
|
"epoch": 0.7689393939393939,
|
|
"grad_norm": 0.03609443794000663,
|
|
"learning_rate": 4.620143004690736e-05,
|
|
"loss": 0.7289,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 0.7698863636363636,
|
|
"grad_norm": 0.03987735376629914,
|
|
"learning_rate": 4.584405324557092e-05,
|
|
"loss": 0.7605,
|
|
"step": 4065
|
|
},
|
|
{
|
|
"epoch": 0.7708333333333334,
|
|
"grad_norm": 0.04097229768216734,
|
|
"learning_rate": 4.548781451840179e-05,
|
|
"loss": 0.7663,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 0.771780303030303,
|
|
"grad_norm": 0.040742694287622665,
|
|
"learning_rate": 4.513271775789099e-05,
|
|
"loss": 0.8028,
|
|
"step": 4075
|
|
},
|
|
{
|
|
"epoch": 0.7727272727272727,
|
|
"grad_norm": 0.04002660818121977,
|
|
"learning_rate": 4.477876684405179e-05,
|
|
"loss": 0.7613,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 0.7736742424242424,
|
|
"grad_norm": 0.03889418322921735,
|
|
"learning_rate": 4.4425965644377206e-05,
|
|
"loss": 0.7551,
|
|
"step": 4085
|
|
},
|
|
{
|
|
"epoch": 0.7746212121212122,
|
|
"grad_norm": 0.041611350969633386,
|
|
"learning_rate": 4.407431801379765e-05,
|
|
"loss": 0.7626,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 0.7755681818181818,
|
|
"grad_norm": 0.038681908003403036,
|
|
"learning_rate": 4.37238277946389e-05,
|
|
"loss": 0.7903,
|
|
"step": 4095
|
|
},
|
|
{
|
|
"epoch": 0.7765151515151515,
|
|
"grad_norm": 0.03548309908592482,
|
|
"learning_rate": 4.337449881658027e-05,
|
|
"loss": 0.7786,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.7774621212121212,
|
|
"grad_norm": 0.039380169633909605,
|
|
"learning_rate": 4.3026334896612454e-05,
|
|
"loss": 0.7403,
|
|
"step": 4105
|
|
},
|
|
{
|
|
"epoch": 0.7784090909090909,
|
|
"grad_norm": 0.04079678253532297,
|
|
"learning_rate": 4.267933983899601e-05,
|
|
"loss": 0.7436,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 0.7793560606060606,
|
|
"grad_norm": 0.039301682797346464,
|
|
"learning_rate": 4.233351743521987e-05,
|
|
"loss": 0.7671,
|
|
"step": 4115
|
|
},
|
|
{
|
|
"epoch": 0.7803030303030303,
|
|
"grad_norm": 0.03847308151820405,
|
|
"learning_rate": 4.19888714639597e-05,
|
|
"loss": 0.7448,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 0.78125,
|
|
"grad_norm": 0.041300407433686306,
|
|
"learning_rate": 4.164540569103667e-05,
|
|
"loss": 0.7589,
|
|
"step": 4125
|
|
},
|
|
{
|
|
"epoch": 0.7821969696969697,
|
|
"grad_norm": 0.03941592905737965,
|
|
"learning_rate": 4.1303123869376535e-05,
|
|
"loss": 0.757,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 0.7831439393939394,
|
|
"grad_norm": 0.037406188407398566,
|
|
"learning_rate": 4.096202973896825e-05,
|
|
"loss": 0.7725,
|
|
"step": 4135
|
|
},
|
|
{
|
|
"epoch": 0.7840909090909091,
|
|
"grad_norm": 0.03990816917288711,
|
|
"learning_rate": 4.0622127026823445e-05,
|
|
"loss": 0.7317,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 0.7850378787878788,
|
|
"grad_norm": 0.03511090719697071,
|
|
"learning_rate": 4.028341944693543e-05,
|
|
"loss": 0.7529,
|
|
"step": 4145
|
|
},
|
|
{
|
|
"epoch": 0.7859848484848485,
|
|
"grad_norm": 0.0379918188663595,
|
|
"learning_rate": 3.9945910700238865e-05,
|
|
"loss": 0.7766,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.7869318181818182,
|
|
"grad_norm": 0.04057440463664927,
|
|
"learning_rate": 3.960960447456907e-05,
|
|
"loss": 0.7828,
|
|
"step": 4155
|
|
},
|
|
{
|
|
"epoch": 0.7878787878787878,
|
|
"grad_norm": 0.03820049794907823,
|
|
"learning_rate": 3.9274504444622016e-05,
|
|
"loss": 0.7687,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 0.7888257575757576,
|
|
"grad_norm": 0.04059223380009775,
|
|
"learning_rate": 3.894061427191384e-05,
|
|
"loss": 0.7736,
|
|
"step": 4165
|
|
},
|
|
{
|
|
"epoch": 0.7897727272727273,
|
|
"grad_norm": 0.03586736668288177,
|
|
"learning_rate": 3.860793760474105e-05,
|
|
"loss": 0.7504,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 0.790719696969697,
|
|
"grad_norm": 0.03808778963155071,
|
|
"learning_rate": 3.8276478078140746e-05,
|
|
"loss": 0.7827,
|
|
"step": 4175
|
|
},
|
|
{
|
|
"epoch": 0.7916666666666666,
|
|
"grad_norm": 0.04112934182070688,
|
|
"learning_rate": 3.794623931385062e-05,
|
|
"loss": 0.7754,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 0.7926136363636364,
|
|
"grad_norm": 0.03890850582072587,
|
|
"learning_rate": 3.7617224920269607e-05,
|
|
"loss": 0.7529,
|
|
"step": 4185
|
|
},
|
|
{
|
|
"epoch": 0.7935606060606061,
|
|
"grad_norm": 0.03888836620814126,
|
|
"learning_rate": 3.7289438492418375e-05,
|
|
"loss": 0.7797,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 0.7945075757575758,
|
|
"grad_norm": 0.04186176891014295,
|
|
"learning_rate": 3.696288361190015e-05,
|
|
"loss": 0.7735,
|
|
"step": 4195
|
|
},
|
|
{
|
|
"epoch": 0.7954545454545454,
|
|
"grad_norm": 0.03922361327888292,
|
|
"learning_rate": 3.663756384686127e-05,
|
|
"loss": 0.7431,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.7964015151515151,
|
|
"grad_norm": 0.037477031699552646,
|
|
"learning_rate": 3.631348275195259e-05,
|
|
"loss": 0.7477,
|
|
"step": 4205
|
|
},
|
|
{
|
|
"epoch": 0.7973484848484849,
|
|
"grad_norm": 0.03804879504351854,
|
|
"learning_rate": 3.599064386829051e-05,
|
|
"loss": 0.7873,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 0.7982954545454546,
|
|
"grad_norm": 0.041760751407490776,
|
|
"learning_rate": 3.5669050723418074e-05,
|
|
"loss": 0.7644,
|
|
"step": 4215
|
|
},
|
|
{
|
|
"epoch": 0.7992424242424242,
|
|
"grad_norm": 0.035448569098769006,
|
|
"learning_rate": 3.534870683126664e-05,
|
|
"loss": 0.7786,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 0.8001893939393939,
|
|
"grad_norm": 0.042355358195710444,
|
|
"learning_rate": 3.5029615692117555e-05,
|
|
"loss": 0.7576,
|
|
"step": 4225
|
|
},
|
|
{
|
|
"epoch": 0.8011363636363636,
|
|
"grad_norm": 0.039815508314079394,
|
|
"learning_rate": 3.47117807925636e-05,
|
|
"loss": 0.7678,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 0.8020833333333334,
|
|
"grad_norm": 0.04043653268326337,
|
|
"learning_rate": 3.4395205605471286e-05,
|
|
"loss": 0.7763,
|
|
"step": 4235
|
|
},
|
|
{
|
|
"epoch": 0.803030303030303,
|
|
"grad_norm": 0.03871480607482675,
|
|
"learning_rate": 3.4079893589942543e-05,
|
|
"loss": 0.761,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 0.8039772727272727,
|
|
"grad_norm": 0.041056778138514105,
|
|
"learning_rate": 3.376584819127712e-05,
|
|
"loss": 0.7686,
|
|
"step": 4245
|
|
},
|
|
{
|
|
"epoch": 0.8049242424242424,
|
|
"grad_norm": 0.0402161423571865,
|
|
"learning_rate": 3.3453072840935e-05,
|
|
"loss": 0.7704,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.8058712121212122,
|
|
"grad_norm": 0.03864045638204508,
|
|
"learning_rate": 3.314157095649868e-05,
|
|
"loss": 0.7707,
|
|
"step": 4255
|
|
},
|
|
{
|
|
"epoch": 0.8068181818181818,
|
|
"grad_norm": 0.042228336396620804,
|
|
"learning_rate": 3.283134594163599e-05,
|
|
"loss": 0.7482,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 0.8077651515151515,
|
|
"grad_norm": 0.04047310966274174,
|
|
"learning_rate": 3.252240118606293e-05,
|
|
"loss": 0.7587,
|
|
"step": 4265
|
|
},
|
|
{
|
|
"epoch": 0.8087121212121212,
|
|
"grad_norm": 0.03977289729530792,
|
|
"learning_rate": 3.221474006550662e-05,
|
|
"loss": 0.768,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 0.8096590909090909,
|
|
"grad_norm": 0.03903161422504673,
|
|
"learning_rate": 3.1908365941668115e-05,
|
|
"loss": 0.7433,
|
|
"step": 4275
|
|
},
|
|
{
|
|
"epoch": 0.8106060606060606,
|
|
"grad_norm": 0.03937683373863951,
|
|
"learning_rate": 3.160328216218617e-05,
|
|
"loss": 0.7889,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 0.8115530303030303,
|
|
"grad_norm": 0.039794170439367574,
|
|
"learning_rate": 3.129949206060039e-05,
|
|
"loss": 0.7418,
|
|
"step": 4285
|
|
},
|
|
{
|
|
"epoch": 0.8125,
|
|
"grad_norm": 0.041917271276222724,
|
|
"learning_rate": 3.099699895631474e-05,
|
|
"loss": 0.7451,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 0.8134469696969697,
|
|
"grad_norm": 0.037936541801760024,
|
|
"learning_rate": 3.069580615456137e-05,
|
|
"loss": 0.7627,
|
|
"step": 4295
|
|
},
|
|
{
|
|
"epoch": 0.8143939393939394,
|
|
"grad_norm": 0.037614637220929004,
|
|
"learning_rate": 3.03959169463646e-05,
|
|
"loss": 0.7674,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.8153409090909091,
|
|
"grad_norm": 0.03881575160344137,
|
|
"learning_rate": 3.009733460850473e-05,
|
|
"loss": 0.7646,
|
|
"step": 4305
|
|
},
|
|
{
|
|
"epoch": 0.8162878787878788,
|
|
"grad_norm": 0.041064989397716814,
|
|
"learning_rate": 2.9800062403482493e-05,
|
|
"loss": 0.7554,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 0.8172348484848485,
|
|
"grad_norm": 0.03751500962917203,
|
|
"learning_rate": 2.9504103579483163e-05,
|
|
"loss": 0.772,
|
|
"step": 4315
|
|
},
|
|
{
|
|
"epoch": 0.8181818181818182,
|
|
"grad_norm": 0.03762830736526142,
|
|
"learning_rate": 2.9209461370341204e-05,
|
|
"loss": 0.7419,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 0.8191287878787878,
|
|
"grad_norm": 0.043952738002868426,
|
|
"learning_rate": 2.891613899550499e-05,
|
|
"loss": 0.7876,
|
|
"step": 4325
|
|
},
|
|
{
|
|
"epoch": 0.8200757575757576,
|
|
"grad_norm": 0.04012795700450522,
|
|
"learning_rate": 2.8624139660001448e-05,
|
|
"loss": 0.7589,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 0.8210227272727273,
|
|
"grad_norm": 0.03722728875218868,
|
|
"learning_rate": 2.8333466554401125e-05,
|
|
"loss": 0.7521,
|
|
"step": 4335
|
|
},
|
|
{
|
|
"epoch": 0.821969696969697,
|
|
"grad_norm": 0.037476550396028804,
|
|
"learning_rate": 2.804412285478343e-05,
|
|
"loss": 0.7393,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 0.8229166666666666,
|
|
"grad_norm": 0.037123539268036035,
|
|
"learning_rate": 2.775611172270185e-05,
|
|
"loss": 0.7654,
|
|
"step": 4345
|
|
},
|
|
{
|
|
"epoch": 0.8238636363636364,
|
|
"grad_norm": 0.03776861945697112,
|
|
"learning_rate": 2.7469436305149172e-05,
|
|
"loss": 0.7629,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 0.8248106060606061,
|
|
"grad_norm": 0.0397986113112224,
|
|
"learning_rate": 2.7184099734523567e-05,
|
|
"loss": 0.776,
|
|
"step": 4355
|
|
},
|
|
{
|
|
"epoch": 0.8257575757575758,
|
|
"grad_norm": 0.04058484357450401,
|
|
"learning_rate": 2.690010512859403e-05,
|
|
"loss": 0.7563,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 0.8267045454545454,
|
|
"grad_norm": 0.03896827353754326,
|
|
"learning_rate": 2.6617455590466363e-05,
|
|
"loss": 0.7457,
|
|
"step": 4365
|
|
},
|
|
{
|
|
"epoch": 0.8276515151515151,
|
|
"grad_norm": 0.03825814102977763,
|
|
"learning_rate": 2.633615420854928e-05,
|
|
"loss": 0.75,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 0.8285984848484849,
|
|
"grad_norm": 0.03716265282693635,
|
|
"learning_rate": 2.6056204056520795e-05,
|
|
"loss": 0.758,
|
|
"step": 4375
|
|
},
|
|
{
|
|
"epoch": 0.8295454545454546,
|
|
"grad_norm": 0.04444901558357328,
|
|
"learning_rate": 2.5777608193294396e-05,
|
|
"loss": 0.7576,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 0.8304924242424242,
|
|
"grad_norm": 0.03624822046682716,
|
|
"learning_rate": 2.550036966298581e-05,
|
|
"loss": 0.7483,
|
|
"step": 4385
|
|
},
|
|
{
|
|
"epoch": 0.8314393939393939,
|
|
"grad_norm": 0.038466147912297376,
|
|
"learning_rate": 2.5224491494879705e-05,
|
|
"loss": 0.7735,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 0.8323863636363636,
|
|
"grad_norm": 0.04216736810756713,
|
|
"learning_rate": 2.4949976703396486e-05,
|
|
"loss": 0.7666,
|
|
"step": 4395
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 0.037577989344391036,
|
|
"learning_rate": 2.4676828288059558e-05,
|
|
"loss": 0.7504,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.834280303030303,
|
|
"grad_norm": 0.04096373784722742,
|
|
"learning_rate": 2.4405049233462316e-05,
|
|
"loss": 0.7541,
|
|
"step": 4405
|
|
},
|
|
{
|
|
"epoch": 0.8352272727272727,
|
|
"grad_norm": 0.03828220611720389,
|
|
"learning_rate": 2.413464250923566e-05,
|
|
"loss": 0.7512,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 0.8361742424242424,
|
|
"grad_norm": 0.03788635961789358,
|
|
"learning_rate": 2.3865611070015605e-05,
|
|
"loss": 0.7544,
|
|
"step": 4415
|
|
},
|
|
{
|
|
"epoch": 0.8371212121212122,
|
|
"grad_norm": 0.04139269826653916,
|
|
"learning_rate": 2.3597957855410932e-05,
|
|
"loss": 0.7847,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 0.8380681818181818,
|
|
"grad_norm": 0.04098240265367287,
|
|
"learning_rate": 2.3331685789970978e-05,
|
|
"loss": 0.7548,
|
|
"step": 4425
|
|
},
|
|
{
|
|
"epoch": 0.8390151515151515,
|
|
"grad_norm": 0.0366730676630168,
|
|
"learning_rate": 2.3066797783153767e-05,
|
|
"loss": 0.7546,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 0.8399621212121212,
|
|
"grad_norm": 0.03855939364168934,
|
|
"learning_rate": 2.280329672929434e-05,
|
|
"loss": 0.7526,
|
|
"step": 4435
|
|
},
|
|
{
|
|
"epoch": 0.8409090909090909,
|
|
"grad_norm": 0.03769398425525407,
|
|
"learning_rate": 2.2541185507572858e-05,
|
|
"loss": 0.7659,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 0.8418560606060606,
|
|
"grad_norm": 0.037763802543836905,
|
|
"learning_rate": 2.228046698198336e-05,
|
|
"loss": 0.7492,
|
|
"step": 4445
|
|
},
|
|
{
|
|
"epoch": 0.8428030303030303,
|
|
"grad_norm": 0.038504139823869195,
|
|
"learning_rate": 2.202114400130246e-05,
|
|
"loss": 0.7532,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 0.84375,
|
|
"grad_norm": 0.03839729099480198,
|
|
"learning_rate": 2.1763219399058042e-05,
|
|
"loss": 0.7716,
|
|
"step": 4455
|
|
},
|
|
{
|
|
"epoch": 0.8446969696969697,
|
|
"grad_norm": 0.03634271413981629,
|
|
"learning_rate": 2.150669599349845e-05,
|
|
"loss": 0.781,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 0.8456439393939394,
|
|
"grad_norm": 0.038799770819478115,
|
|
"learning_rate": 2.1251576587561774e-05,
|
|
"loss": 0.7471,
|
|
"step": 4465
|
|
},
|
|
{
|
|
"epoch": 0.8465909090909091,
|
|
"grad_norm": 0.037447866031002947,
|
|
"learning_rate": 2.0997863968844914e-05,
|
|
"loss": 0.7454,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 0.8475378787878788,
|
|
"grad_norm": 0.03812532250323499,
|
|
"learning_rate": 2.0745560909573534e-05,
|
|
"loss": 0.7487,
|
|
"step": 4475
|
|
},
|
|
{
|
|
"epoch": 0.8484848484848485,
|
|
"grad_norm": 0.03967052575801908,
|
|
"learning_rate": 2.0494670166571353e-05,
|
|
"loss": 0.7448,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 0.8494318181818182,
|
|
"grad_norm": 0.038717835231477656,
|
|
"learning_rate": 2.0245194481230386e-05,
|
|
"loss": 0.746,
|
|
"step": 4485
|
|
},
|
|
{
|
|
"epoch": 0.8503787878787878,
|
|
"grad_norm": 0.03978509491909852,
|
|
"learning_rate": 1.9997136579480698e-05,
|
|
"loss": 0.7591,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 0.8513257575757576,
|
|
"grad_norm": 0.040392976405494746,
|
|
"learning_rate": 1.9750499171760864e-05,
|
|
"loss": 0.7437,
|
|
"step": 4495
|
|
},
|
|
{
|
|
"epoch": 0.8522727272727273,
|
|
"grad_norm": 0.03839961150423484,
|
|
"learning_rate": 1.9505284952988154e-05,
|
|
"loss": 0.7191,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.853219696969697,
|
|
"grad_norm": 0.03701174541805748,
|
|
"learning_rate": 1.9261496602529163e-05,
|
|
"loss": 0.7614,
|
|
"step": 4505
|
|
},
|
|
{
|
|
"epoch": 0.8541666666666666,
|
|
"grad_norm": 0.03820961258437268,
|
|
"learning_rate": 1.9019136784170635e-05,
|
|
"loss": 0.7914,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 0.8551136363636364,
|
|
"grad_norm": 0.039034746383769636,
|
|
"learning_rate": 1.877820814609018e-05,
|
|
"loss": 0.7378,
|
|
"step": 4515
|
|
},
|
|
{
|
|
"epoch": 0.8560606060606061,
|
|
"grad_norm": 0.035548177827413464,
|
|
"learning_rate": 1.8538713320827398e-05,
|
|
"loss": 0.7587,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 0.8570075757575758,
|
|
"grad_norm": 0.03927586449468295,
|
|
"learning_rate": 1.8300654925255227e-05,
|
|
"loss": 0.7505,
|
|
"step": 4525
|
|
},
|
|
{
|
|
"epoch": 0.8579545454545454,
|
|
"grad_norm": 0.03808728080301323,
|
|
"learning_rate": 1.8064035560551254e-05,
|
|
"loss": 0.7546,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 0.8589015151515151,
|
|
"grad_norm": 0.03971353114564455,
|
|
"learning_rate": 1.7828857812169183e-05,
|
|
"loss": 0.7481,
|
|
"step": 4535
|
|
},
|
|
{
|
|
"epoch": 0.8598484848484849,
|
|
"grad_norm": 0.038394015305144635,
|
|
"learning_rate": 1.7595124249810798e-05,
|
|
"loss": 0.7512,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 0.8607954545454546,
|
|
"grad_norm": 0.04035971231008132,
|
|
"learning_rate": 1.736283742739781e-05,
|
|
"loss": 0.7514,
|
|
"step": 4545
|
|
},
|
|
{
|
|
"epoch": 0.8617424242424242,
|
|
"grad_norm": 0.03852526269337616,
|
|
"learning_rate": 1.7131999883043864e-05,
|
|
"loss": 0.7324,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 0.8626893939393939,
|
|
"grad_norm": 0.0380464207669555,
|
|
"learning_rate": 1.690261413902685e-05,
|
|
"loss": 0.778,
|
|
"step": 4555
|
|
},
|
|
{
|
|
"epoch": 0.8636363636363636,
|
|
"grad_norm": 0.03809307250418814,
|
|
"learning_rate": 1.6674682701761493e-05,
|
|
"loss": 0.741,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 0.8645833333333334,
|
|
"grad_norm": 0.03730545835154613,
|
|
"learning_rate": 1.644820806177165e-05,
|
|
"loss": 0.7494,
|
|
"step": 4565
|
|
},
|
|
{
|
|
"epoch": 0.865530303030303,
|
|
"grad_norm": 0.0410319046669476,
|
|
"learning_rate": 1.622319269366349e-05,
|
|
"loss": 0.7774,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 0.8664772727272727,
|
|
"grad_norm": 0.0365044250054721,
|
|
"learning_rate": 1.599963905609807e-05,
|
|
"loss": 0.7404,
|
|
"step": 4575
|
|
},
|
|
{
|
|
"epoch": 0.8674242424242424,
|
|
"grad_norm": 0.039763773803898096,
|
|
"learning_rate": 1.5777549591764705e-05,
|
|
"loss": 0.7789,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 0.8683712121212122,
|
|
"grad_norm": 0.03734007441823457,
|
|
"learning_rate": 1.555692672735431e-05,
|
|
"loss": 0.7487,
|
|
"step": 4585
|
|
},
|
|
{
|
|
"epoch": 0.8693181818181818,
|
|
"grad_norm": 0.040126269127401595,
|
|
"learning_rate": 1.5337772873532696e-05,
|
|
"loss": 0.7653,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 0.8702651515151515,
|
|
"grad_norm": 0.04012812890151172,
|
|
"learning_rate": 1.5120090424914305e-05,
|
|
"loss": 0.7763,
|
|
"step": 4595
|
|
},
|
|
{
|
|
"epoch": 0.8712121212121212,
|
|
"grad_norm": 0.03851463594249241,
|
|
"learning_rate": 1.4903881760036163e-05,
|
|
"loss": 0.7654,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.8721590909090909,
|
|
"grad_norm": 0.03700030549429758,
|
|
"learning_rate": 1.46891492413318e-05,
|
|
"loss": 0.7481,
|
|
"step": 4605
|
|
},
|
|
{
|
|
"epoch": 0.8731060606060606,
|
|
"grad_norm": 0.037764961164442196,
|
|
"learning_rate": 1.4475895215105299e-05,
|
|
"loss": 0.751,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 0.8740530303030303,
|
|
"grad_norm": 0.039658099635677214,
|
|
"learning_rate": 1.4264122011505919e-05,
|
|
"loss": 0.7454,
|
|
"step": 4615
|
|
},
|
|
{
|
|
"epoch": 0.875,
|
|
"grad_norm": 0.037877514658421034,
|
|
"learning_rate": 1.4053831944502508e-05,
|
|
"loss": 0.7311,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 0.8759469696969697,
|
|
"grad_norm": 0.03981948814673359,
|
|
"learning_rate": 1.3845027311858149e-05,
|
|
"loss": 0.7701,
|
|
"step": 4625
|
|
},
|
|
{
|
|
"epoch": 0.8768939393939394,
|
|
"grad_norm": 0.03725499979086121,
|
|
"learning_rate": 1.3637710395105134e-05,
|
|
"loss": 0.7496,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 0.8778409090909091,
|
|
"grad_norm": 0.03845165051106696,
|
|
"learning_rate": 1.3431883459520115e-05,
|
|
"loss": 0.7598,
|
|
"step": 4635
|
|
},
|
|
{
|
|
"epoch": 0.8787878787878788,
|
|
"grad_norm": 0.03921039831701288,
|
|
"learning_rate": 1.3227548754099148e-05,
|
|
"loss": 0.7576,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 0.8797348484848485,
|
|
"grad_norm": 0.03677027674299414,
|
|
"learning_rate": 1.3024708511533266e-05,
|
|
"loss": 0.7536,
|
|
"step": 4645
|
|
},
|
|
{
|
|
"epoch": 0.8806818181818182,
|
|
"grad_norm": 0.03769732908420657,
|
|
"learning_rate": 1.2823364948184095e-05,
|
|
"loss": 0.7631,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 0.8816287878787878,
|
|
"grad_norm": 0.038277457641516056,
|
|
"learning_rate": 1.2623520264059528e-05,
|
|
"loss": 0.758,
|
|
"step": 4655
|
|
},
|
|
{
|
|
"epoch": 0.8825757575757576,
|
|
"grad_norm": 0.03835316890120053,
|
|
"learning_rate": 1.2425176642789841e-05,
|
|
"loss": 0.7545,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 0.8835227272727273,
|
|
"grad_norm": 0.040317569065410515,
|
|
"learning_rate": 1.2228336251603632e-05,
|
|
"loss": 0.7211,
|
|
"step": 4665
|
|
},
|
|
{
|
|
"epoch": 0.884469696969697,
|
|
"grad_norm": 0.03605361368261573,
|
|
"learning_rate": 1.2033001241304285e-05,
|
|
"loss": 0.7356,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 0.8854166666666666,
|
|
"grad_norm": 0.042368074274969164,
|
|
"learning_rate": 1.1839173746246462e-05,
|
|
"loss": 0.7643,
|
|
"step": 4675
|
|
},
|
|
{
|
|
"epoch": 0.8863636363636364,
|
|
"grad_norm": 0.04206007952837537,
|
|
"learning_rate": 1.164685588431281e-05,
|
|
"loss": 0.7694,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 0.8873106060606061,
|
|
"grad_norm": 0.03587287003409619,
|
|
"learning_rate": 1.14560497568906e-05,
|
|
"loss": 0.7336,
|
|
"step": 4685
|
|
},
|
|
{
|
|
"epoch": 0.8882575757575758,
|
|
"grad_norm": 0.04055727525356863,
|
|
"learning_rate": 1.126675744884904e-05,
|
|
"loss": 0.7858,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 0.8892045454545454,
|
|
"grad_norm": 0.03731194721410893,
|
|
"learning_rate": 1.1078981028516421e-05,
|
|
"loss": 0.7546,
|
|
"step": 4695
|
|
},
|
|
{
|
|
"epoch": 0.8901515151515151,
|
|
"grad_norm": 0.03913350636593797,
|
|
"learning_rate": 1.08927225476574e-05,
|
|
"loss": 0.7555,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.8910984848484849,
|
|
"grad_norm": 0.03620266304429595,
|
|
"learning_rate": 1.0707984041450673e-05,
|
|
"loss": 0.7393,
|
|
"step": 4705
|
|
},
|
|
{
|
|
"epoch": 0.8920454545454546,
|
|
"grad_norm": 0.0372176814841684,
|
|
"learning_rate": 1.0524767528466766e-05,
|
|
"loss": 0.7815,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 0.8929924242424242,
|
|
"grad_norm": 0.04163117308071071,
|
|
"learning_rate": 1.034307501064589e-05,
|
|
"loss": 0.7744,
|
|
"step": 4715
|
|
},
|
|
{
|
|
"epoch": 0.8939393939393939,
|
|
"grad_norm": 0.03841314072028053,
|
|
"learning_rate": 1.0162908473276133e-05,
|
|
"loss": 0.7441,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 0.8948863636363636,
|
|
"grad_norm": 0.03658511014566751,
|
|
"learning_rate": 9.984269884971796e-06,
|
|
"loss": 0.7534,
|
|
"step": 4725
|
|
},
|
|
{
|
|
"epoch": 0.8958333333333334,
|
|
"grad_norm": 0.035726934558083914,
|
|
"learning_rate": 9.807161197651742e-06,
|
|
"loss": 0.7561,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 0.896780303030303,
|
|
"grad_norm": 0.03786917865045401,
|
|
"learning_rate": 9.63158434651825e-06,
|
|
"loss": 0.753,
|
|
"step": 4735
|
|
},
|
|
{
|
|
"epoch": 0.8977272727272727,
|
|
"grad_norm": 0.03878081614015611,
|
|
"learning_rate": 9.45754125003576e-06,
|
|
"loss": 0.7665,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 0.8986742424242424,
|
|
"grad_norm": 0.03776273136819908,
|
|
"learning_rate": 9.285033809909863e-06,
|
|
"loss": 0.7882,
|
|
"step": 4745
|
|
},
|
|
{
|
|
"epoch": 0.8996212121212122,
|
|
"grad_norm": 0.04079662714361428,
|
|
"learning_rate": 9.114063911066676e-06,
|
|
"loss": 0.7775,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 0.9005681818181818,
|
|
"grad_norm": 0.04107251149823735,
|
|
"learning_rate": 8.944633421632169e-06,
|
|
"loss": 0.7785,
|
|
"step": 4755
|
|
},
|
|
{
|
|
"epoch": 0.9015151515151515,
|
|
"grad_norm": 0.043937037368177494,
|
|
"learning_rate": 8.776744192911666e-06,
|
|
"loss": 0.7709,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 0.9024621212121212,
|
|
"grad_norm": 0.03806032575275296,
|
|
"learning_rate": 8.610398059369733e-06,
|
|
"loss": 0.7398,
|
|
"step": 4765
|
|
},
|
|
{
|
|
"epoch": 0.9034090909090909,
|
|
"grad_norm": 0.03989849682979902,
|
|
"learning_rate": 8.445596838610136e-06,
|
|
"loss": 0.7839,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 0.9043560606060606,
|
|
"grad_norm": 0.03804089571024527,
|
|
"learning_rate": 8.282342331355896e-06,
|
|
"loss": 0.737,
|
|
"step": 4775
|
|
},
|
|
{
|
|
"epoch": 0.9053030303030303,
|
|
"grad_norm": 0.036823538728651795,
|
|
"learning_rate": 8.120636321429618e-06,
|
|
"loss": 0.7365,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 0.90625,
|
|
"grad_norm": 0.039238403212191623,
|
|
"learning_rate": 7.960480575734162e-06,
|
|
"loss": 0.7679,
|
|
"step": 4785
|
|
},
|
|
{
|
|
"epoch": 0.9071969696969697,
|
|
"grad_norm": 0.03655300704953951,
|
|
"learning_rate": 7.801876844233102e-06,
|
|
"loss": 0.7276,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 0.9081439393939394,
|
|
"grad_norm": 0.038671267549804565,
|
|
"learning_rate": 7.64482685993174e-06,
|
|
"loss": 0.754,
|
|
"step": 4795
|
|
},
|
|
{
|
|
"epoch": 0.9090909090909091,
|
|
"grad_norm": 0.04012488210471297,
|
|
"learning_rate": 7.489332338858201e-06,
|
|
"loss": 0.7706,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.9100378787878788,
|
|
"grad_norm": 0.039340313195190324,
|
|
"learning_rate": 7.3353949800445625e-06,
|
|
"loss": 0.7437,
|
|
"step": 4805
|
|
},
|
|
{
|
|
"epoch": 0.9109848484848485,
|
|
"grad_norm": 0.03623402554079789,
|
|
"learning_rate": 7.1830164655084175e-06,
|
|
"loss": 0.747,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 0.9119318181818182,
|
|
"grad_norm": 0.040697271432715135,
|
|
"learning_rate": 7.032198460234367e-06,
|
|
"loss": 0.7624,
|
|
"step": 4815
|
|
},
|
|
{
|
|
"epoch": 0.9128787878787878,
|
|
"grad_norm": 0.03876794678188874,
|
|
"learning_rate": 6.88294261215595e-06,
|
|
"loss": 0.7132,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 0.9138257575757576,
|
|
"grad_norm": 0.039259366616565435,
|
|
"learning_rate": 6.7352505521375445e-06,
|
|
"loss": 0.768,
|
|
"step": 4825
|
|
},
|
|
{
|
|
"epoch": 0.9147727272727273,
|
|
"grad_norm": 0.04205245328852447,
|
|
"learning_rate": 6.5891238939566275e-06,
|
|
"loss": 0.78,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 0.915719696969697,
|
|
"grad_norm": 0.041430998012228624,
|
|
"learning_rate": 6.444564234286059e-06,
|
|
"loss": 0.7476,
|
|
"step": 4835
|
|
},
|
|
{
|
|
"epoch": 0.9166666666666666,
|
|
"grad_norm": 0.04090068304483327,
|
|
"learning_rate": 6.301573152676664e-06,
|
|
"loss": 0.7832,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 0.9176136363636364,
|
|
"grad_norm": 0.03893377913410083,
|
|
"learning_rate": 6.160152211540059e-06,
|
|
"loss": 0.766,
|
|
"step": 4845
|
|
},
|
|
{
|
|
"epoch": 0.9185606060606061,
|
|
"grad_norm": 0.03562213581544829,
|
|
"learning_rate": 6.020302956131434e-06,
|
|
"loss": 0.7506,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 0.9195075757575758,
|
|
"grad_norm": 0.03858250177735203,
|
|
"learning_rate": 5.8820269145327335e-06,
|
|
"loss": 0.7449,
|
|
"step": 4855
|
|
},
|
|
{
|
|
"epoch": 0.9204545454545454,
|
|
"grad_norm": 0.03698269538357442,
|
|
"learning_rate": 5.7453255976360526e-06,
|
|
"loss": 0.7419,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 0.9214015151515151,
|
|
"grad_norm": 0.039313884060948906,
|
|
"learning_rate": 5.6102004991269655e-06,
|
|
"loss": 0.7509,
|
|
"step": 4865
|
|
},
|
|
{
|
|
"epoch": 0.9223484848484849,
|
|
"grad_norm": 0.038202779909671226,
|
|
"learning_rate": 5.476653095468292e-06,
|
|
"loss": 0.7404,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 0.9232954545454546,
|
|
"grad_norm": 0.038488820882748215,
|
|
"learning_rate": 5.344684845883957e-06,
|
|
"loss": 0.7584,
|
|
"step": 4875
|
|
},
|
|
{
|
|
"epoch": 0.9242424242424242,
|
|
"grad_norm": 0.03673421514324292,
|
|
"learning_rate": 5.214297192343104e-06,
|
|
"loss": 0.7493,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 0.9251893939393939,
|
|
"grad_norm": 0.0350920577902006,
|
|
"learning_rate": 5.085491559544175e-06,
|
|
"loss": 0.7834,
|
|
"step": 4885
|
|
},
|
|
{
|
|
"epoch": 0.9261363636363636,
|
|
"grad_norm": 0.03508459667371372,
|
|
"learning_rate": 4.9582693548994914e-06,
|
|
"loss": 0.761,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 0.9270833333333334,
|
|
"grad_norm": 0.03866865889378931,
|
|
"learning_rate": 4.832631968519862e-06,
|
|
"loss": 0.7536,
|
|
"step": 4895
|
|
},
|
|
{
|
|
"epoch": 0.928030303030303,
|
|
"grad_norm": 0.03900577265735235,
|
|
"learning_rate": 4.708580773199333e-06,
|
|
"loss": 0.7588,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.9289772727272727,
|
|
"grad_norm": 0.03975550249874538,
|
|
"learning_rate": 4.586117124400196e-06,
|
|
"loss": 0.7301,
|
|
"step": 4905
|
|
},
|
|
{
|
|
"epoch": 0.9299242424242424,
|
|
"grad_norm": 0.03658042099322432,
|
|
"learning_rate": 4.465242360238269e-06,
|
|
"loss": 0.7192,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 0.9308712121212122,
|
|
"grad_norm": 0.03538458441608596,
|
|
"learning_rate": 4.345957801468092e-06,
|
|
"loss": 0.7537,
|
|
"step": 4915
|
|
},
|
|
{
|
|
"epoch": 0.9318181818181818,
|
|
"grad_norm": 0.039696089459974056,
|
|
"learning_rate": 4.228264751468752e-06,
|
|
"loss": 0.7578,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 0.9327651515151515,
|
|
"grad_norm": 0.03891493239675872,
|
|
"learning_rate": 4.112164496229381e-06,
|
|
"loss": 0.7988,
|
|
"step": 4925
|
|
},
|
|
{
|
|
"epoch": 0.9337121212121212,
|
|
"grad_norm": 0.0391191766137019,
|
|
"learning_rate": 3.997658304335249e-06,
|
|
"loss": 0.748,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 0.9346590909090909,
|
|
"grad_norm": 0.041273803822080235,
|
|
"learning_rate": 3.88474742695391e-06,
|
|
"loss": 0.7444,
|
|
"step": 4935
|
|
},
|
|
{
|
|
"epoch": 0.9356060606060606,
|
|
"grad_norm": 0.03833780199551714,
|
|
"learning_rate": 3.77343309782151e-06,
|
|
"loss": 0.7535,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 0.9365530303030303,
|
|
"grad_norm": 0.0377506973575768,
|
|
"learning_rate": 3.663716533229183e-06,
|
|
"loss": 0.7603,
|
|
"step": 4945
|
|
},
|
|
{
|
|
"epoch": 0.9375,
|
|
"grad_norm": 0.03920529885627104,
|
|
"learning_rate": 3.5555989320099952e-06,
|
|
"loss": 0.7346,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 0.9384469696969697,
|
|
"grad_norm": 0.03926812513866438,
|
|
"learning_rate": 3.4490814755256724e-06,
|
|
"loss": 0.7882,
|
|
"step": 4955
|
|
},
|
|
{
|
|
"epoch": 0.9393939393939394,
|
|
"grad_norm": 0.03915603389468844,
|
|
"learning_rate": 3.344165327653725e-06,
|
|
"loss": 0.7804,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 0.9403409090909091,
|
|
"grad_norm": 0.03692758597782573,
|
|
"learning_rate": 3.2408516347747606e-06,
|
|
"loss": 0.7615,
|
|
"step": 4965
|
|
},
|
|
{
|
|
"epoch": 0.9412878787878788,
|
|
"grad_norm": 0.040111477519722376,
|
|
"learning_rate": 3.1391415257599583e-06,
|
|
"loss": 0.7624,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 0.9422348484848485,
|
|
"grad_norm": 0.036054816584654786,
|
|
"learning_rate": 3.039036111958715e-06,
|
|
"loss": 0.7595,
|
|
"step": 4975
|
|
},
|
|
{
|
|
"epoch": 0.9431818181818182,
|
|
"grad_norm": 0.03402332704452141,
|
|
"learning_rate": 2.9405364871864514e-06,
|
|
"loss": 0.7569,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 0.9441287878787878,
|
|
"grad_norm": 0.039782631917374064,
|
|
"learning_rate": 2.8436437277128075e-06,
|
|
"loss": 0.7616,
|
|
"step": 4985
|
|
},
|
|
{
|
|
"epoch": 0.9450757575757576,
|
|
"grad_norm": 0.03902157823658662,
|
|
"learning_rate": 2.7483588922497025e-06,
|
|
"loss": 0.7324,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 0.9460227272727273,
|
|
"grad_norm": 0.039325622745832914,
|
|
"learning_rate": 2.6546830219399405e-06,
|
|
"loss": 0.7597,
|
|
"step": 4995
|
|
},
|
|
{
|
|
"epoch": 0.946969696969697,
|
|
"grad_norm": 0.03881835950586153,
|
|
"learning_rate": 2.562617140345691e-06,
|
|
"loss": 0.7473,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.9479166666666666,
|
|
"grad_norm": 0.039364545515671236,
|
|
"learning_rate": 2.472162253437343e-06,
|
|
"loss": 0.7553,
|
|
"step": 5005
|
|
},
|
|
{
|
|
"epoch": 0.9488636363636364,
|
|
"grad_norm": 0.03556550322704117,
|
|
"learning_rate": 2.3833193495825853e-06,
|
|
"loss": 0.7329,
|
|
"step": 5010
|
|
},
|
|
{
|
|
"epoch": 0.9498106060606061,
|
|
"grad_norm": 0.03769878462512779,
|
|
"learning_rate": 2.2960893995355443e-06,
|
|
"loss": 0.7677,
|
|
"step": 5015
|
|
},
|
|
{
|
|
"epoch": 0.9507575757575758,
|
|
"grad_norm": 0.04179821670604519,
|
|
"learning_rate": 2.210473356426146e-06,
|
|
"loss": 0.7329,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 0.9517045454545454,
|
|
"grad_norm": 0.03551927757361974,
|
|
"learning_rate": 2.1264721557497866e-06,
|
|
"loss": 0.745,
|
|
"step": 5025
|
|
},
|
|
{
|
|
"epoch": 0.9526515151515151,
|
|
"grad_norm": 0.035762866838308725,
|
|
"learning_rate": 2.0440867153570627e-06,
|
|
"loss": 0.757,
|
|
"step": 5030
|
|
},
|
|
{
|
|
"epoch": 0.9535984848484849,
|
|
"grad_norm": 0.03810233771587777,
|
|
"learning_rate": 1.9633179354437257e-06,
|
|
"loss": 0.737,
|
|
"step": 5035
|
|
},
|
|
{
|
|
"epoch": 0.9545454545454546,
|
|
"grad_norm": 0.03824899164000916,
|
|
"learning_rate": 1.8841666985408566e-06,
|
|
"loss": 0.7708,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 0.9554924242424242,
|
|
"grad_norm": 0.03784087071597624,
|
|
"learning_rate": 1.8066338695052585e-06,
|
|
"loss": 0.7791,
|
|
"step": 5045
|
|
},
|
|
{
|
|
"epoch": 0.9564393939393939,
|
|
"grad_norm": 0.03875228767293192,
|
|
"learning_rate": 1.730720295509963e-06,
|
|
"loss": 0.757,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 0.9573863636363636,
|
|
"grad_norm": 0.03959979451862991,
|
|
"learning_rate": 1.6564268060349884e-06,
|
|
"loss": 0.7581,
|
|
"step": 5055
|
|
},
|
|
{
|
|
"epoch": 0.9583333333333334,
|
|
"grad_norm": 0.03746814864806074,
|
|
"learning_rate": 1.583754212858329e-06,
|
|
"loss": 0.7492,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 0.959280303030303,
|
|
"grad_norm": 0.03840085310645041,
|
|
"learning_rate": 1.5127033100469477e-06,
|
|
"loss": 0.7428,
|
|
"step": 5065
|
|
},
|
|
{
|
|
"epoch": 0.9602272727272727,
|
|
"grad_norm": 0.04015902576846351,
|
|
"learning_rate": 1.4432748739482468e-06,
|
|
"loss": 0.7601,
|
|
"step": 5070
|
|
},
|
|
{
|
|
"epoch": 0.9611742424242424,
|
|
"grad_norm": 0.03815333482717027,
|
|
"learning_rate": 1.3754696631815276e-06,
|
|
"loss": 0.7781,
|
|
"step": 5075
|
|
},
|
|
{
|
|
"epoch": 0.9621212121212122,
|
|
"grad_norm": 0.039134552740557424,
|
|
"learning_rate": 1.3092884186296282e-06,
|
|
"loss": 0.7605,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 0.9630681818181818,
|
|
"grad_norm": 0.03873683575508503,
|
|
"learning_rate": 1.2447318634309977e-06,
|
|
"loss": 0.7465,
|
|
"step": 5085
|
|
},
|
|
{
|
|
"epoch": 0.9640151515151515,
|
|
"grad_norm": 0.038640801639052244,
|
|
"learning_rate": 1.1818007029716525e-06,
|
|
"loss": 0.7616,
|
|
"step": 5090
|
|
},
|
|
{
|
|
"epoch": 0.9649621212121212,
|
|
"grad_norm": 0.04251912678550345,
|
|
"learning_rate": 1.1204956248774655e-06,
|
|
"loss": 0.747,
|
|
"step": 5095
|
|
},
|
|
{
|
|
"epoch": 0.9659090909090909,
|
|
"grad_norm": 0.039291943037907916,
|
|
"learning_rate": 1.0608172990067553e-06,
|
|
"loss": 0.7628,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.9668560606060606,
|
|
"grad_norm": 0.04115060966519561,
|
|
"learning_rate": 1.0027663774429096e-06,
|
|
"loss": 0.7533,
|
|
"step": 5105
|
|
},
|
|
{
|
|
"epoch": 0.9678030303030303,
|
|
"grad_norm": 0.040550851971633786,
|
|
"learning_rate": 9.463434944872395e-07,
|
|
"loss": 0.77,
|
|
"step": 5110
|
|
},
|
|
{
|
|
"epoch": 0.96875,
|
|
"grad_norm": 0.036797005792547945,
|
|
"learning_rate": 8.91549266652053e-07,
|
|
"loss": 0.7296,
|
|
"step": 5115
|
|
},
|
|
{
|
|
"epoch": 0.9696969696969697,
|
|
"grad_norm": 0.03621498987532269,
|
|
"learning_rate": 8.383842926539929e-07,
|
|
"loss": 0.7682,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 0.9706439393939394,
|
|
"grad_norm": 0.03987403439986009,
|
|
"learning_rate": 7.868491534073928e-07,
|
|
"loss": 0.793,
|
|
"step": 5125
|
|
},
|
|
{
|
|
"epoch": 0.9715909090909091,
|
|
"grad_norm": 0.03862093235236962,
|
|
"learning_rate": 7.369444120179647e-07,
|
|
"loss": 0.7388,
|
|
"step": 5130
|
|
},
|
|
{
|
|
"epoch": 0.9725378787878788,
|
|
"grad_norm": 0.03845042714550149,
|
|
"learning_rate": 6.88670613776704e-07,
|
|
"loss": 0.7571,
|
|
"step": 5135
|
|
},
|
|
{
|
|
"epoch": 0.9734848484848485,
|
|
"grad_norm": 0.03537218356309702,
|
|
"learning_rate": 6.420282861538283e-07,
|
|
"loss": 0.7192,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 0.9744318181818182,
|
|
"grad_norm": 0.03716360855745044,
|
|
"learning_rate": 5.970179387931151e-07,
|
|
"loss": 0.7498,
|
|
"step": 5145
|
|
},
|
|
{
|
|
"epoch": 0.9753787878787878,
|
|
"grad_norm": 0.03704971797049268,
|
|
"learning_rate": 5.536400635062721e-07,
|
|
"loss": 0.7639,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 0.9763257575757576,
|
|
"grad_norm": 0.03658375948794085,
|
|
"learning_rate": 5.118951342675592e-07,
|
|
"loss": 0.7607,
|
|
"step": 5155
|
|
},
|
|
{
|
|
"epoch": 0.9772727272727273,
|
|
"grad_norm": 0.038160851981614306,
|
|
"learning_rate": 4.717836072086589e-07,
|
|
"loss": 0.7761,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 0.978219696969697,
|
|
"grad_norm": 0.03554298384663066,
|
|
"learning_rate": 4.3330592061361357e-07,
|
|
"loss": 0.7515,
|
|
"step": 5165
|
|
},
|
|
{
|
|
"epoch": 0.9791666666666666,
|
|
"grad_norm": 0.035988940616932245,
|
|
"learning_rate": 3.964624949141626e-07,
|
|
"loss": 0.7287,
|
|
"step": 5170
|
|
},
|
|
{
|
|
"epoch": 0.9801136363636364,
|
|
"grad_norm": 0.038573387129357734,
|
|
"learning_rate": 3.6125373268499625e-07,
|
|
"loss": 0.7584,
|
|
"step": 5175
|
|
},
|
|
{
|
|
"epoch": 0.9810606060606061,
|
|
"grad_norm": 0.03560535292438474,
|
|
"learning_rate": 3.2768001863945905e-07,
|
|
"loss": 0.7381,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 0.9820075757575758,
|
|
"grad_norm": 0.03470144345138998,
|
|
"learning_rate": 2.9574171962533644e-07,
|
|
"loss": 0.7447,
|
|
"step": 5185
|
|
},
|
|
{
|
|
"epoch": 0.9829545454545454,
|
|
"grad_norm": 0.038083967145801485,
|
|
"learning_rate": 2.654391846207915e-07,
|
|
"loss": 0.7667,
|
|
"step": 5190
|
|
},
|
|
{
|
|
"epoch": 0.9839015151515151,
|
|
"grad_norm": 0.03704077024461041,
|
|
"learning_rate": 2.3677274473063444e-07,
|
|
"loss": 0.7666,
|
|
"step": 5195
|
|
},
|
|
{
|
|
"epoch": 0.9848484848484849,
|
|
"grad_norm": 0.04114138125476826,
|
|
"learning_rate": 2.0974271318260905e-07,
|
|
"loss": 0.7681,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 0.9857954545454546,
|
|
"grad_norm": 0.03557878772125844,
|
|
"learning_rate": 1.8434938532406186e-07,
|
|
"loss": 0.7482,
|
|
"step": 5205
|
|
},
|
|
{
|
|
"epoch": 0.9867424242424242,
|
|
"grad_norm": 0.03606070157019983,
|
|
"learning_rate": 1.6059303861862826e-07,
|
|
"loss": 0.7404,
|
|
"step": 5210
|
|
},
|
|
{
|
|
"epoch": 0.9876893939393939,
|
|
"grad_norm": 0.037415707092603924,
|
|
"learning_rate": 1.3847393264330153e-07,
|
|
"loss": 0.768,
|
|
"step": 5215
|
|
},
|
|
{
|
|
"epoch": 0.9886363636363636,
|
|
"grad_norm": 0.03725866754101771,
|
|
"learning_rate": 1.1799230908550173e-07,
|
|
"loss": 0.7409,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 0.9895833333333334,
|
|
"grad_norm": 0.039547937064916217,
|
|
"learning_rate": 9.914839174049449e-08,
|
|
"loss": 0.7408,
|
|
"step": 5225
|
|
},
|
|
{
|
|
"epoch": 0.990530303030303,
|
|
"grad_norm": 0.03604106643192906,
|
|
"learning_rate": 8.194238650889307e-08,
|
|
"loss": 0.7571,
|
|
"step": 5230
|
|
},
|
|
{
|
|
"epoch": 0.9914772727272727,
|
|
"grad_norm": 0.03696081603757769,
|
|
"learning_rate": 6.637448139447666e-08,
|
|
"loss": 0.7416,
|
|
"step": 5235
|
|
},
|
|
{
|
|
"epoch": 0.9924242424242424,
|
|
"grad_norm": 0.037770041786195266,
|
|
"learning_rate": 5.244484650207548e-08,
|
|
"loss": 0.756,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 0.9933712121212122,
|
|
"grad_norm": 0.04046458037414051,
|
|
"learning_rate": 4.01536340357389e-08,
|
|
"loss": 0.759,
|
|
"step": 5245
|
|
},
|
|
{
|
|
"epoch": 0.9943181818181818,
|
|
"grad_norm": 0.03827692567603896,
|
|
"learning_rate": 2.9500978297103407e-08,
|
|
"loss": 0.7787,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 0.9952651515151515,
|
|
"grad_norm": 0.03835615698389389,
|
|
"learning_rate": 2.0486995683860476e-08,
|
|
"loss": 0.7284,
|
|
"step": 5255
|
|
},
|
|
{
|
|
"epoch": 0.9962121212121212,
|
|
"grad_norm": 0.04008817912232536,
|
|
"learning_rate": 1.3111784688507599e-08,
|
|
"loss": 0.7493,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 0.9971590909090909,
|
|
"grad_norm": 0.03940503331438013,
|
|
"learning_rate": 7.375425897299115e-09,
|
|
"loss": 0.7522,
|
|
"step": 5265
|
|
},
|
|
{
|
|
"epoch": 0.9981060606060606,
|
|
"grad_norm": 0.03788195266551941,
|
|
"learning_rate": 3.277981989346923e-09,
|
|
"loss": 0.746,
|
|
"step": 5270
|
|
},
|
|
{
|
|
"epoch": 0.9990530303030303,
|
|
"grad_norm": 0.03792409661384259,
|
|
"learning_rate": 8.194977359210486e-10,
|
|
"loss": 0.7443,
|
|
"step": 5275
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.04145596279829835,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.7751,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"eval_loss": 1.116625189781189,
|
|
"eval_runtime": 1241.8314,
|
|
"eval_samples_per_second": 194.261,
|
|
"eval_steps_per_second": 6.071,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 5280,
|
|
"total_flos": 771937243234304.0,
|
|
"train_loss": 0.8302312182657646,
|
|
"train_runtime": 21905.5744,
|
|
"train_samples_per_second": 30.851,
|
|
"train_steps_per_second": 0.241
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 5280,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 771937243234304.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|