7436 lines
182 KiB
JSON
7436 lines
182 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9998579209093061,
|
|
"eval_steps": 500,
|
|
"global_step": 5278,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.000947193937958797,
|
|
"grad_norm": 52.55722061842073,
|
|
"learning_rate": 2.8409090909090907e-06,
|
|
"loss": 3.7816,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.001894387875917594,
|
|
"grad_norm": 38.95340149423018,
|
|
"learning_rate": 5.6818181818181815e-06,
|
|
"loss": 3.5056,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.0028415818138763913,
|
|
"grad_norm": 21.712424534014453,
|
|
"learning_rate": 8.522727272727271e-06,
|
|
"loss": 2.882,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.003788775751835188,
|
|
"grad_norm": 9.040565708034416,
|
|
"learning_rate": 1.1363636363636363e-05,
|
|
"loss": 2.173,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.004735969689793985,
|
|
"grad_norm": 4.220912189865154,
|
|
"learning_rate": 1.4204545454545453e-05,
|
|
"loss": 1.6834,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.005683163627752783,
|
|
"grad_norm": 1.9714256696877792,
|
|
"learning_rate": 1.7045454545454543e-05,
|
|
"loss": 1.3769,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.006630357565711579,
|
|
"grad_norm": 1.0423474521511698,
|
|
"learning_rate": 1.9886363636363634e-05,
|
|
"loss": 1.2239,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.007577551503670376,
|
|
"grad_norm": 1.2391396539653674,
|
|
"learning_rate": 2.2727272727272726e-05,
|
|
"loss": 1.1367,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.008524745441629173,
|
|
"grad_norm": 0.46298881388281793,
|
|
"learning_rate": 2.5568181818181814e-05,
|
|
"loss": 1.0701,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.00947193937958797,
|
|
"grad_norm": 0.3539086469394409,
|
|
"learning_rate": 2.8409090909090906e-05,
|
|
"loss": 1.0452,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.010419133317546767,
|
|
"grad_norm": 0.303699683898037,
|
|
"learning_rate": 3.125e-05,
|
|
"loss": 1.0418,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.011366327255505565,
|
|
"grad_norm": 0.2133546406490209,
|
|
"learning_rate": 3.4090909090909085e-05,
|
|
"loss": 0.9768,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.012313521193464362,
|
|
"grad_norm": 0.2101107113029947,
|
|
"learning_rate": 3.693181818181818e-05,
|
|
"loss": 0.9808,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.013260715131423158,
|
|
"grad_norm": 0.2001787181890806,
|
|
"learning_rate": 3.977272727272727e-05,
|
|
"loss": 0.9558,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.014207909069381956,
|
|
"grad_norm": 0.2060240507616133,
|
|
"learning_rate": 4.261363636363637e-05,
|
|
"loss": 0.961,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.015155103007340753,
|
|
"grad_norm": 0.1998791188434844,
|
|
"learning_rate": 4.545454545454545e-05,
|
|
"loss": 0.9515,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.01610229694529955,
|
|
"grad_norm": 0.15833294762038227,
|
|
"learning_rate": 4.8295454545454537e-05,
|
|
"loss": 0.9414,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.017049490883258345,
|
|
"grad_norm": 0.1520760900480208,
|
|
"learning_rate": 5.113636363636363e-05,
|
|
"loss": 0.8963,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.017996684821217145,
|
|
"grad_norm": 0.1388506087010938,
|
|
"learning_rate": 5.3977272727272727e-05,
|
|
"loss": 0.9501,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.01894387875917594,
|
|
"grad_norm": 0.1410344131187388,
|
|
"learning_rate": 5.681818181818181e-05,
|
|
"loss": 0.909,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.019891072697134738,
|
|
"grad_norm": 0.12318568339151473,
|
|
"learning_rate": 5.96590909090909e-05,
|
|
"loss": 0.9107,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.020838266635093534,
|
|
"grad_norm": 0.1279058146057783,
|
|
"learning_rate": 6.25e-05,
|
|
"loss": 0.8978,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.02178546057305233,
|
|
"grad_norm": 0.11032351265233929,
|
|
"learning_rate": 6.534090909090909e-05,
|
|
"loss": 0.931,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.02273265451101113,
|
|
"grad_norm": 0.1119942887320865,
|
|
"learning_rate": 6.818181818181817e-05,
|
|
"loss": 0.8893,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.023679848448969927,
|
|
"grad_norm": 0.10935745636043816,
|
|
"learning_rate": 7.102272727272727e-05,
|
|
"loss": 0.9006,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.024627042386928723,
|
|
"grad_norm": 0.1083392524866063,
|
|
"learning_rate": 7.386363636363635e-05,
|
|
"loss": 0.8987,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.02557423632488752,
|
|
"grad_norm": 0.09975528745630019,
|
|
"learning_rate": 7.670454545454545e-05,
|
|
"loss": 0.8831,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.026521430262846316,
|
|
"grad_norm": 0.08603398438897839,
|
|
"learning_rate": 7.954545454545454e-05,
|
|
"loss": 0.8836,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.027468624200805116,
|
|
"grad_norm": 0.08005996371772706,
|
|
"learning_rate": 8.238636363636362e-05,
|
|
"loss": 0.8932,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.028415818138763912,
|
|
"grad_norm": 0.09105132634043626,
|
|
"learning_rate": 8.522727272727273e-05,
|
|
"loss": 0.8953,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.02936301207672271,
|
|
"grad_norm": 0.08350347397936869,
|
|
"learning_rate": 8.806818181818182e-05,
|
|
"loss": 0.8901,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.030310206014681505,
|
|
"grad_norm": 0.06553921290329484,
|
|
"learning_rate": 9.09090909090909e-05,
|
|
"loss": 0.84,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.031257399952640305,
|
|
"grad_norm": 0.07002312647287917,
|
|
"learning_rate": 9.374999999999999e-05,
|
|
"loss": 0.9037,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.0322045938905991,
|
|
"grad_norm": 0.07442774560302336,
|
|
"learning_rate": 9.659090909090907e-05,
|
|
"loss": 0.8961,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.0331517878285579,
|
|
"grad_norm": 0.07978205419119488,
|
|
"learning_rate": 9.943181818181817e-05,
|
|
"loss": 0.8472,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.03409898176651669,
|
|
"grad_norm": 0.08212508538706279,
|
|
"learning_rate": 0.00010227272727272726,
|
|
"loss": 0.8828,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.03504617570447549,
|
|
"grad_norm": 0.06703812335472381,
|
|
"learning_rate": 0.00010511363636363635,
|
|
"loss": 0.8952,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.03599336964243429,
|
|
"grad_norm": 0.07274197409833637,
|
|
"learning_rate": 0.00010795454545454545,
|
|
"loss": 0.8884,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.03694056358039308,
|
|
"grad_norm": 0.06566094578471512,
|
|
"learning_rate": 0.00011079545454545454,
|
|
"loss": 0.8457,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.03788775751835188,
|
|
"grad_norm": 0.08307007406251704,
|
|
"learning_rate": 0.00011363636363636362,
|
|
"loss": 0.8723,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.038834951456310676,
|
|
"grad_norm": 0.08535904339597694,
|
|
"learning_rate": 0.00011647727272727271,
|
|
"loss": 0.9134,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.039782145394269476,
|
|
"grad_norm": 0.06790982585326019,
|
|
"learning_rate": 0.0001193181818181818,
|
|
"loss": 0.888,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.040729339332228276,
|
|
"grad_norm": 0.07237225374916947,
|
|
"learning_rate": 0.0001221590909090909,
|
|
"loss": 0.8629,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.04167653327018707,
|
|
"grad_norm": 0.07663584947407301,
|
|
"learning_rate": 0.000125,
|
|
"loss": 0.8979,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.04262372720814587,
|
|
"grad_norm": 0.06530101929213837,
|
|
"learning_rate": 0.00012784090909090907,
|
|
"loss": 0.893,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.04357092114610466,
|
|
"grad_norm": 0.0750675120954495,
|
|
"learning_rate": 0.00013068181818181817,
|
|
"loss": 0.8826,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.04451811508406346,
|
|
"grad_norm": 0.06833860955837964,
|
|
"learning_rate": 0.00013352272727272727,
|
|
"loss": 0.9042,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.04546530902202226,
|
|
"grad_norm": 0.06776526573633952,
|
|
"learning_rate": 0.00013636363636363634,
|
|
"loss": 0.8771,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.046412502959981054,
|
|
"grad_norm": 0.06177830383391277,
|
|
"learning_rate": 0.00013920454545454544,
|
|
"loss": 0.8735,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.047359696897939854,
|
|
"grad_norm": 0.06679265204109729,
|
|
"learning_rate": 0.00014204545454545454,
|
|
"loss": 0.8705,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.04830689083589865,
|
|
"grad_norm": 0.06611716647725416,
|
|
"learning_rate": 0.00014488636363636364,
|
|
"loss": 0.8716,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.04925408477385745,
|
|
"grad_norm": 0.06159220932412366,
|
|
"learning_rate": 0.0001477272727272727,
|
|
"loss": 0.8713,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.05020127871181625,
|
|
"grad_norm": 0.06823008150239136,
|
|
"learning_rate": 0.00015056818181818183,
|
|
"loss": 0.8969,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.05114847264977504,
|
|
"grad_norm": 0.0703778333367766,
|
|
"learning_rate": 0.0001534090909090909,
|
|
"loss": 0.8687,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.05209566658773384,
|
|
"grad_norm": 0.06489353446808296,
|
|
"learning_rate": 0.00015625,
|
|
"loss": 0.8639,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.05304286052569263,
|
|
"grad_norm": 0.06986694992701606,
|
|
"learning_rate": 0.00015909090909090907,
|
|
"loss": 0.8666,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.05399005446365143,
|
|
"grad_norm": 0.06723519153451205,
|
|
"learning_rate": 0.00016193181818181817,
|
|
"loss": 0.8764,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.05493724840161023,
|
|
"grad_norm": 0.07077673680927957,
|
|
"learning_rate": 0.00016477272727272724,
|
|
"loss": 0.8618,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.055884442339569025,
|
|
"grad_norm": 0.06891807376211805,
|
|
"learning_rate": 0.00016761363636363634,
|
|
"loss": 0.8745,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.056831636277527825,
|
|
"grad_norm": 0.06841751882175513,
|
|
"learning_rate": 0.00017045454545454547,
|
|
"loss": 0.8461,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.05777883021548662,
|
|
"grad_norm": 0.07176128138475886,
|
|
"learning_rate": 0.00017329545454545454,
|
|
"loss": 0.8667,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.05872602415344542,
|
|
"grad_norm": 0.07290498390552146,
|
|
"learning_rate": 0.00017613636363636364,
|
|
"loss": 0.8739,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.05967321809140422,
|
|
"grad_norm": 0.06773178575121884,
|
|
"learning_rate": 0.0001789772727272727,
|
|
"loss": 0.8871,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.06062041202936301,
|
|
"grad_norm": 0.0678345156257685,
|
|
"learning_rate": 0.0001818181818181818,
|
|
"loss": 0.8677,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.06156760596732181,
|
|
"grad_norm": 0.07520267059018662,
|
|
"learning_rate": 0.00018465909090909088,
|
|
"loss": 0.8762,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.06251479990528061,
|
|
"grad_norm": 0.06412335985278948,
|
|
"learning_rate": 0.00018749999999999998,
|
|
"loss": 0.8935,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.0634619938432394,
|
|
"grad_norm": 0.09455831472598197,
|
|
"learning_rate": 0.00019034090909090908,
|
|
"loss": 0.8799,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.0644091877811982,
|
|
"grad_norm": 0.08784917380610333,
|
|
"learning_rate": 0.00019318181818181815,
|
|
"loss": 0.8622,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.065356381719157,
|
|
"grad_norm": 0.0706205634228429,
|
|
"learning_rate": 0.00019602272727272727,
|
|
"loss": 0.8574,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.0663035756571158,
|
|
"grad_norm": 0.0647307909886003,
|
|
"learning_rate": 0.00019886363636363634,
|
|
"loss": 0.8542,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.06725076959507459,
|
|
"grad_norm": 0.06150113901124715,
|
|
"learning_rate": 0.00020170454545454544,
|
|
"loss": 0.8407,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.06819796353303338,
|
|
"grad_norm": 0.06257476247931755,
|
|
"learning_rate": 0.0002045454545454545,
|
|
"loss": 0.8564,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.06914515747099219,
|
|
"grad_norm": 0.06213175384030445,
|
|
"learning_rate": 0.0002073863636363636,
|
|
"loss": 0.8481,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.07009235140895098,
|
|
"grad_norm": 0.07243770022823882,
|
|
"learning_rate": 0.0002102272727272727,
|
|
"loss": 0.8742,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.07103954534690977,
|
|
"grad_norm": 0.06094029025046222,
|
|
"learning_rate": 0.00021306818181818178,
|
|
"loss": 0.8686,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.07198673928486858,
|
|
"grad_norm": 0.07211978873607085,
|
|
"learning_rate": 0.0002159090909090909,
|
|
"loss": 0.8404,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.07293393322282737,
|
|
"grad_norm": 0.0664949706014046,
|
|
"learning_rate": 0.00021874999999999998,
|
|
"loss": 0.8767,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.07388112716078617,
|
|
"grad_norm": 0.07048102073887626,
|
|
"learning_rate": 0.00022159090909090908,
|
|
"loss": 0.8662,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.07482832109874497,
|
|
"grad_norm": 0.06550092590742955,
|
|
"learning_rate": 0.00022443181818181815,
|
|
"loss": 0.8546,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.07577551503670377,
|
|
"grad_norm": 0.07566899187849191,
|
|
"learning_rate": 0.00022727272727272725,
|
|
"loss": 0.8551,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.07672270897466256,
|
|
"grad_norm": 0.06303208036750815,
|
|
"learning_rate": 0.00023011363636363634,
|
|
"loss": 0.8699,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.07766990291262135,
|
|
"grad_norm": 0.06875716346372687,
|
|
"learning_rate": 0.00023295454545454542,
|
|
"loss": 0.8627,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.07861709685058016,
|
|
"grad_norm": 0.08595111194659674,
|
|
"learning_rate": 0.00023579545454545454,
|
|
"loss": 0.8834,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.07956429078853895,
|
|
"grad_norm": 0.061252177097668066,
|
|
"learning_rate": 0.0002386363636363636,
|
|
"loss": 0.8589,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.08051148472649775,
|
|
"grad_norm": 0.06674992779765852,
|
|
"learning_rate": 0.0002414772727272727,
|
|
"loss": 0.8807,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.08145867866445655,
|
|
"grad_norm": 0.06735600365269058,
|
|
"learning_rate": 0.0002443181818181818,
|
|
"loss": 0.8632,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.08240587260241534,
|
|
"grad_norm": 0.06263588681026308,
|
|
"learning_rate": 0.0002471590909090909,
|
|
"loss": 0.9034,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.08335306654037414,
|
|
"grad_norm": 0.064198353107829,
|
|
"learning_rate": 0.00025,
|
|
"loss": 0.9008,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.08430026047833294,
|
|
"grad_norm": 0.06187143454881962,
|
|
"learning_rate": 0.00025284090909090905,
|
|
"loss": 0.8421,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.08524745441629174,
|
|
"grad_norm": 0.05826341458036729,
|
|
"learning_rate": 0.00025568181818181815,
|
|
"loss": 0.8654,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.08619464835425053,
|
|
"grad_norm": 0.06153983398074908,
|
|
"learning_rate": 0.00025852272727272725,
|
|
"loss": 0.8345,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.08714184229220932,
|
|
"grad_norm": 0.057544439891252096,
|
|
"learning_rate": 0.00026136363636363634,
|
|
"loss": 0.8474,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.08808903623016813,
|
|
"grad_norm": 0.05386176310877567,
|
|
"learning_rate": 0.00026420454545454544,
|
|
"loss": 0.8449,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.08903623016812692,
|
|
"grad_norm": 0.05733131738608226,
|
|
"learning_rate": 0.00026704545454545454,
|
|
"loss": 0.8557,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.08998342410608572,
|
|
"grad_norm": 0.05468199774083347,
|
|
"learning_rate": 0.00026988636363636364,
|
|
"loss": 0.8738,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.09093061804404452,
|
|
"grad_norm": 0.0732652071369859,
|
|
"learning_rate": 0.0002727272727272727,
|
|
"loss": 0.8764,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.09187781198200332,
|
|
"grad_norm": 0.06609967510300549,
|
|
"learning_rate": 0.0002755681818181818,
|
|
"loss": 0.8695,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.09282500591996211,
|
|
"grad_norm": 0.06205800779765995,
|
|
"learning_rate": 0.0002784090909090909,
|
|
"loss": 0.8616,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.09377219985792092,
|
|
"grad_norm": 0.05861483996354783,
|
|
"learning_rate": 0.00028125,
|
|
"loss": 0.8701,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.09471939379587971,
|
|
"grad_norm": 0.060736665872329516,
|
|
"learning_rate": 0.0002840909090909091,
|
|
"loss": 0.8947,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.0956665877338385,
|
|
"grad_norm": 0.06142593731376609,
|
|
"learning_rate": 0.0002869318181818182,
|
|
"loss": 0.8526,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.0966137816717973,
|
|
"grad_norm": 0.054392069599614346,
|
|
"learning_rate": 0.0002897727272727273,
|
|
"loss": 0.8466,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.0975609756097561,
|
|
"grad_norm": 0.06371556067004604,
|
|
"learning_rate": 0.0002926136363636363,
|
|
"loss": 0.8121,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.0985081695477149,
|
|
"grad_norm": 0.06255544482298064,
|
|
"learning_rate": 0.0002954545454545454,
|
|
"loss": 0.8398,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.09945536348567369,
|
|
"grad_norm": 0.05906918417826802,
|
|
"learning_rate": 0.0002982954545454545,
|
|
"loss": 0.8763,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.1004025574236325,
|
|
"grad_norm": 0.05701807064591476,
|
|
"learning_rate": 0.0002999998687698221,
|
|
"loss": 0.8712,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.10134975136159129,
|
|
"grad_norm": 0.061038500218446035,
|
|
"learning_rate": 0.00029999839243295787,
|
|
"loss": 0.8712,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.10229694529955008,
|
|
"grad_norm": 0.056276485767051056,
|
|
"learning_rate": 0.0002999952757377059,
|
|
"loss": 0.8761,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.10324413923750889,
|
|
"grad_norm": 0.06244892878431511,
|
|
"learning_rate": 0.00029999051871814974,
|
|
"loss": 0.8711,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.10419133317546768,
|
|
"grad_norm": 0.05743365231512095,
|
|
"learning_rate": 0.0002999841214263116,
|
|
"loss": 0.8457,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.10513852711342647,
|
|
"grad_norm": 0.06440086459939007,
|
|
"learning_rate": 0.000299976083932151,
|
|
"loss": 0.8994,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.10608572105138526,
|
|
"grad_norm": 0.056927632875731916,
|
|
"learning_rate": 0.0002999664063235649,
|
|
"loss": 0.841,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.10703291498934407,
|
|
"grad_norm": 0.0583247436509342,
|
|
"learning_rate": 0.00029995508870638596,
|
|
"loss": 0.8765,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.10798010892730286,
|
|
"grad_norm": 0.04974164281813745,
|
|
"learning_rate": 0.00029994213120438187,
|
|
"loss": 0.8429,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.10892730286526166,
|
|
"grad_norm": 0.06125862188074254,
|
|
"learning_rate": 0.0002999275339592538,
|
|
"loss": 0.8935,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.10987449680322046,
|
|
"grad_norm": 0.059759664506794964,
|
|
"learning_rate": 0.0002999112971306348,
|
|
"loss": 0.869,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.11082169074117926,
|
|
"grad_norm": 0.06328973302665653,
|
|
"learning_rate": 0.00029989342089608835,
|
|
"loss": 0.852,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.11176888467913805,
|
|
"grad_norm": 0.060243737818932684,
|
|
"learning_rate": 0.00029987390545110605,
|
|
"loss": 0.857,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.11271607861709686,
|
|
"grad_norm": 0.055808252373465206,
|
|
"learning_rate": 0.0002998527510091056,
|
|
"loss": 0.8774,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.11366327255505565,
|
|
"grad_norm": 0.06324311731152125,
|
|
"learning_rate": 0.0002998299578014287,
|
|
"loss": 0.8726,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.11461046649301444,
|
|
"grad_norm": 0.05610986410453152,
|
|
"learning_rate": 0.0002998055260773381,
|
|
"loss": 0.8589,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.11555766043097324,
|
|
"grad_norm": 0.04965479001175545,
|
|
"learning_rate": 0.0002997794561040153,
|
|
"loss": 0.8383,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.11650485436893204,
|
|
"grad_norm": 0.05064790996842586,
|
|
"learning_rate": 0.00029975174816655736,
|
|
"loss": 0.8524,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.11745204830689084,
|
|
"grad_norm": 0.052969432132795244,
|
|
"learning_rate": 0.00029972240256797384,
|
|
"loss": 0.8848,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.11839924224484963,
|
|
"grad_norm": 0.04987789018051327,
|
|
"learning_rate": 0.0002996914196291835,
|
|
"loss": 0.8579,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.11934643618280844,
|
|
"grad_norm": 0.05418008688918587,
|
|
"learning_rate": 0.0002996587996890107,
|
|
"loss": 0.9321,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.12029363012076723,
|
|
"grad_norm": 0.057606406769674844,
|
|
"learning_rate": 0.000299624543104182,
|
|
"loss": 0.864,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.12124082405872602,
|
|
"grad_norm": 0.050896712551105165,
|
|
"learning_rate": 0.0002995886502493219,
|
|
"loss": 0.8508,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.12218801799668483,
|
|
"grad_norm": 0.05856562263921288,
|
|
"learning_rate": 0.00029955112151694885,
|
|
"loss": 0.8557,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.12313521193464362,
|
|
"grad_norm": 0.056397198481637226,
|
|
"learning_rate": 0.00029951195731747114,
|
|
"loss": 0.8763,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.12408240587260241,
|
|
"grad_norm": 0.06021365518577683,
|
|
"learning_rate": 0.00029947115807918217,
|
|
"loss": 0.8691,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.12502959981056122,
|
|
"grad_norm": 0.049632655250353507,
|
|
"learning_rate": 0.0002994287242482558,
|
|
"loss": 0.8593,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.12597679374852,
|
|
"grad_norm": 0.04958168029549867,
|
|
"learning_rate": 0.00029938465628874165,
|
|
"loss": 0.8591,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.1269239876864788,
|
|
"grad_norm": 0.05757210103501373,
|
|
"learning_rate": 0.00029933895468255985,
|
|
"loss": 0.8402,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.1278711816244376,
|
|
"grad_norm": 0.06892314154494911,
|
|
"learning_rate": 0.0002992916199294959,
|
|
"loss": 0.8689,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.1288183755623964,
|
|
"grad_norm": 0.049748765290627474,
|
|
"learning_rate": 0.000299242652547195,
|
|
"loss": 0.8486,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.1297655695003552,
|
|
"grad_norm": 0.054839939836728246,
|
|
"learning_rate": 0.0002991920530711566,
|
|
"loss": 0.8673,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.130712763438314,
|
|
"grad_norm": 0.05978086373502768,
|
|
"learning_rate": 0.00029913982205472857,
|
|
"loss": 0.8608,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.13165995737627278,
|
|
"grad_norm": 0.052502322799696084,
|
|
"learning_rate": 0.0002990859600691008,
|
|
"loss": 0.8613,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.1326071513142316,
|
|
"grad_norm": 0.05054364592204903,
|
|
"learning_rate": 0.0002990304677032994,
|
|
"loss": 0.8746,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.1335543452521904,
|
|
"grad_norm": 0.05172804041468556,
|
|
"learning_rate": 0.00029897334556418004,
|
|
"loss": 0.8256,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.13450153919014918,
|
|
"grad_norm": 0.05101864907200138,
|
|
"learning_rate": 0.0002989145942764212,
|
|
"loss": 0.8655,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.13544873312810798,
|
|
"grad_norm": 0.05509456096234295,
|
|
"learning_rate": 0.0002988542144825176,
|
|
"loss": 0.8692,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.13639592706606676,
|
|
"grad_norm": 0.049333296450028125,
|
|
"learning_rate": 0.000298792206842773,
|
|
"loss": 0.8572,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.13734312100402557,
|
|
"grad_norm": 0.059122961216738656,
|
|
"learning_rate": 0.0002987285720352929,
|
|
"loss": 0.8735,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.13829031494198438,
|
|
"grad_norm": 0.050375990420733686,
|
|
"learning_rate": 0.0002986633107559775,
|
|
"loss": 0.82,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.13923750887994316,
|
|
"grad_norm": 0.0496346403585563,
|
|
"learning_rate": 0.0002985964237185136,
|
|
"loss": 0.8467,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.14018470281790196,
|
|
"grad_norm": 0.053630745330135815,
|
|
"learning_rate": 0.00029852791165436716,
|
|
"loss": 0.8858,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.14113189675586077,
|
|
"grad_norm": 0.055544816267542034,
|
|
"learning_rate": 0.0002984577753127752,
|
|
"loss": 0.8707,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.14207909069381955,
|
|
"grad_norm": 0.04983399501220757,
|
|
"learning_rate": 0.00029838601546073744,
|
|
"loss": 0.846,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.14302628463177836,
|
|
"grad_norm": 0.05531741725439223,
|
|
"learning_rate": 0.00029831263288300817,
|
|
"loss": 0.8716,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.14397347856973716,
|
|
"grad_norm": 0.05305914413910715,
|
|
"learning_rate": 0.00029823762838208744,
|
|
"loss": 0.8694,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.14492067250769594,
|
|
"grad_norm": 0.063066365795915,
|
|
"learning_rate": 0.00029816100277821247,
|
|
"loss": 0.8575,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.14586786644565475,
|
|
"grad_norm": 0.052014222449902975,
|
|
"learning_rate": 0.00029808275690934864,
|
|
"loss": 0.8553,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.14681506038361355,
|
|
"grad_norm": 0.05627981583655042,
|
|
"learning_rate": 0.00029800289163118014,
|
|
"loss": 0.8491,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.14776225432157233,
|
|
"grad_norm": 0.048744553655055825,
|
|
"learning_rate": 0.00029792140781710103,
|
|
"loss": 0.8597,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.14870944825953114,
|
|
"grad_norm": 0.07103922836346321,
|
|
"learning_rate": 0.00029783830635820506,
|
|
"loss": 0.8685,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.14965664219748995,
|
|
"grad_norm": 0.05572062793930259,
|
|
"learning_rate": 0.0002977535881632766,
|
|
"loss": 0.8144,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.15060383613544873,
|
|
"grad_norm": 0.047330412499616226,
|
|
"learning_rate": 0.00029766725415878017,
|
|
"loss": 0.8353,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.15155103007340753,
|
|
"grad_norm": 0.05324033535511291,
|
|
"learning_rate": 0.00029757930528885064,
|
|
"loss": 0.8411,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.15249822401136634,
|
|
"grad_norm": 0.05513304510602078,
|
|
"learning_rate": 0.0002974897425152828,
|
|
"loss": 0.8809,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.15344541794932512,
|
|
"grad_norm": 0.052990990094527124,
|
|
"learning_rate": 0.0002973985668175207,
|
|
"loss": 0.8608,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.15439261188728393,
|
|
"grad_norm": 0.05324896337561592,
|
|
"learning_rate": 0.0002973057791926473,
|
|
"loss": 0.8458,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.1553398058252427,
|
|
"grad_norm": 0.05276687776977392,
|
|
"learning_rate": 0.000297211380655373,
|
|
"loss": 0.8697,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.1562869997632015,
|
|
"grad_norm": 0.052354949797073405,
|
|
"learning_rate": 0.0002971153722380253,
|
|
"loss": 0.8507,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.15723419370116032,
|
|
"grad_norm": 0.049368244945149506,
|
|
"learning_rate": 0.0002970177549905368,
|
|
"loss": 0.8403,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.1581813876391191,
|
|
"grad_norm": 0.046532042464774784,
|
|
"learning_rate": 0.00029691852998043396,
|
|
"loss": 0.8552,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.1591285815770779,
|
|
"grad_norm": 0.04876609183561892,
|
|
"learning_rate": 0.00029681769829282574,
|
|
"loss": 0.8479,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.1600757755150367,
|
|
"grad_norm": 0.059730813463699885,
|
|
"learning_rate": 0.0002967152610303913,
|
|
"loss": 0.8545,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.1610229694529955,
|
|
"grad_norm": 0.055750160324234604,
|
|
"learning_rate": 0.00029661121931336804,
|
|
"loss": 0.8504,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.1619701633909543,
|
|
"grad_norm": 0.0528593038524647,
|
|
"learning_rate": 0.0002965055742795395,
|
|
"loss": 0.8814,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.1629173573289131,
|
|
"grad_norm": 0.05558247953451502,
|
|
"learning_rate": 0.000296398327084223,
|
|
"loss": 0.85,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.16386455126687188,
|
|
"grad_norm": 0.06143605437889347,
|
|
"learning_rate": 0.00029628947890025656,
|
|
"loss": 0.8561,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.1648117452048307,
|
|
"grad_norm": 0.05186425147430888,
|
|
"learning_rate": 0.0002961790309179866,
|
|
"loss": 0.8393,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.1657589391427895,
|
|
"grad_norm": 0.047149331325217335,
|
|
"learning_rate": 0.00029606698434525434,
|
|
"loss": 0.8668,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.16670613308074828,
|
|
"grad_norm": 0.048689384932807,
|
|
"learning_rate": 0.00029595334040738333,
|
|
"loss": 0.8374,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.16765332701870708,
|
|
"grad_norm": 0.053510975406836386,
|
|
"learning_rate": 0.00029583810034716545,
|
|
"loss": 0.8491,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.1686005209566659,
|
|
"grad_norm": 0.05595964353451741,
|
|
"learning_rate": 0.00029572126542484745,
|
|
"loss": 0.8727,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.16954771489462467,
|
|
"grad_norm": 0.055885278431375376,
|
|
"learning_rate": 0.0002956028369181174,
|
|
"loss": 0.882,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.17049490883258347,
|
|
"grad_norm": 0.047842403175001005,
|
|
"learning_rate": 0.00029548281612209044,
|
|
"loss": 0.8682,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.17144210277054228,
|
|
"grad_norm": 0.058823537208354766,
|
|
"learning_rate": 0.00029536120434929476,
|
|
"loss": 0.8373,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.17238929670850106,
|
|
"grad_norm": 0.05444610376517603,
|
|
"learning_rate": 0.00029523800292965724,
|
|
"loss": 0.8783,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.17333649064645987,
|
|
"grad_norm": 0.054957105759307595,
|
|
"learning_rate": 0.00029511321321048893,
|
|
"loss": 0.843,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.17428368458441865,
|
|
"grad_norm": 0.06583345091917218,
|
|
"learning_rate": 0.0002949868365564701,
|
|
"loss": 0.8504,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.17523087852237745,
|
|
"grad_norm": 0.04777073198426105,
|
|
"learning_rate": 0.00029485887434963566,
|
|
"loss": 0.8298,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.17617807246033626,
|
|
"grad_norm": 0.05562673540582162,
|
|
"learning_rate": 0.00029472932798935977,
|
|
"loss": 0.8418,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.17712526639829504,
|
|
"grad_norm": 0.04785779459273509,
|
|
"learning_rate": 0.0002945981988923406,
|
|
"loss": 0.8328,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.17807246033625385,
|
|
"grad_norm": 0.05554074332095169,
|
|
"learning_rate": 0.00029446548849258513,
|
|
"loss": 0.8279,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.17901965427421265,
|
|
"grad_norm": 0.046624447216736774,
|
|
"learning_rate": 0.00029433119824139286,
|
|
"loss": 0.8494,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.17996684821217143,
|
|
"grad_norm": 0.051194260228541774,
|
|
"learning_rate": 0.0002941953296073405,
|
|
"loss": 0.8594,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.18091404215013024,
|
|
"grad_norm": 0.05090727374729561,
|
|
"learning_rate": 0.0002940578840762658,
|
|
"loss": 0.8422,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.18186123608808905,
|
|
"grad_norm": 0.04639853886400584,
|
|
"learning_rate": 0.00029391886315125083,
|
|
"loss": 0.8344,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.18280843002604782,
|
|
"grad_norm": 0.056061389000908554,
|
|
"learning_rate": 0.0002937782683526063,
|
|
"loss": 0.8131,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.18375562396400663,
|
|
"grad_norm": 0.04702100386110801,
|
|
"learning_rate": 0.00029363610121785447,
|
|
"loss": 0.8141,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.18470281790196544,
|
|
"grad_norm": 0.04844408584935392,
|
|
"learning_rate": 0.00029349236330171224,
|
|
"loss": 0.8149,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.18565001183992422,
|
|
"grad_norm": 0.048586646278994214,
|
|
"learning_rate": 0.0002933470561760744,
|
|
"loss": 0.8723,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.18659720577788302,
|
|
"grad_norm": 0.0510188898302652,
|
|
"learning_rate": 0.00029320018142999643,
|
|
"loss": 0.8319,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.18754439971584183,
|
|
"grad_norm": 0.04664479134380501,
|
|
"learning_rate": 0.0002930517406696771,
|
|
"loss": 0.8425,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.1884915936538006,
|
|
"grad_norm": 0.04274961179387165,
|
|
"learning_rate": 0.0002929017355184407,
|
|
"loss": 0.8252,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.18943878759175942,
|
|
"grad_norm": 0.05336071927501094,
|
|
"learning_rate": 0.00029275016761671954,
|
|
"loss": 0.8343,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.19038598152971822,
|
|
"grad_norm": 0.0732142750483461,
|
|
"learning_rate": 0.00029259703862203587,
|
|
"loss": 0.8305,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.191333175467677,
|
|
"grad_norm": 0.059075683989297266,
|
|
"learning_rate": 0.00029244235020898395,
|
|
"loss": 0.8487,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.1922803694056358,
|
|
"grad_norm": 0.04650016716241106,
|
|
"learning_rate": 0.0002922861040692115,
|
|
"loss": 0.8583,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.1932275633435946,
|
|
"grad_norm": 0.04767602957865441,
|
|
"learning_rate": 0.0002921283019114011,
|
|
"loss": 0.8496,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.1941747572815534,
|
|
"grad_norm": 0.05644985578762857,
|
|
"learning_rate": 0.00029196894546125197,
|
|
"loss": 0.8429,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.1951219512195122,
|
|
"grad_norm": 0.049699082298498856,
|
|
"learning_rate": 0.0002918080364614607,
|
|
"loss": 0.8121,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.19606914515747098,
|
|
"grad_norm": 0.05678361846269368,
|
|
"learning_rate": 0.0002916455766717024,
|
|
"loss": 0.831,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.1970163390954298,
|
|
"grad_norm": 0.05634582924907757,
|
|
"learning_rate": 0.00029148156786861125,
|
|
"loss": 0.8411,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.1979635330333886,
|
|
"grad_norm": 0.044578510424992744,
|
|
"learning_rate": 0.0002913160118457612,
|
|
"loss": 0.8163,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.19891072697134737,
|
|
"grad_norm": 0.049243336108268164,
|
|
"learning_rate": 0.00029114891041364646,
|
|
"loss": 0.8651,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.19985792090930618,
|
|
"grad_norm": 0.05392438131527857,
|
|
"learning_rate": 0.00029098026539966143,
|
|
"loss": 0.8304,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.200805114847265,
|
|
"grad_norm": 0.05004934480263781,
|
|
"learning_rate": 0.0002908100786480811,
|
|
"loss": 0.8686,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.20175230878522377,
|
|
"grad_norm": 0.04733082301433883,
|
|
"learning_rate": 0.00029063835202004036,
|
|
"loss": 0.8346,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.20269950272318257,
|
|
"grad_norm": 0.05783680843141235,
|
|
"learning_rate": 0.0002904650873935143,
|
|
"loss": 0.8312,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.20364669666114138,
|
|
"grad_norm": 0.04565338494016318,
|
|
"learning_rate": 0.0002902902866632969,
|
|
"loss": 0.8595,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.20459389059910016,
|
|
"grad_norm": 0.050858909948530574,
|
|
"learning_rate": 0.0002901139517409811,
|
|
"loss": 0.8642,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.20554108453705897,
|
|
"grad_norm": 0.05066970549535419,
|
|
"learning_rate": 0.0002899360845549373,
|
|
"loss": 0.8342,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.20648827847501777,
|
|
"grad_norm": 0.04679512262177614,
|
|
"learning_rate": 0.0002897566870502925,
|
|
"loss": 0.8306,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.20743547241297655,
|
|
"grad_norm": 0.05217676490462024,
|
|
"learning_rate": 0.00028957576118890914,
|
|
"loss": 0.8225,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.20838266635093536,
|
|
"grad_norm": 0.05437239079138474,
|
|
"learning_rate": 0.0002893933089493635,
|
|
"loss": 0.8553,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.20932986028889414,
|
|
"grad_norm": 0.0509435046538677,
|
|
"learning_rate": 0.00028920933232692386,
|
|
"loss": 0.8086,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.21027705422685294,
|
|
"grad_norm": 0.044222391512642414,
|
|
"learning_rate": 0.00028902383333352926,
|
|
"loss": 0.8412,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.21122424816481175,
|
|
"grad_norm": 0.04967664422872056,
|
|
"learning_rate": 0.0002888368139977669,
|
|
"loss": 0.8506,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 0.21217144210277053,
|
|
"grad_norm": 0.0465634897763295,
|
|
"learning_rate": 0.0002886482763648503,
|
|
"loss": 0.8217,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.21311863604072934,
|
|
"grad_norm": 0.05052365351929177,
|
|
"learning_rate": 0.0002884582224965968,
|
|
"loss": 0.8332,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.21406582997868814,
|
|
"grad_norm": 0.05180493973251838,
|
|
"learning_rate": 0.000288266654471405,
|
|
"loss": 0.8347,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.21501302391664692,
|
|
"grad_norm": 0.0471780634040616,
|
|
"learning_rate": 0.0002880735743842322,
|
|
"loss": 0.8366,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 0.21596021785460573,
|
|
"grad_norm": 0.049108577250984026,
|
|
"learning_rate": 0.0002878789843465713,
|
|
"loss": 0.8362,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.21690741179256454,
|
|
"grad_norm": 0.05564424373715666,
|
|
"learning_rate": 0.0002876828864864277,
|
|
"loss": 0.8514,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 0.21785460573052332,
|
|
"grad_norm": 0.0564159881319112,
|
|
"learning_rate": 0.0002874852829482963,
|
|
"loss": 0.8723,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.21880179966848212,
|
|
"grad_norm": 0.051069627553698455,
|
|
"learning_rate": 0.0002872861758931376,
|
|
"loss": 0.851,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 0.21974899360644093,
|
|
"grad_norm": 0.05117455703332741,
|
|
"learning_rate": 0.00028708556749835454,
|
|
"loss": 0.8434,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.2206961875443997,
|
|
"grad_norm": 0.04934525970498773,
|
|
"learning_rate": 0.0002868834599577684,
|
|
"loss": 0.841,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 0.22164338148235851,
|
|
"grad_norm": 0.05894722590210803,
|
|
"learning_rate": 0.0002866798554815948,
|
|
"loss": 0.8458,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.22259057542031732,
|
|
"grad_norm": 0.05630861952888565,
|
|
"learning_rate": 0.0002864747562964197,
|
|
"loss": 0.8343,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.2235377693582761,
|
|
"grad_norm": 0.045749561983576756,
|
|
"learning_rate": 0.000286268164645175,
|
|
"loss": 0.8421,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.2244849632962349,
|
|
"grad_norm": 0.04499091282837436,
|
|
"learning_rate": 0.00028606008278711373,
|
|
"loss": 0.8397,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 0.22543215723419371,
|
|
"grad_norm": 0.042402412945632365,
|
|
"learning_rate": 0.00028585051299778594,
|
|
"loss": 0.8061,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.2263793511721525,
|
|
"grad_norm": 0.046372304084499535,
|
|
"learning_rate": 0.00028563945756901314,
|
|
"loss": 0.8514,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 0.2273265451101113,
|
|
"grad_norm": 0.050066239277336104,
|
|
"learning_rate": 0.00028542691880886376,
|
|
"loss": 0.8473,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.22827373904807008,
|
|
"grad_norm": 0.04750268427944095,
|
|
"learning_rate": 0.0002852128990416275,
|
|
"loss": 0.8155,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 0.22922093298602889,
|
|
"grad_norm": 0.04448032581142824,
|
|
"learning_rate": 0.0002849974006077904,
|
|
"loss": 0.8462,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.2301681269239877,
|
|
"grad_norm": 0.045988358494773375,
|
|
"learning_rate": 0.00028478042586400876,
|
|
"loss": 0.8139,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 0.23111532086194647,
|
|
"grad_norm": 0.05150874461710223,
|
|
"learning_rate": 0.00028456197718308365,
|
|
"loss": 0.8511,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.23206251479990528,
|
|
"grad_norm": 0.04407477102954397,
|
|
"learning_rate": 0.00028434205695393477,
|
|
"loss": 0.8374,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.23300970873786409,
|
|
"grad_norm": 0.04739621311473698,
|
|
"learning_rate": 0.0002841206675815745,
|
|
"loss": 0.8126,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.23395690267582286,
|
|
"grad_norm": 0.04187717815582618,
|
|
"learning_rate": 0.0002838978114870816,
|
|
"loss": 0.8274,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 0.23490409661378167,
|
|
"grad_norm": 0.04177075006556251,
|
|
"learning_rate": 0.0002836734911075746,
|
|
"loss": 0.8168,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.23585129055174048,
|
|
"grad_norm": 0.045942488951301354,
|
|
"learning_rate": 0.0002834477088961853,
|
|
"loss": 0.8054,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 0.23679848448969926,
|
|
"grad_norm": 0.04359836043504769,
|
|
"learning_rate": 0.00028322046732203165,
|
|
"loss": 0.8538,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.23774567842765806,
|
|
"grad_norm": 0.04439950975585717,
|
|
"learning_rate": 0.0002829917688701912,
|
|
"loss": 0.8352,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 0.23869287236561687,
|
|
"grad_norm": 0.06555071271570542,
|
|
"learning_rate": 0.00028276161604167354,
|
|
"loss": 0.8395,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.23964006630357565,
|
|
"grad_norm": 0.06429653649306202,
|
|
"learning_rate": 0.0002825300113533932,
|
|
"loss": 0.8639,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 0.24058726024153446,
|
|
"grad_norm": 0.050999285390159056,
|
|
"learning_rate": 0.0002822969573381418,
|
|
"loss": 0.8265,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.24153445417949326,
|
|
"grad_norm": 0.06883273146747126,
|
|
"learning_rate": 0.0002820624565445608,
|
|
"loss": 0.8505,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 0.24248164811745204,
|
|
"grad_norm": 0.08792312386360097,
|
|
"learning_rate": 0.00028182651153711334,
|
|
"loss": 0.8393,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.24342884205541085,
|
|
"grad_norm": 0.05834297565610207,
|
|
"learning_rate": 0.0002815891248960562,
|
|
"loss": 0.8198,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 0.24437603599336966,
|
|
"grad_norm": 0.05226579523827474,
|
|
"learning_rate": 0.0002813502992174116,
|
|
"loss": 0.8127,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.24532322993132843,
|
|
"grad_norm": 0.040640751080577776,
|
|
"learning_rate": 0.00028111003711293897,
|
|
"loss": 0.8068,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 0.24627042386928724,
|
|
"grad_norm": 0.04679354987985417,
|
|
"learning_rate": 0.00028086834121010616,
|
|
"loss": 0.8368,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.24721761780724602,
|
|
"grad_norm": 0.04499421671023844,
|
|
"learning_rate": 0.0002806252141520608,
|
|
"loss": 0.8492,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 0.24816481174520483,
|
|
"grad_norm": 0.5315442282070766,
|
|
"learning_rate": 0.00028038065859760147,
|
|
"loss": 0.8775,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.24911200568316363,
|
|
"grad_norm": 49.60479029682665,
|
|
"learning_rate": 0.0002801346772211486,
|
|
"loss": 14.7667,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 0.25005919962112244,
|
|
"grad_norm": 74.14171657249427,
|
|
"learning_rate": 0.000279887272712715,
|
|
"loss": 7.0642,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.2510063935590812,
|
|
"grad_norm": 0.9679325181066564,
|
|
"learning_rate": 0.00027963844777787687,
|
|
"loss": 2.8211,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 0.25195358749704,
|
|
"grad_norm": 1.8750749955853339,
|
|
"learning_rate": 0.0002793882051377437,
|
|
"loss": 2.5509,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.25290078143499883,
|
|
"grad_norm": 1.256663906662595,
|
|
"learning_rate": 0.00027913654752892897,
|
|
"loss": 1.6113,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 0.2538479753729576,
|
|
"grad_norm": 0.4517499508288906,
|
|
"learning_rate": 0.00027888347770352,
|
|
"loss": 1.3621,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.2547951693109164,
|
|
"grad_norm": 0.16403760364117914,
|
|
"learning_rate": 0.00027862899842904783,
|
|
"loss": 1.1522,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 0.2557423632488752,
|
|
"grad_norm": 0.15782986879922667,
|
|
"learning_rate": 0.00027837311248845697,
|
|
"loss": 1.0121,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.256689557186834,
|
|
"grad_norm": 0.07939626217767812,
|
|
"learning_rate": 0.00027811582268007516,
|
|
"loss": 0.9976,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 0.2576367511247928,
|
|
"grad_norm": 0.08824920573476226,
|
|
"learning_rate": 0.0002778571318175825,
|
|
"loss": 0.937,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.2585839450627516,
|
|
"grad_norm": 0.05529848573271428,
|
|
"learning_rate": 0.0002775970427299808,
|
|
"loss": 0.9259,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 0.2595311390007104,
|
|
"grad_norm": 0.05160154895612758,
|
|
"learning_rate": 0.00027733555826156266,
|
|
"loss": 0.932,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.2604783329386692,
|
|
"grad_norm": 0.6967363411222061,
|
|
"learning_rate": 0.00027707268127188033,
|
|
"loss": 0.941,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 0.261425526876628,
|
|
"grad_norm": 0.05931503868977922,
|
|
"learning_rate": 0.00027680841463571446,
|
|
"loss": 0.8775,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.2623727208145868,
|
|
"grad_norm": 0.07337997434432302,
|
|
"learning_rate": 0.0002765427612430426,
|
|
"loss": 0.887,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 0.26331991475254557,
|
|
"grad_norm": 0.053104858087786935,
|
|
"learning_rate": 0.00027627572399900775,
|
|
"loss": 0.8484,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.2642671086905044,
|
|
"grad_norm": 0.06163074713437679,
|
|
"learning_rate": 0.00027600730582388644,
|
|
"loss": 0.8812,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 0.2652143026284632,
|
|
"grad_norm": 0.05016009889342552,
|
|
"learning_rate": 0.00027573750965305676,
|
|
"loss": 0.8678,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.26616149656642196,
|
|
"grad_norm": 0.053793608113706266,
|
|
"learning_rate": 0.0002754663384369664,
|
|
"loss": 0.8421,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 0.2671086905043808,
|
|
"grad_norm": 0.054043702396785645,
|
|
"learning_rate": 0.0002751937951411005,
|
|
"loss": 0.8374,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.2680558844423396,
|
|
"grad_norm": 0.05993998463791112,
|
|
"learning_rate": 0.00027491988274594865,
|
|
"loss": 0.8521,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 0.26900307838029835,
|
|
"grad_norm": 0.05214973518061344,
|
|
"learning_rate": 0.00027464460424697304,
|
|
"loss": 0.8563,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.2699502723182572,
|
|
"grad_norm": 0.04937709748344037,
|
|
"learning_rate": 0.0002743679626545753,
|
|
"loss": 0.8611,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 0.27089746625621597,
|
|
"grad_norm": 0.04690788256444743,
|
|
"learning_rate": 0.0002740899609940634,
|
|
"loss": 0.8737,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.27184466019417475,
|
|
"grad_norm": 0.04458440143618464,
|
|
"learning_rate": 0.00027381060230561904,
|
|
"loss": 0.8393,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 0.2727918541321335,
|
|
"grad_norm": 0.044640210601826116,
|
|
"learning_rate": 0.0002735298896442641,
|
|
"loss": 0.8569,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.27373904807009236,
|
|
"grad_norm": 0.048448777510886915,
|
|
"learning_rate": 0.00027324782607982727,
|
|
"loss": 0.8348,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 0.27468624200805114,
|
|
"grad_norm": 0.0500357269022256,
|
|
"learning_rate": 0.0002729644146969104,
|
|
"loss": 0.8676,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.2756334359460099,
|
|
"grad_norm": 0.053604059700079344,
|
|
"learning_rate": 0.0002726796585948551,
|
|
"loss": 0.8495,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 0.27658062988396875,
|
|
"grad_norm": 0.061024238560798645,
|
|
"learning_rate": 0.00027239356088770846,
|
|
"loss": 0.84,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.27752782382192753,
|
|
"grad_norm": 0.05542695537862334,
|
|
"learning_rate": 0.0002721061247041891,
|
|
"loss": 0.8445,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 0.2784750177598863,
|
|
"grad_norm": 0.05174226319950172,
|
|
"learning_rate": 0.00027181735318765305,
|
|
"loss": 0.8239,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.27942221169784515,
|
|
"grad_norm": 0.0524720390301642,
|
|
"learning_rate": 0.0002715272494960594,
|
|
"loss": 0.8717,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 0.2803694056358039,
|
|
"grad_norm": 0.05078251806460792,
|
|
"learning_rate": 0.00027123581680193575,
|
|
"loss": 0.8776,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.2813165995737627,
|
|
"grad_norm": 0.0508208511214285,
|
|
"learning_rate": 0.0002709430582923432,
|
|
"loss": 0.8337,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 0.28226379351172154,
|
|
"grad_norm": 0.05428792173232585,
|
|
"learning_rate": 0.00027064897716884195,
|
|
"loss": 0.8331,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.2832109874496803,
|
|
"grad_norm": 0.04339480572676772,
|
|
"learning_rate": 0.0002703535766474561,
|
|
"loss": 0.8474,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 0.2841581813876391,
|
|
"grad_norm": 0.04801286233761752,
|
|
"learning_rate": 0.00027005685995863833,
|
|
"loss": 0.8538,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.28510537532559793,
|
|
"grad_norm": 0.05126840483010234,
|
|
"learning_rate": 0.00026975883034723486,
|
|
"loss": 0.8508,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 0.2860525692635567,
|
|
"grad_norm": 0.05632538740067954,
|
|
"learning_rate": 0.00026945949107244984,
|
|
"loss": 0.8239,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.2869997632015155,
|
|
"grad_norm": 0.05321785003972056,
|
|
"learning_rate": 0.0002691588454078095,
|
|
"loss": 0.809,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 0.2879469571394743,
|
|
"grad_norm": 0.05267212261846467,
|
|
"learning_rate": 0.00026885689664112673,
|
|
"loss": 0.8235,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.2888941510774331,
|
|
"grad_norm": 0.06239400176564442,
|
|
"learning_rate": 0.0002685536480744648,
|
|
"loss": 0.8336,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 0.2898413450153919,
|
|
"grad_norm": 0.05392065914950409,
|
|
"learning_rate": 0.0002682491030241016,
|
|
"loss": 0.8227,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.2907885389533507,
|
|
"grad_norm": 0.042067002075187244,
|
|
"learning_rate": 0.0002679432648204928,
|
|
"loss": 0.8336,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 0.2917357328913095,
|
|
"grad_norm": 0.04983258400363465,
|
|
"learning_rate": 0.0002676361368082362,
|
|
"loss": 0.7947,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.2926829268292683,
|
|
"grad_norm": 0.04869904064999139,
|
|
"learning_rate": 0.00026732772234603437,
|
|
"loss": 0.8127,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 0.2936301207672271,
|
|
"grad_norm": 0.05851898617300107,
|
|
"learning_rate": 0.00026701802480665857,
|
|
"loss": 0.8313,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.2945773147051859,
|
|
"grad_norm": 0.0552687029482635,
|
|
"learning_rate": 0.0002667070475769114,
|
|
"loss": 0.8049,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 0.29552450864314467,
|
|
"grad_norm": 0.06477348854060364,
|
|
"learning_rate": 0.00026639479405759006,
|
|
"loss": 0.83,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.2964717025811035,
|
|
"grad_norm": 0.04555333394215088,
|
|
"learning_rate": 0.000266081267663449,
|
|
"loss": 0.84,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 0.2974188965190623,
|
|
"grad_norm": 0.04197167864122965,
|
|
"learning_rate": 0.00026576647182316264,
|
|
"loss": 0.8192,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.29836609045702106,
|
|
"grad_norm": 0.15360351138708875,
|
|
"learning_rate": 0.00026545040997928785,
|
|
"loss": 0.8756,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 0.2993132843949799,
|
|
"grad_norm": 0.07002447455001899,
|
|
"learning_rate": 0.00026513308558822636,
|
|
"loss": 0.8182,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.3002604783329387,
|
|
"grad_norm": 0.05720839543286207,
|
|
"learning_rate": 0.0002648145021201868,
|
|
"loss": 0.8334,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 0.30120767227089745,
|
|
"grad_norm": 0.05201720217949488,
|
|
"learning_rate": 0.0002644946630591469,
|
|
"loss": 0.8494,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.3021548662088563,
|
|
"grad_norm": 0.04850167766978546,
|
|
"learning_rate": 0.0002641735719028155,
|
|
"loss": 0.8285,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 0.30310206014681507,
|
|
"grad_norm": 0.044133764645286594,
|
|
"learning_rate": 0.000263851232162594,
|
|
"loss": 0.8225,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.30404925408477385,
|
|
"grad_norm": 0.0488939501306738,
|
|
"learning_rate": 0.00026352764736353815,
|
|
"loss": 0.8395,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 0.3049964480227327,
|
|
"grad_norm": 0.044482887206502425,
|
|
"learning_rate": 0.0002632028210443194,
|
|
"loss": 0.8199,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.30594364196069146,
|
|
"grad_norm": 0.054188307478421044,
|
|
"learning_rate": 0.00026287675675718653,
|
|
"loss": 0.833,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 0.30689083589865024,
|
|
"grad_norm": 0.048205088442678685,
|
|
"learning_rate": 0.00026254945806792614,
|
|
"loss": 0.8287,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.3078380298366091,
|
|
"grad_norm": 0.0457249850341829,
|
|
"learning_rate": 0.0002622209285558244,
|
|
"loss": 0.8104,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 0.30878522377456785,
|
|
"grad_norm": 0.04764496488482527,
|
|
"learning_rate": 0.00026189117181362733,
|
|
"loss": 0.807,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.30973241771252663,
|
|
"grad_norm": 0.04577871219106504,
|
|
"learning_rate": 0.0002615601914475018,
|
|
"loss": 0.8387,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 0.3106796116504854,
|
|
"grad_norm": 0.0560487648361042,
|
|
"learning_rate": 0.0002612279910769962,
|
|
"loss": 0.8209,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.31162680558844424,
|
|
"grad_norm": 0.051106440022587435,
|
|
"learning_rate": 0.0002608945743350004,
|
|
"loss": 0.8066,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 0.312573999526403,
|
|
"grad_norm": 0.060741538352692886,
|
|
"learning_rate": 0.0002605599448677066,
|
|
"loss": 0.8258,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.3135211934643618,
|
|
"grad_norm": 0.0600167744118878,
|
|
"learning_rate": 0.000260224106334569,
|
|
"loss": 0.8174,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 0.31446838740232064,
|
|
"grad_norm": 0.05182728019691824,
|
|
"learning_rate": 0.000259887062408264,
|
|
"loss": 0.8379,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.3154155813402794,
|
|
"grad_norm": 0.048406806604266626,
|
|
"learning_rate": 0.00025954881677464994,
|
|
"loss": 0.8239,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 0.3163627752782382,
|
|
"grad_norm": 0.04614485855762265,
|
|
"learning_rate": 0.0002592093731327269,
|
|
"loss": 0.8328,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.31730996921619703,
|
|
"grad_norm": 0.04097208134051075,
|
|
"learning_rate": 0.0002588687351945962,
|
|
"loss": 0.8054,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 0.3182571631541558,
|
|
"grad_norm": 0.04859899532989667,
|
|
"learning_rate": 0.0002585269066854197,
|
|
"loss": 0.828,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.3192043570921146,
|
|
"grad_norm": 0.0514636005763012,
|
|
"learning_rate": 0.00025818389134337925,
|
|
"loss": 0.805,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 0.3201515510300734,
|
|
"grad_norm": 0.04510538821375225,
|
|
"learning_rate": 0.0002578396929196356,
|
|
"loss": 0.8296,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.3210987449680322,
|
|
"grad_norm": 0.04625807399119475,
|
|
"learning_rate": 0.00025749431517828775,
|
|
"loss": 0.8085,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 0.322045938905991,
|
|
"grad_norm": 0.04353062203420096,
|
|
"learning_rate": 0.0002571477618963311,
|
|
"loss": 0.8169,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.3229931328439498,
|
|
"grad_norm": 0.045940703703086845,
|
|
"learning_rate": 0.00025680003686361704,
|
|
"loss": 0.8337,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 0.3239403267819086,
|
|
"grad_norm": 0.047707507442658635,
|
|
"learning_rate": 0.00025645114388281066,
|
|
"loss": 0.8097,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.3248875207198674,
|
|
"grad_norm": 0.04488951827418563,
|
|
"learning_rate": 0.00025610108676934974,
|
|
"loss": 0.8296,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 0.3258347146578262,
|
|
"grad_norm": 0.05157676249590623,
|
|
"learning_rate": 0.00025574986935140287,
|
|
"loss": 0.832,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.326781908595785,
|
|
"grad_norm": 0.045481015370253376,
|
|
"learning_rate": 0.00025539749546982736,
|
|
"loss": 0.812,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 0.32772910253374377,
|
|
"grad_norm": 0.04671492664808977,
|
|
"learning_rate": 0.0002550439689781276,
|
|
"loss": 0.783,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.3286762964717026,
|
|
"grad_norm": 0.056706366227313135,
|
|
"learning_rate": 0.00025468929374241256,
|
|
"loss": 0.829,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 0.3296234904096614,
|
|
"grad_norm": 0.04717465337329956,
|
|
"learning_rate": 0.0002543334736413539,
|
|
"loss": 0.8482,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.33057068434762016,
|
|
"grad_norm": 0.048757000553929425,
|
|
"learning_rate": 0.0002539765125661432,
|
|
"loss": 0.807,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 0.331517878285579,
|
|
"grad_norm": 0.04444091877456076,
|
|
"learning_rate": 0.00025361841442044956,
|
|
"loss": 0.8321,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.33246507222353777,
|
|
"grad_norm": 0.04389186380472923,
|
|
"learning_rate": 0.00025325918312037697,
|
|
"loss": 0.806,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 0.33341226616149655,
|
|
"grad_norm": 0.0451220202476477,
|
|
"learning_rate": 0.0002528988225944214,
|
|
"loss": 0.8239,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.3343594600994554,
|
|
"grad_norm": 0.04940473146055937,
|
|
"learning_rate": 0.00025253733678342775,
|
|
"loss": 0.7978,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 0.33530665403741416,
|
|
"grad_norm": 0.04360662632042812,
|
|
"learning_rate": 0.000252174729640547,
|
|
"loss": 0.7936,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.33625384797537294,
|
|
"grad_norm": 0.04508266007873255,
|
|
"learning_rate": 0.0002518110051311927,
|
|
"loss": 0.8354,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 0.3372010419133318,
|
|
"grad_norm": 0.0447597919709376,
|
|
"learning_rate": 0.00025144616723299785,
|
|
"loss": 0.8128,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.33814823585129056,
|
|
"grad_norm": 0.04068239709765713,
|
|
"learning_rate": 0.0002510802199357713,
|
|
"loss": 0.8173,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 0.33909542978924934,
|
|
"grad_norm": 0.04478708527154376,
|
|
"learning_rate": 0.000250713167241454,
|
|
"loss": 0.8192,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.34004262372720817,
|
|
"grad_norm": 0.04547217242470498,
|
|
"learning_rate": 0.00025034501316407537,
|
|
"loss": 0.8418,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 0.34098981766516695,
|
|
"grad_norm": 0.04674703701128052,
|
|
"learning_rate": 0.0002499757617297095,
|
|
"loss": 0.7595,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.34193701160312573,
|
|
"grad_norm": 0.0456363229285353,
|
|
"learning_rate": 0.00024960541697643094,
|
|
"loss": 0.8125,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 0.34288420554108456,
|
|
"grad_norm": 0.04312589291081109,
|
|
"learning_rate": 0.00024923398295427046,
|
|
"loss": 0.7931,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.34383139947904334,
|
|
"grad_norm": 0.04696710814032231,
|
|
"learning_rate": 0.00024886146372517107,
|
|
"loss": 0.8062,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 0.3447785934170021,
|
|
"grad_norm": 0.043291328088353766,
|
|
"learning_rate": 0.00024848786336294346,
|
|
"loss": 0.7962,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.3457257873549609,
|
|
"grad_norm": 0.04284288738527321,
|
|
"learning_rate": 0.0002481131859532212,
|
|
"loss": 0.8031,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 0.34667298129291974,
|
|
"grad_norm": 0.04649906122780062,
|
|
"learning_rate": 0.0002477374355934165,
|
|
"loss": 0.7931,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.3476201752308785,
|
|
"grad_norm": 0.05184640676841213,
|
|
"learning_rate": 0.0002473606163926751,
|
|
"loss": 0.833,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 0.3485673691688373,
|
|
"grad_norm": 0.04860441277746714,
|
|
"learning_rate": 0.00024698273247183137,
|
|
"loss": 0.8212,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.34951456310679613,
|
|
"grad_norm": 0.05114423505303399,
|
|
"learning_rate": 0.0002466037879633633,
|
|
"loss": 0.7971,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 0.3504617570447549,
|
|
"grad_norm": 0.047498936911412486,
|
|
"learning_rate": 0.00024622378701134737,
|
|
"loss": 0.8274,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.3514089509827137,
|
|
"grad_norm": 0.04549364277100318,
|
|
"learning_rate": 0.00024584273377141306,
|
|
"loss": 0.7948,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 0.3523561449206725,
|
|
"grad_norm": 0.05177910151205359,
|
|
"learning_rate": 0.0002454606324106977,
|
|
"loss": 0.8036,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.3533033388586313,
|
|
"grad_norm": 0.04603732482580829,
|
|
"learning_rate": 0.00024507748710780034,
|
|
"loss": 0.8062,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 0.3542505327965901,
|
|
"grad_norm": 0.05348425306250115,
|
|
"learning_rate": 0.00024469330205273676,
|
|
"loss": 0.7993,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.3551977267345489,
|
|
"grad_norm": 0.04956827787982081,
|
|
"learning_rate": 0.0002443080814468931,
|
|
"loss": 0.8036,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 0.3561449206725077,
|
|
"grad_norm": 0.05048490898899849,
|
|
"learning_rate": 0.00024392182950298033,
|
|
"loss": 0.8339,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.35709211461046647,
|
|
"grad_norm": 0.04546839597515788,
|
|
"learning_rate": 0.0002435345504449877,
|
|
"loss": 0.8127,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 0.3580393085484253,
|
|
"grad_norm": 0.04742371580337266,
|
|
"learning_rate": 0.00024314624850813689,
|
|
"loss": 0.8226,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.3589865024863841,
|
|
"grad_norm": 0.04631643296750854,
|
|
"learning_rate": 0.00024275692793883577,
|
|
"loss": 0.8133,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 0.35993369642434286,
|
|
"grad_norm": 0.04058836177118087,
|
|
"learning_rate": 0.00024236659299463171,
|
|
"loss": 0.7976,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.3608808903623017,
|
|
"grad_norm": 0.049761880526735185,
|
|
"learning_rate": 0.00024197524794416508,
|
|
"loss": 0.8144,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 0.3618280843002605,
|
|
"grad_norm": 0.0434144853404768,
|
|
"learning_rate": 0.00024158289706712266,
|
|
"loss": 0.7961,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.36277527823821926,
|
|
"grad_norm": 0.04729858009338802,
|
|
"learning_rate": 0.0002411895446541908,
|
|
"loss": 0.8092,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 0.3637224721761781,
|
|
"grad_norm": 0.04391468404782121,
|
|
"learning_rate": 0.00024079519500700848,
|
|
"loss": 0.7873,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.36466966611413687,
|
|
"grad_norm": 0.046136629260664565,
|
|
"learning_rate": 0.00024039985243812017,
|
|
"loss": 0.8358,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 0.36561686005209565,
|
|
"grad_norm": 0.04922463227417696,
|
|
"learning_rate": 0.000240003521270929,
|
|
"loss": 0.7982,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.3665640539900545,
|
|
"grad_norm": 0.050099522270096786,
|
|
"learning_rate": 0.00023960620583964905,
|
|
"loss": 0.8119,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 0.36751124792801326,
|
|
"grad_norm": 0.04673234998587366,
|
|
"learning_rate": 0.00023920791048925817,
|
|
"loss": 0.7916,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.36845844186597204,
|
|
"grad_norm": 0.044736938963155615,
|
|
"learning_rate": 0.00023880863957545065,
|
|
"loss": 0.8092,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 0.3694056358039309,
|
|
"grad_norm": 0.04403494677156711,
|
|
"learning_rate": 0.00023840839746458906,
|
|
"loss": 0.8007,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.37035282974188966,
|
|
"grad_norm": 0.04207278974539967,
|
|
"learning_rate": 0.00023800718853365707,
|
|
"loss": 0.8079,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 0.37130002367984843,
|
|
"grad_norm": 0.04441047413325019,
|
|
"learning_rate": 0.00023760501717021127,
|
|
"loss": 0.7981,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.37224721761780727,
|
|
"grad_norm": 0.04392750904458252,
|
|
"learning_rate": 0.00023720188777233328,
|
|
"loss": 0.8189,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 0.37319441155576605,
|
|
"grad_norm": 0.0409355126519413,
|
|
"learning_rate": 0.0002367978047485816,
|
|
"loss": 0.8065,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.3741416054937248,
|
|
"grad_norm": 0.044747845633113445,
|
|
"learning_rate": 0.00023639277251794342,
|
|
"loss": 0.8152,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 0.37508879943168366,
|
|
"grad_norm": 0.04360255348673726,
|
|
"learning_rate": 0.0002359867955097863,
|
|
"loss": 0.797,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.37603599336964244,
|
|
"grad_norm": 0.04405719390782307,
|
|
"learning_rate": 0.00023557987816380985,
|
|
"loss": 0.8058,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 0.3769831873076012,
|
|
"grad_norm": 0.04839154568146625,
|
|
"learning_rate": 0.00023517202492999686,
|
|
"loss": 0.8114,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.37793038124556005,
|
|
"grad_norm": 0.04779278427510571,
|
|
"learning_rate": 0.00023476324026856503,
|
|
"loss": 0.7969,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 0.37887757518351883,
|
|
"grad_norm": 0.05404705371255034,
|
|
"learning_rate": 0.00023435352864991787,
|
|
"loss": 0.8054,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.3798247691214776,
|
|
"grad_norm": 0.04669907966605441,
|
|
"learning_rate": 0.000233942894554596,
|
|
"loss": 0.8018,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 0.38077196305943645,
|
|
"grad_norm": 0.043137725132093525,
|
|
"learning_rate": 0.0002335313424732282,
|
|
"loss": 0.7924,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.3817191569973952,
|
|
"grad_norm": 0.04354182985885191,
|
|
"learning_rate": 0.00023311887690648196,
|
|
"loss": 0.7958,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 0.382666350935354,
|
|
"grad_norm": 0.042644363380025696,
|
|
"learning_rate": 0.00023270550236501467,
|
|
"loss": 0.8399,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.3836135448733128,
|
|
"grad_norm": 0.04539230083821745,
|
|
"learning_rate": 0.00023229122336942417,
|
|
"loss": 0.8038,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 0.3845607388112716,
|
|
"grad_norm": 0.04633684365426799,
|
|
"learning_rate": 0.0002318760444501991,
|
|
"loss": 0.7918,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.3855079327492304,
|
|
"grad_norm": 0.04726122562269177,
|
|
"learning_rate": 0.0002314599701476696,
|
|
"loss": 0.8095,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 0.3864551266871892,
|
|
"grad_norm": 0.04700120263284989,
|
|
"learning_rate": 0.00023104300501195765,
|
|
"loss": 0.7986,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.387402320625148,
|
|
"grad_norm": 0.04621030089117987,
|
|
"learning_rate": 0.0002306251536029271,
|
|
"loss": 0.803,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 0.3883495145631068,
|
|
"grad_norm": 0.04183970144784169,
|
|
"learning_rate": 0.00023020642049013403,
|
|
"loss": 0.785,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.38929670850106557,
|
|
"grad_norm": 0.0469484416435775,
|
|
"learning_rate": 0.0002297868102527767,
|
|
"loss": 0.7991,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 0.3902439024390244,
|
|
"grad_norm": 0.04737826498167925,
|
|
"learning_rate": 0.0002293663274796454,
|
|
"loss": 0.8004,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.3911910963769832,
|
|
"grad_norm": 0.04713629528918563,
|
|
"learning_rate": 0.00022894497676907244,
|
|
"loss": 0.7856,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 0.39213829031494196,
|
|
"grad_norm": 0.0539311516444963,
|
|
"learning_rate": 0.0002285227627288816,
|
|
"loss": 0.8007,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.3930854842529008,
|
|
"grad_norm": 0.04352919743448704,
|
|
"learning_rate": 0.00022809968997633803,
|
|
"loss": 0.7976,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 0.3940326781908596,
|
|
"grad_norm": 0.0438878761761322,
|
|
"learning_rate": 0.00022767576313809757,
|
|
"loss": 0.8084,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.39497987212881835,
|
|
"grad_norm": 0.04356708774126194,
|
|
"learning_rate": 0.0002272509868501561,
|
|
"loss": 0.8018,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 0.3959270660667772,
|
|
"grad_norm": 0.044799245818807745,
|
|
"learning_rate": 0.00022682536575779926,
|
|
"loss": 0.8185,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.39687426000473597,
|
|
"grad_norm": 0.04366762752086422,
|
|
"learning_rate": 0.00022639890451555094,
|
|
"loss": 0.8082,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 0.39782145394269475,
|
|
"grad_norm": 0.04970721314212078,
|
|
"learning_rate": 0.00022597160778712303,
|
|
"loss": 0.8163,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.3987686478806536,
|
|
"grad_norm": 0.04264183467808777,
|
|
"learning_rate": 0.00022554348024536413,
|
|
"loss": 0.7765,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 0.39971584181861236,
|
|
"grad_norm": 0.046000942745726546,
|
|
"learning_rate": 0.00022511452657220836,
|
|
"loss": 0.7767,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.40066303575657114,
|
|
"grad_norm": 0.04416028306809829,
|
|
"learning_rate": 0.0002246847514586244,
|
|
"loss": 0.7756,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 0.40161022969453,
|
|
"grad_norm": 0.04652032350471012,
|
|
"learning_rate": 0.00022425415960456406,
|
|
"loss": 0.785,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.40255742363248875,
|
|
"grad_norm": 0.039319004016494394,
|
|
"learning_rate": 0.00022382275571891088,
|
|
"loss": 0.8171,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 0.40350461757044753,
|
|
"grad_norm": 0.043191035340779275,
|
|
"learning_rate": 0.00022339054451942853,
|
|
"loss": 0.7888,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.40445181150840637,
|
|
"grad_norm": 0.04850980353591229,
|
|
"learning_rate": 0.00022295753073270957,
|
|
"loss": 0.8024,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 0.40539900544636515,
|
|
"grad_norm": 0.05346932816843321,
|
|
"learning_rate": 0.00022252371909412338,
|
|
"loss": 0.7943,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.4063461993843239,
|
|
"grad_norm": 0.0504819764321655,
|
|
"learning_rate": 0.00022208911434776446,
|
|
"loss": 0.8113,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 0.40729339332228276,
|
|
"grad_norm": 0.04358873484038266,
|
|
"learning_rate": 0.00022165372124640075,
|
|
"loss": 0.7792,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.40824058726024154,
|
|
"grad_norm": 0.048136044036710554,
|
|
"learning_rate": 0.0002212175445514214,
|
|
"loss": 0.8271,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 0.4091877811982003,
|
|
"grad_norm": 0.0482886839866423,
|
|
"learning_rate": 0.00022078058903278493,
|
|
"loss": 0.8082,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.41013497513615915,
|
|
"grad_norm": 0.04932102621776615,
|
|
"learning_rate": 0.00022034285946896683,
|
|
"loss": 0.8157,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 0.41108216907411793,
|
|
"grad_norm": 0.04558271327582903,
|
|
"learning_rate": 0.0002199043606469075,
|
|
"loss": 0.8205,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.4120293630120767,
|
|
"grad_norm": 0.04637987890505172,
|
|
"learning_rate": 0.00021946509736195982,
|
|
"loss": 0.8104,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 0.41297655695003554,
|
|
"grad_norm": 0.04402627600967093,
|
|
"learning_rate": 0.00021902507441783666,
|
|
"loss": 0.7735,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.4139237508879943,
|
|
"grad_norm": 0.04343306123899328,
|
|
"learning_rate": 0.0002185842966265585,
|
|
"loss": 0.8137,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 0.4148709448259531,
|
|
"grad_norm": 0.04049175499820178,
|
|
"learning_rate": 0.00021814276880840057,
|
|
"loss": 0.7666,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.41581813876391194,
|
|
"grad_norm": 0.04190114466718279,
|
|
"learning_rate": 0.0002177004957918404,
|
|
"loss": 0.7941,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 0.4167653327018707,
|
|
"grad_norm": 0.04167140065120627,
|
|
"learning_rate": 0.00021725748241350486,
|
|
"loss": 0.8049,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.4177125266398295,
|
|
"grad_norm": 0.04030091050865229,
|
|
"learning_rate": 0.00021681373351811715,
|
|
"loss": 0.7765,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 0.4186597205777883,
|
|
"grad_norm": 0.043928276470467834,
|
|
"learning_rate": 0.00021636925395844425,
|
|
"loss": 0.8004,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.4196069145157471,
|
|
"grad_norm": 0.041711739434037935,
|
|
"learning_rate": 0.00021592404859524338,
|
|
"loss": 0.8014,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 0.4205541084537059,
|
|
"grad_norm": 0.08515140585912362,
|
|
"learning_rate": 0.00021547812229720905,
|
|
"loss": 0.7925,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.42150130239166467,
|
|
"grad_norm": 0.046571948096373585,
|
|
"learning_rate": 0.0002150314799409198,
|
|
"loss": 0.7995,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 0.4224484963296235,
|
|
"grad_norm": 0.04291996713823751,
|
|
"learning_rate": 0.00021458412641078484,
|
|
"loss": 0.7833,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.4233956902675823,
|
|
"grad_norm": 0.04800959860061716,
|
|
"learning_rate": 0.00021413606659899075,
|
|
"loss": 0.8056,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 0.42434288420554106,
|
|
"grad_norm": 0.04165302702567769,
|
|
"learning_rate": 0.00021368730540544784,
|
|
"loss": 0.8031,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.4252900781434999,
|
|
"grad_norm": 0.042443453575190054,
|
|
"learning_rate": 0.0002132378477377366,
|
|
"loss": 0.8342,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 0.4262372720814587,
|
|
"grad_norm": 0.04364197763796348,
|
|
"learning_rate": 0.00021278769851105413,
|
|
"loss": 0.8069,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.42718446601941745,
|
|
"grad_norm": 0.04424427184165216,
|
|
"learning_rate": 0.00021233686264816024,
|
|
"loss": 0.8093,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 0.4281316599573763,
|
|
"grad_norm": 0.04438652665500913,
|
|
"learning_rate": 0.00021188534507932369,
|
|
"loss": 0.812,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.42907885389533507,
|
|
"grad_norm": 0.04194291606682371,
|
|
"learning_rate": 0.0002114331507422682,
|
|
"loss": 0.7999,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 0.43002604783329385,
|
|
"grad_norm": 0.040114143951823826,
|
|
"learning_rate": 0.0002109802845821187,
|
|
"loss": 0.776,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.4309732417712527,
|
|
"grad_norm": 0.049855477781271945,
|
|
"learning_rate": 0.0002105267515513469,
|
|
"loss": 0.7898,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 0.43192043570921146,
|
|
"grad_norm": 0.04832409909314252,
|
|
"learning_rate": 0.00021007255660971736,
|
|
"loss": 0.7705,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.43286762964717024,
|
|
"grad_norm": 0.04448804574954983,
|
|
"learning_rate": 0.00020961770472423323,
|
|
"loss": 0.7856,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 0.4338148235851291,
|
|
"grad_norm": 0.04056107663630711,
|
|
"learning_rate": 0.00020916220086908185,
|
|
"loss": 0.8386,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.43476201752308785,
|
|
"grad_norm": 0.04562375800507603,
|
|
"learning_rate": 0.00020870605002558038,
|
|
"loss": 0.7919,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 0.43570921146104663,
|
|
"grad_norm": 0.04990170126587913,
|
|
"learning_rate": 0.00020824925718212133,
|
|
"loss": 0.7812,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.43665640539900547,
|
|
"grad_norm": 0.04782898096811606,
|
|
"learning_rate": 0.00020779182733411813,
|
|
"loss": 0.8204,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 0.43760359933696424,
|
|
"grad_norm": 0.05977747562829158,
|
|
"learning_rate": 0.00020733376548395026,
|
|
"loss": 0.7674,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.438550793274923,
|
|
"grad_norm": 0.050186919112014665,
|
|
"learning_rate": 0.00020687507664090873,
|
|
"loss": 0.7842,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 0.43949798721288186,
|
|
"grad_norm": 0.06774152080561698,
|
|
"learning_rate": 0.0002064157658211413,
|
|
"loss": 0.7863,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.44044518115084064,
|
|
"grad_norm": 0.05251562387148029,
|
|
"learning_rate": 0.0002059558380475974,
|
|
"loss": 0.7803,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 0.4413923750887994,
|
|
"grad_norm": 0.11110903402386875,
|
|
"learning_rate": 0.00020549529834997356,
|
|
"loss": 0.8211,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.44233956902675825,
|
|
"grad_norm": 0.05088713402476131,
|
|
"learning_rate": 0.0002050341517646581,
|
|
"loss": 0.8229,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 0.44328676296471703,
|
|
"grad_norm": 0.048748937702802024,
|
|
"learning_rate": 0.00020457240333467618,
|
|
"loss": 0.8308,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.4442339569026758,
|
|
"grad_norm": 0.04414994149656158,
|
|
"learning_rate": 0.00020411005810963467,
|
|
"loss": 0.7783,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 0.44518115084063464,
|
|
"grad_norm": 0.04297554142966596,
|
|
"learning_rate": 0.00020364712114566682,
|
|
"loss": 0.7994,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.4461283447785934,
|
|
"grad_norm": 0.0466918891507033,
|
|
"learning_rate": 0.00020318359750537722,
|
|
"loss": 0.7766,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 0.4470755387165522,
|
|
"grad_norm": 0.0426774503592908,
|
|
"learning_rate": 0.00020271949225778604,
|
|
"loss": 0.7689,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.44802273265451104,
|
|
"grad_norm": 0.04339210841225298,
|
|
"learning_rate": 0.00020225481047827395,
|
|
"loss": 0.7629,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 0.4489699265924698,
|
|
"grad_norm": 0.04157539901744288,
|
|
"learning_rate": 0.0002017895572485264,
|
|
"loss": 0.7993,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.4499171205304286,
|
|
"grad_norm": 0.04747869551715979,
|
|
"learning_rate": 0.00020132373765647824,
|
|
"loss": 0.7831,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 0.45086431446838743,
|
|
"grad_norm": 0.041036503272024266,
|
|
"learning_rate": 0.00020085735679625785,
|
|
"loss": 0.7938,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.4518115084063462,
|
|
"grad_norm": 0.046349793056969124,
|
|
"learning_rate": 0.00020039041976813155,
|
|
"loss": 0.8213,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 0.452758702344305,
|
|
"grad_norm": 0.04266113778547003,
|
|
"learning_rate": 0.000199922931678448,
|
|
"loss": 0.7797,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.4537058962822638,
|
|
"grad_norm": 0.04138658311957195,
|
|
"learning_rate": 0.00019945489763958192,
|
|
"loss": 0.7855,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 0.4546530902202226,
|
|
"grad_norm": 0.03957822721175349,
|
|
"learning_rate": 0.00019898632276987865,
|
|
"loss": 0.7802,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.4556002841581814,
|
|
"grad_norm": 0.043723247514117734,
|
|
"learning_rate": 0.00019851721219359787,
|
|
"loss": 0.7914,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 0.45654747809614016,
|
|
"grad_norm": 0.040483397534663346,
|
|
"learning_rate": 0.0001980475710408577,
|
|
"loss": 0.7784,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.457494672034099,
|
|
"grad_norm": 0.047435378340581154,
|
|
"learning_rate": 0.00019757740444757856,
|
|
"loss": 0.8099,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 0.45844186597205777,
|
|
"grad_norm": 0.04167097525052772,
|
|
"learning_rate": 0.00019710671755542684,
|
|
"loss": 0.8004,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.45938905991001655,
|
|
"grad_norm": 0.045347168717410416,
|
|
"learning_rate": 0.0001966355155117592,
|
|
"loss": 0.7503,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 0.4603362538479754,
|
|
"grad_norm": 0.04793512210403035,
|
|
"learning_rate": 0.00019616380346956555,
|
|
"loss": 0.8034,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.46128344778593416,
|
|
"grad_norm": 0.044750662926773134,
|
|
"learning_rate": 0.00019569158658741325,
|
|
"loss": 0.8036,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 0.46223064172389294,
|
|
"grad_norm": 0.0408279434029639,
|
|
"learning_rate": 0.0001952188700293905,
|
|
"loss": 0.7744,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.4631778356618518,
|
|
"grad_norm": 0.04475763705667161,
|
|
"learning_rate": 0.0001947456589650498,
|
|
"loss": 0.7831,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 0.46412502959981056,
|
|
"grad_norm": 0.048943569028928394,
|
|
"learning_rate": 0.00019427195856935156,
|
|
"loss": 0.7584,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.46507222353776934,
|
|
"grad_norm": 0.0470737958639738,
|
|
"learning_rate": 0.00019379777402260735,
|
|
"loss": 0.8045,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 0.46601941747572817,
|
|
"grad_norm": 0.04333259402866556,
|
|
"learning_rate": 0.0001933231105104235,
|
|
"loss": 0.8252,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.46696661141368695,
|
|
"grad_norm": 0.04500393126058614,
|
|
"learning_rate": 0.00019284797322364412,
|
|
"loss": 0.7963,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 0.46791380535164573,
|
|
"grad_norm": 0.04617455518775394,
|
|
"learning_rate": 0.00019237236735829434,
|
|
"loss": 0.7905,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.46886099928960456,
|
|
"grad_norm": 0.04568226599386409,
|
|
"learning_rate": 0.0001918962981155238,
|
|
"loss": 0.7878,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 0.46980819322756334,
|
|
"grad_norm": 0.04384781746645944,
|
|
"learning_rate": 0.00019141977070154945,
|
|
"loss": 0.8155,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.4707553871655221,
|
|
"grad_norm": 0.04128165433631373,
|
|
"learning_rate": 0.0001909427903275988,
|
|
"loss": 0.8024,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 0.47170258110348096,
|
|
"grad_norm": 0.04842154647206823,
|
|
"learning_rate": 0.00019046536220985267,
|
|
"loss": 0.7762,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.47264977504143973,
|
|
"grad_norm": 0.042673491173335736,
|
|
"learning_rate": 0.00018998749156938854,
|
|
"loss": 0.7709,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 0.4735969689793985,
|
|
"grad_norm": 0.04785439305575584,
|
|
"learning_rate": 0.00018950918363212324,
|
|
"loss": 0.7804,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.47454416291735735,
|
|
"grad_norm": 0.0461863217228199,
|
|
"learning_rate": 0.00018903044362875558,
|
|
"loss": 0.7925,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 0.4754913568553161,
|
|
"grad_norm": 0.044243673212673404,
|
|
"learning_rate": 0.0001885512767947097,
|
|
"loss": 0.7941,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.4764385507932749,
|
|
"grad_norm": 0.040611884775210036,
|
|
"learning_rate": 0.0001880716883700772,
|
|
"loss": 0.7562,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 0.47738574473123374,
|
|
"grad_norm": 0.041073708433040416,
|
|
"learning_rate": 0.00018759168359956034,
|
|
"loss": 0.7856,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.4783329386691925,
|
|
"grad_norm": 0.040555781455264096,
|
|
"learning_rate": 0.00018711126773241434,
|
|
"loss": 0.7808,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 0.4792801326071513,
|
|
"grad_norm": 0.04412592641007611,
|
|
"learning_rate": 0.00018663044602239016,
|
|
"loss": 0.7527,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.48022732654511013,
|
|
"grad_norm": 0.04484068242737891,
|
|
"learning_rate": 0.00018614922372767705,
|
|
"loss": 0.8026,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 0.4811745204830689,
|
|
"grad_norm": 0.04494181539453932,
|
|
"learning_rate": 0.00018566760611084482,
|
|
"loss": 0.7884,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.4821217144210277,
|
|
"grad_norm": 0.04177209726984591,
|
|
"learning_rate": 0.00018518559843878663,
|
|
"loss": 0.7944,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 0.4830689083589865,
|
|
"grad_norm": 0.0418891994027945,
|
|
"learning_rate": 0.00018470320598266114,
|
|
"loss": 0.7876,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.4840161022969453,
|
|
"grad_norm": 0.04509400985562226,
|
|
"learning_rate": 0.00018422043401783499,
|
|
"loss": 0.7906,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 0.4849632962349041,
|
|
"grad_norm": 0.03886301281064671,
|
|
"learning_rate": 0.00018373728782382497,
|
|
"loss": 0.7658,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.4859104901728629,
|
|
"grad_norm": 0.04186589657661449,
|
|
"learning_rate": 0.00018325377268424054,
|
|
"loss": 0.7921,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 0.4868576841108217,
|
|
"grad_norm": 0.04025799467986521,
|
|
"learning_rate": 0.00018276989388672573,
|
|
"loss": 0.8143,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.4878048780487805,
|
|
"grad_norm": 0.04058499655814065,
|
|
"learning_rate": 0.0001822856567229016,
|
|
"loss": 0.7819,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 0.4887520719867393,
|
|
"grad_norm": 0.04628173800240841,
|
|
"learning_rate": 0.0001818010664883082,
|
|
"loss": 0.7944,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.4896992659246981,
|
|
"grad_norm": 0.042223328062815924,
|
|
"learning_rate": 0.0001813161284823466,
|
|
"loss": 0.7975,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 0.49064645986265687,
|
|
"grad_norm": 0.041313006294521955,
|
|
"learning_rate": 0.00018083084800822128,
|
|
"loss": 0.7954,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.49159365380061565,
|
|
"grad_norm": 0.042569155544217974,
|
|
"learning_rate": 0.0001803452303728816,
|
|
"loss": 0.7628,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 0.4925408477385745,
|
|
"grad_norm": 0.043867104326580246,
|
|
"learning_rate": 0.00017985928088696434,
|
|
"loss": 0.7558,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.49348804167653326,
|
|
"grad_norm": 0.04371521207560914,
|
|
"learning_rate": 0.0001793730048647352,
|
|
"loss": 0.7686,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 0.49443523561449204,
|
|
"grad_norm": 0.04369821893875824,
|
|
"learning_rate": 0.00017888640762403078,
|
|
"loss": 0.7961,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.4953824295524509,
|
|
"grad_norm": 0.043782837772372955,
|
|
"learning_rate": 0.00017839949448620064,
|
|
"loss": 0.8211,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 0.49632962349040965,
|
|
"grad_norm": 0.04427063556293102,
|
|
"learning_rate": 0.00017791227077604876,
|
|
"loss": 0.7961,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.49727681742836843,
|
|
"grad_norm": 0.04245267391622513,
|
|
"learning_rate": 0.00017742474182177567,
|
|
"loss": 0.7556,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 0.49822401136632727,
|
|
"grad_norm": 0.04105426628219163,
|
|
"learning_rate": 0.00017693691295491982,
|
|
"loss": 0.7994,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.49917120530428605,
|
|
"grad_norm": 0.03984608708960842,
|
|
"learning_rate": 0.0001764487895102995,
|
|
"loss": 0.7818,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 0.5001183992422449,
|
|
"grad_norm": 0.04428069948873579,
|
|
"learning_rate": 0.00017596037682595465,
|
|
"loss": 0.7862,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.5010655931802036,
|
|
"grad_norm": 0.04490329464859081,
|
|
"learning_rate": 0.00017547168024308806,
|
|
"loss": 0.7975,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 0.5020127871181624,
|
|
"grad_norm": 0.0413111891075842,
|
|
"learning_rate": 0.0001749827051060072,
|
|
"loss": 0.7678,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.5029599810561213,
|
|
"grad_norm": 0.04240726873268668,
|
|
"learning_rate": 0.00017449345676206595,
|
|
"loss": 0.796,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 0.50390717499408,
|
|
"grad_norm": 0.041665058788828147,
|
|
"learning_rate": 0.0001740039405616057,
|
|
"loss": 0.7769,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.5048543689320388,
|
|
"grad_norm": 0.044320910745836974,
|
|
"learning_rate": 0.00017351416185789725,
|
|
"loss": 0.7805,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 0.5058015628699977,
|
|
"grad_norm": 0.04059793612300337,
|
|
"learning_rate": 0.00017302412600708202,
|
|
"loss": 0.7585,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.5067487568079564,
|
|
"grad_norm": 0.04002124902466378,
|
|
"learning_rate": 0.00017253383836811356,
|
|
"loss": 0.7902,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 0.5076959507459152,
|
|
"grad_norm": 0.03946188221690789,
|
|
"learning_rate": 0.00017204330430269896,
|
|
"loss": 0.7883,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.5086431446838741,
|
|
"grad_norm": 0.04168808355706374,
|
|
"learning_rate": 0.00017155252917524014,
|
|
"loss": 0.7623,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 0.5095903386218328,
|
|
"grad_norm": 0.0403800938955808,
|
|
"learning_rate": 0.0001710615183527753,
|
|
"loss": 0.7837,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.5105375325597916,
|
|
"grad_norm": 0.04265285946105665,
|
|
"learning_rate": 0.0001705702772049201,
|
|
"loss": 0.782,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 0.5114847264977505,
|
|
"grad_norm": 0.03909716815369796,
|
|
"learning_rate": 0.00017007881110380903,
|
|
"loss": 0.7992,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.5124319204357092,
|
|
"grad_norm": 0.04390278736912629,
|
|
"learning_rate": 0.00016958712542403665,
|
|
"loss": 0.7925,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 0.513379114373668,
|
|
"grad_norm": 0.040328289968045196,
|
|
"learning_rate": 0.00016909522554259875,
|
|
"loss": 0.7888,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.5143263083116268,
|
|
"grad_norm": 0.042972772541450946,
|
|
"learning_rate": 0.00016860311683883366,
|
|
"loss": 0.7522,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 0.5152735022495856,
|
|
"grad_norm": 0.040623359273199364,
|
|
"learning_rate": 0.0001681108046943633,
|
|
"loss": 0.7673,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.5162206961875444,
|
|
"grad_norm": 0.0451021298457456,
|
|
"learning_rate": 0.00016761829449303442,
|
|
"loss": 0.7803,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 0.5171678901255032,
|
|
"grad_norm": 0.041924502748264085,
|
|
"learning_rate": 0.00016712559162085963,
|
|
"loss": 0.7691,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.518115084063462,
|
|
"grad_norm": 0.04149735002775417,
|
|
"learning_rate": 0.0001666327014659587,
|
|
"loss": 0.7889,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 0.5190622780014208,
|
|
"grad_norm": 0.04415559586614686,
|
|
"learning_rate": 0.00016613962941849924,
|
|
"loss": 0.7808,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.5200094719393796,
|
|
"grad_norm": 0.03925128460208889,
|
|
"learning_rate": 0.00016564638087063834,
|
|
"loss": 0.7773,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 0.5209566658773384,
|
|
"grad_norm": 0.04368831843596003,
|
|
"learning_rate": 0.00016515296121646299,
|
|
"loss": 0.7882,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.5219038598152972,
|
|
"grad_norm": 0.0422781127201221,
|
|
"learning_rate": 0.00016465937585193144,
|
|
"loss": 0.764,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 0.522851053753256,
|
|
"grad_norm": 0.040472037162999186,
|
|
"learning_rate": 0.0001641656301748143,
|
|
"loss": 0.7667,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.5237982476912147,
|
|
"grad_norm": 0.04379401207313861,
|
|
"learning_rate": 0.00016367172958463503,
|
|
"loss": 0.7792,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 0.5247454416291736,
|
|
"grad_norm": 0.04114160448623085,
|
|
"learning_rate": 0.00016317767948261148,
|
|
"loss": 0.7812,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.5256926355671324,
|
|
"grad_norm": 0.03953583092530725,
|
|
"learning_rate": 0.00016268348527159632,
|
|
"loss": 0.751,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 0.5266398295050911,
|
|
"grad_norm": 0.046057484794039295,
|
|
"learning_rate": 0.0001621891523560183,
|
|
"loss": 0.8031,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.52758702344305,
|
|
"grad_norm": 0.044096492253313226,
|
|
"learning_rate": 0.00016169468614182306,
|
|
"loss": 0.768,
|
|
"step": 2785
|
|
},
|
|
{
|
|
"epoch": 0.5285342173810088,
|
|
"grad_norm": 0.04028812660467143,
|
|
"learning_rate": 0.00016120009203641374,
|
|
"loss": 0.7417,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.5294814113189675,
|
|
"grad_norm": 0.0421254976442694,
|
|
"learning_rate": 0.00016070537544859238,
|
|
"loss": 0.7525,
|
|
"step": 2795
|
|
},
|
|
{
|
|
"epoch": 0.5304286052569264,
|
|
"grad_norm": 0.041297980776658014,
|
|
"learning_rate": 0.00016021054178850025,
|
|
"loss": 0.7555,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.5313757991948852,
|
|
"grad_norm": 0.04218044701907684,
|
|
"learning_rate": 0.000159715596467559,
|
|
"loss": 0.7636,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 0.5323229931328439,
|
|
"grad_norm": 0.04257545791321493,
|
|
"learning_rate": 0.00015922054489841134,
|
|
"loss": 0.7877,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.5332701870708028,
|
|
"grad_norm": 0.04037611852463485,
|
|
"learning_rate": 0.0001587253924948619,
|
|
"loss": 0.7967,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 0.5342173810087616,
|
|
"grad_norm": 0.04724884702232222,
|
|
"learning_rate": 0.00015823014467181813,
|
|
"loss": 0.778,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.5351645749467203,
|
|
"grad_norm": 0.041552815375501054,
|
|
"learning_rate": 0.00015773480684523082,
|
|
"loss": 0.7644,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 0.5361117688846792,
|
|
"grad_norm": 0.04060976824870222,
|
|
"learning_rate": 0.00015723938443203505,
|
|
"loss": 0.7568,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.537058962822638,
|
|
"grad_norm": 0.039968469240745465,
|
|
"learning_rate": 0.000156743882850091,
|
|
"loss": 0.7641,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 0.5380061567605967,
|
|
"grad_norm": 0.04550185985191349,
|
|
"learning_rate": 0.00015624830751812452,
|
|
"loss": 0.7631,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.5389533506985555,
|
|
"grad_norm": 0.04397422552677812,
|
|
"learning_rate": 0.0001557526638556681,
|
|
"loss": 0.7898,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 0.5399005446365144,
|
|
"grad_norm": 0.04128096672528751,
|
|
"learning_rate": 0.00015525695728300142,
|
|
"loss": 0.8049,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.5408477385744731,
|
|
"grad_norm": 0.043926555415344445,
|
|
"learning_rate": 0.00015476119322109215,
|
|
"loss": 0.7856,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 0.5417949325124319,
|
|
"grad_norm": 0.04191285035326946,
|
|
"learning_rate": 0.00015426537709153665,
|
|
"loss": 0.7811,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.5427421264503908,
|
|
"grad_norm": 0.04398915485090648,
|
|
"learning_rate": 0.00015376951431650063,
|
|
"loss": 0.7642,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 0.5436893203883495,
|
|
"grad_norm": 0.043931690655324165,
|
|
"learning_rate": 0.00015327361031865994,
|
|
"loss": 0.7453,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.5446365143263083,
|
|
"grad_norm": 0.04098372350907064,
|
|
"learning_rate": 0.00015277767052114134,
|
|
"loss": 0.791,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 0.545583708264267,
|
|
"grad_norm": 0.044827794864255185,
|
|
"learning_rate": 0.00015228170034746287,
|
|
"loss": 0.7742,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.5465309022022259,
|
|
"grad_norm": 0.043361929504781155,
|
|
"learning_rate": 0.00015178570522147503,
|
|
"loss": 0.7721,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 0.5474780961401847,
|
|
"grad_norm": 0.04140360142754399,
|
|
"learning_rate": 0.00015128969056730094,
|
|
"loss": 0.7638,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.5484252900781434,
|
|
"grad_norm": 0.044541139967598814,
|
|
"learning_rate": 0.00015079366180927747,
|
|
"loss": 0.7648,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 0.5493724840161023,
|
|
"grad_norm": 0.04373950378809721,
|
|
"learning_rate": 0.00015029762437189555,
|
|
"loss": 0.764,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.5503196779540611,
|
|
"grad_norm": 0.044464277569613665,
|
|
"learning_rate": 0.00014980158367974123,
|
|
"loss": 0.7584,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 0.5512668718920198,
|
|
"grad_norm": 0.041529612627100455,
|
|
"learning_rate": 0.000149305545157436,
|
|
"loss": 0.784,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.5522140658299787,
|
|
"grad_norm": 0.04387046595628468,
|
|
"learning_rate": 0.00014880951422957764,
|
|
"loss": 0.7829,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 0.5531612597679375,
|
|
"grad_norm": 0.03928205510955695,
|
|
"learning_rate": 0.00014831349632068097,
|
|
"loss": 0.7838,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.5541084537058962,
|
|
"grad_norm": 0.03792796655680272,
|
|
"learning_rate": 0.0001478174968551183,
|
|
"loss": 0.7585,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 0.5550556476438551,
|
|
"grad_norm": 0.0418350076429959,
|
|
"learning_rate": 0.00014732152125706042,
|
|
"loss": 0.7892,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.5560028415818139,
|
|
"grad_norm": 0.0459671460831212,
|
|
"learning_rate": 0.00014682557495041684,
|
|
"loss": 0.733,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 0.5569500355197726,
|
|
"grad_norm": 0.04058456377794918,
|
|
"learning_rate": 0.00014632966335877706,
|
|
"loss": 0.7686,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.5578972294577315,
|
|
"grad_norm": 0.040486030304535854,
|
|
"learning_rate": 0.00014583379190535075,
|
|
"loss": 0.7396,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 0.5588444233956903,
|
|
"grad_norm": 0.04178283807566538,
|
|
"learning_rate": 0.00014533796601290868,
|
|
"loss": 0.7982,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.559791617333649,
|
|
"grad_norm": 0.040277272924603875,
|
|
"learning_rate": 0.0001448421911037234,
|
|
"loss": 0.7607,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 0.5607388112716079,
|
|
"grad_norm": 0.041258343955637974,
|
|
"learning_rate": 0.0001443464725995098,
|
|
"loss": 0.7443,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.5616860052095667,
|
|
"grad_norm": 0.04032233073174216,
|
|
"learning_rate": 0.00014385081592136614,
|
|
"loss": 0.7993,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 0.5626331991475254,
|
|
"grad_norm": 0.03958174726228383,
|
|
"learning_rate": 0.0001433552264897143,
|
|
"loss": 0.7897,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.5635803930854842,
|
|
"grad_norm": 0.03858023826870275,
|
|
"learning_rate": 0.000142859709724241,
|
|
"loss": 0.7531,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 0.5645275870234431,
|
|
"grad_norm": 0.03863381061427778,
|
|
"learning_rate": 0.00014236427104383827,
|
|
"loss": 0.7683,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.5654747809614018,
|
|
"grad_norm": 0.04304392912853754,
|
|
"learning_rate": 0.00014186891586654395,
|
|
"loss": 0.7611,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 0.5664219748993606,
|
|
"grad_norm": 0.040517793123203284,
|
|
"learning_rate": 0.00014137364960948307,
|
|
"loss": 0.7597,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.5673691688373195,
|
|
"grad_norm": 0.04010964103438508,
|
|
"learning_rate": 0.0001408784776888079,
|
|
"loss": 0.7886,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 0.5683163627752782,
|
|
"grad_norm": 0.16933225878617628,
|
|
"learning_rate": 0.00014038340551963946,
|
|
"loss": 0.7754,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.569263556713237,
|
|
"grad_norm": 0.049628098964115824,
|
|
"learning_rate": 0.0001398884385160074,
|
|
"loss": 0.7557,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 0.5702107506511959,
|
|
"grad_norm": 0.04117704234522045,
|
|
"learning_rate": 0.00013939358209079177,
|
|
"loss": 0.7662,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.5711579445891546,
|
|
"grad_norm": 0.04404870833994479,
|
|
"learning_rate": 0.00013889884165566317,
|
|
"loss": 0.7802,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 0.5721051385271134,
|
|
"grad_norm": 0.0400783837624163,
|
|
"learning_rate": 0.00013840422262102357,
|
|
"loss": 0.7772,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.5730523324650723,
|
|
"grad_norm": 0.04445272029297132,
|
|
"learning_rate": 0.00013790973039594766,
|
|
"loss": 0.7403,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 0.573999526403031,
|
|
"grad_norm": 0.04189631829289255,
|
|
"learning_rate": 0.000137415370388123,
|
|
"loss": 0.7811,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.5749467203409898,
|
|
"grad_norm": 0.04116724359754346,
|
|
"learning_rate": 0.00013692114800379165,
|
|
"loss": 0.7696,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 0.5758939142789486,
|
|
"grad_norm": 0.03811498143887281,
|
|
"learning_rate": 0.00013642706864769023,
|
|
"loss": 0.7974,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.5768411082169074,
|
|
"grad_norm": 0.03813260068428526,
|
|
"learning_rate": 0.00013593313772299151,
|
|
"loss": 0.7491,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 0.5777883021548662,
|
|
"grad_norm": 0.04258997197360236,
|
|
"learning_rate": 0.00013543936063124503,
|
|
"loss": 0.7611,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.578735496092825,
|
|
"grad_norm": 0.044652230022576414,
|
|
"learning_rate": 0.00013494574277231772,
|
|
"loss": 0.7639,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 0.5796826900307838,
|
|
"grad_norm": 0.040644166117172274,
|
|
"learning_rate": 0.00013445228954433568,
|
|
"loss": 0.7871,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.5806298839687426,
|
|
"grad_norm": 0.03834957657226225,
|
|
"learning_rate": 0.00013395900634362418,
|
|
"loss": 0.7516,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 0.5815770779067014,
|
|
"grad_norm": 0.0425924434925859,
|
|
"learning_rate": 0.0001334658985646493,
|
|
"loss": 0.767,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.5825242718446602,
|
|
"grad_norm": 0.04603157938094156,
|
|
"learning_rate": 0.00013297297159995872,
|
|
"loss": 0.7642,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 0.583471465782619,
|
|
"grad_norm": 0.04251195805977364,
|
|
"learning_rate": 0.00013248023084012268,
|
|
"loss": 0.7695,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.5844186597205778,
|
|
"grad_norm": 0.04025338997238214,
|
|
"learning_rate": 0.0001319876816736754,
|
|
"loss": 0.7428,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 0.5853658536585366,
|
|
"grad_norm": 0.04211750103789674,
|
|
"learning_rate": 0.00013149532948705542,
|
|
"loss": 0.7621,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.5863130475964954,
|
|
"grad_norm": 0.04455741942115236,
|
|
"learning_rate": 0.0001310031796645475,
|
|
"loss": 0.8198,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 0.5872602415344542,
|
|
"grad_norm": 0.042122208500608355,
|
|
"learning_rate": 0.00013051123758822317,
|
|
"loss": 0.7902,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.5882074354724129,
|
|
"grad_norm": 0.04026734370639814,
|
|
"learning_rate": 0.0001300195086378822,
|
|
"loss": 0.743,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 0.5891546294103718,
|
|
"grad_norm": 0.04263858640190378,
|
|
"learning_rate": 0.00012952799819099362,
|
|
"loss": 0.7611,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.5901018233483306,
|
|
"grad_norm": 0.042020491856201506,
|
|
"learning_rate": 0.00012903671162263692,
|
|
"loss": 0.7638,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 0.5910490172862893,
|
|
"grad_norm": 0.0411120885986108,
|
|
"learning_rate": 0.0001285456543054433,
|
|
"loss": 0.7779,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.5919962112242482,
|
|
"grad_norm": 0.0406466480831228,
|
|
"learning_rate": 0.0001280548316095369,
|
|
"loss": 0.7648,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 0.592943405162207,
|
|
"grad_norm": 0.039876223182510155,
|
|
"learning_rate": 0.00012756424890247612,
|
|
"loss": 0.7465,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.5938905991001657,
|
|
"grad_norm": 0.03968060112874898,
|
|
"learning_rate": 0.00012707391154919478,
|
|
"loss": 0.7788,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 0.5948377930381246,
|
|
"grad_norm": 0.04109563591743239,
|
|
"learning_rate": 0.00012658382491194368,
|
|
"loss": 0.7629,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.5957849869760834,
|
|
"grad_norm": 0.04068329826647931,
|
|
"learning_rate": 0.0001260939943502317,
|
|
"loss": 0.7652,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 0.5967321809140421,
|
|
"grad_norm": 0.039457301232553726,
|
|
"learning_rate": 0.00012560442522076745,
|
|
"loss": 0.771,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.597679374852001,
|
|
"grad_norm": 0.039313871399632806,
|
|
"learning_rate": 0.0001251151228774005,
|
|
"loss": 0.7665,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 0.5986265687899598,
|
|
"grad_norm": 0.04012444827177292,
|
|
"learning_rate": 0.0001246260926710628,
|
|
"loss": 0.7672,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.5995737627279185,
|
|
"grad_norm": 0.04199451923312326,
|
|
"learning_rate": 0.00012413733994971044,
|
|
"loss": 0.7767,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 0.6005209566658773,
|
|
"grad_norm": 0.043536826355634925,
|
|
"learning_rate": 0.0001236488700582648,
|
|
"loss": 0.7447,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 0.6014681506038362,
|
|
"grad_norm": 0.04225224126474484,
|
|
"learning_rate": 0.00012316068833855438,
|
|
"loss": 0.7705,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 0.6024153445417949,
|
|
"grad_norm": 0.04122031488132078,
|
|
"learning_rate": 0.00012267280012925622,
|
|
"loss": 0.7553,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.6033625384797537,
|
|
"grad_norm": 0.04052877747206926,
|
|
"learning_rate": 0.00012218521076583767,
|
|
"loss": 0.7395,
|
|
"step": 3185
|
|
},
|
|
{
|
|
"epoch": 0.6043097324177126,
|
|
"grad_norm": 0.03976724566633056,
|
|
"learning_rate": 0.00012169792558049789,
|
|
"loss": 0.7902,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 0.6052569263556713,
|
|
"grad_norm": 0.03994928147888954,
|
|
"learning_rate": 0.00012121094990210951,
|
|
"loss": 0.7492,
|
|
"step": 3195
|
|
},
|
|
{
|
|
"epoch": 0.6062041202936301,
|
|
"grad_norm": 0.0410556694914752,
|
|
"learning_rate": 0.00012072428905616064,
|
|
"loss": 0.7513,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.607151314231589,
|
|
"grad_norm": 0.04147107567296144,
|
|
"learning_rate": 0.00012023794836469624,
|
|
"loss": 0.7321,
|
|
"step": 3205
|
|
},
|
|
{
|
|
"epoch": 0.6080985081695477,
|
|
"grad_norm": 0.042191697935109296,
|
|
"learning_rate": 0.00011975193314626025,
|
|
"loss": 0.7553,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 0.6090457021075065,
|
|
"grad_norm": 0.04045134486807547,
|
|
"learning_rate": 0.00011926624871583717,
|
|
"loss": 0.7352,
|
|
"step": 3215
|
|
},
|
|
{
|
|
"epoch": 0.6099928960454654,
|
|
"grad_norm": 0.04292542392593404,
|
|
"learning_rate": 0.00011878090038479416,
|
|
"loss": 0.771,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.6109400899834241,
|
|
"grad_norm": 0.03947257757285626,
|
|
"learning_rate": 0.00011829589346082281,
|
|
"loss": 0.7555,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 0.6118872839213829,
|
|
"grad_norm": 0.03798498100177421,
|
|
"learning_rate": 0.00011781123324788111,
|
|
"loss": 0.7717,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 0.6128344778593418,
|
|
"grad_norm": 0.040633643895124465,
|
|
"learning_rate": 0.00011732692504613554,
|
|
"loss": 0.7412,
|
|
"step": 3235
|
|
},
|
|
{
|
|
"epoch": 0.6137816717973005,
|
|
"grad_norm": 0.038994248859382026,
|
|
"learning_rate": 0.00011684297415190295,
|
|
"loss": 0.7626,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.6147288657352593,
|
|
"grad_norm": 0.037087473994001245,
|
|
"learning_rate": 0.00011635938585759284,
|
|
"loss": 0.7485,
|
|
"step": 3245
|
|
},
|
|
{
|
|
"epoch": 0.6156760596732181,
|
|
"grad_norm": 0.04283707658057122,
|
|
"learning_rate": 0.00011587616545164923,
|
|
"loss": 0.76,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.6166232536111769,
|
|
"grad_norm": 0.03994982225913288,
|
|
"learning_rate": 0.00011539331821849317,
|
|
"loss": 0.7867,
|
|
"step": 3255
|
|
},
|
|
{
|
|
"epoch": 0.6175704475491357,
|
|
"grad_norm": 0.045291416938963186,
|
|
"learning_rate": 0.00011491084943846459,
|
|
"loss": 0.7909,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.6185176414870944,
|
|
"grad_norm": 0.04461380336072227,
|
|
"learning_rate": 0.00011442876438776475,
|
|
"loss": 0.7501,
|
|
"step": 3265
|
|
},
|
|
{
|
|
"epoch": 0.6194648354250533,
|
|
"grad_norm": 0.04724285794795771,
|
|
"learning_rate": 0.00011394706833839858,
|
|
"loss": 0.7663,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 0.6204120293630121,
|
|
"grad_norm": 0.04043233809259392,
|
|
"learning_rate": 0.00011346576655811683,
|
|
"loss": 0.7573,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 0.6213592233009708,
|
|
"grad_norm": 0.04083620126750758,
|
|
"learning_rate": 0.00011298486431035874,
|
|
"loss": 0.796,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.6223064172389297,
|
|
"grad_norm": 0.04080275832439418,
|
|
"learning_rate": 0.00011250436685419418,
|
|
"loss": 0.7631,
|
|
"step": 3285
|
|
},
|
|
{
|
|
"epoch": 0.6232536111768885,
|
|
"grad_norm": 0.03886958426209183,
|
|
"learning_rate": 0.00011202427944426636,
|
|
"loss": 0.75,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 0.6242008051148472,
|
|
"grad_norm": 0.03941484340988021,
|
|
"learning_rate": 0.00011154460733073433,
|
|
"loss": 0.7562,
|
|
"step": 3295
|
|
},
|
|
{
|
|
"epoch": 0.625147999052806,
|
|
"grad_norm": 0.04351471227388938,
|
|
"learning_rate": 0.00011106535575921536,
|
|
"loss": 0.7714,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.6260951929907649,
|
|
"grad_norm": 0.04704801990878809,
|
|
"learning_rate": 0.00011058652997072802,
|
|
"loss": 0.7793,
|
|
"step": 3305
|
|
},
|
|
{
|
|
"epoch": 0.6270423869287236,
|
|
"grad_norm": 0.04584785902650524,
|
|
"learning_rate": 0.00011010813520163427,
|
|
"loss": 0.7626,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 0.6279895808666824,
|
|
"grad_norm": 0.04629280784526772,
|
|
"learning_rate": 0.00010963017668358273,
|
|
"loss": 0.7418,
|
|
"step": 3315
|
|
},
|
|
{
|
|
"epoch": 0.6289367748046413,
|
|
"grad_norm": 0.04407747166586352,
|
|
"learning_rate": 0.00010915265964345114,
|
|
"loss": 0.7459,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.6298839687426,
|
|
"grad_norm": 0.039913077554486434,
|
|
"learning_rate": 0.00010867558930328934,
|
|
"loss": 0.7504,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 0.6308311626805588,
|
|
"grad_norm": 0.04379451323559438,
|
|
"learning_rate": 0.00010819897088026224,
|
|
"loss": 0.7633,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 0.6317783566185177,
|
|
"grad_norm": 0.04536301609111961,
|
|
"learning_rate": 0.00010772280958659241,
|
|
"loss": 0.7657,
|
|
"step": 3335
|
|
},
|
|
{
|
|
"epoch": 0.6327255505564764,
|
|
"grad_norm": 0.041972499038774445,
|
|
"learning_rate": 0.00010724711062950358,
|
|
"loss": 0.774,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.6336727444944352,
|
|
"grad_norm": 0.04243182090390366,
|
|
"learning_rate": 0.00010677187921116325,
|
|
"loss": 0.7593,
|
|
"step": 3345
|
|
},
|
|
{
|
|
"epoch": 0.6346199384323941,
|
|
"grad_norm": 0.041997095162117505,
|
|
"learning_rate": 0.00010629712052862619,
|
|
"loss": 0.7525,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.6355671323703528,
|
|
"grad_norm": 0.043340122902892075,
|
|
"learning_rate": 0.00010582283977377709,
|
|
"loss": 0.7554,
|
|
"step": 3355
|
|
},
|
|
{
|
|
"epoch": 0.6365143263083116,
|
|
"grad_norm": 0.041977926659558386,
|
|
"learning_rate": 0.00010534904213327447,
|
|
"loss": 0.7503,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.6374615202462705,
|
|
"grad_norm": 0.04155046135436731,
|
|
"learning_rate": 0.00010487573278849338,
|
|
"loss": 0.7555,
|
|
"step": 3365
|
|
},
|
|
{
|
|
"epoch": 0.6384087141842292,
|
|
"grad_norm": 0.04049957089110068,
|
|
"learning_rate": 0.00010440291691546895,
|
|
"loss": 0.7701,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 0.639355908122188,
|
|
"grad_norm": 0.042538488276278326,
|
|
"learning_rate": 0.00010393059968483989,
|
|
"loss": 0.765,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 0.6403031020601468,
|
|
"grad_norm": 0.03712808828646828,
|
|
"learning_rate": 0.00010345878626179162,
|
|
"loss": 0.7492,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.6412502959981056,
|
|
"grad_norm": 0.04192587249684641,
|
|
"learning_rate": 0.00010298748180600031,
|
|
"loss": 0.7644,
|
|
"step": 3385
|
|
},
|
|
{
|
|
"epoch": 0.6421974899360644,
|
|
"grad_norm": 0.037429843394214256,
|
|
"learning_rate": 0.00010251669147157582,
|
|
"loss": 0.7484,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 0.6431446838740232,
|
|
"grad_norm": 0.0454827756794171,
|
|
"learning_rate": 0.00010204642040700593,
|
|
"loss": 0.7432,
|
|
"step": 3395
|
|
},
|
|
{
|
|
"epoch": 0.644091877811982,
|
|
"grad_norm": 0.03998857520778792,
|
|
"learning_rate": 0.00010157667375509966,
|
|
"loss": 0.7767,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.6450390717499408,
|
|
"grad_norm": 0.03874189126294134,
|
|
"learning_rate": 0.00010110745665293102,
|
|
"loss": 0.7613,
|
|
"step": 3405
|
|
},
|
|
{
|
|
"epoch": 0.6459862656878996,
|
|
"grad_norm": 0.04315072772814885,
|
|
"learning_rate": 0.00010063877423178327,
|
|
"loss": 0.7615,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 0.6469334596258584,
|
|
"grad_norm": 0.039915598481917704,
|
|
"learning_rate": 0.00010017063161709203,
|
|
"loss": 0.7368,
|
|
"step": 3415
|
|
},
|
|
{
|
|
"epoch": 0.6478806535638172,
|
|
"grad_norm": 0.043320616700844417,
|
|
"learning_rate": 9.970303392839016e-05,
|
|
"loss": 0.7643,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.648827847501776,
|
|
"grad_norm": 0.038608629273838305,
|
|
"learning_rate": 9.923598627925085e-05,
|
|
"loss": 0.7647,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 0.6497750414397347,
|
|
"grad_norm": 0.0403187718238657,
|
|
"learning_rate": 9.876949377723254e-05,
|
|
"loss": 0.7583,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 0.6507222353776936,
|
|
"grad_norm": 0.04071452984696662,
|
|
"learning_rate": 9.830356152382245e-05,
|
|
"loss": 0.7543,
|
|
"step": 3435
|
|
},
|
|
{
|
|
"epoch": 0.6516694293156524,
|
|
"grad_norm": 0.04123560216855826,
|
|
"learning_rate": 9.783819461438097e-05,
|
|
"loss": 0.7503,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.6526166232536111,
|
|
"grad_norm": 0.04169247579017777,
|
|
"learning_rate": 9.737339813808621e-05,
|
|
"loss": 0.7633,
|
|
"step": 3445
|
|
},
|
|
{
|
|
"epoch": 0.65356381719157,
|
|
"grad_norm": 0.03950148386345426,
|
|
"learning_rate": 9.69091771778778e-05,
|
|
"loss": 0.7797,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.6545110111295288,
|
|
"grad_norm": 0.041505476095881656,
|
|
"learning_rate": 9.644553681040196e-05,
|
|
"loss": 0.7464,
|
|
"step": 3455
|
|
},
|
|
{
|
|
"epoch": 0.6554582050674875,
|
|
"grad_norm": 0.04278317549795013,
|
|
"learning_rate": 9.598248210595531e-05,
|
|
"loss": 0.7758,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.6564053990054464,
|
|
"grad_norm": 0.04182779902664369,
|
|
"learning_rate": 9.552001812842996e-05,
|
|
"loss": 0.7786,
|
|
"step": 3465
|
|
},
|
|
{
|
|
"epoch": 0.6573525929434052,
|
|
"grad_norm": 0.03765966562832586,
|
|
"learning_rate": 9.505814993525797e-05,
|
|
"loss": 0.748,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 0.6582997868813639,
|
|
"grad_norm": 0.03856379305262854,
|
|
"learning_rate": 9.459688257735575e-05,
|
|
"loss": 0.7265,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 0.6592469808193228,
|
|
"grad_norm": 0.042262933505673055,
|
|
"learning_rate": 9.413622109906937e-05,
|
|
"loss": 0.7608,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.6601941747572816,
|
|
"grad_norm": 0.03934049459172944,
|
|
"learning_rate": 9.367617053811885e-05,
|
|
"loss": 0.7355,
|
|
"step": 3485
|
|
},
|
|
{
|
|
"epoch": 0.6611413686952403,
|
|
"grad_norm": 0.040505520508700994,
|
|
"learning_rate": 9.321673592554346e-05,
|
|
"loss": 0.7285,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 0.6620885626331992,
|
|
"grad_norm": 0.0443739357214151,
|
|
"learning_rate": 9.275792228564647e-05,
|
|
"loss": 0.7465,
|
|
"step": 3495
|
|
},
|
|
{
|
|
"epoch": 0.663035756571158,
|
|
"grad_norm": 0.042948832615535365,
|
|
"learning_rate": 9.229973463594036e-05,
|
|
"loss": 0.7415,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.6639829505091167,
|
|
"grad_norm": 0.037284235954941944,
|
|
"learning_rate": 9.184217798709195e-05,
|
|
"loss": 0.7624,
|
|
"step": 3505
|
|
},
|
|
{
|
|
"epoch": 0.6649301444470755,
|
|
"grad_norm": 0.041373070240537976,
|
|
"learning_rate": 9.13852573428673e-05,
|
|
"loss": 0.76,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 0.6658773383850344,
|
|
"grad_norm": 0.04078003790880156,
|
|
"learning_rate": 9.092897770007748e-05,
|
|
"loss": 0.7696,
|
|
"step": 3515
|
|
},
|
|
{
|
|
"epoch": 0.6668245323229931,
|
|
"grad_norm": 0.04227188813941138,
|
|
"learning_rate": 9.047334404852349e-05,
|
|
"loss": 0.7385,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.6677717262609519,
|
|
"grad_norm": 0.04055813753473042,
|
|
"learning_rate": 9.001836137094199e-05,
|
|
"loss": 0.7411,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 0.6687189201989108,
|
|
"grad_norm": 0.03979213857043945,
|
|
"learning_rate": 8.95640346429506e-05,
|
|
"loss": 0.7419,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 0.6696661141368695,
|
|
"grad_norm": 0.04277127849333733,
|
|
"learning_rate": 8.911036883299367e-05,
|
|
"loss": 0.7459,
|
|
"step": 3535
|
|
},
|
|
{
|
|
"epoch": 0.6706133080748283,
|
|
"grad_norm": 0.0414740211347812,
|
|
"learning_rate": 8.865736890228782e-05,
|
|
"loss": 0.7663,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.6715605020127872,
|
|
"grad_norm": 0.03968770903759535,
|
|
"learning_rate": 8.820503980476766e-05,
|
|
"loss": 0.7397,
|
|
"step": 3545
|
|
},
|
|
{
|
|
"epoch": 0.6725076959507459,
|
|
"grad_norm": 0.04028225688842527,
|
|
"learning_rate": 8.775338648703182e-05,
|
|
"loss": 0.7359,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.6734548898887047,
|
|
"grad_norm": 0.03957291045780008,
|
|
"learning_rate": 8.730241388828852e-05,
|
|
"loss": 0.7458,
|
|
"step": 3555
|
|
},
|
|
{
|
|
"epoch": 0.6744020838266636,
|
|
"grad_norm": 0.04075648253621573,
|
|
"learning_rate": 8.685212694030197e-05,
|
|
"loss": 0.7334,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.6753492777646223,
|
|
"grad_norm": 0.03417551964299854,
|
|
"learning_rate": 8.640253056733788e-05,
|
|
"loss": 0.7105,
|
|
"step": 3565
|
|
},
|
|
{
|
|
"epoch": 0.6762964717025811,
|
|
"grad_norm": 0.041401776497170924,
|
|
"learning_rate": 8.595362968611036e-05,
|
|
"loss": 0.714,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 0.67724366564054,
|
|
"grad_norm": 0.04079313998460472,
|
|
"learning_rate": 8.550542920572751e-05,
|
|
"loss": 0.7426,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 0.6781908595784987,
|
|
"grad_norm": 0.03987821946868629,
|
|
"learning_rate": 8.505793402763786e-05,
|
|
"loss": 0.763,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.6791380535164575,
|
|
"grad_norm": 0.04091134166237045,
|
|
"learning_rate": 8.461114904557712e-05,
|
|
"loss": 0.751,
|
|
"step": 3585
|
|
},
|
|
{
|
|
"epoch": 0.6800852474544163,
|
|
"grad_norm": 0.04177068343214537,
|
|
"learning_rate": 8.416507914551405e-05,
|
|
"loss": 0.78,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 0.6810324413923751,
|
|
"grad_norm": 0.03961181610952186,
|
|
"learning_rate": 8.371972920559791e-05,
|
|
"loss": 0.7335,
|
|
"step": 3595
|
|
},
|
|
{
|
|
"epoch": 0.6819796353303339,
|
|
"grad_norm": 0.039417654158232895,
|
|
"learning_rate": 8.327510409610408e-05,
|
|
"loss": 0.7642,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.6829268292682927,
|
|
"grad_norm": 0.04073914119422375,
|
|
"learning_rate": 8.283120867938156e-05,
|
|
"loss": 0.7468,
|
|
"step": 3605
|
|
},
|
|
{
|
|
"epoch": 0.6838740232062515,
|
|
"grad_norm": 0.03905373593771108,
|
|
"learning_rate": 8.23880478097996e-05,
|
|
"loss": 0.7377,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 0.6848212171442103,
|
|
"grad_norm": 0.04307956957119377,
|
|
"learning_rate": 8.194562633369428e-05,
|
|
"loss": 0.7536,
|
|
"step": 3615
|
|
},
|
|
{
|
|
"epoch": 0.6857684110821691,
|
|
"grad_norm": 0.04210126693770651,
|
|
"learning_rate": 8.150394908931622e-05,
|
|
"loss": 0.7554,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.6867156050201279,
|
|
"grad_norm": 0.0421490482386118,
|
|
"learning_rate": 8.106302090677682e-05,
|
|
"loss": 0.7936,
|
|
"step": 3625
|
|
},
|
|
{
|
|
"epoch": 0.6876627989580867,
|
|
"grad_norm": 0.03588928471279015,
|
|
"learning_rate": 8.062284660799617e-05,
|
|
"loss": 0.7287,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.6886099928960455,
|
|
"grad_norm": 0.03952924524135366,
|
|
"learning_rate": 8.018343100664975e-05,
|
|
"loss": 0.7527,
|
|
"step": 3635
|
|
},
|
|
{
|
|
"epoch": 0.6895571868340042,
|
|
"grad_norm": 0.03984238587807663,
|
|
"learning_rate": 7.974477890811622e-05,
|
|
"loss": 0.7528,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.6905043807719631,
|
|
"grad_norm": 0.04306536866980951,
|
|
"learning_rate": 7.930689510942467e-05,
|
|
"loss": 0.7263,
|
|
"step": 3645
|
|
},
|
|
{
|
|
"epoch": 0.6914515747099218,
|
|
"grad_norm": 0.03823091396622685,
|
|
"learning_rate": 7.886978439920219e-05,
|
|
"loss": 0.7262,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.6923987686478806,
|
|
"grad_norm": 0.040037669190792775,
|
|
"learning_rate": 7.84334515576215e-05,
|
|
"loss": 0.761,
|
|
"step": 3655
|
|
},
|
|
{
|
|
"epoch": 0.6933459625858395,
|
|
"grad_norm": 0.04316597192554324,
|
|
"learning_rate": 7.799790135634848e-05,
|
|
"loss": 0.7654,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.6942931565237982,
|
|
"grad_norm": 0.040090557871707184,
|
|
"learning_rate": 7.756313855849061e-05,
|
|
"loss": 0.7576,
|
|
"step": 3665
|
|
},
|
|
{
|
|
"epoch": 0.695240350461757,
|
|
"grad_norm": 0.03885025639567031,
|
|
"learning_rate": 7.712916791854398e-05,
|
|
"loss": 0.7337,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.6961875443997159,
|
|
"grad_norm": 0.039817406008376306,
|
|
"learning_rate": 7.669599418234209e-05,
|
|
"loss": 0.7827,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 0.6971347383376746,
|
|
"grad_norm": 0.03796969889976453,
|
|
"learning_rate": 7.626362208700345e-05,
|
|
"loss": 0.7401,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.6980819322756334,
|
|
"grad_norm": 0.04250157953569218,
|
|
"learning_rate": 7.583205636087998e-05,
|
|
"loss": 0.7849,
|
|
"step": 3685
|
|
},
|
|
{
|
|
"epoch": 0.6990291262135923,
|
|
"grad_norm": 0.039617912653361516,
|
|
"learning_rate": 7.540130172350553e-05,
|
|
"loss": 0.7299,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.699976320151551,
|
|
"grad_norm": 0.03816192045516508,
|
|
"learning_rate": 7.497136288554358e-05,
|
|
"loss": 0.7514,
|
|
"step": 3695
|
|
},
|
|
{
|
|
"epoch": 0.7009235140895098,
|
|
"grad_norm": 0.03983670858460556,
|
|
"learning_rate": 7.454224454873653e-05,
|
|
"loss": 0.726,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.7018707080274686,
|
|
"grad_norm": 0.03934247221089433,
|
|
"learning_rate": 7.411395140585366e-05,
|
|
"loss": 0.755,
|
|
"step": 3705
|
|
},
|
|
{
|
|
"epoch": 0.7028179019654274,
|
|
"grad_norm": 0.041762302180065505,
|
|
"learning_rate": 7.368648814064017e-05,
|
|
"loss": 0.7731,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.7037650959033862,
|
|
"grad_norm": 0.03718549677733008,
|
|
"learning_rate": 7.325985942776586e-05,
|
|
"loss": 0.7245,
|
|
"step": 3715
|
|
},
|
|
{
|
|
"epoch": 0.704712289841345,
|
|
"grad_norm": 0.03979809659047382,
|
|
"learning_rate": 7.283406993277401e-05,
|
|
"loss": 0.7493,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.7056594837793038,
|
|
"grad_norm": 0.03891391378511992,
|
|
"learning_rate": 7.240912431203036e-05,
|
|
"loss": 0.7372,
|
|
"step": 3725
|
|
},
|
|
{
|
|
"epoch": 0.7066066777172626,
|
|
"grad_norm": 0.03760484285201233,
|
|
"learning_rate": 7.198502721267201e-05,
|
|
"loss": 0.7319,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.7075538716552214,
|
|
"grad_norm": 0.039788628547401186,
|
|
"learning_rate": 7.156178327255696e-05,
|
|
"loss": 0.7107,
|
|
"step": 3735
|
|
},
|
|
{
|
|
"epoch": 0.7085010655931802,
|
|
"grad_norm": 0.03854056853156896,
|
|
"learning_rate": 7.113939712021312e-05,
|
|
"loss": 0.7195,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.709448259531139,
|
|
"grad_norm": 0.04238606817116954,
|
|
"learning_rate": 7.071787337478785e-05,
|
|
"loss": 0.7448,
|
|
"step": 3745
|
|
},
|
|
{
|
|
"epoch": 0.7103954534690978,
|
|
"grad_norm": 0.03913911448999711,
|
|
"learning_rate": 7.029721664599718e-05,
|
|
"loss": 0.7553,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.7113426474070565,
|
|
"grad_norm": 0.04214126618606677,
|
|
"learning_rate": 6.987743153407576e-05,
|
|
"loss": 0.7263,
|
|
"step": 3755
|
|
},
|
|
{
|
|
"epoch": 0.7122898413450154,
|
|
"grad_norm": 0.044083161857802124,
|
|
"learning_rate": 6.94585226297263e-05,
|
|
"loss": 0.7366,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 0.7132370352829742,
|
|
"grad_norm": 0.04073891396330348,
|
|
"learning_rate": 6.90404945140695e-05,
|
|
"loss": 0.7389,
|
|
"step": 3765
|
|
},
|
|
{
|
|
"epoch": 0.7141842292209329,
|
|
"grad_norm": 0.03825315556026045,
|
|
"learning_rate": 6.862335175859387e-05,
|
|
"loss": 0.7347,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 0.7151314231588918,
|
|
"grad_norm": 0.03504164436950897,
|
|
"learning_rate": 6.820709892510566e-05,
|
|
"loss": 0.7563,
|
|
"step": 3775
|
|
},
|
|
{
|
|
"epoch": 0.7160786170968506,
|
|
"grad_norm": 0.040944422736284514,
|
|
"learning_rate": 6.779174056567923e-05,
|
|
"loss": 0.7324,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 0.7170258110348093,
|
|
"grad_norm": 0.042252601374869914,
|
|
"learning_rate": 6.737728122260705e-05,
|
|
"loss": 0.7428,
|
|
"step": 3785
|
|
},
|
|
{
|
|
"epoch": 0.7179730049727682,
|
|
"grad_norm": 0.04198448843744255,
|
|
"learning_rate": 6.696372542835007e-05,
|
|
"loss": 0.7563,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 0.718920198910727,
|
|
"grad_norm": 0.03891146859575185,
|
|
"learning_rate": 6.655107770548829e-05,
|
|
"loss": 0.7653,
|
|
"step": 3795
|
|
},
|
|
{
|
|
"epoch": 0.7198673928486857,
|
|
"grad_norm": 0.047863846470899335,
|
|
"learning_rate": 6.613934256667098e-05,
|
|
"loss": 0.7443,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.7208145867866446,
|
|
"grad_norm": 0.04392069667011626,
|
|
"learning_rate": 6.572852451456766e-05,
|
|
"loss": 0.7506,
|
|
"step": 3805
|
|
},
|
|
{
|
|
"epoch": 0.7217617807246034,
|
|
"grad_norm": 0.04054436230218552,
|
|
"learning_rate": 6.53186280418188e-05,
|
|
"loss": 0.7472,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 0.7227089746625621,
|
|
"grad_norm": 0.03885365408522593,
|
|
"learning_rate": 6.490965763098654e-05,
|
|
"loss": 0.719,
|
|
"step": 3815
|
|
},
|
|
{
|
|
"epoch": 0.723656168600521,
|
|
"grad_norm": 0.04194579275932475,
|
|
"learning_rate": 6.450161775450572e-05,
|
|
"loss": 0.7125,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 0.7246033625384798,
|
|
"grad_norm": 0.04052378750052248,
|
|
"learning_rate": 6.409451287463508e-05,
|
|
"loss": 0.7766,
|
|
"step": 3825
|
|
},
|
|
{
|
|
"epoch": 0.7255505564764385,
|
|
"grad_norm": 0.039568496333215346,
|
|
"learning_rate": 6.368834744340837e-05,
|
|
"loss": 0.7278,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 0.7264977504143973,
|
|
"grad_norm": 0.036717341021427846,
|
|
"learning_rate": 6.328312590258568e-05,
|
|
"loss": 0.7389,
|
|
"step": 3835
|
|
},
|
|
{
|
|
"epoch": 0.7274449443523562,
|
|
"grad_norm": 0.04020458964083549,
|
|
"learning_rate": 6.28788526836049e-05,
|
|
"loss": 0.7484,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 0.7283921382903149,
|
|
"grad_norm": 0.04041885111843109,
|
|
"learning_rate": 6.247553220753305e-05,
|
|
"loss": 0.7286,
|
|
"step": 3845
|
|
},
|
|
{
|
|
"epoch": 0.7293393322282737,
|
|
"grad_norm": 0.03932262216800061,
|
|
"learning_rate": 6.207316888501833e-05,
|
|
"loss": 0.7211,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.7302865261662326,
|
|
"grad_norm": 0.03945586913466572,
|
|
"learning_rate": 6.167176711624157e-05,
|
|
"loss": 0.7343,
|
|
"step": 3855
|
|
},
|
|
{
|
|
"epoch": 0.7312337201041913,
|
|
"grad_norm": 0.03793066654633331,
|
|
"learning_rate": 6.127133129086818e-05,
|
|
"loss": 0.7283,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 0.7321809140421501,
|
|
"grad_norm": 0.03754478471345365,
|
|
"learning_rate": 6.087186578800027e-05,
|
|
"loss": 0.7537,
|
|
"step": 3865
|
|
},
|
|
{
|
|
"epoch": 0.733128107980109,
|
|
"grad_norm": 0.04134693277562893,
|
|
"learning_rate": 6.0473374976128444e-05,
|
|
"loss": 0.7279,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 0.7340753019180677,
|
|
"grad_norm": 0.03826648717909671,
|
|
"learning_rate": 6.007586321308445e-05,
|
|
"loss": 0.722,
|
|
"step": 3875
|
|
},
|
|
{
|
|
"epoch": 0.7350224958560265,
|
|
"grad_norm": 0.041876625013751154,
|
|
"learning_rate": 5.967933484599324e-05,
|
|
"loss": 0.7488,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 0.7359696897939854,
|
|
"grad_norm": 0.04361394297294752,
|
|
"learning_rate": 5.928379421122557e-05,
|
|
"loss": 0.751,
|
|
"step": 3885
|
|
},
|
|
{
|
|
"epoch": 0.7369168837319441,
|
|
"grad_norm": 0.040558338035631775,
|
|
"learning_rate": 5.888924563435032e-05,
|
|
"loss": 0.7359,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 0.7378640776699029,
|
|
"grad_norm": 0.041973741302183905,
|
|
"learning_rate": 5.849569343008758e-05,
|
|
"loss": 0.746,
|
|
"step": 3895
|
|
},
|
|
{
|
|
"epoch": 0.7388112716078618,
|
|
"grad_norm": 0.043105183047686346,
|
|
"learning_rate": 5.8103141902261205e-05,
|
|
"loss": 0.7403,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.7397584655458205,
|
|
"grad_norm": 0.038539690413186195,
|
|
"learning_rate": 5.7711595343751806e-05,
|
|
"loss": 0.7467,
|
|
"step": 3905
|
|
},
|
|
{
|
|
"epoch": 0.7407056594837793,
|
|
"grad_norm": 0.040297657563285356,
|
|
"learning_rate": 5.732105803644986e-05,
|
|
"loss": 0.7256,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 0.7416528534217381,
|
|
"grad_norm": 0.040590581894694416,
|
|
"learning_rate": 5.693153425120872e-05,
|
|
"loss": 0.7301,
|
|
"step": 3915
|
|
},
|
|
{
|
|
"epoch": 0.7426000473596969,
|
|
"grad_norm": 0.039579719033313615,
|
|
"learning_rate": 5.654302824779815e-05,
|
|
"loss": 0.7343,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 0.7435472412976557,
|
|
"grad_norm": 0.03807616540727068,
|
|
"learning_rate": 5.6155544274857436e-05,
|
|
"loss": 0.7219,
|
|
"step": 3925
|
|
},
|
|
{
|
|
"epoch": 0.7444944352356145,
|
|
"grad_norm": 0.03907724233269258,
|
|
"learning_rate": 5.576908656984938e-05,
|
|
"loss": 0.7359,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 0.7454416291735733,
|
|
"grad_norm": 0.0411669903197421,
|
|
"learning_rate": 5.5383659359013516e-05,
|
|
"loss": 0.7606,
|
|
"step": 3935
|
|
},
|
|
{
|
|
"epoch": 0.7463888231115321,
|
|
"grad_norm": 0.03660616286712515,
|
|
"learning_rate": 5.499926685731999e-05,
|
|
"loss": 0.7144,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 0.7473360170494909,
|
|
"grad_norm": 0.04196508435156298,
|
|
"learning_rate": 5.461591326842368e-05,
|
|
"loss": 0.7268,
|
|
"step": 3945
|
|
},
|
|
{
|
|
"epoch": 0.7482832109874497,
|
|
"grad_norm": 0.03782538627658778,
|
|
"learning_rate": 5.4233602784617875e-05,
|
|
"loss": 0.7538,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.7492304049254085,
|
|
"grad_norm": 0.039917199754721966,
|
|
"learning_rate": 5.385233958678899e-05,
|
|
"loss": 0.7471,
|
|
"step": 3955
|
|
},
|
|
{
|
|
"epoch": 0.7501775988633673,
|
|
"grad_norm": 0.041462493245785374,
|
|
"learning_rate": 5.347212784437014e-05,
|
|
"loss": 0.7335,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 0.751124792801326,
|
|
"grad_norm": 0.04262717035805544,
|
|
"learning_rate": 5.3092971715296036e-05,
|
|
"loss": 0.7517,
|
|
"step": 3965
|
|
},
|
|
{
|
|
"epoch": 0.7520719867392849,
|
|
"grad_norm": 0.03912615098583992,
|
|
"learning_rate": 5.2714875345957364e-05,
|
|
"loss": 0.7505,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 0.7530191806772437,
|
|
"grad_norm": 0.03843483966970995,
|
|
"learning_rate": 5.2337842871155464e-05,
|
|
"loss": 0.729,
|
|
"step": 3975
|
|
},
|
|
{
|
|
"epoch": 0.7539663746152024,
|
|
"grad_norm": 0.04001769351762854,
|
|
"learning_rate": 5.1961878414057116e-05,
|
|
"loss": 0.743,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 0.7549135685531613,
|
|
"grad_norm": 0.03903544150596959,
|
|
"learning_rate": 5.158698608614928e-05,
|
|
"loss": 0.7231,
|
|
"step": 3985
|
|
},
|
|
{
|
|
"epoch": 0.7558607624911201,
|
|
"grad_norm": 0.04246943536811293,
|
|
"learning_rate": 5.1213169987194506e-05,
|
|
"loss": 0.7376,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 0.7568079564290788,
|
|
"grad_norm": 0.04255248262700945,
|
|
"learning_rate": 5.08404342051856e-05,
|
|
"loss": 0.769,
|
|
"step": 3995
|
|
},
|
|
{
|
|
"epoch": 0.7577551503670377,
|
|
"grad_norm": 0.038324457218321194,
|
|
"learning_rate": 5.04687828163015e-05,
|
|
"loss": 0.7171,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.7587023443049965,
|
|
"grad_norm": 0.04478588785492951,
|
|
"learning_rate": 5.0098219884862265e-05,
|
|
"loss": 0.764,
|
|
"step": 4005
|
|
},
|
|
{
|
|
"epoch": 0.7596495382429552,
|
|
"grad_norm": 0.040448163186758916,
|
|
"learning_rate": 4.9728749463284634e-05,
|
|
"loss": 0.7416,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 0.7605967321809141,
|
|
"grad_norm": 0.04321236917872768,
|
|
"learning_rate": 4.936037559203806e-05,
|
|
"loss": 0.754,
|
|
"step": 4015
|
|
},
|
|
{
|
|
"epoch": 0.7615439261188729,
|
|
"grad_norm": 0.041244116901498824,
|
|
"learning_rate": 4.899310229960002e-05,
|
|
"loss": 0.745,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 0.7624911200568316,
|
|
"grad_norm": 0.04064461271404752,
|
|
"learning_rate": 4.862693360241259e-05,
|
|
"loss": 0.7351,
|
|
"step": 4025
|
|
},
|
|
{
|
|
"epoch": 0.7634383139947905,
|
|
"grad_norm": 0.03781319026471734,
|
|
"learning_rate": 4.826187350483783e-05,
|
|
"loss": 0.7307,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 0.7643855079327492,
|
|
"grad_norm": 0.03974939076217195,
|
|
"learning_rate": 4.789792599911453e-05,
|
|
"loss": 0.7438,
|
|
"step": 4035
|
|
},
|
|
{
|
|
"epoch": 0.765332701870708,
|
|
"grad_norm": 0.04065565436383743,
|
|
"learning_rate": 4.753509506531436e-05,
|
|
"loss": 0.7636,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 0.7662798958086668,
|
|
"grad_norm": 0.04062956705746941,
|
|
"learning_rate": 4.717338467129813e-05,
|
|
"loss": 0.7569,
|
|
"step": 4045
|
|
},
|
|
{
|
|
"epoch": 0.7672270897466256,
|
|
"grad_norm": 0.041349177857734835,
|
|
"learning_rate": 4.6812798772672936e-05,
|
|
"loss": 0.7026,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.7681742836845844,
|
|
"grad_norm": 0.04354632478814716,
|
|
"learning_rate": 4.645334131274828e-05,
|
|
"loss": 0.7145,
|
|
"step": 4055
|
|
},
|
|
{
|
|
"epoch": 0.7691214776225432,
|
|
"grad_norm": 0.03936898140216215,
|
|
"learning_rate": 4.609501622249343e-05,
|
|
"loss": 0.7286,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 0.770068671560502,
|
|
"grad_norm": 0.03929846725341178,
|
|
"learning_rate": 4.573782742049407e-05,
|
|
"loss": 0.7304,
|
|
"step": 4065
|
|
},
|
|
{
|
|
"epoch": 0.7710158654984608,
|
|
"grad_norm": 0.04124206397422393,
|
|
"learning_rate": 4.538177881290973e-05,
|
|
"loss": 0.7306,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 0.7719630594364196,
|
|
"grad_norm": 0.042221939835918945,
|
|
"learning_rate": 4.502687429343106e-05,
|
|
"loss": 0.7519,
|
|
"step": 4075
|
|
},
|
|
{
|
|
"epoch": 0.7729102533743784,
|
|
"grad_norm": 0.04130656943974972,
|
|
"learning_rate": 4.4673117743236884e-05,
|
|
"loss": 0.7245,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 0.7738574473123372,
|
|
"grad_norm": 0.03913493498829275,
|
|
"learning_rate": 4.432051303095225e-05,
|
|
"loss": 0.7487,
|
|
"step": 4085
|
|
},
|
|
{
|
|
"epoch": 0.774804641250296,
|
|
"grad_norm": 0.037086715638808006,
|
|
"learning_rate": 4.396906401260573e-05,
|
|
"loss": 0.7308,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 0.7757518351882547,
|
|
"grad_norm": 0.041005709394066454,
|
|
"learning_rate": 4.361877453158749e-05,
|
|
"loss": 0.7222,
|
|
"step": 4095
|
|
},
|
|
{
|
|
"epoch": 0.7766990291262136,
|
|
"grad_norm": 0.0390323446969787,
|
|
"learning_rate": 4.3269648418607194e-05,
|
|
"loss": 0.7187,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.7776462230641724,
|
|
"grad_norm": 0.04019349411682957,
|
|
"learning_rate": 4.29216894916521e-05,
|
|
"loss": 0.7089,
|
|
"step": 4105
|
|
},
|
|
{
|
|
"epoch": 0.7785934170021311,
|
|
"grad_norm": 0.04095873534287484,
|
|
"learning_rate": 4.257490155594528e-05,
|
|
"loss": 0.7546,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 0.77954061094009,
|
|
"grad_norm": 0.040023174171211935,
|
|
"learning_rate": 4.2229288403903994e-05,
|
|
"loss": 0.7151,
|
|
"step": 4115
|
|
},
|
|
{
|
|
"epoch": 0.7804878048780488,
|
|
"grad_norm": 0.04008455709185164,
|
|
"learning_rate": 4.188485381509833e-05,
|
|
"loss": 0.7317,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 0.7814349988160075,
|
|
"grad_norm": 0.041977277262445226,
|
|
"learning_rate": 4.154160155620977e-05,
|
|
"loss": 0.73,
|
|
"step": 4125
|
|
},
|
|
{
|
|
"epoch": 0.7823821927539664,
|
|
"grad_norm": 0.04088401185241922,
|
|
"learning_rate": 4.119953538099006e-05,
|
|
"loss": 0.7639,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 0.7833293866919252,
|
|
"grad_norm": 0.037283132614974145,
|
|
"learning_rate": 4.085865903021999e-05,
|
|
"loss": 0.7456,
|
|
"step": 4135
|
|
},
|
|
{
|
|
"epoch": 0.7842765806298839,
|
|
"grad_norm": 0.046399869448183154,
|
|
"learning_rate": 4.051897623166879e-05,
|
|
"loss": 0.748,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 0.7852237745678428,
|
|
"grad_norm": 0.03912459186813119,
|
|
"learning_rate": 4.0180490700053105e-05,
|
|
"loss": 0.7518,
|
|
"step": 4145
|
|
},
|
|
{
|
|
"epoch": 0.7861709685058016,
|
|
"grad_norm": 0.04059706834387849,
|
|
"learning_rate": 3.984320613699648e-05,
|
|
"loss": 0.7174,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.7871181624437603,
|
|
"grad_norm": 0.04279168262844896,
|
|
"learning_rate": 3.950712623098892e-05,
|
|
"loss": 0.717,
|
|
"step": 4155
|
|
},
|
|
{
|
|
"epoch": 0.7880653563817192,
|
|
"grad_norm": 0.04328417157580087,
|
|
"learning_rate": 3.917225465734632e-05,
|
|
"loss": 0.7402,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 0.789012550319678,
|
|
"grad_norm": 0.03907178607474956,
|
|
"learning_rate": 3.883859507817061e-05,
|
|
"loss": 0.7109,
|
|
"step": 4165
|
|
},
|
|
{
|
|
"epoch": 0.7899597442576367,
|
|
"grad_norm": 0.03842633012724885,
|
|
"learning_rate": 3.850615114230949e-05,
|
|
"loss": 0.7565,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 0.7909069381955955,
|
|
"grad_norm": 0.04237253188344206,
|
|
"learning_rate": 3.81749264853166e-05,
|
|
"loss": 0.7489,
|
|
"step": 4175
|
|
},
|
|
{
|
|
"epoch": 0.7918541321335544,
|
|
"grad_norm": 0.04212676511762711,
|
|
"learning_rate": 3.784492472941173e-05,
|
|
"loss": 0.7506,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 0.7928013260715131,
|
|
"grad_norm": 0.040564694587832524,
|
|
"learning_rate": 3.751614948344116e-05,
|
|
"loss": 0.7594,
|
|
"step": 4185
|
|
},
|
|
{
|
|
"epoch": 0.7937485200094719,
|
|
"grad_norm": 0.03846320035915876,
|
|
"learning_rate": 3.718860434283832e-05,
|
|
"loss": 0.7416,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 0.7946957139474308,
|
|
"grad_norm": 0.04058426964727867,
|
|
"learning_rate": 3.686229288958442e-05,
|
|
"loss": 0.7703,
|
|
"step": 4195
|
|
},
|
|
{
|
|
"epoch": 0.7956429078853895,
|
|
"grad_norm": 0.03767236001284344,
|
|
"learning_rate": 3.653721869216926e-05,
|
|
"loss": 0.7344,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.7965901018233483,
|
|
"grad_norm": 0.03990592712721354,
|
|
"learning_rate": 3.621338530555207e-05,
|
|
"loss": 0.7329,
|
|
"step": 4205
|
|
},
|
|
{
|
|
"epoch": 0.7975372957613072,
|
|
"grad_norm": 0.037505327559176606,
|
|
"learning_rate": 3.589079627112298e-05,
|
|
"loss": 0.7033,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 0.7984844896992659,
|
|
"grad_norm": 0.03592112011887372,
|
|
"learning_rate": 3.5569455116663944e-05,
|
|
"loss": 0.75,
|
|
"step": 4215
|
|
},
|
|
{
|
|
"epoch": 0.7994316836372247,
|
|
"grad_norm": 0.037737720756345204,
|
|
"learning_rate": 3.524936535631036e-05,
|
|
"loss": 0.7178,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 0.8003788775751836,
|
|
"grad_norm": 0.041074083535645345,
|
|
"learning_rate": 3.49305304905126e-05,
|
|
"loss": 0.7296,
|
|
"step": 4225
|
|
},
|
|
{
|
|
"epoch": 0.8013260715131423,
|
|
"grad_norm": 0.037206587197529284,
|
|
"learning_rate": 3.461295400599759e-05,
|
|
"loss": 0.7318,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 0.8022732654511011,
|
|
"grad_norm": 0.04155270440503186,
|
|
"learning_rate": 3.429663937573095e-05,
|
|
"loss": 0.7643,
|
|
"step": 4235
|
|
},
|
|
{
|
|
"epoch": 0.80322045938906,
|
|
"grad_norm": 0.04200718034951944,
|
|
"learning_rate": 3.3981590058878764e-05,
|
|
"loss": 0.7303,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 0.8041676533270187,
|
|
"grad_norm": 0.0391722359803136,
|
|
"learning_rate": 3.36678095007699e-05,
|
|
"loss": 0.7551,
|
|
"step": 4245
|
|
},
|
|
{
|
|
"epoch": 0.8051148472649775,
|
|
"grad_norm": 0.04162052982671362,
|
|
"learning_rate": 3.335530113285832e-05,
|
|
"loss": 0.7429,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.8060620412029363,
|
|
"grad_norm": 0.038860916165607134,
|
|
"learning_rate": 3.304406837268538e-05,
|
|
"loss": 0.7304,
|
|
"step": 4255
|
|
},
|
|
{
|
|
"epoch": 0.8070092351408951,
|
|
"grad_norm": 0.03765860345115411,
|
|
"learning_rate": 3.2734114623842714e-05,
|
|
"loss": 0.7541,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 0.8079564290788539,
|
|
"grad_norm": 0.03985364166742812,
|
|
"learning_rate": 3.242544327593487e-05,
|
|
"loss": 0.7159,
|
|
"step": 4265
|
|
},
|
|
{
|
|
"epoch": 0.8089036230168127,
|
|
"grad_norm": 0.04096122915397548,
|
|
"learning_rate": 3.211805770454229e-05,
|
|
"loss": 0.7494,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 0.8098508169547715,
|
|
"grad_norm": 0.03718014396684494,
|
|
"learning_rate": 3.181196127118425e-05,
|
|
"loss": 0.7228,
|
|
"step": 4275
|
|
},
|
|
{
|
|
"epoch": 0.8107980108927303,
|
|
"grad_norm": 0.037725347863418725,
|
|
"learning_rate": 3.150715732328235e-05,
|
|
"loss": 0.7507,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 0.8117452048306891,
|
|
"grad_norm": 0.04354928220465528,
|
|
"learning_rate": 3.120364919412374e-05,
|
|
"loss": 0.744,
|
|
"step": 4285
|
|
},
|
|
{
|
|
"epoch": 0.8126923987686479,
|
|
"grad_norm": 0.03829035376988075,
|
|
"learning_rate": 3.090144020282469e-05,
|
|
"loss": 0.7497,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 0.8136395927066067,
|
|
"grad_norm": 0.04107370767206898,
|
|
"learning_rate": 3.060053365429433e-05,
|
|
"loss": 0.7087,
|
|
"step": 4295
|
|
},
|
|
{
|
|
"epoch": 0.8145867866445655,
|
|
"grad_norm": 0.04270910227284078,
|
|
"learning_rate": 3.030093283919841e-05,
|
|
"loss": 0.7301,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.8155339805825242,
|
|
"grad_norm": 0.040080109853962084,
|
|
"learning_rate": 3.000264103392348e-05,
|
|
"loss": 0.7113,
|
|
"step": 4305
|
|
},
|
|
{
|
|
"epoch": 0.8164811745204831,
|
|
"grad_norm": 0.03923392398314568,
|
|
"learning_rate": 2.9705661500540916e-05,
|
|
"loss": 0.7235,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 0.8174283684584419,
|
|
"grad_norm": 0.04092925596400112,
|
|
"learning_rate": 2.9409997486771332e-05,
|
|
"loss": 0.7086,
|
|
"step": 4315
|
|
},
|
|
{
|
|
"epoch": 0.8183755623964006,
|
|
"grad_norm": 0.03723866823377466,
|
|
"learning_rate": 2.911565222594904e-05,
|
|
"loss": 0.7154,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 0.8193227563343595,
|
|
"grad_norm": 0.04026831218954151,
|
|
"learning_rate": 2.8822628936986576e-05,
|
|
"loss": 0.7166,
|
|
"step": 4325
|
|
},
|
|
{
|
|
"epoch": 0.8202699502723183,
|
|
"grad_norm": 0.03924390038270272,
|
|
"learning_rate": 2.8530930824339725e-05,
|
|
"loss": 0.7114,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 0.821217144210277,
|
|
"grad_norm": 0.03857672055842463,
|
|
"learning_rate": 2.8240561077972336e-05,
|
|
"loss": 0.7275,
|
|
"step": 4335
|
|
},
|
|
{
|
|
"epoch": 0.8221643381482359,
|
|
"grad_norm": 0.03813437644170459,
|
|
"learning_rate": 2.795152287332143e-05,
|
|
"loss": 0.7407,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 0.8231115320861947,
|
|
"grad_norm": 0.039511538349970426,
|
|
"learning_rate": 2.766381937126246e-05,
|
|
"loss": 0.7224,
|
|
"step": 4345
|
|
},
|
|
{
|
|
"epoch": 0.8240587260241534,
|
|
"grad_norm": 0.04035022629840493,
|
|
"learning_rate": 2.737745371807484e-05,
|
|
"loss": 0.7226,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 0.8250059199621123,
|
|
"grad_norm": 0.041748400769491566,
|
|
"learning_rate": 2.7092429045407493e-05,
|
|
"loss": 0.7076,
|
|
"step": 4355
|
|
},
|
|
{
|
|
"epoch": 0.8259531139000711,
|
|
"grad_norm": 0.04074616252719918,
|
|
"learning_rate": 2.6808748470244596e-05,
|
|
"loss": 0.733,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 0.8269003078380298,
|
|
"grad_norm": 0.040304189290435735,
|
|
"learning_rate": 2.6526415094871456e-05,
|
|
"loss": 0.7275,
|
|
"step": 4365
|
|
},
|
|
{
|
|
"epoch": 0.8278475017759886,
|
|
"grad_norm": 0.04056642286405685,
|
|
"learning_rate": 2.624543200684059e-05,
|
|
"loss": 0.7419,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 0.8287946957139475,
|
|
"grad_norm": 0.044474637104153496,
|
|
"learning_rate": 2.5965802278938104e-05,
|
|
"loss": 0.7029,
|
|
"step": 4375
|
|
},
|
|
{
|
|
"epoch": 0.8297418896519062,
|
|
"grad_norm": 0.040065168017582885,
|
|
"learning_rate": 2.5687528969149797e-05,
|
|
"loss": 0.7375,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 0.830689083589865,
|
|
"grad_norm": 0.039500986446779594,
|
|
"learning_rate": 2.541061512062808e-05,
|
|
"loss": 0.7475,
|
|
"step": 4385
|
|
},
|
|
{
|
|
"epoch": 0.8316362775278239,
|
|
"grad_norm": 0.04336888291320547,
|
|
"learning_rate": 2.5135063761658465e-05,
|
|
"loss": 0.7506,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 0.8325834714657826,
|
|
"grad_norm": 0.04238948284006484,
|
|
"learning_rate": 2.4860877905626385e-05,
|
|
"loss": 0.7072,
|
|
"step": 4395
|
|
},
|
|
{
|
|
"epoch": 0.8335306654037414,
|
|
"grad_norm": 0.03937330992282642,
|
|
"learning_rate": 2.4588060550984517e-05,
|
|
"loss": 0.7271,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.8344778593417003,
|
|
"grad_norm": 0.04329748465343663,
|
|
"learning_rate": 2.4316614681219616e-05,
|
|
"loss": 0.7726,
|
|
"step": 4405
|
|
},
|
|
{
|
|
"epoch": 0.835425053279659,
|
|
"grad_norm": 0.04153391947066848,
|
|
"learning_rate": 2.4046543264820367e-05,
|
|
"loss": 0.7623,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 0.8363722472176178,
|
|
"grad_norm": 0.03864210650963097,
|
|
"learning_rate": 2.3777849255244402e-05,
|
|
"loss": 0.7335,
|
|
"step": 4415
|
|
},
|
|
{
|
|
"epoch": 0.8373194411555765,
|
|
"grad_norm": 0.03605009140484424,
|
|
"learning_rate": 2.3510535590886464e-05,
|
|
"loss": 0.7185,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 0.8382666350935354,
|
|
"grad_norm": 0.04343264310904382,
|
|
"learning_rate": 2.324460519504584e-05,
|
|
"loss": 0.7278,
|
|
"step": 4425
|
|
},
|
|
{
|
|
"epoch": 0.8392138290314942,
|
|
"grad_norm": 0.04222856281924883,
|
|
"learning_rate": 2.298006097589478e-05,
|
|
"loss": 0.7272,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 0.8401610229694529,
|
|
"grad_norm": 0.03693060964307286,
|
|
"learning_rate": 2.2716905826446553e-05,
|
|
"loss": 0.728,
|
|
"step": 4435
|
|
},
|
|
{
|
|
"epoch": 0.8411082169074118,
|
|
"grad_norm": 0.03882562220045244,
|
|
"learning_rate": 2.2455142624523632e-05,
|
|
"loss": 0.7228,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 0.8420554108453706,
|
|
"grad_norm": 0.04087387623558309,
|
|
"learning_rate": 2.2194774232726492e-05,
|
|
"loss": 0.7155,
|
|
"step": 4445
|
|
},
|
|
{
|
|
"epoch": 0.8430026047833293,
|
|
"grad_norm": 0.04058338081432673,
|
|
"learning_rate": 2.193580349840211e-05,
|
|
"loss": 0.7023,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 0.8439497987212882,
|
|
"grad_norm": 0.04404654162614751,
|
|
"learning_rate": 2.167823325361297e-05,
|
|
"loss": 0.6959,
|
|
"step": 4455
|
|
},
|
|
{
|
|
"epoch": 0.844896992659247,
|
|
"grad_norm": 0.03855188004859097,
|
|
"learning_rate": 2.1422066315106007e-05,
|
|
"loss": 0.7258,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 0.8458441865972057,
|
|
"grad_norm": 0.03928313344333361,
|
|
"learning_rate": 2.1167305484281814e-05,
|
|
"loss": 0.7372,
|
|
"step": 4465
|
|
},
|
|
{
|
|
"epoch": 0.8467913805351646,
|
|
"grad_norm": 0.03940286973106492,
|
|
"learning_rate": 2.0913953547164058e-05,
|
|
"loss": 0.7163,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 0.8477385744731234,
|
|
"grad_norm": 0.04323326221582775,
|
|
"learning_rate": 2.0662013274368854e-05,
|
|
"loss": 0.7378,
|
|
"step": 4475
|
|
},
|
|
{
|
|
"epoch": 0.8486857684110821,
|
|
"grad_norm": 0.03858572389634398,
|
|
"learning_rate": 2.041148742107471e-05,
|
|
"loss": 0.7397,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 0.849632962349041,
|
|
"grad_norm": 0.040751992473504785,
|
|
"learning_rate": 2.0162378726992222e-05,
|
|
"loss": 0.7581,
|
|
"step": 4485
|
|
},
|
|
{
|
|
"epoch": 0.8505801562869998,
|
|
"grad_norm": 0.03829683763062958,
|
|
"learning_rate": 1.9914689916334175e-05,
|
|
"loss": 0.6946,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 0.8515273502249585,
|
|
"grad_norm": 0.03695727697699672,
|
|
"learning_rate": 1.9668423697785656e-05,
|
|
"loss": 0.7331,
|
|
"step": 4495
|
|
},
|
|
{
|
|
"epoch": 0.8524745441629173,
|
|
"grad_norm": 0.03752543633546998,
|
|
"learning_rate": 1.942358276447462e-05,
|
|
"loss": 0.7281,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.8534217381008762,
|
|
"grad_norm": 0.04068189729909485,
|
|
"learning_rate": 1.9180169793942272e-05,
|
|
"loss": 0.7639,
|
|
"step": 4505
|
|
},
|
|
{
|
|
"epoch": 0.8543689320388349,
|
|
"grad_norm": 0.04048730316852887,
|
|
"learning_rate": 1.893818744811388e-05,
|
|
"loss": 0.748,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 0.8553161259767937,
|
|
"grad_norm": 0.04595934306302146,
|
|
"learning_rate": 1.869763837326963e-05,
|
|
"loss": 0.7799,
|
|
"step": 4515
|
|
},
|
|
{
|
|
"epoch": 0.8562633199147526,
|
|
"grad_norm": 0.04070824155336302,
|
|
"learning_rate": 1.8458525200015593e-05,
|
|
"loss": 0.7525,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 0.8572105138527113,
|
|
"grad_norm": 0.03956623366079343,
|
|
"learning_rate": 1.822085054325515e-05,
|
|
"loss": 0.7159,
|
|
"step": 4525
|
|
},
|
|
{
|
|
"epoch": 0.8581577077906701,
|
|
"grad_norm": 0.03807785792934746,
|
|
"learning_rate": 1.798461700216029e-05,
|
|
"loss": 0.7562,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 0.859104901728629,
|
|
"grad_norm": 0.03756680254096828,
|
|
"learning_rate": 1.7749827160143164e-05,
|
|
"loss": 0.7292,
|
|
"step": 4535
|
|
},
|
|
{
|
|
"epoch": 0.8600520956665877,
|
|
"grad_norm": 0.03976826946310067,
|
|
"learning_rate": 1.751648358482789e-05,
|
|
"loss": 0.7282,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 0.8609992896045465,
|
|
"grad_norm": 0.043044198125544504,
|
|
"learning_rate": 1.7284588828022378e-05,
|
|
"loss": 0.7152,
|
|
"step": 4545
|
|
},
|
|
{
|
|
"epoch": 0.8619464835425054,
|
|
"grad_norm": 0.04230346678410999,
|
|
"learning_rate": 1.7054145425690536e-05,
|
|
"loss": 0.7297,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 0.8628936774804641,
|
|
"grad_norm": 0.03893557149385341,
|
|
"learning_rate": 1.6825155897924513e-05,
|
|
"loss": 0.7239,
|
|
"step": 4555
|
|
},
|
|
{
|
|
"epoch": 0.8638408714184229,
|
|
"grad_norm": 0.037068694217922235,
|
|
"learning_rate": 1.6597622748917132e-05,
|
|
"loss": 0.7142,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 0.8647880653563818,
|
|
"grad_norm": 0.03991740786553955,
|
|
"learning_rate": 1.6371548466934385e-05,
|
|
"loss": 0.7308,
|
|
"step": 4565
|
|
},
|
|
{
|
|
"epoch": 0.8657352592943405,
|
|
"grad_norm": 0.04471672391377206,
|
|
"learning_rate": 1.6146935524288446e-05,
|
|
"loss": 0.7301,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 0.8666824532322993,
|
|
"grad_norm": 0.037180999778756545,
|
|
"learning_rate": 1.5923786377310433e-05,
|
|
"loss": 0.7203,
|
|
"step": 4575
|
|
},
|
|
{
|
|
"epoch": 0.8676296471702581,
|
|
"grad_norm": 0.04146480156497774,
|
|
"learning_rate": 1.5702103466323708e-05,
|
|
"loss": 0.7119,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 0.8685768411082169,
|
|
"grad_norm": 0.036947710794122034,
|
|
"learning_rate": 1.5481889215617073e-05,
|
|
"loss": 0.7196,
|
|
"step": 4585
|
|
},
|
|
{
|
|
"epoch": 0.8695240350461757,
|
|
"grad_norm": 0.03548597961029977,
|
|
"learning_rate": 1.5263146033418227e-05,
|
|
"loss": 0.7051,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 0.8704712289841345,
|
|
"grad_norm": 0.03955821537818952,
|
|
"learning_rate": 1.5045876311867628e-05,
|
|
"loss": 0.7206,
|
|
"step": 4595
|
|
},
|
|
{
|
|
"epoch": 0.8714184229220933,
|
|
"grad_norm": 0.038962735470389774,
|
|
"learning_rate": 1.4830082426992112e-05,
|
|
"loss": 0.7266,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.8723656168600521,
|
|
"grad_norm": 0.03843209348318749,
|
|
"learning_rate": 1.4615766738679036e-05,
|
|
"loss": 0.7236,
|
|
"step": 4605
|
|
},
|
|
{
|
|
"epoch": 0.8733128107980109,
|
|
"grad_norm": 0.03583871656951685,
|
|
"learning_rate": 1.4402931590650462e-05,
|
|
"loss": 0.7037,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 0.8742600047359697,
|
|
"grad_norm": 0.03814447238860827,
|
|
"learning_rate": 1.4191579310437412e-05,
|
|
"loss": 0.7142,
|
|
"step": 4615
|
|
},
|
|
{
|
|
"epoch": 0.8752071986739285,
|
|
"grad_norm": 0.04131298607004148,
|
|
"learning_rate": 1.398171220935459e-05,
|
|
"loss": 0.7472,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 0.8761543926118873,
|
|
"grad_norm": 0.04027601558619238,
|
|
"learning_rate": 1.3773332582474995e-05,
|
|
"loss": 0.7222,
|
|
"step": 4625
|
|
},
|
|
{
|
|
"epoch": 0.877101586549846,
|
|
"grad_norm": 0.03807892715571328,
|
|
"learning_rate": 1.356644270860487e-05,
|
|
"loss": 0.724,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 0.8780487804878049,
|
|
"grad_norm": 0.037140756994390095,
|
|
"learning_rate": 1.3361044850258657e-05,
|
|
"loss": 0.7313,
|
|
"step": 4635
|
|
},
|
|
{
|
|
"epoch": 0.8789959744257637,
|
|
"grad_norm": 0.040598401756478456,
|
|
"learning_rate": 1.3157141253634469e-05,
|
|
"loss": 0.7418,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 0.8799431683637224,
|
|
"grad_norm": 0.03636159365334358,
|
|
"learning_rate": 1.2954734148589369e-05,
|
|
"loss": 0.733,
|
|
"step": 4645
|
|
},
|
|
{
|
|
"epoch": 0.8808903623016813,
|
|
"grad_norm": 0.04100817868151093,
|
|
"learning_rate": 1.2753825748615032e-05,
|
|
"loss": 0.715,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 0.8818375562396401,
|
|
"grad_norm": 0.040249623789949056,
|
|
"learning_rate": 1.255441825081354e-05,
|
|
"loss": 0.7177,
|
|
"step": 4655
|
|
},
|
|
{
|
|
"epoch": 0.8827847501775988,
|
|
"grad_norm": 0.038473649906406296,
|
|
"learning_rate": 1.235651383587331e-05,
|
|
"loss": 0.735,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 0.8837319441155577,
|
|
"grad_norm": 0.03716433351702097,
|
|
"learning_rate": 1.2160114668045335e-05,
|
|
"loss": 0.7109,
|
|
"step": 4665
|
|
},
|
|
{
|
|
"epoch": 0.8846791380535165,
|
|
"grad_norm": 0.0366065266385142,
|
|
"learning_rate": 1.1965222895119442e-05,
|
|
"loss": 0.7098,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 0.8856263319914752,
|
|
"grad_norm": 0.041156901959537195,
|
|
"learning_rate": 1.1771840648400849e-05,
|
|
"loss": 0.7422,
|
|
"step": 4675
|
|
},
|
|
{
|
|
"epoch": 0.8865735259294341,
|
|
"grad_norm": 0.03951261248655999,
|
|
"learning_rate": 1.1579970042686843e-05,
|
|
"loss": 0.7434,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 0.8875207198673929,
|
|
"grad_norm": 0.042505748782389406,
|
|
"learning_rate": 1.1389613176243567e-05,
|
|
"loss": 0.7422,
|
|
"step": 4685
|
|
},
|
|
{
|
|
"epoch": 0.8884679138053516,
|
|
"grad_norm": 0.036559177807256635,
|
|
"learning_rate": 1.1200772130783259e-05,
|
|
"loss": 0.6995,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 0.8894151077433105,
|
|
"grad_norm": 0.038540183149753306,
|
|
"learning_rate": 1.1013448971441313e-05,
|
|
"loss": 0.7386,
|
|
"step": 4695
|
|
},
|
|
{
|
|
"epoch": 0.8903623016812693,
|
|
"grad_norm": 0.03778323009855525,
|
|
"learning_rate": 1.0827645746753837e-05,
|
|
"loss": 0.7293,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.891309495619228,
|
|
"grad_norm": 0.037960112685593676,
|
|
"learning_rate": 1.064336448863507e-05,
|
|
"loss": 0.7132,
|
|
"step": 4705
|
|
},
|
|
{
|
|
"epoch": 0.8922566895571868,
|
|
"grad_norm": 0.038143343588638426,
|
|
"learning_rate": 1.0460607212355343e-05,
|
|
"loss": 0.7157,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 0.8932038834951457,
|
|
"grad_norm": 0.03885540215502875,
|
|
"learning_rate": 1.0279375916518956e-05,
|
|
"loss": 0.7329,
|
|
"step": 4715
|
|
},
|
|
{
|
|
"epoch": 0.8941510774331044,
|
|
"grad_norm": 0.036527462926812734,
|
|
"learning_rate": 1.0099672583042306e-05,
|
|
"loss": 0.706,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 0.8950982713710632,
|
|
"grad_norm": 0.04170086636332326,
|
|
"learning_rate": 9.921499177132325e-06,
|
|
"loss": 0.7159,
|
|
"step": 4725
|
|
},
|
|
{
|
|
"epoch": 0.8960454653090221,
|
|
"grad_norm": 0.03917784068162278,
|
|
"learning_rate": 9.744857647264743e-06,
|
|
"loss": 0.7151,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 0.8969926592469808,
|
|
"grad_norm": 0.0368623059179063,
|
|
"learning_rate": 9.56974992516309e-06,
|
|
"loss": 0.7175,
|
|
"step": 4735
|
|
},
|
|
{
|
|
"epoch": 0.8979398531849396,
|
|
"grad_norm": 0.03761895854461036,
|
|
"learning_rate": 9.396177925777315e-06,
|
|
"loss": 0.7376,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 0.8988870471228985,
|
|
"grad_norm": 0.03943918919221873,
|
|
"learning_rate": 9.224143547263018e-06,
|
|
"loss": 0.727,
|
|
"step": 4745
|
|
},
|
|
{
|
|
"epoch": 0.8998342410608572,
|
|
"grad_norm": 0.041135228489504315,
|
|
"learning_rate": 9.053648670960634e-06,
|
|
"loss": 0.7079,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 0.900781434998816,
|
|
"grad_norm": 0.037659487176961826,
|
|
"learning_rate": 8.88469516137476e-06,
|
|
"loss": 0.719,
|
|
"step": 4755
|
|
},
|
|
{
|
|
"epoch": 0.9017286289367749,
|
|
"grad_norm": 0.03937100685930776,
|
|
"learning_rate": 8.717284866153967e-06,
|
|
"loss": 0.704,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 0.9026758228747336,
|
|
"grad_norm": 0.038814959136852366,
|
|
"learning_rate": 8.551419616070322e-06,
|
|
"loss": 0.7329,
|
|
"step": 4765
|
|
},
|
|
{
|
|
"epoch": 0.9036230168126924,
|
|
"grad_norm": 0.03791616596156232,
|
|
"learning_rate": 8.387101224999738e-06,
|
|
"loss": 0.7544,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 0.9045702107506512,
|
|
"grad_norm": 0.036920295113180866,
|
|
"learning_rate": 8.224331489901747e-06,
|
|
"loss": 0.7353,
|
|
"step": 4775
|
|
},
|
|
{
|
|
"epoch": 0.90551740468861,
|
|
"grad_norm": 0.04102354744430632,
|
|
"learning_rate": 8.063112190800114e-06,
|
|
"loss": 0.743,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 0.9064645986265688,
|
|
"grad_norm": 0.039647668406205074,
|
|
"learning_rate": 7.903445090763278e-06,
|
|
"loss": 0.7288,
|
|
"step": 4785
|
|
},
|
|
{
|
|
"epoch": 0.9074117925645276,
|
|
"grad_norm": 0.037692364175638995,
|
|
"learning_rate": 7.745331935885008e-06,
|
|
"loss": 0.7185,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 0.9083589865024864,
|
|
"grad_norm": 0.036226337301176685,
|
|
"learning_rate": 7.588774455265517e-06,
|
|
"loss": 0.7396,
|
|
"step": 4795
|
|
},
|
|
{
|
|
"epoch": 0.9093061804404452,
|
|
"grad_norm": 0.03741364399731542,
|
|
"learning_rate": 7.433774360992279e-06,
|
|
"loss": 0.7226,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.9102533743784039,
|
|
"grad_norm": 0.03775216663355994,
|
|
"learning_rate": 7.280333348121503e-06,
|
|
"loss": 0.716,
|
|
"step": 4805
|
|
},
|
|
{
|
|
"epoch": 0.9112005683163628,
|
|
"grad_norm": 0.03958684677935284,
|
|
"learning_rate": 7.128453094659508e-06,
|
|
"loss": 0.7364,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 0.9121477622543216,
|
|
"grad_norm": 0.03816974922462119,
|
|
"learning_rate": 6.978135261544398e-06,
|
|
"loss": 0.726,
|
|
"step": 4815
|
|
},
|
|
{
|
|
"epoch": 0.9130949561922803,
|
|
"grad_norm": 0.03916663794581213,
|
|
"learning_rate": 6.829381492627978e-06,
|
|
"loss": 0.7091,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 0.9140421501302392,
|
|
"grad_norm": 0.03819017711079349,
|
|
"learning_rate": 6.682193414657583e-06,
|
|
"loss": 0.7225,
|
|
"step": 4825
|
|
},
|
|
{
|
|
"epoch": 0.914989344068198,
|
|
"grad_norm": 0.03825337879044159,
|
|
"learning_rate": 6.5365726372584805e-06,
|
|
"loss": 0.7167,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 0.9159365380061567,
|
|
"grad_norm": 0.03783436042390378,
|
|
"learning_rate": 6.392520752916097e-06,
|
|
"loss": 0.7425,
|
|
"step": 4835
|
|
},
|
|
{
|
|
"epoch": 0.9168837319441155,
|
|
"grad_norm": 0.03893124825143902,
|
|
"learning_rate": 6.2500393369588505e-06,
|
|
"loss": 0.7272,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 0.9178309258820744,
|
|
"grad_norm": 0.03729369559567071,
|
|
"learning_rate": 6.109129947540631e-06,
|
|
"loss": 0.741,
|
|
"step": 4845
|
|
},
|
|
{
|
|
"epoch": 0.9187781198200331,
|
|
"grad_norm": 0.03912193568036812,
|
|
"learning_rate": 5.969794125623928e-06,
|
|
"loss": 0.7276,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 0.9197253137579919,
|
|
"grad_norm": 0.03810873022721955,
|
|
"learning_rate": 5.832033394963015e-06,
|
|
"loss": 0.7231,
|
|
"step": 4855
|
|
},
|
|
{
|
|
"epoch": 0.9206725076959508,
|
|
"grad_norm": 0.03730402627420899,
|
|
"learning_rate": 5.69584926208711e-06,
|
|
"loss": 0.7047,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 0.9216197016339095,
|
|
"grad_norm": 0.03667813336307107,
|
|
"learning_rate": 5.561243216284139e-06,
|
|
"loss": 0.7152,
|
|
"step": 4865
|
|
},
|
|
{
|
|
"epoch": 0.9225668955718683,
|
|
"grad_norm": 0.03964877444312505,
|
|
"learning_rate": 5.4282167295842e-06,
|
|
"loss": 0.7151,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 0.9235140895098272,
|
|
"grad_norm": 0.04352873821823642,
|
|
"learning_rate": 5.296771256743676e-06,
|
|
"loss": 0.7148,
|
|
"step": 4875
|
|
},
|
|
{
|
|
"epoch": 0.9244612834477859,
|
|
"grad_norm": 0.04158839264995075,
|
|
"learning_rate": 5.166908235229178e-06,
|
|
"loss": 0.699,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 0.9254084773857447,
|
|
"grad_norm": 0.03955083160304519,
|
|
"learning_rate": 5.038629085201878e-06,
|
|
"loss": 0.727,
|
|
"step": 4885
|
|
},
|
|
{
|
|
"epoch": 0.9263556713237036,
|
|
"grad_norm": 0.044593469584970485,
|
|
"learning_rate": 4.911935209502072e-06,
|
|
"loss": 0.7399,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 0.9273028652616623,
|
|
"grad_norm": 0.03845070002008913,
|
|
"learning_rate": 4.786827993633635e-06,
|
|
"loss": 0.7197,
|
|
"step": 4895
|
|
},
|
|
{
|
|
"epoch": 0.9282500591996211,
|
|
"grad_norm": 0.037419281999128515,
|
|
"learning_rate": 4.663308805749061e-06,
|
|
"loss": 0.7318,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.92919725313758,
|
|
"grad_norm": 0.03903737099095032,
|
|
"learning_rate": 4.541378996634382e-06,
|
|
"loss": 0.7339,
|
|
"step": 4905
|
|
},
|
|
{
|
|
"epoch": 0.9301444470755387,
|
|
"grad_norm": 0.03913683799560393,
|
|
"learning_rate": 4.421039899694468e-06,
|
|
"loss": 0.7229,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 0.9310916410134975,
|
|
"grad_norm": 0.03740496457952301,
|
|
"learning_rate": 4.302292830938403e-06,
|
|
"loss": 0.7138,
|
|
"step": 4915
|
|
},
|
|
{
|
|
"epoch": 0.9320388349514563,
|
|
"grad_norm": 0.03949065539672211,
|
|
"learning_rate": 4.185139088965083e-06,
|
|
"loss": 0.7036,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 0.9329860288894151,
|
|
"grad_norm": 0.03718724530040875,
|
|
"learning_rate": 4.06957995494911e-06,
|
|
"loss": 0.7241,
|
|
"step": 4925
|
|
},
|
|
{
|
|
"epoch": 0.9339332228273739,
|
|
"grad_norm": 0.038984046127228444,
|
|
"learning_rate": 3.955616692626612e-06,
|
|
"loss": 0.7132,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 0.9348804167653327,
|
|
"grad_norm": 0.03705946169516573,
|
|
"learning_rate": 3.843250548281584e-06,
|
|
"loss": 0.7205,
|
|
"step": 4935
|
|
},
|
|
{
|
|
"epoch": 0.9358276107032915,
|
|
"grad_norm": 0.04056507468322176,
|
|
"learning_rate": 3.7324827507321907e-06,
|
|
"loss": 0.7095,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 0.9367748046412503,
|
|
"grad_norm": 0.03838686948370597,
|
|
"learning_rate": 3.62331451131731e-06,
|
|
"loss": 0.7402,
|
|
"step": 4945
|
|
},
|
|
{
|
|
"epoch": 0.9377219985792091,
|
|
"grad_norm": 0.03735322865661384,
|
|
"learning_rate": 3.5157470238832975e-06,
|
|
"loss": 0.7113,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 0.9386691925171678,
|
|
"grad_norm": 0.03865946802077697,
|
|
"learning_rate": 3.4097814647709775e-06,
|
|
"loss": 0.7327,
|
|
"step": 4955
|
|
},
|
|
{
|
|
"epoch": 0.9396163864551267,
|
|
"grad_norm": 0.03750616620805965,
|
|
"learning_rate": 3.3054189928027386e-06,
|
|
"loss": 0.7078,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 0.9405635803930855,
|
|
"grad_norm": 0.040179517311287265,
|
|
"learning_rate": 3.202660749269842e-06,
|
|
"loss": 0.7168,
|
|
"step": 4965
|
|
},
|
|
{
|
|
"epoch": 0.9415107743310442,
|
|
"grad_norm": 0.038488343198023446,
|
|
"learning_rate": 3.1015078579199992e-06,
|
|
"loss": 0.7263,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 0.9424579682690031,
|
|
"grad_norm": 0.03846115414340801,
|
|
"learning_rate": 3.0019614249449818e-06,
|
|
"loss": 0.7396,
|
|
"step": 4975
|
|
},
|
|
{
|
|
"epoch": 0.9434051622069619,
|
|
"grad_norm": 0.03891780338800125,
|
|
"learning_rate": 2.9040225389686477e-06,
|
|
"loss": 0.7197,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 0.9443523561449206,
|
|
"grad_norm": 0.038745010189377524,
|
|
"learning_rate": 2.8076922710349836e-06,
|
|
"loss": 0.6982,
|
|
"step": 4985
|
|
},
|
|
{
|
|
"epoch": 0.9452995500828795,
|
|
"grad_norm": 0.03993490506698604,
|
|
"learning_rate": 2.7129716745963316e-06,
|
|
"loss": 0.6958,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 0.9462467440208383,
|
|
"grad_norm": 0.03974439268947578,
|
|
"learning_rate": 2.6198617855020143e-06,
|
|
"loss": 0.7312,
|
|
"step": 4995
|
|
},
|
|
{
|
|
"epoch": 0.947193937958797,
|
|
"grad_norm": 0.04146912204153186,
|
|
"learning_rate": 2.5283636219867954e-06,
|
|
"loss": 0.7385,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.9481411318967559,
|
|
"grad_norm": 0.03707229834469002,
|
|
"learning_rate": 2.43847818465997e-06,
|
|
"loss": 0.7345,
|
|
"step": 5005
|
|
},
|
|
{
|
|
"epoch": 0.9490883258347147,
|
|
"grad_norm": 0.039133364688565486,
|
|
"learning_rate": 2.3502064564942578e-06,
|
|
"loss": 0.7075,
|
|
"step": 5010
|
|
},
|
|
{
|
|
"epoch": 0.9500355197726734,
|
|
"grad_norm": 0.03622200599308819,
|
|
"learning_rate": 2.263549402815179e-06,
|
|
"loss": 0.6983,
|
|
"step": 5015
|
|
},
|
|
{
|
|
"epoch": 0.9509827137106323,
|
|
"grad_norm": 0.038295665871847934,
|
|
"learning_rate": 2.1785079712903275e-06,
|
|
"loss": 0.7334,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 0.9519299076485911,
|
|
"grad_norm": 0.03915418927573118,
|
|
"learning_rate": 2.095083091919214e-06,
|
|
"loss": 0.7372,
|
|
"step": 5025
|
|
},
|
|
{
|
|
"epoch": 0.9528771015865498,
|
|
"grad_norm": 0.03728899024964666,
|
|
"learning_rate": 2.0132756770229576e-06,
|
|
"loss": 0.7046,
|
|
"step": 5030
|
|
},
|
|
{
|
|
"epoch": 0.9538242955245086,
|
|
"grad_norm": 0.03775405052284275,
|
|
"learning_rate": 1.9330866212343086e-06,
|
|
"loss": 0.7143,
|
|
"step": 5035
|
|
},
|
|
{
|
|
"epoch": 0.9547714894624675,
|
|
"grad_norm": 0.038310827788718325,
|
|
"learning_rate": 1.8545168014879764e-06,
|
|
"loss": 0.7111,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 0.9557186834004262,
|
|
"grad_norm": 0.03835494831636439,
|
|
"learning_rate": 1.777567077010883e-06,
|
|
"loss": 0.7398,
|
|
"step": 5045
|
|
},
|
|
{
|
|
"epoch": 0.956665877338385,
|
|
"grad_norm": 0.038374737236019835,
|
|
"learning_rate": 1.7022382893129072e-06,
|
|
"loss": 0.7149,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 0.9576130712763439,
|
|
"grad_norm": 0.039365530128439526,
|
|
"learning_rate": 1.6285312621775903e-06,
|
|
"loss": 0.7074,
|
|
"step": 5055
|
|
},
|
|
{
|
|
"epoch": 0.9585602652143026,
|
|
"grad_norm": 0.038447131703807806,
|
|
"learning_rate": 1.5564468016531773e-06,
|
|
"loss": 0.7531,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 0.9595074591522614,
|
|
"grad_norm": 0.03768227162346853,
|
|
"learning_rate": 1.48598569604379e-06,
|
|
"loss": 0.7215,
|
|
"step": 5065
|
|
},
|
|
{
|
|
"epoch": 0.9604546530902203,
|
|
"grad_norm": 0.03691120803905496,
|
|
"learning_rate": 1.4171487159007843e-06,
|
|
"loss": 0.7037,
|
|
"step": 5070
|
|
},
|
|
{
|
|
"epoch": 0.961401847028179,
|
|
"grad_norm": 0.03847682888310551,
|
|
"learning_rate": 1.349936614014341e-06,
|
|
"loss": 0.7386,
|
|
"step": 5075
|
|
},
|
|
{
|
|
"epoch": 0.9623490409661378,
|
|
"grad_norm": 0.038562458996163625,
|
|
"learning_rate": 1.2843501254052368e-06,
|
|
"loss": 0.7145,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 0.9632962349040967,
|
|
"grad_norm": 0.039915946654852284,
|
|
"learning_rate": 1.2203899673168205e-06,
|
|
"loss": 0.723,
|
|
"step": 5085
|
|
},
|
|
{
|
|
"epoch": 0.9642434288420554,
|
|
"grad_norm": 0.03842715857163489,
|
|
"learning_rate": 1.1580568392071e-06,
|
|
"loss": 0.7092,
|
|
"step": 5090
|
|
},
|
|
{
|
|
"epoch": 0.9651906227800142,
|
|
"grad_norm": 0.040087737408694465,
|
|
"learning_rate": 1.0973514227412161e-06,
|
|
"loss": 0.7467,
|
|
"step": 5095
|
|
},
|
|
{
|
|
"epoch": 0.966137816717973,
|
|
"grad_norm": 0.03868018943060032,
|
|
"learning_rate": 1.038274381783849e-06,
|
|
"loss": 0.7034,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.9670850106559318,
|
|
"grad_norm": 0.03888690917055077,
|
|
"learning_rate": 9.80826362392073e-07,
|
|
"loss": 0.724,
|
|
"step": 5105
|
|
},
|
|
{
|
|
"epoch": 0.9680322045938906,
|
|
"grad_norm": 0.0361796924871166,
|
|
"learning_rate": 9.250079928082132e-07,
|
|
"loss": 0.736,
|
|
"step": 5110
|
|
},
|
|
{
|
|
"epoch": 0.9689793985318494,
|
|
"grad_norm": 0.03965669973295369,
|
|
"learning_rate": 8.708198834530166e-07,
|
|
"loss": 0.7261,
|
|
"step": 5115
|
|
},
|
|
{
|
|
"epoch": 0.9699265924698082,
|
|
"grad_norm": 0.039430492673631544,
|
|
"learning_rate": 8.182626269189752e-07,
|
|
"loss": 0.7242,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 0.970873786407767,
|
|
"grad_norm": 0.0348287855119034,
|
|
"learning_rate": 7.673367979637968e-07,
|
|
"loss": 0.7082,
|
|
"step": 5125
|
|
},
|
|
{
|
|
"epoch": 0.9718209803457258,
|
|
"grad_norm": 0.03706016619862877,
|
|
"learning_rate": 7.180429535042276e-07,
|
|
"loss": 0.7518,
|
|
"step": 5130
|
|
},
|
|
{
|
|
"epoch": 0.9727681742836846,
|
|
"grad_norm": 0.04056580951745248,
|
|
"learning_rate": 6.703816326098399e-07,
|
|
"loss": 0.7401,
|
|
"step": 5135
|
|
},
|
|
{
|
|
"epoch": 0.9737153682216434,
|
|
"grad_norm": 0.04032169255601853,
|
|
"learning_rate": 6.24353356497187e-07,
|
|
"loss": 0.7491,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 0.9746625621596022,
|
|
"grad_norm": 0.04118720081247295,
|
|
"learning_rate": 5.799586285241242e-07,
|
|
"loss": 0.7569,
|
|
"step": 5145
|
|
},
|
|
{
|
|
"epoch": 0.975609756097561,
|
|
"grad_norm": 0.039992603944723155,
|
|
"learning_rate": 5.371979341843136e-07,
|
|
"loss": 0.719,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 0.9765569500355198,
|
|
"grad_norm": 0.038975715445144526,
|
|
"learning_rate": 4.960717411018277e-07,
|
|
"loss": 0.7183,
|
|
"step": 5155
|
|
},
|
|
{
|
|
"epoch": 0.9775041439734786,
|
|
"grad_norm": 0.03558255003932097,
|
|
"learning_rate": 4.565804990261379e-07,
|
|
"loss": 0.72,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 0.9784513379114373,
|
|
"grad_norm": 0.041836063593222644,
|
|
"learning_rate": 4.187246398271171e-07,
|
|
"loss": 0.7227,
|
|
"step": 5165
|
|
},
|
|
{
|
|
"epoch": 0.9793985318493962,
|
|
"grad_norm": 0.0401155869593642,
|
|
"learning_rate": 3.825045774904112e-07,
|
|
"loss": 0.723,
|
|
"step": 5170
|
|
},
|
|
{
|
|
"epoch": 0.980345725787355,
|
|
"grad_norm": 0.04241569076307143,
|
|
"learning_rate": 3.4792070811280884e-07,
|
|
"loss": 0.7329,
|
|
"step": 5175
|
|
},
|
|
{
|
|
"epoch": 0.9812929197253137,
|
|
"grad_norm": 0.04084926695436618,
|
|
"learning_rate": 3.149734098979617e-07,
|
|
"loss": 0.7126,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 0.9822401136632726,
|
|
"grad_norm": 0.03939583534788028,
|
|
"learning_rate": 2.83663043152238e-07,
|
|
"loss": 0.7426,
|
|
"step": 5185
|
|
},
|
|
{
|
|
"epoch": 0.9831873076012313,
|
|
"grad_norm": 0.039426635966572685,
|
|
"learning_rate": 2.5398995028079184e-07,
|
|
"loss": 0.7086,
|
|
"step": 5190
|
|
},
|
|
{
|
|
"epoch": 0.9841345015391901,
|
|
"grad_norm": 0.03761530845106783,
|
|
"learning_rate": 2.2595445578381665e-07,
|
|
"loss": 0.7132,
|
|
"step": 5195
|
|
},
|
|
{
|
|
"epoch": 0.985081695477149,
|
|
"grad_norm": 0.03983372031881901,
|
|
"learning_rate": 1.9955686625299782e-07,
|
|
"loss": 0.7041,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 0.9860288894151077,
|
|
"grad_norm": 0.038745755637894036,
|
|
"learning_rate": 1.7479747036813207e-07,
|
|
"loss": 0.7565,
|
|
"step": 5205
|
|
},
|
|
{
|
|
"epoch": 0.9869760833530665,
|
|
"grad_norm": 0.03451565749276735,
|
|
"learning_rate": 1.5167653889401332e-07,
|
|
"loss": 0.7053,
|
|
"step": 5210
|
|
},
|
|
{
|
|
"epoch": 0.9879232772910254,
|
|
"grad_norm": 0.0377767563169325,
|
|
"learning_rate": 1.3019432467743508e-07,
|
|
"loss": 0.7165,
|
|
"step": 5215
|
|
},
|
|
{
|
|
"epoch": 0.9888704712289841,
|
|
"grad_norm": 0.03919747343677102,
|
|
"learning_rate": 1.1035106264445925e-07,
|
|
"loss": 0.7135,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 0.9898176651669429,
|
|
"grad_norm": 0.035742270061366835,
|
|
"learning_rate": 9.214696979781833e-08,
|
|
"loss": 0.7422,
|
|
"step": 5225
|
|
},
|
|
{
|
|
"epoch": 0.9907648591049018,
|
|
"grad_norm": 0.03988070344903895,
|
|
"learning_rate": 7.558224521455048e-08,
|
|
"loss": 0.7176,
|
|
"step": 5230
|
|
},
|
|
{
|
|
"epoch": 0.9917120530428605,
|
|
"grad_norm": 0.03844068057098456,
|
|
"learning_rate": 6.065707004383468e-08,
|
|
"loss": 0.7275,
|
|
"step": 5235
|
|
},
|
|
{
|
|
"epoch": 0.9926592469808193,
|
|
"grad_norm": 0.037145775071304184,
|
|
"learning_rate": 4.737160750500901e-08,
|
|
"loss": 0.7158,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 0.9936064409187781,
|
|
"grad_norm": 0.03801708244408533,
|
|
"learning_rate": 3.572600288572203e-08,
|
|
"loss": 0.7286,
|
|
"step": 5245
|
|
},
|
|
{
|
|
"epoch": 0.9945536348567369,
|
|
"grad_norm": 0.03898719180494596,
|
|
"learning_rate": 2.5720383540484002e-08,
|
|
"loss": 0.7208,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 0.9955008287946957,
|
|
"grad_norm": 0.03854350834493005,
|
|
"learning_rate": 1.7354858889134793e-08,
|
|
"loss": 0.7347,
|
|
"step": 5255
|
|
},
|
|
{
|
|
"epoch": 0.9964480227326545,
|
|
"grad_norm": 0.039328700531041635,
|
|
"learning_rate": 1.0629520415694759e-08,
|
|
"loss": 0.7603,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 0.9973952166706133,
|
|
"grad_norm": 0.03842843734298471,
|
|
"learning_rate": 5.544441667398869e-09,
|
|
"loss": 0.7204,
|
|
"step": 5265
|
|
},
|
|
{
|
|
"epoch": 0.9983424106085721,
|
|
"grad_norm": 0.03655798359456603,
|
|
"learning_rate": 2.099678253847381e-09,
|
|
"loss": 0.7179,
|
|
"step": 5270
|
|
},
|
|
{
|
|
"epoch": 0.9992896045465309,
|
|
"grad_norm": 0.03754524737942178,
|
|
"learning_rate": 2.952678464229752e-10,
|
|
"loss": 0.6995,
|
|
"step": 5275
|
|
},
|
|
{
|
|
"epoch": 0.9998579209093061,
|
|
"eval_loss": 1.1249996423721313,
|
|
"eval_runtime": 1040.1086,
|
|
"eval_samples_per_second": 188.598,
|
|
"eval_steps_per_second": 5.895,
|
|
"step": 5278
|
|
},
|
|
{
|
|
"epoch": 0.9998579209093061,
|
|
"step": 5278,
|
|
"total_flos": 768453779423232.0,
|
|
"train_loss": 0.8248233945137535,
|
|
"train_runtime": 21690.489,
|
|
"train_samples_per_second": 31.151,
|
|
"train_steps_per_second": 0.243
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 5278,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 768453779423232.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|