8093 lines
199 KiB
JSON
8093 lines
199 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.4999945685825087,
|
|
"eval_steps": 500,
|
|
"global_step": 11507,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00043451339930695114,
|
|
"grad_norm": 1.8335795402526855,
|
|
"learning_rate": 5.780346820809249e-07,
|
|
"loss": 0.1919,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.0008690267986139023,
|
|
"grad_norm": 1.1079758405685425,
|
|
"learning_rate": 1.1560693641618499e-06,
|
|
"loss": 0.1616,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.0013035401979208535,
|
|
"grad_norm": 0.7856003046035767,
|
|
"learning_rate": 1.7341040462427746e-06,
|
|
"loss": 0.1325,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.0017380535972278046,
|
|
"grad_norm": 0.47701773047447205,
|
|
"learning_rate": 2.3121387283236997e-06,
|
|
"loss": 0.0861,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.0021725669965347557,
|
|
"grad_norm": 0.4777753949165344,
|
|
"learning_rate": 2.890173410404625e-06,
|
|
"loss": 0.0734,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.002607080395841707,
|
|
"grad_norm": 0.44196391105651855,
|
|
"learning_rate": 3.468208092485549e-06,
|
|
"loss": 0.0565,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.003041593795148658,
|
|
"grad_norm": 0.3910512924194336,
|
|
"learning_rate": 4.046242774566474e-06,
|
|
"loss": 0.0524,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.003476107194455609,
|
|
"grad_norm": 0.43339353799819946,
|
|
"learning_rate": 4.6242774566473994e-06,
|
|
"loss": 0.0534,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.00391062059376256,
|
|
"grad_norm": 0.44343557953834534,
|
|
"learning_rate": 5.202312138728324e-06,
|
|
"loss": 0.0432,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.004345133993069511,
|
|
"grad_norm": 0.37480905652046204,
|
|
"learning_rate": 5.78034682080925e-06,
|
|
"loss": 0.0476,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.004779647392376463,
|
|
"grad_norm": 0.32702726125717163,
|
|
"learning_rate": 6.358381502890174e-06,
|
|
"loss": 0.0326,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.005214160791683414,
|
|
"grad_norm": 0.40295419096946716,
|
|
"learning_rate": 6.936416184971098e-06,
|
|
"loss": 0.0374,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.005648674190990364,
|
|
"grad_norm": 0.3500003516674042,
|
|
"learning_rate": 7.5144508670520235e-06,
|
|
"loss": 0.031,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.006083187590297316,
|
|
"grad_norm": 0.38030797243118286,
|
|
"learning_rate": 8.092485549132949e-06,
|
|
"loss": 0.0368,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.006517700989604267,
|
|
"grad_norm": 0.3058304190635681,
|
|
"learning_rate": 8.670520231213873e-06,
|
|
"loss": 0.0342,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.006952214388911218,
|
|
"grad_norm": 0.3178086280822754,
|
|
"learning_rate": 9.248554913294799e-06,
|
|
"loss": 0.0328,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.00738672778821817,
|
|
"grad_norm": 0.416713148355484,
|
|
"learning_rate": 9.826589595375723e-06,
|
|
"loss": 0.037,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.00782124118752512,
|
|
"grad_norm": 0.24077042937278748,
|
|
"learning_rate": 1.0404624277456647e-05,
|
|
"loss": 0.0313,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.008255754586832071,
|
|
"grad_norm": 0.27852290868759155,
|
|
"learning_rate": 1.0982658959537573e-05,
|
|
"loss": 0.0266,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.008690267986139023,
|
|
"grad_norm": 0.24909931421279907,
|
|
"learning_rate": 1.15606936416185e-05,
|
|
"loss": 0.0365,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.009124781385445974,
|
|
"grad_norm": 0.24574002623558044,
|
|
"learning_rate": 1.2138728323699422e-05,
|
|
"loss": 0.03,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.009559294784752925,
|
|
"grad_norm": 0.2927786111831665,
|
|
"learning_rate": 1.2716763005780348e-05,
|
|
"loss": 0.0286,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.009993808184059877,
|
|
"grad_norm": 0.31231585144996643,
|
|
"learning_rate": 1.3294797687861272e-05,
|
|
"loss": 0.0305,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.010428321583366828,
|
|
"grad_norm": 0.3059456944465637,
|
|
"learning_rate": 1.3872832369942197e-05,
|
|
"loss": 0.0313,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.010862834982673777,
|
|
"grad_norm": 0.23252910375595093,
|
|
"learning_rate": 1.4450867052023123e-05,
|
|
"loss": 0.0269,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.011297348381980729,
|
|
"grad_norm": 0.430499792098999,
|
|
"learning_rate": 1.5028901734104047e-05,
|
|
"loss": 0.0339,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.01173186178128768,
|
|
"grad_norm": 0.21920466423034668,
|
|
"learning_rate": 1.5606936416184973e-05,
|
|
"loss": 0.0233,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.012166375180594631,
|
|
"grad_norm": 0.2036123424768448,
|
|
"learning_rate": 1.6184971098265897e-05,
|
|
"loss": 0.0252,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.012600888579901583,
|
|
"grad_norm": 0.3722895681858063,
|
|
"learning_rate": 1.676300578034682e-05,
|
|
"loss": 0.027,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.013035401979208534,
|
|
"grad_norm": 0.33059456944465637,
|
|
"learning_rate": 1.7341040462427746e-05,
|
|
"loss": 0.0311,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.013469915378515485,
|
|
"grad_norm": 0.2505900263786316,
|
|
"learning_rate": 1.7919075144508673e-05,
|
|
"loss": 0.022,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.013904428777822437,
|
|
"grad_norm": 0.2660132348537445,
|
|
"learning_rate": 1.8497109826589598e-05,
|
|
"loss": 0.0282,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.014338942177129388,
|
|
"grad_norm": 0.22333598136901855,
|
|
"learning_rate": 1.9075144508670522e-05,
|
|
"loss": 0.0245,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.01477345557643634,
|
|
"grad_norm": 0.2619096338748932,
|
|
"learning_rate": 1.9653179190751446e-05,
|
|
"loss": 0.0235,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.015207968975743289,
|
|
"grad_norm": 0.26429101824760437,
|
|
"learning_rate": 1.9999993661544142e-05,
|
|
"loss": 0.0221,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.01564248237505024,
|
|
"grad_norm": 0.16357919573783875,
|
|
"learning_rate": 1.999992235400801e-05,
|
|
"loss": 0.0232,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.016076995774357193,
|
|
"grad_norm": 0.181153804063797,
|
|
"learning_rate": 1.9999771816432783e-05,
|
|
"loss": 0.0259,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.016511509173664143,
|
|
"grad_norm": 0.2861078679561615,
|
|
"learning_rate": 1.9999542050011175e-05,
|
|
"loss": 0.0215,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.016946022572971092,
|
|
"grad_norm": 0.19298051297664642,
|
|
"learning_rate": 1.999923305656364e-05,
|
|
"loss": 0.0205,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.017380535972278045,
|
|
"grad_norm": 0.16827619075775146,
|
|
"learning_rate": 1.999884483853836e-05,
|
|
"loss": 0.0251,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.017815049371584995,
|
|
"grad_norm": 0.16084188222885132,
|
|
"learning_rate": 1.9998377399011203e-05,
|
|
"loss": 0.0253,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.018249562770891948,
|
|
"grad_norm": 0.2285071611404419,
|
|
"learning_rate": 1.9997830741685734e-05,
|
|
"loss": 0.0233,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.018684076170198897,
|
|
"grad_norm": 0.1760404258966446,
|
|
"learning_rate": 1.9997204870893147e-05,
|
|
"loss": 0.0255,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.01911858956950585,
|
|
"grad_norm": 0.17383626103401184,
|
|
"learning_rate": 1.9996499791592257e-05,
|
|
"loss": 0.0224,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.0195531029688128,
|
|
"grad_norm": 0.182758167386055,
|
|
"learning_rate": 1.9995715509369456e-05,
|
|
"loss": 0.0242,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.019987616368119753,
|
|
"grad_norm": 0.22543829679489136,
|
|
"learning_rate": 1.999485203043866e-05,
|
|
"loss": 0.0204,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.020422129767426703,
|
|
"grad_norm": 0.19388648867607117,
|
|
"learning_rate": 1.9993909361641272e-05,
|
|
"loss": 0.0221,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.020856643166733656,
|
|
"grad_norm": 0.2187209576368332,
|
|
"learning_rate": 1.999288751044612e-05,
|
|
"loss": 0.0214,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.021291156566040605,
|
|
"grad_norm": 0.27493494749069214,
|
|
"learning_rate": 1.9991786484949397e-05,
|
|
"loss": 0.0247,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.021725669965347555,
|
|
"grad_norm": 0.17861245572566986,
|
|
"learning_rate": 1.99906062938746e-05,
|
|
"loss": 0.026,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.022160183364654508,
|
|
"grad_norm": 0.16928374767303467,
|
|
"learning_rate": 1.9989346946572455e-05,
|
|
"loss": 0.0244,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.022594696763961458,
|
|
"grad_norm": 0.5377978086471558,
|
|
"learning_rate": 1.998800845302086e-05,
|
|
"loss": 0.0226,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.02302921016326841,
|
|
"grad_norm": 0.15883971750736237,
|
|
"learning_rate": 1.9986590823824785e-05,
|
|
"loss": 0.0216,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.02346372356257536,
|
|
"grad_norm": 0.16129736602306366,
|
|
"learning_rate": 1.99850940702162e-05,
|
|
"loss": 0.0194,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.023898236961882313,
|
|
"grad_norm": 0.1318095475435257,
|
|
"learning_rate": 1.9983518204053976e-05,
|
|
"loss": 0.0179,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.024332750361189263,
|
|
"grad_norm": 0.13563190400600433,
|
|
"learning_rate": 1.9981863237823807e-05,
|
|
"loss": 0.0192,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.024767263760496216,
|
|
"grad_norm": 0.19693246483802795,
|
|
"learning_rate": 1.9980129184638103e-05,
|
|
"loss": 0.0192,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.025201777159803165,
|
|
"grad_norm": 0.14829251170158386,
|
|
"learning_rate": 1.9978316058235875e-05,
|
|
"loss": 0.0207,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.025636290559110115,
|
|
"grad_norm": 0.19513817131519318,
|
|
"learning_rate": 1.9976423872982646e-05,
|
|
"loss": 0.0167,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.026070803958417068,
|
|
"grad_norm": 0.20041579008102417,
|
|
"learning_rate": 1.9974452643870318e-05,
|
|
"loss": 0.0236,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.026505317357724018,
|
|
"grad_norm": 0.1073588877916336,
|
|
"learning_rate": 1.9972402386517076e-05,
|
|
"loss": 0.0136,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.02693983075703097,
|
|
"grad_norm": 0.1381412297487259,
|
|
"learning_rate": 1.997027311716724e-05,
|
|
"loss": 0.0177,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.02737434415633792,
|
|
"grad_norm": 0.1919044554233551,
|
|
"learning_rate": 1.9968064852691145e-05,
|
|
"loss": 0.0173,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.027808857555644873,
|
|
"grad_norm": 0.14668576419353485,
|
|
"learning_rate": 1.9965777610585023e-05,
|
|
"loss": 0.0214,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.028243370954951823,
|
|
"grad_norm": 0.13156649470329285,
|
|
"learning_rate": 1.9963411408970837e-05,
|
|
"loss": 0.0194,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.028677884354258776,
|
|
"grad_norm": 0.17079275846481323,
|
|
"learning_rate": 1.9960966266596164e-05,
|
|
"loss": 0.0169,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.029112397753565725,
|
|
"grad_norm": 0.09269203990697861,
|
|
"learning_rate": 1.995844220283402e-05,
|
|
"loss": 0.019,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.02954691115287268,
|
|
"grad_norm": 0.21620836853981018,
|
|
"learning_rate": 1.995583923768273e-05,
|
|
"loss": 0.0192,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.029981424552179628,
|
|
"grad_norm": 0.15077561140060425,
|
|
"learning_rate": 1.995315739176576e-05,
|
|
"loss": 0.0216,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.030415937951486578,
|
|
"grad_norm": 0.21490907669067383,
|
|
"learning_rate": 1.9950396686331543e-05,
|
|
"loss": 0.0182,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.03085045135079353,
|
|
"grad_norm": 0.1409023404121399,
|
|
"learning_rate": 1.9947557143253337e-05,
|
|
"loss": 0.0174,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.03128496475010048,
|
|
"grad_norm": 0.11508463323116302,
|
|
"learning_rate": 1.9944638785029024e-05,
|
|
"loss": 0.0155,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.03171947814940743,
|
|
"grad_norm": 0.10164955258369446,
|
|
"learning_rate": 1.9941641634780942e-05,
|
|
"loss": 0.0148,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.032153991548714386,
|
|
"grad_norm": 0.18500874936580658,
|
|
"learning_rate": 1.993856571625572e-05,
|
|
"loss": 0.0157,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.03258850494802133,
|
|
"grad_norm": 0.3012203872203827,
|
|
"learning_rate": 1.9935411053824057e-05,
|
|
"loss": 0.0161,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.033023018347328285,
|
|
"grad_norm": 0.1304362714290619,
|
|
"learning_rate": 1.9932177672480544e-05,
|
|
"loss": 0.0177,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.03345753174663524,
|
|
"grad_norm": 0.20605377852916718,
|
|
"learning_rate": 1.992886559784348e-05,
|
|
"loss": 0.0188,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.033892045145942185,
|
|
"grad_norm": 0.14996512234210968,
|
|
"learning_rate": 1.9925474856154645e-05,
|
|
"loss": 0.014,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.03432655854524914,
|
|
"grad_norm": 0.2049788534641266,
|
|
"learning_rate": 1.992200547427911e-05,
|
|
"loss": 0.0206,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.03476107194455609,
|
|
"grad_norm": 0.16461384296417236,
|
|
"learning_rate": 1.9918457479705012e-05,
|
|
"loss": 0.0153,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.035195585343863044,
|
|
"grad_norm": 0.13572026789188385,
|
|
"learning_rate": 1.9914830900543346e-05,
|
|
"loss": 0.02,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.03563009874316999,
|
|
"grad_norm": 0.1633780300617218,
|
|
"learning_rate": 1.991112576552774e-05,
|
|
"loss": 0.0203,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.03606461214247694,
|
|
"grad_norm": 0.14304646849632263,
|
|
"learning_rate": 1.9907342104014213e-05,
|
|
"loss": 0.0166,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.036499125541783896,
|
|
"grad_norm": 0.09156439453363419,
|
|
"learning_rate": 1.9903479945980964e-05,
|
|
"loss": 0.0144,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.03693363894109085,
|
|
"grad_norm": 0.103766530752182,
|
|
"learning_rate": 1.9899539322028128e-05,
|
|
"loss": 0.0169,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.037368152340397795,
|
|
"grad_norm": 0.18641023337841034,
|
|
"learning_rate": 1.9895520263377523e-05,
|
|
"loss": 0.0201,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.03780266573970475,
|
|
"grad_norm": 0.156049445271492,
|
|
"learning_rate": 1.989142280187242e-05,
|
|
"loss": 0.0191,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.0382371791390117,
|
|
"grad_norm": 0.19983117282390594,
|
|
"learning_rate": 1.9887246969977266e-05,
|
|
"loss": 0.0145,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.03867169253831865,
|
|
"grad_norm": 0.14561262726783752,
|
|
"learning_rate": 1.9882992800777458e-05,
|
|
"loss": 0.0132,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.0391062059376256,
|
|
"grad_norm": 0.13224513828754425,
|
|
"learning_rate": 1.987866032797905e-05,
|
|
"loss": 0.0144,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.03954071933693255,
|
|
"grad_norm": 0.1451365202665329,
|
|
"learning_rate": 1.987424958590851e-05,
|
|
"loss": 0.0156,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.039975232736239506,
|
|
"grad_norm": 0.18520617485046387,
|
|
"learning_rate": 1.9869760609512435e-05,
|
|
"loss": 0.0159,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.04040974613554645,
|
|
"grad_norm": 0.15158046782016754,
|
|
"learning_rate": 1.9865193434357284e-05,
|
|
"loss": 0.0169,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.040844259534853405,
|
|
"grad_norm": 0.18894895911216736,
|
|
"learning_rate": 1.9860548096629084e-05,
|
|
"loss": 0.015,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.04127877293416036,
|
|
"grad_norm": 0.15075677633285522,
|
|
"learning_rate": 1.985582463313314e-05,
|
|
"loss": 0.0186,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.04171328633346731,
|
|
"grad_norm": 0.14799635112285614,
|
|
"learning_rate": 1.985102308129377e-05,
|
|
"loss": 0.0172,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.04214779973277426,
|
|
"grad_norm": 0.14480791985988617,
|
|
"learning_rate": 1.9846143479153986e-05,
|
|
"loss": 0.0181,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.04258231313208121,
|
|
"grad_norm": 0.15664492547512054,
|
|
"learning_rate": 1.9841185865375186e-05,
|
|
"loss": 0.0206,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.043016826531388164,
|
|
"grad_norm": 0.1123814582824707,
|
|
"learning_rate": 1.9836150279236878e-05,
|
|
"loss": 0.0146,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.04345133993069511,
|
|
"grad_norm": 0.16255418956279755,
|
|
"learning_rate": 1.9831036760636334e-05,
|
|
"loss": 0.0176,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.04388585333000206,
|
|
"grad_norm": 0.1457909196615219,
|
|
"learning_rate": 1.98258453500883e-05,
|
|
"loss": 0.0183,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.044320366729309016,
|
|
"grad_norm": 0.15886881947517395,
|
|
"learning_rate": 1.982057608872466e-05,
|
|
"loss": 0.0221,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.04475488012861597,
|
|
"grad_norm": 0.1742968112230301,
|
|
"learning_rate": 1.981522901829411e-05,
|
|
"loss": 0.0181,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.045189393527922915,
|
|
"grad_norm": 0.12152547389268875,
|
|
"learning_rate": 1.9809804181161857e-05,
|
|
"loss": 0.019,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.04562390692722987,
|
|
"grad_norm": 0.16744455695152283,
|
|
"learning_rate": 1.9804301620309223e-05,
|
|
"loss": 0.0175,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.04605842032653682,
|
|
"grad_norm": 0.21038441359996796,
|
|
"learning_rate": 1.9798721379333363e-05,
|
|
"loss": 0.0171,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.046492933725843774,
|
|
"grad_norm": 0.17659246921539307,
|
|
"learning_rate": 1.9793063502446894e-05,
|
|
"loss": 0.0149,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.04692744712515072,
|
|
"grad_norm": 0.10321566462516785,
|
|
"learning_rate": 1.978732803447754e-05,
|
|
"loss": 0.0161,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.04736196052445767,
|
|
"grad_norm": 0.16017237305641174,
|
|
"learning_rate": 1.9781515020867793e-05,
|
|
"loss": 0.0161,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.047796473923764626,
|
|
"grad_norm": 0.13999983668327332,
|
|
"learning_rate": 1.9775624507674543e-05,
|
|
"loss": 0.0126,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.04823098732307157,
|
|
"grad_norm": 0.08775936812162399,
|
|
"learning_rate": 1.9769656541568703e-05,
|
|
"loss": 0.0125,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.048665500722378525,
|
|
"grad_norm": 0.11605579406023026,
|
|
"learning_rate": 1.9763611169834865e-05,
|
|
"loss": 0.0161,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.04910001412168548,
|
|
"grad_norm": 0.16576404869556427,
|
|
"learning_rate": 1.9757488440370904e-05,
|
|
"loss": 0.0137,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.04953452752099243,
|
|
"grad_norm": 0.11074787378311157,
|
|
"learning_rate": 1.9751288401687603e-05,
|
|
"loss": 0.0124,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.04996904092029938,
|
|
"grad_norm": 0.11228064447641373,
|
|
"learning_rate": 1.9745011102908277e-05,
|
|
"loss": 0.0179,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.05040355431960633,
|
|
"grad_norm": 0.13807277381420135,
|
|
"learning_rate": 1.9738656593768372e-05,
|
|
"loss": 0.0142,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.050838067718913284,
|
|
"grad_norm": 0.13158458471298218,
|
|
"learning_rate": 1.9732224924615083e-05,
|
|
"loss": 0.0165,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.05127258111822023,
|
|
"grad_norm": 0.13589569926261902,
|
|
"learning_rate": 1.9725716146406948e-05,
|
|
"loss": 0.0196,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.05170709451752718,
|
|
"grad_norm": 0.08806575834751129,
|
|
"learning_rate": 1.9719130310713438e-05,
|
|
"loss": 0.0123,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.052141607916834136,
|
|
"grad_norm": 0.1415325552225113,
|
|
"learning_rate": 1.971246746971456e-05,
|
|
"loss": 0.0177,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.05257612131614109,
|
|
"grad_norm": 0.10823646932840347,
|
|
"learning_rate": 1.9705727676200443e-05,
|
|
"loss": 0.0125,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.053010634715448035,
|
|
"grad_norm": 0.15576744079589844,
|
|
"learning_rate": 1.9698910983570907e-05,
|
|
"loss": 0.0167,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.05344514811475499,
|
|
"grad_norm": 0.19083163142204285,
|
|
"learning_rate": 1.9692017445835057e-05,
|
|
"loss": 0.016,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.05387966151406194,
|
|
"grad_norm": 0.17201757431030273,
|
|
"learning_rate": 1.968504711761084e-05,
|
|
"loss": 0.0199,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.054314174913368894,
|
|
"grad_norm": 0.129535973072052,
|
|
"learning_rate": 1.9678000054124626e-05,
|
|
"loss": 0.0175,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.05474868831267584,
|
|
"grad_norm": 0.1300555020570755,
|
|
"learning_rate": 1.9670876311210763e-05,
|
|
"loss": 0.0159,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.05518320171198279,
|
|
"grad_norm": 0.17606855928897858,
|
|
"learning_rate": 1.9663675945311125e-05,
|
|
"loss": 0.0163,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.055617715111289746,
|
|
"grad_norm": 0.12871307134628296,
|
|
"learning_rate": 1.9656399013474686e-05,
|
|
"loss": 0.0153,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.05605222851059669,
|
|
"grad_norm": 0.12462568283081055,
|
|
"learning_rate": 1.9649045573357053e-05,
|
|
"loss": 0.0155,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.056486741909903646,
|
|
"grad_norm": 0.10148585587739944,
|
|
"learning_rate": 1.9641615683220017e-05,
|
|
"loss": 0.0173,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.0569212553092106,
|
|
"grad_norm": 0.1246766597032547,
|
|
"learning_rate": 1.9634109401931076e-05,
|
|
"loss": 0.0146,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.05735576870851755,
|
|
"grad_norm": 0.10318879783153534,
|
|
"learning_rate": 1.962652678896299e-05,
|
|
"loss": 0.0161,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.0577902821078245,
|
|
"grad_norm": 0.14044098556041718,
|
|
"learning_rate": 1.9618867904393303e-05,
|
|
"loss": 0.0134,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.05822479550713145,
|
|
"grad_norm": 0.1771666258573532,
|
|
"learning_rate": 1.9611132808903854e-05,
|
|
"loss": 0.014,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.058659308906438404,
|
|
"grad_norm": 0.13592560589313507,
|
|
"learning_rate": 1.960332156378031e-05,
|
|
"loss": 0.0154,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.05909382230574536,
|
|
"grad_norm": 0.1836591511964798,
|
|
"learning_rate": 1.9595434230911676e-05,
|
|
"loss": 0.015,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.0595283357050523,
|
|
"grad_norm": 0.1101338267326355,
|
|
"learning_rate": 1.9587470872789813e-05,
|
|
"loss": 0.0137,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.059962849104359256,
|
|
"grad_norm": 0.11457395553588867,
|
|
"learning_rate": 1.957943155250892e-05,
|
|
"loss": 0.0139,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.06039736250366621,
|
|
"grad_norm": 0.11563432216644287,
|
|
"learning_rate": 1.9571316333765066e-05,
|
|
"loss": 0.0141,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.060831875902973155,
|
|
"grad_norm": 0.21389071643352509,
|
|
"learning_rate": 1.9563125280855655e-05,
|
|
"loss": 0.0148,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.06126638930228011,
|
|
"grad_norm": 0.12076102942228317,
|
|
"learning_rate": 1.9554858458678935e-05,
|
|
"loss": 0.0136,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.06170090270158706,
|
|
"grad_norm": 0.14556382596492767,
|
|
"learning_rate": 1.9546515932733482e-05,
|
|
"loss": 0.0134,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.062135416100894014,
|
|
"grad_norm": 0.10479048639535904,
|
|
"learning_rate": 1.9538097769117673e-05,
|
|
"loss": 0.0159,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.06256992950020096,
|
|
"grad_norm": 0.1761564463376999,
|
|
"learning_rate": 1.9529604034529167e-05,
|
|
"loss": 0.0169,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.06300444289950792,
|
|
"grad_norm": 0.11594516783952713,
|
|
"learning_rate": 1.952103479626438e-05,
|
|
"loss": 0.014,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.06343895629881487,
|
|
"grad_norm": 0.16869281232357025,
|
|
"learning_rate": 1.951239012221795e-05,
|
|
"loss": 0.0132,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.06387346969812181,
|
|
"grad_norm": 0.12788125872612,
|
|
"learning_rate": 1.9503670080882196e-05,
|
|
"loss": 0.0166,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.06430798309742877,
|
|
"grad_norm": 0.14619000256061554,
|
|
"learning_rate": 1.949487474134657e-05,
|
|
"loss": 0.0163,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.06474249649673572,
|
|
"grad_norm": 0.12555566430091858,
|
|
"learning_rate": 1.9486004173297127e-05,
|
|
"loss": 0.0133,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.06517700989604266,
|
|
"grad_norm": 0.17209427058696747,
|
|
"learning_rate": 1.9477058447015958e-05,
|
|
"loss": 0.0173,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.06561152329534962,
|
|
"grad_norm": 0.11963009834289551,
|
|
"learning_rate": 1.9468037633380638e-05,
|
|
"loss": 0.0142,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.06604603669465657,
|
|
"grad_norm": 0.21612398326396942,
|
|
"learning_rate": 1.9458941803863662e-05,
|
|
"loss": 0.0151,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.06648055009396352,
|
|
"grad_norm": 0.10102367401123047,
|
|
"learning_rate": 1.9449771030531884e-05,
|
|
"loss": 0.0135,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.06691506349327048,
|
|
"grad_norm": 0.12625013291835785,
|
|
"learning_rate": 1.9440525386045938e-05,
|
|
"loss": 0.0166,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.06734957689257742,
|
|
"grad_norm": 0.14682821929454803,
|
|
"learning_rate": 1.9431204943659673e-05,
|
|
"loss": 0.014,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.06778409029188437,
|
|
"grad_norm": 0.07684620469808578,
|
|
"learning_rate": 1.9421809777219566e-05,
|
|
"loss": 0.0124,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.06821860369119133,
|
|
"grad_norm": 0.17129260301589966,
|
|
"learning_rate": 1.9412339961164125e-05,
|
|
"loss": 0.0198,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.06865311709049828,
|
|
"grad_norm": 0.1657508760690689,
|
|
"learning_rate": 1.9402795570523337e-05,
|
|
"loss": 0.0153,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.06908763048980524,
|
|
"grad_norm": 0.1310373842716217,
|
|
"learning_rate": 1.9393176680918023e-05,
|
|
"loss": 0.0143,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.06952214388911218,
|
|
"grad_norm": 0.11203637719154358,
|
|
"learning_rate": 1.938348336855928e-05,
|
|
"loss": 0.0161,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.06995665728841913,
|
|
"grad_norm": 0.10988133400678635,
|
|
"learning_rate": 1.9373715710247855e-05,
|
|
"loss": 0.0157,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.07039117068772609,
|
|
"grad_norm": 0.12275862693786621,
|
|
"learning_rate": 1.9363873783373538e-05,
|
|
"loss": 0.0145,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.07082568408703303,
|
|
"grad_norm": 0.0957581177353859,
|
|
"learning_rate": 1.935395766591457e-05,
|
|
"loss": 0.0122,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.07126019748633998,
|
|
"grad_norm": 0.1373092085123062,
|
|
"learning_rate": 1.934396743643699e-05,
|
|
"loss": 0.0104,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.07169471088564694,
|
|
"grad_norm": 0.11010999977588654,
|
|
"learning_rate": 1.9333903174094042e-05,
|
|
"loss": 0.0129,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.07212922428495389,
|
|
"grad_norm": 0.1349271684885025,
|
|
"learning_rate": 1.9323764958625538e-05,
|
|
"loss": 0.0146,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.07256373768426083,
|
|
"grad_norm": 0.12207391858100891,
|
|
"learning_rate": 1.931355287035722e-05,
|
|
"loss": 0.0146,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.07299825108356779,
|
|
"grad_norm": 0.1334318071603775,
|
|
"learning_rate": 1.9303266990200132e-05,
|
|
"loss": 0.0143,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.07343276448287474,
|
|
"grad_norm": 0.151198148727417,
|
|
"learning_rate": 1.9292907399649974e-05,
|
|
"loss": 0.0108,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.0738672778821817,
|
|
"grad_norm": 0.14836585521697998,
|
|
"learning_rate": 1.928247418078646e-05,
|
|
"loss": 0.0158,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.07430179128148864,
|
|
"grad_norm": 0.11701719462871552,
|
|
"learning_rate": 1.927196741627267e-05,
|
|
"loss": 0.0165,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.07473630468079559,
|
|
"grad_norm": 0.14887355268001556,
|
|
"learning_rate": 1.926138718935438e-05,
|
|
"loss": 0.0125,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.07517081808010255,
|
|
"grad_norm": 0.09698404371738434,
|
|
"learning_rate": 1.9250733583859426e-05,
|
|
"loss": 0.0143,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.0756053314794095,
|
|
"grad_norm": 0.12021283805370331,
|
|
"learning_rate": 1.9240006684197018e-05,
|
|
"loss": 0.016,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.07603984487871644,
|
|
"grad_norm": 0.1289375126361847,
|
|
"learning_rate": 1.9229206575357086e-05,
|
|
"loss": 0.0196,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.0764743582780234,
|
|
"grad_norm": 0.09976855665445328,
|
|
"learning_rate": 1.9218333342909595e-05,
|
|
"loss": 0.0137,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.07690887167733035,
|
|
"grad_norm": 0.14193719625473022,
|
|
"learning_rate": 1.9207387073003882e-05,
|
|
"loss": 0.0125,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.0773433850766373,
|
|
"grad_norm": 0.11004018038511276,
|
|
"learning_rate": 1.9196367852367958e-05,
|
|
"loss": 0.0132,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.07777789847594425,
|
|
"grad_norm": 0.08808097243309021,
|
|
"learning_rate": 1.918527576830783e-05,
|
|
"loss": 0.0159,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.0782124118752512,
|
|
"grad_norm": 0.10530976951122284,
|
|
"learning_rate": 1.9174110908706803e-05,
|
|
"loss": 0.0152,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.07864692527455816,
|
|
"grad_norm": 0.1097542867064476,
|
|
"learning_rate": 1.916287336202479e-05,
|
|
"loss": 0.0119,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.0790814386738651,
|
|
"grad_norm": 0.09065093100070953,
|
|
"learning_rate": 1.9151563217297612e-05,
|
|
"loss": 0.0124,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.07951595207317205,
|
|
"grad_norm": 0.1255994588136673,
|
|
"learning_rate": 1.9140180564136285e-05,
|
|
"loss": 0.0136,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.07995046547247901,
|
|
"grad_norm": 0.14145658910274506,
|
|
"learning_rate": 1.912872549272631e-05,
|
|
"loss": 0.0157,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.08038497887178596,
|
|
"grad_norm": 0.12845374643802643,
|
|
"learning_rate": 1.9117198093826973e-05,
|
|
"loss": 0.0139,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.0808194922710929,
|
|
"grad_norm": 0.13135316967964172,
|
|
"learning_rate": 1.910559845877061e-05,
|
|
"loss": 0.0126,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.08125400567039986,
|
|
"grad_norm": 0.09882261604070663,
|
|
"learning_rate": 1.9093926679461883e-05,
|
|
"loss": 0.012,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.08168851906970681,
|
|
"grad_norm": 0.12513484060764313,
|
|
"learning_rate": 1.9082182848377066e-05,
|
|
"loss": 0.012,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.08212303246901376,
|
|
"grad_norm": 0.11127033084630966,
|
|
"learning_rate": 1.90703670585633e-05,
|
|
"loss": 0.0138,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.08255754586832072,
|
|
"grad_norm": 0.09939371049404144,
|
|
"learning_rate": 1.9058479403637867e-05,
|
|
"loss": 0.014,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.08299205926762766,
|
|
"grad_norm": 0.15272824466228485,
|
|
"learning_rate": 1.9046519977787424e-05,
|
|
"loss": 0.0116,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.08342657266693462,
|
|
"grad_norm": 0.11801430583000183,
|
|
"learning_rate": 1.9034488875767296e-05,
|
|
"loss": 0.0117,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.08386108606624157,
|
|
"grad_norm": 0.12052637338638306,
|
|
"learning_rate": 1.9022386192900682e-05,
|
|
"loss": 0.0169,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.08429559946554852,
|
|
"grad_norm": 0.13256338238716125,
|
|
"learning_rate": 1.9010212025077938e-05,
|
|
"loss": 0.0125,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.08473011286485548,
|
|
"grad_norm": 0.09443484246730804,
|
|
"learning_rate": 1.8997966468755785e-05,
|
|
"loss": 0.014,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.08516462626416242,
|
|
"grad_norm": 0.10231075435876846,
|
|
"learning_rate": 1.898564962095657e-05,
|
|
"loss": 0.0107,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.08559913966346937,
|
|
"grad_norm": 0.10564391314983368,
|
|
"learning_rate": 1.8973261579267486e-05,
|
|
"loss": 0.0129,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.08603365306277633,
|
|
"grad_norm": 0.14694425463676453,
|
|
"learning_rate": 1.8960802441839794e-05,
|
|
"loss": 0.0118,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.08646816646208327,
|
|
"grad_norm": 0.14167533814907074,
|
|
"learning_rate": 1.894827230738806e-05,
|
|
"loss": 0.0136,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.08690267986139022,
|
|
"grad_norm": 0.11116527765989304,
|
|
"learning_rate": 1.8935671275189356e-05,
|
|
"loss": 0.0136,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.08733719326069718,
|
|
"grad_norm": 0.10359754413366318,
|
|
"learning_rate": 1.8922999445082484e-05,
|
|
"loss": 0.0115,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.08777170666000413,
|
|
"grad_norm": 0.10434489697217941,
|
|
"learning_rate": 1.8910256917467183e-05,
|
|
"loss": 0.0118,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.08820622005931109,
|
|
"grad_norm": 0.1596977263689041,
|
|
"learning_rate": 1.8897443793303334e-05,
|
|
"loss": 0.0119,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.08864073345861803,
|
|
"grad_norm": 0.11361852288246155,
|
|
"learning_rate": 1.888456017411016e-05,
|
|
"loss": 0.013,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.08907524685792498,
|
|
"grad_norm": 0.10693772882223129,
|
|
"learning_rate": 1.8871606161965416e-05,
|
|
"loss": 0.0142,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.08950976025723194,
|
|
"grad_norm": 0.11561683565378189,
|
|
"learning_rate": 1.8858581859504587e-05,
|
|
"loss": 0.0297,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.08994427365653888,
|
|
"grad_norm": 0.2715816795825958,
|
|
"learning_rate": 1.8845487369920076e-05,
|
|
"loss": 0.0105,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.09037878705584583,
|
|
"grad_norm": 0.08823924511671066,
|
|
"learning_rate": 1.883232279696038e-05,
|
|
"loss": 0.014,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.09081330045515279,
|
|
"grad_norm": 0.2058764398097992,
|
|
"learning_rate": 1.8819088244929275e-05,
|
|
"loss": 0.0129,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.09124781385445974,
|
|
"grad_norm": 0.11656977981328964,
|
|
"learning_rate": 1.8805783818684976e-05,
|
|
"loss": 0.0127,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.09168232725376668,
|
|
"grad_norm": 0.14761896431446075,
|
|
"learning_rate": 1.8792409623639325e-05,
|
|
"loss": 0.0149,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.09211684065307364,
|
|
"grad_norm": 0.18636038899421692,
|
|
"learning_rate": 1.8778965765756946e-05,
|
|
"loss": 0.0152,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.09255135405238059,
|
|
"grad_norm": 0.11357667297124863,
|
|
"learning_rate": 1.8765452351554408e-05,
|
|
"loss": 0.0134,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.09298586745168755,
|
|
"grad_norm": 0.26886075735092163,
|
|
"learning_rate": 1.875186948809937e-05,
|
|
"loss": 0.0108,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.0934203808509945,
|
|
"grad_norm": 0.1092400997877121,
|
|
"learning_rate": 1.8738217283009747e-05,
|
|
"loss": 0.0135,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.09385489425030144,
|
|
"grad_norm": 0.138901486992836,
|
|
"learning_rate": 1.872449584445286e-05,
|
|
"loss": 0.0116,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.0942894076496084,
|
|
"grad_norm": 0.0775412917137146,
|
|
"learning_rate": 1.8710705281144557e-05,
|
|
"loss": 0.015,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.09472392104891535,
|
|
"grad_norm": 0.12415778636932373,
|
|
"learning_rate": 1.869684570234838e-05,
|
|
"loss": 0.0135,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.09515843444822229,
|
|
"grad_norm": 0.15818843245506287,
|
|
"learning_rate": 1.8682917217874675e-05,
|
|
"loss": 0.0153,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.09559294784752925,
|
|
"grad_norm": 0.15443842113018036,
|
|
"learning_rate": 1.8668919938079738e-05,
|
|
"loss": 0.0146,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.0960274612468362,
|
|
"grad_norm": 0.14416638016700745,
|
|
"learning_rate": 1.8654853973864928e-05,
|
|
"loss": 0.0144,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.09646197464614314,
|
|
"grad_norm": 0.14622808992862701,
|
|
"learning_rate": 1.8640719436675806e-05,
|
|
"loss": 0.0127,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.0968964880454501,
|
|
"grad_norm": 0.10530474781990051,
|
|
"learning_rate": 1.862651643850123e-05,
|
|
"loss": 0.0132,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.09733100144475705,
|
|
"grad_norm": 0.13297906517982483,
|
|
"learning_rate": 1.8612245091872484e-05,
|
|
"loss": 0.0096,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.097765514844064,
|
|
"grad_norm": 0.09163651615381241,
|
|
"learning_rate": 1.8597905509862386e-05,
|
|
"loss": 0.0142,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.09820002824337096,
|
|
"grad_norm": 0.09490437805652618,
|
|
"learning_rate": 1.8583497806084377e-05,
|
|
"loss": 0.0104,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.0986345416426779,
|
|
"grad_norm": 0.12775211036205292,
|
|
"learning_rate": 1.856902209469164e-05,
|
|
"loss": 0.0182,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.09906905504198486,
|
|
"grad_norm": 0.14316552877426147,
|
|
"learning_rate": 1.8554478490376186e-05,
|
|
"loss": 0.0141,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.09950356844129181,
|
|
"grad_norm": 0.4003185033798218,
|
|
"learning_rate": 1.8539867108367937e-05,
|
|
"loss": 0.01,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.09993808184059876,
|
|
"grad_norm": 0.14583055675029755,
|
|
"learning_rate": 1.8525188064433827e-05,
|
|
"loss": 0.0143,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.10037259523990572,
|
|
"grad_norm": 0.18036730587482452,
|
|
"learning_rate": 1.8510441474876893e-05,
|
|
"loss": 0.0136,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.10080710863921266,
|
|
"grad_norm": 0.17284591495990753,
|
|
"learning_rate": 1.8495627456535316e-05,
|
|
"loss": 0.0127,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.10124162203851961,
|
|
"grad_norm": 0.11936520785093307,
|
|
"learning_rate": 1.8480746126781544e-05,
|
|
"loss": 0.0123,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.10167613543782657,
|
|
"grad_norm": 0.11931222677230835,
|
|
"learning_rate": 1.846579760352132e-05,
|
|
"loss": 0.0125,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.10211064883713351,
|
|
"grad_norm": 0.10272102057933807,
|
|
"learning_rate": 1.845078200519277e-05,
|
|
"loss": 0.0176,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.10254516223644046,
|
|
"grad_norm": 0.2629345655441284,
|
|
"learning_rate": 1.8435699450765467e-05,
|
|
"loss": 0.0127,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.10297967563574742,
|
|
"grad_norm": 0.1450505554676056,
|
|
"learning_rate": 1.8420550059739476e-05,
|
|
"loss": 0.0121,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.10341418903505437,
|
|
"grad_norm": 0.1271376609802246,
|
|
"learning_rate": 1.840533395214441e-05,
|
|
"loss": 0.012,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.10384870243436133,
|
|
"grad_norm": 0.11844556778669357,
|
|
"learning_rate": 1.839005124853849e-05,
|
|
"loss": 0.0166,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.10428321583366827,
|
|
"grad_norm": 0.16082872450351715,
|
|
"learning_rate": 1.837470207000757e-05,
|
|
"loss": 0.0122,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.10471772923297522,
|
|
"grad_norm": 0.12687557935714722,
|
|
"learning_rate": 1.83592865381642e-05,
|
|
"loss": 0.0118,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.10515224263228218,
|
|
"grad_norm": 0.17328034341335297,
|
|
"learning_rate": 1.8343804775146646e-05,
|
|
"loss": 0.0142,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.10558675603158912,
|
|
"grad_norm": 0.09038952738046646,
|
|
"learning_rate": 1.8328256903617928e-05,
|
|
"loss": 0.0156,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.10602126943089607,
|
|
"grad_norm": 0.096098393201828,
|
|
"learning_rate": 1.8312643046764854e-05,
|
|
"loss": 0.0151,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.10645578283020303,
|
|
"grad_norm": 0.10437561571598053,
|
|
"learning_rate": 1.829696332829703e-05,
|
|
"loss": 0.0129,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.10689029622950998,
|
|
"grad_norm": 0.13580961525440216,
|
|
"learning_rate": 1.8281217872445894e-05,
|
|
"loss": 0.0114,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.10732480962881692,
|
|
"grad_norm": 0.1774880290031433,
|
|
"learning_rate": 1.8265406803963723e-05,
|
|
"loss": 0.0135,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.10775932302812388,
|
|
"grad_norm": 0.12563155591487885,
|
|
"learning_rate": 1.8249530248122643e-05,
|
|
"loss": 0.0118,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.10819383642743083,
|
|
"grad_norm": 0.0847230926156044,
|
|
"learning_rate": 1.8233588330713648e-05,
|
|
"loss": 0.0141,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.10862834982673779,
|
|
"grad_norm": 0.11921481043100357,
|
|
"learning_rate": 1.8217581178045588e-05,
|
|
"loss": 0.0097,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.10906286322604473,
|
|
"grad_norm": 0.14641766250133514,
|
|
"learning_rate": 1.8201508916944187e-05,
|
|
"loss": 0.0134,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.10949737662535168,
|
|
"grad_norm": 0.10427770018577576,
|
|
"learning_rate": 1.818537167475102e-05,
|
|
"loss": 0.012,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.10993189002465864,
|
|
"grad_norm": 0.09014574438333511,
|
|
"learning_rate": 1.816916957932251e-05,
|
|
"loss": 0.0111,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.11036640342396559,
|
|
"grad_norm": 0.221757173538208,
|
|
"learning_rate": 1.815290275902892e-05,
|
|
"loss": 0.0135,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.11080091682327253,
|
|
"grad_norm": 0.1362486183643341,
|
|
"learning_rate": 1.813657134275333e-05,
|
|
"loss": 0.0112,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.11123543022257949,
|
|
"grad_norm": 0.08970905840396881,
|
|
"learning_rate": 1.812017545989063e-05,
|
|
"loss": 0.013,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.11166994362188644,
|
|
"grad_norm": 0.122174933552742,
|
|
"learning_rate": 1.810371524034646e-05,
|
|
"loss": 0.0127,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.11210445702119338,
|
|
"grad_norm": 0.10636741667985916,
|
|
"learning_rate": 1.808719081453622e-05,
|
|
"loss": 0.0113,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.11253897042050034,
|
|
"grad_norm": 0.08577335625886917,
|
|
"learning_rate": 1.8070602313384018e-05,
|
|
"loss": 0.0121,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.11297348381980729,
|
|
"grad_norm": 0.11767273396253586,
|
|
"learning_rate": 1.8053949868321637e-05,
|
|
"loss": 0.0111,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.11340799721911425,
|
|
"grad_norm": 0.1318366676568985,
|
|
"learning_rate": 1.803723361128748e-05,
|
|
"loss": 0.013,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.1138425106184212,
|
|
"grad_norm": 0.08728184551000595,
|
|
"learning_rate": 1.8020453674725557e-05,
|
|
"loss": 0.0128,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.11427702401772814,
|
|
"grad_norm": 0.07697049528360367,
|
|
"learning_rate": 1.8003610191584394e-05,
|
|
"loss": 0.0094,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.1147115374170351,
|
|
"grad_norm": 0.07770522683858871,
|
|
"learning_rate": 1.7986703295316018e-05,
|
|
"loss": 0.011,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.11514605081634205,
|
|
"grad_norm": 0.13932470977306366,
|
|
"learning_rate": 1.7969733119874866e-05,
|
|
"loss": 0.0131,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.115580564215649,
|
|
"grad_norm": 0.09085828810930252,
|
|
"learning_rate": 1.795269979971675e-05,
|
|
"loss": 0.0113,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.11601507761495596,
|
|
"grad_norm": 0.1439436376094818,
|
|
"learning_rate": 1.7935603469797784e-05,
|
|
"loss": 0.0119,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.1164495910142629,
|
|
"grad_norm": 0.08274544030427933,
|
|
"learning_rate": 1.7918444265573308e-05,
|
|
"loss": 0.0149,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.11688410441356985,
|
|
"grad_norm": 0.15263070166110992,
|
|
"learning_rate": 1.7901222322996815e-05,
|
|
"loss": 0.0124,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.11731861781287681,
|
|
"grad_norm": 0.059803709387779236,
|
|
"learning_rate": 1.788393777851889e-05,
|
|
"loss": 0.0117,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.11775313121218375,
|
|
"grad_norm": 0.10974439978599548,
|
|
"learning_rate": 1.7866590769086104e-05,
|
|
"loss": 0.0146,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.11818764461149071,
|
|
"grad_norm": 0.11613568663597107,
|
|
"learning_rate": 1.7849181432139946e-05,
|
|
"loss": 0.0134,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.11862215801079766,
|
|
"grad_norm": 0.0861983597278595,
|
|
"learning_rate": 1.7831709905615744e-05,
|
|
"loss": 0.0123,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.1190566714101046,
|
|
"grad_norm": 0.0727258250117302,
|
|
"learning_rate": 1.781417632794153e-05,
|
|
"loss": 0.0106,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.11949118480941157,
|
|
"grad_norm": 0.09521336853504181,
|
|
"learning_rate": 1.7796580838037e-05,
|
|
"loss": 0.0108,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.11992569820871851,
|
|
"grad_norm": 0.08138874173164368,
|
|
"learning_rate": 1.777892357531236e-05,
|
|
"loss": 0.011,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.12036021160802546,
|
|
"grad_norm": 0.10241363942623138,
|
|
"learning_rate": 1.776120467966727e-05,
|
|
"loss": 0.0131,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.12079472500733242,
|
|
"grad_norm": 0.11157892644405365,
|
|
"learning_rate": 1.774342429148969e-05,
|
|
"loss": 0.0107,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.12122923840663936,
|
|
"grad_norm": 0.08816739171743393,
|
|
"learning_rate": 1.7725582551654804e-05,
|
|
"loss": 0.0107,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.12166375180594631,
|
|
"grad_norm": 0.0863705575466156,
|
|
"learning_rate": 1.7707679601523882e-05,
|
|
"loss": 0.0111,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.12209826520525327,
|
|
"grad_norm": 0.12580861151218414,
|
|
"learning_rate": 1.7689715582943167e-05,
|
|
"loss": 0.0118,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.12253277860456022,
|
|
"grad_norm": 0.12607578933238983,
|
|
"learning_rate": 1.7671690638242763e-05,
|
|
"loss": 0.0136,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.12296729200386716,
|
|
"grad_norm": 0.12284299731254578,
|
|
"learning_rate": 1.7653604910235474e-05,
|
|
"loss": 0.0123,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.12340180540317412,
|
|
"grad_norm": 0.10298939794301987,
|
|
"learning_rate": 1.763545854221571e-05,
|
|
"loss": 0.0097,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.12383631880248107,
|
|
"grad_norm": 0.0938117504119873,
|
|
"learning_rate": 1.761725167795834e-05,
|
|
"loss": 0.0117,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.12427083220178803,
|
|
"grad_norm": 0.1595860719680786,
|
|
"learning_rate": 1.7598984461717532e-05,
|
|
"loss": 0.015,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.12470534560109497,
|
|
"grad_norm": 0.1603042334318161,
|
|
"learning_rate": 1.758065703822564e-05,
|
|
"loss": 0.0106,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.12513985900040192,
|
|
"grad_norm": 0.10560432821512222,
|
|
"learning_rate": 1.756226955269204e-05,
|
|
"loss": 0.011,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.12557437239970887,
|
|
"grad_norm": 0.1174912303686142,
|
|
"learning_rate": 1.7543822150801975e-05,
|
|
"loss": 0.0126,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.12600888579901584,
|
|
"grad_norm": 0.09937790781259537,
|
|
"learning_rate": 1.7525314978715425e-05,
|
|
"loss": 0.0129,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.1264433991983228,
|
|
"grad_norm": 0.10457811504602432,
|
|
"learning_rate": 1.7506748183065925e-05,
|
|
"loss": 0.0137,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.12687791259762973,
|
|
"grad_norm": 0.15306922793388367,
|
|
"learning_rate": 1.7488121910959405e-05,
|
|
"loss": 0.0116,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.12731242599693668,
|
|
"grad_norm": 0.0991855189204216,
|
|
"learning_rate": 1.7469436309973046e-05,
|
|
"loss": 0.0125,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.12774693939624363,
|
|
"grad_norm": 0.20841960608959198,
|
|
"learning_rate": 1.7450691528154087e-05,
|
|
"loss": 0.0134,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.12818145279555057,
|
|
"grad_norm": 0.09408193081617355,
|
|
"learning_rate": 1.7431887714018653e-05,
|
|
"loss": 0.0153,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.12861596619485755,
|
|
"grad_norm": 0.1006455197930336,
|
|
"learning_rate": 1.74130250165506e-05,
|
|
"loss": 0.0105,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.1290504795941645,
|
|
"grad_norm": 0.0960792750120163,
|
|
"learning_rate": 1.7394103585200316e-05,
|
|
"loss": 0.0128,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.12948499299347144,
|
|
"grad_norm": 0.15480937063694,
|
|
"learning_rate": 1.737512356988353e-05,
|
|
"loss": 0.0136,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.12991950639277838,
|
|
"grad_norm": 0.07801657170057297,
|
|
"learning_rate": 1.7356085120980154e-05,
|
|
"loss": 0.0105,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.13035401979208533,
|
|
"grad_norm": 0.0841207206249237,
|
|
"learning_rate": 1.7336988389333064e-05,
|
|
"loss": 0.0133,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.1307885331913923,
|
|
"grad_norm": 0.08529253304004669,
|
|
"learning_rate": 1.731783352624691e-05,
|
|
"loss": 0.0118,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.13122304659069925,
|
|
"grad_norm": 0.09804724156856537,
|
|
"learning_rate": 1.7298620683486927e-05,
|
|
"loss": 0.0117,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.1316575599900062,
|
|
"grad_norm": 0.09919776022434235,
|
|
"learning_rate": 1.7279350013277725e-05,
|
|
"loss": 0.0126,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.13209207338931314,
|
|
"grad_norm": 0.11483841389417648,
|
|
"learning_rate": 1.726002166830209e-05,
|
|
"loss": 0.011,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.1325265867886201,
|
|
"grad_norm": 0.12246759980916977,
|
|
"learning_rate": 1.7240635801699755e-05,
|
|
"loss": 0.0131,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.13296110018792703,
|
|
"grad_norm": 0.13971829414367676,
|
|
"learning_rate": 1.7221192567066215e-05,
|
|
"loss": 0.0103,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.133395613587234,
|
|
"grad_norm": 0.14394456148147583,
|
|
"learning_rate": 1.720169211845149e-05,
|
|
"loss": 0.0131,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.13383012698654095,
|
|
"grad_norm": 0.13212616741657257,
|
|
"learning_rate": 1.718213461035891e-05,
|
|
"loss": 0.0127,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.1342646403858479,
|
|
"grad_norm": 0.10326551645994186,
|
|
"learning_rate": 1.716252019774389e-05,
|
|
"loss": 0.0104,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.13469915378515485,
|
|
"grad_norm": 0.11226284503936768,
|
|
"learning_rate": 1.7142849036012706e-05,
|
|
"loss": 0.0116,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.1351336671844618,
|
|
"grad_norm": 0.10741928964853287,
|
|
"learning_rate": 1.712312128102126e-05,
|
|
"loss": 0.0107,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.13556818058376874,
|
|
"grad_norm": 0.3138485550880432,
|
|
"learning_rate": 1.710333708907384e-05,
|
|
"loss": 0.015,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.1360026939830757,
|
|
"grad_norm": 0.13233919441699982,
|
|
"learning_rate": 1.70834966169219e-05,
|
|
"loss": 0.0104,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.13643720738238266,
|
|
"grad_norm": 0.09749718010425568,
|
|
"learning_rate": 1.7063600021762798e-05,
|
|
"loss": 0.0112,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.1368717207816896,
|
|
"grad_norm": 0.11438994854688644,
|
|
"learning_rate": 1.7043647461238557e-05,
|
|
"loss": 0.0154,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.13730623418099655,
|
|
"grad_norm": 0.11394909024238586,
|
|
"learning_rate": 1.702363909343462e-05,
|
|
"loss": 0.0132,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.1377407475803035,
|
|
"grad_norm": 0.07735557854175568,
|
|
"learning_rate": 1.7003575076878593e-05,
|
|
"loss": 0.0119,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 0.13817526097961047,
|
|
"grad_norm": 0.08114404231309891,
|
|
"learning_rate": 1.6983455570538996e-05,
|
|
"loss": 0.0106,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.13860977437891742,
|
|
"grad_norm": 0.08775005489587784,
|
|
"learning_rate": 1.696328073382399e-05,
|
|
"loss": 0.0108,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 0.13904428777822436,
|
|
"grad_norm": 0.10539509356021881,
|
|
"learning_rate": 1.694305072658013e-05,
|
|
"loss": 0.0142,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.1394788011775313,
|
|
"grad_norm": 0.11596210300922394,
|
|
"learning_rate": 1.6922765709091085e-05,
|
|
"loss": 0.0116,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 0.13991331457683825,
|
|
"grad_norm": 0.11444628238677979,
|
|
"learning_rate": 1.6902425842076372e-05,
|
|
"loss": 0.0099,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.1403478279761452,
|
|
"grad_norm": 0.11727698147296906,
|
|
"learning_rate": 1.6882031286690095e-05,
|
|
"loss": 0.0099,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 0.14078234137545217,
|
|
"grad_norm": 0.06600234657526016,
|
|
"learning_rate": 1.6861582204519648e-05,
|
|
"loss": 0.0113,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.14121685477475912,
|
|
"grad_norm": 0.1237931102514267,
|
|
"learning_rate": 1.6841078757584445e-05,
|
|
"loss": 0.0109,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.14165136817406607,
|
|
"grad_norm": 0.12242930382490158,
|
|
"learning_rate": 1.6820521108334643e-05,
|
|
"loss": 0.0135,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.142085881573373,
|
|
"grad_norm": 0.1355576366186142,
|
|
"learning_rate": 1.6799909419649835e-05,
|
|
"loss": 0.0118,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 0.14252039497267996,
|
|
"grad_norm": 0.10697627067565918,
|
|
"learning_rate": 1.6779243854837784e-05,
|
|
"loss": 0.0113,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.14295490837198693,
|
|
"grad_norm": 0.12463831156492233,
|
|
"learning_rate": 1.675852457763311e-05,
|
|
"loss": 0.0106,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 0.14338942177129388,
|
|
"grad_norm": 0.1064586415886879,
|
|
"learning_rate": 1.6737751752196005e-05,
|
|
"loss": 0.0089,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.14382393517060083,
|
|
"grad_norm": 0.1283196061849594,
|
|
"learning_rate": 1.6716925543110916e-05,
|
|
"loss": 0.0105,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 0.14425844856990777,
|
|
"grad_norm": 0.09701595455408096,
|
|
"learning_rate": 1.669604611538527e-05,
|
|
"loss": 0.0112,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.14469296196921472,
|
|
"grad_norm": 0.06893642246723175,
|
|
"learning_rate": 1.6675113634448136e-05,
|
|
"loss": 0.0105,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 0.14512747536852166,
|
|
"grad_norm": 0.13216637074947357,
|
|
"learning_rate": 1.6654128266148926e-05,
|
|
"loss": 0.0135,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.14556198876782864,
|
|
"grad_norm": 0.09184638410806656,
|
|
"learning_rate": 1.6633090176756092e-05,
|
|
"loss": 0.0103,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.14599650216713558,
|
|
"grad_norm": 0.07556001096963882,
|
|
"learning_rate": 1.6611999532955783e-05,
|
|
"loss": 0.011,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.14643101556644253,
|
|
"grad_norm": 0.045008886605501175,
|
|
"learning_rate": 1.6590856501850562e-05,
|
|
"loss": 0.0093,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 0.14686552896574948,
|
|
"grad_norm": 0.07397887110710144,
|
|
"learning_rate": 1.6569661250958042e-05,
|
|
"loss": 0.0083,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.14730004236505642,
|
|
"grad_norm": 0.09333399683237076,
|
|
"learning_rate": 1.6548413948209584e-05,
|
|
"loss": 0.0116,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 0.1477345557643634,
|
|
"grad_norm": 0.09300416707992554,
|
|
"learning_rate": 1.6527114761948957e-05,
|
|
"loss": 0.0124,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.14816906916367034,
|
|
"grad_norm": 0.0548076257109642,
|
|
"learning_rate": 1.6505763860931002e-05,
|
|
"loss": 0.0104,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 0.1486035825629773,
|
|
"grad_norm": 0.09666861593723297,
|
|
"learning_rate": 1.6484361414320312e-05,
|
|
"loss": 0.0119,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.14903809596228423,
|
|
"grad_norm": 0.1338212788105011,
|
|
"learning_rate": 1.6462907591689875e-05,
|
|
"loss": 0.0128,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 0.14947260936159118,
|
|
"grad_norm": 0.11156731843948364,
|
|
"learning_rate": 1.644140256301972e-05,
|
|
"loss": 0.0108,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.14990712276089813,
|
|
"grad_norm": 0.09104102104902267,
|
|
"learning_rate": 1.6419846498695605e-05,
|
|
"loss": 0.0124,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.1503416361602051,
|
|
"grad_norm": 0.13596515357494354,
|
|
"learning_rate": 1.639823956950764e-05,
|
|
"loss": 0.0157,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.15077614955951205,
|
|
"grad_norm": 0.13181833922863007,
|
|
"learning_rate": 1.6376581946648928e-05,
|
|
"loss": 0.0111,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 0.151210662958819,
|
|
"grad_norm": 0.08273448050022125,
|
|
"learning_rate": 1.6354873801714236e-05,
|
|
"loss": 0.0089,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.15164517635812594,
|
|
"grad_norm": 0.17430652678012848,
|
|
"learning_rate": 1.6333115306698625e-05,
|
|
"loss": 0.0092,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 0.15207968975743288,
|
|
"grad_norm": 0.10232073068618774,
|
|
"learning_rate": 1.6311306633996064e-05,
|
|
"loss": 0.0094,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.15251420315673986,
|
|
"grad_norm": 0.11144959181547165,
|
|
"learning_rate": 1.62894479563981e-05,
|
|
"loss": 0.0114,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 0.1529487165560468,
|
|
"grad_norm": 0.0800149068236351,
|
|
"learning_rate": 1.6267539447092463e-05,
|
|
"loss": 0.0099,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.15338322995535375,
|
|
"grad_norm": 0.09478965401649475,
|
|
"learning_rate": 1.6245581279661708e-05,
|
|
"loss": 0.0127,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 0.1538177433546607,
|
|
"grad_norm": 0.10897224396467209,
|
|
"learning_rate": 1.6223573628081826e-05,
|
|
"loss": 0.0101,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.15425225675396764,
|
|
"grad_norm": 0.06671247631311417,
|
|
"learning_rate": 1.620151666672089e-05,
|
|
"loss": 0.0091,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.1546867701532746,
|
|
"grad_norm": 0.10703366249799728,
|
|
"learning_rate": 1.617941057033764e-05,
|
|
"loss": 0.0139,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.15512128355258156,
|
|
"grad_norm": 0.09891420602798462,
|
|
"learning_rate": 1.6157255514080134e-05,
|
|
"loss": 0.0119,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 0.1555557969518885,
|
|
"grad_norm": 0.10249175131320953,
|
|
"learning_rate": 1.6135051673484323e-05,
|
|
"loss": 0.0123,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.15599031035119545,
|
|
"grad_norm": 0.12472105771303177,
|
|
"learning_rate": 1.6112799224472686e-05,
|
|
"loss": 0.0102,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 0.1564248237505024,
|
|
"grad_norm": 0.048791538923978806,
|
|
"learning_rate": 1.6090498343352844e-05,
|
|
"loss": 0.0106,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.15685933714980935,
|
|
"grad_norm": 0.08500496298074722,
|
|
"learning_rate": 1.606814920681613e-05,
|
|
"loss": 0.0109,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 0.15729385054911632,
|
|
"grad_norm": 0.07313550263643265,
|
|
"learning_rate": 1.6045751991936213e-05,
|
|
"loss": 0.0099,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.15772836394842327,
|
|
"grad_norm": 0.14947198331356049,
|
|
"learning_rate": 1.60233068761677e-05,
|
|
"loss": 0.0122,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.1581628773477302,
|
|
"grad_norm": 0.17013369500637054,
|
|
"learning_rate": 1.60008140373447e-05,
|
|
"loss": 0.0089,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.15859739074703716,
|
|
"grad_norm": 0.07465286552906036,
|
|
"learning_rate": 1.5978273653679458e-05,
|
|
"loss": 0.0088,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.1590319041463441,
|
|
"grad_norm": 0.0930761843919754,
|
|
"learning_rate": 1.5955685903760905e-05,
|
|
"loss": 0.0101,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.15946641754565105,
|
|
"grad_norm": 0.1549626737833023,
|
|
"learning_rate": 1.593305096655326e-05,
|
|
"loss": 0.0103,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.15990093094495803,
|
|
"grad_norm": 0.07674683630466461,
|
|
"learning_rate": 1.591036902139461e-05,
|
|
"loss": 0.0113,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.16033544434426497,
|
|
"grad_norm": 0.06362561136484146,
|
|
"learning_rate": 1.5887640247995495e-05,
|
|
"loss": 0.0122,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.16076995774357192,
|
|
"grad_norm": 0.09524749219417572,
|
|
"learning_rate": 1.5864864826437473e-05,
|
|
"loss": 0.0093,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.16120447114287886,
|
|
"grad_norm": 0.10837098211050034,
|
|
"learning_rate": 1.5842042937171696e-05,
|
|
"loss": 0.0099,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.1616389845421858,
|
|
"grad_norm": 0.1316443234682083,
|
|
"learning_rate": 1.5819174761017485e-05,
|
|
"loss": 0.0115,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.16207349794149278,
|
|
"grad_norm": 0.08324498683214188,
|
|
"learning_rate": 1.57962604791609e-05,
|
|
"loss": 0.0101,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.16250801134079973,
|
|
"grad_norm": 0.12263214588165283,
|
|
"learning_rate": 1.5773300273153296e-05,
|
|
"loss": 0.0107,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.16294252474010668,
|
|
"grad_norm": 0.2212323546409607,
|
|
"learning_rate": 1.5750294324909886e-05,
|
|
"loss": 0.0111,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.16337703813941362,
|
|
"grad_norm": 0.07996786385774612,
|
|
"learning_rate": 1.57272428167083e-05,
|
|
"loss": 0.0113,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 0.16381155153872057,
|
|
"grad_norm": 0.10634341835975647,
|
|
"learning_rate": 1.570414593118715e-05,
|
|
"loss": 0.0115,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 0.16424606493802751,
|
|
"grad_norm": 0.1128893718123436,
|
|
"learning_rate": 1.5681003851344568e-05,
|
|
"loss": 0.0105,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 0.1646805783373345,
|
|
"grad_norm": 0.07602597773075104,
|
|
"learning_rate": 1.5657816760536767e-05,
|
|
"loss": 0.0096,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 0.16511509173664143,
|
|
"grad_norm": 0.12339795380830765,
|
|
"learning_rate": 1.5634584842476588e-05,
|
|
"loss": 0.0107,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.16554960513594838,
|
|
"grad_norm": 0.21459102630615234,
|
|
"learning_rate": 1.5611308281232038e-05,
|
|
"loss": 0.0121,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 0.16598411853525533,
|
|
"grad_norm": 0.07586748898029327,
|
|
"learning_rate": 1.5587987261224827e-05,
|
|
"loss": 0.0094,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 0.16641863193456227,
|
|
"grad_norm": 0.119880810379982,
|
|
"learning_rate": 1.556462196722893e-05,
|
|
"loss": 0.0132,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 0.16685314533386925,
|
|
"grad_norm": 0.10775720328092575,
|
|
"learning_rate": 1.55412125843691e-05,
|
|
"loss": 0.0091,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 0.1672876587331762,
|
|
"grad_norm": 0.1075601577758789,
|
|
"learning_rate": 1.5517759298119406e-05,
|
|
"loss": 0.0108,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.16772217213248314,
|
|
"grad_norm": 0.10800988227128983,
|
|
"learning_rate": 1.5494262294301768e-05,
|
|
"loss": 0.012,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 0.16815668553179008,
|
|
"grad_norm": 0.08098742365837097,
|
|
"learning_rate": 1.547072175908449e-05,
|
|
"loss": 0.0106,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 0.16859119893109703,
|
|
"grad_norm": 0.11291100829839706,
|
|
"learning_rate": 1.5447137878980768e-05,
|
|
"loss": 0.0074,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 0.16902571233040398,
|
|
"grad_norm": 0.13343898952007294,
|
|
"learning_rate": 1.5423510840847228e-05,
|
|
"loss": 0.0123,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 0.16946022572971095,
|
|
"grad_norm": 0.11526608467102051,
|
|
"learning_rate": 1.5399840831882442e-05,
|
|
"loss": 0.0139,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.1698947391290179,
|
|
"grad_norm": 0.07974295318126678,
|
|
"learning_rate": 1.5376128039625438e-05,
|
|
"loss": 0.0106,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 0.17032925252832484,
|
|
"grad_norm": 0.08782044053077698,
|
|
"learning_rate": 1.535237265195422e-05,
|
|
"loss": 0.0108,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 0.1707637659276318,
|
|
"grad_norm": 0.22049036622047424,
|
|
"learning_rate": 1.5328574857084277e-05,
|
|
"loss": 0.0164,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 0.17119827932693873,
|
|
"grad_norm": 0.11717794090509415,
|
|
"learning_rate": 1.53047348435671e-05,
|
|
"loss": 0.009,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 0.1716327927262457,
|
|
"grad_norm": 0.0919695645570755,
|
|
"learning_rate": 1.5280852800288672e-05,
|
|
"loss": 0.0093,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.17206730612555265,
|
|
"grad_norm": 0.11691190302371979,
|
|
"learning_rate": 1.5256928916467986e-05,
|
|
"loss": 0.0078,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 0.1725018195248596,
|
|
"grad_norm": 0.07742350548505783,
|
|
"learning_rate": 1.5232963381655536e-05,
|
|
"loss": 0.0108,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 0.17293633292416655,
|
|
"grad_norm": 0.07558136433362961,
|
|
"learning_rate": 1.5208956385731824e-05,
|
|
"loss": 0.008,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 0.1733708463234735,
|
|
"grad_norm": 0.10943376272916794,
|
|
"learning_rate": 1.5184908118905853e-05,
|
|
"loss": 0.0141,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 0.17380535972278044,
|
|
"grad_norm": 0.05014868825674057,
|
|
"learning_rate": 1.5160818771713609e-05,
|
|
"loss": 0.0082,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.1742398731220874,
|
|
"grad_norm": 0.09473850578069687,
|
|
"learning_rate": 1.5136688535016571e-05,
|
|
"loss": 0.0098,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 0.17467438652139436,
|
|
"grad_norm": 0.08004864305257797,
|
|
"learning_rate": 1.5112517600000179e-05,
|
|
"loss": 0.0132,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 0.1751088999207013,
|
|
"grad_norm": 0.07450365275144577,
|
|
"learning_rate": 1.5088306158172334e-05,
|
|
"loss": 0.0107,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 0.17554341332000825,
|
|
"grad_norm": 0.18986289203166962,
|
|
"learning_rate": 1.5064054401361872e-05,
|
|
"loss": 0.0098,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 0.1759779267193152,
|
|
"grad_norm": 0.08691735565662384,
|
|
"learning_rate": 1.5039762521717054e-05,
|
|
"loss": 0.0099,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.17641244011862217,
|
|
"grad_norm": 0.07385091483592987,
|
|
"learning_rate": 1.5015430711704027e-05,
|
|
"loss": 0.0109,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 0.17684695351792912,
|
|
"grad_norm": 0.141652449965477,
|
|
"learning_rate": 1.4991059164105318e-05,
|
|
"loss": 0.0113,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 0.17728146691723606,
|
|
"grad_norm": 0.08523985743522644,
|
|
"learning_rate": 1.496664807201829e-05,
|
|
"loss": 0.0098,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 0.177715980316543,
|
|
"grad_norm": 0.09805174171924591,
|
|
"learning_rate": 1.494219762885362e-05,
|
|
"loss": 0.01,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 0.17815049371584996,
|
|
"grad_norm": 0.08430161327123642,
|
|
"learning_rate": 1.4917708028333779e-05,
|
|
"loss": 0.0134,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.1785850071151569,
|
|
"grad_norm": 0.08099672198295593,
|
|
"learning_rate": 1.4893179464491461e-05,
|
|
"loss": 0.0098,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 0.17901952051446388,
|
|
"grad_norm": 0.11928766220808029,
|
|
"learning_rate": 1.4868612131668095e-05,
|
|
"loss": 0.0094,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 0.17945403391377082,
|
|
"grad_norm": 0.11964277923107147,
|
|
"learning_rate": 1.4844006224512254e-05,
|
|
"loss": 0.0108,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 0.17988854731307777,
|
|
"grad_norm": 0.077316053211689,
|
|
"learning_rate": 1.4819361937978162e-05,
|
|
"loss": 0.009,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 0.18032306071238471,
|
|
"grad_norm": 0.11310229450464249,
|
|
"learning_rate": 1.4794679467324106e-05,
|
|
"loss": 0.0083,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.18075757411169166,
|
|
"grad_norm": 0.09045251458883286,
|
|
"learning_rate": 1.4769959008110922e-05,
|
|
"loss": 0.0095,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 0.18119208751099863,
|
|
"grad_norm": 0.09859993308782578,
|
|
"learning_rate": 1.4745200756200418e-05,
|
|
"loss": 0.0085,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 0.18162660091030558,
|
|
"grad_norm": 0.09493473172187805,
|
|
"learning_rate": 1.4720404907753849e-05,
|
|
"loss": 0.0085,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 0.18206111430961253,
|
|
"grad_norm": 0.10200127214193344,
|
|
"learning_rate": 1.4695571659230343e-05,
|
|
"loss": 0.0108,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 0.18249562770891947,
|
|
"grad_norm": 0.10617578774690628,
|
|
"learning_rate": 1.4670701207385354e-05,
|
|
"loss": 0.0113,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.18293014110822642,
|
|
"grad_norm": 0.0633573904633522,
|
|
"learning_rate": 1.46457937492691e-05,
|
|
"loss": 0.0104,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 0.18336465450753336,
|
|
"grad_norm": 0.17527233064174652,
|
|
"learning_rate": 1.4620849482224996e-05,
|
|
"loss": 0.0103,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 0.18379916790684034,
|
|
"grad_norm": 0.11846215277910233,
|
|
"learning_rate": 1.459586860388811e-05,
|
|
"loss": 0.0107,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 0.18423368130614728,
|
|
"grad_norm": 0.06739620864391327,
|
|
"learning_rate": 1.4570851312183572e-05,
|
|
"loss": 0.0081,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 0.18466819470545423,
|
|
"grad_norm": 0.07695576548576355,
|
|
"learning_rate": 1.4545797805325017e-05,
|
|
"loss": 0.0087,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.18510270810476118,
|
|
"grad_norm": 0.08914867043495178,
|
|
"learning_rate": 1.4520708281813023e-05,
|
|
"loss": 0.0113,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 0.18553722150406812,
|
|
"grad_norm": 0.10220155119895935,
|
|
"learning_rate": 1.4495582940433525e-05,
|
|
"loss": 0.0086,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 0.1859717349033751,
|
|
"grad_norm": 0.12287895381450653,
|
|
"learning_rate": 1.4470421980256253e-05,
|
|
"loss": 0.0121,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 0.18640624830268204,
|
|
"grad_norm": 0.11932995170354843,
|
|
"learning_rate": 1.4445225600633128e-05,
|
|
"loss": 0.0088,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 0.186840761701989,
|
|
"grad_norm": 0.08786104619503021,
|
|
"learning_rate": 1.4419994001196727e-05,
|
|
"loss": 0.0096,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.18727527510129593,
|
|
"grad_norm": 0.13282403349876404,
|
|
"learning_rate": 1.4394727381858655e-05,
|
|
"loss": 0.0079,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 0.18770978850060288,
|
|
"grad_norm": 0.10811638087034225,
|
|
"learning_rate": 1.4369425942807994e-05,
|
|
"loss": 0.009,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 0.18814430189990983,
|
|
"grad_norm": 0.16801166534423828,
|
|
"learning_rate": 1.4344089884509702e-05,
|
|
"loss": 0.0106,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 0.1885788152992168,
|
|
"grad_norm": 0.0781073272228241,
|
|
"learning_rate": 1.4318719407703022e-05,
|
|
"loss": 0.0081,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 0.18901332869852375,
|
|
"grad_norm": 0.08049961924552917,
|
|
"learning_rate": 1.4293314713399904e-05,
|
|
"loss": 0.009,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 0.1894478420978307,
|
|
"grad_norm": 0.1333671361207962,
|
|
"learning_rate": 1.4267876002883406e-05,
|
|
"loss": 0.0134,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 0.18988235549713764,
|
|
"grad_norm": 0.129620760679245,
|
|
"learning_rate": 1.424240347770609e-05,
|
|
"loss": 0.0118,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 0.19031686889644459,
|
|
"grad_norm": 0.09441433101892471,
|
|
"learning_rate": 1.4216897339688446e-05,
|
|
"loss": 0.0111,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 0.19075138229575153,
|
|
"grad_norm": 0.08081834018230438,
|
|
"learning_rate": 1.419135779091727e-05,
|
|
"loss": 0.0097,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 0.1911858956950585,
|
|
"grad_norm": 0.06669897586107254,
|
|
"learning_rate": 1.4165785033744081e-05,
|
|
"loss": 0.0124,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.19162040909436545,
|
|
"grad_norm": 0.09304026514291763,
|
|
"learning_rate": 1.4140179270783506e-05,
|
|
"loss": 0.0106,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 0.1920549224936724,
|
|
"grad_norm": 0.09239599853754044,
|
|
"learning_rate": 1.4114540704911679e-05,
|
|
"loss": 0.0098,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 0.19248943589297934,
|
|
"grad_norm": 0.09333720058202744,
|
|
"learning_rate": 1.4088869539264636e-05,
|
|
"loss": 0.0117,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 0.1929239492922863,
|
|
"grad_norm": 0.10393518954515457,
|
|
"learning_rate": 1.40631659772367e-05,
|
|
"loss": 0.0102,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 0.19335846269159326,
|
|
"grad_norm": 0.059147026389837265,
|
|
"learning_rate": 1.4037430222478876e-05,
|
|
"loss": 0.0097,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 0.1937929760909002,
|
|
"grad_norm": 0.09978174418210983,
|
|
"learning_rate": 1.4011662478897239e-05,
|
|
"loss": 0.0109,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 0.19422748949020716,
|
|
"grad_norm": 0.08174576610326767,
|
|
"learning_rate": 1.3985862950651296e-05,
|
|
"loss": 0.0102,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 0.1946620028895141,
|
|
"grad_norm": 0.05495322868227959,
|
|
"learning_rate": 1.3960031842152404e-05,
|
|
"loss": 0.0112,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 0.19509651628882105,
|
|
"grad_norm": 0.10337530076503754,
|
|
"learning_rate": 1.3934169358062128e-05,
|
|
"loss": 0.0101,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 0.195531029688128,
|
|
"grad_norm": 0.08432216197252274,
|
|
"learning_rate": 1.3908275703290616e-05,
|
|
"loss": 0.01,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.19596554308743497,
|
|
"grad_norm": 0.10446428507566452,
|
|
"learning_rate": 1.3882351082994996e-05,
|
|
"loss": 0.0078,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 0.19640005648674191,
|
|
"grad_norm": 0.1255977302789688,
|
|
"learning_rate": 1.385639570257772e-05,
|
|
"loss": 0.0091,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 0.19683456988604886,
|
|
"grad_norm": 0.19239485263824463,
|
|
"learning_rate": 1.3830409767684976e-05,
|
|
"loss": 0.0082,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 0.1972690832853558,
|
|
"grad_norm": 0.0951613038778305,
|
|
"learning_rate": 1.380439348420502e-05,
|
|
"loss": 0.0083,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 0.19770359668466275,
|
|
"grad_norm": 0.10827091336250305,
|
|
"learning_rate": 1.377834705826657e-05,
|
|
"loss": 0.0084,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 0.19813811008396973,
|
|
"grad_norm": 0.0801621600985527,
|
|
"learning_rate": 1.3752270696237164e-05,
|
|
"loss": 0.0072,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 0.19857262348327667,
|
|
"grad_norm": 0.08785197138786316,
|
|
"learning_rate": 1.3726164604721523e-05,
|
|
"loss": 0.0094,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 0.19900713688258362,
|
|
"grad_norm": 0.07245610654354095,
|
|
"learning_rate": 1.370002899055992e-05,
|
|
"loss": 0.0098,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 0.19944165028189056,
|
|
"grad_norm": 0.12170054018497467,
|
|
"learning_rate": 1.3673864060826531e-05,
|
|
"loss": 0.0097,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 0.1998761636811975,
|
|
"grad_norm": 0.09241359680891037,
|
|
"learning_rate": 1.3647670022827815e-05,
|
|
"loss": 0.0079,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.20031067708050446,
|
|
"grad_norm": 0.08759228140115738,
|
|
"learning_rate": 1.3621447084100843e-05,
|
|
"loss": 0.0115,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 0.20074519047981143,
|
|
"grad_norm": 0.09032405912876129,
|
|
"learning_rate": 1.3595195452411674e-05,
|
|
"loss": 0.0122,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 0.20117970387911838,
|
|
"grad_norm": 0.05508997291326523,
|
|
"learning_rate": 1.3568915335753704e-05,
|
|
"loss": 0.0094,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 0.20161421727842532,
|
|
"grad_norm": 0.05879232659935951,
|
|
"learning_rate": 1.3542606942346019e-05,
|
|
"loss": 0.0095,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 0.20204873067773227,
|
|
"grad_norm": 0.09512756764888763,
|
|
"learning_rate": 1.3516270480631738e-05,
|
|
"loss": 0.0104,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 0.20248324407703922,
|
|
"grad_norm": 0.06975122541189194,
|
|
"learning_rate": 1.3489906159276374e-05,
|
|
"loss": 0.0095,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 0.2029177574763462,
|
|
"grad_norm": 0.08634886890649796,
|
|
"learning_rate": 1.3463514187166169e-05,
|
|
"loss": 0.0102,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 0.20335227087565313,
|
|
"grad_norm": 0.1217721700668335,
|
|
"learning_rate": 1.343709477340644e-05,
|
|
"loss": 0.0107,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 0.20378678427496008,
|
|
"grad_norm": 0.08943139016628265,
|
|
"learning_rate": 1.3410648127319941e-05,
|
|
"loss": 0.0095,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 0.20422129767426703,
|
|
"grad_norm": 0.0762837752699852,
|
|
"learning_rate": 1.3384174458445167e-05,
|
|
"loss": 0.0069,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.20465581107357397,
|
|
"grad_norm": 0.3115338385105133,
|
|
"learning_rate": 1.335767397653474e-05,
|
|
"loss": 0.0076,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 0.20509032447288092,
|
|
"grad_norm": 0.07176689803600311,
|
|
"learning_rate": 1.3331146891553708e-05,
|
|
"loss": 0.0082,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 0.2055248378721879,
|
|
"grad_norm": 0.05090424790978432,
|
|
"learning_rate": 1.3304593413677893e-05,
|
|
"loss": 0.0101,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 0.20595935127149484,
|
|
"grad_norm": 0.1412171572446823,
|
|
"learning_rate": 1.327801375329225e-05,
|
|
"loss": 0.0078,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 0.20639386467080179,
|
|
"grad_norm": 0.09543070197105408,
|
|
"learning_rate": 1.325140812098916e-05,
|
|
"loss": 0.0126,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 0.20682837807010873,
|
|
"grad_norm": 0.1111612468957901,
|
|
"learning_rate": 1.322477672756679e-05,
|
|
"loss": 0.011,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 0.20726289146941568,
|
|
"grad_norm": 0.09902411699295044,
|
|
"learning_rate": 1.3198119784027415e-05,
|
|
"loss": 0.0097,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 0.20769740486872265,
|
|
"grad_norm": 0.11887188255786896,
|
|
"learning_rate": 1.3171437501575739e-05,
|
|
"loss": 0.0111,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 0.2081319182680296,
|
|
"grad_norm": 0.07737130671739578,
|
|
"learning_rate": 1.3144730091617235e-05,
|
|
"loss": 0.0098,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 0.20856643166733654,
|
|
"grad_norm": 0.09391709417104721,
|
|
"learning_rate": 1.3117997765756455e-05,
|
|
"loss": 0.0097,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.2090009450666435,
|
|
"grad_norm": 0.1096915453672409,
|
|
"learning_rate": 1.3091240735795372e-05,
|
|
"loss": 0.0086,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 0.20943545846595044,
|
|
"grad_norm": 0.0931084156036377,
|
|
"learning_rate": 1.3064459213731679e-05,
|
|
"loss": 0.008,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 0.20986997186525738,
|
|
"grad_norm": 0.08801384270191193,
|
|
"learning_rate": 1.3037653411757134e-05,
|
|
"loss": 0.0096,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 0.21030448526456436,
|
|
"grad_norm": 0.09326059371232986,
|
|
"learning_rate": 1.301082354225585e-05,
|
|
"loss": 0.0089,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 0.2107389986638713,
|
|
"grad_norm": 0.08902314305305481,
|
|
"learning_rate": 1.2983969817802653e-05,
|
|
"loss": 0.01,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 0.21117351206317825,
|
|
"grad_norm": 0.06725908070802689,
|
|
"learning_rate": 1.2957092451161344e-05,
|
|
"loss": 0.0076,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 0.2116080254624852,
|
|
"grad_norm": 0.10939764231443405,
|
|
"learning_rate": 1.293019165528307e-05,
|
|
"loss": 0.0094,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 0.21204253886179214,
|
|
"grad_norm": 0.0729513093829155,
|
|
"learning_rate": 1.2903267643304588e-05,
|
|
"loss": 0.0101,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 0.21247705226109911,
|
|
"grad_norm": 0.09630395472049713,
|
|
"learning_rate": 1.2876320628546608e-05,
|
|
"loss": 0.0087,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 0.21291156566040606,
|
|
"grad_norm": 0.0898730531334877,
|
|
"learning_rate": 1.2849350824512097e-05,
|
|
"loss": 0.0091,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.213346079059713,
|
|
"grad_norm": 0.07306133955717087,
|
|
"learning_rate": 1.2822358444884568e-05,
|
|
"loss": 0.0083,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 0.21378059245901995,
|
|
"grad_norm": 0.04950597882270813,
|
|
"learning_rate": 1.279534370352642e-05,
|
|
"loss": 0.0081,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 0.2142151058583269,
|
|
"grad_norm": 0.06085862219333649,
|
|
"learning_rate": 1.276830681447721e-05,
|
|
"loss": 0.0076,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 0.21464961925763384,
|
|
"grad_norm": 0.1412172168493271,
|
|
"learning_rate": 1.2741247991951976e-05,
|
|
"loss": 0.0093,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 0.21508413265694082,
|
|
"grad_norm": 0.09206506609916687,
|
|
"learning_rate": 1.2714167450339551e-05,
|
|
"loss": 0.0107,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 0.21551864605624776,
|
|
"grad_norm": 0.10359804332256317,
|
|
"learning_rate": 1.268706540420083e-05,
|
|
"loss": 0.0087,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 0.2159531594555547,
|
|
"grad_norm": 0.08671776205301285,
|
|
"learning_rate": 1.2659942068267097e-05,
|
|
"loss": 0.0067,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 0.21638767285486166,
|
|
"grad_norm": 0.0956655815243721,
|
|
"learning_rate": 1.2632797657438317e-05,
|
|
"loss": 0.0087,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 0.2168221862541686,
|
|
"grad_norm": 0.08012635260820389,
|
|
"learning_rate": 1.2605632386781442e-05,
|
|
"loss": 0.0093,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 0.21725669965347558,
|
|
"grad_norm": 0.4396616220474243,
|
|
"learning_rate": 1.2578446471528678e-05,
|
|
"loss": 0.01,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.21769121305278252,
|
|
"grad_norm": 0.0880979672074318,
|
|
"learning_rate": 1.2551240127075815e-05,
|
|
"loss": 0.01,
|
|
"step": 5010
|
|
},
|
|
{
|
|
"epoch": 0.21812572645208947,
|
|
"grad_norm": 0.1420845091342926,
|
|
"learning_rate": 1.2524013568980496e-05,
|
|
"loss": 0.0111,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 0.21856023985139642,
|
|
"grad_norm": 0.08724194765090942,
|
|
"learning_rate": 1.249676701296053e-05,
|
|
"loss": 0.009,
|
|
"step": 5030
|
|
},
|
|
{
|
|
"epoch": 0.21899475325070336,
|
|
"grad_norm": 0.11142345517873764,
|
|
"learning_rate": 1.2469500674892159e-05,
|
|
"loss": 0.0082,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 0.2194292666500103,
|
|
"grad_norm": 0.0890979990363121,
|
|
"learning_rate": 1.244221477080836e-05,
|
|
"loss": 0.0091,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 0.21986378004931728,
|
|
"grad_norm": 0.05859982222318649,
|
|
"learning_rate": 1.2414909516897145e-05,
|
|
"loss": 0.0073,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 0.22029829344862423,
|
|
"grad_norm": 0.19758309423923492,
|
|
"learning_rate": 1.2387585129499815e-05,
|
|
"loss": 0.0092,
|
|
"step": 5070
|
|
},
|
|
{
|
|
"epoch": 0.22073280684793117,
|
|
"grad_norm": 0.0901278406381607,
|
|
"learning_rate": 1.2360241825109293e-05,
|
|
"loss": 0.0092,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 0.22116732024723812,
|
|
"grad_norm": 0.09502365440130234,
|
|
"learning_rate": 1.2332879820368358e-05,
|
|
"loss": 0.0098,
|
|
"step": 5090
|
|
},
|
|
{
|
|
"epoch": 0.22160183364654507,
|
|
"grad_norm": 0.0769268274307251,
|
|
"learning_rate": 1.2305499332067967e-05,
|
|
"loss": 0.0079,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.22203634704585204,
|
|
"grad_norm": 0.09428519010543823,
|
|
"learning_rate": 1.2278100577145526e-05,
|
|
"loss": 0.009,
|
|
"step": 5110
|
|
},
|
|
{
|
|
"epoch": 0.22247086044515899,
|
|
"grad_norm": 0.07567259669303894,
|
|
"learning_rate": 1.2250683772683151e-05,
|
|
"loss": 0.0105,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 0.22290537384446593,
|
|
"grad_norm": 0.095923513174057,
|
|
"learning_rate": 1.222324913590599e-05,
|
|
"loss": 0.013,
|
|
"step": 5130
|
|
},
|
|
{
|
|
"epoch": 0.22333988724377288,
|
|
"grad_norm": 0.09458699822425842,
|
|
"learning_rate": 1.2195796884180458e-05,
|
|
"loss": 0.0092,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 0.22377440064307982,
|
|
"grad_norm": 0.09393813461065292,
|
|
"learning_rate": 1.2168327235012544e-05,
|
|
"loss": 0.0078,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 0.22420891404238677,
|
|
"grad_norm": 0.12188570201396942,
|
|
"learning_rate": 1.2140840406046075e-05,
|
|
"loss": 0.0089,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 0.22464342744169374,
|
|
"grad_norm": 0.08482064306735992,
|
|
"learning_rate": 1.2113336615060996e-05,
|
|
"loss": 0.0076,
|
|
"step": 5170
|
|
},
|
|
{
|
|
"epoch": 0.2250779408410007,
|
|
"grad_norm": 0.1528569608926773,
|
|
"learning_rate": 1.2085816079971639e-05,
|
|
"loss": 0.0092,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 0.22551245424030764,
|
|
"grad_norm": 0.07889417558908463,
|
|
"learning_rate": 1.205827901882501e-05,
|
|
"loss": 0.008,
|
|
"step": 5190
|
|
},
|
|
{
|
|
"epoch": 0.22594696763961458,
|
|
"grad_norm": 0.10469748824834824,
|
|
"learning_rate": 1.2030725649799043e-05,
|
|
"loss": 0.01,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 0.22638148103892153,
|
|
"grad_norm": 0.13136431574821472,
|
|
"learning_rate": 1.2003156191200885e-05,
|
|
"loss": 0.0078,
|
|
"step": 5210
|
|
},
|
|
{
|
|
"epoch": 0.2268159944382285,
|
|
"grad_norm": 0.1373455971479416,
|
|
"learning_rate": 1.1975570861465156e-05,
|
|
"loss": 0.0108,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 0.22725050783753545,
|
|
"grad_norm": 0.051384877413511276,
|
|
"learning_rate": 1.194796987915223e-05,
|
|
"loss": 0.01,
|
|
"step": 5230
|
|
},
|
|
{
|
|
"epoch": 0.2276850212368424,
|
|
"grad_norm": 0.046507228165864944,
|
|
"learning_rate": 1.1920353462946503e-05,
|
|
"loss": 0.0109,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 0.22811953463614934,
|
|
"grad_norm": 0.1168011799454689,
|
|
"learning_rate": 1.1892721831654638e-05,
|
|
"loss": 0.0082,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 0.2285540480354563,
|
|
"grad_norm": 0.06604877859354019,
|
|
"learning_rate": 1.1865075204203866e-05,
|
|
"loss": 0.007,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 0.22898856143476323,
|
|
"grad_norm": 0.09276457130908966,
|
|
"learning_rate": 1.1837413799640216e-05,
|
|
"loss": 0.0083,
|
|
"step": 5270
|
|
},
|
|
{
|
|
"epoch": 0.2294230748340702,
|
|
"grad_norm": 0.11472596973180771,
|
|
"learning_rate": 1.1809737837126812e-05,
|
|
"loss": 0.0091,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 0.22985758823337715,
|
|
"grad_norm": 0.0845196470618248,
|
|
"learning_rate": 1.1782047535942117e-05,
|
|
"loss": 0.0093,
|
|
"step": 5290
|
|
},
|
|
{
|
|
"epoch": 0.2302921016326841,
|
|
"grad_norm": 0.11708565801382065,
|
|
"learning_rate": 1.1754343115478193e-05,
|
|
"loss": 0.0088,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 0.23072661503199104,
|
|
"grad_norm": 0.12564682960510254,
|
|
"learning_rate": 1.1726624795238981e-05,
|
|
"loss": 0.009,
|
|
"step": 5310
|
|
},
|
|
{
|
|
"epoch": 0.231161128431298,
|
|
"grad_norm": 0.08443303406238556,
|
|
"learning_rate": 1.1698892794838546e-05,
|
|
"loss": 0.0087,
|
|
"step": 5320
|
|
},
|
|
{
|
|
"epoch": 0.23159564183060496,
|
|
"grad_norm": 0.08368808776140213,
|
|
"learning_rate": 1.167114733399934e-05,
|
|
"loss": 0.0087,
|
|
"step": 5330
|
|
},
|
|
{
|
|
"epoch": 0.2320301552299119,
|
|
"grad_norm": 0.06150970607995987,
|
|
"learning_rate": 1.1643388632550468e-05,
|
|
"loss": 0.0083,
|
|
"step": 5340
|
|
},
|
|
{
|
|
"epoch": 0.23246466862921886,
|
|
"grad_norm": 0.06667175889015198,
|
|
"learning_rate": 1.1615616910425928e-05,
|
|
"loss": 0.0084,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 0.2328991820285258,
|
|
"grad_norm": 0.10415839403867722,
|
|
"learning_rate": 1.15878323876629e-05,
|
|
"loss": 0.0084,
|
|
"step": 5360
|
|
},
|
|
{
|
|
"epoch": 0.23333369542783275,
|
|
"grad_norm": 0.06974676251411438,
|
|
"learning_rate": 1.1560035284399977e-05,
|
|
"loss": 0.0101,
|
|
"step": 5370
|
|
},
|
|
{
|
|
"epoch": 0.2337682088271397,
|
|
"grad_norm": 0.07754170894622803,
|
|
"learning_rate": 1.1532225820875422e-05,
|
|
"loss": 0.009,
|
|
"step": 5380
|
|
},
|
|
{
|
|
"epoch": 0.23420272222644667,
|
|
"grad_norm": 0.09246724098920822,
|
|
"learning_rate": 1.1504404217425438e-05,
|
|
"loss": 0.0104,
|
|
"step": 5390
|
|
},
|
|
{
|
|
"epoch": 0.23463723562575362,
|
|
"grad_norm": 0.11340566724538803,
|
|
"learning_rate": 1.1476570694482406e-05,
|
|
"loss": 0.0099,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 0.23507174902506056,
|
|
"grad_norm": 0.05798724293708801,
|
|
"learning_rate": 1.1448725472573145e-05,
|
|
"loss": 0.0104,
|
|
"step": 5410
|
|
},
|
|
{
|
|
"epoch": 0.2355062624243675,
|
|
"grad_norm": 0.07213063538074493,
|
|
"learning_rate": 1.1420868772317184e-05,
|
|
"loss": 0.0101,
|
|
"step": 5420
|
|
},
|
|
{
|
|
"epoch": 0.23594077582367445,
|
|
"grad_norm": 0.10362295061349869,
|
|
"learning_rate": 1.1393000814424973e-05,
|
|
"loss": 0.0098,
|
|
"step": 5430
|
|
},
|
|
{
|
|
"epoch": 0.23637528922298143,
|
|
"grad_norm": 0.07319142669439316,
|
|
"learning_rate": 1.1365121819696163e-05,
|
|
"loss": 0.0097,
|
|
"step": 5440
|
|
},
|
|
{
|
|
"epoch": 0.23680980262228837,
|
|
"grad_norm": 0.13031233847141266,
|
|
"learning_rate": 1.1337232009017858e-05,
|
|
"loss": 0.0107,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 0.23724431602159532,
|
|
"grad_norm": 0.10126952081918716,
|
|
"learning_rate": 1.130933160336285e-05,
|
|
"loss": 0.0107,
|
|
"step": 5460
|
|
},
|
|
{
|
|
"epoch": 0.23767882942090227,
|
|
"grad_norm": 0.13342677056789398,
|
|
"learning_rate": 1.1281420823787883e-05,
|
|
"loss": 0.0098,
|
|
"step": 5470
|
|
},
|
|
{
|
|
"epoch": 0.2381133428202092,
|
|
"grad_norm": 0.257994145154953,
|
|
"learning_rate": 1.1253499891431882e-05,
|
|
"loss": 0.0073,
|
|
"step": 5480
|
|
},
|
|
{
|
|
"epoch": 0.23854785621951616,
|
|
"grad_norm": 0.12316708266735077,
|
|
"learning_rate": 1.1225569027514229e-05,
|
|
"loss": 0.0071,
|
|
"step": 5490
|
|
},
|
|
{
|
|
"epoch": 0.23898236961882313,
|
|
"grad_norm": 0.12356661260128021,
|
|
"learning_rate": 1.1197628453332986e-05,
|
|
"loss": 0.0092,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 0.23941688301813008,
|
|
"grad_norm": 0.05779719725251198,
|
|
"learning_rate": 1.1169678390263143e-05,
|
|
"loss": 0.0103,
|
|
"step": 5510
|
|
},
|
|
{
|
|
"epoch": 0.23985139641743702,
|
|
"grad_norm": 0.08832740038633347,
|
|
"learning_rate": 1.1141719059754884e-05,
|
|
"loss": 0.0083,
|
|
"step": 5520
|
|
},
|
|
{
|
|
"epoch": 0.24028590981674397,
|
|
"grad_norm": 0.11366628110408783,
|
|
"learning_rate": 1.1113750683331813e-05,
|
|
"loss": 0.0107,
|
|
"step": 5530
|
|
},
|
|
{
|
|
"epoch": 0.24072042321605092,
|
|
"grad_norm": 0.08835666626691818,
|
|
"learning_rate": 1.1085773482589206e-05,
|
|
"loss": 0.0091,
|
|
"step": 5540
|
|
},
|
|
{
|
|
"epoch": 0.2411549366153579,
|
|
"grad_norm": 0.04762517660856247,
|
|
"learning_rate": 1.1057787679192256e-05,
|
|
"loss": 0.0058,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 0.24158945001466484,
|
|
"grad_norm": 0.1362943947315216,
|
|
"learning_rate": 1.1029793494874312e-05,
|
|
"loss": 0.0107,
|
|
"step": 5560
|
|
},
|
|
{
|
|
"epoch": 0.24202396341397178,
|
|
"grad_norm": 0.0849776417016983,
|
|
"learning_rate": 1.1001791151435131e-05,
|
|
"loss": 0.0093,
|
|
"step": 5570
|
|
},
|
|
{
|
|
"epoch": 0.24245847681327873,
|
|
"grad_norm": 0.047240015119314194,
|
|
"learning_rate": 1.0973780870739111e-05,
|
|
"loss": 0.0068,
|
|
"step": 5580
|
|
},
|
|
{
|
|
"epoch": 0.24289299021258567,
|
|
"grad_norm": 0.07293111085891724,
|
|
"learning_rate": 1.0945762874713537e-05,
|
|
"loss": 0.0075,
|
|
"step": 5590
|
|
},
|
|
{
|
|
"epoch": 0.24332750361189262,
|
|
"grad_norm": 0.08692555129528046,
|
|
"learning_rate": 1.0917737385346828e-05,
|
|
"loss": 0.006,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 0.2437620170111996,
|
|
"grad_norm": 0.13119547069072723,
|
|
"learning_rate": 1.0889704624686766e-05,
|
|
"loss": 0.0098,
|
|
"step": 5610
|
|
},
|
|
{
|
|
"epoch": 0.24419653041050654,
|
|
"grad_norm": 0.14227084815502167,
|
|
"learning_rate": 1.0861664814838747e-05,
|
|
"loss": 0.008,
|
|
"step": 5620
|
|
},
|
|
{
|
|
"epoch": 0.2446310438098135,
|
|
"grad_norm": 0.06881032884120941,
|
|
"learning_rate": 1.083361817796403e-05,
|
|
"loss": 0.0113,
|
|
"step": 5630
|
|
},
|
|
{
|
|
"epoch": 0.24506555720912043,
|
|
"grad_norm": 0.12126508355140686,
|
|
"learning_rate": 1.0805564936277936e-05,
|
|
"loss": 0.0078,
|
|
"step": 5640
|
|
},
|
|
{
|
|
"epoch": 0.24550007060842738,
|
|
"grad_norm": 0.10700210928916931,
|
|
"learning_rate": 1.0777505312048152e-05,
|
|
"loss": 0.0101,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 0.24593458400773432,
|
|
"grad_norm": 0.08859797567129135,
|
|
"learning_rate": 1.0749439527592909e-05,
|
|
"loss": 0.0115,
|
|
"step": 5660
|
|
},
|
|
{
|
|
"epoch": 0.2463690974070413,
|
|
"grad_norm": 0.10005716234445572,
|
|
"learning_rate": 1.0721367805279251e-05,
|
|
"loss": 0.0087,
|
|
"step": 5670
|
|
},
|
|
{
|
|
"epoch": 0.24680361080634824,
|
|
"grad_norm": 0.06850054860115051,
|
|
"learning_rate": 1.0693290367521276e-05,
|
|
"loss": 0.0071,
|
|
"step": 5680
|
|
},
|
|
{
|
|
"epoch": 0.2472381242056552,
|
|
"grad_norm": 0.06010816991329193,
|
|
"learning_rate": 1.0665207436778353e-05,
|
|
"loss": 0.008,
|
|
"step": 5690
|
|
},
|
|
{
|
|
"epoch": 0.24767263760496214,
|
|
"grad_norm": 0.11750059574842453,
|
|
"learning_rate": 1.0637119235553388e-05,
|
|
"loss": 0.0088,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 0.24810715100426908,
|
|
"grad_norm": 0.09331734478473663,
|
|
"learning_rate": 1.0609025986391032e-05,
|
|
"loss": 0.0093,
|
|
"step": 5710
|
|
},
|
|
{
|
|
"epoch": 0.24854166440357606,
|
|
"grad_norm": 0.06403643637895584,
|
|
"learning_rate": 1.0580927911875938e-05,
|
|
"loss": 0.0066,
|
|
"step": 5720
|
|
},
|
|
{
|
|
"epoch": 0.248976177802883,
|
|
"grad_norm": 0.07363591343164444,
|
|
"learning_rate": 1.055282523463099e-05,
|
|
"loss": 0.0084,
|
|
"step": 5730
|
|
},
|
|
{
|
|
"epoch": 0.24941069120218995,
|
|
"grad_norm": 0.08989038318395615,
|
|
"learning_rate": 1.0524718177315536e-05,
|
|
"loss": 0.0064,
|
|
"step": 5740
|
|
},
|
|
{
|
|
"epoch": 0.2498452046014969,
|
|
"grad_norm": 0.12647745013237,
|
|
"learning_rate": 1.0496606962623632e-05,
|
|
"loss": 0.0088,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 0.25027971800080384,
|
|
"grad_norm": 0.07326851785182953,
|
|
"learning_rate": 1.0468491813282269e-05,
|
|
"loss": 0.0079,
|
|
"step": 5760
|
|
},
|
|
{
|
|
"epoch": 0.2507142314001108,
|
|
"grad_norm": 0.0844263806939125,
|
|
"learning_rate": 1.0440372952049618e-05,
|
|
"loss": 0.0078,
|
|
"step": 5770
|
|
},
|
|
{
|
|
"epoch": 0.25114874479941773,
|
|
"grad_norm": 0.0971388965845108,
|
|
"learning_rate": 1.0412250601713254e-05,
|
|
"loss": 0.007,
|
|
"step": 5780
|
|
},
|
|
{
|
|
"epoch": 0.2515832581987247,
|
|
"grad_norm": 0.10897351801395416,
|
|
"learning_rate": 1.03841249850884e-05,
|
|
"loss": 0.01,
|
|
"step": 5790
|
|
},
|
|
{
|
|
"epoch": 0.2520177715980317,
|
|
"grad_norm": 0.07884043455123901,
|
|
"learning_rate": 1.0355996325016152e-05,
|
|
"loss": 0.0112,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 0.2524522849973386,
|
|
"grad_norm": 0.08184106647968292,
|
|
"learning_rate": 1.0327864844361735e-05,
|
|
"loss": 0.0064,
|
|
"step": 5810
|
|
},
|
|
{
|
|
"epoch": 0.2528867983966456,
|
|
"grad_norm": 0.10301776975393295,
|
|
"learning_rate": 1.02997307660127e-05,
|
|
"loss": 0.0081,
|
|
"step": 5820
|
|
},
|
|
{
|
|
"epoch": 0.2533213117959525,
|
|
"grad_norm": 0.1618221551179886,
|
|
"learning_rate": 1.0271594312877196e-05,
|
|
"loss": 0.0091,
|
|
"step": 5830
|
|
},
|
|
{
|
|
"epoch": 0.25375582519525947,
|
|
"grad_norm": 0.07037736475467682,
|
|
"learning_rate": 1.024345570788218e-05,
|
|
"loss": 0.0082,
|
|
"step": 5840
|
|
},
|
|
{
|
|
"epoch": 0.2541903385945664,
|
|
"grad_norm": 0.13534654676914215,
|
|
"learning_rate": 1.0215315173971661e-05,
|
|
"loss": 0.0097,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 0.25462485199387336,
|
|
"grad_norm": 0.11499059200286865,
|
|
"learning_rate": 1.0187172934104934e-05,
|
|
"loss": 0.0089,
|
|
"step": 5860
|
|
},
|
|
{
|
|
"epoch": 0.2550593653931803,
|
|
"grad_norm": 0.09554502367973328,
|
|
"learning_rate": 1.01590292112548e-05,
|
|
"loss": 0.0101,
|
|
"step": 5870
|
|
},
|
|
{
|
|
"epoch": 0.25549387879248725,
|
|
"grad_norm": 0.07305638492107391,
|
|
"learning_rate": 1.013088422840582e-05,
|
|
"loss": 0.0071,
|
|
"step": 5880
|
|
},
|
|
{
|
|
"epoch": 0.2559283921917942,
|
|
"grad_norm": 0.06403714418411255,
|
|
"learning_rate": 1.0102738208552535e-05,
|
|
"loss": 0.0089,
|
|
"step": 5890
|
|
},
|
|
{
|
|
"epoch": 0.25636290559110114,
|
|
"grad_norm": 0.06630440801382065,
|
|
"learning_rate": 1.0074591374697701e-05,
|
|
"loss": 0.0073,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 0.25679741899040814,
|
|
"grad_norm": 0.05963248759508133,
|
|
"learning_rate": 1.0046443949850531e-05,
|
|
"loss": 0.0101,
|
|
"step": 5910
|
|
},
|
|
{
|
|
"epoch": 0.2572319323897151,
|
|
"grad_norm": 0.06275281310081482,
|
|
"learning_rate": 1.00182961570249e-05,
|
|
"loss": 0.0078,
|
|
"step": 5920
|
|
},
|
|
{
|
|
"epoch": 0.25766644578902204,
|
|
"grad_norm": 0.0786055400967598,
|
|
"learning_rate": 9.990148219237623e-06,
|
|
"loss": 0.0078,
|
|
"step": 5930
|
|
},
|
|
{
|
|
"epoch": 0.258100959188329,
|
|
"grad_norm": 0.09199898689985275,
|
|
"learning_rate": 9.96200035950665e-06,
|
|
"loss": 0.0071,
|
|
"step": 5940
|
|
},
|
|
{
|
|
"epoch": 0.25853547258763593,
|
|
"grad_norm": 0.06566759198904037,
|
|
"learning_rate": 9.933852800849311e-06,
|
|
"loss": 0.0073,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 0.2589699859869429,
|
|
"grad_norm": 0.1568640023469925,
|
|
"learning_rate": 9.905705766280564e-06,
|
|
"loss": 0.008,
|
|
"step": 5960
|
|
},
|
|
{
|
|
"epoch": 0.2594044993862498,
|
|
"grad_norm": 0.06404092907905579,
|
|
"learning_rate": 9.877559478811199e-06,
|
|
"loss": 0.0066,
|
|
"step": 5970
|
|
},
|
|
{
|
|
"epoch": 0.25983901278555677,
|
|
"grad_norm": 0.08680085837841034,
|
|
"learning_rate": 9.849414161446093e-06,
|
|
"loss": 0.0074,
|
|
"step": 5980
|
|
},
|
|
{
|
|
"epoch": 0.2602735261848637,
|
|
"grad_norm": 0.11825085431337357,
|
|
"learning_rate": 9.821270037182442e-06,
|
|
"loss": 0.0089,
|
|
"step": 5990
|
|
},
|
|
{
|
|
"epoch": 0.26070803958417066,
|
|
"grad_norm": 0.09618745744228363,
|
|
"learning_rate": 9.793127329007973e-06,
|
|
"loss": 0.0087,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 0.2611425529834776,
|
|
"grad_norm": 0.05515113100409508,
|
|
"learning_rate": 9.76498625989922e-06,
|
|
"loss": 0.0061,
|
|
"step": 6010
|
|
},
|
|
{
|
|
"epoch": 0.2615770663827846,
|
|
"grad_norm": 0.10781148821115494,
|
|
"learning_rate": 9.736847052819704e-06,
|
|
"loss": 0.0094,
|
|
"step": 6020
|
|
},
|
|
{
|
|
"epoch": 0.26201157978209155,
|
|
"grad_norm": 0.0913834422826767,
|
|
"learning_rate": 9.708709930718204e-06,
|
|
"loss": 0.0087,
|
|
"step": 6030
|
|
},
|
|
{
|
|
"epoch": 0.2624460931813985,
|
|
"grad_norm": 0.07052706182003021,
|
|
"learning_rate": 9.680575116526982e-06,
|
|
"loss": 0.0081,
|
|
"step": 6040
|
|
},
|
|
{
|
|
"epoch": 0.26288060658070544,
|
|
"grad_norm": 0.08302155137062073,
|
|
"learning_rate": 9.652442833160012e-06,
|
|
"loss": 0.0078,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 0.2633151199800124,
|
|
"grad_norm": 0.157508984208107,
|
|
"learning_rate": 9.624313303511218e-06,
|
|
"loss": 0.0091,
|
|
"step": 6060
|
|
},
|
|
{
|
|
"epoch": 0.26374963337931934,
|
|
"grad_norm": 0.07369590550661087,
|
|
"learning_rate": 9.5961867504527e-06,
|
|
"loss": 0.0069,
|
|
"step": 6070
|
|
},
|
|
{
|
|
"epoch": 0.2641841467786263,
|
|
"grad_norm": 0.07717025279998779,
|
|
"learning_rate": 9.568063396832979e-06,
|
|
"loss": 0.0073,
|
|
"step": 6080
|
|
},
|
|
{
|
|
"epoch": 0.26461866017793323,
|
|
"grad_norm": 0.07593873143196106,
|
|
"learning_rate": 9.539943465475224e-06,
|
|
"loss": 0.0089,
|
|
"step": 6090
|
|
},
|
|
{
|
|
"epoch": 0.2650531735772402,
|
|
"grad_norm": 0.10739245265722275,
|
|
"learning_rate": 9.511827179175496e-06,
|
|
"loss": 0.0082,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 0.2654876869765471,
|
|
"grad_norm": 0.07673447579145432,
|
|
"learning_rate": 9.483714760700968e-06,
|
|
"loss": 0.0094,
|
|
"step": 6110
|
|
},
|
|
{
|
|
"epoch": 0.26592220037585407,
|
|
"grad_norm": 0.08048778027296066,
|
|
"learning_rate": 9.455606432788172e-06,
|
|
"loss": 0.0073,
|
|
"step": 6120
|
|
},
|
|
{
|
|
"epoch": 0.26635671377516107,
|
|
"grad_norm": 0.06304773688316345,
|
|
"learning_rate": 9.427502418141228e-06,
|
|
"loss": 0.0069,
|
|
"step": 6130
|
|
},
|
|
{
|
|
"epoch": 0.266791227174468,
|
|
"grad_norm": 0.10460677742958069,
|
|
"learning_rate": 9.399402939430078e-06,
|
|
"loss": 0.0094,
|
|
"step": 6140
|
|
},
|
|
{
|
|
"epoch": 0.26722574057377496,
|
|
"grad_norm": 0.09594422578811646,
|
|
"learning_rate": 9.371308219288739e-06,
|
|
"loss": 0.0086,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 0.2676602539730819,
|
|
"grad_norm": 0.07144231349229813,
|
|
"learning_rate": 9.343218480313514e-06,
|
|
"loss": 0.0084,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 0.26809476737238885,
|
|
"grad_norm": 0.05371937155723572,
|
|
"learning_rate": 9.315133945061243e-06,
|
|
"loss": 0.009,
|
|
"step": 6170
|
|
},
|
|
{
|
|
"epoch": 0.2685292807716958,
|
|
"grad_norm": 0.14217378199100494,
|
|
"learning_rate": 9.287054836047532e-06,
|
|
"loss": 0.0105,
|
|
"step": 6180
|
|
},
|
|
{
|
|
"epoch": 0.26896379417100275,
|
|
"grad_norm": 0.09040679782629013,
|
|
"learning_rate": 9.258981375745005e-06,
|
|
"loss": 0.0092,
|
|
"step": 6190
|
|
},
|
|
{
|
|
"epoch": 0.2693983075703097,
|
|
"grad_norm": 0.11461376398801804,
|
|
"learning_rate": 9.230913786581523e-06,
|
|
"loss": 0.0066,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 0.26983282096961664,
|
|
"grad_norm": 0.09241148829460144,
|
|
"learning_rate": 9.20285229093843e-06,
|
|
"loss": 0.0091,
|
|
"step": 6210
|
|
},
|
|
{
|
|
"epoch": 0.2702673343689236,
|
|
"grad_norm": 0.1639711558818817,
|
|
"learning_rate": 9.174797111148792e-06,
|
|
"loss": 0.0078,
|
|
"step": 6220
|
|
},
|
|
{
|
|
"epoch": 0.27070184776823053,
|
|
"grad_norm": 0.10276318341493607,
|
|
"learning_rate": 9.146748469495632e-06,
|
|
"loss": 0.0078,
|
|
"step": 6230
|
|
},
|
|
{
|
|
"epoch": 0.2711363611675375,
|
|
"grad_norm": 0.0878051146864891,
|
|
"learning_rate": 9.11870658821018e-06,
|
|
"loss": 0.008,
|
|
"step": 6240
|
|
},
|
|
{
|
|
"epoch": 0.2715708745668445,
|
|
"grad_norm": 0.12146293371915817,
|
|
"learning_rate": 9.090671689470092e-06,
|
|
"loss": 0.0071,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 0.2720053879661514,
|
|
"grad_norm": 0.07059358060359955,
|
|
"learning_rate": 9.062643995397705e-06,
|
|
"loss": 0.0071,
|
|
"step": 6260
|
|
},
|
|
{
|
|
"epoch": 0.27243990136545837,
|
|
"grad_norm": 0.11675061285495758,
|
|
"learning_rate": 9.034623728058269e-06,
|
|
"loss": 0.0062,
|
|
"step": 6270
|
|
},
|
|
{
|
|
"epoch": 0.2728744147647653,
|
|
"grad_norm": 0.07080376893281937,
|
|
"learning_rate": 9.006611109458201e-06,
|
|
"loss": 0.0084,
|
|
"step": 6280
|
|
},
|
|
{
|
|
"epoch": 0.27330892816407226,
|
|
"grad_norm": 0.10039542615413666,
|
|
"learning_rate": 8.97860636154331e-06,
|
|
"loss": 0.0091,
|
|
"step": 6290
|
|
},
|
|
{
|
|
"epoch": 0.2737434415633792,
|
|
"grad_norm": 0.1331404149532318,
|
|
"learning_rate": 8.950609706197048e-06,
|
|
"loss": 0.0101,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 0.27417795496268615,
|
|
"grad_norm": 0.09555957466363907,
|
|
"learning_rate": 8.922621365238742e-06,
|
|
"loss": 0.0093,
|
|
"step": 6310
|
|
},
|
|
{
|
|
"epoch": 0.2746124683619931,
|
|
"grad_norm": 0.10700193792581558,
|
|
"learning_rate": 8.89464156042185e-06,
|
|
"loss": 0.0081,
|
|
"step": 6320
|
|
},
|
|
{
|
|
"epoch": 0.27504698176130005,
|
|
"grad_norm": 0.10304276645183563,
|
|
"learning_rate": 8.8666705134322e-06,
|
|
"loss": 0.0111,
|
|
"step": 6330
|
|
},
|
|
{
|
|
"epoch": 0.275481495160607,
|
|
"grad_norm": 0.07287819683551788,
|
|
"learning_rate": 8.838708445886223e-06,
|
|
"loss": 0.0089,
|
|
"step": 6340
|
|
},
|
|
{
|
|
"epoch": 0.27591600855991394,
|
|
"grad_norm": 0.06307504326105118,
|
|
"learning_rate": 8.810755579329213e-06,
|
|
"loss": 0.0065,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 0.27635052195922094,
|
|
"grad_norm": 0.06355910003185272,
|
|
"learning_rate": 8.782812135233556e-06,
|
|
"loss": 0.0065,
|
|
"step": 6360
|
|
},
|
|
{
|
|
"epoch": 0.2767850353585279,
|
|
"grad_norm": 0.06679730117321014,
|
|
"learning_rate": 8.754878334996995e-06,
|
|
"loss": 0.0081,
|
|
"step": 6370
|
|
},
|
|
{
|
|
"epoch": 0.27721954875783483,
|
|
"grad_norm": 0.07842592149972916,
|
|
"learning_rate": 8.726954399940855e-06,
|
|
"loss": 0.0088,
|
|
"step": 6380
|
|
},
|
|
{
|
|
"epoch": 0.2776540621571418,
|
|
"grad_norm": 0.048519112169742584,
|
|
"learning_rate": 8.699040551308296e-06,
|
|
"loss": 0.0109,
|
|
"step": 6390
|
|
},
|
|
{
|
|
"epoch": 0.2780885755564487,
|
|
"grad_norm": 0.07662378251552582,
|
|
"learning_rate": 8.671137010262568e-06,
|
|
"loss": 0.007,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 0.27852308895575567,
|
|
"grad_norm": 0.07653367519378662,
|
|
"learning_rate": 8.643243997885253e-06,
|
|
"loss": 0.0075,
|
|
"step": 6410
|
|
},
|
|
{
|
|
"epoch": 0.2789576023550626,
|
|
"grad_norm": 0.050618138164281845,
|
|
"learning_rate": 8.615361735174517e-06,
|
|
"loss": 0.0087,
|
|
"step": 6420
|
|
},
|
|
{
|
|
"epoch": 0.27939211575436956,
|
|
"grad_norm": 0.0792509987950325,
|
|
"learning_rate": 8.58749044304335e-06,
|
|
"loss": 0.0084,
|
|
"step": 6430
|
|
},
|
|
{
|
|
"epoch": 0.2798266291536765,
|
|
"grad_norm": 0.05023474618792534,
|
|
"learning_rate": 8.559630342317822e-06,
|
|
"loss": 0.0058,
|
|
"step": 6440
|
|
},
|
|
{
|
|
"epoch": 0.28026114255298346,
|
|
"grad_norm": 0.08565095067024231,
|
|
"learning_rate": 8.531781653735334e-06,
|
|
"loss": 0.0117,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 0.2806956559522904,
|
|
"grad_norm": 0.09438503533601761,
|
|
"learning_rate": 8.503944597942865e-06,
|
|
"loss": 0.0088,
|
|
"step": 6460
|
|
},
|
|
{
|
|
"epoch": 0.2811301693515974,
|
|
"grad_norm": 0.12111536413431168,
|
|
"learning_rate": 8.476119395495235e-06,
|
|
"loss": 0.0094,
|
|
"step": 6470
|
|
},
|
|
{
|
|
"epoch": 0.28156468275090435,
|
|
"grad_norm": 0.0618109367787838,
|
|
"learning_rate": 8.44830626685334e-06,
|
|
"loss": 0.0079,
|
|
"step": 6480
|
|
},
|
|
{
|
|
"epoch": 0.2819991961502113,
|
|
"grad_norm": 0.07699032127857208,
|
|
"learning_rate": 8.42050543238242e-06,
|
|
"loss": 0.0087,
|
|
"step": 6490
|
|
},
|
|
{
|
|
"epoch": 0.28243370954951824,
|
|
"grad_norm": 0.1489276885986328,
|
|
"learning_rate": 8.392717112350301e-06,
|
|
"loss": 0.0108,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 0.2828682229488252,
|
|
"grad_norm": 0.1276170313358307,
|
|
"learning_rate": 8.364941526925667e-06,
|
|
"loss": 0.0082,
|
|
"step": 6510
|
|
},
|
|
{
|
|
"epoch": 0.28330273634813213,
|
|
"grad_norm": 0.11442344635725021,
|
|
"learning_rate": 8.337178896176295e-06,
|
|
"loss": 0.0071,
|
|
"step": 6520
|
|
},
|
|
{
|
|
"epoch": 0.2837372497474391,
|
|
"grad_norm": 0.07063666731119156,
|
|
"learning_rate": 8.309429440067324e-06,
|
|
"loss": 0.0094,
|
|
"step": 6530
|
|
},
|
|
{
|
|
"epoch": 0.284171763146746,
|
|
"grad_norm": 0.06442690640687943,
|
|
"learning_rate": 8.281693378459516e-06,
|
|
"loss": 0.0072,
|
|
"step": 6540
|
|
},
|
|
{
|
|
"epoch": 0.28460627654605297,
|
|
"grad_norm": 0.09958231449127197,
|
|
"learning_rate": 8.253970931107492e-06,
|
|
"loss": 0.0089,
|
|
"step": 6550
|
|
},
|
|
{
|
|
"epoch": 0.2850407899453599,
|
|
"grad_norm": 0.06900348514318466,
|
|
"learning_rate": 8.226262317658027e-06,
|
|
"loss": 0.0093,
|
|
"step": 6560
|
|
},
|
|
{
|
|
"epoch": 0.28547530334466686,
|
|
"grad_norm": 0.06914185732603073,
|
|
"learning_rate": 8.198567757648272e-06,
|
|
"loss": 0.0084,
|
|
"step": 6570
|
|
},
|
|
{
|
|
"epoch": 0.28590981674397387,
|
|
"grad_norm": 0.08540336787700653,
|
|
"learning_rate": 8.170887470504038e-06,
|
|
"loss": 0.0092,
|
|
"step": 6580
|
|
},
|
|
{
|
|
"epoch": 0.2863443301432808,
|
|
"grad_norm": 0.05744350329041481,
|
|
"learning_rate": 8.143221675538053e-06,
|
|
"loss": 0.0084,
|
|
"step": 6590
|
|
},
|
|
{
|
|
"epoch": 0.28677884354258776,
|
|
"grad_norm": 0.0658305436372757,
|
|
"learning_rate": 8.115570591948222e-06,
|
|
"loss": 0.0081,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 0.2872133569418947,
|
|
"grad_norm": 0.06954361498355865,
|
|
"learning_rate": 8.087934438815888e-06,
|
|
"loss": 0.0079,
|
|
"step": 6610
|
|
},
|
|
{
|
|
"epoch": 0.28764787034120165,
|
|
"grad_norm": 0.0778515487909317,
|
|
"learning_rate": 8.0603134351041e-06,
|
|
"loss": 0.0085,
|
|
"step": 6620
|
|
},
|
|
{
|
|
"epoch": 0.2880823837405086,
|
|
"grad_norm": 0.030152561143040657,
|
|
"learning_rate": 8.032707799655876e-06,
|
|
"loss": 0.0075,
|
|
"step": 6630
|
|
},
|
|
{
|
|
"epoch": 0.28851689713981554,
|
|
"grad_norm": 0.09243931621313095,
|
|
"learning_rate": 8.005117751192472e-06,
|
|
"loss": 0.0069,
|
|
"step": 6640
|
|
},
|
|
{
|
|
"epoch": 0.2889514105391225,
|
|
"grad_norm": 0.05257497727870941,
|
|
"learning_rate": 7.977543508311653e-06,
|
|
"loss": 0.0069,
|
|
"step": 6650
|
|
},
|
|
{
|
|
"epoch": 0.28938592393842943,
|
|
"grad_norm": 0.1092933937907219,
|
|
"learning_rate": 7.949985289485945e-06,
|
|
"loss": 0.0089,
|
|
"step": 6660
|
|
},
|
|
{
|
|
"epoch": 0.2898204373377364,
|
|
"grad_norm": 0.08264698833227158,
|
|
"learning_rate": 7.922443313060919e-06,
|
|
"loss": 0.0078,
|
|
"step": 6670
|
|
},
|
|
{
|
|
"epoch": 0.2902549507370433,
|
|
"grad_norm": 0.07861427962779999,
|
|
"learning_rate": 7.894917797253452e-06,
|
|
"loss": 0.0088,
|
|
"step": 6680
|
|
},
|
|
{
|
|
"epoch": 0.29068946413635033,
|
|
"grad_norm": 0.08720831573009491,
|
|
"learning_rate": 7.867408960150015e-06,
|
|
"loss": 0.0092,
|
|
"step": 6690
|
|
},
|
|
{
|
|
"epoch": 0.2911239775356573,
|
|
"grad_norm": 0.10889329016208649,
|
|
"learning_rate": 7.839917019704921e-06,
|
|
"loss": 0.0089,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 0.2915584909349642,
|
|
"grad_norm": 0.10911451280117035,
|
|
"learning_rate": 7.812442193738612e-06,
|
|
"loss": 0.0077,
|
|
"step": 6710
|
|
},
|
|
{
|
|
"epoch": 0.29199300433427117,
|
|
"grad_norm": 0.09256261587142944,
|
|
"learning_rate": 7.784984699935934e-06,
|
|
"loss": 0.0065,
|
|
"step": 6720
|
|
},
|
|
{
|
|
"epoch": 0.2924275177335781,
|
|
"grad_norm": 0.06900645047426224,
|
|
"learning_rate": 7.7575447558444e-06,
|
|
"loss": 0.0082,
|
|
"step": 6730
|
|
},
|
|
{
|
|
"epoch": 0.29286203113288506,
|
|
"grad_norm": 0.08416484296321869,
|
|
"learning_rate": 7.730122578872492e-06,
|
|
"loss": 0.0087,
|
|
"step": 6740
|
|
},
|
|
{
|
|
"epoch": 0.293296544532192,
|
|
"grad_norm": 0.0650983676314354,
|
|
"learning_rate": 7.702718386287904e-06,
|
|
"loss": 0.0064,
|
|
"step": 6750
|
|
},
|
|
{
|
|
"epoch": 0.29373105793149895,
|
|
"grad_norm": 0.05348186567425728,
|
|
"learning_rate": 7.675332395215853e-06,
|
|
"loss": 0.0089,
|
|
"step": 6760
|
|
},
|
|
{
|
|
"epoch": 0.2941655713308059,
|
|
"grad_norm": 0.12326699495315552,
|
|
"learning_rate": 7.64796482263734e-06,
|
|
"loss": 0.0087,
|
|
"step": 6770
|
|
},
|
|
{
|
|
"epoch": 0.29460008473011284,
|
|
"grad_norm": 0.07229342311620712,
|
|
"learning_rate": 7.620615885387419e-06,
|
|
"loss": 0.006,
|
|
"step": 6780
|
|
},
|
|
{
|
|
"epoch": 0.2950345981294198,
|
|
"grad_norm": 0.11028210073709488,
|
|
"learning_rate": 7.593285800153527e-06,
|
|
"loss": 0.0069,
|
|
"step": 6790
|
|
},
|
|
{
|
|
"epoch": 0.2954691115287268,
|
|
"grad_norm": 0.08311082422733307,
|
|
"learning_rate": 7.565974783473709e-06,
|
|
"loss": 0.0094,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 0.29590362492803374,
|
|
"grad_norm": 0.06337704509496689,
|
|
"learning_rate": 7.5386830517349366e-06,
|
|
"loss": 0.0075,
|
|
"step": 6810
|
|
},
|
|
{
|
|
"epoch": 0.2963381383273407,
|
|
"grad_norm": 0.09829442203044891,
|
|
"learning_rate": 7.511410821171385e-06,
|
|
"loss": 0.0081,
|
|
"step": 6820
|
|
},
|
|
{
|
|
"epoch": 0.29677265172664763,
|
|
"grad_norm": 0.06810601055622101,
|
|
"learning_rate": 7.484158307862726e-06,
|
|
"loss": 0.008,
|
|
"step": 6830
|
|
},
|
|
{
|
|
"epoch": 0.2972071651259546,
|
|
"grad_norm": 0.11772483587265015,
|
|
"learning_rate": 7.4569257277324035e-06,
|
|
"loss": 0.0087,
|
|
"step": 6840
|
|
},
|
|
{
|
|
"epoch": 0.2976416785252615,
|
|
"grad_norm": 0.08689630776643753,
|
|
"learning_rate": 7.429713296545934e-06,
|
|
"loss": 0.0072,
|
|
"step": 6850
|
|
},
|
|
{
|
|
"epoch": 0.29807619192456847,
|
|
"grad_norm": 0.06928461790084839,
|
|
"learning_rate": 7.402521229909185e-06,
|
|
"loss": 0.01,
|
|
"step": 6860
|
|
},
|
|
{
|
|
"epoch": 0.2985107053238754,
|
|
"grad_norm": 0.06894993036985397,
|
|
"learning_rate": 7.37534974326668e-06,
|
|
"loss": 0.0061,
|
|
"step": 6870
|
|
},
|
|
{
|
|
"epoch": 0.29894521872318236,
|
|
"grad_norm": 0.06853579729795456,
|
|
"learning_rate": 7.3481990518998915e-06,
|
|
"loss": 0.0064,
|
|
"step": 6880
|
|
},
|
|
{
|
|
"epoch": 0.2993797321224893,
|
|
"grad_norm": 0.11018829047679901,
|
|
"learning_rate": 7.321069370925519e-06,
|
|
"loss": 0.009,
|
|
"step": 6890
|
|
},
|
|
{
|
|
"epoch": 0.29981424552179625,
|
|
"grad_norm": 0.0806964859366417,
|
|
"learning_rate": 7.293960915293803e-06,
|
|
"loss": 0.0092,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 0.30024875892110325,
|
|
"grad_norm": 0.08158484101295471,
|
|
"learning_rate": 7.266873899786803e-06,
|
|
"loss": 0.007,
|
|
"step": 6910
|
|
},
|
|
{
|
|
"epoch": 0.3006832723204102,
|
|
"grad_norm": 0.04432486370205879,
|
|
"learning_rate": 7.2398085390167275e-06,
|
|
"loss": 0.0063,
|
|
"step": 6920
|
|
},
|
|
{
|
|
"epoch": 0.30111778571971715,
|
|
"grad_norm": 0.08704327791929245,
|
|
"learning_rate": 7.212765047424191e-06,
|
|
"loss": 0.0075,
|
|
"step": 6930
|
|
},
|
|
{
|
|
"epoch": 0.3015522991190241,
|
|
"grad_norm": 0.10205797106027603,
|
|
"learning_rate": 7.185743639276552e-06,
|
|
"loss": 0.0089,
|
|
"step": 6940
|
|
},
|
|
{
|
|
"epoch": 0.30198681251833104,
|
|
"grad_norm": 0.08234458416700363,
|
|
"learning_rate": 7.158744528666196e-06,
|
|
"loss": 0.0066,
|
|
"step": 6950
|
|
},
|
|
{
|
|
"epoch": 0.302421325917638,
|
|
"grad_norm": 0.14198225736618042,
|
|
"learning_rate": 7.131767929508833e-06,
|
|
"loss": 0.0082,
|
|
"step": 6960
|
|
},
|
|
{
|
|
"epoch": 0.30285583931694493,
|
|
"grad_norm": 0.06368236243724823,
|
|
"learning_rate": 7.104814055541838e-06,
|
|
"loss": 0.0071,
|
|
"step": 6970
|
|
},
|
|
{
|
|
"epoch": 0.3032903527162519,
|
|
"grad_norm": 0.12608693540096283,
|
|
"learning_rate": 7.077883120322507e-06,
|
|
"loss": 0.0091,
|
|
"step": 6980
|
|
},
|
|
{
|
|
"epoch": 0.3037248661155588,
|
|
"grad_norm": 0.07397144287824631,
|
|
"learning_rate": 7.0509753372264065e-06,
|
|
"loss": 0.0083,
|
|
"step": 6990
|
|
},
|
|
{
|
|
"epoch": 0.30415937951486577,
|
|
"grad_norm": 0.07896910607814789,
|
|
"learning_rate": 7.024090919445662e-06,
|
|
"loss": 0.0086,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 0.3045938929141727,
|
|
"grad_norm": 0.06012195348739624,
|
|
"learning_rate": 6.997230079987272e-06,
|
|
"loss": 0.0115,
|
|
"step": 7010
|
|
},
|
|
{
|
|
"epoch": 0.3050284063134797,
|
|
"grad_norm": 0.12889036536216736,
|
|
"learning_rate": 6.970393031671428e-06,
|
|
"loss": 0.0077,
|
|
"step": 7020
|
|
},
|
|
{
|
|
"epoch": 0.30546291971278666,
|
|
"grad_norm": 0.08058907091617584,
|
|
"learning_rate": 6.943579987129822e-06,
|
|
"loss": 0.009,
|
|
"step": 7030
|
|
},
|
|
{
|
|
"epoch": 0.3058974331120936,
|
|
"grad_norm": 0.10182671993970871,
|
|
"learning_rate": 6.916791158803954e-06,
|
|
"loss": 0.0072,
|
|
"step": 7040
|
|
},
|
|
{
|
|
"epoch": 0.30633194651140055,
|
|
"grad_norm": 0.08950523287057877,
|
|
"learning_rate": 6.890026758943464e-06,
|
|
"loss": 0.0074,
|
|
"step": 7050
|
|
},
|
|
{
|
|
"epoch": 0.3067664599107075,
|
|
"grad_norm": 0.05128556489944458,
|
|
"learning_rate": 6.86328699960445e-06,
|
|
"loss": 0.0091,
|
|
"step": 7060
|
|
},
|
|
{
|
|
"epoch": 0.30720097331001445,
|
|
"grad_norm": 0.11177459359169006,
|
|
"learning_rate": 6.83657209264777e-06,
|
|
"loss": 0.0067,
|
|
"step": 7070
|
|
},
|
|
{
|
|
"epoch": 0.3076354867093214,
|
|
"grad_norm": 0.06378534436225891,
|
|
"learning_rate": 6.809882249737383e-06,
|
|
"loss": 0.0062,
|
|
"step": 7080
|
|
},
|
|
{
|
|
"epoch": 0.30807000010862834,
|
|
"grad_norm": 0.0639062151312828,
|
|
"learning_rate": 6.783217682338655e-06,
|
|
"loss": 0.0065,
|
|
"step": 7090
|
|
},
|
|
{
|
|
"epoch": 0.3085045135079353,
|
|
"grad_norm": 0.06348402798175812,
|
|
"learning_rate": 6.7565786017167004e-06,
|
|
"loss": 0.0084,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 0.30893902690724223,
|
|
"grad_norm": 0.08029096573591232,
|
|
"learning_rate": 6.7299652189347e-06,
|
|
"loss": 0.0103,
|
|
"step": 7110
|
|
},
|
|
{
|
|
"epoch": 0.3093735403065492,
|
|
"grad_norm": 0.09063462167978287,
|
|
"learning_rate": 6.703377744852227e-06,
|
|
"loss": 0.0087,
|
|
"step": 7120
|
|
},
|
|
{
|
|
"epoch": 0.3098080537058562,
|
|
"grad_norm": 0.09227754920721054,
|
|
"learning_rate": 6.6768163901235776e-06,
|
|
"loss": 0.0059,
|
|
"step": 7130
|
|
},
|
|
{
|
|
"epoch": 0.3102425671051631,
|
|
"grad_norm": 0.09460172802209854,
|
|
"learning_rate": 6.650281365196096e-06,
|
|
"loss": 0.0071,
|
|
"step": 7140
|
|
},
|
|
{
|
|
"epoch": 0.31067708050447007,
|
|
"grad_norm": 0.08981993049383163,
|
|
"learning_rate": 6.623772880308534e-06,
|
|
"loss": 0.0082,
|
|
"step": 7150
|
|
},
|
|
{
|
|
"epoch": 0.311111593903777,
|
|
"grad_norm": 0.1009664312005043,
|
|
"learning_rate": 6.597291145489344e-06,
|
|
"loss": 0.0079,
|
|
"step": 7160
|
|
},
|
|
{
|
|
"epoch": 0.31154610730308396,
|
|
"grad_norm": 0.07889096438884735,
|
|
"learning_rate": 6.570836370555045e-06,
|
|
"loss": 0.0069,
|
|
"step": 7170
|
|
},
|
|
{
|
|
"epoch": 0.3119806207023909,
|
|
"grad_norm": 0.08060748875141144,
|
|
"learning_rate": 6.544408765108549e-06,
|
|
"loss": 0.0065,
|
|
"step": 7180
|
|
},
|
|
{
|
|
"epoch": 0.31241513410169786,
|
|
"grad_norm": 0.047370001673698425,
|
|
"learning_rate": 6.518008538537501e-06,
|
|
"loss": 0.0085,
|
|
"step": 7190
|
|
},
|
|
{
|
|
"epoch": 0.3128496475010048,
|
|
"grad_norm": 0.0684918537735939,
|
|
"learning_rate": 6.4916359000126284e-06,
|
|
"loss": 0.0089,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 0.31328416090031175,
|
|
"grad_norm": 0.08644723892211914,
|
|
"learning_rate": 6.465291058486072e-06,
|
|
"loss": 0.0091,
|
|
"step": 7210
|
|
},
|
|
{
|
|
"epoch": 0.3137186742996187,
|
|
"grad_norm": 0.06115137040615082,
|
|
"learning_rate": 6.438974222689729e-06,
|
|
"loss": 0.009,
|
|
"step": 7220
|
|
},
|
|
{
|
|
"epoch": 0.31415318769892564,
|
|
"grad_norm": 0.09270749986171722,
|
|
"learning_rate": 6.4126856011336146e-06,
|
|
"loss": 0.0094,
|
|
"step": 7230
|
|
},
|
|
{
|
|
"epoch": 0.31458770109823264,
|
|
"grad_norm": 0.06477729976177216,
|
|
"learning_rate": 6.386425402104199e-06,
|
|
"loss": 0.0079,
|
|
"step": 7240
|
|
},
|
|
{
|
|
"epoch": 0.3150222144975396,
|
|
"grad_norm": 0.11577966809272766,
|
|
"learning_rate": 6.3601938336627555e-06,
|
|
"loss": 0.0079,
|
|
"step": 7250
|
|
},
|
|
{
|
|
"epoch": 0.31545672789684653,
|
|
"grad_norm": 0.058794233947992325,
|
|
"learning_rate": 6.33399110364372e-06,
|
|
"loss": 0.0055,
|
|
"step": 7260
|
|
},
|
|
{
|
|
"epoch": 0.3158912412961535,
|
|
"grad_norm": 0.09309697151184082,
|
|
"learning_rate": 6.307817419653031e-06,
|
|
"loss": 0.0076,
|
|
"step": 7270
|
|
},
|
|
{
|
|
"epoch": 0.3163257546954604,
|
|
"grad_norm": 0.09355561435222626,
|
|
"learning_rate": 6.281672989066501e-06,
|
|
"loss": 0.0083,
|
|
"step": 7280
|
|
},
|
|
{
|
|
"epoch": 0.31676026809476737,
|
|
"grad_norm": 0.05001560226082802,
|
|
"learning_rate": 6.255558019028168e-06,
|
|
"loss": 0.0086,
|
|
"step": 7290
|
|
},
|
|
{
|
|
"epoch": 0.3171947814940743,
|
|
"grad_norm": 0.07396744191646576,
|
|
"learning_rate": 6.229472716448647e-06,
|
|
"loss": 0.0074,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 0.31762929489338126,
|
|
"grad_norm": 0.05909964442253113,
|
|
"learning_rate": 6.203417288003497e-06,
|
|
"loss": 0.006,
|
|
"step": 7310
|
|
},
|
|
{
|
|
"epoch": 0.3180638082926882,
|
|
"grad_norm": 0.08963511139154434,
|
|
"learning_rate": 6.177391940131581e-06,
|
|
"loss": 0.0077,
|
|
"step": 7320
|
|
},
|
|
{
|
|
"epoch": 0.31849832169199516,
|
|
"grad_norm": 0.06426141411066055,
|
|
"learning_rate": 6.15139687903343e-06,
|
|
"loss": 0.0067,
|
|
"step": 7330
|
|
},
|
|
{
|
|
"epoch": 0.3189328350913021,
|
|
"grad_norm": 0.09945748746395111,
|
|
"learning_rate": 6.12543231066962e-06,
|
|
"loss": 0.0082,
|
|
"step": 7340
|
|
},
|
|
{
|
|
"epoch": 0.3193673484906091,
|
|
"grad_norm": 0.06709706038236618,
|
|
"learning_rate": 6.099498440759123e-06,
|
|
"loss": 0.0074,
|
|
"step": 7350
|
|
},
|
|
{
|
|
"epoch": 0.31980186188991605,
|
|
"grad_norm": 0.07290440052747726,
|
|
"learning_rate": 6.0735954747776856e-06,
|
|
"loss": 0.0071,
|
|
"step": 7360
|
|
},
|
|
{
|
|
"epoch": 0.320236375289223,
|
|
"grad_norm": 0.06449789553880692,
|
|
"learning_rate": 6.047723617956201e-06,
|
|
"loss": 0.0081,
|
|
"step": 7370
|
|
},
|
|
{
|
|
"epoch": 0.32067088868852994,
|
|
"grad_norm": 0.08315026760101318,
|
|
"learning_rate": 6.021883075279089e-06,
|
|
"loss": 0.0085,
|
|
"step": 7380
|
|
},
|
|
{
|
|
"epoch": 0.3211054020878369,
|
|
"grad_norm": 0.06351494789123535,
|
|
"learning_rate": 5.996074051482657e-06,
|
|
"loss": 0.0084,
|
|
"step": 7390
|
|
},
|
|
{
|
|
"epoch": 0.32153991548714383,
|
|
"grad_norm": 0.0669468492269516,
|
|
"learning_rate": 5.9702967510534884e-06,
|
|
"loss": 0.0073,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 0.3219744288864508,
|
|
"grad_norm": 0.060450613498687744,
|
|
"learning_rate": 5.94455137822682e-06,
|
|
"loss": 0.0064,
|
|
"step": 7410
|
|
},
|
|
{
|
|
"epoch": 0.3224089422857577,
|
|
"grad_norm": 0.04701624810695648,
|
|
"learning_rate": 5.918838136984926e-06,
|
|
"loss": 0.0062,
|
|
"step": 7420
|
|
},
|
|
{
|
|
"epoch": 0.3228434556850647,
|
|
"grad_norm": 0.0940837636590004,
|
|
"learning_rate": 5.893157231055501e-06,
|
|
"loss": 0.007,
|
|
"step": 7430
|
|
},
|
|
{
|
|
"epoch": 0.3232779690843716,
|
|
"grad_norm": 0.10061509162187576,
|
|
"learning_rate": 5.867508863910043e-06,
|
|
"loss": 0.0079,
|
|
"step": 7440
|
|
},
|
|
{
|
|
"epoch": 0.32371248248367857,
|
|
"grad_norm": 0.07226772606372833,
|
|
"learning_rate": 5.841893238762242e-06,
|
|
"loss": 0.0072,
|
|
"step": 7450
|
|
},
|
|
{
|
|
"epoch": 0.32414699588298557,
|
|
"grad_norm": 0.10662239789962769,
|
|
"learning_rate": 5.816310558566367e-06,
|
|
"loss": 0.0061,
|
|
"step": 7460
|
|
},
|
|
{
|
|
"epoch": 0.3245815092822925,
|
|
"grad_norm": 0.12014666199684143,
|
|
"learning_rate": 5.790761026015675e-06,
|
|
"loss": 0.0091,
|
|
"step": 7470
|
|
},
|
|
{
|
|
"epoch": 0.32501602268159946,
|
|
"grad_norm": 0.10308293253183365,
|
|
"learning_rate": 5.765244843540783e-06,
|
|
"loss": 0.0096,
|
|
"step": 7480
|
|
},
|
|
{
|
|
"epoch": 0.3254505360809064,
|
|
"grad_norm": 0.15284277498722076,
|
|
"learning_rate": 5.739762213308073e-06,
|
|
"loss": 0.007,
|
|
"step": 7490
|
|
},
|
|
{
|
|
"epoch": 0.32588504948021335,
|
|
"grad_norm": 0.06631079316139221,
|
|
"learning_rate": 5.714313337218087e-06,
|
|
"loss": 0.0061,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 0.3263195628795203,
|
|
"grad_norm": 0.13062670826911926,
|
|
"learning_rate": 5.688898416903938e-06,
|
|
"loss": 0.0084,
|
|
"step": 7510
|
|
},
|
|
{
|
|
"epoch": 0.32675407627882724,
|
|
"grad_norm": 0.07424893230199814,
|
|
"learning_rate": 5.663517653729708e-06,
|
|
"loss": 0.0054,
|
|
"step": 7520
|
|
},
|
|
{
|
|
"epoch": 0.3271885896781342,
|
|
"grad_norm": 0.054448146373033524,
|
|
"learning_rate": 5.638171248788842e-06,
|
|
"loss": 0.0068,
|
|
"step": 7530
|
|
},
|
|
{
|
|
"epoch": 0.32762310307744114,
|
|
"grad_norm": 0.042936768382787704,
|
|
"learning_rate": 5.6128594029025585e-06,
|
|
"loss": 0.0057,
|
|
"step": 7540
|
|
},
|
|
{
|
|
"epoch": 0.3280576164767481,
|
|
"grad_norm": 0.19675545394420624,
|
|
"learning_rate": 5.587582316618276e-06,
|
|
"loss": 0.0111,
|
|
"step": 7550
|
|
},
|
|
{
|
|
"epoch": 0.32849212987605503,
|
|
"grad_norm": 0.060188472270965576,
|
|
"learning_rate": 5.562340190207995e-06,
|
|
"loss": 0.0083,
|
|
"step": 7560
|
|
},
|
|
{
|
|
"epoch": 0.32892664327536203,
|
|
"grad_norm": 0.07820606976747513,
|
|
"learning_rate": 5.537133223666742e-06,
|
|
"loss": 0.0076,
|
|
"step": 7570
|
|
},
|
|
{
|
|
"epoch": 0.329361156674669,
|
|
"grad_norm": 0.06374085694551468,
|
|
"learning_rate": 5.511961616710961e-06,
|
|
"loss": 0.0065,
|
|
"step": 7580
|
|
},
|
|
{
|
|
"epoch": 0.3297956700739759,
|
|
"grad_norm": 0.05585566163063049,
|
|
"learning_rate": 5.486825568776941e-06,
|
|
"loss": 0.0074,
|
|
"step": 7590
|
|
},
|
|
{
|
|
"epoch": 0.33023018347328287,
|
|
"grad_norm": 0.04485785588622093,
|
|
"learning_rate": 5.461725279019226e-06,
|
|
"loss": 0.0077,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 0.3306646968725898,
|
|
"grad_norm": 0.08395210653543472,
|
|
"learning_rate": 5.436660946309067e-06,
|
|
"loss": 0.0082,
|
|
"step": 7610
|
|
},
|
|
{
|
|
"epoch": 0.33109921027189676,
|
|
"grad_norm": 0.08163761347532272,
|
|
"learning_rate": 5.411632769232808e-06,
|
|
"loss": 0.0086,
|
|
"step": 7620
|
|
},
|
|
{
|
|
"epoch": 0.3315337236712037,
|
|
"grad_norm": 0.09057643264532089,
|
|
"learning_rate": 5.386640946090325e-06,
|
|
"loss": 0.0084,
|
|
"step": 7630
|
|
},
|
|
{
|
|
"epoch": 0.33196823707051065,
|
|
"grad_norm": 0.10619207471609116,
|
|
"learning_rate": 5.361685674893481e-06,
|
|
"loss": 0.007,
|
|
"step": 7640
|
|
},
|
|
{
|
|
"epoch": 0.3324027504698176,
|
|
"grad_norm": 0.11618001759052277,
|
|
"learning_rate": 5.3367671533645105e-06,
|
|
"loss": 0.0071,
|
|
"step": 7650
|
|
},
|
|
{
|
|
"epoch": 0.33283726386912454,
|
|
"grad_norm": 0.05917714163661003,
|
|
"learning_rate": 5.3118855789345e-06,
|
|
"loss": 0.0064,
|
|
"step": 7660
|
|
},
|
|
{
|
|
"epoch": 0.3332717772684315,
|
|
"grad_norm": 0.04789431020617485,
|
|
"learning_rate": 5.2870411487417825e-06,
|
|
"loss": 0.006,
|
|
"step": 7670
|
|
},
|
|
{
|
|
"epoch": 0.3337062906677385,
|
|
"grad_norm": 0.11848578602075577,
|
|
"learning_rate": 5.262234059630415e-06,
|
|
"loss": 0.0068,
|
|
"step": 7680
|
|
},
|
|
{
|
|
"epoch": 0.33414080406704544,
|
|
"grad_norm": 0.06466028839349747,
|
|
"learning_rate": 5.237464508148575e-06,
|
|
"loss": 0.0074,
|
|
"step": 7690
|
|
},
|
|
{
|
|
"epoch": 0.3345753174663524,
|
|
"grad_norm": 0.10490430891513824,
|
|
"learning_rate": 5.212732690547047e-06,
|
|
"loss": 0.0085,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 0.33500983086565933,
|
|
"grad_norm": 0.0808047205209732,
|
|
"learning_rate": 5.1880388027776415e-06,
|
|
"loss": 0.0076,
|
|
"step": 7710
|
|
},
|
|
{
|
|
"epoch": 0.3354443442649663,
|
|
"grad_norm": 0.08687549829483032,
|
|
"learning_rate": 5.163383040491645e-06,
|
|
"loss": 0.0086,
|
|
"step": 7720
|
|
},
|
|
{
|
|
"epoch": 0.3358788576642732,
|
|
"grad_norm": 0.07844628393650055,
|
|
"learning_rate": 5.1387655990382716e-06,
|
|
"loss": 0.0078,
|
|
"step": 7730
|
|
},
|
|
{
|
|
"epoch": 0.33631337106358017,
|
|
"grad_norm": 0.07566333562135696,
|
|
"learning_rate": 5.114186673463123e-06,
|
|
"loss": 0.0085,
|
|
"step": 7740
|
|
},
|
|
{
|
|
"epoch": 0.3367478844628871,
|
|
"grad_norm": 0.042920857667922974,
|
|
"learning_rate": 5.089646458506639e-06,
|
|
"loss": 0.0081,
|
|
"step": 7750
|
|
},
|
|
{
|
|
"epoch": 0.33718239786219406,
|
|
"grad_norm": 0.05019129067659378,
|
|
"learning_rate": 5.065145148602542e-06,
|
|
"loss": 0.0068,
|
|
"step": 7760
|
|
},
|
|
{
|
|
"epoch": 0.337616911261501,
|
|
"grad_norm": 0.12619146704673767,
|
|
"learning_rate": 5.040682937876319e-06,
|
|
"loss": 0.0073,
|
|
"step": 7770
|
|
},
|
|
{
|
|
"epoch": 0.33805142466080795,
|
|
"grad_norm": 0.10307935625314713,
|
|
"learning_rate": 5.016260020143659e-06,
|
|
"loss": 0.0089,
|
|
"step": 7780
|
|
},
|
|
{
|
|
"epoch": 0.33848593806011495,
|
|
"grad_norm": 0.06839073449373245,
|
|
"learning_rate": 4.9918765889089475e-06,
|
|
"loss": 0.0064,
|
|
"step": 7790
|
|
},
|
|
{
|
|
"epoch": 0.3389204514594219,
|
|
"grad_norm": 0.07737269252538681,
|
|
"learning_rate": 4.967532837363695e-06,
|
|
"loss": 0.0066,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 0.33935496485872885,
|
|
"grad_norm": 0.07342928647994995,
|
|
"learning_rate": 4.943228958385045e-06,
|
|
"loss": 0.0073,
|
|
"step": 7810
|
|
},
|
|
{
|
|
"epoch": 0.3397894782580358,
|
|
"grad_norm": 0.09632057696580887,
|
|
"learning_rate": 4.918965144534219e-06,
|
|
"loss": 0.008,
|
|
"step": 7820
|
|
},
|
|
{
|
|
"epoch": 0.34022399165734274,
|
|
"grad_norm": 0.07035823166370392,
|
|
"learning_rate": 4.894741588054993e-06,
|
|
"loss": 0.0057,
|
|
"step": 7830
|
|
},
|
|
{
|
|
"epoch": 0.3406585050566497,
|
|
"grad_norm": 0.059140220284461975,
|
|
"learning_rate": 4.8705584808722065e-06,
|
|
"loss": 0.0057,
|
|
"step": 7840
|
|
},
|
|
{
|
|
"epoch": 0.34109301845595663,
|
|
"grad_norm": 0.10383981466293335,
|
|
"learning_rate": 4.8464160145901894e-06,
|
|
"loss": 0.0087,
|
|
"step": 7850
|
|
},
|
|
{
|
|
"epoch": 0.3415275318552636,
|
|
"grad_norm": 0.08844143152236938,
|
|
"learning_rate": 4.822314380491281e-06,
|
|
"loss": 0.0083,
|
|
"step": 7860
|
|
},
|
|
{
|
|
"epoch": 0.3419620452545705,
|
|
"grad_norm": 0.04909001663327217,
|
|
"learning_rate": 4.7982537695343115e-06,
|
|
"loss": 0.0079,
|
|
"step": 7870
|
|
},
|
|
{
|
|
"epoch": 0.34239655865387747,
|
|
"grad_norm": 0.07348047941923141,
|
|
"learning_rate": 4.7742343723530685e-06,
|
|
"loss": 0.0083,
|
|
"step": 7880
|
|
},
|
|
{
|
|
"epoch": 0.3428310720531844,
|
|
"grad_norm": 0.060402486473321915,
|
|
"learning_rate": 4.750256379254814e-06,
|
|
"loss": 0.0066,
|
|
"step": 7890
|
|
},
|
|
{
|
|
"epoch": 0.3432655854524914,
|
|
"grad_norm": 0.12773926556110382,
|
|
"learning_rate": 4.72631998021875e-06,
|
|
"loss": 0.0077,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 0.34370009885179836,
|
|
"grad_norm": 0.048774924129247665,
|
|
"learning_rate": 4.70242536489454e-06,
|
|
"loss": 0.007,
|
|
"step": 7910
|
|
},
|
|
{
|
|
"epoch": 0.3441346122511053,
|
|
"grad_norm": 0.09722929447889328,
|
|
"learning_rate": 4.6785727226007746e-06,
|
|
"loss": 0.006,
|
|
"step": 7920
|
|
},
|
|
{
|
|
"epoch": 0.34456912565041226,
|
|
"grad_norm": 0.08816218376159668,
|
|
"learning_rate": 4.654762242323506e-06,
|
|
"loss": 0.0066,
|
|
"step": 7930
|
|
},
|
|
{
|
|
"epoch": 0.3450036390497192,
|
|
"grad_norm": 0.046284399926662445,
|
|
"learning_rate": 4.63099411271473e-06,
|
|
"loss": 0.0089,
|
|
"step": 7940
|
|
},
|
|
{
|
|
"epoch": 0.34543815244902615,
|
|
"grad_norm": 0.06123678386211395,
|
|
"learning_rate": 4.607268522090887e-06,
|
|
"loss": 0.0088,
|
|
"step": 7950
|
|
},
|
|
{
|
|
"epoch": 0.3458726658483331,
|
|
"grad_norm": 0.08266434818506241,
|
|
"learning_rate": 4.583585658431383e-06,
|
|
"loss": 0.0064,
|
|
"step": 7960
|
|
},
|
|
{
|
|
"epoch": 0.34630717924764004,
|
|
"grad_norm": 0.061687689274549484,
|
|
"learning_rate": 4.5599457093771e-06,
|
|
"loss": 0.0085,
|
|
"step": 7970
|
|
},
|
|
{
|
|
"epoch": 0.346741692646947,
|
|
"grad_norm": 0.065816231071949,
|
|
"learning_rate": 4.536348862228902e-06,
|
|
"loss": 0.0075,
|
|
"step": 7980
|
|
},
|
|
{
|
|
"epoch": 0.34717620604625393,
|
|
"grad_norm": 0.045334603637456894,
|
|
"learning_rate": 4.512795303946148e-06,
|
|
"loss": 0.0052,
|
|
"step": 7990
|
|
},
|
|
{
|
|
"epoch": 0.3476107194455609,
|
|
"grad_norm": 0.08470525592565536,
|
|
"learning_rate": 4.489285221145227e-06,
|
|
"loss": 0.0083,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 0.3480452328448679,
|
|
"grad_norm": 0.06111738085746765,
|
|
"learning_rate": 4.4658188000980586e-06,
|
|
"loss": 0.0055,
|
|
"step": 8010
|
|
},
|
|
{
|
|
"epoch": 0.3484797462441748,
|
|
"grad_norm": 0.08732720464468002,
|
|
"learning_rate": 4.442396226730637e-06,
|
|
"loss": 0.0082,
|
|
"step": 8020
|
|
},
|
|
{
|
|
"epoch": 0.34891425964348177,
|
|
"grad_norm": 0.09063022583723068,
|
|
"learning_rate": 4.419017686621536e-06,
|
|
"loss": 0.006,
|
|
"step": 8030
|
|
},
|
|
{
|
|
"epoch": 0.3493487730427887,
|
|
"grad_norm": 0.09084238857030869,
|
|
"learning_rate": 4.395683365000468e-06,
|
|
"loss": 0.0069,
|
|
"step": 8040
|
|
},
|
|
{
|
|
"epoch": 0.34978328644209566,
|
|
"grad_norm": 0.06919077038764954,
|
|
"learning_rate": 4.372393446746781e-06,
|
|
"loss": 0.0078,
|
|
"step": 8050
|
|
},
|
|
{
|
|
"epoch": 0.3502177998414026,
|
|
"grad_norm": 0.06786080449819565,
|
|
"learning_rate": 4.349148116388026e-06,
|
|
"loss": 0.0049,
|
|
"step": 8060
|
|
},
|
|
{
|
|
"epoch": 0.35065231324070956,
|
|
"grad_norm": 0.12213651090860367,
|
|
"learning_rate": 4.325947558098478e-06,
|
|
"loss": 0.007,
|
|
"step": 8070
|
|
},
|
|
{
|
|
"epoch": 0.3510868266400165,
|
|
"grad_norm": 0.04927683249115944,
|
|
"learning_rate": 4.302791955697676e-06,
|
|
"loss": 0.0061,
|
|
"step": 8080
|
|
},
|
|
{
|
|
"epoch": 0.35152134003932345,
|
|
"grad_norm": 0.03858277201652527,
|
|
"learning_rate": 4.27968149264897e-06,
|
|
"loss": 0.006,
|
|
"step": 8090
|
|
},
|
|
{
|
|
"epoch": 0.3519558534386304,
|
|
"grad_norm": 0.052387017756700516,
|
|
"learning_rate": 4.256616352058073e-06,
|
|
"loss": 0.0085,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 0.35239036683793734,
|
|
"grad_norm": 0.07891060411930084,
|
|
"learning_rate": 4.2335967166716064e-06,
|
|
"loss": 0.0059,
|
|
"step": 8110
|
|
},
|
|
{
|
|
"epoch": 0.35282488023724434,
|
|
"grad_norm": 0.06736122816801071,
|
|
"learning_rate": 4.210622768875643e-06,
|
|
"loss": 0.0077,
|
|
"step": 8120
|
|
},
|
|
{
|
|
"epoch": 0.3532593936365513,
|
|
"grad_norm": 0.0961538702249527,
|
|
"learning_rate": 4.187694690694279e-06,
|
|
"loss": 0.0109,
|
|
"step": 8130
|
|
},
|
|
{
|
|
"epoch": 0.35369390703585823,
|
|
"grad_norm": 0.0797416940331459,
|
|
"learning_rate": 4.1648126637881745e-06,
|
|
"loss": 0.008,
|
|
"step": 8140
|
|
},
|
|
{
|
|
"epoch": 0.3541284204351652,
|
|
"grad_norm": 0.07754529267549515,
|
|
"learning_rate": 4.141976869453123e-06,
|
|
"loss": 0.0064,
|
|
"step": 8150
|
|
},
|
|
{
|
|
"epoch": 0.3545629338344721,
|
|
"grad_norm": 0.110176220536232,
|
|
"learning_rate": 4.119187488618621e-06,
|
|
"loss": 0.0068,
|
|
"step": 8160
|
|
},
|
|
{
|
|
"epoch": 0.3549974472337791,
|
|
"grad_norm": 0.09413070976734161,
|
|
"learning_rate": 4.096444701846427e-06,
|
|
"loss": 0.0078,
|
|
"step": 8170
|
|
},
|
|
{
|
|
"epoch": 0.355431960633086,
|
|
"grad_norm": 0.09964610636234283,
|
|
"learning_rate": 4.073748689329125e-06,
|
|
"loss": 0.007,
|
|
"step": 8180
|
|
},
|
|
{
|
|
"epoch": 0.35586647403239297,
|
|
"grad_norm": 0.14618416130542755,
|
|
"learning_rate": 4.051099630888704e-06,
|
|
"loss": 0.0072,
|
|
"step": 8190
|
|
},
|
|
{
|
|
"epoch": 0.3563009874316999,
|
|
"grad_norm": 0.07930748909711838,
|
|
"learning_rate": 4.028497705975139e-06,
|
|
"loss": 0.0062,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 0.35673550083100686,
|
|
"grad_norm": 0.0969531461596489,
|
|
"learning_rate": 4.0059430936649645e-06,
|
|
"loss": 0.0065,
|
|
"step": 8210
|
|
},
|
|
{
|
|
"epoch": 0.3571700142303138,
|
|
"grad_norm": 0.06370487064123154,
|
|
"learning_rate": 3.9834359726598415e-06,
|
|
"loss": 0.008,
|
|
"step": 8220
|
|
},
|
|
{
|
|
"epoch": 0.3576045276296208,
|
|
"grad_norm": 0.06438025087118149,
|
|
"learning_rate": 3.9609765212851694e-06,
|
|
"loss": 0.0062,
|
|
"step": 8230
|
|
},
|
|
{
|
|
"epoch": 0.35803904102892775,
|
|
"grad_norm": 0.07360579073429108,
|
|
"learning_rate": 3.938564917488644e-06,
|
|
"loss": 0.0067,
|
|
"step": 8240
|
|
},
|
|
{
|
|
"epoch": 0.3584735544282347,
|
|
"grad_norm": 0.10698428750038147,
|
|
"learning_rate": 3.916201338838872e-06,
|
|
"loss": 0.006,
|
|
"step": 8250
|
|
},
|
|
{
|
|
"epoch": 0.35890806782754164,
|
|
"grad_norm": 0.07496725022792816,
|
|
"learning_rate": 3.893885962523954e-06,
|
|
"loss": 0.0065,
|
|
"step": 8260
|
|
},
|
|
{
|
|
"epoch": 0.3593425812268486,
|
|
"grad_norm": 0.0918489471077919,
|
|
"learning_rate": 3.871618965350075e-06,
|
|
"loss": 0.0063,
|
|
"step": 8270
|
|
},
|
|
{
|
|
"epoch": 0.35977709462615554,
|
|
"grad_norm": 0.06308259069919586,
|
|
"learning_rate": 3.849400523740102e-06,
|
|
"loss": 0.0072,
|
|
"step": 8280
|
|
},
|
|
{
|
|
"epoch": 0.3602116080254625,
|
|
"grad_norm": 0.058937299996614456,
|
|
"learning_rate": 3.82723081373221e-06,
|
|
"loss": 0.0053,
|
|
"step": 8290
|
|
},
|
|
{
|
|
"epoch": 0.36064612142476943,
|
|
"grad_norm": 0.0596042238175869,
|
|
"learning_rate": 3.805110010978463e-06,
|
|
"loss": 0.0073,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 0.3610806348240764,
|
|
"grad_norm": 0.07679947465658188,
|
|
"learning_rate": 3.783038290743427e-06,
|
|
"loss": 0.0068,
|
|
"step": 8310
|
|
},
|
|
{
|
|
"epoch": 0.3615151482233833,
|
|
"grad_norm": 0.04791805148124695,
|
|
"learning_rate": 3.7610158279027796e-06,
|
|
"loss": 0.0075,
|
|
"step": 8320
|
|
},
|
|
{
|
|
"epoch": 0.36194966162269027,
|
|
"grad_norm": 0.06380537897348404,
|
|
"learning_rate": 3.7390427969419395e-06,
|
|
"loss": 0.0076,
|
|
"step": 8330
|
|
},
|
|
{
|
|
"epoch": 0.36238417502199727,
|
|
"grad_norm": 0.06472238153219223,
|
|
"learning_rate": 3.7171193719546726e-06,
|
|
"loss": 0.0062,
|
|
"step": 8340
|
|
},
|
|
{
|
|
"epoch": 0.3628186884213042,
|
|
"grad_norm": 0.09685289859771729,
|
|
"learning_rate": 3.695245726641702e-06,
|
|
"loss": 0.0066,
|
|
"step": 8350
|
|
},
|
|
{
|
|
"epoch": 0.36325320182061116,
|
|
"grad_norm": 0.09350993484258652,
|
|
"learning_rate": 3.6734220343093575e-06,
|
|
"loss": 0.0053,
|
|
"step": 8360
|
|
},
|
|
{
|
|
"epoch": 0.3636877152199181,
|
|
"grad_norm": 0.1431475281715393,
|
|
"learning_rate": 3.6516484678681783e-06,
|
|
"loss": 0.0102,
|
|
"step": 8370
|
|
},
|
|
{
|
|
"epoch": 0.36412222861922505,
|
|
"grad_norm": 0.06581385433673859,
|
|
"learning_rate": 3.629925199831552e-06,
|
|
"loss": 0.0067,
|
|
"step": 8380
|
|
},
|
|
{
|
|
"epoch": 0.364556742018532,
|
|
"grad_norm": 0.05527684837579727,
|
|
"learning_rate": 3.6082524023143574e-06,
|
|
"loss": 0.0059,
|
|
"step": 8390
|
|
},
|
|
{
|
|
"epoch": 0.36499125541783894,
|
|
"grad_norm": 0.0786575973033905,
|
|
"learning_rate": 3.5866302470315882e-06,
|
|
"loss": 0.0069,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 0.3654257688171459,
|
|
"grad_norm": 0.08859694749116898,
|
|
"learning_rate": 3.565058905296991e-06,
|
|
"loss": 0.0063,
|
|
"step": 8410
|
|
},
|
|
{
|
|
"epoch": 0.36586028221645284,
|
|
"grad_norm": 0.10222943872213364,
|
|
"learning_rate": 3.543538548021723e-06,
|
|
"loss": 0.0079,
|
|
"step": 8420
|
|
},
|
|
{
|
|
"epoch": 0.3662947956157598,
|
|
"grad_norm": 0.07007434964179993,
|
|
"learning_rate": 3.5220693457129775e-06,
|
|
"loss": 0.0075,
|
|
"step": 8430
|
|
},
|
|
{
|
|
"epoch": 0.36672930901506673,
|
|
"grad_norm": 0.06237521767616272,
|
|
"learning_rate": 3.5006514684726545e-06,
|
|
"loss": 0.0082,
|
|
"step": 8440
|
|
},
|
|
{
|
|
"epoch": 0.36716382241437373,
|
|
"grad_norm": 0.04917728528380394,
|
|
"learning_rate": 3.4792850859959903e-06,
|
|
"loss": 0.007,
|
|
"step": 8450
|
|
},
|
|
{
|
|
"epoch": 0.3675983358136807,
|
|
"grad_norm": 0.08655932545661926,
|
|
"learning_rate": 3.457970367570239e-06,
|
|
"loss": 0.0088,
|
|
"step": 8460
|
|
},
|
|
{
|
|
"epoch": 0.3680328492129876,
|
|
"grad_norm": 0.08564922958612442,
|
|
"learning_rate": 3.4367074820733017e-06,
|
|
"loss": 0.0053,
|
|
"step": 8470
|
|
},
|
|
{
|
|
"epoch": 0.36846736261229457,
|
|
"grad_norm": 0.067174032330513,
|
|
"learning_rate": 3.415496597972414e-06,
|
|
"loss": 0.0062,
|
|
"step": 8480
|
|
},
|
|
{
|
|
"epoch": 0.3689018760116015,
|
|
"grad_norm": 0.08985011279582977,
|
|
"learning_rate": 3.394337883322805e-06,
|
|
"loss": 0.0078,
|
|
"step": 8490
|
|
},
|
|
{
|
|
"epoch": 0.36933638941090846,
|
|
"grad_norm": 0.0990537703037262,
|
|
"learning_rate": 3.373231505766348e-06,
|
|
"loss": 0.0075,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 0.3697709028102154,
|
|
"grad_norm": 0.0639311894774437,
|
|
"learning_rate": 3.352177632530251e-06,
|
|
"loss": 0.0076,
|
|
"step": 8510
|
|
},
|
|
{
|
|
"epoch": 0.37020541620952235,
|
|
"grad_norm": 0.03501976281404495,
|
|
"learning_rate": 3.3311764304257342e-06,
|
|
"loss": 0.0063,
|
|
"step": 8520
|
|
},
|
|
{
|
|
"epoch": 0.3706399296088293,
|
|
"grad_norm": 0.06429938226938248,
|
|
"learning_rate": 3.3102280658466977e-06,
|
|
"loss": 0.0069,
|
|
"step": 8530
|
|
},
|
|
{
|
|
"epoch": 0.37107444300813625,
|
|
"grad_norm": 0.0672391802072525,
|
|
"learning_rate": 3.2893327047684034e-06,
|
|
"loss": 0.0069,
|
|
"step": 8540
|
|
},
|
|
{
|
|
"epoch": 0.3715089564074432,
|
|
"grad_norm": 0.10419747233390808,
|
|
"learning_rate": 3.2684905127461573e-06,
|
|
"loss": 0.0071,
|
|
"step": 8550
|
|
},
|
|
{
|
|
"epoch": 0.3719434698067502,
|
|
"grad_norm": 0.08025926351547241,
|
|
"learning_rate": 3.2477016549140173e-06,
|
|
"loss": 0.0068,
|
|
"step": 8560
|
|
},
|
|
{
|
|
"epoch": 0.37237798320605714,
|
|
"grad_norm": 0.05993766710162163,
|
|
"learning_rate": 3.226966295983466e-06,
|
|
"loss": 0.0086,
|
|
"step": 8570
|
|
},
|
|
{
|
|
"epoch": 0.3728124966053641,
|
|
"grad_norm": 0.08144347369670868,
|
|
"learning_rate": 3.206284600242102e-06,
|
|
"loss": 0.006,
|
|
"step": 8580
|
|
},
|
|
{
|
|
"epoch": 0.37324701000467103,
|
|
"grad_norm": 0.0695662721991539,
|
|
"learning_rate": 3.185656731552362e-06,
|
|
"loss": 0.0052,
|
|
"step": 8590
|
|
},
|
|
{
|
|
"epoch": 0.373681523403978,
|
|
"grad_norm": 0.07274291664361954,
|
|
"learning_rate": 3.1650828533501943e-06,
|
|
"loss": 0.0058,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 0.3741160368032849,
|
|
"grad_norm": 0.06351452320814133,
|
|
"learning_rate": 3.144563128643776e-06,
|
|
"loss": 0.0052,
|
|
"step": 8610
|
|
},
|
|
{
|
|
"epoch": 0.37455055020259187,
|
|
"grad_norm": 0.07425472140312195,
|
|
"learning_rate": 3.1240977200122422e-06,
|
|
"loss": 0.0082,
|
|
"step": 8620
|
|
},
|
|
{
|
|
"epoch": 0.3749850636018988,
|
|
"grad_norm": 0.06502246856689453,
|
|
"learning_rate": 3.1036867896043574e-06,
|
|
"loss": 0.0057,
|
|
"step": 8630
|
|
},
|
|
{
|
|
"epoch": 0.37541957700120576,
|
|
"grad_norm": 0.03994821384549141,
|
|
"learning_rate": 3.0833304991372557e-06,
|
|
"loss": 0.0066,
|
|
"step": 8640
|
|
},
|
|
{
|
|
"epoch": 0.3758540904005127,
|
|
"grad_norm": 0.07854374498128891,
|
|
"learning_rate": 3.063029009895162e-06,
|
|
"loss": 0.0063,
|
|
"step": 8650
|
|
},
|
|
{
|
|
"epoch": 0.37628860379981965,
|
|
"grad_norm": 0.05410481616854668,
|
|
"learning_rate": 3.0427824827281062e-06,
|
|
"loss": 0.0077,
|
|
"step": 8660
|
|
},
|
|
{
|
|
"epoch": 0.3767231171991266,
|
|
"grad_norm": 0.08873879164457321,
|
|
"learning_rate": 3.022591078050644e-06,
|
|
"loss": 0.0077,
|
|
"step": 8670
|
|
},
|
|
{
|
|
"epoch": 0.3771576305984336,
|
|
"grad_norm": 0.062346357852220535,
|
|
"learning_rate": 3.0024549558405945e-06,
|
|
"loss": 0.0088,
|
|
"step": 8680
|
|
},
|
|
{
|
|
"epoch": 0.37759214399774055,
|
|
"grad_norm": 0.06358446925878525,
|
|
"learning_rate": 2.982374275637776e-06,
|
|
"loss": 0.0082,
|
|
"step": 8690
|
|
},
|
|
{
|
|
"epoch": 0.3780266573970475,
|
|
"grad_norm": 0.05350648984313011,
|
|
"learning_rate": 2.9623491965427264e-06,
|
|
"loss": 0.004,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 0.37846117079635444,
|
|
"grad_norm": 0.08188925683498383,
|
|
"learning_rate": 2.942379877215461e-06,
|
|
"loss": 0.0068,
|
|
"step": 8710
|
|
},
|
|
{
|
|
"epoch": 0.3788956841956614,
|
|
"grad_norm": 0.07393115013837814,
|
|
"learning_rate": 2.922466475874206e-06,
|
|
"loss": 0.0081,
|
|
"step": 8720
|
|
},
|
|
{
|
|
"epoch": 0.37933019759496833,
|
|
"grad_norm": 0.08056262135505676,
|
|
"learning_rate": 2.90260915029414e-06,
|
|
"loss": 0.0068,
|
|
"step": 8730
|
|
},
|
|
{
|
|
"epoch": 0.3797647109942753,
|
|
"grad_norm": 0.1089349240064621,
|
|
"learning_rate": 2.882808057806149e-06,
|
|
"loss": 0.0082,
|
|
"step": 8740
|
|
},
|
|
{
|
|
"epoch": 0.3801992243935822,
|
|
"grad_norm": 0.0753493532538414,
|
|
"learning_rate": 2.863063355295589e-06,
|
|
"loss": 0.0055,
|
|
"step": 8750
|
|
},
|
|
{
|
|
"epoch": 0.38063373779288917,
|
|
"grad_norm": 0.06075584515929222,
|
|
"learning_rate": 2.8433751992010315e-06,
|
|
"loss": 0.0064,
|
|
"step": 8760
|
|
},
|
|
{
|
|
"epoch": 0.3810682511921961,
|
|
"grad_norm": 0.08952397853136063,
|
|
"learning_rate": 2.8237437455130203e-06,
|
|
"loss": 0.0058,
|
|
"step": 8770
|
|
},
|
|
{
|
|
"epoch": 0.38150276459150306,
|
|
"grad_norm": 0.1460278481245041,
|
|
"learning_rate": 2.8041691497728527e-06,
|
|
"loss": 0.0095,
|
|
"step": 8780
|
|
},
|
|
{
|
|
"epoch": 0.38193727799081006,
|
|
"grad_norm": 0.05322817713022232,
|
|
"learning_rate": 2.784651567071327e-06,
|
|
"loss": 0.0061,
|
|
"step": 8790
|
|
},
|
|
{
|
|
"epoch": 0.382371791390117,
|
|
"grad_norm": 0.13151440024375916,
|
|
"learning_rate": 2.7651911520475316e-06,
|
|
"loss": 0.0074,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 0.38280630478942396,
|
|
"grad_norm": 0.0687444806098938,
|
|
"learning_rate": 2.745788058887604e-06,
|
|
"loss": 0.0064,
|
|
"step": 8810
|
|
},
|
|
{
|
|
"epoch": 0.3832408181887309,
|
|
"grad_norm": 0.05780453979969025,
|
|
"learning_rate": 2.7264424413235267e-06,
|
|
"loss": 0.0056,
|
|
"step": 8820
|
|
},
|
|
{
|
|
"epoch": 0.38367533158803785,
|
|
"grad_norm": 0.06033443659543991,
|
|
"learning_rate": 2.707154452631889e-06,
|
|
"loss": 0.0045,
|
|
"step": 8830
|
|
},
|
|
{
|
|
"epoch": 0.3841098449873448,
|
|
"grad_norm": 0.065843366086483,
|
|
"learning_rate": 2.6879242456326827e-06,
|
|
"loss": 0.005,
|
|
"step": 8840
|
|
},
|
|
{
|
|
"epoch": 0.38454435838665174,
|
|
"grad_norm": 0.07421589642763138,
|
|
"learning_rate": 2.6687519726881063e-06,
|
|
"loss": 0.006,
|
|
"step": 8850
|
|
},
|
|
{
|
|
"epoch": 0.3849788717859587,
|
|
"grad_norm": 0.09318463504314423,
|
|
"learning_rate": 2.649637785701329e-06,
|
|
"loss": 0.0081,
|
|
"step": 8860
|
|
},
|
|
{
|
|
"epoch": 0.38541338518526563,
|
|
"grad_norm": 0.04374096915125847,
|
|
"learning_rate": 2.630581836115301e-06,
|
|
"loss": 0.0059,
|
|
"step": 8870
|
|
},
|
|
{
|
|
"epoch": 0.3858478985845726,
|
|
"grad_norm": 0.1101013720035553,
|
|
"learning_rate": 2.6115842749115604e-06,
|
|
"loss": 0.0082,
|
|
"step": 8880
|
|
},
|
|
{
|
|
"epoch": 0.3862824119838795,
|
|
"grad_norm": 0.08844293653964996,
|
|
"learning_rate": 2.5926452526090305e-06,
|
|
"loss": 0.007,
|
|
"step": 8890
|
|
},
|
|
{
|
|
"epoch": 0.3867169253831865,
|
|
"grad_norm": 0.05187362805008888,
|
|
"learning_rate": 2.573764919262819e-06,
|
|
"loss": 0.0056,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 0.3871514387824935,
|
|
"grad_norm": 0.06577293574810028,
|
|
"learning_rate": 2.5549434244630478e-06,
|
|
"loss": 0.0064,
|
|
"step": 8910
|
|
},
|
|
{
|
|
"epoch": 0.3875859521818004,
|
|
"grad_norm": 0.08939126133918762,
|
|
"learning_rate": 2.536180917333648e-06,
|
|
"loss": 0.0069,
|
|
"step": 8920
|
|
},
|
|
{
|
|
"epoch": 0.38802046558110737,
|
|
"grad_norm": 0.05213819071650505,
|
|
"learning_rate": 2.5174775465311897e-06,
|
|
"loss": 0.0074,
|
|
"step": 8930
|
|
},
|
|
{
|
|
"epoch": 0.3884549789804143,
|
|
"grad_norm": 0.06323190778493881,
|
|
"learning_rate": 2.4988334602437057e-06,
|
|
"loss": 0.007,
|
|
"step": 8940
|
|
},
|
|
{
|
|
"epoch": 0.38888949237972126,
|
|
"grad_norm": 0.05582160875201225,
|
|
"learning_rate": 2.4802488061895137e-06,
|
|
"loss": 0.0063,
|
|
"step": 8950
|
|
},
|
|
{
|
|
"epoch": 0.3893240057790282,
|
|
"grad_norm": 0.07055678218603134,
|
|
"learning_rate": 2.4617237316160427e-06,
|
|
"loss": 0.0067,
|
|
"step": 8960
|
|
},
|
|
{
|
|
"epoch": 0.38975851917833515,
|
|
"grad_norm": 0.04307841509580612,
|
|
"learning_rate": 2.4432583832986633e-06,
|
|
"loss": 0.0059,
|
|
"step": 8970
|
|
},
|
|
{
|
|
"epoch": 0.3901930325776421,
|
|
"grad_norm": 0.08723011612892151,
|
|
"learning_rate": 2.42485290753955e-06,
|
|
"loss": 0.007,
|
|
"step": 8980
|
|
},
|
|
{
|
|
"epoch": 0.39062754597694904,
|
|
"grad_norm": 0.08264581859111786,
|
|
"learning_rate": 2.4065074501664863e-06,
|
|
"loss": 0.007,
|
|
"step": 8990
|
|
},
|
|
{
|
|
"epoch": 0.391062059376256,
|
|
"grad_norm": 0.10687217116355896,
|
|
"learning_rate": 2.3882221565317277e-06,
|
|
"loss": 0.0051,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 0.391496572775563,
|
|
"grad_norm": 0.05457941070199013,
|
|
"learning_rate": 2.3699971715108593e-06,
|
|
"loss": 0.0053,
|
|
"step": 9010
|
|
},
|
|
{
|
|
"epoch": 0.39193108617486994,
|
|
"grad_norm": 0.04098676145076752,
|
|
"learning_rate": 2.3518326395016222e-06,
|
|
"loss": 0.0055,
|
|
"step": 9020
|
|
},
|
|
{
|
|
"epoch": 0.3923655995741769,
|
|
"grad_norm": 0.047154199331998825,
|
|
"learning_rate": 2.3337287044227996e-06,
|
|
"loss": 0.0068,
|
|
"step": 9030
|
|
},
|
|
{
|
|
"epoch": 0.39280011297348383,
|
|
"grad_norm": 0.05270608887076378,
|
|
"learning_rate": 2.315685509713046e-06,
|
|
"loss": 0.0062,
|
|
"step": 9040
|
|
},
|
|
{
|
|
"epoch": 0.3932346263727908,
|
|
"grad_norm": 0.06262046098709106,
|
|
"learning_rate": 2.2977031983297817e-06,
|
|
"loss": 0.0069,
|
|
"step": 9050
|
|
},
|
|
{
|
|
"epoch": 0.3936691397720977,
|
|
"grad_norm": 0.07482646405696869,
|
|
"learning_rate": 2.279781912748028e-06,
|
|
"loss": 0.0067,
|
|
"step": 9060
|
|
},
|
|
{
|
|
"epoch": 0.39410365317140467,
|
|
"grad_norm": 0.06200959533452988,
|
|
"learning_rate": 2.2619217949593076e-06,
|
|
"loss": 0.0074,
|
|
"step": 9070
|
|
},
|
|
{
|
|
"epoch": 0.3945381665707116,
|
|
"grad_norm": 0.09040582180023193,
|
|
"learning_rate": 2.2441229864705048e-06,
|
|
"loss": 0.0074,
|
|
"step": 9080
|
|
},
|
|
{
|
|
"epoch": 0.39497267997001856,
|
|
"grad_norm": 0.07866061478853226,
|
|
"learning_rate": 2.226385628302742e-06,
|
|
"loss": 0.0073,
|
|
"step": 9090
|
|
},
|
|
{
|
|
"epoch": 0.3954071933693255,
|
|
"grad_norm": 0.07364839315414429,
|
|
"learning_rate": 2.2087098609902636e-06,
|
|
"loss": 0.0083,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 0.39584170676863245,
|
|
"grad_norm": 0.07077943533658981,
|
|
"learning_rate": 2.1910958245793347e-06,
|
|
"loss": 0.0079,
|
|
"step": 9110
|
|
},
|
|
{
|
|
"epoch": 0.39627622016793945,
|
|
"grad_norm": 0.09263047575950623,
|
|
"learning_rate": 2.173543658627121e-06,
|
|
"loss": 0.0063,
|
|
"step": 9120
|
|
},
|
|
{
|
|
"epoch": 0.3967107335672464,
|
|
"grad_norm": 0.05478379502892494,
|
|
"learning_rate": 2.1560535022005766e-06,
|
|
"loss": 0.0075,
|
|
"step": 9130
|
|
},
|
|
{
|
|
"epoch": 0.39714524696655334,
|
|
"grad_norm": 0.037011146545410156,
|
|
"learning_rate": 2.138625493875359e-06,
|
|
"loss": 0.0064,
|
|
"step": 9140
|
|
},
|
|
{
|
|
"epoch": 0.3975797603658603,
|
|
"grad_norm": 0.07936471700668335,
|
|
"learning_rate": 2.1212597717347183e-06,
|
|
"loss": 0.0081,
|
|
"step": 9150
|
|
},
|
|
{
|
|
"epoch": 0.39801427376516724,
|
|
"grad_norm": 0.1095275804400444,
|
|
"learning_rate": 2.1039564733684014e-06,
|
|
"loss": 0.0073,
|
|
"step": 9160
|
|
},
|
|
{
|
|
"epoch": 0.3984487871644742,
|
|
"grad_norm": 0.05063824728131294,
|
|
"learning_rate": 2.0867157358715794e-06,
|
|
"loss": 0.0072,
|
|
"step": 9170
|
|
},
|
|
{
|
|
"epoch": 0.39888330056378113,
|
|
"grad_norm": 0.05019955709576607,
|
|
"learning_rate": 2.0695376958437442e-06,
|
|
"loss": 0.0055,
|
|
"step": 9180
|
|
},
|
|
{
|
|
"epoch": 0.3993178139630881,
|
|
"grad_norm": 0.06541518121957779,
|
|
"learning_rate": 2.0524224893876253e-06,
|
|
"loss": 0.0071,
|
|
"step": 9190
|
|
},
|
|
{
|
|
"epoch": 0.399752327362395,
|
|
"grad_norm": 0.07469270378351212,
|
|
"learning_rate": 2.0353702521081277e-06,
|
|
"loss": 0.0052,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 0.40018684076170197,
|
|
"grad_norm": 0.08644366264343262,
|
|
"learning_rate": 2.0183811191112436e-06,
|
|
"loss": 0.0062,
|
|
"step": 9210
|
|
},
|
|
{
|
|
"epoch": 0.4006213541610089,
|
|
"grad_norm": 0.06640897691249847,
|
|
"learning_rate": 2.001455225002984e-06,
|
|
"loss": 0.0054,
|
|
"step": 9220
|
|
},
|
|
{
|
|
"epoch": 0.4010558675603159,
|
|
"grad_norm": 0.07081051170825958,
|
|
"learning_rate": 1.984592703888313e-06,
|
|
"loss": 0.0057,
|
|
"step": 9230
|
|
},
|
|
{
|
|
"epoch": 0.40149038095962286,
|
|
"grad_norm": 0.053732264786958694,
|
|
"learning_rate": 1.967793689370093e-06,
|
|
"loss": 0.0052,
|
|
"step": 9240
|
|
},
|
|
{
|
|
"epoch": 0.4019248943589298,
|
|
"grad_norm": 0.060937847942113876,
|
|
"learning_rate": 1.95105831454801e-06,
|
|
"loss": 0.0065,
|
|
"step": 9250
|
|
},
|
|
{
|
|
"epoch": 0.40235940775823675,
|
|
"grad_norm": 0.06776256114244461,
|
|
"learning_rate": 1.9343867120175375e-06,
|
|
"loss": 0.0072,
|
|
"step": 9260
|
|
},
|
|
{
|
|
"epoch": 0.4027939211575437,
|
|
"grad_norm": 0.10395505279302597,
|
|
"learning_rate": 1.9177790138688746e-06,
|
|
"loss": 0.0068,
|
|
"step": 9270
|
|
},
|
|
{
|
|
"epoch": 0.40322843455685065,
|
|
"grad_norm": 0.058665502816438675,
|
|
"learning_rate": 1.9012353516858984e-06,
|
|
"loss": 0.0055,
|
|
"step": 9280
|
|
},
|
|
{
|
|
"epoch": 0.4036629479561576,
|
|
"grad_norm": 0.08694977313280106,
|
|
"learning_rate": 1.884755856545123e-06,
|
|
"loss": 0.0055,
|
|
"step": 9290
|
|
},
|
|
{
|
|
"epoch": 0.40409746135546454,
|
|
"grad_norm": 0.05163672938942909,
|
|
"learning_rate": 1.8683406590146714e-06,
|
|
"loss": 0.0051,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 0.4045319747547715,
|
|
"grad_norm": 0.10258106887340546,
|
|
"learning_rate": 1.8519898891532273e-06,
|
|
"loss": 0.0054,
|
|
"step": 9310
|
|
},
|
|
{
|
|
"epoch": 0.40496648815407843,
|
|
"grad_norm": 0.0607648529112339,
|
|
"learning_rate": 1.8357036765090107e-06,
|
|
"loss": 0.0054,
|
|
"step": 9320
|
|
},
|
|
{
|
|
"epoch": 0.4054010015533854,
|
|
"grad_norm": 0.08496219664812088,
|
|
"learning_rate": 1.8194821501187455e-06,
|
|
"loss": 0.0068,
|
|
"step": 9330
|
|
},
|
|
{
|
|
"epoch": 0.4058355149526924,
|
|
"grad_norm": 0.0701276883482933,
|
|
"learning_rate": 1.8033254385066501e-06,
|
|
"loss": 0.0047,
|
|
"step": 9340
|
|
},
|
|
{
|
|
"epoch": 0.4062700283519993,
|
|
"grad_norm": 0.043031346052885056,
|
|
"learning_rate": 1.7872336696834091e-06,
|
|
"loss": 0.0069,
|
|
"step": 9350
|
|
},
|
|
{
|
|
"epoch": 0.40670454175130627,
|
|
"grad_norm": 0.09492848813533783,
|
|
"learning_rate": 1.7712069711451553e-06,
|
|
"loss": 0.0067,
|
|
"step": 9360
|
|
},
|
|
{
|
|
"epoch": 0.4071390551506132,
|
|
"grad_norm": 0.23138777911663055,
|
|
"learning_rate": 1.7552454698724753e-06,
|
|
"loss": 0.0063,
|
|
"step": 9370
|
|
},
|
|
{
|
|
"epoch": 0.40757356854992016,
|
|
"grad_norm": 0.07827364653348923,
|
|
"learning_rate": 1.7393492923293854e-06,
|
|
"loss": 0.0052,
|
|
"step": 9380
|
|
},
|
|
{
|
|
"epoch": 0.4080080819492271,
|
|
"grad_norm": 0.07025067508220673,
|
|
"learning_rate": 1.7235185644623352e-06,
|
|
"loss": 0.0072,
|
|
"step": 9390
|
|
},
|
|
{
|
|
"epoch": 0.40844259534853405,
|
|
"grad_norm": 0.0790853276848793,
|
|
"learning_rate": 1.7077534116992266e-06,
|
|
"loss": 0.0066,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 0.408877108747841,
|
|
"grad_norm": 0.04930580407381058,
|
|
"learning_rate": 1.692053958948393e-06,
|
|
"loss": 0.0082,
|
|
"step": 9410
|
|
},
|
|
{
|
|
"epoch": 0.40931162214714795,
|
|
"grad_norm": 0.049826521426439285,
|
|
"learning_rate": 1.6764203305976224e-06,
|
|
"loss": 0.0068,
|
|
"step": 9420
|
|
},
|
|
{
|
|
"epoch": 0.4097461355464549,
|
|
"grad_norm": 0.038873229175806046,
|
|
"learning_rate": 1.6608526505131773e-06,
|
|
"loss": 0.007,
|
|
"step": 9430
|
|
},
|
|
{
|
|
"epoch": 0.41018064894576184,
|
|
"grad_norm": 0.07032894343137741,
|
|
"learning_rate": 1.6453510420388085e-06,
|
|
"loss": 0.0049,
|
|
"step": 9440
|
|
},
|
|
{
|
|
"epoch": 0.41061516234506884,
|
|
"grad_norm": 0.057884182780981064,
|
|
"learning_rate": 1.6299156279947725e-06,
|
|
"loss": 0.005,
|
|
"step": 9450
|
|
},
|
|
{
|
|
"epoch": 0.4110496757443758,
|
|
"grad_norm": 0.06757012754678726,
|
|
"learning_rate": 1.6145465306768604e-06,
|
|
"loss": 0.0067,
|
|
"step": 9460
|
|
},
|
|
{
|
|
"epoch": 0.41148418914368273,
|
|
"grad_norm": 0.07038474082946777,
|
|
"learning_rate": 1.5992438718554415e-06,
|
|
"loss": 0.0065,
|
|
"step": 9470
|
|
},
|
|
{
|
|
"epoch": 0.4119187025429897,
|
|
"grad_norm": 0.06230664253234863,
|
|
"learning_rate": 1.5840077727744785e-06,
|
|
"loss": 0.0059,
|
|
"step": 9480
|
|
},
|
|
{
|
|
"epoch": 0.4123532159422966,
|
|
"grad_norm": 0.06649099290370941,
|
|
"learning_rate": 1.5688383541505835e-06,
|
|
"loss": 0.0061,
|
|
"step": 9490
|
|
},
|
|
{
|
|
"epoch": 0.41278772934160357,
|
|
"grad_norm": 0.10095405578613281,
|
|
"learning_rate": 1.5537357361720551e-06,
|
|
"loss": 0.0065,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 0.4132222427409105,
|
|
"grad_norm": 0.056790791451931,
|
|
"learning_rate": 1.5387000384979223e-06,
|
|
"loss": 0.0052,
|
|
"step": 9510
|
|
},
|
|
{
|
|
"epoch": 0.41365675614021746,
|
|
"grad_norm": 0.04234682396054268,
|
|
"learning_rate": 1.5237313802569974e-06,
|
|
"loss": 0.0069,
|
|
"step": 9520
|
|
},
|
|
{
|
|
"epoch": 0.4140912695395244,
|
|
"grad_norm": 0.057066041976213455,
|
|
"learning_rate": 1.5088298800469413e-06,
|
|
"loss": 0.0074,
|
|
"step": 9530
|
|
},
|
|
{
|
|
"epoch": 0.41452578293883136,
|
|
"grad_norm": 0.11709760129451752,
|
|
"learning_rate": 1.4939956559333202e-06,
|
|
"loss": 0.0068,
|
|
"step": 9540
|
|
},
|
|
{
|
|
"epoch": 0.4149602963381383,
|
|
"grad_norm": 0.11893763393163681,
|
|
"learning_rate": 1.479228825448654e-06,
|
|
"loss": 0.0073,
|
|
"step": 9550
|
|
},
|
|
{
|
|
"epoch": 0.4153948097374453,
|
|
"grad_norm": 0.05574529990553856,
|
|
"learning_rate": 1.4645295055915154e-06,
|
|
"loss": 0.0056,
|
|
"step": 9560
|
|
},
|
|
{
|
|
"epoch": 0.41582932313675225,
|
|
"grad_norm": 0.07223575562238693,
|
|
"learning_rate": 1.4498978128255691e-06,
|
|
"loss": 0.0061,
|
|
"step": 9570
|
|
},
|
|
{
|
|
"epoch": 0.4162638365360592,
|
|
"grad_norm": 0.04353675991296768,
|
|
"learning_rate": 1.4353338630786817e-06,
|
|
"loss": 0.0057,
|
|
"step": 9580
|
|
},
|
|
{
|
|
"epoch": 0.41669834993536614,
|
|
"grad_norm": 0.08113046735525131,
|
|
"learning_rate": 1.420837771741973e-06,
|
|
"loss": 0.007,
|
|
"step": 9590
|
|
},
|
|
{
|
|
"epoch": 0.4171328633346731,
|
|
"grad_norm": 0.06625610589981079,
|
|
"learning_rate": 1.4064096536689298e-06,
|
|
"loss": 0.0074,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 0.41756737673398003,
|
|
"grad_norm": 0.06211593374609947,
|
|
"learning_rate": 1.3920496231744717e-06,
|
|
"loss": 0.0056,
|
|
"step": 9610
|
|
},
|
|
{
|
|
"epoch": 0.418001890133287,
|
|
"grad_norm": 0.150951087474823,
|
|
"learning_rate": 1.3777577940340558e-06,
|
|
"loss": 0.0075,
|
|
"step": 9620
|
|
},
|
|
{
|
|
"epoch": 0.4184364035325939,
|
|
"grad_norm": 0.1257888674736023,
|
|
"learning_rate": 1.3635342794827888e-06,
|
|
"loss": 0.0071,
|
|
"step": 9630
|
|
},
|
|
{
|
|
"epoch": 0.41887091693190087,
|
|
"grad_norm": 0.0830865353345871,
|
|
"learning_rate": 1.3493791922145027e-06,
|
|
"loss": 0.0061,
|
|
"step": 9640
|
|
},
|
|
{
|
|
"epoch": 0.4193054303312078,
|
|
"grad_norm": 0.07638338953256607,
|
|
"learning_rate": 1.3352926443808778e-06,
|
|
"loss": 0.0067,
|
|
"step": 9650
|
|
},
|
|
{
|
|
"epoch": 0.41973994373051476,
|
|
"grad_norm": 0.09588000923395157,
|
|
"learning_rate": 1.3212747475905564e-06,
|
|
"loss": 0.0086,
|
|
"step": 9660
|
|
},
|
|
{
|
|
"epoch": 0.42017445712982177,
|
|
"grad_norm": 0.0916999876499176,
|
|
"learning_rate": 1.3073256129082534e-06,
|
|
"loss": 0.0057,
|
|
"step": 9670
|
|
},
|
|
{
|
|
"epoch": 0.4206089705291287,
|
|
"grad_norm": 0.0716477781534195,
|
|
"learning_rate": 1.2934453508538746e-06,
|
|
"loss": 0.0042,
|
|
"step": 9680
|
|
},
|
|
{
|
|
"epoch": 0.42104348392843566,
|
|
"grad_norm": 0.060920000076293945,
|
|
"learning_rate": 1.2796340714016419e-06,
|
|
"loss": 0.0046,
|
|
"step": 9690
|
|
},
|
|
{
|
|
"epoch": 0.4214779973277426,
|
|
"grad_norm": 0.116277314722538,
|
|
"learning_rate": 1.26589188397923e-06,
|
|
"loss": 0.0062,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 0.42191251072704955,
|
|
"grad_norm": 0.09452416747808456,
|
|
"learning_rate": 1.2522188974668847e-06,
|
|
"loss": 0.006,
|
|
"step": 9710
|
|
},
|
|
{
|
|
"epoch": 0.4223470241263565,
|
|
"grad_norm": 0.0834532305598259,
|
|
"learning_rate": 1.2386152201965763e-06,
|
|
"loss": 0.0066,
|
|
"step": 9720
|
|
},
|
|
{
|
|
"epoch": 0.42278153752566344,
|
|
"grad_norm": 0.09147264808416367,
|
|
"learning_rate": 1.2250809599511293e-06,
|
|
"loss": 0.0064,
|
|
"step": 9730
|
|
},
|
|
{
|
|
"epoch": 0.4232160509249704,
|
|
"grad_norm": 0.058695271611213684,
|
|
"learning_rate": 1.2116162239633734e-06,
|
|
"loss": 0.0058,
|
|
"step": 9740
|
|
},
|
|
{
|
|
"epoch": 0.42365056432427733,
|
|
"grad_norm": 0.10094515234231949,
|
|
"learning_rate": 1.198221118915287e-06,
|
|
"loss": 0.0059,
|
|
"step": 9750
|
|
},
|
|
{
|
|
"epoch": 0.4240850777235843,
|
|
"grad_norm": 0.11626178026199341,
|
|
"learning_rate": 1.1848957509371739e-06,
|
|
"loss": 0.0078,
|
|
"step": 9760
|
|
},
|
|
{
|
|
"epoch": 0.4245195911228912,
|
|
"grad_norm": 0.09096948802471161,
|
|
"learning_rate": 1.1716402256067905e-06,
|
|
"loss": 0.0071,
|
|
"step": 9770
|
|
},
|
|
{
|
|
"epoch": 0.42495410452219823,
|
|
"grad_norm": 0.10112593322992325,
|
|
"learning_rate": 1.1584546479485316e-06,
|
|
"loss": 0.0068,
|
|
"step": 9780
|
|
},
|
|
{
|
|
"epoch": 0.4253886179215052,
|
|
"grad_norm": 0.08292309194803238,
|
|
"learning_rate": 1.1453391224325928e-06,
|
|
"loss": 0.0059,
|
|
"step": 9790
|
|
},
|
|
{
|
|
"epoch": 0.4258231313208121,
|
|
"grad_norm": 0.12202641367912292,
|
|
"learning_rate": 1.1322937529741384e-06,
|
|
"loss": 0.0062,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 0.42625764472011907,
|
|
"grad_norm": 0.12251067161560059,
|
|
"learning_rate": 1.1193186429324887e-06,
|
|
"loss": 0.008,
|
|
"step": 9810
|
|
},
|
|
{
|
|
"epoch": 0.426692158119426,
|
|
"grad_norm": 0.14434179663658142,
|
|
"learning_rate": 1.1064138951102843e-06,
|
|
"loss": 0.0093,
|
|
"step": 9820
|
|
},
|
|
{
|
|
"epoch": 0.42712667151873296,
|
|
"grad_norm": 0.08781075477600098,
|
|
"learning_rate": 1.093579611752692e-06,
|
|
"loss": 0.0067,
|
|
"step": 9830
|
|
},
|
|
{
|
|
"epoch": 0.4275611849180399,
|
|
"grad_norm": 0.08094519376754761,
|
|
"learning_rate": 1.080815894546574e-06,
|
|
"loss": 0.0048,
|
|
"step": 9840
|
|
},
|
|
{
|
|
"epoch": 0.42799569831734685,
|
|
"grad_norm": 0.0896298959851265,
|
|
"learning_rate": 1.0681228446196978e-06,
|
|
"loss": 0.0054,
|
|
"step": 9850
|
|
},
|
|
{
|
|
"epoch": 0.4284302117166538,
|
|
"grad_norm": 0.07688537985086441,
|
|
"learning_rate": 1.0555005625399316e-06,
|
|
"loss": 0.0067,
|
|
"step": 9860
|
|
},
|
|
{
|
|
"epoch": 0.42886472511596074,
|
|
"grad_norm": 0.05957145243883133,
|
|
"learning_rate": 1.0429491483144394e-06,
|
|
"loss": 0.0063,
|
|
"step": 9870
|
|
},
|
|
{
|
|
"epoch": 0.4292992385152677,
|
|
"grad_norm": 0.062143776565790176,
|
|
"learning_rate": 1.030468701388896e-06,
|
|
"loss": 0.0055,
|
|
"step": 9880
|
|
},
|
|
{
|
|
"epoch": 0.4297337519145747,
|
|
"grad_norm": 0.06420819461345673,
|
|
"learning_rate": 1.0180593206467015e-06,
|
|
"loss": 0.0071,
|
|
"step": 9890
|
|
},
|
|
{
|
|
"epoch": 0.43016826531388164,
|
|
"grad_norm": 0.0829436257481575,
|
|
"learning_rate": 1.0057211044081916e-06,
|
|
"loss": 0.0053,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 0.4306027787131886,
|
|
"grad_norm": 0.038479071110486984,
|
|
"learning_rate": 9.934541504298589e-07,
|
|
"loss": 0.008,
|
|
"step": 9910
|
|
},
|
|
{
|
|
"epoch": 0.43103729211249553,
|
|
"grad_norm": 0.057884518057107925,
|
|
"learning_rate": 9.812585559035848e-07,
|
|
"loss": 0.007,
|
|
"step": 9920
|
|
},
|
|
{
|
|
"epoch": 0.4314718055118025,
|
|
"grad_norm": 0.0618940070271492,
|
|
"learning_rate": 9.691344174558615e-07,
|
|
"loss": 0.0072,
|
|
"step": 9930
|
|
},
|
|
{
|
|
"epoch": 0.4319063189111094,
|
|
"grad_norm": 0.06519508361816406,
|
|
"learning_rate": 9.570818311470298e-07,
|
|
"loss": 0.0075,
|
|
"step": 9940
|
|
},
|
|
{
|
|
"epoch": 0.43234083231041637,
|
|
"grad_norm": 0.08697501569986343,
|
|
"learning_rate": 9.451008924705196e-07,
|
|
"loss": 0.0067,
|
|
"step": 9950
|
|
},
|
|
{
|
|
"epoch": 0.4327753457097233,
|
|
"grad_norm": 0.07981958240270615,
|
|
"learning_rate": 9.331916963520959e-07,
|
|
"loss": 0.0064,
|
|
"step": 9960
|
|
},
|
|
{
|
|
"epoch": 0.43320985910903026,
|
|
"grad_norm": 0.06097478047013283,
|
|
"learning_rate": 9.213543371490963e-07,
|
|
"loss": 0.0062,
|
|
"step": 9970
|
|
},
|
|
{
|
|
"epoch": 0.4336443725083372,
|
|
"grad_norm": 0.055920518934726715,
|
|
"learning_rate": 9.095889086496867e-07,
|
|
"loss": 0.0064,
|
|
"step": 9980
|
|
},
|
|
{
|
|
"epoch": 0.43407888590764415,
|
|
"grad_norm": 0.09264001250267029,
|
|
"learning_rate": 8.978955040721371e-07,
|
|
"loss": 0.0068,
|
|
"step": 9990
|
|
},
|
|
{
|
|
"epoch": 0.43451339930695115,
|
|
"grad_norm": 0.07896417379379272,
|
|
"learning_rate": 8.862742160640525e-07,
|
|
"loss": 0.0064,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 0.4349479127062581,
|
|
"grad_norm": 0.053227316588163376,
|
|
"learning_rate": 8.747251367016552e-07,
|
|
"loss": 0.0073,
|
|
"step": 10010
|
|
},
|
|
{
|
|
"epoch": 0.43538242610556505,
|
|
"grad_norm": 0.04080301523208618,
|
|
"learning_rate": 8.632483574890615e-07,
|
|
"loss": 0.006,
|
|
"step": 10020
|
|
},
|
|
{
|
|
"epoch": 0.435816939504872,
|
|
"grad_norm": 0.030272576957941055,
|
|
"learning_rate": 8.518439693575408e-07,
|
|
"loss": 0.0064,
|
|
"step": 10030
|
|
},
|
|
{
|
|
"epoch": 0.43625145290417894,
|
|
"grad_norm": 0.06880766153335571,
|
|
"learning_rate": 8.405120626648067e-07,
|
|
"loss": 0.0074,
|
|
"step": 10040
|
|
},
|
|
{
|
|
"epoch": 0.4366859663034859,
|
|
"grad_norm": 0.1094704121351242,
|
|
"learning_rate": 8.292527271942996e-07,
|
|
"loss": 0.0057,
|
|
"step": 10050
|
|
},
|
|
{
|
|
"epoch": 0.43712047970279283,
|
|
"grad_norm": 0.08783411979675293,
|
|
"learning_rate": 8.180660521544692e-07,
|
|
"loss": 0.0094,
|
|
"step": 10060
|
|
},
|
|
{
|
|
"epoch": 0.4375549931020998,
|
|
"grad_norm": 0.08887655287981033,
|
|
"learning_rate": 8.069521261780733e-07,
|
|
"loss": 0.0063,
|
|
"step": 10070
|
|
},
|
|
{
|
|
"epoch": 0.4379895065014067,
|
|
"grad_norm": 0.07971230894327164,
|
|
"learning_rate": 7.959110373214751e-07,
|
|
"loss": 0.0068,
|
|
"step": 10080
|
|
},
|
|
{
|
|
"epoch": 0.43842401990071367,
|
|
"grad_norm": 0.051013197749853134,
|
|
"learning_rate": 7.849428730639463e-07,
|
|
"loss": 0.0051,
|
|
"step": 10090
|
|
},
|
|
{
|
|
"epoch": 0.4388585333000206,
|
|
"grad_norm": 0.08994479477405548,
|
|
"learning_rate": 7.740477203069674e-07,
|
|
"loss": 0.0058,
|
|
"step": 10100
|
|
},
|
|
{
|
|
"epoch": 0.4392930466993276,
|
|
"grad_norm": 0.03989997133612633,
|
|
"learning_rate": 7.63225665373546e-07,
|
|
"loss": 0.0066,
|
|
"step": 10110
|
|
},
|
|
{
|
|
"epoch": 0.43972756009863456,
|
|
"grad_norm": 0.058878444135189056,
|
|
"learning_rate": 7.524767940075329e-07,
|
|
"loss": 0.007,
|
|
"step": 10120
|
|
},
|
|
{
|
|
"epoch": 0.4401620734979415,
|
|
"grad_norm": 0.052942000329494476,
|
|
"learning_rate": 7.418011913729406e-07,
|
|
"loss": 0.0061,
|
|
"step": 10130
|
|
},
|
|
{
|
|
"epoch": 0.44059658689724845,
|
|
"grad_norm": 0.09582941979169846,
|
|
"learning_rate": 7.311989420532639e-07,
|
|
"loss": 0.0066,
|
|
"step": 10140
|
|
},
|
|
{
|
|
"epoch": 0.4410311002965554,
|
|
"grad_norm": 0.07386282831430435,
|
|
"learning_rate": 7.206701300508212e-07,
|
|
"loss": 0.0059,
|
|
"step": 10150
|
|
},
|
|
{
|
|
"epoch": 0.44146561369586235,
|
|
"grad_norm": 0.04970019310712814,
|
|
"learning_rate": 7.102148387860764e-07,
|
|
"loss": 0.0058,
|
|
"step": 10160
|
|
},
|
|
{
|
|
"epoch": 0.4419001270951693,
|
|
"grad_norm": 0.06993437558412552,
|
|
"learning_rate": 6.998331510969869e-07,
|
|
"loss": 0.0067,
|
|
"step": 10170
|
|
},
|
|
{
|
|
"epoch": 0.44233464049447624,
|
|
"grad_norm": 0.06462310254573822,
|
|
"learning_rate": 6.895251492383426e-07,
|
|
"loss": 0.0062,
|
|
"step": 10180
|
|
},
|
|
{
|
|
"epoch": 0.4427691538937832,
|
|
"grad_norm": 0.06221221759915352,
|
|
"learning_rate": 6.7929091488112e-07,
|
|
"loss": 0.0053,
|
|
"step": 10190
|
|
},
|
|
{
|
|
"epoch": 0.44320366729309013,
|
|
"grad_norm": 0.055420126765966415,
|
|
"learning_rate": 6.691305291118234e-07,
|
|
"loss": 0.0049,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 0.4436381806923971,
|
|
"grad_norm": 0.08335283398628235,
|
|
"learning_rate": 6.59044072431857e-07,
|
|
"loss": 0.0055,
|
|
"step": 10210
|
|
},
|
|
{
|
|
"epoch": 0.4440726940917041,
|
|
"grad_norm": 0.05226948857307434,
|
|
"learning_rate": 6.490316247568762e-07,
|
|
"loss": 0.0055,
|
|
"step": 10220
|
|
},
|
|
{
|
|
"epoch": 0.444507207491011,
|
|
"grad_norm": 0.0770556852221489,
|
|
"learning_rate": 6.390932654161596e-07,
|
|
"loss": 0.0071,
|
|
"step": 10230
|
|
},
|
|
{
|
|
"epoch": 0.44494172089031797,
|
|
"grad_norm": 0.04744845628738403,
|
|
"learning_rate": 6.292290731519757e-07,
|
|
"loss": 0.005,
|
|
"step": 10240
|
|
},
|
|
{
|
|
"epoch": 0.4453762342896249,
|
|
"grad_norm": 0.07813084870576859,
|
|
"learning_rate": 6.194391261189703e-07,
|
|
"loss": 0.0051,
|
|
"step": 10250
|
|
},
|
|
{
|
|
"epoch": 0.44581074768893186,
|
|
"grad_norm": 0.11257217824459076,
|
|
"learning_rate": 6.097235018835279e-07,
|
|
"loss": 0.0061,
|
|
"step": 10260
|
|
},
|
|
{
|
|
"epoch": 0.4462452610882388,
|
|
"grad_norm": 0.10771900415420532,
|
|
"learning_rate": 6.000822774231796e-07,
|
|
"loss": 0.0082,
|
|
"step": 10270
|
|
},
|
|
{
|
|
"epoch": 0.44667977448754576,
|
|
"grad_norm": 0.06365276128053665,
|
|
"learning_rate": 5.905155291259768e-07,
|
|
"loss": 0.0053,
|
|
"step": 10280
|
|
},
|
|
{
|
|
"epoch": 0.4471142878868527,
|
|
"grad_norm": 0.04938112571835518,
|
|
"learning_rate": 5.810233327898929e-07,
|
|
"loss": 0.0058,
|
|
"step": 10290
|
|
},
|
|
{
|
|
"epoch": 0.44754880128615965,
|
|
"grad_norm": 0.10017894953489304,
|
|
"learning_rate": 5.716057636222172e-07,
|
|
"loss": 0.006,
|
|
"step": 10300
|
|
},
|
|
{
|
|
"epoch": 0.4479833146854666,
|
|
"grad_norm": 0.06827139854431152,
|
|
"learning_rate": 5.622628962389687e-07,
|
|
"loss": 0.0053,
|
|
"step": 10310
|
|
},
|
|
{
|
|
"epoch": 0.44841782808477354,
|
|
"grad_norm": 0.09991230070590973,
|
|
"learning_rate": 5.529948046642985e-07,
|
|
"loss": 0.0072,
|
|
"step": 10320
|
|
},
|
|
{
|
|
"epoch": 0.44885234148408054,
|
|
"grad_norm": 0.07327882200479507,
|
|
"learning_rate": 5.43801562329902e-07,
|
|
"loss": 0.0057,
|
|
"step": 10330
|
|
},
|
|
{
|
|
"epoch": 0.4492868548833875,
|
|
"grad_norm": 0.0757019892334938,
|
|
"learning_rate": 5.346832420744363e-07,
|
|
"loss": 0.0069,
|
|
"step": 10340
|
|
},
|
|
{
|
|
"epoch": 0.44972136828269443,
|
|
"grad_norm": 0.05079514533281326,
|
|
"learning_rate": 5.256399161429515e-07,
|
|
"loss": 0.0068,
|
|
"step": 10350
|
|
},
|
|
{
|
|
"epoch": 0.4501558816820014,
|
|
"grad_norm": 0.06105861812829971,
|
|
"learning_rate": 5.166716561863128e-07,
|
|
"loss": 0.0067,
|
|
"step": 10360
|
|
},
|
|
{
|
|
"epoch": 0.4505903950813083,
|
|
"grad_norm": 0.08541977405548096,
|
|
"learning_rate": 5.077785332606266e-07,
|
|
"loss": 0.0044,
|
|
"step": 10370
|
|
},
|
|
{
|
|
"epoch": 0.45102490848061527,
|
|
"grad_norm": 0.1724163442850113,
|
|
"learning_rate": 4.989606178266914e-07,
|
|
"loss": 0.0063,
|
|
"step": 10380
|
|
},
|
|
{
|
|
"epoch": 0.4514594218799222,
|
|
"grad_norm": 0.07179833203554153,
|
|
"learning_rate": 4.902179797494255e-07,
|
|
"loss": 0.0053,
|
|
"step": 10390
|
|
},
|
|
{
|
|
"epoch": 0.45189393527922916,
|
|
"grad_norm": 0.11870724707841873,
|
|
"learning_rate": 4.815506882973242e-07,
|
|
"loss": 0.006,
|
|
"step": 10400
|
|
},
|
|
{
|
|
"epoch": 0.4523284486785361,
|
|
"grad_norm": 0.04006374999880791,
|
|
"learning_rate": 4.7295881214190486e-07,
|
|
"loss": 0.0091,
|
|
"step": 10410
|
|
},
|
|
{
|
|
"epoch": 0.45276296207784306,
|
|
"grad_norm": 0.0644485279917717,
|
|
"learning_rate": 4.644424193571628e-07,
|
|
"loss": 0.0058,
|
|
"step": 10420
|
|
},
|
|
{
|
|
"epoch": 0.45319747547715,
|
|
"grad_norm": 0.05453095585107803,
|
|
"learning_rate": 4.5600157741903626e-07,
|
|
"loss": 0.0056,
|
|
"step": 10430
|
|
},
|
|
{
|
|
"epoch": 0.453631988876457,
|
|
"grad_norm": 0.09095140546560287,
|
|
"learning_rate": 4.4763635320486663e-07,
|
|
"loss": 0.0082,
|
|
"step": 10440
|
|
},
|
|
{
|
|
"epoch": 0.45406650227576395,
|
|
"grad_norm": 0.08895137161016464,
|
|
"learning_rate": 4.3934681299287683e-07,
|
|
"loss": 0.0061,
|
|
"step": 10450
|
|
},
|
|
{
|
|
"epoch": 0.4545010156750709,
|
|
"grad_norm": 0.08378702402114868,
|
|
"learning_rate": 4.311330224616328e-07,
|
|
"loss": 0.0061,
|
|
"step": 10460
|
|
},
|
|
{
|
|
"epoch": 0.45493552907437784,
|
|
"grad_norm": 0.06177133694291115,
|
|
"learning_rate": 4.2299504668953383e-07,
|
|
"loss": 0.0055,
|
|
"step": 10470
|
|
},
|
|
{
|
|
"epoch": 0.4553700424736848,
|
|
"grad_norm": 0.053938720375299454,
|
|
"learning_rate": 4.1493295015429645e-07,
|
|
"loss": 0.0063,
|
|
"step": 10480
|
|
},
|
|
{
|
|
"epoch": 0.45580455587299173,
|
|
"grad_norm": 0.05690125748515129,
|
|
"learning_rate": 4.0694679673243807e-07,
|
|
"loss": 0.0052,
|
|
"step": 10490
|
|
},
|
|
{
|
|
"epoch": 0.4562390692722987,
|
|
"grad_norm": 0.055636215955019,
|
|
"learning_rate": 3.990366496987741e-07,
|
|
"loss": 0.006,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 0.4566735826716056,
|
|
"grad_norm": 0.057698529213666916,
|
|
"learning_rate": 3.912025717259194e-07,
|
|
"loss": 0.006,
|
|
"step": 10510
|
|
},
|
|
{
|
|
"epoch": 0.4571080960709126,
|
|
"grad_norm": 0.06959450244903564,
|
|
"learning_rate": 3.834446248837853e-07,
|
|
"loss": 0.0059,
|
|
"step": 10520
|
|
},
|
|
{
|
|
"epoch": 0.4575426094702195,
|
|
"grad_norm": 0.07037419080734253,
|
|
"learning_rate": 3.7576287063909034e-07,
|
|
"loss": 0.0092,
|
|
"step": 10530
|
|
},
|
|
{
|
|
"epoch": 0.45797712286952647,
|
|
"grad_norm": 0.0696893259882927,
|
|
"learning_rate": 3.681573698548779e-07,
|
|
"loss": 0.0064,
|
|
"step": 10540
|
|
},
|
|
{
|
|
"epoch": 0.45841163626883347,
|
|
"grad_norm": 0.09161542356014252,
|
|
"learning_rate": 3.606281827900282e-07,
|
|
"loss": 0.0067,
|
|
"step": 10550
|
|
},
|
|
{
|
|
"epoch": 0.4588461496681404,
|
|
"grad_norm": 0.04898115620017052,
|
|
"learning_rate": 3.531753690987816e-07,
|
|
"loss": 0.0074,
|
|
"step": 10560
|
|
},
|
|
{
|
|
"epoch": 0.45928066306744736,
|
|
"grad_norm": 0.06962580233812332,
|
|
"learning_rate": 3.4579898783027145e-07,
|
|
"loss": 0.0075,
|
|
"step": 10570
|
|
},
|
|
{
|
|
"epoch": 0.4597151764667543,
|
|
"grad_norm": 0.07286280393600464,
|
|
"learning_rate": 3.3849909742804553e-07,
|
|
"loss": 0.0052,
|
|
"step": 10580
|
|
},
|
|
{
|
|
"epoch": 0.46014968986606125,
|
|
"grad_norm": 0.0685592070221901,
|
|
"learning_rate": 3.3127575572961755e-07,
|
|
"loss": 0.0081,
|
|
"step": 10590
|
|
},
|
|
{
|
|
"epoch": 0.4605842032653682,
|
|
"grad_norm": 0.07501457631587982,
|
|
"learning_rate": 3.2412901996599075e-07,
|
|
"loss": 0.0059,
|
|
"step": 10600
|
|
},
|
|
{
|
|
"epoch": 0.46101871666467514,
|
|
"grad_norm": 0.055459920316934586,
|
|
"learning_rate": 3.170589467612262e-07,
|
|
"loss": 0.0053,
|
|
"step": 10610
|
|
},
|
|
{
|
|
"epoch": 0.4614532300639821,
|
|
"grad_norm": 0.07159551233053207,
|
|
"learning_rate": 3.100655921319706e-07,
|
|
"loss": 0.0074,
|
|
"step": 10620
|
|
},
|
|
{
|
|
"epoch": 0.46188774346328904,
|
|
"grad_norm": 0.07556604593992233,
|
|
"learning_rate": 3.03149011487035e-07,
|
|
"loss": 0.0051,
|
|
"step": 10630
|
|
},
|
|
{
|
|
"epoch": 0.462322256862596,
|
|
"grad_norm": 0.05757951736450195,
|
|
"learning_rate": 2.96309259626939e-07,
|
|
"loss": 0.0097,
|
|
"step": 10640
|
|
},
|
|
{
|
|
"epoch": 0.4627567702619029,
|
|
"grad_norm": 0.07891504466533661,
|
|
"learning_rate": 2.895463907434837e-07,
|
|
"loss": 0.007,
|
|
"step": 10650
|
|
},
|
|
{
|
|
"epoch": 0.46319128366120993,
|
|
"grad_norm": 0.04868793115019798,
|
|
"learning_rate": 2.8286045841932064e-07,
|
|
"loss": 0.006,
|
|
"step": 10660
|
|
},
|
|
{
|
|
"epoch": 0.4636257970605169,
|
|
"grad_norm": 0.11161201447248459,
|
|
"learning_rate": 2.762515156275303e-07,
|
|
"loss": 0.0052,
|
|
"step": 10670
|
|
},
|
|
{
|
|
"epoch": 0.4640603104598238,
|
|
"grad_norm": 0.07221969217061996,
|
|
"learning_rate": 2.697196147311987e-07,
|
|
"loss": 0.0079,
|
|
"step": 10680
|
|
},
|
|
{
|
|
"epoch": 0.46449482385913077,
|
|
"grad_norm": 0.07179443538188934,
|
|
"learning_rate": 2.6326480748300467e-07,
|
|
"loss": 0.0069,
|
|
"step": 10690
|
|
},
|
|
{
|
|
"epoch": 0.4649293372584377,
|
|
"grad_norm": 0.10213626176118851,
|
|
"learning_rate": 2.5688714502480783e-07,
|
|
"loss": 0.0069,
|
|
"step": 10700
|
|
},
|
|
{
|
|
"epoch": 0.46536385065774466,
|
|
"grad_norm": 0.05147803574800491,
|
|
"learning_rate": 2.5058667788724566e-07,
|
|
"loss": 0.0065,
|
|
"step": 10710
|
|
},
|
|
{
|
|
"epoch": 0.4657983640570516,
|
|
"grad_norm": 0.03046802245080471,
|
|
"learning_rate": 2.4436345598932933e-07,
|
|
"loss": 0.0073,
|
|
"step": 10720
|
|
},
|
|
{
|
|
"epoch": 0.46623287745635855,
|
|
"grad_norm": 0.045375872403383255,
|
|
"learning_rate": 2.3821752863805502e-07,
|
|
"loss": 0.0059,
|
|
"step": 10730
|
|
},
|
|
{
|
|
"epoch": 0.4666673908556655,
|
|
"grad_norm": 0.11396394670009613,
|
|
"learning_rate": 2.3214894452800784e-07,
|
|
"loss": 0.0057,
|
|
"step": 10740
|
|
},
|
|
{
|
|
"epoch": 0.46710190425497244,
|
|
"grad_norm": 0.05735607445240021,
|
|
"learning_rate": 2.2615775174097633e-07,
|
|
"loss": 0.0065,
|
|
"step": 10750
|
|
},
|
|
{
|
|
"epoch": 0.4675364176542794,
|
|
"grad_norm": 0.05877210199832916,
|
|
"learning_rate": 2.2024399774556948e-07,
|
|
"loss": 0.0037,
|
|
"step": 10760
|
|
},
|
|
{
|
|
"epoch": 0.4679709310535864,
|
|
"grad_norm": 0.09503768384456635,
|
|
"learning_rate": 2.1440772939685272e-07,
|
|
"loss": 0.0057,
|
|
"step": 10770
|
|
},
|
|
{
|
|
"epoch": 0.46840544445289334,
|
|
"grad_norm": 0.08510304242372513,
|
|
"learning_rate": 2.086489929359603e-07,
|
|
"loss": 0.0046,
|
|
"step": 10780
|
|
},
|
|
{
|
|
"epoch": 0.4688399578522003,
|
|
"grad_norm": 0.10792459547519684,
|
|
"learning_rate": 2.0296783398973452e-07,
|
|
"loss": 0.0064,
|
|
"step": 10790
|
|
},
|
|
{
|
|
"epoch": 0.46927447125150723,
|
|
"grad_norm": 0.05590936169028282,
|
|
"learning_rate": 1.973642975703738e-07,
|
|
"loss": 0.0055,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 0.4697089846508142,
|
|
"grad_norm": 0.04974950850009918,
|
|
"learning_rate": 1.918384280750618e-07,
|
|
"loss": 0.0043,
|
|
"step": 10810
|
|
},
|
|
{
|
|
"epoch": 0.4701434980501211,
|
|
"grad_norm": 0.14148883521556854,
|
|
"learning_rate": 1.8639026928562453e-07,
|
|
"loss": 0.0074,
|
|
"step": 10820
|
|
},
|
|
{
|
|
"epoch": 0.47057801144942807,
|
|
"grad_norm": 0.08046723902225494,
|
|
"learning_rate": 1.810198643681793e-07,
|
|
"loss": 0.0053,
|
|
"step": 10830
|
|
},
|
|
{
|
|
"epoch": 0.471012524848735,
|
|
"grad_norm": 0.06864185631275177,
|
|
"learning_rate": 1.7572725587279738e-07,
|
|
"loss": 0.007,
|
|
"step": 10840
|
|
},
|
|
{
|
|
"epoch": 0.47144703824804196,
|
|
"grad_norm": 0.1145964041352272,
|
|
"learning_rate": 1.7051248573316083e-07,
|
|
"loss": 0.0063,
|
|
"step": 10850
|
|
},
|
|
{
|
|
"epoch": 0.4718815516473489,
|
|
"grad_norm": 0.07953092455863953,
|
|
"learning_rate": 1.6537559526623614e-07,
|
|
"loss": 0.0063,
|
|
"step": 10860
|
|
},
|
|
{
|
|
"epoch": 0.47231606504665585,
|
|
"grad_norm": 0.04491928219795227,
|
|
"learning_rate": 1.6031662517194235e-07,
|
|
"loss": 0.0059,
|
|
"step": 10870
|
|
},
|
|
{
|
|
"epoch": 0.47275057844596285,
|
|
"grad_norm": 0.026222513988614082,
|
|
"learning_rate": 1.5533561553282895e-07,
|
|
"loss": 0.0042,
|
|
"step": 10880
|
|
},
|
|
{
|
|
"epoch": 0.4731850918452698,
|
|
"grad_norm": 0.08618629723787308,
|
|
"learning_rate": 1.5043260581376285e-07,
|
|
"loss": 0.0058,
|
|
"step": 10890
|
|
},
|
|
{
|
|
"epoch": 0.47361960524457675,
|
|
"grad_norm": 0.07844515144824982,
|
|
"learning_rate": 1.4560763486160868e-07,
|
|
"loss": 0.0068,
|
|
"step": 10900
|
|
},
|
|
{
|
|
"epoch": 0.4740541186438837,
|
|
"grad_norm": 0.06669094413518906,
|
|
"learning_rate": 1.4086074090493007e-07,
|
|
"loss": 0.0058,
|
|
"step": 10910
|
|
},
|
|
{
|
|
"epoch": 0.47448863204319064,
|
|
"grad_norm": 0.06707707047462463,
|
|
"learning_rate": 1.3619196155367664e-07,
|
|
"loss": 0.0055,
|
|
"step": 10920
|
|
},
|
|
{
|
|
"epoch": 0.4749231454424976,
|
|
"grad_norm": 0.08793602883815765,
|
|
"learning_rate": 1.3160133379889305e-07,
|
|
"loss": 0.0068,
|
|
"step": 10930
|
|
},
|
|
{
|
|
"epoch": 0.47535765884180453,
|
|
"grad_norm": 0.05631832033395767,
|
|
"learning_rate": 1.2708889401242263e-07,
|
|
"loss": 0.0064,
|
|
"step": 10940
|
|
},
|
|
{
|
|
"epoch": 0.4757921722411115,
|
|
"grad_norm": 0.09073697775602341,
|
|
"learning_rate": 1.22654677946622e-07,
|
|
"loss": 0.0082,
|
|
"step": 10950
|
|
},
|
|
{
|
|
"epoch": 0.4762266856404184,
|
|
"grad_norm": 0.08381710201501846,
|
|
"learning_rate": 1.1829872073407467e-07,
|
|
"loss": 0.0053,
|
|
"step": 10960
|
|
},
|
|
{
|
|
"epoch": 0.47666119903972537,
|
|
"grad_norm": 0.044089365750551224,
|
|
"learning_rate": 1.1402105688731568e-07,
|
|
"loss": 0.0061,
|
|
"step": 10970
|
|
},
|
|
{
|
|
"epoch": 0.4770957124390323,
|
|
"grad_norm": 0.06977023184299469,
|
|
"learning_rate": 1.0982172029855409e-07,
|
|
"loss": 0.0077,
|
|
"step": 10980
|
|
},
|
|
{
|
|
"epoch": 0.4775302258383393,
|
|
"grad_norm": 0.07421712577342987,
|
|
"learning_rate": 1.0570074423940758e-07,
|
|
"loss": 0.0049,
|
|
"step": 10990
|
|
},
|
|
{
|
|
"epoch": 0.47796473923764626,
|
|
"grad_norm": 0.0687830001115799,
|
|
"learning_rate": 1.0165816136064266e-07,
|
|
"loss": 0.0068,
|
|
"step": 11000
|
|
},
|
|
{
|
|
"epoch": 0.4783992526369532,
|
|
"grad_norm": 0.05829397588968277,
|
|
"learning_rate": 9.769400369190496e-08,
|
|
"loss": 0.0062,
|
|
"step": 11010
|
|
},
|
|
{
|
|
"epoch": 0.47883376603626016,
|
|
"grad_norm": 0.07584609091281891,
|
|
"learning_rate": 9.3808302641476e-08,
|
|
"loss": 0.0064,
|
|
"step": 11020
|
|
},
|
|
{
|
|
"epoch": 0.4792682794355671,
|
|
"grad_norm": 0.0651635229587555,
|
|
"learning_rate": 9.000108899602011e-08,
|
|
"loss": 0.0065,
|
|
"step": 11030
|
|
},
|
|
{
|
|
"epoch": 0.47970279283487405,
|
|
"grad_norm": 0.06295653432607651,
|
|
"learning_rate": 8.627239292033907e-08,
|
|
"loss": 0.0056,
|
|
"step": 11040
|
|
},
|
|
{
|
|
"epoch": 0.480137306234181,
|
|
"grad_norm": 0.10909046232700348,
|
|
"learning_rate": 8.262224395713559e-08,
|
|
"loss": 0.0077,
|
|
"step": 11050
|
|
},
|
|
{
|
|
"epoch": 0.48057181963348794,
|
|
"grad_norm": 0.05574299395084381,
|
|
"learning_rate": 7.905067102678021e-08,
|
|
"loss": 0.0051,
|
|
"step": 11060
|
|
},
|
|
{
|
|
"epoch": 0.4810063330327949,
|
|
"grad_norm": 0.0833418220281601,
|
|
"learning_rate": 7.555770242707705e-08,
|
|
"loss": 0.0051,
|
|
"step": 11070
|
|
},
|
|
{
|
|
"epoch": 0.48144084643210183,
|
|
"grad_norm": 0.07659269869327545,
|
|
"learning_rate": 7.214336583304616e-08,
|
|
"loss": 0.0058,
|
|
"step": 11080
|
|
},
|
|
{
|
|
"epoch": 0.4818753598314088,
|
|
"grad_norm": 0.07921771705150604,
|
|
"learning_rate": 6.880768829670036e-08,
|
|
"loss": 0.006,
|
|
"step": 11090
|
|
},
|
|
{
|
|
"epoch": 0.4823098732307158,
|
|
"grad_norm": 0.06295815110206604,
|
|
"learning_rate": 6.555069624682997e-08,
|
|
"loss": 0.007,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 0.4827443866300227,
|
|
"grad_norm": 0.06317449361085892,
|
|
"learning_rate": 6.237241548879613e-08,
|
|
"loss": 0.0055,
|
|
"step": 11110
|
|
},
|
|
{
|
|
"epoch": 0.48317890002932967,
|
|
"grad_norm": 0.09486140310764313,
|
|
"learning_rate": 5.9272871204324457e-08,
|
|
"loss": 0.0069,
|
|
"step": 11120
|
|
},
|
|
{
|
|
"epoch": 0.4836134134286366,
|
|
"grad_norm": 0.10850790143013,
|
|
"learning_rate": 5.625208795130954e-08,
|
|
"loss": 0.0061,
|
|
"step": 11130
|
|
},
|
|
{
|
|
"epoch": 0.48404792682794356,
|
|
"grad_norm": 0.06827063858509064,
|
|
"learning_rate": 5.3310089663611844e-08,
|
|
"loss": 0.0057,
|
|
"step": 11140
|
|
},
|
|
{
|
|
"epoch": 0.4844824402272505,
|
|
"grad_norm": 0.055239662528038025,
|
|
"learning_rate": 5.04468996508789e-08,
|
|
"loss": 0.0083,
|
|
"step": 11150
|
|
},
|
|
{
|
|
"epoch": 0.48491695362655746,
|
|
"grad_norm": 0.08159584552049637,
|
|
"learning_rate": 4.766254059835107e-08,
|
|
"loss": 0.0055,
|
|
"step": 11160
|
|
},
|
|
{
|
|
"epoch": 0.4853514670258644,
|
|
"grad_norm": 0.06158966198563576,
|
|
"learning_rate": 4.4957034566687205e-08,
|
|
"loss": 0.0057,
|
|
"step": 11170
|
|
},
|
|
{
|
|
"epoch": 0.48578598042517135,
|
|
"grad_norm": 0.03253835067152977,
|
|
"learning_rate": 4.2330402991789255e-08,
|
|
"loss": 0.0053,
|
|
"step": 11180
|
|
},
|
|
{
|
|
"epoch": 0.4862204938244783,
|
|
"grad_norm": 0.08656269311904907,
|
|
"learning_rate": 3.9782666684631266e-08,
|
|
"loss": 0.0066,
|
|
"step": 11190
|
|
},
|
|
{
|
|
"epoch": 0.48665500722378524,
|
|
"grad_norm": 0.08442550897598267,
|
|
"learning_rate": 3.7313845831093984e-08,
|
|
"loss": 0.0066,
|
|
"step": 11200
|
|
},
|
|
{
|
|
"epoch": 0.4870895206230922,
|
|
"grad_norm": 0.06725499778985977,
|
|
"learning_rate": 3.492395999180609e-08,
|
|
"loss": 0.0061,
|
|
"step": 11210
|
|
},
|
|
{
|
|
"epoch": 0.4875240340223992,
|
|
"grad_norm": 0.09642699360847473,
|
|
"learning_rate": 3.261302810198985e-08,
|
|
"loss": 0.007,
|
|
"step": 11220
|
|
},
|
|
{
|
|
"epoch": 0.48795854742170613,
|
|
"grad_norm": 0.05808092653751373,
|
|
"learning_rate": 3.038106847131128e-08,
|
|
"loss": 0.0058,
|
|
"step": 11230
|
|
},
|
|
{
|
|
"epoch": 0.4883930608210131,
|
|
"grad_norm": 0.07764837890863419,
|
|
"learning_rate": 2.822809878373134e-08,
|
|
"loss": 0.0076,
|
|
"step": 11240
|
|
},
|
|
{
|
|
"epoch": 0.48882757422032,
|
|
"grad_norm": 0.047388892620801926,
|
|
"learning_rate": 2.6154136097369386e-08,
|
|
"loss": 0.0051,
|
|
"step": 11250
|
|
},
|
|
{
|
|
"epoch": 0.489262087619627,
|
|
"grad_norm": 0.08450010418891907,
|
|
"learning_rate": 2.415919684436774e-08,
|
|
"loss": 0.0073,
|
|
"step": 11260
|
|
},
|
|
{
|
|
"epoch": 0.4896966010189339,
|
|
"grad_norm": 0.09996630996465683,
|
|
"learning_rate": 2.224329683076065e-08,
|
|
"loss": 0.0078,
|
|
"step": 11270
|
|
},
|
|
{
|
|
"epoch": 0.49013111441824087,
|
|
"grad_norm": 0.08762189745903015,
|
|
"learning_rate": 2.0406451236349988e-08,
|
|
"loss": 0.0059,
|
|
"step": 11280
|
|
},
|
|
{
|
|
"epoch": 0.4905656278175478,
|
|
"grad_norm": 0.06317150592803955,
|
|
"learning_rate": 1.8648674614583084e-08,
|
|
"loss": 0.0062,
|
|
"step": 11290
|
|
},
|
|
{
|
|
"epoch": 0.49100014121685476,
|
|
"grad_norm": 0.06011360511183739,
|
|
"learning_rate": 1.6969980892439508e-08,
|
|
"loss": 0.0057,
|
|
"step": 11300
|
|
},
|
|
{
|
|
"epoch": 0.4914346546161617,
|
|
"grad_norm": 0.06316263228654861,
|
|
"learning_rate": 1.537038337031782e-08,
|
|
"loss": 0.0067,
|
|
"step": 11310
|
|
},
|
|
{
|
|
"epoch": 0.49186916801546865,
|
|
"grad_norm": 0.10531795769929886,
|
|
"learning_rate": 1.3849894721935653e-08,
|
|
"loss": 0.0064,
|
|
"step": 11320
|
|
},
|
|
{
|
|
"epoch": 0.49230368141477565,
|
|
"grad_norm": 0.09684789925813675,
|
|
"learning_rate": 1.2408526994223125e-08,
|
|
"loss": 0.0065,
|
|
"step": 11330
|
|
},
|
|
{
|
|
"epoch": 0.4927381948140826,
|
|
"grad_norm": 0.06766608357429504,
|
|
"learning_rate": 1.1046291607231807e-08,
|
|
"loss": 0.0062,
|
|
"step": 11340
|
|
},
|
|
{
|
|
"epoch": 0.49317270821338954,
|
|
"grad_norm": 0.04883148521184921,
|
|
"learning_rate": 9.763199354041463e-09,
|
|
"loss": 0.0067,
|
|
"step": 11350
|
|
},
|
|
{
|
|
"epoch": 0.4936072216126965,
|
|
"grad_norm": 0.057381924241781235,
|
|
"learning_rate": 8.559260400674564e-09,
|
|
"loss": 0.0062,
|
|
"step": 11360
|
|
},
|
|
{
|
|
"epoch": 0.49404173501200344,
|
|
"grad_norm": 0.07570614665746689,
|
|
"learning_rate": 7.434484286020782e-09,
|
|
"loss": 0.0075,
|
|
"step": 11370
|
|
},
|
|
{
|
|
"epoch": 0.4944762484113104,
|
|
"grad_norm": 0.03946550935506821,
|
|
"learning_rate": 6.388879921753743e-09,
|
|
"loss": 0.0062,
|
|
"step": 11380
|
|
},
|
|
{
|
|
"epoch": 0.49491076181061733,
|
|
"grad_norm": 0.059903956949710846,
|
|
"learning_rate": 5.422455592264397e-09,
|
|
"loss": 0.0065,
|
|
"step": 11390
|
|
},
|
|
{
|
|
"epoch": 0.4953452752099243,
|
|
"grad_norm": 0.08148939162492752,
|
|
"learning_rate": 4.535218954596632e-09,
|
|
"loss": 0.0075,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 0.4957797886092312,
|
|
"grad_norm": 0.12582159042358398,
|
|
"learning_rate": 3.727177038385099e-09,
|
|
"loss": 0.0057,
|
|
"step": 11410
|
|
},
|
|
{
|
|
"epoch": 0.49621430200853817,
|
|
"grad_norm": 0.11304333060979843,
|
|
"learning_rate": 2.998336245797484e-09,
|
|
"loss": 0.0066,
|
|
"step": 11420
|
|
},
|
|
{
|
|
"epoch": 0.4966488154078451,
|
|
"grad_norm": 0.08336801081895828,
|
|
"learning_rate": 2.348702351487875e-09,
|
|
"loss": 0.0057,
|
|
"step": 11430
|
|
},
|
|
{
|
|
"epoch": 0.4970833288071521,
|
|
"grad_norm": 0.05257615074515343,
|
|
"learning_rate": 1.778280502546803e-09,
|
|
"loss": 0.0067,
|
|
"step": 11440
|
|
},
|
|
{
|
|
"epoch": 0.49751784220645906,
|
|
"grad_norm": 0.0752919614315033,
|
|
"learning_rate": 1.2870752184657165e-09,
|
|
"loss": 0.0049,
|
|
"step": 11450
|
|
},
|
|
{
|
|
"epoch": 0.497952355605766,
|
|
"grad_norm": 0.07169657945632935,
|
|
"learning_rate": 8.750903910959008e-10,
|
|
"loss": 0.0068,
|
|
"step": 11460
|
|
},
|
|
{
|
|
"epoch": 0.49838686900507295,
|
|
"grad_norm": 0.07614590227603912,
|
|
"learning_rate": 5.423292846196138e-10,
|
|
"loss": 0.007,
|
|
"step": 11470
|
|
},
|
|
{
|
|
"epoch": 0.4988213824043799,
|
|
"grad_norm": 0.09255485981702805,
|
|
"learning_rate": 2.8879453552455026e-10,
|
|
"loss": 0.0066,
|
|
"step": 11480
|
|
},
|
|
{
|
|
"epoch": 0.49925589580368684,
|
|
"grad_norm": 0.05579246208071709,
|
|
"learning_rate": 1.1448815258385815e-10,
|
|
"loss": 0.0056,
|
|
"step": 11490
|
|
},
|
|
{
|
|
"epoch": 0.4996904092029938,
|
|
"grad_norm": 0.09784293174743652,
|
|
"learning_rate": 1.9411516838374612e-11,
|
|
"loss": 0.0058,
|
|
"step": 11500
|
|
},
|
|
{
|
|
"epoch": 0.4999945685825087,
|
|
"step": 11507,
|
|
"total_flos": 1.2555120144411525e+19,
|
|
"train_loss": 0.010843981425175505,
|
|
"train_runtime": 122112.938,
|
|
"train_samples_per_second": 0.754,
|
|
"train_steps_per_second": 0.094
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 11507,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.2555120144411525e+19,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|