{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5000058835296471,
  "eval_steps": 500,
  "global_step": 10623,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00047068237176847136,
      "grad_norm": 1.853895664215088,
      "learning_rate": 6.269592476489028e-07,
      "loss": 0.2895,
      "step": 10
    },
    {
      "epoch": 0.0009413647435369427,
      "grad_norm": 1.4670448303222656,
      "learning_rate": 1.2539184952978056e-06,
      "loss": 0.2741,
      "step": 20
    },
    {
      "epoch": 0.0014120471153054141,
      "grad_norm": 0.9682801961898804,
      "learning_rate": 1.8808777429467086e-06,
      "loss": 0.2117,
      "step": 30
    },
    {
      "epoch": 0.0018827294870738854,
      "grad_norm": 0.8661645650863647,
      "learning_rate": 2.507836990595611e-06,
      "loss": 0.1419,
      "step": 40
    },
    {
      "epoch": 0.0023534118588423567,
      "grad_norm": 0.8800103068351746,
      "learning_rate": 3.1347962382445144e-06,
      "loss": 0.1067,
      "step": 50
    },
    {
      "epoch": 0.0028240942306108283,
      "grad_norm": 0.5418772101402283,
      "learning_rate": 3.7617554858934172e-06,
      "loss": 0.082,
      "step": 60
    },
    {
      "epoch": 0.0032947766023792993,
      "grad_norm": 0.5682976245880127,
      "learning_rate": 4.3887147335423205e-06,
      "loss": 0.0684,
      "step": 70
    },
    {
      "epoch": 0.003765458974147771,
      "grad_norm": 0.7124200463294983,
      "learning_rate": 5.015673981191222e-06,
      "loss": 0.0687,
      "step": 80
    },
    {
      "epoch": 0.004236141345916242,
      "grad_norm": 0.5894989371299744,
      "learning_rate": 5.642633228840125e-06,
      "loss": 0.0546,
      "step": 90
    },
    {
      "epoch": 0.0047068237176847135,
      "grad_norm": 0.6233320236206055,
      "learning_rate": 6.269592476489029e-06,
      "loss": 0.0578,
      "step": 100
    },
    {
      "epoch": 0.0051775060894531846,
      "grad_norm": 0.7837837934494019,
      "learning_rate": 6.896551724137932e-06,
      "loss": 0.0492,
      "step": 110
    },
    {
      "epoch": 0.0056481884612216565,
      "grad_norm": 0.4565499722957611,
      "learning_rate": 7.5235109717868345e-06,
      "loss": 0.0599,
      "step": 120
    },
    {
      "epoch": 0.006118870832990128,
      "grad_norm": 0.4221401512622833,
      "learning_rate": 8.150470219435737e-06,
      "loss": 0.0514,
      "step": 130
    },
    {
      "epoch": 0.006589553204758599,
      "grad_norm": 0.3985210061073303,
      "learning_rate": 8.777429467084641e-06,
      "loss": 0.0522,
      "step": 140
    },
    {
      "epoch": 0.00706023557652707,
      "grad_norm": 0.6109229326248169,
      "learning_rate": 9.404388714733543e-06,
      "loss": 0.0579,
      "step": 150
    },
    {
      "epoch": 0.007530917948295542,
      "grad_norm": 0.4704965353012085,
      "learning_rate": 1.0031347962382445e-05,
      "loss": 0.0454,
      "step": 160
    },
    {
      "epoch": 0.008001600320064013,
      "grad_norm": 0.4213680624961853,
      "learning_rate": 1.0658307210031348e-05,
      "loss": 0.045,
      "step": 170
    },
    {
      "epoch": 0.008472282691832485,
      "grad_norm": 0.4626062512397766,
      "learning_rate": 1.128526645768025e-05,
      "loss": 0.0405,
      "step": 180
    },
    {
      "epoch": 0.008942965063600955,
      "grad_norm": 0.41663089394569397,
      "learning_rate": 1.1912225705329154e-05,
      "loss": 0.0446,
      "step": 190
    },
    {
      "epoch": 0.009413647435369427,
      "grad_norm": 0.49606436491012573,
      "learning_rate": 1.2539184952978058e-05,
      "loss": 0.0381,
      "step": 200
    },
    {
      "epoch": 0.009884329807137899,
      "grad_norm": 0.38951125741004944,
      "learning_rate": 1.316614420062696e-05,
      "loss": 0.0442,
      "step": 210
    },
    {
      "epoch": 0.010355012178906369,
      "grad_norm": 0.33546990156173706,
      "learning_rate": 1.3793103448275863e-05,
      "loss": 0.0458,
      "step": 220
    },
    {
      "epoch": 0.010825694550674841,
      "grad_norm": 0.32765892148017883,
      "learning_rate": 1.4420062695924765e-05,
      "loss": 0.0401,
      "step": 230
    },
    {
      "epoch": 0.011296376922443313,
      "grad_norm": 0.38245293498039246,
      "learning_rate": 1.5047021943573669e-05,
      "loss": 0.0362,
      "step": 240
    },
    {
      "epoch": 0.011767059294211783,
      "grad_norm": 0.5879177451133728,
      "learning_rate": 1.567398119122257e-05,
      "loss": 0.0366,
      "step": 250
    },
    {
      "epoch": 0.012237741665980255,
      "grad_norm": 0.38414984941482544,
      "learning_rate": 1.6300940438871475e-05,
      "loss": 0.0384,
      "step": 260
    },
    {
      "epoch": 0.012708424037748725,
      "grad_norm": 0.2964108884334564,
      "learning_rate": 1.6927899686520378e-05,
      "loss": 0.0307,
      "step": 270
    },
    {
      "epoch": 0.013179106409517197,
      "grad_norm": 0.4213695824146271,
      "learning_rate": 1.7554858934169282e-05,
      "loss": 0.0443,
      "step": 280
    },
    {
      "epoch": 0.01364978878128567,
      "grad_norm": 0.2879776954650879,
      "learning_rate": 1.8181818181818182e-05,
      "loss": 0.0422,
      "step": 290
    },
    {
      "epoch": 0.01412047115305414,
      "grad_norm": 0.7699964046478271,
      "learning_rate": 1.8808777429467086e-05,
      "loss": 0.0356,
      "step": 300
    },
    {
      "epoch": 0.014591153524822612,
      "grad_norm": 0.314653217792511,
      "learning_rate": 1.943573667711599e-05,
      "loss": 0.036,
      "step": 310
    },
    {
      "epoch": 0.015061835896591083,
      "grad_norm": 0.2611827850341797,
      "learning_rate": 1.999999953520864e-05,
      "loss": 0.031,
      "step": 320
    },
    {
      "epoch": 0.015532518268359554,
      "grad_norm": 0.2755236029624939,
      "learning_rate": 1.9999943760297725e-05,
      "loss": 0.0312,
      "step": 330
    },
    {
      "epoch": 0.016003200640128026,
      "grad_norm": 0.35887399315834045,
      "learning_rate": 1.99997950277089e-05,
      "loss": 0.0342,
      "step": 340
    },
    {
      "epoch": 0.016473883011896496,
      "grad_norm": 0.32914966344833374,
      "learning_rate": 1.999955333882476e-05,
      "loss": 0.0372,
      "step": 350
    },
    {
      "epoch": 0.01694456538366497,
      "grad_norm": 0.434587687253952,
      "learning_rate": 1.9999218695892e-05,
      "loss": 0.0305,
      "step": 360
    },
    {
      "epoch": 0.01741524775543344,
      "grad_norm": 0.22577083110809326,
      "learning_rate": 1.9998791102021396e-05,
      "loss": 0.0332,
      "step": 370
    },
    {
      "epoch": 0.01788593012720191,
      "grad_norm": 0.34992215037345886,
      "learning_rate": 1.999827056118779e-05,
      "loss": 0.0275,
      "step": 380
    },
    {
      "epoch": 0.018356612498970384,
      "grad_norm": 0.21031337976455688,
      "learning_rate": 1.9997657078230034e-05,
      "loss": 0.0282,
      "step": 390
    },
    {
      "epoch": 0.018827294870738854,
      "grad_norm": 0.23182496428489685,
      "learning_rate": 1.9996950658850956e-05,
      "loss": 0.0332,
      "step": 400
    },
    {
      "epoch": 0.019297977242507324,
      "grad_norm": 0.30487576127052307,
      "learning_rate": 1.99961513096173e-05,
      "loss": 0.0317,
      "step": 410
    },
    {
      "epoch": 0.019768659614275798,
      "grad_norm": 0.23188067972660065,
      "learning_rate": 1.999525903795968e-05,
      "loss": 0.0291,
      "step": 420
    },
    {
      "epoch": 0.020239341986044268,
      "grad_norm": 0.26485952734947205,
      "learning_rate": 1.9994273852172484e-05,
      "loss": 0.0265,
      "step": 430
    },
    {
      "epoch": 0.020710024357812738,
      "grad_norm": 0.2696860730648041,
      "learning_rate": 1.9993195761413823e-05,
      "loss": 0.0294,
      "step": 440
    },
    {
      "epoch": 0.021180706729581212,
      "grad_norm": 0.22003962099552155,
      "learning_rate": 1.999202477570544e-05,
      "loss": 0.027,
      "step": 450
    },
    {
      "epoch": 0.021651389101349682,
      "grad_norm": 0.21706393361091614,
      "learning_rate": 1.9990760905932605e-05,
      "loss": 0.0287,
      "step": 460
    },
    {
      "epoch": 0.022122071473118152,
      "grad_norm": 0.29643166065216064,
      "learning_rate": 1.9989404163844026e-05,
      "loss": 0.0304,
      "step": 470
    },
    {
      "epoch": 0.022592753844886626,
      "grad_norm": 0.21076913177967072,
      "learning_rate": 1.9987954562051724e-05,
      "loss": 0.0233,
      "step": 480
    },
    {
      "epoch": 0.023063436216655096,
      "grad_norm": 0.3193027973175049,
      "learning_rate": 1.998641211403095e-05,
      "loss": 0.0256,
      "step": 490
    },
    {
      "epoch": 0.023534118588423567,
      "grad_norm": 0.18541871011257172,
      "learning_rate": 1.9984776834120015e-05,
      "loss": 0.0319,
      "step": 500
    },
    {
      "epoch": 0.024004800960192037,
      "grad_norm": 0.24217073619365692,
      "learning_rate": 1.9983048737520186e-05,
      "loss": 0.0229,
      "step": 510
    },
    {
      "epoch": 0.02447548333196051,
      "grad_norm": 0.24985109269618988,
      "learning_rate": 1.9981227840295544e-05,
      "loss": 0.0277,
      "step": 520
    },
    {
      "epoch": 0.02494616570372898,
      "grad_norm": 0.276968389749527,
      "learning_rate": 1.9979314159372815e-05,
      "loss": 0.0306,
      "step": 530
    },
    {
      "epoch": 0.02541684807549745,
      "grad_norm": 0.25580137968063354,
      "learning_rate": 1.997730771254124e-05,
      "loss": 0.0306,
      "step": 540
    },
    {
      "epoch": 0.025887530447265925,
      "grad_norm": 0.21411041915416718,
      "learning_rate": 1.9975208518452384e-05,
      "loss": 0.0284,
      "step": 550
    },
    {
      "epoch": 0.026358212819034395,
      "grad_norm": 0.4203673303127289,
      "learning_rate": 1.9973016596619973e-05,
      "loss": 0.0282,
      "step": 560
    },
    {
      "epoch": 0.026828895190802865,
      "grad_norm": 0.19542217254638672,
      "learning_rate": 1.9970731967419724e-05,
      "loss": 0.0262,
      "step": 570
    },
    {
      "epoch": 0.02729957756257134,
      "grad_norm": 0.2871203124523163,
      "learning_rate": 1.9968354652089142e-05,
      "loss": 0.0237,
      "step": 580
    },
    {
      "epoch": 0.02777025993433981,
      "grad_norm": 0.1866108924150467,
      "learning_rate": 1.9965884672727313e-05,
      "loss": 0.0258,
      "step": 590
    },
    {
      "epoch": 0.02824094230610828,
      "grad_norm": 0.23779630661010742,
      "learning_rate": 1.996332205229473e-05,
      "loss": 0.0258,
      "step": 600
    },
    {
      "epoch": 0.028711624677876753,
      "grad_norm": 0.14882665872573853,
      "learning_rate": 1.9960666814613043e-05,
      "loss": 0.0196,
      "step": 610
    },
    {
      "epoch": 0.029182307049645223,
      "grad_norm": 0.2880736291408539,
      "learning_rate": 1.995791898436487e-05,
      "loss": 0.028,
      "step": 620
    },
    {
      "epoch": 0.029652989421413693,
      "grad_norm": 0.29828083515167236,
      "learning_rate": 1.995507858709354e-05,
      "loss": 0.0305,
      "step": 630
    },
    {
      "epoch": 0.030123671793182167,
      "grad_norm": 0.5830768942832947,
      "learning_rate": 1.9952145649202885e-05,
      "loss": 0.0307,
      "step": 640
    },
    {
      "epoch": 0.030594354164950637,
      "grad_norm": 0.22826269268989563,
      "learning_rate": 1.9949120197956956e-05,
      "loss": 0.0257,
      "step": 650
    },
    {
      "epoch": 0.031065036536719107,
      "grad_norm": 0.32943838834762573,
      "learning_rate": 1.9946002261479817e-05,
      "loss": 0.0267,
      "step": 660
    },
    {
      "epoch": 0.03153571890848758,
      "grad_norm": 0.23798730969429016,
      "learning_rate": 1.9942791868755233e-05,
      "loss": 0.0305,
      "step": 670
    },
    {
      "epoch": 0.03200640128025605,
      "grad_norm": 0.2915290296077728,
      "learning_rate": 1.993948904962644e-05,
      "loss": 0.0248,
      "step": 680
    },
    {
      "epoch": 0.03247708365202452,
      "grad_norm": 0.24953021109104156,
      "learning_rate": 1.9936093834795853e-05,
      "loss": 0.0226,
      "step": 690
    },
    {
      "epoch": 0.03294776602379299,
      "grad_norm": 0.2042916715145111,
      "learning_rate": 1.993260625582478e-05,
      "loss": 0.0272,
      "step": 700
    },
    {
      "epoch": 0.03341844839556146,
      "grad_norm": 0.23323510587215424,
      "learning_rate": 1.992902634513312e-05,
      "loss": 0.0255,
      "step": 710
    },
    {
      "epoch": 0.03388913076732994,
      "grad_norm": 0.16620193421840668,
      "learning_rate": 1.9925354135999083e-05,
      "loss": 0.0196,
      "step": 720
    },
    {
      "epoch": 0.03435981313909841,
      "grad_norm": 0.2203141152858734,
      "learning_rate": 1.992158966255887e-05,
      "loss": 0.0231,
      "step": 730
    },
    {
      "epoch": 0.03483049551086688,
      "grad_norm": 0.21319791674613953,
      "learning_rate": 1.9917732959806336e-05,
      "loss": 0.0275,
      "step": 740
    },
    {
      "epoch": 0.03530117788263535,
      "grad_norm": 0.13679344952106476,
      "learning_rate": 1.9913784063592708e-05,
      "loss": 0.0194,
      "step": 750
    },
    {
      "epoch": 0.03577186025440382,
      "grad_norm": 0.22628575563430786,
      "learning_rate": 1.990974301062621e-05,
      "loss": 0.0259,
      "step": 760
    },
    {
      "epoch": 0.03624254262617229,
      "grad_norm": 0.18509197235107422,
      "learning_rate": 1.990560983847174e-05,
      "loss": 0.0216,
      "step": 770
    },
    {
      "epoch": 0.03671322499794077,
      "grad_norm": 0.1981833577156067,
      "learning_rate": 1.9901384585550527e-05,
      "loss": 0.0236,
      "step": 780
    },
    {
      "epoch": 0.03718390736970924,
      "grad_norm": 0.22769884765148163,
      "learning_rate": 1.989706729113976e-05,
      "loss": 0.0233,
      "step": 790
    },
    {
      "epoch": 0.03765458974147771,
      "grad_norm": 0.23713231086730957,
      "learning_rate": 1.9892657995372227e-05,
      "loss": 0.0229,
      "step": 800
    },
    {
      "epoch": 0.03812527211324618,
      "grad_norm": 0.39365676045417786,
      "learning_rate": 1.988815673923596e-05,
      "loss": 0.0234,
      "step": 810
    },
    {
      "epoch": 0.03859595448501465,
      "grad_norm": 0.3216700255870819,
      "learning_rate": 1.9883563564573815e-05,
      "loss": 0.0248,
      "step": 820
    },
    {
      "epoch": 0.03906663685678312,
      "grad_norm": 0.16760173439979553,
      "learning_rate": 1.9878878514083124e-05,
      "loss": 0.0201,
      "step": 830
    },
    {
      "epoch": 0.039537319228551596,
      "grad_norm": 0.23704811930656433,
      "learning_rate": 1.9874101631315268e-05,
      "loss": 0.0205,
      "step": 840
    },
    {
      "epoch": 0.040008001600320066,
      "grad_norm": 0.18856912851333618,
      "learning_rate": 1.9869232960675292e-05,
      "loss": 0.0242,
      "step": 850
    },
    {
      "epoch": 0.040478683972088536,
      "grad_norm": 0.1919320523738861,
      "learning_rate": 1.9864272547421482e-05,
      "loss": 0.0219,
      "step": 860
    },
    {
      "epoch": 0.040949366343857006,
      "grad_norm": 0.20129288733005524,
      "learning_rate": 1.985922043766495e-05,
      "loss": 0.0253,
      "step": 870
    },
    {
      "epoch": 0.041420048715625477,
      "grad_norm": 0.22689007222652435,
      "learning_rate": 1.9854076678369197e-05,
      "loss": 0.0206,
      "step": 880
    },
    {
      "epoch": 0.04189073108739395,
      "grad_norm": 0.2808779776096344,
      "learning_rate": 1.984884131734968e-05,
      "loss": 0.0217,
      "step": 890
    },
    {
      "epoch": 0.042361413459162424,
      "grad_norm": 0.1597484052181244,
      "learning_rate": 1.9843514403273378e-05,
      "loss": 0.0218,
      "step": 900
    },
    {
      "epoch": 0.042832095830930894,
      "grad_norm": 0.15527930855751038,
      "learning_rate": 1.9838095985658324e-05,
      "loss": 0.0236,
      "step": 910
    },
    {
      "epoch": 0.043302778202699364,
      "grad_norm": 0.2966651916503906,
      "learning_rate": 1.9832586114873154e-05,
      "loss": 0.0288,
      "step": 920
    },
    {
      "epoch": 0.043773460574467835,
      "grad_norm": 0.1232568770647049,
      "learning_rate": 1.9826984842136637e-05,
      "loss": 0.0206,
      "step": 930
    },
    {
      "epoch": 0.044244142946236305,
      "grad_norm": 0.2170974314212799,
      "learning_rate": 1.982129221951719e-05,
      "loss": 0.0212,
      "step": 940
    },
    {
      "epoch": 0.044714825318004775,
      "grad_norm": 0.18615590035915375,
      "learning_rate": 1.9815508299932417e-05,
      "loss": 0.0201,
      "step": 950
    },
    {
      "epoch": 0.04518550768977325,
      "grad_norm": 0.16665947437286377,
      "learning_rate": 1.980963313714859e-05,
      "loss": 0.0245,
      "step": 960
    },
    {
      "epoch": 0.04565619006154172,
      "grad_norm": 0.21176232397556305,
      "learning_rate": 1.9803666785780165e-05,
      "loss": 0.0237,
      "step": 970
    },
    {
      "epoch": 0.04612687243331019,
      "grad_norm": 0.23852507770061493,
      "learning_rate": 1.979760930128927e-05,
      "loss": 0.0223,
      "step": 980
    },
    {
      "epoch": 0.04659755480507866,
      "grad_norm": 0.2165747582912445,
      "learning_rate": 1.979146073998519e-05,
      "loss": 0.0219,
      "step": 990
    },
    {
      "epoch": 0.04706823717684713,
      "grad_norm": 0.1552462875843048,
      "learning_rate": 1.9785221159023852e-05,
      "loss": 0.0245,
      "step": 1000
    },
    {
      "epoch": 0.0475389195486156,
      "grad_norm": 0.17933006584644318,
      "learning_rate": 1.9778890616407266e-05,
      "loss": 0.0249,
      "step": 1010
    },
    {
      "epoch": 0.048009601920384073,
      "grad_norm": 0.23089653253555298,
      "learning_rate": 1.9772469170983026e-05,
      "loss": 0.0257,
      "step": 1020
    },
    {
      "epoch": 0.04848028429215255,
      "grad_norm": 0.1873331367969513,
      "learning_rate": 1.976595688244373e-05,
      "loss": 0.0209,
      "step": 1030
    },
    {
      "epoch": 0.04895096666392102,
      "grad_norm": 0.20000211894512177,
      "learning_rate": 1.975935381132644e-05,
      "loss": 0.0174,
      "step": 1040
    },
    {
      "epoch": 0.04942164903568949,
      "grad_norm": 0.17019209265708923,
      "learning_rate": 1.975266001901212e-05,
      "loss": 0.0189,
      "step": 1050
    },
    {
      "epoch": 0.04989233140745796,
      "grad_norm": 0.1945488452911377,
      "learning_rate": 1.9745875567725058e-05,
      "loss": 0.0244,
      "step": 1060
    },
    {
      "epoch": 0.05036301377922643,
      "grad_norm": 0.1913270354270935,
      "learning_rate": 1.973900052053229e-05,
      "loss": 0.0245,
      "step": 1070
    },
    {
      "epoch": 0.0508336961509949,
      "grad_norm": 0.1417558342218399,
      "learning_rate": 1.973203494134302e-05,
      "loss": 0.0184,
      "step": 1080
    },
    {
      "epoch": 0.05130437852276338,
      "grad_norm": 0.1926746964454651,
      "learning_rate": 1.9724978894908017e-05,
      "loss": 0.0232,
      "step": 1090
    },
    {
      "epoch": 0.05177506089453185,
      "grad_norm": 0.13734498620033264,
      "learning_rate": 1.971783244681902e-05,
      "loss": 0.0217,
      "step": 1100
    },
    {
      "epoch": 0.05224574326630032,
      "grad_norm": 0.24532277882099152,
      "learning_rate": 1.9710595663508125e-05,
      "loss": 0.0209,
      "step": 1110
    },
    {
      "epoch": 0.05271642563806879,
      "grad_norm": 0.25304025411605835,
      "learning_rate": 1.9703268612247172e-05,
      "loss": 0.0254,
      "step": 1120
    },
    {
      "epoch": 0.05318710800983726,
      "grad_norm": 0.17198318243026733,
      "learning_rate": 1.9695851361147098e-05,
      "loss": 0.0188,
      "step": 1130
    },
    {
      "epoch": 0.05365779038160573,
      "grad_norm": 0.13433778285980225,
      "learning_rate": 1.9688343979157348e-05,
      "loss": 0.0249,
      "step": 1140
    },
    {
      "epoch": 0.05412847275337421,
      "grad_norm": 0.2117832452058792,
      "learning_rate": 1.968074653606519e-05,
      "loss": 0.0203,
      "step": 1150
    },
    {
      "epoch": 0.05459915512514268,
      "grad_norm": 0.15447969734668732,
      "learning_rate": 1.9673059102495084e-05,
      "loss": 0.0192,
      "step": 1160
    },
    {
      "epoch": 0.05506983749691115,
      "grad_norm": 0.21258267760276794,
      "learning_rate": 1.9665281749908034e-05,
      "loss": 0.0209,
      "step": 1170
    },
    {
      "epoch": 0.05554051986867962,
      "grad_norm": 0.22038120031356812,
      "learning_rate": 1.9657414550600907e-05,
      "loss": 0.0181,
      "step": 1180
    },
    {
      "epoch": 0.05601120224044809,
      "grad_norm": 0.16627314686775208,
      "learning_rate": 1.964945757770578e-05,
      "loss": 0.0188,
      "step": 1190
    },
    {
      "epoch": 0.05648188461221656,
      "grad_norm": 0.16459894180297852,
      "learning_rate": 1.964141090518923e-05,
      "loss": 0.0187,
      "step": 1200
    },
    {
      "epoch": 0.056952566983985035,
      "grad_norm": 0.1881185919046402,
      "learning_rate": 1.963327460785168e-05,
      "loss": 0.0203,
      "step": 1210
    },
    {
      "epoch": 0.057423249355753506,
      "grad_norm": 0.2117614597082138,
      "learning_rate": 1.962504876132669e-05,
      "loss": 0.0229,
      "step": 1220
    },
    {
      "epoch": 0.057893931727521976,
      "grad_norm": 0.20820604264736176,
      "learning_rate": 1.9616733442080253e-05,
      "loss": 0.0197,
      "step": 1230
    },
    {
      "epoch": 0.058364614099290446,
      "grad_norm": 0.12462881952524185,
      "learning_rate": 1.960832872741007e-05,
      "loss": 0.016,
      "step": 1240
    },
    {
      "epoch": 0.058835296471058916,
      "grad_norm": 0.1357419639825821,
      "learning_rate": 1.9599834695444863e-05,
      "loss": 0.0203,
      "step": 1250
    },
    {
      "epoch": 0.059305978842827387,
      "grad_norm": 0.14965488016605377,
      "learning_rate": 1.959125142514362e-05,
      "loss": 0.0197,
      "step": 1260
    },
    {
      "epoch": 0.059776661214595864,
      "grad_norm": 0.27559077739715576,
      "learning_rate": 1.9582578996294882e-05,
      "loss": 0.0209,
      "step": 1270
    },
    {
      "epoch": 0.060247343586364334,
      "grad_norm": 0.13103267550468445,
      "learning_rate": 1.957381748951599e-05,
      "loss": 0.0188,
      "step": 1280
    },
    {
      "epoch": 0.060718025958132804,
      "grad_norm": 0.2551141083240509,
      "learning_rate": 1.9564966986252326e-05,
      "loss": 0.0198,
      "step": 1290
    },
    {
      "epoch": 0.061188708329901274,
      "grad_norm": 0.16889545321464539,
      "learning_rate": 1.9556027568776577e-05,
      "loss": 0.0201,
      "step": 1300
    },
    {
      "epoch": 0.061659390701669745,
      "grad_norm": 0.2296774685382843,
      "learning_rate": 1.9546999320187966e-05,
      "loss": 0.0191,
      "step": 1310
    },
    {
      "epoch": 0.062130073073438215,
      "grad_norm": 0.16604012250900269,
      "learning_rate": 1.953788232441147e-05,
      "loss": 0.0185,
      "step": 1320
    },
    {
      "epoch": 0.06260075544520669,
      "grad_norm": 0.17663390934467316,
      "learning_rate": 1.9528676666197026e-05,
      "loss": 0.0232,
      "step": 1330
    },
    {
      "epoch": 0.06307143781697516,
      "grad_norm": 0.23400381207466125,
      "learning_rate": 1.951938243111879e-05,
      "loss": 0.0168,
      "step": 1340
    },
    {
      "epoch": 0.06354212018874363,
      "grad_norm": 0.17597930133342743,
      "learning_rate": 1.9509999705574293e-05,
      "loss": 0.018,
      "step": 1350
    },
    {
      "epoch": 0.0640128025605121,
      "grad_norm": 0.1490190625190735,
      "learning_rate": 1.9500528576783667e-05,
      "loss": 0.0221,
      "step": 1360
    },
    {
      "epoch": 0.06448348493228058,
      "grad_norm": 0.22764147818088531,
      "learning_rate": 1.9490969132788815e-05,
      "loss": 0.0229,
      "step": 1370
    },
    {
      "epoch": 0.06495416730404904,
      "grad_norm": 0.24380986392498016,
      "learning_rate": 1.9481321462452617e-05,
      "loss": 0.0191,
      "step": 1380
    },
    {
      "epoch": 0.06542484967581752,
      "grad_norm": 0.1726468801498413,
      "learning_rate": 1.9471585655458073e-05,
      "loss": 0.0193,
      "step": 1390
    },
    {
      "epoch": 0.06589553204758598,
      "grad_norm": 0.09343688189983368,
      "learning_rate": 1.9461761802307494e-05,
      "loss": 0.0226,
      "step": 1400
    },
    {
      "epoch": 0.06636621441935446,
      "grad_norm": 0.17471985518932343,
      "learning_rate": 1.945184999432166e-05,
      "loss": 0.0168,
      "step": 1410
    },
    {
      "epoch": 0.06683689679112292,
      "grad_norm": 0.11864632368087769,
      "learning_rate": 1.9441850323638944e-05,
      "loss": 0.0176,
      "step": 1420
    },
    {
      "epoch": 0.0673075791628914,
      "grad_norm": 0.16971559822559357,
      "learning_rate": 1.943176288321449e-05,
      "loss": 0.0187,
      "step": 1430
    },
    {
      "epoch": 0.06777826153465988,
      "grad_norm": 0.14903157949447632,
      "learning_rate": 1.942158776681933e-05,
      "loss": 0.0224,
      "step": 1440
    },
    {
      "epoch": 0.06824894390642834,
      "grad_norm": 0.13742227852344513,
      "learning_rate": 1.941132506903951e-05,
      "loss": 0.0178,
      "step": 1450
    },
    {
      "epoch": 0.06871962627819682,
      "grad_norm": 0.2229357659816742,
      "learning_rate": 1.9400974885275226e-05,
      "loss": 0.0197,
      "step": 1460
    },
    {
      "epoch": 0.06919030864996528,
      "grad_norm": 0.168918177485466,
      "learning_rate": 1.9390537311739927e-05,
      "loss": 0.0228,
      "step": 1470
    },
    {
      "epoch": 0.06966099102173376,
      "grad_norm": 0.17068640887737274,
      "learning_rate": 1.938001244545941e-05,
      "loss": 0.0212,
      "step": 1480
    },
    {
      "epoch": 0.07013167339350224,
      "grad_norm": 0.19765964150428772,
      "learning_rate": 1.9369400384270948e-05,
      "loss": 0.0199,
      "step": 1490
    },
    {
      "epoch": 0.0706023557652707,
      "grad_norm": 0.18849430978298187,
      "learning_rate": 1.935870122682235e-05,
      "loss": 0.0226,
      "step": 1500
    },
    {
      "epoch": 0.07107303813703918,
      "grad_norm": 0.19435733556747437,
      "learning_rate": 1.934791507257105e-05,
      "loss": 0.0169,
      "step": 1510
    },
    {
      "epoch": 0.07154372050880764,
      "grad_norm": 0.21003960072994232,
      "learning_rate": 1.933704202178321e-05,
      "loss": 0.0216,
      "step": 1520
    },
    {
      "epoch": 0.07201440288057612,
      "grad_norm": 0.1524060070514679,
      "learning_rate": 1.9326082175532744e-05,
      "loss": 0.0188,
      "step": 1530
    },
    {
      "epoch": 0.07248508525234458,
      "grad_norm": 0.16427676379680634,
      "learning_rate": 1.9315035635700412e-05,
      "loss": 0.0203,
      "step": 1540
    },
    {
      "epoch": 0.07295576762411306,
      "grad_norm": 0.18140247464179993,
      "learning_rate": 1.9303902504972866e-05,
      "loss": 0.0229,
      "step": 1550
    },
    {
      "epoch": 0.07342644999588153,
      "grad_norm": 0.153948113322258,
      "learning_rate": 1.9292682886841683e-05,
      "loss": 0.0193,
      "step": 1560
    },
    {
      "epoch": 0.07389713236765,
      "grad_norm": 0.17368511855602264,
      "learning_rate": 1.9281376885602412e-05,
      "loss": 0.0218,
      "step": 1570
    },
    {
      "epoch": 0.07436781473941848,
      "grad_norm": 0.36407792568206787,
      "learning_rate": 1.926998460635361e-05,
      "loss": 0.0212,
      "step": 1580
    },
    {
      "epoch": 0.07483849711118694,
      "grad_norm": 0.17313210666179657,
      "learning_rate": 1.9258506154995854e-05,
      "loss": 0.0178,
      "step": 1590
    },
    {
      "epoch": 0.07530917948295542,
      "grad_norm": 0.22496558725833893,
      "learning_rate": 1.924694163823076e-05,
      "loss": 0.0256,
      "step": 1600
    },
    {
      "epoch": 0.07577986185472389,
      "grad_norm": 0.18591882288455963,
      "learning_rate": 1.9235291163559996e-05,
      "loss": 0.0214,
      "step": 1610
    },
    {
      "epoch": 0.07625054422649236,
      "grad_norm": 0.12409357726573944,
      "learning_rate": 1.922355483928428e-05,
      "loss": 0.0171,
      "step": 1620
    },
    {
      "epoch": 0.07672122659826083,
      "grad_norm": 0.20914681255817413,
      "learning_rate": 1.9211732774502372e-05,
      "loss": 0.0172,
      "step": 1630
    },
    {
      "epoch": 0.0771919089700293,
      "grad_norm": 0.19645445048809052,
      "learning_rate": 1.919982507911006e-05,
      "loss": 0.0171,
      "step": 1640
    },
    {
      "epoch": 0.07766259134179777,
      "grad_norm": 0.15752822160720825,
      "learning_rate": 1.918783186379913e-05,
      "loss": 0.0184,
      "step": 1650
    },
    {
      "epoch": 0.07813327371356624,
      "grad_norm": 0.16205720603466034,
      "learning_rate": 1.917575324005636e-05,
      "loss": 0.018,
      "step": 1660
    },
    {
      "epoch": 0.07860395608533471,
      "grad_norm": 0.16118857264518738,
      "learning_rate": 1.916358932016246e-05,
      "loss": 0.0183,
      "step": 1670
    },
    {
      "epoch": 0.07907463845710319,
      "grad_norm": 0.15827985107898712,
      "learning_rate": 1.9151340217191042e-05,
      "loss": 0.0198,
      "step": 1680
    },
    {
      "epoch": 0.07954532082887165,
      "grad_norm": 0.10632066428661346,
      "learning_rate": 1.9139006045007567e-05,
      "loss": 0.017,
      "step": 1690
    },
    {
      "epoch": 0.08001600320064013,
      "grad_norm": 0.15292513370513916,
      "learning_rate": 1.9126586918268275e-05,
      "loss": 0.0181,
      "step": 1700
    },
    {
      "epoch": 0.0804866855724086,
      "grad_norm": 0.1305696964263916,
      "learning_rate": 1.9114082952419134e-05,
      "loss": 0.0189,
      "step": 1710
    },
    {
      "epoch": 0.08095736794417707,
      "grad_norm": 0.14558079838752747,
      "learning_rate": 1.9101494263694764e-05,
      "loss": 0.0185,
      "step": 1720
    },
    {
      "epoch": 0.08142805031594554,
      "grad_norm": 0.1263592690229416,
      "learning_rate": 1.9088820969117348e-05,
      "loss": 0.0173,
      "step": 1730
    },
    {
      "epoch": 0.08189873268771401,
      "grad_norm": 0.17646698653697968,
      "learning_rate": 1.907606318649555e-05,
      "loss": 0.0184,
      "step": 1740
    },
    {
      "epoch": 0.08236941505948249,
      "grad_norm": 0.12980340421199799,
      "learning_rate": 1.906322103442343e-05,
      "loss": 0.0184,
      "step": 1750
    },
    {
      "epoch": 0.08284009743125095,
      "grad_norm": 0.3880372643470764,
      "learning_rate": 1.9050294632279317e-05,
      "loss": 0.0192,
      "step": 1760
    },
    {
      "epoch": 0.08331077980301943,
      "grad_norm": 0.15283338725566864,
      "learning_rate": 1.9037284100224714e-05,
      "loss": 0.0241,
      "step": 1770
    },
    {
      "epoch": 0.0837814621747879,
      "grad_norm": 0.19735479354858398,
      "learning_rate": 1.9024189559203193e-05,
      "loss": 0.0183,
      "step": 1780
    },
    {
      "epoch": 0.08425214454655637,
      "grad_norm": 0.19369491934776306,
      "learning_rate": 1.9011011130939254e-05,
      "loss": 0.0161,
      "step": 1790
    },
    {
      "epoch": 0.08472282691832485,
      "grad_norm": 0.14755471050739288,
      "learning_rate": 1.8997748937937188e-05,
      "loss": 0.0194,
      "step": 1800
    },
    {
      "epoch": 0.08519350929009331,
      "grad_norm": 0.15142719447612762,
      "learning_rate": 1.8984403103479957e-05,
      "loss": 0.0203,
      "step": 1810
    },
    {
      "epoch": 0.08566419166186179,
      "grad_norm": 0.1482364982366562,
      "learning_rate": 1.897097375162804e-05,
      "loss": 0.016,
      "step": 1820
    },
    {
      "epoch": 0.08613487403363025,
      "grad_norm": 0.17669634521007538,
      "learning_rate": 1.8957461007218272e-05,
      "loss": 0.0164,
      "step": 1830
    },
    {
      "epoch": 0.08660555640539873,
      "grad_norm": 0.1324431598186493,
      "learning_rate": 1.8943864995862692e-05,
      "loss": 0.0188,
      "step": 1840
    },
    {
      "epoch": 0.08707623877716719,
      "grad_norm": 0.1473216414451599,
      "learning_rate": 1.8930185843947382e-05,
      "loss": 0.0183,
      "step": 1850
    },
    {
      "epoch": 0.08754692114893567,
      "grad_norm": 0.1502453088760376,
      "learning_rate": 1.891642367863127e-05,
      "loss": 0.0169,
      "step": 1860
    },
    {
      "epoch": 0.08801760352070415,
      "grad_norm": 0.15653997659683228,
      "learning_rate": 1.8902578627844975e-05,
      "loss": 0.0178,
      "step": 1870
    },
    {
      "epoch": 0.08848828589247261,
      "grad_norm": 0.15937530994415283,
      "learning_rate": 1.8888650820289594e-05,
      "loss": 0.0181,
      "step": 1880
    },
    {
      "epoch": 0.08895896826424109,
      "grad_norm": 0.3920705318450928,
      "learning_rate": 1.8874640385435515e-05,
      "loss": 0.0166,
      "step": 1890
    },
    {
      "epoch": 0.08942965063600955,
      "grad_norm": 0.10840073972940445,
      "learning_rate": 1.8860547453521232e-05,
      "loss": 0.0156,
      "step": 1900
    },
    {
      "epoch": 0.08990033300777803,
      "grad_norm": 0.14995324611663818,
      "learning_rate": 1.8846372155552095e-05,
      "loss": 0.021,
      "step": 1910
    },
    {
      "epoch": 0.0903710153795465,
      "grad_norm": 0.14307928085327148,
      "learning_rate": 1.8832114623299125e-05,
      "loss": 0.0176,
      "step": 1920
    },
    {
      "epoch": 0.09084169775131497,
      "grad_norm": 0.13450343906879425,
      "learning_rate": 1.8817774989297776e-05,
      "loss": 0.0195,
      "step": 1930
    },
    {
      "epoch": 0.09131238012308344,
      "grad_norm": 0.15622445940971375,
      "learning_rate": 1.8803353386846708e-05,
      "loss": 0.0163,
      "step": 1940
    },
    {
      "epoch": 0.09178306249485191,
      "grad_norm": 0.15726979076862335,
      "learning_rate": 1.878884995000654e-05,
      "loss": 0.0164,
      "step": 1950
    },
    {
      "epoch": 0.09225374486662039,
      "grad_norm": 0.16298818588256836,
      "learning_rate": 1.8774264813598614e-05,
      "loss": 0.0184,
      "step": 1960
    },
    {
      "epoch": 0.09272442723838885,
      "grad_norm": 0.1313478946685791,
      "learning_rate": 1.875959811320373e-05,
      "loss": 0.0171,
      "step": 1970
    },
    {
      "epoch": 0.09319510961015733,
      "grad_norm": 0.1678081899881363,
      "learning_rate": 1.87448499851609e-05,
      "loss": 0.0207,
      "step": 1980
    },
    {
      "epoch": 0.0936657919819258,
      "grad_norm": 0.1520097851753235,
      "learning_rate": 1.8730020566566068e-05,
      "loss": 0.0173,
      "step": 1990
    },
    {
      "epoch": 0.09413647435369427,
      "grad_norm": 0.11722538620233536,
      "learning_rate": 1.8715109995270836e-05,
      "loss": 0.0168,
      "step": 2000
    },
    {
      "epoch": 0.09460715672546274,
      "grad_norm": 0.13153265416622162,
      "learning_rate": 1.8700118409881198e-05,
      "loss": 0.0181,
      "step": 2010
    },
    {
      "epoch": 0.0950778390972312,
      "grad_norm": 0.2276691049337387,
      "learning_rate": 1.8685045949756232e-05,
      "loss": 0.0202,
      "step": 2020
    },
    {
      "epoch": 0.09554852146899968,
      "grad_norm": 0.20304642617702484,
      "learning_rate": 1.8669892755006816e-05,
      "loss": 0.0149,
      "step": 2030
    },
    {
      "epoch": 0.09601920384076815,
      "grad_norm": 0.13692355155944824,
      "learning_rate": 1.865465896649432e-05,
      "loss": 0.0139,
      "step": 2040
    },
    {
      "epoch": 0.09648988621253662,
      "grad_norm": 0.20823757350444794,
      "learning_rate": 1.8639344725829302e-05,
      "loss": 0.0187,
      "step": 2050
    },
    {
      "epoch": 0.0969605685843051,
      "grad_norm": 0.18364469707012177,
      "learning_rate": 1.862395017537019e-05,
      "loss": 0.0159,
      "step": 2060
    },
    {
      "epoch": 0.09743125095607356,
      "grad_norm": 0.1560748666524887,
      "learning_rate": 1.860847545822195e-05,
      "loss": 0.0135,
      "step": 2070
    },
    {
      "epoch": 0.09790193332784204,
      "grad_norm": 0.14844276010990143,
      "learning_rate": 1.8592920718234775e-05,
      "loss": 0.0188,
      "step": 2080
    },
    {
      "epoch": 0.0983726156996105,
      "grad_norm": 0.14456084370613098,
      "learning_rate": 1.8577286100002723e-05,
      "loss": 0.016,
      "step": 2090
    },
    {
      "epoch": 0.09884329807137898,
      "grad_norm": 0.17267173528671265,
      "learning_rate": 1.8561571748862394e-05,
      "loss": 0.0194,
      "step": 2100
    },
    {
      "epoch": 0.09931398044314746,
      "grad_norm": 0.15792517364025116,
      "learning_rate": 1.854577781089157e-05,
      "loss": 0.016,
      "step": 2110
    },
    {
      "epoch": 0.09978466281491592,
      "grad_norm": 0.13792484998703003,
      "learning_rate": 1.8529904432907858e-05,
      "loss": 0.0159,
      "step": 2120
    },
    {
      "epoch": 0.1002553451866844,
      "grad_norm": 0.15125246345996857,
      "learning_rate": 1.8513951762467318e-05,
      "loss": 0.0197,
      "step": 2130
    },
    {
      "epoch": 0.10072602755845286,
      "grad_norm": 0.11484289914369583,
      "learning_rate": 1.849791994786311e-05,
      "loss": 0.0212,
      "step": 2140
    },
    {
      "epoch": 0.10119670993022134,
      "grad_norm": 0.15358440577983856,
      "learning_rate": 1.8481809138124092e-05,
      "loss": 0.0153,
      "step": 2150
    },
    {
      "epoch": 0.1016673923019898,
      "grad_norm": 0.1823110580444336,
      "learning_rate": 1.846561948301346e-05,
      "loss": 0.0177,
      "step": 2160
    },
    {
      "epoch": 0.10213807467375828,
      "grad_norm": 0.12166211009025574,
      "learning_rate": 1.8449351133027327e-05,
      "loss": 0.0144,
      "step": 2170
    },
    {
      "epoch": 0.10260875704552676,
      "grad_norm": 0.17179739475250244,
      "learning_rate": 1.8433004239393353e-05,
      "loss": 0.0197,
      "step": 2180
    },
    {
      "epoch": 0.10307943941729522,
      "grad_norm": 0.11975016444921494,
      "learning_rate": 1.8416578954069318e-05,
      "loss": 0.0183,
      "step": 2190
    },
    {
      "epoch": 0.1035501217890637,
      "grad_norm": 0.16793161630630493,
      "learning_rate": 1.840007542974172e-05,
      "loss": 0.0157,
      "step": 2200
    },
    {
      "epoch": 0.10402080416083216,
      "grad_norm": 0.12980200350284576,
      "learning_rate": 1.838349381982435e-05,
      "loss": 0.0177,
      "step": 2210
    },
    {
      "epoch": 0.10449148653260064,
      "grad_norm": 0.13064740598201752,
      "learning_rate": 1.8366834278456872e-05,
      "loss": 0.0181,
      "step": 2220
    },
    {
      "epoch": 0.10496216890436912,
      "grad_norm": 0.1093079224228859,
      "learning_rate": 1.8350096960503383e-05,
      "loss": 0.014,
      "step": 2230
    },
    {
      "epoch": 0.10543285127613758,
      "grad_norm": 0.20110709965229034,
      "learning_rate": 1.833328202155098e-05,
      "loss": 0.0146,
      "step": 2240
    },
    {
      "epoch": 0.10590353364790606,
      "grad_norm": 0.16145408153533936,
      "learning_rate": 1.831638961790831e-05,
      "loss": 0.0158,
      "step": 2250
    },
    {
      "epoch": 0.10637421601967452,
      "grad_norm": 0.12829731404781342,
      "learning_rate": 1.8299419906604115e-05,
      "loss": 0.017,
      "step": 2260
    },
    {
      "epoch": 0.106844898391443,
      "grad_norm": 0.15451538562774658,
      "learning_rate": 1.8282373045385786e-05,
      "loss": 0.0182,
      "step": 2270
    },
    {
      "epoch": 0.10731558076321146,
      "grad_norm": 0.19389769434928894,
      "learning_rate": 1.8265249192717868e-05,
      "loss": 0.0152,
      "step": 2280
    },
    {
      "epoch": 0.10778626313497994,
      "grad_norm": 0.13470546901226044,
      "learning_rate": 1.8248048507780626e-05,
      "loss": 0.0146,
      "step": 2290
    },
    {
      "epoch": 0.10825694550674841,
      "grad_norm": 0.164323627948761,
      "learning_rate": 1.8230771150468517e-05,
      "loss": 0.0186,
      "step": 2300
    },
    {
      "epoch": 0.10872762787851688,
      "grad_norm": 0.12247840315103531,
      "learning_rate": 1.821341728138876e-05,
      "loss": 0.0137,
      "step": 2310
    },
    {
      "epoch": 0.10919831025028535,
      "grad_norm": 0.12810325622558594,
      "learning_rate": 1.819598706185979e-05,
      "loss": 0.0131,
      "step": 2320
    },
    {
      "epoch": 0.10966899262205382,
      "grad_norm": 0.1692439764738083,
      "learning_rate": 1.8178480653909795e-05,
      "loss": 0.0186,
      "step": 2330
    },
    {
      "epoch": 0.1101396749938223,
      "grad_norm": 0.10300221294164658,
      "learning_rate": 1.8160898220275196e-05,
      "loss": 0.0148,
      "step": 2340
    },
    {
      "epoch": 0.11061035736559076,
      "grad_norm": 0.12927402555942535,
      "learning_rate": 1.8143239924399127e-05,
      "loss": 0.0162,
      "step": 2350
    },
    {
      "epoch": 0.11108103973735924,
      "grad_norm": 0.15936507284641266,
      "learning_rate": 1.8125505930429936e-05,
      "loss": 0.0169,
      "step": 2360
    },
    {
      "epoch": 0.11155172210912771,
      "grad_norm": 0.11727359890937805,
      "learning_rate": 1.810769640321963e-05,
      "loss": 0.0142,
      "step": 2370
    },
    {
      "epoch": 0.11202240448089618,
      "grad_norm": 0.1353157013654709,
      "learning_rate": 1.8089811508322382e-05,
      "loss": 0.0158,
      "step": 2380
    },
    {
      "epoch": 0.11249308685266465,
      "grad_norm": 0.15169605612754822,
      "learning_rate": 1.8071851411992948e-05,
      "loss": 0.0124,
      "step": 2390
    },
    {
      "epoch": 0.11296376922443312,
      "grad_norm": 0.18794389069080353,
      "learning_rate": 1.8053816281185154e-05,
      "loss": 0.0155,
      "step": 2400
    },
    {
      "epoch": 0.1134344515962016,
      "grad_norm": 0.17180399596691132,
      "learning_rate": 1.803570628355033e-05,
      "loss": 0.0151,
      "step": 2410
    },
    {
      "epoch": 0.11390513396797007,
      "grad_norm": 0.12078606337308884,
      "learning_rate": 1.801752158743576e-05,
      "loss": 0.0133,
      "step": 2420
    },
    {
      "epoch": 0.11437581633973853,
      "grad_norm": 0.14188805222511292,
      "learning_rate": 1.7999262361883102e-05,
      "loss": 0.0167,
      "step": 2430
    },
    {
      "epoch": 0.11484649871150701,
      "grad_norm": 0.14054830372333527,
      "learning_rate": 1.7980928776626833e-05,
      "loss": 0.0215,
      "step": 2440
    },
    {
      "epoch": 0.11531718108327547,
      "grad_norm": 0.14647804200649261,
      "learning_rate": 1.796252100209266e-05,
      "loss": 0.0157,
      "step": 2450
    },
    {
      "epoch": 0.11578786345504395,
      "grad_norm": 0.13471056520938873,
      "learning_rate": 1.794403920939595e-05,
      "loss": 0.0221,
      "step": 2460
    },
    {
      "epoch": 0.11625854582681242,
      "grad_norm": 0.14870299398899078,
      "learning_rate": 1.7925483570340118e-05,
      "loss": 0.0145,
      "step": 2470
    },
    {
      "epoch": 0.11672922819858089,
      "grad_norm": 0.16026903688907623,
      "learning_rate": 1.7906854257415048e-05,
      "loss": 0.0202,
      "step": 2480
    },
    {
      "epoch": 0.11719991057034937,
      "grad_norm": 0.09352197498083115,
      "learning_rate": 1.7888151443795478e-05,
      "loss": 0.0148,
      "step": 2490
    },
    {
      "epoch": 0.11767059294211783,
      "grad_norm": 0.16556917130947113,
      "learning_rate": 1.78693753033394e-05,
      "loss": 0.0157,
      "step": 2500
    },
    {
      "epoch": 0.11814127531388631,
      "grad_norm": 0.10724364221096039,
      "learning_rate": 1.7850526010586437e-05,
      "loss": 0.0153,
      "step": 2510
    },
    {
      "epoch": 0.11861195768565477,
      "grad_norm": 0.133198544383049,
      "learning_rate": 1.7831603740756223e-05,
      "loss": 0.0183,
      "step": 2520
    },
    {
      "epoch": 0.11908264005742325,
      "grad_norm": 0.15818530321121216,
      "learning_rate": 1.7812608669746774e-05,
      "loss": 0.0175,
      "step": 2530
    },
    {
      "epoch": 0.11955332242919173,
      "grad_norm": 0.1321696639060974,
      "learning_rate": 1.779354097413285e-05,
      "loss": 0.0163,
      "step": 2540
    },
    {
      "epoch": 0.12002400480096019,
      "grad_norm": 0.1563303917646408,
      "learning_rate": 1.777440083116432e-05,
      "loss": 0.0125,
      "step": 2550
    },
    {
      "epoch": 0.12049468717272867,
      "grad_norm": 0.15840402245521545,
      "learning_rate": 1.7755188418764517e-05,
      "loss": 0.017,
      "step": 2560
    },
    {
      "epoch": 0.12096536954449713,
      "grad_norm": 0.1330888271331787,
      "learning_rate": 1.7735903915528553e-05,
      "loss": 0.0191,
      "step": 2570
    },
    {
      "epoch": 0.12143605191626561,
      "grad_norm": 0.10261926054954529,
      "learning_rate": 1.7716547500721715e-05,
      "loss": 0.0175,
      "step": 2580
    },
    {
      "epoch": 0.12190673428803407,
      "grad_norm": 0.1102108284831047,
      "learning_rate": 1.7697119354277746e-05,
      "loss": 0.0166,
      "step": 2590
    },
    {
      "epoch": 0.12237741665980255,
      "grad_norm": 0.14028173685073853,
      "learning_rate": 1.76776196567972e-05,
      "loss": 0.0166,
      "step": 2600
    },
    {
      "epoch": 0.12284809903157103,
      "grad_norm": 0.18411780893802643,
      "learning_rate": 1.7658048589545757e-05,
      "loss": 0.0172,
      "step": 2610
    },
    {
      "epoch": 0.12331878140333949,
      "grad_norm": 0.1064414232969284,
      "learning_rate": 1.7638406334452535e-05,
      "loss": 0.0133,
      "step": 2620
    },
    {
      "epoch": 0.12378946377510797,
      "grad_norm": 0.13803263008594513,
      "learning_rate": 1.7618693074108405e-05,
      "loss": 0.0172,
      "step": 2630
    },
    {
      "epoch": 0.12426014614687643,
      "grad_norm": 0.125034362077713,
      "learning_rate": 1.7598908991764288e-05,
      "loss": 0.0183,
      "step": 2640
    },
    {
      "epoch": 0.1247308285186449,
      "grad_norm": 0.22171784937381744,
      "learning_rate": 1.7579054271329457e-05,
      "loss": 0.015,
      "step": 2650
    },
    {
      "epoch": 0.12520151089041337,
      "grad_norm": 0.13481014966964722,
      "learning_rate": 1.755912909736981e-05,
      "loss": 0.0184,
      "step": 2660
    },
    {
      "epoch": 0.12567219326218185,
      "grad_norm": 0.1926272213459015,
      "learning_rate": 1.753913365510619e-05,
      "loss": 0.0144,
      "step": 2670
    },
    {
      "epoch": 0.12614287563395032,
      "grad_norm": 0.16069357097148895,
      "learning_rate": 1.751906813041263e-05,
      "loss": 0.0147,
      "step": 2680
    },
    {
      "epoch": 0.1266135580057188,
      "grad_norm": 0.15145283937454224,
      "learning_rate": 1.749893270981463e-05,
      "loss": 0.016,
      "step": 2690
    },
    {
      "epoch": 0.12708424037748725,
      "grad_norm": 0.11472610384225845,
      "learning_rate": 1.747872758048744e-05,
      "loss": 0.016,
      "step": 2700
    },
    {
      "epoch": 0.12755492274925573,
      "grad_norm": 0.14715996384620667,
      "learning_rate": 1.745845293025431e-05,
      "loss": 0.0138,
      "step": 2710
    },
    {
      "epoch": 0.1280256051210242,
      "grad_norm": 0.07992751151323318,
      "learning_rate": 1.7438108947584737e-05,
      "loss": 0.0169,
      "step": 2720
    },
    {
      "epoch": 0.12849628749279268,
      "grad_norm": 0.10780564695596695,
      "learning_rate": 1.7417695821592727e-05,
      "loss": 0.0151,
      "step": 2730
    },
    {
      "epoch": 0.12896696986456116,
      "grad_norm": 0.1476987600326538,
      "learning_rate": 1.739721374203502e-05,
      "loss": 0.0163,
      "step": 2740
    },
    {
      "epoch": 0.1294376522363296,
      "grad_norm": 0.14835335314273834,
      "learning_rate": 1.7376662899309346e-05,
      "loss": 0.0147,
      "step": 2750
    },
    {
      "epoch": 0.12990833460809809,
      "grad_norm": 0.12376794964075089,
      "learning_rate": 1.7356043484452643e-05,
      "loss": 0.0143,
      "step": 2760
    },
    {
      "epoch": 0.13037901697986656,
      "grad_norm": 0.1482350081205368,
      "learning_rate": 1.733535568913928e-05,
      "loss": 0.014,
      "step": 2770
    },
    {
      "epoch": 0.13084969935163504,
      "grad_norm": 0.09628381580114365,
      "learning_rate": 1.731459970567928e-05,
      "loss": 0.0141,
      "step": 2780
    },
    {
      "epoch": 0.13132038172340352,
      "grad_norm": 0.14838829636573792,
      "learning_rate": 1.729377572701653e-05,
      "loss": 0.0144,
      "step": 2790
    },
    {
      "epoch": 0.13179106409517197,
      "grad_norm": 0.18305176496505737,
      "learning_rate": 1.7272883946726986e-05,
      "loss": 0.0172,
      "step": 2800
    },
    {
      "epoch": 0.13226174646694044,
      "grad_norm": 0.11932403594255447,
      "learning_rate": 1.7251924559016885e-05,
      "loss": 0.0174,
      "step": 2810
    },
    {
      "epoch": 0.13273242883870892,
      "grad_norm": 0.12068909406661987,
      "learning_rate": 1.7230897758720916e-05,
      "loss": 0.0182,
      "step": 2820
    },
    {
      "epoch": 0.1332031112104774,
      "grad_norm": 0.17540064454078674,
      "learning_rate": 1.720980374130044e-05,
      "loss": 0.0211,
      "step": 2830
    },
    {
      "epoch": 0.13367379358224585,
      "grad_norm": 0.11220990121364594,
      "learning_rate": 1.7188642702841643e-05,
      "loss": 0.0132,
      "step": 2840
    },
    {
      "epoch": 0.13414447595401433,
      "grad_norm": 0.1383756846189499,
      "learning_rate": 1.716741484005373e-05,
      "loss": 0.0145,
      "step": 2850
    },
    {
      "epoch": 0.1346151583257828,
      "grad_norm": 0.17390526831150055,
      "learning_rate": 1.7146120350267094e-05,
      "loss": 0.0175,
      "step": 2860
    },
    {
      "epoch": 0.13508584069755128,
      "grad_norm": 0.13952553272247314,
      "learning_rate": 1.7124759431431485e-05,
      "loss": 0.0143,
      "step": 2870
    },
    {
      "epoch": 0.13555652306931976,
      "grad_norm": 0.11968237161636353,
      "learning_rate": 1.7103332282114156e-05,
      "loss": 0.0183,
      "step": 2880
    },
    {
      "epoch": 0.1360272054410882,
      "grad_norm": 0.08217765390872955,
      "learning_rate": 1.7081839101498033e-05,
      "loss": 0.015,
      "step": 2890
    },
    {
      "epoch": 0.13649788781285668,
      "grad_norm": 0.12313380837440491,
      "learning_rate": 1.7060280089379854e-05,
      "loss": 0.0139,
      "step": 2900
    },
    {
      "epoch": 0.13696857018462516,
      "grad_norm": 0.16178159415721893,
      "learning_rate": 1.703865544616832e-05,
      "loss": 0.0162,
      "step": 2910
    },
    {
      "epoch": 0.13743925255639364,
      "grad_norm": 0.1910175085067749,
      "learning_rate": 1.7016965372882227e-05,
      "loss": 0.0132,
      "step": 2920
    },
    {
      "epoch": 0.13790993492816211,
      "grad_norm": 0.1751141995191574,
      "learning_rate": 1.6995210071148582e-05,
      "loss": 0.0136,
      "step": 2930
    },
    {
      "epoch": 0.13838061729993056,
      "grad_norm": 0.3517913222312927,
      "learning_rate": 1.6973389743200764e-05,
      "loss": 0.0167,
      "step": 2940
    },
    {
      "epoch": 0.13885129967169904,
      "grad_norm": 0.09838580340147018,
      "learning_rate": 1.6951504591876614e-05,
      "loss": 0.0181,
      "step": 2950
    },
    {
      "epoch": 0.13932198204346752,
      "grad_norm": 0.1362282782793045,
      "learning_rate": 1.692955482061656e-05,
      "loss": 0.013,
      "step": 2960
    },
    {
      "epoch": 0.139792664415236,
      "grad_norm": 0.08601140975952148,
      "learning_rate": 1.6907540633461728e-05,
      "loss": 0.0192,
      "step": 2970
    },
    {
      "epoch": 0.14026334678700447,
      "grad_norm": 0.19153062999248505,
      "learning_rate": 1.6885462235052038e-05,
      "loss": 0.0164,
      "step": 2980
    },
    {
      "epoch": 0.14073402915877292,
      "grad_norm": 0.14621330797672272,
      "learning_rate": 1.6863319830624313e-05,
      "loss": 0.0196,
      "step": 2990
    },
    {
      "epoch": 0.1412047115305414,
      "grad_norm": 0.13781657814979553,
      "learning_rate": 1.6841113626010358e-05,
      "loss": 0.0185,
      "step": 3000
    },
    {
      "epoch": 0.14167539390230988,
      "grad_norm": 0.16201090812683105,
      "learning_rate": 1.6818843827635052e-05,
      "loss": 0.0136,
      "step": 3010
    },
    {
      "epoch": 0.14214607627407835,
      "grad_norm": 0.15096746385097504,
      "learning_rate": 1.679651064251444e-05,
      "loss": 0.0146,
      "step": 3020
    },
    {
      "epoch": 0.1426167586458468,
      "grad_norm": 0.12844118475914001,
      "learning_rate": 1.677411427825379e-05,
      "loss": 0.0147,
      "step": 3030
    },
    {
      "epoch": 0.14308744101761528,
      "grad_norm": 0.10843386501073837,
      "learning_rate": 1.6751654943045672e-05,
      "loss": 0.0145,
      "step": 3040
    },
    {
      "epoch": 0.14355812338938376,
      "grad_norm": 0.2006511688232422,
      "learning_rate": 1.672913284566803e-05,
      "loss": 0.0133,
      "step": 3050
    },
    {
      "epoch": 0.14402880576115223,
      "grad_norm": 0.11654097586870193,
      "learning_rate": 1.6706548195482222e-05,
      "loss": 0.0135,
      "step": 3060
    },
    {
      "epoch": 0.1444994881329207,
      "grad_norm": 0.12871982157230377,
      "learning_rate": 1.66839012024311e-05,
      "loss": 0.0146,
      "step": 3070
    },
    {
      "epoch": 0.14497017050468916,
      "grad_norm": 0.14667001366615295,
      "learning_rate": 1.666119207703703e-05,
      "loss": 0.0137,
      "step": 3080
    },
    {
      "epoch": 0.14544085287645764,
      "grad_norm": 0.13176274299621582,
      "learning_rate": 1.6638421030399962e-05,
      "loss": 0.0151,
      "step": 3090
    },
    {
      "epoch": 0.14591153524822612,
      "grad_norm": 0.1677788347005844,
      "learning_rate": 1.6615588274195445e-05,
      "loss": 0.0146,
      "step": 3100
    },
    {
      "epoch": 0.1463822176199946,
      "grad_norm": 0.1045595034956932,
      "learning_rate": 1.6592694020672667e-05,
      "loss": 0.0163,
      "step": 3110
    },
    {
      "epoch": 0.14685289999176307,
      "grad_norm": 0.17962002754211426,
      "learning_rate": 1.65697384826525e-05,
      "loss": 0.0126,
      "step": 3120
    },
    {
      "epoch": 0.14732358236353152,
      "grad_norm": 0.22977662086486816,
      "learning_rate": 1.6546721873525488e-05,
      "loss": 0.0138,
      "step": 3130
    },
    {
      "epoch": 0.1477942647353,
      "grad_norm": 0.1115872710943222,
      "learning_rate": 1.6523644407249893e-05,
      "loss": 0.0126,
      "step": 3140
    },
    {
      "epoch": 0.14826494710706847,
      "grad_norm": 0.1523939073085785,
      "learning_rate": 1.6500506298349682e-05,
      "loss": 0.0156,
      "step": 3150
    },
    {
      "epoch": 0.14873562947883695,
      "grad_norm": 0.14125415682792664,
      "learning_rate": 1.6477307761912555e-05,
      "loss": 0.0144,
      "step": 3160
    },
    {
      "epoch": 0.14920631185060543,
      "grad_norm": 0.11077827960252762,
      "learning_rate": 1.645404901358794e-05,
      "loss": 0.0109,
      "step": 3170
    },
    {
      "epoch": 0.14967699422237388,
      "grad_norm": 0.12060945481061935,
      "learning_rate": 1.6430730269584963e-05,
      "loss": 0.0142,
      "step": 3180
    },
    {
      "epoch": 0.15014767659414235,
      "grad_norm": 0.110359326004982,
      "learning_rate": 1.6407351746670484e-05,
      "loss": 0.013,
      "step": 3190
    },
    {
      "epoch": 0.15061835896591083,
      "grad_norm": 0.19728495180606842,
      "learning_rate": 1.638391366216704e-05,
      "loss": 0.0139,
      "step": 3200
    },
    {
      "epoch": 0.1510890413376793,
      "grad_norm": 0.08657602965831757,
      "learning_rate": 1.636041623395085e-05,
      "loss": 0.0111,
      "step": 3210
    },
    {
      "epoch": 0.15155972370944779,
      "grad_norm": 0.12473930418491364,
      "learning_rate": 1.6336859680449773e-05,
      "loss": 0.0151,
      "step": 3220
    },
    {
      "epoch": 0.15203040608121624,
      "grad_norm": 0.11829215288162231,
      "learning_rate": 1.6313244220641304e-05,
      "loss": 0.014,
      "step": 3230
    },
    {
      "epoch": 0.1525010884529847,
      "grad_norm": 0.15931151807308197,
      "learning_rate": 1.6289570074050492e-05,
      "loss": 0.0129,
      "step": 3240
    },
    {
      "epoch": 0.1529717708247532,
      "grad_norm": 0.08709783107042313,
      "learning_rate": 1.626583746074796e-05,
      "loss": 0.0146,
      "step": 3250
    },
    {
      "epoch": 0.15344245319652167,
      "grad_norm": 0.10884682089090347,
      "learning_rate": 1.6242046601347796e-05,
      "loss": 0.0116,
      "step": 3260
    },
    {
      "epoch": 0.15391313556829012,
      "grad_norm": 0.10619014501571655,
      "learning_rate": 1.6218197717005562e-05,
      "loss": 0.0133,
      "step": 3270
    },
    {
      "epoch": 0.1543838179400586,
      "grad_norm": 0.11754658818244934,
      "learning_rate": 1.6194291029416188e-05,
      "loss": 0.0147,
      "step": 3280
    },
    {
      "epoch": 0.15485450031182707,
      "grad_norm": 0.17230188846588135,
      "learning_rate": 1.617032676081194e-05,
      "loss": 0.0157,
      "step": 3290
    },
    {
      "epoch": 0.15532518268359555,
      "grad_norm": 0.13506999611854553,
      "learning_rate": 1.614630513396035e-05,
      "loss": 0.0143,
      "step": 3300
    },
    {
      "epoch": 0.15579586505536402,
      "grad_norm": 0.10659985989332199,
      "learning_rate": 1.6122226372162137e-05,
      "loss": 0.0149,
      "step": 3310
    },
    {
      "epoch": 0.15626654742713247,
      "grad_norm": 0.1660306453704834,
      "learning_rate": 1.6098090699249144e-05,
      "loss": 0.0176,
      "step": 3320
    },
    {
      "epoch": 0.15673722979890095,
      "grad_norm": 0.12292105704545975,
      "learning_rate": 1.607389833958223e-05,
      "loss": 0.0167,
      "step": 3330
    },
    {
      "epoch": 0.15720791217066943,
      "grad_norm": 0.10276249796152115,
      "learning_rate": 1.6049649518049234e-05,
      "loss": 0.0142,
      "step": 3340
    },
    {
      "epoch": 0.1576785945424379,
      "grad_norm": 0.08089154213666916,
      "learning_rate": 1.6025344460062826e-05,
      "loss": 0.0133,
      "step": 3350
    },
    {
      "epoch": 0.15814927691420638,
      "grad_norm": 0.11011315882205963,
      "learning_rate": 1.6000983391558457e-05,
      "loss": 0.0155,
      "step": 3360
    },
    {
      "epoch": 0.15861995928597483,
      "grad_norm": 0.12116759270429611,
      "learning_rate": 1.5976566538992237e-05,
      "loss": 0.014,
      "step": 3370
    },
    {
      "epoch": 0.1590906416577433,
      "grad_norm": 0.10609672218561172,
      "learning_rate": 1.5952094129338834e-05,
      "loss": 0.0133,
      "step": 3380
    },
    {
      "epoch": 0.1595613240295118,
      "grad_norm": 0.14467139542102814,
      "learning_rate": 1.5927566390089362e-05,
      "loss": 0.0166,
      "step": 3390
    },
    {
      "epoch": 0.16003200640128026,
      "grad_norm": 0.12918353080749512,
      "learning_rate": 1.5902983549249272e-05,
      "loss": 0.0215,
      "step": 3400
    },
    {
      "epoch": 0.16050268877304874,
      "grad_norm": 0.13685718178749084,
      "learning_rate": 1.5878345835336232e-05,
      "loss": 0.0114,
      "step": 3410
    },
    {
      "epoch": 0.1609733711448172,
      "grad_norm": 0.1492920219898224,
      "learning_rate": 1.5853653477377996e-05,
      "loss": 0.0141,
      "step": 3420
    },
    {
      "epoch": 0.16144405351658567,
      "grad_norm": 0.0778472051024437,
      "learning_rate": 1.582890670491028e-05,
      "loss": 0.0145,
      "step": 3430
    },
    {
      "epoch": 0.16191473588835414,
      "grad_norm": 0.18978632986545563,
      "learning_rate": 1.5804105747974626e-05,
      "loss": 0.0147,
      "step": 3440
    },
    {
      "epoch": 0.16238541826012262,
      "grad_norm": 0.08243067562580109,
      "learning_rate": 1.5779250837116275e-05,
      "loss": 0.0126,
      "step": 3450
    },
    {
      "epoch": 0.16285610063189107,
      "grad_norm": 0.12201232463121414,
      "learning_rate": 1.5754342203382003e-05,
      "loss": 0.0155,
      "step": 3460
    },
    {
      "epoch": 0.16332678300365955,
      "grad_norm": 0.1052756980061531,
      "learning_rate": 1.5729380078317982e-05,
      "loss": 0.0132,
      "step": 3470
    },
    {
      "epoch": 0.16379746537542803,
      "grad_norm": 0.08094022423028946,
      "learning_rate": 1.570436469396764e-05,
      "loss": 0.0141,
      "step": 3480
    },
    {
      "epoch": 0.1642681477471965,
      "grad_norm": 0.16547109186649323,
      "learning_rate": 1.567929628286949e-05,
      "loss": 0.0164,
      "step": 3490
    },
    {
      "epoch": 0.16473883011896498,
      "grad_norm": 0.26253578066825867,
      "learning_rate": 1.5654175078054965e-05,
      "loss": 0.013,
      "step": 3500
    },
    {
      "epoch": 0.16520951249073343,
      "grad_norm": 0.12960611283779144,
      "learning_rate": 1.562900131304627e-05,
      "loss": 0.0146,
      "step": 3510
    },
    {
      "epoch": 0.1656801948625019,
      "grad_norm": 0.23528945446014404,
      "learning_rate": 1.5603775221854195e-05,
      "loss": 0.0159,
      "step": 3520
    },
    {
      "epoch": 0.16615087723427038,
      "grad_norm": 0.12691619992256165,
      "learning_rate": 1.557849703897594e-05,
      "loss": 0.0153,
      "step": 3530
    },
    {
      "epoch": 0.16662155960603886,
      "grad_norm": 0.07701673358678818,
      "learning_rate": 1.5553166999392954e-05,
      "loss": 0.0156,
      "step": 3540
    },
    {
      "epoch": 0.16709224197780734,
      "grad_norm": 0.0947437584400177,
      "learning_rate": 1.5527785338568718e-05,
      "loss": 0.0126,
      "step": 3550
    },
    {
      "epoch": 0.1675629243495758,
      "grad_norm": 0.11626827716827393,
      "learning_rate": 1.550235229244659e-05,
      "loss": 0.0142,
      "step": 3560
    },
    {
      "epoch": 0.16803360672134426,
      "grad_norm": 0.09487389773130417,
      "learning_rate": 1.5476868097447586e-05,
      "loss": 0.0148,
      "step": 3570
    },
    {
      "epoch": 0.16850428909311274,
      "grad_norm": 0.1195988804101944,
      "learning_rate": 1.5451332990468202e-05,
      "loss": 0.0131,
      "step": 3580
    },
    {
      "epoch": 0.16897497146488122,
      "grad_norm": 0.1547771394252777,
      "learning_rate": 1.5425747208878195e-05,
      "loss": 0.0162,
      "step": 3590
    },
    {
      "epoch": 0.1694456538366497,
      "grad_norm": 0.12685328722000122,
      "learning_rate": 1.5400110990518386e-05,
      "loss": 0.0141,
      "step": 3600
    },
    {
      "epoch": 0.16991633620841815,
      "grad_norm": 0.1409425139427185,
      "learning_rate": 1.5374424573698453e-05,
      "loss": 0.0134,
      "step": 3610
    },
    {
      "epoch": 0.17038701858018662,
      "grad_norm": 0.09961021691560745,
      "learning_rate": 1.5348688197194696e-05,
      "loss": 0.0148,
      "step": 3620
    },
    {
      "epoch": 0.1708577009519551,
|
"grad_norm": 0.17034722864627838,
|
|
"learning_rate": 1.532290210024785e-05,
|
|
"loss": 0.0155,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.17132838332372358,
|
|
"grad_norm": 0.11699162423610687,
|
|
"learning_rate": 1.529706652256083e-05,
|
|
"loss": 0.0384,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.17179906569549203,
|
|
"grad_norm": 0.20753923058509827,
|
|
"learning_rate": 1.5271181704296513e-05,
|
|
"loss": 0.0142,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.1722697480672605,
|
|
"grad_norm": 0.07077182084321976,
|
|
"learning_rate": 1.5245247886075518e-05,
|
|
"loss": 0.0132,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.17274043043902898,
|
|
"grad_norm": 0.10375021398067474,
|
|
"learning_rate": 1.5219265308973952e-05,
|
|
"loss": 0.0097,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.17321111281079746,
|
|
"grad_norm": 0.18538837134838104,
|
|
"learning_rate": 1.519323421452117e-05,
|
|
"loss": 0.0189,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.17368179518256593,
|
|
"grad_norm": 0.15278775990009308,
|
|
"learning_rate": 1.5167154844697549e-05,
|
|
"loss": 0.0123,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.17415247755433438,
|
|
"grad_norm": 0.12953409552574158,
|
|
"learning_rate": 1.5141027441932217e-05,
|
|
"loss": 0.0147,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.17462315992610286,
|
|
"grad_norm": 0.12612172961235046,
|
|
"learning_rate": 1.5114852249100811e-05,
|
|
"loss": 0.0159,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.17509384229787134,
|
|
"grad_norm": 0.1542387455701828,
|
|
"learning_rate": 1.5088629509523207e-05,
|
|
"loss": 0.0148,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.17556452466963982,
|
|
"grad_norm": 0.12569794058799744,
|
|
"learning_rate": 1.5062359466961283e-05,
|
|
"loss": 0.0156,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.1760352070414083,
|
|
"grad_norm": 0.07093730568885803,
|
|
"learning_rate": 1.5036042365616621e-05,
|
|
"loss": 0.0123,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.17650588941317674,
|
|
"grad_norm": 0.15838895738124847,
|
|
"learning_rate": 1.5009678450128263e-05,
|
|
"loss": 0.0131,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.17697657178494522,
|
|
"grad_norm": 0.14409016072750092,
|
|
"learning_rate": 1.498326796557042e-05,
|
|
"loss": 0.0135,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 0.1774472541567137,
|
|
"grad_norm": 0.1365499496459961,
|
|
"learning_rate": 1.495681115745021e-05,
|
|
"loss": 0.0147,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 0.17791793652848217,
|
|
"grad_norm": 0.10746482759714127,
|
|
"learning_rate": 1.4930308271705357e-05,
|
|
"loss": 0.0137,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 0.17838861890025065,
|
|
"grad_norm": 0.14234359562397003,
|
|
"learning_rate": 1.4903759554701922e-05,
|
|
"loss": 0.0122,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 0.1788593012720191,
|
|
"grad_norm": 0.13182035088539124,
|
|
"learning_rate": 1.4877165253231995e-05,
|
|
"loss": 0.0132,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.17932998364378758,
|
|
"grad_norm": 0.10743127018213272,
|
|
"learning_rate": 1.4850525614511427e-05,
|
|
"loss": 0.0156,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 0.17980066601555605,
|
|
"grad_norm": 0.11722472310066223,
|
|
"learning_rate": 1.4823840886177494e-05,
|
|
"loss": 0.0144,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 0.18027134838732453,
|
|
"grad_norm": 0.11572381854057312,
|
|
"learning_rate": 1.4797111316286639e-05,
|
|
"loss": 0.0154,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 0.180742030759093,
|
|
"grad_norm": 0.11866148561239243,
|
|
"learning_rate": 1.4770337153312131e-05,
|
|
"loss": 0.0112,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 0.18121271313086146,
|
|
"grad_norm": 0.1207449808716774,
|
|
"learning_rate": 1.474351864614177e-05,
|
|
"loss": 0.0131,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.18168339550262994,
|
|
"grad_norm": 0.13676854968070984,
|
|
"learning_rate": 1.4716656044075577e-05,
|
|
"loss": 0.0149,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 0.1821540778743984,
|
|
"grad_norm": 0.11046641319990158,
|
|
"learning_rate": 1.468974959682346e-05,
|
|
"loss": 0.0155,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 0.1826247602461669,
|
|
"grad_norm": 0.14606067538261414,
|
|
"learning_rate": 1.466279955450292e-05,
|
|
"loss": 0.0163,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 0.18309544261793534,
|
|
"grad_norm": 0.1259547919034958,
|
|
"learning_rate": 1.4635806167636698e-05,
|
|
"loss": 0.013,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 0.18356612498970382,
|
|
"grad_norm": 0.12568768858909607,
|
|
"learning_rate": 1.4608769687150459e-05,
|
|
"loss": 0.0145,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.1840368073614723,
|
|
"grad_norm": 0.11527460068464279,
|
|
"learning_rate": 1.4581690364370466e-05,
|
|
"loss": 0.0159,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 0.18450748973324077,
|
|
"grad_norm": 0.11225946247577667,
|
|
"learning_rate": 1.455456845102123e-05,
|
|
"loss": 0.0155,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 0.18497817210500925,
|
|
"grad_norm": 0.17642593383789062,
|
|
"learning_rate": 1.4527404199223173e-05,
|
|
"loss": 0.014,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 0.1854488544767777,
|
|
"grad_norm": 0.13245446979999542,
|
|
"learning_rate": 1.4500197861490293e-05,
|
|
"loss": 0.0158,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 0.18591953684854617,
|
|
"grad_norm": 0.07578036934137344,
|
|
"learning_rate": 1.4472949690727813e-05,
|
|
"loss": 0.0121,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.18639021922031465,
|
|
"grad_norm": 0.0968451127409935,
|
|
"learning_rate": 1.4445659940229827e-05,
|
|
"loss": 0.0169,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 0.18686090159208313,
|
|
"grad_norm": 0.143287792801857,
|
|
"learning_rate": 1.441832886367694e-05,
|
|
"loss": 0.0131,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 0.1873315839638516,
|
|
"grad_norm": 0.10515135526657104,
|
|
"learning_rate": 1.4390956715133928e-05,
|
|
"loss": 0.0129,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 0.18780226633562006,
|
|
"grad_norm": 0.1260642260313034,
|
|
"learning_rate": 1.4363543749047354e-05,
|
|
"loss": 0.0137,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 0.18827294870738853,
|
|
"grad_norm": 0.12232226878404617,
|
|
"learning_rate": 1.4336090220243222e-05,
|
|
"loss": 0.0095,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.188743631079157,
|
|
"grad_norm": 0.10386616736650467,
|
|
"learning_rate": 1.4308596383924593e-05,
|
|
"loss": 0.0144,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 0.1892143134509255,
|
|
"grad_norm": 0.18197520077228546,
|
|
"learning_rate": 1.4281062495669224e-05,
|
|
"loss": 0.0135,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 0.18968499582269396,
|
|
"grad_norm": 0.12256472557783127,
|
|
"learning_rate": 1.4253488811427188e-05,
|
|
"loss": 0.0133,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 0.1901556781944624,
|
|
"grad_norm": 0.07531856000423431,
|
|
"learning_rate": 1.4225875587518485e-05,
|
|
"loss": 0.0147,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 0.1906263605662309,
|
|
"grad_norm": 0.1462387889623642,
|
|
"learning_rate": 1.4198223080630686e-05,
|
|
"loss": 0.0136,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.19109704293799937,
|
|
"grad_norm": 0.09299920499324799,
|
|
"learning_rate": 1.4170531547816513e-05,
|
|
"loss": 0.0111,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 0.19156772530976784,
|
|
"grad_norm": 0.1020888090133667,
|
|
"learning_rate": 1.4142801246491476e-05,
|
|
"loss": 0.0121,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 0.1920384076815363,
|
|
"grad_norm": 0.10841774940490723,
|
|
"learning_rate": 1.4115032434431461e-05,
|
|
"loss": 0.0112,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 0.19250909005330477,
|
|
"grad_norm": 0.14375372231006622,
|
|
"learning_rate": 1.4087225369770356e-05,
|
|
"loss": 0.0126,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 0.19297977242507325,
|
|
"grad_norm": 0.13949818909168243,
|
|
"learning_rate": 1.4059380310997626e-05,
|
|
"loss": 0.0135,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.19345045479684173,
|
|
"grad_norm": 0.1299353390932083,
|
|
"learning_rate": 1.403149751695593e-05,
|
|
"loss": 0.016,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 0.1939211371686102,
|
|
"grad_norm": 0.10485529899597168,
|
|
"learning_rate": 1.40035772468387e-05,
|
|
"loss": 0.0145,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 0.19439181954037865,
|
|
"grad_norm": 0.09539597481489182,
|
|
"learning_rate": 1.3975619760187746e-05,
|
|
"loss": 0.0119,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 0.19486250191214713,
|
|
"grad_norm": 0.12080245465040207,
|
|
"learning_rate": 1.3947625316890836e-05,
|
|
"loss": 0.0141,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 0.1953331842839156,
|
|
"grad_norm": 0.12871263921260834,
|
|
"learning_rate": 1.3919594177179272e-05,
|
|
"loss": 0.0128,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.19580386665568408,
|
|
"grad_norm": 0.12878765165805817,
|
|
"learning_rate": 1.3891526601625492e-05,
|
|
"loss": 0.0118,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 0.19627454902745256,
|
|
"grad_norm": 0.12321794778108597,
|
|
"learning_rate": 1.3863422851140624e-05,
|
|
"loss": 0.0132,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 0.196745231399221,
|
|
"grad_norm": 0.15922504663467407,
|
|
"learning_rate": 1.3835283186972077e-05,
|
|
"loss": 0.0152,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 0.1972159137709895,
|
|
"grad_norm": 0.09492635726928711,
|
|
"learning_rate": 1.3807107870701102e-05,
|
|
"loss": 0.0113,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 0.19768659614275796,
|
|
"grad_norm": 0.11242096871137619,
|
|
"learning_rate": 1.3778897164240378e-05,
|
|
"loss": 0.0126,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.19815727851452644,
|
|
"grad_norm": 0.15205872058868408,
|
|
"learning_rate": 1.3750651329831548e-05,
|
|
"loss": 0.0101,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 0.19862796088629492,
|
|
"grad_norm": 0.09228533506393433,
|
|
"learning_rate": 1.3722370630042809e-05,
|
|
"loss": 0.0112,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 0.19909864325806337,
|
|
"grad_norm": 0.11618391424417496,
|
|
"learning_rate": 1.369405532776646e-05,
|
|
"loss": 0.0115,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 0.19956932562983185,
|
|
"grad_norm": 0.10944508761167526,
|
|
"learning_rate": 1.3665705686216457e-05,
|
|
"loss": 0.0128,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 0.20004000800160032,
|
|
"grad_norm": 0.15271086990833282,
|
|
"learning_rate": 1.3637321968925964e-05,
|
|
"loss": 0.0123,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.2005106903733688,
|
|
"grad_norm": 0.15104058384895325,
|
|
"learning_rate": 1.3608904439744905e-05,
|
|
"loss": 0.0158,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 0.20098137274513725,
|
|
"grad_norm": 0.18065479397773743,
|
|
"learning_rate": 1.3580453362837527e-05,
|
|
"loss": 0.0128,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 0.20145205511690573,
|
|
"grad_norm": 0.11352675408124924,
|
|
"learning_rate": 1.355196900267992e-05,
|
|
"loss": 0.012,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 0.2019227374886742,
|
|
"grad_norm": 0.10866222530603409,
|
|
"learning_rate": 1.3523451624057566e-05,
|
|
"loss": 0.0108,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 0.20239341986044268,
|
|
"grad_norm": 0.1298263520002365,
|
|
"learning_rate": 1.3494901492062889e-05,
|
|
"loss": 0.0122,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.20286410223221116,
|
|
"grad_norm": 0.1176961287856102,
|
|
"learning_rate": 1.346631887209278e-05,
|
|
"loss": 0.0132,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 0.2033347846039796,
|
|
"grad_norm": 0.15750454366207123,
|
|
"learning_rate": 1.343770402984613e-05,
|
|
"loss": 0.0131,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 0.20380546697574808,
|
|
"grad_norm": 0.12924407422542572,
|
|
"learning_rate": 1.3409057231321363e-05,
|
|
"loss": 0.0144,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 0.20427614934751656,
|
|
"grad_norm": 0.0852523222565651,
|
|
"learning_rate": 1.3380378742813964e-05,
|
|
"loss": 0.0138,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 0.20474683171928504,
|
|
"grad_norm": 0.13959772884845734,
|
|
"learning_rate": 1.3351668830914004e-05,
|
|
"loss": 0.0107,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 0.20521751409105352,
|
|
"grad_norm": 0.10934294760227203,
|
|
"learning_rate": 1.3322927762503656e-05,
|
|
"loss": 0.0107,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 0.20568819646282197,
|
|
"grad_norm": 0.165533646941185,
|
|
"learning_rate": 1.329415580475472e-05,
|
|
"loss": 0.0106,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 0.20615887883459044,
|
|
"grad_norm": 0.08202959597110748,
|
|
"learning_rate": 1.3265353225126143e-05,
|
|
"loss": 0.0128,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 0.20662956120635892,
|
|
"grad_norm": 0.097500741481781,
|
|
"learning_rate": 1.3236520291361516e-05,
|
|
"loss": 0.0128,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 0.2071002435781274,
|
|
"grad_norm": 0.13810063898563385,
|
|
"learning_rate": 1.3207657271486607e-05,
|
|
"loss": 0.0136,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.20757092594989587,
|
|
"grad_norm": 0.14978671073913574,
|
|
"learning_rate": 1.3178764433806858e-05,
|
|
"loss": 0.0125,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 0.20804160832166432,
|
|
"grad_norm": 0.10034939646720886,
|
|
"learning_rate": 1.3149842046904885e-05,
|
|
"loss": 0.0134,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 0.2085122906934328,
|
|
"grad_norm": 0.08658329397439957,
|
|
"learning_rate": 1.3120890379637996e-05,
|
|
"loss": 0.0128,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 0.20898297306520128,
|
|
"grad_norm": 0.10699402540922165,
|
|
"learning_rate": 1.3091909701135676e-05,
|
|
"loss": 0.0154,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 0.20945365543696975,
|
|
"grad_norm": 0.14609429240226746,
|
|
"learning_rate": 1.3062900280797104e-05,
|
|
"loss": 0.0151,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 0.20992433780873823,
|
|
"grad_norm": 0.12663030624389648,
|
|
"learning_rate": 1.3033862388288628e-05,
|
|
"loss": 0.0121,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 0.21039502018050668,
|
|
"grad_norm": 0.15187297761440277,
|
|
"learning_rate": 1.3004796293541269e-05,
|
|
"loss": 0.0133,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 0.21086570255227516,
|
|
"grad_norm": 0.12615492939949036,
|
|
"learning_rate": 1.297570226674822e-05,
|
|
"loss": 0.0096,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 0.21133638492404364,
|
|
"grad_norm": 0.1413070261478424,
|
|
"learning_rate": 1.294658057836232e-05,
|
|
"loss": 0.0152,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 0.2118070672958121,
|
|
"grad_norm": 0.09986194223165512,
|
|
"learning_rate": 1.2917431499093538e-05,
|
|
"loss": 0.0147,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.21227774966758056,
|
|
"grad_norm": 0.10710835456848145,
|
|
"learning_rate": 1.288825529990647e-05,
|
|
"loss": 0.0133,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 0.21274843203934904,
|
|
"grad_norm": 0.10883558541536331,
|
|
"learning_rate": 1.2859052252017824e-05,
|
|
"loss": 0.012,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 0.21321911441111752,
|
|
"grad_norm": 0.12123081088066101,
|
|
"learning_rate": 1.2829822626893867e-05,
|
|
"loss": 0.0133,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 0.213689796782886,
|
|
"grad_norm": 0.11655440926551819,
|
|
"learning_rate": 1.2800566696247943e-05,
|
|
"loss": 0.0115,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 0.21416047915465447,
|
|
"grad_norm": 0.1198849231004715,
|
|
"learning_rate": 1.2771284732037912e-05,
|
|
"loss": 0.0148,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 0.21463116152642292,
|
|
"grad_norm": 0.13865037262439728,
|
|
"learning_rate": 1.274197700646365e-05,
|
|
"loss": 0.0126,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 0.2151018438981914,
|
|
"grad_norm": 0.1405821293592453,
|
|
"learning_rate": 1.2712643791964501e-05,
|
|
"loss": 0.0112,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 0.21557252626995987,
|
|
"grad_norm": 0.11903827637434006,
|
|
"learning_rate": 1.2683285361216745e-05,
|
|
"loss": 0.0098,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 0.21604320864172835,
|
|
"grad_norm": 0.10922175645828247,
|
|
"learning_rate": 1.2653901987131074e-05,
|
|
"loss": 0.0141,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 0.21651389101349683,
|
|
"grad_norm": 0.14365769922733307,
|
|
"learning_rate": 1.262449394285005e-05,
|
|
"loss": 0.0115,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.21698457338526528,
|
|
"grad_norm": 0.12143559008836746,
|
|
"learning_rate": 1.2595061501745556e-05,
|
|
"loss": 0.0168,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 0.21745525575703376,
|
|
"grad_norm": 0.07886059582233429,
|
|
"learning_rate": 1.2565604937416267e-05,
|
|
"loss": 0.0107,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 0.21792593812880223,
|
|
"grad_norm": 0.0770447701215744,
|
|
"learning_rate": 1.2536124523685114e-05,
|
|
"loss": 0.011,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 0.2183966205005707,
|
|
"grad_norm": 0.13335862755775452,
|
|
"learning_rate": 1.2506620534596711e-05,
|
|
"loss": 0.0131,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 0.2188673028723392,
|
|
"grad_norm": 0.11177244782447815,
|
|
"learning_rate": 1.247709324441483e-05,
|
|
"loss": 0.0099,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 0.21933798524410764,
|
|
"grad_norm": 0.1267925202846527,
|
|
"learning_rate": 1.2447542927619857e-05,
|
|
"loss": 0.0124,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 0.2198086676158761,
|
|
"grad_norm": 0.11596515029668808,
|
|
"learning_rate": 1.2417969858906214e-05,
|
|
"loss": 0.0118,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 0.2202793499876446,
|
|
"grad_norm": 0.10480307042598724,
|
|
"learning_rate": 1.2388374313179828e-05,
|
|
"loss": 0.0118,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 0.22075003235941307,
|
|
"grad_norm": 0.11253635585308075,
|
|
"learning_rate": 1.2358756565555563e-05,
|
|
"loss": 0.0107,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 0.22122071473118152,
|
|
"grad_norm": 0.12209013849496841,
|
|
"learning_rate": 1.2329116891354677e-05,
|
|
"loss": 0.0111,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.22169139710295,
|
|
"grad_norm": 0.11227235943078995,
|
|
"learning_rate": 1.2299455566102248e-05,
|
|
"loss": 0.0112,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 0.22216207947471847,
|
|
"grad_norm": 0.13081388175487518,
|
|
"learning_rate": 1.2269772865524612e-05,
|
|
"loss": 0.0099,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 0.22263276184648695,
|
|
"grad_norm": 0.13076095283031464,
|
|
"learning_rate": 1.2240069065546823e-05,
|
|
"loss": 0.0144,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 0.22310344421825543,
|
|
"grad_norm": 0.0984998419880867,
|
|
"learning_rate": 1.2210344442290054e-05,
|
|
"loss": 0.0113,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 0.22357412659002388,
|
|
"grad_norm": 0.17332613468170166,
|
|
"learning_rate": 1.2180599272069058e-05,
|
|
"loss": 0.0119,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 0.22404480896179235,
|
|
"grad_norm": 0.12893901765346527,
|
|
"learning_rate": 1.215083383138958e-05,
|
|
"loss": 0.0113,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 0.22451549133356083,
|
|
"grad_norm": 0.11272910982370377,
|
|
"learning_rate": 1.2121048396945807e-05,
|
|
"loss": 0.0144,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 0.2249861737053293,
|
|
"grad_norm": 0.09054556488990784,
|
|
"learning_rate": 1.2091243245617774e-05,
|
|
"loss": 0.0147,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 0.22545685607709778,
|
|
"grad_norm": 0.11335314065217972,
|
|
"learning_rate": 1.2061418654468808e-05,
|
|
"loss": 0.0117,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 0.22592753844886623,
|
|
"grad_norm": 0.10343427211046219,
|
|
"learning_rate": 1.203157490074294e-05,
|
|
"loss": 0.0113,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.2263982208206347,
|
|
"grad_norm": 0.1006130576133728,
|
|
"learning_rate": 1.2001712261862335e-05,
|
|
"loss": 0.016,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 0.2268689031924032,
|
|
"grad_norm": 0.14608773589134216,
|
|
"learning_rate": 1.1971831015424713e-05,
|
|
"loss": 0.0141,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 0.22733958556417166,
|
|
"grad_norm": 0.09634409844875336,
|
|
"learning_rate": 1.194193143920076e-05,
|
|
"loss": 0.011,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 0.22781026793594014,
|
|
"grad_norm": 0.11604062467813492,
|
|
"learning_rate": 1.1912013811131562e-05,
|
|
"loss": 0.0122,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 0.2282809503077086,
|
|
"grad_norm": 0.1343134641647339,
|
|
"learning_rate": 1.1882078409326003e-05,
|
|
"loss": 0.0101,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 0.22875163267947707,
|
|
"grad_norm": 0.11303374171257019,
|
|
"learning_rate": 1.1852125512058194e-05,
|
|
"loss": 0.0148,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 0.22922231505124555,
|
|
"grad_norm": 0.09161823242902756,
|
|
"learning_rate": 1.1822155397764873e-05,
|
|
"loss": 0.0131,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 0.22969299742301402,
|
|
"grad_norm": 0.10220952332019806,
|
|
"learning_rate": 1.179216834504284e-05,
|
|
"loss": 0.0125,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 0.2301636797947825,
|
|
"grad_norm": 0.08977775275707245,
|
|
"learning_rate": 1.1762164632646334e-05,
|
|
"loss": 0.0117,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 0.23063436216655095,
|
|
"grad_norm": 0.08586223423480988,
|
|
"learning_rate": 1.1732144539484467e-05,
|
|
"loss": 0.0098,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.23110504453831943,
|
|
"grad_norm": 0.11193137615919113,
|
|
"learning_rate": 1.1702108344618627e-05,
|
|
"loss": 0.0099,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 0.2315757269100879,
|
|
"grad_norm": 0.17393282055854797,
|
|
"learning_rate": 1.1672056327259876e-05,
|
|
"loss": 0.0128,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 0.23204640928185638,
|
|
"grad_norm": 0.10275278985500336,
|
|
"learning_rate": 1.1641988766766359e-05,
|
|
"loss": 0.0081,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 0.23251709165362483,
|
|
"grad_norm": 0.11751987040042877,
|
|
"learning_rate": 1.1611905942640707e-05,
|
|
"loss": 0.0122,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 0.2329877740253933,
|
|
"grad_norm": 0.06911163777112961,
|
|
"learning_rate": 1.1581808134527443e-05,
|
|
"loss": 0.0112,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 0.23345845639716178,
|
|
"grad_norm": 0.10532654821872711,
|
|
"learning_rate": 1.1551695622210377e-05,
|
|
"loss": 0.0136,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 0.23392913876893026,
|
|
"grad_norm": 0.11161577701568604,
|
|
"learning_rate": 1.1521568685610003e-05,
|
|
"loss": 0.0116,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 0.23439982114069874,
|
|
"grad_norm": 0.1475476622581482,
|
|
"learning_rate": 1.1491427604780898e-05,
|
|
"loss": 0.0105,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 0.2348705035124672,
|
|
"grad_norm": 0.11789501458406448,
|
|
"learning_rate": 1.1461272659909137e-05,
|
|
"loss": 0.013,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 0.23534118588423567,
|
|
"grad_norm": 0.21765132248401642,
|
|
"learning_rate": 1.1431104131309654e-05,
|
|
"loss": 0.0153,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.23581186825600414,
|
|
"grad_norm": 0.14552538096904755,
|
|
"learning_rate": 1.1400922299423663e-05,
|
|
"loss": 0.0142,
|
|
"step": 5010
|
|
},
|
|
{
|
|
"epoch": 0.23628255062777262,
|
|
"grad_norm": 0.10701721161603928,
|
|
"learning_rate": 1.1370727444816045e-05,
|
|
"loss": 0.0113,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 0.2367532329995411,
|
|
"grad_norm": 0.13691140711307526,
|
|
"learning_rate": 1.1340519848172735e-05,
|
|
"loss": 0.0156,
|
|
"step": 5030
|
|
},
|
|
{
|
|
"epoch": 0.23722391537130955,
|
|
"grad_norm": 0.19442430138587952,
|
|
"learning_rate": 1.1310299790298118e-05,
|
|
"loss": 0.0122,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 0.23769459774307802,
|
|
"grad_norm": 0.1421356052160263,
|
|
"learning_rate": 1.1280067552112408e-05,
|
|
"loss": 0.0111,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 0.2381652801148465,
|
|
"grad_norm": 0.1671832799911499,
|
|
"learning_rate": 1.124982341464906e-05,
|
|
"loss": 0.0156,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 0.23863596248661498,
|
|
"grad_norm": 0.11938446015119553,
|
|
"learning_rate": 1.1219567659052126e-05,
|
|
"loss": 0.0108,
|
|
"step": 5070
|
|
},
|
|
{
|
|
"epoch": 0.23910664485838345,
|
|
"grad_norm": 0.11811240017414093,
|
|
"learning_rate": 1.118930056657367e-05,
|
|
"loss": 0.0095,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 0.2395773272301519,
|
|
"grad_norm": 0.11781369149684906,
|
|
"learning_rate": 1.115902241857114e-05,
|
|
"loss": 0.0126,
|
|
"step": 5090
|
|
},
|
|
{
|
|
"epoch": 0.24004800960192038,
|
|
"grad_norm": 0.08257889002561569,
|
|
"learning_rate": 1.1128733496504751e-05,
|
|
"loss": 0.0127,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.24051869197368886,
|
|
"grad_norm": 0.11438579857349396,
|
|
"learning_rate": 1.1098434081934871e-05,
|
|
"loss": 0.0145,
|
|
"step": 5110
|
|
},
|
|
{
|
|
"epoch": 0.24098937434545734,
|
|
"grad_norm": 0.1617477536201477,
|
|
"learning_rate": 1.1068124456519402e-05,
|
|
"loss": 0.0126,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 0.24146005671722579,
|
|
"grad_norm": 0.09215593338012695,
|
|
"learning_rate": 1.1037804902011175e-05,
|
|
"loss": 0.0108,
|
|
"step": 5130
|
|
},
|
|
{
|
|
"epoch": 0.24193073908899426,
|
|
"grad_norm": 0.13177545368671417,
|
|
"learning_rate": 1.1007475700255313e-05,
|
|
"loss": 0.0143,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 0.24240142146076274,
|
|
"grad_norm": 0.09815867245197296,
|
|
"learning_rate": 1.0977137133186613e-05,
|
|
"loss": 0.0101,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 0.24287210383253122,
|
|
"grad_norm": 0.15604525804519653,
|
|
"learning_rate": 1.094678948282694e-05,
|
|
"loss": 0.0131,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 0.2433427862042997,
|
|
"grad_norm": 0.12083793431520462,
|
|
"learning_rate": 1.0916433031282592e-05,
|
|
"loss": 0.0113,
|
|
"step": 5170
|
|
},
|
|
{
|
|
"epoch": 0.24381346857606814,
|
|
"grad_norm": 0.1081007793545723,
|
|
"learning_rate": 1.0886068060741676e-05,
|
|
"loss": 0.0106,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 0.24428415094783662,
|
|
"grad_norm": 0.10474909096956253,
|
|
"learning_rate": 1.0855694853471499e-05,
|
|
"loss": 0.0123,
|
|
"step": 5190
|
|
},
|
|
{
|
|
"epoch": 0.2447548333196051,
|
|
"grad_norm": 0.10129079222679138,
|
|
"learning_rate": 1.0825313691815928e-05,
|
|
"loss": 0.011,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 0.24522551569137357,
|
|
"grad_norm": 0.15255433320999146,
|
|
"learning_rate": 1.0794924858192779e-05,
|
|
"loss": 0.01,
|
|
"step": 5210
|
|
},
|
|
{
|
|
"epoch": 0.24569619806314205,
|
|
"grad_norm": 0.12974129617214203,
|
|
"learning_rate": 1.0764528635091179e-05,
|
|
"loss": 0.0112,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 0.2461668804349105,
|
|
"grad_norm": 0.14340999722480774,
|
|
"learning_rate": 1.0734125305068943e-05,
|
|
"loss": 0.0114,
|
|
"step": 5230
|
|
},
|
|
{
|
|
"epoch": 0.24663756280667898,
|
|
"grad_norm": 0.11762043833732605,
|
|
"learning_rate": 1.0703715150749967e-05,
|
|
"loss": 0.0116,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 0.24710824517844746,
|
|
"grad_norm": 0.10286321491003036,
|
|
"learning_rate": 1.0673298454821567e-05,
|
|
"loss": 0.0097,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 0.24757892755021593,
|
|
"grad_norm": 0.09568856656551361,
|
|
"learning_rate": 1.0642875500031878e-05,
|
|
"loss": 0.0123,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 0.2480496099219844,
|
|
"grad_norm": 0.10438144207000732,
|
|
"learning_rate": 1.0612446569187214e-05,
|
|
"loss": 0.0109,
|
|
"step": 5270
|
|
},
|
|
{
|
|
"epoch": 0.24852029229375286,
|
|
"grad_norm": 0.12573927640914917,
|
|
"learning_rate": 1.058201194514944e-05,
|
|
"loss": 0.0139,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 0.24899097466552134,
|
|
"grad_norm": 0.093235082924366,
|
|
"learning_rate": 1.0551571910833344e-05,
|
|
"loss": 0.0131,
|
|
"step": 5290
|
|
},
|
|
{
|
|
"epoch": 0.2494616570372898,
|
|
"grad_norm": 0.11204102635383606,
|
|
"learning_rate": 1.0521126749204009e-05,
|
|
"loss": 0.0135,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 0.2499323394090583,
|
|
"grad_norm": 0.3277778625488281,
|
|
"learning_rate": 1.0490676743274181e-05,
|
|
"loss": 0.0106,
|
|
"step": 5310
|
|
},
|
|
{
|
|
"epoch": 0.25040302178082674,
|
|
"grad_norm": 0.1804187297821045,
|
|
"learning_rate": 1.0460222176101635e-05,
|
|
"loss": 0.0123,
|
|
"step": 5320
|
|
},
|
|
{
|
|
"epoch": 0.25087370415259524,
|
|
"grad_norm": 0.11421903222799301,
|
|
"learning_rate": 1.0429763330786546e-05,
|
|
"loss": 0.0096,
|
|
"step": 5330
|
|
},
|
|
{
|
|
"epoch": 0.2513443865243637,
|
|
"grad_norm": 0.0876801386475563,
|
|
"learning_rate": 1.0399300490468862e-05,
|
|
"loss": 0.011,
|
|
"step": 5340
|
|
},
|
|
{
|
|
"epoch": 0.25181506889613214,
|
|
"grad_norm": 0.10158374160528183,
|
|
"learning_rate": 1.0368833938325667e-05,
|
|
"loss": 0.0106,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 0.25228575126790065,
|
|
"grad_norm": 0.17920152842998505,
|
|
"learning_rate": 1.0338363957568544e-05,
|
|
"loss": 0.0123,
|
|
"step": 5360
|
|
},
|
|
{
|
|
"epoch": 0.2527564336396691,
|
|
"grad_norm": 0.13499869406223297,
|
|
"learning_rate": 1.030789083144095e-05,
|
|
"loss": 0.0105,
|
|
"step": 5370
|
|
},
|
|
{
|
|
"epoch": 0.2532271160114376,
|
|
"grad_norm": 0.09289251267910004,
|
|
"learning_rate": 1.027741484321559e-05,
|
|
"loss": 0.0114,
|
|
"step": 5380
|
|
},
|
|
{
|
|
"epoch": 0.25369779838320605,
|
|
"grad_norm": 0.0807180181145668,
|
|
"learning_rate": 1.024693627619176e-05,
|
|
"loss": 0.0119,
|
|
"step": 5390
|
|
},
|
|
{
|
|
"epoch": 0.2541684807549745,
|
|
"grad_norm": 0.10101146250963211,
|
|
"learning_rate": 1.0216455413692738e-05,
|
|
"loss": 0.0118,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 0.254639163126743,
|
|
"grad_norm": 0.1171114444732666,
|
|
"learning_rate": 1.0185972539063139e-05,
|
|
"loss": 0.0107,
|
|
"step": 5410
|
|
},
|
|
{
|
|
"epoch": 0.25510984549851146,
|
|
"grad_norm": 0.07857057452201843,
|
|
"learning_rate": 1.0155487935666277e-05,
|
|
"loss": 0.0109,
|
|
"step": 5420
|
|
},
|
|
{
|
|
"epoch": 0.25558052787027996,
|
|
"grad_norm": 0.08648790419101715,
|
|
"learning_rate": 1.0125001886881543e-05,
|
|
"loss": 0.0108,
|
|
"step": 5430
|
|
},
|
|
{
|
|
"epoch": 0.2560512102420484,
|
|
"grad_norm": 0.13399982452392578,
|
|
"learning_rate": 1.0094514676101759e-05,
|
|
"loss": 0.0119,
|
|
"step": 5440
|
|
},
|
|
{
|
|
"epoch": 0.25652189261381686,
|
|
"grad_norm": 0.1115146353840828,
|
|
"learning_rate": 1.0064026586730553e-05,
|
|
"loss": 0.0114,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 0.25699257498558536,
|
|
"grad_norm": 0.10460494458675385,
|
|
"learning_rate": 1.0033537902179716e-05,
|
|
"loss": 0.0118,
|
|
"step": 5460
|
|
},
|
|
{
|
|
"epoch": 0.2574632573573538,
|
|
"grad_norm": 0.09911986440420151,
|
|
"learning_rate": 1.0003048905866577e-05,
|
|
"loss": 0.0126,
|
|
"step": 5470
|
|
},
|
|
{
|
|
"epoch": 0.2579339397291223,
|
|
"grad_norm": 0.1489085555076599,
|
|
"learning_rate": 9.972559881211353e-06,
|
|
"loss": 0.0113,
|
|
"step": 5480
|
|
},
|
|
{
|
|
"epoch": 0.25840462210089077,
|
|
"grad_norm": 0.12387832999229431,
|
|
"learning_rate": 9.942071111634538e-06,
|
|
"loss": 0.0147,
|
|
"step": 5490
|
|
},
|
|
{
|
|
"epoch": 0.2588753044726592,
|
|
"grad_norm": 0.12749050557613373,
|
|
"learning_rate": 9.91158288055425e-06,
|
|
"loss": 0.011,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 0.2593459868444277,
|
|
"grad_norm": 0.11786317825317383,
|
|
"learning_rate": 9.88109547138359e-06,
|
|
"loss": 0.012,
|
|
"step": 5510
|
|
},
|
|
{
|
|
"epoch": 0.25981666921619617,
|
|
"grad_norm": 0.11137409508228302,
|
|
"learning_rate": 9.850609167528038e-06,
|
|
"loss": 0.0116,
|
|
"step": 5520
|
|
},
|
|
{
|
|
"epoch": 0.2602873515879647,
|
|
"grad_norm": 0.11208612471818924,
|
|
"learning_rate": 9.820124252382784e-06,
|
|
"loss": 0.0111,
|
|
"step": 5530
|
|
},
|
|
{
|
|
"epoch": 0.2607580339597331,
|
|
"grad_norm": 0.12384283542633057,
|
|
"learning_rate": 9.789641009330113e-06,
|
|
"loss": 0.0118,
|
|
"step": 5540
|
|
},
|
|
{
|
|
"epoch": 0.2612287163315016,
|
|
"grad_norm": 0.14745421707630157,
|
|
"learning_rate": 9.759159721736772e-06,
|
|
"loss": 0.0105,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 0.2616993987032701,
|
|
"grad_norm": 0.04955082759261131,
|
|
"learning_rate": 9.72868067295132e-06,
|
|
"loss": 0.0118,
|
|
"step": 5560
|
|
},
|
|
{
|
|
"epoch": 0.26217008107503853,
|
|
"grad_norm": 0.11502547562122345,
|
|
"learning_rate": 9.698204146301513e-06,
|
|
"loss": 0.0127,
|
|
"step": 5570
|
|
},
|
|
{
|
|
"epoch": 0.26264076344680704,
|
|
"grad_norm": 0.0981709435582161,
|
|
"learning_rate": 9.667730425091666e-06,
|
|
"loss": 0.0104,
|
|
"step": 5580
|
|
},
|
|
{
|
|
"epoch": 0.2631114458185755,
|
|
"grad_norm": 0.12210562825202942,
|
|
"learning_rate": 9.637259792599997e-06,
|
|
"loss": 0.0135,
|
|
"step": 5590
|
|
},
|
|
{
|
|
"epoch": 0.26358212819034393,
|
|
"grad_norm": 0.1253737211227417,
|
|
"learning_rate": 9.606792532076028e-06,
|
|
"loss": 0.0113,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 0.26405281056211244,
|
|
"grad_norm": 0.11497100442647934,
|
|
"learning_rate": 9.576328926737936e-06,
|
|
"loss": 0.0137,
|
|
"step": 5610
|
|
},
|
|
{
|
|
"epoch": 0.2645234929338809,
|
|
"grad_norm": 0.123923659324646,
|
|
"learning_rate": 9.545869259769904e-06,
|
|
"loss": 0.011,
|
|
"step": 5620
|
|
},
|
|
{
|
|
"epoch": 0.26499417530564934,
|
|
"grad_norm": 0.10925687849521637,
|
|
"learning_rate": 9.515413814319524e-06,
|
|
"loss": 0.0136,
|
|
"step": 5630
|
|
},
|
|
{
|
|
"epoch": 0.26546485767741784,
|
|
"grad_norm": 0.09571066498756409,
|
|
"learning_rate": 9.484962873495137e-06,
|
|
"loss": 0.0122,
|
|
"step": 5640
|
|
},
|
|
{
|
|
"epoch": 0.2659355400491863,
|
|
"grad_norm": 0.1034766286611557,
|
|
"learning_rate": 9.454516720363203e-06,
|
|
"loss": 0.0109,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 0.2664062224209548,
|
|
"grad_norm": 0.1284029185771942,
|
|
"learning_rate": 9.424075637945692e-06,
|
|
"loss": 0.0119,
|
|
"step": 5660
|
|
},
|
|
{
|
|
"epoch": 0.26687690479272325,
|
|
"grad_norm": 0.15385793149471283,
|
|
"learning_rate": 9.393639909217423e-06,
|
|
"loss": 0.0119,
|
|
"step": 5670
|
|
},
|
|
{
|
|
"epoch": 0.2673475871644917,
|
|
"grad_norm": 0.06712723523378372,
|
|
"learning_rate": 9.363209817103455e-06,
|
|
"loss": 0.0109,
|
|
"step": 5680
|
|
},
|
|
{
|
|
"epoch": 0.2678182695362602,
|
|
"grad_norm": 0.12401560693979263,
|
|
"learning_rate": 9.332785644476452e-06,
|
|
"loss": 0.0107,
|
|
"step": 5690
|
|
},
|
|
{
|
|
"epoch": 0.26828895190802865,
|
|
"grad_norm": 0.08679396659135818,
|
|
"learning_rate": 9.302367674154043e-06,
|
|
"loss": 0.009,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 0.26875963427979715,
|
|
"grad_norm": 0.13442565500736237,
|
|
"learning_rate": 9.271956188896211e-06,
|
|
"loss": 0.011,
|
|
"step": 5710
|
|
},
|
|
{
|
|
"epoch": 0.2692303166515656,
|
|
"grad_norm": 0.07840581238269806,
|
|
"learning_rate": 9.241551471402654e-06,
|
|
"loss": 0.0112,
|
|
"step": 5720
|
|
},
|
|
{
|
|
"epoch": 0.26970099902333405,
|
|
"grad_norm": 0.10214755684137344,
|
|
"learning_rate": 9.211153804310146e-06,
|
|
"loss": 0.0093,
|
|
"step": 5730
|
|
},
|
|
{
|
|
"epoch": 0.27017168139510256,
|
|
"grad_norm": 0.13749827444553375,
|
|
"learning_rate": 9.180763470189938e-06,
|
|
"loss": 0.0132,
|
|
"step": 5740
|
|
},
|
|
{
|
|
"epoch": 0.270642363766871,
|
|
"grad_norm": 0.16672512888908386,
|
|
"learning_rate": 9.15038075154511e-06,
|
|
"loss": 0.0107,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 0.2711130461386395,
|
|
"grad_norm": 0.14287547767162323,
|
|
"learning_rate": 9.120005930807939e-06,
|
|
"loss": 0.0115,
|
|
"step": 5760
|
|
},
|
|
{
|
|
"epoch": 0.27158372851040796,
|
|
"grad_norm": 0.0881832018494606,
|
|
"learning_rate": 9.0896392903373e-06,
|
|
"loss": 0.0098,
|
|
"step": 5770
|
|
},
|
|
{
|
|
"epoch": 0.2720544108821764,
|
|
"grad_norm": 0.1583278775215149,
|
|
"learning_rate": 9.059281112416017e-06,
|
|
"loss": 0.0115,
|
|
"step": 5780
|
|
},
|
|
{
|
|
"epoch": 0.2725250932539449,
|
|
"grad_norm": 0.09432189911603928,
|
|
"learning_rate": 9.028931679248249e-06,
|
|
"loss": 0.0095,
|
|
"step": 5790
|
|
},
|
|
{
|
|
"epoch": 0.27299577562571337,
|
|
"grad_norm": 0.11643416434526443,
|
|
"learning_rate": 8.998591272956866e-06,
|
|
"loss": 0.0104,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 0.27346645799748187,
|
|
"grad_norm": 0.10580825060606003,
|
|
"learning_rate": 8.96826017558083e-06,
|
|
"loss": 0.0118,
|
|
"step": 5810
|
|
},
|
|
{
|
|
"epoch": 0.2739371403692503,
|
|
"grad_norm": 0.08177099376916885,
|
|
"learning_rate": 8.937938669072557e-06,
|
|
"loss": 0.0084,
|
|
"step": 5820
|
|
},
|
|
{
|
|
"epoch": 0.27440782274101877,
|
|
"grad_norm": 0.10689327120780945,
|
|
"learning_rate": 8.90762703529532e-06,
|
|
"loss": 0.0118,
|
|
"step": 5830
|
|
},
|
|
{
|
|
"epoch": 0.2748785051127873,
|
|
"grad_norm": 0.0654144212603569,
|
|
"learning_rate": 8.877325556020615e-06,
|
|
"loss": 0.0111,
|
|
"step": 5840
|
|
},
|
|
{
|
|
"epoch": 0.2753491874845557,
|
|
"grad_norm": 0.1506287157535553,
|
|
"learning_rate": 8.847034512925536e-06,
|
|
"loss": 0.0143,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 0.27581986985632423,
|
|
"grad_norm": 0.08336376398801804,
|
|
"learning_rate": 8.816754187590175e-06,
|
|
"loss": 0.012,
|
|
"step": 5860
|
|
},
|
|
{
|
|
"epoch": 0.2762905522280927,
|
|
"grad_norm": 0.10075052827596664,
|
|
"learning_rate": 8.786484861494984e-06,
|
|
"loss": 0.0103,
|
|
"step": 5870
|
|
},
|
|
{
|
|
"epoch": 0.27676123459986113,
|
|
"grad_norm": 0.07631979137659073,
|
|
"learning_rate": 8.756226816018172e-06,
|
|
"loss": 0.0084,
|
|
"step": 5880
|
|
},
|
|
{
|
|
"epoch": 0.27723191697162963,
|
|
"grad_norm": 0.07799085974693298,
|
|
"learning_rate": 8.725980332433089e-06,
|
|
"loss": 0.0098,
|
|
"step": 5890
|
|
},
|
|
{
|
|
"epoch": 0.2777025993433981,
|
|
"grad_norm": 0.11320314556360245,
|
|
"learning_rate": 8.695745691905599e-06,
|
|
"loss": 0.0136,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 0.2781732817151666,
|
|
"grad_norm": 0.09825349599123001,
|
|
"learning_rate": 8.665523175491484e-06,
|
|
"loss": 0.0106,
|
|
"step": 5910
|
|
},
|
|
{
|
|
"epoch": 0.27864396408693504,
|
|
"grad_norm": 0.116277776658535,
|
|
"learning_rate": 8.635313064133817e-06,
|
|
"loss": 0.0125,
|
|
"step": 5920
|
|
},
|
|
{
|
|
"epoch": 0.2791146464587035,
|
|
"grad_norm": 0.1149587482213974,
|
|
"learning_rate": 8.605115638660356e-06,
|
|
"loss": 0.011,
|
|
"step": 5930
|
|
},
|
|
{
|
|
"epoch": 0.279585328830472,
|
|
"grad_norm": 0.10295125842094421,
|
|
"learning_rate": 8.57493117978094e-06,
|
|
"loss": 0.0107,
|
|
"step": 5940
|
|
},
|
|
{
|
|
"epoch": 0.28005601120224044,
|
|
"grad_norm": 0.13894349336624146,
|
|
"learning_rate": 8.544759968084863e-06,
|
|
"loss": 0.0112,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 0.28052669357400895,
|
|
"grad_norm": 0.09977217763662338,
|
|
"learning_rate": 8.51460228403828e-06,
|
|
"loss": 0.0097,
|
|
"step": 5960
|
|
},
|
|
{
|
|
"epoch": 0.2809973759457774,
|
|
"grad_norm": 0.14578835666179657,
|
|
"learning_rate": 8.484458407981601e-06,
|
|
"loss": 0.0126,
|
|
"step": 5970
|
|
},
|
|
{
|
|
"epoch": 0.28146805831754584,
|
|
"grad_norm": 0.16326677799224854,
|
|
"learning_rate": 8.454328620126871e-06,
|
|
"loss": 0.0113,
|
|
"step": 5980
|
|
},
|
|
{
|
|
"epoch": 0.28193874068931435,
|
|
"grad_norm": 0.1076991930603981,
|
|
"learning_rate": 8.424213200555171e-06,
|
|
"loss": 0.0114,
|
|
"step": 5990
|
|
},
|
|
{
|
|
"epoch": 0.2824094230610828,
|
|
"grad_norm": 0.07674437016248703,
|
|
"learning_rate": 8.394112429214032e-06,
|
|
"loss": 0.0098,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 0.2828801054328513,
|
|
"grad_norm": 0.08356063067913055,
|
|
"learning_rate": 8.364026585914802e-06,
|
|
"loss": 0.0107,
|
|
"step": 6010
|
|
},
|
|
{
|
|
"epoch": 0.28335078780461975,
|
|
"grad_norm": 0.11439137160778046,
|
|
"learning_rate": 8.33395595033007e-06,
|
|
"loss": 0.0101,
|
|
"step": 6020
|
|
},
|
|
{
|
|
"epoch": 0.2838214701763882,
|
|
"grad_norm": 0.10765797644853592,
|
|
"learning_rate": 8.303900801991052e-06,
|
|
"loss": 0.0119,
|
|
"step": 6030
|
|
},
|
|
{
|
|
"epoch": 0.2842921525481567,
|
|
"grad_norm": 0.11504880338907242,
|
|
"learning_rate": 8.273861420285e-06,
|
|
"loss": 0.0104,
|
|
"step": 6040
|
|
},
|
|
{
|
|
"epoch": 0.28476283491992516,
|
|
"grad_norm": 0.09300310909748077,
|
|
"learning_rate": 8.243838084452603e-06,
|
|
"loss": 0.0087,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 0.2852335172916936,
|
|
"grad_norm": 0.0914502665400505,
|
|
"learning_rate": 8.213831073585385e-06,
|
|
"loss": 0.0108,
|
|
"step": 6060
|
|
},
|
|
{
|
|
"epoch": 0.2857041996634621,
|
|
"grad_norm": 0.09434883296489716,
|
|
"learning_rate": 8.183840666623123e-06,
|
|
"loss": 0.0124,
|
|
"step": 6070
|
|
},
|
|
{
|
|
"epoch": 0.28617488203523056,
|
|
"grad_norm": 0.0897921472787857,
|
|
"learning_rate": 8.153867142351242e-06,
|
|
"loss": 0.0107,
|
|
"step": 6080
|
|
},
|
|
{
|
|
"epoch": 0.28664556440699906,
|
|
"grad_norm": 0.1389276534318924,
|
|
"learning_rate": 8.123910779398233e-06,
|
|
"loss": 0.0106,
|
|
"step": 6090
|
|
},
|
|
{
|
|
"epoch": 0.2871162467787675,
|
|
"grad_norm": 0.14727766811847687,
|
|
"learning_rate": 8.093971856233051e-06,
|
|
"loss": 0.0104,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 0.28758692915053596,
|
|
"grad_norm": 0.11871699243783951,
|
|
"learning_rate": 8.064050651162546e-06,
|
|
"loss": 0.0119,
|
|
"step": 6110
|
|
},
|
|
{
|
|
"epoch": 0.28805761152230447,
|
|
"grad_norm": 0.12036462873220444,
|
|
"learning_rate": 8.034147442328852e-06,
|
|
"loss": 0.0084,
|
|
"step": 6120
|
|
},
|
|
{
|
|
"epoch": 0.2885282938940729,
|
|
"grad_norm": 0.08591372519731522,
|
|
"learning_rate": 8.004262507706819e-06,
|
|
"loss": 0.0115,
|
|
"step": 6130
|
|
},
|
|
{
|
|
"epoch": 0.2889989762658414,
|
|
"grad_norm": 0.07248928397893906,
|
|
"learning_rate": 7.97439612510142e-06,
|
|
"loss": 0.0107,
|
|
"step": 6140
|
|
},
|
|
{
|
|
"epoch": 0.2894696586376099,
|
|
"grad_norm": 0.10148704051971436,
|
|
"learning_rate": 7.944548572145178e-06,
|
|
"loss": 0.0118,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 0.2899403410093783,
|
|
"grad_norm": 0.13358736038208008,
|
|
"learning_rate": 7.914720126295572e-06,
|
|
"loss": 0.0114,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 0.2904110233811468,
|
|
"grad_norm": 0.08479638397693634,
|
|
"learning_rate": 7.884911064832466e-06,
|
|
"loss": 0.0101,
|
|
"step": 6170
|
|
},
|
|
{
|
|
"epoch": 0.2908817057529153,
|
|
"grad_norm": 0.10710818320512772,
|
|
"learning_rate": 7.855121664855535e-06,
|
|
"loss": 0.0112,
|
|
"step": 6180
|
|
},
|
|
{
|
|
"epoch": 0.2913523881246838,
|
|
"grad_norm": 0.09368746727705002,
|
|
"learning_rate": 7.825352203281682e-06,
|
|
"loss": 0.01,
|
|
"step": 6190
|
|
},
|
|
{
|
|
"epoch": 0.29182307049645223,
|
|
"grad_norm": 0.15866398811340332,
|
|
"learning_rate": 7.79560295684246e-06,
|
|
"loss": 0.0101,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 0.2922937528682207,
|
|
"grad_norm": 0.10704703629016876,
|
|
"learning_rate": 7.765874202081516e-06,
|
|
"loss": 0.0107,
|
|
"step": 6210
|
|
},
|
|
{
|
|
"epoch": 0.2927644352399892,
|
|
"grad_norm": 0.14181233942508698,
|
|
"learning_rate": 7.736166215352004e-06,
|
|
"loss": 0.0103,
|
|
"step": 6220
|
|
},
|
|
{
|
|
"epoch": 0.29323511761175763,
|
|
"grad_norm": 0.1760113686323166,
|
|
"learning_rate": 7.706479272814024e-06,
|
|
"loss": 0.0121,
|
|
"step": 6230
|
|
},
|
|
{
|
|
"epoch": 0.29370579998352614,
|
|
"grad_norm": 0.09428353607654572,
|
|
"learning_rate": 7.67681365043205e-06,
|
|
"loss": 0.0113,
|
|
"step": 6240
|
|
},
|
|
{
|
|
"epoch": 0.2941764823552946,
|
|
"grad_norm": 0.09111696481704712,
|
|
"learning_rate": 7.64716962397237e-06,
|
|
"loss": 0.0085,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 0.29464716472706304,
|
|
"grad_norm": 0.11573656648397446,
|
|
"learning_rate": 7.617547469000524e-06,
|
|
"loss": 0.0103,
|
|
"step": 6260
|
|
},
|
|
{
|
|
"epoch": 0.29511784709883154,
|
|
"grad_norm": 0.05127233639359474,
|
|
"learning_rate": 7.587947460878731e-06,
|
|
"loss": 0.0091,
|
|
"step": 6270
|
|
},
|
|
{
|
|
"epoch": 0.2955885294706,
|
|
"grad_norm": 0.12428274005651474,
|
|
"learning_rate": 7.5583698747633394e-06,
|
|
"loss": 0.0107,
|
|
"step": 6280
|
|
},
|
|
{
|
|
"epoch": 0.2960592118423685,
|
|
"grad_norm": 0.09793796390295029,
|
|
"learning_rate": 7.528814985602273e-06,
|
|
"loss": 0.0089,
|
|
"step": 6290
|
|
},
|
|
{
|
|
"epoch": 0.29652989421413695,
|
|
"grad_norm": 0.10944128036499023,
|
|
"learning_rate": 7.49928306813246e-06,
|
|
"loss": 0.0126,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 0.2970005765859054,
|
|
"grad_norm": 0.149366557598114,
|
|
"learning_rate": 7.4697743968772906e-06,
|
|
"loss": 0.0108,
|
|
"step": 6310
|
|
},
|
|
{
|
|
"epoch": 0.2974712589576739,
|
|
"grad_norm": 0.1714988797903061,
|
|
"learning_rate": 7.440289246144067e-06,
|
|
"loss": 0.0114,
|
|
"step": 6320
|
|
},
|
|
{
|
|
"epoch": 0.29794194132944235,
|
|
"grad_norm": 0.08940602838993073,
|
|
"learning_rate": 7.410827890021444e-06,
|
|
"loss": 0.0097,
|
|
"step": 6330
|
|
},
|
|
{
|
|
"epoch": 0.29841262370121086,
|
|
"grad_norm": 0.10650070011615753,
|
|
"learning_rate": 7.381390602376882e-06,
|
|
"loss": 0.0121,
|
|
"step": 6340
|
|
},
|
|
{
|
|
"epoch": 0.2988833060729793,
|
|
"grad_norm": 0.28164905309677124,
|
|
"learning_rate": 7.351977656854118e-06,
|
|
"loss": 0.013,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 0.29935398844474775,
|
|
"grad_norm": 0.09398011118173599,
|
|
"learning_rate": 7.322589326870597e-06,
|
|
"loss": 0.0092,
|
|
"step": 6360
|
|
},
|
|
{
|
|
"epoch": 0.29982467081651626,
|
|
"grad_norm": 0.0998765379190445,
|
|
"learning_rate": 7.293225885614948e-06,
|
|
"loss": 0.0106,
|
|
"step": 6370
|
|
},
|
|
{
|
|
"epoch": 0.3002953531882847,
|
|
"grad_norm": 0.09863186627626419,
|
|
"learning_rate": 7.263887606044437e-06,
|
|
"loss": 0.0089,
|
|
"step": 6380
|
|
},
|
|
{
|
|
"epoch": 0.3007660355600532,
|
|
"grad_norm": 0.14850477874279022,
|
|
"learning_rate": 7.234574760882431e-06,
|
|
"loss": 0.0108,
|
|
"step": 6390
|
|
},
|
|
{
|
|
"epoch": 0.30123671793182166,
|
|
"grad_norm": 0.07693659514188766,
|
|
"learning_rate": 7.205287622615866e-06,
|
|
"loss": 0.0094,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 0.3017074003035901,
|
|
"grad_norm": 0.11360711604356766,
|
|
"learning_rate": 7.176026463492711e-06,
|
|
"loss": 0.0082,
|
|
"step": 6410
|
|
},
|
|
{
|
|
"epoch": 0.3021780826753586,
|
|
"grad_norm": 0.09131176024675369,
|
|
"learning_rate": 7.146791555519431e-06,
|
|
"loss": 0.0111,
|
|
"step": 6420
|
|
},
|
|
{
|
|
"epoch": 0.30264876504712707,
|
|
"grad_norm": 0.0828235074877739,
|
|
"learning_rate": 7.117583170458478e-06,
|
|
"loss": 0.0082,
|
|
"step": 6430
|
|
},
|
|
{
|
|
"epoch": 0.30311944741889557,
|
|
"grad_norm": 0.1633165031671524,
|
|
"learning_rate": 7.0884015798257365e-06,
|
|
"loss": 0.0128,
|
|
"step": 6440
|
|
},
|
|
{
|
|
"epoch": 0.303590129790664,
|
|
"grad_norm": 0.07436200231313705,
|
|
"learning_rate": 7.059247054888025e-06,
|
|
"loss": 0.0105,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 0.30406081216243247,
|
|
"grad_norm": 0.15634426474571228,
|
|
"learning_rate": 7.030119866660565e-06,
|
|
"loss": 0.0109,
|
|
"step": 6460
|
|
},
|
|
{
|
|
"epoch": 0.304531494534201,
|
|
"grad_norm": 0.06774807721376419,
|
|
"learning_rate": 7.001020285904454e-06,
|
|
"loss": 0.0093,
|
|
"step": 6470
|
|
},
|
|
{
|
|
"epoch": 0.3050021769059694,
|
|
"grad_norm": 0.08545653522014618,
|
|
"learning_rate": 6.971948583124159e-06,
|
|
"loss": 0.0099,
|
|
"step": 6480
|
|
},
|
|
{
|
|
"epoch": 0.3054728592777379,
|
|
"grad_norm": 0.09483916312456131,
|
|
"learning_rate": 6.9429050285650015e-06,
|
|
"loss": 0.0093,
|
|
"step": 6490
|
|
},
|
|
{
|
|
"epoch": 0.3059435416495064,
|
|
"grad_norm": 0.08985210210084915,
|
|
"learning_rate": 6.913889892210631e-06,
|
|
"loss": 0.009,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 0.30641422402127483,
|
|
"grad_norm": 0.12550567090511322,
|
|
"learning_rate": 6.884903443780541e-06,
|
|
"loss": 0.0097,
|
|
"step": 6510
|
|
},
|
|
{
|
|
"epoch": 0.30688490639304333,
|
|
"grad_norm": 0.13989980518817902,
|
|
"learning_rate": 6.8559459527275426e-06,
|
|
"loss": 0.0118,
|
|
"step": 6520
|
|
},
|
|
{
|
|
"epoch": 0.3073555887648118,
|
|
"grad_norm": 0.07210268825292587,
|
|
"learning_rate": 6.827017688235255e-06,
|
|
"loss": 0.01,
|
|
"step": 6530
|
|
},
|
|
{
|
|
"epoch": 0.30782627113658023,
|
|
"grad_norm": 0.0844946876168251,
|
|
"learning_rate": 6.798118919215625e-06,
|
|
"loss": 0.0115,
|
|
"step": 6540
|
|
},
|
|
{
|
|
"epoch": 0.30829695350834874,
|
|
"grad_norm": 0.09124928712844849,
|
|
"learning_rate": 6.769249914306408e-06,
|
|
"loss": 0.0119,
|
|
"step": 6550
|
|
},
|
|
{
|
|
"epoch": 0.3087676358801172,
|
|
"grad_norm": 0.08468437939882278,
|
|
"learning_rate": 6.740410941868678e-06,
|
|
"loss": 0.0096,
|
|
"step": 6560
|
|
},
|
|
{
|
|
"epoch": 0.3092383182518857,
|
|
"grad_norm": 0.10643789917230606,
|
|
"learning_rate": 6.711602269984339e-06,
|
|
"loss": 0.0115,
|
|
"step": 6570
|
|
},
|
|
{
|
|
"epoch": 0.30970900062365414,
|
|
"grad_norm": 0.12056536972522736,
|
|
"learning_rate": 6.6828241664536145e-06,
|
|
"loss": 0.0087,
|
|
"step": 6580
|
|
},
|
|
{
|
|
"epoch": 0.3101796829954226,
|
|
"grad_norm": 0.1426694393157959,
|
|
"learning_rate": 6.65407689879258e-06,
|
|
"loss": 0.0092,
|
|
"step": 6590
|
|
},
|
|
{
|
|
"epoch": 0.3106503653671911,
|
|
"grad_norm": 0.13670802116394043,
|
|
"learning_rate": 6.625360734230663e-06,
|
|
"loss": 0.0097,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 0.31112104773895954,
|
|
"grad_norm": 0.06073112413287163,
|
|
"learning_rate": 6.596675939708166e-06,
|
|
"loss": 0.0118,
|
|
"step": 6610
|
|
},
|
|
{
|
|
"epoch": 0.31159173011072805,
|
|
"grad_norm": 0.10629275441169739,
|
|
"learning_rate": 6.5680227818737695e-06,
|
|
"loss": 0.0091,
|
|
"step": 6620
|
|
},
|
|
{
|
|
"epoch": 0.3120624124824965,
|
|
"grad_norm": 0.11279012262821198,
|
|
"learning_rate": 6.539401527082083e-06,
"loss": 0.0108,
"step": 6630
},
{
"epoch": 0.31253309485426495,
"grad_norm": 0.15536552667617798,
"learning_rate": 6.510812441391131e-06,
"loss": 0.0116,
"step": 6640
},
{
"epoch": 0.31300377722603345,
"grad_norm": 0.08353878557682037,
"learning_rate": 6.4822557905599156e-06,
"loss": 0.0079,
"step": 6650
},
{
"epoch": 0.3134744595978019,
"grad_norm": 0.11909312754869461,
"learning_rate": 6.4537318400459295e-06,
"loss": 0.0095,
"step": 6660
},
{
"epoch": 0.3139451419695704,
"grad_norm": 0.09018908441066742,
"learning_rate": 6.425240855002674e-06,
"loss": 0.0089,
"step": 6670
},
{
"epoch": 0.31441582434133886,
"grad_norm": 0.17807909846305847,
"learning_rate": 6.396783100277224e-06,
"loss": 0.0114,
"step": 6680
},
{
"epoch": 0.3148865067131073,
"grad_norm": 0.09402504563331604,
"learning_rate": 6.368358840407754e-06,
"loss": 0.0078,
"step": 6690
},
{
"epoch": 0.3153571890848758,
"grad_norm": 0.09198101609945297,
"learning_rate": 6.339968339621056e-06,
"loss": 0.0093,
"step": 6700
},
{
"epoch": 0.31582787145664426,
"grad_norm": 0.13976627588272095,
"learning_rate": 6.311611861830129e-06,
"loss": 0.0133,
"step": 6710
},
{
"epoch": 0.31629855382841277,
"grad_norm": 0.07539477199316025,
"learning_rate": 6.283289670631684e-06,
"loss": 0.0073,
"step": 6720
},
{
"epoch": 0.3167692362001812,
"grad_norm": 0.09688951075077057,
"learning_rate": 6.2550020293037095e-06,
"loss": 0.01,
"step": 6730
},
{
"epoch": 0.31723991857194966,
"grad_norm": 0.07880812883377075,
"learning_rate": 6.2267492008030395e-06,
"loss": 0.0092,
"step": 6740
},
{
"epoch": 0.31771060094371817,
"grad_norm": 0.09777586162090302,
"learning_rate": 6.198531447762875e-06,
"loss": 0.0079,
"step": 6750
},
{
"epoch": 0.3181812833154866,
"grad_norm": 0.0828159973025322,
"learning_rate": 6.1703490324903745e-06,
"loss": 0.0089,
"step": 6760
},
{
"epoch": 0.3186519656872551,
"grad_norm": 0.07249979674816132,
"learning_rate": 6.142202216964204e-06,
"loss": 0.0077,
"step": 6770
},
{
"epoch": 0.3191226480590236,
"grad_norm": 0.1451720893383026,
"learning_rate": 6.114091262832087e-06,
"loss": 0.0113,
"step": 6780
},
{
"epoch": 0.319593330430792,
"grad_norm": 0.09208575636148453,
"learning_rate": 6.0860164314084e-06,
"loss": 0.01,
"step": 6790
},
{
"epoch": 0.3200640128025605,
"grad_norm": 0.11224311590194702,
"learning_rate": 6.05797798367173e-06,
"loss": 0.0106,
"step": 6800
},
{
"epoch": 0.320534695174329,
"grad_norm": 0.10680809617042542,
"learning_rate": 6.029976180262431e-06,
"loss": 0.0082,
"step": 6810
},
{
"epoch": 0.3210053775460975,
"grad_norm": 0.08746019005775452,
"learning_rate": 6.0020112814802355e-06,
"loss": 0.0087,
"step": 6820
},
{
"epoch": 0.32147605991786593,
"grad_norm": 0.10770803689956665,
"learning_rate": 5.9740835472818145e-06,
"loss": 0.0143,
"step": 6830
},
{
"epoch": 0.3219467422896344,
"grad_norm": 0.1275395154953003,
"learning_rate": 5.946193237278352e-06,
"loss": 0.0104,
"step": 6840
},
{
"epoch": 0.3224174246614029,
"grad_norm": 0.12579120695590973,
"learning_rate": 5.918340610733154e-06,
"loss": 0.0111,
"step": 6850
},
{
"epoch": 0.32288810703317133,
"grad_norm": 0.15018723905086517,
"learning_rate": 5.8905259265592315e-06,
"loss": 0.0085,
"step": 6860
},
{
"epoch": 0.32335878940493984,
"grad_norm": 0.1583593636751175,
"learning_rate": 5.8627494433168756e-06,
"loss": 0.0132,
"step": 6870
},
{
"epoch": 0.3238294717767083,
"grad_norm": 0.09487713128328323,
"learning_rate": 5.835011419211285e-06,
"loss": 0.0105,
"step": 6880
},
{
"epoch": 0.32430015414847674,
"grad_norm": 0.1019512191414833,
"learning_rate": 5.807312112090129e-06,
"loss": 0.0103,
"step": 6890
},
{
"epoch": 0.32477083652024524,
"grad_norm": 0.12229369580745697,
"learning_rate": 5.779651779441192e-06,
"loss": 0.0131,
"step": 6900
},
{
"epoch": 0.3252415188920137,
"grad_norm": 0.16477836668491364,
"learning_rate": 5.752030678389948e-06,
"loss": 0.0097,
"step": 6910
},
{
"epoch": 0.32571220126378214,
"grad_norm": 0.09149530529975891,
"learning_rate": 5.724449065697182e-06,
"loss": 0.0087,
"step": 6920
},
{
"epoch": 0.32618288363555065,
"grad_norm": 0.11204541474580765,
"learning_rate": 5.696907197756598e-06,
"loss": 0.0107,
"step": 6930
},
{
"epoch": 0.3266535660073191,
"grad_norm": 0.10151305049657822,
"learning_rate": 5.669405330592457e-06,
"loss": 0.0084,
"step": 6940
},
{
"epoch": 0.3271242483790876,
"grad_norm": 0.11028383672237396,
"learning_rate": 5.6419437198571525e-06,
"loss": 0.0089,
"step": 6950
},
{
"epoch": 0.32759493075085605,
"grad_norm": 0.08457642793655396,
"learning_rate": 5.6145226208288875e-06,
"loss": 0.0083,
"step": 6960
},
{
"epoch": 0.3280656131226245,
"grad_norm": 0.1047384962439537,
"learning_rate": 5.587142288409262e-06,
"loss": 0.0073,
"step": 6970
},
{
"epoch": 0.328536295494393,
"grad_norm": 0.12552668154239655,
"learning_rate": 5.559802977120918e-06,
"loss": 0.0085,
"step": 6980
},
{
"epoch": 0.32900697786616145,
"grad_norm": 0.11733315140008926,
"learning_rate": 5.532504941105176e-06,
"loss": 0.0102,
"step": 6990
},
{
"epoch": 0.32947766023792996,
"grad_norm": 0.20306788384914398,
"learning_rate": 5.505248434119666e-06,
"loss": 0.0115,
"step": 7000
},
{
"epoch": 0.3299483426096984,
"grad_norm": 0.09611959010362625,
"learning_rate": 5.478033709535968e-06,
"loss": 0.0102,
"step": 7010
},
{
"epoch": 0.33041902498146686,
"grad_norm": 0.07470675557851791,
"learning_rate": 5.4508610203372794e-06,
"loss": 0.0086,
"step": 7020
},
{
"epoch": 0.33088970735323536,
"grad_norm": 0.16557636857032776,
"learning_rate": 5.42373061911601e-06,
"loss": 0.0098,
"step": 7030
},
{
"epoch": 0.3313603897250038,
"grad_norm": 0.08212872594594955,
"learning_rate": 5.3966427580715044e-06,
"loss": 0.0109,
"step": 7040
},
{
"epoch": 0.3318310720967723,
"grad_norm": 0.12389719486236572,
"learning_rate": 5.3695976890076375e-06,
"loss": 0.0102,
"step": 7050
},
{
"epoch": 0.33230175446854077,
"grad_norm": 0.10865834355354309,
"learning_rate": 5.3425956633305075e-06,
"loss": 0.0093,
"step": 7060
},
{
"epoch": 0.3327724368403092,
"grad_norm": 0.10937181860208511,
"learning_rate": 5.3156369320460796e-06,
"loss": 0.0092,
"step": 7070
},
{
"epoch": 0.3332431192120777,
"grad_norm": 0.056186139583587646,
"learning_rate": 5.2887217457578856e-06,
"loss": 0.0086,
"step": 7080
},
{
"epoch": 0.33371380158384617,
"grad_norm": 0.10875654220581055,
"learning_rate": 5.261850354664633e-06,
"loss": 0.0103,
"step": 7090
},
{
"epoch": 0.3341844839556147,
"grad_norm": 0.11125942319631577,
"learning_rate": 5.235023008557955e-06,
"loss": 0.0122,
"step": 7100
},
{
"epoch": 0.3346551663273831,
"grad_norm": 0.12772494554519653,
"learning_rate": 5.20823995682003e-06,
"loss": 0.009,
"step": 7110
},
{
"epoch": 0.3351258486991516,
"grad_norm": 0.09490371495485306,
"learning_rate": 5.1815014484212825e-06,
"loss": 0.009,
"step": 7120
},
{
"epoch": 0.3355965310709201,
"grad_norm": 0.16368533670902252,
"learning_rate": 5.154807731918081e-06,
"loss": 0.0113,
"step": 7130
},
{
"epoch": 0.33606721344268853,
"grad_norm": 0.10722652077674866,
"learning_rate": 5.1281590554504095e-06,
"loss": 0.0096,
"step": 7140
},
{
"epoch": 0.33653789581445703,
"grad_norm": 0.10400055348873138,
"learning_rate": 5.101555666739563e-06,
"loss": 0.0103,
"step": 7150
},
{
"epoch": 0.3370085781862255,
"grad_norm": 0.05726364627480507,
"learning_rate": 5.074997813085873e-06,
"loss": 0.0085,
"step": 7160
},
{
"epoch": 0.33747926055799393,
"grad_norm": 0.13163743913173676,
"learning_rate": 5.048485741366351e-06,
"loss": 0.0103,
"step": 7170
},
{
"epoch": 0.33794994292976244,
"grad_norm": 0.09994348138570786,
"learning_rate": 5.0220196980324545e-06,
"loss": 0.0099,
"step": 7180
},
{
"epoch": 0.3384206253015309,
"grad_norm": 0.07613382488489151,
"learning_rate": 4.995599929107758e-06,
"loss": 0.0119,
"step": 7190
},
{
"epoch": 0.3388913076732994,
"grad_norm": 0.09880557656288147,
"learning_rate": 4.9692266801856815e-06,
"loss": 0.0109,
"step": 7200
},
{
"epoch": 0.33936199004506784,
"grad_norm": 0.08730833232402802,
"learning_rate": 4.942900196427195e-06,
"loss": 0.0098,
"step": 7210
},
{
"epoch": 0.3398326724168363,
"grad_norm": 0.1271960735321045,
"learning_rate": 4.916620722558568e-06,
"loss": 0.0099,
"step": 7220
},
{
"epoch": 0.3403033547886048,
"grad_norm": 0.08068729937076569,
"learning_rate": 4.8903885028690454e-06,
"loss": 0.0111,
"step": 7230
},
{
"epoch": 0.34077403716037324,
"grad_norm": 0.0711289495229721,
"learning_rate": 4.864203781208632e-06,
"loss": 0.0096,
"step": 7240
},
{
"epoch": 0.34124471953214175,
"grad_norm": 0.11912761628627777,
"learning_rate": 4.838066800985786e-06,
"loss": 0.01,
"step": 7250
},
{
"epoch": 0.3417154019039102,
"grad_norm": 0.15148796141147614,
"learning_rate": 4.811977805165174e-06,
"loss": 0.0101,
"step": 7260
},
{
"epoch": 0.34218608427567865,
"grad_norm": 0.10042264312505722,
"learning_rate": 4.7859370362654045e-06,
"loss": 0.0105,
"step": 7270
},
{
"epoch": 0.34265676664744715,
"grad_norm": 0.08555499464273453,
"learning_rate": 4.75994473635678e-06,
"loss": 0.0099,
"step": 7280
},
{
"epoch": 0.3431274490192156,
"grad_norm": 0.09323468804359436,
"learning_rate": 4.7340011470590415e-06,
"loss": 0.0111,
"step": 7290
},
{
"epoch": 0.34359813139098405,
"grad_norm": 0.07832957059144974,
"learning_rate": 4.708106509539134e-06,
"loss": 0.0097,
"step": 7300
},
{
"epoch": 0.34406881376275256,
"grad_norm": 0.08861073851585388,
"learning_rate": 4.682261064508944e-06,
"loss": 0.0086,
"step": 7310
},
{
"epoch": 0.344539496134521,
"grad_norm": 0.13024084270000458,
"learning_rate": 4.656465052223079e-06,
"loss": 0.0123,
"step": 7320
},
{
"epoch": 0.3450101785062895,
"grad_norm": 0.13911134004592896,
"learning_rate": 4.630718712476628e-06,
"loss": 0.011,
"step": 7330
},
{
"epoch": 0.34548086087805796,
"grad_norm": 0.09930678457021713,
"learning_rate": 4.6050222846029315e-06,
"loss": 0.0094,
"step": 7340
},
{
"epoch": 0.3459515432498264,
"grad_norm": 0.06568389385938644,
"learning_rate": 4.5793760074713565e-06,
"loss": 0.01,
"step": 7350
},
{
"epoch": 0.3464222256215949,
"grad_norm": 0.09176962077617645,
"learning_rate": 4.553780119485093e-06,
"loss": 0.0104,
"step": 7360
},
{
"epoch": 0.34689290799336336,
"grad_norm": 0.12670232355594635,
"learning_rate": 4.528234858578894e-06,
"loss": 0.0094,
"step": 7370
},
{
"epoch": 0.34736359036513187,
"grad_norm": 0.12104298919439316,
"learning_rate": 4.502740462216919e-06,
"loss": 0.0087,
"step": 7380
},
{
"epoch": 0.3478342727369003,
"grad_norm": 0.1292145550251007,
"learning_rate": 4.477297167390487e-06,
"loss": 0.0104,
"step": 7390
},
{
"epoch": 0.34830495510866877,
"grad_norm": 0.06514370441436768,
"learning_rate": 4.451905210615889e-06,
"loss": 0.0091,
"step": 7400
},
{
"epoch": 0.3487756374804373,
"grad_norm": 0.09446771442890167,
"learning_rate": 4.426564827932185e-06,
"loss": 0.0098,
"step": 7410
},
{
"epoch": 0.3492463198522057,
"grad_norm": 0.12294322997331619,
"learning_rate": 4.401276254899014e-06,
"loss": 0.0097,
"step": 7420
},
{
"epoch": 0.3497170022239742,
"grad_norm": 0.11448011547327042,
"learning_rate": 4.3760397265943965e-06,
"loss": 0.0099,
"step": 7430
},
{
"epoch": 0.3501876845957427,
"grad_norm": 0.08247274160385132,
"learning_rate": 4.350855477612565e-06,
"loss": 0.0091,
"step": 7440
},
{
"epoch": 0.3506583669675111,
"grad_norm": 0.11805404722690582,
"learning_rate": 4.325723742061767e-06,
"loss": 0.0094,
"step": 7450
},
{
"epoch": 0.35112904933927963,
"grad_norm": 0.0908360630273819,
"learning_rate": 4.30064475356209e-06,
"loss": 0.0084,
"step": 7460
},
{
"epoch": 0.3515997317110481,
"grad_norm": 0.11973714083433151,
"learning_rate": 4.275618745243301e-06,
"loss": 0.0105,
"step": 7470
},
{
"epoch": 0.3520704140828166,
"grad_norm": 0.13092456758022308,
"learning_rate": 4.2506459497426685e-06,
"loss": 0.0103,
"step": 7480
},
{
"epoch": 0.35254109645458503,
"grad_norm": 0.10864631086587906,
"learning_rate": 4.225726599202808e-06,
"loss": 0.0094,
"step": 7490
},
{
"epoch": 0.3530117788263535,
"grad_norm": 0.09894406050443649,
"learning_rate": 4.200860925269519e-06,
"loss": 0.0085,
"step": 7500
},
{
"epoch": 0.353482461198122,
"grad_norm": 0.07283376902341843,
"learning_rate": 4.176049159089626e-06,
"loss": 0.0099,
"step": 7510
},
{
"epoch": 0.35395314356989044,
"grad_norm": 0.10817442089319229,
"learning_rate": 4.1512915313088505e-06,
"loss": 0.0103,
"step": 7520
},
{
"epoch": 0.35442382594165894,
"grad_norm": 0.09655621647834778,
"learning_rate": 4.126588272069645e-06,
"loss": 0.0128,
"step": 7530
},
{
"epoch": 0.3548945083134274,
"grad_norm": 0.06617021560668945,
"learning_rate": 4.101939611009059e-06,
"loss": 0.0066,
"step": 7540
},
{
"epoch": 0.35536519068519584,
"grad_norm": 0.1352662295103073,
"learning_rate": 4.077345777256614e-06,
"loss": 0.0113,
"step": 7550
},
{
"epoch": 0.35583587305696435,
"grad_norm": 0.06878627091646194,
"learning_rate": 4.052806999432161e-06,
"loss": 0.0116,
"step": 7560
},
{
"epoch": 0.3563065554287328,
"grad_norm": 0.06316298991441727,
"learning_rate": 4.028323505643762e-06,
"loss": 0.0065,
"step": 7570
},
{
"epoch": 0.3567772378005013,
"grad_norm": 0.0798206776380539,
"learning_rate": 4.003895523485575e-06,
"loss": 0.0083,
"step": 7580
},
{
"epoch": 0.35724792017226975,
"grad_norm": 0.07234697788953781,
"learning_rate": 3.979523280035723e-06,
"loss": 0.0115,
"step": 7590
},
{
"epoch": 0.3577186025440382,
"grad_norm": 0.08777622878551483,
"learning_rate": 3.955207001854197e-06,
"loss": 0.0105,
"step": 7600
},
{
"epoch": 0.3581892849158067,
"grad_norm": 0.09228674322366714,
"learning_rate": 3.930946914980744e-06,
"loss": 0.0102,
"step": 7610
},
{
"epoch": 0.35865996728757515,
"grad_norm": 0.21110312640666962,
"learning_rate": 3.906743244932767e-06,
"loss": 0.0089,
"step": 7620
},
{
"epoch": 0.35913064965934366,
"grad_norm": 0.11766435950994492,
"learning_rate": 3.882596216703226e-06,
"loss": 0.0124,
"step": 7630
},
{
"epoch": 0.3596013320311121,
"grad_norm": 0.09137210249900818,
"learning_rate": 3.858506054758547e-06,
"loss": 0.0076,
"step": 7640
},
{
"epoch": 0.36007201440288056,
"grad_norm": 0.1201593354344368,
"learning_rate": 3.834472983036551e-06,
"loss": 0.0091,
"step": 7650
},
{
"epoch": 0.36054269677464906,
"grad_norm": 0.12418673932552338,
"learning_rate": 3.8104972249443417e-06,
"loss": 0.0088,
"step": 7660
},
{
"epoch": 0.3610133791464175,
"grad_norm": 0.09985870122909546,
"learning_rate": 3.7865790033562532e-06,
"loss": 0.0091,
"step": 7670
},
{
"epoch": 0.361484061518186,
"grad_norm": 0.1320667713880539,
"learning_rate": 3.7627185406117707e-06,
"loss": 0.0106,
"step": 7680
},
{
"epoch": 0.36195474388995447,
"grad_norm": 0.11557376384735107,
"learning_rate": 3.738916058513462e-06,
"loss": 0.0084,
"step": 7690
},
{
"epoch": 0.3624254262617229,
"grad_norm": 0.08301427960395813,
"learning_rate": 3.7151717783249175e-06,
"loss": 0.0099,
"step": 7700
},
{
"epoch": 0.3628961086334914,
"grad_norm": 0.13551072776317596,
"learning_rate": 3.6914859207686916e-06,
"loss": 0.0092,
"step": 7710
},
{
"epoch": 0.36336679100525987,
"grad_norm": 0.13606616854667664,
"learning_rate": 3.6678587060242586e-06,
"loss": 0.011,
"step": 7720
},
{
"epoch": 0.3638374733770283,
"grad_norm": 0.11019964516162872,
"learning_rate": 3.6442903537259556e-06,
"loss": 0.0084,
"step": 7730
},
{
"epoch": 0.3643081557487968,
"grad_norm": 0.0992899090051651,
"learning_rate": 3.6207810829609414e-06,
"loss": 0.0107,
"step": 7740
},
{
"epoch": 0.3647788381205653,
"grad_norm": 0.10431647300720215,
"learning_rate": 3.5973311122671695e-06,
"loss": 0.0085,
"step": 7750
},
{
"epoch": 0.3652495204923338,
"grad_norm": 0.09148714691400528,
"learning_rate": 3.5739406596313474e-06,
"loss": 0.0081,
"step": 7760
},
{
"epoch": 0.36572020286410223,
"grad_norm": 0.10995065420866013,
"learning_rate": 3.5506099424869133e-06,
"loss": 0.0088,
"step": 7770
},
{
"epoch": 0.3661908852358707,
"grad_norm": 0.09201940894126892,
"learning_rate": 3.5273391777120136e-06,
"loss": 0.0111,
"step": 7780
},
{
"epoch": 0.3666615676076392,
"grad_norm": 0.06754633039236069,
"learning_rate": 3.504128581627497e-06,
"loss": 0.0096,
"step": 7790
},
{
"epoch": 0.36713224997940763,
"grad_norm": 0.11938969045877457,
"learning_rate": 3.480978369994885e-06,
"loss": 0.0098,
"step": 7800
},
{
"epoch": 0.36760293235117614,
"grad_norm": 0.09905743598937988,
"learning_rate": 3.4578887580143793e-06,
"loss": 0.0104,
"step": 7810
},
{
"epoch": 0.3680736147229446,
"grad_norm": 0.09096118062734604,
"learning_rate": 3.4348599603228584e-06,
"loss": 0.0102,
"step": 7820
},
{
"epoch": 0.36854429709471304,
"grad_norm": 0.12320122867822647,
"learning_rate": 3.411892190991882e-06,
"loss": 0.0094,
"step": 7830
},
{
"epoch": 0.36901497946648154,
"grad_norm": 0.07308287918567657,
"learning_rate": 3.3889856635257024e-06,
"loss": 0.009,
"step": 7840
},
{
"epoch": 0.36948566183825,
"grad_norm": 0.1116013377904892,
"learning_rate": 3.366140590859276e-06,
"loss": 0.0094,
"step": 7850
},
{
"epoch": 0.3699563442100185,
"grad_norm": 0.22650498151779175,
"learning_rate": 3.343357185356284e-06,
"loss": 0.0097,
"step": 7860
},
{
"epoch": 0.37042702658178694,
"grad_norm": 0.0837603211402893,
"learning_rate": 3.3206356588071733e-06,
"loss": 0.0077,
"step": 7870
},
{
"epoch": 0.3708977089535554,
"grad_norm": 0.17952406406402588,
"learning_rate": 3.2979762224271616e-06,
"loss": 0.0103,
"step": 7880
},
{
"epoch": 0.3713683913253239,
"grad_norm": 0.15936988592147827,
"learning_rate": 3.275379086854292e-06,
"loss": 0.011,
"step": 7890
},
{
"epoch": 0.37183907369709235,
"grad_norm": 0.09263869374990463,
"learning_rate": 3.252844462147472e-06,
"loss": 0.0087,
"step": 7900
},
{
"epoch": 0.37230975606886085,
"grad_norm": 0.10962749272584915,
"learning_rate": 3.230372557784518e-06,
"loss": 0.0085,
"step": 7910
},
{
"epoch": 0.3727804384406293,
"grad_norm": 0.11479237675666809,
"learning_rate": 3.2079635826602053e-06,
"loss": 0.01,
"step": 7920
},
{
"epoch": 0.37325112081239775,
"grad_norm": 0.05293889343738556,
"learning_rate": 3.185617745084343e-06,
"loss": 0.0073,
"step": 7930
},
{
"epoch": 0.37372180318416626,
"grad_norm": 0.13599932193756104,
"learning_rate": 3.163335252779811e-06,
"loss": 0.0101,
"step": 7940
},
{
"epoch": 0.3741924855559347,
"grad_norm": 0.08273231238126755,
"learning_rate": 3.1411163128806497e-06,
"loss": 0.0083,
"step": 7950
},
{
"epoch": 0.3746631679277032,
"grad_norm": 0.10171296447515488,
"learning_rate": 3.118961131930127e-06,
"loss": 0.0101,
"step": 7960
},
{
"epoch": 0.37513385029947166,
"grad_norm": 0.1403917372226715,
"learning_rate": 3.0968699158788185e-06,
"loss": 0.0086,
"step": 7970
},
{
"epoch": 0.3756045326712401,
"grad_norm": 0.11102043837308884,
"learning_rate": 3.0748428700826938e-06,
"loss": 0.0086,
"step": 7980
},
{
"epoch": 0.3760752150430086,
"grad_norm": 0.08787306398153305,
"learning_rate": 3.0528801993012056e-06,
"loss": 0.0091,
"step": 7990
},
{
"epoch": 0.37654589741477706,
"grad_norm": 0.07784000039100647,
"learning_rate": 3.0309821076953893e-06,
"loss": 0.0097,
"step": 8000
},
{
"epoch": 0.37701657978654557,
"grad_norm": 0.20823068916797638,
"learning_rate": 3.0091487988259684e-06,
"loss": 0.0086,
"step": 8010
},
{
"epoch": 0.377487262158314,
"grad_norm": 0.08798613399267197,
"learning_rate": 2.9873804756514513e-06,
"loss": 0.0085,
"step": 8020
},
{
"epoch": 0.37795794453008247,
"grad_norm": 0.1586165726184845,
"learning_rate": 2.965677340526254e-06,
"loss": 0.0088,
"step": 8030
},
{
"epoch": 0.378428626901851,
"grad_norm": 0.09549938887357712,
"learning_rate": 2.944039595198814e-06,
"loss": 0.0108,
"step": 8040
},
{
"epoch": 0.3788993092736194,
"grad_norm": 0.08494443446397781,
"learning_rate": 2.9224674408097207e-06,
"loss": 0.0089,
"step": 8050
},
{
"epoch": 0.3793699916453879,
"grad_norm": 0.08340760320425034,
"learning_rate": 2.900961077889837e-06,
"loss": 0.0103,
"step": 8060
},
{
"epoch": 0.3798406740171564,
"grad_norm": 0.08635089546442032,
"learning_rate": 2.879520706358446e-06,
"loss": 0.0097,
"step": 8070
},
{
"epoch": 0.3803113563889248,
"grad_norm": 0.07516948133707047,
"learning_rate": 2.8581465255213834e-06,
"loss": 0.0091,
"step": 8080
},
{
"epoch": 0.38078203876069333,
"grad_norm": 0.09443487972021103,
"learning_rate": 2.836838734069187e-06,
"loss": 0.0088,
"step": 8090
},
{
"epoch": 0.3812527211324618,
"grad_norm": 0.13118596374988556,
"learning_rate": 2.8155975300752524e-06,
"loss": 0.0101,
"step": 8100
},
{
"epoch": 0.3817234035042303,
"grad_norm": 0.11904377490282059,
"learning_rate": 2.794423110993991e-06,
"loss": 0.0122,
"step": 8110
},
{
"epoch": 0.38219408587599873,
"grad_norm": 0.0989333763718605,
"learning_rate": 2.7733156736589893e-06,
"loss": 0.0105,
"step": 8120
},
{
"epoch": 0.3826647682477672,
"grad_norm": 0.09652882069349289,
"learning_rate": 2.7522754142811957e-06,
"loss": 0.0102,
"step": 8130
},
{
"epoch": 0.3831354506195357,
"grad_norm": 0.09222520142793655,
"learning_rate": 2.731302528447063e-06,
"loss": 0.0097,
"step": 8140
},
{
"epoch": 0.38360613299130414,
"grad_norm": 0.10134019702672958,
"learning_rate": 2.710397211116774e-06,
"loss": 0.0072,
"step": 8150
},
{
"epoch": 0.3840768153630726,
"grad_norm": 0.07716267555952072,
"learning_rate": 2.6895596566223937e-06,
"loss": 0.011,
"step": 8160
},
{
"epoch": 0.3845474977348411,
"grad_norm": 0.09856786578893661,
"learning_rate": 2.66879005866608e-06,
"loss": 0.0088,
"step": 8170
},
{
"epoch": 0.38501818010660954,
"grad_norm": 0.10285631567239761,
"learning_rate": 2.648088610318278e-06,
"loss": 0.0073,
"step": 8180
},
{
"epoch": 0.38548886247837805,
"grad_norm": 0.07368358969688416,
"learning_rate": 2.6274555040159265e-06,
"loss": 0.0097,
"step": 8190
},
{
"epoch": 0.3859595448501465,
"grad_norm": 0.09550728648900986,
"learning_rate": 2.606890931560667e-06,
"loss": 0.0109,
"step": 8200
},
{
"epoch": 0.38643022722191495,
"grad_norm": 0.10182874649763107,
"learning_rate": 2.5863950841170704e-06,
"loss": 0.0102,
"step": 8210
},
{
"epoch": 0.38690090959368345,
"grad_norm": 0.0784030556678772,
"learning_rate": 2.5659681522108428e-06,
"loss": 0.0102,
"step": 8220
},
{
"epoch": 0.3873715919654519,
"grad_norm": 0.12794169783592224,
"learning_rate": 2.5456103257270693e-06,
"loss": 0.0103,
"step": 8230
},
{
"epoch": 0.3878422743372204,
"grad_norm": 0.09494341909885406,
"learning_rate": 2.5253217939084407e-06,
"loss": 0.0095,
"step": 8240
},
{
"epoch": 0.38831295670898885,
"grad_norm": 0.11016801744699478,
"learning_rate": 2.505102745353499e-06,
"loss": 0.0094,
"step": 8250
},
{
"epoch": 0.3887836390807573,
"grad_norm": 0.11591393500566483,
"learning_rate": 2.4849533680148787e-06,
"loss": 0.013,
"step": 8260
},
{
"epoch": 0.3892543214525258,
"grad_norm": 0.10235019773244858,
"learning_rate": 2.4648738491975745e-06,
"loss": 0.009,
"step": 8270
},
{
"epoch": 0.38972500382429426,
"grad_norm": 0.14550204575061798,
"learning_rate": 2.4448643755571687e-06,
"loss": 0.0105,
"step": 8280
},
{
"epoch": 0.39019568619606276,
"grad_norm": 0.07565101981163025,
"learning_rate": 2.424925133098137e-06,
"loss": 0.0086,
"step": 8290
},
{
"epoch": 0.3906663685678312,
"grad_norm": 0.12985540926456451,
"learning_rate": 2.4050563071720867e-06,
"loss": 0.0101,
"step": 8300
},
{
"epoch": 0.39113705093959966,
"grad_norm": 0.13539749383926392,
"learning_rate": 2.3852580824760487e-06,
"loss": 0.0107,
"step": 8310
},
{
"epoch": 0.39160773331136817,
"grad_norm": 0.1268506944179535,
"learning_rate": 2.3655306430507563e-06,
"loss": 0.0086,
"step": 8320
},
{
"epoch": 0.3920784156831366,
"grad_norm": 0.23865291476249695,
"learning_rate": 2.345874172278939e-06,
"loss": 0.0085,
"step": 8330
},
{
"epoch": 0.3925490980549051,
"grad_norm": 0.12161499261856079,
"learning_rate": 2.326288852883607e-06,
"loss": 0.0074,
"step": 8340
},
{
"epoch": 0.39301978042667357,
"grad_norm": 0.10894527286291122,
"learning_rate": 2.306774866926377e-06,
"loss": 0.0091,
"step": 8350
},
{
"epoch": 0.393490462798442,
"grad_norm": 0.12950897216796875,
"learning_rate": 2.287332395805737e-06,
"loss": 0.0099,
"step": 8360
},
{
"epoch": 0.3939611451702105,
"grad_norm": 0.11317972838878632,
"learning_rate": 2.26796162025541e-06,
"loss": 0.0076,
"step": 8370
},
{
"epoch": 0.394431827541979,
"grad_norm": 0.07927144318819046,
"learning_rate": 2.248662720342637e-06,
"loss": 0.0092,
"step": 8380
},
{
"epoch": 0.3949025099137475,
"grad_norm": 0.09778954088687897,
"learning_rate": 2.229435875466519e-06,
"loss": 0.0087,
"step": 8390
},
{
"epoch": 0.39537319228551593,
"grad_norm": 0.09443400055170059,
"learning_rate": 2.2102812643563455e-06,
"loss": 0.009,
"step": 8400
},
{
"epoch": 0.3958438746572844,
"grad_norm": 0.10521642118692398,
"learning_rate": 2.191199065069941e-06,
"loss": 0.0103,
"step": 8410
},
{
"epoch": 0.3963145570290529,
"grad_norm": 0.10631761699914932,
"learning_rate": 2.1721894549919863e-06,
"loss": 0.0091,
"step": 8420
},
{
"epoch": 0.39678523940082133,
"grad_norm": 0.09079862385988235,
"learning_rate": 2.1532526108324047e-06,
"loss": 0.0092,
"step": 8430
},
{
"epoch": 0.39725592177258984,
"grad_norm": 0.14181046187877655,
"learning_rate": 2.1343887086246893e-06,
"loss": 0.0089,
"step": 8440
},
{
"epoch": 0.3977266041443583,
"grad_norm": 0.09131366014480591,
"learning_rate": 2.1155979237242817e-06,
"loss": 0.0083,
"step": 8450
},
{
"epoch": 0.39819728651612674,
"grad_norm": 0.1395096778869629,
"learning_rate": 2.0968804308069324e-06,
"loss": 0.0127,
"step": 8460
},
{
"epoch": 0.39866796888789524,
"grad_norm": 0.1457928866147995,
"learning_rate": 2.0782364038670986e-06,
"loss": 0.0088,
"step": 8470
},
{
"epoch": 0.3991386512596637,
"grad_norm": 0.10321693122386932,
"learning_rate": 2.0596660162162872e-06,
"loss": 0.0076,
"step": 8480
},
{
"epoch": 0.3996093336314322,
"grad_norm": 0.10687396675348282,
"learning_rate": 2.041169440481493e-06,
"loss": 0.0112,
"step": 8490
},
{
"epoch": 0.40008001600320064,
"grad_norm": 0.07050684839487076,
"learning_rate": 2.022746848603543e-06,
"loss": 0.0077,
"step": 8500
},
{
"epoch": 0.4005506983749691,
"grad_norm": 0.09150954335927963,
"learning_rate": 2.0043984118355464e-06,
"loss": 0.0102,
"step": 8510
},
{
"epoch": 0.4010213807467376,
"grad_norm": 0.06295426934957504,
"learning_rate": 1.986124300741267e-06,
"loss": 0.0092,
"step": 8520
},
{
"epoch": 0.40149206311850605,
"grad_norm": 0.10398832708597183,
"learning_rate": 1.967924685193552e-06,
"loss": 0.0089,
"step": 8530
},
{
"epoch": 0.4019627454902745,
"grad_norm": 0.10056550800800323,
"learning_rate": 1.9497997343727513e-06,
"loss": 0.0104,
"step": 8540
},
{
"epoch": 0.402433427862043,
"grad_norm": 0.10275435447692871,
"learning_rate": 1.9317496167651563e-06,
"loss": 0.0097,
"step": 8550
},
{
"epoch": 0.40290411023381145,
"grad_norm": 0.11029147356748581,
"learning_rate": 1.9137745001613984e-06,
"loss": 0.0099,
"step": 8560
},
{
"epoch": 0.40337479260557996,
"grad_norm": 0.09769067168235779,
"learning_rate": 1.8958745516549382e-06,
"loss": 0.0082,
"step": 8570
},
{
"epoch": 0.4038454749773484,
"grad_norm": 0.12096501886844635,
"learning_rate": 1.8780499376404715e-06,
"loss": 0.0115,
"step": 8580
},
{
"epoch": 0.40431615734911686,
"grad_norm": 0.10380151867866516,
"learning_rate": 1.8603008238124043e-06,
"loss": 0.0059,
"step": 8590
},
{
"epoch": 0.40478683972088536,
"grad_norm": 0.11337976157665253,
"learning_rate": 1.842627375163305e-06,
"loss": 0.0105,
"step": 8600
},
{
"epoch": 0.4052575220926538,
"grad_norm": 0.12097438424825668,
"learning_rate": 1.8250297559823716e-06,
"loss": 0.0101,
"step": 8610
},
{
"epoch": 0.4057282044644223,
"grad_norm": 0.07001138478517532,
"learning_rate": 1.8075081298539032e-06,
"loss": 0.0077,
"step": 8620
},
{
"epoch": 0.40619888683619076,
"grad_norm": 0.0757589340209961,
"learning_rate": 1.7900626596557924e-06,
"loss": 0.0071,
"step": 8630
},
{
"epoch": 0.4066695692079592,
"grad_norm": 0.12334230542182922,
"learning_rate": 1.7726935075579798e-06,
"loss": 0.0077,
"step": 8640
},
{
"epoch": 0.4071402515797277,
"grad_norm": 0.07599970698356628,
"learning_rate": 1.7554008350209862e-06,
"loss": 0.0086,
"step": 8650
},
{
"epoch": 0.40761093395149617,
"grad_norm": 0.08076886087656021,
"learning_rate": 1.7381848027943815e-06,
"loss": 0.0097,
"step": 8660
},
{
"epoch": 0.4080816163232647,
"grad_norm": 0.10640278458595276,
"learning_rate": 1.721045570915304e-06,
"loss": 0.0105,
"step": 8670
},
{
"epoch": 0.4085522986950331,
"grad_norm": 0.166885107755661,
"learning_rate": 1.703983298706966e-06,
"loss": 0.0087,
"step": 8680
},
{
"epoch": 0.40902298106680157,
"grad_norm": 0.10858030617237091,
"learning_rate": 1.6869981447771876e-06,
"loss": 0.0087,
"step": 8690
},
{
"epoch": 0.4094936634385701,
"grad_norm": 0.10928177088499069,
"learning_rate": 1.670090267016895e-06,
"loss": 0.0089,
"step": 8700
},
{
"epoch": 0.4099643458103385,
"grad_norm": 0.11939238011837006,
"learning_rate": 1.653259822598683e-06,
"loss": 0.0094,
"step": 8710
},
{
"epoch": 0.41043502818210703,
"grad_norm": 0.06870622932910919,
"learning_rate": 1.6365069679753331e-06,
"loss": 0.007,
"step": 8720
},
{
"epoch": 0.4109057105538755,
"grad_norm": 0.09177110344171524,
"learning_rate": 1.619831858878368e-06,
"loss": 0.0073,
"step": 8730
},
{
"epoch": 0.41137639292564393,
"grad_norm": 0.11786507815122604,
"learning_rate": 1.6032346503166007e-06,
"loss": 0.0096,
"step": 8740
},
{
"epoch": 0.41184707529741243,
"grad_norm": 0.09845741093158722,
"learning_rate": 1.5867154965746956e-06,
"loss": 0.0088,
"step": 8750
},
{
"epoch": 0.4123177576691809,
"grad_norm": 0.06096066161990166,
"learning_rate": 1.5702745512117323e-06,
"loss": 0.0083,
"step": 8760
},
{
"epoch": 0.4127884400409494,
"grad_norm": 0.127801775932312,
"learning_rate": 1.553911967059788e-06,
"loss": 0.0087,
"step": 8770
},
{
"epoch": 0.41325912241271784,
"grad_norm": 0.11020908504724503,
"learning_rate": 1.537627896222489e-06,
"loss": 0.0095,
"step": 8780
},
{
"epoch": 0.4137298047844863,
"grad_norm": 0.08979953080415726,
"learning_rate": 1.5214224900736375e-06,
"loss": 0.0101,
"step": 8790
},
{
"epoch": 0.4142004871562548,
"grad_norm": 0.07168541848659515,
"learning_rate": 1.5052958992557687e-06,
"loss": 0.0105,
"step": 8800
},
{
"epoch": 0.41467116952802324,
"grad_norm": 0.09373784065246582,
"learning_rate": 1.4892482736787717e-06,
"loss": 0.0085,
"step": 8810
},
{
"epoch": 0.41514185189979175,
"grad_norm": 0.09700144827365875,
"learning_rate": 1.4732797625184814e-06,
"loss": 0.0078,
"step": 8820
},
{
"epoch": 0.4156125342715602,
"grad_norm": 0.048015572130680084,
"learning_rate": 1.4573905142153134e-06,
"loss": 0.011,
"step": 8830
},
{
"epoch": 0.41608321664332865,
"grad_norm": 0.11324000358581543,
"learning_rate": 1.44158067647285e-06,
"loss": 0.0104,
"step": 8840
},
{
"epoch": 0.41655389901509715,
"grad_norm": 0.19960851967334747,
"learning_rate": 1.4258503962565096e-06,
"loss": 0.0109,
"step": 8850
},
{
"epoch": 0.4170245813868656,
"grad_norm": 0.08650194108486176,
"learning_rate": 1.4101998197921352e-06,
"loss": 0.0115,
"step": 8860
},
{
"epoch": 0.4174952637586341,
"grad_norm": 0.10010552406311035,
"learning_rate": 1.3946290925646788e-06,
"loss": 0.0089,
"step": 8870
},
{
"epoch": 0.41796594613040255,
"grad_norm": 0.11954224109649658,
"learning_rate": 1.379138359316814e-06,
"loss": 0.01,
"step": 8880
},
{
"epoch": 0.418436628502171,
"grad_norm": 0.09938015043735504,
"learning_rate": 1.363727764047612e-06,
"loss": 0.0089,
"step": 8890
},
{
"epoch": 0.4189073108739395,
"grad_norm": 0.11380025744438171,
"learning_rate": 1.3483974500111907e-06,
"loss": 0.008,
"step": 8900
},
{
"epoch": 0.41937799324570796,
"grad_norm": 0.10757610946893692,
"learning_rate": 1.3331475597153988e-06,
"loss": 0.0075,
"step": 8910
},
{
"epoch": 0.41984867561747646,
"grad_norm": 0.09432507306337357,
"learning_rate": 1.3179782349204618e-06,
"loss": 0.0098,
"step": 8920
},
{
"epoch": 0.4203193579892449,
"grad_norm": 0.08634945750236511,
"learning_rate": 1.3028896166377003e-06,
"loss": 0.009,
"step": 8930
},
{
"epoch": 0.42079004036101336,
"grad_norm": 0.12073128670454025,
"learning_rate": 1.2878818451281939e-06,
"loss": 0.0077,
"step": 8940
},
{
"epoch": 0.42126072273278187,
"grad_norm": 0.10001961886882782,
"learning_rate": 1.2729550599014862e-06,
"loss": 0.0078,
"step": 8950
},
{
"epoch": 0.4217314051045503,
"grad_norm": 0.07984888553619385,
"learning_rate": 1.2581093997142846e-06,
"loss": 0.0084,
"step": 8960
},
{
"epoch": 0.42220208747631877,
"grad_norm": 0.08163861185312271,
"learning_rate": 1.2433450025691807e-06,
"loss": 0.0086,
"step": 8970
},
{
"epoch": 0.42267276984808727,
"grad_norm": 0.11532753705978394,
"learning_rate": 1.2286620057133459e-06,
"loss": 0.0088,
"step": 8980
},
{
"epoch": 0.4231434522198557,
"grad_norm": 0.078957200050354,
"learning_rate": 1.2140605456372856e-06,
"loss": 0.008,
"step": 8990
},
{
"epoch": 0.4236141345916242,
"grad_norm": 0.12556132674217224,
"learning_rate": 1.1995407580735364e-06,
"loss": 0.0086,
"step": 9000
},
{
"epoch": 0.4240848169633927,
"grad_norm": 0.09047644585371017,
"learning_rate": 1.1851027779954373e-06,
"loss": 0.01,
"step": 9010
},
{
"epoch": 0.4245554993351611,
"grad_norm": 0.13083992898464203,
"learning_rate": 1.1707467396158524e-06,
"loss": 0.0099,
"step": 9020
},
{
"epoch": 0.42502618170692963,
"grad_norm": 0.06367667019367218,
"learning_rate": 1.1564727763859306e-06,
"loss": 0.0068,
"step": 9030
},
{
"epoch": 0.4254968640786981,
"grad_norm": 0.12646174430847168,
"learning_rate": 1.1422810209938627e-06,
"loss": 0.0104,
"step": 9040
},
{
"epoch": 0.4259675464504666,
"grad_norm": 0.09600350260734558,
"learning_rate": 1.1281716053636616e-06,
"loss": 0.0095,
"step": 9050
},
{
"epoch": 0.42643822882223503,
"grad_norm": 0.06250861287117004,
"learning_rate": 1.1141446606539063e-06,
"loss": 0.0081,
"step": 9060
},
{
"epoch": 0.4269089111940035,
"grad_norm": 0.06376807391643524,
"learning_rate": 1.1002003172565579e-06,
"loss": 0.0092,
"step": 9070
},
{
"epoch": 0.427379593565772,
"grad_norm": 0.1088075190782547,
"learning_rate": 1.086338704795722e-06,
"loss": 0.0078,
"step": 9080
},
{
"epoch": 0.42785027593754044,
"grad_norm": 0.11370082199573517,
"learning_rate": 1.0725599521264518e-06,
"loss": 0.0098,
"step": 9090
},
{
"epoch": 0.42832095830930894,
"grad_norm": 0.10452120006084442,
"learning_rate": 1.0588641873335558e-06,
"loss": 0.0075,
"step": 9100
},
{
"epoch": 0.4287916406810774,
"grad_norm": 0.09549901634454727,
"learning_rate": 1.0452515377303974e-06,
"loss": 0.0093,
"step": 9110
},
{
"epoch": 0.42926232305284584,
"grad_norm": 0.08076899498701096,
"learning_rate": 1.0317221298577163e-06,
"loss": 0.0076,
"step": 9120
},
{
"epoch": 0.42973300542461434,
"grad_norm": 0.08189800381660461,
"learning_rate": 1.0182760894824607e-06,
"loss": 0.0084,
"step": 9130
},
{
"epoch": 0.4302036877963828,
"grad_norm": 0.08954903483390808,
"learning_rate": 1.0049135415965926e-06,
"loss": 0.0089,
"step": 9140
},
{
"epoch": 0.4306743701681513,
"grad_norm": 0.10612103343009949,
"learning_rate": 9.916346104159602e-07,
"loss": 0.0085,
"step": 9150
},
{
"epoch": 0.43114505253991975,
"grad_norm": 0.08432716876268387,
"learning_rate": 9.784394193791169e-07,
"loss": 0.007,
"step": 9160
},
{
"epoch": 0.4316157349116882,
"grad_norm": 0.113120436668396,
"learning_rate": 9.653280911461837e-07,
"loss": 0.0077,
"step": 9170
},
{
"epoch": 0.4320864172834567,
"grad_norm": 0.10104914754629135,
"learning_rate": 9.523007475977064e-07,
"loss": 0.0071,
"step": 9180
},
{
"epoch": 0.43255709965522515,
"grad_norm": 0.09766939282417297,
"learning_rate": 9.393575098335339e-07,
"loss": 0.0094,
"step": 9190
},
{
"epoch": 0.43302778202699366,
"grad_norm": 0.10833235085010529,
"learning_rate": 9.264984981716663e-07,
"loss": 0.0107,
"step": 9200
},
{
"epoch": 0.4334984643987621,
"grad_norm": 0.10373176634311676,
"learning_rate": 9.137238321471675e-07,
"loss": 0.0112,
"step": 9210
},
{
"epoch": 0.43396914677053056,
"grad_norm": 0.09001635015010834,
"learning_rate": 9.010336305110345e-07,
"loss": 0.0076,
"step": 9220
},
{
"epoch": 0.43443982914229906,
"grad_norm": 0.10548534989356995,
"learning_rate": 8.884280112290977e-07,
"loss": 0.0087,
"step": 9230
},
{
"epoch": 0.4349105115140675,
"grad_norm": 0.1272614300251007,
"learning_rate": 8.759070914809253e-07,
"loss": 0.0076,
"step": 9240
},
{
"epoch": 0.435381193885836,
"grad_norm": 0.08105993270874023,
"learning_rate": 8.634709876587344e-07,
"loss": 0.0084,
"step": 9250
},
{
"epoch": 0.43585187625760446,
"grad_norm": 0.0882258415222168,
"learning_rate": 8.511198153663069e-07,
"loss": 0.0091,
"step": 9260
},
{
"epoch": 0.4363225586293729,
"grad_norm": 0.06415614485740662,
"learning_rate": 8.388536894179234e-07,
"loss": 0.006,
"step": 9270
},
{
"epoch": 0.4367932410011414,
"grad_norm": 0.108585424721241,
"learning_rate": 8.266727238372763e-07,
"loss": 0.0081,
"step": 9280
},
{
"epoch": 0.43726392337290987,
"grad_norm": 0.14916934072971344,
"learning_rate": 8.145770318564361e-07,
"loss": 0.0084,
"step": 9290
},
{
"epoch": 0.4377346057446784,
"grad_norm": 0.11187581717967987,
"learning_rate": 8.025667259147773e-07,
"loss": 0.0089,
"step": 9300
},
{
"epoch": 0.4382052881164468,
"grad_norm": 0.10432884097099304,
"learning_rate": 7.906419176579416e-07,
"loss": 0.008,
"step": 9310
},
{
"epoch": 0.43867597048821527,
"grad_norm": 0.10626804083585739,
"learning_rate": 7.788027179367997e-07,
"loss": 0.0079,
"step": 9320
},
{
"epoch": 0.4391466528599838,
"grad_norm": 0.16250360012054443,
"learning_rate": 7.670492368064275e-07,
"loss": 0.0088,
"step": 9330
},
{
"epoch": 0.4396173352317522,
"grad_norm": 0.08617981523275375,
"learning_rate": 7.553815835250644e-07,
"loss": 0.0114,
"step": 9340
},
{
"epoch": 0.44008801760352073,
"grad_norm": 0.09257598966360092,
"learning_rate": 7.437998665531221e-07,
"loss": 0.0095,
"step": 9350
},
{
"epoch": 0.4405586999752892,
"grad_norm": 0.11574006080627441,
"learning_rate": 7.323041935521502e-07,
"loss": 0.0094,
"step": 9360
},
{
"epoch": 0.44102938234705763,
"grad_norm": 0.1045096293091774,
"learning_rate": 7.208946713838638e-07,
"loss": 0.0084,
"step": 9370
},
{
"epoch": 0.44150006471882614,
"grad_norm": 0.08487322926521301,
"learning_rate": 7.095714061091241e-07,
"loss": 0.0084,
"step": 9380
},
{
"epoch": 0.4419707470905946,
"grad_norm": 0.0824100524187088,
"learning_rate": 6.983345029869681e-07,
"loss": 0.0098,
"step": 9390
},
{
"epoch": 0.44244142946236303,
"grad_norm": 0.08562915772199631,
"learning_rate": 6.871840664736251e-07,
"loss": 0.0083,
"step": 9400
},
{
"epoch": 0.44291211183413154,
"grad_norm": 0.07793686538934708,
"learning_rate": 6.761202002215506e-07,
"loss": 0.0087,
"step": 9410
},
{
"epoch": 0.4433827942059,
"grad_norm": 0.11262806504964828,
"learning_rate": 6.65143007078447e-07,
"loss": 0.0078,
"step": 9420
},
{
"epoch": 0.4438534765776685,
"grad_norm": 0.15416622161865234,
"learning_rate": 6.542525890863338e-07,
"loss": 0.0088,
"step": 9430
},
{
"epoch": 0.44432415894943694,
"grad_norm": 0.0901259034872055,
"learning_rate": 6.434490474805743e-07,
"loss": 0.0078,
"step": 9440
},
{
"epoch": 0.4447948413212054,
"grad_norm": 0.10945569723844528,
"learning_rate": 6.327324826889469e-07,
"loss": 0.0104,
"step": 9450
},
{
"epoch": 0.4452655236929739,
"grad_norm": 0.125677227973938,
"learning_rate": 6.221029943307099e-07,
"loss": 0.0085,
"step": 9460
},
{
"epoch": 0.44573620606474235,
"grad_norm": 0.073707215487957,
"learning_rate": 6.115606812156749e-07,
"loss": 0.0095,
"step": 9470
},
{
"epoch": 0.44620688843651085,
"grad_norm": 0.05944458767771721,
"learning_rate": 6.01105641343287e-07,
"loss": 0.0088,
"step": 9480
},
{
"epoch": 0.4466775708082793,
"grad_norm": 0.06994154304265976,
"learning_rate": 5.907379719017181e-07,
"loss": 0.0083,
"step": 9490
},
{
"epoch": 0.44714825318004775,
"grad_norm": 0.10343065112829208,
"learning_rate": 5.804577692669533e-07,
"loss": 0.0105,
"step": 9500
},
{
"epoch": 0.44761893555181625,
"grad_norm": 0.10532603412866592,
"learning_rate": 5.702651290019112e-07,
"loss": 0.0092,
"step": 9510
},
{
"epoch": 0.4480896179235847,
"grad_norm": 0.09277930855751038,
"learning_rate": 5.601601458555406e-07,
"loss": 0.0083,
"step": 9520
},
{
"epoch": 0.4485603002953532,
"grad_norm": 0.09575467556715012,
"learning_rate": 5.501429137619452e-07,
"loss": 0.0077,
"step": 9530
},
{
"epoch": 0.44903098266712166,
"grad_norm": 0.09499261528253555,
"learning_rate": 5.402135258395114e-07,
"loss": 0.0067,
"step": 9540
},
{
"epoch": 0.4495016650388901,
"grad_norm": 0.093096524477005,
"learning_rate": 5.303720743900475e-07,
"loss": 0.0068,
"step": 9550
},
{
"epoch": 0.4499723474106586,
"grad_norm": 0.06929812580347061,
"learning_rate": 5.206186508979083e-07,
"loss": 0.0068,
"step": 9560
},
{
"epoch": 0.45044302978242706,
"grad_norm": 0.07270170748233795,
"learning_rate": 5.109533460291694e-07,
"loss": 0.0107,
"step": 9570
},
{
"epoch": 0.45091371215419557,
"grad_norm": 0.06971085071563721,
"learning_rate": 5.01376249630764e-07,
"loss": 0.007,
"step": 9580
},
{
"epoch": 0.451384394525964,
"grad_norm": 0.10059388726949692,
"learning_rate": 4.918874507296578e-07,
"loss": 0.0081,
"step": 9590
},
{
"epoch": 0.45185507689773247,
"grad_norm": 0.11430494487285614,
"learning_rate": 4.824870375320156e-07,
"loss": 0.0085,
"step": 9600
},
{
"epoch": 0.45232575926950097,
"grad_norm": 0.11335556954145432,
"learning_rate": 4.731750974223892e-07,
"loss": 0.0091,
"step": 9610
},
{
"epoch": 0.4527964416412694,
"grad_norm": 0.07682538777589798,
"learning_rate": 4.639517169628971e-07,
"loss": 0.0083,
"step": 9620
},
{
"epoch": 0.4532671240130379,
"grad_norm": 0.11125083267688751,
"learning_rate": 4.548169818924275e-07,
"loss": 0.0088,
"step": 9630
},
{
"epoch": 0.4537378063848064,
"grad_norm": 0.1001538559794426,
"learning_rate": 4.4577097712582897e-07,
"loss": 0.0067,
"step": 9640
},
{
"epoch": 0.4542084887565748,
"grad_norm": 0.1667022854089737,
"learning_rate": 4.3681378675313747e-07,
"loss": 0.0084,
"step": 9650
},
{
"epoch": 0.45467917112834333,
"grad_norm": 0.09442941844463348,
"learning_rate": 4.279454940387828e-07,
"loss": 0.0068,
"step": 9660
},
{
"epoch": 0.4551498535001118,
"grad_norm": 0.11196138709783554,
"learning_rate": 4.191661814208181e-07,
"loss": 0.0107,
"step": 9670
},
{
"epoch": 0.4556205358718803,
"grad_norm": 0.06755967438220978,
"learning_rate": 4.1047593051015245e-07,
"loss": 0.0091,
"step": 9680
},
{
"epoch": 0.45609121824364873,
"grad_norm": 0.09105851501226425,
"learning_rate": 4.018748220897994e-07,
"loss": 0.0079,
"step": 9690
},
{
"epoch": 0.4565619006154172,
"grad_norm": 0.06257709860801697,
"learning_rate": 3.933629361141078e-07,
"loss": 0.0084,
"step": 9700
},
{
"epoch": 0.4570325829871857,
"grad_norm": 0.12144725024700165,
"learning_rate": 3.849403517080452e-07,
"loss": 0.0096,
"step": 9710
},
{
"epoch": 0.45750326535895414,
"grad_norm": 0.09013078361749649,
"learning_rate": 3.7660714716643563e-07,
"loss": 0.0081,
"step": 9720
},
{
"epoch": 0.45797394773072264,
"grad_norm": 0.10376989096403122,
"learning_rate": 3.683633999532521e-07,
"loss": 0.0098,
"step": 9730
},
{
"epoch": 0.4584446301024911,
"grad_norm": 0.07394952327013016,
"learning_rate": 3.60209186700885e-07,
"loss": 0.0073,
"step": 9740
},
{
"epoch": 0.45891531247425954,
"grad_norm": 0.07216717302799225,
"learning_rate": 3.521445832094328e-07,
"loss": 0.0085,
"step": 9750
},
{
"epoch": 0.45938599484602805,
"grad_norm": 0.1032300665974617,
"learning_rate": 3.441696644459969e-07,
"loss": 0.0085,
"step": 9760
},
{
"epoch": 0.4598566772177965,
"grad_norm": 0.08255499601364136,
"learning_rate": 3.362845045439911e-07,
"loss": 0.008,
"step": 9770
},
{
"epoch": 0.460327359589565,
"grad_norm": 0.07097998261451721,
"learning_rate": 3.284891768024401e-07,
"loss": 0.0101,
"step": 9780
},
{
"epoch": 0.46079804196133345,
"grad_norm": 0.07620490342378616,
"learning_rate": 3.207837536853087e-07,
"loss": 0.009,
"step": 9790
},
{
"epoch": 0.4612687243331019,
"grad_norm": 0.08704191446304321,
"learning_rate": 3.131683068208247e-07,
"loss": 0.0071,
"step": 9800
},
{
"epoch": 0.4617394067048704,
"grad_norm": 0.10447575896978378,
"learning_rate": 3.0564290700081044e-07,
"loss": 0.0081,
"step": 9810
},
{
"epoch": 0.46221008907663885,
"grad_norm": 0.1433567851781845,
"learning_rate": 2.9820762418002916e-07,
"loss": 0.0091,
"step": 9820
},
{
"epoch": 0.4626807714484073,
"grad_norm": 0.09916414320468903,
"learning_rate": 2.908625274755339e-07,
"loss": 0.0097,
"step": 9830
},
{
"epoch": 0.4631514538201758,
"grad_norm": 0.06610321998596191,
"learning_rate": 2.8360768516601745e-07,
"loss": 0.0065,
"step": 9840
},
{
"epoch": 0.46362213619194426,
"grad_norm": 0.09837215393781662,
"learning_rate": 2.764431646911947e-07,
"loss": 0.0084,
"step": 9850
},
{
"epoch": 0.46409281856371276,
"grad_norm": 0.15495586395263672,
"learning_rate": 2.693690326511533e-07,
"loss": 0.009,
"step": 9860
},
{
"epoch": 0.4645635009354812,
"grad_norm": 0.07781887799501419,
"learning_rate": 2.6238535480575533e-07,
"loss": 0.0078,
"step": 9870
},
{
"epoch": 0.46503418330724966,
"grad_norm": 0.06881334632635117,
"learning_rate": 2.55492196074012e-07,
"loss": 0.0081,
"step": 9880
},
{
"epoch": 0.46550486567901816,
"grad_norm": 0.12513375282287598,
"learning_rate": 2.4868962053348764e-07,
"loss": 0.0086,
"step": 9890
},
{
"epoch": 0.4659755480507866,
"grad_norm": 0.10875881463289261,
"learning_rate": 2.419776914196981e-07,
"loss": 0.0092,
"step": 9900
},
{
"epoch": 0.4664462304225551,
"grad_norm": 0.10919349640607834,
"learning_rate": 2.3535647112553295e-07,
"loss": 0.0107,
"step": 9910
},
{
"epoch": 0.46691691279432357,
"grad_norm": 0.055101413279771805,
"learning_rate": 2.288260212006599e-07,
"loss": 0.0085,
"step": 9920
},
{
"epoch": 0.467387595166092,
"grad_norm": 0.10072072595357895,
"learning_rate": 2.2238640235097032e-07,
"loss": 0.0091,
"step": 9930
},
{
"epoch": 0.4678582775378605,
"grad_norm": 0.09013131260871887,
"learning_rate": 2.1603767443799994e-07,
"loss": 0.0074,
"step": 9940
},
{
"epoch": 0.468328959909629,
"grad_norm": 0.10675112158060074,
"learning_rate": 2.097798964783826e-07,
"loss": 0.0097,
"step": 9950
},
{
"epoch": 0.4687996422813975,
"grad_norm": 0.13907890021800995,
"learning_rate": 2.0361312664329502e-07,
"loss": 0.0078,
"step": 9960
},
{
"epoch": 0.4692703246531659,
"grad_norm": 0.09058912843465805,
"learning_rate": 1.975374222579207e-07,
"loss": 0.0098,
"step": 9970
},
{
"epoch": 0.4697410070249344,
"grad_norm": 0.14657960832118988,
"learning_rate": 1.9155283980091366e-07,
"loss": 0.0086,
"step": 9980
},
{
"epoch": 0.4702116893967029,
"grad_norm": 0.06072373688220978,
"learning_rate": 1.8565943490387761e-07,
"loss": 0.009,
"step": 9990
},
{
"epoch": 0.47068237176847133,
"grad_norm": 0.07640701532363892,
"learning_rate": 1.79857262350841e-07,
"loss": 0.0074,
"step": 10000
},
{
"epoch": 0.47115305414023984,
"grad_norm": 0.10978458821773529,
"learning_rate": 1.741463760777584e-07,
"loss": 0.0079,
"step": 10010
},
{
"epoch": 0.4716237365120083,
"grad_norm": 0.09151839464902878,
"learning_rate": 1.685268291719999e-07,
"loss": 0.0076,
"step": 10020
},
{
"epoch": 0.47209441888377673,
"grad_norm": 0.09422069042921066,
"learning_rate": 1.6299867387186363e-07,
"loss": 0.0105,
"step": 10030
},
{
"epoch": 0.47256510125554524,
"grad_norm": 0.07369952648878098,
"learning_rate": 1.5756196156608393e-07,
"loss": 0.0098,
"step": 10040
},
{
"epoch": 0.4730357836273137,
"grad_norm": 0.05266701802611351,
"learning_rate": 1.5221674279336408e-07,
"loss": 0.0079,
"step": 10050
},
{
"epoch": 0.4735064659990822,
"grad_norm": 0.1843479424715042,
"learning_rate": 1.4696306724189312e-07,
"loss": 0.0116,
"step": 10060
},
{
"epoch": 0.47397714837085064,
"grad_norm": 0.08956597000360489,
"learning_rate": 1.4180098374889429e-07,
"loss": 0.0093,
"step": 10070
},
{
"epoch": 0.4744478307426191,
"grad_norm": 0.12094996124505997,
"learning_rate": 1.367305403001673e-07,
"loss": 0.0084,
"step": 10080
},
{
"epoch": 0.4749185131143876,
"grad_norm": 0.13796210289001465,
"learning_rate": 1.3175178402964116e-07,
"loss": 0.0093,
"step": 10090
},
{
"epoch": 0.47538919548615605,
"grad_norm": 0.11424495279788971,
"learning_rate": 1.2686476121894e-07,
"loss": 0.009,
"step": 10100
},
{
"epoch": 0.47585987785792455,
"grad_norm": 0.08337216824293137,
"learning_rate": 1.220695172969477e-07,
"loss": 0.0104,
"step": 10110
},
{
"epoch": 0.476330560229693,
"grad_norm": 0.07916202396154404,
"learning_rate": 1.1736609683938749e-07,
"loss": 0.0075,
"step": 10120
},
{
"epoch": 0.47680124260146145,
"grad_norm": 0.09398314356803894,
"learning_rate": 1.1275454356841298e-07,
"loss": 0.0089,
"step": 10130
},
{
"epoch": 0.47727192497322996,
"grad_norm": 0.14049002528190613,
"learning_rate": 1.0823490035218986e-07,
"loss": 0.0089,
"step": 10140
},
{
"epoch": 0.4777426073449984,
"grad_norm": 0.10012887418270111,
"learning_rate": 1.038072092045117e-07,
"loss": 0.007,
"step": 10150
},
{
"epoch": 0.4782132897167669,
"grad_norm": 0.0714501217007637,
"learning_rate": 9.947151128439692e-08,
"loss": 0.0095,
"step": 10160
},
{
"epoch": 0.47868397208853536,
"grad_norm": 0.10559763014316559,
"learning_rate": 9.52278468957124e-08,
"loss": 0.0102,
"step": 10170
},
{
"epoch": 0.4791546544603038,
"grad_norm": 0.06704830378293991,
"learning_rate": 9.107625548679944e-08,
"loss": 0.0065,
"step": 10180
},
{
"epoch": 0.4796253368320723,
"grad_norm": 0.10070990025997162,
"learning_rate": 8.701677565010725e-08,
"loss": 0.0081,
"step": 10190
},
{
"epoch": 0.48009601920384076,
"grad_norm": 0.117221400141716,
"learning_rate": 8.304944512182666e-08,
"loss": 0.0072,
"step": 10200
},
{
"epoch": 0.4805667015756092,
"grad_norm": 0.08964571356773376,
"learning_rate": 7.91743007815493e-08,
"loss": 0.0094,
"step": 10210
},
{
"epoch": 0.4810373839473777,
"grad_norm": 0.08869532495737076,
"learning_rate": 7.539137865192003e-08,
"loss": 0.0078,
"step": 10220
},
{
"epoch": 0.48150806631914617,
"grad_norm": 0.11361581832170486,
"learning_rate": 7.17007138983028e-08,
"loss": 0.0089,
"step": 10230
},
{
"epoch": 0.48197874869091467,
"grad_norm": 0.09199584275484085,
"learning_rate": 6.810234082845313e-08,
"loss": 0.0077,
"step": 10240
},
{
"epoch": 0.4824494310626831,
"grad_norm": 0.09256649017333984,
"learning_rate": 6.459629289219838e-08,
"loss": 0.0094,
"step": 10250
},
{
"epoch": 0.48292011343445157,
"grad_norm": 0.06944099813699722,
"learning_rate": 6.118260268112908e-08,
"loss": 0.0092,
"step": 10260
},
{
"epoch": 0.4833907958062201,
"grad_norm": 0.07624297589063644,
"learning_rate": 5.786130192829809e-08,
"loss": 0.0084,
"step": 10270
},
{
"epoch": 0.4838614781779885,
"grad_norm": 0.11081594228744507,
"learning_rate": 5.4632421507916366e-08,
"loss": 0.0084,
"step": 10280
},
{
"epoch": 0.48433216054975703,
"grad_norm": 0.11816196143627167,
"learning_rate": 5.1495991435076555e-08,
"loss": 0.0079,
"step": 10290
},
{
"epoch": 0.4848028429215255,
"grad_norm": 0.09288829565048218,
"learning_rate": 4.845204086546984e-08,
"loss": 0.0068,
"step": 10300
},
{
"epoch": 0.48527352529329393,
"grad_norm": 0.1271565854549408,
"learning_rate": 4.5500598095110645e-08,
"loss": 0.0098,
"step": 10310
},
{
"epoch": 0.48574420766506243,
"grad_norm": 0.09441140294075012,
"learning_rate": 4.264169056008016e-08,
"loss": 0.0089,
"step": 10320
},
{
"epoch": 0.4862148900368309,
"grad_norm": 0.07992501556873322,
"learning_rate": 3.987534483626987e-08,
"loss": 0.0072,
"step": 10330
},
{
"epoch": 0.4866855724085994,
"grad_norm": 0.1038915291428566,
"learning_rate": 3.720158663913065e-08,
"loss": 0.0091,
"step": 10340
},
{
"epoch": 0.48715625478036784,
"grad_norm": 0.0911439061164856,
"learning_rate": 3.4620440823438517e-08,
"loss": 0.0095,
"step": 10350
},
{
"epoch": 0.4876269371521363,
"grad_norm": 0.11160367727279663,
"learning_rate": 3.2131931383059256e-08,
"loss": 0.0086,
"step": 10360
},
{
"epoch": 0.4880976195239048,
"grad_norm": 0.14317543804645538,
"learning_rate": 2.9736081450730813e-08,
"loss": 0.0096,
"step": 10370
},
{
"epoch": 0.48856830189567324,
"grad_norm": 0.05576762929558754,
"learning_rate": 2.7432913297841256e-08,
"loss": 0.0082,
"step": 10380
},
{
"epoch": 0.48903898426744175,
"grad_norm": 0.08932645618915558,
"learning_rate": 2.5222448334227822e-08,
"loss": 0.0125,
"step": 10390
},
{
"epoch": 0.4895096666392102,
"grad_norm": 0.09650373458862305,
"learning_rate": 2.3104707107974857e-08,
"loss": 0.0073,
"step": 10400
},
{
"epoch": 0.48998034901097864,
"grad_norm": 0.12321902066469193,
"learning_rate": 2.1079709305226183e-08,
"loss": 0.0077,
"step": 10410
},
{
"epoch": 0.49045103138274715,
"grad_norm": 0.08745528757572174,
"learning_rate": 1.914747374999304e-08,
"loss": 0.0095,
"step": 10420
},
{
"epoch": 0.4909217137545156,
"grad_norm": 0.08539978414773941,
"learning_rate": 1.7308018403991988e-08,
"loss": 0.0074,
"step": 10430
},
{
"epoch": 0.4913923961262841,
"grad_norm": 0.10824183374643326,
"learning_rate": 1.556136036646838e-08,
"loss": 0.007,
"step": 10440
},
{
"epoch": 0.49186307849805255,
"grad_norm": 0.09461843222379684,
"learning_rate": 1.3907515874042044e-08,
"loss": 0.0068,
"step": 10450
},
{
"epoch": 0.492333760869821,
"grad_norm": 0.08664172887802124,
"learning_rate": 1.2346500300551844e-08,
"loss": 0.0077,
"step": 10460
},
{
"epoch": 0.4928044432415895,
"grad_norm": 0.09471101313829422,
"learning_rate": 1.0878328156919139e-08,
"loss": 0.0094,
"step": 10470
},
{
"epoch": 0.49327512561335796,
"grad_norm": 0.14020918309688568,
"learning_rate": 9.503013091006763e-09,
"loss": 0.0102,
"step": 10480
},
{
"epoch": 0.49374580798512646,
"grad_norm": 0.10110370069742203,
"learning_rate": 8.220567887498033e-09,
"loss": 0.0098,
"step": 10490
},
{
"epoch": 0.4942164903568949,
|
|
"grad_norm": 0.10459288954734802,
|
|
"learning_rate": 7.031004467771274e-09,
|
|
"loss": 0.0079,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 0.49468717272866336,
|
|
"grad_norm": 0.0781732052564621,
|
|
"learning_rate": 5.934333889794364e-09,
|
|
"loss": 0.0081,
|
|
"step": 10510
|
|
},
|
|
{
|
|
"epoch": 0.49515785510043187,
|
|
"grad_norm": 0.07988249510526657,
|
|
"learning_rate": 4.93056634801925e-09,
|
|
"loss": 0.0071,
|
|
"step": 10520
|
|
},
|
|
{
|
|
"epoch": 0.4956285374722003,
|
|
"grad_norm": 0.1206192746758461,
|
|
"learning_rate": 4.019711173289809e-09,
|
|
"loss": 0.0084,
|
|
"step": 10530
|
|
},
|
|
{
|
|
"epoch": 0.4960992198439688,
|
|
"grad_norm": 0.13311167061328888,
|
|
"learning_rate": 3.201776832749692e-09,
|
|
"loss": 0.0102,
|
|
"step": 10540
|
|
},
|
|
{
|
|
"epoch": 0.49656990221573727,
|
|
"grad_norm": 0.11637212336063385,
|
|
"learning_rate": 2.4767709297712772e-09,
|
|
"loss": 0.0092,
|
|
"step": 10550
|
|
},
|
|
{
|
|
"epoch": 0.4970405845875057,
|
|
"grad_norm": 0.10831353813409805,
|
|
"learning_rate": 1.8447002038779471e-09,
|
|
"loss": 0.0094,
|
|
"step": 10560
|
|
},
|
|
{
|
|
"epoch": 0.4975112669592742,
|
|
"grad_norm": 0.07090216130018234,
|
|
"learning_rate": 1.305570530686362e-09,
|
|
"loss": 0.0073,
|
|
"step": 10570
|
|
},
|
|
{
|
|
"epoch": 0.4979819493310427,
|
|
"grad_norm": 0.11900214105844498,
|
|
"learning_rate": 8.593869218487261e-10,
|
|
"loss": 0.0118,
|
|
"step": 10580
|
|
},
|
|
{
|
|
"epoch": 0.4984526317028112,
|
|
"grad_norm": 0.0904439315199852,
|
|
"learning_rate": 5.061535250061589e-10,
|
|
"loss": 0.0086,
|
|
"step": 10590
|
|
},
|
|
{
|
|
"epoch": 0.4989233140745796,
|
|
"grad_norm": 0.08377506583929062,
|
|
"learning_rate": 2.45873623754278e-10,
|
|
"loss": 0.0072,
|
|
"step": 10600
|
|
},
|
|
{
|
|
"epoch": 0.4993939964463481,
|
|
"grad_norm": 0.06532546132802963,
|
|
"learning_rate": 7.85496376076722e-11,
|
|
"loss": 0.007,
|
|
"step": 10610
|
|
},
|
|
{
|
|
"epoch": 0.4998646788181166,
|
|
"grad_norm": 0.05805087089538574,
|
|
"learning_rate": 4.1831219799171044e-12,
|
|
"loss": 0.0084,
|
|
"step": 10620
|
|
},
|
|
{
|
|
"epoch": 0.5000058835296471,
|
|
"step": 10623,
|
|
"total_flos": 1.1590600616501576e+19,
|
|
"train_loss": 0.014902605037183441,
|
|
"train_runtime": 113108.7382,
|
|
"train_samples_per_second": 0.751,
|
|
"train_steps_per_second": 0.094
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 10623,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.1590600616501576e+19,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|