5550 lines
135 KiB
JSON
5550 lines
135 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 500,
|
|
"global_step": 15767,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.001268472125325046,
|
|
"grad_norm": 0.8813716769218445,
|
|
"learning_rate": 4.993657639373375e-05,
|
|
"loss": 0.8575,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.002536944250650092,
|
|
"grad_norm": 0.941986083984375,
|
|
"learning_rate": 4.98731527874675e-05,
|
|
"loss": 0.8299,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.003805416375975138,
|
|
"grad_norm": 0.8295786380767822,
|
|
"learning_rate": 4.980972918120125e-05,
|
|
"loss": 0.8491,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.005073888501300184,
|
|
"grad_norm": 0.7794184684753418,
|
|
"learning_rate": 4.9749476755248304e-05,
|
|
"loss": 0.8294,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.00634236062662523,
|
|
"grad_norm": 0.8349530100822449,
|
|
"learning_rate": 4.9686053148982053e-05,
|
|
"loss": 0.8443,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.007610832751950276,
|
|
"grad_norm": 0.8651715517044067,
|
|
"learning_rate": 4.96226295427158e-05,
|
|
"loss": 0.817,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.008879304877275321,
|
|
"grad_norm": 0.8325722217559814,
|
|
"learning_rate": 4.955920593644955e-05,
|
|
"loss": 0.7915,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.010147777002600368,
|
|
"grad_norm": 0.8690646886825562,
|
|
"learning_rate": 4.94957823301833e-05,
|
|
"loss": 0.8673,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.011416249127925413,
|
|
"grad_norm": 0.8512411117553711,
|
|
"learning_rate": 4.9432358723917043e-05,
|
|
"loss": 0.8621,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.01268472125325046,
|
|
"grad_norm": 0.8262362480163574,
|
|
"learning_rate": 4.936893511765079e-05,
|
|
"loss": 0.8387,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.013953193378575505,
|
|
"grad_norm": 0.8982943892478943,
|
|
"learning_rate": 4.930551151138454e-05,
|
|
"loss": 0.8199,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.015221665503900552,
|
|
"grad_norm": 1.024151086807251,
|
|
"learning_rate": 4.92452590854316e-05,
|
|
"loss": 0.8289,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.016490137629225597,
|
|
"grad_norm": 0.8547594547271729,
|
|
"learning_rate": 4.918183547916535e-05,
|
|
"loss": 0.8457,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.017758609754550642,
|
|
"grad_norm": 0.8753955364227295,
|
|
"learning_rate": 4.9118411872899095e-05,
|
|
"loss": 0.8213,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.01902708187987569,
|
|
"grad_norm": 0.8638611435890198,
|
|
"learning_rate": 4.9054988266632844e-05,
|
|
"loss": 0.8542,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.020295554005200736,
|
|
"grad_norm": 0.9179531931877136,
|
|
"learning_rate": 4.899156466036659e-05,
|
|
"loss": 0.8362,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.02156402613052578,
|
|
"grad_norm": 0.9708409905433655,
|
|
"learning_rate": 4.892814105410034e-05,
|
|
"loss": 0.8485,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.022832498255850826,
|
|
"grad_norm": 0.9580267667770386,
|
|
"learning_rate": 4.8864717447834085e-05,
|
|
"loss": 0.8234,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.024100970381175875,
|
|
"grad_norm": 0.8894557356834412,
|
|
"learning_rate": 4.8801293841567834e-05,
|
|
"loss": 0.8227,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.02536944250650092,
|
|
"grad_norm": 0.8640332221984863,
|
|
"learning_rate": 4.873787023530158e-05,
|
|
"loss": 0.8695,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.026637914631825965,
|
|
"grad_norm": 0.9173359870910645,
|
|
"learning_rate": 4.867444662903533e-05,
|
|
"loss": 0.8427,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.02790638675715101,
|
|
"grad_norm": 0.8366764187812805,
|
|
"learning_rate": 4.8611023022769075e-05,
|
|
"loss": 0.8248,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.02917485888247606,
|
|
"grad_norm": 0.801325798034668,
|
|
"learning_rate": 4.8547599416502824e-05,
|
|
"loss": 0.8649,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.030443331007801104,
|
|
"grad_norm": 1.0194541215896606,
|
|
"learning_rate": 4.848417581023657e-05,
|
|
"loss": 0.7695,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.03171180313312615,
|
|
"grad_norm": 0.8728939294815063,
|
|
"learning_rate": 4.842075220397032e-05,
|
|
"loss": 0.8076,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.032980275258451194,
|
|
"grad_norm": 0.9340566992759705,
|
|
"learning_rate": 4.8357328597704065e-05,
|
|
"loss": 0.8078,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.03424874738377624,
|
|
"grad_norm": 0.8570923209190369,
|
|
"learning_rate": 4.8293904991437814e-05,
|
|
"loss": 0.85,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.035517219509101285,
|
|
"grad_norm": 0.7447881698608398,
|
|
"learning_rate": 4.823048138517156e-05,
|
|
"loss": 0.864,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.03678569163442633,
|
|
"grad_norm": 0.9067574143409729,
|
|
"learning_rate": 4.816705777890531e-05,
|
|
"loss": 0.8536,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.03805416375975138,
|
|
"grad_norm": 1.009699821472168,
|
|
"learning_rate": 4.8103634172639055e-05,
|
|
"loss": 0.8023,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.039322635885076423,
|
|
"grad_norm": 0.9121712446212769,
|
|
"learning_rate": 4.8040210566372804e-05,
|
|
"loss": 0.8643,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.04059110801040147,
|
|
"grad_norm": 0.8111468553543091,
|
|
"learning_rate": 4.797678696010656e-05,
|
|
"loss": 0.8364,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.04185958013572652,
|
|
"grad_norm": 0.8392479419708252,
|
|
"learning_rate": 4.79133633538403e-05,
|
|
"loss": 0.8694,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.04312805226105156,
|
|
"grad_norm": 1.0781140327453613,
|
|
"learning_rate": 4.784993974757405e-05,
|
|
"loss": 0.8613,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.04439652438637661,
|
|
"grad_norm": 1.0560787916183472,
|
|
"learning_rate": 4.7786516141307794e-05,
|
|
"loss": 0.8348,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.04566499651170165,
|
|
"grad_norm": 0.9052237868309021,
|
|
"learning_rate": 4.772309253504155e-05,
|
|
"loss": 0.8389,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.0469334686370267,
|
|
"grad_norm": 0.698861300945282,
|
|
"learning_rate": 4.765966892877529e-05,
|
|
"loss": 0.8556,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.04820194076235175,
|
|
"grad_norm": 0.8764225840568542,
|
|
"learning_rate": 4.759624532250904e-05,
|
|
"loss": 0.8294,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.04947041288767679,
|
|
"grad_norm": 0.9750523567199707,
|
|
"learning_rate": 4.7532821716242784e-05,
|
|
"loss": 0.8109,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.05073888501300184,
|
|
"grad_norm": 0.931287407875061,
|
|
"learning_rate": 4.746939810997654e-05,
|
|
"loss": 0.8256,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.05200735713832689,
|
|
"grad_norm": 0.8329412341117859,
|
|
"learning_rate": 4.740597450371028e-05,
|
|
"loss": 0.8394,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.05327582926365193,
|
|
"grad_norm": 0.8462470173835754,
|
|
"learning_rate": 4.734255089744403e-05,
|
|
"loss": 0.8445,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.05454430138897698,
|
|
"grad_norm": 0.8757215142250061,
|
|
"learning_rate": 4.7279127291177774e-05,
|
|
"loss": 0.8467,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.05581277351430202,
|
|
"grad_norm": 0.8820921182632446,
|
|
"learning_rate": 4.721570368491153e-05,
|
|
"loss": 0.8415,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.05708124563962707,
|
|
"grad_norm": 0.9306124448776245,
|
|
"learning_rate": 4.715228007864527e-05,
|
|
"loss": 0.8488,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.05834971776495212,
|
|
"grad_norm": 0.9606575965881348,
|
|
"learning_rate": 4.708885647237902e-05,
|
|
"loss": 0.8495,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.05961818989027716,
|
|
"grad_norm": 0.7876096963882446,
|
|
"learning_rate": 4.702543286611277e-05,
|
|
"loss": 0.8364,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.06088666201560221,
|
|
"grad_norm": 1.05940842628479,
|
|
"learning_rate": 4.696200925984652e-05,
|
|
"loss": 0.856,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.06215513414092725,
|
|
"grad_norm": 0.8594347238540649,
|
|
"learning_rate": 4.689858565358026e-05,
|
|
"loss": 0.8114,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.0634236062662523,
|
|
"grad_norm": 0.8395501971244812,
|
|
"learning_rate": 4.683516204731401e-05,
|
|
"loss": 0.8645,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.06469207839157734,
|
|
"grad_norm": 0.9333510994911194,
|
|
"learning_rate": 4.677173844104776e-05,
|
|
"loss": 0.8196,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.06596055051690239,
|
|
"grad_norm": 0.8195106983184814,
|
|
"learning_rate": 4.670831483478151e-05,
|
|
"loss": 0.8586,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.06722902264222744,
|
|
"grad_norm": 0.9601035118103027,
|
|
"learning_rate": 4.664489122851525e-05,
|
|
"loss": 0.8884,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.06849749476755249,
|
|
"grad_norm": 1.006823182106018,
|
|
"learning_rate": 4.6581467622249e-05,
|
|
"loss": 0.8301,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.06976596689287753,
|
|
"grad_norm": 0.9135039448738098,
|
|
"learning_rate": 4.651804401598275e-05,
|
|
"loss": 0.8267,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.07103443901820257,
|
|
"grad_norm": 0.929247260093689,
|
|
"learning_rate": 4.64546204097165e-05,
|
|
"loss": 0.8569,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.07230291114352762,
|
|
"grad_norm": 0.8837388753890991,
|
|
"learning_rate": 4.639119680345025e-05,
|
|
"loss": 0.8236,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.07357138326885267,
|
|
"grad_norm": 0.9246069192886353,
|
|
"learning_rate": 4.63309443774973e-05,
|
|
"loss": 0.8736,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.07483985539417772,
|
|
"grad_norm": 0.803130567073822,
|
|
"learning_rate": 4.626752077123105e-05,
|
|
"loss": 0.841,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.07610832751950276,
|
|
"grad_norm": 0.8640844225883484,
|
|
"learning_rate": 4.62040971649648e-05,
|
|
"loss": 0.8171,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.0773767996448278,
|
|
"grad_norm": 0.7979689240455627,
|
|
"learning_rate": 4.614067355869855e-05,
|
|
"loss": 0.8282,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.07864527177015285,
|
|
"grad_norm": 0.9452027678489685,
|
|
"learning_rate": 4.60772499524323e-05,
|
|
"loss": 0.8724,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.0799137438954779,
|
|
"grad_norm": 0.8859258890151978,
|
|
"learning_rate": 4.601382634616604e-05,
|
|
"loss": 0.8825,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.08118221602080294,
|
|
"grad_norm": 0.7669050693511963,
|
|
"learning_rate": 4.595040273989979e-05,
|
|
"loss": 0.7727,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.08245068814612799,
|
|
"grad_norm": 0.9593196511268616,
|
|
"learning_rate": 4.588697913363354e-05,
|
|
"loss": 0.8217,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.08371916027145304,
|
|
"grad_norm": 0.9165802597999573,
|
|
"learning_rate": 4.582355552736729e-05,
|
|
"loss": 0.8288,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.08498763239677808,
|
|
"grad_norm": 0.8536924123764038,
|
|
"learning_rate": 4.576013192110103e-05,
|
|
"loss": 0.85,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.08625610452210312,
|
|
"grad_norm": 1.2818777561187744,
|
|
"learning_rate": 4.569670831483479e-05,
|
|
"loss": 0.8595,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.08752457664742817,
|
|
"grad_norm": 0.8300599455833435,
|
|
"learning_rate": 4.563328470856853e-05,
|
|
"loss": 0.8843,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.08879304877275322,
|
|
"grad_norm": 0.8837544322013855,
|
|
"learning_rate": 4.556986110230228e-05,
|
|
"loss": 0.869,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.09006152089807827,
|
|
"grad_norm": 0.8470463156700134,
|
|
"learning_rate": 4.550643749603602e-05,
|
|
"loss": 0.8748,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.0913299930234033,
|
|
"grad_norm": 0.9581688046455383,
|
|
"learning_rate": 4.544301388976978e-05,
|
|
"loss": 0.8545,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.09259846514872835,
|
|
"grad_norm": 0.9453362822532654,
|
|
"learning_rate": 4.537959028350352e-05,
|
|
"loss": 0.8134,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.0938669372740534,
|
|
"grad_norm": 1.0060471296310425,
|
|
"learning_rate": 4.531616667723727e-05,
|
|
"loss": 0.8136,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.09513540939937845,
|
|
"grad_norm": 1.0802257061004639,
|
|
"learning_rate": 4.525274307097101e-05,
|
|
"loss": 0.8548,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.0964038815247035,
|
|
"grad_norm": 0.8368780612945557,
|
|
"learning_rate": 4.518931946470477e-05,
|
|
"loss": 0.8709,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.09767235365002853,
|
|
"grad_norm": 0.8320122361183167,
|
|
"learning_rate": 4.512589585843851e-05,
|
|
"loss": 0.8181,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.09894082577535358,
|
|
"grad_norm": 0.8374115824699402,
|
|
"learning_rate": 4.506247225217226e-05,
|
|
"loss": 0.8549,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.10020929790067863,
|
|
"grad_norm": 0.7658076882362366,
|
|
"learning_rate": 4.4999048645906e-05,
|
|
"loss": 0.8453,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.10147777002600368,
|
|
"grad_norm": 0.8223689198493958,
|
|
"learning_rate": 4.493562503963976e-05,
|
|
"loss": 0.9502,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.10274624215132873,
|
|
"grad_norm": 0.8949286341667175,
|
|
"learning_rate": 4.487220143337351e-05,
|
|
"loss": 0.8565,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.10401471427665378,
|
|
"grad_norm": 0.9013976454734802,
|
|
"learning_rate": 4.480877782710725e-05,
|
|
"loss": 0.8313,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.10528318640197881,
|
|
"grad_norm": 0.7802047729492188,
|
|
"learning_rate": 4.4745354220841e-05,
|
|
"loss": 0.8598,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.10655165852730386,
|
|
"grad_norm": 1.0458945035934448,
|
|
"learning_rate": 4.468193061457475e-05,
|
|
"loss": 0.8358,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.10782013065262891,
|
|
"grad_norm": 0.8241732716560364,
|
|
"learning_rate": 4.46185070083085e-05,
|
|
"loss": 0.8473,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.10908860277795396,
|
|
"grad_norm": 0.794696569442749,
|
|
"learning_rate": 4.455508340204224e-05,
|
|
"loss": 0.8557,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.110357074903279,
|
|
"grad_norm": 0.9596499800682068,
|
|
"learning_rate": 4.449165979577599e-05,
|
|
"loss": 0.8727,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.11162554702860404,
|
|
"grad_norm": 0.789816677570343,
|
|
"learning_rate": 4.442823618950974e-05,
|
|
"loss": 0.8475,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.11289401915392909,
|
|
"grad_norm": 0.8604167103767395,
|
|
"learning_rate": 4.436481258324349e-05,
|
|
"loss": 0.8426,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.11416249127925414,
|
|
"grad_norm": 0.7762212157249451,
|
|
"learning_rate": 4.430138897697723e-05,
|
|
"loss": 0.8532,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.11543096340457919,
|
|
"grad_norm": 0.7988696694374084,
|
|
"learning_rate": 4.423796537071098e-05,
|
|
"loss": 0.8398,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.11669943552990424,
|
|
"grad_norm": 0.7536550164222717,
|
|
"learning_rate": 4.417454176444473e-05,
|
|
"loss": 0.8669,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.11796790765522927,
|
|
"grad_norm": 0.8086602687835693,
|
|
"learning_rate": 4.411111815817848e-05,
|
|
"loss": 0.858,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.11923637978055432,
|
|
"grad_norm": 0.8665058612823486,
|
|
"learning_rate": 4.404769455191222e-05,
|
|
"loss": 0.8445,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.12050485190587937,
|
|
"grad_norm": 0.9934934973716736,
|
|
"learning_rate": 4.398427094564597e-05,
|
|
"loss": 0.8548,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.12177332403120442,
|
|
"grad_norm": 1.0823296308517456,
|
|
"learning_rate": 4.392084733937972e-05,
|
|
"loss": 0.8598,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.12304179615652946,
|
|
"grad_norm": 0.767232358455658,
|
|
"learning_rate": 4.385742373311347e-05,
|
|
"loss": 0.8375,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.1243102682818545,
|
|
"grad_norm": 0.9011818170547485,
|
|
"learning_rate": 4.379400012684721e-05,
|
|
"loss": 0.8322,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.12557874040717956,
|
|
"grad_norm": 0.7959470152854919,
|
|
"learning_rate": 4.373057652058096e-05,
|
|
"loss": 0.8742,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.1268472125325046,
|
|
"grad_norm": 0.7924261093139648,
|
|
"learning_rate": 4.366715291431471e-05,
|
|
"loss": 0.854,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.12811568465782963,
|
|
"grad_norm": 0.7666494250297546,
|
|
"learning_rate": 4.360372930804846e-05,
|
|
"loss": 0.8068,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.12938415678315468,
|
|
"grad_norm": 0.7881539463996887,
|
|
"learning_rate": 4.354030570178221e-05,
|
|
"loss": 0.8483,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.13065262890847973,
|
|
"grad_norm": 0.8898656964302063,
|
|
"learning_rate": 4.3476882095515956e-05,
|
|
"loss": 0.8818,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.13192110103380478,
|
|
"grad_norm": 0.8294678330421448,
|
|
"learning_rate": 4.3413458489249705e-05,
|
|
"loss": 0.8118,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.13318957315912983,
|
|
"grad_norm": 0.867928147315979,
|
|
"learning_rate": 4.335003488298345e-05,
|
|
"loss": 0.8602,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.13445804528445487,
|
|
"grad_norm": 0.8367668390274048,
|
|
"learning_rate": 4.32866112767172e-05,
|
|
"loss": 0.8859,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.13572651740977992,
|
|
"grad_norm": 0.8341355919837952,
|
|
"learning_rate": 4.3223187670450946e-05,
|
|
"loss": 0.8484,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.13699498953510497,
|
|
"grad_norm": 0.8836013674736023,
|
|
"learning_rate": 4.3159764064184695e-05,
|
|
"loss": 0.851,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.13826346166043002,
|
|
"grad_norm": 0.7626996040344238,
|
|
"learning_rate": 4.309634045791844e-05,
|
|
"loss": 0.8308,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.13953193378575507,
|
|
"grad_norm": 0.926589846611023,
|
|
"learning_rate": 4.303291685165219e-05,
|
|
"loss": 0.8699,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.14080040591108012,
|
|
"grad_norm": 0.79881751537323,
|
|
"learning_rate": 4.2969493245385936e-05,
|
|
"loss": 0.8215,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.14206887803640514,
|
|
"grad_norm": 0.96977299451828,
|
|
"learning_rate": 4.2906069639119685e-05,
|
|
"loss": 0.8218,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.1433373501617302,
|
|
"grad_norm": 0.786342442035675,
|
|
"learning_rate": 4.284264603285343e-05,
|
|
"loss": 0.8128,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.14460582228705524,
|
|
"grad_norm": 0.8864126205444336,
|
|
"learning_rate": 4.277922242658718e-05,
|
|
"loss": 0.8931,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.14587429441238028,
|
|
"grad_norm": 0.9522060751914978,
|
|
"learning_rate": 4.2715798820320926e-05,
|
|
"loss": 0.8592,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.14714276653770533,
|
|
"grad_norm": 0.7771050333976746,
|
|
"learning_rate": 4.2652375214054675e-05,
|
|
"loss": 0.8473,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.14841123866303038,
|
|
"grad_norm": 0.9344542026519775,
|
|
"learning_rate": 4.258895160778842e-05,
|
|
"loss": 0.8611,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.14967971078835543,
|
|
"grad_norm": 0.8358260989189148,
|
|
"learning_rate": 4.252552800152217e-05,
|
|
"loss": 0.8689,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.15094818291368048,
|
|
"grad_norm": 0.9203112125396729,
|
|
"learning_rate": 4.2462104395255916e-05,
|
|
"loss": 0.8701,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.15221665503900553,
|
|
"grad_norm": 0.87474125623703,
|
|
"learning_rate": 4.2398680788989665e-05,
|
|
"loss": 0.8441,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.15348512716433058,
|
|
"grad_norm": 0.838341474533081,
|
|
"learning_rate": 4.233525718272341e-05,
|
|
"loss": 0.8402,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.1547535992896556,
|
|
"grad_norm": 0.9312511682510376,
|
|
"learning_rate": 4.227183357645716e-05,
|
|
"loss": 0.8594,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.15602207141498065,
|
|
"grad_norm": 0.8382570147514343,
|
|
"learning_rate": 4.220840997019091e-05,
|
|
"loss": 0.8629,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.1572905435403057,
|
|
"grad_norm": 0.7457637190818787,
|
|
"learning_rate": 4.2144986363924655e-05,
|
|
"loss": 0.8518,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.15855901566563074,
|
|
"grad_norm": 0.9622685313224792,
|
|
"learning_rate": 4.2081562757658405e-05,
|
|
"loss": 0.8555,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.1598274877909558,
|
|
"grad_norm": 0.8078732490539551,
|
|
"learning_rate": 4.201813915139215e-05,
|
|
"loss": 0.8499,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.16109595991628084,
|
|
"grad_norm": 0.9720255136489868,
|
|
"learning_rate": 4.19547155451259e-05,
|
|
"loss": 0.8273,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.1623644320416059,
|
|
"grad_norm": 0.86973637342453,
|
|
"learning_rate": 4.1891291938859645e-05,
|
|
"loss": 0.8212,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.16363290416693094,
|
|
"grad_norm": 0.7209818363189697,
|
|
"learning_rate": 4.1827868332593395e-05,
|
|
"loss": 0.8207,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.16490137629225599,
|
|
"grad_norm": 0.8860530853271484,
|
|
"learning_rate": 4.176444472632714e-05,
|
|
"loss": 0.8513,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.16616984841758103,
|
|
"grad_norm": 0.8513688445091248,
|
|
"learning_rate": 4.170102112006089e-05,
|
|
"loss": 0.8581,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.16743832054290608,
|
|
"grad_norm": 0.805892825126648,
|
|
"learning_rate": 4.1637597513794635e-05,
|
|
"loss": 0.8173,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.1687067926682311,
|
|
"grad_norm": 0.7897241115570068,
|
|
"learning_rate": 4.1574173907528385e-05,
|
|
"loss": 0.8358,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.16997526479355615,
|
|
"grad_norm": 0.8170486092567444,
|
|
"learning_rate": 4.151075030126213e-05,
|
|
"loss": 0.8259,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.1712437369188812,
|
|
"grad_norm": 0.993430495262146,
|
|
"learning_rate": 4.144732669499588e-05,
|
|
"loss": 0.8485,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.17251220904420625,
|
|
"grad_norm": 0.8168458342552185,
|
|
"learning_rate": 4.1383903088729625e-05,
|
|
"loss": 0.8446,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.1737806811695313,
|
|
"grad_norm": 0.9205940365791321,
|
|
"learning_rate": 4.1320479482463375e-05,
|
|
"loss": 0.8119,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.17504915329485635,
|
|
"grad_norm": 0.7091718912124634,
|
|
"learning_rate": 4.1257055876197124e-05,
|
|
"loss": 0.8672,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.1763176254201814,
|
|
"grad_norm": 0.7693396210670471,
|
|
"learning_rate": 4.119363226993087e-05,
|
|
"loss": 0.8285,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.17758609754550644,
|
|
"grad_norm": 1.0046180486679077,
|
|
"learning_rate": 4.1130208663664615e-05,
|
|
"loss": 0.8405,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.1788545696708315,
|
|
"grad_norm": 0.8736918568611145,
|
|
"learning_rate": 4.1066785057398365e-05,
|
|
"loss": 0.838,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.18012304179615654,
|
|
"grad_norm": 0.9117953777313232,
|
|
"learning_rate": 4.1003361451132114e-05,
|
|
"loss": 0.866,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.18139151392148156,
|
|
"grad_norm": 0.7822251319885254,
|
|
"learning_rate": 4.093993784486586e-05,
|
|
"loss": 0.9023,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.1826599860468066,
|
|
"grad_norm": 0.8296000361442566,
|
|
"learning_rate": 4.087651423859961e-05,
|
|
"loss": 0.8215,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.18392845817213166,
|
|
"grad_norm": 0.814051628112793,
|
|
"learning_rate": 4.0813090632333355e-05,
|
|
"loss": 0.8362,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.1851969302974567,
|
|
"grad_norm": 0.8576933145523071,
|
|
"learning_rate": 4.0749667026067104e-05,
|
|
"loss": 0.8584,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.18646540242278176,
|
|
"grad_norm": 1.1711878776550293,
|
|
"learning_rate": 4.068624341980085e-05,
|
|
"loss": 0.8411,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.1877338745481068,
|
|
"grad_norm": 0.8704431056976318,
|
|
"learning_rate": 4.06228198135346e-05,
|
|
"loss": 0.8548,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.18900234667343185,
|
|
"grad_norm": 0.8817090392112732,
|
|
"learning_rate": 4.0559396207268345e-05,
|
|
"loss": 0.8284,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.1902708187987569,
|
|
"grad_norm": 0.8044705390930176,
|
|
"learning_rate": 4.0495972601002094e-05,
|
|
"loss": 0.8242,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.19153929092408195,
|
|
"grad_norm": 0.7911235690116882,
|
|
"learning_rate": 4.043254899473584e-05,
|
|
"loss": 0.8239,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.192807763049407,
|
|
"grad_norm": 0.8657433390617371,
|
|
"learning_rate": 4.036912538846959e-05,
|
|
"loss": 0.8538,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.19407623517473205,
|
|
"grad_norm": 0.7675552368164062,
|
|
"learning_rate": 4.0305701782203335e-05,
|
|
"loss": 0.8036,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.19534470730005707,
|
|
"grad_norm": 0.8326247930526733,
|
|
"learning_rate": 4.0242278175937084e-05,
|
|
"loss": 0.8808,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.19661317942538212,
|
|
"grad_norm": 0.9370586276054382,
|
|
"learning_rate": 4.017885456967083e-05,
|
|
"loss": 0.8826,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.19788165155070717,
|
|
"grad_norm": 1.0956268310546875,
|
|
"learning_rate": 4.011543096340458e-05,
|
|
"loss": 0.8272,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.19915012367603221,
|
|
"grad_norm": 0.8846516609191895,
|
|
"learning_rate": 4.0052007357138325e-05,
|
|
"loss": 0.7996,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.20041859580135726,
|
|
"grad_norm": 0.8572809100151062,
|
|
"learning_rate": 3.998858375087208e-05,
|
|
"loss": 0.8098,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.2016870679266823,
|
|
"grad_norm": 0.9393579959869385,
|
|
"learning_rate": 3.992516014460582e-05,
|
|
"loss": 0.8076,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.20295554005200736,
|
|
"grad_norm": 0.7605802416801453,
|
|
"learning_rate": 3.986173653833957e-05,
|
|
"loss": 0.8284,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.2042240121773324,
|
|
"grad_norm": 0.6856004595756531,
|
|
"learning_rate": 3.9798312932073315e-05,
|
|
"loss": 0.8277,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.20549248430265746,
|
|
"grad_norm": 0.889754593372345,
|
|
"learning_rate": 3.973488932580707e-05,
|
|
"loss": 0.863,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.2067609564279825,
|
|
"grad_norm": 0.6881526112556458,
|
|
"learning_rate": 3.967146571954081e-05,
|
|
"loss": 0.8966,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.20802942855330755,
|
|
"grad_norm": 0.9273505806922913,
|
|
"learning_rate": 3.960804211327456e-05,
|
|
"loss": 0.867,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.20929790067863258,
|
|
"grad_norm": 0.9193258285522461,
|
|
"learning_rate": 3.954461850700831e-05,
|
|
"loss": 0.8131,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.21056637280395762,
|
|
"grad_norm": 0.9435883164405823,
|
|
"learning_rate": 3.948119490074206e-05,
|
|
"loss": 0.8066,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.21183484492928267,
|
|
"grad_norm": 0.8440527319908142,
|
|
"learning_rate": 3.941777129447581e-05,
|
|
"loss": 0.8195,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.21310331705460772,
|
|
"grad_norm": 0.7809598445892334,
|
|
"learning_rate": 3.935434768820955e-05,
|
|
"loss": 0.8503,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.21437178917993277,
|
|
"grad_norm": 0.7411904335021973,
|
|
"learning_rate": 3.92909240819433e-05,
|
|
"loss": 0.8055,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.21564026130525782,
|
|
"grad_norm": 0.9117131233215332,
|
|
"learning_rate": 3.922750047567705e-05,
|
|
"loss": 0.8789,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.21690873343058287,
|
|
"grad_norm": 0.816663920879364,
|
|
"learning_rate": 3.91640768694108e-05,
|
|
"loss": 0.8311,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.21817720555590792,
|
|
"grad_norm": 0.7956539988517761,
|
|
"learning_rate": 3.910065326314454e-05,
|
|
"loss": 0.8475,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.21944567768123296,
|
|
"grad_norm": 0.858045756816864,
|
|
"learning_rate": 3.903722965687829e-05,
|
|
"loss": 0.8234,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.220714149806558,
|
|
"grad_norm": 0.8653853535652161,
|
|
"learning_rate": 3.897380605061204e-05,
|
|
"loss": 0.8363,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.22198262193188303,
|
|
"grad_norm": 0.8181082606315613,
|
|
"learning_rate": 3.891038244434579e-05,
|
|
"loss": 0.8224,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.22325109405720808,
|
|
"grad_norm": 0.8543662428855896,
|
|
"learning_rate": 3.884695883807953e-05,
|
|
"loss": 0.8572,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.22451956618253313,
|
|
"grad_norm": 0.8624857068061829,
|
|
"learning_rate": 3.878353523181328e-05,
|
|
"loss": 0.822,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.22578803830785818,
|
|
"grad_norm": 0.8224851489067078,
|
|
"learning_rate": 3.872011162554703e-05,
|
|
"loss": 0.8651,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.22705651043318323,
|
|
"grad_norm": 0.7411419749259949,
|
|
"learning_rate": 3.865668801928078e-05,
|
|
"loss": 0.8125,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.22832498255850828,
|
|
"grad_norm": 0.7959814667701721,
|
|
"learning_rate": 3.859326441301452e-05,
|
|
"loss": 0.8178,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.22959345468383333,
|
|
"grad_norm": 0.8626113533973694,
|
|
"learning_rate": 3.852984080674827e-05,
|
|
"loss": 0.8471,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.23086192680915837,
|
|
"grad_norm": 0.8034262657165527,
|
|
"learning_rate": 3.846641720048202e-05,
|
|
"loss": 0.8552,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.23213039893448342,
|
|
"grad_norm": 0.7086619734764099,
|
|
"learning_rate": 3.840299359421577e-05,
|
|
"loss": 0.8164,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.23339887105980847,
|
|
"grad_norm": 0.8688384890556335,
|
|
"learning_rate": 3.833956998794951e-05,
|
|
"loss": 0.8591,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.23466734318513352,
|
|
"grad_norm": 0.8770636916160583,
|
|
"learning_rate": 3.827614638168326e-05,
|
|
"loss": 0.835,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.23593581531045854,
|
|
"grad_norm": 0.6980867981910706,
|
|
"learning_rate": 3.821272277541702e-05,
|
|
"loss": 0.8467,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.2372042874357836,
|
|
"grad_norm": 0.7267412543296814,
|
|
"learning_rate": 3.814929916915076e-05,
|
|
"loss": 0.8238,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.23847275956110864,
|
|
"grad_norm": 0.7703206539154053,
|
|
"learning_rate": 3.808587556288451e-05,
|
|
"loss": 0.8233,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 0.2397412316864337,
|
|
"grad_norm": 0.8153693079948425,
|
|
"learning_rate": 3.802245195661825e-05,
|
|
"loss": 0.8361,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 0.24100970381175874,
|
|
"grad_norm": 0.7741461992263794,
|
|
"learning_rate": 3.795902835035201e-05,
|
|
"loss": 0.9083,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.24227817593708378,
|
|
"grad_norm": 0.8532843589782715,
|
|
"learning_rate": 3.789560474408575e-05,
|
|
"loss": 0.8692,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 0.24354664806240883,
|
|
"grad_norm": 0.8939579725265503,
|
|
"learning_rate": 3.78321811378195e-05,
|
|
"loss": 0.8336,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 0.24481512018773388,
|
|
"grad_norm": 0.8242042660713196,
|
|
"learning_rate": 3.776875753155325e-05,
|
|
"loss": 0.7705,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 0.24608359231305893,
|
|
"grad_norm": 0.9386515021324158,
|
|
"learning_rate": 3.7705333925287e-05,
|
|
"loss": 0.8021,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 0.24735206443838398,
|
|
"grad_norm": 0.9312074184417725,
|
|
"learning_rate": 3.764191031902074e-05,
|
|
"loss": 0.8113,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.248620536563709,
|
|
"grad_norm": 0.8644290566444397,
|
|
"learning_rate": 3.757848671275449e-05,
|
|
"loss": 0.8336,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 0.24988900868903405,
|
|
"grad_norm": 0.7512555122375488,
|
|
"learning_rate": 3.751506310648824e-05,
|
|
"loss": 0.832,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 0.2511574808143591,
|
|
"grad_norm": 0.8139438629150391,
|
|
"learning_rate": 3.745163950022199e-05,
|
|
"loss": 0.8583,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 0.2524259529396842,
|
|
"grad_norm": 0.800618588924408,
|
|
"learning_rate": 3.738821589395573e-05,
|
|
"loss": 0.8822,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 0.2536944250650092,
|
|
"grad_norm": 0.8025221228599548,
|
|
"learning_rate": 3.732479228768948e-05,
|
|
"loss": 0.8326,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.25496289719033427,
|
|
"grad_norm": 0.9008484482765198,
|
|
"learning_rate": 3.726136868142323e-05,
|
|
"loss": 0.8686,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 0.25623136931565926,
|
|
"grad_norm": 0.8681203126907349,
|
|
"learning_rate": 3.719794507515698e-05,
|
|
"loss": 0.8329,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 0.2574998414409843,
|
|
"grad_norm": 0.7322081327438354,
|
|
"learning_rate": 3.713452146889072e-05,
|
|
"loss": 0.8128,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 0.25876831356630936,
|
|
"grad_norm": 0.8935046792030334,
|
|
"learning_rate": 3.707109786262447e-05,
|
|
"loss": 0.8317,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 0.2600367856916344,
|
|
"grad_norm": 0.8813959360122681,
|
|
"learning_rate": 3.700767425635822e-05,
|
|
"loss": 0.855,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.26130525781695946,
|
|
"grad_norm": 0.7657369375228882,
|
|
"learning_rate": 3.694425065009197e-05,
|
|
"loss": 0.7865,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 0.2625737299422845,
|
|
"grad_norm": 0.7684091329574585,
|
|
"learning_rate": 3.688082704382572e-05,
|
|
"loss": 0.8039,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 0.26384220206760955,
|
|
"grad_norm": 0.7692010998725891,
|
|
"learning_rate": 3.681740343755946e-05,
|
|
"loss": 0.841,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 0.2651106741929346,
|
|
"grad_norm": 0.8434869050979614,
|
|
"learning_rate": 3.675397983129321e-05,
|
|
"loss": 0.831,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 0.26637914631825965,
|
|
"grad_norm": 0.7663161158561707,
|
|
"learning_rate": 3.669055622502696e-05,
|
|
"loss": 0.7749,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.2676476184435847,
|
|
"grad_norm": 0.8157145380973816,
|
|
"learning_rate": 3.662713261876071e-05,
|
|
"loss": 0.8067,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 0.26891609056890975,
|
|
"grad_norm": 0.8220170140266418,
|
|
"learning_rate": 3.656370901249445e-05,
|
|
"loss": 0.7872,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 0.2701845626942348,
|
|
"grad_norm": 0.9565273523330688,
|
|
"learning_rate": 3.6500285406228205e-05,
|
|
"loss": 0.8294,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 0.27145303481955985,
|
|
"grad_norm": 0.775265634059906,
|
|
"learning_rate": 3.643686179996195e-05,
|
|
"loss": 0.8129,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 0.2727215069448849,
|
|
"grad_norm": 0.9544994235038757,
|
|
"learning_rate": 3.63734381936957e-05,
|
|
"loss": 0.8212,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.27398997907020994,
|
|
"grad_norm": 1.041999340057373,
|
|
"learning_rate": 3.631001458742944e-05,
|
|
"loss": 0.8178,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 0.275258451195535,
|
|
"grad_norm": 0.8267019987106323,
|
|
"learning_rate": 3.6246590981163195e-05,
|
|
"loss": 0.8306,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 0.27652692332086004,
|
|
"grad_norm": 0.849429190158844,
|
|
"learning_rate": 3.618316737489694e-05,
|
|
"loss": 0.8179,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 0.2777953954461851,
|
|
"grad_norm": 0.8038565516471863,
|
|
"learning_rate": 3.611974376863069e-05,
|
|
"loss": 0.82,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 0.27906386757151014,
|
|
"grad_norm": 0.7065672874450684,
|
|
"learning_rate": 3.605632016236443e-05,
|
|
"loss": 0.8313,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.2803323396968352,
|
|
"grad_norm": 0.8574305772781372,
|
|
"learning_rate": 3.5992896556098185e-05,
|
|
"loss": 0.8529,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 0.28160081182216024,
|
|
"grad_norm": 0.8780136108398438,
|
|
"learning_rate": 3.592947294983193e-05,
|
|
"loss": 0.8181,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 0.28286928394748523,
|
|
"grad_norm": 0.8531150817871094,
|
|
"learning_rate": 3.586604934356568e-05,
|
|
"loss": 0.8044,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 0.2841377560728103,
|
|
"grad_norm": 0.807064414024353,
|
|
"learning_rate": 3.580262573729942e-05,
|
|
"loss": 0.8388,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 0.2854062281981353,
|
|
"grad_norm": 0.8407668471336365,
|
|
"learning_rate": 3.5739202131033175e-05,
|
|
"loss": 0.8579,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.2866747003234604,
|
|
"grad_norm": 0.9750702977180481,
|
|
"learning_rate": 3.567577852476692e-05,
|
|
"loss": 0.8606,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 0.2879431724487854,
|
|
"grad_norm": 0.8305932283401489,
|
|
"learning_rate": 3.561235491850067e-05,
|
|
"loss": 0.8312,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 0.28921164457411047,
|
|
"grad_norm": 0.7954509854316711,
|
|
"learning_rate": 3.5548931312234416e-05,
|
|
"loss": 0.8504,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 0.2904801166994355,
|
|
"grad_norm": 0.8049436211585999,
|
|
"learning_rate": 3.5485507705968165e-05,
|
|
"loss": 0.887,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 0.29174858882476057,
|
|
"grad_norm": 0.7284989953041077,
|
|
"learning_rate": 3.5422084099701914e-05,
|
|
"loss": 0.811,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.2930170609500856,
|
|
"grad_norm": 0.8093234300613403,
|
|
"learning_rate": 3.535866049343566e-05,
|
|
"loss": 0.8339,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 0.29428553307541067,
|
|
"grad_norm": 0.7123083472251892,
|
|
"learning_rate": 3.5295236887169406e-05,
|
|
"loss": 0.8251,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 0.2955540052007357,
|
|
"grad_norm": 0.8694972395896912,
|
|
"learning_rate": 3.5231813280903155e-05,
|
|
"loss": 0.8166,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 0.29682247732606076,
|
|
"grad_norm": 0.8641796708106995,
|
|
"learning_rate": 3.5168389674636904e-05,
|
|
"loss": 0.811,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 0.2980909494513858,
|
|
"grad_norm": 0.8516160249710083,
|
|
"learning_rate": 3.510496606837065e-05,
|
|
"loss": 0.8965,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.29935942157671086,
|
|
"grad_norm": 0.866295337677002,
|
|
"learning_rate": 3.5041542462104396e-05,
|
|
"loss": 0.8301,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 0.3006278937020359,
|
|
"grad_norm": 0.8344794511795044,
|
|
"learning_rate": 3.4978118855838145e-05,
|
|
"loss": 0.8512,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 0.30189636582736096,
|
|
"grad_norm": 0.8196772933006287,
|
|
"learning_rate": 3.4914695249571894e-05,
|
|
"loss": 0.8471,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 0.303164837952686,
|
|
"grad_norm": 0.66361004114151,
|
|
"learning_rate": 3.485127164330564e-05,
|
|
"loss": 0.8528,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 0.30443331007801105,
|
|
"grad_norm": 0.8254550099372864,
|
|
"learning_rate": 3.4787848037039386e-05,
|
|
"loss": 0.8133,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.3057017822033361,
|
|
"grad_norm": 0.7867299914360046,
|
|
"learning_rate": 3.4724424430773135e-05,
|
|
"loss": 0.8183,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 0.30697025432866115,
|
|
"grad_norm": 1.0029250383377075,
|
|
"learning_rate": 3.4661000824506884e-05,
|
|
"loss": 0.8192,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 0.3082387264539862,
|
|
"grad_norm": 0.8651610612869263,
|
|
"learning_rate": 3.459757721824063e-05,
|
|
"loss": 0.8755,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 0.3095071985793112,
|
|
"grad_norm": 0.8182082772254944,
|
|
"learning_rate": 3.4534153611974376e-05,
|
|
"loss": 0.8661,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 0.31077567070463624,
|
|
"grad_norm": 1.0455305576324463,
|
|
"learning_rate": 3.4470730005708125e-05,
|
|
"loss": 0.8134,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.3120441428299613,
|
|
"grad_norm": 0.7838363647460938,
|
|
"learning_rate": 3.4407306399441874e-05,
|
|
"loss": 0.8531,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 0.31331261495528634,
|
|
"grad_norm": 0.712868332862854,
|
|
"learning_rate": 3.434388279317562e-05,
|
|
"loss": 0.8109,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 0.3145810870806114,
|
|
"grad_norm": 0.8088375329971313,
|
|
"learning_rate": 3.428045918690937e-05,
|
|
"loss": 0.8717,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 0.31584955920593644,
|
|
"grad_norm": 0.8567003607749939,
|
|
"learning_rate": 3.421703558064312e-05,
|
|
"loss": 0.7997,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 0.3171180313312615,
|
|
"grad_norm": 0.7826852202415466,
|
|
"learning_rate": 3.4153611974376864e-05,
|
|
"loss": 0.8071,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.31838650345658653,
|
|
"grad_norm": 0.7257752418518066,
|
|
"learning_rate": 3.4090188368110613e-05,
|
|
"loss": 0.8053,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 0.3196549755819116,
|
|
"grad_norm": 0.9757511019706726,
|
|
"learning_rate": 3.402676476184436e-05,
|
|
"loss": 0.8195,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 0.32092344770723663,
|
|
"grad_norm": 0.9173797369003296,
|
|
"learning_rate": 3.396334115557811e-05,
|
|
"loss": 0.8662,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 0.3221919198325617,
|
|
"grad_norm": 0.8483538627624512,
|
|
"learning_rate": 3.3899917549311854e-05,
|
|
"loss": 0.8339,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 0.32346039195788673,
|
|
"grad_norm": 1.0109443664550781,
|
|
"learning_rate": 3.3836493943045604e-05,
|
|
"loss": 0.8731,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.3247288640832118,
|
|
"grad_norm": 0.8521518707275391,
|
|
"learning_rate": 3.377307033677935e-05,
|
|
"loss": 0.8367,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 0.3259973362085368,
|
|
"grad_norm": 0.8792763352394104,
|
|
"learning_rate": 3.37096467305131e-05,
|
|
"loss": 0.857,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 0.3272658083338619,
|
|
"grad_norm": 0.7366037368774414,
|
|
"learning_rate": 3.3646223124246844e-05,
|
|
"loss": 0.8556,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 0.3285342804591869,
|
|
"grad_norm": 0.9895220994949341,
|
|
"learning_rate": 3.3582799517980594e-05,
|
|
"loss": 0.885,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 0.32980275258451197,
|
|
"grad_norm": 0.8205326199531555,
|
|
"learning_rate": 3.351937591171434e-05,
|
|
"loss": 0.7703,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 0.331071224709837,
|
|
"grad_norm": 0.7155152559280396,
|
|
"learning_rate": 3.345595230544809e-05,
|
|
"loss": 0.8479,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 0.33233969683516207,
|
|
"grad_norm": 0.7578288912773132,
|
|
"learning_rate": 3.3392528699181834e-05,
|
|
"loss": 0.8019,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 0.3336081689604871,
|
|
"grad_norm": 0.8876450657844543,
|
|
"learning_rate": 3.3329105092915584e-05,
|
|
"loss": 0.8644,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 0.33487664108581217,
|
|
"grad_norm": 0.8280789256095886,
|
|
"learning_rate": 3.326568148664933e-05,
|
|
"loss": 0.8358,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 0.33614511321113716,
|
|
"grad_norm": 0.8563920855522156,
|
|
"learning_rate": 3.320225788038308e-05,
|
|
"loss": 0.8908,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 0.3374135853364622,
|
|
"grad_norm": 0.8536137342453003,
|
|
"learning_rate": 3.3138834274116824e-05,
|
|
"loss": 0.8188,
|
|
"step": 5320
|
|
},
|
|
{
|
|
"epoch": 0.33868205746178726,
|
|
"grad_norm": 0.909870982170105,
|
|
"learning_rate": 3.3075410667850574e-05,
|
|
"loss": 0.8175,
|
|
"step": 5340
|
|
},
|
|
{
|
|
"epoch": 0.3399505295871123,
|
|
"grad_norm": 0.7365782260894775,
|
|
"learning_rate": 3.301198706158432e-05,
|
|
"loss": 0.8463,
|
|
"step": 5360
|
|
},
|
|
{
|
|
"epoch": 0.34121900171243735,
|
|
"grad_norm": 0.7502683997154236,
|
|
"learning_rate": 3.294856345531807e-05,
|
|
"loss": 0.7959,
|
|
"step": 5380
|
|
},
|
|
{
|
|
"epoch": 0.3424874738377624,
|
|
"grad_norm": 0.6690531373023987,
|
|
"learning_rate": 3.288513984905182e-05,
|
|
"loss": 0.8266,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 0.34375594596308745,
|
|
"grad_norm": 0.9006823301315308,
|
|
"learning_rate": 3.2821716242785564e-05,
|
|
"loss": 0.8397,
|
|
"step": 5420
|
|
},
|
|
{
|
|
"epoch": 0.3450244180884125,
|
|
"grad_norm": 0.826954185962677,
|
|
"learning_rate": 3.275829263651932e-05,
|
|
"loss": 0.8022,
|
|
"step": 5440
|
|
},
|
|
{
|
|
"epoch": 0.34629289021373755,
|
|
"grad_norm": 0.8345193266868591,
|
|
"learning_rate": 3.269486903025306e-05,
|
|
"loss": 0.8148,
|
|
"step": 5460
|
|
},
|
|
{
|
|
"epoch": 0.3475613623390626,
|
|
"grad_norm": 0.7929914593696594,
|
|
"learning_rate": 3.263144542398681e-05,
|
|
"loss": 0.8073,
|
|
"step": 5480
|
|
},
|
|
{
|
|
"epoch": 0.34882983446438764,
|
|
"grad_norm": 0.9218955636024475,
|
|
"learning_rate": 3.2568021817720554e-05,
|
|
"loss": 0.8576,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 0.3500983065897127,
|
|
"grad_norm": 0.8164006471633911,
|
|
"learning_rate": 3.250459821145431e-05,
|
|
"loss": 0.8434,
|
|
"step": 5520
|
|
},
|
|
{
|
|
"epoch": 0.35136677871503774,
|
|
"grad_norm": 0.7511663436889648,
|
|
"learning_rate": 3.244117460518805e-05,
|
|
"loss": 0.7878,
|
|
"step": 5540
|
|
},
|
|
{
|
|
"epoch": 0.3526352508403628,
|
|
"grad_norm": 0.8193197846412659,
|
|
"learning_rate": 3.23777509989218e-05,
|
|
"loss": 0.8608,
|
|
"step": 5560
|
|
},
|
|
{
|
|
"epoch": 0.35390372296568784,
|
|
"grad_norm": 0.9112285375595093,
|
|
"learning_rate": 3.2314327392655544e-05,
|
|
"loss": 0.8404,
|
|
"step": 5580
|
|
},
|
|
{
|
|
"epoch": 0.3551721950910129,
|
|
"grad_norm": 0.75201416015625,
|
|
"learning_rate": 3.22509037863893e-05,
|
|
"loss": 0.813,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 0.35644066721633794,
|
|
"grad_norm": 0.8154107332229614,
|
|
"learning_rate": 3.218748018012304e-05,
|
|
"loss": 0.8083,
|
|
"step": 5620
|
|
},
|
|
{
|
|
"epoch": 0.357709139341663,
|
|
"grad_norm": 0.7872757315635681,
|
|
"learning_rate": 3.212405657385679e-05,
|
|
"loss": 0.8358,
|
|
"step": 5640
|
|
},
|
|
{
|
|
"epoch": 0.35897761146698803,
|
|
"grad_norm": 0.8861322999000549,
|
|
"learning_rate": 3.206063296759054e-05,
|
|
"loss": 0.8328,
|
|
"step": 5660
|
|
},
|
|
{
|
|
"epoch": 0.3602460835923131,
|
|
"grad_norm": 0.7487745881080627,
|
|
"learning_rate": 3.199720936132429e-05,
|
|
"loss": 0.8626,
|
|
"step": 5680
|
|
},
|
|
{
|
|
"epoch": 0.36151455571763813,
|
|
"grad_norm": 0.9134072065353394,
|
|
"learning_rate": 3.193378575505803e-05,
|
|
"loss": 0.8187,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 0.3627830278429631,
|
|
"grad_norm": 0.8179683089256287,
|
|
"learning_rate": 3.187036214879178e-05,
|
|
"loss": 0.8223,
|
|
"step": 5720
|
|
},
|
|
{
|
|
"epoch": 0.3640514999682882,
|
|
"grad_norm": 0.9501960873603821,
|
|
"learning_rate": 3.180693854252553e-05,
|
|
"loss": 0.8064,
|
|
"step": 5740
|
|
},
|
|
{
|
|
"epoch": 0.3653199720936132,
|
|
"grad_norm": 0.8398934006690979,
|
|
"learning_rate": 3.174351493625928e-05,
|
|
"loss": 0.8279,
|
|
"step": 5760
|
|
},
|
|
{
|
|
"epoch": 0.36658844421893827,
|
|
"grad_norm": 0.7718421816825867,
|
|
"learning_rate": 3.168009132999302e-05,
|
|
"loss": 0.8453,
|
|
"step": 5780
|
|
},
|
|
{
|
|
"epoch": 0.3678569163442633,
|
|
"grad_norm": 0.7935000061988831,
|
|
"learning_rate": 3.161666772372677e-05,
|
|
"loss": 0.8292,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 0.36912538846958837,
|
|
"grad_norm": 0.8383910655975342,
|
|
"learning_rate": 3.155324411746052e-05,
|
|
"loss": 0.8116,
|
|
"step": 5820
|
|
},
|
|
{
|
|
"epoch": 0.3703938605949134,
|
|
"grad_norm": 0.7147135734558105,
|
|
"learning_rate": 3.148982051119427e-05,
|
|
"loss": 0.8146,
|
|
"step": 5840
|
|
},
|
|
{
|
|
"epoch": 0.37166233272023846,
|
|
"grad_norm": 0.792220950126648,
|
|
"learning_rate": 3.142639690492802e-05,
|
|
"loss": 0.8278,
|
|
"step": 5860
|
|
},
|
|
{
|
|
"epoch": 0.3729308048455635,
|
|
"grad_norm": 0.9010721445083618,
|
|
"learning_rate": 3.136297329866176e-05,
|
|
"loss": 0.8217,
|
|
"step": 5880
|
|
},
|
|
{
|
|
"epoch": 0.37419927697088856,
|
|
"grad_norm": 0.7762110233306885,
|
|
"learning_rate": 3.129954969239551e-05,
|
|
"loss": 0.7813,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 0.3754677490962136,
|
|
"grad_norm": 0.8827633261680603,
|
|
"learning_rate": 3.123612608612926e-05,
|
|
"loss": 0.8087,
|
|
"step": 5920
|
|
},
|
|
{
|
|
"epoch": 0.37673622122153866,
|
|
"grad_norm": 0.7577320337295532,
|
|
"learning_rate": 3.117270247986301e-05,
|
|
"loss": 0.8198,
|
|
"step": 5940
|
|
},
|
|
{
|
|
"epoch": 0.3780046933468637,
|
|
"grad_norm": 0.8777590990066528,
|
|
"learning_rate": 3.110927887359675e-05,
|
|
"loss": 0.8144,
|
|
"step": 5960
|
|
},
|
|
{
|
|
"epoch": 0.37927316547218876,
|
|
"grad_norm": 0.8621464967727661,
|
|
"learning_rate": 3.10458552673305e-05,
|
|
"loss": 0.837,
|
|
"step": 5980
|
|
},
|
|
{
|
|
"epoch": 0.3805416375975138,
|
|
"grad_norm": 0.842014491558075,
|
|
"learning_rate": 3.098243166106425e-05,
|
|
"loss": 0.7945,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 0.38181010972283885,
|
|
"grad_norm": 0.8439661860466003,
|
|
"learning_rate": 3.0919008054798e-05,
|
|
"loss": 0.7907,
|
|
"step": 6020
|
|
},
|
|
{
|
|
"epoch": 0.3830785818481639,
|
|
"grad_norm": 0.7277771830558777,
|
|
"learning_rate": 3.085558444853174e-05,
|
|
"loss": 0.7792,
|
|
"step": 6040
|
|
},
|
|
{
|
|
"epoch": 0.38434705397348895,
|
|
"grad_norm": 0.8808379173278809,
|
|
"learning_rate": 3.07921608422655e-05,
|
|
"loss": 0.8356,
|
|
"step": 6060
|
|
},
|
|
{
|
|
"epoch": 0.385615526098814,
|
|
"grad_norm": 0.7495381832122803,
|
|
"learning_rate": 3.072873723599924e-05,
|
|
"loss": 0.8252,
|
|
"step": 6080
|
|
},
|
|
{
|
|
"epoch": 0.38688399822413905,
|
|
"grad_norm": 0.8698447942733765,
|
|
"learning_rate": 3.066531362973299e-05,
|
|
"loss": 0.8633,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 0.3881524703494641,
|
|
"grad_norm": 0.8586138486862183,
|
|
"learning_rate": 3.060189002346673e-05,
|
|
"loss": 0.8542,
|
|
"step": 6120
|
|
},
|
|
{
|
|
"epoch": 0.3894209424747891,
|
|
"grad_norm": 0.8375261425971985,
|
|
"learning_rate": 3.053846641720049e-05,
|
|
"loss": 0.8353,
|
|
"step": 6140
|
|
},
|
|
{
|
|
"epoch": 0.39068941460011414,
|
|
"grad_norm": 0.723111093044281,
|
|
"learning_rate": 3.047504281093423e-05,
|
|
"loss": 0.8011,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 0.3919578867254392,
|
|
"grad_norm": 0.7700281143188477,
|
|
"learning_rate": 3.041161920466798e-05,
|
|
"loss": 0.8309,
|
|
"step": 6180
|
|
},
|
|
{
|
|
"epoch": 0.39322635885076423,
|
|
"grad_norm": 0.8734796643257141,
|
|
"learning_rate": 3.0348195598401725e-05,
|
|
"loss": 0.7967,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 0.3944948309760893,
|
|
"grad_norm": 0.8601865172386169,
|
|
"learning_rate": 3.0284771992135474e-05,
|
|
"loss": 0.819,
|
|
"step": 6220
|
|
},
|
|
{
|
|
"epoch": 0.39576330310141433,
|
|
"grad_norm": 0.8753730058670044,
|
|
"learning_rate": 3.022134838586922e-05,
|
|
"loss": 0.814,
|
|
"step": 6240
|
|
},
|
|
{
|
|
"epoch": 0.3970317752267394,
|
|
"grad_norm": 0.8561723232269287,
|
|
"learning_rate": 3.015792477960297e-05,
|
|
"loss": 0.8073,
|
|
"step": 6260
|
|
},
|
|
{
|
|
"epoch": 0.39830024735206443,
|
|
"grad_norm": 0.8156526684761047,
|
|
"learning_rate": 3.009450117333672e-05,
|
|
"loss": 0.8706,
|
|
"step": 6280
|
|
},
|
|
{
|
|
"epoch": 0.3995687194773895,
|
|
"grad_norm": 0.7783714532852173,
|
|
"learning_rate": 3.0031077567070464e-05,
|
|
"loss": 0.7929,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 0.4008371916027145,
|
|
"grad_norm": 0.7870660424232483,
|
|
"learning_rate": 2.9967653960804216e-05,
|
|
"loss": 0.8169,
|
|
"step": 6320
|
|
},
|
|
{
|
|
"epoch": 0.4021056637280396,
|
|
"grad_norm": 0.9100777506828308,
|
|
"learning_rate": 2.990423035453796e-05,
|
|
"loss": 0.8365,
|
|
"step": 6340
|
|
},
|
|
{
|
|
"epoch": 0.4033741358533646,
|
|
"grad_norm": 0.8847957849502563,
|
|
"learning_rate": 2.984080674827171e-05,
|
|
"loss": 0.8178,
|
|
"step": 6360
|
|
},
|
|
{
|
|
"epoch": 0.4046426079786897,
|
|
"grad_norm": 0.9754513502120972,
|
|
"learning_rate": 2.9777383142005454e-05,
|
|
"loss": 0.8457,
|
|
"step": 6380
|
|
},
|
|
{
|
|
"epoch": 0.4059110801040147,
|
|
"grad_norm": 0.8685561418533325,
|
|
"learning_rate": 2.9713959535739206e-05,
|
|
"loss": 0.823,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 0.40717955222933977,
|
|
"grad_norm": 0.8493009209632874,
|
|
"learning_rate": 2.965053592947295e-05,
|
|
"loss": 0.8221,
|
|
"step": 6420
|
|
},
|
|
{
|
|
"epoch": 0.4084480243546648,
|
|
"grad_norm": 0.954006016254425,
|
|
"learning_rate": 2.95871123232067e-05,
|
|
"loss": 0.811,
|
|
"step": 6440
|
|
},
|
|
{
|
|
"epoch": 0.40971649647998987,
|
|
"grad_norm": 11.963293075561523,
|
|
"learning_rate": 2.9523688716940444e-05,
|
|
"loss": 0.8032,
|
|
"step": 6460
|
|
},
|
|
{
|
|
"epoch": 0.4109849686053149,
|
|
"grad_norm": 0.9225347638130188,
|
|
"learning_rate": 2.9460265110674196e-05,
|
|
"loss": 0.8645,
|
|
"step": 6480
|
|
},
|
|
{
|
|
"epoch": 0.41225344073063996,
|
|
"grad_norm": 0.7878516316413879,
|
|
"learning_rate": 2.939684150440794e-05,
|
|
"loss": 0.8303,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 0.413521912855965,
|
|
"grad_norm": 0.815546989440918,
|
|
"learning_rate": 2.933341789814169e-05,
|
|
"loss": 0.7873,
|
|
"step": 6520
|
|
},
|
|
{
|
|
"epoch": 0.41479038498129006,
|
|
"grad_norm": 0.7072951197624207,
|
|
"learning_rate": 2.9269994291875437e-05,
|
|
"loss": 0.8231,
|
|
"step": 6540
|
|
},
|
|
{
|
|
"epoch": 0.4160588571066151,
|
|
"grad_norm": 0.7275887131690979,
|
|
"learning_rate": 2.9206570685609186e-05,
|
|
"loss": 0.8445,
|
|
"step": 6560
|
|
},
|
|
{
|
|
"epoch": 0.4173273292319401,
|
|
"grad_norm": 0.7888057827949524,
|
|
"learning_rate": 2.9143147079342932e-05,
|
|
"loss": 0.7819,
|
|
"step": 6580
|
|
},
|
|
{
|
|
"epoch": 0.41859580135726515,
|
|
"grad_norm": 0.8133323788642883,
|
|
"learning_rate": 2.907972347307668e-05,
|
|
"loss": 0.7918,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 0.4198642734825902,
|
|
"grad_norm": 0.9344608187675476,
|
|
"learning_rate": 2.9016299866810427e-05,
|
|
"loss": 0.805,
|
|
"step": 6620
|
|
},
|
|
{
|
|
"epoch": 0.42113274560791525,
|
|
"grad_norm": 0.9130797386169434,
|
|
"learning_rate": 2.8952876260544176e-05,
|
|
"loss": 0.808,
|
|
"step": 6640
|
|
},
|
|
{
|
|
"epoch": 0.4224012177332403,
|
|
"grad_norm": 0.8647783398628235,
|
|
"learning_rate": 2.8889452654277922e-05,
|
|
"loss": 0.8027,
|
|
"step": 6660
|
|
},
|
|
{
|
|
"epoch": 0.42366968985856535,
|
|
"grad_norm": 0.8025421500205994,
|
|
"learning_rate": 2.882602904801167e-05,
|
|
"loss": 0.811,
|
|
"step": 6680
|
|
},
|
|
{
|
|
"epoch": 0.4249381619838904,
|
|
"grad_norm": 0.7249786853790283,
|
|
"learning_rate": 2.876260544174542e-05,
|
|
"loss": 0.7597,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 0.42620663410921544,
|
|
"grad_norm": 0.7876623868942261,
|
|
"learning_rate": 2.8699181835479166e-05,
|
|
"loss": 0.8297,
|
|
"step": 6720
|
|
},
|
|
{
|
|
"epoch": 0.4274751062345405,
|
|
"grad_norm": 0.8284019827842712,
|
|
"learning_rate": 2.8635758229212916e-05,
|
|
"loss": 0.7985,
|
|
"step": 6740
|
|
},
|
|
{
|
|
"epoch": 0.42874357835986554,
|
|
"grad_norm": 0.7855024337768555,
|
|
"learning_rate": 2.857233462294666e-05,
|
|
"loss": 0.8414,
|
|
"step": 6760
|
|
},
|
|
{
|
|
"epoch": 0.4300120504851906,
|
|
"grad_norm": 0.789946436882019,
|
|
"learning_rate": 2.850891101668041e-05,
|
|
"loss": 0.8307,
|
|
"step": 6780
|
|
},
|
|
{
|
|
"epoch": 0.43128052261051564,
|
|
"grad_norm": 0.7722301483154297,
|
|
"learning_rate": 2.8445487410414156e-05,
|
|
"loss": 0.8101,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 0.4325489947358407,
|
|
"grad_norm": 0.8557891249656677,
|
|
"learning_rate": 2.8382063804147906e-05,
|
|
"loss": 0.846,
|
|
"step": 6820
|
|
},
|
|
{
|
|
"epoch": 0.43381746686116573,
|
|
"grad_norm": 0.8216169476509094,
|
|
"learning_rate": 2.831864019788165e-05,
|
|
"loss": 0.7966,
|
|
"step": 6840
|
|
},
|
|
{
|
|
"epoch": 0.4350859389864908,
|
|
"grad_norm": 0.87419593334198,
|
|
"learning_rate": 2.82552165916154e-05,
|
|
"loss": 0.7777,
|
|
"step": 6860
|
|
},
|
|
{
|
|
"epoch": 0.43635441111181583,
|
|
"grad_norm": 0.8956803679466248,
|
|
"learning_rate": 2.8191792985349146e-05,
|
|
"loss": 0.8066,
|
|
"step": 6880
|
|
},
|
|
{
|
|
"epoch": 0.4376228832371409,
|
|
"grad_norm": 0.8264901638031006,
|
|
"learning_rate": 2.8128369379082896e-05,
|
|
"loss": 0.8489,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 0.43889135536246593,
|
|
"grad_norm": 0.7960401773452759,
|
|
"learning_rate": 2.806494577281664e-05,
|
|
"loss": 0.8257,
|
|
"step": 6920
|
|
},
|
|
{
|
|
"epoch": 0.440159827487791,
|
|
"grad_norm": 0.7691190838813782,
|
|
"learning_rate": 2.8001522166550394e-05,
|
|
"loss": 0.8365,
|
|
"step": 6940
|
|
},
|
|
{
|
|
"epoch": 0.441428299613116,
|
|
"grad_norm": 0.7433714270591736,
|
|
"learning_rate": 2.7938098560284136e-05,
|
|
"loss": 0.7755,
|
|
"step": 6960
|
|
},
|
|
{
|
|
"epoch": 0.4426967717384411,
|
|
"grad_norm": 0.7270233035087585,
|
|
"learning_rate": 2.787467495401789e-05,
|
|
"loss": 0.808,
|
|
"step": 6980
|
|
},
|
|
{
|
|
"epoch": 0.44396524386376607,
|
|
"grad_norm": 0.7907856106758118,
|
|
"learning_rate": 2.781125134775163e-05,
|
|
"loss": 0.8038,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 0.4452337159890911,
|
|
"grad_norm": 0.7421363592147827,
|
|
"learning_rate": 2.7747827741485384e-05,
|
|
"loss": 0.8058,
|
|
"step": 7020
|
|
},
|
|
{
|
|
"epoch": 0.44650218811441617,
|
|
"grad_norm": 0.8635361790657043,
|
|
"learning_rate": 2.7684404135219126e-05,
|
|
"loss": 0.786,
|
|
"step": 7040
|
|
},
|
|
{
|
|
"epoch": 0.4477706602397412,
|
|
"grad_norm": 0.9545580744743347,
|
|
"learning_rate": 2.762098052895288e-05,
|
|
"loss": 0.8431,
|
|
"step": 7060
|
|
},
|
|
{
|
|
"epoch": 0.44903913236506626,
|
|
"grad_norm": 0.9529020190238953,
|
|
"learning_rate": 2.755755692268662e-05,
|
|
"loss": 0.8082,
|
|
"step": 7080
|
|
},
|
|
{
|
|
"epoch": 0.4503076044903913,
|
|
"grad_norm": 0.8344403505325317,
|
|
"learning_rate": 2.7494133316420374e-05,
|
|
"loss": 0.8031,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 0.45157607661571636,
|
|
"grad_norm": 0.8490266799926758,
|
|
"learning_rate": 2.7430709710154123e-05,
|
|
"loss": 0.7952,
|
|
"step": 7120
|
|
},
|
|
{
|
|
"epoch": 0.4528445487410414,
|
|
"grad_norm": 0.8201053738594055,
|
|
"learning_rate": 2.736728610388787e-05,
|
|
"loss": 0.8057,
|
|
"step": 7140
|
|
},
|
|
{
|
|
"epoch": 0.45411302086636646,
|
|
"grad_norm": 0.7206814289093018,
|
|
"learning_rate": 2.7303862497621618e-05,
|
|
"loss": 0.7907,
|
|
"step": 7160
|
|
},
|
|
{
|
|
"epoch": 0.4553814929916915,
|
|
"grad_norm": 0.7151837944984436,
|
|
"learning_rate": 2.7240438891355364e-05,
|
|
"loss": 0.7989,
|
|
"step": 7180
|
|
},
|
|
{
|
|
"epoch": 0.45664996511701655,
|
|
"grad_norm": 0.73557049036026,
|
|
"learning_rate": 2.7177015285089113e-05,
|
|
"loss": 0.7887,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 0.4579184372423416,
|
|
"grad_norm": 0.8012831807136536,
|
|
"learning_rate": 2.711359167882286e-05,
|
|
"loss": 0.7963,
|
|
"step": 7220
|
|
},
|
|
{
|
|
"epoch": 0.45918690936766665,
|
|
"grad_norm": 0.7544090747833252,
|
|
"learning_rate": 2.7050168072556608e-05,
|
|
"loss": 0.8021,
|
|
"step": 7240
|
|
},
|
|
{
|
|
"epoch": 0.4604553814929917,
|
|
"grad_norm": 0.8027962446212769,
|
|
"learning_rate": 2.6986744466290354e-05,
|
|
"loss": 0.8346,
|
|
"step": 7260
|
|
},
|
|
{
|
|
"epoch": 0.46172385361831675,
|
|
"grad_norm": 0.8969400525093079,
|
|
"learning_rate": 2.6923320860024103e-05,
|
|
"loss": 0.8362,
|
|
"step": 7280
|
|
},
|
|
{
|
|
"epoch": 0.4629923257436418,
|
|
"grad_norm": 0.8505738377571106,
|
|
"learning_rate": 2.685989725375785e-05,
|
|
"loss": 0.8253,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 0.46426079786896685,
|
|
"grad_norm": 0.8005324602127075,
|
|
"learning_rate": 2.6796473647491598e-05,
|
|
"loss": 0.8133,
|
|
"step": 7320
|
|
},
|
|
{
|
|
"epoch": 0.4655292699942919,
|
|
"grad_norm": 0.8265887498855591,
|
|
"learning_rate": 2.6733050041225344e-05,
|
|
"loss": 0.7974,
|
|
"step": 7340
|
|
},
|
|
{
|
|
"epoch": 0.46679774211961694,
|
|
"grad_norm": 0.8310080170631409,
|
|
"learning_rate": 2.6669626434959093e-05,
|
|
"loss": 0.8311,
|
|
"step": 7360
|
|
},
|
|
{
|
|
"epoch": 0.468066214244942,
|
|
"grad_norm": 0.8837007284164429,
|
|
"learning_rate": 2.660620282869284e-05,
|
|
"loss": 0.8086,
|
|
"step": 7380
|
|
},
|
|
{
|
|
"epoch": 0.46933468637026704,
|
|
"grad_norm": 0.8574205040931702,
|
|
"learning_rate": 2.6542779222426588e-05,
|
|
"loss": 0.8045,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 0.47060315849559203,
|
|
"grad_norm": 0.7924466729164124,
|
|
"learning_rate": 2.6479355616160334e-05,
|
|
"loss": 0.831,
|
|
"step": 7420
|
|
},
|
|
{
|
|
"epoch": 0.4718716306209171,
|
|
"grad_norm": 0.8812252283096313,
|
|
"learning_rate": 2.6415932009894083e-05,
|
|
"loss": 0.8414,
|
|
"step": 7440
|
|
},
|
|
{
|
|
"epoch": 0.47314010274624213,
|
|
"grad_norm": 0.8879112601280212,
|
|
"learning_rate": 2.635250840362783e-05,
|
|
"loss": 0.8037,
|
|
"step": 7460
|
|
},
|
|
{
|
|
"epoch": 0.4744085748715672,
|
|
"grad_norm": 0.8532351851463318,
|
|
"learning_rate": 2.6289084797361578e-05,
|
|
"loss": 0.7961,
|
|
"step": 7480
|
|
},
|
|
{
|
|
"epoch": 0.4756770469968922,
|
|
"grad_norm": 0.7800135016441345,
|
|
"learning_rate": 2.6225661191095324e-05,
|
|
"loss": 0.8158,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 0.4769455191222173,
|
|
"grad_norm": 0.8264251947402954,
|
|
"learning_rate": 2.6162237584829073e-05,
|
|
"loss": 0.7902,
|
|
"step": 7520
|
|
},
|
|
{
|
|
"epoch": 0.4782139912475423,
|
|
"grad_norm": 0.9216287136077881,
|
|
"learning_rate": 2.6098813978562826e-05,
|
|
"loss": 0.8387,
|
|
"step": 7540
|
|
},
|
|
{
|
|
"epoch": 0.4794824633728674,
|
|
"grad_norm": 0.8331848382949829,
|
|
"learning_rate": 2.6035390372296568e-05,
|
|
"loss": 0.8309,
|
|
"step": 7560
|
|
},
|
|
{
|
|
"epoch": 0.4807509354981924,
|
|
"grad_norm": 0.7791485786437988,
|
|
"learning_rate": 2.597196676603032e-05,
|
|
"loss": 0.7954,
|
|
"step": 7580
|
|
},
|
|
{
|
|
"epoch": 0.48201940762351747,
|
|
"grad_norm": 0.8223782777786255,
|
|
"learning_rate": 2.5908543159764063e-05,
|
|
"loss": 0.8012,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 0.4832878797488425,
|
|
"grad_norm": 0.7362112998962402,
|
|
"learning_rate": 2.5845119553497816e-05,
|
|
"loss": 0.8548,
|
|
"step": 7620
|
|
},
|
|
{
|
|
"epoch": 0.48455635187416757,
|
|
"grad_norm": 0.9084497094154358,
|
|
"learning_rate": 2.578169594723156e-05,
|
|
"loss": 0.7889,
|
|
"step": 7640
|
|
},
|
|
{
|
|
"epoch": 0.4858248239994926,
|
|
"grad_norm": 0.8268200755119324,
|
|
"learning_rate": 2.571827234096531e-05,
|
|
"loss": 0.778,
|
|
"step": 7660
|
|
},
|
|
{
|
|
"epoch": 0.48709329612481767,
|
|
"grad_norm": 0.683861494064331,
|
|
"learning_rate": 2.5654848734699057e-05,
|
|
"loss": 0.8079,
|
|
"step": 7680
|
|
},
|
|
{
|
|
"epoch": 0.4883617682501427,
|
|
"grad_norm": 0.9449877142906189,
|
|
"learning_rate": 2.5591425128432806e-05,
|
|
"loss": 0.7967,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 0.48963024037546776,
|
|
"grad_norm": 0.8445014357566833,
|
|
"learning_rate": 2.552800152216655e-05,
|
|
"loss": 0.7774,
|
|
"step": 7720
|
|
},
|
|
{
|
|
"epoch": 0.4908987125007928,
|
|
"grad_norm": 0.811717689037323,
|
|
"learning_rate": 2.54645779159003e-05,
|
|
"loss": 0.7659,
|
|
"step": 7740
|
|
},
|
|
{
|
|
"epoch": 0.49216718462611786,
|
|
"grad_norm": 0.7867732644081116,
|
|
"learning_rate": 2.5401154309634047e-05,
|
|
"loss": 0.8539,
|
|
"step": 7760
|
|
},
|
|
{
|
|
"epoch": 0.4934356567514429,
|
|
"grad_norm": 0.7808852791786194,
|
|
"learning_rate": 2.5337730703367796e-05,
|
|
"loss": 0.8294,
|
|
"step": 7780
|
|
},
|
|
{
|
|
"epoch": 0.49470412887676796,
|
|
"grad_norm": 0.8998913168907166,
|
|
"learning_rate": 2.527430709710154e-05,
|
|
"loss": 0.8507,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 0.495972601002093,
|
|
"grad_norm": 0.7990160584449768,
|
|
"learning_rate": 2.521088349083529e-05,
|
|
"loss": 0.7261,
|
|
"step": 7820
|
|
},
|
|
{
|
|
"epoch": 0.497241073127418,
|
|
"grad_norm": 0.7361629605293274,
|
|
"learning_rate": 2.5147459884569037e-05,
|
|
"loss": 0.8058,
|
|
"step": 7840
|
|
},
|
|
{
|
|
"epoch": 0.49850954525274305,
|
|
"grad_norm": 0.849870502948761,
|
|
"learning_rate": 2.5084036278302786e-05,
|
|
"loss": 0.7708,
|
|
"step": 7860
|
|
},
|
|
{
|
|
"epoch": 0.4997780173780681,
|
|
"grad_norm": 0.7667344808578491,
|
|
"learning_rate": 2.502061267203653e-05,
|
|
"loss": 0.818,
|
|
"step": 7880
|
|
},
|
|
{
|
|
"epoch": 0.5010464895033931,
|
|
"grad_norm": 0.800609827041626,
|
|
"learning_rate": 2.495718906577028e-05,
|
|
"loss": 0.8248,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 0.5023149616287182,
|
|
"grad_norm": 0.7429226040840149,
|
|
"learning_rate": 2.489376545950403e-05,
|
|
"loss": 0.8395,
|
|
"step": 7920
|
|
},
|
|
{
|
|
"epoch": 0.5035834337540432,
|
|
"grad_norm": 0.7970502972602844,
|
|
"learning_rate": 2.4830341853237776e-05,
|
|
"loss": 0.8182,
|
|
"step": 7940
|
|
},
|
|
{
|
|
"epoch": 0.5048519058793683,
|
|
"grad_norm": 1.7285821437835693,
|
|
"learning_rate": 2.4766918246971525e-05,
|
|
"loss": 0.8241,
|
|
"step": 7960
|
|
},
|
|
{
|
|
"epoch": 0.5061203780046933,
|
|
"grad_norm": 0.8314895629882812,
|
|
"learning_rate": 2.470349464070527e-05,
|
|
"loss": 0.8017,
|
|
"step": 7980
|
|
},
|
|
{
|
|
"epoch": 0.5073888501300184,
|
|
"grad_norm": 0.9516363143920898,
|
|
"learning_rate": 2.464007103443902e-05,
|
|
"loss": 0.8395,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 0.5086573222553434,
|
|
"grad_norm": 0.8164798617362976,
|
|
"learning_rate": 2.4576647428172766e-05,
|
|
"loss": 0.8033,
|
|
"step": 8020
|
|
},
|
|
{
|
|
"epoch": 0.5099257943806685,
|
|
"grad_norm": 0.7700650691986084,
|
|
"learning_rate": 2.4513223821906515e-05,
|
|
"loss": 0.779,
|
|
"step": 8040
|
|
},
|
|
{
|
|
"epoch": 0.5111942665059935,
|
|
"grad_norm": 0.8437737226486206,
|
|
"learning_rate": 2.444980021564026e-05,
|
|
"loss": 0.8112,
|
|
"step": 8060
|
|
},
|
|
{
|
|
"epoch": 0.5124627386313185,
|
|
"grad_norm": 0.8371322751045227,
|
|
"learning_rate": 2.438637660937401e-05,
|
|
"loss": 0.8152,
|
|
"step": 8080
|
|
},
|
|
{
|
|
"epoch": 0.5137312107566436,
|
|
"grad_norm": 0.8382763862609863,
|
|
"learning_rate": 2.4322953003107756e-05,
|
|
"loss": 0.8414,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 0.5149996828819686,
|
|
"grad_norm": 0.9525557160377502,
|
|
"learning_rate": 2.4259529396841505e-05,
|
|
"loss": 0.8091,
|
|
"step": 8120
|
|
},
|
|
{
|
|
"epoch": 0.5162681550072937,
|
|
"grad_norm": 0.7620564699172974,
|
|
"learning_rate": 2.419610579057525e-05,
|
|
"loss": 0.7981,
|
|
"step": 8140
|
|
},
|
|
{
|
|
"epoch": 0.5175366271326187,
|
|
"grad_norm": 0.8722305297851562,
|
|
"learning_rate": 2.4132682184309003e-05,
|
|
"loss": 0.8079,
|
|
"step": 8160
|
|
},
|
|
{
|
|
"epoch": 0.5188050992579438,
|
|
"grad_norm": 0.8774722218513489,
|
|
"learning_rate": 2.406925857804275e-05,
|
|
"loss": 0.7937,
|
|
"step": 8180
|
|
},
|
|
{
|
|
"epoch": 0.5200735713832688,
|
|
"grad_norm": 0.7515254616737366,
|
|
"learning_rate": 2.40058349717765e-05,
|
|
"loss": 0.8134,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 0.5213420435085939,
|
|
"grad_norm": 0.8385280966758728,
|
|
"learning_rate": 2.3942411365510244e-05,
|
|
"loss": 0.8064,
|
|
"step": 8220
|
|
},
|
|
{
|
|
"epoch": 0.5226105156339189,
|
|
"grad_norm": 0.8530700206756592,
|
|
"learning_rate": 2.3878987759243994e-05,
|
|
"loss": 0.8249,
|
|
"step": 8240
|
|
},
|
|
{
|
|
"epoch": 0.523878987759244,
|
|
"grad_norm": 0.7866977453231812,
|
|
"learning_rate": 2.381556415297774e-05,
|
|
"loss": 0.803,
|
|
"step": 8260
|
|
},
|
|
{
|
|
"epoch": 0.525147459884569,
|
|
"grad_norm": 0.8509036302566528,
|
|
"learning_rate": 2.375214054671149e-05,
|
|
"loss": 0.8307,
|
|
"step": 8280
|
|
},
|
|
{
|
|
"epoch": 0.5264159320098941,
|
|
"grad_norm": 0.8268348574638367,
|
|
"learning_rate": 2.3688716940445234e-05,
|
|
"loss": 0.8205,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 0.5276844041352191,
|
|
"grad_norm": 0.9176819920539856,
|
|
"learning_rate": 2.3625293334178984e-05,
|
|
"loss": 0.7774,
|
|
"step": 8320
|
|
},
|
|
{
|
|
"epoch": 0.5289528762605442,
|
|
"grad_norm": 0.758176326751709,
|
|
"learning_rate": 2.356186972791273e-05,
|
|
"loss": 0.8067,
|
|
"step": 8340
|
|
},
|
|
{
|
|
"epoch": 0.5302213483858692,
|
|
"grad_norm": 0.7369076609611511,
|
|
"learning_rate": 2.349844612164648e-05,
|
|
"loss": 0.7673,
|
|
"step": 8360
|
|
},
|
|
{
|
|
"epoch": 0.5314898205111943,
|
|
"grad_norm": 0.8413040041923523,
|
|
"learning_rate": 2.3435022515380224e-05,
|
|
"loss": 0.8289,
|
|
"step": 8380
|
|
},
|
|
{
|
|
"epoch": 0.5327582926365193,
|
|
"grad_norm": 0.8975269794464111,
|
|
"learning_rate": 2.3371598909113974e-05,
|
|
"loss": 0.8097,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 0.5340267647618444,
|
|
"grad_norm": 0.8501763343811035,
|
|
"learning_rate": 2.330817530284772e-05,
|
|
"loss": 0.7867,
|
|
"step": 8420
|
|
},
|
|
{
|
|
"epoch": 0.5352952368871694,
|
|
"grad_norm": 0.9364180564880371,
|
|
"learning_rate": 2.324475169658147e-05,
|
|
"loss": 0.7925,
|
|
"step": 8440
|
|
},
|
|
{
|
|
"epoch": 0.5365637090124945,
|
|
"grad_norm": 0.6347882151603699,
|
|
"learning_rate": 2.3181328090315214e-05,
|
|
"loss": 0.7635,
|
|
"step": 8460
|
|
},
|
|
{
|
|
"epoch": 0.5378321811378195,
|
|
"grad_norm": 0.8539864420890808,
|
|
"learning_rate": 2.3117904484048964e-05,
|
|
"loss": 0.7993,
|
|
"step": 8480
|
|
},
|
|
{
|
|
"epoch": 0.5391006532631445,
|
|
"grad_norm": 0.8893634080886841,
|
|
"learning_rate": 2.305448087778271e-05,
|
|
"loss": 0.7809,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 0.5403691253884696,
|
|
"grad_norm": 0.7993662357330322,
|
|
"learning_rate": 2.299105727151646e-05,
|
|
"loss": 0.8639,
|
|
"step": 8520
|
|
},
|
|
{
|
|
"epoch": 0.5416375975137946,
|
|
"grad_norm": 0.8157054781913757,
|
|
"learning_rate": 2.2927633665250208e-05,
|
|
"loss": 0.8121,
|
|
"step": 8540
|
|
},
|
|
{
|
|
"epoch": 0.5429060696391197,
|
|
"grad_norm": 0.8141036033630371,
|
|
"learning_rate": 2.2864210058983954e-05,
|
|
"loss": 0.7748,
|
|
"step": 8560
|
|
},
|
|
{
|
|
"epoch": 0.5441745417644447,
|
|
"grad_norm": 0.8311188220977783,
|
|
"learning_rate": 2.2803957633031015e-05,
|
|
"loss": 0.8182,
|
|
"step": 8580
|
|
},
|
|
{
|
|
"epoch": 0.5454430138897698,
|
|
"grad_norm": 0.8923128247261047,
|
|
"learning_rate": 2.274053402676476e-05,
|
|
"loss": 0.7841,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 0.5467114860150948,
|
|
"grad_norm": 0.8246520757675171,
|
|
"learning_rate": 2.267711042049851e-05,
|
|
"loss": 0.8146,
|
|
"step": 8620
|
|
},
|
|
{
|
|
"epoch": 0.5479799581404199,
|
|
"grad_norm": 0.8469933271408081,
|
|
"learning_rate": 2.261368681423226e-05,
|
|
"loss": 0.817,
|
|
"step": 8640
|
|
},
|
|
{
|
|
"epoch": 0.5492484302657449,
|
|
"grad_norm": 0.8211717009544373,
|
|
"learning_rate": 2.2550263207966005e-05,
|
|
"loss": 0.8082,
|
|
"step": 8660
|
|
},
|
|
{
|
|
"epoch": 0.55051690239107,
|
|
"grad_norm": 0.9137957692146301,
|
|
"learning_rate": 2.2486839601699754e-05,
|
|
"loss": 0.7691,
|
|
"step": 8680
|
|
},
|
|
{
|
|
"epoch": 0.551785374516395,
|
|
"grad_norm": 0.8431654572486877,
|
|
"learning_rate": 2.2423415995433503e-05,
|
|
"loss": 0.812,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 0.5530538466417201,
|
|
"grad_norm": 0.9029563069343567,
|
|
"learning_rate": 2.235999238916725e-05,
|
|
"loss": 0.8667,
|
|
"step": 8720
|
|
},
|
|
{
|
|
"epoch": 0.5543223187670451,
|
|
"grad_norm": 0.8180502653121948,
|
|
"learning_rate": 2.2296568782900998e-05,
|
|
"loss": 0.8171,
|
|
"step": 8740
|
|
},
|
|
{
|
|
"epoch": 0.5555907908923702,
|
|
"grad_norm": 0.844530463218689,
|
|
"learning_rate": 2.2233145176634744e-05,
|
|
"loss": 0.7784,
|
|
"step": 8760
|
|
},
|
|
{
|
|
"epoch": 0.5568592630176952,
|
|
"grad_norm": 0.7153404355049133,
|
|
"learning_rate": 2.2169721570368493e-05,
|
|
"loss": 0.7891,
|
|
"step": 8780
|
|
},
|
|
{
|
|
"epoch": 0.5581277351430203,
|
|
"grad_norm": 0.7020410895347595,
|
|
"learning_rate": 2.2106297964102242e-05,
|
|
"loss": 0.8042,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 0.5593962072683453,
|
|
"grad_norm": 0.7581042647361755,
|
|
"learning_rate": 2.2042874357835988e-05,
|
|
"loss": 0.8337,
|
|
"step": 8820
|
|
},
|
|
{
|
|
"epoch": 0.5606646793936704,
|
|
"grad_norm": 0.7392009496688843,
|
|
"learning_rate": 2.1979450751569737e-05,
|
|
"loss": 0.8151,
|
|
"step": 8840
|
|
},
|
|
{
|
|
"epoch": 0.5619331515189954,
|
|
"grad_norm": 0.8381578326225281,
|
|
"learning_rate": 2.1916027145303483e-05,
|
|
"loss": 0.7923,
|
|
"step": 8860
|
|
},
|
|
{
|
|
"epoch": 0.5632016236443205,
|
|
"grad_norm": 1.0505058765411377,
|
|
"learning_rate": 2.1852603539037232e-05,
|
|
"loss": 0.8117,
|
|
"step": 8880
|
|
},
|
|
{
|
|
"epoch": 0.5644700957696455,
|
|
"grad_norm": 0.67955082654953,
|
|
"learning_rate": 2.1789179932770978e-05,
|
|
"loss": 0.7921,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 0.5657385678949705,
|
|
"grad_norm": 0.798687219619751,
|
|
"learning_rate": 2.1725756326504727e-05,
|
|
"loss": 0.8038,
|
|
"step": 8920
|
|
},
|
|
{
|
|
"epoch": 0.5670070400202956,
|
|
"grad_norm": 0.989431619644165,
|
|
"learning_rate": 2.1662332720238473e-05,
|
|
"loss": 0.8317,
|
|
"step": 8940
|
|
},
|
|
{
|
|
"epoch": 0.5682755121456206,
|
|
"grad_norm": 0.8161944150924683,
|
|
"learning_rate": 2.1598909113972222e-05,
|
|
"loss": 0.8222,
|
|
"step": 8960
|
|
},
|
|
{
|
|
"epoch": 0.5695439842709457,
|
|
"grad_norm": 0.8795542120933533,
|
|
"learning_rate": 2.1535485507705968e-05,
|
|
"loss": 0.7717,
|
|
"step": 8980
|
|
},
|
|
{
|
|
"epoch": 0.5708124563962707,
|
|
"grad_norm": 0.7453576326370239,
|
|
"learning_rate": 2.1472061901439717e-05,
|
|
"loss": 0.805,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 0.5720809285215958,
|
|
"grad_norm": 0.8081907033920288,
|
|
"learning_rate": 2.1408638295173463e-05,
|
|
"loss": 0.7757,
|
|
"step": 9020
|
|
},
|
|
{
|
|
"epoch": 0.5733494006469207,
|
|
"grad_norm": 0.7817357778549194,
|
|
"learning_rate": 2.1345214688907212e-05,
|
|
"loss": 0.7528,
|
|
"step": 9040
|
|
},
|
|
{
|
|
"epoch": 0.5746178727722459,
|
|
"grad_norm": 0.8645827770233154,
|
|
"learning_rate": 2.1281791082640958e-05,
|
|
"loss": 0.7713,
|
|
"step": 9060
|
|
},
|
|
{
|
|
"epoch": 0.5758863448975708,
|
|
"grad_norm": 0.8567843437194824,
|
|
"learning_rate": 2.1218367476374707e-05,
|
|
"loss": 0.7447,
|
|
"step": 9080
|
|
},
|
|
{
|
|
"epoch": 0.577154817022896,
|
|
"grad_norm": 0.7494439482688904,
|
|
"learning_rate": 2.1154943870108453e-05,
|
|
"loss": 0.7661,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 0.5784232891482209,
|
|
"grad_norm": 0.8079215884208679,
|
|
"learning_rate": 2.1091520263842206e-05,
|
|
"loss": 0.8119,
|
|
"step": 9120
|
|
},
|
|
{
|
|
"epoch": 0.579691761273546,
|
|
"grad_norm": 0.9019980430603027,
|
|
"learning_rate": 2.102809665757595e-05,
|
|
"loss": 0.8125,
|
|
"step": 9140
|
|
},
|
|
{
|
|
"epoch": 0.580960233398871,
|
|
"grad_norm": 0.7452351450920105,
|
|
"learning_rate": 2.09646730513097e-05,
|
|
"loss": 0.7691,
|
|
"step": 9160
|
|
},
|
|
{
|
|
"epoch": 0.5822287055241961,
|
|
"grad_norm": 0.7727750539779663,
|
|
"learning_rate": 2.0901249445043446e-05,
|
|
"loss": 0.7925,
|
|
"step": 9180
|
|
},
|
|
{
|
|
"epoch": 0.5834971776495211,
|
|
"grad_norm": 0.7486307621002197,
|
|
"learning_rate": 2.0837825838777196e-05,
|
|
"loss": 0.7668,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 0.5847656497748462,
|
|
"grad_norm": 0.8719222545623779,
|
|
"learning_rate": 2.077440223251094e-05,
|
|
"loss": 0.8196,
|
|
"step": 9220
|
|
},
|
|
{
|
|
"epoch": 0.5860341219001712,
|
|
"grad_norm": 0.7641133069992065,
|
|
"learning_rate": 2.071097862624469e-05,
|
|
"loss": 0.7559,
|
|
"step": 9240
|
|
},
|
|
{
|
|
"epoch": 0.5873025940254963,
|
|
"grad_norm": 0.8036416172981262,
|
|
"learning_rate": 2.0647555019978436e-05,
|
|
"loss": 0.8285,
|
|
"step": 9260
|
|
},
|
|
{
|
|
"epoch": 0.5885710661508213,
|
|
"grad_norm": 0.8614276051521301,
|
|
"learning_rate": 2.0584131413712186e-05,
|
|
"loss": 0.7951,
|
|
"step": 9280
|
|
},
|
|
{
|
|
"epoch": 0.5898395382761464,
|
|
"grad_norm": 0.8406545519828796,
|
|
"learning_rate": 2.052070780744593e-05,
|
|
"loss": 0.7714,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 0.5911080104014714,
|
|
"grad_norm": 0.9403005838394165,
|
|
"learning_rate": 2.045728420117968e-05,
|
|
"loss": 0.7999,
|
|
"step": 9320
|
|
},
|
|
{
|
|
"epoch": 0.5923764825267964,
|
|
"grad_norm": 0.8395708799362183,
|
|
"learning_rate": 2.0393860594913426e-05,
|
|
"loss": 0.8069,
|
|
"step": 9340
|
|
},
|
|
{
|
|
"epoch": 0.5936449546521215,
|
|
"grad_norm": 0.8432602286338806,
|
|
"learning_rate": 2.0330436988647176e-05,
|
|
"loss": 0.8189,
|
|
"step": 9360
|
|
},
|
|
{
|
|
"epoch": 0.5949134267774465,
|
|
"grad_norm": 0.7362537980079651,
|
|
"learning_rate": 2.026701338238092e-05,
|
|
"loss": 0.8069,
|
|
"step": 9380
|
|
},
|
|
{
|
|
"epoch": 0.5961818989027716,
|
|
"grad_norm": 0.7601738572120667,
|
|
"learning_rate": 2.020358977611467e-05,
|
|
"loss": 0.7449,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 0.5974503710280966,
|
|
"grad_norm": 0.8012720346450806,
|
|
"learning_rate": 2.0140166169848416e-05,
|
|
"loss": 0.7831,
|
|
"step": 9420
|
|
},
|
|
{
|
|
"epoch": 0.5987188431534217,
|
|
"grad_norm": 0.7670310139656067,
|
|
"learning_rate": 2.0076742563582166e-05,
|
|
"loss": 0.8393,
|
|
"step": 9440
|
|
},
|
|
{
|
|
"epoch": 0.5999873152787467,
|
|
"grad_norm": 0.8244422674179077,
|
|
"learning_rate": 2.001331895731591e-05,
|
|
"loss": 0.7709,
|
|
"step": 9460
|
|
},
|
|
{
|
|
"epoch": 0.6012557874040718,
|
|
"grad_norm": 0.7943612933158875,
|
|
"learning_rate": 1.994989535104966e-05,
|
|
"loss": 0.819,
|
|
"step": 9480
|
|
},
|
|
{
|
|
"epoch": 0.6025242595293968,
|
|
"grad_norm": 0.9540635347366333,
|
|
"learning_rate": 1.9886471744783406e-05,
|
|
"loss": 0.7899,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 0.6037927316547219,
|
|
"grad_norm": 0.9198821783065796,
|
|
"learning_rate": 1.9823048138517156e-05,
|
|
"loss": 0.8239,
|
|
"step": 9520
|
|
},
|
|
{
|
|
"epoch": 0.6050612037800469,
|
|
"grad_norm": 0.7837796807289124,
|
|
"learning_rate": 1.9759624532250905e-05,
|
|
"loss": 0.7929,
|
|
"step": 9540
|
|
},
|
|
{
|
|
"epoch": 0.606329675905372,
|
|
"grad_norm": 0.8205187320709229,
|
|
"learning_rate": 1.9696200925984654e-05,
|
|
"loss": 0.819,
|
|
"step": 9560
|
|
},
|
|
{
|
|
"epoch": 0.607598148030697,
|
|
"grad_norm": 0.8532772064208984,
|
|
"learning_rate": 1.96327773197184e-05,
|
|
"loss": 0.7455,
|
|
"step": 9580
|
|
},
|
|
{
|
|
"epoch": 0.6088666201560221,
|
|
"grad_norm": 0.8524623513221741,
|
|
"learning_rate": 1.956935371345215e-05,
|
|
"loss": 0.8089,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 0.6101350922813471,
|
|
"grad_norm": 0.8614479899406433,
|
|
"learning_rate": 1.9505930107185895e-05,
|
|
"loss": 0.8059,
|
|
"step": 9620
|
|
},
|
|
{
|
|
"epoch": 0.6114035644066722,
|
|
"grad_norm": 0.7598078846931458,
|
|
"learning_rate": 1.9442506500919644e-05,
|
|
"loss": 0.7832,
|
|
"step": 9640
|
|
},
|
|
{
|
|
"epoch": 0.6126720365319972,
|
|
"grad_norm": 0.809009850025177,
|
|
"learning_rate": 1.937908289465339e-05,
|
|
"loss": 0.7259,
|
|
"step": 9660
|
|
},
|
|
{
|
|
"epoch": 0.6139405086573223,
|
|
"grad_norm": 0.7381779551506042,
|
|
"learning_rate": 1.931565928838714e-05,
|
|
"loss": 0.7676,
|
|
"step": 9680
|
|
},
|
|
{
|
|
"epoch": 0.6152089807826473,
|
|
"grad_norm": 0.8887180685997009,
|
|
"learning_rate": 1.9252235682120888e-05,
|
|
"loss": 0.8126,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 0.6164774529079724,
|
|
"grad_norm": 0.7270573973655701,
|
|
"learning_rate": 1.9188812075854634e-05,
|
|
"loss": 0.7786,
|
|
"step": 9720
|
|
},
|
|
{
|
|
"epoch": 0.6177459250332974,
|
|
"grad_norm": 0.7978057861328125,
|
|
"learning_rate": 1.9125388469588383e-05,
|
|
"loss": 0.8281,
|
|
"step": 9740
|
|
},
|
|
{
|
|
"epoch": 0.6190143971586224,
|
|
"grad_norm": 0.8202372789382935,
|
|
"learning_rate": 1.906196486332213e-05,
|
|
"loss": 0.7656,
|
|
"step": 9760
|
|
},
|
|
{
|
|
"epoch": 0.6202828692839475,
|
|
"grad_norm": 0.9720300436019897,
|
|
"learning_rate": 1.8998541257055878e-05,
|
|
"loss": 0.7881,
|
|
"step": 9780
|
|
},
|
|
{
|
|
"epoch": 0.6215513414092725,
|
|
"grad_norm": 0.9297833442687988,
|
|
"learning_rate": 1.8935117650789624e-05,
|
|
"loss": 0.8237,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 0.6228198135345976,
|
|
"grad_norm": 0.7593715190887451,
|
|
"learning_rate": 1.8871694044523373e-05,
|
|
"loss": 0.7574,
|
|
"step": 9820
|
|
},
|
|
{
|
|
"epoch": 0.6240882856599226,
|
|
"grad_norm": 0.8537524938583374,
|
|
"learning_rate": 1.880827043825712e-05,
|
|
"loss": 0.7969,
|
|
"step": 9840
|
|
},
|
|
{
|
|
"epoch": 0.6253567577852477,
|
|
"grad_norm": 0.770918607711792,
|
|
"learning_rate": 1.8744846831990868e-05,
|
|
"loss": 0.7894,
|
|
"step": 9860
|
|
},
|
|
{
|
|
"epoch": 0.6266252299105727,
|
|
"grad_norm": 0.7605695724487305,
|
|
"learning_rate": 1.8681423225724614e-05,
|
|
"loss": 0.782,
|
|
"step": 9880
|
|
},
|
|
{
|
|
"epoch": 0.6278937020358978,
|
|
"grad_norm": 0.8978208303451538,
|
|
"learning_rate": 1.8617999619458363e-05,
|
|
"loss": 0.7979,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 0.6291621741612228,
|
|
"grad_norm": 0.7393850088119507,
|
|
"learning_rate": 1.855457601319211e-05,
|
|
"loss": 0.8139,
|
|
"step": 9920
|
|
},
|
|
{
|
|
"epoch": 0.6304306462865479,
|
|
"grad_norm": 0.7255131602287292,
|
|
"learning_rate": 1.8491152406925858e-05,
|
|
"loss": 0.7802,
|
|
"step": 9940
|
|
},
|
|
{
|
|
"epoch": 0.6316991184118729,
|
|
"grad_norm": 0.7080028653144836,
|
|
"learning_rate": 1.8427728800659607e-05,
|
|
"loss": 0.8059,
|
|
"step": 9960
|
|
},
|
|
{
|
|
"epoch": 0.632967590537198,
|
|
"grad_norm": 0.8282076716423035,
|
|
"learning_rate": 1.8364305194393357e-05,
|
|
"loss": 0.7905,
|
|
"step": 9980
|
|
},
|
|
{
|
|
"epoch": 0.634236062662523,
|
|
"grad_norm": 0.8741589784622192,
|
|
"learning_rate": 1.8300881588127102e-05,
|
|
"loss": 0.8174,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 0.6355045347878481,
|
|
"grad_norm": 0.7435175776481628,
|
|
"learning_rate": 1.823745798186085e-05,
|
|
"loss": 0.7678,
|
|
"step": 10020
|
|
},
|
|
{
|
|
"epoch": 0.6367730069131731,
|
|
"grad_norm": 0.7347603440284729,
|
|
"learning_rate": 1.817720555590791e-05,
|
|
"loss": 0.8225,
|
|
"step": 10040
|
|
},
|
|
{
|
|
"epoch": 0.6380414790384982,
|
|
"grad_norm": 0.8974965214729309,
|
|
"learning_rate": 1.811378194964166e-05,
|
|
"loss": 0.7766,
|
|
"step": 10060
|
|
},
|
|
{
|
|
"epoch": 0.6393099511638232,
|
|
"grad_norm": 0.7255268692970276,
|
|
"learning_rate": 1.8050358343375408e-05,
|
|
"loss": 0.8,
|
|
"step": 10080
|
|
},
|
|
{
|
|
"epoch": 0.6405784232891483,
|
|
"grad_norm": 0.7062020897865295,
|
|
"learning_rate": 1.7986934737109154e-05,
|
|
"loss": 0.7088,
|
|
"step": 10100
|
|
},
|
|
{
|
|
"epoch": 0.6418468954144733,
|
|
"grad_norm": 0.8076253533363342,
|
|
"learning_rate": 1.7923511130842903e-05,
|
|
"loss": 0.8152,
|
|
"step": 10120
|
|
},
|
|
{
|
|
"epoch": 0.6431153675397984,
|
|
"grad_norm": 0.8340699672698975,
|
|
"learning_rate": 1.786008752457665e-05,
|
|
"loss": 0.7985,
|
|
"step": 10140
|
|
},
|
|
{
|
|
"epoch": 0.6443838396651234,
|
|
"grad_norm": 0.7522137761116028,
|
|
"learning_rate": 1.7796663918310398e-05,
|
|
"loss": 0.7704,
|
|
"step": 10160
|
|
},
|
|
{
|
|
"epoch": 0.6456523117904484,
|
|
"grad_norm": 0.8227932453155518,
|
|
"learning_rate": 1.7733240312044144e-05,
|
|
"loss": 0.828,
|
|
"step": 10180
|
|
},
|
|
{
|
|
"epoch": 0.6469207839157735,
|
|
"grad_norm": 0.7742383480072021,
|
|
"learning_rate": 1.7669816705777893e-05,
|
|
"loss": 0.7893,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 0.6481892560410984,
|
|
"grad_norm": 0.7038094401359558,
|
|
"learning_rate": 1.760639309951164e-05,
|
|
"loss": 0.7699,
|
|
"step": 10220
|
|
},
|
|
{
|
|
"epoch": 0.6494577281664236,
|
|
"grad_norm": 0.8382614850997925,
|
|
"learning_rate": 1.7542969493245388e-05,
|
|
"loss": 0.8254,
|
|
"step": 10240
|
|
},
|
|
{
|
|
"epoch": 0.6507262002917485,
|
|
"grad_norm": 0.9173989295959473,
|
|
"learning_rate": 1.7479545886979134e-05,
|
|
"loss": 0.7603,
|
|
"step": 10260
|
|
},
|
|
{
|
|
"epoch": 0.6519946724170737,
|
|
"grad_norm": 0.7602284550666809,
|
|
"learning_rate": 1.7416122280712883e-05,
|
|
"loss": 0.8019,
|
|
"step": 10280
|
|
},
|
|
{
|
|
"epoch": 0.6532631445423986,
|
|
"grad_norm": 0.8012353181838989,
|
|
"learning_rate": 1.735269867444663e-05,
|
|
"loss": 0.7944,
|
|
"step": 10300
|
|
},
|
|
{
|
|
"epoch": 0.6545316166677237,
|
|
"grad_norm": 0.8844314217567444,
|
|
"learning_rate": 1.7289275068180378e-05,
|
|
"loss": 0.7785,
|
|
"step": 10320
|
|
},
|
|
{
|
|
"epoch": 0.6558000887930487,
|
|
"grad_norm": 0.7556779980659485,
|
|
"learning_rate": 1.7225851461914124e-05,
|
|
"loss": 0.7597,
|
|
"step": 10340
|
|
},
|
|
{
|
|
"epoch": 0.6570685609183738,
|
|
"grad_norm": 0.8446857929229736,
|
|
"learning_rate": 1.7162427855647873e-05,
|
|
"loss": 0.7941,
|
|
"step": 10360
|
|
},
|
|
{
|
|
"epoch": 0.6583370330436988,
|
|
"grad_norm": 0.7313318848609924,
|
|
"learning_rate": 1.709900424938162e-05,
|
|
"loss": 0.8017,
|
|
"step": 10380
|
|
},
|
|
{
|
|
"epoch": 0.6596055051690239,
|
|
"grad_norm": 0.8298467397689819,
|
|
"learning_rate": 1.7035580643115368e-05,
|
|
"loss": 0.7869,
|
|
"step": 10400
|
|
},
|
|
{
|
|
"epoch": 0.6608739772943489,
|
|
"grad_norm": 0.8003538846969604,
|
|
"learning_rate": 1.6972157036849114e-05,
|
|
"loss": 0.8002,
|
|
"step": 10420
|
|
},
|
|
{
|
|
"epoch": 0.662142449419674,
|
|
"grad_norm": 0.7555122971534729,
|
|
"learning_rate": 1.6908733430582863e-05,
|
|
"loss": 0.7632,
|
|
"step": 10440
|
|
},
|
|
{
|
|
"epoch": 0.663410921544999,
|
|
"grad_norm": 0.7712675333023071,
|
|
"learning_rate": 1.684530982431661e-05,
|
|
"loss": 0.7441,
|
|
"step": 10460
|
|
},
|
|
{
|
|
"epoch": 0.6646793936703241,
|
|
"grad_norm": 0.6845158338546753,
|
|
"learning_rate": 1.6781886218050358e-05,
|
|
"loss": 0.7384,
|
|
"step": 10480
|
|
},
|
|
{
|
|
"epoch": 0.6659478657956491,
|
|
"grad_norm": 0.8500059843063354,
|
|
"learning_rate": 1.6718462611784107e-05,
|
|
"loss": 0.8052,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 0.6672163379209742,
|
|
"grad_norm": 0.759861946105957,
|
|
"learning_rate": 1.6655039005517856e-05,
|
|
"loss": 0.828,
|
|
"step": 10520
|
|
},
|
|
{
|
|
"epoch": 0.6684848100462992,
|
|
"grad_norm": 0.7759114503860474,
|
|
"learning_rate": 1.6591615399251602e-05,
|
|
"loss": 0.7719,
|
|
"step": 10540
|
|
},
|
|
{
|
|
"epoch": 0.6697532821716243,
|
|
"grad_norm": 0.8368454575538635,
|
|
"learning_rate": 1.652819179298535e-05,
|
|
"loss": 0.8383,
|
|
"step": 10560
|
|
},
|
|
{
|
|
"epoch": 0.6710217542969493,
|
|
"grad_norm": 0.8691524267196655,
|
|
"learning_rate": 1.6464768186719097e-05,
|
|
"loss": 0.7822,
|
|
"step": 10580
|
|
},
|
|
{
|
|
"epoch": 0.6722902264222743,
|
|
"grad_norm": 0.8464477062225342,
|
|
"learning_rate": 1.6401344580452846e-05,
|
|
"loss": 0.7802,
|
|
"step": 10600
|
|
},
|
|
{
|
|
"epoch": 0.6735586985475994,
|
|
"grad_norm": 0.796231210231781,
|
|
"learning_rate": 1.6337920974186592e-05,
|
|
"loss": 0.7821,
|
|
"step": 10620
|
|
},
|
|
{
|
|
"epoch": 0.6748271706729244,
|
|
"grad_norm": 0.7409220933914185,
|
|
"learning_rate": 1.627449736792034e-05,
|
|
"loss": 0.8203,
|
|
"step": 10640
|
|
},
|
|
{
|
|
"epoch": 0.6760956427982495,
|
|
"grad_norm": 0.7760050892829895,
|
|
"learning_rate": 1.6211073761654087e-05,
|
|
"loss": 0.7673,
|
|
"step": 10660
|
|
},
|
|
{
|
|
"epoch": 0.6773641149235745,
|
|
"grad_norm": 0.7795297503471375,
|
|
"learning_rate": 1.6147650155387836e-05,
|
|
"loss": 0.7545,
|
|
"step": 10680
|
|
},
|
|
{
|
|
"epoch": 0.6786325870488996,
|
|
"grad_norm": 0.8562922477722168,
|
|
"learning_rate": 1.6084226549121585e-05,
|
|
"loss": 0.7744,
|
|
"step": 10700
|
|
},
|
|
{
|
|
"epoch": 0.6799010591742246,
|
|
"grad_norm": 0.8879472613334656,
|
|
"learning_rate": 1.602080294285533e-05,
|
|
"loss": 0.7775,
|
|
"step": 10720
|
|
},
|
|
{
|
|
"epoch": 0.6811695312995497,
|
|
"grad_norm": 0.674929141998291,
|
|
"learning_rate": 1.595737933658908e-05,
|
|
"loss": 0.8155,
|
|
"step": 10740
|
|
},
|
|
{
|
|
"epoch": 0.6824380034248747,
|
|
"grad_norm": 0.8436025381088257,
|
|
"learning_rate": 1.5893955730322826e-05,
|
|
"loss": 0.7855,
|
|
"step": 10760
|
|
},
|
|
{
|
|
"epoch": 0.6837064755501998,
|
|
"grad_norm": 0.7950330972671509,
|
|
"learning_rate": 1.5830532124056575e-05,
|
|
"loss": 0.8171,
|
|
"step": 10780
|
|
},
|
|
{
|
|
"epoch": 0.6849749476755248,
|
|
"grad_norm": 0.7402753233909607,
|
|
"learning_rate": 1.576710851779032e-05,
|
|
"loss": 0.7543,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 0.6862434198008499,
|
|
"grad_norm": 0.7969671487808228,
|
|
"learning_rate": 1.570368491152407e-05,
|
|
"loss": 0.811,
|
|
"step": 10820
|
|
},
|
|
{
|
|
"epoch": 0.6875118919261749,
|
|
"grad_norm": 0.9241589307785034,
|
|
"learning_rate": 1.5640261305257816e-05,
|
|
"loss": 0.8234,
|
|
"step": 10840
|
|
},
|
|
{
|
|
"epoch": 0.6887803640515,
|
|
"grad_norm": 0.8808215260505676,
|
|
"learning_rate": 1.5576837698991565e-05,
|
|
"loss": 0.7646,
|
|
"step": 10860
|
|
},
|
|
{
|
|
"epoch": 0.690048836176825,
|
|
"grad_norm": 0.7900111675262451,
|
|
"learning_rate": 1.551341409272531e-05,
|
|
"loss": 0.7461,
|
|
"step": 10880
|
|
},
|
|
{
|
|
"epoch": 0.6913173083021501,
|
|
"grad_norm": 0.9008402824401855,
|
|
"learning_rate": 1.544999048645906e-05,
|
|
"loss": 0.7693,
|
|
"step": 10900
|
|
},
|
|
{
|
|
"epoch": 0.6925857804274751,
|
|
"grad_norm": 0.925081729888916,
|
|
"learning_rate": 1.538656688019281e-05,
|
|
"loss": 0.8174,
|
|
"step": 10920
|
|
},
|
|
{
|
|
"epoch": 0.6938542525528002,
|
|
"grad_norm": 0.8141810297966003,
|
|
"learning_rate": 1.532314327392656e-05,
|
|
"loss": 0.8009,
|
|
"step": 10940
|
|
},
|
|
{
|
|
"epoch": 0.6951227246781252,
|
|
"grad_norm": 0.8973850011825562,
|
|
"learning_rate": 1.5259719667660305e-05,
|
|
"loss": 0.7731,
|
|
"step": 10960
|
|
},
|
|
{
|
|
"epoch": 0.6963911968034503,
|
|
"grad_norm": 0.7652609348297119,
|
|
"learning_rate": 1.5196296061394052e-05,
|
|
"loss": 0.7751,
|
|
"step": 10980
|
|
},
|
|
{
|
|
"epoch": 0.6976596689287753,
|
|
"grad_norm": 0.8361225724220276,
|
|
"learning_rate": 1.51328724551278e-05,
|
|
"loss": 0.7551,
|
|
"step": 11000
|
|
},
|
|
{
|
|
"epoch": 0.6989281410541003,
|
|
"grad_norm": 0.7935757040977478,
|
|
"learning_rate": 1.5069448848861547e-05,
|
|
"loss": 0.8139,
|
|
"step": 11020
|
|
},
|
|
{
|
|
"epoch": 0.7001966131794254,
|
|
"grad_norm": 0.7135019898414612,
|
|
"learning_rate": 1.5006025242595295e-05,
|
|
"loss": 0.7768,
|
|
"step": 11040
|
|
},
|
|
{
|
|
"epoch": 0.7014650853047504,
|
|
"grad_norm": 0.811869204044342,
|
|
"learning_rate": 1.4942601636329042e-05,
|
|
"loss": 0.7697,
|
|
"step": 11060
|
|
},
|
|
{
|
|
"epoch": 0.7027335574300755,
|
|
"grad_norm": 0.9030170440673828,
|
|
"learning_rate": 1.487917803006279e-05,
|
|
"loss": 0.8206,
|
|
"step": 11080
|
|
},
|
|
{
|
|
"epoch": 0.7040020295554005,
|
|
"grad_norm": 0.765082836151123,
|
|
"learning_rate": 1.4815754423796537e-05,
|
|
"loss": 0.8204,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 0.7052705016807256,
|
|
"grad_norm": 0.7715885639190674,
|
|
"learning_rate": 1.4752330817530285e-05,
|
|
"loss": 0.7718,
|
|
"step": 11120
|
|
},
|
|
{
|
|
"epoch": 0.7065389738060506,
|
|
"grad_norm": 0.7729353904724121,
|
|
"learning_rate": 1.4688907211264034e-05,
|
|
"loss": 0.8165,
|
|
"step": 11140
|
|
},
|
|
{
|
|
"epoch": 0.7078074459313757,
|
|
"grad_norm": 0.6622787117958069,
|
|
"learning_rate": 1.4625483604997781e-05,
|
|
"loss": 0.7618,
|
|
"step": 11160
|
|
},
|
|
{
|
|
"epoch": 0.7090759180567007,
|
|
"grad_norm": 0.820572555065155,
|
|
"learning_rate": 1.4562059998731529e-05,
|
|
"loss": 0.7887,
|
|
"step": 11180
|
|
},
|
|
{
|
|
"epoch": 0.7103443901820258,
|
|
"grad_norm": 0.7710301876068115,
|
|
"learning_rate": 1.4498636392465276e-05,
|
|
"loss": 0.7712,
|
|
"step": 11200
|
|
},
|
|
{
|
|
"epoch": 0.7116128623073508,
|
|
"grad_norm": 0.8138539791107178,
|
|
"learning_rate": 1.4435212786199024e-05,
|
|
"loss": 0.7989,
|
|
"step": 11220
|
|
},
|
|
{
|
|
"epoch": 0.7128813344326759,
|
|
"grad_norm": 0.7800792455673218,
|
|
"learning_rate": 1.4371789179932771e-05,
|
|
"loss": 0.7641,
|
|
"step": 11240
|
|
},
|
|
{
|
|
"epoch": 0.7141498065580009,
|
|
"grad_norm": 0.809686005115509,
|
|
"learning_rate": 1.4308365573666519e-05,
|
|
"loss": 0.8053,
|
|
"step": 11260
|
|
},
|
|
{
|
|
"epoch": 0.715418278683326,
|
|
"grad_norm": 0.8002369403839111,
|
|
"learning_rate": 1.4244941967400266e-05,
|
|
"loss": 0.8044,
|
|
"step": 11280
|
|
},
|
|
{
|
|
"epoch": 0.716686750808651,
|
|
"grad_norm": 0.8907930850982666,
|
|
"learning_rate": 1.4181518361134014e-05,
|
|
"loss": 0.7896,
|
|
"step": 11300
|
|
},
|
|
{
|
|
"epoch": 0.7179552229339761,
|
|
"grad_norm": 0.8205035328865051,
|
|
"learning_rate": 1.4118094754867761e-05,
|
|
"loss": 0.7609,
|
|
"step": 11320
|
|
},
|
|
{
|
|
"epoch": 0.7192236950593011,
|
|
"grad_norm": 0.7667264342308044,
|
|
"learning_rate": 1.4054671148601512e-05,
|
|
"loss": 0.8022,
|
|
"step": 11340
|
|
},
|
|
{
|
|
"epoch": 0.7204921671846262,
|
|
"grad_norm": 0.7035322189331055,
|
|
"learning_rate": 1.399124754233526e-05,
|
|
"loss": 0.7969,
|
|
"step": 11360
|
|
},
|
|
{
|
|
"epoch": 0.7217606393099512,
|
|
"grad_norm": 0.7853593230247498,
|
|
"learning_rate": 1.3927823936069007e-05,
|
|
"loss": 0.7839,
|
|
"step": 11380
|
|
},
|
|
{
|
|
"epoch": 0.7230291114352763,
|
|
"grad_norm": 0.9023504853248596,
|
|
"learning_rate": 1.3864400329802755e-05,
|
|
"loss": 0.7867,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 0.7242975835606013,
|
|
"grad_norm": 0.8038562536239624,
|
|
"learning_rate": 1.3800976723536502e-05,
|
|
"loss": 0.76,
|
|
"step": 11420
|
|
},
|
|
{
|
|
"epoch": 0.7255660556859262,
|
|
"grad_norm": 0.8277421593666077,
|
|
"learning_rate": 1.373755311727025e-05,
|
|
"loss": 0.8377,
|
|
"step": 11440
|
|
},
|
|
{
|
|
"epoch": 0.7268345278112514,
|
|
"grad_norm": 0.7307552099227905,
|
|
"learning_rate": 1.3674129511003997e-05,
|
|
"loss": 0.7962,
|
|
"step": 11460
|
|
},
|
|
{
|
|
"epoch": 0.7281029999365763,
|
|
"grad_norm": 0.7248812913894653,
|
|
"learning_rate": 1.3610705904737745e-05,
|
|
"loss": 0.7655,
|
|
"step": 11480
|
|
},
|
|
{
|
|
"epoch": 0.7293714720619014,
|
|
"grad_norm": 0.8142716288566589,
|
|
"learning_rate": 1.3547282298471492e-05,
|
|
"loss": 0.7874,
|
|
"step": 11500
|
|
},
|
|
{
|
|
"epoch": 0.7306399441872264,
|
|
"grad_norm": 0.8528370261192322,
|
|
"learning_rate": 1.348385869220524e-05,
|
|
"loss": 0.7478,
|
|
"step": 11520
|
|
},
|
|
{
|
|
"epoch": 0.7319084163125515,
|
|
"grad_norm": 0.7856337428092957,
|
|
"learning_rate": 1.3420435085938987e-05,
|
|
"loss": 0.7682,
|
|
"step": 11540
|
|
},
|
|
{
|
|
"epoch": 0.7331768884378765,
|
|
"grad_norm": 0.8709967136383057,
|
|
"learning_rate": 1.3357011479672735e-05,
|
|
"loss": 0.7951,
|
|
"step": 11560
|
|
},
|
|
{
|
|
"epoch": 0.7344453605632016,
|
|
"grad_norm": 0.7879327535629272,
|
|
"learning_rate": 1.3293587873406482e-05,
|
|
"loss": 0.8052,
|
|
"step": 11580
|
|
},
|
|
{
|
|
"epoch": 0.7357138326885266,
|
|
"grad_norm": 0.7124823331832886,
|
|
"learning_rate": 1.323016426714023e-05,
|
|
"loss": 0.7847,
|
|
"step": 11600
|
|
},
|
|
{
|
|
"epoch": 0.7369823048138517,
|
|
"grad_norm": 0.8641963601112366,
|
|
"learning_rate": 1.3166740660873977e-05,
|
|
"loss": 0.7953,
|
|
"step": 11620
|
|
},
|
|
{
|
|
"epoch": 0.7382507769391767,
|
|
"grad_norm": 0.777748167514801,
|
|
"learning_rate": 1.3103317054607725e-05,
|
|
"loss": 0.7786,
|
|
"step": 11640
|
|
},
|
|
{
|
|
"epoch": 0.7395192490645018,
|
|
"grad_norm": 0.9086549878120422,
|
|
"learning_rate": 1.3039893448341472e-05,
|
|
"loss": 0.7954,
|
|
"step": 11660
|
|
},
|
|
{
|
|
"epoch": 0.7407877211898268,
|
|
"grad_norm": 0.7550273537635803,
|
|
"learning_rate": 1.297646984207522e-05,
|
|
"loss": 0.7679,
|
|
"step": 11680
|
|
},
|
|
{
|
|
"epoch": 0.7420561933151519,
|
|
"grad_norm": 0.8174465894699097,
|
|
"learning_rate": 1.2913046235808967e-05,
|
|
"loss": 0.7829,
|
|
"step": 11700
|
|
},
|
|
{
|
|
"epoch": 0.7433246654404769,
|
|
"grad_norm": 0.8319543600082397,
|
|
"learning_rate": 1.2849622629542715e-05,
|
|
"loss": 0.7592,
|
|
"step": 11720
|
|
},
|
|
{
|
|
"epoch": 0.744593137565802,
|
|
"grad_norm": 0.7246963381767273,
|
|
"learning_rate": 1.2786199023276462e-05,
|
|
"loss": 0.7925,
|
|
"step": 11740
|
|
},
|
|
{
|
|
"epoch": 0.745861609691127,
|
|
"grad_norm": 0.7811394333839417,
|
|
"learning_rate": 1.2722775417010213e-05,
|
|
"loss": 0.8063,
|
|
"step": 11760
|
|
},
|
|
{
|
|
"epoch": 0.7471300818164521,
|
|
"grad_norm": 0.9180453419685364,
|
|
"learning_rate": 1.266252299105727e-05,
|
|
"loss": 0.8391,
|
|
"step": 11780
|
|
},
|
|
{
|
|
"epoch": 0.7483985539417771,
|
|
"grad_norm": 0.6986908912658691,
|
|
"learning_rate": 1.2599099384791018e-05,
|
|
"loss": 0.7773,
|
|
"step": 11800
|
|
},
|
|
{
|
|
"epoch": 0.7496670260671022,
|
|
"grad_norm": 0.8293908834457397,
|
|
"learning_rate": 1.2535675778524766e-05,
|
|
"loss": 0.7888,
|
|
"step": 11820
|
|
},
|
|
{
|
|
"epoch": 0.7509354981924272,
|
|
"grad_norm": 0.9369567036628723,
|
|
"learning_rate": 1.2472252172258515e-05,
|
|
"loss": 0.772,
|
|
"step": 11840
|
|
},
|
|
{
|
|
"epoch": 0.7522039703177522,
|
|
"grad_norm": 0.884286105632782,
|
|
"learning_rate": 1.2408828565992263e-05,
|
|
"loss": 0.7806,
|
|
"step": 11860
|
|
},
|
|
{
|
|
"epoch": 0.7534724424430773,
|
|
"grad_norm": 0.749497652053833,
|
|
"learning_rate": 1.234540495972601e-05,
|
|
"loss": 0.7501,
|
|
"step": 11880
|
|
},
|
|
{
|
|
"epoch": 0.7547409145684023,
|
|
"grad_norm": 0.6741966605186462,
|
|
"learning_rate": 1.2281981353459758e-05,
|
|
"loss": 0.7672,
|
|
"step": 11900
|
|
},
|
|
{
|
|
"epoch": 0.7560093866937274,
|
|
"grad_norm": 0.8107251524925232,
|
|
"learning_rate": 1.2218557747193505e-05,
|
|
"loss": 0.7765,
|
|
"step": 11920
|
|
},
|
|
{
|
|
"epoch": 0.7572778588190524,
|
|
"grad_norm": 0.9146373867988586,
|
|
"learning_rate": 1.2155134140927253e-05,
|
|
"loss": 0.7462,
|
|
"step": 11940
|
|
},
|
|
{
|
|
"epoch": 0.7585463309443775,
|
|
"grad_norm": 0.9027043581008911,
|
|
"learning_rate": 1.2091710534661e-05,
|
|
"loss": 0.7819,
|
|
"step": 11960
|
|
},
|
|
{
|
|
"epoch": 0.7598148030697025,
|
|
"grad_norm": 0.7713417410850525,
|
|
"learning_rate": 1.202828692839475e-05,
|
|
"loss": 0.7684,
|
|
"step": 11980
|
|
},
|
|
{
|
|
"epoch": 0.7610832751950276,
|
|
"grad_norm": 0.8822270631790161,
|
|
"learning_rate": 1.1964863322128497e-05,
|
|
"loss": 0.7524,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 0.7623517473203526,
|
|
"grad_norm": 0.8402985334396362,
|
|
"learning_rate": 1.1901439715862244e-05,
|
|
"loss": 0.795,
|
|
"step": 12020
|
|
},
|
|
{
|
|
"epoch": 0.7636202194456777,
|
|
"grad_norm": 0.7556558847427368,
|
|
"learning_rate": 1.1838016109595992e-05,
|
|
"loss": 0.773,
|
|
"step": 12040
|
|
},
|
|
{
|
|
"epoch": 0.7648886915710027,
|
|
"grad_norm": 0.7098413705825806,
|
|
"learning_rate": 1.177459250332974e-05,
|
|
"loss": 0.7888,
|
|
"step": 12060
|
|
},
|
|
{
|
|
"epoch": 0.7661571636963278,
|
|
"grad_norm": 0.6865963935852051,
|
|
"learning_rate": 1.1711168897063487e-05,
|
|
"loss": 0.7844,
|
|
"step": 12080
|
|
},
|
|
{
|
|
"epoch": 0.7674256358216528,
|
|
"grad_norm": 0.9354507923126221,
|
|
"learning_rate": 1.1647745290797234e-05,
|
|
"loss": 0.8064,
|
|
"step": 12100
|
|
},
|
|
{
|
|
"epoch": 0.7686941079469779,
|
|
"grad_norm": 0.8983631134033203,
|
|
"learning_rate": 1.1584321684530983e-05,
|
|
"loss": 0.759,
|
|
"step": 12120
|
|
},
|
|
{
|
|
"epoch": 0.7699625800723029,
|
|
"grad_norm": 0.9061852693557739,
|
|
"learning_rate": 1.1520898078264731e-05,
|
|
"loss": 0.7959,
|
|
"step": 12140
|
|
},
|
|
{
|
|
"epoch": 0.771231052197628,
|
|
"grad_norm": 0.8606493473052979,
|
|
"learning_rate": 1.1457474471998478e-05,
|
|
"loss": 0.7926,
|
|
"step": 12160
|
|
},
|
|
{
|
|
"epoch": 0.772499524322953,
|
|
"grad_norm": 0.9167592525482178,
|
|
"learning_rate": 1.1394050865732226e-05,
|
|
"loss": 0.7813,
|
|
"step": 12180
|
|
},
|
|
{
|
|
"epoch": 0.7737679964482781,
|
|
"grad_norm": 0.866223931312561,
|
|
"learning_rate": 1.1330627259465975e-05,
|
|
"loss": 0.7827,
|
|
"step": 12200
|
|
},
|
|
{
|
|
"epoch": 0.7750364685736031,
|
|
"grad_norm": 0.720427930355072,
|
|
"learning_rate": 1.1267203653199723e-05,
|
|
"loss": 0.813,
|
|
"step": 12220
|
|
},
|
|
{
|
|
"epoch": 0.7763049406989282,
|
|
"grad_norm": 0.8172628283500671,
|
|
"learning_rate": 1.120378004693347e-05,
|
|
"loss": 0.7733,
|
|
"step": 12240
|
|
},
|
|
{
|
|
"epoch": 0.7775734128242532,
|
|
"grad_norm": 0.741121768951416,
|
|
"learning_rate": 1.1140356440667218e-05,
|
|
"loss": 0.7434,
|
|
"step": 12260
|
|
},
|
|
{
|
|
"epoch": 0.7788418849495782,
|
|
"grad_norm": 0.723564624786377,
|
|
"learning_rate": 1.1076932834400965e-05,
|
|
"loss": 0.7789,
|
|
"step": 12280
|
|
},
|
|
{
|
|
"epoch": 0.7801103570749033,
|
|
"grad_norm": 0.9289072155952454,
|
|
"learning_rate": 1.1013509228134713e-05,
|
|
"loss": 0.7894,
|
|
"step": 12300
|
|
},
|
|
{
|
|
"epoch": 0.7813788292002283,
|
|
"grad_norm": 0.8132310509681702,
|
|
"learning_rate": 1.095008562186846e-05,
|
|
"loss": 0.8153,
|
|
"step": 12320
|
|
},
|
|
{
|
|
"epoch": 0.7826473013255534,
|
|
"grad_norm": 0.967943549156189,
|
|
"learning_rate": 1.0886662015602208e-05,
|
|
"loss": 0.7821,
|
|
"step": 12340
|
|
},
|
|
{
|
|
"epoch": 0.7839157734508784,
|
|
"grad_norm": 0.7738404273986816,
|
|
"learning_rate": 1.0823238409335955e-05,
|
|
"loss": 0.7855,
|
|
"step": 12360
|
|
},
|
|
{
|
|
"epoch": 0.7851842455762035,
|
|
"grad_norm": 0.8411769270896912,
|
|
"learning_rate": 1.0759814803069703e-05,
|
|
"loss": 0.7988,
|
|
"step": 12380
|
|
},
|
|
{
|
|
"epoch": 0.7864527177015285,
|
|
"grad_norm": 0.8962435722351074,
|
|
"learning_rate": 1.0696391196803452e-05,
|
|
"loss": 0.8321,
|
|
"step": 12400
|
|
},
|
|
{
|
|
"epoch": 0.7877211898268536,
|
|
"grad_norm": 0.7484604716300964,
|
|
"learning_rate": 1.06329675905372e-05,
|
|
"loss": 0.7686,
|
|
"step": 12420
|
|
},
|
|
{
|
|
"epoch": 0.7889896619521786,
|
|
"grad_norm": 0.802546501159668,
|
|
"learning_rate": 1.0569543984270947e-05,
|
|
"loss": 0.7904,
|
|
"step": 12440
|
|
},
|
|
{
|
|
"epoch": 0.7902581340775037,
|
|
"grad_norm": 0.7103933691978455,
|
|
"learning_rate": 1.0506120378004694e-05,
|
|
"loss": 0.756,
|
|
"step": 12460
|
|
},
|
|
{
|
|
"epoch": 0.7915266062028287,
|
|
"grad_norm": 0.6866100430488586,
|
|
"learning_rate": 1.0442696771738442e-05,
|
|
"loss": 0.7736,
|
|
"step": 12480
|
|
},
|
|
{
|
|
"epoch": 0.7927950783281538,
|
|
"grad_norm": 0.7697407603263855,
|
|
"learning_rate": 1.037927316547219e-05,
|
|
"loss": 0.7796,
|
|
"step": 12500
|
|
},
|
|
{
|
|
"epoch": 0.7940635504534788,
|
|
"grad_norm": 0.8658385276794434,
|
|
"learning_rate": 1.0315849559205937e-05,
|
|
"loss": 0.787,
|
|
"step": 12520
|
|
},
|
|
{
|
|
"epoch": 0.7953320225788039,
|
|
"grad_norm": 0.8282449245452881,
|
|
"learning_rate": 1.0252425952939684e-05,
|
|
"loss": 0.7666,
|
|
"step": 12540
|
|
},
|
|
{
|
|
"epoch": 0.7966004947041289,
|
|
"grad_norm": 0.8376625776290894,
|
|
"learning_rate": 1.0189002346673432e-05,
|
|
"loss": 0.7313,
|
|
"step": 12560
|
|
},
|
|
{
|
|
"epoch": 0.797868966829454,
|
|
"grad_norm": 0.8002750277519226,
|
|
"learning_rate": 1.012557874040718e-05,
|
|
"loss": 0.8,
|
|
"step": 12580
|
|
},
|
|
{
|
|
"epoch": 0.799137438954779,
|
|
"grad_norm": 0.7849326729774475,
|
|
"learning_rate": 1.0062155134140929e-05,
|
|
"loss": 0.7761,
|
|
"step": 12600
|
|
},
|
|
{
|
|
"epoch": 0.8004059110801041,
|
|
"grad_norm": 0.8501541018486023,
|
|
"learning_rate": 9.998731527874676e-06,
|
|
"loss": 0.7845,
|
|
"step": 12620
|
|
},
|
|
{
|
|
"epoch": 0.801674383205429,
|
|
"grad_norm": 0.7818264365196228,
|
|
"learning_rate": 9.935307921608424e-06,
|
|
"loss": 0.7669,
|
|
"step": 12640
|
|
},
|
|
{
|
|
"epoch": 0.8029428553307542,
|
|
"grad_norm": 0.9117188453674316,
|
|
"learning_rate": 9.871884315342171e-06,
|
|
"loss": 0.7578,
|
|
"step": 12660
|
|
},
|
|
{
|
|
"epoch": 0.8042113274560792,
|
|
"grad_norm": 0.8058929443359375,
|
|
"learning_rate": 9.808460709075919e-06,
|
|
"loss": 0.7676,
|
|
"step": 12680
|
|
},
|
|
{
|
|
"epoch": 0.8054797995814041,
|
|
"grad_norm": 0.8033195734024048,
|
|
"learning_rate": 9.745037102809666e-06,
|
|
"loss": 0.8151,
|
|
"step": 12700
|
|
},
|
|
{
|
|
"epoch": 0.8067482717067292,
|
|
"grad_norm": 0.898897647857666,
|
|
"learning_rate": 9.681613496543414e-06,
|
|
"loss": 0.8267,
|
|
"step": 12720
|
|
},
|
|
{
|
|
"epoch": 0.8080167438320542,
|
|
"grad_norm": 0.9970609545707703,
|
|
"learning_rate": 9.618189890277161e-06,
|
|
"loss": 0.8075,
|
|
"step": 12740
|
|
},
|
|
{
|
|
"epoch": 0.8092852159573793,
|
|
"grad_norm": 0.904344916343689,
|
|
"learning_rate": 9.554766284010909e-06,
|
|
"loss": 0.8,
|
|
"step": 12760
|
|
},
|
|
{
|
|
"epoch": 0.8105536880827043,
|
|
"grad_norm": 0.8318148255348206,
|
|
"learning_rate": 9.491342677744656e-06,
|
|
"loss": 0.8027,
|
|
"step": 12780
|
|
},
|
|
{
|
|
"epoch": 0.8118221602080294,
|
|
"grad_norm": 0.8471246957778931,
|
|
"learning_rate": 9.427919071478404e-06,
|
|
"loss": 0.7587,
|
|
"step": 12800
|
|
},
|
|
{
|
|
"epoch": 0.8130906323333544,
|
|
"grad_norm": 0.7848266363143921,
|
|
"learning_rate": 9.364495465212153e-06,
|
|
"loss": 0.7499,
|
|
"step": 12820
|
|
},
|
|
{
|
|
"epoch": 0.8143591044586795,
|
|
"grad_norm": 0.9037428498268127,
|
|
"learning_rate": 9.3010718589459e-06,
|
|
"loss": 0.7865,
|
|
"step": 12840
|
|
},
|
|
{
|
|
"epoch": 0.8156275765840045,
|
|
"grad_norm": 0.7049270868301392,
|
|
"learning_rate": 9.237648252679648e-06,
|
|
"loss": 0.817,
|
|
"step": 12860
|
|
},
|
|
{
|
|
"epoch": 0.8168960487093296,
|
|
"grad_norm": 0.7613449096679688,
|
|
"learning_rate": 9.174224646413395e-06,
|
|
"loss": 0.806,
|
|
"step": 12880
|
|
},
|
|
{
|
|
"epoch": 0.8181645208346546,
|
|
"grad_norm": 0.7704141139984131,
|
|
"learning_rate": 9.110801040147143e-06,
|
|
"loss": 0.7307,
|
|
"step": 12900
|
|
},
|
|
{
|
|
"epoch": 0.8194329929599797,
|
|
"grad_norm": 0.707279622554779,
|
|
"learning_rate": 9.04737743388089e-06,
|
|
"loss": 0.7838,
|
|
"step": 12920
|
|
},
|
|
{
|
|
"epoch": 0.8207014650853047,
|
|
"grad_norm": 0.7817753553390503,
|
|
"learning_rate": 8.983953827614638e-06,
|
|
"loss": 0.7994,
|
|
"step": 12940
|
|
},
|
|
{
|
|
"epoch": 0.8219699372106298,
|
|
"grad_norm": 0.8321487307548523,
|
|
"learning_rate": 8.920530221348385e-06,
|
|
"loss": 0.7843,
|
|
"step": 12960
|
|
},
|
|
{
|
|
"epoch": 0.8232384093359548,
|
|
"grad_norm": 0.799281120300293,
|
|
"learning_rate": 8.857106615082133e-06,
|
|
"loss": 0.771,
|
|
"step": 12980
|
|
},
|
|
{
|
|
"epoch": 0.8245068814612799,
|
|
"grad_norm": 0.8843486309051514,
|
|
"learning_rate": 8.79368300881588e-06,
|
|
"loss": 0.8269,
|
|
"step": 13000
|
|
},
|
|
{
|
|
"epoch": 0.8257753535866049,
|
|
"grad_norm": 0.6699514985084534,
|
|
"learning_rate": 8.73025940254963e-06,
|
|
"loss": 0.7398,
|
|
"step": 13020
|
|
},
|
|
{
|
|
"epoch": 0.82704382571193,
|
|
"grad_norm": 0.7868858575820923,
|
|
"learning_rate": 8.666835796283377e-06,
|
|
"loss": 0.7779,
|
|
"step": 13040
|
|
},
|
|
{
|
|
"epoch": 0.828312297837255,
|
|
"grad_norm": 0.8733574151992798,
|
|
"learning_rate": 8.603412190017125e-06,
|
|
"loss": 0.7977,
|
|
"step": 13060
|
|
},
|
|
{
|
|
"epoch": 0.8295807699625801,
|
|
"grad_norm": 0.7439238429069519,
|
|
"learning_rate": 8.539988583750872e-06,
|
|
"loss": 0.7587,
|
|
"step": 13080
|
|
},
|
|
{
|
|
"epoch": 0.8308492420879051,
|
|
"grad_norm": 0.8214549422264099,
|
|
"learning_rate": 8.476564977484621e-06,
|
|
"loss": 0.8181,
|
|
"step": 13100
|
|
},
|
|
{
|
|
"epoch": 0.8321177142132302,
|
|
"grad_norm": 0.8577607870101929,
|
|
"learning_rate": 8.413141371218369e-06,
|
|
"loss": 0.793,
|
|
"step": 13120
|
|
},
|
|
{
|
|
"epoch": 0.8333861863385552,
|
|
"grad_norm": 0.6957492828369141,
|
|
"learning_rate": 8.349717764952116e-06,
|
|
"loss": 0.755,
|
|
"step": 13140
|
|
},
|
|
{
|
|
"epoch": 0.8346546584638802,
|
|
"grad_norm": 0.981088399887085,
|
|
"learning_rate": 8.286294158685864e-06,
|
|
"loss": 0.7769,
|
|
"step": 13160
|
|
},
|
|
{
|
|
"epoch": 0.8359231305892053,
|
|
"grad_norm": 0.7333866357803345,
|
|
"learning_rate": 8.222870552419611e-06,
|
|
"loss": 0.7536,
|
|
"step": 13180
|
|
},
|
|
{
|
|
"epoch": 0.8371916027145303,
|
|
"grad_norm": 0.8152589201927185,
|
|
"learning_rate": 8.159446946153359e-06,
|
|
"loss": 0.7497,
|
|
"step": 13200
|
|
},
|
|
{
|
|
"epoch": 0.8384600748398554,
|
|
"grad_norm": 0.8962567448616028,
|
|
"learning_rate": 8.096023339887106e-06,
|
|
"loss": 0.7838,
|
|
"step": 13220
|
|
},
|
|
{
|
|
"epoch": 0.8397285469651804,
|
|
"grad_norm": 0.6861271262168884,
|
|
"learning_rate": 8.032599733620855e-06,
|
|
"loss": 0.777,
|
|
"step": 13240
|
|
},
|
|
{
|
|
"epoch": 0.8409970190905055,
|
|
"grad_norm": 0.7273656725883484,
|
|
"learning_rate": 7.969176127354603e-06,
|
|
"loss": 0.7345,
|
|
"step": 13260
|
|
},
|
|
{
|
|
"epoch": 0.8422654912158305,
|
|
"grad_norm": 0.7643877267837524,
|
|
"learning_rate": 7.90575252108835e-06,
|
|
"loss": 0.7837,
|
|
"step": 13280
|
|
},
|
|
{
|
|
"epoch": 0.8435339633411556,
|
|
"grad_norm": 0.695196270942688,
|
|
"learning_rate": 7.842328914822098e-06,
|
|
"loss": 0.7361,
|
|
"step": 13300
|
|
},
|
|
{
|
|
"epoch": 0.8448024354664806,
|
|
"grad_norm": 0.6783697009086609,
|
|
"learning_rate": 7.778905308555845e-06,
|
|
"loss": 0.7315,
|
|
"step": 13320
|
|
},
|
|
{
|
|
"epoch": 0.8460709075918057,
|
|
"grad_norm": 0.8633202314376831,
|
|
"learning_rate": 7.715481702289593e-06,
|
|
"loss": 0.7713,
|
|
"step": 13340
|
|
},
|
|
{
|
|
"epoch": 0.8473393797171307,
|
|
"grad_norm": 0.7902844548225403,
|
|
"learning_rate": 7.65205809602334e-06,
|
|
"loss": 0.7732,
|
|
"step": 13360
|
|
},
|
|
{
|
|
"epoch": 0.8486078518424558,
|
|
"grad_norm": 1.1263785362243652,
|
|
"learning_rate": 7.588634489757088e-06,
|
|
"loss": 0.7549,
|
|
"step": 13380
|
|
},
|
|
{
|
|
"epoch": 0.8498763239677808,
|
|
"grad_norm": 0.7141507863998413,
|
|
"learning_rate": 7.5252108834908354e-06,
|
|
"loss": 0.7498,
|
|
"step": 13400
|
|
},
|
|
{
|
|
"epoch": 0.8511447960931059,
|
|
"grad_norm": 0.7436708211898804,
|
|
"learning_rate": 7.461787277224583e-06,
|
|
"loss": 0.7525,
|
|
"step": 13420
|
|
},
|
|
{
|
|
"epoch": 0.8524132682184309,
|
|
"grad_norm": 0.7840022444725037,
|
|
"learning_rate": 7.3983636709583304e-06,
|
|
"loss": 0.7658,
|
|
"step": 13440
|
|
},
|
|
{
|
|
"epoch": 0.853681740343756,
|
|
"grad_norm": 0.7560069561004639,
|
|
"learning_rate": 7.33494006469208e-06,
|
|
"loss": 0.7946,
|
|
"step": 13460
|
|
},
|
|
{
|
|
"epoch": 0.854950212469081,
|
|
"grad_norm": 0.7361060976982117,
|
|
"learning_rate": 7.271516458425827e-06,
|
|
"loss": 0.7722,
|
|
"step": 13480
|
|
},
|
|
{
|
|
"epoch": 0.8562186845944061,
|
|
"grad_norm": 0.8141864538192749,
|
|
"learning_rate": 7.208092852159575e-06,
|
|
"loss": 0.751,
|
|
"step": 13500
|
|
},
|
|
{
|
|
"epoch": 0.8574871567197311,
|
|
"grad_norm": 0.7860879898071289,
|
|
"learning_rate": 7.144669245893322e-06,
|
|
"loss": 0.7401,
|
|
"step": 13520
|
|
},
|
|
{
|
|
"epoch": 0.8587556288450562,
|
|
"grad_norm": 1.1111942529678345,
|
|
"learning_rate": 7.08124563962707e-06,
|
|
"loss": 0.803,
|
|
"step": 13540
|
|
},
|
|
{
|
|
"epoch": 0.8600241009703812,
|
|
"grad_norm": 0.7983526587486267,
|
|
"learning_rate": 7.017822033360817e-06,
|
|
"loss": 0.7963,
|
|
"step": 13560
|
|
},
|
|
{
|
|
"epoch": 0.8612925730957062,
|
|
"grad_norm": 0.7415090799331665,
|
|
"learning_rate": 6.954398427094565e-06,
|
|
"loss": 0.7997,
|
|
"step": 13580
|
|
},
|
|
{
|
|
"epoch": 0.8625610452210313,
|
|
"grad_norm": 0.8375813364982605,
|
|
"learning_rate": 6.890974820828312e-06,
|
|
"loss": 0.8052,
|
|
"step": 13600
|
|
},
|
|
{
|
|
"epoch": 0.8638295173463563,
|
|
"grad_norm": 0.8622868657112122,
|
|
"learning_rate": 6.82755121456206e-06,
|
|
"loss": 0.7816,
|
|
"step": 13620
|
|
},
|
|
{
|
|
"epoch": 0.8650979894716814,
|
|
"grad_norm": 0.9229819774627686,
|
|
"learning_rate": 6.764127608295807e-06,
|
|
"loss": 0.7558,
|
|
"step": 13640
|
|
},
|
|
{
|
|
"epoch": 0.8663664615970064,
|
|
"grad_norm": 0.8334788084030151,
|
|
"learning_rate": 6.700704002029556e-06,
|
|
"loss": 0.7648,
|
|
"step": 13660
|
|
},
|
|
{
|
|
"epoch": 0.8676349337223315,
|
|
"grad_norm": 0.8499231338500977,
|
|
"learning_rate": 6.637280395763304e-06,
|
|
"loss": 0.7954,
|
|
"step": 13680
|
|
},
|
|
{
|
|
"epoch": 0.8689034058476565,
|
|
"grad_norm": 0.8455031514167786,
|
|
"learning_rate": 6.573856789497051e-06,
|
|
"loss": 0.7973,
|
|
"step": 13700
|
|
},
|
|
{
|
|
"epoch": 0.8701718779729816,
|
|
"grad_norm": 0.7882249355316162,
|
|
"learning_rate": 6.510433183230799e-06,
|
|
"loss": 0.8031,
|
|
"step": 13720
|
|
},
|
|
{
|
|
"epoch": 0.8714403500983066,
|
|
"grad_norm": 0.7251647114753723,
|
|
"learning_rate": 6.447009576964546e-06,
|
|
"loss": 0.7581,
|
|
"step": 13740
|
|
},
|
|
{
|
|
"epoch": 0.8727088222236317,
|
|
"grad_norm": 0.8899182081222534,
|
|
"learning_rate": 6.383585970698294e-06,
|
|
"loss": 0.7981,
|
|
"step": 13760
|
|
},
|
|
{
|
|
"epoch": 0.8739772943489567,
|
|
"grad_norm": 0.7742959260940552,
|
|
"learning_rate": 6.320162364432042e-06,
|
|
"loss": 0.7606,
|
|
"step": 13780
|
|
},
|
|
{
|
|
"epoch": 0.8752457664742818,
|
|
"grad_norm": 0.7180908918380737,
|
|
"learning_rate": 6.25673875816579e-06,
|
|
"loss": 0.8301,
|
|
"step": 13800
|
|
},
|
|
{
|
|
"epoch": 0.8765142385996068,
|
|
"grad_norm": 0.7653104066848755,
|
|
"learning_rate": 6.193315151899537e-06,
|
|
"loss": 0.8017,
|
|
"step": 13820
|
|
},
|
|
{
|
|
"epoch": 0.8777827107249319,
|
|
"grad_norm": 0.802506148815155,
|
|
"learning_rate": 6.1298915456332856e-06,
|
|
"loss": 0.7634,
|
|
"step": 13840
|
|
},
|
|
{
|
|
"epoch": 0.8790511828502569,
|
|
"grad_norm": 0.882520318031311,
|
|
"learning_rate": 6.066467939367033e-06,
|
|
"loss": 0.777,
|
|
"step": 13860
|
|
},
|
|
{
|
|
"epoch": 0.880319654975582,
|
|
"grad_norm": 0.7464948892593384,
|
|
"learning_rate": 6.0030443331007806e-06,
|
|
"loss": 0.7495,
|
|
"step": 13880
|
|
},
|
|
{
|
|
"epoch": 0.881588127100907,
|
|
"grad_norm": 0.7769840359687805,
|
|
"learning_rate": 5.939620726834528e-06,
|
|
"loss": 0.7562,
|
|
"step": 13900
|
|
},
|
|
{
|
|
"epoch": 0.882856599226232,
|
|
"grad_norm": 0.9281843304634094,
|
|
"learning_rate": 5.8761971205682756e-06,
|
|
"loss": 0.7845,
|
|
"step": 13920
|
|
},
|
|
{
|
|
"epoch": 0.884125071351557,
|
|
"grad_norm": 0.801986813545227,
|
|
"learning_rate": 5.812773514302024e-06,
|
|
"loss": 0.7301,
|
|
"step": 13940
|
|
},
|
|
{
|
|
"epoch": 0.8853935434768821,
|
|
"grad_norm": 0.8647619485855103,
|
|
"learning_rate": 5.749349908035771e-06,
|
|
"loss": 0.7693,
|
|
"step": 13960
|
|
},
|
|
{
|
|
"epoch": 0.8866620156022071,
|
|
"grad_norm": 0.8235803246498108,
|
|
"learning_rate": 5.685926301769519e-06,
|
|
"loss": 0.7972,
|
|
"step": 13980
|
|
},
|
|
{
|
|
"epoch": 0.8879304877275321,
|
|
"grad_norm": 0.8338538408279419,
|
|
"learning_rate": 5.622502695503266e-06,
|
|
"loss": 0.7951,
|
|
"step": 14000
|
|
},
|
|
{
|
|
"epoch": 0.8891989598528572,
|
|
"grad_norm": 0.704741895198822,
|
|
"learning_rate": 5.559079089237014e-06,
|
|
"loss": 0.7446,
|
|
"step": 14020
|
|
},
|
|
{
|
|
"epoch": 0.8904674319781822,
|
|
"grad_norm": 0.7731455564498901,
|
|
"learning_rate": 5.495655482970762e-06,
|
|
"loss": 0.7266,
|
|
"step": 14040
|
|
},
|
|
{
|
|
"epoch": 0.8917359041035073,
|
|
"grad_norm": 0.779869794845581,
|
|
"learning_rate": 5.43223187670451e-06,
|
|
"loss": 0.7638,
|
|
"step": 14060
|
|
},
|
|
{
|
|
"epoch": 0.8930043762288323,
|
|
"grad_norm": 0.7645334005355835,
|
|
"learning_rate": 5.368808270438257e-06,
|
|
"loss": 0.7868,
|
|
"step": 14080
|
|
},
|
|
{
|
|
"epoch": 0.8942728483541574,
|
|
"grad_norm": 0.8010347485542297,
|
|
"learning_rate": 5.305384664172005e-06,
|
|
"loss": 0.8038,
|
|
"step": 14100
|
|
},
|
|
{
|
|
"epoch": 0.8955413204794824,
|
|
"grad_norm": 0.830556333065033,
|
|
"learning_rate": 5.241961057905752e-06,
|
|
"loss": 0.7774,
|
|
"step": 14120
|
|
},
|
|
{
|
|
"epoch": 0.8968097926048075,
|
|
"grad_norm": 0.9386767745018005,
|
|
"learning_rate": 5.178537451639501e-06,
|
|
"loss": 0.7591,
|
|
"step": 14140
|
|
},
|
|
{
|
|
"epoch": 0.8980782647301325,
|
|
"grad_norm": 0.8357464671134949,
|
|
"learning_rate": 5.115113845373248e-06,
|
|
"loss": 0.7642,
|
|
"step": 14160
|
|
},
|
|
{
|
|
"epoch": 0.8993467368554576,
|
|
"grad_norm": 0.7423475384712219,
|
|
"learning_rate": 5.051690239106996e-06,
|
|
"loss": 0.7586,
|
|
"step": 14180
|
|
},
|
|
{
|
|
"epoch": 0.9006152089807826,
|
|
"grad_norm": 0.8751846551895142,
|
|
"learning_rate": 4.988266632840743e-06,
|
|
"loss": 0.7536,
|
|
"step": 14200
|
|
},
|
|
{
|
|
"epoch": 0.9018836811061077,
|
|
"grad_norm": 0.8088937997817993,
|
|
"learning_rate": 4.924843026574491e-06,
|
|
"loss": 0.7747,
|
|
"step": 14220
|
|
},
|
|
{
|
|
"epoch": 0.9031521532314327,
|
|
"grad_norm": 0.7831218242645264,
|
|
"learning_rate": 4.861419420308239e-06,
|
|
"loss": 0.7741,
|
|
"step": 14240
|
|
},
|
|
{
|
|
"epoch": 0.9044206253567578,
|
|
"grad_norm": 0.8346021175384521,
|
|
"learning_rate": 4.7979958140419865e-06,
|
|
"loss": 0.8034,
|
|
"step": 14260
|
|
},
|
|
{
|
|
"epoch": 0.9056890974820828,
|
|
"grad_norm": 0.7575668692588806,
|
|
"learning_rate": 4.734572207775735e-06,
|
|
"loss": 0.7866,
|
|
"step": 14280
|
|
},
|
|
{
|
|
"epoch": 0.9069575696074079,
|
|
"grad_norm": 0.8374447822570801,
|
|
"learning_rate": 4.671148601509482e-06,
|
|
"loss": 0.7808,
|
|
"step": 14300
|
|
},
|
|
{
|
|
"epoch": 0.9082260417327329,
|
|
"grad_norm": 0.7750478982925415,
|
|
"learning_rate": 4.60772499524323e-06,
|
|
"loss": 0.7628,
|
|
"step": 14320
|
|
},
|
|
{
|
|
"epoch": 0.909494513858058,
|
|
"grad_norm": 0.8722181916236877,
|
|
"learning_rate": 4.54747256929029e-06,
|
|
"loss": 0.7744,
|
|
"step": 14340
|
|
},
|
|
{
|
|
"epoch": 0.910762985983383,
|
|
"grad_norm": 0.8774197101593018,
|
|
"learning_rate": 4.484048963024038e-06,
|
|
"loss": 0.8039,
|
|
"step": 14360
|
|
},
|
|
{
|
|
"epoch": 0.9120314581087081,
|
|
"grad_norm": 0.677240788936615,
|
|
"learning_rate": 4.420625356757786e-06,
|
|
"loss": 0.7648,
|
|
"step": 14380
|
|
},
|
|
{
|
|
"epoch": 0.9132999302340331,
|
|
"grad_norm": 0.8468155264854431,
|
|
"learning_rate": 4.357201750491533e-06,
|
|
"loss": 0.7494,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 0.9145684023593581,
|
|
"grad_norm": 0.8869547247886658,
|
|
"learning_rate": 4.293778144225281e-06,
|
|
"loss": 0.7713,
|
|
"step": 14420
|
|
},
|
|
{
|
|
"epoch": 0.9158368744846832,
|
|
"grad_norm": 0.8307056427001953,
|
|
"learning_rate": 4.230354537959028e-06,
|
|
"loss": 0.7784,
|
|
"step": 14440
|
|
},
|
|
{
|
|
"epoch": 0.9171053466100082,
|
|
"grad_norm": 0.8972311019897461,
|
|
"learning_rate": 4.166930931692777e-06,
|
|
"loss": 0.7653,
|
|
"step": 14460
|
|
},
|
|
{
|
|
"epoch": 0.9183738187353333,
|
|
"grad_norm": 0.7319260835647583,
|
|
"learning_rate": 4.103507325426524e-06,
|
|
"loss": 0.7802,
|
|
"step": 14480
|
|
},
|
|
{
|
|
"epoch": 0.9196422908606583,
|
|
"grad_norm": 0.8915144801139832,
|
|
"learning_rate": 4.040083719160272e-06,
|
|
"loss": 0.7709,
|
|
"step": 14500
|
|
},
|
|
{
|
|
"epoch": 0.9209107629859834,
|
|
"grad_norm": 1.0568279027938843,
|
|
"learning_rate": 3.976660112894019e-06,
|
|
"loss": 0.7961,
|
|
"step": 14520
|
|
},
|
|
{
|
|
"epoch": 0.9221792351113084,
|
|
"grad_norm": 0.9176843166351318,
|
|
"learning_rate": 3.913236506627767e-06,
|
|
"loss": 0.7758,
|
|
"step": 14540
|
|
},
|
|
{
|
|
"epoch": 0.9234477072366335,
|
|
"grad_norm": 0.8266422152519226,
|
|
"learning_rate": 3.849812900361515e-06,
|
|
"loss": 0.7896,
|
|
"step": 14560
|
|
},
|
|
{
|
|
"epoch": 0.9247161793619585,
|
|
"grad_norm": 0.8376505970954895,
|
|
"learning_rate": 3.7863892940952626e-06,
|
|
"loss": 0.7529,
|
|
"step": 14580
|
|
},
|
|
{
|
|
"epoch": 0.9259846514872836,
|
|
"grad_norm": 0.7567136287689209,
|
|
"learning_rate": 3.72296568782901e-06,
|
|
"loss": 0.7361,
|
|
"step": 14600
|
|
},
|
|
{
|
|
"epoch": 0.9272531236126086,
|
|
"grad_norm": 0.8554660677909851,
|
|
"learning_rate": 3.6595420815627576e-06,
|
|
"loss": 0.7808,
|
|
"step": 14620
|
|
},
|
|
{
|
|
"epoch": 0.9285215957379337,
|
|
"grad_norm": 0.8139016032218933,
|
|
"learning_rate": 3.5961184752965055e-06,
|
|
"loss": 0.7743,
|
|
"step": 14640
|
|
},
|
|
{
|
|
"epoch": 0.9297900678632587,
|
|
"grad_norm": 0.8457810282707214,
|
|
"learning_rate": 3.532694869030253e-06,
|
|
"loss": 0.7431,
|
|
"step": 14660
|
|
},
|
|
{
|
|
"epoch": 0.9310585399885838,
|
|
"grad_norm": 0.7392221093177795,
|
|
"learning_rate": 3.469271262764001e-06,
|
|
"loss": 0.7678,
|
|
"step": 14680
|
|
},
|
|
{
|
|
"epoch": 0.9323270121139088,
|
|
"grad_norm": 0.74676913022995,
|
|
"learning_rate": 3.405847656497749e-06,
|
|
"loss": 0.7739,
|
|
"step": 14700
|
|
},
|
|
{
|
|
"epoch": 0.9335954842392339,
|
|
"grad_norm": 0.9203459620475769,
|
|
"learning_rate": 3.3424240502314964e-06,
|
|
"loss": 0.7746,
|
|
"step": 14720
|
|
},
|
|
{
|
|
"epoch": 0.9348639563645589,
|
|
"grad_norm": 0.8364453911781311,
|
|
"learning_rate": 3.279000443965244e-06,
|
|
"loss": 0.8001,
|
|
"step": 14740
|
|
},
|
|
{
|
|
"epoch": 0.936132428489884,
|
|
"grad_norm": 0.8923636078834534,
|
|
"learning_rate": 3.2155768376989914e-06,
|
|
"loss": 0.7831,
|
|
"step": 14760
|
|
},
|
|
{
|
|
"epoch": 0.937400900615209,
|
|
"grad_norm": 0.8334746360778809,
|
|
"learning_rate": 3.1521532314327397e-06,
|
|
"loss": 0.7731,
|
|
"step": 14780
|
|
},
|
|
{
|
|
"epoch": 0.9386693727405341,
|
|
"grad_norm": 0.7588502764701843,
|
|
"learning_rate": 3.0887296251664873e-06,
|
|
"loss": 0.7625,
|
|
"step": 14800
|
|
},
|
|
{
|
|
"epoch": 0.9399378448658591,
|
|
"grad_norm": 0.7889774441719055,
|
|
"learning_rate": 3.0253060189002348e-06,
|
|
"loss": 0.7354,
|
|
"step": 14820
|
|
},
|
|
{
|
|
"epoch": 0.9412063169911841,
|
|
"grad_norm": 0.7171176671981812,
|
|
"learning_rate": 2.9618824126339823e-06,
|
|
"loss": 0.7341,
|
|
"step": 14840
|
|
},
|
|
{
|
|
"epoch": 0.9424747891165092,
|
|
"grad_norm": 0.9590219259262085,
|
|
"learning_rate": 2.89845880636773e-06,
|
|
"loss": 0.8243,
|
|
"step": 14860
|
|
},
|
|
{
|
|
"epoch": 0.9437432612418342,
|
|
"grad_norm": 0.8210035562515259,
|
|
"learning_rate": 2.8350352001014777e-06,
|
|
"loss": 0.7863,
|
|
"step": 14880
|
|
},
|
|
{
|
|
"epoch": 0.9450117333671593,
|
|
"grad_norm": 0.9011463522911072,
|
|
"learning_rate": 2.7716115938352256e-06,
|
|
"loss": 0.7325,
|
|
"step": 14900
|
|
},
|
|
{
|
|
"epoch": 0.9462802054924843,
|
|
"grad_norm": 0.8472886681556702,
|
|
"learning_rate": 2.7081879875689735e-06,
|
|
"loss": 0.7409,
|
|
"step": 14920
|
|
},
|
|
{
|
|
"epoch": 0.9475486776178094,
|
|
"grad_norm": 0.8105041980743408,
|
|
"learning_rate": 2.644764381302721e-06,
|
|
"loss": 0.7598,
|
|
"step": 14940
|
|
},
|
|
{
|
|
"epoch": 0.9488171497431344,
|
|
"grad_norm": 0.9121299386024475,
|
|
"learning_rate": 2.581340775036469e-06,
|
|
"loss": 0.8026,
|
|
"step": 14960
|
|
},
|
|
{
|
|
"epoch": 0.9500856218684595,
|
|
"grad_norm": 0.7272994518280029,
|
|
"learning_rate": 2.5179171687702165e-06,
|
|
"loss": 0.771,
|
|
"step": 14980
|
|
},
|
|
{
|
|
"epoch": 0.9513540939937845,
|
|
"grad_norm": 0.9553162455558777,
|
|
"learning_rate": 2.454493562503964e-06,
|
|
"loss": 0.7562,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 0.9526225661191096,
|
|
"grad_norm": 0.775790810585022,
|
|
"learning_rate": 2.391069956237712e-06,
|
|
"loss": 0.7682,
|
|
"step": 15020
|
|
},
|
|
{
|
|
"epoch": 0.9538910382444346,
|
|
"grad_norm": 0.7874183654785156,
|
|
"learning_rate": 2.3276463499714594e-06,
|
|
"loss": 0.7706,
|
|
"step": 15040
|
|
},
|
|
{
|
|
"epoch": 0.9551595103697597,
|
|
"grad_norm": 0.9175487756729126,
|
|
"learning_rate": 2.2642227437052073e-06,
|
|
"loss": 0.7904,
|
|
"step": 15060
|
|
},
|
|
{
|
|
"epoch": 0.9564279824950846,
|
|
"grad_norm": 0.7636487483978271,
|
|
"learning_rate": 2.200799137438955e-06,
|
|
"loss": 0.7568,
|
|
"step": 15080
|
|
},
|
|
{
|
|
"epoch": 0.9576964546204098,
|
|
"grad_norm": 0.8234472870826721,
|
|
"learning_rate": 2.1373755311727023e-06,
|
|
"loss": 0.7902,
|
|
"step": 15100
|
|
},
|
|
{
|
|
"epoch": 0.9589649267457347,
|
|
"grad_norm": 0.7153974175453186,
|
|
"learning_rate": 2.0739519249064502e-06,
|
|
"loss": 0.7305,
|
|
"step": 15120
|
|
},
|
|
{
|
|
"epoch": 0.9602333988710599,
|
|
"grad_norm": 0.8177540302276611,
|
|
"learning_rate": 2.010528318640198e-06,
|
|
"loss": 0.7584,
|
|
"step": 15140
|
|
},
|
|
{
|
|
"epoch": 0.9615018709963848,
|
|
"grad_norm": 0.7617529630661011,
|
|
"learning_rate": 1.9471047123739457e-06,
|
|
"loss": 0.74,
|
|
"step": 15160
|
|
},
|
|
{
|
|
"epoch": 0.96277034312171,
|
|
"grad_norm": 0.8607798218727112,
|
|
"learning_rate": 1.8836811061076934e-06,
|
|
"loss": 0.7867,
|
|
"step": 15180
|
|
},
|
|
{
|
|
"epoch": 0.9640388152470349,
|
|
"grad_norm": 0.7388313412666321,
|
|
"learning_rate": 1.820257499841441e-06,
|
|
"loss": 0.7561,
|
|
"step": 15200
|
|
},
|
|
{
|
|
"epoch": 0.96530728737236,
|
|
"grad_norm": 0.8492052555084229,
|
|
"learning_rate": 1.756833893575189e-06,
|
|
"loss": 0.7769,
|
|
"step": 15220
|
|
},
|
|
{
|
|
"epoch": 0.966575759497685,
|
|
"grad_norm": 0.7551003694534302,
|
|
"learning_rate": 1.6934102873089365e-06,
|
|
"loss": 0.7641,
|
|
"step": 15240
|
|
},
|
|
{
|
|
"epoch": 0.96784423162301,
|
|
"grad_norm": 0.8290461897850037,
|
|
"learning_rate": 1.629986681042684e-06,
|
|
"loss": 0.7537,
|
|
"step": 15260
|
|
},
|
|
{
|
|
"epoch": 0.9691127037483351,
|
|
"grad_norm": 0.8409222960472107,
|
|
"learning_rate": 1.566563074776432e-06,
|
|
"loss": 0.7591,
|
|
"step": 15280
|
|
},
|
|
{
|
|
"epoch": 0.9703811758736601,
|
|
"grad_norm": 0.7970009446144104,
|
|
"learning_rate": 1.5031394685101795e-06,
|
|
"loss": 0.7763,
|
|
"step": 15300
|
|
},
|
|
{
|
|
"epoch": 0.9716496479989852,
|
|
"grad_norm": 0.8285433650016785,
|
|
"learning_rate": 1.4397158622439274e-06,
|
|
"loss": 0.7357,
|
|
"step": 15320
|
|
},
|
|
{
|
|
"epoch": 0.9729181201243102,
|
|
"grad_norm": 0.8585550785064697,
|
|
"learning_rate": 1.376292255977675e-06,
|
|
"loss": 0.7706,
|
|
"step": 15340
|
|
},
|
|
{
|
|
"epoch": 0.9741865922496353,
|
|
"grad_norm": 0.7604003548622131,
|
|
"learning_rate": 1.3128686497114226e-06,
|
|
"loss": 0.7455,
|
|
"step": 15360
|
|
},
|
|
{
|
|
"epoch": 0.9754550643749603,
|
|
"grad_norm": 0.7896905541419983,
|
|
"learning_rate": 1.2494450434451703e-06,
|
|
"loss": 0.7824,
|
|
"step": 15380
|
|
},
|
|
{
|
|
"epoch": 0.9767235365002854,
|
|
"grad_norm": 0.8073090314865112,
|
|
"learning_rate": 1.186021437178918e-06,
|
|
"loss": 0.7836,
|
|
"step": 15400
|
|
},
|
|
{
|
|
"epoch": 0.9779920086256104,
|
|
"grad_norm": 0.8942602276802063,
|
|
"learning_rate": 1.1225978309126657e-06,
|
|
"loss": 0.7993,
|
|
"step": 15420
|
|
},
|
|
{
|
|
"epoch": 0.9792604807509355,
|
|
"grad_norm": 0.7417428493499756,
|
|
"learning_rate": 1.0591742246464135e-06,
|
|
"loss": 0.7828,
|
|
"step": 15440
|
|
},
|
|
{
|
|
"epoch": 0.9805289528762605,
|
|
"grad_norm": 0.7453452944755554,
|
|
"learning_rate": 9.957506183801612e-07,
|
|
"loss": 0.7589,
|
|
"step": 15460
|
|
},
|
|
{
|
|
"epoch": 0.9817974250015856,
|
|
"grad_norm": 0.7909823656082153,
|
|
"learning_rate": 9.323270121139089e-07,
|
|
"loss": 0.7776,
|
|
"step": 15480
|
|
},
|
|
{
|
|
"epoch": 0.9830658971269106,
|
|
"grad_norm": 0.8445958495140076,
|
|
"learning_rate": 8.689034058476566e-07,
|
|
"loss": 0.8027,
|
|
"step": 15500
|
|
},
|
|
{
|
|
"epoch": 0.9843343692522357,
|
|
"grad_norm": 0.8094016313552856,
|
|
"learning_rate": 8.054797995814042e-07,
|
|
"loss": 0.7426,
|
|
"step": 15520
|
|
},
|
|
{
|
|
"epoch": 0.9856028413775607,
|
|
"grad_norm": 0.9366889595985413,
|
|
"learning_rate": 7.420561933151519e-07,
|
|
"loss": 0.7754,
|
|
"step": 15540
|
|
},
|
|
{
|
|
"epoch": 0.9868713135028858,
|
|
"grad_norm": 0.8186490535736084,
|
|
"learning_rate": 6.786325870488996e-07,
|
|
"loss": 0.7699,
|
|
"step": 15560
|
|
},
|
|
{
|
|
"epoch": 0.9881397856282108,
|
|
"grad_norm": 0.7232887148857117,
|
|
"learning_rate": 6.152089807826473e-07,
|
|
"loss": 0.7384,
|
|
"step": 15580
|
|
},
|
|
{
|
|
"epoch": 0.9894082577535359,
|
|
"grad_norm": 0.7465324401855469,
|
|
"learning_rate": 5.517853745163951e-07,
|
|
"loss": 0.775,
|
|
"step": 15600
|
|
},
|
|
{
|
|
"epoch": 0.9906767298788609,
|
|
"grad_norm": 0.8372469544410706,
|
|
"learning_rate": 4.883617682501427e-07,
|
|
"loss": 0.7497,
|
|
"step": 15620
|
|
},
|
|
{
|
|
"epoch": 0.991945202004186,
|
|
"grad_norm": 0.8273568749427795,
|
|
"learning_rate": 4.2493816198389043e-07,
|
|
"loss": 0.7738,
|
|
"step": 15640
|
|
},
|
|
{
|
|
"epoch": 0.993213674129511,
|
|
"grad_norm": 0.7157850861549377,
|
|
"learning_rate": 3.6151455571763815e-07,
|
|
"loss": 0.7398,
|
|
"step": 15660
|
|
},
|
|
{
|
|
"epoch": 0.994482146254836,
|
|
"grad_norm": 0.8117349147796631,
|
|
"learning_rate": 2.980909494513858e-07,
|
|
"loss": 0.8053,
|
|
"step": 15680
|
|
},
|
|
{
|
|
"epoch": 0.9957506183801611,
|
|
"grad_norm": 0.869315505027771,
|
|
"learning_rate": 2.3466734318513352e-07,
|
|
"loss": 0.7819,
|
|
"step": 15700
|
|
},
|
|
{
|
|
"epoch": 0.9970190905054861,
|
|
"grad_norm": 0.7544413208961487,
|
|
"learning_rate": 1.712437369188812e-07,
|
|
"loss": 0.805,
|
|
"step": 15720
|
|
},
|
|
{
|
|
"epoch": 0.9982875626308112,
|
|
"grad_norm": 0.9937105178833008,
|
|
"learning_rate": 1.0782013065262892e-07,
|
|
"loss": 0.7808,
|
|
"step": 15740
|
|
},
|
|
{
|
|
"epoch": 0.9995560347561362,
|
|
"grad_norm": 0.7431527972221375,
|
|
"learning_rate": 4.439652438637661e-08,
|
|
"loss": 0.7574,
|
|
"step": 15760
|
|
}
|
|
],
|
|
"logging_steps": 20,
|
|
"max_steps": 15767,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 15767,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 3.607614351420162e+18,
|
|
"train_batch_size": 18,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|