{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 4394,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0022762839664248113,
"grad_norm": 10.595879842373915,
"learning_rate": 2.0454545454545456e-07,
"loss": 0.9592,
"step": 10
},
|
|
{
|
|
"epoch": 0.004552567932849623,
|
|
"grad_norm": 8.506899480641875,
|
|
"learning_rate": 4.3181818181818187e-07,
|
|
"loss": 0.9341,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.006828851899274435,
|
|
"grad_norm": 4.048540973587426,
|
|
"learning_rate": 6.590909090909091e-07,
|
|
"loss": 0.8698,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.009105135865699245,
|
|
"grad_norm": 1.7983922183043597,
|
|
"learning_rate": 8.863636363636364e-07,
|
|
"loss": 0.7762,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.011381419832124057,
|
|
"grad_norm": 1.301643851792194,
|
|
"learning_rate": 1.1136363636363637e-06,
|
|
"loss": 0.7056,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.01365770379854887,
|
|
"grad_norm": 0.8116123655731647,
|
|
"learning_rate": 1.3409090909090911e-06,
|
|
"loss": 0.636,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.015933987764973682,
|
|
"grad_norm": 0.7273739176052791,
|
|
"learning_rate": 1.5681818181818184e-06,
|
|
"loss": 0.6355,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.01821027173139849,
|
|
"grad_norm": 0.7380885266969118,
|
|
"learning_rate": 1.7954545454545456e-06,
|
|
"loss": 0.6222,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.020486555697823303,
|
|
"grad_norm": 0.6975134603454084,
|
|
"learning_rate": 2.022727272727273e-06,
|
|
"loss": 0.5979,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.022762839664248115,
|
|
"grad_norm": 0.7291820146919704,
|
|
"learning_rate": 2.25e-06,
|
|
"loss": 0.5945,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.025039123630672927,
|
|
"grad_norm": 0.7008207721708937,
|
|
"learning_rate": 2.4772727272727275e-06,
|
|
"loss": 0.5705,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.02731540759709774,
|
|
"grad_norm": 0.6449537277719501,
|
|
"learning_rate": 2.7045454545454545e-06,
|
|
"loss": 0.5718,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.029591691563522548,
|
|
"grad_norm": 0.6058495976490267,
|
|
"learning_rate": 2.931818181818182e-06,
|
|
"loss": 0.5594,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.031867975529947364,
|
|
"grad_norm": 0.681338176363733,
|
|
"learning_rate": 3.1590909090909094e-06,
|
|
"loss": 0.564,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.034144259496372176,
|
|
"grad_norm": 0.7273342639401195,
|
|
"learning_rate": 3.3863636363636364e-06,
|
|
"loss": 0.5657,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.03642054346279698,
|
|
"grad_norm": 0.6489466886504726,
|
|
"learning_rate": 3.6136363636363643e-06,
|
|
"loss": 0.5484,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.03869682742922179,
|
|
"grad_norm": 0.6973554298115421,
|
|
"learning_rate": 3.840909090909091e-06,
|
|
"loss": 0.5445,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.040973111395646605,
|
|
"grad_norm": 0.6394889437481598,
|
|
"learning_rate": 4.068181818181818e-06,
|
|
"loss": 0.548,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.04324939536207142,
|
|
"grad_norm": 0.6430348845599891,
|
|
"learning_rate": 4.295454545454546e-06,
|
|
"loss": 0.5481,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.04552567932849623,
|
|
"grad_norm": 0.7221066666159676,
|
|
"learning_rate": 4.522727272727273e-06,
|
|
"loss": 0.5492,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.04780196329492104,
|
|
"grad_norm": 0.6814488809728508,
|
|
"learning_rate": 4.75e-06,
|
|
"loss": 0.5429,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.050078247261345854,
|
|
"grad_norm": 0.6336335480126656,
|
|
"learning_rate": 4.977272727272728e-06,
|
|
"loss": 0.5323,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.052354531227770666,
|
|
"grad_norm": 0.7343888412812761,
|
|
"learning_rate": 5.204545454545455e-06,
|
|
"loss": 0.5483,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.05463081519419548,
|
|
"grad_norm": 0.8008844122645815,
|
|
"learning_rate": 5.431818181818182e-06,
|
|
"loss": 0.5286,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.05690709916062029,
|
|
"grad_norm": 0.7508926850083371,
|
|
"learning_rate": 5.65909090909091e-06,
|
|
"loss": 0.5387,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.059183383127045096,
|
|
"grad_norm": 0.6226438437116334,
|
|
"learning_rate": 5.886363636363637e-06,
|
|
"loss": 0.5311,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.06145966709346991,
|
|
"grad_norm": 0.6829241845904538,
|
|
"learning_rate": 6.113636363636364e-06,
|
|
"loss": 0.5325,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.06373595105989473,
|
|
"grad_norm": 0.7361115947795334,
|
|
"learning_rate": 6.340909090909091e-06,
|
|
"loss": 0.5344,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.06601223502631953,
|
|
"grad_norm": 0.7763867544917724,
|
|
"learning_rate": 6.568181818181819e-06,
|
|
"loss": 0.5255,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.06828851899274435,
|
|
"grad_norm": 0.7805934170558461,
|
|
"learning_rate": 6.795454545454546e-06,
|
|
"loss": 0.5237,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.07056480295916916,
|
|
"grad_norm": 0.8252464031314231,
|
|
"learning_rate": 7.022727272727273e-06,
|
|
"loss": 0.5242,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.07284108692559396,
|
|
"grad_norm": 0.7334894747403866,
|
|
"learning_rate": 7.25e-06,
|
|
"loss": 0.521,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.07511737089201878,
|
|
"grad_norm": 0.6846204422508828,
|
|
"learning_rate": 7.477272727272727e-06,
|
|
"loss": 0.5229,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.07739365485844359,
|
|
"grad_norm": 0.8052410281091429,
|
|
"learning_rate": 7.704545454545456e-06,
|
|
"loss": 0.5262,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.0796699388248684,
|
|
"grad_norm": 0.6991957963444353,
|
|
"learning_rate": 7.931818181818182e-06,
|
|
"loss": 0.5335,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.08194622279129321,
|
|
"grad_norm": 0.7125373155196583,
|
|
"learning_rate": 8.15909090909091e-06,
|
|
"loss": 0.5125,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.08422250675771803,
|
|
"grad_norm": 0.7997954899533661,
|
|
"learning_rate": 8.386363636363638e-06,
|
|
"loss": 0.5121,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.08649879072414283,
|
|
"grad_norm": 0.7683875875855587,
|
|
"learning_rate": 8.613636363636364e-06,
|
|
"loss": 0.5121,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.08877507469056765,
|
|
"grad_norm": 0.67604209965691,
|
|
"learning_rate": 8.840909090909092e-06,
|
|
"loss": 0.5124,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.09105135865699246,
|
|
"grad_norm": 0.8371326669517628,
|
|
"learning_rate": 9.06818181818182e-06,
|
|
"loss": 0.5253,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.09332764262341726,
|
|
"grad_norm": 0.8604199705110958,
|
|
"learning_rate": 9.295454545454546e-06,
|
|
"loss": 0.5057,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.09560392658984208,
|
|
"grad_norm": 0.7001685508024218,
|
|
"learning_rate": 9.522727272727274e-06,
|
|
"loss": 0.512,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.09788021055626689,
|
|
"grad_norm": 0.746358066866932,
|
|
"learning_rate": 9.75e-06,
|
|
"loss": 0.5152,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.10015649452269171,
|
|
"grad_norm": 0.7485703568589076,
|
|
"learning_rate": 9.977272727272728e-06,
|
|
"loss": 0.51,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.10243277848911651,
|
|
"grad_norm": 0.9902309913638483,
|
|
"learning_rate": 9.999872165053986e-06,
|
|
"loss": 0.5156,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.10470906245554133,
|
|
"grad_norm": 0.9153541257496008,
|
|
"learning_rate": 9.999430274867309e-06,
|
|
"loss": 0.5059,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.10698534642196614,
|
|
"grad_norm": 0.6739494150459265,
|
|
"learning_rate": 9.998672779119897e-06,
|
|
"loss": 0.4995,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.10926163038839096,
|
|
"grad_norm": 0.7147898292959126,
|
|
"learning_rate": 9.997599725631174e-06,
|
|
"loss": 0.5021,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.11153791435481576,
|
|
"grad_norm": 0.7526289283194069,
|
|
"learning_rate": 9.996211182141184e-06,
|
|
"loss": 0.5113,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.11381419832124058,
|
|
"grad_norm": 0.8898690822057496,
|
|
"learning_rate": 9.994507236306327e-06,
|
|
"loss": 0.5081,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.11609048228766539,
|
|
"grad_norm": 0.8227051185656225,
|
|
"learning_rate": 9.99248799569382e-06,
|
|
"loss": 0.5034,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.11836676625409019,
|
|
"grad_norm": 0.6375442627559045,
|
|
"learning_rate": 9.990153587774895e-06,
|
|
"loss": 0.5021,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.12064305022051501,
|
|
"grad_norm": 0.6931479406599156,
|
|
"learning_rate": 9.98750415991677e-06,
|
|
"loss": 0.5044,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.12291933418693982,
|
|
"grad_norm": 0.8097527394823517,
|
|
"learning_rate": 9.984539879373335e-06,
|
|
"loss": 0.5088,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.12519561815336464,
|
|
"grad_norm": 0.8559434520312957,
|
|
"learning_rate": 9.981260933274597e-06,
|
|
"loss": 0.5111,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.12747190211978945,
|
|
"grad_norm": 0.6767290786737241,
|
|
"learning_rate": 9.977667528614869e-06,
|
|
"loss": 0.4952,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.12974818608621425,
|
|
"grad_norm": 0.6584609354184026,
|
|
"learning_rate": 9.973759892239696e-06,
|
|
"loss": 0.4916,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.13202447005263906,
|
|
"grad_norm": 1.0067522800390891,
|
|
"learning_rate": 9.969538270831538e-06,
|
|
"loss": 0.4938,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.13430075401906388,
|
|
"grad_norm": 0.7842725791926335,
|
|
"learning_rate": 9.9650029308942e-06,
|
|
"loss": 0.5031,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.1365770379854887,
|
|
"grad_norm": 0.7213504076724643,
|
|
"learning_rate": 9.960154158736011e-06,
|
|
"loss": 0.4987,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.1388533219519135,
|
|
"grad_norm": 0.7639458215235444,
|
|
"learning_rate": 9.954992260451737e-06,
|
|
"loss": 0.4916,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.1411296059183383,
|
|
"grad_norm": 0.6736103272836254,
|
|
"learning_rate": 9.949517561903268e-06,
|
|
"loss": 0.4953,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.14340588988476313,
|
|
"grad_norm": 0.7206570170845565,
|
|
"learning_rate": 9.943730408699047e-06,
|
|
"loss": 0.4949,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.14568217385118792,
|
|
"grad_norm": 0.7325803766368787,
|
|
"learning_rate": 9.937631166172248e-06,
|
|
"loss": 0.5015,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.14795845781761274,
|
|
"grad_norm": 0.7277234400694914,
|
|
"learning_rate": 9.931220219357714e-06,
|
|
"loss": 0.5065,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.15023474178403756,
|
|
"grad_norm": 6.746710290505448,
|
|
"learning_rate": 9.924497972967652e-06,
|
|
"loss": 0.4918,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.15251102575046238,
|
|
"grad_norm": 0.6381821044001871,
|
|
"learning_rate": 9.91746485136609e-06,
|
|
"loss": 0.4842,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.15478730971688717,
|
|
"grad_norm": 0.6894734847882242,
|
|
"learning_rate": 9.91012129854207e-06,
|
|
"loss": 0.4874,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.157063593683312,
|
|
"grad_norm": 0.7239229961207453,
|
|
"learning_rate": 9.90246777808164e-06,
|
|
"loss": 0.4817,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.1593398776497368,
|
|
"grad_norm": 0.6946825414002048,
|
|
"learning_rate": 9.894504773138573e-06,
|
|
"loss": 0.5014,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.16161616161616163,
|
|
"grad_norm": 0.7364735544657125,
|
|
"learning_rate": 9.88623278640388e-06,
|
|
"loss": 0.4861,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.16389244558258642,
|
|
"grad_norm": 0.6496771859407187,
|
|
"learning_rate": 9.877652340074063e-06,
|
|
"loss": 0.4892,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.16616872954901124,
|
|
"grad_norm": 0.6578218852464074,
|
|
"learning_rate": 9.868763975818156e-06,
|
|
"loss": 0.4866,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.16844501351543606,
|
|
"grad_norm": 0.8508879887579466,
|
|
"learning_rate": 9.859568254743535e-06,
|
|
"loss": 0.4986,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.17072129748186085,
|
|
"grad_norm": 0.783429883650226,
|
|
"learning_rate": 9.850065757360485e-06,
|
|
"loss": 0.4988,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.17299758144828567,
|
|
"grad_norm": 0.8186071721692408,
|
|
"learning_rate": 9.840257083545562e-06,
|
|
"loss": 0.4818,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.1752738654147105,
|
|
"grad_norm": 0.710423604497616,
|
|
"learning_rate": 9.83014285250372e-06,
|
|
"loss": 0.4888,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.1775501493811353,
|
|
"grad_norm": 0.6800493756535392,
|
|
"learning_rate": 9.81972370272923e-06,
|
|
"loss": 0.4863,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.1798264333475601,
|
|
"grad_norm": 0.7168752838596176,
|
|
"learning_rate": 9.809000291965354e-06,
|
|
"loss": 0.4911,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.18210271731398492,
|
|
"grad_norm": 0.7743139671538918,
|
|
"learning_rate": 9.797973297162842e-06,
|
|
"loss": 0.4967,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.18437900128040974,
|
|
"grad_norm": 0.6820565783740362,
|
|
"learning_rate": 9.78664341443719e-06,
|
|
"loss": 0.4766,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.18665528524683453,
|
|
"grad_norm": 0.745509395049587,
|
|
"learning_rate": 9.775011359024692e-06,
|
|
"loss": 0.4831,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.18893156921325935,
|
|
"grad_norm": 0.6587611779174343,
|
|
"learning_rate": 9.763077865237293e-06,
|
|
"loss": 0.486,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.19120785317968417,
|
|
"grad_norm": 0.7570644152194894,
|
|
"learning_rate": 9.750843686416233e-06,
|
|
"loss": 0.4876,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.193484137146109,
|
|
"grad_norm": 0.7438169073992149,
|
|
"learning_rate": 9.738309594884489e-06,
|
|
"loss": 0.498,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.19576042111253378,
|
|
"grad_norm": 0.8075454984835392,
|
|
"learning_rate": 9.725476381898018e-06,
|
|
"loss": 0.4761,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.1980367050789586,
|
|
"grad_norm": 0.7687120756908921,
|
|
"learning_rate": 9.712344857595804e-06,
|
|
"loss": 0.4735,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.20031298904538342,
|
|
"grad_norm": 0.6692318540483727,
|
|
"learning_rate": 9.698915850948725e-06,
|
|
"loss": 0.4796,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.20258927301180824,
|
|
"grad_norm": 0.7357514918311532,
|
|
"learning_rate": 9.685190209707214e-06,
|
|
"loss": 0.4881,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.20486555697823303,
|
|
"grad_norm": 0.7672275789241998,
|
|
"learning_rate": 9.67116880034774e-06,
|
|
"loss": 0.4857,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.20714184094465785,
|
|
"grad_norm": 0.7135300642695123,
|
|
"learning_rate": 9.656852508018111e-06,
|
|
"loss": 0.4791,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.20941812491108266,
|
|
"grad_norm": 0.8360299716625731,
|
|
"learning_rate": 9.642242236481604e-06,
|
|
"loss": 0.4849,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.21169440887750746,
|
|
"grad_norm": 0.8029294625940537,
|
|
"learning_rate": 9.6273389080599e-06,
|
|
"loss": 0.4797,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.21397069284393228,
|
|
"grad_norm": 0.7293278863253679,
|
|
"learning_rate": 9.612143463574866e-06,
|
|
"loss": 0.4822,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.2162469768103571,
|
|
"grad_norm": 0.7388020334147711,
|
|
"learning_rate": 9.596656862289158e-06,
|
|
"loss": 0.4918,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.2185232607767819,
|
|
"grad_norm": 0.6641555373860104,
|
|
"learning_rate": 9.580880081845674e-06,
|
|
"loss": 0.4776,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.2207995447432067,
|
|
"grad_norm": 0.8539043098487583,
|
|
"learning_rate": 9.564814118205825e-06,
|
|
"loss": 0.48,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.22307582870963152,
|
|
"grad_norm": 0.7736828871091971,
|
|
"learning_rate": 9.548459985586668e-06,
|
|
"loss": 0.4819,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.22535211267605634,
|
|
"grad_norm": 0.7761396197941033,
|
|
"learning_rate": 9.531818716396879e-06,
|
|
"loss": 0.4874,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.22762839664248116,
|
|
"grad_norm": 0.8766761552909957,
|
|
"learning_rate": 9.514891361171584e-06,
|
|
"loss": 0.477,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.22990468060890595,
|
|
"grad_norm": 0.850710977709625,
|
|
"learning_rate": 9.497678988506027e-06,
|
|
"loss": 0.4809,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.23218096457533077,
|
|
"grad_norm": 0.7569136767466481,
|
|
"learning_rate": 9.480182684988128e-06,
|
|
"loss": 0.4798,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.2344572485417556,
|
|
"grad_norm": 1.0189056868147057,
|
|
"learning_rate": 9.462403555129875e-06,
|
|
"loss": 0.4799,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.23673353250818038,
|
|
"grad_norm": 0.7004478108183485,
|
|
"learning_rate": 9.444342721297607e-06,
|
|
"loss": 0.4786,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.2390098164746052,
|
|
"grad_norm": 0.8359707826058439,
|
|
"learning_rate": 9.426001323641156e-06,
|
|
"loss": 0.4715,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.24128610044103002,
|
|
"grad_norm": 0.6888590261970808,
|
|
"learning_rate": 9.40738052002187e-06,
|
|
"loss": 0.4682,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.24356238440745484,
|
|
"grad_norm": 0.7568931250342633,
|
|
"learning_rate": 9.388481485939532e-06,
|
|
"loss": 0.4746,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.24583866837387963,
|
|
"grad_norm": 0.7349584513921489,
|
|
"learning_rate": 9.369305414458128e-06,
|
|
"loss": 0.4763,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.24811495234030445,
|
|
"grad_norm": 0.8343501997237606,
|
|
"learning_rate": 9.349853516130556e-06,
|
|
"loss": 0.484,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.25039123630672927,
|
|
"grad_norm": 0.7193295893335135,
|
|
"learning_rate": 9.330127018922195e-06,
|
|
"loss": 0.4834,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.2526675202731541,
|
|
"grad_norm": 0.7580309140105336,
|
|
"learning_rate": 9.310127168133378e-06,
|
|
"loss": 0.4812,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.2549438042395789,
|
|
"grad_norm": 0.7613918214537019,
|
|
"learning_rate": 9.289855226320796e-06,
|
|
"loss": 0.4727,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.25722008820600367,
|
|
"grad_norm": 0.725239008063755,
|
|
"learning_rate": 9.269312473217777e-06,
|
|
"loss": 0.4803,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.2594963721724285,
|
|
"grad_norm": 0.8482520214554938,
|
|
"learning_rate": 9.248500205653518e-06,
|
|
"loss": 0.4745,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.2617726561388533,
|
|
"grad_norm": 0.7908430286997805,
|
|
"learning_rate": 9.22741973747119e-06,
|
|
"loss": 0.471,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.26404894010527813,
|
|
"grad_norm": 0.7036666630426738,
|
|
"learning_rate": 9.20607239944503e-06,
|
|
"loss": 0.4676,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.26632522407170295,
|
|
"grad_norm": 0.7263755674894325,
|
|
"learning_rate": 9.18445953919631e-06,
|
|
"loss": 0.4738,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.26860150803812777,
|
|
"grad_norm": 0.7487742930473097,
|
|
"learning_rate": 9.16258252110827e-06,
|
|
"loss": 0.4749,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.2708777920045526,
|
|
"grad_norm": 0.7931436305160696,
|
|
"learning_rate": 9.140442726239986e-06,
|
|
"loss": 0.4739,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.2731540759709774,
|
|
"grad_norm": 0.7971178665228899,
|
|
"learning_rate": 9.118041552239187e-06,
|
|
"loss": 0.4715,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.27543035993740217,
|
|
"grad_norm": 0.7352453908333937,
|
|
"learning_rate": 9.095380413254029e-06,
|
|
"loss": 0.4735,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.277706643903827,
|
|
"grad_norm": 0.6686848966506087,
|
|
"learning_rate": 9.072460739843807e-06,
|
|
"loss": 0.4701,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.2799829278702518,
|
|
"grad_norm": 0.7632913563075477,
|
|
"learning_rate": 9.049283978888665e-06,
|
|
"loss": 0.4709,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.2822592118366766,
|
|
"grad_norm": 0.9126191629516692,
|
|
"learning_rate": 9.025851593498245e-06,
|
|
"loss": 0.4812,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.28453549580310145,
|
|
"grad_norm": 0.7586654591086177,
|
|
"learning_rate": 9.002165062919321e-06,
|
|
"loss": 0.4759,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.28681177976952626,
|
|
"grad_norm": 0.6020016779620824,
|
|
"learning_rate": 8.978225882442431e-06,
|
|
"loss": 0.4585,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.2890880637359511,
|
|
"grad_norm": 0.6918094336131744,
|
|
"learning_rate": 8.95403556330747e-06,
|
|
"loss": 0.4734,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.29136434770237585,
|
|
"grad_norm": 0.8432310682535402,
|
|
"learning_rate": 8.929595632608286e-06,
|
|
"loss": 0.4657,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.29364063166880067,
|
|
"grad_norm": 0.7968867596780567,
|
|
"learning_rate": 8.904907633196287e-06,
|
|
"loss": 0.4689,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.2959169156352255,
|
|
"grad_norm": 0.8975405798229933,
|
|
"learning_rate": 8.879973123583041e-06,
|
|
"loss": 0.4742,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.2981931996016503,
|
|
"grad_norm": 0.8375787090323347,
|
|
"learning_rate": 8.854793677841878e-06,
|
|
"loss": 0.4679,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.3004694835680751,
|
|
"grad_norm": 0.7641222076424216,
|
|
"learning_rate": 8.829370885508538e-06,
|
|
"loss": 0.4668,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.30274576753449994,
|
|
"grad_norm": 0.7968284482238742,
|
|
"learning_rate": 8.803706351480819e-06,
|
|
"loss": 0.4621,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.30502205150092476,
|
|
"grad_norm": 0.7504856443013833,
|
|
"learning_rate": 8.777801695917257e-06,
|
|
"loss": 0.4638,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.3072983354673495,
|
|
"grad_norm": 0.7386127839372575,
|
|
"learning_rate": 8.751658554134861e-06,
|
|
"loss": 0.472,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.30957461943377435,
|
|
"grad_norm": 0.7519795926557973,
|
|
"learning_rate": 8.725278576505865e-06,
|
|
"loss": 0.463,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.31185090340019916,
|
|
"grad_norm": 0.6860694902316768,
|
|
"learning_rate": 8.698663428353551e-06,
|
|
"loss": 0.469,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.314127187366624,
|
|
"grad_norm": 0.84686290617533,
|
|
"learning_rate": 8.671814789847116e-06,
|
|
"loss": 0.4727,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.3164034713330488,
|
|
"grad_norm": 0.7119241552397302,
|
|
"learning_rate": 8.64473435589561e-06,
|
|
"loss": 0.4706,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.3186797552994736,
|
|
"grad_norm": 0.7745989228725879,
|
|
"learning_rate": 8.617423836040937e-06,
|
|
"loss": 0.4679,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.32095603926589844,
|
|
"grad_norm": 0.7227304748917153,
|
|
"learning_rate": 8.589884954349928e-06,
|
|
"loss": 0.4738,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.32323232323232326,
|
|
"grad_norm": 1.228623655278939,
|
|
"learning_rate": 8.562119449305517e-06,
|
|
"loss": 0.4648,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.325508607198748,
|
|
"grad_norm": 0.6887447415599827,
|
|
"learning_rate": 8.534129073696984e-06,
|
|
"loss": 0.4707,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.32778489116517284,
|
|
"grad_norm": 0.7108435007854059,
|
|
"learning_rate": 8.505915594509304e-06,
|
|
"loss": 0.4633,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.33006117513159766,
|
|
"grad_norm": 0.7768742017040517,
|
|
"learning_rate": 8.477480792811607e-06,
|
|
"loss": 0.466,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.3323374590980225,
|
|
"grad_norm": 0.9552934171248785,
|
|
"learning_rate": 8.448826463644733e-06,
|
|
"loss": 0.4615,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.3346137430644473,
|
|
"grad_norm": 0.7969014798994358,
|
|
"learning_rate": 8.419954415907925e-06,
|
|
"loss": 0.4685,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.3368900270308721,
|
|
"grad_norm": 0.8468048307712729,
|
|
"learning_rate": 8.390866472244624e-06,
|
|
"loss": 0.4599,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.33916631099729694,
|
|
"grad_norm": 0.8691044067047762,
|
|
"learning_rate": 8.36156446892742e-06,
|
|
"loss": 0.4722,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.3414425949637217,
|
|
"grad_norm": 0.9380457631209852,
|
|
"learning_rate": 8.332050255742126e-06,
|
|
"loss": 0.4741,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.3437188789301465,
|
|
"grad_norm": 0.8859250973757697,
|
|
"learning_rate": 8.302325695871e-06,
|
|
"loss": 0.4621,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.34599516289657134,
|
|
"grad_norm": 0.7488919862243867,
|
|
"learning_rate": 8.272392665775132e-06,
|
|
"loss": 0.4604,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.34827144686299616,
|
|
"grad_norm": 0.9715182317776861,
|
|
"learning_rate": 8.242253055075989e-06,
|
|
"loss": 0.463,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.350547730829421,
|
|
"grad_norm": 0.9442966262864598,
|
|
"learning_rate": 8.211908766436114e-06,
|
|
"loss": 0.4599,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.3528240147958458,
|
|
"grad_norm": 0.885636304605366,
|
|
"learning_rate": 8.181361715439023e-06,
|
|
"loss": 0.4753,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.3551002987622706,
|
|
"grad_norm": 0.8187381110333617,
|
|
"learning_rate": 8.15061383046828e-06,
|
|
"loss": 0.468,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.3573765827286954,
|
|
"grad_norm": 0.7537652164973274,
|
|
"learning_rate": 8.119667052585753e-06,
|
|
"loss": 0.4591,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.3596528666951202,
|
|
"grad_norm": 0.8597815188650263,
|
|
"learning_rate": 8.088523335409086e-06,
|
|
"loss": 0.4562,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.361929150661545,
|
|
"grad_norm": 0.745664972159197,
|
|
"learning_rate": 8.057184644988363e-06,
|
|
"loss": 0.4603,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.36420543462796984,
|
|
"grad_norm": 0.8238816020218861,
|
|
"learning_rate": 8.025652959682004e-06,
|
|
"loss": 0.4677,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.36648171859439466,
|
|
"grad_norm": 0.7416721413551182,
|
|
"learning_rate": 7.993930270031863e-06,
|
|
"loss": 0.4619,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.3687580025608195,
|
|
"grad_norm": 0.7143674127418036,
|
|
"learning_rate": 7.962018578637578e-06,
|
|
"loss": 0.4629,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.3710342865272443,
|
|
"grad_norm": 0.7454781957945812,
|
|
"learning_rate": 7.929919900030147e-06,
|
|
"loss": 0.4645,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.37331057049366906,
|
|
"grad_norm": 0.763351971097948,
|
|
"learning_rate": 7.897636260544752e-06,
|
|
"loss": 0.4619,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.3755868544600939,
|
|
"grad_norm": 0.7405410645904156,
|
|
"learning_rate": 7.865169698192842e-06,
|
|
"loss": 0.4628,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.3778631384265187,
|
|
"grad_norm": 0.7347060655230769,
|
|
"learning_rate": 7.832522262533481e-06,
|
|
"loss": 0.4649,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.3801394223929435,
|
|
"grad_norm": 0.7023247967552008,
|
|
"learning_rate": 7.799696014543949e-06,
|
|
"loss": 0.4593,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.38241570635936833,
|
|
"grad_norm": 0.9339187019341834,
|
|
"learning_rate": 7.766693026489655e-06,
|
|
"loss": 0.4541,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.38469199032579315,
|
|
"grad_norm": 0.7809209531769312,
|
|
"learning_rate": 7.733515381793305e-06,
|
|
"loss": 0.4653,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.386968274292218,
|
|
"grad_norm": 0.8161647769578234,
|
|
"learning_rate": 7.70016517490338e-06,
|
|
"loss": 0.4643,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.3892445582586428,
|
|
"grad_norm": 0.8219287065471175,
|
|
"learning_rate": 7.666644511161925e-06,
|
|
"loss": 0.4573,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.39152084222506756,
|
|
"grad_norm": 0.730307940342026,
|
|
"learning_rate": 7.632955506671633e-06,
|
|
"loss": 0.4587,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.3937971261914924,
|
|
"grad_norm": 0.7616849447054718,
|
|
"learning_rate": 7.599100288162267e-06,
|
|
"loss": 0.462,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.3960734101579172,
|
|
"grad_norm": 0.8087396986104316,
|
|
"learning_rate": 7.565080992856393e-06,
|
|
"loss": 0.4626,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.398349694124342,
|
|
"grad_norm": 0.725916588847792,
|
|
"learning_rate": 7.530899768334476e-06,
|
|
"loss": 0.4679,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.40062597809076683,
|
|
"grad_norm": 0.8356414931081636,
|
|
"learning_rate": 7.496558772399289e-06,
|
|
"loss": 0.4562,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.40290226205719165,
|
|
"grad_norm": 0.8552053129727125,
|
|
"learning_rate": 7.462060172939711e-06,
|
|
"loss": 0.4593,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.40517854602361647,
|
|
"grad_norm": 0.7085366390905654,
|
|
"learning_rate": 7.427406147793861e-06,
|
|
"loss": 0.4641,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.40745482999004123,
|
|
"grad_norm": 0.858819978106255,
|
|
"learning_rate": 7.392598884611617e-06,
|
|
"loss": 0.4595,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.40973111395646605,
|
|
"grad_norm": 0.8915575128268939,
|
|
"learning_rate": 7.357640580716516e-06,
|
|
"loss": 0.4609,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.4120073979228909,
|
|
"grad_norm": 0.8410874583168452,
|
|
"learning_rate": 7.32253344296704e-06,
|
|
"loss": 0.4519,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.4142836818893157,
|
|
"grad_norm": 0.7553762047323214,
|
|
"learning_rate": 7.2872796876173e-06,
|
|
"loss": 0.4509,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.4165599658557405,
|
|
"grad_norm": 0.7579109862136422,
|
|
"learning_rate": 7.251881540177125e-06,
|
|
"loss": 0.4639,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.41883624982216533,
|
|
"grad_norm": 0.8596157431482481,
|
|
"learning_rate": 7.2163412352715745e-06,
|
|
"loss": 0.4665,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.42111253378859015,
|
|
"grad_norm": 0.8318429397770328,
|
|
"learning_rate": 7.180661016499868e-06,
|
|
"loss": 0.46,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.4233888177550149,
|
|
"grad_norm": 0.6921114545354697,
|
|
"learning_rate": 7.144843136293746e-06,
|
|
"loss": 0.4578,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.42566510172143973,
|
|
"grad_norm": 0.6714500243037865,
|
|
"learning_rate": 7.108889855775289e-06,
|
|
"loss": 0.4507,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.42794138568786455,
|
|
"grad_norm": 0.7710332319490364,
|
|
"learning_rate": 7.0728034446141654e-06,
|
|
"loss": 0.4571,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.43021766965428937,
|
|
"grad_norm": 0.8446602836780411,
|
|
"learning_rate": 7.036586180884357e-06,
|
|
"loss": 0.4707,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.4324939536207142,
|
|
"grad_norm": 0.7674212691654148,
|
|
"learning_rate": 7.000240350920344e-06,
|
|
"loss": 0.462,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.434770237587139,
|
|
"grad_norm": 0.8556511437021528,
|
|
"learning_rate": 6.96376824917278e-06,
|
|
"loss": 0.4533,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.4370465215535638,
|
|
"grad_norm": 0.8078502008317322,
|
|
"learning_rate": 6.927172178063636e-06,
|
|
"loss": 0.4608,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.43932280551998865,
|
|
"grad_norm": 0.7150517188869988,
|
|
"learning_rate": 6.890454447840862e-06,
|
|
"loss": 0.4535,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.4415990894864134,
|
|
"grad_norm": 0.7523733013230715,
|
|
"learning_rate": 6.853617376432542e-06,
|
|
"loss": 0.4624,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.44387537345283823,
|
|
"grad_norm": 0.9728144367431893,
|
|
"learning_rate": 6.816663289300567e-06,
|
|
"loss": 0.4597,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.44615165741926305,
|
|
"grad_norm": 0.7177459968030785,
|
|
"learning_rate": 6.779594519293833e-06,
|
|
"loss": 0.4607,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.44842794138568787,
|
|
"grad_norm": 0.8088528853225262,
|
|
"learning_rate": 6.742413406500967e-06,
|
|
"loss": 0.4629,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.4507042253521127,
|
|
"grad_norm": 0.9175593683030001,
|
|
"learning_rate": 6.705122298102611e-06,
|
|
"loss": 0.4636,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.4529805093185375,
|
|
"grad_norm": 0.7621550740057926,
|
|
"learning_rate": 6.667723548223241e-06,
|
|
"loss": 0.4704,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.4552567932849623,
|
|
"grad_norm": 0.7894831335092004,
|
|
"learning_rate": 6.630219517782557e-06,
|
|
"loss": 0.4665,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.4575330772513871,
|
|
"grad_norm": 0.7289053283342039,
|
|
"learning_rate": 6.592612574346442e-06,
|
|
"loss": 0.4496,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.4598093612178119,
|
|
"grad_norm": 0.7268790659257058,
|
|
"learning_rate": 6.554905091977506e-06,
|
|
"loss": 0.4529,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.4620856451842367,
|
|
"grad_norm": 0.7436765967755353,
|
|
"learning_rate": 6.5170994510852035e-06,
|
|
"loss": 0.4548,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.46436192915066155,
|
|
"grad_norm": 0.7695864257948672,
|
|
"learning_rate": 6.479198038275578e-06,
|
|
"loss": 0.4539,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.46663821311708636,
|
|
"grad_norm": 0.8295487862322445,
|
|
"learning_rate": 6.441203246200587e-06,
|
|
"loss": 0.4634,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.4689144970835112,
|
|
"grad_norm": 0.8241912559218441,
|
|
"learning_rate": 6.403117473407065e-06,
|
|
"loss": 0.4496,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.471190781049936,
|
|
"grad_norm": 0.7076343515274734,
|
|
"learning_rate": 6.364943124185308e-06,
|
|
"loss": 0.4497,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.47346706501636077,
|
|
"grad_norm": 0.8310935730693106,
|
|
"learning_rate": 6.3266826084172835e-06,
|
|
"loss": 0.4648,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.4757433489827856,
|
|
"grad_norm": 0.7704280812181517,
|
|
"learning_rate": 6.288338341424515e-06,
|
|
"loss": 0.455,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.4780196329492104,
|
|
"grad_norm": 0.8181658490570038,
|
|
"learning_rate": 6.249912743815595e-06,
|
|
"loss": 0.4596,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.4802959169156352,
|
|
"grad_norm": 0.7431508637124334,
|
|
"learning_rate": 6.211408241333379e-06,
|
|
"loss": 0.4538,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.48257220088206004,
|
|
"grad_norm": 0.709619776250267,
|
|
"learning_rate": 6.172827264701857e-06,
|
|
"loss": 0.4537,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.48484848484848486,
|
|
"grad_norm": 0.8223387777060188,
|
|
"learning_rate": 6.134172249472702e-06,
|
|
"loss": 0.4551,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.4871247688149097,
|
|
"grad_norm": 0.7768854003850929,
|
|
"learning_rate": 6.095445635871516e-06,
|
|
"loss": 0.4657,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.48940105278133444,
|
|
"grad_norm": 0.769025632798714,
|
|
"learning_rate": 6.0566498686437855e-06,
|
|
"loss": 0.4557,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.49167733674775926,
|
|
"grad_norm": 0.6692137861485721,
|
|
"learning_rate": 6.0177873969005475e-06,
|
|
"loss": 0.4563,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.4939536207141841,
|
|
"grad_norm": 0.8907000112703206,
|
|
"learning_rate": 5.978860673963784e-06,
|
|
"loss": 0.4548,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.4962299046806089,
|
|
"grad_norm": 0.8129551036581305,
|
|
"learning_rate": 5.939872157211545e-06,
|
|
"loss": 0.4501,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.4985061886470337,
|
|
"grad_norm": 0.9050604723863666,
|
|
"learning_rate": 5.900824307922819e-06,
|
|
"loss": 0.4529,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.5007824726134585,
|
|
"grad_norm": 0.9464291168776866,
|
|
"learning_rate": 5.861719591122158e-06,
|
|
"loss": 0.4597,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.5030587565798833,
|
|
"grad_norm": 0.7629842734151758,
|
|
"learning_rate": 5.8225604754240635e-06,
|
|
"loss": 0.4547,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.5053350405463082,
|
|
"grad_norm": 0.737677886868225,
|
|
"learning_rate": 5.783349432877146e-06,
|
|
"loss": 0.4568,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.5076113245127329,
|
|
"grad_norm": 0.7440218389833005,
|
|
"learning_rate": 5.744088938808068e-06,
|
|
"loss": 0.4554,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.5098876084791578,
|
|
"grad_norm": 0.8000074715652351,
|
|
"learning_rate": 5.70478147166529e-06,
|
|
"loss": 0.4671,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.5121638924455826,
|
|
"grad_norm": 0.9238644399016241,
|
|
"learning_rate": 5.665429512862597e-06,
|
|
"loss": 0.4574,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.5144401764120073,
|
|
"grad_norm": 0.758841769369074,
|
|
"learning_rate": 5.626035546622457e-06,
|
|
"loss": 0.4558,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.5167164603784322,
|
|
"grad_norm": 0.7971224800656472,
|
|
"learning_rate": 5.586602059819199e-06,
|
|
"loss": 0.4496,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.518992744344857,
|
|
"grad_norm": 0.9171620412959115,
|
|
"learning_rate": 5.547131541822018e-06,
|
|
"loss": 0.4558,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.5212690283112819,
|
|
"grad_norm": 0.7842020256066858,
|
|
"learning_rate": 5.5076264843378225e-06,
|
|
"loss": 0.4527,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.5235453122777066,
|
|
"grad_norm": 0.9188201380044063,
|
|
"learning_rate": 5.4680893812539436e-06,
|
|
"loss": 0.4608,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.5258215962441315,
|
|
"grad_norm": 0.7861154037939578,
|
|
"learning_rate": 5.428522728480697e-06,
|
|
"loss": 0.4523,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.5280978802105563,
|
|
"grad_norm": 0.7920300857523709,
|
|
"learning_rate": 5.388929023793817e-06,
|
|
"loss": 0.4568,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.5303741641769811,
|
|
"grad_norm": 0.7612825596142501,
|
|
"learning_rate": 5.349310766676781e-06,
|
|
"loss": 0.4483,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.5326504481434059,
|
|
"grad_norm": 0.7537687250775554,
|
|
"learning_rate": 5.3096704581630195e-06,
|
|
"loss": 0.4563,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.5349267321098307,
|
|
"grad_norm": 0.751390092998076,
|
|
"learning_rate": 5.270010600678034e-06,
|
|
"loss": 0.4578,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.5372030160762555,
|
|
"grad_norm": 0.8063126059500658,
|
|
"learning_rate": 5.230333697881413e-06,
|
|
"loss": 0.4424,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.5394793000426803,
|
|
"grad_norm": 0.7268784420755078,
|
|
"learning_rate": 5.190642254508789e-06,
|
|
"loss": 0.4488,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.5417555840091052,
|
|
"grad_norm": 0.8219467384704279,
|
|
"learning_rate": 5.15093877621372e-06,
|
|
"loss": 0.4443,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.5440318679755299,
|
|
"grad_norm": 0.9341715266763854,
|
|
"learning_rate": 5.111225769409505e-06,
|
|
"loss": 0.4563,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.5463081519419548,
|
|
"grad_norm": 0.8890086015346076,
|
|
"learning_rate": 5.071505741110958e-06,
|
|
"loss": 0.4531,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.5485844359083796,
|
|
"grad_norm": 0.7859192247678671,
|
|
"learning_rate": 5.031781198776157e-06,
|
|
"loss": 0.4448,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.5508607198748043,
|
|
"grad_norm": 0.8457709944734434,
|
|
"learning_rate": 4.9920546501481355e-06,
|
|
"loss": 0.4502,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.5531370038412292,
|
|
"grad_norm": 0.777261473128808,
|
|
"learning_rate": 4.952328603096588e-06,
|
|
"loss": 0.4493,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.555413287807654,
|
|
"grad_norm": 0.7489538278905294,
|
|
"learning_rate": 4.912605565459537e-06,
|
|
"loss": 0.4532,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.5576895717740789,
|
|
"grad_norm": 0.7471858030987701,
|
|
"learning_rate": 4.872888044885031e-06,
|
|
"loss": 0.4662,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.5599658557405036,
|
|
"grad_norm": 0.76997073617317,
|
|
"learning_rate": 4.833178548672836e-06,
|
|
"loss": 0.449,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.5622421397069285,
|
|
"grad_norm": 0.8114769999661829,
|
|
"learning_rate": 4.793479583616152e-06,
|
|
"loss": 0.4511,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.5645184236733533,
|
|
"grad_norm": 0.7887812825481647,
|
|
"learning_rate": 4.753793655843362e-06,
|
|
"loss": 0.4531,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.566794707639778,
|
|
"grad_norm": 0.8266580905214915,
|
|
"learning_rate": 4.714123270659836e-06,
|
|
"loss": 0.4499,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.5690709916062029,
|
|
"grad_norm": 0.9772993025496673,
|
|
"learning_rate": 4.674470932389759e-06,
|
|
"loss": 0.462,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.5713472755726277,
|
|
"grad_norm": 0.7550741577854698,
|
|
"learning_rate": 4.634839144218047e-06,
|
|
"loss": 0.4424,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.5736235595390525,
|
|
"grad_norm": 0.8265913157075914,
|
|
"learning_rate": 4.595230408032324e-06,
|
|
"loss": 0.4468,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.5758998435054773,
|
|
"grad_norm": 0.7723721397391996,
|
|
"learning_rate": 4.555647224264978e-06,
|
|
"loss": 0.4448,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.5781761274719022,
|
|
"grad_norm": 0.7438678281440869,
|
|
"learning_rate": 4.516092091735324e-06,
|
|
"loss": 0.4537,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.5804524114383269,
|
|
"grad_norm": 0.827117915360568,
|
|
"learning_rate": 4.47656750749184e-06,
|
|
"loss": 0.4558,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.5827286954047517,
|
|
"grad_norm": 0.7273943037424042,
|
|
"learning_rate": 4.4370759666545495e-06,
|
|
"loss": 0.4444,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.5850049793711766,
|
|
"grad_norm": 0.7703519904997088,
|
|
"learning_rate": 4.397619962257498e-06,
|
|
"loss": 0.4481,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.5872812633376013,
|
|
"grad_norm": 0.803886348953792,
|
|
"learning_rate": 4.3582019850913796e-06,
|
|
"loss": 0.4487,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.5895575473040262,
|
|
"grad_norm": 0.7300835048061479,
|
|
"learning_rate": 4.3188245235462865e-06,
|
|
"loss": 0.4446,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.591833831270451,
|
|
"grad_norm": 0.8101791772935961,
|
|
"learning_rate": 4.2794900634546385e-06,
|
|
"loss": 0.4553,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.5941101152368758,
|
|
"grad_norm": 0.8207823650264575,
|
|
"learning_rate": 4.240201087934238e-06,
|
|
"loss": 0.4511,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.5963863992033006,
|
|
"grad_norm": 0.8176342960234186,
|
|
"learning_rate": 4.200960077231528e-06,
|
|
"loss": 0.4425,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.5986626831697254,
|
|
"grad_norm": 0.916935818899542,
|
|
"learning_rate": 4.161769508565012e-06,
|
|
"loss": 0.4442,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.6009389671361502,
|
|
"grad_norm": 0.7421424964297176,
|
|
"learning_rate": 4.122631855968873e-06,
|
|
"loss": 0.4509,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.603215251102575,
|
|
"grad_norm": 0.9115282778731496,
|
|
"learning_rate": 4.0835495901367955e-06,
|
|
"loss": 0.455,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.6054915350689999,
|
|
"grad_norm": 0.7816827264699414,
|
|
"learning_rate": 4.0445251782659875e-06,
|
|
"loss": 0.4381,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.6077678190354247,
|
|
"grad_norm": 2.194076497168195,
|
|
"learning_rate": 4.005561083901434e-06,
|
|
"loss": 0.4447,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.6100441030018495,
|
|
"grad_norm": 0.7573191196887984,
|
|
"learning_rate": 3.966659766780383e-06,
|
|
"loss": 0.4446,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.6123203869682743,
|
|
"grad_norm": 0.7687574770334604,
|
|
"learning_rate": 3.927823682677057e-06,
|
|
"loss": 0.4496,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.614596670934699,
|
|
"grad_norm": 0.9358386363240805,
|
|
"learning_rate": 3.889055283247628e-06,
|
|
"loss": 0.4568,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.6168729549011239,
|
|
"grad_norm": 0.7314708296261042,
|
|
"learning_rate": 3.850357015875456e-06,
|
|
"loss": 0.4446,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.6191492388675487,
|
|
"grad_norm": 0.8070457903866413,
|
|
"learning_rate": 3.8117313235165754e-06,
|
|
"loss": 0.4521,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.6214255228339736,
|
|
"grad_norm": 0.7257119700751845,
|
|
"learning_rate": 3.7731806445454856e-06,
|
|
"loss": 0.4427,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.6237018068003983,
|
|
"grad_norm": 0.7975212623445046,
|
|
"learning_rate": 3.7347074126012195e-06,
|
|
"loss": 0.4477,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.6259780907668232,
|
|
"grad_norm": 0.7714694828216863,
|
|
"learning_rate": 3.6963140564337074e-06,
|
|
"loss": 0.4538,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.628254374733248,
|
|
"grad_norm": 0.800630933191912,
|
|
"learning_rate": 3.658002999750462e-06,
|
|
"loss": 0.446,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.6305306586996727,
|
|
"grad_norm": 0.7408683914326505,
|
|
"learning_rate": 3.6197766610635656e-06,
|
|
"loss": 0.446,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.6328069426660976,
|
|
"grad_norm": 0.7609135405893662,
|
|
"learning_rate": 3.5816374535369934e-06,
|
|
"loss": 0.4416,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.6350832266325224,
|
|
"grad_norm": 0.8061635946819576,
|
|
"learning_rate": 3.543587784834288e-06,
|
|
"loss": 0.4385,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.6373595105989472,
|
|
"grad_norm": 0.8207487764566586,
|
|
"learning_rate": 3.5056300569665503e-06,
|
|
"loss": 0.4443,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.639635794565372,
|
|
"grad_norm": 0.7780495002255654,
|
|
"learning_rate": 3.4677666661408096e-06,
|
|
"loss": 0.4393,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.6419120785317969,
|
|
"grad_norm": 0.820529174759461,
|
|
"learning_rate": 3.4300000026087664e-06,
|
|
"loss": 0.448,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.6441883624982216,
|
|
"grad_norm": 0.7890561874485035,
|
|
"learning_rate": 3.392332450515886e-06,
|
|
"loss": 0.4489,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.6464646464646465,
|
|
"grad_norm": 0.8198574552313013,
|
|
"learning_rate": 3.3547663877508928e-06,
|
|
"loss": 0.4496,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.6487409304310713,
|
|
"grad_norm": 0.8209595374102931,
|
|
"learning_rate": 3.3173041857956716e-06,
|
|
"loss": 0.442,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.651017214397496,
|
|
"grad_norm": 0.8174067144646198,
|
|
"learning_rate": 3.2799482095755424e-06,
|
|
"loss": 0.4447,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.6532934983639209,
|
|
"grad_norm": 0.7098988026439182,
|
|
"learning_rate": 3.242700817309976e-06,
|
|
"loss": 0.4429,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.6555697823303457,
|
|
"grad_norm": 0.9250724312431224,
|
|
"learning_rate": 3.205564360363724e-06,
|
|
"loss": 0.4508,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.6578460662967706,
|
|
"grad_norm": 0.9050078757133033,
|
|
"learning_rate": 3.168541183098378e-06,
|
|
"loss": 0.447,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.6601223502631953,
|
|
"grad_norm": 0.7647575780260846,
|
|
"learning_rate": 3.131633622724377e-06,
|
|
"loss": 0.4521,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.6623986342296202,
|
|
"grad_norm": 0.749840221225747,
|
|
"learning_rate": 3.0948440091534594e-06,
|
|
"loss": 0.438,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.664674918196045,
|
|
"grad_norm": 0.7184423492925078,
|
|
"learning_rate": 3.058174664851582e-06,
|
|
"loss": 0.4465,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.6669512021624697,
|
|
"grad_norm": 0.7788843341529811,
|
|
"learning_rate": 3.0216279046923084e-06,
|
|
"loss": 0.4427,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.6692274861288946,
|
|
"grad_norm": 0.795166169097631,
|
|
"learning_rate": 2.9852060358106717e-06,
|
|
"loss": 0.4438,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.6715037700953194,
|
|
"grad_norm": 0.8307138251372255,
|
|
"learning_rate": 2.9489113574575272e-06,
|
|
"loss": 0.4467,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.6737800540617442,
|
|
"grad_norm": 0.7663349953487524,
|
|
"learning_rate": 2.912746160854417e-06,
|
|
"loss": 0.4491,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.676056338028169,
|
|
"grad_norm": 0.7838493815092107,
|
|
"learning_rate": 2.8767127290489084e-06,
|
|
"loss": 0.438,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.6783326219945939,
|
|
"grad_norm": 0.791824009698266,
|
|
"learning_rate": 2.840813336770487e-06,
|
|
"loss": 0.4372,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.6806089059610186,
|
|
"grad_norm": 0.73879969079544,
|
|
"learning_rate": 2.805050250286949e-06,
|
|
"loss": 0.4514,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.6828851899274434,
|
|
"grad_norm": 0.8099909989978515,
|
|
"learning_rate": 2.769425727261339e-06,
|
|
"loss": 0.4537,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.6851614738938683,
|
|
"grad_norm": 0.7590973946118406,
|
|
"learning_rate": 2.7339420166094183e-06,
|
|
"loss": 0.4463,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.687437757860293,
|
|
"grad_norm": 0.796201063821709,
|
|
"learning_rate": 2.6986013583577083e-06,
|
|
"loss": 0.4397,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.6897140418267179,
|
|
"grad_norm": 0.7050687084934512,
|
|
"learning_rate": 2.6634059835020733e-06,
|
|
"loss": 0.4268,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.6919903257931427,
|
|
"grad_norm": 1.0187421881981598,
|
|
"learning_rate": 2.628358113866881e-06,
|
|
"loss": 0.4438,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.6942666097595676,
|
|
"grad_norm": 0.8443744661543358,
|
|
"learning_rate": 2.5934599619647495e-06,
|
|
"loss": 0.4512,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.6965428937259923,
|
|
"grad_norm": 0.7864339330637931,
|
|
"learning_rate": 2.558713730856862e-06,
|
|
"loss": 0.4372,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.6988191776924171,
|
|
"grad_norm": 0.8039029994902843,
|
|
"learning_rate": 2.524121614013906e-06,
|
|
"loss": 0.447,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.701095461658842,
|
|
"grad_norm": 0.8554560338951394,
|
|
"learning_rate": 2.4896857951775973e-06,
|
|
"loss": 0.4418,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.7033717456252667,
|
|
"grad_norm": 0.7789235728757384,
|
|
"learning_rate": 2.455408448222814e-06,
|
|
"loss": 0.4428,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.7056480295916916,
|
|
"grad_norm": 0.8390767483792194,
|
|
"learning_rate": 2.4212917370203877e-06,
|
|
"loss": 0.4513,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.7079243135581164,
|
|
"grad_norm": 0.7423511083655429,
|
|
"learning_rate": 2.3873378153004736e-06,
|
|
"loss": 0.4415,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.7102005975245412,
|
|
"grad_norm": 0.7424046071658116,
|
|
"learning_rate": 2.3535488265166095e-06,
|
|
"loss": 0.4293,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.712476881490966,
|
|
"grad_norm": 0.7456496661301177,
|
|
"learning_rate": 2.319926903710398e-06,
|
|
"loss": 0.4438,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.7147531654573908,
|
|
"grad_norm": 0.8278781199522129,
|
|
"learning_rate": 2.2864741693768423e-06,
|
|
"loss": 0.4387,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.7170294494238156,
|
|
"grad_norm": 0.8116784082715538,
|
|
"learning_rate": 2.253192735330371e-06,
|
|
"loss": 0.4462,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.7193057333902404,
|
|
"grad_norm": 0.7004955096514237,
|
|
"learning_rate": 2.2200847025715142e-06,
|
|
"loss": 0.4398,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.7215820173566653,
|
|
"grad_norm": 0.7367447404574639,
|
|
"learning_rate": 2.1871521611542705e-06,
|
|
"loss": 0.4475,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 0.72385830132309,
|
|
"grad_norm": 0.7799543504096647,
|
|
"learning_rate": 2.1543971900541722e-06,
|
|
"loss": 0.443,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.7261345852895149,
|
|
"grad_norm": 0.8090558026400204,
|
|
"learning_rate": 2.1218218570370303e-06,
|
|
"loss": 0.4449,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 0.7284108692559397,
|
|
"grad_norm": 0.8879224467129067,
|
|
"learning_rate": 2.0894282185284147e-06,
|
|
"loss": 0.4484,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.7306871532223644,
|
|
"grad_norm": 0.7328095140462628,
|
|
"learning_rate": 2.057218319483828e-06,
|
|
"loss": 0.4414,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 0.7329634371887893,
|
|
"grad_norm": 0.7572339184999409,
|
|
"learning_rate": 2.0251941932596115e-06,
|
|
"loss": 0.4372,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.7352397211552141,
|
|
"grad_norm": 0.7491266549650365,
|
|
"learning_rate": 1.9933578614845784e-06,
|
|
"loss": 0.4393,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 0.737516005121639,
|
|
"grad_norm": 0.821203801856231,
|
|
"learning_rate": 1.961711333932407e-06,
|
|
"loss": 0.4507,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.7397922890880637,
|
|
"grad_norm": 0.8213618929354572,
|
|
"learning_rate": 1.930256608394747e-06,
|
|
"loss": 0.4404,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.7420685730544886,
|
|
"grad_norm": 0.7809128990538117,
|
|
"learning_rate": 1.898995670555112e-06,
|
|
"loss": 0.4338,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.7443448570209134,
|
|
"grad_norm": 0.7867605315635634,
|
|
"learning_rate": 1.8679304938635373e-06,
|
|
"loss": 0.4481,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 0.7466211409873381,
|
|
"grad_norm": 0.7482999924845694,
|
|
"learning_rate": 1.8370630394119742e-06,
|
|
"loss": 0.4343,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.748897424953763,
|
|
"grad_norm": 0.8060085030283564,
|
|
"learning_rate": 1.806395255810518e-06,
|
|
"loss": 0.4377,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 0.7511737089201878,
|
|
"grad_norm": 0.8755665289100689,
|
|
"learning_rate": 1.7759290790643696e-06,
|
|
"loss": 0.4451,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.7534499928866126,
|
|
"grad_norm": 0.8120416476683848,
|
|
"learning_rate": 1.745666432451638e-06,
|
|
"loss": 0.4387,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 0.7557262768530374,
|
|
"grad_norm": 0.8156630900998141,
|
|
"learning_rate": 1.7156092264019198e-06,
|
|
"loss": 0.4361,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.7580025608194623,
|
|
"grad_norm": 0.7998270545611499,
|
|
"learning_rate": 1.6857593583756915e-06,
|
|
"loss": 0.448,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 0.760278844785887,
|
|
"grad_norm": 0.8207266172010161,
|
|
"learning_rate": 1.6561187127445367e-06,
|
|
"loss": 0.4417,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.7625551287523119,
|
|
"grad_norm": 0.8067253437276407,
|
|
"learning_rate": 1.626689160672182e-06,
|
|
"loss": 0.4476,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.7648314127187367,
|
|
"grad_norm": 0.8853629851991137,
|
|
"learning_rate": 1.5974725599963776e-06,
|
|
"loss": 0.4325,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.7671076966851614,
|
|
"grad_norm": 0.921124886395691,
|
|
"learning_rate": 1.5684707551116074e-06,
|
|
"loss": 0.4385,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 0.7693839806515863,
|
|
"grad_norm": 0.9059423563712878,
|
|
"learning_rate": 1.5396855768526664e-06,
|
|
"loss": 0.4441,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.7716602646180111,
|
|
"grad_norm": 0.7726314288351178,
|
|
"learning_rate": 1.5111188423790773e-06,
|
|
"loss": 0.4367,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 0.773936548584436,
|
|
"grad_norm": 0.8371458674118885,
|
|
"learning_rate": 1.4827723550603706e-06,
|
|
"loss": 0.4494,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.7762128325508607,
|
|
"grad_norm": 0.9280929650984211,
|
|
"learning_rate": 1.4546479043622592e-06,
|
|
"loss": 0.4363,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 0.7784891165172856,
|
|
"grad_norm": 0.8794307651372741,
|
|
"learning_rate": 1.4267472657336473e-06,
|
|
"loss": 0.4398,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.7807654004837103,
|
|
"grad_norm": 0.8239130127708325,
|
|
"learning_rate": 1.3990722004945705e-06,
|
|
"loss": 0.4418,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 0.7830416844501351,
|
|
"grad_norm": 0.790534002165955,
|
|
"learning_rate": 1.371624455724998e-06,
|
|
"loss": 0.4457,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.78531796841656,
|
|
"grad_norm": 0.821515070101423,
|
|
"learning_rate": 1.3444057641545377e-06,
|
|
"loss": 0.4446,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.7875942523829847,
|
|
"grad_norm": 0.7821344445262979,
|
|
"learning_rate": 1.317417844053066e-06,
|
|
"loss": 0.4326,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.7898705363494096,
|
|
"grad_norm": 0.7672443240766755,
|
|
"learning_rate": 1.2906623991222384e-06,
|
|
"loss": 0.4392,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 0.7921468203158344,
|
|
"grad_norm": 0.8085724525323776,
|
|
"learning_rate": 1.2641411183879527e-06,
|
|
"loss": 0.4368,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.7944231042822593,
|
|
"grad_norm": 0.8245898899691535,
|
|
"learning_rate": 1.2378556760937172e-06,
|
|
"loss": 0.4383,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 0.796699388248684,
|
|
"grad_norm": 0.9491912069759774,
|
|
"learning_rate": 1.2118077315949555e-06,
|
|
"loss": 0.4433,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.7989756722151088,
|
|
"grad_norm": 0.766723308181965,
|
|
"learning_rate": 1.1859989292542617e-06,
|
|
"loss": 0.4391,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 0.8012519561815337,
|
|
"grad_norm": 0.9223119446048714,
|
|
"learning_rate": 1.16043089833759e-06,
|
|
"loss": 0.4353,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.8035282401479584,
|
|
"grad_norm": 0.8683331011804727,
|
|
"learning_rate": 1.1351052529114031e-06,
|
|
"loss": 0.4481,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 0.8058045241143833,
|
|
"grad_norm": 0.8456488216541104,
|
|
"learning_rate": 1.1100235917407749e-06,
|
|
"loss": 0.4423,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.8080808080808081,
|
|
"grad_norm": 0.8435393816637614,
|
|
"learning_rate": 1.0851874981884703e-06,
|
|
"loss": 0.4392,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.8103570920472329,
|
|
"grad_norm": 0.8551429696631416,
|
|
"learning_rate": 1.0605985401149854e-06,
|
|
"loss": 0.4373,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.8126333760136577,
|
|
"grad_norm": 0.8326619193733407,
|
|
"learning_rate": 1.0362582697795736e-06,
|
|
"loss": 0.4403,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 0.8149096599800825,
|
|
"grad_norm": 0.7750783611846149,
|
|
"learning_rate": 1.012168223742252e-06,
|
|
"loss": 0.4358,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.8171859439465073,
|
|
"grad_norm": 0.74086430166713,
|
|
"learning_rate": 9.883299227667997e-07,
|
|
"loss": 0.4376,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 0.8194622279129321,
|
|
"grad_norm": 0.8021836339934405,
|
|
"learning_rate": 9.647448717247598e-07,
|
|
"loss": 0.446,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.821738511879357,
|
|
"grad_norm": 0.9512917372490248,
|
|
"learning_rate": 9.414145595004365e-07,
|
|
"loss": 0.4342,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 0.8240147958457817,
|
|
"grad_norm": 0.8218941611192476,
|
|
"learning_rate": 9.183404588968981e-07,
|
|
"loss": 0.4389,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.8262910798122066,
|
|
"grad_norm": 0.8452290036325213,
|
|
"learning_rate": 8.955240265430182e-07,
|
|
"loss": 0.4352,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.8285673637786314,
|
|
"grad_norm": 0.8478009707657702,
|
|
"learning_rate": 8.729667028014999e-07,
|
|
"loss": 0.4512,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.8308436477450561,
|
|
"grad_norm": 0.9736536719337533,
|
|
"learning_rate": 8.506699116779643e-07,
|
|
"loss": 0.4359,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.833119931711481,
|
|
"grad_norm": 0.7223652613461555,
|
|
"learning_rate": 8.286350607310506e-07,
|
|
"loss": 0.434,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.8353962156779058,
|
|
"grad_norm": 0.7773708358623372,
|
|
"learning_rate": 8.068635409835541e-07,
|
|
"loss": 0.4367,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.8376724996443307,
|
|
"grad_norm": 0.8191260289584852,
|
|
"learning_rate": 7.853567268346212e-07,
|
|
"loss": 0.4427,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.8399487836107554,
|
|
"grad_norm": 0.8476046937498816,
|
|
"learning_rate": 7.641159759729821e-07,
|
|
"loss": 0.439,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.8422250675771803,
|
|
"grad_norm": 0.854034845886361,
|
|
"learning_rate": 7.431426292912414e-07,
|
|
"loss": 0.4458,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.8445013515436051,
|
|
"grad_norm": 0.8815331913866332,
|
|
"learning_rate": 7.224380108012325e-07,
|
|
"loss": 0.4299,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.8467776355100298,
|
|
"grad_norm": 0.7558898242505027,
|
|
"learning_rate": 7.020034275504329e-07,
|
|
"loss": 0.4363,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.8490539194764547,
|
|
"grad_norm": 0.8092910834412402,
|
|
"learning_rate": 6.81840169539451e-07,
|
|
"loss": 0.4374,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.8513302034428795,
|
|
"grad_norm": 0.7669287363471868,
|
|
"learning_rate": 6.619495096405959e-07,
|
|
"loss": 0.4405,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.8536064874093043,
|
|
"grad_norm": 0.794026413586518,
|
|
"learning_rate": 6.423327035175186e-07,
|
|
"loss": 0.447,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.8558827713757291,
|
|
"grad_norm": 0.7408546983770723,
|
|
"learning_rate": 6.229909895459429e-07,
|
|
"loss": 0.4315,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 0.858159055342154,
|
|
"grad_norm": 0.7237210059574698,
|
|
"learning_rate": 6.039255887354966e-07,
|
|
"loss": 0.4391,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 0.8604353393085787,
|
|
"grad_norm": 0.9595044766938607,
|
|
"learning_rate": 5.851377046526208e-07,
|
|
"loss": 0.4427,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 0.8627116232750035,
|
|
"grad_norm": 0.8802588662536729,
|
|
"learning_rate": 5.666285233445978e-07,
|
|
"loss": 0.4447,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 0.8649879072414284,
|
|
"grad_norm": 0.8249439161204124,
|
|
"learning_rate": 5.483992132646781e-07,
|
|
"loss": 0.4433,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.8672641912078531,
|
|
"grad_norm": 0.7832517731345501,
|
|
"learning_rate": 5.304509251983103e-07,
|
|
"loss": 0.4358,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 0.869540475174278,
|
|
"grad_norm": 0.7248468716835277,
|
|
"learning_rate": 5.127847921905076e-07,
|
|
"loss": 0.4449,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 0.8718167591407028,
|
|
"grad_norm": 0.7513659638517941,
|
|
"learning_rate": 4.954019294743045e-07,
|
|
"loss": 0.4448,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 0.8740930431071277,
|
|
"grad_norm": 0.9357495352200004,
|
|
"learning_rate": 4.783034344003673e-07,
|
|
"loss": 0.4398,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 0.8763693270735524,
|
|
"grad_norm": 0.7702794579988504,
|
|
"learning_rate": 4.6149038636771337e-07,
|
|
"loss": 0.4396,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.8786456110399773,
|
|
"grad_norm": 0.7237585600700893,
|
|
"learning_rate": 4.449638467555706e-07,
|
|
"loss": 0.4369,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 0.8809218950064021,
|
|
"grad_norm": 0.7689094322016036,
|
|
"learning_rate": 4.2872485885637803e-07,
|
|
"loss": 0.4419,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 0.8831981789728268,
|
|
"grad_norm": 0.6768771986551186,
|
|
"learning_rate": 4.1277444780992215e-07,
|
|
"loss": 0.4337,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 0.8854744629392517,
|
|
"grad_norm": 0.805045121729404,
|
|
"learning_rate": 3.9711362053862115e-07,
|
|
"loss": 0.4284,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 0.8877507469056765,
|
|
"grad_norm": 0.9172147345939642,
|
|
"learning_rate": 3.817433656839586e-07,
|
|
"loss": 0.4446,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.8900270308721013,
|
|
"grad_norm": 0.817219220444952,
|
|
"learning_rate": 3.6666465354407766e-07,
|
|
"loss": 0.4378,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 0.8923033148385261,
|
|
"grad_norm": 0.8368161427901388,
|
|
"learning_rate": 3.5187843601252157e-07,
|
|
"loss": 0.4396,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 0.894579598804951,
|
|
"grad_norm": 0.8185450548355138,
|
|
"learning_rate": 3.373856465181424e-07,
|
|
"loss": 0.4364,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 0.8968558827713757,
|
|
"grad_norm": 0.7618894593947647,
|
|
"learning_rate": 3.231871999661845e-07,
|
|
"loss": 0.4383,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 0.8991321667378005,
|
|
"grad_norm": 0.956580014268259,
|
|
"learning_rate": 3.0928399268051247e-07,
|
|
"loss": 0.442,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.9014084507042254,
|
|
"grad_norm": 0.800968878170199,
|
|
"learning_rate": 2.9567690234704295e-07,
|
|
"loss": 0.4395,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 0.9036847346706501,
|
|
"grad_norm": 0.8086633377655379,
|
|
"learning_rate": 2.8236678795832863e-07,
|
|
"loss": 0.4347,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 0.905961018637075,
|
|
"grad_norm": 0.7394969702194686,
|
|
"learning_rate": 2.693544897593325e-07,
|
|
"loss": 0.4359,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 0.9082373026034998,
|
|
"grad_norm": 0.786442599221493,
|
|
"learning_rate": 2.566408291943906e-07,
|
|
"loss": 0.4483,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 0.9105135865699246,
|
|
"grad_norm": 0.7730250654140295,
|
|
"learning_rate": 2.4422660885534635e-07,
|
|
"loss": 0.4506,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.9127898705363494,
|
|
"grad_norm": 0.7971290026825717,
|
|
"learning_rate": 2.3211261243089255e-07,
|
|
"loss": 0.4351,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 0.9150661545027742,
|
|
"grad_norm": 0.7956171103006121,
|
|
"learning_rate": 2.2029960465709433e-07,
|
|
"loss": 0.4358,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 0.917342438469199,
|
|
"grad_norm": 0.7975596769757733,
|
|
"learning_rate": 2.0878833126911135e-07,
|
|
"loss": 0.4429,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 0.9196187224356238,
|
|
"grad_norm": 0.7720277466201635,
|
|
"learning_rate": 1.9757951895412576e-07,
|
|
"loss": 0.4352,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 0.9218950064020487,
|
|
"grad_norm": 0.7853839927041535,
|
|
"learning_rate": 1.866738753054631e-07,
|
|
"loss": 0.4551,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.9241712903684735,
|
|
"grad_norm": 0.7825373294798991,
|
|
"learning_rate": 1.7607208877792604e-07,
|
|
"loss": 0.4417,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 0.9264475743348983,
|
|
"grad_norm": 0.8184549696249286,
|
|
"learning_rate": 1.6577482864432946e-07,
|
|
"loss": 0.4399,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 0.9287238583013231,
|
|
"grad_norm": 0.7713360616227353,
|
|
"learning_rate": 1.5578274495325618e-07,
|
|
"loss": 0.4329,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 0.9310001422677479,
|
|
"grad_norm": 0.7499304967763705,
|
|
"learning_rate": 1.4609646848801561e-07,
|
|
"loss": 0.4378,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 0.9332764262341727,
|
|
"grad_norm": 0.8556735326094547,
|
|
"learning_rate": 1.3671661072682585e-07,
|
|
"loss": 0.4463,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.9355527102005975,
|
|
"grad_norm": 0.7950794399913543,
|
|
"learning_rate": 1.276437638042116e-07,
|
|
"loss": 0.4324,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 0.9378289941670224,
|
|
"grad_norm": 0.8029386901058022,
|
|
"learning_rate": 1.1887850047362315e-07,
|
|
"loss": 0.4418,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 0.9401052781334471,
|
|
"grad_norm": 0.8040559392859772,
|
|
"learning_rate": 1.104213740712795e-07,
|
|
"loss": 0.4432,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 0.942381562099872,
|
|
"grad_norm": 0.7956315398376556,
|
|
"learning_rate": 1.0227291848123932e-07,
|
|
"loss": 0.4443,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 0.9446578460662968,
|
|
"grad_norm": 0.8316381693144498,
|
|
"learning_rate": 9.443364810169331e-08,
|
|
"loss": 0.4479,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.9469341300327215,
|
|
"grad_norm": 0.7160374953454977,
|
|
"learning_rate": 8.690405781249745e-08,
|
|
"loss": 0.4394,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 0.9492104139991464,
|
|
"grad_norm": 1.1619693324211688,
|
|
"learning_rate": 7.96846229439241e-08,
|
|
"loss": 0.4391,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 0.9514866979655712,
|
|
"grad_norm": 0.7807355700189923,
|
|
"learning_rate": 7.277579924666322e-08,
|
|
"loss": 0.4431,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 0.953762981931996,
|
|
"grad_norm": 0.799461080581998,
|
|
"learning_rate": 6.617802286304597e-08,
|
|
"loss": 0.4465,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 0.9560392658984208,
|
|
"grad_norm": 0.8615155608414043,
|
|
"learning_rate": 5.989171029951446e-08,
|
|
"loss": 0.4545,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.9583155498648457,
|
|
"grad_norm": 1.7829375204373485,
|
|
"learning_rate": 5.391725840032724e-08,
|
|
"loss": 0.4361,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 0.9605918338312704,
|
|
"grad_norm": 0.8158290333293733,
|
|
"learning_rate": 4.8255044322507714e-08,
|
|
"loss": 0.4319,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 0.9628681177976952,
|
|
"grad_norm": 0.7479808786166098,
|
|
"learning_rate": 4.290542551203536e-08,
|
|
"loss": 0.4452,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 0.9651444017641201,
|
|
"grad_norm": 0.823075917374607,
|
|
"learning_rate": 3.7868739681278796e-08,
|
|
"loss": 0.4395,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 0.9674206857305448,
|
|
"grad_norm": 0.8193285422862143,
|
|
"learning_rate": 3.314530478768008e-08,
|
|
"loss": 0.4378,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.9696969696969697,
|
|
"grad_norm": 0.7169988301586265,
|
|
"learning_rate": 2.8735419013677934e-08,
|
|
"loss": 0.4368,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 0.9719732536633945,
|
|
"grad_norm": 0.867983394784437,
|
|
"learning_rate": 2.4639360747888974e-08,
|
|
"loss": 0.4433,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 0.9742495376298194,
|
|
"grad_norm": 0.8926574534891935,
|
|
"learning_rate": 2.0857388567529502e-08,
|
|
"loss": 0.4366,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 0.9765258215962441,
|
|
"grad_norm": 0.8314945073143262,
|
|
"learning_rate": 1.738974122209358e-08,
|
|
"loss": 0.4472,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 0.9788021055626689,
|
|
"grad_norm": 0.8241979804740377,
|
|
"learning_rate": 1.4236637618282312e-08,
|
|
"loss": 0.4496,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.9810783895290938,
|
|
"grad_norm": 0.7393166267679743,
|
|
"learning_rate": 1.1398276806182107e-08,
|
|
"loss": 0.4315,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 0.9833546734955185,
|
|
"grad_norm": 0.7415102824589282,
|
|
"learning_rate": 8.874837966700855e-09,
|
|
"loss": 0.433,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 0.9856309574619434,
|
|
"grad_norm": 0.7946710429947246,
|
|
"learning_rate": 6.6664804002564145e-09,
|
|
"loss": 0.4364,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 0.9879072414283682,
|
|
"grad_norm": 0.8084200476976278,
|
|
"learning_rate": 4.773343516718543e-09,
|
|
"loss": 0.4312,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 0.990183525394793,
|
|
"grad_norm": 0.8535152486368869,
|
|
"learning_rate": 3.1955468266120505e-09,
|
|
"loss": 0.4462,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 0.9924598093612178,
|
|
"grad_norm": 0.7974643794010233,
|
|
"learning_rate": 1.9331899335661708e-09,
|
|
"loss": 0.4357,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 0.9947360933276427,
|
|
"grad_norm": 0.7710537576596679,
|
|
"learning_rate": 9.863525280340292e-10,
|
|
"loss": 0.4337,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 0.9970123772940674,
|
|
"grad_norm": 0.7708116142325829,
|
|
"learning_rate": 3.550943822550057e-10,
|
|
"loss": 0.4338,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 0.9992886612604922,
|
|
"grad_norm": 0.8061784317503751,
|
|
"learning_rate": 3.9455346487193846e-11,
|
|
"loss": 0.4427,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 4394,
|
|
"total_flos": 9.741193804139987e+18,
|
|
"train_loss": 0.29473522613414266,
|
|
"train_runtime": 212859.8204,
|
|
"train_samples_per_second": 1.981,
|
|
"train_steps_per_second": 0.021
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 4394,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 9.741193804139987e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|