Files
llama3-8b-full-pretrain-was…/trainer_state.json
ModelHub XC 07f314e97a 初始化项目,由ModelHub XC社区提供模型
Model: shuoxing/llama3-8b-full-pretrain-wash-c4-1-8m-bs4
Source: Original Platform
2026-06-12 17:20:18 +08:00

20393 lines
526 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 2907,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0010319917440660474,
"grad_norm": 38.264573129438034,
"learning_rate": 0.0,
"loss": 4.170958995819092,
"step": 1
},
{
"epoch": 0.0020639834881320948,
"grad_norm": 32.07645559117946,
"learning_rate": 3.436426116838488e-08,
"loss": 4.053407669067383,
"step": 2
},
{
"epoch": 0.0030959752321981426,
"grad_norm": 33.360400309719275,
"learning_rate": 6.872852233676976e-08,
"loss": 4.419272422790527,
"step": 3
},
{
"epoch": 0.0041279669762641896,
"grad_norm": 38.690889015820794,
"learning_rate": 1.0309278350515465e-07,
"loss": 4.089800834655762,
"step": 4
},
{
"epoch": 0.005159958720330237,
"grad_norm": 40.53568672789824,
"learning_rate": 1.3745704467353952e-07,
"loss": 4.314361095428467,
"step": 5
},
{
"epoch": 0.006191950464396285,
"grad_norm": 52.73132919762138,
"learning_rate": 1.7182130584192442e-07,
"loss": 4.534241676330566,
"step": 6
},
{
"epoch": 0.007223942208462332,
"grad_norm": 37.467034340256674,
"learning_rate": 2.061855670103093e-07,
"loss": 4.245081424713135,
"step": 7
},
{
"epoch": 0.008255933952528379,
"grad_norm": 36.54412560626581,
"learning_rate": 2.405498281786942e-07,
"loss": 3.9605345726013184,
"step": 8
},
{
"epoch": 0.009287925696594427,
"grad_norm": 32.09650929914277,
"learning_rate": 2.7491408934707903e-07,
"loss": 3.553675889968872,
"step": 9
},
{
"epoch": 0.010319917440660475,
"grad_norm": 39.99737645231538,
"learning_rate": 3.0927835051546394e-07,
"loss": 3.8991663455963135,
"step": 10
},
{
"epoch": 0.011351909184726523,
"grad_norm": 26.94996491123709,
"learning_rate": 3.4364261168384884e-07,
"loss": 4.447433948516846,
"step": 11
},
{
"epoch": 0.01238390092879257,
"grad_norm": 38.29183176227633,
"learning_rate": 3.780068728522337e-07,
"loss": 3.8410043716430664,
"step": 12
},
{
"epoch": 0.013415892672858616,
"grad_norm": 39.44434132664455,
"learning_rate": 4.123711340206186e-07,
"loss": 4.28723669052124,
"step": 13
},
{
"epoch": 0.014447884416924664,
"grad_norm": 44.60959999422548,
"learning_rate": 4.467353951890035e-07,
"loss": 4.403202056884766,
"step": 14
},
{
"epoch": 0.015479876160990712,
"grad_norm": 33.86401023900952,
"learning_rate": 4.810996563573884e-07,
"loss": 4.3327813148498535,
"step": 15
},
{
"epoch": 0.016511867905056758,
"grad_norm": 36.37314241341025,
"learning_rate": 5.154639175257732e-07,
"loss": 4.586869716644287,
"step": 16
},
{
"epoch": 0.017543859649122806,
"grad_norm": 40.95405299759724,
"learning_rate": 5.498281786941581e-07,
"loss": 4.356800079345703,
"step": 17
},
{
"epoch": 0.018575851393188854,
"grad_norm": 44.68055417441504,
"learning_rate": 5.84192439862543e-07,
"loss": 3.605020523071289,
"step": 18
},
{
"epoch": 0.0196078431372549,
"grad_norm": 34.30658098751594,
"learning_rate": 6.185567010309279e-07,
"loss": 3.9082632064819336,
"step": 19
},
{
"epoch": 0.02063983488132095,
"grad_norm": 39.53723456862595,
"learning_rate": 6.529209621993128e-07,
"loss": 4.336679935455322,
"step": 20
},
{
"epoch": 0.021671826625386997,
"grad_norm": 44.1317837523831,
"learning_rate": 6.872852233676977e-07,
"loss": 4.653647422790527,
"step": 21
},
{
"epoch": 0.022703818369453045,
"grad_norm": 39.93848572620918,
"learning_rate": 7.216494845360824e-07,
"loss": 4.44484806060791,
"step": 22
},
{
"epoch": 0.023735810113519093,
"grad_norm": 37.7809949162029,
"learning_rate": 7.560137457044674e-07,
"loss": 4.18214225769043,
"step": 23
},
{
"epoch": 0.02476780185758514,
"grad_norm": 25.863583611599758,
"learning_rate": 7.903780068728522e-07,
"loss": 3.864325761795044,
"step": 24
},
{
"epoch": 0.025799793601651185,
"grad_norm": 37.58953378307994,
"learning_rate": 8.247422680412372e-07,
"loss": 4.450252056121826,
"step": 25
},
{
"epoch": 0.026831785345717233,
"grad_norm": 19.483525440807544,
"learning_rate": 8.59106529209622e-07,
"loss": 3.714585304260254,
"step": 26
},
{
"epoch": 0.02786377708978328,
"grad_norm": 16.580078896381973,
"learning_rate": 8.93470790378007e-07,
"loss": 2.6292967796325684,
"step": 27
},
{
"epoch": 0.02889576883384933,
"grad_norm": 23.906848784358015,
"learning_rate": 9.278350515463919e-07,
"loss": 3.2508959770202637,
"step": 28
},
{
"epoch": 0.029927760577915376,
"grad_norm": 23.113354867848006,
"learning_rate": 9.621993127147767e-07,
"loss": 4.082338333129883,
"step": 29
},
{
"epoch": 0.030959752321981424,
"grad_norm": 15.20360049168446,
"learning_rate": 9.965635738831617e-07,
"loss": 3.178603410720825,
"step": 30
},
{
"epoch": 0.03199174406604747,
"grad_norm": 27.255926573914664,
"learning_rate": 1.0309278350515464e-06,
"loss": 3.615831136703491,
"step": 31
},
{
"epoch": 0.033023735810113516,
"grad_norm": 18.358659884678197,
"learning_rate": 1.0652920962199314e-06,
"loss": 3.769883871078491,
"step": 32
},
{
"epoch": 0.034055727554179564,
"grad_norm": 20.18024340362498,
"learning_rate": 1.0996563573883161e-06,
"loss": 3.5580708980560303,
"step": 33
},
{
"epoch": 0.03508771929824561,
"grad_norm": 12.713459341070733,
"learning_rate": 1.134020618556701e-06,
"loss": 3.3640944957733154,
"step": 34
},
{
"epoch": 0.03611971104231166,
"grad_norm": 15.175494098184558,
"learning_rate": 1.168384879725086e-06,
"loss": 3.2460312843322754,
"step": 35
},
{
"epoch": 0.03715170278637771,
"grad_norm": 28.349265846819563,
"learning_rate": 1.202749140893471e-06,
"loss": 4.279034614562988,
"step": 36
},
{
"epoch": 0.038183694530443756,
"grad_norm": 14.84497695062972,
"learning_rate": 1.2371134020618557e-06,
"loss": 3.5686206817626953,
"step": 37
},
{
"epoch": 0.0392156862745098,
"grad_norm": 12.18324253859612,
"learning_rate": 1.2714776632302405e-06,
"loss": 3.2601733207702637,
"step": 38
},
{
"epoch": 0.04024767801857585,
"grad_norm": 18.125128941879503,
"learning_rate": 1.3058419243986257e-06,
"loss": 3.5317866802215576,
"step": 39
},
{
"epoch": 0.0412796697626419,
"grad_norm": 15.44482786670929,
"learning_rate": 1.3402061855670104e-06,
"loss": 3.243781089782715,
"step": 40
},
{
"epoch": 0.04231166150670795,
"grad_norm": 13.768437323999384,
"learning_rate": 1.3745704467353954e-06,
"loss": 2.8587775230407715,
"step": 41
},
{
"epoch": 0.043343653250773995,
"grad_norm": 16.008351007245647,
"learning_rate": 1.4089347079037801e-06,
"loss": 3.319521903991699,
"step": 42
},
{
"epoch": 0.04437564499484004,
"grad_norm": 28.007268290570234,
"learning_rate": 1.4432989690721649e-06,
"loss": 3.675022602081299,
"step": 43
},
{
"epoch": 0.04540763673890609,
"grad_norm": 17.116348976722346,
"learning_rate": 1.47766323024055e-06,
"loss": 3.6678833961486816,
"step": 44
},
{
"epoch": 0.04643962848297214,
"grad_norm": 8.269409663120342,
"learning_rate": 1.5120274914089348e-06,
"loss": 3.1754953861236572,
"step": 45
},
{
"epoch": 0.047471620227038186,
"grad_norm": 11.838220487063731,
"learning_rate": 1.5463917525773197e-06,
"loss": 4.087137699127197,
"step": 46
},
{
"epoch": 0.048503611971104234,
"grad_norm": 13.156253776640677,
"learning_rate": 1.5807560137457045e-06,
"loss": 3.4292521476745605,
"step": 47
},
{
"epoch": 0.04953560371517028,
"grad_norm": 12.019537855742236,
"learning_rate": 1.6151202749140896e-06,
"loss": 3.0743494033813477,
"step": 48
},
{
"epoch": 0.05056759545923633,
"grad_norm": 22.60220162648615,
"learning_rate": 1.6494845360824744e-06,
"loss": 3.8045639991760254,
"step": 49
},
{
"epoch": 0.05159958720330237,
"grad_norm": 17.72197752886121,
"learning_rate": 1.6838487972508594e-06,
"loss": 3.7670891284942627,
"step": 50
},
{
"epoch": 0.05263157894736842,
"grad_norm": 12.61715753067164,
"learning_rate": 1.718213058419244e-06,
"loss": 2.6255364418029785,
"step": 51
},
{
"epoch": 0.053663570691434466,
"grad_norm": 14.76027652711265,
"learning_rate": 1.7525773195876288e-06,
"loss": 3.2327818870544434,
"step": 52
},
{
"epoch": 0.054695562435500514,
"grad_norm": 12.127995456182449,
"learning_rate": 1.786941580756014e-06,
"loss": 3.5888636112213135,
"step": 53
},
{
"epoch": 0.05572755417956656,
"grad_norm": 12.814387754432278,
"learning_rate": 1.8213058419243988e-06,
"loss": 3.601221799850464,
"step": 54
},
{
"epoch": 0.05675954592363261,
"grad_norm": 13.054485091375586,
"learning_rate": 1.8556701030927837e-06,
"loss": 2.9378113746643066,
"step": 55
},
{
"epoch": 0.05779153766769866,
"grad_norm": 15.675205835371122,
"learning_rate": 1.8900343642611685e-06,
"loss": 3.0619659423828125,
"step": 56
},
{
"epoch": 0.058823529411764705,
"grad_norm": 18.601477307180467,
"learning_rate": 1.9243986254295534e-06,
"loss": 3.913945436477661,
"step": 57
},
{
"epoch": 0.05985552115583075,
"grad_norm": 14.684932137524926,
"learning_rate": 1.9587628865979384e-06,
"loss": 3.16741943359375,
"step": 58
},
{
"epoch": 0.0608875128998968,
"grad_norm": 10.584505171302856,
"learning_rate": 1.9931271477663233e-06,
"loss": 2.752635955810547,
"step": 59
},
{
"epoch": 0.06191950464396285,
"grad_norm": 14.294337494468676,
"learning_rate": 2.027491408934708e-06,
"loss": 3.683850049972534,
"step": 60
},
{
"epoch": 0.0629514963880289,
"grad_norm": 14.548967939892366,
"learning_rate": 2.061855670103093e-06,
"loss": 3.897719383239746,
"step": 61
},
{
"epoch": 0.06398348813209494,
"grad_norm": 10.784203027141196,
"learning_rate": 2.096219931271478e-06,
"loss": 3.5739078521728516,
"step": 62
},
{
"epoch": 0.06501547987616099,
"grad_norm": 9.005770358258374,
"learning_rate": 2.1305841924398628e-06,
"loss": 3.4974448680877686,
"step": 63
},
{
"epoch": 0.06604747162022703,
"grad_norm": 13.801645228196314,
"learning_rate": 2.1649484536082477e-06,
"loss": 3.6671719551086426,
"step": 64
},
{
"epoch": 0.06707946336429309,
"grad_norm": 18.864311571048795,
"learning_rate": 2.1993127147766322e-06,
"loss": 3.5606706142425537,
"step": 65
},
{
"epoch": 0.06811145510835913,
"grad_norm": 36.121188402287004,
"learning_rate": 2.2336769759450176e-06,
"loss": 3.54666805267334,
"step": 66
},
{
"epoch": 0.06914344685242518,
"grad_norm": 18.418938373726004,
"learning_rate": 2.268041237113402e-06,
"loss": 3.6477246284484863,
"step": 67
},
{
"epoch": 0.07017543859649122,
"grad_norm": 10.342314919135225,
"learning_rate": 2.302405498281787e-06,
"loss": 3.859053611755371,
"step": 68
},
{
"epoch": 0.07120743034055728,
"grad_norm": 10.856757226736953,
"learning_rate": 2.336769759450172e-06,
"loss": 3.555589199066162,
"step": 69
},
{
"epoch": 0.07223942208462332,
"grad_norm": 11.203267403062268,
"learning_rate": 2.3711340206185566e-06,
"loss": 2.384549140930176,
"step": 70
},
{
"epoch": 0.07327141382868937,
"grad_norm": 9.751556559109623,
"learning_rate": 2.405498281786942e-06,
"loss": 3.5311636924743652,
"step": 71
},
{
"epoch": 0.07430340557275542,
"grad_norm": 24.74278527119019,
"learning_rate": 2.4398625429553265e-06,
"loss": 3.828981399536133,
"step": 72
},
{
"epoch": 0.07533539731682147,
"grad_norm": 20.20134665078964,
"learning_rate": 2.4742268041237115e-06,
"loss": 3.5808427333831787,
"step": 73
},
{
"epoch": 0.07636738906088751,
"grad_norm": 23.478145098032897,
"learning_rate": 2.5085910652920964e-06,
"loss": 3.169694423675537,
"step": 74
},
{
"epoch": 0.07739938080495357,
"grad_norm": 8.074437086627722,
"learning_rate": 2.542955326460481e-06,
"loss": 3.299074172973633,
"step": 75
},
{
"epoch": 0.0784313725490196,
"grad_norm": 12.350822645184348,
"learning_rate": 2.577319587628866e-06,
"loss": 3.151115655899048,
"step": 76
},
{
"epoch": 0.07946336429308566,
"grad_norm": 11.854286237324086,
"learning_rate": 2.6116838487972513e-06,
"loss": 3.488835096359253,
"step": 77
},
{
"epoch": 0.0804953560371517,
"grad_norm": 14.751144818698055,
"learning_rate": 2.646048109965636e-06,
"loss": 3.2102808952331543,
"step": 78
},
{
"epoch": 0.08152734778121776,
"grad_norm": 12.450756671913153,
"learning_rate": 2.680412371134021e-06,
"loss": 3.0494484901428223,
"step": 79
},
{
"epoch": 0.0825593395252838,
"grad_norm": 10.909290889970654,
"learning_rate": 2.7147766323024053e-06,
"loss": 3.182155132293701,
"step": 80
},
{
"epoch": 0.08359133126934984,
"grad_norm": 17.007046121547553,
"learning_rate": 2.7491408934707907e-06,
"loss": 3.132920026779175,
"step": 81
},
{
"epoch": 0.0846233230134159,
"grad_norm": 8.156744910941768,
"learning_rate": 2.7835051546391757e-06,
"loss": 2.959625244140625,
"step": 82
},
{
"epoch": 0.08565531475748193,
"grad_norm": 11.237150545090403,
"learning_rate": 2.8178694158075602e-06,
"loss": 2.8158531188964844,
"step": 83
},
{
"epoch": 0.08668730650154799,
"grad_norm": 8.124664173201067,
"learning_rate": 2.852233676975945e-06,
"loss": 2.988208532333374,
"step": 84
},
{
"epoch": 0.08771929824561403,
"grad_norm": 12.562368133864503,
"learning_rate": 2.8865979381443297e-06,
"loss": 2.8602449893951416,
"step": 85
},
{
"epoch": 0.08875128998968008,
"grad_norm": 10.04284020697332,
"learning_rate": 2.920962199312715e-06,
"loss": 3.2334365844726562,
"step": 86
},
{
"epoch": 0.08978328173374613,
"grad_norm": 7.610534456472524,
"learning_rate": 2.9553264604811e-06,
"loss": 2.1243410110473633,
"step": 87
},
{
"epoch": 0.09081527347781218,
"grad_norm": 19.188341920491524,
"learning_rate": 2.9896907216494846e-06,
"loss": 3.877272605895996,
"step": 88
},
{
"epoch": 0.09184726522187822,
"grad_norm": 10.75878999285903,
"learning_rate": 3.0240549828178695e-06,
"loss": 2.8275046348571777,
"step": 89
},
{
"epoch": 0.09287925696594428,
"grad_norm": 17.36699124390256,
"learning_rate": 3.058419243986255e-06,
"loss": 3.3012256622314453,
"step": 90
},
{
"epoch": 0.09391124871001032,
"grad_norm": 7.627420340539254,
"learning_rate": 3.0927835051546395e-06,
"loss": 2.723191976547241,
"step": 91
},
{
"epoch": 0.09494324045407637,
"grad_norm": 16.901290623449444,
"learning_rate": 3.1271477663230244e-06,
"loss": 3.4844679832458496,
"step": 92
},
{
"epoch": 0.09597523219814241,
"grad_norm": 18.36579239064143,
"learning_rate": 3.161512027491409e-06,
"loss": 3.245941638946533,
"step": 93
},
{
"epoch": 0.09700722394220847,
"grad_norm": 6.816226068152801,
"learning_rate": 3.195876288659794e-06,
"loss": 1.0889668464660645,
"step": 94
},
{
"epoch": 0.09803921568627451,
"grad_norm": 11.855599758847085,
"learning_rate": 3.2302405498281793e-06,
"loss": 2.9265670776367188,
"step": 95
},
{
"epoch": 0.09907120743034056,
"grad_norm": 11.85375182382243,
"learning_rate": 3.264604810996564e-06,
"loss": 2.8669631481170654,
"step": 96
},
{
"epoch": 0.1001031991744066,
"grad_norm": 12.46248341052778,
"learning_rate": 3.298969072164949e-06,
"loss": 3.630988121032715,
"step": 97
},
{
"epoch": 0.10113519091847266,
"grad_norm": 14.491951832964455,
"learning_rate": 3.3333333333333333e-06,
"loss": 3.4562764167785645,
"step": 98
},
{
"epoch": 0.1021671826625387,
"grad_norm": 9.862773480939577,
"learning_rate": 3.3676975945017187e-06,
"loss": 3.196944236755371,
"step": 99
},
{
"epoch": 0.10319917440660474,
"grad_norm": 15.116742234147907,
"learning_rate": 3.4020618556701037e-06,
"loss": 3.2567732334136963,
"step": 100
},
{
"epoch": 0.1042311661506708,
"grad_norm": 13.64659923131581,
"learning_rate": 3.436426116838488e-06,
"loss": 2.071723461151123,
"step": 101
},
{
"epoch": 0.10526315789473684,
"grad_norm": 9.92773943086049,
"learning_rate": 3.470790378006873e-06,
"loss": 2.6918675899505615,
"step": 102
},
{
"epoch": 0.10629514963880289,
"grad_norm": 13.873607215983421,
"learning_rate": 3.5051546391752577e-06,
"loss": 2.8453264236450195,
"step": 103
},
{
"epoch": 0.10732714138286893,
"grad_norm": 7.974277010169032,
"learning_rate": 3.539518900343643e-06,
"loss": 2.940626382827759,
"step": 104
},
{
"epoch": 0.10835913312693499,
"grad_norm": 10.445936177248788,
"learning_rate": 3.573883161512028e-06,
"loss": 3.002101421356201,
"step": 105
},
{
"epoch": 0.10939112487100103,
"grad_norm": 16.347438947139267,
"learning_rate": 3.6082474226804126e-06,
"loss": 3.2126479148864746,
"step": 106
},
{
"epoch": 0.11042311661506708,
"grad_norm": 10.598312696913382,
"learning_rate": 3.6426116838487975e-06,
"loss": 2.8869528770446777,
"step": 107
},
{
"epoch": 0.11145510835913312,
"grad_norm": 9.446228078701083,
"learning_rate": 3.6769759450171825e-06,
"loss": 2.824401378631592,
"step": 108
},
{
"epoch": 0.11248710010319918,
"grad_norm": 9.051408990447767,
"learning_rate": 3.7113402061855674e-06,
"loss": 2.923281192779541,
"step": 109
},
{
"epoch": 0.11351909184726522,
"grad_norm": 11.532599229799272,
"learning_rate": 3.7457044673539524e-06,
"loss": 3.263378381729126,
"step": 110
},
{
"epoch": 0.11455108359133127,
"grad_norm": 8.612569290133255,
"learning_rate": 3.780068728522337e-06,
"loss": 2.3272271156311035,
"step": 111
},
{
"epoch": 0.11558307533539731,
"grad_norm": 10.217127793164705,
"learning_rate": 3.814432989690722e-06,
"loss": 2.9798202514648438,
"step": 112
},
{
"epoch": 0.11661506707946337,
"grad_norm": 7.251982120801302,
"learning_rate": 3.848797250859107e-06,
"loss": 3.069607973098755,
"step": 113
},
{
"epoch": 0.11764705882352941,
"grad_norm": 8.606629694041775,
"learning_rate": 3.883161512027492e-06,
"loss": 2.5642192363739014,
"step": 114
},
{
"epoch": 0.11867905056759546,
"grad_norm": 17.037513857081912,
"learning_rate": 3.917525773195877e-06,
"loss": 3.3925039768218994,
"step": 115
},
{
"epoch": 0.1197110423116615,
"grad_norm": 36.93516693280702,
"learning_rate": 3.951890034364262e-06,
"loss": 3.3626632690429688,
"step": 116
},
{
"epoch": 0.12074303405572756,
"grad_norm": 18.3571141791971,
"learning_rate": 3.986254295532647e-06,
"loss": 3.111511707305908,
"step": 117
},
{
"epoch": 0.1217750257997936,
"grad_norm": 16.195077738348424,
"learning_rate": 4.020618556701032e-06,
"loss": 2.9979894161224365,
"step": 118
},
{
"epoch": 0.12280701754385964,
"grad_norm": 11.22682971528628,
"learning_rate": 4.054982817869416e-06,
"loss": 2.9094398021698,
"step": 119
},
{
"epoch": 0.1238390092879257,
"grad_norm": 9.818020752484045,
"learning_rate": 4.089347079037801e-06,
"loss": 2.995288610458374,
"step": 120
},
{
"epoch": 0.12487100103199174,
"grad_norm": 15.677881529415794,
"learning_rate": 4.123711340206186e-06,
"loss": 3.0458688735961914,
"step": 121
},
{
"epoch": 0.1259029927760578,
"grad_norm": 10.561459834532538,
"learning_rate": 4.158075601374571e-06,
"loss": 3.482619047164917,
"step": 122
},
{
"epoch": 0.12693498452012383,
"grad_norm": 12.542195355260022,
"learning_rate": 4.192439862542956e-06,
"loss": 3.0959300994873047,
"step": 123
},
{
"epoch": 0.12796697626418987,
"grad_norm": 10.857060562341111,
"learning_rate": 4.2268041237113405e-06,
"loss": 3.2271957397460938,
"step": 124
},
{
"epoch": 0.12899896800825594,
"grad_norm": 18.219131423659785,
"learning_rate": 4.2611683848797255e-06,
"loss": 2.5085105895996094,
"step": 125
},
{
"epoch": 0.13003095975232198,
"grad_norm": 9.401192182845163,
"learning_rate": 4.2955326460481105e-06,
"loss": 2.629058837890625,
"step": 126
},
{
"epoch": 0.13106295149638802,
"grad_norm": 11.257897352313293,
"learning_rate": 4.329896907216495e-06,
"loss": 3.031485080718994,
"step": 127
},
{
"epoch": 0.13209494324045407,
"grad_norm": 14.99015554328189,
"learning_rate": 4.36426116838488e-06,
"loss": 3.0450351238250732,
"step": 128
},
{
"epoch": 0.13312693498452013,
"grad_norm": 8.43577882737364,
"learning_rate": 4.3986254295532645e-06,
"loss": 2.7244393825531006,
"step": 129
},
{
"epoch": 0.13415892672858618,
"grad_norm": 20.916171658532548,
"learning_rate": 4.4329896907216494e-06,
"loss": 2.0315990447998047,
"step": 130
},
{
"epoch": 0.13519091847265222,
"grad_norm": 7.513190417589719,
"learning_rate": 4.467353951890035e-06,
"loss": 3.0035698413848877,
"step": 131
},
{
"epoch": 0.13622291021671826,
"grad_norm": 26.148958761673452,
"learning_rate": 4.501718213058419e-06,
"loss": 3.425873279571533,
"step": 132
},
{
"epoch": 0.13725490196078433,
"grad_norm": 18.60377751563881,
"learning_rate": 4.536082474226804e-06,
"loss": 2.837137222290039,
"step": 133
},
{
"epoch": 0.13828689370485037,
"grad_norm": 9.231386213638103,
"learning_rate": 4.570446735395189e-06,
"loss": 2.713682174682617,
"step": 134
},
{
"epoch": 0.1393188854489164,
"grad_norm": 8.302335013209317,
"learning_rate": 4.604810996563574e-06,
"loss": 2.447545289993286,
"step": 135
},
{
"epoch": 0.14035087719298245,
"grad_norm": 8.539319304391766,
"learning_rate": 4.639175257731959e-06,
"loss": 2.9703640937805176,
"step": 136
},
{
"epoch": 0.14138286893704852,
"grad_norm": 10.406180966902255,
"learning_rate": 4.673539518900344e-06,
"loss": 2.8641350269317627,
"step": 137
},
{
"epoch": 0.14241486068111456,
"grad_norm": 9.85807004865754,
"learning_rate": 4.707903780068729e-06,
"loss": 2.8812713623046875,
"step": 138
},
{
"epoch": 0.1434468524251806,
"grad_norm": 12.483093886606463,
"learning_rate": 4.742268041237113e-06,
"loss": 2.8616321086883545,
"step": 139
},
{
"epoch": 0.14447884416924664,
"grad_norm": 14.725269738219037,
"learning_rate": 4.776632302405499e-06,
"loss": 2.9235153198242188,
"step": 140
},
{
"epoch": 0.14551083591331268,
"grad_norm": 14.22587761070801,
"learning_rate": 4.810996563573884e-06,
"loss": 2.8137569427490234,
"step": 141
},
{
"epoch": 0.14654282765737875,
"grad_norm": 12.328982503358834,
"learning_rate": 4.845360824742268e-06,
"loss": 2.68989896774292,
"step": 142
},
{
"epoch": 0.1475748194014448,
"grad_norm": 9.893270455608677,
"learning_rate": 4.879725085910653e-06,
"loss": 3.09940505027771,
"step": 143
},
{
"epoch": 0.14860681114551083,
"grad_norm": 14.968488810940084,
"learning_rate": 4.914089347079038e-06,
"loss": 3.154467821121216,
"step": 144
},
{
"epoch": 0.14963880288957687,
"grad_norm": 8.77535414976699,
"learning_rate": 4.948453608247423e-06,
"loss": 2.3697733879089355,
"step": 145
},
{
"epoch": 0.15067079463364294,
"grad_norm": 12.60090393189173,
"learning_rate": 4.982817869415808e-06,
"loss": 2.832059383392334,
"step": 146
},
{
"epoch": 0.15170278637770898,
"grad_norm": 8.133074225973754,
"learning_rate": 5.017182130584193e-06,
"loss": 3.008730888366699,
"step": 147
},
{
"epoch": 0.15273477812177502,
"grad_norm": 11.669903098407282,
"learning_rate": 5.051546391752578e-06,
"loss": 3.4180641174316406,
"step": 148
},
{
"epoch": 0.15376676986584106,
"grad_norm": 11.657155258917168,
"learning_rate": 5.085910652920962e-06,
"loss": 3.033390522003174,
"step": 149
},
{
"epoch": 0.15479876160990713,
"grad_norm": 8.538656667770233,
"learning_rate": 5.120274914089347e-06,
"loss": 2.817418098449707,
"step": 150
},
{
"epoch": 0.15583075335397317,
"grad_norm": 14.328660767942383,
"learning_rate": 5.154639175257732e-06,
"loss": 3.162787914276123,
"step": 151
},
{
"epoch": 0.1568627450980392,
"grad_norm": 11.115423828752997,
"learning_rate": 5.189003436426118e-06,
"loss": 2.862689256668091,
"step": 152
},
{
"epoch": 0.15789473684210525,
"grad_norm": 8.211023882248943,
"learning_rate": 5.223367697594503e-06,
"loss": 3.511646032333374,
"step": 153
},
{
"epoch": 0.15892672858617132,
"grad_norm": 10.547321886955585,
"learning_rate": 5.257731958762888e-06,
"loss": 2.5060176849365234,
"step": 154
},
{
"epoch": 0.15995872033023736,
"grad_norm": 9.183030025162694,
"learning_rate": 5.292096219931272e-06,
"loss": 3.171947717666626,
"step": 155
},
{
"epoch": 0.1609907120743034,
"grad_norm": 9.116532341417292,
"learning_rate": 5.326460481099657e-06,
"loss": 2.5070362091064453,
"step": 156
},
{
"epoch": 0.16202270381836945,
"grad_norm": 11.011061651573083,
"learning_rate": 5.360824742268042e-06,
"loss": 2.4879579544067383,
"step": 157
},
{
"epoch": 0.16305469556243551,
"grad_norm": 12.155789588921165,
"learning_rate": 5.395189003436427e-06,
"loss": 2.769174098968506,
"step": 158
},
{
"epoch": 0.16408668730650156,
"grad_norm": 8.52073235657292,
"learning_rate": 5.429553264604811e-06,
"loss": 2.3643741607666016,
"step": 159
},
{
"epoch": 0.1651186790505676,
"grad_norm": 13.23465003546493,
"learning_rate": 5.463917525773196e-06,
"loss": 2.593092918395996,
"step": 160
},
{
"epoch": 0.16615067079463364,
"grad_norm": 12.101671965226927,
"learning_rate": 5.4982817869415815e-06,
"loss": 2.8370893001556396,
"step": 161
},
{
"epoch": 0.16718266253869968,
"grad_norm": 11.888079691465743,
"learning_rate": 5.532646048109966e-06,
"loss": 2.9755825996398926,
"step": 162
},
{
"epoch": 0.16821465428276575,
"grad_norm": 19.214763509261946,
"learning_rate": 5.567010309278351e-06,
"loss": 3.475999355316162,
"step": 163
},
{
"epoch": 0.1692466460268318,
"grad_norm": 7.9945005629139425,
"learning_rate": 5.601374570446736e-06,
"loss": 2.942070484161377,
"step": 164
},
{
"epoch": 0.17027863777089783,
"grad_norm": 8.303932327257701,
"learning_rate": 5.6357388316151204e-06,
"loss": 2.83166241645813,
"step": 165
},
{
"epoch": 0.17131062951496387,
"grad_norm": 8.905346416558189,
"learning_rate": 5.670103092783505e-06,
"loss": 2.9068546295166016,
"step": 166
},
{
"epoch": 0.17234262125902994,
"grad_norm": 9.579295533050125,
"learning_rate": 5.70446735395189e-06,
"loss": 3.112548351287842,
"step": 167
},
{
"epoch": 0.17337461300309598,
"grad_norm": 10.536784663679345,
"learning_rate": 5.738831615120275e-06,
"loss": 3.296271800994873,
"step": 168
},
{
"epoch": 0.17440660474716202,
"grad_norm": 24.842411581016172,
"learning_rate": 5.7731958762886594e-06,
"loss": 3.670775890350342,
"step": 169
},
{
"epoch": 0.17543859649122806,
"grad_norm": 22.019557529324477,
"learning_rate": 5.807560137457045e-06,
"loss": 3.7383432388305664,
"step": 170
},
{
"epoch": 0.17647058823529413,
"grad_norm": 13.704368388684442,
"learning_rate": 5.84192439862543e-06,
"loss": 2.4311046600341797,
"step": 171
},
{
"epoch": 0.17750257997936017,
"grad_norm": 8.387017897178954,
"learning_rate": 5.876288659793815e-06,
"loss": 2.6996443271636963,
"step": 172
},
{
"epoch": 0.1785345717234262,
"grad_norm": 15.693639993341684,
"learning_rate": 5.9106529209622e-06,
"loss": 2.783320426940918,
"step": 173
},
{
"epoch": 0.17956656346749225,
"grad_norm": 20.3581897243643,
"learning_rate": 5.945017182130585e-06,
"loss": 2.5705931186676025,
"step": 174
},
{
"epoch": 0.18059855521155832,
"grad_norm": 10.17140010492615,
"learning_rate": 5.979381443298969e-06,
"loss": 2.9759726524353027,
"step": 175
},
{
"epoch": 0.18163054695562436,
"grad_norm": 29.834325540874072,
"learning_rate": 6.013745704467354e-06,
"loss": 2.3043899536132812,
"step": 176
},
{
"epoch": 0.1826625386996904,
"grad_norm": 12.9811040950101,
"learning_rate": 6.048109965635739e-06,
"loss": 2.7947998046875,
"step": 177
},
{
"epoch": 0.18369453044375644,
"grad_norm": 13.007887184214686,
"learning_rate": 6.082474226804124e-06,
"loss": 2.867368698120117,
"step": 178
},
{
"epoch": 0.18472652218782248,
"grad_norm": 10.97250328468093,
"learning_rate": 6.11683848797251e-06,
"loss": 2.964649200439453,
"step": 179
},
{
"epoch": 0.18575851393188855,
"grad_norm": 13.663161398038788,
"learning_rate": 6.151202749140894e-06,
"loss": 2.844449043273926,
"step": 180
},
{
"epoch": 0.1867905056759546,
"grad_norm": 16.7880436257611,
"learning_rate": 6.185567010309279e-06,
"loss": 2.888267993927002,
"step": 181
},
{
"epoch": 0.18782249742002063,
"grad_norm": 13.60494749666835,
"learning_rate": 6.219931271477664e-06,
"loss": 2.7521345615386963,
"step": 182
},
{
"epoch": 0.18885448916408668,
"grad_norm": 30.82620894708794,
"learning_rate": 6.254295532646049e-06,
"loss": 3.2509548664093018,
"step": 183
},
{
"epoch": 0.18988648090815274,
"grad_norm": 13.528624823741277,
"learning_rate": 6.288659793814433e-06,
"loss": 2.8344573974609375,
"step": 184
},
{
"epoch": 0.19091847265221878,
"grad_norm": 18.761538401728718,
"learning_rate": 6.323024054982818e-06,
"loss": 2.790778398513794,
"step": 185
},
{
"epoch": 0.19195046439628483,
"grad_norm": 10.532250495313523,
"learning_rate": 6.357388316151203e-06,
"loss": 2.844142198562622,
"step": 186
},
{
"epoch": 0.19298245614035087,
"grad_norm": 10.737588464332136,
"learning_rate": 6.391752577319588e-06,
"loss": 2.4085793495178223,
"step": 187
},
{
"epoch": 0.19401444788441694,
"grad_norm": 12.716202236391176,
"learning_rate": 6.426116838487974e-06,
"loss": 3.11259126663208,
"step": 188
},
{
"epoch": 0.19504643962848298,
"grad_norm": 18.863162432919182,
"learning_rate": 6.460481099656359e-06,
"loss": 2.130476713180542,
"step": 189
},
{
"epoch": 0.19607843137254902,
"grad_norm": 16.592077675123832,
"learning_rate": 6.494845360824743e-06,
"loss": 3.2449631690979004,
"step": 190
},
{
"epoch": 0.19711042311661506,
"grad_norm": 8.856414937670612,
"learning_rate": 6.529209621993128e-06,
"loss": 3.328016996383667,
"step": 191
},
{
"epoch": 0.19814241486068113,
"grad_norm": 44.59005446367263,
"learning_rate": 6.563573883161513e-06,
"loss": 2.5433740615844727,
"step": 192
},
{
"epoch": 0.19917440660474717,
"grad_norm": 42.20840631794685,
"learning_rate": 6.597938144329898e-06,
"loss": 3.115938186645508,
"step": 193
},
{
"epoch": 0.2002063983488132,
"grad_norm": 21.49432776270929,
"learning_rate": 6.632302405498282e-06,
"loss": 3.705045223236084,
"step": 194
},
{
"epoch": 0.20123839009287925,
"grad_norm": 13.153220389904382,
"learning_rate": 6.666666666666667e-06,
"loss": 3.270660877227783,
"step": 195
},
{
"epoch": 0.20227038183694532,
"grad_norm": 10.291045681689145,
"learning_rate": 6.701030927835052e-06,
"loss": 2.783212900161743,
"step": 196
},
{
"epoch": 0.20330237358101136,
"grad_norm": 9.438804617099628,
"learning_rate": 6.735395189003437e-06,
"loss": 2.624166488647461,
"step": 197
},
{
"epoch": 0.2043343653250774,
"grad_norm": 9.565752702177734,
"learning_rate": 6.769759450171822e-06,
"loss": 2.792402505874634,
"step": 198
},
{
"epoch": 0.20536635706914344,
"grad_norm": 8.203774518764854,
"learning_rate": 6.804123711340207e-06,
"loss": 2.9766368865966797,
"step": 199
},
{
"epoch": 0.20639834881320948,
"grad_norm": 16.64174398612072,
"learning_rate": 6.8384879725085914e-06,
"loss": 2.8217639923095703,
"step": 200
},
{
"epoch": 0.20743034055727555,
"grad_norm": 15.499296910290152,
"learning_rate": 6.872852233676976e-06,
"loss": 3.113293170928955,
"step": 201
},
{
"epoch": 0.2084623323013416,
"grad_norm": 12.533221265320801,
"learning_rate": 6.907216494845361e-06,
"loss": 2.725703001022339,
"step": 202
},
{
"epoch": 0.20949432404540763,
"grad_norm": 10.18270298827117,
"learning_rate": 6.941580756013746e-06,
"loss": 3.741483449935913,
"step": 203
},
{
"epoch": 0.21052631578947367,
"grad_norm": 16.4388133098385,
"learning_rate": 6.9759450171821304e-06,
"loss": 2.6681666374206543,
"step": 204
},
{
"epoch": 0.21155830753353974,
"grad_norm": 7.800822880325412,
"learning_rate": 7.010309278350515e-06,
"loss": 2.4429688453674316,
"step": 205
},
{
"epoch": 0.21259029927760578,
"grad_norm": 11.2671154522289,
"learning_rate": 7.044673539518901e-06,
"loss": 3.0641002655029297,
"step": 206
},
{
"epoch": 0.21362229102167182,
"grad_norm": 17.704159664048625,
"learning_rate": 7.079037800687286e-06,
"loss": 3.075429677963257,
"step": 207
},
{
"epoch": 0.21465428276573786,
"grad_norm": 10.682158078938821,
"learning_rate": 7.113402061855671e-06,
"loss": 2.7346138954162598,
"step": 208
},
{
"epoch": 0.21568627450980393,
"grad_norm": 9.431220771114651,
"learning_rate": 7.147766323024056e-06,
"loss": 2.5908002853393555,
"step": 209
},
{
"epoch": 0.21671826625386997,
"grad_norm": 17.15737144502723,
"learning_rate": 7.18213058419244e-06,
"loss": 2.7248172760009766,
"step": 210
},
{
"epoch": 0.21775025799793601,
"grad_norm": 18.55275798328875,
"learning_rate": 7.216494845360825e-06,
"loss": 3.1422934532165527,
"step": 211
},
{
"epoch": 0.21878224974200206,
"grad_norm": 8.674665435079042,
"learning_rate": 7.25085910652921e-06,
"loss": 2.5640807151794434,
"step": 212
},
{
"epoch": 0.21981424148606812,
"grad_norm": 9.27329731014561,
"learning_rate": 7.285223367697595e-06,
"loss": 2.792649984359741,
"step": 213
},
{
"epoch": 0.22084623323013416,
"grad_norm": 13.285088154216584,
"learning_rate": 7.319587628865979e-06,
"loss": 3.1613028049468994,
"step": 214
},
{
"epoch": 0.2218782249742002,
"grad_norm": 21.421706016244368,
"learning_rate": 7.353951890034365e-06,
"loss": 3.27880597114563,
"step": 215
},
{
"epoch": 0.22291021671826625,
"grad_norm": 9.498086512903509,
"learning_rate": 7.38831615120275e-06,
"loss": 2.89503812789917,
"step": 216
},
{
"epoch": 0.22394220846233232,
"grad_norm": 12.444719476190334,
"learning_rate": 7.422680412371135e-06,
"loss": 2.5208213329315186,
"step": 217
},
{
"epoch": 0.22497420020639836,
"grad_norm": 8.326855341727157,
"learning_rate": 7.45704467353952e-06,
"loss": 2.224851131439209,
"step": 218
},
{
"epoch": 0.2260061919504644,
"grad_norm": 10.38571627684951,
"learning_rate": 7.491408934707905e-06,
"loss": 2.8946280479431152,
"step": 219
},
{
"epoch": 0.22703818369453044,
"grad_norm": 7.740368769601456,
"learning_rate": 7.525773195876289e-06,
"loss": 3.0110769271850586,
"step": 220
},
{
"epoch": 0.22807017543859648,
"grad_norm": 8.267476570780092,
"learning_rate": 7.560137457044674e-06,
"loss": 2.925208568572998,
"step": 221
},
{
"epoch": 0.22910216718266255,
"grad_norm": 13.692029885184212,
"learning_rate": 7.594501718213059e-06,
"loss": 3.7805609703063965,
"step": 222
},
{
"epoch": 0.2301341589267286,
"grad_norm": 7.932163154931762,
"learning_rate": 7.628865979381444e-06,
"loss": 2.7688980102539062,
"step": 223
},
{
"epoch": 0.23116615067079463,
"grad_norm": 11.007263664299453,
"learning_rate": 7.663230240549829e-06,
"loss": 2.930605173110962,
"step": 224
},
{
"epoch": 0.23219814241486067,
"grad_norm": 10.212261427573413,
"learning_rate": 7.697594501718214e-06,
"loss": 3.478551149368286,
"step": 225
},
{
"epoch": 0.23323013415892674,
"grad_norm": 7.683193085540734,
"learning_rate": 7.731958762886599e-06,
"loss": 3.014065742492676,
"step": 226
},
{
"epoch": 0.23426212590299278,
"grad_norm": 15.024778804729713,
"learning_rate": 7.766323024054984e-06,
"loss": 2.694744110107422,
"step": 227
},
{
"epoch": 0.23529411764705882,
"grad_norm": 9.927780165927903,
"learning_rate": 7.800687285223369e-06,
"loss": 2.865319013595581,
"step": 228
},
{
"epoch": 0.23632610939112486,
"grad_norm": 22.148139267313475,
"learning_rate": 7.835051546391754e-06,
"loss": 2.258620262145996,
"step": 229
},
{
"epoch": 0.23735810113519093,
"grad_norm": 7.5511306699872,
"learning_rate": 7.869415807560138e-06,
"loss": 2.5735924243927,
"step": 230
},
{
"epoch": 0.23839009287925697,
"grad_norm": 10.257255964053067,
"learning_rate": 7.903780068728523e-06,
"loss": 2.8336105346679688,
"step": 231
},
{
"epoch": 0.239422084623323,
"grad_norm": 20.642576922687905,
"learning_rate": 7.938144329896907e-06,
"loss": 2.992154836654663,
"step": 232
},
{
"epoch": 0.24045407636738905,
"grad_norm": 6.970555634840997,
"learning_rate": 7.972508591065293e-06,
"loss": 2.60292911529541,
"step": 233
},
{
"epoch": 0.24148606811145512,
"grad_norm": 14.079903341685629,
"learning_rate": 8.006872852233678e-06,
"loss": 2.760301113128662,
"step": 234
},
{
"epoch": 0.24251805985552116,
"grad_norm": 13.243679329890357,
"learning_rate": 8.041237113402063e-06,
"loss": 2.595752239227295,
"step": 235
},
{
"epoch": 0.2435500515995872,
"grad_norm": 9.004451209153824,
"learning_rate": 8.075601374570448e-06,
"loss": 3.067572593688965,
"step": 236
},
{
"epoch": 0.24458204334365324,
"grad_norm": 8.942965479095568,
"learning_rate": 8.109965635738832e-06,
"loss": 2.9863030910491943,
"step": 237
},
{
"epoch": 0.24561403508771928,
"grad_norm": 10.513165239313906,
"learning_rate": 8.144329896907216e-06,
"loss": 2.3195133209228516,
"step": 238
},
{
"epoch": 0.24664602683178535,
"grad_norm": 7.156933615533272,
"learning_rate": 8.178694158075601e-06,
"loss": 2.628549814224243,
"step": 239
},
{
"epoch": 0.2476780185758514,
"grad_norm": 13.868964538667283,
"learning_rate": 8.213058419243986e-06,
"loss": 2.612640619277954,
"step": 240
},
{
"epoch": 0.24871001031991744,
"grad_norm": 30.38753833791798,
"learning_rate": 8.247422680412371e-06,
"loss": 2.8055434226989746,
"step": 241
},
{
"epoch": 0.24974200206398348,
"grad_norm": 13.238825374613219,
"learning_rate": 8.281786941580758e-06,
"loss": 3.2974088191986084,
"step": 242
},
{
"epoch": 0.25077399380804954,
"grad_norm": 28.315627867846285,
"learning_rate": 8.316151202749141e-06,
"loss": 2.8685109615325928,
"step": 243
},
{
"epoch": 0.2518059855521156,
"grad_norm": 9.305515908821278,
"learning_rate": 8.350515463917526e-06,
"loss": 2.907991886138916,
"step": 244
},
{
"epoch": 0.2528379772961816,
"grad_norm": 6.222471020559953,
"learning_rate": 8.384879725085911e-06,
"loss": 2.106353282928467,
"step": 245
},
{
"epoch": 0.25386996904024767,
"grad_norm": 10.045321472502492,
"learning_rate": 8.419243986254296e-06,
"loss": 2.985814332962036,
"step": 246
},
{
"epoch": 0.2549019607843137,
"grad_norm": 21.398479137508037,
"learning_rate": 8.453608247422681e-06,
"loss": 2.043405294418335,
"step": 247
},
{
"epoch": 0.25593395252837975,
"grad_norm": 10.274413900421287,
"learning_rate": 8.487972508591066e-06,
"loss": 2.8896231651306152,
"step": 248
},
{
"epoch": 0.25696594427244585,
"grad_norm": 8.087671858243937,
"learning_rate": 8.522336769759451e-06,
"loss": 3.0149636268615723,
"step": 249
},
{
"epoch": 0.2579979360165119,
"grad_norm": 10.698229808587508,
"learning_rate": 8.556701030927836e-06,
"loss": 2.5276103019714355,
"step": 250
},
{
"epoch": 0.2590299277605779,
"grad_norm": 9.667088415612039,
"learning_rate": 8.591065292096221e-06,
"loss": 2.8755786418914795,
"step": 251
},
{
"epoch": 0.26006191950464397,
"grad_norm": 14.564032491233844,
"learning_rate": 8.625429553264606e-06,
"loss": 3.011821746826172,
"step": 252
},
{
"epoch": 0.26109391124871,
"grad_norm": 11.007757031318695,
"learning_rate": 8.65979381443299e-06,
"loss": 3.234562873840332,
"step": 253
},
{
"epoch": 0.26212590299277605,
"grad_norm": 10.321254117575847,
"learning_rate": 8.694158075601376e-06,
"loss": 2.898616075515747,
"step": 254
},
{
"epoch": 0.2631578947368421,
"grad_norm": 13.245489550571538,
"learning_rate": 8.72852233676976e-06,
"loss": 3.1038565635681152,
"step": 255
},
{
"epoch": 0.26418988648090813,
"grad_norm": 11.376158862004893,
"learning_rate": 8.762886597938146e-06,
"loss": 3.243223190307617,
"step": 256
},
{
"epoch": 0.26522187822497423,
"grad_norm": 39.21634222490642,
"learning_rate": 8.797250859106529e-06,
"loss": 3.3660576343536377,
"step": 257
},
{
"epoch": 0.26625386996904027,
"grad_norm": 15.732748791203472,
"learning_rate": 8.831615120274914e-06,
"loss": 2.6311941146850586,
"step": 258
},
{
"epoch": 0.2672858617131063,
"grad_norm": 8.307884590331216,
"learning_rate": 8.865979381443299e-06,
"loss": 3.070228338241577,
"step": 259
},
{
"epoch": 0.26831785345717235,
"grad_norm": 20.763674204739655,
"learning_rate": 8.900343642611684e-06,
"loss": 3.5172173976898193,
"step": 260
},
{
"epoch": 0.2693498452012384,
"grad_norm": 22.557605282511833,
"learning_rate": 8.93470790378007e-06,
"loss": 2.4271297454833984,
"step": 261
},
{
"epoch": 0.27038183694530443,
"grad_norm": 14.37219569989753,
"learning_rate": 8.969072164948455e-06,
"loss": 2.866842746734619,
"step": 262
},
{
"epoch": 0.2714138286893705,
"grad_norm": 8.68631119483259,
"learning_rate": 9.003436426116839e-06,
"loss": 2.546846389770508,
"step": 263
},
{
"epoch": 0.2724458204334365,
"grad_norm": 10.697039335181826,
"learning_rate": 9.037800687285224e-06,
"loss": 3.255836009979248,
"step": 264
},
{
"epoch": 0.27347781217750256,
"grad_norm": 12.364532427739105,
"learning_rate": 9.072164948453609e-06,
"loss": 2.631788730621338,
"step": 265
},
{
"epoch": 0.27450980392156865,
"grad_norm": 8.89024681407495,
"learning_rate": 9.106529209621994e-06,
"loss": 2.0813612937927246,
"step": 266
},
{
"epoch": 0.2755417956656347,
"grad_norm": 12.714578000012677,
"learning_rate": 9.140893470790379e-06,
"loss": 3.210515022277832,
"step": 267
},
{
"epoch": 0.27657378740970073,
"grad_norm": 14.202532100418049,
"learning_rate": 9.175257731958764e-06,
"loss": 2.2079906463623047,
"step": 268
},
{
"epoch": 0.2776057791537668,
"grad_norm": 10.81421745806548,
"learning_rate": 9.209621993127148e-06,
"loss": 2.2032227516174316,
"step": 269
},
{
"epoch": 0.2786377708978328,
"grad_norm": 13.109956618269027,
"learning_rate": 9.243986254295533e-06,
"loss": 3.036953926086426,
"step": 270
},
{
"epoch": 0.27966976264189886,
"grad_norm": 9.630762729673748,
"learning_rate": 9.278350515463918e-06,
"loss": 2.6770286560058594,
"step": 271
},
{
"epoch": 0.2807017543859649,
"grad_norm": 11.626984563836794,
"learning_rate": 9.312714776632303e-06,
"loss": 2.9310131072998047,
"step": 272
},
{
"epoch": 0.28173374613003094,
"grad_norm": 12.749203348361391,
"learning_rate": 9.347079037800688e-06,
"loss": 2.9083001613616943,
"step": 273
},
{
"epoch": 0.28276573787409703,
"grad_norm": 15.838893812041686,
"learning_rate": 9.381443298969073e-06,
"loss": 2.9983253479003906,
"step": 274
},
{
"epoch": 0.2837977296181631,
"grad_norm": 13.8172980151992,
"learning_rate": 9.415807560137458e-06,
"loss": 2.9297034740448,
"step": 275
},
{
"epoch": 0.2848297213622291,
"grad_norm": 9.755184721368341,
"learning_rate": 9.450171821305843e-06,
"loss": 3.3235621452331543,
"step": 276
},
{
"epoch": 0.28586171310629516,
"grad_norm": 10.544935847627189,
"learning_rate": 9.484536082474226e-06,
"loss": 2.7880706787109375,
"step": 277
},
{
"epoch": 0.2868937048503612,
"grad_norm": 16.007278048200753,
"learning_rate": 9.518900343642611e-06,
"loss": 2.841766834259033,
"step": 278
},
{
"epoch": 0.28792569659442724,
"grad_norm": 7.504369408272682,
"learning_rate": 9.553264604810998e-06,
"loss": 2.679216146469116,
"step": 279
},
{
"epoch": 0.2889576883384933,
"grad_norm": 21.208047797032336,
"learning_rate": 9.587628865979383e-06,
"loss": 2.819913387298584,
"step": 280
},
{
"epoch": 0.2899896800825593,
"grad_norm": 10.024430545679552,
"learning_rate": 9.621993127147768e-06,
"loss": 2.6728343963623047,
"step": 281
},
{
"epoch": 0.29102167182662536,
"grad_norm": 32.63663115821379,
"learning_rate": 9.656357388316153e-06,
"loss": 2.7835605144500732,
"step": 282
},
{
"epoch": 0.29205366357069146,
"grad_norm": 17.371321854452674,
"learning_rate": 9.690721649484536e-06,
"loss": 2.8734326362609863,
"step": 283
},
{
"epoch": 0.2930856553147575,
"grad_norm": 11.588865129366797,
"learning_rate": 9.725085910652921e-06,
"loss": 2.65207576751709,
"step": 284
},
{
"epoch": 0.29411764705882354,
"grad_norm": 8.213931559350788,
"learning_rate": 9.759450171821306e-06,
"loss": 3.1204328536987305,
"step": 285
},
{
"epoch": 0.2951496388028896,
"grad_norm": 9.921519173515582,
"learning_rate": 9.793814432989691e-06,
"loss": 2.9491803646087646,
"step": 286
},
{
"epoch": 0.2961816305469556,
"grad_norm": 9.509914069257567,
"learning_rate": 9.828178694158076e-06,
"loss": 2.441004991531372,
"step": 287
},
{
"epoch": 0.29721362229102166,
"grad_norm": 7.715460900071529,
"learning_rate": 9.862542955326461e-06,
"loss": 2.7245256900787354,
"step": 288
},
{
"epoch": 0.2982456140350877,
"grad_norm": 8.822780073205767,
"learning_rate": 9.896907216494846e-06,
"loss": 2.8065128326416016,
"step": 289
},
{
"epoch": 0.29927760577915374,
"grad_norm": 19.18711691630486,
"learning_rate": 9.931271477663231e-06,
"loss": 3.9420652389526367,
"step": 290
},
{
"epoch": 0.30030959752321984,
"grad_norm": 27.577856706805292,
"learning_rate": 9.965635738831616e-06,
"loss": 3.3128182888031006,
"step": 291
},
{
"epoch": 0.3013415892672859,
"grad_norm": 10.735643267608587,
"learning_rate": 1e-05,
"loss": 2.7581353187561035,
"step": 292
},
{
"epoch": 0.3023735810113519,
"grad_norm": 10.827298953198202,
"learning_rate": 9.999996394510604e-06,
"loss": 2.6423697471618652,
"step": 293
},
{
"epoch": 0.30340557275541796,
"grad_norm": 52.09810992012433,
"learning_rate": 9.999985578047618e-06,
"loss": 2.7856717109680176,
"step": 294
},
{
"epoch": 0.304437564499484,
"grad_norm": 8.015444030673175,
"learning_rate": 9.99996755062664e-06,
"loss": 2.587672233581543,
"step": 295
},
{
"epoch": 0.30546955624355004,
"grad_norm": 10.412156584943583,
"learning_rate": 9.999942312273667e-06,
"loss": 2.918144702911377,
"step": 296
},
{
"epoch": 0.3065015479876161,
"grad_norm": 15.334737859529346,
"learning_rate": 9.9999098630251e-06,
"loss": 3.476226329803467,
"step": 297
},
{
"epoch": 0.3075335397316821,
"grad_norm": 21.535339149470804,
"learning_rate": 9.999870202927739e-06,
"loss": 2.8158390522003174,
"step": 298
},
{
"epoch": 0.30856553147574817,
"grad_norm": 15.501940000678802,
"learning_rate": 9.999823332038779e-06,
"loss": 3.293022871017456,
"step": 299
},
{
"epoch": 0.30959752321981426,
"grad_norm": 8.473506114508018,
"learning_rate": 9.999769250425817e-06,
"loss": 3.0215983390808105,
"step": 300
},
{
"epoch": 0.3106295149638803,
"grad_norm": 12.507129335204262,
"learning_rate": 9.999707958166849e-06,
"loss": 2.851978063583374,
"step": 301
},
{
"epoch": 0.31166150670794635,
"grad_norm": 11.4112811515218,
"learning_rate": 9.999639455350272e-06,
"loss": 3.32511043548584,
"step": 302
},
{
"epoch": 0.3126934984520124,
"grad_norm": 13.739752311404821,
"learning_rate": 9.999563742074881e-06,
"loss": 2.245746612548828,
"step": 303
},
{
"epoch": 0.3137254901960784,
"grad_norm": 7.6278657739069775,
"learning_rate": 9.999480818449868e-06,
"loss": 2.3384673595428467,
"step": 304
},
{
"epoch": 0.31475748194014447,
"grad_norm": 9.983488457149306,
"learning_rate": 9.999390684594824e-06,
"loss": 3.0112438201904297,
"step": 305
},
{
"epoch": 0.3157894736842105,
"grad_norm": 7.467844366957447,
"learning_rate": 9.99929334063974e-06,
"loss": 2.795377254486084,
"step": 306
},
{
"epoch": 0.31682146542827655,
"grad_norm": 16.83206766868716,
"learning_rate": 9.999188786725007e-06,
"loss": 1.893045425415039,
"step": 307
},
{
"epoch": 0.31785345717234265,
"grad_norm": 25.176127063159374,
"learning_rate": 9.999077023001411e-06,
"loss": 2.9917497634887695,
"step": 308
},
{
"epoch": 0.3188854489164087,
"grad_norm": 7.922777368099187,
"learning_rate": 9.998958049630138e-06,
"loss": 2.824535369873047,
"step": 309
},
{
"epoch": 0.31991744066047473,
"grad_norm": 13.433952072522445,
"learning_rate": 9.998831866782769e-06,
"loss": 1.974698781967163,
"step": 310
},
{
"epoch": 0.32094943240454077,
"grad_norm": 8.553809514306295,
"learning_rate": 9.998698474641286e-06,
"loss": 2.8490941524505615,
"step": 311
},
{
"epoch": 0.3219814241486068,
"grad_norm": 20.312633204656144,
"learning_rate": 9.998557873398066e-06,
"loss": 3.004058361053467,
"step": 312
},
{
"epoch": 0.32301341589267285,
"grad_norm": 8.697705945748643,
"learning_rate": 9.998410063255883e-06,
"loss": 2.712806463241577,
"step": 313
},
{
"epoch": 0.3240454076367389,
"grad_norm": 18.603506945443936,
"learning_rate": 9.998255044427912e-06,
"loss": 3.0143351554870605,
"step": 314
},
{
"epoch": 0.32507739938080493,
"grad_norm": 14.783978465463441,
"learning_rate": 9.998092817137714e-06,
"loss": 2.8690080642700195,
"step": 315
},
{
"epoch": 0.32610939112487103,
"grad_norm": 8.454737221004214,
"learning_rate": 9.997923381619257e-06,
"loss": 2.493039131164551,
"step": 316
},
{
"epoch": 0.32714138286893707,
"grad_norm": 9.462248941946614,
"learning_rate": 9.997746738116897e-06,
"loss": 3.4006752967834473,
"step": 317
},
{
"epoch": 0.3281733746130031,
"grad_norm": 13.039488480718953,
"learning_rate": 9.997562886885393e-06,
"loss": 2.318763256072998,
"step": 318
},
{
"epoch": 0.32920536635706915,
"grad_norm": 10.280603609185906,
"learning_rate": 9.997371828189892e-06,
"loss": 2.6687698364257812,
"step": 319
},
{
"epoch": 0.3302373581011352,
"grad_norm": 10.304930139142693,
"learning_rate": 9.997173562305937e-06,
"loss": 2.9789810180664062,
"step": 320
},
{
"epoch": 0.33126934984520123,
"grad_norm": 10.891861523702152,
"learning_rate": 9.996968089519468e-06,
"loss": 2.615915298461914,
"step": 321
},
{
"epoch": 0.3323013415892673,
"grad_norm": 8.947955043261503,
"learning_rate": 9.996755410126815e-06,
"loss": 2.811088800430298,
"step": 322
},
{
"epoch": 0.3333333333333333,
"grad_norm": 15.51212907567898,
"learning_rate": 9.996535524434705e-06,
"loss": 3.112473249435425,
"step": 323
},
{
"epoch": 0.33436532507739936,
"grad_norm": 11.932511086933177,
"learning_rate": 9.996308432760257e-06,
"loss": 2.898390531539917,
"step": 324
},
{
"epoch": 0.33539731682146545,
"grad_norm": 20.972719472534482,
"learning_rate": 9.99607413543098e-06,
"loss": 3.2577900886535645,
"step": 325
},
{
"epoch": 0.3364293085655315,
"grad_norm": 9.17129674530525,
"learning_rate": 9.995832632784777e-06,
"loss": 2.911431312561035,
"step": 326
},
{
"epoch": 0.33746130030959753,
"grad_norm": 7.9024820720274755,
"learning_rate": 9.99558392516994e-06,
"loss": 3.40252685546875,
"step": 327
},
{
"epoch": 0.3384932920536636,
"grad_norm": 9.29051423881268,
"learning_rate": 9.995328012945158e-06,
"loss": 2.979201316833496,
"step": 328
},
{
"epoch": 0.3395252837977296,
"grad_norm": 12.543344461822567,
"learning_rate": 9.995064896479505e-06,
"loss": 3.4100840091705322,
"step": 329
},
{
"epoch": 0.34055727554179566,
"grad_norm": 21.478041304262256,
"learning_rate": 9.994794576152444e-06,
"loss": 1.7510769367218018,
"step": 330
},
{
"epoch": 0.3415892672858617,
"grad_norm": 17.186995192634996,
"learning_rate": 9.994517052353835e-06,
"loss": 2.7779769897460938,
"step": 331
},
{
"epoch": 0.34262125902992774,
"grad_norm": 14.884852678001963,
"learning_rate": 9.994232325483917e-06,
"loss": 3.062211513519287,
"step": 332
},
{
"epoch": 0.34365325077399383,
"grad_norm": 11.56452242003458,
"learning_rate": 9.993940395953324e-06,
"loss": 3.7257943153381348,
"step": 333
},
{
"epoch": 0.3446852425180599,
"grad_norm": 12.805093366422206,
"learning_rate": 9.993641264183074e-06,
"loss": 2.962735176086426,
"step": 334
},
{
"epoch": 0.3457172342621259,
"grad_norm": 55.42169115560981,
"learning_rate": 9.993334930604575e-06,
"loss": 2.8952317237854004,
"step": 335
},
{
"epoch": 0.34674922600619196,
"grad_norm": 15.80184371734487,
"learning_rate": 9.99302139565962e-06,
"loss": 2.249530792236328,
"step": 336
},
{
"epoch": 0.347781217750258,
"grad_norm": 6.869909846606394,
"learning_rate": 9.992700659800387e-06,
"loss": 3.177823066711426,
"step": 337
},
{
"epoch": 0.34881320949432404,
"grad_norm": 10.887638824184588,
"learning_rate": 9.99237272348944e-06,
"loss": 2.578218936920166,
"step": 338
},
{
"epoch": 0.3498452012383901,
"grad_norm": 11.668756836891024,
"learning_rate": 9.992037587199729e-06,
"loss": 2.8234119415283203,
"step": 339
},
{
"epoch": 0.3508771929824561,
"grad_norm": 10.975725441437913,
"learning_rate": 9.991695251414584e-06,
"loss": 2.981606960296631,
"step": 340
},
{
"epoch": 0.35190918472652216,
"grad_norm": 14.630846465595496,
"learning_rate": 9.99134571662772e-06,
"loss": 2.5231080055236816,
"step": 341
},
{
"epoch": 0.35294117647058826,
"grad_norm": 12.244846502375566,
"learning_rate": 9.990988983343237e-06,
"loss": 2.4090816974639893,
"step": 342
},
{
"epoch": 0.3539731682146543,
"grad_norm": 16.87537596953493,
"learning_rate": 9.990625052075612e-06,
"loss": 3.5845909118652344,
"step": 343
},
{
"epoch": 0.35500515995872034,
"grad_norm": 13.78946221844984,
"learning_rate": 9.990253923349706e-06,
"loss": 2.855722427368164,
"step": 344
},
{
"epoch": 0.3560371517027864,
"grad_norm": 7.35317353541043,
"learning_rate": 9.98987559770076e-06,
"loss": 2.6833579540252686,
"step": 345
},
{
"epoch": 0.3570691434468524,
"grad_norm": 8.228648765687204,
"learning_rate": 9.98949007567439e-06,
"loss": 2.6026723384857178,
"step": 346
},
{
"epoch": 0.35810113519091846,
"grad_norm": 12.63270371305149,
"learning_rate": 9.989097357826601e-06,
"loss": 3.161715507507324,
"step": 347
},
{
"epoch": 0.3591331269349845,
"grad_norm": 10.945191581908023,
"learning_rate": 9.988697444723763e-06,
"loss": 2.507702589035034,
"step": 348
},
{
"epoch": 0.36016511867905054,
"grad_norm": 8.265290958176653,
"learning_rate": 9.98829033694263e-06,
"loss": 2.746232271194458,
"step": 349
},
{
"epoch": 0.36119711042311664,
"grad_norm": 10.712443634776584,
"learning_rate": 9.987876035070334e-06,
"loss": 2.7389914989471436,
"step": 350
},
{
"epoch": 0.3622291021671827,
"grad_norm": 60.73029115548872,
"learning_rate": 9.987454539704377e-06,
"loss": 2.6853432655334473,
"step": 351
},
{
"epoch": 0.3632610939112487,
"grad_norm": 14.57137989089784,
"learning_rate": 9.98702585145264e-06,
"loss": 2.959132671356201,
"step": 352
},
{
"epoch": 0.36429308565531476,
"grad_norm": 7.288243249285789,
"learning_rate": 9.986589970933371e-06,
"loss": 2.905642032623291,
"step": 353
},
{
"epoch": 0.3653250773993808,
"grad_norm": 17.16941316169951,
"learning_rate": 9.986146898775198e-06,
"loss": 2.7748100757598877,
"step": 354
},
{
"epoch": 0.36635706914344685,
"grad_norm": 14.12324828442973,
"learning_rate": 9.985696635617119e-06,
"loss": 3.2043676376342773,
"step": 355
},
{
"epoch": 0.3673890608875129,
"grad_norm": 18.325239784089757,
"learning_rate": 9.9852391821085e-06,
"loss": 2.4523096084594727,
"step": 356
},
{
"epoch": 0.3684210526315789,
"grad_norm": 19.21617188187594,
"learning_rate": 9.984774538909078e-06,
"loss": 3.257173776626587,
"step": 357
},
{
"epoch": 0.36945304437564497,
"grad_norm": 11.81568205276225,
"learning_rate": 9.984302706688962e-06,
"loss": 3.214928150177002,
"step": 358
},
{
"epoch": 0.37048503611971106,
"grad_norm": 11.173774018769517,
"learning_rate": 9.983823686128623e-06,
"loss": 3.023054599761963,
"step": 359
},
{
"epoch": 0.3715170278637771,
"grad_norm": 13.600635783261229,
"learning_rate": 9.983337477918904e-06,
"loss": 2.7034380435943604,
"step": 360
},
{
"epoch": 0.37254901960784315,
"grad_norm": 12.108451404270347,
"learning_rate": 9.982844082761012e-06,
"loss": 2.837873935699463,
"step": 361
},
{
"epoch": 0.3735810113519092,
"grad_norm": 16.766925295407642,
"learning_rate": 9.98234350136652e-06,
"loss": 2.9200439453125,
"step": 362
},
{
"epoch": 0.3746130030959752,
"grad_norm": 7.704863020577384,
"learning_rate": 9.981835734457367e-06,
"loss": 2.867964744567871,
"step": 363
},
{
"epoch": 0.37564499484004127,
"grad_norm": 22.36103217474955,
"learning_rate": 9.981320782765847e-06,
"loss": 2.6212334632873535,
"step": 364
},
{
"epoch": 0.3766769865841073,
"grad_norm": 19.199321631797922,
"learning_rate": 9.980798647034623e-06,
"loss": 2.153306245803833,
"step": 365
},
{
"epoch": 0.37770897832817335,
"grad_norm": 17.3777675791743,
"learning_rate": 9.98026932801672e-06,
"loss": 3.090249538421631,
"step": 366
},
{
"epoch": 0.37874097007223945,
"grad_norm": 19.630282766187502,
"learning_rate": 9.979732826475515e-06,
"loss": 3.281906843185425,
"step": 367
},
{
"epoch": 0.3797729618163055,
"grad_norm": 19.11095646451305,
"learning_rate": 9.97918914318475e-06,
"loss": 2.9486136436462402,
"step": 368
},
{
"epoch": 0.38080495356037153,
"grad_norm": 9.262762771838458,
"learning_rate": 9.978638278928526e-06,
"loss": 2.7943458557128906,
"step": 369
},
{
"epoch": 0.38183694530443757,
"grad_norm": 21.806333096154297,
"learning_rate": 9.978080234501292e-06,
"loss": 1.732006549835205,
"step": 370
},
{
"epoch": 0.3828689370485036,
"grad_norm": 6.763172190678009,
"learning_rate": 9.977515010707862e-06,
"loss": 2.133474349975586,
"step": 371
},
{
"epoch": 0.38390092879256965,
"grad_norm": 19.366115574534298,
"learning_rate": 9.976942608363394e-06,
"loss": 3.236906051635742,
"step": 372
},
{
"epoch": 0.3849329205366357,
"grad_norm": 9.249879875236504,
"learning_rate": 9.976363028293408e-06,
"loss": 2.3459606170654297,
"step": 373
},
{
"epoch": 0.38596491228070173,
"grad_norm": 11.205019811117323,
"learning_rate": 9.975776271333772e-06,
"loss": 2.967754602432251,
"step": 374
},
{
"epoch": 0.38699690402476783,
"grad_norm": 8.937431500238974,
"learning_rate": 9.975182338330704e-06,
"loss": 2.5639843940734863,
"step": 375
},
{
"epoch": 0.38802889576883387,
"grad_norm": 8.040846314018593,
"learning_rate": 9.97458123014077e-06,
"loss": 2.6252379417419434,
"step": 376
},
{
"epoch": 0.3890608875128999,
"grad_norm": 13.693624667041957,
"learning_rate": 9.973972947630886e-06,
"loss": 3.164388656616211,
"step": 377
},
{
"epoch": 0.39009287925696595,
"grad_norm": 18.601866471494823,
"learning_rate": 9.973357491678317e-06,
"loss": 3.2664146423339844,
"step": 378
},
{
"epoch": 0.391124871001032,
"grad_norm": 13.00798986403027,
"learning_rate": 9.972734863170668e-06,
"loss": 2.738879680633545,
"step": 379
},
{
"epoch": 0.39215686274509803,
"grad_norm": 9.189801321181857,
"learning_rate": 9.972105063005895e-06,
"loss": 2.6619622707366943,
"step": 380
},
{
"epoch": 0.3931888544891641,
"grad_norm": 20.273889494348328,
"learning_rate": 9.971468092092289e-06,
"loss": 3.262800693511963,
"step": 381
},
{
"epoch": 0.3942208462332301,
"grad_norm": 13.562348770998613,
"learning_rate": 9.970823951348488e-06,
"loss": 2.8759002685546875,
"step": 382
},
{
"epoch": 0.39525283797729616,
"grad_norm": 10.472991979915335,
"learning_rate": 9.970172641703469e-06,
"loss": 2.684480905532837,
"step": 383
},
{
"epoch": 0.39628482972136225,
"grad_norm": 12.988097260817664,
"learning_rate": 9.969514164096548e-06,
"loss": 2.765899658203125,
"step": 384
},
{
"epoch": 0.3973168214654283,
"grad_norm": 14.082206700480766,
"learning_rate": 9.968848519477382e-06,
"loss": 3.019839286804199,
"step": 385
},
{
"epoch": 0.39834881320949433,
"grad_norm": 22.037728509038477,
"learning_rate": 9.968175708805954e-06,
"loss": 3.36822509765625,
"step": 386
},
{
"epoch": 0.3993808049535604,
"grad_norm": 32.96293580792141,
"learning_rate": 9.967495733052594e-06,
"loss": 2.3022584915161133,
"step": 387
},
{
"epoch": 0.4004127966976264,
"grad_norm": 16.977515540691243,
"learning_rate": 9.966808593197959e-06,
"loss": 3.093107223510742,
"step": 388
},
{
"epoch": 0.40144478844169246,
"grad_norm": 13.576846821861261,
"learning_rate": 9.96611429023304e-06,
"loss": 3.0452375411987305,
"step": 389
},
{
"epoch": 0.4024767801857585,
"grad_norm": 22.21723302108591,
"learning_rate": 9.965412825159156e-06,
"loss": 2.6012535095214844,
"step": 390
},
{
"epoch": 0.40350877192982454,
"grad_norm": 14.197779554582961,
"learning_rate": 9.964704198987955e-06,
"loss": 2.974013328552246,
"step": 391
},
{
"epoch": 0.40454076367389064,
"grad_norm": 15.990117282812127,
"learning_rate": 9.96398841274142e-06,
"loss": 3.539915084838867,
"step": 392
},
{
"epoch": 0.4055727554179567,
"grad_norm": 16.322394164585692,
"learning_rate": 9.963265467451853e-06,
"loss": 3.248749017715454,
"step": 393
},
{
"epoch": 0.4066047471620227,
"grad_norm": 9.828830418023808,
"learning_rate": 9.962535364161879e-06,
"loss": 2.8553173542022705,
"step": 394
},
{
"epoch": 0.40763673890608876,
"grad_norm": 13.454736718150299,
"learning_rate": 9.961798103924454e-06,
"loss": 2.443535566329956,
"step": 395
},
{
"epoch": 0.4086687306501548,
"grad_norm": 13.941255314244941,
"learning_rate": 9.96105368780285e-06,
"loss": 2.379136085510254,
"step": 396
},
{
"epoch": 0.40970072239422084,
"grad_norm": 13.117699185931533,
"learning_rate": 9.96030211687066e-06,
"loss": 3.2327611446380615,
"step": 397
},
{
"epoch": 0.4107327141382869,
"grad_norm": 9.77972211250318,
"learning_rate": 9.9595433922118e-06,
"loss": 2.840574264526367,
"step": 398
},
{
"epoch": 0.4117647058823529,
"grad_norm": 12.77728328287478,
"learning_rate": 9.958777514920498e-06,
"loss": 2.725296974182129,
"step": 399
},
{
"epoch": 0.41279669762641896,
"grad_norm": 9.363684373456174,
"learning_rate": 9.958004486101293e-06,
"loss": 2.8668479919433594,
"step": 400
},
{
"epoch": 0.41382868937048506,
"grad_norm": 7.830675919094182,
"learning_rate": 9.957224306869053e-06,
"loss": 2.924373149871826,
"step": 401
},
{
"epoch": 0.4148606811145511,
"grad_norm": 11.583533095118902,
"learning_rate": 9.956436978348943e-06,
"loss": 3.06062650680542,
"step": 402
},
{
"epoch": 0.41589267285861714,
"grad_norm": 13.137555464833737,
"learning_rate": 9.955642501676447e-06,
"loss": 2.592439651489258,
"step": 403
},
{
"epoch": 0.4169246646026832,
"grad_norm": 6.498482284005015,
"learning_rate": 9.954840877997356e-06,
"loss": 2.9468114376068115,
"step": 404
},
{
"epoch": 0.4179566563467492,
"grad_norm": 16.005423084911904,
"learning_rate": 9.954032108467769e-06,
"loss": 2.9270787239074707,
"step": 405
},
{
"epoch": 0.41898864809081526,
"grad_norm": 9.22312215745454,
"learning_rate": 9.953216194254088e-06,
"loss": 2.422769784927368,
"step": 406
},
{
"epoch": 0.4200206398348813,
"grad_norm": 16.90201457779728,
"learning_rate": 9.952393136533021e-06,
"loss": 2.977414131164551,
"step": 407
},
{
"epoch": 0.42105263157894735,
"grad_norm": 10.986524956754087,
"learning_rate": 9.95156293649158e-06,
"loss": 2.8430471420288086,
"step": 408
},
{
"epoch": 0.42208462332301344,
"grad_norm": 8.196133786488467,
"learning_rate": 9.950725595327076e-06,
"loss": 2.8283088207244873,
"step": 409
},
{
"epoch": 0.4231166150670795,
"grad_norm": 9.997912814132267,
"learning_rate": 9.949881114247117e-06,
"loss": 3.180129051208496,
"step": 410
},
{
"epoch": 0.4241486068111455,
"grad_norm": 22.911756115565684,
"learning_rate": 9.949029494469613e-06,
"loss": 2.9284815788269043,
"step": 411
},
{
"epoch": 0.42518059855521156,
"grad_norm": 16.846511577170734,
"learning_rate": 9.948170737222763e-06,
"loss": 2.946915864944458,
"step": 412
},
{
"epoch": 0.4262125902992776,
"grad_norm": 10.890074775505424,
"learning_rate": 9.947304843745065e-06,
"loss": 2.7982027530670166,
"step": 413
},
{
"epoch": 0.42724458204334365,
"grad_norm": 16.493152057559257,
"learning_rate": 9.946431815285307e-06,
"loss": 2.9675230979919434,
"step": 414
},
{
"epoch": 0.4282765737874097,
"grad_norm": 23.323596296720044,
"learning_rate": 9.945551653102566e-06,
"loss": 2.308884859085083,
"step": 415
},
{
"epoch": 0.4293085655314757,
"grad_norm": 13.906035502172923,
"learning_rate": 9.94466435846621e-06,
"loss": 2.8153557777404785,
"step": 416
},
{
"epoch": 0.43034055727554177,
"grad_norm": 13.717983567538138,
"learning_rate": 9.943769932655889e-06,
"loss": 2.911931037902832,
"step": 417
},
{
"epoch": 0.43137254901960786,
"grad_norm": 9.5030223166851,
"learning_rate": 9.942868376961542e-06,
"loss": 2.2906694412231445,
"step": 418
},
{
"epoch": 0.4324045407636739,
"grad_norm": 14.25839596238375,
"learning_rate": 9.941959692683387e-06,
"loss": 3.677119016647339,
"step": 419
},
{
"epoch": 0.43343653250773995,
"grad_norm": 11.318600737070227,
"learning_rate": 9.941043881131928e-06,
"loss": 2.7575297355651855,
"step": 420
},
{
"epoch": 0.434468524251806,
"grad_norm": 8.694948519786168,
"learning_rate": 9.94012094362794e-06,
"loss": 2.9048759937286377,
"step": 421
},
{
"epoch": 0.43550051599587203,
"grad_norm": 8.718078750395678,
"learning_rate": 9.939190881502484e-06,
"loss": 2.418113946914673,
"step": 422
},
{
"epoch": 0.43653250773993807,
"grad_norm": 11.446167858175109,
"learning_rate": 9.93825369609689e-06,
"loss": 2.754148006439209,
"step": 423
},
{
"epoch": 0.4375644994840041,
"grad_norm": 15.718887678684286,
"learning_rate": 9.93730938876276e-06,
"loss": 2.9343578815460205,
"step": 424
},
{
"epoch": 0.43859649122807015,
"grad_norm": 40.31476387336908,
"learning_rate": 9.936357960861972e-06,
"loss": 4.214080810546875,
"step": 425
},
{
"epoch": 0.43962848297213625,
"grad_norm": 11.539263308149694,
"learning_rate": 9.935399413766672e-06,
"loss": 2.7992300987243652,
"step": 426
},
{
"epoch": 0.4406604747162023,
"grad_norm": 10.944370426001614,
"learning_rate": 9.934433748859275e-06,
"loss": 2.484590530395508,
"step": 427
},
{
"epoch": 0.44169246646026833,
"grad_norm": 8.26905923788389,
"learning_rate": 9.933460967532454e-06,
"loss": 2.95884370803833,
"step": 428
},
{
"epoch": 0.44272445820433437,
"grad_norm": 10.549063851492853,
"learning_rate": 9.932481071189153e-06,
"loss": 3.0373470783233643,
"step": 429
},
{
"epoch": 0.4437564499484004,
"grad_norm": 8.449015598464936,
"learning_rate": 9.931494061242573e-06,
"loss": 2.8038151264190674,
"step": 430
},
{
"epoch": 0.44478844169246645,
"grad_norm": 8.938806234909366,
"learning_rate": 9.930499939116176e-06,
"loss": 2.8492302894592285,
"step": 431
},
{
"epoch": 0.4458204334365325,
"grad_norm": 15.990285713057496,
"learning_rate": 9.929498706243681e-06,
"loss": 3.027756929397583,
"step": 432
},
{
"epoch": 0.44685242518059853,
"grad_norm": 8.636867273214362,
"learning_rate": 9.928490364069061e-06,
"loss": 2.9714016914367676,
"step": 433
},
{
"epoch": 0.44788441692466463,
"grad_norm": 19.47087373298744,
"learning_rate": 9.927474914046543e-06,
"loss": 2.5500893592834473,
"step": 434
},
{
"epoch": 0.44891640866873067,
"grad_norm": 11.726357779590334,
"learning_rate": 9.926452357640606e-06,
"loss": 2.5415332317352295,
"step": 435
},
{
"epoch": 0.4499484004127967,
"grad_norm": 12.640200129799714,
"learning_rate": 9.925422696325976e-06,
"loss": 3.4519457817077637,
"step": 436
},
{
"epoch": 0.45098039215686275,
"grad_norm": 12.751227264561491,
"learning_rate": 9.924385931587625e-06,
"loss": 2.9185168743133545,
"step": 437
},
{
"epoch": 0.4520123839009288,
"grad_norm": 14.350642201270977,
"learning_rate": 9.923342064920771e-06,
"loss": 3.1892685890197754,
"step": 438
},
{
"epoch": 0.45304437564499483,
"grad_norm": 17.349109352530736,
"learning_rate": 9.922291097830876e-06,
"loss": 3.497467041015625,
"step": 439
},
{
"epoch": 0.4540763673890609,
"grad_norm": 8.882606386936544,
"learning_rate": 9.921233031833639e-06,
"loss": 2.8938982486724854,
"step": 440
},
{
"epoch": 0.4551083591331269,
"grad_norm": 12.00104895756575,
"learning_rate": 9.920167868454997e-06,
"loss": 2.568723201751709,
"step": 441
},
{
"epoch": 0.45614035087719296,
"grad_norm": 12.24197775502783,
"learning_rate": 9.919095609231125e-06,
"loss": 2.752882957458496,
"step": 442
},
{
"epoch": 0.45717234262125905,
"grad_norm": 10.373362572382495,
"learning_rate": 9.918016255708431e-06,
"loss": 2.4421474933624268,
"step": 443
},
{
"epoch": 0.4582043343653251,
"grad_norm": 10.161332440181559,
"learning_rate": 9.916929809443555e-06,
"loss": 2.9970295429229736,
"step": 444
},
{
"epoch": 0.45923632610939114,
"grad_norm": 9.34236510344498,
"learning_rate": 9.915836272003365e-06,
"loss": 3.05275559425354,
"step": 445
},
{
"epoch": 0.4602683178534572,
"grad_norm": 9.96739412039589,
"learning_rate": 9.914735644964955e-06,
"loss": 2.963543653488159,
"step": 446
},
{
"epoch": 0.4613003095975232,
"grad_norm": 18.452664752564154,
"learning_rate": 9.913627929915643e-06,
"loss": 3.117292642593384,
"step": 447
},
{
"epoch": 0.46233230134158926,
"grad_norm": 10.880262131715845,
"learning_rate": 9.912513128452974e-06,
"loss": 3.0254008769989014,
"step": 448
},
{
"epoch": 0.4633642930856553,
"grad_norm": 12.030161973072362,
"learning_rate": 9.911391242184709e-06,
"loss": 3.374577045440674,
"step": 449
},
{
"epoch": 0.46439628482972134,
"grad_norm": 10.04357223387812,
"learning_rate": 9.910262272728827e-06,
"loss": 3.246830463409424,
"step": 450
},
{
"epoch": 0.46542827657378744,
"grad_norm": 12.972716308863506,
"learning_rate": 9.909126221713523e-06,
"loss": 2.814098834991455,
"step": 451
},
{
"epoch": 0.4664602683178535,
"grad_norm": 15.11624739030851,
"learning_rate": 9.907983090777206e-06,
"loss": 2.862546920776367,
"step": 452
},
{
"epoch": 0.4674922600619195,
"grad_norm": 8.309522945339996,
"learning_rate": 9.906832881568495e-06,
"loss": 2.7970046997070312,
"step": 453
},
{
"epoch": 0.46852425180598556,
"grad_norm": 8.832127930749055,
"learning_rate": 9.905675595746214e-06,
"loss": 2.7871201038360596,
"step": 454
},
{
"epoch": 0.4695562435500516,
"grad_norm": 8.994774891105193,
"learning_rate": 9.9045112349794e-06,
"loss": 2.843500852584839,
"step": 455
},
{
"epoch": 0.47058823529411764,
"grad_norm": 9.434601040395169,
"learning_rate": 9.903339800947284e-06,
"loss": 2.8510613441467285,
"step": 456
},
{
"epoch": 0.4716202270381837,
"grad_norm": 13.487737414745894,
"learning_rate": 9.902161295339306e-06,
"loss": 2.999211311340332,
"step": 457
},
{
"epoch": 0.4726522187822497,
"grad_norm": 11.537976864473686,
"learning_rate": 9.900975719855103e-06,
"loss": 2.8772103786468506,
"step": 458
},
{
"epoch": 0.47368421052631576,
"grad_norm": 6.8020753807677625,
"learning_rate": 9.899783076204505e-06,
"loss": 2.6091156005859375,
"step": 459
},
{
"epoch": 0.47471620227038186,
"grad_norm": 13.189197539765583,
"learning_rate": 9.898583366107539e-06,
"loss": 3.0862064361572266,
"step": 460
},
{
"epoch": 0.4757481940144479,
"grad_norm": 12.492595777939266,
"learning_rate": 9.897376591294419e-06,
"loss": 3.177717685699463,
"step": 461
},
{
"epoch": 0.47678018575851394,
"grad_norm": 8.163142685905079,
"learning_rate": 9.896162753505554e-06,
"loss": 2.645430564880371,
"step": 462
},
{
"epoch": 0.47781217750258,
"grad_norm": 8.182725255858564,
"learning_rate": 9.894941854491533e-06,
"loss": 2.8407297134399414,
"step": 463
},
{
"epoch": 0.478844169246646,
"grad_norm": 14.663125083277464,
"learning_rate": 9.893713896013134e-06,
"loss": 3.1082820892333984,
"step": 464
},
{
"epoch": 0.47987616099071206,
"grad_norm": 14.128620127185377,
"learning_rate": 9.892478879841312e-06,
"loss": 2.8026363849639893,
"step": 465
},
{
"epoch": 0.4809081527347781,
"grad_norm": 11.79057657992109,
"learning_rate": 9.891236807757201e-06,
"loss": 3.1938395500183105,
"step": 466
},
{
"epoch": 0.48194014447884415,
"grad_norm": 20.61347482442454,
"learning_rate": 9.889987681552116e-06,
"loss": 2.888730049133301,
"step": 467
},
{
"epoch": 0.48297213622291024,
"grad_norm": 9.126516869757138,
"learning_rate": 9.888731503027535e-06,
"loss": 2.6922078132629395,
"step": 468
},
{
"epoch": 0.4840041279669763,
"grad_norm": 7.061744400580414,
"learning_rate": 9.88746827399512e-06,
"loss": 3.191213369369507,
"step": 469
},
{
"epoch": 0.4850361197110423,
"grad_norm": 21.416335420332835,
"learning_rate": 9.88619799627669e-06,
"loss": 3.262622833251953,
"step": 470
},
{
"epoch": 0.48606811145510836,
"grad_norm": 12.192570025373612,
"learning_rate": 9.884920671704236e-06,
"loss": 2.7565722465515137,
"step": 471
},
{
"epoch": 0.4871001031991744,
"grad_norm": 10.743234544996307,
"learning_rate": 9.883636302119911e-06,
"loss": 2.4965720176696777,
"step": 472
},
{
"epoch": 0.48813209494324045,
"grad_norm": 6.832289564849522,
"learning_rate": 9.882344889376025e-06,
"loss": 2.9415411949157715,
"step": 473
},
{
"epoch": 0.4891640866873065,
"grad_norm": 14.449776531612711,
"learning_rate": 9.881046435335051e-06,
"loss": 3.0663790702819824,
"step": 474
},
{
"epoch": 0.49019607843137253,
"grad_norm": 16.313898001218128,
"learning_rate": 9.879740941869611e-06,
"loss": 3.389232873916626,
"step": 475
},
{
"epoch": 0.49122807017543857,
"grad_norm": 28.323859556398297,
"learning_rate": 9.878428410862484e-06,
"loss": 2.3527965545654297,
"step": 476
},
{
"epoch": 0.49226006191950467,
"grad_norm": 16.000154869678152,
"learning_rate": 9.877108844206596e-06,
"loss": 2.9908742904663086,
"step": 477
},
{
"epoch": 0.4932920536635707,
"grad_norm": 8.010781348355478,
"learning_rate": 9.875782243805019e-06,
"loss": 2.6792373657226562,
"step": 478
},
{
"epoch": 0.49432404540763675,
"grad_norm": 15.59977702072723,
"learning_rate": 9.874448611570972e-06,
"loss": 2.5156683921813965,
"step": 479
},
{
"epoch": 0.4953560371517028,
"grad_norm": 20.37273895188341,
"learning_rate": 9.873107949427815e-06,
"loss": 3.209371566772461,
"step": 480
},
{
"epoch": 0.49638802889576883,
"grad_norm": 10.036822409380285,
"learning_rate": 9.871760259309043e-06,
"loss": 2.7667236328125,
"step": 481
},
{
"epoch": 0.49742002063983487,
"grad_norm": 9.753320357829859,
"learning_rate": 9.87040554315829e-06,
"loss": 1.9972810745239258,
"step": 482
},
{
"epoch": 0.4984520123839009,
"grad_norm": 10.272132278927545,
"learning_rate": 9.869043802929322e-06,
"loss": 2.9918630123138428,
"step": 483
},
{
"epoch": 0.49948400412796695,
"grad_norm": 7.60379661096887,
"learning_rate": 9.867675040586035e-06,
"loss": 2.4964921474456787,
"step": 484
},
{
"epoch": 0.500515995872033,
"grad_norm": 7.939425539221603,
"learning_rate": 9.866299258102452e-06,
"loss": 2.7059545516967773,
"step": 485
},
{
"epoch": 0.5015479876160991,
"grad_norm": 9.333404161929746,
"learning_rate": 9.864916457462718e-06,
"loss": 2.2699975967407227,
"step": 486
},
{
"epoch": 0.5025799793601651,
"grad_norm": 23.79614363690678,
"learning_rate": 9.863526640661107e-06,
"loss": 2.6487884521484375,
"step": 487
},
{
"epoch": 0.5036119711042312,
"grad_norm": 12.805837891171706,
"learning_rate": 9.862129809702006e-06,
"loss": 2.6330082416534424,
"step": 488
},
{
"epoch": 0.5046439628482973,
"grad_norm": 10.301925185020982,
"learning_rate": 9.860725966599915e-06,
"loss": 2.554631233215332,
"step": 489
},
{
"epoch": 0.5056759545923633,
"grad_norm": 37.98629229944275,
"learning_rate": 9.859315113379455e-06,
"loss": 3.3101139068603516,
"step": 490
},
{
"epoch": 0.5067079463364293,
"grad_norm": 7.326811124309165,
"learning_rate": 9.857897252075348e-06,
"loss": 2.6415815353393555,
"step": 491
},
{
"epoch": 0.5077399380804953,
"grad_norm": 15.800853756638167,
"learning_rate": 9.856472384732432e-06,
"loss": 3.3660855293273926,
"step": 492
},
{
"epoch": 0.5087719298245614,
"grad_norm": 17.332722973199935,
"learning_rate": 9.855040513405642e-06,
"loss": 3.0009288787841797,
"step": 493
},
{
"epoch": 0.5098039215686274,
"grad_norm": 10.309930138772124,
"learning_rate": 9.853601640160018e-06,
"loss": 2.8668737411499023,
"step": 494
},
{
"epoch": 0.5108359133126935,
"grad_norm": 9.955824429005219,
"learning_rate": 9.852155767070696e-06,
"loss": 2.70121169090271,
"step": 495
},
{
"epoch": 0.5118679050567595,
"grad_norm": 14.293014023725297,
"learning_rate": 9.850702896222908e-06,
"loss": 2.6304683685302734,
"step": 496
},
{
"epoch": 0.5128998968008256,
"grad_norm": 10.16377770776268,
"learning_rate": 9.84924302971198e-06,
"loss": 2.697707176208496,
"step": 497
},
{
"epoch": 0.5139318885448917,
"grad_norm": 17.313925209639383,
"learning_rate": 9.847776169643322e-06,
"loss": 3.2866413593292236,
"step": 498
},
{
"epoch": 0.5149638802889577,
"grad_norm": 13.041656264112651,
"learning_rate": 9.846302318132437e-06,
"loss": 2.739035129547119,
"step": 499
},
{
"epoch": 0.5159958720330238,
"grad_norm": 25.977909711367786,
"learning_rate": 9.844821477304904e-06,
"loss": 2.803387403488159,
"step": 500
},
{
"epoch": 0.5170278637770898,
"grad_norm": 11.25826916709719,
"learning_rate": 9.843333649296387e-06,
"loss": 3.2589616775512695,
"step": 501
},
{
"epoch": 0.5180598555211559,
"grad_norm": 10.61990540469556,
"learning_rate": 9.841838836252627e-06,
"loss": 2.7748382091522217,
"step": 502
},
{
"epoch": 0.5190918472652218,
"grad_norm": 12.272307422950709,
"learning_rate": 9.840337040329433e-06,
"loss": 2.3776865005493164,
"step": 503
},
{
"epoch": 0.5201238390092879,
"grad_norm": 15.454489179514884,
"learning_rate": 9.838828263692693e-06,
"loss": 3.4385757446289062,
"step": 504
},
{
"epoch": 0.5211558307533539,
"grad_norm": 10.462179292307349,
"learning_rate": 9.837312508518355e-06,
"loss": 2.903876543045044,
"step": 505
},
{
"epoch": 0.52218782249742,
"grad_norm": 9.146943786633209,
"learning_rate": 9.835789776992436e-06,
"loss": 2.5005409717559814,
"step": 506
},
{
"epoch": 0.5232198142414861,
"grad_norm": 12.200395261029715,
"learning_rate": 9.834260071311013e-06,
"loss": 2.875609874725342,
"step": 507
},
{
"epoch": 0.5242518059855521,
"grad_norm": 9.905509897971436,
"learning_rate": 9.832723393680222e-06,
"loss": 2.530484437942505,
"step": 508
},
{
"epoch": 0.5252837977296182,
"grad_norm": 14.932339890770075,
"learning_rate": 9.83117974631625e-06,
"loss": 3.1532692909240723,
"step": 509
},
{
"epoch": 0.5263157894736842,
"grad_norm": 11.517270882974753,
"learning_rate": 9.829629131445342e-06,
"loss": 3.3132500648498535,
"step": 510
},
{
"epoch": 0.5273477812177503,
"grad_norm": 10.007374901505056,
"learning_rate": 9.828071551303786e-06,
"loss": 3.384342670440674,
"step": 511
},
{
"epoch": 0.5283797729618163,
"grad_norm": 14.941295583083766,
"learning_rate": 9.826507008137919e-06,
"loss": 2.9540984630584717,
"step": 512
},
{
"epoch": 0.5294117647058824,
"grad_norm": 14.270089534040753,
"learning_rate": 9.824935504204118e-06,
"loss": 2.96120023727417,
"step": 513
},
{
"epoch": 0.5304437564499485,
"grad_norm": 9.60285781526636,
"learning_rate": 9.823357041768798e-06,
"loss": 2.6130409240722656,
"step": 514
},
{
"epoch": 0.5314757481940144,
"grad_norm": 17.84592940119971,
"learning_rate": 9.82177162310841e-06,
"loss": 3.43778657913208,
"step": 515
},
{
"epoch": 0.5325077399380805,
"grad_norm": 15.641732375671388,
"learning_rate": 9.820179250509442e-06,
"loss": 2.6619691848754883,
"step": 516
},
{
"epoch": 0.5335397316821465,
"grad_norm": 21.653610476568268,
"learning_rate": 9.818579926268406e-06,
"loss": 2.7543861865997314,
"step": 517
},
{
"epoch": 0.5345717234262126,
"grad_norm": 16.735112469367458,
"learning_rate": 9.81697365269184e-06,
"loss": 2.566066265106201,
"step": 518
},
{
"epoch": 0.5356037151702786,
"grad_norm": 9.928716464268096,
"learning_rate": 9.8153604320963e-06,
"loss": 2.8962950706481934,
"step": 519
},
{
"epoch": 0.5366357069143447,
"grad_norm": 7.929089403898088,
"learning_rate": 9.813740266808375e-06,
"loss": 2.634678363800049,
"step": 520
},
{
"epoch": 0.5376676986584107,
"grad_norm": 24.63429174148746,
"learning_rate": 9.812113159164654e-06,
"loss": 2.4516327381134033,
"step": 521
},
{
"epoch": 0.5386996904024768,
"grad_norm": 10.602652369185357,
"learning_rate": 9.810479111511748e-06,
"loss": 2.5806102752685547,
"step": 522
},
{
"epoch": 0.5397316821465429,
"grad_norm": 12.360924872138455,
"learning_rate": 9.80883812620627e-06,
"loss": 2.667391300201416,
"step": 523
},
{
"epoch": 0.5407636738906089,
"grad_norm": 11.213683273229366,
"learning_rate": 9.807190205614847e-06,
"loss": 2.6906814575195312,
"step": 524
},
{
"epoch": 0.541795665634675,
"grad_norm": 11.312769983405103,
"learning_rate": 9.805535352114097e-06,
"loss": 2.9441957473754883,
"step": 525
},
{
"epoch": 0.542827657378741,
"grad_norm": 11.414090265004733,
"learning_rate": 9.80387356809065e-06,
"loss": 3.042667865753174,
"step": 526
},
{
"epoch": 0.543859649122807,
"grad_norm": 24.401446685942332,
"learning_rate": 9.802204855941118e-06,
"loss": 3.2037558555603027,
"step": 527
},
{
"epoch": 0.544891640866873,
"grad_norm": 14.234503967293701,
"learning_rate": 9.800529218072112e-06,
"loss": 3.2438836097717285,
"step": 528
},
{
"epoch": 0.5459236326109391,
"grad_norm": 19.941491292892934,
"learning_rate": 9.79884665690023e-06,
"loss": 2.996436595916748,
"step": 529
},
{
"epoch": 0.5469556243550051,
"grad_norm": 17.39429990283107,
"learning_rate": 9.797157174852057e-06,
"loss": 3.27510404586792,
"step": 530
},
{
"epoch": 0.5479876160990712,
"grad_norm": 12.33719755107959,
"learning_rate": 9.795460774364153e-06,
"loss": 2.511228561401367,
"step": 531
},
{
"epoch": 0.5490196078431373,
"grad_norm": 12.39665151922383,
"learning_rate": 9.793757457883062e-06,
"loss": 2.7313032150268555,
"step": 532
},
{
"epoch": 0.5500515995872033,
"grad_norm": 10.597746898974815,
"learning_rate": 9.7920472278653e-06,
"loss": 3.2101497650146484,
"step": 533
},
{
"epoch": 0.5510835913312694,
"grad_norm": 14.269140980065217,
"learning_rate": 9.79033008677735e-06,
"loss": 3.409496307373047,
"step": 534
},
{
"epoch": 0.5521155830753354,
"grad_norm": 16.87834227035807,
"learning_rate": 9.788606037095672e-06,
"loss": 2.925279140472412,
"step": 535
},
{
"epoch": 0.5531475748194015,
"grad_norm": 12.890765154675877,
"learning_rate": 9.786875081306677e-06,
"loss": 2.770125389099121,
"step": 536
},
{
"epoch": 0.5541795665634675,
"grad_norm": 8.253261202188625,
"learning_rate": 9.785137221906744e-06,
"loss": 2.789903163909912,
"step": 537
},
{
"epoch": 0.5552115583075335,
"grad_norm": 12.283379998713215,
"learning_rate": 9.783392461402208e-06,
"loss": 2.7387213706970215,
"step": 538
},
{
"epoch": 0.5562435500515995,
"grad_norm": 9.957027587195357,
"learning_rate": 9.781640802309356e-06,
"loss": 2.5360267162323,
"step": 539
},
{
"epoch": 0.5572755417956656,
"grad_norm": 11.516373350987816,
"learning_rate": 9.779882247154419e-06,
"loss": 2.6707615852355957,
"step": 540
},
{
"epoch": 0.5583075335397317,
"grad_norm": 10.237296608456246,
"learning_rate": 9.778116798473581e-06,
"loss": 2.4297256469726562,
"step": 541
},
{
"epoch": 0.5593395252837977,
"grad_norm": 12.673097956290409,
"learning_rate": 9.776344458812964e-06,
"loss": 3.2397093772888184,
"step": 542
},
{
"epoch": 0.5603715170278638,
"grad_norm": 10.07521278832074,
"learning_rate": 9.774565230728628e-06,
"loss": 2.525027275085449,
"step": 543
},
{
"epoch": 0.5614035087719298,
"grad_norm": 10.57130148434467,
"learning_rate": 9.772779116786568e-06,
"loss": 2.574157238006592,
"step": 544
},
{
"epoch": 0.5624355005159959,
"grad_norm": 12.124957814722062,
"learning_rate": 9.770986119562714e-06,
"loss": 2.9182310104370117,
"step": 545
},
{
"epoch": 0.5634674922600619,
"grad_norm": 8.965146386103498,
"learning_rate": 9.769186241642912e-06,
"loss": 2.629945755004883,
"step": 546
},
{
"epoch": 0.564499484004128,
"grad_norm": 13.426207572012682,
"learning_rate": 9.767379485622943e-06,
"loss": 3.2727980613708496,
"step": 547
},
{
"epoch": 0.5655314757481941,
"grad_norm": 9.160205572760614,
"learning_rate": 9.765565854108503e-06,
"loss": 2.322061538696289,
"step": 548
},
{
"epoch": 0.56656346749226,
"grad_norm": 10.994969899026037,
"learning_rate": 9.763745349715202e-06,
"loss": 2.6975133419036865,
"step": 549
},
{
"epoch": 0.5675954592363261,
"grad_norm": 11.994858883920255,
"learning_rate": 9.761917975068564e-06,
"loss": 2.7160282135009766,
"step": 550
},
{
"epoch": 0.5686274509803921,
"grad_norm": 9.176623905737621,
"learning_rate": 9.760083732804022e-06,
"loss": 2.919581174850464,
"step": 551
},
{
"epoch": 0.5696594427244582,
"grad_norm": 9.35943521250712,
"learning_rate": 9.758242625566912e-06,
"loss": 2.583207130432129,
"step": 552
},
{
"epoch": 0.5706914344685242,
"grad_norm": 20.653688605226197,
"learning_rate": 9.75639465601247e-06,
"loss": 2.9091479778289795,
"step": 553
},
{
"epoch": 0.5717234262125903,
"grad_norm": 10.35374830404143,
"learning_rate": 9.754539826805829e-06,
"loss": 2.736656904220581,
"step": 554
},
{
"epoch": 0.5727554179566563,
"grad_norm": 8.128894062796498,
"learning_rate": 9.75267814062202e-06,
"loss": 2.8653979301452637,
"step": 555
},
{
"epoch": 0.5737874097007224,
"grad_norm": 11.714637714032797,
"learning_rate": 9.750809600145955e-06,
"loss": 2.816713809967041,
"step": 556
},
{
"epoch": 0.5748194014447885,
"grad_norm": 84.19076077455068,
"learning_rate": 9.748934208072436e-06,
"loss": 3.234724283218384,
"step": 557
},
{
"epoch": 0.5758513931888545,
"grad_norm": 7.046513409283844,
"learning_rate": 9.747051967106147e-06,
"loss": 2.851330041885376,
"step": 558
},
{
"epoch": 0.5768833849329206,
"grad_norm": 9.692926498760864,
"learning_rate": 9.745162879961647e-06,
"loss": 2.6581106185913086,
"step": 559
},
{
"epoch": 0.5779153766769866,
"grad_norm": 17.793573180527815,
"learning_rate": 9.743266949363368e-06,
"loss": 2.938344717025757,
"step": 560
},
{
"epoch": 0.5789473684210527,
"grad_norm": 15.080146501460122,
"learning_rate": 9.741364178045615e-06,
"loss": 2.7053351402282715,
"step": 561
},
{
"epoch": 0.5799793601651186,
"grad_norm": 13.655409599016627,
"learning_rate": 9.739454568752556e-06,
"loss": 3.4648630619049072,
"step": 562
},
{
"epoch": 0.5810113519091847,
"grad_norm": 9.057246477179403,
"learning_rate": 9.737538124238222e-06,
"loss": 2.716965436935425,
"step": 563
},
{
"epoch": 0.5820433436532507,
"grad_norm": 13.906963767108351,
"learning_rate": 9.735614847266502e-06,
"loss": 3.076453924179077,
"step": 564
},
{
"epoch": 0.5830753353973168,
"grad_norm": 15.381126441512325,
"learning_rate": 9.733684740611134e-06,
"loss": 3.1801598072052,
"step": 565
},
{
"epoch": 0.5841073271413829,
"grad_norm": 9.80808696167901,
"learning_rate": 9.731747807055713e-06,
"loss": 2.517850875854492,
"step": 566
},
{
"epoch": 0.5851393188854489,
"grad_norm": 9.828092721350625,
"learning_rate": 9.729804049393677e-06,
"loss": 3.1751341819763184,
"step": 567
},
{
"epoch": 0.586171310629515,
"grad_norm": 12.043242343137528,
"learning_rate": 9.727853470428301e-06,
"loss": 2.8291687965393066,
"step": 568
},
{
"epoch": 0.587203302373581,
"grad_norm": 23.52905268157183,
"learning_rate": 9.725896072972707e-06,
"loss": 3.2142086029052734,
"step": 569
},
{
"epoch": 0.5882352941176471,
"grad_norm": 10.451472602985962,
"learning_rate": 9.723931859849842e-06,
"loss": 2.8336706161499023,
"step": 570
},
{
"epoch": 0.5892672858617131,
"grad_norm": 10.202907257116511,
"learning_rate": 9.721960833892485e-06,
"loss": 2.876638412475586,
"step": 571
},
{
"epoch": 0.5902992776057792,
"grad_norm": 10.20554245805355,
"learning_rate": 9.719982997943245e-06,
"loss": 2.641693115234375,
"step": 572
},
{
"epoch": 0.5913312693498453,
"grad_norm": 8.864986065419998,
"learning_rate": 9.717998354854545e-06,
"loss": 3.199042797088623,
"step": 573
},
{
"epoch": 0.5923632610939112,
"grad_norm": 30.978260310733006,
"learning_rate": 9.716006907488629e-06,
"loss": 4.51623010635376,
"step": 574
},
{
"epoch": 0.5933952528379773,
"grad_norm": 14.544403846601538,
"learning_rate": 9.714008658717558e-06,
"loss": 3.3321194648742676,
"step": 575
},
{
"epoch": 0.5944272445820433,
"grad_norm": 15.335917515161709,
"learning_rate": 9.712003611423194e-06,
"loss": 2.279353618621826,
"step": 576
},
{
"epoch": 0.5954592363261094,
"grad_norm": 17.30521345042098,
"learning_rate": 9.709991768497208e-06,
"loss": 3.110170364379883,
"step": 577
},
{
"epoch": 0.5964912280701754,
"grad_norm": 13.0244368033095,
"learning_rate": 9.707973132841072e-06,
"loss": 3.0601935386657715,
"step": 578
},
{
"epoch": 0.5975232198142415,
"grad_norm": 17.783099723375052,
"learning_rate": 9.705947707366054e-06,
"loss": 2.3580307960510254,
"step": 579
},
{
"epoch": 0.5985552115583075,
"grad_norm": 11.222725042910128,
"learning_rate": 9.703915494993215e-06,
"loss": 2.43118953704834,
"step": 580
},
{
"epoch": 0.5995872033023736,
"grad_norm": 20.058317947787405,
"learning_rate": 9.701876498653402e-06,
"loss": 3.334968090057373,
"step": 581
},
{
"epoch": 0.6006191950464397,
"grad_norm": 16.270739740240145,
"learning_rate": 9.699830721287246e-06,
"loss": 2.9842143058776855,
"step": 582
},
{
"epoch": 0.6016511867905057,
"grad_norm": 11.484139834647996,
"learning_rate": 9.69777816584516e-06,
"loss": 2.535402297973633,
"step": 583
},
{
"epoch": 0.6026831785345718,
"grad_norm": 12.510489500637012,
"learning_rate": 9.695718835287328e-06,
"loss": 2.87693452835083,
"step": 584
},
{
"epoch": 0.6037151702786377,
"grad_norm": 7.376993753654825,
"learning_rate": 9.69365273258371e-06,
"loss": 3.2447357177734375,
"step": 585
},
{
"epoch": 0.6047471620227038,
"grad_norm": 9.555009770863176,
"learning_rate": 9.691579860714033e-06,
"loss": 2.7599704265594482,
"step": 586
},
{
"epoch": 0.6057791537667698,
"grad_norm": 10.740584867704138,
"learning_rate": 9.689500222667782e-06,
"loss": 3.2146971225738525,
"step": 587
},
{
"epoch": 0.6068111455108359,
"grad_norm": 7.871154664025923,
"learning_rate": 9.6874138214442e-06,
"loss": 2.678419589996338,
"step": 588
},
{
"epoch": 0.6078431372549019,
"grad_norm": 10.949030762287666,
"learning_rate": 9.685320660052286e-06,
"loss": 2.680488109588623,
"step": 589
},
{
"epoch": 0.608875128998968,
"grad_norm": 10.246851497890871,
"learning_rate": 9.683220741510793e-06,
"loss": 2.916963577270508,
"step": 590
},
{
"epoch": 0.6099071207430341,
"grad_norm": 18.75558796180508,
"learning_rate": 9.68111406884821e-06,
"loss": 3.0434699058532715,
"step": 591
},
{
"epoch": 0.6109391124871001,
"grad_norm": 15.779253328657708,
"learning_rate": 9.679000645102771e-06,
"loss": 4.273288726806641,
"step": 592
},
{
"epoch": 0.6119711042311662,
"grad_norm": 38.01520037711664,
"learning_rate": 9.676880473322452e-06,
"loss": 2.3172545433044434,
"step": 593
},
{
"epoch": 0.6130030959752322,
"grad_norm": 8.482705813777587,
"learning_rate": 9.67475355656495e-06,
"loss": 3.028085470199585,
"step": 594
},
{
"epoch": 0.6140350877192983,
"grad_norm": 7.786458035214588,
"learning_rate": 9.6726198978977e-06,
"loss": 2.7112159729003906,
"step": 595
},
{
"epoch": 0.6150670794633643,
"grad_norm": 7.547208333658604,
"learning_rate": 9.670479500397854e-06,
"loss": 2.4657599925994873,
"step": 596
},
{
"epoch": 0.6160990712074303,
"grad_norm": 17.276597522101635,
"learning_rate": 9.668332367152282e-06,
"loss": 2.915532350540161,
"step": 597
},
{
"epoch": 0.6171310629514963,
"grad_norm": 11.229378917400053,
"learning_rate": 9.666178501257573e-06,
"loss": 2.6651906967163086,
"step": 598
},
{
"epoch": 0.6181630546955624,
"grad_norm": 11.092754648834564,
"learning_rate": 9.664017905820021e-06,
"loss": 2.791090965270996,
"step": 599
},
{
"epoch": 0.6191950464396285,
"grad_norm": 8.366193431753203,
"learning_rate": 9.66185058395563e-06,
"loss": 2.9052670001983643,
"step": 600
},
{
"epoch": 0.6202270381836945,
"grad_norm": 9.084523683371547,
"learning_rate": 9.6596765387901e-06,
"loss": 2.0730695724487305,
"step": 601
},
{
"epoch": 0.6212590299277606,
"grad_norm": 18.34019433810752,
"learning_rate": 9.657495773458832e-06,
"loss": 2.617827892303467,
"step": 602
},
{
"epoch": 0.6222910216718266,
"grad_norm": 12.29576293105994,
"learning_rate": 9.655308291106915e-06,
"loss": 2.3276329040527344,
"step": 603
},
{
"epoch": 0.6233230134158927,
"grad_norm": 14.840325418207792,
"learning_rate": 9.653114094889128e-06,
"loss": 3.239396095275879,
"step": 604
},
{
"epoch": 0.6243550051599587,
"grad_norm": 9.56777055262741,
"learning_rate": 9.65091318796993e-06,
"loss": 2.225588321685791,
"step": 605
},
{
"epoch": 0.6253869969040248,
"grad_norm": 17.408599977515152,
"learning_rate": 9.64870557352346e-06,
"loss": 4.101033687591553,
"step": 606
},
{
"epoch": 0.6264189886480909,
"grad_norm": 12.6799104424994,
"learning_rate": 9.646491254733533e-06,
"loss": 3.09356689453125,
"step": 607
},
{
"epoch": 0.6274509803921569,
"grad_norm": 10.671217767975829,
"learning_rate": 9.644270234793625e-06,
"loss": 2.3732972145080566,
"step": 608
},
{
"epoch": 0.628482972136223,
"grad_norm": 37.52455438840146,
"learning_rate": 9.642042516906884e-06,
"loss": 2.3487610816955566,
"step": 609
},
{
"epoch": 0.6295149638802889,
"grad_norm": 9.04452611714464,
"learning_rate": 9.639808104286118e-06,
"loss": 2.822605609893799,
"step": 610
},
{
"epoch": 0.630546955624355,
"grad_norm": 7.354451143517812,
"learning_rate": 9.637567000153783e-06,
"loss": 2.935185194015503,
"step": 611
},
{
"epoch": 0.631578947368421,
"grad_norm": 7.332551011890475,
"learning_rate": 9.63531920774199e-06,
"loss": 2.9632339477539062,
"step": 612
},
{
"epoch": 0.6326109391124871,
"grad_norm": 15.10098765191982,
"learning_rate": 9.6330647302925e-06,
"loss": 2.4484119415283203,
"step": 613
},
{
"epoch": 0.6336429308565531,
"grad_norm": 7.87243647045685,
"learning_rate": 9.630803571056709e-06,
"loss": 2.841092109680176,
"step": 614
},
{
"epoch": 0.6346749226006192,
"grad_norm": 17.622392235945,
"learning_rate": 9.62853573329565e-06,
"loss": 3.339503526687622,
"step": 615
},
{
"epoch": 0.6357069143446853,
"grad_norm": 16.27308797381183,
"learning_rate": 9.62626122027999e-06,
"loss": 2.700296401977539,
"step": 616
},
{
"epoch": 0.6367389060887513,
"grad_norm": 15.908772084211096,
"learning_rate": 9.62398003529002e-06,
"loss": 2.5863516330718994,
"step": 617
},
{
"epoch": 0.6377708978328174,
"grad_norm": 7.9769771271762675,
"learning_rate": 9.621692181615657e-06,
"loss": 2.5605409145355225,
"step": 618
},
{
"epoch": 0.6388028895768834,
"grad_norm": 17.97539267026468,
"learning_rate": 9.619397662556434e-06,
"loss": 1.9493972063064575,
"step": 619
},
{
"epoch": 0.6398348813209495,
"grad_norm": 11.543738479270724,
"learning_rate": 9.617096481421498e-06,
"loss": 2.926856756210327,
"step": 620
},
{
"epoch": 0.6408668730650154,
"grad_norm": 19.54677543508397,
"learning_rate": 9.6147886415296e-06,
"loss": 3.1197805404663086,
"step": 621
},
{
"epoch": 0.6418988648090815,
"grad_norm": 7.376107725428685,
"learning_rate": 9.612474146209097e-06,
"loss": 2.92832612991333,
"step": 622
},
{
"epoch": 0.6429308565531475,
"grad_norm": 12.572511377197603,
"learning_rate": 9.610152998797946e-06,
"loss": 2.574267864227295,
"step": 623
},
{
"epoch": 0.6439628482972136,
"grad_norm": 10.73391934702385,
"learning_rate": 9.607825202643696e-06,
"loss": 2.7154815196990967,
"step": 624
},
{
"epoch": 0.6449948400412797,
"grad_norm": 25.74792746698063,
"learning_rate": 9.605490761103485e-06,
"loss": 3.2330567836761475,
"step": 625
},
{
"epoch": 0.6460268317853457,
"grad_norm": 8.879203460607043,
"learning_rate": 9.60314967754403e-06,
"loss": 2.8523783683776855,
"step": 626
},
{
"epoch": 0.6470588235294118,
"grad_norm": 20.30571917980229,
"learning_rate": 9.600801955341638e-06,
"loss": 3.3165574073791504,
"step": 627
},
{
"epoch": 0.6480908152734778,
"grad_norm": 9.494600192580265,
"learning_rate": 9.598447597882181e-06,
"loss": 2.7943115234375,
"step": 628
},
{
"epoch": 0.6491228070175439,
"grad_norm": 14.614501521119585,
"learning_rate": 9.596086608561105e-06,
"loss": 3.095608711242676,
"step": 629
},
{
"epoch": 0.6501547987616099,
"grad_norm": 11.426320902321967,
"learning_rate": 9.593718990783415e-06,
"loss": 3.028874397277832,
"step": 630
},
{
"epoch": 0.651186790505676,
"grad_norm": 9.551960197143908,
"learning_rate": 9.591344747963685e-06,
"loss": 2.879570245742798,
"step": 631
},
{
"epoch": 0.6522187822497421,
"grad_norm": 10.273847483188757,
"learning_rate": 9.588963883526033e-06,
"loss": 2.807219982147217,
"step": 632
},
{
"epoch": 0.653250773993808,
"grad_norm": 10.330349921653404,
"learning_rate": 9.586576400904135e-06,
"loss": 2.7486634254455566,
"step": 633
},
{
"epoch": 0.6542827657378741,
"grad_norm": 8.93371321322733,
"learning_rate": 9.584182303541205e-06,
"loss": 2.0055630207061768,
"step": 634
},
{
"epoch": 0.6553147574819401,
"grad_norm": 16.71564616123394,
"learning_rate": 9.581781594890002e-06,
"loss": 2.5305252075195312,
"step": 635
},
{
"epoch": 0.6563467492260062,
"grad_norm": 19.66138410376843,
"learning_rate": 9.579374278412819e-06,
"loss": 2.6674296855926514,
"step": 636
},
{
"epoch": 0.6573787409700722,
"grad_norm": 7.878556702040047,
"learning_rate": 9.576960357581475e-06,
"loss": 2.5877861976623535,
"step": 637
},
{
"epoch": 0.6584107327141383,
"grad_norm": 21.148865665043417,
"learning_rate": 9.574539835877316e-06,
"loss": 2.3358216285705566,
"step": 638
},
{
"epoch": 0.6594427244582043,
"grad_norm": 27.193231781342472,
"learning_rate": 9.572112716791214e-06,
"loss": 3.3498458862304688,
"step": 639
},
{
"epoch": 0.6604747162022704,
"grad_norm": 11.406472675869308,
"learning_rate": 9.569679003823542e-06,
"loss": 3.41530704498291,
"step": 640
},
{
"epoch": 0.6615067079463365,
"grad_norm": 8.61944457154882,
"learning_rate": 9.567238700484195e-06,
"loss": 2.8429856300354004,
"step": 641
},
{
"epoch": 0.6625386996904025,
"grad_norm": 8.399879511973081,
"learning_rate": 9.564791810292569e-06,
"loss": 2.522237777709961,
"step": 642
},
{
"epoch": 0.6635706914344686,
"grad_norm": 7.102928350396439,
"learning_rate": 9.562338336777556e-06,
"loss": 2.6794557571411133,
"step": 643
},
{
"epoch": 0.6646026831785345,
"grad_norm": 9.367704921473894,
"learning_rate": 9.559878283477546e-06,
"loss": 2.757814407348633,
"step": 644
},
{
"epoch": 0.6656346749226006,
"grad_norm": 11.857632395196745,
"learning_rate": 9.557411653940416e-06,
"loss": 1.3053548336029053,
"step": 645
},
{
"epoch": 0.6666666666666666,
"grad_norm": 8.264877117521738,
"learning_rate": 9.554938451723533e-06,
"loss": 2.681445837020874,
"step": 646
},
{
"epoch": 0.6676986584107327,
"grad_norm": 11.91672847995356,
"learning_rate": 9.552458680393732e-06,
"loss": 3.016225576400757,
"step": 647
},
{
"epoch": 0.6687306501547987,
"grad_norm": 13.22285021053636,
"learning_rate": 9.549972343527336e-06,
"loss": 2.237546920776367,
"step": 648
},
{
"epoch": 0.6697626418988648,
"grad_norm": 13.890816027625009,
"learning_rate": 9.547479444710125e-06,
"loss": 2.1632239818573,
"step": 649
},
{
"epoch": 0.6707946336429309,
"grad_norm": 13.968055221199034,
"learning_rate": 9.544979987537348e-06,
"loss": 2.8767595291137695,
"step": 650
},
{
"epoch": 0.6718266253869969,
"grad_norm": 10.572301879287217,
"learning_rate": 9.54247397561371e-06,
"loss": 3.2392611503601074,
"step": 651
},
{
"epoch": 0.672858617131063,
"grad_norm": 8.69083114193443,
"learning_rate": 9.539961412553375e-06,
"loss": 3.211719512939453,
"step": 652
},
{
"epoch": 0.673890608875129,
"grad_norm": 15.854699726954683,
"learning_rate": 9.537442301979947e-06,
"loss": 2.769451379776001,
"step": 653
},
{
"epoch": 0.6749226006191951,
"grad_norm": 9.123769838722273,
"learning_rate": 9.534916647526477e-06,
"loss": 2.761396646499634,
"step": 654
},
{
"epoch": 0.675954592363261,
"grad_norm": 15.534876797351453,
"learning_rate": 9.532384452835457e-06,
"loss": 2.5450034141540527,
"step": 655
},
{
"epoch": 0.6769865841073271,
"grad_norm": 19.84524192746136,
"learning_rate": 9.529845721558802e-06,
"loss": 2.847796678543091,
"step": 656
},
{
"epoch": 0.6780185758513931,
"grad_norm": 22.76040744388083,
"learning_rate": 9.527300457357862e-06,
"loss": 3.6643857955932617,
"step": 657
},
{
"epoch": 0.6790505675954592,
"grad_norm": 13.739512021459273,
"learning_rate": 9.524748663903408e-06,
"loss": 2.9481019973754883,
"step": 658
},
{
"epoch": 0.6800825593395253,
"grad_norm": 8.044318217381289,
"learning_rate": 9.52219034487562e-06,
"loss": 3.0928149223327637,
"step": 659
},
{
"epoch": 0.6811145510835913,
"grad_norm": 10.299202871378856,
"learning_rate": 9.5196255039641e-06,
"loss": 3.110884666442871,
"step": 660
},
{
"epoch": 0.6821465428276574,
"grad_norm": 9.923669131740587,
"learning_rate": 9.517054144867852e-06,
"loss": 2.8850812911987305,
"step": 661
},
{
"epoch": 0.6831785345717234,
"grad_norm": 8.06974951581709,
"learning_rate": 9.514476271295274e-06,
"loss": 3.0151658058166504,
"step": 662
},
{
"epoch": 0.6842105263157895,
"grad_norm": 10.70998071656242,
"learning_rate": 9.511891886964167e-06,
"loss": 2.7311177253723145,
"step": 663
},
{
"epoch": 0.6852425180598555,
"grad_norm": 21.25500131920381,
"learning_rate": 9.50930099560172e-06,
"loss": 2.797929286956787,
"step": 664
},
{
"epoch": 0.6862745098039216,
"grad_norm": 12.353219706840553,
"learning_rate": 9.506703600944504e-06,
"loss": 2.719322681427002,
"step": 665
},
{
"epoch": 0.6873065015479877,
"grad_norm": 9.37526977212679,
"learning_rate": 9.504099706738472e-06,
"loss": 2.0463595390319824,
"step": 666
},
{
"epoch": 0.6883384932920537,
"grad_norm": 12.137096463786149,
"learning_rate": 9.501489316738945e-06,
"loss": 2.929328441619873,
"step": 667
},
{
"epoch": 0.6893704850361198,
"grad_norm": 17.03028609691427,
"learning_rate": 9.498872434710624e-06,
"loss": 2.685189723968506,
"step": 668
},
{
"epoch": 0.6904024767801857,
"grad_norm": 11.813969893205455,
"learning_rate": 9.496249064427557e-06,
"loss": 2.399085521697998,
"step": 669
},
{
"epoch": 0.6914344685242518,
"grad_norm": 10.433948381325353,
"learning_rate": 9.493619209673164e-06,
"loss": 2.962214708328247,
"step": 670
},
{
"epoch": 0.6924664602683178,
"grad_norm": 10.924256879246604,
"learning_rate": 9.490982874240206e-06,
"loss": 2.994148015975952,
"step": 671
},
{
"epoch": 0.6934984520123839,
"grad_norm": 10.777410886774046,
"learning_rate": 9.488340061930797e-06,
"loss": 3.059415340423584,
"step": 672
},
{
"epoch": 0.6945304437564499,
"grad_norm": 30.990766907842698,
"learning_rate": 9.485690776556388e-06,
"loss": 2.916910409927368,
"step": 673
},
{
"epoch": 0.695562435500516,
"grad_norm": 15.468140122707947,
"learning_rate": 9.48303502193777e-06,
"loss": 2.9391396045684814,
"step": 674
},
{
"epoch": 0.6965944272445821,
"grad_norm": 14.007942004032593,
"learning_rate": 9.48037280190506e-06,
"loss": 3.11044979095459,
"step": 675
},
{
"epoch": 0.6976264189886481,
"grad_norm": 8.4366885996248,
"learning_rate": 9.477704120297698e-06,
"loss": 2.9738566875457764,
"step": 676
},
{
"epoch": 0.6986584107327142,
"grad_norm": 8.798963190200652,
"learning_rate": 9.475028980964447e-06,
"loss": 2.7472972869873047,
"step": 677
},
{
"epoch": 0.6996904024767802,
"grad_norm": 20.733085450347474,
"learning_rate": 9.472347387763382e-06,
"loss": 3.2058520317077637,
"step": 678
},
{
"epoch": 0.7007223942208463,
"grad_norm": 10.073546129911945,
"learning_rate": 9.469659344561886e-06,
"loss": 2.298720359802246,
"step": 679
},
{
"epoch": 0.7017543859649122,
"grad_norm": 9.078790896884078,
"learning_rate": 9.46696485523664e-06,
"loss": 3.1304984092712402,
"step": 680
},
{
"epoch": 0.7027863777089783,
"grad_norm": 18.362405319861182,
"learning_rate": 9.464263923673629e-06,
"loss": 2.650181293487549,
"step": 681
},
{
"epoch": 0.7038183694530443,
"grad_norm": 19.699414812583893,
"learning_rate": 9.461556553768124e-06,
"loss": 3.0006284713745117,
"step": 682
},
{
"epoch": 0.7048503611971104,
"grad_norm": 8.446438400335564,
"learning_rate": 9.458842749424682e-06,
"loss": 2.7441043853759766,
"step": 683
},
{
"epoch": 0.7058823529411765,
"grad_norm": 26.742234328847115,
"learning_rate": 9.45612251455714e-06,
"loss": 3.2146198749542236,
"step": 684
},
{
"epoch": 0.7069143446852425,
"grad_norm": 13.584554757600017,
"learning_rate": 9.453395853088609e-06,
"loss": 3.1827893257141113,
"step": 685
},
{
"epoch": 0.7079463364293086,
"grad_norm": 6.7388482588659695,
"learning_rate": 9.450662768951468e-06,
"loss": 2.9448766708374023,
"step": 686
},
{
"epoch": 0.7089783281733746,
"grad_norm": 11.127788465095271,
"learning_rate": 9.447923266087361e-06,
"loss": 3.136505603790283,
"step": 687
},
{
"epoch": 0.7100103199174407,
"grad_norm": 14.989815281265162,
"learning_rate": 9.445177348447187e-06,
"loss": 3.103586196899414,
"step": 688
},
{
"epoch": 0.7110423116615067,
"grad_norm": 14.83033037187033,
"learning_rate": 9.442425019991097e-06,
"loss": 2.857605218887329,
"step": 689
},
{
"epoch": 0.7120743034055728,
"grad_norm": 16.11429528022689,
"learning_rate": 9.439666284688486e-06,
"loss": 3.428617000579834,
"step": 690
},
{
"epoch": 0.7131062951496389,
"grad_norm": 10.602945033932661,
"learning_rate": 9.436901146517991e-06,
"loss": 2.8772683143615723,
"step": 691
},
{
"epoch": 0.7141382868937048,
"grad_norm": 8.166056489516002,
"learning_rate": 9.434129609467484e-06,
"loss": 2.5252692699432373,
"step": 692
},
{
"epoch": 0.7151702786377709,
"grad_norm": 9.608848273921893,
"learning_rate": 9.43135167753406e-06,
"loss": 2.7914929389953613,
"step": 693
},
{
"epoch": 0.7162022703818369,
"grad_norm": 9.334774312998807,
"learning_rate": 9.428567354724047e-06,
"loss": 2.8287720680236816,
"step": 694
},
{
"epoch": 0.717234262125903,
"grad_norm": 10.683021997652697,
"learning_rate": 9.425776645052977e-06,
"loss": 2.9784929752349854,
"step": 695
},
{
"epoch": 0.718266253869969,
"grad_norm": 22.0759217459894,
"learning_rate": 9.422979552545604e-06,
"loss": 2.3638341426849365,
"step": 696
},
{
"epoch": 0.7192982456140351,
"grad_norm": 17.421648915610042,
"learning_rate": 9.420176081235882e-06,
"loss": 3.194939374923706,
"step": 697
},
{
"epoch": 0.7203302373581011,
"grad_norm": 10.263567225837505,
"learning_rate": 9.417366235166962e-06,
"loss": 2.8977842330932617,
"step": 698
},
{
"epoch": 0.7213622291021672,
"grad_norm": 62.23893520113241,
"learning_rate": 9.414550018391197e-06,
"loss": 2.399980068206787,
"step": 699
},
{
"epoch": 0.7223942208462333,
"grad_norm": 43.01592458636682,
"learning_rate": 9.411727434970121e-06,
"loss": 2.2690107822418213,
"step": 700
},
{
"epoch": 0.7234262125902993,
"grad_norm": 9.09760035353303,
"learning_rate": 9.408898488974453e-06,
"loss": 1.8195793628692627,
"step": 701
},
{
"epoch": 0.7244582043343654,
"grad_norm": 20.82039863664745,
"learning_rate": 9.406063184484084e-06,
"loss": 3.671936273574829,
"step": 702
},
{
"epoch": 0.7254901960784313,
"grad_norm": 22.26006983128521,
"learning_rate": 9.40322152558808e-06,
"loss": 2.808476448059082,
"step": 703
},
{
"epoch": 0.7265221878224974,
"grad_norm": 12.786160596812799,
"learning_rate": 9.400373516384671e-06,
"loss": 3.3824217319488525,
"step": 704
},
{
"epoch": 0.7275541795665634,
"grad_norm": 13.227887605235063,
"learning_rate": 9.397519160981239e-06,
"loss": 3.272808313369751,
"step": 705
},
{
"epoch": 0.7285861713106295,
"grad_norm": 21.125694529711843,
"learning_rate": 9.394658463494328e-06,
"loss": 2.741298198699951,
"step": 706
},
{
"epoch": 0.7296181630546955,
"grad_norm": 9.597190596105376,
"learning_rate": 9.391791428049622e-06,
"loss": 2.583601951599121,
"step": 707
},
{
"epoch": 0.7306501547987616,
"grad_norm": 13.673553563405285,
"learning_rate": 9.388918058781947e-06,
"loss": 2.6293039321899414,
"step": 708
},
{
"epoch": 0.7316821465428277,
"grad_norm": 14.809515080215478,
"learning_rate": 9.386038359835265e-06,
"loss": 2.608527660369873,
"step": 709
},
{
"epoch": 0.7327141382868937,
"grad_norm": 20.749093596054855,
"learning_rate": 9.383152335362664e-06,
"loss": 3.150191307067871,
"step": 710
},
{
"epoch": 0.7337461300309598,
"grad_norm": 12.70554197774949,
"learning_rate": 9.380259989526358e-06,
"loss": 2.7562756538391113,
"step": 711
},
{
"epoch": 0.7347781217750258,
"grad_norm": 7.911315504138644,
"learning_rate": 9.377361326497673e-06,
"loss": 3.000636100769043,
"step": 712
},
{
"epoch": 0.7358101135190919,
"grad_norm": 18.777551104783374,
"learning_rate": 9.374456350457052e-06,
"loss": 3.5504672527313232,
"step": 713
},
{
"epoch": 0.7368421052631579,
"grad_norm": 15.56092683279138,
"learning_rate": 9.371545065594038e-06,
"loss": 2.754519462585449,
"step": 714
},
{
"epoch": 0.737874097007224,
"grad_norm": 14.257959884490688,
"learning_rate": 9.368627476107275e-06,
"loss": 3.421858310699463,
"step": 715
},
{
"epoch": 0.7389060887512899,
"grad_norm": 11.738492800659309,
"learning_rate": 9.365703586204495e-06,
"loss": 2.8414125442504883,
"step": 716
},
{
"epoch": 0.739938080495356,
"grad_norm": 7.684516652147754,
"learning_rate": 9.36277340010252e-06,
"loss": 2.511709690093994,
"step": 717
},
{
"epoch": 0.7409700722394221,
"grad_norm": 7.673149105995348,
"learning_rate": 9.359836922027255e-06,
"loss": 2.693768262863159,
"step": 718
},
{
"epoch": 0.7420020639834881,
"grad_norm": 12.645958239160871,
"learning_rate": 9.356894156213674e-06,
"loss": 2.4622414112091064,
"step": 719
},
{
"epoch": 0.7430340557275542,
"grad_norm": 10.063934028161487,
"learning_rate": 9.353945106905822e-06,
"loss": 2.7610793113708496,
"step": 720
},
{
"epoch": 0.7440660474716202,
"grad_norm": 28.87732507966042,
"learning_rate": 9.350989778356804e-06,
"loss": 2.6496834754943848,
"step": 721
},
{
"epoch": 0.7450980392156863,
"grad_norm": 16.04099161626254,
"learning_rate": 9.348028174828785e-06,
"loss": 2.8012263774871826,
"step": 722
},
{
"epoch": 0.7461300309597523,
"grad_norm": 18.863090632580292,
"learning_rate": 9.345060300592976e-06,
"loss": 2.623215675354004,
"step": 723
},
{
"epoch": 0.7471620227038184,
"grad_norm": 12.326353277057443,
"learning_rate": 9.342086159929629e-06,
"loss": 2.9217588901519775,
"step": 724
},
{
"epoch": 0.7481940144478845,
"grad_norm": 8.179220092438687,
"learning_rate": 9.339105757128042e-06,
"loss": 2.6204776763916016,
"step": 725
},
{
"epoch": 0.7492260061919505,
"grad_norm": 7.219516548076823,
"learning_rate": 9.336119096486538e-06,
"loss": 2.5338289737701416,
"step": 726
},
{
"epoch": 0.7502579979360166,
"grad_norm": 7.4288592447235064,
"learning_rate": 9.333126182312466e-06,
"loss": 2.7727599143981934,
"step": 727
},
{
"epoch": 0.7512899896800825,
"grad_norm": 8.727310813003266,
"learning_rate": 9.330127018922195e-06,
"loss": 2.846273899078369,
"step": 728
},
{
"epoch": 0.7523219814241486,
"grad_norm": 13.12477588101624,
"learning_rate": 9.327121610641102e-06,
"loss": 2.754586696624756,
"step": 729
},
{
"epoch": 0.7533539731682146,
"grad_norm": 7.191021167663748,
"learning_rate": 9.324109961803578e-06,
"loss": 2.8355979919433594,
"step": 730
},
{
"epoch": 0.7543859649122807,
"grad_norm": 9.763856623533446,
"learning_rate": 9.321092076753009e-06,
"loss": 2.9621317386627197,
"step": 731
},
{
"epoch": 0.7554179566563467,
"grad_norm": 14.955015033358134,
"learning_rate": 9.318067959841776e-06,
"loss": 3.0515987873077393,
"step": 732
},
{
"epoch": 0.7564499484004128,
"grad_norm": 12.869674986241153,
"learning_rate": 9.315037615431247e-06,
"loss": 3.477961540222168,
"step": 733
},
{
"epoch": 0.7574819401444789,
"grad_norm": 33.030189464635995,
"learning_rate": 9.312001047891772e-06,
"loss": 3.993717670440674,
"step": 734
},
{
"epoch": 0.7585139318885449,
"grad_norm": 11.639652244898626,
"learning_rate": 9.308958261602677e-06,
"loss": 3.2335915565490723,
"step": 735
},
{
"epoch": 0.759545923632611,
"grad_norm": 10.336777347057009,
"learning_rate": 9.305909260952255e-06,
"loss": 1.841538429260254,
"step": 736
},
{
"epoch": 0.760577915376677,
"grad_norm": 10.528463414308874,
"learning_rate": 9.302854050337761e-06,
"loss": 2.6561760902404785,
"step": 737
},
{
"epoch": 0.7616099071207431,
"grad_norm": 11.574953224265704,
"learning_rate": 9.299792634165407e-06,
"loss": 2.8268561363220215,
"step": 738
},
{
"epoch": 0.762641898864809,
"grad_norm": 10.342309275853001,
"learning_rate": 9.296725016850354e-06,
"loss": 2.692314624786377,
"step": 739
},
{
"epoch": 0.7636738906088751,
"grad_norm": 11.830732967318072,
"learning_rate": 9.29365120281671e-06,
"loss": 2.5520989894866943,
"step": 740
},
{
"epoch": 0.7647058823529411,
"grad_norm": 10.084933673579913,
"learning_rate": 9.290571196497511e-06,
"loss": 2.9406180381774902,
"step": 741
},
{
"epoch": 0.7657378740970072,
"grad_norm": 19.141054972203882,
"learning_rate": 9.287485002334732e-06,
"loss": 2.550870418548584,
"step": 742
},
{
"epoch": 0.7667698658410733,
"grad_norm": 12.137785304288148,
"learning_rate": 9.284392624779271e-06,
"loss": 2.932023763656616,
"step": 743
},
{
"epoch": 0.7678018575851393,
"grad_norm": 12.319251681417937,
"learning_rate": 9.28129406829094e-06,
"loss": 2.9180939197540283,
"step": 744
},
{
"epoch": 0.7688338493292054,
"grad_norm": 17.67587880952499,
"learning_rate": 9.278189337338461e-06,
"loss": 2.494802236557007,
"step": 745
},
{
"epoch": 0.7698658410732714,
"grad_norm": 17.043019392132898,
"learning_rate": 9.275078436399469e-06,
"loss": 2.905942440032959,
"step": 746
},
{
"epoch": 0.7708978328173375,
"grad_norm": 10.736717086056796,
"learning_rate": 9.27196136996049e-06,
"loss": 2.4120588302612305,
"step": 747
},
{
"epoch": 0.7719298245614035,
"grad_norm": 14.60363403718915,
"learning_rate": 9.268838142516943e-06,
"loss": 2.5547168254852295,
"step": 748
},
{
"epoch": 0.7729618163054696,
"grad_norm": 22.435801897090034,
"learning_rate": 9.265708758573135e-06,
"loss": 2.870389699935913,
"step": 749
},
{
"epoch": 0.7739938080495357,
"grad_norm": 11.20773230740112,
"learning_rate": 9.262573222642249e-06,
"loss": 3.0618457794189453,
"step": 750
},
{
"epoch": 0.7750257997936016,
"grad_norm": 37.50198419802748,
"learning_rate": 9.259431539246343e-06,
"loss": 2.7579920291900635,
"step": 751
},
{
"epoch": 0.7760577915376677,
"grad_norm": 8.921780765895253,
"learning_rate": 9.256283712916337e-06,
"loss": 3.263230323791504,
"step": 752
},
{
"epoch": 0.7770897832817337,
"grad_norm": 51.1999059027526,
"learning_rate": 9.253129748192016e-06,
"loss": 2.0034947395324707,
"step": 753
},
{
"epoch": 0.7781217750257998,
"grad_norm": 14.747805473660467,
"learning_rate": 9.249969649622013e-06,
"loss": 3.345240592956543,
"step": 754
},
{
"epoch": 0.7791537667698658,
"grad_norm": 16.75723048704436,
"learning_rate": 9.246803421763806e-06,
"loss": 3.153355598449707,
"step": 755
},
{
"epoch": 0.7801857585139319,
"grad_norm": 9.834318076373075,
"learning_rate": 9.24363106918372e-06,
"loss": 2.8869364261627197,
"step": 756
},
{
"epoch": 0.7812177502579979,
"grad_norm": 6.540745571633765,
"learning_rate": 9.240452596456906e-06,
"loss": 2.6238231658935547,
"step": 757
},
{
"epoch": 0.782249742002064,
"grad_norm": 11.314123597573413,
"learning_rate": 9.237268008167345e-06,
"loss": 2.992520570755005,
"step": 758
},
{
"epoch": 0.7832817337461301,
"grad_norm": 12.923401981866071,
"learning_rate": 9.234077308907836e-06,
"loss": 2.7931909561157227,
"step": 759
},
{
"epoch": 0.7843137254901961,
"grad_norm": 16.65804955953843,
"learning_rate": 9.230880503279991e-06,
"loss": 2.8888728618621826,
"step": 760
},
{
"epoch": 0.7853457172342622,
"grad_norm": 10.421526610811561,
"learning_rate": 9.227677595894231e-06,
"loss": 2.6950912475585938,
"step": 761
},
{
"epoch": 0.7863777089783281,
"grad_norm": 14.617189434664802,
"learning_rate": 9.224468591369774e-06,
"loss": 2.90584135055542,
"step": 762
},
{
"epoch": 0.7874097007223942,
"grad_norm": 8.58675097542137,
"learning_rate": 9.221253494334636e-06,
"loss": 2.7167434692382812,
"step": 763
},
{
"epoch": 0.7884416924664602,
"grad_norm": 16.15135541389231,
"learning_rate": 9.218032309425613e-06,
"loss": 2.9132118225097656,
"step": 764
},
{
"epoch": 0.7894736842105263,
"grad_norm": 19.154768503387693,
"learning_rate": 9.214805041288285e-06,
"loss": 3.2092108726501465,
"step": 765
},
{
"epoch": 0.7905056759545923,
"grad_norm": 7.661840634293321,
"learning_rate": 9.211571694577006e-06,
"loss": 2.8506178855895996,
"step": 766
},
{
"epoch": 0.7915376676986584,
"grad_norm": 8.591767633922984,
"learning_rate": 9.208332273954892e-06,
"loss": 2.538100481033325,
"step": 767
},
{
"epoch": 0.7925696594427245,
"grad_norm": 7.594303407760444,
"learning_rate": 9.205086784093823e-06,
"loss": 3.1037511825561523,
"step": 768
},
{
"epoch": 0.7936016511867905,
"grad_norm": 9.033662868228994,
"learning_rate": 9.201835229674432e-06,
"loss": 3.0107996463775635,
"step": 769
},
{
"epoch": 0.7946336429308566,
"grad_norm": 9.686825296599345,
"learning_rate": 9.198577615386095e-06,
"loss": 2.7799291610717773,
"step": 770
},
{
"epoch": 0.7956656346749226,
"grad_norm": 10.644985773301196,
"learning_rate": 9.195313945926931e-06,
"loss": 3.0491931438446045,
"step": 771
},
{
"epoch": 0.7966976264189887,
"grad_norm": 14.458806185486146,
"learning_rate": 9.19204422600379e-06,
"loss": 2.0217819213867188,
"step": 772
},
{
"epoch": 0.7977296181630547,
"grad_norm": 16.938469522017233,
"learning_rate": 9.188768460332246e-06,
"loss": 2.975667953491211,
"step": 773
},
{
"epoch": 0.7987616099071208,
"grad_norm": 13.266256845510885,
"learning_rate": 9.185486653636598e-06,
"loss": 3.2247445583343506,
"step": 774
},
{
"epoch": 0.7997936016511867,
"grad_norm": 14.677831907052576,
"learning_rate": 9.182198810649851e-06,
"loss": 2.5141379833221436,
"step": 775
},
{
"epoch": 0.8008255933952528,
"grad_norm": 15.422936627055057,
"learning_rate": 9.178904936113719e-06,
"loss": 2.432544231414795,
"step": 776
},
{
"epoch": 0.8018575851393189,
"grad_norm": 17.434839804300076,
"learning_rate": 9.175605034778615e-06,
"loss": 3.105786085128784,
"step": 777
},
{
"epoch": 0.8028895768833849,
"grad_norm": 15.705879400216705,
"learning_rate": 9.172299111403643e-06,
"loss": 2.8137431144714355,
"step": 778
},
{
"epoch": 0.803921568627451,
"grad_norm": 11.88271321999419,
"learning_rate": 9.168987170756586e-06,
"loss": 3.2288758754730225,
"step": 779
},
{
"epoch": 0.804953560371517,
"grad_norm": 13.066687170197715,
"learning_rate": 9.165669217613919e-06,
"loss": 2.6433792114257812,
"step": 780
},
{
"epoch": 0.8059855521155831,
"grad_norm": 9.733580696355473,
"learning_rate": 9.162345256760776e-06,
"loss": 2.9955153465270996,
"step": 781
},
{
"epoch": 0.8070175438596491,
"grad_norm": 8.965843698513766,
"learning_rate": 9.159015292990959e-06,
"loss": 2.703097343444824,
"step": 782
},
{
"epoch": 0.8080495356037152,
"grad_norm": 8.866794936332639,
"learning_rate": 9.155679331106926e-06,
"loss": 2.5528955459594727,
"step": 783
},
{
"epoch": 0.8090815273477813,
"grad_norm": 21.82383745566159,
"learning_rate": 9.152337375919792e-06,
"loss": 2.804586887359619,
"step": 784
},
{
"epoch": 0.8101135190918473,
"grad_norm": 18.653243489377278,
"learning_rate": 9.148989432249305e-06,
"loss": 2.9573066234588623,
"step": 785
},
{
"epoch": 0.8111455108359134,
"grad_norm": 24.105591024363626,
"learning_rate": 9.145635504923858e-06,
"loss": 2.6056911945343018,
"step": 786
},
{
"epoch": 0.8121775025799793,
"grad_norm": 7.220666512171809,
"learning_rate": 9.142275598780473e-06,
"loss": 2.29858660697937,
"step": 787
},
{
"epoch": 0.8132094943240454,
"grad_norm": 12.514739396755783,
"learning_rate": 9.138909718664788e-06,
"loss": 2.4496383666992188,
"step": 788
},
{
"epoch": 0.8142414860681114,
"grad_norm": 8.833426493794931,
"learning_rate": 9.135537869431064e-06,
"loss": 2.2068653106689453,
"step": 789
},
{
"epoch": 0.8152734778121775,
"grad_norm": 9.530428925497914,
"learning_rate": 9.132160055942165e-06,
"loss": 2.7950196266174316,
"step": 790
},
{
"epoch": 0.8163054695562435,
"grad_norm": 9.98582960206115,
"learning_rate": 9.128776283069562e-06,
"loss": 2.9262535572052,
"step": 791
},
{
"epoch": 0.8173374613003096,
"grad_norm": 12.900268842724707,
"learning_rate": 9.125386555693316e-06,
"loss": 2.8006927967071533,
"step": 792
},
{
"epoch": 0.8183694530443757,
"grad_norm": 18.760962708945037,
"learning_rate": 9.12199087870208e-06,
"loss": 3.037771224975586,
"step": 793
},
{
"epoch": 0.8194014447884417,
"grad_norm": 16.019875579967447,
"learning_rate": 9.118589256993082e-06,
"loss": 3.1076667308807373,
"step": 794
},
{
"epoch": 0.8204334365325078,
"grad_norm": 10.999401596643924,
"learning_rate": 9.115181695472129e-06,
"loss": 2.5742170810699463,
"step": 795
},
{
"epoch": 0.8214654282765738,
"grad_norm": 29.56667660008175,
"learning_rate": 9.111768199053588e-06,
"loss": 3.0342397689819336,
"step": 796
},
{
"epoch": 0.8224974200206399,
"grad_norm": 29.123927498016464,
"learning_rate": 9.108348772660393e-06,
"loss": 2.8449482917785645,
"step": 797
},
{
"epoch": 0.8235294117647058,
"grad_norm": 9.976520193375213,
"learning_rate": 9.104923421224026e-06,
"loss": 2.592133045196533,
"step": 798
},
{
"epoch": 0.8245614035087719,
"grad_norm": 14.0583828864681,
"learning_rate": 9.10149214968451e-06,
"loss": 2.886117458343506,
"step": 799
},
{
"epoch": 0.8255933952528379,
"grad_norm": 7.531701327580056,
"learning_rate": 9.098054962990415e-06,
"loss": 3.031280279159546,
"step": 800
},
{
"epoch": 0.826625386996904,
"grad_norm": 19.666778618237757,
"learning_rate": 9.094611866098835e-06,
"loss": 2.9728336334228516,
"step": 801
},
{
"epoch": 0.8276573787409701,
"grad_norm": 22.18837308676401,
"learning_rate": 9.09116286397539e-06,
"loss": 2.537578582763672,
"step": 802
},
{
"epoch": 0.8286893704850361,
"grad_norm": 14.866057727210325,
"learning_rate": 9.087707961594216e-06,
"loss": 2.3121652603149414,
"step": 803
},
{
"epoch": 0.8297213622291022,
"grad_norm": 6.831833832253876,
"learning_rate": 9.084247163937959e-06,
"loss": 2.6835837364196777,
"step": 804
},
{
"epoch": 0.8307533539731682,
"grad_norm": 6.98251442833819,
"learning_rate": 9.080780475997767e-06,
"loss": 2.783241033554077,
"step": 805
},
{
"epoch": 0.8317853457172343,
"grad_norm": 7.313666585431364,
"learning_rate": 9.077307902773283e-06,
"loss": 2.0943210124969482,
"step": 806
},
{
"epoch": 0.8328173374613003,
"grad_norm": 10.593290702908797,
"learning_rate": 9.073829449272635e-06,
"loss": 2.4133317470550537,
"step": 807
},
{
"epoch": 0.8338493292053664,
"grad_norm": 9.71450579879684,
"learning_rate": 9.070345120512436e-06,
"loss": 2.853483200073242,
"step": 808
},
{
"epoch": 0.8348813209494325,
"grad_norm": 20.568941834325773,
"learning_rate": 9.066854921517769e-06,
"loss": 3.0913448333740234,
"step": 809
},
{
"epoch": 0.8359133126934984,
"grad_norm": 13.234746631937494,
"learning_rate": 9.063358857322186e-06,
"loss": 2.83183217048645,
"step": 810
},
{
"epoch": 0.8369453044375645,
"grad_norm": 36.08377425601343,
"learning_rate": 9.059856932967693e-06,
"loss": 4.695616245269775,
"step": 811
},
{
"epoch": 0.8379772961816305,
"grad_norm": 20.79566564325163,
"learning_rate": 9.056349153504753e-06,
"loss": 2.8295822143554688,
"step": 812
},
{
"epoch": 0.8390092879256966,
"grad_norm": 13.818243787580341,
"learning_rate": 9.052835523992272e-06,
"loss": 3.0946226119995117,
"step": 813
},
{
"epoch": 0.8400412796697626,
"grad_norm": 13.897038800922802,
"learning_rate": 9.049316049497587e-06,
"loss": 2.604569673538208,
"step": 814
},
{
"epoch": 0.8410732714138287,
"grad_norm": 9.520643622186595,
"learning_rate": 9.045790735096471e-06,
"loss": 1.6071749925613403,
"step": 815
},
{
"epoch": 0.8421052631578947,
"grad_norm": 17.614931222808963,
"learning_rate": 9.042259585873119e-06,
"loss": 3.564199447631836,
"step": 816
},
{
"epoch": 0.8431372549019608,
"grad_norm": 15.952214274549668,
"learning_rate": 9.03872260692014e-06,
"loss": 2.104039192199707,
"step": 817
},
{
"epoch": 0.8441692466460269,
"grad_norm": 24.93531895357169,
"learning_rate": 9.035179803338548e-06,
"loss": 2.5762863159179688,
"step": 818
},
{
"epoch": 0.8452012383900929,
"grad_norm": 11.328967064867303,
"learning_rate": 9.031631180237759e-06,
"loss": 3.1360249519348145,
"step": 819
},
{
"epoch": 0.846233230134159,
"grad_norm": 8.769885949562484,
"learning_rate": 9.028076742735583e-06,
"loss": 3.7183291912078857,
"step": 820
},
{
"epoch": 0.847265221878225,
"grad_norm": 14.997316368327947,
"learning_rate": 9.024516495958216e-06,
"loss": 2.3527259826660156,
"step": 821
},
{
"epoch": 0.848297213622291,
"grad_norm": 20.600644268125738,
"learning_rate": 9.020950445040227e-06,
"loss": 2.992480754852295,
"step": 822
},
{
"epoch": 0.849329205366357,
"grad_norm": 13.053281780210115,
"learning_rate": 9.017378595124564e-06,
"loss": 3.188293218612671,
"step": 823
},
{
"epoch": 0.8503611971104231,
"grad_norm": 9.553879881471575,
"learning_rate": 9.013800951362532e-06,
"loss": 2.5585105419158936,
"step": 824
},
{
"epoch": 0.8513931888544891,
"grad_norm": 9.68787522067668,
"learning_rate": 9.010217518913793e-06,
"loss": 2.2665152549743652,
"step": 825
},
{
"epoch": 0.8524251805985552,
"grad_norm": 16.14020671978396,
"learning_rate": 9.00662830294636e-06,
"loss": 2.4879815578460693,
"step": 826
},
{
"epoch": 0.8534571723426213,
"grad_norm": 6.4954863969066645,
"learning_rate": 9.00303330863658e-06,
"loss": 2.7422685623168945,
"step": 827
},
{
"epoch": 0.8544891640866873,
"grad_norm": 15.10528478082292,
"learning_rate": 8.999432541169145e-06,
"loss": 2.557654619216919,
"step": 828
},
{
"epoch": 0.8555211558307534,
"grad_norm": 8.986901556021547,
"learning_rate": 8.995826005737063e-06,
"loss": 2.8331565856933594,
"step": 829
},
{
"epoch": 0.8565531475748194,
"grad_norm": 7.2493056927751764,
"learning_rate": 8.992213707541666e-06,
"loss": 3.2016687393188477,
"step": 830
},
{
"epoch": 0.8575851393188855,
"grad_norm": 10.894871102064524,
"learning_rate": 8.988595651792594e-06,
"loss": 2.877350091934204,
"step": 831
},
{
"epoch": 0.8586171310629515,
"grad_norm": 9.323882331158536,
"learning_rate": 8.984971843707787e-06,
"loss": 2.4523348808288574,
"step": 832
},
{
"epoch": 0.8596491228070176,
"grad_norm": 16.354976278586676,
"learning_rate": 8.981342288513496e-06,
"loss": 3.012882709503174,
"step": 833
},
{
"epoch": 0.8606811145510835,
"grad_norm": 19.164383219243817,
"learning_rate": 8.977706991444242e-06,
"loss": 2.7856948375701904,
"step": 834
},
{
"epoch": 0.8617131062951496,
"grad_norm": 10.40447845595958,
"learning_rate": 8.974065957742837e-06,
"loss": 2.9169929027557373,
"step": 835
},
{
"epoch": 0.8627450980392157,
"grad_norm": 8.940861320158776,
"learning_rate": 8.970419192660366e-06,
"loss": 3.0807642936706543,
"step": 836
},
{
"epoch": 0.8637770897832817,
"grad_norm": 9.175133924656317,
"learning_rate": 8.966766701456177e-06,
"loss": 3.258143901824951,
"step": 837
},
{
"epoch": 0.8648090815273478,
"grad_norm": 8.944273683325898,
"learning_rate": 8.963108489397875e-06,
"loss": 2.918156147003174,
"step": 838
},
{
"epoch": 0.8658410732714138,
"grad_norm": 9.104926562136558,
"learning_rate": 8.959444561761324e-06,
"loss": 2.6537275314331055,
"step": 839
},
{
"epoch": 0.8668730650154799,
"grad_norm": 13.03635629491526,
"learning_rate": 8.955774923830618e-06,
"loss": 2.762986183166504,
"step": 840
},
{
"epoch": 0.8679050567595459,
"grad_norm": 11.774222353342637,
"learning_rate": 8.952099580898096e-06,
"loss": 3.0888192653656006,
"step": 841
},
{
"epoch": 0.868937048503612,
"grad_norm": 8.07404672692578,
"learning_rate": 8.948418538264323e-06,
"loss": 2.560690402984619,
"step": 842
},
{
"epoch": 0.8699690402476781,
"grad_norm": 13.903023771407138,
"learning_rate": 8.944731801238083e-06,
"loss": 2.9453420639038086,
"step": 843
},
{
"epoch": 0.8710010319917441,
"grad_norm": 13.19580799047363,
"learning_rate": 8.94103937513637e-06,
"loss": 2.3148603439331055,
"step": 844
},
{
"epoch": 0.8720330237358102,
"grad_norm": 13.8684670671641,
"learning_rate": 8.93734126528439e-06,
"loss": 3.250168800354004,
"step": 845
},
{
"epoch": 0.8730650154798761,
"grad_norm": 13.64497681612564,
"learning_rate": 8.933637477015537e-06,
"loss": 2.5707225799560547,
"step": 846
},
{
"epoch": 0.8740970072239422,
"grad_norm": 7.418807578985766,
"learning_rate": 8.929928015671401e-06,
"loss": 2.7249460220336914,
"step": 847
},
{
"epoch": 0.8751289989680082,
"grad_norm": 10.36606069634186,
"learning_rate": 8.92621288660175e-06,
"loss": 2.62217116355896,
"step": 848
},
{
"epoch": 0.8761609907120743,
"grad_norm": 11.539302627011725,
"learning_rate": 8.922492095164527e-06,
"loss": 2.7988717555999756,
"step": 849
},
{
"epoch": 0.8771929824561403,
"grad_norm": 7.625280626172235,
"learning_rate": 8.918765646725845e-06,
"loss": 2.7753400802612305,
"step": 850
},
{
"epoch": 0.8782249742002064,
"grad_norm": 7.584634902926666,
"learning_rate": 8.915033546659967e-06,
"loss": 2.5195202827453613,
"step": 851
},
{
"epoch": 0.8792569659442725,
"grad_norm": 9.294927755289535,
"learning_rate": 8.911295800349316e-06,
"loss": 2.447134256362915,
"step": 852
},
{
"epoch": 0.8802889576883385,
"grad_norm": 8.842077696389328,
"learning_rate": 8.907552413184452e-06,
"loss": 2.7952306270599365,
"step": 853
},
{
"epoch": 0.8813209494324046,
"grad_norm": 9.10276076863987,
"learning_rate": 8.903803390564072e-06,
"loss": 3.282439708709717,
"step": 854
},
{
"epoch": 0.8823529411764706,
"grad_norm": 7.0807723439984,
"learning_rate": 8.900048737895003e-06,
"loss": 2.9339287281036377,
"step": 855
},
{
"epoch": 0.8833849329205367,
"grad_norm": 16.063943118954818,
"learning_rate": 8.896288460592187e-06,
"loss": 2.1572723388671875,
"step": 856
},
{
"epoch": 0.8844169246646026,
"grad_norm": 13.46757538425171,
"learning_rate": 8.892522564078678e-06,
"loss": 3.067239761352539,
"step": 857
},
{
"epoch": 0.8854489164086687,
"grad_norm": 12.986747589632044,
"learning_rate": 8.88875105378564e-06,
"loss": 2.712153196334839,
"step": 858
},
{
"epoch": 0.8864809081527347,
"grad_norm": 23.618370549207103,
"learning_rate": 8.884973935152327e-06,
"loss": 3.7660300731658936,
"step": 859
},
{
"epoch": 0.8875128998968008,
"grad_norm": 10.310150562761834,
"learning_rate": 8.881191213626084e-06,
"loss": 2.520407199859619,
"step": 860
},
{
"epoch": 0.8885448916408669,
"grad_norm": 13.958521350174198,
"learning_rate": 8.877402894662334e-06,
"loss": 3.23856520652771,
"step": 861
},
{
"epoch": 0.8895768833849329,
"grad_norm": 18.806192980822665,
"learning_rate": 8.87360898372458e-06,
"loss": 2.57643461227417,
"step": 862
},
{
"epoch": 0.890608875128999,
"grad_norm": 19.548747147689593,
"learning_rate": 8.869809486284377e-06,
"loss": 2.895467758178711,
"step": 863
},
{
"epoch": 0.891640866873065,
"grad_norm": 6.280566713179624,
"learning_rate": 8.86600440782135e-06,
"loss": 2.423025608062744,
"step": 864
},
{
"epoch": 0.8926728586171311,
"grad_norm": 19.11416840071934,
"learning_rate": 8.862193753823164e-06,
"loss": 2.310001850128174,
"step": 865
},
{
"epoch": 0.8937048503611971,
"grad_norm": 9.654187002962859,
"learning_rate": 8.858377529785529e-06,
"loss": 2.8082315921783447,
"step": 866
},
{
"epoch": 0.8947368421052632,
"grad_norm": 8.32153198104575,
"learning_rate": 8.854555741212184e-06,
"loss": 3.2019920349121094,
"step": 867
},
{
"epoch": 0.8957688338493293,
"grad_norm": 13.34983541223451,
"learning_rate": 8.850728393614903e-06,
"loss": 2.907245397567749,
"step": 868
},
{
"epoch": 0.8968008255933952,
"grad_norm": 12.955888022748075,
"learning_rate": 8.846895492513465e-06,
"loss": 2.7559356689453125,
"step": 869
},
{
"epoch": 0.8978328173374613,
"grad_norm": 15.360527468351282,
"learning_rate": 8.843057043435666e-06,
"loss": 1.923850178718567,
"step": 870
},
{
"epoch": 0.8988648090815273,
"grad_norm": 10.388487298597354,
"learning_rate": 8.8392130519173e-06,
"loss": 2.542006492614746,
"step": 871
},
{
"epoch": 0.8998968008255934,
"grad_norm": 51.63958659191007,
"learning_rate": 8.835363523502154e-06,
"loss": 3.031914234161377,
"step": 872
},
{
"epoch": 0.9009287925696594,
"grad_norm": 7.615013687284186,
"learning_rate": 8.831508463742004e-06,
"loss": 2.4690163135528564,
"step": 873
},
{
"epoch": 0.9019607843137255,
"grad_norm": 13.23651290719279,
"learning_rate": 8.8276478781966e-06,
"loss": 2.684204339981079,
"step": 874
},
{
"epoch": 0.9029927760577915,
"grad_norm": 14.717871417125446,
"learning_rate": 8.823781772433664e-06,
"loss": 2.8591437339782715,
"step": 875
},
{
"epoch": 0.9040247678018576,
"grad_norm": 13.566775670141093,
"learning_rate": 8.819910152028872e-06,
"loss": 4.115355491638184,
"step": 876
},
{
"epoch": 0.9050567595459237,
"grad_norm": 9.568980566918885,
"learning_rate": 8.816033022565864e-06,
"loss": 2.0554182529449463,
"step": 877
},
{
"epoch": 0.9060887512899897,
"grad_norm": 16.0602903812488,
"learning_rate": 8.812150389636216e-06,
"loss": 3.273054361343384,
"step": 878
},
{
"epoch": 0.9071207430340558,
"grad_norm": 8.207226227741794,
"learning_rate": 8.808262258839448e-06,
"loss": 3.1750121116638184,
"step": 879
},
{
"epoch": 0.9081527347781218,
"grad_norm": 11.382393925896498,
"learning_rate": 8.804368635783002e-06,
"loss": 2.556485414505005,
"step": 880
},
{
"epoch": 0.9091847265221878,
"grad_norm": 14.683025935907603,
"learning_rate": 8.80046952608225e-06,
"loss": 2.406846523284912,
"step": 881
},
{
"epoch": 0.9102167182662538,
"grad_norm": 7.074443534407077,
"learning_rate": 8.796564935360465e-06,
"loss": 3.083665609359741,
"step": 882
},
{
"epoch": 0.9112487100103199,
"grad_norm": 13.03835762728328,
"learning_rate": 8.792654869248835e-06,
"loss": 2.1653966903686523,
"step": 883
},
{
"epoch": 0.9122807017543859,
"grad_norm": 16.27216423441271,
"learning_rate": 8.788739333386443e-06,
"loss": 2.123908281326294,
"step": 884
},
{
"epoch": 0.913312693498452,
"grad_norm": 10.41574771229284,
"learning_rate": 8.78481833342025e-06,
"loss": 2.6915431022644043,
"step": 885
},
{
"epoch": 0.9143446852425181,
"grad_norm": 22.13468429161606,
"learning_rate": 8.780891875005116e-06,
"loss": 3.5780766010284424,
"step": 886
},
{
"epoch": 0.9153766769865841,
"grad_norm": 12.878815073539679,
"learning_rate": 8.776959963803754e-06,
"loss": 2.8188726902008057,
"step": 887
},
{
"epoch": 0.9164086687306502,
"grad_norm": 18.065810992789128,
"learning_rate": 8.773022605486755e-06,
"loss": 2.6013312339782715,
"step": 888
},
{
"epoch": 0.9174406604747162,
"grad_norm": 13.15001228695871,
"learning_rate": 8.769079805732557e-06,
"loss": 3.426697254180908,
"step": 889
},
{
"epoch": 0.9184726522187823,
"grad_norm": 9.92878291032441,
"learning_rate": 8.765131570227452e-06,
"loss": 2.7974376678466797,
"step": 890
},
{
"epoch": 0.9195046439628483,
"grad_norm": 10.3135415025846,
"learning_rate": 8.761177904665566e-06,
"loss": 2.6423161029815674,
"step": 891
},
{
"epoch": 0.9205366357069144,
"grad_norm": 17.41217869324895,
"learning_rate": 8.75721881474886e-06,
"loss": 2.153823137283325,
"step": 892
},
{
"epoch": 0.9215686274509803,
"grad_norm": 8.579722943441634,
"learning_rate": 8.753254306187119e-06,
"loss": 2.7427873611450195,
"step": 893
},
{
"epoch": 0.9226006191950464,
"grad_norm": 7.699573500485071,
"learning_rate": 8.749284384697935e-06,
"loss": 2.511962890625,
"step": 894
},
{
"epoch": 0.9236326109391125,
"grad_norm": 11.268034755079555,
"learning_rate": 8.745309056006717e-06,
"loss": 2.595938205718994,
"step": 895
},
{
"epoch": 0.9246646026831785,
"grad_norm": 10.003038343239702,
"learning_rate": 8.741328325846663e-06,
"loss": 2.515320062637329,
"step": 896
},
{
"epoch": 0.9256965944272446,
"grad_norm": 11.981229081636926,
"learning_rate": 8.73734219995877e-06,
"loss": 2.6038193702697754,
"step": 897
},
{
"epoch": 0.9267285861713106,
"grad_norm": 8.241540490814092,
"learning_rate": 8.733350684091806e-06,
"loss": 2.2989137172698975,
"step": 898
},
{
"epoch": 0.9277605779153767,
"grad_norm": 23.262335469826844,
"learning_rate": 8.729353784002323e-06,
"loss": 3.0240113735198975,
"step": 899
},
{
"epoch": 0.9287925696594427,
"grad_norm": 8.095825962933413,
"learning_rate": 8.725351505454631e-06,
"loss": 2.9244229793548584,
"step": 900
},
{
"epoch": 0.9298245614035088,
"grad_norm": 11.340469002970913,
"learning_rate": 8.721343854220801e-06,
"loss": 2.842160224914551,
"step": 901
},
{
"epoch": 0.9308565531475749,
"grad_norm": 11.539814264733689,
"learning_rate": 8.71733083608065e-06,
"loss": 2.8664965629577637,
"step": 902
},
{
"epoch": 0.9318885448916409,
"grad_norm": 18.287543283260945,
"learning_rate": 8.713312456821734e-06,
"loss": 3.327446460723877,
"step": 903
},
{
"epoch": 0.932920536635707,
"grad_norm": 12.875215844396848,
"learning_rate": 8.709288722239345e-06,
"loss": 3.116183042526245,
"step": 904
},
{
"epoch": 0.9339525283797729,
"grad_norm": 8.275011313688877,
"learning_rate": 8.705259638136492e-06,
"loss": 2.486152410507202,
"step": 905
},
{
"epoch": 0.934984520123839,
"grad_norm": 16.24739963302216,
"learning_rate": 8.701225210323908e-06,
"loss": 2.340258836746216,
"step": 906
},
{
"epoch": 0.936016511867905,
"grad_norm": 15.184465335082342,
"learning_rate": 8.697185444620024e-06,
"loss": 2.975020408630371,
"step": 907
},
{
"epoch": 0.9370485036119711,
"grad_norm": 15.631464693784698,
"learning_rate": 8.693140346850975e-06,
"loss": 2.2340219020843506,
"step": 908
},
{
"epoch": 0.9380804953560371,
"grad_norm": 13.90806035469735,
"learning_rate": 8.689089922850585e-06,
"loss": 3.165771484375,
"step": 909
},
{
"epoch": 0.9391124871001032,
"grad_norm": 14.76401235221072,
"learning_rate": 8.685034178460354e-06,
"loss": 3.132847309112549,
"step": 910
},
{
"epoch": 0.9401444788441693,
"grad_norm": 10.903854932092292,
"learning_rate": 8.680973119529462e-06,
"loss": 3.366068124771118,
"step": 911
},
{
"epoch": 0.9411764705882353,
"grad_norm": 13.776301491737472,
"learning_rate": 8.67690675191475e-06,
"loss": 2.9348413944244385,
"step": 912
},
{
"epoch": 0.9422084623323014,
"grad_norm": 15.692440535098076,
"learning_rate": 8.672835081480719e-06,
"loss": 2.4257867336273193,
"step": 913
},
{
"epoch": 0.9432404540763674,
"grad_norm": 14.417130218937615,
"learning_rate": 8.668758114099513e-06,
"loss": 3.141251564025879,
"step": 914
},
{
"epoch": 0.9442724458204335,
"grad_norm": 8.435093461118733,
"learning_rate": 8.664675855650916e-06,
"loss": 2.9332737922668457,
"step": 915
},
{
"epoch": 0.9453044375644994,
"grad_norm": 10.648097845063,
"learning_rate": 8.660588312022345e-06,
"loss": 2.5100889205932617,
"step": 916
},
{
"epoch": 0.9463364293085655,
"grad_norm": 10.312899822318727,
"learning_rate": 8.656495489108835e-06,
"loss": 2.524362802505493,
"step": 917
},
{
"epoch": 0.9473684210526315,
"grad_norm": 11.368498557398345,
"learning_rate": 8.652397392813043e-06,
"loss": 2.685992956161499,
"step": 918
},
{
"epoch": 0.9484004127966976,
"grad_norm": 16.85664501625052,
"learning_rate": 8.648294029045224e-06,
"loss": 2.5421335697174072,
"step": 919
},
{
"epoch": 0.9494324045407637,
"grad_norm": 21.739808082272482,
"learning_rate": 8.644185403723231e-06,
"loss": 3.15808367729187,
"step": 920
},
{
"epoch": 0.9504643962848297,
"grad_norm": 9.331946667414577,
"learning_rate": 8.640071522772507e-06,
"loss": 3.1691818237304688,
"step": 921
},
{
"epoch": 0.9514963880288958,
"grad_norm": 8.736477908392848,
"learning_rate": 8.635952392126072e-06,
"loss": 2.8568482398986816,
"step": 922
},
{
"epoch": 0.9525283797729618,
"grad_norm": 13.175497418723833,
"learning_rate": 8.631828017724521e-06,
"loss": 3.022144317626953,
"step": 923
},
{
"epoch": 0.9535603715170279,
"grad_norm": 4.929151400144564,
"learning_rate": 8.627698405516007e-06,
"loss": 2.628474473953247,
"step": 924
},
{
"epoch": 0.9545923632610939,
"grad_norm": 7.92168979450254,
"learning_rate": 8.62356356145624e-06,
"loss": 2.601992130279541,
"step": 925
},
{
"epoch": 0.95562435500516,
"grad_norm": 8.612265586231148,
"learning_rate": 8.619423491508478e-06,
"loss": 2.1208882331848145,
"step": 926
},
{
"epoch": 0.9566563467492261,
"grad_norm": 10.53109930567618,
"learning_rate": 8.615278201643507e-06,
"loss": 2.7668910026550293,
"step": 927
},
{
"epoch": 0.957688338493292,
"grad_norm": 10.42126479592771,
"learning_rate": 8.611127697839649e-06,
"loss": 2.4703214168548584,
"step": 928
},
{
"epoch": 0.9587203302373581,
"grad_norm": 20.957195531331426,
"learning_rate": 8.606971986082741e-06,
"loss": 2.511150360107422,
"step": 929
},
{
"epoch": 0.9597523219814241,
"grad_norm": 11.823633225194214,
"learning_rate": 8.602811072366138e-06,
"loss": 3.3425581455230713,
"step": 930
},
{
"epoch": 0.9607843137254902,
"grad_norm": 10.336285112955688,
"learning_rate": 8.598644962690685e-06,
"loss": 3.0630111694335938,
"step": 931
},
{
"epoch": 0.9618163054695562,
"grad_norm": 10.713794321165828,
"learning_rate": 8.594473663064735e-06,
"loss": 2.752450704574585,
"step": 932
},
{
"epoch": 0.9628482972136223,
"grad_norm": 19.139918427211896,
"learning_rate": 8.590297179504113e-06,
"loss": 2.7772939205169678,
"step": 933
},
{
"epoch": 0.9638802889576883,
"grad_norm": 17.944538225658494,
"learning_rate": 8.586115518032128e-06,
"loss": 2.5406405925750732,
"step": 934
},
{
"epoch": 0.9649122807017544,
"grad_norm": 11.022292060071564,
"learning_rate": 8.581928684679555e-06,
"loss": 2.357140064239502,
"step": 935
},
{
"epoch": 0.9659442724458205,
"grad_norm": 14.86874145409155,
"learning_rate": 8.577736685484626e-06,
"loss": 2.384441375732422,
"step": 936
},
{
"epoch": 0.9669762641898865,
"grad_norm": 10.324453002839654,
"learning_rate": 8.573539526493024e-06,
"loss": 2.4222421646118164,
"step": 937
},
{
"epoch": 0.9680082559339526,
"grad_norm": 9.723529992760021,
"learning_rate": 8.569337213757877e-06,
"loss": 2.5892581939697266,
"step": 938
},
{
"epoch": 0.9690402476780186,
"grad_norm": 7.600976083096801,
"learning_rate": 8.56512975333974e-06,
"loss": 2.39567232131958,
"step": 939
},
{
"epoch": 0.9700722394220846,
"grad_norm": 12.092137993169,
"learning_rate": 8.560917151306594e-06,
"loss": 2.8416504859924316,
"step": 940
},
{
"epoch": 0.9711042311661506,
"grad_norm": 12.333962742823834,
"learning_rate": 8.556699413733837e-06,
"loss": 2.095158576965332,
"step": 941
},
{
"epoch": 0.9721362229102167,
"grad_norm": 8.925591336397723,
"learning_rate": 8.552476546704274e-06,
"loss": 2.5353894233703613,
"step": 942
},
{
"epoch": 0.9731682146542827,
"grad_norm": 8.049927028903152,
"learning_rate": 8.548248556308102e-06,
"loss": 2.8934223651885986,
"step": 943
},
{
"epoch": 0.9742002063983488,
"grad_norm": 11.08149431174175,
"learning_rate": 8.544015448642916e-06,
"loss": 2.9796085357666016,
"step": 944
},
{
"epoch": 0.9752321981424149,
"grad_norm": 15.938377902327213,
"learning_rate": 8.539777229813679e-06,
"loss": 2.9126100540161133,
"step": 945
},
{
"epoch": 0.9762641898864809,
"grad_norm": 24.04700421888887,
"learning_rate": 8.535533905932739e-06,
"loss": 2.5258841514587402,
"step": 946
},
{
"epoch": 0.977296181630547,
"grad_norm": 14.212766990310266,
"learning_rate": 8.531285483119795e-06,
"loss": 3.0223984718322754,
"step": 947
},
{
"epoch": 0.978328173374613,
"grad_norm": 37.61050293165456,
"learning_rate": 8.527031967501906e-06,
"loss": 2.8660964965820312,
"step": 948
},
{
"epoch": 0.9793601651186791,
"grad_norm": 14.403917310713943,
"learning_rate": 8.522773365213475e-06,
"loss": 3.0697875022888184,
"step": 949
},
{
"epoch": 0.9803921568627451,
"grad_norm": 7.098727469496524,
"learning_rate": 8.518509682396239e-06,
"loss": 2.850987434387207,
"step": 950
},
{
"epoch": 0.9814241486068112,
"grad_norm": 22.572887139198077,
"learning_rate": 8.514240925199264e-06,
"loss": 2.3839073181152344,
"step": 951
},
{
"epoch": 0.9824561403508771,
"grad_norm": 8.788082983526527,
"learning_rate": 8.509967099778934e-06,
"loss": 3.274752140045166,
"step": 952
},
{
"epoch": 0.9834881320949432,
"grad_norm": 20.559290641214933,
"learning_rate": 8.505688212298941e-06,
"loss": 2.112039804458618,
"step": 953
},
{
"epoch": 0.9845201238390093,
"grad_norm": 22.912915721133384,
"learning_rate": 8.501404268930278e-06,
"loss": 2.4956905841827393,
"step": 954
},
{
"epoch": 0.9855521155830753,
"grad_norm": 13.126124723791293,
"learning_rate": 8.497115275851229e-06,
"loss": 3.32060170173645,
"step": 955
},
{
"epoch": 0.9865841073271414,
"grad_norm": 11.190620519311915,
"learning_rate": 8.492821239247365e-06,
"loss": 1.7453281879425049,
"step": 956
},
{
"epoch": 0.9876160990712074,
"grad_norm": 13.0991058336918,
"learning_rate": 8.488522165311524e-06,
"loss": 3.2108659744262695,
"step": 957
},
{
"epoch": 0.9886480908152735,
"grad_norm": 6.930979842489424,
"learning_rate": 8.484218060243816e-06,
"loss": 2.2504026889801025,
"step": 958
},
{
"epoch": 0.9896800825593395,
"grad_norm": 8.244851270837673,
"learning_rate": 8.479908930251598e-06,
"loss": 2.60968279838562,
"step": 959
},
{
"epoch": 0.9907120743034056,
"grad_norm": 10.240611873801234,
"learning_rate": 8.475594781549483e-06,
"loss": 2.1658976078033447,
"step": 960
},
{
"epoch": 0.9917440660474717,
"grad_norm": 28.864480536810515,
"learning_rate": 8.471275620359317e-06,
"loss": 3.141274929046631,
"step": 961
},
{
"epoch": 0.9927760577915377,
"grad_norm": 12.113857550322448,
"learning_rate": 8.466951452910175e-06,
"loss": 3.094350814819336,
"step": 962
},
{
"epoch": 0.9938080495356038,
"grad_norm": 12.264594375162037,
"learning_rate": 8.462622285438353e-06,
"loss": 2.9558591842651367,
"step": 963
},
{
"epoch": 0.9948400412796697,
"grad_norm": 18.61406866971962,
"learning_rate": 8.45828812418736e-06,
"loss": 2.1471381187438965,
"step": 964
},
{
"epoch": 0.9958720330237358,
"grad_norm": 10.57186468452118,
"learning_rate": 8.453948975407902e-06,
"loss": 2.4173901081085205,
"step": 965
},
{
"epoch": 0.9969040247678018,
"grad_norm": 22.749642951004663,
"learning_rate": 8.449604845357884e-06,
"loss": 3.100821018218994,
"step": 966
},
{
"epoch": 0.9979360165118679,
"grad_norm": 8.295297615285332,
"learning_rate": 8.44525574030239e-06,
"loss": 2.519862174987793,
"step": 967
},
{
"epoch": 0.9989680082559339,
"grad_norm": 11.978450605385294,
"learning_rate": 8.44090166651368e-06,
"loss": 2.934941530227661,
"step": 968
},
{
"epoch": 1.0,
"grad_norm": 18.050324887327026,
"learning_rate": 8.436542630271185e-06,
"loss": 2.6186721324920654,
"step": 969
},
{
"epoch": 1.001031991744066,
"grad_norm": 16.05476252261218,
"learning_rate": 8.432178637861483e-06,
"loss": 2.1563639640808105,
"step": 970
},
{
"epoch": 1.0020639834881322,
"grad_norm": 9.594858198037516,
"learning_rate": 8.427809695578308e-06,
"loss": 1.5539755821228027,
"step": 971
},
{
"epoch": 1.0030959752321982,
"grad_norm": 18.807824999148238,
"learning_rate": 8.42343580972253e-06,
"loss": 1.7002182006835938,
"step": 972
},
{
"epoch": 1.0041279669762642,
"grad_norm": 8.027778363641874,
"learning_rate": 8.41905698660215e-06,
"loss": 1.7681635618209839,
"step": 973
},
{
"epoch": 1.0051599587203301,
"grad_norm": 8.623238509439696,
"learning_rate": 8.414673232532286e-06,
"loss": 1.373335838317871,
"step": 974
},
{
"epoch": 1.0061919504643964,
"grad_norm": 8.021841163165114,
"learning_rate": 8.41028455383517e-06,
"loss": 1.8987131118774414,
"step": 975
},
{
"epoch": 1.0072239422084623,
"grad_norm": 5.856433020017942,
"learning_rate": 8.405890956840136e-06,
"loss": 1.373334288597107,
"step": 976
},
{
"epoch": 1.0082559339525283,
"grad_norm": 12.34788416810373,
"learning_rate": 8.401492447883611e-06,
"loss": 1.4636648893356323,
"step": 977
},
{
"epoch": 1.0092879256965945,
"grad_norm": 11.49620424832148,
"learning_rate": 8.397089033309106e-06,
"loss": 1.7376048564910889,
"step": 978
},
{
"epoch": 1.0103199174406605,
"grad_norm": 12.636249423602015,
"learning_rate": 8.392680719467207e-06,
"loss": 2.006678819656372,
"step": 979
},
{
"epoch": 1.0113519091847265,
"grad_norm": 15.598006639972548,
"learning_rate": 8.388267512715565e-06,
"loss": 1.7946763038635254,
"step": 980
},
{
"epoch": 1.0123839009287925,
"grad_norm": 11.693682446027942,
"learning_rate": 8.383849419418889e-06,
"loss": 1.8450850248336792,
"step": 981
},
{
"epoch": 1.0134158926728587,
"grad_norm": 12.088922204874327,
"learning_rate": 8.379426445948933e-06,
"loss": 1.9440231323242188,
"step": 982
},
{
"epoch": 1.0144478844169247,
"grad_norm": 12.647814682769102,
"learning_rate": 8.374998598684491e-06,
"loss": 2.2285561561584473,
"step": 983
},
{
"epoch": 1.0154798761609907,
"grad_norm": 16.737139048202998,
"learning_rate": 8.370565884011389e-06,
"loss": 1.5640538930892944,
"step": 984
},
{
"epoch": 1.0165118679050567,
"grad_norm": 13.230104870514074,
"learning_rate": 8.366128308322464e-06,
"loss": 1.3722457885742188,
"step": 985
},
{
"epoch": 1.0175438596491229,
"grad_norm": 10.841490744693859,
"learning_rate": 8.361685878017569e-06,
"loss": 1.5551173686981201,
"step": 986
},
{
"epoch": 1.0185758513931888,
"grad_norm": 11.799978525328118,
"learning_rate": 8.357238599503561e-06,
"loss": 1.8554892539978027,
"step": 987
},
{
"epoch": 1.0196078431372548,
"grad_norm": 10.905843303872457,
"learning_rate": 8.352786479194288e-06,
"loss": 0.8869086503982544,
"step": 988
},
{
"epoch": 1.020639834881321,
"grad_norm": 9.157020132242838,
"learning_rate": 8.348329523510575e-06,
"loss": 1.5887739658355713,
"step": 989
},
{
"epoch": 1.021671826625387,
"grad_norm": 12.324235401608584,
"learning_rate": 8.343867738880224e-06,
"loss": 1.9491077661514282,
"step": 990
},
{
"epoch": 1.022703818369453,
"grad_norm": 10.040335718777902,
"learning_rate": 8.339401131738007e-06,
"loss": 1.5371899604797363,
"step": 991
},
{
"epoch": 1.023735810113519,
"grad_norm": 12.243061824958042,
"learning_rate": 8.33492970852564e-06,
"loss": 1.762153148651123,
"step": 992
},
{
"epoch": 1.0247678018575852,
"grad_norm": 8.745195314678448,
"learning_rate": 8.330453475691795e-06,
"loss": 1.4221936464309692,
"step": 993
},
{
"epoch": 1.0257997936016512,
"grad_norm": 10.920261519671197,
"learning_rate": 8.325972439692075e-06,
"loss": 2.0102250576019287,
"step": 994
},
{
"epoch": 1.0268317853457172,
"grad_norm": 10.184593431575598,
"learning_rate": 8.321486606989009e-06,
"loss": 1.4635906219482422,
"step": 995
},
{
"epoch": 1.0278637770897834,
"grad_norm": 7.472082964121502,
"learning_rate": 8.316995984052048e-06,
"loss": 1.3242697715759277,
"step": 996
},
{
"epoch": 1.0288957688338494,
"grad_norm": 10.021648848752625,
"learning_rate": 8.312500577357547e-06,
"loss": 1.3507283926010132,
"step": 997
},
{
"epoch": 1.0299277605779154,
"grad_norm": 12.209845924003377,
"learning_rate": 8.308000393388766e-06,
"loss": 1.1653996706008911,
"step": 998
},
{
"epoch": 1.0309597523219813,
"grad_norm": 11.99835459000819,
"learning_rate": 8.30349543863585e-06,
"loss": 1.1610116958618164,
"step": 999
},
{
"epoch": 1.0319917440660475,
"grad_norm": 17.474369680421233,
"learning_rate": 8.298985719595824e-06,
"loss": 1.5260579586029053,
"step": 1000
},
{
"epoch": 1.0330237358101135,
"grad_norm": 9.295614465776053,
"learning_rate": 8.294471242772588e-06,
"loss": 1.3743215799331665,
"step": 1001
},
{
"epoch": 1.0340557275541795,
"grad_norm": 12.80841059312557,
"learning_rate": 8.289952014676896e-06,
"loss": 1.5841118097305298,
"step": 1002
},
{
"epoch": 1.0350877192982457,
"grad_norm": 9.224528441262,
"learning_rate": 8.285428041826367e-06,
"loss": 1.4197484254837036,
"step": 1003
},
{
"epoch": 1.0361197110423117,
"grad_norm": 16.858592753583462,
"learning_rate": 8.280899330745452e-06,
"loss": 2.183623790740967,
"step": 1004
},
{
"epoch": 1.0371517027863777,
"grad_norm": 21.383223442525296,
"learning_rate": 8.276365887965439e-06,
"loss": 1.8438667058944702,
"step": 1005
},
{
"epoch": 1.0381836945304437,
"grad_norm": 10.825302703272476,
"learning_rate": 8.27182772002444e-06,
"loss": 1.3235054016113281,
"step": 1006
},
{
"epoch": 1.0392156862745099,
"grad_norm": 20.316069030942174,
"learning_rate": 8.26728483346738e-06,
"loss": 2.006171703338623,
"step": 1007
},
{
"epoch": 1.0402476780185759,
"grad_norm": 9.948994304742493,
"learning_rate": 8.262737234845993e-06,
"loss": 1.711141586303711,
"step": 1008
},
{
"epoch": 1.0412796697626419,
"grad_norm": 10.158121460132765,
"learning_rate": 8.258184930718806e-06,
"loss": 1.515275001525879,
"step": 1009
},
{
"epoch": 1.0423116615067078,
"grad_norm": 10.857642178327987,
"learning_rate": 8.253627927651131e-06,
"loss": 2.4347457885742188,
"step": 1010
},
{
"epoch": 1.043343653250774,
"grad_norm": 12.3480224278066,
"learning_rate": 8.24906623221506e-06,
"loss": 1.3014485836029053,
"step": 1011
},
{
"epoch": 1.04437564499484,
"grad_norm": 13.258087199229148,
"learning_rate": 8.244499850989453e-06,
"loss": 1.2108027935028076,
"step": 1012
},
{
"epoch": 1.045407636738906,
"grad_norm": 10.772891424959182,
"learning_rate": 8.239928790559921e-06,
"loss": 1.0891926288604736,
"step": 1013
},
{
"epoch": 1.0464396284829722,
"grad_norm": 12.063550652084645,
"learning_rate": 8.235353057518832e-06,
"loss": 0.9712181687355042,
"step": 1014
},
{
"epoch": 1.0474716202270382,
"grad_norm": 9.839529947110902,
"learning_rate": 8.230772658465284e-06,
"loss": 1.5175083875656128,
"step": 1015
},
{
"epoch": 1.0485036119711042,
"grad_norm": 11.211375364424866,
"learning_rate": 8.226187600005116e-06,
"loss": 1.9159111976623535,
"step": 1016
},
{
"epoch": 1.0495356037151702,
"grad_norm": 10.97182298304798,
"learning_rate": 8.221597888750873e-06,
"loss": 1.3137034177780151,
"step": 1017
},
{
"epoch": 1.0505675954592364,
"grad_norm": 8.588510298188089,
"learning_rate": 8.21700353132182e-06,
"loss": 1.6617357730865479,
"step": 1018
},
{
"epoch": 1.0515995872033024,
"grad_norm": 13.201744584709031,
"learning_rate": 8.212404534343923e-06,
"loss": 1.9006850719451904,
"step": 1019
},
{
"epoch": 1.0526315789473684,
"grad_norm": 9.323623275175217,
"learning_rate": 8.207800904449829e-06,
"loss": 1.5100172758102417,
"step": 1020
},
{
"epoch": 1.0536635706914346,
"grad_norm": 11.913995314722836,
"learning_rate": 8.20319264827888e-06,
"loss": 2.788395881652832,
"step": 1021
},
{
"epoch": 1.0546955624355006,
"grad_norm": 19.097847988506008,
"learning_rate": 8.19857977247708e-06,
"loss": 2.4118590354919434,
"step": 1022
},
{
"epoch": 1.0557275541795665,
"grad_norm": 8.49565411469797,
"learning_rate": 8.1939622836971e-06,
"loss": 2.0495169162750244,
"step": 1023
},
{
"epoch": 1.0567595459236325,
"grad_norm": 13.327628308023453,
"learning_rate": 8.189340188598263e-06,
"loss": 1.24650239944458,
"step": 1024
},
{
"epoch": 1.0577915376676987,
"grad_norm": 13.415163634469513,
"learning_rate": 8.184713493846533e-06,
"loss": 1.9664095640182495,
"step": 1025
},
{
"epoch": 1.0588235294117647,
"grad_norm": 9.849539930086326,
"learning_rate": 8.180082206114511e-06,
"loss": 1.2281479835510254,
"step": 1026
},
{
"epoch": 1.0598555211558307,
"grad_norm": 16.920920109593283,
"learning_rate": 8.17544633208142e-06,
"loss": 1.5589534044265747,
"step": 1027
},
{
"epoch": 1.060887512899897,
"grad_norm": 13.973934268345339,
"learning_rate": 8.1708058784331e-06,
"loss": 1.5565776824951172,
"step": 1028
},
{
"epoch": 1.061919504643963,
"grad_norm": 9.237207321033726,
"learning_rate": 8.166160851861991e-06,
"loss": 1.1853022575378418,
"step": 1029
},
{
"epoch": 1.0629514963880289,
"grad_norm": 17.927718371572553,
"learning_rate": 8.161511259067132e-06,
"loss": 1.247610092163086,
"step": 1030
},
{
"epoch": 1.0639834881320949,
"grad_norm": 9.882845156111637,
"learning_rate": 8.156857106754146e-06,
"loss": 1.7039568424224854,
"step": 1031
},
{
"epoch": 1.065015479876161,
"grad_norm": 20.898414415727924,
"learning_rate": 8.15219840163523e-06,
"loss": 1.7086340188980103,
"step": 1032
},
{
"epoch": 1.066047471620227,
"grad_norm": 8.091855854808516,
"learning_rate": 8.147535150429152e-06,
"loss": 1.825638771057129,
"step": 1033
},
{
"epoch": 1.067079463364293,
"grad_norm": 7.8479612013519455,
"learning_rate": 8.142867359861229e-06,
"loss": 1.5998353958129883,
"step": 1034
},
{
"epoch": 1.068111455108359,
"grad_norm": 13.109224886594863,
"learning_rate": 8.13819503666333e-06,
"loss": 1.3819228410720825,
"step": 1035
},
{
"epoch": 1.0691434468524252,
"grad_norm": 20.26012950910058,
"learning_rate": 8.133518187573864e-06,
"loss": 1.7897826433181763,
"step": 1036
},
{
"epoch": 1.0701754385964912,
"grad_norm": 10.3800525344813,
"learning_rate": 8.128836819337756e-06,
"loss": 3.021141529083252,
"step": 1037
},
{
"epoch": 1.0712074303405572,
"grad_norm": 8.243334160834076,
"learning_rate": 8.124150938706462e-06,
"loss": 2.0512983798980713,
"step": 1038
},
{
"epoch": 1.0722394220846234,
"grad_norm": 8.471515219842848,
"learning_rate": 8.119460552437934e-06,
"loss": 1.7683520317077637,
"step": 1039
},
{
"epoch": 1.0732714138286894,
"grad_norm": 12.009553013711418,
"learning_rate": 8.114765667296628e-06,
"loss": 1.4466801881790161,
"step": 1040
},
{
"epoch": 1.0743034055727554,
"grad_norm": 13.497829932993618,
"learning_rate": 8.110066290053493e-06,
"loss": 1.7317613363265991,
"step": 1041
},
{
"epoch": 1.0753353973168214,
"grad_norm": 45.88528225853127,
"learning_rate": 8.105362427485942e-06,
"loss": 1.4356427192687988,
"step": 1042
},
{
"epoch": 1.0763673890608876,
"grad_norm": 7.778793676406035,
"learning_rate": 8.100654086377875e-06,
"loss": 1.7903071641921997,
"step": 1043
},
{
"epoch": 1.0773993808049536,
"grad_norm": 14.255467890004203,
"learning_rate": 8.095941273519634e-06,
"loss": 1.3363776206970215,
"step": 1044
},
{
"epoch": 1.0784313725490196,
"grad_norm": 9.036230547290165,
"learning_rate": 8.09122399570802e-06,
"loss": 1.202798843383789,
"step": 1045
},
{
"epoch": 1.0794633642930858,
"grad_norm": 18.688989336597917,
"learning_rate": 8.086502259746272e-06,
"loss": 3.8349111080169678,
"step": 1046
},
{
"epoch": 1.0804953560371517,
"grad_norm": 7.517142738194956,
"learning_rate": 8.081776072444059e-06,
"loss": 1.5295863151550293,
"step": 1047
},
{
"epoch": 1.0815273477812177,
"grad_norm": 10.129619974724548,
"learning_rate": 8.077045440617465e-06,
"loss": 1.1850452423095703,
"step": 1048
},
{
"epoch": 1.0825593395252837,
"grad_norm": 10.207207301825113,
"learning_rate": 8.07231037108899e-06,
"loss": 1.3872036933898926,
"step": 1049
},
{
"epoch": 1.08359133126935,
"grad_norm": 16.408079543721303,
"learning_rate": 8.067570870687527e-06,
"loss": 1.0992767810821533,
"step": 1050
},
{
"epoch": 1.084623323013416,
"grad_norm": 9.538420851034369,
"learning_rate": 8.062826946248367e-06,
"loss": 1.5989173650741577,
"step": 1051
},
{
"epoch": 1.085655314757482,
"grad_norm": 14.905496947608839,
"learning_rate": 8.058078604613178e-06,
"loss": 1.1511602401733398,
"step": 1052
},
{
"epoch": 1.086687306501548,
"grad_norm": 9.629087356335749,
"learning_rate": 8.053325852629997e-06,
"loss": 1.6529784202575684,
"step": 1053
},
{
"epoch": 1.087719298245614,
"grad_norm": 10.085267441911933,
"learning_rate": 8.048568697153222e-06,
"loss": 1.2220031023025513,
"step": 1054
},
{
"epoch": 1.08875128998968,
"grad_norm": 8.753348062316723,
"learning_rate": 8.043807145043604e-06,
"loss": 1.8311115503311157,
"step": 1055
},
{
"epoch": 1.089783281733746,
"grad_norm": 12.3408445582175,
"learning_rate": 8.039041203168233e-06,
"loss": 1.4134912490844727,
"step": 1056
},
{
"epoch": 1.0908152734778123,
"grad_norm": 11.783862781283819,
"learning_rate": 8.034270878400529e-06,
"loss": 2.0294742584228516,
"step": 1057
},
{
"epoch": 1.0918472652218782,
"grad_norm": 15.95782287855318,
"learning_rate": 8.029496177620235e-06,
"loss": 2.1761271953582764,
"step": 1058
},
{
"epoch": 1.0928792569659442,
"grad_norm": 19.40651300793911,
"learning_rate": 8.024717107713402e-06,
"loss": 2.606509208679199,
"step": 1059
},
{
"epoch": 1.0939112487100102,
"grad_norm": 12.146765149401814,
"learning_rate": 8.019933675572389e-06,
"loss": 2.140730857849121,
"step": 1060
},
{
"epoch": 1.0949432404540764,
"grad_norm": 15.050517998410367,
"learning_rate": 8.015145888095838e-06,
"loss": 1.610368013381958,
"step": 1061
},
{
"epoch": 1.0959752321981424,
"grad_norm": 9.277932993782787,
"learning_rate": 8.010353752188676e-06,
"loss": 1.4251974821090698,
"step": 1062
},
{
"epoch": 1.0970072239422084,
"grad_norm": 7.323597909570674,
"learning_rate": 8.005557274762103e-06,
"loss": 1.3800044059753418,
"step": 1063
},
{
"epoch": 1.0980392156862746,
"grad_norm": 25.233200899043258,
"learning_rate": 8.000756462733577e-06,
"loss": 0.9251134395599365,
"step": 1064
},
{
"epoch": 1.0990712074303406,
"grad_norm": 9.587282864681407,
"learning_rate": 7.995951323026808e-06,
"loss": 1.3711384534835815,
"step": 1065
},
{
"epoch": 1.1001031991744066,
"grad_norm": 10.911019786869124,
"learning_rate": 7.991141862571749e-06,
"loss": 1.3407011032104492,
"step": 1066
},
{
"epoch": 1.1011351909184726,
"grad_norm": 9.203525627848961,
"learning_rate": 7.986328088304584e-06,
"loss": 1.2513601779937744,
"step": 1067
},
{
"epoch": 1.1021671826625388,
"grad_norm": 10.0827879606565,
"learning_rate": 7.981510007167719e-06,
"loss": 1.395066261291504,
"step": 1068
},
{
"epoch": 1.1031991744066048,
"grad_norm": 24.008463216744232,
"learning_rate": 7.976687626109765e-06,
"loss": 2.0673134326934814,
"step": 1069
},
{
"epoch": 1.1042311661506707,
"grad_norm": 22.233138519673105,
"learning_rate": 7.971860952085546e-06,
"loss": 1.3563206195831299,
"step": 1070
},
{
"epoch": 1.1052631578947367,
"grad_norm": 10.308977883395446,
"learning_rate": 7.967029992056066e-06,
"loss": 1.195631742477417,
"step": 1071
},
{
"epoch": 1.106295149638803,
"grad_norm": 7.768208205965082,
"learning_rate": 7.962194752988519e-06,
"loss": 1.311927318572998,
"step": 1072
},
{
"epoch": 1.107327141382869,
"grad_norm": 11.206042051044388,
"learning_rate": 7.957355241856261e-06,
"loss": 1.8952860832214355,
"step": 1073
},
{
"epoch": 1.108359133126935,
"grad_norm": 14.522936932286084,
"learning_rate": 7.95251146563882e-06,
"loss": 2.9905707836151123,
"step": 1074
},
{
"epoch": 1.109391124871001,
"grad_norm": 8.330515845051611,
"learning_rate": 7.947663431321866e-06,
"loss": 0.9683359265327454,
"step": 1075
},
{
"epoch": 1.110423116615067,
"grad_norm": 14.489243484006737,
"learning_rate": 7.942811145897215e-06,
"loss": 0.9986129999160767,
"step": 1076
},
{
"epoch": 1.111455108359133,
"grad_norm": 12.075146700492658,
"learning_rate": 7.937954616362813e-06,
"loss": 1.6724216938018799,
"step": 1077
},
{
"epoch": 1.1124871001031993,
"grad_norm": 20.295900640522916,
"learning_rate": 7.933093849722724e-06,
"loss": 1.9367952346801758,
"step": 1078
},
{
"epoch": 1.1135190918472653,
"grad_norm": 20.886634414966576,
"learning_rate": 7.928228852987126e-06,
"loss": 1.3379758596420288,
"step": 1079
},
{
"epoch": 1.1145510835913313,
"grad_norm": 15.391029180263173,
"learning_rate": 7.923359633172299e-06,
"loss": 1.158537745475769,
"step": 1080
},
{
"epoch": 1.1155830753353972,
"grad_norm": 18.61127263155287,
"learning_rate": 7.918486197300608e-06,
"loss": 2.004901885986328,
"step": 1081
},
{
"epoch": 1.1166150670794635,
"grad_norm": 10.01744099671902,
"learning_rate": 7.913608552400504e-06,
"loss": 1.117366909980774,
"step": 1082
},
{
"epoch": 1.1176470588235294,
"grad_norm": 13.07772422800308,
"learning_rate": 7.908726705506502e-06,
"loss": 1.727134108543396,
"step": 1083
},
{
"epoch": 1.1186790505675954,
"grad_norm": 9.572834550760705,
"learning_rate": 7.903840663659186e-06,
"loss": 1.0672117471694946,
"step": 1084
},
{
"epoch": 1.1197110423116614,
"grad_norm": 9.409168030173149,
"learning_rate": 7.89895043390518e-06,
"loss": 1.5920839309692383,
"step": 1085
},
{
"epoch": 1.1207430340557276,
"grad_norm": 21.583892474378278,
"learning_rate": 7.894056023297156e-06,
"loss": 2.1002070903778076,
"step": 1086
},
{
"epoch": 1.1217750257997936,
"grad_norm": 7.55744359619947,
"learning_rate": 7.889157438893813e-06,
"loss": 0.7461848258972168,
"step": 1087
},
{
"epoch": 1.1228070175438596,
"grad_norm": 25.813146959585897,
"learning_rate": 7.884254687759863e-06,
"loss": 2.137263536453247,
"step": 1088
},
{
"epoch": 1.1238390092879258,
"grad_norm": 13.42741779404928,
"learning_rate": 7.879347776966039e-06,
"loss": 1.5984553098678589,
"step": 1089
},
{
"epoch": 1.1248710010319918,
"grad_norm": 20.393007755113697,
"learning_rate": 7.874436713589065e-06,
"loss": 1.4474716186523438,
"step": 1090
},
{
"epoch": 1.1259029927760578,
"grad_norm": 9.345956996474037,
"learning_rate": 7.869521504711653e-06,
"loss": 1.6039841175079346,
"step": 1091
},
{
"epoch": 1.1269349845201238,
"grad_norm": 11.133766283828184,
"learning_rate": 7.864602157422501e-06,
"loss": 1.083439588546753,
"step": 1092
},
{
"epoch": 1.12796697626419,
"grad_norm": 14.667634387278584,
"learning_rate": 7.859678678816266e-06,
"loss": 1.7140947580337524,
"step": 1093
},
{
"epoch": 1.128998968008256,
"grad_norm": 10.461128558097727,
"learning_rate": 7.854751075993572e-06,
"loss": 1.3764314651489258,
"step": 1094
},
{
"epoch": 1.130030959752322,
"grad_norm": 8.700167873438682,
"learning_rate": 7.849819356060986e-06,
"loss": 1.55415940284729,
"step": 1095
},
{
"epoch": 1.131062951496388,
"grad_norm": 10.892723358166004,
"learning_rate": 7.844883526131014e-06,
"loss": 1.4459302425384521,
"step": 1096
},
{
"epoch": 1.1320949432404541,
"grad_norm": 12.75716199523203,
"learning_rate": 7.839943593322084e-06,
"loss": 1.8245646953582764,
"step": 1097
},
{
"epoch": 1.13312693498452,
"grad_norm": 13.312280216700708,
"learning_rate": 7.834999564758553e-06,
"loss": 2.2421915531158447,
"step": 1098
},
{
"epoch": 1.134158926728586,
"grad_norm": 12.798983235555607,
"learning_rate": 7.830051447570674e-06,
"loss": 0.9207951426506042,
"step": 1099
},
{
"epoch": 1.1351909184726523,
"grad_norm": 19.561513762469197,
"learning_rate": 7.8250992488946e-06,
"loss": 1.3448659181594849,
"step": 1100
},
{
"epoch": 1.1362229102167183,
"grad_norm": 10.655264812219203,
"learning_rate": 7.820142975872377e-06,
"loss": 1.5575282573699951,
"step": 1101
},
{
"epoch": 1.1372549019607843,
"grad_norm": 10.236688960938611,
"learning_rate": 7.815182635651913e-06,
"loss": 1.1294775009155273,
"step": 1102
},
{
"epoch": 1.1382868937048505,
"grad_norm": 13.12609756584624,
"learning_rate": 7.810218235386994e-06,
"loss": 1.45517098903656,
"step": 1103
},
{
"epoch": 1.1393188854489165,
"grad_norm": 12.890391208806918,
"learning_rate": 7.805249782237256e-06,
"loss": 1.4597437381744385,
"step": 1104
},
{
"epoch": 1.1403508771929824,
"grad_norm": 20.154226114081528,
"learning_rate": 7.800277283368184e-06,
"loss": 1.961379885673523,
"step": 1105
},
{
"epoch": 1.1413828689370484,
"grad_norm": 9.464678873571312,
"learning_rate": 7.79530074595109e-06,
"loss": 1.188375473022461,
"step": 1106
},
{
"epoch": 1.1424148606811146,
"grad_norm": 14.154349605303679,
"learning_rate": 7.790320177163116e-06,
"loss": 1.73966383934021,
"step": 1107
},
{
"epoch": 1.1434468524251806,
"grad_norm": 10.420904211191234,
"learning_rate": 7.78533558418722e-06,
"loss": 2.5877296924591064,
"step": 1108
},
{
"epoch": 1.1444788441692466,
"grad_norm": 16.044920620511835,
"learning_rate": 7.780346974212159e-06,
"loss": 1.5162712335586548,
"step": 1109
},
{
"epoch": 1.1455108359133126,
"grad_norm": 16.28795365818509,
"learning_rate": 7.775354354432484e-06,
"loss": 1.6514241695404053,
"step": 1110
},
{
"epoch": 1.1465428276573788,
"grad_norm": 18.70420376563244,
"learning_rate": 7.770357732048533e-06,
"loss": 1.10786771774292,
"step": 1111
},
{
"epoch": 1.1475748194014448,
"grad_norm": 10.780579837493953,
"learning_rate": 7.765357114266409e-06,
"loss": 1.5677838325500488,
"step": 1112
},
{
"epoch": 1.1486068111455108,
"grad_norm": 17.00709242561451,
"learning_rate": 7.760352508297988e-06,
"loss": 2.3274734020233154,
"step": 1113
},
{
"epoch": 1.149638802889577,
"grad_norm": 8.250265430352718,
"learning_rate": 7.755343921360887e-06,
"loss": 1.1954925060272217,
"step": 1114
},
{
"epoch": 1.150670794633643,
"grad_norm": 12.032632061244854,
"learning_rate": 7.750331360678471e-06,
"loss": 0.9751878976821899,
"step": 1115
},
{
"epoch": 1.151702786377709,
"grad_norm": 11.770457667292105,
"learning_rate": 7.745314833479834e-06,
"loss": 1.2305867671966553,
"step": 1116
},
{
"epoch": 1.152734778121775,
"grad_norm": 14.257996247234294,
"learning_rate": 7.740294346999786e-06,
"loss": 1.3428764343261719,
"step": 1117
},
{
"epoch": 1.1537667698658411,
"grad_norm": 6.682943553705904,
"learning_rate": 7.735269908478856e-06,
"loss": 1.1069071292877197,
"step": 1118
},
{
"epoch": 1.1547987616099071,
"grad_norm": 12.012699080331593,
"learning_rate": 7.730241525163266e-06,
"loss": 1.4219002723693848,
"step": 1119
},
{
"epoch": 1.1558307533539731,
"grad_norm": 9.403879667177996,
"learning_rate": 7.72520920430493e-06,
"loss": 1.0495020151138306,
"step": 1120
},
{
"epoch": 1.156862745098039,
"grad_norm": 8.719460389688432,
"learning_rate": 7.720172953161438e-06,
"loss": 1.7092715501785278,
"step": 1121
},
{
"epoch": 1.1578947368421053,
"grad_norm": 7.831380903960697,
"learning_rate": 7.715132778996053e-06,
"loss": 1.8763151168823242,
"step": 1122
},
{
"epoch": 1.1589267285861713,
"grad_norm": 19.446465480773362,
"learning_rate": 7.710088689077689e-06,
"loss": 1.7434253692626953,
"step": 1123
},
{
"epoch": 1.1599587203302373,
"grad_norm": 11.704881506960021,
"learning_rate": 7.705040690680915e-06,
"loss": 1.6507149934768677,
"step": 1124
},
{
"epoch": 1.1609907120743035,
"grad_norm": 8.857711832262313,
"learning_rate": 7.699988791085931e-06,
"loss": 1.8820699453353882,
"step": 1125
},
{
"epoch": 1.1620227038183695,
"grad_norm": 9.422720425465496,
"learning_rate": 7.694932997578565e-06,
"loss": 1.5133967399597168,
"step": 1126
},
{
"epoch": 1.1630546955624355,
"grad_norm": 13.889002563598517,
"learning_rate": 7.68987331745026e-06,
"loss": 2.1303153038024902,
"step": 1127
},
{
"epoch": 1.1640866873065017,
"grad_norm": 11.739488829961909,
"learning_rate": 7.684809757998066e-06,
"loss": 1.5680687427520752,
"step": 1128
},
{
"epoch": 1.1651186790505677,
"grad_norm": 14.384496276363349,
"learning_rate": 7.679742326524628e-06,
"loss": 1.483288288116455,
"step": 1129
},
{
"epoch": 1.1661506707946336,
"grad_norm": 10.874983943768116,
"learning_rate": 7.674671030338176e-06,
"loss": 2.03609561920166,
"step": 1130
},
{
"epoch": 1.1671826625386996,
"grad_norm": 11.378154502477775,
"learning_rate": 7.669595876752508e-06,
"loss": 1.7551511526107788,
"step": 1131
},
{
"epoch": 1.1682146542827658,
"grad_norm": 10.45131877825218,
"learning_rate": 7.664516873086987e-06,
"loss": 1.564146637916565,
"step": 1132
},
{
"epoch": 1.1692466460268318,
"grad_norm": 14.201292073707153,
"learning_rate": 7.659434026666536e-06,
"loss": 2.13928484916687,
"step": 1133
},
{
"epoch": 1.1702786377708978,
"grad_norm": 8.513928619367956,
"learning_rate": 7.654347344821613e-06,
"loss": 1.8708624839782715,
"step": 1134
},
{
"epoch": 1.1713106295149638,
"grad_norm": 26.303106116968944,
"learning_rate": 7.64925683488821e-06,
"loss": 1.4692811965942383,
"step": 1135
},
{
"epoch": 1.17234262125903,
"grad_norm": 26.79861905969612,
"learning_rate": 7.644162504207834e-06,
"loss": 1.458707571029663,
"step": 1136
},
{
"epoch": 1.173374613003096,
"grad_norm": 14.432879402437464,
"learning_rate": 7.639064360127512e-06,
"loss": 1.3703393936157227,
"step": 1137
},
{
"epoch": 1.174406604747162,
"grad_norm": 11.172595147665566,
"learning_rate": 7.633962409999765e-06,
"loss": 1.5748720169067383,
"step": 1138
},
{
"epoch": 1.1754385964912282,
"grad_norm": 10.323351276905665,
"learning_rate": 7.6288566611826e-06,
"loss": 1.9944082498550415,
"step": 1139
},
{
"epoch": 1.1764705882352942,
"grad_norm": 19.622276447625225,
"learning_rate": 7.623747121039512e-06,
"loss": 1.4969562292099,
"step": 1140
},
{
"epoch": 1.1775025799793601,
"grad_norm": 10.575512895874743,
"learning_rate": 7.618633796939454e-06,
"loss": 1.993700623512268,
"step": 1141
},
{
"epoch": 1.1785345717234261,
"grad_norm": 13.393720411744683,
"learning_rate": 7.613516696256842e-06,
"loss": 1.6115179061889648,
"step": 1142
},
{
"epoch": 1.1795665634674923,
"grad_norm": 12.397493564889684,
"learning_rate": 7.608395826371536e-06,
"loss": 1.0056989192962646,
"step": 1143
},
{
"epoch": 1.1805985552115583,
"grad_norm": 8.63445703443166,
"learning_rate": 7.603271194668835e-06,
"loss": 1.631241798400879,
"step": 1144
},
{
"epoch": 1.1816305469556243,
"grad_norm": 9.960901745053299,
"learning_rate": 7.598142808539458e-06,
"loss": 1.2353228330612183,
"step": 1145
},
{
"epoch": 1.1826625386996903,
"grad_norm": 11.90792893561612,
"learning_rate": 7.593010675379542e-06,
"loss": 1.3433904647827148,
"step": 1146
},
{
"epoch": 1.1836945304437565,
"grad_norm": 14.96853874887359,
"learning_rate": 7.5878748025906315e-06,
"loss": 1.197488784790039,
"step": 1147
},
{
"epoch": 1.1847265221878225,
"grad_norm": 17.738178329199055,
"learning_rate": 7.582735197579657e-06,
"loss": 1.4474613666534424,
"step": 1148
},
{
"epoch": 1.1857585139318885,
"grad_norm": 10.212462753877391,
"learning_rate": 7.577591867758937e-06,
"loss": 1.2012611627578735,
"step": 1149
},
{
"epoch": 1.1867905056759547,
"grad_norm": 15.941473391658032,
"learning_rate": 7.572444820546157e-06,
"loss": 1.5386306047439575,
"step": 1150
},
{
"epoch": 1.1878224974200207,
"grad_norm": 13.83055155407034,
"learning_rate": 7.567294063364369e-06,
"loss": 1.314261794090271,
"step": 1151
},
{
"epoch": 1.1888544891640866,
"grad_norm": 10.71943455414836,
"learning_rate": 7.562139603641971e-06,
"loss": 1.694484829902649,
"step": 1152
},
{
"epoch": 1.1898864809081529,
"grad_norm": 9.764445754550207,
"learning_rate": 7.556981448812707e-06,
"loss": 1.5592496395111084,
"step": 1153
},
{
"epoch": 1.1909184726522188,
"grad_norm": 12.283485449478526,
"learning_rate": 7.551819606315644e-06,
"loss": 1.2753419876098633,
"step": 1154
},
{
"epoch": 1.1919504643962848,
"grad_norm": 10.19282029688353,
"learning_rate": 7.546654083595167e-06,
"loss": 2.5282106399536133,
"step": 1155
},
{
"epoch": 1.1929824561403508,
"grad_norm": 9.945635665366675,
"learning_rate": 7.541484888100974e-06,
"loss": 1.256844162940979,
"step": 1156
},
{
"epoch": 1.194014447884417,
"grad_norm": 7.5302923653532785,
"learning_rate": 7.5363120272880554e-06,
"loss": 1.311590552330017,
"step": 1157
},
{
"epoch": 1.195046439628483,
"grad_norm": 14.701895071884568,
"learning_rate": 7.531135508616689e-06,
"loss": 1.5042333602905273,
"step": 1158
},
{
"epoch": 1.196078431372549,
"grad_norm": 9.897244311338603,
"learning_rate": 7.52595533955243e-06,
"loss": 1.5542973279953003,
"step": 1159
},
{
"epoch": 1.197110423116615,
"grad_norm": 6.868338379037202,
"learning_rate": 7.520771527566093e-06,
"loss": 1.6668894290924072,
"step": 1160
},
{
"epoch": 1.1981424148606812,
"grad_norm": 8.953819262399154,
"learning_rate": 7.515584080133753e-06,
"loss": 1.67169189453125,
"step": 1161
},
{
"epoch": 1.1991744066047472,
"grad_norm": 11.976800133954304,
"learning_rate": 7.510393004736723e-06,
"loss": 1.6002707481384277,
"step": 1162
},
{
"epoch": 1.2002063983488132,
"grad_norm": 14.07389764280866,
"learning_rate": 7.50519830886155e-06,
"loss": 1.1405104398727417,
"step": 1163
},
{
"epoch": 1.2012383900928794,
"grad_norm": 64.45087236802736,
"learning_rate": 7.500000000000001e-06,
"loss": 1.908927083015442,
"step": 1164
},
{
"epoch": 1.2022703818369453,
"grad_norm": 9.506985345062768,
"learning_rate": 7.494798085649058e-06,
"loss": 1.78879976272583,
"step": 1165
},
{
"epoch": 1.2033023735810113,
"grad_norm": 10.559746008230174,
"learning_rate": 7.489592573310896e-06,
"loss": 1.2638949155807495,
"step": 1166
},
{
"epoch": 1.2043343653250773,
"grad_norm": 18.971862976066006,
"learning_rate": 7.484383470492886e-06,
"loss": 1.8479580879211426,
"step": 1167
},
{
"epoch": 1.2053663570691435,
"grad_norm": 10.968772501481508,
"learning_rate": 7.479170784707574e-06,
"loss": 1.2037568092346191,
"step": 1168
},
{
"epoch": 1.2063983488132095,
"grad_norm": 11.135611363827667,
"learning_rate": 7.473954523472673e-06,
"loss": 2.127514600753784,
"step": 1169
},
{
"epoch": 1.2074303405572755,
"grad_norm": 21.45101625402494,
"learning_rate": 7.468734694311051e-06,
"loss": 1.5933599472045898,
"step": 1170
},
{
"epoch": 1.2084623323013415,
"grad_norm": 12.58067929424832,
"learning_rate": 7.463511304750724e-06,
"loss": 1.6531916856765747,
"step": 1171
},
{
"epoch": 1.2094943240454077,
"grad_norm": 14.649666157234222,
"learning_rate": 7.458284362324844e-06,
"loss": 1.17509126663208,
"step": 1172
},
{
"epoch": 1.2105263157894737,
"grad_norm": 10.039116690840553,
"learning_rate": 7.453053874571683e-06,
"loss": 1.4780304431915283,
"step": 1173
},
{
"epoch": 1.2115583075335397,
"grad_norm": 8.816563367174497,
"learning_rate": 7.44781984903463e-06,
"loss": 1.7202036380767822,
"step": 1174
},
{
"epoch": 1.2125902992776059,
"grad_norm": 18.902257219854263,
"learning_rate": 7.442582293262174e-06,
"loss": 1.120159387588501,
"step": 1175
},
{
"epoch": 1.2136222910216719,
"grad_norm": 16.571562799351348,
"learning_rate": 7.437341214807895e-06,
"loss": 1.8021618127822876,
"step": 1176
},
{
"epoch": 1.2146542827657378,
"grad_norm": 10.904647120919584,
"learning_rate": 7.432096621230455e-06,
"loss": 1.6198031902313232,
"step": 1177
},
{
"epoch": 1.215686274509804,
"grad_norm": 21.795007344934522,
"learning_rate": 7.426848520093585e-06,
"loss": 1.4299342632293701,
"step": 1178
},
{
"epoch": 1.21671826625387,
"grad_norm": 11.04639195785765,
"learning_rate": 7.421596918966072e-06,
"loss": 1.6385784149169922,
"step": 1179
},
{
"epoch": 1.217750257997936,
"grad_norm": 10.479625516242656,
"learning_rate": 7.416341825421755e-06,
"loss": 1.2281197309494019,
"step": 1180
},
{
"epoch": 1.218782249742002,
"grad_norm": 12.249613732754618,
"learning_rate": 7.411083247039506e-06,
"loss": 1.3680484294891357,
"step": 1181
},
{
"epoch": 1.2198142414860682,
"grad_norm": 16.349200785730165,
"learning_rate": 7.4058211914032264e-06,
"loss": 1.4355851411819458,
"step": 1182
},
{
"epoch": 1.2208462332301342,
"grad_norm": 13.102412298483305,
"learning_rate": 7.400555666101829e-06,
"loss": 1.8079159259796143,
"step": 1183
},
{
"epoch": 1.2218782249742002,
"grad_norm": 13.586305957723505,
"learning_rate": 7.395286678729232e-06,
"loss": 1.2523359060287476,
"step": 1184
},
{
"epoch": 1.2229102167182662,
"grad_norm": 8.662082403874702,
"learning_rate": 7.390014236884349e-06,
"loss": 1.3039319515228271,
"step": 1185
},
{
"epoch": 1.2239422084623324,
"grad_norm": 8.711755492904405,
"learning_rate": 7.384738348171069e-06,
"loss": 1.032116413116455,
"step": 1186
},
{
"epoch": 1.2249742002063984,
"grad_norm": 14.906728364069968,
"learning_rate": 7.379459020198261e-06,
"loss": 1.8307032585144043,
"step": 1187
},
{
"epoch": 1.2260061919504643,
"grad_norm": 15.7359508426536,
"learning_rate": 7.374176260579746e-06,
"loss": 1.6073552370071411,
"step": 1188
},
{
"epoch": 1.2270381836945305,
"grad_norm": 26.584089550977485,
"learning_rate": 7.368890076934298e-06,
"loss": 1.3526015281677246,
"step": 1189
},
{
"epoch": 1.2280701754385965,
"grad_norm": 13.134590698432364,
"learning_rate": 7.36360047688563e-06,
"loss": 2.4070539474487305,
"step": 1190
},
{
"epoch": 1.2291021671826625,
"grad_norm": 51.80448653646729,
"learning_rate": 7.35830746806238e-06,
"loss": 2.3934192657470703,
"step": 1191
},
{
"epoch": 1.2301341589267285,
"grad_norm": 18.212263393779182,
"learning_rate": 7.353011058098104e-06,
"loss": 1.1821274757385254,
"step": 1192
},
{
"epoch": 1.2311661506707947,
"grad_norm": 18.405227075703046,
"learning_rate": 7.34771125463126e-06,
"loss": 1.1790523529052734,
"step": 1193
},
{
"epoch": 1.2321981424148607,
"grad_norm": 19.446981285426606,
"learning_rate": 7.342408065305202e-06,
"loss": 2.165193557739258,
"step": 1194
},
{
"epoch": 1.2332301341589267,
"grad_norm": 24.59132404411911,
"learning_rate": 7.3371014977681685e-06,
"loss": 1.4371821880340576,
"step": 1195
},
{
"epoch": 1.2342621259029927,
"grad_norm": 15.429073098790294,
"learning_rate": 7.33179155967327e-06,
"loss": 1.5296125411987305,
"step": 1196
},
{
"epoch": 1.2352941176470589,
"grad_norm": 13.347553476226281,
"learning_rate": 7.326478258678474e-06,
"loss": 1.9588744640350342,
"step": 1197
},
{
"epoch": 1.2363261093911249,
"grad_norm": 14.260271910609688,
"learning_rate": 7.321161602446601e-06,
"loss": 1.2351356744766235,
"step": 1198
},
{
"epoch": 1.2373581011351908,
"grad_norm": 7.802396516831153,
"learning_rate": 7.315841598645313e-06,
"loss": 1.22335946559906,
"step": 1199
},
{
"epoch": 1.238390092879257,
"grad_norm": 14.03192012952255,
"learning_rate": 7.310518254947092e-06,
"loss": 1.795140027999878,
"step": 1200
},
{
"epoch": 1.239422084623323,
"grad_norm": 12.82609528076698,
"learning_rate": 7.305191579029246e-06,
"loss": 1.3026559352874756,
"step": 1201
},
{
"epoch": 1.240454076367389,
"grad_norm": 8.175138039549628,
"learning_rate": 7.299861578573881e-06,
"loss": 1.3581645488739014,
"step": 1202
},
{
"epoch": 1.2414860681114552,
"grad_norm": 19.10804827967584,
"learning_rate": 7.294528261267905e-06,
"loss": 1.415238618850708,
"step": 1203
},
{
"epoch": 1.2425180598555212,
"grad_norm": 16.335381882389054,
"learning_rate": 7.289191634803002e-06,
"loss": 1.8068633079528809,
"step": 1204
},
{
"epoch": 1.2435500515995872,
"grad_norm": 13.151633240669845,
"learning_rate": 7.283851706875633e-06,
"loss": 2.057955741882324,
"step": 1205
},
{
"epoch": 1.2445820433436532,
"grad_norm": 8.937106130577334,
"learning_rate": 7.278508485187022e-06,
"loss": 1.2385305166244507,
"step": 1206
},
{
"epoch": 1.2456140350877192,
"grad_norm": 16.887133387666424,
"learning_rate": 7.273161977443137e-06,
"loss": 1.0469703674316406,
"step": 1207
},
{
"epoch": 1.2466460268317854,
"grad_norm": 16.606881298783872,
"learning_rate": 7.267812191354691e-06,
"loss": 1.2680339813232422,
"step": 1208
},
{
"epoch": 1.2476780185758514,
"grad_norm": 9.311173055553429,
"learning_rate": 7.262459134637122e-06,
"loss": 1.4323675632476807,
"step": 1209
},
{
"epoch": 1.2487100103199174,
"grad_norm": 12.258511806079309,
"learning_rate": 7.257102815010585e-06,
"loss": 1.5210167169570923,
"step": 1210
},
{
"epoch": 1.2497420020639836,
"grad_norm": 9.846658124763904,
"learning_rate": 7.251743240199944e-06,
"loss": 1.3966602087020874,
"step": 1211
},
{
"epoch": 1.2507739938080495,
"grad_norm": 10.650172801865791,
"learning_rate": 7.246380417934752e-06,
"loss": 1.2398332357406616,
"step": 1212
},
{
"epoch": 1.2518059855521155,
"grad_norm": 17.175116898809183,
"learning_rate": 7.24101435594925e-06,
"loss": 2.0594077110290527,
"step": 1213
},
{
"epoch": 1.2528379772961817,
"grad_norm": 11.462008515081358,
"learning_rate": 7.2356450619823495e-06,
"loss": 1.7122313976287842,
"step": 1214
},
{
"epoch": 1.2538699690402477,
"grad_norm": 8.87949827881755,
"learning_rate": 7.230272543777625e-06,
"loss": 1.1924419403076172,
"step": 1215
},
{
"epoch": 1.2549019607843137,
"grad_norm": 9.21318932815073,
"learning_rate": 7.224896809083297e-06,
"loss": 1.6619291305541992,
"step": 1216
},
{
"epoch": 1.2559339525283797,
"grad_norm": 7.207008807843295,
"learning_rate": 7.219517865652228e-06,
"loss": 1.6214816570281982,
"step": 1217
},
{
"epoch": 1.256965944272446,
"grad_norm": 13.735589808619933,
"learning_rate": 7.214135721241908e-06,
"loss": 1.911083459854126,
"step": 1218
},
{
"epoch": 1.2579979360165119,
"grad_norm": 12.344700121567975,
"learning_rate": 7.208750383614442e-06,
"loss": 1.6782649755477905,
"step": 1219
},
{
"epoch": 1.2590299277605779,
"grad_norm": 9.30907810758885,
"learning_rate": 7.203361860536544e-06,
"loss": 1.339187502861023,
"step": 1220
},
{
"epoch": 1.2600619195046439,
"grad_norm": 8.751624839086462,
"learning_rate": 7.1979701597795145e-06,
"loss": 1.91410493850708,
"step": 1221
},
{
"epoch": 1.26109391124871,
"grad_norm": 9.909673616548863,
"learning_rate": 7.192575289119246e-06,
"loss": 1.1134378910064697,
"step": 1222
},
{
"epoch": 1.262125902992776,
"grad_norm": 16.497933136012385,
"learning_rate": 7.187177256336194e-06,
"loss": 1.2252280712127686,
"step": 1223
},
{
"epoch": 1.263157894736842,
"grad_norm": 20.610993611451047,
"learning_rate": 7.181776069215382e-06,
"loss": 0.9598990678787231,
"step": 1224
},
{
"epoch": 1.2641898864809082,
"grad_norm": 13.830348435110258,
"learning_rate": 7.176371735546377e-06,
"loss": 1.5006102323532104,
"step": 1225
},
{
"epoch": 1.2652218782249742,
"grad_norm": 13.15337113477755,
"learning_rate": 7.170964263123286e-06,
"loss": 0.9433538913726807,
"step": 1226
},
{
"epoch": 1.2662538699690402,
"grad_norm": 25.898742835089134,
"learning_rate": 7.165553659744744e-06,
"loss": 1.5279262065887451,
"step": 1227
},
{
"epoch": 1.2672858617131064,
"grad_norm": 24.6601728692259,
"learning_rate": 7.160139933213899e-06,
"loss": 0.6092150807380676,
"step": 1228
},
{
"epoch": 1.2683178534571724,
"grad_norm": 11.514310521737785,
"learning_rate": 7.154723091338404e-06,
"loss": 1.605895757675171,
"step": 1229
},
{
"epoch": 1.2693498452012384,
"grad_norm": 12.060484911728022,
"learning_rate": 7.1493031419304095e-06,
"loss": 1.384972333908081,
"step": 1230
},
{
"epoch": 1.2703818369453044,
"grad_norm": 16.430377347078384,
"learning_rate": 7.1438800928065385e-06,
"loss": 1.4129787683486938,
"step": 1231
},
{
"epoch": 1.2714138286893704,
"grad_norm": 11.185735193254,
"learning_rate": 7.138453951787894e-06,
"loss": 1.020602822303772,
"step": 1232
},
{
"epoch": 1.2724458204334366,
"grad_norm": 14.645847466215377,
"learning_rate": 7.133024726700027e-06,
"loss": 1.4097436666488647,
"step": 1233
},
{
"epoch": 1.2734778121775026,
"grad_norm": 10.044328600175684,
"learning_rate": 7.12759242537295e-06,
"loss": 1.802374243736267,
"step": 1234
},
{
"epoch": 1.2745098039215685,
"grad_norm": 17.231549286448168,
"learning_rate": 7.1221570556411005e-06,
"loss": 1.298069953918457,
"step": 1235
},
{
"epoch": 1.2755417956656347,
"grad_norm": 10.40438584029231,
"learning_rate": 7.1167186253433474e-06,
"loss": 1.9908943176269531,
"step": 1236
},
{
"epoch": 1.2765737874097007,
"grad_norm": 10.094942538512901,
"learning_rate": 7.111277142322971e-06,
"loss": 1.542879581451416,
"step": 1237
},
{
"epoch": 1.2776057791537667,
"grad_norm": 84.36926538241218,
"learning_rate": 7.105832614427656e-06,
"loss": 1.3311142921447754,
"step": 1238
},
{
"epoch": 1.278637770897833,
"grad_norm": 16.046071866527804,
"learning_rate": 7.100385049509477e-06,
"loss": 1.7607979774475098,
"step": 1239
},
{
"epoch": 1.279669762641899,
"grad_norm": 10.83846781923933,
"learning_rate": 7.094934455424889e-06,
"loss": 1.2249962091445923,
"step": 1240
},
{
"epoch": 1.280701754385965,
"grad_norm": 15.256033188599012,
"learning_rate": 7.089480840034715e-06,
"loss": 1.9785404205322266,
"step": 1241
},
{
"epoch": 1.2817337461300309,
"grad_norm": 7.75804045811503,
"learning_rate": 7.084024211204136e-06,
"loss": 1.1947821378707886,
"step": 1242
},
{
"epoch": 1.282765737874097,
"grad_norm": 17.308231386990283,
"learning_rate": 7.07856457680268e-06,
"loss": 1.1516659259796143,
"step": 1243
},
{
"epoch": 1.283797729618163,
"grad_norm": 13.440135470304948,
"learning_rate": 7.073101944704209e-06,
"loss": 1.3261773586273193,
"step": 1244
},
{
"epoch": 1.284829721362229,
"grad_norm": 10.582432798186264,
"learning_rate": 7.067636322786906e-06,
"loss": 1.8862181901931763,
"step": 1245
},
{
"epoch": 1.285861713106295,
"grad_norm": 9.287526448936454,
"learning_rate": 7.06216771893327e-06,
"loss": 1.8220220804214478,
"step": 1246
},
{
"epoch": 1.2868937048503613,
"grad_norm": 11.547994179745336,
"learning_rate": 7.056696141030095e-06,
"loss": 0.8939683437347412,
"step": 1247
},
{
"epoch": 1.2879256965944272,
"grad_norm": 10.68722215184878,
"learning_rate": 7.051221596968471e-06,
"loss": 1.5759246349334717,
"step": 1248
},
{
"epoch": 1.2889576883384932,
"grad_norm": 7.647385646118011,
"learning_rate": 7.0457440946437605e-06,
"loss": 2.126826763153076,
"step": 1249
},
{
"epoch": 1.2899896800825594,
"grad_norm": 9.860181991689545,
"learning_rate": 7.040263641955594e-06,
"loss": 1.167819857597351,
"step": 1250
},
{
"epoch": 1.2910216718266254,
"grad_norm": 10.300885899118448,
"learning_rate": 7.034780246807857e-06,
"loss": 0.8613216280937195,
"step": 1251
},
{
"epoch": 1.2920536635706914,
"grad_norm": 14.731011070999733,
"learning_rate": 7.029293917108678e-06,
"loss": 1.7457356452941895,
"step": 1252
},
{
"epoch": 1.2930856553147576,
"grad_norm": 16.05965173219095,
"learning_rate": 7.023804660770422e-06,
"loss": 2.087299346923828,
"step": 1253
},
{
"epoch": 1.2941176470588236,
"grad_norm": 19.98491427880135,
"learning_rate": 7.0183124857096676e-06,
"loss": 1.3274412155151367,
"step": 1254
},
{
"epoch": 1.2951496388028896,
"grad_norm": 8.480002780443607,
"learning_rate": 7.012817399847207e-06,
"loss": 1.2816498279571533,
"step": 1255
},
{
"epoch": 1.2961816305469556,
"grad_norm": 10.022827971317682,
"learning_rate": 7.0073194111080315e-06,
"loss": 1.2162542343139648,
"step": 1256
},
{
"epoch": 1.2972136222910216,
"grad_norm": 6.871625539305346,
"learning_rate": 7.001818527421314e-06,
"loss": 0.9680448770523071,
"step": 1257
},
{
"epoch": 1.2982456140350878,
"grad_norm": 7.317032162601178,
"learning_rate": 6.996314756720409e-06,
"loss": 1.8782672882080078,
"step": 1258
},
{
"epoch": 1.2992776057791537,
"grad_norm": 11.160755976506538,
"learning_rate": 6.9908081069428294e-06,
"loss": 1.9667476415634155,
"step": 1259
},
{
"epoch": 1.3003095975232197,
"grad_norm": 8.456022150247852,
"learning_rate": 6.985298586030241e-06,
"loss": 1.8856534957885742,
"step": 1260
},
{
"epoch": 1.301341589267286,
"grad_norm": 8.939573953300108,
"learning_rate": 6.979786201928455e-06,
"loss": 1.4806193113327026,
"step": 1261
},
{
"epoch": 1.302373581011352,
"grad_norm": 10.227571088009338,
"learning_rate": 6.974270962587405e-06,
"loss": 1.4667408466339111,
"step": 1262
},
{
"epoch": 1.303405572755418,
"grad_norm": 13.231021950855867,
"learning_rate": 6.968752875961149e-06,
"loss": 1.3672292232513428,
"step": 1263
},
{
"epoch": 1.3044375644994841,
"grad_norm": 17.13546379140691,
"learning_rate": 6.963231950007845e-06,
"loss": 1.7469172477722168,
"step": 1264
},
{
"epoch": 1.30546955624355,
"grad_norm": 10.826702738897808,
"learning_rate": 6.95770819268975e-06,
"loss": 1.0989598035812378,
"step": 1265
},
{
"epoch": 1.306501547987616,
"grad_norm": 13.088464704259879,
"learning_rate": 6.952181611973203e-06,
"loss": 1.5576810836791992,
"step": 1266
},
{
"epoch": 1.307533539731682,
"grad_norm": 10.604025023103842,
"learning_rate": 6.9466522158286175e-06,
"loss": 1.8453103303909302,
"step": 1267
},
{
"epoch": 1.308565531475748,
"grad_norm": 13.530398513450153,
"learning_rate": 6.941120012230464e-06,
"loss": 1.4260985851287842,
"step": 1268
},
{
"epoch": 1.3095975232198143,
"grad_norm": 18.227518930179752,
"learning_rate": 6.93558500915726e-06,
"loss": 1.8812353610992432,
"step": 1269
},
{
"epoch": 1.3106295149638802,
"grad_norm": 16.834162609499675,
"learning_rate": 6.930047214591569e-06,
"loss": 1.3378567695617676,
"step": 1270
},
{
"epoch": 1.3116615067079462,
"grad_norm": 17.632777807359314,
"learning_rate": 6.924506636519968e-06,
"loss": 1.2886006832122803,
"step": 1271
},
{
"epoch": 1.3126934984520124,
"grad_norm": 8.590505601569463,
"learning_rate": 6.918963282933063e-06,
"loss": 1.9369480609893799,
"step": 1272
},
{
"epoch": 1.3137254901960784,
"grad_norm": 15.460248224270192,
"learning_rate": 6.913417161825449e-06,
"loss": 1.6706684827804565,
"step": 1273
},
{
"epoch": 1.3147574819401444,
"grad_norm": 16.46819164452491,
"learning_rate": 6.907868281195722e-06,
"loss": 1.8951505422592163,
"step": 1274
},
{
"epoch": 1.3157894736842106,
"grad_norm": 16.231207232177706,
"learning_rate": 6.902316649046452e-06,
"loss": 1.964847207069397,
"step": 1275
},
{
"epoch": 1.3168214654282766,
"grad_norm": 13.918532385058198,
"learning_rate": 6.896762273384179e-06,
"loss": 1.9368600845336914,
"step": 1276
},
{
"epoch": 1.3178534571723426,
"grad_norm": 14.031469949597836,
"learning_rate": 6.891205162219402e-06,
"loss": 1.7412488460540771,
"step": 1277
},
{
"epoch": 1.3188854489164088,
"grad_norm": 11.144610201408431,
"learning_rate": 6.885645323566561e-06,
"loss": 1.5681676864624023,
"step": 1278
},
{
"epoch": 1.3199174406604748,
"grad_norm": 11.79257157269013,
"learning_rate": 6.880082765444034e-06,
"loss": 1.7658888101577759,
"step": 1279
},
{
"epoch": 1.3209494324045408,
"grad_norm": 13.902619176662753,
"learning_rate": 6.8745174958741164e-06,
"loss": 1.7245615720748901,
"step": 1280
},
{
"epoch": 1.3219814241486068,
"grad_norm": 18.76369717340051,
"learning_rate": 6.868949522883017e-06,
"loss": 1.6578267812728882,
"step": 1281
},
{
"epoch": 1.3230134158926727,
"grad_norm": 8.996167250954647,
"learning_rate": 6.863378854500846e-06,
"loss": 1.5728305578231812,
"step": 1282
},
{
"epoch": 1.324045407636739,
"grad_norm": 9.57359831014415,
"learning_rate": 6.857805498761593e-06,
"loss": 1.375089168548584,
"step": 1283
},
{
"epoch": 1.325077399380805,
"grad_norm": 9.066334633906381,
"learning_rate": 6.852229463703131e-06,
"loss": 1.8140184879302979,
"step": 1284
},
{
"epoch": 1.326109391124871,
"grad_norm": 9.652084209720877,
"learning_rate": 6.846650757367192e-06,
"loss": 1.1467084884643555,
"step": 1285
},
{
"epoch": 1.3271413828689371,
"grad_norm": 12.930430979530293,
"learning_rate": 6.841069387799364e-06,
"loss": 1.2440385818481445,
"step": 1286
},
{
"epoch": 1.328173374613003,
"grad_norm": 9.645014089750466,
"learning_rate": 6.835485363049075e-06,
"loss": 1.2007393836975098,
"step": 1287
},
{
"epoch": 1.329205366357069,
"grad_norm": 10.864773450057065,
"learning_rate": 6.829898691169581e-06,
"loss": 1.7485893964767456,
"step": 1288
},
{
"epoch": 1.3302373581011353,
"grad_norm": 7.027777848617237,
"learning_rate": 6.8243093802179574e-06,
"loss": 1.6619250774383545,
"step": 1289
},
{
"epoch": 1.3312693498452013,
"grad_norm": 14.918429364794484,
"learning_rate": 6.81871743825508e-06,
"loss": 1.067305088043213,
"step": 1290
},
{
"epoch": 1.3323013415892673,
"grad_norm": 7.774349136796258,
"learning_rate": 6.813122873345632e-06,
"loss": 1.8407032489776611,
"step": 1291
},
{
"epoch": 1.3333333333333333,
"grad_norm": 14.681779739650855,
"learning_rate": 6.8075256935580655e-06,
"loss": 2.130232334136963,
"step": 1292
},
{
"epoch": 1.3343653250773992,
"grad_norm": 9.52858502084645,
"learning_rate": 6.80192590696461e-06,
"loss": 1.317871332168579,
"step": 1293
},
{
"epoch": 1.3353973168214655,
"grad_norm": 15.393341342952034,
"learning_rate": 6.796323521641257e-06,
"loss": 0.9826754331588745,
"step": 1294
},
{
"epoch": 1.3364293085655314,
"grad_norm": 12.804696585185201,
"learning_rate": 6.790718545667738e-06,
"loss": 1.7907086610794067,
"step": 1295
},
{
"epoch": 1.3374613003095974,
"grad_norm": 11.551243689301163,
"learning_rate": 6.78511098712753e-06,
"loss": 1.8355216979980469,
"step": 1296
},
{
"epoch": 1.3384932920536636,
"grad_norm": 12.067399492809944,
"learning_rate": 6.779500854107828e-06,
"loss": 0.6331468820571899,
"step": 1297
},
{
"epoch": 1.3395252837977296,
"grad_norm": 13.245167552074978,
"learning_rate": 6.773888154699543e-06,
"loss": 1.709820032119751,
"step": 1298
},
{
"epoch": 1.3405572755417956,
"grad_norm": 7.370036463533888,
"learning_rate": 6.768272896997285e-06,
"loss": 1.5491633415222168,
"step": 1299
},
{
"epoch": 1.3415892672858618,
"grad_norm": 23.00598116208232,
"learning_rate": 6.762655089099353e-06,
"loss": 2.1439013481140137,
"step": 1300
},
{
"epoch": 1.3426212590299278,
"grad_norm": 12.393604989350957,
"learning_rate": 6.757034739107732e-06,
"loss": 1.1984869241714478,
"step": 1301
},
{
"epoch": 1.3436532507739938,
"grad_norm": 7.950299634053667,
"learning_rate": 6.751411855128062e-06,
"loss": 1.8228305578231812,
"step": 1302
},
{
"epoch": 1.34468524251806,
"grad_norm": 8.48250505168335,
"learning_rate": 6.745786445269644e-06,
"loss": 1.5694873332977295,
"step": 1303
},
{
"epoch": 1.345717234262126,
"grad_norm": 25.47218353712853,
"learning_rate": 6.740158517645418e-06,
"loss": 1.922268271446228,
"step": 1304
},
{
"epoch": 1.346749226006192,
"grad_norm": 9.26806273282689,
"learning_rate": 6.734528080371962e-06,
"loss": 1.4681077003479004,
"step": 1305
},
{
"epoch": 1.347781217750258,
"grad_norm": 8.276957138807358,
"learning_rate": 6.728895141569464e-06,
"loss": 1.7013589143753052,
"step": 1306
},
{
"epoch": 1.348813209494324,
"grad_norm": 10.28492210292928,
"learning_rate": 6.723259709361726e-06,
"loss": 1.5537389516830444,
"step": 1307
},
{
"epoch": 1.3498452012383901,
"grad_norm": 14.359998071626563,
"learning_rate": 6.717621791876147e-06,
"loss": 2.2924580574035645,
"step": 1308
},
{
"epoch": 1.3508771929824561,
"grad_norm": 20.262716426358573,
"learning_rate": 6.711981397243703e-06,
"loss": 1.4611811637878418,
"step": 1309
},
{
"epoch": 1.351909184726522,
"grad_norm": 18.62862713951369,
"learning_rate": 6.706338533598951e-06,
"loss": 1.7338206768035889,
"step": 1310
},
{
"epoch": 1.3529411764705883,
"grad_norm": 8.391919709124968,
"learning_rate": 6.700693209080003e-06,
"loss": 1.2168724536895752,
"step": 1311
},
{
"epoch": 1.3539731682146543,
"grad_norm": 19.2929608725374,
"learning_rate": 6.695045431828524e-06,
"loss": 1.4697058200836182,
"step": 1312
},
{
"epoch": 1.3550051599587203,
"grad_norm": 25.873752000752564,
"learning_rate": 6.689395209989713e-06,
"loss": 1.6276439428329468,
"step": 1313
},
{
"epoch": 1.3560371517027865,
"grad_norm": 8.689028425958979,
"learning_rate": 6.6837425517122945e-06,
"loss": 1.2771011590957642,
"step": 1314
},
{
"epoch": 1.3570691434468525,
"grad_norm": 12.809736064633563,
"learning_rate": 6.678087465148511e-06,
"loss": 1.3017215728759766,
"step": 1315
},
{
"epoch": 1.3581011351909185,
"grad_norm": 15.79317141632538,
"learning_rate": 6.672429958454103e-06,
"loss": 1.9034658670425415,
"step": 1316
},
{
"epoch": 1.3591331269349844,
"grad_norm": 9.924700399484639,
"learning_rate": 6.666770039788305e-06,
"loss": 1.3009569644927979,
"step": 1317
},
{
"epoch": 1.3601651186790504,
"grad_norm": 12.264158048711824,
"learning_rate": 6.661107717313824e-06,
"loss": 1.3613559007644653,
"step": 1318
},
{
"epoch": 1.3611971104231166,
"grad_norm": 12.995848633185838,
"learning_rate": 6.655442999196838e-06,
"loss": 0.8472391366958618,
"step": 1319
},
{
"epoch": 1.3622291021671826,
"grad_norm": 12.715673473779065,
"learning_rate": 6.649775893606982e-06,
"loss": 1.333915114402771,
"step": 1320
},
{
"epoch": 1.3632610939112486,
"grad_norm": 12.237472217177828,
"learning_rate": 6.64410640871733e-06,
"loss": 1.5714805126190186,
"step": 1321
},
{
"epoch": 1.3642930856553148,
"grad_norm": 14.454550700639611,
"learning_rate": 6.638434552704389e-06,
"loss": 2.2822306156158447,
"step": 1322
},
{
"epoch": 1.3653250773993808,
"grad_norm": 26.144904229457918,
"learning_rate": 6.632760333748086e-06,
"loss": 1.3987756967544556,
"step": 1323
},
{
"epoch": 1.3663570691434468,
"grad_norm": 11.746650435747524,
"learning_rate": 6.627083760031755e-06,
"loss": 1.3919203281402588,
"step": 1324
},
{
"epoch": 1.367389060887513,
"grad_norm": 8.768967894320843,
"learning_rate": 6.621404839742127e-06,
"loss": 1.8376681804656982,
"step": 1325
},
{
"epoch": 1.368421052631579,
"grad_norm": 7.280635538208758,
"learning_rate": 6.615723581069318e-06,
"loss": 1.408341884613037,
"step": 1326
},
{
"epoch": 1.369453044375645,
"grad_norm": 12.407087366787891,
"learning_rate": 6.610039992206814e-06,
"loss": 1.5806256532669067,
"step": 1327
},
{
"epoch": 1.3704850361197112,
"grad_norm": 11.943643963848155,
"learning_rate": 6.604354081351461e-06,
"loss": 2.1980557441711426,
"step": 1328
},
{
"epoch": 1.3715170278637772,
"grad_norm": 9.940910005359118,
"learning_rate": 6.5986658567034565e-06,
"loss": 1.2753872871398926,
"step": 1329
},
{
"epoch": 1.3725490196078431,
"grad_norm": 7.560652373165568,
"learning_rate": 6.592975326466336e-06,
"loss": 1.95408296585083,
"step": 1330
},
{
"epoch": 1.3735810113519091,
"grad_norm": 11.601700149690876,
"learning_rate": 6.587282498846956e-06,
"loss": 1.5149401426315308,
"step": 1331
},
{
"epoch": 1.3746130030959751,
"grad_norm": 12.092137663163145,
"learning_rate": 6.5815873820554925e-06,
"loss": 1.8140610456466675,
"step": 1332
},
{
"epoch": 1.3756449948400413,
"grad_norm": 8.401132139383938,
"learning_rate": 6.575889984305411e-06,
"loss": 1.916958212852478,
"step": 1333
},
{
"epoch": 1.3766769865841073,
"grad_norm": 9.68323274094978,
"learning_rate": 6.57019031381348e-06,
"loss": 1.9963104724884033,
"step": 1334
},
{
"epoch": 1.3777089783281733,
"grad_norm": 11.431708385978807,
"learning_rate": 6.564488378799738e-06,
"loss": 1.2590843439102173,
"step": 1335
},
{
"epoch": 1.3787409700722395,
"grad_norm": 8.571951982688004,
"learning_rate": 6.558784187487495e-06,
"loss": 1.394248604774475,
"step": 1336
},
{
"epoch": 1.3797729618163055,
"grad_norm": 10.492254285636673,
"learning_rate": 6.553077748103307e-06,
"loss": 1.6234259605407715,
"step": 1337
},
{
"epoch": 1.3808049535603715,
"grad_norm": 14.586388788385152,
"learning_rate": 6.5473690688769775e-06,
"loss": 1.6336512565612793,
"step": 1338
},
{
"epoch": 1.3818369453044377,
"grad_norm": 7.877592238801531,
"learning_rate": 6.5416581580415415e-06,
"loss": 2.567668914794922,
"step": 1339
},
{
"epoch": 1.3828689370485037,
"grad_norm": 11.874862181391041,
"learning_rate": 6.535945023833249e-06,
"loss": 1.7893708944320679,
"step": 1340
},
{
"epoch": 1.3839009287925697,
"grad_norm": 10.04975526985597,
"learning_rate": 6.530229674491559e-06,
"loss": 1.4658175706863403,
"step": 1341
},
{
"epoch": 1.3849329205366356,
"grad_norm": 33.409645572782985,
"learning_rate": 6.524512118259122e-06,
"loss": 3.506965398788452,
"step": 1342
},
{
"epoch": 1.3859649122807016,
"grad_norm": 10.778179108329482,
"learning_rate": 6.518792363381776e-06,
"loss": 1.2108711004257202,
"step": 1343
},
{
"epoch": 1.3869969040247678,
"grad_norm": 9.572063239204743,
"learning_rate": 6.513070418108525e-06,
"loss": 1.6470930576324463,
"step": 1344
},
{
"epoch": 1.3880288957688338,
"grad_norm": 12.130274106203869,
"learning_rate": 6.507346290691534e-06,
"loss": 1.4623892307281494,
"step": 1345
},
{
"epoch": 1.3890608875128998,
"grad_norm": 13.41976404770925,
"learning_rate": 6.501619989386118e-06,
"loss": 1.5492268800735474,
"step": 1346
},
{
"epoch": 1.390092879256966,
"grad_norm": 16.948274862432797,
"learning_rate": 6.4958915224507235e-06,
"loss": 1.1457581520080566,
"step": 1347
},
{
"epoch": 1.391124871001032,
"grad_norm": 10.519090027770496,
"learning_rate": 6.490160898146919e-06,
"loss": 1.7141773700714111,
"step": 1348
},
{
"epoch": 1.392156862745098,
"grad_norm": 9.328092490719959,
"learning_rate": 6.484428124739388e-06,
"loss": 2.0902035236358643,
"step": 1349
},
{
"epoch": 1.3931888544891642,
"grad_norm": 17.461370440544524,
"learning_rate": 6.478693210495913e-06,
"loss": 3.634981632232666,
"step": 1350
},
{
"epoch": 1.3942208462332302,
"grad_norm": 9.592360182696881,
"learning_rate": 6.472956163687363e-06,
"loss": 1.582890510559082,
"step": 1351
},
{
"epoch": 1.3952528379772962,
"grad_norm": 19.520153952418713,
"learning_rate": 6.467216992587679e-06,
"loss": 1.5116333961486816,
"step": 1352
},
{
"epoch": 1.3962848297213624,
"grad_norm": 15.309150288640616,
"learning_rate": 6.4614757054738744e-06,
"loss": 1.3862173557281494,
"step": 1353
},
{
"epoch": 1.3973168214654283,
"grad_norm": 12.534277109856836,
"learning_rate": 6.455732310626005e-06,
"loss": 1.5213537216186523,
"step": 1354
},
{
"epoch": 1.3983488132094943,
"grad_norm": 7.623388240826514,
"learning_rate": 6.449986816327173e-06,
"loss": 1.5704606771469116,
"step": 1355
},
{
"epoch": 1.3993808049535603,
"grad_norm": 13.96068897335519,
"learning_rate": 6.444239230863505e-06,
"loss": 1.6977448463439941,
"step": 1356
},
{
"epoch": 1.4004127966976263,
"grad_norm": 7.7978324340101395,
"learning_rate": 6.438489562524143e-06,
"loss": 0.5249931812286377,
"step": 1357
},
{
"epoch": 1.4014447884416925,
"grad_norm": 12.885842848452445,
"learning_rate": 6.432737819601236e-06,
"loss": 1.6976923942565918,
"step": 1358
},
{
"epoch": 1.4024767801857585,
"grad_norm": 8.879805450767204,
"learning_rate": 6.426984010389924e-06,
"loss": 1.4502618312835693,
"step": 1359
},
{
"epoch": 1.4035087719298245,
"grad_norm": 14.587093528930188,
"learning_rate": 6.421228143188325e-06,
"loss": 1.5353691577911377,
"step": 1360
},
{
"epoch": 1.4045407636738907,
"grad_norm": 19.111908518818357,
"learning_rate": 6.4154702262975254e-06,
"loss": 1.5783036947250366,
"step": 1361
},
{
"epoch": 1.4055727554179567,
"grad_norm": 18.35949942738296,
"learning_rate": 6.40971026802157e-06,
"loss": 1.5345011949539185,
"step": 1362
},
{
"epoch": 1.4066047471620227,
"grad_norm": 13.506971539689111,
"learning_rate": 6.403948276667446e-06,
"loss": 1.7035109996795654,
"step": 1363
},
{
"epoch": 1.4076367389060889,
"grad_norm": 16.84134628365059,
"learning_rate": 6.398184260545072e-06,
"loss": 1.957930564880371,
"step": 1364
},
{
"epoch": 1.4086687306501549,
"grad_norm": 12.614397611258333,
"learning_rate": 6.39241822796729e-06,
"loss": 1.7606230974197388,
"step": 1365
},
{
"epoch": 1.4097007223942208,
"grad_norm": 12.167810963337313,
"learning_rate": 6.386650187249843e-06,
"loss": 1.1950409412384033,
"step": 1366
},
{
"epoch": 1.4107327141382868,
"grad_norm": 18.497511480405805,
"learning_rate": 6.380880146711379e-06,
"loss": 1.470259428024292,
"step": 1367
},
{
"epoch": 1.4117647058823528,
"grad_norm": 12.68092565602118,
"learning_rate": 6.375108114673425e-06,
"loss": 2.590393304824829,
"step": 1368
},
{
"epoch": 1.412796697626419,
"grad_norm": 14.371407712891207,
"learning_rate": 6.369334099460382e-06,
"loss": 1.1304482221603394,
"step": 1369
},
{
"epoch": 1.413828689370485,
"grad_norm": 12.495927048359073,
"learning_rate": 6.363558109399508e-06,
"loss": 1.537990689277649,
"step": 1370
},
{
"epoch": 1.414860681114551,
"grad_norm": 12.65546028712542,
"learning_rate": 6.3577801528209125e-06,
"loss": 1.5674386024475098,
"step": 1371
},
{
"epoch": 1.4158926728586172,
"grad_norm": 10.941640579885966,
"learning_rate": 6.3520002380575395e-06,
"loss": 1.2706232070922852,
"step": 1372
},
{
"epoch": 1.4169246646026832,
"grad_norm": 13.571585050567363,
"learning_rate": 6.346218373445159e-06,
"loss": 1.5578970909118652,
"step": 1373
},
{
"epoch": 1.4179566563467492,
"grad_norm": 8.250524965469019,
"learning_rate": 6.340434567322351e-06,
"loss": 1.6340320110321045,
"step": 1374
},
{
"epoch": 1.4189886480908154,
"grad_norm": 10.160982961416137,
"learning_rate": 6.334648828030494e-06,
"loss": 1.663146734237671,
"step": 1375
},
{
"epoch": 1.4200206398348814,
"grad_norm": 13.750329742860425,
"learning_rate": 6.32886116391376e-06,
"loss": 1.119152545928955,
"step": 1376
},
{
"epoch": 1.4210526315789473,
"grad_norm": 12.556590420753649,
"learning_rate": 6.323071583319091e-06,
"loss": 1.083686113357544,
"step": 1377
},
{
"epoch": 1.4220846233230136,
"grad_norm": 7.811797834471477,
"learning_rate": 6.317280094596197e-06,
"loss": 1.0705013275146484,
"step": 1378
},
{
"epoch": 1.4231166150670795,
"grad_norm": 13.31750115490766,
"learning_rate": 6.3114867060975396e-06,
"loss": 1.2521347999572754,
"step": 1379
},
{
"epoch": 1.4241486068111455,
"grad_norm": 16.62958542447736,
"learning_rate": 6.305691426178316e-06,
"loss": 1.2190382480621338,
"step": 1380
},
{
"epoch": 1.4251805985552115,
"grad_norm": 11.024512409090624,
"learning_rate": 6.299894263196456e-06,
"loss": 1.4755083322525024,
"step": 1381
},
{
"epoch": 1.4262125902992775,
"grad_norm": 27.767711605272577,
"learning_rate": 6.294095225512604e-06,
"loss": 3.2815802097320557,
"step": 1382
},
{
"epoch": 1.4272445820433437,
"grad_norm": 9.477799042765865,
"learning_rate": 6.288294321490107e-06,
"loss": 2.3116531372070312,
"step": 1383
},
{
"epoch": 1.4282765737874097,
"grad_norm": 11.990489393588334,
"learning_rate": 6.282491559495005e-06,
"loss": 1.9234528541564941,
"step": 1384
},
{
"epoch": 1.4293085655314757,
"grad_norm": 12.348925653645608,
"learning_rate": 6.276686947896015e-06,
"loss": 1.8830013275146484,
"step": 1385
},
{
"epoch": 1.4303405572755419,
"grad_norm": 8.455310727922418,
"learning_rate": 6.270880495064524e-06,
"loss": 1.5233604907989502,
"step": 1386
},
{
"epoch": 1.4313725490196079,
"grad_norm": 8.66136780771483,
"learning_rate": 6.265072209374574e-06,
"loss": 1.7544364929199219,
"step": 1387
},
{
"epoch": 1.4324045407636739,
"grad_norm": 11.686141996301,
"learning_rate": 6.259262099202849e-06,
"loss": 1.745523452758789,
"step": 1388
},
{
"epoch": 1.43343653250774,
"grad_norm": 25.909341019617443,
"learning_rate": 6.253450172928668e-06,
"loss": 1.608154296875,
"step": 1389
},
{
"epoch": 1.434468524251806,
"grad_norm": 15.986325301735999,
"learning_rate": 6.247636438933963e-06,
"loss": 1.7948957681655884,
"step": 1390
},
{
"epoch": 1.435500515995872,
"grad_norm": 11.105863635839464,
"learning_rate": 6.241820905603277e-06,
"loss": 1.2178281545639038,
"step": 1391
},
{
"epoch": 1.436532507739938,
"grad_norm": 12.800978863737068,
"learning_rate": 6.23600358132375e-06,
"loss": 1.4831297397613525,
"step": 1392
},
{
"epoch": 1.437564499484004,
"grad_norm": 30.76144400796033,
"learning_rate": 6.230184474485101e-06,
"loss": 2.632197141647339,
"step": 1393
},
{
"epoch": 1.4385964912280702,
"grad_norm": 13.142736450508734,
"learning_rate": 6.22436359347962e-06,
"loss": 2.632613182067871,
"step": 1394
},
{
"epoch": 1.4396284829721362,
"grad_norm": 11.286789151567506,
"learning_rate": 6.218540946702158e-06,
"loss": 1.2967652082443237,
"step": 1395
},
{
"epoch": 1.4406604747162022,
"grad_norm": 11.133156606466436,
"learning_rate": 6.212716542550112e-06,
"loss": 1.594740629196167,
"step": 1396
},
{
"epoch": 1.4416924664602684,
"grad_norm": 8.730555310476715,
"learning_rate": 6.206890389423412e-06,
"loss": 1.584374189376831,
"step": 1397
},
{
"epoch": 1.4427244582043344,
"grad_norm": 8.534026782889603,
"learning_rate": 6.201062495724513e-06,
"loss": 2.5659961700439453,
"step": 1398
},
{
"epoch": 1.4437564499484004,
"grad_norm": 7.288337030266494,
"learning_rate": 6.195232869858375e-06,
"loss": 1.7394036054611206,
"step": 1399
},
{
"epoch": 1.4447884416924666,
"grad_norm": 9.841768591510458,
"learning_rate": 6.189401520232464e-06,
"loss": 1.254488229751587,
"step": 1400
},
{
"epoch": 1.4458204334365325,
"grad_norm": 12.839409832556502,
"learning_rate": 6.183568455256725e-06,
"loss": 1.8513953685760498,
"step": 1401
},
{
"epoch": 1.4468524251805985,
"grad_norm": 13.276032955940094,
"learning_rate": 6.177733683343578e-06,
"loss": 1.8852769136428833,
"step": 1402
},
{
"epoch": 1.4478844169246647,
"grad_norm": 9.43865265441635,
"learning_rate": 6.171897212907912e-06,
"loss": 1.440374732017517,
"step": 1403
},
{
"epoch": 1.4489164086687307,
"grad_norm": 11.606811882038782,
"learning_rate": 6.166059052367055e-06,
"loss": 0.9862947463989258,
"step": 1404
},
{
"epoch": 1.4499484004127967,
"grad_norm": 14.206842551035457,
"learning_rate": 6.16021921014078e-06,
"loss": 1.9904972314834595,
"step": 1405
},
{
"epoch": 1.4509803921568627,
"grad_norm": 8.696455410766612,
"learning_rate": 6.154377694651279e-06,
"loss": 1.3866620063781738,
"step": 1406
},
{
"epoch": 1.4520123839009287,
"grad_norm": 8.596637181735094,
"learning_rate": 6.148534514323165e-06,
"loss": 1.2775863409042358,
"step": 1407
},
{
"epoch": 1.453044375644995,
"grad_norm": 10.271782117514103,
"learning_rate": 6.142689677583447e-06,
"loss": 1.40165114402771,
"step": 1408
},
{
"epoch": 1.4540763673890609,
"grad_norm": 8.693261567738002,
"learning_rate": 6.136843192861522e-06,
"loss": 1.458809494972229,
"step": 1409
},
{
"epoch": 1.4551083591331269,
"grad_norm": 13.065942705085428,
"learning_rate": 6.130995068589166e-06,
"loss": 1.12819242477417,
"step": 1410
},
{
"epoch": 1.456140350877193,
"grad_norm": 14.840424340102057,
"learning_rate": 6.125145313200519e-06,
"loss": 1.9877163171768188,
"step": 1411
},
{
"epoch": 1.457172342621259,
"grad_norm": 12.797097904552402,
"learning_rate": 6.119293935132076e-06,
"loss": 1.3951126337051392,
"step": 1412
},
{
"epoch": 1.458204334365325,
"grad_norm": 12.77015646004144,
"learning_rate": 6.113440942822666e-06,
"loss": 1.1544969081878662,
"step": 1413
},
{
"epoch": 1.4592363261093912,
"grad_norm": 9.33713645008868,
"learning_rate": 6.107586344713451e-06,
"loss": 2.1354317665100098,
"step": 1414
},
{
"epoch": 1.4602683178534572,
"grad_norm": 15.776487661672133,
"learning_rate": 6.101730149247908e-06,
"loss": 1.611715316772461,
"step": 1415
},
{
"epoch": 1.4613003095975232,
"grad_norm": 12.73566319918363,
"learning_rate": 6.095872364871818e-06,
"loss": 2.534665107727051,
"step": 1416
},
{
"epoch": 1.4623323013415892,
"grad_norm": 21.518820732330596,
"learning_rate": 6.090013000033251e-06,
"loss": 1.435193419456482,
"step": 1417
},
{
"epoch": 1.4633642930856552,
"grad_norm": 13.034743581278482,
"learning_rate": 6.084152063182559e-06,
"loss": 0.7462902665138245,
"step": 1418
},
{
"epoch": 1.4643962848297214,
"grad_norm": 12.039234728135936,
"learning_rate": 6.078289562772362e-06,
"loss": 1.4898221492767334,
"step": 1419
},
{
"epoch": 1.4654282765737874,
"grad_norm": 10.031784052622553,
"learning_rate": 6.072425507257528e-06,
"loss": 1.8172965049743652,
"step": 1420
},
{
"epoch": 1.4664602683178534,
"grad_norm": 12.802095758665487,
"learning_rate": 6.066559905095179e-06,
"loss": 2.3749866485595703,
"step": 1421
},
{
"epoch": 1.4674922600619196,
"grad_norm": 12.745749761023887,
"learning_rate": 6.060692764744657e-06,
"loss": 2.0804553031921387,
"step": 1422
},
{
"epoch": 1.4685242518059856,
"grad_norm": 12.233534810853003,
"learning_rate": 6.054824094667529e-06,
"loss": 2.026507616043091,
"step": 1423
},
{
"epoch": 1.4695562435500515,
"grad_norm": 11.046016933485669,
"learning_rate": 6.048953903327568e-06,
"loss": 1.3507273197174072,
"step": 1424
},
{
"epoch": 1.4705882352941178,
"grad_norm": 10.09541906536385,
"learning_rate": 6.043082199190735e-06,
"loss": 1.6492483615875244,
"step": 1425
},
{
"epoch": 1.4716202270381837,
"grad_norm": 10.447296179338457,
"learning_rate": 6.037208990725181e-06,
"loss": 0.9386216402053833,
"step": 1426
},
{
"epoch": 1.4726522187822497,
"grad_norm": 9.277034655962519,
"learning_rate": 6.031334286401218e-06,
"loss": 1.2079228162765503,
"step": 1427
},
{
"epoch": 1.4736842105263157,
"grad_norm": 13.768299398362204,
"learning_rate": 6.025458094691323e-06,
"loss": 1.549511432647705,
"step": 1428
},
{
"epoch": 1.474716202270382,
"grad_norm": 9.200360613784461,
"learning_rate": 6.019580424070114e-06,
"loss": 1.2142996788024902,
"step": 1429
},
{
"epoch": 1.475748194014448,
"grad_norm": 11.175925574454759,
"learning_rate": 6.0137012830143405e-06,
"loss": 1.2798269987106323,
"step": 1430
},
{
"epoch": 1.4767801857585139,
"grad_norm": 10.251935687194365,
"learning_rate": 6.007820680002878e-06,
"loss": 1.052504301071167,
"step": 1431
},
{
"epoch": 1.4778121775025799,
"grad_norm": 11.33479315396473,
"learning_rate": 6.0019386235167055e-06,
"loss": 1.3901056051254272,
"step": 1432
},
{
"epoch": 1.478844169246646,
"grad_norm": 11.445001768478525,
"learning_rate": 5.9960551220389e-06,
"loss": 1.121410846710205,
"step": 1433
},
{
"epoch": 1.479876160990712,
"grad_norm": 14.072546458014429,
"learning_rate": 5.990170184054622e-06,
"loss": 1.7075709104537964,
"step": 1434
},
{
"epoch": 1.480908152734778,
"grad_norm": 11.420431766114314,
"learning_rate": 5.984283818051104e-06,
"loss": 2.078220844268799,
"step": 1435
},
{
"epoch": 1.4819401444788443,
"grad_norm": 12.833753989702446,
"learning_rate": 5.978396032517641e-06,
"loss": 1.833093523979187,
"step": 1436
},
{
"epoch": 1.4829721362229102,
"grad_norm": 8.45273153102322,
"learning_rate": 5.972506835945569e-06,
"loss": 2.052757740020752,
"step": 1437
},
{
"epoch": 1.4840041279669762,
"grad_norm": 15.084190288166576,
"learning_rate": 5.966616236828263e-06,
"loss": 1.1162149906158447,
"step": 1438
},
{
"epoch": 1.4850361197110424,
"grad_norm": 14.058585981663885,
"learning_rate": 5.960724243661119e-06,
"loss": 1.4707602262496948,
"step": 1439
},
{
"epoch": 1.4860681114551084,
"grad_norm": 17.79529872485665,
"learning_rate": 5.9548308649415486e-06,
"loss": 1.8684618473052979,
"step": 1440
},
{
"epoch": 1.4871001031991744,
"grad_norm": 12.509522667361384,
"learning_rate": 5.948936109168954e-06,
"loss": 2.0305280685424805,
"step": 1441
},
{
"epoch": 1.4881320949432404,
"grad_norm": 15.322431562131413,
"learning_rate": 5.943039984844727e-06,
"loss": 2.028458833694458,
"step": 1442
},
{
"epoch": 1.4891640866873064,
"grad_norm": 10.558362837173703,
"learning_rate": 5.937142500472235e-06,
"loss": 1.3905258178710938,
"step": 1443
},
{
"epoch": 1.4901960784313726,
"grad_norm": 10.812034444363723,
"learning_rate": 5.931243664556803e-06,
"loss": 1.1899843215942383,
"step": 1444
},
{
"epoch": 1.4912280701754386,
"grad_norm": 9.376651735471622,
"learning_rate": 5.925343485605709e-06,
"loss": 1.3725718259811401,
"step": 1445
},
{
"epoch": 1.4922600619195046,
"grad_norm": 10.631617419467513,
"learning_rate": 5.919441972128165e-06,
"loss": 2.473677158355713,
"step": 1446
},
{
"epoch": 1.4932920536635708,
"grad_norm": 12.177567464057311,
"learning_rate": 5.913539132635309e-06,
"loss": 1.3525331020355225,
"step": 1447
},
{
"epoch": 1.4943240454076367,
"grad_norm": 17.265066708620502,
"learning_rate": 5.90763497564019e-06,
"loss": 2.0814294815063477,
"step": 1448
},
{
"epoch": 1.4953560371517027,
"grad_norm": 10.516814129994565,
"learning_rate": 5.901729509657758e-06,
"loss": 1.4664232730865479,
"step": 1449
},
{
"epoch": 1.496388028895769,
"grad_norm": 9.305983886869592,
"learning_rate": 5.895822743204855e-06,
"loss": 1.3303736448287964,
"step": 1450
},
{
"epoch": 1.497420020639835,
"grad_norm": 12.006125644076471,
"learning_rate": 5.889914684800191e-06,
"loss": 2.165750503540039,
"step": 1451
},
{
"epoch": 1.498452012383901,
"grad_norm": 13.8111428322773,
"learning_rate": 5.884005342964343e-06,
"loss": 1.3294661045074463,
"step": 1452
},
{
"epoch": 1.499484004127967,
"grad_norm": 11.698364347897481,
"learning_rate": 5.87809472621974e-06,
"loss": 1.6502536535263062,
"step": 1453
},
{
"epoch": 1.5005159958720329,
"grad_norm": 7.817300525446381,
"learning_rate": 5.872182843090644e-06,
"loss": 1.302175521850586,
"step": 1454
},
{
"epoch": 1.501547987616099,
"grad_norm": 5.571703031423158,
"learning_rate": 5.8662697021031555e-06,
"loss": 1.9427019357681274,
"step": 1455
},
{
"epoch": 1.502579979360165,
"grad_norm": 9.404770291286901,
"learning_rate": 5.860355311785175e-06,
"loss": 1.2602835893630981,
"step": 1456
},
{
"epoch": 1.503611971104231,
"grad_norm": 13.125735745212998,
"learning_rate": 5.8544396806664135e-06,
"loss": 1.3319302797317505,
"step": 1457
},
{
"epoch": 1.5046439628482973,
"grad_norm": 10.729152197735356,
"learning_rate": 5.848522817278369e-06,
"loss": 1.3135343790054321,
"step": 1458
},
{
"epoch": 1.5056759545923633,
"grad_norm": 11.124309797323237,
"learning_rate": 5.8426047301543165e-06,
"loss": 2.520270824432373,
"step": 1459
},
{
"epoch": 1.5067079463364292,
"grad_norm": 8.362701679492053,
"learning_rate": 5.836685427829296e-06,
"loss": 1.7932217121124268,
"step": 1460
},
{
"epoch": 1.5077399380804954,
"grad_norm": 9.13955572924332,
"learning_rate": 5.830764918840102e-06,
"loss": 1.0656404495239258,
"step": 1461
},
{
"epoch": 1.5087719298245614,
"grad_norm": 13.47182208637423,
"learning_rate": 5.824843211725265e-06,
"loss": 2.238260507583618,
"step": 1462
},
{
"epoch": 1.5098039215686274,
"grad_norm": 9.202878119340536,
"learning_rate": 5.818920315025045e-06,
"loss": 1.6161681413650513,
"step": 1463
},
{
"epoch": 1.5108359133126936,
"grad_norm": 11.722185650063693,
"learning_rate": 5.812996237281423e-06,
"loss": 1.6068499088287354,
"step": 1464
},
{
"epoch": 1.5118679050567594,
"grad_norm": 9.617934336633931,
"learning_rate": 5.807070987038075e-06,
"loss": 1.7375080585479736,
"step": 1465
},
{
"epoch": 1.5128998968008256,
"grad_norm": 11.986350172942641,
"learning_rate": 5.8011445728403724e-06,
"loss": 1.4095920324325562,
"step": 1466
},
{
"epoch": 1.5139318885448918,
"grad_norm": 11.439405475221042,
"learning_rate": 5.7952170032353675e-06,
"loss": 2.3441104888916016,
"step": 1467
},
{
"epoch": 1.5149638802889576,
"grad_norm": 10.864248301742652,
"learning_rate": 5.7892882867717705e-06,
"loss": 1.1567907333374023,
"step": 1468
},
{
"epoch": 1.5159958720330238,
"grad_norm": 8.931863762917068,
"learning_rate": 5.7833584319999555e-06,
"loss": 1.375398874282837,
"step": 1469
},
{
"epoch": 1.5170278637770898,
"grad_norm": 11.194765468102132,
"learning_rate": 5.777427447471933e-06,
"loss": 1.997140884399414,
"step": 1470
},
{
"epoch": 1.5180598555211557,
"grad_norm": 10.822588418086724,
"learning_rate": 5.771495341741344e-06,
"loss": 1.4403889179229736,
"step": 1471
},
{
"epoch": 1.519091847265222,
"grad_norm": 12.160372351646561,
"learning_rate": 5.765562123363445e-06,
"loss": 1.1714125871658325,
"step": 1472
},
{
"epoch": 1.520123839009288,
"grad_norm": 12.879802517475234,
"learning_rate": 5.759627800895098e-06,
"loss": 1.2552604675292969,
"step": 1473
},
{
"epoch": 1.521155830753354,
"grad_norm": 9.307394670135041,
"learning_rate": 5.75369238289476e-06,
"loss": 1.5141665935516357,
"step": 1474
},
{
"epoch": 1.5221878224974201,
"grad_norm": 10.935452019272311,
"learning_rate": 5.747755877922464e-06,
"loss": 1.5338034629821777,
"step": 1475
},
{
"epoch": 1.5232198142414861,
"grad_norm": 11.9459171615159,
"learning_rate": 5.7418182945398136e-06,
"loss": 1.9021246433258057,
"step": 1476
},
{
"epoch": 1.524251805985552,
"grad_norm": 14.221176762445017,
"learning_rate": 5.735879641309964e-06,
"loss": 1.4347755908966064,
"step": 1477
},
{
"epoch": 1.5252837977296183,
"grad_norm": 9.735995474013986,
"learning_rate": 5.729939926797617e-06,
"loss": 1.5926001071929932,
"step": 1478
},
{
"epoch": 1.526315789473684,
"grad_norm": 8.085584686886568,
"learning_rate": 5.723999159569005e-06,
"loss": 1.271660566329956,
"step": 1479
},
{
"epoch": 1.5273477812177503,
"grad_norm": 11.747415248330071,
"learning_rate": 5.718057348191874e-06,
"loss": 1.1379930973052979,
"step": 1480
},
{
"epoch": 1.5283797729618163,
"grad_norm": 10.759081786618275,
"learning_rate": 5.712114501235485e-06,
"loss": 1.7403454780578613,
"step": 1481
},
{
"epoch": 1.5294117647058822,
"grad_norm": 10.544504378713793,
"learning_rate": 5.7061706272705796e-06,
"loss": 1.6052613258361816,
"step": 1482
},
{
"epoch": 1.5304437564499485,
"grad_norm": 15.57304293320465,
"learning_rate": 5.7002257348693925e-06,
"loss": 1.1717628240585327,
"step": 1483
},
{
"epoch": 1.5314757481940144,
"grad_norm": 15.929231531783989,
"learning_rate": 5.6942798326056205e-06,
"loss": 1.8529958724975586,
"step": 1484
},
{
"epoch": 1.5325077399380804,
"grad_norm": 8.34964910158795,
"learning_rate": 5.688332929054417e-06,
"loss": 1.7307343482971191,
"step": 1485
},
{
"epoch": 1.5335397316821466,
"grad_norm": 15.606681556690106,
"learning_rate": 5.682385032792386e-06,
"loss": 1.4584531784057617,
"step": 1486
},
{
"epoch": 1.5345717234262126,
"grad_norm": 13.23496465250211,
"learning_rate": 5.6764361523975535e-06,
"loss": 1.3606812953948975,
"step": 1487
},
{
"epoch": 1.5356037151702786,
"grad_norm": 11.134571166045973,
"learning_rate": 5.670486296449373e-06,
"loss": 1.5671963691711426,
"step": 1488
},
{
"epoch": 1.5366357069143448,
"grad_norm": 15.493278817263745,
"learning_rate": 5.664535473528698e-06,
"loss": 1.2391408681869507,
"step": 1489
},
{
"epoch": 1.5376676986584106,
"grad_norm": 10.213002097693753,
"learning_rate": 5.658583692217783e-06,
"loss": 1.294258713722229,
"step": 1490
},
{
"epoch": 1.5386996904024768,
"grad_norm": 12.818515458411959,
"learning_rate": 5.65263096110026e-06,
"loss": 1.159803867340088,
"step": 1491
},
{
"epoch": 1.539731682146543,
"grad_norm": 10.797451105215767,
"learning_rate": 5.646677288761132e-06,
"loss": 1.3275816440582275,
"step": 1492
},
{
"epoch": 1.5407636738906088,
"grad_norm": 8.030273488551382,
"learning_rate": 5.640722683786763e-06,
"loss": 1.8123530149459839,
"step": 1493
},
{
"epoch": 1.541795665634675,
"grad_norm": 9.27722564107055,
"learning_rate": 5.634767154764855e-06,
"loss": 1.186286449432373,
"step": 1494
},
{
"epoch": 1.542827657378741,
"grad_norm": 9.714533218402066,
"learning_rate": 5.628810710284452e-06,
"loss": 1.3890738487243652,
"step": 1495
},
{
"epoch": 1.543859649122807,
"grad_norm": 13.892419121328265,
"learning_rate": 5.622853358935908e-06,
"loss": 1.5548430681228638,
"step": 1496
},
{
"epoch": 1.5448916408668731,
"grad_norm": 19.532000923493,
"learning_rate": 5.616895109310891e-06,
"loss": 1.4705501794815063,
"step": 1497
},
{
"epoch": 1.5459236326109391,
"grad_norm": 12.11543562722765,
"learning_rate": 5.6109359700023655e-06,
"loss": 1.580586314201355,
"step": 1498
},
{
"epoch": 1.546955624355005,
"grad_norm": 10.642818826957756,
"learning_rate": 5.604975949604575e-06,
"loss": 1.3070194721221924,
"step": 1499
},
{
"epoch": 1.5479876160990713,
"grad_norm": 9.62098409065898,
"learning_rate": 5.599015056713037e-06,
"loss": 1.736723780632019,
"step": 1500
},
{
"epoch": 1.5490196078431373,
"grad_norm": 13.275477934185444,
"learning_rate": 5.5930532999245246e-06,
"loss": 1.9193358421325684,
"step": 1501
},
{
"epoch": 1.5500515995872033,
"grad_norm": 13.769257992693825,
"learning_rate": 5.587090687837059e-06,
"loss": 2.576160192489624,
"step": 1502
},
{
"epoch": 1.5510835913312695,
"grad_norm": 12.63314417067987,
"learning_rate": 5.581127229049892e-06,
"loss": 2.4420785903930664,
"step": 1503
},
{
"epoch": 1.5521155830753353,
"grad_norm": 18.02829925219223,
"learning_rate": 5.575162932163501e-06,
"loss": 1.7575864791870117,
"step": 1504
},
{
"epoch": 1.5531475748194015,
"grad_norm": 18.256616100672844,
"learning_rate": 5.569197805779571e-06,
"loss": 1.8051812648773193,
"step": 1505
},
{
"epoch": 1.5541795665634675,
"grad_norm": 10.34342544361706,
"learning_rate": 5.563231858500978e-06,
"loss": 1.7020881175994873,
"step": 1506
},
{
"epoch": 1.5552115583075334,
"grad_norm": 9.179344832215872,
"learning_rate": 5.5572650989317874e-06,
"loss": 1.781341314315796,
"step": 1507
},
{
"epoch": 1.5562435500515996,
"grad_norm": 10.177043030118556,
"learning_rate": 5.551297535677236e-06,
"loss": 1.5569262504577637,
"step": 1508
},
{
"epoch": 1.5572755417956656,
"grad_norm": 9.583449662670834,
"learning_rate": 5.545329177343717e-06,
"loss": 2.1539697647094727,
"step": 1509
},
{
"epoch": 1.5583075335397316,
"grad_norm": 9.400041914372162,
"learning_rate": 5.539360032538771e-06,
"loss": 1.4488544464111328,
"step": 1510
},
{
"epoch": 1.5593395252837978,
"grad_norm": 7.919911135989337,
"learning_rate": 5.533390109871074e-06,
"loss": 0.8961690068244934,
"step": 1511
},
{
"epoch": 1.5603715170278638,
"grad_norm": 12.372315743172784,
"learning_rate": 5.527419417950424e-06,
"loss": 1.3138301372528076,
"step": 1512
},
{
"epoch": 1.5614035087719298,
"grad_norm": 12.730704327051964,
"learning_rate": 5.521447965387725e-06,
"loss": 1.3151988983154297,
"step": 1513
},
{
"epoch": 1.562435500515996,
"grad_norm": 7.100293313995284,
"learning_rate": 5.515475760794984e-06,
"loss": 1.3968534469604492,
"step": 1514
},
{
"epoch": 1.5634674922600618,
"grad_norm": 7.392014954526171,
"learning_rate": 5.509502812785286e-06,
"loss": 1.4642367362976074,
"step": 1515
},
{
"epoch": 1.564499484004128,
"grad_norm": 7.492607467771529,
"learning_rate": 5.503529129972792e-06,
"loss": 1.7389013767242432,
"step": 1516
},
{
"epoch": 1.5655314757481942,
"grad_norm": 8.944711785968888,
"learning_rate": 5.497554720972723e-06,
"loss": 1.4127401113510132,
"step": 1517
},
{
"epoch": 1.56656346749226,
"grad_norm": 12.957800966751032,
"learning_rate": 5.4915795944013475e-06,
"loss": 1.6793855428695679,
"step": 1518
},
{
"epoch": 1.5675954592363261,
"grad_norm": 7.326475883945584,
"learning_rate": 5.485603758875965e-06,
"loss": 1.7146462202072144,
"step": 1519
},
{
"epoch": 1.5686274509803921,
"grad_norm": 11.589191219080782,
"learning_rate": 5.479627223014902e-06,
"loss": 1.3451881408691406,
"step": 1520
},
{
"epoch": 1.5696594427244581,
"grad_norm": 16.447402666325793,
"learning_rate": 5.4736499954374914e-06,
"loss": 1.2179075479507446,
"step": 1521
},
{
"epoch": 1.5706914344685243,
"grad_norm": 8.160593859091993,
"learning_rate": 5.467672084764066e-06,
"loss": 1.4500888586044312,
"step": 1522
},
{
"epoch": 1.5717234262125903,
"grad_norm": 13.125699034555025,
"learning_rate": 5.461693499615945e-06,
"loss": 1.6095616817474365,
"step": 1523
},
{
"epoch": 1.5727554179566563,
"grad_norm": 8.93740984586208,
"learning_rate": 5.455714248615417e-06,
"loss": 1.4185380935668945,
"step": 1524
},
{
"epoch": 1.5737874097007225,
"grad_norm": 14.45255315472622,
"learning_rate": 5.449734340385731e-06,
"loss": 2.674546718597412,
"step": 1525
},
{
"epoch": 1.5748194014447885,
"grad_norm": 7.4837493471229175,
"learning_rate": 5.443753783551089e-06,
"loss": 1.137263536453247,
"step": 1526
},
{
"epoch": 1.5758513931888545,
"grad_norm": 9.570474546668043,
"learning_rate": 5.4377725867366215e-06,
"loss": 1.42196786403656,
"step": 1527
},
{
"epoch": 1.5768833849329207,
"grad_norm": 9.511890006370185,
"learning_rate": 5.431790758568388e-06,
"loss": 1.3452329635620117,
"step": 1528
},
{
"epoch": 1.5779153766769864,
"grad_norm": 12.043083718932166,
"learning_rate": 5.425808307673353e-06,
"loss": 1.150077223777771,
"step": 1529
},
{
"epoch": 1.5789473684210527,
"grad_norm": 10.943590583717281,
"learning_rate": 5.4198252426793815e-06,
"loss": 1.872908592224121,
"step": 1530
},
{
"epoch": 1.5799793601651186,
"grad_norm": 9.128250032776844,
"learning_rate": 5.413841572215228e-06,
"loss": 2.1760289669036865,
"step": 1531
},
{
"epoch": 1.5810113519091846,
"grad_norm": 7.921443667774921,
"learning_rate": 5.4078573049105135e-06,
"loss": 1.3120591640472412,
"step": 1532
},
{
"epoch": 1.5820433436532508,
"grad_norm": 12.057521921968885,
"learning_rate": 5.401872449395724e-06,
"loss": 1.4298338890075684,
"step": 1533
},
{
"epoch": 1.5830753353973168,
"grad_norm": 18.411184603387003,
"learning_rate": 5.3958870143021925e-06,
"loss": 1.3725688457489014,
"step": 1534
},
{
"epoch": 1.5841073271413828,
"grad_norm": 10.04468876006326,
"learning_rate": 5.389901008262088e-06,
"loss": 1.4433213472366333,
"step": 1535
},
{
"epoch": 1.585139318885449,
"grad_norm": 9.128904820696041,
"learning_rate": 5.383914439908403e-06,
"loss": 1.6381319761276245,
"step": 1536
},
{
"epoch": 1.586171310629515,
"grad_norm": 11.525643554964871,
"learning_rate": 5.377927317874942e-06,
"loss": 1.583193063735962,
"step": 1537
},
{
"epoch": 1.587203302373581,
"grad_norm": 9.945250975135199,
"learning_rate": 5.371939650796307e-06,
"loss": 1.3325223922729492,
"step": 1538
},
{
"epoch": 1.5882352941176472,
"grad_norm": 11.326604029853062,
"learning_rate": 5.365951447307884e-06,
"loss": 1.445408821105957,
"step": 1539
},
{
"epoch": 1.589267285861713,
"grad_norm": 14.685339833591474,
"learning_rate": 5.359962716045836e-06,
"loss": 1.6517233848571777,
"step": 1540
},
{
"epoch": 1.5902992776057792,
"grad_norm": 9.251883685106936,
"learning_rate": 5.353973465647085e-06,
"loss": 1.377445936203003,
"step": 1541
},
{
"epoch": 1.5913312693498454,
"grad_norm": 9.41245716131414,
"learning_rate": 5.347983704749307e-06,
"loss": 1.2144426107406616,
"step": 1542
},
{
"epoch": 1.5923632610939111,
"grad_norm": 28.182784474952282,
"learning_rate": 5.3419934419909024e-06,
"loss": 2.0331592559814453,
"step": 1543
},
{
"epoch": 1.5933952528379773,
"grad_norm": 9.463227142084353,
"learning_rate": 5.336002686011007e-06,
"loss": 1.401644229888916,
"step": 1544
},
{
"epoch": 1.5944272445820433,
"grad_norm": 13.957678408468148,
"learning_rate": 5.330011445449463e-06,
"loss": 1.7547483444213867,
"step": 1545
},
{
"epoch": 1.5954592363261093,
"grad_norm": 13.965244353484048,
"learning_rate": 5.324019728946813e-06,
"loss": 1.2632057666778564,
"step": 1546
},
{
"epoch": 1.5964912280701755,
"grad_norm": 8.227946520511322,
"learning_rate": 5.318027545144285e-06,
"loss": 1.8406842947006226,
"step": 1547
},
{
"epoch": 1.5975232198142415,
"grad_norm": 10.461119013028867,
"learning_rate": 5.312034902683779e-06,
"loss": 1.6203927993774414,
"step": 1548
},
{
"epoch": 1.5985552115583075,
"grad_norm": 12.655939294576894,
"learning_rate": 5.3060418102078606e-06,
"loss": 1.2169432640075684,
"step": 1549
},
{
"epoch": 1.5995872033023737,
"grad_norm": 15.741119526890982,
"learning_rate": 5.30004827635974e-06,
"loss": 1.2657561302185059,
"step": 1550
},
{
"epoch": 1.6006191950464397,
"grad_norm": 9.942059337569491,
"learning_rate": 5.29405430978327e-06,
"loss": 1.4021785259246826,
"step": 1551
},
{
"epoch": 1.6016511867905057,
"grad_norm": 18.7804432153341,
"learning_rate": 5.288059919122922e-06,
"loss": 1.5848625898361206,
"step": 1552
},
{
"epoch": 1.6026831785345719,
"grad_norm": 13.139933695764597,
"learning_rate": 5.28206511302378e-06,
"loss": 1.2499003410339355,
"step": 1553
},
{
"epoch": 1.6037151702786376,
"grad_norm": 15.87750104151842,
"learning_rate": 5.276069900131527e-06,
"loss": 2.3699073791503906,
"step": 1554
},
{
"epoch": 1.6047471620227038,
"grad_norm": 14.943036426486472,
"learning_rate": 5.270074289092436e-06,
"loss": 1.2444607019424438,
"step": 1555
},
{
"epoch": 1.6057791537667698,
"grad_norm": 8.815300016946654,
"learning_rate": 5.2640782885533515e-06,
"loss": 1.8418155908584595,
"step": 1556
},
{
"epoch": 1.6068111455108358,
"grad_norm": 17.329411897746205,
"learning_rate": 5.258081907161679e-06,
"loss": 1.5811779499053955,
"step": 1557
},
{
"epoch": 1.607843137254902,
"grad_norm": 8.381110893104205,
"learning_rate": 5.252085153565375e-06,
"loss": 0.8724288940429688,
"step": 1558
},
{
"epoch": 1.608875128998968,
"grad_norm": 11.581865689267957,
"learning_rate": 5.246088036412932e-06,
"loss": 1.2417452335357666,
"step": 1559
},
{
"epoch": 1.609907120743034,
"grad_norm": 14.733501236802056,
"learning_rate": 5.240090564353365e-06,
"loss": 1.3736588954925537,
"step": 1560
},
{
"epoch": 1.6109391124871002,
"grad_norm": 9.371127457010875,
"learning_rate": 5.234092746036207e-06,
"loss": 1.3768947124481201,
"step": 1561
},
{
"epoch": 1.6119711042311662,
"grad_norm": 12.9031259829694,
"learning_rate": 5.228094590111482e-06,
"loss": 1.6988537311553955,
"step": 1562
},
{
"epoch": 1.6130030959752322,
"grad_norm": 8.543930368744116,
"learning_rate": 5.222096105229706e-06,
"loss": 1.2279560565948486,
"step": 1563
},
{
"epoch": 1.6140350877192984,
"grad_norm": 21.64334602867434,
"learning_rate": 5.21609730004187e-06,
"loss": 1.220855712890625,
"step": 1564
},
{
"epoch": 1.6150670794633641,
"grad_norm": 8.654475735092758,
"learning_rate": 5.210098183199425e-06,
"loss": 1.4797768592834473,
"step": 1565
},
{
"epoch": 1.6160990712074303,
"grad_norm": 9.732715925307753,
"learning_rate": 5.204098763354271e-06,
"loss": 1.3446614742279053,
"step": 1566
},
{
"epoch": 1.6171310629514963,
"grad_norm": 13.726568655001055,
"learning_rate": 5.198099049158747e-06,
"loss": 1.5844343900680542,
"step": 1567
},
{
"epoch": 1.6181630546955623,
"grad_norm": 13.041238597711342,
"learning_rate": 5.1920990492656135e-06,
"loss": 1.4087917804718018,
"step": 1568
},
{
"epoch": 1.6191950464396285,
"grad_norm": 10.797232297896977,
"learning_rate": 5.186098772328045e-06,
"loss": 1.329378604888916,
"step": 1569
},
{
"epoch": 1.6202270381836945,
"grad_norm": 19.667372833362254,
"learning_rate": 5.180098226999618e-06,
"loss": 1.7636022567749023,
"step": 1570
},
{
"epoch": 1.6212590299277605,
"grad_norm": 12.2622869747031,
"learning_rate": 5.174097421934292e-06,
"loss": 1.1490764617919922,
"step": 1571
},
{
"epoch": 1.6222910216718267,
"grad_norm": 21.98393618159907,
"learning_rate": 5.168096365786402e-06,
"loss": 2.3102211952209473,
"step": 1572
},
{
"epoch": 1.6233230134158927,
"grad_norm": 9.735773744875136,
"learning_rate": 5.162095067210649e-06,
"loss": 2.117244243621826,
"step": 1573
},
{
"epoch": 1.6243550051599587,
"grad_norm": 18.463264815416654,
"learning_rate": 5.156093534862073e-06,
"loss": 1.6067651510238647,
"step": 1574
},
{
"epoch": 1.6253869969040249,
"grad_norm": 10.41108714363021,
"learning_rate": 5.150091777396064e-06,
"loss": 1.8751254081726074,
"step": 1575
},
{
"epoch": 1.6264189886480909,
"grad_norm": 16.631981321449587,
"learning_rate": 5.144089803468333e-06,
"loss": 1.724184513092041,
"step": 1576
},
{
"epoch": 1.6274509803921569,
"grad_norm": 16.402806927437133,
"learning_rate": 5.1380876217348975e-06,
"loss": 1.6436328887939453,
"step": 1577
},
{
"epoch": 1.628482972136223,
"grad_norm": 14.603029785469376,
"learning_rate": 5.132085240852081e-06,
"loss": 1.4703165292739868,
"step": 1578
},
{
"epoch": 1.6295149638802888,
"grad_norm": 10.809206838766594,
"learning_rate": 5.126082669476486e-06,
"loss": 1.5617787837982178,
"step": 1579
},
{
"epoch": 1.630546955624355,
"grad_norm": 15.021756779960803,
"learning_rate": 5.1200799162650035e-06,
"loss": 1.7255334854125977,
"step": 1580
},
{
"epoch": 1.631578947368421,
"grad_norm": 11.385645534836486,
"learning_rate": 5.114076989874774e-06,
"loss": 1.0603340864181519,
"step": 1581
},
{
"epoch": 1.632610939112487,
"grad_norm": 10.23379994812466,
"learning_rate": 5.108073898963194e-06,
"loss": 1.5189906358718872,
"step": 1582
},
{
"epoch": 1.6336429308565532,
"grad_norm": 9.144538402885932,
"learning_rate": 5.102070652187896e-06,
"loss": 1.7182832956314087,
"step": 1583
},
{
"epoch": 1.6346749226006192,
"grad_norm": 9.54719126606895,
"learning_rate": 5.096067258206735e-06,
"loss": 1.6244089603424072,
"step": 1584
},
{
"epoch": 1.6357069143446852,
"grad_norm": 14.084997047091763,
"learning_rate": 5.090063725677783e-06,
"loss": 1.9122145175933838,
"step": 1585
},
{
"epoch": 1.6367389060887514,
"grad_norm": 10.013449437705365,
"learning_rate": 5.084060063259307e-06,
"loss": 1.6446342468261719,
"step": 1586
},
{
"epoch": 1.6377708978328174,
"grad_norm": 17.089763116564722,
"learning_rate": 5.078056279609765e-06,
"loss": 1.7437760829925537,
"step": 1587
},
{
"epoch": 1.6388028895768834,
"grad_norm": 11.044670734053568,
"learning_rate": 5.072052383387787e-06,
"loss": 1.2320588827133179,
"step": 1588
},
{
"epoch": 1.6398348813209496,
"grad_norm": 14.08195744318699,
"learning_rate": 5.066048383252167e-06,
"loss": 1.3749891519546509,
"step": 1589
},
{
"epoch": 1.6408668730650153,
"grad_norm": 16.434964061637274,
"learning_rate": 5.060044287861849e-06,
"loss": 1.593663215637207,
"step": 1590
},
{
"epoch": 1.6418988648090815,
"grad_norm": 14.051200573452352,
"learning_rate": 5.0540401058759146e-06,
"loss": 1.4485447406768799,
"step": 1591
},
{
"epoch": 1.6429308565531475,
"grad_norm": 7.510337244595722,
"learning_rate": 5.048035845953569e-06,
"loss": 1.5242805480957031,
"step": 1592
},
{
"epoch": 1.6439628482972135,
"grad_norm": 8.52829998314459,
"learning_rate": 5.0420315167541276e-06,
"loss": 1.5791704654693604,
"step": 1593
},
{
"epoch": 1.6449948400412797,
"grad_norm": 8.817881646576703,
"learning_rate": 5.036027126937013e-06,
"loss": 1.4871054887771606,
"step": 1594
},
{
"epoch": 1.6460268317853457,
"grad_norm": 36.28513377894254,
"learning_rate": 5.030022685161728e-06,
"loss": 1.431492567062378,
"step": 1595
},
{
"epoch": 1.6470588235294117,
"grad_norm": 7.8606543397940944,
"learning_rate": 5.024018200087855e-06,
"loss": 1.1094318628311157,
"step": 1596
},
{
"epoch": 1.648090815273478,
"grad_norm": 12.578770283939217,
"learning_rate": 5.018013680375035e-06,
"loss": 1.6543710231781006,
"step": 1597
},
{
"epoch": 1.6491228070175439,
"grad_norm": 9.50037107760328,
"learning_rate": 5.012009134682962e-06,
"loss": 1.903153896331787,
"step": 1598
},
{
"epoch": 1.6501547987616099,
"grad_norm": 7.830729585220461,
"learning_rate": 5.006004571671366e-06,
"loss": 1.0540351867675781,
"step": 1599
},
{
"epoch": 1.651186790505676,
"grad_norm": 10.062924028777218,
"learning_rate": 5e-06,
"loss": 2.05865216255188,
"step": 1600
},
{
"epoch": 1.652218782249742,
"grad_norm": 15.157412191135569,
"learning_rate": 4.993995428328636e-06,
"loss": 1.5756983757019043,
"step": 1601
},
{
"epoch": 1.653250773993808,
"grad_norm": 9.663025370177763,
"learning_rate": 4.987990865317041e-06,
"loss": 1.184818148612976,
"step": 1602
},
{
"epoch": 1.6542827657378743,
"grad_norm": 14.143188141995502,
"learning_rate": 4.981986319624967e-06,
"loss": 1.0625786781311035,
"step": 1603
},
{
"epoch": 1.65531475748194,
"grad_norm": 8.063283383306972,
"learning_rate": 4.975981799912147e-06,
"loss": 1.6618146896362305,
"step": 1604
},
{
"epoch": 1.6563467492260062,
"grad_norm": 12.709312761105126,
"learning_rate": 4.969977314838272e-06,
"loss": 1.8030552864074707,
"step": 1605
},
{
"epoch": 1.6573787409700722,
"grad_norm": 14.574230843269499,
"learning_rate": 4.9639728730629875e-06,
"loss": 1.3334550857543945,
"step": 1606
},
{
"epoch": 1.6584107327141382,
"grad_norm": 15.387353844548636,
"learning_rate": 4.957968483245872e-06,
"loss": 1.209212303161621,
"step": 1607
},
{
"epoch": 1.6594427244582044,
"grad_norm": 7.981954261851792,
"learning_rate": 4.951964154046432e-06,
"loss": 1.3116942644119263,
"step": 1608
},
{
"epoch": 1.6604747162022704,
"grad_norm": 8.253089376554888,
"learning_rate": 4.945959894124087e-06,
"loss": 2.226292848587036,
"step": 1609
},
{
"epoch": 1.6615067079463364,
"grad_norm": 12.043947987554164,
"learning_rate": 4.939955712138152e-06,
"loss": 1.098877191543579,
"step": 1610
},
{
"epoch": 1.6625386996904026,
"grad_norm": 20.431447291322144,
"learning_rate": 4.933951616747836e-06,
"loss": 1.091958999633789,
"step": 1611
},
{
"epoch": 1.6635706914344686,
"grad_norm": 15.057623937343532,
"learning_rate": 4.927947616612216e-06,
"loss": 1.723926067352295,
"step": 1612
},
{
"epoch": 1.6646026831785345,
"grad_norm": 12.230671689105673,
"learning_rate": 4.921943720390237e-06,
"loss": 1.3400969505310059,
"step": 1613
},
{
"epoch": 1.6656346749226008,
"grad_norm": 7.6343512879012305,
"learning_rate": 4.915939936740695e-06,
"loss": 1.2368969917297363,
"step": 1614
},
{
"epoch": 1.6666666666666665,
"grad_norm": 10.268261353546139,
"learning_rate": 4.909936274322218e-06,
"loss": 2.005229949951172,
"step": 1615
},
{
"epoch": 1.6676986584107327,
"grad_norm": 8.935903383990896,
"learning_rate": 4.903932741793266e-06,
"loss": 1.6855125427246094,
"step": 1616
},
{
"epoch": 1.6687306501547987,
"grad_norm": 10.95300941540321,
"learning_rate": 4.897929347812105e-06,
"loss": 2.65848445892334,
"step": 1617
},
{
"epoch": 1.6697626418988647,
"grad_norm": 11.881114482756562,
"learning_rate": 4.891926101036807e-06,
"loss": 1.18087899684906,
"step": 1618
},
{
"epoch": 1.670794633642931,
"grad_norm": 11.21765463146983,
"learning_rate": 4.8859230101252265e-06,
"loss": 1.3030716180801392,
"step": 1619
},
{
"epoch": 1.671826625386997,
"grad_norm": 9.896122171751951,
"learning_rate": 4.879920083734997e-06,
"loss": 1.1979525089263916,
"step": 1620
},
{
"epoch": 1.6728586171310629,
"grad_norm": 10.086324155948205,
"learning_rate": 4.873917330523515e-06,
"loss": 1.686155080795288,
"step": 1621
},
{
"epoch": 1.673890608875129,
"grad_norm": 16.56729831056172,
"learning_rate": 4.867914759147923e-06,
"loss": 1.1940433979034424,
"step": 1622
},
{
"epoch": 1.674922600619195,
"grad_norm": 11.583028128884898,
"learning_rate": 4.861912378265105e-06,
"loss": 1.2244114875793457,
"step": 1623
},
{
"epoch": 1.675954592363261,
"grad_norm": 10.593147180242253,
"learning_rate": 4.855910196531669e-06,
"loss": 1.3701207637786865,
"step": 1624
},
{
"epoch": 1.6769865841073273,
"grad_norm": 8.676009971461912,
"learning_rate": 4.849908222603935e-06,
"loss": 1.537056803703308,
"step": 1625
},
{
"epoch": 1.678018575851393,
"grad_norm": 9.6472850150048,
"learning_rate": 4.843906465137928e-06,
"loss": 1.1899044513702393,
"step": 1626
},
{
"epoch": 1.6790505675954592,
"grad_norm": 11.331798705960017,
"learning_rate": 4.837904932789354e-06,
"loss": 1.2348029613494873,
"step": 1627
},
{
"epoch": 1.6800825593395254,
"grad_norm": 14.519718124029408,
"learning_rate": 4.8319036342135985e-06,
"loss": 1.33205246925354,
"step": 1628
},
{
"epoch": 1.6811145510835912,
"grad_norm": 9.937051983890404,
"learning_rate": 4.825902578065709e-06,
"loss": 1.4491955041885376,
"step": 1629
},
{
"epoch": 1.6821465428276574,
"grad_norm": 13.640510767187013,
"learning_rate": 4.8199017730003835e-06,
"loss": 1.3364590406417847,
"step": 1630
},
{
"epoch": 1.6831785345717234,
"grad_norm": 9.463468217820962,
"learning_rate": 4.813901227671956e-06,
"loss": 1.0079162120819092,
"step": 1631
},
{
"epoch": 1.6842105263157894,
"grad_norm": 16.78555368860191,
"learning_rate": 4.807900950734388e-06,
"loss": 1.1904146671295166,
"step": 1632
},
{
"epoch": 1.6852425180598556,
"grad_norm": 17.46837597768086,
"learning_rate": 4.801900950841256e-06,
"loss": 1.6875295639038086,
"step": 1633
},
{
"epoch": 1.6862745098039216,
"grad_norm": 11.04892393388373,
"learning_rate": 4.7959012366457296e-06,
"loss": 1.0612598657608032,
"step": 1634
},
{
"epoch": 1.6873065015479876,
"grad_norm": 10.951994980251744,
"learning_rate": 4.789901816800576e-06,
"loss": 1.220237374305725,
"step": 1635
},
{
"epoch": 1.6883384932920538,
"grad_norm": 10.810193358891974,
"learning_rate": 4.78390269995813e-06,
"loss": 2.282114267349243,
"step": 1636
},
{
"epoch": 1.6893704850361198,
"grad_norm": 10.601081487834142,
"learning_rate": 4.777903894770295e-06,
"loss": 1.8550426959991455,
"step": 1637
},
{
"epoch": 1.6904024767801857,
"grad_norm": 16.937769027859023,
"learning_rate": 4.771905409888519e-06,
"loss": 1.6059949398040771,
"step": 1638
},
{
"epoch": 1.691434468524252,
"grad_norm": 11.82584669633604,
"learning_rate": 4.765907253963794e-06,
"loss": 1.5053925514221191,
"step": 1639
},
{
"epoch": 1.6924664602683177,
"grad_norm": 21.097151926344726,
"learning_rate": 4.759909435646636e-06,
"loss": 1.4629043340682983,
"step": 1640
},
{
"epoch": 1.693498452012384,
"grad_norm": 13.235912731207039,
"learning_rate": 4.75391196358707e-06,
"loss": 1.3238599300384521,
"step": 1641
},
{
"epoch": 1.69453044375645,
"grad_norm": 7.8725490128533675,
"learning_rate": 4.747914846434628e-06,
"loss": 1.45640230178833,
"step": 1642
},
{
"epoch": 1.6955624355005159,
"grad_norm": 14.946160581628561,
"learning_rate": 4.741918092838323e-06,
"loss": 1.2770317792892456,
"step": 1643
},
{
"epoch": 1.696594427244582,
"grad_norm": 8.510880011836228,
"learning_rate": 4.735921711446649e-06,
"loss": 2.1564223766326904,
"step": 1644
},
{
"epoch": 1.697626418988648,
"grad_norm": 14.277925313390723,
"learning_rate": 4.729925710907564e-06,
"loss": 1.5286706686019897,
"step": 1645
},
{
"epoch": 1.698658410732714,
"grad_norm": 19.948193785198274,
"learning_rate": 4.723930099868474e-06,
"loss": 1.0891897678375244,
"step": 1646
},
{
"epoch": 1.6996904024767803,
"grad_norm": 17.95581198688877,
"learning_rate": 4.717934886976222e-06,
"loss": 1.64451265335083,
"step": 1647
},
{
"epoch": 1.7007223942208463,
"grad_norm": 8.13400056672354,
"learning_rate": 4.711940080877079e-06,
"loss": 2.1116182804107666,
"step": 1648
},
{
"epoch": 1.7017543859649122,
"grad_norm": 11.37852718181068,
"learning_rate": 4.705945690216732e-06,
"loss": 1.7777572870254517,
"step": 1649
},
{
"epoch": 1.7027863777089784,
"grad_norm": 15.414009752148521,
"learning_rate": 4.6999517236402606e-06,
"loss": 1.2301387786865234,
"step": 1650
},
{
"epoch": 1.7038183694530442,
"grad_norm": 10.564694903218198,
"learning_rate": 4.693958189792141e-06,
"loss": 1.45538330078125,
"step": 1651
},
{
"epoch": 1.7048503611971104,
"grad_norm": 10.880485640783963,
"learning_rate": 4.687965097316223e-06,
"loss": 1.4673454761505127,
"step": 1652
},
{
"epoch": 1.7058823529411766,
"grad_norm": 12.193018066560857,
"learning_rate": 4.681972454855716e-06,
"loss": 1.5198006629943848,
"step": 1653
},
{
"epoch": 1.7069143446852424,
"grad_norm": 11.781383156813746,
"learning_rate": 4.675980271053188e-06,
"loss": 1.2425904273986816,
"step": 1654
},
{
"epoch": 1.7079463364293086,
"grad_norm": 10.567582329941056,
"learning_rate": 4.669988554550537e-06,
"loss": 1.4114564657211304,
"step": 1655
},
{
"epoch": 1.7089783281733746,
"grad_norm": 10.888758406783566,
"learning_rate": 4.6639973139889944e-06,
"loss": 1.25340735912323,
"step": 1656
},
{
"epoch": 1.7100103199174406,
"grad_norm": 8.530234178613362,
"learning_rate": 4.658006558009099e-06,
"loss": 1.8368052244186401,
"step": 1657
},
{
"epoch": 1.7110423116615068,
"grad_norm": 8.41778259091175,
"learning_rate": 4.6520162952506955e-06,
"loss": 1.3329459428787231,
"step": 1658
},
{
"epoch": 1.7120743034055728,
"grad_norm": 11.408600150564817,
"learning_rate": 4.646026534352915e-06,
"loss": 1.4662222862243652,
"step": 1659
},
{
"epoch": 1.7131062951496387,
"grad_norm": 10.198028210749765,
"learning_rate": 4.640037283954165e-06,
"loss": 1.2321686744689941,
"step": 1660
},
{
"epoch": 1.714138286893705,
"grad_norm": 14.136421995011203,
"learning_rate": 4.634048552692118e-06,
"loss": 1.333176851272583,
"step": 1661
},
{
"epoch": 1.715170278637771,
"grad_norm": 22.44277745548592,
"learning_rate": 4.628060349203696e-06,
"loss": 1.7321614027023315,
"step": 1662
},
{
"epoch": 1.716202270381837,
"grad_norm": 9.246555897894348,
"learning_rate": 4.6220726821250585e-06,
"loss": 1.0161447525024414,
"step": 1663
},
{
"epoch": 1.7172342621259031,
"grad_norm": 14.785133991387573,
"learning_rate": 4.616085560091596e-06,
"loss": 2.0218515396118164,
"step": 1664
},
{
"epoch": 1.718266253869969,
"grad_norm": 10.769115891694414,
"learning_rate": 4.6100989917379135e-06,
"loss": 1.2232139110565186,
"step": 1665
},
{
"epoch": 1.719298245614035,
"grad_norm": 11.807273474877455,
"learning_rate": 4.604112985697809e-06,
"loss": 1.8226964473724365,
"step": 1666
},
{
"epoch": 1.720330237358101,
"grad_norm": 12.289979140449626,
"learning_rate": 4.598127550604277e-06,
"loss": 1.0019363164901733,
"step": 1667
},
{
"epoch": 1.721362229102167,
"grad_norm": 8.738085743756857,
"learning_rate": 4.592142695089489e-06,
"loss": 1.4394011497497559,
"step": 1668
},
{
"epoch": 1.7223942208462333,
"grad_norm": 13.565046659820133,
"learning_rate": 4.586158427784774e-06,
"loss": 1.683599829673767,
"step": 1669
},
{
"epoch": 1.7234262125902993,
"grad_norm": 11.027567358705781,
"learning_rate": 4.580174757320619e-06,
"loss": 1.4985852241516113,
"step": 1670
},
{
"epoch": 1.7244582043343653,
"grad_norm": 7.447607771930486,
"learning_rate": 4.57419169232665e-06,
"loss": 1.4004662036895752,
"step": 1671
},
{
"epoch": 1.7254901960784315,
"grad_norm": 15.051229317074,
"learning_rate": 4.568209241431615e-06,
"loss": 1.4091589450836182,
"step": 1672
},
{
"epoch": 1.7265221878224974,
"grad_norm": 16.506203657234344,
"learning_rate": 4.5622274132633785e-06,
"loss": 1.4664890766143799,
"step": 1673
},
{
"epoch": 1.7275541795665634,
"grad_norm": 10.232611043119553,
"learning_rate": 4.556246216448911e-06,
"loss": 1.2841026782989502,
"step": 1674
},
{
"epoch": 1.7285861713106296,
"grad_norm": 7.835802175897451,
"learning_rate": 4.5502656596142695e-06,
"loss": 1.7837278842926025,
"step": 1675
},
{
"epoch": 1.7296181630546954,
"grad_norm": 12.624683401827452,
"learning_rate": 4.544285751384585e-06,
"loss": 2.0063414573669434,
"step": 1676
},
{
"epoch": 1.7306501547987616,
"grad_norm": 16.539887599121375,
"learning_rate": 4.538306500384056e-06,
"loss": 0.8558920621871948,
"step": 1677
},
{
"epoch": 1.7316821465428278,
"grad_norm": 16.751030182188703,
"learning_rate": 4.5323279152359355e-06,
"loss": 1.3210715055465698,
"step": 1678
},
{
"epoch": 1.7327141382868936,
"grad_norm": 7.039841235166431,
"learning_rate": 4.526350004562511e-06,
"loss": 2.069108009338379,
"step": 1679
},
{
"epoch": 1.7337461300309598,
"grad_norm": 11.392043707468872,
"learning_rate": 4.520372776985101e-06,
"loss": 1.338263750076294,
"step": 1680
},
{
"epoch": 1.7347781217750258,
"grad_norm": 10.516192126098069,
"learning_rate": 4.5143962411240375e-06,
"loss": 1.6379947662353516,
"step": 1681
},
{
"epoch": 1.7358101135190918,
"grad_norm": 9.35772522779427,
"learning_rate": 4.508420405598653e-06,
"loss": 2.372286081314087,
"step": 1682
},
{
"epoch": 1.736842105263158,
"grad_norm": 9.517241992831798,
"learning_rate": 4.502445279027277e-06,
"loss": 0.9688540697097778,
"step": 1683
},
{
"epoch": 1.737874097007224,
"grad_norm": 13.372923199452366,
"learning_rate": 4.496470870027209e-06,
"loss": 1.966747760772705,
"step": 1684
},
{
"epoch": 1.73890608875129,
"grad_norm": 10.363511311415849,
"learning_rate": 4.490497187214716e-06,
"loss": 1.199428915977478,
"step": 1685
},
{
"epoch": 1.7399380804953561,
"grad_norm": 18.07202517668516,
"learning_rate": 4.484524239205018e-06,
"loss": 1.5780396461486816,
"step": 1686
},
{
"epoch": 1.7409700722394221,
"grad_norm": 9.799818692093627,
"learning_rate": 4.478552034612277e-06,
"loss": 1.6711639165878296,
"step": 1687
},
{
"epoch": 1.7420020639834881,
"grad_norm": 11.68127685838972,
"learning_rate": 4.472580582049578e-06,
"loss": 1.4168503284454346,
"step": 1688
},
{
"epoch": 1.7430340557275543,
"grad_norm": 8.526896445904088,
"learning_rate": 4.4666098901289275e-06,
"loss": 1.3873357772827148,
"step": 1689
},
{
"epoch": 1.74406604747162,
"grad_norm": 9.47407309758453,
"learning_rate": 4.460639967461231e-06,
"loss": 0.9989665746688843,
"step": 1690
},
{
"epoch": 1.7450980392156863,
"grad_norm": 12.40847033937051,
"learning_rate": 4.4546708226562855e-06,
"loss": 1.3871872425079346,
"step": 1691
},
{
"epoch": 1.7461300309597523,
"grad_norm": 13.755098765725549,
"learning_rate": 4.448702464322764e-06,
"loss": 1.8586505651474,
"step": 1692
},
{
"epoch": 1.7471620227038183,
"grad_norm": 15.66048747585057,
"learning_rate": 4.4427349010682125e-06,
"loss": 0.824250340461731,
"step": 1693
},
{
"epoch": 1.7481940144478845,
"grad_norm": 12.753131385497102,
"learning_rate": 4.4367681414990235e-06,
"loss": 1.7118589878082275,
"step": 1694
},
{
"epoch": 1.7492260061919505,
"grad_norm": 10.39540501438844,
"learning_rate": 4.43080219422043e-06,
"loss": 1.2179622650146484,
"step": 1695
},
{
"epoch": 1.7502579979360164,
"grad_norm": 13.089449428313449,
"learning_rate": 4.4248370678364995e-06,
"loss": 2.330209732055664,
"step": 1696
},
{
"epoch": 1.7512899896800826,
"grad_norm": 8.196659926334611,
"learning_rate": 4.418872770950109e-06,
"loss": 1.450493335723877,
"step": 1697
},
{
"epoch": 1.7523219814241486,
"grad_norm": 11.185288051737096,
"learning_rate": 4.412909312162943e-06,
"loss": 1.42171311378479,
"step": 1698
},
{
"epoch": 1.7533539731682146,
"grad_norm": 8.29516600428506,
"learning_rate": 4.406946700075478e-06,
"loss": 1.521761178970337,
"step": 1699
},
{
"epoch": 1.7543859649122808,
"grad_norm": 11.018372635918848,
"learning_rate": 4.400984943286965e-06,
"loss": 1.8015716075897217,
"step": 1700
},
{
"epoch": 1.7554179566563466,
"grad_norm": 15.133587195079995,
"learning_rate": 4.395024050395425e-06,
"loss": 0.9233524203300476,
"step": 1701
},
{
"epoch": 1.7564499484004128,
"grad_norm": 11.500988193878046,
"learning_rate": 4.3890640299976345e-06,
"loss": 1.43326997756958,
"step": 1702
},
{
"epoch": 1.757481940144479,
"grad_norm": 12.310396485005848,
"learning_rate": 4.38310489068911e-06,
"loss": 1.3258637189865112,
"step": 1703
},
{
"epoch": 1.7585139318885448,
"grad_norm": 8.523204733246939,
"learning_rate": 4.377146641064093e-06,
"loss": 1.4248607158660889,
"step": 1704
},
{
"epoch": 1.759545923632611,
"grad_norm": 19.205926331715546,
"learning_rate": 4.371189289715549e-06,
"loss": 1.5387122631072998,
"step": 1705
},
{
"epoch": 1.760577915376677,
"grad_norm": 15.113038777535353,
"learning_rate": 4.365232845235146e-06,
"loss": 0.923610508441925,
"step": 1706
},
{
"epoch": 1.761609907120743,
"grad_norm": 12.698087301403156,
"learning_rate": 4.35927731621324e-06,
"loss": 0.9821719527244568,
"step": 1707
},
{
"epoch": 1.7626418988648092,
"grad_norm": 9.948047333565306,
"learning_rate": 4.3533227112388694e-06,
"loss": 1.2262959480285645,
"step": 1708
},
{
"epoch": 1.7636738906088751,
"grad_norm": 14.826874614805199,
"learning_rate": 4.347369038899744e-06,
"loss": 1.7229928970336914,
"step": 1709
},
{
"epoch": 1.7647058823529411,
"grad_norm": 8.110995812952876,
"learning_rate": 4.34141630778222e-06,
"loss": 1.8245748281478882,
"step": 1710
},
{
"epoch": 1.7657378740970073,
"grad_norm": 9.695390031194178,
"learning_rate": 4.335464526471303e-06,
"loss": 1.5449144840240479,
"step": 1711
},
{
"epoch": 1.7667698658410733,
"grad_norm": 13.059824028606869,
"learning_rate": 4.329513703550628e-06,
"loss": 1.0499621629714966,
"step": 1712
},
{
"epoch": 1.7678018575851393,
"grad_norm": 18.788511042618204,
"learning_rate": 4.323563847602447e-06,
"loss": 1.1592087745666504,
"step": 1713
},
{
"epoch": 1.7688338493292055,
"grad_norm": 12.573399404834106,
"learning_rate": 4.317614967207615e-06,
"loss": 1.5952107906341553,
"step": 1714
},
{
"epoch": 1.7698658410732713,
"grad_norm": 21.209842652730753,
"learning_rate": 4.3116670709455835e-06,
"loss": 1.9653873443603516,
"step": 1715
},
{
"epoch": 1.7708978328173375,
"grad_norm": 14.706837026604616,
"learning_rate": 4.305720167394381e-06,
"loss": 1.9494094848632812,
"step": 1716
},
{
"epoch": 1.7719298245614035,
"grad_norm": 19.022816684709333,
"learning_rate": 4.299774265130609e-06,
"loss": 1.2872564792633057,
"step": 1717
},
{
"epoch": 1.7729618163054695,
"grad_norm": 13.219640085670449,
"learning_rate": 4.293829372729422e-06,
"loss": 1.2262442111968994,
"step": 1718
},
{
"epoch": 1.7739938080495357,
"grad_norm": 14.616500111422022,
"learning_rate": 4.287885498764518e-06,
"loss": 1.4967855215072632,
"step": 1719
},
{
"epoch": 1.7750257997936016,
"grad_norm": 9.026475934495569,
"learning_rate": 4.2819426518081265e-06,
"loss": 1.3772231340408325,
"step": 1720
},
{
"epoch": 1.7760577915376676,
"grad_norm": 10.921117337301181,
"learning_rate": 4.276000840430996e-06,
"loss": 1.3878000974655151,
"step": 1721
},
{
"epoch": 1.7770897832817338,
"grad_norm": 10.632278457360254,
"learning_rate": 4.270060073202384e-06,
"loss": 1.5760631561279297,
"step": 1722
},
{
"epoch": 1.7781217750257998,
"grad_norm": 12.506170854043054,
"learning_rate": 4.264120358690037e-06,
"loss": 1.424022912979126,
"step": 1723
},
{
"epoch": 1.7791537667698658,
"grad_norm": 10.890925941428618,
"learning_rate": 4.258181705460188e-06,
"loss": 0.8698490858078003,
"step": 1724
},
{
"epoch": 1.780185758513932,
"grad_norm": 12.789536760043523,
"learning_rate": 4.252244122077538e-06,
"loss": 1.3168162107467651,
"step": 1725
},
{
"epoch": 1.7812177502579978,
"grad_norm": 12.835238696082937,
"learning_rate": 4.246307617105242e-06,
"loss": 1.3633615970611572,
"step": 1726
},
{
"epoch": 1.782249742002064,
"grad_norm": 11.61784719244646,
"learning_rate": 4.240372199104904e-06,
"loss": 1.4262614250183105,
"step": 1727
},
{
"epoch": 1.7832817337461302,
"grad_norm": 9.6906194820901,
"learning_rate": 4.234437876636557e-06,
"loss": 2.1347155570983887,
"step": 1728
},
{
"epoch": 1.784313725490196,
"grad_norm": 10.324544526775666,
"learning_rate": 4.2285046582586585e-06,
"loss": 2.0410780906677246,
"step": 1729
},
{
"epoch": 1.7853457172342622,
"grad_norm": 12.604320361206465,
"learning_rate": 4.222572552528067e-06,
"loss": 1.3613595962524414,
"step": 1730
},
{
"epoch": 1.7863777089783281,
"grad_norm": 18.674271236695578,
"learning_rate": 4.216641568000044e-06,
"loss": 0.9914162755012512,
"step": 1731
},
{
"epoch": 1.7874097007223941,
"grad_norm": 10.550522092811725,
"learning_rate": 4.21071171322823e-06,
"loss": 1.4601118564605713,
"step": 1732
},
{
"epoch": 1.7884416924664603,
"grad_norm": 13.143024178808387,
"learning_rate": 4.204782996764634e-06,
"loss": 1.7215886116027832,
"step": 1733
},
{
"epoch": 1.7894736842105263,
"grad_norm": 12.946786676017096,
"learning_rate": 4.198855427159628e-06,
"loss": 1.5440949201583862,
"step": 1734
},
{
"epoch": 1.7905056759545923,
"grad_norm": 15.205947894883542,
"learning_rate": 4.192929012961927e-06,
"loss": 1.1962049007415771,
"step": 1735
},
{
"epoch": 1.7915376676986585,
"grad_norm": 11.249024041201825,
"learning_rate": 4.1870037627185785e-06,
"loss": 1.5020911693572998,
"step": 1736
},
{
"epoch": 1.7925696594427245,
"grad_norm": 7.122886080105791,
"learning_rate": 4.181079684974956e-06,
"loss": 1.357811450958252,
"step": 1737
},
{
"epoch": 1.7936016511867905,
"grad_norm": 8.219069585584625,
"learning_rate": 4.175156788274738e-06,
"loss": 0.9888667464256287,
"step": 1738
},
{
"epoch": 1.7946336429308567,
"grad_norm": 10.65778312145511,
"learning_rate": 4.169235081159901e-06,
"loss": 1.6089493036270142,
"step": 1739
},
{
"epoch": 1.7956656346749225,
"grad_norm": 14.420633707013382,
"learning_rate": 4.163314572170704e-06,
"loss": 1.5799247026443481,
"step": 1740
},
{
"epoch": 1.7966976264189887,
"grad_norm": 10.779161976759232,
"learning_rate": 4.157395269845684e-06,
"loss": 1.1055116653442383,
"step": 1741
},
{
"epoch": 1.7977296181630547,
"grad_norm": 8.526212194371725,
"learning_rate": 4.151477182721632e-06,
"loss": 1.9454221725463867,
"step": 1742
},
{
"epoch": 1.7987616099071206,
"grad_norm": 12.654906488380304,
"learning_rate": 4.145560319333587e-06,
"loss": 0.8242064714431763,
"step": 1743
},
{
"epoch": 1.7997936016511868,
"grad_norm": 12.0745341858681,
"learning_rate": 4.139644688214827e-06,
"loss": 1.7421832084655762,
"step": 1744
},
{
"epoch": 1.8008255933952528,
"grad_norm": 11.404849956167968,
"learning_rate": 4.133730297896846e-06,
"loss": 0.9724714756011963,
"step": 1745
},
{
"epoch": 1.8018575851393188,
"grad_norm": 9.798225085263166,
"learning_rate": 4.1278171569093564e-06,
"loss": 1.2271108627319336,
"step": 1746
},
{
"epoch": 1.802889576883385,
"grad_norm": 7.826160390229573,
"learning_rate": 4.1219052737802624e-06,
"loss": 1.3896276950836182,
"step": 1747
},
{
"epoch": 1.803921568627451,
"grad_norm": 8.696271555135679,
"learning_rate": 4.115994657035659e-06,
"loss": 1.5526032447814941,
"step": 1748
},
{
"epoch": 1.804953560371517,
"grad_norm": 12.860082171763079,
"learning_rate": 4.110085315199811e-06,
"loss": 2.7864584922790527,
"step": 1749
},
{
"epoch": 1.8059855521155832,
"grad_norm": 12.092757095998914,
"learning_rate": 4.104177256795145e-06,
"loss": 1.6778318881988525,
"step": 1750
},
{
"epoch": 1.807017543859649,
"grad_norm": 9.577992702190265,
"learning_rate": 4.098270490342241e-06,
"loss": 1.6193572282791138,
"step": 1751
},
{
"epoch": 1.8080495356037152,
"grad_norm": 12.270427643491995,
"learning_rate": 4.0923650243598104e-06,
"loss": 2.024756908416748,
"step": 1752
},
{
"epoch": 1.8090815273477814,
"grad_norm": 15.143367732619192,
"learning_rate": 4.086460867364694e-06,
"loss": 1.6637275218963623,
"step": 1753
},
{
"epoch": 1.8101135190918471,
"grad_norm": 12.545136666185163,
"learning_rate": 4.0805580278718364e-06,
"loss": 1.3071680068969727,
"step": 1754
},
{
"epoch": 1.8111455108359134,
"grad_norm": 6.4743236934340915,
"learning_rate": 4.074656514394292e-06,
"loss": 1.4666328430175781,
"step": 1755
},
{
"epoch": 1.8121775025799793,
"grad_norm": 9.6880750212984,
"learning_rate": 4.0687563354431986e-06,
"loss": 1.2108535766601562,
"step": 1756
},
{
"epoch": 1.8132094943240453,
"grad_norm": 9.240024657830892,
"learning_rate": 4.062857499527767e-06,
"loss": 1.6777081489562988,
"step": 1757
},
{
"epoch": 1.8142414860681115,
"grad_norm": 8.029446154665262,
"learning_rate": 4.0569600151552745e-06,
"loss": 1.3032546043395996,
"step": 1758
},
{
"epoch": 1.8152734778121775,
"grad_norm": 13.2134299208589,
"learning_rate": 4.051063890831047e-06,
"loss": 1.6355738639831543,
"step": 1759
},
{
"epoch": 1.8163054695562435,
"grad_norm": 18.25673877600439,
"learning_rate": 4.045169135058452e-06,
"loss": 1.8013114929199219,
"step": 1760
},
{
"epoch": 1.8173374613003097,
"grad_norm": 17.253970737512073,
"learning_rate": 4.039275756338881e-06,
"loss": 1.6528455018997192,
"step": 1761
},
{
"epoch": 1.8183694530443757,
"grad_norm": 15.441558754292478,
"learning_rate": 4.033383763171738e-06,
"loss": 2.3111886978149414,
"step": 1762
},
{
"epoch": 1.8194014447884417,
"grad_norm": 31.216500987812733,
"learning_rate": 4.027493164054433e-06,
"loss": 1.7374169826507568,
"step": 1763
},
{
"epoch": 1.8204334365325079,
"grad_norm": 10.023239892905801,
"learning_rate": 4.021603967482361e-06,
"loss": 1.4004958868026733,
"step": 1764
},
{
"epoch": 1.8214654282765737,
"grad_norm": 11.113603285626901,
"learning_rate": 4.015716181948897e-06,
"loss": 1.4151482582092285,
"step": 1765
},
{
"epoch": 1.8224974200206399,
"grad_norm": 10.344500401937307,
"learning_rate": 4.009829815945381e-06,
"loss": 1.4603420495986938,
"step": 1766
},
{
"epoch": 1.8235294117647058,
"grad_norm": 11.901402794224847,
"learning_rate": 4.003944877961102e-06,
"loss": 1.086212158203125,
"step": 1767
},
{
"epoch": 1.8245614035087718,
"grad_norm": 9.500123542012892,
"learning_rate": 3.998061376483298e-06,
"loss": 1.8286771774291992,
"step": 1768
},
{
"epoch": 1.825593395252838,
"grad_norm": 13.81642804897701,
"learning_rate": 3.9921793199971235e-06,
"loss": 1.088291883468628,
"step": 1769
},
{
"epoch": 1.826625386996904,
"grad_norm": 10.213108360136134,
"learning_rate": 3.98629871698566e-06,
"loss": 1.7142021656036377,
"step": 1770
},
{
"epoch": 1.82765737874097,
"grad_norm": 12.73036098533881,
"learning_rate": 3.980419575929888e-06,
"loss": 1.4606733322143555,
"step": 1771
},
{
"epoch": 1.8286893704850362,
"grad_norm": 9.835511553146963,
"learning_rate": 3.974541905308679e-06,
"loss": 1.0927921533584595,
"step": 1772
},
{
"epoch": 1.8297213622291022,
"grad_norm": 16.83908725373327,
"learning_rate": 3.968665713598783e-06,
"loss": 1.8018839359283447,
"step": 1773
},
{
"epoch": 1.8307533539731682,
"grad_norm": 15.209294467658857,
"learning_rate": 3.962791009274821e-06,
"loss": 1.1861164569854736,
"step": 1774
},
{
"epoch": 1.8317853457172344,
"grad_norm": 7.937082353648308,
"learning_rate": 3.956917800809266e-06,
"loss": 1.1646783351898193,
"step": 1775
},
{
"epoch": 1.8328173374613002,
"grad_norm": 12.31141961301524,
"learning_rate": 3.951046096672434e-06,
"loss": 1.2939174175262451,
"step": 1776
},
{
"epoch": 1.8338493292053664,
"grad_norm": 9.585711399460749,
"learning_rate": 3.945175905332473e-06,
"loss": 1.9322181940078735,
"step": 1777
},
{
"epoch": 1.8348813209494326,
"grad_norm": 8.268824748296955,
"learning_rate": 3.939307235255343e-06,
"loss": 0.9638408422470093,
"step": 1778
},
{
"epoch": 1.8359133126934983,
"grad_norm": 12.594960878000762,
"learning_rate": 3.933440094904824e-06,
"loss": 2.2295453548431396,
"step": 1779
},
{
"epoch": 1.8369453044375645,
"grad_norm": 15.830275247088338,
"learning_rate": 3.927574492742473e-06,
"loss": 2.0274429321289062,
"step": 1780
},
{
"epoch": 1.8379772961816305,
"grad_norm": 13.33237828685689,
"learning_rate": 3.921710437227641e-06,
"loss": 1.5238763093948364,
"step": 1781
},
{
"epoch": 1.8390092879256965,
"grad_norm": 12.66844229019403,
"learning_rate": 3.915847936817442e-06,
"loss": 1.3702871799468994,
"step": 1782
},
{
"epoch": 1.8400412796697627,
"grad_norm": 17.03081179911215,
"learning_rate": 3.909986999966751e-06,
"loss": 1.4059205055236816,
"step": 1783
},
{
"epoch": 1.8410732714138287,
"grad_norm": 9.304730452471944,
"learning_rate": 3.904127635128184e-06,
"loss": 1.6512281894683838,
"step": 1784
},
{
"epoch": 1.8421052631578947,
"grad_norm": 10.942023695799591,
"learning_rate": 3.898269850752093e-06,
"loss": 1.360194206237793,
"step": 1785
},
{
"epoch": 1.843137254901961,
"grad_norm": 9.76664016025721,
"learning_rate": 3.892413655286551e-06,
"loss": 1.101757287979126,
"step": 1786
},
{
"epoch": 1.8441692466460269,
"grad_norm": 31.684418851573756,
"learning_rate": 3.886559057177337e-06,
"loss": 1.9761056900024414,
"step": 1787
},
{
"epoch": 1.8452012383900929,
"grad_norm": 9.70788789701846,
"learning_rate": 3.880706064867927e-06,
"loss": 1.732954502105713,
"step": 1788
},
{
"epoch": 1.846233230134159,
"grad_norm": 12.994694218945037,
"learning_rate": 3.8748546867994815e-06,
"loss": 1.4196879863739014,
"step": 1789
},
{
"epoch": 1.8472652218782248,
"grad_norm": 14.131533482147614,
"learning_rate": 3.8690049314108355e-06,
"loss": 0.7784943580627441,
"step": 1790
},
{
"epoch": 1.848297213622291,
"grad_norm": 7.894747060154967,
"learning_rate": 3.863156807138481e-06,
"loss": 1.5873610973358154,
"step": 1791
},
{
"epoch": 1.849329205366357,
"grad_norm": 8.08327356417994,
"learning_rate": 3.857310322416555e-06,
"loss": 1.2376326322555542,
"step": 1792
},
{
"epoch": 1.850361197110423,
"grad_norm": 9.79680906436854,
"learning_rate": 3.851465485676836e-06,
"loss": 1.8484421968460083,
"step": 1793
},
{
"epoch": 1.8513931888544892,
"grad_norm": 13.421851682551248,
"learning_rate": 3.845622305348723e-06,
"loss": 1.769080400466919,
"step": 1794
},
{
"epoch": 1.8524251805985552,
"grad_norm": 11.35164598325514,
"learning_rate": 3.839780789859222e-06,
"loss": 1.3666892051696777,
"step": 1795
},
{
"epoch": 1.8534571723426212,
"grad_norm": 7.278640163373893,
"learning_rate": 3.833940947632947e-06,
"loss": 1.421882152557373,
"step": 1796
},
{
"epoch": 1.8544891640866874,
"grad_norm": 15.123748431974601,
"learning_rate": 3.82810278709209e-06,
"loss": 0.8415699005126953,
"step": 1797
},
{
"epoch": 1.8555211558307534,
"grad_norm": 10.864648717292008,
"learning_rate": 3.822266316656421e-06,
"loss": 2.7428770065307617,
"step": 1798
},
{
"epoch": 1.8565531475748194,
"grad_norm": 11.717969553411004,
"learning_rate": 3.816431544743276e-06,
"loss": 1.419779658317566,
"step": 1799
},
{
"epoch": 1.8575851393188856,
"grad_norm": 17.129393236970014,
"learning_rate": 3.8105984797675364e-06,
"loss": 2.135234832763672,
"step": 1800
},
{
"epoch": 1.8586171310629513,
"grad_norm": 13.799126093066207,
"learning_rate": 3.8047671301416256e-06,
"loss": 1.2894712686538696,
"step": 1801
},
{
"epoch": 1.8596491228070176,
"grad_norm": 11.695133936167258,
"learning_rate": 3.798937504275489e-06,
"loss": 2.12467622756958,
"step": 1802
},
{
"epoch": 1.8606811145510835,
"grad_norm": 7.861185070945205,
"learning_rate": 3.79310961057659e-06,
"loss": 1.6944842338562012,
"step": 1803
},
{
"epoch": 1.8617131062951495,
"grad_norm": 19.604668930120155,
"learning_rate": 3.7872834574498894e-06,
"loss": 1.5712006092071533,
"step": 1804
},
{
"epoch": 1.8627450980392157,
"grad_norm": 18.118263316570626,
"learning_rate": 3.7814590532978428e-06,
"loss": 1.1979610919952393,
"step": 1805
},
{
"epoch": 1.8637770897832817,
"grad_norm": 11.588503990001286,
"learning_rate": 3.775636406520382e-06,
"loss": 1.2051764726638794,
"step": 1806
},
{
"epoch": 1.8648090815273477,
"grad_norm": 15.802393803467309,
"learning_rate": 3.7698155255149005e-06,
"loss": 1.317204236984253,
"step": 1807
},
{
"epoch": 1.865841073271414,
"grad_norm": 7.636660886174154,
"learning_rate": 3.7639964186762506e-06,
"loss": 1.3314225673675537,
"step": 1808
},
{
"epoch": 1.86687306501548,
"grad_norm": 9.568118739152196,
"learning_rate": 3.7581790943967228e-06,
"loss": 1.755152940750122,
"step": 1809
},
{
"epoch": 1.8679050567595459,
"grad_norm": 21.68620704466686,
"learning_rate": 3.752363561066039e-06,
"loss": 2.36037278175354,
"step": 1810
},
{
"epoch": 1.868937048503612,
"grad_norm": 10.836943489605135,
"learning_rate": 3.746549827071334e-06,
"loss": 1.3274281024932861,
"step": 1811
},
{
"epoch": 1.869969040247678,
"grad_norm": 18.231812775806993,
"learning_rate": 3.740737900797151e-06,
"loss": 2.0181620121002197,
"step": 1812
},
{
"epoch": 1.871001031991744,
"grad_norm": 10.002199945579473,
"learning_rate": 3.7349277906254278e-06,
"loss": 1.0888769626617432,
"step": 1813
},
{
"epoch": 1.8720330237358103,
"grad_norm": 13.642398873064936,
"learning_rate": 3.7291195049354776e-06,
"loss": 3.219508409500122,
"step": 1814
},
{
"epoch": 1.873065015479876,
"grad_norm": 14.86774936708945,
"learning_rate": 3.723313052103987e-06,
"loss": 1.6249029636383057,
"step": 1815
},
{
"epoch": 1.8740970072239422,
"grad_norm": 8.626190109352544,
"learning_rate": 3.7175084405049978e-06,
"loss": 1.7096291780471802,
"step": 1816
},
{
"epoch": 1.8751289989680082,
"grad_norm": 12.20701095710016,
"learning_rate": 3.7117056785098935e-06,
"loss": 1.2819960117340088,
"step": 1817
},
{
"epoch": 1.8761609907120742,
"grad_norm": 10.901552275933442,
"learning_rate": 3.705904774487396e-06,
"loss": 1.361170768737793,
"step": 1818
},
{
"epoch": 1.8771929824561404,
"grad_norm": 11.950482935105002,
"learning_rate": 3.7001057368035446e-06,
"loss": 0.9725128412246704,
"step": 1819
},
{
"epoch": 1.8782249742002064,
"grad_norm": 11.1783419506552,
"learning_rate": 3.6943085738216855e-06,
"loss": 1.6708776950836182,
"step": 1820
},
{
"epoch": 1.8792569659442724,
"grad_norm": 13.04360676985624,
"learning_rate": 3.688513293902462e-06,
"loss": 1.1926006078720093,
"step": 1821
},
{
"epoch": 1.8802889576883386,
"grad_norm": 14.916308684644543,
"learning_rate": 3.6827199054038043e-06,
"loss": 1.7682125568389893,
"step": 1822
},
{
"epoch": 1.8813209494324046,
"grad_norm": 8.274121694167667,
"learning_rate": 3.6769284166809104e-06,
"loss": 1.6041537523269653,
"step": 1823
},
{
"epoch": 1.8823529411764706,
"grad_norm": 9.507652963938067,
"learning_rate": 3.6711388360862417e-06,
"loss": 1.4414433240890503,
"step": 1824
},
{
"epoch": 1.8833849329205368,
"grad_norm": 8.94598775314897,
"learning_rate": 3.6653511719695077e-06,
"loss": 1.6177234649658203,
"step": 1825
},
{
"epoch": 1.8844169246646025,
"grad_norm": 26.366290475468524,
"learning_rate": 3.659565432677652e-06,
"loss": 1.6701890230178833,
"step": 1826
},
{
"epoch": 1.8854489164086687,
"grad_norm": 10.399901839605608,
"learning_rate": 3.653781626554842e-06,
"loss": 1.2970281839370728,
"step": 1827
},
{
"epoch": 1.8864809081527347,
"grad_norm": 26.30425533380288,
"learning_rate": 3.6479997619424605e-06,
"loss": 1.2655129432678223,
"step": 1828
},
{
"epoch": 1.8875128998968007,
"grad_norm": 8.452072950248295,
"learning_rate": 3.642219847179089e-06,
"loss": 1.334805965423584,
"step": 1829
},
{
"epoch": 1.888544891640867,
"grad_norm": 11.066535865814831,
"learning_rate": 3.636441890600493e-06,
"loss": 1.7193948030471802,
"step": 1830
},
{
"epoch": 1.889576883384933,
"grad_norm": 9.779287406809443,
"learning_rate": 3.6306659005396195e-06,
"loss": 1.087109088897705,
"step": 1831
},
{
"epoch": 1.890608875128999,
"grad_norm": 10.233826024672242,
"learning_rate": 3.6248918853265756e-06,
"loss": 1.526931881904602,
"step": 1832
},
{
"epoch": 1.891640866873065,
"grad_norm": 14.236830265198021,
"learning_rate": 3.619119853288622e-06,
"loss": 1.742538332939148,
"step": 1833
},
{
"epoch": 1.892672858617131,
"grad_norm": 16.3542365001747,
"learning_rate": 3.6133498127501587e-06,
"loss": 0.9473057985305786,
"step": 1834
},
{
"epoch": 1.893704850361197,
"grad_norm": 10.48141733965336,
"learning_rate": 3.607581772032713e-06,
"loss": 1.312403678894043,
"step": 1835
},
{
"epoch": 1.8947368421052633,
"grad_norm": 13.417134210608252,
"learning_rate": 3.6018157394549287e-06,
"loss": 1.765456199645996,
"step": 1836
},
{
"epoch": 1.8957688338493293,
"grad_norm": 18.97243868977882,
"learning_rate": 3.596051723332554e-06,
"loss": 1.2540202140808105,
"step": 1837
},
{
"epoch": 1.8968008255933952,
"grad_norm": 26.599063236979354,
"learning_rate": 3.5902897319784313e-06,
"loss": 1.6883485317230225,
"step": 1838
},
{
"epoch": 1.8978328173374615,
"grad_norm": 20.47631536226638,
"learning_rate": 3.5845297737024754e-06,
"loss": 1.232295274734497,
"step": 1839
},
{
"epoch": 1.8988648090815272,
"grad_norm": 11.457787863331676,
"learning_rate": 3.5787718568116764e-06,
"loss": 1.3976306915283203,
"step": 1840
},
{
"epoch": 1.8998968008255934,
"grad_norm": 13.605991884385617,
"learning_rate": 3.573015989610078e-06,
"loss": 1.4287066459655762,
"step": 1841
},
{
"epoch": 1.9009287925696594,
"grad_norm": 15.821839277913396,
"learning_rate": 3.567262180398765e-06,
"loss": 1.6137340068817139,
"step": 1842
},
{
"epoch": 1.9019607843137254,
"grad_norm": 14.704775588225525,
"learning_rate": 3.561510437475858e-06,
"loss": 1.3677417039871216,
"step": 1843
},
{
"epoch": 1.9029927760577916,
"grad_norm": 18.49671119582087,
"learning_rate": 3.5557607691364983e-06,
"loss": 1.4877206087112427,
"step": 1844
},
{
"epoch": 1.9040247678018576,
"grad_norm": 23.286182662515156,
"learning_rate": 3.550013183672829e-06,
"loss": 1.5937504768371582,
"step": 1845
},
{
"epoch": 1.9050567595459236,
"grad_norm": 12.987288949321805,
"learning_rate": 3.5442676893739956e-06,
"loss": 1.318833589553833,
"step": 1846
},
{
"epoch": 1.9060887512899898,
"grad_norm": 10.799259403771813,
"learning_rate": 3.538524294526127e-06,
"loss": 1.4114301204681396,
"step": 1847
},
{
"epoch": 1.9071207430340558,
"grad_norm": 20.78194593074858,
"learning_rate": 3.5327830074123214e-06,
"loss": 1.7304108142852783,
"step": 1848
},
{
"epoch": 1.9081527347781218,
"grad_norm": 10.79589310794868,
"learning_rate": 3.527043836312639e-06,
"loss": 1.6173585653305054,
"step": 1849
},
{
"epoch": 1.909184726522188,
"grad_norm": 11.865600573746853,
"learning_rate": 3.521306789504089e-06,
"loss": 1.1029250621795654,
"step": 1850
},
{
"epoch": 1.9102167182662537,
"grad_norm": 16.51790031229028,
"learning_rate": 3.5155718752606126e-06,
"loss": 1.731012225151062,
"step": 1851
},
{
"epoch": 1.91124871001032,
"grad_norm": 15.067985774385786,
"learning_rate": 3.509839101853082e-06,
"loss": 3.3693652153015137,
"step": 1852
},
{
"epoch": 1.912280701754386,
"grad_norm": 13.012022325347537,
"learning_rate": 3.504108477549279e-06,
"loss": 1.9551976919174194,
"step": 1853
},
{
"epoch": 1.913312693498452,
"grad_norm": 7.879854916635597,
"learning_rate": 3.4983800106138833e-06,
"loss": 1.4166200160980225,
"step": 1854
},
{
"epoch": 1.914344685242518,
"grad_norm": 8.199603624658192,
"learning_rate": 3.4926537093084654e-06,
"loss": 1.4520305395126343,
"step": 1855
},
{
"epoch": 1.915376676986584,
"grad_norm": 8.35561969860675,
"learning_rate": 3.486929581891476e-06,
"loss": 1.4539759159088135,
"step": 1856
},
{
"epoch": 1.91640866873065,
"grad_norm": 24.847987306954906,
"learning_rate": 3.4812076366182256e-06,
"loss": 1.5426585674285889,
"step": 1857
},
{
"epoch": 1.9174406604747163,
"grad_norm": 26.640742297972697,
"learning_rate": 3.475487881740879e-06,
"loss": 1.4774678945541382,
"step": 1858
},
{
"epoch": 1.9184726522187823,
"grad_norm": 21.636408870276405,
"learning_rate": 3.4697703255084426e-06,
"loss": 2.1614882946014404,
"step": 1859
},
{
"epoch": 1.9195046439628483,
"grad_norm": 6.965501959643614,
"learning_rate": 3.464054976166753e-06,
"loss": 1.8535370826721191,
"step": 1860
},
{
"epoch": 1.9205366357069145,
"grad_norm": 9.259509440773725,
"learning_rate": 3.45834184195846e-06,
"loss": 1.6998491287231445,
"step": 1861
},
{
"epoch": 1.9215686274509802,
"grad_norm": 16.29518792543394,
"learning_rate": 3.4526309311230238e-06,
"loss": 2.3080320358276367,
"step": 1862
},
{
"epoch": 1.9226006191950464,
"grad_norm": 10.880967440643758,
"learning_rate": 3.446922251896696e-06,
"loss": 1.5008809566497803,
"step": 1863
},
{
"epoch": 1.9236326109391126,
"grad_norm": 14.202936012317007,
"learning_rate": 3.441215812512508e-06,
"loss": 1.4904303550720215,
"step": 1864
},
{
"epoch": 1.9246646026831784,
"grad_norm": 16.546369603640375,
"learning_rate": 3.4355116212002616e-06,
"loss": 2.3721206188201904,
"step": 1865
},
{
"epoch": 1.9256965944272446,
"grad_norm": 9.13217757918041,
"learning_rate": 3.4298096861865204e-06,
"loss": 1.7444360256195068,
"step": 1866
},
{
"epoch": 1.9267285861713106,
"grad_norm": 14.92466668241575,
"learning_rate": 3.4241100156945907e-06,
"loss": 1.4486478567123413,
"step": 1867
},
{
"epoch": 1.9277605779153766,
"grad_norm": 16.97368297096261,
"learning_rate": 3.41841261794451e-06,
"loss": 0.9204004406929016,
"step": 1868
},
{
"epoch": 1.9287925696594428,
"grad_norm": 14.013135225488206,
"learning_rate": 3.4127175011530443e-06,
"loss": 1.501824975013733,
"step": 1869
},
{
"epoch": 1.9298245614035088,
"grad_norm": 15.653228452019427,
"learning_rate": 3.407024673533665e-06,
"loss": 1.8300246000289917,
"step": 1870
},
{
"epoch": 1.9308565531475748,
"grad_norm": 9.199922021024074,
"learning_rate": 3.401334143296544e-06,
"loss": 2.072077989578247,
"step": 1871
},
{
"epoch": 1.931888544891641,
"grad_norm": 9.150098006383843,
"learning_rate": 3.3956459186485414e-06,
"loss": 1.2317850589752197,
"step": 1872
},
{
"epoch": 1.932920536635707,
"grad_norm": 10.395256249908842,
"learning_rate": 3.389960007793189e-06,
"loss": 1.273494839668274,
"step": 1873
},
{
"epoch": 1.933952528379773,
"grad_norm": 10.38634924924544,
"learning_rate": 3.3842764189306844e-06,
"loss": 1.8254051208496094,
"step": 1874
},
{
"epoch": 1.9349845201238391,
"grad_norm": 9.880787086639312,
"learning_rate": 3.3785951602578728e-06,
"loss": 1.6783366203308105,
"step": 1875
},
{
"epoch": 1.936016511867905,
"grad_norm": 8.844458764291927,
"learning_rate": 3.372916239968246e-06,
"loss": 1.068297266960144,
"step": 1876
},
{
"epoch": 1.9370485036119711,
"grad_norm": 18.409259489228877,
"learning_rate": 3.367239666251915e-06,
"loss": 1.3494412899017334,
"step": 1877
},
{
"epoch": 1.938080495356037,
"grad_norm": 8.125723974719769,
"learning_rate": 3.361565447295612e-06,
"loss": 1.8301430940628052,
"step": 1878
},
{
"epoch": 1.939112487100103,
"grad_norm": 15.844934465283734,
"learning_rate": 3.355893591282672e-06,
"loss": 1.3259280920028687,
"step": 1879
},
{
"epoch": 1.9401444788441693,
"grad_norm": 8.309531976854908,
"learning_rate": 3.3502241063930196e-06,
"loss": 1.5008363723754883,
"step": 1880
},
{
"epoch": 1.9411764705882353,
"grad_norm": 12.989009848670683,
"learning_rate": 3.344557000803163e-06,
"loss": 1.2330214977264404,
"step": 1881
},
{
"epoch": 1.9422084623323013,
"grad_norm": 15.10676301137227,
"learning_rate": 3.3388922826861794e-06,
"loss": 1.171676516532898,
"step": 1882
},
{
"epoch": 1.9432404540763675,
"grad_norm": 15.57918362721749,
"learning_rate": 3.333229960211698e-06,
"loss": 1.2732231616973877,
"step": 1883
},
{
"epoch": 1.9442724458204335,
"grad_norm": 12.219239159218336,
"learning_rate": 3.327570041545897e-06,
"loss": 2.0378801822662354,
"step": 1884
},
{
"epoch": 1.9453044375644994,
"grad_norm": 21.248851380839813,
"learning_rate": 3.321912534851489e-06,
"loss": 1.3067893981933594,
"step": 1885
},
{
"epoch": 1.9463364293085657,
"grad_norm": 22.363387549472883,
"learning_rate": 3.3162574482877063e-06,
"loss": 1.843634009361267,
"step": 1886
},
{
"epoch": 1.9473684210526314,
"grad_norm": 8.637840897549372,
"learning_rate": 3.310604790010289e-06,
"loss": 1.3801031112670898,
"step": 1887
},
{
"epoch": 1.9484004127966976,
"grad_norm": 15.57742329719762,
"learning_rate": 3.304954568171478e-06,
"loss": 1.2379931211471558,
"step": 1888
},
{
"epoch": 1.9494324045407638,
"grad_norm": 11.929156205044768,
"learning_rate": 3.2993067909199982e-06,
"loss": 1.2321984767913818,
"step": 1889
},
{
"epoch": 1.9504643962848296,
"grad_norm": 11.506361878048958,
"learning_rate": 3.2936614664010503e-06,
"loss": 1.3178244829177856,
"step": 1890
},
{
"epoch": 1.9514963880288958,
"grad_norm": 9.574280751560662,
"learning_rate": 3.288018602756299e-06,
"loss": 1.5133464336395264,
"step": 1891
},
{
"epoch": 1.9525283797729618,
"grad_norm": 8.761103303332469,
"learning_rate": 3.282378208123856e-06,
"loss": 0.8351538181304932,
"step": 1892
},
{
"epoch": 1.9535603715170278,
"grad_norm": 15.119584543608218,
"learning_rate": 3.276740290638275e-06,
"loss": 1.6170310974121094,
"step": 1893
},
{
"epoch": 1.954592363261094,
"grad_norm": 9.257877103523935,
"learning_rate": 3.271104858430537e-06,
"loss": 1.283090353012085,
"step": 1894
},
{
"epoch": 1.95562435500516,
"grad_norm": 8.949450609209322,
"learning_rate": 3.26547191962804e-06,
"loss": 1.5036699771881104,
"step": 1895
},
{
"epoch": 1.956656346749226,
"grad_norm": 16.853847438903518,
"learning_rate": 3.259841482354582e-06,
"loss": 1.4999172687530518,
"step": 1896
},
{
"epoch": 1.9576883384932922,
"grad_norm": 12.450045679558006,
"learning_rate": 3.254213554730358e-06,
"loss": 1.9045746326446533,
"step": 1897
},
{
"epoch": 1.9587203302373581,
"grad_norm": 8.13458772630108,
"learning_rate": 3.24858814487194e-06,
"loss": 2.0080020427703857,
"step": 1898
},
{
"epoch": 1.9597523219814241,
"grad_norm": 18.88602798086758,
"learning_rate": 3.242965260892269e-06,
"loss": 1.2330591678619385,
"step": 1899
},
{
"epoch": 1.9607843137254903,
"grad_norm": 9.646930886353221,
"learning_rate": 3.2373449109006476e-06,
"loss": 1.6764395236968994,
"step": 1900
},
{
"epoch": 1.961816305469556,
"grad_norm": 8.797859795063196,
"learning_rate": 3.231727103002718e-06,
"loss": 1.0952730178833008,
"step": 1901
},
{
"epoch": 1.9628482972136223,
"grad_norm": 12.331075461924263,
"learning_rate": 3.2261118453004595e-06,
"loss": 1.2641353607177734,
"step": 1902
},
{
"epoch": 1.9638802889576883,
"grad_norm": 6.027711307261203,
"learning_rate": 3.220499145892173e-06,
"loss": 0.8769614696502686,
"step": 1903
},
{
"epoch": 1.9649122807017543,
"grad_norm": 27.1225755613239,
"learning_rate": 3.21488901287247e-06,
"loss": 1.1951849460601807,
"step": 1904
},
{
"epoch": 1.9659442724458205,
"grad_norm": 30.675159473591993,
"learning_rate": 3.2092814543322624e-06,
"loss": 1.5789157152175903,
"step": 1905
},
{
"epoch": 1.9669762641898865,
"grad_norm": 9.410282310517966,
"learning_rate": 3.2036764783587446e-06,
"loss": 1.4281562566757202,
"step": 1906
},
{
"epoch": 1.9680082559339525,
"grad_norm": 10.964885396257436,
"learning_rate": 3.198074093035391e-06,
"loss": 1.7447469234466553,
"step": 1907
},
{
"epoch": 1.9690402476780187,
"grad_norm": 13.532327740709574,
"learning_rate": 3.192474306441936e-06,
"loss": 1.466497540473938,
"step": 1908
},
{
"epoch": 1.9700722394220846,
"grad_norm": 9.531111577263932,
"learning_rate": 3.186877126654369e-06,
"loss": 1.7062510251998901,
"step": 1909
},
{
"epoch": 1.9711042311661506,
"grad_norm": 13.433433122569705,
"learning_rate": 3.1812825617449207e-06,
"loss": 1.2298617362976074,
"step": 1910
},
{
"epoch": 1.9721362229102168,
"grad_norm": 9.117118600131604,
"learning_rate": 3.175690619782046e-06,
"loss": 1.32313072681427,
"step": 1911
},
{
"epoch": 1.9731682146542826,
"grad_norm": 13.632547041799603,
"learning_rate": 3.170101308830421e-06,
"loss": 1.7262647151947021,
"step": 1912
},
{
"epoch": 1.9742002063983488,
"grad_norm": 8.962755655228897,
"learning_rate": 3.164514636950925e-06,
"loss": 2.2817416191101074,
"step": 1913
},
{
"epoch": 1.975232198142415,
"grad_norm": 16.111500659178432,
"learning_rate": 3.1589306122006367e-06,
"loss": 1.1965981721878052,
"step": 1914
},
{
"epoch": 1.9762641898864808,
"grad_norm": 9.702276870170294,
"learning_rate": 3.1533492426328086e-06,
"loss": 1.3094780445098877,
"step": 1915
},
{
"epoch": 1.977296181630547,
"grad_norm": 13.158818670104788,
"learning_rate": 3.1477705362968702e-06,
"loss": 1.5770702362060547,
"step": 1916
},
{
"epoch": 1.978328173374613,
"grad_norm": 15.374685127791743,
"learning_rate": 3.1421945012384085e-06,
"loss": 1.5390429496765137,
"step": 1917
},
{
"epoch": 1.979360165118679,
"grad_norm": 5.198005672311248,
"learning_rate": 3.1366211454991558e-06,
"loss": 0.8496372699737549,
"step": 1918
},
{
"epoch": 1.9803921568627452,
"grad_norm": 10.909086321860805,
"learning_rate": 3.1310504771169835e-06,
"loss": 1.5664114952087402,
"step": 1919
},
{
"epoch": 1.9814241486068112,
"grad_norm": 11.822442026920228,
"learning_rate": 3.1254825041258852e-06,
"loss": 1.2260844707489014,
"step": 1920
},
{
"epoch": 1.9824561403508771,
"grad_norm": 11.125967110797111,
"learning_rate": 3.119917234555968e-06,
"loss": 1.2665646076202393,
"step": 1921
},
{
"epoch": 1.9834881320949433,
"grad_norm": 11.399017901383028,
"learning_rate": 3.114354676433441e-06,
"loss": 2.8943164348602295,
"step": 1922
},
{
"epoch": 1.9845201238390093,
"grad_norm": 13.818827066188698,
"learning_rate": 3.108794837780599e-06,
"loss": 1.7140778303146362,
"step": 1923
},
{
"epoch": 1.9855521155830753,
"grad_norm": 11.136556319523761,
"learning_rate": 3.103237726615822e-06,
"loss": 1.912706732749939,
"step": 1924
},
{
"epoch": 1.9865841073271415,
"grad_norm": 12.291829033063722,
"learning_rate": 3.0976833509535494e-06,
"loss": 1.3148201704025269,
"step": 1925
},
{
"epoch": 1.9876160990712073,
"grad_norm": 12.445042354781304,
"learning_rate": 3.09213171880428e-06,
"loss": 1.282441258430481,
"step": 1926
},
{
"epoch": 1.9886480908152735,
"grad_norm": 14.61958855274096,
"learning_rate": 3.0865828381745515e-06,
"loss": 1.6544055938720703,
"step": 1927
},
{
"epoch": 1.9896800825593395,
"grad_norm": 25.232339435869783,
"learning_rate": 3.081036717066938e-06,
"loss": 2.1010539531707764,
"step": 1928
},
{
"epoch": 1.9907120743034055,
"grad_norm": 16.073894486944297,
"learning_rate": 3.075493363480032e-06,
"loss": 1.3920762538909912,
"step": 1929
},
{
"epoch": 1.9917440660474717,
"grad_norm": 10.095724298366525,
"learning_rate": 3.0699527854084338e-06,
"loss": 1.5464189052581787,
"step": 1930
},
{
"epoch": 1.9927760577915377,
"grad_norm": 9.52645488696497,
"learning_rate": 3.064414990842742e-06,
"loss": 1.5836119651794434,
"step": 1931
},
{
"epoch": 1.9938080495356036,
"grad_norm": 15.194079441704625,
"learning_rate": 3.0588799877695375e-06,
"loss": 1.3523647785186768,
"step": 1932
},
{
"epoch": 1.9948400412796699,
"grad_norm": 17.3143929301106,
"learning_rate": 3.0533477841713833e-06,
"loss": 1.4108376502990723,
"step": 1933
},
{
"epoch": 1.9958720330237358,
"grad_norm": 13.667781813150206,
"learning_rate": 3.047818388026797e-06,
"loss": 1.1967389583587646,
"step": 1934
},
{
"epoch": 1.9969040247678018,
"grad_norm": 8.305388590205636,
"learning_rate": 3.0422918073102505e-06,
"loss": 0.9703651070594788,
"step": 1935
},
{
"epoch": 1.997936016511868,
"grad_norm": 11.508512870175016,
"learning_rate": 3.036768049992157e-06,
"loss": 1.4309837818145752,
"step": 1936
},
{
"epoch": 1.9989680082559338,
"grad_norm": 19.643208795588926,
"learning_rate": 3.0312471240388523e-06,
"loss": 1.8425827026367188,
"step": 1937
},
{
"epoch": 2.0,
"grad_norm": 11.548933037588704,
"learning_rate": 3.025729037412596e-06,
"loss": 1.0835847854614258,
"step": 1938
},
{
"epoch": 2.001031991744066,
"grad_norm": 7.610702897217233,
"learning_rate": 3.020213798071546e-06,
"loss": 0.5837053656578064,
"step": 1939
},
{
"epoch": 2.002063983488132,
"grad_norm": 11.266433976578904,
"learning_rate": 3.0147014139697596e-06,
"loss": 0.7347603440284729,
"step": 1940
},
{
"epoch": 2.003095975232198,
"grad_norm": 9.880288129649191,
"learning_rate": 3.0091918930571735e-06,
"loss": 0.4960901737213135,
"step": 1941
},
{
"epoch": 2.0041279669762644,
"grad_norm": 9.582387875341349,
"learning_rate": 3.0036852432795925e-06,
"loss": 0.721099853515625,
"step": 1942
},
{
"epoch": 2.00515995872033,
"grad_norm": 7.546639419003511,
"learning_rate": 2.998181472578686e-06,
"loss": 0.45264309644699097,
"step": 1943
},
{
"epoch": 2.0061919504643964,
"grad_norm": 11.8161746256189,
"learning_rate": 2.99268058889197e-06,
"loss": 0.527152419090271,
"step": 1944
},
{
"epoch": 2.007223942208462,
"grad_norm": 11.263796030627102,
"learning_rate": 2.987182600152794e-06,
"loss": 0.8129073977470398,
"step": 1945
},
{
"epoch": 2.0082559339525283,
"grad_norm": 9.663767619188024,
"learning_rate": 2.981687514290334e-06,
"loss": 0.5236397981643677,
"step": 1946
},
{
"epoch": 2.0092879256965945,
"grad_norm": 15.543270477389466,
"learning_rate": 2.9761953392295795e-06,
"loss": 0.4407860040664673,
"step": 1947
},
{
"epoch": 2.0103199174406603,
"grad_norm": 8.862278960252826,
"learning_rate": 2.9707060828913226e-06,
"loss": 0.9539024829864502,
"step": 1948
},
{
"epoch": 2.0113519091847265,
"grad_norm": 17.028877179665482,
"learning_rate": 2.9652197531921443e-06,
"loss": 0.3500378131866455,
"step": 1949
},
{
"epoch": 2.0123839009287927,
"grad_norm": 10.583949836368962,
"learning_rate": 2.959736358044409e-06,
"loss": 0.6622694730758667,
"step": 1950
},
{
"epoch": 2.0134158926728585,
"grad_norm": 11.500190396469042,
"learning_rate": 2.954255905356242e-06,
"loss": 0.7597059607505798,
"step": 1951
},
{
"epoch": 2.0144478844169247,
"grad_norm": 12.346486958403212,
"learning_rate": 2.9487784030315297e-06,
"loss": 0.7360637187957764,
"step": 1952
},
{
"epoch": 2.015479876160991,
"grad_norm": 13.827330375019702,
"learning_rate": 2.943303858969905e-06,
"loss": 0.5327243804931641,
"step": 1953
},
{
"epoch": 2.0165118679050567,
"grad_norm": 11.735084359293525,
"learning_rate": 2.937832281066731e-06,
"loss": 0.8617746829986572,
"step": 1954
},
{
"epoch": 2.017543859649123,
"grad_norm": 12.041591232803045,
"learning_rate": 2.9323636772130948e-06,
"loss": 0.2598215639591217,
"step": 1955
},
{
"epoch": 2.018575851393189,
"grad_norm": 9.62893293691884,
"learning_rate": 2.9268980552957917e-06,
"loss": 0.42265596985816956,
"step": 1956
},
{
"epoch": 2.019607843137255,
"grad_norm": 15.03567444823228,
"learning_rate": 2.921435423197321e-06,
"loss": 0.8368219137191772,
"step": 1957
},
{
"epoch": 2.020639834881321,
"grad_norm": 13.460049532005664,
"learning_rate": 2.915975788795864e-06,
"loss": 0.48208919167518616,
"step": 1958
},
{
"epoch": 2.021671826625387,
"grad_norm": 15.191495719958873,
"learning_rate": 2.910519159965288e-06,
"loss": 0.7324795722961426,
"step": 1959
},
{
"epoch": 2.022703818369453,
"grad_norm": 16.961021228805286,
"learning_rate": 2.905065544575114e-06,
"loss": 0.2796049118041992,
"step": 1960
},
{
"epoch": 2.023735810113519,
"grad_norm": 10.869212843674253,
"learning_rate": 2.8996149504905235e-06,
"loss": 0.573403000831604,
"step": 1961
},
{
"epoch": 2.024767801857585,
"grad_norm": 9.624391087664794,
"learning_rate": 2.894167385572344e-06,
"loss": 0.32885146141052246,
"step": 1962
},
{
"epoch": 2.025799793601651,
"grad_norm": 11.72752318773145,
"learning_rate": 2.8887228576770302e-06,
"loss": 0.6362261772155762,
"step": 1963
},
{
"epoch": 2.0268317853457174,
"grad_norm": 10.330774917455367,
"learning_rate": 2.8832813746566546e-06,
"loss": 0.5546295642852783,
"step": 1964
},
{
"epoch": 2.027863777089783,
"grad_norm": 16.016020234822413,
"learning_rate": 2.8778429443589007e-06,
"loss": 1.2917468547821045,
"step": 1965
},
{
"epoch": 2.0288957688338494,
"grad_norm": 16.38153593326629,
"learning_rate": 2.8724075746270513e-06,
"loss": 0.5093211531639099,
"step": 1966
},
{
"epoch": 2.0299277605779156,
"grad_norm": 10.512995345727335,
"learning_rate": 2.8669752732999736e-06,
"loss": 0.6507851481437683,
"step": 1967
},
{
"epoch": 2.0309597523219813,
"grad_norm": 9.787277460258979,
"learning_rate": 2.86154604821211e-06,
"loss": 0.5437256097793579,
"step": 1968
},
{
"epoch": 2.0319917440660475,
"grad_norm": 10.197545673598418,
"learning_rate": 2.856119907193463e-06,
"loss": 0.4393252432346344,
"step": 1969
},
{
"epoch": 2.0330237358101133,
"grad_norm": 11.060147206827484,
"learning_rate": 2.8506968580695926e-06,
"loss": 0.5344643592834473,
"step": 1970
},
{
"epoch": 2.0340557275541795,
"grad_norm": 10.273833742280798,
"learning_rate": 2.8452769086615943e-06,
"loss": 0.47633272409439087,
"step": 1971
},
{
"epoch": 2.0350877192982457,
"grad_norm": 11.053008212226397,
"learning_rate": 2.8398600667861032e-06,
"loss": 0.9062159657478333,
"step": 1972
},
{
"epoch": 2.0361197110423115,
"grad_norm": 10.355456409344027,
"learning_rate": 2.834446340255258e-06,
"loss": 0.45710867643356323,
"step": 1973
},
{
"epoch": 2.0371517027863777,
"grad_norm": 8.151931898870096,
"learning_rate": 2.829035736876715e-06,
"loss": 0.40623173117637634,
"step": 1974
},
{
"epoch": 2.038183694530444,
"grad_norm": 7.952996765570959,
"learning_rate": 2.823628264453625e-06,
"loss": 0.21306441724300385,
"step": 1975
},
{
"epoch": 2.0392156862745097,
"grad_norm": 9.880121410681433,
"learning_rate": 2.8182239307846195e-06,
"loss": 0.6863006353378296,
"step": 1976
},
{
"epoch": 2.040247678018576,
"grad_norm": 8.176021094360394,
"learning_rate": 2.812822743663808e-06,
"loss": 0.3915032148361206,
"step": 1977
},
{
"epoch": 2.041279669762642,
"grad_norm": 13.471316196705478,
"learning_rate": 2.8074247108807568e-06,
"loss": 0.9592699408531189,
"step": 1978
},
{
"epoch": 2.042311661506708,
"grad_norm": 7.656361104421015,
"learning_rate": 2.802029840220487e-06,
"loss": 0.2343856692314148,
"step": 1979
},
{
"epoch": 2.043343653250774,
"grad_norm": 8.6987171546621,
"learning_rate": 2.796638139463456e-06,
"loss": 0.35938137769699097,
"step": 1980
},
{
"epoch": 2.0443756449948403,
"grad_norm": 7.906012051058154,
"learning_rate": 2.7912496163855563e-06,
"loss": 0.6258907914161682,
"step": 1981
},
{
"epoch": 2.045407636738906,
"grad_norm": 11.36063073025996,
"learning_rate": 2.7858642787580937e-06,
"loss": 1.2554874420166016,
"step": 1982
},
{
"epoch": 2.0464396284829722,
"grad_norm": 6.584040487023417,
"learning_rate": 2.780482134347774e-06,
"loss": 0.3977857828140259,
"step": 1983
},
{
"epoch": 2.047471620227038,
"grad_norm": 9.79108752002825,
"learning_rate": 2.7751031909167046e-06,
"loss": 0.3324020206928253,
"step": 1984
},
{
"epoch": 2.048503611971104,
"grad_norm": 11.132342107747727,
"learning_rate": 2.7697274562223762e-06,
"loss": 0.48161542415618896,
"step": 1985
},
{
"epoch": 2.0495356037151704,
"grad_norm": 7.706079328113716,
"learning_rate": 2.764354938017651e-06,
"loss": 0.445020854473114,
"step": 1986
},
{
"epoch": 2.050567595459236,
"grad_norm": 13.318361101046666,
"learning_rate": 2.7589856440507523e-06,
"loss": 0.667015790939331,
"step": 1987
},
{
"epoch": 2.0515995872033024,
"grad_norm": 9.204842314118537,
"learning_rate": 2.7536195820652506e-06,
"loss": 0.46111762523651123,
"step": 1988
},
{
"epoch": 2.0526315789473686,
"grad_norm": 11.196672429674683,
"learning_rate": 2.7482567598000586e-06,
"loss": 0.25720328092575073,
"step": 1989
},
{
"epoch": 2.0536635706914343,
"grad_norm": 7.289064766870941,
"learning_rate": 2.742897184989414e-06,
"loss": 0.33461254835128784,
"step": 1990
},
{
"epoch": 2.0546955624355006,
"grad_norm": 7.993905358399321,
"learning_rate": 2.73754086536288e-06,
"loss": 0.35615432262420654,
"step": 1991
},
{
"epoch": 2.0557275541795668,
"grad_norm": 7.165879335402671,
"learning_rate": 2.73218780864531e-06,
"loss": 0.32950523495674133,
"step": 1992
},
{
"epoch": 2.0567595459236325,
"grad_norm": 7.746768635857653,
"learning_rate": 2.7268380225568635e-06,
"loss": 0.28539013862609863,
"step": 1993
},
{
"epoch": 2.0577915376676987,
"grad_norm": 10.27642457032376,
"learning_rate": 2.721491514812979e-06,
"loss": 0.31158798933029175,
"step": 1994
},
{
"epoch": 2.0588235294117645,
"grad_norm": 12.72944670746384,
"learning_rate": 2.7161482931243668e-06,
"loss": 0.4267995357513428,
"step": 1995
},
{
"epoch": 2.0598555211558307,
"grad_norm": 8.525124260723349,
"learning_rate": 2.710808365197e-06,
"loss": 0.20493009686470032,
"step": 1996
},
{
"epoch": 2.060887512899897,
"grad_norm": 12.474481030332862,
"learning_rate": 2.7054717387320973e-06,
"loss": 0.4201287031173706,
"step": 1997
},
{
"epoch": 2.0619195046439627,
"grad_norm": 8.33891289384112,
"learning_rate": 2.7001384214261204e-06,
"loss": 0.4269750714302063,
"step": 1998
},
{
"epoch": 2.062951496388029,
"grad_norm": 9.30474416961196,
"learning_rate": 2.6948084209707566e-06,
"loss": 0.22076722979545593,
"step": 1999
},
{
"epoch": 2.063983488132095,
"grad_norm": 10.100526322224276,
"learning_rate": 2.689481745052908e-06,
"loss": 0.42582884430885315,
"step": 2000
},
{
"epoch": 2.065015479876161,
"grad_norm": 16.06469621263434,
"learning_rate": 2.6841584013546894e-06,
"loss": 0.6067658066749573,
"step": 2001
},
{
"epoch": 2.066047471620227,
"grad_norm": 14.33291742560144,
"learning_rate": 2.678838397553399e-06,
"loss": 0.751406729221344,
"step": 2002
},
{
"epoch": 2.0670794633642933,
"grad_norm": 8.332923727656144,
"learning_rate": 2.673521741321527e-06,
"loss": 0.383941650390625,
"step": 2003
},
{
"epoch": 2.068111455108359,
"grad_norm": 14.255250305964788,
"learning_rate": 2.6682084403267305e-06,
"loss": 0.3727511167526245,
"step": 2004
},
{
"epoch": 2.0691434468524252,
"grad_norm": 12.660218738337791,
"learning_rate": 2.662898502231831e-06,
"loss": 0.3918401002883911,
"step": 2005
},
{
"epoch": 2.0701754385964914,
"grad_norm": 10.43156888417619,
"learning_rate": 2.6575919346948e-06,
"loss": 0.2653726637363434,
"step": 2006
},
{
"epoch": 2.071207430340557,
"grad_norm": 11.433483427690277,
"learning_rate": 2.652288745368743e-06,
"loss": 0.899912416934967,
"step": 2007
},
{
"epoch": 2.0722394220846234,
"grad_norm": 11.360721893826966,
"learning_rate": 2.6469889419018985e-06,
"loss": 0.11928503215312958,
"step": 2008
},
{
"epoch": 2.073271413828689,
"grad_norm": 11.605801479680045,
"learning_rate": 2.6416925319376195e-06,
"loss": 0.3207067847251892,
"step": 2009
},
{
"epoch": 2.0743034055727554,
"grad_norm": 14.412850460880513,
"learning_rate": 2.6363995231143714e-06,
"loss": 0.6017872095108032,
"step": 2010
},
{
"epoch": 2.0753353973168216,
"grad_norm": 11.631141232534024,
"learning_rate": 2.6311099230657033e-06,
"loss": 0.5595146417617798,
"step": 2011
},
{
"epoch": 2.0763673890608874,
"grad_norm": 9.660589581892381,
"learning_rate": 2.6258237394202556e-06,
"loss": 0.7578150033950806,
"step": 2012
},
{
"epoch": 2.0773993808049536,
"grad_norm": 10.56875579356723,
"learning_rate": 2.6205409798017407e-06,
"loss": 0.2807178795337677,
"step": 2013
},
{
"epoch": 2.0784313725490198,
"grad_norm": 11.182376725264405,
"learning_rate": 2.6152616518289307e-06,
"loss": 0.36187225580215454,
"step": 2014
},
{
"epoch": 2.0794633642930855,
"grad_norm": 10.450314187269546,
"learning_rate": 2.6099857631156544e-06,
"loss": 1.092901349067688,
"step": 2015
},
{
"epoch": 2.0804953560371517,
"grad_norm": 21.10493659862159,
"learning_rate": 2.60471332127077e-06,
"loss": 0.5096402764320374,
"step": 2016
},
{
"epoch": 2.081527347781218,
"grad_norm": 8.046419730983052,
"learning_rate": 2.5994443338981732e-06,
"loss": 0.27640849351882935,
"step": 2017
},
{
"epoch": 2.0825593395252837,
"grad_norm": 16.38030602904602,
"learning_rate": 2.5941788085967757e-06,
"loss": 0.5682850480079651,
"step": 2018
},
{
"epoch": 2.08359133126935,
"grad_norm": 8.53582038116658,
"learning_rate": 2.5889167529604952e-06,
"loss": 0.27553099393844604,
"step": 2019
},
{
"epoch": 2.0846233230134157,
"grad_norm": 11.37774393999609,
"learning_rate": 2.5836581745782474e-06,
"loss": 0.3816848695278168,
"step": 2020
},
{
"epoch": 2.085655314757482,
"grad_norm": 7.72441522627485,
"learning_rate": 2.57840308103393e-06,
"loss": 0.4121520519256592,
"step": 2021
},
{
"epoch": 2.086687306501548,
"grad_norm": 13.081335997853959,
"learning_rate": 2.573151479906417e-06,
"loss": 0.2650455832481384,
"step": 2022
},
{
"epoch": 2.087719298245614,
"grad_norm": 12.924865840532487,
"learning_rate": 2.5679033787695457e-06,
"loss": 0.5901508331298828,
"step": 2023
},
{
"epoch": 2.08875128998968,
"grad_norm": 11.10572686553205,
"learning_rate": 2.5626587851921053e-06,
"loss": 0.23889164626598358,
"step": 2024
},
{
"epoch": 2.0897832817337463,
"grad_norm": 14.331827592304306,
"learning_rate": 2.557417706737828e-06,
"loss": 0.540855884552002,
"step": 2025
},
{
"epoch": 2.090815273477812,
"grad_norm": 10.452283403325024,
"learning_rate": 2.552180150965372e-06,
"loss": 0.495063841342926,
"step": 2026
},
{
"epoch": 2.0918472652218782,
"grad_norm": 11.627502238168367,
"learning_rate": 2.5469461254283188e-06,
"loss": 0.36154189705848694,
"step": 2027
},
{
"epoch": 2.0928792569659445,
"grad_norm": 9.678570676226414,
"learning_rate": 2.541715637675156e-06,
"loss": 0.23651531338691711,
"step": 2028
},
{
"epoch": 2.09391124871001,
"grad_norm": 13.55207712153128,
"learning_rate": 2.5364886952492775e-06,
"loss": 0.40609103441238403,
"step": 2029
},
{
"epoch": 2.0949432404540764,
"grad_norm": 9.821258605592359,
"learning_rate": 2.531265305688951e-06,
"loss": 0.2444392591714859,
"step": 2030
},
{
"epoch": 2.0959752321981426,
"grad_norm": 16.229633508796756,
"learning_rate": 2.526045476527329e-06,
"loss": 0.4760296046733856,
"step": 2031
},
{
"epoch": 2.0970072239422084,
"grad_norm": 10.244670958345468,
"learning_rate": 2.520829215292426e-06,
"loss": 0.3381337523460388,
"step": 2032
},
{
"epoch": 2.0980392156862746,
"grad_norm": 19.68762391684102,
"learning_rate": 2.5156165295071134e-06,
"loss": 0.48230743408203125,
"step": 2033
},
{
"epoch": 2.0990712074303404,
"grad_norm": 8.34117887968612,
"learning_rate": 2.5104074266891055e-06,
"loss": 0.4035602807998657,
"step": 2034
},
{
"epoch": 2.1001031991744066,
"grad_norm": 8.272915623942248,
"learning_rate": 2.5052019143509454e-06,
"loss": 0.3126871883869171,
"step": 2035
},
{
"epoch": 2.101135190918473,
"grad_norm": 9.047040893287747,
"learning_rate": 2.5000000000000015e-06,
"loss": 0.5362358093261719,
"step": 2036
},
{
"epoch": 2.1021671826625385,
"grad_norm": 11.128227003602051,
"learning_rate": 2.494801691138453e-06,
"loss": 0.2896101176738739,
"step": 2037
},
{
"epoch": 2.1031991744066048,
"grad_norm": 9.20607292832876,
"learning_rate": 2.489606995263279e-06,
"loss": 0.2587195336818695,
"step": 2038
},
{
"epoch": 2.104231166150671,
"grad_norm": 10.749347258860894,
"learning_rate": 2.484415919866248e-06,
"loss": 0.477316290140152,
"step": 2039
},
{
"epoch": 2.1052631578947367,
"grad_norm": 9.253270074434505,
"learning_rate": 2.4792284724339077e-06,
"loss": 0.30313533544540405,
"step": 2040
},
{
"epoch": 2.106295149638803,
"grad_norm": 10.419790311969663,
"learning_rate": 2.4740446604475715e-06,
"loss": 0.3734855055809021,
"step": 2041
},
{
"epoch": 2.107327141382869,
"grad_norm": 9.075436281088471,
"learning_rate": 2.468864491383311e-06,
"loss": 0.3065953850746155,
"step": 2042
},
{
"epoch": 2.108359133126935,
"grad_norm": 9.09576839076326,
"learning_rate": 2.463687972711945e-06,
"loss": 0.7746214270591736,
"step": 2043
},
{
"epoch": 2.109391124871001,
"grad_norm": 13.341558733150194,
"learning_rate": 2.4585151118990286e-06,
"loss": 0.41721075773239136,
"step": 2044
},
{
"epoch": 2.110423116615067,
"grad_norm": 10.442466435447885,
"learning_rate": 2.453345916404835e-06,
"loss": 0.4096275269985199,
"step": 2045
},
{
"epoch": 2.111455108359133,
"grad_norm": 8.206956814870292,
"learning_rate": 2.4481803936843586e-06,
"loss": 0.1956387758255005,
"step": 2046
},
{
"epoch": 2.1124871001031993,
"grad_norm": 9.703779186684427,
"learning_rate": 2.4430185511872944e-06,
"loss": 0.7298110723495483,
"step": 2047
},
{
"epoch": 2.113519091847265,
"grad_norm": 8.37066996055875,
"learning_rate": 2.4378603963580293e-06,
"loss": 0.2986023724079132,
"step": 2048
},
{
"epoch": 2.1145510835913313,
"grad_norm": 9.125753074947662,
"learning_rate": 2.4327059366356325e-06,
"loss": 0.253151535987854,
"step": 2049
},
{
"epoch": 2.1155830753353975,
"grad_norm": 9.022273371401743,
"learning_rate": 2.4275551794538445e-06,
"loss": 0.3628351092338562,
"step": 2050
},
{
"epoch": 2.1166150670794632,
"grad_norm": 13.284143093222118,
"learning_rate": 2.422408132241065e-06,
"loss": 0.48799097537994385,
"step": 2051
},
{
"epoch": 2.1176470588235294,
"grad_norm": 47.35592241039155,
"learning_rate": 2.417264802420343e-06,
"loss": 1.0655995607376099,
"step": 2052
},
{
"epoch": 2.1186790505675956,
"grad_norm": 9.004160541592759,
"learning_rate": 2.4121251974093706e-06,
"loss": 0.26724356412887573,
"step": 2053
},
{
"epoch": 2.1197110423116614,
"grad_norm": 10.765443314103234,
"learning_rate": 2.406989324620459e-06,
"loss": 0.23049749433994293,
"step": 2054
},
{
"epoch": 2.1207430340557276,
"grad_norm": 8.958513741945511,
"learning_rate": 2.401857191460544e-06,
"loss": 0.3267396092414856,
"step": 2055
},
{
"epoch": 2.121775025799794,
"grad_norm": 11.180547607555656,
"learning_rate": 2.396728805331167e-06,
"loss": 0.5201942920684814,
"step": 2056
},
{
"epoch": 2.1228070175438596,
"grad_norm": 9.6878631954348,
"learning_rate": 2.391604173628465e-06,
"loss": 0.38582032918930054,
"step": 2057
},
{
"epoch": 2.123839009287926,
"grad_norm": 9.627683093068656,
"learning_rate": 2.3864833037431596e-06,
"loss": 0.3799446225166321,
"step": 2058
},
{
"epoch": 2.1248710010319916,
"grad_norm": 14.077616949596758,
"learning_rate": 2.3813662030605473e-06,
"loss": 0.3688233494758606,
"step": 2059
},
{
"epoch": 2.1259029927760578,
"grad_norm": 8.238448213968562,
"learning_rate": 2.3762528789604887e-06,
"loss": 0.2844564616680145,
"step": 2060
},
{
"epoch": 2.126934984520124,
"grad_norm": 14.778310625808261,
"learning_rate": 2.3711433388174e-06,
"loss": 0.9820846915245056,
"step": 2061
},
{
"epoch": 2.1279669762641897,
"grad_norm": 7.038877981529248,
"learning_rate": 2.3660375900002364e-06,
"loss": 0.23352807760238647,
"step": 2062
},
{
"epoch": 2.128998968008256,
"grad_norm": 8.801565337485316,
"learning_rate": 2.3609356398724896e-06,
"loss": 0.47995999455451965,
"step": 2063
},
{
"epoch": 2.130030959752322,
"grad_norm": 6.5514911977888906,
"learning_rate": 2.3558374957921678e-06,
"loss": 0.1579141765832901,
"step": 2064
},
{
"epoch": 2.131062951496388,
"grad_norm": 13.345001457721189,
"learning_rate": 2.350743165111793e-06,
"loss": 0.7573691606521606,
"step": 2065
},
{
"epoch": 2.132094943240454,
"grad_norm": 8.851065098558594,
"learning_rate": 2.3456526551783874e-06,
"loss": 0.32301467657089233,
"step": 2066
},
{
"epoch": 2.1331269349845203,
"grad_norm": 8.579398770461948,
"learning_rate": 2.340565973333464e-06,
"loss": 0.38894182443618774,
"step": 2067
},
{
"epoch": 2.134158926728586,
"grad_norm": 10.964225902857754,
"learning_rate": 2.3354831269130133e-06,
"loss": 0.4255586564540863,
"step": 2068
},
{
"epoch": 2.1351909184726523,
"grad_norm": 7.552562151337086,
"learning_rate": 2.330404123247495e-06,
"loss": 0.24562402069568634,
"step": 2069
},
{
"epoch": 2.136222910216718,
"grad_norm": 17.04563208493501,
"learning_rate": 2.3253289696618257e-06,
"loss": 0.3281315565109253,
"step": 2070
},
{
"epoch": 2.1372549019607843,
"grad_norm": 10.631771131445332,
"learning_rate": 2.320257673475371e-06,
"loss": 1.191027283668518,
"step": 2071
},
{
"epoch": 2.1382868937048505,
"grad_norm": 10.709973973674364,
"learning_rate": 2.3151902420019357e-06,
"loss": 0.6106898784637451,
"step": 2072
},
{
"epoch": 2.1393188854489162,
"grad_norm": 11.71516331609504,
"learning_rate": 2.310126682549742e-06,
"loss": 0.7854446172714233,
"step": 2073
},
{
"epoch": 2.1403508771929824,
"grad_norm": 10.26037239508922,
"learning_rate": 2.305067002421438e-06,
"loss": 0.441925972700119,
"step": 2074
},
{
"epoch": 2.1413828689370487,
"grad_norm": 11.886166662674606,
"learning_rate": 2.300011208914071e-06,
"loss": 0.4592568278312683,
"step": 2075
},
{
"epoch": 2.1424148606811144,
"grad_norm": 10.504523851406987,
"learning_rate": 2.2949593093190863e-06,
"loss": 0.3115933835506439,
"step": 2076
},
{
"epoch": 2.1434468524251806,
"grad_norm": 13.024770233030189,
"learning_rate": 2.2899113109223113e-06,
"loss": 0.40064454078674316,
"step": 2077
},
{
"epoch": 2.144478844169247,
"grad_norm": 17.22807437979318,
"learning_rate": 2.2848672210039484e-06,
"loss": 0.6171914935112,
"step": 2078
},
{
"epoch": 2.1455108359133126,
"grad_norm": 6.25175917101113,
"learning_rate": 2.279827046838562e-06,
"loss": 0.3618736267089844,
"step": 2079
},
{
"epoch": 2.146542827657379,
"grad_norm": 7.078420574869955,
"learning_rate": 2.274790795695071e-06,
"loss": 0.43610116839408875,
"step": 2080
},
{
"epoch": 2.147574819401445,
"grad_norm": 11.264346934071357,
"learning_rate": 2.269758474836734e-06,
"loss": 0.8911107778549194,
"step": 2081
},
{
"epoch": 2.1486068111455108,
"grad_norm": 15.930742320989614,
"learning_rate": 2.264730091521146e-06,
"loss": 0.48314476013183594,
"step": 2082
},
{
"epoch": 2.149638802889577,
"grad_norm": 8.171698428610899,
"learning_rate": 2.259705653000216e-06,
"loss": 0.25453081727027893,
"step": 2083
},
{
"epoch": 2.1506707946336427,
"grad_norm": 7.6375827588902965,
"learning_rate": 2.2546851665201692e-06,
"loss": 0.1955549716949463,
"step": 2084
},
{
"epoch": 2.151702786377709,
"grad_norm": 7.383691170416679,
"learning_rate": 2.24966863932153e-06,
"loss": 0.1405426263809204,
"step": 2085
},
{
"epoch": 2.152734778121775,
"grad_norm": 8.801702748150783,
"learning_rate": 2.2446560786391135e-06,
"loss": 0.8943809270858765,
"step": 2086
},
{
"epoch": 2.153766769865841,
"grad_norm": 7.145573332227521,
"learning_rate": 2.239647491702013e-06,
"loss": 0.35383838415145874,
"step": 2087
},
{
"epoch": 2.154798761609907,
"grad_norm": 12.765906885545176,
"learning_rate": 2.2346428857335904e-06,
"loss": 0.7786715030670166,
"step": 2088
},
{
"epoch": 2.1558307533539733,
"grad_norm": 9.688973922110497,
"learning_rate": 2.2296422679514686e-06,
"loss": 0.39823275804519653,
"step": 2089
},
{
"epoch": 2.156862745098039,
"grad_norm": 12.736295425673598,
"learning_rate": 2.224645645567517e-06,
"loss": 0.9067201018333435,
"step": 2090
},
{
"epoch": 2.1578947368421053,
"grad_norm": 11.046850636454053,
"learning_rate": 2.219653025787844e-06,
"loss": 0.38563841581344604,
"step": 2091
},
{
"epoch": 2.1589267285861715,
"grad_norm": 12.670959912418832,
"learning_rate": 2.2146644158127827e-06,
"loss": 0.44574642181396484,
"step": 2092
},
{
"epoch": 2.1599587203302373,
"grad_norm": 11.52877149882546,
"learning_rate": 2.209679822836886e-06,
"loss": 0.7529317140579224,
"step": 2093
},
{
"epoch": 2.1609907120743035,
"grad_norm": 13.481079102148628,
"learning_rate": 2.2046992540489126e-06,
"loss": 0.28910043835639954,
"step": 2094
},
{
"epoch": 2.1620227038183693,
"grad_norm": 10.545525658517882,
"learning_rate": 2.199722716631818e-06,
"loss": 0.5858748555183411,
"step": 2095
},
{
"epoch": 2.1630546955624355,
"grad_norm": 13.286208111212561,
"learning_rate": 2.1947502177627437e-06,
"loss": 0.37049344182014465,
"step": 2096
},
{
"epoch": 2.1640866873065017,
"grad_norm": 13.777848367998493,
"learning_rate": 2.189781764613006e-06,
"loss": 0.4377231299877167,
"step": 2097
},
{
"epoch": 2.1651186790505674,
"grad_norm": 9.496773578931737,
"learning_rate": 2.1848173643480875e-06,
"loss": 0.5167649388313293,
"step": 2098
},
{
"epoch": 2.1661506707946336,
"grad_norm": 15.849745603382729,
"learning_rate": 2.179857024127624e-06,
"loss": 0.5104716420173645,
"step": 2099
},
{
"epoch": 2.1671826625387,
"grad_norm": 12.76752296827376,
"learning_rate": 2.1749007511054005e-06,
"loss": 0.39890056848526,
"step": 2100
},
{
"epoch": 2.1682146542827656,
"grad_norm": 8.773963974580964,
"learning_rate": 2.1699485524293285e-06,
"loss": 0.4501742124557495,
"step": 2101
},
{
"epoch": 2.169246646026832,
"grad_norm": 13.173250814161523,
"learning_rate": 2.1650004352414493e-06,
"loss": 0.555133044719696,
"step": 2102
},
{
"epoch": 2.170278637770898,
"grad_norm": 11.020899187251302,
"learning_rate": 2.1600564066779172e-06,
"loss": 0.38730090856552124,
"step": 2103
},
{
"epoch": 2.171310629514964,
"grad_norm": 11.130112411783447,
"learning_rate": 2.1551164738689896e-06,
"loss": 0.5620251893997192,
"step": 2104
},
{
"epoch": 2.17234262125903,
"grad_norm": 11.614057941800839,
"learning_rate": 2.1501806439390156e-06,
"loss": 0.41809794306755066,
"step": 2105
},
{
"epoch": 2.173374613003096,
"grad_norm": 12.650393566481371,
"learning_rate": 2.1452489240064284e-06,
"loss": 0.6263318061828613,
"step": 2106
},
{
"epoch": 2.174406604747162,
"grad_norm": 12.3807562566126,
"learning_rate": 2.1403213211837344e-06,
"loss": 0.5498677492141724,
"step": 2107
},
{
"epoch": 2.175438596491228,
"grad_norm": 8.76125024687879,
"learning_rate": 2.1353978425775006e-06,
"loss": 0.3538694977760315,
"step": 2108
},
{
"epoch": 2.176470588235294,
"grad_norm": 15.819502963317236,
"learning_rate": 2.130478495288347e-06,
"loss": 0.7942072153091431,
"step": 2109
},
{
"epoch": 2.17750257997936,
"grad_norm": 9.908160370261319,
"learning_rate": 2.125563286410938e-06,
"loss": 0.47650521993637085,
"step": 2110
},
{
"epoch": 2.1785345717234263,
"grad_norm": 14.55609857912769,
"learning_rate": 2.1206522230339634e-06,
"loss": 0.42236289381980896,
"step": 2111
},
{
"epoch": 2.179566563467492,
"grad_norm": 17.012814649988037,
"learning_rate": 2.1157453122401385e-06,
"loss": 1.215108036994934,
"step": 2112
},
{
"epoch": 2.1805985552115583,
"grad_norm": 15.556015184879993,
"learning_rate": 2.11084256110619e-06,
"loss": 0.7476073503494263,
"step": 2113
},
{
"epoch": 2.1816305469556245,
"grad_norm": 12.208046332164217,
"learning_rate": 2.105943976702845e-06,
"loss": 0.30959609150886536,
"step": 2114
},
{
"epoch": 2.1826625386996903,
"grad_norm": 11.309562486851062,
"learning_rate": 2.1010495660948206e-06,
"loss": 0.6432572603225708,
"step": 2115
},
{
"epoch": 2.1836945304437565,
"grad_norm": 11.565800824630927,
"learning_rate": 2.0961593363408154e-06,
"loss": 1.2213436365127563,
"step": 2116
},
{
"epoch": 2.1847265221878223,
"grad_norm": 10.806743127598923,
"learning_rate": 2.0912732944934984e-06,
"loss": 0.2402806282043457,
"step": 2117
},
{
"epoch": 2.1857585139318885,
"grad_norm": 15.266326681062148,
"learning_rate": 2.0863914475994974e-06,
"loss": 0.7473478317260742,
"step": 2118
},
{
"epoch": 2.1867905056759547,
"grad_norm": 11.85831662157018,
"learning_rate": 2.081513802699394e-06,
"loss": 1.5603134632110596,
"step": 2119
},
{
"epoch": 2.1878224974200204,
"grad_norm": 12.610129105672378,
"learning_rate": 2.076640366827703e-06,
"loss": 0.5242027044296265,
"step": 2120
},
{
"epoch": 2.1888544891640866,
"grad_norm": 7.985787003390586,
"learning_rate": 2.0717711470128747e-06,
"loss": 0.22941672801971436,
"step": 2121
},
{
"epoch": 2.189886480908153,
"grad_norm": 18.4640777229605,
"learning_rate": 2.0669061502772776e-06,
"loss": 0.5810708999633789,
"step": 2122
},
{
"epoch": 2.1909184726522186,
"grad_norm": 14.977477850492251,
"learning_rate": 2.0620453836371885e-06,
"loss": 0.4758716821670532,
"step": 2123
},
{
"epoch": 2.191950464396285,
"grad_norm": 8.226507927253131,
"learning_rate": 2.0571888541027857e-06,
"loss": 0.3519587218761444,
"step": 2124
},
{
"epoch": 2.192982456140351,
"grad_norm": 8.9390473817918,
"learning_rate": 2.0523365686781345e-06,
"loss": 0.4949192702770233,
"step": 2125
},
{
"epoch": 2.194014447884417,
"grad_norm": 11.399187864137117,
"learning_rate": 2.0474885343611804e-06,
"loss": 0.33248478174209595,
"step": 2126
},
{
"epoch": 2.195046439628483,
"grad_norm": 9.581866135820988,
"learning_rate": 2.0426447581437386e-06,
"loss": 0.3073532283306122,
"step": 2127
},
{
"epoch": 2.196078431372549,
"grad_norm": 13.688929287126498,
"learning_rate": 2.0378052470114822e-06,
"loss": 0.5941171646118164,
"step": 2128
},
{
"epoch": 2.197110423116615,
"grad_norm": 10.344955312103714,
"learning_rate": 2.032970007943935e-06,
"loss": 0.42333728075027466,
"step": 2129
},
{
"epoch": 2.198142414860681,
"grad_norm": 11.410027161328824,
"learning_rate": 2.028139047914456e-06,
"loss": 0.45369571447372437,
"step": 2130
},
{
"epoch": 2.1991744066047474,
"grad_norm": 9.717065128794871,
"learning_rate": 2.0233123738902355e-06,
"loss": 0.2441929429769516,
"step": 2131
},
{
"epoch": 2.200206398348813,
"grad_norm": 10.775810333693101,
"learning_rate": 2.018489992832283e-06,
"loss": 0.47238659858703613,
"step": 2132
},
{
"epoch": 2.2012383900928794,
"grad_norm": 10.077291541682422,
"learning_rate": 2.0136719116954168e-06,
"loss": 0.2882574200630188,
"step": 2133
},
{
"epoch": 2.202270381836945,
"grad_norm": 8.125021055807146,
"learning_rate": 2.0088581374282514e-06,
"loss": 0.5156424045562744,
"step": 2134
},
{
"epoch": 2.2033023735810113,
"grad_norm": 8.818725252024239,
"learning_rate": 2.0040486769731928e-06,
"loss": 0.23826254904270172,
"step": 2135
},
{
"epoch": 2.2043343653250775,
"grad_norm": 8.018001950456375,
"learning_rate": 1.999243537266424e-06,
"loss": 0.4738239049911499,
"step": 2136
},
{
"epoch": 2.2053663570691433,
"grad_norm": 10.2654264334262,
"learning_rate": 1.994442725237898e-06,
"loss": 0.34067076444625854,
"step": 2137
},
{
"epoch": 2.2063983488132095,
"grad_norm": 10.726847895363598,
"learning_rate": 1.989646247811326e-06,
"loss": 0.4780583381652832,
"step": 2138
},
{
"epoch": 2.2074303405572757,
"grad_norm": 11.005879210645626,
"learning_rate": 1.9848541119041644e-06,
"loss": 0.4559696912765503,
"step": 2139
},
{
"epoch": 2.2084623323013415,
"grad_norm": 11.221008336553952,
"learning_rate": 1.980066324427613e-06,
"loss": 0.5784523487091064,
"step": 2140
},
{
"epoch": 2.2094943240454077,
"grad_norm": 11.789084325482024,
"learning_rate": 1.9752828922865993e-06,
"loss": 0.6254563331604004,
"step": 2141
},
{
"epoch": 2.2105263157894735,
"grad_norm": 9.819567138617387,
"learning_rate": 1.9705038223797673e-06,
"loss": 0.5376569628715515,
"step": 2142
},
{
"epoch": 2.2115583075335397,
"grad_norm": 9.042544730500095,
"learning_rate": 1.965729121599473e-06,
"loss": 0.32408738136291504,
"step": 2143
},
{
"epoch": 2.212590299277606,
"grad_norm": 10.077898140702857,
"learning_rate": 1.960958796831769e-06,
"loss": 0.2823505401611328,
"step": 2144
},
{
"epoch": 2.2136222910216716,
"grad_norm": 7.705419094182372,
"learning_rate": 1.956192854956397e-06,
"loss": 0.2653104066848755,
"step": 2145
},
{
"epoch": 2.214654282765738,
"grad_norm": 9.482777149614346,
"learning_rate": 1.9514313028467783e-06,
"loss": 0.4442105293273926,
"step": 2146
},
{
"epoch": 2.215686274509804,
"grad_norm": 18.816500800240203,
"learning_rate": 1.9466741473700033e-06,
"loss": 0.7567444443702698,
"step": 2147
},
{
"epoch": 2.21671826625387,
"grad_norm": 10.523644645810517,
"learning_rate": 1.9419213953868236e-06,
"loss": 0.6099028587341309,
"step": 2148
},
{
"epoch": 2.217750257997936,
"grad_norm": 13.450744551287302,
"learning_rate": 1.9371730537516344e-06,
"loss": 0.6623318195343018,
"step": 2149
},
{
"epoch": 2.218782249742002,
"grad_norm": 9.569934224545488,
"learning_rate": 1.9324291293124747e-06,
"loss": 0.47942644357681274,
"step": 2150
},
{
"epoch": 2.219814241486068,
"grad_norm": 10.18178417543629,
"learning_rate": 1.927689628911013e-06,
"loss": 1.0906792879104614,
"step": 2151
},
{
"epoch": 2.220846233230134,
"grad_norm": 13.166259873833667,
"learning_rate": 1.9229545593825367e-06,
"loss": 0.30344152450561523,
"step": 2152
},
{
"epoch": 2.2218782249742004,
"grad_norm": 9.508415212815473,
"learning_rate": 1.9182239275559443e-06,
"loss": 0.4512457549571991,
"step": 2153
},
{
"epoch": 2.222910216718266,
"grad_norm": 13.74278373206301,
"learning_rate": 1.913497740253728e-06,
"loss": 0.3366636633872986,
"step": 2154
},
{
"epoch": 2.2239422084623324,
"grad_norm": 11.872675446972762,
"learning_rate": 1.9087760042919808e-06,
"loss": 0.2936578392982483,
"step": 2155
},
{
"epoch": 2.2249742002063986,
"grad_norm": 9.914652472935117,
"learning_rate": 1.9040587264803673e-06,
"loss": 0.6060217618942261,
"step": 2156
},
{
"epoch": 2.2260061919504643,
"grad_norm": 10.656783486055465,
"learning_rate": 1.899345913622128e-06,
"loss": 0.763049840927124,
"step": 2157
},
{
"epoch": 2.2270381836945305,
"grad_norm": 12.604264061427578,
"learning_rate": 1.8946375725140581e-06,
"loss": 0.6956782937049866,
"step": 2158
},
{
"epoch": 2.2280701754385963,
"grad_norm": 9.332473056549954,
"learning_rate": 1.8899337099465092e-06,
"loss": 0.690141499042511,
"step": 2159
},
{
"epoch": 2.2291021671826625,
"grad_norm": 7.301853545257415,
"learning_rate": 1.8852343327033717e-06,
"loss": 0.23047930002212524,
"step": 2160
},
{
"epoch": 2.2301341589267287,
"grad_norm": 9.85687211577048,
"learning_rate": 1.8805394475620674e-06,
"loss": 0.4893918037414551,
"step": 2161
},
{
"epoch": 2.2311661506707945,
"grad_norm": 8.689634125858236,
"learning_rate": 1.8758490612935398e-06,
"loss": 0.447439968585968,
"step": 2162
},
{
"epoch": 2.2321981424148607,
"grad_norm": 13.30896756432028,
"learning_rate": 1.8711631806622443e-06,
"loss": 0.48308444023132324,
"step": 2163
},
{
"epoch": 2.233230134158927,
"grad_norm": 12.456869689142426,
"learning_rate": 1.8664818124261375e-06,
"loss": 0.3598782420158386,
"step": 2164
},
{
"epoch": 2.2342621259029927,
"grad_norm": 9.21047384707439,
"learning_rate": 1.8618049633366698e-06,
"loss": 0.5054575800895691,
"step": 2165
},
{
"epoch": 2.235294117647059,
"grad_norm": 7.834072364800285,
"learning_rate": 1.8571326401387717e-06,
"loss": 0.14533498883247375,
"step": 2166
},
{
"epoch": 2.2363261093911246,
"grad_norm": 8.6530833971032,
"learning_rate": 1.8524648495708514e-06,
"loss": 0.661357045173645,
"step": 2167
},
{
"epoch": 2.237358101135191,
"grad_norm": 11.19943394122764,
"learning_rate": 1.8478015983647718e-06,
"loss": 0.6174920797348022,
"step": 2168
},
{
"epoch": 2.238390092879257,
"grad_norm": 10.366083307918593,
"learning_rate": 1.8431428932458556e-06,
"loss": 0.6961145997047424,
"step": 2169
},
{
"epoch": 2.239422084623323,
"grad_norm": 16.282273584205967,
"learning_rate": 1.8384887409328688e-06,
"loss": 0.42053890228271484,
"step": 2170
},
{
"epoch": 2.240454076367389,
"grad_norm": 11.270734561202865,
"learning_rate": 1.8338391481380097e-06,
"loss": 0.6044822335243225,
"step": 2171
},
{
"epoch": 2.2414860681114552,
"grad_norm": 11.13863087178757,
"learning_rate": 1.8291941215669024e-06,
"loss": 0.17979833483695984,
"step": 2172
},
{
"epoch": 2.242518059855521,
"grad_norm": 11.87594192768117,
"learning_rate": 1.8245536679185793e-06,
"loss": 0.6116322875022888,
"step": 2173
},
{
"epoch": 2.243550051599587,
"grad_norm": 9.237256375764636,
"learning_rate": 1.8199177938854895e-06,
"loss": 0.44815587997436523,
"step": 2174
},
{
"epoch": 2.2445820433436534,
"grad_norm": 6.584204256501777,
"learning_rate": 1.8152865061534675e-06,
"loss": 0.4116666615009308,
"step": 2175
},
{
"epoch": 2.245614035087719,
"grad_norm": 13.36188671219953,
"learning_rate": 1.8106598114017398e-06,
"loss": 0.5253646969795227,
"step": 2176
},
{
"epoch": 2.2466460268317854,
"grad_norm": 11.92523191299553,
"learning_rate": 1.806037716302902e-06,
"loss": 0.48259201645851135,
"step": 2177
},
{
"epoch": 2.2476780185758516,
"grad_norm": 13.561237646204148,
"learning_rate": 1.801420227522921e-06,
"loss": 1.8981776237487793,
"step": 2178
},
{
"epoch": 2.2487100103199174,
"grad_norm": 9.074617736581224,
"learning_rate": 1.796807351721121e-06,
"loss": 0.30176305770874023,
"step": 2179
},
{
"epoch": 2.2497420020639836,
"grad_norm": 19.161891287407926,
"learning_rate": 1.7921990955501705e-06,
"loss": 0.301688551902771,
"step": 2180
},
{
"epoch": 2.2507739938080498,
"grad_norm": 15.535421035921493,
"learning_rate": 1.7875954656560802e-06,
"loss": 1.0767804384231567,
"step": 2181
},
{
"epoch": 2.2518059855521155,
"grad_norm": 17.0125475831456,
"learning_rate": 1.7829964686781793e-06,
"loss": 0.8623077869415283,
"step": 2182
},
{
"epoch": 2.2528379772961817,
"grad_norm": 8.124126424867834,
"learning_rate": 1.7784021112491273e-06,
"loss": 0.5567214488983154,
"step": 2183
},
{
"epoch": 2.2538699690402475,
"grad_norm": 10.875316602560629,
"learning_rate": 1.7738123999948853e-06,
"loss": 0.3203074336051941,
"step": 2184
},
{
"epoch": 2.2549019607843137,
"grad_norm": 10.703077649740377,
"learning_rate": 1.769227341534715e-06,
"loss": 0.8443643450737,
"step": 2185
},
{
"epoch": 2.25593395252838,
"grad_norm": 10.909463202886121,
"learning_rate": 1.7646469424811707e-06,
"loss": 0.6140201687812805,
"step": 2186
},
{
"epoch": 2.2569659442724457,
"grad_norm": 8.473032286683102,
"learning_rate": 1.7600712094400802e-06,
"loss": 0.3071803152561188,
"step": 2187
},
{
"epoch": 2.257997936016512,
"grad_norm": 11.657311094006454,
"learning_rate": 1.755500149010549e-06,
"loss": 0.8473285436630249,
"step": 2188
},
{
"epoch": 2.259029927760578,
"grad_norm": 9.516720867180952,
"learning_rate": 1.7509337677849404e-06,
"loss": 0.1772429496049881,
"step": 2189
},
{
"epoch": 2.260061919504644,
"grad_norm": 9.494675596695124,
"learning_rate": 1.7463720723488698e-06,
"loss": 0.48868194222450256,
"step": 2190
},
{
"epoch": 2.26109391124871,
"grad_norm": 12.060059218685938,
"learning_rate": 1.7418150692811969e-06,
"loss": 0.2509000897407532,
"step": 2191
},
{
"epoch": 2.262125902992776,
"grad_norm": 7.065907524965014,
"learning_rate": 1.737262765154008e-06,
"loss": 0.16848008334636688,
"step": 2192
},
{
"epoch": 2.263157894736842,
"grad_norm": 6.858130398118923,
"learning_rate": 1.7327151665326208e-06,
"loss": 0.25383830070495605,
"step": 2193
},
{
"epoch": 2.2641898864809082,
"grad_norm": 25.3718609084507,
"learning_rate": 1.7281722799755613e-06,
"loss": 0.4060966968536377,
"step": 2194
},
{
"epoch": 2.265221878224974,
"grad_norm": 12.947905219908122,
"learning_rate": 1.723634112034563e-06,
"loss": 0.5273417234420776,
"step": 2195
},
{
"epoch": 2.26625386996904,
"grad_norm": 13.566005766002926,
"learning_rate": 1.7191006692545493e-06,
"loss": 0.26978084444999695,
"step": 2196
},
{
"epoch": 2.2672858617131064,
"grad_norm": 15.518192045444478,
"learning_rate": 1.7145719581736337e-06,
"loss": 0.8343046307563782,
"step": 2197
},
{
"epoch": 2.268317853457172,
"grad_norm": 11.141763148539908,
"learning_rate": 1.710047985323104e-06,
"loss": 0.21038788557052612,
"step": 2198
},
{
"epoch": 2.2693498452012384,
"grad_norm": 10.9815731017179,
"learning_rate": 1.7055287572274142e-06,
"loss": 0.40513765811920166,
"step": 2199
},
{
"epoch": 2.2703818369453046,
"grad_norm": 13.750227883056459,
"learning_rate": 1.7010142804041785e-06,
"loss": 0.532772421836853,
"step": 2200
},
{
"epoch": 2.2714138286893704,
"grad_norm": 8.871418169099249,
"learning_rate": 1.6965045613641523e-06,
"loss": 0.4089523255825043,
"step": 2201
},
{
"epoch": 2.2724458204334366,
"grad_norm": 9.437343178262012,
"learning_rate": 1.6919996066112337e-06,
"loss": 0.2525821030139923,
"step": 2202
},
{
"epoch": 2.2734778121775028,
"grad_norm": 18.80428833428539,
"learning_rate": 1.6874994226424518e-06,
"loss": 0.3479749262332916,
"step": 2203
},
{
"epoch": 2.2745098039215685,
"grad_norm": 11.304407306056925,
"learning_rate": 1.6830040159479521e-06,
"loss": 0.5370907783508301,
"step": 2204
},
{
"epoch": 2.2755417956656347,
"grad_norm": 17.206317290083987,
"learning_rate": 1.6785133930109927e-06,
"loss": 1.4786996841430664,
"step": 2205
},
{
"epoch": 2.276573787409701,
"grad_norm": 11.036270573698204,
"learning_rate": 1.674027560307927e-06,
"loss": 0.34293660521507263,
"step": 2206
},
{
"epoch": 2.2776057791537667,
"grad_norm": 10.455555407974552,
"learning_rate": 1.6695465243082055e-06,
"loss": 0.27484482526779175,
"step": 2207
},
{
"epoch": 2.278637770897833,
"grad_norm": 6.027728038520487,
"learning_rate": 1.66507029147436e-06,
"loss": 0.16362959146499634,
"step": 2208
},
{
"epoch": 2.2796697626418987,
"grad_norm": 11.285134553373428,
"learning_rate": 1.6605988682619944e-06,
"loss": 0.23489339649677277,
"step": 2209
},
{
"epoch": 2.280701754385965,
"grad_norm": 13.041895042789484,
"learning_rate": 1.6561322611197772e-06,
"loss": 1.8350913524627686,
"step": 2210
},
{
"epoch": 2.281733746130031,
"grad_norm": 6.148647089292928,
"learning_rate": 1.6516704764894265e-06,
"loss": 0.1827671229839325,
"step": 2211
},
{
"epoch": 2.282765737874097,
"grad_norm": 9.946301771192338,
"learning_rate": 1.6472135208057128e-06,
"loss": 0.3029034733772278,
"step": 2212
},
{
"epoch": 2.283797729618163,
"grad_norm": 10.743644308129369,
"learning_rate": 1.642761400496438e-06,
"loss": 0.25189274549484253,
"step": 2213
},
{
"epoch": 2.2848297213622293,
"grad_norm": 8.923316366999579,
"learning_rate": 1.6383141219824328e-06,
"loss": 0.22978025674819946,
"step": 2214
},
{
"epoch": 2.285861713106295,
"grad_norm": 10.195349858709296,
"learning_rate": 1.6338716916775394e-06,
"loss": 0.16604535281658173,
"step": 2215
},
{
"epoch": 2.2868937048503613,
"grad_norm": 6.643488404122427,
"learning_rate": 1.629434115988614e-06,
"loss": 0.35414767265319824,
"step": 2216
},
{
"epoch": 2.287925696594427,
"grad_norm": 8.525699870440226,
"learning_rate": 1.6250014013155092e-06,
"loss": 0.3973991274833679,
"step": 2217
},
{
"epoch": 2.2889576883384932,
"grad_norm": 8.372392423539754,
"learning_rate": 1.6205735540510676e-06,
"loss": 0.4818735718727112,
"step": 2218
},
{
"epoch": 2.2899896800825594,
"grad_norm": 8.99426902317675,
"learning_rate": 1.6161505805811135e-06,
"loss": 0.23810598254203796,
"step": 2219
},
{
"epoch": 2.291021671826625,
"grad_norm": 10.67750050459878,
"learning_rate": 1.611732487284437e-06,
"loss": 0.4141588807106018,
"step": 2220
},
{
"epoch": 2.2920536635706914,
"grad_norm": 9.985959799982476,
"learning_rate": 1.6073192805327936e-06,
"loss": 0.5675234198570251,
"step": 2221
},
{
"epoch": 2.2930856553147576,
"grad_norm": 8.269013039840578,
"learning_rate": 1.6029109666908944e-06,
"loss": 0.3020309805870056,
"step": 2222
},
{
"epoch": 2.2941176470588234,
"grad_norm": 10.309152116306388,
"learning_rate": 1.5985075521163907e-06,
"loss": 0.5373930931091309,
"step": 2223
},
{
"epoch": 2.2951496388028896,
"grad_norm": 14.136819996226265,
"learning_rate": 1.5941090431598654e-06,
"loss": 0.6357072591781616,
"step": 2224
},
{
"epoch": 2.296181630546956,
"grad_norm": 9.207755036750997,
"learning_rate": 1.5897154461648317e-06,
"loss": 0.562431812286377,
"step": 2225
},
{
"epoch": 2.2972136222910216,
"grad_norm": 7.501876404025285,
"learning_rate": 1.5853267674677154e-06,
"loss": 0.3901820480823517,
"step": 2226
},
{
"epoch": 2.2982456140350878,
"grad_norm": 10.882395654407299,
"learning_rate": 1.580943013397851e-06,
"loss": 0.2067510038614273,
"step": 2227
},
{
"epoch": 2.299277605779154,
"grad_norm": 8.980900675238086,
"learning_rate": 1.5765641902774704e-06,
"loss": 0.2763798236846924,
"step": 2228
},
{
"epoch": 2.3003095975232197,
"grad_norm": 13.686128722812816,
"learning_rate": 1.572190304421694e-06,
"loss": 0.3706619143486023,
"step": 2229
},
{
"epoch": 2.301341589267286,
"grad_norm": 10.631613008658723,
"learning_rate": 1.567821362138518e-06,
"loss": 0.8434795141220093,
"step": 2230
},
{
"epoch": 2.302373581011352,
"grad_norm": 7.8863603274727225,
"learning_rate": 1.5634573697288164e-06,
"loss": 0.3212193548679352,
"step": 2231
},
{
"epoch": 2.303405572755418,
"grad_norm": 10.281296985834022,
"learning_rate": 1.5590983334863191e-06,
"loss": 0.4050968587398529,
"step": 2232
},
{
"epoch": 2.304437564499484,
"grad_norm": 10.029639794509817,
"learning_rate": 1.5547442596976115e-06,
"loss": 0.5072900652885437,
"step": 2233
},
{
"epoch": 2.30546955624355,
"grad_norm": 8.773520153143581,
"learning_rate": 1.550395154642117e-06,
"loss": 0.23122380673885345,
"step": 2234
},
{
"epoch": 2.306501547987616,
"grad_norm": 14.580360787576096,
"learning_rate": 1.5460510245920984e-06,
"loss": 0.37928903102874756,
"step": 2235
},
{
"epoch": 2.3075335397316823,
"grad_norm": 22.26283919312677,
"learning_rate": 1.5417118758126408e-06,
"loss": 0.6587377786636353,
"step": 2236
},
{
"epoch": 2.308565531475748,
"grad_norm": 8.557340391624383,
"learning_rate": 1.537377714561647e-06,
"loss": 0.3196367025375366,
"step": 2237
},
{
"epoch": 2.3095975232198143,
"grad_norm": 15.63210352214777,
"learning_rate": 1.533048547089827e-06,
"loss": 0.5556677579879761,
"step": 2238
},
{
"epoch": 2.3106295149638805,
"grad_norm": 10.211616555441974,
"learning_rate": 1.5287243796406852e-06,
"loss": 0.2998398542404175,
"step": 2239
},
{
"epoch": 2.3116615067079462,
"grad_norm": 9.720441767418976,
"learning_rate": 1.524405218450517e-06,
"loss": 0.3288514018058777,
"step": 2240
},
{
"epoch": 2.3126934984520124,
"grad_norm": 10.325944493786375,
"learning_rate": 1.5200910697484016e-06,
"loss": 0.5593407154083252,
"step": 2241
},
{
"epoch": 2.313725490196078,
"grad_norm": 10.501982039385146,
"learning_rate": 1.5157819397561863e-06,
"loss": 0.3179510831832886,
"step": 2242
},
{
"epoch": 2.3147574819401444,
"grad_norm": 9.417056273079762,
"learning_rate": 1.5114778346884768e-06,
"loss": 0.5840306878089905,
"step": 2243
},
{
"epoch": 2.3157894736842106,
"grad_norm": 37.14468874568553,
"learning_rate": 1.5071787607526366e-06,
"loss": 0.5103886723518372,
"step": 2244
},
{
"epoch": 2.3168214654282764,
"grad_norm": 9.950016255402419,
"learning_rate": 1.5028847241487715e-06,
"loss": 0.5624645948410034,
"step": 2245
},
{
"epoch": 2.3178534571723426,
"grad_norm": 9.161989586045065,
"learning_rate": 1.4985957310697242e-06,
"loss": 0.37405914068222046,
"step": 2246
},
{
"epoch": 2.318885448916409,
"grad_norm": 7.827841822954748,
"learning_rate": 1.4943117877010605e-06,
"loss": 0.32541224360466003,
"step": 2247
},
{
"epoch": 2.3199174406604746,
"grad_norm": 10.214626163709012,
"learning_rate": 1.4900329002210684e-06,
"loss": 0.3640068471431732,
"step": 2248
},
{
"epoch": 2.3209494324045408,
"grad_norm": 16.64776495496287,
"learning_rate": 1.4857590748007373e-06,
"loss": 0.36090803146362305,
"step": 2249
},
{
"epoch": 2.321981424148607,
"grad_norm": 8.39665025975816,
"learning_rate": 1.4814903176037605e-06,
"loss": 0.23087266087532043,
"step": 2250
},
{
"epoch": 2.3230134158926727,
"grad_norm": 12.400366359823169,
"learning_rate": 1.477226634786525e-06,
"loss": 0.5182145833969116,
"step": 2251
},
{
"epoch": 2.324045407636739,
"grad_norm": 23.01924582709264,
"learning_rate": 1.472968032498095e-06,
"loss": 1.7229740619659424,
"step": 2252
},
{
"epoch": 2.325077399380805,
"grad_norm": 8.801602741757135,
"learning_rate": 1.4687145168802063e-06,
"loss": 0.24391722679138184,
"step": 2253
},
{
"epoch": 2.326109391124871,
"grad_norm": 9.167374353837271,
"learning_rate": 1.4644660940672628e-06,
"loss": 0.4265590310096741,
"step": 2254
},
{
"epoch": 2.327141382868937,
"grad_norm": 8.87708478323919,
"learning_rate": 1.4602227701863214e-06,
"loss": 0.30143553018569946,
"step": 2255
},
{
"epoch": 2.3281733746130033,
"grad_norm": 14.076434184627097,
"learning_rate": 1.4559845513570859e-06,
"loss": 0.345781147480011,
"step": 2256
},
{
"epoch": 2.329205366357069,
"grad_norm": 9.343761101164164,
"learning_rate": 1.4517514436918995e-06,
"loss": 0.27536025643348694,
"step": 2257
},
{
"epoch": 2.3302373581011353,
"grad_norm": 8.772146897151353,
"learning_rate": 1.4475234532957284e-06,
"loss": 0.38081449270248413,
"step": 2258
},
{
"epoch": 2.331269349845201,
"grad_norm": 8.377025171999012,
"learning_rate": 1.4433005862661625e-06,
"loss": 0.30673426389694214,
"step": 2259
},
{
"epoch": 2.3323013415892673,
"grad_norm": 13.785014247606318,
"learning_rate": 1.439082848693406e-06,
"loss": 0.7989313006401062,
"step": 2260
},
{
"epoch": 2.3333333333333335,
"grad_norm": 12.196511340922179,
"learning_rate": 1.434870246660262e-06,
"loss": 0.4146811366081238,
"step": 2261
},
{
"epoch": 2.3343653250773992,
"grad_norm": 11.294083924432297,
"learning_rate": 1.4306627862421246e-06,
"loss": 0.5966222882270813,
"step": 2262
},
{
"epoch": 2.3353973168214655,
"grad_norm": 10.66807634497044,
"learning_rate": 1.4264604735069764e-06,
"loss": 0.5763157606124878,
"step": 2263
},
{
"epoch": 2.3364293085655317,
"grad_norm": 11.371466604330612,
"learning_rate": 1.4222633145153758e-06,
"loss": 0.7577279806137085,
"step": 2264
},
{
"epoch": 2.3374613003095974,
"grad_norm": 9.889572174171429,
"learning_rate": 1.4180713153204468e-06,
"loss": 0.4307854175567627,
"step": 2265
},
{
"epoch": 2.3384932920536636,
"grad_norm": 7.072193984041829,
"learning_rate": 1.4138844819678726e-06,
"loss": 0.20140615105628967,
"step": 2266
},
{
"epoch": 2.3395252837977294,
"grad_norm": 10.632779713688336,
"learning_rate": 1.4097028204958896e-06,
"loss": 0.22186976671218872,
"step": 2267
},
{
"epoch": 2.3405572755417956,
"grad_norm": 8.929669950167531,
"learning_rate": 1.4055263369352673e-06,
"loss": 0.8361318707466125,
"step": 2268
},
{
"epoch": 2.341589267285862,
"grad_norm": 18.286877202260154,
"learning_rate": 1.4013550373093139e-06,
"loss": 0.8228157162666321,
"step": 2269
},
{
"epoch": 2.3426212590299276,
"grad_norm": 8.923437567911852,
"learning_rate": 1.397188927633863e-06,
"loss": 0.6820056438446045,
"step": 2270
},
{
"epoch": 2.343653250773994,
"grad_norm": 13.026055261230391,
"learning_rate": 1.393028013917259e-06,
"loss": 0.7587333917617798,
"step": 2271
},
{
"epoch": 2.34468524251806,
"grad_norm": 11.29064677317461,
"learning_rate": 1.388872302160353e-06,
"loss": 0.9072903394699097,
"step": 2272
},
{
"epoch": 2.3457172342621257,
"grad_norm": 12.179380361158485,
"learning_rate": 1.3847217983564943e-06,
"loss": 0.4823061227798462,
"step": 2273
},
{
"epoch": 2.346749226006192,
"grad_norm": 6.953779587008528,
"learning_rate": 1.3805765084915236e-06,
"loss": 0.2917562425136566,
"step": 2274
},
{
"epoch": 2.347781217750258,
"grad_norm": 11.39337241380322,
"learning_rate": 1.3764364385437595e-06,
"loss": 0.42079344391822815,
"step": 2275
},
{
"epoch": 2.348813209494324,
"grad_norm": 8.464953413026237,
"learning_rate": 1.3723015944839947e-06,
"loss": 0.6202406287193298,
"step": 2276
},
{
"epoch": 2.34984520123839,
"grad_norm": 17.585128569200954,
"learning_rate": 1.3681719822754813e-06,
"loss": 0.6479356288909912,
"step": 2277
},
{
"epoch": 2.3508771929824563,
"grad_norm": 12.926284507484658,
"learning_rate": 1.3640476078739296e-06,
"loss": 0.44959646463394165,
"step": 2278
},
{
"epoch": 2.351909184726522,
"grad_norm": 10.32503660981902,
"learning_rate": 1.3599284772274935e-06,
"loss": 0.3125981092453003,
"step": 2279
},
{
"epoch": 2.3529411764705883,
"grad_norm": 11.281541729676572,
"learning_rate": 1.35581459627677e-06,
"loss": 0.7096989154815674,
"step": 2280
},
{
"epoch": 2.3539731682146545,
"grad_norm": 8.101741112538424,
"learning_rate": 1.351705970954777e-06,
"loss": 0.6925749182701111,
"step": 2281
},
{
"epoch": 2.3550051599587203,
"grad_norm": 11.7984677504724,
"learning_rate": 1.347602607186957e-06,
"loss": 0.7987287640571594,
"step": 2282
},
{
"epoch": 2.3560371517027865,
"grad_norm": 15.291266015010487,
"learning_rate": 1.3435045108911648e-06,
"loss": 0.9378319978713989,
"step": 2283
},
{
"epoch": 2.3570691434468523,
"grad_norm": 8.389071900028682,
"learning_rate": 1.339411687977657e-06,
"loss": 0.2552429139614105,
"step": 2284
},
{
"epoch": 2.3581011351909185,
"grad_norm": 13.02561353962284,
"learning_rate": 1.335324144349085e-06,
"loss": 0.23329763114452362,
"step": 2285
},
{
"epoch": 2.3591331269349847,
"grad_norm": 10.87277502286887,
"learning_rate": 1.3312418859004895e-06,
"loss": 0.2614462375640869,
"step": 2286
},
{
"epoch": 2.3601651186790504,
"grad_norm": 13.95195753887422,
"learning_rate": 1.327164918519282e-06,
"loss": 0.2634202539920807,
"step": 2287
},
{
"epoch": 2.3611971104231166,
"grad_norm": 11.846596561960679,
"learning_rate": 1.3230932480852487e-06,
"loss": 0.4762287437915802,
"step": 2288
},
{
"epoch": 2.362229102167183,
"grad_norm": 11.405128940430034,
"learning_rate": 1.3190268804705381e-06,
"loss": 0.5081691741943359,
"step": 2289
},
{
"epoch": 2.3632610939112486,
"grad_norm": 14.103546259617673,
"learning_rate": 1.3149658215396478e-06,
"loss": 0.35150325298309326,
"step": 2290
},
{
"epoch": 2.364293085655315,
"grad_norm": 14.532903837666758,
"learning_rate": 1.310910077149417e-06,
"loss": 0.7659963965415955,
"step": 2291
},
{
"epoch": 2.3653250773993806,
"grad_norm": 13.34599463001999,
"learning_rate": 1.3068596531490253e-06,
"loss": 0.5429165959358215,
"step": 2292
},
{
"epoch": 2.366357069143447,
"grad_norm": 7.885947716842902,
"learning_rate": 1.3028145553799764e-06,
"loss": 0.31663644313812256,
"step": 2293
},
{
"epoch": 2.367389060887513,
"grad_norm": 8.921384888300858,
"learning_rate": 1.2987747896760927e-06,
"loss": 1.0047956705093384,
"step": 2294
},
{
"epoch": 2.3684210526315788,
"grad_norm": 18.076640304312118,
"learning_rate": 1.2947403618635097e-06,
"loss": 0.6527321338653564,
"step": 2295
},
{
"epoch": 2.369453044375645,
"grad_norm": 16.577720379130852,
"learning_rate": 1.290711277760658e-06,
"loss": 0.49626123905181885,
"step": 2296
},
{
"epoch": 2.370485036119711,
"grad_norm": 10.918242651297279,
"learning_rate": 1.2866875431782677e-06,
"loss": 0.5808942914009094,
"step": 2297
},
{
"epoch": 2.371517027863777,
"grad_norm": 17.45010139274395,
"learning_rate": 1.2826691639193506e-06,
"loss": 0.8238117098808289,
"step": 2298
},
{
"epoch": 2.372549019607843,
"grad_norm": 11.926863010648972,
"learning_rate": 1.2786561457791996e-06,
"loss": 0.6759803891181946,
"step": 2299
},
{
"epoch": 2.3735810113519094,
"grad_norm": 13.867351303340826,
"learning_rate": 1.2746484945453691e-06,
"loss": 0.3109396994113922,
"step": 2300
},
{
"epoch": 2.374613003095975,
"grad_norm": 11.264931022174274,
"learning_rate": 1.270646215997678e-06,
"loss": 0.2699579894542694,
"step": 2301
},
{
"epoch": 2.3756449948400413,
"grad_norm": 11.601205893014052,
"learning_rate": 1.2666493159081944e-06,
"loss": 0.32813602685928345,
"step": 2302
},
{
"epoch": 2.3766769865841075,
"grad_norm": 9.489393114719974,
"learning_rate": 1.262657800041232e-06,
"loss": 0.29331862926483154,
"step": 2303
},
{
"epoch": 2.3777089783281733,
"grad_norm": 10.455110525170294,
"learning_rate": 1.2586716741533389e-06,
"loss": 0.35240501165390015,
"step": 2304
},
{
"epoch": 2.3787409700722395,
"grad_norm": 12.1430793084311,
"learning_rate": 1.2546909439932858e-06,
"loss": 0.4376395344734192,
"step": 2305
},
{
"epoch": 2.3797729618163057,
"grad_norm": 20.666356804549835,
"learning_rate": 1.2507156153020667e-06,
"loss": 0.6594339609146118,
"step": 2306
},
{
"epoch": 2.3808049535603715,
"grad_norm": 7.966081233097103,
"learning_rate": 1.2467456938128824e-06,
"loss": 0.4372865557670593,
"step": 2307
},
{
"epoch": 2.3818369453044377,
"grad_norm": 8.177508156131061,
"learning_rate": 1.2427811852511396e-06,
"loss": 0.1819734275341034,
"step": 2308
},
{
"epoch": 2.3828689370485034,
"grad_norm": 7.302440475460186,
"learning_rate": 1.2388220953344354e-06,
"loss": 0.24572576582431793,
"step": 2309
},
{
"epoch": 2.3839009287925697,
"grad_norm": 10.31916401694975,
"learning_rate": 1.23486842977255e-06,
"loss": 0.5179727077484131,
"step": 2310
},
{
"epoch": 2.384932920536636,
"grad_norm": 8.252763135789019,
"learning_rate": 1.2309201942674442e-06,
"loss": 0.21115389466285706,
"step": 2311
},
{
"epoch": 2.3859649122807016,
"grad_norm": 10.930992870936334,
"learning_rate": 1.226977394513247e-06,
"loss": 0.5590957999229431,
"step": 2312
},
{
"epoch": 2.386996904024768,
"grad_norm": 13.945253351389052,
"learning_rate": 1.2230400361962469e-06,
"loss": 0.8369548916816711,
"step": 2313
},
{
"epoch": 2.388028895768834,
"grad_norm": 12.529360499358702,
"learning_rate": 1.2191081249948871e-06,
"loss": 0.4372384548187256,
"step": 2314
},
{
"epoch": 2.3890608875129,
"grad_norm": 11.004924824102241,
"learning_rate": 1.2151816665797507e-06,
"loss": 0.3626002073287964,
"step": 2315
},
{
"epoch": 2.390092879256966,
"grad_norm": 13.728593391482939,
"learning_rate": 1.2112606666135602e-06,
"loss": 0.259071946144104,
"step": 2316
},
{
"epoch": 2.3911248710010318,
"grad_norm": 10.280716365371559,
"learning_rate": 1.2073451307511642e-06,
"loss": 0.45200228691101074,
"step": 2317
},
{
"epoch": 2.392156862745098,
"grad_norm": 10.155973965955244,
"learning_rate": 1.203435064639536e-06,
"loss": 0.3288681209087372,
"step": 2318
},
{
"epoch": 2.393188854489164,
"grad_norm": 8.58553962060409,
"learning_rate": 1.1995304739177515e-06,
"loss": 0.43590980768203735,
"step": 2319
},
{
"epoch": 2.39422084623323,
"grad_norm": 27.483561695551096,
"learning_rate": 1.1956313642169974e-06,
"loss": 0.8669304251670837,
"step": 2320
},
{
"epoch": 2.395252837977296,
"grad_norm": 9.371469568007717,
"learning_rate": 1.1917377411605523e-06,
"loss": 0.3421843945980072,
"step": 2321
},
{
"epoch": 2.3962848297213624,
"grad_norm": 13.730655837456155,
"learning_rate": 1.1878496103637838e-06,
"loss": 0.8081228137016296,
"step": 2322
},
{
"epoch": 2.397316821465428,
"grad_norm": 15.974869879662167,
"learning_rate": 1.1839669774341378e-06,
"loss": 1.2036049365997314,
"step": 2323
},
{
"epoch": 2.3983488132094943,
"grad_norm": 13.471015946681664,
"learning_rate": 1.1800898479711293e-06,
"loss": 0.18934336304664612,
"step": 2324
},
{
"epoch": 2.3993808049535605,
"grad_norm": 11.525094925651421,
"learning_rate": 1.1762182275663387e-06,
"loss": 0.4728744626045227,
"step": 2325
},
{
"epoch": 2.4004127966976263,
"grad_norm": 10.72246303484596,
"learning_rate": 1.1723521218034006e-06,
"loss": 0.3724917769432068,
"step": 2326
},
{
"epoch": 2.4014447884416925,
"grad_norm": 12.89596258783316,
"learning_rate": 1.1684915362579951e-06,
"loss": 0.3012378215789795,
"step": 2327
},
{
"epoch": 2.4024767801857587,
"grad_norm": 7.97123740423598,
"learning_rate": 1.1646364764978468e-06,
"loss": 0.426517516374588,
"step": 2328
},
{
"epoch": 2.4035087719298245,
"grad_norm": 10.567269384597846,
"learning_rate": 1.1607869480827016e-06,
"loss": 0.8786559104919434,
"step": 2329
},
{
"epoch": 2.4045407636738907,
"grad_norm": 9.465822104953064,
"learning_rate": 1.1569429565643353e-06,
"loss": 0.27923858165740967,
"step": 2330
},
{
"epoch": 2.405572755417957,
"grad_norm": 8.941211304728357,
"learning_rate": 1.1531045074865355e-06,
"loss": 0.35923856496810913,
"step": 2331
},
{
"epoch": 2.4066047471620227,
"grad_norm": 8.354879524912997,
"learning_rate": 1.1492716063850973e-06,
"loss": 0.22897693514823914,
"step": 2332
},
{
"epoch": 2.407636738906089,
"grad_norm": 10.360519545976057,
"learning_rate": 1.1454442587878161e-06,
"loss": 1.8416224718093872,
"step": 2333
},
{
"epoch": 2.4086687306501546,
"grad_norm": 10.263453437778633,
"learning_rate": 1.1416224702144734e-06,
"loss": 0.19484372437000275,
"step": 2334
},
{
"epoch": 2.409700722394221,
"grad_norm": 12.678350979764224,
"learning_rate": 1.137806246176838e-06,
"loss": 0.3071631193161011,
"step": 2335
},
{
"epoch": 2.410732714138287,
"grad_norm": 8.604280482284869,
"learning_rate": 1.1339955921786504e-06,
"loss": 0.2432839423418045,
"step": 2336
},
{
"epoch": 2.411764705882353,
"grad_norm": 14.531747010640835,
"learning_rate": 1.1301905137156238e-06,
"loss": 0.3913341760635376,
"step": 2337
},
{
"epoch": 2.412796697626419,
"grad_norm": 15.11076558143236,
"learning_rate": 1.1263910162754222e-06,
"loss": 0.23445601761341095,
"step": 2338
},
{
"epoch": 2.4138286893704852,
"grad_norm": 8.677501081099685,
"learning_rate": 1.1225971053376661e-06,
"loss": 0.273960679769516,
"step": 2339
},
{
"epoch": 2.414860681114551,
"grad_norm": 8.861797371537126,
"learning_rate": 1.1188087863739173e-06,
"loss": 0.5194929242134094,
"step": 2340
},
{
"epoch": 2.415892672858617,
"grad_norm": 9.66066843155121,
"learning_rate": 1.1150260648476742e-06,
"loss": 0.6325628757476807,
"step": 2341
},
{
"epoch": 2.416924664602683,
"grad_norm": 9.558262645529188,
"learning_rate": 1.1112489462143622e-06,
"loss": 0.6525687575340271,
"step": 2342
},
{
"epoch": 2.417956656346749,
"grad_norm": 26.355092347093272,
"learning_rate": 1.1074774359213236e-06,
"loss": 1.1010922193527222,
"step": 2343
},
{
"epoch": 2.4189886480908154,
"grad_norm": 22.52338795222645,
"learning_rate": 1.1037115394078162e-06,
"loss": 0.6247187852859497,
"step": 2344
},
{
"epoch": 2.420020639834881,
"grad_norm": 28.624581804368976,
"learning_rate": 1.0999512621049991e-06,
"loss": 1.2945120334625244,
"step": 2345
},
{
"epoch": 2.4210526315789473,
"grad_norm": 13.87565959898355,
"learning_rate": 1.096196609435929e-06,
"loss": 0.8663886785507202,
"step": 2346
},
{
"epoch": 2.4220846233230136,
"grad_norm": 18.65055265213526,
"learning_rate": 1.0924475868155493e-06,
"loss": 0.36518436670303345,
"step": 2347
},
{
"epoch": 2.4231166150670793,
"grad_norm": 7.921531494445021,
"learning_rate": 1.0887041996506858e-06,
"loss": 0.24292084574699402,
"step": 2348
},
{
"epoch": 2.4241486068111455,
"grad_norm": 17.719547304099176,
"learning_rate": 1.084966453340034e-06,
"loss": 1.2447023391723633,
"step": 2349
},
{
"epoch": 2.4251805985552117,
"grad_norm": 12.712028230215658,
"learning_rate": 1.081234353274157e-06,
"loss": 0.37312689423561096,
"step": 2350
},
{
"epoch": 2.4262125902992775,
"grad_norm": 13.940826103948076,
"learning_rate": 1.0775079048354736e-06,
"loss": 0.4350719153881073,
"step": 2351
},
{
"epoch": 2.4272445820433437,
"grad_norm": 10.100638431884445,
"learning_rate": 1.0737871133982524e-06,
"loss": 0.9279487729072571,
"step": 2352
},
{
"epoch": 2.42827657378741,
"grad_norm": 6.804514506619492,
"learning_rate": 1.070071984328601e-06,
"loss": 0.40028977394104004,
"step": 2353
},
{
"epoch": 2.4293085655314757,
"grad_norm": 17.521722269336678,
"learning_rate": 1.0663625229844643e-06,
"loss": 0.5591195225715637,
"step": 2354
},
{
"epoch": 2.430340557275542,
"grad_norm": 11.965660390374483,
"learning_rate": 1.0626587347156097e-06,
"loss": 0.7699227333068848,
"step": 2355
},
{
"epoch": 2.431372549019608,
"grad_norm": 9.677677366026419,
"learning_rate": 1.0589606248636291e-06,
"loss": 0.796596109867096,
"step": 2356
},
{
"epoch": 2.432404540763674,
"grad_norm": 15.7077268294847,
"learning_rate": 1.055268198761918e-06,
"loss": 0.35285720229148865,
"step": 2357
},
{
"epoch": 2.43343653250774,
"grad_norm": 11.019236024085936,
"learning_rate": 1.0515814617356773e-06,
"loss": 0.3330291509628296,
"step": 2358
},
{
"epoch": 2.434468524251806,
"grad_norm": 9.845431324155186,
"learning_rate": 1.0479004191019043e-06,
"loss": 0.35188984870910645,
"step": 2359
},
{
"epoch": 2.435500515995872,
"grad_norm": 10.27888078280417,
"learning_rate": 1.0442250761693829e-06,
"loss": 0.42126739025115967,
"step": 2360
},
{
"epoch": 2.4365325077399382,
"grad_norm": 9.77599850425449,
"learning_rate": 1.040555438238679e-06,
"loss": 0.300399512052536,
"step": 2361
},
{
"epoch": 2.437564499484004,
"grad_norm": 7.193000367836019,
"learning_rate": 1.0368915106021255e-06,
"loss": 0.6267426013946533,
"step": 2362
},
{
"epoch": 2.43859649122807,
"grad_norm": 8.350912664417,
"learning_rate": 1.0332332985438248e-06,
"loss": 0.43233785033226013,
"step": 2363
},
{
"epoch": 2.4396284829721364,
"grad_norm": 7.908070153243322,
"learning_rate": 1.0295808073396352e-06,
"loss": 0.28449347615242004,
"step": 2364
},
{
"epoch": 2.440660474716202,
"grad_norm": 9.445311334028778,
"learning_rate": 1.0259340422571635e-06,
"loss": 0.6865044832229614,
"step": 2365
},
{
"epoch": 2.4416924664602684,
"grad_norm": 9.637391373408922,
"learning_rate": 1.0222930085557593e-06,
"loss": 0.27788588404655457,
"step": 2366
},
{
"epoch": 2.442724458204334,
"grad_norm": 11.592538564975873,
"learning_rate": 1.0186577114865053e-06,
"loss": 0.5116929411888123,
"step": 2367
},
{
"epoch": 2.4437564499484004,
"grad_norm": 13.014236028703879,
"learning_rate": 1.015028156292212e-06,
"loss": 0.7098373770713806,
"step": 2368
},
{
"epoch": 2.4447884416924666,
"grad_norm": 9.186002802022488,
"learning_rate": 1.0114043482074088e-06,
"loss": 0.5690082311630249,
"step": 2369
},
{
"epoch": 2.4458204334365323,
"grad_norm": 9.488909720476801,
"learning_rate": 1.0077862924583354e-06,
"loss": 0.3127894401550293,
"step": 2370
},
{
"epoch": 2.4468524251805985,
"grad_norm": 8.92075291296032,
"learning_rate": 1.0041739942629387e-06,
"loss": 0.2941093444824219,
"step": 2371
},
{
"epoch": 2.4478844169246647,
"grad_norm": 21.400702288335943,
"learning_rate": 1.0005674588308566e-06,
"loss": 0.615723192691803,
"step": 2372
},
{
"epoch": 2.4489164086687305,
"grad_norm": 16.998484336603784,
"learning_rate": 9.969666913634207e-07,
"loss": 0.33745965361595154,
"step": 2373
},
{
"epoch": 2.4499484004127967,
"grad_norm": 9.728842337723819,
"learning_rate": 9.933716970536428e-07,
"loss": 0.36395740509033203,
"step": 2374
},
{
"epoch": 2.450980392156863,
"grad_norm": 18.40886473242549,
"learning_rate": 9.897824810862084e-07,
"loss": 0.3619149923324585,
"step": 2375
},
{
"epoch": 2.4520123839009287,
"grad_norm": 39.54015632713799,
"learning_rate": 9.861990486374695e-07,
"loss": 0.6555899381637573,
"step": 2376
},
{
"epoch": 2.453044375644995,
"grad_norm": 11.682157076951567,
"learning_rate": 9.826214048754368e-07,
"loss": 0.40092623233795166,
"step": 2377
},
{
"epoch": 2.454076367389061,
"grad_norm": 9.362559088256313,
"learning_rate": 9.790495549597733e-07,
"loss": 0.18525682389736176,
"step": 2378
},
{
"epoch": 2.455108359133127,
"grad_norm": 7.775023798181715,
"learning_rate": 9.75483504041786e-07,
"loss": 0.3614833950996399,
"step": 2379
},
{
"epoch": 2.456140350877193,
"grad_norm": 14.837476186767415,
"learning_rate": 9.719232572644189e-07,
"loss": 0.9362202286720276,
"step": 2380
},
{
"epoch": 2.4571723426212593,
"grad_norm": 11.002975691995825,
"learning_rate": 9.683688197622432e-07,
"loss": 0.47554850578308105,
"step": 2381
},
{
"epoch": 2.458204334365325,
"grad_norm": 13.068756446775595,
"learning_rate": 9.648201966614546e-07,
"loss": 0.4872112572193146,
"step": 2382
},
{
"epoch": 2.4592363261093912,
"grad_norm": 11.014793773785462,
"learning_rate": 9.61277393079862e-07,
"loss": 0.2566531002521515,
"step": 2383
},
{
"epoch": 2.460268317853457,
"grad_norm": 12.206664490362016,
"learning_rate": 9.577404141268815e-07,
"loss": 0.4083847403526306,
"step": 2384
},
{
"epoch": 2.461300309597523,
"grad_norm": 8.639693634651767,
"learning_rate": 9.542092649035295e-07,
"loss": 0.26377323269844055,
"step": 2385
},
{
"epoch": 2.4623323013415894,
"grad_norm": 11.586254114186088,
"learning_rate": 9.506839505024146e-07,
"loss": 0.6863972544670105,
"step": 2386
},
{
"epoch": 2.463364293085655,
"grad_norm": 8.17869956434473,
"learning_rate": 9.471644760077297e-07,
"loss": 0.5492748618125916,
"step": 2387
},
{
"epoch": 2.4643962848297214,
"grad_norm": 7.728135075325033,
"learning_rate": 9.436508464952471e-07,
"loss": 0.257376492023468,
"step": 2388
},
{
"epoch": 2.4654282765737876,
"grad_norm": 13.235499955162616,
"learning_rate": 9.40143067032307e-07,
"loss": 2.1276726722717285,
"step": 2389
},
{
"epoch": 2.4664602683178534,
"grad_norm": 11.469789882583859,
"learning_rate": 9.366411426778165e-07,
"loss": 0.5650811791419983,
"step": 2390
},
{
"epoch": 2.4674922600619196,
"grad_norm": 14.094658908361136,
"learning_rate": 9.331450784822326e-07,
"loss": 0.35821136832237244,
"step": 2391
},
{
"epoch": 2.4685242518059853,
"grad_norm": 11.027558569826938,
"learning_rate": 9.296548794875659e-07,
"loss": 0.49398985505104065,
"step": 2392
},
{
"epoch": 2.4695562435500515,
"grad_norm": 13.783391565891415,
"learning_rate": 9.261705507273666e-07,
"loss": 0.7277883291244507,
"step": 2393
},
{
"epoch": 2.4705882352941178,
"grad_norm": 8.55942333562643,
"learning_rate": 9.22692097226719e-07,
"loss": 0.5172953605651855,
"step": 2394
},
{
"epoch": 2.4716202270381835,
"grad_norm": 11.393872910806078,
"learning_rate": 9.19219524002234e-07,
"loss": 0.3203275203704834,
"step": 2395
},
{
"epoch": 2.4726522187822497,
"grad_norm": 9.980558628262713,
"learning_rate": 9.157528360620416e-07,
"loss": 0.26650774478912354,
"step": 2396
},
{
"epoch": 2.473684210526316,
"grad_norm": 29.038587672697723,
"learning_rate": 9.122920384057849e-07,
"loss": 0.9730219841003418,
"step": 2397
},
{
"epoch": 2.4747162022703817,
"grad_norm": 7.0552253170083015,
"learning_rate": 9.088371360246107e-07,
"loss": 0.24205099046230316,
"step": 2398
},
{
"epoch": 2.475748194014448,
"grad_norm": 9.612073593825405,
"learning_rate": 9.053881339011672e-07,
"loss": 0.5207908153533936,
"step": 2399
},
{
"epoch": 2.476780185758514,
"grad_norm": 8.051099464493184,
"learning_rate": 9.019450370095867e-07,
"loss": 0.27800866961479187,
"step": 2400
},
{
"epoch": 2.47781217750258,
"grad_norm": 9.826849297611608,
"learning_rate": 8.985078503154914e-07,
"loss": 0.2225874662399292,
"step": 2401
},
{
"epoch": 2.478844169246646,
"grad_norm": 12.338649401779252,
"learning_rate": 8.950765787759769e-07,
"loss": 0.18932898342609406,
"step": 2402
},
{
"epoch": 2.4798761609907123,
"grad_norm": 8.506983689882658,
"learning_rate": 8.916512273396078e-07,
"loss": 0.4070678949356079,
"step": 2403
},
{
"epoch": 2.480908152734778,
"grad_norm": 12.940466747256666,
"learning_rate": 8.882318009464124e-07,
"loss": 0.37709781527519226,
"step": 2404
},
{
"epoch": 2.4819401444788443,
"grad_norm": 7.538269987278947,
"learning_rate": 8.848183045278729e-07,
"loss": 0.1588832139968872,
"step": 2405
},
{
"epoch": 2.4829721362229105,
"grad_norm": 7.4930689254547005,
"learning_rate": 8.814107430069185e-07,
"loss": 0.5599884986877441,
"step": 2406
},
{
"epoch": 2.4840041279669762,
"grad_norm": 13.417182779620711,
"learning_rate": 8.780091212979208e-07,
"loss": 0.7557123899459839,
"step": 2407
},
{
"epoch": 2.4850361197110424,
"grad_norm": 5.534363337722231,
"learning_rate": 8.74613444306684e-07,
"loss": 0.07814184576272964,
"step": 2408
},
{
"epoch": 2.486068111455108,
"grad_norm": 9.35144847838186,
"learning_rate": 8.712237169304394e-07,
"loss": 0.20516231656074524,
"step": 2409
},
{
"epoch": 2.4871001031991744,
"grad_norm": 8.075249750271306,
"learning_rate": 8.678399440578367e-07,
"loss": 0.2661157250404358,
"step": 2410
},
{
"epoch": 2.4881320949432406,
"grad_norm": 13.388476095299996,
"learning_rate": 8.644621305689383e-07,
"loss": 0.29081588983535767,
"step": 2411
},
{
"epoch": 2.4891640866873064,
"grad_norm": 9.58389049030021,
"learning_rate": 8.61090281335214e-07,
"loss": 0.3577726483345032,
"step": 2412
},
{
"epoch": 2.4901960784313726,
"grad_norm": 8.567604008779373,
"learning_rate": 8.577244012195291e-07,
"loss": 0.6649857759475708,
"step": 2413
},
{
"epoch": 2.4912280701754383,
"grad_norm": 8.7965865174537,
"learning_rate": 8.543644950761426e-07,
"loss": 0.3712612986564636,
"step": 2414
},
{
"epoch": 2.4922600619195046,
"grad_norm": 7.005903539625946,
"learning_rate": 8.510105677506964e-07,
"loss": 0.10786724090576172,
"step": 2415
},
{
"epoch": 2.4932920536635708,
"grad_norm": 9.08726282307587,
"learning_rate": 8.476626240802099e-07,
"loss": 0.5389184355735779,
"step": 2416
},
{
"epoch": 2.4943240454076365,
"grad_norm": 12.889574549268769,
"learning_rate": 8.443206688930744e-07,
"loss": 0.4571652114391327,
"step": 2417
},
{
"epoch": 2.4953560371517027,
"grad_norm": 9.210834471883407,
"learning_rate": 8.409847070090437e-07,
"loss": 0.5685957074165344,
"step": 2418
},
{
"epoch": 2.496388028895769,
"grad_norm": 10.263316886694389,
"learning_rate": 8.376547432392262e-07,
"loss": 0.6476658582687378,
"step": 2419
},
{
"epoch": 2.4974200206398347,
"grad_norm": 17.78802954389812,
"learning_rate": 8.343307823860819e-07,
"loss": 0.5561540126800537,
"step": 2420
},
{
"epoch": 2.498452012383901,
"grad_norm": 8.664026801032575,
"learning_rate": 8.310128292434139e-07,
"loss": 0.2646542191505432,
"step": 2421
},
{
"epoch": 2.499484004127967,
"grad_norm": 11.559484893196826,
"learning_rate": 8.277008885963594e-07,
"loss": 0.44151124358177185,
"step": 2422
},
{
"epoch": 2.500515995872033,
"grad_norm": 9.83190452333222,
"learning_rate": 8.243949652213862e-07,
"loss": 0.41169655323028564,
"step": 2423
},
{
"epoch": 2.501547987616099,
"grad_norm": 10.979346072111683,
"learning_rate": 8.210950638862813e-07,
"loss": 0.31727972626686096,
"step": 2424
},
{
"epoch": 2.5025799793601653,
"grad_norm": 11.557106442230344,
"learning_rate": 8.178011893501498e-07,
"loss": 0.29078972339630127,
"step": 2425
},
{
"epoch": 2.503611971104231,
"grad_norm": 9.674782624549838,
"learning_rate": 8.145133463634031e-07,
"loss": 0.9324804544448853,
"step": 2426
},
{
"epoch": 2.5046439628482973,
"grad_norm": 14.034812449654558,
"learning_rate": 8.112315396677561e-07,
"loss": 0.7548234462738037,
"step": 2427
},
{
"epoch": 2.5056759545923635,
"grad_norm": 13.9406219318329,
"learning_rate": 8.079557739962129e-07,
"loss": 0.3924209475517273,
"step": 2428
},
{
"epoch": 2.5067079463364292,
"grad_norm": 14.264471133053592,
"learning_rate": 8.046860540730711e-07,
"loss": 0.23829391598701477,
"step": 2429
},
{
"epoch": 2.5077399380804954,
"grad_norm": 9.555475674044061,
"learning_rate": 8.014223846139069e-07,
"loss": 0.27250006794929504,
"step": 2430
},
{
"epoch": 2.5087719298245617,
"grad_norm": 11.176391033621893,
"learning_rate": 7.981647703255702e-07,
"loss": 0.41484761238098145,
"step": 2431
},
{
"epoch": 2.5098039215686274,
"grad_norm": 6.698502697538434,
"learning_rate": 7.949132159061784e-07,
"loss": 0.2051486372947693,
"step": 2432
},
{
"epoch": 2.5108359133126936,
"grad_norm": 13.280473592235404,
"learning_rate": 7.916677260451095e-07,
"loss": 0.417132169008255,
"step": 2433
},
{
"epoch": 2.5118679050567594,
"grad_norm": 10.079119541318564,
"learning_rate": 7.884283054229958e-07,
"loss": 0.27953770756721497,
"step": 2434
},
{
"epoch": 2.5128998968008256,
"grad_norm": 9.000723058812289,
"learning_rate": 7.851949587117152e-07,
"loss": 0.5249171853065491,
"step": 2435
},
{
"epoch": 2.513931888544892,
"grad_norm": 10.290660728328614,
"learning_rate": 7.819676905743872e-07,
"loss": 0.31173601746559143,
"step": 2436
},
{
"epoch": 2.5149638802889576,
"grad_norm": 11.538047770457174,
"learning_rate": 7.787465056653653e-07,
"loss": 0.6719921827316284,
"step": 2437
},
{
"epoch": 2.5159958720330238,
"grad_norm": 9.22266151911703,
"learning_rate": 7.755314086302257e-07,
"loss": 0.44996383786201477,
"step": 2438
},
{
"epoch": 2.5170278637770895,
"grad_norm": 7.624242369239375,
"learning_rate": 7.723224041057697e-07,
"loss": 0.26270487904548645,
"step": 2439
},
{
"epoch": 2.5180598555211557,
"grad_norm": 16.048347022232104,
"learning_rate": 7.691194967200099e-07,
"loss": 0.19687271118164062,
"step": 2440
},
{
"epoch": 2.519091847265222,
"grad_norm": 13.4497672853125,
"learning_rate": 7.659226910921652e-07,
"loss": 0.553430438041687,
"step": 2441
},
{
"epoch": 2.5201238390092877,
"grad_norm": 11.394367224771276,
"learning_rate": 7.627319918326559e-07,
"loss": 0.7229731678962708,
"step": 2442
},
{
"epoch": 2.521155830753354,
"grad_norm": 7.754596846111269,
"learning_rate": 7.595474035430944e-07,
"loss": 0.2186160832643509,
"step": 2443
},
{
"epoch": 2.52218782249742,
"grad_norm": 12.784166446936656,
"learning_rate": 7.563689308162803e-07,
"loss": 0.5006595849990845,
"step": 2444
},
{
"epoch": 2.523219814241486,
"grad_norm": 20.0431695330293,
"learning_rate": 7.531965782361939e-07,
"loss": 1.1238250732421875,
"step": 2445
},
{
"epoch": 2.524251805985552,
"grad_norm": 15.706339089105576,
"learning_rate": 7.500303503779898e-07,
"loss": 0.33517026901245117,
"step": 2446
},
{
"epoch": 2.5252837977296183,
"grad_norm": 11.617057309870031,
"learning_rate": 7.468702518079857e-07,
"loss": 0.3896215856075287,
"step": 2447
},
{
"epoch": 2.526315789473684,
"grad_norm": 19.340965941158494,
"learning_rate": 7.43716287083664e-07,
"loss": 1.8497036695480347,
"step": 2448
},
{
"epoch": 2.5273477812177503,
"grad_norm": 15.18705838277447,
"learning_rate": 7.405684607536584e-07,
"loss": 1.9955216646194458,
"step": 2449
},
{
"epoch": 2.5283797729618165,
"grad_norm": 10.877736708597599,
"learning_rate": 7.374267773577515e-07,
"loss": 1.1957073211669922,
"step": 2450
},
{
"epoch": 2.5294117647058822,
"grad_norm": 30.178788325484387,
"learning_rate": 7.342912414268654e-07,
"loss": 1.469434142112732,
"step": 2451
},
{
"epoch": 2.5304437564499485,
"grad_norm": 9.400424513052604,
"learning_rate": 7.31161857483057e-07,
"loss": 0.32218417525291443,
"step": 2452
},
{
"epoch": 2.5314757481940147,
"grad_norm": 9.012375664833659,
"learning_rate": 7.280386300395104e-07,
"loss": 0.22649557888507843,
"step": 2453
},
{
"epoch": 2.5325077399380804,
"grad_norm": 11.655272587248323,
"learning_rate": 7.249215636005308e-07,
"loss": 0.49087798595428467,
"step": 2454
},
{
"epoch": 2.5335397316821466,
"grad_norm": 10.896749070097966,
"learning_rate": 7.218106626615384e-07,
"loss": 0.472625732421875,
"step": 2455
},
{
"epoch": 2.534571723426213,
"grad_norm": 10.457972855615788,
"learning_rate": 7.187059317090622e-07,
"loss": 0.6984173059463501,
"step": 2456
},
{
"epoch": 2.5356037151702786,
"grad_norm": 14.40471006951635,
"learning_rate": 7.156073752207304e-07,
"loss": 0.8018549680709839,
"step": 2457
},
{
"epoch": 2.536635706914345,
"grad_norm": 14.039618022353128,
"learning_rate": 7.125149976652684e-07,
"loss": 0.38079196214675903,
"step": 2458
},
{
"epoch": 2.5376676986584106,
"grad_norm": 13.56849882237949,
"learning_rate": 7.094288035024905e-07,
"loss": 0.3471815586090088,
"step": 2459
},
{
"epoch": 2.538699690402477,
"grad_norm": 6.644592311169667,
"learning_rate": 7.063487971832922e-07,
"loss": 0.26021164655685425,
"step": 2460
},
{
"epoch": 2.539731682146543,
"grad_norm": 18.06651652107949,
"learning_rate": 7.032749831496466e-07,
"loss": 0.4367244839668274,
"step": 2461
},
{
"epoch": 2.5407636738906088,
"grad_norm": 10.006337776595991,
"learning_rate": 7.002073658345943e-07,
"loss": 0.3738947808742523,
"step": 2462
},
{
"epoch": 2.541795665634675,
"grad_norm": 11.307412438943082,
"learning_rate": 6.971459496622401e-07,
"loss": 0.2714172899723053,
"step": 2463
},
{
"epoch": 2.5428276573787407,
"grad_norm": 9.942800760696215,
"learning_rate": 6.940907390477458e-07,
"loss": 0.2196260392665863,
"step": 2464
},
{
"epoch": 2.543859649122807,
"grad_norm": 12.911374595634284,
"learning_rate": 6.910417383973244e-07,
"loss": 0.5117729902267456,
"step": 2465
},
{
"epoch": 2.544891640866873,
"grad_norm": 15.765117131752072,
"learning_rate": 6.879989521082292e-07,
"loss": 0.5069431662559509,
"step": 2466
},
{
"epoch": 2.545923632610939,
"grad_norm": 10.612093959667995,
"learning_rate": 6.849623845687547e-07,
"loss": 0.5721427798271179,
"step": 2467
},
{
"epoch": 2.546955624355005,
"grad_norm": 16.032074380217704,
"learning_rate": 6.819320401582258e-07,
"loss": 0.4624538719654083,
"step": 2468
},
{
"epoch": 2.5479876160990713,
"grad_norm": 12.54192301084899,
"learning_rate": 6.789079232469925e-07,
"loss": 0.606447696685791,
"step": 2469
},
{
"epoch": 2.549019607843137,
"grad_norm": 7.463315600117024,
"learning_rate": 6.758900381964228e-07,
"loss": 0.2632223069667816,
"step": 2470
},
{
"epoch": 2.5500515995872033,
"grad_norm": 10.988763485820304,
"learning_rate": 6.728783893588986e-07,
"loss": 0.6660168766975403,
"step": 2471
},
{
"epoch": 2.5510835913312695,
"grad_norm": 8.09866210736358,
"learning_rate": 6.698729810778065e-07,
"loss": 0.35602515935897827,
"step": 2472
},
{
"epoch": 2.5521155830753353,
"grad_norm": 9.982111656825898,
"learning_rate": 6.668738176875339e-07,
"loss": 0.3641916513442993,
"step": 2473
},
{
"epoch": 2.5531475748194015,
"grad_norm": 7.348812433182777,
"learning_rate": 6.638809035134614e-07,
"loss": 0.18170510232448578,
"step": 2474
},
{
"epoch": 2.5541795665634677,
"grad_norm": 5.66582504929301,
"learning_rate": 6.608942428719583e-07,
"loss": 0.19020238518714905,
"step": 2475
},
{
"epoch": 2.5552115583075334,
"grad_norm": 10.64433572809659,
"learning_rate": 6.579138400703716e-07,
"loss": 0.6588963866233826,
"step": 2476
},
{
"epoch": 2.5562435500515996,
"grad_norm": 11.455825600050641,
"learning_rate": 6.549396994070262e-07,
"loss": 0.41902226209640503,
"step": 2477
},
{
"epoch": 2.557275541795666,
"grad_norm": 8.586632321929025,
"learning_rate": 6.519718251712159e-07,
"loss": 0.31348979473114014,
"step": 2478
},
{
"epoch": 2.5583075335397316,
"grad_norm": 9.056130934377439,
"learning_rate": 6.490102216431964e-07,
"loss": 0.21359741687774658,
"step": 2479
},
{
"epoch": 2.559339525283798,
"grad_norm": 8.46713170840959,
"learning_rate": 6.460548930941801e-07,
"loss": 0.1758163869380951,
"step": 2480
},
{
"epoch": 2.560371517027864,
"grad_norm": 8.635977324174299,
"learning_rate": 6.431058437863269e-07,
"loss": 0.2282043695449829,
"step": 2481
},
{
"epoch": 2.56140350877193,
"grad_norm": 17.59283103512317,
"learning_rate": 6.401630779727453e-07,
"loss": 1.196661114692688,
"step": 2482
},
{
"epoch": 2.562435500515996,
"grad_norm": 15.598402324436318,
"learning_rate": 6.372265998974797e-07,
"loss": 0.5663949847221375,
"step": 2483
},
{
"epoch": 2.5634674922600618,
"grad_norm": 8.688801397974835,
"learning_rate": 6.342964137955071e-07,
"loss": 0.29304569959640503,
"step": 2484
},
{
"epoch": 2.564499484004128,
"grad_norm": 11.251060722008908,
"learning_rate": 6.313725238927271e-07,
"loss": 0.4160574972629547,
"step": 2485
},
{
"epoch": 2.565531475748194,
"grad_norm": 12.134033946614794,
"learning_rate": 6.28454934405962e-07,
"loss": 0.7963811159133911,
"step": 2486
},
{
"epoch": 2.56656346749226,
"grad_norm": 9.34676118545788,
"learning_rate": 6.255436495429478e-07,
"loss": 0.2370861917734146,
"step": 2487
},
{
"epoch": 2.567595459236326,
"grad_norm": 12.439532661969023,
"learning_rate": 6.226386735023271e-07,
"loss": 0.3600406348705292,
"step": 2488
},
{
"epoch": 2.568627450980392,
"grad_norm": 11.858949836866532,
"learning_rate": 6.197400104736439e-07,
"loss": 0.4274609088897705,
"step": 2489
},
{
"epoch": 2.569659442724458,
"grad_norm": 7.485843655541987,
"learning_rate": 6.168476646373372e-07,
"loss": 0.35645705461502075,
"step": 2490
},
{
"epoch": 2.5706914344685243,
"grad_norm": 10.876910518645126,
"learning_rate": 6.139616401647364e-07,
"loss": 0.658128559589386,
"step": 2491
},
{
"epoch": 2.57172342621259,
"grad_norm": 11.220677018841794,
"learning_rate": 6.110819412180535e-07,
"loss": 0.28535163402557373,
"step": 2492
},
{
"epoch": 2.5727554179566563,
"grad_norm": 8.449879516914931,
"learning_rate": 6.082085719503788e-07,
"loss": 0.2511385679244995,
"step": 2493
},
{
"epoch": 2.5737874097007225,
"grad_norm": 9.277463295533472,
"learning_rate": 6.053415365056731e-07,
"loss": 0.48958778381347656,
"step": 2494
},
{
"epoch": 2.5748194014447883,
"grad_norm": 11.781584043356633,
"learning_rate": 6.02480839018762e-07,
"loss": 0.39071589708328247,
"step": 2495
},
{
"epoch": 2.5758513931888545,
"grad_norm": 10.71000621503098,
"learning_rate": 5.99626483615331e-07,
"loss": 0.37455040216445923,
"step": 2496
},
{
"epoch": 2.5768833849329207,
"grad_norm": 9.84869526655172,
"learning_rate": 5.967784744119204e-07,
"loss": 0.5807996988296509,
"step": 2497
},
{
"epoch": 2.5779153766769864,
"grad_norm": 13.933072369238419,
"learning_rate": 5.939368155159164e-07,
"loss": 0.25031518936157227,
"step": 2498
},
{
"epoch": 2.5789473684210527,
"grad_norm": 8.586408084297304,
"learning_rate": 5.911015110255492e-07,
"loss": 0.4800521731376648,
"step": 2499
},
{
"epoch": 2.579979360165119,
"grad_norm": 8.09655300736004,
"learning_rate": 5.882725650298787e-07,
"loss": 0.20928475260734558,
"step": 2500
},
{
"epoch": 2.5810113519091846,
"grad_norm": 14.6859228627622,
"learning_rate": 5.854499816088027e-07,
"loss": 0.39752423763275146,
"step": 2501
},
{
"epoch": 2.582043343653251,
"grad_norm": 12.158917900342898,
"learning_rate": 5.826337648330377e-07,
"loss": 0.4349973797798157,
"step": 2502
},
{
"epoch": 2.583075335397317,
"grad_norm": 9.681776111710935,
"learning_rate": 5.798239187641208e-07,
"loss": 0.45683524012565613,
"step": 2503
},
{
"epoch": 2.584107327141383,
"grad_norm": 8.787952484909313,
"learning_rate": 5.770204474543978e-07,
"loss": 0.3035852313041687,
"step": 2504
},
{
"epoch": 2.585139318885449,
"grad_norm": 6.630879856298401,
"learning_rate": 5.742233549470239e-07,
"loss": 0.3834453225135803,
"step": 2505
},
{
"epoch": 2.586171310629515,
"grad_norm": 8.001953975708291,
"learning_rate": 5.71432645275955e-07,
"loss": 0.8652332425117493,
"step": 2506
},
{
"epoch": 2.587203302373581,
"grad_norm": 18.533290450557388,
"learning_rate": 5.6864832246594e-07,
"loss": 0.4923500120639801,
"step": 2507
},
{
"epoch": 2.588235294117647,
"grad_norm": 6.044956146468534,
"learning_rate": 5.658703905325186e-07,
"loss": 0.3429776430130005,
"step": 2508
},
{
"epoch": 2.589267285861713,
"grad_norm": 12.613913614264515,
"learning_rate": 5.630988534820097e-07,
"loss": 0.34255480766296387,
"step": 2509
},
{
"epoch": 2.590299277605779,
"grad_norm": 9.33459919728904,
"learning_rate": 5.603337153115145e-07,
"loss": 0.2871645390987396,
"step": 2510
},
{
"epoch": 2.5913312693498454,
"grad_norm": 8.145714999328998,
"learning_rate": 5.575749800089036e-07,
"loss": 0.2089812457561493,
"step": 2511
},
{
"epoch": 2.592363261093911,
"grad_norm": 8.6645424037609,
"learning_rate": 5.548226515528133e-07,
"loss": 0.505244255065918,
"step": 2512
},
{
"epoch": 2.5933952528379773,
"grad_norm": 7.017950431806059,
"learning_rate": 5.520767339126398e-07,
"loss": 0.23949137330055237,
"step": 2513
},
{
"epoch": 2.594427244582043,
"grad_norm": 9.882265890708554,
"learning_rate": 5.493372310485329e-07,
"loss": 0.4443948268890381,
"step": 2514
},
{
"epoch": 2.5954592363261093,
"grad_norm": 10.91476539486523,
"learning_rate": 5.466041469113925e-07,
"loss": 0.5866333246231079,
"step": 2515
},
{
"epoch": 2.5964912280701755,
"grad_norm": 13.318863757727463,
"learning_rate": 5.438774854428614e-07,
"loss": 0.330798864364624,
"step": 2516
},
{
"epoch": 2.5975232198142413,
"grad_norm": 10.171733346667224,
"learning_rate": 5.411572505753193e-07,
"loss": 0.43884724378585815,
"step": 2517
},
{
"epoch": 2.5985552115583075,
"grad_norm": 11.893739777185429,
"learning_rate": 5.384434462318778e-07,
"loss": 1.344459891319275,
"step": 2518
},
{
"epoch": 2.5995872033023737,
"grad_norm": 16.36984848554477,
"learning_rate": 5.357360763263713e-07,
"loss": 0.9081894755363464,
"step": 2519
},
{
"epoch": 2.6006191950464395,
"grad_norm": 14.972619594796644,
"learning_rate": 5.330351447633603e-07,
"loss": 0.2988734841346741,
"step": 2520
},
{
"epoch": 2.6016511867905057,
"grad_norm": 9.481854122496655,
"learning_rate": 5.303406554381157e-07,
"loss": 0.6907855868339539,
"step": 2521
},
{
"epoch": 2.602683178534572,
"grad_norm": 8.798461888110825,
"learning_rate": 5.276526122366194e-07,
"loss": 0.9872853755950928,
"step": 2522
},
{
"epoch": 2.6037151702786376,
"grad_norm": 11.330081466508329,
"learning_rate": 5.249710190355545e-07,
"loss": 0.5052847266197205,
"step": 2523
},
{
"epoch": 2.604747162022704,
"grad_norm": 16.83636731445847,
"learning_rate": 5.222958797023036e-07,
"loss": 0.49554190039634705,
"step": 2524
},
{
"epoch": 2.60577915376677,
"grad_norm": 5.684493515055032,
"learning_rate": 5.196271980949419e-07,
"loss": 0.15073411166667938,
"step": 2525
},
{
"epoch": 2.606811145510836,
"grad_norm": 10.619974690207405,
"learning_rate": 5.169649780622304e-07,
"loss": 0.40889978408813477,
"step": 2526
},
{
"epoch": 2.607843137254902,
"grad_norm": 6.075467322496469,
"learning_rate": 5.143092234436125e-07,
"loss": 0.18674173951148987,
"step": 2527
},
{
"epoch": 2.6088751289989682,
"grad_norm": 14.683336159569796,
"learning_rate": 5.11659938069205e-07,
"loss": 0.8641536235809326,
"step": 2528
},
{
"epoch": 2.609907120743034,
"grad_norm": 8.275272204780919,
"learning_rate": 5.090171257597948e-07,
"loss": 0.2988804280757904,
"step": 2529
},
{
"epoch": 2.6109391124871,
"grad_norm": 7.632913465500986,
"learning_rate": 5.06380790326837e-07,
"loss": 0.18241772055625916,
"step": 2530
},
{
"epoch": 2.6119711042311664,
"grad_norm": 8.976502679663055,
"learning_rate": 5.037509355724429e-07,
"loss": 0.40648674964904785,
"step": 2531
},
{
"epoch": 2.613003095975232,
"grad_norm": 12.031314667668429,
"learning_rate": 5.011275652893782e-07,
"loss": 0.4976109564304352,
"step": 2532
},
{
"epoch": 2.6140350877192984,
"grad_norm": 10.559616396859743,
"learning_rate": 4.985106832610553e-07,
"loss": 0.29959365725517273,
"step": 2533
},
{
"epoch": 2.615067079463364,
"grad_norm": 10.244172209304915,
"learning_rate": 4.959002932615303e-07,
"loss": 0.5306459665298462,
"step": 2534
},
{
"epoch": 2.6160990712074303,
"grad_norm": 11.724042807932554,
"learning_rate": 4.932963990554974e-07,
"loss": 0.8250664472579956,
"step": 2535
},
{
"epoch": 2.617131062951496,
"grad_norm": 10.10555559418158,
"learning_rate": 4.906990043982813e-07,
"loss": 0.23426461219787598,
"step": 2536
},
{
"epoch": 2.6181630546955623,
"grad_norm": 10.423993619036137,
"learning_rate": 4.881081130358345e-07,
"loss": 0.523322582244873,
"step": 2537
},
{
"epoch": 2.6191950464396285,
"grad_norm": 14.41673356600909,
"learning_rate": 4.855237287047265e-07,
"loss": 0.2107163816690445,
"step": 2538
},
{
"epoch": 2.6202270381836943,
"grad_norm": 13.693033367636005,
"learning_rate": 4.829458551321492e-07,
"loss": 0.41339415311813354,
"step": 2539
},
{
"epoch": 2.6212590299277605,
"grad_norm": 10.235350260735638,
"learning_rate": 4.803744960358992e-07,
"loss": 0.3401373028755188,
"step": 2540
},
{
"epoch": 2.6222910216718267,
"grad_norm": 10.602668549796832,
"learning_rate": 4.77809655124381e-07,
"loss": 0.33104604482650757,
"step": 2541
},
{
"epoch": 2.6233230134158925,
"grad_norm": 9.639984300173323,
"learning_rate": 4.752513360965949e-07,
"loss": 0.38588908314704895,
"step": 2542
},
{
"epoch": 2.6243550051599587,
"grad_norm": 18.481304271361157,
"learning_rate": 4.7269954264213935e-07,
"loss": 0.5953255891799927,
"step": 2543
},
{
"epoch": 2.625386996904025,
"grad_norm": 12.791025971074705,
"learning_rate": 4.701542784411994e-07,
"loss": 0.28090372681617737,
"step": 2544
},
{
"epoch": 2.6264189886480906,
"grad_norm": 14.954317766614954,
"learning_rate": 4.676155471645444e-07,
"loss": 0.331378698348999,
"step": 2545
},
{
"epoch": 2.627450980392157,
"grad_norm": 12.171313532906659,
"learning_rate": 4.650833524735232e-07,
"loss": 0.5808216333389282,
"step": 2546
},
{
"epoch": 2.628482972136223,
"grad_norm": 6.875020329613904,
"learning_rate": 4.6255769802005414e-07,
"loss": 0.32516252994537354,
"step": 2547
},
{
"epoch": 2.629514963880289,
"grad_norm": 9.99065210903672,
"learning_rate": 4.6003858744662564e-07,
"loss": 0.327812135219574,
"step": 2548
},
{
"epoch": 2.630546955624355,
"grad_norm": 9.40256075754474,
"learning_rate": 4.5752602438628945e-07,
"loss": 0.268187016248703,
"step": 2549
},
{
"epoch": 2.6315789473684212,
"grad_norm": 11.367727046797691,
"learning_rate": 4.5502001246265416e-07,
"loss": 0.7534793615341187,
"step": 2550
},
{
"epoch": 2.632610939112487,
"grad_norm": 16.325306855068217,
"learning_rate": 4.5252055528987647e-07,
"loss": 0.7205969095230103,
"step": 2551
},
{
"epoch": 2.633642930856553,
"grad_norm": 11.0242810820087,
"learning_rate": 4.500276564726652e-07,
"loss": 0.6006823778152466,
"step": 2552
},
{
"epoch": 2.6346749226006194,
"grad_norm": 9.32383878761997,
"learning_rate": 4.4754131960626777e-07,
"loss": 0.4036358594894409,
"step": 2553
},
{
"epoch": 2.635706914344685,
"grad_norm": 10.861707031976824,
"learning_rate": 4.4506154827646917e-07,
"loss": 0.35124677419662476,
"step": 2554
},
{
"epoch": 2.6367389060887514,
"grad_norm": 20.241972042836696,
"learning_rate": 4.4258834605958424e-07,
"loss": 2.209158182144165,
"step": 2555
},
{
"epoch": 2.6377708978328176,
"grad_norm": 10.957452091178835,
"learning_rate": 4.401217165224564e-07,
"loss": 0.4837624430656433,
"step": 2556
},
{
"epoch": 2.6388028895768834,
"grad_norm": 9.37118737601346,
"learning_rate": 4.3766166322244505e-07,
"loss": 0.31439873576164246,
"step": 2557
},
{
"epoch": 2.6398348813209496,
"grad_norm": 20.670899732864804,
"learning_rate": 4.3520818970743174e-07,
"loss": 0.4805004596710205,
"step": 2558
},
{
"epoch": 2.6408668730650153,
"grad_norm": 10.230178996368872,
"learning_rate": 4.327612995158043e-07,
"loss": 0.5170422792434692,
"step": 2559
},
{
"epoch": 2.6418988648090815,
"grad_norm": 11.215599006098603,
"learning_rate": 4.3032099617645874e-07,
"loss": 0.43596765398979187,
"step": 2560
},
{
"epoch": 2.6429308565531473,
"grad_norm": 8.901642713887572,
"learning_rate": 4.2788728320878827e-07,
"loss": 0.3309241235256195,
"step": 2561
},
{
"epoch": 2.6439628482972135,
"grad_norm": 9.234533641791927,
"learning_rate": 4.254601641226835e-07,
"loss": 0.270923912525177,
"step": 2562
},
{
"epoch": 2.6449948400412797,
"grad_norm": 11.873686654548335,
"learning_rate": 4.230396424185268e-07,
"loss": 0.5812723636627197,
"step": 2563
},
{
"epoch": 2.6460268317853455,
"grad_norm": 9.747512038747255,
"learning_rate": 4.2062572158718284e-07,
"loss": 0.6770668625831604,
"step": 2564
},
{
"epoch": 2.6470588235294117,
"grad_norm": 7.510586607263941,
"learning_rate": 4.1821840510999965e-07,
"loss": 0.16891315579414368,
"step": 2565
},
{
"epoch": 2.648090815273478,
"grad_norm": 9.520517790823536,
"learning_rate": 4.1581769645879675e-07,
"loss": 0.6919821500778198,
"step": 2566
},
{
"epoch": 2.6491228070175437,
"grad_norm": 10.289191172399185,
"learning_rate": 4.134235990958668e-07,
"loss": 0.5486763715744019,
"step": 2567
},
{
"epoch": 2.65015479876161,
"grad_norm": 11.37490516967252,
"learning_rate": 4.1103611647396734e-07,
"loss": 0.6272962689399719,
"step": 2568
},
{
"epoch": 2.651186790505676,
"grad_norm": 10.000458568771393,
"learning_rate": 4.0865525203631626e-07,
"loss": 0.21195337176322937,
"step": 2569
},
{
"epoch": 2.652218782249742,
"grad_norm": 8.026791633329674,
"learning_rate": 4.0628100921658475e-07,
"loss": 0.2913268208503723,
"step": 2570
},
{
"epoch": 2.653250773993808,
"grad_norm": 8.514124896425205,
"learning_rate": 4.039133914388965e-07,
"loss": 0.7255011796951294,
"step": 2571
},
{
"epoch": 2.6542827657378743,
"grad_norm": 11.993469857334887,
"learning_rate": 4.0155240211781966e-07,
"loss": 0.3760417401790619,
"step": 2572
},
{
"epoch": 2.65531475748194,
"grad_norm": 9.849969686885421,
"learning_rate": 3.9919804465836263e-07,
"loss": 0.3319247364997864,
"step": 2573
},
{
"epoch": 2.656346749226006,
"grad_norm": 11.081626558031994,
"learning_rate": 3.9685032245596997e-07,
"loss": 0.29489922523498535,
"step": 2574
},
{
"epoch": 2.6573787409700724,
"grad_norm": 20.890441985451485,
"learning_rate": 3.9450923889651825e-07,
"loss": 1.2522039413452148,
"step": 2575
},
{
"epoch": 2.658410732714138,
"grad_norm": 12.594726781366386,
"learning_rate": 3.921747973563056e-07,
"loss": 0.252694696187973,
"step": 2576
},
{
"epoch": 2.6594427244582044,
"grad_norm": 10.667974819332423,
"learning_rate": 3.8984700120205387e-07,
"loss": 0.5963405966758728,
"step": 2577
},
{
"epoch": 2.6604747162022706,
"grad_norm": 11.189244057357962,
"learning_rate": 3.875258537909032e-07,
"loss": 0.7342553734779358,
"step": 2578
},
{
"epoch": 2.6615067079463364,
"grad_norm": 12.808316425027316,
"learning_rate": 3.85211358470402e-07,
"loss": 1.8337653875350952,
"step": 2579
},
{
"epoch": 2.6625386996904026,
"grad_norm": 12.695716920942775,
"learning_rate": 3.829035185785035e-07,
"loss": 0.7166613340377808,
"step": 2580
},
{
"epoch": 2.663570691434469,
"grad_norm": 15.965685815266713,
"learning_rate": 3.8060233744356634e-07,
"loss": 0.3166029751300812,
"step": 2581
},
{
"epoch": 2.6646026831785345,
"grad_norm": 15.228449537881858,
"learning_rate": 3.783078183843436e-07,
"loss": 0.6232759952545166,
"step": 2582
},
{
"epoch": 2.6656346749226008,
"grad_norm": 10.586080227145011,
"learning_rate": 3.7601996470998156e-07,
"loss": 0.2533762454986572,
"step": 2583
},
{
"epoch": 2.6666666666666665,
"grad_norm": 13.97178141622236,
"learning_rate": 3.737387797200126e-07,
"loss": 0.49488574266433716,
"step": 2584
},
{
"epoch": 2.6676986584107327,
"grad_norm": 13.599094780061206,
"learning_rate": 3.7146426670435166e-07,
"loss": 0.8465943932533264,
"step": 2585
},
{
"epoch": 2.6687306501547985,
"grad_norm": 8.733972776218572,
"learning_rate": 3.691964289432914e-07,
"loss": 0.31913653016090393,
"step": 2586
},
{
"epoch": 2.6697626418988647,
"grad_norm": 41.93487863173271,
"learning_rate": 3.669352697074996e-07,
"loss": 1.0798392295837402,
"step": 2587
},
{
"epoch": 2.670794633642931,
"grad_norm": 20.657609086193606,
"learning_rate": 3.646807922580098e-07,
"loss": 1.051876187324524,
"step": 2588
},
{
"epoch": 2.6718266253869967,
"grad_norm": 7.488384625739389,
"learning_rate": 3.624329998462189e-07,
"loss": 0.4441087543964386,
"step": 2589
},
{
"epoch": 2.672858617131063,
"grad_norm": 9.081245894719912,
"learning_rate": 3.6019189571388444e-07,
"loss": 0.5778773427009583,
"step": 2590
},
{
"epoch": 2.673890608875129,
"grad_norm": 9.197366691178136,
"learning_rate": 3.5795748309311707e-07,
"loss": 0.2405458688735962,
"step": 2591
},
{
"epoch": 2.674922600619195,
"grad_norm": 8.672764900791982,
"learning_rate": 3.557297652063768e-07,
"loss": 0.7079523801803589,
"step": 2592
},
{
"epoch": 2.675954592363261,
"grad_norm": 9.513513997500619,
"learning_rate": 3.5350874526646925e-07,
"loss": 1.325160264968872,
"step": 2593
},
{
"epoch": 2.6769865841073273,
"grad_norm": 11.620643797708638,
"learning_rate": 3.512944264765411e-07,
"loss": 0.37012574076652527,
"step": 2594
},
{
"epoch": 2.678018575851393,
"grad_norm": 9.739107116416724,
"learning_rate": 3.4908681203007167e-07,
"loss": 0.6634089946746826,
"step": 2595
},
{
"epoch": 2.6790505675954592,
"grad_norm": 8.271222786255464,
"learning_rate": 3.4688590511087304e-07,
"loss": 0.35661011934280396,
"step": 2596
},
{
"epoch": 2.6800825593395254,
"grad_norm": 10.331308569039791,
"learning_rate": 3.446917088930851e-07,
"loss": 0.3146715760231018,
"step": 2597
},
{
"epoch": 2.681114551083591,
"grad_norm": 13.441579495000436,
"learning_rate": 3.4250422654116933e-07,
"loss": 0.6467351913452148,
"step": 2598
},
{
"epoch": 2.6821465428276574,
"grad_norm": 8.705423478386244,
"learning_rate": 3.40323461209901e-07,
"loss": 0.3432140350341797,
"step": 2599
},
{
"epoch": 2.6831785345717236,
"grad_norm": 6.746735612398821,
"learning_rate": 3.3814941604437155e-07,
"loss": 0.5899496674537659,
"step": 2600
},
{
"epoch": 2.6842105263157894,
"grad_norm": 14.93423327939512,
"learning_rate": 3.359820941799796e-07,
"loss": 0.68793785572052,
"step": 2601
},
{
"epoch": 2.6852425180598556,
"grad_norm": 8.156724190137599,
"learning_rate": 3.338214987424282e-07,
"loss": 0.12590919435024261,
"step": 2602
},
{
"epoch": 2.686274509803922,
"grad_norm": 14.12186668178814,
"learning_rate": 3.316676328477192e-07,
"loss": 0.24262937903404236,
"step": 2603
},
{
"epoch": 2.6873065015479876,
"grad_norm": 13.624667824422568,
"learning_rate": 3.2952049960214785e-07,
"loss": 0.5101624727249146,
"step": 2604
},
{
"epoch": 2.6883384932920538,
"grad_norm": 9.527792356559328,
"learning_rate": 3.273801021023004e-07,
"loss": 0.5193660855293274,
"step": 2605
},
{
"epoch": 2.68937048503612,
"grad_norm": 12.691636236212773,
"learning_rate": 3.2524644343504887e-07,
"loss": 0.729722797870636,
"step": 2606
},
{
"epoch": 2.6904024767801857,
"grad_norm": 10.937206199199641,
"learning_rate": 3.231195266775489e-07,
"loss": 0.6672416925430298,
"step": 2607
},
{
"epoch": 2.691434468524252,
"grad_norm": 9.946644179075593,
"learning_rate": 3.20999354897229e-07,
"loss": 0.39457738399505615,
"step": 2608
},
{
"epoch": 2.6924664602683177,
"grad_norm": 5.906614566792187,
"learning_rate": 3.1888593115179225e-07,
"loss": 0.3601424992084503,
"step": 2609
},
{
"epoch": 2.693498452012384,
"grad_norm": 11.398058075247581,
"learning_rate": 3.167792584892093e-07,
"loss": 0.5270384550094604,
"step": 2610
},
{
"epoch": 2.6945304437564497,
"grad_norm": 11.726090044245446,
"learning_rate": 3.146793399477144e-07,
"loss": 0.6068238615989685,
"step": 2611
},
{
"epoch": 2.695562435500516,
"grad_norm": 7.509456538501037,
"learning_rate": 3.1258617855580155e-07,
"loss": 0.5335989594459534,
"step": 2612
},
{
"epoch": 2.696594427244582,
"grad_norm": 8.602600505566611,
"learning_rate": 3.104997773322205e-07,
"loss": 0.29156219959259033,
"step": 2613
},
{
"epoch": 2.697626418988648,
"grad_norm": 13.710570424583297,
"learning_rate": 3.0842013928596757e-07,
"loss": 0.41730475425720215,
"step": 2614
},
{
"epoch": 2.698658410732714,
"grad_norm": 10.720182485615819,
"learning_rate": 3.063472674162882e-07,
"loss": 0.53395676612854,
"step": 2615
},
{
"epoch": 2.6996904024767803,
"grad_norm": 7.4405866757185715,
"learning_rate": 3.0428116471267146e-07,
"loss": 0.5421364307403564,
"step": 2616
},
{
"epoch": 2.700722394220846,
"grad_norm": 15.399616022833726,
"learning_rate": 3.022218341548422e-07,
"loss": 0.39568254351615906,
"step": 2617
},
{
"epoch": 2.7017543859649122,
"grad_norm": 12.618503901643852,
"learning_rate": 3.0016927871275524e-07,
"loss": 0.4751710593700409,
"step": 2618
},
{
"epoch": 2.7027863777089784,
"grad_norm": 10.459012951475524,
"learning_rate": 2.981235013465994e-07,
"loss": 0.47635746002197266,
"step": 2619
},
{
"epoch": 2.703818369453044,
"grad_norm": 11.870003226809803,
"learning_rate": 2.9608450500678566e-07,
"loss": 0.8148362636566162,
"step": 2620
},
{
"epoch": 2.7048503611971104,
"grad_norm": 21.98035525761005,
"learning_rate": 2.940522926339462e-07,
"loss": 0.28189510107040405,
"step": 2621
},
{
"epoch": 2.7058823529411766,
"grad_norm": 7.909654450133803,
"learning_rate": 2.9202686715892934e-07,
"loss": 0.3399927020072937,
"step": 2622
},
{
"epoch": 2.7069143446852424,
"grad_norm": 12.964749406405296,
"learning_rate": 2.9000823150279355e-07,
"loss": 0.411767840385437,
"step": 2623
},
{
"epoch": 2.7079463364293086,
"grad_norm": 8.8267704870169,
"learning_rate": 2.879963885768083e-07,
"loss": 0.38652342557907104,
"step": 2624
},
{
"epoch": 2.708978328173375,
"grad_norm": 7.294871041892732,
"learning_rate": 2.859913412824428e-07,
"loss": 0.2980038523674011,
"step": 2625
},
{
"epoch": 2.7100103199174406,
"grad_norm": 10.785565333143657,
"learning_rate": 2.839930925113715e-07,
"loss": 0.6537376642227173,
"step": 2626
},
{
"epoch": 2.7110423116615068,
"grad_norm": 7.807193554695228,
"learning_rate": 2.8200164514545657e-07,
"loss": 0.404613733291626,
"step": 2627
},
{
"epoch": 2.712074303405573,
"grad_norm": 8.772347522340448,
"learning_rate": 2.800170020567566e-07,
"loss": 0.31629127264022827,
"step": 2628
},
{
"epoch": 2.7131062951496387,
"grad_norm": 8.979858832049977,
"learning_rate": 2.780391661075155e-07,
"loss": 0.38792482018470764,
"step": 2629
},
{
"epoch": 2.714138286893705,
"grad_norm": 15.835606847253056,
"learning_rate": 2.760681401501597e-07,
"loss": 0.6627403497695923,
"step": 2630
},
{
"epoch": 2.715170278637771,
"grad_norm": 11.307368841720763,
"learning_rate": 2.7410392702729495e-07,
"loss": 0.6148909330368042,
"step": 2631
},
{
"epoch": 2.716202270381837,
"grad_norm": 9.753722183697175,
"learning_rate": 2.721465295716996e-07,
"loss": 0.7569953799247742,
"step": 2632
},
{
"epoch": 2.717234262125903,
"grad_norm": 9.776518927595994,
"learning_rate": 2.701959506063251e-07,
"loss": 1.4264123439788818,
"step": 2633
},
{
"epoch": 2.718266253869969,
"grad_norm": 9.543544280612078,
"learning_rate": 2.6825219294428773e-07,
"loss": 0.24961692094802856,
"step": 2634
},
{
"epoch": 2.719298245614035,
"grad_norm": 9.719510604440641,
"learning_rate": 2.663152593888668e-07,
"loss": 0.6841002106666565,
"step": 2635
},
{
"epoch": 2.720330237358101,
"grad_norm": 7.413237897907853,
"learning_rate": 2.643851527335006e-07,
"loss": 0.29095280170440674,
"step": 2636
},
{
"epoch": 2.721362229102167,
"grad_norm": 10.584843614063363,
"learning_rate": 2.624618757617792e-07,
"loss": 0.25524187088012695,
"step": 2637
},
{
"epoch": 2.7223942208462333,
"grad_norm": 7.752309826241938,
"learning_rate": 2.605454312474448e-07,
"loss": 0.19103951752185822,
"step": 2638
},
{
"epoch": 2.723426212590299,
"grad_norm": 10.875904791849495,
"learning_rate": 2.586358219543861e-07,
"loss": 0.2569884657859802,
"step": 2639
},
{
"epoch": 2.7244582043343653,
"grad_norm": 11.864527583325158,
"learning_rate": 2.5673305063663335e-07,
"loss": 0.5300930142402649,
"step": 2640
},
{
"epoch": 2.7254901960784315,
"grad_norm": 12.264536840582725,
"learning_rate": 2.5483712003835535e-07,
"loss": 0.41859257221221924,
"step": 2641
},
{
"epoch": 2.7265221878224972,
"grad_norm": 9.740302429611017,
"learning_rate": 2.529480328938549e-07,
"loss": 0.2770339548587799,
"step": 2642
},
{
"epoch": 2.7275541795665634,
"grad_norm": 10.099875607541852,
"learning_rate": 2.510657919275655e-07,
"loss": 0.32811036705970764,
"step": 2643
},
{
"epoch": 2.7285861713106296,
"grad_norm": 12.218220392873288,
"learning_rate": 2.4919039985404626e-07,
"loss": 0.3892282545566559,
"step": 2644
},
{
"epoch": 2.7296181630546954,
"grad_norm": 16.99587425966017,
"learning_rate": 2.4732185937798193e-07,
"loss": 0.5392187833786011,
"step": 2645
},
{
"epoch": 2.7306501547987616,
"grad_norm": 10.629289948428452,
"learning_rate": 2.4546017319417195e-07,
"loss": 0.5916829109191895,
"step": 2646
},
{
"epoch": 2.731682146542828,
"grad_norm": 12.159168968204627,
"learning_rate": 2.436053439875319e-07,
"loss": 0.49346038699150085,
"step": 2647
},
{
"epoch": 2.7327141382868936,
"grad_norm": 7.354390122040929,
"learning_rate": 2.4175737443308976e-07,
"loss": 0.17073936760425568,
"step": 2648
},
{
"epoch": 2.73374613003096,
"grad_norm": 11.696783298201472,
"learning_rate": 2.399162671959793e-07,
"loss": 0.2301475703716278,
"step": 2649
},
{
"epoch": 2.734778121775026,
"grad_norm": 11.353460350336352,
"learning_rate": 2.380820249314375e-07,
"loss": 0.3915819525718689,
"step": 2650
},
{
"epoch": 2.7358101135190918,
"grad_norm": 14.48128568885365,
"learning_rate": 2.3625465028479955e-07,
"loss": 0.47442927956581116,
"step": 2651
},
{
"epoch": 2.736842105263158,
"grad_norm": 8.508034736982234,
"learning_rate": 2.3443414589149838e-07,
"loss": 0.32633233070373535,
"step": 2652
},
{
"epoch": 2.737874097007224,
"grad_norm": 22.605320819509327,
"learning_rate": 2.3262051437705768e-07,
"loss": 0.3886736035346985,
"step": 2653
},
{
"epoch": 2.73890608875129,
"grad_norm": 15.517220409316668,
"learning_rate": 2.3081375835708854e-07,
"loss": 0.5935795903205872,
"step": 2654
},
{
"epoch": 2.739938080495356,
"grad_norm": 8.07802463401297,
"learning_rate": 2.2901388043728878e-07,
"loss": 0.17653796076774597,
"step": 2655
},
{
"epoch": 2.7409700722394224,
"grad_norm": 15.402356283167004,
"learning_rate": 2.272208832134326e-07,
"loss": 1.0124492645263672,
"step": 2656
},
{
"epoch": 2.742002063983488,
"grad_norm": 9.665398333246364,
"learning_rate": 2.254347692713732e-07,
"loss": 0.38071686029434204,
"step": 2657
},
{
"epoch": 2.7430340557275543,
"grad_norm": 8.458922156834284,
"learning_rate": 2.236555411870378e-07,
"loss": 0.28697431087493896,
"step": 2658
},
{
"epoch": 2.74406604747162,
"grad_norm": 10.098512735439094,
"learning_rate": 2.218832015264205e-07,
"loss": 0.2906286120414734,
"step": 2659
},
{
"epoch": 2.7450980392156863,
"grad_norm": 18.255008975926504,
"learning_rate": 2.201177528455828e-07,
"loss": 0.6745045781135559,
"step": 2660
},
{
"epoch": 2.746130030959752,
"grad_norm": 9.185863469817656,
"learning_rate": 2.183591976906463e-07,
"loss": 0.4779427647590637,
"step": 2661
},
{
"epoch": 2.7471620227038183,
"grad_norm": 10.67008278019397,
"learning_rate": 2.1660753859779225e-07,
"loss": 0.40853461623191833,
"step": 2662
},
{
"epoch": 2.7481940144478845,
"grad_norm": 12.329079381594989,
"learning_rate": 2.1486277809325552e-07,
"loss": 0.3884234130382538,
"step": 2663
},
{
"epoch": 2.7492260061919502,
"grad_norm": 9.108233903645676,
"learning_rate": 2.131249186933243e-07,
"loss": 0.7551658153533936,
"step": 2664
},
{
"epoch": 2.7502579979360164,
"grad_norm": 10.067547605251312,
"learning_rate": 2.113939629043299e-07,
"loss": 0.2735165059566498,
"step": 2665
},
{
"epoch": 2.7512899896800826,
"grad_norm": 9.680391414190792,
"learning_rate": 2.0966991322264984e-07,
"loss": 0.3912424147129059,
"step": 2666
},
{
"epoch": 2.7523219814241484,
"grad_norm": 11.378370517148733,
"learning_rate": 2.0795277213470188e-07,
"loss": 0.2264825999736786,
"step": 2667
},
{
"epoch": 2.7533539731682146,
"grad_norm": 21.941128890870687,
"learning_rate": 2.0624254211693894e-07,
"loss": 0.3313651382923126,
"step": 2668
},
{
"epoch": 2.754385964912281,
"grad_norm": 15.594446975408829,
"learning_rate": 2.045392256358486e-07,
"loss": 0.58062344789505,
"step": 2669
},
{
"epoch": 2.7554179566563466,
"grad_norm": 12.128011911960778,
"learning_rate": 2.0284282514794475e-07,
"loss": 0.4429638385772705,
"step": 2670
},
{
"epoch": 2.756449948400413,
"grad_norm": 11.255435272305718,
"learning_rate": 2.0115334309977085e-07,
"loss": 0.2681140899658203,
"step": 2671
},
{
"epoch": 2.757481940144479,
"grad_norm": 10.030708809594579,
"learning_rate": 1.994707819278896e-07,
"loss": 0.45492902398109436,
"step": 2672
},
{
"epoch": 2.7585139318885448,
"grad_norm": 14.285857459671988,
"learning_rate": 1.9779514405888377e-07,
"loss": 0.6418532133102417,
"step": 2673
},
{
"epoch": 2.759545923632611,
"grad_norm": 14.377463040463253,
"learning_rate": 1.96126431909352e-07,
"loss": 0.411813884973526,
"step": 2674
},
{
"epoch": 2.760577915376677,
"grad_norm": 21.43587231087201,
"learning_rate": 1.9446464788590303e-07,
"loss": 0.5893786549568176,
"step": 2675
},
{
"epoch": 2.761609907120743,
"grad_norm": 11.225164649348446,
"learning_rate": 1.9280979438515479e-07,
"loss": 0.620746910572052,
"step": 2676
},
{
"epoch": 2.762641898864809,
"grad_norm": 7.883313488136548,
"learning_rate": 1.9116187379373043e-07,
"loss": 0.38842883706092834,
"step": 2677
},
{
"epoch": 2.7636738906088754,
"grad_norm": 9.710407091495496,
"learning_rate": 1.8952088848825323e-07,
"loss": 0.2083079218864441,
"step": 2678
},
{
"epoch": 2.764705882352941,
"grad_norm": 8.553670477437477,
"learning_rate": 1.878868408353468e-07,
"loss": 0.18988867104053497,
"step": 2679
},
{
"epoch": 2.7657378740970073,
"grad_norm": 9.294659711703655,
"learning_rate": 1.8625973319162605e-07,
"loss": 0.72264164686203,
"step": 2680
},
{
"epoch": 2.7667698658410735,
"grad_norm": 10.670834169664904,
"learning_rate": 1.846395679036994e-07,
"loss": 0.4202941656112671,
"step": 2681
},
{
"epoch": 2.7678018575851393,
"grad_norm": 12.53950084393448,
"learning_rate": 1.830263473081617e-07,
"loss": 0.7468389272689819,
"step": 2682
},
{
"epoch": 2.7688338493292055,
"grad_norm": 14.216952823524933,
"learning_rate": 1.8142007373159521e-07,
"loss": 0.8521854281425476,
"step": 2683
},
{
"epoch": 2.7698658410732713,
"grad_norm": 12.268212354249279,
"learning_rate": 1.7982074949055794e-07,
"loss": 0.24861863255500793,
"step": 2684
},
{
"epoch": 2.7708978328173375,
"grad_norm": 5.890931718558767,
"learning_rate": 1.7822837689158988e-07,
"loss": 0.22426769137382507,
"step": 2685
},
{
"epoch": 2.7719298245614032,
"grad_norm": 11.2628113902164,
"learning_rate": 1.7664295823120347e-07,
"loss": 0.2600301504135132,
"step": 2686
},
{
"epoch": 2.7729618163054695,
"grad_norm": 9.251502944320855,
"learning_rate": 1.7506449579588357e-07,
"loss": 0.8601874709129333,
"step": 2687
},
{
"epoch": 2.7739938080495357,
"grad_norm": 9.331768788971495,
"learning_rate": 1.7349299186208258e-07,
"loss": 0.32815781235694885,
"step": 2688
},
{
"epoch": 2.7750257997936014,
"grad_norm": 10.322044875268688,
"learning_rate": 1.7192844869621472e-07,
"loss": 0.6990963220596313,
"step": 2689
},
{
"epoch": 2.7760577915376676,
"grad_norm": 10.474923900476831,
"learning_rate": 1.7037086855465902e-07,
"loss": 0.7018382549285889,
"step": 2690
},
{
"epoch": 2.777089783281734,
"grad_norm": 8.820928293879732,
"learning_rate": 1.688202536837502e-07,
"loss": 0.30639582872390747,
"step": 2691
},
{
"epoch": 2.7781217750257996,
"grad_norm": 14.004852574406378,
"learning_rate": 1.6727660631977894e-07,
"loss": 0.5016225576400757,
"step": 2692
},
{
"epoch": 2.779153766769866,
"grad_norm": 11.16794150458729,
"learning_rate": 1.6573992868898714e-07,
"loss": 0.49680233001708984,
"step": 2693
},
{
"epoch": 2.780185758513932,
"grad_norm": 9.109794510400386,
"learning_rate": 1.642102230075643e-07,
"loss": 0.44969314336776733,
"step": 2694
},
{
"epoch": 2.781217750257998,
"grad_norm": 9.912484510145516,
"learning_rate": 1.6268749148164563e-07,
"loss": 0.1183367371559143,
"step": 2695
},
{
"epoch": 2.782249742002064,
"grad_norm": 8.344026321949334,
"learning_rate": 1.6117173630730787e-07,
"loss": 0.43249958753585815,
"step": 2696
},
{
"epoch": 2.78328173374613,
"grad_norm": 13.014283356582448,
"learning_rate": 1.5966295967056676e-07,
"loss": 0.20627948641777039,
"step": 2697
},
{
"epoch": 2.784313725490196,
"grad_norm": 9.620985232822974,
"learning_rate": 1.5816116374737456e-07,
"loss": 0.2142390012741089,
"step": 2698
},
{
"epoch": 2.785345717234262,
"grad_norm": 10.793188846713067,
"learning_rate": 1.5666635070361312e-07,
"loss": 0.4890708327293396,
"step": 2699
},
{
"epoch": 2.7863777089783284,
"grad_norm": 10.864902239809117,
"learning_rate": 1.5517852269509692e-07,
"loss": 0.3679332137107849,
"step": 2700
},
{
"epoch": 2.787409700722394,
"grad_norm": 11.946551012179722,
"learning_rate": 1.536976818675645e-07,
"loss": 0.8791127800941467,
"step": 2701
},
{
"epoch": 2.7884416924664603,
"grad_norm": 15.216087505144005,
"learning_rate": 1.5222383035667866e-07,
"loss": 0.6431314945220947,
"step": 2702
},
{
"epoch": 2.7894736842105265,
"grad_norm": 11.35163488635866,
"learning_rate": 1.5075697028802127e-07,
"loss": 0.37764033675193787,
"step": 2703
},
{
"epoch": 2.7905056759545923,
"grad_norm": 9.918993023831883,
"learning_rate": 1.492971037770924e-07,
"loss": 0.18792365491390228,
"step": 2704
},
{
"epoch": 2.7915376676986585,
"grad_norm": 9.27459483814883,
"learning_rate": 1.4784423292930505e-07,
"loss": 0.17031517624855042,
"step": 2705
},
{
"epoch": 2.7925696594427247,
"grad_norm": 11.747999342453264,
"learning_rate": 1.463983598399832e-07,
"loss": 0.6868878602981567,
"step": 2706
},
{
"epoch": 2.7936016511867905,
"grad_norm": 13.424203722464256,
"learning_rate": 1.4495948659435932e-07,
"loss": 0.5683586597442627,
"step": 2707
},
{
"epoch": 2.7946336429308567,
"grad_norm": 11.909445878210795,
"learning_rate": 1.435276152675691e-07,
"loss": 0.726739227771759,
"step": 2708
},
{
"epoch": 2.7956656346749225,
"grad_norm": 10.0878594595676,
"learning_rate": 1.4210274792465284e-07,
"loss": 0.3911072015762329,
"step": 2709
},
{
"epoch": 2.7966976264189887,
"grad_norm": 13.466899644240698,
"learning_rate": 1.4068488662054735e-07,
"loss": 0.34685128927230835,
"step": 2710
},
{
"epoch": 2.7977296181630544,
"grad_norm": 13.176249426431637,
"learning_rate": 1.3927403340008582e-07,
"loss": 0.37769877910614014,
"step": 2711
},
{
"epoch": 2.7987616099071206,
"grad_norm": 17.08716499957554,
"learning_rate": 1.378701902979962e-07,
"loss": 0.32527002692222595,
"step": 2712
},
{
"epoch": 2.799793601651187,
"grad_norm": 10.11939347764081,
"learning_rate": 1.364733593388934e-07,
"loss": 0.15866559743881226,
"step": 2713
},
{
"epoch": 2.8008255933952526,
"grad_norm": 10.646998032475292,
"learning_rate": 1.3508354253728205e-07,
"loss": 0.27378302812576294,
"step": 2714
},
{
"epoch": 2.801857585139319,
"grad_norm": 16.93448402712155,
"learning_rate": 1.3370074189755e-07,
"loss": 0.41292816400527954,
"step": 2715
},
{
"epoch": 2.802889576883385,
"grad_norm": 16.01106639724304,
"learning_rate": 1.323249594139664e-07,
"loss": 0.5351982712745667,
"step": 2716
},
{
"epoch": 2.803921568627451,
"grad_norm": 11.68268860833618,
"learning_rate": 1.3095619707067963e-07,
"loss": 0.6094648241996765,
"step": 2717
},
{
"epoch": 2.804953560371517,
"grad_norm": 14.556869333459348,
"learning_rate": 1.2959445684171123e-07,
"loss": 0.4445239305496216,
"step": 2718
},
{
"epoch": 2.805985552115583,
"grad_norm": 10.363256530414061,
"learning_rate": 1.2823974069095802e-07,
"loss": 1.329785704612732,
"step": 2719
},
{
"epoch": 2.807017543859649,
"grad_norm": 9.19755960663621,
"learning_rate": 1.2689205057218602e-07,
"loss": 0.22288936376571655,
"step": 2720
},
{
"epoch": 2.808049535603715,
"grad_norm": 11.178463438858222,
"learning_rate": 1.2555138842902826e-07,
"loss": 0.2416641116142273,
"step": 2721
},
{
"epoch": 2.8090815273477814,
"grad_norm": 8.724096486816755,
"learning_rate": 1.24217756194982e-07,
"loss": 0.3657599687576294,
"step": 2722
},
{
"epoch": 2.810113519091847,
"grad_norm": 9.987512768985848,
"learning_rate": 1.2289115579340538e-07,
"loss": 0.27216145396232605,
"step": 2723
},
{
"epoch": 2.8111455108359134,
"grad_norm": 12.572919253248884,
"learning_rate": 1.2157158913751687e-07,
"loss": 0.5774905681610107,
"step": 2724
},
{
"epoch": 2.8121775025799796,
"grad_norm": 13.113996940511523,
"learning_rate": 1.2025905813038917e-07,
"loss": 0.3477795124053955,
"step": 2725
},
{
"epoch": 2.8132094943240453,
"grad_norm": 11.919438155728603,
"learning_rate": 1.1895356466494978e-07,
"loss": 0.8780714869499207,
"step": 2726
},
{
"epoch": 2.8142414860681115,
"grad_norm": 10.355575005545365,
"learning_rate": 1.1765511062397483e-07,
"loss": 0.7887623310089111,
"step": 2727
},
{
"epoch": 2.8152734778121777,
"grad_norm": 13.82676737829339,
"learning_rate": 1.1636369788008973e-07,
"loss": 0.6320158839225769,
"step": 2728
},
{
"epoch": 2.8163054695562435,
"grad_norm": 11.183921843773033,
"learning_rate": 1.1507932829576407e-07,
"loss": 0.43458500504493713,
"step": 2729
},
{
"epoch": 2.8173374613003097,
"grad_norm": 11.564870667791334,
"learning_rate": 1.1380200372331063e-07,
"loss": 0.5087906718254089,
"step": 2730
},
{
"epoch": 2.818369453044376,
"grad_norm": 13.832341375909081,
"learning_rate": 1.1253172600488083e-07,
"loss": 0.3639557659626007,
"step": 2731
},
{
"epoch": 2.8194014447884417,
"grad_norm": 12.750370779773132,
"learning_rate": 1.1126849697246533e-07,
"loss": 0.32219162583351135,
"step": 2732
},
{
"epoch": 2.820433436532508,
"grad_norm": 15.083575215367821,
"learning_rate": 1.1001231844788574e-07,
"loss": 0.5374658107757568,
"step": 2733
},
{
"epoch": 2.8214654282765737,
"grad_norm": 11.60486251326375,
"learning_rate": 1.0876319224279896e-07,
"loss": 0.2404775768518448,
"step": 2734
},
{
"epoch": 2.82249742002064,
"grad_norm": 8.905506133399465,
"learning_rate": 1.0752112015868843e-07,
"loss": 0.596005380153656,
"step": 2735
},
{
"epoch": 2.8235294117647056,
"grad_norm": 18.932193446521126,
"learning_rate": 1.0628610398686679e-07,
"loss": 1.1185606718063354,
"step": 2736
},
{
"epoch": 2.824561403508772,
"grad_norm": 10.874924961588212,
"learning_rate": 1.0505814550846705e-07,
"loss": 0.19293265044689178,
"step": 2737
},
{
"epoch": 2.825593395252838,
"grad_norm": 7.561813823776279,
"learning_rate": 1.0383724649444704e-07,
"loss": 0.29149097204208374,
"step": 2738
},
{
"epoch": 2.826625386996904,
"grad_norm": 9.496734760649641,
"learning_rate": 1.0262340870558162e-07,
"loss": 0.3492857813835144,
"step": 2739
},
{
"epoch": 2.82765737874097,
"grad_norm": 14.643825367827347,
"learning_rate": 1.014166338924627e-07,
"loss": 0.3620374798774719,
"step": 2740
},
{
"epoch": 2.828689370485036,
"grad_norm": 13.630359044704328,
"learning_rate": 1.0021692379549585e-07,
"loss": 1.076962947845459,
"step": 2741
},
{
"epoch": 2.829721362229102,
"grad_norm": 8.35946293486137,
"learning_rate": 9.902428014489762e-08,
"loss": 0.445547878742218,
"step": 2742
},
{
"epoch": 2.830753353973168,
"grad_norm": 10.391459307268816,
"learning_rate": 9.783870466069433e-08,
"loss": 0.39448314905166626,
"step": 2743
},
{
"epoch": 2.8317853457172344,
"grad_norm": 13.087674033489241,
"learning_rate": 9.666019905271662e-08,
"loss": 0.6303298473358154,
"step": 2744
},
{
"epoch": 2.8328173374613,
"grad_norm": 21.531957553952452,
"learning_rate": 9.548876502060211e-08,
"loss": 0.9480113387107849,
"step": 2745
},
{
"epoch": 2.8338493292053664,
"grad_norm": 9.911494826358105,
"learning_rate": 9.432440425378664e-08,
"loss": 0.6038950085639954,
"step": 2746
},
{
"epoch": 2.8348813209494326,
"grad_norm": 8.33541902845192,
"learning_rate": 9.316711843150638e-08,
"loss": 0.2861171066761017,
"step": 2747
},
{
"epoch": 2.8359133126934983,
"grad_norm": 14.31287728036117,
"learning_rate": 9.201690922279405e-08,
"loss": 0.38812384009361267,
"step": 2748
},
{
"epoch": 2.8369453044375645,
"grad_norm": 13.492626654791492,
"learning_rate": 9.087377828647714e-08,
"loss": 0.4195922613143921,
"step": 2749
},
{
"epoch": 2.8379772961816307,
"grad_norm": 8.52565604085448,
"learning_rate": 8.973772727117358e-08,
"loss": 0.3792739808559418,
"step": 2750
},
{
"epoch": 2.8390092879256965,
"grad_norm": 9.66940222976016,
"learning_rate": 8.860875781529222e-08,
"loss": 0.4448316693305969,
"step": 2751
},
{
"epoch": 2.8400412796697627,
"grad_norm": 14.998287535953239,
"learning_rate": 8.748687154702673e-08,
"loss": 0.5362205505371094,
"step": 2752
},
{
"epoch": 2.841073271413829,
"grad_norm": 7.2640553996033885,
"learning_rate": 8.637207008435788e-08,
"loss": 0.27778613567352295,
"step": 2753
},
{
"epoch": 2.8421052631578947,
"grad_norm": 7.803788163827977,
"learning_rate": 8.526435503504737e-08,
"loss": 0.23669756948947906,
"step": 2754
},
{
"epoch": 2.843137254901961,
"grad_norm": 33.20287493265629,
"learning_rate": 8.416372799663674e-08,
"loss": 1.3798854351043701,
"step": 2755
},
{
"epoch": 2.844169246646027,
"grad_norm": 13.719617537778563,
"learning_rate": 8.307019055644517e-08,
"loss": 0.6489579677581787,
"step": 2756
},
{
"epoch": 2.845201238390093,
"grad_norm": 12.182729633208371,
"learning_rate": 8.198374429156886e-08,
"loss": 0.501702606678009,
"step": 2757
},
{
"epoch": 2.846233230134159,
"grad_norm": 11.014775544350565,
"learning_rate": 8.090439076887557e-08,
"loss": 0.6627386808395386,
"step": 2758
},
{
"epoch": 2.847265221878225,
"grad_norm": 11.52891371708018,
"learning_rate": 7.983213154500402e-08,
"loss": 0.5250353813171387,
"step": 2759
},
{
"epoch": 2.848297213622291,
"grad_norm": 10.62518915136225,
"learning_rate": 7.876696816636276e-08,
"loss": 0.19456374645233154,
"step": 2760
},
{
"epoch": 2.849329205366357,
"grad_norm": 20.732355197414126,
"learning_rate": 7.770890216912463e-08,
"loss": 0.6740440130233765,
"step": 2761
},
{
"epoch": 2.850361197110423,
"grad_norm": 9.981363789805053,
"learning_rate": 7.665793507922903e-08,
"loss": 0.417470782995224,
"step": 2762
},
{
"epoch": 2.8513931888544892,
"grad_norm": 10.2390899038885,
"learning_rate": 7.561406841237573e-08,
"loss": 0.8772274255752563,
"step": 2763
},
{
"epoch": 2.852425180598555,
"grad_norm": 10.049760714103,
"learning_rate": 7.45773036740255e-08,
"loss": 0.6738741397857666,
"step": 2764
},
{
"epoch": 2.853457172342621,
"grad_norm": 11.496249432336205,
"learning_rate": 7.354764235939505e-08,
"loss": 0.24579696357250214,
"step": 2765
},
{
"epoch": 2.8544891640866874,
"grad_norm": 11.221020100326134,
"learning_rate": 7.252508595345765e-08,
"loss": 0.3331579864025116,
"step": 2766
},
{
"epoch": 2.855521155830753,
"grad_norm": 12.112232013450475,
"learning_rate": 7.150963593094029e-08,
"loss": 0.7456363439559937,
"step": 2767
},
{
"epoch": 2.8565531475748194,
"grad_norm": 10.205895852941165,
"learning_rate": 7.050129375632098e-08,
"loss": 0.2590240240097046,
"step": 2768
},
{
"epoch": 2.8575851393188856,
"grad_norm": 9.005970205024083,
"learning_rate": 6.950006088382533e-08,
"loss": 0.31267833709716797,
"step": 2769
},
{
"epoch": 2.8586171310629513,
"grad_norm": 6.844001294261074,
"learning_rate": 6.850593875742827e-08,
"loss": 0.3022312819957733,
"step": 2770
},
{
"epoch": 2.8596491228070176,
"grad_norm": 8.123381166455903,
"learning_rate": 6.751892881084853e-08,
"loss": 0.31355875730514526,
"step": 2771
},
{
"epoch": 2.8606811145510838,
"grad_norm": 9.43281052399945,
"learning_rate": 6.65390324675469e-08,
"loss": 0.20084381103515625,
"step": 2772
},
{
"epoch": 2.8617131062951495,
"grad_norm": 17.440180275290196,
"learning_rate": 6.556625114072623e-08,
"loss": 0.4890974760055542,
"step": 2773
},
{
"epoch": 2.8627450980392157,
"grad_norm": 10.496821025738013,
"learning_rate": 6.460058623332766e-08,
"loss": 0.5973390340805054,
"step": 2774
},
{
"epoch": 2.863777089783282,
"grad_norm": 26.173617084616886,
"learning_rate": 6.364203913802824e-08,
"loss": 0.5038038492202759,
"step": 2775
},
{
"epoch": 2.8648090815273477,
"grad_norm": 18.962199958622488,
"learning_rate": 6.269061123724163e-08,
"loss": 0.7357000708580017,
"step": 2776
},
{
"epoch": 2.865841073271414,
"grad_norm": 8.419033596286154,
"learning_rate": 6.174630390311242e-08,
"loss": 0.31707966327667236,
"step": 2777
},
{
"epoch": 2.86687306501548,
"grad_norm": 11.399487419360666,
"learning_rate": 6.080911849751681e-08,
"loss": 0.5759315490722656,
"step": 2778
},
{
"epoch": 2.867905056759546,
"grad_norm": 13.696280633619493,
"learning_rate": 5.987905637206026e-08,
"loss": 0.4270392060279846,
"step": 2779
},
{
"epoch": 2.868937048503612,
"grad_norm": 8.507421922322656,
"learning_rate": 5.895611886807317e-08,
"loss": 0.40549927949905396,
"step": 2780
},
{
"epoch": 2.8699690402476783,
"grad_norm": 10.040991890497093,
"learning_rate": 5.804030731661303e-08,
"loss": 0.8265923261642456,
"step": 2781
},
{
"epoch": 2.871001031991744,
"grad_norm": 10.531228237547275,
"learning_rate": 5.713162303845887e-08,
"loss": 0.5311161279678345,
"step": 2782
},
{
"epoch": 2.8720330237358103,
"grad_norm": 6.474633748136652,
"learning_rate": 5.623006734411185e-08,
"loss": 0.4658339321613312,
"step": 2783
},
{
"epoch": 2.873065015479876,
"grad_norm": 13.523958814362693,
"learning_rate": 5.533564153379134e-08,
"loss": 0.641715407371521,
"step": 2784
},
{
"epoch": 2.8740970072239422,
"grad_norm": 12.74107558480464,
"learning_rate": 5.444834689743439e-08,
"loss": 0.5655953288078308,
"step": 2785
},
{
"epoch": 2.875128998968008,
"grad_norm": 8.181066484517263,
"learning_rate": 5.356818471469405e-08,
"loss": 0.29476261138916016,
"step": 2786
},
{
"epoch": 2.876160990712074,
"grad_norm": 16.023616527724055,
"learning_rate": 5.269515625493549e-08,
"loss": 0.3077610433101654,
"step": 2787
},
{
"epoch": 2.8771929824561404,
"grad_norm": 9.146095009743794,
"learning_rate": 5.182926277723821e-08,
"loss": 0.5303142666816711,
"step": 2788
},
{
"epoch": 2.878224974200206,
"grad_norm": 7.830994742973659,
"learning_rate": 5.097050553038829e-08,
"loss": 0.3145953416824341,
"step": 2789
},
{
"epoch": 2.8792569659442724,
"grad_norm": 12.407694830029483,
"learning_rate": 5.0118885752883376e-08,
"loss": 0.5515159368515015,
"step": 2790
},
{
"epoch": 2.8802889576883386,
"grad_norm": 11.21357136261069,
"learning_rate": 4.927440467292488e-08,
"loss": 0.41922271251678467,
"step": 2791
},
{
"epoch": 2.8813209494324044,
"grad_norm": 10.70014876987049,
"learning_rate": 4.843706350842081e-08,
"loss": 0.23146328330039978,
"step": 2792
},
{
"epoch": 2.8823529411764706,
"grad_norm": 12.729873312487426,
"learning_rate": 4.760686346698018e-08,
"loss": 1.2350009679794312,
"step": 2793
},
{
"epoch": 2.8833849329205368,
"grad_norm": 10.327084671696761,
"learning_rate": 4.678380574591357e-08,
"loss": 0.33487558364868164,
"step": 2794
},
{
"epoch": 2.8844169246646025,
"grad_norm": 14.020291391129224,
"learning_rate": 4.596789153223258e-08,
"loss": 0.7674515247344971,
"step": 2795
},
{
"epoch": 2.8854489164086687,
"grad_norm": 9.236506031541138,
"learning_rate": 4.515912200264427e-08,
"loss": 0.4201269745826721,
"step": 2796
},
{
"epoch": 2.886480908152735,
"grad_norm": 11.388103105456102,
"learning_rate": 4.43574983235534e-08,
"loss": 0.35071414709091187,
"step": 2797
},
{
"epoch": 2.8875128998968007,
"grad_norm": 11.519886506074421,
"learning_rate": 4.356302165105741e-08,
"loss": 0.4228086471557617,
"step": 2798
},
{
"epoch": 2.888544891640867,
"grad_norm": 10.920468331420269,
"learning_rate": 4.2775693130948094e-08,
"loss": 0.4655839204788208,
"step": 2799
},
{
"epoch": 2.889576883384933,
"grad_norm": 11.301364218639682,
"learning_rate": 4.19955138987066e-08,
"loss": 0.44735416769981384,
"step": 2800
},
{
"epoch": 2.890608875128999,
"grad_norm": 17.013383304560165,
"learning_rate": 4.122248507950399e-08,
"loss": 0.8772158622741699,
"step": 2801
},
{
"epoch": 2.891640866873065,
"grad_norm": 11.977124328109158,
"learning_rate": 4.045660778820015e-08,
"loss": 0.2992047369480133,
"step": 2802
},
{
"epoch": 2.8926728586171313,
"grad_norm": 13.724193152652521,
"learning_rate": 3.9697883129338756e-08,
"loss": 0.29466360807418823,
"step": 2803
},
{
"epoch": 2.893704850361197,
"grad_norm": 11.254521680601806,
"learning_rate": 3.894631219715006e-08,
"loss": 0.21574847400188446,
"step": 2804
},
{
"epoch": 2.8947368421052633,
"grad_norm": 10.296319023427813,
"learning_rate": 3.820189607554647e-08,
"loss": 0.3829636871814728,
"step": 2805
},
{
"epoch": 2.8957688338493295,
"grad_norm": 8.752116369115315,
"learning_rate": 3.746463583812143e-08,
"loss": 0.5248335599899292,
"step": 2806
},
{
"epoch": 2.8968008255933952,
"grad_norm": 10.458602320218011,
"learning_rate": 3.6734532548149405e-08,
"loss": 0.42929303646087646,
"step": 2807
},
{
"epoch": 2.8978328173374615,
"grad_norm": 13.063885625069268,
"learning_rate": 3.601158725858034e-08,
"loss": 0.19391657412052155,
"step": 2808
},
{
"epoch": 2.898864809081527,
"grad_norm": 11.833405438652617,
"learning_rate": 3.529580101204466e-08,
"loss": 0.4269029498100281,
"step": 2809
},
{
"epoch": 2.8998968008255934,
"grad_norm": 14.210076402805809,
"learning_rate": 3.458717484084606e-08,
"loss": 0.30730894207954407,
"step": 2810
},
{
"epoch": 2.900928792569659,
"grad_norm": 12.170748254840527,
"learning_rate": 3.3885709766962036e-08,
"loss": 0.6975186467170715,
"step": 2811
},
{
"epoch": 2.9019607843137254,
"grad_norm": 10.836316553045704,
"learning_rate": 3.3191406802041693e-08,
"loss": 0.3806256651878357,
"step": 2812
},
{
"epoch": 2.9029927760577916,
"grad_norm": 9.015789403322383,
"learning_rate": 3.2504266947406824e-08,
"loss": 0.29094287753105164,
"step": 2813
},
{
"epoch": 2.9040247678018574,
"grad_norm": 10.27225739186077,
"learning_rate": 3.1824291194046954e-08,
"loss": 0.4287331700325012,
"step": 2814
},
{
"epoch": 2.9050567595459236,
"grad_norm": 10.577320468383352,
"learning_rate": 3.11514805226204e-08,
"loss": 0.982883095741272,
"step": 2815
},
{
"epoch": 2.90608875128999,
"grad_norm": 12.498037060798291,
"learning_rate": 3.048583590345266e-08,
"loss": 0.6146286725997925,
"step": 2816
},
{
"epoch": 2.9071207430340555,
"grad_norm": 14.050659575481342,
"learning_rate": 2.982735829653249e-08,
"loss": 0.6893696784973145,
"step": 2817
},
{
"epoch": 2.9081527347781218,
"grad_norm": 10.410803532472595,
"learning_rate": 2.9176048651513578e-08,
"loss": 0.5689204931259155,
"step": 2818
},
{
"epoch": 2.909184726522188,
"grad_norm": 10.927169952282387,
"learning_rate": 2.8531907907712876e-08,
"loss": 0.39179882407188416,
"step": 2819
},
{
"epoch": 2.9102167182662537,
"grad_norm": 20.5922212081612,
"learning_rate": 2.7894936994106724e-08,
"loss": 1.1332855224609375,
"step": 2820
},
{
"epoch": 2.91124871001032,
"grad_norm": 7.895624536417559,
"learning_rate": 2.726513682933196e-08,
"loss": 0.25301265716552734,
"step": 2821
},
{
"epoch": 2.912280701754386,
"grad_norm": 8.380759870706228,
"learning_rate": 2.6642508321683692e-08,
"loss": 0.2217177003622055,
"step": 2822
},
{
"epoch": 2.913312693498452,
"grad_norm": 13.769480574074626,
"learning_rate": 2.602705236911418e-08,
"loss": 0.8979390859603882,
"step": 2823
},
{
"epoch": 2.914344685242518,
"grad_norm": 9.085418594008585,
"learning_rate": 2.5418769859231194e-08,
"loss": 0.658086895942688,
"step": 2824
},
{
"epoch": 2.9153766769865843,
"grad_norm": 12.25697807718521,
"learning_rate": 2.4817661669297445e-08,
"loss": 0.5765917897224426,
"step": 2825
},
{
"epoch": 2.91640866873065,
"grad_norm": 10.694665070101687,
"learning_rate": 2.4223728666228906e-08,
"loss": 0.5431925058364868,
"step": 2826
},
{
"epoch": 2.9174406604747163,
"grad_norm": 11.889135343753276,
"learning_rate": 2.3636971706592627e-08,
"loss": 0.4332832098007202,
"step": 2827
},
{
"epoch": 2.9184726522187825,
"grad_norm": 7.798765327554262,
"learning_rate": 2.3057391636606698e-08,
"loss": 0.8352420330047607,
"step": 2828
},
{
"epoch": 2.9195046439628483,
"grad_norm": 9.417958904982532,
"learning_rate": 2.248498929214027e-08,
"loss": 0.28343185782432556,
"step": 2829
},
{
"epoch": 2.9205366357069145,
"grad_norm": 14.648485052578973,
"learning_rate": 2.1919765498708556e-08,
"loss": 0.31452351808547974,
"step": 2830
},
{
"epoch": 2.9215686274509802,
"grad_norm": 12.479013074380937,
"learning_rate": 2.1361721071475605e-08,
"loss": 0.39548927545547485,
"step": 2831
},
{
"epoch": 2.9226006191950464,
"grad_norm": 11.940862196644698,
"learning_rate": 2.081085681524986e-08,
"loss": 0.5270460247993469,
"step": 2832
},
{
"epoch": 2.9236326109391126,
"grad_norm": 14.506115481993955,
"learning_rate": 2.0267173524485816e-08,
"loss": 0.483257532119751,
"step": 2833
},
{
"epoch": 2.9246646026831784,
"grad_norm": 24.618465765797602,
"learning_rate": 1.9730671983281824e-08,
"loss": 0.9315032362937927,
"step": 2834
},
{
"epoch": 2.9256965944272446,
"grad_norm": 7.785308907165337,
"learning_rate": 1.920135296537784e-08,
"loss": 0.2826288342475891,
"step": 2835
},
{
"epoch": 2.9267285861713104,
"grad_norm": 8.905394415808534,
"learning_rate": 1.8679217234154335e-08,
"loss": 0.14207813143730164,
"step": 2836
},
{
"epoch": 2.9277605779153766,
"grad_norm": 9.642626790270166,
"learning_rate": 1.8164265542634507e-08,
"loss": 0.2834736406803131,
"step": 2837
},
{
"epoch": 2.928792569659443,
"grad_norm": 12.736848130226655,
"learning_rate": 1.765649863347929e-08,
"loss": 0.3384333848953247,
"step": 2838
},
{
"epoch": 2.9298245614035086,
"grad_norm": 9.98619549627037,
"learning_rate": 1.7155917238987906e-08,
"loss": 0.24947945773601532,
"step": 2839
},
{
"epoch": 2.9308565531475748,
"grad_norm": 15.43292926706652,
"learning_rate": 1.6662522081097308e-08,
"loss": 0.3706369400024414,
"step": 2840
},
{
"epoch": 2.931888544891641,
"grad_norm": 15.193315285621756,
"learning_rate": 1.61763138713783e-08,
"loss": 1.1745437383651733,
"step": 2841
},
{
"epoch": 2.9329205366357067,
"grad_norm": 8.539014235999547,
"learning_rate": 1.5697293311039973e-08,
"loss": 0.6649677753448486,
"step": 2842
},
{
"epoch": 2.933952528379773,
"grad_norm": 12.099623997731364,
"learning_rate": 1.522546109092249e-08,
"loss": 0.5370413064956665,
"step": 2843
},
{
"epoch": 2.934984520123839,
"grad_norm": 10.079698858586877,
"learning_rate": 1.4760817891500966e-08,
"loss": 0.6036677956581116,
"step": 2844
},
{
"epoch": 2.936016511867905,
"grad_norm": 22.78996695895161,
"learning_rate": 1.4303364382881601e-08,
"loss": 0.7218687534332275,
"step": 2845
},
{
"epoch": 2.937048503611971,
"grad_norm": 10.605000934865663,
"learning_rate": 1.3853101224802212e-08,
"loss": 0.3760529160499573,
"step": 2846
},
{
"epoch": 2.9380804953560373,
"grad_norm": 7.990920195942308,
"learning_rate": 1.3410029066630025e-08,
"loss": 0.3890661597251892,
"step": 2847
},
{
"epoch": 2.939112487100103,
"grad_norm": 14.00545881468774,
"learning_rate": 1.2974148547362231e-08,
"loss": 0.7472094893455505,
"step": 2848
},
{
"epoch": 2.9401444788441693,
"grad_norm": 18.041176052953638,
"learning_rate": 1.2545460295623757e-08,
"loss": 1.3969285488128662,
"step": 2849
},
{
"epoch": 2.9411764705882355,
"grad_norm": 12.776565154910713,
"learning_rate": 1.212396492966672e-08,
"loss": 0.3700495660305023,
"step": 2850
},
{
"epoch": 2.9422084623323013,
"grad_norm": 10.925723452827187,
"learning_rate": 1.1709663057370424e-08,
"loss": 0.40052279829978943,
"step": 2851
},
{
"epoch": 2.9432404540763675,
"grad_norm": 7.389953137459436,
"learning_rate": 1.1302555276238581e-08,
"loss": 0.2983669340610504,
"step": 2852
},
{
"epoch": 2.9442724458204337,
"grad_norm": 7.6412748058052635,
"learning_rate": 1.0902642173400424e-08,
"loss": 0.46939218044281006,
"step": 2853
},
{
"epoch": 2.9453044375644994,
"grad_norm": 13.33103952638023,
"learning_rate": 1.05099243256096e-08,
"loss": 0.7096831202507019,
"step": 2854
},
{
"epoch": 2.9463364293085657,
"grad_norm": 9.170467654663462,
"learning_rate": 1.0124402299241943e-08,
"loss": 0.3482118248939514,
"step": 2855
},
{
"epoch": 2.9473684210526314,
"grad_norm": 11.277517652250038,
"learning_rate": 9.746076650294922e-09,
"loss": 0.5088193416595459,
"step": 2856
},
{
"epoch": 2.9484004127966976,
"grad_norm": 10.021439312052443,
"learning_rate": 9.374947924388755e-09,
"loss": 0.39733967185020447,
"step": 2857
},
{
"epoch": 2.949432404540764,
"grad_norm": 10.531017191140222,
"learning_rate": 9.011016656764182e-09,
"loss": 0.5305665731430054,
"step": 2858
},
{
"epoch": 2.9504643962848296,
"grad_norm": 10.033494308935959,
"learning_rate": 8.654283372280248e-09,
"loss": 0.6603313684463501,
"step": 2859
},
{
"epoch": 2.951496388028896,
"grad_norm": 9.636604443118136,
"learning_rate": 8.304748585417077e-09,
"loss": 0.45227867364883423,
"step": 2860
},
{
"epoch": 2.9525283797729616,
"grad_norm": 15.953758922812748,
"learning_rate": 7.96241280027199e-09,
"loss": 0.6437937021255493,
"step": 2861
},
{
"epoch": 2.9535603715170278,
"grad_norm": 8.005244278076123,
"learning_rate": 7.627276510560056e-09,
"loss": 0.29151588678359985,
"step": 2862
},
{
"epoch": 2.954592363261094,
"grad_norm": 13.702876635129847,
"learning_rate": 7.299340199613536e-09,
"loss": 0.6355423927307129,
"step": 2863
},
{
"epoch": 2.9556243550051597,
"grad_norm": 15.606539779621775,
"learning_rate": 6.978604340380779e-09,
"loss": 0.689186692237854,
"step": 2864
},
{
"epoch": 2.956656346749226,
"grad_norm": 9.995480499282703,
"learning_rate": 6.665069395425661e-09,
"loss": 0.46941396594047546,
"step": 2865
},
{
"epoch": 2.957688338493292,
"grad_norm": 8.071745754656114,
"learning_rate": 6.3587358169264755e-09,
"loss": 0.25395047664642334,
"step": 2866
},
{
"epoch": 2.958720330237358,
"grad_norm": 14.390161317483425,
"learning_rate": 6.059604046677603e-09,
"loss": 0.1884629726409912,
"step": 2867
},
{
"epoch": 2.959752321981424,
"grad_norm": 8.335728992300055,
"learning_rate": 5.767674516083954e-09,
"loss": 0.7414337992668152,
"step": 2868
},
{
"epoch": 2.9607843137254903,
"grad_norm": 17.72468534614113,
"learning_rate": 5.48294764616597e-09,
"loss": 0.45233145356178284,
"step": 2869
},
{
"epoch": 2.961816305469556,
"grad_norm": 9.074680053880547,
"learning_rate": 5.205423847555735e-09,
"loss": 0.5274835824966431,
"step": 2870
},
{
"epoch": 2.9628482972136223,
"grad_norm": 10.471200128103057,
"learning_rate": 4.9351035204964205e-09,
"loss": 0.3120895326137543,
"step": 2871
},
{
"epoch": 2.9638802889576885,
"grad_norm": 8.392739587257111,
"learning_rate": 4.671987054842842e-09,
"loss": 0.3116510808467865,
"step": 2872
},
{
"epoch": 2.9649122807017543,
"grad_norm": 11.476475028393562,
"learning_rate": 4.416074830060346e-09,
"loss": 0.36004531383514404,
"step": 2873
},
{
"epoch": 2.9659442724458205,
"grad_norm": 11.562375996795918,
"learning_rate": 4.167367215224816e-09,
"loss": 0.4337872862815857,
"step": 2874
},
{
"epoch": 2.9669762641898867,
"grad_norm": 11.747325700622214,
"learning_rate": 3.925864569021554e-09,
"loss": 0.35795819759368896,
"step": 2875
},
{
"epoch": 2.9680082559339525,
"grad_norm": 7.907415652265775,
"learning_rate": 3.6915672397436208e-09,
"loss": 0.29183316230773926,
"step": 2876
},
{
"epoch": 2.9690402476780187,
"grad_norm": 11.88023342897466,
"learning_rate": 3.4644755652946115e-09,
"loss": 0.5190762877464294,
"step": 2877
},
{
"epoch": 2.970072239422085,
"grad_norm": 9.961046006998368,
"learning_rate": 3.244589873185322e-09,
"loss": 0.26542696356773376,
"step": 2878
},
{
"epoch": 2.9711042311661506,
"grad_norm": 8.78934589213461,
"learning_rate": 3.0319104805326404e-09,
"loss": 0.34915220737457275,
"step": 2879
},
{
"epoch": 2.972136222910217,
"grad_norm": 12.178593168751178,
"learning_rate": 2.8264376940634332e-09,
"loss": 0.2416977733373642,
"step": 2880
},
{
"epoch": 2.9731682146542826,
"grad_norm": 11.769541867178553,
"learning_rate": 2.6281718101089927e-09,
"loss": 0.4350973963737488,
"step": 2881
},
{
"epoch": 2.974200206398349,
"grad_norm": 9.211479849303478,
"learning_rate": 2.437113114607259e-09,
"loss": 0.29845768213272095,
"step": 2882
},
{
"epoch": 2.975232198142415,
"grad_norm": 12.049136286543924,
"learning_rate": 2.2532618831022646e-09,
"loss": 0.44155120849609375,
"step": 2883
},
{
"epoch": 2.976264189886481,
"grad_norm": 7.992230482448386,
"learning_rate": 2.076618380744133e-09,
"loss": 0.24698218703269958,
"step": 2884
},
{
"epoch": 2.977296181630547,
"grad_norm": 12.091285409429458,
"learning_rate": 1.9071828622868603e-09,
"loss": 0.4726845324039459,
"step": 2885
},
{
"epoch": 2.9783281733746128,
"grad_norm": 4.444246393658765,
"learning_rate": 1.7449555720899792e-09,
"loss": 0.20463469624519348,
"step": 2886
},
{
"epoch": 2.979360165118679,
"grad_norm": 9.529793198317492,
"learning_rate": 1.5899367441168934e-09,
"loss": 0.3188978433609009,
"step": 2887
},
{
"epoch": 2.980392156862745,
"grad_norm": 13.147973989711732,
"learning_rate": 1.4421266019348789e-09,
"loss": 0.6265153288841248,
"step": 2888
},
{
"epoch": 2.981424148606811,
"grad_norm": 15.723436943080621,
"learning_rate": 1.3015253587150832e-09,
"loss": 0.4962090253829956,
"step": 2889
},
{
"epoch": 2.982456140350877,
"grad_norm": 10.55766498053424,
"learning_rate": 1.1681332172319704e-09,
"loss": 1.112623929977417,
"step": 2890
},
{
"epoch": 2.9834881320949433,
"grad_norm": 11.392080474977103,
"learning_rate": 1.0419503698633205e-09,
"loss": 0.2539787292480469,
"step": 2891
},
{
"epoch": 2.984520123839009,
"grad_norm": 12.631956110800884,
"learning_rate": 9.229769985902304e-10,
"loss": 0.36053264141082764,
"step": 2892
},
{
"epoch": 2.9855521155830753,
"grad_norm": 12.749296952365622,
"learning_rate": 8.11213274994338e-10,
"loss": 0.3173477351665497,
"step": 2893
},
{
"epoch": 2.9865841073271415,
"grad_norm": 10.105348450703012,
"learning_rate": 7.066593602611527e-10,
"loss": 0.5603840351104736,
"step": 2894
},
{
"epoch": 2.9876160990712073,
"grad_norm": 8.958449767473246,
"learning_rate": 6.09315405177835e-10,
"loss": 0.4048866331577301,
"step": 2895
},
{
"epoch": 2.9886480908152735,
"grad_norm": 10.931973199978893,
"learning_rate": 5.191815501343067e-10,
"loss": 0.4945456385612488,
"step": 2896
},
{
"epoch": 2.9896800825593397,
"grad_norm": 9.277216206985075,
"learning_rate": 4.362579251204757e-10,
"loss": 0.703812837600708,
"step": 2897
},
{
"epoch": 2.9907120743034055,
"grad_norm": 8.155754435970792,
"learning_rate": 3.605446497279008e-10,
"loss": 0.34644225239753723,
"step": 2898
},
{
"epoch": 2.9917440660474717,
"grad_norm": 8.169228239958668,
"learning_rate": 2.920418331514574e-10,
"loss": 0.27196234464645386,
"step": 2899
},
{
"epoch": 2.992776057791538,
"grad_norm": 7.080576518911739,
"learning_rate": 2.307495741843413e-10,
"loss": 0.19420018792152405,
"step": 2900
},
{
"epoch": 2.9938080495356036,
"grad_norm": 28.391027360704857,
"learning_rate": 1.766679612219546e-10,
"loss": 1.4822670221328735,
"step": 2901
},
{
"epoch": 2.99484004127967,
"grad_norm": 16.511890158796366,
"learning_rate": 1.2979707226135063e-10,
"loss": 2.818498373031616,
"step": 2902
},
{
"epoch": 2.995872033023736,
"grad_norm": 7.117609068184708,
"learning_rate": 9.013697489956841e-11,
"loss": 0.45413410663604736,
"step": 2903
},
{
"epoch": 2.996904024767802,
"grad_norm": 12.77729655477497,
"learning_rate": 5.768772633363284e-11,
"loss": 0.9320136308670044,
"step": 2904
},
{
"epoch": 2.997936016511868,
"grad_norm": 13.631618562372143,
"learning_rate": 3.244937336166487e-11,
"loss": 0.9153477549552917,
"step": 2905
},
{
"epoch": 2.998968008255934,
"grad_norm": 5.890317723699965,
"learning_rate": 1.4421952382881466e-11,
"loss": 0.17859113216400146,
"step": 2906
},
{
"epoch": 3.0,
"grad_norm": 5.446335325117752,
"learning_rate": 3.6054893953751947e-12,
"loss": 0.2771958112716675,
"step": 2907
},
{
"epoch": 3.0,
"step": 2907,
"total_flos": 7768633466880.0,
"train_loss": 1.6535469640083522,
"train_runtime": 3364.7928,
"train_samples_per_second": 3.453,
"train_steps_per_second": 0.864
}
],
"logging_steps": 1,
"max_steps": 2907,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7768633466880.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}