Files
llama3-8b-full-pretrain-was…/trainer_state.json
ModelHub XC 40d1f746eb 初始化项目,由ModelHub XC社区提供模型
Model: shuoxing/llama3-8b-full-pretrain-wash-c4-3-9m-bs4
Source: Original Platform
2026-06-12 17:28:17 +08:00

43367 lines
1.1 MiB

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 6189,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0004847309743092584,
"grad_norm": 41.02025269985854,
"learning_rate": 0.0,
"loss": 4.26969051361084,
"step": 1
},
{
"epoch": 0.0009694619486185168,
"grad_norm": 36.22597395859766,
"learning_rate": 1.6155088852988694e-08,
"loss": 3.5956547260284424,
"step": 2
},
{
"epoch": 0.001454192922927775,
"grad_norm": 30.46392263426707,
"learning_rate": 3.231017770597739e-08,
"loss": 3.9731106758117676,
"step": 3
},
{
"epoch": 0.0019389238972370335,
"grad_norm": 41.59717921207695,
"learning_rate": 4.846526655896608e-08,
"loss": 4.449781894683838,
"step": 4
},
{
"epoch": 0.0024236548715462916,
"grad_norm": 32.122455921411564,
"learning_rate": 6.462035541195477e-08,
"loss": 3.543102741241455,
"step": 5
},
{
"epoch": 0.00290838584585555,
"grad_norm": 39.064071675261836,
"learning_rate": 8.077544426494346e-08,
"loss": 4.133577823638916,
"step": 6
},
{
"epoch": 0.0033931168201648087,
"grad_norm": 46.35334984573486,
"learning_rate": 9.693053311793216e-08,
"loss": 4.824061393737793,
"step": 7
},
{
"epoch": 0.003877847794474067,
"grad_norm": 40.417636227060406,
"learning_rate": 1.1308562197092085e-07,
"loss": 3.9582011699676514,
"step": 8
},
{
"epoch": 0.004362578768783325,
"grad_norm": 35.36216741061857,
"learning_rate": 1.2924071082390955e-07,
"loss": 4.5889892578125,
"step": 9
},
{
"epoch": 0.004847309743092583,
"grad_norm": 36.22157036706304,
"learning_rate": 1.4539579967689823e-07,
"loss": 4.174090385437012,
"step": 10
},
{
"epoch": 0.005332040717401842,
"grad_norm": 36.11964281797753,
"learning_rate": 1.6155088852988693e-07,
"loss": 4.614691734313965,
"step": 11
},
{
"epoch": 0.0058167716917111,
"grad_norm": 26.834559022808097,
"learning_rate": 1.7770597738287563e-07,
"loss": 3.6251485347747803,
"step": 12
},
{
"epoch": 0.006301502666020358,
"grad_norm": 45.926670669218126,
"learning_rate": 1.938610662358643e-07,
"loss": 4.623769283294678,
"step": 13
},
{
"epoch": 0.0067862336403296175,
"grad_norm": 34.09695560708778,
"learning_rate": 2.1001615508885301e-07,
"loss": 3.941284656524658,
"step": 14
},
{
"epoch": 0.007270964614638876,
"grad_norm": 28.382932434274228,
"learning_rate": 2.261712439418417e-07,
"loss": 2.572709560394287,
"step": 15
},
{
"epoch": 0.007755695588948134,
"grad_norm": 31.904350168720484,
"learning_rate": 2.4232633279483037e-07,
"loss": 4.603989601135254,
"step": 16
},
{
"epoch": 0.008240426563257392,
"grad_norm": 37.417475166059184,
"learning_rate": 2.584814216478191e-07,
"loss": 4.063669681549072,
"step": 17
},
{
"epoch": 0.00872515753756665,
"grad_norm": 33.55151162843232,
"learning_rate": 2.746365105008078e-07,
"loss": 3.3943214416503906,
"step": 18
},
{
"epoch": 0.009209888511875909,
"grad_norm": 42.19696768757933,
"learning_rate": 2.9079159935379645e-07,
"loss": 4.437129497528076,
"step": 19
},
{
"epoch": 0.009694619486185167,
"grad_norm": 37.02881726556255,
"learning_rate": 3.0694668820678513e-07,
"loss": 4.322831153869629,
"step": 20
},
{
"epoch": 0.010179350460494426,
"grad_norm": 41.72690827981763,
"learning_rate": 3.2310177705977386e-07,
"loss": 4.623260498046875,
"step": 21
},
{
"epoch": 0.010664081434803683,
"grad_norm": 43.699707969884564,
"learning_rate": 3.392568659127626e-07,
"loss": 4.2672529220581055,
"step": 22
},
{
"epoch": 0.011148812409112942,
"grad_norm": 37.73987452452978,
"learning_rate": 3.5541195476575127e-07,
"loss": 3.85565185546875,
"step": 23
},
{
"epoch": 0.0116335433834222,
"grad_norm": 34.94393413281458,
"learning_rate": 3.715670436187399e-07,
"loss": 3.7854933738708496,
"step": 24
},
{
"epoch": 0.012118274357731459,
"grad_norm": 33.57223768180529,
"learning_rate": 3.877221324717286e-07,
"loss": 3.697699546813965,
"step": 25
},
{
"epoch": 0.012603005332040717,
"grad_norm": 36.177938871937315,
"learning_rate": 4.038772213247173e-07,
"loss": 4.181012153625488,
"step": 26
},
{
"epoch": 0.013087736306349976,
"grad_norm": 26.551940996329098,
"learning_rate": 4.2003231017770603e-07,
"loss": 3.9852514266967773,
"step": 27
},
{
"epoch": 0.013572467280659235,
"grad_norm": 31.862498366961713,
"learning_rate": 4.3618739903069476e-07,
"loss": 4.294615268707275,
"step": 28
},
{
"epoch": 0.014057198254968492,
"grad_norm": 79.62810315144702,
"learning_rate": 4.523424878836834e-07,
"loss": 4.653779983520508,
"step": 29
},
{
"epoch": 0.014541929229277752,
"grad_norm": 35.92347294004075,
"learning_rate": 4.6849757673667206e-07,
"loss": 4.345451831817627,
"step": 30
},
{
"epoch": 0.015026660203587009,
"grad_norm": 33.62670888570208,
"learning_rate": 4.846526655896607e-07,
"loss": 4.026397705078125,
"step": 31
},
{
"epoch": 0.015511391177896268,
"grad_norm": 28.748391432727917,
"learning_rate": 5.008077544426495e-07,
"loss": 4.67657995223999,
"step": 32
},
{
"epoch": 0.015996122152205527,
"grad_norm": 35.147155911742566,
"learning_rate": 5.169628432956382e-07,
"loss": 4.704855918884277,
"step": 33
},
{
"epoch": 0.016480853126514785,
"grad_norm": 42.44759608165818,
"learning_rate": 5.331179321486268e-07,
"loss": 4.945542812347412,
"step": 34
},
{
"epoch": 0.016965584100824042,
"grad_norm": 34.407676909234105,
"learning_rate": 5.492730210016156e-07,
"loss": 3.6859307289123535,
"step": 35
},
{
"epoch": 0.0174503150751333,
"grad_norm": 25.027471909774267,
"learning_rate": 5.654281098546043e-07,
"loss": 3.987711191177368,
"step": 36
},
{
"epoch": 0.01793504604944256,
"grad_norm": 26.14265193126162,
"learning_rate": 5.815831987075929e-07,
"loss": 4.079133033752441,
"step": 37
},
{
"epoch": 0.018419777023751818,
"grad_norm": 24.171806239797384,
"learning_rate": 5.977382875605816e-07,
"loss": 3.8736090660095215,
"step": 38
},
{
"epoch": 0.018904507998061076,
"grad_norm": 44.175610821242216,
"learning_rate": 6.138933764135703e-07,
"loss": 4.262664318084717,
"step": 39
},
{
"epoch": 0.019389238972370333,
"grad_norm": 28.38030044763542,
"learning_rate": 6.300484652665591e-07,
"loss": 4.104944229125977,
"step": 40
},
{
"epoch": 0.019873969946679594,
"grad_norm": 22.67781490605589,
"learning_rate": 6.462035541195477e-07,
"loss": 3.9127256870269775,
"step": 41
},
{
"epoch": 0.02035870092098885,
"grad_norm": 37.26420175855039,
"learning_rate": 6.623586429725363e-07,
"loss": 3.270333766937256,
"step": 42
},
{
"epoch": 0.02084343189529811,
"grad_norm": 33.142735029663825,
"learning_rate": 6.785137318255252e-07,
"loss": 3.832284927368164,
"step": 43
},
{
"epoch": 0.021328162869607366,
"grad_norm": 26.56721478862478,
"learning_rate": 6.946688206785138e-07,
"loss": 3.6056270599365234,
"step": 44
},
{
"epoch": 0.021812893843916627,
"grad_norm": 15.579382978326848,
"learning_rate": 7.108239095315025e-07,
"loss": 3.686903953552246,
"step": 45
},
{
"epoch": 0.022297624818225885,
"grad_norm": 23.439811528346663,
"learning_rate": 7.269789983844912e-07,
"loss": 3.569413423538208,
"step": 46
},
{
"epoch": 0.022782355792535142,
"grad_norm": 27.415712168614196,
"learning_rate": 7.431340872374798e-07,
"loss": 4.23013973236084,
"step": 47
},
{
"epoch": 0.0232670867668444,
"grad_norm": 19.428040854127147,
"learning_rate": 7.592891760904686e-07,
"loss": 4.022500038146973,
"step": 48
},
{
"epoch": 0.02375181774115366,
"grad_norm": 16.22913770219433,
"learning_rate": 7.754442649434572e-07,
"loss": 3.244152545928955,
"step": 49
},
{
"epoch": 0.024236548715462918,
"grad_norm": 22.569081291493237,
"learning_rate": 7.91599353796446e-07,
"loss": 3.151660919189453,
"step": 50
},
{
"epoch": 0.024721279689772176,
"grad_norm": 16.17420010236338,
"learning_rate": 8.077544426494346e-07,
"loss": 3.4667253494262695,
"step": 51
},
{
"epoch": 0.025206010664081433,
"grad_norm": 12.00442742775893,
"learning_rate": 8.239095315024233e-07,
"loss": 3.7005205154418945,
"step": 52
},
{
"epoch": 0.025690741638390694,
"grad_norm": 24.808369710089025,
"learning_rate": 8.400646203554121e-07,
"loss": 3.8304450511932373,
"step": 53
},
{
"epoch": 0.02617547261269995,
"grad_norm": 15.317993342462627,
"learning_rate": 8.562197092084007e-07,
"loss": 3.4527809619903564,
"step": 54
},
{
"epoch": 0.02666020358700921,
"grad_norm": 11.009941841251534,
"learning_rate": 8.723747980613895e-07,
"loss": 3.589771032333374,
"step": 55
},
{
"epoch": 0.02714493456131847,
"grad_norm": 17.488476626195133,
"learning_rate": 8.885298869143781e-07,
"loss": 4.015233993530273,
"step": 56
},
{
"epoch": 0.027629665535627727,
"grad_norm": 23.643192597163107,
"learning_rate": 9.046849757673668e-07,
"loss": 3.7982656955718994,
"step": 57
},
{
"epoch": 0.028114396509936985,
"grad_norm": 14.392154339207867,
"learning_rate": 9.208400646203555e-07,
"loss": 3.076981544494629,
"step": 58
},
{
"epoch": 0.028599127484246242,
"grad_norm": 18.091385640210703,
"learning_rate": 9.369951534733441e-07,
"loss": 3.4630393981933594,
"step": 59
},
{
"epoch": 0.029083858458555503,
"grad_norm": 13.321923427576056,
"learning_rate": 9.53150242326333e-07,
"loss": 3.321859836578369,
"step": 60
},
{
"epoch": 0.02956858943286476,
"grad_norm": 20.59679719749177,
"learning_rate": 9.693053311793215e-07,
"loss": 3.536377191543579,
"step": 61
},
{
"epoch": 0.030053320407174018,
"grad_norm": 15.23853498208797,
"learning_rate": 9.854604200323102e-07,
"loss": 3.361931085586548,
"step": 62
},
{
"epoch": 0.030538051381483276,
"grad_norm": 24.667202964235436,
"learning_rate": 1.001615508885299e-06,
"loss": 3.5399508476257324,
"step": 63
},
{
"epoch": 0.031022782355792537,
"grad_norm": 13.839173595985677,
"learning_rate": 1.0177705977382877e-06,
"loss": 3.0816116333007812,
"step": 64
},
{
"epoch": 0.031507513330101794,
"grad_norm": 12.90614878663486,
"learning_rate": 1.0339256865912764e-06,
"loss": 3.465927839279175,
"step": 65
},
{
"epoch": 0.031992244304411055,
"grad_norm": 18.67095552001039,
"learning_rate": 1.0500807754442651e-06,
"loss": 3.1942691802978516,
"step": 66
},
{
"epoch": 0.03247697527872031,
"grad_norm": 9.357055247817545,
"learning_rate": 1.0662358642972536e-06,
"loss": 3.4150538444519043,
"step": 67
},
{
"epoch": 0.03296170625302957,
"grad_norm": 31.180234140560053,
"learning_rate": 1.0823909531502424e-06,
"loss": 3.182727098464966,
"step": 68
},
{
"epoch": 0.033446437227338824,
"grad_norm": 13.339453390334997,
"learning_rate": 1.098546042003231e-06,
"loss": 3.623173713684082,
"step": 69
},
{
"epoch": 0.033931168201648085,
"grad_norm": 21.83502970527788,
"learning_rate": 1.1147011308562198e-06,
"loss": 3.6481504440307617,
"step": 70
},
{
"epoch": 0.034415899175957346,
"grad_norm": 10.989096845214132,
"learning_rate": 1.1308562197092086e-06,
"loss": 3.2327418327331543,
"step": 71
},
{
"epoch": 0.0349006301502666,
"grad_norm": 9.94786739046521,
"learning_rate": 1.147011308562197e-06,
"loss": 3.23703932762146,
"step": 72
},
{
"epoch": 0.03538536112457586,
"grad_norm": 23.6729045434767,
"learning_rate": 1.1631663974151858e-06,
"loss": 3.0757534503936768,
"step": 73
},
{
"epoch": 0.03587009209888512,
"grad_norm": 18.654379244908093,
"learning_rate": 1.1793214862681745e-06,
"loss": 3.4029648303985596,
"step": 74
},
{
"epoch": 0.036354823073194376,
"grad_norm": 17.1922498548421,
"learning_rate": 1.1954765751211633e-06,
"loss": 3.217639446258545,
"step": 75
},
{
"epoch": 0.036839554047503636,
"grad_norm": 20.556236005652956,
"learning_rate": 1.211631663974152e-06,
"loss": 3.4199254512786865,
"step": 76
},
{
"epoch": 0.03732428502181289,
"grad_norm": 12.465628747100476,
"learning_rate": 1.2277867528271405e-06,
"loss": 3.730072259902954,
"step": 77
},
{
"epoch": 0.03780901599612215,
"grad_norm": 10.478427376415176,
"learning_rate": 1.2439418416801295e-06,
"loss": 3.3886313438415527,
"step": 78
},
{
"epoch": 0.03829374697043141,
"grad_norm": 9.457694900585151,
"learning_rate": 1.2600969305331182e-06,
"loss": 3.03110408782959,
"step": 79
},
{
"epoch": 0.038778477944740666,
"grad_norm": 15.011911047268724,
"learning_rate": 1.2762520193861067e-06,
"loss": 3.437314987182617,
"step": 80
},
{
"epoch": 0.03926320891904993,
"grad_norm": 22.552901292204428,
"learning_rate": 1.2924071082390954e-06,
"loss": 3.71456241607666,
"step": 81
},
{
"epoch": 0.03974793989335919,
"grad_norm": 16.108512391234946,
"learning_rate": 1.3085621970920842e-06,
"loss": 3.789304256439209,
"step": 82
},
{
"epoch": 0.04023267086766844,
"grad_norm": 23.997931655335243,
"learning_rate": 1.3247172859450727e-06,
"loss": 3.9037415981292725,
"step": 83
},
{
"epoch": 0.0407174018419777,
"grad_norm": 13.385270070542717,
"learning_rate": 1.3408723747980614e-06,
"loss": 3.2644453048706055,
"step": 84
},
{
"epoch": 0.041202132816286964,
"grad_norm": 14.961659983122154,
"learning_rate": 1.3570274636510504e-06,
"loss": 3.045938491821289,
"step": 85
},
{
"epoch": 0.04168686379059622,
"grad_norm": 14.647831264043164,
"learning_rate": 1.3731825525040387e-06,
"loss": 2.9740500450134277,
"step": 86
},
{
"epoch": 0.04217159476490548,
"grad_norm": 9.753941653635998,
"learning_rate": 1.3893376413570276e-06,
"loss": 2.963986396789551,
"step": 87
},
{
"epoch": 0.04265632573921473,
"grad_norm": 19.515812977222115,
"learning_rate": 1.4054927302100163e-06,
"loss": 3.258944034576416,
"step": 88
},
{
"epoch": 0.043141056713523994,
"grad_norm": 16.02024620841785,
"learning_rate": 1.421647819063005e-06,
"loss": 3.562412977218628,
"step": 89
},
{
"epoch": 0.043625787687833255,
"grad_norm": 13.34364021976473,
"learning_rate": 1.4378029079159936e-06,
"loss": 3.612384557723999,
"step": 90
},
{
"epoch": 0.04411051866214251,
"grad_norm": 9.598620448032266,
"learning_rate": 1.4539579967689823e-06,
"loss": 3.4110207557678223,
"step": 91
},
{
"epoch": 0.04459524963645177,
"grad_norm": 8.024224003010092,
"learning_rate": 1.470113085621971e-06,
"loss": 3.188340425491333,
"step": 92
},
{
"epoch": 0.04507998061076103,
"grad_norm": 13.110727483199966,
"learning_rate": 1.4862681744749596e-06,
"loss": 3.2462472915649414,
"step": 93
},
{
"epoch": 0.045564711585070285,
"grad_norm": 10.87786830763328,
"learning_rate": 1.5024232633279485e-06,
"loss": 3.1422948837280273,
"step": 94
},
{
"epoch": 0.046049442559379546,
"grad_norm": 7.809468236543284,
"learning_rate": 1.5185783521809372e-06,
"loss": 3.4546501636505127,
"step": 95
},
{
"epoch": 0.0465341735336888,
"grad_norm": 12.646823957055302,
"learning_rate": 1.5347334410339258e-06,
"loss": 3.3679776191711426,
"step": 96
},
{
"epoch": 0.04701890450799806,
"grad_norm": 11.49010120390962,
"learning_rate": 1.5508885298869145e-06,
"loss": 3.351832151412964,
"step": 97
},
{
"epoch": 0.04750363548230732,
"grad_norm": 11.210349432361404,
"learning_rate": 1.5670436187399032e-06,
"loss": 2.757481098175049,
"step": 98
},
{
"epoch": 0.047988366456616575,
"grad_norm": 10.795985470339627,
"learning_rate": 1.583198707592892e-06,
"loss": 3.0598981380462646,
"step": 99
},
{
"epoch": 0.048473097430925836,
"grad_norm": 10.826239548829268,
"learning_rate": 1.5993537964458805e-06,
"loss": 3.368502616882324,
"step": 100
},
{
"epoch": 0.0489578284052351,
"grad_norm": 15.552737009898225,
"learning_rate": 1.6155088852988692e-06,
"loss": 3.1896557807922363,
"step": 101
},
{
"epoch": 0.04944255937954435,
"grad_norm": 10.799458855875775,
"learning_rate": 1.6316639741518581e-06,
"loss": 3.1506450176239014,
"step": 102
},
{
"epoch": 0.04992729035385361,
"grad_norm": 11.635711194681248,
"learning_rate": 1.6478190630048467e-06,
"loss": 3.4445507526397705,
"step": 103
},
{
"epoch": 0.050412021328162866,
"grad_norm": 12.264465515149476,
"learning_rate": 1.6639741518578354e-06,
"loss": 2.9591221809387207,
"step": 104
},
{
"epoch": 0.05089675230247213,
"grad_norm": 14.831125996583987,
"learning_rate": 1.6801292407108241e-06,
"loss": 3.185213088989258,
"step": 105
},
{
"epoch": 0.05138148327678139,
"grad_norm": 16.061711253386235,
"learning_rate": 1.6962843295638126e-06,
"loss": 3.6954684257507324,
"step": 106
},
{
"epoch": 0.05186621425109064,
"grad_norm": 18.029427692167946,
"learning_rate": 1.7124394184168014e-06,
"loss": 2.9725430011749268,
"step": 107
},
{
"epoch": 0.0523509452253999,
"grad_norm": 16.12430518498283,
"learning_rate": 1.72859450726979e-06,
"loss": 3.0694193840026855,
"step": 108
},
{
"epoch": 0.052835676199709164,
"grad_norm": 9.534480278966619,
"learning_rate": 1.744749596122779e-06,
"loss": 3.305382251739502,
"step": 109
},
{
"epoch": 0.05332040717401842,
"grad_norm": 11.458086558530551,
"learning_rate": 1.7609046849757673e-06,
"loss": 3.245755195617676,
"step": 110
},
{
"epoch": 0.05380513814832768,
"grad_norm": 12.770441241902928,
"learning_rate": 1.7770597738287563e-06,
"loss": 3.5494534969329834,
"step": 111
},
{
"epoch": 0.05428986912263694,
"grad_norm": 8.652852327297914,
"learning_rate": 1.793214862681745e-06,
"loss": 3.0800206661224365,
"step": 112
},
{
"epoch": 0.054774600096946194,
"grad_norm": 16.9992532246631,
"learning_rate": 1.8093699515347335e-06,
"loss": 3.2646520137786865,
"step": 113
},
{
"epoch": 0.055259331071255455,
"grad_norm": 13.18443029371436,
"learning_rate": 1.8255250403877223e-06,
"loss": 2.921024799346924,
"step": 114
},
{
"epoch": 0.05574406204556471,
"grad_norm": 19.517240209840427,
"learning_rate": 1.841680129240711e-06,
"loss": 3.195812702178955,
"step": 115
},
{
"epoch": 0.05622879301987397,
"grad_norm": 13.60289820335408,
"learning_rate": 1.8578352180936995e-06,
"loss": 3.475102424621582,
"step": 116
},
{
"epoch": 0.05671352399418323,
"grad_norm": 13.166855298977369,
"learning_rate": 1.8739903069466882e-06,
"loss": 3.0727787017822266,
"step": 117
},
{
"epoch": 0.057198254968492485,
"grad_norm": 13.928666462148799,
"learning_rate": 1.8901453957996772e-06,
"loss": 3.3913979530334473,
"step": 118
},
{
"epoch": 0.057682985942801746,
"grad_norm": 11.557485815169715,
"learning_rate": 1.906300484652666e-06,
"loss": 3.7547435760498047,
"step": 119
},
{
"epoch": 0.058167716917111006,
"grad_norm": 14.582767560865964,
"learning_rate": 1.9224555735056544e-06,
"loss": 2.996476650238037,
"step": 120
},
{
"epoch": 0.05865244789142026,
"grad_norm": 16.08577848239802,
"learning_rate": 1.938610662358643e-06,
"loss": 3.138495922088623,
"step": 121
},
{
"epoch": 0.05913717886572952,
"grad_norm": 18.0398986738145,
"learning_rate": 1.954765751211632e-06,
"loss": 2.7317428588867188,
"step": 122
},
{
"epoch": 0.059621909840038775,
"grad_norm": 12.326969132118187,
"learning_rate": 1.9709208400646204e-06,
"loss": 1.9785209894180298,
"step": 123
},
{
"epoch": 0.060106640814348036,
"grad_norm": 9.969733441990556,
"learning_rate": 1.9870759289176093e-06,
"loss": 3.369781017303467,
"step": 124
},
{
"epoch": 0.0605913717886573,
"grad_norm": 12.031839331715423,
"learning_rate": 2.003231017770598e-06,
"loss": 3.148988723754883,
"step": 125
},
{
"epoch": 0.06107610276296655,
"grad_norm": 15.990736156725701,
"learning_rate": 2.0193861066235864e-06,
"loss": 2.821732997894287,
"step": 126
},
{
"epoch": 0.06156083373727581,
"grad_norm": 11.310562958708081,
"learning_rate": 2.0355411954765753e-06,
"loss": 3.6513171195983887,
"step": 127
},
{
"epoch": 0.06204556471158507,
"grad_norm": 12.736877070701121,
"learning_rate": 2.051696284329564e-06,
"loss": 2.930734157562256,
"step": 128
},
{
"epoch": 0.06253029568589433,
"grad_norm": 17.0882880873924,
"learning_rate": 2.0678513731825528e-06,
"loss": 3.3475141525268555,
"step": 129
},
{
"epoch": 0.06301502666020359,
"grad_norm": 11.58996328169585,
"learning_rate": 2.0840064620355413e-06,
"loss": 3.287075996398926,
"step": 130
},
{
"epoch": 0.06349975763451285,
"grad_norm": 11.662742478858108,
"learning_rate": 2.1001615508885302e-06,
"loss": 3.131418466567993,
"step": 131
},
{
"epoch": 0.06398448860882211,
"grad_norm": 11.50097348600423,
"learning_rate": 2.1163166397415188e-06,
"loss": 3.0425357818603516,
"step": 132
},
{
"epoch": 0.06446921958313136,
"grad_norm": 13.041420122221252,
"learning_rate": 2.1324717285945073e-06,
"loss": 2.6074767112731934,
"step": 133
},
{
"epoch": 0.06495395055744062,
"grad_norm": 14.920380431163009,
"learning_rate": 2.1486268174474962e-06,
"loss": 3.015321731567383,
"step": 134
},
{
"epoch": 0.06543868153174988,
"grad_norm": 8.303006328218524,
"learning_rate": 2.1647819063004847e-06,
"loss": 3.3170082569122314,
"step": 135
},
{
"epoch": 0.06592341250605914,
"grad_norm": 12.328358817481968,
"learning_rate": 2.1809369951534733e-06,
"loss": 3.4430160522460938,
"step": 136
},
{
"epoch": 0.0664081434803684,
"grad_norm": 21.025269096154755,
"learning_rate": 2.197092084006462e-06,
"loss": 4.469269275665283,
"step": 137
},
{
"epoch": 0.06689287445467765,
"grad_norm": 14.6952127365395,
"learning_rate": 2.213247172859451e-06,
"loss": 3.3737614154815674,
"step": 138
},
{
"epoch": 0.06737760542898691,
"grad_norm": 13.67989382195369,
"learning_rate": 2.2294022617124397e-06,
"loss": 3.127131462097168,
"step": 139
},
{
"epoch": 0.06786233640329617,
"grad_norm": 11.092955166745599,
"learning_rate": 2.245557350565428e-06,
"loss": 2.797031879425049,
"step": 140
},
{
"epoch": 0.06834706737760543,
"grad_norm": 14.517509945787966,
"learning_rate": 2.261712439418417e-06,
"loss": 2.855074167251587,
"step": 141
},
{
"epoch": 0.06883179835191469,
"grad_norm": 13.215337302699112,
"learning_rate": 2.2778675282714056e-06,
"loss": 3.22391939163208,
"step": 142
},
{
"epoch": 0.06931652932622395,
"grad_norm": 15.50999633512176,
"learning_rate": 2.294022617124394e-06,
"loss": 3.708390235900879,
"step": 143
},
{
"epoch": 0.0698012603005332,
"grad_norm": 12.046590999548037,
"learning_rate": 2.310177705977383e-06,
"loss": 2.885770082473755,
"step": 144
},
{
"epoch": 0.07028599127484246,
"grad_norm": 13.804372370981602,
"learning_rate": 2.3263327948303716e-06,
"loss": 3.26053524017334,
"step": 145
},
{
"epoch": 0.07077072224915172,
"grad_norm": 7.307697198602801,
"learning_rate": 2.34248788368336e-06,
"loss": 2.853951930999756,
"step": 146
},
{
"epoch": 0.07125545322346098,
"grad_norm": 8.665034469252905,
"learning_rate": 2.358642972536349e-06,
"loss": 2.770782947540283,
"step": 147
},
{
"epoch": 0.07174018419777024,
"grad_norm": 13.391384787726784,
"learning_rate": 2.374798061389338e-06,
"loss": 2.6117753982543945,
"step": 148
},
{
"epoch": 0.07222491517207949,
"grad_norm": 14.411967122746987,
"learning_rate": 2.3909531502423265e-06,
"loss": 2.8382580280303955,
"step": 149
},
{
"epoch": 0.07270964614638875,
"grad_norm": 16.196398190957385,
"learning_rate": 2.407108239095315e-06,
"loss": 3.3854668140411377,
"step": 150
},
{
"epoch": 0.07319437712069801,
"grad_norm": 12.381270275335288,
"learning_rate": 2.423263327948304e-06,
"loss": 3.22286319732666,
"step": 151
},
{
"epoch": 0.07367910809500727,
"grad_norm": 11.609219517483467,
"learning_rate": 2.4394184168012925e-06,
"loss": 3.2402470111846924,
"step": 152
},
{
"epoch": 0.07416383906931653,
"grad_norm": 8.368798019489049,
"learning_rate": 2.455573505654281e-06,
"loss": 2.983020782470703,
"step": 153
},
{
"epoch": 0.07464857004362578,
"grad_norm": 7.181226412405584,
"learning_rate": 2.47172859450727e-06,
"loss": 2.9759395122528076,
"step": 154
},
{
"epoch": 0.07513330101793504,
"grad_norm": 9.153889205215037,
"learning_rate": 2.487883683360259e-06,
"loss": 3.4874448776245117,
"step": 155
},
{
"epoch": 0.0756180319922443,
"grad_norm": 14.273188482618952,
"learning_rate": 2.5040387722132474e-06,
"loss": 3.231018543243408,
"step": 156
},
{
"epoch": 0.07610276296655356,
"grad_norm": 8.912255084265691,
"learning_rate": 2.5201938610662364e-06,
"loss": 3.0160276889801025,
"step": 157
},
{
"epoch": 0.07658749394086282,
"grad_norm": 13.508380296112772,
"learning_rate": 2.5363489499192245e-06,
"loss": 3.15554141998291,
"step": 158
},
{
"epoch": 0.07707222491517209,
"grad_norm": 9.739054751720852,
"learning_rate": 2.5525040387722134e-06,
"loss": 3.1272435188293457,
"step": 159
},
{
"epoch": 0.07755695588948133,
"grad_norm": 13.933815371009198,
"learning_rate": 2.568659127625202e-06,
"loss": 3.19451642036438,
"step": 160
},
{
"epoch": 0.0780416868637906,
"grad_norm": 10.11161341175865,
"learning_rate": 2.584814216478191e-06,
"loss": 2.7447381019592285,
"step": 161
},
{
"epoch": 0.07852641783809985,
"grad_norm": 13.08194812361127,
"learning_rate": 2.60096930533118e-06,
"loss": 2.6258203983306885,
"step": 162
},
{
"epoch": 0.07901114881240912,
"grad_norm": 12.601055016109909,
"learning_rate": 2.6171243941841683e-06,
"loss": 3.119978189468384,
"step": 163
},
{
"epoch": 0.07949587978671838,
"grad_norm": 16.00705580578286,
"learning_rate": 2.6332794830371573e-06,
"loss": 3.2668325901031494,
"step": 164
},
{
"epoch": 0.07998061076102762,
"grad_norm": 10.815523592544961,
"learning_rate": 2.6494345718901454e-06,
"loss": 2.5623724460601807,
"step": 165
},
{
"epoch": 0.08046534173533688,
"grad_norm": 8.250850686115394,
"learning_rate": 2.6655896607431343e-06,
"loss": 3.1041767597198486,
"step": 166
},
{
"epoch": 0.08095007270964615,
"grad_norm": 8.88939663467738,
"learning_rate": 2.681744749596123e-06,
"loss": 2.733553409576416,
"step": 167
},
{
"epoch": 0.0814348036839554,
"grad_norm": 10.099203142883557,
"learning_rate": 2.6978998384491118e-06,
"loss": 2.8755440711975098,
"step": 168
},
{
"epoch": 0.08191953465826467,
"grad_norm": 10.902033498462902,
"learning_rate": 2.7140549273021007e-06,
"loss": 2.4123501777648926,
"step": 169
},
{
"epoch": 0.08240426563257393,
"grad_norm": 12.470597726806629,
"learning_rate": 2.7302100161550892e-06,
"loss": 3.086699962615967,
"step": 170
},
{
"epoch": 0.08288899660688318,
"grad_norm": 14.505196662829134,
"learning_rate": 2.7463651050080773e-06,
"loss": 3.258992910385132,
"step": 171
},
{
"epoch": 0.08337372758119244,
"grad_norm": 12.855532446807144,
"learning_rate": 2.7625201938610663e-06,
"loss": 2.734248161315918,
"step": 172
},
{
"epoch": 0.0838584585555017,
"grad_norm": 15.343201289018836,
"learning_rate": 2.7786752827140552e-06,
"loss": 3.337332248687744,
"step": 173
},
{
"epoch": 0.08434318952981096,
"grad_norm": 13.324733914899012,
"learning_rate": 2.7948303715670437e-06,
"loss": 3.3533105850219727,
"step": 174
},
{
"epoch": 0.08482792050412022,
"grad_norm": 16.540754621834058,
"learning_rate": 2.8109854604200327e-06,
"loss": 2.740955114364624,
"step": 175
},
{
"epoch": 0.08531265147842947,
"grad_norm": 15.024671955719896,
"learning_rate": 2.827140549273021e-06,
"loss": 3.0808329582214355,
"step": 176
},
{
"epoch": 0.08579738245273873,
"grad_norm": 18.831556178507007,
"learning_rate": 2.84329563812601e-06,
"loss": 3.2246971130371094,
"step": 177
},
{
"epoch": 0.08628211342704799,
"grad_norm": 11.421439758594525,
"learning_rate": 2.8594507269789982e-06,
"loss": 2.5276894569396973,
"step": 178
},
{
"epoch": 0.08676684440135725,
"grad_norm": 10.166224659786417,
"learning_rate": 2.875605815831987e-06,
"loss": 3.3250131607055664,
"step": 179
},
{
"epoch": 0.08725157537566651,
"grad_norm": 13.22947143479082,
"learning_rate": 2.891760904684976e-06,
"loss": 3.007500648498535,
"step": 180
},
{
"epoch": 0.08773630634997576,
"grad_norm": 9.660440972496403,
"learning_rate": 2.9079159935379646e-06,
"loss": 3.008558750152588,
"step": 181
},
{
"epoch": 0.08822103732428502,
"grad_norm": 10.381021605602449,
"learning_rate": 2.9240710823909536e-06,
"loss": 3.1066818237304688,
"step": 182
},
{
"epoch": 0.08870576829859428,
"grad_norm": 9.250270606533972,
"learning_rate": 2.940226171243942e-06,
"loss": 2.4226036071777344,
"step": 183
},
{
"epoch": 0.08919049927290354,
"grad_norm": 12.26546860507811,
"learning_rate": 2.956381260096931e-06,
"loss": 3.002305507659912,
"step": 184
},
{
"epoch": 0.0896752302472128,
"grad_norm": 8.46321031638364,
"learning_rate": 2.972536348949919e-06,
"loss": 2.951932430267334,
"step": 185
},
{
"epoch": 0.09015996122152206,
"grad_norm": 19.21935784943363,
"learning_rate": 2.988691437802908e-06,
"loss": 3.064774751663208,
"step": 186
},
{
"epoch": 0.09064469219583131,
"grad_norm": 7.756052772917,
"learning_rate": 3.004846526655897e-06,
"loss": 2.8354992866516113,
"step": 187
},
{
"epoch": 0.09112942317014057,
"grad_norm": 16.001578653975013,
"learning_rate": 3.0210016155088855e-06,
"loss": 3.736851215362549,
"step": 188
},
{
"epoch": 0.09161415414444983,
"grad_norm": 11.368004589200726,
"learning_rate": 3.0371567043618745e-06,
"loss": 3.3956451416015625,
"step": 189
},
{
"epoch": 0.09209888511875909,
"grad_norm": 11.44729329161922,
"learning_rate": 3.053311793214863e-06,
"loss": 2.7637391090393066,
"step": 190
},
{
"epoch": 0.09258361609306835,
"grad_norm": 10.807752378104063,
"learning_rate": 3.0694668820678515e-06,
"loss": 2.9727437496185303,
"step": 191
},
{
"epoch": 0.0930683470673776,
"grad_norm": 15.747352400916292,
"learning_rate": 3.08562197092084e-06,
"loss": 3.2629332542419434,
"step": 192
},
{
"epoch": 0.09355307804168686,
"grad_norm": 10.333365937096605,
"learning_rate": 3.101777059773829e-06,
"loss": 3.795717239379883,
"step": 193
},
{
"epoch": 0.09403780901599612,
"grad_norm": 10.890889643807876,
"learning_rate": 3.117932148626818e-06,
"loss": 3.0094246864318848,
"step": 194
},
{
"epoch": 0.09452253999030538,
"grad_norm": 12.235490928289291,
"learning_rate": 3.1340872374798064e-06,
"loss": 3.0586698055267334,
"step": 195
},
{
"epoch": 0.09500727096461464,
"grad_norm": 18.12041678830306,
"learning_rate": 3.1502423263327954e-06,
"loss": 3.0235962867736816,
"step": 196
},
{
"epoch": 0.0954920019389239,
"grad_norm": 9.590039892681121,
"learning_rate": 3.166397415185784e-06,
"loss": 3.858149290084839,
"step": 197
},
{
"epoch": 0.09597673291323315,
"grad_norm": 10.828416150807273,
"learning_rate": 3.1825525040387724e-06,
"loss": 3.0043535232543945,
"step": 198
},
{
"epoch": 0.09646146388754241,
"grad_norm": 14.688374321668633,
"learning_rate": 3.198707592891761e-06,
"loss": 2.984731435775757,
"step": 199
},
{
"epoch": 0.09694619486185167,
"grad_norm": 16.69378597944891,
"learning_rate": 3.21486268174475e-06,
"loss": 3.0953354835510254,
"step": 200
},
{
"epoch": 0.09743092583616093,
"grad_norm": 23.847235763172947,
"learning_rate": 3.2310177705977384e-06,
"loss": 3.794921398162842,
"step": 201
},
{
"epoch": 0.0979156568104702,
"grad_norm": 10.16935573139639,
"learning_rate": 3.2471728594507273e-06,
"loss": 3.2532012462615967,
"step": 202
},
{
"epoch": 0.09840038778477944,
"grad_norm": 15.012137055804079,
"learning_rate": 3.2633279483037163e-06,
"loss": 2.903505802154541,
"step": 203
},
{
"epoch": 0.0988851187590887,
"grad_norm": 7.322333726627323,
"learning_rate": 3.2794830371567048e-06,
"loss": 3.159177303314209,
"step": 204
},
{
"epoch": 0.09936984973339796,
"grad_norm": 11.015662632489125,
"learning_rate": 3.2956381260096933e-06,
"loss": 2.710814952850342,
"step": 205
},
{
"epoch": 0.09985458070770722,
"grad_norm": 8.90113797107191,
"learning_rate": 3.311793214862682e-06,
"loss": 2.981245517730713,
"step": 206
},
{
"epoch": 0.10033931168201649,
"grad_norm": 8.084091501907102,
"learning_rate": 3.3279483037156708e-06,
"loss": 3.0578770637512207,
"step": 207
},
{
"epoch": 0.10082404265632573,
"grad_norm": 22.215327671344237,
"learning_rate": 3.3441033925686593e-06,
"loss": 3.4959142208099365,
"step": 208
},
{
"epoch": 0.101308773630635,
"grad_norm": 10.817702556780901,
"learning_rate": 3.3602584814216482e-06,
"loss": 2.8585562705993652,
"step": 209
},
{
"epoch": 0.10179350460494425,
"grad_norm": 13.136186755091607,
"learning_rate": 3.376413570274637e-06,
"loss": 3.232922315597534,
"step": 210
},
{
"epoch": 0.10227823557925352,
"grad_norm": 7.963752212697469,
"learning_rate": 3.3925686591276253e-06,
"loss": 2.636431932449341,
"step": 211
},
{
"epoch": 0.10276296655356278,
"grad_norm": 10.404630175809869,
"learning_rate": 3.408723747980614e-06,
"loss": 2.971284866333008,
"step": 212
},
{
"epoch": 0.10324769752787204,
"grad_norm": 17.496355030978666,
"learning_rate": 3.4248788368336027e-06,
"loss": 3.2348570823669434,
"step": 213
},
{
"epoch": 0.10373242850218128,
"grad_norm": 9.062398856731285,
"learning_rate": 3.4410339256865917e-06,
"loss": 2.3618509769439697,
"step": 214
},
{
"epoch": 0.10421715947649055,
"grad_norm": 16.035714901078457,
"learning_rate": 3.45718901453958e-06,
"loss": 2.328580379486084,
"step": 215
},
{
"epoch": 0.1047018904507998,
"grad_norm": 26.65083009947864,
"learning_rate": 3.473344103392569e-06,
"loss": 3.207963466644287,
"step": 216
},
{
"epoch": 0.10518662142510907,
"grad_norm": 12.644271177008887,
"learning_rate": 3.489499192245558e-06,
"loss": 2.040618896484375,
"step": 217
},
{
"epoch": 0.10567135239941833,
"grad_norm": 11.48773497312433,
"learning_rate": 3.505654281098546e-06,
"loss": 2.7529067993164062,
"step": 218
},
{
"epoch": 0.10615608337372757,
"grad_norm": 11.033372058738893,
"learning_rate": 3.5218093699515347e-06,
"loss": 2.9669387340545654,
"step": 219
},
{
"epoch": 0.10664081434803684,
"grad_norm": 8.799656465743377,
"learning_rate": 3.5379644588045236e-06,
"loss": 3.4235281944274902,
"step": 220
},
{
"epoch": 0.1071255453223461,
"grad_norm": 7.055248810040456,
"learning_rate": 3.5541195476575126e-06,
"loss": 2.8927550315856934,
"step": 221
},
{
"epoch": 0.10761027629665536,
"grad_norm": 13.732588936119319,
"learning_rate": 3.570274636510501e-06,
"loss": 3.4001431465148926,
"step": 222
},
{
"epoch": 0.10809500727096462,
"grad_norm": 16.839808764208986,
"learning_rate": 3.58642972536349e-06,
"loss": 3.2785227298736572,
"step": 223
},
{
"epoch": 0.10857973824527388,
"grad_norm": 19.451647923884735,
"learning_rate": 3.602584814216478e-06,
"loss": 2.2535414695739746,
"step": 224
},
{
"epoch": 0.10906446921958313,
"grad_norm": 8.607335852700993,
"learning_rate": 3.618739903069467e-06,
"loss": 2.4071905612945557,
"step": 225
},
{
"epoch": 0.10954920019389239,
"grad_norm": 19.05752856951135,
"learning_rate": 3.6348949919224556e-06,
"loss": 2.772110939025879,
"step": 226
},
{
"epoch": 0.11003393116820165,
"grad_norm": 9.088491805646095,
"learning_rate": 3.6510500807754445e-06,
"loss": 2.9669060707092285,
"step": 227
},
{
"epoch": 0.11051866214251091,
"grad_norm": 10.304787136671285,
"learning_rate": 3.6672051696284335e-06,
"loss": 3.098999500274658,
"step": 228
},
{
"epoch": 0.11100339311682017,
"grad_norm": 10.549382622224712,
"learning_rate": 3.683360258481422e-06,
"loss": 2.6162285804748535,
"step": 229
},
{
"epoch": 0.11148812409112942,
"grad_norm": 15.464277371496227,
"learning_rate": 3.699515347334411e-06,
"loss": 3.111013889312744,
"step": 230
},
{
"epoch": 0.11197285506543868,
"grad_norm": 19.306278603562372,
"learning_rate": 3.715670436187399e-06,
"loss": 2.5436673164367676,
"step": 231
},
{
"epoch": 0.11245758603974794,
"grad_norm": 10.98983177066707,
"learning_rate": 3.731825525040388e-06,
"loss": 2.6840600967407227,
"step": 232
},
{
"epoch": 0.1129423170140572,
"grad_norm": 12.644977640157304,
"learning_rate": 3.7479806138933765e-06,
"loss": 3.1348063945770264,
"step": 233
},
{
"epoch": 0.11342704798836646,
"grad_norm": 18.052729226020453,
"learning_rate": 3.7641357027463654e-06,
"loss": 3.1830317974090576,
"step": 234
},
{
"epoch": 0.11391177896267571,
"grad_norm": 20.812073468489963,
"learning_rate": 3.7802907915993544e-06,
"loss": 2.7999279499053955,
"step": 235
},
{
"epoch": 0.11439650993698497,
"grad_norm": 7.834924601039092,
"learning_rate": 3.796445880452343e-06,
"loss": 2.961503028869629,
"step": 236
},
{
"epoch": 0.11488124091129423,
"grad_norm": 23.46759328883997,
"learning_rate": 3.812600969305332e-06,
"loss": 3.6638612747192383,
"step": 237
},
{
"epoch": 0.11536597188560349,
"grad_norm": 10.385976283678888,
"learning_rate": 3.82875605815832e-06,
"loss": 3.095399856567383,
"step": 238
},
{
"epoch": 0.11585070285991275,
"grad_norm": 7.3736264577546065,
"learning_rate": 3.844911147011309e-06,
"loss": 3.2725958824157715,
"step": 239
},
{
"epoch": 0.11633543383422201,
"grad_norm": 9.956556847578513,
"learning_rate": 3.861066235864298e-06,
"loss": 3.4274494647979736,
"step": 240
},
{
"epoch": 0.11682016480853126,
"grad_norm": 10.072812740294227,
"learning_rate": 3.877221324717286e-06,
"loss": 3.33449649810791,
"step": 241
},
{
"epoch": 0.11730489578284052,
"grad_norm": 11.782842622632588,
"learning_rate": 3.893376413570275e-06,
"loss": 2.623849391937256,
"step": 242
},
{
"epoch": 0.11778962675714978,
"grad_norm": 15.40484184865702,
"learning_rate": 3.909531502423264e-06,
"loss": 2.741448402404785,
"step": 243
},
{
"epoch": 0.11827435773145904,
"grad_norm": 6.864753838948246,
"learning_rate": 3.925686591276252e-06,
"loss": 2.7037625312805176,
"step": 244
},
{
"epoch": 0.1187590887057683,
"grad_norm": 13.503043269507332,
"learning_rate": 3.941841680129241e-06,
"loss": 2.9884345531463623,
"step": 245
},
{
"epoch": 0.11924381968007755,
"grad_norm": 9.154026745115766,
"learning_rate": 3.95799676898223e-06,
"loss": 2.765995502471924,
"step": 246
},
{
"epoch": 0.11972855065438681,
"grad_norm": 12.50973203438904,
"learning_rate": 3.974151857835219e-06,
"loss": 2.9060816764831543,
"step": 247
},
{
"epoch": 0.12021328162869607,
"grad_norm": 8.80968797060637,
"learning_rate": 3.990306946688207e-06,
"loss": 2.837008237838745,
"step": 248
},
{
"epoch": 0.12069801260300533,
"grad_norm": 16.544253866192534,
"learning_rate": 4.006462035541196e-06,
"loss": 3.2112555503845215,
"step": 249
},
{
"epoch": 0.1211827435773146,
"grad_norm": 11.56637510238781,
"learning_rate": 4.022617124394185e-06,
"loss": 2.840733051300049,
"step": 250
},
{
"epoch": 0.12166747455162386,
"grad_norm": 8.046790597220351,
"learning_rate": 4.038772213247173e-06,
"loss": 3.130436420440674,
"step": 251
},
{
"epoch": 0.1221522055259331,
"grad_norm": 14.644956511272918,
"learning_rate": 4.054927302100162e-06,
"loss": 3.4662694931030273,
"step": 252
},
{
"epoch": 0.12263693650024236,
"grad_norm": 9.606091564353024,
"learning_rate": 4.071082390953151e-06,
"loss": 2.256847858428955,
"step": 253
},
{
"epoch": 0.12312166747455162,
"grad_norm": 31.486417716006752,
"learning_rate": 4.08723747980614e-06,
"loss": 2.7758665084838867,
"step": 254
},
{
"epoch": 0.12360639844886089,
"grad_norm": 11.040035390808104,
"learning_rate": 4.103392568659128e-06,
"loss": 2.8123860359191895,
"step": 255
},
{
"epoch": 0.12409112942317015,
"grad_norm": 20.39533809482724,
"learning_rate": 4.119547657512117e-06,
"loss": 2.7829513549804688,
"step": 256
},
{
"epoch": 0.1245758603974794,
"grad_norm": 18.556501293931788,
"learning_rate": 4.1357027463651056e-06,
"loss": 3.6441197395324707,
"step": 257
},
{
"epoch": 0.12506059137178865,
"grad_norm": 11.324661770424354,
"learning_rate": 4.151857835218094e-06,
"loss": 2.9442787170410156,
"step": 258
},
{
"epoch": 0.12554532234609792,
"grad_norm": 9.588401966283959,
"learning_rate": 4.168012924071083e-06,
"loss": 2.6889820098876953,
"step": 259
},
{
"epoch": 0.12603005332040718,
"grad_norm": 16.53837487899961,
"learning_rate": 4.1841680129240716e-06,
"loss": 2.9280219078063965,
"step": 260
},
{
"epoch": 0.12651478429471644,
"grad_norm": 21.784663678000427,
"learning_rate": 4.2003231017770605e-06,
"loss": 2.9163925647735596,
"step": 261
},
{
"epoch": 0.1269995152690257,
"grad_norm": 10.702089291558543,
"learning_rate": 4.216478190630049e-06,
"loss": 2.6200692653656006,
"step": 262
},
{
"epoch": 0.12748424624333496,
"grad_norm": 10.753039658915597,
"learning_rate": 4.2326332794830375e-06,
"loss": 2.7478623390197754,
"step": 263
},
{
"epoch": 0.12796897721764422,
"grad_norm": 16.021524687304836,
"learning_rate": 4.248788368336026e-06,
"loss": 2.4477007389068604,
"step": 264
},
{
"epoch": 0.12845370819195345,
"grad_norm": 10.9380967337022,
"learning_rate": 4.2649434571890146e-06,
"loss": 2.7276508808135986,
"step": 265
},
{
"epoch": 0.1289384391662627,
"grad_norm": 12.535242746190466,
"learning_rate": 4.2810985460420035e-06,
"loss": 2.9811928272247314,
"step": 266
},
{
"epoch": 0.12942317014057197,
"grad_norm": 11.583139988379001,
"learning_rate": 4.2972536348949925e-06,
"loss": 3.3619511127471924,
"step": 267
},
{
"epoch": 0.12990790111488124,
"grad_norm": 8.86962024336899,
"learning_rate": 4.313408723747981e-06,
"loss": 2.8336644172668457,
"step": 268
},
{
"epoch": 0.1303926320891905,
"grad_norm": 11.404990119454828,
"learning_rate": 4.3295638126009695e-06,
"loss": 3.22733736038208,
"step": 269
},
{
"epoch": 0.13087736306349976,
"grad_norm": 13.706573313515005,
"learning_rate": 4.3457189014539584e-06,
"loss": 3.2316033840179443,
"step": 270
},
{
"epoch": 0.13136209403780902,
"grad_norm": 11.770856851217223,
"learning_rate": 4.3618739903069465e-06,
"loss": 3.2562499046325684,
"step": 271
},
{
"epoch": 0.13184682501211828,
"grad_norm": 16.971927832614163,
"learning_rate": 4.3780290791599355e-06,
"loss": 2.8616416454315186,
"step": 272
},
{
"epoch": 0.13233155598642754,
"grad_norm": 32.14909633203204,
"learning_rate": 4.394184168012924e-06,
"loss": 2.7879626750946045,
"step": 273
},
{
"epoch": 0.1328162869607368,
"grad_norm": 11.757661654443078,
"learning_rate": 4.410339256865913e-06,
"loss": 2.759215831756592,
"step": 274
},
{
"epoch": 0.13330101793504606,
"grad_norm": 12.45167507666977,
"learning_rate": 4.426494345718902e-06,
"loss": 2.9355406761169434,
"step": 275
},
{
"epoch": 0.1337857489093553,
"grad_norm": 5.551667906894262,
"learning_rate": 4.44264943457189e-06,
"loss": 2.1405885219573975,
"step": 276
},
{
"epoch": 0.13427047988366456,
"grad_norm": 12.301436048014093,
"learning_rate": 4.458804523424879e-06,
"loss": 3.865417718887329,
"step": 277
},
{
"epoch": 0.13475521085797382,
"grad_norm": 9.204407874104009,
"learning_rate": 4.4749596122778674e-06,
"loss": 2.7252397537231445,
"step": 278
},
{
"epoch": 0.13523994183228308,
"grad_norm": 13.768207990523386,
"learning_rate": 4.491114701130856e-06,
"loss": 3.03243350982666,
"step": 279
},
{
"epoch": 0.13572467280659234,
"grad_norm": 11.548644304860165,
"learning_rate": 4.507269789983845e-06,
"loss": 3.12099289894104,
"step": 280
},
{
"epoch": 0.1362094037809016,
"grad_norm": 10.696223758727765,
"learning_rate": 4.523424878836834e-06,
"loss": 2.3988802433013916,
"step": 281
},
{
"epoch": 0.13669413475521086,
"grad_norm": 22.455939548418193,
"learning_rate": 4.539579967689823e-06,
"loss": 3.1426260471343994,
"step": 282
},
{
"epoch": 0.13717886572952012,
"grad_norm": 9.61871176583177,
"learning_rate": 4.555735056542811e-06,
"loss": 2.69942045211792,
"step": 283
},
{
"epoch": 0.13766359670382938,
"grad_norm": 8.41721236650176,
"learning_rate": 4.571890145395799e-06,
"loss": 3.2619197368621826,
"step": 284
},
{
"epoch": 0.13814832767813864,
"grad_norm": 10.887800419232402,
"learning_rate": 4.588045234248788e-06,
"loss": 2.460340976715088,
"step": 285
},
{
"epoch": 0.1386330586524479,
"grad_norm": 10.720162722180769,
"learning_rate": 4.604200323101777e-06,
"loss": 2.907320261001587,
"step": 286
},
{
"epoch": 0.13911778962675714,
"grad_norm": 8.06066485866737,
"learning_rate": 4.620355411954766e-06,
"loss": 2.9075984954833984,
"step": 287
},
{
"epoch": 0.1396025206010664,
"grad_norm": 8.452545448776615,
"learning_rate": 4.636510500807755e-06,
"loss": 3.872378349304199,
"step": 288
},
{
"epoch": 0.14008725157537566,
"grad_norm": 11.633005727410925,
"learning_rate": 4.652665589660743e-06,
"loss": 3.154317855834961,
"step": 289
},
{
"epoch": 0.14057198254968492,
"grad_norm": 11.340421720053817,
"learning_rate": 4.668820678513732e-06,
"loss": 2.846569538116455,
"step": 290
},
{
"epoch": 0.14105671352399418,
"grad_norm": 18.74591002578279,
"learning_rate": 4.68497576736672e-06,
"loss": 2.52746319770813,
"step": 291
},
{
"epoch": 0.14154144449830344,
"grad_norm": 9.355727019055145,
"learning_rate": 4.701130856219709e-06,
"loss": 3.0335590839385986,
"step": 292
},
{
"epoch": 0.1420261754726127,
"grad_norm": 9.945079504852059,
"learning_rate": 4.717285945072698e-06,
"loss": 2.718900680541992,
"step": 293
},
{
"epoch": 0.14251090644692196,
"grad_norm": 13.816238621824391,
"learning_rate": 4.733441033925687e-06,
"loss": 2.8767290115356445,
"step": 294
},
{
"epoch": 0.14299563742123123,
"grad_norm": 10.504768722150754,
"learning_rate": 4.749596122778676e-06,
"loss": 3.461036443710327,
"step": 295
},
{
"epoch": 0.1434803683955405,
"grad_norm": 22.7347839754867,
"learning_rate": 4.765751211631664e-06,
"loss": 3.3491454124450684,
"step": 296
},
{
"epoch": 0.14396509936984975,
"grad_norm": 18.00770363363556,
"learning_rate": 4.781906300484653e-06,
"loss": 3.499770164489746,
"step": 297
},
{
"epoch": 0.14444983034415898,
"grad_norm": 14.208433714133246,
"learning_rate": 4.798061389337641e-06,
"loss": 2.979475259780884,
"step": 298
},
{
"epoch": 0.14493456131846824,
"grad_norm": 17.232869404364244,
"learning_rate": 4.81421647819063e-06,
"loss": 2.8901870250701904,
"step": 299
},
{
"epoch": 0.1454192922927775,
"grad_norm": 10.53512006465038,
"learning_rate": 4.830371567043619e-06,
"loss": 3.5578670501708984,
"step": 300
},
{
"epoch": 0.14590402326708676,
"grad_norm": 15.865632939445385,
"learning_rate": 4.846526655896608e-06,
"loss": 3.314779758453369,
"step": 301
},
{
"epoch": 0.14638875424139602,
"grad_norm": 13.970653327967632,
"learning_rate": 4.862681744749597e-06,
"loss": 2.549731969833374,
"step": 302
},
{
"epoch": 0.14687348521570529,
"grad_norm": 18.877611233061277,
"learning_rate": 4.878836833602585e-06,
"loss": 2.6834683418273926,
"step": 303
},
{
"epoch": 0.14735821619001455,
"grad_norm": 20.09029800944155,
"learning_rate": 4.894991922455574e-06,
"loss": 3.2907803058624268,
"step": 304
},
{
"epoch": 0.1478429471643238,
"grad_norm": 12.355184904761568,
"learning_rate": 4.911147011308562e-06,
"loss": 3.0311193466186523,
"step": 305
},
{
"epoch": 0.14832767813863307,
"grad_norm": 8.567273349485928,
"learning_rate": 4.927302100161551e-06,
"loss": 2.6315665245056152,
"step": 306
},
{
"epoch": 0.14881240911294233,
"grad_norm": 10.72116880943771,
"learning_rate": 4.94345718901454e-06,
"loss": 2.5715317726135254,
"step": 307
},
{
"epoch": 0.14929714008725156,
"grad_norm": 20.09874957231033,
"learning_rate": 4.959612277867529e-06,
"loss": 2.9154765605926514,
"step": 308
},
{
"epoch": 0.14978187106156082,
"grad_norm": 12.600831391834655,
"learning_rate": 4.975767366720518e-06,
"loss": 3.3018312454223633,
"step": 309
},
{
"epoch": 0.15026660203587008,
"grad_norm": 15.239445220576838,
"learning_rate": 4.991922455573506e-06,
"loss": 2.947727918624878,
"step": 310
},
{
"epoch": 0.15075133301017934,
"grad_norm": 8.092104277391888,
"learning_rate": 5.008077544426495e-06,
"loss": 2.3657913208007812,
"step": 311
},
{
"epoch": 0.1512360639844886,
"grad_norm": 19.246275124578414,
"learning_rate": 5.024232633279483e-06,
"loss": 2.857553482055664,
"step": 312
},
{
"epoch": 0.15172079495879787,
"grad_norm": 9.23675900670474,
"learning_rate": 5.040387722132473e-06,
"loss": 2.507875919342041,
"step": 313
},
{
"epoch": 0.15220552593310713,
"grad_norm": 23.25727707151693,
"learning_rate": 5.056542810985461e-06,
"loss": 3.0007028579711914,
"step": 314
},
{
"epoch": 0.1526902569074164,
"grad_norm": 28.687733969645322,
"learning_rate": 5.072697899838449e-06,
"loss": 2.544477701187134,
"step": 315
},
{
"epoch": 0.15317498788172565,
"grad_norm": 10.761958219455465,
"learning_rate": 5.088852988691439e-06,
"loss": 2.8871922492980957,
"step": 316
},
{
"epoch": 0.1536597188560349,
"grad_norm": 13.00471516373923,
"learning_rate": 5.105008077544427e-06,
"loss": 2.7603092193603516,
"step": 317
},
{
"epoch": 0.15414444983034417,
"grad_norm": 26.933015638060642,
"learning_rate": 5.121163166397416e-06,
"loss": 4.127505302429199,
"step": 318
},
{
"epoch": 0.1546291808046534,
"grad_norm": 8.646061166694,
"learning_rate": 5.137318255250404e-06,
"loss": 3.673733711242676,
"step": 319
},
{
"epoch": 0.15511391177896267,
"grad_norm": 9.585694234107105,
"learning_rate": 5.153473344103394e-06,
"loss": 2.5553648471832275,
"step": 320
},
{
"epoch": 0.15559864275327193,
"grad_norm": 14.781697622561742,
"learning_rate": 5.169628432956382e-06,
"loss": 2.9539196491241455,
"step": 321
},
{
"epoch": 0.1560833737275812,
"grad_norm": 18.265412954422228,
"learning_rate": 5.18578352180937e-06,
"loss": 2.9008634090423584,
"step": 322
},
{
"epoch": 0.15656810470189045,
"grad_norm": 15.814083138134652,
"learning_rate": 5.20193861066236e-06,
"loss": 3.339747667312622,
"step": 323
},
{
"epoch": 0.1570528356761997,
"grad_norm": 13.105408498022273,
"learning_rate": 5.218093699515348e-06,
"loss": 2.839287281036377,
"step": 324
},
{
"epoch": 0.15753756665050897,
"grad_norm": 15.201462889122936,
"learning_rate": 5.234248788368337e-06,
"loss": 2.967663288116455,
"step": 325
},
{
"epoch": 0.15802229762481823,
"grad_norm": 12.565318745128828,
"learning_rate": 5.250403877221325e-06,
"loss": 3.0170860290527344,
"step": 326
},
{
"epoch": 0.1585070285991275,
"grad_norm": 24.1027508198693,
"learning_rate": 5.2665589660743146e-06,
"loss": 2.313015937805176,
"step": 327
},
{
"epoch": 0.15899175957343675,
"grad_norm": 11.705229833036567,
"learning_rate": 5.282714054927303e-06,
"loss": 2.4194862842559814,
"step": 328
},
{
"epoch": 0.159476490547746,
"grad_norm": 8.636666143380028,
"learning_rate": 5.298869143780291e-06,
"loss": 2.615384101867676,
"step": 329
},
{
"epoch": 0.15996122152205525,
"grad_norm": 15.925823489807913,
"learning_rate": 5.3150242326332805e-06,
"loss": 3.7487993240356445,
"step": 330
},
{
"epoch": 0.1604459524963645,
"grad_norm": 12.070673399056988,
"learning_rate": 5.331179321486269e-06,
"loss": 3.0720906257629395,
"step": 331
},
{
"epoch": 0.16093068347067377,
"grad_norm": 8.758717603730034,
"learning_rate": 5.3473344103392576e-06,
"loss": 2.770258903503418,
"step": 332
},
{
"epoch": 0.16141541444498303,
"grad_norm": 9.928710248167919,
"learning_rate": 5.363489499192246e-06,
"loss": 2.7429370880126953,
"step": 333
},
{
"epoch": 0.1619001454192923,
"grad_norm": 12.528933031297719,
"learning_rate": 5.379644588045234e-06,
"loss": 2.2400596141815186,
"step": 334
},
{
"epoch": 0.16238487639360155,
"grad_norm": 14.371049785110174,
"learning_rate": 5.3957996768982236e-06,
"loss": 3.1883554458618164,
"step": 335
},
{
"epoch": 0.1628696073679108,
"grad_norm": 13.571017255829053,
"learning_rate": 5.411954765751212e-06,
"loss": 2.9650235176086426,
"step": 336
},
{
"epoch": 0.16335433834222007,
"grad_norm": 5.921374621115095,
"learning_rate": 5.4281098546042014e-06,
"loss": 2.6686930656433105,
"step": 337
},
{
"epoch": 0.16383906931652933,
"grad_norm": 23.375273974440535,
"learning_rate": 5.4442649434571895e-06,
"loss": 2.577899694442749,
"step": 338
},
{
"epoch": 0.1643238002908386,
"grad_norm": 11.18923587738833,
"learning_rate": 5.4604200323101785e-06,
"loss": 2.7193431854248047,
"step": 339
},
{
"epoch": 0.16480853126514786,
"grad_norm": 9.419510541384213,
"learning_rate": 5.4765751211631666e-06,
"loss": 1.866974115371704,
"step": 340
},
{
"epoch": 0.1652932622394571,
"grad_norm": 9.684131926257768,
"learning_rate": 5.492730210016155e-06,
"loss": 2.3770766258239746,
"step": 341
},
{
"epoch": 0.16577799321376635,
"grad_norm": 9.28536227529456,
"learning_rate": 5.5088852988691445e-06,
"loss": 2.6573359966278076,
"step": 342
},
{
"epoch": 0.1662627241880756,
"grad_norm": 15.811889386344351,
"learning_rate": 5.5250403877221325e-06,
"loss": 2.947338819503784,
"step": 343
},
{
"epoch": 0.16674745516238487,
"grad_norm": 15.105399831492443,
"learning_rate": 5.5411954765751215e-06,
"loss": 3.1534266471862793,
"step": 344
},
{
"epoch": 0.16723218613669413,
"grad_norm": 11.47989213568277,
"learning_rate": 5.5573505654281104e-06,
"loss": 2.8058269023895264,
"step": 345
},
{
"epoch": 0.1677169171110034,
"grad_norm": 7.027013367500873,
"learning_rate": 5.573505654281099e-06,
"loss": 2.3735389709472656,
"step": 346
},
{
"epoch": 0.16820164808531265,
"grad_norm": 9.805877658922748,
"learning_rate": 5.5896607431340875e-06,
"loss": 2.6240577697753906,
"step": 347
},
{
"epoch": 0.16868637905962192,
"grad_norm": 8.727813120666418,
"learning_rate": 5.6058158319870756e-06,
"loss": 3.1422083377838135,
"step": 348
},
{
"epoch": 0.16917111003393118,
"grad_norm": 11.225221524646035,
"learning_rate": 5.621970920840065e-06,
"loss": 2.439424514770508,
"step": 349
},
{
"epoch": 0.16965584100824044,
"grad_norm": 10.723477959666791,
"learning_rate": 5.6381260096930534e-06,
"loss": 3.1436843872070312,
"step": 350
},
{
"epoch": 0.1701405719825497,
"grad_norm": 11.181648873663073,
"learning_rate": 5.654281098546042e-06,
"loss": 3.062307834625244,
"step": 351
},
{
"epoch": 0.17062530295685893,
"grad_norm": 9.54987459982745,
"learning_rate": 5.670436187399031e-06,
"loss": 3.463653087615967,
"step": 352
},
{
"epoch": 0.1711100339311682,
"grad_norm": 12.936001981409081,
"learning_rate": 5.68659127625202e-06,
"loss": 2.7325143814086914,
"step": 353
},
{
"epoch": 0.17159476490547745,
"grad_norm": 10.710861225443912,
"learning_rate": 5.702746365105008e-06,
"loss": 2.8428101539611816,
"step": 354
},
{
"epoch": 0.17207949587978671,
"grad_norm": 11.855128590294456,
"learning_rate": 5.7189014539579965e-06,
"loss": 3.2609505653381348,
"step": 355
},
{
"epoch": 0.17256422685409598,
"grad_norm": 12.797634241902424,
"learning_rate": 5.735056542810986e-06,
"loss": 2.9274725914001465,
"step": 356
},
{
"epoch": 0.17304895782840524,
"grad_norm": 18.233311116851496,
"learning_rate": 5.751211631663974e-06,
"loss": 4.012993335723877,
"step": 357
},
{
"epoch": 0.1735336888027145,
"grad_norm": 12.085728723955027,
"learning_rate": 5.767366720516963e-06,
"loss": 3.1602323055267334,
"step": 358
},
{
"epoch": 0.17401841977702376,
"grad_norm": 12.35690432735027,
"learning_rate": 5.783521809369952e-06,
"loss": 3.1614990234375,
"step": 359
},
{
"epoch": 0.17450315075133302,
"grad_norm": 11.331158910987986,
"learning_rate": 5.799676898222941e-06,
"loss": 2.5720067024230957,
"step": 360
},
{
"epoch": 0.17498788172564228,
"grad_norm": 7.192626991887324,
"learning_rate": 5.815831987075929e-06,
"loss": 2.811629056930542,
"step": 361
},
{
"epoch": 0.1754726126999515,
"grad_norm": 11.046307886230233,
"learning_rate": 5.831987075928917e-06,
"loss": 2.7356531620025635,
"step": 362
},
{
"epoch": 0.17595734367426077,
"grad_norm": 36.0140650941125,
"learning_rate": 5.848142164781907e-06,
"loss": 3.1139495372772217,
"step": 363
},
{
"epoch": 0.17644207464857004,
"grad_norm": 7.006430866824001,
"learning_rate": 5.864297253634895e-06,
"loss": 3.098376989364624,
"step": 364
},
{
"epoch": 0.1769268056228793,
"grad_norm": 25.016471565475467,
"learning_rate": 5.880452342487884e-06,
"loss": 3.0602917671203613,
"step": 365
},
{
"epoch": 0.17741153659718856,
"grad_norm": 12.29139388364039,
"learning_rate": 5.896607431340873e-06,
"loss": 2.716895341873169,
"step": 366
},
{
"epoch": 0.17789626757149782,
"grad_norm": 7.362681694860965,
"learning_rate": 5.912762520193862e-06,
"loss": 2.7560248374938965,
"step": 367
},
{
"epoch": 0.17838099854580708,
"grad_norm": 23.508535866303294,
"learning_rate": 5.92891760904685e-06,
"loss": 3.2643227577209473,
"step": 368
},
{
"epoch": 0.17886572952011634,
"grad_norm": 25.130828310529722,
"learning_rate": 5.945072697899838e-06,
"loss": 4.924375534057617,
"step": 369
},
{
"epoch": 0.1793504604944256,
"grad_norm": 11.151175644134895,
"learning_rate": 5.961227786752828e-06,
"loss": 2.9297962188720703,
"step": 370
},
{
"epoch": 0.17983519146873486,
"grad_norm": 20.28346295665146,
"learning_rate": 5.977382875605816e-06,
"loss": 2.9168593883514404,
"step": 371
},
{
"epoch": 0.18031992244304412,
"grad_norm": 10.756133184710917,
"learning_rate": 5.993537964458805e-06,
"loss": 3.8211796283721924,
"step": 372
},
{
"epoch": 0.18080465341735336,
"grad_norm": 20.385159139821447,
"learning_rate": 6.009693053311794e-06,
"loss": 2.6352741718292236,
"step": 373
},
{
"epoch": 0.18128938439166262,
"grad_norm": 13.143545844475765,
"learning_rate": 6.025848142164782e-06,
"loss": 3.156874656677246,
"step": 374
},
{
"epoch": 0.18177411536597188,
"grad_norm": 13.627778401560663,
"learning_rate": 6.042003231017771e-06,
"loss": 2.4009530544281006,
"step": 375
},
{
"epoch": 0.18225884634028114,
"grad_norm": 14.416144934705404,
"learning_rate": 6.058158319870759e-06,
"loss": 3.270224094390869,
"step": 376
},
{
"epoch": 0.1827435773145904,
"grad_norm": 12.10014785145877,
"learning_rate": 6.074313408723749e-06,
"loss": 2.646615743637085,
"step": 377
},
{
"epoch": 0.18322830828889966,
"grad_norm": 9.757027058684733,
"learning_rate": 6.090468497576737e-06,
"loss": 3.0286476612091064,
"step": 378
},
{
"epoch": 0.18371303926320892,
"grad_norm": 25.025994473926474,
"learning_rate": 6.106623586429726e-06,
"loss": 2.6842775344848633,
"step": 379
},
{
"epoch": 0.18419777023751818,
"grad_norm": 10.205088472282553,
"learning_rate": 6.122778675282715e-06,
"loss": 2.9597606658935547,
"step": 380
},
{
"epoch": 0.18468250121182744,
"grad_norm": 8.415567592646589,
"learning_rate": 6.138933764135703e-06,
"loss": 3.0696797370910645,
"step": 381
},
{
"epoch": 0.1851672321861367,
"grad_norm": 19.518992351898206,
"learning_rate": 6.155088852988692e-06,
"loss": 3.0582327842712402,
"step": 382
},
{
"epoch": 0.18565196316044597,
"grad_norm": 13.919550558131716,
"learning_rate": 6.17124394184168e-06,
"loss": 2.7542381286621094,
"step": 383
},
{
"epoch": 0.1861366941347552,
"grad_norm": 29.528086249518253,
"learning_rate": 6.18739903069467e-06,
"loss": 2.8645291328430176,
"step": 384
},
{
"epoch": 0.18662142510906446,
"grad_norm": 14.373828858798262,
"learning_rate": 6.203554119547658e-06,
"loss": 2.923300266265869,
"step": 385
},
{
"epoch": 0.18710615608337372,
"grad_norm": 11.076810357920905,
"learning_rate": 6.219709208400647e-06,
"loss": 2.809591770172119,
"step": 386
},
{
"epoch": 0.18759088705768298,
"grad_norm": 27.567551127902274,
"learning_rate": 6.235864297253636e-06,
"loss": 2.3717756271362305,
"step": 387
},
{
"epoch": 0.18807561803199224,
"grad_norm": 16.225330805017023,
"learning_rate": 6.252019386106624e-06,
"loss": 3.123983860015869,
"step": 388
},
{
"epoch": 0.1885603490063015,
"grad_norm": 7.638317002755751,
"learning_rate": 6.268174474959613e-06,
"loss": 2.689042568206787,
"step": 389
},
{
"epoch": 0.18904507998061076,
"grad_norm": 10.672478254052452,
"learning_rate": 6.284329563812601e-06,
"loss": 2.1655490398406982,
"step": 390
},
{
"epoch": 0.18952981095492002,
"grad_norm": 10.507470585724082,
"learning_rate": 6.300484652665591e-06,
"loss": 3.5005626678466797,
"step": 391
},
{
"epoch": 0.19001454192922929,
"grad_norm": 12.153952722626808,
"learning_rate": 6.316639741518579e-06,
"loss": 2.9582266807556152,
"step": 392
},
{
"epoch": 0.19049927290353855,
"grad_norm": 10.744641108273946,
"learning_rate": 6.332794830371568e-06,
"loss": 2.625063419342041,
"step": 393
},
{
"epoch": 0.1909840038778478,
"grad_norm": 9.771497043873916,
"learning_rate": 6.348949919224556e-06,
"loss": 2.60994029045105,
"step": 394
},
{
"epoch": 0.19146873485215704,
"grad_norm": 28.72459570401896,
"learning_rate": 6.365105008077545e-06,
"loss": 2.6505672931671143,
"step": 395
},
{
"epoch": 0.1919534658264663,
"grad_norm": 14.24306062893526,
"learning_rate": 6.381260096930534e-06,
"loss": 3.3491158485412598,
"step": 396
},
{
"epoch": 0.19243819680077556,
"grad_norm": 19.411145987646172,
"learning_rate": 6.397415185783522e-06,
"loss": 2.372833490371704,
"step": 397
},
{
"epoch": 0.19292292777508482,
"grad_norm": 10.311185098294606,
"learning_rate": 6.413570274636512e-06,
"loss": 2.0699234008789062,
"step": 398
},
{
"epoch": 0.19340765874939408,
"grad_norm": 17.560990411537688,
"learning_rate": 6.4297253634895e-06,
"loss": 2.5136590003967285,
"step": 399
},
{
"epoch": 0.19389238972370335,
"grad_norm": 11.810214825994246,
"learning_rate": 6.445880452342489e-06,
"loss": 2.9091782569885254,
"step": 400
},
{
"epoch": 0.1943771206980126,
"grad_norm": 8.923602839879397,
"learning_rate": 6.462035541195477e-06,
"loss": 2.762298583984375,
"step": 401
},
{
"epoch": 0.19486185167232187,
"grad_norm": 16.27347727342578,
"learning_rate": 6.478190630048466e-06,
"loss": 2.6796109676361084,
"step": 402
},
{
"epoch": 0.19534658264663113,
"grad_norm": 6.903590474985229,
"learning_rate": 6.494345718901455e-06,
"loss": 2.4200985431671143,
"step": 403
},
{
"epoch": 0.1958313136209404,
"grad_norm": 7.3825985074561995,
"learning_rate": 6.510500807754443e-06,
"loss": 2.416273355484009,
"step": 404
},
{
"epoch": 0.19631604459524965,
"grad_norm": 9.605954686588268,
"learning_rate": 6.5266558966074325e-06,
"loss": 2.859245538711548,
"step": 405
},
{
"epoch": 0.19680077556955888,
"grad_norm": 14.01740792708358,
"learning_rate": 6.542810985460421e-06,
"loss": 3.3257462978363037,
"step": 406
},
{
"epoch": 0.19728550654386814,
"grad_norm": 10.353664929950614,
"learning_rate": 6.5589660743134096e-06,
"loss": 2.7842440605163574,
"step": 407
},
{
"epoch": 0.1977702375181774,
"grad_norm": 14.245385872690733,
"learning_rate": 6.575121163166398e-06,
"loss": 2.663003444671631,
"step": 408
},
{
"epoch": 0.19825496849248667,
"grad_norm": 19.230254266929084,
"learning_rate": 6.591276252019387e-06,
"loss": 3.493741750717163,
"step": 409
},
{
"epoch": 0.19873969946679593,
"grad_norm": 107.64872977827413,
"learning_rate": 6.6074313408723756e-06,
"loss": 2.5543909072875977,
"step": 410
},
{
"epoch": 0.1992244304411052,
"grad_norm": 18.94540974400696,
"learning_rate": 6.623586429725364e-06,
"loss": 2.564774990081787,
"step": 411
},
{
"epoch": 0.19970916141541445,
"grad_norm": 12.796312582433192,
"learning_rate": 6.6397415185783534e-06,
"loss": 3.2753469944000244,
"step": 412
},
{
"epoch": 0.2001938923897237,
"grad_norm": 9.649631914216416,
"learning_rate": 6.6558966074313415e-06,
"loss": 2.7669036388397217,
"step": 413
},
{
"epoch": 0.20067862336403297,
"grad_norm": 11.120499003166183,
"learning_rate": 6.67205169628433e-06,
"loss": 2.936286449432373,
"step": 414
},
{
"epoch": 0.20116335433834223,
"grad_norm": 18.226659755542734,
"learning_rate": 6.6882067851373186e-06,
"loss": 3.3874592781066895,
"step": 415
},
{
"epoch": 0.20164808531265146,
"grad_norm": 8.52565532528602,
"learning_rate": 6.7043618739903075e-06,
"loss": 3.4146323204040527,
"step": 416
},
{
"epoch": 0.20213281628696073,
"grad_norm": 7.233937151041931,
"learning_rate": 6.7205169628432965e-06,
"loss": 2.782090663909912,
"step": 417
},
{
"epoch": 0.20261754726127,
"grad_norm": 11.941393982735791,
"learning_rate": 6.7366720516962845e-06,
"loss": 2.479891777038574,
"step": 418
},
{
"epoch": 0.20310227823557925,
"grad_norm": 8.744145017394075,
"learning_rate": 6.752827140549274e-06,
"loss": 2.941582202911377,
"step": 419
},
{
"epoch": 0.2035870092098885,
"grad_norm": 11.32850625166376,
"learning_rate": 6.7689822294022624e-06,
"loss": 3.2689096927642822,
"step": 420
},
{
"epoch": 0.20407174018419777,
"grad_norm": 14.41203466025537,
"learning_rate": 6.7851373182552505e-06,
"loss": 3.0448875427246094,
"step": 421
},
{
"epoch": 0.20455647115850703,
"grad_norm": 9.263791170418907,
"learning_rate": 6.8012924071082395e-06,
"loss": 2.982584238052368,
"step": 422
},
{
"epoch": 0.2050412021328163,
"grad_norm": 16.10591065962252,
"learning_rate": 6.817447495961228e-06,
"loss": 3.0585832595825195,
"step": 423
},
{
"epoch": 0.20552593310712555,
"grad_norm": 11.142473648735365,
"learning_rate": 6.833602584814217e-06,
"loss": 3.0911970138549805,
"step": 424
},
{
"epoch": 0.2060106640814348,
"grad_norm": 13.562552456165479,
"learning_rate": 6.8497576736672054e-06,
"loss": 2.527979850769043,
"step": 425
},
{
"epoch": 0.20649539505574407,
"grad_norm": 13.142984859975437,
"learning_rate": 6.865912762520195e-06,
"loss": 2.9030375480651855,
"step": 426
},
{
"epoch": 0.2069801260300533,
"grad_norm": 13.397989371373848,
"learning_rate": 6.882067851373183e-06,
"loss": 2.9681477546691895,
"step": 427
},
{
"epoch": 0.20746485700436257,
"grad_norm": 6.389132896925534,
"learning_rate": 6.8982229402261714e-06,
"loss": 2.737961530685425,
"step": 428
},
{
"epoch": 0.20794958797867183,
"grad_norm": 10.507514667146038,
"learning_rate": 6.91437802907916e-06,
"loss": 3.209543228149414,
"step": 429
},
{
"epoch": 0.2084343189529811,
"grad_norm": 7.22068132699742,
"learning_rate": 6.930533117932149e-06,
"loss": 3.1824169158935547,
"step": 430
},
{
"epoch": 0.20891904992729035,
"grad_norm": 22.992117955289938,
"learning_rate": 6.946688206785138e-06,
"loss": 2.4474141597747803,
"step": 431
},
{
"epoch": 0.2094037809015996,
"grad_norm": 13.263648121695637,
"learning_rate": 6.962843295638126e-06,
"loss": 2.698476791381836,
"step": 432
},
{
"epoch": 0.20988851187590887,
"grad_norm": 16.548888896917926,
"learning_rate": 6.978998384491116e-06,
"loss": 2.78529953956604,
"step": 433
},
{
"epoch": 0.21037324285021813,
"grad_norm": 29.532069333476215,
"learning_rate": 6.995153473344104e-06,
"loss": 2.9042816162109375,
"step": 434
},
{
"epoch": 0.2108579738245274,
"grad_norm": 9.001034946348732,
"learning_rate": 7.011308562197092e-06,
"loss": 2.8605165481567383,
"step": 435
},
{
"epoch": 0.21134270479883666,
"grad_norm": 16.63498627832683,
"learning_rate": 7.027463651050081e-06,
"loss": 2.928544521331787,
"step": 436
},
{
"epoch": 0.21182743577314592,
"grad_norm": 10.686866899376074,
"learning_rate": 7.043618739903069e-06,
"loss": 3.0176005363464355,
"step": 437
},
{
"epoch": 0.21231216674745515,
"grad_norm": 6.903128859006627,
"learning_rate": 7.059773828756059e-06,
"loss": 2.7629966735839844,
"step": 438
},
{
"epoch": 0.2127968977217644,
"grad_norm": 7.728071799065463,
"learning_rate": 7.075928917609047e-06,
"loss": 3.0124268531799316,
"step": 439
},
{
"epoch": 0.21328162869607367,
"grad_norm": 8.929731632234937,
"learning_rate": 7.092084006462037e-06,
"loss": 2.4502124786376953,
"step": 440
},
{
"epoch": 0.21376635967038293,
"grad_norm": 9.69956814902254,
"learning_rate": 7.108239095315025e-06,
"loss": 2.416954278945923,
"step": 441
},
{
"epoch": 0.2142510906446922,
"grad_norm": 34.32359969204926,
"learning_rate": 7.124394184168013e-06,
"loss": 2.9433512687683105,
"step": 442
},
{
"epoch": 0.21473582161900145,
"grad_norm": 20.375823672820847,
"learning_rate": 7.140549273021002e-06,
"loss": 3.1315908432006836,
"step": 443
},
{
"epoch": 0.21522055259331072,
"grad_norm": 7.4675865689958805,
"learning_rate": 7.15670436187399e-06,
"loss": 2.599388837814331,
"step": 444
},
{
"epoch": 0.21570528356761998,
"grad_norm": 6.565118290154163,
"learning_rate": 7.17285945072698e-06,
"loss": 2.9343106746673584,
"step": 445
},
{
"epoch": 0.21619001454192924,
"grad_norm": 11.897405831792701,
"learning_rate": 7.189014539579968e-06,
"loss": 2.834850311279297,
"step": 446
},
{
"epoch": 0.2166747455162385,
"grad_norm": 13.818525067221113,
"learning_rate": 7.205169628432956e-06,
"loss": 2.2822704315185547,
"step": 447
},
{
"epoch": 0.21715947649054776,
"grad_norm": 11.388459510909305,
"learning_rate": 7.221324717285946e-06,
"loss": 3.1177737712860107,
"step": 448
},
{
"epoch": 0.217644207464857,
"grad_norm": 14.383902107728657,
"learning_rate": 7.237479806138934e-06,
"loss": 2.971219539642334,
"step": 449
},
{
"epoch": 0.21812893843916625,
"grad_norm": 15.258553603065664,
"learning_rate": 7.253634894991923e-06,
"loss": 2.5755774974823,
"step": 450
},
{
"epoch": 0.21861366941347551,
"grad_norm": 6.1797634454805666,
"learning_rate": 7.269789983844911e-06,
"loss": 2.6765313148498535,
"step": 451
},
{
"epoch": 0.21909840038778478,
"grad_norm": 11.664779572383875,
"learning_rate": 7.285945072697901e-06,
"loss": 3.108405351638794,
"step": 452
},
{
"epoch": 0.21958313136209404,
"grad_norm": 8.162143738729087,
"learning_rate": 7.302100161550889e-06,
"loss": 2.5972719192504883,
"step": 453
},
{
"epoch": 0.2200678623364033,
"grad_norm": 9.173278251917703,
"learning_rate": 7.318255250403877e-06,
"loss": 3.161656379699707,
"step": 454
},
{
"epoch": 0.22055259331071256,
"grad_norm": 14.280332602534763,
"learning_rate": 7.334410339256867e-06,
"loss": 2.9388556480407715,
"step": 455
},
{
"epoch": 0.22103732428502182,
"grad_norm": 10.100019611774911,
"learning_rate": 7.350565428109855e-06,
"loss": 2.820383071899414,
"step": 456
},
{
"epoch": 0.22152205525933108,
"grad_norm": 13.707971704798066,
"learning_rate": 7.366720516962844e-06,
"loss": 3.250032663345337,
"step": 457
},
{
"epoch": 0.22200678623364034,
"grad_norm": 27.849796465592828,
"learning_rate": 7.382875605815832e-06,
"loss": 2.5840346813201904,
"step": 458
},
{
"epoch": 0.2224915172079496,
"grad_norm": 8.798661937735227,
"learning_rate": 7.399030694668822e-06,
"loss": 2.866715431213379,
"step": 459
},
{
"epoch": 0.22297624818225883,
"grad_norm": 9.21588785158536,
"learning_rate": 7.41518578352181e-06,
"loss": 3.1385390758514404,
"step": 460
},
{
"epoch": 0.2234609791565681,
"grad_norm": 9.769623345284746,
"learning_rate": 7.431340872374798e-06,
"loss": 2.663710594177246,
"step": 461
},
{
"epoch": 0.22394571013087736,
"grad_norm": 10.222254371776984,
"learning_rate": 7.447495961227788e-06,
"loss": 3.1445398330688477,
"step": 462
},
{
"epoch": 0.22443044110518662,
"grad_norm": 10.32178248170141,
"learning_rate": 7.463651050080776e-06,
"loss": 3.3270263671875,
"step": 463
},
{
"epoch": 0.22491517207949588,
"grad_norm": 17.01980588824466,
"learning_rate": 7.479806138933765e-06,
"loss": 2.591313362121582,
"step": 464
},
{
"epoch": 0.22539990305380514,
"grad_norm": 8.538597601970885,
"learning_rate": 7.495961227786753e-06,
"loss": 3.18306303024292,
"step": 465
},
{
"epoch": 0.2258846340281144,
"grad_norm": 11.288718963795688,
"learning_rate": 7.512116316639743e-06,
"loss": 3.138984441757202,
"step": 466
},
{
"epoch": 0.22636936500242366,
"grad_norm": 40.34961357187281,
"learning_rate": 7.528271405492731e-06,
"loss": 4.418365955352783,
"step": 467
},
{
"epoch": 0.22685409597673292,
"grad_norm": 10.527354801552914,
"learning_rate": 7.544426494345719e-06,
"loss": 3.1681923866271973,
"step": 468
},
{
"epoch": 0.22733882695104218,
"grad_norm": 10.018793119247785,
"learning_rate": 7.560581583198709e-06,
"loss": 3.127969741821289,
"step": 469
},
{
"epoch": 0.22782355792535142,
"grad_norm": 26.301267986609908,
"learning_rate": 7.576736672051697e-06,
"loss": 2.372586488723755,
"step": 470
},
{
"epoch": 0.22830828889966068,
"grad_norm": 14.467620577387684,
"learning_rate": 7.592891760904686e-06,
"loss": 2.8599698543548584,
"step": 471
},
{
"epoch": 0.22879301987396994,
"grad_norm": 14.292350529460863,
"learning_rate": 7.609046849757674e-06,
"loss": 2.935361862182617,
"step": 472
},
{
"epoch": 0.2292777508482792,
"grad_norm": 10.850563565668747,
"learning_rate": 7.625201938610664e-06,
"loss": 2.8377327919006348,
"step": 473
},
{
"epoch": 0.22976248182258846,
"grad_norm": 18.159701167353543,
"learning_rate": 7.641357027463651e-06,
"loss": 2.6604673862457275,
"step": 474
},
{
"epoch": 0.23024721279689772,
"grad_norm": 9.13142724298689,
"learning_rate": 7.65751211631664e-06,
"loss": 3.0835094451904297,
"step": 475
},
{
"epoch": 0.23073194377120698,
"grad_norm": 21.56797496668057,
"learning_rate": 7.673667205169629e-06,
"loss": 3.0069451332092285,
"step": 476
},
{
"epoch": 0.23121667474551624,
"grad_norm": 13.660492246023274,
"learning_rate": 7.689822294022618e-06,
"loss": 3.2506377696990967,
"step": 477
},
{
"epoch": 0.2317014057198255,
"grad_norm": 21.464479903763316,
"learning_rate": 7.705977382875607e-06,
"loss": 2.3665618896484375,
"step": 478
},
{
"epoch": 0.23218613669413476,
"grad_norm": 16.007926841321982,
"learning_rate": 7.722132471728596e-06,
"loss": 2.4313201904296875,
"step": 479
},
{
"epoch": 0.23267086766844403,
"grad_norm": 12.544078961506584,
"learning_rate": 7.738287560581585e-06,
"loss": 2.3750159740448,
"step": 480
},
{
"epoch": 0.23315559864275326,
"grad_norm": 23.357939056567304,
"learning_rate": 7.754442649434572e-06,
"loss": 3.0478901863098145,
"step": 481
},
{
"epoch": 0.23364032961706252,
"grad_norm": 15.36493578759406,
"learning_rate": 7.77059773828756e-06,
"loss": 2.7273573875427246,
"step": 482
},
{
"epoch": 0.23412506059137178,
"grad_norm": 8.512763824507743,
"learning_rate": 7.78675282714055e-06,
"loss": 2.8059723377227783,
"step": 483
},
{
"epoch": 0.23460979156568104,
"grad_norm": 13.473349863657491,
"learning_rate": 7.802907915993539e-06,
"loss": 2.994621753692627,
"step": 484
},
{
"epoch": 0.2350945225399903,
"grad_norm": 20.17461842254869,
"learning_rate": 7.819063004846528e-06,
"loss": 2.2371976375579834,
"step": 485
},
{
"epoch": 0.23557925351429956,
"grad_norm": 19.487392665157767,
"learning_rate": 7.835218093699516e-06,
"loss": 2.463167667388916,
"step": 486
},
{
"epoch": 0.23606398448860882,
"grad_norm": 14.216074578414373,
"learning_rate": 7.851373182552504e-06,
"loss": 2.905806303024292,
"step": 487
},
{
"epoch": 0.23654871546291809,
"grad_norm": 16.500558182171478,
"learning_rate": 7.867528271405493e-06,
"loss": 3.0947861671447754,
"step": 488
},
{
"epoch": 0.23703344643722735,
"grad_norm": 18.479793384159034,
"learning_rate": 7.883683360258482e-06,
"loss": 3.556015729904175,
"step": 489
},
{
"epoch": 0.2375181774115366,
"grad_norm": 29.033683193282496,
"learning_rate": 7.89983844911147e-06,
"loss": 3.5879275798797607,
"step": 490
},
{
"epoch": 0.23800290838584587,
"grad_norm": 8.51121530311687,
"learning_rate": 7.91599353796446e-06,
"loss": 2.919184684753418,
"step": 491
},
{
"epoch": 0.2384876393601551,
"grad_norm": 10.064242026446042,
"learning_rate": 7.932148626817448e-06,
"loss": 3.19797945022583,
"step": 492
},
{
"epoch": 0.23897237033446436,
"grad_norm": 21.909496728844122,
"learning_rate": 7.948303715670437e-06,
"loss": 2.604217529296875,
"step": 493
},
{
"epoch": 0.23945710130877362,
"grad_norm": 9.43993013287164,
"learning_rate": 7.964458804523425e-06,
"loss": 3.214405059814453,
"step": 494
},
{
"epoch": 0.23994183228308288,
"grad_norm": 16.498698180648727,
"learning_rate": 7.980613893376414e-06,
"loss": 2.9331345558166504,
"step": 495
},
{
"epoch": 0.24042656325739215,
"grad_norm": 25.811949944372095,
"learning_rate": 7.996768982229403e-06,
"loss": 2.99019718170166,
"step": 496
},
{
"epoch": 0.2409112942317014,
"grad_norm": 25.69236784181588,
"learning_rate": 8.012924071082391e-06,
"loss": 2.8378653526306152,
"step": 497
},
{
"epoch": 0.24139602520601067,
"grad_norm": 13.88874867252455,
"learning_rate": 8.02907915993538e-06,
"loss": 2.909583568572998,
"step": 498
},
{
"epoch": 0.24188075618031993,
"grad_norm": 8.218544644335283,
"learning_rate": 8.04523424878837e-06,
"loss": 2.837493419647217,
"step": 499
},
{
"epoch": 0.2423654871546292,
"grad_norm": 11.500070515125007,
"learning_rate": 8.061389337641358e-06,
"loss": 2.918443441390991,
"step": 500
},
{
"epoch": 0.24285021812893845,
"grad_norm": 10.114140295418991,
"learning_rate": 8.077544426494346e-06,
"loss": 2.552727222442627,
"step": 501
},
{
"epoch": 0.2433349491032477,
"grad_norm": 10.189227895363596,
"learning_rate": 8.093699515347334e-06,
"loss": 2.7898731231689453,
"step": 502
},
{
"epoch": 0.24381968007755694,
"grad_norm": 13.08460060657369,
"learning_rate": 8.109854604200323e-06,
"loss": 2.475520133972168,
"step": 503
},
{
"epoch": 0.2443044110518662,
"grad_norm": 8.606527036945353,
"learning_rate": 8.126009693053312e-06,
"loss": 2.5415778160095215,
"step": 504
},
{
"epoch": 0.24478914202617547,
"grad_norm": 13.802557177571098,
"learning_rate": 8.142164781906301e-06,
"loss": 2.7415058612823486,
"step": 505
},
{
"epoch": 0.24527387300048473,
"grad_norm": 21.968404085864645,
"learning_rate": 8.15831987075929e-06,
"loss": 4.311206817626953,
"step": 506
},
{
"epoch": 0.245758603974794,
"grad_norm": 12.002299848884345,
"learning_rate": 8.17447495961228e-06,
"loss": 2.8645572662353516,
"step": 507
},
{
"epoch": 0.24624333494910325,
"grad_norm": 13.320322509677347,
"learning_rate": 8.190630048465266e-06,
"loss": 2.624079942703247,
"step": 508
},
{
"epoch": 0.2467280659234125,
"grad_norm": 11.877122584600686,
"learning_rate": 8.206785137318255e-06,
"loss": 2.5716371536254883,
"step": 509
},
{
"epoch": 0.24721279689772177,
"grad_norm": 10.94965770976451,
"learning_rate": 8.222940226171244e-06,
"loss": 3.134033679962158,
"step": 510
},
{
"epoch": 0.24769752787203103,
"grad_norm": 8.113536847612663,
"learning_rate": 8.239095315024233e-06,
"loss": 2.8948495388031006,
"step": 511
},
{
"epoch": 0.2481822588463403,
"grad_norm": 14.665390061871278,
"learning_rate": 8.255250403877222e-06,
"loss": 2.687488079071045,
"step": 512
},
{
"epoch": 0.24866698982064955,
"grad_norm": 11.453292331185336,
"learning_rate": 8.271405492730211e-06,
"loss": 2.2624351978302,
"step": 513
},
{
"epoch": 0.2491517207949588,
"grad_norm": 7.3648761813287855,
"learning_rate": 8.2875605815832e-06,
"loss": 1.5254552364349365,
"step": 514
},
{
"epoch": 0.24963645176926805,
"grad_norm": 10.7984132294541,
"learning_rate": 8.303715670436187e-06,
"loss": 2.4871716499328613,
"step": 515
},
{
"epoch": 0.2501211827435773,
"grad_norm": 9.927042784510618,
"learning_rate": 8.319870759289176e-06,
"loss": 3.124423027038574,
"step": 516
},
{
"epoch": 0.2506059137178866,
"grad_norm": 8.768162888465296,
"learning_rate": 8.336025848142165e-06,
"loss": 2.9188406467437744,
"step": 517
},
{
"epoch": 0.25109064469219583,
"grad_norm": 10.2678535806811,
"learning_rate": 8.352180936995154e-06,
"loss": 2.9420769214630127,
"step": 518
},
{
"epoch": 0.25157537566650506,
"grad_norm": 15.742143020355204,
"learning_rate": 8.368336025848143e-06,
"loss": 2.940609931945801,
"step": 519
},
{
"epoch": 0.25206010664081435,
"grad_norm": 13.644446545459962,
"learning_rate": 8.384491114701132e-06,
"loss": 2.967130661010742,
"step": 520
},
{
"epoch": 0.2525448376151236,
"grad_norm": 23.333574522774782,
"learning_rate": 8.400646203554121e-06,
"loss": 2.902386426925659,
"step": 521
},
{
"epoch": 0.2530295685894329,
"grad_norm": 22.475695984896895,
"learning_rate": 8.416801292407108e-06,
"loss": 2.457684278488159,
"step": 522
},
{
"epoch": 0.2535142995637421,
"grad_norm": 7.4540420445035505,
"learning_rate": 8.432956381260097e-06,
"loss": 3.0126147270202637,
"step": 523
},
{
"epoch": 0.2539990305380514,
"grad_norm": 9.186015838276852,
"learning_rate": 8.449111470113086e-06,
"loss": 2.80767822265625,
"step": 524
},
{
"epoch": 0.25448376151236063,
"grad_norm": 20.28272383051196,
"learning_rate": 8.465266558966075e-06,
"loss": 2.927285671234131,
"step": 525
},
{
"epoch": 0.2549684924866699,
"grad_norm": 17.877128536648556,
"learning_rate": 8.481421647819064e-06,
"loss": 2.929622173309326,
"step": 526
},
{
"epoch": 0.25545322346097915,
"grad_norm": 14.31779511288913,
"learning_rate": 8.497576736672051e-06,
"loss": 3.1691198348999023,
"step": 527
},
{
"epoch": 0.25593795443528844,
"grad_norm": 28.09773837314376,
"learning_rate": 8.513731825525042e-06,
"loss": 2.747352123260498,
"step": 528
},
{
"epoch": 0.2564226854095977,
"grad_norm": 11.220931632890796,
"learning_rate": 8.529886914378029e-06,
"loss": 4.22380256652832,
"step": 529
},
{
"epoch": 0.2569074163839069,
"grad_norm": 11.06615788864276,
"learning_rate": 8.546042003231018e-06,
"loss": 2.618208885192871,
"step": 530
},
{
"epoch": 0.2573921473582162,
"grad_norm": 14.156273527219069,
"learning_rate": 8.562197092084007e-06,
"loss": 2.945373058319092,
"step": 531
},
{
"epoch": 0.2578768783325254,
"grad_norm": 36.83605304393031,
"learning_rate": 8.578352180936996e-06,
"loss": 2.3983089923858643,
"step": 532
},
{
"epoch": 0.2583616093068347,
"grad_norm": 13.966104242024727,
"learning_rate": 8.594507269789985e-06,
"loss": 3.3714725971221924,
"step": 533
},
{
"epoch": 0.25884634028114395,
"grad_norm": 17.49615612850907,
"learning_rate": 8.610662358642972e-06,
"loss": 2.7563626766204834,
"step": 534
},
{
"epoch": 0.25933107125545324,
"grad_norm": 11.493038684762723,
"learning_rate": 8.626817447495963e-06,
"loss": 3.18229341506958,
"step": 535
},
{
"epoch": 0.25981580222976247,
"grad_norm": 15.217891964481542,
"learning_rate": 8.64297253634895e-06,
"loss": 3.325489044189453,
"step": 536
},
{
"epoch": 0.26030053320407176,
"grad_norm": 9.494700480474444,
"learning_rate": 8.659127625201939e-06,
"loss": 2.947113037109375,
"step": 537
},
{
"epoch": 0.260785264178381,
"grad_norm": 8.740325162721403,
"learning_rate": 8.675282714054928e-06,
"loss": 2.4041972160339355,
"step": 538
},
{
"epoch": 0.2612699951526903,
"grad_norm": 12.597753248437986,
"learning_rate": 8.691437802907917e-06,
"loss": 2.7758007049560547,
"step": 539
},
{
"epoch": 0.2617547261269995,
"grad_norm": 8.504027063699091,
"learning_rate": 8.707592891760906e-06,
"loss": 2.794734477996826,
"step": 540
},
{
"epoch": 0.26223945710130875,
"grad_norm": 13.70429732311585,
"learning_rate": 8.723747980613893e-06,
"loss": 2.820345878601074,
"step": 541
},
{
"epoch": 0.26272418807561804,
"grad_norm": 10.348932705203293,
"learning_rate": 8.739903069466884e-06,
"loss": 2.637064218521118,
"step": 542
},
{
"epoch": 0.26320891904992727,
"grad_norm": 8.339400391696776,
"learning_rate": 8.756058158319871e-06,
"loss": 2.6331136226654053,
"step": 543
},
{
"epoch": 0.26369365002423656,
"grad_norm": 13.483149795873986,
"learning_rate": 8.77221324717286e-06,
"loss": 2.602522850036621,
"step": 544
},
{
"epoch": 0.2641783809985458,
"grad_norm": 9.388233704363646,
"learning_rate": 8.788368336025849e-06,
"loss": 3.2241599559783936,
"step": 545
},
{
"epoch": 0.2646631119728551,
"grad_norm": 15.193634846469337,
"learning_rate": 8.804523424878838e-06,
"loss": 2.9407432079315186,
"step": 546
},
{
"epoch": 0.2651478429471643,
"grad_norm": 7.591692851049034,
"learning_rate": 8.820678513731827e-06,
"loss": 2.6527979373931885,
"step": 547
},
{
"epoch": 0.2656325739214736,
"grad_norm": 13.510518303896568,
"learning_rate": 8.836833602584814e-06,
"loss": 2.6669933795928955,
"step": 548
},
{
"epoch": 0.26611730489578284,
"grad_norm": 10.979025841942482,
"learning_rate": 8.852988691437805e-06,
"loss": 2.986569881439209,
"step": 549
},
{
"epoch": 0.2666020358700921,
"grad_norm": 15.7598279052028,
"learning_rate": 8.869143780290792e-06,
"loss": 2.7994232177734375,
"step": 550
},
{
"epoch": 0.26708676684440136,
"grad_norm": 10.381628351347716,
"learning_rate": 8.88529886914378e-06,
"loss": 2.438081741333008,
"step": 551
},
{
"epoch": 0.2675714978187106,
"grad_norm": 9.889354484094724,
"learning_rate": 8.90145395799677e-06,
"loss": 2.6343979835510254,
"step": 552
},
{
"epoch": 0.2680562287930199,
"grad_norm": 14.430279263843016,
"learning_rate": 8.917609046849759e-06,
"loss": 2.754612922668457,
"step": 553
},
{
"epoch": 0.2685409597673291,
"grad_norm": 10.550354844647572,
"learning_rate": 8.933764135702748e-06,
"loss": 3.3319764137268066,
"step": 554
},
{
"epoch": 0.2690256907416384,
"grad_norm": 8.251758308745448,
"learning_rate": 8.949919224555735e-06,
"loss": 2.7776479721069336,
"step": 555
},
{
"epoch": 0.26951042171594763,
"grad_norm": 61.076018916596674,
"learning_rate": 8.966074313408725e-06,
"loss": 2.336108446121216,
"step": 556
},
{
"epoch": 0.2699951526902569,
"grad_norm": 29.287689424674404,
"learning_rate": 8.982229402261713e-06,
"loss": 3.1603269577026367,
"step": 557
},
{
"epoch": 0.27047988366456616,
"grad_norm": 9.666374439294671,
"learning_rate": 8.998384491114702e-06,
"loss": 2.5842807292938232,
"step": 558
},
{
"epoch": 0.27096461463887545,
"grad_norm": 12.589089782284782,
"learning_rate": 9.01453957996769e-06,
"loss": 2.846177577972412,
"step": 559
},
{
"epoch": 0.2714493456131847,
"grad_norm": 9.813775502844829,
"learning_rate": 9.03069466882068e-06,
"loss": 2.7395119667053223,
"step": 560
},
{
"epoch": 0.27193407658749397,
"grad_norm": 6.428064026780721,
"learning_rate": 9.046849757673668e-06,
"loss": 2.8863372802734375,
"step": 561
},
{
"epoch": 0.2724188075618032,
"grad_norm": 9.121432608750059,
"learning_rate": 9.063004846526656e-06,
"loss": 3.2077858448028564,
"step": 562
},
{
"epoch": 0.27290353853611243,
"grad_norm": 13.758289218500858,
"learning_rate": 9.079159935379646e-06,
"loss": 2.902139663696289,
"step": 563
},
{
"epoch": 0.2733882695104217,
"grad_norm": 13.956653179774674,
"learning_rate": 9.095315024232634e-06,
"loss": 2.42870831489563,
"step": 564
},
{
"epoch": 0.27387300048473096,
"grad_norm": 10.00292650037617,
"learning_rate": 9.111470113085623e-06,
"loss": 2.9871280193328857,
"step": 565
},
{
"epoch": 0.27435773145904024,
"grad_norm": 14.421911876947174,
"learning_rate": 9.127625201938612e-06,
"loss": 2.8641467094421387,
"step": 566
},
{
"epoch": 0.2748424624333495,
"grad_norm": 19.22672240472011,
"learning_rate": 9.143780290791599e-06,
"loss": 3.2118215560913086,
"step": 567
},
{
"epoch": 0.27532719340765877,
"grad_norm": 18.236788115298083,
"learning_rate": 9.15993537964459e-06,
"loss": 3.094792604446411,
"step": 568
},
{
"epoch": 0.275811924381968,
"grad_norm": 12.31636169571686,
"learning_rate": 9.176090468497577e-06,
"loss": 3.154940128326416,
"step": 569
},
{
"epoch": 0.2762966553562773,
"grad_norm": 20.263189709191774,
"learning_rate": 9.192245557350567e-06,
"loss": 2.6635661125183105,
"step": 570
},
{
"epoch": 0.2767813863305865,
"grad_norm": 11.62302007218648,
"learning_rate": 9.208400646203555e-06,
"loss": 2.8931846618652344,
"step": 571
},
{
"epoch": 0.2772661173048958,
"grad_norm": 7.73327577818798,
"learning_rate": 9.224555735056543e-06,
"loss": 2.7136635780334473,
"step": 572
},
{
"epoch": 0.27775084827920504,
"grad_norm": 17.134639625344548,
"learning_rate": 9.240710823909532e-06,
"loss": 3.186647653579712,
"step": 573
},
{
"epoch": 0.2782355792535143,
"grad_norm": 12.467482472200999,
"learning_rate": 9.25686591276252e-06,
"loss": 3.253159761428833,
"step": 574
},
{
"epoch": 0.27872031022782356,
"grad_norm": 7.06822940534329,
"learning_rate": 9.27302100161551e-06,
"loss": 2.6557722091674805,
"step": 575
},
{
"epoch": 0.2792050412021328,
"grad_norm": 7.542120308063972,
"learning_rate": 9.289176090468498e-06,
"loss": 2.703477382659912,
"step": 576
},
{
"epoch": 0.2796897721764421,
"grad_norm": 28.009645176722533,
"learning_rate": 9.305331179321486e-06,
"loss": 3.03425931930542,
"step": 577
},
{
"epoch": 0.2801745031507513,
"grad_norm": 17.110839020224976,
"learning_rate": 9.321486268174475e-06,
"loss": 3.137514591217041,
"step": 578
},
{
"epoch": 0.2806592341250606,
"grad_norm": 10.913501718645092,
"learning_rate": 9.337641357027464e-06,
"loss": 2.24821138381958,
"step": 579
},
{
"epoch": 0.28114396509936984,
"grad_norm": 13.496660040828896,
"learning_rate": 9.353796445880453e-06,
"loss": 2.759817123413086,
"step": 580
},
{
"epoch": 0.28162869607367913,
"grad_norm": 15.207393577867734,
"learning_rate": 9.36995153473344e-06,
"loss": 2.4526164531707764,
"step": 581
},
{
"epoch": 0.28211342704798836,
"grad_norm": 10.921073809353944,
"learning_rate": 9.386106623586431e-06,
"loss": 2.839501142501831,
"step": 582
},
{
"epoch": 0.28259815802229765,
"grad_norm": 14.631289577036307,
"learning_rate": 9.402261712439418e-06,
"loss": 2.544313907623291,
"step": 583
},
{
"epoch": 0.2830828889966069,
"grad_norm": 7.582950692268401,
"learning_rate": 9.418416801292407e-06,
"loss": 2.7478795051574707,
"step": 584
},
{
"epoch": 0.2835676199709161,
"grad_norm": 18.52892738698398,
"learning_rate": 9.434571890145396e-06,
"loss": 2.717921733856201,
"step": 585
},
{
"epoch": 0.2840523509452254,
"grad_norm": 13.098186483318289,
"learning_rate": 9.450726978998385e-06,
"loss": 3.7606163024902344,
"step": 586
},
{
"epoch": 0.28453708191953464,
"grad_norm": 8.159342352414145,
"learning_rate": 9.466882067851374e-06,
"loss": 2.818803310394287,
"step": 587
},
{
"epoch": 0.28502181289384393,
"grad_norm": 7.6660707426336865,
"learning_rate": 9.483037156704361e-06,
"loss": 2.703669786453247,
"step": 588
},
{
"epoch": 0.28550654386815316,
"grad_norm": 10.795290568863644,
"learning_rate": 9.499192245557352e-06,
"loss": 2.9259095191955566,
"step": 589
},
{
"epoch": 0.28599127484246245,
"grad_norm": 24.145420029717414,
"learning_rate": 9.51534733441034e-06,
"loss": 2.3472390174865723,
"step": 590
},
{
"epoch": 0.2864760058167717,
"grad_norm": 13.722238193006714,
"learning_rate": 9.531502423263328e-06,
"loss": 2.146268844604492,
"step": 591
},
{
"epoch": 0.286960736791081,
"grad_norm": 14.739100544274947,
"learning_rate": 9.547657512116317e-06,
"loss": 2.953192710876465,
"step": 592
},
{
"epoch": 0.2874454677653902,
"grad_norm": 22.987303665265085,
"learning_rate": 9.563812600969306e-06,
"loss": 2.140713691711426,
"step": 593
},
{
"epoch": 0.2879301987396995,
"grad_norm": 9.194633747375827,
"learning_rate": 9.579967689822295e-06,
"loss": 2.8747780323028564,
"step": 594
},
{
"epoch": 0.2884149297140087,
"grad_norm": 15.183725453880548,
"learning_rate": 9.596122778675282e-06,
"loss": 3.1375646591186523,
"step": 595
},
{
"epoch": 0.28889966068831796,
"grad_norm": 7.077213675037,
"learning_rate": 9.612277867528273e-06,
"loss": 2.696035861968994,
"step": 596
},
{
"epoch": 0.28938439166262725,
"grad_norm": 12.007502678952362,
"learning_rate": 9.62843295638126e-06,
"loss": 2.5059971809387207,
"step": 597
},
{
"epoch": 0.2898691226369365,
"grad_norm": 16.273451036124623,
"learning_rate": 9.64458804523425e-06,
"loss": 3.2842116355895996,
"step": 598
},
{
"epoch": 0.29035385361124577,
"grad_norm": 6.84635880527528,
"learning_rate": 9.660743134087238e-06,
"loss": 2.6920928955078125,
"step": 599
},
{
"epoch": 0.290838584585555,
"grad_norm": 8.745121504455017,
"learning_rate": 9.676898222940227e-06,
"loss": 2.691059112548828,
"step": 600
},
{
"epoch": 0.2913233155598643,
"grad_norm": 9.064944164238769,
"learning_rate": 9.693053311793216e-06,
"loss": 3.032911777496338,
"step": 601
},
{
"epoch": 0.2918080465341735,
"grad_norm": 14.699707444123556,
"learning_rate": 9.709208400646203e-06,
"loss": 2.4884867668151855,
"step": 602
},
{
"epoch": 0.2922927775084828,
"grad_norm": 10.863236501083309,
"learning_rate": 9.725363489499194e-06,
"loss": 3.2907872200012207,
"step": 603
},
{
"epoch": 0.29277750848279205,
"grad_norm": 8.446063068442388,
"learning_rate": 9.741518578352181e-06,
"loss": 2.4553451538085938,
"step": 604
},
{
"epoch": 0.2932622394571013,
"grad_norm": 14.677203178529009,
"learning_rate": 9.75767366720517e-06,
"loss": 3.2830653190612793,
"step": 605
},
{
"epoch": 0.29374697043141057,
"grad_norm": 20.97620289628613,
"learning_rate": 9.773828756058159e-06,
"loss": 2.687744140625,
"step": 606
},
{
"epoch": 0.2942317014057198,
"grad_norm": 15.615110658461212,
"learning_rate": 9.789983844911148e-06,
"loss": 3.00041127204895,
"step": 607
},
{
"epoch": 0.2947164323800291,
"grad_norm": 9.097400389535657,
"learning_rate": 9.806138933764137e-06,
"loss": 2.8493447303771973,
"step": 608
},
{
"epoch": 0.2952011633543383,
"grad_norm": 19.56823901327804,
"learning_rate": 9.822294022617124e-06,
"loss": 2.214118719100952,
"step": 609
},
{
"epoch": 0.2956858943286476,
"grad_norm": 7.720621987644497,
"learning_rate": 9.838449111470115e-06,
"loss": 1.8667590618133545,
"step": 610
},
{
"epoch": 0.29617062530295685,
"grad_norm": 20.02024663715265,
"learning_rate": 9.854604200323102e-06,
"loss": 2.5378341674804688,
"step": 611
},
{
"epoch": 0.29665535627726614,
"grad_norm": 9.860324680178577,
"learning_rate": 9.870759289176091e-06,
"loss": 2.353903293609619,
"step": 612
},
{
"epoch": 0.29714008725157537,
"grad_norm": 8.688639638334932,
"learning_rate": 9.88691437802908e-06,
"loss": 2.7170298099517822,
"step": 613
},
{
"epoch": 0.29762481822588466,
"grad_norm": 8.50526461410402,
"learning_rate": 9.903069466882069e-06,
"loss": 2.5609781742095947,
"step": 614
},
{
"epoch": 0.2981095492001939,
"grad_norm": 17.381565556073028,
"learning_rate": 9.919224555735058e-06,
"loss": 2.5710270404815674,
"step": 615
},
{
"epoch": 0.2985942801745031,
"grad_norm": 13.496579693880118,
"learning_rate": 9.935379644588045e-06,
"loss": 3.379662036895752,
"step": 616
},
{
"epoch": 0.2990790111488124,
"grad_norm": 12.231608668071722,
"learning_rate": 9.951534733441036e-06,
"loss": 2.882258892059326,
"step": 617
},
{
"epoch": 0.29956374212312165,
"grad_norm": 9.147049259855587,
"learning_rate": 9.967689822294023e-06,
"loss": 2.7139391899108887,
"step": 618
},
{
"epoch": 0.30004847309743093,
"grad_norm": 22.23495790990954,
"learning_rate": 9.983844911147012e-06,
"loss": 2.8503494262695312,
"step": 619
},
{
"epoch": 0.30053320407174017,
"grad_norm": 9.828121138692163,
"learning_rate": 1e-05,
"loss": 2.7309300899505615,
"step": 620
},
{
"epoch": 0.30101793504604946,
"grad_norm": 12.260740716341134,
"learning_rate": 9.999999204702986e-06,
"loss": 2.670846462249756,
"step": 621
},
{
"epoch": 0.3015026660203587,
"grad_norm": 6.751906393305764,
"learning_rate": 9.999996818812196e-06,
"loss": 2.921444892883301,
"step": 622
},
{
"epoch": 0.301987396994668,
"grad_norm": 8.747467977786895,
"learning_rate": 9.999992842328388e-06,
"loss": 3.1028003692626953,
"step": 623
},
{
"epoch": 0.3024721279689772,
"grad_norm": 8.47629697025785,
"learning_rate": 9.999987275252826e-06,
"loss": 2.22802734375,
"step": 624
},
{
"epoch": 0.3029568589432865,
"grad_norm": 13.714557604437413,
"learning_rate": 9.999980117587285e-06,
"loss": 3.135064125061035,
"step": 625
},
{
"epoch": 0.30344158991759573,
"grad_norm": 17.739533437070303,
"learning_rate": 9.999971369334039e-06,
"loss": 3.171187400817871,
"step": 626
},
{
"epoch": 0.30392632089190497,
"grad_norm": 9.331249324864066,
"learning_rate": 9.999961030495872e-06,
"loss": 2.6114511489868164,
"step": 627
},
{
"epoch": 0.30441105186621426,
"grad_norm": 12.17837529007698,
"learning_rate": 9.999949101076074e-06,
"loss": 2.145001173019409,
"step": 628
},
{
"epoch": 0.3048957828405235,
"grad_norm": 10.227361078376676,
"learning_rate": 9.999935581078437e-06,
"loss": 2.6250903606414795,
"step": 629
},
{
"epoch": 0.3053805138148328,
"grad_norm": 14.451796968207363,
"learning_rate": 9.999920470507263e-06,
"loss": 3.285691738128662,
"step": 630
},
{
"epoch": 0.305865244789142,
"grad_norm": 7.482055721278114,
"learning_rate": 9.99990376936736e-06,
"loss": 2.8316941261291504,
"step": 631
},
{
"epoch": 0.3063499757634513,
"grad_norm": 19.745139640477202,
"learning_rate": 9.999885477664042e-06,
"loss": 3.228722095489502,
"step": 632
},
{
"epoch": 0.30683470673776053,
"grad_norm": 8.86185865644675,
"learning_rate": 9.999865595403126e-06,
"loss": 2.8015646934509277,
"step": 633
},
{
"epoch": 0.3073194377120698,
"grad_norm": 10.182680710365217,
"learning_rate": 9.999844122590937e-06,
"loss": 2.5243935585021973,
"step": 634
},
{
"epoch": 0.30780416868637905,
"grad_norm": 10.80885414848839,
"learning_rate": 9.999821059234308e-06,
"loss": 2.48337984085083,
"step": 635
},
{
"epoch": 0.30828889966068834,
"grad_norm": 12.115101483044057,
"learning_rate": 9.999796405340572e-06,
"loss": 3.2530620098114014,
"step": 636
},
{
"epoch": 0.3087736306349976,
"grad_norm": 16.53151337252489,
"learning_rate": 9.999770160917573e-06,
"loss": 2.501361131668091,
"step": 637
},
{
"epoch": 0.3092583616093068,
"grad_norm": 13.388829933088832,
"learning_rate": 9.999742325973662e-06,
"loss": 3.0092897415161133,
"step": 638
},
{
"epoch": 0.3097430925836161,
"grad_norm": 15.936454835703657,
"learning_rate": 9.999712900517694e-06,
"loss": 3.056016683578491,
"step": 639
},
{
"epoch": 0.31022782355792533,
"grad_norm": 10.252108509714137,
"learning_rate": 9.999681884559027e-06,
"loss": 2.442275047302246,
"step": 640
},
{
"epoch": 0.3107125545322346,
"grad_norm": 11.4342727402994,
"learning_rate": 9.999649278107531e-06,
"loss": 2.7167398929595947,
"step": 641
},
{
"epoch": 0.31119728550654385,
"grad_norm": 14.373808628825653,
"learning_rate": 9.999615081173576e-06,
"loss": 2.4128470420837402,
"step": 642
},
{
"epoch": 0.31168201648085314,
"grad_norm": 17.06352856828186,
"learning_rate": 9.999579293768042e-06,
"loss": 3.646886110305786,
"step": 643
},
{
"epoch": 0.3121667474551624,
"grad_norm": 24.831499930706403,
"learning_rate": 9.999541915902313e-06,
"loss": 2.5089776515960693,
"step": 644
},
{
"epoch": 0.31265147842947166,
"grad_norm": 15.088930613790332,
"learning_rate": 9.999502947588279e-06,
"loss": 2.709338665008545,
"step": 645
},
{
"epoch": 0.3131362094037809,
"grad_norm": 7.535134095424508,
"learning_rate": 9.999462388838339e-06,
"loss": 2.8010928630828857,
"step": 646
},
{
"epoch": 0.3136209403780902,
"grad_norm": 10.383630204348195,
"learning_rate": 9.999420239665393e-06,
"loss": 2.990818738937378,
"step": 647
},
{
"epoch": 0.3141056713523994,
"grad_norm": 6.5086245541791286,
"learning_rate": 9.999376500082852e-06,
"loss": 2.7192511558532715,
"step": 648
},
{
"epoch": 0.31459040232670865,
"grad_norm": 13.358638167257467,
"learning_rate": 9.999331170104628e-06,
"loss": 2.513715982437134,
"step": 649
},
{
"epoch": 0.31507513330101794,
"grad_norm": 23.47142171100438,
"learning_rate": 9.999284249745143e-06,
"loss": 2.777709722518921,
"step": 650
},
{
"epoch": 0.3155598642753272,
"grad_norm": 25.526091409629558,
"learning_rate": 9.99923573901932e-06,
"loss": 3.1815686225891113,
"step": 651
},
{
"epoch": 0.31604459524963646,
"grad_norm": 15.960638307909411,
"learning_rate": 9.999185637942595e-06,
"loss": 2.186250686645508,
"step": 652
},
{
"epoch": 0.3165293262239457,
"grad_norm": 15.532884816279603,
"learning_rate": 9.999133946530905e-06,
"loss": 2.3726680278778076,
"step": 653
},
{
"epoch": 0.317014057198255,
"grad_norm": 13.315462728051703,
"learning_rate": 9.999080664800692e-06,
"loss": 2.8985157012939453,
"step": 654
},
{
"epoch": 0.3174987881725642,
"grad_norm": 10.850411499188183,
"learning_rate": 9.99902579276891e-06,
"loss": 2.851388454437256,
"step": 655
},
{
"epoch": 0.3179835191468735,
"grad_norm": 7.955447458463231,
"learning_rate": 9.998969330453012e-06,
"loss": 2.4516022205352783,
"step": 656
},
{
"epoch": 0.31846825012118274,
"grad_norm": 9.110993721566086,
"learning_rate": 9.99891127787096e-06,
"loss": 2.838332176208496,
"step": 657
},
{
"epoch": 0.318952981095492,
"grad_norm": 7.762468563705271,
"learning_rate": 9.99885163504122e-06,
"loss": 2.653883934020996,
"step": 658
},
{
"epoch": 0.31943771206980126,
"grad_norm": 41.62966945163742,
"learning_rate": 9.998790401982768e-06,
"loss": 2.4844813346862793,
"step": 659
},
{
"epoch": 0.3199224430441105,
"grad_norm": 15.148214198807628,
"learning_rate": 9.998727578715083e-06,
"loss": 3.15179443359375,
"step": 660
},
{
"epoch": 0.3204071740184198,
"grad_norm": 7.421656844856917,
"learning_rate": 9.99866316525815e-06,
"loss": 2.7141027450561523,
"step": 661
},
{
"epoch": 0.320891904992729,
"grad_norm": 8.389262471627141,
"learning_rate": 9.99859716163246e-06,
"loss": 2.700939178466797,
"step": 662
},
{
"epoch": 0.3213766359670383,
"grad_norm": 17.619554372193434,
"learning_rate": 9.99852956785901e-06,
"loss": 2.5625526905059814,
"step": 663
},
{
"epoch": 0.32186136694134754,
"grad_norm": 7.1321212850298785,
"learning_rate": 9.998460383959303e-06,
"loss": 3.0957679748535156,
"step": 664
},
{
"epoch": 0.3223460979156568,
"grad_norm": 9.57186612445119,
"learning_rate": 9.998389609955348e-06,
"loss": 2.4932756423950195,
"step": 665
},
{
"epoch": 0.32283082888996606,
"grad_norm": 12.343177145558466,
"learning_rate": 9.998317245869658e-06,
"loss": 3.2262141704559326,
"step": 666
},
{
"epoch": 0.32331555986427535,
"grad_norm": 14.715539347983599,
"learning_rate": 9.998243291725257e-06,
"loss": 2.7957613468170166,
"step": 667
},
{
"epoch": 0.3238002908385846,
"grad_norm": 13.236848291359275,
"learning_rate": 9.998167747545667e-06,
"loss": 2.6855316162109375,
"step": 668
},
{
"epoch": 0.32428502181289387,
"grad_norm": 7.765066296209513,
"learning_rate": 9.99809061335492e-06,
"loss": 2.8205742835998535,
"step": 669
},
{
"epoch": 0.3247697527872031,
"grad_norm": 23.166535544374998,
"learning_rate": 9.998011889177558e-06,
"loss": 2.5017223358154297,
"step": 670
},
{
"epoch": 0.32525448376151234,
"grad_norm": 8.54577120123754,
"learning_rate": 9.99793157503862e-06,
"loss": 2.772392749786377,
"step": 671
},
{
"epoch": 0.3257392147358216,
"grad_norm": 7.682566488292613,
"learning_rate": 9.99784967096366e-06,
"loss": 2.8288636207580566,
"step": 672
},
{
"epoch": 0.32622394571013086,
"grad_norm": 11.20796967126013,
"learning_rate": 9.99776617697873e-06,
"loss": 2.674302101135254,
"step": 673
},
{
"epoch": 0.32670867668444015,
"grad_norm": 12.109335239049603,
"learning_rate": 9.997681093110392e-06,
"loss": 2.962531089782715,
"step": 674
},
{
"epoch": 0.3271934076587494,
"grad_norm": 18.66388438117223,
"learning_rate": 9.997594419385712e-06,
"loss": 2.788407802581787,
"step": 675
},
{
"epoch": 0.32767813863305867,
"grad_norm": 20.245620349169513,
"learning_rate": 9.997506155832263e-06,
"loss": 3.160149574279785,
"step": 676
},
{
"epoch": 0.3281628696073679,
"grad_norm": 15.999338375712332,
"learning_rate": 9.997416302478126e-06,
"loss": 2.868262767791748,
"step": 677
},
{
"epoch": 0.3286476005816772,
"grad_norm": 13.594944483524483,
"learning_rate": 9.99732485935188e-06,
"loss": 3.368138313293457,
"step": 678
},
{
"epoch": 0.3291323315559864,
"grad_norm": 7.316347610464814,
"learning_rate": 9.997231826482618e-06,
"loss": 2.6404144763946533,
"step": 679
},
{
"epoch": 0.3296170625302957,
"grad_norm": 9.032744957022963,
"learning_rate": 9.997137203899935e-06,
"loss": 2.5584897994995117,
"step": 680
},
{
"epoch": 0.33010179350460495,
"grad_norm": 14.516297394020462,
"learning_rate": 9.997040991633933e-06,
"loss": 3.285829544067383,
"step": 681
},
{
"epoch": 0.3305865244789142,
"grad_norm": 18.40328496830814,
"learning_rate": 9.996943189715216e-06,
"loss": 3.060077667236328,
"step": 682
},
{
"epoch": 0.33107125545322347,
"grad_norm": 8.359074399863477,
"learning_rate": 9.996843798174901e-06,
"loss": 2.499845266342163,
"step": 683
},
{
"epoch": 0.3315559864275327,
"grad_norm": 7.502818577242123,
"learning_rate": 9.996742817044603e-06,
"loss": 2.7958526611328125,
"step": 684
},
{
"epoch": 0.332040717401842,
"grad_norm": 19.65466581712014,
"learning_rate": 9.996640246356446e-06,
"loss": 2.839144229888916,
"step": 685
},
{
"epoch": 0.3325254483761512,
"grad_norm": 10.457571749744073,
"learning_rate": 9.996536086143061e-06,
"loss": 2.8158533573150635,
"step": 686
},
{
"epoch": 0.3330101793504605,
"grad_norm": 10.761210646325297,
"learning_rate": 9.996430336437582e-06,
"loss": 2.817018985748291,
"step": 687
},
{
"epoch": 0.33349491032476974,
"grad_norm": 8.430580979770134,
"learning_rate": 9.996322997273652e-06,
"loss": 2.4647176265716553,
"step": 688
},
{
"epoch": 0.33397964129907903,
"grad_norm": 11.086868733438022,
"learning_rate": 9.996214068685416e-06,
"loss": 2.7634971141815186,
"step": 689
},
{
"epoch": 0.33446437227338827,
"grad_norm": 13.491725766519021,
"learning_rate": 9.996103550707528e-06,
"loss": 2.4872968196868896,
"step": 690
},
{
"epoch": 0.33494910324769755,
"grad_norm": 15.105527566601912,
"learning_rate": 9.995991443375143e-06,
"loss": 3.3228759765625,
"step": 691
},
{
"epoch": 0.3354338342220068,
"grad_norm": 31.92218748542898,
"learning_rate": 9.995877746723928e-06,
"loss": 2.5436038970947266,
"step": 692
},
{
"epoch": 0.335918565196316,
"grad_norm": 13.395975811592427,
"learning_rate": 9.995762460790048e-06,
"loss": 2.941404342651367,
"step": 693
},
{
"epoch": 0.3364032961706253,
"grad_norm": 11.372931285412914,
"learning_rate": 9.995645585610182e-06,
"loss": 2.1275315284729004,
"step": 694
},
{
"epoch": 0.33688802714493454,
"grad_norm": 16.40656428126764,
"learning_rate": 9.995527121221504e-06,
"loss": 3.1295480728149414,
"step": 695
},
{
"epoch": 0.33737275811924383,
"grad_norm": 9.208166347153867,
"learning_rate": 9.995407067661707e-06,
"loss": 2.825859546661377,
"step": 696
},
{
"epoch": 0.33785748909355307,
"grad_norm": 26.144926925562075,
"learning_rate": 9.995285424968977e-06,
"loss": 2.2747159004211426,
"step": 697
},
{
"epoch": 0.33834222006786235,
"grad_norm": 12.388511007540066,
"learning_rate": 9.995162193182015e-06,
"loss": 2.8758692741394043,
"step": 698
},
{
"epoch": 0.3388269510421716,
"grad_norm": 15.431266193913823,
"learning_rate": 9.99503737234002e-06,
"loss": 3.138793706893921,
"step": 699
},
{
"epoch": 0.3393116820164809,
"grad_norm": 6.510317622035266,
"learning_rate": 9.9949109624827e-06,
"loss": 3.006511688232422,
"step": 700
},
{
"epoch": 0.3397964129907901,
"grad_norm": 8.705628877984175,
"learning_rate": 9.99478296365027e-06,
"loss": 2.1318817138671875,
"step": 701
},
{
"epoch": 0.3402811439650994,
"grad_norm": 19.295042054482156,
"learning_rate": 9.994653375883448e-06,
"loss": 3.1855883598327637,
"step": 702
},
{
"epoch": 0.34076587493940863,
"grad_norm": 18.882894072648934,
"learning_rate": 9.99452219922346e-06,
"loss": 3.116706132888794,
"step": 703
},
{
"epoch": 0.34125060591371786,
"grad_norm": 37.820225290666365,
"learning_rate": 9.994389433712032e-06,
"loss": 4.677980899810791,
"step": 704
},
{
"epoch": 0.34173533688802715,
"grad_norm": 25.760852198464256,
"learning_rate": 9.994255079391402e-06,
"loss": 4.274224281311035,
"step": 705
},
{
"epoch": 0.3422200678623364,
"grad_norm": 12.075281316184196,
"learning_rate": 9.99411913630431e-06,
"loss": 3.0805575847625732,
"step": 706
},
{
"epoch": 0.3427047988366457,
"grad_norm": 14.295547474570395,
"learning_rate": 9.993981604494003e-06,
"loss": 2.807041883468628,
"step": 707
},
{
"epoch": 0.3431895298109549,
"grad_norm": 10.687996221506241,
"learning_rate": 9.99384248400423e-06,
"loss": 3.1927270889282227,
"step": 708
},
{
"epoch": 0.3436742607852642,
"grad_norm": 18.78772703430284,
"learning_rate": 9.993701774879252e-06,
"loss": 2.9225175380706787,
"step": 709
},
{
"epoch": 0.34415899175957343,
"grad_norm": 10.374467283105126,
"learning_rate": 9.993559477163827e-06,
"loss": 2.4934635162353516,
"step": 710
},
{
"epoch": 0.3446437227338827,
"grad_norm": 13.587070110834002,
"learning_rate": 9.993415590903224e-06,
"loss": 2.622765302658081,
"step": 711
},
{
"epoch": 0.34512845370819195,
"grad_norm": 20.420629256992772,
"learning_rate": 9.993270116143217e-06,
"loss": 2.4725875854492188,
"step": 712
},
{
"epoch": 0.3456131846825012,
"grad_norm": 10.900176740933942,
"learning_rate": 9.993123052930083e-06,
"loss": 2.2463479042053223,
"step": 713
},
{
"epoch": 0.3460979156568105,
"grad_norm": 20.149690685580605,
"learning_rate": 9.992974401310605e-06,
"loss": 3.01509428024292,
"step": 714
},
{
"epoch": 0.3465826466311197,
"grad_norm": 14.591730564683237,
"learning_rate": 9.992824161332073e-06,
"loss": 3.165428638458252,
"step": 715
},
{
"epoch": 0.347067377605429,
"grad_norm": 15.900310744209849,
"learning_rate": 9.992672333042281e-06,
"loss": 2.8884077072143555,
"step": 716
},
{
"epoch": 0.34755210857973823,
"grad_norm": 11.83170309761493,
"learning_rate": 9.992518916489531e-06,
"loss": 2.9176363945007324,
"step": 717
},
{
"epoch": 0.3480368395540475,
"grad_norm": 22.111583525829165,
"learning_rate": 9.992363911722622e-06,
"loss": 2.871831178665161,
"step": 718
},
{
"epoch": 0.34852157052835675,
"grad_norm": 9.789917145570021,
"learning_rate": 9.992207318790868e-06,
"loss": 2.7866430282592773,
"step": 719
},
{
"epoch": 0.34900630150266604,
"grad_norm": 26.484649043352412,
"learning_rate": 9.992049137744084e-06,
"loss": 2.6336092948913574,
"step": 720
},
{
"epoch": 0.34949103247697527,
"grad_norm": 11.430221599078621,
"learning_rate": 9.99188936863259e-06,
"loss": 1.943892478942871,
"step": 721
},
{
"epoch": 0.34997576345128456,
"grad_norm": 14.477054180681316,
"learning_rate": 9.99172801150721e-06,
"loss": 2.7128067016601562,
"step": 722
},
{
"epoch": 0.3504604944255938,
"grad_norm": 13.418821570231168,
"learning_rate": 9.991565066419276e-06,
"loss": 2.7818965911865234,
"step": 723
},
{
"epoch": 0.350945225399903,
"grad_norm": 8.38091474278535,
"learning_rate": 9.991400533420624e-06,
"loss": 2.665830135345459,
"step": 724
},
{
"epoch": 0.3514299563742123,
"grad_norm": 10.045037972531208,
"learning_rate": 9.991234412563594e-06,
"loss": 2.653308153152466,
"step": 725
},
{
"epoch": 0.35191468734852155,
"grad_norm": 12.718864692254323,
"learning_rate": 9.991066703901034e-06,
"loss": 3.412222385406494,
"step": 726
},
{
"epoch": 0.35239941832283084,
"grad_norm": 10.366314039608465,
"learning_rate": 9.990897407486293e-06,
"loss": 2.7911813259124756,
"step": 727
},
{
"epoch": 0.35288414929714007,
"grad_norm": 8.984961793180673,
"learning_rate": 9.99072652337323e-06,
"loss": 2.4562792778015137,
"step": 728
},
{
"epoch": 0.35336888027144936,
"grad_norm": 15.954807808396774,
"learning_rate": 9.990554051616205e-06,
"loss": 3.1246654987335205,
"step": 729
},
{
"epoch": 0.3538536112457586,
"grad_norm": 17.59120491745722,
"learning_rate": 9.990379992270084e-06,
"loss": 2.4972896575927734,
"step": 730
},
{
"epoch": 0.3543383422200679,
"grad_norm": 7.55668875076074,
"learning_rate": 9.99020434539024e-06,
"loss": 3.3529186248779297,
"step": 731
},
{
"epoch": 0.3548230731943771,
"grad_norm": 11.463745241829738,
"learning_rate": 9.990027111032548e-06,
"loss": 2.7614083290100098,
"step": 732
},
{
"epoch": 0.3553078041686864,
"grad_norm": 19.593624212754158,
"learning_rate": 9.98984828925339e-06,
"loss": 3.479376792907715,
"step": 733
},
{
"epoch": 0.35579253514299564,
"grad_norm": 12.103140403261358,
"learning_rate": 9.989667880109653e-06,
"loss": 2.6250128746032715,
"step": 734
},
{
"epoch": 0.35627726611730487,
"grad_norm": 20.7999140812316,
"learning_rate": 9.989485883658729e-06,
"loss": 2.908766269683838,
"step": 735
},
{
"epoch": 0.35676199709161416,
"grad_norm": 7.1793083359960415,
"learning_rate": 9.989302299958514e-06,
"loss": 2.898524761199951,
"step": 736
},
{
"epoch": 0.3572467280659234,
"grad_norm": 9.73964056610203,
"learning_rate": 9.98911712906741e-06,
"loss": 2.855175495147705,
"step": 737
},
{
"epoch": 0.3577314590402327,
"grad_norm": 6.98264701218196,
"learning_rate": 9.988930371044321e-06,
"loss": 3.06706166267395,
"step": 738
},
{
"epoch": 0.3582161900145419,
"grad_norm": 10.521198082884736,
"learning_rate": 9.988742025948661e-06,
"loss": 2.754708766937256,
"step": 739
},
{
"epoch": 0.3587009209888512,
"grad_norm": 7.329937320042151,
"learning_rate": 9.988552093840344e-06,
"loss": 2.797355890274048,
"step": 740
},
{
"epoch": 0.35918565196316043,
"grad_norm": 30.781183553898863,
"learning_rate": 9.988360574779792e-06,
"loss": 2.861785888671875,
"step": 741
},
{
"epoch": 0.3596703829374697,
"grad_norm": 13.338257936626617,
"learning_rate": 9.988167468827932e-06,
"loss": 3.2163774967193604,
"step": 742
},
{
"epoch": 0.36015511391177896,
"grad_norm": 9.734496909713723,
"learning_rate": 9.987972776046192e-06,
"loss": 2.755861282348633,
"step": 743
},
{
"epoch": 0.36063984488608825,
"grad_norm": 6.889224423175338,
"learning_rate": 9.987776496496511e-06,
"loss": 2.3172495365142822,
"step": 744
},
{
"epoch": 0.3611245758603975,
"grad_norm": 25.41407706464948,
"learning_rate": 9.987578630241326e-06,
"loss": 3.1131293773651123,
"step": 745
},
{
"epoch": 0.3616093068347067,
"grad_norm": 10.677955486843599,
"learning_rate": 9.987379177343584e-06,
"loss": 2.6914126873016357,
"step": 746
},
{
"epoch": 0.362094037809016,
"grad_norm": 15.673217238208752,
"learning_rate": 9.987178137866731e-06,
"loss": 2.9465999603271484,
"step": 747
},
{
"epoch": 0.36257876878332523,
"grad_norm": 12.649363677441324,
"learning_rate": 9.986975511874727e-06,
"loss": 2.845003128051758,
"step": 748
},
{
"epoch": 0.3630634997576345,
"grad_norm": 12.178396678194826,
"learning_rate": 9.986771299432028e-06,
"loss": 3.3292555809020996,
"step": 749
},
{
"epoch": 0.36354823073194376,
"grad_norm": 24.28539932970293,
"learning_rate": 9.986565500603598e-06,
"loss": 3.0570216178894043,
"step": 750
},
{
"epoch": 0.36403296170625304,
"grad_norm": 12.92425092168703,
"learning_rate": 9.986358115454904e-06,
"loss": 3.309410572052002,
"step": 751
},
{
"epoch": 0.3645176926805623,
"grad_norm": 15.978603275993946,
"learning_rate": 9.986149144051922e-06,
"loss": 3.30885648727417,
"step": 752
},
{
"epoch": 0.36500242365487157,
"grad_norm": 24.634105171371885,
"learning_rate": 9.98593858646113e-06,
"loss": 2.1120412349700928,
"step": 753
},
{
"epoch": 0.3654871546291808,
"grad_norm": 17.337985355053544,
"learning_rate": 9.985726442749506e-06,
"loss": 2.8474154472351074,
"step": 754
},
{
"epoch": 0.3659718856034901,
"grad_norm": 16.720051042204737,
"learning_rate": 9.985512712984543e-06,
"loss": 3.208766222000122,
"step": 755
},
{
"epoch": 0.3664566165777993,
"grad_norm": 10.618559167425476,
"learning_rate": 9.985297397234226e-06,
"loss": 2.4884374141693115,
"step": 756
},
{
"epoch": 0.36694134755210855,
"grad_norm": 8.24970123492983,
"learning_rate": 9.985080495567057e-06,
"loss": 2.7054331302642822,
"step": 757
},
{
"epoch": 0.36742607852641784,
"grad_norm": 8.89588790164145,
"learning_rate": 9.984862008052032e-06,
"loss": 2.5597891807556152,
"step": 758
},
{
"epoch": 0.3679108095007271,
"grad_norm": 8.247318791981902,
"learning_rate": 9.984641934758659e-06,
"loss": 2.904324531555176,
"step": 759
},
{
"epoch": 0.36839554047503636,
"grad_norm": 7.592881033814577,
"learning_rate": 9.984420275756945e-06,
"loss": 2.8177924156188965,
"step": 760
},
{
"epoch": 0.3688802714493456,
"grad_norm": 13.727909229380197,
"learning_rate": 9.984197031117404e-06,
"loss": 3.0664210319519043,
"step": 761
},
{
"epoch": 0.3693650024236549,
"grad_norm": 20.96544280200233,
"learning_rate": 9.983972200911058e-06,
"loss": 2.62821102142334,
"step": 762
},
{
"epoch": 0.3698497333979641,
"grad_norm": 16.961075397396545,
"learning_rate": 9.983745785209426e-06,
"loss": 2.5816564559936523,
"step": 763
},
{
"epoch": 0.3703344643722734,
"grad_norm": 9.050186358719078,
"learning_rate": 9.983517784084535e-06,
"loss": 2.591451406478882,
"step": 764
},
{
"epoch": 0.37081919534658264,
"grad_norm": 15.093283020522257,
"learning_rate": 9.98328819760892e-06,
"loss": 2.492016077041626,
"step": 765
},
{
"epoch": 0.37130392632089193,
"grad_norm": 8.19103602786296,
"learning_rate": 9.983057025855611e-06,
"loss": 3.091909408569336,
"step": 766
},
{
"epoch": 0.37178865729520116,
"grad_norm": 19.929814655883945,
"learning_rate": 9.982824268898156e-06,
"loss": 3.1959097385406494,
"step": 767
},
{
"epoch": 0.3722733882695104,
"grad_norm": 14.500363547774946,
"learning_rate": 9.982589926810591e-06,
"loss": 2.3195278644561768,
"step": 768
},
{
"epoch": 0.3727581192438197,
"grad_norm": 18.804145083873195,
"learning_rate": 9.982353999667469e-06,
"loss": 2.3113183975219727,
"step": 769
},
{
"epoch": 0.3732428502181289,
"grad_norm": 12.650978622607019,
"learning_rate": 9.982116487543844e-06,
"loss": 3.2522385120391846,
"step": 770
},
{
"epoch": 0.3737275811924382,
"grad_norm": 10.349565366946237,
"learning_rate": 9.981877390515271e-06,
"loss": 2.557713031768799,
"step": 771
},
{
"epoch": 0.37421231216674744,
"grad_norm": 14.793733435059222,
"learning_rate": 9.981636708657811e-06,
"loss": 3.659187078475952,
"step": 772
},
{
"epoch": 0.37469704314105673,
"grad_norm": 6.46497457788762,
"learning_rate": 9.981394442048029e-06,
"loss": 2.658334732055664,
"step": 773
},
{
"epoch": 0.37518177411536596,
"grad_norm": 14.978661992938164,
"learning_rate": 9.981150590762999e-06,
"loss": 2.99147891998291,
"step": 774
},
{
"epoch": 0.37566650508967525,
"grad_norm": 8.566397106276742,
"learning_rate": 9.980905154880288e-06,
"loss": 2.846555709838867,
"step": 775
},
{
"epoch": 0.3761512360639845,
"grad_norm": 14.436707286974379,
"learning_rate": 9.980658134477979e-06,
"loss": 2.9560694694519043,
"step": 776
},
{
"epoch": 0.3766359670382938,
"grad_norm": 10.454847034950456,
"learning_rate": 9.980409529634652e-06,
"loss": 2.599022626876831,
"step": 777
},
{
"epoch": 0.377120698012603,
"grad_norm": 9.58883037082888,
"learning_rate": 9.980159340429392e-06,
"loss": 2.766244888305664,
"step": 778
},
{
"epoch": 0.37760542898691224,
"grad_norm": 16.988704009672954,
"learning_rate": 9.97990756694179e-06,
"loss": 2.8693313598632812,
"step": 779
},
{
"epoch": 0.37809015996122153,
"grad_norm": 15.779326203685509,
"learning_rate": 9.979654209251939e-06,
"loss": 3.1817002296447754,
"step": 780
},
{
"epoch": 0.37857489093553076,
"grad_norm": 7.47064092965736,
"learning_rate": 9.979399267440436e-06,
"loss": 2.446748733520508,
"step": 781
},
{
"epoch": 0.37905962190984005,
"grad_norm": 9.748087349295412,
"learning_rate": 9.979142741588388e-06,
"loss": 2.9469456672668457,
"step": 782
},
{
"epoch": 0.3795443528841493,
"grad_norm": 14.041546008318983,
"learning_rate": 9.978884631777394e-06,
"loss": 3.0535736083984375,
"step": 783
},
{
"epoch": 0.38002908385845857,
"grad_norm": 13.968195626408669,
"learning_rate": 9.978624938089567e-06,
"loss": 2.715036153793335,
"step": 784
},
{
"epoch": 0.3805138148327678,
"grad_norm": 11.376572195368652,
"learning_rate": 9.978363660607522e-06,
"loss": 2.8412609100341797,
"step": 785
},
{
"epoch": 0.3809985458070771,
"grad_norm": 11.661442848968191,
"learning_rate": 9.978100799414371e-06,
"loss": 2.2916884422302246,
"step": 786
},
{
"epoch": 0.3814832767813863,
"grad_norm": 9.677442124172593,
"learning_rate": 9.977836354593741e-06,
"loss": 2.498358726501465,
"step": 787
},
{
"epoch": 0.3819680077556956,
"grad_norm": 12.300200681911294,
"learning_rate": 9.977570326229753e-06,
"loss": 2.706941843032837,
"step": 788
},
{
"epoch": 0.38245273873000485,
"grad_norm": 12.04789357001427,
"learning_rate": 9.977302714407038e-06,
"loss": 2.8039681911468506,
"step": 789
},
{
"epoch": 0.3829374697043141,
"grad_norm": 10.941875808030085,
"learning_rate": 9.977033519210725e-06,
"loss": 3.303619861602783,
"step": 790
},
{
"epoch": 0.38342220067862337,
"grad_norm": 6.755114410718181,
"learning_rate": 9.976762740726453e-06,
"loss": 2.9911322593688965,
"step": 791
},
{
"epoch": 0.3839069316529326,
"grad_norm": 8.176042232174456,
"learning_rate": 9.976490379040362e-06,
"loss": 1.9212416410446167,
"step": 792
},
{
"epoch": 0.3843916626272419,
"grad_norm": 21.498601899062606,
"learning_rate": 9.976216434239094e-06,
"loss": 2.8068642616271973,
"step": 793
},
{
"epoch": 0.3848763936015511,
"grad_norm": 13.048947242848914,
"learning_rate": 9.975940906409796e-06,
"loss": 2.488760471343994,
"step": 794
},
{
"epoch": 0.3853611245758604,
"grad_norm": 10.434208836951724,
"learning_rate": 9.975663795640118e-06,
"loss": 2.7397751808166504,
"step": 795
},
{
"epoch": 0.38584585555016965,
"grad_norm": 12.176348053473065,
"learning_rate": 9.975385102018217e-06,
"loss": 2.893411636352539,
"step": 796
},
{
"epoch": 0.38633058652447894,
"grad_norm": 12.107152039436476,
"learning_rate": 9.975104825632746e-06,
"loss": 3.171610116958618,
"step": 797
},
{
"epoch": 0.38681531749878817,
"grad_norm": 10.94710234835201,
"learning_rate": 9.974822966572872e-06,
"loss": 2.724651336669922,
"step": 798
},
{
"epoch": 0.38730004847309746,
"grad_norm": 16.287702037143056,
"learning_rate": 9.974539524928255e-06,
"loss": 2.9989566802978516,
"step": 799
},
{
"epoch": 0.3877847794474067,
"grad_norm": 11.720772227178934,
"learning_rate": 9.974254500789065e-06,
"loss": 2.647449016571045,
"step": 800
},
{
"epoch": 0.3882695104217159,
"grad_norm": 8.088210269080143,
"learning_rate": 9.973967894245972e-06,
"loss": 2.7603378295898438,
"step": 801
},
{
"epoch": 0.3887542413960252,
"grad_norm": 12.69473862767583,
"learning_rate": 9.973679705390153e-06,
"loss": 2.4153244495391846,
"step": 802
},
{
"epoch": 0.38923897237033445,
"grad_norm": 18.083544982202866,
"learning_rate": 9.973389934313287e-06,
"loss": 3.293628215789795,
"step": 803
},
{
"epoch": 0.38972370334464373,
"grad_norm": 9.193464631967416,
"learning_rate": 9.973098581107554e-06,
"loss": 2.8635215759277344,
"step": 804
},
{
"epoch": 0.39020843431895297,
"grad_norm": 17.88922836050367,
"learning_rate": 9.972805645865637e-06,
"loss": 2.9886934757232666,
"step": 805
},
{
"epoch": 0.39069316529326226,
"grad_norm": 54.19597998127307,
"learning_rate": 9.972511128680727e-06,
"loss": 2.9712276458740234,
"step": 806
},
{
"epoch": 0.3911778962675715,
"grad_norm": 18.537005358875827,
"learning_rate": 9.972215029646514e-06,
"loss": 2.903895139694214,
"step": 807
},
{
"epoch": 0.3916626272418808,
"grad_norm": 7.2093534725467645,
"learning_rate": 9.971917348857194e-06,
"loss": 2.698345422744751,
"step": 808
},
{
"epoch": 0.39214735821619,
"grad_norm": 8.067277736832471,
"learning_rate": 9.971618086407464e-06,
"loss": 2.5387744903564453,
"step": 809
},
{
"epoch": 0.3926320891904993,
"grad_norm": 7.308023870660365,
"learning_rate": 9.971317242392527e-06,
"loss": 2.55743408203125,
"step": 810
},
{
"epoch": 0.39311682016480853,
"grad_norm": 12.872447369612788,
"learning_rate": 9.971014816908085e-06,
"loss": 2.4330544471740723,
"step": 811
},
{
"epoch": 0.39360155113911777,
"grad_norm": 8.682795666363997,
"learning_rate": 9.970710810050344e-06,
"loss": 2.929043769836426,
"step": 812
},
{
"epoch": 0.39408628211342706,
"grad_norm": 15.486072968622553,
"learning_rate": 9.970405221916016e-06,
"loss": 2.617649555206299,
"step": 813
},
{
"epoch": 0.3945710130877363,
"grad_norm": 20.382992748765897,
"learning_rate": 9.970098052602316e-06,
"loss": 3.1482324600219727,
"step": 814
},
{
"epoch": 0.3950557440620456,
"grad_norm": 8.78698567219078,
"learning_rate": 9.969789302206957e-06,
"loss": 2.4106903076171875,
"step": 815
},
{
"epoch": 0.3955404750363548,
"grad_norm": 7.176010742407377,
"learning_rate": 9.969478970828159e-06,
"loss": 2.3184237480163574,
"step": 816
},
{
"epoch": 0.3960252060106641,
"grad_norm": 7.146720417367874,
"learning_rate": 9.969167058564646e-06,
"loss": 2.7002248764038086,
"step": 817
},
{
"epoch": 0.39650993698497333,
"grad_norm": 17.890605021073014,
"learning_rate": 9.968853565515644e-06,
"loss": 3.12306547164917,
"step": 818
},
{
"epoch": 0.3969946679592826,
"grad_norm": 23.07156608140979,
"learning_rate": 9.968538491780877e-06,
"loss": 2.127638816833496,
"step": 819
},
{
"epoch": 0.39747939893359185,
"grad_norm": 13.205286896151009,
"learning_rate": 9.968221837460578e-06,
"loss": 1.755111575126648,
"step": 820
},
{
"epoch": 0.3979641299079011,
"grad_norm": 20.766178458113057,
"learning_rate": 9.967903602655485e-06,
"loss": 3.426455020904541,
"step": 821
},
{
"epoch": 0.3984488608822104,
"grad_norm": 19.92935640509457,
"learning_rate": 9.967583787466825e-06,
"loss": 2.898526668548584,
"step": 822
},
{
"epoch": 0.3989335918565196,
"grad_norm": 11.776828152985075,
"learning_rate": 9.967262391996346e-06,
"loss": 2.6761062145233154,
"step": 823
},
{
"epoch": 0.3994183228308289,
"grad_norm": 8.849034518286391,
"learning_rate": 9.966939416346287e-06,
"loss": 3.0885534286499023,
"step": 824
},
{
"epoch": 0.39990305380513813,
"grad_norm": 15.882411107323405,
"learning_rate": 9.96661486061939e-06,
"loss": 2.9527316093444824,
"step": 825
},
{
"epoch": 0.4003877847794474,
"grad_norm": 6.409332943738933,
"learning_rate": 9.966288724918904e-06,
"loss": 2.634197473526001,
"step": 826
},
{
"epoch": 0.40087251575375665,
"grad_norm": 8.659326332626073,
"learning_rate": 9.965961009348581e-06,
"loss": 2.062946081161499,
"step": 827
},
{
"epoch": 0.40135724672806594,
"grad_norm": 15.421691936107223,
"learning_rate": 9.965631714012672e-06,
"loss": 3.2153429985046387,
"step": 828
},
{
"epoch": 0.4018419777023752,
"grad_norm": 8.43305420278724,
"learning_rate": 9.96530083901593e-06,
"loss": 2.307030200958252,
"step": 829
},
{
"epoch": 0.40232670867668446,
"grad_norm": 11.917899753626758,
"learning_rate": 9.964968384463616e-06,
"loss": 2.8913068771362305,
"step": 830
},
{
"epoch": 0.4028114396509937,
"grad_norm": 14.91196355281247,
"learning_rate": 9.964634350461489e-06,
"loss": 3.018622398376465,
"step": 831
},
{
"epoch": 0.40329617062530293,
"grad_norm": 11.726447031489519,
"learning_rate": 9.96429873711581e-06,
"loss": 2.9379870891571045,
"step": 832
},
{
"epoch": 0.4037809015996122,
"grad_norm": 16.40303667532536,
"learning_rate": 9.963961544533345e-06,
"loss": 2.9584810733795166,
"step": 833
},
{
"epoch": 0.40426563257392145,
"grad_norm": 15.978114374132975,
"learning_rate": 9.963622772821363e-06,
"loss": 3.0638749599456787,
"step": 834
},
{
"epoch": 0.40475036354823074,
"grad_norm": 14.71999028410575,
"learning_rate": 9.963282422087628e-06,
"loss": 3.256152868270874,
"step": 835
},
{
"epoch": 0.40523509452254,
"grad_norm": 8.653764869726439,
"learning_rate": 9.962940492440419e-06,
"loss": 3.438579559326172,
"step": 836
},
{
"epoch": 0.40571982549684926,
"grad_norm": 14.226209087665225,
"learning_rate": 9.962596983988504e-06,
"loss": 3.0970964431762695,
"step": 837
},
{
"epoch": 0.4062045564711585,
"grad_norm": 39.166778871684535,
"learning_rate": 9.962251896841167e-06,
"loss": 2.3669538497924805,
"step": 838
},
{
"epoch": 0.4066892874454678,
"grad_norm": 12.99862535475374,
"learning_rate": 9.961905231108178e-06,
"loss": 2.783137559890747,
"step": 839
},
{
"epoch": 0.407174018419777,
"grad_norm": 14.290419684190756,
"learning_rate": 9.961556986899824e-06,
"loss": 3.281296491622925,
"step": 840
},
{
"epoch": 0.4076587493940863,
"grad_norm": 10.160672558651909,
"learning_rate": 9.961207164326887e-06,
"loss": 2.195772886276245,
"step": 841
},
{
"epoch": 0.40814348036839554,
"grad_norm": 11.860923498963412,
"learning_rate": 9.96085576350065e-06,
"loss": 3.0294415950775146,
"step": 842
},
{
"epoch": 0.4086282113427048,
"grad_norm": 11.512605827366885,
"learning_rate": 9.960502784532901e-06,
"loss": 3.2066032886505127,
"step": 843
},
{
"epoch": 0.40911294231701406,
"grad_norm": 10.349595551093355,
"learning_rate": 9.960148227535932e-06,
"loss": 2.4805502891540527,
"step": 844
},
{
"epoch": 0.4095976732913233,
"grad_norm": 19.59646634090976,
"learning_rate": 9.959792092622532e-06,
"loss": 2.77164363861084,
"step": 845
},
{
"epoch": 0.4100824042656326,
"grad_norm": 11.788129738782203,
"learning_rate": 9.959434379905993e-06,
"loss": 2.8068301677703857,
"step": 846
},
{
"epoch": 0.4105671352399418,
"grad_norm": 16.90276076771267,
"learning_rate": 9.95907508950011e-06,
"loss": 3.124781370162964,
"step": 847
},
{
"epoch": 0.4110518662142511,
"grad_norm": 22.888542793085794,
"learning_rate": 9.958714221519184e-06,
"loss": 2.9154090881347656,
"step": 848
},
{
"epoch": 0.41153659718856034,
"grad_norm": 17.101760572444793,
"learning_rate": 9.95835177607801e-06,
"loss": 3.137425422668457,
"step": 849
},
{
"epoch": 0.4120213281628696,
"grad_norm": 29.29600643964302,
"learning_rate": 9.95798775329189e-06,
"loss": 2.357113838195801,
"step": 850
},
{
"epoch": 0.41250605913717886,
"grad_norm": 15.757750173655678,
"learning_rate": 9.957622153276627e-06,
"loss": 2.753471851348877,
"step": 851
},
{
"epoch": 0.41299079011148815,
"grad_norm": 10.243482483127424,
"learning_rate": 9.957254976148525e-06,
"loss": 2.626065969467163,
"step": 852
},
{
"epoch": 0.4134755210857974,
"grad_norm": 18.185774573500364,
"learning_rate": 9.95688622202439e-06,
"loss": 2.322932720184326,
"step": 853
},
{
"epoch": 0.4139602520601066,
"grad_norm": 8.829039517348551,
"learning_rate": 9.956515891021528e-06,
"loss": 2.608257293701172,
"step": 854
},
{
"epoch": 0.4144449830344159,
"grad_norm": 19.507675360532417,
"learning_rate": 9.95614398325775e-06,
"loss": 2.8876819610595703,
"step": 855
},
{
"epoch": 0.41492971400872514,
"grad_norm": 10.389340490537881,
"learning_rate": 9.955770498851367e-06,
"loss": 2.5203845500946045,
"step": 856
},
{
"epoch": 0.4154144449830344,
"grad_norm": 15.951307030317214,
"learning_rate": 9.95539543792119e-06,
"loss": 3.5578508377075195,
"step": 857
},
{
"epoch": 0.41589917595734366,
"grad_norm": 8.741974441707496,
"learning_rate": 9.955018800586536e-06,
"loss": 2.8089959621429443,
"step": 858
},
{
"epoch": 0.41638390693165295,
"grad_norm": 24.80995348491372,
"learning_rate": 9.954640586967216e-06,
"loss": 2.576078414916992,
"step": 859
},
{
"epoch": 0.4168686379059622,
"grad_norm": 14.373543346721036,
"learning_rate": 9.95426079718355e-06,
"loss": 2.6374711990356445,
"step": 860
},
{
"epoch": 0.41735336888027147,
"grad_norm": 13.34935715472329,
"learning_rate": 9.953879431356355e-06,
"loss": 2.1702702045440674,
"step": 861
},
{
"epoch": 0.4178380998545807,
"grad_norm": 9.948514279029046,
"learning_rate": 9.95349648960695e-06,
"loss": 2.1966440677642822,
"step": 862
},
{
"epoch": 0.41832283082889,
"grad_norm": 11.446102750941174,
"learning_rate": 9.95311197205716e-06,
"loss": 2.2091026306152344,
"step": 863
},
{
"epoch": 0.4188075618031992,
"grad_norm": 18.32311923928352,
"learning_rate": 9.9527258788293e-06,
"loss": 2.1235604286193848,
"step": 864
},
{
"epoch": 0.41929229277750846,
"grad_norm": 9.7342662428761,
"learning_rate": 9.952338210046202e-06,
"loss": 2.954572916030884,
"step": 865
},
{
"epoch": 0.41977702375181775,
"grad_norm": 7.335287714001434,
"learning_rate": 9.951948965831183e-06,
"loss": 2.903658866882324,
"step": 866
},
{
"epoch": 0.420261754726127,
"grad_norm": 12.20581127601869,
"learning_rate": 9.951558146308075e-06,
"loss": 2.5692315101623535,
"step": 867
},
{
"epoch": 0.42074648570043627,
"grad_norm": 26.230338511048345,
"learning_rate": 9.9511657516012e-06,
"loss": 2.64137864112854,
"step": 868
},
{
"epoch": 0.4212312166747455,
"grad_norm": 16.89277215999552,
"learning_rate": 9.950771781835392e-06,
"loss": 2.53505277633667,
"step": 869
},
{
"epoch": 0.4217159476490548,
"grad_norm": 22.384486264770583,
"learning_rate": 9.950376237135974e-06,
"loss": 2.8084864616394043,
"step": 870
},
{
"epoch": 0.422200678623364,
"grad_norm": 8.617392376740655,
"learning_rate": 9.949979117628779e-06,
"loss": 2.9003732204437256,
"step": 871
},
{
"epoch": 0.4226854095976733,
"grad_norm": 11.172883277158649,
"learning_rate": 9.94958042344014e-06,
"loss": 2.6358532905578613,
"step": 872
},
{
"epoch": 0.42317014057198254,
"grad_norm": 9.856812135379304,
"learning_rate": 9.949180154696885e-06,
"loss": 2.607546329498291,
"step": 873
},
{
"epoch": 0.42365487154629183,
"grad_norm": 8.926631650777102,
"learning_rate": 9.94877831152635e-06,
"loss": 3.06667423248291,
"step": 874
},
{
"epoch": 0.42413960252060107,
"grad_norm": 9.483869056538449,
"learning_rate": 9.94837489405637e-06,
"loss": 3.0768628120422363,
"step": 875
},
{
"epoch": 0.4246243334949103,
"grad_norm": 7.0744381902737175,
"learning_rate": 9.947969902415275e-06,
"loss": 2.4557063579559326,
"step": 876
},
{
"epoch": 0.4251090644692196,
"grad_norm": 17.54966373306345,
"learning_rate": 9.947563336731905e-06,
"loss": 2.8779842853546143,
"step": 877
},
{
"epoch": 0.4255937954435288,
"grad_norm": 10.805778833403087,
"learning_rate": 9.947155197135594e-06,
"loss": 3.0794942378997803,
"step": 878
},
{
"epoch": 0.4260785264178381,
"grad_norm": 15.57789364406794,
"learning_rate": 9.94674548375618e-06,
"loss": 3.252434730529785,
"step": 879
},
{
"epoch": 0.42656325739214734,
"grad_norm": 7.334470517262432,
"learning_rate": 9.946334196724e-06,
"loss": 2.6487343311309814,
"step": 880
},
{
"epoch": 0.42704798836645663,
"grad_norm": 9.62666760110104,
"learning_rate": 9.94592133616989e-06,
"loss": 2.6679821014404297,
"step": 881
},
{
"epoch": 0.42753271934076587,
"grad_norm": 13.147046454789825,
"learning_rate": 9.945506902225192e-06,
"loss": 2.7069010734558105,
"step": 882
},
{
"epoch": 0.42801745031507515,
"grad_norm": 10.879842810583677,
"learning_rate": 9.945090895021743e-06,
"loss": 2.749770164489746,
"step": 883
},
{
"epoch": 0.4285021812893844,
"grad_norm": 16.48304930581014,
"learning_rate": 9.944673314691886e-06,
"loss": 2.6937592029571533,
"step": 884
},
{
"epoch": 0.4289869122636937,
"grad_norm": 9.100923657211393,
"learning_rate": 9.944254161368457e-06,
"loss": 2.539243221282959,
"step": 885
},
{
"epoch": 0.4294716432380029,
"grad_norm": 17.75487112217364,
"learning_rate": 9.9438334351848e-06,
"loss": 3.4809632301330566,
"step": 886
},
{
"epoch": 0.42995637421231214,
"grad_norm": 14.771486355668129,
"learning_rate": 9.943411136274752e-06,
"loss": 2.8317904472351074,
"step": 887
},
{
"epoch": 0.43044110518662143,
"grad_norm": 16.199227300559958,
"learning_rate": 9.942987264772657e-06,
"loss": 2.695528507232666,
"step": 888
},
{
"epoch": 0.43092583616093066,
"grad_norm": 7.209098661316944,
"learning_rate": 9.942561820813358e-06,
"loss": 2.7278075218200684,
"step": 889
},
{
"epoch": 0.43141056713523995,
"grad_norm": 13.264699003885877,
"learning_rate": 9.942134804532194e-06,
"loss": 2.5141401290893555,
"step": 890
},
{
"epoch": 0.4318952981095492,
"grad_norm": 10.923436133827286,
"learning_rate": 9.941706216065006e-06,
"loss": 2.888681650161743,
"step": 891
},
{
"epoch": 0.4323800290838585,
"grad_norm": 9.067959081035415,
"learning_rate": 9.94127605554814e-06,
"loss": 3.1626381874084473,
"step": 892
},
{
"epoch": 0.4328647600581677,
"grad_norm": 9.227951723337736,
"learning_rate": 9.940844323118434e-06,
"loss": 2.5171074867248535,
"step": 893
},
{
"epoch": 0.433349491032477,
"grad_norm": 21.64639390559997,
"learning_rate": 9.940411018913233e-06,
"loss": 3.5113587379455566,
"step": 894
},
{
"epoch": 0.43383422200678623,
"grad_norm": 7.442333712739556,
"learning_rate": 9.939976143070378e-06,
"loss": 2.8152475357055664,
"step": 895
},
{
"epoch": 0.4343189529810955,
"grad_norm": 7.447469181412787,
"learning_rate": 9.939539695728211e-06,
"loss": 2.674321413040161,
"step": 896
},
{
"epoch": 0.43480368395540475,
"grad_norm": 8.292665501852293,
"learning_rate": 9.939101677025575e-06,
"loss": 2.575413227081299,
"step": 897
},
{
"epoch": 0.435288414929714,
"grad_norm": 24.994392380486424,
"learning_rate": 9.938662087101814e-06,
"loss": 2.308053970336914,
"step": 898
},
{
"epoch": 0.4357731459040233,
"grad_norm": 9.055990926550926,
"learning_rate": 9.938220926096765e-06,
"loss": 2.8128561973571777,
"step": 899
},
{
"epoch": 0.4362578768783325,
"grad_norm": 12.779126817413456,
"learning_rate": 9.937778194150771e-06,
"loss": 2.9120326042175293,
"step": 900
},
{
"epoch": 0.4367426078526418,
"grad_norm": 16.98954417045018,
"learning_rate": 9.937333891404676e-06,
"loss": 2.555014133453369,
"step": 901
},
{
"epoch": 0.43722733882695103,
"grad_norm": 15.22285532600766,
"learning_rate": 9.936888017999818e-06,
"loss": 2.898951530456543,
"step": 902
},
{
"epoch": 0.4377120698012603,
"grad_norm": 8.458734362738733,
"learning_rate": 9.936440574078042e-06,
"loss": 2.9044172763824463,
"step": 903
},
{
"epoch": 0.43819680077556955,
"grad_norm": 10.867721449967458,
"learning_rate": 9.93599155978168e-06,
"loss": 2.603816032409668,
"step": 904
},
{
"epoch": 0.43868153174987884,
"grad_norm": 12.606908289324412,
"learning_rate": 9.935540975253582e-06,
"loss": 2.2739739418029785,
"step": 905
},
{
"epoch": 0.4391662627241881,
"grad_norm": 12.260779287075382,
"learning_rate": 9.935088820637083e-06,
"loss": 2.9081344604492188,
"step": 906
},
{
"epoch": 0.43965099369849736,
"grad_norm": 10.17947094293445,
"learning_rate": 9.93463509607602e-06,
"loss": 2.9883246421813965,
"step": 907
},
{
"epoch": 0.4401357246728066,
"grad_norm": 10.99061053715347,
"learning_rate": 9.934179801714733e-06,
"loss": 3.3566060066223145,
"step": 908
},
{
"epoch": 0.4406204556471158,
"grad_norm": 13.876407066647992,
"learning_rate": 9.93372293769806e-06,
"loss": 2.7122247219085693,
"step": 909
},
{
"epoch": 0.4411051866214251,
"grad_norm": 10.477353825310622,
"learning_rate": 9.933264504171337e-06,
"loss": 2.7516961097717285,
"step": 910
},
{
"epoch": 0.44158991759573435,
"grad_norm": 10.371433581018227,
"learning_rate": 9.932804501280402e-06,
"loss": 2.672307252883911,
"step": 911
},
{
"epoch": 0.44207464857004364,
"grad_norm": 14.491769091397384,
"learning_rate": 9.932342929171588e-06,
"loss": 3.213305950164795,
"step": 912
},
{
"epoch": 0.44255937954435287,
"grad_norm": 6.156825663685206,
"learning_rate": 9.931879787991733e-06,
"loss": 2.4944980144500732,
"step": 913
},
{
"epoch": 0.44304411051866216,
"grad_norm": 7.640905016241382,
"learning_rate": 9.93141507788817e-06,
"loss": 1.6737315654754639,
"step": 914
},
{
"epoch": 0.4435288414929714,
"grad_norm": 11.113388576666262,
"learning_rate": 9.930948799008728e-06,
"loss": 2.4887936115264893,
"step": 915
},
{
"epoch": 0.4440135724672807,
"grad_norm": 7.576828589729805,
"learning_rate": 9.930480951501745e-06,
"loss": 2.339722156524658,
"step": 916
},
{
"epoch": 0.4444983034415899,
"grad_norm": 8.584362333002524,
"learning_rate": 9.930011535516049e-06,
"loss": 2.8290393352508545,
"step": 917
},
{
"epoch": 0.4449830344158992,
"grad_norm": 10.96659730164184,
"learning_rate": 9.929540551200971e-06,
"loss": 2.7112555503845215,
"step": 918
},
{
"epoch": 0.44546776539020844,
"grad_norm": 13.144431064885582,
"learning_rate": 9.929067998706339e-06,
"loss": 2.938088893890381,
"step": 919
},
{
"epoch": 0.44595249636451767,
"grad_norm": 13.66300589411794,
"learning_rate": 9.92859387818248e-06,
"loss": 2.9114770889282227,
"step": 920
},
{
"epoch": 0.44643722733882696,
"grad_norm": 11.390586591153085,
"learning_rate": 9.928118189780224e-06,
"loss": 2.823763847351074,
"step": 921
},
{
"epoch": 0.4469219583131362,
"grad_norm": 10.46226331639242,
"learning_rate": 9.927640933650893e-06,
"loss": 2.4871768951416016,
"step": 922
},
{
"epoch": 0.4474066892874455,
"grad_norm": 24.31896839985638,
"learning_rate": 9.927162109946313e-06,
"loss": 2.830982208251953,
"step": 923
},
{
"epoch": 0.4478914202617547,
"grad_norm": 11.39974011576639,
"learning_rate": 9.926681718818807e-06,
"loss": 2.2062058448791504,
"step": 924
},
{
"epoch": 0.448376151236064,
"grad_norm": 9.6484987600931,
"learning_rate": 9.926199760421196e-06,
"loss": 3.0457334518432617,
"step": 925
},
{
"epoch": 0.44886088221037324,
"grad_norm": 11.052631203848724,
"learning_rate": 9.9257162349068e-06,
"loss": 2.7804908752441406,
"step": 926
},
{
"epoch": 0.4493456131846825,
"grad_norm": 9.684288346117523,
"learning_rate": 9.925231142429436e-06,
"loss": 2.773710250854492,
"step": 927
},
{
"epoch": 0.44983034415899176,
"grad_norm": 10.658528896051246,
"learning_rate": 9.924744483143423e-06,
"loss": 3.056731700897217,
"step": 928
},
{
"epoch": 0.450315075133301,
"grad_norm": 11.134933547097036,
"learning_rate": 9.924256257203574e-06,
"loss": 3.0162692070007324,
"step": 929
},
{
"epoch": 0.4507998061076103,
"grad_norm": 7.500696564514522,
"learning_rate": 9.923766464765208e-06,
"loss": 3.0310330390930176,
"step": 930
},
{
"epoch": 0.4512845370819195,
"grad_norm": 11.959350240119146,
"learning_rate": 9.923275105984134e-06,
"loss": 2.794121265411377,
"step": 931
},
{
"epoch": 0.4517692680562288,
"grad_norm": 5.443837604672989,
"learning_rate": 9.92278218101666e-06,
"loss": 2.8181772232055664,
"step": 932
},
{
"epoch": 0.45225399903053803,
"grad_norm": 10.07951549831749,
"learning_rate": 9.9222876900196e-06,
"loss": 3.2829995155334473,
"step": 933
},
{
"epoch": 0.4527387300048473,
"grad_norm": 7.848567018467971,
"learning_rate": 9.921791633150254e-06,
"loss": 3.003978967666626,
"step": 934
},
{
"epoch": 0.45322346097915656,
"grad_norm": 8.394764975971865,
"learning_rate": 9.921294010566434e-06,
"loss": 2.9382338523864746,
"step": 935
},
{
"epoch": 0.45370819195346584,
"grad_norm": 20.690642205080895,
"learning_rate": 9.92079482242644e-06,
"loss": 3.2244584560394287,
"step": 936
},
{
"epoch": 0.4541929229277751,
"grad_norm": 16.044161323665215,
"learning_rate": 9.920294068889074e-06,
"loss": 2.5953123569488525,
"step": 937
},
{
"epoch": 0.45467765390208437,
"grad_norm": 17.660747020399516,
"learning_rate": 9.919791750113631e-06,
"loss": 2.834041118621826,
"step": 938
},
{
"epoch": 0.4551623848763936,
"grad_norm": 12.527511769942492,
"learning_rate": 9.919287866259915e-06,
"loss": 2.592001438140869,
"step": 939
},
{
"epoch": 0.45564711585070283,
"grad_norm": 15.233185861949215,
"learning_rate": 9.918782417488216e-06,
"loss": 3.287425994873047,
"step": 940
},
{
"epoch": 0.4561318468250121,
"grad_norm": 11.322562986739413,
"learning_rate": 9.91827540395933e-06,
"loss": 2.9243264198303223,
"step": 941
},
{
"epoch": 0.45661657779932135,
"grad_norm": 21.537626276527277,
"learning_rate": 9.917766825834542e-06,
"loss": 2.9267029762268066,
"step": 942
},
{
"epoch": 0.45710130877363064,
"grad_norm": 18.545516050813315,
"learning_rate": 9.917256683275646e-06,
"loss": 2.9791932106018066,
"step": 943
},
{
"epoch": 0.4575860397479399,
"grad_norm": 14.008721305364782,
"learning_rate": 9.916744976444926e-06,
"loss": 2.5083844661712646,
"step": 944
},
{
"epoch": 0.45807077072224917,
"grad_norm": 7.624380368435279,
"learning_rate": 9.916231705505166e-06,
"loss": 3.0512709617614746,
"step": 945
},
{
"epoch": 0.4585555016965584,
"grad_norm": 9.652933551750905,
"learning_rate": 9.915716870619645e-06,
"loss": 3.2629611492156982,
"step": 946
},
{
"epoch": 0.4590402326708677,
"grad_norm": 7.518845175456966,
"learning_rate": 9.915200471952145e-06,
"loss": 2.7326979637145996,
"step": 947
},
{
"epoch": 0.4595249636451769,
"grad_norm": 9.414674666186208,
"learning_rate": 9.91468250966694e-06,
"loss": 2.3150877952575684,
"step": 948
},
{
"epoch": 0.4600096946194862,
"grad_norm": 11.236978264285623,
"learning_rate": 9.914162983928803e-06,
"loss": 3.681236982345581,
"step": 949
},
{
"epoch": 0.46049442559379544,
"grad_norm": 10.238095810982212,
"learning_rate": 9.913641894903006e-06,
"loss": 3.108487606048584,
"step": 950
},
{
"epoch": 0.4609791565681047,
"grad_norm": 14.266103838975736,
"learning_rate": 9.913119242755316e-06,
"loss": 3.052557945251465,
"step": 951
},
{
"epoch": 0.46146388754241396,
"grad_norm": 10.311690568670075,
"learning_rate": 9.912595027652e-06,
"loss": 2.790510654449463,
"step": 952
},
{
"epoch": 0.4619486185167232,
"grad_norm": 11.211191447457818,
"learning_rate": 9.912069249759823e-06,
"loss": 3.255596160888672,
"step": 953
},
{
"epoch": 0.4624333494910325,
"grad_norm": 15.040620052938221,
"learning_rate": 9.91154190924604e-06,
"loss": 2.9534125328063965,
"step": 954
},
{
"epoch": 0.4629180804653417,
"grad_norm": 8.297956651091807,
"learning_rate": 9.91101300627841e-06,
"loss": 2.6271116733551025,
"step": 955
},
{
"epoch": 0.463402811439651,
"grad_norm": 9.080726639903766,
"learning_rate": 9.910482541025185e-06,
"loss": 2.5242326259613037,
"step": 956
},
{
"epoch": 0.46388754241396024,
"grad_norm": 9.16916640006152,
"learning_rate": 9.909950513655121e-06,
"loss": 3.1885504722595215,
"step": 957
},
{
"epoch": 0.46437227338826953,
"grad_norm": 8.679324176413495,
"learning_rate": 9.909416924337463e-06,
"loss": 2.681900978088379,
"step": 958
},
{
"epoch": 0.46485700436257876,
"grad_norm": 8.816943229216422,
"learning_rate": 9.908881773241953e-06,
"loss": 2.3982796669006348,
"step": 959
},
{
"epoch": 0.46534173533688805,
"grad_norm": 17.530476918731313,
"learning_rate": 9.908345060538838e-06,
"loss": 2.763704776763916,
"step": 960
},
{
"epoch": 0.4658264663111973,
"grad_norm": 37.38378136878293,
"learning_rate": 9.907806786398851e-06,
"loss": 2.78493070602417,
"step": 961
},
{
"epoch": 0.4663111972855065,
"grad_norm": 15.2152387398859,
"learning_rate": 9.90726695099323e-06,
"loss": 3.923656463623047,
"step": 962
},
{
"epoch": 0.4667959282598158,
"grad_norm": 11.149910560463574,
"learning_rate": 9.90672555449371e-06,
"loss": 2.8372859954833984,
"step": 963
},
{
"epoch": 0.46728065923412504,
"grad_norm": 10.668038445236169,
"learning_rate": 9.906182597072512e-06,
"loss": 2.606412410736084,
"step": 964
},
{
"epoch": 0.46776539020843433,
"grad_norm": 13.603517127651445,
"learning_rate": 9.905638078902367e-06,
"loss": 2.9094948768615723,
"step": 965
},
{
"epoch": 0.46825012118274356,
"grad_norm": 21.356130653208854,
"learning_rate": 9.905092000156494e-06,
"loss": 3.588754177093506,
"step": 966
},
{
"epoch": 0.46873485215705285,
"grad_norm": 22.693336302965943,
"learning_rate": 9.904544361008612e-06,
"loss": 3.7027783393859863,
"step": 967
},
{
"epoch": 0.4692195831313621,
"grad_norm": 9.103644762930019,
"learning_rate": 9.903995161632933e-06,
"loss": 1.8259680271148682,
"step": 968
},
{
"epoch": 0.46970431410567137,
"grad_norm": 11.696254632708442,
"learning_rate": 9.90344440220417e-06,
"loss": 3.2656121253967285,
"step": 969
},
{
"epoch": 0.4701890450799806,
"grad_norm": 7.503177527557716,
"learning_rate": 9.90289208289753e-06,
"loss": 3.021833896636963,
"step": 970
},
{
"epoch": 0.4706737760542899,
"grad_norm": 7.5067830592101785,
"learning_rate": 9.902338203888715e-06,
"loss": 2.6221776008605957,
"step": 971
},
{
"epoch": 0.4711585070285991,
"grad_norm": 11.238484557279268,
"learning_rate": 9.901782765353926e-06,
"loss": 2.572232484817505,
"step": 972
},
{
"epoch": 0.47164323800290836,
"grad_norm": 14.119866188820131,
"learning_rate": 9.901225767469856e-06,
"loss": 2.486013174057007,
"step": 973
},
{
"epoch": 0.47212796897721765,
"grad_norm": 13.452385551760747,
"learning_rate": 9.900667210413697e-06,
"loss": 3.125227928161621,
"step": 974
},
{
"epoch": 0.4726126999515269,
"grad_norm": 17.079990003845168,
"learning_rate": 9.900107094363139e-06,
"loss": 2.6686253547668457,
"step": 975
},
{
"epoch": 0.47309743092583617,
"grad_norm": 8.544635604043844,
"learning_rate": 9.89954541949636e-06,
"loss": 2.406129837036133,
"step": 976
},
{
"epoch": 0.4735821619001454,
"grad_norm": 10.005804804340999,
"learning_rate": 9.898982185992047e-06,
"loss": 2.661750555038452,
"step": 977
},
{
"epoch": 0.4740668928744547,
"grad_norm": 10.429977834967914,
"learning_rate": 9.89841739402937e-06,
"loss": 2.547941207885742,
"step": 978
},
{
"epoch": 0.4745516238487639,
"grad_norm": 7.686569713549642,
"learning_rate": 9.897851043788e-06,
"loss": 2.8515067100524902,
"step": 979
},
{
"epoch": 0.4750363548230732,
"grad_norm": 9.472041344854155,
"learning_rate": 9.897283135448106e-06,
"loss": 2.792850971221924,
"step": 980
},
{
"epoch": 0.47552108579738245,
"grad_norm": 15.90986801169682,
"learning_rate": 9.896713669190347e-06,
"loss": 2.626140832901001,
"step": 981
},
{
"epoch": 0.47600581677169174,
"grad_norm": 8.667698674604013,
"learning_rate": 9.896142645195885e-06,
"loss": 2.4676156044006348,
"step": 982
},
{
"epoch": 0.47649054774600097,
"grad_norm": 12.710129550439117,
"learning_rate": 9.895570063646371e-06,
"loss": 3.0994203090667725,
"step": 983
},
{
"epoch": 0.4769752787203102,
"grad_norm": 11.221732019457338,
"learning_rate": 9.894995924723953e-06,
"loss": 2.5294628143310547,
"step": 984
},
{
"epoch": 0.4774600096946195,
"grad_norm": 8.345429952018417,
"learning_rate": 9.89442022861128e-06,
"loss": 2.6745970249176025,
"step": 985
},
{
"epoch": 0.4779447406689287,
"grad_norm": 17.96814106781326,
"learning_rate": 9.893842975491486e-06,
"loss": 3.412853240966797,
"step": 986
},
{
"epoch": 0.478429471643238,
"grad_norm": 10.1329075832442,
"learning_rate": 9.89326416554821e-06,
"loss": 2.8422627449035645,
"step": 987
},
{
"epoch": 0.47891420261754725,
"grad_norm": 11.24636021715359,
"learning_rate": 9.89268379896558e-06,
"loss": 2.85766863822937,
"step": 988
},
{
"epoch": 0.47939893359185654,
"grad_norm": 14.403332638586752,
"learning_rate": 9.892101875928223e-06,
"loss": 2.9919815063476562,
"step": 989
},
{
"epoch": 0.47988366456616577,
"grad_norm": 9.140257707827443,
"learning_rate": 9.891518396621257e-06,
"loss": 2.3975579738616943,
"step": 990
},
{
"epoch": 0.48036839554047506,
"grad_norm": 11.882341430772827,
"learning_rate": 9.890933361230303e-06,
"loss": 2.553560972213745,
"step": 991
},
{
"epoch": 0.4808531265147843,
"grad_norm": 7.972086510444715,
"learning_rate": 9.890346769941468e-06,
"loss": 2.589210271835327,
"step": 992
},
{
"epoch": 0.4813378574890936,
"grad_norm": 14.643860448915103,
"learning_rate": 9.889758622941358e-06,
"loss": 2.60798978805542,
"step": 993
},
{
"epoch": 0.4818225884634028,
"grad_norm": 10.840755505059208,
"learning_rate": 9.889168920417074e-06,
"loss": 2.6480767726898193,
"step": 994
},
{
"epoch": 0.48230731943771205,
"grad_norm": 16.911804369932952,
"learning_rate": 9.888577662556211e-06,
"loss": 3.012880802154541,
"step": 995
},
{
"epoch": 0.48279205041202133,
"grad_norm": 11.834651751532522,
"learning_rate": 9.887984849546862e-06,
"loss": 2.5578513145446777,
"step": 996
},
{
"epoch": 0.48327678138633057,
"grad_norm": 19.670323037499728,
"learning_rate": 9.887390481577606e-06,
"loss": 3.3951401710510254,
"step": 997
},
{
"epoch": 0.48376151236063986,
"grad_norm": 7.924053484854834,
"learning_rate": 9.886794558837527e-06,
"loss": 2.7687392234802246,
"step": 998
},
{
"epoch": 0.4842462433349491,
"grad_norm": 9.729415367211805,
"learning_rate": 9.8861970815162e-06,
"loss": 2.750181198120117,
"step": 999
},
{
"epoch": 0.4847309743092584,
"grad_norm": 6.72328026777666,
"learning_rate": 9.885598049803693e-06,
"loss": 1.4694268703460693,
"step": 1000
},
{
"epoch": 0.4852157052835676,
"grad_norm": 10.250137848213972,
"learning_rate": 9.884997463890566e-06,
"loss": 2.52376127243042,
"step": 1001
},
{
"epoch": 0.4857004362578769,
"grad_norm": 8.554041206913217,
"learning_rate": 9.88439532396788e-06,
"loss": 2.5747761726379395,
"step": 1002
},
{
"epoch": 0.48618516723218613,
"grad_norm": 14.117365065853617,
"learning_rate": 9.883791630227189e-06,
"loss": 2.7597575187683105,
"step": 1003
},
{
"epoch": 0.4866698982064954,
"grad_norm": 6.794591369336236,
"learning_rate": 9.883186382860534e-06,
"loss": 2.7373569011688232,
"step": 1004
},
{
"epoch": 0.48715462918080465,
"grad_norm": 10.657688133571682,
"learning_rate": 9.882579582060459e-06,
"loss": 2.6971001625061035,
"step": 1005
},
{
"epoch": 0.4876393601551139,
"grad_norm": 9.824995588203532,
"learning_rate": 9.881971228019997e-06,
"loss": 2.847486972808838,
"step": 1006
},
{
"epoch": 0.4881240911294232,
"grad_norm": 12.133649018959163,
"learning_rate": 9.88136132093268e-06,
"loss": 3.24324631690979,
"step": 1007
},
{
"epoch": 0.4886088221037324,
"grad_norm": 10.183134219323474,
"learning_rate": 9.880749860992527e-06,
"loss": 2.841967821121216,
"step": 1008
},
{
"epoch": 0.4890935530780417,
"grad_norm": 6.660784537261562,
"learning_rate": 9.880136848394057e-06,
"loss": 2.6099486351013184,
"step": 1009
},
{
"epoch": 0.48957828405235093,
"grad_norm": 17.359084645247236,
"learning_rate": 9.87952228333228e-06,
"loss": 2.6345252990722656,
"step": 1010
},
{
"epoch": 0.4900630150266602,
"grad_norm": 23.56808642299523,
"learning_rate": 9.8789061660027e-06,
"loss": 2.3069255352020264,
"step": 1011
},
{
"epoch": 0.49054774600096945,
"grad_norm": 23.700810407644163,
"learning_rate": 9.878288496601318e-06,
"loss": 2.9906699657440186,
"step": 1012
},
{
"epoch": 0.49103247697527874,
"grad_norm": 29.264096269641293,
"learning_rate": 9.877669275324626e-06,
"loss": 2.449378490447998,
"step": 1013
},
{
"epoch": 0.491517207949588,
"grad_norm": 12.04168067224837,
"learning_rate": 9.877048502369607e-06,
"loss": 2.863020896911621,
"step": 1014
},
{
"epoch": 0.49200193892389726,
"grad_norm": 8.91271609399822,
"learning_rate": 9.876426177933743e-06,
"loss": 2.7834742069244385,
"step": 1015
},
{
"epoch": 0.4924866698982065,
"grad_norm": 9.924382482306475,
"learning_rate": 9.875802302215008e-06,
"loss": 2.7994070053100586,
"step": 1016
},
{
"epoch": 0.49297140087251573,
"grad_norm": 12.785061592214115,
"learning_rate": 9.875176875411866e-06,
"loss": 2.829901933670044,
"step": 1017
},
{
"epoch": 0.493456131846825,
"grad_norm": 9.743704729629021,
"learning_rate": 9.874549897723276e-06,
"loss": 2.182157516479492,
"step": 1018
},
{
"epoch": 0.49394086282113425,
"grad_norm": 9.290126432714535,
"learning_rate": 9.873921369348696e-06,
"loss": 2.565211296081543,
"step": 1019
},
{
"epoch": 0.49442559379544354,
"grad_norm": 16.815834315732292,
"learning_rate": 9.873291290488068e-06,
"loss": 2.2940378189086914,
"step": 1020
},
{
"epoch": 0.4949103247697528,
"grad_norm": 13.38652698261677,
"learning_rate": 9.872659661341837e-06,
"loss": 2.529844284057617,
"step": 1021
},
{
"epoch": 0.49539505574406206,
"grad_norm": 25.477358830308685,
"learning_rate": 9.87202648211093e-06,
"loss": 2.2028541564941406,
"step": 1022
},
{
"epoch": 0.4958797867183713,
"grad_norm": 10.581430488610854,
"learning_rate": 9.87139175299678e-06,
"loss": 2.720010280609131,
"step": 1023
},
{
"epoch": 0.4963645176926806,
"grad_norm": 8.96613881058317,
"learning_rate": 9.8707554742013e-06,
"loss": 2.8723998069763184,
"step": 1024
},
{
"epoch": 0.4968492486669898,
"grad_norm": 8.05711946306889,
"learning_rate": 9.870117645926907e-06,
"loss": 2.8642780780792236,
"step": 1025
},
{
"epoch": 0.4973339796412991,
"grad_norm": 9.996227228911794,
"learning_rate": 9.869478268376502e-06,
"loss": 2.6866164207458496,
"step": 1026
},
{
"epoch": 0.49781871061560834,
"grad_norm": 23.882491562076886,
"learning_rate": 9.868837341753487e-06,
"loss": 2.763071060180664,
"step": 1027
},
{
"epoch": 0.4983034415899176,
"grad_norm": 16.058270544954333,
"learning_rate": 9.86819486626175e-06,
"loss": 2.6563053131103516,
"step": 1028
},
{
"epoch": 0.49878817256422686,
"grad_norm": 7.768117235026698,
"learning_rate": 9.867550842105675e-06,
"loss": 2.8727705478668213,
"step": 1029
},
{
"epoch": 0.4992729035385361,
"grad_norm": 11.572281358875612,
"learning_rate": 9.866905269490141e-06,
"loss": 2.6243324279785156,
"step": 1030
},
{
"epoch": 0.4997576345128454,
"grad_norm": 15.894371837681174,
"learning_rate": 9.866258148620514e-06,
"loss": 2.7805705070495605,
"step": 1031
},
{
"epoch": 0.5002423654871546,
"grad_norm": 12.31523824404948,
"learning_rate": 9.865609479702653e-06,
"loss": 2.4917640686035156,
"step": 1032
},
{
"epoch": 0.5007270964614638,
"grad_norm": 10.53833723419931,
"learning_rate": 9.864959262942917e-06,
"loss": 2.644118309020996,
"step": 1033
},
{
"epoch": 0.5012118274357732,
"grad_norm": 21.765049165384248,
"learning_rate": 9.864307498548149e-06,
"loss": 3.062168598175049,
"step": 1034
},
{
"epoch": 0.5016965584100824,
"grad_norm": 14.37486118113577,
"learning_rate": 9.863654186725688e-06,
"loss": 2.637399196624756,
"step": 1035
},
{
"epoch": 0.5021812893843917,
"grad_norm": 14.768472247164215,
"learning_rate": 9.862999327683367e-06,
"loss": 2.9871702194213867,
"step": 1036
},
{
"epoch": 0.5026660203587009,
"grad_norm": 14.058882109295748,
"learning_rate": 9.862342921629504e-06,
"loss": 3.024217367172241,
"step": 1037
},
{
"epoch": 0.5031507513330101,
"grad_norm": 22.891255893176663,
"learning_rate": 9.86168496877292e-06,
"loss": 2.7615714073181152,
"step": 1038
},
{
"epoch": 0.5036354823073195,
"grad_norm": 14.088536362616379,
"learning_rate": 9.861025469322919e-06,
"loss": 2.845489501953125,
"step": 1039
},
{
"epoch": 0.5041202132816287,
"grad_norm": 20.589018078549806,
"learning_rate": 9.860364423489299e-06,
"loss": 2.976428508758545,
"step": 1040
},
{
"epoch": 0.5046049442559379,
"grad_norm": 7.762325068805252,
"learning_rate": 9.859701831482353e-06,
"loss": 2.5466015338897705,
"step": 1041
},
{
"epoch": 0.5050896752302472,
"grad_norm": 11.918893938151726,
"learning_rate": 9.859037693512865e-06,
"loss": 2.320150375366211,
"step": 1042
},
{
"epoch": 0.5055744062045565,
"grad_norm": 22.02832388222752,
"learning_rate": 9.858372009792108e-06,
"loss": 2.6106042861938477,
"step": 1043
},
{
"epoch": 0.5060591371788657,
"grad_norm": 9.424232312008622,
"learning_rate": 9.857704780531847e-06,
"loss": 2.603177070617676,
"step": 1044
},
{
"epoch": 0.506543868153175,
"grad_norm": 11.38709796810897,
"learning_rate": 9.857036005944344e-06,
"loss": 2.959136486053467,
"step": 1045
},
{
"epoch": 0.5070285991274842,
"grad_norm": 11.686543978320243,
"learning_rate": 9.856365686242347e-06,
"loss": 2.9856157302856445,
"step": 1046
},
{
"epoch": 0.5075133301017936,
"grad_norm": 6.992860914650961,
"learning_rate": 9.855693821639095e-06,
"loss": 2.4904956817626953,
"step": 1047
},
{
"epoch": 0.5079980610761028,
"grad_norm": 21.205637341516706,
"learning_rate": 9.855020412348323e-06,
"loss": 3.2075436115264893,
"step": 1048
},
{
"epoch": 0.508482792050412,
"grad_norm": 15.07126551664431,
"learning_rate": 9.854345458584257e-06,
"loss": 3.140666961669922,
"step": 1049
},
{
"epoch": 0.5089675230247213,
"grad_norm": 10.74423687384662,
"learning_rate": 9.853668960561611e-06,
"loss": 2.4158449172973633,
"step": 1050
},
{
"epoch": 0.5094522539990305,
"grad_norm": 31.003860438915066,
"learning_rate": 9.85299091849559e-06,
"loss": 2.130145311355591,
"step": 1051
},
{
"epoch": 0.5099369849733398,
"grad_norm": 11.218379521307638,
"learning_rate": 9.852311332601894e-06,
"loss": 3.152785539627075,
"step": 1052
},
{
"epoch": 0.5104217159476491,
"grad_norm": 33.64833351793415,
"learning_rate": 9.851630203096711e-06,
"loss": 3.0474212169647217,
"step": 1053
},
{
"epoch": 0.5109064469219583,
"grad_norm": 9.924383121116444,
"learning_rate": 9.85094753019672e-06,
"loss": 3.122361183166504,
"step": 1054
},
{
"epoch": 0.5113911778962675,
"grad_norm": 39.7379969720227,
"learning_rate": 9.850263314119095e-06,
"loss": 2.8021435737609863,
"step": 1055
},
{
"epoch": 0.5118759088705769,
"grad_norm": 10.180240962026364,
"learning_rate": 9.849577555081495e-06,
"loss": 2.6521310806274414,
"step": 1056
},
{
"epoch": 0.5123606398448861,
"grad_norm": 9.708735172097468,
"learning_rate": 9.848890253302076e-06,
"loss": 2.8268299102783203,
"step": 1057
},
{
"epoch": 0.5128453708191953,
"grad_norm": 11.382947734076861,
"learning_rate": 9.84820140899948e-06,
"loss": 3.3115015029907227,
"step": 1058
},
{
"epoch": 0.5133301017935046,
"grad_norm": 10.693236848270995,
"learning_rate": 9.847511022392841e-06,
"loss": 3.047999382019043,
"step": 1059
},
{
"epoch": 0.5138148327678138,
"grad_norm": 10.225647843361847,
"learning_rate": 9.846819093701782e-06,
"loss": 3.194293975830078,
"step": 1060
},
{
"epoch": 0.5142995637421232,
"grad_norm": 14.432604726710753,
"learning_rate": 9.846125623146422e-06,
"loss": 2.8317575454711914,
"step": 1061
},
{
"epoch": 0.5147842947164324,
"grad_norm": 8.437539991098111,
"learning_rate": 9.845430610947368e-06,
"loss": 2.9917330741882324,
"step": 1062
},
{
"epoch": 0.5152690256907416,
"grad_norm": 11.480633002469155,
"learning_rate": 9.844734057325713e-06,
"loss": 2.019951581954956,
"step": 1063
},
{
"epoch": 0.5157537566650509,
"grad_norm": 7.268449528061382,
"learning_rate": 9.844035962503045e-06,
"loss": 2.350142240524292,
"step": 1064
},
{
"epoch": 0.5162384876393602,
"grad_norm": 13.798974294581859,
"learning_rate": 9.84333632670144e-06,
"loss": 2.599806308746338,
"step": 1065
},
{
"epoch": 0.5167232186136694,
"grad_norm": 8.490920353109352,
"learning_rate": 9.842635150143466e-06,
"loss": 2.5097579956054688,
"step": 1066
},
{
"epoch": 0.5172079495879787,
"grad_norm": 11.012928722274047,
"learning_rate": 9.841932433052183e-06,
"loss": 2.9277567863464355,
"step": 1067
},
{
"epoch": 0.5176926805622879,
"grad_norm": 13.246180162771905,
"learning_rate": 9.841228175651136e-06,
"loss": 2.070112705230713,
"step": 1068
},
{
"epoch": 0.5181774115365972,
"grad_norm": 16.139123651363104,
"learning_rate": 9.840522378164363e-06,
"loss": 2.703327178955078,
"step": 1069
},
{
"epoch": 0.5186621425109065,
"grad_norm": 9.652107513489996,
"learning_rate": 9.839815040816391e-06,
"loss": 2.830338716506958,
"step": 1070
},
{
"epoch": 0.5191468734852157,
"grad_norm": 10.664035041939508,
"learning_rate": 9.839106163832237e-06,
"loss": 2.6244304180145264,
"step": 1071
},
{
"epoch": 0.5196316044595249,
"grad_norm": 7.0083689634795014,
"learning_rate": 9.83839574743741e-06,
"loss": 3.1320714950561523,
"step": 1072
},
{
"epoch": 0.5201163354338342,
"grad_norm": 6.241092757850588,
"learning_rate": 9.837683791857906e-06,
"loss": 2.9877982139587402,
"step": 1073
},
{
"epoch": 0.5206010664081435,
"grad_norm": 14.015105514062915,
"learning_rate": 9.83697029732021e-06,
"loss": 2.7237892150878906,
"step": 1074
},
{
"epoch": 0.5210857973824528,
"grad_norm": 6.8730584683802025,
"learning_rate": 9.8362552640513e-06,
"loss": 2.176792621612549,
"step": 1075
},
{
"epoch": 0.521570528356762,
"grad_norm": 16.40882740967128,
"learning_rate": 9.835538692278639e-06,
"loss": 2.812861442565918,
"step": 1076
},
{
"epoch": 0.5220552593310712,
"grad_norm": 11.07246800655944,
"learning_rate": 9.834820582230184e-06,
"loss": 2.96339750289917,
"step": 1077
},
{
"epoch": 0.5225399903053806,
"grad_norm": 8.978197579841742,
"learning_rate": 9.83410093413438e-06,
"loss": 2.6822853088378906,
"step": 1078
},
{
"epoch": 0.5230247212796898,
"grad_norm": 9.859955417048361,
"learning_rate": 9.83337974822016e-06,
"loss": 2.996520757675171,
"step": 1079
},
{
"epoch": 0.523509452253999,
"grad_norm": 24.04288481993264,
"learning_rate": 9.832657024716944e-06,
"loss": 2.847539186477661,
"step": 1080
},
{
"epoch": 0.5239941832283083,
"grad_norm": 174.09286829679996,
"learning_rate": 9.831932763854648e-06,
"loss": 2.324014186859131,
"step": 1081
},
{
"epoch": 0.5244789142026175,
"grad_norm": 10.681740639586067,
"learning_rate": 9.83120696586367e-06,
"loss": 2.458848476409912,
"step": 1082
},
{
"epoch": 0.5249636451769268,
"grad_norm": 13.379906803558322,
"learning_rate": 9.830479630974901e-06,
"loss": 2.5775575637817383,
"step": 1083
},
{
"epoch": 0.5254483761512361,
"grad_norm": 18.463123435886917,
"learning_rate": 9.829750759419722e-06,
"loss": 2.452864646911621,
"step": 1084
},
{
"epoch": 0.5259331071255453,
"grad_norm": 9.146461367304674,
"learning_rate": 9.829020351429999e-06,
"loss": 2.8031649589538574,
"step": 1085
},
{
"epoch": 0.5264178380998545,
"grad_norm": 13.60119543852757,
"learning_rate": 9.828288407238087e-06,
"loss": 2.31554913520813,
"step": 1086
},
{
"epoch": 0.5269025690741639,
"grad_norm": 8.075590427267556,
"learning_rate": 9.827554927076832e-06,
"loss": 3.0478415489196777,
"step": 1087
},
{
"epoch": 0.5273873000484731,
"grad_norm": 10.70330350556762,
"learning_rate": 9.82681991117957e-06,
"loss": 2.5834081172943115,
"step": 1088
},
{
"epoch": 0.5278720310227824,
"grad_norm": 10.206428024489503,
"learning_rate": 9.82608335978012e-06,
"loss": 2.471259593963623,
"step": 1089
},
{
"epoch": 0.5283567619970916,
"grad_norm": 15.399322933512009,
"learning_rate": 9.825345273112796e-06,
"loss": 2.774324417114258,
"step": 1090
},
{
"epoch": 0.5288414929714008,
"grad_norm": 9.02735165558292,
"learning_rate": 9.824605651412397e-06,
"loss": 2.9828133583068848,
"step": 1091
},
{
"epoch": 0.5293262239457102,
"grad_norm": 9.486973481486501,
"learning_rate": 9.823864494914208e-06,
"loss": 2.936738967895508,
"step": 1092
},
{
"epoch": 0.5298109549200194,
"grad_norm": 9.45440374362207,
"learning_rate": 9.823121803854006e-06,
"loss": 2.7692136764526367,
"step": 1093
},
{
"epoch": 0.5302956858943286,
"grad_norm": 15.450581781470884,
"learning_rate": 9.822377578468056e-06,
"loss": 3.328883409500122,
"step": 1094
},
{
"epoch": 0.5307804168686379,
"grad_norm": 8.945749661743843,
"learning_rate": 9.82163181899311e-06,
"loss": 2.8084590435028076,
"step": 1095
},
{
"epoch": 0.5312651478429472,
"grad_norm": 8.311902436536577,
"learning_rate": 9.820884525666408e-06,
"loss": 2.7339155673980713,
"step": 1096
},
{
"epoch": 0.5317498788172564,
"grad_norm": 18.47645538970739,
"learning_rate": 9.820135698725676e-06,
"loss": 2.6100807189941406,
"step": 1097
},
{
"epoch": 0.5322346097915657,
"grad_norm": 12.874638391057363,
"learning_rate": 9.81938533840913e-06,
"loss": 2.6991491317749023,
"step": 1098
},
{
"epoch": 0.5327193407658749,
"grad_norm": 9.679419052430582,
"learning_rate": 9.818633444955479e-06,
"loss": 2.462651014328003,
"step": 1099
},
{
"epoch": 0.5332040717401842,
"grad_norm": 19.35633059930277,
"learning_rate": 9.81788001860391e-06,
"loss": 3.0926308631896973,
"step": 1100
},
{
"epoch": 0.5336888027144935,
"grad_norm": 11.319587678021136,
"learning_rate": 9.817125059594102e-06,
"loss": 2.705207347869873,
"step": 1101
},
{
"epoch": 0.5341735336888027,
"grad_norm": 9.676166978401893,
"learning_rate": 9.816368568166222e-06,
"loss": 2.6645994186401367,
"step": 1102
},
{
"epoch": 0.534658264663112,
"grad_norm": 11.322108413480759,
"learning_rate": 9.815610544560923e-06,
"loss": 2.8481390476226807,
"step": 1103
},
{
"epoch": 0.5351429956374212,
"grad_norm": 7.901776709597571,
"learning_rate": 9.81485098901935e-06,
"loss": 2.8257975578308105,
"step": 1104
},
{
"epoch": 0.5356277266117305,
"grad_norm": 15.902677508366905,
"learning_rate": 9.81408990178313e-06,
"loss": 2.5208539962768555,
"step": 1105
},
{
"epoch": 0.5361124575860398,
"grad_norm": 8.176802731291636,
"learning_rate": 9.813327283094381e-06,
"loss": 2.6135478019714355,
"step": 1106
},
{
"epoch": 0.536597188560349,
"grad_norm": 18.028102624491726,
"learning_rate": 9.812563133195701e-06,
"loss": 3.473402976989746,
"step": 1107
},
{
"epoch": 0.5370819195346582,
"grad_norm": 13.579156194653281,
"learning_rate": 9.811797452330186e-06,
"loss": 2.929812431335449,
"step": 1108
},
{
"epoch": 0.5375666505089676,
"grad_norm": 9.505260327644459,
"learning_rate": 9.811030240741412e-06,
"loss": 2.264110565185547,
"step": 1109
},
{
"epoch": 0.5380513814832768,
"grad_norm": 15.241529671943582,
"learning_rate": 9.810261498673441e-06,
"loss": 2.5124588012695312,
"step": 1110
},
{
"epoch": 0.538536112457586,
"grad_norm": 25.80183222101284,
"learning_rate": 9.80949122637083e-06,
"loss": 4.53973388671875,
"step": 1111
},
{
"epoch": 0.5390208434318953,
"grad_norm": 7.827517174278282,
"learning_rate": 9.808719424078609e-06,
"loss": 3.0261032581329346,
"step": 1112
},
{
"epoch": 0.5395055744062045,
"grad_norm": 9.384615690193357,
"learning_rate": 9.807946092042309e-06,
"loss": 3.0889899730682373,
"step": 1113
},
{
"epoch": 0.5399903053805138,
"grad_norm": 18.624945820545452,
"learning_rate": 9.807171230507938e-06,
"loss": 2.764979362487793,
"step": 1114
},
{
"epoch": 0.5404750363548231,
"grad_norm": 9.659240297305994,
"learning_rate": 9.806394839722e-06,
"loss": 2.4855992794036865,
"step": 1115
},
{
"epoch": 0.5409597673291323,
"grad_norm": 11.112605798062342,
"learning_rate": 9.805616919931471e-06,
"loss": 2.760863780975342,
"step": 1116
},
{
"epoch": 0.5414444983034415,
"grad_norm": 10.829186072695078,
"learning_rate": 9.804837471383828e-06,
"loss": 2.7857561111450195,
"step": 1117
},
{
"epoch": 0.5419292292777509,
"grad_norm": 14.510335581975077,
"learning_rate": 9.804056494327025e-06,
"loss": 2.8935790061950684,
"step": 1118
},
{
"epoch": 0.5424139602520601,
"grad_norm": 16.59267846178188,
"learning_rate": 9.803273989009507e-06,
"loss": 3.2165207862854004,
"step": 1119
},
{
"epoch": 0.5428986912263694,
"grad_norm": 13.238668749451605,
"learning_rate": 9.802489955680206e-06,
"loss": 2.935166597366333,
"step": 1120
},
{
"epoch": 0.5433834222006786,
"grad_norm": 9.226897758723137,
"learning_rate": 9.801704394588532e-06,
"loss": 2.7070274353027344,
"step": 1121
},
{
"epoch": 0.5438681531749879,
"grad_norm": 6.312456288794379,
"learning_rate": 9.800917305984392e-06,
"loss": 2.882218360900879,
"step": 1122
},
{
"epoch": 0.5443528841492972,
"grad_norm": 14.268569772148236,
"learning_rate": 9.800128690118172e-06,
"loss": 2.382678985595703,
"step": 1123
},
{
"epoch": 0.5448376151236064,
"grad_norm": 10.412450221643345,
"learning_rate": 9.799338547240743e-06,
"loss": 2.5499935150146484,
"step": 1124
},
{
"epoch": 0.5453223460979156,
"grad_norm": 15.00578970147371,
"learning_rate": 9.798546877603468e-06,
"loss": 3.384899616241455,
"step": 1125
},
{
"epoch": 0.5458070770722249,
"grad_norm": 9.58595748105252,
"learning_rate": 9.79775368145819e-06,
"loss": 2.6548514366149902,
"step": 1126
},
{
"epoch": 0.5462918080465342,
"grad_norm": 13.598855393197109,
"learning_rate": 9.79695895905724e-06,
"loss": 2.4907751083374023,
"step": 1127
},
{
"epoch": 0.5467765390208434,
"grad_norm": 12.797622778054546,
"learning_rate": 9.796162710653433e-06,
"loss": 3.454143524169922,
"step": 1128
},
{
"epoch": 0.5472612699951527,
"grad_norm": 17.209652799635652,
"learning_rate": 9.795364936500077e-06,
"loss": 2.5829474925994873,
"step": 1129
},
{
"epoch": 0.5477460009694619,
"grad_norm": 19.70116743311588,
"learning_rate": 9.794565636850948e-06,
"loss": 3.0730276107788086,
"step": 1130
},
{
"epoch": 0.5482307319437713,
"grad_norm": 17.637134908524523,
"learning_rate": 9.793764811960328e-06,
"loss": 2.844787359237671,
"step": 1131
},
{
"epoch": 0.5487154629180805,
"grad_norm": 10.680251502658729,
"learning_rate": 9.792962462082967e-06,
"loss": 2.0516152381896973,
"step": 1132
},
{
"epoch": 0.5492001938923897,
"grad_norm": 10.688416365760725,
"learning_rate": 9.792158587474113e-06,
"loss": 2.7626171112060547,
"step": 1133
},
{
"epoch": 0.549684924866699,
"grad_norm": 8.737323369027093,
"learning_rate": 9.791353188389491e-06,
"loss": 2.4166452884674072,
"step": 1134
},
{
"epoch": 0.5501696558410082,
"grad_norm": 15.572890334653012,
"learning_rate": 9.790546265085317e-06,
"loss": 2.804481029510498,
"step": 1135
},
{
"epoch": 0.5506543868153175,
"grad_norm": 9.297279543069715,
"learning_rate": 9.789737817818283e-06,
"loss": 2.6296539306640625,
"step": 1136
},
{
"epoch": 0.5511391177896268,
"grad_norm": 7.134492605684497,
"learning_rate": 9.788927846845574e-06,
"loss": 2.874401569366455,
"step": 1137
},
{
"epoch": 0.551623848763936,
"grad_norm": 9.907797240967465,
"learning_rate": 9.788116352424859e-06,
"loss": 2.0568830966949463,
"step": 1138
},
{
"epoch": 0.5521085797382452,
"grad_norm": 28.019997849573645,
"learning_rate": 9.787303334814286e-06,
"loss": 3.5667967796325684,
"step": 1139
},
{
"epoch": 0.5525933107125546,
"grad_norm": 31.625721261177564,
"learning_rate": 9.786488794272494e-06,
"loss": 2.4199562072753906,
"step": 1140
},
{
"epoch": 0.5530780416868638,
"grad_norm": 14.37102147988273,
"learning_rate": 9.785672731058602e-06,
"loss": 2.514289140701294,
"step": 1141
},
{
"epoch": 0.553562772661173,
"grad_norm": 6.795750734293398,
"learning_rate": 9.784855145432215e-06,
"loss": 2.5528664588928223,
"step": 1142
},
{
"epoch": 0.5540475036354823,
"grad_norm": 12.879391874043147,
"learning_rate": 9.784036037653423e-06,
"loss": 2.751577615737915,
"step": 1143
},
{
"epoch": 0.5545322346097916,
"grad_norm": 7.935053159396872,
"learning_rate": 9.783215407982802e-06,
"loss": 2.835953712463379,
"step": 1144
},
{
"epoch": 0.5550169655841009,
"grad_norm": 7.340210676630709,
"learning_rate": 9.782393256681406e-06,
"loss": 2.5556366443634033,
"step": 1145
},
{
"epoch": 0.5555016965584101,
"grad_norm": 14.0425507845139,
"learning_rate": 9.781569584010777e-06,
"loss": 2.8475332260131836,
"step": 1146
},
{
"epoch": 0.5559864275327193,
"grad_norm": 16.367496177583483,
"learning_rate": 9.780744390232944e-06,
"loss": 2.9099035263061523,
"step": 1147
},
{
"epoch": 0.5564711585070286,
"grad_norm": 8.280289402551023,
"learning_rate": 9.779917675610413e-06,
"loss": 2.5079104900360107,
"step": 1148
},
{
"epoch": 0.5569558894813379,
"grad_norm": 18.802204054889195,
"learning_rate": 9.77908944040618e-06,
"loss": 2.9531798362731934,
"step": 1149
},
{
"epoch": 0.5574406204556471,
"grad_norm": 7.960968815129477,
"learning_rate": 9.77825968488372e-06,
"loss": 2.538639545440674,
"step": 1150
},
{
"epoch": 0.5579253514299564,
"grad_norm": 15.899424778619327,
"learning_rate": 9.777428409306995e-06,
"loss": 2.961426258087158,
"step": 1151
},
{
"epoch": 0.5584100824042656,
"grad_norm": 10.150643067298532,
"learning_rate": 9.77659561394045e-06,
"loss": 2.6949453353881836,
"step": 1152
},
{
"epoch": 0.5588948133785749,
"grad_norm": 15.164631975335121,
"learning_rate": 9.775761299049011e-06,
"loss": 2.101285934448242,
"step": 1153
},
{
"epoch": 0.5593795443528842,
"grad_norm": 8.307934101052243,
"learning_rate": 9.774925464898092e-06,
"loss": 2.64029598236084,
"step": 1154
},
{
"epoch": 0.5598642753271934,
"grad_norm": 8.650794907369617,
"learning_rate": 9.774088111753586e-06,
"loss": 2.645280361175537,
"step": 1155
},
{
"epoch": 0.5603490063015026,
"grad_norm": 18.959129410504993,
"learning_rate": 9.773249239881871e-06,
"loss": 3.5338239669799805,
"step": 1156
},
{
"epoch": 0.5608337372758119,
"grad_norm": 10.30556177253235,
"learning_rate": 9.772408849549807e-06,
"loss": 2.770819664001465,
"step": 1157
},
{
"epoch": 0.5613184682501212,
"grad_norm": 12.696861273928473,
"learning_rate": 9.77156694102474e-06,
"loss": 2.8076000213623047,
"step": 1158
},
{
"epoch": 0.5618031992244304,
"grad_norm": 13.896798512536499,
"learning_rate": 9.770723514574493e-06,
"loss": 2.6810646057128906,
"step": 1159
},
{
"epoch": 0.5622879301987397,
"grad_norm": 9.494124652649141,
"learning_rate": 9.769878570467382e-06,
"loss": 2.809934616088867,
"step": 1160
},
{
"epoch": 0.5627726611730489,
"grad_norm": 6.949985925144723,
"learning_rate": 9.769032108972195e-06,
"loss": 2.498781204223633,
"step": 1161
},
{
"epoch": 0.5632573921473583,
"grad_norm": 10.473868774805132,
"learning_rate": 9.768184130358207e-06,
"loss": 2.303638458251953,
"step": 1162
},
{
"epoch": 0.5637421231216675,
"grad_norm": 11.2055140419879,
"learning_rate": 9.767334634895179e-06,
"loss": 3.2677836418151855,
"step": 1163
},
{
"epoch": 0.5642268540959767,
"grad_norm": 7.419322924483981,
"learning_rate": 9.76648362285335e-06,
"loss": 1.9477100372314453,
"step": 1164
},
{
"epoch": 0.564711585070286,
"grad_norm": 8.143573931734716,
"learning_rate": 9.765631094503442e-06,
"loss": 2.4743399620056152,
"step": 1165
},
{
"epoch": 0.5651963160445953,
"grad_norm": 11.86542853499395,
"learning_rate": 9.76477705011666e-06,
"loss": 2.5882630348205566,
"step": 1166
},
{
"epoch": 0.5656810470189045,
"grad_norm": 13.82785246508648,
"learning_rate": 9.763921489964696e-06,
"loss": 3.0731239318847656,
"step": 1167
},
{
"epoch": 0.5661657779932138,
"grad_norm": 8.304574478465126,
"learning_rate": 9.763064414319715e-06,
"loss": 2.7785632610321045,
"step": 1168
},
{
"epoch": 0.566650508967523,
"grad_norm": 17.917198370101286,
"learning_rate": 9.762205823454369e-06,
"loss": 3.542367935180664,
"step": 1169
},
{
"epoch": 0.5671352399418322,
"grad_norm": 10.126383686208216,
"learning_rate": 9.761345717641794e-06,
"loss": 3.060417890548706,
"step": 1170
},
{
"epoch": 0.5676199709161416,
"grad_norm": 7.871841763347475,
"learning_rate": 9.760484097155604e-06,
"loss": 2.1751911640167236,
"step": 1171
},
{
"epoch": 0.5681047018904508,
"grad_norm": 14.04862153822243,
"learning_rate": 9.7596209622699e-06,
"loss": 2.9659714698791504,
"step": 1172
},
{
"epoch": 0.56858943286476,
"grad_norm": 10.952283609683828,
"learning_rate": 9.758756313259259e-06,
"loss": 2.3940958976745605,
"step": 1173
},
{
"epoch": 0.5690741638390693,
"grad_norm": 18.021229016956696,
"learning_rate": 9.757890150398741e-06,
"loss": 2.7084031105041504,
"step": 1174
},
{
"epoch": 0.5695588948133786,
"grad_norm": 13.632689128810282,
"learning_rate": 9.757022473963891e-06,
"loss": 3.1089911460876465,
"step": 1175
},
{
"epoch": 0.5700436257876879,
"grad_norm": 12.31222674063815,
"learning_rate": 9.756153284230731e-06,
"loss": 2.6257095336914062,
"step": 1176
},
{
"epoch": 0.5705283567619971,
"grad_norm": 7.613024228655374,
"learning_rate": 9.755282581475769e-06,
"loss": 2.7569613456726074,
"step": 1177
},
{
"epoch": 0.5710130877363063,
"grad_norm": 14.249514709346908,
"learning_rate": 9.75441036597599e-06,
"loss": 2.61777400970459,
"step": 1178
},
{
"epoch": 0.5714978187106156,
"grad_norm": 14.122858602386986,
"learning_rate": 9.753536638008863e-06,
"loss": 3.1042232513427734,
"step": 1179
},
{
"epoch": 0.5719825496849249,
"grad_norm": 25.796884408253,
"learning_rate": 9.752661397852338e-06,
"loss": 2.793063163757324,
"step": 1180
},
{
"epoch": 0.5724672806592341,
"grad_norm": 20.478935575753162,
"learning_rate": 9.751784645784843e-06,
"loss": 2.9792580604553223,
"step": 1181
},
{
"epoch": 0.5729520116335434,
"grad_norm": 14.12460491724721,
"learning_rate": 9.750906382085292e-06,
"loss": 3.2587618827819824,
"step": 1182
},
{
"epoch": 0.5734367426078526,
"grad_norm": 7.679885012124906,
"learning_rate": 9.750026607033076e-06,
"loss": 3.0464859008789062,
"step": 1183
},
{
"epoch": 0.573921473582162,
"grad_norm": 15.456040581930496,
"learning_rate": 9.749145320908067e-06,
"loss": 2.4638161659240723,
"step": 1184
},
{
"epoch": 0.5744062045564712,
"grad_norm": 8.175075424075878,
"learning_rate": 9.748262523990621e-06,
"loss": 2.7897748947143555,
"step": 1185
},
{
"epoch": 0.5748909355307804,
"grad_norm": 64.78020509985774,
"learning_rate": 9.747378216561572e-06,
"loss": 2.8044168949127197,
"step": 1186
},
{
"epoch": 0.5753756665050896,
"grad_norm": 7.931441585414274,
"learning_rate": 9.746492398902232e-06,
"loss": 2.312488555908203,
"step": 1187
},
{
"epoch": 0.575860397479399,
"grad_norm": 12.52985282020833,
"learning_rate": 9.745605071294398e-06,
"loss": 2.999720335006714,
"step": 1188
},
{
"epoch": 0.5763451284537082,
"grad_norm": 11.729051196446555,
"learning_rate": 9.744716234020347e-06,
"loss": 2.7057430744171143,
"step": 1189
},
{
"epoch": 0.5768298594280175,
"grad_norm": 13.81338999111667,
"learning_rate": 9.743825887362832e-06,
"loss": 2.5842151641845703,
"step": 1190
},
{
"epoch": 0.5773145904023267,
"grad_norm": 10.847130180337288,
"learning_rate": 9.74293403160509e-06,
"loss": 2.7588133811950684,
"step": 1191
},
{
"epoch": 0.5777993213766359,
"grad_norm": 9.588050023134858,
"learning_rate": 9.742040667030842e-06,
"loss": 2.8157958984375,
"step": 1192
},
{
"epoch": 0.5782840523509453,
"grad_norm": 16.17496100915274,
"learning_rate": 9.741145793924277e-06,
"loss": 3.00178599357605,
"step": 1193
},
{
"epoch": 0.5787687833252545,
"grad_norm": 11.066599575933475,
"learning_rate": 9.740249412570071e-06,
"loss": 2.7602477073669434,
"step": 1194
},
{
"epoch": 0.5792535142995637,
"grad_norm": 16.25954557025958,
"learning_rate": 9.739351523253386e-06,
"loss": 3.4951250553131104,
"step": 1195
},
{
"epoch": 0.579738245273873,
"grad_norm": 13.100812949970274,
"learning_rate": 9.738452126259851e-06,
"loss": 2.6309256553649902,
"step": 1196
},
{
"epoch": 0.5802229762481823,
"grad_norm": 18.1079287985411,
"learning_rate": 9.737551221875587e-06,
"loss": 2.8419744968414307,
"step": 1197
},
{
"epoch": 0.5807077072224915,
"grad_norm": 6.433759729974519,
"learning_rate": 9.736648810387185e-06,
"loss": 2.780362606048584,
"step": 1198
},
{
"epoch": 0.5811924381968008,
"grad_norm": 9.416194402900418,
"learning_rate": 9.73574489208172e-06,
"loss": 2.5310463905334473,
"step": 1199
},
{
"epoch": 0.58167716917111,
"grad_norm": 35.008932496553896,
"learning_rate": 9.734839467246744e-06,
"loss": 2.8021655082702637,
"step": 1200
},
{
"epoch": 0.5821619001454192,
"grad_norm": 25.456398302835765,
"learning_rate": 9.733932536170291e-06,
"loss": 2.5079126358032227,
"step": 1201
},
{
"epoch": 0.5826466311197286,
"grad_norm": 13.10782471063404,
"learning_rate": 9.733024099140874e-06,
"loss": 2.9973483085632324,
"step": 1202
},
{
"epoch": 0.5831313620940378,
"grad_norm": 16.495371866804188,
"learning_rate": 9.732114156447481e-06,
"loss": 3.1317059993743896,
"step": 1203
},
{
"epoch": 0.583616093068347,
"grad_norm": 17.99811574069338,
"learning_rate": 9.731202708379583e-06,
"loss": 2.9335405826568604,
"step": 1204
},
{
"epoch": 0.5841008240426563,
"grad_norm": 9.824510617402053,
"learning_rate": 9.730289755227131e-06,
"loss": 1.4390316009521484,
"step": 1205
},
{
"epoch": 0.5845855550169656,
"grad_norm": 12.661543548755356,
"learning_rate": 9.72937529728055e-06,
"loss": 1.67311692237854,
"step": 1206
},
{
"epoch": 0.5850702859912749,
"grad_norm": 10.777971115618897,
"learning_rate": 9.728459334830746e-06,
"loss": 2.8733181953430176,
"step": 1207
},
{
"epoch": 0.5855550169655841,
"grad_norm": 15.102366781908367,
"learning_rate": 9.727541868169106e-06,
"loss": 2.9049313068389893,
"step": 1208
},
{
"epoch": 0.5860397479398933,
"grad_norm": 15.877961572937101,
"learning_rate": 9.726622897587493e-06,
"loss": 3.026360511779785,
"step": 1209
},
{
"epoch": 0.5865244789142026,
"grad_norm": 6.638863434754075,
"learning_rate": 9.725702423378248e-06,
"loss": 2.9490883350372314,
"step": 1210
},
{
"epoch": 0.5870092098885119,
"grad_norm": 7.682092953782158,
"learning_rate": 9.72478044583419e-06,
"loss": 2.8273630142211914,
"step": 1211
},
{
"epoch": 0.5874939408628211,
"grad_norm": 7.461346359805399,
"learning_rate": 9.72385696524862e-06,
"loss": 2.6390433311462402,
"step": 1212
},
{
"epoch": 0.5879786718371304,
"grad_norm": 9.716964995288022,
"learning_rate": 9.722931981915312e-06,
"loss": 2.4283909797668457,
"step": 1213
},
{
"epoch": 0.5884634028114396,
"grad_norm": 10.74193044195543,
"learning_rate": 9.722005496128522e-06,
"loss": 2.847947120666504,
"step": 1214
},
{
"epoch": 0.588948133785749,
"grad_norm": 9.647484843446001,
"learning_rate": 9.721077508182983e-06,
"loss": 2.6344096660614014,
"step": 1215
},
{
"epoch": 0.5894328647600582,
"grad_norm": 11.135562197752522,
"learning_rate": 9.720148018373905e-06,
"loss": 2.8421471118927,
"step": 1216
},
{
"epoch": 0.5899175957343674,
"grad_norm": 23.251195545470253,
"learning_rate": 9.719217026996976e-06,
"loss": 2.973006248474121,
"step": 1217
},
{
"epoch": 0.5904023267086767,
"grad_norm": 12.309907589574838,
"learning_rate": 9.71828453434836e-06,
"loss": 2.7532055377960205,
"step": 1218
},
{
"epoch": 0.590887057682986,
"grad_norm": 9.053960688637732,
"learning_rate": 9.717350540724705e-06,
"loss": 2.839115619659424,
"step": 1219
},
{
"epoch": 0.5913717886572952,
"grad_norm": 12.486950912461053,
"learning_rate": 9.716415046423126e-06,
"loss": 1.9866429567337036,
"step": 1220
},
{
"epoch": 0.5918565196316045,
"grad_norm": 10.238791155717328,
"learning_rate": 9.715478051741228e-06,
"loss": 2.9217402935028076,
"step": 1221
},
{
"epoch": 0.5923412506059137,
"grad_norm": 19.564742735846895,
"learning_rate": 9.714539556977082e-06,
"loss": 3.3793063163757324,
"step": 1222
},
{
"epoch": 0.5928259815802229,
"grad_norm": 11.25811161253022,
"learning_rate": 9.713599562429242e-06,
"loss": 2.599295139312744,
"step": 1223
},
{
"epoch": 0.5933107125545323,
"grad_norm": 24.120280853631463,
"learning_rate": 9.712658068396736e-06,
"loss": 2.816826105117798,
"step": 1224
},
{
"epoch": 0.5937954435288415,
"grad_norm": 7.878047204683934,
"learning_rate": 9.711715075179075e-06,
"loss": 2.7512388229370117,
"step": 1225
},
{
"epoch": 0.5942801745031507,
"grad_norm": 6.857719646355066,
"learning_rate": 9.71077058307624e-06,
"loss": 2.7942605018615723,
"step": 1226
},
{
"epoch": 0.59476490547746,
"grad_norm": 19.95451969938032,
"learning_rate": 9.709824592388694e-06,
"loss": 2.941157341003418,
"step": 1227
},
{
"epoch": 0.5952496364517693,
"grad_norm": 8.063115768230059,
"learning_rate": 9.708877103417371e-06,
"loss": 2.5547988414764404,
"step": 1228
},
{
"epoch": 0.5957343674260785,
"grad_norm": 11.647338523229202,
"learning_rate": 9.707928116463688e-06,
"loss": 2.3730785846710205,
"step": 1229
},
{
"epoch": 0.5962190984003878,
"grad_norm": 28.58937121142785,
"learning_rate": 9.706977631829535e-06,
"loss": 3.48671817779541,
"step": 1230
},
{
"epoch": 0.596703829374697,
"grad_norm": 7.072400390936636,
"learning_rate": 9.706025649817278e-06,
"loss": 2.2693777084350586,
"step": 1231
},
{
"epoch": 0.5971885603490062,
"grad_norm": 12.475586572562985,
"learning_rate": 9.705072170729761e-06,
"loss": 3.3566994667053223,
"step": 1232
},
{
"epoch": 0.5976732913233156,
"grad_norm": 19.924037460256105,
"learning_rate": 9.704117194870305e-06,
"loss": 2.825540542602539,
"step": 1233
},
{
"epoch": 0.5981580222976248,
"grad_norm": 6.8375602605055485,
"learning_rate": 9.703160722542702e-06,
"loss": 2.7710745334625244,
"step": 1234
},
{
"epoch": 0.5986427532719341,
"grad_norm": 11.593890857173866,
"learning_rate": 9.702202754051227e-06,
"loss": 2.829296350479126,
"step": 1235
},
{
"epoch": 0.5991274842462433,
"grad_norm": 9.055780347304841,
"learning_rate": 9.701243289700629e-06,
"loss": 2.507204532623291,
"step": 1236
},
{
"epoch": 0.5996122152205526,
"grad_norm": 9.874569465801313,
"learning_rate": 9.700282329796127e-06,
"loss": 2.81136417388916,
"step": 1237
},
{
"epoch": 0.6000969461948619,
"grad_norm": 13.77714929004532,
"learning_rate": 9.699319874643425e-06,
"loss": 1.6337616443634033,
"step": 1238
},
{
"epoch": 0.6005816771691711,
"grad_norm": 7.305148783646291,
"learning_rate": 9.698355924548695e-06,
"loss": 2.660491466522217,
"step": 1239
},
{
"epoch": 0.6010664081434803,
"grad_norm": 8.283177092396283,
"learning_rate": 9.69739047981859e-06,
"loss": 1.8544533252716064,
"step": 1240
},
{
"epoch": 0.6015511391177897,
"grad_norm": 20.471684718099965,
"learning_rate": 9.696423540760234e-06,
"loss": 2.3804805278778076,
"step": 1241
},
{
"epoch": 0.6020358700920989,
"grad_norm": 10.994713873264526,
"learning_rate": 9.69545510768123e-06,
"loss": 2.831374406814575,
"step": 1242
},
{
"epoch": 0.6025206010664081,
"grad_norm": 18.982319556746404,
"learning_rate": 9.694485180889652e-06,
"loss": 2.4262874126434326,
"step": 1243
},
{
"epoch": 0.6030053320407174,
"grad_norm": 35.90981781446046,
"learning_rate": 9.693513760694057e-06,
"loss": 3.2116212844848633,
"step": 1244
},
{
"epoch": 0.6034900630150266,
"grad_norm": 17.16257105946667,
"learning_rate": 9.692540847403468e-06,
"loss": 3.4644274711608887,
"step": 1245
},
{
"epoch": 0.603974793989336,
"grad_norm": 11.829153568598809,
"learning_rate": 9.691566441327389e-06,
"loss": 2.9634737968444824,
"step": 1246
},
{
"epoch": 0.6044595249636452,
"grad_norm": 12.742815377229391,
"learning_rate": 9.690590542775795e-06,
"loss": 2.909914016723633,
"step": 1247
},
{
"epoch": 0.6049442559379544,
"grad_norm": 16.691767719118904,
"learning_rate": 9.689613152059141e-06,
"loss": 2.466724157333374,
"step": 1248
},
{
"epoch": 0.6054289869122637,
"grad_norm": 87.73123310613445,
"learning_rate": 9.688634269488349e-06,
"loss": 2.701446056365967,
"step": 1249
},
{
"epoch": 0.605913717886573,
"grad_norm": 8.189710112577602,
"learning_rate": 9.687653895374824e-06,
"loss": 2.8975820541381836,
"step": 1250
},
{
"epoch": 0.6063984488608822,
"grad_norm": 13.50555436398315,
"learning_rate": 9.686672030030437e-06,
"loss": 2.854891061782837,
"step": 1251
},
{
"epoch": 0.6068831798351915,
"grad_norm": 21.64471797972825,
"learning_rate": 9.685688673767543e-06,
"loss": 3.790677785873413,
"step": 1252
},
{
"epoch": 0.6073679108095007,
"grad_norm": 9.405269889306354,
"learning_rate": 9.684703826898963e-06,
"loss": 2.716235876083374,
"step": 1253
},
{
"epoch": 0.6078526417838099,
"grad_norm": 10.154911201479454,
"learning_rate": 9.683717489737996e-06,
"loss": 2.7545981407165527,
"step": 1254
},
{
"epoch": 0.6083373727581193,
"grad_norm": 8.429841540585063,
"learning_rate": 9.682729662598412e-06,
"loss": 3.0737528800964355,
"step": 1255
},
{
"epoch": 0.6088221037324285,
"grad_norm": 9.823019405388587,
"learning_rate": 9.681740345794462e-06,
"loss": 2.3333704471588135,
"step": 1256
},
{
"epoch": 0.6093068347067377,
"grad_norm": 16.166185283709275,
"learning_rate": 9.680749539640863e-06,
"loss": 2.737691640853882,
"step": 1257
},
{
"epoch": 0.609791565681047,
"grad_norm": 13.59082254761184,
"learning_rate": 9.67975724445281e-06,
"loss": 3.2686944007873535,
"step": 1258
},
{
"epoch": 0.6102762966553563,
"grad_norm": 8.978339788947588,
"learning_rate": 9.67876346054597e-06,
"loss": 2.7856388092041016,
"step": 1259
},
{
"epoch": 0.6107610276296656,
"grad_norm": 16.433453471013536,
"learning_rate": 9.677768188236487e-06,
"loss": 2.6277668476104736,
"step": 1260
},
{
"epoch": 0.6112457586039748,
"grad_norm": 14.893449860566404,
"learning_rate": 9.676771427840972e-06,
"loss": 3.0933825969696045,
"step": 1261
},
{
"epoch": 0.611730489578284,
"grad_norm": 10.433952119677153,
"learning_rate": 9.675773179676518e-06,
"loss": 3.1385128498077393,
"step": 1262
},
{
"epoch": 0.6122152205525934,
"grad_norm": 12.984175428595229,
"learning_rate": 9.67477344406068e-06,
"loss": 3.572765350341797,
"step": 1263
},
{
"epoch": 0.6126999515269026,
"grad_norm": 12.905935211969704,
"learning_rate": 9.6737722213115e-06,
"loss": 2.335437297821045,
"step": 1264
},
{
"epoch": 0.6131846825012118,
"grad_norm": 6.597377537103181,
"learning_rate": 9.67276951174748e-06,
"loss": 2.524695873260498,
"step": 1265
},
{
"epoch": 0.6136694134755211,
"grad_norm": 7.784437865632349,
"learning_rate": 9.671765315687603e-06,
"loss": 2.7013940811157227,
"step": 1266
},
{
"epoch": 0.6141541444498303,
"grad_norm": 12.29026877943521,
"learning_rate": 9.670759633451323e-06,
"loss": 2.6506309509277344,
"step": 1267
},
{
"epoch": 0.6146388754241396,
"grad_norm": 10.600246122734491,
"learning_rate": 9.669752465358567e-06,
"loss": 2.510054349899292,
"step": 1268
},
{
"epoch": 0.6151236063984489,
"grad_norm": 16.083824178473396,
"learning_rate": 9.66874381172973e-06,
"loss": 2.821484088897705,
"step": 1269
},
{
"epoch": 0.6156083373727581,
"grad_norm": 18.18501645437564,
"learning_rate": 9.667733672885688e-06,
"loss": 2.9507317543029785,
"step": 1270
},
{
"epoch": 0.6160930683470673,
"grad_norm": 17.64643144528969,
"learning_rate": 9.666722049147786e-06,
"loss": 3.200502872467041,
"step": 1271
},
{
"epoch": 0.6165777993213767,
"grad_norm": 10.48638488901433,
"learning_rate": 9.665708940837836e-06,
"loss": 3.1169986724853516,
"step": 1272
},
{
"epoch": 0.6170625302956859,
"grad_norm": 11.169156010868788,
"learning_rate": 9.664694348278131e-06,
"loss": 2.58497953414917,
"step": 1273
},
{
"epoch": 0.6175472612699952,
"grad_norm": 25.634746679804593,
"learning_rate": 9.663678271791431e-06,
"loss": 2.5898594856262207,
"step": 1274
},
{
"epoch": 0.6180319922443044,
"grad_norm": 10.0645411809049,
"learning_rate": 9.662660711700967e-06,
"loss": 2.6258163452148438,
"step": 1275
},
{
"epoch": 0.6185167232186136,
"grad_norm": 13.304844054457106,
"learning_rate": 9.661641668330445e-06,
"loss": 3.1934192180633545,
"step": 1276
},
{
"epoch": 0.619001454192923,
"grad_norm": 17.996905211721764,
"learning_rate": 9.660621142004045e-06,
"loss": 2.6970136165618896,
"step": 1277
},
{
"epoch": 0.6194861851672322,
"grad_norm": 8.044245111891998,
"learning_rate": 9.65959913304641e-06,
"loss": 2.5664029121398926,
"step": 1278
},
{
"epoch": 0.6199709161415414,
"grad_norm": 7.458544952748735,
"learning_rate": 9.658575641782664e-06,
"loss": 2.409101963043213,
"step": 1279
},
{
"epoch": 0.6204556471158507,
"grad_norm": 8.767305463209334,
"learning_rate": 9.657550668538396e-06,
"loss": 2.1131796836853027,
"step": 1280
},
{
"epoch": 0.62094037809016,
"grad_norm": 13.423411258948198,
"learning_rate": 9.656524213639674e-06,
"loss": 2.733224868774414,
"step": 1281
},
{
"epoch": 0.6214251090644692,
"grad_norm": 11.464607695995497,
"learning_rate": 9.655496277413028e-06,
"loss": 2.8935070037841797,
"step": 1282
},
{
"epoch": 0.6219098400387785,
"grad_norm": 14.977903069324395,
"learning_rate": 9.654466860185466e-06,
"loss": 2.8278021812438965,
"step": 1283
},
{
"epoch": 0.6223945710130877,
"grad_norm": 13.238138820874005,
"learning_rate": 9.653435962284465e-06,
"loss": 2.6914374828338623,
"step": 1284
},
{
"epoch": 0.622879301987397,
"grad_norm": 10.055963773329307,
"learning_rate": 9.652403584037973e-06,
"loss": 2.4892258644104004,
"step": 1285
},
{
"epoch": 0.6233640329617063,
"grad_norm": 14.921511033193314,
"learning_rate": 9.651369725774408e-06,
"loss": 2.7163734436035156,
"step": 1286
},
{
"epoch": 0.6238487639360155,
"grad_norm": 12.960432730650512,
"learning_rate": 9.650334387822659e-06,
"loss": 2.7989115715026855,
"step": 1287
},
{
"epoch": 0.6243334949103247,
"grad_norm": 19.334747283641097,
"learning_rate": 9.64929757051209e-06,
"loss": 2.9968109130859375,
"step": 1288
},
{
"epoch": 0.624818225884634,
"grad_norm": 11.052291738834649,
"learning_rate": 9.648259274172529e-06,
"loss": 2.765847682952881,
"step": 1289
},
{
"epoch": 0.6253029568589433,
"grad_norm": 18.79947854969259,
"learning_rate": 9.647219499134278e-06,
"loss": 3.0693283081054688,
"step": 1290
},
{
"epoch": 0.6257876878332526,
"grad_norm": 8.668192626929157,
"learning_rate": 9.64617824572811e-06,
"loss": 2.9720540046691895,
"step": 1291
},
{
"epoch": 0.6262724188075618,
"grad_norm": 25.710536330522018,
"learning_rate": 9.645135514285267e-06,
"loss": 2.3041207790374756,
"step": 1292
},
{
"epoch": 0.626757149781871,
"grad_norm": 9.890966395806464,
"learning_rate": 9.64409130513746e-06,
"loss": 2.71254825592041,
"step": 1293
},
{
"epoch": 0.6272418807561804,
"grad_norm": 9.962573535178242,
"learning_rate": 9.643045618616873e-06,
"loss": 3.20729398727417,
"step": 1294
},
{
"epoch": 0.6277266117304896,
"grad_norm": 8.461133647195654,
"learning_rate": 9.641998455056158e-06,
"loss": 2.3823466300964355,
"step": 1295
},
{
"epoch": 0.6282113427047988,
"grad_norm": 50.61090374635888,
"learning_rate": 9.64094981478844e-06,
"loss": 3.1338205337524414,
"step": 1296
},
{
"epoch": 0.6286960736791081,
"grad_norm": 9.330072145433814,
"learning_rate": 9.639899698147308e-06,
"loss": 2.636714458465576,
"step": 1297
},
{
"epoch": 0.6291808046534173,
"grad_norm": 14.653672598609832,
"learning_rate": 9.638848105466823e-06,
"loss": 3.249427318572998,
"step": 1298
},
{
"epoch": 0.6296655356277266,
"grad_norm": 29.74518040246311,
"learning_rate": 9.63779503708152e-06,
"loss": 2.2406225204467773,
"step": 1299
},
{
"epoch": 0.6301502666020359,
"grad_norm": 8.377180828379172,
"learning_rate": 9.636740493326398e-06,
"loss": 2.771450996398926,
"step": 1300
},
{
"epoch": 0.6306349975763451,
"grad_norm": 8.072307382597899,
"learning_rate": 9.635684474536927e-06,
"loss": 2.6512184143066406,
"step": 1301
},
{
"epoch": 0.6311197285506543,
"grad_norm": 10.556283549390264,
"learning_rate": 9.634626981049045e-06,
"loss": 2.5420923233032227,
"step": 1302
},
{
"epoch": 0.6316044595249637,
"grad_norm": 11.651369169527335,
"learning_rate": 9.633568013199163e-06,
"loss": 3.1008496284484863,
"step": 1303
},
{
"epoch": 0.6320891904992729,
"grad_norm": 6.710997045423366,
"learning_rate": 9.632507571324159e-06,
"loss": 2.7059860229492188,
"step": 1304
},
{
"epoch": 0.6325739214735822,
"grad_norm": 12.293647907113176,
"learning_rate": 9.631445655761378e-06,
"loss": 2.839236259460449,
"step": 1305
},
{
"epoch": 0.6330586524478914,
"grad_norm": 11.211941259486915,
"learning_rate": 9.630382266848636e-06,
"loss": 3.0187528133392334,
"step": 1306
},
{
"epoch": 0.6335433834222006,
"grad_norm": 12.964855803779981,
"learning_rate": 9.629317404924215e-06,
"loss": 3.522489547729492,
"step": 1307
},
{
"epoch": 0.63402811439651,
"grad_norm": 13.766237358744194,
"learning_rate": 9.62825107032687e-06,
"loss": 3.0566506385803223,
"step": 1308
},
{
"epoch": 0.6345128453708192,
"grad_norm": 9.414275816031902,
"learning_rate": 9.627183263395823e-06,
"loss": 2.348069906234741,
"step": 1309
},
{
"epoch": 0.6349975763451284,
"grad_norm": 7.855297299433272,
"learning_rate": 9.626113984470761e-06,
"loss": 2.644871234893799,
"step": 1310
},
{
"epoch": 0.6354823073194377,
"grad_norm": 14.9026737327165,
"learning_rate": 9.625043233891842e-06,
"loss": 2.955164909362793,
"step": 1311
},
{
"epoch": 0.635967038293747,
"grad_norm": 10.423776556626452,
"learning_rate": 9.623971011999693e-06,
"loss": 2.8354568481445312,
"step": 1312
},
{
"epoch": 0.6364517692680562,
"grad_norm": 11.30899870097666,
"learning_rate": 9.622897319135407e-06,
"loss": 2.971055507659912,
"step": 1313
},
{
"epoch": 0.6369365002423655,
"grad_norm": 6.817459623636938,
"learning_rate": 9.621822155640546e-06,
"loss": 2.593094825744629,
"step": 1314
},
{
"epoch": 0.6374212312166747,
"grad_norm": 19.334759375945374,
"learning_rate": 9.62074552185714e-06,
"loss": 3.052973747253418,
"step": 1315
},
{
"epoch": 0.637905962190984,
"grad_norm": 12.334833668322792,
"learning_rate": 9.619667418127687e-06,
"loss": 2.3673534393310547,
"step": 1316
},
{
"epoch": 0.6383906931652933,
"grad_norm": 6.546152102677602,
"learning_rate": 9.618587844795151e-06,
"loss": 2.8805131912231445,
"step": 1317
},
{
"epoch": 0.6388754241396025,
"grad_norm": 9.89629630955172,
"learning_rate": 9.617506802202967e-06,
"loss": 1.993342399597168,
"step": 1318
},
{
"epoch": 0.6393601551139118,
"grad_norm": 13.510302070248562,
"learning_rate": 9.61642429069503e-06,
"loss": 2.6627776622772217,
"step": 1319
},
{
"epoch": 0.639844886088221,
"grad_norm": 9.162663947335437,
"learning_rate": 9.615340310615713e-06,
"loss": 2.6424052715301514,
"step": 1320
},
{
"epoch": 0.6403296170625303,
"grad_norm": 15.227573540401036,
"learning_rate": 9.614254862309847e-06,
"loss": 2.714040756225586,
"step": 1321
},
{
"epoch": 0.6408143480368396,
"grad_norm": 36.797841342565064,
"learning_rate": 9.613167946122733e-06,
"loss": 3.1160645484924316,
"step": 1322
},
{
"epoch": 0.6412990790111488,
"grad_norm": 11.40261446544079,
"learning_rate": 9.612079562400143e-06,
"loss": 2.8167805671691895,
"step": 1323
},
{
"epoch": 0.641783809985458,
"grad_norm": 14.120519918511345,
"learning_rate": 9.610989711488307e-06,
"loss": 2.840053081512451,
"step": 1324
},
{
"epoch": 0.6422685409597674,
"grad_norm": 11.519316057807503,
"learning_rate": 9.609898393733933e-06,
"loss": 2.8692281246185303,
"step": 1325
},
{
"epoch": 0.6427532719340766,
"grad_norm": 12.294921201009439,
"learning_rate": 9.608805609484186e-06,
"loss": 2.9801721572875977,
"step": 1326
},
{
"epoch": 0.6432380029083858,
"grad_norm": 13.490361255973436,
"learning_rate": 9.607711359086704e-06,
"loss": 3.332772970199585,
"step": 1327
},
{
"epoch": 0.6437227338826951,
"grad_norm": 12.045717149581066,
"learning_rate": 9.606615642889586e-06,
"loss": 2.4592411518096924,
"step": 1328
},
{
"epoch": 0.6442074648570043,
"grad_norm": 12.72975811810281,
"learning_rate": 9.605518461241399e-06,
"loss": 3.0670223236083984,
"step": 1329
},
{
"epoch": 0.6446921958313137,
"grad_norm": 11.913571534198844,
"learning_rate": 9.604419814491179e-06,
"loss": 3.0553855895996094,
"step": 1330
},
{
"epoch": 0.6451769268056229,
"grad_norm": 8.90628434306242,
"learning_rate": 9.603319702988427e-06,
"loss": 3.1585206985473633,
"step": 1331
},
{
"epoch": 0.6456616577799321,
"grad_norm": 11.58809914144362,
"learning_rate": 9.602218127083108e-06,
"loss": 2.9646003246307373,
"step": 1332
},
{
"epoch": 0.6461463887542414,
"grad_norm": 9.092649112486491,
"learning_rate": 9.601115087125654e-06,
"loss": 3.4191198348999023,
"step": 1333
},
{
"epoch": 0.6466311197285507,
"grad_norm": 25.06024435574234,
"learning_rate": 9.600010583466964e-06,
"loss": 2.2552268505096436,
"step": 1334
},
{
"epoch": 0.6471158507028599,
"grad_norm": 12.208341032765297,
"learning_rate": 9.598904616458398e-06,
"loss": 3.163729667663574,
"step": 1335
},
{
"epoch": 0.6476005816771692,
"grad_norm": 19.604972081360756,
"learning_rate": 9.597797186451787e-06,
"loss": 3.3706116676330566,
"step": 1336
},
{
"epoch": 0.6480853126514784,
"grad_norm": 21.866167833593995,
"learning_rate": 9.596688293799427e-06,
"loss": 2.5653302669525146,
"step": 1337
},
{
"epoch": 0.6485700436257877,
"grad_norm": 11.548788274973445,
"learning_rate": 9.595577938854077e-06,
"loss": 2.7365260124206543,
"step": 1338
},
{
"epoch": 0.649054774600097,
"grad_norm": 25.222096759628343,
"learning_rate": 9.594466121968961e-06,
"loss": 2.3195202350616455,
"step": 1339
},
{
"epoch": 0.6495395055744062,
"grad_norm": 8.929027262948214,
"learning_rate": 9.593352843497768e-06,
"loss": 2.936469554901123,
"step": 1340
},
{
"epoch": 0.6500242365487154,
"grad_norm": 10.46164645552426,
"learning_rate": 9.592238103794653e-06,
"loss": 2.908937454223633,
"step": 1341
},
{
"epoch": 0.6505089675230247,
"grad_norm": 8.044545627881293,
"learning_rate": 9.591121903214237e-06,
"loss": 3.036461353302002,
"step": 1342
},
{
"epoch": 0.650993698497334,
"grad_norm": 11.157522569479136,
"learning_rate": 9.590004242111604e-06,
"loss": 3.0666141510009766,
"step": 1343
},
{
"epoch": 0.6514784294716433,
"grad_norm": 13.125797882235016,
"learning_rate": 9.588885120842302e-06,
"loss": 3.5554440021514893,
"step": 1344
},
{
"epoch": 0.6519631604459525,
"grad_norm": 16.61078202990426,
"learning_rate": 9.587764539762345e-06,
"loss": 2.9402122497558594,
"step": 1345
},
{
"epoch": 0.6524478914202617,
"grad_norm": 15.933102030616704,
"learning_rate": 9.586642499228211e-06,
"loss": 2.5040204524993896,
"step": 1346
},
{
"epoch": 0.6529326223945711,
"grad_norm": 16.498848422486226,
"learning_rate": 9.585518999596843e-06,
"loss": 2.877326250076294,
"step": 1347
},
{
"epoch": 0.6534173533688803,
"grad_norm": 8.018100332015823,
"learning_rate": 9.584394041225647e-06,
"loss": 2.9246764183044434,
"step": 1348
},
{
"epoch": 0.6539020843431895,
"grad_norm": 9.367651088024125,
"learning_rate": 9.58326762447249e-06,
"loss": 2.9979798793792725,
"step": 1349
},
{
"epoch": 0.6543868153174988,
"grad_norm": 9.753350949878513,
"learning_rate": 9.582139749695713e-06,
"loss": 2.45381498336792,
"step": 1350
},
{
"epoch": 0.654871546291808,
"grad_norm": 8.297082091948766,
"learning_rate": 9.58101041725411e-06,
"loss": 2.6907663345336914,
"step": 1351
},
{
"epoch": 0.6553562772661173,
"grad_norm": 7.060654326079692,
"learning_rate": 9.579879627506943e-06,
"loss": 3.061694383621216,
"step": 1352
},
{
"epoch": 0.6558410082404266,
"grad_norm": 9.20612614531374,
"learning_rate": 9.578747380813938e-06,
"loss": 2.5479798316955566,
"step": 1353
},
{
"epoch": 0.6563257392147358,
"grad_norm": 16.25667522512923,
"learning_rate": 9.577613677535283e-06,
"loss": 2.97050404548645,
"step": 1354
},
{
"epoch": 0.656810470189045,
"grad_norm": 15.869516460874216,
"learning_rate": 9.576478518031634e-06,
"loss": 3.081106185913086,
"step": 1355
},
{
"epoch": 0.6572952011633544,
"grad_norm": 8.435768097630396,
"learning_rate": 9.575341902664101e-06,
"loss": 2.9467854499816895,
"step": 1356
},
{
"epoch": 0.6577799321376636,
"grad_norm": 9.908287017674386,
"learning_rate": 9.574203831794266e-06,
"loss": 2.932912588119507,
"step": 1357
},
{
"epoch": 0.6582646631119728,
"grad_norm": 8.775631069956253,
"learning_rate": 9.573064305784172e-06,
"loss": 2.6274056434631348,
"step": 1358
},
{
"epoch": 0.6587493940862821,
"grad_norm": 12.565122469086193,
"learning_rate": 9.57192332499632e-06,
"loss": 2.655245065689087,
"step": 1359
},
{
"epoch": 0.6592341250605914,
"grad_norm": 10.052255377655797,
"learning_rate": 9.570780889793681e-06,
"loss": 2.6474573612213135,
"step": 1360
},
{
"epoch": 0.6597188560349007,
"grad_norm": 9.800617341775446,
"learning_rate": 9.569637000539684e-06,
"loss": 2.7474002838134766,
"step": 1361
},
{
"epoch": 0.6602035870092099,
"grad_norm": 6.76471140626327,
"learning_rate": 9.56849165759822e-06,
"loss": 2.930555820465088,
"step": 1362
},
{
"epoch": 0.6606883179835191,
"grad_norm": 12.428184402071599,
"learning_rate": 9.567344861333644e-06,
"loss": 2.600066661834717,
"step": 1363
},
{
"epoch": 0.6611730489578284,
"grad_norm": 11.15566734771866,
"learning_rate": 9.566196612110777e-06,
"loss": 2.475008249282837,
"step": 1364
},
{
"epoch": 0.6616577799321377,
"grad_norm": 30.377867734688373,
"learning_rate": 9.565046910294895e-06,
"loss": 4.456007957458496,
"step": 1365
},
{
"epoch": 0.6621425109064469,
"grad_norm": 12.176444956455954,
"learning_rate": 9.563895756251744e-06,
"loss": 3.150609254837036,
"step": 1366
},
{
"epoch": 0.6626272418807562,
"grad_norm": 6.981620360326501,
"learning_rate": 9.562743150347523e-06,
"loss": 2.394958019256592,
"step": 1367
},
{
"epoch": 0.6631119728550654,
"grad_norm": 36.319333914504526,
"learning_rate": 9.5615890929489e-06,
"loss": 2.352415084838867,
"step": 1368
},
{
"epoch": 0.6635967038293747,
"grad_norm": 17.631202592644414,
"learning_rate": 9.560433584423e-06,
"loss": 3.0391244888305664,
"step": 1369
},
{
"epoch": 0.664081434803684,
"grad_norm": 13.877326769128173,
"learning_rate": 9.559276625137416e-06,
"loss": 2.0516538619995117,
"step": 1370
},
{
"epoch": 0.6645661657779932,
"grad_norm": 16.29962010621063,
"learning_rate": 9.558118215460198e-06,
"loss": 2.812425136566162,
"step": 1371
},
{
"epoch": 0.6650508967523024,
"grad_norm": 8.588071731663685,
"learning_rate": 9.556958355759855e-06,
"loss": 2.901492118835449,
"step": 1372
},
{
"epoch": 0.6655356277266117,
"grad_norm": 13.113719716724775,
"learning_rate": 9.555797046405362e-06,
"loss": 2.4583747386932373,
"step": 1373
},
{
"epoch": 0.666020358700921,
"grad_norm": 10.648833604279151,
"learning_rate": 9.554634287766152e-06,
"loss": 3.0381040573120117,
"step": 1374
},
{
"epoch": 0.6665050896752303,
"grad_norm": 9.708003171505393,
"learning_rate": 9.553470080212122e-06,
"loss": 2.655698299407959,
"step": 1375
},
{
"epoch": 0.6669898206495395,
"grad_norm": 10.245101332881736,
"learning_rate": 9.552304424113626e-06,
"loss": 3.034816026687622,
"step": 1376
},
{
"epoch": 0.6674745516238487,
"grad_norm": 18.779636980433082,
"learning_rate": 9.551137319841484e-06,
"loss": 3.3991496562957764,
"step": 1377
},
{
"epoch": 0.6679592825981581,
"grad_norm": 8.414108125078686,
"learning_rate": 9.549968767766973e-06,
"loss": 2.642399311065674,
"step": 1378
},
{
"epoch": 0.6684440135724673,
"grad_norm": 13.715100686240898,
"learning_rate": 9.548798768261831e-06,
"loss": 2.8903682231903076,
"step": 1379
},
{
"epoch": 0.6689287445467765,
"grad_norm": 11.88386707028694,
"learning_rate": 9.547627321698257e-06,
"loss": 2.605835437774658,
"step": 1380
},
{
"epoch": 0.6694134755210858,
"grad_norm": 6.696428048835147,
"learning_rate": 9.546454428448907e-06,
"loss": 2.373936176300049,
"step": 1381
},
{
"epoch": 0.6698982064953951,
"grad_norm": 17.62001457988264,
"learning_rate": 9.545280088886908e-06,
"loss": 3.424015998840332,
"step": 1382
},
{
"epoch": 0.6703829374697043,
"grad_norm": 6.020482856875192,
"learning_rate": 9.544104303385831e-06,
"loss": 2.9024391174316406,
"step": 1383
},
{
"epoch": 0.6708676684440136,
"grad_norm": 9.259580291142834,
"learning_rate": 9.542927072319722e-06,
"loss": 2.424191474914551,
"step": 1384
},
{
"epoch": 0.6713523994183228,
"grad_norm": 7.451328899143511,
"learning_rate": 9.541748396063077e-06,
"loss": 2.812472343444824,
"step": 1385
},
{
"epoch": 0.671837130392632,
"grad_norm": 12.461079919031764,
"learning_rate": 9.540568274990856e-06,
"loss": 3.279971122741699,
"step": 1386
},
{
"epoch": 0.6723218613669414,
"grad_norm": 12.433680999104194,
"learning_rate": 9.539386709478475e-06,
"loss": 3.0729784965515137,
"step": 1387
},
{
"epoch": 0.6728065923412506,
"grad_norm": 16.384624034496888,
"learning_rate": 9.538203699901818e-06,
"loss": 3.0297694206237793,
"step": 1388
},
{
"epoch": 0.6732913233155599,
"grad_norm": 12.379814937510474,
"learning_rate": 9.537019246637218e-06,
"loss": 2.0425400733947754,
"step": 1389
},
{
"epoch": 0.6737760542898691,
"grad_norm": 11.518700037953638,
"learning_rate": 9.535833350061473e-06,
"loss": 2.552464485168457,
"step": 1390
},
{
"epoch": 0.6742607852641784,
"grad_norm": 5.536851001931116,
"learning_rate": 9.53464601055184e-06,
"loss": 2.8660264015197754,
"step": 1391
},
{
"epoch": 0.6747455162384877,
"grad_norm": 7.132556635518905,
"learning_rate": 9.533457228486031e-06,
"loss": 2.6181082725524902,
"step": 1392
},
{
"epoch": 0.6752302472127969,
"grad_norm": 20.661650172232594,
"learning_rate": 9.532267004242226e-06,
"loss": 3.3939406871795654,
"step": 1393
},
{
"epoch": 0.6757149781871061,
"grad_norm": 9.915233220648272,
"learning_rate": 9.53107533819905e-06,
"loss": 3.304542064666748,
"step": 1394
},
{
"epoch": 0.6761997091614154,
"grad_norm": 6.443792546843741,
"learning_rate": 9.5298822307356e-06,
"loss": 2.6681745052337646,
"step": 1395
},
{
"epoch": 0.6766844401357247,
"grad_norm": 9.97825788510846,
"learning_rate": 9.528687682231424e-06,
"loss": 2.6357030868530273,
"step": 1396
},
{
"epoch": 0.6771691711100339,
"grad_norm": 11.918731536615118,
"learning_rate": 9.52749169306653e-06,
"loss": 2.9796302318573,
"step": 1397
},
{
"epoch": 0.6776539020843432,
"grad_norm": 11.359060157534486,
"learning_rate": 9.526294263621386e-06,
"loss": 2.9863572120666504,
"step": 1398
},
{
"epoch": 0.6781386330586524,
"grad_norm": 24.7469667017027,
"learning_rate": 9.525095394276916e-06,
"loss": 2.908522129058838,
"step": 1399
},
{
"epoch": 0.6786233640329618,
"grad_norm": 7.54648287965597,
"learning_rate": 9.523895085414503e-06,
"loss": 2.5075600147247314,
"step": 1400
},
{
"epoch": 0.679108095007271,
"grad_norm": 10.574613294903882,
"learning_rate": 9.522693337415987e-06,
"loss": 2.71517276763916,
"step": 1401
},
{
"epoch": 0.6795928259815802,
"grad_norm": 7.588350729243297,
"learning_rate": 9.521490150663666e-06,
"loss": 2.011288642883301,
"step": 1402
},
{
"epoch": 0.6800775569558895,
"grad_norm": 13.545250969567581,
"learning_rate": 9.5202855255403e-06,
"loss": 2.287952184677124,
"step": 1403
},
{
"epoch": 0.6805622879301988,
"grad_norm": 18.145902295734643,
"learning_rate": 9.519079462429099e-06,
"loss": 3.0985336303710938,
"step": 1404
},
{
"epoch": 0.681047018904508,
"grad_norm": 7.77883619213037,
"learning_rate": 9.517871961713736e-06,
"loss": 2.662289619445801,
"step": 1405
},
{
"epoch": 0.6815317498788173,
"grad_norm": 20.80761817853385,
"learning_rate": 9.516663023778341e-06,
"loss": 3.246368885040283,
"step": 1406
},
{
"epoch": 0.6820164808531265,
"grad_norm": 5.396441033371444,
"learning_rate": 9.515452649007497e-06,
"loss": 2.9325077533721924,
"step": 1407
},
{
"epoch": 0.6825012118274357,
"grad_norm": 28.988672134085192,
"learning_rate": 9.514240837786248e-06,
"loss": 3.0697431564331055,
"step": 1408
},
{
"epoch": 0.6829859428017451,
"grad_norm": 18.366800764454553,
"learning_rate": 9.513027590500096e-06,
"loss": 2.697140693664551,
"step": 1409
},
{
"epoch": 0.6834706737760543,
"grad_norm": 11.343634755333992,
"learning_rate": 9.511812907534995e-06,
"loss": 2.3944616317749023,
"step": 1410
},
{
"epoch": 0.6839554047503635,
"grad_norm": 7.152505988566719,
"learning_rate": 9.51059678927736e-06,
"loss": 2.5330209732055664,
"step": 1411
},
{
"epoch": 0.6844401357246728,
"grad_norm": 6.989155490484811,
"learning_rate": 9.509379236114061e-06,
"loss": 2.793569564819336,
"step": 1412
},
{
"epoch": 0.6849248666989821,
"grad_norm": 11.177411586043737,
"learning_rate": 9.508160248432424e-06,
"loss": 2.8352482318878174,
"step": 1413
},
{
"epoch": 0.6854095976732913,
"grad_norm": 18.21325305796575,
"learning_rate": 9.506939826620231e-06,
"loss": 2.1784019470214844,
"step": 1414
},
{
"epoch": 0.6858943286476006,
"grad_norm": 10.756916393159896,
"learning_rate": 9.505717971065724e-06,
"loss": 2.8084328174591064,
"step": 1415
},
{
"epoch": 0.6863790596219098,
"grad_norm": 9.022593326451933,
"learning_rate": 9.504494682157597e-06,
"loss": 3.161400079727173,
"step": 1416
},
{
"epoch": 0.686863790596219,
"grad_norm": 10.845748190169322,
"learning_rate": 9.503269960284999e-06,
"loss": 2.5980188846588135,
"step": 1417
},
{
"epoch": 0.6873485215705284,
"grad_norm": 9.098062356015276,
"learning_rate": 9.502043805837541e-06,
"loss": 2.387038469314575,
"step": 1418
},
{
"epoch": 0.6878332525448376,
"grad_norm": 13.845513711433648,
"learning_rate": 9.500816219205282e-06,
"loss": 1.4184207916259766,
"step": 1419
},
{
"epoch": 0.6883179835191469,
"grad_norm": 7.659432309405131,
"learning_rate": 9.499587200778744e-06,
"loss": 2.908942222595215,
"step": 1420
},
{
"epoch": 0.6888027144934561,
"grad_norm": 19.688206462747285,
"learning_rate": 9.498356750948896e-06,
"loss": 2.6211698055267334,
"step": 1421
},
{
"epoch": 0.6892874454677654,
"grad_norm": 12.629870926972968,
"learning_rate": 9.497124870107173e-06,
"loss": 2.4190256595611572,
"step": 1422
},
{
"epoch": 0.6897721764420747,
"grad_norm": 7.003351769543691,
"learning_rate": 9.495891558645454e-06,
"loss": 2.7183585166931152,
"step": 1423
},
{
"epoch": 0.6902569074163839,
"grad_norm": 10.099830122865969,
"learning_rate": 9.494656816956083e-06,
"loss": 2.6783268451690674,
"step": 1424
},
{
"epoch": 0.6907416383906931,
"grad_norm": 16.745473414103014,
"learning_rate": 9.493420645431853e-06,
"loss": 2.811492919921875,
"step": 1425
},
{
"epoch": 0.6912263693650024,
"grad_norm": 12.40796620965054,
"learning_rate": 9.492183044466012e-06,
"loss": 2.5731558799743652,
"step": 1426
},
{
"epoch": 0.6917111003393117,
"grad_norm": 24.67273200938859,
"learning_rate": 9.490944014452265e-06,
"loss": 3.1754183769226074,
"step": 1427
},
{
"epoch": 0.692195831313621,
"grad_norm": 7.12767534605436,
"learning_rate": 9.489703555784771e-06,
"loss": 2.942049980163574,
"step": 1428
},
{
"epoch": 0.6926805622879302,
"grad_norm": 12.794637911325323,
"learning_rate": 9.488461668858143e-06,
"loss": 2.6166112422943115,
"step": 1429
},
{
"epoch": 0.6931652932622394,
"grad_norm": 12.63195603195629,
"learning_rate": 9.48721835406745e-06,
"loss": 2.376641273498535,
"step": 1430
},
{
"epoch": 0.6936500242365488,
"grad_norm": 8.567635656819169,
"learning_rate": 9.485973611808213e-06,
"loss": 3.126929998397827,
"step": 1431
},
{
"epoch": 0.694134755210858,
"grad_norm": 12.718357222146166,
"learning_rate": 9.484727442476406e-06,
"loss": 2.5806055068969727,
"step": 1432
},
{
"epoch": 0.6946194861851672,
"grad_norm": 16.979744686020613,
"learning_rate": 9.483479846468462e-06,
"loss": 3.298356056213379,
"step": 1433
},
{
"epoch": 0.6951042171594765,
"grad_norm": 11.805075963033534,
"learning_rate": 9.482230824181261e-06,
"loss": 2.4741060733795166,
"step": 1434
},
{
"epoch": 0.6955889481337858,
"grad_norm": 11.75787341045279,
"learning_rate": 9.480980376012145e-06,
"loss": 2.721428155899048,
"step": 1435
},
{
"epoch": 0.696073679108095,
"grad_norm": 16.16266451488439,
"learning_rate": 9.4797285023589e-06,
"loss": 3.4553561210632324,
"step": 1436
},
{
"epoch": 0.6965584100824043,
"grad_norm": 14.50565351008717,
"learning_rate": 9.478475203619777e-06,
"loss": 2.7529284954071045,
"step": 1437
},
{
"epoch": 0.6970431410567135,
"grad_norm": 12.252318497820003,
"learning_rate": 9.477220480193467e-06,
"loss": 2.835092544555664,
"step": 1438
},
{
"epoch": 0.6975278720310227,
"grad_norm": 9.549903905120376,
"learning_rate": 9.475964332479126e-06,
"loss": 2.9027302265167236,
"step": 1439
},
{
"epoch": 0.6980126030053321,
"grad_norm": 10.170635973775909,
"learning_rate": 9.474706760876356e-06,
"loss": 2.757601022720337,
"step": 1440
},
{
"epoch": 0.6984973339796413,
"grad_norm": 17.394951627672317,
"learning_rate": 9.473447765785215e-06,
"loss": 2.971360445022583,
"step": 1441
},
{
"epoch": 0.6989820649539505,
"grad_norm": 16.192906035104468,
"learning_rate": 9.472187347606213e-06,
"loss": 2.7836265563964844,
"step": 1442
},
{
"epoch": 0.6994667959282598,
"grad_norm": 12.2865552927096,
"learning_rate": 9.470925506740311e-06,
"loss": 2.5990500450134277,
"step": 1443
},
{
"epoch": 0.6999515269025691,
"grad_norm": 16.901460235183958,
"learning_rate": 9.469662243588928e-06,
"loss": 2.877948522567749,
"step": 1444
},
{
"epoch": 0.7004362578768784,
"grad_norm": 17.76655288273471,
"learning_rate": 9.468397558553928e-06,
"loss": 2.8937325477600098,
"step": 1445
},
{
"epoch": 0.7009209888511876,
"grad_norm": 7.6827204796137165,
"learning_rate": 9.467131452037634e-06,
"loss": 2.5177114009857178,
"step": 1446
},
{
"epoch": 0.7014057198254968,
"grad_norm": 11.854389906766784,
"learning_rate": 9.465863924442815e-06,
"loss": 2.956967830657959,
"step": 1447
},
{
"epoch": 0.701890450799806,
"grad_norm": 6.0181849125276115,
"learning_rate": 9.464594976172698e-06,
"loss": 2.526536226272583,
"step": 1448
},
{
"epoch": 0.7023751817741154,
"grad_norm": 7.865929752090386,
"learning_rate": 9.463324607630959e-06,
"loss": 2.524170160293579,
"step": 1449
},
{
"epoch": 0.7028599127484246,
"grad_norm": 18.65733913744807,
"learning_rate": 9.462052819221726e-06,
"loss": 2.753725051879883,
"step": 1450
},
{
"epoch": 0.7033446437227339,
"grad_norm": 10.41695461868547,
"learning_rate": 9.460779611349579e-06,
"loss": 2.560856342315674,
"step": 1451
},
{
"epoch": 0.7038293746970431,
"grad_norm": 9.333914465584542,
"learning_rate": 9.459504984419547e-06,
"loss": 2.8208718299865723,
"step": 1452
},
{
"epoch": 0.7043141056713524,
"grad_norm": 10.964369555209153,
"learning_rate": 9.458228938837118e-06,
"loss": 3.069180488586426,
"step": 1453
},
{
"epoch": 0.7047988366456617,
"grad_norm": 20.147234517658156,
"learning_rate": 9.456951475008218e-06,
"loss": 3.3423216342926025,
"step": 1454
},
{
"epoch": 0.7052835676199709,
"grad_norm": 31.92682865279221,
"learning_rate": 9.455672593339241e-06,
"loss": 3.148364543914795,
"step": 1455
},
{
"epoch": 0.7057682985942801,
"grad_norm": 11.936303065817086,
"learning_rate": 9.454392294237018e-06,
"loss": 2.1257944107055664,
"step": 1456
},
{
"epoch": 0.7062530295685895,
"grad_norm": 10.058431789165564,
"learning_rate": 9.453110578108838e-06,
"loss": 3.174621105194092,
"step": 1457
},
{
"epoch": 0.7067377605428987,
"grad_norm": 10.37746787204219,
"learning_rate": 9.451827445362438e-06,
"loss": 2.1320273876190186,
"step": 1458
},
{
"epoch": 0.707222491517208,
"grad_norm": 10.840960840622042,
"learning_rate": 9.450542896406007e-06,
"loss": 3.0804877281188965,
"step": 1459
},
{
"epoch": 0.7077072224915172,
"grad_norm": 11.109266223840326,
"learning_rate": 9.449256931648185e-06,
"loss": 2.4446563720703125,
"step": 1460
},
{
"epoch": 0.7081919534658264,
"grad_norm": 14.67286114314901,
"learning_rate": 9.447969551498061e-06,
"loss": 2.744884490966797,
"step": 1461
},
{
"epoch": 0.7086766844401358,
"grad_norm": 9.629304294947678,
"learning_rate": 9.446680756365174e-06,
"loss": 1.8127485513687134,
"step": 1462
},
{
"epoch": 0.709161415414445,
"grad_norm": 7.562179372733873,
"learning_rate": 9.445390546659516e-06,
"loss": 2.647463321685791,
"step": 1463
},
{
"epoch": 0.7096461463887542,
"grad_norm": 13.09749173329459,
"learning_rate": 9.444098922791526e-06,
"loss": 2.5864837169647217,
"step": 1464
},
{
"epoch": 0.7101308773630635,
"grad_norm": 9.57821396740512,
"learning_rate": 9.442805885172092e-06,
"loss": 3.0054728984832764,
"step": 1465
},
{
"epoch": 0.7106156083373728,
"grad_norm": 16.038291811751442,
"learning_rate": 9.441511434212556e-06,
"loss": 3.0239531993865967,
"step": 1466
},
{
"epoch": 0.711100339311682,
"grad_norm": 14.371704655690388,
"learning_rate": 9.440215570324706e-06,
"loss": 2.4949238300323486,
"step": 1467
},
{
"epoch": 0.7115850702859913,
"grad_norm": 14.526556024017303,
"learning_rate": 9.438918293920783e-06,
"loss": 2.5292062759399414,
"step": 1468
},
{
"epoch": 0.7120698012603005,
"grad_norm": 11.825738002540033,
"learning_rate": 9.437619605413471e-06,
"loss": 3.2351322174072266,
"step": 1469
},
{
"epoch": 0.7125545322346097,
"grad_norm": 14.505575870586751,
"learning_rate": 9.43631950521591e-06,
"loss": 2.58762788772583,
"step": 1470
},
{
"epoch": 0.7130392632089191,
"grad_norm": 7.917489206834237,
"learning_rate": 9.435017993741686e-06,
"loss": 2.5905556678771973,
"step": 1471
},
{
"epoch": 0.7135239941832283,
"grad_norm": 10.058781547141805,
"learning_rate": 9.433715071404834e-06,
"loss": 2.3822853565216064,
"step": 1472
},
{
"epoch": 0.7140087251575375,
"grad_norm": 16.288825853388925,
"learning_rate": 9.432410738619839e-06,
"loss": 2.7304368019104004,
"step": 1473
},
{
"epoch": 0.7144934561318468,
"grad_norm": 12.192577318456655,
"learning_rate": 9.43110499580163e-06,
"loss": 2.575087547302246,
"step": 1474
},
{
"epoch": 0.7149781871061561,
"grad_norm": 10.832206915460956,
"learning_rate": 9.429797843365594e-06,
"loss": 2.6703062057495117,
"step": 1475
},
{
"epoch": 0.7154629180804654,
"grad_norm": 7.894468625202938,
"learning_rate": 9.428489281727556e-06,
"loss": 2.8191263675689697,
"step": 1476
},
{
"epoch": 0.7159476490547746,
"grad_norm": 11.931291422121499,
"learning_rate": 9.427179311303796e-06,
"loss": 2.258380174636841,
"step": 1477
},
{
"epoch": 0.7164323800290838,
"grad_norm": 8.360941284373734,
"learning_rate": 9.425867932511042e-06,
"loss": 2.599374294281006,
"step": 1478
},
{
"epoch": 0.7169171110033932,
"grad_norm": 16.284629206990893,
"learning_rate": 9.424555145766465e-06,
"loss": 2.6958394050598145,
"step": 1479
},
{
"epoch": 0.7174018419777024,
"grad_norm": 10.006025047903861,
"learning_rate": 9.42324095148769e-06,
"loss": 2.6776390075683594,
"step": 1480
},
{
"epoch": 0.7178865729520116,
"grad_norm": 12.99543891484446,
"learning_rate": 9.421925350092783e-06,
"loss": 2.1604859828948975,
"step": 1481
},
{
"epoch": 0.7183713039263209,
"grad_norm": 11.361717074271807,
"learning_rate": 9.420608342000265e-06,
"loss": 2.9096837043762207,
"step": 1482
},
{
"epoch": 0.7188560349006301,
"grad_norm": 11.653079369301496,
"learning_rate": 9.419289927629101e-06,
"loss": 3.2829113006591797,
"step": 1483
},
{
"epoch": 0.7193407658749394,
"grad_norm": 19.542020421092722,
"learning_rate": 9.417970107398702e-06,
"loss": 2.653435230255127,
"step": 1484
},
{
"epoch": 0.7198254968492487,
"grad_norm": 12.479535613091567,
"learning_rate": 9.41664888172893e-06,
"loss": 3.0862741470336914,
"step": 1485
},
{
"epoch": 0.7203102278235579,
"grad_norm": 7.571189795146019,
"learning_rate": 9.415326251040088e-06,
"loss": 2.735212564468384,
"step": 1486
},
{
"epoch": 0.7207949587978671,
"grad_norm": 6.765460415164223,
"learning_rate": 9.414002215752933e-06,
"loss": 2.131359100341797,
"step": 1487
},
{
"epoch": 0.7212796897721765,
"grad_norm": 12.796417646272495,
"learning_rate": 9.412676776288662e-06,
"loss": 2.976102352142334,
"step": 1488
},
{
"epoch": 0.7217644207464857,
"grad_norm": 9.264478557611339,
"learning_rate": 9.411349933068925e-06,
"loss": 2.7312099933624268,
"step": 1489
},
{
"epoch": 0.722249151720795,
"grad_norm": 8.510554261496221,
"learning_rate": 9.410021686515815e-06,
"loss": 2.8794751167297363,
"step": 1490
},
{
"epoch": 0.7227338826951042,
"grad_norm": 6.274868362148077,
"learning_rate": 9.408692037051874e-06,
"loss": 2.7417521476745605,
"step": 1491
},
{
"epoch": 0.7232186136694134,
"grad_norm": 9.808961472933431,
"learning_rate": 9.407360985100086e-06,
"loss": 2.9287161827087402,
"step": 1492
},
{
"epoch": 0.7237033446437228,
"grad_norm": 12.80973066508866,
"learning_rate": 9.406028531083884e-06,
"loss": 2.7110424041748047,
"step": 1493
},
{
"epoch": 0.724188075618032,
"grad_norm": 12.878743064280307,
"learning_rate": 9.404694675427147e-06,
"loss": 2.527987003326416,
"step": 1494
},
{
"epoch": 0.7246728065923412,
"grad_norm": 11.189759772038013,
"learning_rate": 9.4033594185542e-06,
"loss": 3.0689563751220703,
"step": 1495
},
{
"epoch": 0.7251575375666505,
"grad_norm": 7.9546092528070576,
"learning_rate": 9.402022760889814e-06,
"loss": 2.5669164657592773,
"step": 1496
},
{
"epoch": 0.7256422685409598,
"grad_norm": 10.526162978999194,
"learning_rate": 9.400684702859203e-06,
"loss": 2.812495708465576,
"step": 1497
},
{
"epoch": 0.726126999515269,
"grad_norm": 10.477905798803885,
"learning_rate": 9.39934524488803e-06,
"loss": 2.9409713745117188,
"step": 1498
},
{
"epoch": 0.7266117304895783,
"grad_norm": 9.092084440409124,
"learning_rate": 9.3980043874024e-06,
"loss": 2.9753665924072266,
"step": 1499
},
{
"epoch": 0.7270964614638875,
"grad_norm": 10.9906552581551,
"learning_rate": 9.396662130828869e-06,
"loss": 2.732933521270752,
"step": 1500
},
{
"epoch": 0.7275811924381969,
"grad_norm": 7.836261305447653,
"learning_rate": 9.395318475594428e-06,
"loss": 2.9762020111083984,
"step": 1501
},
{
"epoch": 0.7280659234125061,
"grad_norm": 20.94500291338437,
"learning_rate": 9.393973422126523e-06,
"loss": 2.0075907707214355,
"step": 1502
},
{
"epoch": 0.7285506543868153,
"grad_norm": 14.36192002692805,
"learning_rate": 9.39262697085304e-06,
"loss": 2.9380297660827637,
"step": 1503
},
{
"epoch": 0.7290353853611246,
"grad_norm": 14.02406674581808,
"learning_rate": 9.391279122202313e-06,
"loss": 2.911068916320801,
"step": 1504
},
{
"epoch": 0.7295201163354338,
"grad_norm": 18.348392837589643,
"learning_rate": 9.389929876603112e-06,
"loss": 2.5987472534179688,
"step": 1505
},
{
"epoch": 0.7300048473097431,
"grad_norm": 13.77551242539495,
"learning_rate": 9.388579234484662e-06,
"loss": 2.393489360809326,
"step": 1506
},
{
"epoch": 0.7304895782840524,
"grad_norm": 18.16337001445443,
"learning_rate": 9.387227196276629e-06,
"loss": 2.5087618827819824,
"step": 1507
},
{
"epoch": 0.7309743092583616,
"grad_norm": 7.803325096123903,
"learning_rate": 9.385873762409117e-06,
"loss": 2.7613558769226074,
"step": 1508
},
{
"epoch": 0.7314590402326708,
"grad_norm": 17.659369432891648,
"learning_rate": 9.384518933312681e-06,
"loss": 3.0857369899749756,
"step": 1509
},
{
"epoch": 0.7319437712069802,
"grad_norm": 9.644531675421003,
"learning_rate": 9.38316270941832e-06,
"loss": 2.8036317825317383,
"step": 1510
},
{
"epoch": 0.7324285021812894,
"grad_norm": 11.516107840110363,
"learning_rate": 9.38180509115747e-06,
"loss": 2.9910659790039062,
"step": 1511
},
{
"epoch": 0.7329132331555986,
"grad_norm": 22.458102724268482,
"learning_rate": 9.380446078962016e-06,
"loss": 2.626082420349121,
"step": 1512
},
{
"epoch": 0.7333979641299079,
"grad_norm": 11.650774126996248,
"learning_rate": 9.379085673264288e-06,
"loss": 2.529747724533081,
"step": 1513
},
{
"epoch": 0.7338826951042171,
"grad_norm": 9.462139473292913,
"learning_rate": 9.377723874497055e-06,
"loss": 2.69979190826416,
"step": 1514
},
{
"epoch": 0.7343674260785265,
"grad_norm": 22.584423500674916,
"learning_rate": 9.37636068309353e-06,
"loss": 2.714998483657837,
"step": 1515
},
{
"epoch": 0.7348521570528357,
"grad_norm": 6.928040185917785,
"learning_rate": 9.37499609948737e-06,
"loss": 2.742631435394287,
"step": 1516
},
{
"epoch": 0.7353368880271449,
"grad_norm": 9.08750848753828,
"learning_rate": 9.373630124112676e-06,
"loss": 2.5681686401367188,
"step": 1517
},
{
"epoch": 0.7358216190014542,
"grad_norm": 9.968238701069737,
"learning_rate": 9.372262757403991e-06,
"loss": 2.6886332035064697,
"step": 1518
},
{
"epoch": 0.7363063499757635,
"grad_norm": 10.776971723836098,
"learning_rate": 9.370893999796296e-06,
"loss": 2.810791015625,
"step": 1519
},
{
"epoch": 0.7367910809500727,
"grad_norm": 12.000219035635963,
"learning_rate": 9.369523851725024e-06,
"loss": 2.5394554138183594,
"step": 1520
},
{
"epoch": 0.737275811924382,
"grad_norm": 8.585546321825847,
"learning_rate": 9.368152313626041e-06,
"loss": 2.369260549545288,
"step": 1521
},
{
"epoch": 0.7377605428986912,
"grad_norm": 8.774573732285196,
"learning_rate": 9.36677938593566e-06,
"loss": 2.8716447353363037,
"step": 1522
},
{
"epoch": 0.7382452738730004,
"grad_norm": 7.606377278659761,
"learning_rate": 9.365405069090637e-06,
"loss": 2.919455051422119,
"step": 1523
},
{
"epoch": 0.7387300048473098,
"grad_norm": 14.478529627554593,
"learning_rate": 9.364029363528164e-06,
"loss": 2.9222073554992676,
"step": 1524
},
{
"epoch": 0.739214735821619,
"grad_norm": 5.295982074602412,
"learning_rate": 9.362652269685881e-06,
"loss": 2.8216776847839355,
"step": 1525
},
{
"epoch": 0.7396994667959282,
"grad_norm": 22.480767592469082,
"learning_rate": 9.36127378800187e-06,
"loss": 2.5013017654418945,
"step": 1526
},
{
"epoch": 0.7401841977702375,
"grad_norm": 6.64269078983094,
"learning_rate": 9.35989391891465e-06,
"loss": 2.3291432857513428,
"step": 1527
},
{
"epoch": 0.7406689287445468,
"grad_norm": 19.14038608234103,
"learning_rate": 9.35851266286318e-06,
"loss": 3.181480884552002,
"step": 1528
},
{
"epoch": 0.741153659718856,
"grad_norm": 7.67784761974008,
"learning_rate": 9.357130020286867e-06,
"loss": 2.9565939903259277,
"step": 1529
},
{
"epoch": 0.7416383906931653,
"grad_norm": 22.46564286423051,
"learning_rate": 9.355745991625556e-06,
"loss": 2.7754907608032227,
"step": 1530
},
{
"epoch": 0.7421231216674745,
"grad_norm": 15.32874349462064,
"learning_rate": 9.354360577319532e-06,
"loss": 2.9508121013641357,
"step": 1531
},
{
"epoch": 0.7426078526417839,
"grad_norm": 10.055894245397035,
"learning_rate": 9.35297377780952e-06,
"loss": 2.8062567710876465,
"step": 1532
},
{
"epoch": 0.7430925836160931,
"grad_norm": 14.159066894984795,
"learning_rate": 9.351585593536689e-06,
"loss": 2.4076309204101562,
"step": 1533
},
{
"epoch": 0.7435773145904023,
"grad_norm": 9.452988832395027,
"learning_rate": 9.350196024942643e-06,
"loss": 2.683483839035034,
"step": 1534
},
{
"epoch": 0.7440620455647116,
"grad_norm": 8.863470456561895,
"learning_rate": 9.348805072469435e-06,
"loss": 2.4847021102905273,
"step": 1535
},
{
"epoch": 0.7445467765390208,
"grad_norm": 11.351374076154041,
"learning_rate": 9.34741273655955e-06,
"loss": 2.8761191368103027,
"step": 1536
},
{
"epoch": 0.7450315075133301,
"grad_norm": 7.2514738122163465,
"learning_rate": 9.346019017655916e-06,
"loss": 2.43723726272583,
"step": 1537
},
{
"epoch": 0.7455162384876394,
"grad_norm": 11.9565443474616,
"learning_rate": 9.344623916201903e-06,
"loss": 2.739774465560913,
"step": 1538
},
{
"epoch": 0.7460009694619486,
"grad_norm": 11.811830490470266,
"learning_rate": 9.343227432641317e-06,
"loss": 2.7188758850097656,
"step": 1539
},
{
"epoch": 0.7464857004362578,
"grad_norm": 12.71318797953747,
"learning_rate": 9.341829567418407e-06,
"loss": 2.6589345932006836,
"step": 1540
},
{
"epoch": 0.7469704314105672,
"grad_norm": 8.853939934573862,
"learning_rate": 9.34043032097786e-06,
"loss": 2.7700717449188232,
"step": 1541
},
{
"epoch": 0.7474551623848764,
"grad_norm": 11.95013632295872,
"learning_rate": 9.339029693764801e-06,
"loss": 2.8544650077819824,
"step": 1542
},
{
"epoch": 0.7479398933591856,
"grad_norm": 15.328626597432597,
"learning_rate": 9.3376276862248e-06,
"loss": 3.1909828186035156,
"step": 1543
},
{
"epoch": 0.7484246243334949,
"grad_norm": 11.37896357634448,
"learning_rate": 9.336224298803857e-06,
"loss": 3.4569053649902344,
"step": 1544
},
{
"epoch": 0.7489093553078041,
"grad_norm": 8.57506180438207,
"learning_rate": 9.334819531948418e-06,
"loss": 2.2693538665771484,
"step": 1545
},
{
"epoch": 0.7493940862821135,
"grad_norm": 14.406113038268199,
"learning_rate": 9.333413386105368e-06,
"loss": 2.9066381454467773,
"step": 1546
},
{
"epoch": 0.7498788172564227,
"grad_norm": 10.112192307958107,
"learning_rate": 9.332005861722025e-06,
"loss": 2.581667423248291,
"step": 1547
},
{
"epoch": 0.7503635482307319,
"grad_norm": 14.12985982249773,
"learning_rate": 9.33059695924615e-06,
"loss": 2.65659761428833,
"step": 1548
},
{
"epoch": 0.7508482792050412,
"grad_norm": 20.22913676524115,
"learning_rate": 9.329186679125942e-06,
"loss": 2.801243782043457,
"step": 1549
},
{
"epoch": 0.7513330101793505,
"grad_norm": 6.701692862388492,
"learning_rate": 9.327775021810037e-06,
"loss": 2.7834229469299316,
"step": 1550
},
{
"epoch": 0.7518177411536597,
"grad_norm": 10.577470402698939,
"learning_rate": 9.326361987747511e-06,
"loss": 2.8022191524505615,
"step": 1551
},
{
"epoch": 0.752302472127969,
"grad_norm": 24.057569535397334,
"learning_rate": 9.324947577387875e-06,
"loss": 3.0937247276306152,
"step": 1552
},
{
"epoch": 0.7527872031022782,
"grad_norm": 11.196810718893433,
"learning_rate": 9.323531791181082e-06,
"loss": 3.1358888149261475,
"step": 1553
},
{
"epoch": 0.7532719340765875,
"grad_norm": 10.94333855742144,
"learning_rate": 9.322114629577516e-06,
"loss": 2.4239554405212402,
"step": 1554
},
{
"epoch": 0.7537566650508968,
"grad_norm": 7.309843930237911,
"learning_rate": 9.320696093028009e-06,
"loss": 3.085015296936035,
"step": 1555
},
{
"epoch": 0.754241396025206,
"grad_norm": 22.580220778316388,
"learning_rate": 9.319276181983818e-06,
"loss": 3.1415982246398926,
"step": 1556
},
{
"epoch": 0.7547261269995152,
"grad_norm": 21.795195626339087,
"learning_rate": 9.317854896896645e-06,
"loss": 2.700037956237793,
"step": 1557
},
{
"epoch": 0.7552108579738245,
"grad_norm": 6.969152178940745,
"learning_rate": 9.316432238218628e-06,
"loss": 2.2683050632476807,
"step": 1558
},
{
"epoch": 0.7556955889481338,
"grad_norm": 9.230825553415388,
"learning_rate": 9.315008206402346e-06,
"loss": 2.677746295928955,
"step": 1559
},
{
"epoch": 0.7561803199224431,
"grad_norm": 10.034990488560302,
"learning_rate": 9.313582801900802e-06,
"loss": 2.3306241035461426,
"step": 1560
},
{
"epoch": 0.7566650508967523,
"grad_norm": 12.386983913569354,
"learning_rate": 9.31215602516745e-06,
"loss": 2.77996563911438,
"step": 1561
},
{
"epoch": 0.7571497818710615,
"grad_norm": 6.850422340735002,
"learning_rate": 9.310727876656172e-06,
"loss": 3.4691553115844727,
"step": 1562
},
{
"epoch": 0.7576345128453709,
"grad_norm": 19.048895193449987,
"learning_rate": 9.30929835682129e-06,
"loss": 2.9749889373779297,
"step": 1563
},
{
"epoch": 0.7581192438196801,
"grad_norm": 17.896376681983657,
"learning_rate": 9.307867466117561e-06,
"loss": 2.3366074562072754,
"step": 1564
},
{
"epoch": 0.7586039747939893,
"grad_norm": 13.024865018748457,
"learning_rate": 9.306435205000178e-06,
"loss": 2.8074111938476562,
"step": 1565
},
{
"epoch": 0.7590887057682986,
"grad_norm": 8.393353075721103,
"learning_rate": 9.305001573924769e-06,
"loss": 2.9515299797058105,
"step": 1566
},
{
"epoch": 0.7595734367426078,
"grad_norm": 10.493783437919967,
"learning_rate": 9.3035665733474e-06,
"loss": 2.4140520095825195,
"step": 1567
},
{
"epoch": 0.7600581677169171,
"grad_norm": 25.010682520646053,
"learning_rate": 9.302130203724574e-06,
"loss": 2.960160255432129,
"step": 1568
},
{
"epoch": 0.7605428986912264,
"grad_norm": 9.482727985312442,
"learning_rate": 9.300692465513223e-06,
"loss": 3.0124671459198,
"step": 1569
},
{
"epoch": 0.7610276296655356,
"grad_norm": 11.862280442099403,
"learning_rate": 9.299253359170722e-06,
"loss": 2.5204708576202393,
"step": 1570
},
{
"epoch": 0.7615123606398448,
"grad_norm": 14.355061015765683,
"learning_rate": 9.297812885154875e-06,
"loss": 3.053539752960205,
"step": 1571
},
{
"epoch": 0.7619970916141542,
"grad_norm": 10.500103269143905,
"learning_rate": 9.296371043923925e-06,
"loss": 2.3664186000823975,
"step": 1572
},
{
"epoch": 0.7624818225884634,
"grad_norm": 10.546804466102055,
"learning_rate": 9.29492783593655e-06,
"loss": 2.3901124000549316,
"step": 1573
},
{
"epoch": 0.7629665535627727,
"grad_norm": 7.970376052523777,
"learning_rate": 9.29348326165186e-06,
"loss": 2.8535256385803223,
"step": 1574
},
{
"epoch": 0.7634512845370819,
"grad_norm": 7.992841583011151,
"learning_rate": 9.292037321529404e-06,
"loss": 2.8557896614074707,
"step": 1575
},
{
"epoch": 0.7639360155113912,
"grad_norm": 25.8510636948795,
"learning_rate": 9.290590016029158e-06,
"loss": 1.8093364238739014,
"step": 1576
},
{
"epoch": 0.7644207464857005,
"grad_norm": 7.412146365833598,
"learning_rate": 9.289141345611541e-06,
"loss": 2.5356040000915527,
"step": 1577
},
{
"epoch": 0.7649054774600097,
"grad_norm": 10.67268303617549,
"learning_rate": 9.2876913107374e-06,
"loss": 2.538668632507324,
"step": 1578
},
{
"epoch": 0.7653902084343189,
"grad_norm": 9.641494697110009,
"learning_rate": 9.28623991186802e-06,
"loss": 2.6523380279541016,
"step": 1579
},
{
"epoch": 0.7658749394086282,
"grad_norm": 11.494126079837779,
"learning_rate": 9.284787149465119e-06,
"loss": 2.7265849113464355,
"step": 1580
},
{
"epoch": 0.7663596703829375,
"grad_norm": 14.226391367317746,
"learning_rate": 9.283333023990844e-06,
"loss": 2.5436153411865234,
"step": 1581
},
{
"epoch": 0.7668444013572467,
"grad_norm": 20.78756054714505,
"learning_rate": 9.281877535907785e-06,
"loss": 2.5216355323791504,
"step": 1582
},
{
"epoch": 0.767329132331556,
"grad_norm": 21.653324887415163,
"learning_rate": 9.280420685678958e-06,
"loss": 2.535581111907959,
"step": 1583
},
{
"epoch": 0.7678138633058652,
"grad_norm": 13.806231162196116,
"learning_rate": 9.278962473767811e-06,
"loss": 3.2980897426605225,
"step": 1584
},
{
"epoch": 0.7682985942801746,
"grad_norm": 23.74595318694819,
"learning_rate": 9.277502900638233e-06,
"loss": 2.6614980697631836,
"step": 1585
},
{
"epoch": 0.7687833252544838,
"grad_norm": 8.158943285776353,
"learning_rate": 9.27604196675454e-06,
"loss": 2.6015067100524902,
"step": 1586
},
{
"epoch": 0.769268056228793,
"grad_norm": 7.248732982119414,
"learning_rate": 9.274579672581482e-06,
"loss": 2.849276065826416,
"step": 1587
},
{
"epoch": 0.7697527872031023,
"grad_norm": 9.137563956952814,
"learning_rate": 9.273116018584245e-06,
"loss": 3.060957908630371,
"step": 1588
},
{
"epoch": 0.7702375181774115,
"grad_norm": 14.181275039842706,
"learning_rate": 9.271651005228442e-06,
"loss": 3.0366504192352295,
"step": 1589
},
{
"epoch": 0.7707222491517208,
"grad_norm": 10.140430342450827,
"learning_rate": 9.270184632980121e-06,
"loss": 3.1342520713806152,
"step": 1590
},
{
"epoch": 0.7712069801260301,
"grad_norm": 8.916764025304362,
"learning_rate": 9.268716902305766e-06,
"loss": 2.7974772453308105,
"step": 1591
},
{
"epoch": 0.7716917111003393,
"grad_norm": 9.11334763250776,
"learning_rate": 9.267247813672287e-06,
"loss": 2.7843494415283203,
"step": 1592
},
{
"epoch": 0.7721764420746485,
"grad_norm": 16.900751075658185,
"learning_rate": 9.26577736754703e-06,
"loss": 2.622519016265869,
"step": 1593
},
{
"epoch": 0.7726611730489579,
"grad_norm": 7.372491258340839,
"learning_rate": 9.264305564397769e-06,
"loss": 2.4944875240325928,
"step": 1594
},
{
"epoch": 0.7731459040232671,
"grad_norm": 20.893054208785767,
"learning_rate": 9.262832404692714e-06,
"loss": 2.9884276390075684,
"step": 1595
},
{
"epoch": 0.7736306349975763,
"grad_norm": 8.09424794677086,
"learning_rate": 9.261357888900507e-06,
"loss": 2.494638442993164,
"step": 1596
},
{
"epoch": 0.7741153659718856,
"grad_norm": 15.083027552069902,
"learning_rate": 9.259882017490216e-06,
"loss": 2.8210558891296387,
"step": 1597
},
{
"epoch": 0.7746000969461949,
"grad_norm": 10.448809749361791,
"learning_rate": 9.258404790931343e-06,
"loss": 2.9193553924560547,
"step": 1598
},
{
"epoch": 0.7750848279205041,
"grad_norm": 14.652503688021946,
"learning_rate": 9.256926209693823e-06,
"loss": 2.6584157943725586,
"step": 1599
},
{
"epoch": 0.7755695588948134,
"grad_norm": 20.34657031242411,
"learning_rate": 9.255446274248023e-06,
"loss": 2.9523706436157227,
"step": 1600
},
{
"epoch": 0.7760542898691226,
"grad_norm": 10.264113714394897,
"learning_rate": 9.253964985064736e-06,
"loss": 2.683238983154297,
"step": 1601
},
{
"epoch": 0.7765390208434318,
"grad_norm": 8.587172546677975,
"learning_rate": 9.252482342615185e-06,
"loss": 2.6980133056640625,
"step": 1602
},
{
"epoch": 0.7770237518177412,
"grad_norm": 10.315217209905363,
"learning_rate": 9.250998347371032e-06,
"loss": 3.031913995742798,
"step": 1603
},
{
"epoch": 0.7775084827920504,
"grad_norm": 16.08077288679048,
"learning_rate": 9.249512999804361e-06,
"loss": 2.712409496307373,
"step": 1604
},
{
"epoch": 0.7779932137663597,
"grad_norm": 11.841491589854222,
"learning_rate": 9.248026300387688e-06,
"loss": 2.7710776329040527,
"step": 1605
},
{
"epoch": 0.7784779447406689,
"grad_norm": 7.356872248690035,
"learning_rate": 9.246538249593961e-06,
"loss": 2.976235866546631,
"step": 1606
},
{
"epoch": 0.7789626757149782,
"grad_norm": 16.80051822118692,
"learning_rate": 9.245048847896558e-06,
"loss": 2.780235528945923,
"step": 1607
},
{
"epoch": 0.7794474066892875,
"grad_norm": 12.308898916436938,
"learning_rate": 9.243558095769285e-06,
"loss": 3.0532784461975098,
"step": 1608
},
{
"epoch": 0.7799321376635967,
"grad_norm": 31.107341678956995,
"learning_rate": 9.242065993686375e-06,
"loss": 3.10501766204834,
"step": 1609
},
{
"epoch": 0.7804168686379059,
"grad_norm": 11.231917172836374,
"learning_rate": 9.240572542122502e-06,
"loss": 2.747856855392456,
"step": 1610
},
{
"epoch": 0.7809015996122152,
"grad_norm": 7.6781143250708785,
"learning_rate": 9.23907774155275e-06,
"loss": 2.6217665672302246,
"step": 1611
},
{
"epoch": 0.7813863305865245,
"grad_norm": 13.825967459816688,
"learning_rate": 9.237581592452654e-06,
"loss": 2.588951587677002,
"step": 1612
},
{
"epoch": 0.7818710615608337,
"grad_norm": 10.103112687717726,
"learning_rate": 9.23608409529816e-06,
"loss": 3.048114538192749,
"step": 1613
},
{
"epoch": 0.782355792535143,
"grad_norm": 14.67033925247957,
"learning_rate": 9.234585250565651e-06,
"loss": 2.822618007659912,
"step": 1614
},
{
"epoch": 0.7828405235094522,
"grad_norm": 9.563607442539716,
"learning_rate": 9.23308505873194e-06,
"loss": 2.829341411590576,
"step": 1615
},
{
"epoch": 0.7833252544837616,
"grad_norm": 15.896332110417754,
"learning_rate": 9.231583520274265e-06,
"loss": 2.4976329803466797,
"step": 1616
},
{
"epoch": 0.7838099854580708,
"grad_norm": 9.499361702706048,
"learning_rate": 9.230080635670294e-06,
"loss": 2.557704210281372,
"step": 1617
},
{
"epoch": 0.78429471643238,
"grad_norm": 7.927334345918185,
"learning_rate": 9.228576405398121e-06,
"loss": 2.462937355041504,
"step": 1618
},
{
"epoch": 0.7847794474066893,
"grad_norm": 36.254063822545376,
"learning_rate": 9.227070829936273e-06,
"loss": 3.542135238647461,
"step": 1619
},
{
"epoch": 0.7852641783809986,
"grad_norm": 21.1893967844636,
"learning_rate": 9.225563909763701e-06,
"loss": 3.357707977294922,
"step": 1620
},
{
"epoch": 0.7857489093553078,
"grad_norm": 12.910556191627121,
"learning_rate": 9.224055645359783e-06,
"loss": 2.1481127738952637,
"step": 1621
},
{
"epoch": 0.7862336403296171,
"grad_norm": 9.181177327930193,
"learning_rate": 9.222546037204327e-06,
"loss": 2.7332916259765625,
"step": 1622
},
{
"epoch": 0.7867183713039263,
"grad_norm": 14.090426852326273,
"learning_rate": 9.22103508577757e-06,
"loss": 2.662663459777832,
"step": 1623
},
{
"epoch": 0.7872031022782355,
"grad_norm": 8.194647122428213,
"learning_rate": 9.219522791560169e-06,
"loss": 3.158961296081543,
"step": 1624
},
{
"epoch": 0.7876878332525449,
"grad_norm": 9.193113425764453,
"learning_rate": 9.218009155033218e-06,
"loss": 2.4406208992004395,
"step": 1625
},
{
"epoch": 0.7881725642268541,
"grad_norm": 11.987920822673999,
"learning_rate": 9.216494176678233e-06,
"loss": 2.4097015857696533,
"step": 1626
},
{
"epoch": 0.7886572952011633,
"grad_norm": 16.370672004098104,
"learning_rate": 9.214977856977152e-06,
"loss": 3.484684705734253,
"step": 1627
},
{
"epoch": 0.7891420261754726,
"grad_norm": 19.54535807038985,
"learning_rate": 9.213460196412351e-06,
"loss": 3.3559200763702393,
"step": 1628
},
{
"epoch": 0.7896267571497819,
"grad_norm": 15.782648369137535,
"learning_rate": 9.211941195466623e-06,
"loss": 3.006683588027954,
"step": 1629
},
{
"epoch": 0.7901114881240912,
"grad_norm": 20.550002534383506,
"learning_rate": 9.210420854623191e-06,
"loss": 3.3474390506744385,
"step": 1630
},
{
"epoch": 0.7905962190984004,
"grad_norm": 13.423152766012059,
"learning_rate": 9.208899174365708e-06,
"loss": 2.353362560272217,
"step": 1631
},
{
"epoch": 0.7910809500727096,
"grad_norm": 27.324451712932397,
"learning_rate": 9.207376155178242e-06,
"loss": 2.728178024291992,
"step": 1632
},
{
"epoch": 0.7915656810470189,
"grad_norm": 16.457909734025886,
"learning_rate": 9.2058517975453e-06,
"loss": 2.0382869243621826,
"step": 1633
},
{
"epoch": 0.7920504120213282,
"grad_norm": 16.342678090288686,
"learning_rate": 9.204326101951807e-06,
"loss": 2.9014203548431396,
"step": 1634
},
{
"epoch": 0.7925351429956374,
"grad_norm": 12.81877281901092,
"learning_rate": 9.202799068883113e-06,
"loss": 2.232778787612915,
"step": 1635
},
{
"epoch": 0.7930198739699467,
"grad_norm": 11.431094082732987,
"learning_rate": 9.201270698824998e-06,
"loss": 2.9244284629821777,
"step": 1636
},
{
"epoch": 0.7935046049442559,
"grad_norm": 14.929830693338383,
"learning_rate": 9.199740992263667e-06,
"loss": 2.8643462657928467,
"step": 1637
},
{
"epoch": 0.7939893359185652,
"grad_norm": 8.901004640089536,
"learning_rate": 9.198209949685745e-06,
"loss": 2.7304718494415283,
"step": 1638
},
{
"epoch": 0.7944740668928745,
"grad_norm": 12.11203147934254,
"learning_rate": 9.196677571578288e-06,
"loss": 2.9058027267456055,
"step": 1639
},
{
"epoch": 0.7949587978671837,
"grad_norm": 15.448166746831385,
"learning_rate": 9.195143858428773e-06,
"loss": 2.99456787109375,
"step": 1640
},
{
"epoch": 0.7954435288414929,
"grad_norm": 17.06610947548166,
"learning_rate": 9.193608810725105e-06,
"loss": 2.9785256385803223,
"step": 1641
},
{
"epoch": 0.7959282598158022,
"grad_norm": 18.4056893975679,
"learning_rate": 9.192072428955607e-06,
"loss": 3.5478134155273438,
"step": 1642
},
{
"epoch": 0.7964129907901115,
"grad_norm": 6.837353816511932,
"learning_rate": 9.190534713609037e-06,
"loss": 1.095738172531128,
"step": 1643
},
{
"epoch": 0.7968977217644208,
"grad_norm": 11.602238811757061,
"learning_rate": 9.188995665174566e-06,
"loss": 2.7502434253692627,
"step": 1644
},
{
"epoch": 0.79738245273873,
"grad_norm": 16.08684744061828,
"learning_rate": 9.187455284141798e-06,
"loss": 2.7178263664245605,
"step": 1645
},
{
"epoch": 0.7978671837130392,
"grad_norm": 10.680031207156585,
"learning_rate": 9.185913571000753e-06,
"loss": 2.694974422454834,
"step": 1646
},
{
"epoch": 0.7983519146873486,
"grad_norm": 11.076771563934585,
"learning_rate": 9.184370526241884e-06,
"loss": 3.618739604949951,
"step": 1647
},
{
"epoch": 0.7988366456616578,
"grad_norm": 7.989338585916179,
"learning_rate": 9.182826150356059e-06,
"loss": 2.5960960388183594,
"step": 1648
},
{
"epoch": 0.799321376635967,
"grad_norm": 10.406476302809184,
"learning_rate": 9.181280443834573e-06,
"loss": 2.7052226066589355,
"step": 1649
},
{
"epoch": 0.7998061076102763,
"grad_norm": 10.463760052102282,
"learning_rate": 9.179733407169146e-06,
"loss": 2.841214179992676,
"step": 1650
},
{
"epoch": 0.8002908385845856,
"grad_norm": 9.095338017147574,
"learning_rate": 9.17818504085192e-06,
"loss": 2.793886423110962,
"step": 1651
},
{
"epoch": 0.8007755695588948,
"grad_norm": 7.637493097985883,
"learning_rate": 9.176635345375455e-06,
"loss": 2.1593971252441406,
"step": 1652
},
{
"epoch": 0.8012603005332041,
"grad_norm": 14.432978404357012,
"learning_rate": 9.175084321232743e-06,
"loss": 3.1084089279174805,
"step": 1653
},
{
"epoch": 0.8017450315075133,
"grad_norm": 8.529910759512017,
"learning_rate": 9.173531968917195e-06,
"loss": 2.588754177093506,
"step": 1654
},
{
"epoch": 0.8022297624818225,
"grad_norm": 6.989286936943219,
"learning_rate": 9.171978288922638e-06,
"loss": 2.835556983947754,
"step": 1655
},
{
"epoch": 0.8027144934561319,
"grad_norm": 7.951846397329911,
"learning_rate": 9.17042328174333e-06,
"loss": 2.766777515411377,
"step": 1656
},
{
"epoch": 0.8031992244304411,
"grad_norm": 15.738335101336524,
"learning_rate": 9.16886694787395e-06,
"loss": 2.406235694885254,
"step": 1657
},
{
"epoch": 0.8036839554047503,
"grad_norm": 7.761366775737718,
"learning_rate": 9.167309287809592e-06,
"loss": 2.7203879356384277,
"step": 1658
},
{
"epoch": 0.8041686863790596,
"grad_norm": 6.921354773713193,
"learning_rate": 9.165750302045782e-06,
"loss": 2.666314125061035,
"step": 1659
},
{
"epoch": 0.8046534173533689,
"grad_norm": 9.657211831079346,
"learning_rate": 9.164189991078458e-06,
"loss": 2.7674028873443604,
"step": 1660
},
{
"epoch": 0.8051381483276782,
"grad_norm": 8.612111669177668,
"learning_rate": 9.162628355403989e-06,
"loss": 2.4518837928771973,
"step": 1661
},
{
"epoch": 0.8056228793019874,
"grad_norm": 21.37635512823088,
"learning_rate": 9.161065395519157e-06,
"loss": 3.200061321258545,
"step": 1662
},
{
"epoch": 0.8061076102762966,
"grad_norm": 10.620830177477604,
"learning_rate": 9.15950111192117e-06,
"loss": 2.3332769870758057,
"step": 1663
},
{
"epoch": 0.8065923412506059,
"grad_norm": 11.519551902328624,
"learning_rate": 9.157935505107657e-06,
"loss": 2.466541051864624,
"step": 1664
},
{
"epoch": 0.8070770722249152,
"grad_norm": 11.373076497848729,
"learning_rate": 9.156368575576667e-06,
"loss": 2.2016377449035645,
"step": 1665
},
{
"epoch": 0.8075618031992244,
"grad_norm": 9.1593536558205,
"learning_rate": 9.15480032382667e-06,
"loss": 2.8498589992523193,
"step": 1666
},
{
"epoch": 0.8080465341735337,
"grad_norm": 9.28315241150134,
"learning_rate": 9.153230750356555e-06,
"loss": 3.0351996421813965,
"step": 1667
},
{
"epoch": 0.8085312651478429,
"grad_norm": 18.821872730402426,
"learning_rate": 9.151659855665631e-06,
"loss": 2.99627685546875,
"step": 1668
},
{
"epoch": 0.8090159961221522,
"grad_norm": 10.19775873464429,
"learning_rate": 9.150087640253633e-06,
"loss": 2.8756134510040283,
"step": 1669
},
{
"epoch": 0.8095007270964615,
"grad_norm": 10.612380656164392,
"learning_rate": 9.148514104620711e-06,
"loss": 2.883441686630249,
"step": 1670
},
{
"epoch": 0.8099854580707707,
"grad_norm": 10.367269850015859,
"learning_rate": 9.146939249267435e-06,
"loss": 2.8718788623809814,
"step": 1671
},
{
"epoch": 0.81047018904508,
"grad_norm": 8.342122102666567,
"learning_rate": 9.145363074694797e-06,
"loss": 2.858919858932495,
"step": 1672
},
{
"epoch": 0.8109549200193893,
"grad_norm": 18.532448086091488,
"learning_rate": 9.14378558140421e-06,
"loss": 2.908618927001953,
"step": 1673
},
{
"epoch": 0.8114396509936985,
"grad_norm": 16.146387343082303,
"learning_rate": 9.1422067698975e-06,
"loss": 2.9479475021362305,
"step": 1674
},
{
"epoch": 0.8119243819680078,
"grad_norm": 15.058577623255852,
"learning_rate": 9.14062664067692e-06,
"loss": 2.9279379844665527,
"step": 1675
},
{
"epoch": 0.812409112942317,
"grad_norm": 11.301797034211669,
"learning_rate": 9.139045194245138e-06,
"loss": 2.8552093505859375,
"step": 1676
},
{
"epoch": 0.8128938439166262,
"grad_norm": 14.138124159148797,
"learning_rate": 9.13746243110524e-06,
"loss": 3.1762681007385254,
"step": 1677
},
{
"epoch": 0.8133785748909356,
"grad_norm": 7.911924498936774,
"learning_rate": 9.135878351760735e-06,
"loss": 2.965045213699341,
"step": 1678
},
{
"epoch": 0.8138633058652448,
"grad_norm": 9.751468493784389,
"learning_rate": 9.134292956715547e-06,
"loss": 3.071866750717163,
"step": 1679
},
{
"epoch": 0.814348036839554,
"grad_norm": 10.076849636912216,
"learning_rate": 9.13270624647402e-06,
"loss": 2.492373466491699,
"step": 1680
},
{
"epoch": 0.8148327678138633,
"grad_norm": 15.580252658737225,
"learning_rate": 9.131118221540919e-06,
"loss": 2.3612875938415527,
"step": 1681
},
{
"epoch": 0.8153174987881726,
"grad_norm": 9.92197147628149,
"learning_rate": 9.129528882421421e-06,
"loss": 2.208688259124756,
"step": 1682
},
{
"epoch": 0.8158022297624818,
"grad_norm": 7.569205082113494,
"learning_rate": 9.127938229621127e-06,
"loss": 2.6175284385681152,
"step": 1683
},
{
"epoch": 0.8162869607367911,
"grad_norm": 7.3112454558745705,
"learning_rate": 9.126346263646052e-06,
"loss": 2.630100965499878,
"step": 1684
},
{
"epoch": 0.8167716917111003,
"grad_norm": 7.831094974253712,
"learning_rate": 9.124752985002632e-06,
"loss": 2.7231063842773438,
"step": 1685
},
{
"epoch": 0.8172564226854095,
"grad_norm": 10.420058169463463,
"learning_rate": 9.123158394197717e-06,
"loss": 3.1031088829040527,
"step": 1686
},
{
"epoch": 0.8177411536597189,
"grad_norm": 14.096667959765128,
"learning_rate": 9.12156249173858e-06,
"loss": 2.465496301651001,
"step": 1687
},
{
"epoch": 0.8182258846340281,
"grad_norm": 10.681426246431533,
"learning_rate": 9.119965278132902e-06,
"loss": 3.323312759399414,
"step": 1688
},
{
"epoch": 0.8187106156083374,
"grad_norm": 11.556504035949851,
"learning_rate": 9.11836675388879e-06,
"loss": 2.8400731086730957,
"step": 1689
},
{
"epoch": 0.8191953465826466,
"grad_norm": 8.428742574920562,
"learning_rate": 9.116766919514765e-06,
"loss": 2.638695478439331,
"step": 1690
},
{
"epoch": 0.8196800775569559,
"grad_norm": 20.609452072965855,
"learning_rate": 9.115165775519763e-06,
"loss": 5.321481704711914,
"step": 1691
},
{
"epoch": 0.8201648085312652,
"grad_norm": 10.97444885927245,
"learning_rate": 9.11356332241314e-06,
"loss": 2.930129289627075,
"step": 1692
},
{
"epoch": 0.8206495395055744,
"grad_norm": 11.413770094798986,
"learning_rate": 9.111959560704664e-06,
"loss": 2.8815276622772217,
"step": 1693
},
{
"epoch": 0.8211342704798836,
"grad_norm": 12.716743349068464,
"learning_rate": 9.110354490904524e-06,
"loss": 2.5687026977539062,
"step": 1694
},
{
"epoch": 0.821619001454193,
"grad_norm": 30.536946109315263,
"learning_rate": 9.10874811352332e-06,
"loss": 2.715296983718872,
"step": 1695
},
{
"epoch": 0.8221037324285022,
"grad_norm": 6.7473692790144115,
"learning_rate": 9.107140429072074e-06,
"loss": 2.0419819355010986,
"step": 1696
},
{
"epoch": 0.8225884634028114,
"grad_norm": 8.011891508133395,
"learning_rate": 9.105531438062219e-06,
"loss": 2.347334861755371,
"step": 1697
},
{
"epoch": 0.8230731943771207,
"grad_norm": 8.358491492949355,
"learning_rate": 9.103921141005604e-06,
"loss": 3.01957106590271,
"step": 1698
},
{
"epoch": 0.8235579253514299,
"grad_norm": 8.188582327968023,
"learning_rate": 9.102309538414498e-06,
"loss": 2.7389798164367676,
"step": 1699
},
{
"epoch": 0.8240426563257393,
"grad_norm": 10.475520403078619,
"learning_rate": 9.10069663080158e-06,
"loss": 3.0844790935516357,
"step": 1700
},
{
"epoch": 0.8245273873000485,
"grad_norm": 7.360136033473291,
"learning_rate": 9.099082418679946e-06,
"loss": 2.494316577911377,
"step": 1701
},
{
"epoch": 0.8250121182743577,
"grad_norm": 14.383829718800536,
"learning_rate": 9.097466902563108e-06,
"loss": 1.266149878501892,
"step": 1702
},
{
"epoch": 0.825496849248667,
"grad_norm": 11.856390756637337,
"learning_rate": 9.095850082964994e-06,
"loss": 2.8569297790527344,
"step": 1703
},
{
"epoch": 0.8259815802229763,
"grad_norm": 25.931619349777584,
"learning_rate": 9.09423196039994e-06,
"loss": 2.5717663764953613,
"step": 1704
},
{
"epoch": 0.8264663111972855,
"grad_norm": 15.515046534809812,
"learning_rate": 9.092612535382705e-06,
"loss": 2.4790194034576416,
"step": 1705
},
{
"epoch": 0.8269510421715948,
"grad_norm": 13.164396630895666,
"learning_rate": 9.090991808428457e-06,
"loss": 3.3317065238952637,
"step": 1706
},
{
"epoch": 0.827435773145904,
"grad_norm": 9.775802766772086,
"learning_rate": 9.089369780052782e-06,
"loss": 3.357060432434082,
"step": 1707
},
{
"epoch": 0.8279205041202132,
"grad_norm": 9.753573957596071,
"learning_rate": 9.087746450771675e-06,
"loss": 2.779670238494873,
"step": 1708
},
{
"epoch": 0.8284052350945226,
"grad_norm": 13.995349266274795,
"learning_rate": 9.086121821101549e-06,
"loss": 2.9500746726989746,
"step": 1709
},
{
"epoch": 0.8288899660688318,
"grad_norm": 8.168036871860867,
"learning_rate": 9.084495891559227e-06,
"loss": 3.2876782417297363,
"step": 1710
},
{
"epoch": 0.829374697043141,
"grad_norm": 8.111988719631464,
"learning_rate": 9.08286866266195e-06,
"loss": 2.942697525024414,
"step": 1711
},
{
"epoch": 0.8298594280174503,
"grad_norm": 13.619895730156122,
"learning_rate": 9.081240134927371e-06,
"loss": 2.717061996459961,
"step": 1712
},
{
"epoch": 0.8303441589917596,
"grad_norm": 12.511510168763374,
"learning_rate": 9.079610308873552e-06,
"loss": 2.4467694759368896,
"step": 1713
},
{
"epoch": 0.8308288899660689,
"grad_norm": 12.660163082997503,
"learning_rate": 9.077979185018975e-06,
"loss": 2.245495557785034,
"step": 1714
},
{
"epoch": 0.8313136209403781,
"grad_norm": 8.03038391623797,
"learning_rate": 9.076346763882529e-06,
"loss": 2.575601577758789,
"step": 1715
},
{
"epoch": 0.8317983519146873,
"grad_norm": 13.602516775097742,
"learning_rate": 9.074713045983517e-06,
"loss": 3.6843793392181396,
"step": 1716
},
{
"epoch": 0.8322830828889967,
"grad_norm": 14.898791890006777,
"learning_rate": 9.073078031841659e-06,
"loss": 2.3162999153137207,
"step": 1717
},
{
"epoch": 0.8327678138633059,
"grad_norm": 12.864847451896273,
"learning_rate": 9.071441721977079e-06,
"loss": 2.706434965133667,
"step": 1718
},
{
"epoch": 0.8332525448376151,
"grad_norm": 11.591523927057839,
"learning_rate": 9.069804116910322e-06,
"loss": 2.535141706466675,
"step": 1719
},
{
"epoch": 0.8337372758119244,
"grad_norm": 14.46703871389546,
"learning_rate": 9.068165217162337e-06,
"loss": 2.064110517501831,
"step": 1720
},
{
"epoch": 0.8342220067862336,
"grad_norm": 6.765869441220489,
"learning_rate": 9.066525023254494e-06,
"loss": 3.0520949363708496,
"step": 1721
},
{
"epoch": 0.8347067377605429,
"grad_norm": 19.059435370680035,
"learning_rate": 9.064883535708565e-06,
"loss": 2.974348306655884,
"step": 1722
},
{
"epoch": 0.8351914687348522,
"grad_norm": 12.622676574446189,
"learning_rate": 9.063240755046739e-06,
"loss": 2.6537134647369385,
"step": 1723
},
{
"epoch": 0.8356761997091614,
"grad_norm": 5.261654838133345,
"learning_rate": 9.061596681791617e-06,
"loss": 2.839151620864868,
"step": 1724
},
{
"epoch": 0.8361609306834706,
"grad_norm": 12.156003665170847,
"learning_rate": 9.059951316466209e-06,
"loss": 2.5023727416992188,
"step": 1725
},
{
"epoch": 0.83664566165778,
"grad_norm": 19.31420112514109,
"learning_rate": 9.058304659593938e-06,
"loss": 3.0227324962615967,
"step": 1726
},
{
"epoch": 0.8371303926320892,
"grad_norm": 9.001780471575955,
"learning_rate": 9.056656711698633e-06,
"loss": 2.8468785285949707,
"step": 1727
},
{
"epoch": 0.8376151236063984,
"grad_norm": 10.097095231946454,
"learning_rate": 9.055007473304538e-06,
"loss": 3.1778364181518555,
"step": 1728
},
{
"epoch": 0.8380998545807077,
"grad_norm": 11.784471376887232,
"learning_rate": 9.05335694493631e-06,
"loss": 3.1376423835754395,
"step": 1729
},
{
"epoch": 0.8385845855550169,
"grad_norm": 16.990645473950856,
"learning_rate": 9.05170512711901e-06,
"loss": 2.2843408584594727,
"step": 1730
},
{
"epoch": 0.8390693165293263,
"grad_norm": 11.149001183575987,
"learning_rate": 9.050052020378115e-06,
"loss": 2.7623353004455566,
"step": 1731
},
{
"epoch": 0.8395540475036355,
"grad_norm": 9.684340687234444,
"learning_rate": 9.048397625239506e-06,
"loss": 2.95358943939209,
"step": 1732
},
{
"epoch": 0.8400387784779447,
"grad_norm": 8.817042576995618,
"learning_rate": 9.046741942229479e-06,
"loss": 2.639188766479492,
"step": 1733
},
{
"epoch": 0.840523509452254,
"grad_norm": 11.529899824632984,
"learning_rate": 9.045084971874738e-06,
"loss": 2.542839527130127,
"step": 1734
},
{
"epoch": 0.8410082404265633,
"grad_norm": 6.0706490046439665,
"learning_rate": 9.043426714702398e-06,
"loss": 1.7179720401763916,
"step": 1735
},
{
"epoch": 0.8414929714008725,
"grad_norm": 14.415398264390797,
"learning_rate": 9.041767171239977e-06,
"loss": 2.6000216007232666,
"step": 1736
},
{
"epoch": 0.8419777023751818,
"grad_norm": 15.658630740664519,
"learning_rate": 9.04010634201541e-06,
"loss": 3.210470676422119,
"step": 1737
},
{
"epoch": 0.842462433349491,
"grad_norm": 19.183885147726773,
"learning_rate": 9.03844422755704e-06,
"loss": 3.00373911857605,
"step": 1738
},
{
"epoch": 0.8429471643238002,
"grad_norm": 9.055129670811308,
"learning_rate": 9.036780828393614e-06,
"loss": 2.6675968170166016,
"step": 1739
},
{
"epoch": 0.8434318952981096,
"grad_norm": 9.379889070322003,
"learning_rate": 9.035116145054292e-06,
"loss": 2.836029529571533,
"step": 1740
},
{
"epoch": 0.8439166262724188,
"grad_norm": 8.244732372937337,
"learning_rate": 9.033450178068639e-06,
"loss": 2.8455371856689453,
"step": 1741
},
{
"epoch": 0.844401357246728,
"grad_norm": 12.466503954858343,
"learning_rate": 9.031782927966633e-06,
"loss": 2.9662647247314453,
"step": 1742
},
{
"epoch": 0.8448860882210373,
"grad_norm": 8.233895877962404,
"learning_rate": 9.030114395278657e-06,
"loss": 1.8088935613632202,
"step": 1743
},
{
"epoch": 0.8453708191953466,
"grad_norm": 11.655306741833739,
"learning_rate": 9.028444580535501e-06,
"loss": 2.574103832244873,
"step": 1744
},
{
"epoch": 0.8458555501696559,
"grad_norm": 7.340985747430725,
"learning_rate": 9.026773484268368e-06,
"loss": 2.269094467163086,
"step": 1745
},
{
"epoch": 0.8463402811439651,
"grad_norm": 9.418810316200165,
"learning_rate": 9.025101107008861e-06,
"loss": 2.4806389808654785,
"step": 1746
},
{
"epoch": 0.8468250121182743,
"grad_norm": 12.22118118542799,
"learning_rate": 9.023427449288996e-06,
"loss": 3.0049543380737305,
"step": 1747
},
{
"epoch": 0.8473097430925837,
"grad_norm": 8.780841916663695,
"learning_rate": 9.021752511641195e-06,
"loss": 3.036696195602417,
"step": 1748
},
{
"epoch": 0.8477944740668929,
"grad_norm": 6.966994056767846,
"learning_rate": 9.020076294598287e-06,
"loss": 2.228451728820801,
"step": 1749
},
{
"epoch": 0.8482792050412021,
"grad_norm": 7.93415881439134,
"learning_rate": 9.018398798693512e-06,
"loss": 2.637190818786621,
"step": 1750
},
{
"epoch": 0.8487639360155114,
"grad_norm": 7.162956178075893,
"learning_rate": 9.016720024460508e-06,
"loss": 2.8156065940856934,
"step": 1751
},
{
"epoch": 0.8492486669898206,
"grad_norm": 5.988964353659631,
"learning_rate": 9.015039972433327e-06,
"loss": 2.521368980407715,
"step": 1752
},
{
"epoch": 0.8497333979641299,
"grad_norm": 7.649586600824849,
"learning_rate": 9.013358643146424e-06,
"loss": 2.8192248344421387,
"step": 1753
},
{
"epoch": 0.8502181289384392,
"grad_norm": 11.030129025500267,
"learning_rate": 9.011676037134661e-06,
"loss": 3.2142632007598877,
"step": 1754
},
{
"epoch": 0.8507028599127484,
"grad_norm": 19.517375079914164,
"learning_rate": 9.009992154933309e-06,
"loss": 2.694934844970703,
"step": 1755
},
{
"epoch": 0.8511875908870576,
"grad_norm": 9.333364913804019,
"learning_rate": 9.00830699707804e-06,
"loss": 2.158740520477295,
"step": 1756
},
{
"epoch": 0.851672321861367,
"grad_norm": 10.464530657493192,
"learning_rate": 9.006620564104938e-06,
"loss": 2.879805564880371,
"step": 1757
},
{
"epoch": 0.8521570528356762,
"grad_norm": 11.44890953322707,
"learning_rate": 9.004932856550484e-06,
"loss": 2.6020114421844482,
"step": 1758
},
{
"epoch": 0.8526417838099855,
"grad_norm": 7.925832255699197,
"learning_rate": 9.003243874951573e-06,
"loss": 2.913911819458008,
"step": 1759
},
{
"epoch": 0.8531265147842947,
"grad_norm": 7.192176460107086,
"learning_rate": 9.001553619845502e-06,
"loss": 2.6111960411071777,
"step": 1760
},
{
"epoch": 0.8536112457586039,
"grad_norm": 24.423432908953508,
"learning_rate": 8.999862091769971e-06,
"loss": 3.232832908630371,
"step": 1761
},
{
"epoch": 0.8540959767329133,
"grad_norm": 9.952964446221015,
"learning_rate": 8.998169291263089e-06,
"loss": 2.049193859100342,
"step": 1762
},
{
"epoch": 0.8545807077072225,
"grad_norm": 10.539381690963102,
"learning_rate": 8.996475218863365e-06,
"loss": 3.0872106552124023,
"step": 1763
},
{
"epoch": 0.8550654386815317,
"grad_norm": 16.08680586831515,
"learning_rate": 8.994779875109718e-06,
"loss": 2.9271841049194336,
"step": 1764
},
{
"epoch": 0.855550169655841,
"grad_norm": 11.123603833255988,
"learning_rate": 8.993083260541467e-06,
"loss": 2.5349268913269043,
"step": 1765
},
{
"epoch": 0.8560349006301503,
"grad_norm": 10.138725412347384,
"learning_rate": 8.991385375698335e-06,
"loss": 3.0934181213378906,
"step": 1766
},
{
"epoch": 0.8565196316044595,
"grad_norm": 11.458464180224606,
"learning_rate": 8.989686221120458e-06,
"loss": 2.6210672855377197,
"step": 1767
},
{
"epoch": 0.8570043625787688,
"grad_norm": 18.30773432101369,
"learning_rate": 8.987985797348361e-06,
"loss": 2.985403060913086,
"step": 1768
},
{
"epoch": 0.857489093553078,
"grad_norm": 14.070741262716934,
"learning_rate": 8.986284104922986e-06,
"loss": 2.530153751373291,
"step": 1769
},
{
"epoch": 0.8579738245273874,
"grad_norm": 11.466464578411012,
"learning_rate": 8.984581144385673e-06,
"loss": 2.958134651184082,
"step": 1770
},
{
"epoch": 0.8584585555016966,
"grad_norm": 7.40882246975861,
"learning_rate": 8.982876916278163e-06,
"loss": 2.75773286819458,
"step": 1771
},
{
"epoch": 0.8589432864760058,
"grad_norm": 8.892778291693814,
"learning_rate": 8.981171421142605e-06,
"loss": 2.754281997680664,
"step": 1772
},
{
"epoch": 0.859428017450315,
"grad_norm": 14.377655288252791,
"learning_rate": 8.979464659521548e-06,
"loss": 3.129836082458496,
"step": 1773
},
{
"epoch": 0.8599127484246243,
"grad_norm": 11.607719569610122,
"learning_rate": 8.977756631957947e-06,
"loss": 2.9182910919189453,
"step": 1774
},
{
"epoch": 0.8603974793989336,
"grad_norm": 11.183101548548018,
"learning_rate": 8.976047338995156e-06,
"loss": 4.1659393310546875,
"step": 1775
},
{
"epoch": 0.8608822103732429,
"grad_norm": 9.719250988423221,
"learning_rate": 8.974336781176933e-06,
"loss": 2.0286707878112793,
"step": 1776
},
{
"epoch": 0.8613669413475521,
"grad_norm": 12.131688788706636,
"learning_rate": 8.972624959047439e-06,
"loss": 3.178272247314453,
"step": 1777
},
{
"epoch": 0.8618516723218613,
"grad_norm": 12.547611700230542,
"learning_rate": 8.970911873151238e-06,
"loss": 2.673373222351074,
"step": 1778
},
{
"epoch": 0.8623364032961707,
"grad_norm": 13.835515621685019,
"learning_rate": 8.969197524033293e-06,
"loss": 3.06781005859375,
"step": 1779
},
{
"epoch": 0.8628211342704799,
"grad_norm": 14.157657599786482,
"learning_rate": 8.967481912238971e-06,
"loss": 2.5154495239257812,
"step": 1780
},
{
"epoch": 0.8633058652447891,
"grad_norm": 25.70762892261527,
"learning_rate": 8.96576503831404e-06,
"loss": 2.039489269256592,
"step": 1781
},
{
"epoch": 0.8637905962190984,
"grad_norm": 7.517163646411105,
"learning_rate": 8.964046902804673e-06,
"loss": 3.130321741104126,
"step": 1782
},
{
"epoch": 0.8642753271934076,
"grad_norm": 9.457638575597512,
"learning_rate": 8.962327506257438e-06,
"loss": 2.7923436164855957,
"step": 1783
},
{
"epoch": 0.864760058167717,
"grad_norm": 12.075759911068992,
"learning_rate": 8.960606849219309e-06,
"loss": 2.525322198867798,
"step": 1784
},
{
"epoch": 0.8652447891420262,
"grad_norm": 15.155362337326775,
"learning_rate": 8.958884932237658e-06,
"loss": 2.5937390327453613,
"step": 1785
},
{
"epoch": 0.8657295201163354,
"grad_norm": 14.08827400493596,
"learning_rate": 8.95716175586026e-06,
"loss": 2.6289193630218506,
"step": 1786
},
{
"epoch": 0.8662142510906446,
"grad_norm": 9.328981544685611,
"learning_rate": 8.95543732063529e-06,
"loss": 2.9672393798828125,
"step": 1787
},
{
"epoch": 0.866698982064954,
"grad_norm": 9.346409686545163,
"learning_rate": 8.95371162711132e-06,
"loss": 2.393282175064087,
"step": 1788
},
{
"epoch": 0.8671837130392632,
"grad_norm": 18.691516405724283,
"learning_rate": 8.951984675837332e-06,
"loss": 2.3577089309692383,
"step": 1789
},
{
"epoch": 0.8676684440135725,
"grad_norm": 14.175289845788916,
"learning_rate": 8.9502564673627e-06,
"loss": 2.6541500091552734,
"step": 1790
},
{
"epoch": 0.8681531749878817,
"grad_norm": 7.06236660015784,
"learning_rate": 8.948527002237193e-06,
"loss": 2.3331141471862793,
"step": 1791
},
{
"epoch": 0.868637905962191,
"grad_norm": 13.776305008280016,
"learning_rate": 8.946796281010994e-06,
"loss": 2.5802552700042725,
"step": 1792
},
{
"epoch": 0.8691226369365003,
"grad_norm": 17.25631465289018,
"learning_rate": 8.945064304234673e-06,
"loss": 2.7634568214416504,
"step": 1793
},
{
"epoch": 0.8696073679108095,
"grad_norm": 6.9824593757859965,
"learning_rate": 8.94333107245921e-06,
"loss": 2.6045613288879395,
"step": 1794
},
{
"epoch": 0.8700920988851187,
"grad_norm": 13.71595549921609,
"learning_rate": 8.941596586235972e-06,
"loss": 2.6867332458496094,
"step": 1795
},
{
"epoch": 0.870576829859428,
"grad_norm": 14.969603127913837,
"learning_rate": 8.939860846116737e-06,
"loss": 3.102445602416992,
"step": 1796
},
{
"epoch": 0.8710615608337373,
"grad_norm": 16.73053082793482,
"learning_rate": 8.938123852653673e-06,
"loss": 3.0592124462127686,
"step": 1797
},
{
"epoch": 0.8715462918080465,
"grad_norm": 6.679837954806097,
"learning_rate": 8.936385606399354e-06,
"loss": 2.4538826942443848,
"step": 1798
},
{
"epoch": 0.8720310227823558,
"grad_norm": 13.259099232592492,
"learning_rate": 8.934646107906744e-06,
"loss": 3.413822889328003,
"step": 1799
},
{
"epoch": 0.872515753756665,
"grad_norm": 31.07135767221308,
"learning_rate": 8.932905357729213e-06,
"loss": 2.521759271621704,
"step": 1800
},
{
"epoch": 0.8730004847309744,
"grad_norm": 9.683595723810585,
"learning_rate": 8.931163356420526e-06,
"loss": 1.8212223052978516,
"step": 1801
},
{
"epoch": 0.8734852157052836,
"grad_norm": 7.437921227583561,
"learning_rate": 8.92942010453485e-06,
"loss": 2.8291423320770264,
"step": 1802
},
{
"epoch": 0.8739699466795928,
"grad_norm": 20.254560097995334,
"learning_rate": 8.927675602626738e-06,
"loss": 3.433669328689575,
"step": 1803
},
{
"epoch": 0.8744546776539021,
"grad_norm": 13.969009057825948,
"learning_rate": 8.925929851251156e-06,
"loss": 2.4590353965759277,
"step": 1804
},
{
"epoch": 0.8749394086282113,
"grad_norm": 8.503179384055903,
"learning_rate": 8.924182850963457e-06,
"loss": 3.0101685523986816,
"step": 1805
},
{
"epoch": 0.8754241396025206,
"grad_norm": 10.609864181523598,
"learning_rate": 8.922434602319397e-06,
"loss": 2.65299129486084,
"step": 1806
},
{
"epoch": 0.8759088705768299,
"grad_norm": 8.395576145610665,
"learning_rate": 8.920685105875124e-06,
"loss": 2.669032573699951,
"step": 1807
},
{
"epoch": 0.8763936015511391,
"grad_norm": 9.938002281566096,
"learning_rate": 8.918934362187185e-06,
"loss": 2.8076412677764893,
"step": 1808
},
{
"epoch": 0.8768783325254483,
"grad_norm": 13.68211607342376,
"learning_rate": 8.917182371812531e-06,
"loss": 2.8565468788146973,
"step": 1809
},
{
"epoch": 0.8773630634997577,
"grad_norm": 11.706946187686755,
"learning_rate": 8.915429135308496e-06,
"loss": 2.5435962677001953,
"step": 1810
},
{
"epoch": 0.8778477944740669,
"grad_norm": 11.115304983633484,
"learning_rate": 8.91367465323282e-06,
"loss": 2.9961040019989014,
"step": 1811
},
{
"epoch": 0.8783325254483761,
"grad_norm": 13.134232994676465,
"learning_rate": 8.911918926143637e-06,
"loss": 3.010784387588501,
"step": 1812
},
{
"epoch": 0.8788172564226854,
"grad_norm": 13.441279434668147,
"learning_rate": 8.910161954599477e-06,
"loss": 2.2601382732391357,
"step": 1813
},
{
"epoch": 0.8793019873969947,
"grad_norm": 24.038881699279212,
"learning_rate": 8.908403739159265e-06,
"loss": 2.476396322250366,
"step": 1814
},
{
"epoch": 0.879786718371304,
"grad_norm": 12.13015347363121,
"learning_rate": 8.906644280382325e-06,
"loss": 2.4267737865448,
"step": 1815
},
{
"epoch": 0.8802714493456132,
"grad_norm": 14.355675487940434,
"learning_rate": 8.904883578828369e-06,
"loss": 2.873523473739624,
"step": 1816
},
{
"epoch": 0.8807561803199224,
"grad_norm": 11.950263782130797,
"learning_rate": 8.903121635057513e-06,
"loss": 2.7426564693450928,
"step": 1817
},
{
"epoch": 0.8812409112942317,
"grad_norm": 10.944278123132758,
"learning_rate": 8.901358449630262e-06,
"loss": 2.5972647666931152,
"step": 1818
},
{
"epoch": 0.881725642268541,
"grad_norm": 7.963001960751293,
"learning_rate": 8.89959402310752e-06,
"loss": 2.554691791534424,
"step": 1819
},
{
"epoch": 0.8822103732428502,
"grad_norm": 17.41397775340923,
"learning_rate": 8.897828356050587e-06,
"loss": 3.509577512741089,
"step": 1820
},
{
"epoch": 0.8826951042171595,
"grad_norm": 9.420486399737108,
"learning_rate": 8.896061449021148e-06,
"loss": 2.5087761878967285,
"step": 1821
},
{
"epoch": 0.8831798351914687,
"grad_norm": 13.192992778870863,
"learning_rate": 8.894293302581297e-06,
"loss": 2.738391399383545,
"step": 1822
},
{
"epoch": 0.883664566165778,
"grad_norm": 16.1333189903543,
"learning_rate": 8.892523917293508e-06,
"loss": 3.1003167629241943,
"step": 1823
},
{
"epoch": 0.8841492971400873,
"grad_norm": 5.823254696510134,
"learning_rate": 8.89075329372066e-06,
"loss": 1.680354118347168,
"step": 1824
},
{
"epoch": 0.8846340281143965,
"grad_norm": 21.576695999328287,
"learning_rate": 8.88898143242602e-06,
"loss": 3.543755531311035,
"step": 1825
},
{
"epoch": 0.8851187590887057,
"grad_norm": 8.510919473818305,
"learning_rate": 8.887208333973252e-06,
"loss": 2.636371374130249,
"step": 1826
},
{
"epoch": 0.885603490063015,
"grad_norm": 10.043565632004606,
"learning_rate": 8.885433998926409e-06,
"loss": 2.896994113922119,
"step": 1827
},
{
"epoch": 0.8860882210373243,
"grad_norm": 14.424454921042264,
"learning_rate": 8.883658427849944e-06,
"loss": 2.349423408508301,
"step": 1828
},
{
"epoch": 0.8865729520116336,
"grad_norm": 14.786643247919507,
"learning_rate": 8.881881621308696e-06,
"loss": 2.3234753608703613,
"step": 1829
},
{
"epoch": 0.8870576829859428,
"grad_norm": 7.218323032022206,
"learning_rate": 8.880103579867903e-06,
"loss": 2.2576420307159424,
"step": 1830
},
{
"epoch": 0.887542413960252,
"grad_norm": 12.041202526791407,
"learning_rate": 8.878324304093192e-06,
"loss": 2.829591751098633,
"step": 1831
},
{
"epoch": 0.8880271449345614,
"grad_norm": 11.723390131353606,
"learning_rate": 8.876543794550586e-06,
"loss": 2.775280714035034,
"step": 1832
},
{
"epoch": 0.8885118759088706,
"grad_norm": 10.300582329828325,
"learning_rate": 8.874762051806495e-06,
"loss": 2.500619649887085,
"step": 1833
},
{
"epoch": 0.8889966068831798,
"grad_norm": 9.133997030953724,
"learning_rate": 8.872979076427727e-06,
"loss": 2.8442254066467285,
"step": 1834
},
{
"epoch": 0.8894813378574891,
"grad_norm": 7.87243899103,
"learning_rate": 8.871194868981483e-06,
"loss": 2.7700319290161133,
"step": 1835
},
{
"epoch": 0.8899660688317984,
"grad_norm": 19.145984875114113,
"learning_rate": 8.869409430035349e-06,
"loss": 3.0255699157714844,
"step": 1836
},
{
"epoch": 0.8904507998061076,
"grad_norm": 6.348673637099621,
"learning_rate": 8.867622760157308e-06,
"loss": 2.738523483276367,
"step": 1837
},
{
"epoch": 0.8909355307804169,
"grad_norm": 8.091737836013666,
"learning_rate": 8.865834859915731e-06,
"loss": 2.829514980316162,
"step": 1838
},
{
"epoch": 0.8914202617547261,
"grad_norm": 6.207137279047199,
"learning_rate": 8.864045729879387e-06,
"loss": 2.5040223598480225,
"step": 1839
},
{
"epoch": 0.8919049927290353,
"grad_norm": 7.1778160496594,
"learning_rate": 8.86225537061743e-06,
"loss": 2.924677848815918,
"step": 1840
},
{
"epoch": 0.8923897237033447,
"grad_norm": 10.316324903657259,
"learning_rate": 8.860463782699404e-06,
"loss": 2.5135669708251953,
"step": 1841
},
{
"epoch": 0.8928744546776539,
"grad_norm": 7.596317984129521,
"learning_rate": 8.85867096669525e-06,
"loss": 2.7505133152008057,
"step": 1842
},
{
"epoch": 0.8933591856519632,
"grad_norm": 9.212631028083413,
"learning_rate": 8.8568769231753e-06,
"loss": 2.5425920486450195,
"step": 1843
},
{
"epoch": 0.8938439166262724,
"grad_norm": 13.298841134971633,
"learning_rate": 8.855081652710264e-06,
"loss": 2.3109400272369385,
"step": 1844
},
{
"epoch": 0.8943286476005817,
"grad_norm": 9.942621634224384,
"learning_rate": 8.853285155871258e-06,
"loss": 2.524900436401367,
"step": 1845
},
{
"epoch": 0.894813378574891,
"grad_norm": 7.259826096154673,
"learning_rate": 8.851487433229781e-06,
"loss": 2.4583520889282227,
"step": 1846
},
{
"epoch": 0.8952981095492002,
"grad_norm": 10.945229416643485,
"learning_rate": 8.849688485357719e-06,
"loss": 2.870063066482544,
"step": 1847
},
{
"epoch": 0.8957828405235094,
"grad_norm": 21.870541247746242,
"learning_rate": 8.847888312827354e-06,
"loss": 2.8025145530700684,
"step": 1848
},
{
"epoch": 0.8962675714978187,
"grad_norm": 10.986557817995175,
"learning_rate": 8.846086916211354e-06,
"loss": 2.986161231994629,
"step": 1849
},
{
"epoch": 0.896752302472128,
"grad_norm": 9.504883592310211,
"learning_rate": 8.844284296082776e-06,
"loss": 2.7488012313842773,
"step": 1850
},
{
"epoch": 0.8972370334464372,
"grad_norm": 11.430463500073964,
"learning_rate": 8.84248045301507e-06,
"loss": 2.67085337638855,
"step": 1851
},
{
"epoch": 0.8977217644207465,
"grad_norm": 18.732999590788115,
"learning_rate": 8.84067538758207e-06,
"loss": 2.6723132133483887,
"step": 1852
},
{
"epoch": 0.8982064953950557,
"grad_norm": 10.59087574988106,
"learning_rate": 8.838869100358003e-06,
"loss": 2.8628668785095215,
"step": 1853
},
{
"epoch": 0.898691226369365,
"grad_norm": 11.479407751744509,
"learning_rate": 8.837061591917481e-06,
"loss": 3.21571683883667,
"step": 1854
},
{
"epoch": 0.8991759573436743,
"grad_norm": 15.057717856194609,
"learning_rate": 8.835252862835508e-06,
"loss": 2.557255983352661,
"step": 1855
},
{
"epoch": 0.8996606883179835,
"grad_norm": 9.685011631050491,
"learning_rate": 8.833442913687473e-06,
"loss": 2.6234912872314453,
"step": 1856
},
{
"epoch": 0.9001454192922927,
"grad_norm": 13.946546023026519,
"learning_rate": 8.831631745049156e-06,
"loss": 2.5307345390319824,
"step": 1857
},
{
"epoch": 0.900630150266602,
"grad_norm": 21.172697556181657,
"learning_rate": 8.829819357496725e-06,
"loss": 4.103190898895264,
"step": 1858
},
{
"epoch": 0.9011148812409113,
"grad_norm": 12.6597467659809,
"learning_rate": 8.828005751606733e-06,
"loss": 3.076680898666382,
"step": 1859
},
{
"epoch": 0.9015996122152206,
"grad_norm": 7.339181141570583,
"learning_rate": 8.826190927956123e-06,
"loss": 2.982416868209839,
"step": 1860
},
{
"epoch": 0.9020843431895298,
"grad_norm": 18.722683538808333,
"learning_rate": 8.824374887122224e-06,
"loss": 2.727665424346924,
"step": 1861
},
{
"epoch": 0.902569074163839,
"grad_norm": 14.71816004365023,
"learning_rate": 8.822557629682751e-06,
"loss": 2.1354525089263916,
"step": 1862
},
{
"epoch": 0.9030538051381484,
"grad_norm": 23.150719484407148,
"learning_rate": 8.820739156215812e-06,
"loss": 3.111706018447876,
"step": 1863
},
{
"epoch": 0.9035385361124576,
"grad_norm": 10.971606026920911,
"learning_rate": 8.818919467299893e-06,
"loss": 2.636897087097168,
"step": 1864
},
{
"epoch": 0.9040232670867668,
"grad_norm": 12.468494708144522,
"learning_rate": 8.817098563513874e-06,
"loss": 2.7168922424316406,
"step": 1865
},
{
"epoch": 0.9045079980610761,
"grad_norm": 13.202946491277007,
"learning_rate": 8.81527644543702e-06,
"loss": 2.9529995918273926,
"step": 1866
},
{
"epoch": 0.9049927290353854,
"grad_norm": 11.601462714840931,
"learning_rate": 8.813453113648978e-06,
"loss": 2.9956860542297363,
"step": 1867
},
{
"epoch": 0.9054774600096946,
"grad_norm": 10.712093565961803,
"learning_rate": 8.811628568729785e-06,
"loss": 2.6150622367858887,
"step": 1868
},
{
"epoch": 0.9059621909840039,
"grad_norm": 11.03666280857133,
"learning_rate": 8.809802811259865e-06,
"loss": 2.688225746154785,
"step": 1869
},
{
"epoch": 0.9064469219583131,
"grad_norm": 13.85966074237617,
"learning_rate": 8.807975841820023e-06,
"loss": 2.7338786125183105,
"step": 1870
},
{
"epoch": 0.9069316529326223,
"grad_norm": 16.87902663198634,
"learning_rate": 8.806147660991453e-06,
"loss": 2.8702595233917236,
"step": 1871
},
{
"epoch": 0.9074163839069317,
"grad_norm": 8.186308546594686,
"learning_rate": 8.804318269355734e-06,
"loss": 2.488560676574707,
"step": 1872
},
{
"epoch": 0.9079011148812409,
"grad_norm": 8.181248680858568,
"learning_rate": 8.80248766749483e-06,
"loss": 3.1274666786193848,
"step": 1873
},
{
"epoch": 0.9083858458555502,
"grad_norm": 40.791979162488026,
"learning_rate": 8.800655855991092e-06,
"loss": 3.350297451019287,
"step": 1874
},
{
"epoch": 0.9088705768298594,
"grad_norm": 12.696329023417885,
"learning_rate": 8.79882283542725e-06,
"loss": 2.912957191467285,
"step": 1875
},
{
"epoch": 0.9093553078041687,
"grad_norm": 11.897486368644262,
"learning_rate": 8.796988606386424e-06,
"loss": 3.3371503353118896,
"step": 1876
},
{
"epoch": 0.909840038778478,
"grad_norm": 16.570545874891533,
"learning_rate": 8.795153169452117e-06,
"loss": 2.498155117034912,
"step": 1877
},
{
"epoch": 0.9103247697527872,
"grad_norm": 23.20296277228549,
"learning_rate": 8.793316525208215e-06,
"loss": 2.934013843536377,
"step": 1878
},
{
"epoch": 0.9108095007270964,
"grad_norm": 13.698009259696581,
"learning_rate": 8.791478674238992e-06,
"loss": 2.9019968509674072,
"step": 1879
},
{
"epoch": 0.9112942317014057,
"grad_norm": 14.3242568105932,
"learning_rate": 8.7896396171291e-06,
"loss": 3.01591420173645,
"step": 1880
},
{
"epoch": 0.911778962675715,
"grad_norm": 9.173309894872737,
"learning_rate": 8.787799354463578e-06,
"loss": 3.0389394760131836,
"step": 1881
},
{
"epoch": 0.9122636936500242,
"grad_norm": 11.438222315828963,
"learning_rate": 8.785957886827847e-06,
"loss": 2.3968560695648193,
"step": 1882
},
{
"epoch": 0.9127484246243335,
"grad_norm": 11.535188735989385,
"learning_rate": 8.784115214807715e-06,
"loss": 2.953376293182373,
"step": 1883
},
{
"epoch": 0.9132331555986427,
"grad_norm": 99.61687826864078,
"learning_rate": 8.782271338989372e-06,
"loss": 2.7422237396240234,
"step": 1884
},
{
"epoch": 0.913717886572952,
"grad_norm": 10.072595556231034,
"learning_rate": 8.780426259959385e-06,
"loss": 2.6766695976257324,
"step": 1885
},
{
"epoch": 0.9142026175472613,
"grad_norm": 9.247595098996241,
"learning_rate": 8.778579978304711e-06,
"loss": 2.6227469444274902,
"step": 1886
},
{
"epoch": 0.9146873485215705,
"grad_norm": 15.516325021062798,
"learning_rate": 8.776732494612685e-06,
"loss": 3.5802161693573,
"step": 1887
},
{
"epoch": 0.9151720794958798,
"grad_norm": 54.36536774373774,
"learning_rate": 8.77488380947103e-06,
"loss": 2.774005651473999,
"step": 1888
},
{
"epoch": 0.9156568104701891,
"grad_norm": 9.37838365371323,
"learning_rate": 8.773033923467846e-06,
"loss": 2.631140947341919,
"step": 1889
},
{
"epoch": 0.9161415414444983,
"grad_norm": 18.72899179813291,
"learning_rate": 8.771182837191614e-06,
"loss": 2.9803085327148438,
"step": 1890
},
{
"epoch": 0.9166262724188076,
"grad_norm": 17.008514713870778,
"learning_rate": 8.769330551231201e-06,
"loss": 2.6217234134674072,
"step": 1891
},
{
"epoch": 0.9171110033931168,
"grad_norm": 11.89395144656241,
"learning_rate": 8.767477066175856e-06,
"loss": 2.8732593059539795,
"step": 1892
},
{
"epoch": 0.917595734367426,
"grad_norm": 14.492050734866902,
"learning_rate": 8.765622382615204e-06,
"loss": 2.7058236598968506,
"step": 1893
},
{
"epoch": 0.9180804653417354,
"grad_norm": 8.184411216660413,
"learning_rate": 8.763766501139257e-06,
"loss": 2.7016513347625732,
"step": 1894
},
{
"epoch": 0.9185651963160446,
"grad_norm": 14.887298837891898,
"learning_rate": 8.761909422338404e-06,
"loss": 2.9003806114196777,
"step": 1895
},
{
"epoch": 0.9190499272903538,
"grad_norm": 8.484225663743993,
"learning_rate": 8.76005114680342e-06,
"loss": 2.42445707321167,
"step": 1896
},
{
"epoch": 0.9195346582646631,
"grad_norm": 12.857201115620878,
"learning_rate": 8.758191675125453e-06,
"loss": 2.547945976257324,
"step": 1897
},
{
"epoch": 0.9200193892389724,
"grad_norm": 9.31412198103668,
"learning_rate": 8.75633100789604e-06,
"loss": 3.0125792026519775,
"step": 1898
},
{
"epoch": 0.9205041202132817,
"grad_norm": 7.879003440652629,
"learning_rate": 8.754469145707093e-06,
"loss": 2.614844560623169,
"step": 1899
},
{
"epoch": 0.9209888511875909,
"grad_norm": 23.981740931613498,
"learning_rate": 8.752606089150903e-06,
"loss": 2.1799612045288086,
"step": 1900
},
{
"epoch": 0.9214735821619001,
"grad_norm": 7.66330802604762,
"learning_rate": 8.750741838820146e-06,
"loss": 2.8104896545410156,
"step": 1901
},
{
"epoch": 0.9219583131362094,
"grad_norm": 7.745368463846469,
"learning_rate": 8.748876395307876e-06,
"loss": 3.1747066974639893,
"step": 1902
},
{
"epoch": 0.9224430441105187,
"grad_norm": 11.324200112233502,
"learning_rate": 8.74700975920752e-06,
"loss": 2.5670511722564697,
"step": 1903
},
{
"epoch": 0.9229277750848279,
"grad_norm": 9.784675831709233,
"learning_rate": 8.745141931112896e-06,
"loss": 3.0088131427764893,
"step": 1904
},
{
"epoch": 0.9234125060591372,
"grad_norm": 7.669652920321612,
"learning_rate": 8.743272911618193e-06,
"loss": 2.071211338043213,
"step": 1905
},
{
"epoch": 0.9238972370334464,
"grad_norm": 9.423081267031387,
"learning_rate": 8.74140270131798e-06,
"loss": 2.958768367767334,
"step": 1906
},
{
"epoch": 0.9243819680077557,
"grad_norm": 9.62636482731335,
"learning_rate": 8.739531300807208e-06,
"loss": 2.647197723388672,
"step": 1907
},
{
"epoch": 0.924866698982065,
"grad_norm": 5.703218222689481,
"learning_rate": 8.737658710681206e-06,
"loss": 2.801380157470703,
"step": 1908
},
{
"epoch": 0.9253514299563742,
"grad_norm": 6.515461322701219,
"learning_rate": 8.735784931535678e-06,
"loss": 2.3631937503814697,
"step": 1909
},
{
"epoch": 0.9258361609306834,
"grad_norm": 11.546222581370143,
"learning_rate": 8.733909963966709e-06,
"loss": 2.5831642150878906,
"step": 1910
},
{
"epoch": 0.9263208919049928,
"grad_norm": 7.840075864760483,
"learning_rate": 8.732033808570759e-06,
"loss": 2.3269357681274414,
"step": 1911
},
{
"epoch": 0.926805622879302,
"grad_norm": 10.175688756016886,
"learning_rate": 8.730156465944672e-06,
"loss": 2.6938583850860596,
"step": 1912
},
{
"epoch": 0.9272903538536112,
"grad_norm": 15.250750069784727,
"learning_rate": 8.728277936685663e-06,
"loss": 2.599515438079834,
"step": 1913
},
{
"epoch": 0.9277750848279205,
"grad_norm": 10.77719994197126,
"learning_rate": 8.72639822139133e-06,
"loss": 2.647207021713257,
"step": 1914
},
{
"epoch": 0.9282598158022297,
"grad_norm": 14.338670994064554,
"learning_rate": 8.724517320659644e-06,
"loss": 2.9003286361694336,
"step": 1915
},
{
"epoch": 0.9287445467765391,
"grad_norm": 7.811679052444301,
"learning_rate": 8.722635235088957e-06,
"loss": 2.510582685470581,
"step": 1916
},
{
"epoch": 0.9292292777508483,
"grad_norm": 15.941412746048414,
"learning_rate": 8.720751965277993e-06,
"loss": 2.2696876525878906,
"step": 1917
},
{
"epoch": 0.9297140087251575,
"grad_norm": 22.181363018436464,
"learning_rate": 8.718867511825859e-06,
"loss": 2.690702199935913,
"step": 1918
},
{
"epoch": 0.9301987396994668,
"grad_norm": 18.11116022006824,
"learning_rate": 8.716981875332031e-06,
"loss": 2.362877368927002,
"step": 1919
},
{
"epoch": 0.9306834706737761,
"grad_norm": 7.769734232570251,
"learning_rate": 8.715095056396369e-06,
"loss": 2.4956839084625244,
"step": 1920
},
{
"epoch": 0.9311682016480853,
"grad_norm": 12.36394359284412,
"learning_rate": 8.713207055619101e-06,
"loss": 2.796283006668091,
"step": 1921
},
{
"epoch": 0.9316529326223946,
"grad_norm": 6.8420518339414444,
"learning_rate": 8.711317873600843e-06,
"loss": 2.717341423034668,
"step": 1922
},
{
"epoch": 0.9321376635967038,
"grad_norm": 16.218015436390985,
"learning_rate": 8.709427510942572e-06,
"loss": 3.405665397644043,
"step": 1923
},
{
"epoch": 0.932622394571013,
"grad_norm": 9.645742172271023,
"learning_rate": 8.707535968245652e-06,
"loss": 2.7355775833129883,
"step": 1924
},
{
"epoch": 0.9331071255453224,
"grad_norm": 11.265061704728906,
"learning_rate": 8.705643246111817e-06,
"loss": 2.675165891647339,
"step": 1925
},
{
"epoch": 0.9335918565196316,
"grad_norm": 7.566727518955628,
"learning_rate": 8.703749345143178e-06,
"loss": 1.9695744514465332,
"step": 1926
},
{
"epoch": 0.9340765874939408,
"grad_norm": 7.7074255806806065,
"learning_rate": 8.701854265942218e-06,
"loss": 3.2827837467193604,
"step": 1927
},
{
"epoch": 0.9345613184682501,
"grad_norm": 10.633945965810081,
"learning_rate": 8.699958009111803e-06,
"loss": 2.4340579509735107,
"step": 1928
},
{
"epoch": 0.9350460494425594,
"grad_norm": 11.890145123932024,
"learning_rate": 8.69806057525516e-06,
"loss": 3.2177937030792236,
"step": 1929
},
{
"epoch": 0.9355307804168687,
"grad_norm": 16.70189875516341,
"learning_rate": 8.696161964975907e-06,
"loss": 2.790924549102783,
"step": 1930
},
{
"epoch": 0.9360155113911779,
"grad_norm": 9.879869201577735,
"learning_rate": 8.69426217887802e-06,
"loss": 2.569194793701172,
"step": 1931
},
{
"epoch": 0.9365002423654871,
"grad_norm": 8.10674924843645,
"learning_rate": 8.692361217565864e-06,
"loss": 2.500406503677368,
"step": 1932
},
{
"epoch": 0.9369849733397965,
"grad_norm": 9.883821419500334,
"learning_rate": 8.690459081644164e-06,
"loss": 2.859179973602295,
"step": 1933
},
{
"epoch": 0.9374697043141057,
"grad_norm": 11.717291774441527,
"learning_rate": 8.688555771718029e-06,
"loss": 2.213712215423584,
"step": 1934
},
{
"epoch": 0.9379544352884149,
"grad_norm": 7.109650969110893,
"learning_rate": 8.686651288392937e-06,
"loss": 2.8320116996765137,
"step": 1935
},
{
"epoch": 0.9384391662627242,
"grad_norm": 9.979020032509634,
"learning_rate": 8.684745632274738e-06,
"loss": 2.8142216205596924,
"step": 1936
},
{
"epoch": 0.9389238972370334,
"grad_norm": 16.054078810625533,
"learning_rate": 8.68283880396966e-06,
"loss": 2.1438629627227783,
"step": 1937
},
{
"epoch": 0.9394086282113427,
"grad_norm": 8.294626719976428,
"learning_rate": 8.680930804084298e-06,
"loss": 2.4089982509613037,
"step": 1938
},
{
"epoch": 0.939893359185652,
"grad_norm": 8.277471920427637,
"learning_rate": 8.679021633225626e-06,
"loss": 2.403167724609375,
"step": 1939
},
{
"epoch": 0.9403780901599612,
"grad_norm": 9.996869514579886,
"learning_rate": 8.677111292000985e-06,
"loss": 2.309584617614746,
"step": 1940
},
{
"epoch": 0.9408628211342704,
"grad_norm": 8.736589170400856,
"learning_rate": 8.675199781018091e-06,
"loss": 2.3277974128723145,
"step": 1941
},
{
"epoch": 0.9413475521085798,
"grad_norm": 18.78142479358872,
"learning_rate": 8.67328710088503e-06,
"loss": 3.04707670211792,
"step": 1942
},
{
"epoch": 0.941832283082889,
"grad_norm": 11.170774845072822,
"learning_rate": 8.671373252210265e-06,
"loss": 3.022925853729248,
"step": 1943
},
{
"epoch": 0.9423170140571983,
"grad_norm": 23.660794381353668,
"learning_rate": 8.669458235602623e-06,
"loss": 2.5860238075256348,
"step": 1944
},
{
"epoch": 0.9428017450315075,
"grad_norm": 11.93320408806997,
"learning_rate": 8.66754205167131e-06,
"loss": 3.0962934494018555,
"step": 1945
},
{
"epoch": 0.9432864760058167,
"grad_norm": 10.924055362461454,
"learning_rate": 8.6656247010259e-06,
"loss": 2.4726343154907227,
"step": 1946
},
{
"epoch": 0.9437712069801261,
"grad_norm": 6.697268179494113,
"learning_rate": 8.663706184276335e-06,
"loss": 2.8334953784942627,
"step": 1947
},
{
"epoch": 0.9442559379544353,
"grad_norm": 10.422187010184816,
"learning_rate": 8.661786502032935e-06,
"loss": 2.8255715370178223,
"step": 1948
},
{
"epoch": 0.9447406689287445,
"grad_norm": 11.58826488176463,
"learning_rate": 8.659865654906387e-06,
"loss": 2.9311716556549072,
"step": 1949
},
{
"epoch": 0.9452253999030538,
"grad_norm": 7.594848356959508,
"learning_rate": 8.657943643507747e-06,
"loss": 2.8164868354797363,
"step": 1950
},
{
"epoch": 0.9457101308773631,
"grad_norm": 6.327893375114602,
"learning_rate": 8.656020468448442e-06,
"loss": 2.4570212364196777,
"step": 1951
},
{
"epoch": 0.9461948618516723,
"grad_norm": 12.244305354902846,
"learning_rate": 8.654096130340273e-06,
"loss": 2.8069143295288086,
"step": 1952
},
{
"epoch": 0.9466795928259816,
"grad_norm": 16.198984867944553,
"learning_rate": 8.652170629795405e-06,
"loss": 2.8398876190185547,
"step": 1953
},
{
"epoch": 0.9471643238002908,
"grad_norm": 17.889498234118495,
"learning_rate": 8.650243967426378e-06,
"loss": 2.6630845069885254,
"step": 1954
},
{
"epoch": 0.9476490547746,
"grad_norm": 7.092160778282363,
"learning_rate": 8.6483161438461e-06,
"loss": 2.4187560081481934,
"step": 1955
},
{
"epoch": 0.9481337857489094,
"grad_norm": 15.005228089262587,
"learning_rate": 8.646387159667847e-06,
"loss": 2.662118434906006,
"step": 1956
},
{
"epoch": 0.9486185167232186,
"grad_norm": 47.348510330427835,
"learning_rate": 8.644457015505262e-06,
"loss": 2.7657558917999268,
"step": 1957
},
{
"epoch": 0.9491032476975279,
"grad_norm": 9.438123085801129,
"learning_rate": 8.642525711972367e-06,
"loss": 2.6979575157165527,
"step": 1958
},
{
"epoch": 0.9495879786718371,
"grad_norm": 7.33928227968113,
"learning_rate": 8.64059324968354e-06,
"loss": 2.9913055896759033,
"step": 1959
},
{
"epoch": 0.9500727096461464,
"grad_norm": 9.27292399135283,
"learning_rate": 8.638659629253536e-06,
"loss": 2.908094882965088,
"step": 1960
},
{
"epoch": 0.9505574406204557,
"grad_norm": 6.9885646079117505,
"learning_rate": 8.636724851297477e-06,
"loss": 2.240222930908203,
"step": 1961
},
{
"epoch": 0.9510421715947649,
"grad_norm": 14.622792743298906,
"learning_rate": 8.634788916430853e-06,
"loss": 2.8597612380981445,
"step": 1962
},
{
"epoch": 0.9515269025690741,
"grad_norm": 14.052859305092591,
"learning_rate": 8.632851825269514e-06,
"loss": 2.6397294998168945,
"step": 1963
},
{
"epoch": 0.9520116335433835,
"grad_norm": 5.1873066461555055,
"learning_rate": 8.630913578429695e-06,
"loss": 2.082341194152832,
"step": 1964
},
{
"epoch": 0.9524963645176927,
"grad_norm": 15.191524085746007,
"learning_rate": 8.628974176527982e-06,
"loss": 4.117185592651367,
"step": 1965
},
{
"epoch": 0.9529810954920019,
"grad_norm": 10.562366807347752,
"learning_rate": 8.62703362018134e-06,
"loss": 2.959322690963745,
"step": 1966
},
{
"epoch": 0.9534658264663112,
"grad_norm": 5.99510854720358,
"learning_rate": 8.625091910007093e-06,
"loss": 2.5222291946411133,
"step": 1967
},
{
"epoch": 0.9539505574406204,
"grad_norm": 11.505337680273069,
"learning_rate": 8.623149046622934e-06,
"loss": 2.365757703781128,
"step": 1968
},
{
"epoch": 0.9544352884149298,
"grad_norm": 7.791023608464084,
"learning_rate": 8.62120503064693e-06,
"loss": 2.465360164642334,
"step": 1969
},
{
"epoch": 0.954920019389239,
"grad_norm": 11.138191225205269,
"learning_rate": 8.619259862697504e-06,
"loss": 2.3364317417144775,
"step": 1970
},
{
"epoch": 0.9554047503635482,
"grad_norm": 12.21627912943736,
"learning_rate": 8.617313543393454e-06,
"loss": 3.296461582183838,
"step": 1971
},
{
"epoch": 0.9558894813378574,
"grad_norm": 8.350118949950927,
"learning_rate": 8.615366073353937e-06,
"loss": 3.0171380043029785,
"step": 1972
},
{
"epoch": 0.9563742123121668,
"grad_norm": 9.580508217849115,
"learning_rate": 8.613417453198484e-06,
"loss": 1.6231756210327148,
"step": 1973
},
{
"epoch": 0.956858943286476,
"grad_norm": 7.451456553797411,
"learning_rate": 8.611467683546986e-06,
"loss": 2.951653003692627,
"step": 1974
},
{
"epoch": 0.9573436742607853,
"grad_norm": 9.245339690769237,
"learning_rate": 8.609516765019699e-06,
"loss": 2.590963125228882,
"step": 1975
},
{
"epoch": 0.9578284052350945,
"grad_norm": 7.901238962361197,
"learning_rate": 8.607564698237251e-06,
"loss": 2.85538387298584,
"step": 1976
},
{
"epoch": 0.9583131362094037,
"grad_norm": 10.799887253493058,
"learning_rate": 8.605611483820628e-06,
"loss": 2.850358009338379,
"step": 1977
},
{
"epoch": 0.9587978671837131,
"grad_norm": 18.231060392213262,
"learning_rate": 8.603657122391186e-06,
"loss": 2.876830577850342,
"step": 1978
},
{
"epoch": 0.9592825981580223,
"grad_norm": 14.18970902406236,
"learning_rate": 8.601701614570644e-06,
"loss": 2.330190658569336,
"step": 1979
},
{
"epoch": 0.9597673291323315,
"grad_norm": 22.464151724822027,
"learning_rate": 8.599744960981085e-06,
"loss": 3.1165943145751953,
"step": 1980
},
{
"epoch": 0.9602520601066408,
"grad_norm": 15.23275305518031,
"learning_rate": 8.597787162244957e-06,
"loss": 2.0315256118774414,
"step": 1981
},
{
"epoch": 0.9607367910809501,
"grad_norm": 11.577436999271638,
"learning_rate": 8.595828218985074e-06,
"loss": 1.8104982376098633,
"step": 1982
},
{
"epoch": 0.9612215220552593,
"grad_norm": 14.690398024059737,
"learning_rate": 8.593868131824612e-06,
"loss": 3.135465145111084,
"step": 1983
},
{
"epoch": 0.9617062530295686,
"grad_norm": 12.737470614446694,
"learning_rate": 8.591906901387112e-06,
"loss": 2.756223201751709,
"step": 1984
},
{
"epoch": 0.9621909840038778,
"grad_norm": 12.61011715638845,
"learning_rate": 8.589944528296476e-06,
"loss": 2.684046506881714,
"step": 1985
},
{
"epoch": 0.9626757149781872,
"grad_norm": 10.61553363080006,
"learning_rate": 8.587981013176976e-06,
"loss": 3.2455074787139893,
"step": 1986
},
{
"epoch": 0.9631604459524964,
"grad_norm": 7.220975447077769,
"learning_rate": 8.58601635665324e-06,
"loss": 2.8824968338012695,
"step": 1987
},
{
"epoch": 0.9636451769268056,
"grad_norm": 8.393492032886979,
"learning_rate": 8.584050559350261e-06,
"loss": 2.444610834121704,
"step": 1988
},
{
"epoch": 0.9641299079011149,
"grad_norm": 5.7179498991540365,
"learning_rate": 8.5820836218934e-06,
"loss": 2.524416446685791,
"step": 1989
},
{
"epoch": 0.9646146388754241,
"grad_norm": 18.368532646723953,
"learning_rate": 8.580115544908374e-06,
"loss": 2.6269724369049072,
"step": 1990
},
{
"epoch": 0.9650993698497334,
"grad_norm": 11.18399784501954,
"learning_rate": 8.578146329021266e-06,
"loss": 2.71397066116333,
"step": 1991
},
{
"epoch": 0.9655841008240427,
"grad_norm": 11.550580925240194,
"learning_rate": 8.576175974858521e-06,
"loss": 2.560657501220703,
"step": 1992
},
{
"epoch": 0.9660688317983519,
"grad_norm": 11.803840762284699,
"learning_rate": 8.574204483046946e-06,
"loss": 2.741572380065918,
"step": 1993
},
{
"epoch": 0.9665535627726611,
"grad_norm": 9.362121495691312,
"learning_rate": 8.572231854213707e-06,
"loss": 2.7943296432495117,
"step": 1994
},
{
"epoch": 0.9670382937469705,
"grad_norm": 10.943054761805573,
"learning_rate": 8.570258088986339e-06,
"loss": 2.957057476043701,
"step": 1995
},
{
"epoch": 0.9675230247212797,
"grad_norm": 9.279277250841538,
"learning_rate": 8.56828318799273e-06,
"loss": 2.8321995735168457,
"step": 1996
},
{
"epoch": 0.968007755695589,
"grad_norm": 11.898469977063716,
"learning_rate": 8.566307151861132e-06,
"loss": 2.8590781688690186,
"step": 1997
},
{
"epoch": 0.9684924866698982,
"grad_norm": 11.230356293804666,
"learning_rate": 8.564329981220164e-06,
"loss": 3.115314483642578,
"step": 1998
},
{
"epoch": 0.9689772176442074,
"grad_norm": 5.6441616162360955,
"learning_rate": 8.5623516766988e-06,
"loss": 1.5555663108825684,
"step": 1999
},
{
"epoch": 0.9694619486185168,
"grad_norm": 10.407274906482309,
"learning_rate": 8.560372238926372e-06,
"loss": 2.658076047897339,
"step": 2000
},
{
"epoch": 0.969946679592826,
"grad_norm": 10.139237213723646,
"learning_rate": 8.55839166853258e-06,
"loss": 1.8371249437332153,
"step": 2001
},
{
"epoch": 0.9704314105671352,
"grad_norm": 10.5390649172521,
"learning_rate": 8.55640996614748e-06,
"loss": 3.0024967193603516,
"step": 2002
},
{
"epoch": 0.9709161415414445,
"grad_norm": 5.738459844970122,
"learning_rate": 8.554427132401488e-06,
"loss": 2.720551013946533,
"step": 2003
},
{
"epoch": 0.9714008725157538,
"grad_norm": 10.236736048692789,
"learning_rate": 8.552443167925382e-06,
"loss": 2.6168856620788574,
"step": 2004
},
{
"epoch": 0.971885603490063,
"grad_norm": 11.45429979451379,
"learning_rate": 8.550458073350296e-06,
"loss": 2.7450809478759766,
"step": 2005
},
{
"epoch": 0.9723703344643723,
"grad_norm": 10.440307804965062,
"learning_rate": 8.548471849307729e-06,
"loss": 2.634845733642578,
"step": 2006
},
{
"epoch": 0.9728550654386815,
"grad_norm": 11.493989096832841,
"learning_rate": 8.546484496429535e-06,
"loss": 2.8418378829956055,
"step": 2007
},
{
"epoch": 0.9733397964129908,
"grad_norm": 10.762609925559124,
"learning_rate": 8.544496015347927e-06,
"loss": 2.530881404876709,
"step": 2008
},
{
"epoch": 0.9738245273873001,
"grad_norm": 8.112460492175925,
"learning_rate": 8.542506406695478e-06,
"loss": 2.2933056354522705,
"step": 2009
},
{
"epoch": 0.9743092583616093,
"grad_norm": 19.183969797909747,
"learning_rate": 8.540515671105122e-06,
"loss": 2.6151843070983887,
"step": 2010
},
{
"epoch": 0.9747939893359185,
"grad_norm": 11.260141678995483,
"learning_rate": 8.53852380921015e-06,
"loss": 3.0902421474456787,
"step": 2011
},
{
"epoch": 0.9752787203102278,
"grad_norm": 9.991637592184805,
"learning_rate": 8.53653082164421e-06,
"loss": 2.453904628753662,
"step": 2012
},
{
"epoch": 0.9757634512845371,
"grad_norm": 7.880980030246133,
"learning_rate": 8.534536709041305e-06,
"loss": 2.32138729095459,
"step": 2013
},
{
"epoch": 0.9762481822588464,
"grad_norm": 7.030163249976755,
"learning_rate": 8.532541472035805e-06,
"loss": 2.985018730163574,
"step": 2014
},
{
"epoch": 0.9767329132331556,
"grad_norm": 11.137166330949626,
"learning_rate": 8.530545111262432e-06,
"loss": 2.5624194145202637,
"step": 2015
},
{
"epoch": 0.9772176442074648,
"grad_norm": 15.367723858146588,
"learning_rate": 8.528547627356262e-06,
"loss": 3.226872682571411,
"step": 2016
},
{
"epoch": 0.9777023751817742,
"grad_norm": 7.696763148545897,
"learning_rate": 8.526549020952733e-06,
"loss": 3.049192428588867,
"step": 2017
},
{
"epoch": 0.9781871061560834,
"grad_norm": 11.484439117326177,
"learning_rate": 8.524549292687644e-06,
"loss": 2.100214958190918,
"step": 2018
},
{
"epoch": 0.9786718371303926,
"grad_norm": 11.002339159153616,
"learning_rate": 8.52254844319714e-06,
"loss": 2.221618890762329,
"step": 2019
},
{
"epoch": 0.9791565681047019,
"grad_norm": 8.801223822446165,
"learning_rate": 8.520546473117735e-06,
"loss": 2.6317431926727295,
"step": 2020
},
{
"epoch": 0.9796412990790111,
"grad_norm": 10.242510063278894,
"learning_rate": 8.518543383086286e-06,
"loss": 2.7983436584472656,
"step": 2021
},
{
"epoch": 0.9801260300533204,
"grad_norm": 17.02780575148533,
"learning_rate": 8.51653917374002e-06,
"loss": 2.4788575172424316,
"step": 2022
},
{
"epoch": 0.9806107610276297,
"grad_norm": 14.436644111042112,
"learning_rate": 8.514533845716512e-06,
"loss": 2.515129566192627,
"step": 2023
},
{
"epoch": 0.9810954920019389,
"grad_norm": 9.198820727517058,
"learning_rate": 8.512527399653693e-06,
"loss": 2.9626872539520264,
"step": 2024
},
{
"epoch": 0.9815802229762481,
"grad_norm": 8.567321433255588,
"learning_rate": 8.510519836189853e-06,
"loss": 2.728564739227295,
"step": 2025
},
{
"epoch": 0.9820649539505575,
"grad_norm": 8.076658090034295,
"learning_rate": 8.508511155963633e-06,
"loss": 3.085002899169922,
"step": 2026
},
{
"epoch": 0.9825496849248667,
"grad_norm": 8.38409775849158,
"learning_rate": 8.506501359614035e-06,
"loss": 2.369558334350586,
"step": 2027
},
{
"epoch": 0.983034415899176,
"grad_norm": 12.7459490574201,
"learning_rate": 8.50449044778041e-06,
"loss": 3.1477346420288086,
"step": 2028
},
{
"epoch": 0.9835191468734852,
"grad_norm": 18.522255296571895,
"learning_rate": 8.50247842110247e-06,
"loss": 2.8857433795928955,
"step": 2029
},
{
"epoch": 0.9840038778477945,
"grad_norm": 7.376066826097202,
"learning_rate": 8.500465280220278e-06,
"loss": 2.665149450302124,
"step": 2030
},
{
"epoch": 0.9844886088221038,
"grad_norm": 14.196270846977388,
"learning_rate": 8.49845102577425e-06,
"loss": 2.5080814361572266,
"step": 2031
},
{
"epoch": 0.984973339796413,
"grad_norm": 11.24230276515079,
"learning_rate": 8.49643565840516e-06,
"loss": 2.430192708969116,
"step": 2032
},
{
"epoch": 0.9854580707707222,
"grad_norm": 7.470177314345939,
"learning_rate": 8.494419178754135e-06,
"loss": 2.34472918510437,
"step": 2033
},
{
"epoch": 0.9859428017450315,
"grad_norm": 8.775584837593241,
"learning_rate": 8.492401587462651e-06,
"loss": 3.179020881652832,
"step": 2034
},
{
"epoch": 0.9864275327193408,
"grad_norm": 9.064514798192343,
"learning_rate": 8.490382885172545e-06,
"loss": 2.597214460372925,
"step": 2035
},
{
"epoch": 0.98691226369365,
"grad_norm": 11.783044102472884,
"learning_rate": 8.488363072526004e-06,
"loss": 3.0729503631591797,
"step": 2036
},
{
"epoch": 0.9873969946679593,
"grad_norm": 8.283556086916786,
"learning_rate": 8.486342150165569e-06,
"loss": 2.728915214538574,
"step": 2037
},
{
"epoch": 0.9878817256422685,
"grad_norm": 18.25906669624224,
"learning_rate": 8.484320118734133e-06,
"loss": 3.169036388397217,
"step": 2038
},
{
"epoch": 0.9883664566165778,
"grad_norm": 7.799204644443294,
"learning_rate": 8.48229697887494e-06,
"loss": 2.464231252670288,
"step": 2039
},
{
"epoch": 0.9888511875908871,
"grad_norm": 13.136401007284755,
"learning_rate": 8.480272731231591e-06,
"loss": 2.9758846759796143,
"step": 2040
},
{
"epoch": 0.9893359185651963,
"grad_norm": 17.011923264987207,
"learning_rate": 8.478247376448037e-06,
"loss": 2.662714958190918,
"step": 2041
},
{
"epoch": 0.9898206495395055,
"grad_norm": 8.929811995194102,
"learning_rate": 8.47622091516858e-06,
"loss": 2.585850715637207,
"step": 2042
},
{
"epoch": 0.9903053805138148,
"grad_norm": 12.062565253219923,
"learning_rate": 8.474193348037878e-06,
"loss": 2.8015387058258057,
"step": 2043
},
{
"epoch": 0.9907901114881241,
"grad_norm": 17.384917139023695,
"learning_rate": 8.472164675700937e-06,
"loss": 2.869448661804199,
"step": 2044
},
{
"epoch": 0.9912748424624334,
"grad_norm": 14.891192300894122,
"learning_rate": 8.470134898803113e-06,
"loss": 2.25075101852417,
"step": 2045
},
{
"epoch": 0.9917595734367426,
"grad_norm": 11.658511874678085,
"learning_rate": 8.468104017990121e-06,
"loss": 2.4524435997009277,
"step": 2046
},
{
"epoch": 0.9922443044110518,
"grad_norm": 10.705700202034317,
"learning_rate": 8.46607203390802e-06,
"loss": 2.6521849632263184,
"step": 2047
},
{
"epoch": 0.9927290353853612,
"grad_norm": 54.884469310003084,
"learning_rate": 8.464038947203224e-06,
"loss": 2.1187703609466553,
"step": 2048
},
{
"epoch": 0.9932137663596704,
"grad_norm": 19.287996230249547,
"learning_rate": 8.462004758522492e-06,
"loss": 2.6402831077575684,
"step": 2049
},
{
"epoch": 0.9936984973339796,
"grad_norm": 15.960868025124041,
"learning_rate": 8.459969468512943e-06,
"loss": 2.4937515258789062,
"step": 2050
},
{
"epoch": 0.9941832283082889,
"grad_norm": 14.089049899801687,
"learning_rate": 8.457933077822037e-06,
"loss": 2.7609357833862305,
"step": 2051
},
{
"epoch": 0.9946679592825982,
"grad_norm": 7.723928525891312,
"learning_rate": 8.455895587097588e-06,
"loss": 3.142179012298584,
"step": 2052
},
{
"epoch": 0.9951526902569074,
"grad_norm": 7.561912043294194,
"learning_rate": 8.453856996987766e-06,
"loss": 2.5077853202819824,
"step": 2053
},
{
"epoch": 0.9956374212312167,
"grad_norm": 8.176636859726068,
"learning_rate": 8.451817308141077e-06,
"loss": 2.785898447036743,
"step": 2054
},
{
"epoch": 0.9961221522055259,
"grad_norm": 8.069021580378392,
"learning_rate": 8.44977652120639e-06,
"loss": 2.983569622039795,
"step": 2055
},
{
"epoch": 0.9966068831798351,
"grad_norm": 12.267780017000893,
"learning_rate": 8.447734636832914e-06,
"loss": 2.7087275981903076,
"step": 2056
},
{
"epoch": 0.9970916141541445,
"grad_norm": 11.454804443184823,
"learning_rate": 8.445691655670213e-06,
"loss": 2.8633995056152344,
"step": 2057
},
{
"epoch": 0.9975763451284537,
"grad_norm": 15.942504804111689,
"learning_rate": 8.443647578368196e-06,
"loss": 2.964904308319092,
"step": 2058
},
{
"epoch": 0.998061076102763,
"grad_norm": 9.731423432654134,
"learning_rate": 8.441602405577127e-06,
"loss": 2.7250185012817383,
"step": 2059
},
{
"epoch": 0.9985458070770722,
"grad_norm": 19.727810870568007,
"learning_rate": 8.439556137947607e-06,
"loss": 3.20878529548645,
"step": 2060
},
{
"epoch": 0.9990305380513815,
"grad_norm": 6.424936013884626,
"learning_rate": 8.437508776130598e-06,
"loss": 2.5987377166748047,
"step": 2061
},
{
"epoch": 0.9995152690256908,
"grad_norm": 15.371606562917329,
"learning_rate": 8.435460320777401e-06,
"loss": 3.223637580871582,
"step": 2062
},
{
"epoch": 1.0,
"grad_norm": 13.28158948731056,
"learning_rate": 8.43341077253967e-06,
"loss": 2.601677417755127,
"step": 2063
},
{
"epoch": 1.0004847309743092,
"grad_norm": 13.342521737780528,
"learning_rate": 8.431360132069403e-06,
"loss": 1.5718538761138916,
"step": 2064
},
{
"epoch": 1.0009694619486185,
"grad_norm": 13.360360696702852,
"learning_rate": 8.429308400018949e-06,
"loss": 1.378113031387329,
"step": 2065
},
{
"epoch": 1.0014541929229277,
"grad_norm": 8.919363001637661,
"learning_rate": 8.427255577041002e-06,
"loss": 1.4751250743865967,
"step": 2066
},
{
"epoch": 1.001938923897237,
"grad_norm": 11.856082826722126,
"learning_rate": 8.425201663788604e-06,
"loss": 1.8071658611297607,
"step": 2067
},
{
"epoch": 1.0024236548715464,
"grad_norm": 11.291300776533538,
"learning_rate": 8.423146660915141e-06,
"loss": 1.5255979299545288,
"step": 2068
},
{
"epoch": 1.0029083858458556,
"grad_norm": 12.780408220564844,
"learning_rate": 8.421090569074351e-06,
"loss": 2.5664989948272705,
"step": 2069
},
{
"epoch": 1.0033931168201649,
"grad_norm": 15.877918807130007,
"learning_rate": 8.419033388920315e-06,
"loss": 1.5010913610458374,
"step": 2070
},
{
"epoch": 1.003877847794474,
"grad_norm": 11.072138932508972,
"learning_rate": 8.41697512110746e-06,
"loss": 1.4406582117080688,
"step": 2071
},
{
"epoch": 1.0043625787687833,
"grad_norm": 6.228900397450069,
"learning_rate": 8.41491576629056e-06,
"loss": 1.6853134632110596,
"step": 2072
},
{
"epoch": 1.0048473097430926,
"grad_norm": 8.754431978391612,
"learning_rate": 8.412855325124734e-06,
"loss": 1.4524598121643066,
"step": 2073
},
{
"epoch": 1.0053320407174018,
"grad_norm": 12.422102825308336,
"learning_rate": 8.410793798265449e-06,
"loss": 1.4226226806640625,
"step": 2074
},
{
"epoch": 1.005816771691711,
"grad_norm": 10.798029218437337,
"learning_rate": 8.40873118636851e-06,
"loss": 1.1664376258850098,
"step": 2075
},
{
"epoch": 1.0063015026660203,
"grad_norm": 12.331262894880298,
"learning_rate": 8.406667490090082e-06,
"loss": 1.8079708814620972,
"step": 2076
},
{
"epoch": 1.0067862336403297,
"grad_norm": 12.623706277071163,
"learning_rate": 8.404602710086656e-06,
"loss": 1.6387643814086914,
"step": 2077
},
{
"epoch": 1.007270964614639,
"grad_norm": 10.106725485102823,
"learning_rate": 8.402536847015083e-06,
"loss": 1.4902422428131104,
"step": 2078
},
{
"epoch": 1.0077556955889482,
"grad_norm": 16.711146913107722,
"learning_rate": 8.40046990153255e-06,
"loss": 1.4201444387435913,
"step": 2079
},
{
"epoch": 1.0082404265632574,
"grad_norm": 13.438416976974038,
"learning_rate": 8.398401874296595e-06,
"loss": 1.7784080505371094,
"step": 2080
},
{
"epoch": 1.0087251575375666,
"grad_norm": 13.945319206060931,
"learning_rate": 8.396332765965091e-06,
"loss": 1.1514551639556885,
"step": 2081
},
{
"epoch": 1.0092098885118759,
"grad_norm": 16.688341903140312,
"learning_rate": 8.394262577196266e-06,
"loss": 2.784750461578369,
"step": 2082
},
{
"epoch": 1.009694619486185,
"grad_norm": 12.720710200901378,
"learning_rate": 8.392191308648683e-06,
"loss": 2.1893866062164307,
"step": 2083
},
{
"epoch": 1.0101793504604943,
"grad_norm": 9.73547437664745,
"learning_rate": 8.390118960981251e-06,
"loss": 1.8728020191192627,
"step": 2084
},
{
"epoch": 1.0106640814348036,
"grad_norm": 18.709887964543285,
"learning_rate": 8.388045534853222e-06,
"loss": 1.1610491275787354,
"step": 2085
},
{
"epoch": 1.011148812409113,
"grad_norm": 9.909996942508089,
"learning_rate": 8.385971030924197e-06,
"loss": 2.317023515701294,
"step": 2086
},
{
"epoch": 1.0116335433834223,
"grad_norm": 10.510631683205753,
"learning_rate": 8.383895449854107e-06,
"loss": 1.4001628160476685,
"step": 2087
},
{
"epoch": 1.0121182743577315,
"grad_norm": 12.691943039454204,
"learning_rate": 8.381818792303239e-06,
"loss": 0.7575283050537109,
"step": 2088
},
{
"epoch": 1.0126030053320407,
"grad_norm": 12.80958826639515,
"learning_rate": 8.379741058932215e-06,
"loss": 1.3837909698486328,
"step": 2089
},
{
"epoch": 1.01308773630635,
"grad_norm": 37.82742473742007,
"learning_rate": 8.377662250402001e-06,
"loss": 1.698325753211975,
"step": 2090
},
{
"epoch": 1.0135724672806592,
"grad_norm": 11.38124205645569,
"learning_rate": 8.375582367373907e-06,
"loss": 1.3107880353927612,
"step": 2091
},
{
"epoch": 1.0140571982549684,
"grad_norm": 13.9514384218975,
"learning_rate": 8.37350141050958e-06,
"loss": 1.2033599615097046,
"step": 2092
},
{
"epoch": 1.0145419292292777,
"grad_norm": 12.547146851150929,
"learning_rate": 8.371419380471013e-06,
"loss": 1.6389203071594238,
"step": 2093
},
{
"epoch": 1.0150266602035871,
"grad_norm": 11.792925502220035,
"learning_rate": 8.369336277920537e-06,
"loss": 1.4581947326660156,
"step": 2094
},
{
"epoch": 1.0155113911778963,
"grad_norm": 18.400924327282763,
"learning_rate": 8.36725210352083e-06,
"loss": 1.2591956853866577,
"step": 2095
},
{
"epoch": 1.0159961221522056,
"grad_norm": 11.219567044098365,
"learning_rate": 8.365166857934904e-06,
"loss": 1.678415060043335,
"step": 2096
},
{
"epoch": 1.0164808531265148,
"grad_norm": 9.499883641396215,
"learning_rate": 8.363080541826116e-06,
"loss": 1.5941039323806763,
"step": 2097
},
{
"epoch": 1.016965584100824,
"grad_norm": 13.161737502638612,
"learning_rate": 8.360993155858161e-06,
"loss": 2.0408997535705566,
"step": 2098
},
{
"epoch": 1.0174503150751333,
"grad_norm": 19.203184166157495,
"learning_rate": 8.358904700695078e-06,
"loss": 1.949871301651001,
"step": 2099
},
{
"epoch": 1.0179350460494425,
"grad_norm": 11.341557102811414,
"learning_rate": 8.356815177001243e-06,
"loss": 0.9234069585800171,
"step": 2100
},
{
"epoch": 1.0184197770237517,
"grad_norm": 11.094143903334894,
"learning_rate": 8.354724585441372e-06,
"loss": 1.0520992279052734,
"step": 2101
},
{
"epoch": 1.018904507998061,
"grad_norm": 12.996917449860357,
"learning_rate": 8.352632926680522e-06,
"loss": 1.7282954454421997,
"step": 2102
},
{
"epoch": 1.0193892389723704,
"grad_norm": 9.773849251575301,
"learning_rate": 8.35054020138409e-06,
"loss": 1.3652217388153076,
"step": 2103
},
{
"epoch": 1.0198739699466797,
"grad_norm": 11.912250909024241,
"learning_rate": 8.348446410217808e-06,
"loss": 1.486652135848999,
"step": 2104
},
{
"epoch": 1.020358700920989,
"grad_norm": 20.632405903075043,
"learning_rate": 8.346351553847754e-06,
"loss": 1.3055459260940552,
"step": 2105
},
{
"epoch": 1.0208434318952981,
"grad_norm": 10.715642188760702,
"learning_rate": 8.34425563294034e-06,
"loss": 1.4878087043762207,
"step": 2106
},
{
"epoch": 1.0213281628696074,
"grad_norm": 11.808352597527653,
"learning_rate": 8.342158648162315e-06,
"loss": 1.3772530555725098,
"step": 2107
},
{
"epoch": 1.0218128938439166,
"grad_norm": 11.730797483502583,
"learning_rate": 8.340060600180776e-06,
"loss": 2.3520209789276123,
"step": 2108
},
{
"epoch": 1.0222976248182258,
"grad_norm": 7.319472270853378,
"learning_rate": 8.337961489663144e-06,
"loss": 1.170849323272705,
"step": 2109
},
{
"epoch": 1.022782355792535,
"grad_norm": 10.639472666874115,
"learning_rate": 8.33586131727719e-06,
"loss": 1.6360126733779907,
"step": 2110
},
{
"epoch": 1.0232670867668443,
"grad_norm": 12.100091562162893,
"learning_rate": 8.333760083691018e-06,
"loss": 1.3012382984161377,
"step": 2111
},
{
"epoch": 1.0237518177411538,
"grad_norm": 10.583181705460937,
"learning_rate": 8.331657789573067e-06,
"loss": 1.3660626411437988,
"step": 2112
},
{
"epoch": 1.024236548715463,
"grad_norm": 9.07374270859253,
"learning_rate": 8.329554435592122e-06,
"loss": 1.6993227005004883,
"step": 2113
},
{
"epoch": 1.0247212796897722,
"grad_norm": 9.201888912091002,
"learning_rate": 8.327450022417294e-06,
"loss": 1.9003961086273193,
"step": 2114
},
{
"epoch": 1.0252060106640815,
"grad_norm": 9.35512233568713,
"learning_rate": 8.325344550718037e-06,
"loss": 1.8023403882980347,
"step": 2115
},
{
"epoch": 1.0256907416383907,
"grad_norm": 11.191860077937681,
"learning_rate": 8.323238021164146e-06,
"loss": 0.8760477304458618,
"step": 2116
},
{
"epoch": 1.0261754726127,
"grad_norm": 8.382499303278971,
"learning_rate": 8.32113043442574e-06,
"loss": 1.8204891681671143,
"step": 2117
},
{
"epoch": 1.0266602035870092,
"grad_norm": 11.916595720599275,
"learning_rate": 8.31902179117329e-06,
"loss": 1.1426283121109009,
"step": 2118
},
{
"epoch": 1.0271449345613184,
"grad_norm": 21.685961526377206,
"learning_rate": 8.316912092077588e-06,
"loss": 1.468607783317566,
"step": 2119
},
{
"epoch": 1.0276296655356276,
"grad_norm": 11.723644488420609,
"learning_rate": 8.314801337809775e-06,
"loss": 1.0401384830474854,
"step": 2120
},
{
"epoch": 1.028114396509937,
"grad_norm": 8.363636703204557,
"learning_rate": 8.312689529041318e-06,
"loss": 2.067119598388672,
"step": 2121
},
{
"epoch": 1.0285991274842463,
"grad_norm": 9.288829034289224,
"learning_rate": 8.310576666444021e-06,
"loss": 1.2174421548843384,
"step": 2122
},
{
"epoch": 1.0290838584585555,
"grad_norm": 14.271405332844505,
"learning_rate": 8.308462750690032e-06,
"loss": 1.301609992980957,
"step": 2123
},
{
"epoch": 1.0295685894328648,
"grad_norm": 10.230830116189304,
"learning_rate": 8.306347782451822e-06,
"loss": 1.310603141784668,
"step": 2124
},
{
"epoch": 1.030053320407174,
"grad_norm": 10.261959447506268,
"learning_rate": 8.304231762402203e-06,
"loss": 1.572640299797058,
"step": 2125
},
{
"epoch": 1.0305380513814832,
"grad_norm": 11.413980328695075,
"learning_rate": 8.302114691214323e-06,
"loss": 1.050504207611084,
"step": 2126
},
{
"epoch": 1.0310227823557925,
"grad_norm": 13.404665239269066,
"learning_rate": 8.29999656956166e-06,
"loss": 1.9220675230026245,
"step": 2127
},
{
"epoch": 1.0315075133301017,
"grad_norm": 11.73617509977592,
"learning_rate": 8.297877398118029e-06,
"loss": 1.2858209609985352,
"step": 2128
},
{
"epoch": 1.0319922443044112,
"grad_norm": 15.48484981497742,
"learning_rate": 8.29575717755758e-06,
"loss": 1.0760868787765503,
"step": 2129
},
{
"epoch": 1.0324769752787204,
"grad_norm": 23.649416636097587,
"learning_rate": 8.29363590855479e-06,
"loss": 2.264504909515381,
"step": 2130
},
{
"epoch": 1.0329617062530296,
"grad_norm": 14.937574579390839,
"learning_rate": 8.291513591784479e-06,
"loss": 1.5348783731460571,
"step": 2131
},
{
"epoch": 1.0334464372273389,
"grad_norm": 7.735745451603059,
"learning_rate": 8.289390227921797e-06,
"loss": 0.8413087129592896,
"step": 2132
},
{
"epoch": 1.033931168201648,
"grad_norm": 13.882677959680652,
"learning_rate": 8.28726581764222e-06,
"loss": 1.9933491945266724,
"step": 2133
},
{
"epoch": 1.0344158991759573,
"grad_norm": 11.773950422285546,
"learning_rate": 8.28514036162157e-06,
"loss": 1.6544911861419678,
"step": 2134
},
{
"epoch": 1.0349006301502666,
"grad_norm": 11.513517041325796,
"learning_rate": 8.28301386053599e-06,
"loss": 1.8610843420028687,
"step": 2135
},
{
"epoch": 1.0353853611245758,
"grad_norm": 9.773883867744415,
"learning_rate": 8.28088631506196e-06,
"loss": 1.2537195682525635,
"step": 2136
},
{
"epoch": 1.035870092098885,
"grad_norm": 14.965755768899603,
"learning_rate": 8.278757725876296e-06,
"loss": 1.6357024908065796,
"step": 2137
},
{
"epoch": 1.0363548230731943,
"grad_norm": 15.618635431083867,
"learning_rate": 8.276628093656136e-06,
"loss": 1.8704427480697632,
"step": 2138
},
{
"epoch": 1.0368395540475037,
"grad_norm": 12.527391805842443,
"learning_rate": 8.274497419078961e-06,
"loss": 2.07002329826355,
"step": 2139
},
{
"epoch": 1.037324285021813,
"grad_norm": 13.372328834417731,
"learning_rate": 8.272365702822577e-06,
"loss": 1.0981192588806152,
"step": 2140
},
{
"epoch": 1.0378090159961222,
"grad_norm": 12.884676237946035,
"learning_rate": 8.270232945565127e-06,
"loss": 1.322986125946045,
"step": 2141
},
{
"epoch": 1.0382937469704314,
"grad_norm": 8.994641733672193,
"learning_rate": 8.268099147985074e-06,
"loss": 1.7671488523483276,
"step": 2142
},
{
"epoch": 1.0387784779447407,
"grad_norm": 9.889980187799338,
"learning_rate": 8.265964310761223e-06,
"loss": 1.7539520263671875,
"step": 2143
},
{
"epoch": 1.0392632089190499,
"grad_norm": 10.719718914633026,
"learning_rate": 8.263828434572705e-06,
"loss": 1.9354057312011719,
"step": 2144
},
{
"epoch": 1.0397479398933591,
"grad_norm": 7.835773080620412,
"learning_rate": 8.261691520098985e-06,
"loss": 1.7045327425003052,
"step": 2145
},
{
"epoch": 1.0402326708676684,
"grad_norm": 8.286066311218411,
"learning_rate": 8.259553568019852e-06,
"loss": 1.4101792573928833,
"step": 2146
},
{
"epoch": 1.0407174018419778,
"grad_norm": 13.218701846952731,
"learning_rate": 8.25741457901543e-06,
"loss": 1.030219316482544,
"step": 2147
},
{
"epoch": 1.041202132816287,
"grad_norm": 12.211134381670774,
"learning_rate": 8.255274553766173e-06,
"loss": 2.3467471599578857,
"step": 2148
},
{
"epoch": 1.0416868637905963,
"grad_norm": 23.379152412707626,
"learning_rate": 8.253133492952863e-06,
"loss": 1.717346429824829,
"step": 2149
},
{
"epoch": 1.0421715947649055,
"grad_norm": 16.612001573805564,
"learning_rate": 8.250991397256609e-06,
"loss": 1.4836816787719727,
"step": 2150
},
{
"epoch": 1.0426563257392147,
"grad_norm": 9.936110582357049,
"learning_rate": 8.248848267358856e-06,
"loss": 1.7558997869491577,
"step": 2151
},
{
"epoch": 1.043141056713524,
"grad_norm": 16.920111364227434,
"learning_rate": 8.246704103941369e-06,
"loss": 1.6846768856048584,
"step": 2152
},
{
"epoch": 1.0436257876878332,
"grad_norm": 11.25517885590575,
"learning_rate": 8.244558907686252e-06,
"loss": 1.3733316659927368,
"step": 2153
},
{
"epoch": 1.0441105186621424,
"grad_norm": 14.163346138018426,
"learning_rate": 8.242412679275928e-06,
"loss": 0.9169270396232605,
"step": 2154
},
{
"epoch": 1.0445952496364517,
"grad_norm": 8.512838534611678,
"learning_rate": 8.240265419393157e-06,
"loss": 1.9425660371780396,
"step": 2155
},
{
"epoch": 1.0450799806107611,
"grad_norm": 14.44626015181308,
"learning_rate": 8.238117128721017e-06,
"loss": 1.898789882659912,
"step": 2156
},
{
"epoch": 1.0455647115850704,
"grad_norm": 13.84630724696313,
"learning_rate": 8.235967807942924e-06,
"loss": 2.539224147796631,
"step": 2157
},
{
"epoch": 1.0460494425593796,
"grad_norm": 9.260193030343071,
"learning_rate": 8.233817457742617e-06,
"loss": 2.1352100372314453,
"step": 2158
},
{
"epoch": 1.0465341735336888,
"grad_norm": 9.588107333352937,
"learning_rate": 8.23166607880416e-06,
"loss": 1.5350868701934814,
"step": 2159
},
{
"epoch": 1.047018904507998,
"grad_norm": 15.45508530033381,
"learning_rate": 8.229513671811953e-06,
"loss": 1.367403507232666,
"step": 2160
},
{
"epoch": 1.0475036354823073,
"grad_norm": 16.64007394766427,
"learning_rate": 8.22736023745071e-06,
"loss": 2.4164962768554688,
"step": 2161
},
{
"epoch": 1.0479883664566165,
"grad_norm": 8.435274175939991,
"learning_rate": 8.225205776405484e-06,
"loss": 1.6408510208129883,
"step": 2162
},
{
"epoch": 1.0484730974309258,
"grad_norm": 14.15145832839855,
"learning_rate": 8.223050289361646e-06,
"loss": 1.72284734249115,
"step": 2163
},
{
"epoch": 1.048957828405235,
"grad_norm": 10.735226818130172,
"learning_rate": 8.2208937770049e-06,
"loss": 1.2531838417053223,
"step": 2164
},
{
"epoch": 1.0494425593795444,
"grad_norm": 12.856251181327444,
"learning_rate": 8.218736240021271e-06,
"loss": 1.4023059606552124,
"step": 2165
},
{
"epoch": 1.0499272903538537,
"grad_norm": 14.091373748671167,
"learning_rate": 8.216577679097116e-06,
"loss": 1.604212999343872,
"step": 2166
},
{
"epoch": 1.050412021328163,
"grad_norm": 11.257864665222106,
"learning_rate": 8.214418094919108e-06,
"loss": 1.3728293180465698,
"step": 2167
},
{
"epoch": 1.0508967523024721,
"grad_norm": 8.198683419590626,
"learning_rate": 8.212257488174253e-06,
"loss": 1.5410269498825073,
"step": 2168
},
{
"epoch": 1.0513814832767814,
"grad_norm": 15.386897602350635,
"learning_rate": 8.210095859549884e-06,
"loss": 1.0663068294525146,
"step": 2169
},
{
"epoch": 1.0518662142510906,
"grad_norm": 10.558483939471003,
"learning_rate": 8.207933209733654e-06,
"loss": 1.6346923112869263,
"step": 2170
},
{
"epoch": 1.0523509452253998,
"grad_norm": 9.012011479013827,
"learning_rate": 8.20576953941354e-06,
"loss": 1.6921615600585938,
"step": 2171
},
{
"epoch": 1.052835676199709,
"grad_norm": 10.491431086794638,
"learning_rate": 8.20360484927785e-06,
"loss": 1.2218983173370361,
"step": 2172
},
{
"epoch": 1.0533204071740183,
"grad_norm": 11.787089360274154,
"learning_rate": 8.201439140015211e-06,
"loss": 1.4458768367767334,
"step": 2173
},
{
"epoch": 1.0538051381483278,
"grad_norm": 7.908397235713993,
"learning_rate": 8.199272412314574e-06,
"loss": 1.0795042514801025,
"step": 2174
},
{
"epoch": 1.054289869122637,
"grad_norm": 16.66949554392743,
"learning_rate": 8.197104666865218e-06,
"loss": 2.2518651485443115,
"step": 2175
},
{
"epoch": 1.0547746000969462,
"grad_norm": 19.127625699226076,
"learning_rate": 8.194935904356745e-06,
"loss": 1.4185807704925537,
"step": 2176
},
{
"epoch": 1.0552593310712555,
"grad_norm": 9.345783546741396,
"learning_rate": 8.192766125479075e-06,
"loss": 1.0926324129104614,
"step": 2177
},
{
"epoch": 1.0557440620455647,
"grad_norm": 13.971939809030241,
"learning_rate": 8.19059533092246e-06,
"loss": 1.35152268409729,
"step": 2178
},
{
"epoch": 1.056228793019874,
"grad_norm": 19.367010663431408,
"learning_rate": 8.188423521377467e-06,
"loss": 1.194026231765747,
"step": 2179
},
{
"epoch": 1.0567135239941832,
"grad_norm": 10.750928924716849,
"learning_rate": 8.186250697534993e-06,
"loss": 1.5034373998641968,
"step": 2180
},
{
"epoch": 1.0571982549684924,
"grad_norm": 11.06784043466523,
"learning_rate": 8.18407686008625e-06,
"loss": 1.5031787157058716,
"step": 2181
},
{
"epoch": 1.0576829859428019,
"grad_norm": 22.062300199779123,
"learning_rate": 8.181902009722777e-06,
"loss": 1.2370879650115967,
"step": 2182
},
{
"epoch": 1.058167716917111,
"grad_norm": 16.641425165612105,
"learning_rate": 8.17972614713644e-06,
"loss": 1.1452357769012451,
"step": 2183
},
{
"epoch": 1.0586524478914203,
"grad_norm": 26.24055542898261,
"learning_rate": 8.177549273019417e-06,
"loss": 1.8388257026672363,
"step": 2184
},
{
"epoch": 1.0591371788657296,
"grad_norm": 11.254095713222503,
"learning_rate": 8.175371388064212e-06,
"loss": 1.60293447971344,
"step": 2185
},
{
"epoch": 1.0596219098400388,
"grad_norm": 12.175703088197457,
"learning_rate": 8.173192492963653e-06,
"loss": 1.2001619338989258,
"step": 2186
},
{
"epoch": 1.060106640814348,
"grad_norm": 13.947766937507298,
"learning_rate": 8.17101258841089e-06,
"loss": 1.3177313804626465,
"step": 2187
},
{
"epoch": 1.0605913717886573,
"grad_norm": 18.401514085407506,
"learning_rate": 8.168831675099386e-06,
"loss": 1.4793353080749512,
"step": 2188
},
{
"epoch": 1.0610761027629665,
"grad_norm": 11.696425829416672,
"learning_rate": 8.166649753722935e-06,
"loss": 1.2635457515716553,
"step": 2189
},
{
"epoch": 1.0615608337372757,
"grad_norm": 9.735671383282824,
"learning_rate": 8.164466824975648e-06,
"loss": 1.4152415990829468,
"step": 2190
},
{
"epoch": 1.0620455647115852,
"grad_norm": 9.625861345212464,
"learning_rate": 8.162282889551952e-06,
"loss": 2.180312156677246,
"step": 2191
},
{
"epoch": 1.0625302956858944,
"grad_norm": 13.43165826733779,
"learning_rate": 8.160097948146597e-06,
"loss": 1.4941524267196655,
"step": 2192
},
{
"epoch": 1.0630150266602036,
"grad_norm": 12.262732297405314,
"learning_rate": 8.157912001454659e-06,
"loss": 1.60727858543396,
"step": 2193
},
{
"epoch": 1.0634997576345129,
"grad_norm": 9.14085825300931,
"learning_rate": 8.155725050171524e-06,
"loss": 1.950103521347046,
"step": 2194
},
{
"epoch": 1.063984488608822,
"grad_norm": 11.943286969074514,
"learning_rate": 8.153537094992907e-06,
"loss": 1.115267276763916,
"step": 2195
},
{
"epoch": 1.0644692195831313,
"grad_norm": 28.094130435817625,
"learning_rate": 8.151348136614834e-06,
"loss": 1.5407185554504395,
"step": 2196
},
{
"epoch": 1.0649539505574406,
"grad_norm": 8.318932656183192,
"learning_rate": 8.149158175733654e-06,
"loss": 1.8639978170394897,
"step": 2197
},
{
"epoch": 1.0654386815317498,
"grad_norm": 11.478725167374506,
"learning_rate": 8.146967213046037e-06,
"loss": 1.6920052766799927,
"step": 2198
},
{
"epoch": 1.065923412506059,
"grad_norm": 10.295217230659297,
"learning_rate": 8.144775249248969e-06,
"loss": 1.1837260723114014,
"step": 2199
},
{
"epoch": 1.0664081434803685,
"grad_norm": 13.885806510354211,
"learning_rate": 8.142582285039753e-06,
"loss": 2.019367218017578,
"step": 2200
},
{
"epoch": 1.0668928744546777,
"grad_norm": 8.511765077720211,
"learning_rate": 8.140388321116012e-06,
"loss": 1.214223027229309,
"step": 2201
},
{
"epoch": 1.067377605428987,
"grad_norm": 12.146861952524413,
"learning_rate": 8.13819335817569e-06,
"loss": 2.255362033843994,
"step": 2202
},
{
"epoch": 1.0678623364032962,
"grad_norm": 9.935723388560396,
"learning_rate": 8.135997396917044e-06,
"loss": 1.9059261083602905,
"step": 2203
},
{
"epoch": 1.0683470673776054,
"grad_norm": 16.610371681780094,
"learning_rate": 8.13380043803865e-06,
"loss": 1.7593109607696533,
"step": 2204
},
{
"epoch": 1.0688317983519147,
"grad_norm": 9.304621720899181,
"learning_rate": 8.131602482239405e-06,
"loss": 1.7788236141204834,
"step": 2205
},
{
"epoch": 1.069316529326224,
"grad_norm": 10.180840706832534,
"learning_rate": 8.129403530218515e-06,
"loss": 1.7654491662979126,
"step": 2206
},
{
"epoch": 1.0698012603005331,
"grad_norm": 9.661213073131844,
"learning_rate": 8.127203582675515e-06,
"loss": 1.1240934133529663,
"step": 2207
},
{
"epoch": 1.0702859912748424,
"grad_norm": 12.824980630773673,
"learning_rate": 8.125002640310241e-06,
"loss": 2.5355429649353027,
"step": 2208
},
{
"epoch": 1.0707707222491518,
"grad_norm": 7.219427876167571,
"learning_rate": 8.122800703822862e-06,
"loss": 1.150955080986023,
"step": 2209
},
{
"epoch": 1.071255453223461,
"grad_norm": 13.516483558258487,
"learning_rate": 8.120597773913851e-06,
"loss": 2.0926904678344727,
"step": 2210
},
{
"epoch": 1.0717401841977703,
"grad_norm": 10.465903424616897,
"learning_rate": 8.118393851284005e-06,
"loss": 1.7707479000091553,
"step": 2211
},
{
"epoch": 1.0722249151720795,
"grad_norm": 9.412907345719448,
"learning_rate": 8.116188936634428e-06,
"loss": 1.6150329113006592,
"step": 2212
},
{
"epoch": 1.0727096461463888,
"grad_norm": 17.10273459437597,
"learning_rate": 8.113983030666547e-06,
"loss": 1.9956977367401123,
"step": 2213
},
{
"epoch": 1.073194377120698,
"grad_norm": 14.672648313656914,
"learning_rate": 8.111776134082105e-06,
"loss": 1.4133307933807373,
"step": 2214
},
{
"epoch": 1.0736791080950072,
"grad_norm": 17.121057283597242,
"learning_rate": 8.109568247583155e-06,
"loss": 1.5394426584243774,
"step": 2215
},
{
"epoch": 1.0741638390693165,
"grad_norm": 6.844610957677005,
"learning_rate": 8.107359371872068e-06,
"loss": 1.3910284042358398,
"step": 2216
},
{
"epoch": 1.0746485700436257,
"grad_norm": 15.81192210488849,
"learning_rate": 8.105149507651527e-06,
"loss": 1.993469476699829,
"step": 2217
},
{
"epoch": 1.0751333010179351,
"grad_norm": 8.482624333557066,
"learning_rate": 8.102938655624532e-06,
"loss": 1.243818759918213,
"step": 2218
},
{
"epoch": 1.0756180319922444,
"grad_norm": 18.61453921184875,
"learning_rate": 8.1007268164944e-06,
"loss": 3.473478317260742,
"step": 2219
},
{
"epoch": 1.0761027629665536,
"grad_norm": 13.824845412926809,
"learning_rate": 8.098513990964754e-06,
"loss": 1.432735562324524,
"step": 2220
},
{
"epoch": 1.0765874939408628,
"grad_norm": 11.028075953749916,
"learning_rate": 8.096300179739537e-06,
"loss": 2.1962697505950928,
"step": 2221
},
{
"epoch": 1.077072224915172,
"grad_norm": 13.567097930184454,
"learning_rate": 8.094085383523005e-06,
"loss": 1.687326431274414,
"step": 2222
},
{
"epoch": 1.0775569558894813,
"grad_norm": 11.813404676364398,
"learning_rate": 8.091869603019724e-06,
"loss": 2.831268072128296,
"step": 2223
},
{
"epoch": 1.0780416868637905,
"grad_norm": 14.29511029205126,
"learning_rate": 8.089652838934579e-06,
"loss": 1.824756383895874,
"step": 2224
},
{
"epoch": 1.0785264178380998,
"grad_norm": 10.859766669971417,
"learning_rate": 8.087435091972762e-06,
"loss": 1.2902121543884277,
"step": 2225
},
{
"epoch": 1.079011148812409,
"grad_norm": 7.668186202799075,
"learning_rate": 8.08521636283978e-06,
"loss": 1.366214394569397,
"step": 2226
},
{
"epoch": 1.0794958797867185,
"grad_norm": 9.524037995640466,
"learning_rate": 8.082996652241452e-06,
"loss": 2.655726671218872,
"step": 2227
},
{
"epoch": 1.0799806107610277,
"grad_norm": 10.190663858317464,
"learning_rate": 8.080775960883913e-06,
"loss": 1.656011700630188,
"step": 2228
},
{
"epoch": 1.080465341735337,
"grad_norm": 9.000024989804997,
"learning_rate": 8.078554289473603e-06,
"loss": 1.6677184104919434,
"step": 2229
},
{
"epoch": 1.0809500727096462,
"grad_norm": 10.56988697950755,
"learning_rate": 8.076331638717278e-06,
"loss": 1.8868074417114258,
"step": 2230
},
{
"epoch": 1.0814348036839554,
"grad_norm": 19.688241561224018,
"learning_rate": 8.074108009322006e-06,
"loss": 1.5612624883651733,
"step": 2231
},
{
"epoch": 1.0819195346582646,
"grad_norm": 8.357818534800769,
"learning_rate": 8.071883401995167e-06,
"loss": 1.1138391494750977,
"step": 2232
},
{
"epoch": 1.0824042656325739,
"grad_norm": 9.799533369968577,
"learning_rate": 8.069657817444446e-06,
"loss": 1.4916517734527588,
"step": 2233
},
{
"epoch": 1.082888996606883,
"grad_norm": 13.799960851017165,
"learning_rate": 8.067431256377847e-06,
"loss": 1.3813457489013672,
"step": 2234
},
{
"epoch": 1.0833737275811925,
"grad_norm": 14.608521412922256,
"learning_rate": 8.065203719503679e-06,
"loss": 1.9007681608200073,
"step": 2235
},
{
"epoch": 1.0838584585555018,
"grad_norm": 7.917038148690495,
"learning_rate": 8.062975207530564e-06,
"loss": 2.1677284240722656,
"step": 2236
},
{
"epoch": 1.084343189529811,
"grad_norm": 8.607727360662354,
"learning_rate": 8.060745721167435e-06,
"loss": 1.5999740362167358,
"step": 2237
},
{
"epoch": 1.0848279205041202,
"grad_norm": 11.165635512668123,
"learning_rate": 8.058515261123529e-06,
"loss": 1.512948989868164,
"step": 2238
},
{
"epoch": 1.0853126514784295,
"grad_norm": 10.686512263116684,
"learning_rate": 8.056283828108405e-06,
"loss": 1.2961838245391846,
"step": 2239
},
{
"epoch": 1.0857973824527387,
"grad_norm": 10.62013855872294,
"learning_rate": 8.054051422831918e-06,
"loss": 1.5543544292449951,
"step": 2240
},
{
"epoch": 1.086282113427048,
"grad_norm": 13.758945245862455,
"learning_rate": 8.051818046004235e-06,
"loss": 1.6795318126678467,
"step": 2241
},
{
"epoch": 1.0867668444013572,
"grad_norm": 8.859032026962783,
"learning_rate": 8.049583698335843e-06,
"loss": 1.57673978805542,
"step": 2242
},
{
"epoch": 1.0872515753756664,
"grad_norm": 23.352624719395482,
"learning_rate": 8.047348380537527e-06,
"loss": 1.9545248746871948,
"step": 2243
},
{
"epoch": 1.0877363063499756,
"grad_norm": 8.153632940626146,
"learning_rate": 8.045112093320383e-06,
"loss": 2.025895833969116,
"step": 2244
},
{
"epoch": 1.088221037324285,
"grad_norm": 9.768399437785966,
"learning_rate": 8.042874837395816e-06,
"loss": 0.9490424394607544,
"step": 2245
},
{
"epoch": 1.0887057682985943,
"grad_norm": 8.710528649937364,
"learning_rate": 8.040636613475539e-06,
"loss": 1.6868281364440918,
"step": 2246
},
{
"epoch": 1.0891904992729036,
"grad_norm": 9.42834054600166,
"learning_rate": 8.038397422271575e-06,
"loss": 1.5042721033096313,
"step": 2247
},
{
"epoch": 1.0896752302472128,
"grad_norm": 13.339433966758346,
"learning_rate": 8.03615726449625e-06,
"loss": 1.5242350101470947,
"step": 2248
},
{
"epoch": 1.090159961221522,
"grad_norm": 9.992026708044795,
"learning_rate": 8.0339161408622e-06,
"loss": 1.203425407409668,
"step": 2249
},
{
"epoch": 1.0906446921958313,
"grad_norm": 19.36251412720828,
"learning_rate": 8.031674052082372e-06,
"loss": 1.711252212524414,
"step": 2250
},
{
"epoch": 1.0911294231701405,
"grad_norm": 14.964539706489894,
"learning_rate": 8.029430998870015e-06,
"loss": 0.8105027675628662,
"step": 2251
},
{
"epoch": 1.0916141541444497,
"grad_norm": 13.583489460415747,
"learning_rate": 8.027186981938685e-06,
"loss": 1.9334917068481445,
"step": 2252
},
{
"epoch": 1.0920988851187592,
"grad_norm": 9.815261436889301,
"learning_rate": 8.02494200200225e-06,
"loss": 1.8331525325775146,
"step": 2253
},
{
"epoch": 1.0925836160930684,
"grad_norm": 14.092333592597383,
"learning_rate": 8.022696059774877e-06,
"loss": 1.5910823345184326,
"step": 2254
},
{
"epoch": 1.0930683470673777,
"grad_norm": 12.761087143919084,
"learning_rate": 8.02044915597104e-06,
"loss": 1.7695002555847168,
"step": 2255
},
{
"epoch": 1.0935530780416869,
"grad_norm": 12.26455686680506,
"learning_rate": 8.018201291305527e-06,
"loss": 1.7094886302947998,
"step": 2256
},
{
"epoch": 1.0940378090159961,
"grad_norm": 13.198295417899809,
"learning_rate": 8.015952466493422e-06,
"loss": 1.607521414756775,
"step": 2257
},
{
"epoch": 1.0945225399903054,
"grad_norm": 13.01042771495031,
"learning_rate": 8.013702682250122e-06,
"loss": 1.7898848056793213,
"step": 2258
},
{
"epoch": 1.0950072709646146,
"grad_norm": 21.185081834097915,
"learning_rate": 8.011451939291322e-06,
"loss": 2.04896879196167,
"step": 2259
},
{
"epoch": 1.0954920019389238,
"grad_norm": 8.497225559495202,
"learning_rate": 8.009200238333028e-06,
"loss": 1.5077424049377441,
"step": 2260
},
{
"epoch": 1.095976732913233,
"grad_norm": 15.616083873774432,
"learning_rate": 8.006947580091547e-06,
"loss": 2.7665951251983643,
"step": 2261
},
{
"epoch": 1.0964614638875425,
"grad_norm": 10.750716748147568,
"learning_rate": 8.004693965283492e-06,
"loss": 1.4017243385314941,
"step": 2262
},
{
"epoch": 1.0969461948618517,
"grad_norm": 11.41566748528167,
"learning_rate": 8.00243939462578e-06,
"loss": 1.9364910125732422,
"step": 2263
},
{
"epoch": 1.097430925836161,
"grad_norm": 9.781266365574742,
"learning_rate": 8.000183868835637e-06,
"loss": 1.2326563596725464,
"step": 2264
},
{
"epoch": 1.0979156568104702,
"grad_norm": 8.935716224005631,
"learning_rate": 7.99792738863058e-06,
"loss": 1.7152838706970215,
"step": 2265
},
{
"epoch": 1.0984003877847794,
"grad_norm": 10.884366442408696,
"learning_rate": 7.995669954728444e-06,
"loss": 1.403039813041687,
"step": 2266
},
{
"epoch": 1.0988851187590887,
"grad_norm": 13.482252522322218,
"learning_rate": 7.99341156784736e-06,
"loss": 1.8942010402679443,
"step": 2267
},
{
"epoch": 1.099369849733398,
"grad_norm": 7.928110644050593,
"learning_rate": 7.991152228705763e-06,
"loss": 1.3022117614746094,
"step": 2268
},
{
"epoch": 1.0998545807077071,
"grad_norm": 10.913876429843599,
"learning_rate": 7.98889193802239e-06,
"loss": 1.3404736518859863,
"step": 2269
},
{
"epoch": 1.1003393116820166,
"grad_norm": 15.837209856536052,
"learning_rate": 7.986630696516281e-06,
"loss": 1.8412797451019287,
"step": 2270
},
{
"epoch": 1.1008240426563258,
"grad_norm": 10.366489625655289,
"learning_rate": 7.984368504906784e-06,
"loss": 1.554381251335144,
"step": 2271
},
{
"epoch": 1.101308773630635,
"grad_norm": 11.63953715274444,
"learning_rate": 7.982105363913541e-06,
"loss": 1.1341830492019653,
"step": 2272
},
{
"epoch": 1.1017935046049443,
"grad_norm": 14.51361920512031,
"learning_rate": 7.979841274256502e-06,
"loss": 2.2317349910736084,
"step": 2273
},
{
"epoch": 1.1022782355792535,
"grad_norm": 11.123322909864077,
"learning_rate": 7.977576236655912e-06,
"loss": 1.7540678977966309,
"step": 2274
},
{
"epoch": 1.1027629665535628,
"grad_norm": 8.845878155918056,
"learning_rate": 7.975310251832328e-06,
"loss": 1.2129943370819092,
"step": 2275
},
{
"epoch": 1.103247697527872,
"grad_norm": 8.78989564266576,
"learning_rate": 7.973043320506599e-06,
"loss": 1.5544174909591675,
"step": 2276
},
{
"epoch": 1.1037324285021812,
"grad_norm": 7.25043969230313,
"learning_rate": 7.970775443399879e-06,
"loss": 1.339923620223999,
"step": 2277
},
{
"epoch": 1.1042171594764905,
"grad_norm": 13.05223969347117,
"learning_rate": 7.968506621233623e-06,
"loss": 1.6046265363693237,
"step": 2278
},
{
"epoch": 1.1047018904507997,
"grad_norm": 11.344799348089415,
"learning_rate": 7.966236854729585e-06,
"loss": 1.8519675731658936,
"step": 2279
},
{
"epoch": 1.1051866214251092,
"grad_norm": 13.771568422405137,
"learning_rate": 7.963966144609821e-06,
"loss": 1.761566162109375,
"step": 2280
},
{
"epoch": 1.1056713523994184,
"grad_norm": 9.796205115333215,
"learning_rate": 7.961694491596688e-06,
"loss": 1.2581082582473755,
"step": 2281
},
{
"epoch": 1.1061560833737276,
"grad_norm": 16.37458648628779,
"learning_rate": 7.959421896412838e-06,
"loss": 1.61737060546875,
"step": 2282
},
{
"epoch": 1.1066408143480368,
"grad_norm": 12.524846078962279,
"learning_rate": 7.957148359781229e-06,
"loss": 1.588525414466858,
"step": 2283
},
{
"epoch": 1.107125545322346,
"grad_norm": 12.930781529094133,
"learning_rate": 7.954873882425116e-06,
"loss": 1.4330575466156006,
"step": 2284
},
{
"epoch": 1.1076102762966553,
"grad_norm": 12.236753271225398,
"learning_rate": 7.95259846506805e-06,
"loss": 1.478423833847046,
"step": 2285
},
{
"epoch": 1.1080950072709645,
"grad_norm": 11.165387317692112,
"learning_rate": 7.950322108433888e-06,
"loss": 1.5939775705337524,
"step": 2286
},
{
"epoch": 1.1085797382452738,
"grad_norm": 9.701156497774331,
"learning_rate": 7.94804481324678e-06,
"loss": 1.3513188362121582,
"step": 2287
},
{
"epoch": 1.1090644692195832,
"grad_norm": 12.903256192667962,
"learning_rate": 7.945766580231175e-06,
"loss": 0.9453591704368591,
"step": 2288
},
{
"epoch": 1.1095492001938925,
"grad_norm": 15.178659640351785,
"learning_rate": 7.943487410111824e-06,
"loss": 1.646367073059082,
"step": 2289
},
{
"epoch": 1.1100339311682017,
"grad_norm": 10.99397470015724,
"learning_rate": 7.941207303613773e-06,
"loss": 1.2749927043914795,
"step": 2290
},
{
"epoch": 1.110518662142511,
"grad_norm": 14.919889681326417,
"learning_rate": 7.938926261462366e-06,
"loss": 1.7882070541381836,
"step": 2291
},
{
"epoch": 1.1110033931168202,
"grad_norm": 12.857661139481705,
"learning_rate": 7.936644284383248e-06,
"loss": 1.0446141958236694,
"step": 2292
},
{
"epoch": 1.1114881240911294,
"grad_norm": 11.016009763597204,
"learning_rate": 7.934361373102355e-06,
"loss": 1.1500638723373413,
"step": 2293
},
{
"epoch": 1.1119728550654386,
"grad_norm": 12.81174856513589,
"learning_rate": 7.932077528345929e-06,
"loss": 1.8753407001495361,
"step": 2294
},
{
"epoch": 1.1124575860397479,
"grad_norm": 14.916173817050181,
"learning_rate": 7.929792750840499e-06,
"loss": 1.4362127780914307,
"step": 2295
},
{
"epoch": 1.112942317014057,
"grad_norm": 10.73346964993034,
"learning_rate": 7.927507041312898e-06,
"loss": 1.7706456184387207,
"step": 2296
},
{
"epoch": 1.1134270479883666,
"grad_norm": 8.371124596097722,
"learning_rate": 7.925220400490252e-06,
"loss": 1.8246495723724365,
"step": 2297
},
{
"epoch": 1.1139117789626758,
"grad_norm": 10.747541512035887,
"learning_rate": 7.922932829099987e-06,
"loss": 1.2524843215942383,
"step": 2298
},
{
"epoch": 1.114396509936985,
"grad_norm": 17.840100617445984,
"learning_rate": 7.92064432786982e-06,
"loss": 0.8935575485229492,
"step": 2299
},
{
"epoch": 1.1148812409112943,
"grad_norm": 15.143723267069882,
"learning_rate": 7.918354897527767e-06,
"loss": 1.8751877546310425,
"step": 2300
},
{
"epoch": 1.1153659718856035,
"grad_norm": 12.418692357391159,
"learning_rate": 7.916064538802139e-06,
"loss": 2.089934825897217,
"step": 2301
},
{
"epoch": 1.1158507028599127,
"grad_norm": 12.290368674961645,
"learning_rate": 7.913773252421542e-06,
"loss": 1.1343867778778076,
"step": 2302
},
{
"epoch": 1.116335433834222,
"grad_norm": 7.633210327094751,
"learning_rate": 7.911481039114879e-06,
"loss": 1.3821626901626587,
"step": 2303
},
{
"epoch": 1.1168201648085312,
"grad_norm": 10.251804803387664,
"learning_rate": 7.909187899611342e-06,
"loss": 1.94291353225708,
"step": 2304
},
{
"epoch": 1.1173048957828404,
"grad_norm": 13.48164906370093,
"learning_rate": 7.906893834640428e-06,
"loss": 1.2537460327148438,
"step": 2305
},
{
"epoch": 1.1177896267571499,
"grad_norm": 13.457438121495109,
"learning_rate": 7.904598844931916e-06,
"loss": 2.745664596557617,
"step": 2306
},
{
"epoch": 1.1182743577314591,
"grad_norm": 9.331235402386966,
"learning_rate": 7.902302931215889e-06,
"loss": 1.8685383796691895,
"step": 2307
},
{
"epoch": 1.1187590887057683,
"grad_norm": 20.779073845000692,
"learning_rate": 7.900006094222719e-06,
"loss": 1.6461851596832275,
"step": 2308
},
{
"epoch": 1.1192438196800776,
"grad_norm": 15.688014132977763,
"learning_rate": 7.897708334683073e-06,
"loss": 1.9920883178710938,
"step": 2309
},
{
"epoch": 1.1197285506543868,
"grad_norm": 11.714080708840758,
"learning_rate": 7.89540965332791e-06,
"loss": 1.0976452827453613,
"step": 2310
},
{
"epoch": 1.120213281628696,
"grad_norm": 16.907566589099122,
"learning_rate": 7.893110050888489e-06,
"loss": 0.9889817833900452,
"step": 2311
},
{
"epoch": 1.1206980126030053,
"grad_norm": 12.495080701403042,
"learning_rate": 7.890809528096352e-06,
"loss": 1.9929473400115967,
"step": 2312
},
{
"epoch": 1.1211827435773145,
"grad_norm": 13.652611249822343,
"learning_rate": 7.88850808568334e-06,
"loss": 1.5651073455810547,
"step": 2313
},
{
"epoch": 1.1216674745516237,
"grad_norm": 9.767923762368747,
"learning_rate": 7.886205724381585e-06,
"loss": 1.934337854385376,
"step": 2314
},
{
"epoch": 1.1221522055259332,
"grad_norm": 11.356677419465493,
"learning_rate": 7.883902444923513e-06,
"loss": 1.0394479036331177,
"step": 2315
},
{
"epoch": 1.1226369365002424,
"grad_norm": 14.065486515431138,
"learning_rate": 7.881598248041835e-06,
"loss": 1.2906277179718018,
"step": 2316
},
{
"epoch": 1.1231216674745517,
"grad_norm": 10.380152795820333,
"learning_rate": 7.879293134469567e-06,
"loss": 1.3878847360610962,
"step": 2317
},
{
"epoch": 1.123606398448861,
"grad_norm": 12.408106017669024,
"learning_rate": 7.876987104940005e-06,
"loss": 2.4924263954162598,
"step": 2318
},
{
"epoch": 1.1240911294231701,
"grad_norm": 9.453167774760415,
"learning_rate": 7.87468016018674e-06,
"loss": 1.7807565927505493,
"step": 2319
},
{
"epoch": 1.1245758603974794,
"grad_norm": 13.08547770622076,
"learning_rate": 7.872372300943657e-06,
"loss": 1.4958064556121826,
"step": 2320
},
{
"epoch": 1.1250605913717886,
"grad_norm": 8.268995040945246,
"learning_rate": 7.870063527944924e-06,
"loss": 1.2813563346862793,
"step": 2321
},
{
"epoch": 1.1255453223460978,
"grad_norm": 12.45210592794731,
"learning_rate": 7.867753841925013e-06,
"loss": 1.756922960281372,
"step": 2322
},
{
"epoch": 1.1260300533204073,
"grad_norm": 10.501272644616234,
"learning_rate": 7.865443243618672e-06,
"loss": 0.9772135019302368,
"step": 2323
},
{
"epoch": 1.1265147842947165,
"grad_norm": 14.903777009679764,
"learning_rate": 7.86313173376095e-06,
"loss": 1.5646026134490967,
"step": 2324
},
{
"epoch": 1.1269995152690258,
"grad_norm": 12.799623796861587,
"learning_rate": 7.860819313087177e-06,
"loss": 1.8566142320632935,
"step": 2325
},
{
"epoch": 1.127484246243335,
"grad_norm": 11.402285085809533,
"learning_rate": 7.858505982332982e-06,
"loss": 1.5203584432601929,
"step": 2326
},
{
"epoch": 1.1279689772176442,
"grad_norm": 12.209567758466498,
"learning_rate": 7.856191742234277e-06,
"loss": 3.23656964302063,
"step": 2327
},
{
"epoch": 1.1284537081919535,
"grad_norm": 13.926416229098978,
"learning_rate": 7.853876593527266e-06,
"loss": 1.4664387702941895,
"step": 2328
},
{
"epoch": 1.1289384391662627,
"grad_norm": 7.761073772791252,
"learning_rate": 7.851560536948441e-06,
"loss": 1.8207951784133911,
"step": 2329
},
{
"epoch": 1.129423170140572,
"grad_norm": 19.400646181302175,
"learning_rate": 7.849243573234582e-06,
"loss": 2.7226216793060303,
"step": 2330
},
{
"epoch": 1.1299079011148812,
"grad_norm": 10.134791238660766,
"learning_rate": 7.846925703122762e-06,
"loss": 1.5246868133544922,
"step": 2331
},
{
"epoch": 1.1303926320891904,
"grad_norm": 14.744910738110134,
"learning_rate": 7.844606927350335e-06,
"loss": 1.5284154415130615,
"step": 2332
},
{
"epoch": 1.1308773630634998,
"grad_norm": 7.150417531598178,
"learning_rate": 7.842287246654951e-06,
"loss": 1.0502232313156128,
"step": 2333
},
{
"epoch": 1.131362094037809,
"grad_norm": 21.338912423326516,
"learning_rate": 7.839966661774541e-06,
"loss": 1.104759931564331,
"step": 2334
},
{
"epoch": 1.1318468250121183,
"grad_norm": 14.374238000373861,
"learning_rate": 7.837645173447329e-06,
"loss": 1.0342198610305786,
"step": 2335
},
{
"epoch": 1.1323315559864275,
"grad_norm": 8.607053374198193,
"learning_rate": 7.835322782411822e-06,
"loss": 1.3468244075775146,
"step": 2336
},
{
"epoch": 1.1328162869607368,
"grad_norm": 8.039606708791899,
"learning_rate": 7.832999489406818e-06,
"loss": 1.928612232208252,
"step": 2337
},
{
"epoch": 1.133301017935046,
"grad_norm": 11.856362571852214,
"learning_rate": 7.8306752951714e-06,
"loss": 1.851364016532898,
"step": 2338
},
{
"epoch": 1.1337857489093552,
"grad_norm": 14.151894402566644,
"learning_rate": 7.828350200444935e-06,
"loss": 1.6308441162109375,
"step": 2339
},
{
"epoch": 1.1342704798836645,
"grad_norm": 12.568578794052577,
"learning_rate": 7.826024205967084e-06,
"loss": 1.2580432891845703,
"step": 2340
},
{
"epoch": 1.134755210857974,
"grad_norm": 13.120514398749917,
"learning_rate": 7.823697312477786e-06,
"loss": 1.2573580741882324,
"step": 2341
},
{
"epoch": 1.1352399418322832,
"grad_norm": 10.500153118157032,
"learning_rate": 7.821369520717273e-06,
"loss": 1.7347338199615479,
"step": 2342
},
{
"epoch": 1.1357246728065924,
"grad_norm": 11.773547993454432,
"learning_rate": 7.819040831426055e-06,
"loss": 1.5472034215927124,
"step": 2343
},
{
"epoch": 1.1362094037809016,
"grad_norm": 6.8231479912357935,
"learning_rate": 7.816711245344934e-06,
"loss": 1.9018725156784058,
"step": 2344
},
{
"epoch": 1.1366941347552109,
"grad_norm": 9.934424532179184,
"learning_rate": 7.814380763214996e-06,
"loss": 1.2785378694534302,
"step": 2345
},
{
"epoch": 1.13717886572952,
"grad_norm": 9.064028428021512,
"learning_rate": 7.81204938577761e-06,
"loss": 2.4101107120513916,
"step": 2346
},
{
"epoch": 1.1376635967038293,
"grad_norm": 10.061593946865745,
"learning_rate": 7.809717113774432e-06,
"loss": 1.1891649961471558,
"step": 2347
},
{
"epoch": 1.1381483276781386,
"grad_norm": 8.873014489461584,
"learning_rate": 7.8073839479474e-06,
"loss": 1.5466057062149048,
"step": 2348
},
{
"epoch": 1.1386330586524478,
"grad_norm": 11.086806305677124,
"learning_rate": 7.80504988903874e-06,
"loss": 2.046384572982788,
"step": 2349
},
{
"epoch": 1.139117789626757,
"grad_norm": 17.39745602526834,
"learning_rate": 7.80271493779096e-06,
"loss": 1.4060511589050293,
"step": 2350
},
{
"epoch": 1.1396025206010665,
"grad_norm": 18.903216750784065,
"learning_rate": 7.800379094946849e-06,
"loss": 1.2001181840896606,
"step": 2351
},
{
"epoch": 1.1400872515753757,
"grad_norm": 10.247819841337126,
"learning_rate": 7.798042361249485e-06,
"loss": 1.619157314300537,
"step": 2352
},
{
"epoch": 1.140571982549685,
"grad_norm": 10.987810260834934,
"learning_rate": 7.795704737442227e-06,
"loss": 1.2238428592681885,
"step": 2353
},
{
"epoch": 1.1410567135239942,
"grad_norm": 10.66040941459362,
"learning_rate": 7.793366224268715e-06,
"loss": 1.718814730644226,
"step": 2354
},
{
"epoch": 1.1415414444983034,
"grad_norm": 12.280691633932772,
"learning_rate": 7.791026822472876e-06,
"loss": 1.7955684661865234,
"step": 2355
},
{
"epoch": 1.1420261754726126,
"grad_norm": 12.971900917270448,
"learning_rate": 7.788686532798917e-06,
"loss": 1.1724660396575928,
"step": 2356
},
{
"epoch": 1.1425109064469219,
"grad_norm": 9.397541810738813,
"learning_rate": 7.786345355991329e-06,
"loss": 1.4337654113769531,
"step": 2357
},
{
"epoch": 1.1429956374212313,
"grad_norm": 8.636990752922097,
"learning_rate": 7.784003292794882e-06,
"loss": 1.3664586544036865,
"step": 2358
},
{
"epoch": 1.1434803683955406,
"grad_norm": 10.53368972151585,
"learning_rate": 7.781660343954636e-06,
"loss": 1.852053165435791,
"step": 2359
},
{
"epoch": 1.1439650993698498,
"grad_norm": 7.102986429811425,
"learning_rate": 7.779316510215919e-06,
"loss": 1.703042984008789,
"step": 2360
},
{
"epoch": 1.144449830344159,
"grad_norm": 8.728022759128828,
"learning_rate": 7.776971792324356e-06,
"loss": 2.5288145542144775,
"step": 2361
},
{
"epoch": 1.1449345613184683,
"grad_norm": 13.198930432401115,
"learning_rate": 7.774626191025838e-06,
"loss": 1.2905550003051758,
"step": 2362
},
{
"epoch": 1.1454192922927775,
"grad_norm": 14.028721785737083,
"learning_rate": 7.772279707066553e-06,
"loss": 1.5276139974594116,
"step": 2363
},
{
"epoch": 1.1459040232670867,
"grad_norm": 11.832113086625478,
"learning_rate": 7.769932341192958e-06,
"loss": 3.241154670715332,
"step": 2364
},
{
"epoch": 1.146388754241396,
"grad_norm": 10.584547815697338,
"learning_rate": 7.767584094151793e-06,
"loss": 1.2448686361312866,
"step": 2365
},
{
"epoch": 1.1468734852157052,
"grad_norm": 10.276003357197833,
"learning_rate": 7.765234966690081e-06,
"loss": 1.7939293384552002,
"step": 2366
},
{
"epoch": 1.1473582161900144,
"grad_norm": 15.726531833319585,
"learning_rate": 7.762884959555124e-06,
"loss": 1.1835649013519287,
"step": 2367
},
{
"epoch": 1.147842947164324,
"grad_norm": 18.16106844162785,
"learning_rate": 7.760534073494504e-06,
"loss": 0.966528058052063,
"step": 2368
},
{
"epoch": 1.1483276781386331,
"grad_norm": 11.602878719768874,
"learning_rate": 7.75818230925608e-06,
"loss": 1.9541475772857666,
"step": 2369
},
{
"epoch": 1.1488124091129424,
"grad_norm": 14.712441820049795,
"learning_rate": 7.755829667587993e-06,
"loss": 2.0851473808288574,
"step": 2370
},
{
"epoch": 1.1492971400872516,
"grad_norm": 8.36130991873389,
"learning_rate": 7.753476149238663e-06,
"loss": 1.7325831651687622,
"step": 2371
},
{
"epoch": 1.1497818710615608,
"grad_norm": 10.542698722053258,
"learning_rate": 7.75112175495679e-06,
"loss": 1.2469505071640015,
"step": 2372
},
{
"epoch": 1.15026660203587,
"grad_norm": 8.680408730148955,
"learning_rate": 7.748766485491348e-06,
"loss": 2.0029642581939697,
"step": 2373
},
{
"epoch": 1.1507513330101793,
"grad_norm": 10.340975716128064,
"learning_rate": 7.746410341591594e-06,
"loss": 1.081038475036621,
"step": 2374
},
{
"epoch": 1.1512360639844885,
"grad_norm": 18.564403825314393,
"learning_rate": 7.744053324007064e-06,
"loss": 1.6806474924087524,
"step": 2375
},
{
"epoch": 1.151720794958798,
"grad_norm": 30.058083378113682,
"learning_rate": 7.741695433487566e-06,
"loss": 2.1892545223236084,
"step": 2376
},
{
"epoch": 1.1522055259331072,
"grad_norm": 9.614374354336736,
"learning_rate": 7.739336670783191e-06,
"loss": 1.1259362697601318,
"step": 2377
},
{
"epoch": 1.1526902569074164,
"grad_norm": 13.08763338023228,
"learning_rate": 7.736977036644305e-06,
"loss": 3.5154099464416504,
"step": 2378
},
{
"epoch": 1.1531749878817257,
"grad_norm": 16.17938602304979,
"learning_rate": 7.734616531821551e-06,
"loss": 0.7833991050720215,
"step": 2379
},
{
"epoch": 1.153659718856035,
"grad_norm": 9.50640634476707,
"learning_rate": 7.732255157065854e-06,
"loss": 1.3649941682815552,
"step": 2380
},
{
"epoch": 1.1541444498303441,
"grad_norm": 23.827231413693617,
"learning_rate": 7.729892913128407e-06,
"loss": 1.3118276596069336,
"step": 2381
},
{
"epoch": 1.1546291808046534,
"grad_norm": 18.529402424011447,
"learning_rate": 7.727529800760689e-06,
"loss": 1.7378580570220947,
"step": 2382
},
{
"epoch": 1.1551139117789626,
"grad_norm": 6.633384793052112,
"learning_rate": 7.725165820714447e-06,
"loss": 1.4335076808929443,
"step": 2383
},
{
"epoch": 1.1555986427532718,
"grad_norm": 10.881124077419617,
"learning_rate": 7.722800973741707e-06,
"loss": 1.4353153705596924,
"step": 2384
},
{
"epoch": 1.156083373727581,
"grad_norm": 14.729286812916982,
"learning_rate": 7.720435260594774e-06,
"loss": 2.058542013168335,
"step": 2385
},
{
"epoch": 1.1565681047018905,
"grad_norm": 9.449488470625678,
"learning_rate": 7.718068682026224e-06,
"loss": 2.173652172088623,
"step": 2386
},
{
"epoch": 1.1570528356761998,
"grad_norm": 10.812246627278808,
"learning_rate": 7.71570123878891e-06,
"loss": 1.6244595050811768,
"step": 2387
},
{
"epoch": 1.157537566650509,
"grad_norm": 10.171638331273723,
"learning_rate": 7.713332931635962e-06,
"loss": 1.7041317224502563,
"step": 2388
},
{
"epoch": 1.1580222976248182,
"grad_norm": 10.865726423960169,
"learning_rate": 7.710963761320782e-06,
"loss": 1.5206122398376465,
"step": 2389
},
{
"epoch": 1.1585070285991275,
"grad_norm": 8.722106726648592,
"learning_rate": 7.708593728597047e-06,
"loss": 1.6410032510757446,
"step": 2390
},
{
"epoch": 1.1589917595734367,
"grad_norm": 8.895167303585934,
"learning_rate": 7.70622283421871e-06,
"loss": 1.179684042930603,
"step": 2391
},
{
"epoch": 1.159476490547746,
"grad_norm": 14.557981892104381,
"learning_rate": 7.703851078939996e-06,
"loss": 1.8273512125015259,
"step": 2392
},
{
"epoch": 1.1599612215220552,
"grad_norm": 21.873101279202423,
"learning_rate": 7.701478463515405e-06,
"loss": 2.813673973083496,
"step": 2393
},
{
"epoch": 1.1604459524963646,
"grad_norm": 13.610916838351583,
"learning_rate": 7.699104988699712e-06,
"loss": 1.6435704231262207,
"step": 2394
},
{
"epoch": 1.1609306834706739,
"grad_norm": 9.740379607592654,
"learning_rate": 7.696730655247963e-06,
"loss": 1.73248291015625,
"step": 2395
},
{
"epoch": 1.161415414444983,
"grad_norm": 13.269133244263243,
"learning_rate": 7.694355463915479e-06,
"loss": 1.1489980220794678,
"step": 2396
},
{
"epoch": 1.1619001454192923,
"grad_norm": 9.312436035731576,
"learning_rate": 7.691979415457852e-06,
"loss": 1.0276745557785034,
"step": 2397
},
{
"epoch": 1.1623848763936016,
"grad_norm": 9.424881437638641,
"learning_rate": 7.689602510630948e-06,
"loss": 1.9429165124893188,
"step": 2398
},
{
"epoch": 1.1628696073679108,
"grad_norm": 9.391382123309775,
"learning_rate": 7.687224750190905e-06,
"loss": 1.3588855266571045,
"step": 2399
},
{
"epoch": 1.16335433834222,
"grad_norm": 13.11907697968951,
"learning_rate": 7.684846134894133e-06,
"loss": 2.2479846477508545,
"step": 2400
},
{
"epoch": 1.1638390693165293,
"grad_norm": 7.029902083238253,
"learning_rate": 7.682466665497315e-06,
"loss": 1.4529460668563843,
"step": 2401
},
{
"epoch": 1.1643238002908385,
"grad_norm": 16.34700753581038,
"learning_rate": 7.680086342757404e-06,
"loss": 1.2353930473327637,
"step": 2402
},
{
"epoch": 1.164808531265148,
"grad_norm": 19.444033116917485,
"learning_rate": 7.677705167431627e-06,
"loss": 1.2363176345825195,
"step": 2403
},
{
"epoch": 1.1652932622394572,
"grad_norm": 10.27018166265896,
"learning_rate": 7.67532314027748e-06,
"loss": 1.5347752571105957,
"step": 2404
},
{
"epoch": 1.1657779932137664,
"grad_norm": 8.168794715959123,
"learning_rate": 7.67294026205273e-06,
"loss": 2.133976936340332,
"step": 2405
},
{
"epoch": 1.1662627241880756,
"grad_norm": 10.996551731092897,
"learning_rate": 7.670556533515417e-06,
"loss": 1.624966025352478,
"step": 2406
},
{
"epoch": 1.1667474551623849,
"grad_norm": 12.793665070296205,
"learning_rate": 7.668171955423845e-06,
"loss": 1.9132251739501953,
"step": 2407
},
{
"epoch": 1.167232186136694,
"grad_norm": 15.3209503357764,
"learning_rate": 7.665786528536598e-06,
"loss": 1.6125633716583252,
"step": 2408
},
{
"epoch": 1.1677169171110033,
"grad_norm": 12.569579034762077,
"learning_rate": 7.663400253612527e-06,
"loss": 1.7772804498672485,
"step": 2409
},
{
"epoch": 1.1682016480853126,
"grad_norm": 11.062360134897858,
"learning_rate": 7.661013131410745e-06,
"loss": 1.2968356609344482,
"step": 2410
},
{
"epoch": 1.168686379059622,
"grad_norm": 8.62487902353753,
"learning_rate": 7.658625162690643e-06,
"loss": 1.7450478076934814,
"step": 2411
},
{
"epoch": 1.1691711100339313,
"grad_norm": 19.331377985457333,
"learning_rate": 7.65623634821188e-06,
"loss": 1.8735930919647217,
"step": 2412
},
{
"epoch": 1.1696558410082405,
"grad_norm": 11.317632326716398,
"learning_rate": 7.653846688734384e-06,
"loss": 1.6112592220306396,
"step": 2413
},
{
"epoch": 1.1701405719825497,
"grad_norm": 12.65965006738976,
"learning_rate": 7.651456185018344e-06,
"loss": 1.6046311855316162,
"step": 2414
},
{
"epoch": 1.170625302956859,
"grad_norm": 14.720858666170159,
"learning_rate": 7.649064837824231e-06,
"loss": 1.3739455938339233,
"step": 2415
},
{
"epoch": 1.1711100339311682,
"grad_norm": 12.614044800185741,
"learning_rate": 7.646672647912777e-06,
"loss": 1.1553995609283447,
"step": 2416
},
{
"epoch": 1.1715947649054774,
"grad_norm": 8.611797217306751,
"learning_rate": 7.644279616044979e-06,
"loss": 1.4981369972229004,
"step": 2417
},
{
"epoch": 1.1720794958797867,
"grad_norm": 6.532873505067751,
"learning_rate": 7.641885742982107e-06,
"loss": 1.5530508756637573,
"step": 2418
},
{
"epoch": 1.172564226854096,
"grad_norm": 18.734413279191042,
"learning_rate": 7.639491029485697e-06,
"loss": 1.71492600440979,
"step": 2419
},
{
"epoch": 1.1730489578284051,
"grad_norm": 17.967294542316637,
"learning_rate": 7.637095476317553e-06,
"loss": 2.154179096221924,
"step": 2420
},
{
"epoch": 1.1735336888027146,
"grad_norm": 11.207304081341906,
"learning_rate": 7.634699084239745e-06,
"loss": 2.025331974029541,
"step": 2421
},
{
"epoch": 1.1740184197770238,
"grad_norm": 10.50590720805447,
"learning_rate": 7.632301854014612e-06,
"loss": 1.3070893287658691,
"step": 2422
},
{
"epoch": 1.174503150751333,
"grad_norm": 7.435411869981049,
"learning_rate": 7.629903786404754e-06,
"loss": 0.768419623374939,
"step": 2423
},
{
"epoch": 1.1749878817256423,
"grad_norm": 7.938742653997332,
"learning_rate": 7.627504882173047e-06,
"loss": 1.903712272644043,
"step": 2424
},
{
"epoch": 1.1754726126999515,
"grad_norm": 15.198791934598951,
"learning_rate": 7.6251051420826224e-06,
"loss": 1.8782013654708862,
"step": 2425
},
{
"epoch": 1.1759573436742607,
"grad_norm": 11.832571541243743,
"learning_rate": 7.622704566896886e-06,
"loss": 1.6164613962173462,
"step": 2426
},
{
"epoch": 1.17644207464857,
"grad_norm": 7.536214320498983,
"learning_rate": 7.620303157379504e-06,
"loss": 1.7085106372833252,
"step": 2427
},
{
"epoch": 1.1769268056228792,
"grad_norm": 10.863442357450612,
"learning_rate": 7.61790091429441e-06,
"loss": 1.3599423170089722,
"step": 2428
},
{
"epoch": 1.1774115365971887,
"grad_norm": 13.135596023083618,
"learning_rate": 7.615497838405805e-06,
"loss": 0.8170541524887085,
"step": 2429
},
{
"epoch": 1.177896267571498,
"grad_norm": 11.448186838629843,
"learning_rate": 7.613093930478148e-06,
"loss": 1.3749488592147827,
"step": 2430
},
{
"epoch": 1.1783809985458071,
"grad_norm": 15.014869882586709,
"learning_rate": 7.610689191276173e-06,
"loss": 1.4620132446289062,
"step": 2431
},
{
"epoch": 1.1788657295201164,
"grad_norm": 10.043315870070776,
"learning_rate": 7.60828362156487e-06,
"loss": 1.7125437259674072,
"step": 2432
},
{
"epoch": 1.1793504604944256,
"grad_norm": 13.292047475395357,
"learning_rate": 7.605877222109494e-06,
"loss": 1.4756567478179932,
"step": 2433
},
{
"epoch": 1.1798351914687348,
"grad_norm": 16.59718998353907,
"learning_rate": 7.603469993675571e-06,
"loss": 1.8406963348388672,
"step": 2434
},
{
"epoch": 1.180319922443044,
"grad_norm": 8.603438757372984,
"learning_rate": 7.601061937028881e-06,
"loss": 1.184929370880127,
"step": 2435
},
{
"epoch": 1.1808046534173533,
"grad_norm": 24.08148639082423,
"learning_rate": 7.598653052935474e-06,
"loss": 2.0396251678466797,
"step": 2436
},
{
"epoch": 1.1812893843916625,
"grad_norm": 14.321608422144868,
"learning_rate": 7.596243342161659e-06,
"loss": 1.9904685020446777,
"step": 2437
},
{
"epoch": 1.1817741153659718,
"grad_norm": 10.137984361827435,
"learning_rate": 7.593832805474014e-06,
"loss": 1.8739778995513916,
"step": 2438
},
{
"epoch": 1.1822588463402812,
"grad_norm": 15.349341324791256,
"learning_rate": 7.591421443639374e-06,
"loss": 1.8908329010009766,
"step": 2439
},
{
"epoch": 1.1827435773145905,
"grad_norm": 8.347694209316233,
"learning_rate": 7.58900925742484e-06,
"loss": 1.0362977981567383,
"step": 2440
},
{
"epoch": 1.1832283082888997,
"grad_norm": 11.65287770547587,
"learning_rate": 7.586596247597771e-06,
"loss": 1.7537963390350342,
"step": 2441
},
{
"epoch": 1.183713039263209,
"grad_norm": 13.209734607777131,
"learning_rate": 7.584182414925793e-06,
"loss": 1.7000313997268677,
"step": 2442
},
{
"epoch": 1.1841977702375182,
"grad_norm": 9.864742890532172,
"learning_rate": 7.581767760176793e-06,
"loss": 2.3855783939361572,
"step": 2443
},
{
"epoch": 1.1846825012118274,
"grad_norm": 11.725945180426587,
"learning_rate": 7.579352284118912e-06,
"loss": 1.4516990184783936,
"step": 2444
},
{
"epoch": 1.1851672321861366,
"grad_norm": 8.73289256773283,
"learning_rate": 7.576935987520566e-06,
"loss": 1.6577363014221191,
"step": 2445
},
{
"epoch": 1.185651963160446,
"grad_norm": 11.033257740895484,
"learning_rate": 7.57451887115042e-06,
"loss": 1.4656476974487305,
"step": 2446
},
{
"epoch": 1.1861366941347553,
"grad_norm": 14.210903429748104,
"learning_rate": 7.5721009357774056e-06,
"loss": 1.7171008586883545,
"step": 2447
},
{
"epoch": 1.1866214251090645,
"grad_norm": 20.052689571669408,
"learning_rate": 7.569682182170712e-06,
"loss": 1.5200668573379517,
"step": 2448
},
{
"epoch": 1.1871061560833738,
"grad_norm": 9.30837794598242,
"learning_rate": 7.567262611099792e-06,
"loss": 2.1013290882110596,
"step": 2449
},
{
"epoch": 1.187590887057683,
"grad_norm": 12.898383165050394,
"learning_rate": 7.5648422233343564e-06,
"loss": 1.5563066005706787,
"step": 2450
},
{
"epoch": 1.1880756180319922,
"grad_norm": 14.416701781970595,
"learning_rate": 7.562421019644373e-06,
"loss": 1.5434622764587402,
"step": 2451
},
{
"epoch": 1.1885603490063015,
"grad_norm": 9.058662774608722,
"learning_rate": 7.559999000800077e-06,
"loss": 1.480661153793335,
"step": 2452
},
{
"epoch": 1.1890450799806107,
"grad_norm": 10.469929588441007,
"learning_rate": 7.557576167571954e-06,
"loss": 1.5909556150436401,
"step": 2453
},
{
"epoch": 1.18952981095492,
"grad_norm": 13.340157317127977,
"learning_rate": 7.555152520730757e-06,
"loss": 1.5679166316986084,
"step": 2454
},
{
"epoch": 1.1900145419292292,
"grad_norm": 22.225988594146983,
"learning_rate": 7.552728061047492e-06,
"loss": 1.914211392402649,
"step": 2455
},
{
"epoch": 1.1904992729035386,
"grad_norm": 11.912994104207112,
"learning_rate": 7.550302789293421e-06,
"loss": 1.428321123123169,
"step": 2456
},
{
"epoch": 1.1909840038778479,
"grad_norm": 9.110794102703343,
"learning_rate": 7.547876706240074e-06,
"loss": 1.2087781429290771,
"step": 2457
},
{
"epoch": 1.191468734852157,
"grad_norm": 15.618701450163766,
"learning_rate": 7.545449812659233e-06,
"loss": 1.3007173538208008,
"step": 2458
},
{
"epoch": 1.1919534658264663,
"grad_norm": 16.574751518878685,
"learning_rate": 7.543022109322935e-06,
"loss": 1.7998478412628174,
"step": 2459
},
{
"epoch": 1.1924381968007756,
"grad_norm": 11.438031150505136,
"learning_rate": 7.54059359700348e-06,
"loss": 1.5106911659240723,
"step": 2460
},
{
"epoch": 1.1929229277750848,
"grad_norm": 11.419770185790403,
"learning_rate": 7.5381642764734276e-06,
"loss": 1.2413208484649658,
"step": 2461
},
{
"epoch": 1.193407658749394,
"grad_norm": 9.014675598655167,
"learning_rate": 7.535734148505585e-06,
"loss": 1.5268748998641968,
"step": 2462
},
{
"epoch": 1.1938923897237033,
"grad_norm": 12.710496762089734,
"learning_rate": 7.533303213873021e-06,
"loss": 2.131546974182129,
"step": 2463
},
{
"epoch": 1.1943771206980127,
"grad_norm": 12.319627192070625,
"learning_rate": 7.530871473349065e-06,
"loss": 1.2422480583190918,
"step": 2464
},
{
"epoch": 1.194861851672322,
"grad_norm": 18.685774301431195,
"learning_rate": 7.528438927707298e-06,
"loss": 1.1376121044158936,
"step": 2465
},
{
"epoch": 1.1953465826466312,
"grad_norm": 31.702831741148106,
"learning_rate": 7.52600557772156e-06,
"loss": 1.9522264003753662,
"step": 2466
},
{
"epoch": 1.1958313136209404,
"grad_norm": 9.389644295620784,
"learning_rate": 7.523571424165942e-06,
"loss": 2.0375959873199463,
"step": 2467
},
{
"epoch": 1.1963160445952497,
"grad_norm": 12.042331171259082,
"learning_rate": 7.521136467814797e-06,
"loss": 1.3805906772613525,
"step": 2468
},
{
"epoch": 1.1968007755695589,
"grad_norm": 12.748386316586238,
"learning_rate": 7.51870070944273e-06,
"loss": 1.483691692352295,
"step": 2469
},
{
"epoch": 1.1972855065438681,
"grad_norm": 11.989577676675111,
"learning_rate": 7.5162641498246e-06,
"loss": 1.8858270645141602,
"step": 2470
},
{
"epoch": 1.1977702375181773,
"grad_norm": 13.52995974403838,
"learning_rate": 7.513826789735524e-06,
"loss": 1.6237690448760986,
"step": 2471
},
{
"epoch": 1.1982549684924866,
"grad_norm": 10.493542086923494,
"learning_rate": 7.511388629950871e-06,
"loss": 1.517688274383545,
"step": 2472
},
{
"epoch": 1.1987396994667958,
"grad_norm": 10.315983740543658,
"learning_rate": 7.5089496712462686e-06,
"loss": 0.8867512941360474,
"step": 2473
},
{
"epoch": 1.1992244304411053,
"grad_norm": 18.248553240702247,
"learning_rate": 7.50650991439759e-06,
"loss": 1.603925347328186,
"step": 2474
},
{
"epoch": 1.1997091614154145,
"grad_norm": 7.925049725441224,
"learning_rate": 7.504069360180971e-06,
"loss": 1.3937921524047852,
"step": 2475
},
{
"epoch": 1.2001938923897237,
"grad_norm": 10.363492086047703,
"learning_rate": 7.5016280093727976e-06,
"loss": 1.6232811212539673,
"step": 2476
},
{
"epoch": 1.200678623364033,
"grad_norm": 11.20954874668053,
"learning_rate": 7.49918586274971e-06,
"loss": 1.1700522899627686,
"step": 2477
},
{
"epoch": 1.2011633543383422,
"grad_norm": 23.201985651372567,
"learning_rate": 7.4967429210886e-06,
"loss": 1.6805195808410645,
"step": 2478
},
{
"epoch": 1.2016480853126514,
"grad_norm": 15.756972718522695,
"learning_rate": 7.494299185166612e-06,
"loss": 1.4827827215194702,
"step": 2479
},
{
"epoch": 1.2021328162869607,
"grad_norm": 12.838258218831367,
"learning_rate": 7.491854655761149e-06,
"loss": 1.1145033836364746,
"step": 2480
},
{
"epoch": 1.20261754726127,
"grad_norm": 7.9821013213410605,
"learning_rate": 7.489409333649854e-06,
"loss": 1.1222736835479736,
"step": 2481
},
{
"epoch": 1.2031022782355794,
"grad_norm": 11.187142849428016,
"learning_rate": 7.486963219610636e-06,
"loss": 1.72554349899292,
"step": 2482
},
{
"epoch": 1.2035870092098886,
"grad_norm": 9.218017024269866,
"learning_rate": 7.48451631442165e-06,
"loss": 0.7622585296630859,
"step": 2483
},
{
"epoch": 1.2040717401841978,
"grad_norm": 18.791718328312324,
"learning_rate": 7.482068618861301e-06,
"loss": 2.4998385906219482,
"step": 2484
},
{
"epoch": 1.204556471158507,
"grad_norm": 8.692718185112895,
"learning_rate": 7.479620133708246e-06,
"loss": 1.7312088012695312,
"step": 2485
},
{
"epoch": 1.2050412021328163,
"grad_norm": 17.09554382669891,
"learning_rate": 7.477170859741395e-06,
"loss": 2.637773275375366,
"step": 2486
},
{
"epoch": 1.2055259331071255,
"grad_norm": 7.901266104946936,
"learning_rate": 7.474720797739909e-06,
"loss": 2.0762829780578613,
"step": 2487
},
{
"epoch": 1.2060106640814348,
"grad_norm": 13.545213388024935,
"learning_rate": 7.472269948483197e-06,
"loss": 2.3098514080047607,
"step": 2488
},
{
"epoch": 1.206495395055744,
"grad_norm": 21.285877562995537,
"learning_rate": 7.469818312750923e-06,
"loss": 2.0178561210632324,
"step": 2489
},
{
"epoch": 1.2069801260300532,
"grad_norm": 17.297421665418202,
"learning_rate": 7.467365891322996e-06,
"loss": 1.6984999179840088,
"step": 2490
},
{
"epoch": 1.2074648570043625,
"grad_norm": 11.122601894672966,
"learning_rate": 7.464912684979577e-06,
"loss": 1.4981290102005005,
"step": 2491
},
{
"epoch": 1.207949587978672,
"grad_norm": 15.743459351222375,
"learning_rate": 7.462458694501079e-06,
"loss": 1.6273584365844727,
"step": 2492
},
{
"epoch": 1.2084343189529811,
"grad_norm": 12.934217911972874,
"learning_rate": 7.46000392066816e-06,
"loss": 1.1797376871109009,
"step": 2493
},
{
"epoch": 1.2089190499272904,
"grad_norm": 9.412264552040972,
"learning_rate": 7.457548364261733e-06,
"loss": 1.5815296173095703,
"step": 2494
},
{
"epoch": 1.2094037809015996,
"grad_norm": 26.788653973034343,
"learning_rate": 7.455092026062955e-06,
"loss": 1.0649645328521729,
"step": 2495
},
{
"epoch": 1.2098885118759088,
"grad_norm": 7.960072566675796,
"learning_rate": 7.452634906853233e-06,
"loss": 2.4729201793670654,
"step": 2496
},
{
"epoch": 1.210373242850218,
"grad_norm": 23.241155714942554,
"learning_rate": 7.450177007414224e-06,
"loss": 0.9797571301460266,
"step": 2497
},
{
"epoch": 1.2108579738245273,
"grad_norm": 7.599478209780183,
"learning_rate": 7.4477183285278306e-06,
"loss": 1.4153597354888916,
"step": 2498
},
{
"epoch": 1.2113427047988368,
"grad_norm": 8.157492683692489,
"learning_rate": 7.445258870976206e-06,
"loss": 1.676889419555664,
"step": 2499
},
{
"epoch": 1.211827435773146,
"grad_norm": 12.67942301712379,
"learning_rate": 7.442798635541749e-06,
"loss": 1.306362271308899,
"step": 2500
},
{
"epoch": 1.2123121667474552,
"grad_norm": 7.699399067550713,
"learning_rate": 7.440337623007108e-06,
"loss": 1.8947131633758545,
"step": 2501
},
{
"epoch": 1.2127968977217645,
"grad_norm": 10.2909115467665,
"learning_rate": 7.4378758341551745e-06,
"loss": 1.5575590133666992,
"step": 2502
},
{
"epoch": 1.2132816286960737,
"grad_norm": 13.159023998459917,
"learning_rate": 7.435413269769094e-06,
"loss": 1.568695306777954,
"step": 2503
},
{
"epoch": 1.213766359670383,
"grad_norm": 11.02602750452367,
"learning_rate": 7.432949930632252e-06,
"loss": 1.4135680198669434,
"step": 2504
},
{
"epoch": 1.2142510906446922,
"grad_norm": 8.274197154240527,
"learning_rate": 7.430485817528281e-06,
"loss": 1.7049115896224976,
"step": 2505
},
{
"epoch": 1.2147358216190014,
"grad_norm": 9.048587989562318,
"learning_rate": 7.428020931241067e-06,
"loss": 1.3227704763412476,
"step": 2506
},
{
"epoch": 1.2152205525933106,
"grad_norm": 14.962943091104783,
"learning_rate": 7.425555272554733e-06,
"loss": 1.2441350221633911,
"step": 2507
},
{
"epoch": 1.2157052835676199,
"grad_norm": 13.155313186987826,
"learning_rate": 7.423088842253652e-06,
"loss": 1.528452754020691,
"step": 2508
},
{
"epoch": 1.2161900145419293,
"grad_norm": 11.59734059679346,
"learning_rate": 7.420621641122441e-06,
"loss": 2.0148072242736816,
"step": 2509
},
{
"epoch": 1.2166747455162386,
"grad_norm": 13.400042483713602,
"learning_rate": 7.418153669945966e-06,
"loss": 1.501041293144226,
"step": 2510
},
{
"epoch": 1.2171594764905478,
"grad_norm": 12.720187897369057,
"learning_rate": 7.415684929509333e-06,
"loss": 2.2114574909210205,
"step": 2511
},
{
"epoch": 1.217644207464857,
"grad_norm": 12.202581548721723,
"learning_rate": 7.4132154205978935e-06,
"loss": 1.820117473602295,
"step": 2512
},
{
"epoch": 1.2181289384391663,
"grad_norm": 13.749129044518574,
"learning_rate": 7.410745143997247e-06,
"loss": 1.2999699115753174,
"step": 2513
},
{
"epoch": 1.2186136694134755,
"grad_norm": 9.919095499010478,
"learning_rate": 7.4082741004932336e-06,
"loss": 1.2936867475509644,
"step": 2514
},
{
"epoch": 1.2190984003877847,
"grad_norm": 15.97451824861015,
"learning_rate": 7.40580229087194e-06,
"loss": 1.915593147277832,
"step": 2515
},
{
"epoch": 1.219583131362094,
"grad_norm": 10.008548335227003,
"learning_rate": 7.403329715919693e-06,
"loss": 1.4015388488769531,
"step": 2516
},
{
"epoch": 1.2200678623364034,
"grad_norm": 14.871230657520904,
"learning_rate": 7.400856376423068e-06,
"loss": 1.3237380981445312,
"step": 2517
},
{
"epoch": 1.2205525933107126,
"grad_norm": 9.247909802589477,
"learning_rate": 7.39838227316888e-06,
"loss": 1.2725276947021484,
"step": 2518
},
{
"epoch": 1.2210373242850219,
"grad_norm": 9.49489070371401,
"learning_rate": 7.395907406944187e-06,
"loss": 1.776747703552246,
"step": 2519
},
{
"epoch": 1.221522055259331,
"grad_norm": 7.918232638127259,
"learning_rate": 7.3934317785362905e-06,
"loss": 1.6185253858566284,
"step": 2520
},
{
"epoch": 1.2220067862336403,
"grad_norm": 17.13096890443452,
"learning_rate": 7.390955388732735e-06,
"loss": 1.533545970916748,
"step": 2521
},
{
"epoch": 1.2224915172079496,
"grad_norm": 8.383889343655541,
"learning_rate": 7.388478238321306e-06,
"loss": 1.9933111667633057,
"step": 2522
},
{
"epoch": 1.2229762481822588,
"grad_norm": 12.159609938943678,
"learning_rate": 7.3860003280900316e-06,
"loss": 0.9855192303657532,
"step": 2523
},
{
"epoch": 1.223460979156568,
"grad_norm": 8.69537230752322,
"learning_rate": 7.383521658827182e-06,
"loss": 1.3984806537628174,
"step": 2524
},
{
"epoch": 1.2239457101308773,
"grad_norm": 18.363250364362663,
"learning_rate": 7.381042231321269e-06,
"loss": 3.8758931159973145,
"step": 2525
},
{
"epoch": 1.2244304411051865,
"grad_norm": 14.39356156677307,
"learning_rate": 7.378562046361045e-06,
"loss": 1.8409374952316284,
"step": 2526
},
{
"epoch": 1.224915172079496,
"grad_norm": 8.86963044270807,
"learning_rate": 7.376081104735504e-06,
"loss": 1.481100082397461,
"step": 2527
},
{
"epoch": 1.2253999030538052,
"grad_norm": 15.016100034528307,
"learning_rate": 7.373599407233878e-06,
"loss": 1.4816688299179077,
"step": 2528
},
{
"epoch": 1.2258846340281144,
"grad_norm": 7.668157524812295,
"learning_rate": 7.371116954645644e-06,
"loss": 1.4637811183929443,
"step": 2529
},
{
"epoch": 1.2263693650024237,
"grad_norm": 8.542950691847803,
"learning_rate": 7.368633747760515e-06,
"loss": 1.4365973472595215,
"step": 2530
},
{
"epoch": 1.226854095976733,
"grad_norm": 7.839375364508462,
"learning_rate": 7.3661497873684466e-06,
"loss": 1.5157506465911865,
"step": 2531
},
{
"epoch": 1.2273388269510421,
"grad_norm": 7.600069978874766,
"learning_rate": 7.363665074259634e-06,
"loss": 1.711186170578003,
"step": 2532
},
{
"epoch": 1.2278235579253514,
"grad_norm": 10.636948274516442,
"learning_rate": 7.361179609224511e-06,
"loss": 1.397657871246338,
"step": 2533
},
{
"epoch": 1.2283082888996606,
"grad_norm": 10.203633457161462,
"learning_rate": 7.35869339305375e-06,
"loss": 1.497189998626709,
"step": 2534
},
{
"epoch": 1.22879301987397,
"grad_norm": 13.722752403902206,
"learning_rate": 7.356206426538262e-06,
"loss": 1.390775203704834,
"step": 2535
},
{
"epoch": 1.2292777508482793,
"grad_norm": 8.953286552896582,
"learning_rate": 7.3537187104692e-06,
"loss": 1.7948120832443237,
"step": 2536
},
{
"epoch": 1.2297624818225885,
"grad_norm": 13.86435284758628,
"learning_rate": 7.351230245637955e-06,
"loss": 1.3055355548858643,
"step": 2537
},
{
"epoch": 1.2302472127968977,
"grad_norm": 8.790419210175166,
"learning_rate": 7.348741032836149e-06,
"loss": 1.4650830030441284,
"step": 2538
},
{
"epoch": 1.230731943771207,
"grad_norm": 12.63533168188042,
"learning_rate": 7.346251072855651e-06,
"loss": 1.301893711090088,
"step": 2539
},
{
"epoch": 1.2312166747455162,
"grad_norm": 17.736581089815015,
"learning_rate": 7.343760366488564e-06,
"loss": 1.3099243640899658,
"step": 2540
},
{
"epoch": 1.2317014057198254,
"grad_norm": 11.217246043842279,
"learning_rate": 7.341268914527229e-06,
"loss": 1.5335956811904907,
"step": 2541
},
{
"epoch": 1.2321861366941347,
"grad_norm": 10.531294969826238,
"learning_rate": 7.33877671776422e-06,
"loss": 1.451130747795105,
"step": 2542
},
{
"epoch": 1.232670867668444,
"grad_norm": 8.816020635512565,
"learning_rate": 7.336283776992357e-06,
"loss": 1.2571508884429932,
"step": 2543
},
{
"epoch": 1.2331555986427531,
"grad_norm": 7.9330261933289306,
"learning_rate": 7.333790093004688e-06,
"loss": 1.8488579988479614,
"step": 2544
},
{
"epoch": 1.2336403296170626,
"grad_norm": 9.84009320655712,
"learning_rate": 7.3312956665945e-06,
"loss": 1.3867506980895996,
"step": 2545
},
{
"epoch": 1.2341250605913718,
"grad_norm": 11.692818893014238,
"learning_rate": 7.328800498555321e-06,
"loss": 1.9450370073318481,
"step": 2546
},
{
"epoch": 1.234609791565681,
"grad_norm": 11.587574535006468,
"learning_rate": 7.326304589680906e-06,
"loss": 1.8632066249847412,
"step": 2547
},
{
"epoch": 1.2350945225399903,
"grad_norm": 7.782502661297397,
"learning_rate": 7.323807940765255e-06,
"loss": 2.0558319091796875,
"step": 2548
},
{
"epoch": 1.2355792535142995,
"grad_norm": 10.062311808061509,
"learning_rate": 7.321310552602595e-06,
"loss": 1.3792788982391357,
"step": 2549
},
{
"epoch": 1.2360639844886088,
"grad_norm": 11.654604520011823,
"learning_rate": 7.3188124259873946e-06,
"loss": 1.6507569551467896,
"step": 2550
},
{
"epoch": 1.236548715462918,
"grad_norm": 11.055155027160398,
"learning_rate": 7.3163135617143555e-06,
"loss": 1.7571851015090942,
"step": 2551
},
{
"epoch": 1.2370334464372275,
"grad_norm": 13.117390322989692,
"learning_rate": 7.313813960578411e-06,
"loss": 1.808323860168457,
"step": 2552
},
{
"epoch": 1.2375181774115367,
"grad_norm": 10.767258358188915,
"learning_rate": 7.311313623374733e-06,
"loss": 1.6692733764648438,
"step": 2553
},
{
"epoch": 1.238002908385846,
"grad_norm": 11.453138748382177,
"learning_rate": 7.308812550898724e-06,
"loss": 1.7368632555007935,
"step": 2554
},
{
"epoch": 1.2384876393601552,
"grad_norm": 15.42702403057548,
"learning_rate": 7.306310743946024e-06,
"loss": 1.2626579999923706,
"step": 2555
},
{
"epoch": 1.2389723703344644,
"grad_norm": 15.652050393052024,
"learning_rate": 7.303808203312505e-06,
"loss": 1.4444985389709473,
"step": 2556
},
{
"epoch": 1.2394571013087736,
"grad_norm": 7.99137578476542,
"learning_rate": 7.301304929794271e-06,
"loss": 1.4109306335449219,
"step": 2557
},
{
"epoch": 1.2399418322830829,
"grad_norm": 8.144282031744165,
"learning_rate": 7.298800924187661e-06,
"loss": 1.3704736232757568,
"step": 2558
},
{
"epoch": 1.240426563257392,
"grad_norm": 21.17950573219104,
"learning_rate": 7.296296187289245e-06,
"loss": 2.865687370300293,
"step": 2559
},
{
"epoch": 1.2409112942317013,
"grad_norm": 9.695647207921262,
"learning_rate": 7.29379071989583e-06,
"loss": 1.3086566925048828,
"step": 2560
},
{
"epoch": 1.2413960252060106,
"grad_norm": 10.151067001733368,
"learning_rate": 7.2912845228044496e-06,
"loss": 1.4411911964416504,
"step": 2561
},
{
"epoch": 1.24188075618032,
"grad_norm": 10.34754691424565,
"learning_rate": 7.2887775968123715e-06,
"loss": 1.9956254959106445,
"step": 2562
},
{
"epoch": 1.2423654871546292,
"grad_norm": 9.290750237427613,
"learning_rate": 7.286269942717099e-06,
"loss": 1.4846282005310059,
"step": 2563
},
{
"epoch": 1.2428502181289385,
"grad_norm": 13.887946141005255,
"learning_rate": 7.283761561316363e-06,
"loss": 2.0276951789855957,
"step": 2564
},
{
"epoch": 1.2433349491032477,
"grad_norm": 14.589084246757531,
"learning_rate": 7.281252453408125e-06,
"loss": 1.9211995601654053,
"step": 2565
},
{
"epoch": 1.243819680077557,
"grad_norm": 7.244023654589985,
"learning_rate": 7.27874261979058e-06,
"loss": 2.1164982318878174,
"step": 2566
},
{
"epoch": 1.2443044110518662,
"grad_norm": 8.827962056874057,
"learning_rate": 7.276232061262156e-06,
"loss": 1.8963249921798706,
"step": 2567
},
{
"epoch": 1.2447891420261754,
"grad_norm": 19.030475149097445,
"learning_rate": 7.273720778621505e-06,
"loss": 0.9618524312973022,
"step": 2568
},
{
"epoch": 1.2452738730004846,
"grad_norm": 11.934288117492935,
"learning_rate": 7.271208772667516e-06,
"loss": 2.1178925037384033,
"step": 2569
},
{
"epoch": 1.245758603974794,
"grad_norm": 12.3535490461203,
"learning_rate": 7.268696044199305e-06,
"loss": 1.3622890710830688,
"step": 2570
},
{
"epoch": 1.2462433349491033,
"grad_norm": 9.56432324060198,
"learning_rate": 7.266182594016217e-06,
"loss": 1.7005566358566284,
"step": 2571
},
{
"epoch": 1.2467280659234126,
"grad_norm": 7.767605784182577,
"learning_rate": 7.263668422917829e-06,
"loss": 1.1043100357055664,
"step": 2572
},
{
"epoch": 1.2472127968977218,
"grad_norm": 14.797230384538869,
"learning_rate": 7.261153531703944e-06,
"loss": 3.3799712657928467,
"step": 2573
},
{
"epoch": 1.247697527872031,
"grad_norm": 12.41029585297352,
"learning_rate": 7.2586379211746e-06,
"loss": 2.5852293968200684,
"step": 2574
},
{
"epoch": 1.2481822588463403,
"grad_norm": 16.185810108701762,
"learning_rate": 7.2561215921300545e-06,
"loss": 1.7975380420684814,
"step": 2575
},
{
"epoch": 1.2486669898206495,
"grad_norm": 18.79180833003568,
"learning_rate": 7.253604545370804e-06,
"loss": 1.3364269733428955,
"step": 2576
},
{
"epoch": 1.2491517207949587,
"grad_norm": 12.148380972614213,
"learning_rate": 7.2510867816975665e-06,
"loss": 1.4247151613235474,
"step": 2577
},
{
"epoch": 1.249636451769268,
"grad_norm": 15.533670964928078,
"learning_rate": 7.248568301911291e-06,
"loss": 1.531552791595459,
"step": 2578
},
{
"epoch": 1.2501211827435772,
"grad_norm": 6.9189119075384475,
"learning_rate": 7.246049106813151e-06,
"loss": 1.5629932880401611,
"step": 2579
},
{
"epoch": 1.2506059137178867,
"grad_norm": 14.399773858108983,
"learning_rate": 7.243529197204552e-06,
"loss": 1.3156144618988037,
"step": 2580
},
{
"epoch": 1.2510906446921959,
"grad_norm": 11.732302801540586,
"learning_rate": 7.241008573887124e-06,
"loss": 1.4003751277923584,
"step": 2581
},
{
"epoch": 1.2515753756665051,
"grad_norm": 7.029227550582684,
"learning_rate": 7.238487237662723e-06,
"loss": 1.7983324527740479,
"step": 2582
},
{
"epoch": 1.2520601066408144,
"grad_norm": 12.430436497066708,
"learning_rate": 7.235965189333438e-06,
"loss": 1.5199651718139648,
"step": 2583
},
{
"epoch": 1.2525448376151236,
"grad_norm": 8.768292657309011,
"learning_rate": 7.233442429701575e-06,
"loss": 1.0879511833190918,
"step": 2584
},
{
"epoch": 1.2530295685894328,
"grad_norm": 17.79364605141695,
"learning_rate": 7.230918959569675e-06,
"loss": 2.0632271766662598,
"step": 2585
},
{
"epoch": 1.253514299563742,
"grad_norm": 10.57255218680035,
"learning_rate": 7.228394779740498e-06,
"loss": 1.895407795906067,
"step": 2586
},
{
"epoch": 1.2539990305380515,
"grad_norm": 9.288834678991734,
"learning_rate": 7.2258698910170345e-06,
"loss": 1.437435507774353,
"step": 2587
},
{
"epoch": 1.2544837615123607,
"grad_norm": 8.519431816132794,
"learning_rate": 7.223344294202501e-06,
"loss": 1.6811978816986084,
"step": 2588
},
{
"epoch": 1.25496849248667,
"grad_norm": 17.27174750604752,
"learning_rate": 7.220817990100335e-06,
"loss": 1.3489437103271484,
"step": 2589
},
{
"epoch": 1.2554532234609792,
"grad_norm": 8.220024878952495,
"learning_rate": 7.218290979514202e-06,
"loss": 1.8917100429534912,
"step": 2590
},
{
"epoch": 1.2559379544352884,
"grad_norm": 10.589525672678526,
"learning_rate": 7.21576326324799e-06,
"loss": 1.368747353553772,
"step": 2591
},
{
"epoch": 1.2564226854095977,
"grad_norm": 8.130840531693586,
"learning_rate": 7.213234842105816e-06,
"loss": 1.1382763385772705,
"step": 2592
},
{
"epoch": 1.256907416383907,
"grad_norm": 9.55372378590885,
"learning_rate": 7.210705716892018e-06,
"loss": 1.1645352840423584,
"step": 2593
},
{
"epoch": 1.2573921473582161,
"grad_norm": 9.21518116277268,
"learning_rate": 7.208175888411155e-06,
"loss": 1.5871660709381104,
"step": 2594
},
{
"epoch": 1.2578768783325254,
"grad_norm": 10.36723914427655,
"learning_rate": 7.205645357468016e-06,
"loss": 1.190037727355957,
"step": 2595
},
{
"epoch": 1.2583616093068346,
"grad_norm": 12.52746849571927,
"learning_rate": 7.203114124867612e-06,
"loss": 1.6740703582763672,
"step": 2596
},
{
"epoch": 1.2588463402811438,
"grad_norm": 13.635196789750733,
"learning_rate": 7.200582191415171e-06,
"loss": 2.010629415512085,
"step": 2597
},
{
"epoch": 1.2593310712554533,
"grad_norm": 12.988873933045182,
"learning_rate": 7.198049557916151e-06,
"loss": 1.392869234085083,
"step": 2598
},
{
"epoch": 1.2598158022297625,
"grad_norm": 11.38699455762809,
"learning_rate": 7.195516225176231e-06,
"loss": 2.2210335731506348,
"step": 2599
},
{
"epoch": 1.2603005332040718,
"grad_norm": 10.418339133998487,
"learning_rate": 7.192982194001312e-06,
"loss": 0.9275608062744141,
"step": 2600
},
{
"epoch": 1.260785264178381,
"grad_norm": 12.078988232889593,
"learning_rate": 7.1904474651975166e-06,
"loss": 1.0268757343292236,
"step": 2601
},
{
"epoch": 1.2612699951526902,
"grad_norm": 16.16087038120512,
"learning_rate": 7.187912039571188e-06,
"loss": 1.5950409173965454,
"step": 2602
},
{
"epoch": 1.2617547261269995,
"grad_norm": 19.6194710904242,
"learning_rate": 7.185375917928894e-06,
"loss": 1.7503864765167236,
"step": 2603
},
{
"epoch": 1.2622394571013087,
"grad_norm": 12.09373696376966,
"learning_rate": 7.182839101077423e-06,
"loss": 1.523659110069275,
"step": 2604
},
{
"epoch": 1.2627241880756181,
"grad_norm": 10.036174197629835,
"learning_rate": 7.180301589823784e-06,
"loss": 1.6703938245773315,
"step": 2605
},
{
"epoch": 1.2632089190499274,
"grad_norm": 11.051353427905987,
"learning_rate": 7.177763384975207e-06,
"loss": 1.9682910442352295,
"step": 2606
},
{
"epoch": 1.2636936500242366,
"grad_norm": 12.547865929167802,
"learning_rate": 7.175224487339143e-06,
"loss": 1.790818214416504,
"step": 2607
},
{
"epoch": 1.2641783809985458,
"grad_norm": 16.82887246700537,
"learning_rate": 7.1726848977232625e-06,
"loss": 1.5245842933654785,
"step": 2608
},
{
"epoch": 1.264663111972855,
"grad_norm": 26.136963495164203,
"learning_rate": 7.170144616935456e-06,
"loss": 1.844719409942627,
"step": 2609
},
{
"epoch": 1.2651478429471643,
"grad_norm": 11.262941428159966,
"learning_rate": 7.167603645783835e-06,
"loss": 1.3185977935791016,
"step": 2610
},
{
"epoch": 1.2656325739214735,
"grad_norm": 10.189337957916923,
"learning_rate": 7.165061985076731e-06,
"loss": 1.6360523700714111,
"step": 2611
},
{
"epoch": 1.2661173048957828,
"grad_norm": 12.683430400518374,
"learning_rate": 7.162519635622695e-06,
"loss": 1.2850764989852905,
"step": 2612
},
{
"epoch": 1.266602035870092,
"grad_norm": 16.27874091949543,
"learning_rate": 7.159976598230495e-06,
"loss": 1.6878960132598877,
"step": 2613
},
{
"epoch": 1.2670867668444012,
"grad_norm": 6.741246252932207,
"learning_rate": 7.157432873709118e-06,
"loss": 1.4250261783599854,
"step": 2614
},
{
"epoch": 1.2675714978187105,
"grad_norm": 7.108884757816339,
"learning_rate": 7.154888462867771e-06,
"loss": 1.1605195999145508,
"step": 2615
},
{
"epoch": 1.26805622879302,
"grad_norm": 10.534532700837433,
"learning_rate": 7.15234336651588e-06,
"loss": 1.4817969799041748,
"step": 2616
},
{
"epoch": 1.2685409597673292,
"grad_norm": 7.483297040999984,
"learning_rate": 7.149797585463087e-06,
"loss": 1.0304006338119507,
"step": 2617
},
{
"epoch": 1.2690256907416384,
"grad_norm": 9.943274641402873,
"learning_rate": 7.147251120519254e-06,
"loss": 1.2823541164398193,
"step": 2618
},
{
"epoch": 1.2695104217159476,
"grad_norm": 16.95785920584304,
"learning_rate": 7.144703972494459e-06,
"loss": 1.6593437194824219,
"step": 2619
},
{
"epoch": 1.2699951526902569,
"grad_norm": 9.26295109866919,
"learning_rate": 7.142156142198997e-06,
"loss": 1.6021984815597534,
"step": 2620
},
{
"epoch": 1.270479883664566,
"grad_norm": 13.523778379976793,
"learning_rate": 7.139607630443381e-06,
"loss": 2.064095973968506,
"step": 2621
},
{
"epoch": 1.2709646146388756,
"grad_norm": 10.341215766686004,
"learning_rate": 7.13705843803834e-06,
"loss": 1.24155592918396,
"step": 2622
},
{
"epoch": 1.2714493456131848,
"grad_norm": 9.588229219581061,
"learning_rate": 7.134508565794822e-06,
"loss": 2.2093505859375,
"step": 2623
},
{
"epoch": 1.271934076587494,
"grad_norm": 13.760043543752232,
"learning_rate": 7.1319580145239865e-06,
"loss": 1.904679775238037,
"step": 2624
},
{
"epoch": 1.2724188075618033,
"grad_norm": 14.234018918914407,
"learning_rate": 7.129406785037214e-06,
"loss": 1.6944365501403809,
"step": 2625
},
{
"epoch": 1.2729035385361125,
"grad_norm": 11.892976833718091,
"learning_rate": 7.126854878146099e-06,
"loss": 1.6468071937561035,
"step": 2626
},
{
"epoch": 1.2733882695104217,
"grad_norm": 12.141129055081961,
"learning_rate": 7.124302294662448e-06,
"loss": 1.6316359043121338,
"step": 2627
},
{
"epoch": 1.273873000484731,
"grad_norm": 16.06235695536144,
"learning_rate": 7.12174903539829e-06,
"loss": 1.3245782852172852,
"step": 2628
},
{
"epoch": 1.2743577314590402,
"grad_norm": 13.07937123080727,
"learning_rate": 7.11919510116586e-06,
"loss": 1.3136610984802246,
"step": 2629
},
{
"epoch": 1.2748424624333494,
"grad_norm": 7.5188698480431135,
"learning_rate": 7.116640492777617e-06,
"loss": 1.3294055461883545,
"step": 2630
},
{
"epoch": 1.2753271934076587,
"grad_norm": 13.665831113536694,
"learning_rate": 7.114085211046227e-06,
"loss": 1.567731261253357,
"step": 2631
},
{
"epoch": 1.2758119243819679,
"grad_norm": 6.907709495174773,
"learning_rate": 7.111529256784575e-06,
"loss": 1.1404545307159424,
"step": 2632
},
{
"epoch": 1.2762966553562773,
"grad_norm": 10.949545146349967,
"learning_rate": 7.108972630805756e-06,
"loss": 1.1609227657318115,
"step": 2633
},
{
"epoch": 1.2767813863305866,
"grad_norm": 10.529190464426371,
"learning_rate": 7.1064153339230845e-06,
"loss": 2.5801687240600586,
"step": 2634
},
{
"epoch": 1.2772661173048958,
"grad_norm": 21.02113610959195,
"learning_rate": 7.103857366950081e-06,
"loss": 3.0269856452941895,
"step": 2635
},
{
"epoch": 1.277750848279205,
"grad_norm": 9.5484235821932,
"learning_rate": 7.101298730700483e-06,
"loss": 1.4333648681640625,
"step": 2636
},
{
"epoch": 1.2782355792535143,
"grad_norm": 8.45048546686159,
"learning_rate": 7.098739425988244e-06,
"loss": 1.7441418170928955,
"step": 2637
},
{
"epoch": 1.2787203102278235,
"grad_norm": 9.515675839059897,
"learning_rate": 7.096179453627524e-06,
"loss": 1.9722498655319214,
"step": 2638
},
{
"epoch": 1.2792050412021327,
"grad_norm": 15.464260136510955,
"learning_rate": 7.093618814432699e-06,
"loss": 1.8233509063720703,
"step": 2639
},
{
"epoch": 1.2796897721764422,
"grad_norm": 12.042963115652606,
"learning_rate": 7.091057509218357e-06,
"loss": 1.4273242950439453,
"step": 2640
},
{
"epoch": 1.2801745031507514,
"grad_norm": 8.966507475208878,
"learning_rate": 7.088495538799298e-06,
"loss": 1.4522364139556885,
"step": 2641
},
{
"epoch": 1.2806592341250607,
"grad_norm": 11.239146572166046,
"learning_rate": 7.085932903990534e-06,
"loss": 1.6673474311828613,
"step": 2642
},
{
"epoch": 1.28114396509937,
"grad_norm": 7.894082577383908,
"learning_rate": 7.083369605607282e-06,
"loss": 1.5347341299057007,
"step": 2643
},
{
"epoch": 1.2816286960736791,
"grad_norm": 8.206846337381274,
"learning_rate": 7.080805644464979e-06,
"loss": 1.617067575454712,
"step": 2644
},
{
"epoch": 1.2821134270479884,
"grad_norm": 10.311012001964645,
"learning_rate": 7.078241021379272e-06,
"loss": 1.794668436050415,
"step": 2645
},
{
"epoch": 1.2825981580222976,
"grad_norm": 11.658937701875107,
"learning_rate": 7.075675737166011e-06,
"loss": 2.024739980697632,
"step": 2646
},
{
"epoch": 1.2830828889966068,
"grad_norm": 10.74372972453181,
"learning_rate": 7.073109792641264e-06,
"loss": 2.4990785121917725,
"step": 2647
},
{
"epoch": 1.283567619970916,
"grad_norm": 12.50584596413547,
"learning_rate": 7.070543188621304e-06,
"loss": 2.620846748352051,
"step": 2648
},
{
"epoch": 1.2840523509452253,
"grad_norm": 10.577481242862339,
"learning_rate": 7.0679759259226185e-06,
"loss": 2.0838005542755127,
"step": 2649
},
{
"epoch": 1.2845370819195345,
"grad_norm": 11.977258707799841,
"learning_rate": 7.065408005361902e-06,
"loss": 1.4354901313781738,
"step": 2650
},
{
"epoch": 1.285021812893844,
"grad_norm": 7.823754219930427,
"learning_rate": 7.062839427756056e-06,
"loss": 1.478150725364685,
"step": 2651
},
{
"epoch": 1.2855065438681532,
"grad_norm": 14.435702559857502,
"learning_rate": 7.060270193922194e-06,
"loss": 1.5867103338241577,
"step": 2652
},
{
"epoch": 1.2859912748424625,
"grad_norm": 9.646649073176759,
"learning_rate": 7.05770030467764e-06,
"loss": 2.129995584487915,
"step": 2653
},
{
"epoch": 1.2864760058167717,
"grad_norm": 14.520014155407832,
"learning_rate": 7.05512976083992e-06,
"loss": 1.6967542171478271,
"step": 2654
},
{
"epoch": 1.286960736791081,
"grad_norm": 17.600979660482757,
"learning_rate": 7.052558563226777e-06,
"loss": 1.3444368839263916,
"step": 2655
},
{
"epoch": 1.2874454677653902,
"grad_norm": 11.194541436431905,
"learning_rate": 7.0499867126561536e-06,
"loss": 1.1984986066818237,
"step": 2656
},
{
"epoch": 1.2879301987396996,
"grad_norm": 13.39491469320762,
"learning_rate": 7.047414209946205e-06,
"loss": 1.8186016082763672,
"step": 2657
},
{
"epoch": 1.2884149297140088,
"grad_norm": 13.682097950108263,
"learning_rate": 7.044841055915294e-06,
"loss": 1.8331794738769531,
"step": 2658
},
{
"epoch": 1.288899660688318,
"grad_norm": 28.771222145344893,
"learning_rate": 7.042267251381987e-06,
"loss": 2.2597715854644775,
"step": 2659
},
{
"epoch": 1.2893843916626273,
"grad_norm": 9.291654935268989,
"learning_rate": 7.039692797165061e-06,
"loss": 1.184234619140625,
"step": 2660
},
{
"epoch": 1.2898691226369365,
"grad_norm": 9.076491751090261,
"learning_rate": 7.037117694083497e-06,
"loss": 1.2205357551574707,
"step": 2661
},
{
"epoch": 1.2903538536112458,
"grad_norm": 7.729607497857864,
"learning_rate": 7.0345419429564874e-06,
"loss": 1.520797848701477,
"step": 2662
},
{
"epoch": 1.290838584585555,
"grad_norm": 8.159711787436866,
"learning_rate": 7.031965544603423e-06,
"loss": 1.5232294797897339,
"step": 2663
},
{
"epoch": 1.2913233155598642,
"grad_norm": 8.351427423752904,
"learning_rate": 7.0293884998439064e-06,
"loss": 1.5022087097167969,
"step": 2664
},
{
"epoch": 1.2918080465341735,
"grad_norm": 9.335362225925481,
"learning_rate": 7.026810809497744e-06,
"loss": 1.001664638519287,
"step": 2665
},
{
"epoch": 1.2922927775084827,
"grad_norm": 8.57398027836293,
"learning_rate": 7.0242324743849454e-06,
"loss": 1.4680243730545044,
"step": 2666
},
{
"epoch": 1.292777508482792,
"grad_norm": 13.252244448992098,
"learning_rate": 7.021653495325731e-06,
"loss": 1.4887704849243164,
"step": 2667
},
{
"epoch": 1.2932622394571012,
"grad_norm": 12.580794727484443,
"learning_rate": 7.019073873140519e-06,
"loss": 1.9645668268203735,
"step": 2668
},
{
"epoch": 1.2937469704314106,
"grad_norm": 7.567462126302967,
"learning_rate": 7.01649360864994e-06,
"loss": 1.5926556587219238,
"step": 2669
},
{
"epoch": 1.2942317014057199,
"grad_norm": 14.837911778045196,
"learning_rate": 7.013912702674821e-06,
"loss": 3.218324661254883,
"step": 2670
},
{
"epoch": 1.294716432380029,
"grad_norm": 13.499031176566923,
"learning_rate": 7.011331156036197e-06,
"loss": 1.7180119752883911,
"step": 2671
},
{
"epoch": 1.2952011633543383,
"grad_norm": 7.566812868774241,
"learning_rate": 7.00874896955531e-06,
"loss": 1.5831120014190674,
"step": 2672
},
{
"epoch": 1.2956858943286476,
"grad_norm": 11.898229501059692,
"learning_rate": 7.006166144053597e-06,
"loss": 1.1447322368621826,
"step": 2673
},
{
"epoch": 1.2961706253029568,
"grad_norm": 11.869305180576143,
"learning_rate": 7.003582680352707e-06,
"loss": 1.249545693397522,
"step": 2674
},
{
"epoch": 1.2966553562772662,
"grad_norm": 11.752535576978797,
"learning_rate": 7.000998579274487e-06,
"loss": 1.6342761516571045,
"step": 2675
},
{
"epoch": 1.2971400872515755,
"grad_norm": 8.891806813500386,
"learning_rate": 6.99841384164099e-06,
"loss": 1.572068691253662,
"step": 2676
},
{
"epoch": 1.2976248182258847,
"grad_norm": 13.065414057217211,
"learning_rate": 6.995828468274467e-06,
"loss": 1.4865103960037231,
"step": 2677
},
{
"epoch": 1.298109549200194,
"grad_norm": 10.059200403218382,
"learning_rate": 6.993242459997374e-06,
"loss": 1.3010653257369995,
"step": 2678
},
{
"epoch": 1.2985942801745032,
"grad_norm": 15.098844500341567,
"learning_rate": 6.990655817632373e-06,
"loss": 1.1240870952606201,
"step": 2679
},
{
"epoch": 1.2990790111488124,
"grad_norm": 8.821223096422548,
"learning_rate": 6.988068542002316e-06,
"loss": 1.1776816844940186,
"step": 2680
},
{
"epoch": 1.2995637421231216,
"grad_norm": 13.272662666194941,
"learning_rate": 6.985480633930271e-06,
"loss": 1.4182302951812744,
"step": 2681
},
{
"epoch": 1.3000484730974309,
"grad_norm": 13.485024899564266,
"learning_rate": 6.982892094239498e-06,
"loss": 1.093178153038025,
"step": 2682
},
{
"epoch": 1.3005332040717401,
"grad_norm": 14.632577382849247,
"learning_rate": 6.980302923753459e-06,
"loss": 1.5015352964401245,
"step": 2683
},
{
"epoch": 1.3010179350460493,
"grad_norm": 14.421357161454706,
"learning_rate": 6.97771312329582e-06,
"loss": 1.8231858015060425,
"step": 2684
},
{
"epoch": 1.3015026660203586,
"grad_norm": 13.391060442826717,
"learning_rate": 6.9751226936904415e-06,
"loss": 1.5935288667678833,
"step": 2685
},
{
"epoch": 1.301987396994668,
"grad_norm": 11.607282893193842,
"learning_rate": 6.972531635761391e-06,
"loss": 1.7090460062026978,
"step": 2686
},
{
"epoch": 1.3024721279689773,
"grad_norm": 11.695695510574874,
"learning_rate": 6.969939950332933e-06,
"loss": 2.0531232357025146,
"step": 2687
},
{
"epoch": 1.3029568589432865,
"grad_norm": 8.301414032488816,
"learning_rate": 6.967347638229531e-06,
"loss": 1.2256492376327515,
"step": 2688
},
{
"epoch": 1.3034415899175957,
"grad_norm": 10.784406691668888,
"learning_rate": 6.964754700275845e-06,
"loss": 2.085047721862793,
"step": 2689
},
{
"epoch": 1.303926320891905,
"grad_norm": 9.994352882107014,
"learning_rate": 6.9621611372967436e-06,
"loss": 1.6879757642745972,
"step": 2690
},
{
"epoch": 1.3044110518662142,
"grad_norm": 11.067388125566623,
"learning_rate": 6.959566950117283e-06,
"loss": 1.337203860282898,
"step": 2691
},
{
"epoch": 1.3048957828405234,
"grad_norm": 9.834584862867402,
"learning_rate": 6.956972139562722e-06,
"loss": 1.662172555923462,
"step": 2692
},
{
"epoch": 1.3053805138148329,
"grad_norm": 10.173437939282817,
"learning_rate": 6.954376706458523e-06,
"loss": 1.670196294784546,
"step": 2693
},
{
"epoch": 1.3058652447891421,
"grad_norm": 9.420577061144174,
"learning_rate": 6.95178065163034e-06,
"loss": 1.5795142650604248,
"step": 2694
},
{
"epoch": 1.3063499757634514,
"grad_norm": 10.451833142648834,
"learning_rate": 6.949183975904027e-06,
"loss": 1.4017903804779053,
"step": 2695
},
{
"epoch": 1.3068347067377606,
"grad_norm": 11.944298080335805,
"learning_rate": 6.946586680105634e-06,
"loss": 1.754650354385376,
"step": 2696
},
{
"epoch": 1.3073194377120698,
"grad_norm": 6.929785129717615,
"learning_rate": 6.943988765061411e-06,
"loss": 1.2194511890411377,
"step": 2697
},
{
"epoch": 1.307804168686379,
"grad_norm": 10.473147307528865,
"learning_rate": 6.941390231597805e-06,
"loss": 1.0666606426239014,
"step": 2698
},
{
"epoch": 1.3082888996606883,
"grad_norm": 9.89043696483036,
"learning_rate": 6.938791080541455e-06,
"loss": 1.3168085813522339,
"step": 2699
},
{
"epoch": 1.3087736306349975,
"grad_norm": 13.55080142019911,
"learning_rate": 6.9361913127192026e-06,
"loss": 1.3136096000671387,
"step": 2700
},
{
"epoch": 1.3092583616093068,
"grad_norm": 10.119792740517516,
"learning_rate": 6.9335909289580825e-06,
"loss": 1.2397469282150269,
"step": 2701
},
{
"epoch": 1.309743092583616,
"grad_norm": 8.99382831536076,
"learning_rate": 6.930989930085325e-06,
"loss": 1.447587251663208,
"step": 2702
},
{
"epoch": 1.3102278235579252,
"grad_norm": 16.590893768031748,
"learning_rate": 6.928388316928354e-06,
"loss": 1.5321955680847168,
"step": 2703
},
{
"epoch": 1.3107125545322347,
"grad_norm": 11.2526154304058,
"learning_rate": 6.925786090314796e-06,
"loss": 1.1240001916885376,
"step": 2704
},
{
"epoch": 1.311197285506544,
"grad_norm": 14.3639821921088,
"learning_rate": 6.923183251072468e-06,
"loss": 1.1779067516326904,
"step": 2705
},
{
"epoch": 1.3116820164808531,
"grad_norm": 9.722313121171538,
"learning_rate": 6.920579800029381e-06,
"loss": 1.067950963973999,
"step": 2706
},
{
"epoch": 1.3121667474551624,
"grad_norm": 9.530226280549314,
"learning_rate": 6.91797573801374e-06,
"loss": 0.7862571477890015,
"step": 2707
},
{
"epoch": 1.3126514784294716,
"grad_norm": 11.336311731148866,
"learning_rate": 6.9153710658539476e-06,
"loss": 1.874943494796753,
"step": 2708
},
{
"epoch": 1.3131362094037808,
"grad_norm": 9.37637265223162,
"learning_rate": 6.912765784378601e-06,
"loss": 1.2833199501037598,
"step": 2709
},
{
"epoch": 1.3136209403780903,
"grad_norm": 11.199460054829938,
"learning_rate": 6.910159894416485e-06,
"loss": 1.2328799962997437,
"step": 2710
},
{
"epoch": 1.3141056713523995,
"grad_norm": 12.226493628092456,
"learning_rate": 6.907553396796585e-06,
"loss": 1.4243240356445312,
"step": 2711
},
{
"epoch": 1.3145904023267088,
"grad_norm": 20.648606964542047,
"learning_rate": 6.904946292348076e-06,
"loss": 2.0109925270080566,
"step": 2712
},
{
"epoch": 1.315075133301018,
"grad_norm": 12.326329229336737,
"learning_rate": 6.902338581900328e-06,
"loss": 1.183553695678711,
"step": 2713
},
{
"epoch": 1.3155598642753272,
"grad_norm": 9.61568721161442,
"learning_rate": 6.899730266282903e-06,
"loss": 1.20918607711792,
"step": 2714
},
{
"epoch": 1.3160445952496365,
"grad_norm": 15.038860495468494,
"learning_rate": 6.897121346325552e-06,
"loss": 1.4947562217712402,
"step": 2715
},
{
"epoch": 1.3165293262239457,
"grad_norm": 8.645327089757734,
"learning_rate": 6.8945118228582245e-06,
"loss": 1.7168762683868408,
"step": 2716
},
{
"epoch": 1.317014057198255,
"grad_norm": 12.619893429947313,
"learning_rate": 6.891901696711058e-06,
"loss": 1.9261353015899658,
"step": 2717
},
{
"epoch": 1.3174987881725642,
"grad_norm": 7.795274317931427,
"learning_rate": 6.889290968714384e-06,
"loss": 1.6652623414993286,
"step": 2718
},
{
"epoch": 1.3179835191468734,
"grad_norm": 11.639054514075951,
"learning_rate": 6.886679639698723e-06,
"loss": 1.3053873777389526,
"step": 2719
},
{
"epoch": 1.3184682501211826,
"grad_norm": 10.843820064925294,
"learning_rate": 6.884067710494788e-06,
"loss": 2.247399091720581,
"step": 2720
},
{
"epoch": 1.318952981095492,
"grad_norm": 11.98483078881408,
"learning_rate": 6.881455181933483e-06,
"loss": 2.0917577743530273,
"step": 2721
},
{
"epoch": 1.3194377120698013,
"grad_norm": 9.282918703698634,
"learning_rate": 6.8788420548459e-06,
"loss": 1.0688282251358032,
"step": 2722
},
{
"epoch": 1.3199224430441105,
"grad_norm": 10.104477819449023,
"learning_rate": 6.876228330063328e-06,
"loss": 1.1787288188934326,
"step": 2723
},
{
"epoch": 1.3204071740184198,
"grad_norm": 10.620548617265497,
"learning_rate": 6.87361400841724e-06,
"loss": 1.5302374362945557,
"step": 2724
},
{
"epoch": 1.320891904992729,
"grad_norm": 8.74554170691658,
"learning_rate": 6.870999090739301e-06,
"loss": 2.240323543548584,
"step": 2725
},
{
"epoch": 1.3213766359670382,
"grad_norm": 10.812887144188634,
"learning_rate": 6.868383577861366e-06,
"loss": 1.7070324420928955,
"step": 2726
},
{
"epoch": 1.3218613669413475,
"grad_norm": 10.947248147284773,
"learning_rate": 6.865767470615477e-06,
"loss": 1.4400510787963867,
"step": 2727
},
{
"epoch": 1.322346097915657,
"grad_norm": 12.889742788504314,
"learning_rate": 6.86315076983387e-06,
"loss": 1.4813041687011719,
"step": 2728
},
{
"epoch": 1.3228308288899662,
"grad_norm": 10.652109926252239,
"learning_rate": 6.860533476348964e-06,
"loss": 2.1611900329589844,
"step": 2729
},
{
"epoch": 1.3233155598642754,
"grad_norm": 11.914567613553988,
"learning_rate": 6.857915590993371e-06,
"loss": 0.5627620220184326,
"step": 2730
},
{
"epoch": 1.3238002908385846,
"grad_norm": 6.781757982143423,
"learning_rate": 6.855297114599888e-06,
"loss": 1.3556296825408936,
"step": 2731
},
{
"epoch": 1.3242850218128939,
"grad_norm": 16.232374162368316,
"learning_rate": 6.852678048001502e-06,
"loss": 1.5020642280578613,
"step": 2732
},
{
"epoch": 1.324769752787203,
"grad_norm": 6.584273739136492,
"learning_rate": 6.850058392031389e-06,
"loss": 1.3404439687728882,
"step": 2733
},
{
"epoch": 1.3252544837615123,
"grad_norm": 18.762701741722246,
"learning_rate": 6.847438147522909e-06,
"loss": 1.3078633546829224,
"step": 2734
},
{
"epoch": 1.3257392147358216,
"grad_norm": 7.941140607823284,
"learning_rate": 6.844817315309611e-06,
"loss": 1.1651277542114258,
"step": 2735
},
{
"epoch": 1.3262239457101308,
"grad_norm": 17.194751899718078,
"learning_rate": 6.842195896225232e-06,
"loss": 2.8044919967651367,
"step": 2736
},
{
"epoch": 1.32670867668444,
"grad_norm": 9.905195388091856,
"learning_rate": 6.8395738911036954e-06,
"loss": 1.9220428466796875,
"step": 2737
},
{
"epoch": 1.3271934076587493,
"grad_norm": 8.442564859090888,
"learning_rate": 6.8369513007791076e-06,
"loss": 1.3444184064865112,
"step": 2738
},
{
"epoch": 1.3276781386330587,
"grad_norm": 15.276998145840759,
"learning_rate": 6.834328126085768e-06,
"loss": 1.689761996269226,
"step": 2739
},
{
"epoch": 1.328162869607368,
"grad_norm": 8.011339166007401,
"learning_rate": 6.831704367858154e-06,
"loss": 1.3979251384735107,
"step": 2740
},
{
"epoch": 1.3286476005816772,
"grad_norm": 9.44427705577559,
"learning_rate": 6.829080026930933e-06,
"loss": 1.2098438739776611,
"step": 2741
},
{
"epoch": 1.3291323315559864,
"grad_norm": 8.478324980177579,
"learning_rate": 6.8264551041389595e-06,
"loss": 1.4103032350540161,
"step": 2742
},
{
"epoch": 1.3296170625302957,
"grad_norm": 8.597314058999801,
"learning_rate": 6.8238296003172685e-06,
"loss": 1.199397087097168,
"step": 2743
},
{
"epoch": 1.330101793504605,
"grad_norm": 15.3170693333678,
"learning_rate": 6.821203516301083e-06,
"loss": 1.8485050201416016,
"step": 2744
},
{
"epoch": 1.3305865244789141,
"grad_norm": 14.74704500992352,
"learning_rate": 6.818576852925809e-06,
"loss": 2.1179556846618652,
"step": 2745
},
{
"epoch": 1.3310712554532236,
"grad_norm": 15.203531088775803,
"learning_rate": 6.81594961102704e-06,
"loss": 1.574703335762024,
"step": 2746
},
{
"epoch": 1.3315559864275328,
"grad_norm": 11.658209779576566,
"learning_rate": 6.813321791440549e-06,
"loss": 1.8958122730255127,
"step": 2747
},
{
"epoch": 1.332040717401842,
"grad_norm": 10.878198237237822,
"learning_rate": 6.8106933950022935e-06,
"loss": 1.4493790864944458,
"step": 2748
},
{
"epoch": 1.3325254483761513,
"grad_norm": 15.129467389553119,
"learning_rate": 6.808064422548418e-06,
"loss": 1.527030348777771,
"step": 2749
},
{
"epoch": 1.3330101793504605,
"grad_norm": 24.040638674470333,
"learning_rate": 6.805434874915249e-06,
"loss": 2.0494565963745117,
"step": 2750
},
{
"epoch": 1.3334949103247697,
"grad_norm": 16.985050605502522,
"learning_rate": 6.8028047529392916e-06,
"loss": 0.8867975473403931,
"step": 2751
},
{
"epoch": 1.333979641299079,
"grad_norm": 7.221292375552481,
"learning_rate": 6.800174057457239e-06,
"loss": 1.412139892578125,
"step": 2752
},
{
"epoch": 1.3344643722733882,
"grad_norm": 14.02436409287915,
"learning_rate": 6.797542789305967e-06,
"loss": 1.081179141998291,
"step": 2753
},
{
"epoch": 1.3349491032476974,
"grad_norm": 8.194719459291123,
"learning_rate": 6.794910949322527e-06,
"loss": 2.8622887134552,
"step": 2754
},
{
"epoch": 1.3354338342220067,
"grad_norm": 18.402007053030957,
"learning_rate": 6.792278538344161e-06,
"loss": 1.1679730415344238,
"step": 2755
},
{
"epoch": 1.335918565196316,
"grad_norm": 11.7250712524423,
"learning_rate": 6.789645557208286e-06,
"loss": 1.5607802867889404,
"step": 2756
},
{
"epoch": 1.3364032961706254,
"grad_norm": 8.765154060482123,
"learning_rate": 6.7870120067525025e-06,
"loss": 1.5427156686782837,
"step": 2757
},
{
"epoch": 1.3368880271449346,
"grad_norm": 10.209264643458358,
"learning_rate": 6.784377887814596e-06,
"loss": 1.814351201057434,
"step": 2758
},
{
"epoch": 1.3373727581192438,
"grad_norm": 8.389038078816304,
"learning_rate": 6.781743201232524e-06,
"loss": 1.7629752159118652,
"step": 2759
},
{
"epoch": 1.337857489093553,
"grad_norm": 9.700359227229757,
"learning_rate": 6.779107947844434e-06,
"loss": 1.743185043334961,
"step": 2760
},
{
"epoch": 1.3383422200678623,
"grad_norm": 14.942577296915566,
"learning_rate": 6.776472128488648e-06,
"loss": 1.1754260063171387,
"step": 2761
},
{
"epoch": 1.3388269510421715,
"grad_norm": 43.05027369362208,
"learning_rate": 6.77383574400367e-06,
"loss": 2.6530070304870605,
"step": 2762
},
{
"epoch": 1.339311682016481,
"grad_norm": 25.00759469950711,
"learning_rate": 6.771198795228182e-06,
"loss": 2.1020262241363525,
"step": 2763
},
{
"epoch": 1.3397964129907902,
"grad_norm": 13.399086794225346,
"learning_rate": 6.76856128300105e-06,
"loss": 1.7257804870605469,
"step": 2764
},
{
"epoch": 1.3402811439650995,
"grad_norm": 12.923147936768343,
"learning_rate": 6.765923208161313e-06,
"loss": 2.1809120178222656,
"step": 2765
},
{
"epoch": 1.3407658749394087,
"grad_norm": 10.039090298484126,
"learning_rate": 6.763284571548196e-06,
"loss": 1.7988758087158203,
"step": 2766
},
{
"epoch": 1.341250605913718,
"grad_norm": 9.922647775990724,
"learning_rate": 6.760645374001095e-06,
"loss": 1.3121153116226196,
"step": 2767
},
{
"epoch": 1.3417353368880272,
"grad_norm": 13.070612174522305,
"learning_rate": 6.7580056163595904e-06,
"loss": 1.3418669700622559,
"step": 2768
},
{
"epoch": 1.3422200678623364,
"grad_norm": 10.996945757116132,
"learning_rate": 6.755365299463439e-06,
"loss": 2.4004647731781006,
"step": 2769
},
{
"epoch": 1.3427047988366456,
"grad_norm": 16.027056752637446,
"learning_rate": 6.752724424152575e-06,
"loss": 1.40712308883667,
"step": 2770
},
{
"epoch": 1.3431895298109549,
"grad_norm": 10.757121774249063,
"learning_rate": 6.750082991267107e-06,
"loss": 1.2487272024154663,
"step": 2771
},
{
"epoch": 1.343674260785264,
"grad_norm": 16.688846881451827,
"learning_rate": 6.747441001647331e-06,
"loss": 1.6186199188232422,
"step": 2772
},
{
"epoch": 1.3441589917595733,
"grad_norm": 14.636764108332995,
"learning_rate": 6.744798456133711e-06,
"loss": 3.4406280517578125,
"step": 2773
},
{
"epoch": 1.3446437227338828,
"grad_norm": 15.678801963214804,
"learning_rate": 6.742155355566887e-06,
"loss": 1.3716957569122314,
"step": 2774
},
{
"epoch": 1.345128453708192,
"grad_norm": 9.806668719151288,
"learning_rate": 6.739511700787683e-06,
"loss": 2.0001118183135986,
"step": 2775
},
{
"epoch": 1.3456131846825012,
"grad_norm": 10.223931859495853,
"learning_rate": 6.7368674926370925e-06,
"loss": 1.9970065355300903,
"step": 2776
},
{
"epoch": 1.3460979156568105,
"grad_norm": 11.94804992636878,
"learning_rate": 6.734222731956293e-06,
"loss": 2.106091022491455,
"step": 2777
},
{
"epoch": 1.3465826466311197,
"grad_norm": 13.549691986426257,
"learning_rate": 6.731577419586625e-06,
"loss": 1.4981154203414917,
"step": 2778
},
{
"epoch": 1.347067377605429,
"grad_norm": 9.60121285776475,
"learning_rate": 6.728931556369618e-06,
"loss": 1.5081239938735962,
"step": 2779
},
{
"epoch": 1.3475521085797382,
"grad_norm": 13.145058024708272,
"learning_rate": 6.726285143146969e-06,
"loss": 2.0256271362304688,
"step": 2780
},
{
"epoch": 1.3480368395540476,
"grad_norm": 8.929463774311907,
"learning_rate": 6.723638180760551e-06,
"loss": 1.761423110961914,
"step": 2781
},
{
"epoch": 1.3485215705283569,
"grad_norm": 11.41324403230852,
"learning_rate": 6.720990670052414e-06,
"loss": 1.3226218223571777,
"step": 2782
},
{
"epoch": 1.349006301502666,
"grad_norm": 9.40678958184366,
"learning_rate": 6.7183426118647785e-06,
"loss": 2.0823349952697754,
"step": 2783
},
{
"epoch": 1.3494910324769753,
"grad_norm": 7.945108333847448,
"learning_rate": 6.715694007040046e-06,
"loss": 1.4377546310424805,
"step": 2784
},
{
"epoch": 1.3499757634512846,
"grad_norm": 11.072442370880351,
"learning_rate": 6.713044856420781e-06,
"loss": 1.9137089252471924,
"step": 2785
},
{
"epoch": 1.3504604944255938,
"grad_norm": 11.964322296448055,
"learning_rate": 6.710395160849735e-06,
"loss": 1.5044748783111572,
"step": 2786
},
{
"epoch": 1.350945225399903,
"grad_norm": 10.008572058629088,
"learning_rate": 6.707744921169823e-06,
"loss": 1.8896678686141968,
"step": 2787
},
{
"epoch": 1.3514299563742123,
"grad_norm": 7.970487223817579,
"learning_rate": 6.705094138224135e-06,
"loss": 2.1711513996124268,
"step": 2788
},
{
"epoch": 1.3519146873485215,
"grad_norm": 9.465157680703946,
"learning_rate": 6.702442812855936e-06,
"loss": 1.4851741790771484,
"step": 2789
},
{
"epoch": 1.3523994183228307,
"grad_norm": 9.878488912310317,
"learning_rate": 6.699790945908662e-06,
"loss": 1.6764813661575317,
"step": 2790
},
{
"epoch": 1.35288414929714,
"grad_norm": 15.830052388184871,
"learning_rate": 6.697138538225923e-06,
"loss": 1.4774694442749023,
"step": 2791
},
{
"epoch": 1.3533688802714494,
"grad_norm": 23.338106233129505,
"learning_rate": 6.6944855906514976e-06,
"loss": 1.686642050743103,
"step": 2792
},
{
"epoch": 1.3538536112457586,
"grad_norm": 10.106671461245211,
"learning_rate": 6.69183210402934e-06,
"loss": 2.255929708480835,
"step": 2793
},
{
"epoch": 1.3543383422200679,
"grad_norm": 11.374016934821311,
"learning_rate": 6.689178079203574e-06,
"loss": 1.6203906536102295,
"step": 2794
},
{
"epoch": 1.3548230731943771,
"grad_norm": 10.997591173219798,
"learning_rate": 6.686523517018494e-06,
"loss": 1.8300294876098633,
"step": 2795
},
{
"epoch": 1.3553078041686863,
"grad_norm": 14.154843083146408,
"learning_rate": 6.683868418318568e-06,
"loss": 1.6836199760437012,
"step": 2796
},
{
"epoch": 1.3557925351429956,
"grad_norm": 7.431166314668852,
"learning_rate": 6.681212783948431e-06,
"loss": 1.0954937934875488,
"step": 2797
},
{
"epoch": 1.3562772661173048,
"grad_norm": 8.856657078046862,
"learning_rate": 6.678556614752891e-06,
"loss": 1.7077445983886719,
"step": 2798
},
{
"epoch": 1.3567619970916143,
"grad_norm": 13.18933338190591,
"learning_rate": 6.675899911576924e-06,
"loss": 1.4245802164077759,
"step": 2799
},
{
"epoch": 1.3572467280659235,
"grad_norm": 12.644040243662053,
"learning_rate": 6.67324267526568e-06,
"loss": 1.8433825969696045,
"step": 2800
},
{
"epoch": 1.3577314590402327,
"grad_norm": 8.522498588058232,
"learning_rate": 6.670584906664474e-06,
"loss": 1.6370255947113037,
"step": 2801
},
{
"epoch": 1.358216190014542,
"grad_norm": 7.138996240806393,
"learning_rate": 6.667926606618791e-06,
"loss": 1.5184980630874634,
"step": 2802
},
{
"epoch": 1.3587009209888512,
"grad_norm": 16.146501417417976,
"learning_rate": 6.66526777597429e-06,
"loss": 1.8438892364501953,
"step": 2803
},
{
"epoch": 1.3591856519631604,
"grad_norm": 8.074035997177232,
"learning_rate": 6.66260841557679e-06,
"loss": 1.8074979782104492,
"step": 2804
},
{
"epoch": 1.3596703829374697,
"grad_norm": 7.660291316969739,
"learning_rate": 6.6599485262722885e-06,
"loss": 2.2282111644744873,
"step": 2805
},
{
"epoch": 1.360155113911779,
"grad_norm": 11.273099160575908,
"learning_rate": 6.657288108906942e-06,
"loss": 1.6345499753952026,
"step": 2806
},
{
"epoch": 1.3606398448860881,
"grad_norm": 9.722075089361914,
"learning_rate": 6.654627164327083e-06,
"loss": 1.2074038982391357,
"step": 2807
},
{
"epoch": 1.3611245758603974,
"grad_norm": 14.15346554593844,
"learning_rate": 6.6519656933792065e-06,
"loss": 1.8371583223342896,
"step": 2808
},
{
"epoch": 1.3616093068347066,
"grad_norm": 9.302050283155152,
"learning_rate": 6.649303696909974e-06,
"loss": 1.5039016008377075,
"step": 2809
},
{
"epoch": 1.362094037809016,
"grad_norm": 15.529401541139709,
"learning_rate": 6.646641175766222e-06,
"loss": 1.3906043767929077,
"step": 2810
},
{
"epoch": 1.3625787687833253,
"grad_norm": 11.532972274098531,
"learning_rate": 6.643978130794943e-06,
"loss": 1.585586667060852,
"step": 2811
},
{
"epoch": 1.3630634997576345,
"grad_norm": 8.641767384631592,
"learning_rate": 6.641314562843306e-06,
"loss": 1.7685726881027222,
"step": 2812
},
{
"epoch": 1.3635482307319438,
"grad_norm": 16.82456367629184,
"learning_rate": 6.638650472758639e-06,
"loss": 1.8515329360961914,
"step": 2813
},
{
"epoch": 1.364032961706253,
"grad_norm": 8.644969106515655,
"learning_rate": 6.635985861388443e-06,
"loss": 2.149686098098755,
"step": 2814
},
{
"epoch": 1.3645176926805622,
"grad_norm": 13.180117659508475,
"learning_rate": 6.633320729580376e-06,
"loss": 1.5628929138183594,
"step": 2815
},
{
"epoch": 1.3650024236548717,
"grad_norm": 9.141019291784115,
"learning_rate": 6.6306550781822686e-06,
"loss": 1.3178354501724243,
"step": 2816
},
{
"epoch": 1.365487154629181,
"grad_norm": 13.2426691624968,
"learning_rate": 6.627988908042115e-06,
"loss": 1.128499150276184,
"step": 2817
},
{
"epoch": 1.3659718856034901,
"grad_norm": 13.15446854537879,
"learning_rate": 6.625322220008076e-06,
"loss": 1.5582585334777832,
"step": 2818
},
{
"epoch": 1.3664566165777994,
"grad_norm": 14.872929223298806,
"learning_rate": 6.6226550149284716e-06,
"loss": 2.3409371376037598,
"step": 2819
},
{
"epoch": 1.3669413475521086,
"grad_norm": 9.170079284494166,
"learning_rate": 6.6199872936517915e-06,
"loss": 1.1661415100097656,
"step": 2820
},
{
"epoch": 1.3674260785264178,
"grad_norm": 15.18810876880277,
"learning_rate": 6.6173190570266885e-06,
"loss": 1.590632438659668,
"step": 2821
},
{
"epoch": 1.367910809500727,
"grad_norm": 17.366653847659702,
"learning_rate": 6.614650305901981e-06,
"loss": 2.535059690475464,
"step": 2822
},
{
"epoch": 1.3683955404750363,
"grad_norm": 13.897291608722165,
"learning_rate": 6.611981041126643e-06,
"loss": 1.4143518209457397,
"step": 2823
},
{
"epoch": 1.3688802714493455,
"grad_norm": 12.036561599348321,
"learning_rate": 6.609311263549821e-06,
"loss": 1.6214914321899414,
"step": 2824
},
{
"epoch": 1.3693650024236548,
"grad_norm": 18.487163919982105,
"learning_rate": 6.606640974020824e-06,
"loss": 1.1934046745300293,
"step": 2825
},
{
"epoch": 1.369849733397964,
"grad_norm": 8.200084347363166,
"learning_rate": 6.603970173389117e-06,
"loss": 1.66689932346344,
"step": 2826
},
{
"epoch": 1.3703344643722735,
"grad_norm": 9.521363007545913,
"learning_rate": 6.601298862504333e-06,
"loss": 1.1493444442749023,
"step": 2827
},
{
"epoch": 1.3708191953465827,
"grad_norm": 11.669910586538231,
"learning_rate": 6.598627042216269e-06,
"loss": 1.2467503547668457,
"step": 2828
},
{
"epoch": 1.371303926320892,
"grad_norm": 6.892651719621045,
"learning_rate": 6.595954713374879e-06,
"loss": 1.6071979999542236,
"step": 2829
},
{
"epoch": 1.3717886572952012,
"grad_norm": 9.664469664456217,
"learning_rate": 6.593281876830281e-06,
"loss": 1.5084457397460938,
"step": 2830
},
{
"epoch": 1.3722733882695104,
"grad_norm": 24.674899801021738,
"learning_rate": 6.5906085334327555e-06,
"loss": 1.1103029251098633,
"step": 2831
},
{
"epoch": 1.3727581192438196,
"grad_norm": 8.179696283359826,
"learning_rate": 6.587934684032742e-06,
"loss": 1.9183093309402466,
"step": 2832
},
{
"epoch": 1.3732428502181289,
"grad_norm": 9.13908974311121,
"learning_rate": 6.5852603294808435e-06,
"loss": 2.0077593326568604,
"step": 2833
},
{
"epoch": 1.3737275811924383,
"grad_norm": 9.279076939472302,
"learning_rate": 6.582585470627821e-06,
"loss": 1.1147346496582031,
"step": 2834
},
{
"epoch": 1.3742123121667476,
"grad_norm": 11.48993714839789,
"learning_rate": 6.5799101083246e-06,
"loss": 1.3755685091018677,
"step": 2835
},
{
"epoch": 1.3746970431410568,
"grad_norm": 8.77978159115431,
"learning_rate": 6.577234243422261e-06,
"loss": 1.8253116607666016,
"step": 2836
},
{
"epoch": 1.375181774115366,
"grad_norm": 17.380397617320508,
"learning_rate": 6.57455787677205e-06,
"loss": 1.7347941398620605,
"step": 2837
},
{
"epoch": 1.3756665050896753,
"grad_norm": 19.323236227469785,
"learning_rate": 6.571881009225365e-06,
"loss": 2.1819424629211426,
"step": 2838
},
{
"epoch": 1.3761512360639845,
"grad_norm": 10.905561808069326,
"learning_rate": 6.56920364163377e-06,
"loss": 2.225593090057373,
"step": 2839
},
{
"epoch": 1.3766359670382937,
"grad_norm": 9.467702500535076,
"learning_rate": 6.566525774848988e-06,
"loss": 1.629666805267334,
"step": 2840
},
{
"epoch": 1.377120698012603,
"grad_norm": 11.359348882804808,
"learning_rate": 6.563847409722898e-06,
"loss": 1.0879054069519043,
"step": 2841
},
{
"epoch": 1.3776054289869122,
"grad_norm": 10.744758952687445,
"learning_rate": 6.561168547107536e-06,
"loss": 1.5671108961105347,
"step": 2842
},
{
"epoch": 1.3780901599612214,
"grad_norm": 13.919843826844433,
"learning_rate": 6.558489187855101e-06,
"loss": 2.2659265995025635,
"step": 2843
},
{
"epoch": 1.3785748909355307,
"grad_norm": 9.520760710312956,
"learning_rate": 6.555809332817946e-06,
"loss": 1.1429851055145264,
"step": 2844
},
{
"epoch": 1.37905962190984,
"grad_norm": 10.68860927784845,
"learning_rate": 6.553128982848584e-06,
"loss": 1.7208127975463867,
"step": 2845
},
{
"epoch": 1.3795443528841493,
"grad_norm": 10.405380831586347,
"learning_rate": 6.550448138799683e-06,
"loss": 1.8715708255767822,
"step": 2846
},
{
"epoch": 1.3800290838584586,
"grad_norm": 7.583906874117453,
"learning_rate": 6.547766801524073e-06,
"loss": 1.8097851276397705,
"step": 2847
},
{
"epoch": 1.3805138148327678,
"grad_norm": 8.42071346897902,
"learning_rate": 6.545084971874738e-06,
"loss": 1.8845399618148804,
"step": 2848
},
{
"epoch": 1.380998545807077,
"grad_norm": 8.992257399426485,
"learning_rate": 6.542402650704816e-06,
"loss": 1.0018585920333862,
"step": 2849
},
{
"epoch": 1.3814832767813863,
"grad_norm": 10.78470337286181,
"learning_rate": 6.539719838867604e-06,
"loss": 1.41841721534729,
"step": 2850
},
{
"epoch": 1.3819680077556957,
"grad_norm": 13.342224098445653,
"learning_rate": 6.537036537216554e-06,
"loss": 0.8690760135650635,
"step": 2851
},
{
"epoch": 1.382452738730005,
"grad_norm": 8.307902120193454,
"learning_rate": 6.5343527466052795e-06,
"loss": 1.4277197122573853,
"step": 2852
},
{
"epoch": 1.3829374697043142,
"grad_norm": 19.15905102087311,
"learning_rate": 6.531668467887539e-06,
"loss": 2.1669118404388428,
"step": 2853
},
{
"epoch": 1.3834222006786234,
"grad_norm": 14.221036830264763,
"learning_rate": 6.528983701917255e-06,
"loss": 0.886838436126709,
"step": 2854
},
{
"epoch": 1.3839069316529327,
"grad_norm": 13.025898697764738,
"learning_rate": 6.526298449548502e-06,
"loss": 1.2599549293518066,
"step": 2855
},
{
"epoch": 1.384391662627242,
"grad_norm": 10.066231999220156,
"learning_rate": 6.52361271163551e-06,
"loss": 1.7888251543045044,
"step": 2856
},
{
"epoch": 1.3848763936015511,
"grad_norm": 9.522741870273377,
"learning_rate": 6.5209264890326594e-06,
"loss": 1.4750444889068604,
"step": 2857
},
{
"epoch": 1.3853611245758604,
"grad_norm": 11.363730884381535,
"learning_rate": 6.518239782594491e-06,
"loss": 2.380269765853882,
"step": 2858
},
{
"epoch": 1.3858458555501696,
"grad_norm": 13.541427920078904,
"learning_rate": 6.515552593175696e-06,
"loss": 1.1895170211791992,
"step": 2859
},
{
"epoch": 1.3863305865244788,
"grad_norm": 11.123004312016867,
"learning_rate": 6.512864921631121e-06,
"loss": 1.212777018547058,
"step": 2860
},
{
"epoch": 1.386815317498788,
"grad_norm": 8.033893425928943,
"learning_rate": 6.5101767688157626e-06,
"loss": 1.516396164894104,
"step": 2861
},
{
"epoch": 1.3873000484730975,
"grad_norm": 9.807286141366175,
"learning_rate": 6.507488135584773e-06,
"loss": 2.1933400630950928,
"step": 2862
},
{
"epoch": 1.3877847794474067,
"grad_norm": 11.97069671204751,
"learning_rate": 6.504799022793461e-06,
"loss": 1.5838534832000732,
"step": 2863
},
{
"epoch": 1.388269510421716,
"grad_norm": 17.49239381997767,
"learning_rate": 6.502109431297279e-06,
"loss": 1.8320446014404297,
"step": 2864
},
{
"epoch": 1.3887542413960252,
"grad_norm": 12.313003776296274,
"learning_rate": 6.499419361951837e-06,
"loss": 1.2666571140289307,
"step": 2865
},
{
"epoch": 1.3892389723703344,
"grad_norm": 10.27292169559262,
"learning_rate": 6.4967288156129e-06,
"loss": 1.7962591648101807,
"step": 2866
},
{
"epoch": 1.3897237033446437,
"grad_norm": 16.623948137398507,
"learning_rate": 6.494037793136378e-06,
"loss": 2.5771145820617676,
"step": 2867
},
{
"epoch": 1.390208434318953,
"grad_norm": 8.94760432344375,
"learning_rate": 6.49134629537834e-06,
"loss": 1.2751082181930542,
"step": 2868
},
{
"epoch": 1.3906931652932624,
"grad_norm": 25.68239897572417,
"learning_rate": 6.488654323194996e-06,
"loss": 1.3342503309249878,
"step": 2869
},
{
"epoch": 1.3911778962675716,
"grad_norm": 13.790437844152644,
"learning_rate": 6.4859618774427195e-06,
"loss": 1.078425407409668,
"step": 2870
},
{
"epoch": 1.3916626272418808,
"grad_norm": 14.337919976908601,
"learning_rate": 6.483268958978024e-06,
"loss": 1.0852110385894775,
"step": 2871
},
{
"epoch": 1.39214735821619,
"grad_norm": 12.667811848542593,
"learning_rate": 6.480575568657578e-06,
"loss": 1.4710466861724854,
"step": 2872
},
{
"epoch": 1.3926320891904993,
"grad_norm": 11.70155190396838,
"learning_rate": 6.477881707338201e-06,
"loss": 1.186589241027832,
"step": 2873
},
{
"epoch": 1.3931168201648085,
"grad_norm": 10.814926811971796,
"learning_rate": 6.475187375876859e-06,
"loss": 1.305647850036621,
"step": 2874
},
{
"epoch": 1.3936015511391178,
"grad_norm": 8.89668804740708,
"learning_rate": 6.472492575130671e-06,
"loss": 1.6714637279510498,
"step": 2875
},
{
"epoch": 1.394086282113427,
"grad_norm": 9.736380190741587,
"learning_rate": 6.469797305956902e-06,
"loss": 1.473501205444336,
"step": 2876
},
{
"epoch": 1.3945710130877362,
"grad_norm": 23.779174117824198,
"learning_rate": 6.4671015692129716e-06,
"loss": 1.3280760049819946,
"step": 2877
},
{
"epoch": 1.3950557440620455,
"grad_norm": 23.789960769215796,
"learning_rate": 6.464405365756441e-06,
"loss": 1.7667365074157715,
"step": 2878
},
{
"epoch": 1.3955404750363547,
"grad_norm": 10.206723272343142,
"learning_rate": 6.461708696445024e-06,
"loss": 1.5835596323013306,
"step": 2879
},
{
"epoch": 1.3960252060106642,
"grad_norm": 7.529576861111066,
"learning_rate": 6.459011562136582e-06,
"loss": 1.3056460618972778,
"step": 2880
},
{
"epoch": 1.3965099369849734,
"grad_norm": 8.517740283962965,
"learning_rate": 6.456313963689123e-06,
"loss": 1.983647346496582,
"step": 2881
},
{
"epoch": 1.3969946679592826,
"grad_norm": 10.90682203265335,
"learning_rate": 6.453615901960808e-06,
"loss": 1.29656183719635,
"step": 2882
},
{
"epoch": 1.3974793989335919,
"grad_norm": 12.483756357712394,
"learning_rate": 6.450917377809936e-06,
"loss": 1.2443307638168335,
"step": 2883
},
{
"epoch": 1.397964129907901,
"grad_norm": 10.249609294441193,
"learning_rate": 6.44821839209496e-06,
"loss": 0.8837788701057434,
"step": 2884
},
{
"epoch": 1.3984488608822103,
"grad_norm": 9.118310390310269,
"learning_rate": 6.44551894567448e-06,
"loss": 1.7439568042755127,
"step": 2885
},
{
"epoch": 1.3989335918565196,
"grad_norm": 16.602513417002555,
"learning_rate": 6.442819039407238e-06,
"loss": 1.4477957487106323,
"step": 2886
},
{
"epoch": 1.399418322830829,
"grad_norm": 10.875917901999536,
"learning_rate": 6.440118674152127e-06,
"loss": 1.8627246618270874,
"step": 2887
},
{
"epoch": 1.3999030538051382,
"grad_norm": 10.805398794675542,
"learning_rate": 6.4374178507681815e-06,
"loss": 1.4263126850128174,
"step": 2888
},
{
"epoch": 1.4003877847794475,
"grad_norm": 7.523633091519531,
"learning_rate": 6.434716570114587e-06,
"loss": 1.7709013223648071,
"step": 2889
},
{
"epoch": 1.4008725157537567,
"grad_norm": 20.95223442862245,
"learning_rate": 6.432014833050671e-06,
"loss": 1.4389630556106567,
"step": 2890
},
{
"epoch": 1.401357246728066,
"grad_norm": 11.397385825407401,
"learning_rate": 6.429312640435905e-06,
"loss": 1.5596511363983154,
"step": 2891
},
{
"epoch": 1.4018419777023752,
"grad_norm": 7.747480276769802,
"learning_rate": 6.42660999312991e-06,
"loss": 1.5562864542007446,
"step": 2892
},
{
"epoch": 1.4023267086766844,
"grad_norm": 9.777616815013978,
"learning_rate": 6.423906891992447e-06,
"loss": 1.1822433471679688,
"step": 2893
},
{
"epoch": 1.4028114396509936,
"grad_norm": 15.907405050402064,
"learning_rate": 6.421203337883423e-06,
"loss": 1.4504576921463013,
"step": 2894
},
{
"epoch": 1.4032961706253029,
"grad_norm": 14.08264598671325,
"learning_rate": 6.41849933166289e-06,
"loss": 1.9695332050323486,
"step": 2895
},
{
"epoch": 1.403780901599612,
"grad_norm": 13.655795966591334,
"learning_rate": 6.415794874191046e-06,
"loss": 2.054135322570801,
"step": 2896
},
{
"epoch": 1.4042656325739213,
"grad_norm": 14.1102284225698,
"learning_rate": 6.413089966328227e-06,
"loss": 1.7003706693649292,
"step": 2897
},
{
"epoch": 1.4047503635482308,
"grad_norm": 9.76105552853384,
"learning_rate": 6.410384608934914e-06,
"loss": 1.4214481115341187,
"step": 2898
},
{
"epoch": 1.40523509452254,
"grad_norm": 12.744197021838936,
"learning_rate": 6.407678802871734e-06,
"loss": 1.7024595737457275,
"step": 2899
},
{
"epoch": 1.4057198254968493,
"grad_norm": 6.601178244941216,
"learning_rate": 6.404972548999453e-06,
"loss": 1.9962822198867798,
"step": 2900
},
{
"epoch": 1.4062045564711585,
"grad_norm": 12.607824809041336,
"learning_rate": 6.402265848178984e-06,
"loss": 1.6238032579421997,
"step": 2901
},
{
"epoch": 1.4066892874454677,
"grad_norm": 21.932105024702132,
"learning_rate": 6.399558701271376e-06,
"loss": 1.4691059589385986,
"step": 2902
},
{
"epoch": 1.407174018419777,
"grad_norm": 12.52428395282099,
"learning_rate": 6.396851109137827e-06,
"loss": 2.3425590991973877,
"step": 2903
},
{
"epoch": 1.4076587493940864,
"grad_norm": 13.05382503968199,
"learning_rate": 6.3941430726396715e-06,
"loss": 1.1556280851364136,
"step": 2904
},
{
"epoch": 1.4081434803683957,
"grad_norm": 14.660808117837506,
"learning_rate": 6.3914345926383855e-06,
"loss": 1.8003339767456055,
"step": 2905
},
{
"epoch": 1.4086282113427049,
"grad_norm": 14.270861922119877,
"learning_rate": 6.38872566999559e-06,
"loss": 1.3378307819366455,
"step": 2906
},
{
"epoch": 1.4091129423170141,
"grad_norm": 9.910291016092593,
"learning_rate": 6.38601630557304e-06,
"loss": 2.3690683841705322,
"step": 2907
},
{
"epoch": 1.4095976732913233,
"grad_norm": 7.71034838223863,
"learning_rate": 6.383306500232641e-06,
"loss": 1.707003116607666,
"step": 2908
},
{
"epoch": 1.4100824042656326,
"grad_norm": 12.516432851066837,
"learning_rate": 6.380596254836428e-06,
"loss": 2.121159076690674,
"step": 2909
},
{
"epoch": 1.4105671352399418,
"grad_norm": 8.237345234773297,
"learning_rate": 6.3778855702465835e-06,
"loss": 1.1688681840896606,
"step": 2910
},
{
"epoch": 1.411051866214251,
"grad_norm": 12.141031756993943,
"learning_rate": 6.375174447325426e-06,
"loss": 1.8245136737823486,
"step": 2911
},
{
"epoch": 1.4115365971885603,
"grad_norm": 7.347670854038371,
"learning_rate": 6.372462886935418e-06,
"loss": 1.9845755100250244,
"step": 2912
},
{
"epoch": 1.4120213281628695,
"grad_norm": 27.0651008977007,
"learning_rate": 6.369750889939153e-06,
"loss": 1.8287997245788574,
"step": 2913
},
{
"epoch": 1.4125060591371787,
"grad_norm": 10.97041561547899,
"learning_rate": 6.3670384571993715e-06,
"loss": 1.520342230796814,
"step": 2914
},
{
"epoch": 1.4129907901114882,
"grad_norm": 7.247372130496776,
"learning_rate": 6.364325589578948e-06,
"loss": 1.1564778089523315,
"step": 2915
},
{
"epoch": 1.4134755210857974,
"grad_norm": 12.227345535527752,
"learning_rate": 6.361612287940898e-06,
"loss": 0.8854108452796936,
"step": 2916
},
{
"epoch": 1.4139602520601067,
"grad_norm": 9.8535509673892,
"learning_rate": 6.358898553148373e-06,
"loss": 1.51835298538208,
"step": 2917
},
{
"epoch": 1.414444983034416,
"grad_norm": 16.017929855446166,
"learning_rate": 6.356184386064661e-06,
"loss": 1.3843207359313965,
"step": 2918
},
{
"epoch": 1.4149297140087251,
"grad_norm": 9.02826761725242,
"learning_rate": 6.353469787553195e-06,
"loss": 1.418726921081543,
"step": 2919
},
{
"epoch": 1.4154144449830344,
"grad_norm": 27.814955559208,
"learning_rate": 6.350754758477534e-06,
"loss": 2.1142475605010986,
"step": 2920
},
{
"epoch": 1.4158991759573436,
"grad_norm": 11.250420302228637,
"learning_rate": 6.348039299701382e-06,
"loss": 1.5022425651550293,
"step": 2921
},
{
"epoch": 1.416383906931653,
"grad_norm": 14.385016497024882,
"learning_rate": 6.3453234120885775e-06,
"loss": 1.745765209197998,
"step": 2922
},
{
"epoch": 1.4168686379059623,
"grad_norm": 13.02102493384356,
"learning_rate": 6.342607096503097e-06,
"loss": 1.250004529953003,
"step": 2923
},
{
"epoch": 1.4173533688802715,
"grad_norm": 7.913297359824305,
"learning_rate": 6.33989035380905e-06,
"loss": 1.8517099618911743,
"step": 2924
},
{
"epoch": 1.4178380998545808,
"grad_norm": 9.497029316943223,
"learning_rate": 6.337173184870683e-06,
"loss": 1.186525583267212,
"step": 2925
},
{
"epoch": 1.41832283082889,
"grad_norm": 9.218854868430816,
"learning_rate": 6.33445559055238e-06,
"loss": 1.7906490564346313,
"step": 2926
},
{
"epoch": 1.4188075618031992,
"grad_norm": 11.729822666165518,
"learning_rate": 6.331737571718659e-06,
"loss": 1.5125144720077515,
"step": 2927
},
{
"epoch": 1.4192922927775085,
"grad_norm": 8.380080029876845,
"learning_rate": 6.329019129234171e-06,
"loss": 0.9002367854118347,
"step": 2928
},
{
"epoch": 1.4197770237518177,
"grad_norm": 11.026266210263996,
"learning_rate": 6.326300263963704e-06,
"loss": 1.008684515953064,
"step": 2929
},
{
"epoch": 1.420261754726127,
"grad_norm": 10.580182861679669,
"learning_rate": 6.323580976772181e-06,
"loss": 2.9325032234191895,
"step": 2930
},
{
"epoch": 1.4207464857004362,
"grad_norm": 11.332850912863945,
"learning_rate": 6.3208612685246586e-06,
"loss": 0.966004490852356,
"step": 2931
},
{
"epoch": 1.4212312166747454,
"grad_norm": 10.811084050187665,
"learning_rate": 6.3181411400863265e-06,
"loss": 1.4632458686828613,
"step": 2932
},
{
"epoch": 1.4217159476490548,
"grad_norm": 12.7385315034181,
"learning_rate": 6.31542059232251e-06,
"loss": 1.6788486242294312,
"step": 2933
},
{
"epoch": 1.422200678623364,
"grad_norm": 10.331170701136372,
"learning_rate": 6.312699626098664e-06,
"loss": 1.5578458309173584,
"step": 2934
},
{
"epoch": 1.4226854095976733,
"grad_norm": 8.253556545273026,
"learning_rate": 6.309978242280382e-06,
"loss": 1.8070613145828247,
"step": 2935
},
{
"epoch": 1.4231701405719825,
"grad_norm": 11.428725716030105,
"learning_rate": 6.3072564417333844e-06,
"loss": 1.5051987171173096,
"step": 2936
},
{
"epoch": 1.4236548715462918,
"grad_norm": 20.30473480658528,
"learning_rate": 6.304534225323529e-06,
"loss": 1.395492672920227,
"step": 2937
},
{
"epoch": 1.424139602520601,
"grad_norm": 7.2465433443248894,
"learning_rate": 6.301811593916804e-06,
"loss": 2.1837189197540283,
"step": 2938
},
{
"epoch": 1.4246243334949102,
"grad_norm": 11.02997671213899,
"learning_rate": 6.299088548379329e-06,
"loss": 1.1022478342056274,
"step": 2939
},
{
"epoch": 1.4251090644692197,
"grad_norm": 17.123876137368462,
"learning_rate": 6.2963650895773566e-06,
"loss": 1.6746876239776611,
"step": 2940
},
{
"epoch": 1.425593795443529,
"grad_norm": 8.102032291486497,
"learning_rate": 6.2936412183772714e-06,
"loss": 1.1522527933120728,
"step": 2941
},
{
"epoch": 1.4260785264178382,
"grad_norm": 16.68156983707512,
"learning_rate": 6.290916935645584e-06,
"loss": 1.6395665407180786,
"step": 2942
},
{
"epoch": 1.4265632573921474,
"grad_norm": 12.059808466400712,
"learning_rate": 6.288192242248945e-06,
"loss": 1.4017729759216309,
"step": 2943
},
{
"epoch": 1.4270479883664566,
"grad_norm": 14.228040771468017,
"learning_rate": 6.285467139054126e-06,
"loss": 1.2529339790344238,
"step": 2944
},
{
"epoch": 1.4275327193407659,
"grad_norm": 10.118506972971348,
"learning_rate": 6.282741626928036e-06,
"loss": 1.463725209236145,
"step": 2945
},
{
"epoch": 1.428017450315075,
"grad_norm": 10.193451441089486,
"learning_rate": 6.280015706737713e-06,
"loss": 1.3756728172302246,
"step": 2946
},
{
"epoch": 1.4285021812893843,
"grad_norm": 9.109633528431706,
"learning_rate": 6.277289379350321e-06,
"loss": 1.471501111984253,
"step": 2947
},
{
"epoch": 1.4289869122636936,
"grad_norm": 12.28917547828161,
"learning_rate": 6.274562645633158e-06,
"loss": 1.4014277458190918,
"step": 2948
},
{
"epoch": 1.4294716432380028,
"grad_norm": 9.60734738808628,
"learning_rate": 6.271835506453646e-06,
"loss": 1.1480872631072998,
"step": 2949
},
{
"epoch": 1.429956374212312,
"grad_norm": 11.17284359708391,
"learning_rate": 6.269107962679344e-06,
"loss": 1.3244919776916504,
"step": 2950
},
{
"epoch": 1.4304411051866215,
"grad_norm": 9.23736469324054,
"learning_rate": 6.266380015177929e-06,
"loss": 1.417168378829956,
"step": 2951
},
{
"epoch": 1.4309258361609307,
"grad_norm": 23.19157951878772,
"learning_rate": 6.263651664817219e-06,
"loss": 1.1960029602050781,
"step": 2952
},
{
"epoch": 1.43141056713524,
"grad_norm": 22.219708817725415,
"learning_rate": 6.26092291246515e-06,
"loss": 3.6870288848876953,
"step": 2953
},
{
"epoch": 1.4318952981095492,
"grad_norm": 8.239522541529047,
"learning_rate": 6.258193758989788e-06,
"loss": 1.403731107711792,
"step": 2954
},
{
"epoch": 1.4323800290838584,
"grad_norm": 16.540297642856473,
"learning_rate": 6.255464205259332e-06,
"loss": 1.2761162519454956,
"step": 2955
},
{
"epoch": 1.4328647600581677,
"grad_norm": 13.99049494338894,
"learning_rate": 6.2527342521420995e-06,
"loss": 1.8413549661636353,
"step": 2956
},
{
"epoch": 1.433349491032477,
"grad_norm": 10.784889746620193,
"learning_rate": 6.250003900506546e-06,
"loss": 1.6162748336791992,
"step": 2957
},
{
"epoch": 1.4338342220067863,
"grad_norm": 10.188105722283115,
"learning_rate": 6.247273151221242e-06,
"loss": 1.7474815845489502,
"step": 2958
},
{
"epoch": 1.4343189529810956,
"grad_norm": 9.679451276925105,
"learning_rate": 6.2445420051548934e-06,
"loss": 2.0764636993408203,
"step": 2959
},
{
"epoch": 1.4348036839554048,
"grad_norm": 15.247466186801024,
"learning_rate": 6.241810463176329e-06,
"loss": 1.2694478034973145,
"step": 2960
},
{
"epoch": 1.435288414929714,
"grad_norm": 9.81903128660868,
"learning_rate": 6.239078526154502e-06,
"loss": 1.1754562854766846,
"step": 2961
},
{
"epoch": 1.4357731459040233,
"grad_norm": 6.175196307163278,
"learning_rate": 6.2363461949584936e-06,
"loss": 1.1154698133468628,
"step": 2962
},
{
"epoch": 1.4362578768783325,
"grad_norm": 9.559387153198266,
"learning_rate": 6.23361347045751e-06,
"loss": 1.4864766597747803,
"step": 2963
},
{
"epoch": 1.4367426078526417,
"grad_norm": 14.129283485252376,
"learning_rate": 6.230880353520883e-06,
"loss": 2.066925048828125,
"step": 2964
},
{
"epoch": 1.437227338826951,
"grad_norm": 11.780779927634667,
"learning_rate": 6.228146845018068e-06,
"loss": 1.9589524269104004,
"step": 2965
},
{
"epoch": 1.4377120698012602,
"grad_norm": 10.92699082254899,
"learning_rate": 6.225412945818645e-06,
"loss": 1.9730360507965088,
"step": 2966
},
{
"epoch": 1.4381968007755694,
"grad_norm": 11.75735731252574,
"learning_rate": 6.222678656792318e-06,
"loss": 1.5802345275878906,
"step": 2967
},
{
"epoch": 1.438681531749879,
"grad_norm": 10.597764238296765,
"learning_rate": 6.219943978808919e-06,
"loss": 1.9461740255355835,
"step": 2968
},
{
"epoch": 1.4391662627241881,
"grad_norm": 10.280377888996751,
"learning_rate": 6.217208912738397e-06,
"loss": 1.307907223701477,
"step": 2969
},
{
"epoch": 1.4396509936984974,
"grad_norm": 16.533580795613293,
"learning_rate": 6.214473459450828e-06,
"loss": 1.0101754665374756,
"step": 2970
},
{
"epoch": 1.4401357246728066,
"grad_norm": 20.232142789433123,
"learning_rate": 6.211737619816414e-06,
"loss": 1.888695240020752,
"step": 2971
},
{
"epoch": 1.4406204556471158,
"grad_norm": 17.307903873990444,
"learning_rate": 6.209001394705475e-06,
"loss": 1.2301647663116455,
"step": 2972
},
{
"epoch": 1.441105186621425,
"grad_norm": 8.138176926998549,
"learning_rate": 6.2062647849884565e-06,
"loss": 1.3752868175506592,
"step": 2973
},
{
"epoch": 1.4415899175957343,
"grad_norm": 12.602227833704006,
"learning_rate": 6.203527791535923e-06,
"loss": 1.4023637771606445,
"step": 2974
},
{
"epoch": 1.4420746485700437,
"grad_norm": 7.462852627491171,
"learning_rate": 6.200790415218568e-06,
"loss": 1.681844711303711,
"step": 2975
},
{
"epoch": 1.442559379544353,
"grad_norm": 11.259477539038157,
"learning_rate": 6.1980526569072004e-06,
"loss": 1.7577931880950928,
"step": 2976
},
{
"epoch": 1.4430441105186622,
"grad_norm": 8.283781682194475,
"learning_rate": 6.19531451747275e-06,
"loss": 1.533901572227478,
"step": 2977
},
{
"epoch": 1.4435288414929714,
"grad_norm": 6.618256789208207,
"learning_rate": 6.192575997786274e-06,
"loss": 1.6538772583007812,
"step": 2978
},
{
"epoch": 1.4440135724672807,
"grad_norm": 8.460952548372688,
"learning_rate": 6.189837098718946e-06,
"loss": 2.529167652130127,
"step": 2979
},
{
"epoch": 1.44449830344159,
"grad_norm": 15.171657575725,
"learning_rate": 6.187097821142061e-06,
"loss": 1.4904506206512451,
"step": 2980
},
{
"epoch": 1.4449830344158991,
"grad_norm": 12.381271702506691,
"learning_rate": 6.184358165927032e-06,
"loss": 0.6503773927688599,
"step": 2981
},
{
"epoch": 1.4454677653902084,
"grad_norm": 15.172568032928417,
"learning_rate": 6.1816181339454e-06,
"loss": 2.1535258293151855,
"step": 2982
},
{
"epoch": 1.4459524963645176,
"grad_norm": 8.582026592773726,
"learning_rate": 6.178877726068819e-06,
"loss": 1.1872332096099854,
"step": 2983
},
{
"epoch": 1.4464372273388268,
"grad_norm": 11.52771559933192,
"learning_rate": 6.176136943169062e-06,
"loss": 2.221867561340332,
"step": 2984
},
{
"epoch": 1.446921958313136,
"grad_norm": 10.535602127542292,
"learning_rate": 6.173395786118026e-06,
"loss": 0.8907658457756042,
"step": 2985
},
{
"epoch": 1.4474066892874455,
"grad_norm": 15.685253194980035,
"learning_rate": 6.170654255787723e-06,
"loss": 1.7805465459823608,
"step": 2986
},
{
"epoch": 1.4478914202617548,
"grad_norm": 13.24152543485011,
"learning_rate": 6.1679123530502884e-06,
"loss": 2.039827823638916,
"step": 2987
},
{
"epoch": 1.448376151236064,
"grad_norm": 14.319441868700356,
"learning_rate": 6.1651700787779675e-06,
"loss": 1.6898863315582275,
"step": 2988
},
{
"epoch": 1.4488608822103732,
"grad_norm": 8.895088436861908,
"learning_rate": 6.1624274338431344e-06,
"loss": 1.1851747035980225,
"step": 2989
},
{
"epoch": 1.4493456131846825,
"grad_norm": 10.557696442178816,
"learning_rate": 6.159684419118274e-06,
"loss": 2.2060446739196777,
"step": 2990
},
{
"epoch": 1.4498303441589917,
"grad_norm": 18.291566584611413,
"learning_rate": 6.15694103547599e-06,
"loss": 1.3145079612731934,
"step": 2991
},
{
"epoch": 1.450315075133301,
"grad_norm": 10.342993815786025,
"learning_rate": 6.154197283789006e-06,
"loss": 1.664581298828125,
"step": 2992
},
{
"epoch": 1.4507998061076104,
"grad_norm": 12.042767003801762,
"learning_rate": 6.15145316493016e-06,
"loss": 1.7026350498199463,
"step": 2993
},
{
"epoch": 1.4512845370819196,
"grad_norm": 7.669697901254764,
"learning_rate": 6.148708679772408e-06,
"loss": 1.6844992637634277,
"step": 2994
},
{
"epoch": 1.4517692680562289,
"grad_norm": 16.267895762122397,
"learning_rate": 6.145963829188823e-06,
"loss": 1.6681444644927979,
"step": 2995
},
{
"epoch": 1.452253999030538,
"grad_norm": 10.74867931887039,
"learning_rate": 6.143218614052594e-06,
"loss": 1.9506993293762207,
"step": 2996
},
{
"epoch": 1.4527387300048473,
"grad_norm": 8.920853497787409,
"learning_rate": 6.140473035237025e-06,
"loss": 1.659022569656372,
"step": 2997
},
{
"epoch": 1.4532234609791566,
"grad_norm": 7.617955062952668,
"learning_rate": 6.137727093615534e-06,
"loss": 1.9831223487854004,
"step": 2998
},
{
"epoch": 1.4537081919534658,
"grad_norm": 10.47805911298986,
"learning_rate": 6.1349807900616595e-06,
"loss": 1.75303053855896,
"step": 2999
},
{
"epoch": 1.454192922927775,
"grad_norm": 11.493521225144294,
"learning_rate": 6.1322341254490515e-06,
"loss": 1.8476343154907227,
"step": 3000
},
{
"epoch": 1.4546776539020843,
"grad_norm": 16.04922988784884,
"learning_rate": 6.129487100651476e-06,
"loss": 1.2560758590698242,
"step": 3001
},
{
"epoch": 1.4551623848763935,
"grad_norm": 11.376415186120573,
"learning_rate": 6.126739716542812e-06,
"loss": 1.4548351764678955,
"step": 3002
},
{
"epoch": 1.4556471158507027,
"grad_norm": 21.637392896393056,
"learning_rate": 6.123991973997056e-06,
"loss": 1.0915518999099731,
"step": 3003
},
{
"epoch": 1.4561318468250122,
"grad_norm": 11.04389640165621,
"learning_rate": 6.121243873888314e-06,
"loss": 1.433899164199829,
"step": 3004
},
{
"epoch": 1.4566165777993214,
"grad_norm": 7.92094440849735,
"learning_rate": 6.118495417090809e-06,
"loss": 1.8113715648651123,
"step": 3005
},
{
"epoch": 1.4571013087736306,
"grad_norm": 8.604526388420283,
"learning_rate": 6.11574660447888e-06,
"loss": 1.3573418855667114,
"step": 3006
},
{
"epoch": 1.4575860397479399,
"grad_norm": 10.61326804994277,
"learning_rate": 6.112997436926972e-06,
"loss": 1.3302245140075684,
"step": 3007
},
{
"epoch": 1.458070770722249,
"grad_norm": 14.847044082989415,
"learning_rate": 6.11024791530965e-06,
"loss": 1.2435803413391113,
"step": 3008
},
{
"epoch": 1.4585555016965583,
"grad_norm": 8.141754568789645,
"learning_rate": 6.107498040501585e-06,
"loss": 1.1631006002426147,
"step": 3009
},
{
"epoch": 1.4590402326708678,
"grad_norm": 8.19864215202274,
"learning_rate": 6.104747813377567e-06,
"loss": 0.5529677867889404,
"step": 3010
},
{
"epoch": 1.459524963645177,
"grad_norm": 9.631736498433217,
"learning_rate": 6.1019972348124944e-06,
"loss": 1.6275358200073242,
"step": 3011
},
{
"epoch": 1.4600096946194863,
"grad_norm": 18.115114689643825,
"learning_rate": 6.099246305681376e-06,
"loss": 1.3723927736282349,
"step": 3012
},
{
"epoch": 1.4604944255937955,
"grad_norm": 9.884711074515526,
"learning_rate": 6.096495026859337e-06,
"loss": 1.2343268394470215,
"step": 3013
},
{
"epoch": 1.4609791565681047,
"grad_norm": 7.662665833096144,
"learning_rate": 6.093743399221608e-06,
"loss": 1.0674901008605957,
"step": 3014
},
{
"epoch": 1.461463887542414,
"grad_norm": 8.609103060762049,
"learning_rate": 6.0909914236435355e-06,
"loss": 1.4156947135925293,
"step": 3015
},
{
"epoch": 1.4619486185167232,
"grad_norm": 10.36413455220568,
"learning_rate": 6.088239101000573e-06,
"loss": 1.5057365894317627,
"step": 3016
},
{
"epoch": 1.4624333494910324,
"grad_norm": 12.374067134327285,
"learning_rate": 6.0854864321682905e-06,
"loss": 1.6184793710708618,
"step": 3017
},
{
"epoch": 1.4629180804653417,
"grad_norm": 10.148293118793429,
"learning_rate": 6.082733418022358e-06,
"loss": 1.4702119827270508,
"step": 3018
},
{
"epoch": 1.463402811439651,
"grad_norm": 12.754735084738089,
"learning_rate": 6.079980059438562e-06,
"loss": 1.7384393215179443,
"step": 3019
},
{
"epoch": 1.4638875424139601,
"grad_norm": 11.889994693060936,
"learning_rate": 6.077226357292802e-06,
"loss": 0.9817169904708862,
"step": 3020
},
{
"epoch": 1.4643722733882696,
"grad_norm": 13.704487177447755,
"learning_rate": 6.074472312461079e-06,
"loss": 1.1807661056518555,
"step": 3021
},
{
"epoch": 1.4648570043625788,
"grad_norm": 10.145009188845068,
"learning_rate": 6.071717925819507e-06,
"loss": 1.4280580282211304,
"step": 3022
},
{
"epoch": 1.465341735336888,
"grad_norm": 13.467023006561023,
"learning_rate": 6.068963198244307e-06,
"loss": 1.9910547733306885,
"step": 3023
},
{
"epoch": 1.4658264663111973,
"grad_norm": 9.788985232462387,
"learning_rate": 6.066208130611813e-06,
"loss": 1.4049584865570068,
"step": 3024
},
{
"epoch": 1.4663111972855065,
"grad_norm": 12.48056848658199,
"learning_rate": 6.063452723798461e-06,
"loss": 1.1312198638916016,
"step": 3025
},
{
"epoch": 1.4667959282598158,
"grad_norm": 48.8122893406774,
"learning_rate": 6.060696978680797e-06,
"loss": 1.5637060403823853,
"step": 3026
},
{
"epoch": 1.467280659234125,
"grad_norm": 8.65236584068153,
"learning_rate": 6.057940896135478e-06,
"loss": 2.100719451904297,
"step": 3027
},
{
"epoch": 1.4677653902084344,
"grad_norm": 13.43011664216835,
"learning_rate": 6.055184477039265e-06,
"loss": 1.3645853996276855,
"step": 3028
},
{
"epoch": 1.4682501211827437,
"grad_norm": 10.55613739720471,
"learning_rate": 6.052427722269024e-06,
"loss": 1.3609590530395508,
"step": 3029
},
{
"epoch": 1.468734852157053,
"grad_norm": 7.918326577243454,
"learning_rate": 6.0496706327017355e-06,
"loss": 1.7090437412261963,
"step": 3030
},
{
"epoch": 1.4692195831313621,
"grad_norm": 14.088030944450658,
"learning_rate": 6.046913209214476e-06,
"loss": 2.459972858428955,
"step": 3031
},
{
"epoch": 1.4697043141056714,
"grad_norm": 12.206440839936725,
"learning_rate": 6.044155452684439e-06,
"loss": 1.7158188819885254,
"step": 3032
},
{
"epoch": 1.4701890450799806,
"grad_norm": 10.27661187457141,
"learning_rate": 6.0413973639889145e-06,
"loss": 1.3632056713104248,
"step": 3033
},
{
"epoch": 1.4706737760542898,
"grad_norm": 7.333002189793791,
"learning_rate": 6.038638944005305e-06,
"loss": 1.3683981895446777,
"step": 3034
},
{
"epoch": 1.471158507028599,
"grad_norm": 13.463520386866012,
"learning_rate": 6.0358801936111145e-06,
"loss": 1.438292145729065,
"step": 3035
},
{
"epoch": 1.4716432380029083,
"grad_norm": 11.41176424718711,
"learning_rate": 6.033121113683955e-06,
"loss": 1.9122142791748047,
"step": 3036
},
{
"epoch": 1.4721279689772175,
"grad_norm": 14.773578617478757,
"learning_rate": 6.030361705101539e-06,
"loss": 2.07595157623291,
"step": 3037
},
{
"epoch": 1.4726126999515268,
"grad_norm": 14.07125736264263,
"learning_rate": 6.027601968741688e-06,
"loss": 1.5198700428009033,
"step": 3038
},
{
"epoch": 1.4730974309258362,
"grad_norm": 23.443268020650688,
"learning_rate": 6.024841905482327e-06,
"loss": 1.2152661085128784,
"step": 3039
},
{
"epoch": 1.4735821619001455,
"grad_norm": 10.074895559010645,
"learning_rate": 6.022081516201483e-06,
"loss": 1.490390419960022,
"step": 3040
},
{
"epoch": 1.4740668928744547,
"grad_norm": 13.375166189510598,
"learning_rate": 6.019320801777286e-06,
"loss": 1.1661713123321533,
"step": 3041
},
{
"epoch": 1.474551623848764,
"grad_norm": 10.044669757386883,
"learning_rate": 6.016559763087973e-06,
"loss": 2.0665180683135986,
"step": 3042
},
{
"epoch": 1.4750363548230732,
"grad_norm": 8.593374308509622,
"learning_rate": 6.013798401011883e-06,
"loss": 1.1033211946487427,
"step": 3043
},
{
"epoch": 1.4755210857973824,
"grad_norm": 7.709118849696485,
"learning_rate": 6.011036716427456e-06,
"loss": 1.1352871656417847,
"step": 3044
},
{
"epoch": 1.4760058167716918,
"grad_norm": 13.684974442747627,
"learning_rate": 6.008274710213235e-06,
"loss": 1.8956313133239746,
"step": 3045
},
{
"epoch": 1.476490547746001,
"grad_norm": 12.606359824995248,
"learning_rate": 6.005512383247869e-06,
"loss": 1.986720085144043,
"step": 3046
},
{
"epoch": 1.4769752787203103,
"grad_norm": 12.964417710167076,
"learning_rate": 6.002749736410104e-06,
"loss": 1.9185357093811035,
"step": 3047
},
{
"epoch": 1.4774600096946195,
"grad_norm": 11.106019889742223,
"learning_rate": 5.9999867705787905e-06,
"loss": 1.5218660831451416,
"step": 3048
},
{
"epoch": 1.4779447406689288,
"grad_norm": 11.471597173962005,
"learning_rate": 5.997223486632878e-06,
"loss": 1.7303510904312134,
"step": 3049
},
{
"epoch": 1.478429471643238,
"grad_norm": 19.429461392062414,
"learning_rate": 5.994459885451423e-06,
"loss": 1.4501875638961792,
"step": 3050
},
{
"epoch": 1.4789142026175472,
"grad_norm": 10.038135992510098,
"learning_rate": 5.991695967913576e-06,
"loss": 1.035030484199524,
"step": 3051
},
{
"epoch": 1.4793989335918565,
"grad_norm": 8.41481885388534,
"learning_rate": 5.988931734898591e-06,
"loss": 1.4514412879943848,
"step": 3052
},
{
"epoch": 1.4798836645661657,
"grad_norm": 8.536622058705314,
"learning_rate": 5.986167187285824e-06,
"loss": 1.61575186252594,
"step": 3053
},
{
"epoch": 1.480368395540475,
"grad_norm": 14.38297823590022,
"learning_rate": 5.983402325954729e-06,
"loss": 1.3650301694869995,
"step": 3054
},
{
"epoch": 1.4808531265147842,
"grad_norm": 10.419365621289275,
"learning_rate": 5.9806371517848605e-06,
"loss": 1.7909021377563477,
"step": 3055
},
{
"epoch": 1.4813378574890936,
"grad_norm": 11.74383052676854,
"learning_rate": 5.977871665655871e-06,
"loss": 1.1922688484191895,
"step": 3056
},
{
"epoch": 1.4818225884634029,
"grad_norm": 16.24720548460729,
"learning_rate": 5.9751058684475174e-06,
"loss": 2.6116392612457275,
"step": 3057
},
{
"epoch": 1.482307319437712,
"grad_norm": 8.730864117339257,
"learning_rate": 5.972339761039647e-06,
"loss": 1.385637879371643,
"step": 3058
},
{
"epoch": 1.4827920504120213,
"grad_norm": 11.046352640844235,
"learning_rate": 5.969573344312215e-06,
"loss": 1.3691716194152832,
"step": 3059
},
{
"epoch": 1.4832767813863306,
"grad_norm": 11.556178379134407,
"learning_rate": 5.966806619145268e-06,
"loss": 1.110331416130066,
"step": 3060
},
{
"epoch": 1.4837615123606398,
"grad_norm": 9.62100788986088,
"learning_rate": 5.964039586418953e-06,
"loss": 1.2661356925964355,
"step": 3061
},
{
"epoch": 1.484246243334949,
"grad_norm": 12.603596070439234,
"learning_rate": 5.961272247013517e-06,
"loss": 2.372647285461426,
"step": 3062
},
{
"epoch": 1.4847309743092585,
"grad_norm": 17.30848099179714,
"learning_rate": 5.958504601809301e-06,
"loss": 1.413576602935791,
"step": 3063
},
{
"epoch": 1.4852157052835677,
"grad_norm": 9.097150957498743,
"learning_rate": 5.955736651686745e-06,
"loss": 1.1444735527038574,
"step": 3064
},
{
"epoch": 1.485700436257877,
"grad_norm": 9.506644540008805,
"learning_rate": 5.952968397526388e-06,
"loss": 1.3547585010528564,
"step": 3065
},
{
"epoch": 1.4861851672321862,
"grad_norm": 12.14637492566965,
"learning_rate": 5.950199840208861e-06,
"loss": 1.0756683349609375,
"step": 3066
},
{
"epoch": 1.4866698982064954,
"grad_norm": 13.55962687577564,
"learning_rate": 5.947430980614898e-06,
"loss": 1.2979472875595093,
"step": 3067
},
{
"epoch": 1.4871546291808047,
"grad_norm": 12.067165851328228,
"learning_rate": 5.944661819625319e-06,
"loss": 1.7280662059783936,
"step": 3068
},
{
"epoch": 1.4876393601551139,
"grad_norm": 13.569114170145744,
"learning_rate": 5.941892358121053e-06,
"loss": 1.43258535861969,
"step": 3069
},
{
"epoch": 1.4881240911294231,
"grad_norm": 14.635017304576108,
"learning_rate": 5.9391225969831145e-06,
"loss": 1.7134771347045898,
"step": 3070
},
{
"epoch": 1.4886088221037324,
"grad_norm": 17.07498177741844,
"learning_rate": 5.936352537092616e-06,
"loss": 1.6405296325683594,
"step": 3071
},
{
"epoch": 1.4890935530780416,
"grad_norm": 18.19168917691867,
"learning_rate": 5.933582179330768e-06,
"loss": 1.0638742446899414,
"step": 3072
},
{
"epoch": 1.4895782840523508,
"grad_norm": 17.536650388334024,
"learning_rate": 5.93081152457887e-06,
"loss": 1.2632195949554443,
"step": 3073
},
{
"epoch": 1.4900630150266603,
"grad_norm": 12.812797302596275,
"learning_rate": 5.928040573718323e-06,
"loss": 2.3429245948791504,
"step": 3074
},
{
"epoch": 1.4905477460009695,
"grad_norm": 12.328995371655326,
"learning_rate": 5.925269327630615e-06,
"loss": 1.024444341659546,
"step": 3075
},
{
"epoch": 1.4910324769752787,
"grad_norm": 7.23943320116158,
"learning_rate": 5.922497787197334e-06,
"loss": 2.196528434753418,
"step": 3076
},
{
"epoch": 1.491517207949588,
"grad_norm": 8.917846932708299,
"learning_rate": 5.91972595330016e-06,
"loss": 1.419363260269165,
"step": 3077
},
{
"epoch": 1.4920019389238972,
"grad_norm": 9.97360133526632,
"learning_rate": 5.916953826820864e-06,
"loss": 2.079148530960083,
"step": 3078
},
{
"epoch": 1.4924866698982064,
"grad_norm": 16.14698194488825,
"learning_rate": 5.914181408641311e-06,
"loss": 1.4103670120239258,
"step": 3079
},
{
"epoch": 1.4929714008725157,
"grad_norm": 7.0819647205772185,
"learning_rate": 5.911408699643458e-06,
"loss": 1.7856640815734863,
"step": 3080
},
{
"epoch": 1.4934561318468251,
"grad_norm": 12.266295924673038,
"learning_rate": 5.908635700709361e-06,
"loss": 1.505777359008789,
"step": 3081
},
{
"epoch": 1.4939408628211344,
"grad_norm": 8.19794655946528,
"learning_rate": 5.905862412721157e-06,
"loss": 1.023951530456543,
"step": 3082
},
{
"epoch": 1.4944255937954436,
"grad_norm": 11.55899806691225,
"learning_rate": 5.9030888365610865e-06,
"loss": 1.8475290536880493,
"step": 3083
},
{
"epoch": 1.4949103247697528,
"grad_norm": 13.249612969402186,
"learning_rate": 5.900314973111473e-06,
"loss": 1.7807238101959229,
"step": 3084
},
{
"epoch": 1.495395055744062,
"grad_norm": 10.05385851725494,
"learning_rate": 5.897540823254735e-06,
"loss": 1.5290724039077759,
"step": 3085
},
{
"epoch": 1.4958797867183713,
"grad_norm": 16.92168568606904,
"learning_rate": 5.894766387873382e-06,
"loss": 1.91074538230896,
"step": 3086
},
{
"epoch": 1.4963645176926805,
"grad_norm": 14.50526322772532,
"learning_rate": 5.891991667850014e-06,
"loss": 1.520146369934082,
"step": 3087
},
{
"epoch": 1.4968492486669898,
"grad_norm": 10.134324036818196,
"learning_rate": 5.889216664067322e-06,
"loss": 1.600569486618042,
"step": 3088
},
{
"epoch": 1.497333979641299,
"grad_norm": 20.253665789814182,
"learning_rate": 5.8864413774080875e-06,
"loss": 2.582176923751831,
"step": 3089
},
{
"epoch": 1.4978187106156082,
"grad_norm": 15.39865866840433,
"learning_rate": 5.883665808755179e-06,
"loss": 1.997237205505371,
"step": 3090
},
{
"epoch": 1.4983034415899175,
"grad_norm": 8.953066740554844,
"learning_rate": 5.880889958991559e-06,
"loss": 1.5953449010849,
"step": 3091
},
{
"epoch": 1.498788172564227,
"grad_norm": 8.7057014605224,
"learning_rate": 5.878113829000279e-06,
"loss": 1.8357727527618408,
"step": 3092
},
{
"epoch": 1.4992729035385362,
"grad_norm": 10.943489033414648,
"learning_rate": 5.875337419664475e-06,
"loss": 1.8009201288223267,
"step": 3093
},
{
"epoch": 1.4997576345128454,
"grad_norm": 13.554296027524789,
"learning_rate": 5.872560731867376e-06,
"loss": 1.685943841934204,
"step": 3094
},
{
"epoch": 1.5002423654871546,
"grad_norm": 7.253670656709753,
"learning_rate": 5.8697837664923e-06,
"loss": 1.1856329441070557,
"step": 3095
},
{
"epoch": 1.5007270964614638,
"grad_norm": 10.210820606495039,
"learning_rate": 5.86700652442265e-06,
"loss": 1.3389616012573242,
"step": 3096
},
{
"epoch": 1.5012118274357733,
"grad_norm": 12.092908545271543,
"learning_rate": 5.864229006541921e-06,
"loss": 2.0217485427856445,
"step": 3097
},
{
"epoch": 1.5016965584100825,
"grad_norm": 7.759771590749809,
"learning_rate": 5.86145121373369e-06,
"loss": 1.7653205394744873,
"step": 3098
},
{
"epoch": 1.5021812893843918,
"grad_norm": 15.998889583934087,
"learning_rate": 5.858673146881629e-06,
"loss": 1.512422800064087,
"step": 3099
},
{
"epoch": 1.502666020358701,
"grad_norm": 10.219025743333521,
"learning_rate": 5.855894806869493e-06,
"loss": 1.5851058959960938,
"step": 3100
},
{
"epoch": 1.5031507513330102,
"grad_norm": 12.509967405426666,
"learning_rate": 5.853116194581121e-06,
"loss": 2.0941951274871826,
"step": 3101
},
{
"epoch": 1.5036354823073195,
"grad_norm": 8.484053974774225,
"learning_rate": 5.850337310900446e-06,
"loss": 1.7887934446334839,
"step": 3102
},
{
"epoch": 1.5041202132816287,
"grad_norm": 15.196058724727822,
"learning_rate": 5.8475581567114805e-06,
"loss": 0.9526448249816895,
"step": 3103
},
{
"epoch": 1.504604944255938,
"grad_norm": 14.136100009048983,
"learning_rate": 5.844778732898325e-06,
"loss": 1.8056544065475464,
"step": 3104
},
{
"epoch": 1.5050896752302472,
"grad_norm": 7.7663922707837205,
"learning_rate": 5.841999040345168e-06,
"loss": 1.511648416519165,
"step": 3105
},
{
"epoch": 1.5055744062045564,
"grad_norm": 11.212635980622899,
"learning_rate": 5.8392190799362825e-06,
"loss": 1.7690919637680054,
"step": 3106
},
{
"epoch": 1.5060591371788656,
"grad_norm": 14.075468606978463,
"learning_rate": 5.836438852556027e-06,
"loss": 1.5805039405822754,
"step": 3107
},
{
"epoch": 1.5065438681531749,
"grad_norm": 12.366467351349403,
"learning_rate": 5.833658359088841e-06,
"loss": 1.6354631185531616,
"step": 3108
},
{
"epoch": 1.507028599127484,
"grad_norm": 30.80750424781981,
"learning_rate": 5.830877600419252e-06,
"loss": 1.5974907875061035,
"step": 3109
},
{
"epoch": 1.5075133301017936,
"grad_norm": 19.026492486287864,
"learning_rate": 5.828096577431874e-06,
"loss": 1.5657811164855957,
"step": 3110
},
{
"epoch": 1.5079980610761028,
"grad_norm": 10.901264090818803,
"learning_rate": 5.825315291011403e-06,
"loss": 1.70612370967865,
"step": 3111
},
{
"epoch": 1.508482792050412,
"grad_norm": 11.61826703460747,
"learning_rate": 5.8225337420426154e-06,
"loss": 2.0540218353271484,
"step": 3112
},
{
"epoch": 1.5089675230247213,
"grad_norm": 7.259146141638686,
"learning_rate": 5.819751931410376e-06,
"loss": 1.6169822216033936,
"step": 3113
},
{
"epoch": 1.5094522539990305,
"grad_norm": 11.565701571190187,
"learning_rate": 5.816969859999633e-06,
"loss": 1.1805297136306763,
"step": 3114
},
{
"epoch": 1.50993698497334,
"grad_norm": 9.461995649747093,
"learning_rate": 5.814187528695412e-06,
"loss": 1.403253197669983,
"step": 3115
},
{
"epoch": 1.5104217159476492,
"grad_norm": 18.2952749855259,
"learning_rate": 5.8114049383828274e-06,
"loss": 1.3992559909820557,
"step": 3116
},
{
"epoch": 1.5109064469219584,
"grad_norm": 11.267962195406973,
"learning_rate": 5.808622089947072e-06,
"loss": 1.0874629020690918,
"step": 3117
},
{
"epoch": 1.5113911778962676,
"grad_norm": 14.280273164521173,
"learning_rate": 5.805838984273424e-06,
"loss": 2.1385393142700195,
"step": 3118
},
{
"epoch": 1.5118759088705769,
"grad_norm": 31.138253346201914,
"learning_rate": 5.80305562224724e-06,
"loss": 2.942538261413574,
"step": 3119
},
{
"epoch": 1.5123606398448861,
"grad_norm": 21.19291554105078,
"learning_rate": 5.800272004753961e-06,
"loss": 1.3851125240325928,
"step": 3120
},
{
"epoch": 1.5128453708191953,
"grad_norm": 12.314321710168652,
"learning_rate": 5.797488132679107e-06,
"loss": 1.5632003545761108,
"step": 3121
},
{
"epoch": 1.5133301017935046,
"grad_norm": 11.257602433186234,
"learning_rate": 5.79470400690828e-06,
"loss": 1.7046034336090088,
"step": 3122
},
{
"epoch": 1.5138148327678138,
"grad_norm": 9.55853034248335,
"learning_rate": 5.791919628327165e-06,
"loss": 1.9653615951538086,
"step": 3123
},
{
"epoch": 1.514299563742123,
"grad_norm": 13.521292641937213,
"learning_rate": 5.789134997821522e-06,
"loss": 1.8219507932662964,
"step": 3124
},
{
"epoch": 1.5147842947164323,
"grad_norm": 17.645335490458756,
"learning_rate": 5.786350116277195e-06,
"loss": 1.7837233543395996,
"step": 3125
},
{
"epoch": 1.5152690256907415,
"grad_norm": 11.415158885566298,
"learning_rate": 5.78356498458011e-06,
"loss": 1.093092918395996,
"step": 3126
},
{
"epoch": 1.5157537566650507,
"grad_norm": 15.999248418026509,
"learning_rate": 5.780779603616268e-06,
"loss": 1.7339609861373901,
"step": 3127
},
{
"epoch": 1.5162384876393602,
"grad_norm": 6.052045272005883,
"learning_rate": 5.777993974271749e-06,
"loss": 1.137744426727295,
"step": 3128
},
{
"epoch": 1.5167232186136694,
"grad_norm": 13.04348976329714,
"learning_rate": 5.775208097432716e-06,
"loss": 1.9427688121795654,
"step": 3129
},
{
"epoch": 1.5172079495879787,
"grad_norm": 10.309424567558674,
"learning_rate": 5.772421973985412e-06,
"loss": 1.7186199426651,
"step": 3130
},
{
"epoch": 1.517692680562288,
"grad_norm": 10.968405909827583,
"learning_rate": 5.769635604816148e-06,
"loss": 1.6170837879180908,
"step": 3131
},
{
"epoch": 1.5181774115365974,
"grad_norm": 18.54405327588475,
"learning_rate": 5.766848990811327e-06,
"loss": 1.0714950561523438,
"step": 3132
},
{
"epoch": 1.5186621425109066,
"grad_norm": 15.883643374467262,
"learning_rate": 5.76406213285742e-06,
"loss": 1.7230950593948364,
"step": 3133
},
{
"epoch": 1.5191468734852158,
"grad_norm": 11.889408472359822,
"learning_rate": 5.761275031840979e-06,
"loss": 1.146807074546814,
"step": 3134
},
{
"epoch": 1.519631604459525,
"grad_norm": 10.780568700226302,
"learning_rate": 5.758487688648635e-06,
"loss": 1.3926653861999512,
"step": 3135
},
{
"epoch": 1.5201163354338343,
"grad_norm": 13.32128165674282,
"learning_rate": 5.755700104167093e-06,
"loss": 1.714560627937317,
"step": 3136
},
{
"epoch": 1.5206010664081435,
"grad_norm": 8.785396610469686,
"learning_rate": 5.752912279283136e-06,
"loss": 2.064635753631592,
"step": 3137
},
{
"epoch": 1.5210857973824528,
"grad_norm": 8.882472294499419,
"learning_rate": 5.750124214883623e-06,
"loss": 1.920058250427246,
"step": 3138
},
{
"epoch": 1.521570528356762,
"grad_norm": 28.772545872195455,
"learning_rate": 5.747335911855492e-06,
"loss": 1.3404093980789185,
"step": 3139
},
{
"epoch": 1.5220552593310712,
"grad_norm": 8.599519857622953,
"learning_rate": 5.744547371085752e-06,
"loss": 1.686621069908142,
"step": 3140
},
{
"epoch": 1.5225399903053805,
"grad_norm": 12.567658533052676,
"learning_rate": 5.741758593461493e-06,
"loss": 1.7730522155761719,
"step": 3141
},
{
"epoch": 1.5230247212796897,
"grad_norm": 6.504668983434017,
"learning_rate": 5.738969579869875e-06,
"loss": 1.4583998918533325,
"step": 3142
},
{
"epoch": 1.523509452253999,
"grad_norm": 9.09107797751124,
"learning_rate": 5.7361803311981356e-06,
"loss": 1.568248987197876,
"step": 3143
},
{
"epoch": 1.5239941832283082,
"grad_norm": 14.273479201281472,
"learning_rate": 5.733390848333591e-06,
"loss": 1.4136570692062378,
"step": 3144
},
{
"epoch": 1.5244789142026174,
"grad_norm": 18.79662800277242,
"learning_rate": 5.730601132163623e-06,
"loss": 1.7999093532562256,
"step": 3145
},
{
"epoch": 1.5249636451769268,
"grad_norm": 8.348621444788701,
"learning_rate": 5.727811183575697e-06,
"loss": 1.2896095514297485,
"step": 3146
},
{
"epoch": 1.525448376151236,
"grad_norm": 10.846117286834193,
"learning_rate": 5.725021003457345e-06,
"loss": 2.044335126876831,
"step": 3147
},
{
"epoch": 1.5259331071255453,
"grad_norm": 9.862979653679066,
"learning_rate": 5.722230592696178e-06,
"loss": 0.9014812707901001,
"step": 3148
},
{
"epoch": 1.5264178380998545,
"grad_norm": 14.740001946846576,
"learning_rate": 5.719439952179878e-06,
"loss": 0.997398853302002,
"step": 3149
},
{
"epoch": 1.526902569074164,
"grad_norm": 10.580042304791428,
"learning_rate": 5.716649082796199e-06,
"loss": 1.6253154277801514,
"step": 3150
},
{
"epoch": 1.5273873000484732,
"grad_norm": 9.778683652528484,
"learning_rate": 5.713857985432969e-06,
"loss": 1.2620056867599487,
"step": 3151
},
{
"epoch": 1.5278720310227825,
"grad_norm": 9.583852098427299,
"learning_rate": 5.711066660978089e-06,
"loss": 1.5131386518478394,
"step": 3152
},
{
"epoch": 1.5283567619970917,
"grad_norm": 10.163138342432864,
"learning_rate": 5.708275110319532e-06,
"loss": 1.0960172414779663,
"step": 3153
},
{
"epoch": 1.528841492971401,
"grad_norm": 10.013838962178367,
"learning_rate": 5.705483334345343e-06,
"loss": 1.1687841415405273,
"step": 3154
},
{
"epoch": 1.5293262239457102,
"grad_norm": 16.099300929660675,
"learning_rate": 5.702691333943638e-06,
"loss": 1.6134662628173828,
"step": 3155
},
{
"epoch": 1.5298109549200194,
"grad_norm": 9.950299041564792,
"learning_rate": 5.699899110002607e-06,
"loss": 1.2402589321136475,
"step": 3156
},
{
"epoch": 1.5302956858943286,
"grad_norm": 17.032863196475752,
"learning_rate": 5.697106663410503e-06,
"loss": 1.3126130104064941,
"step": 3157
},
{
"epoch": 1.5307804168686379,
"grad_norm": 11.593691924994397,
"learning_rate": 5.694313995055663e-06,
"loss": 1.2917590141296387,
"step": 3158
},
{
"epoch": 1.531265147842947,
"grad_norm": 11.788437745901252,
"learning_rate": 5.691521105826483e-06,
"loss": 1.6970527172088623,
"step": 3159
},
{
"epoch": 1.5317498788172563,
"grad_norm": 24.335701126174,
"learning_rate": 5.688727996611434e-06,
"loss": 1.0946605205535889,
"step": 3160
},
{
"epoch": 1.5322346097915656,
"grad_norm": 8.55415652884847,
"learning_rate": 5.685934668299057e-06,
"loss": 1.457349419593811,
"step": 3161
},
{
"epoch": 1.5327193407658748,
"grad_norm": 11.33893241240686,
"learning_rate": 5.683141121777963e-06,
"loss": 1.1582280397415161,
"step": 3162
},
{
"epoch": 1.5332040717401842,
"grad_norm": 11.98523701453036,
"learning_rate": 5.6803473579368305e-06,
"loss": 1.5872471332550049,
"step": 3163
},
{
"epoch": 1.5336888027144935,
"grad_norm": 17.688955416017464,
"learning_rate": 5.6775533776644095e-06,
"loss": 1.608926773071289,
"step": 3164
},
{
"epoch": 1.5341735336888027,
"grad_norm": 16.514150840674674,
"learning_rate": 5.6747591818495185e-06,
"loss": 1.9056482315063477,
"step": 3165
},
{
"epoch": 1.534658264663112,
"grad_norm": 9.136452585557414,
"learning_rate": 5.67196477138104e-06,
"loss": 1.897186279296875,
"step": 3166
},
{
"epoch": 1.5351429956374212,
"grad_norm": 17.91768508233929,
"learning_rate": 5.6691701471479336e-06,
"loss": 2.348006010055542,
"step": 3167
},
{
"epoch": 1.5356277266117306,
"grad_norm": 7.80130974690158,
"learning_rate": 5.666375310039216e-06,
"loss": 1.8471159934997559,
"step": 3168
},
{
"epoch": 1.5361124575860399,
"grad_norm": 19.36891154543662,
"learning_rate": 5.663580260943981e-06,
"loss": 2.09128737449646,
"step": 3169
},
{
"epoch": 1.536597188560349,
"grad_norm": 10.216876295333842,
"learning_rate": 5.6607850007513876e-06,
"loss": 1.5239460468292236,
"step": 3170
},
{
"epoch": 1.5370819195346583,
"grad_norm": 10.078980475585633,
"learning_rate": 5.657989530350658e-06,
"loss": 1.4794437885284424,
"step": 3171
},
{
"epoch": 1.5375666505089676,
"grad_norm": 13.952621519505646,
"learning_rate": 5.6551938506310844e-06,
"loss": 1.3064488172531128,
"step": 3172
},
{
"epoch": 1.5380513814832768,
"grad_norm": 9.52343664647949,
"learning_rate": 5.652397962482025e-06,
"loss": 1.8463671207427979,
"step": 3173
},
{
"epoch": 1.538536112457586,
"grad_norm": 20.187203972358393,
"learning_rate": 5.649601866792906e-06,
"loss": 1.5476722717285156,
"step": 3174
},
{
"epoch": 1.5390208434318953,
"grad_norm": 22.064742697974133,
"learning_rate": 5.646805564453216e-06,
"loss": 2.2858152389526367,
"step": 3175
},
{
"epoch": 1.5395055744062045,
"grad_norm": 11.672857564643026,
"learning_rate": 5.6440090563525126e-06,
"loss": 1.3743767738342285,
"step": 3176
},
{
"epoch": 1.5399903053805137,
"grad_norm": 7.588163152451002,
"learning_rate": 5.641212343380418e-06,
"loss": 1.7196424007415771,
"step": 3177
},
{
"epoch": 1.540475036354823,
"grad_norm": 7.4060732124422,
"learning_rate": 5.638415426426617e-06,
"loss": 0.7258339524269104,
"step": 3178
},
{
"epoch": 1.5409597673291322,
"grad_norm": 13.008552493355362,
"learning_rate": 5.635618306380864e-06,
"loss": 1.5131597518920898,
"step": 3179
},
{
"epoch": 1.5414444983034414,
"grad_norm": 8.449472998363845,
"learning_rate": 5.632820984132973e-06,
"loss": 1.7521719932556152,
"step": 3180
},
{
"epoch": 1.541929229277751,
"grad_norm": 15.47211255415256,
"learning_rate": 5.6300234605728275e-06,
"loss": 1.612574815750122,
"step": 3181
},
{
"epoch": 1.5424139602520601,
"grad_norm": 16.598269350155803,
"learning_rate": 5.627225736590371e-06,
"loss": 2.1137583255767822,
"step": 3182
},
{
"epoch": 1.5428986912263694,
"grad_norm": 8.883051606559631,
"learning_rate": 5.624427813075613e-06,
"loss": 1.772160291671753,
"step": 3183
},
{
"epoch": 1.5433834222006786,
"grad_norm": 18.876479121376523,
"learning_rate": 5.621629690918623e-06,
"loss": 1.331835389137268,
"step": 3184
},
{
"epoch": 1.543868153174988,
"grad_norm": 9.478813724966455,
"learning_rate": 5.6188313710095375e-06,
"loss": 1.5557994842529297,
"step": 3185
},
{
"epoch": 1.5443528841492973,
"grad_norm": 15.709427701635232,
"learning_rate": 5.6160328542385576e-06,
"loss": 2.0117568969726562,
"step": 3186
},
{
"epoch": 1.5448376151236065,
"grad_norm": 23.570329000788853,
"learning_rate": 5.61323414149594e-06,
"loss": 1.7978945970535278,
"step": 3187
},
{
"epoch": 1.5453223460979157,
"grad_norm": 11.340836542978627,
"learning_rate": 5.6104352336720104e-06,
"loss": 0.9448959827423096,
"step": 3188
},
{
"epoch": 1.545807077072225,
"grad_norm": 9.55809082305078,
"learning_rate": 5.607636131657154e-06,
"loss": 1.598088264465332,
"step": 3189
},
{
"epoch": 1.5462918080465342,
"grad_norm": 10.18244602456412,
"learning_rate": 5.604836836341816e-06,
"loss": 2.0254077911376953,
"step": 3190
},
{
"epoch": 1.5467765390208434,
"grad_norm": 10.020240009228798,
"learning_rate": 5.602037348616506e-06,
"loss": 1.2766050100326538,
"step": 3191
},
{
"epoch": 1.5472612699951527,
"grad_norm": 13.03604119170089,
"learning_rate": 5.599237669371792e-06,
"loss": 1.1567262411117554,
"step": 3192
},
{
"epoch": 1.547746000969462,
"grad_norm": 8.504107140070515,
"learning_rate": 5.5964377994983085e-06,
"loss": 1.635516881942749,
"step": 3193
},
{
"epoch": 1.5482307319437711,
"grad_norm": 8.332084149991635,
"learning_rate": 5.5936377398867434e-06,
"loss": 1.8262708187103271,
"step": 3194
},
{
"epoch": 1.5487154629180804,
"grad_norm": 11.881693803650297,
"learning_rate": 5.5908374914278495e-06,
"loss": 1.5955265760421753,
"step": 3195
},
{
"epoch": 1.5492001938923896,
"grad_norm": 12.041654190991727,
"learning_rate": 5.588037055012439e-06,
"loss": 1.7683839797973633,
"step": 3196
},
{
"epoch": 1.5496849248666988,
"grad_norm": 12.043738341250556,
"learning_rate": 5.585236431531382e-06,
"loss": 2.0858216285705566,
"step": 3197
},
{
"epoch": 1.550169655841008,
"grad_norm": 6.500546781568657,
"learning_rate": 5.5824356218756115e-06,
"loss": 1.5161316394805908,
"step": 3198
},
{
"epoch": 1.5506543868153175,
"grad_norm": 10.5568212779916,
"learning_rate": 5.5796346269361145e-06,
"loss": 2.2158145904541016,
"step": 3199
},
{
"epoch": 1.5511391177896268,
"grad_norm": 8.07400015424476,
"learning_rate": 5.576833447603943e-06,
"loss": 0.9138671159744263,
"step": 3200
},
{
"epoch": 1.551623848763936,
"grad_norm": 11.217331306372436,
"learning_rate": 5.574032084770205e-06,
"loss": 1.499199628829956,
"step": 3201
},
{
"epoch": 1.5521085797382452,
"grad_norm": 28.488750986888313,
"learning_rate": 5.5712305393260645e-06,
"loss": 1.7508792877197266,
"step": 3202
},
{
"epoch": 1.5525933107125547,
"grad_norm": 9.658268487946794,
"learning_rate": 5.568428812162747e-06,
"loss": 2.1385388374328613,
"step": 3203
},
{
"epoch": 1.553078041686864,
"grad_norm": 8.8662795825845,
"learning_rate": 5.565626904171535e-06,
"loss": 2.8096837997436523,
"step": 3204
},
{
"epoch": 1.5535627726611732,
"grad_norm": 8.11644397397769,
"learning_rate": 5.562824816243769e-06,
"loss": 1.6745256185531616,
"step": 3205
},
{
"epoch": 1.5540475036354824,
"grad_norm": 7.804422644794596,
"learning_rate": 5.560022549270843e-06,
"loss": 1.4386223554611206,
"step": 3206
},
{
"epoch": 1.5545322346097916,
"grad_norm": 11.520161622635273,
"learning_rate": 5.557220104144212e-06,
"loss": 1.6010260581970215,
"step": 3207
},
{
"epoch": 1.5550169655841009,
"grad_norm": 14.543537709226161,
"learning_rate": 5.554417481755387e-06,
"loss": 2.0326883792877197,
"step": 3208
},
{
"epoch": 1.55550169655841,
"grad_norm": 14.220213152977445,
"learning_rate": 5.551614682995936e-06,
"loss": 1.8532507419586182,
"step": 3209
},
{
"epoch": 1.5559864275327193,
"grad_norm": 11.920287797788248,
"learning_rate": 5.5488117087574785e-06,
"loss": 1.325872778892517,
"step": 3210
},
{
"epoch": 1.5564711585070286,
"grad_norm": 12.102142042514275,
"learning_rate": 5.5460085599316974e-06,
"loss": 1.5745577812194824,
"step": 3211
},
{
"epoch": 1.5569558894813378,
"grad_norm": 16.065016608232437,
"learning_rate": 5.543205237410325e-06,
"loss": 1.34876549243927,
"step": 3212
},
{
"epoch": 1.557440620455647,
"grad_norm": 12.15308235918269,
"learning_rate": 5.540401742085152e-06,
"loss": 2.4127120971679688,
"step": 3213
},
{
"epoch": 1.5579253514299563,
"grad_norm": 27.848715408315442,
"learning_rate": 5.53759807484802e-06,
"loss": 1.6328977346420288,
"step": 3214
},
{
"epoch": 1.5584100824042655,
"grad_norm": 9.68352505437515,
"learning_rate": 5.5347942365908315e-06,
"loss": 1.9171411991119385,
"step": 3215
},
{
"epoch": 1.558894813378575,
"grad_norm": 19.584425383368817,
"learning_rate": 5.5319902282055404e-06,
"loss": 1.4641289710998535,
"step": 3216
},
{
"epoch": 1.5593795443528842,
"grad_norm": 13.411496469818522,
"learning_rate": 5.529186050584151e-06,
"loss": 2.021831512451172,
"step": 3217
},
{
"epoch": 1.5598642753271934,
"grad_norm": 11.232188125304388,
"learning_rate": 5.526381704618729e-06,
"loss": 1.106504201889038,
"step": 3218
},
{
"epoch": 1.5603490063015026,
"grad_norm": 9.660060185606902,
"learning_rate": 5.523577191201386e-06,
"loss": 1.473414659500122,
"step": 3219
},
{
"epoch": 1.5608337372758119,
"grad_norm": 9.18987220457937,
"learning_rate": 5.520772511224293e-06,
"loss": 1.85602605342865,
"step": 3220
},
{
"epoch": 1.5613184682501213,
"grad_norm": 13.520623952813512,
"learning_rate": 5.517967665579671e-06,
"loss": 1.7199385166168213,
"step": 3221
},
{
"epoch": 1.5618031992244306,
"grad_norm": 10.621122003805144,
"learning_rate": 5.515162655159793e-06,
"loss": 1.4154744148254395,
"step": 3222
},
{
"epoch": 1.5622879301987398,
"grad_norm": 14.364488897696997,
"learning_rate": 5.5123574808569854e-06,
"loss": 1.8047477006912231,
"step": 3223
},
{
"epoch": 1.562772661173049,
"grad_norm": 10.63387476616787,
"learning_rate": 5.5095521435636304e-06,
"loss": 1.5842938423156738,
"step": 3224
},
{
"epoch": 1.5632573921473583,
"grad_norm": 12.470353053350642,
"learning_rate": 5.506746644172154e-06,
"loss": 1.0355643033981323,
"step": 3225
},
{
"epoch": 1.5637421231216675,
"grad_norm": 9.735443060044213,
"learning_rate": 5.503940983575041e-06,
"loss": 1.2376421689987183,
"step": 3226
},
{
"epoch": 1.5642268540959767,
"grad_norm": 15.067584815779925,
"learning_rate": 5.501135162664824e-06,
"loss": 1.094757080078125,
"step": 3227
},
{
"epoch": 1.564711585070286,
"grad_norm": 11.978911857879158,
"learning_rate": 5.498329182334088e-06,
"loss": 1.3216087818145752,
"step": 3228
},
{
"epoch": 1.5651963160445952,
"grad_norm": 13.313869194530055,
"learning_rate": 5.495523043475466e-06,
"loss": 1.3105000257492065,
"step": 3229
},
{
"epoch": 1.5656810470189044,
"grad_norm": 13.69305052726858,
"learning_rate": 5.492716746981646e-06,
"loss": 2.191154956817627,
"step": 3230
},
{
"epoch": 1.5661657779932137,
"grad_norm": 10.72564828509121,
"learning_rate": 5.489910293745364e-06,
"loss": 1.5126183032989502,
"step": 3231
},
{
"epoch": 1.566650508967523,
"grad_norm": 8.248245054905528,
"learning_rate": 5.487103684659403e-06,
"loss": 1.4442534446716309,
"step": 3232
},
{
"epoch": 1.5671352399418321,
"grad_norm": 8.953116283677627,
"learning_rate": 5.484296920616601e-06,
"loss": 1.306715726852417,
"step": 3233
},
{
"epoch": 1.5676199709161416,
"grad_norm": 17.73000021723809,
"learning_rate": 5.481490002509838e-06,
"loss": 1.4798527956008911,
"step": 3234
},
{
"epoch": 1.5681047018904508,
"grad_norm": 20.13620292181096,
"learning_rate": 5.478682931232053e-06,
"loss": 1.587162971496582,
"step": 3235
},
{
"epoch": 1.56858943286476,
"grad_norm": 11.85572680543437,
"learning_rate": 5.475875707676225e-06,
"loss": 1.137992262840271,
"step": 3236
},
{
"epoch": 1.5690741638390693,
"grad_norm": 7.927473024422815,
"learning_rate": 5.473068332735385e-06,
"loss": 1.3873844146728516,
"step": 3237
},
{
"epoch": 1.5695588948133787,
"grad_norm": 16.72309651844479,
"learning_rate": 5.470260807302612e-06,
"loss": 1.587931513786316,
"step": 3238
},
{
"epoch": 1.570043625787688,
"grad_norm": 10.030450685488441,
"learning_rate": 5.467453132271032e-06,
"loss": 1.423433542251587,
"step": 3239
},
{
"epoch": 1.5705283567619972,
"grad_norm": 7.946187527089199,
"learning_rate": 5.46464530853382e-06,
"loss": 1.823045015335083,
"step": 3240
},
{
"epoch": 1.5710130877363064,
"grad_norm": 32.45477736844906,
"learning_rate": 5.461837336984197e-06,
"loss": 1.7400637865066528,
"step": 3241
},
{
"epoch": 1.5714978187106157,
"grad_norm": 12.722273405417125,
"learning_rate": 5.459029218515434e-06,
"loss": 0.9803401827812195,
"step": 3242
},
{
"epoch": 1.571982549684925,
"grad_norm": 9.329353330881876,
"learning_rate": 5.456220954020841e-06,
"loss": 1.2463791370391846,
"step": 3243
},
{
"epoch": 1.5724672806592341,
"grad_norm": 11.66383275675764,
"learning_rate": 5.453412544393785e-06,
"loss": 1.8626301288604736,
"step": 3244
},
{
"epoch": 1.5729520116335434,
"grad_norm": 20.445772876109046,
"learning_rate": 5.45060399052767e-06,
"loss": 1.5492891073226929,
"step": 3245
},
{
"epoch": 1.5734367426078526,
"grad_norm": 16.49074992860232,
"learning_rate": 5.447795293315954e-06,
"loss": 2.776432752609253,
"step": 3246
},
{
"epoch": 1.5739214735821618,
"grad_norm": 9.626155878561823,
"learning_rate": 5.4449864536521335e-06,
"loss": 1.47083580493927,
"step": 3247
},
{
"epoch": 1.574406204556471,
"grad_norm": 9.016266362917458,
"learning_rate": 5.442177472429751e-06,
"loss": 1.4951528310775757,
"step": 3248
},
{
"epoch": 1.5748909355307803,
"grad_norm": 22.2895168627847,
"learning_rate": 5.439368350542402e-06,
"loss": 2.2156386375427246,
"step": 3249
},
{
"epoch": 1.5753756665050895,
"grad_norm": 9.314552502473829,
"learning_rate": 5.4365590888837175e-06,
"loss": 1.8188363313674927,
"step": 3250
},
{
"epoch": 1.575860397479399,
"grad_norm": 19.246086809976084,
"learning_rate": 5.4337496883473775e-06,
"loss": 1.7006903886795044,
"step": 3251
},
{
"epoch": 1.5763451284537082,
"grad_norm": 7.200401775523784,
"learning_rate": 5.430940149827102e-06,
"loss": 1.737643837928772,
"step": 3252
},
{
"epoch": 1.5768298594280175,
"grad_norm": 13.986415968643616,
"learning_rate": 5.428130474216664e-06,
"loss": 1.5081868171691895,
"step": 3253
},
{
"epoch": 1.5773145904023267,
"grad_norm": 8.551303101363265,
"learning_rate": 5.425320662409868e-06,
"loss": 1.2907871007919312,
"step": 3254
},
{
"epoch": 1.577799321376636,
"grad_norm": 18.067934305692727,
"learning_rate": 5.4225107153005715e-06,
"loss": 1.0734738111495972,
"step": 3255
},
{
"epoch": 1.5782840523509454,
"grad_norm": 14.919815331376457,
"learning_rate": 5.419700633782672e-06,
"loss": 2.041714906692505,
"step": 3256
},
{
"epoch": 1.5787687833252546,
"grad_norm": 12.679787963321344,
"learning_rate": 5.416890418750107e-06,
"loss": 1.2983736991882324,
"step": 3257
},
{
"epoch": 1.5792535142995638,
"grad_norm": 12.453586124260607,
"learning_rate": 5.414080071096861e-06,
"loss": 1.388606071472168,
"step": 3258
},
{
"epoch": 1.579738245273873,
"grad_norm": 11.086829876637971,
"learning_rate": 5.411269591716955e-06,
"loss": 1.5412366390228271,
"step": 3259
},
{
"epoch": 1.5802229762481823,
"grad_norm": 11.874500423252728,
"learning_rate": 5.408458981504458e-06,
"loss": 2.03385329246521,
"step": 3260
},
{
"epoch": 1.5807077072224915,
"grad_norm": 15.572131580744463,
"learning_rate": 5.405648241353481e-06,
"loss": 1.5084433555603027,
"step": 3261
},
{
"epoch": 1.5811924381968008,
"grad_norm": 9.289031309870076,
"learning_rate": 5.402837372158166e-06,
"loss": 1.6297979354858398,
"step": 3262
},
{
"epoch": 1.58167716917111,
"grad_norm": 7.969329664435611,
"learning_rate": 5.400026374812709e-06,
"loss": 1.802436351776123,
"step": 3263
},
{
"epoch": 1.5821619001454192,
"grad_norm": 13.092318071618678,
"learning_rate": 5.39721525021134e-06,
"loss": 2.244051456451416,
"step": 3264
},
{
"epoch": 1.5826466311197285,
"grad_norm": 7.788375641853272,
"learning_rate": 5.394403999248327e-06,
"loss": 1.3704739809036255,
"step": 3265
},
{
"epoch": 1.5831313620940377,
"grad_norm": 12.354484371035406,
"learning_rate": 5.391592622817987e-06,
"loss": 1.5454440116882324,
"step": 3266
},
{
"epoch": 1.583616093068347,
"grad_norm": 7.650131812082438,
"learning_rate": 5.388781121814669e-06,
"loss": 1.8390469551086426,
"step": 3267
},
{
"epoch": 1.5841008240426562,
"grad_norm": 11.736581895889074,
"learning_rate": 5.385969497132764e-06,
"loss": 1.7171339988708496,
"step": 3268
},
{
"epoch": 1.5845855550169656,
"grad_norm": 18.502713004020773,
"learning_rate": 5.383157749666705e-06,
"loss": 1.7865076065063477,
"step": 3269
},
{
"epoch": 1.5850702859912749,
"grad_norm": 10.28004419372759,
"learning_rate": 5.3803458803109606e-06,
"loss": 2.046036958694458,
"step": 3270
},
{
"epoch": 1.585555016965584,
"grad_norm": 12.969773810579868,
"learning_rate": 5.3775338899600375e-06,
"loss": 1.3358713388442993,
"step": 3271
},
{
"epoch": 1.5860397479398933,
"grad_norm": 10.276769678811144,
"learning_rate": 5.374721779508485e-06,
"loss": 1.5968865156173706,
"step": 3272
},
{
"epoch": 1.5865244789142026,
"grad_norm": 12.522668397863152,
"learning_rate": 5.371909549850887e-06,
"loss": 2.015533447265625,
"step": 3273
},
{
"epoch": 1.587009209888512,
"grad_norm": 10.014560562456733,
"learning_rate": 5.369097201881867e-06,
"loss": 1.05318021774292,
"step": 3274
},
{
"epoch": 1.5874939408628213,
"grad_norm": 8.363544447817274,
"learning_rate": 5.3662847364960855e-06,
"loss": 1.6878701448440552,
"step": 3275
},
{
"epoch": 1.5879786718371305,
"grad_norm": 10.075862524068464,
"learning_rate": 5.3634721545882415e-06,
"loss": 1.4426183700561523,
"step": 3276
},
{
"epoch": 1.5884634028114397,
"grad_norm": 22.763870688482957,
"learning_rate": 5.36065945705307e-06,
"loss": 1.1590771675109863,
"step": 3277
},
{
"epoch": 1.588948133785749,
"grad_norm": 13.331377414602672,
"learning_rate": 5.357846644785343e-06,
"loss": 1.5377168655395508,
"step": 3278
},
{
"epoch": 1.5894328647600582,
"grad_norm": 9.60603395948558,
"learning_rate": 5.355033718679868e-06,
"loss": 1.2596938610076904,
"step": 3279
},
{
"epoch": 1.5899175957343674,
"grad_norm": 9.53617955759034,
"learning_rate": 5.352220679631491e-06,
"loss": 2.110222339630127,
"step": 3280
},
{
"epoch": 1.5904023267086767,
"grad_norm": 12.537037484908488,
"learning_rate": 5.349407528535091e-06,
"loss": 1.5017247200012207,
"step": 3281
},
{
"epoch": 1.5908870576829859,
"grad_norm": 7.091087936741277,
"learning_rate": 5.346594266285586e-06,
"loss": 1.7502574920654297,
"step": 3282
},
{
"epoch": 1.5913717886572951,
"grad_norm": 13.07448479475698,
"learning_rate": 5.3437808937779265e-06,
"loss": 1.557572841644287,
"step": 3283
},
{
"epoch": 1.5918565196316043,
"grad_norm": 9.764465984999054,
"learning_rate": 5.340967411907101e-06,
"loss": 1.8626011610031128,
"step": 3284
},
{
"epoch": 1.5923412506059136,
"grad_norm": 10.549354343764291,
"learning_rate": 5.338153821568127e-06,
"loss": 1.9209561347961426,
"step": 3285
},
{
"epoch": 1.5928259815802228,
"grad_norm": 8.71625219141494,
"learning_rate": 5.3353401236560655e-06,
"loss": 1.9181565046310425,
"step": 3286
},
{
"epoch": 1.5933107125545323,
"grad_norm": 11.966409509138424,
"learning_rate": 5.332526319066003e-06,
"loss": 1.374216914176941,
"step": 3287
},
{
"epoch": 1.5937954435288415,
"grad_norm": 12.416268824806641,
"learning_rate": 5.329712408693065e-06,
"loss": 1.6586657762527466,
"step": 3288
},
{
"epoch": 1.5942801745031507,
"grad_norm": 13.409844615799903,
"learning_rate": 5.3268983934324105e-06,
"loss": 1.4232492446899414,
"step": 3289
},
{
"epoch": 1.59476490547746,
"grad_norm": 11.660396135350377,
"learning_rate": 5.324084274179228e-06,
"loss": 1.5922138690948486,
"step": 3290
},
{
"epoch": 1.5952496364517694,
"grad_norm": 9.805417496137762,
"learning_rate": 5.321270051828744e-06,
"loss": 0.6598211526870728,
"step": 3291
},
{
"epoch": 1.5957343674260787,
"grad_norm": 7.679817756582895,
"learning_rate": 5.318455727276214e-06,
"loss": 1.3667329549789429,
"step": 3292
},
{
"epoch": 1.596219098400388,
"grad_norm": 16.571005686028148,
"learning_rate": 5.3156413014169284e-06,
"loss": 1.7157261371612549,
"step": 3293
},
{
"epoch": 1.5967038293746971,
"grad_norm": 7.340317061809659,
"learning_rate": 5.31282677514621e-06,
"loss": 1.490825891494751,
"step": 3294
},
{
"epoch": 1.5971885603490064,
"grad_norm": 9.252283490626542,
"learning_rate": 5.310012149359411e-06,
"loss": 1.6677199602127075,
"step": 3295
},
{
"epoch": 1.5976732913233156,
"grad_norm": 13.650616994266864,
"learning_rate": 5.307197424951917e-06,
"loss": 1.714477777481079,
"step": 3296
},
{
"epoch": 1.5981580222976248,
"grad_norm": 11.839922372604622,
"learning_rate": 5.304382602819145e-06,
"loss": 1.857146978378296,
"step": 3297
},
{
"epoch": 1.598642753271934,
"grad_norm": 8.482840363656834,
"learning_rate": 5.3015676838565426e-06,
"loss": 2.1837007999420166,
"step": 3298
},
{
"epoch": 1.5991274842462433,
"grad_norm": 11.056023929439746,
"learning_rate": 5.298752668959589e-06,
"loss": 1.7900069952011108,
"step": 3299
},
{
"epoch": 1.5996122152205525,
"grad_norm": 14.979370704213116,
"learning_rate": 5.295937559023794e-06,
"loss": 1.2340954542160034,
"step": 3300
},
{
"epoch": 1.6000969461948618,
"grad_norm": 9.648923477520324,
"learning_rate": 5.293122354944696e-06,
"loss": 1.5804572105407715,
"step": 3301
},
{
"epoch": 1.600581677169171,
"grad_norm": 11.745676150113445,
"learning_rate": 5.290307057617863e-06,
"loss": 1.8582123517990112,
"step": 3302
},
{
"epoch": 1.6010664081434802,
"grad_norm": 11.726179270870382,
"learning_rate": 5.287491667938896e-06,
"loss": 1.4423037767410278,
"step": 3303
},
{
"epoch": 1.6015511391177897,
"grad_norm": 15.325043426879748,
"learning_rate": 5.284676186803423e-06,
"loss": 1.6085193157196045,
"step": 3304
},
{
"epoch": 1.602035870092099,
"grad_norm": 11.305066281625958,
"learning_rate": 5.2818606151071015e-06,
"loss": 1.435456395149231,
"step": 3305
},
{
"epoch": 1.6025206010664081,
"grad_norm": 10.102048207588558,
"learning_rate": 5.279044953745618e-06,
"loss": 0.7582844495773315,
"step": 3306
},
{
"epoch": 1.6030053320407174,
"grad_norm": 9.55477611287459,
"learning_rate": 5.2762292036146864e-06,
"loss": 1.3427917957305908,
"step": 3307
},
{
"epoch": 1.6034900630150266,
"grad_norm": 13.613470833725039,
"learning_rate": 5.273413365610052e-06,
"loss": 1.4056464433670044,
"step": 3308
},
{
"epoch": 1.603974793989336,
"grad_norm": 14.337462603602946,
"learning_rate": 5.270597440627482e-06,
"loss": 1.4058890342712402,
"step": 3309
},
{
"epoch": 1.6044595249636453,
"grad_norm": 14.24553313158257,
"learning_rate": 5.26778142956278e-06,
"loss": 1.6327017545700073,
"step": 3310
},
{
"epoch": 1.6049442559379545,
"grad_norm": 15.990110138241299,
"learning_rate": 5.264965333311766e-06,
"loss": 2.002110004425049,
"step": 3311
},
{
"epoch": 1.6054289869122638,
"grad_norm": 14.34175699600799,
"learning_rate": 5.262149152770298e-06,
"loss": 1.9786747694015503,
"step": 3312
},
{
"epoch": 1.605913717886573,
"grad_norm": 8.183751181541822,
"learning_rate": 5.259332888834255e-06,
"loss": 1.7964591979980469,
"step": 3313
},
{
"epoch": 1.6063984488608822,
"grad_norm": 9.475166455139533,
"learning_rate": 5.256516542399543e-06,
"loss": 1.4023810625076294,
"step": 3314
},
{
"epoch": 1.6068831798351915,
"grad_norm": 24.101734958509613,
"learning_rate": 5.253700114362096e-06,
"loss": 1.704437017440796,
"step": 3315
},
{
"epoch": 1.6073679108095007,
"grad_norm": 7.6838245583484035,
"learning_rate": 5.250883605617869e-06,
"loss": 1.469896912574768,
"step": 3316
},
{
"epoch": 1.60785264178381,
"grad_norm": 8.640684770057158,
"learning_rate": 5.248067017062851e-06,
"loss": 1.3910003900527954,
"step": 3317
},
{
"epoch": 1.6083373727581192,
"grad_norm": 14.165874929189572,
"learning_rate": 5.24525034959305e-06,
"loss": 1.4788191318511963,
"step": 3318
},
{
"epoch": 1.6088221037324284,
"grad_norm": 10.85507480262457,
"learning_rate": 5.242433604104499e-06,
"loss": 1.4917384386062622,
"step": 3319
},
{
"epoch": 1.6093068347067376,
"grad_norm": 9.502778201897735,
"learning_rate": 5.23961678149326e-06,
"loss": 1.5489904880523682,
"step": 3320
},
{
"epoch": 1.6097915656810469,
"grad_norm": 11.034949484788823,
"learning_rate": 5.236799882655418e-06,
"loss": 1.38010835647583,
"step": 3321
},
{
"epoch": 1.6102762966553563,
"grad_norm": 11.3501881560586,
"learning_rate": 5.233982908487078e-06,
"loss": 1.4913840293884277,
"step": 3322
},
{
"epoch": 1.6107610276296656,
"grad_norm": 7.102174532752915,
"learning_rate": 5.231165859884375e-06,
"loss": 1.6890347003936768,
"step": 3323
},
{
"epoch": 1.6112457586039748,
"grad_norm": 12.09581953041057,
"learning_rate": 5.228348737743466e-06,
"loss": 1.553570032119751,
"step": 3324
},
{
"epoch": 1.611730489578284,
"grad_norm": 12.381607040810879,
"learning_rate": 5.225531542960528e-06,
"loss": 2.1991188526153564,
"step": 3325
},
{
"epoch": 1.6122152205525935,
"grad_norm": 8.444144356224218,
"learning_rate": 5.222714276431766e-06,
"loss": 1.5009393692016602,
"step": 3326
},
{
"epoch": 1.6126999515269027,
"grad_norm": 21.23965944644756,
"learning_rate": 5.219896939053403e-06,
"loss": 1.6265301704406738,
"step": 3327
},
{
"epoch": 1.613184682501212,
"grad_norm": 8.876722935950747,
"learning_rate": 5.217079531721688e-06,
"loss": 1.3987481594085693,
"step": 3328
},
{
"epoch": 1.6136694134755212,
"grad_norm": 12.37216736458819,
"learning_rate": 5.214262055332895e-06,
"loss": 1.2009589672088623,
"step": 3329
},
{
"epoch": 1.6141541444498304,
"grad_norm": 7.959249088297491,
"learning_rate": 5.211444510783309e-06,
"loss": 1.8664753437042236,
"step": 3330
},
{
"epoch": 1.6146388754241396,
"grad_norm": 12.251823182265552,
"learning_rate": 5.208626898969249e-06,
"loss": 1.9449552297592163,
"step": 3331
},
{
"epoch": 1.6151236063984489,
"grad_norm": 8.501973409584412,
"learning_rate": 5.205809220787049e-06,
"loss": 1.0572518110275269,
"step": 3332
},
{
"epoch": 1.615608337372758,
"grad_norm": 35.22622044970353,
"learning_rate": 5.202991477133066e-06,
"loss": 2.0282793045043945,
"step": 3333
},
{
"epoch": 1.6160930683470673,
"grad_norm": 11.41610713550299,
"learning_rate": 5.200173668903675e-06,
"loss": 1.5323443412780762,
"step": 3334
},
{
"epoch": 1.6165777993213766,
"grad_norm": 10.121470840176585,
"learning_rate": 5.197355796995277e-06,
"loss": 2.244778633117676,
"step": 3335
},
{
"epoch": 1.6170625302956858,
"grad_norm": 16.274298308053687,
"learning_rate": 5.194537862304289e-06,
"loss": 1.3895263671875,
"step": 3336
},
{
"epoch": 1.617547261269995,
"grad_norm": 17.211750708943438,
"learning_rate": 5.191719865727148e-06,
"loss": 1.5378880500793457,
"step": 3337
},
{
"epoch": 1.6180319922443043,
"grad_norm": 29.913013002217443,
"learning_rate": 5.188901808160313e-06,
"loss": 2.3161351680755615,
"step": 3338
},
{
"epoch": 1.6185167232186135,
"grad_norm": 12.560101696955927,
"learning_rate": 5.186083690500258e-06,
"loss": 1.2736417055130005,
"step": 3339
},
{
"epoch": 1.619001454192923,
"grad_norm": 11.134388651865482,
"learning_rate": 5.183265513643484e-06,
"loss": 1.5803170204162598,
"step": 3340
},
{
"epoch": 1.6194861851672322,
"grad_norm": 18.32034258159539,
"learning_rate": 5.180447278486502e-06,
"loss": 1.940481424331665,
"step": 3341
},
{
"epoch": 1.6199709161415414,
"grad_norm": 11.89078253445586,
"learning_rate": 5.1776289859258474e-06,
"loss": 2.1839897632598877,
"step": 3342
},
{
"epoch": 1.6204556471158507,
"grad_norm": 10.457984041710137,
"learning_rate": 5.174810636858073e-06,
"loss": 1.3741536140441895,
"step": 3343
},
{
"epoch": 1.6209403780901601,
"grad_norm": 8.991461202104162,
"learning_rate": 5.1719922321797476e-06,
"loss": 1.5788397789001465,
"step": 3344
},
{
"epoch": 1.6214251090644693,
"grad_norm": 9.369849504275628,
"learning_rate": 5.169173772787458e-06,
"loss": 1.5458173751831055,
"step": 3345
},
{
"epoch": 1.6219098400387786,
"grad_norm": 12.612848291910405,
"learning_rate": 5.166355259577808e-06,
"loss": 0.6380617022514343,
"step": 3346
},
{
"epoch": 1.6223945710130878,
"grad_norm": 12.114637229791864,
"learning_rate": 5.163536693447426e-06,
"loss": 1.8352665901184082,
"step": 3347
},
{
"epoch": 1.622879301987397,
"grad_norm": 10.725255918876474,
"learning_rate": 5.160718075292943e-06,
"loss": 1.2781107425689697,
"step": 3348
},
{
"epoch": 1.6233640329617063,
"grad_norm": 7.976565328946117,
"learning_rate": 5.157899406011019e-06,
"loss": 1.7201844453811646,
"step": 3349
},
{
"epoch": 1.6238487639360155,
"grad_norm": 10.554627985017918,
"learning_rate": 5.1550806864983235e-06,
"loss": 0.9459984302520752,
"step": 3350
},
{
"epoch": 1.6243334949103247,
"grad_norm": 14.63984926320301,
"learning_rate": 5.152261917651547e-06,
"loss": 1.2886574268341064,
"step": 3351
},
{
"epoch": 1.624818225884634,
"grad_norm": 7.310677326067383,
"learning_rate": 5.14944310036739e-06,
"loss": 1.7894126176834106,
"step": 3352
},
{
"epoch": 1.6253029568589432,
"grad_norm": 9.84288325671643,
"learning_rate": 5.146624235542571e-06,
"loss": 1.8390626907348633,
"step": 3353
},
{
"epoch": 1.6257876878332524,
"grad_norm": 19.57805771635548,
"learning_rate": 5.143805324073827e-06,
"loss": 1.1044903993606567,
"step": 3354
},
{
"epoch": 1.6262724188075617,
"grad_norm": 11.538919083924782,
"learning_rate": 5.140986366857904e-06,
"loss": 1.8182334899902344,
"step": 3355
},
{
"epoch": 1.626757149781871,
"grad_norm": 8.002006239933769,
"learning_rate": 5.1381673647915675e-06,
"loss": 1.7118239402770996,
"step": 3356
},
{
"epoch": 1.6272418807561804,
"grad_norm": 11.525393023517504,
"learning_rate": 5.1353483187715936e-06,
"loss": 1.0876256227493286,
"step": 3357
},
{
"epoch": 1.6277266117304896,
"grad_norm": 7.324546831131933,
"learning_rate": 5.132529229694774e-06,
"loss": 1.3225966691970825,
"step": 3358
},
{
"epoch": 1.6282113427047988,
"grad_norm": 6.288643327070672,
"learning_rate": 5.129710098457914e-06,
"loss": 1.4484889507293701,
"step": 3359
},
{
"epoch": 1.628696073679108,
"grad_norm": 13.842652066741097,
"learning_rate": 5.126890925957832e-06,
"loss": 1.3124864101409912,
"step": 3360
},
{
"epoch": 1.6291808046534173,
"grad_norm": 10.928626678965836,
"learning_rate": 5.12407171309136e-06,
"loss": 1.3265398740768433,
"step": 3361
},
{
"epoch": 1.6296655356277268,
"grad_norm": 9.08645339057136,
"learning_rate": 5.121252460755344e-06,
"loss": 2.1480867862701416,
"step": 3362
},
{
"epoch": 1.630150266602036,
"grad_norm": 14.357421144416493,
"learning_rate": 5.118433169846639e-06,
"loss": 1.6437472105026245,
"step": 3363
},
{
"epoch": 1.6306349975763452,
"grad_norm": 11.482052791010089,
"learning_rate": 5.1156138412621155e-06,
"loss": 1.455790400505066,
"step": 3364
},
{
"epoch": 1.6311197285506545,
"grad_norm": 13.068749486462842,
"learning_rate": 5.1127944758986545e-06,
"loss": 2.3791556358337402,
"step": 3365
},
{
"epoch": 1.6316044595249637,
"grad_norm": 20.603577353927008,
"learning_rate": 5.1099750746531515e-06,
"loss": 2.197213649749756,
"step": 3366
},
{
"epoch": 1.632089190499273,
"grad_norm": 13.365451545094437,
"learning_rate": 5.107155638422507e-06,
"loss": 1.9537744522094727,
"step": 3367
},
{
"epoch": 1.6325739214735822,
"grad_norm": 8.800779224372238,
"learning_rate": 5.10433616810364e-06,
"loss": 1.5248888731002808,
"step": 3368
},
{
"epoch": 1.6330586524478914,
"grad_norm": 11.707167837249266,
"learning_rate": 5.101516664593474e-06,
"loss": 2.1103415489196777,
"step": 3369
},
{
"epoch": 1.6335433834222006,
"grad_norm": 14.480378027603914,
"learning_rate": 5.098697128788951e-06,
"loss": 1.6609928607940674,
"step": 3370
},
{
"epoch": 1.6340281143965099,
"grad_norm": 9.545598510338435,
"learning_rate": 5.0958775615870125e-06,
"loss": 1.7015540599822998,
"step": 3371
},
{
"epoch": 1.634512845370819,
"grad_norm": 17.81588013763369,
"learning_rate": 5.093057963884619e-06,
"loss": 1.3887819051742554,
"step": 3372
},
{
"epoch": 1.6349975763451283,
"grad_norm": 17.932947751140667,
"learning_rate": 5.090238336578738e-06,
"loss": 1.213496208190918,
"step": 3373
},
{
"epoch": 1.6354823073194376,
"grad_norm": 8.471578200373525,
"learning_rate": 5.087418680566345e-06,
"loss": 1.1731033325195312,
"step": 3374
},
{
"epoch": 1.635967038293747,
"grad_norm": 10.942907149243412,
"learning_rate": 5.084598996744426e-06,
"loss": 1.6805680990219116,
"step": 3375
},
{
"epoch": 1.6364517692680562,
"grad_norm": 9.694879044560563,
"learning_rate": 5.081779286009974e-06,
"loss": 1.5904394388198853,
"step": 3376
},
{
"epoch": 1.6369365002423655,
"grad_norm": 11.715013256304768,
"learning_rate": 5.078959549259994e-06,
"loss": 1.4444942474365234,
"step": 3377
},
{
"epoch": 1.6374212312166747,
"grad_norm": 13.506731173120583,
"learning_rate": 5.076139787391498e-06,
"loss": 1.4853179454803467,
"step": 3378
},
{
"epoch": 1.6379059621909842,
"grad_norm": 16.54469616219076,
"learning_rate": 5.073320001301501e-06,
"loss": 1.3449724912643433,
"step": 3379
},
{
"epoch": 1.6383906931652934,
"grad_norm": 13.527712637749946,
"learning_rate": 5.070500191887034e-06,
"loss": 1.31024169921875,
"step": 3380
},
{
"epoch": 1.6388754241396026,
"grad_norm": 9.093614993415528,
"learning_rate": 5.06768036004513e-06,
"loss": 1.67156183719635,
"step": 3381
},
{
"epoch": 1.6393601551139119,
"grad_norm": 10.029560024406845,
"learning_rate": 5.0648605066728305e-06,
"loss": 1.2719720602035522,
"step": 3382
},
{
"epoch": 1.639844886088221,
"grad_norm": 9.032948992748768,
"learning_rate": 5.062040632667183e-06,
"loss": 1.4539239406585693,
"step": 3383
},
{
"epoch": 1.6403296170625303,
"grad_norm": 14.012842247349745,
"learning_rate": 5.059220738925246e-06,
"loss": 0.9461653828620911,
"step": 3384
},
{
"epoch": 1.6408143480368396,
"grad_norm": 8.565231782476353,
"learning_rate": 5.056400826344078e-06,
"loss": 1.0301700830459595,
"step": 3385
},
{
"epoch": 1.6412990790111488,
"grad_norm": 10.676621652423878,
"learning_rate": 5.0535808958207445e-06,
"loss": 1.5859544277191162,
"step": 3386
},
{
"epoch": 1.641783809985458,
"grad_norm": 9.416294991368732,
"learning_rate": 5.050760948252321e-06,
"loss": 1.6328840255737305,
"step": 3387
},
{
"epoch": 1.6422685409597673,
"grad_norm": 11.22572899948707,
"learning_rate": 5.047940984535886e-06,
"loss": 1.396874189376831,
"step": 3388
},
{
"epoch": 1.6427532719340765,
"grad_norm": 12.144533696754515,
"learning_rate": 5.045121005568522e-06,
"loss": 1.205815076828003,
"step": 3389
},
{
"epoch": 1.6432380029083857,
"grad_norm": 8.133924770088342,
"learning_rate": 5.042301012247317e-06,
"loss": 1.3044114112854004,
"step": 3390
},
{
"epoch": 1.643722733882695,
"grad_norm": 6.893738138750137,
"learning_rate": 5.039481005469366e-06,
"loss": 1.7526649236679077,
"step": 3391
},
{
"epoch": 1.6442074648570042,
"grad_norm": 16.565409562554716,
"learning_rate": 5.036660986131763e-06,
"loss": 1.5954477787017822,
"step": 3392
},
{
"epoch": 1.6446921958313137,
"grad_norm": 10.522770824774973,
"learning_rate": 5.0338409551316116e-06,
"loss": 0.8167741894721985,
"step": 3393
},
{
"epoch": 1.6451769268056229,
"grad_norm": 10.490759258508781,
"learning_rate": 5.031020913366016e-06,
"loss": 1.5661015510559082,
"step": 3394
},
{
"epoch": 1.6456616577799321,
"grad_norm": 10.178295922949864,
"learning_rate": 5.028200861732083e-06,
"loss": 1.7556021213531494,
"step": 3395
},
{
"epoch": 1.6461463887542414,
"grad_norm": 16.55267816555351,
"learning_rate": 5.025380801126929e-06,
"loss": 1.3795446157455444,
"step": 3396
},
{
"epoch": 1.6466311197285508,
"grad_norm": 12.056491260427542,
"learning_rate": 5.022560732447662e-06,
"loss": 1.408111810684204,
"step": 3397
},
{
"epoch": 1.64711585070286,
"grad_norm": 19.59226744850261,
"learning_rate": 5.019740656591403e-06,
"loss": 2.005002021789551,
"step": 3398
},
{
"epoch": 1.6476005816771693,
"grad_norm": 13.07896726006562,
"learning_rate": 5.01692057445527e-06,
"loss": 1.1557812690734863,
"step": 3399
},
{
"epoch": 1.6480853126514785,
"grad_norm": 11.533817298578976,
"learning_rate": 5.014100486936383e-06,
"loss": 0.7443182468414307,
"step": 3400
},
{
"epoch": 1.6485700436257877,
"grad_norm": 10.519674662795769,
"learning_rate": 5.011280394931866e-06,
"loss": 1.7948346138000488,
"step": 3401
},
{
"epoch": 1.649054774600097,
"grad_norm": 10.211154137676667,
"learning_rate": 5.008460299338843e-06,
"loss": 1.2718459367752075,
"step": 3402
},
{
"epoch": 1.6495395055744062,
"grad_norm": 18.174441061479936,
"learning_rate": 5.005640201054442e-06,
"loss": 0.8984038233757019,
"step": 3403
},
{
"epoch": 1.6500242365487154,
"grad_norm": 12.430784503935143,
"learning_rate": 5.002820100975785e-06,
"loss": 1.7029848098754883,
"step": 3404
},
{
"epoch": 1.6505089675230247,
"grad_norm": 11.403828689009742,
"learning_rate": 5e-06,
"loss": 1.7444953918457031,
"step": 3405
},
{
"epoch": 1.650993698497334,
"grad_norm": 14.902877638618328,
"learning_rate": 4.997179899024217e-06,
"loss": 1.72121262550354,
"step": 3406
},
{
"epoch": 1.6514784294716431,
"grad_norm": 13.19622516237836,
"learning_rate": 4.994359798945561e-06,
"loss": 1.526086688041687,
"step": 3407
},
{
"epoch": 1.6519631604459524,
"grad_norm": 8.870747571697034,
"learning_rate": 4.9915397006611575e-06,
"loss": 1.6957201957702637,
"step": 3408
},
{
"epoch": 1.6524478914202616,
"grad_norm": 10.70861610351782,
"learning_rate": 4.9887196050681345e-06,
"loss": 1.349076747894287,
"step": 3409
},
{
"epoch": 1.652932622394571,
"grad_norm": 12.173782793482568,
"learning_rate": 4.985899513063618e-06,
"loss": 2.283435344696045,
"step": 3410
},
{
"epoch": 1.6534173533688803,
"grad_norm": 13.145857273046017,
"learning_rate": 4.983079425544732e-06,
"loss": 1.4622068405151367,
"step": 3411
},
{
"epoch": 1.6539020843431895,
"grad_norm": 18.893921122732603,
"learning_rate": 4.9802593434086e-06,
"loss": 1.6549479961395264,
"step": 3412
},
{
"epoch": 1.6543868153174988,
"grad_norm": 13.032636782711174,
"learning_rate": 4.9774392675523386e-06,
"loss": 1.0799684524536133,
"step": 3413
},
{
"epoch": 1.654871546291808,
"grad_norm": 11.08750297787435,
"learning_rate": 4.974619198873073e-06,
"loss": 1.7194573879241943,
"step": 3414
},
{
"epoch": 1.6553562772661174,
"grad_norm": 16.67490738087757,
"learning_rate": 4.971799138267918e-06,
"loss": 1.8271223306655884,
"step": 3415
},
{
"epoch": 1.6558410082404267,
"grad_norm": 10.647656371201698,
"learning_rate": 4.968979086633986e-06,
"loss": 1.4650964736938477,
"step": 3416
},
{
"epoch": 1.656325739214736,
"grad_norm": 18.44576233876307,
"learning_rate": 4.966159044868389e-06,
"loss": 1.7628443241119385,
"step": 3417
},
{
"epoch": 1.6568104701890451,
"grad_norm": 9.744471899842216,
"learning_rate": 4.963339013868239e-06,
"loss": 1.486038327217102,
"step": 3418
},
{
"epoch": 1.6572952011633544,
"grad_norm": 7.9385361879137815,
"learning_rate": 4.960518994530637e-06,
"loss": 1.2183815240859985,
"step": 3419
},
{
"epoch": 1.6577799321376636,
"grad_norm": 14.438694054247804,
"learning_rate": 4.957698987752684e-06,
"loss": 1.3943876028060913,
"step": 3420
},
{
"epoch": 1.6582646631119728,
"grad_norm": 8.23174773590464,
"learning_rate": 4.95487899443148e-06,
"loss": 1.430005669593811,
"step": 3421
},
{
"epoch": 1.658749394086282,
"grad_norm": 12.489634462324485,
"learning_rate": 4.952059015464115e-06,
"loss": 1.3272600173950195,
"step": 3422
},
{
"epoch": 1.6592341250605913,
"grad_norm": 24.07278315460815,
"learning_rate": 4.9492390517476805e-06,
"loss": 2.4073591232299805,
"step": 3423
},
{
"epoch": 1.6597188560349005,
"grad_norm": 27.72394561665317,
"learning_rate": 4.946419104179257e-06,
"loss": 1.4462850093841553,
"step": 3424
},
{
"epoch": 1.6602035870092098,
"grad_norm": 11.477673864880405,
"learning_rate": 4.943599173655924e-06,
"loss": 2.4056246280670166,
"step": 3425
},
{
"epoch": 1.660688317983519,
"grad_norm": 21.24736021324791,
"learning_rate": 4.940779261074756e-06,
"loss": 1.9365158081054688,
"step": 3426
},
{
"epoch": 1.6611730489578282,
"grad_norm": 7.600184209808522,
"learning_rate": 4.937959367332817e-06,
"loss": 1.4880907535552979,
"step": 3427
},
{
"epoch": 1.6616577799321377,
"grad_norm": 12.807807783971542,
"learning_rate": 4.935139493327171e-06,
"loss": 1.538408637046814,
"step": 3428
},
{
"epoch": 1.662142510906447,
"grad_norm": 9.535307504469062,
"learning_rate": 4.932319639954872e-06,
"loss": 1.7405787706375122,
"step": 3429
},
{
"epoch": 1.6626272418807562,
"grad_norm": 8.526303816798443,
"learning_rate": 4.929499808112969e-06,
"loss": 1.5072468519210815,
"step": 3430
},
{
"epoch": 1.6631119728550654,
"grad_norm": 8.34745300617823,
"learning_rate": 4.9266799986985e-06,
"loss": 0.8946444392204285,
"step": 3431
},
{
"epoch": 1.6635967038293749,
"grad_norm": 10.029363674049279,
"learning_rate": 4.923860212608505e-06,
"loss": 1.5457556247711182,
"step": 3432
},
{
"epoch": 1.664081434803684,
"grad_norm": 10.491210545096774,
"learning_rate": 4.921040450740007e-06,
"loss": 1.395813226699829,
"step": 3433
},
{
"epoch": 1.6645661657779933,
"grad_norm": 8.849185273945317,
"learning_rate": 4.918220713990028e-06,
"loss": 1.1233563423156738,
"step": 3434
},
{
"epoch": 1.6650508967523026,
"grad_norm": 11.447866527598455,
"learning_rate": 4.915401003255577e-06,
"loss": 3.1025240421295166,
"step": 3435
},
{
"epoch": 1.6655356277266118,
"grad_norm": 16.321617612631492,
"learning_rate": 4.912581319433655e-06,
"loss": 1.745712399482727,
"step": 3436
},
{
"epoch": 1.666020358700921,
"grad_norm": 11.763158196964715,
"learning_rate": 4.909761663421264e-06,
"loss": 1.322537899017334,
"step": 3437
},
{
"epoch": 1.6665050896752303,
"grad_norm": 10.408368331803972,
"learning_rate": 4.906942036115383e-06,
"loss": 1.5660854578018188,
"step": 3438
},
{
"epoch": 1.6669898206495395,
"grad_norm": 9.873939855456154,
"learning_rate": 4.904122438412989e-06,
"loss": 1.7830311059951782,
"step": 3439
},
{
"epoch": 1.6674745516238487,
"grad_norm": 13.381353274333705,
"learning_rate": 4.9013028712110526e-06,
"loss": 1.2029526233673096,
"step": 3440
},
{
"epoch": 1.667959282598158,
"grad_norm": 18.033244470577905,
"learning_rate": 4.898483335406527e-06,
"loss": 1.2830138206481934,
"step": 3441
},
{
"epoch": 1.6684440135724672,
"grad_norm": 6.806788526730472,
"learning_rate": 4.895663831896362e-06,
"loss": 1.2634434700012207,
"step": 3442
},
{
"epoch": 1.6689287445467764,
"grad_norm": 10.348457500886198,
"learning_rate": 4.892844361577495e-06,
"loss": 1.2683029174804688,
"step": 3443
},
{
"epoch": 1.6694134755210857,
"grad_norm": 11.918315998559613,
"learning_rate": 4.890024925346851e-06,
"loss": 1.840536117553711,
"step": 3444
},
{
"epoch": 1.669898206495395,
"grad_norm": 8.359233867311238,
"learning_rate": 4.8872055241013455e-06,
"loss": 2.0101304054260254,
"step": 3445
},
{
"epoch": 1.6703829374697043,
"grad_norm": 12.082331696670593,
"learning_rate": 4.884386158737885e-06,
"loss": 1.4854000806808472,
"step": 3446
},
{
"epoch": 1.6708676684440136,
"grad_norm": 9.807058018148155,
"learning_rate": 4.881566830153362e-06,
"loss": 1.310605764389038,
"step": 3447
},
{
"epoch": 1.6713523994183228,
"grad_norm": 8.660494217652092,
"learning_rate": 4.878747539244658e-06,
"loss": 2.3520350456237793,
"step": 3448
},
{
"epoch": 1.671837130392632,
"grad_norm": 10.261517952900439,
"learning_rate": 4.875928286908642e-06,
"loss": 1.3733183145523071,
"step": 3449
},
{
"epoch": 1.6723218613669415,
"grad_norm": 9.067973561306403,
"learning_rate": 4.873109074042169e-06,
"loss": 1.5717179775238037,
"step": 3450
},
{
"epoch": 1.6728065923412507,
"grad_norm": 10.863752704845991,
"learning_rate": 4.870289901542087e-06,
"loss": 1.3314428329467773,
"step": 3451
},
{
"epoch": 1.67329132331556,
"grad_norm": 13.665785568804052,
"learning_rate": 4.8674707703052275e-06,
"loss": 1.6778781414031982,
"step": 3452
},
{
"epoch": 1.6737760542898692,
"grad_norm": 9.157017730799996,
"learning_rate": 4.864651681228409e-06,
"loss": 1.38461172580719,
"step": 3453
},
{
"epoch": 1.6742607852641784,
"grad_norm": 11.11531933012133,
"learning_rate": 4.861832635208435e-06,
"loss": 1.7196877002716064,
"step": 3454
},
{
"epoch": 1.6747455162384877,
"grad_norm": 7.888403297434342,
"learning_rate": 4.859013633142096e-06,
"loss": 1.2248778343200684,
"step": 3455
},
{
"epoch": 1.675230247212797,
"grad_norm": 10.698271194523583,
"learning_rate": 4.856194675926174e-06,
"loss": 1.9417346715927124,
"step": 3456
},
{
"epoch": 1.6757149781871061,
"grad_norm": 11.734670734383933,
"learning_rate": 4.8533757644574306e-06,
"loss": 1.6080979108810425,
"step": 3457
},
{
"epoch": 1.6761997091614154,
"grad_norm": 8.524207642591817,
"learning_rate": 4.850556899632613e-06,
"loss": 1.8455740213394165,
"step": 3458
},
{
"epoch": 1.6766844401357246,
"grad_norm": 8.654496578677174,
"learning_rate": 4.847738082348455e-06,
"loss": 1.9836328029632568,
"step": 3459
},
{
"epoch": 1.6771691711100338,
"grad_norm": 14.894667016269231,
"learning_rate": 4.844919313501677e-06,
"loss": 1.2323832511901855,
"step": 3460
},
{
"epoch": 1.677653902084343,
"grad_norm": 11.345669465463931,
"learning_rate": 4.842100593988983e-06,
"loss": 2.1706173419952393,
"step": 3461
},
{
"epoch": 1.6781386330586523,
"grad_norm": 14.108525506888832,
"learning_rate": 4.839281924707058e-06,
"loss": 1.4739412069320679,
"step": 3462
},
{
"epoch": 1.6786233640329618,
"grad_norm": 13.622601774281076,
"learning_rate": 4.836463306552577e-06,
"loss": 1.5395324230194092,
"step": 3463
},
{
"epoch": 1.679108095007271,
"grad_norm": 8.980608606247289,
"learning_rate": 4.833644740422191e-06,
"loss": 1.3861232995986938,
"step": 3464
},
{
"epoch": 1.6795928259815802,
"grad_norm": 11.291345997161537,
"learning_rate": 4.830826227212543e-06,
"loss": 1.182739019393921,
"step": 3465
},
{
"epoch": 1.6800775569558895,
"grad_norm": 9.674642177858113,
"learning_rate": 4.828007767820253e-06,
"loss": 1.346815586090088,
"step": 3466
},
{
"epoch": 1.680562287930199,
"grad_norm": 10.111162793505414,
"learning_rate": 4.825189363141928e-06,
"loss": 1.9719144105911255,
"step": 3467
},
{
"epoch": 1.6810470189045081,
"grad_norm": 8.704876021161507,
"learning_rate": 4.822371014074154e-06,
"loss": 1.1415139436721802,
"step": 3468
},
{
"epoch": 1.6815317498788174,
"grad_norm": 8.142107263641279,
"learning_rate": 4.819552721513499e-06,
"loss": 1.577944278717041,
"step": 3469
},
{
"epoch": 1.6820164808531266,
"grad_norm": 10.345032924396357,
"learning_rate": 4.816734486356518e-06,
"loss": 1.8536227941513062,
"step": 3470
},
{
"epoch": 1.6825012118274358,
"grad_norm": 9.195565290866217,
"learning_rate": 4.813916309499744e-06,
"loss": 1.0237804651260376,
"step": 3471
},
{
"epoch": 1.682985942801745,
"grad_norm": 9.963774395856575,
"learning_rate": 4.81109819183969e-06,
"loss": 1.5156160593032837,
"step": 3472
},
{
"epoch": 1.6834706737760543,
"grad_norm": 9.965089181385698,
"learning_rate": 4.8082801342728525e-06,
"loss": 1.5354803800582886,
"step": 3473
},
{
"epoch": 1.6839554047503635,
"grad_norm": 15.846548861547348,
"learning_rate": 4.8054621376957115e-06,
"loss": 1.8302356004714966,
"step": 3474
},
{
"epoch": 1.6844401357246728,
"grad_norm": 10.231737613062553,
"learning_rate": 4.802644203004724e-06,
"loss": 0.8374685049057007,
"step": 3475
},
{
"epoch": 1.684924866698982,
"grad_norm": 15.070810939655095,
"learning_rate": 4.799826331096326e-06,
"loss": 1.180443286895752,
"step": 3476
},
{
"epoch": 1.6854095976732912,
"grad_norm": 7.496340888114508,
"learning_rate": 4.797008522866937e-06,
"loss": 1.469144344329834,
"step": 3477
},
{
"epoch": 1.6858943286476005,
"grad_norm": 13.264096073309863,
"learning_rate": 4.794190779212952e-06,
"loss": 1.7788269519805908,
"step": 3478
},
{
"epoch": 1.6863790596219097,
"grad_norm": 11.59360234188508,
"learning_rate": 4.7913731010307525e-06,
"loss": 1.666331171989441,
"step": 3479
},
{
"epoch": 1.686863790596219,
"grad_norm": 9.941004590392785,
"learning_rate": 4.788555489216692e-06,
"loss": 1.1724320650100708,
"step": 3480
},
{
"epoch": 1.6873485215705284,
"grad_norm": 8.200628795414156,
"learning_rate": 4.785737944667108e-06,
"loss": 1.7598240375518799,
"step": 3481
},
{
"epoch": 1.6878332525448376,
"grad_norm": 12.819536218652678,
"learning_rate": 4.7829204682783135e-06,
"loss": 1.0397443771362305,
"step": 3482
},
{
"epoch": 1.6883179835191469,
"grad_norm": 13.008070852519879,
"learning_rate": 4.780103060946598e-06,
"loss": 1.9801990985870361,
"step": 3483
},
{
"epoch": 1.688802714493456,
"grad_norm": 9.877596829298938,
"learning_rate": 4.777285723568235e-06,
"loss": 1.8053021430969238,
"step": 3484
},
{
"epoch": 1.6892874454677655,
"grad_norm": 12.161058780506782,
"learning_rate": 4.7744684570394734e-06,
"loss": 1.3000643253326416,
"step": 3485
},
{
"epoch": 1.6897721764420748,
"grad_norm": 11.450944990674977,
"learning_rate": 4.771651262256536e-06,
"loss": 1.1853013038635254,
"step": 3486
},
{
"epoch": 1.690256907416384,
"grad_norm": 13.866378407656429,
"learning_rate": 4.768834140115625e-06,
"loss": 1.4135433435440063,
"step": 3487
},
{
"epoch": 1.6907416383906932,
"grad_norm": 14.97341497791508,
"learning_rate": 4.7660170915129225e-06,
"loss": 2.072874069213867,
"step": 3488
},
{
"epoch": 1.6912263693650025,
"grad_norm": 17.57112359236176,
"learning_rate": 4.763200117344584e-06,
"loss": 1.6050326824188232,
"step": 3489
},
{
"epoch": 1.6917111003393117,
"grad_norm": 20.027237686899458,
"learning_rate": 4.760383218506742e-06,
"loss": 2.4712870121002197,
"step": 3490
},
{
"epoch": 1.692195831313621,
"grad_norm": 10.855564481019274,
"learning_rate": 4.757566395895503e-06,
"loss": 1.9764649868011475,
"step": 3491
},
{
"epoch": 1.6926805622879302,
"grad_norm": 14.284598536066055,
"learning_rate": 4.754749650406951e-06,
"loss": 1.3312709331512451,
"step": 3492
},
{
"epoch": 1.6931652932622394,
"grad_norm": 6.518042665608667,
"learning_rate": 4.751932982937151e-06,
"loss": 0.9497141242027283,
"step": 3493
},
{
"epoch": 1.6936500242365486,
"grad_norm": 22.883775755319665,
"learning_rate": 4.749116394382132e-06,
"loss": 2.2461905479431152,
"step": 3494
},
{
"epoch": 1.6941347552108579,
"grad_norm": 8.90435651771367,
"learning_rate": 4.7462998856379065e-06,
"loss": 1.3589391708374023,
"step": 3495
},
{
"epoch": 1.6946194861851671,
"grad_norm": 8.836239783552873,
"learning_rate": 4.743483457600459e-06,
"loss": 1.7185380458831787,
"step": 3496
},
{
"epoch": 1.6951042171594763,
"grad_norm": 12.328209751941623,
"learning_rate": 4.740667111165746e-06,
"loss": 1.275503158569336,
"step": 3497
},
{
"epoch": 1.6955889481337858,
"grad_norm": 8.195661400665252,
"learning_rate": 4.737850847229703e-06,
"loss": 1.6425025463104248,
"step": 3498
},
{
"epoch": 1.696073679108095,
"grad_norm": 163.57353078786895,
"learning_rate": 4.735034666688236e-06,
"loss": 1.7959420680999756,
"step": 3499
},
{
"epoch": 1.6965584100824043,
"grad_norm": 6.245750996122851,
"learning_rate": 4.732218570437224e-06,
"loss": 1.473252296447754,
"step": 3500
},
{
"epoch": 1.6970431410567135,
"grad_norm": 62.453864941900264,
"learning_rate": 4.729402559372519e-06,
"loss": 2.79498028755188,
"step": 3501
},
{
"epoch": 1.6975278720310227,
"grad_norm": 25.711970909640275,
"learning_rate": 4.7265866343899505e-06,
"loss": 2.685568332672119,
"step": 3502
},
{
"epoch": 1.6980126030053322,
"grad_norm": 10.859141197019877,
"learning_rate": 4.723770796385314e-06,
"loss": 2.168656826019287,
"step": 3503
},
{
"epoch": 1.6984973339796414,
"grad_norm": 10.713641901454952,
"learning_rate": 4.720955046254384e-06,
"loss": 1.8022913932800293,
"step": 3504
},
{
"epoch": 1.6989820649539507,
"grad_norm": 20.111400910160622,
"learning_rate": 4.7181393848929e-06,
"loss": 1.382580280303955,
"step": 3505
},
{
"epoch": 1.6994667959282599,
"grad_norm": 16.40223499006563,
"learning_rate": 4.715323813196578e-06,
"loss": 1.8081510066986084,
"step": 3506
},
{
"epoch": 1.6999515269025691,
"grad_norm": 13.203627864775045,
"learning_rate": 4.712508332061105e-06,
"loss": 1.4247595071792603,
"step": 3507
},
{
"epoch": 1.7004362578768784,
"grad_norm": 8.025444643874396,
"learning_rate": 4.709692942382139e-06,
"loss": 1.578348159790039,
"step": 3508
},
{
"epoch": 1.7009209888511876,
"grad_norm": 12.498176761871878,
"learning_rate": 4.706877645055308e-06,
"loss": 1.6591717004776,
"step": 3509
},
{
"epoch": 1.7014057198254968,
"grad_norm": 20.447745200200668,
"learning_rate": 4.704062440976209e-06,
"loss": 1.178070068359375,
"step": 3510
},
{
"epoch": 1.701890450799806,
"grad_norm": 11.306624489748048,
"learning_rate": 4.701247331040411e-06,
"loss": 1.8000319004058838,
"step": 3511
},
{
"epoch": 1.7023751817741153,
"grad_norm": 10.657105404397832,
"learning_rate": 4.698432316143459e-06,
"loss": 1.9091507196426392,
"step": 3512
},
{
"epoch": 1.7028599127484245,
"grad_norm": 12.174509336113706,
"learning_rate": 4.6956173971808575e-06,
"loss": 0.9670447707176208,
"step": 3513
},
{
"epoch": 1.7033446437227338,
"grad_norm": 13.73680042592868,
"learning_rate": 4.692802575048085e-06,
"loss": 2.0260729789733887,
"step": 3514
},
{
"epoch": 1.703829374697043,
"grad_norm": 41.66656795834247,
"learning_rate": 4.6899878506405904e-06,
"loss": 2.2435989379882812,
"step": 3515
},
{
"epoch": 1.7043141056713524,
"grad_norm": 13.80519249902518,
"learning_rate": 4.6871732248537915e-06,
"loss": 1.4010107517242432,
"step": 3516
},
{
"epoch": 1.7047988366456617,
"grad_norm": 11.67143647478391,
"learning_rate": 4.684358698583072e-06,
"loss": 1.658797264099121,
"step": 3517
},
{
"epoch": 1.705283567619971,
"grad_norm": 12.621920359626404,
"learning_rate": 4.681544272723788e-06,
"loss": 1.6624871492385864,
"step": 3518
},
{
"epoch": 1.7057682985942801,
"grad_norm": 6.40156027320004,
"learning_rate": 4.678729948171259e-06,
"loss": 1.9828673601150513,
"step": 3519
},
{
"epoch": 1.7062530295685896,
"grad_norm": 10.235248675789752,
"learning_rate": 4.675915725820773e-06,
"loss": 2.6461071968078613,
"step": 3520
},
{
"epoch": 1.7067377605428988,
"grad_norm": 11.026577090294873,
"learning_rate": 4.673101606567592e-06,
"loss": 1.4216346740722656,
"step": 3521
},
{
"epoch": 1.707222491517208,
"grad_norm": 11.234375390467115,
"learning_rate": 4.670287591306936e-06,
"loss": 1.5689836740493774,
"step": 3522
},
{
"epoch": 1.7077072224915173,
"grad_norm": 10.140121281464788,
"learning_rate": 4.667473680933999e-06,
"loss": 2.057168960571289,
"step": 3523
},
{
"epoch": 1.7081919534658265,
"grad_norm": 10.060938869118282,
"learning_rate": 4.664659876343938e-06,
"loss": 1.280088186264038,
"step": 3524
},
{
"epoch": 1.7086766844401358,
"grad_norm": 11.554755583613982,
"learning_rate": 4.661846178431873e-06,
"loss": 1.5790586471557617,
"step": 3525
},
{
"epoch": 1.709161415414445,
"grad_norm": 11.812400545022074,
"learning_rate": 4.659032588092901e-06,
"loss": 2.2347846031188965,
"step": 3526
},
{
"epoch": 1.7096461463887542,
"grad_norm": 15.744252858908778,
"learning_rate": 4.656219106222074e-06,
"loss": 1.521249771118164,
"step": 3527
},
{
"epoch": 1.7101308773630635,
"grad_norm": 11.50842288452763,
"learning_rate": 4.653405733714416e-06,
"loss": 1.517781138420105,
"step": 3528
},
{
"epoch": 1.7106156083373727,
"grad_norm": 8.526164684624737,
"learning_rate": 4.65059247146491e-06,
"loss": 1.6598153114318848,
"step": 3529
},
{
"epoch": 1.711100339311682,
"grad_norm": 9.71620659586432,
"learning_rate": 4.64777932036851e-06,
"loss": 1.8393371105194092,
"step": 3530
},
{
"epoch": 1.7115850702859912,
"grad_norm": 9.509769879618306,
"learning_rate": 4.644966281320134e-06,
"loss": 1.2561402320861816,
"step": 3531
},
{
"epoch": 1.7120698012603004,
"grad_norm": 15.209489581381314,
"learning_rate": 4.642153355214659e-06,
"loss": 1.7660053968429565,
"step": 3532
},
{
"epoch": 1.7125545322346096,
"grad_norm": 11.63948345724553,
"learning_rate": 4.639340542946932e-06,
"loss": 1.6986900568008423,
"step": 3533
},
{
"epoch": 1.713039263208919,
"grad_norm": 8.494683910012958,
"learning_rate": 4.636527845411759e-06,
"loss": 1.173429250717163,
"step": 3534
},
{
"epoch": 1.7135239941832283,
"grad_norm": 11.77766470381075,
"learning_rate": 4.633715263503915e-06,
"loss": 1.8726043701171875,
"step": 3535
},
{
"epoch": 1.7140087251575375,
"grad_norm": 14.597692528400673,
"learning_rate": 4.630902798118135e-06,
"loss": 1.5998622179031372,
"step": 3536
},
{
"epoch": 1.7144934561318468,
"grad_norm": 9.547994796361808,
"learning_rate": 4.6280904501491156e-06,
"loss": 2.1583919525146484,
"step": 3537
},
{
"epoch": 1.7149781871061562,
"grad_norm": 16.242199473691034,
"learning_rate": 4.625278220491518e-06,
"loss": 2.3668665885925293,
"step": 3538
},
{
"epoch": 1.7154629180804655,
"grad_norm": 7.73676768764985,
"learning_rate": 4.622466110039963e-06,
"loss": 1.9065172672271729,
"step": 3539
},
{
"epoch": 1.7159476490547747,
"grad_norm": 14.938708735114192,
"learning_rate": 4.619654119689041e-06,
"loss": 1.7962863445281982,
"step": 3540
},
{
"epoch": 1.716432380029084,
"grad_norm": 12.134315098224283,
"learning_rate": 4.616842250333296e-06,
"loss": 1.4835119247436523,
"step": 3541
},
{
"epoch": 1.7169171110033932,
"grad_norm": 34.31934721176669,
"learning_rate": 4.6140305028672365e-06,
"loss": 3.681175708770752,
"step": 3542
},
{
"epoch": 1.7174018419777024,
"grad_norm": 9.197619452153504,
"learning_rate": 4.611218878185331e-06,
"loss": 2.099792003631592,
"step": 3543
},
{
"epoch": 1.7178865729520116,
"grad_norm": 18.94277621872456,
"learning_rate": 4.608407377182014e-06,
"loss": 1.0737311840057373,
"step": 3544
},
{
"epoch": 1.7183713039263209,
"grad_norm": 12.116784937596247,
"learning_rate": 4.6055960007516734e-06,
"loss": 1.8427119255065918,
"step": 3545
},
{
"epoch": 1.71885603490063,
"grad_norm": 11.343297536547173,
"learning_rate": 4.602784749788662e-06,
"loss": 1.455077886581421,
"step": 3546
},
{
"epoch": 1.7193407658749393,
"grad_norm": 8.203832886383042,
"learning_rate": 4.599973625187294e-06,
"loss": 1.5448791980743408,
"step": 3547
},
{
"epoch": 1.7198254968492486,
"grad_norm": 9.944184175663857,
"learning_rate": 4.597162627841834e-06,
"loss": 1.3303534984588623,
"step": 3548
},
{
"epoch": 1.7203102278235578,
"grad_norm": 18.204514007527795,
"learning_rate": 4.59435175864652e-06,
"loss": 1.9602077007293701,
"step": 3549
},
{
"epoch": 1.720794958797867,
"grad_norm": 8.071173810280243,
"learning_rate": 4.591541018495543e-06,
"loss": 1.626417636871338,
"step": 3550
},
{
"epoch": 1.7212796897721765,
"grad_norm": 8.718654075770656,
"learning_rate": 4.588730408283046e-06,
"loss": 1.0654566287994385,
"step": 3551
},
{
"epoch": 1.7217644207464857,
"grad_norm": 16.906820528694844,
"learning_rate": 4.585919928903142e-06,
"loss": 1.3972883224487305,
"step": 3552
},
{
"epoch": 1.722249151720795,
"grad_norm": 13.86242409003885,
"learning_rate": 4.583109581249894e-06,
"loss": 2.8147881031036377,
"step": 3553
},
{
"epoch": 1.7227338826951042,
"grad_norm": 12.18044304689337,
"learning_rate": 4.58029936621733e-06,
"loss": 1.1856569051742554,
"step": 3554
},
{
"epoch": 1.7232186136694134,
"grad_norm": 10.870567900250903,
"learning_rate": 4.577489284699429e-06,
"loss": 2.079906940460205,
"step": 3555
},
{
"epoch": 1.7237033446437229,
"grad_norm": 10.862841348795335,
"learning_rate": 4.574679337590133e-06,
"loss": 1.3669720888137817,
"step": 3556
},
{
"epoch": 1.7241880756180321,
"grad_norm": 11.335288187074536,
"learning_rate": 4.571869525783338e-06,
"loss": 1.6925151348114014,
"step": 3557
},
{
"epoch": 1.7246728065923413,
"grad_norm": 16.35046538838455,
"learning_rate": 4.569059850172898e-06,
"loss": 1.545648455619812,
"step": 3558
},
{
"epoch": 1.7251575375666506,
"grad_norm": 19.34870460513142,
"learning_rate": 4.566250311652625e-06,
"loss": 1.939577341079712,
"step": 3559
},
{
"epoch": 1.7256422685409598,
"grad_norm": 13.101637541300317,
"learning_rate": 4.563440911116283e-06,
"loss": 1.1206107139587402,
"step": 3560
},
{
"epoch": 1.726126999515269,
"grad_norm": 11.41805560809114,
"learning_rate": 4.560631649457599e-06,
"loss": 1.4958640336990356,
"step": 3561
},
{
"epoch": 1.7266117304895783,
"grad_norm": 14.761162546877001,
"learning_rate": 4.557822527570248e-06,
"loss": 1.7024421691894531,
"step": 3562
},
{
"epoch": 1.7270964614638875,
"grad_norm": 14.841742201635196,
"learning_rate": 4.555013546347868e-06,
"loss": 1.8249473571777344,
"step": 3563
},
{
"epoch": 1.7275811924381967,
"grad_norm": 14.82868000010263,
"learning_rate": 4.552204706684047e-06,
"loss": 1.5369750261306763,
"step": 3564
},
{
"epoch": 1.728065923412506,
"grad_norm": 18.16953893374326,
"learning_rate": 4.549396009472331e-06,
"loss": 1.6620410680770874,
"step": 3565
},
{
"epoch": 1.7285506543868152,
"grad_norm": 11.696738528290886,
"learning_rate": 4.546587455606218e-06,
"loss": 1.2067139148712158,
"step": 3566
},
{
"epoch": 1.7290353853611244,
"grad_norm": 9.84445586449876,
"learning_rate": 4.543779045979159e-06,
"loss": 1.552353858947754,
"step": 3567
},
{
"epoch": 1.7295201163354337,
"grad_norm": 14.477528724996553,
"learning_rate": 4.540970781484568e-06,
"loss": 2.128376007080078,
"step": 3568
},
{
"epoch": 1.7300048473097431,
"grad_norm": 12.091127833703265,
"learning_rate": 4.5381626630158046e-06,
"loss": 1.7524771690368652,
"step": 3569
},
{
"epoch": 1.7304895782840524,
"grad_norm": 9.772365074134326,
"learning_rate": 4.535354691466181e-06,
"loss": 1.7921123504638672,
"step": 3570
},
{
"epoch": 1.7309743092583616,
"grad_norm": 11.019851884091837,
"learning_rate": 4.532546867728968e-06,
"loss": 1.3780815601348877,
"step": 3571
},
{
"epoch": 1.7314590402326708,
"grad_norm": 10.548340452876456,
"learning_rate": 4.52973919269739e-06,
"loss": 1.5101526975631714,
"step": 3572
},
{
"epoch": 1.7319437712069803,
"grad_norm": 10.622178663611436,
"learning_rate": 4.526931667264617e-06,
"loss": 1.4917762279510498,
"step": 3573
},
{
"epoch": 1.7324285021812895,
"grad_norm": 9.14905079970047,
"learning_rate": 4.524124292323777e-06,
"loss": 1.7143943309783936,
"step": 3574
},
{
"epoch": 1.7329132331555988,
"grad_norm": 10.649489198399428,
"learning_rate": 4.521317068767949e-06,
"loss": 2.024395704269409,
"step": 3575
},
{
"epoch": 1.733397964129908,
"grad_norm": 14.965126747338651,
"learning_rate": 4.518509997490162e-06,
"loss": 1.8436052799224854,
"step": 3576
},
{
"epoch": 1.7338826951042172,
"grad_norm": 13.83625800238117,
"learning_rate": 4.5157030793834015e-06,
"loss": 1.8558815717697144,
"step": 3577
},
{
"epoch": 1.7343674260785265,
"grad_norm": 12.515551545085843,
"learning_rate": 4.512896315340598e-06,
"loss": 1.3925811052322388,
"step": 3578
},
{
"epoch": 1.7348521570528357,
"grad_norm": 10.290848648867948,
"learning_rate": 4.5100897062546375e-06,
"loss": 1.194291591644287,
"step": 3579
},
{
"epoch": 1.735336888027145,
"grad_norm": 9.708801416225826,
"learning_rate": 4.507283253018355e-06,
"loss": 1.833857774734497,
"step": 3580
},
{
"epoch": 1.7358216190014542,
"grad_norm": 15.966358316504083,
"learning_rate": 4.504476956524534e-06,
"loss": 2.1976101398468018,
"step": 3581
},
{
"epoch": 1.7363063499757634,
"grad_norm": 8.736480248050706,
"learning_rate": 4.5016708176659135e-06,
"loss": 1.810282588005066,
"step": 3582
},
{
"epoch": 1.7367910809500726,
"grad_norm": 14.884347229613256,
"learning_rate": 4.498864837335177e-06,
"loss": 0.9330735802650452,
"step": 3583
},
{
"epoch": 1.7372758119243819,
"grad_norm": 13.479775544601043,
"learning_rate": 4.496059016424961e-06,
"loss": 1.2803133726119995,
"step": 3584
},
{
"epoch": 1.737760542898691,
"grad_norm": 10.81972778710382,
"learning_rate": 4.493253355827846e-06,
"loss": 1.3631963729858398,
"step": 3585
},
{
"epoch": 1.7382452738730003,
"grad_norm": 12.10593922847017,
"learning_rate": 4.49044785643637e-06,
"loss": 2.0277295112609863,
"step": 3586
},
{
"epoch": 1.7387300048473098,
"grad_norm": 10.63099245503113,
"learning_rate": 4.487642519143015e-06,
"loss": 1.4958107471466064,
"step": 3587
},
{
"epoch": 1.739214735821619,
"grad_norm": 10.674215695644904,
"learning_rate": 4.484837344840209e-06,
"loss": 1.6056259870529175,
"step": 3588
},
{
"epoch": 1.7396994667959282,
"grad_norm": 12.560434621859761,
"learning_rate": 4.482032334420332e-06,
"loss": 1.6043020486831665,
"step": 3589
},
{
"epoch": 1.7401841977702375,
"grad_norm": 8.335344157419172,
"learning_rate": 4.479227488775707e-06,
"loss": 2.11765718460083,
"step": 3590
},
{
"epoch": 1.740668928744547,
"grad_norm": 5.889543822097004,
"learning_rate": 4.4764228087986155e-06,
"loss": 1.4100923538208008,
"step": 3591
},
{
"epoch": 1.7411536597188562,
"grad_norm": 13.997749063627941,
"learning_rate": 4.473618295381273e-06,
"loss": 2.023439884185791,
"step": 3592
},
{
"epoch": 1.7416383906931654,
"grad_norm": 9.752905109867626,
"learning_rate": 4.47081394941585e-06,
"loss": 1.2311660051345825,
"step": 3593
},
{
"epoch": 1.7421231216674746,
"grad_norm": 13.159230515812121,
"learning_rate": 4.468009771794462e-06,
"loss": 1.5862916707992554,
"step": 3594
},
{
"epoch": 1.7426078526417839,
"grad_norm": 11.397591382302982,
"learning_rate": 4.465205763409169e-06,
"loss": 2.178180694580078,
"step": 3595
},
{
"epoch": 1.743092583616093,
"grad_norm": 7.887727240906609,
"learning_rate": 4.4624019251519805e-06,
"loss": 1.4033215045928955,
"step": 3596
},
{
"epoch": 1.7435773145904023,
"grad_norm": 15.570902550772562,
"learning_rate": 4.45959825791485e-06,
"loss": 1.158086895942688,
"step": 3597
},
{
"epoch": 1.7440620455647116,
"grad_norm": 10.947534295269978,
"learning_rate": 4.456794762589676e-06,
"loss": 1.4473536014556885,
"step": 3598
},
{
"epoch": 1.7445467765390208,
"grad_norm": 17.18906595423635,
"learning_rate": 4.4539914400683025e-06,
"loss": 2.4496471881866455,
"step": 3599
},
{
"epoch": 1.74503150751333,
"grad_norm": 11.099928319364755,
"learning_rate": 4.4511882912425214e-06,
"loss": 0.9094204902648926,
"step": 3600
},
{
"epoch": 1.7455162384876393,
"grad_norm": 10.197041675556333,
"learning_rate": 4.448385317004065e-06,
"loss": 1.9797378778457642,
"step": 3601
},
{
"epoch": 1.7460009694619485,
"grad_norm": 57.93878831841946,
"learning_rate": 4.4455825182446135e-06,
"loss": 1.4761810302734375,
"step": 3602
},
{
"epoch": 1.7464857004362577,
"grad_norm": 9.487150631511842,
"learning_rate": 4.442779895855791e-06,
"loss": 1.4360229969024658,
"step": 3603
},
{
"epoch": 1.7469704314105672,
"grad_norm": 13.476302028947735,
"learning_rate": 4.439977450729158e-06,
"loss": 1.4644253253936768,
"step": 3604
},
{
"epoch": 1.7474551623848764,
"grad_norm": 17.081392242897415,
"learning_rate": 4.437175183756233e-06,
"loss": 1.129432201385498,
"step": 3605
},
{
"epoch": 1.7479398933591856,
"grad_norm": 7.660898001677902,
"learning_rate": 4.434373095828467e-06,
"loss": 1.7715790271759033,
"step": 3606
},
{
"epoch": 1.7484246243334949,
"grad_norm": 20.89784387554245,
"learning_rate": 4.431571187837255e-06,
"loss": 1.5449835062026978,
"step": 3607
},
{
"epoch": 1.7489093553078041,
"grad_norm": 10.513708658231975,
"learning_rate": 4.428769460673937e-06,
"loss": 1.4615668058395386,
"step": 3608
},
{
"epoch": 1.7493940862821136,
"grad_norm": 12.241000992354893,
"learning_rate": 4.425967915229795e-06,
"loss": 1.709411382675171,
"step": 3609
},
{
"epoch": 1.7498788172564228,
"grad_norm": 7.403035499684961,
"learning_rate": 4.4231665523960574e-06,
"loss": 2.1436305046081543,
"step": 3610
},
{
"epoch": 1.750363548230732,
"grad_norm": 18.017204569436863,
"learning_rate": 4.420365373063887e-06,
"loss": 1.4507575035095215,
"step": 3611
},
{
"epoch": 1.7508482792050413,
"grad_norm": 12.714120492530796,
"learning_rate": 4.417564378124391e-06,
"loss": 0.8132778406143188,
"step": 3612
},
{
"epoch": 1.7513330101793505,
"grad_norm": 17.53448373754589,
"learning_rate": 4.414763568468619e-06,
"loss": 1.7324559688568115,
"step": 3613
},
{
"epoch": 1.7518177411536597,
"grad_norm": 17.242211333740954,
"learning_rate": 4.411962944987562e-06,
"loss": 1.609053611755371,
"step": 3614
},
{
"epoch": 1.752302472127969,
"grad_norm": 8.98974664839641,
"learning_rate": 4.409162508572151e-06,
"loss": 1.8061102628707886,
"step": 3615
},
{
"epoch": 1.7527872031022782,
"grad_norm": 16.88331265739437,
"learning_rate": 4.406362260113257e-06,
"loss": 1.773357629776001,
"step": 3616
},
{
"epoch": 1.7532719340765874,
"grad_norm": 10.926543289119197,
"learning_rate": 4.403562200501693e-06,
"loss": 1.4704374074935913,
"step": 3617
},
{
"epoch": 1.7537566650508967,
"grad_norm": 16.935387694961616,
"learning_rate": 4.4007623306282086e-06,
"loss": 1.6495808362960815,
"step": 3618
},
{
"epoch": 1.754241396025206,
"grad_norm": 9.886314743701558,
"learning_rate": 4.397962651383495e-06,
"loss": 1.4660115242004395,
"step": 3619
},
{
"epoch": 1.7547261269995151,
"grad_norm": 10.52994081209734,
"learning_rate": 4.395163163658186e-06,
"loss": 1.2977604866027832,
"step": 3620
},
{
"epoch": 1.7552108579738244,
"grad_norm": 12.182660468185473,
"learning_rate": 4.392363868342848e-06,
"loss": 3.315325975418091,
"step": 3621
},
{
"epoch": 1.7556955889481338,
"grad_norm": 13.089187153519454,
"learning_rate": 4.389564766327992e-06,
"loss": 1.7570114135742188,
"step": 3622
},
{
"epoch": 1.756180319922443,
"grad_norm": 12.085327996033048,
"learning_rate": 4.3867658585040605e-06,
"loss": 2.2507286071777344,
"step": 3623
},
{
"epoch": 1.7566650508967523,
"grad_norm": 8.646141497218506,
"learning_rate": 4.383967145761443e-06,
"loss": 1.5823016166687012,
"step": 3624
},
{
"epoch": 1.7571497818710615,
"grad_norm": 8.394415846023177,
"learning_rate": 4.381168628990463e-06,
"loss": 1.328770637512207,
"step": 3625
},
{
"epoch": 1.757634512845371,
"grad_norm": 13.318698402517294,
"learning_rate": 4.37837030908138e-06,
"loss": 1.3626353740692139,
"step": 3626
},
{
"epoch": 1.7581192438196802,
"grad_norm": 14.084553704108695,
"learning_rate": 4.375572186924388e-06,
"loss": 1.685867190361023,
"step": 3627
},
{
"epoch": 1.7586039747939894,
"grad_norm": 10.63819545317599,
"learning_rate": 4.3727742634096304e-06,
"loss": 2.091759204864502,
"step": 3628
},
{
"epoch": 1.7590887057682987,
"grad_norm": 10.39095959640523,
"learning_rate": 4.369976539427173e-06,
"loss": 1.585899829864502,
"step": 3629
},
{
"epoch": 1.759573436742608,
"grad_norm": 11.514563691829895,
"learning_rate": 4.367179015867028e-06,
"loss": 1.4735686779022217,
"step": 3630
},
{
"epoch": 1.7600581677169171,
"grad_norm": 16.63719533703477,
"learning_rate": 4.364381693619138e-06,
"loss": 1.7868287563323975,
"step": 3631
},
{
"epoch": 1.7605428986912264,
"grad_norm": 8.138442271345543,
"learning_rate": 4.361584573573384e-06,
"loss": 1.438446283340454,
"step": 3632
},
{
"epoch": 1.7610276296655356,
"grad_norm": 13.504024425442696,
"learning_rate": 4.358787656619584e-06,
"loss": 1.498321771621704,
"step": 3633
},
{
"epoch": 1.7615123606398448,
"grad_norm": 11.70511491529799,
"learning_rate": 4.355990943647488e-06,
"loss": 1.3285961151123047,
"step": 3634
},
{
"epoch": 1.761997091614154,
"grad_norm": 10.707475426848745,
"learning_rate": 4.3531944355467855e-06,
"loss": 1.6180756092071533,
"step": 3635
},
{
"epoch": 1.7624818225884633,
"grad_norm": 11.395794334898438,
"learning_rate": 4.350398133207096e-06,
"loss": 1.7790104150772095,
"step": 3636
},
{
"epoch": 1.7629665535627725,
"grad_norm": 13.896371482774237,
"learning_rate": 4.347602037517975e-06,
"loss": 1.7424697875976562,
"step": 3637
},
{
"epoch": 1.7634512845370818,
"grad_norm": 13.190253865394293,
"learning_rate": 4.344806149368917e-06,
"loss": 1.5244466066360474,
"step": 3638
},
{
"epoch": 1.7639360155113912,
"grad_norm": 11.132135853684433,
"learning_rate": 4.342010469649344e-06,
"loss": 1.8901088237762451,
"step": 3639
},
{
"epoch": 1.7644207464857005,
"grad_norm": 11.077968277001478,
"learning_rate": 4.339214999248614e-06,
"loss": 1.4957873821258545,
"step": 3640
},
{
"epoch": 1.7649054774600097,
"grad_norm": 12.621010318353887,
"learning_rate": 4.336419739056019e-06,
"loss": 1.7172781229019165,
"step": 3641
},
{
"epoch": 1.765390208434319,
"grad_norm": 19.956232611041646,
"learning_rate": 4.333624689960785e-06,
"loss": 1.2428683042526245,
"step": 3642
},
{
"epoch": 1.7658749394086282,
"grad_norm": 9.840114853450501,
"learning_rate": 4.330829852852069e-06,
"loss": 1.4166439771652222,
"step": 3643
},
{
"epoch": 1.7663596703829376,
"grad_norm": 21.33456401495,
"learning_rate": 4.328035228618962e-06,
"loss": 1.7611212730407715,
"step": 3644
},
{
"epoch": 1.7668444013572469,
"grad_norm": 12.669204478517223,
"learning_rate": 4.325240818150485e-06,
"loss": 1.3352839946746826,
"step": 3645
},
{
"epoch": 1.767329132331556,
"grad_norm": 9.956431567702998,
"learning_rate": 4.32244662233559e-06,
"loss": 1.8803012371063232,
"step": 3646
},
{
"epoch": 1.7678138633058653,
"grad_norm": 12.254918154711591,
"learning_rate": 4.31965264206317e-06,
"loss": 1.3153480291366577,
"step": 3647
},
{
"epoch": 1.7682985942801746,
"grad_norm": 17.14414587207317,
"learning_rate": 4.316858878222039e-06,
"loss": 1.9313719272613525,
"step": 3648
},
{
"epoch": 1.7687833252544838,
"grad_norm": 15.385588827129633,
"learning_rate": 4.314065331700945e-06,
"loss": 2.4319169521331787,
"step": 3649
},
{
"epoch": 1.769268056228793,
"grad_norm": 17.033115282658446,
"learning_rate": 4.311272003388569e-06,
"loss": 1.4824252128601074,
"step": 3650
},
{
"epoch": 1.7697527872031023,
"grad_norm": 10.79904369441906,
"learning_rate": 4.308478894173519e-06,
"loss": 1.3559365272521973,
"step": 3651
},
{
"epoch": 1.7702375181774115,
"grad_norm": 28.27964545657713,
"learning_rate": 4.305686004944339e-06,
"loss": 2.3489835262298584,
"step": 3652
},
{
"epoch": 1.7707222491517207,
"grad_norm": 8.751406003337625,
"learning_rate": 4.302893336589498e-06,
"loss": 1.3686087131500244,
"step": 3653
},
{
"epoch": 1.77120698012603,
"grad_norm": 15.137366441145536,
"learning_rate": 4.300100889997396e-06,
"loss": 1.6254873275756836,
"step": 3654
},
{
"epoch": 1.7716917111003392,
"grad_norm": 14.888510398324097,
"learning_rate": 4.297308666056363e-06,
"loss": 1.4069033861160278,
"step": 3655
},
{
"epoch": 1.7721764420746484,
"grad_norm": 12.009981877229613,
"learning_rate": 4.294516665654658e-06,
"loss": 1.375983715057373,
"step": 3656
},
{
"epoch": 1.7726611730489579,
"grad_norm": 14.712767908219474,
"learning_rate": 4.291724889680469e-06,
"loss": 2.0046839714050293,
"step": 3657
},
{
"epoch": 1.773145904023267,
"grad_norm": 14.479515127923246,
"learning_rate": 4.288933339021912e-06,
"loss": 1.1344040632247925,
"step": 3658
},
{
"epoch": 1.7736306349975763,
"grad_norm": 14.428998612661564,
"learning_rate": 4.286142014567034e-06,
"loss": 1.5071690082550049,
"step": 3659
},
{
"epoch": 1.7741153659718856,
"grad_norm": 17.138273108865494,
"learning_rate": 4.283350917203802e-06,
"loss": 2.0417094230651855,
"step": 3660
},
{
"epoch": 1.774600096946195,
"grad_norm": 9.577632616479407,
"learning_rate": 4.280560047820123e-06,
"loss": 1.4199309349060059,
"step": 3661
},
{
"epoch": 1.7750848279205043,
"grad_norm": 9.38242090273378,
"learning_rate": 4.277769407303823e-06,
"loss": 2.6770167350769043,
"step": 3662
},
{
"epoch": 1.7755695588948135,
"grad_norm": 11.25197547183548,
"learning_rate": 4.274978996542657e-06,
"loss": 1.8788301944732666,
"step": 3663
},
{
"epoch": 1.7760542898691227,
"grad_norm": 10.463448005824828,
"learning_rate": 4.2721888164243055e-06,
"loss": 1.6072955131530762,
"step": 3664
},
{
"epoch": 1.776539020843432,
"grad_norm": 9.55204011719622,
"learning_rate": 4.269398867836377e-06,
"loss": 1.578052282333374,
"step": 3665
},
{
"epoch": 1.7770237518177412,
"grad_norm": 12.909230425279574,
"learning_rate": 4.266609151666411e-06,
"loss": 1.5048332214355469,
"step": 3666
},
{
"epoch": 1.7775084827920504,
"grad_norm": 14.546789148017927,
"learning_rate": 4.263819668801865e-06,
"loss": 1.7558023929595947,
"step": 3667
},
{
"epoch": 1.7779932137663597,
"grad_norm": 7.065210781910312,
"learning_rate": 4.261030420130127e-06,
"loss": 1.4009593725204468,
"step": 3668
},
{
"epoch": 1.778477944740669,
"grad_norm": 8.274699588200523,
"learning_rate": 4.2582414065385095e-06,
"loss": 2.3353748321533203,
"step": 3669
},
{
"epoch": 1.7789626757149781,
"grad_norm": 11.747583055189963,
"learning_rate": 4.255452628914248e-06,
"loss": 1.0544579029083252,
"step": 3670
},
{
"epoch": 1.7794474066892874,
"grad_norm": 13.633929428049582,
"learning_rate": 4.252664088144509e-06,
"loss": 2.059731960296631,
"step": 3671
},
{
"epoch": 1.7799321376635966,
"grad_norm": 8.059154088579184,
"learning_rate": 4.249875785116378e-06,
"loss": 1.3257054090499878,
"step": 3672
},
{
"epoch": 1.7804168686379058,
"grad_norm": 11.898599245319017,
"learning_rate": 4.247087720716866e-06,
"loss": 1.3900198936462402,
"step": 3673
},
{
"epoch": 1.780901599612215,
"grad_norm": 16.303528315594953,
"learning_rate": 4.244299895832908e-06,
"loss": 1.5687198638916016,
"step": 3674
},
{
"epoch": 1.7813863305865245,
"grad_norm": 8.891800858188002,
"learning_rate": 4.2415123113513665e-06,
"loss": 1.115545392036438,
"step": 3675
},
{
"epoch": 1.7818710615608337,
"grad_norm": 10.508273171548387,
"learning_rate": 4.238724968159022e-06,
"loss": 1.6661266088485718,
"step": 3676
},
{
"epoch": 1.782355792535143,
"grad_norm": 9.652630560922818,
"learning_rate": 4.235937867142582e-06,
"loss": 1.3842214345932007,
"step": 3677
},
{
"epoch": 1.7828405235094522,
"grad_norm": 11.342754123927657,
"learning_rate": 4.233151009188677e-06,
"loss": 1.9586501121520996,
"step": 3678
},
{
"epoch": 1.7833252544837617,
"grad_norm": 9.311744474548707,
"learning_rate": 4.230364395183853e-06,
"loss": 0.650603711605072,
"step": 3679
},
{
"epoch": 1.783809985458071,
"grad_norm": 13.42311570612274,
"learning_rate": 4.22757802601459e-06,
"loss": 1.6106181144714355,
"step": 3680
},
{
"epoch": 1.7842947164323801,
"grad_norm": 11.24183035015063,
"learning_rate": 4.224791902567285e-06,
"loss": 1.533249855041504,
"step": 3681
},
{
"epoch": 1.7847794474066894,
"grad_norm": 9.42824945342648,
"learning_rate": 4.222006025728253e-06,
"loss": 1.7668535709381104,
"step": 3682
},
{
"epoch": 1.7852641783809986,
"grad_norm": 7.4193518045719005,
"learning_rate": 4.219220396383736e-06,
"loss": 1.750149130821228,
"step": 3683
},
{
"epoch": 1.7857489093553078,
"grad_norm": 18.669821245086084,
"learning_rate": 4.21643501541989e-06,
"loss": 1.0534980297088623,
"step": 3684
},
{
"epoch": 1.786233640329617,
"grad_norm": 9.537852230784667,
"learning_rate": 4.213649883722806e-06,
"loss": 2.436964273452759,
"step": 3685
},
{
"epoch": 1.7867183713039263,
"grad_norm": 11.575113218335328,
"learning_rate": 4.2108650021784805e-06,
"loss": 1.82785964012146,
"step": 3686
},
{
"epoch": 1.7872031022782355,
"grad_norm": 8.937870017579147,
"learning_rate": 4.208080371672838e-06,
"loss": 1.5567359924316406,
"step": 3687
},
{
"epoch": 1.7876878332525448,
"grad_norm": 8.42653875765951,
"learning_rate": 4.2052959930917205e-06,
"loss": 1.9590201377868652,
"step": 3688
},
{
"epoch": 1.788172564226854,
"grad_norm": 10.3237760676339,
"learning_rate": 4.202511867320894e-06,
"loss": 1.4386303424835205,
"step": 3689
},
{
"epoch": 1.7886572952011632,
"grad_norm": 17.351810033551974,
"learning_rate": 4.199727995246041e-06,
"loss": 1.2756080627441406,
"step": 3690
},
{
"epoch": 1.7891420261754725,
"grad_norm": 21.84957888113295,
"learning_rate": 4.1969443777527615e-06,
"loss": 1.874251127243042,
"step": 3691
},
{
"epoch": 1.789626757149782,
"grad_norm": 12.866826449701817,
"learning_rate": 4.194161015726577e-06,
"loss": 2.210601329803467,
"step": 3692
},
{
"epoch": 1.7901114881240912,
"grad_norm": 11.131667162623732,
"learning_rate": 4.1913779100529285e-06,
"loss": 1.682827353477478,
"step": 3693
},
{
"epoch": 1.7905962190984004,
"grad_norm": 12.296185957378936,
"learning_rate": 4.188595061617173e-06,
"loss": 2.3948755264282227,
"step": 3694
},
{
"epoch": 1.7910809500727096,
"grad_norm": 9.571880064242288,
"learning_rate": 4.185812471304589e-06,
"loss": 1.4596314430236816,
"step": 3695
},
{
"epoch": 1.7915656810470189,
"grad_norm": 12.922248552297877,
"learning_rate": 4.1830301400003685e-06,
"loss": 1.8211833238601685,
"step": 3696
},
{
"epoch": 1.7920504120213283,
"grad_norm": 10.874198973017373,
"learning_rate": 4.180248068589626e-06,
"loss": 3.5587663650512695,
"step": 3697
},
{
"epoch": 1.7925351429956375,
"grad_norm": 13.655927325956325,
"learning_rate": 4.1774662579573845e-06,
"loss": 1.4736275672912598,
"step": 3698
},
{
"epoch": 1.7930198739699468,
"grad_norm": 12.655208376189865,
"learning_rate": 4.1746847089885984e-06,
"loss": 1.2672220468521118,
"step": 3699
},
{
"epoch": 1.793504604944256,
"grad_norm": 12.730682008298054,
"learning_rate": 4.171903422568128e-06,
"loss": 1.962795615196228,
"step": 3700
},
{
"epoch": 1.7939893359185652,
"grad_norm": 7.025018424466,
"learning_rate": 4.1691223995807504e-06,
"loss": 1.2529321908950806,
"step": 3701
},
{
"epoch": 1.7944740668928745,
"grad_norm": 10.161885418249454,
"learning_rate": 4.16634164091116e-06,
"loss": 1.1998951435089111,
"step": 3702
},
{
"epoch": 1.7949587978671837,
"grad_norm": 8.307238743856344,
"learning_rate": 4.163561147443974e-06,
"loss": 1.0750868320465088,
"step": 3703
},
{
"epoch": 1.795443528841493,
"grad_norm": 8.020131973057367,
"learning_rate": 4.160780920063718e-06,
"loss": 1.5044105052947998,
"step": 3704
},
{
"epoch": 1.7959282598158022,
"grad_norm": 16.76089103810596,
"learning_rate": 4.158000959654833e-06,
"loss": 1.6345034837722778,
"step": 3705
},
{
"epoch": 1.7964129907901114,
"grad_norm": 6.555104866327527,
"learning_rate": 4.155221267101677e-06,
"loss": 0.9233959913253784,
"step": 3706
},
{
"epoch": 1.7968977217644206,
"grad_norm": 13.258684834693675,
"learning_rate": 4.152441843288522e-06,
"loss": 2.200833320617676,
"step": 3707
},
{
"epoch": 1.7973824527387299,
"grad_norm": 19.226018846726802,
"learning_rate": 4.149662689099556e-06,
"loss": 1.5393571853637695,
"step": 3708
},
{
"epoch": 1.797867183713039,
"grad_norm": 12.432518648147104,
"learning_rate": 4.1468838054188795e-06,
"loss": 1.918775200843811,
"step": 3709
},
{
"epoch": 1.7983519146873486,
"grad_norm": 11.698299863201916,
"learning_rate": 4.1441051931305095e-06,
"loss": 0.9305634498596191,
"step": 3710
},
{
"epoch": 1.7988366456616578,
"grad_norm": 6.562866867632027,
"learning_rate": 4.141326853118372e-06,
"loss": 0.712604284286499,
"step": 3711
},
{
"epoch": 1.799321376635967,
"grad_norm": 10.101829064703741,
"learning_rate": 4.1385487862663105e-06,
"loss": 1.3356056213378906,
"step": 3712
},
{
"epoch": 1.7998061076102763,
"grad_norm": 9.499191399725188,
"learning_rate": 4.135770993458082e-06,
"loss": 1.80926513671875,
"step": 3713
},
{
"epoch": 1.8002908385845857,
"grad_norm": 15.144747255133913,
"learning_rate": 4.1329934755773514e-06,
"loss": 1.7013288736343384,
"step": 3714
},
{
"epoch": 1.800775569558895,
"grad_norm": 11.455036403303648,
"learning_rate": 4.130216233507702e-06,
"loss": 1.7975797653198242,
"step": 3715
},
{
"epoch": 1.8012603005332042,
"grad_norm": 13.921320672577533,
"learning_rate": 4.127439268132624e-06,
"loss": 1.8846755027770996,
"step": 3716
},
{
"epoch": 1.8017450315075134,
"grad_norm": 13.30595154320593,
"learning_rate": 4.1246625803355266e-06,
"loss": 2.579927921295166,
"step": 3717
},
{
"epoch": 1.8022297624818227,
"grad_norm": 12.480655520078912,
"learning_rate": 4.121886170999722e-06,
"loss": 1.5983532667160034,
"step": 3718
},
{
"epoch": 1.8027144934561319,
"grad_norm": 11.37601181130405,
"learning_rate": 4.119110041008442e-06,
"loss": 1.4984403848648071,
"step": 3719
},
{
"epoch": 1.8031992244304411,
"grad_norm": 9.854491136571937,
"learning_rate": 4.116334191244823e-06,
"loss": 1.8213236331939697,
"step": 3720
},
{
"epoch": 1.8036839554047503,
"grad_norm": 9.198042910810448,
"learning_rate": 4.113558622591913e-06,
"loss": 1.4511713981628418,
"step": 3721
},
{
"epoch": 1.8041686863790596,
"grad_norm": 7.186246790984862,
"learning_rate": 4.11078333593268e-06,
"loss": 1.7804372310638428,
"step": 3722
},
{
"epoch": 1.8046534173533688,
"grad_norm": 12.06221605310869,
"learning_rate": 4.108008332149988e-06,
"loss": 1.8109028339385986,
"step": 3723
},
{
"epoch": 1.805138148327678,
"grad_norm": 9.543016062764915,
"learning_rate": 4.10523361212662e-06,
"loss": 1.9732664823532104,
"step": 3724
},
{
"epoch": 1.8056228793019873,
"grad_norm": 16.435433123389007,
"learning_rate": 4.102459176745267e-06,
"loss": 2.499666452407837,
"step": 3725
},
{
"epoch": 1.8061076102762965,
"grad_norm": 10.170439427092674,
"learning_rate": 4.0996850268885295e-06,
"loss": 1.492712378501892,
"step": 3726
},
{
"epoch": 1.8065923412506057,
"grad_norm": 60.112721427328296,
"learning_rate": 4.096911163438915e-06,
"loss": 1.3314521312713623,
"step": 3727
},
{
"epoch": 1.8070770722249152,
"grad_norm": 9.215614359459057,
"learning_rate": 4.0941375872788445e-06,
"loss": 1.4583919048309326,
"step": 3728
},
{
"epoch": 1.8075618031992244,
"grad_norm": 11.928002367430915,
"learning_rate": 4.091364299290641e-06,
"loss": 1.0973515510559082,
"step": 3729
},
{
"epoch": 1.8080465341735337,
"grad_norm": 9.515237503381998,
"learning_rate": 4.088591300356543e-06,
"loss": 1.7234880924224854,
"step": 3730
},
{
"epoch": 1.808531265147843,
"grad_norm": 22.778119405543237,
"learning_rate": 4.0858185913586915e-06,
"loss": 1.6476805210113525,
"step": 3731
},
{
"epoch": 1.8090159961221524,
"grad_norm": 8.097947765588424,
"learning_rate": 4.083046173179138e-06,
"loss": 1.3130159378051758,
"step": 3732
},
{
"epoch": 1.8095007270964616,
"grad_norm": 7.683225193481411,
"learning_rate": 4.080274046699841e-06,
"loss": 1.8140177726745605,
"step": 3733
},
{
"epoch": 1.8099854580707708,
"grad_norm": 10.072972642297593,
"learning_rate": 4.0775022128026665e-06,
"loss": 0.9570883512496948,
"step": 3734
},
{
"epoch": 1.81047018904508,
"grad_norm": 25.76167403540859,
"learning_rate": 4.074730672369386e-06,
"loss": 1.3602854013442993,
"step": 3735
},
{
"epoch": 1.8109549200193893,
"grad_norm": 9.497034222995179,
"learning_rate": 4.071959426281678e-06,
"loss": 1.4630851745605469,
"step": 3736
},
{
"epoch": 1.8114396509936985,
"grad_norm": 15.05320345937948,
"learning_rate": 4.069188475421131e-06,
"loss": 1.7668306827545166,
"step": 3737
},
{
"epoch": 1.8119243819680078,
"grad_norm": 14.807450145250591,
"learning_rate": 4.066417820669235e-06,
"loss": 1.6931331157684326,
"step": 3738
},
{
"epoch": 1.812409112942317,
"grad_norm": 11.782888317574868,
"learning_rate": 4.0636474629073865e-06,
"loss": 1.1880111694335938,
"step": 3739
},
{
"epoch": 1.8128938439166262,
"grad_norm": 9.235996159055949,
"learning_rate": 4.060877403016886e-06,
"loss": 1.655306100845337,
"step": 3740
},
{
"epoch": 1.8133785748909355,
"grad_norm": 17.242946032811627,
"learning_rate": 4.0581076418789485e-06,
"loss": 1.7742290496826172,
"step": 3741
},
{
"epoch": 1.8138633058652447,
"grad_norm": 14.301780758875573,
"learning_rate": 4.055338180374682e-06,
"loss": 2.0392141342163086,
"step": 3742
},
{
"epoch": 1.814348036839554,
"grad_norm": 8.145276005641687,
"learning_rate": 4.0525690193851056e-06,
"loss": 1.5631906986236572,
"step": 3743
},
{
"epoch": 1.8148327678138632,
"grad_norm": 10.08800629260178,
"learning_rate": 4.04980015979114e-06,
"loss": 1.2516242265701294,
"step": 3744
},
{
"epoch": 1.8153174987881726,
"grad_norm": 31.139247069397708,
"learning_rate": 4.047031602473613e-06,
"loss": 1.4843958616256714,
"step": 3745
},
{
"epoch": 1.8158022297624818,
"grad_norm": 7.674992833098177,
"learning_rate": 4.0442633483132565e-06,
"loss": 1.1570463180541992,
"step": 3746
},
{
"epoch": 1.816286960736791,
"grad_norm": 22.769621440298422,
"learning_rate": 4.041495398190702e-06,
"loss": 2.3770360946655273,
"step": 3747
},
{
"epoch": 1.8167716917111003,
"grad_norm": 11.677686573651549,
"learning_rate": 4.038727752986486e-06,
"loss": 1.4298577308654785,
"step": 3748
},
{
"epoch": 1.8172564226854095,
"grad_norm": 11.417480894796578,
"learning_rate": 4.035960413581048e-06,
"loss": 2.4383697509765625,
"step": 3749
},
{
"epoch": 1.817741153659719,
"grad_norm": 7.853672264330791,
"learning_rate": 4.033193380854733e-06,
"loss": 1.8938368558883667,
"step": 3750
},
{
"epoch": 1.8182258846340282,
"grad_norm": 10.830674797769275,
"learning_rate": 4.030426655687787e-06,
"loss": 1.2280088663101196,
"step": 3751
},
{
"epoch": 1.8187106156083375,
"grad_norm": 8.471668500956753,
"learning_rate": 4.027660238960354e-06,
"loss": 1.3665108680725098,
"step": 3752
},
{
"epoch": 1.8191953465826467,
"grad_norm": 7.055754709564754,
"learning_rate": 4.024894131552486e-06,
"loss": 1.5206105709075928,
"step": 3753
},
{
"epoch": 1.819680077556956,
"grad_norm": 10.114924878667376,
"learning_rate": 4.022128334344129e-06,
"loss": 1.6363009214401245,
"step": 3754
},
{
"epoch": 1.8201648085312652,
"grad_norm": 14.727813292951055,
"learning_rate": 4.019362848215141e-06,
"loss": 1.3666086196899414,
"step": 3755
},
{
"epoch": 1.8206495395055744,
"grad_norm": 24.737583865637465,
"learning_rate": 4.016597674045273e-06,
"loss": 3.5202527046203613,
"step": 3756
},
{
"epoch": 1.8211342704798836,
"grad_norm": 11.334927888163287,
"learning_rate": 4.013832812714179e-06,
"loss": 1.3444154262542725,
"step": 3757
},
{
"epoch": 1.8216190014541929,
"grad_norm": 10.116591477171154,
"learning_rate": 4.01106826510141e-06,
"loss": 1.4685707092285156,
"step": 3758
},
{
"epoch": 1.822103732428502,
"grad_norm": 12.319100098445402,
"learning_rate": 4.008304032086425e-06,
"loss": 0.875248908996582,
"step": 3759
},
{
"epoch": 1.8225884634028113,
"grad_norm": 8.459341076507751,
"learning_rate": 4.00554011454858e-06,
"loss": 2.103783369064331,
"step": 3760
},
{
"epoch": 1.8230731943771206,
"grad_norm": 9.504163844349044,
"learning_rate": 4.002776513367123e-06,
"loss": 1.4354588985443115,
"step": 3761
},
{
"epoch": 1.8235579253514298,
"grad_norm": 10.225852090922572,
"learning_rate": 4.000013229421213e-06,
"loss": 1.1800808906555176,
"step": 3762
},
{
"epoch": 1.8240426563257393,
"grad_norm": 10.99242169028264,
"learning_rate": 3.997250263589898e-06,
"loss": 1.4577085971832275,
"step": 3763
},
{
"epoch": 1.8245273873000485,
"grad_norm": 8.811650968228033,
"learning_rate": 3.994487616752132e-06,
"loss": 1.9783974885940552,
"step": 3764
},
{
"epoch": 1.8250121182743577,
"grad_norm": 8.997453355413786,
"learning_rate": 3.991725289786766e-06,
"loss": 1.5668283700942993,
"step": 3765
},
{
"epoch": 1.825496849248667,
"grad_norm": 9.346876375170993,
"learning_rate": 3.988963283572545e-06,
"loss": 1.1187801361083984,
"step": 3766
},
{
"epoch": 1.8259815802229764,
"grad_norm": 8.326803328456116,
"learning_rate": 3.986201598988118e-06,
"loss": 1.2908697128295898,
"step": 3767
},
{
"epoch": 1.8264663111972856,
"grad_norm": 18.82809717320864,
"learning_rate": 3.983440236912027e-06,
"loss": 1.7772873640060425,
"step": 3768
},
{
"epoch": 1.8269510421715949,
"grad_norm": 8.469945791153595,
"learning_rate": 3.980679198222715e-06,
"loss": 1.3602116107940674,
"step": 3769
},
{
"epoch": 1.827435773145904,
"grad_norm": 13.96717212707672,
"learning_rate": 3.977918483798519e-06,
"loss": 1.8045034408569336,
"step": 3770
},
{
"epoch": 1.8279205041202133,
"grad_norm": 10.563703125138899,
"learning_rate": 3.975158094517675e-06,
"loss": 1.9095211029052734,
"step": 3771
},
{
"epoch": 1.8284052350945226,
"grad_norm": 19.714256773449176,
"learning_rate": 3.9723980312583125e-06,
"loss": 1.8017765283584595,
"step": 3772
},
{
"epoch": 1.8288899660688318,
"grad_norm": 10.753528867753143,
"learning_rate": 3.969638294898462e-06,
"loss": 1.6712760925292969,
"step": 3773
},
{
"epoch": 1.829374697043141,
"grad_norm": 8.714255815507894,
"learning_rate": 3.9668788863160466e-06,
"loss": 1.421858310699463,
"step": 3774
},
{
"epoch": 1.8298594280174503,
"grad_norm": 8.315037334581802,
"learning_rate": 3.964119806388887e-06,
"loss": 0.9889470934867859,
"step": 3775
},
{
"epoch": 1.8303441589917595,
"grad_norm": 13.26135029321593,
"learning_rate": 3.961361055994698e-06,
"loss": 1.4065864086151123,
"step": 3776
},
{
"epoch": 1.8308288899660687,
"grad_norm": 6.657852517367023,
"learning_rate": 3.9586026360110855e-06,
"loss": 1.1384623050689697,
"step": 3777
},
{
"epoch": 1.831313620940378,
"grad_norm": 10.658353607280269,
"learning_rate": 3.955844547315562e-06,
"loss": 1.7764381170272827,
"step": 3778
},
{
"epoch": 1.8317983519146872,
"grad_norm": 9.86979733904879,
"learning_rate": 3.953086790785525e-06,
"loss": 2.155393600463867,
"step": 3779
},
{
"epoch": 1.8322830828889967,
"grad_norm": 14.643722692957343,
"learning_rate": 3.950329367298268e-06,
"loss": 2.23819899559021,
"step": 3780
},
{
"epoch": 1.832767813863306,
"grad_norm": 9.73484221143393,
"learning_rate": 3.947572277730978e-06,
"loss": 1.6925466060638428,
"step": 3781
},
{
"epoch": 1.8332525448376151,
"grad_norm": 9.78491594894027,
"learning_rate": 3.944815522960738e-06,
"loss": 1.2990046739578247,
"step": 3782
},
{
"epoch": 1.8337372758119244,
"grad_norm": 12.433302740462029,
"learning_rate": 3.942059103864524e-06,
"loss": 1.3794564008712769,
"step": 3783
},
{
"epoch": 1.8342220067862336,
"grad_norm": 9.92894072002093,
"learning_rate": 3.939303021319205e-06,
"loss": 1.7950350046157837,
"step": 3784
},
{
"epoch": 1.834706737760543,
"grad_norm": 11.591057863277735,
"learning_rate": 3.936547276201542e-06,
"loss": 1.5944920778274536,
"step": 3785
},
{
"epoch": 1.8351914687348523,
"grad_norm": 12.47545320159593,
"learning_rate": 3.933791869388188e-06,
"loss": 3.061401128768921,
"step": 3786
},
{
"epoch": 1.8356761997091615,
"grad_norm": 13.280590522735768,
"learning_rate": 3.931036801755693e-06,
"loss": 2.0348942279815674,
"step": 3787
},
{
"epoch": 1.8361609306834707,
"grad_norm": 21.85142215236257,
"learning_rate": 3.928282074180494e-06,
"loss": 1.855841040611267,
"step": 3788
},
{
"epoch": 1.83664566165778,
"grad_norm": 9.630180543078714,
"learning_rate": 3.925527687538922e-06,
"loss": 2.0324370861053467,
"step": 3789
},
{
"epoch": 1.8371303926320892,
"grad_norm": 12.402859389465828,
"learning_rate": 3.9227736427071995e-06,
"loss": 1.6489412784576416,
"step": 3790
},
{
"epoch": 1.8376151236063984,
"grad_norm": 13.634232139525652,
"learning_rate": 3.920019940561437e-06,
"loss": 1.3916902542114258,
"step": 3791
},
{
"epoch": 1.8380998545807077,
"grad_norm": 7.7111174244045335,
"learning_rate": 3.917266581977643e-06,
"loss": 1.62168288230896,
"step": 3792
},
{
"epoch": 1.838584585555017,
"grad_norm": 10.79760442347353,
"learning_rate": 3.914513567831711e-06,
"loss": 0.9355969429016113,
"step": 3793
},
{
"epoch": 1.8390693165293261,
"grad_norm": 13.766566652236824,
"learning_rate": 3.911760898999428e-06,
"loss": 2.368802547454834,
"step": 3794
},
{
"epoch": 1.8395540475036354,
"grad_norm": 9.989447058587505,
"learning_rate": 3.909008576356467e-06,
"loss": 1.9037365913391113,
"step": 3795
},
{
"epoch": 1.8400387784779446,
"grad_norm": 6.067750370377916,
"learning_rate": 3.906256600778392e-06,
"loss": 1.1181681156158447,
"step": 3796
},
{
"epoch": 1.8405235094522538,
"grad_norm": 7.622957784255769,
"learning_rate": 3.903504973140664e-06,
"loss": 1.558342695236206,
"step": 3797
},
{
"epoch": 1.8410082404265633,
"grad_norm": 9.358715118391993,
"learning_rate": 3.900753694318626e-06,
"loss": 1.901029348373413,
"step": 3798
},
{
"epoch": 1.8414929714008725,
"grad_norm": 9.886341420384696,
"learning_rate": 3.898002765187509e-06,
"loss": 1.6478240489959717,
"step": 3799
},
{
"epoch": 1.8419777023751818,
"grad_norm": 9.490638372852942,
"learning_rate": 3.895252186622433e-06,
"loss": 1.8067935705184937,
"step": 3800
},
{
"epoch": 1.842462433349491,
"grad_norm": 16.71560404819666,
"learning_rate": 3.892501959498416e-06,
"loss": 1.612290620803833,
"step": 3801
},
{
"epoch": 1.8429471643238002,
"grad_norm": 15.833890449197547,
"learning_rate": 3.889752084690353e-06,
"loss": 1.1467247009277344,
"step": 3802
},
{
"epoch": 1.8434318952981097,
"grad_norm": 7.146268597237893,
"learning_rate": 3.887002563073029e-06,
"loss": 1.3569939136505127,
"step": 3803
},
{
"epoch": 1.843916626272419,
"grad_norm": 14.576991142966707,
"learning_rate": 3.884253395521122e-06,
"loss": 1.547729730606079,
"step": 3804
},
{
"epoch": 1.8444013572467282,
"grad_norm": 12.157587134420627,
"learning_rate": 3.8815045829091915e-06,
"loss": 1.9924629926681519,
"step": 3805
},
{
"epoch": 1.8448860882210374,
"grad_norm": 15.151242191823822,
"learning_rate": 3.8787561261116876e-06,
"loss": 1.370276689529419,
"step": 3806
},
{
"epoch": 1.8453708191953466,
"grad_norm": 10.705200754119602,
"learning_rate": 3.876008026002947e-06,
"loss": 1.3334518671035767,
"step": 3807
},
{
"epoch": 1.8458555501696559,
"grad_norm": 8.643439443492571,
"learning_rate": 3.87326028345719e-06,
"loss": 1.4470396041870117,
"step": 3808
},
{
"epoch": 1.846340281143965,
"grad_norm": 9.395144057619849,
"learning_rate": 3.870512899348526e-06,
"loss": 1.7511154413223267,
"step": 3809
},
{
"epoch": 1.8468250121182743,
"grad_norm": 15.107550013834276,
"learning_rate": 3.867765874550949e-06,
"loss": 0.9291616082191467,
"step": 3810
},
{
"epoch": 1.8473097430925836,
"grad_norm": 9.075170897235848,
"learning_rate": 3.865019209938341e-06,
"loss": 1.6716485023498535,
"step": 3811
},
{
"epoch": 1.8477944740668928,
"grad_norm": 11.510767182146923,
"learning_rate": 3.862272906384467e-06,
"loss": 1.670668363571167,
"step": 3812
},
{
"epoch": 1.848279205041202,
"grad_norm": 11.202895394091387,
"learning_rate": 3.859526964762978e-06,
"loss": 1.7871921062469482,
"step": 3813
},
{
"epoch": 1.8487639360155113,
"grad_norm": 9.760437849360805,
"learning_rate": 3.856781385947405e-06,
"loss": 1.2092647552490234,
"step": 3814
},
{
"epoch": 1.8492486669898205,
"grad_norm": 9.063269296504105,
"learning_rate": 3.854036170811176e-06,
"loss": 1.477590799331665,
"step": 3815
},
{
"epoch": 1.84973339796413,
"grad_norm": 9.931031982254957,
"learning_rate": 3.851291320227592e-06,
"loss": 2.049102544784546,
"step": 3816
},
{
"epoch": 1.8502181289384392,
"grad_norm": 14.481504680529103,
"learning_rate": 3.8485468350698415e-06,
"loss": 1.6403090953826904,
"step": 3817
},
{
"epoch": 1.8507028599127484,
"grad_norm": 10.13257661814961,
"learning_rate": 3.8458027162109965e-06,
"loss": 1.146431565284729,
"step": 3818
},
{
"epoch": 1.8511875908870576,
"grad_norm": 7.267449327630187,
"learning_rate": 3.8430589645240105e-06,
"loss": 1.2104136943817139,
"step": 3819
},
{
"epoch": 1.851672321861367,
"grad_norm": 7.666105977276118,
"learning_rate": 3.840315580881728e-06,
"loss": 1.2945067882537842,
"step": 3820
},
{
"epoch": 1.8521570528356763,
"grad_norm": 8.542022193174528,
"learning_rate": 3.837572566156867e-06,
"loss": 1.968456745147705,
"step": 3821
},
{
"epoch": 1.8526417838099856,
"grad_norm": 16.03046537894807,
"learning_rate": 3.834829921222034e-06,
"loss": 1.8554558753967285,
"step": 3822
},
{
"epoch": 1.8531265147842948,
"grad_norm": 7.108579886110114,
"learning_rate": 3.832087646949715e-06,
"loss": 1.2227234840393066,
"step": 3823
},
{
"epoch": 1.853611245758604,
"grad_norm": 9.68574808122959,
"learning_rate": 3.829345744212277e-06,
"loss": 1.615128517150879,
"step": 3824
},
{
"epoch": 1.8540959767329133,
"grad_norm": 8.948462657212309,
"learning_rate": 3.826604213881975e-06,
"loss": 1.2934107780456543,
"step": 3825
},
{
"epoch": 1.8545807077072225,
"grad_norm": 26.562282167027906,
"learning_rate": 3.823863056830939e-06,
"loss": 1.4367725849151611,
"step": 3826
},
{
"epoch": 1.8550654386815317,
"grad_norm": 9.158864004512173,
"learning_rate": 3.821122273931183e-06,
"loss": 1.628298282623291,
"step": 3827
},
{
"epoch": 1.855550169655841,
"grad_norm": 14.300117329879615,
"learning_rate": 3.8183818660545995e-06,
"loss": 1.9286634922027588,
"step": 3828
},
{
"epoch": 1.8560349006301502,
"grad_norm": 12.82917240141784,
"learning_rate": 3.815641834072967e-06,
"loss": 1.4501365423202515,
"step": 3829
},
{
"epoch": 1.8565196316044594,
"grad_norm": 8.404813307658236,
"learning_rate": 3.812902178857941e-06,
"loss": 1.7171928882598877,
"step": 3830
},
{
"epoch": 1.8570043625787687,
"grad_norm": 14.21723313319785,
"learning_rate": 3.810162901281056e-06,
"loss": 1.1035404205322266,
"step": 3831
},
{
"epoch": 1.857489093553078,
"grad_norm": 7.482010397434698,
"learning_rate": 3.8074240022137282e-06,
"loss": 1.3687548637390137,
"step": 3832
},
{
"epoch": 1.8579738245273874,
"grad_norm": 28.26462425612034,
"learning_rate": 3.80468548252725e-06,
"loss": 2.189478874206543,
"step": 3833
},
{
"epoch": 1.8584585555016966,
"grad_norm": 11.604418369930155,
"learning_rate": 3.8019473430928012e-06,
"loss": 2.445333957672119,
"step": 3834
},
{
"epoch": 1.8589432864760058,
"grad_norm": 9.866031493978847,
"learning_rate": 3.7992095847814337e-06,
"loss": 1.7706125974655151,
"step": 3835
},
{
"epoch": 1.859428017450315,
"grad_norm": 11.810110009985312,
"learning_rate": 3.7964722084640783e-06,
"loss": 1.7974560260772705,
"step": 3836
},
{
"epoch": 1.8599127484246243,
"grad_norm": 14.801731866607998,
"learning_rate": 3.793735215011546e-06,
"loss": 1.408696174621582,
"step": 3837
},
{
"epoch": 1.8603974793989337,
"grad_norm": 11.53252446679149,
"learning_rate": 3.790998605294526e-06,
"loss": 1.6232142448425293,
"step": 3838
},
{
"epoch": 1.860882210373243,
"grad_norm": 15.002722235868852,
"learning_rate": 3.7882623801835873e-06,
"loss": 1.5384654998779297,
"step": 3839
},
{
"epoch": 1.8613669413475522,
"grad_norm": 7.70803510243967,
"learning_rate": 3.785526540549173e-06,
"loss": 1.987929344177246,
"step": 3840
},
{
"epoch": 1.8618516723218614,
"grad_norm": 14.908568930418838,
"learning_rate": 3.7827910872616053e-06,
"loss": 3.056663751602173,
"step": 3841
},
{
"epoch": 1.8623364032961707,
"grad_norm": 9.583292549103543,
"learning_rate": 3.7800560211910826e-06,
"loss": 1.9691431522369385,
"step": 3842
},
{
"epoch": 1.86282113427048,
"grad_norm": 11.109373284245208,
"learning_rate": 3.777321343207682e-06,
"loss": 1.5921955108642578,
"step": 3843
},
{
"epoch": 1.8633058652447891,
"grad_norm": 11.03352487190911,
"learning_rate": 3.774587054181356e-06,
"loss": 1.7958393096923828,
"step": 3844
},
{
"epoch": 1.8637905962190984,
"grad_norm": 14.95609058097342,
"learning_rate": 3.771853154981934e-06,
"loss": 1.8356173038482666,
"step": 3845
},
{
"epoch": 1.8642753271934076,
"grad_norm": 23.939768065640767,
"learning_rate": 3.7691196464791178e-06,
"loss": 2.623054265975952,
"step": 3846
},
{
"epoch": 1.8647600581677168,
"grad_norm": 12.745131433279994,
"learning_rate": 3.76638652954249e-06,
"loss": 1.2515921592712402,
"step": 3847
},
{
"epoch": 1.865244789142026,
"grad_norm": 12.831773187837012,
"learning_rate": 3.7636538050415073e-06,
"loss": 1.4680964946746826,
"step": 3848
},
{
"epoch": 1.8657295201163353,
"grad_norm": 18.946356005512385,
"learning_rate": 3.7609214738454995e-06,
"loss": 1.5978691577911377,
"step": 3849
},
{
"epoch": 1.8662142510906445,
"grad_norm": 14.559970233457305,
"learning_rate": 3.758189536823673e-06,
"loss": 2.1459200382232666,
"step": 3850
},
{
"epoch": 1.866698982064954,
"grad_norm": 9.195269491876537,
"learning_rate": 3.755457994845109e-06,
"loss": 1.3858362436294556,
"step": 3851
},
{
"epoch": 1.8671837130392632,
"grad_norm": 13.30627911587158,
"learning_rate": 3.7527268487787583e-06,
"loss": 1.4989409446716309,
"step": 3852
},
{
"epoch": 1.8676684440135725,
"grad_norm": 7.249866616329221,
"learning_rate": 3.749996099493455e-06,
"loss": 1.056333303451538,
"step": 3853
},
{
"epoch": 1.8681531749878817,
"grad_norm": 13.092314826687089,
"learning_rate": 3.7472657478579017e-06,
"loss": 1.7785322666168213,
"step": 3854
},
{
"epoch": 1.8686379059621911,
"grad_norm": 23.16214097243705,
"learning_rate": 3.7445357947406714e-06,
"loss": 0.784498393535614,
"step": 3855
},
{
"epoch": 1.8691226369365004,
"grad_norm": 16.076248686450786,
"learning_rate": 3.741806241010212e-06,
"loss": 1.5876598358154297,
"step": 3856
},
{
"epoch": 1.8696073679108096,
"grad_norm": 23.90209619665504,
"learning_rate": 3.7390770875348525e-06,
"loss": 1.667549729347229,
"step": 3857
},
{
"epoch": 1.8700920988851188,
"grad_norm": 12.604705573676055,
"learning_rate": 3.7363483351827827e-06,
"loss": 2.259801149368286,
"step": 3858
},
{
"epoch": 1.870576829859428,
"grad_norm": 12.671187221365516,
"learning_rate": 3.7336199848220724e-06,
"loss": 1.7935236692428589,
"step": 3859
},
{
"epoch": 1.8710615608337373,
"grad_norm": 12.05091009798915,
"learning_rate": 3.730892037320659e-06,
"loss": 1.047160029411316,
"step": 3860
},
{
"epoch": 1.8715462918080465,
"grad_norm": 9.289934720361307,
"learning_rate": 3.7281644935463545e-06,
"loss": 1.5085285902023315,
"step": 3861
},
{
"epoch": 1.8720310227823558,
"grad_norm": 10.171617184245077,
"learning_rate": 3.725437354366844e-06,
"loss": 1.9728556871414185,
"step": 3862
},
{
"epoch": 1.872515753756665,
"grad_norm": 11.109570890147632,
"learning_rate": 3.7227106206496806e-06,
"loss": 1.169795036315918,
"step": 3863
},
{
"epoch": 1.8730004847309742,
"grad_norm": 12.656430507088448,
"learning_rate": 3.719984293262289e-06,
"loss": 1.156691312789917,
"step": 3864
},
{
"epoch": 1.8734852157052835,
"grad_norm": 11.26675363800494,
"learning_rate": 3.717258373071965e-06,
"loss": 1.840480923652649,
"step": 3865
},
{
"epoch": 1.8739699466795927,
"grad_norm": 14.592709504274996,
"learning_rate": 3.714532860945875e-06,
"loss": 1.2747111320495605,
"step": 3866
},
{
"epoch": 1.874454677653902,
"grad_norm": 7.620229856310621,
"learning_rate": 3.7118077577510568e-06,
"loss": 1.547795295715332,
"step": 3867
},
{
"epoch": 1.8749394086282112,
"grad_norm": 14.044374027402359,
"learning_rate": 3.709083064354417e-06,
"loss": 1.2866015434265137,
"step": 3868
},
{
"epoch": 1.8754241396025206,
"grad_norm": 15.981075320948474,
"learning_rate": 3.706358781622731e-06,
"loss": 1.8032989501953125,
"step": 3869
},
{
"epoch": 1.8759088705768299,
"grad_norm": 6.49719000008223,
"learning_rate": 3.7036349104226434e-06,
"loss": 1.3746938705444336,
"step": 3870
},
{
"epoch": 1.876393601551139,
"grad_norm": 37.223128835810485,
"learning_rate": 3.700911451620671e-06,
"loss": 1.1380945444107056,
"step": 3871
},
{
"epoch": 1.8768783325254483,
"grad_norm": 9.740215552838352,
"learning_rate": 3.698188406083196e-06,
"loss": 1.7016152143478394,
"step": 3872
},
{
"epoch": 1.8773630634997578,
"grad_norm": 16.476485414509536,
"learning_rate": 3.695465774676473e-06,
"loss": 1.3272976875305176,
"step": 3873
},
{
"epoch": 1.877847794474067,
"grad_norm": 13.313527565693274,
"learning_rate": 3.6927435582666177e-06,
"loss": 1.4082832336425781,
"step": 3874
},
{
"epoch": 1.8783325254483763,
"grad_norm": 13.111296281669487,
"learning_rate": 3.6900217577196183e-06,
"loss": 1.7072147130966187,
"step": 3875
},
{
"epoch": 1.8788172564226855,
"grad_norm": 17.619718847307137,
"learning_rate": 3.6873003739013373e-06,
"loss": 1.6426606178283691,
"step": 3876
},
{
"epoch": 1.8793019873969947,
"grad_norm": 13.095537546335411,
"learning_rate": 3.6845794076774923e-06,
"loss": 2.4092113971710205,
"step": 3877
},
{
"epoch": 1.879786718371304,
"grad_norm": 12.697671567095762,
"learning_rate": 3.681858859913675e-06,
"loss": 1.7790416479110718,
"step": 3878
},
{
"epoch": 1.8802714493456132,
"grad_norm": 13.843523194803327,
"learning_rate": 3.6791387314753435e-06,
"loss": 1.9519529342651367,
"step": 3879
},
{
"epoch": 1.8807561803199224,
"grad_norm": 9.642180297880978,
"learning_rate": 3.67641902322782e-06,
"loss": 1.140458345413208,
"step": 3880
},
{
"epoch": 1.8812409112942317,
"grad_norm": 10.470228431946486,
"learning_rate": 3.6736997360362974e-06,
"loss": 1.666051983833313,
"step": 3881
},
{
"epoch": 1.8817256422685409,
"grad_norm": 8.104970100921333,
"learning_rate": 3.6709808707658312e-06,
"loss": 0.8969978094100952,
"step": 3882
},
{
"epoch": 1.8822103732428501,
"grad_norm": 14.353261687612592,
"learning_rate": 3.668262428281343e-06,
"loss": 1.4742242097854614,
"step": 3883
},
{
"epoch": 1.8826951042171594,
"grad_norm": 18.8300447476687,
"learning_rate": 3.66554440944762e-06,
"loss": 1.5895284414291382,
"step": 3884
},
{
"epoch": 1.8831798351914686,
"grad_norm": 17.961605131033078,
"learning_rate": 3.662826815129317e-06,
"loss": 1.5626221895217896,
"step": 3885
},
{
"epoch": 1.883664566165778,
"grad_norm": 10.373599417993802,
"learning_rate": 3.660109646190951e-06,
"loss": 1.9574177265167236,
"step": 3886
},
{
"epoch": 1.8841492971400873,
"grad_norm": 7.269417129454365,
"learning_rate": 3.6573929034969035e-06,
"loss": 1.6184039115905762,
"step": 3887
},
{
"epoch": 1.8846340281143965,
"grad_norm": 11.447422734777803,
"learning_rate": 3.654676587911424e-06,
"loss": 1.543810248374939,
"step": 3888
},
{
"epoch": 1.8851187590887057,
"grad_norm": 21.929418636448727,
"learning_rate": 3.651960700298619e-06,
"loss": 1.8076457977294922,
"step": 3889
},
{
"epoch": 1.885603490063015,
"grad_norm": 14.417757398330167,
"learning_rate": 3.6492452415224675e-06,
"loss": 2.1195812225341797,
"step": 3890
},
{
"epoch": 1.8860882210373244,
"grad_norm": 17.511808787442728,
"learning_rate": 3.646530212446807e-06,
"loss": 1.2539801597595215,
"step": 3891
},
{
"epoch": 1.8865729520116337,
"grad_norm": 16.684183046340713,
"learning_rate": 3.6438156139353403e-06,
"loss": 2.764033794403076,
"step": 3892
},
{
"epoch": 1.887057682985943,
"grad_norm": 10.564838390129184,
"learning_rate": 3.6411014468516303e-06,
"loss": 1.5271735191345215,
"step": 3893
},
{
"epoch": 1.8875424139602521,
"grad_norm": 9.266966163622177,
"learning_rate": 3.6383877120591026e-06,
"loss": 1.3750379085540771,
"step": 3894
},
{
"epoch": 1.8880271449345614,
"grad_norm": 9.224542707820223,
"learning_rate": 3.6356744104210528e-06,
"loss": 1.6342830657958984,
"step": 3895
},
{
"epoch": 1.8885118759088706,
"grad_norm": 8.123264600548124,
"learning_rate": 3.6329615428006306e-06,
"loss": 2.269853115081787,
"step": 3896
},
{
"epoch": 1.8889966068831798,
"grad_norm": 11.333325245561115,
"learning_rate": 3.6302491100608484e-06,
"loss": 1.5088672637939453,
"step": 3897
},
{
"epoch": 1.889481337857489,
"grad_norm": 10.907060457201112,
"learning_rate": 3.627537113064583e-06,
"loss": 0.8382123112678528,
"step": 3898
},
{
"epoch": 1.8899660688317983,
"grad_norm": 12.204070562382503,
"learning_rate": 3.624825552674574e-06,
"loss": 1.3781334161758423,
"step": 3899
},
{
"epoch": 1.8904507998061075,
"grad_norm": 10.864384507112767,
"learning_rate": 3.6221144297534178e-06,
"loss": 1.0128146409988403,
"step": 3900
},
{
"epoch": 1.8909355307804168,
"grad_norm": 13.109676654858099,
"learning_rate": 3.6194037451635734e-06,
"loss": 1.2570695877075195,
"step": 3901
},
{
"epoch": 1.891420261754726,
"grad_norm": 14.657869442057853,
"learning_rate": 3.6166934997673613e-06,
"loss": 1.9119762182235718,
"step": 3902
},
{
"epoch": 1.8919049927290352,
"grad_norm": 17.02473786087026,
"learning_rate": 3.61398369442696e-06,
"loss": 1.2781530618667603,
"step": 3903
},
{
"epoch": 1.8923897237033447,
"grad_norm": 9.819881936317984,
"learning_rate": 3.6112743300044117e-06,
"loss": 2.3941824436187744,
"step": 3904
},
{
"epoch": 1.892874454677654,
"grad_norm": 6.419445441191274,
"learning_rate": 3.608565407361615e-06,
"loss": 1.177260398864746,
"step": 3905
},
{
"epoch": 1.8933591856519632,
"grad_norm": 12.91423995573886,
"learning_rate": 3.6058569273603293e-06,
"loss": 1.0827527046203613,
"step": 3906
},
{
"epoch": 1.8938439166262724,
"grad_norm": 7.942202872558142,
"learning_rate": 3.603148890862175e-06,
"loss": 1.2460119724273682,
"step": 3907
},
{
"epoch": 1.8943286476005818,
"grad_norm": 25.816867742461252,
"learning_rate": 3.600441298728623e-06,
"loss": 1.0854291915893555,
"step": 3908
},
{
"epoch": 1.894813378574891,
"grad_norm": 12.419339230986074,
"learning_rate": 3.597734151821017e-06,
"loss": 1.2260878086090088,
"step": 3909
},
{
"epoch": 1.8952981095492003,
"grad_norm": 8.430072593377142,
"learning_rate": 3.595027451000549e-06,
"loss": 1.9578346014022827,
"step": 3910
},
{
"epoch": 1.8957828405235095,
"grad_norm": 15.79255157393538,
"learning_rate": 3.592321197128269e-06,
"loss": 1.834532618522644,
"step": 3911
},
{
"epoch": 1.8962675714978188,
"grad_norm": 11.171008508238168,
"learning_rate": 3.589615391065089e-06,
"loss": 1.86167573928833,
"step": 3912
},
{
"epoch": 1.896752302472128,
"grad_norm": 8.191255283455058,
"learning_rate": 3.5869100336717744e-06,
"loss": 2.1376025676727295,
"step": 3913
},
{
"epoch": 1.8972370334464372,
"grad_norm": 10.355720857717106,
"learning_rate": 3.5842051258089548e-06,
"loss": 2.3454747200012207,
"step": 3914
},
{
"epoch": 1.8977217644207465,
"grad_norm": 11.088804790940792,
"learning_rate": 3.58150066833711e-06,
"loss": 1.6400861740112305,
"step": 3915
},
{
"epoch": 1.8982064953950557,
"grad_norm": 9.829531053922265,
"learning_rate": 3.5787966621165783e-06,
"loss": 2.3520257472991943,
"step": 3916
},
{
"epoch": 1.898691226369365,
"grad_norm": 14.409419275107522,
"learning_rate": 3.5760931080075546e-06,
"loss": 1.8249943256378174,
"step": 3917
},
{
"epoch": 1.8991759573436742,
"grad_norm": 12.978328064204067,
"learning_rate": 3.5733900068700916e-06,
"loss": 1.3235158920288086,
"step": 3918
},
{
"epoch": 1.8996606883179834,
"grad_norm": 12.318337934918544,
"learning_rate": 3.5706873595640964e-06,
"loss": 1.6246753931045532,
"step": 3919
},
{
"epoch": 1.9001454192922926,
"grad_norm": 8.655693792653388,
"learning_rate": 3.567985166949331e-06,
"loss": 1.6081624031066895,
"step": 3920
},
{
"epoch": 1.9006301502666019,
"grad_norm": 12.004580052664044,
"learning_rate": 3.5652834298854145e-06,
"loss": 1.7309609651565552,
"step": 3921
},
{
"epoch": 1.9011148812409113,
"grad_norm": 9.14617864964884,
"learning_rate": 3.5625821492318193e-06,
"loss": 1.925060749053955,
"step": 3922
},
{
"epoch": 1.9015996122152206,
"grad_norm": 8.171485479951233,
"learning_rate": 3.559881325847875e-06,
"loss": 0.5452160835266113,
"step": 3923
},
{
"epoch": 1.9020843431895298,
"grad_norm": 15.222653581288773,
"learning_rate": 3.5571809605927634e-06,
"loss": 1.582798719406128,
"step": 3924
},
{
"epoch": 1.902569074163839,
"grad_norm": 13.036603179971515,
"learning_rate": 3.554481054325522e-06,
"loss": 2.0783259868621826,
"step": 3925
},
{
"epoch": 1.9030538051381485,
"grad_norm": 14.860995116714927,
"learning_rate": 3.5517816079050428e-06,
"loss": 2.186687707901001,
"step": 3926
},
{
"epoch": 1.9035385361124577,
"grad_norm": 9.983796430229297,
"learning_rate": 3.549082622190065e-06,
"loss": 1.3317524194717407,
"step": 3927
},
{
"epoch": 1.904023267086767,
"grad_norm": 11.584829110372459,
"learning_rate": 3.546384098039194e-06,
"loss": 1.4475691318511963,
"step": 3928
},
{
"epoch": 1.9045079980610762,
"grad_norm": 11.464537096933528,
"learning_rate": 3.5436860363108783e-06,
"loss": 1.6440680027008057,
"step": 3929
},
{
"epoch": 1.9049927290353854,
"grad_norm": 15.167745788482916,
"learning_rate": 3.540988437863421e-06,
"loss": 1.6232281923294067,
"step": 3930
},
{
"epoch": 1.9054774600096946,
"grad_norm": 12.436832723915735,
"learning_rate": 3.5382913035549764e-06,
"loss": 1.7416480779647827,
"step": 3931
},
{
"epoch": 1.9059621909840039,
"grad_norm": 7.331499241137207,
"learning_rate": 3.535594634243561e-06,
"loss": 1.5295146703720093,
"step": 3932
},
{
"epoch": 1.9064469219583131,
"grad_norm": 7.126158941428132,
"learning_rate": 3.5328984307870305e-06,
"loss": 0.8982141017913818,
"step": 3933
},
{
"epoch": 1.9069316529326223,
"grad_norm": 11.818813668200931,
"learning_rate": 3.530202694043099e-06,
"loss": 1.398158073425293,
"step": 3934
},
{
"epoch": 1.9074163839069316,
"grad_norm": 10.849685555898867,
"learning_rate": 3.527507424869332e-06,
"loss": 1.8073395490646362,
"step": 3935
},
{
"epoch": 1.9079011148812408,
"grad_norm": 9.986251578950288,
"learning_rate": 3.5248126241231427e-06,
"loss": 1.6623549461364746,
"step": 3936
},
{
"epoch": 1.90838584585555,
"grad_norm": 8.677994484198274,
"learning_rate": 3.5221182926618012e-06,
"loss": 1.263074278831482,
"step": 3937
},
{
"epoch": 1.9088705768298593,
"grad_norm": 15.948367465243608,
"learning_rate": 3.519424431342424e-06,
"loss": 1.7493032217025757,
"step": 3938
},
{
"epoch": 1.9093553078041687,
"grad_norm": 12.493386479316666,
"learning_rate": 3.516731041021978e-06,
"loss": 1.1146942377090454,
"step": 3939
},
{
"epoch": 1.909840038778478,
"grad_norm": 9.126848654429478,
"learning_rate": 3.5140381225572826e-06,
"loss": 1.1476249694824219,
"step": 3940
},
{
"epoch": 1.9103247697527872,
"grad_norm": 8.081044605394956,
"learning_rate": 3.5113456768050034e-06,
"loss": 2.4797306060791016,
"step": 3941
},
{
"epoch": 1.9108095007270964,
"grad_norm": 9.564015191865991,
"learning_rate": 3.5086537046216618e-06,
"loss": 1.2998127937316895,
"step": 3942
},
{
"epoch": 1.9112942317014057,
"grad_norm": 10.97244852984566,
"learning_rate": 3.5059622068636224e-06,
"loss": 1.5118680000305176,
"step": 3943
},
{
"epoch": 1.9117789626757151,
"grad_norm": 11.633749647479805,
"learning_rate": 3.5032711843871013e-06,
"loss": 1.5342599153518677,
"step": 3944
},
{
"epoch": 1.9122636936500244,
"grad_norm": 12.281473020658545,
"learning_rate": 3.5005806380481634e-06,
"loss": 1.3705642223358154,
"step": 3945
},
{
"epoch": 1.9127484246243336,
"grad_norm": 10.908442570085795,
"learning_rate": 3.4978905687027228e-06,
"loss": 1.36629319190979,
"step": 3946
},
{
"epoch": 1.9132331555986428,
"grad_norm": 11.213312939673628,
"learning_rate": 3.495200977206541e-06,
"loss": 1.128406047821045,
"step": 3947
},
{
"epoch": 1.913717886572952,
"grad_norm": 12.5260781867788,
"learning_rate": 3.4925118644152277e-06,
"loss": 1.4350323677062988,
"step": 3948
},
{
"epoch": 1.9142026175472613,
"grad_norm": 9.34003006815694,
"learning_rate": 3.4898232311842395e-06,
"loss": 1.4455536603927612,
"step": 3949
},
{
"epoch": 1.9146873485215705,
"grad_norm": 9.114238678691034,
"learning_rate": 3.4871350783688795e-06,
"loss": 1.814175009727478,
"step": 3950
},
{
"epoch": 1.9151720794958798,
"grad_norm": 7.309914866389954,
"learning_rate": 3.484447406824305e-06,
"loss": 1.5604901313781738,
"step": 3951
},
{
"epoch": 1.915656810470189,
"grad_norm": 15.830153082944902,
"learning_rate": 3.481760217405511e-06,
"loss": 1.4824331998825073,
"step": 3952
},
{
"epoch": 1.9161415414444982,
"grad_norm": 17.088146148358906,
"learning_rate": 3.4790735109673422e-06,
"loss": 1.9730150699615479,
"step": 3953
},
{
"epoch": 1.9166262724188075,
"grad_norm": 13.519478310619794,
"learning_rate": 3.4763872883644934e-06,
"loss": 1.68584406375885,
"step": 3954
},
{
"epoch": 1.9171110033931167,
"grad_norm": 11.405300561972854,
"learning_rate": 3.4737015504514993e-06,
"loss": 1.291823148727417,
"step": 3955
},
{
"epoch": 1.917595734367426,
"grad_norm": 19.396814386680695,
"learning_rate": 3.471016298082746e-06,
"loss": 1.4392280578613281,
"step": 3956
},
{
"epoch": 1.9180804653417354,
"grad_norm": 10.238702078168478,
"learning_rate": 3.4683315321124626e-06,
"loss": 1.3800902366638184,
"step": 3957
},
{
"epoch": 1.9185651963160446,
"grad_norm": 7.882813721230638,
"learning_rate": 3.4656472533947234e-06,
"loss": 1.6689096689224243,
"step": 3958
},
{
"epoch": 1.9190499272903538,
"grad_norm": 17.144911817271993,
"learning_rate": 3.462963462783446e-06,
"loss": 2.4903616905212402,
"step": 3959
},
{
"epoch": 1.919534658264663,
"grad_norm": 14.462761811556827,
"learning_rate": 3.4602801611323977e-06,
"loss": 1.2347651720046997,
"step": 3960
},
{
"epoch": 1.9200193892389725,
"grad_norm": 8.984324840306442,
"learning_rate": 3.457597349295186e-06,
"loss": 1.6082990169525146,
"step": 3961
},
{
"epoch": 1.9205041202132818,
"grad_norm": 9.31316791616818,
"learning_rate": 3.4549150281252635e-06,
"loss": 1.4320874214172363,
"step": 3962
},
{
"epoch": 1.920988851187591,
"grad_norm": 7.9117365621651645,
"learning_rate": 3.452233198475927e-06,
"loss": 1.4429666996002197,
"step": 3963
},
{
"epoch": 1.9214735821619002,
"grad_norm": 12.368896972340732,
"learning_rate": 3.4495518612003167e-06,
"loss": 1.6724499464035034,
"step": 3964
},
{
"epoch": 1.9219583131362095,
"grad_norm": 16.885369692487153,
"learning_rate": 3.4468710171514175e-06,
"loss": 1.275808572769165,
"step": 3965
},
{
"epoch": 1.9224430441105187,
"grad_norm": 7.9713608105797364,
"learning_rate": 3.444190667182056e-06,
"loss": 1.8975622653961182,
"step": 3966
},
{
"epoch": 1.922927775084828,
"grad_norm": 29.939523944818323,
"learning_rate": 3.4415108121449013e-06,
"loss": 2.279844284057617,
"step": 3967
},
{
"epoch": 1.9234125060591372,
"grad_norm": 12.177354056809348,
"learning_rate": 3.438831452892466e-06,
"loss": 1.2905189990997314,
"step": 3968
},
{
"epoch": 1.9238972370334464,
"grad_norm": 17.14616965968978,
"learning_rate": 3.436152590277103e-06,
"loss": 1.6779305934906006,
"step": 3969
},
{
"epoch": 1.9243819680077556,
"grad_norm": 10.250541988906761,
"learning_rate": 3.4334742251510127e-06,
"loss": 1.7671997547149658,
"step": 3970
},
{
"epoch": 1.9248666989820649,
"grad_norm": 10.887505447986674,
"learning_rate": 3.4307963583662307e-06,
"loss": 1.1810566186904907,
"step": 3971
},
{
"epoch": 1.925351429956374,
"grad_norm": 12.544065108326325,
"learning_rate": 3.428118990774637e-06,
"loss": 1.3872532844543457,
"step": 3972
},
{
"epoch": 1.9258361609306833,
"grad_norm": 11.57024361619496,
"learning_rate": 3.425442123227952e-06,
"loss": 1.3888881206512451,
"step": 3973
},
{
"epoch": 1.9263208919049928,
"grad_norm": 12.325395594667116,
"learning_rate": 3.4227657565777395e-06,
"loss": 0.756981372833252,
"step": 3974
},
{
"epoch": 1.926805622879302,
"grad_norm": 16.60343022202815,
"learning_rate": 3.420089891675401e-06,
"loss": 2.093275308609009,
"step": 3975
},
{
"epoch": 1.9272903538536112,
"grad_norm": 10.924670371053804,
"learning_rate": 3.4174145293721793e-06,
"loss": 1.4760019779205322,
"step": 3976
},
{
"epoch": 1.9277750848279205,
"grad_norm": 13.026288795968684,
"learning_rate": 3.414739670519158e-06,
"loss": 1.3458720445632935,
"step": 3977
},
{
"epoch": 1.9282598158022297,
"grad_norm": 8.79427108288371,
"learning_rate": 3.412065315967259e-06,
"loss": 1.3386306762695312,
"step": 3978
},
{
"epoch": 1.9287445467765392,
"grad_norm": 11.227770926499907,
"learning_rate": 3.409391466567246e-06,
"loss": 0.982848584651947,
"step": 3979
},
{
"epoch": 1.9292292777508484,
"grad_norm": 11.003530551348984,
"learning_rate": 3.40671812316972e-06,
"loss": 1.7455159425735474,
"step": 3980
},
{
"epoch": 1.9297140087251576,
"grad_norm": 18.250304052953577,
"learning_rate": 3.4040452866251227e-06,
"loss": 1.8938674926757812,
"step": 3981
},
{
"epoch": 1.9301987396994669,
"grad_norm": 8.967652851585836,
"learning_rate": 3.401372957783734e-06,
"loss": 1.2882962226867676,
"step": 3982
},
{
"epoch": 1.930683470673776,
"grad_norm": 13.61730346619982,
"learning_rate": 3.3987011374956667e-06,
"loss": 1.259023904800415,
"step": 3983
},
{
"epoch": 1.9311682016480853,
"grad_norm": 9.906082062223247,
"learning_rate": 3.396029826610885e-06,
"loss": 1.5270193815231323,
"step": 3984
},
{
"epoch": 1.9316529326223946,
"grad_norm": 12.694438101247476,
"learning_rate": 3.393359025979178e-06,
"loss": 1.5069454908370972,
"step": 3985
},
{
"epoch": 1.9321376635967038,
"grad_norm": 22.468957699704564,
"learning_rate": 3.3906887364501813e-06,
"loss": 1.717566728591919,
"step": 3986
},
{
"epoch": 1.932622394571013,
"grad_norm": 12.860300692945742,
"learning_rate": 3.3880189588733583e-06,
"loss": 1.243229627609253,
"step": 3987
},
{
"epoch": 1.9331071255453223,
"grad_norm": 10.89880777896392,
"learning_rate": 3.3853496940980213e-06,
"loss": 1.5140823125839233,
"step": 3988
},
{
"epoch": 1.9335918565196315,
"grad_norm": 14.21259287183119,
"learning_rate": 3.382680942973312e-06,
"loss": 1.5047807693481445,
"step": 3989
},
{
"epoch": 1.9340765874939407,
"grad_norm": 10.534523524343712,
"learning_rate": 3.3800127063482097e-06,
"loss": 1.5335553884506226,
"step": 3990
},
{
"epoch": 1.93456131846825,
"grad_norm": 12.795572742431792,
"learning_rate": 3.37734498507153e-06,
"loss": 1.374946117401123,
"step": 3991
},
{
"epoch": 1.9350460494425594,
"grad_norm": 13.117077393609778,
"learning_rate": 3.374677779991925e-06,
"loss": 2.378258228302002,
"step": 3992
},
{
"epoch": 1.9355307804168687,
"grad_norm": 8.777454441771413,
"learning_rate": 3.372011091957885e-06,
"loss": 1.9577720165252686,
"step": 3993
},
{
"epoch": 1.936015511391178,
"grad_norm": 16.273913986352408,
"learning_rate": 3.3693449218177327e-06,
"loss": 1.8117246627807617,
"step": 3994
},
{
"epoch": 1.9365002423654871,
"grad_norm": 11.709018102166146,
"learning_rate": 3.366679270419626e-06,
"loss": 1.1304072141647339,
"step": 3995
},
{
"epoch": 1.9369849733397966,
"grad_norm": 14.505304899464937,
"learning_rate": 3.36401413861156e-06,
"loss": 0.7441200017929077,
"step": 3996
},
{
"epoch": 1.9374697043141058,
"grad_norm": 11.633163498681856,
"learning_rate": 3.361349527241361e-06,
"loss": 1.298384189605713,
"step": 3997
},
{
"epoch": 1.937954435288415,
"grad_norm": 11.012473508487254,
"learning_rate": 3.3586854371566945e-06,
"loss": 1.494674563407898,
"step": 3998
},
{
"epoch": 1.9384391662627243,
"grad_norm": 12.33385251000266,
"learning_rate": 3.356021869205057e-06,
"loss": 1.883178949356079,
"step": 3999
},
{
"epoch": 1.9389238972370335,
"grad_norm": 11.65328061272837,
"learning_rate": 3.35335882423378e-06,
"loss": 1.9166979789733887,
"step": 4000
},
{
"epoch": 1.9394086282113427,
"grad_norm": 12.431859811016695,
"learning_rate": 3.3506963030900254e-06,
"loss": 1.4378068447113037,
"step": 4001
},
{
"epoch": 1.939893359185652,
"grad_norm": 8.7114687244788,
"learning_rate": 3.3480343066207948e-06,
"loss": 0.9931128621101379,
"step": 4002
},
{
"epoch": 1.9403780901599612,
"grad_norm": 15.5939736623459,
"learning_rate": 3.3453728356729177e-06,
"loss": 1.6293809413909912,
"step": 4003
},
{
"epoch": 1.9408628211342704,
"grad_norm": 10.270963268130922,
"learning_rate": 3.3427118910930595e-06,
"loss": 1.7943034172058105,
"step": 4004
},
{
"epoch": 1.9413475521085797,
"grad_norm": 10.554795662947278,
"learning_rate": 3.3400514737277144e-06,
"loss": 1.2377315759658813,
"step": 4005
},
{
"epoch": 1.941832283082889,
"grad_norm": 12.956178669464471,
"learning_rate": 3.3373915844232102e-06,
"loss": 1.7713916301727295,
"step": 4006
},
{
"epoch": 1.9423170140571981,
"grad_norm": 13.162894001583565,
"learning_rate": 3.3347322240257117e-06,
"loss": 1.4930777549743652,
"step": 4007
},
{
"epoch": 1.9428017450315074,
"grad_norm": 18.32787848186863,
"learning_rate": 3.3320733933812103e-06,
"loss": 2.383993148803711,
"step": 4008
},
{
"epoch": 1.9432864760058166,
"grad_norm": 11.217492455292382,
"learning_rate": 3.3294150933355287e-06,
"loss": 1.1197657585144043,
"step": 4009
},
{
"epoch": 1.943771206980126,
"grad_norm": 13.419526297697866,
"learning_rate": 3.326757324734322e-06,
"loss": 1.0516507625579834,
"step": 4010
},
{
"epoch": 1.9442559379544353,
"grad_norm": 9.635428485983422,
"learning_rate": 3.324100088423077e-06,
"loss": 1.4331152439117432,
"step": 4011
},
{
"epoch": 1.9447406689287445,
"grad_norm": 10.684819909542535,
"learning_rate": 3.3214433852471107e-06,
"loss": 1.252654790878296,
"step": 4012
},
{
"epoch": 1.9452253999030538,
"grad_norm": 8.45156412320274,
"learning_rate": 3.318787216051571e-06,
"loss": 1.350597620010376,
"step": 4013
},
{
"epoch": 1.9457101308773632,
"grad_norm": 26.125785474241948,
"learning_rate": 3.3161315816814338e-06,
"loss": 1.9233274459838867,
"step": 4014
},
{
"epoch": 1.9461948618516725,
"grad_norm": 8.280428672764964,
"learning_rate": 3.3134764829815064e-06,
"loss": 1.8210303783416748,
"step": 4015
},
{
"epoch": 1.9466795928259817,
"grad_norm": 14.910656391153884,
"learning_rate": 3.310821920796427e-06,
"loss": 1.8380781412124634,
"step": 4016
},
{
"epoch": 1.947164323800291,
"grad_norm": 11.371559119692092,
"learning_rate": 3.3081678959706613e-06,
"loss": 1.8093634843826294,
"step": 4017
},
{
"epoch": 1.9476490547746002,
"grad_norm": 11.699396550077198,
"learning_rate": 3.305514409348504e-06,
"loss": 2.5339412689208984,
"step": 4018
},
{
"epoch": 1.9481337857489094,
"grad_norm": 8.89805275495581,
"learning_rate": 3.3028614617740797e-06,
"loss": 1.7287352085113525,
"step": 4019
},
{
"epoch": 1.9486185167232186,
"grad_norm": 10.132897532685504,
"learning_rate": 3.300209054091339e-06,
"loss": 1.3113386631011963,
"step": 4020
},
{
"epoch": 1.9491032476975279,
"grad_norm": 14.296617119570087,
"learning_rate": 3.297557187144066e-06,
"loss": 1.0473952293395996,
"step": 4021
},
{
"epoch": 1.949587978671837,
"grad_norm": 13.877278403680595,
"learning_rate": 3.2949058617758665e-06,
"loss": 1.0467098951339722,
"step": 4022
},
{
"epoch": 1.9500727096461463,
"grad_norm": 9.031585921656134,
"learning_rate": 3.29225507883018e-06,
"loss": 1.4788975715637207,
"step": 4023
},
{
"epoch": 1.9505574406204556,
"grad_norm": 12.702656897491208,
"learning_rate": 3.289604839150267e-06,
"loss": 1.387740135192871,
"step": 4024
},
{
"epoch": 1.9510421715947648,
"grad_norm": 8.676706519401122,
"learning_rate": 3.2869551435792185e-06,
"loss": 1.5669760704040527,
"step": 4025
},
{
"epoch": 1.951526902569074,
"grad_norm": 17.76621811830972,
"learning_rate": 3.2843059929599558e-06,
"loss": 0.9056435227394104,
"step": 4026
},
{
"epoch": 1.9520116335433835,
"grad_norm": 11.514373646790409,
"learning_rate": 3.281657388135223e-06,
"loss": 1.783588171005249,
"step": 4027
},
{
"epoch": 1.9524963645176927,
"grad_norm": 11.489478103344187,
"learning_rate": 3.279009329947589e-06,
"loss": 2.5112509727478027,
"step": 4028
},
{
"epoch": 1.952981095492002,
"grad_norm": 11.693982087030196,
"learning_rate": 3.2763618192394496e-06,
"loss": 1.5817515850067139,
"step": 4029
},
{
"epoch": 1.9534658264663112,
"grad_norm": 10.190473022600127,
"learning_rate": 3.273714856853033e-06,
"loss": 1.675148844718933,
"step": 4030
},
{
"epoch": 1.9539505574406204,
"grad_norm": 13.829636352690606,
"learning_rate": 3.2710684436303834e-06,
"loss": 1.846989393234253,
"step": 4031
},
{
"epoch": 1.9544352884149299,
"grad_norm": 8.146418374588917,
"learning_rate": 3.2684225804133763e-06,
"loss": 1.445336103439331,
"step": 4032
},
{
"epoch": 1.954920019389239,
"grad_norm": 10.559502061613104,
"learning_rate": 3.2657772680437106e-06,
"loss": 2.7004940509796143,
"step": 4033
},
{
"epoch": 1.9554047503635483,
"grad_norm": 8.72610687439366,
"learning_rate": 3.263132507362907e-06,
"loss": 1.208324909210205,
"step": 4034
},
{
"epoch": 1.9558894813378576,
"grad_norm": 11.071308160713862,
"learning_rate": 3.260488299212319e-06,
"loss": 2.08083438873291,
"step": 4035
},
{
"epoch": 1.9563742123121668,
"grad_norm": 14.177799929247703,
"learning_rate": 3.2578446444331148e-06,
"loss": 0.6555132865905762,
"step": 4036
},
{
"epoch": 1.956858943286476,
"grad_norm": 10.371262364740149,
"learning_rate": 3.255201543866292e-06,
"loss": 1.1863126754760742,
"step": 4037
},
{
"epoch": 1.9573436742607853,
"grad_norm": 13.178342565646291,
"learning_rate": 3.2525589983526708e-06,
"loss": 2.0506715774536133,
"step": 4038
},
{
"epoch": 1.9578284052350945,
"grad_norm": 22.325298148000854,
"learning_rate": 3.2499170087328926e-06,
"loss": 1.6709489822387695,
"step": 4039
},
{
"epoch": 1.9583131362094037,
"grad_norm": 9.555550547498028,
"learning_rate": 3.247275575847427e-06,
"loss": 1.594322681427002,
"step": 4040
},
{
"epoch": 1.958797867183713,
"grad_norm": 9.953130190470889,
"learning_rate": 3.244634700536563e-06,
"loss": 1.5303876399993896,
"step": 4041
},
{
"epoch": 1.9592825981580222,
"grad_norm": 11.215414024654955,
"learning_rate": 3.241994383640412e-06,
"loss": 2.228297710418701,
"step": 4042
},
{
"epoch": 1.9597673291323314,
"grad_norm": 13.499591354132411,
"learning_rate": 3.2393546259989063e-06,
"loss": 1.9384974241256714,
"step": 4043
},
{
"epoch": 1.9602520601066407,
"grad_norm": 7.796868661249886,
"learning_rate": 3.2367154284518056e-06,
"loss": 1.2812941074371338,
"step": 4044
},
{
"epoch": 1.9607367910809501,
"grad_norm": 13.410428602123144,
"learning_rate": 3.2340767918386883e-06,
"loss": 1.4935312271118164,
"step": 4045
},
{
"epoch": 1.9612215220552593,
"grad_norm": 13.509569224520982,
"learning_rate": 3.2314387169989525e-06,
"loss": 1.791900873184204,
"step": 4046
},
{
"epoch": 1.9617062530295686,
"grad_norm": 12.575527372135113,
"learning_rate": 3.22880120477182e-06,
"loss": 0.8082662224769592,
"step": 4047
},
{
"epoch": 1.9621909840038778,
"grad_norm": 13.778518078369384,
"learning_rate": 3.2261642559963307e-06,
"loss": 1.433318018913269,
"step": 4048
},
{
"epoch": 1.9626757149781873,
"grad_norm": 9.135176076233716,
"learning_rate": 3.223527871511354e-06,
"loss": 1.871477723121643,
"step": 4049
},
{
"epoch": 1.9631604459524965,
"grad_norm": 14.138446863884596,
"learning_rate": 3.2208920521555677e-06,
"loss": 1.8148657083511353,
"step": 4050
},
{
"epoch": 1.9636451769268057,
"grad_norm": 19.966069829748115,
"learning_rate": 3.2182567987674774e-06,
"loss": 2.0185489654541016,
"step": 4051
},
{
"epoch": 1.964129907901115,
"grad_norm": 13.799200064151727,
"learning_rate": 3.2156221121854057e-06,
"loss": 1.3392252922058105,
"step": 4052
},
{
"epoch": 1.9646146388754242,
"grad_norm": 12.922345766772988,
"learning_rate": 3.212987993247497e-06,
"loss": 1.774055004119873,
"step": 4053
},
{
"epoch": 1.9650993698497334,
"grad_norm": 9.054050906371984,
"learning_rate": 3.210354442791715e-06,
"loss": 1.7332825660705566,
"step": 4054
},
{
"epoch": 1.9655841008240427,
"grad_norm": 9.388499084905177,
"learning_rate": 3.20772146165584e-06,
"loss": 1.209599256515503,
"step": 4055
},
{
"epoch": 1.966068831798352,
"grad_norm": 10.319737495908916,
"learning_rate": 3.2050890506774734e-06,
"loss": 0.8886001110076904,
"step": 4056
},
{
"epoch": 1.9665535627726611,
"grad_norm": 7.32415748889794,
"learning_rate": 3.2024572106940336e-06,
"loss": 1.5818983316421509,
"step": 4057
},
{
"epoch": 1.9670382937469704,
"grad_norm": 8.925321026286902,
"learning_rate": 3.1998259425427606e-06,
"loss": 1.3812787532806396,
"step": 4058
},
{
"epoch": 1.9675230247212796,
"grad_norm": 10.63480205971038,
"learning_rate": 3.1971952470607093e-06,
"loss": 1.8008320331573486,
"step": 4059
},
{
"epoch": 1.9680077556955888,
"grad_norm": 11.66526809435149,
"learning_rate": 3.194565125084753e-06,
"loss": 1.008023977279663,
"step": 4060
},
{
"epoch": 1.968492486669898,
"grad_norm": 10.87915003981383,
"learning_rate": 3.191935577451584e-06,
"loss": 1.426201343536377,
"step": 4061
},
{
"epoch": 1.9689772176442073,
"grad_norm": 12.118571103374569,
"learning_rate": 3.1893066049977073e-06,
"loss": 1.7979223728179932,
"step": 4062
},
{
"epoch": 1.9694619486185168,
"grad_norm": 11.96763119969827,
"learning_rate": 3.1866782085594526e-06,
"loss": 1.034325122833252,
"step": 4063
},
{
"epoch": 1.969946679592826,
"grad_norm": 14.935524340583871,
"learning_rate": 3.1840503889729624e-06,
"loss": 2.041565179824829,
"step": 4064
},
{
"epoch": 1.9704314105671352,
"grad_norm": 12.334208979702417,
"learning_rate": 3.181423147074192e-06,
"loss": 2.3051655292510986,
"step": 4065
},
{
"epoch": 1.9709161415414445,
"grad_norm": 18.385664342347326,
"learning_rate": 3.1787964836989195e-06,
"loss": 0.9874346256256104,
"step": 4066
},
{
"epoch": 1.971400872515754,
"grad_norm": 16.959754104824306,
"learning_rate": 3.176170399682733e-06,
"loss": 1.781829595565796,
"step": 4067
},
{
"epoch": 1.9718856034900631,
"grad_norm": 9.305818475623116,
"learning_rate": 3.1735448958610426e-06,
"loss": 1.2408761978149414,
"step": 4068
},
{
"epoch": 1.9723703344643724,
"grad_norm": 12.791995124872429,
"learning_rate": 3.1709199730690687e-06,
"loss": 1.5499281883239746,
"step": 4069
},
{
"epoch": 1.9728550654386816,
"grad_norm": 8.711810440634745,
"learning_rate": 3.1682956321418484e-06,
"loss": 1.8005973100662231,
"step": 4070
},
{
"epoch": 1.9733397964129908,
"grad_norm": 9.235320057489226,
"learning_rate": 3.1656718739142335e-06,
"loss": 1.7147592306137085,
"step": 4071
},
{
"epoch": 1.9738245273873,
"grad_norm": 11.439338910240213,
"learning_rate": 3.1630486992208924e-06,
"loss": 1.557666301727295,
"step": 4072
},
{
"epoch": 1.9743092583616093,
"grad_norm": 11.672696645880956,
"learning_rate": 3.1604261088963062e-06,
"loss": 1.4913452863693237,
"step": 4073
},
{
"epoch": 1.9747939893359185,
"grad_norm": 9.199128476903063,
"learning_rate": 3.1578041037747686e-06,
"loss": 1.680264949798584,
"step": 4074
},
{
"epoch": 1.9752787203102278,
"grad_norm": 12.369792594060868,
"learning_rate": 3.15518268469039e-06,
"loss": 2.491694927215576,
"step": 4075
},
{
"epoch": 1.975763451284537,
"grad_norm": 15.856676772332433,
"learning_rate": 3.152561852477092e-06,
"loss": 1.4962843656539917,
"step": 4076
},
{
"epoch": 1.9762481822588462,
"grad_norm": 11.817283891463624,
"learning_rate": 3.1499416079686118e-06,
"loss": 1.1540114879608154,
"step": 4077
},
{
"epoch": 1.9767329132331555,
"grad_norm": 8.568218027364612,
"learning_rate": 3.1473219519984986e-06,
"loss": 1.2117725610733032,
"step": 4078
},
{
"epoch": 1.9772176442074647,
"grad_norm": 10.951030491965223,
"learning_rate": 3.1447028854001137e-06,
"loss": 1.4838528633117676,
"step": 4079
},
{
"epoch": 1.9777023751817742,
"grad_norm": 9.026087321221043,
"learning_rate": 3.1420844090066315e-06,
"loss": 1.581190586090088,
"step": 4080
},
{
"epoch": 1.9781871061560834,
"grad_norm": 10.699985209499673,
"learning_rate": 3.1394665236510367e-06,
"loss": 1.691838264465332,
"step": 4081
},
{
"epoch": 1.9786718371303926,
"grad_norm": 20.15970414020839,
"learning_rate": 3.136849230166131e-06,
"loss": 2.523690938949585,
"step": 4082
},
{
"epoch": 1.9791565681047019,
"grad_norm": 11.547204281661351,
"learning_rate": 3.1342325293845243e-06,
"loss": 1.4156148433685303,
"step": 4083
},
{
"epoch": 1.979641299079011,
"grad_norm": 9.562067690414196,
"learning_rate": 3.131616422138636e-06,
"loss": 1.6861294507980347,
"step": 4084
},
{
"epoch": 1.9801260300533206,
"grad_norm": 10.360008534484683,
"learning_rate": 3.1290009092606988e-06,
"loss": 2.1498637199401855,
"step": 4085
},
{
"epoch": 1.9806107610276298,
"grad_norm": 11.186232841329488,
"learning_rate": 3.126385991582761e-06,
"loss": 1.2764662504196167,
"step": 4086
},
{
"epoch": 1.981095492001939,
"grad_norm": 9.100332310325493,
"learning_rate": 3.1237716699366733e-06,
"loss": 1.9387953281402588,
"step": 4087
},
{
"epoch": 1.9815802229762483,
"grad_norm": 8.518365651362023,
"learning_rate": 3.1211579451541012e-06,
"loss": 1.4308803081512451,
"step": 4088
},
{
"epoch": 1.9820649539505575,
"grad_norm": 11.070680803558904,
"learning_rate": 3.11854481806652e-06,
"loss": 1.8865894079208374,
"step": 4089
},
{
"epoch": 1.9825496849248667,
"grad_norm": 8.733780278007176,
"learning_rate": 3.1159322895052135e-06,
"loss": 1.313092827796936,
"step": 4090
},
{
"epoch": 1.983034415899176,
"grad_norm": 20.710079902277165,
"learning_rate": 3.1133203603012787e-06,
"loss": 1.4556174278259277,
"step": 4091
},
{
"epoch": 1.9835191468734852,
"grad_norm": 10.44020238263834,
"learning_rate": 3.110709031285617e-06,
"loss": 1.8028613328933716,
"step": 4092
},
{
"epoch": 1.9840038778477944,
"grad_norm": 13.610814080432037,
"learning_rate": 3.1080983032889427e-06,
"loss": 1.1902108192443848,
"step": 4093
},
{
"epoch": 1.9844886088221037,
"grad_norm": 6.781818374387261,
"learning_rate": 3.1054881771417767e-06,
"loss": 1.4288663864135742,
"step": 4094
},
{
"epoch": 1.9849733397964129,
"grad_norm": 9.030124646483753,
"learning_rate": 3.1028786536744495e-06,
"loss": 1.227708339691162,
"step": 4095
},
{
"epoch": 1.9854580707707221,
"grad_norm": 14.252038763152516,
"learning_rate": 3.1002697337170994e-06,
"loss": 1.5044844150543213,
"step": 4096
},
{
"epoch": 1.9859428017450313,
"grad_norm": 12.472139660182522,
"learning_rate": 3.0976614180996734e-06,
"loss": 1.5200773477554321,
"step": 4097
},
{
"epoch": 1.9864275327193408,
"grad_norm": 13.389196860204574,
"learning_rate": 3.0950537076519262e-06,
"loss": 1.749234914779663,
"step": 4098
},
{
"epoch": 1.98691226369365,
"grad_norm": 10.829910150042824,
"learning_rate": 3.092446603203415e-06,
"loss": 1.7394351959228516,
"step": 4099
},
{
"epoch": 1.9873969946679593,
"grad_norm": 12.134099076257801,
"learning_rate": 3.089840105583516e-06,
"loss": 1.5213429927825928,
"step": 4100
},
{
"epoch": 1.9878817256422685,
"grad_norm": 9.887200749626341,
"learning_rate": 3.0872342156214007e-06,
"loss": 1.6353826522827148,
"step": 4101
},
{
"epoch": 1.988366456616578,
"grad_norm": 10.819689777297608,
"learning_rate": 3.0846289341460533e-06,
"loss": 1.9347128868103027,
"step": 4102
},
{
"epoch": 1.9888511875908872,
"grad_norm": 7.140398375119139,
"learning_rate": 3.0820242619862616e-06,
"loss": 1.2043614387512207,
"step": 4103
},
{
"epoch": 1.9893359185651964,
"grad_norm": 11.555350038421471,
"learning_rate": 3.0794201999706195e-06,
"loss": 2.104130506515503,
"step": 4104
},
{
"epoch": 1.9898206495395057,
"grad_norm": 13.42584170152544,
"learning_rate": 3.0768167489275325e-06,
"loss": 1.8067561388015747,
"step": 4105
},
{
"epoch": 1.990305380513815,
"grad_norm": 12.541330077220824,
"learning_rate": 3.074213909685204e-06,
"loss": 1.7349745035171509,
"step": 4106
},
{
"epoch": 1.9907901114881241,
"grad_norm": 14.95494745059657,
"learning_rate": 3.071611683071647e-06,
"loss": 1.9462093114852905,
"step": 4107
},
{
"epoch": 1.9912748424624334,
"grad_norm": 7.0700617329492905,
"learning_rate": 3.0690100699146784e-06,
"loss": 1.4613916873931885,
"step": 4108
},
{
"epoch": 1.9917595734367426,
"grad_norm": 14.895059346450937,
"learning_rate": 3.066409071041919e-06,
"loss": 1.7253888845443726,
"step": 4109
},
{
"epoch": 1.9922443044110518,
"grad_norm": 13.79651238476077,
"learning_rate": 3.0638086872807987e-06,
"loss": 2.047419309616089,
"step": 4110
},
{
"epoch": 1.992729035385361,
"grad_norm": 16.377534804405904,
"learning_rate": 3.061208919458546e-06,
"loss": 1.167083978652954,
"step": 4111
},
{
"epoch": 1.9932137663596703,
"grad_norm": 8.672740575692044,
"learning_rate": 3.0586097684021976e-06,
"loss": 1.455622673034668,
"step": 4112
},
{
"epoch": 1.9936984973339795,
"grad_norm": 16.020198742833117,
"learning_rate": 3.0560112349385895e-06,
"loss": 1.4970701932907104,
"step": 4113
},
{
"epoch": 1.9941832283082888,
"grad_norm": 9.907147625598189,
"learning_rate": 3.0534133198943673e-06,
"loss": 1.0073597431182861,
"step": 4114
},
{
"epoch": 1.9946679592825982,
"grad_norm": 7.956387873162489,
"learning_rate": 3.050816024095975e-06,
"loss": 1.4039490222930908,
"step": 4115
},
{
"epoch": 1.9951526902569074,
"grad_norm": 12.202273793960506,
"learning_rate": 3.0482193483696615e-06,
"loss": 1.933135747909546,
"step": 4116
},
{
"epoch": 1.9956374212312167,
"grad_norm": 7.514585777223179,
"learning_rate": 3.045623293541479e-06,
"loss": 1.8027403354644775,
"step": 4117
},
{
"epoch": 1.996122152205526,
"grad_norm": 7.945138581087314,
"learning_rate": 3.0430278604372776e-06,
"loss": 1.5283968448638916,
"step": 4118
},
{
"epoch": 1.9966068831798351,
"grad_norm": 11.328604239521537,
"learning_rate": 3.0404330498827185e-06,
"loss": 1.6343646049499512,
"step": 4119
},
{
"epoch": 1.9970916141541446,
"grad_norm": 5.755805637850513,
"learning_rate": 3.037838862703258e-06,
"loss": 1.4706766605377197,
"step": 4120
},
{
"epoch": 1.9975763451284538,
"grad_norm": 11.78785907847224,
"learning_rate": 3.0352452997241554e-06,
"loss": 1.326154351234436,
"step": 4121
},
{
"epoch": 1.998061076102763,
"grad_norm": 14.209429659436074,
"learning_rate": 3.032652361770472e-06,
"loss": 2.0569825172424316,
"step": 4122
},
{
"epoch": 1.9985458070770723,
"grad_norm": 12.674508538637363,
"learning_rate": 3.0300600496670666e-06,
"loss": 1.7905054092407227,
"step": 4123
},
{
"epoch": 1.9990305380513815,
"grad_norm": 7.421309777299854,
"learning_rate": 3.0274683642386094e-06,
"loss": 1.9077792167663574,
"step": 4124
},
{
"epoch": 1.9995152690256908,
"grad_norm": 15.990154427331758,
"learning_rate": 3.0248773063095606e-06,
"loss": 1.8959567546844482,
"step": 4125
},
{
"epoch": 2.0,
"grad_norm": 11.338313280770219,
"learning_rate": 3.022286876704183e-06,
"loss": 1.2083970308303833,
"step": 4126
},
{
"epoch": 2.0004847309743092,
"grad_norm": 6.5516496222163925,
"learning_rate": 3.019697076246542e-06,
"loss": 0.3800406754016876,
"step": 4127
},
{
"epoch": 2.0009694619486185,
"grad_norm": 11.174600466857019,
"learning_rate": 3.0171079057605035e-06,
"loss": 0.44570815563201904,
"step": 4128
},
{
"epoch": 2.0014541929229277,
"grad_norm": 9.669986382940241,
"learning_rate": 3.01451936606973e-06,
"loss": 0.593719482421875,
"step": 4129
},
{
"epoch": 2.001938923897237,
"grad_norm": 9.772874967715861,
"learning_rate": 3.0119314579976854e-06,
"loss": 0.6425056457519531,
"step": 4130
},
{
"epoch": 2.002423654871546,
"grad_norm": 13.822219761935818,
"learning_rate": 3.0093441823676306e-06,
"loss": 0.7986268997192383,
"step": 4131
},
{
"epoch": 2.0029083858458554,
"grad_norm": 9.012319948740835,
"learning_rate": 3.006757540002626e-06,
"loss": 0.4169493615627289,
"step": 4132
},
{
"epoch": 2.0033931168201646,
"grad_norm": 9.71113905043614,
"learning_rate": 3.0041715317255343e-06,
"loss": 0.4634905457496643,
"step": 4133
},
{
"epoch": 2.003877847794474,
"grad_norm": 6.721432164981929,
"learning_rate": 3.0015861583590113e-06,
"loss": 0.5192229747772217,
"step": 4134
},
{
"epoch": 2.0043625787687835,
"grad_norm": 9.020812029434705,
"learning_rate": 2.9990014207255134e-06,
"loss": 1.1851242780685425,
"step": 4135
},
{
"epoch": 2.0048473097430928,
"grad_norm": 10.953067816792382,
"learning_rate": 2.996417319647295e-06,
"loss": 0.38821670413017273,
"step": 4136
},
{
"epoch": 2.005332040717402,
"grad_norm": 14.851490322907749,
"learning_rate": 2.993833855946403e-06,
"loss": 1.069528579711914,
"step": 4137
},
{
"epoch": 2.0058167716917112,
"grad_norm": 13.149740102672396,
"learning_rate": 2.991251030444692e-06,
"loss": 0.8650757670402527,
"step": 4138
},
{
"epoch": 2.0063015026660205,
"grad_norm": 10.217880303990999,
"learning_rate": 2.988668843963804e-06,
"loss": 0.5563746690750122,
"step": 4139
},
{
"epoch": 2.0067862336403297,
"grad_norm": 11.132095093105589,
"learning_rate": 2.9860872973251815e-06,
"loss": 0.29935428500175476,
"step": 4140
},
{
"epoch": 2.007270964614639,
"grad_norm": 17.357224606969446,
"learning_rate": 2.9835063913500604e-06,
"loss": 0.9707812070846558,
"step": 4141
},
{
"epoch": 2.007755695588948,
"grad_norm": 12.125709537002281,
"learning_rate": 2.9809261268594803e-06,
"loss": 0.3189229369163513,
"step": 4142
},
{
"epoch": 2.0082404265632574,
"grad_norm": 12.600046018092108,
"learning_rate": 2.978346504674271e-06,
"loss": 1.0298067331314087,
"step": 4143
},
{
"epoch": 2.0087251575375666,
"grad_norm": 9.628254241101784,
"learning_rate": 2.9757675256150562e-06,
"loss": 0.4764339327812195,
"step": 4144
},
{
"epoch": 2.009209888511876,
"grad_norm": 15.534871830190156,
"learning_rate": 2.9731891905022593e-06,
"loss": 1.110782265663147,
"step": 4145
},
{
"epoch": 2.009694619486185,
"grad_norm": 11.856972575090461,
"learning_rate": 2.9706115001560952e-06,
"loss": 0.4650868773460388,
"step": 4146
},
{
"epoch": 2.0101793504604943,
"grad_norm": 15.671847891480768,
"learning_rate": 2.9680344553965782e-06,
"loss": 0.3473857641220093,
"step": 4147
},
{
"epoch": 2.0106640814348036,
"grad_norm": 13.81243486265362,
"learning_rate": 2.9654580570435142e-06,
"loss": 0.707101583480835,
"step": 4148
},
{
"epoch": 2.011148812409113,
"grad_norm": 27.120002611975405,
"learning_rate": 2.9628823059165033e-06,
"loss": 1.2089945077896118,
"step": 4149
},
{
"epoch": 2.011633543383422,
"grad_norm": 15.2964059308606,
"learning_rate": 2.960307202834941e-06,
"loss": 0.6468220949172974,
"step": 4150
},
{
"epoch": 2.0121182743577313,
"grad_norm": 12.913556826372679,
"learning_rate": 2.957732748618014e-06,
"loss": 0.7934861779212952,
"step": 4151
},
{
"epoch": 2.0126030053320405,
"grad_norm": 9.38430993629848,
"learning_rate": 2.9551589440847074e-06,
"loss": 0.33546918630599976,
"step": 4152
},
{
"epoch": 2.01308773630635,
"grad_norm": 14.79278006214687,
"learning_rate": 2.9525857900537957e-06,
"loss": 0.6313265562057495,
"step": 4153
},
{
"epoch": 2.0135724672806594,
"grad_norm": 18.45752260205983,
"learning_rate": 2.950013287343848e-06,
"loss": 0.518038272857666,
"step": 4154
},
{
"epoch": 2.0140571982549687,
"grad_norm": 6.424570646888725,
"learning_rate": 2.947441436773224e-06,
"loss": 0.19213822484016418,
"step": 4155
},
{
"epoch": 2.014541929229278,
"grad_norm": 10.299452250499243,
"learning_rate": 2.9448702391600804e-06,
"loss": 0.5293344259262085,
"step": 4156
},
{
"epoch": 2.015026660203587,
"grad_norm": 7.515218009097645,
"learning_rate": 2.9422996953223613e-06,
"loss": 0.33286699652671814,
"step": 4157
},
{
"epoch": 2.0155113911778963,
"grad_norm": 10.996845426490637,
"learning_rate": 2.9397298060778075e-06,
"loss": 0.4284389615058899,
"step": 4158
},
{
"epoch": 2.0159961221522056,
"grad_norm": 8.305766580981967,
"learning_rate": 2.9371605722439465e-06,
"loss": 0.24357056617736816,
"step": 4159
},
{
"epoch": 2.016480853126515,
"grad_norm": 16.780286882147994,
"learning_rate": 2.9345919946380985e-06,
"loss": 1.2170867919921875,
"step": 4160
},
{
"epoch": 2.016965584100824,
"grad_norm": 7.392691238705047,
"learning_rate": 2.932024074077382e-06,
"loss": 0.21354785561561584,
"step": 4161
},
{
"epoch": 2.0174503150751333,
"grad_norm": 10.978762786048716,
"learning_rate": 2.9294568113786968e-06,
"loss": 1.4143675565719604,
"step": 4162
},
{
"epoch": 2.0179350460494425,
"grad_norm": 14.627928611138925,
"learning_rate": 2.926890207358738e-06,
"loss": 1.0946059226989746,
"step": 4163
},
{
"epoch": 2.0184197770237517,
"grad_norm": 15.513089167200235,
"learning_rate": 2.9243242628339906e-06,
"loss": 0.3778882622718811,
"step": 4164
},
{
"epoch": 2.018904507998061,
"grad_norm": 10.939596748748999,
"learning_rate": 2.9217589786207296e-06,
"loss": 0.6775059103965759,
"step": 4165
},
{
"epoch": 2.01938923897237,
"grad_norm": 12.450176831862922,
"learning_rate": 2.9191943555350198e-06,
"loss": 0.39204275608062744,
"step": 4166
},
{
"epoch": 2.0198739699466794,
"grad_norm": 11.677269993036994,
"learning_rate": 2.916630394392719e-06,
"loss": 0.4509889781475067,
"step": 4167
},
{
"epoch": 2.0203587009209887,
"grad_norm": 16.38258752528308,
"learning_rate": 2.91406709600947e-06,
"loss": 0.9301784038543701,
"step": 4168
},
{
"epoch": 2.020843431895298,
"grad_norm": 8.913635203413303,
"learning_rate": 2.9115044612007044e-06,
"loss": 0.349587082862854,
"step": 4169
},
{
"epoch": 2.021328162869607,
"grad_norm": 12.869656930999163,
"learning_rate": 2.9089424907816433e-06,
"loss": 0.5335432291030884,
"step": 4170
},
{
"epoch": 2.021812893843917,
"grad_norm": 9.973708316895788,
"learning_rate": 2.9063811855673017e-06,
"loss": 0.7384313941001892,
"step": 4171
},
{
"epoch": 2.022297624818226,
"grad_norm": 11.059786154006858,
"learning_rate": 2.903820546372478e-06,
"loss": 0.3957206904888153,
"step": 4172
},
{
"epoch": 2.0227823557925353,
"grad_norm": 14.687272093581145,
"learning_rate": 2.9012605740117585e-06,
"loss": 0.36009371280670166,
"step": 4173
},
{
"epoch": 2.0232670867668445,
"grad_norm": 10.27953641124229,
"learning_rate": 2.8987012692995186e-06,
"loss": 0.8456242084503174,
"step": 4174
},
{
"epoch": 2.0237518177411538,
"grad_norm": 7.656101980650777,
"learning_rate": 2.896142633049922e-06,
"loss": 0.36282098293304443,
"step": 4175
},
{
"epoch": 2.024236548715463,
"grad_norm": 10.948258241420541,
"learning_rate": 2.8935846660769184e-06,
"loss": 0.33220958709716797,
"step": 4176
},
{
"epoch": 2.0247212796897722,
"grad_norm": 11.828403286577503,
"learning_rate": 2.891027369194246e-06,
"loss": 0.3301085829734802,
"step": 4177
},
{
"epoch": 2.0252060106640815,
"grad_norm": 15.713488461972153,
"learning_rate": 2.8884707432154274e-06,
"loss": 0.5742039084434509,
"step": 4178
},
{
"epoch": 2.0256907416383907,
"grad_norm": 10.696097454995385,
"learning_rate": 2.885914788953773e-06,
"loss": 0.7552458643913269,
"step": 4179
},
{
"epoch": 2.0261754726127,
"grad_norm": 7.245938884126019,
"learning_rate": 2.8833595072223842e-06,
"loss": 0.2407374382019043,
"step": 4180
},
{
"epoch": 2.026660203587009,
"grad_norm": 8.970587134957762,
"learning_rate": 2.8808048988341405e-06,
"loss": 0.4623566269874573,
"step": 4181
},
{
"epoch": 2.0271449345613184,
"grad_norm": 16.963313662902507,
"learning_rate": 2.878250964601712e-06,
"loss": 0.3059930205345154,
"step": 4182
},
{
"epoch": 2.0276296655356276,
"grad_norm": 12.330147111689543,
"learning_rate": 2.8756977053375544e-06,
"loss": 0.4601826071739197,
"step": 4183
},
{
"epoch": 2.028114396509937,
"grad_norm": 14.852844552224889,
"learning_rate": 2.873145121853903e-06,
"loss": 0.5393328070640564,
"step": 4184
},
{
"epoch": 2.028599127484246,
"grad_norm": 14.993324996166212,
"learning_rate": 2.870593214962787e-06,
"loss": 0.782923698425293,
"step": 4185
},
{
"epoch": 2.0290838584585553,
"grad_norm": 15.167402638999825,
"learning_rate": 2.8680419854760144e-06,
"loss": 0.8374148607254028,
"step": 4186
},
{
"epoch": 2.0295685894328646,
"grad_norm": 9.947655821013146,
"learning_rate": 2.8654914342051797e-06,
"loss": 0.29379305243492126,
"step": 4187
},
{
"epoch": 2.0300533204071742,
"grad_norm": 11.29640125049261,
"learning_rate": 2.8629415619616594e-06,
"loss": 0.39982444047927856,
"step": 4188
},
{
"epoch": 2.0305380513814835,
"grad_norm": 8.912076439348475,
"learning_rate": 2.8603923695566196e-06,
"loss": 0.5612793564796448,
"step": 4189
},
{
"epoch": 2.0310227823557927,
"grad_norm": 11.021160357084044,
"learning_rate": 2.8578438578010053e-06,
"loss": 0.46551287174224854,
"step": 4190
},
{
"epoch": 2.031507513330102,
"grad_norm": 12.853809202439889,
"learning_rate": 2.8552960275055437e-06,
"loss": 0.3544050455093384,
"step": 4191
},
{
"epoch": 2.031992244304411,
"grad_norm": 8.636043673646121,
"learning_rate": 2.8527488794807477e-06,
"loss": 0.5800259113311768,
"step": 4192
},
{
"epoch": 2.0324769752787204,
"grad_norm": 9.591144946228829,
"learning_rate": 2.8502024145369133e-06,
"loss": 0.40963223576545715,
"step": 4193
},
{
"epoch": 2.0329617062530296,
"grad_norm": 11.29335008887198,
"learning_rate": 2.8476566334841204e-06,
"loss": 0.5956078171730042,
"step": 4194
},
{
"epoch": 2.033446437227339,
"grad_norm": 11.319221551239185,
"learning_rate": 2.8451115371322302e-06,
"loss": 0.6884644627571106,
"step": 4195
},
{
"epoch": 2.033931168201648,
"grad_norm": 14.782142207415914,
"learning_rate": 2.842567126290884e-06,
"loss": 0.8981316685676575,
"step": 4196
},
{
"epoch": 2.0344158991759573,
"grad_norm": 11.731304273516814,
"learning_rate": 2.8400234017695074e-06,
"loss": 1.070948839187622,
"step": 4197
},
{
"epoch": 2.0349006301502666,
"grad_norm": 12.20426274120406,
"learning_rate": 2.8374803643773065e-06,
"loss": 0.3594021797180176,
"step": 4198
},
{
"epoch": 2.035385361124576,
"grad_norm": 21.74044486889264,
"learning_rate": 2.8349380149232695e-06,
"loss": 1.3570621013641357,
"step": 4199
},
{
"epoch": 2.035870092098885,
"grad_norm": 16.114185009936218,
"learning_rate": 2.8323963542161665e-06,
"loss": 0.863288402557373,
"step": 4200
},
{
"epoch": 2.0363548230731943,
"grad_norm": 11.389072111159196,
"learning_rate": 2.8298553830645463e-06,
"loss": 0.414512038230896,
"step": 4201
},
{
"epoch": 2.0368395540475035,
"grad_norm": 10.23361241151572,
"learning_rate": 2.8273151022767387e-06,
"loss": 0.7795964479446411,
"step": 4202
},
{
"epoch": 2.0373242850218127,
"grad_norm": 7.001034939958751,
"learning_rate": 2.824775512660858e-06,
"loss": 0.444904088973999,
"step": 4203
},
{
"epoch": 2.037809015996122,
"grad_norm": 7.835088773698179,
"learning_rate": 2.8222366150247933e-06,
"loss": 0.46037107706069946,
"step": 4204
},
{
"epoch": 2.038293746970431,
"grad_norm": 8.55880699479888,
"learning_rate": 2.8196984101762182e-06,
"loss": 0.3479774594306946,
"step": 4205
},
{
"epoch": 2.038778477944741,
"grad_norm": 9.00410632969007,
"learning_rate": 2.8171608989225795e-06,
"loss": 0.2767934501171112,
"step": 4206
},
{
"epoch": 2.03926320891905,
"grad_norm": 12.816607953968319,
"learning_rate": 2.8146240820711067e-06,
"loss": 0.9970000386238098,
"step": 4207
},
{
"epoch": 2.0397479398933593,
"grad_norm": 14.69644953609646,
"learning_rate": 2.812087960428813e-06,
"loss": 0.441932737827301,
"step": 4208
},
{
"epoch": 2.0402326708676686,
"grad_norm": 14.23073527071385,
"learning_rate": 2.8095525348024855e-06,
"loss": 0.3549739122390747,
"step": 4209
},
{
"epoch": 2.040717401841978,
"grad_norm": 47.842588960207586,
"learning_rate": 2.807017805998689e-06,
"loss": 0.48839473724365234,
"step": 4210
},
{
"epoch": 2.041202132816287,
"grad_norm": 11.745055636686317,
"learning_rate": 2.80448377482377e-06,
"loss": 0.7453871965408325,
"step": 4211
},
{
"epoch": 2.0416868637905963,
"grad_norm": 10.123695676675931,
"learning_rate": 2.80195044208385e-06,
"loss": 0.20357847213745117,
"step": 4212
},
{
"epoch": 2.0421715947649055,
"grad_norm": 13.680568500511269,
"learning_rate": 2.799417808584831e-06,
"loss": 0.34398266673088074,
"step": 4213
},
{
"epoch": 2.0426563257392147,
"grad_norm": 8.224270646598162,
"learning_rate": 2.7968858751323912e-06,
"loss": 0.28643473982810974,
"step": 4214
},
{
"epoch": 2.043141056713524,
"grad_norm": 9.551110244296776,
"learning_rate": 2.7943546425319857e-06,
"loss": 0.5298528671264648,
"step": 4215
},
{
"epoch": 2.043625787687833,
"grad_norm": 12.620353679191814,
"learning_rate": 2.791824111588845e-06,
"loss": 0.7016066312789917,
"step": 4216
},
{
"epoch": 2.0441105186621424,
"grad_norm": 35.466387564109255,
"learning_rate": 2.7892942831079834e-06,
"loss": 0.5559395551681519,
"step": 4217
},
{
"epoch": 2.0445952496364517,
"grad_norm": 10.305037462990345,
"learning_rate": 2.7867651578941846e-06,
"loss": 0.22680407762527466,
"step": 4218
},
{
"epoch": 2.045079980610761,
"grad_norm": 8.777311702753128,
"learning_rate": 2.7842367367520105e-06,
"loss": 0.3798913359642029,
"step": 4219
},
{
"epoch": 2.04556471158507,
"grad_norm": 16.873261099307626,
"learning_rate": 2.7817090204857997e-06,
"loss": 2.654900074005127,
"step": 4220
},
{
"epoch": 2.0460494425593794,
"grad_norm": 9.356920272107251,
"learning_rate": 2.779182009899667e-06,
"loss": 0.6559471487998962,
"step": 4221
},
{
"epoch": 2.0465341735336886,
"grad_norm": 12.503150282176328,
"learning_rate": 2.7766557057975008e-06,
"loss": 0.6401382684707642,
"step": 4222
},
{
"epoch": 2.047018904507998,
"grad_norm": 10.24514641248502,
"learning_rate": 2.7741301089829663e-06,
"loss": 0.7320972681045532,
"step": 4223
},
{
"epoch": 2.0475036354823075,
"grad_norm": 10.632157741432682,
"learning_rate": 2.771605220259504e-06,
"loss": 0.3971707820892334,
"step": 4224
},
{
"epoch": 2.0479883664566167,
"grad_norm": 11.737261777217396,
"learning_rate": 2.7690810404303276e-06,
"loss": 0.47302764654159546,
"step": 4225
},
{
"epoch": 2.048473097430926,
"grad_norm": 9.525603276035644,
"learning_rate": 2.766557570298425e-06,
"loss": 0.24325141310691833,
"step": 4226
},
{
"epoch": 2.048957828405235,
"grad_norm": 12.997368461500972,
"learning_rate": 2.764034810666565e-06,
"loss": 0.9079539179801941,
"step": 4227
},
{
"epoch": 2.0494425593795444,
"grad_norm": 11.082865544502347,
"learning_rate": 2.7615127623372784e-06,
"loss": 0.6476845741271973,
"step": 4228
},
{
"epoch": 2.0499272903538537,
"grad_norm": 10.300529382454034,
"learning_rate": 2.7589914261128788e-06,
"loss": 0.808599591255188,
"step": 4229
},
{
"epoch": 2.050412021328163,
"grad_norm": 13.845868797764252,
"learning_rate": 2.756470802795449e-06,
"loss": 0.41432979702949524,
"step": 4230
},
{
"epoch": 2.050896752302472,
"grad_norm": 10.32780877496349,
"learning_rate": 2.75395089318685e-06,
"loss": 0.8118299841880798,
"step": 4231
},
{
"epoch": 2.0513814832767814,
"grad_norm": 12.433128068987338,
"learning_rate": 2.7514316980887106e-06,
"loss": 0.34646645188331604,
"step": 4232
},
{
"epoch": 2.0518662142510906,
"grad_norm": 10.513164421158702,
"learning_rate": 2.7489132183024347e-06,
"loss": 0.7360924482345581,
"step": 4233
},
{
"epoch": 2.0523509452254,
"grad_norm": 8.481025666839615,
"learning_rate": 2.746395454629197e-06,
"loss": 0.31484752893447876,
"step": 4234
},
{
"epoch": 2.052835676199709,
"grad_norm": 7.636302258963155,
"learning_rate": 2.743878407869947e-06,
"loss": 0.5853752493858337,
"step": 4235
},
{
"epoch": 2.0533204071740183,
"grad_norm": 12.817481949693882,
"learning_rate": 2.7413620788254035e-06,
"loss": 0.6761990785598755,
"step": 4236
},
{
"epoch": 2.0538051381483275,
"grad_norm": 25.949480947306007,
"learning_rate": 2.738846468296058e-06,
"loss": 0.7668818235397339,
"step": 4237
},
{
"epoch": 2.054289869122637,
"grad_norm": 8.742565477401518,
"learning_rate": 2.736331577082174e-06,
"loss": 0.79445880651474,
"step": 4238
},
{
"epoch": 2.054774600096946,
"grad_norm": 8.481590109191322,
"learning_rate": 2.733817405983785e-06,
"loss": 0.5159662365913391,
"step": 4239
},
{
"epoch": 2.0552593310712552,
"grad_norm": 21.90977096674965,
"learning_rate": 2.7313039558006952e-06,
"loss": 1.5501304864883423,
"step": 4240
},
{
"epoch": 2.055744062045565,
"grad_norm": 6.3375744356559975,
"learning_rate": 2.7287912273324842e-06,
"loss": 0.18871022760868073,
"step": 4241
},
{
"epoch": 2.056228793019874,
"grad_norm": 10.402300667464196,
"learning_rate": 2.726279221378495e-06,
"loss": 0.9462136030197144,
"step": 4242
},
{
"epoch": 2.0567135239941834,
"grad_norm": 11.053940117235463,
"learning_rate": 2.723767938737847e-06,
"loss": 0.6428258419036865,
"step": 4243
},
{
"epoch": 2.0571982549684926,
"grad_norm": 9.775211486570127,
"learning_rate": 2.72125738020942e-06,
"loss": 0.26116061210632324,
"step": 4244
},
{
"epoch": 2.057682985942802,
"grad_norm": 9.42576562777616,
"learning_rate": 2.7187475465918768e-06,
"loss": 0.33411794900894165,
"step": 4245
},
{
"epoch": 2.058167716917111,
"grad_norm": 11.988914003682657,
"learning_rate": 2.7162384386836393e-06,
"loss": 0.4598311185836792,
"step": 4246
},
{
"epoch": 2.0586524478914203,
"grad_norm": 10.13688959995465,
"learning_rate": 2.7137300572829023e-06,
"loss": 0.37994831800460815,
"step": 4247
},
{
"epoch": 2.0591371788657296,
"grad_norm": 11.148361431912265,
"learning_rate": 2.7112224031876293e-06,
"loss": 0.5522850155830383,
"step": 4248
},
{
"epoch": 2.059621909840039,
"grad_norm": 9.150838635859301,
"learning_rate": 2.7087154771955525e-06,
"loss": 0.44318923354148865,
"step": 4249
},
{
"epoch": 2.060106640814348,
"grad_norm": 8.533889390967474,
"learning_rate": 2.7062092801041717e-06,
"loss": 0.3271547555923462,
"step": 4250
},
{
"epoch": 2.0605913717886573,
"grad_norm": 6.3916999296488095,
"learning_rate": 2.7037038127107563e-06,
"loss": 0.12867851555347443,
"step": 4251
},
{
"epoch": 2.0610761027629665,
"grad_norm": 15.378963791352684,
"learning_rate": 2.7011990758123412e-06,
"loss": 1.0349328517913818,
"step": 4252
},
{
"epoch": 2.0615608337372757,
"grad_norm": 23.726641042091558,
"learning_rate": 2.6986950702057314e-06,
"loss": 2.2486636638641357,
"step": 4253
},
{
"epoch": 2.062045564711585,
"grad_norm": 8.603112314019251,
"learning_rate": 2.696191796687495e-06,
"loss": 0.49494248628616333,
"step": 4254
},
{
"epoch": 2.062530295685894,
"grad_norm": 16.96908774435314,
"learning_rate": 2.693689256053976e-06,
"loss": 1.0243817567825317,
"step": 4255
},
{
"epoch": 2.0630150266602034,
"grad_norm": 11.330463436895753,
"learning_rate": 2.6911874491012766e-06,
"loss": 0.6004877090454102,
"step": 4256
},
{
"epoch": 2.0634997576345127,
"grad_norm": 10.322348171141007,
"learning_rate": 2.6886863766252686e-06,
"loss": 0.4225353002548218,
"step": 4257
},
{
"epoch": 2.0639844886088223,
"grad_norm": 14.655630802829876,
"learning_rate": 2.68618603942159e-06,
"loss": 0.5107121467590332,
"step": 4258
},
{
"epoch": 2.0644692195831316,
"grad_norm": 12.361740733647405,
"learning_rate": 2.6836864382856466e-06,
"loss": 0.5796098113059998,
"step": 4259
},
{
"epoch": 2.064953950557441,
"grad_norm": 13.156519315251892,
"learning_rate": 2.6811875740126063e-06,
"loss": 0.34799376130104065,
"step": 4260
},
{
"epoch": 2.06543868153175,
"grad_norm": 12.7465494931956,
"learning_rate": 2.678689447397407e-06,
"loss": 0.34838199615478516,
"step": 4261
},
{
"epoch": 2.0659234125060593,
"grad_norm": 8.124079914210611,
"learning_rate": 2.6761920592347475e-06,
"loss": 0.27010807394981384,
"step": 4262
},
{
"epoch": 2.0664081434803685,
"grad_norm": 7.6978386759567945,
"learning_rate": 2.673695410319094e-06,
"loss": 0.32837921380996704,
"step": 4263
},
{
"epoch": 2.0668928744546777,
"grad_norm": 10.624126674247423,
"learning_rate": 2.67119950144468e-06,
"loss": 0.7727179527282715,
"step": 4264
},
{
"epoch": 2.067377605428987,
"grad_norm": 9.578839624173002,
"learning_rate": 2.6687043334055017e-06,
"loss": 0.44907253980636597,
"step": 4265
},
{
"epoch": 2.067862336403296,
"grad_norm": 11.924831410654619,
"learning_rate": 2.666209906995315e-06,
"loss": 0.8042439818382263,
"step": 4266
},
{
"epoch": 2.0683470673776054,
"grad_norm": 10.234871141726002,
"learning_rate": 2.6637162230076463e-06,
"loss": 0.6248884201049805,
"step": 4267
},
{
"epoch": 2.0688317983519147,
"grad_norm": 7.99583884886672,
"learning_rate": 2.6612232822357805e-06,
"loss": 0.6333153247833252,
"step": 4268
},
{
"epoch": 2.069316529326224,
"grad_norm": 13.19369206775872,
"learning_rate": 2.658731085472773e-06,
"loss": 0.2571144104003906,
"step": 4269
},
{
"epoch": 2.069801260300533,
"grad_norm": 12.629775820389082,
"learning_rate": 2.656239633511437e-06,
"loss": 1.0051394701004028,
"step": 4270
},
{
"epoch": 2.0702859912748424,
"grad_norm": 8.271341435490546,
"learning_rate": 2.65374892714435e-06,
"loss": 0.823790967464447,
"step": 4271
},
{
"epoch": 2.0707707222491516,
"grad_norm": 13.488929443362338,
"learning_rate": 2.651258967163853e-06,
"loss": 0.3491865396499634,
"step": 4272
},
{
"epoch": 2.071255453223461,
"grad_norm": 9.663216587209812,
"learning_rate": 2.648769754362048e-06,
"loss": 0.4834080636501312,
"step": 4273
},
{
"epoch": 2.07174018419777,
"grad_norm": 9.303444334655223,
"learning_rate": 2.646281289530801e-06,
"loss": 0.28869837522506714,
"step": 4274
},
{
"epoch": 2.0722249151720793,
"grad_norm": 10.667017709120174,
"learning_rate": 2.643793573461739e-06,
"loss": 0.42517879605293274,
"step": 4275
},
{
"epoch": 2.0727096461463885,
"grad_norm": 20.00303925865765,
"learning_rate": 2.6413066069462527e-06,
"loss": 0.5779356956481934,
"step": 4276
},
{
"epoch": 2.073194377120698,
"grad_norm": 11.583765524248479,
"learning_rate": 2.6388203907754893e-06,
"loss": 0.40583837032318115,
"step": 4277
},
{
"epoch": 2.0736791080950074,
"grad_norm": 18.3649562690795,
"learning_rate": 2.6363349257403657e-06,
"loss": 0.4433738589286804,
"step": 4278
},
{
"epoch": 2.0741638390693167,
"grad_norm": 8.655339729246917,
"learning_rate": 2.6338502126315534e-06,
"loss": 0.47724759578704834,
"step": 4279
},
{
"epoch": 2.074648570043626,
"grad_norm": 12.182203229580338,
"learning_rate": 2.631366252239488e-06,
"loss": 0.5201858282089233,
"step": 4280
},
{
"epoch": 2.075133301017935,
"grad_norm": 10.038927229154748,
"learning_rate": 2.628883045354359e-06,
"loss": 0.3217492997646332,
"step": 4281
},
{
"epoch": 2.0756180319922444,
"grad_norm": 13.369273902959327,
"learning_rate": 2.626400592766123e-06,
"loss": 0.5592190027236938,
"step": 4282
},
{
"epoch": 2.0761027629665536,
"grad_norm": 16.09584979923174,
"learning_rate": 2.623918895264498e-06,
"loss": 0.3836628198623657,
"step": 4283
},
{
"epoch": 2.076587493940863,
"grad_norm": 17.094827296833156,
"learning_rate": 2.6214379536389554e-06,
"loss": 1.206301212310791,
"step": 4284
},
{
"epoch": 2.077072224915172,
"grad_norm": 10.900322656169116,
"learning_rate": 2.6189577686787317e-06,
"loss": 0.6401437520980835,
"step": 4285
},
{
"epoch": 2.0775569558894813,
"grad_norm": 14.150521971767581,
"learning_rate": 2.616478341172817e-06,
"loss": 0.6126570105552673,
"step": 4286
},
{
"epoch": 2.0780416868637905,
"grad_norm": 9.932247153426667,
"learning_rate": 2.61399967190997e-06,
"loss": 0.3142251968383789,
"step": 4287
},
{
"epoch": 2.0785264178380998,
"grad_norm": 6.388778546870824,
"learning_rate": 2.611521761678697e-06,
"loss": 0.45337268710136414,
"step": 4288
},
{
"epoch": 2.079011148812409,
"grad_norm": 6.985732910185184,
"learning_rate": 2.6090446112672678e-06,
"loss": 0.5191164016723633,
"step": 4289
},
{
"epoch": 2.0794958797867182,
"grad_norm": 14.371579814527715,
"learning_rate": 2.6065682214637124e-06,
"loss": 0.4278711676597595,
"step": 4290
},
{
"epoch": 2.0799806107610275,
"grad_norm": 9.833440536749464,
"learning_rate": 2.6040925930558134e-06,
"loss": 0.28957682847976685,
"step": 4291
},
{
"epoch": 2.0804653417353367,
"grad_norm": 9.942033918359758,
"learning_rate": 2.6016177268311205e-06,
"loss": 0.68109530210495,
"step": 4292
},
{
"epoch": 2.080950072709646,
"grad_norm": 19.011645497314074,
"learning_rate": 2.5991436235769317e-06,
"loss": 0.7632704973220825,
"step": 4293
},
{
"epoch": 2.0814348036839556,
"grad_norm": 7.709503866458775,
"learning_rate": 2.596670284080307e-06,
"loss": 0.4049154818058014,
"step": 4294
},
{
"epoch": 2.081919534658265,
"grad_norm": 11.27614005101795,
"learning_rate": 2.5941977091280614e-06,
"loss": 0.5537459254264832,
"step": 4295
},
{
"epoch": 2.082404265632574,
"grad_norm": 13.147846333433224,
"learning_rate": 2.5917258995067672e-06,
"loss": 0.29643988609313965,
"step": 4296
},
{
"epoch": 2.0828889966068833,
"grad_norm": 9.55781970130817,
"learning_rate": 2.5892548560027546e-06,
"loss": 0.47218966484069824,
"step": 4297
},
{
"epoch": 2.0833737275811925,
"grad_norm": 11.898186435520358,
"learning_rate": 2.5867845794021086e-06,
"loss": 0.5330972075462341,
"step": 4298
},
{
"epoch": 2.083858458555502,
"grad_norm": 9.588356025529206,
"learning_rate": 2.5843150704906695e-06,
"loss": 0.21934130787849426,
"step": 4299
},
{
"epoch": 2.084343189529811,
"grad_norm": 20.2521905942998,
"learning_rate": 2.581846330054034e-06,
"loss": 0.4614270329475403,
"step": 4300
},
{
"epoch": 2.0848279205041202,
"grad_norm": 11.487774750157497,
"learning_rate": 2.579378358877558e-06,
"loss": 0.4133484363555908,
"step": 4301
},
{
"epoch": 2.0853126514784295,
"grad_norm": 8.699178404239557,
"learning_rate": 2.5769111577463503e-06,
"loss": 0.16288897395133972,
"step": 4302
},
{
"epoch": 2.0857973824527387,
"grad_norm": 12.873524090918869,
"learning_rate": 2.5744447274452696e-06,
"loss": 0.5984461307525635,
"step": 4303
},
{
"epoch": 2.086282113427048,
"grad_norm": 11.645815845312722,
"learning_rate": 2.571979068758935e-06,
"loss": 0.26501691341400146,
"step": 4304
},
{
"epoch": 2.086766844401357,
"grad_norm": 9.485938456223,
"learning_rate": 2.5695141824717183e-06,
"loss": 0.5574601292610168,
"step": 4305
},
{
"epoch": 2.0872515753756664,
"grad_norm": 19.39715112504949,
"learning_rate": 2.5670500693677495e-06,
"loss": 1.0685298442840576,
"step": 4306
},
{
"epoch": 2.0877363063499756,
"grad_norm": 11.932853354932151,
"learning_rate": 2.564586730230907e-06,
"loss": 0.7756215929985046,
"step": 4307
},
{
"epoch": 2.088221037324285,
"grad_norm": 17.300578224116872,
"learning_rate": 2.562124165844826e-06,
"loss": 0.3351747989654541,
"step": 4308
},
{
"epoch": 2.088705768298594,
"grad_norm": 13.720002591235676,
"learning_rate": 2.5596623769928943e-06,
"loss": 0.4206147789955139,
"step": 4309
},
{
"epoch": 2.0891904992729033,
"grad_norm": 11.077415342077765,
"learning_rate": 2.557201364458252e-06,
"loss": 0.3672848045825958,
"step": 4310
},
{
"epoch": 2.089675230247213,
"grad_norm": 15.326605952124694,
"learning_rate": 2.5547411290237956e-06,
"loss": 0.4754721522331238,
"step": 4311
},
{
"epoch": 2.0901599612215223,
"grad_norm": 6.864188835387159,
"learning_rate": 2.552281671472171e-06,
"loss": 0.4424870014190674,
"step": 4312
},
{
"epoch": 2.0906446921958315,
"grad_norm": 8.646020758876274,
"learning_rate": 2.5498229925857776e-06,
"loss": 0.2980886697769165,
"step": 4313
},
{
"epoch": 2.0911294231701407,
"grad_norm": 7.617693513012984,
"learning_rate": 2.5473650931467665e-06,
"loss": 0.5240786671638489,
"step": 4314
},
{
"epoch": 2.09161415414445,
"grad_norm": 11.55012975258118,
"learning_rate": 2.5449079739370454e-06,
"loss": 0.9372491836547852,
"step": 4315
},
{
"epoch": 2.092098885118759,
"grad_norm": 8.920798331423127,
"learning_rate": 2.5424516357382665e-06,
"loss": 0.198202982544899,
"step": 4316
},
{
"epoch": 2.0925836160930684,
"grad_norm": 9.563950252967441,
"learning_rate": 2.53999607933184e-06,
"loss": 0.282177209854126,
"step": 4317
},
{
"epoch": 2.0930683470673777,
"grad_norm": 16.22292237325075,
"learning_rate": 2.5375413054989245e-06,
"loss": 0.5692302584648132,
"step": 4318
},
{
"epoch": 2.093553078041687,
"grad_norm": 15.97055558439534,
"learning_rate": 2.535087315020425e-06,
"loss": 0.3006570339202881,
"step": 4319
},
{
"epoch": 2.094037809015996,
"grad_norm": 11.295536046887193,
"learning_rate": 2.532634108677006e-06,
"loss": 0.6527411937713623,
"step": 4320
},
{
"epoch": 2.0945225399903054,
"grad_norm": 10.214793740835885,
"learning_rate": 2.530181687249079e-06,
"loss": 0.3527181148529053,
"step": 4321
},
{
"epoch": 2.0950072709646146,
"grad_norm": 10.320012471994215,
"learning_rate": 2.5277300515168034e-06,
"loss": 0.6366623044013977,
"step": 4322
},
{
"epoch": 2.095492001938924,
"grad_norm": 7.824776589965516,
"learning_rate": 2.5252792022600924e-06,
"loss": 0.4556957483291626,
"step": 4323
},
{
"epoch": 2.095976732913233,
"grad_norm": 12.882066248201353,
"learning_rate": 2.5228291402586047e-06,
"loss": 0.4930788278579712,
"step": 4324
},
{
"epoch": 2.0964614638875423,
"grad_norm": 10.202797796934215,
"learning_rate": 2.5203798662917555e-06,
"loss": 0.43474268913269043,
"step": 4325
},
{
"epoch": 2.0969461948618515,
"grad_norm": 8.507357667226062,
"learning_rate": 2.5179313811387007e-06,
"loss": 0.6960121393203735,
"step": 4326
},
{
"epoch": 2.0974309258361608,
"grad_norm": 7.38916733617197,
"learning_rate": 2.5154836855783514e-06,
"loss": 0.2683953046798706,
"step": 4327
},
{
"epoch": 2.09791565681047,
"grad_norm": 9.5007191530244,
"learning_rate": 2.5130367803893628e-06,
"loss": 0.6923701167106628,
"step": 4328
},
{
"epoch": 2.098400387784779,
"grad_norm": 9.770386998786826,
"learning_rate": 2.510590666350146e-06,
"loss": 0.9187780618667603,
"step": 4329
},
{
"epoch": 2.098885118759089,
"grad_norm": 7.156050294553467,
"learning_rate": 2.508145344238854e-06,
"loss": 0.5186793208122253,
"step": 4330
},
{
"epoch": 2.099369849733398,
"grad_norm": 12.042739801861158,
"learning_rate": 2.5057008148333883e-06,
"loss": 0.6509032845497131,
"step": 4331
},
{
"epoch": 2.0998545807077074,
"grad_norm": 9.36803382230094,
"learning_rate": 2.5032570789114017e-06,
"loss": 0.548608660697937,
"step": 4332
},
{
"epoch": 2.1003393116820166,
"grad_norm": 11.032674357872162,
"learning_rate": 2.5008141372502912e-06,
"loss": 0.5658021569252014,
"step": 4333
},
{
"epoch": 2.100824042656326,
"grad_norm": 9.642462866775716,
"learning_rate": 2.4983719906272037e-06,
"loss": 1.0657050609588623,
"step": 4334
},
{
"epoch": 2.101308773630635,
"grad_norm": 9.154641859655328,
"learning_rate": 2.4959306398190304e-06,
"loss": 0.6236649751663208,
"step": 4335
},
{
"epoch": 2.1017935046049443,
"grad_norm": 12.762486422743555,
"learning_rate": 2.493490085602412e-06,
"loss": 1.032112717628479,
"step": 4336
},
{
"epoch": 2.1022782355792535,
"grad_norm": 9.639894438536793,
"learning_rate": 2.491050328753735e-06,
"loss": 0.22689928114414215,
"step": 4337
},
{
"epoch": 2.1027629665535628,
"grad_norm": 12.78184675541053,
"learning_rate": 2.488611370049128e-06,
"loss": 0.21678589284420013,
"step": 4338
},
{
"epoch": 2.103247697527872,
"grad_norm": 8.147566682946811,
"learning_rate": 2.486173210264476e-06,
"loss": 0.6392786502838135,
"step": 4339
},
{
"epoch": 2.1037324285021812,
"grad_norm": 9.387951942547343,
"learning_rate": 2.483735850175402e-06,
"loss": 0.3008251190185547,
"step": 4340
},
{
"epoch": 2.1042171594764905,
"grad_norm": 12.852197104191333,
"learning_rate": 2.481299290557273e-06,
"loss": 0.7698485851287842,
"step": 4341
},
{
"epoch": 2.1047018904507997,
"grad_norm": 12.25015819059606,
"learning_rate": 2.4788635321852033e-06,
"loss": 0.2633882164955139,
"step": 4342
},
{
"epoch": 2.105186621425109,
"grad_norm": 13.152383810744794,
"learning_rate": 2.476428575834059e-06,
"loss": 0.5607249140739441,
"step": 4343
},
{
"epoch": 2.105671352399418,
"grad_norm": 11.200812698401664,
"learning_rate": 2.4739944222784416e-06,
"loss": 0.6245933771133423,
"step": 4344
},
{
"epoch": 2.1061560833737274,
"grad_norm": 7.0089694950139805,
"learning_rate": 2.471561072292703e-06,
"loss": 0.6994107365608215,
"step": 4345
},
{
"epoch": 2.1066408143480366,
"grad_norm": 10.030745635462509,
"learning_rate": 2.469128526650936e-06,
"loss": 0.626001238822937,
"step": 4346
},
{
"epoch": 2.1071255453223463,
"grad_norm": 10.041566586350559,
"learning_rate": 2.4666967861269804e-06,
"loss": 0.9712479114532471,
"step": 4347
},
{
"epoch": 2.1076102762966555,
"grad_norm": 18.052701360928342,
"learning_rate": 2.464265851494418e-06,
"loss": 0.83934086561203,
"step": 4348
},
{
"epoch": 2.1080950072709648,
"grad_norm": 13.728371503157348,
"learning_rate": 2.4618357235265745e-06,
"loss": 1.7469478845596313,
"step": 4349
},
{
"epoch": 2.108579738245274,
"grad_norm": 20.83427328246971,
"learning_rate": 2.4594064029965197e-06,
"loss": 0.36108270287513733,
"step": 4350
},
{
"epoch": 2.1090644692195832,
"grad_norm": 14.276804634513823,
"learning_rate": 2.4569778906770665e-06,
"loss": 0.6693019270896912,
"step": 4351
},
{
"epoch": 2.1095492001938925,
"grad_norm": 12.940481940193603,
"learning_rate": 2.4545501873407677e-06,
"loss": 1.2206013202667236,
"step": 4352
},
{
"epoch": 2.1100339311682017,
"grad_norm": 16.5427480235548,
"learning_rate": 2.452123293759926e-06,
"loss": 0.6591336727142334,
"step": 4353
},
{
"epoch": 2.110518662142511,
"grad_norm": 9.575021304022927,
"learning_rate": 2.449697210706579e-06,
"loss": 0.4536278247833252,
"step": 4354
},
{
"epoch": 2.11100339311682,
"grad_norm": 15.139414945947173,
"learning_rate": 2.44727193895251e-06,
"loss": 0.496498703956604,
"step": 4355
},
{
"epoch": 2.1114881240911294,
"grad_norm": 15.419397808156386,
"learning_rate": 2.444847479269244e-06,
"loss": 0.4323655068874359,
"step": 4356
},
{
"epoch": 2.1119728550654386,
"grad_norm": 9.668786707037086,
"learning_rate": 2.4424238324280457e-06,
"loss": 0.7506427764892578,
"step": 4357
},
{
"epoch": 2.112457586039748,
"grad_norm": 10.28000949644714,
"learning_rate": 2.4400009991999246e-06,
"loss": 0.4554784297943115,
"step": 4358
},
{
"epoch": 2.112942317014057,
"grad_norm": 12.61570139037447,
"learning_rate": 2.437578980355628e-06,
"loss": 0.38232526183128357,
"step": 4359
},
{
"epoch": 2.1134270479883663,
"grad_norm": 13.24789111740674,
"learning_rate": 2.4351577766656465e-06,
"loss": 0.6038130521774292,
"step": 4360
},
{
"epoch": 2.1139117789626756,
"grad_norm": 12.24794061818567,
"learning_rate": 2.432737388900208e-06,
"loss": 0.7476538419723511,
"step": 4361
},
{
"epoch": 2.114396509936985,
"grad_norm": 12.624369335940187,
"learning_rate": 2.4303178178292897e-06,
"loss": 0.7500994801521301,
"step": 4362
},
{
"epoch": 2.114881240911294,
"grad_norm": 6.267402938426612,
"learning_rate": 2.427899064222597e-06,
"loss": 0.1353798508644104,
"step": 4363
},
{
"epoch": 2.1153659718856037,
"grad_norm": 13.07250280360747,
"learning_rate": 2.425481128849582e-06,
"loss": 0.49101710319519043,
"step": 4364
},
{
"epoch": 2.115850702859913,
"grad_norm": 9.209721514882856,
"learning_rate": 2.4230640124794364e-06,
"loss": 0.2497841864824295,
"step": 4365
},
{
"epoch": 2.116335433834222,
"grad_norm": 10.648110891028187,
"learning_rate": 2.4206477158810876e-06,
"loss": 0.48266565799713135,
"step": 4366
},
{
"epoch": 2.1168201648085314,
"grad_norm": 9.641690493422546,
"learning_rate": 2.418232239823209e-06,
"loss": 0.5541664958000183,
"step": 4367
},
{
"epoch": 2.1173048957828406,
"grad_norm": 13.429887547787265,
"learning_rate": 2.4158175850742077e-06,
"loss": 0.4029924273490906,
"step": 4368
},
{
"epoch": 2.11778962675715,
"grad_norm": 13.508810722935715,
"learning_rate": 2.4134037524022302e-06,
"loss": 0.33879825472831726,
"step": 4369
},
{
"epoch": 2.118274357731459,
"grad_norm": 13.902415667182266,
"learning_rate": 2.4109907425751616e-06,
"loss": 0.4479033052921295,
"step": 4370
},
{
"epoch": 2.1187590887057683,
"grad_norm": 13.251120286869805,
"learning_rate": 2.4085785563606275e-06,
"loss": 0.9111617803573608,
"step": 4371
},
{
"epoch": 2.1192438196800776,
"grad_norm": 16.500359288110566,
"learning_rate": 2.4061671945259873e-06,
"loss": 0.5382887721061707,
"step": 4372
},
{
"epoch": 2.119728550654387,
"grad_norm": 15.228110722073708,
"learning_rate": 2.4037566578383424e-06,
"loss": 0.8038559556007385,
"step": 4373
},
{
"epoch": 2.120213281628696,
"grad_norm": 10.435948243902198,
"learning_rate": 2.401346947064529e-06,
"loss": 0.47342413663864136,
"step": 4374
},
{
"epoch": 2.1206980126030053,
"grad_norm": 10.012526832537018,
"learning_rate": 2.3989380629711197e-06,
"loss": 0.3607815206050873,
"step": 4375
},
{
"epoch": 2.1211827435773145,
"grad_norm": 8.733834390816849,
"learning_rate": 2.3965300063244298e-06,
"loss": 0.4885476231575012,
"step": 4376
},
{
"epoch": 2.1216674745516237,
"grad_norm": 10.108954621162294,
"learning_rate": 2.3941227778905052e-06,
"loss": 0.825659453868866,
"step": 4377
},
{
"epoch": 2.122152205525933,
"grad_norm": 7.791544106404262,
"learning_rate": 2.391716378435132e-06,
"loss": 0.273433655500412,
"step": 4378
},
{
"epoch": 2.122636936500242,
"grad_norm": 16.116502446251953,
"learning_rate": 2.3893108087238286e-06,
"loss": 0.3811129331588745,
"step": 4379
},
{
"epoch": 2.1231216674745514,
"grad_norm": 8.869392999040434,
"learning_rate": 2.3869060695218513e-06,
"loss": 0.5276844501495361,
"step": 4380
},
{
"epoch": 2.1236063984488607,
"grad_norm": 12.366852258281869,
"learning_rate": 2.3845021615941965e-06,
"loss": 0.45255517959594727,
"step": 4381
},
{
"epoch": 2.1240911294231704,
"grad_norm": 21.54870232159335,
"learning_rate": 2.3820990857055907e-06,
"loss": 0.5698171257972717,
"step": 4382
},
{
"epoch": 2.1245758603974796,
"grad_norm": 11.072152727443845,
"learning_rate": 2.3796968426204974e-06,
"loss": 0.318770170211792,
"step": 4383
},
{
"epoch": 2.125060591371789,
"grad_norm": 8.275931175611323,
"learning_rate": 2.3772954331031156e-06,
"loss": 0.509885311126709,
"step": 4384
},
{
"epoch": 2.125545322346098,
"grad_norm": 14.816368474969918,
"learning_rate": 2.3748948579173792e-06,
"loss": 0.3250060975551605,
"step": 4385
},
{
"epoch": 2.1260300533204073,
"grad_norm": 9.562273793461204,
"learning_rate": 2.372495117826955e-06,
"loss": 0.5226597785949707,
"step": 4386
},
{
"epoch": 2.1265147842947165,
"grad_norm": 13.28667469377057,
"learning_rate": 2.370096213595247e-06,
"loss": 0.48397576808929443,
"step": 4387
},
{
"epoch": 2.1269995152690258,
"grad_norm": 11.88991534908334,
"learning_rate": 2.3676981459853904e-06,
"loss": 0.3568519651889801,
"step": 4388
},
{
"epoch": 2.127484246243335,
"grad_norm": 16.668190033994655,
"learning_rate": 2.3653009157602545e-06,
"loss": 0.5783289670944214,
"step": 4389
},
{
"epoch": 2.127968977217644,
"grad_norm": 16.740721025506225,
"learning_rate": 2.362904523682447e-06,
"loss": 0.5177597403526306,
"step": 4390
},
{
"epoch": 2.1284537081919535,
"grad_norm": 13.282278279627018,
"learning_rate": 2.3605089705143034e-06,
"loss": 1.0089330673217773,
"step": 4391
},
{
"epoch": 2.1289384391662627,
"grad_norm": 12.93858874185972,
"learning_rate": 2.3581142570178943e-06,
"loss": 1.1009010076522827,
"step": 4392
},
{
"epoch": 2.129423170140572,
"grad_norm": 9.882271865022936,
"learning_rate": 2.3557203839550247e-06,
"loss": 0.8619928956031799,
"step": 4393
},
{
"epoch": 2.129907901114881,
"grad_norm": 10.139359818894757,
"learning_rate": 2.3533273520872246e-06,
"loss": 0.5847680568695068,
"step": 4394
},
{
"epoch": 2.1303926320891904,
"grad_norm": 25.76918492096311,
"learning_rate": 2.350935162175769e-06,
"loss": 0.7714149951934814,
"step": 4395
},
{
"epoch": 2.1308773630634996,
"grad_norm": 8.244042840195792,
"learning_rate": 2.3485438149816565e-06,
"loss": 0.4588845372200012,
"step": 4396
},
{
"epoch": 2.131362094037809,
"grad_norm": 9.625627493900026,
"learning_rate": 2.3461533112656188e-06,
"loss": 0.6415393352508545,
"step": 4397
},
{
"epoch": 2.131846825012118,
"grad_norm": 9.54515419610012,
"learning_rate": 2.343763651788119e-06,
"loss": 0.49650317430496216,
"step": 4398
},
{
"epoch": 2.1323315559864273,
"grad_norm": 15.779952735934415,
"learning_rate": 2.3413748373093566e-06,
"loss": 0.7168660759925842,
"step": 4399
},
{
"epoch": 2.132816286960737,
"grad_norm": 12.224193558708969,
"learning_rate": 2.3389868685892573e-06,
"loss": 0.32028430700302124,
"step": 4400
},
{
"epoch": 2.1333010179350462,
"grad_norm": 12.479383788225693,
"learning_rate": 2.3365997463874764e-06,
"loss": 0.487453430891037,
"step": 4401
},
{
"epoch": 2.1337857489093555,
"grad_norm": 13.609736377055038,
"learning_rate": 2.334213471463403e-06,
"loss": 0.3389878273010254,
"step": 4402
},
{
"epoch": 2.1342704798836647,
"grad_norm": 13.275659290521222,
"learning_rate": 2.3318280445761556e-06,
"loss": 0.4051658809185028,
"step": 4403
},
{
"epoch": 2.134755210857974,
"grad_norm": 16.437101978475383,
"learning_rate": 2.3294434664845854e-06,
"loss": 0.292762815952301,
"step": 4404
},
{
"epoch": 2.135239941832283,
"grad_norm": 12.093612273052596,
"learning_rate": 2.3270597379472713e-06,
"loss": 0.6090400218963623,
"step": 4405
},
{
"epoch": 2.1357246728065924,
"grad_norm": 9.769900749836994,
"learning_rate": 2.3246768597225216e-06,
"loss": 0.5344865918159485,
"step": 4406
},
{
"epoch": 2.1362094037809016,
"grad_norm": 11.283219375694477,
"learning_rate": 2.322294832568374e-06,
"loss": 0.740075945854187,
"step": 4407
},
{
"epoch": 2.136694134755211,
"grad_norm": 9.276601718276224,
"learning_rate": 2.319913657242597e-06,
"loss": 0.6081941723823547,
"step": 4408
},
{
"epoch": 2.13717886572952,
"grad_norm": 8.805531168216064,
"learning_rate": 2.317533334502687e-06,
"loss": 0.40270987153053284,
"step": 4409
},
{
"epoch": 2.1376635967038293,
"grad_norm": 9.560870819323409,
"learning_rate": 2.3151538651058687e-06,
"loss": 0.6254655718803406,
"step": 4410
},
{
"epoch": 2.1381483276781386,
"grad_norm": 16.270420283292395,
"learning_rate": 2.3127752498090973e-06,
"loss": 0.6911880970001221,
"step": 4411
},
{
"epoch": 2.138633058652448,
"grad_norm": 11.32639746846736,
"learning_rate": 2.3103974893690523e-06,
"loss": 0.363311231136322,
"step": 4412
},
{
"epoch": 2.139117789626757,
"grad_norm": 11.48522223417413,
"learning_rate": 2.3080205845421484e-06,
"loss": 0.2983347177505493,
"step": 4413
},
{
"epoch": 2.1396025206010663,
"grad_norm": 8.995239518906835,
"learning_rate": 2.3056445360845214e-06,
"loss": 0.28856778144836426,
"step": 4414
},
{
"epoch": 2.1400872515753755,
"grad_norm": 12.359543583620564,
"learning_rate": 2.303269344752039e-06,
"loss": 0.5005329251289368,
"step": 4415
},
{
"epoch": 2.1405719825496847,
"grad_norm": 11.092181194763661,
"learning_rate": 2.3008950113002898e-06,
"loss": 0.4218388795852661,
"step": 4416
},
{
"epoch": 2.1410567135239944,
"grad_norm": 14.996803377512393,
"learning_rate": 2.2985215364845955e-06,
"loss": 0.5367415547370911,
"step": 4417
},
{
"epoch": 2.1415414444983036,
"grad_norm": 16.889593643562552,
"learning_rate": 2.2961489210600053e-06,
"loss": 0.7607506513595581,
"step": 4418
},
{
"epoch": 2.142026175472613,
"grad_norm": 10.578241036607247,
"learning_rate": 2.2937771657812918e-06,
"loss": 0.32907798886299133,
"step": 4419
},
{
"epoch": 2.142510906446922,
"grad_norm": 9.535820388908428,
"learning_rate": 2.2914062714029545e-06,
"loss": 0.37020617723464966,
"step": 4420
},
{
"epoch": 2.1429956374212313,
"grad_norm": 23.9397730150345,
"learning_rate": 2.2890362386792196e-06,
"loss": 0.5963550209999084,
"step": 4421
},
{
"epoch": 2.1434803683955406,
"grad_norm": 11.531873041748588,
"learning_rate": 2.2866670683640395e-06,
"loss": 0.4945273995399475,
"step": 4422
},
{
"epoch": 2.14396509936985,
"grad_norm": 11.47645144771782,
"learning_rate": 2.284298761211091e-06,
"loss": 0.3520990014076233,
"step": 4423
},
{
"epoch": 2.144449830344159,
"grad_norm": 10.793539966422987,
"learning_rate": 2.2819313179737784e-06,
"loss": 0.30792710185050964,
"step": 4424
},
{
"epoch": 2.1449345613184683,
"grad_norm": 12.644136737237623,
"learning_rate": 2.2795647394052284e-06,
"loss": 0.8874340057373047,
"step": 4425
},
{
"epoch": 2.1454192922927775,
"grad_norm": 6.876616654697707,
"learning_rate": 2.277199026258295e-06,
"loss": 0.21706412732601166,
"step": 4426
},
{
"epoch": 2.1459040232670867,
"grad_norm": 9.942344944352264,
"learning_rate": 2.2748341792855545e-06,
"loss": 0.7760946750640869,
"step": 4427
},
{
"epoch": 2.146388754241396,
"grad_norm": 12.638006105307358,
"learning_rate": 2.2724701992393116e-06,
"loss": 0.358303964138031,
"step": 4428
},
{
"epoch": 2.146873485215705,
"grad_norm": 9.58607021056745,
"learning_rate": 2.2701070868715924e-06,
"loss": 0.27742481231689453,
"step": 4429
},
{
"epoch": 2.1473582161900144,
"grad_norm": 15.50058315163071,
"learning_rate": 2.267744842934147e-06,
"loss": 0.530869722366333,
"step": 4430
},
{
"epoch": 2.1478429471643237,
"grad_norm": 21.775305844239785,
"learning_rate": 2.265383468178449e-06,
"loss": 1.0886576175689697,
"step": 4431
},
{
"epoch": 2.148327678138633,
"grad_norm": 13.89083441184049,
"learning_rate": 2.2630229633556972e-06,
"loss": 1.1702598333358765,
"step": 4432
},
{
"epoch": 2.148812409112942,
"grad_norm": 10.007107543586383,
"learning_rate": 2.2606633292168113e-06,
"loss": 0.2262575477361679,
"step": 4433
},
{
"epoch": 2.1492971400872514,
"grad_norm": 12.28065150826906,
"learning_rate": 2.2583045665124358e-06,
"loss": 0.2926369905471802,
"step": 4434
},
{
"epoch": 2.1497818710615606,
"grad_norm": 9.14551940910844,
"learning_rate": 2.255946675992938e-06,
"loss": 0.647142767906189,
"step": 4435
},
{
"epoch": 2.1502666020358703,
"grad_norm": 8.556692103744314,
"learning_rate": 2.253589658408405e-06,
"loss": 0.31547537446022034,
"step": 4436
},
{
"epoch": 2.1507513330101795,
"grad_norm": 16.448883393403133,
"learning_rate": 2.2512335145086534e-06,
"loss": 0.270944744348526,
"step": 4437
},
{
"epoch": 2.1512360639844887,
"grad_norm": 13.466424872957026,
"learning_rate": 2.248878245043212e-06,
"loss": 0.3543540835380554,
"step": 4438
},
{
"epoch": 2.151720794958798,
"grad_norm": 11.855263233910271,
"learning_rate": 2.2465238507613385e-06,
"loss": 0.34767138957977295,
"step": 4439
},
{
"epoch": 2.152205525933107,
"grad_norm": 11.376738199803453,
"learning_rate": 2.2441703324120095e-06,
"loss": 0.502405047416687,
"step": 4440
},
{
"epoch": 2.1526902569074164,
"grad_norm": 14.31266553960509,
"learning_rate": 2.241817690743921e-06,
"loss": 0.32892003655433655,
"step": 4441
},
{
"epoch": 2.1531749878817257,
"grad_norm": 10.909433104013639,
"learning_rate": 2.2394659265054976e-06,
"loss": 0.809765636920929,
"step": 4442
},
{
"epoch": 2.153659718856035,
"grad_norm": 11.752463159624083,
"learning_rate": 2.2371150404448766e-06,
"loss": 0.6160309314727783,
"step": 4443
},
{
"epoch": 2.154144449830344,
"grad_norm": 14.241578283940322,
"learning_rate": 2.2347650333099195e-06,
"loss": 0.8176978230476379,
"step": 4444
},
{
"epoch": 2.1546291808046534,
"grad_norm": 15.08760791569379,
"learning_rate": 2.2324159058482086e-06,
"loss": 0.34964025020599365,
"step": 4445
},
{
"epoch": 2.1551139117789626,
"grad_norm": 13.005528862500745,
"learning_rate": 2.230067658807044e-06,
"loss": 0.4808919429779053,
"step": 4446
},
{
"epoch": 2.155598642753272,
"grad_norm": 8.744579732347326,
"learning_rate": 2.2277202929334483e-06,
"loss": 0.29446691274642944,
"step": 4447
},
{
"epoch": 2.156083373727581,
"grad_norm": 13.186521728590225,
"learning_rate": 2.2253738089741635e-06,
"loss": 0.5295696258544922,
"step": 4448
},
{
"epoch": 2.1565681047018903,
"grad_norm": 8.338165918562568,
"learning_rate": 2.223028207675648e-06,
"loss": 0.2563644051551819,
"step": 4449
},
{
"epoch": 2.1570528356761995,
"grad_norm": 9.260663742540839,
"learning_rate": 2.2206834897840814e-06,
"loss": 0.835071861743927,
"step": 4450
},
{
"epoch": 2.1575375666505088,
"grad_norm": 13.421242683092542,
"learning_rate": 2.2183396560453657e-06,
"loss": 0.4028346538543701,
"step": 4451
},
{
"epoch": 2.158022297624818,
"grad_norm": 9.813711347629177,
"learning_rate": 2.2159967072051176e-06,
"loss": 0.29173344373703003,
"step": 4452
},
{
"epoch": 2.1585070285991277,
"grad_norm": 8.678913219647043,
"learning_rate": 2.2136546440086735e-06,
"loss": 0.5390990376472473,
"step": 4453
},
{
"epoch": 2.158991759573437,
"grad_norm": 9.307199834785767,
"learning_rate": 2.2113134672010854e-06,
"loss": 0.4224136769771576,
"step": 4454
},
{
"epoch": 2.159476490547746,
"grad_norm": 10.029874149817845,
"learning_rate": 2.208973177527125e-06,
"loss": 0.781512439250946,
"step": 4455
},
{
"epoch": 2.1599612215220554,
"grad_norm": 9.65971740809655,
"learning_rate": 2.2066337757312867e-06,
"loss": 0.8000625371932983,
"step": 4456
},
{
"epoch": 2.1604459524963646,
"grad_norm": 12.492128233749431,
"learning_rate": 2.204295262557775e-06,
"loss": 0.9096415638923645,
"step": 4457
},
{
"epoch": 2.160930683470674,
"grad_norm": 8.366735508767709,
"learning_rate": 2.201957638750517e-06,
"loss": 0.5500125885009766,
"step": 4458
},
{
"epoch": 2.161415414444983,
"grad_norm": 7.296581733217851,
"learning_rate": 2.199620905053153e-06,
"loss": 0.29896625876426697,
"step": 4459
},
{
"epoch": 2.1619001454192923,
"grad_norm": 11.703056517735476,
"learning_rate": 2.1972850622090426e-06,
"loss": 0.5937597751617432,
"step": 4460
},
{
"epoch": 2.1623848763936016,
"grad_norm": 15.306349514624722,
"learning_rate": 2.1949501109612615e-06,
"loss": 0.8717677593231201,
"step": 4461
},
{
"epoch": 2.162869607367911,
"grad_norm": 10.200252969357267,
"learning_rate": 2.1926160520526014e-06,
"loss": 0.3284316062927246,
"step": 4462
},
{
"epoch": 2.16335433834222,
"grad_norm": 7.0985123018958,
"learning_rate": 2.1902828862255704e-06,
"loss": 0.4391622841358185,
"step": 4463
},
{
"epoch": 2.1638390693165293,
"grad_norm": 13.323465518562731,
"learning_rate": 2.1879506142223905e-06,
"loss": 1.9031668901443481,
"step": 4464
},
{
"epoch": 2.1643238002908385,
"grad_norm": 17.575310831039115,
"learning_rate": 2.185619236785005e-06,
"loss": 0.8007135987281799,
"step": 4465
},
{
"epoch": 2.1648085312651477,
"grad_norm": 13.558136504953525,
"learning_rate": 2.183288754655067e-06,
"loss": 0.7256397008895874,
"step": 4466
},
{
"epoch": 2.165293262239457,
"grad_norm": 7.077065481422446,
"learning_rate": 2.1809591685739466e-06,
"loss": 0.28264063596725464,
"step": 4467
},
{
"epoch": 2.165777993213766,
"grad_norm": 10.440240219500375,
"learning_rate": 2.178630479282731e-06,
"loss": 0.6226823329925537,
"step": 4468
},
{
"epoch": 2.1662627241880754,
"grad_norm": 9.358781075883865,
"learning_rate": 2.176302687522215e-06,
"loss": 0.98133784532547,
"step": 4469
},
{
"epoch": 2.166747455162385,
"grad_norm": 14.477662140086522,
"learning_rate": 2.1739757940329177e-06,
"loss": 0.4022904634475708,
"step": 4470
},
{
"epoch": 2.1672321861366943,
"grad_norm": 10.13863543641054,
"learning_rate": 2.171649799555066e-06,
"loss": 0.5251460075378418,
"step": 4471
},
{
"epoch": 2.1677169171110036,
"grad_norm": 17.18057392687797,
"learning_rate": 2.1693247048286027e-06,
"loss": 1.5000563859939575,
"step": 4472
},
{
"epoch": 2.168201648085313,
"grad_norm": 12.962140107640284,
"learning_rate": 2.1670005105931824e-06,
"loss": 0.36698979139328003,
"step": 4473
},
{
"epoch": 2.168686379059622,
"grad_norm": 13.346306567198297,
"learning_rate": 2.164677217588178e-06,
"loss": 0.8042100667953491,
"step": 4474
},
{
"epoch": 2.1691711100339313,
"grad_norm": 8.949726122778307,
"learning_rate": 2.1623548265526734e-06,
"loss": 0.9109041690826416,
"step": 4475
},
{
"epoch": 2.1696558410082405,
"grad_norm": 11.035312480440686,
"learning_rate": 2.1600333382254608e-06,
"loss": 0.464444637298584,
"step": 4476
},
{
"epoch": 2.1701405719825497,
"grad_norm": 7.041786750545003,
"learning_rate": 2.1577127533450517e-06,
"loss": 0.38475000858306885,
"step": 4477
},
{
"epoch": 2.170625302956859,
"grad_norm": 10.904488490462555,
"learning_rate": 2.1553930726496654e-06,
"loss": 0.30781424045562744,
"step": 4478
},
{
"epoch": 2.171110033931168,
"grad_norm": 12.967900188184814,
"learning_rate": 2.1530742968772395e-06,
"loss": 0.3626231253147125,
"step": 4479
},
{
"epoch": 2.1715947649054774,
"grad_norm": 7.436904455562268,
"learning_rate": 2.1507564267654187e-06,
"loss": 0.2618992030620575,
"step": 4480
},
{
"epoch": 2.1720794958797867,
"grad_norm": 18.22446995386227,
"learning_rate": 2.1484394630515607e-06,
"loss": 0.5772433280944824,
"step": 4481
},
{
"epoch": 2.172564226854096,
"grad_norm": 15.047137123328914,
"learning_rate": 2.146123406472736e-06,
"loss": 0.7613720893859863,
"step": 4482
},
{
"epoch": 2.173048957828405,
"grad_norm": 12.924000532759063,
"learning_rate": 2.143808257765725e-06,
"loss": 0.2784912586212158,
"step": 4483
},
{
"epoch": 2.1735336888027144,
"grad_norm": 10.547653779273952,
"learning_rate": 2.14149401766702e-06,
"loss": 0.43407589197158813,
"step": 4484
},
{
"epoch": 2.1740184197770236,
"grad_norm": 9.570203973807851,
"learning_rate": 2.139180686912825e-06,
"loss": 0.34093332290649414,
"step": 4485
},
{
"epoch": 2.174503150751333,
"grad_norm": 23.896872130462903,
"learning_rate": 2.136868266239054e-06,
"loss": 1.2885031700134277,
"step": 4486
},
{
"epoch": 2.1749878817256425,
"grad_norm": 11.425609210504781,
"learning_rate": 2.1345567563813284e-06,
"loss": 0.4486278295516968,
"step": 4487
},
{
"epoch": 2.1754726126999513,
"grad_norm": 15.647122764810778,
"learning_rate": 2.1322461580749883e-06,
"loss": 0.5928796529769897,
"step": 4488
},
{
"epoch": 2.175957343674261,
"grad_norm": 13.713626236694266,
"learning_rate": 2.1299364720550752e-06,
"loss": 0.3220873773097992,
"step": 4489
},
{
"epoch": 2.17644207464857,
"grad_norm": 9.91472083112399,
"learning_rate": 2.127627699056345e-06,
"loss": 0.48300206661224365,
"step": 4490
},
{
"epoch": 2.1769268056228794,
"grad_norm": 9.925289430544721,
"learning_rate": 2.1253198398132625e-06,
"loss": 0.2463838905096054,
"step": 4491
},
{
"epoch": 2.1774115365971887,
"grad_norm": 9.36258879910722,
"learning_rate": 2.123012895059996e-06,
"loss": 0.6514277458190918,
"step": 4492
},
{
"epoch": 2.177896267571498,
"grad_norm": 11.377554488090519,
"learning_rate": 2.1207068655304337e-06,
"loss": 0.42757388949394226,
"step": 4493
},
{
"epoch": 2.178380998545807,
"grad_norm": 12.044844496674843,
"learning_rate": 2.118401751958165e-06,
"loss": 0.497768759727478,
"step": 4494
},
{
"epoch": 2.1788657295201164,
"grad_norm": 17.409978283852354,
"learning_rate": 2.11609755507649e-06,
"loss": 0.48649924993515015,
"step": 4495
},
{
"epoch": 2.1793504604944256,
"grad_norm": 12.319761454077081,
"learning_rate": 2.113794275618417e-06,
"loss": 0.4158462882041931,
"step": 4496
},
{
"epoch": 2.179835191468735,
"grad_norm": 12.284398039822024,
"learning_rate": 2.1114919143166626e-06,
"loss": 0.405046671628952,
"step": 4497
},
{
"epoch": 2.180319922443044,
"grad_norm": 13.61282185690151,
"learning_rate": 2.1091904719036507e-06,
"loss": 0.5179070830345154,
"step": 4498
},
{
"epoch": 2.1808046534173533,
"grad_norm": 8.006621944721601,
"learning_rate": 2.1068899491115135e-06,
"loss": 0.30539587140083313,
"step": 4499
},
{
"epoch": 2.1812893843916625,
"grad_norm": 12.464651268277777,
"learning_rate": 2.1045903466720915e-06,
"loss": 0.3698441684246063,
"step": 4500
},
{
"epoch": 2.1817741153659718,
"grad_norm": 10.450203480766543,
"learning_rate": 2.102291665316929e-06,
"loss": 0.6474381685256958,
"step": 4501
},
{
"epoch": 2.182258846340281,
"grad_norm": 10.279402409047304,
"learning_rate": 2.099993905777283e-06,
"loss": 0.57194584608078,
"step": 4502
},
{
"epoch": 2.1827435773145902,
"grad_norm": 14.368637492080216,
"learning_rate": 2.097697068784113e-06,
"loss": 0.3390950560569763,
"step": 4503
},
{
"epoch": 2.1832283082888995,
"grad_norm": 9.467399566980705,
"learning_rate": 2.0954011550680857e-06,
"loss": 0.31228107213974,
"step": 4504
},
{
"epoch": 2.1837130392632087,
"grad_norm": 17.92399700453114,
"learning_rate": 2.093106165359574e-06,
"loss": 0.5335447788238525,
"step": 4505
},
{
"epoch": 2.1841977702375184,
"grad_norm": 8.623736910074129,
"learning_rate": 2.0908121003886583e-06,
"loss": 0.22458243370056152,
"step": 4506
},
{
"epoch": 2.1846825012118276,
"grad_norm": 10.495976460362776,
"learning_rate": 2.0885189608851225e-06,
"loss": 0.3181239068508148,
"step": 4507
},
{
"epoch": 2.185167232186137,
"grad_norm": 16.736129358124163,
"learning_rate": 2.0862267475784585e-06,
"loss": 0.573546290397644,
"step": 4508
},
{
"epoch": 2.185651963160446,
"grad_norm": 10.784925486167444,
"learning_rate": 2.0839354611978625e-06,
"loss": 0.6608833074569702,
"step": 4509
},
{
"epoch": 2.1861366941347553,
"grad_norm": 7.450394828844607,
"learning_rate": 2.0816451024722344e-06,
"loss": 0.2710973918437958,
"step": 4510
},
{
"epoch": 2.1866214251090645,
"grad_norm": 8.372430128499275,
"learning_rate": 2.0793556721301806e-06,
"loss": 0.3655508756637573,
"step": 4511
},
{
"epoch": 2.1871061560833738,
"grad_norm": 18.169707360177373,
"learning_rate": 2.0770671709000133e-06,
"loss": 1.8301770687103271,
"step": 4512
},
{
"epoch": 2.187590887057683,
"grad_norm": 12.823900210797646,
"learning_rate": 2.0747795995097498e-06,
"loss": 0.3632928729057312,
"step": 4513
},
{
"epoch": 2.1880756180319922,
"grad_norm": 11.009747921693526,
"learning_rate": 2.0724929586871052e-06,
"loss": 0.4423820376396179,
"step": 4514
},
{
"epoch": 2.1885603490063015,
"grad_norm": 10.647408137211112,
"learning_rate": 2.0702072491595023e-06,
"loss": 0.200720876455307,
"step": 4515
},
{
"epoch": 2.1890450799806107,
"grad_norm": 10.212961849755454,
"learning_rate": 2.067922471654073e-06,
"loss": 0.5186176300048828,
"step": 4516
},
{
"epoch": 2.18952981095492,
"grad_norm": 8.668749603770074,
"learning_rate": 2.065638626897645e-06,
"loss": 0.2633720636367798,
"step": 4517
},
{
"epoch": 2.190014541929229,
"grad_norm": 15.775201460880757,
"learning_rate": 2.063355715616754e-06,
"loss": 0.5434201955795288,
"step": 4518
},
{
"epoch": 2.1904992729035384,
"grad_norm": 13.87821275757755,
"learning_rate": 2.061073738537635e-06,
"loss": 0.31555187702178955,
"step": 4519
},
{
"epoch": 2.1909840038778476,
"grad_norm": 8.731966689785018,
"learning_rate": 2.0587926963862287e-06,
"loss": 0.6183403730392456,
"step": 4520
},
{
"epoch": 2.191468734852157,
"grad_norm": 10.305765738797918,
"learning_rate": 2.0565125898881784e-06,
"loss": 0.61611008644104,
"step": 4521
},
{
"epoch": 2.191953465826466,
"grad_norm": 9.692732089944062,
"learning_rate": 2.054233419768827e-06,
"loss": 0.4834679663181305,
"step": 4522
},
{
"epoch": 2.192438196800776,
"grad_norm": 8.577099250421654,
"learning_rate": 2.0519551867532235e-06,
"loss": 0.3569614589214325,
"step": 4523
},
{
"epoch": 2.192922927775085,
"grad_norm": 8.749261335722935,
"learning_rate": 2.049677891566115e-06,
"loss": 0.48290392756462097,
"step": 4524
},
{
"epoch": 2.1934076587493943,
"grad_norm": 9.669901576778145,
"learning_rate": 2.0474015349319505e-06,
"loss": 0.5255041122436523,
"step": 4525
},
{
"epoch": 2.1938923897237035,
"grad_norm": 13.55663526913222,
"learning_rate": 2.045126117574886e-06,
"loss": 0.5165213346481323,
"step": 4526
},
{
"epoch": 2.1943771206980127,
"grad_norm": 14.58423120024342,
"learning_rate": 2.042851640218772e-06,
"loss": 0.3698723316192627,
"step": 4527
},
{
"epoch": 2.194861851672322,
"grad_norm": 21.127712880361344,
"learning_rate": 2.040578103587165e-06,
"loss": 3.715176820755005,
"step": 4528
},
{
"epoch": 2.195346582646631,
"grad_norm": 10.817788240699224,
"learning_rate": 2.0383055084033136e-06,
"loss": 0.45615142583847046,
"step": 4529
},
{
"epoch": 2.1958313136209404,
"grad_norm": 9.331309230319013,
"learning_rate": 2.03603385539018e-06,
"loss": 0.3640298843383789,
"step": 4530
},
{
"epoch": 2.1963160445952497,
"grad_norm": 9.001053851825144,
"learning_rate": 2.033763145270416e-06,
"loss": 0.5558571815490723,
"step": 4531
},
{
"epoch": 2.196800775569559,
"grad_norm": 7.749971871225275,
"learning_rate": 2.0314933787663783e-06,
"loss": 0.4788832366466522,
"step": 4532
},
{
"epoch": 2.197285506543868,
"grad_norm": 10.14142309461541,
"learning_rate": 2.029224556600122e-06,
"loss": 0.4039214849472046,
"step": 4533
},
{
"epoch": 2.1977702375181773,
"grad_norm": 12.439332365821633,
"learning_rate": 2.026956679493401e-06,
"loss": 0.5174943208694458,
"step": 4534
},
{
"epoch": 2.1982549684924866,
"grad_norm": 12.478197893693787,
"learning_rate": 2.0246897481676735e-06,
"loss": 0.41483747959136963,
"step": 4535
},
{
"epoch": 2.198739699466796,
"grad_norm": 9.769089921902891,
"learning_rate": 2.022423763344089e-06,
"loss": 0.767789363861084,
"step": 4536
},
{
"epoch": 2.199224430441105,
"grad_norm": 10.876178707908565,
"learning_rate": 2.020158725743501e-06,
"loss": 0.4117639660835266,
"step": 4537
},
{
"epoch": 2.1997091614154143,
"grad_norm": 12.29615350139457,
"learning_rate": 2.0178946360864615e-06,
"loss": 0.39266437292099,
"step": 4538
},
{
"epoch": 2.2001938923897235,
"grad_norm": 9.029888823641375,
"learning_rate": 2.0156314950932166e-06,
"loss": 0.3333319425582886,
"step": 4539
},
{
"epoch": 2.200678623364033,
"grad_norm": 12.787744479417524,
"learning_rate": 2.013369303483719e-06,
"loss": 0.7261334657669067,
"step": 4540
},
{
"epoch": 2.2011633543383424,
"grad_norm": 8.74189259401425,
"learning_rate": 2.011108061977612e-06,
"loss": 0.23050381243228912,
"step": 4541
},
{
"epoch": 2.2016480853126517,
"grad_norm": 9.011255010162301,
"learning_rate": 2.008847771294239e-06,
"loss": 0.2387632131576538,
"step": 4542
},
{
"epoch": 2.202132816286961,
"grad_norm": 7.744620371810387,
"learning_rate": 2.0065884321526412e-06,
"loss": 0.43797770142555237,
"step": 4543
},
{
"epoch": 2.20261754726127,
"grad_norm": 8.815207693995943,
"learning_rate": 2.0043300452715563e-06,
"loss": 0.5482735633850098,
"step": 4544
},
{
"epoch": 2.2031022782355794,
"grad_norm": 7.2995348176041635,
"learning_rate": 2.0020726113694204e-06,
"loss": 0.29151034355163574,
"step": 4545
},
{
"epoch": 2.2035870092098886,
"grad_norm": 8.860621701001897,
"learning_rate": 1.999816131164366e-06,
"loss": 0.33349132537841797,
"step": 4546
},
{
"epoch": 2.204071740184198,
"grad_norm": 10.824409924894294,
"learning_rate": 1.9975606053742207e-06,
"loss": 0.6333401203155518,
"step": 4547
},
{
"epoch": 2.204556471158507,
"grad_norm": 11.285156440505483,
"learning_rate": 1.995306034716508e-06,
"loss": 0.46796858310699463,
"step": 4548
},
{
"epoch": 2.2050412021328163,
"grad_norm": 10.33143369233109,
"learning_rate": 1.9930524199084538e-06,
"loss": 0.3530353605747223,
"step": 4549
},
{
"epoch": 2.2055259331071255,
"grad_norm": 14.08773212182487,
"learning_rate": 1.990799761666975e-06,
"loss": 0.6534809470176697,
"step": 4550
},
{
"epoch": 2.2060106640814348,
"grad_norm": 11.127218497931878,
"learning_rate": 1.9885480607086807e-06,
"loss": 0.24578604102134705,
"step": 4551
},
{
"epoch": 2.206495395055744,
"grad_norm": 16.621110691462185,
"learning_rate": 1.9862973177498807e-06,
"loss": 0.5019030570983887,
"step": 4552
},
{
"epoch": 2.2069801260300532,
"grad_norm": 12.072866228566205,
"learning_rate": 1.9840475335065777e-06,
"loss": 0.9751251339912415,
"step": 4553
},
{
"epoch": 2.2074648570043625,
"grad_norm": 9.687685422757989,
"learning_rate": 1.9817987086944736e-06,
"loss": 0.1938537210226059,
"step": 4554
},
{
"epoch": 2.2079495879786717,
"grad_norm": 9.32104697355482,
"learning_rate": 1.97955084402896e-06,
"loss": 0.7192533612251282,
"step": 4555
},
{
"epoch": 2.208434318952981,
"grad_norm": 20.031641771017732,
"learning_rate": 1.9773039402251253e-06,
"loss": 0.9419565796852112,
"step": 4556
},
{
"epoch": 2.20891904992729,
"grad_norm": 9.678112128177881,
"learning_rate": 1.9750579979977514e-06,
"loss": 0.9294630885124207,
"step": 4557
},
{
"epoch": 2.2094037809015994,
"grad_norm": 13.364884770320097,
"learning_rate": 1.9728130180613146e-06,
"loss": 0.4222845137119293,
"step": 4558
},
{
"epoch": 2.209888511875909,
"grad_norm": 5.69048270518461,
"learning_rate": 1.970569001129986e-06,
"loss": 0.11838191747665405,
"step": 4559
},
{
"epoch": 2.2103732428502183,
"grad_norm": 14.027428108584692,
"learning_rate": 1.9683259479176294e-06,
"loss": 0.49090057611465454,
"step": 4560
},
{
"epoch": 2.2108579738245275,
"grad_norm": 8.27940812671135,
"learning_rate": 1.9660838591378014e-06,
"loss": 0.39307013154029846,
"step": 4561
},
{
"epoch": 2.2113427047988368,
"grad_norm": 15.288462798840532,
"learning_rate": 1.9638427355037515e-06,
"loss": 0.6772592663764954,
"step": 4562
},
{
"epoch": 2.211827435773146,
"grad_norm": 13.993146156256556,
"learning_rate": 1.9616025777284266e-06,
"loss": 0.4679684638977051,
"step": 4563
},
{
"epoch": 2.2123121667474552,
"grad_norm": 8.769106590123902,
"learning_rate": 1.9593633865244614e-06,
"loss": 0.1466677486896515,
"step": 4564
},
{
"epoch": 2.2127968977217645,
"grad_norm": 14.946973750003746,
"learning_rate": 1.9571251626041847e-06,
"loss": 0.5210084319114685,
"step": 4565
},
{
"epoch": 2.2132816286960737,
"grad_norm": 14.610194023354273,
"learning_rate": 1.9548879066796195e-06,
"loss": 0.612429141998291,
"step": 4566
},
{
"epoch": 2.213766359670383,
"grad_norm": 12.135433353122254,
"learning_rate": 1.9526516194624735e-06,
"loss": 0.4791604280471802,
"step": 4567
},
{
"epoch": 2.214251090644692,
"grad_norm": 10.326796437985502,
"learning_rate": 1.950416301664157e-06,
"loss": 0.29469314217567444,
"step": 4568
},
{
"epoch": 2.2147358216190014,
"grad_norm": 12.704299300316386,
"learning_rate": 1.948181953995765e-06,
"loss": 0.6219005584716797,
"step": 4569
},
{
"epoch": 2.2152205525933106,
"grad_norm": 9.167500379691338,
"learning_rate": 1.945948577168086e-06,
"loss": 0.6557912826538086,
"step": 4570
},
{
"epoch": 2.21570528356762,
"grad_norm": 18.432097567414367,
"learning_rate": 1.943716171891596e-06,
"loss": 0.47479498386383057,
"step": 4571
},
{
"epoch": 2.216190014541929,
"grad_norm": 12.04567741624689,
"learning_rate": 1.941484738876472e-06,
"loss": 0.2351894974708557,
"step": 4572
},
{
"epoch": 2.2166747455162383,
"grad_norm": 11.833018775176244,
"learning_rate": 1.939254278832568e-06,
"loss": 0.24956226348876953,
"step": 4573
},
{
"epoch": 2.2171594764905476,
"grad_norm": 10.874340218081777,
"learning_rate": 1.937024792469438e-06,
"loss": 0.5098115801811218,
"step": 4574
},
{
"epoch": 2.217644207464857,
"grad_norm": 9.265599109092523,
"learning_rate": 1.9347962804963238e-06,
"loss": 0.22621148824691772,
"step": 4575
},
{
"epoch": 2.2181289384391665,
"grad_norm": 10.069114395086736,
"learning_rate": 1.932568743622154e-06,
"loss": 0.635562539100647,
"step": 4576
},
{
"epoch": 2.2186136694134757,
"grad_norm": 13.519588870924627,
"learning_rate": 1.9303421825555552e-06,
"loss": 0.22401553392410278,
"step": 4577
},
{
"epoch": 2.219098400387785,
"grad_norm": 12.120077735718214,
"learning_rate": 1.928116598004835e-06,
"loss": 0.23918691277503967,
"step": 4578
},
{
"epoch": 2.219583131362094,
"grad_norm": 11.308791483248298,
"learning_rate": 1.9258919906779944e-06,
"loss": 0.5311349630355835,
"step": 4579
},
{
"epoch": 2.2200678623364034,
"grad_norm": 20.268519360042962,
"learning_rate": 1.923668361282723e-06,
"loss": 0.347164511680603,
"step": 4580
},
{
"epoch": 2.2205525933107126,
"grad_norm": 11.471212324415818,
"learning_rate": 1.9214457105263983e-06,
"loss": 0.36791202425956726,
"step": 4581
},
{
"epoch": 2.221037324285022,
"grad_norm": 10.403423768015669,
"learning_rate": 1.9192240391160887e-06,
"loss": 0.7696720361709595,
"step": 4582
},
{
"epoch": 2.221522055259331,
"grad_norm": 9.137842097587939,
"learning_rate": 1.9170033477585485e-06,
"loss": 0.8162957429885864,
"step": 4583
},
{
"epoch": 2.2220067862336403,
"grad_norm": 12.330009769096728,
"learning_rate": 1.914783637160222e-06,
"loss": 0.5635539889335632,
"step": 4584
},
{
"epoch": 2.2224915172079496,
"grad_norm": 8.413601632577501,
"learning_rate": 1.9125649080272383e-06,
"loss": 0.4942958950996399,
"step": 4585
},
{
"epoch": 2.222976248182259,
"grad_norm": 12.358979366899062,
"learning_rate": 1.910347161065421e-06,
"loss": 0.24689364433288574,
"step": 4586
},
{
"epoch": 2.223460979156568,
"grad_norm": 15.881597915529573,
"learning_rate": 1.9081303969802756e-06,
"loss": 0.5170539617538452,
"step": 4587
},
{
"epoch": 2.2239457101308773,
"grad_norm": 11.320894429214972,
"learning_rate": 1.9059146164769976e-06,
"loss": 0.6699502468109131,
"step": 4588
},
{
"epoch": 2.2244304411051865,
"grad_norm": 14.753366351064559,
"learning_rate": 1.9036998202604657e-06,
"loss": 0.3548262119293213,
"step": 4589
},
{
"epoch": 2.2249151720794957,
"grad_norm": 9.159697074057714,
"learning_rate": 1.9014860090352477e-06,
"loss": 0.6978816390037537,
"step": 4590
},
{
"epoch": 2.225399903053805,
"grad_norm": 9.076625312356484,
"learning_rate": 1.8992731835056016e-06,
"loss": 0.379266619682312,
"step": 4591
},
{
"epoch": 2.225884634028114,
"grad_norm": 14.39746513779578,
"learning_rate": 1.897061344375468e-06,
"loss": 0.7198437452316284,
"step": 4592
},
{
"epoch": 2.226369365002424,
"grad_norm": 17.432909410136325,
"learning_rate": 1.8948504923484745e-06,
"loss": 0.5462425947189331,
"step": 4593
},
{
"epoch": 2.226854095976733,
"grad_norm": 11.992570839066591,
"learning_rate": 1.8926406281279336e-06,
"loss": 0.3816736936569214,
"step": 4594
},
{
"epoch": 2.2273388269510423,
"grad_norm": 11.089352187367046,
"learning_rate": 1.8904317524168458e-06,
"loss": 0.4114934206008911,
"step": 4595
},
{
"epoch": 2.2278235579253516,
"grad_norm": 11.01918339857373,
"learning_rate": 1.8882238659178958e-06,
"loss": 1.4223182201385498,
"step": 4596
},
{
"epoch": 2.228308288899661,
"grad_norm": 10.860484123366694,
"learning_rate": 1.8860169693334534e-06,
"loss": 0.7256608009338379,
"step": 4597
},
{
"epoch": 2.22879301987397,
"grad_norm": 12.913394702463707,
"learning_rate": 1.8838110633655738e-06,
"loss": 0.8076510429382324,
"step": 4598
},
{
"epoch": 2.2292777508482793,
"grad_norm": 7.4805886987450645,
"learning_rate": 1.8816061487159965e-06,
"loss": 0.3579220175743103,
"step": 4599
},
{
"epoch": 2.2297624818225885,
"grad_norm": 13.113091927645762,
"learning_rate": 1.8794022260861483e-06,
"loss": 0.35654309391975403,
"step": 4600
},
{
"epoch": 2.2302472127968977,
"grad_norm": 11.262204143039597,
"learning_rate": 1.8771992961771379e-06,
"loss": 0.3982086777687073,
"step": 4601
},
{
"epoch": 2.230731943771207,
"grad_norm": 10.840851480835745,
"learning_rate": 1.874997359689758e-06,
"loss": 0.3499617576599121,
"step": 4602
},
{
"epoch": 2.231216674745516,
"grad_norm": 16.811374235685385,
"learning_rate": 1.8727964173244883e-06,
"loss": 0.3501591980457306,
"step": 4603
},
{
"epoch": 2.2317014057198254,
"grad_norm": 9.161772764304708,
"learning_rate": 1.8705964697814848e-06,
"loss": 0.3104599118232727,
"step": 4604
},
{
"epoch": 2.2321861366941347,
"grad_norm": 42.28188326848377,
"learning_rate": 1.8683975177605968e-06,
"loss": 3.1205191612243652,
"step": 4605
},
{
"epoch": 2.232670867668444,
"grad_norm": 11.288228297122025,
"learning_rate": 1.8661995619613504e-06,
"loss": 0.4823826551437378,
"step": 4606
},
{
"epoch": 2.233155598642753,
"grad_norm": 10.730067602937982,
"learning_rate": 1.8640026030829579e-06,
"loss": 0.6579826474189758,
"step": 4607
},
{
"epoch": 2.2336403296170624,
"grad_norm": 13.005972380695598,
"learning_rate": 1.8618066418243115e-06,
"loss": 0.5832761526107788,
"step": 4608
},
{
"epoch": 2.2341250605913716,
"grad_norm": 10.02928529365069,
"learning_rate": 1.8596116788839875e-06,
"loss": 0.6775950789451599,
"step": 4609
},
{
"epoch": 2.234609791565681,
"grad_norm": 10.427387001297854,
"learning_rate": 1.8574177149602496e-06,
"loss": 0.7154476046562195,
"step": 4610
},
{
"epoch": 2.23509452253999,
"grad_norm": 14.02628976318423,
"learning_rate": 1.8552247507510334e-06,
"loss": 0.40675288438796997,
"step": 4611
},
{
"epoch": 2.2355792535142998,
"grad_norm": 8.554782016766799,
"learning_rate": 1.853032786953965e-06,
"loss": 0.5805850028991699,
"step": 4612
},
{
"epoch": 2.236063984488609,
"grad_norm": 16.161303425008228,
"learning_rate": 1.8508418242663457e-06,
"loss": 0.9404526352882385,
"step": 4613
},
{
"epoch": 2.2365487154629182,
"grad_norm": 9.361645508919654,
"learning_rate": 1.8486518633851668e-06,
"loss": 0.2716456353664398,
"step": 4614
},
{
"epoch": 2.2370334464372275,
"grad_norm": 17.48699558561912,
"learning_rate": 1.8464629050070941e-06,
"loss": 0.5887691378593445,
"step": 4615
},
{
"epoch": 2.2375181774115367,
"grad_norm": 9.617515619699493,
"learning_rate": 1.8442749498284763e-06,
"loss": 0.6246103048324585,
"step": 4616
},
{
"epoch": 2.238002908385846,
"grad_norm": 8.524345886430627,
"learning_rate": 1.8420879985453427e-06,
"loss": 0.2316717952489853,
"step": 4617
},
{
"epoch": 2.238487639360155,
"grad_norm": 8.431650836189919,
"learning_rate": 1.8399020518534038e-06,
"loss": 1.0385822057724,
"step": 4618
},
{
"epoch": 2.2389723703344644,
"grad_norm": 11.577942042871227,
"learning_rate": 1.8377171104480512e-06,
"loss": 0.49385517835617065,
"step": 4619
},
{
"epoch": 2.2394571013087736,
"grad_norm": 11.290971347161369,
"learning_rate": 1.835533175024355e-06,
"loss": 0.4828706383705139,
"step": 4620
},
{
"epoch": 2.239941832283083,
"grad_norm": 8.749735536565739,
"learning_rate": 1.8333502462770658e-06,
"loss": 0.3436130881309509,
"step": 4621
},
{
"epoch": 2.240426563257392,
"grad_norm": 8.147090231431454,
"learning_rate": 1.8311683249006152e-06,
"loss": 0.5824630260467529,
"step": 4622
},
{
"epoch": 2.2409112942317013,
"grad_norm": 11.48922754535417,
"learning_rate": 1.828987411589111e-06,
"loss": 0.6679857969284058,
"step": 4623
},
{
"epoch": 2.2413960252060106,
"grad_norm": 7.47289755570465,
"learning_rate": 1.8268075070363467e-06,
"loss": 0.4467092752456665,
"step": 4624
},
{
"epoch": 2.24188075618032,
"grad_norm": 8.585167079880776,
"learning_rate": 1.8246286119357903e-06,
"loss": 0.37717118859291077,
"step": 4625
},
{
"epoch": 2.242365487154629,
"grad_norm": 8.01190617732545,
"learning_rate": 1.8224507269805869e-06,
"loss": 0.45324471592903137,
"step": 4626
},
{
"epoch": 2.2428502181289383,
"grad_norm": 5.593594293875785,
"learning_rate": 1.8202738528635617e-06,
"loss": 0.26558202505111694,
"step": 4627
},
{
"epoch": 2.2433349491032475,
"grad_norm": 14.142141576640427,
"learning_rate": 1.8180979902772228e-06,
"loss": 0.5024372339248657,
"step": 4628
},
{
"epoch": 2.243819680077557,
"grad_norm": 10.589485536156694,
"learning_rate": 1.8159231399137523e-06,
"loss": 0.34280821681022644,
"step": 4629
},
{
"epoch": 2.2443044110518664,
"grad_norm": 15.049985296858182,
"learning_rate": 1.8137493024650094e-06,
"loss": 0.500849187374115,
"step": 4630
},
{
"epoch": 2.2447891420261756,
"grad_norm": 7.149167471520175,
"learning_rate": 1.8115764786225342e-06,
"loss": 0.4156193137168884,
"step": 4631
},
{
"epoch": 2.245273873000485,
"grad_norm": 8.558485465380459,
"learning_rate": 1.809404669077542e-06,
"loss": 0.7242496609687805,
"step": 4632
},
{
"epoch": 2.245758603974794,
"grad_norm": 14.887653151850065,
"learning_rate": 1.8072338745209262e-06,
"loss": 0.6469169855117798,
"step": 4633
},
{
"epoch": 2.2462433349491033,
"grad_norm": 10.861733826076602,
"learning_rate": 1.8050640956432575e-06,
"loss": 0.8038536310195923,
"step": 4634
},
{
"epoch": 2.2467280659234126,
"grad_norm": 8.938661451243428,
"learning_rate": 1.802895333134783e-06,
"loss": 0.17267125844955444,
"step": 4635
},
{
"epoch": 2.247212796897722,
"grad_norm": 11.680376513938905,
"learning_rate": 1.800727587685428e-06,
"loss": 0.4807482361793518,
"step": 4636
},
{
"epoch": 2.247697527872031,
"grad_norm": 14.815056694758749,
"learning_rate": 1.7985608599847908e-06,
"loss": 0.6592692732810974,
"step": 4637
},
{
"epoch": 2.2481822588463403,
"grad_norm": 9.250724741017715,
"learning_rate": 1.7963951507221511e-06,
"loss": 0.3501704931259155,
"step": 4638
},
{
"epoch": 2.2486669898206495,
"grad_norm": 12.500838311820724,
"learning_rate": 1.7942304605864607e-06,
"loss": 1.098838448524475,
"step": 4639
},
{
"epoch": 2.2491517207949587,
"grad_norm": 14.826853122487854,
"learning_rate": 1.792066790266348e-06,
"loss": 0.48755836486816406,
"step": 4640
},
{
"epoch": 2.249636451769268,
"grad_norm": 11.45506653961591,
"learning_rate": 1.7899041404501167e-06,
"loss": 0.25951868295669556,
"step": 4641
},
{
"epoch": 2.250121182743577,
"grad_norm": 12.850611563209972,
"learning_rate": 1.7877425118257474e-06,
"loss": 0.35064202547073364,
"step": 4642
},
{
"epoch": 2.2506059137178864,
"grad_norm": 12.445834251366058,
"learning_rate": 1.7855819050808942e-06,
"loss": 0.2929108142852783,
"step": 4643
},
{
"epoch": 2.2510906446921957,
"grad_norm": 11.819010030575575,
"learning_rate": 1.7834223209028867e-06,
"loss": 0.9599419236183167,
"step": 4644
},
{
"epoch": 2.251575375666505,
"grad_norm": 8.472148376086416,
"learning_rate": 1.7812637599787298e-06,
"loss": 0.30757856369018555,
"step": 4645
},
{
"epoch": 2.2520601066408146,
"grad_norm": 33.749300675204616,
"learning_rate": 1.7791062229950996e-06,
"loss": 1.0506646633148193,
"step": 4646
},
{
"epoch": 2.2525448376151234,
"grad_norm": 11.00864011678647,
"learning_rate": 1.7769497106383537e-06,
"loss": 0.46911728382110596,
"step": 4647
},
{
"epoch": 2.253029568589433,
"grad_norm": 12.909781680341156,
"learning_rate": 1.774794223594518e-06,
"loss": 0.4539117217063904,
"step": 4648
},
{
"epoch": 2.2535142995637423,
"grad_norm": 6.919456630262986,
"learning_rate": 1.7726397625492913e-06,
"loss": 0.5377547144889832,
"step": 4649
},
{
"epoch": 2.2539990305380515,
"grad_norm": 8.614631032450925,
"learning_rate": 1.7704863281880496e-06,
"loss": 0.800126314163208,
"step": 4650
},
{
"epoch": 2.2544837615123607,
"grad_norm": 11.257651779390942,
"learning_rate": 1.7683339211958389e-06,
"loss": 0.6491338610649109,
"step": 4651
},
{
"epoch": 2.25496849248667,
"grad_norm": 16.278199518021367,
"learning_rate": 1.7661825422573836e-06,
"loss": 0.43776237964630127,
"step": 4652
},
{
"epoch": 2.255453223460979,
"grad_norm": 10.205034350162883,
"learning_rate": 1.7640321920570763e-06,
"loss": 0.3443514108657837,
"step": 4653
},
{
"epoch": 2.2559379544352884,
"grad_norm": 12.42423721706128,
"learning_rate": 1.7618828712789837e-06,
"loss": 0.23737701773643494,
"step": 4654
},
{
"epoch": 2.2564226854095977,
"grad_norm": 9.354779510645985,
"learning_rate": 1.759734580606845e-06,
"loss": 0.3827439546585083,
"step": 4655
},
{
"epoch": 2.256907416383907,
"grad_norm": 11.413661453248892,
"learning_rate": 1.7575873207240729e-06,
"loss": 0.5798490047454834,
"step": 4656
},
{
"epoch": 2.257392147358216,
"grad_norm": 12.100370762616384,
"learning_rate": 1.7554410923137498e-06,
"loss": 0.5191994905471802,
"step": 4657
},
{
"epoch": 2.2578768783325254,
"grad_norm": 13.356762746779127,
"learning_rate": 1.7532958960586322e-06,
"loss": 0.3188408315181732,
"step": 4658
},
{
"epoch": 2.2583616093068346,
"grad_norm": 13.391498952652066,
"learning_rate": 1.751151732641147e-06,
"loss": 0.3756614923477173,
"step": 4659
},
{
"epoch": 2.258846340281144,
"grad_norm": 10.06757717610637,
"learning_rate": 1.749008602743391e-06,
"loss": 0.32622140645980835,
"step": 4660
},
{
"epoch": 2.259331071255453,
"grad_norm": 15.383238569007114,
"learning_rate": 1.746866507047138e-06,
"loss": 0.31360748410224915,
"step": 4661
},
{
"epoch": 2.2598158022297623,
"grad_norm": 13.369087738080543,
"learning_rate": 1.744725446233827e-06,
"loss": 0.7224504947662354,
"step": 4662
},
{
"epoch": 2.260300533204072,
"grad_norm": 12.121350963801783,
"learning_rate": 1.7425854209845717e-06,
"loss": 0.8923028707504272,
"step": 4663
},
{
"epoch": 2.2607852641783808,
"grad_norm": 9.851984248461694,
"learning_rate": 1.7404464319801506e-06,
"loss": 0.6620737314224243,
"step": 4664
},
{
"epoch": 2.2612699951526904,
"grad_norm": 13.141329555047934,
"learning_rate": 1.7383084799010164e-06,
"loss": 0.4140910506248474,
"step": 4665
},
{
"epoch": 2.2617547261269997,
"grad_norm": 7.628450304570471,
"learning_rate": 1.7361715654272954e-06,
"loss": 0.34744542837142944,
"step": 4666
},
{
"epoch": 2.262239457101309,
"grad_norm": 8.468696334732446,
"learning_rate": 1.7340356892387784e-06,
"loss": 0.5147067308425903,
"step": 4667
},
{
"epoch": 2.262724188075618,
"grad_norm": 11.111124342968143,
"learning_rate": 1.7319008520149277e-06,
"loss": 0.5480530261993408,
"step": 4668
},
{
"epoch": 2.2632089190499274,
"grad_norm": 9.557761870204535,
"learning_rate": 1.7297670544348738e-06,
"loss": 0.2831796407699585,
"step": 4669
},
{
"epoch": 2.2636936500242366,
"grad_norm": 11.111358162027969,
"learning_rate": 1.7276342971774225e-06,
"loss": 0.5627727508544922,
"step": 4670
},
{
"epoch": 2.264178380998546,
"grad_norm": 12.883971695680245,
"learning_rate": 1.7255025809210396e-06,
"loss": 0.6674090027809143,
"step": 4671
},
{
"epoch": 2.264663111972855,
"grad_norm": 8.292885643552907,
"learning_rate": 1.7233719063438653e-06,
"loss": 0.5416049361228943,
"step": 4672
},
{
"epoch": 2.2651478429471643,
"grad_norm": 14.741388055878875,
"learning_rate": 1.7212422741237072e-06,
"loss": 0.6208717226982117,
"step": 4673
},
{
"epoch": 2.2656325739214735,
"grad_norm": 25.708718460147832,
"learning_rate": 1.7191136849380397e-06,
"loss": 0.6459677219390869,
"step": 4674
},
{
"epoch": 2.266117304895783,
"grad_norm": 7.843910047599252,
"learning_rate": 1.7169861394640108e-06,
"loss": 0.24167883396148682,
"step": 4675
},
{
"epoch": 2.266602035870092,
"grad_norm": 16.190867088864685,
"learning_rate": 1.714859638378431e-06,
"loss": 0.6070338487625122,
"step": 4676
},
{
"epoch": 2.2670867668444012,
"grad_norm": 12.103393176662895,
"learning_rate": 1.7127341823577798e-06,
"loss": 0.416581392288208,
"step": 4677
},
{
"epoch": 2.2675714978187105,
"grad_norm": 6.945028061140182,
"learning_rate": 1.710609772078205e-06,
"loss": 0.3110671937465668,
"step": 4678
},
{
"epoch": 2.2680562287930197,
"grad_norm": 12.569124093410558,
"learning_rate": 1.708486408215521e-06,
"loss": 0.48729097843170166,
"step": 4679
},
{
"epoch": 2.268540959767329,
"grad_norm": 11.8253474740488,
"learning_rate": 1.7063640914452113e-06,
"loss": 0.6895624399185181,
"step": 4680
},
{
"epoch": 2.269025690741638,
"grad_norm": 12.90484854819931,
"learning_rate": 1.704242822442423e-06,
"loss": 0.530284583568573,
"step": 4681
},
{
"epoch": 2.269510421715948,
"grad_norm": 6.932409870603111,
"learning_rate": 1.7021226018819725e-06,
"loss": 0.4846137762069702,
"step": 4682
},
{
"epoch": 2.269995152690257,
"grad_norm": 22.22510998916613,
"learning_rate": 1.7000034304383416e-06,
"loss": 1.1913726329803467,
"step": 4683
},
{
"epoch": 2.2704798836645663,
"grad_norm": 15.184358820078115,
"learning_rate": 1.6978853087856772e-06,
"loss": 0.5274566411972046,
"step": 4684
},
{
"epoch": 2.2709646146388756,
"grad_norm": 13.251775034089183,
"learning_rate": 1.6957682375977986e-06,
"loss": 0.37450575828552246,
"step": 4685
},
{
"epoch": 2.271449345613185,
"grad_norm": 13.451412872977004,
"learning_rate": 1.69365221754818e-06,
"loss": 0.562325119972229,
"step": 4686
},
{
"epoch": 2.271934076587494,
"grad_norm": 9.951838387066973,
"learning_rate": 1.6915372493099708e-06,
"loss": 0.28912708163261414,
"step": 4687
},
{
"epoch": 2.2724188075618033,
"grad_norm": 9.496367176007551,
"learning_rate": 1.6894233335559784e-06,
"loss": 0.6734484434127808,
"step": 4688
},
{
"epoch": 2.2729035385361125,
"grad_norm": 14.349885407119938,
"learning_rate": 1.687310470958684e-06,
"loss": 0.3323378264904022,
"step": 4689
},
{
"epoch": 2.2733882695104217,
"grad_norm": 16.752975936558776,
"learning_rate": 1.6851986621902267e-06,
"loss": 1.6050649881362915,
"step": 4690
},
{
"epoch": 2.273873000484731,
"grad_norm": 10.827543378128063,
"learning_rate": 1.6830879079224126e-06,
"loss": 0.280720055103302,
"step": 4691
},
{
"epoch": 2.27435773145904,
"grad_norm": 8.73873660578722,
"learning_rate": 1.6809782088267124e-06,
"loss": 0.3353247046470642,
"step": 4692
},
{
"epoch": 2.2748424624333494,
"grad_norm": 13.853929652542442,
"learning_rate": 1.6788695655742603e-06,
"loss": 0.6708588004112244,
"step": 4693
},
{
"epoch": 2.2753271934076587,
"grad_norm": 12.902424943257387,
"learning_rate": 1.6767619788358569e-06,
"loss": 0.521849513053894,
"step": 4694
},
{
"epoch": 2.275811924381968,
"grad_norm": 8.857755246732246,
"learning_rate": 1.6746554492819638e-06,
"loss": 0.5044288635253906,
"step": 4695
},
{
"epoch": 2.276296655356277,
"grad_norm": 17.814378128045945,
"learning_rate": 1.6725499775827087e-06,
"loss": 0.6211201548576355,
"step": 4696
},
{
"epoch": 2.2767813863305864,
"grad_norm": 11.967133135452707,
"learning_rate": 1.670445564407881e-06,
"loss": 0.6961284279823303,
"step": 4697
},
{
"epoch": 2.2772661173048956,
"grad_norm": 11.989045192412252,
"learning_rate": 1.6683422104269325e-06,
"loss": 0.6855754852294922,
"step": 4698
},
{
"epoch": 2.2777508482792053,
"grad_norm": 12.92879542328012,
"learning_rate": 1.6662399163089832e-06,
"loss": 0.5760030150413513,
"step": 4699
},
{
"epoch": 2.278235579253514,
"grad_norm": 9.502650088091857,
"learning_rate": 1.6641386827228107e-06,
"loss": 0.38143736124038696,
"step": 4700
},
{
"epoch": 2.2787203102278237,
"grad_norm": 12.441491212825241,
"learning_rate": 1.6620385103368585e-06,
"loss": 0.26557666063308716,
"step": 4701
},
{
"epoch": 2.279205041202133,
"grad_norm": 11.766899397818994,
"learning_rate": 1.6599393998192264e-06,
"loss": 0.9593709111213684,
"step": 4702
},
{
"epoch": 2.279689772176442,
"grad_norm": 10.322777553638572,
"learning_rate": 1.657841351837685e-06,
"loss": 0.4112951457500458,
"step": 4703
},
{
"epoch": 2.2801745031507514,
"grad_norm": 9.89754230220589,
"learning_rate": 1.6557443670596624e-06,
"loss": 0.7026643753051758,
"step": 4704
},
{
"epoch": 2.2806592341250607,
"grad_norm": 13.04358884065612,
"learning_rate": 1.653648446152248e-06,
"loss": 0.5734392404556274,
"step": 4705
},
{
"epoch": 2.28114396509937,
"grad_norm": 10.908721297622622,
"learning_rate": 1.6515535897821933e-06,
"loss": 0.7834110260009766,
"step": 4706
},
{
"epoch": 2.281628696073679,
"grad_norm": 10.671563895086875,
"learning_rate": 1.6494597986159123e-06,
"loss": 0.4882291853427887,
"step": 4707
},
{
"epoch": 2.2821134270479884,
"grad_norm": 10.129675370132528,
"learning_rate": 1.6473670733194796e-06,
"loss": 0.3186482787132263,
"step": 4708
},
{
"epoch": 2.2825981580222976,
"grad_norm": 7.676066486110088,
"learning_rate": 1.6452754145586291e-06,
"loss": 0.5818383693695068,
"step": 4709
},
{
"epoch": 2.283082888996607,
"grad_norm": 13.339899840827556,
"learning_rate": 1.6431848229987586e-06,
"loss": 0.26431381702423096,
"step": 4710
},
{
"epoch": 2.283567619970916,
"grad_norm": 12.905166602520284,
"learning_rate": 1.6410952993049228e-06,
"loss": 0.5089691877365112,
"step": 4711
},
{
"epoch": 2.2840523509452253,
"grad_norm": 11.584702438158171,
"learning_rate": 1.6390068441418384e-06,
"loss": 0.6573249697685242,
"step": 4712
},
{
"epoch": 2.2845370819195345,
"grad_norm": 11.044576332226645,
"learning_rate": 1.6369194581738844e-06,
"loss": 0.4264582693576813,
"step": 4713
},
{
"epoch": 2.2850218128938438,
"grad_norm": 10.730983574317086,
"learning_rate": 1.6348331420650965e-06,
"loss": 0.4232235252857208,
"step": 4714
},
{
"epoch": 2.285506543868153,
"grad_norm": 10.299426609346803,
"learning_rate": 1.6327478964791705e-06,
"loss": 0.5605621933937073,
"step": 4715
},
{
"epoch": 2.2859912748424627,
"grad_norm": 12.028685134819316,
"learning_rate": 1.630663722079463e-06,
"loss": 0.4119406044483185,
"step": 4716
},
{
"epoch": 2.2864760058167715,
"grad_norm": 7.93723524745098,
"learning_rate": 1.6285806195289889e-06,
"loss": 0.2865312695503235,
"step": 4717
},
{
"epoch": 2.286960736791081,
"grad_norm": 9.30681191522316,
"learning_rate": 1.6264985894904218e-06,
"loss": 0.36626124382019043,
"step": 4718
},
{
"epoch": 2.2874454677653904,
"grad_norm": 6.728555158558388,
"learning_rate": 1.6244176326260953e-06,
"loss": 0.44384366273880005,
"step": 4719
},
{
"epoch": 2.2879301987396996,
"grad_norm": 11.55026231377018,
"learning_rate": 1.6223377495980003e-06,
"loss": 0.5540550351142883,
"step": 4720
},
{
"epoch": 2.288414929714009,
"grad_norm": 9.702267970550132,
"learning_rate": 1.6202589410677849e-06,
"loss": 0.16705243289470673,
"step": 4721
},
{
"epoch": 2.288899660688318,
"grad_norm": 11.059888116761021,
"learning_rate": 1.6181812076967618e-06,
"loss": 0.31071552634239197,
"step": 4722
},
{
"epoch": 2.2893843916626273,
"grad_norm": 8.675021658654963,
"learning_rate": 1.6161045501458956e-06,
"loss": 0.548267662525177,
"step": 4723
},
{
"epoch": 2.2898691226369365,
"grad_norm": 8.40452606314197,
"learning_rate": 1.614028969075807e-06,
"loss": 0.6951320171356201,
"step": 4724
},
{
"epoch": 2.2903538536112458,
"grad_norm": 6.8244351361940865,
"learning_rate": 1.61195446514678e-06,
"loss": 0.19842781126499176,
"step": 4725
},
{
"epoch": 2.290838584585555,
"grad_norm": 13.123021943981938,
"learning_rate": 1.6098810390187507e-06,
"loss": 0.3566179871559143,
"step": 4726
},
{
"epoch": 2.2913233155598642,
"grad_norm": 17.53708072602008,
"learning_rate": 1.6078086913513185e-06,
"loss": 0.5727189779281616,
"step": 4727
},
{
"epoch": 2.2918080465341735,
"grad_norm": 8.744565586696119,
"learning_rate": 1.605737422803735e-06,
"loss": 0.5133573412895203,
"step": 4728
},
{
"epoch": 2.2922927775084827,
"grad_norm": 10.827983767796022,
"learning_rate": 1.6036672340349091e-06,
"loss": 0.5527103543281555,
"step": 4729
},
{
"epoch": 2.292777508482792,
"grad_norm": 13.731199686304999,
"learning_rate": 1.601598125703407e-06,
"loss": 0.6277362108230591,
"step": 4730
},
{
"epoch": 2.293262239457101,
"grad_norm": 9.201417514871281,
"learning_rate": 1.5995300984674506e-06,
"loss": 0.48913484811782837,
"step": 4731
},
{
"epoch": 2.2937469704314104,
"grad_norm": 11.65560894255174,
"learning_rate": 1.5974631529849188e-06,
"loss": 0.7578060030937195,
"step": 4732
},
{
"epoch": 2.2942317014057196,
"grad_norm": 12.674538877599316,
"learning_rate": 1.5953972899133457e-06,
"loss": 1.1667486429214478,
"step": 4733
},
{
"epoch": 2.294716432380029,
"grad_norm": 10.68013352189254,
"learning_rate": 1.5933325099099213e-06,
"loss": 0.59281325340271,
"step": 4734
},
{
"epoch": 2.2952011633543385,
"grad_norm": 11.356945914212647,
"learning_rate": 1.5912688136314886e-06,
"loss": 0.36960625648498535,
"step": 4735
},
{
"epoch": 2.295685894328648,
"grad_norm": 10.393289892415533,
"learning_rate": 1.5892062017345522e-06,
"loss": 0.6202839612960815,
"step": 4736
},
{
"epoch": 2.296170625302957,
"grad_norm": 10.827064846061637,
"learning_rate": 1.5871446748752661e-06,
"loss": 0.5743878483772278,
"step": 4737
},
{
"epoch": 2.2966553562772662,
"grad_norm": 11.242606130838416,
"learning_rate": 1.585084233709442e-06,
"loss": 0.3823305666446686,
"step": 4738
},
{
"epoch": 2.2971400872515755,
"grad_norm": 9.706137139684065,
"learning_rate": 1.5830248788925423e-06,
"loss": 0.4335842430591583,
"step": 4739
},
{
"epoch": 2.2976248182258847,
"grad_norm": 8.619917525487615,
"learning_rate": 1.5809666110796856e-06,
"loss": 0.19183361530303955,
"step": 4740
},
{
"epoch": 2.298109549200194,
"grad_norm": 12.936713920794906,
"learning_rate": 1.5789094309256492e-06,
"loss": 0.39670076966285706,
"step": 4741
},
{
"epoch": 2.298594280174503,
"grad_norm": 8.96959454076689,
"learning_rate": 1.5768533390848601e-06,
"loss": 0.7568356394767761,
"step": 4742
},
{
"epoch": 2.2990790111488124,
"grad_norm": 12.74486540865625,
"learning_rate": 1.5747983362113983e-06,
"loss": 0.4762938916683197,
"step": 4743
},
{
"epoch": 2.2995637421231216,
"grad_norm": 6.906960601695363,
"learning_rate": 1.5727444229589984e-06,
"loss": 0.5296663045883179,
"step": 4744
},
{
"epoch": 2.300048473097431,
"grad_norm": 15.047196940721705,
"learning_rate": 1.570691599981053e-06,
"loss": 0.4027053415775299,
"step": 4745
},
{
"epoch": 2.30053320407174,
"grad_norm": 12.096781387577995,
"learning_rate": 1.5686398679305987e-06,
"loss": 0.5552543997764587,
"step": 4746
},
{
"epoch": 2.3010179350460493,
"grad_norm": 7.334721068444492,
"learning_rate": 1.5665892274603323e-06,
"loss": 0.33285489678382874,
"step": 4747
},
{
"epoch": 2.3015026660203586,
"grad_norm": 12.382452605454269,
"learning_rate": 1.564539679222601e-06,
"loss": 0.3641362190246582,
"step": 4748
},
{
"epoch": 2.301987396994668,
"grad_norm": 11.194000453691954,
"learning_rate": 1.562491223869403e-06,
"loss": 1.1205974817276,
"step": 4749
},
{
"epoch": 2.302472127968977,
"grad_norm": 10.476209610817492,
"learning_rate": 1.5604438620523932e-06,
"loss": 0.20259451866149902,
"step": 4750
},
{
"epoch": 2.3029568589432863,
"grad_norm": 10.560045414423453,
"learning_rate": 1.5583975944228746e-06,
"loss": 0.41386958956718445,
"step": 4751
},
{
"epoch": 2.303441589917596,
"grad_norm": 17.163695418648324,
"learning_rate": 1.5563524216318037e-06,
"loss": 0.48706451058387756,
"step": 4752
},
{
"epoch": 2.3039263208919047,
"grad_norm": 14.652461906526225,
"learning_rate": 1.554308344329788e-06,
"loss": 1.0375773906707764,
"step": 4753
},
{
"epoch": 2.3044110518662144,
"grad_norm": 15.510578165041956,
"learning_rate": 1.5522653631670876e-06,
"loss": 0.6584975123405457,
"step": 4754
},
{
"epoch": 2.3048957828405237,
"grad_norm": 23.548215203534408,
"learning_rate": 1.550223478793612e-06,
"loss": 0.7408478260040283,
"step": 4755
},
{
"epoch": 2.305380513814833,
"grad_norm": 11.496938252889814,
"learning_rate": 1.5481826918589243e-06,
"loss": 0.7197750210762024,
"step": 4756
},
{
"epoch": 2.305865244789142,
"grad_norm": 15.581540159899717,
"learning_rate": 1.5461430030122366e-06,
"loss": 0.5868872404098511,
"step": 4757
},
{
"epoch": 2.3063499757634514,
"grad_norm": 11.9821903292849,
"learning_rate": 1.5441044129024108e-06,
"loss": 0.5717617273330688,
"step": 4758
},
{
"epoch": 2.3068347067377606,
"grad_norm": 10.897575151330788,
"learning_rate": 1.5420669221779632e-06,
"loss": 0.39358535408973694,
"step": 4759
},
{
"epoch": 2.30731943771207,
"grad_norm": 11.290361647617312,
"learning_rate": 1.5400305314870596e-06,
"loss": 0.5631994605064392,
"step": 4760
},
{
"epoch": 2.307804168686379,
"grad_norm": 7.789788910917565,
"learning_rate": 1.537995241477509e-06,
"loss": 0.41580358147621155,
"step": 4761
},
{
"epoch": 2.3082888996606883,
"grad_norm": 11.852338997478533,
"learning_rate": 1.5359610527967789e-06,
"loss": 0.37899643182754517,
"step": 4762
},
{
"epoch": 2.3087736306349975,
"grad_norm": 12.38075257590185,
"learning_rate": 1.5339279660919798e-06,
"loss": 0.5863749980926514,
"step": 4763
},
{
"epoch": 2.3092583616093068,
"grad_norm": 17.798178936641207,
"learning_rate": 1.5318959820098789e-06,
"loss": 0.36048153042793274,
"step": 4764
},
{
"epoch": 2.309743092583616,
"grad_norm": 8.166215607420773,
"learning_rate": 1.5298651011968868e-06,
"loss": 0.41943392157554626,
"step": 4765
},
{
"epoch": 2.310227823557925,
"grad_norm": 8.528515822549103,
"learning_rate": 1.5278353242990651e-06,
"loss": 0.34761688113212585,
"step": 4766
},
{
"epoch": 2.3107125545322345,
"grad_norm": 11.401988423420649,
"learning_rate": 1.5258066519621235e-06,
"loss": 0.6383098363876343,
"step": 4767
},
{
"epoch": 2.3111972855065437,
"grad_norm": 11.073464498851402,
"learning_rate": 1.523779084831421e-06,
"loss": 0.9231284856796265,
"step": 4768
},
{
"epoch": 2.3116820164808534,
"grad_norm": 15.234394161790817,
"learning_rate": 1.5217526235519647e-06,
"loss": 0.5795779824256897,
"step": 4769
},
{
"epoch": 2.312166747455162,
"grad_norm": 11.113315561178727,
"learning_rate": 1.5197272687684106e-06,
"loss": 0.4709451198577881,
"step": 4770
},
{
"epoch": 2.312651478429472,
"grad_norm": 12.052453181878882,
"learning_rate": 1.5177030211250616e-06,
"loss": 0.42030900716781616,
"step": 4771
},
{
"epoch": 2.313136209403781,
"grad_norm": 10.400762956462668,
"learning_rate": 1.5156798812658679e-06,
"loss": 0.44610920548439026,
"step": 4772
},
{
"epoch": 2.3136209403780903,
"grad_norm": 11.622287222506957,
"learning_rate": 1.5136578498344307e-06,
"loss": 0.5084064602851868,
"step": 4773
},
{
"epoch": 2.3141056713523995,
"grad_norm": 10.473419645695428,
"learning_rate": 1.511636927473995e-06,
"loss": 0.7020914554595947,
"step": 4774
},
{
"epoch": 2.3145904023267088,
"grad_norm": 13.664591226438656,
"learning_rate": 1.5096171148274546e-06,
"loss": 0.45969802141189575,
"step": 4775
},
{
"epoch": 2.315075133301018,
"grad_norm": 8.826294880448643,
"learning_rate": 1.507598412537351e-06,
"loss": 0.504812479019165,
"step": 4776
},
{
"epoch": 2.3155598642753272,
"grad_norm": 21.95127691569799,
"learning_rate": 1.5055808212458666e-06,
"loss": 1.9691369533538818,
"step": 4777
},
{
"epoch": 2.3160445952496365,
"grad_norm": 13.401206506201902,
"learning_rate": 1.5035643415948403e-06,
"loss": 0.5359756946563721,
"step": 4778
},
{
"epoch": 2.3165293262239457,
"grad_norm": 15.759263564048233,
"learning_rate": 1.5015489742257505e-06,
"loss": 0.507117748260498,
"step": 4779
},
{
"epoch": 2.317014057198255,
"grad_norm": 13.120126752787863,
"learning_rate": 1.4995347197797227e-06,
"loss": 0.34706830978393555,
"step": 4780
},
{
"epoch": 2.317498788172564,
"grad_norm": 11.00902574461628,
"learning_rate": 1.4975215788975305e-06,
"loss": 0.8201497793197632,
"step": 4781
},
{
"epoch": 2.3179835191468734,
"grad_norm": 11.131078803931882,
"learning_rate": 1.4955095522195906e-06,
"loss": 0.5792070031166077,
"step": 4782
},
{
"epoch": 2.3184682501211826,
"grad_norm": 8.187232967394264,
"learning_rate": 1.4934986403859674e-06,
"loss": 0.619109034538269,
"step": 4783
},
{
"epoch": 2.318952981095492,
"grad_norm": 9.02476895411913,
"learning_rate": 1.4914888440363689e-06,
"loss": 0.21734917163848877,
"step": 4784
},
{
"epoch": 2.319437712069801,
"grad_norm": 15.96503707466455,
"learning_rate": 1.4894801638101502e-06,
"loss": 0.6344023942947388,
"step": 4785
},
{
"epoch": 2.3199224430441103,
"grad_norm": 10.515891131924178,
"learning_rate": 1.487472600346308e-06,
"loss": 0.4414065182209015,
"step": 4786
},
{
"epoch": 2.3204071740184196,
"grad_norm": 13.728218623247532,
"learning_rate": 1.4854661542834893e-06,
"loss": 0.46271398663520813,
"step": 4787
},
{
"epoch": 2.3208919049927292,
"grad_norm": 15.888370864728554,
"learning_rate": 1.4834608262599803e-06,
"loss": 0.22325487434864044,
"step": 4788
},
{
"epoch": 2.3213766359670385,
"grad_norm": 15.272265620031645,
"learning_rate": 1.481456616913714e-06,
"loss": 0.5717293620109558,
"step": 4789
},
{
"epoch": 2.3218613669413477,
"grad_norm": 10.189438685502454,
"learning_rate": 1.4794535268822674e-06,
"loss": 0.8590483069419861,
"step": 4790
},
{
"epoch": 2.322346097915657,
"grad_norm": 10.887142555915606,
"learning_rate": 1.4774515568028602e-06,
"loss": 0.5057954788208008,
"step": 4791
},
{
"epoch": 2.322830828889966,
"grad_norm": 7.647924327792923,
"learning_rate": 1.4754507073123581e-06,
"loss": 0.2868471145629883,
"step": 4792
},
{
"epoch": 2.3233155598642754,
"grad_norm": 10.110181856994394,
"learning_rate": 1.4734509790472673e-06,
"loss": 0.2772441804409027,
"step": 4793
},
{
"epoch": 2.3238002908385846,
"grad_norm": 8.409373230505619,
"learning_rate": 1.4714523726437402e-06,
"loss": 0.27258196473121643,
"step": 4794
},
{
"epoch": 2.324285021812894,
"grad_norm": 11.638636817567601,
"learning_rate": 1.469454888737571e-06,
"loss": 0.24060478806495667,
"step": 4795
},
{
"epoch": 2.324769752787203,
"grad_norm": 8.657284206641846,
"learning_rate": 1.4674585279641945e-06,
"loss": 0.45457327365875244,
"step": 4796
},
{
"epoch": 2.3252544837615123,
"grad_norm": 8.853178164719969,
"learning_rate": 1.4654632909586941e-06,
"loss": 0.40283116698265076,
"step": 4797
},
{
"epoch": 2.3257392147358216,
"grad_norm": 11.493598373070938,
"learning_rate": 1.463469178355793e-06,
"loss": 0.4587151110172272,
"step": 4798
},
{
"epoch": 2.326223945710131,
"grad_norm": 15.282637772174555,
"learning_rate": 1.4614761907898518e-06,
"loss": 0.23980700969696045,
"step": 4799
},
{
"epoch": 2.32670867668444,
"grad_norm": 15.210565351793354,
"learning_rate": 1.4594843288948773e-06,
"loss": 0.7285716533660889,
"step": 4800
},
{
"epoch": 2.3271934076587493,
"grad_norm": 7.048493154414109,
"learning_rate": 1.4574935933045225e-06,
"loss": 0.2569322884082794,
"step": 4801
},
{
"epoch": 2.3276781386330585,
"grad_norm": 11.715813190102079,
"learning_rate": 1.4555039846520751e-06,
"loss": 0.7087355852127075,
"step": 4802
},
{
"epoch": 2.3281628696073677,
"grad_norm": 9.463977552599792,
"learning_rate": 1.4535155035704673e-06,
"loss": 0.6605049967765808,
"step": 4803
},
{
"epoch": 2.328647600581677,
"grad_norm": 13.476357131652353,
"learning_rate": 1.4515281506922729e-06,
"loss": 0.6701607704162598,
"step": 4804
},
{
"epoch": 2.3291323315559866,
"grad_norm": 13.354297962874739,
"learning_rate": 1.449541926649705e-06,
"loss": 0.46804288029670715,
"step": 4805
},
{
"epoch": 2.329617062530296,
"grad_norm": 7.151176075424403,
"learning_rate": 1.4475568320746202e-06,
"loss": 0.4236087501049042,
"step": 4806
},
{
"epoch": 2.330101793504605,
"grad_norm": 8.560610587294349,
"learning_rate": 1.4455728675985136e-06,
"loss": 0.41856715083122253,
"step": 4807
},
{
"epoch": 2.3305865244789143,
"grad_norm": 11.753104856912856,
"learning_rate": 1.4435900338525221e-06,
"loss": 0.4312533736228943,
"step": 4808
},
{
"epoch": 2.3310712554532236,
"grad_norm": 19.783673115059187,
"learning_rate": 1.4416083314674217e-06,
"loss": 0.6610235571861267,
"step": 4809
},
{
"epoch": 2.331555986427533,
"grad_norm": 16.135753765512455,
"learning_rate": 1.4396277610736287e-06,
"loss": 0.4623912274837494,
"step": 4810
},
{
"epoch": 2.332040717401842,
"grad_norm": 10.551741729960439,
"learning_rate": 1.4376483233012018e-06,
"loss": 0.4051987826824188,
"step": 4811
},
{
"epoch": 2.3325254483761513,
"grad_norm": 14.56221635062976,
"learning_rate": 1.435670018779836e-06,
"loss": 0.809001624584198,
"step": 4812
},
{
"epoch": 2.3330101793504605,
"grad_norm": 15.330958087600601,
"learning_rate": 1.433692848138868e-06,
"loss": 0.4558753967285156,
"step": 4813
},
{
"epoch": 2.3334949103247697,
"grad_norm": 8.954289279136143,
"learning_rate": 1.4317168120072722e-06,
"loss": 0.3973991274833679,
"step": 4814
},
{
"epoch": 2.333979641299079,
"grad_norm": 21.752829528395605,
"learning_rate": 1.4297419110136628e-06,
"loss": 0.5032663345336914,
"step": 4815
},
{
"epoch": 2.334464372273388,
"grad_norm": 11.579004778022913,
"learning_rate": 1.4277681457862936e-06,
"loss": 0.2611708641052246,
"step": 4816
},
{
"epoch": 2.3349491032476974,
"grad_norm": 17.607396401793594,
"learning_rate": 1.4257955169530563e-06,
"loss": 0.7334272861480713,
"step": 4817
},
{
"epoch": 2.3354338342220067,
"grad_norm": 11.29398018776377,
"learning_rate": 1.4238240251414804e-06,
"loss": 0.4902573823928833,
"step": 4818
},
{
"epoch": 2.335918565196316,
"grad_norm": 7.140625657442017,
"learning_rate": 1.421853670978734e-06,
"loss": 0.3688885271549225,
"step": 4819
},
{
"epoch": 2.336403296170625,
"grad_norm": 10.678797473827265,
"learning_rate": 1.419884455091628e-06,
"loss": 0.39082178473472595,
"step": 4820
},
{
"epoch": 2.3368880271449344,
"grad_norm": 10.179272594213675,
"learning_rate": 1.417916378106602e-06,
"loss": 0.5327097773551941,
"step": 4821
},
{
"epoch": 2.337372758119244,
"grad_norm": 10.556130111382581,
"learning_rate": 1.4159494406497404e-06,
"loss": 0.7205041646957397,
"step": 4822
},
{
"epoch": 2.337857489093553,
"grad_norm": 11.635723990488335,
"learning_rate": 1.413983643346763e-06,
"loss": 0.571276843547821,
"step": 4823
},
{
"epoch": 2.3383422200678625,
"grad_norm": 11.30778207956262,
"learning_rate": 1.412018986823025e-06,
"loss": 0.5254219174385071,
"step": 4824
},
{
"epoch": 2.3388269510421718,
"grad_norm": 21.35934425194641,
"learning_rate": 1.4100554717035242e-06,
"loss": 0.5566554069519043,
"step": 4825
},
{
"epoch": 2.339311682016481,
"grad_norm": 19.305900514215395,
"learning_rate": 1.4080930986128894e-06,
"loss": 0.8682740926742554,
"step": 4826
},
{
"epoch": 2.33979641299079,
"grad_norm": 12.515505555229439,
"learning_rate": 1.406131868175389e-06,
"loss": 0.333560973405838,
"step": 4827
},
{
"epoch": 2.3402811439650995,
"grad_norm": 10.332567432114013,
"learning_rate": 1.404171781014927e-06,
"loss": 0.4897095561027527,
"step": 4828
},
{
"epoch": 2.3407658749394087,
"grad_norm": 28.447251324862844,
"learning_rate": 1.4022128377550437e-06,
"loss": 0.957683265209198,
"step": 4829
},
{
"epoch": 2.341250605913718,
"grad_norm": 7.414835875701812,
"learning_rate": 1.4002550390189162e-06,
"loss": 0.3329513669013977,
"step": 4830
},
{
"epoch": 2.341735336888027,
"grad_norm": 8.348515020955094,
"learning_rate": 1.3982983854293574e-06,
"loss": 0.3001159429550171,
"step": 4831
},
{
"epoch": 2.3422200678623364,
"grad_norm": 11.518249782080854,
"learning_rate": 1.396342877608815e-06,
"loss": 0.37477099895477295,
"step": 4832
},
{
"epoch": 2.3427047988366456,
"grad_norm": 12.362832818112402,
"learning_rate": 1.394388516179372e-06,
"loss": 0.32460272312164307,
"step": 4833
},
{
"epoch": 2.343189529810955,
"grad_norm": 15.5899154858773,
"learning_rate": 1.3924353017627496e-06,
"loss": 0.4820556044578552,
"step": 4834
},
{
"epoch": 2.343674260785264,
"grad_norm": 10.705396012639323,
"learning_rate": 1.3904832349803011e-06,
"loss": 0.3090282082557678,
"step": 4835
},
{
"epoch": 2.3441589917595733,
"grad_norm": 10.169751874382788,
"learning_rate": 1.3885323164530174e-06,
"loss": 1.4575990438461304,
"step": 4836
},
{
"epoch": 2.3446437227338826,
"grad_norm": 6.760680481156299,
"learning_rate": 1.3865825468015183e-06,
"loss": 0.4738176167011261,
"step": 4837
},
{
"epoch": 2.345128453708192,
"grad_norm": 11.812514177436826,
"learning_rate": 1.3846339266460635e-06,
"loss": 0.4885929226875305,
"step": 4838
},
{
"epoch": 2.345613184682501,
"grad_norm": 13.902675311824435,
"learning_rate": 1.382686456606548e-06,
"loss": 0.5856861472129822,
"step": 4839
},
{
"epoch": 2.3460979156568103,
"grad_norm": 9.473045566216983,
"learning_rate": 1.380740137302497e-06,
"loss": 0.41420263051986694,
"step": 4840
},
{
"epoch": 2.34658264663112,
"grad_norm": 10.282121219351254,
"learning_rate": 1.3787949693530722e-06,
"loss": 0.7139632701873779,
"step": 4841
},
{
"epoch": 2.347067377605429,
"grad_norm": 13.7913060244041,
"learning_rate": 1.3768509533770668e-06,
"loss": 0.4803200960159302,
"step": 4842
},
{
"epoch": 2.3475521085797384,
"grad_norm": 10.150994156986167,
"learning_rate": 1.3749080899929102e-06,
"loss": 0.3635396957397461,
"step": 4843
},
{
"epoch": 2.3480368395540476,
"grad_norm": 10.141603218895227,
"learning_rate": 1.3729663798186626e-06,
"loss": 0.5636881589889526,
"step": 4844
},
{
"epoch": 2.348521570528357,
"grad_norm": 8.258476004790642,
"learning_rate": 1.3710258234720191e-06,
"loss": 0.6196156740188599,
"step": 4845
},
{
"epoch": 2.349006301502666,
"grad_norm": 7.873964435321154,
"learning_rate": 1.369086421570307e-06,
"loss": 0.47987306118011475,
"step": 4846
},
{
"epoch": 2.3494910324769753,
"grad_norm": 20.274725551039342,
"learning_rate": 1.3671481747304855e-06,
"loss": 0.9388213157653809,
"step": 4847
},
{
"epoch": 2.3499757634512846,
"grad_norm": 12.28753764827836,
"learning_rate": 1.3652110835691495e-06,
"loss": 0.41539466381073,
"step": 4848
},
{
"epoch": 2.350460494425594,
"grad_norm": 10.116616306181985,
"learning_rate": 1.3632751487025232e-06,
"loss": 0.5662532448768616,
"step": 4849
},
{
"epoch": 2.350945225399903,
"grad_norm": 9.006325778916802,
"learning_rate": 1.361340370746464e-06,
"loss": 0.31446224451065063,
"step": 4850
},
{
"epoch": 2.3514299563742123,
"grad_norm": 11.56173433156946,
"learning_rate": 1.359406750316462e-06,
"loss": 0.7770442962646484,
"step": 4851
},
{
"epoch": 2.3519146873485215,
"grad_norm": 20.45469077239289,
"learning_rate": 1.357474288027635e-06,
"loss": 0.7762843370437622,
"step": 4852
},
{
"epoch": 2.3523994183228307,
"grad_norm": 8.883338891154603,
"learning_rate": 1.3555429844947382e-06,
"loss": 0.27225950360298157,
"step": 4853
},
{
"epoch": 2.35288414929714,
"grad_norm": 9.57479100975046,
"learning_rate": 1.3536128403321558e-06,
"loss": 0.6343807578086853,
"step": 4854
},
{
"epoch": 2.353368880271449,
"grad_norm": 10.22558649154391,
"learning_rate": 1.3516838561539019e-06,
"loss": 0.3903856873512268,
"step": 4855
},
{
"epoch": 2.3538536112457584,
"grad_norm": 6.473594806082487,
"learning_rate": 1.3497560325736219e-06,
"loss": 0.27714645862579346,
"step": 4856
},
{
"epoch": 2.3543383422200677,
"grad_norm": 15.416115589165683,
"learning_rate": 1.3478293702045947e-06,
"loss": 0.2729610800743103,
"step": 4857
},
{
"epoch": 2.3548230731943773,
"grad_norm": 12.48617209627909,
"learning_rate": 1.3459038696597292e-06,
"loss": 0.21694113314151764,
"step": 4858
},
{
"epoch": 2.3553078041686866,
"grad_norm": 18.926935859252577,
"learning_rate": 1.3439795315515596e-06,
"loss": 1.4375572204589844,
"step": 4859
},
{
"epoch": 2.355792535142996,
"grad_norm": 7.724394793201147,
"learning_rate": 1.342056356492255e-06,
"loss": 0.5075562000274658,
"step": 4860
},
{
"epoch": 2.356277266117305,
"grad_norm": 13.002004165740324,
"learning_rate": 1.3401343450936133e-06,
"loss": 0.26866427063941956,
"step": 4861
},
{
"epoch": 2.3567619970916143,
"grad_norm": 9.785540159291108,
"learning_rate": 1.3382134979670642e-06,
"loss": 0.4170803129673004,
"step": 4862
},
{
"epoch": 2.3572467280659235,
"grad_norm": 6.960441225919997,
"learning_rate": 1.3362938157236649e-06,
"loss": 0.1982431411743164,
"step": 4863
},
{
"epoch": 2.3577314590402327,
"grad_norm": 10.4007140331724,
"learning_rate": 1.3343752989741015e-06,
"loss": 0.3751939535140991,
"step": 4864
},
{
"epoch": 2.358216190014542,
"grad_norm": 15.001610866232252,
"learning_rate": 1.332457948328691e-06,
"loss": 0.6781045794487,
"step": 4865
},
{
"epoch": 2.358700920988851,
"grad_norm": 9.78331229743595,
"learning_rate": 1.3305417643973778e-06,
"loss": 0.4589520990848541,
"step": 4866
},
{
"epoch": 2.3591856519631604,
"grad_norm": 11.873032686637158,
"learning_rate": 1.328626747789737e-06,
"loss": 0.5523812174797058,
"step": 4867
},
{
"epoch": 2.3596703829374697,
"grad_norm": 13.553864022100743,
"learning_rate": 1.3267128991149714e-06,
"loss": 0.2729569971561432,
"step": 4868
},
{
"epoch": 2.360155113911779,
"grad_norm": 11.593661875955869,
"learning_rate": 1.3248002189819108e-06,
"loss": 0.3988463878631592,
"step": 4869
},
{
"epoch": 2.360639844886088,
"grad_norm": 12.88132897181203,
"learning_rate": 1.3228887079990155e-06,
"loss": 0.41369014978408813,
"step": 4870
},
{
"epoch": 2.3611245758603974,
"grad_norm": 10.542504175494424,
"learning_rate": 1.3209783667743743e-06,
"loss": 1.0684478282928467,
"step": 4871
},
{
"epoch": 2.3616093068347066,
"grad_norm": 10.377924823936713,
"learning_rate": 1.3190691959157014e-06,
"loss": 0.5137849450111389,
"step": 4872
},
{
"epoch": 2.362094037809016,
"grad_norm": 7.60350163191895,
"learning_rate": 1.3171611960303421e-06,
"loss": 0.3682156503200531,
"step": 4873
},
{
"epoch": 2.362578768783325,
"grad_norm": 12.974116609683152,
"learning_rate": 1.3152543677252638e-06,
"loss": 0.277057409286499,
"step": 4874
},
{
"epoch": 2.3630634997576347,
"grad_norm": 17.31799260164072,
"learning_rate": 1.3133487116070643e-06,
"loss": 0.5475950241088867,
"step": 4875
},
{
"epoch": 2.3635482307319435,
"grad_norm": 7.918023707881153,
"learning_rate": 1.3114442282819717e-06,
"loss": 0.2609449326992035,
"step": 4876
},
{
"epoch": 2.364032961706253,
"grad_norm": 9.102155628691973,
"learning_rate": 1.3095409183558365e-06,
"loss": 0.4723406434059143,
"step": 4877
},
{
"epoch": 2.3645176926805624,
"grad_norm": 25.675284643083717,
"learning_rate": 1.3076387824341375e-06,
"loss": 1.0447555780410767,
"step": 4878
},
{
"epoch": 2.3650024236548717,
"grad_norm": 15.2374752346556,
"learning_rate": 1.30573782112198e-06,
"loss": 0.33240896463394165,
"step": 4879
},
{
"epoch": 2.365487154629181,
"grad_norm": 11.732560787679365,
"learning_rate": 1.3038380350240948e-06,
"loss": 0.685335636138916,
"step": 4880
},
{
"epoch": 2.36597188560349,
"grad_norm": 12.593124765425925,
"learning_rate": 1.3019394247448402e-06,
"loss": 0.5161505937576294,
"step": 4881
},
{
"epoch": 2.3664566165777994,
"grad_norm": 11.521444487998702,
"learning_rate": 1.3000419908882001e-06,
"loss": 1.618674397468567,
"step": 4882
},
{
"epoch": 2.3669413475521086,
"grad_norm": 9.441933992724001,
"learning_rate": 1.2981457340577835e-06,
"loss": 0.2403048574924469,
"step": 4883
},
{
"epoch": 2.367426078526418,
"grad_norm": 11.888088274872423,
"learning_rate": 1.2962506548568237e-06,
"loss": 1.207635521888733,
"step": 4884
},
{
"epoch": 2.367910809500727,
"grad_norm": 13.76294011550936,
"learning_rate": 1.2943567538881841e-06,
"loss": 0.4523044228553772,
"step": 4885
},
{
"epoch": 2.3683955404750363,
"grad_norm": 9.604905480028018,
"learning_rate": 1.292464031754349e-06,
"loss": 0.7157598733901978,
"step": 4886
},
{
"epoch": 2.3688802714493455,
"grad_norm": 14.275574613284762,
"learning_rate": 1.2905724890574284e-06,
"loss": 0.2953788638114929,
"step": 4887
},
{
"epoch": 2.3693650024236548,
"grad_norm": 11.99219829728694,
"learning_rate": 1.2886821263991584e-06,
"loss": 0.5304001569747925,
"step": 4888
},
{
"epoch": 2.369849733397964,
"grad_norm": 13.432998748812997,
"learning_rate": 1.2867929443808986e-06,
"loss": 0.9108721017837524,
"step": 4889
},
{
"epoch": 2.3703344643722732,
"grad_norm": 12.936887180947783,
"learning_rate": 1.2849049436036325e-06,
"loss": 0.5427827835083008,
"step": 4890
},
{
"epoch": 2.3708191953465825,
"grad_norm": 10.493382417537818,
"learning_rate": 1.2830181246679701e-06,
"loss": 0.4755171537399292,
"step": 4891
},
{
"epoch": 2.371303926320892,
"grad_norm": 10.959923747881072,
"learning_rate": 1.2811324881741428e-06,
"loss": 0.6097816824913025,
"step": 4892
},
{
"epoch": 2.371788657295201,
"grad_norm": 6.442087112695492,
"learning_rate": 1.2792480347220076e-06,
"loss": 0.3211216628551483,
"step": 4893
},
{
"epoch": 2.3722733882695106,
"grad_norm": 9.201870358891313,
"learning_rate": 1.2773647649110427e-06,
"loss": 0.4635968804359436,
"step": 4894
},
{
"epoch": 2.37275811924382,
"grad_norm": 12.703240246495804,
"learning_rate": 1.2754826793403563e-06,
"loss": 0.8456937074661255,
"step": 4895
},
{
"epoch": 2.373242850218129,
"grad_norm": 16.787088620801626,
"learning_rate": 1.2736017786086714e-06,
"loss": 0.5108314752578735,
"step": 4896
},
{
"epoch": 2.3737275811924383,
"grad_norm": 16.00203356278725,
"learning_rate": 1.2717220633143385e-06,
"loss": 0.29771488904953003,
"step": 4897
},
{
"epoch": 2.3742123121667476,
"grad_norm": 8.800656576229363,
"learning_rate": 1.2698435340553289e-06,
"loss": 0.40227043628692627,
"step": 4898
},
{
"epoch": 2.374697043141057,
"grad_norm": 13.49350122282726,
"learning_rate": 1.2679661914292418e-06,
"loss": 0.45134299993515015,
"step": 4899
},
{
"epoch": 2.375181774115366,
"grad_norm": 7.431513592985156,
"learning_rate": 1.2660900360332927e-06,
"loss": 0.5815091133117676,
"step": 4900
},
{
"epoch": 2.3756665050896753,
"grad_norm": 12.242929775025997,
"learning_rate": 1.2642150684643228e-06,
"loss": 0.576924204826355,
"step": 4901
},
{
"epoch": 2.3761512360639845,
"grad_norm": 12.894166609569195,
"learning_rate": 1.2623412893187941e-06,
"loss": 0.3692578971385956,
"step": 4902
},
{
"epoch": 2.3766359670382937,
"grad_norm": 10.76045629032043,
"learning_rate": 1.2604686991927912e-06,
"loss": 0.4235173761844635,
"step": 4903
},
{
"epoch": 2.377120698012603,
"grad_norm": 10.148374659931376,
"learning_rate": 1.2585972986820206e-06,
"loss": 0.9858570098876953,
"step": 4904
},
{
"epoch": 2.377605428986912,
"grad_norm": 11.122138796191207,
"learning_rate": 1.256727088381809e-06,
"loss": 0.282648503780365,
"step": 4905
},
{
"epoch": 2.3780901599612214,
"grad_norm": 10.842888517861136,
"learning_rate": 1.2548580688871058e-06,
"loss": 0.6288855075836182,
"step": 4906
},
{
"epoch": 2.3785748909355307,
"grad_norm": 12.023925136796347,
"learning_rate": 1.2529902407924816e-06,
"loss": 0.3236083984375,
"step": 4907
},
{
"epoch": 2.37905962190984,
"grad_norm": 10.841809768883122,
"learning_rate": 1.251123604692126e-06,
"loss": 0.36272040009498596,
"step": 4908
},
{
"epoch": 2.379544352884149,
"grad_norm": 8.525418213553941,
"learning_rate": 1.2492581611798543e-06,
"loss": 0.3246185779571533,
"step": 4909
},
{
"epoch": 2.3800290838584583,
"grad_norm": 9.81457661909981,
"learning_rate": 1.2473939108490974e-06,
"loss": 0.3443466126918793,
"step": 4910
},
{
"epoch": 2.380513814832768,
"grad_norm": 11.213533563420532,
"learning_rate": 1.2455308542929096e-06,
"loss": 0.5695024728775024,
"step": 4911
},
{
"epoch": 2.3809985458070773,
"grad_norm": 9.390103703763062,
"learning_rate": 1.2436689921039602e-06,
"loss": 0.2297539860010147,
"step": 4912
},
{
"epoch": 2.3814832767813865,
"grad_norm": 30.093823708127854,
"learning_rate": 1.2418083248745465e-06,
"loss": 0.4178144633769989,
"step": 4913
},
{
"epoch": 2.3819680077556957,
"grad_norm": 13.451811822069294,
"learning_rate": 1.239948853196581e-06,
"loss": 0.4418802261352539,
"step": 4914
},
{
"epoch": 2.382452738730005,
"grad_norm": 8.699410545597301,
"learning_rate": 1.238090577661596e-06,
"loss": 0.7492071390151978,
"step": 4915
},
{
"epoch": 2.382937469704314,
"grad_norm": 8.23456895111103,
"learning_rate": 1.2362334988607444e-06,
"loss": 0.24223706126213074,
"step": 4916
},
{
"epoch": 2.3834222006786234,
"grad_norm": 7.485163879804718,
"learning_rate": 1.2343776173847978e-06,
"loss": 0.4025956094264984,
"step": 4917
},
{
"epoch": 2.3839069316529327,
"grad_norm": 7.0663256696452175,
"learning_rate": 1.2325229338241462e-06,
"loss": 0.41019168496131897,
"step": 4918
},
{
"epoch": 2.384391662627242,
"grad_norm": 14.047585387435806,
"learning_rate": 1.2306694487688004e-06,
"loss": 0.5100324153900146,
"step": 4919
},
{
"epoch": 2.384876393601551,
"grad_norm": 9.894494239708141,
"learning_rate": 1.2288171628083883e-06,
"loss": 0.25181344151496887,
"step": 4920
},
{
"epoch": 2.3853611245758604,
"grad_norm": 9.641043295766176,
"learning_rate": 1.2269660765321567e-06,
"loss": 0.32552099227905273,
"step": 4921
},
{
"epoch": 2.3858458555501696,
"grad_norm": 12.707797357198165,
"learning_rate": 1.22511619052897e-06,
"loss": 0.3569487929344177,
"step": 4922
},
{
"epoch": 2.386330586524479,
"grad_norm": 10.062128374545836,
"learning_rate": 1.2232675053873145e-06,
"loss": 0.7171885967254639,
"step": 4923
},
{
"epoch": 2.386815317498788,
"grad_norm": 8.26979637336288,
"learning_rate": 1.2214200216952903e-06,
"loss": 0.8646424412727356,
"step": 4924
},
{
"epoch": 2.3873000484730973,
"grad_norm": 10.205102797926054,
"learning_rate": 1.2195737400406165e-06,
"loss": 0.30173593759536743,
"step": 4925
},
{
"epoch": 2.3877847794474065,
"grad_norm": 8.738034244501304,
"learning_rate": 1.2177286610106314e-06,
"loss": 0.49544787406921387,
"step": 4926
},
{
"epoch": 2.3882695104217158,
"grad_norm": 8.862068546275975,
"learning_rate": 1.215884785192285e-06,
"loss": 0.2795226573944092,
"step": 4927
},
{
"epoch": 2.3887542413960254,
"grad_norm": 7.363720078998661,
"learning_rate": 1.2140421131721536e-06,
"loss": 0.4983435273170471,
"step": 4928
},
{
"epoch": 2.3892389723703342,
"grad_norm": 10.813579026213345,
"learning_rate": 1.2122006455364244e-06,
"loss": 0.8232021927833557,
"step": 4929
},
{
"epoch": 2.389723703344644,
"grad_norm": 7.202136508594459,
"learning_rate": 1.210360382870902e-06,
"loss": 0.18965177237987518,
"step": 4930
},
{
"epoch": 2.390208434318953,
"grad_norm": 18.53315802794987,
"learning_rate": 1.2085213257610084e-06,
"loss": 0.7208030223846436,
"step": 4931
},
{
"epoch": 2.3906931652932624,
"grad_norm": 8.969617997089927,
"learning_rate": 1.2066834747917837e-06,
"loss": 0.36385655403137207,
"step": 4932
},
{
"epoch": 2.3911778962675716,
"grad_norm": 10.035325580069577,
"learning_rate": 1.2048468305478843e-06,
"loss": 0.3454814851284027,
"step": 4933
},
{
"epoch": 2.391662627241881,
"grad_norm": 10.553847265545077,
"learning_rate": 1.2030113936135779e-06,
"loss": 0.39972996711730957,
"step": 4934
},
{
"epoch": 2.39214735821619,
"grad_norm": 19.091606763009448,
"learning_rate": 1.201177164572752e-06,
"loss": 0.46332699060440063,
"step": 4935
},
{
"epoch": 2.3926320891904993,
"grad_norm": 13.827395681168147,
"learning_rate": 1.1993441440089092e-06,
"loss": 0.44526541233062744,
"step": 4936
},
{
"epoch": 2.3931168201648085,
"grad_norm": 8.388350222839325,
"learning_rate": 1.1975123325051703e-06,
"loss": 0.48074936866760254,
"step": 4937
},
{
"epoch": 2.3936015511391178,
"grad_norm": 16.954229331310703,
"learning_rate": 1.1956817306442669e-06,
"loss": 0.7154406309127808,
"step": 4938
},
{
"epoch": 2.394086282113427,
"grad_norm": 9.154508132219021,
"learning_rate": 1.1938523390085488e-06,
"loss": 0.4695637822151184,
"step": 4939
},
{
"epoch": 2.3945710130877362,
"grad_norm": 12.270050563043164,
"learning_rate": 1.192024158179979e-06,
"loss": 0.3399227261543274,
"step": 4940
},
{
"epoch": 2.3950557440620455,
"grad_norm": 11.259698470815577,
"learning_rate": 1.190197188740137e-06,
"loss": 0.26234114170074463,
"step": 4941
},
{
"epoch": 2.3955404750363547,
"grad_norm": 8.057411499203583,
"learning_rate": 1.188371431270216e-06,
"loss": 0.5828617215156555,
"step": 4942
},
{
"epoch": 2.396025206010664,
"grad_norm": 7.6609337169830924,
"learning_rate": 1.1865468863510232e-06,
"loss": 0.12003932893276215,
"step": 4943
},
{
"epoch": 2.396509936984973,
"grad_norm": 14.431348332893416,
"learning_rate": 1.1847235545629815e-06,
"loss": 0.3571510314941406,
"step": 4944
},
{
"epoch": 2.396994667959283,
"grad_norm": 13.029450798311439,
"learning_rate": 1.1829014364861252e-06,
"loss": 0.8507670760154724,
"step": 4945
},
{
"epoch": 2.3974793989335916,
"grad_norm": 10.834773134984626,
"learning_rate": 1.1810805327001067e-06,
"loss": 0.9383180141448975,
"step": 4946
},
{
"epoch": 2.3979641299079013,
"grad_norm": 11.409701356018434,
"learning_rate": 1.179260843784189e-06,
"loss": 0.3734802007675171,
"step": 4947
},
{
"epoch": 2.3984488608822105,
"grad_norm": 11.047059780622913,
"learning_rate": 1.1774423703172506e-06,
"loss": 0.392840713262558,
"step": 4948
},
{
"epoch": 2.3989335918565198,
"grad_norm": 9.406832829822514,
"learning_rate": 1.175625112877779e-06,
"loss": 0.3397457003593445,
"step": 4949
},
{
"epoch": 2.399418322830829,
"grad_norm": 14.197986429640677,
"learning_rate": 1.1738090720438782e-06,
"loss": 0.49389612674713135,
"step": 4950
},
{
"epoch": 2.3999030538051382,
"grad_norm": 11.036893656370047,
"learning_rate": 1.1719942483932677e-06,
"loss": 0.3472108542919159,
"step": 4951
},
{
"epoch": 2.4003877847794475,
"grad_norm": 8.26954506822831,
"learning_rate": 1.1701806425032758e-06,
"loss": 0.5981207489967346,
"step": 4952
},
{
"epoch": 2.4008725157537567,
"grad_norm": 16.867700185100023,
"learning_rate": 1.1683682549508446e-06,
"loss": 0.7180719971656799,
"step": 4953
},
{
"epoch": 2.401357246728066,
"grad_norm": 11.523485160955364,
"learning_rate": 1.1665570863125286e-06,
"loss": 0.42877814173698425,
"step": 4954
},
{
"epoch": 2.401841977702375,
"grad_norm": 10.996248064117616,
"learning_rate": 1.1647471371644943e-06,
"loss": 0.34830671548843384,
"step": 4955
},
{
"epoch": 2.4023267086766844,
"grad_norm": 14.698832758902881,
"learning_rate": 1.1629384080825212e-06,
"loss": 0.3710178732872009,
"step": 4956
},
{
"epoch": 2.4028114396509936,
"grad_norm": 5.94923588095834,
"learning_rate": 1.1611308996419995e-06,
"loss": 0.28167861700057983,
"step": 4957
},
{
"epoch": 2.403296170625303,
"grad_norm": 8.855753446715907,
"learning_rate": 1.159324612417932e-06,
"loss": 0.44208309054374695,
"step": 4958
},
{
"epoch": 2.403780901599612,
"grad_norm": 10.188685285624032,
"learning_rate": 1.1575195469849304e-06,
"loss": 0.3690137267112732,
"step": 4959
},
{
"epoch": 2.4042656325739213,
"grad_norm": 13.021417298759333,
"learning_rate": 1.155715703917224e-06,
"loss": 1.1069295406341553,
"step": 4960
},
{
"epoch": 2.4047503635482306,
"grad_norm": 7.426761901453825,
"learning_rate": 1.153913083788647e-06,
"loss": 0.5353105068206787,
"step": 4961
},
{
"epoch": 2.40523509452254,
"grad_norm": 11.115811748279931,
"learning_rate": 1.1521116871726468e-06,
"loss": 0.41100817918777466,
"step": 4962
},
{
"epoch": 2.405719825496849,
"grad_norm": 12.029320008038875,
"learning_rate": 1.1503115146422817e-06,
"loss": 0.46077755093574524,
"step": 4963
},
{
"epoch": 2.4062045564711587,
"grad_norm": 9.563836814091156,
"learning_rate": 1.1485125667702208e-06,
"loss": 0.47655558586120605,
"step": 4964
},
{
"epoch": 2.406689287445468,
"grad_norm": 11.163206883101088,
"learning_rate": 1.1467148441287423e-06,
"loss": 0.4084402918815613,
"step": 4965
},
{
"epoch": 2.407174018419777,
"grad_norm": 11.316061016608193,
"learning_rate": 1.144918347289737e-06,
"loss": 0.22617055475711823,
"step": 4966
},
{
"epoch": 2.4076587493940864,
"grad_norm": 7.866581345065156,
"learning_rate": 1.1431230768247032e-06,
"loss": 0.41540631651878357,
"step": 4967
},
{
"epoch": 2.4081434803683957,
"grad_norm": 14.111327364575779,
"learning_rate": 1.14132903330475e-06,
"loss": 0.9139132499694824,
"step": 4968
},
{
"epoch": 2.408628211342705,
"grad_norm": 14.02066363893898,
"learning_rate": 1.1395362173005957e-06,
"loss": 0.4901125431060791,
"step": 4969
},
{
"epoch": 2.409112942317014,
"grad_norm": 6.94330020800381,
"learning_rate": 1.1377446293825717e-06,
"loss": 0.26762276887893677,
"step": 4970
},
{
"epoch": 2.4095976732913233,
"grad_norm": 9.889702730970818,
"learning_rate": 1.135954270120615e-06,
"loss": 0.2921201288700104,
"step": 4971
},
{
"epoch": 2.4100824042656326,
"grad_norm": 10.466495112878947,
"learning_rate": 1.1341651400842707e-06,
"loss": 0.24874183535575867,
"step": 4972
},
{
"epoch": 2.410567135239942,
"grad_norm": 16.332195389306683,
"learning_rate": 1.1323772398426935e-06,
"loss": 0.5935323238372803,
"step": 4973
},
{
"epoch": 2.411051866214251,
"grad_norm": 10.218187040882242,
"learning_rate": 1.1305905699646524e-06,
"loss": 0.43148162961006165,
"step": 4974
},
{
"epoch": 2.4115365971885603,
"grad_norm": 12.712268642342874,
"learning_rate": 1.1288051310185182e-06,
"loss": 0.41129106283187866,
"step": 4975
},
{
"epoch": 2.4120213281628695,
"grad_norm": 15.374509727982797,
"learning_rate": 1.127020923572273e-06,
"loss": 0.3221362233161926,
"step": 4976
},
{
"epoch": 2.4125060591371787,
"grad_norm": 11.340990545286852,
"learning_rate": 1.1252379481935066e-06,
"loss": 0.5168125629425049,
"step": 4977
},
{
"epoch": 2.412990790111488,
"grad_norm": 10.127631070527798,
"learning_rate": 1.1234562054494168e-06,
"loss": 0.3189733624458313,
"step": 4978
},
{
"epoch": 2.413475521085797,
"grad_norm": 11.52495594845394,
"learning_rate": 1.1216756959068098e-06,
"loss": 0.45604124665260315,
"step": 4979
},
{
"epoch": 2.4139602520601064,
"grad_norm": 14.819746446012523,
"learning_rate": 1.1198964201320994e-06,
"loss": 0.45853450894355774,
"step": 4980
},
{
"epoch": 2.414444983034416,
"grad_norm": 12.53315015320678,
"learning_rate": 1.1181183786913063e-06,
"loss": 1.04206120967865,
"step": 4981
},
{
"epoch": 2.414929714008725,
"grad_norm": 9.285987834256467,
"learning_rate": 1.1163415721500587e-06,
"loss": 0.48494952917099,
"step": 4982
},
{
"epoch": 2.4154144449830346,
"grad_norm": 14.013630634292625,
"learning_rate": 1.1145660010735914e-06,
"loss": 0.9980771541595459,
"step": 4983
},
{
"epoch": 2.415899175957344,
"grad_norm": 14.969799904024118,
"learning_rate": 1.112791666026749e-06,
"loss": 0.4520975947380066,
"step": 4984
},
{
"epoch": 2.416383906931653,
"grad_norm": 11.317400707142035,
"learning_rate": 1.1110185675739804e-06,
"loss": 1.0971494913101196,
"step": 4985
},
{
"epoch": 2.4168686379059623,
"grad_norm": 10.617994947664878,
"learning_rate": 1.109246706279342e-06,
"loss": 0.6798084378242493,
"step": 4986
},
{
"epoch": 2.4173533688802715,
"grad_norm": 8.91787099715373,
"learning_rate": 1.1074760827064929e-06,
"loss": 0.30793866515159607,
"step": 4987
},
{
"epoch": 2.4178380998545808,
"grad_norm": 10.88739870667225,
"learning_rate": 1.1057066974187052e-06,
"loss": 0.8040207624435425,
"step": 4988
},
{
"epoch": 2.41832283082889,
"grad_norm": 10.49291551528256,
"learning_rate": 1.1039385509788525e-06,
"loss": 0.8278577923774719,
"step": 4989
},
{
"epoch": 2.4188075618031992,
"grad_norm": 13.061845098246458,
"learning_rate": 1.1021716439494157e-06,
"loss": 0.3881208896636963,
"step": 4990
},
{
"epoch": 2.4192922927775085,
"grad_norm": 17.00982548580921,
"learning_rate": 1.1004059768924807e-06,
"loss": 1.073038101196289,
"step": 4991
},
{
"epoch": 2.4197770237518177,
"grad_norm": 9.292384769851312,
"learning_rate": 1.0986415503697385e-06,
"loss": 0.3589513599872589,
"step": 4992
},
{
"epoch": 2.420261754726127,
"grad_norm": 13.558957186444296,
"learning_rate": 1.0968783649424896e-06,
"loss": 0.2238079458475113,
"step": 4993
},
{
"epoch": 2.420746485700436,
"grad_norm": 20.289952099322214,
"learning_rate": 1.0951164211716337e-06,
"loss": 0.7308096885681152,
"step": 4994
},
{
"epoch": 2.4212312166747454,
"grad_norm": 11.549398041083409,
"learning_rate": 1.093355719617678e-06,
"loss": 0.5369495749473572,
"step": 4995
},
{
"epoch": 2.4217159476490546,
"grad_norm": 12.227966644000988,
"learning_rate": 1.0915962608407366e-06,
"loss": 0.2046864628791809,
"step": 4996
},
{
"epoch": 2.422200678623364,
"grad_norm": 9.918056412006381,
"learning_rate": 1.0898380454005235e-06,
"loss": 0.4285496771335602,
"step": 4997
},
{
"epoch": 2.4226854095976735,
"grad_norm": 10.60283385444399,
"learning_rate": 1.0880810738563635e-06,
"loss": 0.6435464024543762,
"step": 4998
},
{
"epoch": 2.4231701405719823,
"grad_norm": 14.316619988918095,
"learning_rate": 1.0863253467671807e-06,
"loss": 1.244687795639038,
"step": 4999
},
{
"epoch": 2.423654871546292,
"grad_norm": 8.165531795407345,
"learning_rate": 1.0845708646915054e-06,
"loss": 0.699385404586792,
"step": 5000
},
{
"epoch": 2.4241396025206012,
"grad_norm": 20.939886966989555,
"learning_rate": 1.0828176281874709e-06,
"loss": 0.6410003304481506,
"step": 5001
},
{
"epoch": 2.4246243334949105,
"grad_norm": 11.139308443260044,
"learning_rate": 1.0810656378128148e-06,
"loss": 0.2817669212818146,
"step": 5002
},
{
"epoch": 2.4251090644692197,
"grad_norm": 13.236245621627171,
"learning_rate": 1.0793148941248783e-06,
"loss": 0.3773602843284607,
"step": 5003
},
{
"epoch": 2.425593795443529,
"grad_norm": 9.95159615574894,
"learning_rate": 1.0775653976806056e-06,
"loss": 0.3126314580440521,
"step": 5004
},
{
"epoch": 2.426078526417838,
"grad_norm": 15.128130857016636,
"learning_rate": 1.0758171490365444e-06,
"loss": 0.31885308027267456,
"step": 5005
},
{
"epoch": 2.4265632573921474,
"grad_norm": 9.785099256799686,
"learning_rate": 1.0740701487488442e-06,
"loss": 0.5818062424659729,
"step": 5006
},
{
"epoch": 2.4270479883664566,
"grad_norm": 6.683548684470391,
"learning_rate": 1.0723243973732616e-06,
"loss": 0.28406623005867004,
"step": 5007
},
{
"epoch": 2.427532719340766,
"grad_norm": 9.97304341077794,
"learning_rate": 1.070579895465153e-06,
"loss": 1.0243444442749023,
"step": 5008
},
{
"epoch": 2.428017450315075,
"grad_norm": 10.097952710770747,
"learning_rate": 1.0688366435794744e-06,
"loss": 0.3555721640586853,
"step": 5009
},
{
"epoch": 2.4285021812893843,
"grad_norm": 9.828485890668599,
"learning_rate": 1.0670946422707883e-06,
"loss": 0.4002639055252075,
"step": 5010
},
{
"epoch": 2.4289869122636936,
"grad_norm": 19.831332445536212,
"learning_rate": 1.0653538920932567e-06,
"loss": 0.4636850953102112,
"step": 5011
},
{
"epoch": 2.429471643238003,
"grad_norm": 11.889928256768634,
"learning_rate": 1.0636143936006477e-06,
"loss": 0.3433040976524353,
"step": 5012
},
{
"epoch": 2.429956374212312,
"grad_norm": 11.596482075608602,
"learning_rate": 1.0618761473463273e-06,
"loss": 0.7608712315559387,
"step": 5013
},
{
"epoch": 2.4304411051866213,
"grad_norm": 11.797043590293766,
"learning_rate": 1.060139153883264e-06,
"loss": 1.0956389904022217,
"step": 5014
},
{
"epoch": 2.4309258361609305,
"grad_norm": 7.539950101547488,
"learning_rate": 1.058403413764028e-06,
"loss": 0.2630883753299713,
"step": 5015
},
{
"epoch": 2.4314105671352397,
"grad_norm": 8.668060573487894,
"learning_rate": 1.0566689275407916e-06,
"loss": 0.5150175094604492,
"step": 5016
},
{
"epoch": 2.4318952981095494,
"grad_norm": 7.493464831537736,
"learning_rate": 1.0549356957653267e-06,
"loss": 0.28143125772476196,
"step": 5017
},
{
"epoch": 2.4323800290838586,
"grad_norm": 9.292723376761309,
"learning_rate": 1.0532037189890076e-06,
"loss": 0.6449586153030396,
"step": 5018
},
{
"epoch": 2.432864760058168,
"grad_norm": 12.989803866564989,
"learning_rate": 1.051472997762808e-06,
"loss": 0.4488782584667206,
"step": 5019
},
{
"epoch": 2.433349491032477,
"grad_norm": 11.10852880647524,
"learning_rate": 1.0497435326373023e-06,
"loss": 0.6522911787033081,
"step": 5020
},
{
"epoch": 2.4338342220067863,
"grad_norm": 11.85306159279164,
"learning_rate": 1.0480153241626679e-06,
"loss": 0.5406022667884827,
"step": 5021
},
{
"epoch": 2.4343189529810956,
"grad_norm": 10.498401988531059,
"learning_rate": 1.0462883728886792e-06,
"loss": 0.5319374799728394,
"step": 5022
},
{
"epoch": 2.434803683955405,
"grad_norm": 9.64208709794913,
"learning_rate": 1.0445626793647118e-06,
"loss": 0.4085714817047119,
"step": 5023
},
{
"epoch": 2.435288414929714,
"grad_norm": 20.81686458226658,
"learning_rate": 1.0428382441397427e-06,
"loss": 0.809928297996521,
"step": 5024
},
{
"epoch": 2.4357731459040233,
"grad_norm": 12.380248954948051,
"learning_rate": 1.0411150677623438e-06,
"loss": 0.392758309841156,
"step": 5025
},
{
"epoch": 2.4362578768783325,
"grad_norm": 14.59451594463993,
"learning_rate": 1.0393931507806926e-06,
"loss": 1.469990849494934,
"step": 5026
},
{
"epoch": 2.4367426078526417,
"grad_norm": 8.146739670295162,
"learning_rate": 1.037672493742563e-06,
"loss": 0.4583389163017273,
"step": 5027
},
{
"epoch": 2.437227338826951,
"grad_norm": 19.85261780057379,
"learning_rate": 1.0359530971953285e-06,
"loss": 0.5523641109466553,
"step": 5028
},
{
"epoch": 2.43771206980126,
"grad_norm": 11.447225353600857,
"learning_rate": 1.0342349616859593e-06,
"loss": 0.8737152218818665,
"step": 5029
},
{
"epoch": 2.4381968007755694,
"grad_norm": 18.066241738322343,
"learning_rate": 1.0325180877610313e-06,
"loss": 0.5330846309661865,
"step": 5030
},
{
"epoch": 2.4386815317498787,
"grad_norm": 8.50041061447006,
"learning_rate": 1.0308024759667095e-06,
"loss": 0.47681719064712524,
"step": 5031
},
{
"epoch": 2.439166262724188,
"grad_norm": 15.18700404085319,
"learning_rate": 1.0290881268487646e-06,
"loss": 0.5376464128494263,
"step": 5032
},
{
"epoch": 2.439650993698497,
"grad_norm": 11.224507912209878,
"learning_rate": 1.0273750409525629e-06,
"loss": 0.28645461797714233,
"step": 5033
},
{
"epoch": 2.440135724672807,
"grad_norm": 9.233811659406262,
"learning_rate": 1.025663218823068e-06,
"loss": 0.38081368803977966,
"step": 5034
},
{
"epoch": 2.4406204556471156,
"grad_norm": 10.765542130533655,
"learning_rate": 1.023952661004845e-06,
"loss": 0.39292922616004944,
"step": 5035
},
{
"epoch": 2.4411051866214253,
"grad_norm": 16.726513432185175,
"learning_rate": 1.022243368042054e-06,
"loss": 0.8207542300224304,
"step": 5036
},
{
"epoch": 2.4415899175957345,
"grad_norm": 8.217753855124275,
"learning_rate": 1.0205353404784524e-06,
"loss": 0.9149539470672607,
"step": 5037
},
{
"epoch": 2.4420746485700437,
"grad_norm": 20.300963692733514,
"learning_rate": 1.018828578857396e-06,
"loss": 0.6708599328994751,
"step": 5038
},
{
"epoch": 2.442559379544353,
"grad_norm": 11.132235569765172,
"learning_rate": 1.0171230837218383e-06,
"loss": 0.5294389128684998,
"step": 5039
},
{
"epoch": 2.443044110518662,
"grad_norm": 9.945184817957605,
"learning_rate": 1.0154188556143286e-06,
"loss": 0.4368644952774048,
"step": 5040
},
{
"epoch": 2.4435288414929714,
"grad_norm": 11.287554276553859,
"learning_rate": 1.0137158950770142e-06,
"loss": 0.45760780572891235,
"step": 5041
},
{
"epoch": 2.4440135724672807,
"grad_norm": 6.474907645552905,
"learning_rate": 1.0120142026516395e-06,
"loss": 0.2449059933423996,
"step": 5042
},
{
"epoch": 2.44449830344159,
"grad_norm": 7.21255393467084,
"learning_rate": 1.010313778879543e-06,
"loss": 0.32444024085998535,
"step": 5043
},
{
"epoch": 2.444983034415899,
"grad_norm": 8.949083829192197,
"learning_rate": 1.0086146243016643e-06,
"loss": 0.21590851247310638,
"step": 5044
},
{
"epoch": 2.4454677653902084,
"grad_norm": 6.234877876337591,
"learning_rate": 1.006916739458535e-06,
"loss": 0.31505054235458374,
"step": 5045
},
{
"epoch": 2.4459524963645176,
"grad_norm": 8.310339593442977,
"learning_rate": 1.0052201248902849e-06,
"loss": 0.31747421622276306,
"step": 5046
},
{
"epoch": 2.446437227338827,
"grad_norm": 8.75004524374298,
"learning_rate": 1.0035247811366372e-06,
"loss": 0.4950546622276306,
"step": 5047
},
{
"epoch": 2.446921958313136,
"grad_norm": 9.918043429022903,
"learning_rate": 1.0018307087369128e-06,
"loss": 0.38030555844306946,
"step": 5048
},
{
"epoch": 2.4474066892874453,
"grad_norm": 13.394534066150579,
"learning_rate": 1.00013790823003e-06,
"loss": 0.6552985906600952,
"step": 5049
},
{
"epoch": 2.4478914202617545,
"grad_norm": 11.745480328220804,
"learning_rate": 9.984463801544992e-07,
"loss": 0.5911254286766052,
"step": 5050
},
{
"epoch": 2.4483761512360642,
"grad_norm": 15.144622423497285,
"learning_rate": 9.967561250484275e-07,
"loss": 0.23480281233787537,
"step": 5051
},
{
"epoch": 2.448860882210373,
"grad_norm": 14.729079711273855,
"learning_rate": 9.950671434495174e-07,
"loss": 0.6638896465301514,
"step": 5052
},
{
"epoch": 2.4493456131846827,
"grad_norm": 10.096597524197778,
"learning_rate": 9.933794358950643e-07,
"loss": 0.7619686722755432,
"step": 5053
},
{
"epoch": 2.449830344158992,
"grad_norm": 8.2870136766394,
"learning_rate": 9.916930029219606e-07,
"loss": 0.4991978704929352,
"step": 5054
},
{
"epoch": 2.450315075133301,
"grad_norm": 8.946841684045053,
"learning_rate": 9.900078450666929e-07,
"loss": 0.4690955877304077,
"step": 5055
},
{
"epoch": 2.4507998061076104,
"grad_norm": 12.609027123038732,
"learning_rate": 9.883239628653407e-07,
"loss": 0.4442998468875885,
"step": 5056
},
{
"epoch": 2.4512845370819196,
"grad_norm": 12.296275070709633,
"learning_rate": 9.86641356853577e-07,
"loss": 0.4955703914165497,
"step": 5057
},
{
"epoch": 2.451769268056229,
"grad_norm": 21.155967542587607,
"learning_rate": 9.849600275666744e-07,
"loss": 0.5151807069778442,
"step": 5058
},
{
"epoch": 2.452253999030538,
"grad_norm": 21.487578297961875,
"learning_rate": 9.83279975539493e-07,
"loss": 0.35074102878570557,
"step": 5059
},
{
"epoch": 2.4527387300048473,
"grad_norm": 10.910342091204749,
"learning_rate": 9.81601201306489e-07,
"loss": 0.2705617845058441,
"step": 5060
},
{
"epoch": 2.4532234609791566,
"grad_norm": 13.30627823174059,
"learning_rate": 9.799237054017135e-07,
"loss": 0.5519003868103027,
"step": 5061
},
{
"epoch": 2.453708191953466,
"grad_norm": 13.395158908988536,
"learning_rate": 9.78247488358806e-07,
"loss": 0.4283914566040039,
"step": 5062
},
{
"epoch": 2.454192922927775,
"grad_norm": 5.859023773073722,
"learning_rate": 9.765725507110059e-07,
"loss": 0.300790011882782,
"step": 5063
},
{
"epoch": 2.4546776539020843,
"grad_norm": 11.126373823733708,
"learning_rate": 9.748988929911412e-07,
"loss": 0.3849515914916992,
"step": 5064
},
{
"epoch": 2.4551623848763935,
"grad_norm": 6.971823283189664,
"learning_rate": 9.732265157316344e-07,
"loss": 0.39618128538131714,
"step": 5065
},
{
"epoch": 2.4556471158507027,
"grad_norm": 10.664297115641308,
"learning_rate": 9.715554194644998e-07,
"loss": 0.23370996117591858,
"step": 5066
},
{
"epoch": 2.456131846825012,
"grad_norm": 12.095666086244718,
"learning_rate": 9.698856047213433e-07,
"loss": 0.5647187232971191,
"step": 5067
},
{
"epoch": 2.456616577799321,
"grad_norm": 10.956972125605773,
"learning_rate": 9.682170720333684e-07,
"loss": 0.4006896913051605,
"step": 5068
},
{
"epoch": 2.4571013087736304,
"grad_norm": 9.43787058626775,
"learning_rate": 9.665498219313625e-07,
"loss": 0.5127300024032593,
"step": 5069
},
{
"epoch": 2.45758603974794,
"grad_norm": 12.03306409667895,
"learning_rate": 9.648838549457101e-07,
"loss": 0.7849088907241821,
"step": 5070
},
{
"epoch": 2.4580707707222493,
"grad_norm": 13.249389159821247,
"learning_rate": 9.632191716063866e-07,
"loss": 0.4960913062095642,
"step": 5071
},
{
"epoch": 2.4585555016965586,
"grad_norm": 10.825443929054314,
"learning_rate": 9.615557724429609e-07,
"loss": 0.3858298659324646,
"step": 5072
},
{
"epoch": 2.459040232670868,
"grad_norm": 9.019017326482144,
"learning_rate": 9.598936579845901e-07,
"loss": 0.9191493391990662,
"step": 5073
},
{
"epoch": 2.459524963645177,
"grad_norm": 23.938602615069904,
"learning_rate": 9.582328287600245e-07,
"loss": 0.997169017791748,
"step": 5074
},
{
"epoch": 2.4600096946194863,
"grad_norm": 19.43644153815365,
"learning_rate": 9.56573285297605e-07,
"loss": 0.6392226815223694,
"step": 5075
},
{
"epoch": 2.4604944255937955,
"grad_norm": 10.717651964859614,
"learning_rate": 9.549150281252633e-07,
"loss": 0.8885111212730408,
"step": 5076
},
{
"epoch": 2.4609791565681047,
"grad_norm": 11.005997484608473,
"learning_rate": 9.532580577705225e-07,
"loss": 0.8026849031448364,
"step": 5077
},
{
"epoch": 2.461463887542414,
"grad_norm": 9.79053914419721,
"learning_rate": 9.516023747604958e-07,
"loss": 0.3984474539756775,
"step": 5078
},
{
"epoch": 2.461948618516723,
"grad_norm": 9.731911431113252,
"learning_rate": 9.499479796218874e-07,
"loss": 0.8203182816505432,
"step": 5079
},
{
"epoch": 2.4624333494910324,
"grad_norm": 7.244367880231695,
"learning_rate": 9.482948728809909e-07,
"loss": 0.2828768193721771,
"step": 5080
},
{
"epoch": 2.4629180804653417,
"grad_norm": 9.985556514082226,
"learning_rate": 9.466430550636902e-07,
"loss": 0.37519678473472595,
"step": 5081
},
{
"epoch": 2.463402811439651,
"grad_norm": 11.409292914368745,
"learning_rate": 9.449925266954619e-07,
"loss": 0.2933107912540436,
"step": 5082
},
{
"epoch": 2.46388754241396,
"grad_norm": 14.692736407199257,
"learning_rate": 9.433432883013699e-07,
"loss": 0.4098074734210968,
"step": 5083
},
{
"epoch": 2.4643722733882694,
"grad_norm": 15.514381305733625,
"learning_rate": 9.416953404060652e-07,
"loss": 0.8221198916435242,
"step": 5084
},
{
"epoch": 2.4648570043625786,
"grad_norm": 14.608551385130479,
"learning_rate": 9.400486835337913e-07,
"loss": 1.3898814916610718,
"step": 5085
},
{
"epoch": 2.465341735336888,
"grad_norm": 16.75352116311987,
"learning_rate": 9.384033182083835e-07,
"loss": 0.5302530527114868,
"step": 5086
},
{
"epoch": 2.4658264663111975,
"grad_norm": 12.89448617751229,
"learning_rate": 9.367592449532619e-07,
"loss": 1.0164381265640259,
"step": 5087
},
{
"epoch": 2.4663111972855063,
"grad_norm": 11.49045142805847,
"learning_rate": 9.351164642914368e-07,
"loss": 0.6028796434402466,
"step": 5088
},
{
"epoch": 2.466795928259816,
"grad_norm": 12.111430299210415,
"learning_rate": 9.334749767455086e-07,
"loss": 0.5500833988189697,
"step": 5089
},
{
"epoch": 2.467280659234125,
"grad_norm": 12.177379640175257,
"learning_rate": 9.318347828376639e-07,
"loss": 0.33282384276390076,
"step": 5090
},
{
"epoch": 2.4677653902084344,
"grad_norm": 8.731400705734139,
"learning_rate": 9.301958830896807e-07,
"loss": 0.4829321503639221,
"step": 5091
},
{
"epoch": 2.4682501211827437,
"grad_norm": 27.079522634365286,
"learning_rate": 9.28558278022923e-07,
"loss": 0.9240590333938599,
"step": 5092
},
{
"epoch": 2.468734852157053,
"grad_norm": 10.513493376994822,
"learning_rate": 9.269219681583442e-07,
"loss": 0.8277894854545593,
"step": 5093
},
{
"epoch": 2.469219583131362,
"grad_norm": 13.017757147113615,
"learning_rate": 9.252869540164849e-07,
"loss": 0.4442701041698456,
"step": 5094
},
{
"epoch": 2.4697043141056714,
"grad_norm": 7.5670298511209015,
"learning_rate": 9.236532361174727e-07,
"loss": 0.47914206981658936,
"step": 5095
},
{
"epoch": 2.4701890450799806,
"grad_norm": 14.965571534471499,
"learning_rate": 9.220208149810262e-07,
"loss": 0.5411586165428162,
"step": 5096
},
{
"epoch": 2.47067377605429,
"grad_norm": 19.857939896587485,
"learning_rate": 9.203896911264487e-07,
"loss": 0.8919562101364136,
"step": 5097
},
{
"epoch": 2.471158507028599,
"grad_norm": 15.634383698364845,
"learning_rate": 9.187598650726304e-07,
"loss": 0.3413768410682678,
"step": 5098
},
{
"epoch": 2.4716432380029083,
"grad_norm": 9.20218255330869,
"learning_rate": 9.171313373380508e-07,
"loss": 0.3445432484149933,
"step": 5099
},
{
"epoch": 2.4721279689772175,
"grad_norm": 7.8951727884971925,
"learning_rate": 9.15504108440774e-07,
"loss": 0.332294225692749,
"step": 5100
},
{
"epoch": 2.4726126999515268,
"grad_norm": 9.978682406834462,
"learning_rate": 9.13878178898453e-07,
"loss": 0.5233199000358582,
"step": 5101
},
{
"epoch": 2.473097430925836,
"grad_norm": 13.063945510649123,
"learning_rate": 9.122535492283263e-07,
"loss": 0.724299967288971,
"step": 5102
},
{
"epoch": 2.4735821619001452,
"grad_norm": 9.894087091483124,
"learning_rate": 9.106302199472194e-07,
"loss": 0.4604230523109436,
"step": 5103
},
{
"epoch": 2.474066892874455,
"grad_norm": 11.009067120841811,
"learning_rate": 9.090081915715421e-07,
"loss": 0.5166958570480347,
"step": 5104
},
{
"epoch": 2.4745516238487637,
"grad_norm": 10.066993467717657,
"learning_rate": 9.073874646172958e-07,
"loss": 0.4352535605430603,
"step": 5105
},
{
"epoch": 2.4750363548230734,
"grad_norm": 11.703194185518084,
"learning_rate": 9.057680396000612e-07,
"loss": 0.3934059739112854,
"step": 5106
},
{
"epoch": 2.4755210857973826,
"grad_norm": 17.72293322431261,
"learning_rate": 9.041499170350082e-07,
"loss": 0.3739461898803711,
"step": 5107
},
{
"epoch": 2.476005816771692,
"grad_norm": 9.763292837485235,
"learning_rate": 9.025330974368929e-07,
"loss": 0.3800202012062073,
"step": 5108
},
{
"epoch": 2.476490547746001,
"grad_norm": 8.831559539732332,
"learning_rate": 9.009175813200543e-07,
"loss": 0.6724454164505005,
"step": 5109
},
{
"epoch": 2.4769752787203103,
"grad_norm": 10.795587333163429,
"learning_rate": 8.993033691984215e-07,
"loss": 0.3932473659515381,
"step": 5110
},
{
"epoch": 2.4774600096946195,
"grad_norm": 12.054806588254653,
"learning_rate": 8.976904615855037e-07,
"loss": 0.7263174653053284,
"step": 5111
},
{
"epoch": 2.477944740668929,
"grad_norm": 9.048234082550987,
"learning_rate": 8.960788589943969e-07,
"loss": 0.2812870740890503,
"step": 5112
},
{
"epoch": 2.478429471643238,
"grad_norm": 14.05074311360395,
"learning_rate": 8.944685619377835e-07,
"loss": 0.5292341709136963,
"step": 5113
},
{
"epoch": 2.4789142026175472,
"grad_norm": 12.043932927731756,
"learning_rate": 8.92859570927928e-07,
"loss": 0.7909167408943176,
"step": 5114
},
{
"epoch": 2.4793989335918565,
"grad_norm": 11.520028510353908,
"learning_rate": 8.912518864766817e-07,
"loss": 0.9739600419998169,
"step": 5115
},
{
"epoch": 2.4798836645661657,
"grad_norm": 10.44305910786702,
"learning_rate": 8.896455090954787e-07,
"loss": 0.36973685026168823,
"step": 5116
},
{
"epoch": 2.480368395540475,
"grad_norm": 9.731607203752137,
"learning_rate": 8.880404392953374e-07,
"loss": 0.25732624530792236,
"step": 5117
},
{
"epoch": 2.480853126514784,
"grad_norm": 11.890064026931787,
"learning_rate": 8.864366775868605e-07,
"loss": 0.5294973850250244,
"step": 5118
},
{
"epoch": 2.4813378574890934,
"grad_norm": 6.263507722144576,
"learning_rate": 8.848342244802366e-07,
"loss": 0.22377191483974457,
"step": 5119
},
{
"epoch": 2.4818225884634026,
"grad_norm": 13.07921841716906,
"learning_rate": 8.832330804852351e-07,
"loss": 0.3034033179283142,
"step": 5120
},
{
"epoch": 2.482307319437712,
"grad_norm": 8.176625942212505,
"learning_rate": 8.816332461112115e-07,
"loss": 0.4280610680580139,
"step": 5121
},
{
"epoch": 2.482792050412021,
"grad_norm": 9.988270560886573,
"learning_rate": 8.800347218671001e-07,
"loss": 0.9183732271194458,
"step": 5122
},
{
"epoch": 2.483276781386331,
"grad_norm": 14.896312326962605,
"learning_rate": 8.784375082614216e-07,
"loss": 0.4025680422782898,
"step": 5123
},
{
"epoch": 2.48376151236064,
"grad_norm": 9.147260208068674,
"learning_rate": 8.768416058022828e-07,
"loss": 0.2711940407752991,
"step": 5124
},
{
"epoch": 2.4842462433349493,
"grad_norm": 12.3120449340827,
"learning_rate": 8.752470149973686e-07,
"loss": 0.28876298666000366,
"step": 5125
},
{
"epoch": 2.4847309743092585,
"grad_norm": 8.814331646298324,
"learning_rate": 8.736537363539488e-07,
"loss": 0.38748735189437866,
"step": 5126
},
{
"epoch": 2.4852157052835677,
"grad_norm": 7.503166782432467,
"learning_rate": 8.720617703788731e-07,
"loss": 0.2904665172100067,
"step": 5127
},
{
"epoch": 2.485700436257877,
"grad_norm": 11.698216660045928,
"learning_rate": 8.704711175785802e-07,
"loss": 0.425048828125,
"step": 5128
},
{
"epoch": 2.486185167232186,
"grad_norm": 15.092670599245757,
"learning_rate": 8.688817784590831e-07,
"loss": 0.3026830554008484,
"step": 5129
},
{
"epoch": 2.4866698982064954,
"grad_norm": 4.814421358535483,
"learning_rate": 8.672937535259812e-07,
"loss": 0.12544503808021545,
"step": 5130
},
{
"epoch": 2.4871546291808047,
"grad_norm": 17.087843996172342,
"learning_rate": 8.65707043284455e-07,
"loss": 0.4575248062610626,
"step": 5131
},
{
"epoch": 2.487639360155114,
"grad_norm": 13.95966976735902,
"learning_rate": 8.641216482392667e-07,
"loss": 0.7188377976417542,
"step": 5132
},
{
"epoch": 2.488124091129423,
"grad_norm": 16.34925755863187,
"learning_rate": 8.625375688947613e-07,
"loss": 0.748095691204071,
"step": 5133
},
{
"epoch": 2.4886088221037324,
"grad_norm": 9.751170328836963,
"learning_rate": 8.609548057548639e-07,
"loss": 0.2745838761329651,
"step": 5134
},
{
"epoch": 2.4890935530780416,
"grad_norm": 10.531154265402876,
"learning_rate": 8.593733593230813e-07,
"loss": 0.5910099148750305,
"step": 5135
},
{
"epoch": 2.489578284052351,
"grad_norm": 12.365904226998943,
"learning_rate": 8.57793230102501e-07,
"loss": 0.4439387023448944,
"step": 5136
},
{
"epoch": 2.49006301502666,
"grad_norm": 26.258189138324283,
"learning_rate": 8.562144185957921e-07,
"loss": 0.8619256615638733,
"step": 5137
},
{
"epoch": 2.4905477460009693,
"grad_norm": 8.868466567122752,
"learning_rate": 8.546369253052034e-07,
"loss": 0.3427177965641022,
"step": 5138
},
{
"epoch": 2.4910324769752785,
"grad_norm": 99.35834600229225,
"learning_rate": 8.530607507325667e-07,
"loss": 0.7031149864196777,
"step": 5139
},
{
"epoch": 2.491517207949588,
"grad_norm": 11.477048014861465,
"learning_rate": 8.51485895379291e-07,
"loss": 0.5863415002822876,
"step": 5140
},
{
"epoch": 2.4920019389238974,
"grad_norm": 13.769264567567639,
"learning_rate": 8.499123597463677e-07,
"loss": 0.5092834830284119,
"step": 5141
},
{
"epoch": 2.4924866698982067,
"grad_norm": 14.388114896936811,
"learning_rate": 8.483401443343698e-07,
"loss": 0.23157083988189697,
"step": 5142
},
{
"epoch": 2.492971400872516,
"grad_norm": 15.356122998620165,
"learning_rate": 8.467692496434482e-07,
"loss": 1.0688319206237793,
"step": 5143
},
{
"epoch": 2.493456131846825,
"grad_norm": 9.945095334132757,
"learning_rate": 8.451996761733322e-07,
"loss": 0.6849609613418579,
"step": 5144
},
{
"epoch": 2.4939408628211344,
"grad_norm": 17.660318533756637,
"learning_rate": 8.43631424423334e-07,
"loss": 0.5404912829399109,
"step": 5145
},
{
"epoch": 2.4944255937954436,
"grad_norm": 14.971141883358818,
"learning_rate": 8.420644948923429e-07,
"loss": 0.8244320154190063,
"step": 5146
},
{
"epoch": 2.494910324769753,
"grad_norm": 10.474302602002806,
"learning_rate": 8.404988880788301e-07,
"loss": 0.4441289007663727,
"step": 5147
},
{
"epoch": 2.495395055744062,
"grad_norm": 14.188454116727653,
"learning_rate": 8.389346044808439e-07,
"loss": 0.5755598545074463,
"step": 5148
},
{
"epoch": 2.4958797867183713,
"grad_norm": 9.607618023646257,
"learning_rate": 8.373716445960123e-07,
"loss": 0.4448331892490387,
"step": 5149
},
{
"epoch": 2.4963645176926805,
"grad_norm": 11.012515376249205,
"learning_rate": 8.358100089215426e-07,
"loss": 0.7329759001731873,
"step": 5150
},
{
"epoch": 2.4968492486669898,
"grad_norm": 13.7969523505879,
"learning_rate": 8.342496979542203e-07,
"loss": 0.337097704410553,
"step": 5151
},
{
"epoch": 2.497333979641299,
"grad_norm": 12.292436522392482,
"learning_rate": 8.326907121904093e-07,
"loss": 0.8203500509262085,
"step": 5152
},
{
"epoch": 2.4978187106156082,
"grad_norm": 10.267953221853816,
"learning_rate": 8.311330521260525e-07,
"loss": 0.33090293407440186,
"step": 5153
},
{
"epoch": 2.4983034415899175,
"grad_norm": 13.041200138309659,
"learning_rate": 8.295767182566711e-07,
"loss": 0.5348773002624512,
"step": 5154
},
{
"epoch": 2.4987881725642267,
"grad_norm": 25.839085424745416,
"learning_rate": 8.280217110773625e-07,
"loss": 0.6884046792984009,
"step": 5155
},
{
"epoch": 2.499272903538536,
"grad_norm": 10.368476782703468,
"learning_rate": 8.264680310828066e-07,
"loss": 1.0154566764831543,
"step": 5156
},
{
"epoch": 2.4997576345128456,
"grad_norm": 10.236421689793932,
"learning_rate": 8.249156787672564e-07,
"loss": 0.34912508726119995,
"step": 5157
},
{
"epoch": 2.5002423654871544,
"grad_norm": 12.126822200816443,
"learning_rate": 8.23364654624545e-07,
"loss": 0.6828233003616333,
"step": 5158
},
{
"epoch": 2.500727096461464,
"grad_norm": 9.00864013746354,
"learning_rate": 8.218149591480834e-07,
"loss": 0.7068486213684082,
"step": 5159
},
{
"epoch": 2.5012118274357733,
"grad_norm": 11.165288799341868,
"learning_rate": 8.202665928308551e-07,
"loss": 0.5260044932365417,
"step": 5160
},
{
"epoch": 2.5016965584100825,
"grad_norm": 10.525313806936062,
"learning_rate": 8.187195561654282e-07,
"loss": 0.878354012966156,
"step": 5161
},
{
"epoch": 2.5021812893843918,
"grad_norm": 9.918705547557021,
"learning_rate": 8.17173849643943e-07,
"loss": 1.1444768905639648,
"step": 5162
},
{
"epoch": 2.502666020358701,
"grad_norm": 18.652155310399234,
"learning_rate": 8.156294737581183e-07,
"loss": 0.8901700973510742,
"step": 5163
},
{
"epoch": 2.5031507513330102,
"grad_norm": 7.762202383370473,
"learning_rate": 8.140864289992478e-07,
"loss": 0.20104451477527618,
"step": 5164
},
{
"epoch": 2.5036354823073195,
"grad_norm": 7.361795663776279,
"learning_rate": 8.125447158582045e-07,
"loss": 0.33507105708122253,
"step": 5165
},
{
"epoch": 2.5041202132816287,
"grad_norm": 11.387639437218011,
"learning_rate": 8.110043348254354e-07,
"loss": 0.7102490663528442,
"step": 5166
},
{
"epoch": 2.504604944255938,
"grad_norm": 11.544971364181153,
"learning_rate": 8.094652863909652e-07,
"loss": 0.35308969020843506,
"step": 5167
},
{
"epoch": 2.505089675230247,
"grad_norm": 18.2054524477966,
"learning_rate": 8.07927571044394e-07,
"loss": 2.8292787075042725,
"step": 5168
},
{
"epoch": 2.5055744062045564,
"grad_norm": 9.81105894460356,
"learning_rate": 8.063911892748965e-07,
"loss": 0.6467292308807373,
"step": 5169
},
{
"epoch": 2.5060591371788656,
"grad_norm": 7.128588125520516,
"learning_rate": 8.048561415712269e-07,
"loss": 0.24123141169548035,
"step": 5170
},
{
"epoch": 2.506543868153175,
"grad_norm": 16.597348185774038,
"learning_rate": 8.033224284217122e-07,
"loss": 0.4327443242073059,
"step": 5171
},
{
"epoch": 2.507028599127484,
"grad_norm": 13.181123757977048,
"learning_rate": 8.017900503142556e-07,
"loss": 0.35542425513267517,
"step": 5172
},
{
"epoch": 2.5075133301017933,
"grad_norm": 11.147069868339708,
"learning_rate": 8.002590077363343e-07,
"loss": 0.6497737765312195,
"step": 5173
},
{
"epoch": 2.507998061076103,
"grad_norm": 12.806147667552917,
"learning_rate": 7.987293011750025e-07,
"loss": 0.8118247389793396,
"step": 5174
},
{
"epoch": 2.508482792050412,
"grad_norm": 13.005268482484373,
"learning_rate": 7.972009311168883e-07,
"loss": 0.5602610111236572,
"step": 5175
},
{
"epoch": 2.5089675230247215,
"grad_norm": 16.34345441912836,
"learning_rate": 7.956738980481954e-07,
"loss": 1.0860916376113892,
"step": 5176
},
{
"epoch": 2.5094522539990303,
"grad_norm": 16.454159393876314,
"learning_rate": 7.941482024547009e-07,
"loss": 0.5363277792930603,
"step": 5177
},
{
"epoch": 2.50993698497334,
"grad_norm": 10.64942924086907,
"learning_rate": 7.926238448217583e-07,
"loss": 0.29958274960517883,
"step": 5178
},
{
"epoch": 2.510421715947649,
"grad_norm": 10.973397449321773,
"learning_rate": 7.911008256342929e-07,
"loss": 0.6033792495727539,
"step": 5179
},
{
"epoch": 2.5109064469219584,
"grad_norm": 8.188142534954178,
"learning_rate": 7.895791453768076e-07,
"loss": 0.5978741645812988,
"step": 5180
},
{
"epoch": 2.5113911778962676,
"grad_norm": 17.164485570209866,
"learning_rate": 7.880588045333776e-07,
"loss": 0.37425172328948975,
"step": 5181
},
{
"epoch": 2.511875908870577,
"grad_norm": 9.75564456687569,
"learning_rate": 7.865398035876504e-07,
"loss": 0.5708469748497009,
"step": 5182
},
{
"epoch": 2.512360639844886,
"grad_norm": 9.23024609413222,
"learning_rate": 7.850221430228489e-07,
"loss": 0.3808041512966156,
"step": 5183
},
{
"epoch": 2.5128453708191953,
"grad_norm": 12.445044725577343,
"learning_rate": 7.83505823321769e-07,
"loss": 0.41913333535194397,
"step": 5184
},
{
"epoch": 2.5133301017935046,
"grad_norm": 15.334376233710907,
"learning_rate": 7.819908449667824e-07,
"loss": 0.7106448411941528,
"step": 5185
},
{
"epoch": 2.513814832767814,
"grad_norm": 16.67139745359352,
"learning_rate": 7.80477208439831e-07,
"loss": 0.8754925727844238,
"step": 5186
},
{
"epoch": 2.514299563742123,
"grad_norm": 11.777270905434069,
"learning_rate": 7.78964914222432e-07,
"loss": 0.5458954572677612,
"step": 5187
},
{
"epoch": 2.5147842947164323,
"grad_norm": 23.452808286815124,
"learning_rate": 7.774539627956735e-07,
"loss": 0.8046850562095642,
"step": 5188
},
{
"epoch": 2.5152690256907415,
"grad_norm": 9.767338253732412,
"learning_rate": 7.759443546402179e-07,
"loss": 0.2682785987854004,
"step": 5189
},
{
"epoch": 2.5157537566650507,
"grad_norm": 13.507076204513542,
"learning_rate": 7.744360902363002e-07,
"loss": 0.4033460021018982,
"step": 5190
},
{
"epoch": 2.5162384876393604,
"grad_norm": 9.257933452529246,
"learning_rate": 7.729291700637276e-07,
"loss": 0.5908496975898743,
"step": 5191
},
{
"epoch": 2.516723218613669,
"grad_norm": 12.869262975242007,
"learning_rate": 7.714235946018795e-07,
"loss": 0.34078124165534973,
"step": 5192
},
{
"epoch": 2.517207949587979,
"grad_norm": 10.257002888898587,
"learning_rate": 7.699193643297065e-07,
"loss": 0.5963305234909058,
"step": 5193
},
{
"epoch": 2.5176926805622877,
"grad_norm": 12.058306719320374,
"learning_rate": 7.684164797257354e-07,
"loss": 0.6415491104125977,
"step": 5194
},
{
"epoch": 2.5181774115365974,
"grad_norm": 9.754601711700985,
"learning_rate": 7.669149412680604e-07,
"loss": 0.5438104271888733,
"step": 5195
},
{
"epoch": 2.5186621425109066,
"grad_norm": 10.25887539958661,
"learning_rate": 7.654147494343506e-07,
"loss": 0.4035845696926117,
"step": 5196
},
{
"epoch": 2.519146873485216,
"grad_norm": 12.058711564460808,
"learning_rate": 7.639159047018429e-07,
"loss": 0.7954018712043762,
"step": 5197
},
{
"epoch": 2.519631604459525,
"grad_norm": 10.688713435373847,
"learning_rate": 7.624184075473479e-07,
"loss": 0.3709966540336609,
"step": 5198
},
{
"epoch": 2.5201163354338343,
"grad_norm": 9.26668243734778,
"learning_rate": 7.609222584472498e-07,
"loss": 0.3273622989654541,
"step": 5199
},
{
"epoch": 2.5206010664081435,
"grad_norm": 13.48506504541645,
"learning_rate": 7.594274578775007e-07,
"loss": 0.5977814197540283,
"step": 5200
},
{
"epoch": 2.5210857973824528,
"grad_norm": 9.61981291132735,
"learning_rate": 7.579340063136248e-07,
"loss": 0.41646572947502136,
"step": 5201
},
{
"epoch": 2.521570528356762,
"grad_norm": 10.286172023278004,
"learning_rate": 7.564419042307158e-07,
"loss": 0.499817430973053,
"step": 5202
},
{
"epoch": 2.522055259331071,
"grad_norm": 10.605545285253383,
"learning_rate": 7.549511521034436e-07,
"loss": 0.802613377571106,
"step": 5203
},
{
"epoch": 2.5225399903053805,
"grad_norm": 13.966595890429222,
"learning_rate": 7.534617504060404e-07,
"loss": 0.6010617613792419,
"step": 5204
},
{
"epoch": 2.5230247212796897,
"grad_norm": 16.529593475075195,
"learning_rate": 7.519736996123139e-07,
"loss": 0.8197612762451172,
"step": 5205
},
{
"epoch": 2.523509452253999,
"grad_norm": 12.65106234236112,
"learning_rate": 7.50487000195641e-07,
"loss": 0.6611180901527405,
"step": 5206
},
{
"epoch": 2.523994183228308,
"grad_norm": 18.067256164636145,
"learning_rate": 7.490016526289685e-07,
"loss": 0.5366533994674683,
"step": 5207
},
{
"epoch": 2.5244789142026174,
"grad_norm": 10.061507535580969,
"learning_rate": 7.475176573848142e-07,
"loss": 0.3019563853740692,
"step": 5208
},
{
"epoch": 2.5249636451769266,
"grad_norm": 14.136318201943983,
"learning_rate": 7.460350149352652e-07,
"loss": 0.4951847493648529,
"step": 5209
},
{
"epoch": 2.5254483761512363,
"grad_norm": 8.801032638733597,
"learning_rate": 7.445537257519775e-07,
"loss": 0.37318283319473267,
"step": 5210
},
{
"epoch": 2.525933107125545,
"grad_norm": 10.152174614525007,
"learning_rate": 7.430737903061763e-07,
"loss": 0.31460291147232056,
"step": 5211
},
{
"epoch": 2.5264178380998548,
"grad_norm": 13.2054102054077,
"learning_rate": 7.415952090686579e-07,
"loss": 0.7984022498130798,
"step": 5212
},
{
"epoch": 2.526902569074164,
"grad_norm": 9.969307109121411,
"learning_rate": 7.401179825097865e-07,
"loss": 0.4089438021183014,
"step": 5213
},
{
"epoch": 2.5273873000484732,
"grad_norm": 6.52321093155267,
"learning_rate": 7.386421110994956e-07,
"loss": 0.22463569045066833,
"step": 5214
},
{
"epoch": 2.5278720310227825,
"grad_norm": 15.19992226797299,
"learning_rate": 7.371675953072871e-07,
"loss": 0.8300232887268066,
"step": 5215
},
{
"epoch": 2.5283567619970917,
"grad_norm": 12.146515801546354,
"learning_rate": 7.356944356022322e-07,
"loss": 0.24235066771507263,
"step": 5216
},
{
"epoch": 2.528841492971401,
"grad_norm": 14.730236469624213,
"learning_rate": 7.342226324529717e-07,
"loss": 0.561655580997467,
"step": 5217
},
{
"epoch": 2.52932622394571,
"grad_norm": 8.862096294934044,
"learning_rate": 7.327521863277148e-07,
"loss": 0.263545423746109,
"step": 5218
},
{
"epoch": 2.5298109549200194,
"grad_norm": 11.065894144642124,
"learning_rate": 7.312830976942359e-07,
"loss": 0.8126932382583618,
"step": 5219
},
{
"epoch": 2.5302956858943286,
"grad_norm": 11.638791775383488,
"learning_rate": 7.298153670198799e-07,
"loss": 0.4509631097316742,
"step": 5220
},
{
"epoch": 2.530780416868638,
"grad_norm": 10.353478138400904,
"learning_rate": 7.283489947715589e-07,
"loss": 0.24191921949386597,
"step": 5221
},
{
"epoch": 2.531265147842947,
"grad_norm": 10.008575806830976,
"learning_rate": 7.268839814157564e-07,
"loss": 0.29851847887039185,
"step": 5222
},
{
"epoch": 2.5317498788172563,
"grad_norm": 8.82463258537528,
"learning_rate": 7.254203274185185e-07,
"loss": 0.6864142417907715,
"step": 5223
},
{
"epoch": 2.5322346097915656,
"grad_norm": 14.780753061364601,
"learning_rate": 7.239580332454616e-07,
"loss": 0.8043969869613647,
"step": 5224
},
{
"epoch": 2.532719340765875,
"grad_norm": 11.940352164356062,
"learning_rate": 7.224970993617685e-07,
"loss": 0.23131749033927917,
"step": 5225
},
{
"epoch": 2.533204071740184,
"grad_norm": 8.247393995403797,
"learning_rate": 7.210375262321906e-07,
"loss": 0.5085111856460571,
"step": 5226
},
{
"epoch": 2.5336888027144937,
"grad_norm": 8.47942978956117,
"learning_rate": 7.19579314321045e-07,
"loss": 0.3820352852344513,
"step": 5227
},
{
"epoch": 2.5341735336888025,
"grad_norm": 14.115291428982534,
"learning_rate": 7.181224640922168e-07,
"loss": 0.7498958110809326,
"step": 5228
},
{
"epoch": 2.534658264663112,
"grad_norm": 12.76185996561033,
"learning_rate": 7.166669760091565e-07,
"loss": 0.40938812494277954,
"step": 5229
},
{
"epoch": 2.535142995637421,
"grad_norm": 15.465067146348002,
"learning_rate": 7.152128505348821e-07,
"loss": 0.620145857334137,
"step": 5230
},
{
"epoch": 2.5356277266117306,
"grad_norm": 10.354628112381922,
"learning_rate": 7.1376008813198e-07,
"loss": 0.5393531918525696,
"step": 5231
},
{
"epoch": 2.53611245758604,
"grad_norm": 10.02642432157708,
"learning_rate": 7.123086892626002e-07,
"loss": 0.4191517233848572,
"step": 5232
},
{
"epoch": 2.536597188560349,
"grad_norm": 15.749226762545522,
"learning_rate": 7.108586543884599e-07,
"loss": 0.4974726438522339,
"step": 5233
},
{
"epoch": 2.5370819195346583,
"grad_norm": 15.842948864450051,
"learning_rate": 7.09409983970844e-07,
"loss": 0.4539375603199005,
"step": 5234
},
{
"epoch": 2.5375666505089676,
"grad_norm": 18.72524811493014,
"learning_rate": 7.079626784705978e-07,
"loss": 0.3586680293083191,
"step": 5235
},
{
"epoch": 2.538051381483277,
"grad_norm": 10.43298965623579,
"learning_rate": 7.065167383481403e-07,
"loss": 0.30617254972457886,
"step": 5236
},
{
"epoch": 2.538536112457586,
"grad_norm": 12.80331944040433,
"learning_rate": 7.050721640634506e-07,
"loss": 0.7266295552253723,
"step": 5237
},
{
"epoch": 2.5390208434318953,
"grad_norm": 11.333526358750941,
"learning_rate": 7.036289560760757e-07,
"loss": 0.3852267563343048,
"step": 5238
},
{
"epoch": 2.5395055744062045,
"grad_norm": 11.794581184241457,
"learning_rate": 7.021871148451265e-07,
"loss": 0.6785153150558472,
"step": 5239
},
{
"epoch": 2.5399903053805137,
"grad_norm": 13.757657859746214,
"learning_rate": 7.007466408292801e-07,
"loss": 0.5606598854064941,
"step": 5240
},
{
"epoch": 2.540475036354823,
"grad_norm": 12.005772956086407,
"learning_rate": 6.993075344867783e-07,
"loss": 0.8204671740531921,
"step": 5241
},
{
"epoch": 2.540959767329132,
"grad_norm": 9.222579329652232,
"learning_rate": 6.978697962754283e-07,
"loss": 0.44794636964797974,
"step": 5242
},
{
"epoch": 2.5414444983034414,
"grad_norm": 16.816362469911596,
"learning_rate": 6.964334266526007e-07,
"loss": 0.5415976643562317,
"step": 5243
},
{
"epoch": 2.541929229277751,
"grad_norm": 11.048416060933745,
"learning_rate": 6.949984260752318e-07,
"loss": 0.44183093309402466,
"step": 5244
},
{
"epoch": 2.54241396025206,
"grad_norm": 7.1676983003007475,
"learning_rate": 6.935647949998231e-07,
"loss": 0.48899316787719727,
"step": 5245
},
{
"epoch": 2.5428986912263696,
"grad_norm": 13.199102988152157,
"learning_rate": 6.921325338824397e-07,
"loss": 0.3603067994117737,
"step": 5246
},
{
"epoch": 2.5433834222006784,
"grad_norm": 13.758905303007209,
"learning_rate": 6.907016431787105e-07,
"loss": 0.4875377118587494,
"step": 5247
},
{
"epoch": 2.543868153174988,
"grad_norm": 11.719774332092472,
"learning_rate": 6.892721233438288e-07,
"loss": 0.43306171894073486,
"step": 5248
},
{
"epoch": 2.5443528841492973,
"grad_norm": 10.789936357407706,
"learning_rate": 6.878439748325511e-07,
"loss": 0.43765366077423096,
"step": 5249
},
{
"epoch": 2.5448376151236065,
"grad_norm": 10.867479532364122,
"learning_rate": 6.864171980991985e-07,
"loss": 0.6202862858772278,
"step": 5250
},
{
"epoch": 2.5453223460979157,
"grad_norm": 6.584611289452971,
"learning_rate": 6.849917935976564e-07,
"loss": 0.5145155191421509,
"step": 5251
},
{
"epoch": 2.545807077072225,
"grad_norm": 11.387681707262653,
"learning_rate": 6.835677617813719e-07,
"loss": 0.3264930546283722,
"step": 5252
},
{
"epoch": 2.546291808046534,
"grad_norm": 10.499759591951443,
"learning_rate": 6.82145103103356e-07,
"loss": 0.19178640842437744,
"step": 5253
},
{
"epoch": 2.5467765390208434,
"grad_norm": 10.91835594519579,
"learning_rate": 6.807238180161835e-07,
"loss": 0.5042040348052979,
"step": 5254
},
{
"epoch": 2.5472612699951527,
"grad_norm": 19.093767532662383,
"learning_rate": 6.793039069719925e-07,
"loss": 0.7786322832107544,
"step": 5255
},
{
"epoch": 2.547746000969462,
"grad_norm": 11.481808780036523,
"learning_rate": 6.778853704224847e-07,
"loss": 1.0647106170654297,
"step": 5256
},
{
"epoch": 2.548230731943771,
"grad_norm": 8.326158610625814,
"learning_rate": 6.7646820881892e-07,
"loss": 0.6545748710632324,
"step": 5257
},
{
"epoch": 2.5487154629180804,
"grad_norm": 10.484859310456699,
"learning_rate": 6.750524226121247e-07,
"loss": 0.5989864468574524,
"step": 5258
},
{
"epoch": 2.5492001938923896,
"grad_norm": 24.039525604053264,
"learning_rate": 6.736380122524893e-07,
"loss": 0.47405052185058594,
"step": 5259
},
{
"epoch": 2.549684924866699,
"grad_norm": 7.8738325376317775,
"learning_rate": 6.722249781899631e-07,
"loss": 0.17151108384132385,
"step": 5260
},
{
"epoch": 2.550169655841008,
"grad_norm": 11.62794063189841,
"learning_rate": 6.708133208740592e-07,
"loss": 0.3050806522369385,
"step": 5261
},
{
"epoch": 2.5506543868153173,
"grad_norm": 12.785913138232306,
"learning_rate": 6.69403040753851e-07,
"loss": 0.9988372325897217,
"step": 5262
},
{
"epoch": 2.551139117789627,
"grad_norm": 18.429325095516692,
"learning_rate": 6.67994138277977e-07,
"loss": 0.9263361692428589,
"step": 5263
},
{
"epoch": 2.5516238487639358,
"grad_norm": 17.789843846270124,
"learning_rate": 6.665866138946342e-07,
"loss": 0.31618648767471313,
"step": 5264
},
{
"epoch": 2.5521085797382455,
"grad_norm": 10.444915154016895,
"learning_rate": 6.651804680515828e-07,
"loss": 0.2879525423049927,
"step": 5265
},
{
"epoch": 2.5525933107125547,
"grad_norm": 9.672684520059526,
"learning_rate": 6.637757011961448e-07,
"loss": 0.8028120994567871,
"step": 5266
},
{
"epoch": 2.553078041686864,
"grad_norm": 9.710569927172756,
"learning_rate": 6.623723137752025e-07,
"loss": 0.5649852156639099,
"step": 5267
},
{
"epoch": 2.553562772661173,
"grad_norm": 8.765956277623566,
"learning_rate": 6.609703062351991e-07,
"loss": 0.39483776688575745,
"step": 5268
},
{
"epoch": 2.5540475036354824,
"grad_norm": 10.433902057624998,
"learning_rate": 6.595696790221411e-07,
"loss": 0.5819726586341858,
"step": 5269
},
{
"epoch": 2.5545322346097916,
"grad_norm": 13.186148697646466,
"learning_rate": 6.581704325815941e-07,
"loss": 0.58092200756073,
"step": 5270
},
{
"epoch": 2.555016965584101,
"grad_norm": 9.69430298540356,
"learning_rate": 6.56772567358685e-07,
"loss": 0.6161866784095764,
"step": 5271
},
{
"epoch": 2.55550169655841,
"grad_norm": 11.884563345183219,
"learning_rate": 6.553760837980982e-07,
"loss": 0.6843283772468567,
"step": 5272
},
{
"epoch": 2.5559864275327193,
"grad_norm": 15.823197336504222,
"learning_rate": 6.539809823440845e-07,
"loss": 0.388225257396698,
"step": 5273
},
{
"epoch": 2.5564711585070286,
"grad_norm": 10.224109698674782,
"learning_rate": 6.525872634404506e-07,
"loss": 0.24939113855361938,
"step": 5274
},
{
"epoch": 2.556955889481338,
"grad_norm": 8.006916115426963,
"learning_rate": 6.511949275305657e-07,
"loss": 0.2716357111930847,
"step": 5275
},
{
"epoch": 2.557440620455647,
"grad_norm": 13.171435555170335,
"learning_rate": 6.498039750573571e-07,
"loss": 0.6872978210449219,
"step": 5276
},
{
"epoch": 2.5579253514299563,
"grad_norm": 17.640873648794617,
"learning_rate": 6.484144064633113e-07,
"loss": 0.5162938833236694,
"step": 5277
},
{
"epoch": 2.5584100824042655,
"grad_norm": 12.354630209541325,
"learning_rate": 6.47026222190481e-07,
"loss": 0.25918617844581604,
"step": 5278
},
{
"epoch": 2.5588948133785747,
"grad_norm": 12.916293450136425,
"learning_rate": 6.456394226804696e-07,
"loss": 0.43214309215545654,
"step": 5279
},
{
"epoch": 2.5593795443528844,
"grad_norm": 10.40915146516383,
"learning_rate": 6.442540083744453e-07,
"loss": 1.6842622756958008,
"step": 5280
},
{
"epoch": 2.559864275327193,
"grad_norm": 10.677413860862904,
"learning_rate": 6.428699797131338e-07,
"loss": 0.42942309379577637,
"step": 5281
},
{
"epoch": 2.560349006301503,
"grad_norm": 9.445621257856802,
"learning_rate": 6.414873371368207e-07,
"loss": 0.7796382904052734,
"step": 5282
},
{
"epoch": 2.5608337372758117,
"grad_norm": 14.641548728648695,
"learning_rate": 6.401060810853521e-07,
"loss": 0.3128775954246521,
"step": 5283
},
{
"epoch": 2.5613184682501213,
"grad_norm": 17.251272037704187,
"learning_rate": 6.387262119981308e-07,
"loss": 0.9540740847587585,
"step": 5284
},
{
"epoch": 2.5618031992244306,
"grad_norm": 10.383584982493867,
"learning_rate": 6.37347730314119e-07,
"loss": 0.21876254677772522,
"step": 5285
},
{
"epoch": 2.56228793019874,
"grad_norm": 6.1734467523675045,
"learning_rate": 6.359706364718371e-07,
"loss": 0.20470035076141357,
"step": 5286
},
{
"epoch": 2.562772661173049,
"grad_norm": 10.049764489745725,
"learning_rate": 6.345949309093652e-07,
"loss": 0.7563157677650452,
"step": 5287
},
{
"epoch": 2.5632573921473583,
"grad_norm": 14.840467945595938,
"learning_rate": 6.33220614064341e-07,
"loss": 0.5269960165023804,
"step": 5288
},
{
"epoch": 2.5637421231216675,
"grad_norm": 14.469389433102476,
"learning_rate": 6.318476863739603e-07,
"loss": 0.8838167190551758,
"step": 5289
},
{
"epoch": 2.5642268540959767,
"grad_norm": 11.211982109742895,
"learning_rate": 6.304761482749777e-07,
"loss": 0.2843981385231018,
"step": 5290
},
{
"epoch": 2.564711585070286,
"grad_norm": 10.092366345188431,
"learning_rate": 6.291060002037036e-07,
"loss": 0.18182142078876495,
"step": 5291
},
{
"epoch": 2.565196316044595,
"grad_norm": 14.28413019030447,
"learning_rate": 6.277372425960099e-07,
"loss": 0.3083690404891968,
"step": 5292
},
{
"epoch": 2.5656810470189044,
"grad_norm": 8.797549083249809,
"learning_rate": 6.263698758873238e-07,
"loss": 0.1531216949224472,
"step": 5293
},
{
"epoch": 2.5661657779932137,
"grad_norm": 9.910939805358288,
"learning_rate": 6.250039005126312e-07,
"loss": 0.45073604583740234,
"step": 5294
},
{
"epoch": 2.566650508967523,
"grad_norm": 9.61811186417529,
"learning_rate": 6.23639316906472e-07,
"loss": 0.2588232457637787,
"step": 5295
},
{
"epoch": 2.567135239941832,
"grad_norm": 13.575137182210737,
"learning_rate": 6.222761255029463e-07,
"loss": 0.22414301335811615,
"step": 5296
},
{
"epoch": 2.567619970916142,
"grad_norm": 9.067446516139071,
"learning_rate": 6.209143267357132e-07,
"loss": 0.43266814947128296,
"step": 5297
},
{
"epoch": 2.5681047018904506,
"grad_norm": 12.787581134352708,
"learning_rate": 6.195539210379848e-07,
"loss": 0.7370214462280273,
"step": 5298
},
{
"epoch": 2.5685894328647603,
"grad_norm": 11.199353949783317,
"learning_rate": 6.18194908842532e-07,
"loss": 0.5289144515991211,
"step": 5299
},
{
"epoch": 2.569074163839069,
"grad_norm": 10.8719418975076,
"learning_rate": 6.168372905816822e-07,
"loss": 0.5173137187957764,
"step": 5300
},
{
"epoch": 2.5695588948133787,
"grad_norm": 10.876897233538804,
"learning_rate": 6.154810666873196e-07,
"loss": 0.475345641374588,
"step": 5301
},
{
"epoch": 2.570043625787688,
"grad_norm": 9.455556696247879,
"learning_rate": 6.141262375908846e-07,
"loss": 0.5842643976211548,
"step": 5302
},
{
"epoch": 2.570528356761997,
"grad_norm": 12.631539471364727,
"learning_rate": 6.127728037233732e-07,
"loss": 0.6083354949951172,
"step": 5303
},
{
"epoch": 2.5710130877363064,
"grad_norm": 11.96654561732189,
"learning_rate": 6.114207655153382e-07,
"loss": 0.6871814727783203,
"step": 5304
},
{
"epoch": 2.5714978187106157,
"grad_norm": 16.318830493867527,
"learning_rate": 6.100701233968876e-07,
"loss": 0.49108684062957764,
"step": 5305
},
{
"epoch": 2.571982549684925,
"grad_norm": 7.302313971274541,
"learning_rate": 6.087208777976883e-07,
"loss": 0.43617022037506104,
"step": 5306
},
{
"epoch": 2.572467280659234,
"grad_norm": 14.968609160489077,
"learning_rate": 6.073730291469598e-07,
"loss": 3.1627655029296875,
"step": 5307
},
{
"epoch": 2.5729520116335434,
"grad_norm": 6.973729983369836,
"learning_rate": 6.06026577873477e-07,
"loss": 0.15709719061851501,
"step": 5308
},
{
"epoch": 2.5734367426078526,
"grad_norm": 9.250129951747459,
"learning_rate": 6.046815244055737e-07,
"loss": 0.47064656019210815,
"step": 5309
},
{
"epoch": 2.573921473582162,
"grad_norm": 8.538299675634823,
"learning_rate": 6.033378691711333e-07,
"loss": 0.4251113533973694,
"step": 5310
},
{
"epoch": 2.574406204556471,
"grad_norm": 27.66317749440441,
"learning_rate": 6.019956125976007e-07,
"loss": 2.178931951522827,
"step": 5311
},
{
"epoch": 2.5748909355307803,
"grad_norm": 9.528647795522735,
"learning_rate": 6.006547551119712e-07,
"loss": 0.6812432408332825,
"step": 5312
},
{
"epoch": 2.5753756665050895,
"grad_norm": 11.711278240209541,
"learning_rate": 5.993152971407984e-07,
"loss": 0.6234825849533081,
"step": 5313
},
{
"epoch": 2.575860397479399,
"grad_norm": 7.795409253675787,
"learning_rate": 5.979772391101862e-07,
"loss": 0.3846379518508911,
"step": 5314
},
{
"epoch": 2.576345128453708,
"grad_norm": 13.557341365450515,
"learning_rate": 5.966405814457999e-07,
"loss": 0.21489863097667694,
"step": 5315
},
{
"epoch": 2.5768298594280177,
"grad_norm": 15.228330068280716,
"learning_rate": 5.953053245728541e-07,
"loss": 0.46749913692474365,
"step": 5316
},
{
"epoch": 2.5773145904023265,
"grad_norm": 11.046978858916832,
"learning_rate": 5.939714689161175e-07,
"loss": 0.24091699719429016,
"step": 5317
},
{
"epoch": 2.577799321376636,
"grad_norm": 14.063766896076157,
"learning_rate": 5.926390148999162e-07,
"loss": 0.47964394092559814,
"step": 5318
},
{
"epoch": 2.5782840523509454,
"grad_norm": 7.20839743588004,
"learning_rate": 5.913079629481267e-07,
"loss": 0.24857065081596375,
"step": 5319
},
{
"epoch": 2.5787687833252546,
"grad_norm": 8.251653845160414,
"learning_rate": 5.899783134841846e-07,
"loss": 0.644751787185669,
"step": 5320
},
{
"epoch": 2.579253514299564,
"grad_norm": 10.498370734667931,
"learning_rate": 5.886500669310752e-07,
"loss": 0.3141726553440094,
"step": 5321
},
{
"epoch": 2.579738245273873,
"grad_norm": 10.179527367711534,
"learning_rate": 5.87323223711339e-07,
"loss": 0.6486436128616333,
"step": 5322
},
{
"epoch": 2.5802229762481823,
"grad_norm": 12.021587625475842,
"learning_rate": 5.859977842470693e-07,
"loss": 0.39998531341552734,
"step": 5323
},
{
"epoch": 2.5807077072224915,
"grad_norm": 9.618921603555167,
"learning_rate": 5.846737489599131e-07,
"loss": 0.3939536213874817,
"step": 5324
},
{
"epoch": 2.5811924381968008,
"grad_norm": 7.218334895661397,
"learning_rate": 5.833511182710716e-07,
"loss": 0.3763646185398102,
"step": 5325
},
{
"epoch": 2.58167716917111,
"grad_norm": 7.096540700432124,
"learning_rate": 5.820298926012985e-07,
"loss": 0.478519469499588,
"step": 5326
},
{
"epoch": 2.5821619001454192,
"grad_norm": 12.165495946971088,
"learning_rate": 5.807100723708998e-07,
"loss": 0.5184627175331116,
"step": 5327
},
{
"epoch": 2.5826466311197285,
"grad_norm": 14.842807078842087,
"learning_rate": 5.793916579997344e-07,
"loss": 0.9664980173110962,
"step": 5328
},
{
"epoch": 2.5831313620940377,
"grad_norm": 13.790218776881705,
"learning_rate": 5.78074649907217e-07,
"loss": 0.6856528520584106,
"step": 5329
},
{
"epoch": 2.583616093068347,
"grad_norm": 15.442790089403651,
"learning_rate": 5.76759048512312e-07,
"loss": 0.3691287338733673,
"step": 5330
},
{
"epoch": 2.584100824042656,
"grad_norm": 11.230355686190695,
"learning_rate": 5.754448542335367e-07,
"loss": 0.3536158800125122,
"step": 5331
},
{
"epoch": 2.5845855550169654,
"grad_norm": 9.516181746241747,
"learning_rate": 5.741320674889606e-07,
"loss": 0.35371553897857666,
"step": 5332
},
{
"epoch": 2.585070285991275,
"grad_norm": 11.27732339270348,
"learning_rate": 5.72820688696204e-07,
"loss": 0.2472122609615326,
"step": 5333
},
{
"epoch": 2.585555016965584,
"grad_norm": 9.715738364489756,
"learning_rate": 5.715107182724449e-07,
"loss": 1.0334928035736084,
"step": 5334
},
{
"epoch": 2.5860397479398936,
"grad_norm": 11.593056270110418,
"learning_rate": 5.702021566344079e-07,
"loss": 0.5249466896057129,
"step": 5335
},
{
"epoch": 2.5865244789142023,
"grad_norm": 8.344216855455427,
"learning_rate": 5.688950041983704e-07,
"loss": 0.40798482298851013,
"step": 5336
},
{
"epoch": 2.587009209888512,
"grad_norm": 11.424761870397717,
"learning_rate": 5.675892613801631e-07,
"loss": 0.5892904996871948,
"step": 5337
},
{
"epoch": 2.5874939408628213,
"grad_norm": 7.514579277570162,
"learning_rate": 5.662849285951666e-07,
"loss": 0.6041972041130066,
"step": 5338
},
{
"epoch": 2.5879786718371305,
"grad_norm": 10.357853346371424,
"learning_rate": 5.649820062583145e-07,
"loss": 0.7057873606681824,
"step": 5339
},
{
"epoch": 2.5884634028114397,
"grad_norm": 17.868305757634847,
"learning_rate": 5.636804947840907e-07,
"loss": 0.36028823256492615,
"step": 5340
},
{
"epoch": 2.588948133785749,
"grad_norm": 68.19726221712664,
"learning_rate": 5.623803945865297e-07,
"loss": 2.4872841835021973,
"step": 5341
},
{
"epoch": 2.589432864760058,
"grad_norm": 11.529965600567076,
"learning_rate": 5.610817060792173e-07,
"loss": 0.5302658081054688,
"step": 5342
},
{
"epoch": 2.5899175957343674,
"grad_norm": 23.93130012756396,
"learning_rate": 5.59784429675293e-07,
"loss": 0.6529248356819153,
"step": 5343
},
{
"epoch": 2.5904023267086767,
"grad_norm": 18.235590575194927,
"learning_rate": 5.584885657874439e-07,
"loss": 0.6219267249107361,
"step": 5344
},
{
"epoch": 2.590887057682986,
"grad_norm": 9.27211884953253,
"learning_rate": 5.571941148279081e-07,
"loss": 0.3531941771507263,
"step": 5345
},
{
"epoch": 2.591371788657295,
"grad_norm": 12.180872201606688,
"learning_rate": 5.55901077208475e-07,
"loss": 0.32594379782676697,
"step": 5346
},
{
"epoch": 2.5918565196316043,
"grad_norm": 15.506955889168184,
"learning_rate": 5.546094533404844e-07,
"loss": 0.4514632821083069,
"step": 5347
},
{
"epoch": 2.5923412506059136,
"grad_norm": 18.279719208923307,
"learning_rate": 5.533192436348262e-07,
"loss": 0.738452672958374,
"step": 5348
},
{
"epoch": 2.592825981580223,
"grad_norm": 14.393541856792202,
"learning_rate": 5.520304485019401e-07,
"loss": 0.46203064918518066,
"step": 5349
},
{
"epoch": 2.5933107125545325,
"grad_norm": 14.101523685521567,
"learning_rate": 5.507430683518161e-07,
"loss": 0.25629836320877075,
"step": 5350
},
{
"epoch": 2.5937954435288413,
"grad_norm": 8.09968917857487,
"learning_rate": 5.494571035939938e-07,
"loss": 0.2265012413263321,
"step": 5351
},
{
"epoch": 2.594280174503151,
"grad_norm": 8.16743460085136,
"learning_rate": 5.481725546375627e-07,
"loss": 0.5482033491134644,
"step": 5352
},
{
"epoch": 2.5947649054774597,
"grad_norm": 12.70851661183531,
"learning_rate": 5.468894218911642e-07,
"loss": 0.5053032636642456,
"step": 5353
},
{
"epoch": 2.5952496364517694,
"grad_norm": 13.153126753878984,
"learning_rate": 5.456077057629838e-07,
"loss": 0.6604177951812744,
"step": 5354
},
{
"epoch": 2.5957343674260787,
"grad_norm": 9.005323214830227,
"learning_rate": 5.443274066607607e-07,
"loss": 0.4827417731285095,
"step": 5355
},
{
"epoch": 2.596219098400388,
"grad_norm": 10.37838538211962,
"learning_rate": 5.430485249917816e-07,
"loss": 0.4454408288002014,
"step": 5356
},
{
"epoch": 2.596703829374697,
"grad_norm": 18.842923002999026,
"learning_rate": 5.41771061162884e-07,
"loss": 0.9535866379737854,
"step": 5357
},
{
"epoch": 2.5971885603490064,
"grad_norm": 9.054255976698748,
"learning_rate": 5.404950155804528e-07,
"loss": 0.39875948429107666,
"step": 5358
},
{
"epoch": 2.5976732913233156,
"grad_norm": 12.458009393491139,
"learning_rate": 5.392203886504221e-07,
"loss": 0.9799472093582153,
"step": 5359
},
{
"epoch": 2.598158022297625,
"grad_norm": 15.155874841797319,
"learning_rate": 5.379471807782743e-07,
"loss": 0.5553998947143555,
"step": 5360
},
{
"epoch": 2.598642753271934,
"grad_norm": 10.375761336863231,
"learning_rate": 5.36675392369041e-07,
"loss": 0.5623567700386047,
"step": 5361
},
{
"epoch": 2.5991274842462433,
"grad_norm": 17.37673561988,
"learning_rate": 5.354050238273023e-07,
"loss": 0.8768773078918457,
"step": 5362
},
{
"epoch": 2.5996122152205525,
"grad_norm": 11.18367614961891,
"learning_rate": 5.341360755571856e-07,
"loss": 0.27500325441360474,
"step": 5363
},
{
"epoch": 2.6000969461948618,
"grad_norm": 16.36007482422779,
"learning_rate": 5.328685479623674e-07,
"loss": 0.3017226755619049,
"step": 5364
},
{
"epoch": 2.600581677169171,
"grad_norm": 15.549870473158007,
"learning_rate": 5.316024414460729e-07,
"loss": 0.35917675495147705,
"step": 5365
},
{
"epoch": 2.6010664081434802,
"grad_norm": 8.45672525036449,
"learning_rate": 5.303377564110723e-07,
"loss": 0.2641231417655945,
"step": 5366
},
{
"epoch": 2.60155113911779,
"grad_norm": 11.111268344139988,
"learning_rate": 5.290744932596886e-07,
"loss": 0.3669453561306,
"step": 5367
},
{
"epoch": 2.6020358700920987,
"grad_norm": 13.352405173900793,
"learning_rate": 5.278126523937876e-07,
"loss": 0.46954941749572754,
"step": 5368
},
{
"epoch": 2.6025206010664084,
"grad_norm": 10.088137768957402,
"learning_rate": 5.265522342147866e-07,
"loss": 0.23579466342926025,
"step": 5369
},
{
"epoch": 2.603005332040717,
"grad_norm": 10.38257798043098,
"learning_rate": 5.252932391236443e-07,
"loss": 0.23452001810073853,
"step": 5370
},
{
"epoch": 2.603490063015027,
"grad_norm": 6.704576327445838,
"learning_rate": 5.240356675208746e-07,
"loss": 0.12292048335075378,
"step": 5371
},
{
"epoch": 2.603974793989336,
"grad_norm": 6.544721209894747,
"learning_rate": 5.227795198065333e-07,
"loss": 0.14592257142066956,
"step": 5372
},
{
"epoch": 2.6044595249636453,
"grad_norm": 8.672438761831376,
"learning_rate": 5.21524796380225e-07,
"loss": 0.25710630416870117,
"step": 5373
},
{
"epoch": 2.6049442559379545,
"grad_norm": 8.223389974068944,
"learning_rate": 5.202714976410999e-07,
"loss": 0.47972357273101807,
"step": 5374
},
{
"epoch": 2.6054289869122638,
"grad_norm": 13.482915884169973,
"learning_rate": 5.19019623987857e-07,
"loss": 0.3203021287918091,
"step": 5375
},
{
"epoch": 2.605913717886573,
"grad_norm": 15.90391276913022,
"learning_rate": 5.177691758187403e-07,
"loss": 1.0101443529129028,
"step": 5376
},
{
"epoch": 2.6063984488608822,
"grad_norm": 8.829895449141254,
"learning_rate": 5.165201535315406e-07,
"loss": 0.2835880517959595,
"step": 5377
},
{
"epoch": 2.6068831798351915,
"grad_norm": 16.345876866477212,
"learning_rate": 5.152725575235956e-07,
"loss": 0.447638601064682,
"step": 5378
},
{
"epoch": 2.6073679108095007,
"grad_norm": 13.392578280529776,
"learning_rate": 5.140263881917895e-07,
"loss": 0.26758843660354614,
"step": 5379
},
{
"epoch": 2.60785264178381,
"grad_norm": 9.95399114323578,
"learning_rate": 5.127816459325508e-07,
"loss": 0.21256791055202484,
"step": 5380
},
{
"epoch": 2.608337372758119,
"grad_norm": 9.280245207908928,
"learning_rate": 5.115383311418571e-07,
"loss": 0.605769157409668,
"step": 5381
},
{
"epoch": 2.6088221037324284,
"grad_norm": 6.353754603444032,
"learning_rate": 5.102964442152297e-07,
"loss": 0.24464286863803864,
"step": 5382
},
{
"epoch": 2.6093068347067376,
"grad_norm": 7.317710820936104,
"learning_rate": 5.090559855477362e-07,
"loss": 0.19198918342590332,
"step": 5383
},
{
"epoch": 2.609791565681047,
"grad_norm": 12.553976158575175,
"learning_rate": 5.078169555339901e-07,
"loss": 0.5619820356369019,
"step": 5384
},
{
"epoch": 2.610276296655356,
"grad_norm": 11.096349196001052,
"learning_rate": 5.065793545681491e-07,
"loss": 0.2167019546031952,
"step": 5385
},
{
"epoch": 2.6107610276296658,
"grad_norm": 12.116319500536525,
"learning_rate": 5.053431830439181e-07,
"loss": 0.8415489196777344,
"step": 5386
},
{
"epoch": 2.6112457586039746,
"grad_norm": 10.835239448702369,
"learning_rate": 5.041084413545466e-07,
"loss": 0.5040441751480103,
"step": 5387
},
{
"epoch": 2.6117304895782842,
"grad_norm": 12.05708008668283,
"learning_rate": 5.028751298928286e-07,
"loss": 0.7127473950386047,
"step": 5388
},
{
"epoch": 2.6122152205525935,
"grad_norm": 9.83085905043436,
"learning_rate": 5.016432490511036e-07,
"loss": 0.5779480934143066,
"step": 5389
},
{
"epoch": 2.6126999515269027,
"grad_norm": 5.359192223192768,
"learning_rate": 5.00412799221257e-07,
"loss": 0.14790233969688416,
"step": 5390
},
{
"epoch": 2.613184682501212,
"grad_norm": 7.574956449326839,
"learning_rate": 4.991837807947187e-07,
"loss": 0.26003503799438477,
"step": 5391
},
{
"epoch": 2.613669413475521,
"grad_norm": 16.17119980917047,
"learning_rate": 4.9795619416246e-07,
"loss": 0.5294740200042725,
"step": 5392
},
{
"epoch": 2.6141541444498304,
"grad_norm": 11.919705341101475,
"learning_rate": 4.96730039715001e-07,
"loss": 0.3173370957374573,
"step": 5393
},
{
"epoch": 2.6146388754241396,
"grad_norm": 14.03045522087972,
"learning_rate": 4.955053178424036e-07,
"loss": 0.6506468653678894,
"step": 5394
},
{
"epoch": 2.615123606398449,
"grad_norm": 16.94963544280873,
"learning_rate": 4.942820289342759e-07,
"loss": 0.41403698921203613,
"step": 5395
},
{
"epoch": 2.615608337372758,
"grad_norm": 17.921613361892067,
"learning_rate": 4.930601733797685e-07,
"loss": 1.0154664516448975,
"step": 5396
},
{
"epoch": 2.6160930683470673,
"grad_norm": 8.337764320649855,
"learning_rate": 4.918397515675772e-07,
"loss": 0.5907363295555115,
"step": 5397
},
{
"epoch": 2.6165777993213766,
"grad_norm": 17.7445635730872,
"learning_rate": 4.906207638859401e-07,
"loss": 0.38895538449287415,
"step": 5398
},
{
"epoch": 2.617062530295686,
"grad_norm": 18.71778560184672,
"learning_rate": 4.89403210722641e-07,
"loss": 0.7288837432861328,
"step": 5399
},
{
"epoch": 2.617547261269995,
"grad_norm": 8.820989555120434,
"learning_rate": 4.881870924650062e-07,
"loss": 0.36354053020477295,
"step": 5400
},
{
"epoch": 2.6180319922443043,
"grad_norm": 12.279864179308358,
"learning_rate": 4.869724094999056e-07,
"loss": 0.7058432102203369,
"step": 5401
},
{
"epoch": 2.6185167232186135,
"grad_norm": 14.218796402040358,
"learning_rate": 4.857591622137531e-07,
"loss": 0.45724040269851685,
"step": 5402
},
{
"epoch": 2.619001454192923,
"grad_norm": 9.006389837298045,
"learning_rate": 4.845473509925042e-07,
"loss": 0.47296810150146484,
"step": 5403
},
{
"epoch": 2.619486185167232,
"grad_norm": 11.964406597588328,
"learning_rate": 4.833369762216606e-07,
"loss": 0.4926716685295105,
"step": 5404
},
{
"epoch": 2.6199709161415417,
"grad_norm": 16.372865229653453,
"learning_rate": 4.821280382862647e-07,
"loss": 0.9048281908035278,
"step": 5405
},
{
"epoch": 2.6204556471158504,
"grad_norm": 14.671467789441312,
"learning_rate": 4.80920537570903e-07,
"loss": 0.7690280079841614,
"step": 5406
},
{
"epoch": 2.62094037809016,
"grad_norm": 16.810741197130298,
"learning_rate": 4.797144744597027e-07,
"loss": 0.4907899498939514,
"step": 5407
},
{
"epoch": 2.6214251090644693,
"grad_norm": 9.985001435466264,
"learning_rate": 4.785098493363343e-07,
"loss": 0.2875097095966339,
"step": 5408
},
{
"epoch": 2.6219098400387786,
"grad_norm": 13.249701462794906,
"learning_rate": 4.77306662584015e-07,
"loss": 0.5486938953399658,
"step": 5409
},
{
"epoch": 2.622394571013088,
"grad_norm": 8.968391749024267,
"learning_rate": 4.76104914585499e-07,
"loss": 0.4905383586883545,
"step": 5410
},
{
"epoch": 2.622879301987397,
"grad_norm": 17.699395145100226,
"learning_rate": 4.749046057230855e-07,
"loss": 0.876335620880127,
"step": 5411
},
{
"epoch": 2.6233640329617063,
"grad_norm": 11.52015590895735,
"learning_rate": 4.7370573637861405e-07,
"loss": 0.781601071357727,
"step": 5412
},
{
"epoch": 2.6238487639360155,
"grad_norm": 13.160882093127999,
"learning_rate": 4.7250830693347074e-07,
"loss": 0.46153420209884644,
"step": 5413
},
{
"epoch": 2.6243334949103247,
"grad_norm": 9.24216095537947,
"learning_rate": 4.713123177685769e-07,
"loss": 0.3317301869392395,
"step": 5414
},
{
"epoch": 2.624818225884634,
"grad_norm": 18.518424599592635,
"learning_rate": 4.70117769264401e-07,
"loss": 0.509996771812439,
"step": 5415
},
{
"epoch": 2.625302956858943,
"grad_norm": 9.816169461644947,
"learning_rate": 4.6892466180095077e-07,
"loss": 0.8019520044326782,
"step": 5416
},
{
"epoch": 2.6257876878332524,
"grad_norm": 8.380735279966398,
"learning_rate": 4.677329957577759e-07,
"loss": 0.3765624165534973,
"step": 5417
},
{
"epoch": 2.6262724188075617,
"grad_norm": 13.243132664798067,
"learning_rate": 4.665427715139687e-07,
"loss": 0.2481462061405182,
"step": 5418
},
{
"epoch": 2.626757149781871,
"grad_norm": 8.074856820234311,
"learning_rate": 4.6535398944816125e-07,
"loss": 0.7310369610786438,
"step": 5419
},
{
"epoch": 2.6272418807561806,
"grad_norm": 11.963796550800701,
"learning_rate": 4.641666499385278e-07,
"loss": 0.49114689230918884,
"step": 5420
},
{
"epoch": 2.6277266117304894,
"grad_norm": 9.022366714635023,
"learning_rate": 4.629807533627828e-07,
"loss": 0.21474146842956543,
"step": 5421
},
{
"epoch": 2.628211342704799,
"grad_norm": 13.810985181963437,
"learning_rate": 4.6179630009818286e-07,
"loss": 0.2361384928226471,
"step": 5422
},
{
"epoch": 2.628696073679108,
"grad_norm": 14.088413137392578,
"learning_rate": 4.606132905215249e-07,
"loss": 0.5555413961410522,
"step": 5423
},
{
"epoch": 2.6291808046534175,
"grad_norm": 13.383125188959236,
"learning_rate": 4.5943172500914625e-07,
"loss": 0.4667271375656128,
"step": 5424
},
{
"epoch": 2.6296655356277268,
"grad_norm": 10.041590702428403,
"learning_rate": 4.582516039369245e-07,
"loss": 0.3430459201335907,
"step": 5425
},
{
"epoch": 2.630150266602036,
"grad_norm": 11.292048687697479,
"learning_rate": 4.5707292768027877e-07,
"loss": 0.42621228098869324,
"step": 5426
},
{
"epoch": 2.6306349975763452,
"grad_norm": 11.799800840579051,
"learning_rate": 4.5589569661416866e-07,
"loss": 0.9052614569664001,
"step": 5427
},
{
"epoch": 2.6311197285506545,
"grad_norm": 8.864127737265738,
"learning_rate": 4.5471991111309455e-07,
"loss": 0.696924090385437,
"step": 5428
},
{
"epoch": 2.6316044595249637,
"grad_norm": 12.543689503403984,
"learning_rate": 4.5354557155109336e-07,
"loss": 0.5234421491622925,
"step": 5429
},
{
"epoch": 2.632089190499273,
"grad_norm": 13.016937543704502,
"learning_rate": 4.523726783017457e-07,
"loss": 0.412057101726532,
"step": 5430
},
{
"epoch": 2.632573921473582,
"grad_norm": 12.589896273813721,
"learning_rate": 4.512012317381703e-07,
"loss": 0.38582563400268555,
"step": 5431
},
{
"epoch": 2.6330586524478914,
"grad_norm": 14.315017250798766,
"learning_rate": 4.50031232233028e-07,
"loss": 0.4801258146762848,
"step": 5432
},
{
"epoch": 2.6335433834222006,
"grad_norm": 10.036968989278156,
"learning_rate": 4.4886268015851665e-07,
"loss": 0.28337255120277405,
"step": 5433
},
{
"epoch": 2.63402811439651,
"grad_norm": 9.167763216693984,
"learning_rate": 4.4769557588637435e-07,
"loss": 0.28114455938339233,
"step": 5434
},
{
"epoch": 2.634512845370819,
"grad_norm": 13.92250433768793,
"learning_rate": 4.4652991978787975e-07,
"loss": 0.5862511396408081,
"step": 5435
},
{
"epoch": 2.6349975763451283,
"grad_norm": 8.94577137275024,
"learning_rate": 4.453657122338495e-07,
"loss": 0.3282536268234253,
"step": 5436
},
{
"epoch": 2.6354823073194376,
"grad_norm": 20.086630310579253,
"learning_rate": 4.4420295359464014e-07,
"loss": 3.195401191711426,
"step": 5437
},
{
"epoch": 2.635967038293747,
"grad_norm": 15.401589697041352,
"learning_rate": 4.430416442401464e-07,
"loss": 0.740722119808197,
"step": 5438
},
{
"epoch": 2.6364517692680565,
"grad_norm": 24.23655023324113,
"learning_rate": 4.4188178453980324e-07,
"loss": 1.537482500076294,
"step": 5439
},
{
"epoch": 2.6369365002423653,
"grad_norm": 11.17637448349109,
"learning_rate": 4.407233748625839e-07,
"loss": 0.40486887097358704,
"step": 5440
},
{
"epoch": 2.637421231216675,
"grad_norm": 7.99568972942551,
"learning_rate": 4.395664155769991e-07,
"loss": 0.4286423921585083,
"step": 5441
},
{
"epoch": 2.637905962190984,
"grad_norm": 11.239850083990602,
"learning_rate": 4.3841090705110057e-07,
"loss": 0.29131361842155457,
"step": 5442
},
{
"epoch": 2.6383906931652934,
"grad_norm": 8.55114074740875,
"learning_rate": 4.372568496524776e-07,
"loss": 0.3001346290111542,
"step": 5443
},
{
"epoch": 2.6388754241396026,
"grad_norm": 18.626199585524642,
"learning_rate": 4.361042437482582e-07,
"loss": 0.524885892868042,
"step": 5444
},
{
"epoch": 2.639360155113912,
"grad_norm": 7.220762970404719,
"learning_rate": 4.3495308970510463e-07,
"loss": 0.27561283111572266,
"step": 5445
},
{
"epoch": 2.639844886088221,
"grad_norm": 10.601001577988235,
"learning_rate": 4.338033878892234e-07,
"loss": 0.2653716206550598,
"step": 5446
},
{
"epoch": 2.6403296170625303,
"grad_norm": 10.241674688152717,
"learning_rate": 4.326551386663563e-07,
"loss": 0.2489595115184784,
"step": 5447
},
{
"epoch": 2.6408143480368396,
"grad_norm": 10.106371445566728,
"learning_rate": 4.3150834240178187e-07,
"loss": 0.4709815979003906,
"step": 5448
},
{
"epoch": 2.641299079011149,
"grad_norm": 11.941522361347937,
"learning_rate": 4.303629994603181e-07,
"loss": 0.31576988101005554,
"step": 5449
},
{
"epoch": 2.641783809985458,
"grad_norm": 12.221871148752626,
"learning_rate": 4.2921911020631926e-07,
"loss": 0.3384070098400116,
"step": 5450
},
{
"epoch": 2.6422685409597673,
"grad_norm": 9.94490690769486,
"learning_rate": 4.280766750036808e-07,
"loss": 0.31797274947166443,
"step": 5451
},
{
"epoch": 2.6427532719340765,
"grad_norm": 9.529192481576095,
"learning_rate": 4.269356942158298e-07,
"loss": 0.2801746726036072,
"step": 5452
},
{
"epoch": 2.6432380029083857,
"grad_norm": 10.53655811298617,
"learning_rate": 4.2579616820573486e-07,
"loss": 1.319901466369629,
"step": 5453
},
{
"epoch": 2.643722733882695,
"grad_norm": 13.080751346848183,
"learning_rate": 4.2465809733590047e-07,
"loss": 0.6046762466430664,
"step": 5454
},
{
"epoch": 2.644207464857004,
"grad_norm": 10.487922485425115,
"learning_rate": 4.235214819683681e-07,
"loss": 0.4557761549949646,
"step": 5455
},
{
"epoch": 2.644692195831314,
"grad_norm": 7.776827759594931,
"learning_rate": 4.2238632246471733e-07,
"loss": 0.3082202672958374,
"step": 5456
},
{
"epoch": 2.6451769268056227,
"grad_norm": 13.756980390328437,
"learning_rate": 4.212526191860633e-07,
"loss": 0.42918282747268677,
"step": 5457
},
{
"epoch": 2.6456616577799323,
"grad_norm": 11.99707231569847,
"learning_rate": 4.2012037249305857e-07,
"loss": 0.39406484365463257,
"step": 5458
},
{
"epoch": 2.646146388754241,
"grad_norm": 9.779553501516453,
"learning_rate": 4.1898958274589117e-07,
"loss": 0.4363619387149811,
"step": 5459
},
{
"epoch": 2.646631119728551,
"grad_norm": 11.27843366774087,
"learning_rate": 4.1786025030428776e-07,
"loss": 0.33820557594299316,
"step": 5460
},
{
"epoch": 2.64711585070286,
"grad_norm": 8.819480010141945,
"learning_rate": 4.1673237552750987e-07,
"loss": 0.5932563543319702,
"step": 5461
},
{
"epoch": 2.6476005816771693,
"grad_norm": 14.198564564341648,
"learning_rate": 4.156059587743555e-07,
"loss": 0.6587765216827393,
"step": 5462
},
{
"epoch": 2.6480853126514785,
"grad_norm": 12.914167666545797,
"learning_rate": 4.1448100040315906e-07,
"loss": 0.44061270356178284,
"step": 5463
},
{
"epoch": 2.6485700436257877,
"grad_norm": 12.676275240354105,
"learning_rate": 4.133575007717899e-07,
"loss": 0.7630748152732849,
"step": 5464
},
{
"epoch": 2.649054774600097,
"grad_norm": 10.234035416628508,
"learning_rate": 4.1223546023765604e-07,
"loss": 0.6388047933578491,
"step": 5465
},
{
"epoch": 2.649539505574406,
"grad_norm": 8.985974326675937,
"learning_rate": 4.1111487915770075e-07,
"loss": 0.33576729893684387,
"step": 5466
},
{
"epoch": 2.6500242365487154,
"grad_norm": 8.840420330700187,
"learning_rate": 4.099957578883984e-07,
"loss": 0.32670116424560547,
"step": 5467
},
{
"epoch": 2.6505089675230247,
"grad_norm": 8.678834741543612,
"learning_rate": 4.088780967857653e-07,
"loss": 0.3775448799133301,
"step": 5468
},
{
"epoch": 2.650993698497334,
"grad_norm": 10.851477172237326,
"learning_rate": 4.077618962053481e-07,
"loss": 0.6048464179039001,
"step": 5469
},
{
"epoch": 2.651478429471643,
"grad_norm": 13.554900276601852,
"learning_rate": 4.0664715650223343e-07,
"loss": 0.5729650855064392,
"step": 5470
},
{
"epoch": 2.6519631604459524,
"grad_norm": 16.493136827604904,
"learning_rate": 4.0553387803104025e-07,
"loss": 0.46666404604911804,
"step": 5471
},
{
"epoch": 2.6524478914202616,
"grad_norm": 15.76426243953238,
"learning_rate": 4.0442206114592374e-07,
"loss": 0.5208025574684143,
"step": 5472
},
{
"epoch": 2.6529326223945713,
"grad_norm": 6.57648476860426,
"learning_rate": 4.0331170620057314e-07,
"loss": 0.25682637095451355,
"step": 5473
},
{
"epoch": 2.65341735336888,
"grad_norm": 8.188283986366038,
"learning_rate": 4.022028135482131e-07,
"loss": 0.356034517288208,
"step": 5474
},
{
"epoch": 2.6539020843431897,
"grad_norm": 11.621456548069366,
"learning_rate": 4.010953835416037e-07,
"loss": 0.3010556995868683,
"step": 5475
},
{
"epoch": 2.6543868153174985,
"grad_norm": 10.18699678292041,
"learning_rate": 3.999894165330387e-07,
"loss": 0.5383352637290955,
"step": 5476
},
{
"epoch": 2.654871546291808,
"grad_norm": 9.886938627133135,
"learning_rate": 3.988849128743477e-07,
"loss": 0.6462916731834412,
"step": 5477
},
{
"epoch": 2.6553562772661174,
"grad_norm": 8.281087905511903,
"learning_rate": 3.9778187291689307e-07,
"loss": 0.6071977615356445,
"step": 5478
},
{
"epoch": 2.6558410082404267,
"grad_norm": 10.293422187133485,
"learning_rate": 3.9668029701157406e-07,
"loss": 0.490591824054718,
"step": 5479
},
{
"epoch": 2.656325739214736,
"grad_norm": 8.539643398339265,
"learning_rate": 3.9558018550882204e-07,
"loss": 0.2501641809940338,
"step": 5480
},
{
"epoch": 2.656810470189045,
"grad_norm": 7.482880386563049,
"learning_rate": 3.9448153875860264e-07,
"loss": 0.6731146574020386,
"step": 5481
},
{
"epoch": 2.6572952011633544,
"grad_norm": 13.594761278119067,
"learning_rate": 3.9338435711041733e-07,
"loss": 0.6829290390014648,
"step": 5482
},
{
"epoch": 2.6577799321376636,
"grad_norm": 9.322433743710336,
"learning_rate": 3.9228864091329757e-07,
"loss": 0.6796175837516785,
"step": 5483
},
{
"epoch": 2.658264663111973,
"grad_norm": 7.9952743311264465,
"learning_rate": 3.911943905158139e-07,
"loss": 0.5921481251716614,
"step": 5484
},
{
"epoch": 2.658749394086282,
"grad_norm": 11.583926561000979,
"learning_rate": 3.901016062660673e-07,
"loss": 0.44441908597946167,
"step": 5485
},
{
"epoch": 2.6592341250605913,
"grad_norm": 11.986014506880693,
"learning_rate": 3.890102885116931e-07,
"loss": 0.8112941980361938,
"step": 5486
},
{
"epoch": 2.6597188560349005,
"grad_norm": 11.9503915637031,
"learning_rate": 3.87920437599858e-07,
"loss": 0.6868834495544434,
"step": 5487
},
{
"epoch": 2.66020358700921,
"grad_norm": 11.263387636827828,
"learning_rate": 3.8683205387726796e-07,
"loss": 0.6939114928245544,
"step": 5488
},
{
"epoch": 2.660688317983519,
"grad_norm": 8.616487279933049,
"learning_rate": 3.85745137690155e-07,
"loss": 0.25282764434814453,
"step": 5489
},
{
"epoch": 2.6611730489578282,
"grad_norm": 14.290383885734885,
"learning_rate": 3.846596893842891e-07,
"loss": 0.3083244562149048,
"step": 5490
},
{
"epoch": 2.6616577799321375,
"grad_norm": 14.137284383484705,
"learning_rate": 3.8357570930497135e-07,
"loss": 0.6836187243461609,
"step": 5491
},
{
"epoch": 2.662142510906447,
"grad_norm": 14.72967247131545,
"learning_rate": 3.8249319779703475e-07,
"loss": 0.8720105886459351,
"step": 5492
},
{
"epoch": 2.662627241880756,
"grad_norm": 8.074823759629812,
"learning_rate": 3.814121552048494e-07,
"loss": 0.16812659800052643,
"step": 5493
},
{
"epoch": 2.6631119728550656,
"grad_norm": 13.472876435263057,
"learning_rate": 3.8033258187231414e-07,
"loss": 0.7013896107673645,
"step": 5494
},
{
"epoch": 2.663596703829375,
"grad_norm": 12.461904626529572,
"learning_rate": 3.792544781428609e-07,
"loss": 0.27183711528778076,
"step": 5495
},
{
"epoch": 2.664081434803684,
"grad_norm": 15.779471639267209,
"learning_rate": 3.7817784435945524e-07,
"loss": 0.47918638586997986,
"step": 5496
},
{
"epoch": 2.6645661657779933,
"grad_norm": 17.56765811269185,
"learning_rate": 3.7710268086459447e-07,
"loss": 0.42026007175445557,
"step": 5497
},
{
"epoch": 2.6650508967523026,
"grad_norm": 9.498554464244991,
"learning_rate": 3.7602898800030874e-07,
"loss": 0.5951410531997681,
"step": 5498
},
{
"epoch": 2.665535627726612,
"grad_norm": 7.835410327325615,
"learning_rate": 3.749567661081593e-07,
"loss": 0.44577157497406006,
"step": 5499
},
{
"epoch": 2.666020358700921,
"grad_norm": 11.521069805657428,
"learning_rate": 3.7388601552924066e-07,
"loss": 0.19111663103103638,
"step": 5500
},
{
"epoch": 2.6665050896752303,
"grad_norm": 8.446786370310974,
"learning_rate": 3.7281673660417737e-07,
"loss": 0.6608403921127319,
"step": 5501
},
{
"epoch": 2.6669898206495395,
"grad_norm": 11.367315613075574,
"learning_rate": 3.717489296731297e-07,
"loss": 0.33594757318496704,
"step": 5502
},
{
"epoch": 2.6674745516238487,
"grad_norm": 9.531978216733068,
"learning_rate": 3.7068259507578475e-07,
"loss": 0.3431253731250763,
"step": 5503
},
{
"epoch": 2.667959282598158,
"grad_norm": 13.376823385199113,
"learning_rate": 3.696177331513662e-07,
"loss": 0.37833860516548157,
"step": 5504
},
{
"epoch": 2.668444013572467,
"grad_norm": 10.527165728429722,
"learning_rate": 3.6855434423862356e-07,
"loss": 0.5485377311706543,
"step": 5505
},
{
"epoch": 2.6689287445467764,
"grad_norm": 8.84027813082921,
"learning_rate": 3.674924286758419e-07,
"loss": 0.4851524233818054,
"step": 5506
},
{
"epoch": 2.6694134755210857,
"grad_norm": 10.29698290279821,
"learning_rate": 3.66431986800837e-07,
"loss": 0.40543103218078613,
"step": 5507
},
{
"epoch": 2.669898206495395,
"grad_norm": 9.347670358520368,
"learning_rate": 3.653730189509558e-07,
"loss": 0.31933891773223877,
"step": 5508
},
{
"epoch": 2.6703829374697046,
"grad_norm": 17.591641653990823,
"learning_rate": 3.6431552546307534e-07,
"loss": 0.5208455324172974,
"step": 5509
},
{
"epoch": 2.6708676684440134,
"grad_norm": 12.438102413329055,
"learning_rate": 3.6325950667360443e-07,
"loss": 0.5623095035552979,
"step": 5510
},
{
"epoch": 2.671352399418323,
"grad_norm": 8.499817101396529,
"learning_rate": 3.622049629184815e-07,
"loss": 0.39935302734375,
"step": 5511
},
{
"epoch": 2.671837130392632,
"grad_norm": 15.277750640307218,
"learning_rate": 3.6115189453317825e-07,
"loss": 0.7014456391334534,
"step": 5512
},
{
"epoch": 2.6723218613669415,
"grad_norm": 12.724323636924392,
"learning_rate": 3.601003018526944e-07,
"loss": 0.983346164226532,
"step": 5513
},
{
"epoch": 2.6728065923412507,
"grad_norm": 13.170418331605612,
"learning_rate": 3.590501852115619e-07,
"loss": 0.3329489827156067,
"step": 5514
},
{
"epoch": 2.67329132331556,
"grad_norm": 11.64916542616792,
"learning_rate": 3.5800154494384176e-07,
"loss": 2.547976493835449,
"step": 5515
},
{
"epoch": 2.673776054289869,
"grad_norm": 10.253392013596862,
"learning_rate": 3.5695438138312776e-07,
"loss": 0.3123473525047302,
"step": 5516
},
{
"epoch": 2.6742607852641784,
"grad_norm": 9.092637765712487,
"learning_rate": 3.559086948625412e-07,
"loss": 0.42272859811782837,
"step": 5517
},
{
"epoch": 2.6747455162384877,
"grad_norm": 10.958057893200134,
"learning_rate": 3.548644857147349e-07,
"loss": 0.411432147026062,
"step": 5518
},
{
"epoch": 2.675230247212797,
"grad_norm": 6.3110023942364615,
"learning_rate": 3.538217542718919e-07,
"loss": 0.2506001889705658,
"step": 5519
},
{
"epoch": 2.675714978187106,
"grad_norm": 16.8014105255347,
"learning_rate": 3.5278050086572313e-07,
"loss": 0.5672577619552612,
"step": 5520
},
{
"epoch": 2.6761997091614154,
"grad_norm": 18.470923128110332,
"learning_rate": 3.5174072582747245e-07,
"loss": 0.7251436710357666,
"step": 5521
},
{
"epoch": 2.6766844401357246,
"grad_norm": 11.287971369805346,
"learning_rate": 3.507024294879113e-07,
"loss": 0.37072721123695374,
"step": 5522
},
{
"epoch": 2.677169171110034,
"grad_norm": 9.841987565439247,
"learning_rate": 3.4966561217734117e-07,
"loss": 0.33923089504241943,
"step": 5523
},
{
"epoch": 2.677653902084343,
"grad_norm": 14.391644264842336,
"learning_rate": 3.486302742255937e-07,
"loss": 0.5673322677612305,
"step": 5524
},
{
"epoch": 2.6781386330586523,
"grad_norm": 16.732057790133847,
"learning_rate": 3.4759641596202766e-07,
"loss": 0.4533677101135254,
"step": 5525
},
{
"epoch": 2.678623364032962,
"grad_norm": 15.039580539893455,
"learning_rate": 3.4656403771553604e-07,
"loss": 0.7210106253623962,
"step": 5526
},
{
"epoch": 2.6791080950072708,
"grad_norm": 10.957559915909231,
"learning_rate": 3.4553313981453505e-07,
"loss": 0.38452959060668945,
"step": 5527
},
{
"epoch": 2.6795928259815804,
"grad_norm": 10.729377491512883,
"learning_rate": 3.4450372258697297e-07,
"loss": 0.4969039857387543,
"step": 5528
},
{
"epoch": 2.6800775569558892,
"grad_norm": 13.43019139723567,
"learning_rate": 3.434757863603272e-07,
"loss": 0.7821241617202759,
"step": 5529
},
{
"epoch": 2.680562287930199,
"grad_norm": 10.522901514580747,
"learning_rate": 3.4244933146160395e-07,
"loss": 0.32767409086227417,
"step": 5530
},
{
"epoch": 2.681047018904508,
"grad_norm": 8.875601988861433,
"learning_rate": 3.4142435821733767e-07,
"loss": 0.42604029178619385,
"step": 5531
},
{
"epoch": 2.6815317498788174,
"grad_norm": 10.669788610454791,
"learning_rate": 3.404008669535913e-07,
"loss": 0.4410785436630249,
"step": 5532
},
{
"epoch": 2.6820164808531266,
"grad_norm": 9.270786719757131,
"learning_rate": 3.3937885799595727e-07,
"loss": 0.16268709301948547,
"step": 5533
},
{
"epoch": 2.682501211827436,
"grad_norm": 9.400286792084344,
"learning_rate": 3.383583316695549e-07,
"loss": 0.36884021759033203,
"step": 5534
},
{
"epoch": 2.682985942801745,
"grad_norm": 5.02756358653072,
"learning_rate": 3.3733928829903396e-07,
"loss": 0.16884319484233856,
"step": 5535
},
{
"epoch": 2.6834706737760543,
"grad_norm": 17.567156123051376,
"learning_rate": 3.363217282085701e-07,
"loss": 0.6396051645278931,
"step": 5536
},
{
"epoch": 2.6839554047503635,
"grad_norm": 11.566679077131656,
"learning_rate": 3.353056517218689e-07,
"loss": 0.8034859895706177,
"step": 5537
},
{
"epoch": 2.6844401357246728,
"grad_norm": 10.013594773447002,
"learning_rate": 3.3429105916216397e-07,
"loss": 0.5980116724967957,
"step": 5538
},
{
"epoch": 2.684924866698982,
"grad_norm": 11.565804938385842,
"learning_rate": 3.3327795085221436e-07,
"loss": 0.4990668296813965,
"step": 5539
},
{
"epoch": 2.6854095976732912,
"grad_norm": 15.591461602008966,
"learning_rate": 3.322663271143112e-07,
"loss": 0.4503266215324402,
"step": 5540
},
{
"epoch": 2.6858943286476005,
"grad_norm": 14.18080876226277,
"learning_rate": 3.3125618827027085e-07,
"loss": 1.0968286991119385,
"step": 5541
},
{
"epoch": 2.6863790596219097,
"grad_norm": 8.446387995875865,
"learning_rate": 3.3024753464143633e-07,
"loss": 0.30315378308296204,
"step": 5542
},
{
"epoch": 2.686863790596219,
"grad_norm": 19.00434869779232,
"learning_rate": 3.292403665486782e-07,
"loss": 0.5532218217849731,
"step": 5543
},
{
"epoch": 2.687348521570528,
"grad_norm": 9.522250465424333,
"learning_rate": 3.282346843123985e-07,
"loss": 0.19483399391174316,
"step": 5544
},
{
"epoch": 2.687833252544838,
"grad_norm": 14.676980990771865,
"learning_rate": 3.2723048825252177e-07,
"loss": 0.6158111095428467,
"step": 5545
},
{
"epoch": 2.6883179835191466,
"grad_norm": 8.872849052164305,
"learning_rate": 3.2622777868850207e-07,
"loss": 0.3459939956665039,
"step": 5546
},
{
"epoch": 2.6888027144934563,
"grad_norm": 17.135167501179012,
"learning_rate": 3.2522655593932026e-07,
"loss": 0.38884034752845764,
"step": 5547
},
{
"epoch": 2.6892874454677655,
"grad_norm": 12.429988025508939,
"learning_rate": 3.242268203234844e-07,
"loss": 0.5886789560317993,
"step": 5548
},
{
"epoch": 2.689772176442075,
"grad_norm": 10.692152012421527,
"learning_rate": 3.2322857215902834e-07,
"loss": 0.5561063289642334,
"step": 5549
},
{
"epoch": 2.690256907416384,
"grad_norm": 20.761346976148587,
"learning_rate": 3.222318117635143e-07,
"loss": 0.4919826090335846,
"step": 5550
},
{
"epoch": 2.6907416383906932,
"grad_norm": 14.271260716072682,
"learning_rate": 3.2123653945403065e-07,
"loss": 0.508813738822937,
"step": 5551
},
{
"epoch": 2.6912263693650025,
"grad_norm": 10.661221663658718,
"learning_rate": 3.2024275554719154e-07,
"loss": 0.2937590181827545,
"step": 5552
},
{
"epoch": 2.6917111003393117,
"grad_norm": 12.871595000789732,
"learning_rate": 3.1925046035913787e-07,
"loss": 0.3570226728916168,
"step": 5553
},
{
"epoch": 2.692195831313621,
"grad_norm": 11.319060867194471,
"learning_rate": 3.182596542055394e-07,
"loss": 0.23885241150856018,
"step": 5554
},
{
"epoch": 2.69268056228793,
"grad_norm": 14.533962840799225,
"learning_rate": 3.172703374015884e-07,
"loss": 0.4378066658973694,
"step": 5555
},
{
"epoch": 2.6931652932622394,
"grad_norm": 12.631081284361034,
"learning_rate": 3.1628251026200653e-07,
"loss": 0.21135424077510834,
"step": 5556
},
{
"epoch": 2.6936500242365486,
"grad_norm": 15.728555737496489,
"learning_rate": 3.1529617310103844e-07,
"loss": 0.8689573407173157,
"step": 5557
},
{
"epoch": 2.694134755210858,
"grad_norm": 9.21803323471694,
"learning_rate": 3.143113262324582e-07,
"loss": 0.6920663118362427,
"step": 5558
},
{
"epoch": 2.694619486185167,
"grad_norm": 12.802186396363016,
"learning_rate": 3.133279699695635e-07,
"loss": 0.2362968921661377,
"step": 5559
},
{
"epoch": 2.6951042171594763,
"grad_norm": 14.698483539907787,
"learning_rate": 3.12346104625178e-07,
"loss": 1.161097764968872,
"step": 5560
},
{
"epoch": 2.6955889481337856,
"grad_norm": 10.475507755834494,
"learning_rate": 3.113657305116519e-07,
"loss": 0.9154737591743469,
"step": 5561
},
{
"epoch": 2.6960736791080953,
"grad_norm": 18.084599426882647,
"learning_rate": 3.1038684794086007e-07,
"loss": 0.37914717197418213,
"step": 5562
},
{
"epoch": 2.696558410082404,
"grad_norm": 10.073058064327713,
"learning_rate": 3.0940945722420566e-07,
"loss": 0.3908422589302063,
"step": 5563
},
{
"epoch": 2.6970431410567137,
"grad_norm": 10.364425365182298,
"learning_rate": 3.0843355867261225e-07,
"loss": 0.45687249302864075,
"step": 5564
},
{
"epoch": 2.6975278720310225,
"grad_norm": 10.171565002495711,
"learning_rate": 3.0745915259653314e-07,
"loss": 0.25674912333488464,
"step": 5565
},
{
"epoch": 2.698012603005332,
"grad_norm": 16.920818661165704,
"learning_rate": 3.064862393059448e-07,
"loss": 0.3643187880516052,
"step": 5566
},
{
"epoch": 2.6984973339796414,
"grad_norm": 14.250895831006037,
"learning_rate": 3.0551481911034796e-07,
"loss": 0.716118574142456,
"step": 5567
},
{
"epoch": 2.6989820649539507,
"grad_norm": 11.358105941520432,
"learning_rate": 3.045448923187722e-07,
"loss": 0.350005567073822,
"step": 5568
},
{
"epoch": 2.69946679592826,
"grad_norm": 7.930085895659518,
"learning_rate": 3.0357645923976775e-07,
"loss": 0.2607939839363098,
"step": 5569
},
{
"epoch": 2.699951526902569,
"grad_norm": 9.746475337965244,
"learning_rate": 3.026095201814122e-07,
"loss": 0.6963765621185303,
"step": 5570
},
{
"epoch": 2.7004362578768784,
"grad_norm": 12.77451212704608,
"learning_rate": 3.016440754513067e-07,
"loss": 0.8779592514038086,
"step": 5571
},
{
"epoch": 2.7009209888511876,
"grad_norm": 14.17669204178007,
"learning_rate": 3.006801253565772e-07,
"loss": 0.7811137437820435,
"step": 5572
},
{
"epoch": 2.701405719825497,
"grad_norm": 16.558491686248708,
"learning_rate": 2.99717670203874e-07,
"loss": 0.5024473667144775,
"step": 5573
},
{
"epoch": 2.701890450799806,
"grad_norm": 11.741812702805342,
"learning_rate": 2.9875671029937325e-07,
"loss": 0.33915823698043823,
"step": 5574
},
{
"epoch": 2.7023751817741153,
"grad_norm": 12.374098612629224,
"learning_rate": 2.9779724594877377e-07,
"loss": 0.39664027094841003,
"step": 5575
},
{
"epoch": 2.7028599127484245,
"grad_norm": 10.479249152055226,
"learning_rate": 2.9683927745729866e-07,
"loss": 0.34032052755355835,
"step": 5576
},
{
"epoch": 2.7033446437227338,
"grad_norm": 10.422558771532971,
"learning_rate": 2.9588280512969693e-07,
"loss": 0.4295572340488434,
"step": 5577
},
{
"epoch": 2.703829374697043,
"grad_norm": 21.35887206013934,
"learning_rate": 2.9492782927023953e-07,
"loss": 0.5932765603065491,
"step": 5578
},
{
"epoch": 2.7043141056713527,
"grad_norm": 14.338275506102043,
"learning_rate": 2.9397435018272357e-07,
"loss": 0.4092278480529785,
"step": 5579
},
{
"epoch": 2.7047988366456615,
"grad_norm": 19.910540856311826,
"learning_rate": 2.9302236817046636e-07,
"loss": 0.6203029155731201,
"step": 5580
},
{
"epoch": 2.705283567619971,
"grad_norm": 10.8372579366805,
"learning_rate": 2.9207188353631235e-07,
"loss": 0.33979904651641846,
"step": 5581
},
{
"epoch": 2.70576829859428,
"grad_norm": 17.509466438788444,
"learning_rate": 2.9112289658262907e-07,
"loss": 0.672860324382782,
"step": 5582
},
{
"epoch": 2.7062530295685896,
"grad_norm": 14.66623565691144,
"learning_rate": 2.9017540761130726e-07,
"loss": 0.3607822358608246,
"step": 5583
},
{
"epoch": 2.706737760542899,
"grad_norm": 10.580542109309183,
"learning_rate": 2.8922941692376026e-07,
"loss": 0.199172705411911,
"step": 5584
},
{
"epoch": 2.707222491517208,
"grad_norm": 18.21557699871553,
"learning_rate": 2.8828492482092576e-07,
"loss": 2.7609760761260986,
"step": 5585
},
{
"epoch": 2.7077072224915173,
"grad_norm": 13.272358957257467,
"learning_rate": 2.873419316032644e-07,
"loss": 0.5558392405509949,
"step": 5586
},
{
"epoch": 2.7081919534658265,
"grad_norm": 9.713399272295467,
"learning_rate": 2.864004375707602e-07,
"loss": 0.6343469023704529,
"step": 5587
},
{
"epoch": 2.7086766844401358,
"grad_norm": 13.029130910322873,
"learning_rate": 2.8546044302291963e-07,
"loss": 0.6718389391899109,
"step": 5588
},
{
"epoch": 2.709161415414445,
"grad_norm": 8.399692433008893,
"learning_rate": 2.845219482587736e-07,
"loss": 0.7526370286941528,
"step": 5589
},
{
"epoch": 2.7096461463887542,
"grad_norm": 9.558422669121617,
"learning_rate": 2.8358495357687366e-07,
"loss": 0.5014089941978455,
"step": 5590
},
{
"epoch": 2.7101308773630635,
"grad_norm": 16.459560918304756,
"learning_rate": 2.826494592752965e-07,
"loss": 0.5714015364646912,
"step": 5591
},
{
"epoch": 2.7106156083373727,
"grad_norm": 7.994305067123236,
"learning_rate": 2.817154656516402e-07,
"loss": 0.30691003799438477,
"step": 5592
},
{
"epoch": 2.711100339311682,
"grad_norm": 11.538562482071526,
"learning_rate": 2.807829730030254e-07,
"loss": 0.6549206972122192,
"step": 5593
},
{
"epoch": 2.711585070285991,
"grad_norm": 9.649547218563898,
"learning_rate": 2.7985198162609694e-07,
"loss": 0.5677828192710876,
"step": 5594
},
{
"epoch": 2.7120698012603004,
"grad_norm": 17.515719108087076,
"learning_rate": 2.78922491817018e-07,
"loss": 0.4994135797023773,
"step": 5595
},
{
"epoch": 2.7125545322346096,
"grad_norm": 21.81023748911209,
"learning_rate": 2.7799450387147873e-07,
"loss": 2.7684006690979004,
"step": 5596
},
{
"epoch": 2.713039263208919,
"grad_norm": 8.157315967682912,
"learning_rate": 2.770680180846891e-07,
"loss": 0.7893019914627075,
"step": 5597
},
{
"epoch": 2.7135239941832285,
"grad_norm": 17.162624189631398,
"learning_rate": 2.761430347513816e-07,
"loss": 0.5613259673118591,
"step": 5598
},
{
"epoch": 2.7140087251575373,
"grad_norm": 12.410396374276935,
"learning_rate": 2.752195541658104e-07,
"loss": 0.32242023944854736,
"step": 5599
},
{
"epoch": 2.714493456131847,
"grad_norm": 7.3203069709978905,
"learning_rate": 2.7429757662175316e-07,
"loss": 0.528655469417572,
"step": 5600
},
{
"epoch": 2.7149781871061562,
"grad_norm": 12.538432678609972,
"learning_rate": 2.7337710241250817e-07,
"loss": 0.4983547329902649,
"step": 5601
},
{
"epoch": 2.7154629180804655,
"grad_norm": 9.35395710325261,
"learning_rate": 2.724581318308944e-07,
"loss": 0.3835195302963257,
"step": 5602
},
{
"epoch": 2.7159476490547747,
"grad_norm": 9.509911243509192,
"learning_rate": 2.7154066516925427e-07,
"loss": 0.5719138383865356,
"step": 5603
},
{
"epoch": 2.716432380029084,
"grad_norm": 13.631044141164647,
"learning_rate": 2.7062470271945087e-07,
"loss": 0.47216469049453735,
"step": 5604
},
{
"epoch": 2.716917111003393,
"grad_norm": 9.559227202154865,
"learning_rate": 2.6971024477287e-07,
"loss": 0.2492595613002777,
"step": 5605
},
{
"epoch": 2.7174018419777024,
"grad_norm": 10.321297503992222,
"learning_rate": 2.687972916204168e-07,
"loss": 0.8298410177230835,
"step": 5606
},
{
"epoch": 2.7178865729520116,
"grad_norm": 10.740113707639157,
"learning_rate": 2.6788584355252e-07,
"loss": 0.7026622295379639,
"step": 5607
},
{
"epoch": 2.718371303926321,
"grad_norm": 11.168731357136325,
"learning_rate": 2.669759008591277e-07,
"loss": 0.45447608828544617,
"step": 5608
},
{
"epoch": 2.71885603490063,
"grad_norm": 9.551401979247311,
"learning_rate": 2.660674638297095e-07,
"loss": 0.4995330572128296,
"step": 5609
},
{
"epoch": 2.7193407658749393,
"grad_norm": 11.096560843287397,
"learning_rate": 2.651605327532569e-07,
"loss": 0.48031923174858093,
"step": 5610
},
{
"epoch": 2.7198254968492486,
"grad_norm": 11.627047760321576,
"learning_rate": 2.6425510791828144e-07,
"loss": 0.32477569580078125,
"step": 5611
},
{
"epoch": 2.720310227823558,
"grad_norm": 10.675222892277848,
"learning_rate": 2.63351189612816e-07,
"loss": 0.8259336352348328,
"step": 5612
},
{
"epoch": 2.720794958797867,
"grad_norm": 23.076573147964353,
"learning_rate": 2.624487781244128e-07,
"loss": 0.8140875101089478,
"step": 5613
},
{
"epoch": 2.7212796897721763,
"grad_norm": 11.997097008543143,
"learning_rate": 2.615478737401478e-07,
"loss": 0.5096031427383423,
"step": 5614
},
{
"epoch": 2.721764420746486,
"grad_norm": 8.670151424285581,
"learning_rate": 2.60648476746615e-07,
"loss": 0.3211718201637268,
"step": 5615
},
{
"epoch": 2.7222491517207947,
"grad_norm": 7.941214226705641,
"learning_rate": 2.5975058742993e-07,
"loss": 0.407539963722229,
"step": 5616
},
{
"epoch": 2.7227338826951044,
"grad_norm": 10.222372102741174,
"learning_rate": 2.588542060757265e-07,
"loss": 0.3813439607620239,
"step": 5617
},
{
"epoch": 2.723218613669413,
"grad_norm": 12.315481854722638,
"learning_rate": 2.579593329691604e-07,
"loss": 0.4033142626285553,
"step": 5618
},
{
"epoch": 2.723703344643723,
"grad_norm": 16.579418389994146,
"learning_rate": 2.5706596839490947e-07,
"loss": 0.5321418046951294,
"step": 5619
},
{
"epoch": 2.724188075618032,
"grad_norm": 15.969161802277394,
"learning_rate": 2.561741126371692e-07,
"loss": 0.3321477770805359,
"step": 5620
},
{
"epoch": 2.7246728065923413,
"grad_norm": 17.655741893338625,
"learning_rate": 2.5528376597965474e-07,
"loss": 0.34378373622894287,
"step": 5621
},
{
"epoch": 2.7251575375666506,
"grad_norm": 14.325291719788254,
"learning_rate": 2.54394928705603e-07,
"loss": 0.3548823595046997,
"step": 5622
},
{
"epoch": 2.72564226854096,
"grad_norm": 9.873163449643235,
"learning_rate": 2.5350760109777e-07,
"loss": 0.3364192247390747,
"step": 5623
},
{
"epoch": 2.726126999515269,
"grad_norm": 12.609430236440998,
"learning_rate": 2.5262178343843035e-07,
"loss": 0.844537615776062,
"step": 5624
},
{
"epoch": 2.7266117304895783,
"grad_norm": 7.796711556693153,
"learning_rate": 2.5173747600937994e-07,
"loss": 0.42560234665870667,
"step": 5625
},
{
"epoch": 2.7270964614638875,
"grad_norm": 8.44462971245607,
"learning_rate": 2.508546790919336e-07,
"loss": 0.7080024480819702,
"step": 5626
},
{
"epoch": 2.7275811924381967,
"grad_norm": 18.905794874999927,
"learning_rate": 2.4997339296692456e-07,
"loss": 1.8499634265899658,
"step": 5627
},
{
"epoch": 2.728065923412506,
"grad_norm": 15.102088782022664,
"learning_rate": 2.4909361791470856e-07,
"loss": 0.6276177167892456,
"step": 5628
},
{
"epoch": 2.728550654386815,
"grad_norm": 11.651408996292352,
"learning_rate": 2.482153542151572e-07,
"loss": 0.6058307886123657,
"step": 5629
},
{
"epoch": 2.7290353853611244,
"grad_norm": 9.153967395586754,
"learning_rate": 2.4733860214766315e-07,
"loss": 0.5776544809341431,
"step": 5630
},
{
"epoch": 2.7295201163354337,
"grad_norm": 10.04298776698902,
"learning_rate": 2.464633619911377e-07,
"loss": 0.3364386558532715,
"step": 5631
},
{
"epoch": 2.7300048473097434,
"grad_norm": 13.897803897953109,
"learning_rate": 2.45589634024011e-07,
"loss": 0.5694541931152344,
"step": 5632
},
{
"epoch": 2.730489578284052,
"grad_norm": 13.830249055327826,
"learning_rate": 2.447174185242324e-07,
"loss": 0.6110515594482422,
"step": 5633
},
{
"epoch": 2.730974309258362,
"grad_norm": 8.246928501538594,
"learning_rate": 2.4384671576926976e-07,
"loss": 0.3169976770877838,
"step": 5634
},
{
"epoch": 2.7314590402326706,
"grad_norm": 12.934642259929504,
"learning_rate": 2.429775260361106e-07,
"loss": 0.7644168734550476,
"step": 5635
},
{
"epoch": 2.7319437712069803,
"grad_norm": 15.276364978153513,
"learning_rate": 2.4210984960126027e-07,
"loss": 0.6693063974380493,
"step": 5636
},
{
"epoch": 2.7324285021812895,
"grad_norm": 15.806824994625012,
"learning_rate": 2.412436867407425e-07,
"loss": 0.32637467980384827,
"step": 5637
},
{
"epoch": 2.7329132331555988,
"grad_norm": 10.217078839967732,
"learning_rate": 2.403790377301007e-07,
"loss": 0.33752429485321045,
"step": 5638
},
{
"epoch": 2.733397964129908,
"grad_norm": 8.194445818065764,
"learning_rate": 2.3951590284439605e-07,
"loss": 0.21712520718574524,
"step": 5639
},
{
"epoch": 2.733882695104217,
"grad_norm": 10.783176753186543,
"learning_rate": 2.3865428235820775e-07,
"loss": 0.21599963307380676,
"step": 5640
},
{
"epoch": 2.7343674260785265,
"grad_norm": 26.963316286999515,
"learning_rate": 2.3779417654563207e-07,
"loss": 1.1763331890106201,
"step": 5641
},
{
"epoch": 2.7348521570528357,
"grad_norm": 8.921799287195583,
"learning_rate": 2.3693558568028686e-07,
"loss": 0.47406673431396484,
"step": 5642
},
{
"epoch": 2.735336888027145,
"grad_norm": 10.997434533407414,
"learning_rate": 2.360785100353058e-07,
"loss": 0.5608886480331421,
"step": 5643
},
{
"epoch": 2.735821619001454,
"grad_norm": 8.628676963307084,
"learning_rate": 2.352229498833397e-07,
"loss": 0.7346041798591614,
"step": 5644
},
{
"epoch": 2.7363063499757634,
"grad_norm": 12.30196433094394,
"learning_rate": 2.3436890549655922e-07,
"loss": 0.7719544172286987,
"step": 5645
},
{
"epoch": 2.7367910809500726,
"grad_norm": 10.94148716802442,
"learning_rate": 2.3351637714665142e-07,
"loss": 0.7077443599700928,
"step": 5646
},
{
"epoch": 2.737275811924382,
"grad_norm": 12.336844961066184,
"learning_rate": 2.3266536510482162e-07,
"loss": 0.7886053323745728,
"step": 5647
},
{
"epoch": 2.737760542898691,
"grad_norm": 13.072752252549531,
"learning_rate": 2.3181586964179325e-07,
"loss": 0.9396924376487732,
"step": 5648
},
{
"epoch": 2.7382452738730003,
"grad_norm": 19.098240324819763,
"learning_rate": 2.309678910278068e-07,
"loss": 0.6058845520019531,
"step": 5649
},
{
"epoch": 2.7387300048473096,
"grad_norm": 11.439760197325961,
"learning_rate": 2.301214295326193e-07,
"loss": 0.42299556732177734,
"step": 5650
},
{
"epoch": 2.7392147358216192,
"grad_norm": 9.87543463856578,
"learning_rate": 2.292764854255064e-07,
"loss": 0.26356250047683716,
"step": 5651
},
{
"epoch": 2.739699466795928,
"grad_norm": 12.766187526454539,
"learning_rate": 2.2843305897526147e-07,
"loss": 0.8022994995117188,
"step": 5652
},
{
"epoch": 2.7401841977702377,
"grad_norm": 8.189260304137306,
"learning_rate": 2.2759115045019375e-07,
"loss": 1.331093192100525,
"step": 5653
},
{
"epoch": 2.740668928744547,
"grad_norm": 9.34030641563664,
"learning_rate": 2.2675076011813068e-07,
"loss": 0.35420358180999756,
"step": 5654
},
{
"epoch": 2.741153659718856,
"grad_norm": 6.901842543754542,
"learning_rate": 2.2591188824641508e-07,
"loss": 0.10940980911254883,
"step": 5655
},
{
"epoch": 2.7416383906931654,
"grad_norm": 16.990608635883095,
"learning_rate": 2.2507453510190902e-07,
"loss": 0.3639891445636749,
"step": 5656
},
{
"epoch": 2.7421231216674746,
"grad_norm": 17.700592350389943,
"learning_rate": 2.2423870095098943e-07,
"loss": 0.7494065761566162,
"step": 5657
},
{
"epoch": 2.742607852641784,
"grad_norm": 10.036244988832166,
"learning_rate": 2.2340438605955195e-07,
"loss": 0.6344140768051147,
"step": 5658
},
{
"epoch": 2.743092583616093,
"grad_norm": 10.966594630447837,
"learning_rate": 2.225715906930065e-07,
"loss": 0.3009337782859802,
"step": 5659
},
{
"epoch": 2.7435773145904023,
"grad_norm": 11.000695161220534,
"learning_rate": 2.217403151162817e-07,
"loss": 0.6476365327835083,
"step": 5660
},
{
"epoch": 2.7440620455647116,
"grad_norm": 17.51234420392653,
"learning_rate": 2.209105595938227e-07,
"loss": 0.7910495400428772,
"step": 5661
},
{
"epoch": 2.744546776539021,
"grad_norm": 8.553275636110584,
"learning_rate": 2.2008232438958887e-07,
"loss": 0.35017329454421997,
"step": 5662
},
{
"epoch": 2.74503150751333,
"grad_norm": 14.214970604490713,
"learning_rate": 2.1925560976705784e-07,
"loss": 0.5244268774986267,
"step": 5663
},
{
"epoch": 2.7455162384876393,
"grad_norm": 13.239007372896731,
"learning_rate": 2.1843041598922365e-07,
"loss": 0.6301036477088928,
"step": 5664
},
{
"epoch": 2.7460009694619485,
"grad_norm": 10.062347676255957,
"learning_rate": 2.176067433185952e-07,
"loss": 0.32126230001449585,
"step": 5665
},
{
"epoch": 2.7464857004362577,
"grad_norm": 9.710708602506154,
"learning_rate": 2.1678459201719902e-07,
"loss": 0.4359632134437561,
"step": 5666
},
{
"epoch": 2.746970431410567,
"grad_norm": 15.138709016420416,
"learning_rate": 2.1596396234657646e-07,
"loss": 0.6925677061080933,
"step": 5667
},
{
"epoch": 2.7474551623848766,
"grad_norm": 11.450176790141278,
"learning_rate": 2.1514485456778533e-07,
"loss": 0.7011528015136719,
"step": 5668
},
{
"epoch": 2.7479398933591854,
"grad_norm": 14.131815848977785,
"learning_rate": 2.1432726894139888e-07,
"loss": 0.22416150569915771,
"step": 5669
},
{
"epoch": 2.748424624333495,
"grad_norm": 8.652116334908806,
"learning_rate": 2.1351120572750737e-07,
"loss": 0.8561699390411377,
"step": 5670
},
{
"epoch": 2.748909355307804,
"grad_norm": 8.548276994584132,
"learning_rate": 2.1269666518571485e-07,
"loss": 0.3938404321670532,
"step": 5671
},
{
"epoch": 2.7493940862821136,
"grad_norm": 11.043207327004556,
"learning_rate": 2.1188364757514234e-07,
"loss": 0.40988442301750183,
"step": 5672
},
{
"epoch": 2.749878817256423,
"grad_norm": 11.326357337397628,
"learning_rate": 2.110721531544263e-07,
"loss": 0.40803998708724976,
"step": 5673
},
{
"epoch": 2.750363548230732,
"grad_norm": 7.164788033841951,
"learning_rate": 2.10262182181718e-07,
"loss": 0.3724493980407715,
"step": 5674
},
{
"epoch": 2.7508482792050413,
"grad_norm": 9.90640523510801,
"learning_rate": 2.0945373491468468e-07,
"loss": 0.8222996592521667,
"step": 5675
},
{
"epoch": 2.7513330101793505,
"grad_norm": 9.575122667454542,
"learning_rate": 2.0864681161050948e-07,
"loss": 0.44317805767059326,
"step": 5676
},
{
"epoch": 2.7518177411536597,
"grad_norm": 11.367267797346615,
"learning_rate": 2.078414125258882e-07,
"loss": 0.33283576369285583,
"step": 5677
},
{
"epoch": 2.752302472127969,
"grad_norm": 10.013812774534244,
"learning_rate": 2.0703753791703418e-07,
"loss": 0.4969249963760376,
"step": 5678
},
{
"epoch": 2.752787203102278,
"grad_norm": 8.170908610899042,
"learning_rate": 2.06235188039674e-07,
"loss": 0.240337073802948,
"step": 5679
},
{
"epoch": 2.7532719340765874,
"grad_norm": 13.783392203240307,
"learning_rate": 2.0543436314905242e-07,
"loss": 0.42299845814704895,
"step": 5680
},
{
"epoch": 2.7537566650508967,
"grad_norm": 9.736553216632824,
"learning_rate": 2.0463506349992556e-07,
"loss": 0.23305197060108185,
"step": 5681
},
{
"epoch": 2.754241396025206,
"grad_norm": 9.669595632959034,
"learning_rate": 2.0383728934656622e-07,
"loss": 0.4878266453742981,
"step": 5682
},
{
"epoch": 2.754726126999515,
"grad_norm": 13.259874999742372,
"learning_rate": 2.0304104094276078e-07,
"loss": 0.546998143196106,
"step": 5683
},
{
"epoch": 2.7552108579738244,
"grad_norm": 13.246497786202449,
"learning_rate": 2.022463185418111e-07,
"loss": 0.8995538949966431,
"step": 5684
},
{
"epoch": 2.755695588948134,
"grad_norm": 8.828630885709096,
"learning_rate": 2.0145312239653325e-07,
"loss": 0.520280659198761,
"step": 5685
},
{
"epoch": 2.756180319922443,
"grad_norm": 10.270060442135838,
"learning_rate": 2.006614527592582e-07,
"loss": 0.5037822723388672,
"step": 5686
},
{
"epoch": 2.7566650508967525,
"grad_norm": 10.446613694401217,
"learning_rate": 1.9987130988183056e-07,
"loss": 0.47408267855644226,
"step": 5687
},
{
"epoch": 2.7571497818710613,
"grad_norm": 8.745655917671908,
"learning_rate": 1.9908269401560876e-07,
"loss": 0.22923362255096436,
"step": 5688
},
{
"epoch": 2.757634512845371,
"grad_norm": 9.788877697444505,
"learning_rate": 1.9829560541146819e-07,
"loss": 0.39154309034347534,
"step": 5689
},
{
"epoch": 2.75811924381968,
"grad_norm": 12.722597493804448,
"learning_rate": 1.975100443197958e-07,
"loss": 0.45256417989730835,
"step": 5690
},
{
"epoch": 2.7586039747939894,
"grad_norm": 10.349413433181335,
"learning_rate": 1.9672601099049282e-07,
"loss": 0.351608544588089,
"step": 5691
},
{
"epoch": 2.7590887057682987,
"grad_norm": 14.612385586706733,
"learning_rate": 1.9594350567297583e-07,
"loss": 0.2978692948818207,
"step": 5692
},
{
"epoch": 2.759573436742608,
"grad_norm": 9.330161036879263,
"learning_rate": 1.9516252861617348e-07,
"loss": 0.2798566222190857,
"step": 5693
},
{
"epoch": 2.760058167716917,
"grad_norm": 13.845530949768133,
"learning_rate": 1.9438308006852978e-07,
"loss": 0.3531230092048645,
"step": 5694
},
{
"epoch": 2.7605428986912264,
"grad_norm": 15.562415472840174,
"learning_rate": 1.9360516027800258e-07,
"loss": 0.4404766857624054,
"step": 5695
},
{
"epoch": 2.7610276296655356,
"grad_norm": 10.570041795248448,
"learning_rate": 1.9282876949206163e-07,
"loss": 0.28965723514556885,
"step": 5696
},
{
"epoch": 2.761512360639845,
"grad_norm": 8.711085802991006,
"learning_rate": 1.9205390795769218e-07,
"loss": 0.5658340454101562,
"step": 5697
},
{
"epoch": 2.761997091614154,
"grad_norm": 9.099364244819943,
"learning_rate": 1.9128057592139203e-07,
"loss": 0.5508014559745789,
"step": 5698
},
{
"epoch": 2.7624818225884633,
"grad_norm": 12.227945818341333,
"learning_rate": 1.905087736291733e-07,
"loss": 0.37196803092956543,
"step": 5699
},
{
"epoch": 2.7629665535627725,
"grad_norm": 14.799247036472012,
"learning_rate": 1.8973850132655957e-07,
"loss": 1.5219953060150146,
"step": 5700
},
{
"epoch": 2.7634512845370818,
"grad_norm": 19.090928585068472,
"learning_rate": 1.8896975925858984e-07,
"loss": 1.020242691040039,
"step": 5701
},
{
"epoch": 2.7639360155113915,
"grad_norm": 14.686847187598024,
"learning_rate": 1.882025476698146e-07,
"loss": 0.3423435688018799,
"step": 5702
},
{
"epoch": 2.7644207464857002,
"grad_norm": 13.047447997760369,
"learning_rate": 1.8743686680429919e-07,
"loss": 0.440598726272583,
"step": 5703
},
{
"epoch": 2.76490547746001,
"grad_norm": 10.20382924711756,
"learning_rate": 1.8667271690562094e-07,
"loss": 0.47679632902145386,
"step": 5704
},
{
"epoch": 2.7653902084343187,
"grad_norm": 14.648183684277852,
"learning_rate": 1.8591009821687044e-07,
"loss": 0.3477858304977417,
"step": 5705
},
{
"epoch": 2.7658749394086284,
"grad_norm": 13.768428951368316,
"learning_rate": 1.851490109806503e-07,
"loss": 0.7247068881988525,
"step": 5706
},
{
"epoch": 2.7663596703829376,
"grad_norm": 10.134843229452217,
"learning_rate": 1.8438945543907738e-07,
"loss": 0.4723333716392517,
"step": 5707
},
{
"epoch": 2.766844401357247,
"grad_norm": 15.422241237636714,
"learning_rate": 1.8363143183378007e-07,
"loss": 0.5128608345985413,
"step": 5708
},
{
"epoch": 2.767329132331556,
"grad_norm": 11.617106322167853,
"learning_rate": 1.8287494040590049e-07,
"loss": 0.3357827067375183,
"step": 5709
},
{
"epoch": 2.7678138633058653,
"grad_norm": 11.70957346304107,
"learning_rate": 1.8211998139609222e-07,
"loss": 0.37261509895324707,
"step": 5710
},
{
"epoch": 2.7682985942801746,
"grad_norm": 11.163524671393763,
"learning_rate": 1.8136655504452205e-07,
"loss": 0.6593072414398193,
"step": 5711
},
{
"epoch": 2.768783325254484,
"grad_norm": 5.799433049387915,
"learning_rate": 1.8061466159086882e-07,
"loss": 0.17010098695755005,
"step": 5712
},
{
"epoch": 2.769268056228793,
"grad_norm": 13.38647943677115,
"learning_rate": 1.7986430127432507e-07,
"loss": 0.489210307598114,
"step": 5713
},
{
"epoch": 2.7697527872031023,
"grad_norm": 13.20895439448917,
"learning_rate": 1.7911547433359434e-07,
"loss": 0.5832932591438293,
"step": 5714
},
{
"epoch": 2.7702375181774115,
"grad_norm": 27.14621657757214,
"learning_rate": 1.78368181006891e-07,
"loss": 0.4723288416862488,
"step": 5715
},
{
"epoch": 2.7707222491517207,
"grad_norm": 19.588864704667458,
"learning_rate": 1.7762242153194386e-07,
"loss": 0.4344691336154938,
"step": 5716
},
{
"epoch": 2.77120698012603,
"grad_norm": 10.066943955888016,
"learning_rate": 1.7687819614599423e-07,
"loss": 0.5882493853569031,
"step": 5717
},
{
"epoch": 2.771691711100339,
"grad_norm": 12.963937300197292,
"learning_rate": 1.761355050857927e-07,
"loss": 0.2972922921180725,
"step": 5718
},
{
"epoch": 2.7721764420746484,
"grad_norm": 10.482323963233576,
"learning_rate": 1.7539434858760418e-07,
"loss": 0.6031232476234436,
"step": 5719
},
{
"epoch": 2.7726611730489577,
"grad_norm": 8.279718872433639,
"learning_rate": 1.7465472688720397e-07,
"loss": 0.6772762537002563,
"step": 5720
},
{
"epoch": 2.7731459040232673,
"grad_norm": 15.884504713708537,
"learning_rate": 1.7391664021987999e-07,
"loss": 0.5062968134880066,
"step": 5721
},
{
"epoch": 2.773630634997576,
"grad_norm": 11.093363406956595,
"learning_rate": 1.7318008882043158e-07,
"loss": 0.6407750844955444,
"step": 5722
},
{
"epoch": 2.774115365971886,
"grad_norm": 19.280671169686432,
"learning_rate": 1.7244507292316914e-07,
"loss": 0.4370200037956238,
"step": 5723
},
{
"epoch": 2.774600096946195,
"grad_norm": 11.725472645459229,
"learning_rate": 1.7171159276191508e-07,
"loss": 0.790355920791626,
"step": 5724
},
{
"epoch": 2.7750848279205043,
"grad_norm": 7.637695536683325,
"learning_rate": 1.7097964857000326e-07,
"loss": 0.34491318464279175,
"step": 5725
},
{
"epoch": 2.7755695588948135,
"grad_norm": 14.868087634715566,
"learning_rate": 1.7024924058027858e-07,
"loss": 0.3479264974594116,
"step": 5726
},
{
"epoch": 2.7760542898691227,
"grad_norm": 13.487232627444467,
"learning_rate": 1.6952036902509906e-07,
"loss": 0.6930355429649353,
"step": 5727
},
{
"epoch": 2.776539020843432,
"grad_norm": 13.20700973667158,
"learning_rate": 1.6879303413633087e-07,
"loss": 0.2652239501476288,
"step": 5728
},
{
"epoch": 2.777023751817741,
"grad_norm": 14.377705954737655,
"learning_rate": 1.6806723614535448e-07,
"loss": 0.44364047050476074,
"step": 5729
},
{
"epoch": 2.7775084827920504,
"grad_norm": 8.450834364579876,
"learning_rate": 1.6734297528305687e-07,
"loss": 0.303627610206604,
"step": 5730
},
{
"epoch": 2.7779932137663597,
"grad_norm": 12.625730104349259,
"learning_rate": 1.6662025177984264e-07,
"loss": 0.34260088205337524,
"step": 5731
},
{
"epoch": 2.778477944740669,
"grad_norm": 10.071110155961376,
"learning_rate": 1.658990658656212e-07,
"loss": 0.3665241301059723,
"step": 5732
},
{
"epoch": 2.778962675714978,
"grad_norm": 7.455661798570275,
"learning_rate": 1.6517941776981682e-07,
"loss": 0.4548915922641754,
"step": 5733
},
{
"epoch": 2.7794474066892874,
"grad_norm": 13.260092765834846,
"learning_rate": 1.644613077213625e-07,
"loss": 0.577038049697876,
"step": 5734
},
{
"epoch": 2.7799321376635966,
"grad_norm": 17.251818675503184,
"learning_rate": 1.6374473594870155e-07,
"loss": 0.6231193542480469,
"step": 5735
},
{
"epoch": 2.780416868637906,
"grad_norm": 9.028832405891873,
"learning_rate": 1.6302970267979167e-07,
"loss": 0.17643167078495026,
"step": 5736
},
{
"epoch": 2.780901599612215,
"grad_norm": 13.250189558686197,
"learning_rate": 1.6231620814209591e-07,
"loss": 0.15616117417812347,
"step": 5737
},
{
"epoch": 2.7813863305865247,
"grad_norm": 13.933022381333647,
"learning_rate": 1.6160425256259103e-07,
"loss": 0.5181273221969604,
"step": 5738
},
{
"epoch": 2.7818710615608335,
"grad_norm": 10.978406447038063,
"learning_rate": 1.6089383616776366e-07,
"loss": 0.4918001890182495,
"step": 5739
},
{
"epoch": 2.782355792535143,
"grad_norm": 9.214305302389851,
"learning_rate": 1.6018495918360965e-07,
"loss": 0.3786023259162903,
"step": 5740
},
{
"epoch": 2.782840523509452,
"grad_norm": 13.482395576851761,
"learning_rate": 1.5947762183563754e-07,
"loss": 1.3102922439575195,
"step": 5741
},
{
"epoch": 2.7833252544837617,
"grad_norm": 10.39330586452816,
"learning_rate": 1.587718243488645e-07,
"loss": 0.6737884879112244,
"step": 5742
},
{
"epoch": 2.783809985458071,
"grad_norm": 7.91612491287779,
"learning_rate": 1.580675669478171e-07,
"loss": 0.5122435688972473,
"step": 5743
},
{
"epoch": 2.78429471643238,
"grad_norm": 15.487342348660604,
"learning_rate": 1.5736484985653334e-07,
"loss": 0.916961133480072,
"step": 5744
},
{
"epoch": 2.7847794474066894,
"grad_norm": 14.200542838398226,
"learning_rate": 1.5666367329856046e-07,
"loss": 0.5634340643882751,
"step": 5745
},
{
"epoch": 2.7852641783809986,
"grad_norm": 7.5679717203863355,
"learning_rate": 1.5596403749695622e-07,
"loss": 0.1977248191833496,
"step": 5746
},
{
"epoch": 2.785748909355308,
"grad_norm": 15.707344655057977,
"learning_rate": 1.5526594267428808e-07,
"loss": 0.720991849899292,
"step": 5747
},
{
"epoch": 2.786233640329617,
"grad_norm": 10.501751188430811,
"learning_rate": 1.5456938905263285e-07,
"loss": 0.3830573856830597,
"step": 5748
},
{
"epoch": 2.7867183713039263,
"grad_norm": 15.173095360434727,
"learning_rate": 1.5387437685357665e-07,
"loss": 0.5682836174964905,
"step": 5749
},
{
"epoch": 2.7872031022782355,
"grad_norm": 13.615285638915873,
"learning_rate": 1.5318090629821757e-07,
"loss": 0.44040194153785706,
"step": 5750
},
{
"epoch": 2.7876878332525448,
"grad_norm": 9.922658416435533,
"learning_rate": 1.5248897760716087e-07,
"loss": 0.31792810559272766,
"step": 5751
},
{
"epoch": 2.788172564226854,
"grad_norm": 13.697335895724354,
"learning_rate": 1.5179859100052153e-07,
"loss": 0.5327882766723633,
"step": 5752
},
{
"epoch": 2.7886572952011632,
"grad_norm": 15.52058277142957,
"learning_rate": 1.5110974669792499e-07,
"loss": 0.8105844259262085,
"step": 5753
},
{
"epoch": 2.7891420261754725,
"grad_norm": 10.584222329049853,
"learning_rate": 1.504224449185049e-07,
"loss": 0.4364907741546631,
"step": 5754
},
{
"epoch": 2.789626757149782,
"grad_norm": 9.672322864369345,
"learning_rate": 1.4973668588090572e-07,
"loss": 0.4114936590194702,
"step": 5755
},
{
"epoch": 2.790111488124091,
"grad_norm": 14.11841698799951,
"learning_rate": 1.4905246980328082e-07,
"loss": 0.2892499566078186,
"step": 5756
},
{
"epoch": 2.7905962190984006,
"grad_norm": 8.691256373193726,
"learning_rate": 1.48369796903291e-07,
"loss": 0.763958752155304,
"step": 5757
},
{
"epoch": 2.7910809500727094,
"grad_norm": 8.595078663875666,
"learning_rate": 1.476886673981076e-07,
"loss": 0.2811703085899353,
"step": 5758
},
{
"epoch": 2.791565681047019,
"grad_norm": 17.067517562331748,
"learning_rate": 1.4700908150441118e-07,
"loss": 0.6154250502586365,
"step": 5759
},
{
"epoch": 2.7920504120213283,
"grad_norm": 14.662759710397372,
"learning_rate": 1.4633103943839045e-07,
"loss": 0.4562567472457886,
"step": 5760
},
{
"epoch": 2.7925351429956375,
"grad_norm": 8.7935431984625,
"learning_rate": 1.4565454141574343e-07,
"loss": 0.3395078182220459,
"step": 5761
},
{
"epoch": 2.7930198739699468,
"grad_norm": 22.219875266063653,
"learning_rate": 1.4497958765167684e-07,
"loss": 0.4027353823184967,
"step": 5762
},
{
"epoch": 2.793504604944256,
"grad_norm": 11.256456167915124,
"learning_rate": 1.443061783609051e-07,
"loss": 0.7824878096580505,
"step": 5763
},
{
"epoch": 2.7939893359185652,
"grad_norm": 9.808424238084976,
"learning_rate": 1.4363431375765458e-07,
"loss": 0.5260804891586304,
"step": 5764
},
{
"epoch": 2.7944740668928745,
"grad_norm": 7.624113670191106,
"learning_rate": 1.429639940556571e-07,
"loss": 0.32197272777557373,
"step": 5765
},
{
"epoch": 2.7949587978671837,
"grad_norm": 10.265226602374607,
"learning_rate": 1.4229521946815317e-07,
"loss": 0.3607000410556793,
"step": 5766
},
{
"epoch": 2.795443528841493,
"grad_norm": 9.042971630326775,
"learning_rate": 1.416279902078943e-07,
"loss": 0.2601453363895416,
"step": 5767
},
{
"epoch": 2.795928259815802,
"grad_norm": 17.997582691792797,
"learning_rate": 1.4096230648713626e-07,
"loss": 0.49258917570114136,
"step": 5768
},
{
"epoch": 2.7964129907901114,
"grad_norm": 9.420505704820798,
"learning_rate": 1.4029816851764743e-07,
"loss": 0.7508581876754761,
"step": 5769
},
{
"epoch": 2.7968977217644206,
"grad_norm": 11.339949743222101,
"learning_rate": 1.396355765107016e-07,
"loss": 0.41167014837265015,
"step": 5770
},
{
"epoch": 2.79738245273873,
"grad_norm": 17.59063726656045,
"learning_rate": 1.3897453067708233e-07,
"loss": 0.831938624382019,
"step": 5771
},
{
"epoch": 2.797867183713039,
"grad_norm": 10.645849256978817,
"learning_rate": 1.3831503122708033e-07,
"loss": 0.4881696105003357,
"step": 5772
},
{
"epoch": 2.7983519146873483,
"grad_norm": 12.774335814691883,
"learning_rate": 1.376570783704956e-07,
"loss": 0.4156322479248047,
"step": 5773
},
{
"epoch": 2.798836645661658,
"grad_norm": 10.845910727407105,
"learning_rate": 1.370006723166345e-07,
"loss": 0.35132622718811035,
"step": 5774
},
{
"epoch": 2.799321376635967,
"grad_norm": 12.583588972677381,
"learning_rate": 1.363458132743123e-07,
"loss": 0.39444082975387573,
"step": 5775
},
{
"epoch": 2.7998061076102765,
"grad_norm": 13.424129681755657,
"learning_rate": 1.3569250145185175e-07,
"loss": 0.2593204975128174,
"step": 5776
},
{
"epoch": 2.8002908385845857,
"grad_norm": 8.568681352945285,
"learning_rate": 1.3504073705708388e-07,
"loss": 0.4759184420108795,
"step": 5777
},
{
"epoch": 2.800775569558895,
"grad_norm": 15.854933532117714,
"learning_rate": 1.3439052029734722e-07,
"loss": 0.8784979581832886,
"step": 5778
},
{
"epoch": 2.801260300533204,
"grad_norm": 12.707510504536126,
"learning_rate": 1.3374185137948804e-07,
"loss": 0.48477786779403687,
"step": 5779
},
{
"epoch": 2.8017450315075134,
"grad_norm": 11.277358921864607,
"learning_rate": 1.3309473050986067e-07,
"loss": 0.4044339656829834,
"step": 5780
},
{
"epoch": 2.8022297624818227,
"grad_norm": 11.188377402631076,
"learning_rate": 1.324491578943249e-07,
"loss": 0.5381585359573364,
"step": 5781
},
{
"epoch": 2.802714493456132,
"grad_norm": 13.095423021533733,
"learning_rate": 1.3180513373825088e-07,
"loss": 0.36557871103286743,
"step": 5782
},
{
"epoch": 2.803199224430441,
"grad_norm": 11.327932049964748,
"learning_rate": 1.311626582465142e-07,
"loss": 0.6679927110671997,
"step": 5783
},
{
"epoch": 2.8036839554047503,
"grad_norm": 13.34492657601019,
"learning_rate": 1.3052173162349858e-07,
"loss": 0.45547381043434143,
"step": 5784
},
{
"epoch": 2.8041686863790596,
"grad_norm": 15.292243238315123,
"learning_rate": 1.298823540730948e-07,
"loss": 0.45665717124938965,
"step": 5785
},
{
"epoch": 2.804653417353369,
"grad_norm": 11.270303201866902,
"learning_rate": 1.2924452579870018e-07,
"loss": 1.7753090858459473,
"step": 5786
},
{
"epoch": 2.805138148327678,
"grad_norm": 9.278399746329434,
"learning_rate": 1.2860824700322127e-07,
"loss": 0.5383343696594238,
"step": 5787
},
{
"epoch": 2.8056228793019873,
"grad_norm": 10.845221667651431,
"learning_rate": 1.2797351788906952e-07,
"loss": 0.2867419719696045,
"step": 5788
},
{
"epoch": 2.8061076102762965,
"grad_norm": 8.00793564124142,
"learning_rate": 1.273403386581651e-07,
"loss": 0.40015560388565063,
"step": 5789
},
{
"epoch": 2.8065923412506057,
"grad_norm": 15.062185648000172,
"learning_rate": 1.2670870951193293e-07,
"loss": 0.9323871731758118,
"step": 5790
},
{
"epoch": 2.8070770722249154,
"grad_norm": 11.102691258307477,
"learning_rate": 1.2607863065130566e-07,
"loss": 1.1430847644805908,
"step": 5791
},
{
"epoch": 2.807561803199224,
"grad_norm": 12.117980847285802,
"learning_rate": 1.2545010227672516e-07,
"loss": 0.5643987655639648,
"step": 5792
},
{
"epoch": 2.808046534173534,
"grad_norm": 14.727079293171643,
"learning_rate": 1.248231245881365e-07,
"loss": 1.000656247138977,
"step": 5793
},
{
"epoch": 2.8085312651478427,
"grad_norm": 15.526978694243095,
"learning_rate": 1.2419769778499458e-07,
"loss": 0.45679643750190735,
"step": 5794
},
{
"epoch": 2.8090159961221524,
"grad_norm": 12.433938131913273,
"learning_rate": 1.2357382206625802e-07,
"loss": 0.4663653075695038,
"step": 5795
},
{
"epoch": 2.8095007270964616,
"grad_norm": 9.234395054072605,
"learning_rate": 1.2295149763039416e-07,
"loss": 0.2940272390842438,
"step": 5796
},
{
"epoch": 2.809985458070771,
"grad_norm": 15.716882375569751,
"learning_rate": 1.2233072467537575e-07,
"loss": 0.8142058849334717,
"step": 5797
},
{
"epoch": 2.81047018904508,
"grad_norm": 8.942086855494402,
"learning_rate": 1.2171150339868265e-07,
"loss": 0.34026822447776794,
"step": 5798
},
{
"epoch": 2.8109549200193893,
"grad_norm": 13.470678159052442,
"learning_rate": 1.2109383399730058e-07,
"loss": 0.276980996131897,
"step": 5799
},
{
"epoch": 2.8114396509936985,
"grad_norm": 6.710969037298714,
"learning_rate": 1.2047771666772124e-07,
"loss": 0.36903122067451477,
"step": 5800
},
{
"epoch": 2.8119243819680078,
"grad_norm": 8.915897895794759,
"learning_rate": 1.1986315160594454e-07,
"loss": 0.41700419783592224,
"step": 5801
},
{
"epoch": 2.812409112942317,
"grad_norm": 9.63607432604467,
"learning_rate": 1.1925013900747407e-07,
"loss": 0.4545201063156128,
"step": 5802
},
{
"epoch": 2.8128938439166262,
"grad_norm": 11.199465398125945,
"learning_rate": 1.1863867906732163e-07,
"loss": 0.6309000253677368,
"step": 5803
},
{
"epoch": 2.8133785748909355,
"grad_norm": 11.762043871132098,
"learning_rate": 1.1802877198000273e-07,
"loss": 0.8475041389465332,
"step": 5804
},
{
"epoch": 2.8138633058652447,
"grad_norm": 10.356021334243461,
"learning_rate": 1.1742041793954162e-07,
"loss": 1.0352853536605835,
"step": 5805
},
{
"epoch": 2.814348036839554,
"grad_norm": 11.209779095183997,
"learning_rate": 1.1681361713946627e-07,
"loss": 0.867552638053894,
"step": 5806
},
{
"epoch": 2.814832767813863,
"grad_norm": 10.081180772818641,
"learning_rate": 1.1620836977281225e-07,
"loss": 0.5955582857131958,
"step": 5807
},
{
"epoch": 2.815317498788173,
"grad_norm": 11.984842533666793,
"learning_rate": 1.1560467603211889e-07,
"loss": 0.5301733016967773,
"step": 5808
},
{
"epoch": 2.8158022297624816,
"grad_norm": 11.143743736417292,
"learning_rate": 1.1500253610943368e-07,
"loss": 0.5227071046829224,
"step": 5809
},
{
"epoch": 2.8162869607367913,
"grad_norm": 10.118885009800584,
"learning_rate": 1.1440195019630784e-07,
"loss": 0.7030061483383179,
"step": 5810
},
{
"epoch": 2.8167716917111,
"grad_norm": 10.516972580817411,
"learning_rate": 1.138029184838002e-07,
"loss": 0.30802470445632935,
"step": 5811
},
{
"epoch": 2.8172564226854098,
"grad_norm": 10.677847025200519,
"learning_rate": 1.1320544116247279e-07,
"loss": 0.550485372543335,
"step": 5812
},
{
"epoch": 2.817741153659719,
"grad_norm": 8.500106874811301,
"learning_rate": 1.126095184223952e-07,
"loss": 0.08068777620792389,
"step": 5813
},
{
"epoch": 2.8182258846340282,
"grad_norm": 9.547941474568727,
"learning_rate": 1.1201515045314026e-07,
"loss": 0.3014385402202606,
"step": 5814
},
{
"epoch": 2.8187106156083375,
"grad_norm": 13.808825832570296,
"learning_rate": 1.114223374437895e-07,
"loss": 0.6898598074913025,
"step": 5815
},
{
"epoch": 2.8191953465826467,
"grad_norm": 9.701219041526793,
"learning_rate": 1.1083107958292705e-07,
"loss": 0.5655490159988403,
"step": 5816
},
{
"epoch": 2.819680077556956,
"grad_norm": 12.904017653644681,
"learning_rate": 1.1024137705864302e-07,
"loss": 0.5612677335739136,
"step": 5817
},
{
"epoch": 2.820164808531265,
"grad_norm": 10.144383906891003,
"learning_rate": 1.096532300585329e-07,
"loss": 0.2284274697303772,
"step": 5818
},
{
"epoch": 2.8206495395055744,
"grad_norm": 12.324615650347987,
"learning_rate": 1.0906663876969759e-07,
"loss": 0.29151439666748047,
"step": 5819
},
{
"epoch": 2.8211342704798836,
"grad_norm": 7.40230156845738,
"learning_rate": 1.0848160337874225e-07,
"loss": 0.29780182242393494,
"step": 5820
},
{
"epoch": 2.821619001454193,
"grad_norm": 19.48567596902887,
"learning_rate": 1.0789812407177858e-07,
"loss": 0.7960946559906006,
"step": 5821
},
{
"epoch": 2.822103732428502,
"grad_norm": 12.192283245679379,
"learning_rate": 1.0731620103442141e-07,
"loss": 0.43098902702331543,
"step": 5822
},
{
"epoch": 2.8225884634028113,
"grad_norm": 7.850306435014977,
"learning_rate": 1.0673583445179159e-07,
"loss": 0.41072794795036316,
"step": 5823
},
{
"epoch": 2.8230731943771206,
"grad_norm": 8.775045815662946,
"learning_rate": 1.0615702450851473e-07,
"loss": 0.2862530052661896,
"step": 5824
},
{
"epoch": 2.82355792535143,
"grad_norm": 11.120485122658572,
"learning_rate": 1.0557977138872133e-07,
"loss": 0.6671080589294434,
"step": 5825
},
{
"epoch": 2.824042656325739,
"grad_norm": 13.388125484824727,
"learning_rate": 1.0500407527604673e-07,
"loss": 0.299510657787323,
"step": 5826
},
{
"epoch": 2.8245273873000487,
"grad_norm": 12.64077747045657,
"learning_rate": 1.0442993635363052e-07,
"loss": 0.4958818554878235,
"step": 5827
},
{
"epoch": 2.8250121182743575,
"grad_norm": 19.365317840277037,
"learning_rate": 1.0385735480411552e-07,
"loss": 0.49527913331985474,
"step": 5828
},
{
"epoch": 2.825496849248667,
"grad_norm": 8.956005524585162,
"learning_rate": 1.0328633080965322e-07,
"loss": 0.5318137407302856,
"step": 5829
},
{
"epoch": 2.8259815802229764,
"grad_norm": 10.51460369626135,
"learning_rate": 1.0271686455189556e-07,
"loss": 0.5073873996734619,
"step": 5830
},
{
"epoch": 2.8264663111972856,
"grad_norm": 7.922574109622564,
"learning_rate": 1.0214895621200093e-07,
"loss": 0.4513818025588989,
"step": 5831
},
{
"epoch": 2.826951042171595,
"grad_norm": 14.318769475835436,
"learning_rate": 1.0158260597063152e-07,
"loss": 0.7917163372039795,
"step": 5832
},
{
"epoch": 2.827435773145904,
"grad_norm": 11.602612454440234,
"learning_rate": 1.010178140079543e-07,
"loss": 0.6372048854827881,
"step": 5833
},
{
"epoch": 2.8279205041202133,
"grad_norm": 17.9469089206955,
"learning_rate": 1.0045458050363999e-07,
"loss": 0.38250526785850525,
"step": 5834
},
{
"epoch": 2.8284052350945226,
"grad_norm": 11.600823439837121,
"learning_rate": 9.989290563686305e-08,
"loss": 0.457716166973114,
"step": 5835
},
{
"epoch": 2.828889966068832,
"grad_norm": 10.278749829890515,
"learning_rate": 9.933278958630388e-08,
"loss": 0.5759618878364563,
"step": 5836
},
{
"epoch": 2.829374697043141,
"grad_norm": 7.983724596114936,
"learning_rate": 9.877423253014551e-08,
"loss": 0.2215237319469452,
"step": 5837
},
{
"epoch": 2.8298594280174503,
"grad_norm": 14.577489295228713,
"learning_rate": 9.821723464607524e-08,
"loss": 0.5222772359848022,
"step": 5838
},
{
"epoch": 2.8303441589917595,
"grad_norm": 10.27030753400338,
"learning_rate": 9.766179611128523e-08,
"loss": 0.47634726762771606,
"step": 5839
},
{
"epoch": 2.8308288899660687,
"grad_norm": 9.333842397291479,
"learning_rate": 9.710791710247025e-08,
"loss": 0.7052749395370483,
"step": 5840
},
{
"epoch": 2.831313620940378,
"grad_norm": 9.814796916410389,
"learning_rate": 9.65555977958299e-08,
"loss": 0.32388627529144287,
"step": 5841
},
{
"epoch": 2.831798351914687,
"grad_norm": 10.244839215022493,
"learning_rate": 9.600483836706698e-08,
"loss": 0.3620661795139313,
"step": 5842
},
{
"epoch": 2.8322830828889964,
"grad_norm": 12.898816053964197,
"learning_rate": 9.54556389913891e-08,
"loss": 1.1512136459350586,
"step": 5843
},
{
"epoch": 2.832767813863306,
"grad_norm": 17.65764557977153,
"learning_rate": 9.490799984350651e-08,
"loss": 0.3743966817855835,
"step": 5844
},
{
"epoch": 2.833252544837615,
"grad_norm": 8.863871231410366,
"learning_rate": 9.436192109763376e-08,
"loss": 0.8138227462768555,
"step": 5845
},
{
"epoch": 2.8337372758119246,
"grad_norm": 9.697111754006468,
"learning_rate": 9.381740292748853e-08,
"loss": 0.5767191052436829,
"step": 5846
},
{
"epoch": 2.8342220067862334,
"grad_norm": 13.548721084880727,
"learning_rate": 9.327444550629173e-08,
"loss": 0.5349211692810059,
"step": 5847
},
{
"epoch": 2.834706737760543,
"grad_norm": 9.568925158278994,
"learning_rate": 9.27330490067696e-08,
"loss": 0.7039353847503662,
"step": 5848
},
{
"epoch": 2.8351914687348523,
"grad_norm": 16.071773632279815,
"learning_rate": 9.219321360114997e-08,
"loss": 0.44319432973861694,
"step": 5849
},
{
"epoch": 2.8356761997091615,
"grad_norm": 11.878175604835855,
"learning_rate": 9.165493946116432e-08,
"loss": 1.0694646835327148,
"step": 5850
},
{
"epoch": 2.8361609306834707,
"grad_norm": 7.04127319630404,
"learning_rate": 9.111822675804794e-08,
"loss": 0.16005834937095642,
"step": 5851
},
{
"epoch": 2.83664566165778,
"grad_norm": 8.932828199552741,
"learning_rate": 9.058307566253865e-08,
"loss": 0.5513485670089722,
"step": 5852
},
{
"epoch": 2.837130392632089,
"grad_norm": 10.496423098603753,
"learning_rate": 9.004948634487975e-08,
"loss": 0.4208821654319763,
"step": 5853
},
{
"epoch": 2.8376151236063984,
"grad_norm": 7.23481014531622,
"learning_rate": 8.951745897481434e-08,
"loss": 0.3429228365421295,
"step": 5854
},
{
"epoch": 2.8380998545807077,
"grad_norm": 12.426590317470708,
"learning_rate": 8.898699372159147e-08,
"loss": 0.6429924964904785,
"step": 5855
},
{
"epoch": 2.838584585555017,
"grad_norm": 8.094190948934726,
"learning_rate": 8.845809075396173e-08,
"loss": 0.3419804275035858,
"step": 5856
},
{
"epoch": 2.839069316529326,
"grad_norm": 11.892947859393256,
"learning_rate": 8.793075024017827e-08,
"loss": 0.5527294278144836,
"step": 5857
},
{
"epoch": 2.8395540475036354,
"grad_norm": 16.273395542720174,
"learning_rate": 8.740497234799971e-08,
"loss": 1.2529551982879639,
"step": 5858
},
{
"epoch": 2.8400387784779446,
"grad_norm": 15.082762815493473,
"learning_rate": 8.688075724468447e-08,
"loss": 0.9228161573410034,
"step": 5859
},
{
"epoch": 2.840523509452254,
"grad_norm": 11.560698773207763,
"learning_rate": 8.635810509699583e-08,
"loss": 0.5103272199630737,
"step": 5860
},
{
"epoch": 2.8410082404265635,
"grad_norm": 7.94464210588956,
"learning_rate": 8.583701607119854e-08,
"loss": 0.4987528920173645,
"step": 5861
},
{
"epoch": 2.8414929714008723,
"grad_norm": 10.336165959450563,
"learning_rate": 8.531749033306169e-08,
"loss": 0.3774292767047882,
"step": 5862
},
{
"epoch": 2.841977702375182,
"grad_norm": 16.219764163937015,
"learning_rate": 8.479952804785641e-08,
"loss": 0.6562195420265198,
"step": 5863
},
{
"epoch": 2.842462433349491,
"grad_norm": 10.274836207409033,
"learning_rate": 8.428312938035588e-08,
"loss": 0.4560563564300537,
"step": 5864
},
{
"epoch": 2.8429471643238005,
"grad_norm": 12.525207553906307,
"learning_rate": 8.376829449483537e-08,
"loss": 0.6962409019470215,
"step": 5865
},
{
"epoch": 2.8434318952981097,
"grad_norm": 10.181526418930721,
"learning_rate": 8.325502355507442e-08,
"loss": 0.5315423607826233,
"step": 5866
},
{
"epoch": 2.843916626272419,
"grad_norm": 11.47041530970181,
"learning_rate": 8.274331672435409e-08,
"loss": 0.18707691133022308,
"step": 5867
},
{
"epoch": 2.844401357246728,
"grad_norm": 12.776206616335884,
"learning_rate": 8.223317416545807e-08,
"loss": 0.5244537591934204,
"step": 5868
},
{
"epoch": 2.8448860882210374,
"grad_norm": 9.79680870331481,
"learning_rate": 8.172459604067206e-08,
"loss": 0.37978386878967285,
"step": 5869
},
{
"epoch": 2.8453708191953466,
"grad_norm": 7.986716167735526,
"learning_rate": 8.121758251178391e-08,
"loss": 0.5259273052215576,
"step": 5870
},
{
"epoch": 2.845855550169656,
"grad_norm": 12.52084862901769,
"learning_rate": 8.071213374008569e-08,
"loss": 0.6145712733268738,
"step": 5871
},
{
"epoch": 2.846340281143965,
"grad_norm": 10.478089498464216,
"learning_rate": 8.020824988636822e-08,
"loss": 0.21372196078300476,
"step": 5872
},
{
"epoch": 2.8468250121182743,
"grad_norm": 10.123747849918841,
"learning_rate": 7.970593111092773e-08,
"loss": 0.40529486536979675,
"step": 5873
},
{
"epoch": 2.8473097430925836,
"grad_norm": 10.788730251793437,
"learning_rate": 7.920517757356084e-08,
"loss": 0.9202580451965332,
"step": 5874
},
{
"epoch": 2.847794474066893,
"grad_norm": 8.425048608408307,
"learning_rate": 7.870598943356622e-08,
"loss": 0.1562986671924591,
"step": 5875
},
{
"epoch": 2.848279205041202,
"grad_norm": 8.24184319605104,
"learning_rate": 7.820836684974575e-08,
"loss": 0.5501362085342407,
"step": 5876
},
{
"epoch": 2.8487639360155113,
"grad_norm": 15.212206543367222,
"learning_rate": 7.77123099804017e-08,
"loss": 0.3147481381893158,
"step": 5877
},
{
"epoch": 2.8492486669898205,
"grad_norm": 8.250240522801732,
"learning_rate": 7.721781898334057e-08,
"loss": 0.4940647482872009,
"step": 5878
},
{
"epoch": 2.8497333979641297,
"grad_norm": 12.773060159127049,
"learning_rate": 7.672489401586769e-08,
"loss": 0.5473623275756836,
"step": 5879
},
{
"epoch": 2.8502181289384394,
"grad_norm": 12.174687098189597,
"learning_rate": 7.62335352347926e-08,
"loss": 0.4704712927341461,
"step": 5880
},
{
"epoch": 2.850702859912748,
"grad_norm": 10.57960068191497,
"learning_rate": 7.574374279642527e-08,
"loss": 0.3494165241718292,
"step": 5881
},
{
"epoch": 2.851187590887058,
"grad_norm": 12.194948643669004,
"learning_rate": 7.525551685657828e-08,
"loss": 0.47216856479644775,
"step": 5882
},
{
"epoch": 2.851672321861367,
"grad_norm": 10.479432307715333,
"learning_rate": 7.476885757056573e-08,
"loss": 0.5013052821159363,
"step": 5883
},
{
"epoch": 2.8521570528356763,
"grad_norm": 12.35306727329503,
"learning_rate": 7.428376509320212e-08,
"loss": 1.1330370903015137,
"step": 5884
},
{
"epoch": 2.8526417838099856,
"grad_norm": 12.521747413490996,
"learning_rate": 7.380023957880511e-08,
"loss": 0.19514033198356628,
"step": 5885
},
{
"epoch": 2.853126514784295,
"grad_norm": 10.490199177956491,
"learning_rate": 7.331828118119388e-08,
"loss": 0.5655241012573242,
"step": 5886
},
{
"epoch": 2.853611245758604,
"grad_norm": 19.227180666146722,
"learning_rate": 7.283789005368747e-08,
"loss": 0.5903247594833374,
"step": 5887
},
{
"epoch": 2.8540959767329133,
"grad_norm": 17.581666566311203,
"learning_rate": 7.23590663491075e-08,
"loss": 0.3393847346305847,
"step": 5888
},
{
"epoch": 2.8545807077072225,
"grad_norm": 10.864364576469878,
"learning_rate": 7.188181021977713e-08,
"loss": 0.3700229525566101,
"step": 5889
},
{
"epoch": 2.8550654386815317,
"grad_norm": 11.643903444376376,
"learning_rate": 7.140612181752049e-08,
"loss": 0.9525031447410583,
"step": 5890
},
{
"epoch": 2.855550169655841,
"grad_norm": 10.73965551372967,
"learning_rate": 7.093200129366262e-08,
"loss": 0.40604549646377563,
"step": 5891
},
{
"epoch": 2.85603490063015,
"grad_norm": 15.434465926015518,
"learning_rate": 7.045944879903066e-08,
"loss": 0.4503433406352997,
"step": 5892
},
{
"epoch": 2.8565196316044594,
"grad_norm": 9.576847160625084,
"learning_rate": 6.998846448395214e-08,
"loss": 0.5373659729957581,
"step": 5893
},
{
"epoch": 2.8570043625787687,
"grad_norm": 10.853376443204535,
"learning_rate": 6.951904849825553e-08,
"loss": 0.5794847011566162,
"step": 5894
},
{
"epoch": 2.857489093553078,
"grad_norm": 10.242876813270627,
"learning_rate": 6.905120099127249e-08,
"loss": 0.40642139315605164,
"step": 5895
},
{
"epoch": 2.857973824527387,
"grad_norm": 14.649829813917515,
"learning_rate": 6.858492211183231e-08,
"loss": 2.7310495376586914,
"step": 5896
},
{
"epoch": 2.858458555501697,
"grad_norm": 9.293582277014721,
"learning_rate": 6.812021200826801e-08,
"loss": 0.5910731554031372,
"step": 5897
},
{
"epoch": 2.8589432864760056,
"grad_norm": 13.832379625262238,
"learning_rate": 6.765707082841245e-08,
"loss": 0.2679729461669922,
"step": 5898
},
{
"epoch": 2.8594280174503153,
"grad_norm": 14.047101635667879,
"learning_rate": 6.71954987195994e-08,
"loss": 0.7802907228469849,
"step": 5899
},
{
"epoch": 2.859912748424624,
"grad_norm": 11.729853155969233,
"learning_rate": 6.673549582866368e-08,
"loss": 0.36580783128738403,
"step": 5900
},
{
"epoch": 2.8603974793989337,
"grad_norm": 7.664322710623767,
"learning_rate": 6.627706230194154e-08,
"loss": 0.5195889472961426,
"step": 5901
},
{
"epoch": 2.860882210373243,
"grad_norm": 9.897812150071475,
"learning_rate": 6.582019828526853e-08,
"loss": 0.43519026041030884,
"step": 5902
},
{
"epoch": 2.861366941347552,
"grad_norm": 8.390856233342767,
"learning_rate": 6.53649039239812e-08,
"loss": 0.6999077796936035,
"step": 5903
},
{
"epoch": 2.8618516723218614,
"grad_norm": 9.870736917080613,
"learning_rate": 6.491117936291868e-08,
"loss": 0.6938372850418091,
"step": 5904
},
{
"epoch": 2.8623364032961707,
"grad_norm": 12.445436464148766,
"learning_rate": 6.44590247464183e-08,
"loss": 0.5147584676742554,
"step": 5905
},
{
"epoch": 2.86282113427048,
"grad_norm": 9.734518486759354,
"learning_rate": 6.400844021831887e-08,
"loss": 0.43391576409339905,
"step": 5906
},
{
"epoch": 2.863305865244789,
"grad_norm": 7.3899154598444055,
"learning_rate": 6.355942592196074e-08,
"loss": 0.663561999797821,
"step": 5907
},
{
"epoch": 2.8637905962190984,
"grad_norm": 16.395713330102296,
"learning_rate": 6.311198200018242e-08,
"loss": 0.6063555479049683,
"step": 5908
},
{
"epoch": 2.8642753271934076,
"grad_norm": 7.948699098905469,
"learning_rate": 6.266610859532563e-08,
"loss": 0.16760848462581635,
"step": 5909
},
{
"epoch": 2.864760058167717,
"grad_norm": 7.227045479842888,
"learning_rate": 6.222180584923021e-08,
"loss": 0.27745920419692993,
"step": 5910
},
{
"epoch": 2.865244789142026,
"grad_norm": 14.634921331202719,
"learning_rate": 6.177907390323701e-08,
"loss": 0.7534891366958618,
"step": 5911
},
{
"epoch": 2.8657295201163353,
"grad_norm": 10.397509482582478,
"learning_rate": 6.13379128981878e-08,
"loss": 0.8437290191650391,
"step": 5912
},
{
"epoch": 2.8662142510906445,
"grad_norm": 11.762026626298631,
"learning_rate": 6.089832297442477e-08,
"loss": 0.5860869884490967,
"step": 5913
},
{
"epoch": 2.866698982064954,
"grad_norm": 10.065176784481212,
"learning_rate": 6.046030427178884e-08,
"loss": 0.3122708201408386,
"step": 5914
},
{
"epoch": 2.867183713039263,
"grad_norm": 14.439649646479653,
"learning_rate": 6.002385692962242e-08,
"loss": 0.30537450313568115,
"step": 5915
},
{
"epoch": 2.8676684440135727,
"grad_norm": 8.431758517219391,
"learning_rate": 5.958898108676781e-08,
"loss": 0.3570736348628998,
"step": 5916
},
{
"epoch": 2.8681531749878815,
"grad_norm": 8.016734861578511,
"learning_rate": 5.915567688156654e-08,
"loss": 0.36046135425567627,
"step": 5917
},
{
"epoch": 2.868637905962191,
"grad_norm": 9.524563279667655,
"learning_rate": 5.872394445186169e-08,
"loss": 0.4034743010997772,
"step": 5918
},
{
"epoch": 2.8691226369365004,
"grad_norm": 7.638744501975598,
"learning_rate": 5.829378393499452e-08,
"loss": 0.30314579606056213,
"step": 5919
},
{
"epoch": 2.8696073679108096,
"grad_norm": 11.825707106537633,
"learning_rate": 5.7865195467807775e-08,
"loss": 0.7800722122192383,
"step": 5920
},
{
"epoch": 2.870092098885119,
"grad_norm": 10.608708295411331,
"learning_rate": 5.7438179186642964e-08,
"loss": 0.21259805560112,
"step": 5921
},
{
"epoch": 2.870576829859428,
"grad_norm": 17.930212601306813,
"learning_rate": 5.701273522734252e-08,
"loss": 0.5208104848861694,
"step": 5922
},
{
"epoch": 2.8710615608337373,
"grad_norm": 14.167913738737523,
"learning_rate": 5.6588863725248744e-08,
"loss": 0.37588998675346375,
"step": 5923
},
{
"epoch": 2.8715462918080465,
"grad_norm": 11.666081640249176,
"learning_rate": 5.616656481520211e-08,
"loss": 0.41319945454597473,
"step": 5924
},
{
"epoch": 2.872031022782356,
"grad_norm": 16.48075896828656,
"learning_rate": 5.5745838631544036e-08,
"loss": 0.6580319404602051,
"step": 5925
},
{
"epoch": 2.872515753756665,
"grad_norm": 8.502261013320613,
"learning_rate": 5.532668530811525e-08,
"loss": 0.20235109329223633,
"step": 5926
},
{
"epoch": 2.8730004847309742,
"grad_norm": 9.433580874253696,
"learning_rate": 5.490910497825741e-08,
"loss": 0.3926432728767395,
"step": 5927
},
{
"epoch": 2.8734852157052835,
"grad_norm": 10.813373646120112,
"learning_rate": 5.4493097774809265e-08,
"loss": 0.5916074514389038,
"step": 5928
},
{
"epoch": 2.8739699466795927,
"grad_norm": 11.979730491424743,
"learning_rate": 5.407866383011107e-08,
"loss": 0.6474952697753906,
"step": 5929
},
{
"epoch": 2.874454677653902,
"grad_norm": 11.50322807519009,
"learning_rate": 5.3665803276002906e-08,
"loss": 0.5709153413772583,
"step": 5930
},
{
"epoch": 2.874939408628211,
"grad_norm": 16.82287057016491,
"learning_rate": 5.325451624382194e-08,
"loss": 0.9207970499992371,
"step": 5931
},
{
"epoch": 2.8754241396025204,
"grad_norm": 12.273665808672611,
"learning_rate": 5.284480286440741e-08,
"loss": 0.24820779263973236,
"step": 5932
},
{
"epoch": 2.87590887057683,
"grad_norm": 11.899758138726133,
"learning_rate": 5.243666326809671e-08,
"loss": 0.2907410264015198,
"step": 5933
},
{
"epoch": 2.876393601551139,
"grad_norm": 10.97424013461849,
"learning_rate": 5.203009758472599e-08,
"loss": 1.8566793203353882,
"step": 5934
},
{
"epoch": 2.8768783325254486,
"grad_norm": 17.66809368584607,
"learning_rate": 5.162510594363235e-08,
"loss": 0.3793075680732727,
"step": 5935
},
{
"epoch": 2.877363063499758,
"grad_norm": 7.393011797695791,
"learning_rate": 5.12216884736505e-08,
"loss": 0.26105913519859314,
"step": 5936
},
{
"epoch": 2.877847794474067,
"grad_norm": 10.669893030915874,
"learning_rate": 5.081984530311612e-08,
"loss": 0.8276540040969849,
"step": 5937
},
{
"epoch": 2.8783325254483763,
"grad_norm": 8.015511820497798,
"learning_rate": 5.041957655986196e-08,
"loss": 0.3756595849990845,
"step": 5938
},
{
"epoch": 2.8788172564226855,
"grad_norm": 15.428844378723317,
"learning_rate": 5.0020882371222265e-08,
"loss": 1.2074958086013794,
"step": 5939
},
{
"epoch": 2.8793019873969947,
"grad_norm": 12.520773044617595,
"learning_rate": 4.9623762864027815e-08,
"loss": 0.5694674253463745,
"step": 5940
},
{
"epoch": 2.879786718371304,
"grad_norm": 7.954037208381963,
"learning_rate": 4.922821816461032e-08,
"loss": 0.5283546447753906,
"step": 5941
},
{
"epoch": 2.880271449345613,
"grad_norm": 8.340476937269559,
"learning_rate": 4.883424839880024e-08,
"loss": 0.4576568901538849,
"step": 5942
},
{
"epoch": 2.8807561803199224,
"grad_norm": 15.906564320108298,
"learning_rate": 4.844185369192622e-08,
"loss": 0.3194923996925354,
"step": 5943
},
{
"epoch": 2.8812409112942317,
"grad_norm": 12.000834116644759,
"learning_rate": 4.805103416881729e-08,
"loss": 0.7066478133201599,
"step": 5944
},
{
"epoch": 2.881725642268541,
"grad_norm": 11.851171826823204,
"learning_rate": 4.7661789953799553e-08,
"loss": 0.31669795513153076,
"step": 5945
},
{
"epoch": 2.88221037324285,
"grad_norm": 17.69716610827346,
"learning_rate": 4.7274121170700074e-08,
"loss": 0.3698909878730774,
"step": 5946
},
{
"epoch": 2.8826951042171594,
"grad_norm": 18.62758294431867,
"learning_rate": 4.688802794284242e-08,
"loss": 1.2510086297988892,
"step": 5947
},
{
"epoch": 2.8831798351914686,
"grad_norm": 8.762916884670503,
"learning_rate": 4.6503510393050565e-08,
"loss": 0.5634292960166931,
"step": 5948
},
{
"epoch": 2.883664566165778,
"grad_norm": 13.489095397827157,
"learning_rate": 4.6120568643646666e-08,
"loss": 0.4936351180076599,
"step": 5949
},
{
"epoch": 2.8841492971400875,
"grad_norm": 11.28222024254748,
"learning_rate": 4.573920281645161e-08,
"loss": 0.5927865505218506,
"step": 5950
},
{
"epoch": 2.8846340281143963,
"grad_norm": 9.73758085486712,
"learning_rate": 4.535941303278502e-08,
"loss": 0.9500457644462585,
"step": 5951
},
{
"epoch": 2.885118759088706,
"grad_norm": 13.430893178016152,
"learning_rate": 4.498119941346579e-08,
"loss": 0.4091995358467102,
"step": 5952
},
{
"epoch": 2.8856034900630148,
"grad_norm": 8.813707348880806,
"learning_rate": 4.4604562078810474e-08,
"loss": 0.28386732935905457,
"step": 5953
},
{
"epoch": 2.8860882210373244,
"grad_norm": 8.86547716185383,
"learning_rate": 4.422950114863378e-08,
"loss": 0.35150766372680664,
"step": 5954
},
{
"epoch": 2.8865729520116337,
"grad_norm": 11.71303236580276,
"learning_rate": 4.385601674225082e-08,
"loss": 0.6709303259849548,
"step": 5955
},
{
"epoch": 2.887057682985943,
"grad_norm": 13.119230804315578,
"learning_rate": 4.348410897847322e-08,
"loss": 0.2729036808013916,
"step": 5956
},
{
"epoch": 2.887542413960252,
"grad_norm": 10.351298559112804,
"learning_rate": 4.311377797561189e-08,
"loss": 0.8035892844200134,
"step": 5957
},
{
"epoch": 2.8880271449345614,
"grad_norm": 10.757858187375042,
"learning_rate": 4.274502385147594e-08,
"loss": 0.5944191813468933,
"step": 5958
},
{
"epoch": 2.8885118759088706,
"grad_norm": 12.899490105508974,
"learning_rate": 4.237784672337375e-08,
"loss": 0.4363734722137451,
"step": 5959
},
{
"epoch": 2.88899660688318,
"grad_norm": 13.855361427126379,
"learning_rate": 4.2012246708110774e-08,
"loss": 0.5299267172813416,
"step": 5960
},
{
"epoch": 2.889481337857489,
"grad_norm": 13.440637404146747,
"learning_rate": 4.1648223921990637e-08,
"loss": 0.9316009879112244,
"step": 5961
},
{
"epoch": 2.8899660688317983,
"grad_norm": 10.321863344300658,
"learning_rate": 4.1285778480817384e-08,
"loss": 0.32670286297798157,
"step": 5962
},
{
"epoch": 2.8904507998061075,
"grad_norm": 16.67199166914779,
"learning_rate": 4.0924910499890445e-08,
"loss": 0.7158206105232239,
"step": 5963
},
{
"epoch": 2.8909355307804168,
"grad_norm": 8.039394260151427,
"learning_rate": 4.056562009400855e-08,
"loss": 0.3539021611213684,
"step": 5964
},
{
"epoch": 2.891420261754726,
"grad_norm": 12.71508004038422,
"learning_rate": 4.020790737746971e-08,
"loss": 0.3905048966407776,
"step": 5965
},
{
"epoch": 2.8919049927290352,
"grad_norm": 12.234909779334355,
"learning_rate": 3.9851772464069015e-08,
"loss": 0.4864279627799988,
"step": 5966
},
{
"epoch": 2.892389723703345,
"grad_norm": 10.125289753849566,
"learning_rate": 3.9497215467098614e-08,
"loss": 0.28039854764938354,
"step": 5967
},
{
"epoch": 2.8928744546776537,
"grad_norm": 8.527274503533889,
"learning_rate": 3.914423649935106e-08,
"loss": 0.9626950621604919,
"step": 5968
},
{
"epoch": 2.8933591856519634,
"grad_norm": 8.978745314356173,
"learning_rate": 3.879283567311432e-08,
"loss": 0.631823718547821,
"step": 5969
},
{
"epoch": 2.893843916626272,
"grad_norm": 8.94963081582913,
"learning_rate": 3.844301310017673e-08,
"loss": 0.30064576864242554,
"step": 5970
},
{
"epoch": 2.894328647600582,
"grad_norm": 6.618747925661583,
"learning_rate": 3.809476889182262e-08,
"loss": 0.4750918447971344,
"step": 5971
},
{
"epoch": 2.894813378574891,
"grad_norm": 9.999012498484035,
"learning_rate": 3.774810315883559e-08,
"loss": 0.15996243059635162,
"step": 5972
},
{
"epoch": 2.8952981095492003,
"grad_norm": 6.623147378254294,
"learning_rate": 3.7403016011495765e-08,
"loss": 0.34242892265319824,
"step": 5973
},
{
"epoch": 2.8957828405235095,
"grad_norm": 7.094210457378597,
"learning_rate": 3.705950755958254e-08,
"loss": 0.4648013710975647,
"step": 5974
},
{
"epoch": 2.8962675714978188,
"grad_norm": 12.439377899723286,
"learning_rate": 3.6717577912372406e-08,
"loss": 0.9784833788871765,
"step": 5975
},
{
"epoch": 2.896752302472128,
"grad_norm": 11.980771812773579,
"learning_rate": 3.637722717863945e-08,
"loss": 0.26043301820755005,
"step": 5976
},
{
"epoch": 2.8972370334464372,
"grad_norm": 13.513412527632294,
"learning_rate": 3.6038455466655965e-08,
"loss": 0.6482728123664856,
"step": 5977
},
{
"epoch": 2.8977217644207465,
"grad_norm": 13.128824876591803,
"learning_rate": 3.5701262884190735e-08,
"loss": 0.4881347417831421,
"step": 5978
},
{
"epoch": 2.8982064953950557,
"grad_norm": 8.906953269703925,
"learning_rate": 3.5365649538512404e-08,
"loss": 0.36698904633522034,
"step": 5979
},
{
"epoch": 2.898691226369365,
"grad_norm": 15.710899993275314,
"learning_rate": 3.503161553638445e-08,
"loss": 0.3479185998439789,
"step": 5980
},
{
"epoch": 2.899175957343674,
"grad_norm": 14.326408248442185,
"learning_rate": 3.469916098407022e-08,
"loss": 0.3953303396701813,
"step": 5981
},
{
"epoch": 2.8996606883179834,
"grad_norm": 13.809969926335379,
"learning_rate": 3.436828598732955e-08,
"loss": 0.4396994113922119,
"step": 5982
},
{
"epoch": 2.9001454192922926,
"grad_norm": 14.949376635227258,
"learning_rate": 3.403899065141936e-08,
"loss": 0.43458276987075806,
"step": 5983
},
{
"epoch": 2.900630150266602,
"grad_norm": 6.122826432481862,
"learning_rate": 3.37112750810964e-08,
"loss": 0.1644650101661682,
"step": 5984
},
{
"epoch": 2.901114881240911,
"grad_norm": 8.14686120062024,
"learning_rate": 3.338513938061172e-08,
"loss": 0.35466882586479187,
"step": 5985
},
{
"epoch": 2.901599612215221,
"grad_norm": 11.413731044839237,
"learning_rate": 3.306058365371512e-08,
"loss": 0.7938759326934814,
"step": 5986
},
{
"epoch": 2.9020843431895296,
"grad_norm": 7.794766514803591,
"learning_rate": 3.2737608003654556e-08,
"loss": 0.2288265973329544,
"step": 5987
},
{
"epoch": 2.9025690741638392,
"grad_norm": 9.181087082644671,
"learning_rate": 3.241621253317451e-08,
"loss": 0.43168091773986816,
"step": 5988
},
{
"epoch": 2.9030538051381485,
"grad_norm": 14.41920258393648,
"learning_rate": 3.209639734451708e-08,
"loss": 0.5740717053413391,
"step": 5989
},
{
"epoch": 2.9035385361124577,
"grad_norm": 13.575139963954731,
"learning_rate": 3.177816253942145e-08,
"loss": 0.2865266799926758,
"step": 5990
},
{
"epoch": 2.904023267086767,
"grad_norm": 10.32423255800694,
"learning_rate": 3.1461508219123304e-08,
"loss": 0.8125402927398682,
"step": 5991
},
{
"epoch": 2.904507998061076,
"grad_norm": 11.170062971645805,
"learning_rate": 3.114643448435761e-08,
"loss": 0.30358487367630005,
"step": 5992
},
{
"epoch": 2.9049927290353854,
"grad_norm": 9.215689366383835,
"learning_rate": 3.08329414353542e-08,
"loss": 0.5297562479972839,
"step": 5993
},
{
"epoch": 2.9054774600096946,
"grad_norm": 18.99763225499204,
"learning_rate": 3.052102917184163e-08,
"loss": 0.5134474039077759,
"step": 5994
},
{
"epoch": 2.905962190984004,
"grad_norm": 12.34709139753866,
"learning_rate": 3.021069779304498e-08,
"loss": 0.47791361808776855,
"step": 5995
},
{
"epoch": 2.906446921958313,
"grad_norm": 8.977662617277515,
"learning_rate": 2.990194739768637e-08,
"loss": 0.4427282214164734,
"step": 5996
},
{
"epoch": 2.9069316529326223,
"grad_norm": 12.371938858574262,
"learning_rate": 2.9594778083984478e-08,
"loss": 1.1323615312576294,
"step": 5997
},
{
"epoch": 2.9074163839069316,
"grad_norm": 9.830214046125626,
"learning_rate": 2.9289189949657238e-08,
"loss": 0.2167329341173172,
"step": 5998
},
{
"epoch": 2.907901114881241,
"grad_norm": 10.241997755940941,
"learning_rate": 2.8985183091916892e-08,
"loss": 0.36869633197784424,
"step": 5999
},
{
"epoch": 2.90838584585555,
"grad_norm": 10.84433737720804,
"learning_rate": 2.868275760747441e-08,
"loss": 0.5078045725822449,
"step": 6000
},
{
"epoch": 2.9088705768298593,
"grad_norm": 11.104209256317407,
"learning_rate": 2.8381913592535616e-08,
"loss": 0.7826927900314331,
"step": 6001
},
{
"epoch": 2.9093553078041685,
"grad_norm": 9.354806836960037,
"learning_rate": 2.8082651142806174e-08,
"loss": 0.3776162266731262,
"step": 6002
},
{
"epoch": 2.909840038778478,
"grad_norm": 16.815751057200718,
"learning_rate": 2.778497035348604e-08,
"loss": 0.7099158763885498,
"step": 6003
},
{
"epoch": 2.910324769752787,
"grad_norm": 10.343267001618063,
"learning_rate": 2.7488871319273913e-08,
"loss": 0.8901838064193726,
"step": 6004
},
{
"epoch": 2.9108095007270967,
"grad_norm": 12.485798555751938,
"learning_rate": 2.7194354134363886e-08,
"loss": 0.3979784846305847,
"step": 6005
},
{
"epoch": 2.9112942317014054,
"grad_norm": 11.161020949200442,
"learning_rate": 2.6901418892448238e-08,
"loss": 0.28235742449760437,
"step": 6006
},
{
"epoch": 2.911778962675715,
"grad_norm": 14.084812760703182,
"learning_rate": 2.6610065686714094e-08,
"loss": 0.5515085458755493,
"step": 6007
},
{
"epoch": 2.9122636936500244,
"grad_norm": 13.3323895943058,
"learning_rate": 2.6320294609846753e-08,
"loss": 0.5779732465744019,
"step": 6008
},
{
"epoch": 2.9127484246243336,
"grad_norm": 7.684364075792758,
"learning_rate": 2.6032105754028036e-08,
"loss": 0.3004339933395386,
"step": 6009
},
{
"epoch": 2.913233155598643,
"grad_norm": 10.756200509899246,
"learning_rate": 2.5745499210936274e-08,
"loss": 0.4186292290687561,
"step": 6010
},
{
"epoch": 2.913717886572952,
"grad_norm": 9.936310720687919,
"learning_rate": 2.5460475071745762e-08,
"loss": 0.3151519000530243,
"step": 6011
},
{
"epoch": 2.9142026175472613,
"grad_norm": 11.549017898167474,
"learning_rate": 2.517703342712896e-08,
"loss": 0.28237080574035645,
"step": 6012
},
{
"epoch": 2.9146873485215705,
"grad_norm": 14.024121783699954,
"learning_rate": 2.489517436725375e-08,
"loss": 0.2950671911239624,
"step": 6013
},
{
"epoch": 2.9151720794958798,
"grad_norm": 8.156879442768334,
"learning_rate": 2.4614897981783957e-08,
"loss": 0.4967763125896454,
"step": 6014
},
{
"epoch": 2.915656810470189,
"grad_norm": 13.760447437402837,
"learning_rate": 2.4336204359882153e-08,
"loss": 1.4089293479919434,
"step": 6015
},
{
"epoch": 2.916141541444498,
"grad_norm": 7.857676378228879,
"learning_rate": 2.4059093590205196e-08,
"loss": 0.25243279337882996,
"step": 6016
},
{
"epoch": 2.9166262724188075,
"grad_norm": 7.782372701745954,
"learning_rate": 2.378356576090701e-08,
"loss": 0.13056299090385437,
"step": 6017
},
{
"epoch": 2.9171110033931167,
"grad_norm": 10.057472737915065,
"learning_rate": 2.35096209596386e-08,
"loss": 0.39133405685424805,
"step": 6018
},
{
"epoch": 2.917595734367426,
"grad_norm": 10.46139087456488,
"learning_rate": 2.323725927354692e-08,
"loss": 0.37400490045547485,
"step": 6019
},
{
"epoch": 2.9180804653417356,
"grad_norm": 11.208937403920018,
"learning_rate": 2.2966480789275438e-08,
"loss": 0.542975664138794,
"step": 6020
},
{
"epoch": 2.9185651963160444,
"grad_norm": 27.61904936225121,
"learning_rate": 2.269728559296358e-08,
"loss": 0.907672643661499,
"step": 6021
},
{
"epoch": 2.919049927290354,
"grad_norm": 7.7732825873076825,
"learning_rate": 2.242967377024785e-08,
"loss": 0.525005042552948,
"step": 6022
},
{
"epoch": 2.919534658264663,
"grad_norm": 10.700355128558543,
"learning_rate": 2.2163645406260148e-08,
"loss": 0.4323291778564453,
"step": 6023
},
{
"epoch": 2.9200193892389725,
"grad_norm": 11.704943552084615,
"learning_rate": 2.189920058562889e-08,
"loss": 0.6259667873382568,
"step": 6024
},
{
"epoch": 2.9205041202132818,
"grad_norm": 11.376678681798904,
"learning_rate": 2.1636339392479553e-08,
"loss": 0.2780863642692566,
"step": 6025
},
{
"epoch": 2.920988851187591,
"grad_norm": 9.42042624873842,
"learning_rate": 2.1375061910433037e-08,
"loss": 0.5824218988418579,
"step": 6026
},
{
"epoch": 2.9214735821619002,
"grad_norm": 9.649693555325811,
"learning_rate": 2.1115368222606736e-08,
"loss": 0.3316745162010193,
"step": 6027
},
{
"epoch": 2.9219583131362095,
"grad_norm": 9.585939707451818,
"learning_rate": 2.0857258411613456e-08,
"loss": 0.4493401348590851,
"step": 6028
},
{
"epoch": 2.9224430441105187,
"grad_norm": 13.443819717329692,
"learning_rate": 2.060073255956363e-08,
"loss": 0.394611656665802,
"step": 6029
},
{
"epoch": 2.922927775084828,
"grad_norm": 9.041832344616367,
"learning_rate": 2.0345790748062532e-08,
"loss": 0.2310238778591156,
"step": 6030
},
{
"epoch": 2.923412506059137,
"grad_norm": 9.161060337570373,
"learning_rate": 2.0092433058211957e-08,
"loss": 0.36818206310272217,
"step": 6031
},
{
"epoch": 2.9238972370334464,
"grad_norm": 7.960760581621899,
"learning_rate": 1.9840659570609654e-08,
"loss": 0.23315617442131042,
"step": 6032
},
{
"epoch": 2.9243819680077556,
"grad_norm": 11.197115299682109,
"learning_rate": 1.959047036534989e-08,
"loss": 0.4208966791629791,
"step": 6033
},
{
"epoch": 2.924866698982065,
"grad_norm": 11.123975467751432,
"learning_rate": 1.934186552202233e-08,
"loss": 0.3149680495262146,
"step": 6034
},
{
"epoch": 2.925351429956374,
"grad_norm": 9.960971610720993,
"learning_rate": 1.9094845119712603e-08,
"loss": 0.35825133323669434,
"step": 6035
},
{
"epoch": 2.9258361609306833,
"grad_norm": 12.066497524754254,
"learning_rate": 1.8849409237002846e-08,
"loss": 0.4265539050102234,
"step": 6036
},
{
"epoch": 2.926320891904993,
"grad_norm": 16.996128085416473,
"learning_rate": 1.8605557951971163e-08,
"loss": 1.3578051328659058,
"step": 6037
},
{
"epoch": 2.926805622879302,
"grad_norm": 12.8483887405276,
"learning_rate": 1.8363291342190505e-08,
"loss": 0.3850378096103668,
"step": 6038
},
{
"epoch": 2.9272903538536115,
"grad_norm": 18.845560299933773,
"learning_rate": 1.8122609484730324e-08,
"loss": 0.5102823972702026,
"step": 6039
},
{
"epoch": 2.9277750848279203,
"grad_norm": 12.388850414139124,
"learning_rate": 1.788351245615716e-08,
"loss": 0.15377715229988098,
"step": 6040
},
{
"epoch": 2.92825981580223,
"grad_norm": 12.48688058974606,
"learning_rate": 1.7646000332531276e-08,
"loss": 1.2259811162948608,
"step": 6041
},
{
"epoch": 2.928744546776539,
"grad_norm": 13.534469465449963,
"learning_rate": 1.7410073189410015e-08,
"loss": 0.5879335403442383,
"step": 6042
},
{
"epoch": 2.9292292777508484,
"grad_norm": 10.19862427652549,
"learning_rate": 1.7175731101846115e-08,
"loss": 0.31893426179885864,
"step": 6043
},
{
"epoch": 2.9297140087251576,
"grad_norm": 10.971298044539944,
"learning_rate": 1.6942974144388836e-08,
"loss": 0.22042419016361237,
"step": 6044
},
{
"epoch": 2.930198739699467,
"grad_norm": 8.823889552959272,
"learning_rate": 1.6711802391081723e-08,
"loss": 0.6401005983352661,
"step": 6045
},
{
"epoch": 2.930683470673776,
"grad_norm": 14.507837903373584,
"learning_rate": 1.6482215915465395e-08,
"loss": 1.3012192249298096,
"step": 6046
},
{
"epoch": 2.9311682016480853,
"grad_norm": 6.7496016426338254,
"learning_rate": 1.625421479057532e-08,
"loss": 0.3033841848373413,
"step": 6047
},
{
"epoch": 2.9316529326223946,
"grad_norm": 10.650987955410063,
"learning_rate": 1.6027799088943474e-08,
"loss": 0.3531116545200348,
"step": 6048
},
{
"epoch": 2.932137663596704,
"grad_norm": 9.085370161858869,
"learning_rate": 1.5802968882596137e-08,
"loss": 1.763594388961792,
"step": 6049
},
{
"epoch": 2.932622394571013,
"grad_norm": 10.566071385084067,
"learning_rate": 1.557972424305665e-08,
"loss": 0.3041530251502991,
"step": 6050
},
{
"epoch": 2.9331071255453223,
"grad_norm": 6.0117653426365045,
"learning_rate": 1.5358065241342644e-08,
"loss": 0.26538756489753723,
"step": 6051
},
{
"epoch": 2.9335918565196315,
"grad_norm": 9.65324271421133,
"learning_rate": 1.5137991947968834e-08,
"loss": 0.23287180066108704,
"step": 6052
},
{
"epoch": 2.9340765874939407,
"grad_norm": 24.175669899575844,
"learning_rate": 1.491950443294421e-08,
"loss": 0.6429685354232788,
"step": 6053
},
{
"epoch": 2.93456131846825,
"grad_norm": 10.556329375934943,
"learning_rate": 1.4702602765774287e-08,
"loss": 0.30960798263549805,
"step": 6054
},
{
"epoch": 2.935046049442559,
"grad_norm": 13.07425027126538,
"learning_rate": 1.4487287015458872e-08,
"loss": 0.9743395447731018,
"step": 6055
},
{
"epoch": 2.935530780416869,
"grad_norm": 13.47711029989318,
"learning_rate": 1.4273557250494285e-08,
"loss": 0.7063348293304443,
"step": 6056
},
{
"epoch": 2.9360155113911777,
"grad_norm": 7.745444055700202,
"learning_rate": 1.4061413538871694e-08,
"loss": 0.1446765959262848,
"step": 6057
},
{
"epoch": 2.9365002423654873,
"grad_norm": 10.714266597193195,
"learning_rate": 1.3850855948078224e-08,
"loss": 0.6281470656394958,
"step": 6058
},
{
"epoch": 2.9369849733397966,
"grad_norm": 10.376176404395657,
"learning_rate": 1.3641884545096407e-08,
"loss": 0.2858467400074005,
"step": 6059
},
{
"epoch": 2.937469704314106,
"grad_norm": 12.707347820016015,
"learning_rate": 1.3434499396404176e-08,
"loss": 0.5258976221084595,
"step": 6060
},
{
"epoch": 2.937954435288415,
"grad_norm": 7.682616039176062,
"learning_rate": 1.3228700567973763e-08,
"loss": 0.33989518880844116,
"step": 6061
},
{
"epoch": 2.9384391662627243,
"grad_norm": 7.74777175226203,
"learning_rate": 1.3024488125273905e-08,
"loss": 0.5700668692588806,
"step": 6062
},
{
"epoch": 2.9389238972370335,
"grad_norm": 8.482931182058094,
"learning_rate": 1.2821862133269303e-08,
"loss": 0.31263554096221924,
"step": 6063
},
{
"epoch": 2.9394086282113427,
"grad_norm": 11.290555468196196,
"learning_rate": 1.2620822656417841e-08,
"loss": 0.48466435074806213,
"step": 6064
},
{
"epoch": 2.939893359185652,
"grad_norm": 8.74559233551689,
"learning_rate": 1.2421369758675027e-08,
"loss": 0.5688841938972473,
"step": 6065
},
{
"epoch": 2.940378090159961,
"grad_norm": 17.370582878560022,
"learning_rate": 1.2223503503490108e-08,
"loss": 1.1885102987289429,
"step": 6066
},
{
"epoch": 2.9408628211342704,
"grad_norm": 12.022963197106627,
"learning_rate": 1.2027223953807733e-08,
"loss": 0.44349709153175354,
"step": 6067
},
{
"epoch": 2.9413475521085797,
"grad_norm": 8.79219829148081,
"learning_rate": 1.1832531172069062e-08,
"loss": 0.4521138668060303,
"step": 6068
},
{
"epoch": 2.941832283082889,
"grad_norm": 10.709971510575924,
"learning_rate": 1.1639425220208445e-08,
"loss": 0.3387830853462219,
"step": 6069
},
{
"epoch": 2.942317014057198,
"grad_norm": 12.716932174970813,
"learning_rate": 1.1447906159656741e-08,
"loss": 0.5983822345733643,
"step": 6070
},
{
"epoch": 2.9428017450315074,
"grad_norm": 9.56003387557589,
"learning_rate": 1.1257974051340214e-08,
"loss": 0.36832496523857117,
"step": 6071
},
{
"epoch": 2.9432864760058166,
"grad_norm": 10.026445943928344,
"learning_rate": 1.1069628955679979e-08,
"loss": 0.3538658618927002,
"step": 6072
},
{
"epoch": 2.9437712069801263,
"grad_norm": 8.528996727680989,
"learning_rate": 1.0882870932591994e-08,
"loss": 0.19099490344524384,
"step": 6073
},
{
"epoch": 2.944255937954435,
"grad_norm": 10.27624363173778,
"learning_rate": 1.069770004148707e-08,
"loss": 0.2207443118095398,
"step": 6074
},
{
"epoch": 2.9447406689287448,
"grad_norm": 8.566993909609115,
"learning_rate": 1.0514116341271419e-08,
"loss": 0.35561323165893555,
"step": 6075
},
{
"epoch": 2.9452253999030535,
"grad_norm": 8.975171783482281,
"learning_rate": 1.0332119890347214e-08,
"loss": 0.3280664086341858,
"step": 6076
},
{
"epoch": 2.945710130877363,
"grad_norm": 10.366188213342452,
"learning_rate": 1.0151710746610366e-08,
"loss": 0.4264522194862366,
"step": 6077
},
{
"epoch": 2.9461948618516725,
"grad_norm": 13.381584218733334,
"learning_rate": 9.972888967452743e-09,
"loss": 1.0658262968063354,
"step": 6078
},
{
"epoch": 2.9466795928259817,
"grad_norm": 10.131146156463224,
"learning_rate": 9.795654609761063e-09,
"loss": 0.4047228991985321,
"step": 6079
},
{
"epoch": 2.947164323800291,
"grad_norm": 18.23175964713629,
"learning_rate": 9.620007729916337e-09,
"loss": 0.6509835124015808,
"step": 6080
},
{
"epoch": 2.9476490547746,
"grad_norm": 11.712325212646016,
"learning_rate": 9.445948383795534e-09,
"loss": 0.4948289394378662,
"step": 6081
},
{
"epoch": 2.9481337857489094,
"grad_norm": 13.301328601757058,
"learning_rate": 9.273476626770472e-09,
"loss": 0.33991894125938416,
"step": 6082
},
{
"epoch": 2.9486185167232186,
"grad_norm": 8.766810767188746,
"learning_rate": 9.102592513707264e-09,
"loss": 0.19762565195560455,
"step": 6083
},
{
"epoch": 2.949103247697528,
"grad_norm": 8.165600121409703,
"learning_rate": 8.933296098966871e-09,
"loss": 0.7439101338386536,
"step": 6084
},
{
"epoch": 2.949587978671837,
"grad_norm": 17.994401184038793,
"learning_rate": 8.765587436406765e-09,
"loss": 0.364452600479126,
"step": 6085
},
{
"epoch": 2.9500727096461463,
"grad_norm": 16.284540584513177,
"learning_rate": 8.599466579377047e-09,
"loss": 0.5443949699401855,
"step": 6086
},
{
"epoch": 2.9505574406204556,
"grad_norm": 9.970144698506992,
"learning_rate": 8.434933580724891e-09,
"loss": 0.6021090745925903,
"step": 6087
},
{
"epoch": 2.951042171594765,
"grad_norm": 13.772158406866753,
"learning_rate": 8.271988492791205e-09,
"loss": 0.9978276491165161,
"step": 6088
},
{
"epoch": 2.951526902569074,
"grad_norm": 17.453106585175945,
"learning_rate": 8.110631367411748e-09,
"loss": 0.9078904986381531,
"step": 6089
},
{
"epoch": 2.9520116335433837,
"grad_norm": 12.585659264496972,
"learning_rate": 7.95086225591657e-09,
"loss": 0.3093130588531494,
"step": 6090
},
{
"epoch": 2.9524963645176925,
"grad_norm": 9.089624067396374,
"learning_rate": 7.792681209132236e-09,
"loss": 0.39968669414520264,
"step": 6091
},
{
"epoch": 2.952981095492002,
"grad_norm": 9.830443842597672,
"learning_rate": 7.636088277378494e-09,
"loss": 1.8058167695999146,
"step": 6092
},
{
"epoch": 2.953465826466311,
"grad_norm": 21.442310255967406,
"learning_rate": 7.4810835104705e-09,
"loss": 0.7088901996612549,
"step": 6093
},
{
"epoch": 2.9539505574406206,
"grad_norm": 16.708959471399112,
"learning_rate": 7.3276669577188045e-09,
"loss": 0.6866514086723328,
"step": 6094
},
{
"epoch": 2.95443528841493,
"grad_norm": 7.694718818952561,
"learning_rate": 7.175838667927149e-09,
"loss": 0.39814040064811707,
"step": 6095
},
{
"epoch": 2.954920019389239,
"grad_norm": 9.921495595872127,
"learning_rate": 7.025598689395785e-09,
"loss": 0.602914035320282,
"step": 6096
},
{
"epoch": 2.9554047503635483,
"grad_norm": 12.491513575262111,
"learning_rate": 6.876947069918705e-09,
"loss": 0.6090549826622009,
"step": 6097
},
{
"epoch": 2.9558894813378576,
"grad_norm": 8.47754876499776,
"learning_rate": 6.729883856784747e-09,
"loss": 0.5949843525886536,
"step": 6098
},
{
"epoch": 2.956374212312167,
"grad_norm": 9.743544457745504,
"learning_rate": 6.584409096777045e-09,
"loss": 1.148530125617981,
"step": 6099
},
{
"epoch": 2.956858943286476,
"grad_norm": 7.921251145163905,
"learning_rate": 6.440522836174135e-09,
"loss": 0.611380934715271,
"step": 6100
},
{
"epoch": 2.9573436742607853,
"grad_norm": 15.320714237430739,
"learning_rate": 6.2982251207494015e-09,
"loss": 0.42794954776763916,
"step": 6101
},
{
"epoch": 2.9578284052350945,
"grad_norm": 14.478611972397534,
"learning_rate": 6.15751599576997e-09,
"loss": 0.8157196640968323,
"step": 6102
},
{
"epoch": 2.9583131362094037,
"grad_norm": 8.966042537976238,
"learning_rate": 6.01839550599781e-09,
"loss": 0.5557937026023865,
"step": 6103
},
{
"epoch": 2.958797867183713,
"grad_norm": 10.12862398069346,
"learning_rate": 5.8808636956902974e-09,
"loss": 0.5320282578468323,
"step": 6104
},
{
"epoch": 2.959282598158022,
"grad_norm": 7.990770093552083,
"learning_rate": 5.744920608598547e-09,
"loss": 0.3071337044239044,
"step": 6105
},
{
"epoch": 2.9597673291323314,
"grad_norm": 9.52033577390147,
"learning_rate": 5.610566287969077e-09,
"loss": 0.31753987073898315,
"step": 6106
},
{
"epoch": 2.9602520601066407,
"grad_norm": 19.577193137816455,
"learning_rate": 5.477800776542142e-09,
"loss": 0.8154133558273315,
"step": 6107
},
{
"epoch": 2.96073679108095,
"grad_norm": 10.238746542957323,
"learning_rate": 5.346624116552845e-09,
"loss": 0.4842221140861511,
"step": 6108
},
{
"epoch": 2.9612215220552596,
"grad_norm": 13.248834541132165,
"learning_rate": 5.217036349730586e-09,
"loss": 0.38985157012939453,
"step": 6109
},
{
"epoch": 2.9617062530295684,
"grad_norm": 19.893017813107928,
"learning_rate": 5.089037517300721e-09,
"loss": 0.7005216479301453,
"step": 6110
},
{
"epoch": 2.962190984003878,
"grad_norm": 16.097285443030895,
"learning_rate": 4.962627659981789e-09,
"loss": 0.44029539823532104,
"step": 6111
},
{
"epoch": 2.9626757149781873,
"grad_norm": 10.008161623873153,
"learning_rate": 4.8378068179866235e-09,
"loss": 0.44578880071640015,
"step": 6112
},
{
"epoch": 2.9631604459524965,
"grad_norm": 16.938169379517248,
"learning_rate": 4.714575031022906e-09,
"loss": 1.059675931930542,
"step": 6113
},
{
"epoch": 2.9636451769268057,
"grad_norm": 10.405822053674068,
"learning_rate": 4.592932338293721e-09,
"loss": 0.3419416546821594,
"step": 6114
},
{
"epoch": 2.964129907901115,
"grad_norm": 11.68932786358248,
"learning_rate": 4.472878778495892e-09,
"loss": 0.868506133556366,
"step": 6115
},
{
"epoch": 2.964614638875424,
"grad_norm": 8.289409955544238,
"learning_rate": 4.354414389820539e-09,
"loss": 0.23658201098442078,
"step": 6116
},
{
"epoch": 2.9650993698497334,
"grad_norm": 8.71924189700725,
"learning_rate": 4.237539209953068e-09,
"loss": 0.4232713580131531,
"step": 6117
},
{
"epoch": 2.9655841008240427,
"grad_norm": 11.414458077613503,
"learning_rate": 4.12225327607374e-09,
"loss": 0.4088977873325348,
"step": 6118
},
{
"epoch": 2.966068831798352,
"grad_norm": 12.375708378194586,
"learning_rate": 4.008556624857107e-09,
"loss": 1.1987653970718384,
"step": 6119
},
{
"epoch": 2.966553562772661,
"grad_norm": 12.012247682876545,
"learning_rate": 3.896449292473125e-09,
"loss": 0.5716989040374756,
"step": 6120
},
{
"epoch": 2.9670382937469704,
"grad_norm": 10.513856144445409,
"learning_rate": 3.785931314584379e-09,
"loss": 0.6593649387359619,
"step": 6121
},
{
"epoch": 2.9675230247212796,
"grad_norm": 8.931984821431259,
"learning_rate": 3.6770027263483e-09,
"loss": 0.31276682019233704,
"step": 6122
},
{
"epoch": 2.968007755695589,
"grad_norm": 14.11294550426988,
"learning_rate": 3.5696635624182795e-09,
"loss": 0.5213582515716553,
"step": 6123
},
{
"epoch": 2.968492486669898,
"grad_norm": 8.484530905890034,
"learning_rate": 3.4639138569403376e-09,
"loss": 0.3591412901878357,
"step": 6124
},
{
"epoch": 2.9689772176442073,
"grad_norm": 8.39043275609961,
"learning_rate": 3.359753643555341e-09,
"loss": 0.303200364112854,
"step": 6125
},
{
"epoch": 2.969461948618517,
"grad_norm": 14.262401098909132,
"learning_rate": 3.2571829553990077e-09,
"loss": 0.5083468556404114,
"step": 6126
},
{
"epoch": 2.9699466795928258,
"grad_norm": 8.272362800188853,
"learning_rate": 3.156201825100791e-09,
"loss": 0.25461217761039734,
"step": 6127
},
{
"epoch": 2.9704314105671354,
"grad_norm": 8.153660963265732,
"learning_rate": 3.0568102847844395e-09,
"loss": 0.9535685777664185,
"step": 6128
},
{
"epoch": 2.9709161415414442,
"grad_norm": 11.015447160429112,
"learning_rate": 2.9590083660679946e-09,
"loss": 0.741432249546051,
"step": 6129
},
{
"epoch": 2.971400872515754,
"grad_norm": 8.896884201110549,
"learning_rate": 2.862796100065457e-09,
"loss": 0.4105842113494873,
"step": 6130
},
{
"epoch": 2.971885603490063,
"grad_norm": 17.05696071902025,
"learning_rate": 2.7681735173823443e-09,
"loss": 0.5498364567756653,
"step": 6131
},
{
"epoch": 2.9723703344643724,
"grad_norm": 8.089172263473932,
"learning_rate": 2.6751406481201338e-09,
"loss": 0.3141481876373291,
"step": 6132
},
{
"epoch": 2.9728550654386816,
"grad_norm": 10.710581479315604,
"learning_rate": 2.5836975218751504e-09,
"loss": 0.5759821534156799,
"step": 6133
},
{
"epoch": 2.973339796412991,
"grad_norm": 9.058218534307263,
"learning_rate": 2.4938441677363478e-09,
"loss": 0.4407198429107666,
"step": 6134
},
{
"epoch": 2.9738245273873,
"grad_norm": 11.22413664265377,
"learning_rate": 2.4055806142880835e-09,
"loss": 0.7808700799942017,
"step": 6135
},
{
"epoch": 2.9743092583616093,
"grad_norm": 12.10587489163231,
"learning_rate": 2.3189068896090072e-09,
"loss": 0.18762469291687012,
"step": 6136
},
{
"epoch": 2.9747939893359185,
"grad_norm": 8.905098405264175,
"learning_rate": 2.2338230212709534e-09,
"loss": 0.41759881377220154,
"step": 6137
},
{
"epoch": 2.9752787203102278,
"grad_norm": 7.4862452898901335,
"learning_rate": 2.1503290363406037e-09,
"loss": 0.46401849389076233,
"step": 6138
},
{
"epoch": 2.975763451284537,
"grad_norm": 8.420113388869172,
"learning_rate": 2.0684249613800445e-09,
"loss": 0.5684583783149719,
"step": 6139
},
{
"epoch": 2.9762481822588462,
"grad_norm": 9.7398461692242,
"learning_rate": 1.988110822443434e-09,
"loss": 0.26349395513534546,
"step": 6140
},
{
"epoch": 2.9767329132331555,
"grad_norm": 19.86648811516327,
"learning_rate": 1.909386645080891e-09,
"loss": 0.3533119559288025,
"step": 6141
},
{
"epoch": 2.9772176442074647,
"grad_norm": 7.0144795913143625,
"learning_rate": 1.8322524543351594e-09,
"loss": 0.3530123233795166,
"step": 6142
},
{
"epoch": 2.9777023751817744,
"grad_norm": 14.6781493435613,
"learning_rate": 1.7567082747455e-09,
"loss": 1.3882102966308594,
"step": 6143
},
{
"epoch": 2.978187106156083,
"grad_norm": 12.098303048830275,
"learning_rate": 1.6827541303426898e-09,
"loss": 0.3269319534301758,
"step": 6144
},
{
"epoch": 2.978671837130393,
"grad_norm": 12.330505047486884,
"learning_rate": 1.6103900446534648e-09,
"loss": 1.0583677291870117,
"step": 6145
},
{
"epoch": 2.9791565681047016,
"grad_norm": 10.055828298780972,
"learning_rate": 1.5396160406977445e-09,
"loss": 0.6820932626724243,
"step": 6146
},
{
"epoch": 2.9796412990790113,
"grad_norm": 7.729523172414643,
"learning_rate": 1.4704321409908518e-09,
"loss": 0.29265111684799194,
"step": 6147
},
{
"epoch": 2.9801260300533206,
"grad_norm": 13.423602928791311,
"learning_rate": 1.4028383675407375e-09,
"loss": 0.5072175860404968,
"step": 6148
},
{
"epoch": 2.98061076102763,
"grad_norm": 10.097417276703506,
"learning_rate": 1.3368347418507565e-09,
"loss": 0.28219592571258545,
"step": 6149
},
{
"epoch": 2.981095492001939,
"grad_norm": 9.633863999651108,
"learning_rate": 1.2724212849180019e-09,
"loss": 0.36114829778671265,
"step": 6150
},
{
"epoch": 2.9815802229762483,
"grad_norm": 7.300553311046291,
"learning_rate": 1.2095980172327493e-09,
"loss": 0.3515419363975525,
"step": 6151
},
{
"epoch": 2.9820649539505575,
"grad_norm": 15.666229736931719,
"learning_rate": 1.148364958781234e-09,
"loss": 0.5125706195831299,
"step": 6152
},
{
"epoch": 2.9825496849248667,
"grad_norm": 10.277842872527637,
"learning_rate": 1.088722129042319e-09,
"loss": 0.28185921907424927,
"step": 6153
},
{
"epoch": 2.983034415899176,
"grad_norm": 22.96444876813984,
"learning_rate": 1.0306695469897154e-09,
"loss": 0.5970894694328308,
"step": 6154
},
{
"epoch": 2.983519146873485,
"grad_norm": 8.69651424778119,
"learning_rate": 9.742072310908735e-10,
"loss": 0.7012733221054077,
"step": 6155
},
{
"epoch": 2.9840038778477944,
"grad_norm": 6.870534835574542,
"learning_rate": 9.193351993080912e-10,
"loss": 0.37112554907798767,
"step": 6156
},
{
"epoch": 2.9844886088221037,
"grad_norm": 14.940371810833458,
"learning_rate": 8.660534690962951e-10,
"loss": 0.6240072846412659,
"step": 6157
},
{
"epoch": 2.984973339796413,
"grad_norm": 12.473523111702558,
"learning_rate": 8.143620574058153e-10,
"loss": 1.158759355545044,
"step": 6158
},
{
"epoch": 2.985458070770722,
"grad_norm": 10.750953506188859,
"learning_rate": 7.642609806807199e-10,
"loss": 0.3599957525730133,
"step": 6159
},
{
"epoch": 2.9859428017450313,
"grad_norm": 4.8350405229949,
"learning_rate": 7.157502548588158e-10,
"loss": 0.20270752906799316,
"step": 6160
},
{
"epoch": 2.9864275327193406,
"grad_norm": 15.010537867061476,
"learning_rate": 6.688298953727579e-10,
"loss": 1.0158485174179077,
"step": 6161
},
{
"epoch": 2.9869122636936503,
"grad_norm": 9.914983489667136,
"learning_rate": 6.234999171489398e-10,
"loss": 0.3277459740638733,
"step": 6162
},
{
"epoch": 2.987396994667959,
"grad_norm": 15.418996258691399,
"learning_rate": 5.797603346069381e-10,
"loss": 0.4453030228614807,
"step": 6163
},
{
"epoch": 2.9878817256422687,
"grad_norm": 8.056490875655872,
"learning_rate": 5.376111616611779e-10,
"loss": 0.606433093547821,
"step": 6164
},
{
"epoch": 2.988366456616578,
"grad_norm": 9.874297470925725,
"learning_rate": 4.97052411720933e-10,
"loss": 0.417066365480423,
"step": 6165
},
{
"epoch": 2.988851187590887,
"grad_norm": 9.224975350303383,
"learning_rate": 4.5808409768810515e-10,
"loss": 0.2849874794483185,
"step": 6166
},
{
"epoch": 2.9893359185651964,
"grad_norm": 21.161523654576634,
"learning_rate": 4.2070623195888947e-10,
"loss": 0.6185486316680908,
"step": 6167
},
{
"epoch": 2.9898206495395057,
"grad_norm": 9.219713795133986,
"learning_rate": 3.8491882642488487e-10,
"loss": 0.2993675768375397,
"step": 6168
},
{
"epoch": 2.990305380513815,
"grad_norm": 12.080583415406602,
"learning_rate": 3.507218924697631e-10,
"loss": 0.6377032995223999,
"step": 6169
},
{
"epoch": 2.990790111488124,
"grad_norm": 13.21652530962457,
"learning_rate": 3.181154409725995e-10,
"loss": 0.7057659029960632,
"step": 6170
},
{
"epoch": 2.9912748424624334,
"grad_norm": 11.945953212796372,
"learning_rate": 2.87099482306763e-10,
"loss": 0.42906373739242554,
"step": 6171
},
{
"epoch": 2.9917595734367426,
"grad_norm": 16.051570582896453,
"learning_rate": 2.5767402633769533e-10,
"loss": 0.5704144835472107,
"step": 6172
},
{
"epoch": 2.992244304411052,
"grad_norm": 7.671007379226658,
"learning_rate": 2.298390824273522e-10,
"loss": 0.4568086862564087,
"step": 6173
},
{
"epoch": 2.992729035385361,
"grad_norm": 10.322984793586624,
"learning_rate": 2.035946594297622e-10,
"loss": 0.33135950565338135,
"step": 6174
},
{
"epoch": 2.9932137663596703,
"grad_norm": 12.02249348727681,
"learning_rate": 1.7894076569435759e-10,
"loss": 0.3890235424041748,
"step": 6175
},
{
"epoch": 2.9936984973339795,
"grad_norm": 13.214114993088641,
"learning_rate": 1.5587740906375381e-10,
"loss": 0.43293482065200806,
"step": 6176
},
{
"epoch": 2.9941832283082888,
"grad_norm": 9.685422837329986,
"learning_rate": 1.3440459687485973e-10,
"loss": 0.24065633118152618,
"step": 6177
},
{
"epoch": 2.994667959282598,
"grad_norm": 9.452951413149936,
"learning_rate": 1.1452233595832252e-10,
"loss": 0.40811222791671753,
"step": 6178
},
{
"epoch": 2.9951526902569077,
"grad_norm": 21.7552305472371,
"learning_rate": 9.623063263963783e-11,
"loss": 0.37126070261001587,
"step": 6179
},
{
"epoch": 2.9956374212312165,
"grad_norm": 11.138011961648482,
"learning_rate": 7.952949273748456e-11,
"loss": 0.322839617729187,
"step": 6180
},
{
"epoch": 2.996122152205526,
"grad_norm": 10.506764077698517,
"learning_rate": 6.441892156427987e-11,
"loss": 0.3194468319416046,
"step": 6181
},
{
"epoch": 2.996606883179835,
"grad_norm": 9.09786539745752,
"learning_rate": 5.0898923927289453e-11,
"loss": 0.3342864513397217,
"step": 6182
},
{
"epoch": 2.9970916141541446,
"grad_norm": 11.42175847184712,
"learning_rate": 3.896950412807243e-11,
"loss": 0.9399932622909546,
"step": 6183
},
{
"epoch": 2.997576345128454,
"grad_norm": 12.626413474422353,
"learning_rate": 2.8630665961371097e-11,
"loss": 0.30624812841415405,
"step": 6184
},
{
"epoch": 2.998061076102763,
"grad_norm": 9.73444216487184,
"learning_rate": 1.9882412715110932e-11,
"loss": 0.2668805420398712,
"step": 6185
},
{
"epoch": 2.9985458070770723,
"grad_norm": 10.635942311897164,
"learning_rate": 1.272474717373129e-11,
"loss": 0.47254765033721924,
"step": 6186
},
{
"epoch": 2.9990305380513815,
"grad_norm": 9.639349721386944,
"learning_rate": 7.157671613744477e-12,
"loss": 0.5997754335403442,
"step": 6187
},
{
"epoch": 2.9995152690256908,
"grad_norm": 14.006121017038165,
"learning_rate": 3.181187805401109e-12,
"loss": 0.641929566860199,
"step": 6188
},
{
"epoch": 3.0,
"grad_norm": 14.95435011266563,
"learning_rate": 7.952970143554339e-13,
"loss": 0.4479464292526245,
"step": 6189
},
{
"epoch": 3.0,
"step": 6189,
"total_flos": 16450670346240.0,
"train_loss": 1.665625898082319,
"train_runtime": 7008.0374,
"train_samples_per_second": 3.532,
"train_steps_per_second": 0.883
}
],
"logging_steps": 1,
"max_steps": 6189,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 16450670346240.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}