Files
llama3-8b-full-pretrain-was…/trainer_state.json
ModelHub XC ac0e99bc9c 初始化项目,由ModelHub XC社区提供模型
Model: shuoxing/llama3-8b-full-pretrain-wash-c4-1-2m-bs4
Source: Original Platform
2026-06-12 17:08:16 +08:00

13253 lines
341 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1887,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001589825119236884,
"grad_norm": 35.49121667328633,
"learning_rate": 0.0,
"loss": 3.658745765686035,
"step": 1
},
{
"epoch": 0.003179650238473768,
"grad_norm": 32.412269860175655,
"learning_rate": 5.291005291005291e-08,
"loss": 4.507900238037109,
"step": 2
},
{
"epoch": 0.0047694753577106515,
"grad_norm": 33.443946853548105,
"learning_rate": 1.0582010582010582e-07,
"loss": 3.916531562805176,
"step": 3
},
{
"epoch": 0.006359300476947536,
"grad_norm": 37.291251000399825,
"learning_rate": 1.5873015873015874e-07,
"loss": 3.8956263065338135,
"step": 4
},
{
"epoch": 0.00794912559618442,
"grad_norm": 41.0555838913476,
"learning_rate": 2.1164021164021165e-07,
"loss": 4.495701789855957,
"step": 5
},
{
"epoch": 0.009538950715421303,
"grad_norm": 38.94745771005691,
"learning_rate": 2.6455026455026455e-07,
"loss": 4.289466857910156,
"step": 6
},
{
"epoch": 0.011128775834658187,
"grad_norm": 42.29901893801629,
"learning_rate": 3.174603174603175e-07,
"loss": 4.111724853515625,
"step": 7
},
{
"epoch": 0.012718600953895072,
"grad_norm": 33.45027607438258,
"learning_rate": 3.7037037037037036e-07,
"loss": 3.8888838291168213,
"step": 8
},
{
"epoch": 0.014308426073131956,
"grad_norm": 32.67762061785612,
"learning_rate": 4.232804232804233e-07,
"loss": 3.9692318439483643,
"step": 9
},
{
"epoch": 0.01589825119236884,
"grad_norm": 36.061843620608656,
"learning_rate": 4.7619047619047623e-07,
"loss": 4.238317489624023,
"step": 10
},
{
"epoch": 0.017488076311605722,
"grad_norm": 32.52520374971258,
"learning_rate": 5.291005291005291e-07,
"loss": 4.491572380065918,
"step": 11
},
{
"epoch": 0.019077901430842606,
"grad_norm": 38.448364167095846,
"learning_rate": 5.82010582010582e-07,
"loss": 4.081965446472168,
"step": 12
},
{
"epoch": 0.02066772655007949,
"grad_norm": 38.243259913890064,
"learning_rate": 6.34920634920635e-07,
"loss": 3.595273971557617,
"step": 13
},
{
"epoch": 0.022257551669316374,
"grad_norm": 34.58522048956281,
"learning_rate": 6.878306878306879e-07,
"loss": 3.703660011291504,
"step": 14
},
{
"epoch": 0.02384737678855326,
"grad_norm": 29.350503975224882,
"learning_rate": 7.407407407407407e-07,
"loss": 3.525111675262451,
"step": 15
},
{
"epoch": 0.025437201907790145,
"grad_norm": 27.96937416222195,
"learning_rate": 7.936507936507937e-07,
"loss": 3.422595977783203,
"step": 16
},
{
"epoch": 0.02702702702702703,
"grad_norm": 26.409953728370734,
"learning_rate": 8.465608465608466e-07,
"loss": 2.9090871810913086,
"step": 17
},
{
"epoch": 0.028616852146263912,
"grad_norm": 31.509950255139216,
"learning_rate": 8.994708994708995e-07,
"loss": 4.4084930419921875,
"step": 18
},
{
"epoch": 0.030206677265500796,
"grad_norm": 26.354701063294822,
"learning_rate": 9.523809523809525e-07,
"loss": 4.3907470703125,
"step": 19
},
{
"epoch": 0.03179650238473768,
"grad_norm": 23.96857278346574,
"learning_rate": 1.0052910052910054e-06,
"loss": 3.679255962371826,
"step": 20
},
{
"epoch": 0.033386327503974564,
"grad_norm": 35.688302893278596,
"learning_rate": 1.0582010582010582e-06,
"loss": 4.266496181488037,
"step": 21
},
{
"epoch": 0.034976152623211444,
"grad_norm": 25.110803639662702,
"learning_rate": 1.111111111111111e-06,
"loss": 3.3960649967193604,
"step": 22
},
{
"epoch": 0.03656597774244833,
"grad_norm": 18.38682934266701,
"learning_rate": 1.164021164021164e-06,
"loss": 3.22914981842041,
"step": 23
},
{
"epoch": 0.03815580286168521,
"grad_norm": 24.20775912617202,
"learning_rate": 1.216931216931217e-06,
"loss": 4.515796661376953,
"step": 24
},
{
"epoch": 0.0397456279809221,
"grad_norm": 33.73019056293916,
"learning_rate": 1.26984126984127e-06,
"loss": 3.897707939147949,
"step": 25
},
{
"epoch": 0.04133545310015898,
"grad_norm": 20.544890832137906,
"learning_rate": 1.3227513227513228e-06,
"loss": 4.087409496307373,
"step": 26
},
{
"epoch": 0.04292527821939587,
"grad_norm": 20.859207917695514,
"learning_rate": 1.3756613756613758e-06,
"loss": 3.8370442390441895,
"step": 27
},
{
"epoch": 0.04451510333863275,
"grad_norm": 19.89855554686202,
"learning_rate": 1.4285714285714286e-06,
"loss": 3.5972700119018555,
"step": 28
},
{
"epoch": 0.046104928457869634,
"grad_norm": 15.825946032493054,
"learning_rate": 1.4814814814814815e-06,
"loss": 3.3083245754241943,
"step": 29
},
{
"epoch": 0.04769475357710652,
"grad_norm": 15.256333204941079,
"learning_rate": 1.5343915343915345e-06,
"loss": 3.2843480110168457,
"step": 30
},
{
"epoch": 0.0492845786963434,
"grad_norm": 15.65253909378043,
"learning_rate": 1.5873015873015873e-06,
"loss": 3.409064292907715,
"step": 31
},
{
"epoch": 0.05087440381558029,
"grad_norm": 19.432929271120607,
"learning_rate": 1.6402116402116404e-06,
"loss": 3.590700149536133,
"step": 32
},
{
"epoch": 0.05246422893481717,
"grad_norm": 12.289857474808553,
"learning_rate": 1.6931216931216932e-06,
"loss": 3.363887310028076,
"step": 33
},
{
"epoch": 0.05405405405405406,
"grad_norm": 13.2686181349725,
"learning_rate": 1.746031746031746e-06,
"loss": 3.2396044731140137,
"step": 34
},
{
"epoch": 0.05564387917329094,
"grad_norm": 13.096435381422967,
"learning_rate": 1.798941798941799e-06,
"loss": 3.619406223297119,
"step": 35
},
{
"epoch": 0.057233704292527825,
"grad_norm": 11.215843898706959,
"learning_rate": 1.8518518518518519e-06,
"loss": 3.35813045501709,
"step": 36
},
{
"epoch": 0.058823529411764705,
"grad_norm": 10.649493440987735,
"learning_rate": 1.904761904761905e-06,
"loss": 3.1609840393066406,
"step": 37
},
{
"epoch": 0.06041335453100159,
"grad_norm": 13.24735699781108,
"learning_rate": 1.9576719576719577e-06,
"loss": 3.2981178760528564,
"step": 38
},
{
"epoch": 0.06200317965023847,
"grad_norm": 10.225948738076381,
"learning_rate": 2.0105820105820108e-06,
"loss": 3.604062080383301,
"step": 39
},
{
"epoch": 0.06359300476947535,
"grad_norm": 10.090641137113177,
"learning_rate": 2.0634920634920634e-06,
"loss": 3.3760879039764404,
"step": 40
},
{
"epoch": 0.06518282988871224,
"grad_norm": 19.205372173322857,
"learning_rate": 2.1164021164021164e-06,
"loss": 3.1986072063446045,
"step": 41
},
{
"epoch": 0.06677265500794913,
"grad_norm": 12.193860558769504,
"learning_rate": 2.1693121693121695e-06,
"loss": 3.4260833263397217,
"step": 42
},
{
"epoch": 0.06836248012718601,
"grad_norm": 10.613034738721613,
"learning_rate": 2.222222222222222e-06,
"loss": 2.6513419151306152,
"step": 43
},
{
"epoch": 0.06995230524642289,
"grad_norm": 11.451444159725924,
"learning_rate": 2.275132275132275e-06,
"loss": 3.1006345748901367,
"step": 44
},
{
"epoch": 0.07154213036565978,
"grad_norm": 13.125197373192298,
"learning_rate": 2.328042328042328e-06,
"loss": 3.506385326385498,
"step": 45
},
{
"epoch": 0.07313195548489666,
"grad_norm": 14.894703172780192,
"learning_rate": 2.380952380952381e-06,
"loss": 3.1718366146087646,
"step": 46
},
{
"epoch": 0.07472178060413355,
"grad_norm": 20.50688643480365,
"learning_rate": 2.433862433862434e-06,
"loss": 3.573230743408203,
"step": 47
},
{
"epoch": 0.07631160572337042,
"grad_norm": 12.171684621094393,
"learning_rate": 2.486772486772487e-06,
"loss": 3.4022092819213867,
"step": 48
},
{
"epoch": 0.07790143084260731,
"grad_norm": 10.546372018299202,
"learning_rate": 2.53968253968254e-06,
"loss": 3.517230272293091,
"step": 49
},
{
"epoch": 0.0794912559618442,
"grad_norm": 11.236950853709759,
"learning_rate": 2.5925925925925925e-06,
"loss": 3.098986864089966,
"step": 50
},
{
"epoch": 0.08108108108108109,
"grad_norm": 10.377712550018598,
"learning_rate": 2.6455026455026455e-06,
"loss": 3.3276524543762207,
"step": 51
},
{
"epoch": 0.08267090620031796,
"grad_norm": 11.570551381613551,
"learning_rate": 2.6984126984126986e-06,
"loss": 3.817161798477173,
"step": 52
},
{
"epoch": 0.08426073131955485,
"grad_norm": 10.609436953514747,
"learning_rate": 2.7513227513227516e-06,
"loss": 3.2749571800231934,
"step": 53
},
{
"epoch": 0.08585055643879173,
"grad_norm": 10.676393530890982,
"learning_rate": 2.8042328042328042e-06,
"loss": 2.591442584991455,
"step": 54
},
{
"epoch": 0.08744038155802862,
"grad_norm": 8.786141043027358,
"learning_rate": 2.8571428571428573e-06,
"loss": 2.674818277359009,
"step": 55
},
{
"epoch": 0.0890302066772655,
"grad_norm": 13.90775039571811,
"learning_rate": 2.9100529100529103e-06,
"loss": 3.775853157043457,
"step": 56
},
{
"epoch": 0.09062003179650238,
"grad_norm": 15.306506102949852,
"learning_rate": 2.962962962962963e-06,
"loss": 3.712808609008789,
"step": 57
},
{
"epoch": 0.09220985691573927,
"grad_norm": 11.45788428532906,
"learning_rate": 3.015873015873016e-06,
"loss": 2.6537160873413086,
"step": 58
},
{
"epoch": 0.09379968203497616,
"grad_norm": 11.474945819053824,
"learning_rate": 3.068783068783069e-06,
"loss": 3.005936622619629,
"step": 59
},
{
"epoch": 0.09538950715421304,
"grad_norm": 10.112738468503954,
"learning_rate": 3.1216931216931216e-06,
"loss": 3.352091073989868,
"step": 60
},
{
"epoch": 0.09697933227344992,
"grad_norm": 23.636986765198987,
"learning_rate": 3.1746031746031746e-06,
"loss": 3.0014185905456543,
"step": 61
},
{
"epoch": 0.0985691573926868,
"grad_norm": 10.538684251581273,
"learning_rate": 3.2275132275132277e-06,
"loss": 3.156514883041382,
"step": 62
},
{
"epoch": 0.10015898251192369,
"grad_norm": 24.68530877347611,
"learning_rate": 3.2804232804232807e-06,
"loss": 3.4716105461120605,
"step": 63
},
{
"epoch": 0.10174880763116058,
"grad_norm": 8.880780828279468,
"learning_rate": 3.3333333333333333e-06,
"loss": 2.882477045059204,
"step": 64
},
{
"epoch": 0.10333863275039745,
"grad_norm": 18.338165806941713,
"learning_rate": 3.3862433862433864e-06,
"loss": 3.8387961387634277,
"step": 65
},
{
"epoch": 0.10492845786963434,
"grad_norm": 7.3597979784866885,
"learning_rate": 3.4391534391534394e-06,
"loss": 2.620795726776123,
"step": 66
},
{
"epoch": 0.10651828298887123,
"grad_norm": 8.922781731032991,
"learning_rate": 3.492063492063492e-06,
"loss": 2.8749918937683105,
"step": 67
},
{
"epoch": 0.10810810810810811,
"grad_norm": 21.37372028445257,
"learning_rate": 3.544973544973545e-06,
"loss": 3.401191473007202,
"step": 68
},
{
"epoch": 0.10969793322734499,
"grad_norm": 8.887838451374009,
"learning_rate": 3.597883597883598e-06,
"loss": 2.8735604286193848,
"step": 69
},
{
"epoch": 0.11128775834658187,
"grad_norm": 6.866225935716483,
"learning_rate": 3.6507936507936507e-06,
"loss": 2.937927484512329,
"step": 70
},
{
"epoch": 0.11287758346581876,
"grad_norm": 6.5543979903091065,
"learning_rate": 3.7037037037037037e-06,
"loss": 2.569362163543701,
"step": 71
},
{
"epoch": 0.11446740858505565,
"grad_norm": 10.30064064068897,
"learning_rate": 3.7566137566137568e-06,
"loss": 3.0775258541107178,
"step": 72
},
{
"epoch": 0.11605723370429252,
"grad_norm": 11.18234696698082,
"learning_rate": 3.80952380952381e-06,
"loss": 3.062443256378174,
"step": 73
},
{
"epoch": 0.11764705882352941,
"grad_norm": 11.066100259352726,
"learning_rate": 3.862433862433863e-06,
"loss": 2.7398781776428223,
"step": 74
},
{
"epoch": 0.1192368839427663,
"grad_norm": 12.12909564338818,
"learning_rate": 3.9153439153439155e-06,
"loss": 3.0419538021087646,
"step": 75
},
{
"epoch": 0.12082670906200318,
"grad_norm": 10.757566880795,
"learning_rate": 3.968253968253968e-06,
"loss": 2.9976096153259277,
"step": 76
},
{
"epoch": 0.12241653418124006,
"grad_norm": 21.28785396901485,
"learning_rate": 4.0211640211640215e-06,
"loss": 3.4554760456085205,
"step": 77
},
{
"epoch": 0.12400635930047695,
"grad_norm": 8.800500423676487,
"learning_rate": 4.074074074074074e-06,
"loss": 3.0005970001220703,
"step": 78
},
{
"epoch": 0.12559618441971382,
"grad_norm": 12.667911903178506,
"learning_rate": 4.126984126984127e-06,
"loss": 2.7835562229156494,
"step": 79
},
{
"epoch": 0.1271860095389507,
"grad_norm": 5.552759331146878,
"learning_rate": 4.17989417989418e-06,
"loss": 1.1964036226272583,
"step": 80
},
{
"epoch": 0.1287758346581876,
"grad_norm": 9.482995769072225,
"learning_rate": 4.232804232804233e-06,
"loss": 3.1558589935302734,
"step": 81
},
{
"epoch": 0.13036565977742448,
"grad_norm": 15.867917019873019,
"learning_rate": 4.2857142857142855e-06,
"loss": 3.090247631072998,
"step": 82
},
{
"epoch": 0.13195548489666137,
"grad_norm": 15.813469147158665,
"learning_rate": 4.338624338624339e-06,
"loss": 3.1648030281066895,
"step": 83
},
{
"epoch": 0.13354531001589826,
"grad_norm": 9.823156078564097,
"learning_rate": 4.3915343915343915e-06,
"loss": 2.5610551834106445,
"step": 84
},
{
"epoch": 0.13513513513513514,
"grad_norm": 13.090071748263863,
"learning_rate": 4.444444444444444e-06,
"loss": 3.0754504203796387,
"step": 85
},
{
"epoch": 0.13672496025437203,
"grad_norm": 10.149629926167787,
"learning_rate": 4.497354497354498e-06,
"loss": 2.997760772705078,
"step": 86
},
{
"epoch": 0.1383147853736089,
"grad_norm": 18.58538419182332,
"learning_rate": 4.55026455026455e-06,
"loss": 3.5637764930725098,
"step": 87
},
{
"epoch": 0.13990461049284578,
"grad_norm": 16.999293585853817,
"learning_rate": 4.603174603174604e-06,
"loss": 2.937600612640381,
"step": 88
},
{
"epoch": 0.14149443561208266,
"grad_norm": 9.804133729065864,
"learning_rate": 4.656084656084656e-06,
"loss": 3.095571517944336,
"step": 89
},
{
"epoch": 0.14308426073131955,
"grad_norm": 11.531802089182209,
"learning_rate": 4.708994708994709e-06,
"loss": 3.0191006660461426,
"step": 90
},
{
"epoch": 0.14467408585055644,
"grad_norm": 15.351908734558066,
"learning_rate": 4.761904761904762e-06,
"loss": 2.917482852935791,
"step": 91
},
{
"epoch": 0.14626391096979333,
"grad_norm": 11.937099193403586,
"learning_rate": 4.814814814814815e-06,
"loss": 4.431112289428711,
"step": 92
},
{
"epoch": 0.1478537360890302,
"grad_norm": 21.62832537445158,
"learning_rate": 4.867724867724868e-06,
"loss": 2.640915870666504,
"step": 93
},
{
"epoch": 0.1494435612082671,
"grad_norm": 7.260841410847577,
"learning_rate": 4.920634920634921e-06,
"loss": 3.0813612937927246,
"step": 94
},
{
"epoch": 0.151033386327504,
"grad_norm": 10.301768618589072,
"learning_rate": 4.973544973544974e-06,
"loss": 2.7878708839416504,
"step": 95
},
{
"epoch": 0.15262321144674085,
"grad_norm": 10.6973156598335,
"learning_rate": 5.026455026455027e-06,
"loss": 3.8078625202178955,
"step": 96
},
{
"epoch": 0.15421303656597773,
"grad_norm": 12.524211447252867,
"learning_rate": 5.07936507936508e-06,
"loss": 3.1541152000427246,
"step": 97
},
{
"epoch": 0.15580286168521462,
"grad_norm": 8.435308952157827,
"learning_rate": 5.132275132275133e-06,
"loss": 2.810488700866699,
"step": 98
},
{
"epoch": 0.1573926868044515,
"grad_norm": 9.946665987848993,
"learning_rate": 5.185185185185185e-06,
"loss": 3.088862895965576,
"step": 99
},
{
"epoch": 0.1589825119236884,
"grad_norm": 16.409093732708502,
"learning_rate": 5.2380952380952384e-06,
"loss": 2.9376237392425537,
"step": 100
},
{
"epoch": 0.16057233704292528,
"grad_norm": 11.980502505351422,
"learning_rate": 5.291005291005291e-06,
"loss": 3.024081230163574,
"step": 101
},
{
"epoch": 0.16216216216216217,
"grad_norm": 6.313212264071273,
"learning_rate": 5.3439153439153445e-06,
"loss": 2.360293388366699,
"step": 102
},
{
"epoch": 0.16375198728139906,
"grad_norm": 14.096128703783076,
"learning_rate": 5.396825396825397e-06,
"loss": 2.982285976409912,
"step": 103
},
{
"epoch": 0.16534181240063592,
"grad_norm": 10.009478552439436,
"learning_rate": 5.449735449735451e-06,
"loss": 3.326803684234619,
"step": 104
},
{
"epoch": 0.1669316375198728,
"grad_norm": 7.599874356869111,
"learning_rate": 5.502645502645503e-06,
"loss": 2.5373692512512207,
"step": 105
},
{
"epoch": 0.1685214626391097,
"grad_norm": 16.7898561209663,
"learning_rate": 5.555555555555557e-06,
"loss": 3.4259955883026123,
"step": 106
},
{
"epoch": 0.17011128775834658,
"grad_norm": 13.958791391532715,
"learning_rate": 5.6084656084656084e-06,
"loss": 3.3131277561187744,
"step": 107
},
{
"epoch": 0.17170111287758347,
"grad_norm": 12.009776509727333,
"learning_rate": 5.661375661375662e-06,
"loss": 2.851423978805542,
"step": 108
},
{
"epoch": 0.17329093799682035,
"grad_norm": 16.62552204724523,
"learning_rate": 5.7142857142857145e-06,
"loss": 3.5492098331451416,
"step": 109
},
{
"epoch": 0.17488076311605724,
"grad_norm": 7.10122679815253,
"learning_rate": 5.767195767195768e-06,
"loss": 2.4778614044189453,
"step": 110
},
{
"epoch": 0.17647058823529413,
"grad_norm": 12.461370297402736,
"learning_rate": 5.820105820105821e-06,
"loss": 2.2983148097991943,
"step": 111
},
{
"epoch": 0.178060413354531,
"grad_norm": 8.373213032362216,
"learning_rate": 5.873015873015874e-06,
"loss": 3.072960138320923,
"step": 112
},
{
"epoch": 0.17965023847376788,
"grad_norm": 8.151798706622255,
"learning_rate": 5.925925925925926e-06,
"loss": 2.70046329498291,
"step": 113
},
{
"epoch": 0.18124006359300476,
"grad_norm": 29.73366314903388,
"learning_rate": 5.978835978835979e-06,
"loss": 4.8392863273620605,
"step": 114
},
{
"epoch": 0.18282988871224165,
"grad_norm": 7.301103721049628,
"learning_rate": 6.031746031746032e-06,
"loss": 2.8294947147369385,
"step": 115
},
{
"epoch": 0.18441971383147854,
"grad_norm": 16.06044084552397,
"learning_rate": 6.084656084656085e-06,
"loss": 3.274482250213623,
"step": 116
},
{
"epoch": 0.18600953895071543,
"grad_norm": 7.585391293597456,
"learning_rate": 6.137566137566138e-06,
"loss": 2.9996328353881836,
"step": 117
},
{
"epoch": 0.1875993640699523,
"grad_norm": 20.191267861289145,
"learning_rate": 6.1904761904761914e-06,
"loss": 3.5223331451416016,
"step": 118
},
{
"epoch": 0.1891891891891892,
"grad_norm": 16.500509647866277,
"learning_rate": 6.243386243386243e-06,
"loss": 3.3577396869659424,
"step": 119
},
{
"epoch": 0.1907790143084261,
"grad_norm": 8.060506214524787,
"learning_rate": 6.296296296296297e-06,
"loss": 2.650815486907959,
"step": 120
},
{
"epoch": 0.19236883942766295,
"grad_norm": 12.673879706396006,
"learning_rate": 6.349206349206349e-06,
"loss": 2.8354992866516113,
"step": 121
},
{
"epoch": 0.19395866454689983,
"grad_norm": 11.414637618226603,
"learning_rate": 6.402116402116403e-06,
"loss": 2.889648675918579,
"step": 122
},
{
"epoch": 0.19554848966613672,
"grad_norm": 12.922762478535862,
"learning_rate": 6.455026455026455e-06,
"loss": 3.0907392501831055,
"step": 123
},
{
"epoch": 0.1971383147853736,
"grad_norm": 61.136138891067894,
"learning_rate": 6.507936507936509e-06,
"loss": 3.7759904861450195,
"step": 124
},
{
"epoch": 0.1987281399046105,
"grad_norm": 10.356849079188658,
"learning_rate": 6.560846560846561e-06,
"loss": 3.2002317905426025,
"step": 125
},
{
"epoch": 0.20031796502384738,
"grad_norm": 16.78438797563512,
"learning_rate": 6.613756613756615e-06,
"loss": 2.974229574203491,
"step": 126
},
{
"epoch": 0.20190779014308427,
"grad_norm": 12.81961909287759,
"learning_rate": 6.666666666666667e-06,
"loss": 2.952580690383911,
"step": 127
},
{
"epoch": 0.20349761526232116,
"grad_norm": 12.190840843520247,
"learning_rate": 6.71957671957672e-06,
"loss": 3.3993425369262695,
"step": 128
},
{
"epoch": 0.20508744038155802,
"grad_norm": 7.37093293280535,
"learning_rate": 6.772486772486773e-06,
"loss": 3.5084962844848633,
"step": 129
},
{
"epoch": 0.2066772655007949,
"grad_norm": 8.766940367947488,
"learning_rate": 6.825396825396826e-06,
"loss": 2.979722499847412,
"step": 130
},
{
"epoch": 0.2082670906200318,
"grad_norm": 18.195649510942715,
"learning_rate": 6.878306878306879e-06,
"loss": 2.716529130935669,
"step": 131
},
{
"epoch": 0.20985691573926868,
"grad_norm": 10.849621644780795,
"learning_rate": 6.931216931216932e-06,
"loss": 3.278958797454834,
"step": 132
},
{
"epoch": 0.21144674085850557,
"grad_norm": 11.035071999700392,
"learning_rate": 6.984126984126984e-06,
"loss": 3.1757240295410156,
"step": 133
},
{
"epoch": 0.21303656597774245,
"grad_norm": 9.638319374477183,
"learning_rate": 7.0370370370370375e-06,
"loss": 2.755430221557617,
"step": 134
},
{
"epoch": 0.21462639109697934,
"grad_norm": 13.908631086183993,
"learning_rate": 7.08994708994709e-06,
"loss": 3.236640453338623,
"step": 135
},
{
"epoch": 0.21621621621621623,
"grad_norm": 21.68147330866866,
"learning_rate": 7.1428571428571436e-06,
"loss": 3.0127792358398438,
"step": 136
},
{
"epoch": 0.2178060413354531,
"grad_norm": 11.192606660727337,
"learning_rate": 7.195767195767196e-06,
"loss": 2.746997117996216,
"step": 137
},
{
"epoch": 0.21939586645468998,
"grad_norm": 6.435274969530198,
"learning_rate": 7.24867724867725e-06,
"loss": 2.208346366882324,
"step": 138
},
{
"epoch": 0.22098569157392686,
"grad_norm": 9.977014012465766,
"learning_rate": 7.301587301587301e-06,
"loss": 2.976306676864624,
"step": 139
},
{
"epoch": 0.22257551669316375,
"grad_norm": 6.835119442466233,
"learning_rate": 7.354497354497355e-06,
"loss": 2.2386083602905273,
"step": 140
},
{
"epoch": 0.22416534181240064,
"grad_norm": 27.664987866635204,
"learning_rate": 7.4074074074074075e-06,
"loss": 3.936030626296997,
"step": 141
},
{
"epoch": 0.22575516693163752,
"grad_norm": 13.281763744066357,
"learning_rate": 7.460317460317461e-06,
"loss": 3.425809383392334,
"step": 142
},
{
"epoch": 0.2273449920508744,
"grad_norm": 8.883887810388247,
"learning_rate": 7.5132275132275136e-06,
"loss": 2.791560411453247,
"step": 143
},
{
"epoch": 0.2289348171701113,
"grad_norm": 13.575129037863244,
"learning_rate": 7.566137566137567e-06,
"loss": 3.240875720977783,
"step": 144
},
{
"epoch": 0.23052464228934816,
"grad_norm": 12.951685526951211,
"learning_rate": 7.61904761904762e-06,
"loss": 3.083731174468994,
"step": 145
},
{
"epoch": 0.23211446740858505,
"grad_norm": 12.57259309996259,
"learning_rate": 7.671957671957672e-06,
"loss": 2.3552327156066895,
"step": 146
},
{
"epoch": 0.23370429252782193,
"grad_norm": 14.929160501589186,
"learning_rate": 7.724867724867726e-06,
"loss": 2.911569595336914,
"step": 147
},
{
"epoch": 0.23529411764705882,
"grad_norm": 9.10971441639092,
"learning_rate": 7.77777777777778e-06,
"loss": 2.477398633956909,
"step": 148
},
{
"epoch": 0.2368839427662957,
"grad_norm": 13.685768768755157,
"learning_rate": 7.830687830687831e-06,
"loss": 2.6684117317199707,
"step": 149
},
{
"epoch": 0.2384737678855326,
"grad_norm": 32.20168223485909,
"learning_rate": 7.883597883597884e-06,
"loss": 3.338864803314209,
"step": 150
},
{
"epoch": 0.24006359300476948,
"grad_norm": 7.915241057603823,
"learning_rate": 7.936507936507936e-06,
"loss": 2.8785102367401123,
"step": 151
},
{
"epoch": 0.24165341812400637,
"grad_norm": 9.990195796041824,
"learning_rate": 7.98941798941799e-06,
"loss": 2.784539222717285,
"step": 152
},
{
"epoch": 0.24324324324324326,
"grad_norm": 13.621954030852349,
"learning_rate": 8.042328042328043e-06,
"loss": 2.820026159286499,
"step": 153
},
{
"epoch": 0.24483306836248012,
"grad_norm": 7.4478722820007865,
"learning_rate": 8.095238095238097e-06,
"loss": 3.184044361114502,
"step": 154
},
{
"epoch": 0.246422893481717,
"grad_norm": 16.120590939401566,
"learning_rate": 8.148148148148148e-06,
"loss": 2.2940902709960938,
"step": 155
},
{
"epoch": 0.2480127186009539,
"grad_norm": 11.591244973819766,
"learning_rate": 8.201058201058202e-06,
"loss": 2.3563649654388428,
"step": 156
},
{
"epoch": 0.24960254372019078,
"grad_norm": 9.161949195292504,
"learning_rate": 8.253968253968254e-06,
"loss": 3.106217861175537,
"step": 157
},
{
"epoch": 0.25119236883942764,
"grad_norm": 15.37253533460567,
"learning_rate": 8.306878306878307e-06,
"loss": 3.1488471031188965,
"step": 158
},
{
"epoch": 0.2527821939586645,
"grad_norm": 11.948620374727305,
"learning_rate": 8.35978835978836e-06,
"loss": 2.8425357341766357,
"step": 159
},
{
"epoch": 0.2543720190779014,
"grad_norm": 11.489434173785702,
"learning_rate": 8.412698412698414e-06,
"loss": 3.0375239849090576,
"step": 160
},
{
"epoch": 0.2559618441971383,
"grad_norm": 12.43920449942672,
"learning_rate": 8.465608465608466e-06,
"loss": 3.767285108566284,
"step": 161
},
{
"epoch": 0.2575516693163752,
"grad_norm": 14.656284556147316,
"learning_rate": 8.518518518518519e-06,
"loss": 3.189174175262451,
"step": 162
},
{
"epoch": 0.2591414944356121,
"grad_norm": 10.107696999004142,
"learning_rate": 8.571428571428571e-06,
"loss": 2.8151469230651855,
"step": 163
},
{
"epoch": 0.26073131955484896,
"grad_norm": 7.8333910009033065,
"learning_rate": 8.624338624338624e-06,
"loss": 2.86727237701416,
"step": 164
},
{
"epoch": 0.26232114467408585,
"grad_norm": 18.344054105660557,
"learning_rate": 8.677248677248678e-06,
"loss": 3.4317922592163086,
"step": 165
},
{
"epoch": 0.26391096979332274,
"grad_norm": 14.109564273449301,
"learning_rate": 8.730158730158731e-06,
"loss": 3.358736038208008,
"step": 166
},
{
"epoch": 0.2655007949125596,
"grad_norm": 10.64636873319405,
"learning_rate": 8.783068783068783e-06,
"loss": 2.5848193168640137,
"step": 167
},
{
"epoch": 0.2670906200317965,
"grad_norm": 7.140157930387994,
"learning_rate": 8.835978835978837e-06,
"loss": 3.1791625022888184,
"step": 168
},
{
"epoch": 0.2686804451510334,
"grad_norm": 17.161706285667417,
"learning_rate": 8.888888888888888e-06,
"loss": 2.3590657711029053,
"step": 169
},
{
"epoch": 0.2702702702702703,
"grad_norm": 29.446892543609344,
"learning_rate": 8.941798941798942e-06,
"loss": 3.5621213912963867,
"step": 170
},
{
"epoch": 0.2718600953895072,
"grad_norm": 16.72759384781146,
"learning_rate": 8.994708994708995e-06,
"loss": 3.151611089706421,
"step": 171
},
{
"epoch": 0.27344992050874406,
"grad_norm": 12.032239707153675,
"learning_rate": 9.047619047619049e-06,
"loss": 3.3835530281066895,
"step": 172
},
{
"epoch": 0.27503974562798095,
"grad_norm": 6.721126601850909,
"learning_rate": 9.1005291005291e-06,
"loss": 2.485015869140625,
"step": 173
},
{
"epoch": 0.2766295707472178,
"grad_norm": 13.098556820950769,
"learning_rate": 9.153439153439154e-06,
"loss": 3.190592050552368,
"step": 174
},
{
"epoch": 0.27821939586645467,
"grad_norm": 7.6498571126734145,
"learning_rate": 9.206349206349207e-06,
"loss": 1.9906291961669922,
"step": 175
},
{
"epoch": 0.27980922098569155,
"grad_norm": 10.086096084629292,
"learning_rate": 9.25925925925926e-06,
"loss": 2.9297289848327637,
"step": 176
},
{
"epoch": 0.28139904610492844,
"grad_norm": 10.521555282547693,
"learning_rate": 9.312169312169313e-06,
"loss": 2.9031777381896973,
"step": 177
},
{
"epoch": 0.28298887122416533,
"grad_norm": 11.275372751518375,
"learning_rate": 9.365079365079366e-06,
"loss": 3.1964385509490967,
"step": 178
},
{
"epoch": 0.2845786963434022,
"grad_norm": 31.857894626364228,
"learning_rate": 9.417989417989418e-06,
"loss": 3.562572717666626,
"step": 179
},
{
"epoch": 0.2861685214626391,
"grad_norm": 11.67182979337774,
"learning_rate": 9.470899470899471e-06,
"loss": 2.9622483253479004,
"step": 180
},
{
"epoch": 0.287758346581876,
"grad_norm": 8.433382292034693,
"learning_rate": 9.523809523809525e-06,
"loss": 2.673311710357666,
"step": 181
},
{
"epoch": 0.2893481717011129,
"grad_norm": 9.41809032552769,
"learning_rate": 9.576719576719578e-06,
"loss": 3.147991895675659,
"step": 182
},
{
"epoch": 0.29093799682034976,
"grad_norm": 14.469006681412353,
"learning_rate": 9.62962962962963e-06,
"loss": 3.427096128463745,
"step": 183
},
{
"epoch": 0.29252782193958665,
"grad_norm": 11.315982466451157,
"learning_rate": 9.682539682539683e-06,
"loss": 2.644692897796631,
"step": 184
},
{
"epoch": 0.29411764705882354,
"grad_norm": 7.855532445920403,
"learning_rate": 9.735449735449735e-06,
"loss": 2.748213291168213,
"step": 185
},
{
"epoch": 0.2957074721780604,
"grad_norm": 8.270032489370442,
"learning_rate": 9.788359788359789e-06,
"loss": 2.9862685203552246,
"step": 186
},
{
"epoch": 0.2972972972972973,
"grad_norm": 14.12339483663101,
"learning_rate": 9.841269841269842e-06,
"loss": 3.0839715003967285,
"step": 187
},
{
"epoch": 0.2988871224165342,
"grad_norm": 24.634858333440526,
"learning_rate": 9.894179894179896e-06,
"loss": 3.622962474822998,
"step": 188
},
{
"epoch": 0.3004769475357711,
"grad_norm": 10.995163007585823,
"learning_rate": 9.947089947089947e-06,
"loss": 3.127063751220703,
"step": 189
},
{
"epoch": 0.302066772655008,
"grad_norm": 12.483636136677996,
"learning_rate": 1e-05,
"loss": 2.9223406314849854,
"step": 190
},
{
"epoch": 0.3036565977742448,
"grad_norm": 20.178311042194125,
"learning_rate": 9.999991442158113e-06,
"loss": 3.5398926734924316,
"step": 191
},
{
"epoch": 0.3052464228934817,
"grad_norm": 10.479156797595314,
"learning_rate": 9.99996576866174e-06,
"loss": 2.693631649017334,
"step": 192
},
{
"epoch": 0.3068362480127186,
"grad_norm": 70.96351629759928,
"learning_rate": 9.999922979598773e-06,
"loss": 3.0234291553497314,
"step": 193
},
{
"epoch": 0.30842607313195547,
"grad_norm": 14.7334787992814,
"learning_rate": 9.999863075115677e-06,
"loss": 3.117661952972412,
"step": 194
},
{
"epoch": 0.31001589825119236,
"grad_norm": 10.128979481357636,
"learning_rate": 9.999786055417519e-06,
"loss": 2.6096296310424805,
"step": 195
},
{
"epoch": 0.31160572337042924,
"grad_norm": 9.41421099953952,
"learning_rate": 9.999691920767945e-06,
"loss": 2.9272522926330566,
"step": 196
},
{
"epoch": 0.31319554848966613,
"grad_norm": 15.414651367634708,
"learning_rate": 9.999580671489191e-06,
"loss": 2.8805973529815674,
"step": 197
},
{
"epoch": 0.314785373608903,
"grad_norm": 15.828806976113896,
"learning_rate": 9.999452307962079e-06,
"loss": 2.961799144744873,
"step": 198
},
{
"epoch": 0.3163751987281399,
"grad_norm": 14.843803608404203,
"learning_rate": 9.999306830626015e-06,
"loss": 2.878042221069336,
"step": 199
},
{
"epoch": 0.3179650238473768,
"grad_norm": 14.036434182045523,
"learning_rate": 9.999144239978987e-06,
"loss": 3.0351200103759766,
"step": 200
},
{
"epoch": 0.3195548489666137,
"grad_norm": 10.730468002314861,
"learning_rate": 9.998964536577566e-06,
"loss": 2.9610581398010254,
"step": 201
},
{
"epoch": 0.32114467408585057,
"grad_norm": 9.587107087736614,
"learning_rate": 9.998767721036901e-06,
"loss": 3.2652862071990967,
"step": 202
},
{
"epoch": 0.32273449920508746,
"grad_norm": 14.206705864824103,
"learning_rate": 9.998553794030719e-06,
"loss": 3.029031276702881,
"step": 203
},
{
"epoch": 0.32432432432432434,
"grad_norm": 7.982790434048001,
"learning_rate": 9.998322756291321e-06,
"loss": 3.0771546363830566,
"step": 204
},
{
"epoch": 0.32591414944356123,
"grad_norm": 15.760513036342372,
"learning_rate": 9.998074608609579e-06,
"loss": 2.7780063152313232,
"step": 205
},
{
"epoch": 0.3275039745627981,
"grad_norm": 10.059217166278922,
"learning_rate": 9.997809351834939e-06,
"loss": 2.802194833755493,
"step": 206
},
{
"epoch": 0.32909379968203495,
"grad_norm": 18.29933240314462,
"learning_rate": 9.997526986875412e-06,
"loss": 3.0691840648651123,
"step": 207
},
{
"epoch": 0.33068362480127184,
"grad_norm": 8.443032752679324,
"learning_rate": 9.997227514697568e-06,
"loss": 2.6535720825195312,
"step": 208
},
{
"epoch": 0.3322734499205087,
"grad_norm": 9.084135114248914,
"learning_rate": 9.996910936326545e-06,
"loss": 2.830028772354126,
"step": 209
},
{
"epoch": 0.3338632750397456,
"grad_norm": 11.92996879784718,
"learning_rate": 9.996577252846032e-06,
"loss": 2.7349863052368164,
"step": 210
},
{
"epoch": 0.3354531001589825,
"grad_norm": 8.51522244619189,
"learning_rate": 9.996226465398272e-06,
"loss": 2.55774188041687,
"step": 211
},
{
"epoch": 0.3370429252782194,
"grad_norm": 19.349878654802804,
"learning_rate": 9.995858575184062e-06,
"loss": 2.8570663928985596,
"step": 212
},
{
"epoch": 0.3386327503974563,
"grad_norm": 7.4415089683943085,
"learning_rate": 9.995473583462737e-06,
"loss": 2.795311450958252,
"step": 213
},
{
"epoch": 0.34022257551669316,
"grad_norm": 19.748222755481127,
"learning_rate": 9.99507149155218e-06,
"loss": 3.103321075439453,
"step": 214
},
{
"epoch": 0.34181240063593005,
"grad_norm": 13.405923907887225,
"learning_rate": 9.994652300828803e-06,
"loss": 2.971069574356079,
"step": 215
},
{
"epoch": 0.34340222575516693,
"grad_norm": 15.173562859355869,
"learning_rate": 9.994216012727556e-06,
"loss": 2.4139862060546875,
"step": 216
},
{
"epoch": 0.3449920508744038,
"grad_norm": 10.818103405590593,
"learning_rate": 9.99376262874191e-06,
"loss": 2.6961464881896973,
"step": 217
},
{
"epoch": 0.3465818759936407,
"grad_norm": 7.93512381654976,
"learning_rate": 9.993292150423862e-06,
"loss": 2.362365484237671,
"step": 218
},
{
"epoch": 0.3481717011128776,
"grad_norm": 14.70978474527209,
"learning_rate": 9.992804579383924e-06,
"loss": 3.3039183616638184,
"step": 219
},
{
"epoch": 0.3497615262321145,
"grad_norm": 18.572829651801086,
"learning_rate": 9.992299917291118e-06,
"loss": 2.4119772911071777,
"step": 220
},
{
"epoch": 0.35135135135135137,
"grad_norm": 10.133845798143364,
"learning_rate": 9.991778165872973e-06,
"loss": 3.023960590362549,
"step": 221
},
{
"epoch": 0.35294117647058826,
"grad_norm": 21.576817140456043,
"learning_rate": 9.991239326915509e-06,
"loss": 2.799973964691162,
"step": 222
},
{
"epoch": 0.35453100158982515,
"grad_norm": 9.74809561137367,
"learning_rate": 9.990683402263254e-06,
"loss": 2.593146800994873,
"step": 223
},
{
"epoch": 0.356120826709062,
"grad_norm": 11.298489884990596,
"learning_rate": 9.990110393819207e-06,
"loss": 2.991196632385254,
"step": 224
},
{
"epoch": 0.35771065182829886,
"grad_norm": 7.74310426714735,
"learning_rate": 9.989520303544861e-06,
"loss": 2.5676424503326416,
"step": 225
},
{
"epoch": 0.35930047694753575,
"grad_norm": 11.040637572487988,
"learning_rate": 9.98891313346017e-06,
"loss": 2.484053611755371,
"step": 226
},
{
"epoch": 0.36089030206677264,
"grad_norm": 13.73072858076415,
"learning_rate": 9.988288885643565e-06,
"loss": 2.5456459522247314,
"step": 227
},
{
"epoch": 0.3624801271860095,
"grad_norm": 11.673610223259558,
"learning_rate": 9.987647562231926e-06,
"loss": 3.305084228515625,
"step": 228
},
{
"epoch": 0.3640699523052464,
"grad_norm": 18.366434893044207,
"learning_rate": 9.986989165420596e-06,
"loss": 3.1925582885742188,
"step": 229
},
{
"epoch": 0.3656597774244833,
"grad_norm": 12.177300199600618,
"learning_rate": 9.986313697463353e-06,
"loss": 2.929258346557617,
"step": 230
},
{
"epoch": 0.3672496025437202,
"grad_norm": 6.5814118651427345,
"learning_rate": 9.98562116067242e-06,
"loss": 2.5541605949401855,
"step": 231
},
{
"epoch": 0.3688394276629571,
"grad_norm": 10.820882864838984,
"learning_rate": 9.984911557418444e-06,
"loss": 2.5791280269622803,
"step": 232
},
{
"epoch": 0.37042925278219396,
"grad_norm": 15.37113923702593,
"learning_rate": 9.984184890130491e-06,
"loss": 3.1903953552246094,
"step": 233
},
{
"epoch": 0.37201907790143085,
"grad_norm": 9.045995242642721,
"learning_rate": 9.983441161296048e-06,
"loss": 2.4408342838287354,
"step": 234
},
{
"epoch": 0.37360890302066774,
"grad_norm": 15.152661135606118,
"learning_rate": 9.982680373460996e-06,
"loss": 2.5460476875305176,
"step": 235
},
{
"epoch": 0.3751987281399046,
"grad_norm": 8.16353063007233,
"learning_rate": 9.981902529229617e-06,
"loss": 2.487499713897705,
"step": 236
},
{
"epoch": 0.3767885532591415,
"grad_norm": 11.463426369962924,
"learning_rate": 9.981107631264578e-06,
"loss": 2.4622912406921387,
"step": 237
},
{
"epoch": 0.3783783783783784,
"grad_norm": 13.891724957278424,
"learning_rate": 9.980295682286924e-06,
"loss": 3.4187865257263184,
"step": 238
},
{
"epoch": 0.3799682034976153,
"grad_norm": 8.8302666510195,
"learning_rate": 9.979466685076069e-06,
"loss": 2.393531322479248,
"step": 239
},
{
"epoch": 0.3815580286168522,
"grad_norm": 11.8201828045838,
"learning_rate": 9.97862064246978e-06,
"loss": 2.8234825134277344,
"step": 240
},
{
"epoch": 0.383147853736089,
"grad_norm": 14.642242808889572,
"learning_rate": 9.97775755736418e-06,
"loss": 2.8137476444244385,
"step": 241
},
{
"epoch": 0.3847376788553259,
"grad_norm": 10.460666558488356,
"learning_rate": 9.976877432713725e-06,
"loss": 2.6718130111694336,
"step": 242
},
{
"epoch": 0.3863275039745628,
"grad_norm": 9.41352654863366,
"learning_rate": 9.975980271531205e-06,
"loss": 2.797076940536499,
"step": 243
},
{
"epoch": 0.38791732909379967,
"grad_norm": 22.912991700027433,
"learning_rate": 9.97506607688772e-06,
"loss": 4.023824691772461,
"step": 244
},
{
"epoch": 0.38950715421303655,
"grad_norm": 11.547224083305466,
"learning_rate": 9.974134851912688e-06,
"loss": 3.1276447772979736,
"step": 245
},
{
"epoch": 0.39109697933227344,
"grad_norm": 23.66944749659379,
"learning_rate": 9.97318659979382e-06,
"loss": 2.84647798538208,
"step": 246
},
{
"epoch": 0.39268680445151033,
"grad_norm": 8.463015832445738,
"learning_rate": 9.97222132377711e-06,
"loss": 2.8939974308013916,
"step": 247
},
{
"epoch": 0.3942766295707472,
"grad_norm": 8.792225539309547,
"learning_rate": 9.971239027166832e-06,
"loss": 2.658247947692871,
"step": 248
},
{
"epoch": 0.3958664546899841,
"grad_norm": 46.91904473108387,
"learning_rate": 9.970239713325518e-06,
"loss": 3.596339464187622,
"step": 249
},
{
"epoch": 0.397456279809221,
"grad_norm": 8.496075126599893,
"learning_rate": 9.969223385673958e-06,
"loss": 2.798522472381592,
"step": 250
},
{
"epoch": 0.3990461049284579,
"grad_norm": 10.626098168110953,
"learning_rate": 9.968190047691184e-06,
"loss": 3.0867185592651367,
"step": 251
},
{
"epoch": 0.40063593004769477,
"grad_norm": 8.956578837726095,
"learning_rate": 9.967139702914447e-06,
"loss": 3.229172706604004,
"step": 252
},
{
"epoch": 0.40222575516693165,
"grad_norm": 12.855914124198701,
"learning_rate": 9.966072354939225e-06,
"loss": 2.9705493450164795,
"step": 253
},
{
"epoch": 0.40381558028616854,
"grad_norm": 23.688975978884205,
"learning_rate": 9.964988007419195e-06,
"loss": 2.8320472240448,
"step": 254
},
{
"epoch": 0.40540540540540543,
"grad_norm": 11.823651203027762,
"learning_rate": 9.963886664066224e-06,
"loss": 2.742363452911377,
"step": 255
},
{
"epoch": 0.4069952305246423,
"grad_norm": 32.55567327428842,
"learning_rate": 9.962768328650367e-06,
"loss": 2.6898093223571777,
"step": 256
},
{
"epoch": 0.40858505564387915,
"grad_norm": 12.333778411337002,
"learning_rate": 9.961633004999835e-06,
"loss": 2.416064739227295,
"step": 257
},
{
"epoch": 0.41017488076311603,
"grad_norm": 12.46481852710721,
"learning_rate": 9.960480697000996e-06,
"loss": 2.5227324962615967,
"step": 258
},
{
"epoch": 0.4117647058823529,
"grad_norm": 14.821827754412858,
"learning_rate": 9.95931140859836e-06,
"loss": 2.843254566192627,
"step": 259
},
{
"epoch": 0.4133545310015898,
"grad_norm": 13.241902003603611,
"learning_rate": 9.95812514379456e-06,
"loss": 2.8716843128204346,
"step": 260
},
{
"epoch": 0.4149443561208267,
"grad_norm": 9.97892993021681,
"learning_rate": 9.956921906650342e-06,
"loss": 2.8791468143463135,
"step": 261
},
{
"epoch": 0.4165341812400636,
"grad_norm": 10.340993757400383,
"learning_rate": 9.95570170128455e-06,
"loss": 2.94400691986084,
"step": 262
},
{
"epoch": 0.41812400635930047,
"grad_norm": 14.429065449122799,
"learning_rate": 9.954464531874118e-06,
"loss": 2.703669786453247,
"step": 263
},
{
"epoch": 0.41971383147853736,
"grad_norm": 9.52424184559037,
"learning_rate": 9.953210402654043e-06,
"loss": 1.8419052362442017,
"step": 264
},
{
"epoch": 0.42130365659777425,
"grad_norm": 10.85229462688143,
"learning_rate": 9.951939317917381e-06,
"loss": 2.812004327774048,
"step": 265
},
{
"epoch": 0.42289348171701113,
"grad_norm": 10.563617697488839,
"learning_rate": 9.95065128201523e-06,
"loss": 3.1184329986572266,
"step": 266
},
{
"epoch": 0.424483306836248,
"grad_norm": 15.181152546843654,
"learning_rate": 9.949346299356711e-06,
"loss": 2.8881607055664062,
"step": 267
},
{
"epoch": 0.4260731319554849,
"grad_norm": 7.730113415937909,
"learning_rate": 9.94802437440896e-06,
"loss": 2.938474655151367,
"step": 268
},
{
"epoch": 0.4276629570747218,
"grad_norm": 16.011410990937947,
"learning_rate": 9.946685511697108e-06,
"loss": 2.8643383979797363,
"step": 269
},
{
"epoch": 0.4292527821939587,
"grad_norm": 14.148092529488144,
"learning_rate": 9.945329715804261e-06,
"loss": 2.6052684783935547,
"step": 270
},
{
"epoch": 0.43084260731319557,
"grad_norm": 13.558262505075879,
"learning_rate": 9.9439569913715e-06,
"loss": 3.440485954284668,
"step": 271
},
{
"epoch": 0.43243243243243246,
"grad_norm": 9.467937948634649,
"learning_rate": 9.942567343097843e-06,
"loss": 3.508424997329712,
"step": 272
},
{
"epoch": 0.43402225755166934,
"grad_norm": 10.39498217285857,
"learning_rate": 9.941160775740247e-06,
"loss": 2.8846826553344727,
"step": 273
},
{
"epoch": 0.4356120826709062,
"grad_norm": 14.176390300187323,
"learning_rate": 9.939737294113585e-06,
"loss": 2.803740978240967,
"step": 274
},
{
"epoch": 0.43720190779014306,
"grad_norm": 17.785521530494066,
"learning_rate": 9.938296903090631e-06,
"loss": 4.964699745178223,
"step": 275
},
{
"epoch": 0.43879173290937995,
"grad_norm": 18.588550086313642,
"learning_rate": 9.936839607602038e-06,
"loss": 2.787569046020508,
"step": 276
},
{
"epoch": 0.44038155802861684,
"grad_norm": 15.38132057951777,
"learning_rate": 9.93536541263633e-06,
"loss": 3.089069366455078,
"step": 277
},
{
"epoch": 0.4419713831478537,
"grad_norm": 11.531976186480126,
"learning_rate": 9.933874323239876e-06,
"loss": 2.688262701034546,
"step": 278
},
{
"epoch": 0.4435612082670906,
"grad_norm": 8.767813327048348,
"learning_rate": 9.932366344516879e-06,
"loss": 2.903958320617676,
"step": 279
},
{
"epoch": 0.4451510333863275,
"grad_norm": 14.186522002107646,
"learning_rate": 9.930841481629358e-06,
"loss": 3.396346092224121,
"step": 280
},
{
"epoch": 0.4467408585055644,
"grad_norm": 8.4581258239869,
"learning_rate": 9.929299739797127e-06,
"loss": 3.1296937465667725,
"step": 281
},
{
"epoch": 0.4483306836248013,
"grad_norm": 10.627174140105737,
"learning_rate": 9.927741124297776e-06,
"loss": 3.211103916168213,
"step": 282
},
{
"epoch": 0.44992050874403816,
"grad_norm": 12.699516974685517,
"learning_rate": 9.926165640466664e-06,
"loss": 2.7114107608795166,
"step": 283
},
{
"epoch": 0.45151033386327505,
"grad_norm": 18.797540035139892,
"learning_rate": 9.924573293696885e-06,
"loss": 2.0409135818481445,
"step": 284
},
{
"epoch": 0.45310015898251194,
"grad_norm": 8.667150407323442,
"learning_rate": 9.922964089439257e-06,
"loss": 2.540942430496216,
"step": 285
},
{
"epoch": 0.4546899841017488,
"grad_norm": 12.965206512595481,
"learning_rate": 9.92133803320231e-06,
"loss": 2.699608325958252,
"step": 286
},
{
"epoch": 0.4562798092209857,
"grad_norm": 21.082413121018245,
"learning_rate": 9.919695130552257e-06,
"loss": 3.058361053466797,
"step": 287
},
{
"epoch": 0.4578696343402226,
"grad_norm": 6.991367140145618,
"learning_rate": 9.918035387112976e-06,
"loss": 2.7070534229278564,
"step": 288
},
{
"epoch": 0.4594594594594595,
"grad_norm": 13.102007296803354,
"learning_rate": 9.916358808565999e-06,
"loss": 2.6915783882141113,
"step": 289
},
{
"epoch": 0.4610492845786963,
"grad_norm": 10.195274915471822,
"learning_rate": 9.91466540065048e-06,
"loss": 2.9073867797851562,
"step": 290
},
{
"epoch": 0.4626391096979332,
"grad_norm": 10.689475549111851,
"learning_rate": 9.91295516916319e-06,
"loss": 2.729437828063965,
"step": 291
},
{
"epoch": 0.4642289348171701,
"grad_norm": 6.740276455448954,
"learning_rate": 9.91122811995848e-06,
"loss": 2.762054443359375,
"step": 292
},
{
"epoch": 0.465818759936407,
"grad_norm": 8.284711631921079,
"learning_rate": 9.90948425894828e-06,
"loss": 3.0620100498199463,
"step": 293
},
{
"epoch": 0.46740858505564387,
"grad_norm": 12.179751636155302,
"learning_rate": 9.907723592102062e-06,
"loss": 2.8368825912475586,
"step": 294
},
{
"epoch": 0.46899841017488075,
"grad_norm": 17.5460433101431,
"learning_rate": 9.905946125446832e-06,
"loss": 2.9089303016662598,
"step": 295
},
{
"epoch": 0.47058823529411764,
"grad_norm": 10.357264423725997,
"learning_rate": 9.9041518650671e-06,
"loss": 3.0648887157440186,
"step": 296
},
{
"epoch": 0.47217806041335453,
"grad_norm": 20.25108319774201,
"learning_rate": 9.902340817104864e-06,
"loss": 3.5885443687438965,
"step": 297
},
{
"epoch": 0.4737678855325914,
"grad_norm": 20.107004352553076,
"learning_rate": 9.90051298775959e-06,
"loss": 2.6394972801208496,
"step": 298
},
{
"epoch": 0.4753577106518283,
"grad_norm": 23.997090021624807,
"learning_rate": 9.898668383288185e-06,
"loss": 3.3596107959747314,
"step": 299
},
{
"epoch": 0.4769475357710652,
"grad_norm": 14.980828112071602,
"learning_rate": 9.896807010004988e-06,
"loss": 3.2323708534240723,
"step": 300
},
{
"epoch": 0.4785373608903021,
"grad_norm": 10.84905018042917,
"learning_rate": 9.89492887428173e-06,
"loss": 2.7953944206237793,
"step": 301
},
{
"epoch": 0.48012718600953896,
"grad_norm": 16.74258275045779,
"learning_rate": 9.893033982547528e-06,
"loss": 2.7926273345947266,
"step": 302
},
{
"epoch": 0.48171701112877585,
"grad_norm": 10.11527276573299,
"learning_rate": 9.891122341288854e-06,
"loss": 2.3538496494293213,
"step": 303
},
{
"epoch": 0.48330683624801274,
"grad_norm": 13.074222431480202,
"learning_rate": 9.88919395704952e-06,
"loss": 3.081326961517334,
"step": 304
},
{
"epoch": 0.4848966613672496,
"grad_norm": 12.083919152113767,
"learning_rate": 9.887248836430645e-06,
"loss": 3.050244092941284,
"step": 305
},
{
"epoch": 0.4864864864864865,
"grad_norm": 8.75019250675372,
"learning_rate": 9.885286986090646e-06,
"loss": 2.987945556640625,
"step": 306
},
{
"epoch": 0.48807631160572335,
"grad_norm": 18.32254235621283,
"learning_rate": 9.883308412745206e-06,
"loss": 3.4225993156433105,
"step": 307
},
{
"epoch": 0.48966613672496023,
"grad_norm": 8.832288826661868,
"learning_rate": 9.88131312316725e-06,
"loss": 2.4250919818878174,
"step": 308
},
{
"epoch": 0.4912559618441971,
"grad_norm": 14.295320282866411,
"learning_rate": 9.879301124186926e-06,
"loss": 2.9305214881896973,
"step": 309
},
{
"epoch": 0.492845786963434,
"grad_norm": 9.963541379890719,
"learning_rate": 9.877272422691583e-06,
"loss": 2.68511962890625,
"step": 310
},
{
"epoch": 0.4944356120826709,
"grad_norm": 18.504257818010892,
"learning_rate": 9.875227025625744e-06,
"loss": 2.9232048988342285,
"step": 311
},
{
"epoch": 0.4960254372019078,
"grad_norm": 6.865199788520621,
"learning_rate": 9.873164939991085e-06,
"loss": 2.5240325927734375,
"step": 312
},
{
"epoch": 0.49761526232114467,
"grad_norm": 10.196649184912834,
"learning_rate": 9.871086172846403e-06,
"loss": 2.937847137451172,
"step": 313
},
{
"epoch": 0.49920508744038156,
"grad_norm": 10.807236690204771,
"learning_rate": 9.868990731307604e-06,
"loss": 2.607318162918091,
"step": 314
},
{
"epoch": 0.5007949125596184,
"grad_norm": 10.04817027309746,
"learning_rate": 9.866878622547671e-06,
"loss": 3.0139381885528564,
"step": 315
},
{
"epoch": 0.5023847376788553,
"grad_norm": 14.04657834855741,
"learning_rate": 9.864749853796642e-06,
"loss": 2.713085412979126,
"step": 316
},
{
"epoch": 0.5039745627980922,
"grad_norm": 10.580139193512768,
"learning_rate": 9.862604432341583e-06,
"loss": 3.2179996967315674,
"step": 317
},
{
"epoch": 0.505564387917329,
"grad_norm": 14.060855679931999,
"learning_rate": 9.860442365526565e-06,
"loss": 2.7278504371643066,
"step": 318
},
{
"epoch": 0.5071542130365659,
"grad_norm": 9.70708934527147,
"learning_rate": 9.858263660752637e-06,
"loss": 3.0756285190582275,
"step": 319
},
{
"epoch": 0.5087440381558028,
"grad_norm": 13.293623447658439,
"learning_rate": 9.856068325477805e-06,
"loss": 3.088465690612793,
"step": 320
},
{
"epoch": 0.5103338632750397,
"grad_norm": 7.794568252567064,
"learning_rate": 9.853856367217001e-06,
"loss": 2.9818029403686523,
"step": 321
},
{
"epoch": 0.5119236883942766,
"grad_norm": 7.851326897921035,
"learning_rate": 9.85162779354206e-06,
"loss": 2.2186264991760254,
"step": 322
},
{
"epoch": 0.5135135135135135,
"grad_norm": 27.73475205218493,
"learning_rate": 9.849382612081698e-06,
"loss": 3.252265453338623,
"step": 323
},
{
"epoch": 0.5151033386327504,
"grad_norm": 12.4240113753297,
"learning_rate": 9.847120830521476e-06,
"loss": 2.715832233428955,
"step": 324
},
{
"epoch": 0.5166931637519873,
"grad_norm": 11.173745279264194,
"learning_rate": 9.844842456603779e-06,
"loss": 3.116093635559082,
"step": 325
},
{
"epoch": 0.5182829888712241,
"grad_norm": 12.86649071600495,
"learning_rate": 9.842547498127794e-06,
"loss": 2.7198853492736816,
"step": 326
},
{
"epoch": 0.519872813990461,
"grad_norm": 9.750789020946055,
"learning_rate": 9.84023596294948e-06,
"loss": 2.7712936401367188,
"step": 327
},
{
"epoch": 0.5214626391096979,
"grad_norm": 5.8993949186997146,
"learning_rate": 9.837907858981536e-06,
"loss": 2.7403852939605713,
"step": 328
},
{
"epoch": 0.5230524642289348,
"grad_norm": 8.210808345252927,
"learning_rate": 9.835563194193382e-06,
"loss": 3.234954357147217,
"step": 329
},
{
"epoch": 0.5246422893481717,
"grad_norm": 17.58386529096575,
"learning_rate": 9.833201976611125e-06,
"loss": 2.921865463256836,
"step": 330
},
{
"epoch": 0.5262321144674086,
"grad_norm": 17.745817173384197,
"learning_rate": 9.830824214317533e-06,
"loss": 2.9681968688964844,
"step": 331
},
{
"epoch": 0.5278219395866455,
"grad_norm": 8.88335299895624,
"learning_rate": 9.828429915452018e-06,
"loss": 2.7954001426696777,
"step": 332
},
{
"epoch": 0.5294117647058824,
"grad_norm": 14.866460548402223,
"learning_rate": 9.826019088210586e-06,
"loss": 2.6031131744384766,
"step": 333
},
{
"epoch": 0.5310015898251192,
"grad_norm": 15.725264129746899,
"learning_rate": 9.823591740845831e-06,
"loss": 3.0973379611968994,
"step": 334
},
{
"epoch": 0.5325914149443561,
"grad_norm": 14.628458921610125,
"learning_rate": 9.821147881666896e-06,
"loss": 2.6520161628723145,
"step": 335
},
{
"epoch": 0.534181240063593,
"grad_norm": 16.54064563714969,
"learning_rate": 9.818687519039444e-06,
"loss": 2.7431864738464355,
"step": 336
},
{
"epoch": 0.5357710651828299,
"grad_norm": 10.962393061580121,
"learning_rate": 9.816210661385633e-06,
"loss": 2.5551836490631104,
"step": 337
},
{
"epoch": 0.5373608903020668,
"grad_norm": 18.832152884024985,
"learning_rate": 9.813717317184085e-06,
"loss": 3.045194625854492,
"step": 338
},
{
"epoch": 0.5389507154213037,
"grad_norm": 17.38158539650151,
"learning_rate": 9.811207494969857e-06,
"loss": 3.4276580810546875,
"step": 339
},
{
"epoch": 0.5405405405405406,
"grad_norm": 10.622363155005946,
"learning_rate": 9.808681203334416e-06,
"loss": 3.2487316131591797,
"step": 340
},
{
"epoch": 0.5421303656597775,
"grad_norm": 9.260816023702622,
"learning_rate": 9.806138450925604e-06,
"loss": 2.612975597381592,
"step": 341
},
{
"epoch": 0.5437201907790143,
"grad_norm": 11.74463313569475,
"learning_rate": 9.803579246447609e-06,
"loss": 2.7874436378479004,
"step": 342
},
{
"epoch": 0.5453100158982512,
"grad_norm": 17.837043198698524,
"learning_rate": 9.801003598660937e-06,
"loss": 2.9916462898254395,
"step": 343
},
{
"epoch": 0.5468998410174881,
"grad_norm": 24.438251717009827,
"learning_rate": 9.798411516382385e-06,
"loss": 2.434546947479248,
"step": 344
},
{
"epoch": 0.548489666136725,
"grad_norm": 14.013153276669572,
"learning_rate": 9.795803008485004e-06,
"loss": 3.085341453552246,
"step": 345
},
{
"epoch": 0.5500794912559619,
"grad_norm": 13.557057584862173,
"learning_rate": 9.793178083898073e-06,
"loss": 3.117433547973633,
"step": 346
},
{
"epoch": 0.5516693163751988,
"grad_norm": 10.637125299139159,
"learning_rate": 9.790536751607065e-06,
"loss": 2.892432689666748,
"step": 347
},
{
"epoch": 0.5532591414944356,
"grad_norm": 10.550865957146144,
"learning_rate": 9.787879020653627e-06,
"loss": 2.861921787261963,
"step": 348
},
{
"epoch": 0.5548489666136724,
"grad_norm": 22.137580282732337,
"learning_rate": 9.785204900135533e-06,
"loss": 2.463737726211548,
"step": 349
},
{
"epoch": 0.5564387917329093,
"grad_norm": 21.77300397400076,
"learning_rate": 9.782514399206664e-06,
"loss": 3.39715838432312,
"step": 350
},
{
"epoch": 0.5580286168521462,
"grad_norm": 26.488755598164676,
"learning_rate": 9.77980752707697e-06,
"loss": 2.856335163116455,
"step": 351
},
{
"epoch": 0.5596184419713831,
"grad_norm": 15.482056115230122,
"learning_rate": 9.777084293012448e-06,
"loss": 3.082500457763672,
"step": 352
},
{
"epoch": 0.56120826709062,
"grad_norm": 17.940874790430644,
"learning_rate": 9.774344706335097e-06,
"loss": 2.5417776107788086,
"step": 353
},
{
"epoch": 0.5627980922098569,
"grad_norm": 10.606810121460613,
"learning_rate": 9.7715887764229e-06,
"loss": 2.677812099456787,
"step": 354
},
{
"epoch": 0.5643879173290938,
"grad_norm": 18.117698123278753,
"learning_rate": 9.768816512709782e-06,
"loss": 2.576479911804199,
"step": 355
},
{
"epoch": 0.5659777424483307,
"grad_norm": 11.765350214656465,
"learning_rate": 9.766027924685579e-06,
"loss": 2.7888994216918945,
"step": 356
},
{
"epoch": 0.5675675675675675,
"grad_norm": 14.785341483147235,
"learning_rate": 9.76322302189601e-06,
"loss": 2.476093292236328,
"step": 357
},
{
"epoch": 0.5691573926868044,
"grad_norm": 51.843503178912506,
"learning_rate": 9.760401813942641e-06,
"loss": 2.794877290725708,
"step": 358
},
{
"epoch": 0.5707472178060413,
"grad_norm": 13.440897755315447,
"learning_rate": 9.75756431048285e-06,
"loss": 2.6585850715637207,
"step": 359
},
{
"epoch": 0.5723370429252782,
"grad_norm": 14.7711393223718,
"learning_rate": 9.754710521229804e-06,
"loss": 3.022064447402954,
"step": 360
},
{
"epoch": 0.5739268680445151,
"grad_norm": 11.102369711091598,
"learning_rate": 9.751840455952411e-06,
"loss": 2.669562339782715,
"step": 361
},
{
"epoch": 0.575516693163752,
"grad_norm": 11.41916142117208,
"learning_rate": 9.748954124475297e-06,
"loss": 2.7201461791992188,
"step": 362
},
{
"epoch": 0.5771065182829889,
"grad_norm": 10.443214334920143,
"learning_rate": 9.74605153667877e-06,
"loss": 2.6275901794433594,
"step": 363
},
{
"epoch": 0.5786963434022258,
"grad_norm": 20.437613024405156,
"learning_rate": 9.743132702498785e-06,
"loss": 2.591904401779175,
"step": 364
},
{
"epoch": 0.5802861685214626,
"grad_norm": 11.389555234258582,
"learning_rate": 9.740197631926911e-06,
"loss": 2.7886199951171875,
"step": 365
},
{
"epoch": 0.5818759936406995,
"grad_norm": 8.461581392383087,
"learning_rate": 9.737246335010295e-06,
"loss": 2.6747968196868896,
"step": 366
},
{
"epoch": 0.5834658187599364,
"grad_norm": 33.08363519117193,
"learning_rate": 9.734278821851631e-06,
"loss": 2.841123342514038,
"step": 367
},
{
"epoch": 0.5850556438791733,
"grad_norm": 17.003662815071053,
"learning_rate": 9.73129510260912e-06,
"loss": 3.1505885124206543,
"step": 368
},
{
"epoch": 0.5866454689984102,
"grad_norm": 22.111399140586197,
"learning_rate": 9.728295187496444e-06,
"loss": 2.7856974601745605,
"step": 369
},
{
"epoch": 0.5882352941176471,
"grad_norm": 19.583461060784327,
"learning_rate": 9.725279086782719e-06,
"loss": 3.3989617824554443,
"step": 370
},
{
"epoch": 0.589825119236884,
"grad_norm": 11.456541765804014,
"learning_rate": 9.722246810792476e-06,
"loss": 2.9938831329345703,
"step": 371
},
{
"epoch": 0.5914149443561209,
"grad_norm": 8.169487886232476,
"learning_rate": 9.719198369905605e-06,
"loss": 2.4466989040374756,
"step": 372
},
{
"epoch": 0.5930047694753577,
"grad_norm": 7.971907696739063,
"learning_rate": 9.716133774557337e-06,
"loss": 3.164093494415283,
"step": 373
},
{
"epoch": 0.5945945945945946,
"grad_norm": 10.363321904123197,
"learning_rate": 9.713053035238205e-06,
"loss": 2.953866958618164,
"step": 374
},
{
"epoch": 0.5961844197138315,
"grad_norm": 9.777251544649713,
"learning_rate": 9.709956162493996e-06,
"loss": 2.710660457611084,
"step": 375
},
{
"epoch": 0.5977742448330684,
"grad_norm": 14.487810223583852,
"learning_rate": 9.706843166925733e-06,
"loss": 2.712660312652588,
"step": 376
},
{
"epoch": 0.5993640699523053,
"grad_norm": 12.25933759921934,
"learning_rate": 9.70371405918962e-06,
"loss": 2.8972253799438477,
"step": 377
},
{
"epoch": 0.6009538950715422,
"grad_norm": 18.03088885129158,
"learning_rate": 9.700568849997026e-06,
"loss": 3.1258721351623535,
"step": 378
},
{
"epoch": 0.6025437201907791,
"grad_norm": 8.835107319947879,
"learning_rate": 9.69740755011443e-06,
"loss": 2.955259084701538,
"step": 379
},
{
"epoch": 0.604133545310016,
"grad_norm": 9.600351639857102,
"learning_rate": 9.694230170363396e-06,
"loss": 2.7996139526367188,
"step": 380
},
{
"epoch": 0.6057233704292527,
"grad_norm": 8.374042344290787,
"learning_rate": 9.691036721620525e-06,
"loss": 2.9617061614990234,
"step": 381
},
{
"epoch": 0.6073131955484896,
"grad_norm": 16.200759577206647,
"learning_rate": 9.687827214817433e-06,
"loss": 4.232911586761475,
"step": 382
},
{
"epoch": 0.6089030206677265,
"grad_norm": 9.221291164598371,
"learning_rate": 9.6846016609407e-06,
"loss": 2.861079692840576,
"step": 383
},
{
"epoch": 0.6104928457869634,
"grad_norm": 7.14794391477156,
"learning_rate": 9.681360071031835e-06,
"loss": 3.0849013328552246,
"step": 384
},
{
"epoch": 0.6120826709062003,
"grad_norm": 21.29708798650396,
"learning_rate": 9.678102456187246e-06,
"loss": 3.0311594009399414,
"step": 385
},
{
"epoch": 0.6136724960254372,
"grad_norm": 20.074075644381143,
"learning_rate": 9.674828827558194e-06,
"loss": 2.7004942893981934,
"step": 386
},
{
"epoch": 0.615262321144674,
"grad_norm": 14.83050652679666,
"learning_rate": 9.671539196350757e-06,
"loss": 2.589656114578247,
"step": 387
},
{
"epoch": 0.6168521462639109,
"grad_norm": 8.87067945584946,
"learning_rate": 9.668233573825794e-06,
"loss": 2.9575343132019043,
"step": 388
},
{
"epoch": 0.6184419713831478,
"grad_norm": 10.545476536739276,
"learning_rate": 9.664911971298901e-06,
"loss": 2.8987927436828613,
"step": 389
},
{
"epoch": 0.6200317965023847,
"grad_norm": 10.16073064262438,
"learning_rate": 9.661574400140378e-06,
"loss": 2.5674970149993896,
"step": 390
},
{
"epoch": 0.6216216216216216,
"grad_norm": 27.051068878155856,
"learning_rate": 9.658220871775188e-06,
"loss": 3.2474257946014404,
"step": 391
},
{
"epoch": 0.6232114467408585,
"grad_norm": 9.452922834229314,
"learning_rate": 9.654851397682918e-06,
"loss": 2.8457717895507812,
"step": 392
},
{
"epoch": 0.6248012718600954,
"grad_norm": 11.005579432042241,
"learning_rate": 9.651465989397735e-06,
"loss": 2.455747365951538,
"step": 393
},
{
"epoch": 0.6263910969793323,
"grad_norm": 7.215356608742707,
"learning_rate": 9.64806465850836e-06,
"loss": 2.7574803829193115,
"step": 394
},
{
"epoch": 0.6279809220985691,
"grad_norm": 14.962227882431128,
"learning_rate": 9.64464741665801e-06,
"loss": 2.429494857788086,
"step": 395
},
{
"epoch": 0.629570747217806,
"grad_norm": 8.013197856622966,
"learning_rate": 9.641214275544373e-06,
"loss": 2.9387574195861816,
"step": 396
},
{
"epoch": 0.6311605723370429,
"grad_norm": 24.94726816321973,
"learning_rate": 9.637765246919559e-06,
"loss": 2.8494510650634766,
"step": 397
},
{
"epoch": 0.6327503974562798,
"grad_norm": 10.457210193453564,
"learning_rate": 9.634300342590067e-06,
"loss": 2.627678871154785,
"step": 398
},
{
"epoch": 0.6343402225755167,
"grad_norm": 14.716507833574038,
"learning_rate": 9.630819574416735e-06,
"loss": 3.5401620864868164,
"step": 399
},
{
"epoch": 0.6359300476947536,
"grad_norm": 18.887530242887852,
"learning_rate": 9.62732295431471e-06,
"loss": 3.0188817977905273,
"step": 400
},
{
"epoch": 0.6375198728139905,
"grad_norm": 18.229834565392796,
"learning_rate": 9.623810494253403e-06,
"loss": 3.1571972370147705,
"step": 401
},
{
"epoch": 0.6391096979332274,
"grad_norm": 13.56039900606889,
"learning_rate": 9.620282206256442e-06,
"loss": 3.1719672679901123,
"step": 402
},
{
"epoch": 0.6406995230524642,
"grad_norm": 7.963613224338305,
"learning_rate": 9.616738102401641e-06,
"loss": 2.8952155113220215,
"step": 403
},
{
"epoch": 0.6422893481717011,
"grad_norm": 6.335654799572909,
"learning_rate": 9.613178194820952e-06,
"loss": 1.8771438598632812,
"step": 404
},
{
"epoch": 0.643879173290938,
"grad_norm": 15.581044704241915,
"learning_rate": 9.609602495700422e-06,
"loss": 3.0524277687072754,
"step": 405
},
{
"epoch": 0.6454689984101749,
"grad_norm": 12.433857715011595,
"learning_rate": 9.606011017280166e-06,
"loss": 2.399130344390869,
"step": 406
},
{
"epoch": 0.6470588235294118,
"grad_norm": 6.87894749964456,
"learning_rate": 9.602403771854299e-06,
"loss": 2.580353021621704,
"step": 407
},
{
"epoch": 0.6486486486486487,
"grad_norm": 6.444440504558877,
"learning_rate": 9.598780771770916e-06,
"loss": 2.5789973735809326,
"step": 408
},
{
"epoch": 0.6502384737678856,
"grad_norm": 9.629062198154633,
"learning_rate": 9.595142029432044e-06,
"loss": 2.6498067378997803,
"step": 409
},
{
"epoch": 0.6518282988871225,
"grad_norm": 7.644410796929077,
"learning_rate": 9.591487557293595e-06,
"loss": 2.4748358726501465,
"step": 410
},
{
"epoch": 0.6534181240063593,
"grad_norm": 9.511237801081927,
"learning_rate": 9.587817367865328e-06,
"loss": 2.9340078830718994,
"step": 411
},
{
"epoch": 0.6550079491255962,
"grad_norm": 7.5549355398077545,
"learning_rate": 9.5841314737108e-06,
"loss": 3.104971408843994,
"step": 412
},
{
"epoch": 0.6565977742448331,
"grad_norm": 26.222964600786014,
"learning_rate": 9.580429887447334e-06,
"loss": 3.083625316619873,
"step": 413
},
{
"epoch": 0.6581875993640699,
"grad_norm": 19.623632426290964,
"learning_rate": 9.576712621745965e-06,
"loss": 2.4024219512939453,
"step": 414
},
{
"epoch": 0.6597774244833068,
"grad_norm": 10.650849632134157,
"learning_rate": 9.572979689331402e-06,
"loss": 3.272728443145752,
"step": 415
},
{
"epoch": 0.6613672496025437,
"grad_norm": 9.688717423425123,
"learning_rate": 9.569231102981982e-06,
"loss": 2.8303894996643066,
"step": 416
},
{
"epoch": 0.6629570747217806,
"grad_norm": 16.254612607461073,
"learning_rate": 9.56546687552963e-06,
"loss": 3.3961193561553955,
"step": 417
},
{
"epoch": 0.6645468998410174,
"grad_norm": 28.318241565491313,
"learning_rate": 9.56168701985981e-06,
"loss": 3.2787365913391113,
"step": 418
},
{
"epoch": 0.6661367249602543,
"grad_norm": 9.984301715361173,
"learning_rate": 9.557891548911486e-06,
"loss": 2.706429958343506,
"step": 419
},
{
"epoch": 0.6677265500794912,
"grad_norm": 52.69149354170844,
"learning_rate": 9.554080475677075e-06,
"loss": 2.649432897567749,
"step": 420
},
{
"epoch": 0.6693163751987281,
"grad_norm": 10.82457584221695,
"learning_rate": 9.5502538132024e-06,
"loss": 3.203946828842163,
"step": 421
},
{
"epoch": 0.670906200317965,
"grad_norm": 9.824887603835084,
"learning_rate": 9.546411574586649e-06,
"loss": 2.792487859725952,
"step": 422
},
{
"epoch": 0.6724960254372019,
"grad_norm": 10.790171066484273,
"learning_rate": 9.542553772982334e-06,
"loss": 2.542821168899536,
"step": 423
},
{
"epoch": 0.6740858505564388,
"grad_norm": 9.834899956988384,
"learning_rate": 9.538680421595236e-06,
"loss": 3.0764918327331543,
"step": 424
},
{
"epoch": 0.6756756756756757,
"grad_norm": 9.056903123069523,
"learning_rate": 9.534791533684365e-06,
"loss": 2.803356170654297,
"step": 425
},
{
"epoch": 0.6772655007949125,
"grad_norm": 10.761225528100839,
"learning_rate": 9.530887122561917e-06,
"loss": 3.1509580612182617,
"step": 426
},
{
"epoch": 0.6788553259141494,
"grad_norm": 11.131539654369165,
"learning_rate": 9.526967201593225e-06,
"loss": 3.372119903564453,
"step": 427
},
{
"epoch": 0.6804451510333863,
"grad_norm": 33.68779997608416,
"learning_rate": 9.523031784196714e-06,
"loss": 2.6376187801361084,
"step": 428
},
{
"epoch": 0.6820349761526232,
"grad_norm": 22.943648324118026,
"learning_rate": 9.51908088384386e-06,
"loss": 2.1887574195861816,
"step": 429
},
{
"epoch": 0.6836248012718601,
"grad_norm": 20.916906264947617,
"learning_rate": 9.515114514059127e-06,
"loss": 2.9147121906280518,
"step": 430
},
{
"epoch": 0.685214626391097,
"grad_norm": 12.377959096426785,
"learning_rate": 9.51113268841995e-06,
"loss": 2.66879940032959,
"step": 431
},
{
"epoch": 0.6868044515103339,
"grad_norm": 8.735671059314264,
"learning_rate": 9.507135420556658e-06,
"loss": 2.8298702239990234,
"step": 432
},
{
"epoch": 0.6883942766295708,
"grad_norm": 9.704810477955409,
"learning_rate": 9.503122724152445e-06,
"loss": 2.8676247596740723,
"step": 433
},
{
"epoch": 0.6899841017488076,
"grad_norm": 11.241082482696205,
"learning_rate": 9.499094612943323e-06,
"loss": 2.931668758392334,
"step": 434
},
{
"epoch": 0.6915739268680445,
"grad_norm": 9.173049928921404,
"learning_rate": 9.495051100718063e-06,
"loss": 2.5799193382263184,
"step": 435
},
{
"epoch": 0.6931637519872814,
"grad_norm": 18.007659676402852,
"learning_rate": 9.490992201318165e-06,
"loss": 3.0089612007141113,
"step": 436
},
{
"epoch": 0.6947535771065183,
"grad_norm": 11.362416256495003,
"learning_rate": 9.486917928637793e-06,
"loss": 2.896777629852295,
"step": 437
},
{
"epoch": 0.6963434022257552,
"grad_norm": 8.416093561210923,
"learning_rate": 9.482828296623743e-06,
"loss": 2.038195848464966,
"step": 438
},
{
"epoch": 0.6979332273449921,
"grad_norm": 8.48123035061315,
"learning_rate": 9.47872331927538e-06,
"loss": 2.0925214290618896,
"step": 439
},
{
"epoch": 0.699523052464229,
"grad_norm": 13.309219294696476,
"learning_rate": 9.474603010644608e-06,
"loss": 3.1267426013946533,
"step": 440
},
{
"epoch": 0.7011128775834659,
"grad_norm": 19.576731189877943,
"learning_rate": 9.470467384835804e-06,
"loss": 2.386526107788086,
"step": 441
},
{
"epoch": 0.7027027027027027,
"grad_norm": 11.84793161548726,
"learning_rate": 9.466316456005783e-06,
"loss": 2.735654592514038,
"step": 442
},
{
"epoch": 0.7042925278219396,
"grad_norm": 14.370017088694938,
"learning_rate": 9.462150238363737e-06,
"loss": 3.2645516395568848,
"step": 443
},
{
"epoch": 0.7058823529411765,
"grad_norm": 17.806748620572648,
"learning_rate": 9.457968746171202e-06,
"loss": 2.775618076324463,
"step": 444
},
{
"epoch": 0.7074721780604134,
"grad_norm": 15.470507917573313,
"learning_rate": 9.453771993742e-06,
"loss": 3.137962579727173,
"step": 445
},
{
"epoch": 0.7090620031796503,
"grad_norm": 10.518008388501737,
"learning_rate": 9.449559995442184e-06,
"loss": 3.061692237854004,
"step": 446
},
{
"epoch": 0.7106518282988871,
"grad_norm": 10.581699627893139,
"learning_rate": 9.445332765690003e-06,
"loss": 3.165436029434204,
"step": 447
},
{
"epoch": 0.712241653418124,
"grad_norm": 8.91004376713061,
"learning_rate": 9.441090318955843e-06,
"loss": 2.745981216430664,
"step": 448
},
{
"epoch": 0.7138314785373608,
"grad_norm": 30.01216048271335,
"learning_rate": 9.436832669762177e-06,
"loss": 2.914241313934326,
"step": 449
},
{
"epoch": 0.7154213036565977,
"grad_norm": 42.15439355029616,
"learning_rate": 9.432559832683523e-06,
"loss": 2.9794774055480957,
"step": 450
},
{
"epoch": 0.7170111287758346,
"grad_norm": 13.71971441245524,
"learning_rate": 9.428271822346384e-06,
"loss": 2.801947832107544,
"step": 451
},
{
"epoch": 0.7186009538950715,
"grad_norm": 14.134554485370607,
"learning_rate": 9.423968653429207e-06,
"loss": 2.9650607109069824,
"step": 452
},
{
"epoch": 0.7201907790143084,
"grad_norm": 8.604798420906718,
"learning_rate": 9.419650340662329e-06,
"loss": 2.595290184020996,
"step": 453
},
{
"epoch": 0.7217806041335453,
"grad_norm": 11.249268948449823,
"learning_rate": 9.415316898827923e-06,
"loss": 2.633866310119629,
"step": 454
},
{
"epoch": 0.7233704292527822,
"grad_norm": 11.52964004514907,
"learning_rate": 9.410968342759954e-06,
"loss": 3.424924850463867,
"step": 455
},
{
"epoch": 0.724960254372019,
"grad_norm": 17.639877653458488,
"learning_rate": 9.406604687344123e-06,
"loss": 2.369297504425049,
"step": 456
},
{
"epoch": 0.7265500794912559,
"grad_norm": 22.81396466382483,
"learning_rate": 9.402225947517822e-06,
"loss": 2.883362293243408,
"step": 457
},
{
"epoch": 0.7281399046104928,
"grad_norm": 16.222209071079284,
"learning_rate": 9.397832138270073e-06,
"loss": 2.8191261291503906,
"step": 458
},
{
"epoch": 0.7297297297297297,
"grad_norm": 7.895306649079384,
"learning_rate": 9.393423274641489e-06,
"loss": 2.7146449089050293,
"step": 459
},
{
"epoch": 0.7313195548489666,
"grad_norm": 10.89587170199451,
"learning_rate": 9.388999371724212e-06,
"loss": 3.090642213821411,
"step": 460
},
{
"epoch": 0.7329093799682035,
"grad_norm": 23.9918511541304,
"learning_rate": 9.384560444661866e-06,
"loss": 2.4599502086639404,
"step": 461
},
{
"epoch": 0.7344992050874404,
"grad_norm": 17.30770085496429,
"learning_rate": 9.380106508649504e-06,
"loss": 3.157010555267334,
"step": 462
},
{
"epoch": 0.7360890302066773,
"grad_norm": 7.37592450124233,
"learning_rate": 9.37563757893356e-06,
"loss": 3.021430730819702,
"step": 463
},
{
"epoch": 0.7376788553259142,
"grad_norm": 17.999614114821366,
"learning_rate": 9.371153670811792e-06,
"loss": 2.3801822662353516,
"step": 464
},
{
"epoch": 0.739268680445151,
"grad_norm": 18.329872618090533,
"learning_rate": 9.36665479963323e-06,
"loss": 3.0229134559631348,
"step": 465
},
{
"epoch": 0.7408585055643879,
"grad_norm": 16.29800538004355,
"learning_rate": 9.362140980798127e-06,
"loss": 2.883070945739746,
"step": 466
},
{
"epoch": 0.7424483306836248,
"grad_norm": 6.700420005013404,
"learning_rate": 9.357612229757898e-06,
"loss": 2.8372249603271484,
"step": 467
},
{
"epoch": 0.7440381558028617,
"grad_norm": 8.49441227773889,
"learning_rate": 9.353068562015081e-06,
"loss": 2.289818525314331,
"step": 468
},
{
"epoch": 0.7456279809220986,
"grad_norm": 27.79324687080065,
"learning_rate": 9.34850999312327e-06,
"loss": 2.7569055557250977,
"step": 469
},
{
"epoch": 0.7472178060413355,
"grad_norm": 10.233973203234271,
"learning_rate": 9.343936538687071e-06,
"loss": 2.8224129676818848,
"step": 470
},
{
"epoch": 0.7488076311605724,
"grad_norm": 9.536916310154247,
"learning_rate": 9.339348214362042e-06,
"loss": 2.7565484046936035,
"step": 471
},
{
"epoch": 0.7503974562798092,
"grad_norm": 12.660265416201005,
"learning_rate": 9.334745035854646e-06,
"loss": 2.609936475753784,
"step": 472
},
{
"epoch": 0.7519872813990461,
"grad_norm": 6.609631626506061,
"learning_rate": 9.330127018922195e-06,
"loss": 1.537891149520874,
"step": 473
},
{
"epoch": 0.753577106518283,
"grad_norm": 11.839925280210922,
"learning_rate": 9.325494179372787e-06,
"loss": 2.920321464538574,
"step": 474
},
{
"epoch": 0.7551669316375199,
"grad_norm": 8.033768579527674,
"learning_rate": 9.32084653306527e-06,
"loss": 2.322841167449951,
"step": 475
},
{
"epoch": 0.7567567567567568,
"grad_norm": 18.74724817038534,
"learning_rate": 9.316184095909172e-06,
"loss": 3.282191276550293,
"step": 476
},
{
"epoch": 0.7583465818759937,
"grad_norm": 9.591961467690806,
"learning_rate": 9.311506883864652e-06,
"loss": 3.099551200866699,
"step": 477
},
{
"epoch": 0.7599364069952306,
"grad_norm": 6.813041303534929,
"learning_rate": 9.306814912942445e-06,
"loss": 2.680548667907715,
"step": 478
},
{
"epoch": 0.7615262321144675,
"grad_norm": 7.440877828862766,
"learning_rate": 9.302108199203811e-06,
"loss": 3.059520721435547,
"step": 479
},
{
"epoch": 0.7631160572337043,
"grad_norm": 11.063755034328096,
"learning_rate": 9.297386758760476e-06,
"loss": 2.7226760387420654,
"step": 480
},
{
"epoch": 0.7647058823529411,
"grad_norm": 16.03801166420759,
"learning_rate": 9.292650607774576e-06,
"loss": 3.021273374557495,
"step": 481
},
{
"epoch": 0.766295707472178,
"grad_norm": 17.476643714670104,
"learning_rate": 9.287899762458602e-06,
"loss": 3.0549211502075195,
"step": 482
},
{
"epoch": 0.7678855325914149,
"grad_norm": 15.837190093947614,
"learning_rate": 9.283134239075345e-06,
"loss": 2.7466187477111816,
"step": 483
},
{
"epoch": 0.7694753577106518,
"grad_norm": 7.252402922157257,
"learning_rate": 9.278354053937848e-06,
"loss": 3.251795768737793,
"step": 484
},
{
"epoch": 0.7710651828298887,
"grad_norm": 13.96476074258806,
"learning_rate": 9.273559223409336e-06,
"loss": 2.9985158443450928,
"step": 485
},
{
"epoch": 0.7726550079491256,
"grad_norm": 13.605199897553412,
"learning_rate": 9.268749763903171e-06,
"loss": 3.1657190322875977,
"step": 486
},
{
"epoch": 0.7742448330683624,
"grad_norm": 23.870950335808047,
"learning_rate": 9.26392569188279e-06,
"loss": 2.4962430000305176,
"step": 487
},
{
"epoch": 0.7758346581875993,
"grad_norm": 36.442408380505434,
"learning_rate": 9.259087023861649e-06,
"loss": 2.98346209526062,
"step": 488
},
{
"epoch": 0.7774244833068362,
"grad_norm": 19.137833562524634,
"learning_rate": 9.254233776403172e-06,
"loss": 3.3266477584838867,
"step": 489
},
{
"epoch": 0.7790143084260731,
"grad_norm": 8.57517970453567,
"learning_rate": 9.249365966120692e-06,
"loss": 2.872415542602539,
"step": 490
},
{
"epoch": 0.78060413354531,
"grad_norm": 20.351472280664066,
"learning_rate": 9.244483609677384e-06,
"loss": 2.7851204872131348,
"step": 491
},
{
"epoch": 0.7821939586645469,
"grad_norm": 18.738667613531746,
"learning_rate": 9.239586723786223e-06,
"loss": 3.367607593536377,
"step": 492
},
{
"epoch": 0.7837837837837838,
"grad_norm": 11.655735916018001,
"learning_rate": 9.234675325209923e-06,
"loss": 2.86293625831604,
"step": 493
},
{
"epoch": 0.7853736089030207,
"grad_norm": 15.195275789671278,
"learning_rate": 9.229749430760868e-06,
"loss": 3.1182608604431152,
"step": 494
},
{
"epoch": 0.7869634340222575,
"grad_norm": 12.571602562023449,
"learning_rate": 9.224809057301072e-06,
"loss": 3.185694694519043,
"step": 495
},
{
"epoch": 0.7885532591414944,
"grad_norm": 9.37913029848071,
"learning_rate": 9.219854221742106e-06,
"loss": 3.187572956085205,
"step": 496
},
{
"epoch": 0.7901430842607313,
"grad_norm": 10.256423074234739,
"learning_rate": 9.214884941045053e-06,
"loss": 2.5662600994110107,
"step": 497
},
{
"epoch": 0.7917329093799682,
"grad_norm": 18.076215929843727,
"learning_rate": 9.209901232220436e-06,
"loss": 3.0311079025268555,
"step": 498
},
{
"epoch": 0.7933227344992051,
"grad_norm": 16.631781115308666,
"learning_rate": 9.204903112328177e-06,
"loss": 1.7491254806518555,
"step": 499
},
{
"epoch": 0.794912559618442,
"grad_norm": 23.267256685202685,
"learning_rate": 9.19989059847752e-06,
"loss": 2.581984043121338,
"step": 500
},
{
"epoch": 0.7965023847376789,
"grad_norm": 12.378630702599818,
"learning_rate": 9.194863707826987e-06,
"loss": 3.037818193435669,
"step": 501
},
{
"epoch": 0.7980922098569158,
"grad_norm": 8.336093692737641,
"learning_rate": 9.189822457584311e-06,
"loss": 2.6411571502685547,
"step": 502
},
{
"epoch": 0.7996820349761526,
"grad_norm": 8.867434380285168,
"learning_rate": 9.184766865006384e-06,
"loss": 2.9949069023132324,
"step": 503
},
{
"epoch": 0.8012718600953895,
"grad_norm": 11.193965664653925,
"learning_rate": 9.179696947399188e-06,
"loss": 3.144390821456909,
"step": 504
},
{
"epoch": 0.8028616852146264,
"grad_norm": 16.057125696326533,
"learning_rate": 9.174612722117744e-06,
"loss": 2.9249026775360107,
"step": 505
},
{
"epoch": 0.8044515103338633,
"grad_norm": 20.94564798661583,
"learning_rate": 9.169514206566053e-06,
"loss": 2.9030885696411133,
"step": 506
},
{
"epoch": 0.8060413354531002,
"grad_norm": 9.485465282445661,
"learning_rate": 9.164401418197028e-06,
"loss": 2.723435878753662,
"step": 507
},
{
"epoch": 0.8076311605723371,
"grad_norm": 7.97901333149408,
"learning_rate": 9.159274374512444e-06,
"loss": 2.268899917602539,
"step": 508
},
{
"epoch": 0.809220985691574,
"grad_norm": 11.56378755396159,
"learning_rate": 9.154133093062874e-06,
"loss": 2.7658634185791016,
"step": 509
},
{
"epoch": 0.8108108108108109,
"grad_norm": 16.318599971485703,
"learning_rate": 9.148977591447625e-06,
"loss": 2.3817219734191895,
"step": 510
},
{
"epoch": 0.8124006359300477,
"grad_norm": 14.944743792365866,
"learning_rate": 9.143807887314686e-06,
"loss": 2.5911664962768555,
"step": 511
},
{
"epoch": 0.8139904610492846,
"grad_norm": 16.811534487344034,
"learning_rate": 9.138623998360662e-06,
"loss": 3.377835988998413,
"step": 512
},
{
"epoch": 0.8155802861685215,
"grad_norm": 7.7776791479934895,
"learning_rate": 9.133425942330711e-06,
"loss": 2.6951489448547363,
"step": 513
},
{
"epoch": 0.8171701112877583,
"grad_norm": 11.502170896327272,
"learning_rate": 9.128213737018493e-06,
"loss": 3.042034149169922,
"step": 514
},
{
"epoch": 0.8187599364069952,
"grad_norm": 10.755740854329733,
"learning_rate": 9.122987400266095e-06,
"loss": 3.1462788581848145,
"step": 515
},
{
"epoch": 0.8203497615262321,
"grad_norm": 8.557466859234387,
"learning_rate": 9.117746949963986e-06,
"loss": 3.2351651191711426,
"step": 516
},
{
"epoch": 0.821939586645469,
"grad_norm": 6.6138108427559,
"learning_rate": 9.112492404050944e-06,
"loss": 2.52327036857605,
"step": 517
},
{
"epoch": 0.8235294117647058,
"grad_norm": 7.739658405948868,
"learning_rate": 9.107223780513997e-06,
"loss": 3.155184030532837,
"step": 518
},
{
"epoch": 0.8251192368839427,
"grad_norm": 14.958547409490388,
"learning_rate": 9.101941097388364e-06,
"loss": 3.060459613800049,
"step": 519
},
{
"epoch": 0.8267090620031796,
"grad_norm": 13.544711692721917,
"learning_rate": 9.096644372757393e-06,
"loss": 2.502777338027954,
"step": 520
},
{
"epoch": 0.8282988871224165,
"grad_norm": 16.67047370206468,
"learning_rate": 9.091333624752497e-06,
"loss": 2.7691304683685303,
"step": 521
},
{
"epoch": 0.8298887122416534,
"grad_norm": 42.77834127326481,
"learning_rate": 9.086008871553088e-06,
"loss": 2.007439136505127,
"step": 522
},
{
"epoch": 0.8314785373608903,
"grad_norm": 10.81903026213424,
"learning_rate": 9.08067013138653e-06,
"loss": 2.60162353515625,
"step": 523
},
{
"epoch": 0.8330683624801272,
"grad_norm": 12.050402588569701,
"learning_rate": 9.07531742252806e-06,
"loss": 3.2098569869995117,
"step": 524
},
{
"epoch": 0.834658187599364,
"grad_norm": 14.320246766204002,
"learning_rate": 9.06995076330073e-06,
"loss": 2.8991613388061523,
"step": 525
},
{
"epoch": 0.8362480127186009,
"grad_norm": 17.41210724230328,
"learning_rate": 9.064570172075349e-06,
"loss": 2.1841237545013428,
"step": 526
},
{
"epoch": 0.8378378378378378,
"grad_norm": 10.548683513908175,
"learning_rate": 9.059175667270417e-06,
"loss": 2.322880744934082,
"step": 527
},
{
"epoch": 0.8394276629570747,
"grad_norm": 12.137179668874367,
"learning_rate": 9.053767267352063e-06,
"loss": 2.756648540496826,
"step": 528
},
{
"epoch": 0.8410174880763116,
"grad_norm": 15.333393821308167,
"learning_rate": 9.048344990833978e-06,
"loss": 2.9139137268066406,
"step": 529
},
{
"epoch": 0.8426073131955485,
"grad_norm": 15.676264866891273,
"learning_rate": 9.042908856277354e-06,
"loss": 1.6564269065856934,
"step": 530
},
{
"epoch": 0.8441971383147854,
"grad_norm": 10.23989787328245,
"learning_rate": 9.037458882290829e-06,
"loss": 2.8947908878326416,
"step": 531
},
{
"epoch": 0.8457869634340223,
"grad_norm": 13.077387466685536,
"learning_rate": 9.031995087530403e-06,
"loss": 2.4343180656433105,
"step": 532
},
{
"epoch": 0.8473767885532592,
"grad_norm": 9.515333984171859,
"learning_rate": 9.026517490699397e-06,
"loss": 2.7388577461242676,
"step": 533
},
{
"epoch": 0.848966613672496,
"grad_norm": 31.050205309879313,
"learning_rate": 9.021026110548372e-06,
"loss": 2.9309582710266113,
"step": 534
},
{
"epoch": 0.8505564387917329,
"grad_norm": 19.203823669695584,
"learning_rate": 9.015520965875073e-06,
"loss": 2.706590175628662,
"step": 535
},
{
"epoch": 0.8521462639109698,
"grad_norm": 14.252051747524709,
"learning_rate": 9.010002075524365e-06,
"loss": 2.7433180809020996,
"step": 536
},
{
"epoch": 0.8537360890302067,
"grad_norm": 22.594564192159826,
"learning_rate": 9.004469458388161e-06,
"loss": 2.817378044128418,
"step": 537
},
{
"epoch": 0.8553259141494436,
"grad_norm": 67.57887294043289,
"learning_rate": 8.99892313340537e-06,
"loss": 2.8166146278381348,
"step": 538
},
{
"epoch": 0.8569157392686805,
"grad_norm": 16.373029641709035,
"learning_rate": 8.993363119561819e-06,
"loss": 2.914787530899048,
"step": 539
},
{
"epoch": 0.8585055643879174,
"grad_norm": 9.294623127371112,
"learning_rate": 8.987789435890196e-06,
"loss": 2.9436442852020264,
"step": 540
},
{
"epoch": 0.8600953895071543,
"grad_norm": 12.96701127912914,
"learning_rate": 8.98220210146998e-06,
"loss": 3.34321928024292,
"step": 541
},
{
"epoch": 0.8616852146263911,
"grad_norm": 8.138125083528765,
"learning_rate": 8.976601135427386e-06,
"loss": 2.543393611907959,
"step": 542
},
{
"epoch": 0.863275039745628,
"grad_norm": 21.988109277338108,
"learning_rate": 8.970986556935282e-06,
"loss": 2.928457021713257,
"step": 543
},
{
"epoch": 0.8648648648648649,
"grad_norm": 8.033779110302003,
"learning_rate": 8.96535838521314e-06,
"loss": 2.7310256958007812,
"step": 544
},
{
"epoch": 0.8664546899841018,
"grad_norm": 7.304588914684417,
"learning_rate": 8.959716639526962e-06,
"loss": 3.030553102493286,
"step": 545
},
{
"epoch": 0.8680445151033387,
"grad_norm": 10.380113781227514,
"learning_rate": 8.954061339189214e-06,
"loss": 2.711671829223633,
"step": 546
},
{
"epoch": 0.8696343402225755,
"grad_norm": 18.269075985818574,
"learning_rate": 8.948392503558763e-06,
"loss": 2.4586758613586426,
"step": 547
},
{
"epoch": 0.8712241653418124,
"grad_norm": 17.86796455198677,
"learning_rate": 8.942710152040807e-06,
"loss": 2.281625270843506,
"step": 548
},
{
"epoch": 0.8728139904610492,
"grad_norm": 11.659312416575938,
"learning_rate": 8.937014304086814e-06,
"loss": 2.8658084869384766,
"step": 549
},
{
"epoch": 0.8744038155802861,
"grad_norm": 9.678259451150254,
"learning_rate": 8.931304979194452e-06,
"loss": 2.6468729972839355,
"step": 550
},
{
"epoch": 0.875993640699523,
"grad_norm": 11.13451090154638,
"learning_rate": 8.925582196907519e-06,
"loss": 2.5170133113861084,
"step": 551
},
{
"epoch": 0.8775834658187599,
"grad_norm": 7.04164151428309,
"learning_rate": 8.91984597681588e-06,
"loss": 2.8820958137512207,
"step": 552
},
{
"epoch": 0.8791732909379968,
"grad_norm": 12.357390772893178,
"learning_rate": 8.914096338555402e-06,
"loss": 3.473822593688965,
"step": 553
},
{
"epoch": 0.8807631160572337,
"grad_norm": 28.33219274401018,
"learning_rate": 8.908333301807886e-06,
"loss": 2.5123298168182373,
"step": 554
},
{
"epoch": 0.8823529411764706,
"grad_norm": 11.030402564865922,
"learning_rate": 8.90255688630099e-06,
"loss": 2.7539072036743164,
"step": 555
},
{
"epoch": 0.8839427662957074,
"grad_norm": 10.922187259016807,
"learning_rate": 8.896767111808177e-06,
"loss": 2.488431453704834,
"step": 556
},
{
"epoch": 0.8855325914149443,
"grad_norm": 15.435384223393465,
"learning_rate": 8.890963998148637e-06,
"loss": 2.2676663398742676,
"step": 557
},
{
"epoch": 0.8871224165341812,
"grad_norm": 11.885270047863468,
"learning_rate": 8.88514756518722e-06,
"loss": 2.365499496459961,
"step": 558
},
{
"epoch": 0.8887122416534181,
"grad_norm": 21.43582746324342,
"learning_rate": 8.879317832834372e-06,
"loss": 3.2253689765930176,
"step": 559
},
{
"epoch": 0.890302066772655,
"grad_norm": 9.087125791490214,
"learning_rate": 8.873474821046066e-06,
"loss": 2.479543685913086,
"step": 560
},
{
"epoch": 0.8918918918918919,
"grad_norm": 14.37236834034769,
"learning_rate": 8.867618549823728e-06,
"loss": 2.6958513259887695,
"step": 561
},
{
"epoch": 0.8934817170111288,
"grad_norm": 26.795955747432917,
"learning_rate": 8.861749039214177e-06,
"loss": 3.0564427375793457,
"step": 562
},
{
"epoch": 0.8950715421303657,
"grad_norm": 14.008735885861535,
"learning_rate": 8.85586630930955e-06,
"loss": 3.0955772399902344,
"step": 563
},
{
"epoch": 0.8966613672496025,
"grad_norm": 17.499999308226226,
"learning_rate": 8.849970380247237e-06,
"loss": 2.753736972808838,
"step": 564
},
{
"epoch": 0.8982511923688394,
"grad_norm": 16.86991628103478,
"learning_rate": 8.844061272209807e-06,
"loss": 3.1091933250427246,
"step": 565
},
{
"epoch": 0.8998410174880763,
"grad_norm": 7.292520353862746,
"learning_rate": 8.838139005424945e-06,
"loss": 2.73673152923584,
"step": 566
},
{
"epoch": 0.9014308426073132,
"grad_norm": 26.10050227785304,
"learning_rate": 8.832203600165383e-06,
"loss": 2.820924758911133,
"step": 567
},
{
"epoch": 0.9030206677265501,
"grad_norm": 10.983586406500114,
"learning_rate": 8.826255076748823e-06,
"loss": 2.9828057289123535,
"step": 568
},
{
"epoch": 0.904610492845787,
"grad_norm": 9.500150629192056,
"learning_rate": 8.820293455537872e-06,
"loss": 2.7929773330688477,
"step": 569
},
{
"epoch": 0.9062003179650239,
"grad_norm": 18.630927541392282,
"learning_rate": 8.814318756939979e-06,
"loss": 2.3121395111083984,
"step": 570
},
{
"epoch": 0.9077901430842608,
"grad_norm": 19.23342906269139,
"learning_rate": 8.808331001407352e-06,
"loss": 2.8814163208007812,
"step": 571
},
{
"epoch": 0.9093799682034976,
"grad_norm": 14.377934501655712,
"learning_rate": 8.802330209436898e-06,
"loss": 3.316739559173584,
"step": 572
},
{
"epoch": 0.9109697933227345,
"grad_norm": 11.871126508660163,
"learning_rate": 8.796316401570146e-06,
"loss": 2.679964780807495,
"step": 573
},
{
"epoch": 0.9125596184419714,
"grad_norm": 13.85556629537282,
"learning_rate": 8.790289598393186e-06,
"loss": 2.9453659057617188,
"step": 574
},
{
"epoch": 0.9141494435612083,
"grad_norm": 8.232252631291905,
"learning_rate": 8.784249820536588e-06,
"loss": 2.6362810134887695,
"step": 575
},
{
"epoch": 0.9157392686804452,
"grad_norm": 15.867140433488505,
"learning_rate": 8.778197088675339e-06,
"loss": 2.6648402214050293,
"step": 576
},
{
"epoch": 0.9173290937996821,
"grad_norm": 13.237751697039622,
"learning_rate": 8.772131423528766e-06,
"loss": 2.9705429077148438,
"step": 577
},
{
"epoch": 0.918918918918919,
"grad_norm": 18.41414811767829,
"learning_rate": 8.766052845860472e-06,
"loss": 1.8093316555023193,
"step": 578
},
{
"epoch": 0.9205087440381559,
"grad_norm": 13.413765564452232,
"learning_rate": 8.759961376478256e-06,
"loss": 3.0572826862335205,
"step": 579
},
{
"epoch": 0.9220985691573926,
"grad_norm": 15.16419866353513,
"learning_rate": 8.753857036234055e-06,
"loss": 3.2078309059143066,
"step": 580
},
{
"epoch": 0.9236883942766295,
"grad_norm": 10.520075066302136,
"learning_rate": 8.747739846023858e-06,
"loss": 2.571777105331421,
"step": 581
},
{
"epoch": 0.9252782193958664,
"grad_norm": 17.393520422365786,
"learning_rate": 8.741609826787644e-06,
"loss": 2.815624713897705,
"step": 582
},
{
"epoch": 0.9268680445151033,
"grad_norm": 12.114943627687087,
"learning_rate": 8.73546699950931e-06,
"loss": 2.674105644226074,
"step": 583
},
{
"epoch": 0.9284578696343402,
"grad_norm": 20.829494890927517,
"learning_rate": 8.72931138521659e-06,
"loss": 2.616847515106201,
"step": 584
},
{
"epoch": 0.9300476947535771,
"grad_norm": 18.7974227472542,
"learning_rate": 8.723143004980995e-06,
"loss": 3.3333654403686523,
"step": 585
},
{
"epoch": 0.931637519872814,
"grad_norm": 9.621624789248896,
"learning_rate": 8.716961879917734e-06,
"loss": 2.8845057487487793,
"step": 586
},
{
"epoch": 0.9332273449920508,
"grad_norm": 7.583388841207975,
"learning_rate": 8.710768031185643e-06,
"loss": 2.532384157180786,
"step": 587
},
{
"epoch": 0.9348171701112877,
"grad_norm": 7.226870112533399,
"learning_rate": 8.704561479987115e-06,
"loss": 2.9145328998565674,
"step": 588
},
{
"epoch": 0.9364069952305246,
"grad_norm": 10.814912634036599,
"learning_rate": 8.698342247568021e-06,
"loss": 3.091761827468872,
"step": 589
},
{
"epoch": 0.9379968203497615,
"grad_norm": 15.997550555584759,
"learning_rate": 8.692110355217646e-06,
"loss": 2.7953693866729736,
"step": 590
},
{
"epoch": 0.9395866454689984,
"grad_norm": 11.411767007121696,
"learning_rate": 8.685865824268608e-06,
"loss": 3.1209115982055664,
"step": 591
},
{
"epoch": 0.9411764705882353,
"grad_norm": 9.582592328840802,
"learning_rate": 8.679608676096793e-06,
"loss": 2.7394025325775146,
"step": 592
},
{
"epoch": 0.9427662957074722,
"grad_norm": 18.39337660729296,
"learning_rate": 8.673338932121274e-06,
"loss": 3.261842966079712,
"step": 593
},
{
"epoch": 0.9443561208267091,
"grad_norm": 10.601794336067574,
"learning_rate": 8.66705661380424e-06,
"loss": 3.0146102905273438,
"step": 594
},
{
"epoch": 0.9459459459459459,
"grad_norm": 12.563607294258158,
"learning_rate": 8.660761742650928e-06,
"loss": 2.659600019454956,
"step": 595
},
{
"epoch": 0.9475357710651828,
"grad_norm": 39.18151668287377,
"learning_rate": 8.654454340209542e-06,
"loss": 2.043147087097168,
"step": 596
},
{
"epoch": 0.9491255961844197,
"grad_norm": 12.18798117002197,
"learning_rate": 8.648134428071182e-06,
"loss": 2.62393856048584,
"step": 597
},
{
"epoch": 0.9507154213036566,
"grad_norm": 25.381543731028497,
"learning_rate": 8.641802027869774e-06,
"loss": 2.586343288421631,
"step": 598
},
{
"epoch": 0.9523052464228935,
"grad_norm": 10.863871219074202,
"learning_rate": 8.635457161281988e-06,
"loss": 2.907933235168457,
"step": 599
},
{
"epoch": 0.9538950715421304,
"grad_norm": 17.560668770317324,
"learning_rate": 8.629099850027172e-06,
"loss": 2.894634962081909,
"step": 600
},
{
"epoch": 0.9554848966613673,
"grad_norm": 15.359181392618892,
"learning_rate": 8.622730115867268e-06,
"loss": 3.1808290481567383,
"step": 601
},
{
"epoch": 0.9570747217806042,
"grad_norm": 19.61490599139797,
"learning_rate": 8.616347980606749e-06,
"loss": 2.564119338989258,
"step": 602
},
{
"epoch": 0.958664546899841,
"grad_norm": 11.460288433511876,
"learning_rate": 8.60995346609254e-06,
"loss": 2.3505280017852783,
"step": 603
},
{
"epoch": 0.9602543720190779,
"grad_norm": 8.178876009589425,
"learning_rate": 8.603546594213935e-06,
"loss": 2.901543617248535,
"step": 604
},
{
"epoch": 0.9618441971383148,
"grad_norm": 12.305038523339514,
"learning_rate": 8.597127386902536e-06,
"loss": 2.8978724479675293,
"step": 605
},
{
"epoch": 0.9634340222575517,
"grad_norm": 8.450718398735082,
"learning_rate": 8.590695866132162e-06,
"loss": 2.6897552013397217,
"step": 606
},
{
"epoch": 0.9650238473767886,
"grad_norm": 28.815336253830008,
"learning_rate": 8.58425205391879e-06,
"loss": 2.6910252571105957,
"step": 607
},
{
"epoch": 0.9666136724960255,
"grad_norm": 26.111368898008163,
"learning_rate": 8.577795972320475e-06,
"loss": 2.798401355743408,
"step": 608
},
{
"epoch": 0.9682034976152624,
"grad_norm": 8.913376825484798,
"learning_rate": 8.571327643437261e-06,
"loss": 2.1879935264587402,
"step": 609
},
{
"epoch": 0.9697933227344993,
"grad_norm": 12.695120776563611,
"learning_rate": 8.564847089411128e-06,
"loss": 2.797454357147217,
"step": 610
},
{
"epoch": 0.9713831478537361,
"grad_norm": 10.59170000095689,
"learning_rate": 8.558354332425893e-06,
"loss": 2.911411762237549,
"step": 611
},
{
"epoch": 0.972972972972973,
"grad_norm": 11.516078755335915,
"learning_rate": 8.551849394707158e-06,
"loss": 3.4041268825531006,
"step": 612
},
{
"epoch": 0.9745627980922098,
"grad_norm": 15.186668948081552,
"learning_rate": 8.545332298522207e-06,
"loss": 2.0779900550842285,
"step": 613
},
{
"epoch": 0.9761526232114467,
"grad_norm": 10.329332615910896,
"learning_rate": 8.538803066179955e-06,
"loss": 2.844508647918701,
"step": 614
},
{
"epoch": 0.9777424483306836,
"grad_norm": 7.134532897405484,
"learning_rate": 8.53226172003086e-06,
"loss": 3.226003646850586,
"step": 615
},
{
"epoch": 0.9793322734499205,
"grad_norm": 7.92503508430859,
"learning_rate": 8.525708282466839e-06,
"loss": 2.8174638748168945,
"step": 616
},
{
"epoch": 0.9809220985691574,
"grad_norm": 13.882957144040324,
"learning_rate": 8.519142775921207e-06,
"loss": 3.629255533218384,
"step": 617
},
{
"epoch": 0.9825119236883942,
"grad_norm": 17.89036699856641,
"learning_rate": 8.512565222868592e-06,
"loss": 2.345249652862549,
"step": 618
},
{
"epoch": 0.9841017488076311,
"grad_norm": 15.385106008821687,
"learning_rate": 8.505975645824858e-06,
"loss": 2.824721097946167,
"step": 619
},
{
"epoch": 0.985691573926868,
"grad_norm": 8.342182862592603,
"learning_rate": 8.499374067347026e-06,
"loss": 2.341355562210083,
"step": 620
},
{
"epoch": 0.9872813990461049,
"grad_norm": 9.78424313321291,
"learning_rate": 8.492760510033203e-06,
"loss": 2.6399459838867188,
"step": 621
},
{
"epoch": 0.9888712241653418,
"grad_norm": 18.50621272333162,
"learning_rate": 8.486134996522502e-06,
"loss": 2.872849941253662,
"step": 622
},
{
"epoch": 0.9904610492845787,
"grad_norm": 16.893372126409265,
"learning_rate": 8.47949754949496e-06,
"loss": 2.854398727416992,
"step": 623
},
{
"epoch": 0.9920508744038156,
"grad_norm": 33.989665060746646,
"learning_rate": 8.472848191671465e-06,
"loss": 2.569676160812378,
"step": 624
},
{
"epoch": 0.9936406995230525,
"grad_norm": 10.364248356846579,
"learning_rate": 8.46618694581368e-06,
"loss": 2.520169734954834,
"step": 625
},
{
"epoch": 0.9952305246422893,
"grad_norm": 9.047743501472581,
"learning_rate": 8.459513834723957e-06,
"loss": 2.9824767112731934,
"step": 626
},
{
"epoch": 0.9968203497615262,
"grad_norm": 15.714759225038321,
"learning_rate": 8.452828881245273e-06,
"loss": 2.8227317333221436,
"step": 627
},
{
"epoch": 0.9984101748807631,
"grad_norm": 8.567173852792775,
"learning_rate": 8.446132108261136e-06,
"loss": 2.5381555557250977,
"step": 628
},
{
"epoch": 1.0,
"grad_norm": 19.503916867140724,
"learning_rate": 8.439423538695515e-06,
"loss": 2.3427681922912598,
"step": 629
},
{
"epoch": 1.0015898251192368,
"grad_norm": 10.202895702857752,
"learning_rate": 8.432703195512761e-06,
"loss": 2.6940202713012695,
"step": 630
},
{
"epoch": 1.0031796502384738,
"grad_norm": 9.200573955983792,
"learning_rate": 8.425971101717528e-06,
"loss": 1.772001028060913,
"step": 631
},
{
"epoch": 1.0047694753577106,
"grad_norm": 10.123855095623277,
"learning_rate": 8.419227280354693e-06,
"loss": 2.2012226581573486,
"step": 632
},
{
"epoch": 1.0063593004769475,
"grad_norm": 13.960221710056468,
"learning_rate": 8.412471754509282e-06,
"loss": 1.5737675428390503,
"step": 633
},
{
"epoch": 1.0079491255961843,
"grad_norm": 12.284994971813214,
"learning_rate": 8.405704547306379e-06,
"loss": 1.8023271560668945,
"step": 634
},
{
"epoch": 1.0095389507154213,
"grad_norm": 7.548097649993748,
"learning_rate": 8.398925681911064e-06,
"loss": 1.4765472412109375,
"step": 635
},
{
"epoch": 1.011128775834658,
"grad_norm": 16.46778155673934,
"learning_rate": 8.392135181528318e-06,
"loss": 1.8113789558410645,
"step": 636
},
{
"epoch": 1.012718600953895,
"grad_norm": 14.205221993999034,
"learning_rate": 8.385333069402952e-06,
"loss": 2.0972166061401367,
"step": 637
},
{
"epoch": 1.0143084260731319,
"grad_norm": 20.2955824590524,
"learning_rate": 8.378519368819528e-06,
"loss": 1.1075962781906128,
"step": 638
},
{
"epoch": 1.0158982511923689,
"grad_norm": 13.78054218173786,
"learning_rate": 8.371694103102272e-06,
"loss": 2.185720205307007,
"step": 639
},
{
"epoch": 1.0174880763116056,
"grad_norm": 11.86981515345048,
"learning_rate": 8.364857295615006e-06,
"loss": 1.1814801692962646,
"step": 640
},
{
"epoch": 1.0190779014308426,
"grad_norm": 9.618387923402087,
"learning_rate": 8.358008969761054e-06,
"loss": 1.6825406551361084,
"step": 641
},
{
"epoch": 1.0206677265500794,
"grad_norm": 9.906258760224635,
"learning_rate": 8.351149148983173e-06,
"loss": 1.643816351890564,
"step": 642
},
{
"epoch": 1.0222575516693164,
"grad_norm": 19.68985006500769,
"learning_rate": 8.344277856763465e-06,
"loss": 1.3807225227355957,
"step": 643
},
{
"epoch": 1.0238473767885532,
"grad_norm": 28.597373473820486,
"learning_rate": 8.337395116623308e-06,
"loss": 0.641170084476471,
"step": 644
},
{
"epoch": 1.0254372019077902,
"grad_norm": 7.082787066403454,
"learning_rate": 8.330500952123259e-06,
"loss": 1.495134711265564,
"step": 645
},
{
"epoch": 1.027027027027027,
"grad_norm": 15.416921380517037,
"learning_rate": 8.323595386862985e-06,
"loss": 2.411254644393921,
"step": 646
},
{
"epoch": 1.028616852146264,
"grad_norm": 14.334933490816235,
"learning_rate": 8.316678444481186e-06,
"loss": 1.6603529453277588,
"step": 647
},
{
"epoch": 1.0302066772655007,
"grad_norm": 7.775666873968019,
"learning_rate": 8.309750148655496e-06,
"loss": 1.343907356262207,
"step": 648
},
{
"epoch": 1.0317965023847377,
"grad_norm": 9.43239062990287,
"learning_rate": 8.302810523102422e-06,
"loss": 1.6101237535476685,
"step": 649
},
{
"epoch": 1.0333863275039745,
"grad_norm": 14.172496355509377,
"learning_rate": 8.295859591577249e-06,
"loss": 1.3219900131225586,
"step": 650
},
{
"epoch": 1.0349761526232115,
"grad_norm": 15.249734205542557,
"learning_rate": 8.288897377873967e-06,
"loss": 1.5491715669631958,
"step": 651
},
{
"epoch": 1.0365659777424483,
"grad_norm": 15.005853149685576,
"learning_rate": 8.281923905825188e-06,
"loss": 1.1344152688980103,
"step": 652
},
{
"epoch": 1.0381558028616853,
"grad_norm": 11.299005381994808,
"learning_rate": 8.274939199302058e-06,
"loss": 0.9863616228103638,
"step": 653
},
{
"epoch": 1.039745627980922,
"grad_norm": 19.033875793735564,
"learning_rate": 8.267943282214182e-06,
"loss": 1.5967910289764404,
"step": 654
},
{
"epoch": 1.041335453100159,
"grad_norm": 12.368037602013091,
"learning_rate": 8.260936178509543e-06,
"loss": 1.2763534784317017,
"step": 655
},
{
"epoch": 1.0429252782193958,
"grad_norm": 11.113205357262565,
"learning_rate": 8.253917912174415e-06,
"loss": 1.4309293031692505,
"step": 656
},
{
"epoch": 1.0445151033386328,
"grad_norm": 19.420228317811137,
"learning_rate": 8.246888507233281e-06,
"loss": 1.7432823181152344,
"step": 657
},
{
"epoch": 1.0461049284578696,
"grad_norm": 12.251793126929448,
"learning_rate": 8.23984798774876e-06,
"loss": 1.3506265878677368,
"step": 658
},
{
"epoch": 1.0476947535771066,
"grad_norm": 14.720427025655871,
"learning_rate": 8.232796377821509e-06,
"loss": 1.5710445642471313,
"step": 659
},
{
"epoch": 1.0492845786963434,
"grad_norm": 12.7661392760613,
"learning_rate": 8.225733701590153e-06,
"loss": 2.116056203842163,
"step": 660
},
{
"epoch": 1.0508744038155804,
"grad_norm": 16.26962811578906,
"learning_rate": 8.218659983231203e-06,
"loss": 1.7300777435302734,
"step": 661
},
{
"epoch": 1.0524642289348172,
"grad_norm": 9.013874583793882,
"learning_rate": 8.211575246958959e-06,
"loss": 1.5254652500152588,
"step": 662
},
{
"epoch": 1.054054054054054,
"grad_norm": 12.516975753036407,
"learning_rate": 8.204479517025445e-06,
"loss": 1.6050835847854614,
"step": 663
},
{
"epoch": 1.055643879173291,
"grad_norm": 8.674526511907334,
"learning_rate": 8.197372817720314e-06,
"loss": 1.4675190448760986,
"step": 664
},
{
"epoch": 1.0572337042925277,
"grad_norm": 14.429611267241105,
"learning_rate": 8.190255173370768e-06,
"loss": 1.2936460971832275,
"step": 665
},
{
"epoch": 1.0588235294117647,
"grad_norm": 10.802671282364987,
"learning_rate": 8.183126608341483e-06,
"loss": 1.7229145765304565,
"step": 666
},
{
"epoch": 1.0604133545310015,
"grad_norm": 13.845554587035924,
"learning_rate": 8.175987147034505e-06,
"loss": 1.276991367340088,
"step": 667
},
{
"epoch": 1.0620031796502385,
"grad_norm": 8.591410251790123,
"learning_rate": 8.168836813889192e-06,
"loss": 1.0878384113311768,
"step": 668
},
{
"epoch": 1.0635930047694753,
"grad_norm": 8.17770282098833,
"learning_rate": 8.161675633382109e-06,
"loss": 1.4470587968826294,
"step": 669
},
{
"epoch": 1.0651828298887123,
"grad_norm": 7.708900221646167,
"learning_rate": 8.154503630026955e-06,
"loss": 2.3226277828216553,
"step": 670
},
{
"epoch": 1.066772655007949,
"grad_norm": 12.815998930989513,
"learning_rate": 8.14732082837448e-06,
"loss": 1.5032761096954346,
"step": 671
},
{
"epoch": 1.068362480127186,
"grad_norm": 13.292465683699978,
"learning_rate": 8.140127253012398e-06,
"loss": 1.2072701454162598,
"step": 672
},
{
"epoch": 1.0699523052464228,
"grad_norm": 14.920491770207613,
"learning_rate": 8.1329229285653e-06,
"loss": 1.2873613834381104,
"step": 673
},
{
"epoch": 1.0715421303656598,
"grad_norm": 12.48917920314591,
"learning_rate": 8.125707879694572e-06,
"loss": 1.3614212274551392,
"step": 674
},
{
"epoch": 1.0731319554848966,
"grad_norm": 13.758213612800427,
"learning_rate": 8.118482131098316e-06,
"loss": 0.9290915727615356,
"step": 675
},
{
"epoch": 1.0747217806041336,
"grad_norm": 9.095753898258069,
"learning_rate": 8.111245707511253e-06,
"loss": 2.0878541469573975,
"step": 676
},
{
"epoch": 1.0763116057233704,
"grad_norm": 15.04706722861245,
"learning_rate": 8.103998633704657e-06,
"loss": 0.9775704145431519,
"step": 677
},
{
"epoch": 1.0779014308426074,
"grad_norm": 11.071354494668453,
"learning_rate": 8.096740934486247e-06,
"loss": 2.289834499359131,
"step": 678
},
{
"epoch": 1.0794912559618441,
"grad_norm": 9.251271380712895,
"learning_rate": 8.089472634700123e-06,
"loss": 1.649209976196289,
"step": 679
},
{
"epoch": 1.0810810810810811,
"grad_norm": 24.42903087444763,
"learning_rate": 8.082193759226669e-06,
"loss": 1.4703314304351807,
"step": 680
},
{
"epoch": 1.082670906200318,
"grad_norm": 45.72760949916053,
"learning_rate": 8.074904332982469e-06,
"loss": 1.6743850708007812,
"step": 681
},
{
"epoch": 1.084260731319555,
"grad_norm": 16.323601623107166,
"learning_rate": 8.067604380920228e-06,
"loss": 1.056239128112793,
"step": 682
},
{
"epoch": 1.0858505564387917,
"grad_norm": 11.65057660638623,
"learning_rate": 8.060293928028681e-06,
"loss": 1.9537746906280518,
"step": 683
},
{
"epoch": 1.0874403815580287,
"grad_norm": 8.664990565760272,
"learning_rate": 8.052972999332506e-06,
"loss": 1.6714719533920288,
"step": 684
},
{
"epoch": 1.0890302066772655,
"grad_norm": 12.294197165646066,
"learning_rate": 8.045641619892243e-06,
"loss": 2.0577895641326904,
"step": 685
},
{
"epoch": 1.0906200317965025,
"grad_norm": 9.4942774330208,
"learning_rate": 8.038299814804209e-06,
"loss": 1.5982561111450195,
"step": 686
},
{
"epoch": 1.0922098569157392,
"grad_norm": 8.929620325013477,
"learning_rate": 8.030947609200404e-06,
"loss": 1.3098976612091064,
"step": 687
},
{
"epoch": 1.0937996820349762,
"grad_norm": 9.428379383957022,
"learning_rate": 8.023585028248435e-06,
"loss": 1.7451062202453613,
"step": 688
},
{
"epoch": 1.095389507154213,
"grad_norm": 8.148617459246555,
"learning_rate": 8.01621209715142e-06,
"loss": 1.3702692985534668,
"step": 689
},
{
"epoch": 1.09697933227345,
"grad_norm": 12.424080461489897,
"learning_rate": 8.008828841147915e-06,
"loss": 1.6049578189849854,
"step": 690
},
{
"epoch": 1.0985691573926868,
"grad_norm": 11.336668812432293,
"learning_rate": 8.001435285511815e-06,
"loss": 1.5943506956100464,
"step": 691
},
{
"epoch": 1.1001589825119238,
"grad_norm": 9.734477712731705,
"learning_rate": 7.994031455552267e-06,
"loss": 1.1714757680892944,
"step": 692
},
{
"epoch": 1.1017488076311606,
"grad_norm": 8.456057076789037,
"learning_rate": 7.986617376613599e-06,
"loss": 1.6149002313613892,
"step": 693
},
{
"epoch": 1.1033386327503973,
"grad_norm": 10.887262581227894,
"learning_rate": 7.979193074075216e-06,
"loss": 1.3291692733764648,
"step": 694
},
{
"epoch": 1.1049284578696343,
"grad_norm": 16.529055036565882,
"learning_rate": 7.971758573351517e-06,
"loss": 1.4333473443984985,
"step": 695
},
{
"epoch": 1.1065182829888713,
"grad_norm": 16.027442702620533,
"learning_rate": 7.964313899891818e-06,
"loss": 1.6329424381256104,
"step": 696
},
{
"epoch": 1.1081081081081081,
"grad_norm": 11.167845097822756,
"learning_rate": 7.956859079180255e-06,
"loss": 1.4067692756652832,
"step": 697
},
{
"epoch": 1.109697933227345,
"grad_norm": 20.0858437953387,
"learning_rate": 7.949394136735696e-06,
"loss": 1.185004472732544,
"step": 698
},
{
"epoch": 1.1112877583465819,
"grad_norm": 10.101135491074523,
"learning_rate": 7.941919098111662e-06,
"loss": 1.6585707664489746,
"step": 699
},
{
"epoch": 1.1128775834658187,
"grad_norm": 17.35505415719115,
"learning_rate": 7.934433988896233e-06,
"loss": 1.474552869796753,
"step": 700
},
{
"epoch": 1.1144674085850557,
"grad_norm": 10.23106161583393,
"learning_rate": 7.92693883471196e-06,
"loss": 1.6253128051757812,
"step": 701
},
{
"epoch": 1.1160572337042924,
"grad_norm": 16.105820321363836,
"learning_rate": 7.91943366121578e-06,
"loss": 1.8239731788635254,
"step": 702
},
{
"epoch": 1.1176470588235294,
"grad_norm": 13.76306602324918,
"learning_rate": 7.911918494098928e-06,
"loss": 2.7172493934631348,
"step": 703
},
{
"epoch": 1.1192368839427662,
"grad_norm": 11.332518828743096,
"learning_rate": 7.904393359086854e-06,
"loss": 1.7896854877471924,
"step": 704
},
{
"epoch": 1.1208267090620032,
"grad_norm": 21.736127737439453,
"learning_rate": 7.896858281939118e-06,
"loss": 1.4515012502670288,
"step": 705
},
{
"epoch": 1.12241653418124,
"grad_norm": 13.403604508681815,
"learning_rate": 7.889313288449323e-06,
"loss": 0.8405922651290894,
"step": 706
},
{
"epoch": 1.124006359300477,
"grad_norm": 9.592906016715583,
"learning_rate": 7.881758404445012e-06,
"loss": 2.1267611980438232,
"step": 707
},
{
"epoch": 1.1255961844197138,
"grad_norm": 10.32928485222235,
"learning_rate": 7.874193655787586e-06,
"loss": 2.1224472522735596,
"step": 708
},
{
"epoch": 1.1271860095389508,
"grad_norm": 14.678514253644037,
"learning_rate": 7.866619068372217e-06,
"loss": 1.2487913370132446,
"step": 709
},
{
"epoch": 1.1287758346581875,
"grad_norm": 7.59899885047442,
"learning_rate": 7.859034668127749e-06,
"loss": 1.7427008152008057,
"step": 710
},
{
"epoch": 1.1303656597774245,
"grad_norm": 9.848850083387825,
"learning_rate": 7.851440481016623e-06,
"loss": 1.2126924991607666,
"step": 711
},
{
"epoch": 1.1319554848966613,
"grad_norm": 15.671482915827829,
"learning_rate": 7.843836533034784e-06,
"loss": 1.1827189922332764,
"step": 712
},
{
"epoch": 1.1335453100158983,
"grad_norm": 10.250019023250589,
"learning_rate": 7.836222850211579e-06,
"loss": 0.995161771774292,
"step": 713
},
{
"epoch": 1.135135135135135,
"grad_norm": 14.836541179215907,
"learning_rate": 7.828599458609691e-06,
"loss": 1.2809135913848877,
"step": 714
},
{
"epoch": 1.136724960254372,
"grad_norm": 7.69267269624143,
"learning_rate": 7.82096638432503e-06,
"loss": 1.3715252876281738,
"step": 715
},
{
"epoch": 1.1383147853736089,
"grad_norm": 12.400395047939194,
"learning_rate": 7.813323653486654e-06,
"loss": 1.2230970859527588,
"step": 716
},
{
"epoch": 1.1399046104928459,
"grad_norm": 10.468062369029404,
"learning_rate": 7.805671292256671e-06,
"loss": 1.3827756643295288,
"step": 717
},
{
"epoch": 1.1414944356120826,
"grad_norm": 18.099237128900125,
"learning_rate": 7.798009326830167e-06,
"loss": 3.2046289443969727,
"step": 718
},
{
"epoch": 1.1430842607313196,
"grad_norm": 16.498804467781206,
"learning_rate": 7.790337783435093e-06,
"loss": 1.0387102365493774,
"step": 719
},
{
"epoch": 1.1446740858505564,
"grad_norm": 46.94419002015813,
"learning_rate": 7.782656688332194e-06,
"loss": 1.9753658771514893,
"step": 720
},
{
"epoch": 1.1462639109697934,
"grad_norm": 9.385769778995229,
"learning_rate": 7.774966067814906e-06,
"loss": 1.6574186086654663,
"step": 721
},
{
"epoch": 1.1478537360890302,
"grad_norm": 13.56562472812322,
"learning_rate": 7.767265948209278e-06,
"loss": 1.7107985019683838,
"step": 722
},
{
"epoch": 1.1494435612082672,
"grad_norm": 12.854159815575816,
"learning_rate": 7.75955635587387e-06,
"loss": 1.8311526775360107,
"step": 723
},
{
"epoch": 1.151033386327504,
"grad_norm": 11.318847288794403,
"learning_rate": 7.751837317199673e-06,
"loss": 2.2952828407287598,
"step": 724
},
{
"epoch": 1.1526232114467407,
"grad_norm": 8.927949871396361,
"learning_rate": 7.744108858610008e-06,
"loss": 1.077453374862671,
"step": 725
},
{
"epoch": 1.1542130365659777,
"grad_norm": 15.25654710884335,
"learning_rate": 7.73637100656045e-06,
"loss": 1.7947218418121338,
"step": 726
},
{
"epoch": 1.1558028616852147,
"grad_norm": 14.775370524705814,
"learning_rate": 7.728623787538722e-06,
"loss": 1.9251363277435303,
"step": 727
},
{
"epoch": 1.1573926868044515,
"grad_norm": 12.83647833543155,
"learning_rate": 7.720867228064616e-06,
"loss": 1.8598628044128418,
"step": 728
},
{
"epoch": 1.1589825119236883,
"grad_norm": 10.830550994338182,
"learning_rate": 7.713101354689897e-06,
"loss": 1.4215333461761475,
"step": 729
},
{
"epoch": 1.1605723370429253,
"grad_norm": 13.323716181839108,
"learning_rate": 7.705326193998207e-06,
"loss": 3.242117404937744,
"step": 730
},
{
"epoch": 1.1621621621621623,
"grad_norm": 7.33997375680322,
"learning_rate": 7.697541772604988e-06,
"loss": 1.8210642337799072,
"step": 731
},
{
"epoch": 1.163751987281399,
"grad_norm": 14.779318326563873,
"learning_rate": 7.689748117157379e-06,
"loss": 1.1805927753448486,
"step": 732
},
{
"epoch": 1.1653418124006358,
"grad_norm": 11.600067060820573,
"learning_rate": 7.681945254334126e-06,
"loss": 1.0212841033935547,
"step": 733
},
{
"epoch": 1.1669316375198728,
"grad_norm": 11.25816159523179,
"learning_rate": 7.674133210845496e-06,
"loss": 1.844172477722168,
"step": 734
},
{
"epoch": 1.1685214626391096,
"grad_norm": 19.155279561674575,
"learning_rate": 7.666312013433183e-06,
"loss": 1.9130163192749023,
"step": 735
},
{
"epoch": 1.1701112877583466,
"grad_norm": 19.710624892947475,
"learning_rate": 7.658481688870218e-06,
"loss": 1.312086582183838,
"step": 736
},
{
"epoch": 1.1717011128775834,
"grad_norm": 11.4303087593856,
"learning_rate": 7.65064226396087e-06,
"loss": 1.8819104433059692,
"step": 737
},
{
"epoch": 1.1732909379968204,
"grad_norm": 12.468213355805375,
"learning_rate": 7.642793765540561e-06,
"loss": 1.4843418598175049,
"step": 738
},
{
"epoch": 1.1748807631160572,
"grad_norm": 11.022423957178553,
"learning_rate": 7.634936220475777e-06,
"loss": 1.4506335258483887,
"step": 739
},
{
"epoch": 1.1764705882352942,
"grad_norm": 12.175893084008573,
"learning_rate": 7.62706965566397e-06,
"loss": 1.5481715202331543,
"step": 740
},
{
"epoch": 1.178060413354531,
"grad_norm": 16.694869997171303,
"learning_rate": 7.619194098033466e-06,
"loss": 1.3884726762771606,
"step": 741
},
{
"epoch": 1.179650238473768,
"grad_norm": 13.212581507544572,
"learning_rate": 7.611309574543373e-06,
"loss": 1.5078057050704956,
"step": 742
},
{
"epoch": 1.1812400635930047,
"grad_norm": 27.58219967229084,
"learning_rate": 7.603416112183497e-06,
"loss": 3.193087100982666,
"step": 743
},
{
"epoch": 1.1828298887122417,
"grad_norm": 41.878024638548794,
"learning_rate": 7.595513737974238e-06,
"loss": 1.6256263256072998,
"step": 744
},
{
"epoch": 1.1844197138314785,
"grad_norm": 10.48182151817921,
"learning_rate": 7.587602478966503e-06,
"loss": 1.0705622434616089,
"step": 745
},
{
"epoch": 1.1860095389507155,
"grad_norm": 13.769941816011189,
"learning_rate": 7.579682362241613e-06,
"loss": 2.1637659072875977,
"step": 746
},
{
"epoch": 1.1875993640699523,
"grad_norm": 9.491111225073396,
"learning_rate": 7.571753414911213e-06,
"loss": 2.2312355041503906,
"step": 747
},
{
"epoch": 1.1891891891891893,
"grad_norm": 8.897617093166733,
"learning_rate": 7.563815664117173e-06,
"loss": 2.0733022689819336,
"step": 748
},
{
"epoch": 1.190779014308426,
"grad_norm": 13.523802090806276,
"learning_rate": 7.555869137031497e-06,
"loss": 1.4615492820739746,
"step": 749
},
{
"epoch": 1.192368839427663,
"grad_norm": 13.737519147436364,
"learning_rate": 7.547913860856239e-06,
"loss": 1.8079819679260254,
"step": 750
},
{
"epoch": 1.1939586645468998,
"grad_norm": 12.315997979642338,
"learning_rate": 7.5399498628233925e-06,
"loss": 1.159532070159912,
"step": 751
},
{
"epoch": 1.1955484896661368,
"grad_norm": 8.999780532264547,
"learning_rate": 7.531977170194813e-06,
"loss": 0.9958317279815674,
"step": 752
},
{
"epoch": 1.1971383147853736,
"grad_norm": 10.867977410197962,
"learning_rate": 7.52399581026212e-06,
"loss": 1.3158916234970093,
"step": 753
},
{
"epoch": 1.1987281399046106,
"grad_norm": 13.816531992054164,
"learning_rate": 7.5160058103465985e-06,
"loss": 2.40507173538208,
"step": 754
},
{
"epoch": 1.2003179650238474,
"grad_norm": 21.31941893078477,
"learning_rate": 7.508007197799111e-06,
"loss": 1.0883036851882935,
"step": 755
},
{
"epoch": 1.2019077901430844,
"grad_norm": 7.2796164125970515,
"learning_rate": 7.500000000000001e-06,
"loss": 1.3112430572509766,
"step": 756
},
{
"epoch": 1.2034976152623211,
"grad_norm": 7.6642364598545605,
"learning_rate": 7.491984244359003e-06,
"loss": 1.5843225717544556,
"step": 757
},
{
"epoch": 1.2050874403815581,
"grad_norm": 10.412610985057054,
"learning_rate": 7.483959958315143e-06,
"loss": 1.3042569160461426,
"step": 758
},
{
"epoch": 1.206677265500795,
"grad_norm": 12.386068998781987,
"learning_rate": 7.475927169336653e-06,
"loss": 1.1159758567810059,
"step": 759
},
{
"epoch": 1.2082670906200317,
"grad_norm": 10.560876113350165,
"learning_rate": 7.467885904920864e-06,
"loss": 1.8457821607589722,
"step": 760
},
{
"epoch": 1.2098569157392687,
"grad_norm": 6.7816933066214,
"learning_rate": 7.459836192594127e-06,
"loss": 1.314563274383545,
"step": 761
},
{
"epoch": 1.2114467408585057,
"grad_norm": 15.265650006770516,
"learning_rate": 7.451778059911706e-06,
"loss": 1.4867005348205566,
"step": 762
},
{
"epoch": 1.2130365659777425,
"grad_norm": 10.657337325730579,
"learning_rate": 7.4437115344576935e-06,
"loss": 1.0135457515716553,
"step": 763
},
{
"epoch": 1.2146263910969792,
"grad_norm": 14.7213218030471,
"learning_rate": 7.4356366438449065e-06,
"loss": 1.6390702724456787,
"step": 764
},
{
"epoch": 1.2162162162162162,
"grad_norm": 11.865827724018164,
"learning_rate": 7.427553415714801e-06,
"loss": 1.6365562677383423,
"step": 765
},
{
"epoch": 1.217806041335453,
"grad_norm": 16.70042228025528,
"learning_rate": 7.419461877737373e-06,
"loss": 1.411786437034607,
"step": 766
},
{
"epoch": 1.21939586645469,
"grad_norm": 9.721633410028563,
"learning_rate": 7.411362057611065e-06,
"loss": 0.9351043105125427,
"step": 767
},
{
"epoch": 1.2209856915739268,
"grad_norm": 13.790912646142422,
"learning_rate": 7.403253983062665e-06,
"loss": 0.7709986567497253,
"step": 768
},
{
"epoch": 1.2225755166931638,
"grad_norm": 17.60584845779764,
"learning_rate": 7.395137681847223e-06,
"loss": 1.7028567790985107,
"step": 769
},
{
"epoch": 1.2241653418124006,
"grad_norm": 12.669423900706361,
"learning_rate": 7.387013181747949e-06,
"loss": 1.0321797132492065,
"step": 770
},
{
"epoch": 1.2257551669316376,
"grad_norm": 12.734948593506429,
"learning_rate": 7.378880510576115e-06,
"loss": 1.5205578804016113,
"step": 771
},
{
"epoch": 1.2273449920508743,
"grad_norm": 16.313619320103047,
"learning_rate": 7.370739696170971e-06,
"loss": 2.0671231746673584,
"step": 772
},
{
"epoch": 1.2289348171701113,
"grad_norm": 13.651887497636158,
"learning_rate": 7.362590766399635e-06,
"loss": 1.5689630508422852,
"step": 773
},
{
"epoch": 1.230524642289348,
"grad_norm": 12.77935664125304,
"learning_rate": 7.3544337491570075e-06,
"loss": 1.2613396644592285,
"step": 774
},
{
"epoch": 1.232114467408585,
"grad_norm": 6.431511096471851,
"learning_rate": 7.346268672365675e-06,
"loss": 1.390768051147461,
"step": 775
},
{
"epoch": 1.2337042925278219,
"grad_norm": 11.04117437186309,
"learning_rate": 7.338095563975813e-06,
"loss": 1.3204916715621948,
"step": 776
},
{
"epoch": 1.2352941176470589,
"grad_norm": 8.025556102160234,
"learning_rate": 7.329914451965089e-06,
"loss": 1.8049380779266357,
"step": 777
},
{
"epoch": 1.2368839427662957,
"grad_norm": 12.664895874014816,
"learning_rate": 7.321725364338566e-06,
"loss": 2.279134511947632,
"step": 778
},
{
"epoch": 1.2384737678855327,
"grad_norm": 10.40779875172629,
"learning_rate": 7.313528329128613e-06,
"loss": 1.6769804954528809,
"step": 779
},
{
"epoch": 1.2400635930047694,
"grad_norm": 16.92840882244054,
"learning_rate": 7.305323374394802e-06,
"loss": 1.8052300214767456,
"step": 780
},
{
"epoch": 1.2416534181240064,
"grad_norm": 8.142736484313943,
"learning_rate": 7.297110528223817e-06,
"loss": 1.4213504791259766,
"step": 781
},
{
"epoch": 1.2432432432432432,
"grad_norm": 10.747768055674868,
"learning_rate": 7.28888981872935e-06,
"loss": 1.988961100578308,
"step": 782
},
{
"epoch": 1.2448330683624802,
"grad_norm": 18.15875563346072,
"learning_rate": 7.280661274052014e-06,
"loss": 1.5727958679199219,
"step": 783
},
{
"epoch": 1.246422893481717,
"grad_norm": 8.433529741351467,
"learning_rate": 7.272424922359246e-06,
"loss": 1.3556486368179321,
"step": 784
},
{
"epoch": 1.248012718600954,
"grad_norm": 14.194850048972247,
"learning_rate": 7.264180791845201e-06,
"loss": 1.7819693088531494,
"step": 785
},
{
"epoch": 1.2496025437201908,
"grad_norm": 8.397921807638253,
"learning_rate": 7.255928910730669e-06,
"loss": 1.6179646253585815,
"step": 786
},
{
"epoch": 1.2511923688394275,
"grad_norm": 14.386921536746254,
"learning_rate": 7.247669307262964e-06,
"loss": 1.554338812828064,
"step": 787
},
{
"epoch": 1.2527821939586645,
"grad_norm": 12.800105634391487,
"learning_rate": 7.239402009715838e-06,
"loss": 1.8695118427276611,
"step": 788
},
{
"epoch": 1.2543720190779015,
"grad_norm": 15.512989476889842,
"learning_rate": 7.231127046389384e-06,
"loss": 1.8640936613082886,
"step": 789
},
{
"epoch": 1.2559618441971383,
"grad_norm": 14.578922134309607,
"learning_rate": 7.222844445609931e-06,
"loss": 1.0992615222930908,
"step": 790
},
{
"epoch": 1.257551669316375,
"grad_norm": 8.724455013991323,
"learning_rate": 7.214554235729955e-06,
"loss": 1.2543790340423584,
"step": 791
},
{
"epoch": 1.259141494435612,
"grad_norm": 7.692029468393919,
"learning_rate": 7.206256445127977e-06,
"loss": 1.2529809474945068,
"step": 792
},
{
"epoch": 1.260731319554849,
"grad_norm": 14.13800376092366,
"learning_rate": 7.19795110220847e-06,
"loss": 1.3441779613494873,
"step": 793
},
{
"epoch": 1.2623211446740858,
"grad_norm": 9.49380607849554,
"learning_rate": 7.18963823540176e-06,
"loss": 1.3629730939865112,
"step": 794
},
{
"epoch": 1.2639109697933226,
"grad_norm": 12.44600649205267,
"learning_rate": 7.1813178731639255e-06,
"loss": 1.345304012298584,
"step": 795
},
{
"epoch": 1.2655007949125596,
"grad_norm": 10.304580436383407,
"learning_rate": 7.172990043976703e-06,
"loss": 1.1120240688323975,
"step": 796
},
{
"epoch": 1.2670906200317966,
"grad_norm": 13.784064682241143,
"learning_rate": 7.1646547763473916e-06,
"loss": 1.0464750528335571,
"step": 797
},
{
"epoch": 1.2686804451510334,
"grad_norm": 16.03338491525074,
"learning_rate": 7.156312098808753e-06,
"loss": 1.034813404083252,
"step": 798
},
{
"epoch": 1.2702702702702702,
"grad_norm": 16.52768239877676,
"learning_rate": 7.147962039918913e-06,
"loss": 1.8651677370071411,
"step": 799
},
{
"epoch": 1.2718600953895072,
"grad_norm": 7.8097708732231625,
"learning_rate": 7.139604628261265e-06,
"loss": 1.138526201248169,
"step": 800
},
{
"epoch": 1.2734499205087442,
"grad_norm": 9.153076753451778,
"learning_rate": 7.131239892444371e-06,
"loss": 1.4918463230133057,
"step": 801
},
{
"epoch": 1.275039745627981,
"grad_norm": 10.804677171421506,
"learning_rate": 7.122867861101868e-06,
"loss": 1.0829172134399414,
"step": 802
},
{
"epoch": 1.2766295707472177,
"grad_norm": 10.229751180792467,
"learning_rate": 7.114488562892363e-06,
"loss": 1.1312910318374634,
"step": 803
},
{
"epoch": 1.2782193958664547,
"grad_norm": 9.417078438717807,
"learning_rate": 7.106102026499339e-06,
"loss": 1.0001945495605469,
"step": 804
},
{
"epoch": 1.2798092209856915,
"grad_norm": 17.074149708506145,
"learning_rate": 7.097708280631057e-06,
"loss": 1.3354151248931885,
"step": 805
},
{
"epoch": 1.2813990461049285,
"grad_norm": 12.118419172188265,
"learning_rate": 7.089307354020459e-06,
"loss": 1.0924017429351807,
"step": 806
},
{
"epoch": 1.2829888712241653,
"grad_norm": 11.901441327586127,
"learning_rate": 7.080899275425063e-06,
"loss": 1.7671406269073486,
"step": 807
},
{
"epoch": 1.2845786963434023,
"grad_norm": 22.741979314506512,
"learning_rate": 7.072484073626872e-06,
"loss": 1.15092134475708,
"step": 808
},
{
"epoch": 1.286168521462639,
"grad_norm": 12.17848493630658,
"learning_rate": 7.064061777432276e-06,
"loss": 1.0537457466125488,
"step": 809
},
{
"epoch": 1.287758346581876,
"grad_norm": 14.484284581597736,
"learning_rate": 7.055632415671942e-06,
"loss": 2.0027740001678467,
"step": 810
},
{
"epoch": 1.2893481717011128,
"grad_norm": 12.812643289560867,
"learning_rate": 7.047196017200731e-06,
"loss": 1.8905892372131348,
"step": 811
},
{
"epoch": 1.2909379968203498,
"grad_norm": 10.311201489126852,
"learning_rate": 7.038752610897589e-06,
"loss": 2.1269192695617676,
"step": 812
},
{
"epoch": 1.2925278219395866,
"grad_norm": 17.571625875332888,
"learning_rate": 7.03030222566545e-06,
"loss": 1.0327365398406982,
"step": 813
},
{
"epoch": 1.2941176470588236,
"grad_norm": 7.576322051647097,
"learning_rate": 7.021844890431136e-06,
"loss": 1.481746792793274,
"step": 814
},
{
"epoch": 1.2957074721780604,
"grad_norm": 9.000745253510628,
"learning_rate": 7.013380634145264e-06,
"loss": 2.112708330154419,
"step": 815
},
{
"epoch": 1.2972972972972974,
"grad_norm": 8.205491038831578,
"learning_rate": 7.004909485782141e-06,
"loss": 1.0585367679595947,
"step": 816
},
{
"epoch": 1.2988871224165341,
"grad_norm": 15.117454396605899,
"learning_rate": 6.996431474339666e-06,
"loss": 2.16007924079895,
"step": 817
},
{
"epoch": 1.3004769475357711,
"grad_norm": 8.864090810986143,
"learning_rate": 6.987946628839232e-06,
"loss": 1.8724396228790283,
"step": 818
},
{
"epoch": 1.302066772655008,
"grad_norm": 8.531503498684259,
"learning_rate": 6.979454978325625e-06,
"loss": 1.6091532707214355,
"step": 819
},
{
"epoch": 1.303656597774245,
"grad_norm": 7.992191856625111,
"learning_rate": 6.970956551866925e-06,
"loss": 1.0313612222671509,
"step": 820
},
{
"epoch": 1.3052464228934817,
"grad_norm": 6.984013339692948,
"learning_rate": 6.962451378554411e-06,
"loss": 0.973236083984375,
"step": 821
},
{
"epoch": 1.3068362480127185,
"grad_norm": 8.439028091626657,
"learning_rate": 6.9539394875024525e-06,
"loss": 1.5291297435760498,
"step": 822
},
{
"epoch": 1.3084260731319555,
"grad_norm": 11.687167308435836,
"learning_rate": 6.945420907848415e-06,
"loss": 1.6111561059951782,
"step": 823
},
{
"epoch": 1.3100158982511925,
"grad_norm": 17.086783862630924,
"learning_rate": 6.936895668752564e-06,
"loss": 1.6212303638458252,
"step": 824
},
{
"epoch": 1.3116057233704292,
"grad_norm": 16.37346021342094,
"learning_rate": 6.9283637993979565e-06,
"loss": 1.2969826459884644,
"step": 825
},
{
"epoch": 1.313195548489666,
"grad_norm": 10.311683648355972,
"learning_rate": 6.9198253289903515e-06,
"loss": 1.4965565204620361,
"step": 826
},
{
"epoch": 1.314785373608903,
"grad_norm": 15.199989391439724,
"learning_rate": 6.911280286758097e-06,
"loss": 1.3168373107910156,
"step": 827
},
{
"epoch": 1.31637519872814,
"grad_norm": 16.279163654631084,
"learning_rate": 6.902728701952045e-06,
"loss": 2.215139389038086,
"step": 828
},
{
"epoch": 1.3179650238473768,
"grad_norm": 11.490345488333293,
"learning_rate": 6.894170603845436e-06,
"loss": 1.1593304872512817,
"step": 829
},
{
"epoch": 1.3195548489666136,
"grad_norm": 10.651962301231425,
"learning_rate": 6.885606021733814e-06,
"loss": 1.9360640048980713,
"step": 830
},
{
"epoch": 1.3211446740858506,
"grad_norm": 24.69552648567043,
"learning_rate": 6.877034984934912e-06,
"loss": 1.5336499214172363,
"step": 831
},
{
"epoch": 1.3227344992050876,
"grad_norm": 10.99837687708425,
"learning_rate": 6.868457522788561e-06,
"loss": 1.7535721063613892,
"step": 832
},
{
"epoch": 1.3243243243243243,
"grad_norm": 9.662947464026942,
"learning_rate": 6.859873664656588e-06,
"loss": 1.508925437927246,
"step": 833
},
{
"epoch": 1.3259141494435611,
"grad_norm": 17.79955371157225,
"learning_rate": 6.851283439922714e-06,
"loss": 1.1767382621765137,
"step": 834
},
{
"epoch": 1.3275039745627981,
"grad_norm": 18.972019834967206,
"learning_rate": 6.842686877992453e-06,
"loss": 2.279311418533325,
"step": 835
},
{
"epoch": 1.329093799682035,
"grad_norm": 9.823083495354286,
"learning_rate": 6.834084008293009e-06,
"loss": 1.5969994068145752,
"step": 836
},
{
"epoch": 1.330683624801272,
"grad_norm": 17.311712777539558,
"learning_rate": 6.825474860273186e-06,
"loss": 1.2723362445831299,
"step": 837
},
{
"epoch": 1.3322734499205087,
"grad_norm": 18.227029489838436,
"learning_rate": 6.816859463403271e-06,
"loss": 1.6115031242370605,
"step": 838
},
{
"epoch": 1.3338632750397457,
"grad_norm": 11.055264990980145,
"learning_rate": 6.808237847174948e-06,
"loss": 1.4109325408935547,
"step": 839
},
{
"epoch": 1.3354531001589824,
"grad_norm": 11.959325782123942,
"learning_rate": 6.799610041101188e-06,
"loss": 1.4117895364761353,
"step": 840
},
{
"epoch": 1.3370429252782194,
"grad_norm": 9.938907954624465,
"learning_rate": 6.790976074716151e-06,
"loss": 1.3602039813995361,
"step": 841
},
{
"epoch": 1.3386327503974562,
"grad_norm": 13.427330990539836,
"learning_rate": 6.782335977575084e-06,
"loss": 1.29445219039917,
"step": 842
},
{
"epoch": 1.3402225755166932,
"grad_norm": 10.882630644745714,
"learning_rate": 6.773689779254222e-06,
"loss": 3.1862294673919678,
"step": 843
},
{
"epoch": 1.34181240063593,
"grad_norm": 9.840468197614767,
"learning_rate": 6.765037509350685e-06,
"loss": 1.4964901208877563,
"step": 844
},
{
"epoch": 1.343402225755167,
"grad_norm": 8.65407373608341,
"learning_rate": 6.756379197482374e-06,
"loss": 1.8972535133361816,
"step": 845
},
{
"epoch": 1.3449920508744038,
"grad_norm": 10.226098512099883,
"learning_rate": 6.747714873287876e-06,
"loss": 1.2278270721435547,
"step": 846
},
{
"epoch": 1.3465818759936408,
"grad_norm": 14.157256838775108,
"learning_rate": 6.7390445664263586e-06,
"loss": 1.52341628074646,
"step": 847
},
{
"epoch": 1.3481717011128775,
"grad_norm": 16.97191198333951,
"learning_rate": 6.730368306577464e-06,
"loss": 1.5410349369049072,
"step": 848
},
{
"epoch": 1.3497615262321145,
"grad_norm": 14.05811570041505,
"learning_rate": 6.721686123441221e-06,
"loss": 1.2722220420837402,
"step": 849
},
{
"epoch": 1.3513513513513513,
"grad_norm": 14.11019835180354,
"learning_rate": 6.7129980467379265e-06,
"loss": 2.0446019172668457,
"step": 850
},
{
"epoch": 1.3529411764705883,
"grad_norm": 10.718116025458626,
"learning_rate": 6.704304106208056e-06,
"loss": 1.768629789352417,
"step": 851
},
{
"epoch": 1.354531001589825,
"grad_norm": 14.34390925267869,
"learning_rate": 6.695604331612158e-06,
"loss": 1.5219838619232178,
"step": 852
},
{
"epoch": 1.3561208267090619,
"grad_norm": 15.353615186067385,
"learning_rate": 6.686898752730751e-06,
"loss": 2.1381354331970215,
"step": 853
},
{
"epoch": 1.3577106518282989,
"grad_norm": 8.976124092423499,
"learning_rate": 6.678187399364219e-06,
"loss": 1.3002848625183105,
"step": 854
},
{
"epoch": 1.3593004769475359,
"grad_norm": 9.815958640374769,
"learning_rate": 6.669470301332718e-06,
"loss": 1.3151880502700806,
"step": 855
},
{
"epoch": 1.3608903020667726,
"grad_norm": 11.547257227718601,
"learning_rate": 6.660747488476066e-06,
"loss": 1.8249077796936035,
"step": 856
},
{
"epoch": 1.3624801271860094,
"grad_norm": 9.516915628583785,
"learning_rate": 6.652018990653646e-06,
"loss": 1.3392479419708252,
"step": 857
},
{
"epoch": 1.3640699523052464,
"grad_norm": 17.36011863152519,
"learning_rate": 6.643284837744298e-06,
"loss": 2.1219942569732666,
"step": 858
},
{
"epoch": 1.3656597774244834,
"grad_norm": 15.704349365528364,
"learning_rate": 6.6345450596462224e-06,
"loss": 1.3290646076202393,
"step": 859
},
{
"epoch": 1.3672496025437202,
"grad_norm": 18.10725760531691,
"learning_rate": 6.625799686276876e-06,
"loss": 1.8200846910476685,
"step": 860
},
{
"epoch": 1.368839427662957,
"grad_norm": 12.506844604116118,
"learning_rate": 6.617048747572865e-06,
"loss": 2.4371092319488525,
"step": 861
},
{
"epoch": 1.370429252782194,
"grad_norm": 7.80995257903257,
"learning_rate": 6.608292273489851e-06,
"loss": 2.0654964447021484,
"step": 862
},
{
"epoch": 1.372019077901431,
"grad_norm": 12.966837058086185,
"learning_rate": 6.599530294002443e-06,
"loss": 1.5136079788208008,
"step": 863
},
{
"epoch": 1.3736089030206677,
"grad_norm": 13.843991854836343,
"learning_rate": 6.5907628391040945e-06,
"loss": 1.7351160049438477,
"step": 864
},
{
"epoch": 1.3751987281399045,
"grad_norm": 9.5849956241888,
"learning_rate": 6.581989938807001e-06,
"loss": 1.0918192863464355,
"step": 865
},
{
"epoch": 1.3767885532591415,
"grad_norm": 13.699873383622664,
"learning_rate": 6.573211623142002e-06,
"loss": 1.272527813911438,
"step": 866
},
{
"epoch": 1.3783783783783785,
"grad_norm": 20.5345031546365,
"learning_rate": 6.564427922158472e-06,
"loss": 1.753305196762085,
"step": 867
},
{
"epoch": 1.3799682034976153,
"grad_norm": 16.200604524764035,
"learning_rate": 6.555638865924221e-06,
"loss": 1.4228838682174683,
"step": 868
},
{
"epoch": 1.381558028616852,
"grad_norm": 11.07654229906319,
"learning_rate": 6.546844484525389e-06,
"loss": 1.5971556901931763,
"step": 869
},
{
"epoch": 1.383147853736089,
"grad_norm": 8.07355505884821,
"learning_rate": 6.538044808066346e-06,
"loss": 1.8155808448791504,
"step": 870
},
{
"epoch": 1.3847376788553258,
"grad_norm": 16.04438512619071,
"learning_rate": 6.529239866669592e-06,
"loss": 1.3249969482421875,
"step": 871
},
{
"epoch": 1.3863275039745628,
"grad_norm": 6.72430675838066,
"learning_rate": 6.5204296904756405e-06,
"loss": 1.5359678268432617,
"step": 872
},
{
"epoch": 1.3879173290937996,
"grad_norm": 19.07489525371284,
"learning_rate": 6.511614309642933e-06,
"loss": 0.7233240008354187,
"step": 873
},
{
"epoch": 1.3895071542130366,
"grad_norm": 12.401419042352451,
"learning_rate": 6.502793754347721e-06,
"loss": 1.3856028318405151,
"step": 874
},
{
"epoch": 1.3910969793322734,
"grad_norm": 14.058889831537517,
"learning_rate": 6.493968054783973e-06,
"loss": 1.1357369422912598,
"step": 875
},
{
"epoch": 1.3926868044515104,
"grad_norm": 11.845628341686973,
"learning_rate": 6.485137241163266e-06,
"loss": 1.6570309400558472,
"step": 876
},
{
"epoch": 1.3942766295707472,
"grad_norm": 8.769841987385185,
"learning_rate": 6.476301343714682e-06,
"loss": 1.8941020965576172,
"step": 877
},
{
"epoch": 1.3958664546899842,
"grad_norm": 11.197728580579021,
"learning_rate": 6.467460392684706e-06,
"loss": 1.2110662460327148,
"step": 878
},
{
"epoch": 1.397456279809221,
"grad_norm": 22.348478293445435,
"learning_rate": 6.4586144183371215e-06,
"loss": 1.6229068040847778,
"step": 879
},
{
"epoch": 1.399046104928458,
"grad_norm": 17.601335745522142,
"learning_rate": 6.449763450952912e-06,
"loss": 1.4073424339294434,
"step": 880
},
{
"epoch": 1.4006359300476947,
"grad_norm": 9.69254485007978,
"learning_rate": 6.4409075208301454e-06,
"loss": 2.3579235076904297,
"step": 881
},
{
"epoch": 1.4022257551669317,
"grad_norm": 63.767907422838185,
"learning_rate": 6.432046658283882e-06,
"loss": 1.7618337869644165,
"step": 882
},
{
"epoch": 1.4038155802861685,
"grad_norm": 12.355457680018553,
"learning_rate": 6.423180893646068e-06,
"loss": 1.5357401371002197,
"step": 883
},
{
"epoch": 1.4054054054054055,
"grad_norm": 12.449348927552991,
"learning_rate": 6.41431025726543e-06,
"loss": 1.253293514251709,
"step": 884
},
{
"epoch": 1.4069952305246423,
"grad_norm": 10.283883999418105,
"learning_rate": 6.405434779507363e-06,
"loss": 0.9170820713043213,
"step": 885
},
{
"epoch": 1.4085850556438793,
"grad_norm": 8.815585806232317,
"learning_rate": 6.396554490753848e-06,
"loss": 1.651247262954712,
"step": 886
},
{
"epoch": 1.410174880763116,
"grad_norm": 12.925043436380768,
"learning_rate": 6.387669421403324e-06,
"loss": 1.3426076173782349,
"step": 887
},
{
"epoch": 1.4117647058823528,
"grad_norm": 4.816050020353849,
"learning_rate": 6.378779601870598e-06,
"loss": 0.7309417724609375,
"step": 888
},
{
"epoch": 1.4133545310015898,
"grad_norm": 18.174019009330134,
"learning_rate": 6.369885062586741e-06,
"loss": 1.451963186264038,
"step": 889
},
{
"epoch": 1.4149443561208268,
"grad_norm": 21.93877974703329,
"learning_rate": 6.360985833998974e-06,
"loss": 1.6305956840515137,
"step": 890
},
{
"epoch": 1.4165341812400636,
"grad_norm": 9.31424518285767,
"learning_rate": 6.352081946570577e-06,
"loss": 1.859921932220459,
"step": 891
},
{
"epoch": 1.4181240063593004,
"grad_norm": 10.563231176463914,
"learning_rate": 6.343173430780769e-06,
"loss": 1.586233377456665,
"step": 892
},
{
"epoch": 1.4197138314785374,
"grad_norm": 11.50204395935882,
"learning_rate": 6.334260317124623e-06,
"loss": 1.5006359815597534,
"step": 893
},
{
"epoch": 1.4213036565977744,
"grad_norm": 14.898950229103232,
"learning_rate": 6.325342636112945e-06,
"loss": 1.1938085556030273,
"step": 894
},
{
"epoch": 1.4228934817170111,
"grad_norm": 9.523797786916452,
"learning_rate": 6.316420418272176e-06,
"loss": 1.42879056930542,
"step": 895
},
{
"epoch": 1.424483306836248,
"grad_norm": 10.87010950961569,
"learning_rate": 6.3074936941442865e-06,
"loss": 1.358415126800537,
"step": 896
},
{
"epoch": 1.426073131955485,
"grad_norm": 13.930262341030167,
"learning_rate": 6.2985624942866764e-06,
"loss": 1.1303586959838867,
"step": 897
},
{
"epoch": 1.427662957074722,
"grad_norm": 18.54295613105915,
"learning_rate": 6.289626849272062e-06,
"loss": 1.4245703220367432,
"step": 898
},
{
"epoch": 1.4292527821939587,
"grad_norm": 8.732229316644414,
"learning_rate": 6.2806867896883795e-06,
"loss": 1.403051495552063,
"step": 899
},
{
"epoch": 1.4308426073131955,
"grad_norm": 11.823310719194478,
"learning_rate": 6.271742346138676e-06,
"loss": 1.0180282592773438,
"step": 900
},
{
"epoch": 1.4324324324324325,
"grad_norm": 11.29613478139605,
"learning_rate": 6.262793549241003e-06,
"loss": 1.287471055984497,
"step": 901
},
{
"epoch": 1.4340222575516695,
"grad_norm": 13.77247104303683,
"learning_rate": 6.253840429628317e-06,
"loss": 1.5134391784667969,
"step": 902
},
{
"epoch": 1.4356120826709062,
"grad_norm": 14.298318991675389,
"learning_rate": 6.244883017948371e-06,
"loss": 1.1660302877426147,
"step": 903
},
{
"epoch": 1.437201907790143,
"grad_norm": 14.375040695355453,
"learning_rate": 6.2359213448636104e-06,
"loss": 1.808586835861206,
"step": 904
},
{
"epoch": 1.43879173290938,
"grad_norm": 8.861984330110152,
"learning_rate": 6.226955441051067e-06,
"loss": 1.7996587753295898,
"step": 905
},
{
"epoch": 1.4403815580286168,
"grad_norm": 12.94910466867558,
"learning_rate": 6.2179853372022555e-06,
"loss": 1.6222466230392456,
"step": 906
},
{
"epoch": 1.4419713831478538,
"grad_norm": 10.618344021283788,
"learning_rate": 6.209011064023072e-06,
"loss": 1.047587513923645,
"step": 907
},
{
"epoch": 1.4435612082670906,
"grad_norm": 9.516494660750666,
"learning_rate": 6.200032652233674e-06,
"loss": 1.5758092403411865,
"step": 908
},
{
"epoch": 1.4451510333863276,
"grad_norm": 16.19352518570402,
"learning_rate": 6.191050132568397e-06,
"loss": 1.4394742250442505,
"step": 909
},
{
"epoch": 1.4467408585055643,
"grad_norm": 7.677253578283328,
"learning_rate": 6.182063535775634e-06,
"loss": 1.2608493566513062,
"step": 910
},
{
"epoch": 1.4483306836248013,
"grad_norm": 13.912530870059598,
"learning_rate": 6.173072892617737e-06,
"loss": 1.3285424709320068,
"step": 911
},
{
"epoch": 1.449920508744038,
"grad_norm": 11.724953848878124,
"learning_rate": 6.164078233870902e-06,
"loss": 1.33537757396698,
"step": 912
},
{
"epoch": 1.451510333863275,
"grad_norm": 22.12907802469097,
"learning_rate": 6.155079590325079e-06,
"loss": 1.7496384382247925,
"step": 913
},
{
"epoch": 1.4531001589825119,
"grad_norm": 8.76725712308349,
"learning_rate": 6.1460769927838535e-06,
"loss": 1.0802656412124634,
"step": 914
},
{
"epoch": 1.4546899841017489,
"grad_norm": 11.521977294381971,
"learning_rate": 6.137070472064351e-06,
"loss": 1.8613876104354858,
"step": 915
},
{
"epoch": 1.4562798092209857,
"grad_norm": 9.847645207715075,
"learning_rate": 6.1280600589971225e-06,
"loss": 1.8818397521972656,
"step": 916
},
{
"epoch": 1.4578696343402227,
"grad_norm": 9.180918128743544,
"learning_rate": 6.1190457844260434e-06,
"loss": 1.3407704830169678,
"step": 917
},
{
"epoch": 1.4594594594594594,
"grad_norm": 8.21057896116912,
"learning_rate": 6.110027679208208e-06,
"loss": 1.2275454998016357,
"step": 918
},
{
"epoch": 1.4610492845786962,
"grad_norm": 11.421076884146746,
"learning_rate": 6.1010057742138255e-06,
"loss": 0.33396875858306885,
"step": 919
},
{
"epoch": 1.4626391096979332,
"grad_norm": 16.782553590361438,
"learning_rate": 6.091980100326109e-06,
"loss": 1.9670312404632568,
"step": 920
},
{
"epoch": 1.4642289348171702,
"grad_norm": 13.30887232068492,
"learning_rate": 6.082950688441174e-06,
"loss": 1.1424121856689453,
"step": 921
},
{
"epoch": 1.465818759936407,
"grad_norm": 9.503687911554648,
"learning_rate": 6.073917569467934e-06,
"loss": 2.301384925842285,
"step": 922
},
{
"epoch": 1.4674085850556438,
"grad_norm": 9.930986204824418,
"learning_rate": 6.064880774327989e-06,
"loss": 1.5081627368927002,
"step": 923
},
{
"epoch": 1.4689984101748808,
"grad_norm": 14.51109800380038,
"learning_rate": 6.055840333955526e-06,
"loss": 1.5140700340270996,
"step": 924
},
{
"epoch": 1.4705882352941178,
"grad_norm": 14.537439872109701,
"learning_rate": 6.046796279297208e-06,
"loss": 1.4806504249572754,
"step": 925
},
{
"epoch": 1.4721780604133545,
"grad_norm": 6.594814975273433,
"learning_rate": 6.037748641312071e-06,
"loss": 1.6404941082000732,
"step": 926
},
{
"epoch": 1.4737678855325913,
"grad_norm": 11.604362595785672,
"learning_rate": 6.028697450971417e-06,
"loss": 1.1991019248962402,
"step": 927
},
{
"epoch": 1.4753577106518283,
"grad_norm": 10.249079970643205,
"learning_rate": 6.0196427392587085e-06,
"loss": 1.0598170757293701,
"step": 928
},
{
"epoch": 1.4769475357710653,
"grad_norm": 23.586165274015066,
"learning_rate": 6.0105845371694615e-06,
"loss": 2.617990016937256,
"step": 929
},
{
"epoch": 1.478537360890302,
"grad_norm": 10.652102340118171,
"learning_rate": 6.001522875711142e-06,
"loss": 1.792860746383667,
"step": 930
},
{
"epoch": 1.4801271860095389,
"grad_norm": 11.329630653040569,
"learning_rate": 5.992457785903054e-06,
"loss": 1.4832801818847656,
"step": 931
},
{
"epoch": 1.4817170111287759,
"grad_norm": 8.972410054989348,
"learning_rate": 5.983389298776241e-06,
"loss": 1.5356411933898926,
"step": 932
},
{
"epoch": 1.4833068362480128,
"grad_norm": 10.268881109477642,
"learning_rate": 5.974317445373374e-06,
"loss": 1.5654367208480835,
"step": 933
},
{
"epoch": 1.4848966613672496,
"grad_norm": 11.714234165487198,
"learning_rate": 5.96524225674865e-06,
"loss": 1.6659619808197021,
"step": 934
},
{
"epoch": 1.4864864864864864,
"grad_norm": 10.228252789232146,
"learning_rate": 5.956163763967678e-06,
"loss": 1.9939287900924683,
"step": 935
},
{
"epoch": 1.4880763116057234,
"grad_norm": 16.7890095794601,
"learning_rate": 5.947081998107381e-06,
"loss": 1.8214223384857178,
"step": 936
},
{
"epoch": 1.4896661367249602,
"grad_norm": 7.800897781524228,
"learning_rate": 5.937996990255886e-06,
"loss": 1.4987486600875854,
"step": 937
},
{
"epoch": 1.4912559618441972,
"grad_norm": 15.394073679043718,
"learning_rate": 5.928908771512418e-06,
"loss": 1.1124498844146729,
"step": 938
},
{
"epoch": 1.492845786963434,
"grad_norm": 16.106188196353283,
"learning_rate": 5.919817372987192e-06,
"loss": 2.0639588832855225,
"step": 939
},
{
"epoch": 1.494435612082671,
"grad_norm": 11.880407283343887,
"learning_rate": 5.9107228258013085e-06,
"loss": 2.061716079711914,
"step": 940
},
{
"epoch": 1.4960254372019077,
"grad_norm": 8.204725650078993,
"learning_rate": 5.901625161086645e-06,
"loss": 2.1551291942596436,
"step": 941
},
{
"epoch": 1.4976152623211447,
"grad_norm": 17.715945196929077,
"learning_rate": 5.892524409985754e-06,
"loss": 1.2744243144989014,
"step": 942
},
{
"epoch": 1.4992050874403815,
"grad_norm": 11.613878065057113,
"learning_rate": 5.883420603651749e-06,
"loss": 1.4785696268081665,
"step": 943
},
{
"epoch": 1.5007949125596185,
"grad_norm": 27.10249353739021,
"learning_rate": 5.874313773248206e-06,
"loss": 1.4471817016601562,
"step": 944
},
{
"epoch": 1.5023847376788553,
"grad_norm": 21.66926685817701,
"learning_rate": 5.86520394994905e-06,
"loss": 1.6950613260269165,
"step": 945
},
{
"epoch": 1.503974562798092,
"grad_norm": 13.085644815567496,
"learning_rate": 5.856091164938451e-06,
"loss": 1.689048171043396,
"step": 946
},
{
"epoch": 1.505564387917329,
"grad_norm": 8.749648322881661,
"learning_rate": 5.8469754494107215e-06,
"loss": 1.3710697889328003,
"step": 947
},
{
"epoch": 1.507154213036566,
"grad_norm": 10.92476604200737,
"learning_rate": 5.837856834570197e-06,
"loss": 1.0909825563430786,
"step": 948
},
{
"epoch": 1.5087440381558028,
"grad_norm": 10.261657935970968,
"learning_rate": 5.828735351631149e-06,
"loss": 1.0412880182266235,
"step": 949
},
{
"epoch": 1.5103338632750396,
"grad_norm": 8.37623419784984,
"learning_rate": 5.819611031817657e-06,
"loss": 1.4581108093261719,
"step": 950
},
{
"epoch": 1.5119236883942766,
"grad_norm": 8.534526301543458,
"learning_rate": 5.8104839063635164e-06,
"loss": 1.3928742408752441,
"step": 951
},
{
"epoch": 1.5135135135135136,
"grad_norm": 17.1404773700483,
"learning_rate": 5.801354006512127e-06,
"loss": 1.7017488479614258,
"step": 952
},
{
"epoch": 1.5151033386327504,
"grad_norm": 10.78331640625184,
"learning_rate": 5.792221363516386e-06,
"loss": 1.185091495513916,
"step": 953
},
{
"epoch": 1.5166931637519872,
"grad_norm": 13.293730796736146,
"learning_rate": 5.7830860086385746e-06,
"loss": 1.9326300621032715,
"step": 954
},
{
"epoch": 1.5182829888712241,
"grad_norm": 8.801862772509093,
"learning_rate": 5.773947973150265e-06,
"loss": 1.367699384689331,
"step": 955
},
{
"epoch": 1.5198728139904611,
"grad_norm": 9.794675722959532,
"learning_rate": 5.764807288332202e-06,
"loss": 1.971652626991272,
"step": 956
},
{
"epoch": 1.521462639109698,
"grad_norm": 10.324089171093677,
"learning_rate": 5.7556639854741995e-06,
"loss": 1.718085765838623,
"step": 957
},
{
"epoch": 1.5230524642289347,
"grad_norm": 14.973860462918022,
"learning_rate": 5.746518095875033e-06,
"loss": 1.8058040142059326,
"step": 958
},
{
"epoch": 1.5246422893481717,
"grad_norm": 10.33878097613058,
"learning_rate": 5.737369650842334e-06,
"loss": 2.024052143096924,
"step": 959
},
{
"epoch": 1.5262321144674087,
"grad_norm": 10.877213574316285,
"learning_rate": 5.728218681692482e-06,
"loss": 1.4764926433563232,
"step": 960
},
{
"epoch": 1.5278219395866455,
"grad_norm": 12.69209007016564,
"learning_rate": 5.719065219750493e-06,
"loss": 1.2469747066497803,
"step": 961
},
{
"epoch": 1.5294117647058822,
"grad_norm": 61.643149474300564,
"learning_rate": 5.709909296349921e-06,
"loss": 1.5892256498336792,
"step": 962
},
{
"epoch": 1.5310015898251192,
"grad_norm": 10.179597580056537,
"learning_rate": 5.700750942832744e-06,
"loss": 1.0561248064041138,
"step": 963
},
{
"epoch": 1.5325914149443562,
"grad_norm": 11.321189364453208,
"learning_rate": 5.6915901905492586e-06,
"loss": 1.670686960220337,
"step": 964
},
{
"epoch": 1.534181240063593,
"grad_norm": 11.261606261134911,
"learning_rate": 5.682427070857973e-06,
"loss": 1.058276653289795,
"step": 965
},
{
"epoch": 1.5357710651828298,
"grad_norm": 15.864794483406374,
"learning_rate": 5.673261615125498e-06,
"loss": 0.6892185807228088,
"step": 966
},
{
"epoch": 1.5373608903020668,
"grad_norm": 18.230716361815034,
"learning_rate": 5.664093854726442e-06,
"loss": 1.2923883199691772,
"step": 967
},
{
"epoch": 1.5389507154213038,
"grad_norm": 8.837189988255762,
"learning_rate": 5.6549238210433035e-06,
"loss": 1.2818342447280884,
"step": 968
},
{
"epoch": 1.5405405405405406,
"grad_norm": 13.745290482460153,
"learning_rate": 5.6457515454663595e-06,
"loss": 0.8728968501091003,
"step": 969
},
{
"epoch": 1.5421303656597773,
"grad_norm": 14.575907914126436,
"learning_rate": 5.6365770593935665e-06,
"loss": 1.9964404106140137,
"step": 970
},
{
"epoch": 1.5437201907790143,
"grad_norm": 11.633894434963024,
"learning_rate": 5.627400394230443e-06,
"loss": 1.6522562503814697,
"step": 971
},
{
"epoch": 1.5453100158982513,
"grad_norm": 10.625355051973981,
"learning_rate": 5.618221581389971e-06,
"loss": 1.1675052642822266,
"step": 972
},
{
"epoch": 1.5468998410174881,
"grad_norm": 16.143556764927435,
"learning_rate": 5.609040652292479e-06,
"loss": 1.5812228918075562,
"step": 973
},
{
"epoch": 1.548489666136725,
"grad_norm": 22.11039303511628,
"learning_rate": 5.599857638365547e-06,
"loss": 1.923478364944458,
"step": 974
},
{
"epoch": 1.550079491255962,
"grad_norm": 11.631721410256578,
"learning_rate": 5.590672571043883e-06,
"loss": 2.511260986328125,
"step": 975
},
{
"epoch": 1.551669316375199,
"grad_norm": 10.028949234619766,
"learning_rate": 5.581485481769231e-06,
"loss": 2.299067258834839,
"step": 976
},
{
"epoch": 1.5532591414944354,
"grad_norm": 23.468699681971557,
"learning_rate": 5.5722964019902535e-06,
"loss": 1.86583411693573,
"step": 977
},
{
"epoch": 1.5548489666136724,
"grad_norm": 12.304920816212974,
"learning_rate": 5.56310536316243e-06,
"loss": 2.1012837886810303,
"step": 978
},
{
"epoch": 1.5564387917329094,
"grad_norm": 17.576061655061082,
"learning_rate": 5.553912396747938e-06,
"loss": 1.7390809059143066,
"step": 979
},
{
"epoch": 1.5580286168521462,
"grad_norm": 9.199264804010845,
"learning_rate": 5.544717534215562e-06,
"loss": 1.872962474822998,
"step": 980
},
{
"epoch": 1.559618441971383,
"grad_norm": 14.006346639764674,
"learning_rate": 5.535520807040574e-06,
"loss": 1.6516368389129639,
"step": 981
},
{
"epoch": 1.56120826709062,
"grad_norm": 8.25412090051608,
"learning_rate": 5.526322246704628e-06,
"loss": 1.301893711090088,
"step": 982
},
{
"epoch": 1.562798092209857,
"grad_norm": 7.93205855182012,
"learning_rate": 5.517121884695652e-06,
"loss": 1.6840949058532715,
"step": 983
},
{
"epoch": 1.5643879173290938,
"grad_norm": 8.670962811174403,
"learning_rate": 5.507919752507749e-06,
"loss": 1.8640323877334595,
"step": 984
},
{
"epoch": 1.5659777424483305,
"grad_norm": 8.520452058979373,
"learning_rate": 5.498715881641069e-06,
"loss": 1.0217180252075195,
"step": 985
},
{
"epoch": 1.5675675675675675,
"grad_norm": 9.740115136069019,
"learning_rate": 5.489510303601726e-06,
"loss": 1.6914985179901123,
"step": 986
},
{
"epoch": 1.5691573926868045,
"grad_norm": 11.47073775556394,
"learning_rate": 5.480303049901669e-06,
"loss": 1.4903960227966309,
"step": 987
},
{
"epoch": 1.5707472178060413,
"grad_norm": 10.408355607354645,
"learning_rate": 5.471094152058592e-06,
"loss": 1.6876851320266724,
"step": 988
},
{
"epoch": 1.572337042925278,
"grad_norm": 10.029839032638279,
"learning_rate": 5.461883641595804e-06,
"loss": 1.29941987991333,
"step": 989
},
{
"epoch": 1.573926868044515,
"grad_norm": 20.084310299738938,
"learning_rate": 5.4526715500421465e-06,
"loss": 2.3986659049987793,
"step": 990
},
{
"epoch": 1.575516693163752,
"grad_norm": 18.092918310438236,
"learning_rate": 5.443457908931868e-06,
"loss": 1.7800534963607788,
"step": 991
},
{
"epoch": 1.5771065182829889,
"grad_norm": 13.684640461036262,
"learning_rate": 5.434242749804523e-06,
"loss": 1.9280859231948853,
"step": 992
},
{
"epoch": 1.5786963434022256,
"grad_norm": 11.004874677450085,
"learning_rate": 5.42502610420486e-06,
"loss": 1.2366968393325806,
"step": 993
},
{
"epoch": 1.5802861685214626,
"grad_norm": 7.500201196806951,
"learning_rate": 5.415808003682717e-06,
"loss": 1.7288457155227661,
"step": 994
},
{
"epoch": 1.5818759936406996,
"grad_norm": 15.307405528491252,
"learning_rate": 5.406588479792915e-06,
"loss": 1.1540336608886719,
"step": 995
},
{
"epoch": 1.5834658187599364,
"grad_norm": 8.924512960021882,
"learning_rate": 5.397367564095142e-06,
"loss": 1.3142873048782349,
"step": 996
},
{
"epoch": 1.5850556438791732,
"grad_norm": 13.789877615094275,
"learning_rate": 5.388145288153855e-06,
"loss": 0.8072051405906677,
"step": 997
},
{
"epoch": 1.5866454689984102,
"grad_norm": 11.004905111807771,
"learning_rate": 5.378921683538166e-06,
"loss": 1.4714614152908325,
"step": 998
},
{
"epoch": 1.5882352941176472,
"grad_norm": 8.752774902984761,
"learning_rate": 5.369696781821735e-06,
"loss": 2.083068370819092,
"step": 999
},
{
"epoch": 1.589825119236884,
"grad_norm": 8.877980237727733,
"learning_rate": 5.360470614582661e-06,
"loss": 1.6199175119400024,
"step": 1000
},
{
"epoch": 1.5914149443561207,
"grad_norm": 7.5027659446752555,
"learning_rate": 5.351243213403378e-06,
"loss": 0.8739040493965149,
"step": 1001
},
{
"epoch": 1.5930047694753577,
"grad_norm": 15.849688654140593,
"learning_rate": 5.3420146098705404e-06,
"loss": 1.8732268810272217,
"step": 1002
},
{
"epoch": 1.5945945945945947,
"grad_norm": 13.424960671306968,
"learning_rate": 5.33278483557492e-06,
"loss": 1.1949975490570068,
"step": 1003
},
{
"epoch": 1.5961844197138315,
"grad_norm": 8.314079314375906,
"learning_rate": 5.323553922111299e-06,
"loss": 1.6262646913528442,
"step": 1004
},
{
"epoch": 1.5977742448330683,
"grad_norm": 7.52713288008192,
"learning_rate": 5.314321901078355e-06,
"loss": 2.4691824913024902,
"step": 1005
},
{
"epoch": 1.5993640699523053,
"grad_norm": 11.37720619082473,
"learning_rate": 5.305088804078559e-06,
"loss": 1.672831416130066,
"step": 1006
},
{
"epoch": 1.6009538950715423,
"grad_norm": 9.716827384821913,
"learning_rate": 5.295854662718062e-06,
"loss": 1.9149669408798218,
"step": 1007
},
{
"epoch": 1.602543720190779,
"grad_norm": 16.708710476919038,
"learning_rate": 5.286619508606595e-06,
"loss": 1.4306385517120361,
"step": 1008
},
{
"epoch": 1.6041335453100158,
"grad_norm": 9.407323555091757,
"learning_rate": 5.277383373357353e-06,
"loss": 1.8381001949310303,
"step": 1009
},
{
"epoch": 1.6057233704292528,
"grad_norm": 13.934447524856429,
"learning_rate": 5.268146288586893e-06,
"loss": 1.8029513359069824,
"step": 1010
},
{
"epoch": 1.6073131955484896,
"grad_norm": 9.50195517935946,
"learning_rate": 5.258908285915014e-06,
"loss": 1.353687047958374,
"step": 1011
},
{
"epoch": 1.6089030206677264,
"grad_norm": 11.305832651115942,
"learning_rate": 5.249669396964665e-06,
"loss": 1.2117490768432617,
"step": 1012
},
{
"epoch": 1.6104928457869634,
"grad_norm": 13.13493812642368,
"learning_rate": 5.2404296533618285e-06,
"loss": 1.570952296257019,
"step": 1013
},
{
"epoch": 1.6120826709062004,
"grad_norm": 8.042406666850942,
"learning_rate": 5.231189086735406e-06,
"loss": 1.1835918426513672,
"step": 1014
},
{
"epoch": 1.6136724960254372,
"grad_norm": 7.452052581285824,
"learning_rate": 5.221947728717126e-06,
"loss": 1.1483402252197266,
"step": 1015
},
{
"epoch": 1.615262321144674,
"grad_norm": 10.17335158903229,
"learning_rate": 5.212705610941417e-06,
"loss": 1.547670602798462,
"step": 1016
},
{
"epoch": 1.616852146263911,
"grad_norm": 17.268602211235677,
"learning_rate": 5.203462765045313e-06,
"loss": 2.911768913269043,
"step": 1017
},
{
"epoch": 1.618441971383148,
"grad_norm": 10.827239036290603,
"learning_rate": 5.1942192226683385e-06,
"loss": 1.5376920700073242,
"step": 1018
},
{
"epoch": 1.6200317965023847,
"grad_norm": 12.49070377010686,
"learning_rate": 5.184975015452407e-06,
"loss": 2.1733736991882324,
"step": 1019
},
{
"epoch": 1.6216216216216215,
"grad_norm": 9.51294833642187,
"learning_rate": 5.1757301750416996e-06,
"loss": 1.2068171501159668,
"step": 1020
},
{
"epoch": 1.6232114467408585,
"grad_norm": 17.13733775730942,
"learning_rate": 5.166484733082572e-06,
"loss": 2.067399501800537,
"step": 1021
},
{
"epoch": 1.6248012718600955,
"grad_norm": 9.397305167537892,
"learning_rate": 5.157238721223433e-06,
"loss": 1.0969213247299194,
"step": 1022
},
{
"epoch": 1.6263910969793323,
"grad_norm": 14.102433470211162,
"learning_rate": 5.1479921711146495e-06,
"loss": 2.003542184829712,
"step": 1023
},
{
"epoch": 1.627980922098569,
"grad_norm": 11.795355928597715,
"learning_rate": 5.138745114408427e-06,
"loss": 1.537621021270752,
"step": 1024
},
{
"epoch": 1.629570747217806,
"grad_norm": 8.8711281512968,
"learning_rate": 5.1294975827587015e-06,
"loss": 1.231778621673584,
"step": 1025
},
{
"epoch": 1.631160572337043,
"grad_norm": 11.85504205931933,
"learning_rate": 5.1202496078210415e-06,
"loss": 1.3609113693237305,
"step": 1026
},
{
"epoch": 1.6327503974562798,
"grad_norm": 11.946327249887682,
"learning_rate": 5.111001221252528e-06,
"loss": 1.6268502473831177,
"step": 1027
},
{
"epoch": 1.6343402225755166,
"grad_norm": 12.232703013961357,
"learning_rate": 5.101752454711657e-06,
"loss": 1.505003571510315,
"step": 1028
},
{
"epoch": 1.6359300476947536,
"grad_norm": 19.083352218371367,
"learning_rate": 5.092503339858216e-06,
"loss": 1.8680495023727417,
"step": 1029
},
{
"epoch": 1.6375198728139906,
"grad_norm": 14.054082187442837,
"learning_rate": 5.083253908353193e-06,
"loss": 1.8762643337249756,
"step": 1030
},
{
"epoch": 1.6391096979332274,
"grad_norm": 7.699927162964524,
"learning_rate": 5.074004191858656e-06,
"loss": 1.466231346130371,
"step": 1031
},
{
"epoch": 1.6406995230524641,
"grad_norm": 12.776351055451174,
"learning_rate": 5.06475422203765e-06,
"loss": 2.1224594116210938,
"step": 1032
},
{
"epoch": 1.6422893481717011,
"grad_norm": 9.167891892890259,
"learning_rate": 5.055504030554088e-06,
"loss": 0.92369544506073,
"step": 1033
},
{
"epoch": 1.6438791732909381,
"grad_norm": 9.219248419523527,
"learning_rate": 5.046253649072637e-06,
"loss": 1.374680995941162,
"step": 1034
},
{
"epoch": 1.645468998410175,
"grad_norm": 13.856228172472624,
"learning_rate": 5.037003109258619e-06,
"loss": 1.4022250175476074,
"step": 1035
},
{
"epoch": 1.6470588235294117,
"grad_norm": 18.383371823664145,
"learning_rate": 5.0277524427778986e-06,
"loss": 1.4787659645080566,
"step": 1036
},
{
"epoch": 1.6486486486486487,
"grad_norm": 16.679763723284253,
"learning_rate": 5.018501681296772e-06,
"loss": 1.5815346240997314,
"step": 1037
},
{
"epoch": 1.6502384737678857,
"grad_norm": 14.783063826660744,
"learning_rate": 5.00925085648186e-06,
"loss": 1.5328896045684814,
"step": 1038
},
{
"epoch": 1.6518282988871225,
"grad_norm": 17.505178205736698,
"learning_rate": 5e-06,
"loss": 1.2714117765426636,
"step": 1039
},
{
"epoch": 1.6534181240063592,
"grad_norm": 9.665591959431389,
"learning_rate": 4.990749143518141e-06,
"loss": 1.1989299058914185,
"step": 1040
},
{
"epoch": 1.6550079491255962,
"grad_norm": 8.381579498477992,
"learning_rate": 4.9814983187032285e-06,
"loss": 1.792160987854004,
"step": 1041
},
{
"epoch": 1.6565977742448332,
"grad_norm": 12.647260516370203,
"learning_rate": 4.972247557222102e-06,
"loss": 1.248565435409546,
"step": 1042
},
{
"epoch": 1.6581875993640698,
"grad_norm": 11.615346352985599,
"learning_rate": 4.962996890741382e-06,
"loss": 2.1052873134613037,
"step": 1043
},
{
"epoch": 1.6597774244833068,
"grad_norm": 9.539901754318029,
"learning_rate": 4.953746350927365e-06,
"loss": 1.4773461818695068,
"step": 1044
},
{
"epoch": 1.6613672496025438,
"grad_norm": 12.189800981021298,
"learning_rate": 4.944495969445914e-06,
"loss": 1.362640619277954,
"step": 1045
},
{
"epoch": 1.6629570747217806,
"grad_norm": 13.405337529322672,
"learning_rate": 4.9352457779623515e-06,
"loss": 1.1369311809539795,
"step": 1046
},
{
"epoch": 1.6645468998410173,
"grad_norm": 12.750578128848364,
"learning_rate": 4.925995808141345e-06,
"loss": 1.287116289138794,
"step": 1047
},
{
"epoch": 1.6661367249602543,
"grad_norm": 12.715618535880669,
"learning_rate": 4.916746091646808e-06,
"loss": 1.0644608736038208,
"step": 1048
},
{
"epoch": 1.6677265500794913,
"grad_norm": 10.084441280599995,
"learning_rate": 4.907496660141784e-06,
"loss": 1.4485750198364258,
"step": 1049
},
{
"epoch": 1.669316375198728,
"grad_norm": 9.612770128377981,
"learning_rate": 4.898247545288345e-06,
"loss": 1.5343468189239502,
"step": 1050
},
{
"epoch": 1.6709062003179649,
"grad_norm": 15.81650026520017,
"learning_rate": 4.8889987787474716e-06,
"loss": 1.8680295944213867,
"step": 1051
},
{
"epoch": 1.6724960254372019,
"grad_norm": 9.857358251920665,
"learning_rate": 4.879750392178959e-06,
"loss": 1.6718649864196777,
"step": 1052
},
{
"epoch": 1.6740858505564389,
"grad_norm": 8.410780391145309,
"learning_rate": 4.870502417241301e-06,
"loss": 1.4237810373306274,
"step": 1053
},
{
"epoch": 1.6756756756756757,
"grad_norm": 12.84467315414432,
"learning_rate": 4.8612548855915755e-06,
"loss": 1.3827040195465088,
"step": 1054
},
{
"epoch": 1.6772655007949124,
"grad_norm": 9.523992781457146,
"learning_rate": 4.852007828885351e-06,
"loss": 1.0847723484039307,
"step": 1055
},
{
"epoch": 1.6788553259141494,
"grad_norm": 21.357651931004337,
"learning_rate": 4.842761278776569e-06,
"loss": 1.4171757698059082,
"step": 1056
},
{
"epoch": 1.6804451510333864,
"grad_norm": 11.891847367476064,
"learning_rate": 4.833515266917431e-06,
"loss": 1.6752054691314697,
"step": 1057
},
{
"epoch": 1.6820349761526232,
"grad_norm": 13.400296577285918,
"learning_rate": 4.824269824958303e-06,
"loss": 1.6439988613128662,
"step": 1058
},
{
"epoch": 1.68362480127186,
"grad_norm": 12.5119030672635,
"learning_rate": 4.815024984547595e-06,
"loss": 1.913288950920105,
"step": 1059
},
{
"epoch": 1.685214626391097,
"grad_norm": 20.74654892530919,
"learning_rate": 4.805780777331662e-06,
"loss": 1.6886498928070068,
"step": 1060
},
{
"epoch": 1.686804451510334,
"grad_norm": 7.865464674765795,
"learning_rate": 4.796537234954689e-06,
"loss": 1.3677489757537842,
"step": 1061
},
{
"epoch": 1.6883942766295708,
"grad_norm": 10.073894502078147,
"learning_rate": 4.787294389058584e-06,
"loss": 1.2702281475067139,
"step": 1062
},
{
"epoch": 1.6899841017488075,
"grad_norm": 30.369206738847247,
"learning_rate": 4.778052271282875e-06,
"loss": 1.4017125368118286,
"step": 1063
},
{
"epoch": 1.6915739268680445,
"grad_norm": 9.499711488415217,
"learning_rate": 4.7688109132645945e-06,
"loss": 1.7769944667816162,
"step": 1064
},
{
"epoch": 1.6931637519872815,
"grad_norm": 9.145985663104037,
"learning_rate": 4.759570346638174e-06,
"loss": 1.2236988544464111,
"step": 1065
},
{
"epoch": 1.6947535771065183,
"grad_norm": 10.142690351521992,
"learning_rate": 4.750330603035336e-06,
"loss": 1.8145155906677246,
"step": 1066
},
{
"epoch": 1.696343402225755,
"grad_norm": 8.110942874236551,
"learning_rate": 4.7410917140849875e-06,
"loss": 1.3042653799057007,
"step": 1067
},
{
"epoch": 1.697933227344992,
"grad_norm": 15.99942668926325,
"learning_rate": 4.731853711413109e-06,
"loss": 3.3661022186279297,
"step": 1068
},
{
"epoch": 1.699523052464229,
"grad_norm": 17.349050742085467,
"learning_rate": 4.722616626642648e-06,
"loss": 1.630997896194458,
"step": 1069
},
{
"epoch": 1.7011128775834659,
"grad_norm": 9.241213734802546,
"learning_rate": 4.713380491393407e-06,
"loss": 2.056382179260254,
"step": 1070
},
{
"epoch": 1.7027027027027026,
"grad_norm": 7.777110027923069,
"learning_rate": 4.704145337281939e-06,
"loss": 1.5880179405212402,
"step": 1071
},
{
"epoch": 1.7042925278219396,
"grad_norm": 10.501792077584147,
"learning_rate": 4.694911195921443e-06,
"loss": 1.3245116472244263,
"step": 1072
},
{
"epoch": 1.7058823529411766,
"grad_norm": 14.581112130568533,
"learning_rate": 4.685678098921646e-06,
"loss": 1.5311322212219238,
"step": 1073
},
{
"epoch": 1.7074721780604134,
"grad_norm": 23.662013399713306,
"learning_rate": 4.676446077888702e-06,
"loss": 1.4951614141464233,
"step": 1074
},
{
"epoch": 1.7090620031796502,
"grad_norm": 12.80668956016228,
"learning_rate": 4.66721516442508e-06,
"loss": 1.113204002380371,
"step": 1075
},
{
"epoch": 1.7106518282988872,
"grad_norm": 8.974898814582193,
"learning_rate": 4.65798539012946e-06,
"loss": 1.6649549007415771,
"step": 1076
},
{
"epoch": 1.712241653418124,
"grad_norm": 16.18611041927037,
"learning_rate": 4.648756786596623e-06,
"loss": 1.1735410690307617,
"step": 1077
},
{
"epoch": 1.7138314785373607,
"grad_norm": 13.935037443806461,
"learning_rate": 4.6395293854173395e-06,
"loss": 1.265052080154419,
"step": 1078
},
{
"epoch": 1.7154213036565977,
"grad_norm": 7.812844399134349,
"learning_rate": 4.630303218178268e-06,
"loss": 1.7603724002838135,
"step": 1079
},
{
"epoch": 1.7170111287758347,
"grad_norm": 21.589524221730095,
"learning_rate": 4.6210783164618365e-06,
"loss": 1.5719702243804932,
"step": 1080
},
{
"epoch": 1.7186009538950715,
"grad_norm": 10.395595369165518,
"learning_rate": 4.611854711846147e-06,
"loss": 0.8241512775421143,
"step": 1081
},
{
"epoch": 1.7201907790143083,
"grad_norm": 9.214004268952669,
"learning_rate": 4.6026324359048605e-06,
"loss": 0.8531097173690796,
"step": 1082
},
{
"epoch": 1.7217806041335453,
"grad_norm": 28.760024359718773,
"learning_rate": 4.593411520207089e-06,
"loss": 1.7780548334121704,
"step": 1083
},
{
"epoch": 1.7233704292527823,
"grad_norm": 12.82561855274378,
"learning_rate": 4.584191996317285e-06,
"loss": 1.3491740226745605,
"step": 1084
},
{
"epoch": 1.724960254372019,
"grad_norm": 11.42159575316779,
"learning_rate": 4.574973895795142e-06,
"loss": 1.5338444709777832,
"step": 1085
},
{
"epoch": 1.7265500794912558,
"grad_norm": 13.967603062524692,
"learning_rate": 4.565757250195478e-06,
"loss": 1.6991304159164429,
"step": 1086
},
{
"epoch": 1.7281399046104928,
"grad_norm": 8.779947538783102,
"learning_rate": 4.5565420910681334e-06,
"loss": 1.460195779800415,
"step": 1087
},
{
"epoch": 1.7297297297297298,
"grad_norm": 13.016026044250268,
"learning_rate": 4.547328449957855e-06,
"loss": 1.5150680541992188,
"step": 1088
},
{
"epoch": 1.7313195548489666,
"grad_norm": 13.351283400681424,
"learning_rate": 4.538116358404197e-06,
"loss": 1.753143548965454,
"step": 1089
},
{
"epoch": 1.7329093799682034,
"grad_norm": 19.367068157556663,
"learning_rate": 4.528905847941411e-06,
"loss": 2.2308993339538574,
"step": 1090
},
{
"epoch": 1.7344992050874404,
"grad_norm": 44.34625747692627,
"learning_rate": 4.5196969500983315e-06,
"loss": 2.8182756900787354,
"step": 1091
},
{
"epoch": 1.7360890302066774,
"grad_norm": 25.01461465314549,
"learning_rate": 4.510489696398276e-06,
"loss": 0.9819879531860352,
"step": 1092
},
{
"epoch": 1.7376788553259142,
"grad_norm": 10.034790142202231,
"learning_rate": 4.501284118358932e-06,
"loss": 2.123917818069458,
"step": 1093
},
{
"epoch": 1.739268680445151,
"grad_norm": 10.237222093871212,
"learning_rate": 4.492080247492253e-06,
"loss": 1.394527554512024,
"step": 1094
},
{
"epoch": 1.740858505564388,
"grad_norm": 8.342748762012173,
"learning_rate": 4.482878115304349e-06,
"loss": 1.162081003189087,
"step": 1095
},
{
"epoch": 1.742448330683625,
"grad_norm": 17.246089483390268,
"learning_rate": 4.473677753295375e-06,
"loss": 1.4420222043991089,
"step": 1096
},
{
"epoch": 1.7440381558028617,
"grad_norm": 6.643472749032102,
"learning_rate": 4.4644791929594275e-06,
"loss": 1.4935743808746338,
"step": 1097
},
{
"epoch": 1.7456279809220985,
"grad_norm": 14.660507382859036,
"learning_rate": 4.455282465784439e-06,
"loss": 1.5564453601837158,
"step": 1098
},
{
"epoch": 1.7472178060413355,
"grad_norm": 12.368786222061885,
"learning_rate": 4.446087603252063e-06,
"loss": 1.4903934001922607,
"step": 1099
},
{
"epoch": 1.7488076311605725,
"grad_norm": 11.650791064204592,
"learning_rate": 4.436894636837572e-06,
"loss": 1.6506072282791138,
"step": 1100
},
{
"epoch": 1.7503974562798092,
"grad_norm": 8.847823276469398,
"learning_rate": 4.427703598009746e-06,
"loss": 2.0559582710266113,
"step": 1101
},
{
"epoch": 1.751987281399046,
"grad_norm": 17.273679884242704,
"learning_rate": 4.418514518230769e-06,
"loss": 1.9226901531219482,
"step": 1102
},
{
"epoch": 1.753577106518283,
"grad_norm": 11.410574357531535,
"learning_rate": 4.4093274289561175e-06,
"loss": 1.615866780281067,
"step": 1103
},
{
"epoch": 1.75516693163752,
"grad_norm": 7.140395814782854,
"learning_rate": 4.400142361634455e-06,
"loss": 1.7190699577331543,
"step": 1104
},
{
"epoch": 1.7567567567567568,
"grad_norm": 8.70500340975263,
"learning_rate": 4.390959347707521e-06,
"loss": 1.3968921899795532,
"step": 1105
},
{
"epoch": 1.7583465818759936,
"grad_norm": 10.63964214517544,
"learning_rate": 4.381778418610032e-06,
"loss": 1.8509771823883057,
"step": 1106
},
{
"epoch": 1.7599364069952306,
"grad_norm": 12.689878068855194,
"learning_rate": 4.372599605769559e-06,
"loss": 1.6396044492721558,
"step": 1107
},
{
"epoch": 1.7615262321144676,
"grad_norm": 9.633114460073033,
"learning_rate": 4.363422940606435e-06,
"loss": 2.012916326522827,
"step": 1108
},
{
"epoch": 1.7631160572337043,
"grad_norm": 10.88535400055693,
"learning_rate": 4.354248454533642e-06,
"loss": 1.6241707801818848,
"step": 1109
},
{
"epoch": 1.7647058823529411,
"grad_norm": 14.496830646494981,
"learning_rate": 4.3450761789567e-06,
"loss": 1.930153250694275,
"step": 1110
},
{
"epoch": 1.7662957074721781,
"grad_norm": 8.198111506627123,
"learning_rate": 4.33590614527356e-06,
"loss": 1.298691987991333,
"step": 1111
},
{
"epoch": 1.767885532591415,
"grad_norm": 17.898130255991628,
"learning_rate": 4.326738384874504e-06,
"loss": 1.3378024101257324,
"step": 1112
},
{
"epoch": 1.7694753577106517,
"grad_norm": 17.628047594457833,
"learning_rate": 4.3175729291420274e-06,
"loss": 1.0957921743392944,
"step": 1113
},
{
"epoch": 1.7710651828298887,
"grad_norm": 19.940812720592632,
"learning_rate": 4.308409809450742e-06,
"loss": 1.656261682510376,
"step": 1114
},
{
"epoch": 1.7726550079491257,
"grad_norm": 13.828440615939675,
"learning_rate": 4.299249057167257e-06,
"loss": 1.5342800617218018,
"step": 1115
},
{
"epoch": 1.7742448330683624,
"grad_norm": 20.91069601345002,
"learning_rate": 4.29009070365008e-06,
"loss": 2.8556060791015625,
"step": 1116
},
{
"epoch": 1.7758346581875992,
"grad_norm": 11.843670910836927,
"learning_rate": 4.280934780249508e-06,
"loss": 1.4000861644744873,
"step": 1117
},
{
"epoch": 1.7774244833068362,
"grad_norm": 9.403771542463636,
"learning_rate": 4.271781318307521e-06,
"loss": 1.664405345916748,
"step": 1118
},
{
"epoch": 1.7790143084260732,
"grad_norm": 9.385108394505194,
"learning_rate": 4.262630349157668e-06,
"loss": 1.1378430128097534,
"step": 1119
},
{
"epoch": 1.78060413354531,
"grad_norm": 11.862856460689603,
"learning_rate": 4.253481904124968e-06,
"loss": 1.1843236684799194,
"step": 1120
},
{
"epoch": 1.7821939586645468,
"grad_norm": 10.593527889540056,
"learning_rate": 4.244336014525802e-06,
"loss": 1.4108467102050781,
"step": 1121
},
{
"epoch": 1.7837837837837838,
"grad_norm": 9.984790464911894,
"learning_rate": 4.235192711667801e-06,
"loss": 1.5492326021194458,
"step": 1122
},
{
"epoch": 1.7853736089030208,
"grad_norm": 17.193473504259163,
"learning_rate": 4.226052026849737e-06,
"loss": 2.0661959648132324,
"step": 1123
},
{
"epoch": 1.7869634340222575,
"grad_norm": 10.56957621143626,
"learning_rate": 4.216913991361426e-06,
"loss": 1.0835058689117432,
"step": 1124
},
{
"epoch": 1.7885532591414943,
"grad_norm": 13.32367793440441,
"learning_rate": 4.207778636483616e-06,
"loss": 1.1431982517242432,
"step": 1125
},
{
"epoch": 1.7901430842607313,
"grad_norm": 12.912193647240832,
"learning_rate": 4.198645993487872e-06,
"loss": 1.5615761280059814,
"step": 1126
},
{
"epoch": 1.7917329093799683,
"grad_norm": 14.856664790888582,
"learning_rate": 4.1895160936364835e-06,
"loss": 1.387691617012024,
"step": 1127
},
{
"epoch": 1.793322734499205,
"grad_norm": 18.856555512029352,
"learning_rate": 4.180388968182344e-06,
"loss": 2.3428382873535156,
"step": 1128
},
{
"epoch": 1.7949125596184419,
"grad_norm": 13.097645343955634,
"learning_rate": 4.171264648368852e-06,
"loss": 1.2207417488098145,
"step": 1129
},
{
"epoch": 1.7965023847376789,
"grad_norm": 13.416910681636399,
"learning_rate": 4.1621431654298024e-06,
"loss": 1.4143396615982056,
"step": 1130
},
{
"epoch": 1.7980922098569159,
"grad_norm": 15.210472614344281,
"learning_rate": 4.153024550589281e-06,
"loss": 1.1632781028747559,
"step": 1131
},
{
"epoch": 1.7996820349761526,
"grad_norm": 13.475608442079784,
"learning_rate": 4.143908835061551e-06,
"loss": 1.1491469144821167,
"step": 1132
},
{
"epoch": 1.8012718600953894,
"grad_norm": 13.319515226159563,
"learning_rate": 4.134796050050953e-06,
"loss": 1.3703821897506714,
"step": 1133
},
{
"epoch": 1.8028616852146264,
"grad_norm": 14.676186602780401,
"learning_rate": 4.125686226751797e-06,
"loss": 1.8206853866577148,
"step": 1134
},
{
"epoch": 1.8044515103338634,
"grad_norm": 13.940910741736662,
"learning_rate": 4.116579396348253e-06,
"loss": 1.140876293182373,
"step": 1135
},
{
"epoch": 1.8060413354531002,
"grad_norm": 11.407075681936295,
"learning_rate": 4.107475590014249e-06,
"loss": 1.3825416564941406,
"step": 1136
},
{
"epoch": 1.807631160572337,
"grad_norm": 19.395287518899767,
"learning_rate": 4.098374838913357e-06,
"loss": 1.551064133644104,
"step": 1137
},
{
"epoch": 1.809220985691574,
"grad_norm": 13.327024051149664,
"learning_rate": 4.089277174198694e-06,
"loss": 1.6357027292251587,
"step": 1138
},
{
"epoch": 1.810810810810811,
"grad_norm": 9.964726159477475,
"learning_rate": 4.080182627012809e-06,
"loss": 1.3184521198272705,
"step": 1139
},
{
"epoch": 1.8124006359300477,
"grad_norm": 12.739326586882429,
"learning_rate": 4.0710912284875825e-06,
"loss": 1.131117343902588,
"step": 1140
},
{
"epoch": 1.8139904610492845,
"grad_norm": 10.310528998181276,
"learning_rate": 4.062003009744115e-06,
"loss": 0.9722883105278015,
"step": 1141
},
{
"epoch": 1.8155802861685215,
"grad_norm": 9.048014501981426,
"learning_rate": 4.0529180018926204e-06,
"loss": 1.4159162044525146,
"step": 1142
},
{
"epoch": 1.8171701112877583,
"grad_norm": 11.393083494341436,
"learning_rate": 4.0438362360323235e-06,
"loss": 1.8594584465026855,
"step": 1143
},
{
"epoch": 1.818759936406995,
"grad_norm": 9.77361123634561,
"learning_rate": 4.0347577432513515e-06,
"loss": 1.2384434938430786,
"step": 1144
},
{
"epoch": 1.820349761526232,
"grad_norm": 9.69850917062634,
"learning_rate": 4.025682554626627e-06,
"loss": 1.332160234451294,
"step": 1145
},
{
"epoch": 1.821939586645469,
"grad_norm": 8.92969480348373,
"learning_rate": 4.016610701223761e-06,
"loss": 1.4852681159973145,
"step": 1146
},
{
"epoch": 1.8235294117647058,
"grad_norm": 9.888731821630902,
"learning_rate": 4.007542214096947e-06,
"loss": 0.9297858476638794,
"step": 1147
},
{
"epoch": 1.8251192368839426,
"grad_norm": 12.811337259058794,
"learning_rate": 3.99847712428886e-06,
"loss": 1.359275221824646,
"step": 1148
},
{
"epoch": 1.8267090620031796,
"grad_norm": 19.484812837344027,
"learning_rate": 3.98941546283054e-06,
"loss": 1.4129400253295898,
"step": 1149
},
{
"epoch": 1.8282988871224166,
"grad_norm": 9.3270830388869,
"learning_rate": 3.980357260741293e-06,
"loss": 2.279143810272217,
"step": 1150
},
{
"epoch": 1.8298887122416534,
"grad_norm": 16.43903195846325,
"learning_rate": 3.971302549028584e-06,
"loss": 1.1392877101898193,
"step": 1151
},
{
"epoch": 1.8314785373608902,
"grad_norm": 14.606589733785624,
"learning_rate": 3.96225135868793e-06,
"loss": 1.7062054872512817,
"step": 1152
},
{
"epoch": 1.8330683624801272,
"grad_norm": 14.665917808717666,
"learning_rate": 3.953203720702793e-06,
"loss": 1.5431506633758545,
"step": 1153
},
{
"epoch": 1.8346581875993642,
"grad_norm": 13.356989223513242,
"learning_rate": 3.944159666044475e-06,
"loss": 1.9477022886276245,
"step": 1154
},
{
"epoch": 1.836248012718601,
"grad_norm": 8.341193828787407,
"learning_rate": 3.935119225672011e-06,
"loss": 1.216181755065918,
"step": 1155
},
{
"epoch": 1.8378378378378377,
"grad_norm": 10.457325709133471,
"learning_rate": 3.926082430532067e-06,
"loss": 1.0427238941192627,
"step": 1156
},
{
"epoch": 1.8394276629570747,
"grad_norm": 16.0775639146379,
"learning_rate": 3.917049311558826e-06,
"loss": 2.7715682983398438,
"step": 1157
},
{
"epoch": 1.8410174880763117,
"grad_norm": 21.198142682060794,
"learning_rate": 3.908019899673893e-06,
"loss": 1.289351463317871,
"step": 1158
},
{
"epoch": 1.8426073131955485,
"grad_norm": 10.329857163101414,
"learning_rate": 3.898994225786178e-06,
"loss": 2.824537754058838,
"step": 1159
},
{
"epoch": 1.8441971383147853,
"grad_norm": 8.45326317662246,
"learning_rate": 3.889972320791794e-06,
"loss": 1.3787089586257935,
"step": 1160
},
{
"epoch": 1.8457869634340223,
"grad_norm": 9.351953802142202,
"learning_rate": 3.880954215573959e-06,
"loss": 1.7395728826522827,
"step": 1161
},
{
"epoch": 1.8473767885532593,
"grad_norm": 11.805906719129295,
"learning_rate": 3.87193994100288e-06,
"loss": 1.3916877508163452,
"step": 1162
},
{
"epoch": 1.848966613672496,
"grad_norm": 11.13344464632568,
"learning_rate": 3.8629295279356495e-06,
"loss": 1.7816511392593384,
"step": 1163
},
{
"epoch": 1.8505564387917328,
"grad_norm": 13.802686240477355,
"learning_rate": 3.853923007216148e-06,
"loss": 1.5507429838180542,
"step": 1164
},
{
"epoch": 1.8521462639109698,
"grad_norm": 9.43063113054961,
"learning_rate": 3.8449204096749235e-06,
"loss": 1.4492930173873901,
"step": 1165
},
{
"epoch": 1.8537360890302068,
"grad_norm": 13.107457709159075,
"learning_rate": 3.8359217661291e-06,
"loss": 1.2537250518798828,
"step": 1166
},
{
"epoch": 1.8553259141494436,
"grad_norm": 16.879618538572284,
"learning_rate": 3.826927107382266e-06,
"loss": 1.6310523748397827,
"step": 1167
},
{
"epoch": 1.8569157392686804,
"grad_norm": 15.612894861680394,
"learning_rate": 3.817936464224367e-06,
"loss": 1.04306960105896,
"step": 1168
},
{
"epoch": 1.8585055643879174,
"grad_norm": 11.568208703788091,
"learning_rate": 3.8089498674316038e-06,
"loss": 1.5552500486373901,
"step": 1169
},
{
"epoch": 1.8600953895071544,
"grad_norm": 10.170721127512651,
"learning_rate": 3.7999673477663275e-06,
"loss": 1.0391424894332886,
"step": 1170
},
{
"epoch": 1.8616852146263911,
"grad_norm": 13.356876657888899,
"learning_rate": 3.79098893597693e-06,
"loss": 1.7671284675598145,
"step": 1171
},
{
"epoch": 1.863275039745628,
"grad_norm": 10.694613723154546,
"learning_rate": 3.782014662797745e-06,
"loss": 1.4554939270019531,
"step": 1172
},
{
"epoch": 1.864864864864865,
"grad_norm": 11.259540296955532,
"learning_rate": 3.773044558948934e-06,
"loss": 1.2981938123703003,
"step": 1173
},
{
"epoch": 1.866454689984102,
"grad_norm": 9.445366035807636,
"learning_rate": 3.764078655136391e-06,
"loss": 1.6467961072921753,
"step": 1174
},
{
"epoch": 1.8680445151033387,
"grad_norm": 10.085891490611074,
"learning_rate": 3.75511698205163e-06,
"loss": 1.3536697626113892,
"step": 1175
},
{
"epoch": 1.8696343402225755,
"grad_norm": 14.678813192106523,
"learning_rate": 3.7461595703716847e-06,
"loss": 1.0834566354751587,
"step": 1176
},
{
"epoch": 1.8712241653418125,
"grad_norm": 10.338033788368241,
"learning_rate": 3.737206450758999e-06,
"loss": 1.3749675750732422,
"step": 1177
},
{
"epoch": 1.8728139904610492,
"grad_norm": 12.932288807465243,
"learning_rate": 3.7282576538613257e-06,
"loss": 1.0677857398986816,
"step": 1178
},
{
"epoch": 1.874403815580286,
"grad_norm": 14.637002385739367,
"learning_rate": 3.7193132103116204e-06,
"loss": 1.2733405828475952,
"step": 1179
},
{
"epoch": 1.875993640699523,
"grad_norm": 9.054040962514696,
"learning_rate": 3.7103731507279383e-06,
"loss": 1.2556195259094238,
"step": 1180
},
{
"epoch": 1.87758346581876,
"grad_norm": 10.435062065797702,
"learning_rate": 3.7014375057133244e-06,
"loss": 1.6212596893310547,
"step": 1181
},
{
"epoch": 1.8791732909379968,
"grad_norm": 8.755640184498002,
"learning_rate": 3.692506305855713e-06,
"loss": 1.2575325965881348,
"step": 1182
},
{
"epoch": 1.8807631160572336,
"grad_norm": 7.678575585216897,
"learning_rate": 3.683579581727824e-06,
"loss": 1.2312979698181152,
"step": 1183
},
{
"epoch": 1.8823529411764706,
"grad_norm": 9.349154680394342,
"learning_rate": 3.674657363887054e-06,
"loss": 1.901777982711792,
"step": 1184
},
{
"epoch": 1.8839427662957076,
"grad_norm": 9.810069442357346,
"learning_rate": 3.6657396828753777e-06,
"loss": 1.9274979829788208,
"step": 1185
},
{
"epoch": 1.8855325914149443,
"grad_norm": 17.442528159931612,
"learning_rate": 3.656826569219233e-06,
"loss": 2.0956759452819824,
"step": 1186
},
{
"epoch": 1.8871224165341811,
"grad_norm": 14.980699322166306,
"learning_rate": 3.6479180534294266e-06,
"loss": 1.862923502922058,
"step": 1187
},
{
"epoch": 1.8887122416534181,
"grad_norm": 13.712955069068984,
"learning_rate": 3.639014166001028e-06,
"loss": 1.348436951637268,
"step": 1188
},
{
"epoch": 1.890302066772655,
"grad_norm": 20.71826439241576,
"learning_rate": 3.6301149374132615e-06,
"loss": 2.031398296356201,
"step": 1189
},
{
"epoch": 1.8918918918918919,
"grad_norm": 9.594973129413956,
"learning_rate": 3.6212203981294036e-06,
"loss": 1.497904658317566,
"step": 1190
},
{
"epoch": 1.8934817170111287,
"grad_norm": 11.319616227089226,
"learning_rate": 3.612330578596679e-06,
"loss": 1.0648324489593506,
"step": 1191
},
{
"epoch": 1.8950715421303657,
"grad_norm": 10.380379765203418,
"learning_rate": 3.603445509246154e-06,
"loss": 1.3865478038787842,
"step": 1192
},
{
"epoch": 1.8966613672496027,
"grad_norm": 13.477553345089566,
"learning_rate": 3.5945652204926372e-06,
"loss": 1.2981629371643066,
"step": 1193
},
{
"epoch": 1.8982511923688394,
"grad_norm": 12.006104786236229,
"learning_rate": 3.585689742734572e-06,
"loss": 1.333137035369873,
"step": 1194
},
{
"epoch": 1.8998410174880762,
"grad_norm": 8.21852860282474,
"learning_rate": 3.5768191063539326e-06,
"loss": 1.3987650871276855,
"step": 1195
},
{
"epoch": 1.9014308426073132,
"grad_norm": 18.415493771192033,
"learning_rate": 3.567953341716119e-06,
"loss": 1.9830102920532227,
"step": 1196
},
{
"epoch": 1.9030206677265502,
"grad_norm": 9.680446315656202,
"learning_rate": 3.5590924791698567e-06,
"loss": 1.4442445039749146,
"step": 1197
},
{
"epoch": 1.904610492845787,
"grad_norm": 11.630049989862073,
"learning_rate": 3.55023654904709e-06,
"loss": 1.189267873764038,
"step": 1198
},
{
"epoch": 1.9062003179650238,
"grad_norm": 11.072476326883493,
"learning_rate": 3.5413855816628793e-06,
"loss": 1.0022368431091309,
"step": 1199
},
{
"epoch": 1.9077901430842608,
"grad_norm": 10.767005058187586,
"learning_rate": 3.5325396073152964e-06,
"loss": 1.1614134311676025,
"step": 1200
},
{
"epoch": 1.9093799682034978,
"grad_norm": 12.5607678458347,
"learning_rate": 3.5236986562853193e-06,
"loss": 1.1036311388015747,
"step": 1201
},
{
"epoch": 1.9109697933227345,
"grad_norm": 12.982347384348031,
"learning_rate": 3.5148627588367345e-06,
"loss": 2.124173164367676,
"step": 1202
},
{
"epoch": 1.9125596184419713,
"grad_norm": 14.021168603906222,
"learning_rate": 3.506031945216028e-06,
"loss": 1.0432281494140625,
"step": 1203
},
{
"epoch": 1.9141494435612083,
"grad_norm": 10.671876312297726,
"learning_rate": 3.49720624565228e-06,
"loss": 1.9567753076553345,
"step": 1204
},
{
"epoch": 1.9157392686804453,
"grad_norm": 18.378038994448797,
"learning_rate": 3.488385690357068e-06,
"loss": 1.7380425930023193,
"step": 1205
},
{
"epoch": 1.917329093799682,
"grad_norm": 9.523531652407353,
"learning_rate": 3.4795703095243594e-06,
"loss": 1.3146196603775024,
"step": 1206
},
{
"epoch": 1.9189189189189189,
"grad_norm": 8.311896562107178,
"learning_rate": 3.4707601333304093e-06,
"loss": 1.1068713665008545,
"step": 1207
},
{
"epoch": 1.9205087440381559,
"grad_norm": 11.447076014813135,
"learning_rate": 3.4619551919336538e-06,
"loss": 2.32598876953125,
"step": 1208
},
{
"epoch": 1.9220985691573926,
"grad_norm": 11.267848664247097,
"learning_rate": 3.453155515474612e-06,
"loss": 1.343285322189331,
"step": 1209
},
{
"epoch": 1.9236883942766294,
"grad_norm": 18.00190098116035,
"learning_rate": 3.44436113407578e-06,
"loss": 1.4658093452453613,
"step": 1210
},
{
"epoch": 1.9252782193958664,
"grad_norm": 12.268418304840932,
"learning_rate": 3.435572077841528e-06,
"loss": 1.5446200370788574,
"step": 1211
},
{
"epoch": 1.9268680445151034,
"grad_norm": 10.940958777182,
"learning_rate": 3.4267883768579996e-06,
"loss": 1.5812410116195679,
"step": 1212
},
{
"epoch": 1.9284578696343402,
"grad_norm": 10.559323390919907,
"learning_rate": 3.4180100611930012e-06,
"loss": 1.3475531339645386,
"step": 1213
},
{
"epoch": 1.930047694753577,
"grad_norm": 11.972634265361458,
"learning_rate": 3.4092371608959085e-06,
"loss": 1.5513516664505005,
"step": 1214
},
{
"epoch": 1.931637519872814,
"grad_norm": 12.588846619515197,
"learning_rate": 3.4004697059975587e-06,
"loss": 1.2521653175354004,
"step": 1215
},
{
"epoch": 1.933227344992051,
"grad_norm": 13.983216807336552,
"learning_rate": 3.3917077265101505e-06,
"loss": 1.5460331439971924,
"step": 1216
},
{
"epoch": 1.9348171701112877,
"grad_norm": 16.21049385463335,
"learning_rate": 3.3829512524271378e-06,
"loss": 1.8424224853515625,
"step": 1217
},
{
"epoch": 1.9364069952305245,
"grad_norm": 8.623575480512274,
"learning_rate": 3.3742003137231273e-06,
"loss": 1.7361804246902466,
"step": 1218
},
{
"epoch": 1.9379968203497615,
"grad_norm": 12.923991206768896,
"learning_rate": 3.365454940353779e-06,
"loss": 1.7546651363372803,
"step": 1219
},
{
"epoch": 1.9395866454689985,
"grad_norm": 8.194594971743348,
"learning_rate": 3.3567151622557033e-06,
"loss": 1.4870991706848145,
"step": 1220
},
{
"epoch": 1.9411764705882353,
"grad_norm": 16.249647945871697,
"learning_rate": 3.3479810093463547e-06,
"loss": 2.124351978302002,
"step": 1221
},
{
"epoch": 1.942766295707472,
"grad_norm": 9.919591367300772,
"learning_rate": 3.3392525115239353e-06,
"loss": 1.6585767269134521,
"step": 1222
},
{
"epoch": 1.944356120826709,
"grad_norm": 9.884097866620877,
"learning_rate": 3.330529698667284e-06,
"loss": 1.5039808750152588,
"step": 1223
},
{
"epoch": 1.945945945945946,
"grad_norm": 13.899317503642905,
"learning_rate": 3.321812600635783e-06,
"loss": 1.514086365699768,
"step": 1224
},
{
"epoch": 1.9475357710651828,
"grad_norm": 11.120223530554286,
"learning_rate": 3.3131012472692515e-06,
"loss": 2.1714556217193604,
"step": 1225
},
{
"epoch": 1.9491255961844196,
"grad_norm": 10.914354080354897,
"learning_rate": 3.3043956683878437e-06,
"loss": 1.9673073291778564,
"step": 1226
},
{
"epoch": 1.9507154213036566,
"grad_norm": 12.365178192411566,
"learning_rate": 3.2956958937919448e-06,
"loss": 1.3523991107940674,
"step": 1227
},
{
"epoch": 1.9523052464228936,
"grad_norm": 8.521930902170034,
"learning_rate": 3.2870019532620744e-06,
"loss": 1.9819375276565552,
"step": 1228
},
{
"epoch": 1.9538950715421304,
"grad_norm": 10.504799370654721,
"learning_rate": 3.278313876558781e-06,
"loss": 1.7047924995422363,
"step": 1229
},
{
"epoch": 1.9554848966613672,
"grad_norm": 9.055204538526395,
"learning_rate": 3.269631693422537e-06,
"loss": 1.5361595153808594,
"step": 1230
},
{
"epoch": 1.9570747217806042,
"grad_norm": 6.708125686672845,
"learning_rate": 3.2609554335736435e-06,
"loss": 1.481113314628601,
"step": 1231
},
{
"epoch": 1.9586645468998412,
"grad_norm": 9.296827575354715,
"learning_rate": 3.2522851267121245e-06,
"loss": 1.2066642045974731,
"step": 1232
},
{
"epoch": 1.960254372019078,
"grad_norm": 12.594687183204563,
"learning_rate": 3.2436208025176265e-06,
"loss": 1.624826431274414,
"step": 1233
},
{
"epoch": 1.9618441971383147,
"grad_norm": 9.957789777175805,
"learning_rate": 3.2349624906493164e-06,
"loss": 1.5784817934036255,
"step": 1234
},
{
"epoch": 1.9634340222575517,
"grad_norm": 9.4320758167637,
"learning_rate": 3.2263102207457788e-06,
"loss": 1.2005516290664673,
"step": 1235
},
{
"epoch": 1.9650238473767887,
"grad_norm": 12.596584496831804,
"learning_rate": 3.217664022424917e-06,
"loss": 1.8746891021728516,
"step": 1236
},
{
"epoch": 1.9666136724960255,
"grad_norm": 10.576192009066764,
"learning_rate": 3.2090239252838496e-06,
"loss": 2.1368587017059326,
"step": 1237
},
{
"epoch": 1.9682034976152623,
"grad_norm": 19.169212542664745,
"learning_rate": 3.2003899588988143e-06,
"loss": 1.5662051439285278,
"step": 1238
},
{
"epoch": 1.9697933227344993,
"grad_norm": 15.769420962817392,
"learning_rate": 3.191762152825054e-06,
"loss": 1.284698247909546,
"step": 1239
},
{
"epoch": 1.9713831478537363,
"grad_norm": 9.28155247533557,
"learning_rate": 3.1831405365967315e-06,
"loss": 1.336848497390747,
"step": 1240
},
{
"epoch": 1.972972972972973,
"grad_norm": 12.239541301618456,
"learning_rate": 3.1745251397268175e-06,
"loss": 1.6406328678131104,
"step": 1241
},
{
"epoch": 1.9745627980922098,
"grad_norm": 17.29048355766668,
"learning_rate": 3.1659159917069927e-06,
"loss": 0.8950412273406982,
"step": 1242
},
{
"epoch": 1.9761526232114468,
"grad_norm": 10.856600740023122,
"learning_rate": 3.1573131220075494e-06,
"loss": 1.6217354536056519,
"step": 1243
},
{
"epoch": 1.9777424483306836,
"grad_norm": 8.637494876738593,
"learning_rate": 3.1487165600772883e-06,
"loss": 1.7622885704040527,
"step": 1244
},
{
"epoch": 1.9793322734499204,
"grad_norm": 9.898715586809786,
"learning_rate": 3.140126335343413e-06,
"loss": 1.273987889289856,
"step": 1245
},
{
"epoch": 1.9809220985691574,
"grad_norm": 21.598449261710204,
"learning_rate": 3.1315424772114404e-06,
"loss": 0.9739120006561279,
"step": 1246
},
{
"epoch": 1.9825119236883944,
"grad_norm": 11.108789078925794,
"learning_rate": 3.1229650150650905e-06,
"loss": 1.1030012369155884,
"step": 1247
},
{
"epoch": 1.9841017488076311,
"grad_norm": 11.772345198716755,
"learning_rate": 3.1143939782661875e-06,
"loss": 1.5073179006576538,
"step": 1248
},
{
"epoch": 1.985691573926868,
"grad_norm": 10.00286915146364,
"learning_rate": 3.1058293961545648e-06,
"loss": 1.473888874053955,
"step": 1249
},
{
"epoch": 1.987281399046105,
"grad_norm": 13.510717709557214,
"learning_rate": 3.0972712980479567e-06,
"loss": 1.3762142658233643,
"step": 1250
},
{
"epoch": 1.988871224165342,
"grad_norm": 17.515999727799556,
"learning_rate": 3.0887197132419033e-06,
"loss": 1.0883512496948242,
"step": 1251
},
{
"epoch": 1.9904610492845787,
"grad_norm": 11.385402537880124,
"learning_rate": 3.0801746710096497e-06,
"loss": 1.5562313795089722,
"step": 1252
},
{
"epoch": 1.9920508744038155,
"grad_norm": 9.894651386230901,
"learning_rate": 3.0716362006020443e-06,
"loss": 1.555297613143921,
"step": 1253
},
{
"epoch": 1.9936406995230525,
"grad_norm": 11.531440417844689,
"learning_rate": 3.0631043312474375e-06,
"loss": 1.4571053981781006,
"step": 1254
},
{
"epoch": 1.9952305246422894,
"grad_norm": 11.438541092850024,
"learning_rate": 3.054579092151586e-06,
"loss": 1.3141937255859375,
"step": 1255
},
{
"epoch": 1.9968203497615262,
"grad_norm": 10.832938440767606,
"learning_rate": 3.0460605124975483e-06,
"loss": 1.8371860980987549,
"step": 1256
},
{
"epoch": 1.998410174880763,
"grad_norm": 10.707640697615673,
"learning_rate": 3.0375486214455895e-06,
"loss": 1.7980101108551025,
"step": 1257
},
{
"epoch": 2.0,
"grad_norm": 9.479988371577287,
"learning_rate": 3.0290434481330746e-06,
"loss": 1.6025118827819824,
"step": 1258
},
{
"epoch": 2.001589825119237,
"grad_norm": 15.618086616205854,
"learning_rate": 3.0205450216743753e-06,
"loss": 0.5739709734916687,
"step": 1259
},
{
"epoch": 2.0031796502384736,
"grad_norm": 11.240797695090038,
"learning_rate": 3.012053371160768e-06,
"loss": 0.8699474334716797,
"step": 1260
},
{
"epoch": 2.0047694753577106,
"grad_norm": 9.552987335691743,
"learning_rate": 3.003568525660334e-06,
"loss": 0.43918731808662415,
"step": 1261
},
{
"epoch": 2.0063593004769475,
"grad_norm": 7.818036854277269,
"learning_rate": 2.9950905142178594e-06,
"loss": 0.39753860235214233,
"step": 1262
},
{
"epoch": 2.0079491255961845,
"grad_norm": 9.416756298898695,
"learning_rate": 2.9866193658547365e-06,
"loss": 0.7467893362045288,
"step": 1263
},
{
"epoch": 2.009538950715421,
"grad_norm": 9.986682901762805,
"learning_rate": 2.978155109568864e-06,
"loss": 0.5302591919898987,
"step": 1264
},
{
"epoch": 2.011128775834658,
"grad_norm": 7.2866809350396275,
"learning_rate": 2.9696977743345533e-06,
"loss": 0.36087679862976074,
"step": 1265
},
{
"epoch": 2.012718600953895,
"grad_norm": 7.8227632427897555,
"learning_rate": 2.961247389102413e-06,
"loss": 0.6572636365890503,
"step": 1266
},
{
"epoch": 2.014308426073132,
"grad_norm": 9.835888531542967,
"learning_rate": 2.952803982799271e-06,
"loss": 0.7076925039291382,
"step": 1267
},
{
"epoch": 2.0158982511923687,
"grad_norm": 14.608403115303773,
"learning_rate": 2.94436758432806e-06,
"loss": 0.4047316908836365,
"step": 1268
},
{
"epoch": 2.0174880763116056,
"grad_norm": 7.860380258115871,
"learning_rate": 2.935938222567727e-06,
"loss": 0.6956659555435181,
"step": 1269
},
{
"epoch": 2.0190779014308426,
"grad_norm": 15.00644807734612,
"learning_rate": 2.927515926373129e-06,
"loss": 0.774339497089386,
"step": 1270
},
{
"epoch": 2.0206677265500796,
"grad_norm": 15.25860483287857,
"learning_rate": 2.9191007245749404e-06,
"loss": 0.3957682251930237,
"step": 1271
},
{
"epoch": 2.022257551669316,
"grad_norm": 8.500581046667302,
"learning_rate": 2.9106926459795426e-06,
"loss": 0.5914150476455688,
"step": 1272
},
{
"epoch": 2.023847376788553,
"grad_norm": 12.026226845794612,
"learning_rate": 2.902291719368945e-06,
"loss": 0.4321010708808899,
"step": 1273
},
{
"epoch": 2.02543720190779,
"grad_norm": 11.930831367434969,
"learning_rate": 2.8938979735006635e-06,
"loss": 0.6191248893737793,
"step": 1274
},
{
"epoch": 2.027027027027027,
"grad_norm": 9.521327776569159,
"learning_rate": 2.885511437107638e-06,
"loss": 0.41829949617385864,
"step": 1275
},
{
"epoch": 2.0286168521462637,
"grad_norm": 21.361526672258712,
"learning_rate": 2.8771321388981334e-06,
"loss": 0.9986895322799683,
"step": 1276
},
{
"epoch": 2.0302066772655007,
"grad_norm": 10.078827235638975,
"learning_rate": 2.868760107555628e-06,
"loss": 0.5669399499893188,
"step": 1277
},
{
"epoch": 2.0317965023847377,
"grad_norm": 12.278853369947052,
"learning_rate": 2.860395371738736e-06,
"loss": 0.6854183673858643,
"step": 1278
},
{
"epoch": 2.0333863275039747,
"grad_norm": 9.391439827398543,
"learning_rate": 2.8520379600810886e-06,
"loss": 0.276329904794693,
"step": 1279
},
{
"epoch": 2.0349761526232113,
"grad_norm": 9.96245742568317,
"learning_rate": 2.843687901191248e-06,
"loss": 0.7409742474555969,
"step": 1280
},
{
"epoch": 2.0365659777424483,
"grad_norm": 9.636782695588359,
"learning_rate": 2.8353452236526097e-06,
"loss": 0.4047026038169861,
"step": 1281
},
{
"epoch": 2.0381558028616853,
"grad_norm": 16.916538036900032,
"learning_rate": 2.8270099560232992e-06,
"loss": 0.6543888449668884,
"step": 1282
},
{
"epoch": 2.0397456279809223,
"grad_norm": 10.992728384261314,
"learning_rate": 2.8186821268360757e-06,
"loss": 0.40383610129356384,
"step": 1283
},
{
"epoch": 2.041335453100159,
"grad_norm": 12.030605161431646,
"learning_rate": 2.810361764598241e-06,
"loss": 0.5539910197257996,
"step": 1284
},
{
"epoch": 2.042925278219396,
"grad_norm": 9.36847445067224,
"learning_rate": 2.802048897791529e-06,
"loss": 0.4439224898815155,
"step": 1285
},
{
"epoch": 2.044515103338633,
"grad_norm": 10.237830248539021,
"learning_rate": 2.7937435548720232e-06,
"loss": 0.32635053992271423,
"step": 1286
},
{
"epoch": 2.04610492845787,
"grad_norm": 13.144565026480977,
"learning_rate": 2.785445764270047e-06,
"loss": 0.540446937084198,
"step": 1287
},
{
"epoch": 2.0476947535771064,
"grad_norm": 9.94255306056615,
"learning_rate": 2.77715555439007e-06,
"loss": 0.749458909034729,
"step": 1288
},
{
"epoch": 2.0492845786963434,
"grad_norm": 8.42062161680688,
"learning_rate": 2.7688729536106175e-06,
"loss": 0.42192327976226807,
"step": 1289
},
{
"epoch": 2.0508744038155804,
"grad_norm": 9.790604103222373,
"learning_rate": 2.7605979902841635e-06,
"loss": 0.18696679174900055,
"step": 1290
},
{
"epoch": 2.0524642289348174,
"grad_norm": 8.557591397101714,
"learning_rate": 2.7523306927370375e-06,
"loss": 0.3152458965778351,
"step": 1291
},
{
"epoch": 2.054054054054054,
"grad_norm": 9.373458399738638,
"learning_rate": 2.7440710892693346e-06,
"loss": 0.4110928177833557,
"step": 1292
},
{
"epoch": 2.055643879173291,
"grad_norm": 12.948354145830105,
"learning_rate": 2.7358192081547994e-06,
"loss": 0.49277588725090027,
"step": 1293
},
{
"epoch": 2.057233704292528,
"grad_norm": 9.130956209811558,
"learning_rate": 2.7275750776407568e-06,
"loss": 1.085226058959961,
"step": 1294
},
{
"epoch": 2.0588235294117645,
"grad_norm": 10.060570275418396,
"learning_rate": 2.719338725947987e-06,
"loss": 0.7210683822631836,
"step": 1295
},
{
"epoch": 2.0604133545310015,
"grad_norm": 9.069136226968924,
"learning_rate": 2.711110181270653e-06,
"loss": 1.3576455116271973,
"step": 1296
},
{
"epoch": 2.0620031796502385,
"grad_norm": 25.167615589113343,
"learning_rate": 2.7028894717761867e-06,
"loss": 2.676640748977661,
"step": 1297
},
{
"epoch": 2.0635930047694755,
"grad_norm": 9.696060766286752,
"learning_rate": 2.6946766256051983e-06,
"loss": 0.30056455731391907,
"step": 1298
},
{
"epoch": 2.065182829888712,
"grad_norm": 9.185968609559813,
"learning_rate": 2.6864716708713885e-06,
"loss": 0.6304574012756348,
"step": 1299
},
{
"epoch": 2.066772655007949,
"grad_norm": 10.355978305326843,
"learning_rate": 2.6782746356614364e-06,
"loss": 0.45259398221969604,
"step": 1300
},
{
"epoch": 2.068362480127186,
"grad_norm": 11.804791422786808,
"learning_rate": 2.670085548034913e-06,
"loss": 0.5118886232376099,
"step": 1301
},
{
"epoch": 2.069952305246423,
"grad_norm": 7.416042020504371,
"learning_rate": 2.6619044360241886e-06,
"loss": 0.26387444138526917,
"step": 1302
},
{
"epoch": 2.0715421303656596,
"grad_norm": 10.262238601993216,
"learning_rate": 2.6537313276343255e-06,
"loss": 0.5545535087585449,
"step": 1303
},
{
"epoch": 2.0731319554848966,
"grad_norm": 7.876916704043814,
"learning_rate": 2.6455662508429946e-06,
"loss": 0.2981090545654297,
"step": 1304
},
{
"epoch": 2.0747217806041336,
"grad_norm": 11.160084976263871,
"learning_rate": 2.6374092336003684e-06,
"loss": 0.29364457726478577,
"step": 1305
},
{
"epoch": 2.0763116057233706,
"grad_norm": 11.303770977415407,
"learning_rate": 2.6292603038290306e-06,
"loss": 0.7849453687667847,
"step": 1306
},
{
"epoch": 2.077901430842607,
"grad_norm": 19.75269483714375,
"learning_rate": 2.6211194894238863e-06,
"loss": 0.9576752781867981,
"step": 1307
},
{
"epoch": 2.079491255961844,
"grad_norm": 8.001166271043143,
"learning_rate": 2.6129868182520525e-06,
"loss": 0.1691819727420807,
"step": 1308
},
{
"epoch": 2.081081081081081,
"grad_norm": 9.026308181157667,
"learning_rate": 2.604862318152778e-06,
"loss": 0.22451704740524292,
"step": 1309
},
{
"epoch": 2.082670906200318,
"grad_norm": 12.843536821449838,
"learning_rate": 2.596746016937337e-06,
"loss": 0.4650968313217163,
"step": 1310
},
{
"epoch": 2.0842607313195547,
"grad_norm": 9.988774639170009,
"learning_rate": 2.5886379423889362e-06,
"loss": 0.38352689146995544,
"step": 1311
},
{
"epoch": 2.0858505564387917,
"grad_norm": 17.79353421678456,
"learning_rate": 2.580538122262627e-06,
"loss": 0.6193521022796631,
"step": 1312
},
{
"epoch": 2.0874403815580287,
"grad_norm": 7.326817064479934,
"learning_rate": 2.5724465842852e-06,
"loss": 0.2504882514476776,
"step": 1313
},
{
"epoch": 2.0890302066772657,
"grad_norm": 18.334658146906335,
"learning_rate": 2.564363356155094e-06,
"loss": 0.9683111906051636,
"step": 1314
},
{
"epoch": 2.0906200317965022,
"grad_norm": 8.054717169135303,
"learning_rate": 2.556288465542308e-06,
"loss": 0.297244668006897,
"step": 1315
},
{
"epoch": 2.0922098569157392,
"grad_norm": 11.01684001675487,
"learning_rate": 2.5482219400882934e-06,
"loss": 0.4446839690208435,
"step": 1316
},
{
"epoch": 2.0937996820349762,
"grad_norm": 9.838931398311988,
"learning_rate": 2.540163807405873e-06,
"loss": 0.8158186674118042,
"step": 1317
},
{
"epoch": 2.0953895071542132,
"grad_norm": 7.416228081106363,
"learning_rate": 2.532114095079137e-06,
"loss": 0.31487804651260376,
"step": 1318
},
{
"epoch": 2.09697933227345,
"grad_norm": 12.833134362946721,
"learning_rate": 2.5240728306633492e-06,
"loss": 0.7158694863319397,
"step": 1319
},
{
"epoch": 2.098569157392687,
"grad_norm": 15.148450523189657,
"learning_rate": 2.5160400416848583e-06,
"loss": 0.5310110449790955,
"step": 1320
},
{
"epoch": 2.100158982511924,
"grad_norm": 8.187789663576774,
"learning_rate": 2.508015755640999e-06,
"loss": 0.31758755445480347,
"step": 1321
},
{
"epoch": 2.101748807631161,
"grad_norm": 12.040527490093737,
"learning_rate": 2.5000000000000015e-06,
"loss": 0.5420504212379456,
"step": 1322
},
{
"epoch": 2.1033386327503973,
"grad_norm": 8.696591339854574,
"learning_rate": 2.491992802200892e-06,
"loss": 0.8775444030761719,
"step": 1323
},
{
"epoch": 2.1049284578696343,
"grad_norm": 14.542466373749383,
"learning_rate": 2.4839941896534027e-06,
"loss": 0.34773313999176025,
"step": 1324
},
{
"epoch": 2.1065182829888713,
"grad_norm": 7.5325204562743675,
"learning_rate": 2.4760041897378813e-06,
"loss": 0.3825177252292633,
"step": 1325
},
{
"epoch": 2.108108108108108,
"grad_norm": 7.061725758761062,
"learning_rate": 2.4680228298051866e-06,
"loss": 0.16116374731063843,
"step": 1326
},
{
"epoch": 2.109697933227345,
"grad_norm": 15.681078852075046,
"learning_rate": 2.4600501371766087e-06,
"loss": 0.7574387788772583,
"step": 1327
},
{
"epoch": 2.111287758346582,
"grad_norm": 8.187924042692945,
"learning_rate": 2.4520861391437635e-06,
"loss": 0.19623573124408722,
"step": 1328
},
{
"epoch": 2.112877583465819,
"grad_norm": 12.631065786042955,
"learning_rate": 2.444130862968503e-06,
"loss": 0.24437788128852844,
"step": 1329
},
{
"epoch": 2.1144674085850554,
"grad_norm": 10.764006349436796,
"learning_rate": 2.4361843358828287e-06,
"loss": 0.35523247718811035,
"step": 1330
},
{
"epoch": 2.1160572337042924,
"grad_norm": 15.904616252885177,
"learning_rate": 2.4282465850887887e-06,
"loss": 0.754123866558075,
"step": 1331
},
{
"epoch": 2.1176470588235294,
"grad_norm": 9.693189940959178,
"learning_rate": 2.420317637758387e-06,
"loss": 0.2743876576423645,
"step": 1332
},
{
"epoch": 2.1192368839427664,
"grad_norm": 14.041254548938046,
"learning_rate": 2.4123975210334987e-06,
"loss": 0.6556911468505859,
"step": 1333
},
{
"epoch": 2.120826709062003,
"grad_norm": 10.60443572924436,
"learning_rate": 2.404486262025763e-06,
"loss": 0.7615698575973511,
"step": 1334
},
{
"epoch": 2.12241653418124,
"grad_norm": 9.536018360894495,
"learning_rate": 2.3965838878165043e-06,
"loss": 0.7127845883369446,
"step": 1335
},
{
"epoch": 2.124006359300477,
"grad_norm": 8.952164367508987,
"learning_rate": 2.388690425456629e-06,
"loss": 0.5734552145004272,
"step": 1336
},
{
"epoch": 2.125596184419714,
"grad_norm": 7.609720701343017,
"learning_rate": 2.380805901966536e-06,
"loss": 0.8266602158546448,
"step": 1337
},
{
"epoch": 2.1271860095389505,
"grad_norm": 11.743301492870309,
"learning_rate": 2.3729303443360312e-06,
"loss": 0.5090128779411316,
"step": 1338
},
{
"epoch": 2.1287758346581875,
"grad_norm": 10.592030711418884,
"learning_rate": 2.365063779524222e-06,
"loss": 0.20535558462142944,
"step": 1339
},
{
"epoch": 2.1303656597774245,
"grad_norm": 11.864484547591088,
"learning_rate": 2.3572062344594387e-06,
"loss": 0.40618768334388733,
"step": 1340
},
{
"epoch": 2.1319554848966615,
"grad_norm": 8.684602509877466,
"learning_rate": 2.3493577360391316e-06,
"loss": 0.386374831199646,
"step": 1341
},
{
"epoch": 2.133545310015898,
"grad_norm": 57.672457327244935,
"learning_rate": 2.341518311129781e-06,
"loss": 1.3086575269699097,
"step": 1342
},
{
"epoch": 2.135135135135135,
"grad_norm": 10.240140748912529,
"learning_rate": 2.333687986566816e-06,
"loss": 0.6220468282699585,
"step": 1343
},
{
"epoch": 2.136724960254372,
"grad_norm": 12.813613405444732,
"learning_rate": 2.325866789154505e-06,
"loss": 1.0330783128738403,
"step": 1344
},
{
"epoch": 2.138314785373609,
"grad_norm": 11.520654618841975,
"learning_rate": 2.318054745665877e-06,
"loss": 1.003685474395752,
"step": 1345
},
{
"epoch": 2.1399046104928456,
"grad_norm": 10.468340083489583,
"learning_rate": 2.3102518828426253e-06,
"loss": 0.48100218176841736,
"step": 1346
},
{
"epoch": 2.1414944356120826,
"grad_norm": 10.79612151629994,
"learning_rate": 2.3024582273950136e-06,
"loss": 0.2844333350658417,
"step": 1347
},
{
"epoch": 2.1430842607313196,
"grad_norm": 16.202827122761803,
"learning_rate": 2.2946738060017947e-06,
"loss": 0.9998087882995605,
"step": 1348
},
{
"epoch": 2.1446740858505566,
"grad_norm": 12.545135440285307,
"learning_rate": 2.2868986453101044e-06,
"loss": 0.3493340313434601,
"step": 1349
},
{
"epoch": 2.146263910969793,
"grad_norm": 15.89328036419762,
"learning_rate": 2.2791327719353847e-06,
"loss": 0.2903032898902893,
"step": 1350
},
{
"epoch": 2.14785373608903,
"grad_norm": 12.85129240340186,
"learning_rate": 2.2713762124612794e-06,
"loss": 0.2625690996646881,
"step": 1351
},
{
"epoch": 2.149443561208267,
"grad_norm": 14.737479053475628,
"learning_rate": 2.2636289934395506e-06,
"loss": 0.4280650317668915,
"step": 1352
},
{
"epoch": 2.151033386327504,
"grad_norm": 11.465910225292893,
"learning_rate": 2.2558911413899933e-06,
"loss": 0.4616699516773224,
"step": 1353
},
{
"epoch": 2.1526232114467407,
"grad_norm": 12.150448305231292,
"learning_rate": 2.24816268280033e-06,
"loss": 0.5582720041275024,
"step": 1354
},
{
"epoch": 2.1542130365659777,
"grad_norm": 9.110587400603661,
"learning_rate": 2.2404436441261305e-06,
"loss": 0.2962486445903778,
"step": 1355
},
{
"epoch": 2.1558028616852147,
"grad_norm": 6.569549294651593,
"learning_rate": 2.2327340517907232e-06,
"loss": 0.28374403715133667,
"step": 1356
},
{
"epoch": 2.1573926868044513,
"grad_norm": 13.75762553276656,
"learning_rate": 2.2250339321850934e-06,
"loss": 0.6057413816452026,
"step": 1357
},
{
"epoch": 2.1589825119236883,
"grad_norm": 8.669490763404966,
"learning_rate": 2.217343311667807e-06,
"loss": 0.3976594805717468,
"step": 1358
},
{
"epoch": 2.1605723370429253,
"grad_norm": 9.243989451796354,
"learning_rate": 2.2096622165649082e-06,
"loss": 0.36878764629364014,
"step": 1359
},
{
"epoch": 2.1621621621621623,
"grad_norm": 9.941680984264261,
"learning_rate": 2.2019906731698337e-06,
"loss": 0.3705075979232788,
"step": 1360
},
{
"epoch": 2.1637519872813993,
"grad_norm": 16.291839171810043,
"learning_rate": 2.1943287077433302e-06,
"loss": 1.8146584033966064,
"step": 1361
},
{
"epoch": 2.165341812400636,
"grad_norm": 11.59444692669252,
"learning_rate": 2.1866763465133483e-06,
"loss": 0.9066391587257385,
"step": 1362
},
{
"epoch": 2.166931637519873,
"grad_norm": 14.500132701105679,
"learning_rate": 2.179033615674971e-06,
"loss": 0.5588962435722351,
"step": 1363
},
{
"epoch": 2.16852146263911,
"grad_norm": 19.71889006674096,
"learning_rate": 2.1714005413903105e-06,
"loss": 0.4099670350551605,
"step": 1364
},
{
"epoch": 2.1701112877583464,
"grad_norm": 9.778223957653488,
"learning_rate": 2.1637771497884208e-06,
"loss": 0.5692353248596191,
"step": 1365
},
{
"epoch": 2.1717011128775834,
"grad_norm": 26.06102052980053,
"learning_rate": 2.156163466965218e-06,
"loss": 0.5873644948005676,
"step": 1366
},
{
"epoch": 2.1732909379968204,
"grad_norm": 10.595142110604016,
"learning_rate": 2.1485595189833773e-06,
"loss": 0.29316580295562744,
"step": 1367
},
{
"epoch": 2.1748807631160574,
"grad_norm": 19.21378248414993,
"learning_rate": 2.1409653318722517e-06,
"loss": 1.0865153074264526,
"step": 1368
},
{
"epoch": 2.176470588235294,
"grad_norm": 8.650427105241409,
"learning_rate": 2.1333809316277854e-06,
"loss": 0.4446878135204315,
"step": 1369
},
{
"epoch": 2.178060413354531,
"grad_norm": 14.234623763340775,
"learning_rate": 2.125806344212413e-06,
"loss": 0.17620611190795898,
"step": 1370
},
{
"epoch": 2.179650238473768,
"grad_norm": 7.848600881166288,
"learning_rate": 2.1182415955549905e-06,
"loss": 0.29350030422210693,
"step": 1371
},
{
"epoch": 2.181240063593005,
"grad_norm": 11.44030757418845,
"learning_rate": 2.110686711550678e-06,
"loss": 0.3198983669281006,
"step": 1372
},
{
"epoch": 2.1828298887122415,
"grad_norm": 7.244211102436453,
"learning_rate": 2.103141718060883e-06,
"loss": 0.31241801381111145,
"step": 1373
},
{
"epoch": 2.1844197138314785,
"grad_norm": 10.576054299489337,
"learning_rate": 2.095606640913149e-06,
"loss": 0.32452335953712463,
"step": 1374
},
{
"epoch": 2.1860095389507155,
"grad_norm": 8.527885992710734,
"learning_rate": 2.0880815059010716e-06,
"loss": 0.28504425287246704,
"step": 1375
},
{
"epoch": 2.1875993640699525,
"grad_norm": 10.577578790165875,
"learning_rate": 2.080566338784222e-06,
"loss": 0.38088685274124146,
"step": 1376
},
{
"epoch": 2.189189189189189,
"grad_norm": 10.395418619328238,
"learning_rate": 2.0730611652880435e-06,
"loss": 0.44801580905914307,
"step": 1377
},
{
"epoch": 2.190779014308426,
"grad_norm": 8.837899961147453,
"learning_rate": 2.0655660111037685e-06,
"loss": 0.33956456184387207,
"step": 1378
},
{
"epoch": 2.192368839427663,
"grad_norm": 11.45866723702121,
"learning_rate": 2.0580809018883397e-06,
"loss": 0.5810889601707458,
"step": 1379
},
{
"epoch": 2.1939586645469,
"grad_norm": 9.413304931538189,
"learning_rate": 2.0506058632643044e-06,
"loss": 0.4389991760253906,
"step": 1380
},
{
"epoch": 2.1955484896661366,
"grad_norm": 12.110966125967227,
"learning_rate": 2.043140920819747e-06,
"loss": 1.0444782972335815,
"step": 1381
},
{
"epoch": 2.1971383147853736,
"grad_norm": 10.789879175191414,
"learning_rate": 2.0356861001081833e-06,
"loss": 0.4147152006626129,
"step": 1382
},
{
"epoch": 2.1987281399046106,
"grad_norm": 14.597872553807216,
"learning_rate": 2.028241426648484e-06,
"loss": 1.171076774597168,
"step": 1383
},
{
"epoch": 2.2003179650238476,
"grad_norm": 10.365723003698111,
"learning_rate": 2.0208069259247866e-06,
"loss": 0.30065760016441345,
"step": 1384
},
{
"epoch": 2.201907790143084,
"grad_norm": 10.161072256605,
"learning_rate": 2.0133826233864023e-06,
"loss": 0.7872642278671265,
"step": 1385
},
{
"epoch": 2.203497615262321,
"grad_norm": 10.02599250498573,
"learning_rate": 2.005968544447733e-06,
"loss": 0.31960901618003845,
"step": 1386
},
{
"epoch": 2.205087440381558,
"grad_norm": 7.6759113967009736,
"learning_rate": 1.998564714488187e-06,
"loss": 0.5354412794113159,
"step": 1387
},
{
"epoch": 2.2066772655007947,
"grad_norm": 38.3513743093145,
"learning_rate": 1.9911711588520845e-06,
"loss": 1.2136881351470947,
"step": 1388
},
{
"epoch": 2.2082670906200317,
"grad_norm": 9.27571774690589,
"learning_rate": 1.98378790284858e-06,
"loss": 0.4841063916683197,
"step": 1389
},
{
"epoch": 2.2098569157392687,
"grad_norm": 15.933026654093874,
"learning_rate": 1.976414971751568e-06,
"loss": 0.5539760589599609,
"step": 1390
},
{
"epoch": 2.2114467408585057,
"grad_norm": 15.932623846230143,
"learning_rate": 1.9690523907995968e-06,
"loss": 0.5171653032302856,
"step": 1391
},
{
"epoch": 2.2130365659777427,
"grad_norm": 7.63792357629499,
"learning_rate": 1.9617001851957924e-06,
"loss": 0.3998708724975586,
"step": 1392
},
{
"epoch": 2.2146263910969792,
"grad_norm": 8.332002906686242,
"learning_rate": 1.9543583801077567e-06,
"loss": 0.16828802227973938,
"step": 1393
},
{
"epoch": 2.2162162162162162,
"grad_norm": 10.513665677086328,
"learning_rate": 1.9470270006674944e-06,
"loss": 0.5081832408905029,
"step": 1394
},
{
"epoch": 2.2178060413354532,
"grad_norm": 8.948435977246879,
"learning_rate": 1.93970607197132e-06,
"loss": 0.4326249361038208,
"step": 1395
},
{
"epoch": 2.21939586645469,
"grad_norm": 18.420956573075454,
"learning_rate": 1.932395619079771e-06,
"loss": 0.5133779644966125,
"step": 1396
},
{
"epoch": 2.220985691573927,
"grad_norm": 13.78197766711825,
"learning_rate": 1.9250956670175315e-06,
"loss": 0.8035323619842529,
"step": 1397
},
{
"epoch": 2.2225755166931638,
"grad_norm": 14.628351212094666,
"learning_rate": 1.917806240773333e-06,
"loss": 0.25781551003456116,
"step": 1398
},
{
"epoch": 2.2241653418124008,
"grad_norm": 8.528547429899023,
"learning_rate": 1.910527365299879e-06,
"loss": 0.5672615766525269,
"step": 1399
},
{
"epoch": 2.2257551669316373,
"grad_norm": 7.602237876830262,
"learning_rate": 1.9032590655137557e-06,
"loss": 0.24282558262348175,
"step": 1400
},
{
"epoch": 2.2273449920508743,
"grad_norm": 13.220120887526408,
"learning_rate": 1.8960013662953452e-06,
"loss": 0.3136386573314667,
"step": 1401
},
{
"epoch": 2.2289348171701113,
"grad_norm": 7.446562133244804,
"learning_rate": 1.8887542924887486e-06,
"loss": 0.4707590937614441,
"step": 1402
},
{
"epoch": 2.2305246422893483,
"grad_norm": 11.925665302137256,
"learning_rate": 1.8815178689016862e-06,
"loss": 0.598182201385498,
"step": 1403
},
{
"epoch": 2.232114467408585,
"grad_norm": 10.82355623400178,
"learning_rate": 1.87429212030543e-06,
"loss": 0.17749015986919403,
"step": 1404
},
{
"epoch": 2.233704292527822,
"grad_norm": 14.204338243223313,
"learning_rate": 1.8670770714347024e-06,
"loss": 0.2691603899002075,
"step": 1405
},
{
"epoch": 2.235294117647059,
"grad_norm": 13.602729724029773,
"learning_rate": 1.8598727469876027e-06,
"loss": 0.4229642152786255,
"step": 1406
},
{
"epoch": 2.236883942766296,
"grad_norm": 10.317753597321978,
"learning_rate": 1.8526791716255205e-06,
"loss": 0.3552161157131195,
"step": 1407
},
{
"epoch": 2.2384737678855324,
"grad_norm": 8.135565138075629,
"learning_rate": 1.8454963699730471e-06,
"loss": 0.275473952293396,
"step": 1408
},
{
"epoch": 2.2400635930047694,
"grad_norm": 9.137160576002834,
"learning_rate": 1.8383243666178929e-06,
"loss": 0.2102886140346527,
"step": 1409
},
{
"epoch": 2.2416534181240064,
"grad_norm": 9.360877896344201,
"learning_rate": 1.8311631861108097e-06,
"loss": 0.1918153464794159,
"step": 1410
},
{
"epoch": 2.2432432432432434,
"grad_norm": 8.962443180010503,
"learning_rate": 1.8240128529654944e-06,
"loss": 0.5940899848937988,
"step": 1411
},
{
"epoch": 2.24483306836248,
"grad_norm": 11.917226870320402,
"learning_rate": 1.816873391658518e-06,
"loss": 0.3903201222419739,
"step": 1412
},
{
"epoch": 2.246422893481717,
"grad_norm": 9.826294489562173,
"learning_rate": 1.8097448266292322e-06,
"loss": 0.6102331876754761,
"step": 1413
},
{
"epoch": 2.248012718600954,
"grad_norm": 13.517844391934938,
"learning_rate": 1.802627182279687e-06,
"loss": 0.4329093098640442,
"step": 1414
},
{
"epoch": 2.249602543720191,
"grad_norm": 13.547536022058566,
"learning_rate": 1.7955204829745571e-06,
"loss": 0.33794981241226196,
"step": 1415
},
{
"epoch": 2.2511923688394275,
"grad_norm": 13.285342374121825,
"learning_rate": 1.7884247530410436e-06,
"loss": 0.31554755568504333,
"step": 1416
},
{
"epoch": 2.2527821939586645,
"grad_norm": 12.434017910171066,
"learning_rate": 1.781340016768799e-06,
"loss": 0.21422359347343445,
"step": 1417
},
{
"epoch": 2.2543720190779015,
"grad_norm": 12.28525539103986,
"learning_rate": 1.774266298409848e-06,
"loss": 0.9881556034088135,
"step": 1418
},
{
"epoch": 2.255961844197138,
"grad_norm": 8.457333426414545,
"learning_rate": 1.7672036221784917e-06,
"loss": 0.4250524044036865,
"step": 1419
},
{
"epoch": 2.257551669316375,
"grad_norm": 13.958386580318106,
"learning_rate": 1.760152012251241e-06,
"loss": 1.0648713111877441,
"step": 1420
},
{
"epoch": 2.259141494435612,
"grad_norm": 5.811308085366414,
"learning_rate": 1.7531114927667192e-06,
"loss": 0.2532946765422821,
"step": 1421
},
{
"epoch": 2.260731319554849,
"grad_norm": 10.684705977173687,
"learning_rate": 1.7460820878255853e-06,
"loss": 0.5165972113609314,
"step": 1422
},
{
"epoch": 2.262321144674086,
"grad_norm": 9.810147715383405,
"learning_rate": 1.7390638214904576e-06,
"loss": 0.3521971106529236,
"step": 1423
},
{
"epoch": 2.2639109697933226,
"grad_norm": 12.393073778447988,
"learning_rate": 1.7320567177858188e-06,
"loss": 0.6745153665542603,
"step": 1424
},
{
"epoch": 2.2655007949125596,
"grad_norm": 15.83584524219963,
"learning_rate": 1.7250608006979447e-06,
"loss": 1.0417171716690063,
"step": 1425
},
{
"epoch": 2.2670906200317966,
"grad_norm": 10.451819766363082,
"learning_rate": 1.7180760941748132e-06,
"loss": 0.6115795373916626,
"step": 1426
},
{
"epoch": 2.268680445151033,
"grad_norm": 11.76193784891942,
"learning_rate": 1.7111026221260334e-06,
"loss": 0.2907196879386902,
"step": 1427
},
{
"epoch": 2.27027027027027,
"grad_norm": 7.9808693082080655,
"learning_rate": 1.704140408422753e-06,
"loss": 0.5010504126548767,
"step": 1428
},
{
"epoch": 2.271860095389507,
"grad_norm": 9.392613329966396,
"learning_rate": 1.6971894768975794e-06,
"loss": 0.4334721565246582,
"step": 1429
},
{
"epoch": 2.273449920508744,
"grad_norm": 10.483383181145562,
"learning_rate": 1.6902498513445053e-06,
"loss": 0.8298370838165283,
"step": 1430
},
{
"epoch": 2.275039745627981,
"grad_norm": 8.48946421286137,
"learning_rate": 1.683321555518816e-06,
"loss": 0.309246689081192,
"step": 1431
},
{
"epoch": 2.2766295707472177,
"grad_norm": 12.704343511214526,
"learning_rate": 1.6764046131370142e-06,
"loss": 0.3676954507827759,
"step": 1432
},
{
"epoch": 2.2782193958664547,
"grad_norm": 14.891085610228341,
"learning_rate": 1.6694990478767432e-06,
"loss": 1.0234798192977905,
"step": 1433
},
{
"epoch": 2.2798092209856917,
"grad_norm": 13.465531205203249,
"learning_rate": 1.6626048833766927e-06,
"loss": 0.8669548630714417,
"step": 1434
},
{
"epoch": 2.2813990461049283,
"grad_norm": 9.123390257093554,
"learning_rate": 1.6557221432365355e-06,
"loss": 0.3418199419975281,
"step": 1435
},
{
"epoch": 2.2829888712241653,
"grad_norm": 7.501081284249146,
"learning_rate": 1.64885085101683e-06,
"loss": 0.19252462685108185,
"step": 1436
},
{
"epoch": 2.2845786963434023,
"grad_norm": 13.366478333865068,
"learning_rate": 1.6419910302389475e-06,
"loss": 0.3347507119178772,
"step": 1437
},
{
"epoch": 2.2861685214626393,
"grad_norm": 8.47034032498675,
"learning_rate": 1.6351427043849955e-06,
"loss": 0.3430837094783783,
"step": 1438
},
{
"epoch": 2.287758346581876,
"grad_norm": 11.75645210643237,
"learning_rate": 1.6283058968977289e-06,
"loss": 0.27441170811653137,
"step": 1439
},
{
"epoch": 2.289348171701113,
"grad_norm": 11.279080865104184,
"learning_rate": 1.621480631180473e-06,
"loss": 0.4956094026565552,
"step": 1440
},
{
"epoch": 2.29093799682035,
"grad_norm": 15.448500087736196,
"learning_rate": 1.6146669305970493e-06,
"loss": 0.7079007625579834,
"step": 1441
},
{
"epoch": 2.292527821939587,
"grad_norm": 9.231845523966356,
"learning_rate": 1.6078648184716827e-06,
"loss": 0.4254942536354065,
"step": 1442
},
{
"epoch": 2.2941176470588234,
"grad_norm": 11.905805737388867,
"learning_rate": 1.601074318088937e-06,
"loss": 0.7360373735427856,
"step": 1443
},
{
"epoch": 2.2957074721780604,
"grad_norm": 12.941732719146424,
"learning_rate": 1.5942954526936217e-06,
"loss": 0.6561013460159302,
"step": 1444
},
{
"epoch": 2.2972972972972974,
"grad_norm": 7.9487159972862536,
"learning_rate": 1.5875282454907187e-06,
"loss": 0.08330412209033966,
"step": 1445
},
{
"epoch": 2.2988871224165344,
"grad_norm": 10.834323705822364,
"learning_rate": 1.5807727196453065e-06,
"loss": 0.6605761647224426,
"step": 1446
},
{
"epoch": 2.300476947535771,
"grad_norm": 9.209055284124979,
"learning_rate": 1.574028898282472e-06,
"loss": 0.37634578347206116,
"step": 1447
},
{
"epoch": 2.302066772655008,
"grad_norm": 8.042351795789497,
"learning_rate": 1.5672968044872395e-06,
"loss": 0.4199534058570862,
"step": 1448
},
{
"epoch": 2.303656597774245,
"grad_norm": 14.25177973250915,
"learning_rate": 1.560576461304486e-06,
"loss": 0.736668586730957,
"step": 1449
},
{
"epoch": 2.3052464228934815,
"grad_norm": 13.585164988982509,
"learning_rate": 1.5538678917388638e-06,
"loss": 0.9653232097625732,
"step": 1450
},
{
"epoch": 2.3068362480127185,
"grad_norm": 8.887027967619444,
"learning_rate": 1.5471711187547284e-06,
"loss": 0.38832274079322815,
"step": 1451
},
{
"epoch": 2.3084260731319555,
"grad_norm": 6.267315249131483,
"learning_rate": 1.5404861652760434e-06,
"loss": 0.31964796781539917,
"step": 1452
},
{
"epoch": 2.3100158982511925,
"grad_norm": 11.376526955253818,
"learning_rate": 1.5338130541863233e-06,
"loss": 0.44085028767585754,
"step": 1453
},
{
"epoch": 2.3116057233704295,
"grad_norm": 10.160907274712994,
"learning_rate": 1.527151808328538e-06,
"loss": 0.670817494392395,
"step": 1454
},
{
"epoch": 2.313195548489666,
"grad_norm": 12.580031788905817,
"learning_rate": 1.5205024505050424e-06,
"loss": 0.3954434394836426,
"step": 1455
},
{
"epoch": 2.314785373608903,
"grad_norm": 15.317878122772438,
"learning_rate": 1.5138650034775004e-06,
"loss": 0.2827695310115814,
"step": 1456
},
{
"epoch": 2.31637519872814,
"grad_norm": 7.489429551522345,
"learning_rate": 1.5072394899667974e-06,
"loss": 0.41150280833244324,
"step": 1457
},
{
"epoch": 2.3179650238473766,
"grad_norm": 10.601003962682444,
"learning_rate": 1.5006259326529755e-06,
"loss": 0.36833304166793823,
"step": 1458
},
{
"epoch": 2.3195548489666136,
"grad_norm": 7.868890901677638,
"learning_rate": 1.4940243541751449e-06,
"loss": 0.2758025825023651,
"step": 1459
},
{
"epoch": 2.3211446740858506,
"grad_norm": 7.750782960909832,
"learning_rate": 1.487434777131409e-06,
"loss": 0.32291388511657715,
"step": 1460
},
{
"epoch": 2.3227344992050876,
"grad_norm": 8.392809402568808,
"learning_rate": 1.4808572240787943e-06,
"loss": 0.3069787621498108,
"step": 1461
},
{
"epoch": 2.3243243243243246,
"grad_norm": 7.163401464386679,
"learning_rate": 1.4742917175331644e-06,
"loss": 0.16901874542236328,
"step": 1462
},
{
"epoch": 2.325914149443561,
"grad_norm": 14.390629374585183,
"learning_rate": 1.4677382799691425e-06,
"loss": 1.3525761365890503,
"step": 1463
},
{
"epoch": 2.327503974562798,
"grad_norm": 6.678330116875288,
"learning_rate": 1.461196933820046e-06,
"loss": 0.20596320927143097,
"step": 1464
},
{
"epoch": 2.329093799682035,
"grad_norm": 12.532799405880928,
"learning_rate": 1.4546677014777938e-06,
"loss": 0.322355717420578,
"step": 1465
},
{
"epoch": 2.3306836248012717,
"grad_norm": 16.793179759046723,
"learning_rate": 1.4481506052928445e-06,
"loss": 0.3826938271522522,
"step": 1466
},
{
"epoch": 2.3322734499205087,
"grad_norm": 9.905044680880017,
"learning_rate": 1.4416456675741076e-06,
"loss": 0.3694925904273987,
"step": 1467
},
{
"epoch": 2.3338632750397457,
"grad_norm": 10.461931343295252,
"learning_rate": 1.4351529105888735e-06,
"loss": 0.29014065861701965,
"step": 1468
},
{
"epoch": 2.3354531001589827,
"grad_norm": 14.000724794554966,
"learning_rate": 1.4286723565627397e-06,
"loss": 0.3127569556236267,
"step": 1469
},
{
"epoch": 2.337042925278219,
"grad_norm": 7.689620479500538,
"learning_rate": 1.4222040276795273e-06,
"loss": 0.2523467540740967,
"step": 1470
},
{
"epoch": 2.338632750397456,
"grad_norm": 14.373753716425853,
"learning_rate": 1.41574794608121e-06,
"loss": 0.3797416090965271,
"step": 1471
},
{
"epoch": 2.340222575516693,
"grad_norm": 9.054123209399933,
"learning_rate": 1.4093041338678404e-06,
"loss": 0.4659533202648163,
"step": 1472
},
{
"epoch": 2.34181240063593,
"grad_norm": 15.438748343325372,
"learning_rate": 1.4028726130974662e-06,
"loss": 0.3603324890136719,
"step": 1473
},
{
"epoch": 2.3434022257551668,
"grad_norm": 10.560118294734133,
"learning_rate": 1.3964534057860652e-06,
"loss": 0.6682232618331909,
"step": 1474
},
{
"epoch": 2.3449920508744038,
"grad_norm": 11.401281185340983,
"learning_rate": 1.3900465339074609e-06,
"loss": 0.6063719391822815,
"step": 1475
},
{
"epoch": 2.3465818759936408,
"grad_norm": 11.52440897543909,
"learning_rate": 1.3836520193932495e-06,
"loss": 0.44089335203170776,
"step": 1476
},
{
"epoch": 2.3481717011128778,
"grad_norm": 13.657311760357237,
"learning_rate": 1.3772698841327347e-06,
"loss": 0.6193767189979553,
"step": 1477
},
{
"epoch": 2.3497615262321143,
"grad_norm": 9.295719556907873,
"learning_rate": 1.3709001499728308e-06,
"loss": 0.3555140495300293,
"step": 1478
},
{
"epoch": 2.3513513513513513,
"grad_norm": 11.645080860666992,
"learning_rate": 1.3645428387180137e-06,
"loss": 0.6090583801269531,
"step": 1479
},
{
"epoch": 2.3529411764705883,
"grad_norm": 8.470948193400224,
"learning_rate": 1.3581979721302286e-06,
"loss": 0.34452319145202637,
"step": 1480
},
{
"epoch": 2.3545310015898253,
"grad_norm": 9.909508425078533,
"learning_rate": 1.3518655719288193e-06,
"loss": 0.39460867643356323,
"step": 1481
},
{
"epoch": 2.356120826709062,
"grad_norm": 13.853217505747493,
"learning_rate": 1.3455456597904605e-06,
"loss": 0.43775254487991333,
"step": 1482
},
{
"epoch": 2.357710651828299,
"grad_norm": 10.876942487445342,
"learning_rate": 1.339238257349073e-06,
"loss": 0.2791391611099243,
"step": 1483
},
{
"epoch": 2.359300476947536,
"grad_norm": 12.82786673007337,
"learning_rate": 1.3329433861957614e-06,
"loss": 0.6097040772438049,
"step": 1484
},
{
"epoch": 2.360890302066773,
"grad_norm": 8.760167908169596,
"learning_rate": 1.3266610678787283e-06,
"loss": 0.2755807936191559,
"step": 1485
},
{
"epoch": 2.3624801271860094,
"grad_norm": 20.110558363159395,
"learning_rate": 1.3203913239032074e-06,
"loss": 0.73570317029953,
"step": 1486
},
{
"epoch": 2.3640699523052464,
"grad_norm": 11.972504031113099,
"learning_rate": 1.3141341757313924e-06,
"loss": 0.6548980474472046,
"step": 1487
},
{
"epoch": 2.3656597774244834,
"grad_norm": 13.51699058640222,
"learning_rate": 1.3078896447823547e-06,
"loss": 0.3493387699127197,
"step": 1488
},
{
"epoch": 2.36724960254372,
"grad_norm": 10.566178335617067,
"learning_rate": 1.30165775243198e-06,
"loss": 0.33287400007247925,
"step": 1489
},
{
"epoch": 2.368839427662957,
"grad_norm": 12.768459562661848,
"learning_rate": 1.295438520012887e-06,
"loss": 0.40229332447052,
"step": 1490
},
{
"epoch": 2.370429252782194,
"grad_norm": 10.88296099547651,
"learning_rate": 1.2892319688143578e-06,
"loss": 0.4362897276878357,
"step": 1491
},
{
"epoch": 2.372019077901431,
"grad_norm": 17.82269470385356,
"learning_rate": 1.283038120082268e-06,
"loss": 0.684404194355011,
"step": 1492
},
{
"epoch": 2.373608903020668,
"grad_norm": 8.305480002810821,
"learning_rate": 1.2768569950190074e-06,
"loss": 0.4447444677352905,
"step": 1493
},
{
"epoch": 2.3751987281399045,
"grad_norm": 8.946310245620616,
"learning_rate": 1.2706886147834114e-06,
"loss": 0.2831430435180664,
"step": 1494
},
{
"epoch": 2.3767885532591415,
"grad_norm": 12.95406255499207,
"learning_rate": 1.2645330004906919e-06,
"loss": 0.750991702079773,
"step": 1495
},
{
"epoch": 2.3783783783783785,
"grad_norm": 9.138565382188945,
"learning_rate": 1.2583901732123555e-06,
"loss": 0.3836651146411896,
"step": 1496
},
{
"epoch": 2.379968203497615,
"grad_norm": 10.766809743823636,
"learning_rate": 1.252260153976143e-06,
"loss": 0.2510315775871277,
"step": 1497
},
{
"epoch": 2.381558028616852,
"grad_norm": 17.47014235175777,
"learning_rate": 1.2461429637659466e-06,
"loss": 0.7679001688957214,
"step": 1498
},
{
"epoch": 2.383147853736089,
"grad_norm": 13.745920912677008,
"learning_rate": 1.2400386235217444e-06,
"loss": 0.3160330653190613,
"step": 1499
},
{
"epoch": 2.384737678855326,
"grad_norm": 8.265195502867067,
"learning_rate": 1.2339471541395304e-06,
"loss": 0.6791805624961853,
"step": 1500
},
{
"epoch": 2.3863275039745626,
"grad_norm": 8.707414478394726,
"learning_rate": 1.2278685764712356e-06,
"loss": 0.4915718138217926,
"step": 1501
},
{
"epoch": 2.3879173290937996,
"grad_norm": 14.070274594206333,
"learning_rate": 1.2218029113246616e-06,
"loss": 0.37402093410491943,
"step": 1502
},
{
"epoch": 2.3895071542130366,
"grad_norm": 7.969660705498184,
"learning_rate": 1.2157501794634118e-06,
"loss": 0.5574676990509033,
"step": 1503
},
{
"epoch": 2.3910969793322736,
"grad_norm": 13.836136257943703,
"learning_rate": 1.2097104016068146e-06,
"loss": 1.1037708520889282,
"step": 1504
},
{
"epoch": 2.39268680445151,
"grad_norm": 13.651653278356111,
"learning_rate": 1.203683598429855e-06,
"loss": 0.5910956859588623,
"step": 1505
},
{
"epoch": 2.394276629570747,
"grad_norm": 9.494496018410414,
"learning_rate": 1.1976697905631036e-06,
"loss": 0.6444679498672485,
"step": 1506
},
{
"epoch": 2.395866454689984,
"grad_norm": 9.573835932767102,
"learning_rate": 1.1916689985926494e-06,
"loss": 0.41417208313941956,
"step": 1507
},
{
"epoch": 2.397456279809221,
"grad_norm": 10.069807567033948,
"learning_rate": 1.1856812430600228e-06,
"loss": 0.7423563599586487,
"step": 1508
},
{
"epoch": 2.3990461049284577,
"grad_norm": 7.835882290899644,
"learning_rate": 1.1797065444621286e-06,
"loss": 0.44855624437332153,
"step": 1509
},
{
"epoch": 2.4006359300476947,
"grad_norm": 12.999112533935575,
"learning_rate": 1.1737449232511799e-06,
"loss": 0.5191413760185242,
"step": 1510
},
{
"epoch": 2.4022257551669317,
"grad_norm": 15.62081982034232,
"learning_rate": 1.1677963998346182e-06,
"loss": 0.3923201560974121,
"step": 1511
},
{
"epoch": 2.4038155802861687,
"grad_norm": 11.802569751706425,
"learning_rate": 1.1618609945750558e-06,
"loss": 0.6530142426490784,
"step": 1512
},
{
"epoch": 2.4054054054054053,
"grad_norm": 7.39912848585775,
"learning_rate": 1.1559387277901958e-06,
"loss": 0.18781472742557526,
"step": 1513
},
{
"epoch": 2.4069952305246423,
"grad_norm": 12.320828388785662,
"learning_rate": 1.1500296197527643e-06,
"loss": 0.739341676235199,
"step": 1514
},
{
"epoch": 2.4085850556438793,
"grad_norm": 10.052444335743562,
"learning_rate": 1.1441336906904504e-06,
"loss": 0.3746979534626007,
"step": 1515
},
{
"epoch": 2.4101748807631163,
"grad_norm": 10.274927395942361,
"learning_rate": 1.1382509607858233e-06,
"loss": 0.5018946528434753,
"step": 1516
},
{
"epoch": 2.411764705882353,
"grad_norm": 12.618193233095981,
"learning_rate": 1.1323814501762714e-06,
"loss": 0.4841238856315613,
"step": 1517
},
{
"epoch": 2.41335453100159,
"grad_norm": 9.154264171272432,
"learning_rate": 1.126525178953935e-06,
"loss": 0.6009554862976074,
"step": 1518
},
{
"epoch": 2.414944356120827,
"grad_norm": 13.673528451375187,
"learning_rate": 1.1206821671656277e-06,
"loss": 0.4096434712409973,
"step": 1519
},
{
"epoch": 2.4165341812400634,
"grad_norm": 10.476072583039118,
"learning_rate": 1.114852434812781e-06,
"loss": 0.3984518051147461,
"step": 1520
},
{
"epoch": 2.4181240063593004,
"grad_norm": 7.612865941416167,
"learning_rate": 1.1090360018513652e-06,
"loss": 0.3841229975223541,
"step": 1521
},
{
"epoch": 2.4197138314785374,
"grad_norm": 10.054845231558906,
"learning_rate": 1.1032328881918237e-06,
"loss": 0.35508057475090027,
"step": 1522
},
{
"epoch": 2.4213036565977744,
"grad_norm": 10.924634607628944,
"learning_rate": 1.0974431136990115e-06,
"loss": 0.538759708404541,
"step": 1523
},
{
"epoch": 2.4228934817170114,
"grad_norm": 9.906978216623733,
"learning_rate": 1.0916666981921164e-06,
"loss": 0.267231285572052,
"step": 1524
},
{
"epoch": 2.424483306836248,
"grad_norm": 8.161633923040958,
"learning_rate": 1.0859036614445977e-06,
"loss": 0.20279455184936523,
"step": 1525
},
{
"epoch": 2.426073131955485,
"grad_norm": 9.59102246457474,
"learning_rate": 1.0801540231841213e-06,
"loss": 0.2875203490257263,
"step": 1526
},
{
"epoch": 2.427662957074722,
"grad_norm": 8.82762418705778,
"learning_rate": 1.0744178030924817e-06,
"loss": 0.4241867661476135,
"step": 1527
},
{
"epoch": 2.4292527821939585,
"grad_norm": 12.305547733445321,
"learning_rate": 1.0686950208055486e-06,
"loss": 0.5881316065788269,
"step": 1528
},
{
"epoch": 2.4308426073131955,
"grad_norm": 10.00443798673381,
"learning_rate": 1.0629856959131861e-06,
"loss": 0.26692256331443787,
"step": 1529
},
{
"epoch": 2.4324324324324325,
"grad_norm": 11.916760785837651,
"learning_rate": 1.0572898479591942e-06,
"loss": 0.390109121799469,
"step": 1530
},
{
"epoch": 2.4340222575516695,
"grad_norm": 12.113929514101105,
"learning_rate": 1.05160749644124e-06,
"loss": 0.39742356538772583,
"step": 1531
},
{
"epoch": 2.435612082670906,
"grad_norm": 9.975231061986387,
"learning_rate": 1.045938660810788e-06,
"loss": 0.18685436248779297,
"step": 1532
},
{
"epoch": 2.437201907790143,
"grad_norm": 14.781171323125493,
"learning_rate": 1.04028336047304e-06,
"loss": 1.9972474575042725,
"step": 1533
},
{
"epoch": 2.43879173290938,
"grad_norm": 12.799015715830842,
"learning_rate": 1.034641614786862e-06,
"loss": 0.896147608757019,
"step": 1534
},
{
"epoch": 2.440381558028617,
"grad_norm": 14.960349944848524,
"learning_rate": 1.0290134430647196e-06,
"loss": 0.6048048734664917,
"step": 1535
},
{
"epoch": 2.4419713831478536,
"grad_norm": 9.68263048334465,
"learning_rate": 1.0233988645726166e-06,
"loss": 0.3319162130355835,
"step": 1536
},
{
"epoch": 2.4435612082670906,
"grad_norm": 11.347543341617659,
"learning_rate": 1.0177978985300203e-06,
"loss": 1.3338065147399902,
"step": 1537
},
{
"epoch": 2.4451510333863276,
"grad_norm": 13.339900500358667,
"learning_rate": 1.0122105641098062e-06,
"loss": 0.36019963026046753,
"step": 1538
},
{
"epoch": 2.4467408585055646,
"grad_norm": 9.811098173161305,
"learning_rate": 1.0066368804381833e-06,
"loss": 0.46575456857681274,
"step": 1539
},
{
"epoch": 2.448330683624801,
"grad_norm": 11.560409893961497,
"learning_rate": 1.0010768665946309e-06,
"loss": 0.29183441400527954,
"step": 1540
},
{
"epoch": 2.449920508744038,
"grad_norm": 14.482821614446557,
"learning_rate": 9.9553054161184e-07,
"loss": 0.3994084298610687,
"step": 1541
},
{
"epoch": 2.451510333863275,
"grad_norm": 10.71186356653993,
"learning_rate": 9.899979244756358e-07,
"loss": 0.6856818795204163,
"step": 1542
},
{
"epoch": 2.453100158982512,
"grad_norm": 15.849209979348569,
"learning_rate": 9.844790341249276e-07,
"loss": 0.29254722595214844,
"step": 1543
},
{
"epoch": 2.4546899841017487,
"grad_norm": 13.001389500403421,
"learning_rate": 9.789738894516294e-07,
"loss": 0.5249795913696289,
"step": 1544
},
{
"epoch": 2.4562798092209857,
"grad_norm": 11.939268619149114,
"learning_rate": 9.734825093006034e-07,
"loss": 0.5683445930480957,
"step": 1545
},
{
"epoch": 2.4578696343402227,
"grad_norm": 15.97735060116779,
"learning_rate": 9.680049124695973e-07,
"loss": 1.1561030149459839,
"step": 1546
},
{
"epoch": 2.4594594594594597,
"grad_norm": 12.890313458125426,
"learning_rate": 9.625411177091731e-07,
"loss": 0.5048433542251587,
"step": 1547
},
{
"epoch": 2.461049284578696,
"grad_norm": 13.662968435753552,
"learning_rate": 9.570911437226454e-07,
"loss": 0.416229248046875,
"step": 1548
},
{
"epoch": 2.462639109697933,
"grad_norm": 13.285516516957886,
"learning_rate": 9.516550091660237e-07,
"loss": 0.4497603178024292,
"step": 1549
},
{
"epoch": 2.46422893481717,
"grad_norm": 6.541102361228737,
"learning_rate": 9.462327326479376e-07,
"loss": 0.38231202960014343,
"step": 1550
},
{
"epoch": 2.4658187599364068,
"grad_norm": 10.61910465149233,
"learning_rate": 9.408243327295835e-07,
"loss": 0.34195244312286377,
"step": 1551
},
{
"epoch": 2.4674085850556438,
"grad_norm": 8.454641509882954,
"learning_rate": 9.35429827924652e-07,
"loss": 0.5992942452430725,
"step": 1552
},
{
"epoch": 2.4689984101748808,
"grad_norm": 10.462490419970619,
"learning_rate": 9.300492366992708e-07,
"loss": 0.49838757514953613,
"step": 1553
},
{
"epoch": 2.4705882352941178,
"grad_norm": 11.436890037466688,
"learning_rate": 9.246825774719409e-07,
"loss": 0.42259490489959717,
"step": 1554
},
{
"epoch": 2.4721780604133547,
"grad_norm": 14.950258496228066,
"learning_rate": 9.193298686134699e-07,
"loss": 0.6221784949302673,
"step": 1555
},
{
"epoch": 2.4737678855325913,
"grad_norm": 8.371376759124747,
"learning_rate": 9.139911284469111e-07,
"loss": 0.2900388836860657,
"step": 1556
},
{
"epoch": 2.4753577106518283,
"grad_norm": 7.643866077422786,
"learning_rate": 9.086663752475061e-07,
"loss": 0.2714345455169678,
"step": 1557
},
{
"epoch": 2.4769475357710653,
"grad_norm": 10.447803262264925,
"learning_rate": 9.033556272426075e-07,
"loss": 0.24355517327785492,
"step": 1558
},
{
"epoch": 2.478537360890302,
"grad_norm": 23.242538512807673,
"learning_rate": 8.980589026116365e-07,
"loss": 0.6441739797592163,
"step": 1559
},
{
"epoch": 2.480127186009539,
"grad_norm": 6.995497707685557,
"learning_rate": 8.927762194860034e-07,
"loss": 0.38172465562820435,
"step": 1560
},
{
"epoch": 2.481717011128776,
"grad_norm": 10.39319097779616,
"learning_rate": 8.87507595949057e-07,
"loss": 0.42091259360313416,
"step": 1561
},
{
"epoch": 2.483306836248013,
"grad_norm": 9.574348703016542,
"learning_rate": 8.822530500360149e-07,
"loss": 0.463512659072876,
"step": 1562
},
{
"epoch": 2.48489666136725,
"grad_norm": 13.936979413116735,
"learning_rate": 8.770125997339058e-07,
"loss": 0.414907842874527,
"step": 1563
},
{
"epoch": 2.4864864864864864,
"grad_norm": 7.396202045732436,
"learning_rate": 8.717862629815099e-07,
"loss": 0.27465397119522095,
"step": 1564
},
{
"epoch": 2.4880763116057234,
"grad_norm": 10.039782203585236,
"learning_rate": 8.665740576692905e-07,
"loss": 0.3823194205760956,
"step": 1565
},
{
"epoch": 2.4896661367249604,
"grad_norm": 14.621887501884695,
"learning_rate": 8.613760016393396e-07,
"loss": 0.4154517948627472,
"step": 1566
},
{
"epoch": 2.491255961844197,
"grad_norm": 10.717313100279354,
"learning_rate": 8.561921126853151e-07,
"loss": 0.6768748760223389,
"step": 1567
},
{
"epoch": 2.492845786963434,
"grad_norm": 15.849704032961357,
"learning_rate": 8.510224085523755e-07,
"loss": 0.8631222248077393,
"step": 1568
},
{
"epoch": 2.494435612082671,
"grad_norm": 9.36745553642082,
"learning_rate": 8.458669069371278e-07,
"loss": 0.6986095905303955,
"step": 1569
},
{
"epoch": 2.496025437201908,
"grad_norm": 9.584416315406488,
"learning_rate": 8.407256254875573e-07,
"loss": 0.48314929008483887,
"step": 1570
},
{
"epoch": 2.4976152623211445,
"grad_norm": 17.063669106186904,
"learning_rate": 8.355985818029733e-07,
"loss": 0.3524690866470337,
"step": 1571
},
{
"epoch": 2.4992050874403815,
"grad_norm": 10.396412903566285,
"learning_rate": 8.304857934339494e-07,
"loss": 0.5200834274291992,
"step": 1572
},
{
"epoch": 2.5007949125596185,
"grad_norm": 7.711979616193333,
"learning_rate": 8.253872778822564e-07,
"loss": 0.2625265419483185,
"step": 1573
},
{
"epoch": 2.502384737678855,
"grad_norm": 9.955593458884781,
"learning_rate": 8.203030526008132e-07,
"loss": 0.3811442255973816,
"step": 1574
},
{
"epoch": 2.503974562798092,
"grad_norm": 18.040384809390915,
"learning_rate": 8.152331349936177e-07,
"loss": 0.3784019351005554,
"step": 1575
},
{
"epoch": 2.505564387917329,
"grad_norm": 9.853858936601155,
"learning_rate": 8.101775424156888e-07,
"loss": 0.526119589805603,
"step": 1576
},
{
"epoch": 2.507154213036566,
"grad_norm": 14.200070785171242,
"learning_rate": 8.051362921730139e-07,
"loss": 0.3691103458404541,
"step": 1577
},
{
"epoch": 2.508744038155803,
"grad_norm": 10.184463467888294,
"learning_rate": 8.001094015224813e-07,
"loss": 0.9008461236953735,
"step": 1578
},
{
"epoch": 2.5103338632750396,
"grad_norm": 10.603969424774617,
"learning_rate": 7.95096887671824e-07,
"loss": 0.4990682005882263,
"step": 1579
},
{
"epoch": 2.5119236883942766,
"grad_norm": 10.785510067750703,
"learning_rate": 7.900987677795646e-07,
"loss": 0.48411014676094055,
"step": 1580
},
{
"epoch": 2.5135135135135136,
"grad_norm": 14.022141952676426,
"learning_rate": 7.851150589549483e-07,
"loss": 0.32116425037384033,
"step": 1581
},
{
"epoch": 2.51510333863275,
"grad_norm": 8.850164925121264,
"learning_rate": 7.801457782578947e-07,
"loss": 0.2642374336719513,
"step": 1582
},
{
"epoch": 2.516693163751987,
"grad_norm": 8.03220177579705,
"learning_rate": 7.751909426989296e-07,
"loss": 0.3452329933643341,
"step": 1583
},
{
"epoch": 2.518282988871224,
"grad_norm": 9.267574245471948,
"learning_rate": 7.702505692391332e-07,
"loss": 0.21324273943901062,
"step": 1584
},
{
"epoch": 2.519872813990461,
"grad_norm": 15.753336355835474,
"learning_rate": 7.653246747900794e-07,
"loss": 0.45646774768829346,
"step": 1585
},
{
"epoch": 2.521462639109698,
"grad_norm": 17.20248048464396,
"learning_rate": 7.604132762137773e-07,
"loss": 0.7054089307785034,
"step": 1586
},
{
"epoch": 2.5230524642289347,
"grad_norm": 16.001424238132984,
"learning_rate": 7.555163903226182e-07,
"loss": 0.22410085797309875,
"step": 1587
},
{
"epoch": 2.5246422893481717,
"grad_norm": 37.693778212601856,
"learning_rate": 7.506340338793111e-07,
"loss": 0.45611119270324707,
"step": 1588
},
{
"epoch": 2.5262321144674087,
"grad_norm": 11.700490279132813,
"learning_rate": 7.457662235968283e-07,
"loss": 0.4790339171886444,
"step": 1589
},
{
"epoch": 2.5278219395866453,
"grad_norm": 11.098838776133144,
"learning_rate": 7.409129761383527e-07,
"loss": 0.3785508871078491,
"step": 1590
},
{
"epoch": 2.5294117647058822,
"grad_norm": 16.927801069701665,
"learning_rate": 7.360743081172122e-07,
"loss": 0.4303959608078003,
"step": 1591
},
{
"epoch": 2.5310015898251192,
"grad_norm": 10.982760647223845,
"learning_rate": 7.312502360968305e-07,
"loss": 0.7339632511138916,
"step": 1592
},
{
"epoch": 2.5325914149443562,
"grad_norm": 10.853451470652951,
"learning_rate": 7.26440776590665e-07,
"loss": 0.23932099342346191,
"step": 1593
},
{
"epoch": 2.5341812400635932,
"grad_norm": 12.964493817987583,
"learning_rate": 7.216459460621528e-07,
"loss": 0.6250436305999756,
"step": 1594
},
{
"epoch": 2.53577106518283,
"grad_norm": 7.910019100161251,
"learning_rate": 7.16865760924656e-07,
"loss": 0.1341008096933365,
"step": 1595
},
{
"epoch": 2.537360890302067,
"grad_norm": 13.263447526640324,
"learning_rate": 7.121002375413999e-07,
"loss": 0.36424165964126587,
"step": 1596
},
{
"epoch": 2.538950715421304,
"grad_norm": 14.3513570530778,
"learning_rate": 7.073493922254254e-07,
"loss": 0.2999057471752167,
"step": 1597
},
{
"epoch": 2.5405405405405403,
"grad_norm": 10.559755824923737,
"learning_rate": 7.026132412395247e-07,
"loss": 0.7477350234985352,
"step": 1598
},
{
"epoch": 2.5421303656597773,
"grad_norm": 8.702177987866476,
"learning_rate": 6.978918007961888e-07,
"loss": 0.20698505640029907,
"step": 1599
},
{
"epoch": 2.5437201907790143,
"grad_norm": 7.471293346977418,
"learning_rate": 6.931850870575563e-07,
"loss": 0.2608323097229004,
"step": 1600
},
{
"epoch": 2.5453100158982513,
"grad_norm": 15.229612340693974,
"learning_rate": 6.884931161353509e-07,
"loss": 0.4971431493759155,
"step": 1601
},
{
"epoch": 2.5468998410174883,
"grad_norm": 9.507301452331756,
"learning_rate": 6.838159040908294e-07,
"loss": 0.37410780787467957,
"step": 1602
},
{
"epoch": 2.548489666136725,
"grad_norm": 13.095317314498544,
"learning_rate": 6.791534669347311e-07,
"loss": 0.807184100151062,
"step": 1603
},
{
"epoch": 2.550079491255962,
"grad_norm": 6.730399733139424,
"learning_rate": 6.745058206272132e-07,
"loss": 0.38807761669158936,
"step": 1604
},
{
"epoch": 2.551669316375199,
"grad_norm": 8.515536935569736,
"learning_rate": 6.698729810778065e-07,
"loss": 0.5509893894195557,
"step": 1605
},
{
"epoch": 2.5532591414944354,
"grad_norm": 12.284475731688362,
"learning_rate": 6.652549641453543e-07,
"loss": 0.20167602598667145,
"step": 1606
},
{
"epoch": 2.5548489666136724,
"grad_norm": 12.986517686509385,
"learning_rate": 6.606517856379585e-07,
"loss": 0.695963978767395,
"step": 1607
},
{
"epoch": 2.5564387917329094,
"grad_norm": 10.273584333782026,
"learning_rate": 6.560634613129308e-07,
"loss": 0.6035486459732056,
"step": 1608
},
{
"epoch": 2.5580286168521464,
"grad_norm": 17.874198920320232,
"learning_rate": 6.514900068767316e-07,
"loss": 0.7390936613082886,
"step": 1609
},
{
"epoch": 2.559618441971383,
"grad_norm": 16.7843093457424,
"learning_rate": 6.469314379849212e-07,
"loss": 0.9999498128890991,
"step": 1610
},
{
"epoch": 2.56120826709062,
"grad_norm": 8.422352180286689,
"learning_rate": 6.423877702421038e-07,
"loss": 0.21220804750919342,
"step": 1611
},
{
"epoch": 2.562798092209857,
"grad_norm": 8.76100880894034,
"learning_rate": 6.378590192018752e-07,
"loss": 0.30193108320236206,
"step": 1612
},
{
"epoch": 2.5643879173290935,
"grad_norm": 11.065829417180284,
"learning_rate": 6.333452003667712e-07,
"loss": 0.4609653353691101,
"step": 1613
},
{
"epoch": 2.5659777424483305,
"grad_norm": 7.808157083980439,
"learning_rate": 6.288463291882085e-07,
"loss": 0.1653745174407959,
"step": 1614
},
{
"epoch": 2.5675675675675675,
"grad_norm": 11.188389838014922,
"learning_rate": 6.243624210664406e-07,
"loss": 0.4522320032119751,
"step": 1615
},
{
"epoch": 2.5691573926868045,
"grad_norm": 11.317333524643795,
"learning_rate": 6.198934913504978e-07,
"loss": 0.2544410228729248,
"step": 1616
},
{
"epoch": 2.5707472178060415,
"grad_norm": 12.812176029042877,
"learning_rate": 6.15439555338136e-07,
"loss": 0.6666281223297119,
"step": 1617
},
{
"epoch": 2.572337042925278,
"grad_norm": 16.036796436324735,
"learning_rate": 6.110006282757897e-07,
"loss": 1.2459876537322998,
"step": 1618
},
{
"epoch": 2.573926868044515,
"grad_norm": 10.515581594009006,
"learning_rate": 6.065767253585125e-07,
"loss": 0.3247292637825012,
"step": 1619
},
{
"epoch": 2.575516693163752,
"grad_norm": 12.829504232267913,
"learning_rate": 6.021678617299271e-07,
"loss": 0.709840714931488,
"step": 1620
},
{
"epoch": 2.5771065182829886,
"grad_norm": 13.365765765443042,
"learning_rate": 5.977740524821796e-07,
"loss": 0.3702651858329773,
"step": 1621
},
{
"epoch": 2.5786963434022256,
"grad_norm": 12.440548025842865,
"learning_rate": 5.933953126558772e-07,
"loss": 0.7516118288040161,
"step": 1622
},
{
"epoch": 2.5802861685214626,
"grad_norm": 10.061767061306744,
"learning_rate": 5.890316572400478e-07,
"loss": 0.2932838797569275,
"step": 1623
},
{
"epoch": 2.5818759936406996,
"grad_norm": 10.82001039911602,
"learning_rate": 5.846831011720789e-07,
"loss": 0.45325106382369995,
"step": 1624
},
{
"epoch": 2.5834658187599366,
"grad_norm": 11.537942990883321,
"learning_rate": 5.803496593376722e-07,
"loss": 0.38455528020858765,
"step": 1625
},
{
"epoch": 2.585055643879173,
"grad_norm": 14.921829719693056,
"learning_rate": 5.76031346570794e-07,
"loss": 0.1773259937763214,
"step": 1626
},
{
"epoch": 2.58664546899841,
"grad_norm": 16.04680262394032,
"learning_rate": 5.717281776536166e-07,
"loss": 0.83031165599823,
"step": 1627
},
{
"epoch": 2.588235294117647,
"grad_norm": 15.320530679465007,
"learning_rate": 5.674401673164781e-07,
"loss": 0.39672306180000305,
"step": 1628
},
{
"epoch": 2.5898251192368837,
"grad_norm": 11.056819549654698,
"learning_rate": 5.631673302378238e-07,
"loss": 0.6218395829200745,
"step": 1629
},
{
"epoch": 2.5914149443561207,
"grad_norm": 7.723791379520601,
"learning_rate": 5.589096810441574e-07,
"loss": 0.29606300592422485,
"step": 1630
},
{
"epoch": 2.5930047694753577,
"grad_norm": 18.886174979076156,
"learning_rate": 5.546672343099968e-07,
"loss": 0.5936672687530518,
"step": 1631
},
{
"epoch": 2.5945945945945947,
"grad_norm": 10.563788848015669,
"learning_rate": 5.504400045578167e-07,
"loss": 0.4816802144050598,
"step": 1632
},
{
"epoch": 2.5961844197138317,
"grad_norm": 14.40919010146339,
"learning_rate": 5.462280062580011e-07,
"loss": 0.7849152684211731,
"step": 1633
},
{
"epoch": 2.5977742448330683,
"grad_norm": 12.083284436625968,
"learning_rate": 5.420312538287981e-07,
"loss": 0.23630741238594055,
"step": 1634
},
{
"epoch": 2.5993640699523053,
"grad_norm": 17.052932534514387,
"learning_rate": 5.378497616362638e-07,
"loss": 0.5553884506225586,
"step": 1635
},
{
"epoch": 2.6009538950715423,
"grad_norm": 13.1190492109006,
"learning_rate": 5.3368354399422e-07,
"loss": 0.2689938545227051,
"step": 1636
},
{
"epoch": 2.602543720190779,
"grad_norm": 9.06212736745851,
"learning_rate": 5.295326151641966e-07,
"loss": 0.4750140309333801,
"step": 1637
},
{
"epoch": 2.604133545310016,
"grad_norm": 11.792696999459864,
"learning_rate": 5.253969893553929e-07,
"loss": 0.2415129542350769,
"step": 1638
},
{
"epoch": 2.605723370429253,
"grad_norm": 7.8365374598623765,
"learning_rate": 5.212766807246206e-07,
"loss": 0.20322957634925842,
"step": 1639
},
{
"epoch": 2.60731319554849,
"grad_norm": 12.072105526436122,
"learning_rate": 5.171717033762585e-07,
"loss": 0.7623979449272156,
"step": 1640
},
{
"epoch": 2.6089030206677264,
"grad_norm": 12.667251885854688,
"learning_rate": 5.130820713622076e-07,
"loss": 0.5517823696136475,
"step": 1641
},
{
"epoch": 2.6104928457869634,
"grad_norm": 12.32673641615129,
"learning_rate": 5.090077986818365e-07,
"loss": 0.6303294897079468,
"step": 1642
},
{
"epoch": 2.6120826709062004,
"grad_norm": 6.0750732760489905,
"learning_rate": 5.049488992819373e-07,
"loss": 0.14904797077178955,
"step": 1643
},
{
"epoch": 2.613672496025437,
"grad_norm": 12.847502830626928,
"learning_rate": 5.009053870566793e-07,
"loss": 0.9003888964653015,
"step": 1644
},
{
"epoch": 2.615262321144674,
"grad_norm": 12.970180432873189,
"learning_rate": 4.968772758475554e-07,
"loss": 0.7371312379837036,
"step": 1645
},
{
"epoch": 2.616852146263911,
"grad_norm": 8.484598724820248,
"learning_rate": 4.92864579443344e-07,
"loss": 0.2992517948150635,
"step": 1646
},
{
"epoch": 2.618441971383148,
"grad_norm": 10.894264933359624,
"learning_rate": 4.888673115800519e-07,
"loss": 0.39728879928588867,
"step": 1647
},
{
"epoch": 2.620031796502385,
"grad_norm": 9.56178561855161,
"learning_rate": 4.848854859408731e-07,
"loss": 0.7288751602172852,
"step": 1648
},
{
"epoch": 2.6216216216216215,
"grad_norm": 9.516919345378364,
"learning_rate": 4.809191161561432e-07,
"loss": 0.36588314175605774,
"step": 1649
},
{
"epoch": 2.6232114467408585,
"grad_norm": 13.863795640760882,
"learning_rate": 4.769682158032873e-07,
"loss": 0.3402399718761444,
"step": 1650
},
{
"epoch": 2.6248012718600955,
"grad_norm": 15.993452666820417,
"learning_rate": 4.7303279840677675e-07,
"loss": 0.5849899649620056,
"step": 1651
},
{
"epoch": 2.626391096979332,
"grad_norm": 7.7860113567275056,
"learning_rate": 4.6911287743808486e-07,
"loss": 0.15354162454605103,
"step": 1652
},
{
"epoch": 2.627980922098569,
"grad_norm": 7.792147534654751,
"learning_rate": 4.652084663156364e-07,
"loss": 0.10557040572166443,
"step": 1653
},
{
"epoch": 2.629570747217806,
"grad_norm": 12.969276276510007,
"learning_rate": 4.613195784047653e-07,
"loss": 0.7443052530288696,
"step": 1654
},
{
"epoch": 2.631160572337043,
"grad_norm": 23.001421263013384,
"learning_rate": 4.574462270176666e-07,
"loss": 0.4804832935333252,
"step": 1655
},
{
"epoch": 2.63275039745628,
"grad_norm": 8.452243256757463,
"learning_rate": 4.5358842541335047e-07,
"loss": 0.4740391969680786,
"step": 1656
},
{
"epoch": 2.6343402225755166,
"grad_norm": 9.240565385002999,
"learning_rate": 4.4974618679760164e-07,
"loss": 0.4672290086746216,
"step": 1657
},
{
"epoch": 2.6359300476947536,
"grad_norm": 11.37108360105448,
"learning_rate": 4.4591952432292584e-07,
"loss": 0.9921671152114868,
"step": 1658
},
{
"epoch": 2.6375198728139906,
"grad_norm": 9.649192907468974,
"learning_rate": 4.421084510885143e-07,
"loss": 0.7385834455490112,
"step": 1659
},
{
"epoch": 2.639109697933227,
"grad_norm": 19.59520026392758,
"learning_rate": 4.3831298014019144e-07,
"loss": 0.8268567323684692,
"step": 1660
},
{
"epoch": 2.640699523052464,
"grad_norm": 16.78123363872058,
"learning_rate": 4.34533124470371e-07,
"loss": 0.3472693860530853,
"step": 1661
},
{
"epoch": 2.642289348171701,
"grad_norm": 11.874912683513834,
"learning_rate": 4.3076889701801905e-07,
"loss": 0.2339482307434082,
"step": 1662
},
{
"epoch": 2.643879173290938,
"grad_norm": 8.252398533852787,
"learning_rate": 4.2702031066859993e-07,
"loss": 0.351040780544281,
"step": 1663
},
{
"epoch": 2.645468998410175,
"grad_norm": 10.127133252654218,
"learning_rate": 4.2328737825403645e-07,
"loss": 0.7667810916900635,
"step": 1664
},
{
"epoch": 2.6470588235294117,
"grad_norm": 9.643925839418634,
"learning_rate": 4.195701125526674e-07,
"loss": 0.4055326282978058,
"step": 1665
},
{
"epoch": 2.6486486486486487,
"grad_norm": 14.210118121798061,
"learning_rate": 4.1586852628920095e-07,
"loss": 0.9189319610595703,
"step": 1666
},
{
"epoch": 2.6502384737678857,
"grad_norm": 8.495696836045818,
"learning_rate": 4.121826321346739e-07,
"loss": 0.49973970651626587,
"step": 1667
},
{
"epoch": 2.6518282988871222,
"grad_norm": 10.94379871167478,
"learning_rate": 4.085124427064052e-07,
"loss": 0.5071969628334045,
"step": 1668
},
{
"epoch": 2.6534181240063592,
"grad_norm": 11.065890666647261,
"learning_rate": 4.0485797056795675e-07,
"loss": 0.5154078006744385,
"step": 1669
},
{
"epoch": 2.6550079491255962,
"grad_norm": 10.462810011955655,
"learning_rate": 4.0121922822908556e-07,
"loss": 0.6714562177658081,
"step": 1670
},
{
"epoch": 2.6565977742448332,
"grad_norm": 8.423522122403073,
"learning_rate": 3.975962281457035e-07,
"loss": 0.32664385437965393,
"step": 1671
},
{
"epoch": 2.65818759936407,
"grad_norm": 11.128517439642401,
"learning_rate": 3.939889827198362e-07,
"loss": 0.4714875817298889,
"step": 1672
},
{
"epoch": 2.659777424483307,
"grad_norm": 13.923378222882656,
"learning_rate": 3.9039750429957835e-07,
"loss": 0.33897116780281067,
"step": 1673
},
{
"epoch": 2.661367249602544,
"grad_norm": 20.032379341723246,
"learning_rate": 3.868218051790501e-07,
"loss": 2.706486940383911,
"step": 1674
},
{
"epoch": 2.6629570747217803,
"grad_norm": 10.415209305400058,
"learning_rate": 3.8326189759836097e-07,
"loss": 0.6098494529724121,
"step": 1675
},
{
"epoch": 2.6645468998410173,
"grad_norm": 10.828557752450559,
"learning_rate": 3.7971779374355866e-07,
"loss": 0.3058406412601471,
"step": 1676
},
{
"epoch": 2.6661367249602543,
"grad_norm": 11.462076872010293,
"learning_rate": 3.7618950574659807e-07,
"loss": 0.3489352762699127,
"step": 1677
},
{
"epoch": 2.6677265500794913,
"grad_norm": 6.422738759532046,
"learning_rate": 3.7267704568529015e-07,
"loss": 0.23481842875480652,
"step": 1678
},
{
"epoch": 2.6693163751987283,
"grad_norm": 17.138628565054685,
"learning_rate": 3.6918042558326597e-07,
"loss": 0.5964177846908569,
"step": 1679
},
{
"epoch": 2.670906200317965,
"grad_norm": 15.512416207448021,
"learning_rate": 3.6569965740993475e-07,
"loss": 0.4641593098640442,
"step": 1680
},
{
"epoch": 2.672496025437202,
"grad_norm": 17.229910284953995,
"learning_rate": 3.622347530804415e-07,
"loss": 0.603171706199646,
"step": 1681
},
{
"epoch": 2.674085850556439,
"grad_norm": 15.562808091273189,
"learning_rate": 3.5878572445562754e-07,
"loss": 0.6970657110214233,
"step": 1682
},
{
"epoch": 2.6756756756756754,
"grad_norm": 12.024300997243945,
"learning_rate": 3.553525833419902e-07,
"loss": 0.4082297384738922,
"step": 1683
},
{
"epoch": 2.6772655007949124,
"grad_norm": 10.420633401843734,
"learning_rate": 3.519353414916404e-07,
"loss": 0.2764531970024109,
"step": 1684
},
{
"epoch": 2.6788553259141494,
"grad_norm": 15.681718598155335,
"learning_rate": 3.48534010602265e-07,
"loss": 0.8584216237068176,
"step": 1685
},
{
"epoch": 2.6804451510333864,
"grad_norm": 9.83380597552202,
"learning_rate": 3.4514860231708414e-07,
"loss": 0.2912900149822235,
"step": 1686
},
{
"epoch": 2.6820349761526234,
"grad_norm": 24.192951811930875,
"learning_rate": 3.4177912822481286e-07,
"loss": 0.5030669569969177,
"step": 1687
},
{
"epoch": 2.68362480127186,
"grad_norm": 11.88358896966254,
"learning_rate": 3.3842559985962363e-07,
"loss": 1.0259262323379517,
"step": 1688
},
{
"epoch": 2.685214626391097,
"grad_norm": 11.567529976995225,
"learning_rate": 3.3508802870109993e-07,
"loss": 0.3652951717376709,
"step": 1689
},
{
"epoch": 2.686804451510334,
"grad_norm": 12.496576402718473,
"learning_rate": 3.3176642617420817e-07,
"loss": 0.4120873212814331,
"step": 1690
},
{
"epoch": 2.6883942766295705,
"grad_norm": 7.095600452380263,
"learning_rate": 3.2846080364924373e-07,
"loss": 0.1631387323141098,
"step": 1691
},
{
"epoch": 2.6899841017488075,
"grad_norm": 9.065489804730754,
"learning_rate": 3.251711724418072e-07,
"loss": 1.10606050491333,
"step": 1692
},
{
"epoch": 2.6915739268680445,
"grad_norm": 13.879363856578601,
"learning_rate": 3.218975438127558e-07,
"loss": 0.4376085698604584,
"step": 1693
},
{
"epoch": 2.6931637519872815,
"grad_norm": 10.554489461431915,
"learning_rate": 3.1863992896816634e-07,
"loss": 0.3944145441055298,
"step": 1694
},
{
"epoch": 2.6947535771065185,
"grad_norm": 9.418832517818354,
"learning_rate": 3.153983390593024e-07,
"loss": 0.32152724266052246,
"step": 1695
},
{
"epoch": 2.696343402225755,
"grad_norm": 8.092387137721639,
"learning_rate": 3.1217278518256844e-07,
"loss": 0.28785014152526855,
"step": 1696
},
{
"epoch": 2.697933227344992,
"grad_norm": 10.686618269640556,
"learning_rate": 3.089632783794755e-07,
"loss": 0.3542977273464203,
"step": 1697
},
{
"epoch": 2.699523052464229,
"grad_norm": 10.530056142263104,
"learning_rate": 3.0576982963660575e-07,
"loss": 0.45689189434051514,
"step": 1698
},
{
"epoch": 2.7011128775834656,
"grad_norm": 11.937948434636159,
"learning_rate": 3.0259244988556977e-07,
"loss": 0.657897412776947,
"step": 1699
},
{
"epoch": 2.7027027027027026,
"grad_norm": 7.739921962689363,
"learning_rate": 2.9943115000297453e-07,
"loss": 0.17261117696762085,
"step": 1700
},
{
"epoch": 2.7042925278219396,
"grad_norm": 11.964371899552889,
"learning_rate": 2.962859408103808e-07,
"loss": 0.5867359638214111,
"step": 1701
},
{
"epoch": 2.7058823529411766,
"grad_norm": 22.050755638219414,
"learning_rate": 2.93156833074269e-07,
"loss": 0.38635432720184326,
"step": 1702
},
{
"epoch": 2.7074721780604136,
"grad_norm": 12.547226895622131,
"learning_rate": 2.9004383750600495e-07,
"loss": 0.679887056350708,
"step": 1703
},
{
"epoch": 2.70906200317965,
"grad_norm": 14.929427715187519,
"learning_rate": 2.869469647617967e-07,
"loss": 0.9920786619186401,
"step": 1704
},
{
"epoch": 2.710651828298887,
"grad_norm": 11.228509815017409,
"learning_rate": 2.8386622544266273e-07,
"loss": 0.34728148579597473,
"step": 1705
},
{
"epoch": 2.7122416534181237,
"grad_norm": 15.131913430676436,
"learning_rate": 2.808016300943961e-07,
"loss": 0.5798835158348083,
"step": 1706
},
{
"epoch": 2.7138314785373607,
"grad_norm": 11.26085110975895,
"learning_rate": 2.777531892075253e-07,
"loss": 0.4404895603656769,
"step": 1707
},
{
"epoch": 2.7154213036565977,
"grad_norm": 12.7970246377021,
"learning_rate": 2.7472091321728067e-07,
"loss": 0.2727869749069214,
"step": 1708
},
{
"epoch": 2.7170111287758347,
"grad_norm": 6.175092667265632,
"learning_rate": 2.717048125035582e-07,
"loss": 0.4119480550289154,
"step": 1709
},
{
"epoch": 2.7186009538950717,
"grad_norm": 9.03462824372083,
"learning_rate": 2.6870489739088124e-07,
"loss": 0.3013087511062622,
"step": 1710
},
{
"epoch": 2.7201907790143083,
"grad_norm": 11.724798586268738,
"learning_rate": 2.6572117814837096e-07,
"loss": 1.1680785417556763,
"step": 1711
},
{
"epoch": 2.7217806041335453,
"grad_norm": 14.084016441362202,
"learning_rate": 2.6275366498970553e-07,
"loss": 0.44381779432296753,
"step": 1712
},
{
"epoch": 2.7233704292527823,
"grad_norm": 7.543543684045383,
"learning_rate": 2.598023680730899e-07,
"loss": 0.39759576320648193,
"step": 1713
},
{
"epoch": 2.724960254372019,
"grad_norm": 12.813558667772158,
"learning_rate": 2.568672975012154e-07,
"loss": 0.29264506697654724,
"step": 1714
},
{
"epoch": 2.726550079491256,
"grad_norm": 15.940170547428268,
"learning_rate": 2.5394846332123026e-07,
"loss": 0.38988327980041504,
"step": 1715
},
{
"epoch": 2.728139904610493,
"grad_norm": 9.447351866023746,
"learning_rate": 2.510458755247042e-07,
"loss": 0.3465936779975891,
"step": 1716
},
{
"epoch": 2.72972972972973,
"grad_norm": 8.66900468551076,
"learning_rate": 2.4815954404759034e-07,
"loss": 0.32057106494903564,
"step": 1717
},
{
"epoch": 2.731319554848967,
"grad_norm": 11.317074550394812,
"learning_rate": 2.4528947877019706e-07,
"loss": 0.4713735282421112,
"step": 1718
},
{
"epoch": 2.7329093799682034,
"grad_norm": 11.300950450913268,
"learning_rate": 2.424356895171509e-07,
"loss": 0.8660344481468201,
"step": 1719
},
{
"epoch": 2.7344992050874404,
"grad_norm": 12.591955390910721,
"learning_rate": 2.3959818605736095e-07,
"loss": 0.5647845268249512,
"step": 1720
},
{
"epoch": 2.7360890302066774,
"grad_norm": 10.689880018027425,
"learning_rate": 2.3677697810399135e-07,
"loss": 0.7428934574127197,
"step": 1721
},
{
"epoch": 2.737678855325914,
"grad_norm": 14.450292631625917,
"learning_rate": 2.3397207531442144e-07,
"loss": 0.6741989850997925,
"step": 1722
},
{
"epoch": 2.739268680445151,
"grad_norm": 10.934277929099444,
"learning_rate": 2.3118348729021856e-07,
"loss": 0.3719398081302643,
"step": 1723
},
{
"epoch": 2.740858505564388,
"grad_norm": 8.582688526207399,
"learning_rate": 2.284112235771002e-07,
"loss": 0.35930246114730835,
"step": 1724
},
{
"epoch": 2.742448330683625,
"grad_norm": 11.584902631146877,
"learning_rate": 2.2565529366490312e-07,
"loss": 0.35735633969306946,
"step": 1725
},
{
"epoch": 2.744038155802862,
"grad_norm": 8.992750559088263,
"learning_rate": 2.229157069875537e-07,
"loss": 0.32046449184417725,
"step": 1726
},
{
"epoch": 2.7456279809220985,
"grad_norm": 11.202624312115873,
"learning_rate": 2.2019247292303148e-07,
"loss": 0.33610597252845764,
"step": 1727
},
{
"epoch": 2.7472178060413355,
"grad_norm": 11.550655643274695,
"learning_rate": 2.174856007933379e-07,
"loss": 0.3931558132171631,
"step": 1728
},
{
"epoch": 2.7488076311605725,
"grad_norm": 14.989140380847523,
"learning_rate": 2.1479509986446822e-07,
"loss": 0.32899802923202515,
"step": 1729
},
{
"epoch": 2.750397456279809,
"grad_norm": 9.482109135796849,
"learning_rate": 2.1212097934637356e-07,
"loss": 0.7810277938842773,
"step": 1730
},
{
"epoch": 2.751987281399046,
"grad_norm": 15.35055441486757,
"learning_rate": 2.094632483929354e-07,
"loss": 0.5776809453964233,
"step": 1731
},
{
"epoch": 2.753577106518283,
"grad_norm": 8.57731695828125,
"learning_rate": 2.068219161019297e-07,
"loss": 0.6691749095916748,
"step": 1732
},
{
"epoch": 2.75516693163752,
"grad_norm": 7.381290455070177,
"learning_rate": 2.0419699151499773e-07,
"loss": 0.27193355560302734,
"step": 1733
},
{
"epoch": 2.756756756756757,
"grad_norm": 5.699874460976283,
"learning_rate": 2.015884836176163e-07,
"loss": 0.1572606861591339,
"step": 1734
},
{
"epoch": 2.7583465818759936,
"grad_norm": 10.571373513647107,
"learning_rate": 1.9899640133906384e-07,
"loss": 0.436165452003479,
"step": 1735
},
{
"epoch": 2.7599364069952306,
"grad_norm": 12.62306507671066,
"learning_rate": 1.964207535523921e-07,
"loss": 1.0329084396362305,
"step": 1736
},
{
"epoch": 2.7615262321144676,
"grad_norm": 20.404350098474076,
"learning_rate": 1.938615490743967e-07,
"loss": 0.9515388011932373,
"step": 1737
},
{
"epoch": 2.763116057233704,
"grad_norm": 16.516993328724855,
"learning_rate": 1.9131879666558385e-07,
"loss": 0.5335630774497986,
"step": 1738
},
{
"epoch": 2.764705882352941,
"grad_norm": 9.50707389592946,
"learning_rate": 1.8879250503014367e-07,
"loss": 0.23655389249324799,
"step": 1739
},
{
"epoch": 2.766295707472178,
"grad_norm": 8.11653483870117,
"learning_rate": 1.86282682815917e-07,
"loss": 0.24722740054130554,
"step": 1740
},
{
"epoch": 2.767885532591415,
"grad_norm": 9.555643771432486,
"learning_rate": 1.8378933861436855e-07,
"loss": 0.4017741084098816,
"step": 1741
},
{
"epoch": 2.7694753577106517,
"grad_norm": 11.245709313022,
"learning_rate": 1.813124809605571e-07,
"loss": 0.8513485193252563,
"step": 1742
},
{
"epoch": 2.7710651828298887,
"grad_norm": 10.289858176218264,
"learning_rate": 1.788521183331049e-07,
"loss": 0.4348328709602356,
"step": 1743
},
{
"epoch": 2.7726550079491257,
"grad_norm": 13.033295111219543,
"learning_rate": 1.7640825915416994e-07,
"loss": 0.34685057401657104,
"step": 1744
},
{
"epoch": 2.7742448330683622,
"grad_norm": 14.12925895553136,
"learning_rate": 1.739809117894148e-07,
"loss": 1.3768744468688965,
"step": 1745
},
{
"epoch": 2.7758346581875992,
"grad_norm": 10.75066455008062,
"learning_rate": 1.7157008454798395e-07,
"loss": 0.5484975576400757,
"step": 1746
},
{
"epoch": 2.7774244833068362,
"grad_norm": 12.245859424903898,
"learning_rate": 1.6917578568246717e-07,
"loss": 0.6153717041015625,
"step": 1747
},
{
"epoch": 2.779014308426073,
"grad_norm": 8.749615472468598,
"learning_rate": 1.6679802338887662e-07,
"loss": 0.39723843336105347,
"step": 1748
},
{
"epoch": 2.78060413354531,
"grad_norm": 10.912593855202864,
"learning_rate": 1.644368058066187e-07,
"loss": 0.5568326711654663,
"step": 1749
},
{
"epoch": 2.7821939586645468,
"grad_norm": 6.709663791277373,
"learning_rate": 1.6209214101846394e-07,
"loss": 0.1708815097808838,
"step": 1750
},
{
"epoch": 2.7837837837837838,
"grad_norm": 16.67528928478053,
"learning_rate": 1.597640370505199e-07,
"loss": 0.4418516755104065,
"step": 1751
},
{
"epoch": 2.7853736089030208,
"grad_norm": 14.998369237081626,
"learning_rate": 1.5745250187220617e-07,
"loss": 1.134207844734192,
"step": 1752
},
{
"epoch": 2.7869634340222573,
"grad_norm": 11.137562040895848,
"learning_rate": 1.5515754339622214e-07,
"loss": 0.35479578375816345,
"step": 1753
},
{
"epoch": 2.7885532591414943,
"grad_norm": 11.636705668013217,
"learning_rate": 1.5287916947852643e-07,
"loss": 0.46284008026123047,
"step": 1754
},
{
"epoch": 2.7901430842607313,
"grad_norm": 16.63965921358452,
"learning_rate": 1.506173879183026e-07,
"loss": 1.0378285646438599,
"step": 1755
},
{
"epoch": 2.7917329093799683,
"grad_norm": 8.155129987603422,
"learning_rate": 1.4837220645793905e-07,
"loss": 0.39164969325065613,
"step": 1756
},
{
"epoch": 2.7933227344992053,
"grad_norm": 10.211018022040458,
"learning_rate": 1.461436327829996e-07,
"loss": 0.48248136043548584,
"step": 1757
},
{
"epoch": 2.794912559618442,
"grad_norm": 15.348307269375146,
"learning_rate": 1.4393167452219637e-07,
"loss": 0.2933463156223297,
"step": 1758
},
{
"epoch": 2.796502384737679,
"grad_norm": 9.875084012501972,
"learning_rate": 1.4173633924736364e-07,
"loss": 0.20340853929519653,
"step": 1759
},
{
"epoch": 2.798092209856916,
"grad_norm": 9.633227056455548,
"learning_rate": 1.3955763447343618e-07,
"loss": 0.5475227236747742,
"step": 1760
},
{
"epoch": 2.7996820349761524,
"grad_norm": 12.958123384803194,
"learning_rate": 1.3739556765841712e-07,
"loss": 0.4999805688858032,
"step": 1761
},
{
"epoch": 2.8012718600953894,
"grad_norm": 8.208968385354538,
"learning_rate": 1.3525014620335786e-07,
"loss": 0.23228999972343445,
"step": 1762
},
{
"epoch": 2.8028616852146264,
"grad_norm": 10.342733229057272,
"learning_rate": 1.3312137745232878e-07,
"loss": 0.34087157249450684,
"step": 1763
},
{
"epoch": 2.8044515103338634,
"grad_norm": 7.714862229275131,
"learning_rate": 1.3100926869239583e-07,
"loss": 0.2724880576133728,
"step": 1764
},
{
"epoch": 2.8060413354531004,
"grad_norm": 9.850744407589888,
"learning_rate": 1.289138271535978e-07,
"loss": 0.3237707316875458,
"step": 1765
},
{
"epoch": 2.807631160572337,
"grad_norm": 12.26752422629431,
"learning_rate": 1.2683506000891634e-07,
"loss": 0.3386530876159668,
"step": 1766
},
{
"epoch": 2.809220985691574,
"grad_norm": 9.679795722605048,
"learning_rate": 1.2477297437425596e-07,
"loss": 0.26200276613235474,
"step": 1767
},
{
"epoch": 2.810810810810811,
"grad_norm": 8.943234925828914,
"learning_rate": 1.2272757730841744e-07,
"loss": 0.5326122045516968,
"step": 1768
},
{
"epoch": 2.8124006359300475,
"grad_norm": 13.672979508806772,
"learning_rate": 1.2069887581307615e-07,
"loss": 0.4719829857349396,
"step": 1769
},
{
"epoch": 2.8139904610492845,
"grad_norm": 14.36269478305396,
"learning_rate": 1.1868687683275259e-07,
"loss": 0.3456736207008362,
"step": 1770
},
{
"epoch": 2.8155802861685215,
"grad_norm": 9.423080558364749,
"learning_rate": 1.1669158725479579e-07,
"loss": 0.4019232988357544,
"step": 1771
},
{
"epoch": 2.8171701112877585,
"grad_norm": 9.173722366323783,
"learning_rate": 1.1471301390935497e-07,
"loss": 0.6657088398933411,
"step": 1772
},
{
"epoch": 2.818759936406995,
"grad_norm": 9.737309689447205,
"learning_rate": 1.1275116356935622e-07,
"loss": 1.166163444519043,
"step": 1773
},
{
"epoch": 2.820349761526232,
"grad_norm": 9.741914720900354,
"learning_rate": 1.1080604295048203e-07,
"loss": 0.5329183340072632,
"step": 1774
},
{
"epoch": 2.821939586645469,
"grad_norm": 12.579467080243882,
"learning_rate": 1.0887765871114731e-07,
"loss": 0.3091738522052765,
"step": 1775
},
{
"epoch": 2.8235294117647056,
"grad_norm": 6.7573935666940015,
"learning_rate": 1.0696601745247337e-07,
"loss": 0.20654040575027466,
"step": 1776
},
{
"epoch": 2.8251192368839426,
"grad_norm": 11.409061390186178,
"learning_rate": 1.0507112571827072e-07,
"loss": 0.5325148701667786,
"step": 1777
},
{
"epoch": 2.8267090620031796,
"grad_norm": 13.405759164053116,
"learning_rate": 1.0319298999501293e-07,
"loss": 0.9440799355506897,
"step": 1778
},
{
"epoch": 2.8282988871224166,
"grad_norm": 6.761965750103733,
"learning_rate": 1.0133161671181447e-07,
"loss": 0.17241652309894562,
"step": 1779
},
{
"epoch": 2.8298887122416536,
"grad_norm": 9.47481619139041,
"learning_rate": 9.948701224041124e-08,
"loss": 0.28443360328674316,
"step": 1780
},
{
"epoch": 2.83147853736089,
"grad_norm": 7.723901421020683,
"learning_rate": 9.765918289513731e-08,
"loss": 0.31056225299835205,
"step": 1781
},
{
"epoch": 2.833068362480127,
"grad_norm": 11.520132965767612,
"learning_rate": 9.584813493290157e-08,
"loss": 0.3421097993850708,
"step": 1782
},
{
"epoch": 2.834658187599364,
"grad_norm": 12.072702849486587,
"learning_rate": 9.405387455316884e-08,
"loss": 0.3810597062110901,
"step": 1783
},
{
"epoch": 2.8362480127186007,
"grad_norm": 12.027592705127029,
"learning_rate": 9.227640789793823e-08,
"loss": 0.3737994432449341,
"step": 1784
},
{
"epoch": 2.8378378378378377,
"grad_norm": 7.216954866434148,
"learning_rate": 9.051574105172101e-08,
"loss": 0.6682506799697876,
"step": 1785
},
{
"epoch": 2.8394276629570747,
"grad_norm": 12.646397663815016,
"learning_rate": 8.877188004152104e-08,
"loss": 0.28744667768478394,
"step": 1786
},
{
"epoch": 2.8410174880763117,
"grad_norm": 10.695651972504482,
"learning_rate": 8.704483083681159e-08,
"loss": 0.25048574805259705,
"step": 1787
},
{
"epoch": 2.8426073131955487,
"grad_norm": 16.61769931593027,
"learning_rate": 8.533459934952026e-08,
"loss": 0.5408765077590942,
"step": 1788
},
{
"epoch": 2.8441971383147853,
"grad_norm": 9.70944893214215,
"learning_rate": 8.364119143400185e-08,
"loss": 0.9409646987915039,
"step": 1789
},
{
"epoch": 2.8457869634340223,
"grad_norm": 9.090433208717643,
"learning_rate": 8.196461288702384e-08,
"loss": 0.8656669855117798,
"step": 1790
},
{
"epoch": 2.8473767885532593,
"grad_norm": 7.850531662470157,
"learning_rate": 8.030486944774374e-08,
"loss": 0.2649431824684143,
"step": 1791
},
{
"epoch": 2.848966613672496,
"grad_norm": 12.160314277952102,
"learning_rate": 7.866196679768956e-08,
"loss": 0.3169953525066376,
"step": 1792
},
{
"epoch": 2.850556438791733,
"grad_norm": 12.203789685459478,
"learning_rate": 7.703591056074377e-08,
"loss": 0.3425353765487671,
"step": 1793
},
{
"epoch": 2.85214626391097,
"grad_norm": 9.92531773421057,
"learning_rate": 7.542670630311721e-08,
"loss": 0.22243787348270416,
"step": 1794
},
{
"epoch": 2.853736089030207,
"grad_norm": 10.55218952704597,
"learning_rate": 7.383435953333684e-08,
"loss": 0.42897993326187134,
"step": 1795
},
{
"epoch": 2.855325914149444,
"grad_norm": 8.544479560565039,
"learning_rate": 7.225887570222412e-08,
"loss": 0.2135796844959259,
"step": 1796
},
{
"epoch": 2.8569157392686804,
"grad_norm": 8.879226711644062,
"learning_rate": 7.070026020287446e-08,
"loss": 0.3496057391166687,
"step": 1797
},
{
"epoch": 2.8585055643879174,
"grad_norm": 12.957349298601518,
"learning_rate": 6.91585183706428e-08,
"loss": 0.8193036317825317,
"step": 1798
},
{
"epoch": 2.8600953895071544,
"grad_norm": 10.197298296785444,
"learning_rate": 6.76336554831214e-08,
"loss": 0.3923993706703186,
"step": 1799
},
{
"epoch": 2.861685214626391,
"grad_norm": 16.559874726296197,
"learning_rate": 6.612567676012538e-08,
"loss": 1.1482793092727661,
"step": 1800
},
{
"epoch": 2.863275039745628,
"grad_norm": 7.444020657790237,
"learning_rate": 6.463458736367111e-08,
"loss": 0.4067334234714508,
"step": 1801
},
{
"epoch": 2.864864864864865,
"grad_norm": 9.179057916272,
"learning_rate": 6.316039239796235e-08,
"loss": 0.4661983549594879,
"step": 1802
},
{
"epoch": 2.866454689984102,
"grad_norm": 15.313397406588937,
"learning_rate": 6.170309690937015e-08,
"loss": 1.1066529750823975,
"step": 1803
},
{
"epoch": 2.868044515103339,
"grad_norm": 10.898770814177196,
"learning_rate": 6.02627058864158e-08,
"loss": 0.5644552707672119,
"step": 1804
},
{
"epoch": 2.8696343402225755,
"grad_norm": 13.306162346511908,
"learning_rate": 5.883922425975464e-08,
"loss": 0.8384277820587158,
"step": 1805
},
{
"epoch": 2.8712241653418125,
"grad_norm": 9.004595737987405,
"learning_rate": 5.743265690215938e-08,
"loss": 0.41449958086013794,
"step": 1806
},
{
"epoch": 2.872813990461049,
"grad_norm": 8.57015304482371,
"learning_rate": 5.604300862850187e-08,
"loss": 0.6183022856712341,
"step": 1807
},
{
"epoch": 2.874403815580286,
"grad_norm": 8.604669091621398,
"learning_rate": 5.467028419573861e-08,
"loss": 0.4073048532009125,
"step": 1808
},
{
"epoch": 2.875993640699523,
"grad_norm": 10.173799114716752,
"learning_rate": 5.331448830289354e-08,
"loss": 0.4159020185470581,
"step": 1809
},
{
"epoch": 2.87758346581876,
"grad_norm": 11.635023497299457,
"learning_rate": 5.19756255910403e-08,
"loss": 0.5088472962379456,
"step": 1810
},
{
"epoch": 2.879173290937997,
"grad_norm": 15.532805142667417,
"learning_rate": 5.0653700643290006e-08,
"loss": 1.051582932472229,
"step": 1811
},
{
"epoch": 2.8807631160572336,
"grad_norm": 12.281460786775908,
"learning_rate": 4.934871798477236e-08,
"loss": 0.305294394493103,
"step": 1812
},
{
"epoch": 2.8823529411764706,
"grad_norm": 13.511513272154906,
"learning_rate": 4.806068208262071e-08,
"loss": 0.270668625831604,
"step": 1813
},
{
"epoch": 2.8839427662957076,
"grad_norm": 12.19454070516872,
"learning_rate": 4.6789597345959223e-08,
"loss": 0.38206809759140015,
"step": 1814
},
{
"epoch": 2.885532591414944,
"grad_norm": 10.512867995816194,
"learning_rate": 4.5535468125883496e-08,
"loss": 0.4620250165462494,
"step": 1815
},
{
"epoch": 2.887122416534181,
"grad_norm": 10.831279386109593,
"learning_rate": 4.429829871545055e-08,
"loss": 0.5227418541908264,
"step": 1816
},
{
"epoch": 2.888712241653418,
"grad_norm": 6.535067821791534,
"learning_rate": 4.3078093349659955e-08,
"loss": 0.08256521075963974,
"step": 1817
},
{
"epoch": 2.890302066772655,
"grad_norm": 8.583564015284736,
"learning_rate": 4.187485620544163e-08,
"loss": 0.3790075182914734,
"step": 1818
},
{
"epoch": 2.891891891891892,
"grad_norm": 7.114382474719892,
"learning_rate": 4.068859140164083e-08,
"loss": 0.44887927174568176,
"step": 1819
},
{
"epoch": 2.8934817170111287,
"grad_norm": 9.563998007491127,
"learning_rate": 3.9519302999004305e-08,
"loss": 0.2510707378387451,
"step": 1820
},
{
"epoch": 2.8950715421303657,
"grad_norm": 9.712756564236205,
"learning_rate": 3.836699500016583e-08,
"loss": 0.43258237838745117,
"step": 1821
},
{
"epoch": 2.8966613672496027,
"grad_norm": 12.81052648821815,
"learning_rate": 3.7231671349634015e-08,
"loss": 0.2630723714828491,
"step": 1822
},
{
"epoch": 2.898251192368839,
"grad_norm": 14.894067536846054,
"learning_rate": 3.611333593377564e-08,
"loss": 0.2762864828109741,
"step": 1823
},
{
"epoch": 2.899841017488076,
"grad_norm": 11.847490898397657,
"learning_rate": 3.501199258080734e-08,
"loss": 0.4702332019805908,
"step": 1824
},
{
"epoch": 2.901430842607313,
"grad_norm": 9.937636788408996,
"learning_rate": 3.3927645060776725e-08,
"loss": 0.25099214911460876,
"step": 1825
},
{
"epoch": 2.90302066772655,
"grad_norm": 10.55993956444248,
"learning_rate": 3.286029708555405e-08,
"loss": 0.4693886339664459,
"step": 1826
},
{
"epoch": 2.904610492845787,
"grad_norm": 12.608332995664167,
"learning_rate": 3.1809952308818336e-08,
"loss": 0.2815450429916382,
"step": 1827
},
{
"epoch": 2.9062003179650238,
"grad_norm": 8.572207822982811,
"learning_rate": 3.077661432604184e-08,
"loss": 0.2545127272605896,
"step": 1828
},
{
"epoch": 2.9077901430842608,
"grad_norm": 15.921191531162503,
"learning_rate": 2.976028667448283e-08,
"loss": 1.7518559694290161,
"step": 1829
},
{
"epoch": 2.9093799682034978,
"grad_norm": 9.007500414306167,
"learning_rate": 2.8760972833170032e-08,
"loss": 0.34792059659957886,
"step": 1830
},
{
"epoch": 2.9109697933227343,
"grad_norm": 13.566823435631076,
"learning_rate": 2.7778676222890433e-08,
"loss": 0.5375604629516602,
"step": 1831
},
{
"epoch": 2.9125596184419713,
"grad_norm": 9.262644800722068,
"learning_rate": 2.6813400206180394e-08,
"loss": 0.6013987064361572,
"step": 1832
},
{
"epoch": 2.9141494435612083,
"grad_norm": 11.66773133548321,
"learning_rate": 2.586514808731122e-08,
"loss": 0.207585871219635,
"step": 1833
},
{
"epoch": 2.9157392686804453,
"grad_norm": 8.883715589798319,
"learning_rate": 2.4933923112279712e-08,
"loss": 0.22254782915115356,
"step": 1834
},
{
"epoch": 2.9173290937996823,
"grad_norm": 13.622198817526261,
"learning_rate": 2.4019728468797077e-08,
"loss": 0.6427359580993652,
"step": 1835
},
{
"epoch": 2.918918918918919,
"grad_norm": 8.570336486844115,
"learning_rate": 2.31225672862756e-08,
"loss": 0.3003300726413727,
"step": 1836
},
{
"epoch": 2.920508744038156,
"grad_norm": 12.493611999424541,
"learning_rate": 2.224244263582087e-08,
"loss": 0.39228111505508423,
"step": 1837
},
{
"epoch": 2.9220985691573924,
"grad_norm": 23.237304392000453,
"learning_rate": 2.137935753022069e-08,
"loss": 1.1374372243881226,
"step": 1838
},
{
"epoch": 2.9236883942766294,
"grad_norm": 9.929956522519397,
"learning_rate": 2.053331492393229e-08,
"loss": 0.24724097549915314,
"step": 1839
},
{
"epoch": 2.9252782193958664,
"grad_norm": 13.859185232682037,
"learning_rate": 1.9704317713076236e-08,
"loss": 0.5617753863334656,
"step": 1840
},
{
"epoch": 2.9268680445151034,
"grad_norm": 8.689194933842717,
"learning_rate": 1.8892368735422552e-08,
"loss": 0.1591765284538269,
"step": 1841
},
{
"epoch": 2.9284578696343404,
"grad_norm": 9.592978204928395,
"learning_rate": 1.8097470770384596e-08,
"loss": 0.685766339302063,
"step": 1842
},
{
"epoch": 2.930047694753577,
"grad_norm": 8.85441695249632,
"learning_rate": 1.7319626539005762e-08,
"loss": 0.30965808033943176,
"step": 1843
},
{
"epoch": 2.931637519872814,
"grad_norm": 9.140691655581454,
"learning_rate": 1.655883870395336e-08,
"loss": 0.25469398498535156,
"step": 1844
},
{
"epoch": 2.933227344992051,
"grad_norm": 14.984105659136183,
"learning_rate": 1.5815109869509183e-08,
"loss": 0.31988632678985596,
"step": 1845
},
{
"epoch": 2.9348171701112875,
"grad_norm": 8.04716745056648,
"learning_rate": 1.508844258155728e-08,
"loss": 0.5178359150886536,
"step": 1846
},
{
"epoch": 2.9364069952305245,
"grad_norm": 11.080451900012784,
"learning_rate": 1.4378839327580663e-08,
"loss": 0.31709960103034973,
"step": 1847
},
{
"epoch": 2.9379968203497615,
"grad_norm": 34.195811485082544,
"learning_rate": 1.3686302536647378e-08,
"loss": 0.7739929556846619,
"step": 1848
},
{
"epoch": 2.9395866454689985,
"grad_norm": 10.74556954521327,
"learning_rate": 1.3010834579405552e-08,
"loss": 0.2143116295337677,
"step": 1849
},
{
"epoch": 2.9411764705882355,
"grad_norm": 11.30842772051544,
"learning_rate": 1.2352437768074487e-08,
"loss": 0.3580181896686554,
"step": 1850
},
{
"epoch": 2.942766295707472,
"grad_norm": 9.620337397959851,
"learning_rate": 1.1711114356436903e-08,
"loss": 0.5074045062065125,
"step": 1851
},
{
"epoch": 2.944356120826709,
"grad_norm": 9.053749272015484,
"learning_rate": 1.1086866539830044e-08,
"loss": 0.2650204002857208,
"step": 1852
},
{
"epoch": 2.945945945945946,
"grad_norm": 9.516284200897292,
"learning_rate": 1.0479696455139576e-08,
"loss": 0.340106725692749,
"step": 1853
},
{
"epoch": 2.9475357710651826,
"grad_norm": 12.839207556626045,
"learning_rate": 9.889606180792378e-09,
"loss": 1.0296393632888794,
"step": 1854
},
{
"epoch": 2.9491255961844196,
"grad_norm": 13.736410843682803,
"learning_rate": 9.316597736747091e-09,
"loss": 0.8265128135681152,
"step": 1855
},
{
"epoch": 2.9507154213036566,
"grad_norm": 8.982349734294708,
"learning_rate": 8.7606730844908e-09,
"loss": 0.2861822247505188,
"step": 1856
},
{
"epoch": 2.9523052464228936,
"grad_norm": 10.159889957225653,
"learning_rate": 8.221834127029593e-09,
"loss": 0.670875072479248,
"step": 1857
},
{
"epoch": 2.9538950715421306,
"grad_norm": 13.273432006247354,
"learning_rate": 7.700082708883006e-09,
"loss": 0.9394416213035583,
"step": 1858
},
{
"epoch": 2.955484896661367,
"grad_norm": 9.790433583461628,
"learning_rate": 7.1954206160768096e-09,
"loss": 0.9223511219024658,
"step": 1859
},
{
"epoch": 2.957074721780604,
"grad_norm": 23.37243487658773,
"learning_rate": 6.7078495761385695e-09,
"loss": 0.3571210205554962,
"step": 1860
},
{
"epoch": 2.958664546899841,
"grad_norm": 11.351052610839561,
"learning_rate": 6.237371258090985e-09,
"loss": 0.2777283191680908,
"step": 1861
},
{
"epoch": 2.9602543720190777,
"grad_norm": 8.885306462727502,
"learning_rate": 5.783987272445779e-09,
"loss": 0.3929477334022522,
"step": 1862
},
{
"epoch": 2.9618441971383147,
"grad_norm": 16.28630558472394,
"learning_rate": 5.347699171197595e-09,
"loss": 1.0299521684646606,
"step": 1863
},
{
"epoch": 2.9634340222575517,
"grad_norm": 13.806574966471507,
"learning_rate": 4.928508447821223e-09,
"loss": 0.6577022075653076,
"step": 1864
},
{
"epoch": 2.9650238473767887,
"grad_norm": 12.093973318345537,
"learning_rate": 4.526416537263267e-09,
"loss": 0.6602462530136108,
"step": 1865
},
{
"epoch": 2.9666136724960257,
"grad_norm": 9.484393327040737,
"learning_rate": 4.141424815938822e-09,
"loss": 0.42496952414512634,
"step": 1866
},
{
"epoch": 2.9682034976152623,
"grad_norm": 7.945852000270968,
"learning_rate": 3.77353460172869e-09,
"loss": 0.20469367504119873,
"step": 1867
},
{
"epoch": 2.9697933227344993,
"grad_norm": 10.084966174225068,
"learning_rate": 3.422747153969952e-09,
"loss": 0.26603108644485474,
"step": 1868
},
{
"epoch": 2.9713831478537363,
"grad_norm": 9.533596522923068,
"learning_rate": 3.089063673456516e-09,
"loss": 0.5473682284355164,
"step": 1869
},
{
"epoch": 2.972972972972973,
"grad_norm": 11.424298125013499,
"learning_rate": 2.7724853024324594e-09,
"loss": 0.22095058858394623,
"step": 1870
},
{
"epoch": 2.97456279809221,
"grad_norm": 9.266786716981644,
"learning_rate": 2.473013124589252e-09,
"loss": 0.31715139746665955,
"step": 1871
},
{
"epoch": 2.976152623211447,
"grad_norm": 9.974606963747325,
"learning_rate": 2.1906481650613153e-09,
"loss": 0.32596707344055176,
"step": 1872
},
{
"epoch": 2.977742448330684,
"grad_norm": 10.174795901337596,
"learning_rate": 1.925391390421583e-09,
"loss": 0.841127872467041,
"step": 1873
},
{
"epoch": 2.9793322734499204,
"grad_norm": 22.599584004927102,
"learning_rate": 1.6772437086803873e-09,
"loss": 0.6419406533241272,
"step": 1874
},
{
"epoch": 2.9809220985691574,
"grad_norm": 11.655290018579056,
"learning_rate": 1.446205969282133e-09,
"loss": 0.2810055613517761,
"step": 1875
},
{
"epoch": 2.9825119236883944,
"grad_norm": 7.105036483547834,
"learning_rate": 1.2322789630997422e-09,
"loss": 0.23816505074501038,
"step": 1876
},
{
"epoch": 2.984101748807631,
"grad_norm": 11.368820848007578,
"learning_rate": 1.0354634224346572e-09,
"loss": 0.30685269832611084,
"step": 1877
},
{
"epoch": 2.985691573926868,
"grad_norm": 13.391698269787614,
"learning_rate": 8.557600210140627e-10,
"loss": 1.1483169794082642,
"step": 1878
},
{
"epoch": 2.987281399046105,
"grad_norm": 14.95082540693622,
"learning_rate": 6.931693739864465e-10,
"loss": 0.5511203408241272,
"step": 1879
},
{
"epoch": 2.988871224165342,
"grad_norm": 12.50188330470179,
"learning_rate": 5.476920379221539e-10,
"loss": 0.4261815547943115,
"step": 1880
},
{
"epoch": 2.990461049284579,
"grad_norm": 16.76303275226908,
"learning_rate": 4.1932851081005753e-10,
"loss": 0.5755501389503479,
"step": 1881
},
{
"epoch": 2.9920508744038155,
"grad_norm": 16.077528372751395,
"learning_rate": 3.080792320564463e-10,
"loss": 0.3609001636505127,
"step": 1882
},
{
"epoch": 2.9936406995230525,
"grad_norm": 13.199387427384234,
"learning_rate": 2.1394458248169548e-10,
"loss": 0.5746971368789673,
"step": 1883
},
{
"epoch": 2.9952305246422894,
"grad_norm": 11.976309923228456,
"learning_rate": 1.3692488432304195e-10,
"loss": 0.8742420673370361,
"step": 1884
},
{
"epoch": 2.996820349761526,
"grad_norm": 12.499556410677085,
"learning_rate": 7.702040122847809e-11,
"loss": 0.3924184739589691,
"step": 1885
},
{
"epoch": 2.998410174880763,
"grad_norm": 11.370980136761439,
"learning_rate": 3.423133825897207e-11,
"loss": 0.27480587363243103,
"step": 1886
},
{
"epoch": 3.0,
"grad_norm": 12.874401912588322,
"learning_rate": 8.557841888467977e-12,
"loss": 0.2574765980243683,
"step": 1887
},
{
"epoch": 3.0,
"step": 1887,
"total_flos": 5124506050560.0,
"train_loss": 1.6660761659738874,
"train_runtime": 2303.3437,
"train_samples_per_second": 3.274,
"train_steps_per_second": 0.819
}
],
"logging_steps": 1,
"max_steps": 1887,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5124506050560.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}