Files
llama3-8b-full-pretrain-was…/trainer_state.json
ModelHub XC 6c31f339d0 初始化项目,由ModelHub XC社区提供模型
Model: shuoxing/llama3-8b-full-pretrain-wash-c4-3-0m-bs4
Source: Original Platform
2026-06-12 17:03:18 +08:00

33035 lines
852 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 4713,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006365372374283895,
"grad_norm": 39.59088810381954,
"learning_rate": 0.0,
"loss": 4.035896301269531,
"step": 1
},
{
"epoch": 0.001273074474856779,
"grad_norm": 33.95388657769443,
"learning_rate": 2.1186440677966104e-08,
"loss": 4.143885612487793,
"step": 2
},
{
"epoch": 0.0019096117122851686,
"grad_norm": 39.650793384067434,
"learning_rate": 4.237288135593221e-08,
"loss": 4.37307596206665,
"step": 3
},
{
"epoch": 0.002546148949713558,
"grad_norm": 38.44073023933529,
"learning_rate": 6.355932203389831e-08,
"loss": 4.574411392211914,
"step": 4
},
{
"epoch": 0.003182686187141948,
"grad_norm": 34.55309692208958,
"learning_rate": 8.474576271186442e-08,
"loss": 3.7624588012695312,
"step": 5
},
{
"epoch": 0.0038192234245703373,
"grad_norm": 42.562598379702074,
"learning_rate": 1.0593220338983051e-07,
"loss": 4.798104763031006,
"step": 6
},
{
"epoch": 0.004455760661998727,
"grad_norm": 37.89472823315551,
"learning_rate": 1.2711864406779662e-07,
"loss": 3.6923468112945557,
"step": 7
},
{
"epoch": 0.005092297899427116,
"grad_norm": 42.462788278686155,
"learning_rate": 1.4830508474576274e-07,
"loss": 4.457640171051025,
"step": 8
},
{
"epoch": 0.005728835136855506,
"grad_norm": 30.085931923114636,
"learning_rate": 1.6949152542372883e-07,
"loss": 4.173328876495361,
"step": 9
},
{
"epoch": 0.006365372374283896,
"grad_norm": 35.73217315240775,
"learning_rate": 1.9067796610169495e-07,
"loss": 3.9059009552001953,
"step": 10
},
{
"epoch": 0.007001909611712286,
"grad_norm": 40.04548602304375,
"learning_rate": 2.1186440677966102e-07,
"loss": 4.167541027069092,
"step": 11
},
{
"epoch": 0.0076384468491406746,
"grad_norm": 42.683295384244765,
"learning_rate": 2.3305084745762714e-07,
"loss": 4.3580641746521,
"step": 12
},
{
"epoch": 0.008274984086569064,
"grad_norm": 41.38080814598698,
"learning_rate": 2.5423728813559323e-07,
"loss": 3.817225217819214,
"step": 13
},
{
"epoch": 0.008911521323997454,
"grad_norm": 43.28513684057297,
"learning_rate": 2.7542372881355935e-07,
"loss": 4.469485282897949,
"step": 14
},
{
"epoch": 0.009548058561425843,
"grad_norm": 37.357571431767745,
"learning_rate": 2.966101694915255e-07,
"loss": 4.299037933349609,
"step": 15
},
{
"epoch": 0.010184595798854232,
"grad_norm": 43.5541276589533,
"learning_rate": 3.1779661016949154e-07,
"loss": 4.044004917144775,
"step": 16
},
{
"epoch": 0.010821133036282623,
"grad_norm": 33.41877568658477,
"learning_rate": 3.3898305084745766e-07,
"loss": 3.832738161087036,
"step": 17
},
{
"epoch": 0.011457670273711012,
"grad_norm": 38.34028230142563,
"learning_rate": 3.601694915254238e-07,
"loss": 4.182341575622559,
"step": 18
},
{
"epoch": 0.0120942075111394,
"grad_norm": 31.12436344380363,
"learning_rate": 3.813559322033899e-07,
"loss": 3.7764952182769775,
"step": 19
},
{
"epoch": 0.012730744748567792,
"grad_norm": 43.26168495696408,
"learning_rate": 4.025423728813559e-07,
"loss": 4.379842281341553,
"step": 20
},
{
"epoch": 0.01336728198599618,
"grad_norm": 38.6827307460478,
"learning_rate": 4.2372881355932204e-07,
"loss": 4.382342338562012,
"step": 21
},
{
"epoch": 0.014003819223424571,
"grad_norm": 33.078543164747124,
"learning_rate": 4.4491525423728816e-07,
"loss": 4.717368125915527,
"step": 22
},
{
"epoch": 0.01464035646085296,
"grad_norm": 35.76103525996946,
"learning_rate": 4.661016949152543e-07,
"loss": 3.935464382171631,
"step": 23
},
{
"epoch": 0.015276893698281349,
"grad_norm": 40.65497068724489,
"learning_rate": 4.872881355932204e-07,
"loss": 4.350847244262695,
"step": 24
},
{
"epoch": 0.015913430935709738,
"grad_norm": 38.848863637549826,
"learning_rate": 5.084745762711865e-07,
"loss": 3.9706592559814453,
"step": 25
},
{
"epoch": 0.016549968173138127,
"grad_norm": 30.366224160895303,
"learning_rate": 5.296610169491525e-07,
"loss": 4.436844825744629,
"step": 26
},
{
"epoch": 0.01718650541056652,
"grad_norm": 44.05414632376414,
"learning_rate": 5.508474576271187e-07,
"loss": 4.4211554527282715,
"step": 27
},
{
"epoch": 0.01782304264799491,
"grad_norm": 32.334770588920634,
"learning_rate": 5.720338983050848e-07,
"loss": 4.006234169006348,
"step": 28
},
{
"epoch": 0.018459579885423297,
"grad_norm": 30.367874935733056,
"learning_rate": 5.93220338983051e-07,
"loss": 3.741302490234375,
"step": 29
},
{
"epoch": 0.019096117122851686,
"grad_norm": 37.81163169963108,
"learning_rate": 6.14406779661017e-07,
"loss": 4.7005791664123535,
"step": 30
},
{
"epoch": 0.019732654360280075,
"grad_norm": 25.301865125654622,
"learning_rate": 6.355932203389831e-07,
"loss": 3.332984447479248,
"step": 31
},
{
"epoch": 0.020369191597708464,
"grad_norm": 27.638898241178488,
"learning_rate": 6.567796610169493e-07,
"loss": 3.882140636444092,
"step": 32
},
{
"epoch": 0.021005728835136857,
"grad_norm": 25.13937296259451,
"learning_rate": 6.779661016949153e-07,
"loss": 3.568941831588745,
"step": 33
},
{
"epoch": 0.021642266072565246,
"grad_norm": 21.604747939074876,
"learning_rate": 6.991525423728814e-07,
"loss": 2.9004387855529785,
"step": 34
},
{
"epoch": 0.022278803309993635,
"grad_norm": 22.890488241425363,
"learning_rate": 7.203389830508476e-07,
"loss": 3.6715078353881836,
"step": 35
},
{
"epoch": 0.022915340547422024,
"grad_norm": 21.55591651651768,
"learning_rate": 7.415254237288136e-07,
"loss": 3.8014559745788574,
"step": 36
},
{
"epoch": 0.023551877784850413,
"grad_norm": 17.678790230951982,
"learning_rate": 7.627118644067798e-07,
"loss": 3.3839111328125,
"step": 37
},
{
"epoch": 0.0241884150222788,
"grad_norm": 27.699179563792164,
"learning_rate": 7.838983050847458e-07,
"loss": 3.6236789226531982,
"step": 38
},
{
"epoch": 0.024824952259707194,
"grad_norm": 20.74523751782202,
"learning_rate": 8.050847457627118e-07,
"loss": 3.4830198287963867,
"step": 39
},
{
"epoch": 0.025461489497135583,
"grad_norm": 22.527501846528462,
"learning_rate": 8.26271186440678e-07,
"loss": 3.575878620147705,
"step": 40
},
{
"epoch": 0.026098026734563972,
"grad_norm": 19.274175358858386,
"learning_rate": 8.474576271186441e-07,
"loss": 3.8505752086639404,
"step": 41
},
{
"epoch": 0.02673456397199236,
"grad_norm": 15.568017150909261,
"learning_rate": 8.686440677966103e-07,
"loss": 3.506964683532715,
"step": 42
},
{
"epoch": 0.02737110120942075,
"grad_norm": 12.75278847276089,
"learning_rate": 8.898305084745763e-07,
"loss": 3.152952194213867,
"step": 43
},
{
"epoch": 0.028007638446849142,
"grad_norm": 28.932237833127076,
"learning_rate": 9.110169491525424e-07,
"loss": 3.9635744094848633,
"step": 44
},
{
"epoch": 0.02864417568427753,
"grad_norm": 16.928064417217087,
"learning_rate": 9.322033898305086e-07,
"loss": 3.3759422302246094,
"step": 45
},
{
"epoch": 0.02928071292170592,
"grad_norm": 14.603814518550873,
"learning_rate": 9.533898305084746e-07,
"loss": 2.9782557487487793,
"step": 46
},
{
"epoch": 0.02991725015913431,
"grad_norm": 15.836946438005068,
"learning_rate": 9.745762711864408e-07,
"loss": 2.773993968963623,
"step": 47
},
{
"epoch": 0.030553787396562698,
"grad_norm": 21.03323938551633,
"learning_rate": 9.957627118644069e-07,
"loss": 3.337113857269287,
"step": 48
},
{
"epoch": 0.031190324633991087,
"grad_norm": 17.099584020068214,
"learning_rate": 1.016949152542373e-06,
"loss": 3.631762981414795,
"step": 49
},
{
"epoch": 0.031826861871419476,
"grad_norm": 18.911972887238512,
"learning_rate": 1.038135593220339e-06,
"loss": 3.622838020324707,
"step": 50
},
{
"epoch": 0.032463399108847865,
"grad_norm": 19.659886788763284,
"learning_rate": 1.059322033898305e-06,
"loss": 3.443356990814209,
"step": 51
},
{
"epoch": 0.033099936346276254,
"grad_norm": 21.357907922654753,
"learning_rate": 1.0805084745762714e-06,
"loss": 3.409188747406006,
"step": 52
},
{
"epoch": 0.03373647358370465,
"grad_norm": 12.567319854338075,
"learning_rate": 1.1016949152542374e-06,
"loss": 2.806626796722412,
"step": 53
},
{
"epoch": 0.03437301082113304,
"grad_norm": 14.440684929731635,
"learning_rate": 1.1228813559322035e-06,
"loss": 3.726706027984619,
"step": 54
},
{
"epoch": 0.03500954805856143,
"grad_norm": 13.848312040485071,
"learning_rate": 1.1440677966101696e-06,
"loss": 3.378289222717285,
"step": 55
},
{
"epoch": 0.03564608529598982,
"grad_norm": 21.80998498242552,
"learning_rate": 1.1652542372881356e-06,
"loss": 3.6709766387939453,
"step": 56
},
{
"epoch": 0.036282622533418206,
"grad_norm": 24.52282633397571,
"learning_rate": 1.186440677966102e-06,
"loss": 3.545236110687256,
"step": 57
},
{
"epoch": 0.036919159770846595,
"grad_norm": 15.228617435626584,
"learning_rate": 1.207627118644068e-06,
"loss": 3.3617208003997803,
"step": 58
},
{
"epoch": 0.037555697008274984,
"grad_norm": 15.97183299589106,
"learning_rate": 1.228813559322034e-06,
"loss": 3.2228782176971436,
"step": 59
},
{
"epoch": 0.03819223424570337,
"grad_norm": 19.003049600129348,
"learning_rate": 1.25e-06,
"loss": 3.9675958156585693,
"step": 60
},
{
"epoch": 0.03882877148313176,
"grad_norm": 12.655134824362944,
"learning_rate": 1.2711864406779662e-06,
"loss": 2.9336557388305664,
"step": 61
},
{
"epoch": 0.03946530872056015,
"grad_norm": 11.09126231961165,
"learning_rate": 1.2923728813559322e-06,
"loss": 3.391512870788574,
"step": 62
},
{
"epoch": 0.04010184595798854,
"grad_norm": 10.472726831882387,
"learning_rate": 1.3135593220338985e-06,
"loss": 3.2284183502197266,
"step": 63
},
{
"epoch": 0.04073838319541693,
"grad_norm": 12.588960784459385,
"learning_rate": 1.3347457627118646e-06,
"loss": 3.676628589630127,
"step": 64
},
{
"epoch": 0.041374920432845325,
"grad_norm": 15.535810556617445,
"learning_rate": 1.3559322033898307e-06,
"loss": 3.5504660606384277,
"step": 65
},
{
"epoch": 0.042011457670273714,
"grad_norm": 11.190845261977694,
"learning_rate": 1.3771186440677967e-06,
"loss": 2.8842861652374268,
"step": 66
},
{
"epoch": 0.0426479949077021,
"grad_norm": 15.172609013946255,
"learning_rate": 1.3983050847457628e-06,
"loss": 2.523017406463623,
"step": 67
},
{
"epoch": 0.04328453214513049,
"grad_norm": 16.3482667287732,
"learning_rate": 1.419491525423729e-06,
"loss": 4.006606101989746,
"step": 68
},
{
"epoch": 0.04392106938255888,
"grad_norm": 10.951360933671682,
"learning_rate": 1.4406779661016951e-06,
"loss": 3.1742300987243652,
"step": 69
},
{
"epoch": 0.04455760661998727,
"grad_norm": 14.28121281093357,
"learning_rate": 1.4618644067796612e-06,
"loss": 3.2154297828674316,
"step": 70
},
{
"epoch": 0.04519414385741566,
"grad_norm": 12.175471088449958,
"learning_rate": 1.4830508474576273e-06,
"loss": 3.3929946422576904,
"step": 71
},
{
"epoch": 0.04583068109484405,
"grad_norm": 15.38019255288318,
"learning_rate": 1.5042372881355931e-06,
"loss": 3.283337354660034,
"step": 72
},
{
"epoch": 0.046467218332272436,
"grad_norm": 14.003505937351898,
"learning_rate": 1.5254237288135596e-06,
"loss": 3.858952045440674,
"step": 73
},
{
"epoch": 0.047103755569700825,
"grad_norm": 13.489946161626735,
"learning_rate": 1.5466101694915257e-06,
"loss": 3.017693519592285,
"step": 74
},
{
"epoch": 0.047740292807129214,
"grad_norm": 12.05605526116894,
"learning_rate": 1.5677966101694915e-06,
"loss": 3.2235941886901855,
"step": 75
},
{
"epoch": 0.0483768300445576,
"grad_norm": 14.773077715980815,
"learning_rate": 1.5889830508474576e-06,
"loss": 3.0594072341918945,
"step": 76
},
{
"epoch": 0.049013367281986,
"grad_norm": 11.069301477529606,
"learning_rate": 1.6101694915254237e-06,
"loss": 3.198401927947998,
"step": 77
},
{
"epoch": 0.04964990451941439,
"grad_norm": 9.895147990284883,
"learning_rate": 1.63135593220339e-06,
"loss": 3.6801962852478027,
"step": 78
},
{
"epoch": 0.05028644175684278,
"grad_norm": 10.978847446356875,
"learning_rate": 1.652542372881356e-06,
"loss": 2.884700059890747,
"step": 79
},
{
"epoch": 0.050922978994271166,
"grad_norm": 9.941976716243797,
"learning_rate": 1.673728813559322e-06,
"loss": 3.1672863960266113,
"step": 80
},
{
"epoch": 0.051559516231699555,
"grad_norm": 8.355979527089534,
"learning_rate": 1.6949152542372882e-06,
"loss": 2.9366581439971924,
"step": 81
},
{
"epoch": 0.052196053469127944,
"grad_norm": 12.47555798769808,
"learning_rate": 1.7161016949152542e-06,
"loss": 2.9476046562194824,
"step": 82
},
{
"epoch": 0.05283259070655633,
"grad_norm": 11.376509533386537,
"learning_rate": 1.7372881355932205e-06,
"loss": 3.0564022064208984,
"step": 83
},
{
"epoch": 0.05346912794398472,
"grad_norm": 10.246748605896254,
"learning_rate": 1.7584745762711866e-06,
"loss": 3.380188226699829,
"step": 84
},
{
"epoch": 0.05410566518141311,
"grad_norm": 7.143523650475739,
"learning_rate": 1.7796610169491526e-06,
"loss": 2.8444905281066895,
"step": 85
},
{
"epoch": 0.0547422024188415,
"grad_norm": 11.871565488249749,
"learning_rate": 1.8008474576271187e-06,
"loss": 4.042019844055176,
"step": 86
},
{
"epoch": 0.05537873965626989,
"grad_norm": 10.33040691524808,
"learning_rate": 1.8220338983050848e-06,
"loss": 3.086951732635498,
"step": 87
},
{
"epoch": 0.056015276893698285,
"grad_norm": 12.662422362320376,
"learning_rate": 1.843220338983051e-06,
"loss": 3.7293930053710938,
"step": 88
},
{
"epoch": 0.056651814131126674,
"grad_norm": 45.45569964927334,
"learning_rate": 1.8644067796610171e-06,
"loss": 2.7154955863952637,
"step": 89
},
{
"epoch": 0.05728835136855506,
"grad_norm": 8.894617344554897,
"learning_rate": 1.8855932203389832e-06,
"loss": 3.0096163749694824,
"step": 90
},
{
"epoch": 0.05792488860598345,
"grad_norm": 9.333847276631833,
"learning_rate": 1.9067796610169493e-06,
"loss": 3.182313919067383,
"step": 91
},
{
"epoch": 0.05856142584341184,
"grad_norm": 12.328514043388411,
"learning_rate": 1.9279661016949157e-06,
"loss": 2.5885255336761475,
"step": 92
},
{
"epoch": 0.05919796308084023,
"grad_norm": 14.47210902997613,
"learning_rate": 1.9491525423728816e-06,
"loss": 2.846513509750366,
"step": 93
},
{
"epoch": 0.05983450031826862,
"grad_norm": 14.1904181336431,
"learning_rate": 1.9703389830508475e-06,
"loss": 2.99960994720459,
"step": 94
},
{
"epoch": 0.06047103755569701,
"grad_norm": 10.037634547267674,
"learning_rate": 1.9915254237288137e-06,
"loss": 3.055741786956787,
"step": 95
},
{
"epoch": 0.061107574793125397,
"grad_norm": 9.063588281861795,
"learning_rate": 2.0127118644067796e-06,
"loss": 3.3117358684539795,
"step": 96
},
{
"epoch": 0.061744112030553785,
"grad_norm": 11.734408626893863,
"learning_rate": 2.033898305084746e-06,
"loss": 3.515754222869873,
"step": 97
},
{
"epoch": 0.062380649267982174,
"grad_norm": 15.579342259561264,
"learning_rate": 2.055084745762712e-06,
"loss": 2.7893433570861816,
"step": 98
},
{
"epoch": 0.06301718650541056,
"grad_norm": 14.002678700389755,
"learning_rate": 2.076271186440678e-06,
"loss": 3.548750877380371,
"step": 99
},
{
"epoch": 0.06365372374283895,
"grad_norm": 14.08928760330149,
"learning_rate": 2.0974576271186443e-06,
"loss": 3.7734217643737793,
"step": 100
},
{
"epoch": 0.06429026098026734,
"grad_norm": 10.865641175484704,
"learning_rate": 2.11864406779661e-06,
"loss": 2.918238878250122,
"step": 101
},
{
"epoch": 0.06492679821769573,
"grad_norm": 9.385929922096402,
"learning_rate": 2.1398305084745764e-06,
"loss": 3.298204183578491,
"step": 102
},
{
"epoch": 0.06556333545512412,
"grad_norm": 9.54293780239165,
"learning_rate": 2.1610169491525427e-06,
"loss": 3.274136781692505,
"step": 103
},
{
"epoch": 0.06619987269255251,
"grad_norm": 12.0008027622266,
"learning_rate": 2.1822033898305086e-06,
"loss": 2.752487897872925,
"step": 104
},
{
"epoch": 0.0668364099299809,
"grad_norm": 9.039075799265685,
"learning_rate": 2.203389830508475e-06,
"loss": 3.3141207695007324,
"step": 105
},
{
"epoch": 0.0674729471674093,
"grad_norm": 21.970990311790214,
"learning_rate": 2.2245762711864407e-06,
"loss": 3.47676420211792,
"step": 106
},
{
"epoch": 0.06810948440483769,
"grad_norm": 15.184316115178678,
"learning_rate": 2.245762711864407e-06,
"loss": 3.7983956336975098,
"step": 107
},
{
"epoch": 0.06874602164226608,
"grad_norm": 9.243450593109065,
"learning_rate": 2.2669491525423732e-06,
"loss": 2.919459342956543,
"step": 108
},
{
"epoch": 0.06938255887969447,
"grad_norm": 8.560492177295222,
"learning_rate": 2.288135593220339e-06,
"loss": 3.0816235542297363,
"step": 109
},
{
"epoch": 0.07001909611712286,
"grad_norm": 13.125075871676552,
"learning_rate": 2.3093220338983054e-06,
"loss": 3.1430845260620117,
"step": 110
},
{
"epoch": 0.07065563335455124,
"grad_norm": 7.213511595375255,
"learning_rate": 2.3305084745762712e-06,
"loss": 3.198106288909912,
"step": 111
},
{
"epoch": 0.07129217059197963,
"grad_norm": 9.725007194186064,
"learning_rate": 2.3516949152542375e-06,
"loss": 3.343648672103882,
"step": 112
},
{
"epoch": 0.07192870782940802,
"grad_norm": 17.716815300327376,
"learning_rate": 2.372881355932204e-06,
"loss": 3.273315191268921,
"step": 113
},
{
"epoch": 0.07256524506683641,
"grad_norm": 15.090511908740678,
"learning_rate": 2.3940677966101697e-06,
"loss": 3.6860384941101074,
"step": 114
},
{
"epoch": 0.0732017823042648,
"grad_norm": 23.619875049161756,
"learning_rate": 2.415254237288136e-06,
"loss": 3.4047603607177734,
"step": 115
},
{
"epoch": 0.07383831954169319,
"grad_norm": 11.085596287501351,
"learning_rate": 2.436440677966102e-06,
"loss": 3.36086368560791,
"step": 116
},
{
"epoch": 0.07447485677912158,
"grad_norm": 10.401067566452673,
"learning_rate": 2.457627118644068e-06,
"loss": 2.746903419494629,
"step": 117
},
{
"epoch": 0.07511139401654997,
"grad_norm": 8.167729784535762,
"learning_rate": 2.4788135593220343e-06,
"loss": 2.8697047233581543,
"step": 118
},
{
"epoch": 0.07574793125397836,
"grad_norm": 18.19405515066229,
"learning_rate": 2.5e-06,
"loss": 2.6754255294799805,
"step": 119
},
{
"epoch": 0.07638446849140675,
"grad_norm": 8.32936145746687,
"learning_rate": 2.521186440677966e-06,
"loss": 2.7582414150238037,
"step": 120
},
{
"epoch": 0.07702100572883513,
"grad_norm": 13.676158548645208,
"learning_rate": 2.5423728813559323e-06,
"loss": 2.912391185760498,
"step": 121
},
{
"epoch": 0.07765754296626352,
"grad_norm": 11.723743727298041,
"learning_rate": 2.563559322033898e-06,
"loss": 3.0973339080810547,
"step": 122
},
{
"epoch": 0.07829408020369191,
"grad_norm": 8.264572543716843,
"learning_rate": 2.5847457627118645e-06,
"loss": 3.3936641216278076,
"step": 123
},
{
"epoch": 0.0789306174411203,
"grad_norm": 13.1592378849208,
"learning_rate": 2.605932203389831e-06,
"loss": 2.7813830375671387,
"step": 124
},
{
"epoch": 0.07956715467854869,
"grad_norm": 15.192533604335532,
"learning_rate": 2.627118644067797e-06,
"loss": 3.2265381813049316,
"step": 125
},
{
"epoch": 0.08020369191597708,
"grad_norm": 10.566445314436896,
"learning_rate": 2.648305084745763e-06,
"loss": 3.334951162338257,
"step": 126
},
{
"epoch": 0.08084022915340547,
"grad_norm": 12.255041094999912,
"learning_rate": 2.669491525423729e-06,
"loss": 3.0126452445983887,
"step": 127
},
{
"epoch": 0.08147676639083386,
"grad_norm": 9.566517458586116,
"learning_rate": 2.690677966101695e-06,
"loss": 2.845134735107422,
"step": 128
},
{
"epoch": 0.08211330362826226,
"grad_norm": 8.235351971330045,
"learning_rate": 2.7118644067796613e-06,
"loss": 2.729961633682251,
"step": 129
},
{
"epoch": 0.08274984086569065,
"grad_norm": 19.066643065863207,
"learning_rate": 2.733050847457627e-06,
"loss": 3.170295238494873,
"step": 130
},
{
"epoch": 0.08338637810311904,
"grad_norm": 13.82108602310928,
"learning_rate": 2.7542372881355934e-06,
"loss": 2.8606677055358887,
"step": 131
},
{
"epoch": 0.08402291534054743,
"grad_norm": 9.485675344751035,
"learning_rate": 2.7754237288135593e-06,
"loss": 2.759913444519043,
"step": 132
},
{
"epoch": 0.08465945257797582,
"grad_norm": 7.561287712062645,
"learning_rate": 2.7966101694915256e-06,
"loss": 3.171691417694092,
"step": 133
},
{
"epoch": 0.0852959898154042,
"grad_norm": 15.081790970276133,
"learning_rate": 2.817796610169492e-06,
"loss": 3.1571059226989746,
"step": 134
},
{
"epoch": 0.0859325270528326,
"grad_norm": 17.49080914956136,
"learning_rate": 2.838983050847458e-06,
"loss": 3.1260931491851807,
"step": 135
},
{
"epoch": 0.08656906429026098,
"grad_norm": 10.831529056783937,
"learning_rate": 2.860169491525424e-06,
"loss": 3.4544715881347656,
"step": 136
},
{
"epoch": 0.08720560152768937,
"grad_norm": 12.650862284069477,
"learning_rate": 2.8813559322033903e-06,
"loss": 2.1055846214294434,
"step": 137
},
{
"epoch": 0.08784213876511776,
"grad_norm": 10.937242513866803,
"learning_rate": 2.902542372881356e-06,
"loss": 3.1760659217834473,
"step": 138
},
{
"epoch": 0.08847867600254615,
"grad_norm": 16.562684519262675,
"learning_rate": 2.9237288135593224e-06,
"loss": 3.499706506729126,
"step": 139
},
{
"epoch": 0.08911521323997454,
"grad_norm": 14.663338858011086,
"learning_rate": 2.9449152542372883e-06,
"loss": 3.097550630569458,
"step": 140
},
{
"epoch": 0.08975175047740293,
"grad_norm": 8.782288227621974,
"learning_rate": 2.9661016949152545e-06,
"loss": 2.998491048812866,
"step": 141
},
{
"epoch": 0.09038828771483132,
"grad_norm": 14.102110477033168,
"learning_rate": 2.9872881355932204e-06,
"loss": 2.690117835998535,
"step": 142
},
{
"epoch": 0.0910248249522597,
"grad_norm": 12.496914670356034,
"learning_rate": 3.0084745762711862e-06,
"loss": 2.811756134033203,
"step": 143
},
{
"epoch": 0.0916613621896881,
"grad_norm": 12.822381579853225,
"learning_rate": 3.029661016949153e-06,
"loss": 3.9626450538635254,
"step": 144
},
{
"epoch": 0.09229789942711648,
"grad_norm": 12.582879172097762,
"learning_rate": 3.0508474576271192e-06,
"loss": 3.199486255645752,
"step": 145
},
{
"epoch": 0.09293443666454487,
"grad_norm": 18.02029221942558,
"learning_rate": 3.072033898305085e-06,
"loss": 3.243107557296753,
"step": 146
},
{
"epoch": 0.09357097390197326,
"grad_norm": 13.581114680729833,
"learning_rate": 3.0932203389830514e-06,
"loss": 3.156430959701538,
"step": 147
},
{
"epoch": 0.09420751113940165,
"grad_norm": 12.683469403176712,
"learning_rate": 3.1144067796610172e-06,
"loss": 3.62229323387146,
"step": 148
},
{
"epoch": 0.09484404837683004,
"grad_norm": 11.866601635486056,
"learning_rate": 3.135593220338983e-06,
"loss": 2.9691965579986572,
"step": 149
},
{
"epoch": 0.09548058561425843,
"grad_norm": 12.329303680517521,
"learning_rate": 3.1567796610169494e-06,
"loss": 3.507720470428467,
"step": 150
},
{
"epoch": 0.09611712285168682,
"grad_norm": 12.864876607372913,
"learning_rate": 3.1779661016949152e-06,
"loss": 3.1271495819091797,
"step": 151
},
{
"epoch": 0.0967536600891152,
"grad_norm": 8.330358552705086,
"learning_rate": 3.1991525423728815e-06,
"loss": 2.7351913452148438,
"step": 152
},
{
"epoch": 0.09739019732654361,
"grad_norm": 13.856684572001178,
"learning_rate": 3.2203389830508473e-06,
"loss": 3.1276650428771973,
"step": 153
},
{
"epoch": 0.098026734563972,
"grad_norm": 6.978723672843207,
"learning_rate": 3.241525423728814e-06,
"loss": 2.640872001647949,
"step": 154
},
{
"epoch": 0.09866327180140039,
"grad_norm": 15.254420137587498,
"learning_rate": 3.26271186440678e-06,
"loss": 3.469313859939575,
"step": 155
},
{
"epoch": 0.09929980903882878,
"grad_norm": 13.603149074385739,
"learning_rate": 3.283898305084746e-06,
"loss": 2.619128942489624,
"step": 156
},
{
"epoch": 0.09993634627625717,
"grad_norm": 17.31160612407412,
"learning_rate": 3.305084745762712e-06,
"loss": 3.1266517639160156,
"step": 157
},
{
"epoch": 0.10057288351368555,
"grad_norm": 19.03803760262164,
"learning_rate": 3.3262711864406783e-06,
"loss": 3.0743002891540527,
"step": 158
},
{
"epoch": 0.10120942075111394,
"grad_norm": 19.269861713711055,
"learning_rate": 3.347457627118644e-06,
"loss": 3.1711010932922363,
"step": 159
},
{
"epoch": 0.10184595798854233,
"grad_norm": 13.692244884511693,
"learning_rate": 3.3686440677966105e-06,
"loss": 3.3521604537963867,
"step": 160
},
{
"epoch": 0.10248249522597072,
"grad_norm": 10.14050843079561,
"learning_rate": 3.3898305084745763e-06,
"loss": 2.723728895187378,
"step": 161
},
{
"epoch": 0.10311903246339911,
"grad_norm": 18.8592821942506,
"learning_rate": 3.4110169491525426e-06,
"loss": 2.988776206970215,
"step": 162
},
{
"epoch": 0.1037555697008275,
"grad_norm": 11.99779759520072,
"learning_rate": 3.4322033898305084e-06,
"loss": 3.3126285076141357,
"step": 163
},
{
"epoch": 0.10439210693825589,
"grad_norm": 9.02626861062283,
"learning_rate": 3.453389830508475e-06,
"loss": 2.866410255432129,
"step": 164
},
{
"epoch": 0.10502864417568428,
"grad_norm": 12.382144134507604,
"learning_rate": 3.474576271186441e-06,
"loss": 3.0566554069519043,
"step": 165
},
{
"epoch": 0.10566518141311267,
"grad_norm": 16.03984892684878,
"learning_rate": 3.4957627118644073e-06,
"loss": 3.033036708831787,
"step": 166
},
{
"epoch": 0.10630171865054105,
"grad_norm": 12.078141339396046,
"learning_rate": 3.516949152542373e-06,
"loss": 3.273803234100342,
"step": 167
},
{
"epoch": 0.10693825588796944,
"grad_norm": 9.884875875484646,
"learning_rate": 3.5381355932203394e-06,
"loss": 2.768066167831421,
"step": 168
},
{
"epoch": 0.10757479312539783,
"grad_norm": 19.838989040044574,
"learning_rate": 3.5593220338983053e-06,
"loss": 2.800044536590576,
"step": 169
},
{
"epoch": 0.10821133036282622,
"grad_norm": 33.923812377724516,
"learning_rate": 3.5805084745762716e-06,
"loss": 3.8336315155029297,
"step": 170
},
{
"epoch": 0.10884786760025461,
"grad_norm": 15.309054680459798,
"learning_rate": 3.6016949152542374e-06,
"loss": 2.6189794540405273,
"step": 171
},
{
"epoch": 0.109484404837683,
"grad_norm": 10.169867188606077,
"learning_rate": 3.6228813559322033e-06,
"loss": 3.249156951904297,
"step": 172
},
{
"epoch": 0.11012094207511139,
"grad_norm": 9.366559092798653,
"learning_rate": 3.6440677966101695e-06,
"loss": 2.6109180450439453,
"step": 173
},
{
"epoch": 0.11075747931253978,
"grad_norm": 7.839500435241882,
"learning_rate": 3.6652542372881362e-06,
"loss": 3.592599391937256,
"step": 174
},
{
"epoch": 0.11139401654996817,
"grad_norm": 10.071594229699002,
"learning_rate": 3.686440677966102e-06,
"loss": 2.9701881408691406,
"step": 175
},
{
"epoch": 0.11203055378739657,
"grad_norm": 36.967183531556714,
"learning_rate": 3.7076271186440684e-06,
"loss": 3.1862998008728027,
"step": 176
},
{
"epoch": 0.11266709102482496,
"grad_norm": 10.128382960721483,
"learning_rate": 3.7288135593220342e-06,
"loss": 2.8713183403015137,
"step": 177
},
{
"epoch": 0.11330362826225335,
"grad_norm": 7.899179110880989,
"learning_rate": 3.7500000000000005e-06,
"loss": 2.440099000930786,
"step": 178
},
{
"epoch": 0.11394016549968174,
"grad_norm": 10.555616860338874,
"learning_rate": 3.7711864406779664e-06,
"loss": 2.952479362487793,
"step": 179
},
{
"epoch": 0.11457670273711013,
"grad_norm": 17.067174677468486,
"learning_rate": 3.7923728813559322e-06,
"loss": 3.5981414318084717,
"step": 180
},
{
"epoch": 0.11521323997453851,
"grad_norm": 16.822699566862887,
"learning_rate": 3.8135593220338985e-06,
"loss": 2.943324327468872,
"step": 181
},
{
"epoch": 0.1158497772119669,
"grad_norm": 21.844528101489537,
"learning_rate": 3.834745762711865e-06,
"loss": 3.8883442878723145,
"step": 182
},
{
"epoch": 0.11648631444939529,
"grad_norm": 13.662656301441373,
"learning_rate": 3.8559322033898315e-06,
"loss": 3.2004919052124023,
"step": 183
},
{
"epoch": 0.11712285168682368,
"grad_norm": 10.436671362782297,
"learning_rate": 3.877118644067797e-06,
"loss": 3.233124017715454,
"step": 184
},
{
"epoch": 0.11775938892425207,
"grad_norm": 15.712145612017746,
"learning_rate": 3.898305084745763e-06,
"loss": 3.511937141418457,
"step": 185
},
{
"epoch": 0.11839592616168046,
"grad_norm": 6.192919383191328,
"learning_rate": 3.919491525423729e-06,
"loss": 2.831325054168701,
"step": 186
},
{
"epoch": 0.11903246339910885,
"grad_norm": 19.21846235119949,
"learning_rate": 3.940677966101695e-06,
"loss": 3.607250690460205,
"step": 187
},
{
"epoch": 0.11966900063653724,
"grad_norm": 15.562894252261083,
"learning_rate": 3.961864406779662e-06,
"loss": 2.9302172660827637,
"step": 188
},
{
"epoch": 0.12030553787396563,
"grad_norm": 18.97211358017462,
"learning_rate": 3.9830508474576275e-06,
"loss": 3.5242438316345215,
"step": 189
},
{
"epoch": 0.12094207511139402,
"grad_norm": 11.677567589260672,
"learning_rate": 4.004237288135593e-06,
"loss": 3.025068998336792,
"step": 190
},
{
"epoch": 0.1215786123488224,
"grad_norm": 11.681417522086605,
"learning_rate": 4.025423728813559e-06,
"loss": 2.592844009399414,
"step": 191
},
{
"epoch": 0.12221514958625079,
"grad_norm": 10.442960645780184,
"learning_rate": 4.046610169491526e-06,
"loss": 2.919527530670166,
"step": 192
},
{
"epoch": 0.12285168682367918,
"grad_norm": 9.591281451399857,
"learning_rate": 4.067796610169492e-06,
"loss": 2.864609956741333,
"step": 193
},
{
"epoch": 0.12348822406110757,
"grad_norm": 9.553179683924215,
"learning_rate": 4.0889830508474584e-06,
"loss": 2.886277198791504,
"step": 194
},
{
"epoch": 0.12412476129853596,
"grad_norm": 8.9114375203598,
"learning_rate": 4.110169491525424e-06,
"loss": 2.700643301010132,
"step": 195
},
{
"epoch": 0.12476129853596435,
"grad_norm": 11.804094590244754,
"learning_rate": 4.13135593220339e-06,
"loss": 3.145249366760254,
"step": 196
},
{
"epoch": 0.12539783577339275,
"grad_norm": 15.860462809303401,
"learning_rate": 4.152542372881356e-06,
"loss": 2.9896278381347656,
"step": 197
},
{
"epoch": 0.12603437301082113,
"grad_norm": 13.152319755016842,
"learning_rate": 4.173728813559323e-06,
"loss": 2.631516456604004,
"step": 198
},
{
"epoch": 0.12667091024824953,
"grad_norm": 16.436797093248362,
"learning_rate": 4.1949152542372886e-06,
"loss": 2.742974281311035,
"step": 199
},
{
"epoch": 0.1273074474856779,
"grad_norm": 8.75887823731754,
"learning_rate": 4.2161016949152544e-06,
"loss": 2.4858527183532715,
"step": 200
},
{
"epoch": 0.1279439847231063,
"grad_norm": 11.313045010110079,
"learning_rate": 4.23728813559322e-06,
"loss": 3.0495362281799316,
"step": 201
},
{
"epoch": 0.12858052196053468,
"grad_norm": 10.37774972184091,
"learning_rate": 4.258474576271186e-06,
"loss": 2.8722662925720215,
"step": 202
},
{
"epoch": 0.12921705919796309,
"grad_norm": 15.002975107696502,
"learning_rate": 4.279661016949153e-06,
"loss": 3.10701322555542,
"step": 203
},
{
"epoch": 0.12985359643539146,
"grad_norm": 14.296117051079547,
"learning_rate": 4.3008474576271195e-06,
"loss": 2.8308165073394775,
"step": 204
},
{
"epoch": 0.13049013367281986,
"grad_norm": 10.289126134210136,
"learning_rate": 4.322033898305085e-06,
"loss": 3.036111354827881,
"step": 205
},
{
"epoch": 0.13112667091024824,
"grad_norm": 16.829371148640362,
"learning_rate": 4.343220338983051e-06,
"loss": 3.3032469749450684,
"step": 206
},
{
"epoch": 0.13176320814767664,
"grad_norm": 11.15761088911933,
"learning_rate": 4.364406779661017e-06,
"loss": 3.1203994750976562,
"step": 207
},
{
"epoch": 0.13239974538510502,
"grad_norm": 8.624902828702124,
"learning_rate": 4.385593220338983e-06,
"loss": 2.4616947174072266,
"step": 208
},
{
"epoch": 0.13303628262253342,
"grad_norm": 14.652551487527845,
"learning_rate": 4.40677966101695e-06,
"loss": 3.2015225887298584,
"step": 209
},
{
"epoch": 0.1336728198599618,
"grad_norm": 17.85769559664722,
"learning_rate": 4.4279661016949155e-06,
"loss": 3.392197370529175,
"step": 210
},
{
"epoch": 0.1343093570973902,
"grad_norm": 14.662673921570757,
"learning_rate": 4.449152542372881e-06,
"loss": 3.2083888053894043,
"step": 211
},
{
"epoch": 0.1349458943348186,
"grad_norm": 17.467302319248383,
"learning_rate": 4.470338983050847e-06,
"loss": 3.1518492698669434,
"step": 212
},
{
"epoch": 0.13558243157224698,
"grad_norm": 10.51838123835325,
"learning_rate": 4.491525423728814e-06,
"loss": 2.855264902114868,
"step": 213
},
{
"epoch": 0.13621896880967538,
"grad_norm": 14.762402460834231,
"learning_rate": 4.51271186440678e-06,
"loss": 2.360743522644043,
"step": 214
},
{
"epoch": 0.13685550604710375,
"grad_norm": 13.047537180129066,
"learning_rate": 4.5338983050847465e-06,
"loss": 3.403221368789673,
"step": 215
},
{
"epoch": 0.13749204328453216,
"grad_norm": 11.286423215824348,
"learning_rate": 4.555084745762712e-06,
"loss": 2.9744622707366943,
"step": 216
},
{
"epoch": 0.13812858052196053,
"grad_norm": 22.128236162508518,
"learning_rate": 4.576271186440678e-06,
"loss": 3.943368673324585,
"step": 217
},
{
"epoch": 0.13876511775938893,
"grad_norm": 11.945689425899966,
"learning_rate": 4.597457627118644e-06,
"loss": 2.857276201248169,
"step": 218
},
{
"epoch": 0.1394016549968173,
"grad_norm": 8.384366498024004,
"learning_rate": 4.618644067796611e-06,
"loss": 2.535996913909912,
"step": 219
},
{
"epoch": 0.1400381922342457,
"grad_norm": 15.461923137664042,
"learning_rate": 4.639830508474577e-06,
"loss": 3.1761062145233154,
"step": 220
},
{
"epoch": 0.1406747294716741,
"grad_norm": 8.509254573006608,
"learning_rate": 4.6610169491525425e-06,
"loss": 2.7607967853546143,
"step": 221
},
{
"epoch": 0.1413112667091025,
"grad_norm": 9.170873253831214,
"learning_rate": 4.682203389830508e-06,
"loss": 3.2002480030059814,
"step": 222
},
{
"epoch": 0.14194780394653086,
"grad_norm": 7.339418444481694,
"learning_rate": 4.703389830508475e-06,
"loss": 2.5810189247131348,
"step": 223
},
{
"epoch": 0.14258434118395927,
"grad_norm": 10.766721995996047,
"learning_rate": 4.724576271186441e-06,
"loss": 2.8315610885620117,
"step": 224
},
{
"epoch": 0.14322087842138764,
"grad_norm": 13.548350579282785,
"learning_rate": 4.745762711864408e-06,
"loss": 2.837813377380371,
"step": 225
},
{
"epoch": 0.14385741565881605,
"grad_norm": 34.5958667853913,
"learning_rate": 4.7669491525423735e-06,
"loss": 2.95943546295166,
"step": 226
},
{
"epoch": 0.14449395289624442,
"grad_norm": 11.725989147517986,
"learning_rate": 4.788135593220339e-06,
"loss": 2.85552978515625,
"step": 227
},
{
"epoch": 0.14513049013367282,
"grad_norm": 12.307115633222764,
"learning_rate": 4.809322033898305e-06,
"loss": 2.4775352478027344,
"step": 228
},
{
"epoch": 0.1457670273711012,
"grad_norm": 10.895472196267837,
"learning_rate": 4.830508474576272e-06,
"loss": 3.1042962074279785,
"step": 229
},
{
"epoch": 0.1464035646085296,
"grad_norm": 18.15671039636725,
"learning_rate": 4.851694915254238e-06,
"loss": 3.226940631866455,
"step": 230
},
{
"epoch": 0.14704010184595798,
"grad_norm": 17.865544568948273,
"learning_rate": 4.872881355932204e-06,
"loss": 2.958282947540283,
"step": 231
},
{
"epoch": 0.14767663908338638,
"grad_norm": 13.075581071672993,
"learning_rate": 4.8940677966101694e-06,
"loss": 2.860640287399292,
"step": 232
},
{
"epoch": 0.14831317632081475,
"grad_norm": 7.3011973358144155,
"learning_rate": 4.915254237288136e-06,
"loss": 1.8848614692687988,
"step": 233
},
{
"epoch": 0.14894971355824316,
"grad_norm": 8.961754059399695,
"learning_rate": 4.936440677966102e-06,
"loss": 2.6384811401367188,
"step": 234
},
{
"epoch": 0.14958625079567156,
"grad_norm": 6.695575838860698,
"learning_rate": 4.957627118644069e-06,
"loss": 2.6513447761535645,
"step": 235
},
{
"epoch": 0.15022278803309994,
"grad_norm": 10.172999182069105,
"learning_rate": 4.9788135593220346e-06,
"loss": 3.049452304840088,
"step": 236
},
{
"epoch": 0.15085932527052834,
"grad_norm": 10.885031573274539,
"learning_rate": 5e-06,
"loss": 3.013901710510254,
"step": 237
},
{
"epoch": 0.1514958625079567,
"grad_norm": 9.789951162121023,
"learning_rate": 5.021186440677966e-06,
"loss": 2.4011707305908203,
"step": 238
},
{
"epoch": 0.15213239974538512,
"grad_norm": 29.10099418080209,
"learning_rate": 5.042372881355932e-06,
"loss": 3.279197931289673,
"step": 239
},
{
"epoch": 0.1527689369828135,
"grad_norm": 10.18710797249541,
"learning_rate": 5.063559322033899e-06,
"loss": 2.9297866821289062,
"step": 240
},
{
"epoch": 0.1534054742202419,
"grad_norm": 16.259716765444157,
"learning_rate": 5.084745762711865e-06,
"loss": 2.587176561355591,
"step": 241
},
{
"epoch": 0.15404201145767027,
"grad_norm": 12.250110022036033,
"learning_rate": 5.1059322033898305e-06,
"loss": 2.983497142791748,
"step": 242
},
{
"epoch": 0.15467854869509867,
"grad_norm": 13.257585716683908,
"learning_rate": 5.127118644067796e-06,
"loss": 3.0938868522644043,
"step": 243
},
{
"epoch": 0.15531508593252705,
"grad_norm": 8.009985272716898,
"learning_rate": 5.148305084745763e-06,
"loss": 2.4970593452453613,
"step": 244
},
{
"epoch": 0.15595162316995545,
"grad_norm": 13.706175661301124,
"learning_rate": 5.169491525423729e-06,
"loss": 3.3828718662261963,
"step": 245
},
{
"epoch": 0.15658816040738383,
"grad_norm": 7.54785337703857,
"learning_rate": 5.190677966101695e-06,
"loss": 2.363739490509033,
"step": 246
},
{
"epoch": 0.15722469764481223,
"grad_norm": 8.535146691547306,
"learning_rate": 5.211864406779662e-06,
"loss": 2.6143975257873535,
"step": 247
},
{
"epoch": 0.1578612348822406,
"grad_norm": 6.4085304399908685,
"learning_rate": 5.233050847457628e-06,
"loss": 2.821204900741577,
"step": 248
},
{
"epoch": 0.158497772119669,
"grad_norm": 10.0578421471266,
"learning_rate": 5.254237288135594e-06,
"loss": 2.8076894283294678,
"step": 249
},
{
"epoch": 0.15913430935709738,
"grad_norm": 9.687543277555026,
"learning_rate": 5.27542372881356e-06,
"loss": 2.650265693664551,
"step": 250
},
{
"epoch": 0.15977084659452578,
"grad_norm": 8.634396293605054,
"learning_rate": 5.296610169491526e-06,
"loss": 2.70159912109375,
"step": 251
},
{
"epoch": 0.16040738383195416,
"grad_norm": 12.644669612153189,
"learning_rate": 5.3177966101694925e-06,
"loss": 3.199131965637207,
"step": 252
},
{
"epoch": 0.16104392106938256,
"grad_norm": 10.563494009915301,
"learning_rate": 5.338983050847458e-06,
"loss": 2.6978464126586914,
"step": 253
},
{
"epoch": 0.16168045830681094,
"grad_norm": 10.910365311666162,
"learning_rate": 5.360169491525424e-06,
"loss": 2.7021255493164062,
"step": 254
},
{
"epoch": 0.16231699554423934,
"grad_norm": 7.3941820229008846,
"learning_rate": 5.38135593220339e-06,
"loss": 3.066408395767212,
"step": 255
},
{
"epoch": 0.16295353278166771,
"grad_norm": 10.356816747079062,
"learning_rate": 5.402542372881357e-06,
"loss": 3.2458343505859375,
"step": 256
},
{
"epoch": 0.16359007001909612,
"grad_norm": 17.605242578677803,
"learning_rate": 5.423728813559323e-06,
"loss": 2.8568882942199707,
"step": 257
},
{
"epoch": 0.16422660725652452,
"grad_norm": 14.926319024507961,
"learning_rate": 5.4449152542372885e-06,
"loss": 2.7378273010253906,
"step": 258
},
{
"epoch": 0.1648631444939529,
"grad_norm": 10.554202554221286,
"learning_rate": 5.466101694915254e-06,
"loss": 2.680108070373535,
"step": 259
},
{
"epoch": 0.1654996817313813,
"grad_norm": 10.015652391833274,
"learning_rate": 5.487288135593221e-06,
"loss": 3.29970645904541,
"step": 260
},
{
"epoch": 0.16613621896880967,
"grad_norm": 9.58361415978751,
"learning_rate": 5.508474576271187e-06,
"loss": 2.9083588123321533,
"step": 261
},
{
"epoch": 0.16677275620623808,
"grad_norm": 21.535696433326326,
"learning_rate": 5.529661016949153e-06,
"loss": 3.120295524597168,
"step": 262
},
{
"epoch": 0.16740929344366645,
"grad_norm": 13.25929140074107,
"learning_rate": 5.550847457627119e-06,
"loss": 2.9800517559051514,
"step": 263
},
{
"epoch": 0.16804583068109485,
"grad_norm": 9.757729544531495,
"learning_rate": 5.5720338983050844e-06,
"loss": 3.061345100402832,
"step": 264
},
{
"epoch": 0.16868236791852323,
"grad_norm": 12.95629265024167,
"learning_rate": 5.593220338983051e-06,
"loss": 2.787202835083008,
"step": 265
},
{
"epoch": 0.16931890515595163,
"grad_norm": 10.982801430256657,
"learning_rate": 5.614406779661017e-06,
"loss": 3.1923770904541016,
"step": 266
},
{
"epoch": 0.16995544239338,
"grad_norm": 8.224155321892544,
"learning_rate": 5.635593220338984e-06,
"loss": 2.5791826248168945,
"step": 267
},
{
"epoch": 0.1705919796308084,
"grad_norm": 10.460293961838643,
"learning_rate": 5.65677966101695e-06,
"loss": 2.779829740524292,
"step": 268
},
{
"epoch": 0.17122851686823679,
"grad_norm": 10.261645668961702,
"learning_rate": 5.677966101694916e-06,
"loss": 3.2863683700561523,
"step": 269
},
{
"epoch": 0.1718650541056652,
"grad_norm": 6.860353021754464,
"learning_rate": 5.699152542372882e-06,
"loss": 2.6788125038146973,
"step": 270
},
{
"epoch": 0.17250159134309356,
"grad_norm": 13.718261035931205,
"learning_rate": 5.720338983050848e-06,
"loss": 2.84999942779541,
"step": 271
},
{
"epoch": 0.17313812858052197,
"grad_norm": 9.450110201931974,
"learning_rate": 5.741525423728815e-06,
"loss": 2.3188841342926025,
"step": 272
},
{
"epoch": 0.17377466581795034,
"grad_norm": 24.399578162992768,
"learning_rate": 5.7627118644067805e-06,
"loss": 2.3342952728271484,
"step": 273
},
{
"epoch": 0.17441120305537874,
"grad_norm": 14.342609925306267,
"learning_rate": 5.783898305084746e-06,
"loss": 3.366730213165283,
"step": 274
},
{
"epoch": 0.17504774029280712,
"grad_norm": 7.275306607886118,
"learning_rate": 5.805084745762712e-06,
"loss": 2.290006637573242,
"step": 275
},
{
"epoch": 0.17568427753023552,
"grad_norm": 9.939524487285707,
"learning_rate": 5.826271186440678e-06,
"loss": 2.8823177814483643,
"step": 276
},
{
"epoch": 0.1763208147676639,
"grad_norm": 8.669514074833224,
"learning_rate": 5.847457627118645e-06,
"loss": 2.6550536155700684,
"step": 277
},
{
"epoch": 0.1769573520050923,
"grad_norm": 11.02825638441472,
"learning_rate": 5.868644067796611e-06,
"loss": 2.757049798965454,
"step": 278
},
{
"epoch": 0.17759388924252067,
"grad_norm": 21.597046178374484,
"learning_rate": 5.8898305084745765e-06,
"loss": 3.2479753494262695,
"step": 279
},
{
"epoch": 0.17823042647994908,
"grad_norm": 10.172094762492458,
"learning_rate": 5.911016949152542e-06,
"loss": 2.887638568878174,
"step": 280
},
{
"epoch": 0.17886696371737745,
"grad_norm": 9.39680421509699,
"learning_rate": 5.932203389830509e-06,
"loss": 3.185732841491699,
"step": 281
},
{
"epoch": 0.17950350095480586,
"grad_norm": 11.4576049744317,
"learning_rate": 5.953389830508475e-06,
"loss": 3.0998311042785645,
"step": 282
},
{
"epoch": 0.18014003819223426,
"grad_norm": 11.683392046372845,
"learning_rate": 5.974576271186441e-06,
"loss": 2.983189582824707,
"step": 283
},
{
"epoch": 0.18077657542966263,
"grad_norm": 11.11153403684068,
"learning_rate": 5.995762711864407e-06,
"loss": 2.9110331535339355,
"step": 284
},
{
"epoch": 0.18141311266709104,
"grad_norm": 10.259066032322758,
"learning_rate": 6.0169491525423725e-06,
"loss": 3.0029218196868896,
"step": 285
},
{
"epoch": 0.1820496499045194,
"grad_norm": 9.504956577472365,
"learning_rate": 6.038135593220339e-06,
"loss": 2.7357711791992188,
"step": 286
},
{
"epoch": 0.18268618714194781,
"grad_norm": 12.30922909428048,
"learning_rate": 6.059322033898306e-06,
"loss": 2.705357551574707,
"step": 287
},
{
"epoch": 0.1833227243793762,
"grad_norm": 12.708040498692581,
"learning_rate": 6.080508474576272e-06,
"loss": 2.4068801403045654,
"step": 288
},
{
"epoch": 0.1839592616168046,
"grad_norm": 7.5926474714142245,
"learning_rate": 6.1016949152542385e-06,
"loss": 2.8826093673706055,
"step": 289
},
{
"epoch": 0.18459579885423297,
"grad_norm": 16.415575077576264,
"learning_rate": 6.122881355932204e-06,
"loss": 3.080456256866455,
"step": 290
},
{
"epoch": 0.18523233609166137,
"grad_norm": 10.94938631452902,
"learning_rate": 6.14406779661017e-06,
"loss": 2.8359527587890625,
"step": 291
},
{
"epoch": 0.18586887332908975,
"grad_norm": 18.625269551728508,
"learning_rate": 6.165254237288136e-06,
"loss": 3.1971914768218994,
"step": 292
},
{
"epoch": 0.18650541056651815,
"grad_norm": 16.038218558942276,
"learning_rate": 6.186440677966103e-06,
"loss": 2.786512851715088,
"step": 293
},
{
"epoch": 0.18714194780394652,
"grad_norm": 30.036590776335498,
"learning_rate": 6.207627118644069e-06,
"loss": 2.433791160583496,
"step": 294
},
{
"epoch": 0.18777848504137493,
"grad_norm": 7.181996858345177,
"learning_rate": 6.2288135593220344e-06,
"loss": 3.180842876434326,
"step": 295
},
{
"epoch": 0.1884150222788033,
"grad_norm": 12.848065653568986,
"learning_rate": 6.25e-06,
"loss": 2.833150863647461,
"step": 296
},
{
"epoch": 0.1890515595162317,
"grad_norm": 10.249632541183296,
"learning_rate": 6.271186440677966e-06,
"loss": 3.3664321899414062,
"step": 297
},
{
"epoch": 0.18968809675366008,
"grad_norm": 9.17490216034036,
"learning_rate": 6.292372881355933e-06,
"loss": 3.034820556640625,
"step": 298
},
{
"epoch": 0.19032463399108848,
"grad_norm": 12.29588017430244,
"learning_rate": 6.313559322033899e-06,
"loss": 3.113218307495117,
"step": 299
},
{
"epoch": 0.19096117122851686,
"grad_norm": 10.551364939017459,
"learning_rate": 6.3347457627118646e-06,
"loss": 2.583988666534424,
"step": 300
},
{
"epoch": 0.19159770846594526,
"grad_norm": 9.118649476570695,
"learning_rate": 6.3559322033898304e-06,
"loss": 2.780367374420166,
"step": 301
},
{
"epoch": 0.19223424570337364,
"grad_norm": 17.380906168062385,
"learning_rate": 6.377118644067797e-06,
"loss": 2.361020088195801,
"step": 302
},
{
"epoch": 0.19287078294080204,
"grad_norm": 16.52007281181012,
"learning_rate": 6.398305084745763e-06,
"loss": 3.059126377105713,
"step": 303
},
{
"epoch": 0.1935073201782304,
"grad_norm": 15.360216329621078,
"learning_rate": 6.419491525423729e-06,
"loss": 2.1948864459991455,
"step": 304
},
{
"epoch": 0.19414385741565882,
"grad_norm": 13.924681412606185,
"learning_rate": 6.440677966101695e-06,
"loss": 2.9051995277404785,
"step": 305
},
{
"epoch": 0.19478039465308722,
"grad_norm": 20.45103908918982,
"learning_rate": 6.461864406779662e-06,
"loss": 2.8492469787597656,
"step": 306
},
{
"epoch": 0.1954169318905156,
"grad_norm": 11.272264147744535,
"learning_rate": 6.483050847457628e-06,
"loss": 2.796387195587158,
"step": 307
},
{
"epoch": 0.196053469127944,
"grad_norm": 7.491254613489933,
"learning_rate": 6.504237288135594e-06,
"loss": 2.8268957138061523,
"step": 308
},
{
"epoch": 0.19669000636537237,
"grad_norm": 11.248178380854952,
"learning_rate": 6.52542372881356e-06,
"loss": 2.594062566757202,
"step": 309
},
{
"epoch": 0.19732654360280077,
"grad_norm": 8.812198808149372,
"learning_rate": 6.5466101694915265e-06,
"loss": 2.788079023361206,
"step": 310
},
{
"epoch": 0.19796308084022915,
"grad_norm": 18.203946006098864,
"learning_rate": 6.567796610169492e-06,
"loss": 2.060673713684082,
"step": 311
},
{
"epoch": 0.19859961807765755,
"grad_norm": 10.21084736659449,
"learning_rate": 6.588983050847458e-06,
"loss": 2.218992233276367,
"step": 312
},
{
"epoch": 0.19923615531508593,
"grad_norm": 24.98324005759942,
"learning_rate": 6.610169491525424e-06,
"loss": 3.5444846153259277,
"step": 313
},
{
"epoch": 0.19987269255251433,
"grad_norm": 13.942804366495361,
"learning_rate": 6.631355932203391e-06,
"loss": 2.5274245738983154,
"step": 314
},
{
"epoch": 0.2005092297899427,
"grad_norm": 7.447244363066533,
"learning_rate": 6.652542372881357e-06,
"loss": 2.453437089920044,
"step": 315
},
{
"epoch": 0.2011457670273711,
"grad_norm": 8.780622079696341,
"learning_rate": 6.6737288135593225e-06,
"loss": 2.773866891860962,
"step": 316
},
{
"epoch": 0.20178230426479948,
"grad_norm": 11.604249557547087,
"learning_rate": 6.694915254237288e-06,
"loss": 3.253352165222168,
"step": 317
},
{
"epoch": 0.2024188415022279,
"grad_norm": 15.879127615597556,
"learning_rate": 6.716101694915255e-06,
"loss": 3.153146266937256,
"step": 318
},
{
"epoch": 0.20305537873965626,
"grad_norm": 13.86771255756771,
"learning_rate": 6.737288135593221e-06,
"loss": 3.3321497440338135,
"step": 319
},
{
"epoch": 0.20369191597708466,
"grad_norm": 7.73155294931535,
"learning_rate": 6.758474576271187e-06,
"loss": 3.2387685775756836,
"step": 320
},
{
"epoch": 0.20432845321451304,
"grad_norm": 11.810915669641348,
"learning_rate": 6.779661016949153e-06,
"loss": 3.321180582046509,
"step": 321
},
{
"epoch": 0.20496499045194144,
"grad_norm": 9.777026830947202,
"learning_rate": 6.8008474576271185e-06,
"loss": 2.50833797454834,
"step": 322
},
{
"epoch": 0.20560152768936982,
"grad_norm": 20.25164388427156,
"learning_rate": 6.822033898305085e-06,
"loss": 3.063729763031006,
"step": 323
},
{
"epoch": 0.20623806492679822,
"grad_norm": 7.211655129625353,
"learning_rate": 6.843220338983051e-06,
"loss": 2.7206499576568604,
"step": 324
},
{
"epoch": 0.2068746021642266,
"grad_norm": 12.00457664613567,
"learning_rate": 6.864406779661017e-06,
"loss": 2.948394775390625,
"step": 325
},
{
"epoch": 0.207511139401655,
"grad_norm": 14.193817895558638,
"learning_rate": 6.8855932203389844e-06,
"loss": 2.6999831199645996,
"step": 326
},
{
"epoch": 0.20814767663908337,
"grad_norm": 9.22847901312038,
"learning_rate": 6.90677966101695e-06,
"loss": 2.798079013824463,
"step": 327
},
{
"epoch": 0.20878421387651178,
"grad_norm": 14.274035645282098,
"learning_rate": 6.927966101694916e-06,
"loss": 2.9096508026123047,
"step": 328
},
{
"epoch": 0.20942075111394018,
"grad_norm": 13.806618597816842,
"learning_rate": 6.949152542372882e-06,
"loss": 3.3791441917419434,
"step": 329
},
{
"epoch": 0.21005728835136855,
"grad_norm": 9.200499313513514,
"learning_rate": 6.970338983050849e-06,
"loss": 2.008662223815918,
"step": 330
},
{
"epoch": 0.21069382558879696,
"grad_norm": 19.573127652755893,
"learning_rate": 6.9915254237288146e-06,
"loss": 3.0151891708374023,
"step": 331
},
{
"epoch": 0.21133036282622533,
"grad_norm": 9.570429551637792,
"learning_rate": 7.0127118644067804e-06,
"loss": 2.319244623184204,
"step": 332
},
{
"epoch": 0.21196690006365373,
"grad_norm": 9.731842053626478,
"learning_rate": 7.033898305084746e-06,
"loss": 2.5693533420562744,
"step": 333
},
{
"epoch": 0.2126034373010821,
"grad_norm": 12.27977297948714,
"learning_rate": 7.055084745762712e-06,
"loss": 3.262115478515625,
"step": 334
},
{
"epoch": 0.2132399745385105,
"grad_norm": 9.818826469267037,
"learning_rate": 7.076271186440679e-06,
"loss": 3.6047706604003906,
"step": 335
},
{
"epoch": 0.2138765117759389,
"grad_norm": 12.754731870818834,
"learning_rate": 7.097457627118645e-06,
"loss": 3.1616177558898926,
"step": 336
},
{
"epoch": 0.2145130490133673,
"grad_norm": 9.426972453147444,
"learning_rate": 7.1186440677966106e-06,
"loss": 3.418569564819336,
"step": 337
},
{
"epoch": 0.21514958625079567,
"grad_norm": 13.087753204446434,
"learning_rate": 7.139830508474576e-06,
"loss": 3.402482509613037,
"step": 338
},
{
"epoch": 0.21578612348822407,
"grad_norm": 30.225108315818193,
"learning_rate": 7.161016949152543e-06,
"loss": 3.0675745010375977,
"step": 339
},
{
"epoch": 0.21642266072565244,
"grad_norm": 16.509686833125368,
"learning_rate": 7.182203389830509e-06,
"loss": 3.3344573974609375,
"step": 340
},
{
"epoch": 0.21705919796308085,
"grad_norm": 10.449599766088038,
"learning_rate": 7.203389830508475e-06,
"loss": 2.42911434173584,
"step": 341
},
{
"epoch": 0.21769573520050922,
"grad_norm": 10.412822022342818,
"learning_rate": 7.224576271186441e-06,
"loss": 2.7670388221740723,
"step": 342
},
{
"epoch": 0.21833227243793762,
"grad_norm": 9.912292165640567,
"learning_rate": 7.2457627118644065e-06,
"loss": 1.9683561325073242,
"step": 343
},
{
"epoch": 0.218968809675366,
"grad_norm": 10.749636171231584,
"learning_rate": 7.266949152542373e-06,
"loss": 2.915195941925049,
"step": 344
},
{
"epoch": 0.2196053469127944,
"grad_norm": 28.50950758853137,
"learning_rate": 7.288135593220339e-06,
"loss": 3.151520252227783,
"step": 345
},
{
"epoch": 0.22024188415022278,
"grad_norm": 24.28887681473371,
"learning_rate": 7.309322033898306e-06,
"loss": 2.0973618030548096,
"step": 346
},
{
"epoch": 0.22087842138765118,
"grad_norm": 9.132240644095473,
"learning_rate": 7.3305084745762725e-06,
"loss": 2.961150884628296,
"step": 347
},
{
"epoch": 0.22151495862507956,
"grad_norm": 16.8031595245657,
"learning_rate": 7.351694915254238e-06,
"loss": 2.981980800628662,
"step": 348
},
{
"epoch": 0.22215149586250796,
"grad_norm": 14.149484197365297,
"learning_rate": 7.372881355932204e-06,
"loss": 2.886014938354492,
"step": 349
},
{
"epoch": 0.22278803309993633,
"grad_norm": 10.654240894425453,
"learning_rate": 7.39406779661017e-06,
"loss": 2.521888017654419,
"step": 350
},
{
"epoch": 0.22342457033736474,
"grad_norm": 14.061149508322904,
"learning_rate": 7.415254237288137e-06,
"loss": 3.571470260620117,
"step": 351
},
{
"epoch": 0.22406110757479314,
"grad_norm": 10.714801517941787,
"learning_rate": 7.436440677966103e-06,
"loss": 3.155707359313965,
"step": 352
},
{
"epoch": 0.22469764481222151,
"grad_norm": 31.60341838506027,
"learning_rate": 7.4576271186440685e-06,
"loss": 2.7946484088897705,
"step": 353
},
{
"epoch": 0.22533418204964992,
"grad_norm": 9.672907182159392,
"learning_rate": 7.478813559322034e-06,
"loss": 2.146177291870117,
"step": 354
},
{
"epoch": 0.2259707192870783,
"grad_norm": 9.098969239754291,
"learning_rate": 7.500000000000001e-06,
"loss": 2.894664764404297,
"step": 355
},
{
"epoch": 0.2266072565245067,
"grad_norm": 8.885785452611078,
"learning_rate": 7.521186440677967e-06,
"loss": 2.8311991691589355,
"step": 356
},
{
"epoch": 0.22724379376193507,
"grad_norm": 8.736221842860376,
"learning_rate": 7.542372881355933e-06,
"loss": 2.7373971939086914,
"step": 357
},
{
"epoch": 0.22788033099936347,
"grad_norm": 15.8309467636366,
"learning_rate": 7.563559322033899e-06,
"loss": 2.6248059272766113,
"step": 358
},
{
"epoch": 0.22851686823679185,
"grad_norm": 6.802455424970265,
"learning_rate": 7.5847457627118645e-06,
"loss": 2.6633729934692383,
"step": 359
},
{
"epoch": 0.22915340547422025,
"grad_norm": 10.659381205942202,
"learning_rate": 7.605932203389831e-06,
"loss": 2.8522844314575195,
"step": 360
},
{
"epoch": 0.22978994271164863,
"grad_norm": 28.091313355141022,
"learning_rate": 7.627118644067797e-06,
"loss": 2.4526000022888184,
"step": 361
},
{
"epoch": 0.23042647994907703,
"grad_norm": 19.06538512702582,
"learning_rate": 7.648305084745763e-06,
"loss": 2.7997803688049316,
"step": 362
},
{
"epoch": 0.2310630171865054,
"grad_norm": 12.427334341770118,
"learning_rate": 7.66949152542373e-06,
"loss": 3.680173873901367,
"step": 363
},
{
"epoch": 0.2316995544239338,
"grad_norm": 8.06715078576492,
"learning_rate": 7.690677966101695e-06,
"loss": 2.341188907623291,
"step": 364
},
{
"epoch": 0.23233609166136218,
"grad_norm": 36.55034307823031,
"learning_rate": 7.711864406779663e-06,
"loss": 2.418898105621338,
"step": 365
},
{
"epoch": 0.23297262889879058,
"grad_norm": 17.870010953346153,
"learning_rate": 7.733050847457628e-06,
"loss": 3.0440874099731445,
"step": 366
},
{
"epoch": 0.23360916613621896,
"grad_norm": 15.645822085927204,
"learning_rate": 7.754237288135595e-06,
"loss": 2.9087657928466797,
"step": 367
},
{
"epoch": 0.23424570337364736,
"grad_norm": 21.73213532363426,
"learning_rate": 7.77542372881356e-06,
"loss": 2.755650520324707,
"step": 368
},
{
"epoch": 0.23488224061107574,
"grad_norm": 10.353282795706956,
"learning_rate": 7.796610169491526e-06,
"loss": 3.011885643005371,
"step": 369
},
{
"epoch": 0.23551877784850414,
"grad_norm": 8.286035315592235,
"learning_rate": 7.817796610169493e-06,
"loss": 2.9681286811828613,
"step": 370
},
{
"epoch": 0.23615531508593252,
"grad_norm": 7.304703005206985,
"learning_rate": 7.838983050847458e-06,
"loss": 2.7695226669311523,
"step": 371
},
{
"epoch": 0.23679185232336092,
"grad_norm": 13.132082630543904,
"learning_rate": 7.860169491525425e-06,
"loss": 2.765014410018921,
"step": 372
},
{
"epoch": 0.2374283895607893,
"grad_norm": 12.186426941036258,
"learning_rate": 7.88135593220339e-06,
"loss": 3.3698410987854004,
"step": 373
},
{
"epoch": 0.2380649267982177,
"grad_norm": 7.52386088621253,
"learning_rate": 7.902542372881357e-06,
"loss": 2.7172579765319824,
"step": 374
},
{
"epoch": 0.23870146403564607,
"grad_norm": 10.996801834275313,
"learning_rate": 7.923728813559323e-06,
"loss": 3.2583115100860596,
"step": 375
},
{
"epoch": 0.23933800127307447,
"grad_norm": 17.864622869491424,
"learning_rate": 7.944915254237288e-06,
"loss": 3.1072745323181152,
"step": 376
},
{
"epoch": 0.23997453851050288,
"grad_norm": 13.01921262197225,
"learning_rate": 7.966101694915255e-06,
"loss": 2.8089587688446045,
"step": 377
},
{
"epoch": 0.24061107574793125,
"grad_norm": 10.776360085944695,
"learning_rate": 7.987288135593222e-06,
"loss": 3.1854512691497803,
"step": 378
},
{
"epoch": 0.24124761298535966,
"grad_norm": 13.463580198251714,
"learning_rate": 8.008474576271187e-06,
"loss": 3.1022424697875977,
"step": 379
},
{
"epoch": 0.24188415022278803,
"grad_norm": 26.22940889508125,
"learning_rate": 8.029661016949153e-06,
"loss": 2.5203371047973633,
"step": 380
},
{
"epoch": 0.24252068746021643,
"grad_norm": 30.178839640571866,
"learning_rate": 8.050847457627118e-06,
"loss": 4.099800109863281,
"step": 381
},
{
"epoch": 0.2431572246976448,
"grad_norm": 19.990288591582754,
"learning_rate": 8.072033898305085e-06,
"loss": 3.4770667552948,
"step": 382
},
{
"epoch": 0.2437937619350732,
"grad_norm": 18.299023986163313,
"learning_rate": 8.093220338983052e-06,
"loss": 3.0465381145477295,
"step": 383
},
{
"epoch": 0.24443029917250159,
"grad_norm": 15.49218350727859,
"learning_rate": 8.114406779661017e-06,
"loss": 2.6916580200195312,
"step": 384
},
{
"epoch": 0.24506683640993,
"grad_norm": 8.89218904507717,
"learning_rate": 8.135593220338983e-06,
"loss": 3.0373010635375977,
"step": 385
},
{
"epoch": 0.24570337364735836,
"grad_norm": 17.202047794875796,
"learning_rate": 8.15677966101695e-06,
"loss": 2.984710216522217,
"step": 386
},
{
"epoch": 0.24633991088478677,
"grad_norm": 11.366110782522894,
"learning_rate": 8.177966101694917e-06,
"loss": 2.7723522186279297,
"step": 387
},
{
"epoch": 0.24697644812221514,
"grad_norm": 14.78172131810989,
"learning_rate": 8.199152542372882e-06,
"loss": 2.9807076454162598,
"step": 388
},
{
"epoch": 0.24761298535964354,
"grad_norm": 8.448025825266503,
"learning_rate": 8.220338983050849e-06,
"loss": 2.8512401580810547,
"step": 389
},
{
"epoch": 0.24824952259707192,
"grad_norm": 13.018778265333706,
"learning_rate": 8.241525423728815e-06,
"loss": 2.7343480587005615,
"step": 390
},
{
"epoch": 0.24888605983450032,
"grad_norm": 11.046371381945525,
"learning_rate": 8.26271186440678e-06,
"loss": 2.8503451347351074,
"step": 391
},
{
"epoch": 0.2495225970719287,
"grad_norm": 8.81851252595491,
"learning_rate": 8.283898305084747e-06,
"loss": 2.6752705574035645,
"step": 392
},
{
"epoch": 0.2501591343093571,
"grad_norm": 8.522653693459269,
"learning_rate": 8.305084745762712e-06,
"loss": 2.9626564979553223,
"step": 393
},
{
"epoch": 0.2507956715467855,
"grad_norm": 8.503352134234069,
"learning_rate": 8.326271186440679e-06,
"loss": 3.0923047065734863,
"step": 394
},
{
"epoch": 0.25143220878421385,
"grad_norm": 17.595169204707954,
"learning_rate": 8.347457627118645e-06,
"loss": 3.0121350288391113,
"step": 395
},
{
"epoch": 0.25206874602164225,
"grad_norm": 10.995988483462757,
"learning_rate": 8.36864406779661e-06,
"loss": 3.194448947906494,
"step": 396
},
{
"epoch": 0.25270528325907066,
"grad_norm": 11.93319408618761,
"learning_rate": 8.389830508474577e-06,
"loss": 2.894463539123535,
"step": 397
},
{
"epoch": 0.25334182049649906,
"grad_norm": 9.689945795143238,
"learning_rate": 8.411016949152542e-06,
"loss": 2.6376705169677734,
"step": 398
},
{
"epoch": 0.25397835773392746,
"grad_norm": 13.926989612744364,
"learning_rate": 8.432203389830509e-06,
"loss": 3.0703635215759277,
"step": 399
},
{
"epoch": 0.2546148949713558,
"grad_norm": 13.351847195447283,
"learning_rate": 8.453389830508476e-06,
"loss": 2.6202921867370605,
"step": 400
},
{
"epoch": 0.2552514322087842,
"grad_norm": 8.133830219500812,
"learning_rate": 8.47457627118644e-06,
"loss": 2.5352962017059326,
"step": 401
},
{
"epoch": 0.2558879694462126,
"grad_norm": 13.975975633984332,
"learning_rate": 8.495762711864407e-06,
"loss": 2.9092273712158203,
"step": 402
},
{
"epoch": 0.256524506683641,
"grad_norm": 14.933245520629898,
"learning_rate": 8.516949152542372e-06,
"loss": 3.0582923889160156,
"step": 403
},
{
"epoch": 0.25716104392106937,
"grad_norm": 10.687432366545924,
"learning_rate": 8.538135593220339e-06,
"loss": 2.8868885040283203,
"step": 404
},
{
"epoch": 0.25779758115849777,
"grad_norm": 34.34936840711408,
"learning_rate": 8.559322033898306e-06,
"loss": 3.094916343688965,
"step": 405
},
{
"epoch": 0.25843411839592617,
"grad_norm": 8.011256674559252,
"learning_rate": 8.580508474576272e-06,
"loss": 2.123218536376953,
"step": 406
},
{
"epoch": 0.2590706556333546,
"grad_norm": 10.024564423138338,
"learning_rate": 8.601694915254239e-06,
"loss": 2.763322114944458,
"step": 407
},
{
"epoch": 0.2597071928707829,
"grad_norm": 8.52914664622448,
"learning_rate": 8.622881355932204e-06,
"loss": 3.241649627685547,
"step": 408
},
{
"epoch": 0.2603437301082113,
"grad_norm": 17.156374018350956,
"learning_rate": 8.64406779661017e-06,
"loss": 2.307551383972168,
"step": 409
},
{
"epoch": 0.2609802673456397,
"grad_norm": 8.445989772507607,
"learning_rate": 8.665254237288136e-06,
"loss": 2.7405078411102295,
"step": 410
},
{
"epoch": 0.26161680458306813,
"grad_norm": 16.246864152111847,
"learning_rate": 8.686440677966103e-06,
"loss": 2.4398441314697266,
"step": 411
},
{
"epoch": 0.2622533418204965,
"grad_norm": 17.23025344721073,
"learning_rate": 8.70762711864407e-06,
"loss": 2.6711883544921875,
"step": 412
},
{
"epoch": 0.2628898790579249,
"grad_norm": 9.665913856397358,
"learning_rate": 8.728813559322034e-06,
"loss": 3.1612720489501953,
"step": 413
},
{
"epoch": 0.2635264162953533,
"grad_norm": 9.039243412902335,
"learning_rate": 8.750000000000001e-06,
"loss": 2.9666695594787598,
"step": 414
},
{
"epoch": 0.2641629535327817,
"grad_norm": 14.904274886523316,
"learning_rate": 8.771186440677966e-06,
"loss": 2.4938879013061523,
"step": 415
},
{
"epoch": 0.26479949077021003,
"grad_norm": 14.327685487799815,
"learning_rate": 8.792372881355933e-06,
"loss": 2.7676544189453125,
"step": 416
},
{
"epoch": 0.26543602800763844,
"grad_norm": 12.777936146844038,
"learning_rate": 8.8135593220339e-06,
"loss": 2.7289962768554688,
"step": 417
},
{
"epoch": 0.26607256524506684,
"grad_norm": 11.135318077153878,
"learning_rate": 8.834745762711864e-06,
"loss": 2.8916702270507812,
"step": 418
},
{
"epoch": 0.26670910248249524,
"grad_norm": 14.64604027349478,
"learning_rate": 8.855932203389831e-06,
"loss": 2.8258821964263916,
"step": 419
},
{
"epoch": 0.2673456397199236,
"grad_norm": 11.557535360945662,
"learning_rate": 8.877118644067798e-06,
"loss": 2.5489492416381836,
"step": 420
},
{
"epoch": 0.267982176957352,
"grad_norm": 10.935033235337775,
"learning_rate": 8.898305084745763e-06,
"loss": 3.2372262477874756,
"step": 421
},
{
"epoch": 0.2686187141947804,
"grad_norm": 12.059942367454795,
"learning_rate": 8.91949152542373e-06,
"loss": 2.420478582382202,
"step": 422
},
{
"epoch": 0.2692552514322088,
"grad_norm": 9.62013973863689,
"learning_rate": 8.940677966101694e-06,
"loss": 2.610492467880249,
"step": 423
},
{
"epoch": 0.2698917886696372,
"grad_norm": 16.215747002933785,
"learning_rate": 8.961864406779663e-06,
"loss": 2.431087017059326,
"step": 424
},
{
"epoch": 0.27052832590706555,
"grad_norm": 23.718620449268744,
"learning_rate": 8.983050847457628e-06,
"loss": 3.0121328830718994,
"step": 425
},
{
"epoch": 0.27116486314449395,
"grad_norm": 13.427577259890885,
"learning_rate": 9.004237288135595e-06,
"loss": 2.930263042449951,
"step": 426
},
{
"epoch": 0.27180140038192235,
"grad_norm": 7.896740529131796,
"learning_rate": 9.02542372881356e-06,
"loss": 2.788511276245117,
"step": 427
},
{
"epoch": 0.27243793761935076,
"grad_norm": 10.768266885604065,
"learning_rate": 9.046610169491526e-06,
"loss": 2.880270481109619,
"step": 428
},
{
"epoch": 0.2730744748567791,
"grad_norm": 10.236726722596405,
"learning_rate": 9.067796610169493e-06,
"loss": 3.149750232696533,
"step": 429
},
{
"epoch": 0.2737110120942075,
"grad_norm": 8.867595980322418,
"learning_rate": 9.088983050847458e-06,
"loss": 1.7043266296386719,
"step": 430
},
{
"epoch": 0.2743475493316359,
"grad_norm": 12.618029867336706,
"learning_rate": 9.110169491525425e-06,
"loss": 2.908963680267334,
"step": 431
},
{
"epoch": 0.2749840865690643,
"grad_norm": 11.301320525059047,
"learning_rate": 9.131355932203391e-06,
"loss": 2.958331346511841,
"step": 432
},
{
"epoch": 0.27562062380649266,
"grad_norm": 11.392544145128007,
"learning_rate": 9.152542372881356e-06,
"loss": 3.156254768371582,
"step": 433
},
{
"epoch": 0.27625716104392106,
"grad_norm": 11.513492123693869,
"learning_rate": 9.173728813559323e-06,
"loss": 2.940934658050537,
"step": 434
},
{
"epoch": 0.27689369828134947,
"grad_norm": 7.947649134277705,
"learning_rate": 9.194915254237288e-06,
"loss": 1.6323720216751099,
"step": 435
},
{
"epoch": 0.27753023551877787,
"grad_norm": 39.70214629314905,
"learning_rate": 9.216101694915255e-06,
"loss": 2.9174299240112305,
"step": 436
},
{
"epoch": 0.2781667727562062,
"grad_norm": 13.98217055446752,
"learning_rate": 9.237288135593222e-06,
"loss": 2.852600574493408,
"step": 437
},
{
"epoch": 0.2788033099936346,
"grad_norm": 10.160783020020252,
"learning_rate": 9.258474576271187e-06,
"loss": 2.575179100036621,
"step": 438
},
{
"epoch": 0.279439847231063,
"grad_norm": 9.146012963974533,
"learning_rate": 9.279661016949153e-06,
"loss": 3.156705379486084,
"step": 439
},
{
"epoch": 0.2800763844684914,
"grad_norm": 11.31879412709326,
"learning_rate": 9.300847457627118e-06,
"loss": 3.304896116256714,
"step": 440
},
{
"epoch": 0.28071292170591977,
"grad_norm": 31.10169595912864,
"learning_rate": 9.322033898305085e-06,
"loss": 2.4765782356262207,
"step": 441
},
{
"epoch": 0.2813494589433482,
"grad_norm": 14.496768399170477,
"learning_rate": 9.343220338983052e-06,
"loss": 2.4921059608459473,
"step": 442
},
{
"epoch": 0.2819859961807766,
"grad_norm": 11.883722809358867,
"learning_rate": 9.364406779661017e-06,
"loss": 2.52455472946167,
"step": 443
},
{
"epoch": 0.282622533418205,
"grad_norm": 9.844759935775308,
"learning_rate": 9.385593220338985e-06,
"loss": 3.400085687637329,
"step": 444
},
{
"epoch": 0.2832590706556333,
"grad_norm": 24.414897984043137,
"learning_rate": 9.40677966101695e-06,
"loss": 2.9252350330352783,
"step": 445
},
{
"epoch": 0.28389560789306173,
"grad_norm": 8.332066822030134,
"learning_rate": 9.427966101694917e-06,
"loss": 3.3196964263916016,
"step": 446
},
{
"epoch": 0.28453214513049013,
"grad_norm": 7.5710032108323615,
"learning_rate": 9.449152542372882e-06,
"loss": 2.261486768722534,
"step": 447
},
{
"epoch": 0.28516868236791854,
"grad_norm": 8.54694090543886,
"learning_rate": 9.470338983050848e-06,
"loss": 2.7075605392456055,
"step": 448
},
{
"epoch": 0.28580521960534694,
"grad_norm": 8.80921600047023,
"learning_rate": 9.491525423728815e-06,
"loss": 2.6770546436309814,
"step": 449
},
{
"epoch": 0.2864417568427753,
"grad_norm": 13.060286286479855,
"learning_rate": 9.51271186440678e-06,
"loss": 3.233011245727539,
"step": 450
},
{
"epoch": 0.2870782940802037,
"grad_norm": 21.014549565777614,
"learning_rate": 9.533898305084747e-06,
"loss": 3.1268162727355957,
"step": 451
},
{
"epoch": 0.2877148313176321,
"grad_norm": 9.74840063951361,
"learning_rate": 9.555084745762712e-06,
"loss": 3.2135486602783203,
"step": 452
},
{
"epoch": 0.2883513685550605,
"grad_norm": 15.179806457385586,
"learning_rate": 9.576271186440679e-06,
"loss": 3.2502529621124268,
"step": 453
},
{
"epoch": 0.28898790579248884,
"grad_norm": 7.967595752995628,
"learning_rate": 9.597457627118645e-06,
"loss": 2.67390513420105,
"step": 454
},
{
"epoch": 0.28962444302991724,
"grad_norm": 33.20858419977011,
"learning_rate": 9.61864406779661e-06,
"loss": 3.2899956703186035,
"step": 455
},
{
"epoch": 0.29026098026734565,
"grad_norm": 25.325568051510427,
"learning_rate": 9.639830508474577e-06,
"loss": 2.495117664337158,
"step": 456
},
{
"epoch": 0.29089751750477405,
"grad_norm": 11.262876751379158,
"learning_rate": 9.661016949152544e-06,
"loss": 2.460306167602539,
"step": 457
},
{
"epoch": 0.2915340547422024,
"grad_norm": 12.122573664434807,
"learning_rate": 9.682203389830509e-06,
"loss": 2.653489351272583,
"step": 458
},
{
"epoch": 0.2921705919796308,
"grad_norm": 15.897150153182967,
"learning_rate": 9.703389830508475e-06,
"loss": 2.756795883178711,
"step": 459
},
{
"epoch": 0.2928071292170592,
"grad_norm": 14.52269368653367,
"learning_rate": 9.72457627118644e-06,
"loss": 2.753854513168335,
"step": 460
},
{
"epoch": 0.2934436664544876,
"grad_norm": 12.896301614550035,
"learning_rate": 9.745762711864407e-06,
"loss": 2.9368858337402344,
"step": 461
},
{
"epoch": 0.29408020369191595,
"grad_norm": 9.860006651102344,
"learning_rate": 9.766949152542374e-06,
"loss": 2.6420769691467285,
"step": 462
},
{
"epoch": 0.29471674092934436,
"grad_norm": 7.025532866083078,
"learning_rate": 9.788135593220339e-06,
"loss": 2.633246421813965,
"step": 463
},
{
"epoch": 0.29535327816677276,
"grad_norm": 12.667326337177768,
"learning_rate": 9.809322033898306e-06,
"loss": 3.018620729446411,
"step": 464
},
{
"epoch": 0.29598981540420116,
"grad_norm": 18.77679465997328,
"learning_rate": 9.830508474576272e-06,
"loss": 2.952024459838867,
"step": 465
},
{
"epoch": 0.2966263526416295,
"grad_norm": 8.310045697764552,
"learning_rate": 9.851694915254239e-06,
"loss": 2.5029094219207764,
"step": 466
},
{
"epoch": 0.2972628898790579,
"grad_norm": 8.826548109977605,
"learning_rate": 9.872881355932204e-06,
"loss": 2.9038829803466797,
"step": 467
},
{
"epoch": 0.2978994271164863,
"grad_norm": 9.884818861483527,
"learning_rate": 9.89406779661017e-06,
"loss": 2.832332134246826,
"step": 468
},
{
"epoch": 0.2985359643539147,
"grad_norm": 6.665482253383969,
"learning_rate": 9.915254237288137e-06,
"loss": 2.741182327270508,
"step": 469
},
{
"epoch": 0.2991725015913431,
"grad_norm": 15.653183149015302,
"learning_rate": 9.936440677966102e-06,
"loss": 2.4737367630004883,
"step": 470
},
{
"epoch": 0.29980903882877147,
"grad_norm": 14.824362566450842,
"learning_rate": 9.957627118644069e-06,
"loss": 2.59440279006958,
"step": 471
},
{
"epoch": 0.30044557606619987,
"grad_norm": 23.18730021521652,
"learning_rate": 9.978813559322034e-06,
"loss": 2.371671438217163,
"step": 472
},
{
"epoch": 0.3010821133036283,
"grad_norm": 10.391918996320895,
"learning_rate": 1e-05,
"loss": 3.30546498298645,
"step": 473
},
{
"epoch": 0.3017186505410567,
"grad_norm": 31.66023743421455,
"learning_rate": 9.999998628160862e-06,
"loss": 2.7432923316955566,
"step": 474
},
{
"epoch": 0.302355187778485,
"grad_norm": 9.588630315588624,
"learning_rate": 9.999994512644197e-06,
"loss": 2.9125497341156006,
"step": 475
},
{
"epoch": 0.3029917250159134,
"grad_norm": 16.592800570861797,
"learning_rate": 9.999987653452265e-06,
"loss": 3.4428765773773193,
"step": 476
},
{
"epoch": 0.30362826225334183,
"grad_norm": 14.76583786906148,
"learning_rate": 9.999978050588832e-06,
"loss": 2.7113728523254395,
"step": 477
},
{
"epoch": 0.30426479949077023,
"grad_norm": 10.841929591033558,
"learning_rate": 9.999965704059163e-06,
"loss": 2.9176578521728516,
"step": 478
},
{
"epoch": 0.3049013367281986,
"grad_norm": 10.33304861879112,
"learning_rate": 9.999950613870036e-06,
"loss": 2.924442768096924,
"step": 479
},
{
"epoch": 0.305537873965627,
"grad_norm": 10.096621224894694,
"learning_rate": 9.99993278002973e-06,
"loss": 3.140645980834961,
"step": 480
},
{
"epoch": 0.3061744112030554,
"grad_norm": 10.4212452713938,
"learning_rate": 9.999912202548033e-06,
"loss": 2.9142580032348633,
"step": 481
},
{
"epoch": 0.3068109484404838,
"grad_norm": 12.045245392438261,
"learning_rate": 9.999888881436235e-06,
"loss": 2.679471015930176,
"step": 482
},
{
"epoch": 0.30744748567791214,
"grad_norm": 13.404477364271433,
"learning_rate": 9.999862816707133e-06,
"loss": 2.6989357471466064,
"step": 483
},
{
"epoch": 0.30808402291534054,
"grad_norm": 24.878929931157536,
"learning_rate": 9.999834008375032e-06,
"loss": 2.761178493499756,
"step": 484
},
{
"epoch": 0.30872056015276894,
"grad_norm": 8.567649269925306,
"learning_rate": 9.999802456455736e-06,
"loss": 3.2187318801879883,
"step": 485
},
{
"epoch": 0.30935709739019734,
"grad_norm": 8.764111500182137,
"learning_rate": 9.999768160966561e-06,
"loss": 2.8718371391296387,
"step": 486
},
{
"epoch": 0.3099936346276257,
"grad_norm": 11.748973884828251,
"learning_rate": 9.999731121926329e-06,
"loss": 2.8609938621520996,
"step": 487
},
{
"epoch": 0.3106301718650541,
"grad_norm": 9.107045063551201,
"learning_rate": 9.99969133935536e-06,
"loss": 2.7174265384674072,
"step": 488
},
{
"epoch": 0.3112667091024825,
"grad_norm": 9.583605472207113,
"learning_rate": 9.999648813275484e-06,
"loss": 3.1644539833068848,
"step": 489
},
{
"epoch": 0.3119032463399109,
"grad_norm": 12.364365943398141,
"learning_rate": 9.999603543710042e-06,
"loss": 2.8273873329162598,
"step": 490
},
{
"epoch": 0.31253978357733925,
"grad_norm": 18.171015419037342,
"learning_rate": 9.99955553068387e-06,
"loss": 2.6150119304656982,
"step": 491
},
{
"epoch": 0.31317632081476765,
"grad_norm": 10.619980186641529,
"learning_rate": 9.999504774223317e-06,
"loss": 2.663158416748047,
"step": 492
},
{
"epoch": 0.31381285805219605,
"grad_norm": 12.381455556494492,
"learning_rate": 9.99945127435623e-06,
"loss": 2.661107063293457,
"step": 493
},
{
"epoch": 0.31444939528962446,
"grad_norm": 12.757876969453523,
"learning_rate": 9.999395031111975e-06,
"loss": 2.856588363647461,
"step": 494
},
{
"epoch": 0.31508593252705286,
"grad_norm": 14.213876144934218,
"learning_rate": 9.999336044521408e-06,
"loss": 2.613553524017334,
"step": 495
},
{
"epoch": 0.3157224697644812,
"grad_norm": 15.726970016629625,
"learning_rate": 9.999274314616898e-06,
"loss": 3.4927902221679688,
"step": 496
},
{
"epoch": 0.3163590070019096,
"grad_norm": 12.253250019487384,
"learning_rate": 9.99920984143232e-06,
"loss": 2.8653440475463867,
"step": 497
},
{
"epoch": 0.316995544239338,
"grad_norm": 9.617755498584044,
"learning_rate": 9.999142625003054e-06,
"loss": 2.6097002029418945,
"step": 498
},
{
"epoch": 0.3176320814767664,
"grad_norm": 7.086589541534136,
"learning_rate": 9.999072665365978e-06,
"loss": 2.7578887939453125,
"step": 499
},
{
"epoch": 0.31826861871419476,
"grad_norm": 13.344826735283398,
"learning_rate": 9.998999962559489e-06,
"loss": 3.936290740966797,
"step": 500
},
{
"epoch": 0.31890515595162316,
"grad_norm": 14.140754100517357,
"learning_rate": 9.998924516623476e-06,
"loss": 2.5598175525665283,
"step": 501
},
{
"epoch": 0.31954169318905157,
"grad_norm": 10.289825379300616,
"learning_rate": 9.998846327599343e-06,
"loss": 2.9495205879211426,
"step": 502
},
{
"epoch": 0.32017823042647997,
"grad_norm": 17.17421690989167,
"learning_rate": 9.998765395529991e-06,
"loss": 2.556317090988159,
"step": 503
},
{
"epoch": 0.3208147676639083,
"grad_norm": 7.900979015012702,
"learning_rate": 9.998681720459832e-06,
"loss": 3.07352876663208,
"step": 504
},
{
"epoch": 0.3214513049013367,
"grad_norm": 9.178466790864046,
"learning_rate": 9.998595302434783e-06,
"loss": 3.1477761268615723,
"step": 505
},
{
"epoch": 0.3220878421387651,
"grad_norm": 11.158747107423268,
"learning_rate": 9.998506141502264e-06,
"loss": 2.565523624420166,
"step": 506
},
{
"epoch": 0.3227243793761935,
"grad_norm": 9.745259368574294,
"learning_rate": 9.9984142377112e-06,
"loss": 2.4731647968292236,
"step": 507
},
{
"epoch": 0.3233609166136219,
"grad_norm": 10.504835167489151,
"learning_rate": 9.998319591112023e-06,
"loss": 2.833559036254883,
"step": 508
},
{
"epoch": 0.3239974538510503,
"grad_norm": 8.475445634985794,
"learning_rate": 9.998222201756665e-06,
"loss": 2.8836066722869873,
"step": 509
},
{
"epoch": 0.3246339910884787,
"grad_norm": 19.237070041175475,
"learning_rate": 9.998122069698572e-06,
"loss": 2.7430686950683594,
"step": 510
},
{
"epoch": 0.3252705283259071,
"grad_norm": 14.958651715813355,
"learning_rate": 9.998019194992689e-06,
"loss": 2.396372079849243,
"step": 511
},
{
"epoch": 0.32590706556333543,
"grad_norm": 16.29851203620696,
"learning_rate": 9.997913577695466e-06,
"loss": 3.14420747756958,
"step": 512
},
{
"epoch": 0.32654360280076383,
"grad_norm": 14.00413096447635,
"learning_rate": 9.997805217864858e-06,
"loss": 3.0623974800109863,
"step": 513
},
{
"epoch": 0.32718014003819224,
"grad_norm": 11.661727203641812,
"learning_rate": 9.997694115560327e-06,
"loss": 2.466407537460327,
"step": 514
},
{
"epoch": 0.32781667727562064,
"grad_norm": 9.217472463327738,
"learning_rate": 9.99758027084284e-06,
"loss": 3.4243903160095215,
"step": 515
},
{
"epoch": 0.32845321451304904,
"grad_norm": 8.627597567343209,
"learning_rate": 9.997463683774866e-06,
"loss": 2.910386562347412,
"step": 516
},
{
"epoch": 0.3290897517504774,
"grad_norm": 26.295612271979316,
"learning_rate": 9.99734435442038e-06,
"loss": 2.6467678546905518,
"step": 517
},
{
"epoch": 0.3297262889879058,
"grad_norm": 20.16174022986734,
"learning_rate": 9.997222282844865e-06,
"loss": 2.6588282585144043,
"step": 518
},
{
"epoch": 0.3303628262253342,
"grad_norm": 6.755631493856407,
"learning_rate": 9.997097469115303e-06,
"loss": 1.3066935539245605,
"step": 519
},
{
"epoch": 0.3309993634627626,
"grad_norm": 14.766691885632358,
"learning_rate": 9.996969913300185e-06,
"loss": 2.514312267303467,
"step": 520
},
{
"epoch": 0.33163590070019094,
"grad_norm": 77.17893427003337,
"learning_rate": 9.996839615469507e-06,
"loss": 2.473106622695923,
"step": 521
},
{
"epoch": 0.33227243793761935,
"grad_norm": 11.151217304145346,
"learning_rate": 9.996706575694764e-06,
"loss": 2.7616677284240723,
"step": 522
},
{
"epoch": 0.33290897517504775,
"grad_norm": 21.575010036196733,
"learning_rate": 9.996570794048964e-06,
"loss": 2.2915472984313965,
"step": 523
},
{
"epoch": 0.33354551241247615,
"grad_norm": 36.58605310330633,
"learning_rate": 9.996432270606614e-06,
"loss": 3.331940174102783,
"step": 524
},
{
"epoch": 0.3341820496499045,
"grad_norm": 19.316844663679245,
"learning_rate": 9.996291005443725e-06,
"loss": 2.7368221282958984,
"step": 525
},
{
"epoch": 0.3348185868873329,
"grad_norm": 18.255012442611356,
"learning_rate": 9.996146998637814e-06,
"loss": 3.4562439918518066,
"step": 526
},
{
"epoch": 0.3354551241247613,
"grad_norm": 9.180146555325413,
"learning_rate": 9.996000250267908e-06,
"loss": 2.8834753036499023,
"step": 527
},
{
"epoch": 0.3360916613621897,
"grad_norm": 14.17213899043496,
"learning_rate": 9.995850760414525e-06,
"loss": 2.6626763343811035,
"step": 528
},
{
"epoch": 0.33672819859961806,
"grad_norm": 14.437809542755755,
"learning_rate": 9.995698529159701e-06,
"loss": 3.103738307952881,
"step": 529
},
{
"epoch": 0.33736473583704646,
"grad_norm": 12.49658370440524,
"learning_rate": 9.995543556586971e-06,
"loss": 2.893144369125366,
"step": 530
},
{
"epoch": 0.33800127307447486,
"grad_norm": 11.91200029152462,
"learning_rate": 9.99538584278137e-06,
"loss": 2.814668893814087,
"step": 531
},
{
"epoch": 0.33863781031190326,
"grad_norm": 11.029866774072888,
"learning_rate": 9.995225387829446e-06,
"loss": 2.77158784866333,
"step": 532
},
{
"epoch": 0.3392743475493316,
"grad_norm": 9.409247027470174,
"learning_rate": 9.99506219181924e-06,
"loss": 3.173114061355591,
"step": 533
},
{
"epoch": 0.33991088478676,
"grad_norm": 19.82056407187092,
"learning_rate": 9.99489625484031e-06,
"loss": 3.1451973915100098,
"step": 534
},
{
"epoch": 0.3405474220241884,
"grad_norm": 14.168695873290911,
"learning_rate": 9.994727576983709e-06,
"loss": 2.498263120651245,
"step": 535
},
{
"epoch": 0.3411839592616168,
"grad_norm": 9.430546621447903,
"learning_rate": 9.994556158341995e-06,
"loss": 2.7113990783691406,
"step": 536
},
{
"epoch": 0.34182049649904517,
"grad_norm": 19.540585641156934,
"learning_rate": 9.994381999009234e-06,
"loss": 2.5611259937286377,
"step": 537
},
{
"epoch": 0.34245703373647357,
"grad_norm": 14.771991055023605,
"learning_rate": 9.994205099080992e-06,
"loss": 2.760798692703247,
"step": 538
},
{
"epoch": 0.343093570973902,
"grad_norm": 7.95913180934273,
"learning_rate": 9.994025458654339e-06,
"loss": 2.6646268367767334,
"step": 539
},
{
"epoch": 0.3437301082113304,
"grad_norm": 15.474046292497501,
"learning_rate": 9.993843077827854e-06,
"loss": 2.42655611038208,
"step": 540
},
{
"epoch": 0.3443666454487588,
"grad_norm": 12.187750005188336,
"learning_rate": 9.993657956701613e-06,
"loss": 2.930875301361084,
"step": 541
},
{
"epoch": 0.3450031826861871,
"grad_norm": 13.272631871315637,
"learning_rate": 9.993470095377199e-06,
"loss": 2.7495932579040527,
"step": 542
},
{
"epoch": 0.34563971992361553,
"grad_norm": 7.456501849105595,
"learning_rate": 9.993279493957698e-06,
"loss": 2.7832741737365723,
"step": 543
},
{
"epoch": 0.34627625716104393,
"grad_norm": 20.699299594098285,
"learning_rate": 9.993086152547699e-06,
"loss": 3.2728452682495117,
"step": 544
},
{
"epoch": 0.34691279439847234,
"grad_norm": 8.257838784063091,
"learning_rate": 9.992890071253297e-06,
"loss": 3.3546133041381836,
"step": 545
},
{
"epoch": 0.3475493316359007,
"grad_norm": 9.102418071358285,
"learning_rate": 9.99269125018209e-06,
"loss": 2.539501190185547,
"step": 546
},
{
"epoch": 0.3481858688733291,
"grad_norm": 40.908770676555775,
"learning_rate": 9.992489689443175e-06,
"loss": 4.694552421569824,
"step": 547
},
{
"epoch": 0.3488224061107575,
"grad_norm": 11.033322133394357,
"learning_rate": 9.992285389147156e-06,
"loss": 2.7217929363250732,
"step": 548
},
{
"epoch": 0.3494589433481859,
"grad_norm": 33.19531232099363,
"learning_rate": 9.992078349406143e-06,
"loss": 2.987252712249756,
"step": 549
},
{
"epoch": 0.35009548058561424,
"grad_norm": 10.09416414307797,
"learning_rate": 9.991868570333742e-06,
"loss": 3.0493078231811523,
"step": 550
},
{
"epoch": 0.35073201782304264,
"grad_norm": 11.205696352596215,
"learning_rate": 9.991656052045071e-06,
"loss": 3.0120527744293213,
"step": 551
},
{
"epoch": 0.35136855506047104,
"grad_norm": 8.706664163773446,
"learning_rate": 9.991440794656741e-06,
"loss": 2.504729747772217,
"step": 552
},
{
"epoch": 0.35200509229789945,
"grad_norm": 16.08095287730785,
"learning_rate": 9.991222798286876e-06,
"loss": 2.8735032081604004,
"step": 553
},
{
"epoch": 0.3526416295353278,
"grad_norm": 16.445657787827177,
"learning_rate": 9.991002063055095e-06,
"loss": 2.7720909118652344,
"step": 554
},
{
"epoch": 0.3532781667727562,
"grad_norm": 21.854022860671776,
"learning_rate": 9.990778589082527e-06,
"loss": 2.3629848957061768,
"step": 555
},
{
"epoch": 0.3539147040101846,
"grad_norm": 10.944655744451333,
"learning_rate": 9.990552376491794e-06,
"loss": 2.9007742404937744,
"step": 556
},
{
"epoch": 0.354551241247613,
"grad_norm": 12.410073833010728,
"learning_rate": 9.990323425407034e-06,
"loss": 2.83327054977417,
"step": 557
},
{
"epoch": 0.35518777848504135,
"grad_norm": 8.95949921903947,
"learning_rate": 9.990091735953875e-06,
"loss": 3.135423183441162,
"step": 558
},
{
"epoch": 0.35582431572246975,
"grad_norm": 7.162605329310381,
"learning_rate": 9.989857308259457e-06,
"loss": 2.7775559425354004,
"step": 559
},
{
"epoch": 0.35646085295989816,
"grad_norm": 8.52004662486134,
"learning_rate": 9.989620142452417e-06,
"loss": 2.7399826049804688,
"step": 560
},
{
"epoch": 0.35709739019732656,
"grad_norm": 17.744651939267612,
"learning_rate": 9.989380238662897e-06,
"loss": 2.7530295848846436,
"step": 561
},
{
"epoch": 0.3577339274347549,
"grad_norm": 15.84289335198656,
"learning_rate": 9.98913759702254e-06,
"loss": 2.881882429122925,
"step": 562
},
{
"epoch": 0.3583704646721833,
"grad_norm": 11.55565022140253,
"learning_rate": 9.988892217664492e-06,
"loss": 2.9270741939544678,
"step": 563
},
{
"epoch": 0.3590070019096117,
"grad_norm": 11.03340184827849,
"learning_rate": 9.988644100723403e-06,
"loss": 2.8461787700653076,
"step": 564
},
{
"epoch": 0.3596435391470401,
"grad_norm": 14.578234992013659,
"learning_rate": 9.988393246335424e-06,
"loss": 2.517241954803467,
"step": 565
},
{
"epoch": 0.3602800763844685,
"grad_norm": 8.127008050680834,
"learning_rate": 9.988139654638204e-06,
"loss": 2.966914653778076,
"step": 566
},
{
"epoch": 0.36091661362189686,
"grad_norm": 9.216070318677676,
"learning_rate": 9.987883325770903e-06,
"loss": 2.6558094024658203,
"step": 567
},
{
"epoch": 0.36155315085932527,
"grad_norm": 17.169149508599855,
"learning_rate": 9.987624259874172e-06,
"loss": 2.323289155960083,
"step": 568
},
{
"epoch": 0.36218968809675367,
"grad_norm": 9.166183587477484,
"learning_rate": 9.987362457090174e-06,
"loss": 2.409285306930542,
"step": 569
},
{
"epoch": 0.3628262253341821,
"grad_norm": 11.13012364580221,
"learning_rate": 9.98709791756257e-06,
"loss": 2.4598522186279297,
"step": 570
},
{
"epoch": 0.3634627625716104,
"grad_norm": 11.942641315185213,
"learning_rate": 9.986830641436519e-06,
"loss": 2.520512819290161,
"step": 571
},
{
"epoch": 0.3640992998090388,
"grad_norm": 19.551435054071653,
"learning_rate": 9.986560628858686e-06,
"loss": 2.8555023670196533,
"step": 572
},
{
"epoch": 0.3647358370464672,
"grad_norm": 13.766408048514078,
"learning_rate": 9.986287879977237e-06,
"loss": 2.6136066913604736,
"step": 573
},
{
"epoch": 0.36537237428389563,
"grad_norm": 12.105556637811555,
"learning_rate": 9.98601239494184e-06,
"loss": 2.950223684310913,
"step": 574
},
{
"epoch": 0.366008911521324,
"grad_norm": 7.891574119862333,
"learning_rate": 9.98573417390366e-06,
"loss": 2.742581367492676,
"step": 575
},
{
"epoch": 0.3666454487587524,
"grad_norm": 16.58863121732629,
"learning_rate": 9.985453217015371e-06,
"loss": 2.458749294281006,
"step": 576
},
{
"epoch": 0.3672819859961808,
"grad_norm": 20.69997579999368,
"learning_rate": 9.985169524431143e-06,
"loss": 2.656479835510254,
"step": 577
},
{
"epoch": 0.3679185232336092,
"grad_norm": 14.165292656115142,
"learning_rate": 9.984883096306648e-06,
"loss": 2.546654462814331,
"step": 578
},
{
"epoch": 0.36855506047103753,
"grad_norm": 7.15022029441597,
"learning_rate": 9.984593932799057e-06,
"loss": 2.6810786724090576,
"step": 579
},
{
"epoch": 0.36919159770846594,
"grad_norm": 6.180154264135546,
"learning_rate": 9.984302034067048e-06,
"loss": 2.5700953006744385,
"step": 580
},
{
"epoch": 0.36982813494589434,
"grad_norm": 11.703039591161678,
"learning_rate": 9.984007400270793e-06,
"loss": 2.629293918609619,
"step": 581
},
{
"epoch": 0.37046467218332274,
"grad_norm": 7.005997624163455,
"learning_rate": 9.98371003157197e-06,
"loss": 2.742830991744995,
"step": 582
},
{
"epoch": 0.3711012094207511,
"grad_norm": 11.272756626497822,
"learning_rate": 9.983409928133756e-06,
"loss": 3.019237995147705,
"step": 583
},
{
"epoch": 0.3717377466581795,
"grad_norm": 88.39848337199575,
"learning_rate": 9.983107090120828e-06,
"loss": 2.901144027709961,
"step": 584
},
{
"epoch": 0.3723742838956079,
"grad_norm": 10.809523763851594,
"learning_rate": 9.982801517699363e-06,
"loss": 2.8116211891174316,
"step": 585
},
{
"epoch": 0.3730108211330363,
"grad_norm": 10.52028232851953,
"learning_rate": 9.98249321103704e-06,
"loss": 2.6679322719573975,
"step": 586
},
{
"epoch": 0.3736473583704647,
"grad_norm": 14.30042583766645,
"learning_rate": 9.982182170303038e-06,
"loss": 2.7962963581085205,
"step": 587
},
{
"epoch": 0.37428389560789305,
"grad_norm": 21.373418854152547,
"learning_rate": 9.981868395668037e-06,
"loss": 2.6593198776245117,
"step": 588
},
{
"epoch": 0.37492043284532145,
"grad_norm": 10.357703767017133,
"learning_rate": 9.981551887304214e-06,
"loss": 2.869109630584717,
"step": 589
},
{
"epoch": 0.37555697008274985,
"grad_norm": 11.762580385143742,
"learning_rate": 9.981232645385253e-06,
"loss": 2.3226709365844727,
"step": 590
},
{
"epoch": 0.37619350732017826,
"grad_norm": 14.93629322268585,
"learning_rate": 9.98091067008633e-06,
"loss": 3.0382394790649414,
"step": 591
},
{
"epoch": 0.3768300445576066,
"grad_norm": 12.06044711282513,
"learning_rate": 9.980585961584124e-06,
"loss": 2.413412094116211,
"step": 592
},
{
"epoch": 0.377466581795035,
"grad_norm": 13.507343372488394,
"learning_rate": 9.980258520056813e-06,
"loss": 2.8003458976745605,
"step": 593
},
{
"epoch": 0.3781031190324634,
"grad_norm": 8.617531636027781,
"learning_rate": 9.97992834568408e-06,
"loss": 2.4336965084075928,
"step": 594
},
{
"epoch": 0.3787396562698918,
"grad_norm": 10.284383480836185,
"learning_rate": 9.979595438647101e-06,
"loss": 2.6842195987701416,
"step": 595
},
{
"epoch": 0.37937619350732016,
"grad_norm": 12.604083500248924,
"learning_rate": 9.979259799128554e-06,
"loss": 2.740729331970215,
"step": 596
},
{
"epoch": 0.38001273074474856,
"grad_norm": 21.70938956261165,
"learning_rate": 9.978921427312617e-06,
"loss": 2.901966094970703,
"step": 597
},
{
"epoch": 0.38064926798217696,
"grad_norm": 17.052471808172093,
"learning_rate": 9.978580323384965e-06,
"loss": 2.6793160438537598,
"step": 598
},
{
"epoch": 0.38128580521960537,
"grad_norm": 9.38891050745183,
"learning_rate": 9.978236487532778e-06,
"loss": 2.7052338123321533,
"step": 599
},
{
"epoch": 0.3819223424570337,
"grad_norm": 11.226494849053653,
"learning_rate": 9.977889919944725e-06,
"loss": 2.664078712463379,
"step": 600
},
{
"epoch": 0.3825588796944621,
"grad_norm": 26.16925716656446,
"learning_rate": 9.977540620810984e-06,
"loss": 2.1137261390686035,
"step": 601
},
{
"epoch": 0.3831954169318905,
"grad_norm": 14.849349743588643,
"learning_rate": 9.977188590323228e-06,
"loss": 3.0612120628356934,
"step": 602
},
{
"epoch": 0.3838319541693189,
"grad_norm": 8.819966272718052,
"learning_rate": 9.976833828674627e-06,
"loss": 3.0031580924987793,
"step": 603
},
{
"epoch": 0.38446849140674727,
"grad_norm": 13.720082436634515,
"learning_rate": 9.976476336059852e-06,
"loss": 2.5077881813049316,
"step": 604
},
{
"epoch": 0.3851050286441757,
"grad_norm": 12.654287287694025,
"learning_rate": 9.976116112675072e-06,
"loss": 2.7970845699310303,
"step": 605
},
{
"epoch": 0.3857415658816041,
"grad_norm": 8.055594500030699,
"learning_rate": 9.975753158717954e-06,
"loss": 2.961820602416992,
"step": 606
},
{
"epoch": 0.3863781031190325,
"grad_norm": 15.703236546101197,
"learning_rate": 9.975387474387664e-06,
"loss": 2.8235034942626953,
"step": 607
},
{
"epoch": 0.3870146403564608,
"grad_norm": 10.555876441922358,
"learning_rate": 9.975019059884867e-06,
"loss": 2.211468458175659,
"step": 608
},
{
"epoch": 0.38765117759388923,
"grad_norm": 16.658200678841155,
"learning_rate": 9.974647915411725e-06,
"loss": 3.1141555309295654,
"step": 609
},
{
"epoch": 0.38828771483131763,
"grad_norm": 9.53456679210011,
"learning_rate": 9.974274041171896e-06,
"loss": 2.7841145992279053,
"step": 610
},
{
"epoch": 0.38892425206874603,
"grad_norm": 11.411951635683534,
"learning_rate": 9.97389743737054e-06,
"loss": 2.8071258068084717,
"step": 611
},
{
"epoch": 0.38956078930617444,
"grad_norm": 22.36863147283409,
"learning_rate": 9.973518104214315e-06,
"loss": 3.1157443523406982,
"step": 612
},
{
"epoch": 0.3901973265436028,
"grad_norm": 24.276390785654325,
"learning_rate": 9.97313604191137e-06,
"loss": 2.271000385284424,
"step": 613
},
{
"epoch": 0.3908338637810312,
"grad_norm": 7.016670503494138,
"learning_rate": 9.972751250671359e-06,
"loss": 2.9037952423095703,
"step": 614
},
{
"epoch": 0.3914704010184596,
"grad_norm": 7.376550389326394,
"learning_rate": 9.97236373070543e-06,
"loss": 2.7015273571014404,
"step": 615
},
{
"epoch": 0.392106938255888,
"grad_norm": 15.295151798888124,
"learning_rate": 9.97197348222623e-06,
"loss": 3.1652095317840576,
"step": 616
},
{
"epoch": 0.39274347549331634,
"grad_norm": 13.765527468480125,
"learning_rate": 9.9715805054479e-06,
"loss": 2.8960907459259033,
"step": 617
},
{
"epoch": 0.39338001273074474,
"grad_norm": 8.971602174986844,
"learning_rate": 9.971184800586082e-06,
"loss": 3.0302138328552246,
"step": 618
},
{
"epoch": 0.39401654996817315,
"grad_norm": 16.81102951947778,
"learning_rate": 9.970786367857914e-06,
"loss": 2.9071855545043945,
"step": 619
},
{
"epoch": 0.39465308720560155,
"grad_norm": 7.906382550378165,
"learning_rate": 9.97038520748203e-06,
"loss": 2.3067169189453125,
"step": 620
},
{
"epoch": 0.3952896244430299,
"grad_norm": 13.34748819688486,
"learning_rate": 9.969981319678558e-06,
"loss": 3.124617576599121,
"step": 621
},
{
"epoch": 0.3959261616804583,
"grad_norm": 11.989181453339398,
"learning_rate": 9.969574704669132e-06,
"loss": 2.90457820892334,
"step": 622
},
{
"epoch": 0.3965626989178867,
"grad_norm": 10.967880077200139,
"learning_rate": 9.969165362676869e-06,
"loss": 2.6836605072021484,
"step": 623
},
{
"epoch": 0.3971992361553151,
"grad_norm": 10.674495764110725,
"learning_rate": 9.968753293926394e-06,
"loss": 2.4494035243988037,
"step": 624
},
{
"epoch": 0.39783577339274345,
"grad_norm": 11.439167548793653,
"learning_rate": 9.968338498643822e-06,
"loss": 2.9132814407348633,
"step": 625
},
{
"epoch": 0.39847231063017186,
"grad_norm": 12.866268633011725,
"learning_rate": 9.967920977056767e-06,
"loss": 3.1518666744232178,
"step": 626
},
{
"epoch": 0.39910884786760026,
"grad_norm": 9.872952578923261,
"learning_rate": 9.967500729394337e-06,
"loss": 3.1968982219696045,
"step": 627
},
{
"epoch": 0.39974538510502866,
"grad_norm": 20.085831417612255,
"learning_rate": 9.967077755887137e-06,
"loss": 2.6791770458221436,
"step": 628
},
{
"epoch": 0.400381922342457,
"grad_norm": 14.205638034512429,
"learning_rate": 9.96665205676727e-06,
"loss": 2.7240817546844482,
"step": 629
},
{
"epoch": 0.4010184595798854,
"grad_norm": 7.919533080851466,
"learning_rate": 9.966223632268329e-06,
"loss": 2.643958806991577,
"step": 630
},
{
"epoch": 0.4016549968173138,
"grad_norm": 18.29736046843817,
"learning_rate": 9.965792482625408e-06,
"loss": 3.0323963165283203,
"step": 631
},
{
"epoch": 0.4022915340547422,
"grad_norm": 8.696314110576866,
"learning_rate": 9.965358608075093e-06,
"loss": 2.7381138801574707,
"step": 632
},
{
"epoch": 0.40292807129217056,
"grad_norm": 18.829773132087,
"learning_rate": 9.964922008855467e-06,
"loss": 3.537816286087036,
"step": 633
},
{
"epoch": 0.40356460852959897,
"grad_norm": 21.751986969184298,
"learning_rate": 9.964482685206105e-06,
"loss": 2.8908934593200684,
"step": 634
},
{
"epoch": 0.40420114576702737,
"grad_norm": 6.860374488872398,
"learning_rate": 9.964040637368084e-06,
"loss": 2.899407386779785,
"step": 635
},
{
"epoch": 0.4048376830044558,
"grad_norm": 9.023760950668839,
"learning_rate": 9.963595865583969e-06,
"loss": 2.890510320663452,
"step": 636
},
{
"epoch": 0.4054742202418842,
"grad_norm": 6.053230462493601,
"learning_rate": 9.963148370097822e-06,
"loss": 2.758610248565674,
"step": 637
},
{
"epoch": 0.4061107574793125,
"grad_norm": 7.3110311511029655,
"learning_rate": 9.962698151155201e-06,
"loss": 2.8290634155273438,
"step": 638
},
{
"epoch": 0.4067472947167409,
"grad_norm": 6.097610481770767,
"learning_rate": 9.962245209003156e-06,
"loss": 2.7542171478271484,
"step": 639
},
{
"epoch": 0.40738383195416933,
"grad_norm": 15.90993244320391,
"learning_rate": 9.961789543890232e-06,
"loss": 2.9697728157043457,
"step": 640
},
{
"epoch": 0.40802036919159773,
"grad_norm": 10.735451573885806,
"learning_rate": 9.96133115606647e-06,
"loss": 2.8094568252563477,
"step": 641
},
{
"epoch": 0.4086569064290261,
"grad_norm": 8.813848610851771,
"learning_rate": 9.960870045783404e-06,
"loss": 2.468761920928955,
"step": 642
},
{
"epoch": 0.4092934436664545,
"grad_norm": 14.91989636635262,
"learning_rate": 9.96040621329406e-06,
"loss": 3.4151830673217773,
"step": 643
},
{
"epoch": 0.4099299809038829,
"grad_norm": 10.110691044294901,
"learning_rate": 9.959939658852961e-06,
"loss": 2.3592095375061035,
"step": 644
},
{
"epoch": 0.4105665181413113,
"grad_norm": 14.067433272395801,
"learning_rate": 9.959470382716121e-06,
"loss": 3.0757241249084473,
"step": 645
},
{
"epoch": 0.41120305537873963,
"grad_norm": 17.355318698645497,
"learning_rate": 9.958998385141048e-06,
"loss": 4.224935054779053,
"step": 646
},
{
"epoch": 0.41183959261616804,
"grad_norm": 14.524481722394714,
"learning_rate": 9.958523666386746e-06,
"loss": 2.840717315673828,
"step": 647
},
{
"epoch": 0.41247612985359644,
"grad_norm": 9.864751867360656,
"learning_rate": 9.958046226713709e-06,
"loss": 1.4242722988128662,
"step": 648
},
{
"epoch": 0.41311266709102484,
"grad_norm": 10.183096885150817,
"learning_rate": 9.957566066383925e-06,
"loss": 2.89633846282959,
"step": 649
},
{
"epoch": 0.4137492043284532,
"grad_norm": 13.838812863625304,
"learning_rate": 9.957083185660876e-06,
"loss": 2.4605355262756348,
"step": 650
},
{
"epoch": 0.4143857415658816,
"grad_norm": 20.649038029654793,
"learning_rate": 9.956597584809533e-06,
"loss": 2.7936930656433105,
"step": 651
},
{
"epoch": 0.41502227880331,
"grad_norm": 10.011543784442619,
"learning_rate": 9.956109264096368e-06,
"loss": 3.692840337753296,
"step": 652
},
{
"epoch": 0.4156588160407384,
"grad_norm": 9.515737004335836,
"learning_rate": 9.955618223789334e-06,
"loss": 3.034600257873535,
"step": 653
},
{
"epoch": 0.41629535327816675,
"grad_norm": 16.094413428203612,
"learning_rate": 9.955124464157883e-06,
"loss": 3.0032362937927246,
"step": 654
},
{
"epoch": 0.41693189051559515,
"grad_norm": 15.849849486662814,
"learning_rate": 9.954627985472964e-06,
"loss": 2.671009063720703,
"step": 655
},
{
"epoch": 0.41756842775302355,
"grad_norm": 13.141905364366565,
"learning_rate": 9.954128788007007e-06,
"loss": 2.6174559593200684,
"step": 656
},
{
"epoch": 0.41820496499045196,
"grad_norm": 12.43941337981924,
"learning_rate": 9.953626872033943e-06,
"loss": 2.541435718536377,
"step": 657
},
{
"epoch": 0.41884150222788036,
"grad_norm": 10.743486385474622,
"learning_rate": 9.953122237829189e-06,
"loss": 2.7599356174468994,
"step": 658
},
{
"epoch": 0.4194780394653087,
"grad_norm": 10.12403135229293,
"learning_rate": 9.952614885669656e-06,
"loss": 2.745006561279297,
"step": 659
},
{
"epoch": 0.4201145767027371,
"grad_norm": 12.487193950798103,
"learning_rate": 9.952104815833747e-06,
"loss": 2.7711846828460693,
"step": 660
},
{
"epoch": 0.4207511139401655,
"grad_norm": 8.37166640608455,
"learning_rate": 9.951592028601356e-06,
"loss": 2.6092634201049805,
"step": 661
},
{
"epoch": 0.4213876511775939,
"grad_norm": 8.707068362495248,
"learning_rate": 9.951076524253866e-06,
"loss": 2.708832263946533,
"step": 662
},
{
"epoch": 0.42202418841502226,
"grad_norm": 11.391734001053349,
"learning_rate": 9.950558303074151e-06,
"loss": 3.039702892303467,
"step": 663
},
{
"epoch": 0.42266072565245066,
"grad_norm": 13.764788229417277,
"learning_rate": 9.950037365346583e-06,
"loss": 3.0277504920959473,
"step": 664
},
{
"epoch": 0.42329726288987907,
"grad_norm": 9.324681051738402,
"learning_rate": 9.949513711357015e-06,
"loss": 2.3469479084014893,
"step": 665
},
{
"epoch": 0.42393380012730747,
"grad_norm": 23.944453285208347,
"learning_rate": 9.948987341392794e-06,
"loss": 3.179440975189209,
"step": 666
},
{
"epoch": 0.4245703373647358,
"grad_norm": 10.989252593514273,
"learning_rate": 9.948458255742764e-06,
"loss": 2.5995612144470215,
"step": 667
},
{
"epoch": 0.4252068746021642,
"grad_norm": 14.886237899360502,
"learning_rate": 9.947926454697245e-06,
"loss": 2.4212164878845215,
"step": 668
},
{
"epoch": 0.4258434118395926,
"grad_norm": 10.460721255755393,
"learning_rate": 9.947391938548058e-06,
"loss": 2.848560094833374,
"step": 669
},
{
"epoch": 0.426479949077021,
"grad_norm": 15.285012142076349,
"learning_rate": 9.946854707588514e-06,
"loss": 3.141273021697998,
"step": 670
},
{
"epoch": 0.4271164863144494,
"grad_norm": 9.873965430465756,
"learning_rate": 9.946314762113408e-06,
"loss": 1.6153454780578613,
"step": 671
},
{
"epoch": 0.4277530235518778,
"grad_norm": 8.24353593993326,
"learning_rate": 9.945772102419028e-06,
"loss": 3.2559409141540527,
"step": 672
},
{
"epoch": 0.4283895607893062,
"grad_norm": 17.188774446865704,
"learning_rate": 9.945226728803152e-06,
"loss": 3.3109982013702393,
"step": 673
},
{
"epoch": 0.4290260980267346,
"grad_norm": 26.23000486353143,
"learning_rate": 9.944678641565043e-06,
"loss": 2.5540897846221924,
"step": 674
},
{
"epoch": 0.42966263526416293,
"grad_norm": 10.949099660874174,
"learning_rate": 9.944127841005458e-06,
"loss": 2.8961610794067383,
"step": 675
},
{
"epoch": 0.43029917250159133,
"grad_norm": 22.67851337735571,
"learning_rate": 9.943574327426642e-06,
"loss": 2.8129518032073975,
"step": 676
},
{
"epoch": 0.43093570973901973,
"grad_norm": 18.30572432799705,
"learning_rate": 9.943018101132324e-06,
"loss": 3.113130569458008,
"step": 677
},
{
"epoch": 0.43157224697644814,
"grad_norm": 12.785299949548826,
"learning_rate": 9.94245916242773e-06,
"loss": 2.912236213684082,
"step": 678
},
{
"epoch": 0.4322087842138765,
"grad_norm": 7.682688420962503,
"learning_rate": 9.941897511619566e-06,
"loss": 2.8037664890289307,
"step": 679
},
{
"epoch": 0.4328453214513049,
"grad_norm": 16.30450802410894,
"learning_rate": 9.94133314901603e-06,
"loss": 2.6482651233673096,
"step": 680
},
{
"epoch": 0.4334818586887333,
"grad_norm": 7.005472690303165,
"learning_rate": 9.940766074926812e-06,
"loss": 2.8123576641082764,
"step": 681
},
{
"epoch": 0.4341183959261617,
"grad_norm": 15.53144189825324,
"learning_rate": 9.940196289663078e-06,
"loss": 4.070590972900391,
"step": 682
},
{
"epoch": 0.4347549331635901,
"grad_norm": 10.903253979305704,
"learning_rate": 9.939623793537496e-06,
"loss": 2.9066810607910156,
"step": 683
},
{
"epoch": 0.43539147040101844,
"grad_norm": 12.756922386256557,
"learning_rate": 9.939048586864213e-06,
"loss": 2.80411696434021,
"step": 684
},
{
"epoch": 0.43602800763844685,
"grad_norm": 16.621654802617265,
"learning_rate": 9.938470669958866e-06,
"loss": 3.1619603633880615,
"step": 685
},
{
"epoch": 0.43666454487587525,
"grad_norm": 11.49900426573002,
"learning_rate": 9.937890043138578e-06,
"loss": 3.234433174133301,
"step": 686
},
{
"epoch": 0.43730108211330365,
"grad_norm": 7.860862381899233,
"learning_rate": 9.93730670672196e-06,
"loss": 3.456599712371826,
"step": 687
},
{
"epoch": 0.437937619350732,
"grad_norm": 17.170411030564026,
"learning_rate": 9.936720661029109e-06,
"loss": 3.1284303665161133,
"step": 688
},
{
"epoch": 0.4385741565881604,
"grad_norm": 16.663238705856116,
"learning_rate": 9.93613190638161e-06,
"loss": 2.4781811237335205,
"step": 689
},
{
"epoch": 0.4392106938255888,
"grad_norm": 12.371678503540068,
"learning_rate": 9.935540443102531e-06,
"loss": 2.9643073081970215,
"step": 690
},
{
"epoch": 0.4398472310630172,
"grad_norm": 8.250209786550368,
"learning_rate": 9.934946271516433e-06,
"loss": 2.8749005794525146,
"step": 691
},
{
"epoch": 0.44048376830044556,
"grad_norm": 12.74480203323608,
"learning_rate": 9.934349391949356e-06,
"loss": 2.5457563400268555,
"step": 692
},
{
"epoch": 0.44112030553787396,
"grad_norm": 14.380859550838787,
"learning_rate": 9.933749804728832e-06,
"loss": 2.69622802734375,
"step": 693
},
{
"epoch": 0.44175684277530236,
"grad_norm": 12.535761815732146,
"learning_rate": 9.933147510183872e-06,
"loss": 2.3187177181243896,
"step": 694
},
{
"epoch": 0.44239338001273076,
"grad_norm": 16.108853721123644,
"learning_rate": 9.93254250864498e-06,
"loss": 2.924973726272583,
"step": 695
},
{
"epoch": 0.4430299172501591,
"grad_norm": 9.691182175468388,
"learning_rate": 9.931934800444141e-06,
"loss": 3.213982105255127,
"step": 696
},
{
"epoch": 0.4436664544875875,
"grad_norm": 6.131677330440111,
"learning_rate": 9.931324385914824e-06,
"loss": 2.690908670425415,
"step": 697
},
{
"epoch": 0.4443029917250159,
"grad_norm": 13.510963421325568,
"learning_rate": 9.930711265391988e-06,
"loss": 2.8179783821105957,
"step": 698
},
{
"epoch": 0.4449395289624443,
"grad_norm": 14.225533631814386,
"learning_rate": 9.930095439212073e-06,
"loss": 2.7640280723571777,
"step": 699
},
{
"epoch": 0.44557606619987267,
"grad_norm": 12.185149829665308,
"learning_rate": 9.929476907713005e-06,
"loss": 2.298121452331543,
"step": 700
},
{
"epoch": 0.44621260343730107,
"grad_norm": 16.24845527493673,
"learning_rate": 9.928855671234194e-06,
"loss": 3.070202350616455,
"step": 701
},
{
"epoch": 0.4468491406747295,
"grad_norm": 9.20682146322305,
"learning_rate": 9.928231730116535e-06,
"loss": 2.338399648666382,
"step": 702
},
{
"epoch": 0.4474856779121579,
"grad_norm": 8.226678303992347,
"learning_rate": 9.927605084702407e-06,
"loss": 2.8359477519989014,
"step": 703
},
{
"epoch": 0.4481222151495863,
"grad_norm": 32.5617697214951,
"learning_rate": 9.926975735335671e-06,
"loss": 2.9955806732177734,
"step": 704
},
{
"epoch": 0.4487587523870146,
"grad_norm": 13.246120214661767,
"learning_rate": 9.926343682361675e-06,
"loss": 2.639787197113037,
"step": 705
},
{
"epoch": 0.44939528962444303,
"grad_norm": 26.332193602580052,
"learning_rate": 9.925708926127248e-06,
"loss": 2.940730094909668,
"step": 706
},
{
"epoch": 0.45003182686187143,
"grad_norm": 19.297488474218106,
"learning_rate": 9.925071466980705e-06,
"loss": 3.354499340057373,
"step": 707
},
{
"epoch": 0.45066836409929983,
"grad_norm": 11.74488535410552,
"learning_rate": 9.92443130527184e-06,
"loss": 2.925593614578247,
"step": 708
},
{
"epoch": 0.4513049013367282,
"grad_norm": 12.733023497209542,
"learning_rate": 9.923788441351935e-06,
"loss": 2.8628973960876465,
"step": 709
},
{
"epoch": 0.4519414385741566,
"grad_norm": 9.947678922714372,
"learning_rate": 9.923142875573753e-06,
"loss": 2.3758437633514404,
"step": 710
},
{
"epoch": 0.452577975811585,
"grad_norm": 18.26488793608698,
"learning_rate": 9.922494608291535e-06,
"loss": 3.158717632293701,
"step": 711
},
{
"epoch": 0.4532145130490134,
"grad_norm": 16.815761519714297,
"learning_rate": 9.921843639861012e-06,
"loss": 2.8898587226867676,
"step": 712
},
{
"epoch": 0.45385105028644174,
"grad_norm": 14.557286967833983,
"learning_rate": 9.921189970639394e-06,
"loss": 2.966440200805664,
"step": 713
},
{
"epoch": 0.45448758752387014,
"grad_norm": 12.302618152679335,
"learning_rate": 9.920533600985368e-06,
"loss": 2.9261152744293213,
"step": 714
},
{
"epoch": 0.45512412476129854,
"grad_norm": 13.768228767235051,
"learning_rate": 9.919874531259112e-06,
"loss": 2.9286997318267822,
"step": 715
},
{
"epoch": 0.45576066199872695,
"grad_norm": 10.564120236385856,
"learning_rate": 9.919212761822279e-06,
"loss": 2.805097818374634,
"step": 716
},
{
"epoch": 0.4563971992361553,
"grad_norm": 7.490544863271503,
"learning_rate": 9.918548293038007e-06,
"loss": 1.9821001291275024,
"step": 717
},
{
"epoch": 0.4570337364735837,
"grad_norm": 14.085964886136328,
"learning_rate": 9.91788112527091e-06,
"loss": 3.0433640480041504,
"step": 718
},
{
"epoch": 0.4576702737110121,
"grad_norm": 14.829040046446302,
"learning_rate": 9.917211258887091e-06,
"loss": 2.8717331886291504,
"step": 719
},
{
"epoch": 0.4583068109484405,
"grad_norm": 10.565377930992518,
"learning_rate": 9.916538694254127e-06,
"loss": 2.2944936752319336,
"step": 720
},
{
"epoch": 0.45894334818586885,
"grad_norm": 9.174823180165214,
"learning_rate": 9.915863431741082e-06,
"loss": 2.902968406677246,
"step": 721
},
{
"epoch": 0.45957988542329725,
"grad_norm": 9.891210211114071,
"learning_rate": 9.915185471718491e-06,
"loss": 2.9564785957336426,
"step": 722
},
{
"epoch": 0.46021642266072565,
"grad_norm": 7.411473503493354,
"learning_rate": 9.914504814558379e-06,
"loss": 2.7194151878356934,
"step": 723
},
{
"epoch": 0.46085295989815406,
"grad_norm": 13.449222461916701,
"learning_rate": 9.913821460634246e-06,
"loss": 2.8414273262023926,
"step": 724
},
{
"epoch": 0.4614894971355824,
"grad_norm": 7.288076499119426,
"learning_rate": 9.913135410321072e-06,
"loss": 2.5523955821990967,
"step": 725
},
{
"epoch": 0.4621260343730108,
"grad_norm": 7.746636346731804,
"learning_rate": 9.912446663995315e-06,
"loss": 2.893989086151123,
"step": 726
},
{
"epoch": 0.4627625716104392,
"grad_norm": 11.28743653597152,
"learning_rate": 9.911755222034919e-06,
"loss": 3.0573744773864746,
"step": 727
},
{
"epoch": 0.4633991088478676,
"grad_norm": 7.5053281354024595,
"learning_rate": 9.9110610848193e-06,
"loss": 2.850208282470703,
"step": 728
},
{
"epoch": 0.464035646085296,
"grad_norm": 9.480537984079172,
"learning_rate": 9.910364252729357e-06,
"loss": 2.0863776206970215,
"step": 729
},
{
"epoch": 0.46467218332272436,
"grad_norm": 12.382920487266663,
"learning_rate": 9.909664726147467e-06,
"loss": 2.8669216632843018,
"step": 730
},
{
"epoch": 0.46530872056015277,
"grad_norm": 7.000986070921776,
"learning_rate": 9.908962505457484e-06,
"loss": 2.857827663421631,
"step": 731
},
{
"epoch": 0.46594525779758117,
"grad_norm": 9.848954673114715,
"learning_rate": 9.908257591044742e-06,
"loss": 2.8766777515411377,
"step": 732
},
{
"epoch": 0.46658179503500957,
"grad_norm": 15.293732536165164,
"learning_rate": 9.907549983296054e-06,
"loss": 3.4860715866088867,
"step": 733
},
{
"epoch": 0.4672183322724379,
"grad_norm": 10.331704044040437,
"learning_rate": 9.906839682599708e-06,
"loss": 2.6472580432891846,
"step": 734
},
{
"epoch": 0.4678548695098663,
"grad_norm": 16.276006167657393,
"learning_rate": 9.906126689345471e-06,
"loss": 2.6463623046875,
"step": 735
},
{
"epoch": 0.4684914067472947,
"grad_norm": 12.940481515444338,
"learning_rate": 9.90541100392459e-06,
"loss": 3.2068190574645996,
"step": 736
},
{
"epoch": 0.46912794398472313,
"grad_norm": 9.008600298107018,
"learning_rate": 9.904692626729784e-06,
"loss": 2.37585186958313,
"step": 737
},
{
"epoch": 0.4697644812221515,
"grad_norm": 16.266152128877287,
"learning_rate": 9.903971558155253e-06,
"loss": 3.045964002609253,
"step": 738
},
{
"epoch": 0.4704010184595799,
"grad_norm": 9.267594041537762,
"learning_rate": 9.903247798596677e-06,
"loss": 2.497579574584961,
"step": 739
},
{
"epoch": 0.4710375556970083,
"grad_norm": 13.422704581590162,
"learning_rate": 9.902521348451202e-06,
"loss": 3.0034847259521484,
"step": 740
},
{
"epoch": 0.4716740929344367,
"grad_norm": 13.641050524632726,
"learning_rate": 9.901792208117463e-06,
"loss": 2.625041961669922,
"step": 741
},
{
"epoch": 0.47231063017186503,
"grad_norm": 9.295405384635396,
"learning_rate": 9.901060377995562e-06,
"loss": 2.9143404960632324,
"step": 742
},
{
"epoch": 0.47294716740929343,
"grad_norm": 21.799358663541497,
"learning_rate": 9.900325858487082e-06,
"loss": 2.649592876434326,
"step": 743
},
{
"epoch": 0.47358370464672184,
"grad_norm": 16.81058879968057,
"learning_rate": 9.899588649995077e-06,
"loss": 2.91402006149292,
"step": 744
},
{
"epoch": 0.47422024188415024,
"grad_norm": 9.29700232907205,
"learning_rate": 9.898848752924084e-06,
"loss": 3.0308291912078857,
"step": 745
},
{
"epoch": 0.4748567791215786,
"grad_norm": 12.310748128320457,
"learning_rate": 9.898106167680108e-06,
"loss": 2.667323112487793,
"step": 746
},
{
"epoch": 0.475493316359007,
"grad_norm": 15.21271639587925,
"learning_rate": 9.897360894670634e-06,
"loss": 2.8982882499694824,
"step": 747
},
{
"epoch": 0.4761298535964354,
"grad_norm": 14.951213153498294,
"learning_rate": 9.896612934304618e-06,
"loss": 3.133816719055176,
"step": 748
},
{
"epoch": 0.4767663908338638,
"grad_norm": 14.808614861733782,
"learning_rate": 9.895862286992493e-06,
"loss": 2.8779456615448,
"step": 749
},
{
"epoch": 0.47740292807129214,
"grad_norm": 13.234676558337618,
"learning_rate": 9.895108953146165e-06,
"loss": 2.416349411010742,
"step": 750
},
{
"epoch": 0.47803946530872055,
"grad_norm": 15.70764287472319,
"learning_rate": 9.894352933179017e-06,
"loss": 2.63266658782959,
"step": 751
},
{
"epoch": 0.47867600254614895,
"grad_norm": 16.09723215871181,
"learning_rate": 9.893594227505902e-06,
"loss": 2.9402222633361816,
"step": 752
},
{
"epoch": 0.47931253978357735,
"grad_norm": 10.85603282705008,
"learning_rate": 9.892832836543151e-06,
"loss": 2.929889678955078,
"step": 753
},
{
"epoch": 0.47994907702100575,
"grad_norm": 7.658332157080424,
"learning_rate": 9.892068760708566e-06,
"loss": 2.414501190185547,
"step": 754
},
{
"epoch": 0.4805856142584341,
"grad_norm": 7.061927223311403,
"learning_rate": 9.891302000421423e-06,
"loss": 2.4067840576171875,
"step": 755
},
{
"epoch": 0.4812221514958625,
"grad_norm": 15.166517750731245,
"learning_rate": 9.890532556102468e-06,
"loss": 2.6367745399475098,
"step": 756
},
{
"epoch": 0.4818586887332909,
"grad_norm": 21.45726864766029,
"learning_rate": 9.889760428173927e-06,
"loss": 2.8288209438323975,
"step": 757
},
{
"epoch": 0.4824952259707193,
"grad_norm": 13.462199082179964,
"learning_rate": 9.88898561705949e-06,
"loss": 2.6893558502197266,
"step": 758
},
{
"epoch": 0.48313176320814766,
"grad_norm": 7.9536278817913155,
"learning_rate": 9.888208123184325e-06,
"loss": 2.590125322341919,
"step": 759
},
{
"epoch": 0.48376830044557606,
"grad_norm": 12.01274821779052,
"learning_rate": 9.88742794697507e-06,
"loss": 2.795884370803833,
"step": 760
},
{
"epoch": 0.48440483768300446,
"grad_norm": 10.370167179771329,
"learning_rate": 9.886645088859837e-06,
"loss": 2.782790422439575,
"step": 761
},
{
"epoch": 0.48504137492043287,
"grad_norm": 10.86893918590165,
"learning_rate": 9.885859549268206e-06,
"loss": 2.7205393314361572,
"step": 762
},
{
"epoch": 0.4856779121578612,
"grad_norm": 17.66342960824175,
"learning_rate": 9.885071328631233e-06,
"loss": 2.9844560623168945,
"step": 763
},
{
"epoch": 0.4863144493952896,
"grad_norm": 20.865322483392422,
"learning_rate": 9.884280427381442e-06,
"loss": 2.2549309730529785,
"step": 764
},
{
"epoch": 0.486950986632718,
"grad_norm": 10.188536008150164,
"learning_rate": 9.883486845952829e-06,
"loss": 2.3112339973449707,
"step": 765
},
{
"epoch": 0.4875875238701464,
"grad_norm": 10.749696325917178,
"learning_rate": 9.88269058478086e-06,
"loss": 3.1069111824035645,
"step": 766
},
{
"epoch": 0.48822406110757477,
"grad_norm": 31.00622065512355,
"learning_rate": 9.88189164430247e-06,
"loss": 3.462052583694458,
"step": 767
},
{
"epoch": 0.48886059834500317,
"grad_norm": 30.79585810187201,
"learning_rate": 9.881090024956068e-06,
"loss": 2.8704326152801514,
"step": 768
},
{
"epoch": 0.4894971355824316,
"grad_norm": 9.06903211486752,
"learning_rate": 9.88028572718153e-06,
"loss": 2.5768370628356934,
"step": 769
},
{
"epoch": 0.49013367281986,
"grad_norm": 19.877000304609403,
"learning_rate": 9.879478751420208e-06,
"loss": 2.7790379524230957,
"step": 770
},
{
"epoch": 0.4907702100572883,
"grad_norm": 9.541041502021342,
"learning_rate": 9.87866909811491e-06,
"loss": 3.077629804611206,
"step": 771
},
{
"epoch": 0.49140674729471673,
"grad_norm": 16.724559952167805,
"learning_rate": 9.877856767709928e-06,
"loss": 2.3151135444641113,
"step": 772
},
{
"epoch": 0.49204328453214513,
"grad_norm": 14.827081502963878,
"learning_rate": 9.877041760651014e-06,
"loss": 1.7901434898376465,
"step": 773
},
{
"epoch": 0.49267982176957353,
"grad_norm": 19.18607133804805,
"learning_rate": 9.876224077385392e-06,
"loss": 2.463925361633301,
"step": 774
},
{
"epoch": 0.49331635900700194,
"grad_norm": 15.711551704422293,
"learning_rate": 9.875403718361753e-06,
"loss": 3.406130313873291,
"step": 775
},
{
"epoch": 0.4939528962444303,
"grad_norm": 8.00068256562752,
"learning_rate": 9.87458068403026e-06,
"loss": 2.3577256202697754,
"step": 776
},
{
"epoch": 0.4945894334818587,
"grad_norm": 6.881720407233433,
"learning_rate": 9.873754974842537e-06,
"loss": 2.3718245029449463,
"step": 777
},
{
"epoch": 0.4952259707192871,
"grad_norm": 13.003329103082704,
"learning_rate": 9.872926591251684e-06,
"loss": 3.0024499893188477,
"step": 778
},
{
"epoch": 0.4958625079567155,
"grad_norm": 11.43543882830748,
"learning_rate": 9.872095533712263e-06,
"loss": 2.6259024143218994,
"step": 779
},
{
"epoch": 0.49649904519414384,
"grad_norm": 12.53610972874729,
"learning_rate": 9.871261802680305e-06,
"loss": 2.6501283645629883,
"step": 780
},
{
"epoch": 0.49713558243157224,
"grad_norm": 12.420994645703136,
"learning_rate": 9.870425398613308e-06,
"loss": 2.614926815032959,
"step": 781
},
{
"epoch": 0.49777211966900065,
"grad_norm": 8.036488508872816,
"learning_rate": 9.869586321970237e-06,
"loss": 2.8055872917175293,
"step": 782
},
{
"epoch": 0.49840865690642905,
"grad_norm": 8.231379769197854,
"learning_rate": 9.868744573211522e-06,
"loss": 2.818807363510132,
"step": 783
},
{
"epoch": 0.4990451941438574,
"grad_norm": 10.730920165600457,
"learning_rate": 9.867900152799061e-06,
"loss": 2.8013768196105957,
"step": 784
},
{
"epoch": 0.4996817313812858,
"grad_norm": 9.970979928602242,
"learning_rate": 9.86705306119622e-06,
"loss": 2.3457491397857666,
"step": 785
},
{
"epoch": 0.5003182686187142,
"grad_norm": 13.470490571372912,
"learning_rate": 9.866203298867825e-06,
"loss": 3.1120364665985107,
"step": 786
},
{
"epoch": 0.5009548058561426,
"grad_norm": 11.28766870904382,
"learning_rate": 9.865350866280174e-06,
"loss": 2.7780635356903076,
"step": 787
},
{
"epoch": 0.501591343093571,
"grad_norm": 15.678352974832473,
"learning_rate": 9.864495763901024e-06,
"loss": 2.325300693511963,
"step": 788
},
{
"epoch": 0.5022278803309994,
"grad_norm": 17.138791740026022,
"learning_rate": 9.863637992199601e-06,
"loss": 3.143601179122925,
"step": 789
},
{
"epoch": 0.5028644175684277,
"grad_norm": 9.663510119121984,
"learning_rate": 9.862777551646599e-06,
"loss": 2.5356907844543457,
"step": 790
},
{
"epoch": 0.5035009548058561,
"grad_norm": 14.12414506218859,
"learning_rate": 9.861914442714165e-06,
"loss": 2.4630978107452393,
"step": 791
},
{
"epoch": 0.5041374920432845,
"grad_norm": 9.924069328992848,
"learning_rate": 9.861048665875924e-06,
"loss": 2.6629276275634766,
"step": 792
},
{
"epoch": 0.5047740292807129,
"grad_norm": 19.79033509372283,
"learning_rate": 9.860180221606956e-06,
"loss": 3.092639923095703,
"step": 793
},
{
"epoch": 0.5054105665181413,
"grad_norm": 22.25388725246896,
"learning_rate": 9.859309110383808e-06,
"loss": 3.434253215789795,
"step": 794
},
{
"epoch": 0.5060471037555697,
"grad_norm": 6.684954672164134,
"learning_rate": 9.858435332684488e-06,
"loss": 2.879507064819336,
"step": 795
},
{
"epoch": 0.5066836409929981,
"grad_norm": 9.31451468087128,
"learning_rate": 9.85755888898847e-06,
"loss": 2.7654552459716797,
"step": 796
},
{
"epoch": 0.5073201782304265,
"grad_norm": 7.756931597209781,
"learning_rate": 9.856679779776692e-06,
"loss": 2.353954792022705,
"step": 797
},
{
"epoch": 0.5079567154678549,
"grad_norm": 11.146460893348015,
"learning_rate": 9.85579800553155e-06,
"loss": 2.53048038482666,
"step": 798
},
{
"epoch": 0.5085932527052832,
"grad_norm": 16.141401070616066,
"learning_rate": 9.854913566736906e-06,
"loss": 2.05796480178833,
"step": 799
},
{
"epoch": 0.5092297899427116,
"grad_norm": 5.988900255080467,
"learning_rate": 9.854026463878083e-06,
"loss": 1.685745120048523,
"step": 800
},
{
"epoch": 0.50986632718014,
"grad_norm": 6.3572999477726,
"learning_rate": 9.853136697441866e-06,
"loss": 2.7123618125915527,
"step": 801
},
{
"epoch": 0.5105028644175684,
"grad_norm": 8.290859697152273,
"learning_rate": 9.852244267916502e-06,
"loss": 2.608476161956787,
"step": 802
},
{
"epoch": 0.5111394016549968,
"grad_norm": 11.46663507794697,
"learning_rate": 9.851349175791697e-06,
"loss": 3.4248664379119873,
"step": 803
},
{
"epoch": 0.5117759388924252,
"grad_norm": 14.950475213614503,
"learning_rate": 9.850451421558622e-06,
"loss": 2.7767040729522705,
"step": 804
},
{
"epoch": 0.5124124761298536,
"grad_norm": 6.741305480732476,
"learning_rate": 9.849551005709906e-06,
"loss": 1.7822707891464233,
"step": 805
},
{
"epoch": 0.513049013367282,
"grad_norm": 10.883114124452293,
"learning_rate": 9.848647928739639e-06,
"loss": 2.5189385414123535,
"step": 806
},
{
"epoch": 0.5136855506047103,
"grad_norm": 6.732295304938295,
"learning_rate": 9.847742191143373e-06,
"loss": 2.514547824859619,
"step": 807
},
{
"epoch": 0.5143220878421387,
"grad_norm": 5.9711797832121745,
"learning_rate": 9.846833793418115e-06,
"loss": 2.482044219970703,
"step": 808
},
{
"epoch": 0.5149586250795671,
"grad_norm": 14.400383245002256,
"learning_rate": 9.845922736062341e-06,
"loss": 2.887598991394043,
"step": 809
},
{
"epoch": 0.5155951623169955,
"grad_norm": 11.630919644932487,
"learning_rate": 9.845009019575974e-06,
"loss": 3.5304949283599854,
"step": 810
},
{
"epoch": 0.5162316995544239,
"grad_norm": 22.73996741336589,
"learning_rate": 9.84409264446041e-06,
"loss": 2.7991387844085693,
"step": 811
},
{
"epoch": 0.5168682367918523,
"grad_norm": 8.436400591965535,
"learning_rate": 9.843173611218489e-06,
"loss": 2.507692813873291,
"step": 812
},
{
"epoch": 0.5175047740292807,
"grad_norm": 7.6339677846969956,
"learning_rate": 9.842251920354523e-06,
"loss": 1.9077794551849365,
"step": 813
},
{
"epoch": 0.5181413112667091,
"grad_norm": 19.745421791939673,
"learning_rate": 9.841327572374275e-06,
"loss": 3.050415277481079,
"step": 814
},
{
"epoch": 0.5187778485041374,
"grad_norm": 19.317035595884978,
"learning_rate": 9.840400567784966e-06,
"loss": 2.524138927459717,
"step": 815
},
{
"epoch": 0.5194143857415658,
"grad_norm": 7.540364687402807,
"learning_rate": 9.839470907095279e-06,
"loss": 3.2449207305908203,
"step": 816
},
{
"epoch": 0.5200509229789942,
"grad_norm": 5.854218704116551,
"learning_rate": 9.83853859081535e-06,
"loss": 2.8554091453552246,
"step": 817
},
{
"epoch": 0.5206874602164226,
"grad_norm": 7.311068476898229,
"learning_rate": 9.837603619456777e-06,
"loss": 2.0652270317077637,
"step": 818
},
{
"epoch": 0.521323997453851,
"grad_norm": 13.146507027324729,
"learning_rate": 9.836665993532608e-06,
"loss": 2.597984790802002,
"step": 819
},
{
"epoch": 0.5219605346912795,
"grad_norm": 10.059988055991992,
"learning_rate": 9.835725713557356e-06,
"loss": 2.4166486263275146,
"step": 820
},
{
"epoch": 0.5225970719287079,
"grad_norm": 13.784036388869687,
"learning_rate": 9.834782780046983e-06,
"loss": 2.607042074203491,
"step": 821
},
{
"epoch": 0.5232336091661363,
"grad_norm": 13.905566237616293,
"learning_rate": 9.833837193518912e-06,
"loss": 2.538478136062622,
"step": 822
},
{
"epoch": 0.5238701464035647,
"grad_norm": 11.112091320017088,
"learning_rate": 9.832888954492018e-06,
"loss": 2.88309907913208,
"step": 823
},
{
"epoch": 0.524506683640993,
"grad_norm": 22.507543127539847,
"learning_rate": 9.831938063486636e-06,
"loss": 3.4071497917175293,
"step": 824
},
{
"epoch": 0.5251432208784214,
"grad_norm": 22.28789638263004,
"learning_rate": 9.830984521024551e-06,
"loss": 2.59210205078125,
"step": 825
},
{
"epoch": 0.5257797581158498,
"grad_norm": 6.5223751894038084,
"learning_rate": 9.830028327629009e-06,
"loss": 2.6576318740844727,
"step": 826
},
{
"epoch": 0.5264162953532782,
"grad_norm": 7.5901390591435645,
"learning_rate": 9.829069483824707e-06,
"loss": 3.3522415161132812,
"step": 827
},
{
"epoch": 0.5270528325907066,
"grad_norm": 14.8866670545974,
"learning_rate": 9.828107990137794e-06,
"loss": 2.4617838859558105,
"step": 828
},
{
"epoch": 0.527689369828135,
"grad_norm": 8.130441007275053,
"learning_rate": 9.827143847095879e-06,
"loss": 2.8005857467651367,
"step": 829
},
{
"epoch": 0.5283259070655634,
"grad_norm": 11.192259552237669,
"learning_rate": 9.826177055228018e-06,
"loss": 3.020345449447632,
"step": 830
},
{
"epoch": 0.5289624443029918,
"grad_norm": 12.587075512510381,
"learning_rate": 9.82520761506473e-06,
"loss": 2.8561697006225586,
"step": 831
},
{
"epoch": 0.5295989815404201,
"grad_norm": 10.730495935245877,
"learning_rate": 9.824235527137975e-06,
"loss": 2.875257968902588,
"step": 832
},
{
"epoch": 0.5302355187778485,
"grad_norm": 14.975132541702031,
"learning_rate": 9.823260791981174e-06,
"loss": 2.9994492530822754,
"step": 833
},
{
"epoch": 0.5308720560152769,
"grad_norm": 10.025600576297306,
"learning_rate": 9.822283410129204e-06,
"loss": 2.6414132118225098,
"step": 834
},
{
"epoch": 0.5315085932527053,
"grad_norm": 17.164757722920452,
"learning_rate": 9.821303382118382e-06,
"loss": 2.8303494453430176,
"step": 835
},
{
"epoch": 0.5321451304901337,
"grad_norm": 28.000081297657083,
"learning_rate": 9.82032070848649e-06,
"loss": 3.836611032485962,
"step": 836
},
{
"epoch": 0.5327816677275621,
"grad_norm": 8.688852482490768,
"learning_rate": 9.819335389772751e-06,
"loss": 2.7299723625183105,
"step": 837
},
{
"epoch": 0.5334182049649905,
"grad_norm": 14.423747949313897,
"learning_rate": 9.81834742651785e-06,
"loss": 2.899153232574463,
"step": 838
},
{
"epoch": 0.5340547422024189,
"grad_norm": 11.071276785005018,
"learning_rate": 9.817356819263912e-06,
"loss": 2.9401285648345947,
"step": 839
},
{
"epoch": 0.5346912794398472,
"grad_norm": 7.119354016240981,
"learning_rate": 9.816363568554523e-06,
"loss": 2.84574294090271,
"step": 840
},
{
"epoch": 0.5353278166772756,
"grad_norm": 13.480709693656054,
"learning_rate": 9.815367674934713e-06,
"loss": 3.2451748847961426,
"step": 841
},
{
"epoch": 0.535964353914704,
"grad_norm": 8.436170064658976,
"learning_rate": 9.814369138950965e-06,
"loss": 2.8468217849731445,
"step": 842
},
{
"epoch": 0.5366008911521324,
"grad_norm": 16.629327310426202,
"learning_rate": 9.813367961151212e-06,
"loss": 3.0509352684020996,
"step": 843
},
{
"epoch": 0.5372374283895608,
"grad_norm": 7.767734513755592,
"learning_rate": 9.812364142084833e-06,
"loss": 2.8520469665527344,
"step": 844
},
{
"epoch": 0.5378739656269892,
"grad_norm": 9.647175482826967,
"learning_rate": 9.811357682302664e-06,
"loss": 2.677677631378174,
"step": 845
},
{
"epoch": 0.5385105028644176,
"grad_norm": 6.7647162407733985,
"learning_rate": 9.81034858235698e-06,
"loss": 2.339890241622925,
"step": 846
},
{
"epoch": 0.539147040101846,
"grad_norm": 33.107560376267216,
"learning_rate": 9.809336842801514e-06,
"loss": 2.5347745418548584,
"step": 847
},
{
"epoch": 0.5397835773392744,
"grad_norm": 21.429620888652362,
"learning_rate": 9.808322464191444e-06,
"loss": 3.6045989990234375,
"step": 848
},
{
"epoch": 0.5404201145767027,
"grad_norm": 31.986307429772406,
"learning_rate": 9.807305447083392e-06,
"loss": 2.769184112548828,
"step": 849
},
{
"epoch": 0.5410566518141311,
"grad_norm": 21.509681978929585,
"learning_rate": 9.806285792035435e-06,
"loss": 3.472559928894043,
"step": 850
},
{
"epoch": 0.5416931890515595,
"grad_norm": 7.898482917664102,
"learning_rate": 9.805263499607091e-06,
"loss": 2.580059289932251,
"step": 851
},
{
"epoch": 0.5423297262889879,
"grad_norm": 8.556659491857221,
"learning_rate": 9.804238570359331e-06,
"loss": 2.169471025466919,
"step": 852
},
{
"epoch": 0.5429662635264163,
"grad_norm": 10.484489660238392,
"learning_rate": 9.803211004854569e-06,
"loss": 2.7918176651000977,
"step": 853
},
{
"epoch": 0.5436028007638447,
"grad_norm": 6.744177575899762,
"learning_rate": 9.802180803656667e-06,
"loss": 2.4259371757507324,
"step": 854
},
{
"epoch": 0.5442393380012731,
"grad_norm": 18.8934287974784,
"learning_rate": 9.801147967330935e-06,
"loss": 2.8702199459075928,
"step": 855
},
{
"epoch": 0.5448758752387015,
"grad_norm": 24.583047129270803,
"learning_rate": 9.800112496444124e-06,
"loss": 2.676443576812744,
"step": 856
},
{
"epoch": 0.5455124124761298,
"grad_norm": 16.929109118145526,
"learning_rate": 9.799074391564436e-06,
"loss": 2.5846240520477295,
"step": 857
},
{
"epoch": 0.5461489497135582,
"grad_norm": 12.033435100538304,
"learning_rate": 9.798033653261515e-06,
"loss": 2.775334358215332,
"step": 858
},
{
"epoch": 0.5467854869509866,
"grad_norm": 9.943603717670662,
"learning_rate": 9.79699028210645e-06,
"loss": 2.725459337234497,
"step": 859
},
{
"epoch": 0.547422024188415,
"grad_norm": 14.828949430654744,
"learning_rate": 9.795944278671779e-06,
"loss": 2.655045986175537,
"step": 860
},
{
"epoch": 0.5480585614258434,
"grad_norm": 16.830141958946086,
"learning_rate": 9.794895643531479e-06,
"loss": 2.982860803604126,
"step": 861
},
{
"epoch": 0.5486950986632718,
"grad_norm": 10.861165616915436,
"learning_rate": 9.793844377260975e-06,
"loss": 2.0161194801330566,
"step": 862
},
{
"epoch": 0.5493316359007002,
"grad_norm": 58.85838285619661,
"learning_rate": 9.792790480437133e-06,
"loss": 2.948622703552246,
"step": 863
},
{
"epoch": 0.5499681731381286,
"grad_norm": 9.757855439261121,
"learning_rate": 9.791733953638264e-06,
"loss": 3.0861761569976807,
"step": 864
},
{
"epoch": 0.5506047103755569,
"grad_norm": 16.381773991834848,
"learning_rate": 9.790674797444125e-06,
"loss": 2.9444048404693604,
"step": 865
},
{
"epoch": 0.5512412476129853,
"grad_norm": 11.160265952315564,
"learning_rate": 9.789613012435908e-06,
"loss": 2.9539167881011963,
"step": 866
},
{
"epoch": 0.5518777848504137,
"grad_norm": 14.1635863067067,
"learning_rate": 9.788548599196254e-06,
"loss": 2.430659294128418,
"step": 867
},
{
"epoch": 0.5525143220878421,
"grad_norm": 12.908142057583989,
"learning_rate": 9.787481558309247e-06,
"loss": 2.671661853790283,
"step": 868
},
{
"epoch": 0.5531508593252705,
"grad_norm": 8.43503045093665,
"learning_rate": 9.786411890360407e-06,
"loss": 2.872176170349121,
"step": 869
},
{
"epoch": 0.5537873965626989,
"grad_norm": 9.16301389740033,
"learning_rate": 9.7853395959367e-06,
"loss": 2.8996071815490723,
"step": 870
},
{
"epoch": 0.5544239338001273,
"grad_norm": 16.856257304408356,
"learning_rate": 9.784264675626532e-06,
"loss": 4.690608024597168,
"step": 871
},
{
"epoch": 0.5550604710375557,
"grad_norm": 20.926523578798506,
"learning_rate": 9.783187130019751e-06,
"loss": 3.114534854888916,
"step": 872
},
{
"epoch": 0.5556970082749841,
"grad_norm": 10.52875239635774,
"learning_rate": 9.782106959707644e-06,
"loss": 2.5665369033813477,
"step": 873
},
{
"epoch": 0.5563335455124124,
"grad_norm": 13.848634211652863,
"learning_rate": 9.781024165282939e-06,
"loss": 3.06032133102417,
"step": 874
},
{
"epoch": 0.5569700827498408,
"grad_norm": 12.6790668481712,
"learning_rate": 9.779938747339805e-06,
"loss": 3.3275718688964844,
"step": 875
},
{
"epoch": 0.5576066199872692,
"grad_norm": 16.91386790155097,
"learning_rate": 9.778850706473847e-06,
"loss": 2.9962024688720703,
"step": 876
},
{
"epoch": 0.5582431572246976,
"grad_norm": 14.213173107920003,
"learning_rate": 9.777760043282117e-06,
"loss": 3.2456562519073486,
"step": 877
},
{
"epoch": 0.558879694462126,
"grad_norm": 8.098999526064096,
"learning_rate": 9.776666758363093e-06,
"loss": 3.231693983078003,
"step": 878
},
{
"epoch": 0.5595162316995544,
"grad_norm": 7.154392240607928,
"learning_rate": 9.775570852316706e-06,
"loss": 2.285749673843384,
"step": 879
},
{
"epoch": 0.5601527689369828,
"grad_norm": 8.402539117834204,
"learning_rate": 9.774472325744315e-06,
"loss": 2.0394725799560547,
"step": 880
},
{
"epoch": 0.5607893061744113,
"grad_norm": 17.14200855773335,
"learning_rate": 9.773371179248724e-06,
"loss": 2.8273448944091797,
"step": 881
},
{
"epoch": 0.5614258434118395,
"grad_norm": 16.82522755670069,
"learning_rate": 9.772267413434167e-06,
"loss": 3.1758463382720947,
"step": 882
},
{
"epoch": 0.562062380649268,
"grad_norm": 10.466562159025571,
"learning_rate": 9.771161028906325e-06,
"loss": 2.5195975303649902,
"step": 883
},
{
"epoch": 0.5626989178866963,
"grad_norm": 7.869239737887258,
"learning_rate": 9.770052026272306e-06,
"loss": 2.8067564964294434,
"step": 884
},
{
"epoch": 0.5633354551241248,
"grad_norm": 19.44302133114353,
"learning_rate": 9.768940406140658e-06,
"loss": 2.5152382850646973,
"step": 885
},
{
"epoch": 0.5639719923615532,
"grad_norm": 12.238428636395993,
"learning_rate": 9.767826169121374e-06,
"loss": 2.4085590839385986,
"step": 886
},
{
"epoch": 0.5646085295989816,
"grad_norm": 9.563967483322969,
"learning_rate": 9.766709315825869e-06,
"loss": 2.646481513977051,
"step": 887
},
{
"epoch": 0.56524506683641,
"grad_norm": 18.87493687296846,
"learning_rate": 9.765589846867003e-06,
"loss": 3.2062935829162598,
"step": 888
},
{
"epoch": 0.5658816040738384,
"grad_norm": 14.302660604413548,
"learning_rate": 9.76446776285907e-06,
"loss": 3.0545549392700195,
"step": 889
},
{
"epoch": 0.5665181413112667,
"grad_norm": 8.132199904211502,
"learning_rate": 9.763343064417792e-06,
"loss": 3.130021095275879,
"step": 890
},
{
"epoch": 0.5671546785486951,
"grad_norm": 11.562566038181126,
"learning_rate": 9.762215752160335e-06,
"loss": 2.8721697330474854,
"step": 891
},
{
"epoch": 0.5677912157861235,
"grad_norm": 17.44011854244868,
"learning_rate": 9.761085826705296e-06,
"loss": 2.7695465087890625,
"step": 892
},
{
"epoch": 0.5684277530235519,
"grad_norm": 35.124551871281746,
"learning_rate": 9.759953288672704e-06,
"loss": 3.2944228649139404,
"step": 893
},
{
"epoch": 0.5690642902609803,
"grad_norm": 7.512069889218877,
"learning_rate": 9.758818138684023e-06,
"loss": 2.6480884552001953,
"step": 894
},
{
"epoch": 0.5697008274984087,
"grad_norm": 21.641091074007168,
"learning_rate": 9.757680377362152e-06,
"loss": 3.268338680267334,
"step": 895
},
{
"epoch": 0.5703373647358371,
"grad_norm": 14.399202436529807,
"learning_rate": 9.756540005331418e-06,
"loss": 2.1671676635742188,
"step": 896
},
{
"epoch": 0.5709739019732655,
"grad_norm": 11.623304830160155,
"learning_rate": 9.755397023217588e-06,
"loss": 2.6696858406066895,
"step": 897
},
{
"epoch": 0.5716104392106939,
"grad_norm": 19.233964426153847,
"learning_rate": 9.754251431647853e-06,
"loss": 3.2551486492156982,
"step": 898
},
{
"epoch": 0.5722469764481222,
"grad_norm": 7.270503407460572,
"learning_rate": 9.753103231250841e-06,
"loss": 2.863981246948242,
"step": 899
},
{
"epoch": 0.5728835136855506,
"grad_norm": 10.975237208385844,
"learning_rate": 9.751952422656613e-06,
"loss": 2.517639636993408,
"step": 900
},
{
"epoch": 0.573520050922979,
"grad_norm": 8.652240634932285,
"learning_rate": 9.750799006496657e-06,
"loss": 2.6670122146606445,
"step": 901
},
{
"epoch": 0.5741565881604074,
"grad_norm": 7.744172147062743,
"learning_rate": 9.74964298340389e-06,
"loss": 2.9616057872772217,
"step": 902
},
{
"epoch": 0.5747931253978358,
"grad_norm": 15.501557926799377,
"learning_rate": 9.748484354012671e-06,
"loss": 3.0340139865875244,
"step": 903
},
{
"epoch": 0.5754296626352642,
"grad_norm": 15.054369483127644,
"learning_rate": 9.747323118958774e-06,
"loss": 3.0973074436187744,
"step": 904
},
{
"epoch": 0.5760661998726926,
"grad_norm": 7.461008632303077,
"learning_rate": 9.746159278879412e-06,
"loss": 2.881272315979004,
"step": 905
},
{
"epoch": 0.576702737110121,
"grad_norm": 19.756922229382635,
"learning_rate": 9.744992834413227e-06,
"loss": 2.6304001808166504,
"step": 906
},
{
"epoch": 0.5773392743475493,
"grad_norm": 7.575054718020066,
"learning_rate": 9.74382378620029e-06,
"loss": 2.5317938327789307,
"step": 907
},
{
"epoch": 0.5779758115849777,
"grad_norm": 8.897515039328026,
"learning_rate": 9.742652134882095e-06,
"loss": 2.1269655227661133,
"step": 908
},
{
"epoch": 0.5786123488224061,
"grad_norm": 16.927725495237198,
"learning_rate": 9.741477881101573e-06,
"loss": 3.230391502380371,
"step": 909
},
{
"epoch": 0.5792488860598345,
"grad_norm": 10.379257456934797,
"learning_rate": 9.740301025503078e-06,
"loss": 2.900475263595581,
"step": 910
},
{
"epoch": 0.5798854232972629,
"grad_norm": 15.1456457698115,
"learning_rate": 9.73912156873239e-06,
"loss": 3.2446060180664062,
"step": 911
},
{
"epoch": 0.5805219605346913,
"grad_norm": 9.067027801427905,
"learning_rate": 9.737939511436722e-06,
"loss": 3.015232563018799,
"step": 912
},
{
"epoch": 0.5811584977721197,
"grad_norm": 23.831019820426956,
"learning_rate": 9.73675485426471e-06,
"loss": 2.4809412956237793,
"step": 913
},
{
"epoch": 0.5817950350095481,
"grad_norm": 8.97624293130639,
"learning_rate": 9.735567597866415e-06,
"loss": 2.744965076446533,
"step": 914
},
{
"epoch": 0.5824315722469765,
"grad_norm": 25.53509484031687,
"learning_rate": 9.734377742893333e-06,
"loss": 2.4732604026794434,
"step": 915
},
{
"epoch": 0.5830681094844048,
"grad_norm": 13.15529474155569,
"learning_rate": 9.733185289998373e-06,
"loss": 3.0583267211914062,
"step": 916
},
{
"epoch": 0.5837046467218332,
"grad_norm": 8.932012421437175,
"learning_rate": 9.731990239835882e-06,
"loss": 2.5329785346984863,
"step": 917
},
{
"epoch": 0.5843411839592616,
"grad_norm": 12.273886175504305,
"learning_rate": 9.730792593061624e-06,
"loss": 2.9934682846069336,
"step": 918
},
{
"epoch": 0.58497772119669,
"grad_norm": 25.24523050203931,
"learning_rate": 9.72959235033279e-06,
"loss": 2.8382387161254883,
"step": 919
},
{
"epoch": 0.5856142584341184,
"grad_norm": 6.725669150751144,
"learning_rate": 9.728389512307996e-06,
"loss": 2.401772975921631,
"step": 920
},
{
"epoch": 0.5862507956715468,
"grad_norm": 11.789649229172577,
"learning_rate": 9.727184079647284e-06,
"loss": 2.4895544052124023,
"step": 921
},
{
"epoch": 0.5868873329089752,
"grad_norm": 10.58143888341926,
"learning_rate": 9.725976053012117e-06,
"loss": 2.4084954261779785,
"step": 922
},
{
"epoch": 0.5875238701464036,
"grad_norm": 12.445412046081634,
"learning_rate": 9.724765433065382e-06,
"loss": 2.738898992538452,
"step": 923
},
{
"epoch": 0.5881604073838319,
"grad_norm": 12.001087768801614,
"learning_rate": 9.72355222047139e-06,
"loss": 2.6212992668151855,
"step": 924
},
{
"epoch": 0.5887969446212603,
"grad_norm": 10.360026449336509,
"learning_rate": 9.722336415895873e-06,
"loss": 2.7570643424987793,
"step": 925
},
{
"epoch": 0.5894334818586887,
"grad_norm": 10.389681022297916,
"learning_rate": 9.721118020005985e-06,
"loss": 2.8604931831359863,
"step": 926
},
{
"epoch": 0.5900700190961171,
"grad_norm": 7.173012423210488,
"learning_rate": 9.719897033470307e-06,
"loss": 2.645080327987671,
"step": 927
},
{
"epoch": 0.5907065563335455,
"grad_norm": 9.77458249204749,
"learning_rate": 9.718673456958834e-06,
"loss": 2.715708017349243,
"step": 928
},
{
"epoch": 0.5913430935709739,
"grad_norm": 11.542551453098346,
"learning_rate": 9.71744729114299e-06,
"loss": 2.594083309173584,
"step": 929
},
{
"epoch": 0.5919796308084023,
"grad_norm": 11.834252345490496,
"learning_rate": 9.716218536695611e-06,
"loss": 3.1679861545562744,
"step": 930
},
{
"epoch": 0.5926161680458307,
"grad_norm": 11.12942908667238,
"learning_rate": 9.714987194290963e-06,
"loss": 3.133230686187744,
"step": 931
},
{
"epoch": 0.593252705283259,
"grad_norm": 11.039033615390888,
"learning_rate": 9.713753264604726e-06,
"loss": 2.560100555419922,
"step": 932
},
{
"epoch": 0.5938892425206874,
"grad_norm": 11.582517006358362,
"learning_rate": 9.712516748314001e-06,
"loss": 2.733274459838867,
"step": 933
},
{
"epoch": 0.5945257797581158,
"grad_norm": 11.971564179710118,
"learning_rate": 9.711277646097308e-06,
"loss": 2.7829017639160156,
"step": 934
},
{
"epoch": 0.5951623169955442,
"grad_norm": 10.570919690785455,
"learning_rate": 9.710035958634587e-06,
"loss": 2.2792487144470215,
"step": 935
},
{
"epoch": 0.5957988542329726,
"grad_norm": 12.995799525018636,
"learning_rate": 9.708791686607195e-06,
"loss": 3.31978702545166,
"step": 936
},
{
"epoch": 0.596435391470401,
"grad_norm": 13.201972697733703,
"learning_rate": 9.707544830697912e-06,
"loss": 2.9213595390319824,
"step": 937
},
{
"epoch": 0.5970719287078294,
"grad_norm": 12.013554515053972,
"learning_rate": 9.706295391590928e-06,
"loss": 2.7369606494903564,
"step": 938
},
{
"epoch": 0.5977084659452578,
"grad_norm": 14.418462050130708,
"learning_rate": 9.705043369971857e-06,
"loss": 2.67887282371521,
"step": 939
},
{
"epoch": 0.5983450031826862,
"grad_norm": 10.116254580416458,
"learning_rate": 9.703788766527728e-06,
"loss": 2.2887253761291504,
"step": 940
},
{
"epoch": 0.5989815404201145,
"grad_norm": 29.1755959070319,
"learning_rate": 9.702531581946985e-06,
"loss": 3.5381555557250977,
"step": 941
},
{
"epoch": 0.5996180776575429,
"grad_norm": 7.5457809902099315,
"learning_rate": 9.701271816919493e-06,
"loss": 2.7492623329162598,
"step": 942
},
{
"epoch": 0.6002546148949713,
"grad_norm": 9.666136274144206,
"learning_rate": 9.700009472136527e-06,
"loss": 3.052298069000244,
"step": 943
},
{
"epoch": 0.6008911521323997,
"grad_norm": 11.284501524470071,
"learning_rate": 9.698744548290784e-06,
"loss": 3.2382991313934326,
"step": 944
},
{
"epoch": 0.6015276893698281,
"grad_norm": 20.447480527022496,
"learning_rate": 9.697477046076368e-06,
"loss": 2.696122646331787,
"step": 945
},
{
"epoch": 0.6021642266072565,
"grad_norm": 14.817358593665878,
"learning_rate": 9.696206966188808e-06,
"loss": 2.5964112281799316,
"step": 946
},
{
"epoch": 0.602800763844685,
"grad_norm": 7.632477261909105,
"learning_rate": 9.694934309325037e-06,
"loss": 3.009591579437256,
"step": 947
},
{
"epoch": 0.6034373010821134,
"grad_norm": 12.63016437996808,
"learning_rate": 9.69365907618341e-06,
"loss": 2.727325201034546,
"step": 948
},
{
"epoch": 0.6040738383195416,
"grad_norm": 7.407783999732234,
"learning_rate": 9.692381267463693e-06,
"loss": 2.722775459289551,
"step": 949
},
{
"epoch": 0.60471037555697,
"grad_norm": 29.043660573819057,
"learning_rate": 9.691100883867065e-06,
"loss": 3.696639060974121,
"step": 950
},
{
"epoch": 0.6053469127943985,
"grad_norm": 13.532160636970735,
"learning_rate": 9.689817926096117e-06,
"loss": 2.8891990184783936,
"step": 951
},
{
"epoch": 0.6059834500318269,
"grad_norm": 9.623235170481506,
"learning_rate": 9.688532394854855e-06,
"loss": 2.2772269248962402,
"step": 952
},
{
"epoch": 0.6066199872692553,
"grad_norm": 6.814780336997041,
"learning_rate": 9.687244290848696e-06,
"loss": 2.648423433303833,
"step": 953
},
{
"epoch": 0.6072565245066837,
"grad_norm": 10.864208391067539,
"learning_rate": 9.685953614784468e-06,
"loss": 2.5183968544006348,
"step": 954
},
{
"epoch": 0.6078930617441121,
"grad_norm": 13.659964858560148,
"learning_rate": 9.68466036737041e-06,
"loss": 2.199960947036743,
"step": 955
},
{
"epoch": 0.6085295989815405,
"grad_norm": 20.25190674068994,
"learning_rate": 9.683364549316175e-06,
"loss": 2.9352359771728516,
"step": 956
},
{
"epoch": 0.6091661362189688,
"grad_norm": 11.730022670663669,
"learning_rate": 9.68206616133282e-06,
"loss": 2.289551258087158,
"step": 957
},
{
"epoch": 0.6098026734563972,
"grad_norm": 10.590887207972477,
"learning_rate": 9.680765204132824e-06,
"loss": 2.6397624015808105,
"step": 958
},
{
"epoch": 0.6104392106938256,
"grad_norm": 14.6562312186882,
"learning_rate": 9.679461678430064e-06,
"loss": 2.5100960731506348,
"step": 959
},
{
"epoch": 0.611075747931254,
"grad_norm": 6.842906166218895,
"learning_rate": 9.678155584939832e-06,
"loss": 2.798889636993408,
"step": 960
},
{
"epoch": 0.6117122851686824,
"grad_norm": 8.101430276051339,
"learning_rate": 9.676846924378829e-06,
"loss": 2.937227487564087,
"step": 961
},
{
"epoch": 0.6123488224061108,
"grad_norm": 8.938006978202733,
"learning_rate": 9.67553569746516e-06,
"loss": 3.0781333446502686,
"step": 962
},
{
"epoch": 0.6129853596435392,
"grad_norm": 10.016307273177329,
"learning_rate": 9.674221904918346e-06,
"loss": 2.077643394470215,
"step": 963
},
{
"epoch": 0.6136218968809676,
"grad_norm": 11.847324449864272,
"learning_rate": 9.67290554745931e-06,
"loss": 2.616429567337036,
"step": 964
},
{
"epoch": 0.614258434118396,
"grad_norm": 9.245571883498446,
"learning_rate": 9.671586625810387e-06,
"loss": 2.694222927093506,
"step": 965
},
{
"epoch": 0.6148949713558243,
"grad_norm": 19.46466262879445,
"learning_rate": 9.670265140695312e-06,
"loss": 3.4693071842193604,
"step": 966
},
{
"epoch": 0.6155315085932527,
"grad_norm": 17.95113213590351,
"learning_rate": 9.668941092839233e-06,
"loss": 3.5103704929351807,
"step": 967
},
{
"epoch": 0.6161680458306811,
"grad_norm": 12.472820424341016,
"learning_rate": 9.667614482968702e-06,
"loss": 3.2685065269470215,
"step": 968
},
{
"epoch": 0.6168045830681095,
"grad_norm": 8.937493281166018,
"learning_rate": 9.666285311811678e-06,
"loss": 2.4610326290130615,
"step": 969
},
{
"epoch": 0.6174411203055379,
"grad_norm": 6.988745721519354,
"learning_rate": 9.664953580097525e-06,
"loss": 2.848910331726074,
"step": 970
},
{
"epoch": 0.6180776575429663,
"grad_norm": 21.585861018039346,
"learning_rate": 9.663619288557009e-06,
"loss": 2.805069923400879,
"step": 971
},
{
"epoch": 0.6187141947803947,
"grad_norm": 16.996625729938586,
"learning_rate": 9.662282437922305e-06,
"loss": 2.859009265899658,
"step": 972
},
{
"epoch": 0.6193507320178231,
"grad_norm": 57.42435078890917,
"learning_rate": 9.660943028926992e-06,
"loss": 3.6436121463775635,
"step": 973
},
{
"epoch": 0.6199872692552514,
"grad_norm": 14.889729750770474,
"learning_rate": 9.65960106230605e-06,
"loss": 4.130721092224121,
"step": 974
},
{
"epoch": 0.6206238064926798,
"grad_norm": 17.925553905210514,
"learning_rate": 9.658256538795864e-06,
"loss": 3.6854283809661865,
"step": 975
},
{
"epoch": 0.6212603437301082,
"grad_norm": 10.893711830450949,
"learning_rate": 9.656909459134221e-06,
"loss": 2.419802665710449,
"step": 976
},
{
"epoch": 0.6218968809675366,
"grad_norm": 5.808124910458566,
"learning_rate": 9.655559824060315e-06,
"loss": 2.861011028289795,
"step": 977
},
{
"epoch": 0.622533418204965,
"grad_norm": 9.59566663231614,
"learning_rate": 9.654207634314735e-06,
"loss": 2.3466222286224365,
"step": 978
},
{
"epoch": 0.6231699554423934,
"grad_norm": 11.957468337787667,
"learning_rate": 9.652852890639476e-06,
"loss": 2.4655849933624268,
"step": 979
},
{
"epoch": 0.6238064926798218,
"grad_norm": 13.671944162679766,
"learning_rate": 9.651495593777938e-06,
"loss": 2.838242292404175,
"step": 980
},
{
"epoch": 0.6244430299172502,
"grad_norm": 7.5068812433289835,
"learning_rate": 9.650135744474916e-06,
"loss": 2.5288822650909424,
"step": 981
},
{
"epoch": 0.6250795671546785,
"grad_norm": 10.547951834836692,
"learning_rate": 9.648773343476605e-06,
"loss": 2.060285806655884,
"step": 982
},
{
"epoch": 0.6257161043921069,
"grad_norm": 14.1063328514092,
"learning_rate": 9.647408391530606e-06,
"loss": 2.828702926635742,
"step": 983
},
{
"epoch": 0.6263526416295353,
"grad_norm": 8.410368601907516,
"learning_rate": 9.646040889385918e-06,
"loss": 2.6558516025543213,
"step": 984
},
{
"epoch": 0.6269891788669637,
"grad_norm": 17.404941932367286,
"learning_rate": 9.644670837792935e-06,
"loss": 2.8424549102783203,
"step": 985
},
{
"epoch": 0.6276257161043921,
"grad_norm": 19.693506640938104,
"learning_rate": 9.643298237503455e-06,
"loss": 3.578190565109253,
"step": 986
},
{
"epoch": 0.6282622533418205,
"grad_norm": 12.247945521255543,
"learning_rate": 9.641923089270672e-06,
"loss": 2.4419023990631104,
"step": 987
},
{
"epoch": 0.6288987905792489,
"grad_norm": 18.897474817120475,
"learning_rate": 9.64054539384918e-06,
"loss": 3.304727792739868,
"step": 988
},
{
"epoch": 0.6295353278166773,
"grad_norm": 9.666539700787844,
"learning_rate": 9.639165151994968e-06,
"loss": 3.081479072570801,
"step": 989
},
{
"epoch": 0.6301718650541057,
"grad_norm": 13.300604337026657,
"learning_rate": 9.637782364465426e-06,
"loss": 2.7969231605529785,
"step": 990
},
{
"epoch": 0.630808402291534,
"grad_norm": 13.544924066767441,
"learning_rate": 9.636397032019336e-06,
"loss": 2.6117305755615234,
"step": 991
},
{
"epoch": 0.6314449395289624,
"grad_norm": 8.34701314140355,
"learning_rate": 9.635009155416883e-06,
"loss": 2.816713571548462,
"step": 992
},
{
"epoch": 0.6320814767663908,
"grad_norm": 17.956217540623307,
"learning_rate": 9.63361873541964e-06,
"loss": 2.4624338150024414,
"step": 993
},
{
"epoch": 0.6327180140038192,
"grad_norm": 21.573934589758643,
"learning_rate": 9.632225772790585e-06,
"loss": 2.5641918182373047,
"step": 994
},
{
"epoch": 0.6333545512412476,
"grad_norm": 10.965534948552364,
"learning_rate": 9.63083026829408e-06,
"loss": 2.498544216156006,
"step": 995
},
{
"epoch": 0.633991088478676,
"grad_norm": 8.197591538777314,
"learning_rate": 9.629432222695894e-06,
"loss": 2.6886236667633057,
"step": 996
},
{
"epoch": 0.6346276257161044,
"grad_norm": 8.450584454061838,
"learning_rate": 9.628031636763181e-06,
"loss": 2.6613171100616455,
"step": 997
},
{
"epoch": 0.6352641629535328,
"grad_norm": 9.322665846807503,
"learning_rate": 9.626628511264495e-06,
"loss": 2.8803696632385254,
"step": 998
},
{
"epoch": 0.6359007001909611,
"grad_norm": 11.80233014986357,
"learning_rate": 9.625222846969778e-06,
"loss": 2.804666519165039,
"step": 999
},
{
"epoch": 0.6365372374283895,
"grad_norm": 105.30794617750519,
"learning_rate": 9.623814644650368e-06,
"loss": 2.4511563777923584,
"step": 1000
},
{
"epoch": 0.6371737746658179,
"grad_norm": 14.243059144369466,
"learning_rate": 9.622403905079002e-06,
"loss": 2.575465202331543,
"step": 1001
},
{
"epoch": 0.6378103119032463,
"grad_norm": 25.870916372150262,
"learning_rate": 9.620990629029795e-06,
"loss": 2.641998052597046,
"step": 1002
},
{
"epoch": 0.6384468491406747,
"grad_norm": 46.348127357952244,
"learning_rate": 9.619574817278266e-06,
"loss": 3.3244099617004395,
"step": 1003
},
{
"epoch": 0.6390833863781031,
"grad_norm": 16.95867609025848,
"learning_rate": 9.618156470601323e-06,
"loss": 2.8683221340179443,
"step": 1004
},
{
"epoch": 0.6397199236155315,
"grad_norm": 5.463313912510345,
"learning_rate": 9.61673558977726e-06,
"loss": 2.685462713241577,
"step": 1005
},
{
"epoch": 0.6403564608529599,
"grad_norm": 21.998438759448344,
"learning_rate": 9.615312175585765e-06,
"loss": 1.7907297611236572,
"step": 1006
},
{
"epoch": 0.6409929980903882,
"grad_norm": 17.84617329993066,
"learning_rate": 9.613886228807919e-06,
"loss": 2.961916446685791,
"step": 1007
},
{
"epoch": 0.6416295353278166,
"grad_norm": 11.342330696063822,
"learning_rate": 9.612457750226188e-06,
"loss": 2.5012712478637695,
"step": 1008
},
{
"epoch": 0.642266072565245,
"grad_norm": 12.458024468303618,
"learning_rate": 9.611026740624428e-06,
"loss": 2.8131322860717773,
"step": 1009
},
{
"epoch": 0.6429026098026734,
"grad_norm": 13.48943331389402,
"learning_rate": 9.609593200787887e-06,
"loss": 2.596027135848999,
"step": 1010
},
{
"epoch": 0.6435391470401018,
"grad_norm": 11.278368207207846,
"learning_rate": 9.6081571315032e-06,
"loss": 3.150757074356079,
"step": 1011
},
{
"epoch": 0.6441756842775302,
"grad_norm": 9.32868713743566,
"learning_rate": 9.606718533558386e-06,
"loss": 2.443676710128784,
"step": 1012
},
{
"epoch": 0.6448122215149587,
"grad_norm": 9.53734309113041,
"learning_rate": 9.60527740774286e-06,
"loss": 2.869262218475342,
"step": 1013
},
{
"epoch": 0.645448758752387,
"grad_norm": 15.34769784613947,
"learning_rate": 9.603833754847414e-06,
"loss": 2.650500774383545,
"step": 1014
},
{
"epoch": 0.6460852959898155,
"grad_norm": 9.355525595561428,
"learning_rate": 9.602387575664236e-06,
"loss": 2.9539999961853027,
"step": 1015
},
{
"epoch": 0.6467218332272437,
"grad_norm": 7.6381224701693915,
"learning_rate": 9.600938870986892e-06,
"loss": 2.8956141471862793,
"step": 1016
},
{
"epoch": 0.6473583704646722,
"grad_norm": 15.916159840938626,
"learning_rate": 9.59948764161034e-06,
"loss": 2.505089282989502,
"step": 1017
},
{
"epoch": 0.6479949077021006,
"grad_norm": 14.391975673606755,
"learning_rate": 9.598033888330925e-06,
"loss": 3.1346678733825684,
"step": 1018
},
{
"epoch": 0.648631444939529,
"grad_norm": 10.184848624541882,
"learning_rate": 9.596577611946367e-06,
"loss": 2.797295093536377,
"step": 1019
},
{
"epoch": 0.6492679821769574,
"grad_norm": 12.211496912574797,
"learning_rate": 9.595118813255779e-06,
"loss": 3.117927074432373,
"step": 1020
},
{
"epoch": 0.6499045194143858,
"grad_norm": 7.0076195792319025,
"learning_rate": 9.593657493059657e-06,
"loss": 2.616992473602295,
"step": 1021
},
{
"epoch": 0.6505410566518142,
"grad_norm": 17.164091830210232,
"learning_rate": 9.592193652159879e-06,
"loss": 1.9837325811386108,
"step": 1022
},
{
"epoch": 0.6511775938892426,
"grad_norm": 12.128964402426474,
"learning_rate": 9.590727291359706e-06,
"loss": 3.1724250316619873,
"step": 1023
},
{
"epoch": 0.6518141311266709,
"grad_norm": 14.328040852937985,
"learning_rate": 9.589258411463784e-06,
"loss": 3.0149502754211426,
"step": 1024
},
{
"epoch": 0.6524506683640993,
"grad_norm": 20.823373719756965,
"learning_rate": 9.587787013278139e-06,
"loss": 1.9769032001495361,
"step": 1025
},
{
"epoch": 0.6530872056015277,
"grad_norm": 8.90460619737646,
"learning_rate": 9.586313097610178e-06,
"loss": 2.537348508834839,
"step": 1026
},
{
"epoch": 0.6537237428389561,
"grad_norm": 11.089614753035884,
"learning_rate": 9.584836665268693e-06,
"loss": 2.513148307800293,
"step": 1027
},
{
"epoch": 0.6543602800763845,
"grad_norm": 9.366911537113666,
"learning_rate": 9.583357717063854e-06,
"loss": 2.6030631065368652,
"step": 1028
},
{
"epoch": 0.6549968173138129,
"grad_norm": 9.853856674039763,
"learning_rate": 9.581876253807214e-06,
"loss": 2.9064598083496094,
"step": 1029
},
{
"epoch": 0.6556333545512413,
"grad_norm": 30.320994571634927,
"learning_rate": 9.580392276311702e-06,
"loss": 2.4541501998901367,
"step": 1030
},
{
"epoch": 0.6562698917886697,
"grad_norm": 11.556472751008794,
"learning_rate": 9.578905785391633e-06,
"loss": 2.508260726928711,
"step": 1031
},
{
"epoch": 0.6569064290260981,
"grad_norm": 8.359823622259187,
"learning_rate": 9.577416781862696e-06,
"loss": 3.3850183486938477,
"step": 1032
},
{
"epoch": 0.6575429662635264,
"grad_norm": 19.05734669637527,
"learning_rate": 9.575925266541959e-06,
"loss": 3.4974966049194336,
"step": 1033
},
{
"epoch": 0.6581795035009548,
"grad_norm": 29.14312544087796,
"learning_rate": 9.57443124024787e-06,
"loss": 3.006150245666504,
"step": 1034
},
{
"epoch": 0.6588160407383832,
"grad_norm": 14.691926004444978,
"learning_rate": 9.572934703800258e-06,
"loss": 3.4365720748901367,
"step": 1035
},
{
"epoch": 0.6594525779758116,
"grad_norm": 14.807081676008671,
"learning_rate": 9.57143565802032e-06,
"loss": 2.8804702758789062,
"step": 1036
},
{
"epoch": 0.66008911521324,
"grad_norm": 11.654120918304145,
"learning_rate": 9.569934103730642e-06,
"loss": 2.5688960552215576,
"step": 1037
},
{
"epoch": 0.6607256524506684,
"grad_norm": 6.234164356177007,
"learning_rate": 9.568430041755175e-06,
"loss": 2.5211703777313232,
"step": 1038
},
{
"epoch": 0.6613621896880968,
"grad_norm": 32.671300998365844,
"learning_rate": 9.566923472919256e-06,
"loss": 2.8633322715759277,
"step": 1039
},
{
"epoch": 0.6619987269255252,
"grad_norm": 21.709034107636633,
"learning_rate": 9.56541439804959e-06,
"loss": 3.4090006351470947,
"step": 1040
},
{
"epoch": 0.6626352641629535,
"grad_norm": 7.764859129161498,
"learning_rate": 9.56390281797426e-06,
"loss": 2.351412296295166,
"step": 1041
},
{
"epoch": 0.6632718014003819,
"grad_norm": 12.587310805196983,
"learning_rate": 9.562388733522727e-06,
"loss": 3.3595516681671143,
"step": 1042
},
{
"epoch": 0.6639083386378103,
"grad_norm": 30.16334384136349,
"learning_rate": 9.560872145525819e-06,
"loss": 2.237828016281128,
"step": 1043
},
{
"epoch": 0.6645448758752387,
"grad_norm": 13.365520517631683,
"learning_rate": 9.559353054815744e-06,
"loss": 2.6913130283355713,
"step": 1044
},
{
"epoch": 0.6651814131126671,
"grad_norm": 10.191037178015522,
"learning_rate": 9.557831462226083e-06,
"loss": 2.3729376792907715,
"step": 1045
},
{
"epoch": 0.6658179503500955,
"grad_norm": 10.584728421237422,
"learning_rate": 9.556307368591784e-06,
"loss": 3.0043184757232666,
"step": 1046
},
{
"epoch": 0.6664544875875239,
"grad_norm": 11.97516253252263,
"learning_rate": 9.554780774749176e-06,
"loss": 2.6922152042388916,
"step": 1047
},
{
"epoch": 0.6670910248249523,
"grad_norm": 5.019123938332489,
"learning_rate": 9.553251681535953e-06,
"loss": 2.967332363128662,
"step": 1048
},
{
"epoch": 0.6677275620623806,
"grad_norm": 12.709829778529743,
"learning_rate": 9.551720089791183e-06,
"loss": 2.6839001178741455,
"step": 1049
},
{
"epoch": 0.668364099299809,
"grad_norm": 61.530954347092546,
"learning_rate": 9.550186000355304e-06,
"loss": 4.202565670013428,
"step": 1050
},
{
"epoch": 0.6690006365372374,
"grad_norm": 26.613941491404976,
"learning_rate": 9.548649414070128e-06,
"loss": 2.917819023132324,
"step": 1051
},
{
"epoch": 0.6696371737746658,
"grad_norm": 11.67392234440942,
"learning_rate": 9.547110331778832e-06,
"loss": 2.832442283630371,
"step": 1052
},
{
"epoch": 0.6702737110120942,
"grad_norm": 10.380615672059479,
"learning_rate": 9.545568754325968e-06,
"loss": 2.8541481494903564,
"step": 1053
},
{
"epoch": 0.6709102482495226,
"grad_norm": 8.866291751004375,
"learning_rate": 9.544024682557456e-06,
"loss": 3.009812593460083,
"step": 1054
},
{
"epoch": 0.671546785486951,
"grad_norm": 10.313225718417016,
"learning_rate": 9.542478117320577e-06,
"loss": 2.984719753265381,
"step": 1055
},
{
"epoch": 0.6721833227243794,
"grad_norm": 14.315658019076201,
"learning_rate": 9.540929059463993e-06,
"loss": 3.14715576171875,
"step": 1056
},
{
"epoch": 0.6728198599618078,
"grad_norm": 16.66994713084257,
"learning_rate": 9.539377509837723e-06,
"loss": 2.7449615001678467,
"step": 1057
},
{
"epoch": 0.6734563971992361,
"grad_norm": 13.715914989308832,
"learning_rate": 9.537823469293156e-06,
"loss": 2.9749996662139893,
"step": 1058
},
{
"epoch": 0.6740929344366645,
"grad_norm": 13.04283331969029,
"learning_rate": 9.536266938683056e-06,
"loss": 2.70072865486145,
"step": 1059
},
{
"epoch": 0.6747294716740929,
"grad_norm": 7.154339431473497,
"learning_rate": 9.534707918861544e-06,
"loss": 2.8494629859924316,
"step": 1060
},
{
"epoch": 0.6753660089115213,
"grad_norm": 16.606313397601085,
"learning_rate": 9.533146410684107e-06,
"loss": 3.6407909393310547,
"step": 1061
},
{
"epoch": 0.6760025461489497,
"grad_norm": 19.74189912335196,
"learning_rate": 9.531582415007602e-06,
"loss": 2.9390859603881836,
"step": 1062
},
{
"epoch": 0.6766390833863781,
"grad_norm": 11.167525562480838,
"learning_rate": 9.530015932690251e-06,
"loss": 2.607184410095215,
"step": 1063
},
{
"epoch": 0.6772756206238065,
"grad_norm": 9.308043099018203,
"learning_rate": 9.528446964591636e-06,
"loss": 2.301570177078247,
"step": 1064
},
{
"epoch": 0.6779121578612349,
"grad_norm": 9.454496131504424,
"learning_rate": 9.526875511572706e-06,
"loss": 2.7193057537078857,
"step": 1065
},
{
"epoch": 0.6785486950986632,
"grad_norm": 16.5191053949389,
"learning_rate": 9.525301574495776e-06,
"loss": 2.7189886569976807,
"step": 1066
},
{
"epoch": 0.6791852323360916,
"grad_norm": 11.651824766272222,
"learning_rate": 9.523725154224518e-06,
"loss": 2.7225799560546875,
"step": 1067
},
{
"epoch": 0.67982176957352,
"grad_norm": 8.086333384701465,
"learning_rate": 9.522146251623974e-06,
"loss": 2.777008533477783,
"step": 1068
},
{
"epoch": 0.6804583068109484,
"grad_norm": 11.185613683095509,
"learning_rate": 9.52056486756054e-06,
"loss": 2.9254977703094482,
"step": 1069
},
{
"epoch": 0.6810948440483768,
"grad_norm": 11.274061507401917,
"learning_rate": 9.51898100290198e-06,
"loss": 3.084275484085083,
"step": 1070
},
{
"epoch": 0.6817313812858052,
"grad_norm": 28.518984093764796,
"learning_rate": 9.517394658517416e-06,
"loss": 2.595210075378418,
"step": 1071
},
{
"epoch": 0.6823679185232336,
"grad_norm": 16.67925796088532,
"learning_rate": 9.515805835277334e-06,
"loss": 1.6448781490325928,
"step": 1072
},
{
"epoch": 0.683004455760662,
"grad_norm": 12.092154100602222,
"learning_rate": 9.514214534053575e-06,
"loss": 2.535048246383667,
"step": 1073
},
{
"epoch": 0.6836409929980903,
"grad_norm": 9.646276725987144,
"learning_rate": 9.512620755719344e-06,
"loss": 2.979246139526367,
"step": 1074
},
{
"epoch": 0.6842775302355187,
"grad_norm": 10.015633260078708,
"learning_rate": 9.511024501149205e-06,
"loss": 2.9097678661346436,
"step": 1075
},
{
"epoch": 0.6849140674729471,
"grad_norm": 10.332398060011624,
"learning_rate": 9.509425771219076e-06,
"loss": 2.832192897796631,
"step": 1076
},
{
"epoch": 0.6855506047103755,
"grad_norm": 21.906498330389713,
"learning_rate": 9.507824566806243e-06,
"loss": 2.8919320106506348,
"step": 1077
},
{
"epoch": 0.686187141947804,
"grad_norm": 18.16755948802436,
"learning_rate": 9.506220888789339e-06,
"loss": 3.3066060543060303,
"step": 1078
},
{
"epoch": 0.6868236791852323,
"grad_norm": 8.896605672805526,
"learning_rate": 9.504614738048363e-06,
"loss": 3.0294253826141357,
"step": 1079
},
{
"epoch": 0.6874602164226608,
"grad_norm": 9.587432718346571,
"learning_rate": 9.503006115464663e-06,
"loss": 3.116400718688965,
"step": 1080
},
{
"epoch": 0.6880967536600892,
"grad_norm": 15.001096439974274,
"learning_rate": 9.501395021920952e-06,
"loss": 2.7199864387512207,
"step": 1081
},
{
"epoch": 0.6887332908975176,
"grad_norm": 10.840538675762422,
"learning_rate": 9.49978145830129e-06,
"loss": 2.873979091644287,
"step": 1082
},
{
"epoch": 0.6893698281349459,
"grad_norm": 22.937429004050184,
"learning_rate": 9.498165425491101e-06,
"loss": 2.8262085914611816,
"step": 1083
},
{
"epoch": 0.6900063653723743,
"grad_norm": 7.69599132567219,
"learning_rate": 9.49654692437716e-06,
"loss": 2.6464715003967285,
"step": 1084
},
{
"epoch": 0.6906429026098027,
"grad_norm": 9.192389428527143,
"learning_rate": 9.494925955847592e-06,
"loss": 2.8342204093933105,
"step": 1085
},
{
"epoch": 0.6912794398472311,
"grad_norm": 17.28956378461768,
"learning_rate": 9.493302520791882e-06,
"loss": 2.3837647438049316,
"step": 1086
},
{
"epoch": 0.6919159770846595,
"grad_norm": 9.197528424140573,
"learning_rate": 9.491676620100868e-06,
"loss": 2.9350028038024902,
"step": 1087
},
{
"epoch": 0.6925525143220879,
"grad_norm": 10.823400516266032,
"learning_rate": 9.490048254666739e-06,
"loss": 2.621924877166748,
"step": 1088
},
{
"epoch": 0.6931890515595163,
"grad_norm": 9.534846604119528,
"learning_rate": 9.488417425383038e-06,
"loss": 2.387388229370117,
"step": 1089
},
{
"epoch": 0.6938255887969447,
"grad_norm": 7.087019594894608,
"learning_rate": 9.486784133144658e-06,
"loss": 2.721595287322998,
"step": 1090
},
{
"epoch": 0.694462126034373,
"grad_norm": 10.574013511195234,
"learning_rate": 9.485148378847844e-06,
"loss": 2.258244037628174,
"step": 1091
},
{
"epoch": 0.6950986632718014,
"grad_norm": 9.11464724688561,
"learning_rate": 9.483510163390195e-06,
"loss": 2.755533456802368,
"step": 1092
},
{
"epoch": 0.6957352005092298,
"grad_norm": 23.335375102792593,
"learning_rate": 9.481869487670656e-06,
"loss": 2.5640640258789062,
"step": 1093
},
{
"epoch": 0.6963717377466582,
"grad_norm": 7.944933327304487,
"learning_rate": 9.480226352589525e-06,
"loss": 2.8003828525543213,
"step": 1094
},
{
"epoch": 0.6970082749840866,
"grad_norm": 16.110349833581328,
"learning_rate": 9.47858075904845e-06,
"loss": 3.6288437843322754,
"step": 1095
},
{
"epoch": 0.697644812221515,
"grad_norm": 10.757773901045345,
"learning_rate": 9.476932707950425e-06,
"loss": 2.5156314373016357,
"step": 1096
},
{
"epoch": 0.6982813494589434,
"grad_norm": 8.398479467906514,
"learning_rate": 9.475282200199796e-06,
"loss": 2.8104264736175537,
"step": 1097
},
{
"epoch": 0.6989178866963718,
"grad_norm": 10.303948490621195,
"learning_rate": 9.473629236702256e-06,
"loss": 2.428539276123047,
"step": 1098
},
{
"epoch": 0.6995544239338001,
"grad_norm": 22.128001640346156,
"learning_rate": 9.47197381836484e-06,
"loss": 2.9523701667785645,
"step": 1099
},
{
"epoch": 0.7001909611712285,
"grad_norm": 11.573226008669508,
"learning_rate": 9.470315946095943e-06,
"loss": 2.7546629905700684,
"step": 1100
},
{
"epoch": 0.7008274984086569,
"grad_norm": 8.90312500544347,
"learning_rate": 9.468655620805292e-06,
"loss": 2.6183578968048096,
"step": 1101
},
{
"epoch": 0.7014640356460853,
"grad_norm": 14.678059850042777,
"learning_rate": 9.46699284340397e-06,
"loss": 2.7603936195373535,
"step": 1102
},
{
"epoch": 0.7021005728835137,
"grad_norm": 17.25477344357432,
"learning_rate": 9.4653276148044e-06,
"loss": 3.5560851097106934,
"step": 1103
},
{
"epoch": 0.7027371101209421,
"grad_norm": 8.082436193059547,
"learning_rate": 9.463659935920354e-06,
"loss": 3.1456332206726074,
"step": 1104
},
{
"epoch": 0.7033736473583705,
"grad_norm": 6.9670496222287435,
"learning_rate": 9.461989807666949e-06,
"loss": 2.6217257976531982,
"step": 1105
},
{
"epoch": 0.7040101845957989,
"grad_norm": 7.4847329907513895,
"learning_rate": 9.460317230960638e-06,
"loss": 2.6880407333374023,
"step": 1106
},
{
"epoch": 0.7046467218332273,
"grad_norm": 11.371556771803995,
"learning_rate": 9.45864220671923e-06,
"loss": 2.5942535400390625,
"step": 1107
},
{
"epoch": 0.7052832590706556,
"grad_norm": 8.785336056701258,
"learning_rate": 9.456964735861866e-06,
"loss": 2.8641157150268555,
"step": 1108
},
{
"epoch": 0.705919796308084,
"grad_norm": 7.219188900852088,
"learning_rate": 9.455284819309036e-06,
"loss": 2.8831381797790527,
"step": 1109
},
{
"epoch": 0.7065563335455124,
"grad_norm": 13.683598183394244,
"learning_rate": 9.453602457982569e-06,
"loss": 2.969987154006958,
"step": 1110
},
{
"epoch": 0.7071928707829408,
"grad_norm": 12.315788555192363,
"learning_rate": 9.451917652805638e-06,
"loss": 3.027716636657715,
"step": 1111
},
{
"epoch": 0.7078294080203692,
"grad_norm": 36.19197415120843,
"learning_rate": 9.450230404702754e-06,
"loss": 3.1672916412353516,
"step": 1112
},
{
"epoch": 0.7084659452577976,
"grad_norm": 12.316134019779161,
"learning_rate": 9.448540714599772e-06,
"loss": 2.8427469730377197,
"step": 1113
},
{
"epoch": 0.709102482495226,
"grad_norm": 8.576093057041657,
"learning_rate": 9.446848583423884e-06,
"loss": 2.7625861167907715,
"step": 1114
},
{
"epoch": 0.7097390197326544,
"grad_norm": 12.739162574810516,
"learning_rate": 9.445154012103623e-06,
"loss": 2.662567615509033,
"step": 1115
},
{
"epoch": 0.7103755569700827,
"grad_norm": 10.806407126421561,
"learning_rate": 9.44345700156886e-06,
"loss": 2.995030641555786,
"step": 1116
},
{
"epoch": 0.7110120942075111,
"grad_norm": 7.97895001806647,
"learning_rate": 9.441757552750808e-06,
"loss": 2.9475135803222656,
"step": 1117
},
{
"epoch": 0.7116486314449395,
"grad_norm": 8.043717440661599,
"learning_rate": 9.440055666582014e-06,
"loss": 2.9263863563537598,
"step": 1118
},
{
"epoch": 0.7122851686823679,
"grad_norm": 23.862671167213218,
"learning_rate": 9.438351343996358e-06,
"loss": 3.3885464668273926,
"step": 1119
},
{
"epoch": 0.7129217059197963,
"grad_norm": 7.42878869816598,
"learning_rate": 9.43664458592907e-06,
"loss": 1.4678406715393066,
"step": 1120
},
{
"epoch": 0.7135582431572247,
"grad_norm": 8.834797638802678,
"learning_rate": 9.434935393316709e-06,
"loss": 3.115852117538452,
"step": 1121
},
{
"epoch": 0.7141947803946531,
"grad_norm": 8.062053836270886,
"learning_rate": 9.433223767097163e-06,
"loss": 2.804098129272461,
"step": 1122
},
{
"epoch": 0.7148313176320815,
"grad_norm": 8.62380241880142,
"learning_rate": 9.431509708209669e-06,
"loss": 2.8543026447296143,
"step": 1123
},
{
"epoch": 0.7154678548695098,
"grad_norm": 10.171759338761628,
"learning_rate": 9.42979321759479e-06,
"loss": 3.0115180015563965,
"step": 1124
},
{
"epoch": 0.7161043921069382,
"grad_norm": 7.720748428345774,
"learning_rate": 9.428074296194426e-06,
"loss": 2.703876495361328,
"step": 1125
},
{
"epoch": 0.7167409293443666,
"grad_norm": 15.75954335417896,
"learning_rate": 9.426352944951806e-06,
"loss": 2.066390037536621,
"step": 1126
},
{
"epoch": 0.717377466581795,
"grad_norm": 30.210470512783385,
"learning_rate": 9.424629164811506e-06,
"loss": 2.41465425491333,
"step": 1127
},
{
"epoch": 0.7180140038192234,
"grad_norm": 29.886494199621875,
"learning_rate": 9.422902956719416e-06,
"loss": 3.1158735752105713,
"step": 1128
},
{
"epoch": 0.7186505410566518,
"grad_norm": 9.160744179358328,
"learning_rate": 9.421174321622775e-06,
"loss": 3.1245779991149902,
"step": 1129
},
{
"epoch": 0.7192870782940802,
"grad_norm": 10.036965295273822,
"learning_rate": 9.419443260470142e-06,
"loss": 3.2130062580108643,
"step": 1130
},
{
"epoch": 0.7199236155315086,
"grad_norm": 11.262756434590441,
"learning_rate": 9.417709774211415e-06,
"loss": 2.304853916168213,
"step": 1131
},
{
"epoch": 0.720560152768937,
"grad_norm": 6.95100006508944,
"learning_rate": 9.415973863797819e-06,
"loss": 3.0001373291015625,
"step": 1132
},
{
"epoch": 0.7211966900063653,
"grad_norm": 6.779094526020992,
"learning_rate": 9.414235530181907e-06,
"loss": 3.058368444442749,
"step": 1133
},
{
"epoch": 0.7218332272437937,
"grad_norm": 7.288056439154986,
"learning_rate": 9.412494774317571e-06,
"loss": 2.1561214923858643,
"step": 1134
},
{
"epoch": 0.7224697644812221,
"grad_norm": 18.231587786618007,
"learning_rate": 9.41075159716002e-06,
"loss": 3.1413750648498535,
"step": 1135
},
{
"epoch": 0.7231063017186505,
"grad_norm": 27.046129491897492,
"learning_rate": 9.409005999665799e-06,
"loss": 2.6081013679504395,
"step": 1136
},
{
"epoch": 0.7237428389560789,
"grad_norm": 12.010714101061575,
"learning_rate": 9.40725798279278e-06,
"loss": 2.7399230003356934,
"step": 1137
},
{
"epoch": 0.7243793761935073,
"grad_norm": 16.747765104249073,
"learning_rate": 9.405507547500165e-06,
"loss": 3.929919481277466,
"step": 1138
},
{
"epoch": 0.7250159134309357,
"grad_norm": 5.850715995419746,
"learning_rate": 9.403754694748475e-06,
"loss": 2.792790651321411,
"step": 1139
},
{
"epoch": 0.7256524506683641,
"grad_norm": 18.78460343789035,
"learning_rate": 9.401999425499565e-06,
"loss": 3.219160556793213,
"step": 1140
},
{
"epoch": 0.7262889879057924,
"grad_norm": 8.65575509331915,
"learning_rate": 9.400241740716617e-06,
"loss": 2.7919883728027344,
"step": 1141
},
{
"epoch": 0.7269255251432208,
"grad_norm": 12.096316324170619,
"learning_rate": 9.39848164136413e-06,
"loss": 2.685350179672241,
"step": 1142
},
{
"epoch": 0.7275620623806492,
"grad_norm": 8.389734545374605,
"learning_rate": 9.396719128407936e-06,
"loss": 2.8102335929870605,
"step": 1143
},
{
"epoch": 0.7281985996180776,
"grad_norm": 7.9309970738400075,
"learning_rate": 9.39495420281519e-06,
"loss": 2.8420093059539795,
"step": 1144
},
{
"epoch": 0.728835136855506,
"grad_norm": 19.095733546546768,
"learning_rate": 9.393186865554366e-06,
"loss": 2.548253297805786,
"step": 1145
},
{
"epoch": 0.7294716740929345,
"grad_norm": 10.026448629988439,
"learning_rate": 9.391417117595269e-06,
"loss": 2.791952610015869,
"step": 1146
},
{
"epoch": 0.7301082113303629,
"grad_norm": 10.751990454708412,
"learning_rate": 9.38964495990902e-06,
"loss": 3.065579414367676,
"step": 1147
},
{
"epoch": 0.7307447485677913,
"grad_norm": 7.276288885514958,
"learning_rate": 9.387870393468064e-06,
"loss": 2.558170795440674,
"step": 1148
},
{
"epoch": 0.7313812858052197,
"grad_norm": 9.52733291220368,
"learning_rate": 9.386093419246175e-06,
"loss": 2.5268654823303223,
"step": 1149
},
{
"epoch": 0.732017823042648,
"grad_norm": 7.579137019575734,
"learning_rate": 9.384314038218434e-06,
"loss": 2.5803568363189697,
"step": 1150
},
{
"epoch": 0.7326543602800764,
"grad_norm": 26.13725565815998,
"learning_rate": 9.382532251361257e-06,
"loss": 2.5109944343566895,
"step": 1151
},
{
"epoch": 0.7332908975175048,
"grad_norm": 9.713395534743217,
"learning_rate": 9.38074805965237e-06,
"loss": 2.518691062927246,
"step": 1152
},
{
"epoch": 0.7339274347549332,
"grad_norm": 10.806958111464304,
"learning_rate": 9.378961464070825e-06,
"loss": 2.5075459480285645,
"step": 1153
},
{
"epoch": 0.7345639719923616,
"grad_norm": 17.77282123835234,
"learning_rate": 9.377172465596992e-06,
"loss": 3.975275993347168,
"step": 1154
},
{
"epoch": 0.73520050922979,
"grad_norm": 9.82543914517833,
"learning_rate": 9.375381065212551e-06,
"loss": 2.5041916370391846,
"step": 1155
},
{
"epoch": 0.7358370464672184,
"grad_norm": 8.599307048137147,
"learning_rate": 9.373587263900518e-06,
"loss": 2.904417037963867,
"step": 1156
},
{
"epoch": 0.7364735837046468,
"grad_norm": 18.94314466908295,
"learning_rate": 9.371791062645208e-06,
"loss": 3.084871768951416,
"step": 1157
},
{
"epoch": 0.7371101209420751,
"grad_norm": 17.099900159090645,
"learning_rate": 9.369992462432264e-06,
"loss": 2.809452772140503,
"step": 1158
},
{
"epoch": 0.7377466581795035,
"grad_norm": 9.567564299680873,
"learning_rate": 9.36819146424864e-06,
"loss": 2.633657932281494,
"step": 1159
},
{
"epoch": 0.7383831954169319,
"grad_norm": 7.416023977531236,
"learning_rate": 9.366388069082609e-06,
"loss": 3.0897791385650635,
"step": 1160
},
{
"epoch": 0.7390197326543603,
"grad_norm": 9.546210002606603,
"learning_rate": 9.364582277923759e-06,
"loss": 2.208278179168701,
"step": 1161
},
{
"epoch": 0.7396562698917887,
"grad_norm": 10.933237630516508,
"learning_rate": 9.362774091762991e-06,
"loss": 2.7841458320617676,
"step": 1162
},
{
"epoch": 0.7402928071292171,
"grad_norm": 9.645369764277728,
"learning_rate": 9.36096351159252e-06,
"loss": 2.597827911376953,
"step": 1163
},
{
"epoch": 0.7409293443666455,
"grad_norm": 9.38784523263509,
"learning_rate": 9.35915053840588e-06,
"loss": 2.2382140159606934,
"step": 1164
},
{
"epoch": 0.7415658816040739,
"grad_norm": 8.964585442610392,
"learning_rate": 9.357335173197907e-06,
"loss": 2.700472354888916,
"step": 1165
},
{
"epoch": 0.7422024188415022,
"grad_norm": 82.45417581534511,
"learning_rate": 9.355517416964766e-06,
"loss": 2.7609753608703613,
"step": 1166
},
{
"epoch": 0.7428389560789306,
"grad_norm": 13.459972566387188,
"learning_rate": 9.353697270703917e-06,
"loss": 2.068809986114502,
"step": 1167
},
{
"epoch": 0.743475493316359,
"grad_norm": 9.022372778306588,
"learning_rate": 9.351874735414142e-06,
"loss": 3.0394768714904785,
"step": 1168
},
{
"epoch": 0.7441120305537874,
"grad_norm": 9.369511630945446,
"learning_rate": 9.35004981209553e-06,
"loss": 2.525172233581543,
"step": 1169
},
{
"epoch": 0.7447485677912158,
"grad_norm": 6.383815758559997,
"learning_rate": 9.348222501749482e-06,
"loss": 2.437615394592285,
"step": 1170
},
{
"epoch": 0.7453851050286442,
"grad_norm": 12.229040190394912,
"learning_rate": 9.34639280537871e-06,
"loss": 2.1296939849853516,
"step": 1171
},
{
"epoch": 0.7460216422660726,
"grad_norm": 9.876802802320743,
"learning_rate": 9.344560723987233e-06,
"loss": 3.275932550430298,
"step": 1172
},
{
"epoch": 0.746658179503501,
"grad_norm": 7.334313364656516,
"learning_rate": 9.342726258580377e-06,
"loss": 2.3332691192626953,
"step": 1173
},
{
"epoch": 0.7472947167409294,
"grad_norm": 16.286215753465054,
"learning_rate": 9.340889410164782e-06,
"loss": 2.85125994682312,
"step": 1174
},
{
"epoch": 0.7479312539783577,
"grad_norm": 8.035551862241554,
"learning_rate": 9.339050179748387e-06,
"loss": 3.0281364917755127,
"step": 1175
},
{
"epoch": 0.7485677912157861,
"grad_norm": 19.96642755772481,
"learning_rate": 9.33720856834045e-06,
"loss": 2.666728973388672,
"step": 1176
},
{
"epoch": 0.7492043284532145,
"grad_norm": 14.636540466976383,
"learning_rate": 9.335364576951527e-06,
"loss": 2.8794403076171875,
"step": 1177
},
{
"epoch": 0.7498408656906429,
"grad_norm": 10.583978621968955,
"learning_rate": 9.333518206593478e-06,
"loss": 2.5915474891662598,
"step": 1178
},
{
"epoch": 0.7504774029280713,
"grad_norm": 8.361868690386437,
"learning_rate": 9.331669458279474e-06,
"loss": 2.6375017166137695,
"step": 1179
},
{
"epoch": 0.7511139401654997,
"grad_norm": 39.954423796317634,
"learning_rate": 9.329818333023991e-06,
"loss": 2.1379952430725098,
"step": 1180
},
{
"epoch": 0.7517504774029281,
"grad_norm": 10.740761853769259,
"learning_rate": 9.327964831842807e-06,
"loss": 2.44290828704834,
"step": 1181
},
{
"epoch": 0.7523870146403565,
"grad_norm": 11.805613866789095,
"learning_rate": 9.326108955753001e-06,
"loss": 2.77449893951416,
"step": 1182
},
{
"epoch": 0.7530235518777848,
"grad_norm": 28.55282517673017,
"learning_rate": 9.324250705772964e-06,
"loss": 2.7647769451141357,
"step": 1183
},
{
"epoch": 0.7536600891152132,
"grad_norm": 12.002513981835214,
"learning_rate": 9.32239008292238e-06,
"loss": 2.573099136352539,
"step": 1184
},
{
"epoch": 0.7542966263526416,
"grad_norm": 8.43108586151736,
"learning_rate": 9.320527088222238e-06,
"loss": 3.139158010482788,
"step": 1185
},
{
"epoch": 0.75493316359007,
"grad_norm": 12.307148227097715,
"learning_rate": 9.318661722694832e-06,
"loss": 2.6034903526306152,
"step": 1186
},
{
"epoch": 0.7555697008274984,
"grad_norm": 15.0918032800552,
"learning_rate": 9.316793987363756e-06,
"loss": 3.005361557006836,
"step": 1187
},
{
"epoch": 0.7562062380649268,
"grad_norm": 8.91516719160434,
"learning_rate": 9.3149238832539e-06,
"loss": 2.7192862033843994,
"step": 1188
},
{
"epoch": 0.7568427753023552,
"grad_norm": 8.34591431880029,
"learning_rate": 9.313051411391458e-06,
"loss": 3.159897804260254,
"step": 1189
},
{
"epoch": 0.7574793125397836,
"grad_norm": 13.632637030695298,
"learning_rate": 9.311176572803922e-06,
"loss": 3.081416368484497,
"step": 1190
},
{
"epoch": 0.7581158497772119,
"grad_norm": 17.878514996352035,
"learning_rate": 9.309299368520084e-06,
"loss": 2.8336782455444336,
"step": 1191
},
{
"epoch": 0.7587523870146403,
"grad_norm": 18.65260863756698,
"learning_rate": 9.30741979957003e-06,
"loss": 2.881049871444702,
"step": 1192
},
{
"epoch": 0.7593889242520687,
"grad_norm": 29.42335411588454,
"learning_rate": 9.305537866985148e-06,
"loss": 3.6260194778442383,
"step": 1193
},
{
"epoch": 0.7600254614894971,
"grad_norm": 7.110804111133651,
"learning_rate": 9.303653571798124e-06,
"loss": 2.459531307220459,
"step": 1194
},
{
"epoch": 0.7606619987269255,
"grad_norm": 7.2639112639482875,
"learning_rate": 9.301766915042934e-06,
"loss": 2.501810073852539,
"step": 1195
},
{
"epoch": 0.7612985359643539,
"grad_norm": 7.162170089670997,
"learning_rate": 9.299877897754855e-06,
"loss": 2.7279324531555176,
"step": 1196
},
{
"epoch": 0.7619350732017823,
"grad_norm": 10.673649917991852,
"learning_rate": 9.297986520970458e-06,
"loss": 2.7421388626098633,
"step": 1197
},
{
"epoch": 0.7625716104392107,
"grad_norm": 12.13990360629329,
"learning_rate": 9.296092785727612e-06,
"loss": 2.960937023162842,
"step": 1198
},
{
"epoch": 0.7632081476766391,
"grad_norm": 10.83061394650552,
"learning_rate": 9.294196693065474e-06,
"loss": 2.5112879276275635,
"step": 1199
},
{
"epoch": 0.7638446849140674,
"grad_norm": 12.340066397980847,
"learning_rate": 9.292298244024497e-06,
"loss": 3.506455183029175,
"step": 1200
},
{
"epoch": 0.7644812221514958,
"grad_norm": 13.4782121228838,
"learning_rate": 9.290397439646429e-06,
"loss": 2.788090229034424,
"step": 1201
},
{
"epoch": 0.7651177593889242,
"grad_norm": 7.4373661345385615,
"learning_rate": 9.28849428097431e-06,
"loss": 2.7960195541381836,
"step": 1202
},
{
"epoch": 0.7657542966263526,
"grad_norm": 16.03603811943839,
"learning_rate": 9.286588769052469e-06,
"loss": 2.8688406944274902,
"step": 1203
},
{
"epoch": 0.766390833863781,
"grad_norm": 6.974990491610596,
"learning_rate": 9.28468090492653e-06,
"loss": 2.451228618621826,
"step": 1204
},
{
"epoch": 0.7670273711012094,
"grad_norm": 16.16256255384718,
"learning_rate": 9.282770689643406e-06,
"loss": 2.9304986000061035,
"step": 1205
},
{
"epoch": 0.7676639083386378,
"grad_norm": 14.651508634400052,
"learning_rate": 9.2808581242513e-06,
"loss": 2.8623952865600586,
"step": 1206
},
{
"epoch": 0.7683004455760662,
"grad_norm": 12.761068791969821,
"learning_rate": 9.278943209799703e-06,
"loss": 2.48644757270813,
"step": 1207
},
{
"epoch": 0.7689369828134945,
"grad_norm": 13.89816323154538,
"learning_rate": 9.277025947339398e-06,
"loss": 2.7891924381256104,
"step": 1208
},
{
"epoch": 0.7695735200509229,
"grad_norm": 7.760896895883094,
"learning_rate": 9.275106337922458e-06,
"loss": 2.4967544078826904,
"step": 1209
},
{
"epoch": 0.7702100572883513,
"grad_norm": 9.291985160258172,
"learning_rate": 9.273184382602237e-06,
"loss": 2.770961284637451,
"step": 1210
},
{
"epoch": 0.7708465945257797,
"grad_norm": 11.32569160257647,
"learning_rate": 9.271260082433381e-06,
"loss": 2.617584228515625,
"step": 1211
},
{
"epoch": 0.7714831317632082,
"grad_norm": 12.730047256965866,
"learning_rate": 9.269333438471826e-06,
"loss": 2.486670732498169,
"step": 1212
},
{
"epoch": 0.7721196690006366,
"grad_norm": 20.55652485105194,
"learning_rate": 9.267404451774787e-06,
"loss": 2.6450343132019043,
"step": 1213
},
{
"epoch": 0.772756206238065,
"grad_norm": 15.363179234075059,
"learning_rate": 9.265473123400768e-06,
"loss": 3.0391898155212402,
"step": 1214
},
{
"epoch": 0.7733927434754934,
"grad_norm": 11.067307771157337,
"learning_rate": 9.263539454409556e-06,
"loss": 2.699723243713379,
"step": 1215
},
{
"epoch": 0.7740292807129217,
"grad_norm": 18.131580548315544,
"learning_rate": 9.261603445862229e-06,
"loss": 2.382633686065674,
"step": 1216
},
{
"epoch": 0.77466581795035,
"grad_norm": 9.521834048728877,
"learning_rate": 9.25966509882114e-06,
"loss": 2.558096408843994,
"step": 1217
},
{
"epoch": 0.7753023551877785,
"grad_norm": 12.828514116759349,
"learning_rate": 9.25772441434993e-06,
"loss": 2.796450614929199,
"step": 1218
},
{
"epoch": 0.7759388924252069,
"grad_norm": 48.60647380704801,
"learning_rate": 9.255781393513523e-06,
"loss": 2.49711012840271,
"step": 1219
},
{
"epoch": 0.7765754296626353,
"grad_norm": 7.8399530210561865,
"learning_rate": 9.253836037378122e-06,
"loss": 2.9384589195251465,
"step": 1220
},
{
"epoch": 0.7772119669000637,
"grad_norm": 8.124337203909667,
"learning_rate": 9.251888347011214e-06,
"loss": 3.1889712810516357,
"step": 1221
},
{
"epoch": 0.7778485041374921,
"grad_norm": 8.605963861661598,
"learning_rate": 9.249938323481566e-06,
"loss": 2.405120372772217,
"step": 1222
},
{
"epoch": 0.7784850413749205,
"grad_norm": 10.301738676212663,
"learning_rate": 9.247985967859225e-06,
"loss": 3.1040446758270264,
"step": 1223
},
{
"epoch": 0.7791215786123489,
"grad_norm": 17.131686017400916,
"learning_rate": 9.246031281215522e-06,
"loss": 2.957456350326538,
"step": 1224
},
{
"epoch": 0.7797581158497772,
"grad_norm": 8.429220498236724,
"learning_rate": 9.244074264623058e-06,
"loss": 2.8081865310668945,
"step": 1225
},
{
"epoch": 0.7803946530872056,
"grad_norm": 7.907723815277553,
"learning_rate": 9.242114919155718e-06,
"loss": 2.852213144302368,
"step": 1226
},
{
"epoch": 0.781031190324634,
"grad_norm": 6.567851790415435,
"learning_rate": 9.24015324588867e-06,
"loss": 2.73412823677063,
"step": 1227
},
{
"epoch": 0.7816677275620624,
"grad_norm": 8.213853688073232,
"learning_rate": 9.238189245898348e-06,
"loss": 2.8029489517211914,
"step": 1228
},
{
"epoch": 0.7823042647994908,
"grad_norm": 8.365629205047716,
"learning_rate": 9.236222920262473e-06,
"loss": 2.9127280712127686,
"step": 1229
},
{
"epoch": 0.7829408020369192,
"grad_norm": 9.801297553132422,
"learning_rate": 9.234254270060036e-06,
"loss": 2.470463991165161,
"step": 1230
},
{
"epoch": 0.7835773392743476,
"grad_norm": 6.687872116603668,
"learning_rate": 9.232283296371305e-06,
"loss": 2.604677438735962,
"step": 1231
},
{
"epoch": 0.784213876511776,
"grad_norm": 15.611478833788476,
"learning_rate": 9.230310000277826e-06,
"loss": 2.8246655464172363,
"step": 1232
},
{
"epoch": 0.7848504137492043,
"grad_norm": 10.496722310569893,
"learning_rate": 9.228334382862415e-06,
"loss": 2.9619932174682617,
"step": 1233
},
{
"epoch": 0.7854869509866327,
"grad_norm": 21.111577164492267,
"learning_rate": 9.226356445209164e-06,
"loss": 3.1211252212524414,
"step": 1234
},
{
"epoch": 0.7861234882240611,
"grad_norm": 5.650042844459213,
"learning_rate": 9.224376188403438e-06,
"loss": 2.3700907230377197,
"step": 1235
},
{
"epoch": 0.7867600254614895,
"grad_norm": 11.614984343188375,
"learning_rate": 9.222393613531875e-06,
"loss": 2.7831969261169434,
"step": 1236
},
{
"epoch": 0.7873965626989179,
"grad_norm": 14.451173649063868,
"learning_rate": 9.220408721682384e-06,
"loss": 2.5475869178771973,
"step": 1237
},
{
"epoch": 0.7880330999363463,
"grad_norm": 8.626604462271105,
"learning_rate": 9.218421513944146e-06,
"loss": 2.251643180847168,
"step": 1238
},
{
"epoch": 0.7886696371737747,
"grad_norm": 10.541006366514067,
"learning_rate": 9.216431991407614e-06,
"loss": 2.9884228706359863,
"step": 1239
},
{
"epoch": 0.7893061744112031,
"grad_norm": 10.3895400900976,
"learning_rate": 9.214440155164509e-06,
"loss": 2.6941347122192383,
"step": 1240
},
{
"epoch": 0.7899427116486314,
"grad_norm": 15.634802726402812,
"learning_rate": 9.21244600630782e-06,
"loss": 3.2982771396636963,
"step": 1241
},
{
"epoch": 0.7905792488860598,
"grad_norm": 12.391044593349184,
"learning_rate": 9.21044954593181e-06,
"loss": 2.489248752593994,
"step": 1242
},
{
"epoch": 0.7912157861234882,
"grad_norm": 7.369020917300009,
"learning_rate": 9.20845077513201e-06,
"loss": 3.02134108543396,
"step": 1243
},
{
"epoch": 0.7918523233609166,
"grad_norm": 12.967257970395574,
"learning_rate": 9.206449695005214e-06,
"loss": 2.752878189086914,
"step": 1244
},
{
"epoch": 0.792488860598345,
"grad_norm": 22.412338612647503,
"learning_rate": 9.204446306649485e-06,
"loss": 2.832862615585327,
"step": 1245
},
{
"epoch": 0.7931253978357734,
"grad_norm": 13.838732237567118,
"learning_rate": 9.202440611164156e-06,
"loss": 2.739649772644043,
"step": 1246
},
{
"epoch": 0.7937619350732018,
"grad_norm": 13.15421166907182,
"learning_rate": 9.200432609649826e-06,
"loss": 3.1091763973236084,
"step": 1247
},
{
"epoch": 0.7943984723106302,
"grad_norm": 25.083539759246076,
"learning_rate": 9.198422303208349e-06,
"loss": 3.1699438095092773,
"step": 1248
},
{
"epoch": 0.7950350095480586,
"grad_norm": 12.736876776944172,
"learning_rate": 9.19640969294286e-06,
"loss": 2.7697696685791016,
"step": 1249
},
{
"epoch": 0.7956715467854869,
"grad_norm": 8.601857286692102,
"learning_rate": 9.194394779957746e-06,
"loss": 3.2362310886383057,
"step": 1250
},
{
"epoch": 0.7963080840229153,
"grad_norm": 13.459429768113056,
"learning_rate": 9.192377565358664e-06,
"loss": 2.5905492305755615,
"step": 1251
},
{
"epoch": 0.7969446212603437,
"grad_norm": 16.871641087210467,
"learning_rate": 9.190358050252528e-06,
"loss": 2.7865467071533203,
"step": 1252
},
{
"epoch": 0.7975811584977721,
"grad_norm": 9.844720147880034,
"learning_rate": 9.188336235747521e-06,
"loss": 2.3726232051849365,
"step": 1253
},
{
"epoch": 0.7982176957352005,
"grad_norm": 11.23669122084876,
"learning_rate": 9.186312122953083e-06,
"loss": 3.300795555114746,
"step": 1254
},
{
"epoch": 0.7988542329726289,
"grad_norm": 16.360634620929737,
"learning_rate": 9.184285712979919e-06,
"loss": 2.935802698135376,
"step": 1255
},
{
"epoch": 0.7994907702100573,
"grad_norm": 13.509288456680997,
"learning_rate": 9.182257006939989e-06,
"loss": 2.9251818656921387,
"step": 1256
},
{
"epoch": 0.8001273074474857,
"grad_norm": 9.639067408736738,
"learning_rate": 9.18022600594652e-06,
"loss": 2.9768736362457275,
"step": 1257
},
{
"epoch": 0.800763844684914,
"grad_norm": 10.675066476296655,
"learning_rate": 9.178192711113991e-06,
"loss": 2.221604585647583,
"step": 1258
},
{
"epoch": 0.8014003819223424,
"grad_norm": 19.08622224944117,
"learning_rate": 9.176157123558147e-06,
"loss": 2.6881661415100098,
"step": 1259
},
{
"epoch": 0.8020369191597708,
"grad_norm": 6.681873457109386,
"learning_rate": 9.174119244395984e-06,
"loss": 2.6460700035095215,
"step": 1260
},
{
"epoch": 0.8026734563971992,
"grad_norm": 9.120975168940044,
"learning_rate": 9.172079074745764e-06,
"loss": 2.587338924407959,
"step": 1261
},
{
"epoch": 0.8033099936346276,
"grad_norm": 8.470241427468848,
"learning_rate": 9.170036615726995e-06,
"loss": 2.9558582305908203,
"step": 1262
},
{
"epoch": 0.803946530872056,
"grad_norm": 21.420413651089113,
"learning_rate": 9.167991868460451e-06,
"loss": 2.270899772644043,
"step": 1263
},
{
"epoch": 0.8045830681094844,
"grad_norm": 10.088992613200938,
"learning_rate": 9.165944834068154e-06,
"loss": 2.834348678588867,
"step": 1264
},
{
"epoch": 0.8052196053469128,
"grad_norm": 17.773011919639547,
"learning_rate": 9.163895513673388e-06,
"loss": 2.6700775623321533,
"step": 1265
},
{
"epoch": 0.8058561425843411,
"grad_norm": 6.313584556413577,
"learning_rate": 9.16184390840069e-06,
"loss": 3.119436264038086,
"step": 1266
},
{
"epoch": 0.8064926798217695,
"grad_norm": 10.683183886824269,
"learning_rate": 9.159790019375844e-06,
"loss": 2.6771233081817627,
"step": 1267
},
{
"epoch": 0.8071292170591979,
"grad_norm": 17.039075471902997,
"learning_rate": 9.157733847725895e-06,
"loss": 2.8111491203308105,
"step": 1268
},
{
"epoch": 0.8077657542966263,
"grad_norm": 9.61025446735227,
"learning_rate": 9.155675394579137e-06,
"loss": 3.3145298957824707,
"step": 1269
},
{
"epoch": 0.8084022915340547,
"grad_norm": 13.026516215465113,
"learning_rate": 9.153614661065115e-06,
"loss": 3.323643922805786,
"step": 1270
},
{
"epoch": 0.8090388287714831,
"grad_norm": 13.865669627803625,
"learning_rate": 9.151551648314632e-06,
"loss": 2.976489782333374,
"step": 1271
},
{
"epoch": 0.8096753660089115,
"grad_norm": 23.93851468624548,
"learning_rate": 9.149486357459731e-06,
"loss": 3.009335517883301,
"step": 1272
},
{
"epoch": 0.81031190324634,
"grad_norm": 10.469437347525474,
"learning_rate": 9.147418789633715e-06,
"loss": 2.6073620319366455,
"step": 1273
},
{
"epoch": 0.8109484404837684,
"grad_norm": 10.853234944446672,
"learning_rate": 9.145348945971129e-06,
"loss": 2.799849510192871,
"step": 1274
},
{
"epoch": 0.8115849777211966,
"grad_norm": 9.688487286126236,
"learning_rate": 9.143276827607772e-06,
"loss": 2.2954964637756348,
"step": 1275
},
{
"epoch": 0.812221514958625,
"grad_norm": 10.669955887868204,
"learning_rate": 9.141202435680687e-06,
"loss": 2.92189359664917,
"step": 1276
},
{
"epoch": 0.8128580521960534,
"grad_norm": 11.626387329071216,
"learning_rate": 9.13912577132817e-06,
"loss": 2.646479845046997,
"step": 1277
},
{
"epoch": 0.8134945894334819,
"grad_norm": 10.031668621938243,
"learning_rate": 9.137046835689758e-06,
"loss": 2.1151552200317383,
"step": 1278
},
{
"epoch": 0.8141311266709103,
"grad_norm": 15.09847410739582,
"learning_rate": 9.134965629906238e-06,
"loss": 2.561397075653076,
"step": 1279
},
{
"epoch": 0.8147676639083387,
"grad_norm": 18.197636178571923,
"learning_rate": 9.132882155119645e-06,
"loss": 2.14336895942688,
"step": 1280
},
{
"epoch": 0.8154042011457671,
"grad_norm": 9.844441612822695,
"learning_rate": 9.13079641247325e-06,
"loss": 2.4377920627593994,
"step": 1281
},
{
"epoch": 0.8160407383831955,
"grad_norm": 7.707595564529393,
"learning_rate": 9.128708403111577e-06,
"loss": 2.7710394859313965,
"step": 1282
},
{
"epoch": 0.8166772756206238,
"grad_norm": 10.049701606083062,
"learning_rate": 9.126618128180394e-06,
"loss": 3.043046474456787,
"step": 1283
},
{
"epoch": 0.8173138128580522,
"grad_norm": 9.019335112227964,
"learning_rate": 9.124525588826706e-06,
"loss": 2.1620230674743652,
"step": 1284
},
{
"epoch": 0.8179503500954806,
"grad_norm": 8.67035035284991,
"learning_rate": 9.122430786198763e-06,
"loss": 2.6001639366149902,
"step": 1285
},
{
"epoch": 0.818586887332909,
"grad_norm": 10.043588823477533,
"learning_rate": 9.12033372144606e-06,
"loss": 1.823639988899231,
"step": 1286
},
{
"epoch": 0.8192234245703374,
"grad_norm": 9.22606774169981,
"learning_rate": 9.118234395719332e-06,
"loss": 2.6349215507507324,
"step": 1287
},
{
"epoch": 0.8198599618077658,
"grad_norm": 27.4129637623895,
"learning_rate": 9.116132810170554e-06,
"loss": 3.1643121242523193,
"step": 1288
},
{
"epoch": 0.8204964990451942,
"grad_norm": 13.522827712587821,
"learning_rate": 9.114028965952939e-06,
"loss": 2.936291217803955,
"step": 1289
},
{
"epoch": 0.8211330362826226,
"grad_norm": 14.31455799494892,
"learning_rate": 9.111922864220942e-06,
"loss": 2.8982882499694824,
"step": 1290
},
{
"epoch": 0.821769573520051,
"grad_norm": 30.02027998642032,
"learning_rate": 9.109814506130255e-06,
"loss": 3.107332706451416,
"step": 1291
},
{
"epoch": 0.8224061107574793,
"grad_norm": 16.261932973577068,
"learning_rate": 9.107703892837812e-06,
"loss": 2.811025619506836,
"step": 1292
},
{
"epoch": 0.8230426479949077,
"grad_norm": 18.45679320046287,
"learning_rate": 9.105591025501779e-06,
"loss": 2.2192745208740234,
"step": 1293
},
{
"epoch": 0.8236791852323361,
"grad_norm": 7.736250035717551,
"learning_rate": 9.103475905281563e-06,
"loss": 3.3386051654815674,
"step": 1294
},
{
"epoch": 0.8243157224697645,
"grad_norm": 13.766465002122054,
"learning_rate": 9.101358533337808e-06,
"loss": 2.681990385055542,
"step": 1295
},
{
"epoch": 0.8249522597071929,
"grad_norm": 12.720474806758402,
"learning_rate": 9.099238910832387e-06,
"loss": 2.671180009841919,
"step": 1296
},
{
"epoch": 0.8255887969446213,
"grad_norm": 11.870978572789058,
"learning_rate": 9.097117038928414e-06,
"loss": 2.5459225177764893,
"step": 1297
},
{
"epoch": 0.8262253341820497,
"grad_norm": 14.051712105259883,
"learning_rate": 9.094992918790238e-06,
"loss": 2.7658138275146484,
"step": 1298
},
{
"epoch": 0.8268618714194781,
"grad_norm": 9.78623844647995,
"learning_rate": 9.092866551583436e-06,
"loss": 2.522017240524292,
"step": 1299
},
{
"epoch": 0.8274984086569064,
"grad_norm": 16.121189036643578,
"learning_rate": 9.090737938474825e-06,
"loss": 3.155287265777588,
"step": 1300
},
{
"epoch": 0.8281349458943348,
"grad_norm": 8.482916829059928,
"learning_rate": 9.08860708063245e-06,
"loss": 2.536132574081421,
"step": 1301
},
{
"epoch": 0.8287714831317632,
"grad_norm": 12.819378495105811,
"learning_rate": 9.086473979225588e-06,
"loss": 2.7831597328186035,
"step": 1302
},
{
"epoch": 0.8294080203691916,
"grad_norm": 12.130627293232624,
"learning_rate": 9.084338635424745e-06,
"loss": 3.048809051513672,
"step": 1303
},
{
"epoch": 0.83004455760662,
"grad_norm": 11.074352418134843,
"learning_rate": 9.082201050401666e-06,
"loss": 2.6596789360046387,
"step": 1304
},
{
"epoch": 0.8306810948440484,
"grad_norm": 8.317566117685402,
"learning_rate": 9.080061225329317e-06,
"loss": 2.4225730895996094,
"step": 1305
},
{
"epoch": 0.8313176320814768,
"grad_norm": 12.478378501589457,
"learning_rate": 9.077919161381894e-06,
"loss": 2.435547113418579,
"step": 1306
},
{
"epoch": 0.8319541693189052,
"grad_norm": 8.20518520638,
"learning_rate": 9.075774859734829e-06,
"loss": 2.624713659286499,
"step": 1307
},
{
"epoch": 0.8325907065563335,
"grad_norm": 13.114609407099252,
"learning_rate": 9.073628321564773e-06,
"loss": 2.7705373764038086,
"step": 1308
},
{
"epoch": 0.8332272437937619,
"grad_norm": 11.553709399956617,
"learning_rate": 9.07147954804961e-06,
"loss": 2.776597023010254,
"step": 1309
},
{
"epoch": 0.8338637810311903,
"grad_norm": 14.484349806596981,
"learning_rate": 9.069328540368448e-06,
"loss": 2.5158157348632812,
"step": 1310
},
{
"epoch": 0.8345003182686187,
"grad_norm": 9.077943384505767,
"learning_rate": 9.067175299701619e-06,
"loss": 2.854553699493408,
"step": 1311
},
{
"epoch": 0.8351368555060471,
"grad_norm": 20.600143979613403,
"learning_rate": 9.065019827230688e-06,
"loss": 2.805817127227783,
"step": 1312
},
{
"epoch": 0.8357733927434755,
"grad_norm": 20.94123910800333,
"learning_rate": 9.062862124138435e-06,
"loss": 3.1979176998138428,
"step": 1313
},
{
"epoch": 0.8364099299809039,
"grad_norm": 10.511034456544738,
"learning_rate": 9.060702191608873e-06,
"loss": 2.674525022506714,
"step": 1314
},
{
"epoch": 0.8370464672183323,
"grad_norm": 13.282101608961089,
"learning_rate": 9.058540030827228e-06,
"loss": 3.2750120162963867,
"step": 1315
},
{
"epoch": 0.8376830044557607,
"grad_norm": 12.468491160804797,
"learning_rate": 9.056375642979961e-06,
"loss": 2.586822032928467,
"step": 1316
},
{
"epoch": 0.838319541693189,
"grad_norm": 12.825356591572639,
"learning_rate": 9.054209029254746e-06,
"loss": 2.5680291652679443,
"step": 1317
},
{
"epoch": 0.8389560789306174,
"grad_norm": 7.0251047845826236,
"learning_rate": 9.05204019084048e-06,
"loss": 2.9343976974487305,
"step": 1318
},
{
"epoch": 0.8395926161680458,
"grad_norm": 10.35830665819943,
"learning_rate": 9.049869128927284e-06,
"loss": 2.5775201320648193,
"step": 1319
},
{
"epoch": 0.8402291534054742,
"grad_norm": 12.074121871920275,
"learning_rate": 9.047695844706496e-06,
"loss": 3.1281425952911377,
"step": 1320
},
{
"epoch": 0.8408656906429026,
"grad_norm": 13.46786151972881,
"learning_rate": 9.045520339370675e-06,
"loss": 2.9933152198791504,
"step": 1321
},
{
"epoch": 0.841502227880331,
"grad_norm": 9.191417658963479,
"learning_rate": 9.0433426141136e-06,
"loss": 2.0610406398773193,
"step": 1322
},
{
"epoch": 0.8421387651177594,
"grad_norm": 20.31098015489218,
"learning_rate": 9.041162670130262e-06,
"loss": 2.398113250732422,
"step": 1323
},
{
"epoch": 0.8427753023551878,
"grad_norm": 7.247621506671208,
"learning_rate": 9.038980508616877e-06,
"loss": 2.683847427368164,
"step": 1324
},
{
"epoch": 0.8434118395926161,
"grad_norm": 21.14208058029866,
"learning_rate": 9.036796130770876e-06,
"loss": 3.447007656097412,
"step": 1325
},
{
"epoch": 0.8440483768300445,
"grad_norm": 11.308599379337744,
"learning_rate": 9.034609537790901e-06,
"loss": 2.7181715965270996,
"step": 1326
},
{
"epoch": 0.8446849140674729,
"grad_norm": 10.468205576836482,
"learning_rate": 9.032420730876819e-06,
"loss": 3.347914934158325,
"step": 1327
},
{
"epoch": 0.8453214513049013,
"grad_norm": 13.696870519850068,
"learning_rate": 9.030229711229701e-06,
"loss": 2.550179958343506,
"step": 1328
},
{
"epoch": 0.8459579885423297,
"grad_norm": 9.140413413821953,
"learning_rate": 9.028036480051843e-06,
"loss": 2.393416404724121,
"step": 1329
},
{
"epoch": 0.8465945257797581,
"grad_norm": 12.087748946308038,
"learning_rate": 9.025841038546743e-06,
"loss": 2.280996799468994,
"step": 1330
},
{
"epoch": 0.8472310630171865,
"grad_norm": 7.168215986872914,
"learning_rate": 9.023643387919123e-06,
"loss": 3.248619318008423,
"step": 1331
},
{
"epoch": 0.8478676002546149,
"grad_norm": 13.387201526610566,
"learning_rate": 9.021443529374912e-06,
"loss": 2.639176368713379,
"step": 1332
},
{
"epoch": 0.8485041374920432,
"grad_norm": 18.594921898086263,
"learning_rate": 9.019241464121246e-06,
"loss": 2.147660255432129,
"step": 1333
},
{
"epoch": 0.8491406747294716,
"grad_norm": 37.31900502048071,
"learning_rate": 9.017037193366483e-06,
"loss": 4.2564802169799805,
"step": 1334
},
{
"epoch": 0.8497772119669,
"grad_norm": 15.112857114582322,
"learning_rate": 9.01483071832018e-06,
"loss": 3.1673173904418945,
"step": 1335
},
{
"epoch": 0.8504137492043284,
"grad_norm": 21.676381708582444,
"learning_rate": 9.01262204019311e-06,
"loss": 3.0029163360595703,
"step": 1336
},
{
"epoch": 0.8510502864417568,
"grad_norm": 14.870952033681172,
"learning_rate": 9.010411160197257e-06,
"loss": 2.3692140579223633,
"step": 1337
},
{
"epoch": 0.8516868236791852,
"grad_norm": 9.74821280635995,
"learning_rate": 9.008198079545805e-06,
"loss": 2.133089542388916,
"step": 1338
},
{
"epoch": 0.8523233609166136,
"grad_norm": 15.469018293158362,
"learning_rate": 9.00598279945315e-06,
"loss": 2.728011131286621,
"step": 1339
},
{
"epoch": 0.852959898154042,
"grad_norm": 8.185856482659961,
"learning_rate": 9.0037653211349e-06,
"loss": 2.749013662338257,
"step": 1340
},
{
"epoch": 0.8535964353914705,
"grad_norm": 19.98457423207181,
"learning_rate": 9.00154564580786e-06,
"loss": 3.011110782623291,
"step": 1341
},
{
"epoch": 0.8542329726288987,
"grad_norm": 9.592951858163861,
"learning_rate": 8.999323774690047e-06,
"loss": 2.5260725021362305,
"step": 1342
},
{
"epoch": 0.8548695098663271,
"grad_norm": 18.64522308186827,
"learning_rate": 8.99709970900068e-06,
"loss": 1.9496815204620361,
"step": 1343
},
{
"epoch": 0.8555060471037556,
"grad_norm": 12.076529092259971,
"learning_rate": 8.994873449960184e-06,
"loss": 3.127403736114502,
"step": 1344
},
{
"epoch": 0.856142584341184,
"grad_norm": 10.462164153573513,
"learning_rate": 8.992644998790185e-06,
"loss": 2.9761838912963867,
"step": 1345
},
{
"epoch": 0.8567791215786124,
"grad_norm": 12.99995423386628,
"learning_rate": 8.990414356713517e-06,
"loss": 3.0213937759399414,
"step": 1346
},
{
"epoch": 0.8574156588160408,
"grad_norm": 8.95455484422871,
"learning_rate": 8.98818152495421e-06,
"loss": 2.820939779281616,
"step": 1347
},
{
"epoch": 0.8580521960534692,
"grad_norm": 11.266491420640211,
"learning_rate": 8.985946504737498e-06,
"loss": 2.9036173820495605,
"step": 1348
},
{
"epoch": 0.8586887332908976,
"grad_norm": 34.454539446401014,
"learning_rate": 8.983709297289818e-06,
"loss": 2.1190078258514404,
"step": 1349
},
{
"epoch": 0.8593252705283259,
"grad_norm": 12.587663196042058,
"learning_rate": 8.981469903838806e-06,
"loss": 2.672821044921875,
"step": 1350
},
{
"epoch": 0.8599618077657543,
"grad_norm": 17.88028573907417,
"learning_rate": 8.979228325613294e-06,
"loss": 2.6679329872131348,
"step": 1351
},
{
"epoch": 0.8605983450031827,
"grad_norm": 8.982580712828911,
"learning_rate": 8.97698456384332e-06,
"loss": 2.5248546600341797,
"step": 1352
},
{
"epoch": 0.8612348822406111,
"grad_norm": 16.158320589562617,
"learning_rate": 8.974738619760112e-06,
"loss": 3.293686866760254,
"step": 1353
},
{
"epoch": 0.8618714194780395,
"grad_norm": 21.576743503101472,
"learning_rate": 8.972490494596103e-06,
"loss": 3.087799549102783,
"step": 1354
},
{
"epoch": 0.8625079567154679,
"grad_norm": 20.43473137721611,
"learning_rate": 8.970240189584917e-06,
"loss": 2.5691702365875244,
"step": 1355
},
{
"epoch": 0.8631444939528963,
"grad_norm": 13.286826836661218,
"learning_rate": 8.967987705961379e-06,
"loss": 2.7788045406341553,
"step": 1356
},
{
"epoch": 0.8637810311903247,
"grad_norm": 10.982787627190925,
"learning_rate": 8.965733044961503e-06,
"loss": 2.638690948486328,
"step": 1357
},
{
"epoch": 0.864417568427753,
"grad_norm": 21.56517749749914,
"learning_rate": 8.963476207822506e-06,
"loss": 2.9436588287353516,
"step": 1358
},
{
"epoch": 0.8650541056651814,
"grad_norm": 7.846050590025499,
"learning_rate": 8.961217195782794e-06,
"loss": 2.5019116401672363,
"step": 1359
},
{
"epoch": 0.8656906429026098,
"grad_norm": 8.88665701771191,
"learning_rate": 8.958956010081967e-06,
"loss": 2.64182186126709,
"step": 1360
},
{
"epoch": 0.8663271801400382,
"grad_norm": 7.9286584819096015,
"learning_rate": 8.956692651960817e-06,
"loss": 2.5709879398345947,
"step": 1361
},
{
"epoch": 0.8669637173774666,
"grad_norm": 35.852572977236775,
"learning_rate": 8.95442712266133e-06,
"loss": 3.238485813140869,
"step": 1362
},
{
"epoch": 0.867600254614895,
"grad_norm": 13.061886199348878,
"learning_rate": 8.952159423426685e-06,
"loss": 2.509033203125,
"step": 1363
},
{
"epoch": 0.8682367918523234,
"grad_norm": 9.757063645996611,
"learning_rate": 8.949889555501248e-06,
"loss": 2.698711395263672,
"step": 1364
},
{
"epoch": 0.8688733290897518,
"grad_norm": 15.904767375002612,
"learning_rate": 8.947617520130575e-06,
"loss": 2.8830275535583496,
"step": 1365
},
{
"epoch": 0.8695098663271802,
"grad_norm": 9.859340776191049,
"learning_rate": 8.945343318561415e-06,
"loss": 2.7333996295928955,
"step": 1366
},
{
"epoch": 0.8701464035646085,
"grad_norm": 29.90621729856709,
"learning_rate": 8.9430669520417e-06,
"loss": 2.312429666519165,
"step": 1367
},
{
"epoch": 0.8707829408020369,
"grad_norm": 15.774793322102777,
"learning_rate": 8.940788421820557e-06,
"loss": 1.8989139795303345,
"step": 1368
},
{
"epoch": 0.8714194780394653,
"grad_norm": 26.079386831668614,
"learning_rate": 8.938507729148297e-06,
"loss": 3.147024631500244,
"step": 1369
},
{
"epoch": 0.8720560152768937,
"grad_norm": 25.93833059043368,
"learning_rate": 8.936224875276415e-06,
"loss": 3.548795223236084,
"step": 1370
},
{
"epoch": 0.8726925525143221,
"grad_norm": 15.127123147983411,
"learning_rate": 8.933939861457594e-06,
"loss": 2.9894320964813232,
"step": 1371
},
{
"epoch": 0.8733290897517505,
"grad_norm": 14.870429368132742,
"learning_rate": 8.931652688945706e-06,
"loss": 2.587772846221924,
"step": 1372
},
{
"epoch": 0.8739656269891789,
"grad_norm": 11.875634881012301,
"learning_rate": 8.929363358995802e-06,
"loss": 2.8910446166992188,
"step": 1373
},
{
"epoch": 0.8746021642266073,
"grad_norm": 13.721192137682051,
"learning_rate": 8.927071872864119e-06,
"loss": 2.673334836959839,
"step": 1374
},
{
"epoch": 0.8752387014640356,
"grad_norm": 12.460130848575641,
"learning_rate": 8.924778231808075e-06,
"loss": 2.914884090423584,
"step": 1375
},
{
"epoch": 0.875875238701464,
"grad_norm": 14.042103142396336,
"learning_rate": 8.922482437086276e-06,
"loss": 2.707538366317749,
"step": 1376
},
{
"epoch": 0.8765117759388924,
"grad_norm": 15.981995867165395,
"learning_rate": 8.920184489958505e-06,
"loss": 2.903834104537964,
"step": 1377
},
{
"epoch": 0.8771483131763208,
"grad_norm": 8.218622342353424,
"learning_rate": 8.917884391685729e-06,
"loss": 2.9739201068878174,
"step": 1378
},
{
"epoch": 0.8777848504137492,
"grad_norm": 12.842525285476123,
"learning_rate": 8.915582143530091e-06,
"loss": 2.8159000873565674,
"step": 1379
},
{
"epoch": 0.8784213876511776,
"grad_norm": 9.770965768425329,
"learning_rate": 8.91327774675492e-06,
"loss": 2.6709249019622803,
"step": 1380
},
{
"epoch": 0.879057924888606,
"grad_norm": 10.943758929362236,
"learning_rate": 8.910971202624717e-06,
"loss": 2.720698833465576,
"step": 1381
},
{
"epoch": 0.8796944621260344,
"grad_norm": 11.14834211284623,
"learning_rate": 8.90866251240517e-06,
"loss": 2.7545647621154785,
"step": 1382
},
{
"epoch": 0.8803309993634627,
"grad_norm": 6.992842046540137,
"learning_rate": 8.906351677363133e-06,
"loss": 2.6223673820495605,
"step": 1383
},
{
"epoch": 0.8809675366008911,
"grad_norm": 9.969659175978776,
"learning_rate": 8.904038698766649e-06,
"loss": 2.7305006980895996,
"step": 1384
},
{
"epoch": 0.8816040738383195,
"grad_norm": 11.375758939160075,
"learning_rate": 8.90172357788493e-06,
"loss": 2.7022647857666016,
"step": 1385
},
{
"epoch": 0.8822406110757479,
"grad_norm": 15.765826665284663,
"learning_rate": 8.899406315988363e-06,
"loss": 2.983297109603882,
"step": 1386
},
{
"epoch": 0.8828771483131763,
"grad_norm": 8.208433575257724,
"learning_rate": 8.897086914348519e-06,
"loss": 3.147146224975586,
"step": 1387
},
{
"epoch": 0.8835136855506047,
"grad_norm": 33.796645109789594,
"learning_rate": 8.894765374238129e-06,
"loss": 3.396644115447998,
"step": 1388
},
{
"epoch": 0.8841502227880331,
"grad_norm": 8.069088660483963,
"learning_rate": 8.89244169693111e-06,
"loss": 2.0669808387756348,
"step": 1389
},
{
"epoch": 0.8847867600254615,
"grad_norm": 14.457108495239018,
"learning_rate": 8.890115883702541e-06,
"loss": 2.5612125396728516,
"step": 1390
},
{
"epoch": 0.8854232972628899,
"grad_norm": 10.12258697850429,
"learning_rate": 8.887787935828684e-06,
"loss": 2.816615104675293,
"step": 1391
},
{
"epoch": 0.8860598345003182,
"grad_norm": 18.87692819262475,
"learning_rate": 8.885457854586966e-06,
"loss": 3.1916136741638184,
"step": 1392
},
{
"epoch": 0.8866963717377466,
"grad_norm": 15.834949119750588,
"learning_rate": 8.883125641255983e-06,
"loss": 3.166013717651367,
"step": 1393
},
{
"epoch": 0.887332908975175,
"grad_norm": 8.248272140916804,
"learning_rate": 8.880791297115507e-06,
"loss": 2.665917158126831,
"step": 1394
},
{
"epoch": 0.8879694462126034,
"grad_norm": 7.997372015258678,
"learning_rate": 8.878454823446474e-06,
"loss": 3.2195849418640137,
"step": 1395
},
{
"epoch": 0.8886059834500318,
"grad_norm": 8.305528328608894,
"learning_rate": 8.87611622153099e-06,
"loss": 2.7519631385803223,
"step": 1396
},
{
"epoch": 0.8892425206874602,
"grad_norm": 10.925992673424062,
"learning_rate": 8.87377549265233e-06,
"loss": 3.0465550422668457,
"step": 1397
},
{
"epoch": 0.8898790579248886,
"grad_norm": 5.948482134120402,
"learning_rate": 8.871432638094934e-06,
"loss": 3.009675979614258,
"step": 1398
},
{
"epoch": 0.890515595162317,
"grad_norm": 11.127002967272093,
"learning_rate": 8.869087659144413e-06,
"loss": 2.685904026031494,
"step": 1399
},
{
"epoch": 0.8911521323997453,
"grad_norm": 8.97087685278395,
"learning_rate": 8.866740557087539e-06,
"loss": 2.706536293029785,
"step": 1400
},
{
"epoch": 0.8917886696371737,
"grad_norm": 12.335864123989294,
"learning_rate": 8.864391333212248e-06,
"loss": 2.7309699058532715,
"step": 1401
},
{
"epoch": 0.8924252068746021,
"grad_norm": 11.930723853868875,
"learning_rate": 8.862039988807647e-06,
"loss": 2.5690298080444336,
"step": 1402
},
{
"epoch": 0.8930617441120305,
"grad_norm": 16.887269252746883,
"learning_rate": 8.859686525164e-06,
"loss": 3.1849586963653564,
"step": 1403
},
{
"epoch": 0.893698281349459,
"grad_norm": 10.744619526984696,
"learning_rate": 8.857330943572737e-06,
"loss": 2.9309535026550293,
"step": 1404
},
{
"epoch": 0.8943348185868873,
"grad_norm": 10.942121115571394,
"learning_rate": 8.854973245326451e-06,
"loss": 2.506758451461792,
"step": 1405
},
{
"epoch": 0.8949713558243158,
"grad_norm": 10.889820092495796,
"learning_rate": 8.852613431718891e-06,
"loss": 3.1235971450805664,
"step": 1406
},
{
"epoch": 0.8956078930617442,
"grad_norm": 18.299621145317612,
"learning_rate": 8.850251504044975e-06,
"loss": 2.1713199615478516,
"step": 1407
},
{
"epoch": 0.8962444302991726,
"grad_norm": 8.906668963816784,
"learning_rate": 8.847887463600778e-06,
"loss": 2.947065830230713,
"step": 1408
},
{
"epoch": 0.8968809675366008,
"grad_norm": 13.573712364791167,
"learning_rate": 8.845521311683528e-06,
"loss": 3.0217905044555664,
"step": 1409
},
{
"epoch": 0.8975175047740293,
"grad_norm": 8.352908652813237,
"learning_rate": 8.84315304959162e-06,
"loss": 2.857410192489624,
"step": 1410
},
{
"epoch": 0.8981540420114577,
"grad_norm": 16.935545380125163,
"learning_rate": 8.840782678624604e-06,
"loss": 2.941894292831421,
"step": 1411
},
{
"epoch": 0.8987905792488861,
"grad_norm": 9.37896485158657,
"learning_rate": 8.838410200083188e-06,
"loss": 2.708009719848633,
"step": 1412
},
{
"epoch": 0.8994271164863145,
"grad_norm": 11.051573410175154,
"learning_rate": 8.836035615269231e-06,
"loss": 2.9529104232788086,
"step": 1413
},
{
"epoch": 0.9000636537237429,
"grad_norm": 9.180184437257068,
"learning_rate": 8.833658925485759e-06,
"loss": 2.0678229331970215,
"step": 1414
},
{
"epoch": 0.9007001909611713,
"grad_norm": 12.008331322969955,
"learning_rate": 8.83128013203694e-06,
"loss": 2.78902006149292,
"step": 1415
},
{
"epoch": 0.9013367281985997,
"grad_norm": 15.298097773896862,
"learning_rate": 8.82889923622811e-06,
"loss": 3.3059544563293457,
"step": 1416
},
{
"epoch": 0.901973265436028,
"grad_norm": 12.92119685958626,
"learning_rate": 8.826516239365744e-06,
"loss": 3.177778720855713,
"step": 1417
},
{
"epoch": 0.9026098026734564,
"grad_norm": 7.231262540738608,
"learning_rate": 8.824131142757482e-06,
"loss": 2.2111966609954834,
"step": 1418
},
{
"epoch": 0.9032463399108848,
"grad_norm": 30.231236682369715,
"learning_rate": 8.82174394771211e-06,
"loss": 4.152851104736328,
"step": 1419
},
{
"epoch": 0.9038828771483132,
"grad_norm": 6.955888883185339,
"learning_rate": 8.819354655539567e-06,
"loss": 2.9363958835601807,
"step": 1420
},
{
"epoch": 0.9045194143857416,
"grad_norm": 6.221227295523858,
"learning_rate": 8.816963267550943e-06,
"loss": 2.814230442047119,
"step": 1421
},
{
"epoch": 0.90515595162317,
"grad_norm": 20.576731164909663,
"learning_rate": 8.814569785058478e-06,
"loss": 3.2863783836364746,
"step": 1422
},
{
"epoch": 0.9057924888605984,
"grad_norm": 10.124190036592271,
"learning_rate": 8.812174209375561e-06,
"loss": 2.850348949432373,
"step": 1423
},
{
"epoch": 0.9064290260980268,
"grad_norm": 15.761874926578889,
"learning_rate": 8.809776541816728e-06,
"loss": 2.9629907608032227,
"step": 1424
},
{
"epoch": 0.9070655633354551,
"grad_norm": 11.721717574823254,
"learning_rate": 8.80737678369767e-06,
"loss": 3.1664376258850098,
"step": 1425
},
{
"epoch": 0.9077021005728835,
"grad_norm": 17.22977161945999,
"learning_rate": 8.804974936335213e-06,
"loss": 2.2064313888549805,
"step": 1426
},
{
"epoch": 0.9083386378103119,
"grad_norm": 10.830257301815648,
"learning_rate": 8.802571001047343e-06,
"loss": 2.7995364665985107,
"step": 1427
},
{
"epoch": 0.9089751750477403,
"grad_norm": 6.327442683683713,
"learning_rate": 8.80016497915318e-06,
"loss": 2.714388370513916,
"step": 1428
},
{
"epoch": 0.9096117122851687,
"grad_norm": 8.555210132299838,
"learning_rate": 8.797756871972994e-06,
"loss": 2.8293895721435547,
"step": 1429
},
{
"epoch": 0.9102482495225971,
"grad_norm": 13.3626090821256,
"learning_rate": 8.795346680828203e-06,
"loss": 2.70363712310791,
"step": 1430
},
{
"epoch": 0.9108847867600255,
"grad_norm": 9.989760346933297,
"learning_rate": 8.79293440704136e-06,
"loss": 2.878157138824463,
"step": 1431
},
{
"epoch": 0.9115213239974539,
"grad_norm": 17.574273760979565,
"learning_rate": 8.790520051936172e-06,
"loss": 3.03108811378479,
"step": 1432
},
{
"epoch": 0.9121578612348823,
"grad_norm": 8.686800566695476,
"learning_rate": 8.788103616837476e-06,
"loss": 2.6360678672790527,
"step": 1433
},
{
"epoch": 0.9127943984723106,
"grad_norm": 9.07875542348685,
"learning_rate": 8.78568510307126e-06,
"loss": 2.9197378158569336,
"step": 1434
},
{
"epoch": 0.913430935709739,
"grad_norm": 10.295645175866365,
"learning_rate": 8.783264511964646e-06,
"loss": 2.4602155685424805,
"step": 1435
},
{
"epoch": 0.9140674729471674,
"grad_norm": 8.86363992461272,
"learning_rate": 8.7808418448459e-06,
"loss": 2.739168643951416,
"step": 1436
},
{
"epoch": 0.9147040101845958,
"grad_norm": 17.086921316775623,
"learning_rate": 8.778417103044423e-06,
"loss": 2.7610721588134766,
"step": 1437
},
{
"epoch": 0.9153405474220242,
"grad_norm": 10.019750837918233,
"learning_rate": 8.775990287890762e-06,
"loss": 2.855961322784424,
"step": 1438
},
{
"epoch": 0.9159770846594526,
"grad_norm": 12.118780675229214,
"learning_rate": 8.773561400716595e-06,
"loss": 2.713207483291626,
"step": 1439
},
{
"epoch": 0.916613621896881,
"grad_norm": 23.77219006288666,
"learning_rate": 8.771130442854739e-06,
"loss": 3.4736533164978027,
"step": 1440
},
{
"epoch": 0.9172501591343094,
"grad_norm": 16.064787393783657,
"learning_rate": 8.768697415639145e-06,
"loss": 1.408898949623108,
"step": 1441
},
{
"epoch": 0.9178866963717377,
"grad_norm": 21.290176402581753,
"learning_rate": 8.766262320404905e-06,
"loss": 2.7757620811462402,
"step": 1442
},
{
"epoch": 0.9185232336091661,
"grad_norm": 6.002070652945452,
"learning_rate": 8.76382515848824e-06,
"loss": 1.9027442932128906,
"step": 1443
},
{
"epoch": 0.9191597708465945,
"grad_norm": 21.26932231262647,
"learning_rate": 8.761385931226512e-06,
"loss": 3.4795098304748535,
"step": 1444
},
{
"epoch": 0.9197963080840229,
"grad_norm": 11.06161342307327,
"learning_rate": 8.758944639958205e-06,
"loss": 2.4269492626190186,
"step": 1445
},
{
"epoch": 0.9204328453214513,
"grad_norm": 11.03285367018562,
"learning_rate": 8.75650128602295e-06,
"loss": 2.875896453857422,
"step": 1446
},
{
"epoch": 0.9210693825588797,
"grad_norm": 10.287848138362884,
"learning_rate": 8.754055870761496e-06,
"loss": 2.742246389389038,
"step": 1447
},
{
"epoch": 0.9217059197963081,
"grad_norm": 12.590892977618639,
"learning_rate": 8.751608395515736e-06,
"loss": 2.545811891555786,
"step": 1448
},
{
"epoch": 0.9223424570337365,
"grad_norm": 9.673003668850864,
"learning_rate": 8.749158861628681e-06,
"loss": 2.839231014251709,
"step": 1449
},
{
"epoch": 0.9229789942711648,
"grad_norm": 10.17123811958482,
"learning_rate": 8.746707270444479e-06,
"loss": 3.0635018348693848,
"step": 1450
},
{
"epoch": 0.9236155315085932,
"grad_norm": 8.243523500914826,
"learning_rate": 8.744253623308407e-06,
"loss": 2.7437732219696045,
"step": 1451
},
{
"epoch": 0.9242520687460216,
"grad_norm": 17.156068983624365,
"learning_rate": 8.74179792156687e-06,
"loss": 2.2365593910217285,
"step": 1452
},
{
"epoch": 0.92488860598345,
"grad_norm": 9.491232000478549,
"learning_rate": 8.739340166567397e-06,
"loss": 2.858064651489258,
"step": 1453
},
{
"epoch": 0.9255251432208784,
"grad_norm": 11.029283599258722,
"learning_rate": 8.736880359658644e-06,
"loss": 2.6800594329833984,
"step": 1454
},
{
"epoch": 0.9261616804583068,
"grad_norm": 15.387910212162224,
"learning_rate": 8.734418502190398e-06,
"loss": 2.687188148498535,
"step": 1455
},
{
"epoch": 0.9267982176957352,
"grad_norm": 12.536653251071481,
"learning_rate": 8.731954595513567e-06,
"loss": 2.1078858375549316,
"step": 1456
},
{
"epoch": 0.9274347549331636,
"grad_norm": 9.031328925760082,
"learning_rate": 8.729488640980184e-06,
"loss": 2.6077308654785156,
"step": 1457
},
{
"epoch": 0.928071292170592,
"grad_norm": 12.265888896157241,
"learning_rate": 8.727020639943408e-06,
"loss": 2.6413002014160156,
"step": 1458
},
{
"epoch": 0.9287078294080203,
"grad_norm": 20.994279033111983,
"learning_rate": 8.724550593757515e-06,
"loss": 2.8199195861816406,
"step": 1459
},
{
"epoch": 0.9293443666454487,
"grad_norm": 8.39466199019947,
"learning_rate": 8.722078503777913e-06,
"loss": 2.7334635257720947,
"step": 1460
},
{
"epoch": 0.9299809038828771,
"grad_norm": 32.385117522692184,
"learning_rate": 8.71960437136112e-06,
"loss": 2.6803812980651855,
"step": 1461
},
{
"epoch": 0.9306174411203055,
"grad_norm": 13.34959832293905,
"learning_rate": 8.717128197864786e-06,
"loss": 2.946092128753662,
"step": 1462
},
{
"epoch": 0.9312539783577339,
"grad_norm": 6.930835219331372,
"learning_rate": 8.714649984647671e-06,
"loss": 2.860358715057373,
"step": 1463
},
{
"epoch": 0.9318905155951623,
"grad_norm": 13.35982857598474,
"learning_rate": 8.712169733069661e-06,
"loss": 3.013518810272217,
"step": 1464
},
{
"epoch": 0.9325270528325907,
"grad_norm": 21.64470981198118,
"learning_rate": 8.70968744449176e-06,
"loss": 2.914074182510376,
"step": 1465
},
{
"epoch": 0.9331635900700191,
"grad_norm": 10.043908040113202,
"learning_rate": 8.707203120276088e-06,
"loss": 2.9579620361328125,
"step": 1466
},
{
"epoch": 0.9338001273074474,
"grad_norm": 12.852889894909499,
"learning_rate": 8.704716761785881e-06,
"loss": 2.7109644412994385,
"step": 1467
},
{
"epoch": 0.9344366645448758,
"grad_norm": 20.892577897821393,
"learning_rate": 8.702228370385491e-06,
"loss": 2.1865947246551514,
"step": 1468
},
{
"epoch": 0.9350732017823042,
"grad_norm": 5.255934140558565,
"learning_rate": 8.699737947440389e-06,
"loss": 1.8131765127182007,
"step": 1469
},
{
"epoch": 0.9357097390197326,
"grad_norm": 10.931141906832528,
"learning_rate": 8.697245494317161e-06,
"loss": 2.68973970413208,
"step": 1470
},
{
"epoch": 0.936346276257161,
"grad_norm": 7.480666948740592,
"learning_rate": 8.6947510123835e-06,
"loss": 2.608941078186035,
"step": 1471
},
{
"epoch": 0.9369828134945895,
"grad_norm": 9.197846809716948,
"learning_rate": 8.692254503008221e-06,
"loss": 2.719707489013672,
"step": 1472
},
{
"epoch": 0.9376193507320179,
"grad_norm": 17.65088384619828,
"learning_rate": 8.689755967561248e-06,
"loss": 3.246102809906006,
"step": 1473
},
{
"epoch": 0.9382558879694463,
"grad_norm": 6.859171370041902,
"learning_rate": 8.687255407413612e-06,
"loss": 2.854102373123169,
"step": 1474
},
{
"epoch": 0.9388924252068745,
"grad_norm": 15.917817820678835,
"learning_rate": 8.684752823937466e-06,
"loss": 2.3989462852478027,
"step": 1475
},
{
"epoch": 0.939528962444303,
"grad_norm": 13.405391328912662,
"learning_rate": 8.682248218506061e-06,
"loss": 2.702059745788574,
"step": 1476
},
{
"epoch": 0.9401654996817314,
"grad_norm": 11.22720816995638,
"learning_rate": 8.679741592493766e-06,
"loss": 2.9737422466278076,
"step": 1477
},
{
"epoch": 0.9408020369191598,
"grad_norm": 24.210110695277212,
"learning_rate": 8.677232947276056e-06,
"loss": 2.9821133613586426,
"step": 1478
},
{
"epoch": 0.9414385741565882,
"grad_norm": 9.828711134527046,
"learning_rate": 8.674722284229514e-06,
"loss": 3.1264657974243164,
"step": 1479
},
{
"epoch": 0.9420751113940166,
"grad_norm": 12.506343891248635,
"learning_rate": 8.672209604731828e-06,
"loss": 2.760064125061035,
"step": 1480
},
{
"epoch": 0.942711648631445,
"grad_norm": 19.506236940040612,
"learning_rate": 8.669694910161799e-06,
"loss": 2.769021511077881,
"step": 1481
},
{
"epoch": 0.9433481858688734,
"grad_norm": 17.374370719201394,
"learning_rate": 8.667178201899326e-06,
"loss": 3.542201042175293,
"step": 1482
},
{
"epoch": 0.9439847231063018,
"grad_norm": 13.011819122669904,
"learning_rate": 8.66465948132542e-06,
"loss": 2.921635150909424,
"step": 1483
},
{
"epoch": 0.9446212603437301,
"grad_norm": 12.177558375703372,
"learning_rate": 8.662138749822191e-06,
"loss": 2.766899585723877,
"step": 1484
},
{
"epoch": 0.9452577975811585,
"grad_norm": 21.145265923147296,
"learning_rate": 8.659616008772854e-06,
"loss": 2.9893975257873535,
"step": 1485
},
{
"epoch": 0.9458943348185869,
"grad_norm": 31.766007666900276,
"learning_rate": 8.657091259561725e-06,
"loss": 2.5439658164978027,
"step": 1486
},
{
"epoch": 0.9465308720560153,
"grad_norm": 40.732855528912786,
"learning_rate": 8.654564503574228e-06,
"loss": 2.271054983139038,
"step": 1487
},
{
"epoch": 0.9471674092934437,
"grad_norm": 12.740859681461453,
"learning_rate": 8.65203574219688e-06,
"loss": 2.9355921745300293,
"step": 1488
},
{
"epoch": 0.9478039465308721,
"grad_norm": 14.131184418169383,
"learning_rate": 8.649504976817306e-06,
"loss": 2.431492567062378,
"step": 1489
},
{
"epoch": 0.9484404837683005,
"grad_norm": 12.272107098901829,
"learning_rate": 8.646972208824225e-06,
"loss": 2.5882158279418945,
"step": 1490
},
{
"epoch": 0.9490770210057289,
"grad_norm": 10.001110098598382,
"learning_rate": 8.64443743960746e-06,
"loss": 2.8265151977539062,
"step": 1491
},
{
"epoch": 0.9497135582431572,
"grad_norm": 7.870281496081134,
"learning_rate": 8.641900670557925e-06,
"loss": 2.6009325981140137,
"step": 1492
},
{
"epoch": 0.9503500954805856,
"grad_norm": 6.606186180448112,
"learning_rate": 8.639361903067638e-06,
"loss": 2.490417003631592,
"step": 1493
},
{
"epoch": 0.950986632718014,
"grad_norm": 16.32749021566751,
"learning_rate": 8.636821138529712e-06,
"loss": 2.382847547531128,
"step": 1494
},
{
"epoch": 0.9516231699554424,
"grad_norm": 18.58714970509347,
"learning_rate": 8.634278378338355e-06,
"loss": 1.9837441444396973,
"step": 1495
},
{
"epoch": 0.9522597071928708,
"grad_norm": 9.236163048934394,
"learning_rate": 8.631733623888867e-06,
"loss": 2.772226095199585,
"step": 1496
},
{
"epoch": 0.9528962444302992,
"grad_norm": 8.323499539185473,
"learning_rate": 8.629186876577648e-06,
"loss": 3.0804758071899414,
"step": 1497
},
{
"epoch": 0.9535327816677276,
"grad_norm": 13.627861487868559,
"learning_rate": 8.62663813780219e-06,
"loss": 2.870920181274414,
"step": 1498
},
{
"epoch": 0.954169318905156,
"grad_norm": 8.187420035976405,
"learning_rate": 8.624087408961075e-06,
"loss": 2.554389476776123,
"step": 1499
},
{
"epoch": 0.9548058561425843,
"grad_norm": 26.241958745271468,
"learning_rate": 8.621534691453981e-06,
"loss": 2.704555034637451,
"step": 1500
},
{
"epoch": 0.9554423933800127,
"grad_norm": 7.5367818384376175,
"learning_rate": 8.618979986681673e-06,
"loss": 2.818161725997925,
"step": 1501
},
{
"epoch": 0.9560789306174411,
"grad_norm": 8.177986099961563,
"learning_rate": 8.616423296046008e-06,
"loss": 2.6251158714294434,
"step": 1502
},
{
"epoch": 0.9567154678548695,
"grad_norm": 13.83128265339274,
"learning_rate": 8.613864620949937e-06,
"loss": 2.8486170768737793,
"step": 1503
},
{
"epoch": 0.9573520050922979,
"grad_norm": 9.839905952517228,
"learning_rate": 8.61130396279749e-06,
"loss": 2.5475683212280273,
"step": 1504
},
{
"epoch": 0.9579885423297263,
"grad_norm": 12.113119789871057,
"learning_rate": 8.608741322993798e-06,
"loss": 2.8252108097076416,
"step": 1505
},
{
"epoch": 0.9586250795671547,
"grad_norm": 8.472124174735304,
"learning_rate": 8.60617670294507e-06,
"loss": 3.012871265411377,
"step": 1506
},
{
"epoch": 0.9592616168045831,
"grad_norm": 11.311776521607479,
"learning_rate": 8.603610104058605e-06,
"loss": 2.7057697772979736,
"step": 1507
},
{
"epoch": 0.9598981540420115,
"grad_norm": 11.951505741269612,
"learning_rate": 8.601041527742787e-06,
"loss": 3.0853965282440186,
"step": 1508
},
{
"epoch": 0.9605346912794398,
"grad_norm": 11.891976254889936,
"learning_rate": 8.598470975407084e-06,
"loss": 2.3195412158966064,
"step": 1509
},
{
"epoch": 0.9611712285168682,
"grad_norm": 14.493443942962644,
"learning_rate": 8.595898448462053e-06,
"loss": 2.4563865661621094,
"step": 1510
},
{
"epoch": 0.9618077657542966,
"grad_norm": 20.022326730150414,
"learning_rate": 8.593323948319327e-06,
"loss": 3.090728759765625,
"step": 1511
},
{
"epoch": 0.962444302991725,
"grad_norm": 8.803355787881419,
"learning_rate": 8.59074747639163e-06,
"loss": 2.791250467300415,
"step": 1512
},
{
"epoch": 0.9630808402291534,
"grad_norm": 9.726485152075368,
"learning_rate": 8.588169034092761e-06,
"loss": 2.771373748779297,
"step": 1513
},
{
"epoch": 0.9637173774665818,
"grad_norm": 17.172151469542165,
"learning_rate": 8.585588622837606e-06,
"loss": 2.77121901512146,
"step": 1514
},
{
"epoch": 0.9643539147040102,
"grad_norm": 10.659171167179752,
"learning_rate": 8.583006244042126e-06,
"loss": 2.3193166255950928,
"step": 1515
},
{
"epoch": 0.9649904519414386,
"grad_norm": 7.045433564971674,
"learning_rate": 8.580421899123365e-06,
"loss": 2.8459174633026123,
"step": 1516
},
{
"epoch": 0.9656269891788669,
"grad_norm": 7.611562545836365,
"learning_rate": 8.577835589499448e-06,
"loss": 2.073949098587036,
"step": 1517
},
{
"epoch": 0.9662635264162953,
"grad_norm": 9.501402373558316,
"learning_rate": 8.575247316589573e-06,
"loss": 2.8364038467407227,
"step": 1518
},
{
"epoch": 0.9669000636537237,
"grad_norm": 10.663972357593154,
"learning_rate": 8.572657081814015e-06,
"loss": 2.4350342750549316,
"step": 1519
},
{
"epoch": 0.9675366008911521,
"grad_norm": 7.670157567002363,
"learning_rate": 8.570064886594132e-06,
"loss": 2.678783416748047,
"step": 1520
},
{
"epoch": 0.9681731381285805,
"grad_norm": 12.829595385967687,
"learning_rate": 8.567470732352354e-06,
"loss": 2.789569854736328,
"step": 1521
},
{
"epoch": 0.9688096753660089,
"grad_norm": 14.896042959437638,
"learning_rate": 8.564874620512184e-06,
"loss": 2.594637870788574,
"step": 1522
},
{
"epoch": 0.9694462126034373,
"grad_norm": 7.643925149013311,
"learning_rate": 8.562276552498201e-06,
"loss": 2.6070942878723145,
"step": 1523
},
{
"epoch": 0.9700827498408657,
"grad_norm": 11.0729438128728,
"learning_rate": 8.55967652973606e-06,
"loss": 3.331258535385132,
"step": 1524
},
{
"epoch": 0.9707192870782941,
"grad_norm": 10.651256199318965,
"learning_rate": 8.557074553652483e-06,
"loss": 4.254055976867676,
"step": 1525
},
{
"epoch": 0.9713558243157224,
"grad_norm": 12.234073637186876,
"learning_rate": 8.554470625675271e-06,
"loss": 2.9486451148986816,
"step": 1526
},
{
"epoch": 0.9719923615531508,
"grad_norm": 11.010439780773314,
"learning_rate": 8.551864747233288e-06,
"loss": 2.861355781555176,
"step": 1527
},
{
"epoch": 0.9726288987905792,
"grad_norm": 12.227179490251034,
"learning_rate": 8.549256919756475e-06,
"loss": 3.217989921569824,
"step": 1528
},
{
"epoch": 0.9732654360280076,
"grad_norm": 10.441061006667185,
"learning_rate": 8.546647144675837e-06,
"loss": 2.611697196960449,
"step": 1529
},
{
"epoch": 0.973901973265436,
"grad_norm": 7.047109217495742,
"learning_rate": 8.544035423423455e-06,
"loss": 2.819429874420166,
"step": 1530
},
{
"epoch": 0.9745385105028644,
"grad_norm": 14.928594244386037,
"learning_rate": 8.54142175743247e-06,
"loss": 2.841555118560791,
"step": 1531
},
{
"epoch": 0.9751750477402928,
"grad_norm": 15.51848853672532,
"learning_rate": 8.538806148137096e-06,
"loss": 2.815037250518799,
"step": 1532
},
{
"epoch": 0.9758115849777212,
"grad_norm": 11.219557094986436,
"learning_rate": 8.53618859697261e-06,
"loss": 2.755141019821167,
"step": 1533
},
{
"epoch": 0.9764481222151495,
"grad_norm": 9.756635768103603,
"learning_rate": 8.533569105375357e-06,
"loss": 2.5859339237213135,
"step": 1534
},
{
"epoch": 0.9770846594525779,
"grad_norm": 16.576216282836434,
"learning_rate": 8.530947674782741e-06,
"loss": 1.9348076581954956,
"step": 1535
},
{
"epoch": 0.9777211966900063,
"grad_norm": 9.797743250760059,
"learning_rate": 8.528324306633242e-06,
"loss": 2.7046377658843994,
"step": 1536
},
{
"epoch": 0.9783577339274347,
"grad_norm": 23.995662617500987,
"learning_rate": 8.525699002366387e-06,
"loss": 2.7023534774780273,
"step": 1537
},
{
"epoch": 0.9789942711648632,
"grad_norm": 10.395050557255956,
"learning_rate": 8.523071763422783e-06,
"loss": 2.550647735595703,
"step": 1538
},
{
"epoch": 0.9796308084022916,
"grad_norm": 9.417150815396226,
"learning_rate": 8.520442591244082e-06,
"loss": 2.7410709857940674,
"step": 1539
},
{
"epoch": 0.98026734563972,
"grad_norm": 8.895620373795214,
"learning_rate": 8.517811487273006e-06,
"loss": 2.1507298946380615,
"step": 1540
},
{
"epoch": 0.9809038828771484,
"grad_norm": 12.658307430881369,
"learning_rate": 8.515178452953341e-06,
"loss": 2.679276943206787,
"step": 1541
},
{
"epoch": 0.9815404201145767,
"grad_norm": 16.42879642738362,
"learning_rate": 8.512543489729921e-06,
"loss": 3.140566110610962,
"step": 1542
},
{
"epoch": 0.982176957352005,
"grad_norm": 8.84247180780308,
"learning_rate": 8.509906599048645e-06,
"loss": 2.7825236320495605,
"step": 1543
},
{
"epoch": 0.9828134945894335,
"grad_norm": 17.36356516865007,
"learning_rate": 8.507267782356471e-06,
"loss": 2.2732324600219727,
"step": 1544
},
{
"epoch": 0.9834500318268619,
"grad_norm": 29.981262242077374,
"learning_rate": 8.504627041101414e-06,
"loss": 3.3028225898742676,
"step": 1545
},
{
"epoch": 0.9840865690642903,
"grad_norm": 9.68713457137553,
"learning_rate": 8.501984376732535e-06,
"loss": 2.693692207336426,
"step": 1546
},
{
"epoch": 0.9847231063017187,
"grad_norm": 6.352198735843667,
"learning_rate": 8.499339790699967e-06,
"loss": 2.297257900238037,
"step": 1547
},
{
"epoch": 0.9853596435391471,
"grad_norm": 11.840233932732097,
"learning_rate": 8.496693284454882e-06,
"loss": 2.068459987640381,
"step": 1548
},
{
"epoch": 0.9859961807765755,
"grad_norm": 6.504503565462898,
"learning_rate": 8.494044859449518e-06,
"loss": 2.554231643676758,
"step": 1549
},
{
"epoch": 0.9866327180140039,
"grad_norm": 16.154139207848132,
"learning_rate": 8.491394517137153e-06,
"loss": 2.6632418632507324,
"step": 1550
},
{
"epoch": 0.9872692552514322,
"grad_norm": 8.142231355844261,
"learning_rate": 8.488742258972132e-06,
"loss": 2.740466356277466,
"step": 1551
},
{
"epoch": 0.9879057924888606,
"grad_norm": 9.893905854679373,
"learning_rate": 8.486088086409838e-06,
"loss": 2.541133403778076,
"step": 1552
},
{
"epoch": 0.988542329726289,
"grad_norm": 8.076922941798173,
"learning_rate": 8.483432000906715e-06,
"loss": 2.548953056335449,
"step": 1553
},
{
"epoch": 0.9891788669637174,
"grad_norm": 13.7974032727787,
"learning_rate": 8.480774003920247e-06,
"loss": 2.322848320007324,
"step": 1554
},
{
"epoch": 0.9898154042011458,
"grad_norm": 13.016432084972447,
"learning_rate": 8.478114096908974e-06,
"loss": 2.960254430770874,
"step": 1555
},
{
"epoch": 0.9904519414385742,
"grad_norm": 10.281067028596047,
"learning_rate": 8.475452281332484e-06,
"loss": 2.9151744842529297,
"step": 1556
},
{
"epoch": 0.9910884786760026,
"grad_norm": 13.783399482618753,
"learning_rate": 8.472788558651405e-06,
"loss": 2.9045119285583496,
"step": 1557
},
{
"epoch": 0.991725015913431,
"grad_norm": 8.37074340276371,
"learning_rate": 8.47012293032742e-06,
"loss": 1.4143991470336914,
"step": 1558
},
{
"epoch": 0.9923615531508593,
"grad_norm": 7.656752111840836,
"learning_rate": 8.467455397823254e-06,
"loss": 2.999601364135742,
"step": 1559
},
{
"epoch": 0.9929980903882877,
"grad_norm": 8.887901058027566,
"learning_rate": 8.464785962602678e-06,
"loss": 2.7708587646484375,
"step": 1560
},
{
"epoch": 0.9936346276257161,
"grad_norm": 9.58320143577911,
"learning_rate": 8.462114626130503e-06,
"loss": 3.1646625995635986,
"step": 1561
},
{
"epoch": 0.9942711648631445,
"grad_norm": 15.311980878917554,
"learning_rate": 8.459441389872589e-06,
"loss": 2.4998202323913574,
"step": 1562
},
{
"epoch": 0.9949077021005729,
"grad_norm": 18.622779764340642,
"learning_rate": 8.456766255295837e-06,
"loss": 2.445733070373535,
"step": 1563
},
{
"epoch": 0.9955442393380013,
"grad_norm": 29.80278794741722,
"learning_rate": 8.454089223868186e-06,
"loss": 2.949950695037842,
"step": 1564
},
{
"epoch": 0.9961807765754297,
"grad_norm": 8.981348515972892,
"learning_rate": 8.451410297058623e-06,
"loss": 2.407602548599243,
"step": 1565
},
{
"epoch": 0.9968173138128581,
"grad_norm": 10.86318288054507,
"learning_rate": 8.448729476337166e-06,
"loss": 1.6942147016525269,
"step": 1566
},
{
"epoch": 0.9974538510502864,
"grad_norm": 6.849075208924991,
"learning_rate": 8.446046763174877e-06,
"loss": 2.9985971450805664,
"step": 1567
},
{
"epoch": 0.9980903882877148,
"grad_norm": 19.64826633587671,
"learning_rate": 8.443362159043862e-06,
"loss": 2.626286029815674,
"step": 1568
},
{
"epoch": 0.9987269255251432,
"grad_norm": 12.44239321093429,
"learning_rate": 8.440675665417252e-06,
"loss": 2.616281032562256,
"step": 1569
},
{
"epoch": 0.9993634627625716,
"grad_norm": 5.899105846398005,
"learning_rate": 8.437987283769226e-06,
"loss": 2.447068452835083,
"step": 1570
},
{
"epoch": 1.0,
"grad_norm": 27.94940618217712,
"learning_rate": 8.435297015574993e-06,
"loss": 2.3542513847351074,
"step": 1571
},
{
"epoch": 1.0006365372374284,
"grad_norm": 15.685358641118798,
"learning_rate": 8.432604862310803e-06,
"loss": 1.9478979110717773,
"step": 1572
},
{
"epoch": 1.0012730744748568,
"grad_norm": 8.85788052553538,
"learning_rate": 8.42991082545393e-06,
"loss": 2.1175429821014404,
"step": 1573
},
{
"epoch": 1.0019096117122852,
"grad_norm": 6.592589839240197,
"learning_rate": 8.427214906482693e-06,
"loss": 1.3762362003326416,
"step": 1574
},
{
"epoch": 1.0025461489497136,
"grad_norm": 8.129854163091727,
"learning_rate": 8.424517106876436e-06,
"loss": 1.7788302898406982,
"step": 1575
},
{
"epoch": 1.003182686187142,
"grad_norm": 14.092814327955969,
"learning_rate": 8.42181742811554e-06,
"loss": 1.1838836669921875,
"step": 1576
},
{
"epoch": 1.0038192234245704,
"grad_norm": 12.036099446001996,
"learning_rate": 8.419115871681414e-06,
"loss": 1.7293897867202759,
"step": 1577
},
{
"epoch": 1.0044557606619988,
"grad_norm": 9.88203344495813,
"learning_rate": 8.416412439056498e-06,
"loss": 1.3909540176391602,
"step": 1578
},
{
"epoch": 1.0050922978994272,
"grad_norm": 12.304474894364937,
"learning_rate": 8.413707131724263e-06,
"loss": 1.5465770959854126,
"step": 1579
},
{
"epoch": 1.0057288351368554,
"grad_norm": 9.143181239083816,
"learning_rate": 8.410999951169206e-06,
"loss": 1.8051213026046753,
"step": 1580
},
{
"epoch": 1.0063653723742838,
"grad_norm": 14.138006600362315,
"learning_rate": 8.408290898876856e-06,
"loss": 1.6632486581802368,
"step": 1581
},
{
"epoch": 1.0070019096117122,
"grad_norm": 18.78978923745861,
"learning_rate": 8.405579976333764e-06,
"loss": 2.232478141784668,
"step": 1582
},
{
"epoch": 1.0076384468491406,
"grad_norm": 9.46237728283357,
"learning_rate": 8.40286718502751e-06,
"loss": 1.7277331352233887,
"step": 1583
},
{
"epoch": 1.008274984086569,
"grad_norm": 13.544314021896062,
"learning_rate": 8.400152526446701e-06,
"loss": 1.2284635305404663,
"step": 1584
},
{
"epoch": 1.0089115213239974,
"grad_norm": 13.307320270591866,
"learning_rate": 8.397436002080967e-06,
"loss": 1.753080129623413,
"step": 1585
},
{
"epoch": 1.0095480585614258,
"grad_norm": 8.320353525601858,
"learning_rate": 8.39471761342096e-06,
"loss": 2.0175912380218506,
"step": 1586
},
{
"epoch": 1.0101845957988542,
"grad_norm": 8.850243754059989,
"learning_rate": 8.391997361958358e-06,
"loss": 1.502495527267456,
"step": 1587
},
{
"epoch": 1.0108211330362826,
"grad_norm": 12.157825237703662,
"learning_rate": 8.389275249185859e-06,
"loss": 1.617289423942566,
"step": 1588
},
{
"epoch": 1.011457670273711,
"grad_norm": 12.327022025899392,
"learning_rate": 8.386551276597186e-06,
"loss": 0.9483035206794739,
"step": 1589
},
{
"epoch": 1.0120942075111394,
"grad_norm": 12.697210098026716,
"learning_rate": 8.383825445687078e-06,
"loss": 1.5856382846832275,
"step": 1590
},
{
"epoch": 1.0127307447485678,
"grad_norm": 13.618734707796046,
"learning_rate": 8.381097757951294e-06,
"loss": 1.6918671131134033,
"step": 1591
},
{
"epoch": 1.0133672819859962,
"grad_norm": 8.663336283678763,
"learning_rate": 8.378368214886614e-06,
"loss": 1.5759856700897217,
"step": 1592
},
{
"epoch": 1.0140038192234246,
"grad_norm": 12.74485422998351,
"learning_rate": 8.375636817990837e-06,
"loss": 1.5827107429504395,
"step": 1593
},
{
"epoch": 1.014640356460853,
"grad_norm": 12.249941353369072,
"learning_rate": 8.372903568762779e-06,
"loss": 1.51044762134552,
"step": 1594
},
{
"epoch": 1.0152768936982814,
"grad_norm": 12.163581079496975,
"learning_rate": 8.370168468702269e-06,
"loss": 1.3908805847167969,
"step": 1595
},
{
"epoch": 1.0159134309357096,
"grad_norm": 12.64406261605538,
"learning_rate": 8.367431519310154e-06,
"loss": 1.3298909664154053,
"step": 1596
},
{
"epoch": 1.016549968173138,
"grad_norm": 7.839462555287826,
"learning_rate": 8.364692722088297e-06,
"loss": 1.5802236795425415,
"step": 1597
},
{
"epoch": 1.0171865054105664,
"grad_norm": 8.839530298728409,
"learning_rate": 8.361952078539574e-06,
"loss": 1.6365337371826172,
"step": 1598
},
{
"epoch": 1.0178230426479948,
"grad_norm": 15.198183689244685,
"learning_rate": 8.359209590167874e-06,
"loss": 2.3807125091552734,
"step": 1599
},
{
"epoch": 1.0184595798854232,
"grad_norm": 7.8077619973182735,
"learning_rate": 8.356465258478095e-06,
"loss": 1.4906675815582275,
"step": 1600
},
{
"epoch": 1.0190961171228516,
"grad_norm": 12.662926431534332,
"learning_rate": 8.353719084976152e-06,
"loss": 1.9658763408660889,
"step": 1601
},
{
"epoch": 1.01973265436028,
"grad_norm": 9.823848422637573,
"learning_rate": 8.350971071168968e-06,
"loss": 1.9681274890899658,
"step": 1602
},
{
"epoch": 1.0203691915977084,
"grad_norm": 10.064730309571255,
"learning_rate": 8.348221218564477e-06,
"loss": 1.8324413299560547,
"step": 1603
},
{
"epoch": 1.0210057288351368,
"grad_norm": 11.942685996288617,
"learning_rate": 8.34546952867162e-06,
"loss": 1.2514326572418213,
"step": 1604
},
{
"epoch": 1.0216422660725653,
"grad_norm": 10.182151962700415,
"learning_rate": 8.34271600300035e-06,
"loss": 1.5857393741607666,
"step": 1605
},
{
"epoch": 1.0222788033099937,
"grad_norm": 10.063217245392764,
"learning_rate": 8.33996064306162e-06,
"loss": 1.670034646987915,
"step": 1606
},
{
"epoch": 1.022915340547422,
"grad_norm": 46.41291108988851,
"learning_rate": 8.337203450367396e-06,
"loss": 1.8335425853729248,
"step": 1607
},
{
"epoch": 1.0235518777848505,
"grad_norm": 10.28045855353841,
"learning_rate": 8.33444442643065e-06,
"loss": 1.368062138557434,
"step": 1608
},
{
"epoch": 1.0241884150222789,
"grad_norm": 7.572962609232563,
"learning_rate": 8.331683572765355e-06,
"loss": 1.1756280660629272,
"step": 1609
},
{
"epoch": 1.0248249522597073,
"grad_norm": 12.38812491043937,
"learning_rate": 8.328920890886491e-06,
"loss": 1.6752097606658936,
"step": 1610
},
{
"epoch": 1.0254614894971357,
"grad_norm": 10.205420290092412,
"learning_rate": 8.32615638231004e-06,
"loss": 1.743167757987976,
"step": 1611
},
{
"epoch": 1.026098026734564,
"grad_norm": 13.275152342780736,
"learning_rate": 8.323390048552984e-06,
"loss": 2.1024320125579834,
"step": 1612
},
{
"epoch": 1.0267345639719923,
"grad_norm": 14.043989902284169,
"learning_rate": 8.320621891133313e-06,
"loss": 1.6133620738983154,
"step": 1613
},
{
"epoch": 1.0273711012094207,
"grad_norm": 7.852264686561906,
"learning_rate": 8.31785191157001e-06,
"loss": 1.6739814281463623,
"step": 1614
},
{
"epoch": 1.028007638446849,
"grad_norm": 9.716595275602375,
"learning_rate": 8.315080111383062e-06,
"loss": 1.868904948234558,
"step": 1615
},
{
"epoch": 1.0286441756842775,
"grad_norm": 13.483793704840371,
"learning_rate": 8.312306492093457e-06,
"loss": 1.848407506942749,
"step": 1616
},
{
"epoch": 1.0292807129217059,
"grad_norm": 15.349042363205411,
"learning_rate": 8.309531055223177e-06,
"loss": 1.406929850578308,
"step": 1617
},
{
"epoch": 1.0299172501591343,
"grad_norm": 17.03207340688238,
"learning_rate": 8.306753802295204e-06,
"loss": 1.5864617824554443,
"step": 1618
},
{
"epoch": 1.0305537873965627,
"grad_norm": 9.135669188167283,
"learning_rate": 8.303974734833516e-06,
"loss": 1.3825290203094482,
"step": 1619
},
{
"epoch": 1.031190324633991,
"grad_norm": 11.185379992251864,
"learning_rate": 8.301193854363084e-06,
"loss": 1.4131884574890137,
"step": 1620
},
{
"epoch": 1.0318268618714195,
"grad_norm": 13.57316677525489,
"learning_rate": 8.298411162409879e-06,
"loss": 1.4866886138916016,
"step": 1621
},
{
"epoch": 1.0324633991088479,
"grad_norm": 15.475000913383688,
"learning_rate": 8.295626660500861e-06,
"loss": 1.2877317667007446,
"step": 1622
},
{
"epoch": 1.0330999363462763,
"grad_norm": 11.315482981271101,
"learning_rate": 8.29284035016399e-06,
"loss": 1.6080381870269775,
"step": 1623
},
{
"epoch": 1.0337364735837047,
"grad_norm": 8.886044421157376,
"learning_rate": 8.290052232928207e-06,
"loss": 1.622658610343933,
"step": 1624
},
{
"epoch": 1.034373010821133,
"grad_norm": 8.647345686560026,
"learning_rate": 8.287262310323457e-06,
"loss": 1.5690397024154663,
"step": 1625
},
{
"epoch": 1.0350095480585615,
"grad_norm": 14.808459194207277,
"learning_rate": 8.284470583880666e-06,
"loss": 0.9358468055725098,
"step": 1626
},
{
"epoch": 1.03564608529599,
"grad_norm": 14.594537374935992,
"learning_rate": 8.281677055131758e-06,
"loss": 0.9582507610321045,
"step": 1627
},
{
"epoch": 1.0362826225334183,
"grad_norm": 9.592079100648048,
"learning_rate": 8.278881725609637e-06,
"loss": 1.7575924396514893,
"step": 1628
},
{
"epoch": 1.0369191597708467,
"grad_norm": 17.559436776381975,
"learning_rate": 8.276084596848205e-06,
"loss": 1.4164583683013916,
"step": 1629
},
{
"epoch": 1.0375556970082749,
"grad_norm": 8.268171667234254,
"learning_rate": 8.273285670382342e-06,
"loss": 1.7452517747879028,
"step": 1630
},
{
"epoch": 1.0381922342457033,
"grad_norm": 12.883950420509974,
"learning_rate": 8.270484947747924e-06,
"loss": 1.6780678033828735,
"step": 1631
},
{
"epoch": 1.0388287714831317,
"grad_norm": 10.997414804461346,
"learning_rate": 8.2676824304818e-06,
"loss": 2.120938301086426,
"step": 1632
},
{
"epoch": 1.03946530872056,
"grad_norm": 23.64960364916769,
"learning_rate": 8.264878120121816e-06,
"loss": 1.6598933935165405,
"step": 1633
},
{
"epoch": 1.0401018459579885,
"grad_norm": 9.024975550352027,
"learning_rate": 8.262072018206797e-06,
"loss": 1.4836094379425049,
"step": 1634
},
{
"epoch": 1.040738383195417,
"grad_norm": 9.132486507595646,
"learning_rate": 8.25926412627655e-06,
"loss": 1.0962557792663574,
"step": 1635
},
{
"epoch": 1.0413749204328453,
"grad_norm": 8.5793107306121,
"learning_rate": 8.256454445871866e-06,
"loss": 1.4689795970916748,
"step": 1636
},
{
"epoch": 1.0420114576702737,
"grad_norm": 15.016592288950207,
"learning_rate": 8.253642978534517e-06,
"loss": 1.6325409412384033,
"step": 1637
},
{
"epoch": 1.042647994907702,
"grad_norm": 13.412850714810107,
"learning_rate": 8.250829725807254e-06,
"loss": 1.6299569606781006,
"step": 1638
},
{
"epoch": 1.0432845321451305,
"grad_norm": 8.820604892388392,
"learning_rate": 8.24801468923381e-06,
"loss": 1.2900068759918213,
"step": 1639
},
{
"epoch": 1.043921069382559,
"grad_norm": 20.395329531383943,
"learning_rate": 8.245197870358898e-06,
"loss": 0.8589210510253906,
"step": 1640
},
{
"epoch": 1.0445576066199873,
"grad_norm": 9.708335028804578,
"learning_rate": 8.242379270728203e-06,
"loss": 1.7297940254211426,
"step": 1641
},
{
"epoch": 1.0451941438574157,
"grad_norm": 8.801661655986257,
"learning_rate": 8.239558891888393e-06,
"loss": 1.6582874059677124,
"step": 1642
},
{
"epoch": 1.0458306810948441,
"grad_norm": 12.963233181543366,
"learning_rate": 8.236736735387112e-06,
"loss": 1.2064335346221924,
"step": 1643
},
{
"epoch": 1.0464672183322725,
"grad_norm": 13.620795112093358,
"learning_rate": 8.233912802772976e-06,
"loss": 1.1542505025863647,
"step": 1644
},
{
"epoch": 1.047103755569701,
"grad_norm": 7.104123287679158,
"learning_rate": 8.231087095595579e-06,
"loss": 1.7787470817565918,
"step": 1645
},
{
"epoch": 1.0477402928071293,
"grad_norm": 13.901048416335506,
"learning_rate": 8.228259615405483e-06,
"loss": 2.1882848739624023,
"step": 1646
},
{
"epoch": 1.0483768300445575,
"grad_norm": 9.753015737304835,
"learning_rate": 8.225430363754231e-06,
"loss": 1.0799856185913086,
"step": 1647
},
{
"epoch": 1.049013367281986,
"grad_norm": 10.008718057996257,
"learning_rate": 8.222599342194335e-06,
"loss": 1.6267973184585571,
"step": 1648
},
{
"epoch": 1.0496499045194143,
"grad_norm": 9.611378096682964,
"learning_rate": 8.219766552279276e-06,
"loss": 1.9469648599624634,
"step": 1649
},
{
"epoch": 1.0502864417568427,
"grad_norm": 17.05947987283305,
"learning_rate": 8.216931995563506e-06,
"loss": 1.1864004135131836,
"step": 1650
},
{
"epoch": 1.0509229789942711,
"grad_norm": 13.146496163651747,
"learning_rate": 8.21409567360245e-06,
"loss": 1.4743739366531372,
"step": 1651
},
{
"epoch": 1.0515595162316995,
"grad_norm": 7.7948899872850514,
"learning_rate": 8.211257587952495e-06,
"loss": 1.2133256196975708,
"step": 1652
},
{
"epoch": 1.052196053469128,
"grad_norm": 11.999777880354147,
"learning_rate": 8.208417740171004e-06,
"loss": 2.0245907306671143,
"step": 1653
},
{
"epoch": 1.0528325907065563,
"grad_norm": 13.810605113801204,
"learning_rate": 8.205576131816302e-06,
"loss": 1.4931745529174805,
"step": 1654
},
{
"epoch": 1.0534691279439847,
"grad_norm": 9.44986492555049,
"learning_rate": 8.202732764447676e-06,
"loss": 1.8107151985168457,
"step": 1655
},
{
"epoch": 1.0541056651814131,
"grad_norm": 9.301071037131026,
"learning_rate": 8.199887639625389e-06,
"loss": 1.9895278215408325,
"step": 1656
},
{
"epoch": 1.0547422024188415,
"grad_norm": 15.019405626502477,
"learning_rate": 8.19704075891066e-06,
"loss": 1.9410228729248047,
"step": 1657
},
{
"epoch": 1.05537873965627,
"grad_norm": 8.383353222270486,
"learning_rate": 8.194192123865673e-06,
"loss": 1.5013635158538818,
"step": 1658
},
{
"epoch": 1.0560152768936983,
"grad_norm": 8.328580997876143,
"learning_rate": 8.191341736053577e-06,
"loss": 1.2550219297409058,
"step": 1659
},
{
"epoch": 1.0566518141311267,
"grad_norm": 11.050002560420085,
"learning_rate": 8.188489597038482e-06,
"loss": 1.3909871578216553,
"step": 1660
},
{
"epoch": 1.0572883513685551,
"grad_norm": 11.143182923060522,
"learning_rate": 8.185635708385457e-06,
"loss": 1.5867688655853271,
"step": 1661
},
{
"epoch": 1.0579248886059835,
"grad_norm": 10.20222436417481,
"learning_rate": 8.182780071660535e-06,
"loss": 1.4897205829620361,
"step": 1662
},
{
"epoch": 1.0585614258434117,
"grad_norm": 8.931242975370665,
"learning_rate": 8.1799226884307e-06,
"loss": 1.809645414352417,
"step": 1663
},
{
"epoch": 1.0591979630808401,
"grad_norm": 6.970685917982441,
"learning_rate": 8.177063560263906e-06,
"loss": 1.7880017757415771,
"step": 1664
},
{
"epoch": 1.0598345003182685,
"grad_norm": 15.792839546078376,
"learning_rate": 8.174202688729057e-06,
"loss": 1.8639477491378784,
"step": 1665
},
{
"epoch": 1.060471037555697,
"grad_norm": 17.502707465480373,
"learning_rate": 8.171340075396012e-06,
"loss": 1.335056185722351,
"step": 1666
},
{
"epoch": 1.0611075747931253,
"grad_norm": 9.727716405794908,
"learning_rate": 8.168475721835592e-06,
"loss": 1.384705901145935,
"step": 1667
},
{
"epoch": 1.0617441120305537,
"grad_norm": 11.962255893736563,
"learning_rate": 8.165609629619571e-06,
"loss": 2.0106027126312256,
"step": 1668
},
{
"epoch": 1.0623806492679821,
"grad_norm": 14.464330599123075,
"learning_rate": 8.162741800320672e-06,
"loss": 1.2054011821746826,
"step": 1669
},
{
"epoch": 1.0630171865054105,
"grad_norm": 9.253641747409125,
"learning_rate": 8.159872235512581e-06,
"loss": 1.8153573274612427,
"step": 1670
},
{
"epoch": 1.063653723742839,
"grad_norm": 11.975176677076705,
"learning_rate": 8.157000936769923e-06,
"loss": 1.0125563144683838,
"step": 1671
},
{
"epoch": 1.0642902609802674,
"grad_norm": 10.428104840908096,
"learning_rate": 8.154127905668289e-06,
"loss": 1.7303546667099,
"step": 1672
},
{
"epoch": 1.0649267982176958,
"grad_norm": 8.080398278681482,
"learning_rate": 8.15125314378421e-06,
"loss": 1.1772122383117676,
"step": 1673
},
{
"epoch": 1.0655633354551242,
"grad_norm": 7.161770853050351,
"learning_rate": 8.148376652695172e-06,
"loss": 1.873197078704834,
"step": 1674
},
{
"epoch": 1.0661998726925526,
"grad_norm": 14.207244484786004,
"learning_rate": 8.145498433979605e-06,
"loss": 1.9405423402786255,
"step": 1675
},
{
"epoch": 1.066836409929981,
"grad_norm": 88.9360573943596,
"learning_rate": 8.142618489216896e-06,
"loss": 1.2580645084381104,
"step": 1676
},
{
"epoch": 1.0674729471674094,
"grad_norm": 17.09333933215392,
"learning_rate": 8.139736819987368e-06,
"loss": 1.4222946166992188,
"step": 1677
},
{
"epoch": 1.0681094844048378,
"grad_norm": 11.66708175206413,
"learning_rate": 8.136853427872298e-06,
"loss": 1.4385242462158203,
"step": 1678
},
{
"epoch": 1.0687460216422662,
"grad_norm": 21.08110180359124,
"learning_rate": 8.133968314453903e-06,
"loss": 1.3539273738861084,
"step": 1679
},
{
"epoch": 1.0693825588796946,
"grad_norm": 11.332922309922932,
"learning_rate": 8.131081481315353e-06,
"loss": 1.4039275646209717,
"step": 1680
},
{
"epoch": 1.0700190961171228,
"grad_norm": 11.822415386227963,
"learning_rate": 8.128192930040752e-06,
"loss": 1.3482317924499512,
"step": 1681
},
{
"epoch": 1.0706556333545512,
"grad_norm": 11.300193815902606,
"learning_rate": 8.125302662215156e-06,
"loss": 1.556139588356018,
"step": 1682
},
{
"epoch": 1.0712921705919796,
"grad_norm": 11.001260944554838,
"learning_rate": 8.12241067942455e-06,
"loss": 1.6594297885894775,
"step": 1683
},
{
"epoch": 1.071928707829408,
"grad_norm": 17.26006568095415,
"learning_rate": 8.119516983255875e-06,
"loss": 1.8062752485275269,
"step": 1684
},
{
"epoch": 1.0725652450668364,
"grad_norm": 8.880410724554594,
"learning_rate": 8.116621575297004e-06,
"loss": 1.5717209577560425,
"step": 1685
},
{
"epoch": 1.0732017823042648,
"grad_norm": 9.979602337699953,
"learning_rate": 8.113724457136747e-06,
"loss": 1.7199119329452515,
"step": 1686
},
{
"epoch": 1.0738383195416932,
"grad_norm": 12.614829392114382,
"learning_rate": 8.11082563036486e-06,
"loss": 1.873417615890503,
"step": 1687
},
{
"epoch": 1.0744748567791216,
"grad_norm": 12.185931739938681,
"learning_rate": 8.107925096572031e-06,
"loss": 1.826694369316101,
"step": 1688
},
{
"epoch": 1.07511139401655,
"grad_norm": 20.925554372417256,
"learning_rate": 8.105022857349886e-06,
"loss": 1.0506352186203003,
"step": 1689
},
{
"epoch": 1.0757479312539784,
"grad_norm": 10.185233251622277,
"learning_rate": 8.102118914290988e-06,
"loss": 1.5804609060287476,
"step": 1690
},
{
"epoch": 1.0763844684914068,
"grad_norm": 9.775951299246545,
"learning_rate": 8.099213268988835e-06,
"loss": 1.5286052227020264,
"step": 1691
},
{
"epoch": 1.0770210057288352,
"grad_norm": 8.04895628585496,
"learning_rate": 8.096305923037857e-06,
"loss": 2.2917532920837402,
"step": 1692
},
{
"epoch": 1.0776575429662636,
"grad_norm": 12.396131316168823,
"learning_rate": 8.093396878033416e-06,
"loss": 1.8517321348190308,
"step": 1693
},
{
"epoch": 1.078294080203692,
"grad_norm": 11.839840208595898,
"learning_rate": 8.090486135571811e-06,
"loss": 1.7966738939285278,
"step": 1694
},
{
"epoch": 1.0789306174411204,
"grad_norm": 10.538205100213034,
"learning_rate": 8.087573697250271e-06,
"loss": 1.6291019916534424,
"step": 1695
},
{
"epoch": 1.0795671546785486,
"grad_norm": 12.542598375280967,
"learning_rate": 8.084659564666954e-06,
"loss": 1.483119249343872,
"step": 1696
},
{
"epoch": 1.080203691915977,
"grad_norm": 17.92790634167383,
"learning_rate": 8.081743739420949e-06,
"loss": 1.184516191482544,
"step": 1697
},
{
"epoch": 1.0808402291534054,
"grad_norm": 8.120614428265217,
"learning_rate": 8.078826223112272e-06,
"loss": 1.1689471006393433,
"step": 1698
},
{
"epoch": 1.0814767663908338,
"grad_norm": 10.39273268340443,
"learning_rate": 8.075907017341869e-06,
"loss": 1.3193955421447754,
"step": 1699
},
{
"epoch": 1.0821133036282622,
"grad_norm": 11.749209026655937,
"learning_rate": 8.072986123711612e-06,
"loss": 1.7868832349777222,
"step": 1700
},
{
"epoch": 1.0827498408656906,
"grad_norm": 11.9206432112175,
"learning_rate": 8.0700635438243e-06,
"loss": 1.9372731447219849,
"step": 1701
},
{
"epoch": 1.083386378103119,
"grad_norm": 16.76571690643751,
"learning_rate": 8.067139279283657e-06,
"loss": 1.4213175773620605,
"step": 1702
},
{
"epoch": 1.0840229153405474,
"grad_norm": 11.541077397920052,
"learning_rate": 8.06421333169433e-06,
"loss": 1.4775488376617432,
"step": 1703
},
{
"epoch": 1.0846594525779758,
"grad_norm": 15.4708223985343,
"learning_rate": 8.061285702661891e-06,
"loss": 1.1895475387573242,
"step": 1704
},
{
"epoch": 1.0852959898154042,
"grad_norm": 9.385116394727838,
"learning_rate": 8.058356393792836e-06,
"loss": 1.3861514329910278,
"step": 1705
},
{
"epoch": 1.0859325270528326,
"grad_norm": 12.674100329040792,
"learning_rate": 8.05542540669458e-06,
"loss": 1.6155014038085938,
"step": 1706
},
{
"epoch": 1.086569064290261,
"grad_norm": 11.967887698809248,
"learning_rate": 8.052492742975457e-06,
"loss": 1.6911782026290894,
"step": 1707
},
{
"epoch": 1.0872056015276894,
"grad_norm": 15.376295836200836,
"learning_rate": 8.049558404244731e-06,
"loss": 1.6580021381378174,
"step": 1708
},
{
"epoch": 1.0878421387651178,
"grad_norm": 17.518359669332686,
"learning_rate": 8.046622392112575e-06,
"loss": 1.4573746919631958,
"step": 1709
},
{
"epoch": 1.0884786760025462,
"grad_norm": 10.174914731743716,
"learning_rate": 8.04368470819008e-06,
"loss": 1.7645249366760254,
"step": 1710
},
{
"epoch": 1.0891152132399746,
"grad_norm": 14.816749816494264,
"learning_rate": 8.040745354089264e-06,
"loss": 1.3396403789520264,
"step": 1711
},
{
"epoch": 1.089751750477403,
"grad_norm": 8.500623260090219,
"learning_rate": 8.03780433142305e-06,
"loss": 1.8244812488555908,
"step": 1712
},
{
"epoch": 1.0903882877148314,
"grad_norm": 13.835563353359229,
"learning_rate": 8.034861641805287e-06,
"loss": 1.9595309495925903,
"step": 1713
},
{
"epoch": 1.0910248249522596,
"grad_norm": 7.6032087361840945,
"learning_rate": 8.03191728685073e-06,
"loss": 1.6191723346710205,
"step": 1714
},
{
"epoch": 1.091661362189688,
"grad_norm": 9.612118436341923,
"learning_rate": 8.028971268175054e-06,
"loss": 1.7245670557022095,
"step": 1715
},
{
"epoch": 1.0922978994271164,
"grad_norm": 7.733082218539383,
"learning_rate": 8.026023587394842e-06,
"loss": 1.4619736671447754,
"step": 1716
},
{
"epoch": 1.0929344366645448,
"grad_norm": 9.701026061431111,
"learning_rate": 8.023074246127593e-06,
"loss": 1.5184788703918457,
"step": 1717
},
{
"epoch": 1.0935709739019732,
"grad_norm": 11.363088864289784,
"learning_rate": 8.020123245991716e-06,
"loss": 1.9125618934631348,
"step": 1718
},
{
"epoch": 1.0942075111394016,
"grad_norm": 11.886266396828022,
"learning_rate": 8.017170588606529e-06,
"loss": 1.4592000246047974,
"step": 1719
},
{
"epoch": 1.09484404837683,
"grad_norm": 15.953624175177238,
"learning_rate": 8.014216275592262e-06,
"loss": 2.6135425567626953,
"step": 1720
},
{
"epoch": 1.0954805856142584,
"grad_norm": 12.383683402706158,
"learning_rate": 8.011260308570048e-06,
"loss": 1.8721458911895752,
"step": 1721
},
{
"epoch": 1.0961171228516868,
"grad_norm": 21.712072561809958,
"learning_rate": 8.008302689161938e-06,
"loss": 2.3193273544311523,
"step": 1722
},
{
"epoch": 1.0967536600891152,
"grad_norm": 12.745679385393263,
"learning_rate": 8.005343418990877e-06,
"loss": 1.9192438125610352,
"step": 1723
},
{
"epoch": 1.0973901973265436,
"grad_norm": 11.49317536790788,
"learning_rate": 8.002382499680725e-06,
"loss": 1.9271581172943115,
"step": 1724
},
{
"epoch": 1.098026734563972,
"grad_norm": 11.47631910729549,
"learning_rate": 7.999419932856245e-06,
"loss": 1.7452729940414429,
"step": 1725
},
{
"epoch": 1.0986632718014004,
"grad_norm": 13.548063560780626,
"learning_rate": 7.9964557201431e-06,
"loss": 1.7192658185958862,
"step": 1726
},
{
"epoch": 1.0992998090388288,
"grad_norm": 14.44945874753324,
"learning_rate": 7.99348986316786e-06,
"loss": 1.4745674133300781,
"step": 1727
},
{
"epoch": 1.0999363462762572,
"grad_norm": 8.790939478068589,
"learning_rate": 7.990522363558e-06,
"loss": 1.7271530628204346,
"step": 1728
},
{
"epoch": 1.1005728835136857,
"grad_norm": 13.294298649309285,
"learning_rate": 7.987553222941888e-06,
"loss": 2.079482078552246,
"step": 1729
},
{
"epoch": 1.1012094207511138,
"grad_norm": 8.955186343290276,
"learning_rate": 7.9845824429488e-06,
"loss": 1.9009345769882202,
"step": 1730
},
{
"epoch": 1.1018459579885422,
"grad_norm": 11.965178893767549,
"learning_rate": 7.98161002520891e-06,
"loss": 1.871706485748291,
"step": 1731
},
{
"epoch": 1.1024824952259706,
"grad_norm": 12.071102366896111,
"learning_rate": 7.978635971353285e-06,
"loss": 1.2385210990905762,
"step": 1732
},
{
"epoch": 1.103119032463399,
"grad_norm": 10.402210828323987,
"learning_rate": 7.9756602830139e-06,
"loss": 1.4987719058990479,
"step": 1733
},
{
"epoch": 1.1037555697008274,
"grad_norm": 15.882877710866513,
"learning_rate": 7.972682961823618e-06,
"loss": 1.0441172122955322,
"step": 1734
},
{
"epoch": 1.1043921069382558,
"grad_norm": 10.44535984277186,
"learning_rate": 7.969704009416201e-06,
"loss": 1.6794166564941406,
"step": 1735
},
{
"epoch": 1.1050286441756842,
"grad_norm": 8.294825851080079,
"learning_rate": 7.966723427426309e-06,
"loss": 1.5168843269348145,
"step": 1736
},
{
"epoch": 1.1056651814131127,
"grad_norm": 8.750430308096748,
"learning_rate": 7.963741217489489e-06,
"loss": 0.831028938293457,
"step": 1737
},
{
"epoch": 1.106301718650541,
"grad_norm": 15.572356503260961,
"learning_rate": 7.960757381242192e-06,
"loss": 1.599800944328308,
"step": 1738
},
{
"epoch": 1.1069382558879695,
"grad_norm": 12.139791785945542,
"learning_rate": 7.95777192032175e-06,
"loss": 1.6609857082366943,
"step": 1739
},
{
"epoch": 1.1075747931253979,
"grad_norm": 73.58212966042169,
"learning_rate": 7.954784836366395e-06,
"loss": 2.646315097808838,
"step": 1740
},
{
"epoch": 1.1082113303628263,
"grad_norm": 12.359273737070797,
"learning_rate": 7.951796131015246e-06,
"loss": 2.328749179840088,
"step": 1741
},
{
"epoch": 1.1088478676002547,
"grad_norm": 13.229289004438948,
"learning_rate": 7.948805805908313e-06,
"loss": 1.6222223043441772,
"step": 1742
},
{
"epoch": 1.109484404837683,
"grad_norm": 6.561897383325581,
"learning_rate": 7.94581386268649e-06,
"loss": 1.1249849796295166,
"step": 1743
},
{
"epoch": 1.1101209420751115,
"grad_norm": 12.860463321432597,
"learning_rate": 7.942820302991569e-06,
"loss": 1.9121992588043213,
"step": 1744
},
{
"epoch": 1.1107574793125399,
"grad_norm": 9.986315850505079,
"learning_rate": 7.939825128466216e-06,
"loss": 1.5456956624984741,
"step": 1745
},
{
"epoch": 1.1113940165499683,
"grad_norm": 9.239962459355732,
"learning_rate": 7.936828340753996e-06,
"loss": 1.7931642532348633,
"step": 1746
},
{
"epoch": 1.1120305537873967,
"grad_norm": 12.404016054106627,
"learning_rate": 7.93382994149935e-06,
"loss": 1.4237359762191772,
"step": 1747
},
{
"epoch": 1.1126670910248249,
"grad_norm": 18.822826852660583,
"learning_rate": 7.930829932347608e-06,
"loss": 1.8694027662277222,
"step": 1748
},
{
"epoch": 1.1133036282622533,
"grad_norm": 8.086491923138064,
"learning_rate": 7.92782831494498e-06,
"loss": 1.4876083135604858,
"step": 1749
},
{
"epoch": 1.1139401654996817,
"grad_norm": 16.538632235045654,
"learning_rate": 7.924825090938564e-06,
"loss": 1.4833011627197266,
"step": 1750
},
{
"epoch": 1.11457670273711,
"grad_norm": 7.1295183448037935,
"learning_rate": 7.921820261976334e-06,
"loss": 1.4614789485931396,
"step": 1751
},
{
"epoch": 1.1152132399745385,
"grad_norm": 10.045674188034987,
"learning_rate": 7.918813829707146e-06,
"loss": 1.6283634901046753,
"step": 1752
},
{
"epoch": 1.1158497772119669,
"grad_norm": 7.969780060386495,
"learning_rate": 7.915805795780737e-06,
"loss": 1.2911505699157715,
"step": 1753
},
{
"epoch": 1.1164863144493953,
"grad_norm": 9.928915387567933,
"learning_rate": 7.912796161847724e-06,
"loss": 1.5987910032272339,
"step": 1754
},
{
"epoch": 1.1171228516868237,
"grad_norm": 10.269427265068847,
"learning_rate": 7.9097849295596e-06,
"loss": 1.5262665748596191,
"step": 1755
},
{
"epoch": 1.117759388924252,
"grad_norm": 10.51806130174475,
"learning_rate": 7.906772100568734e-06,
"loss": 1.4935064315795898,
"step": 1756
},
{
"epoch": 1.1183959261616805,
"grad_norm": 18.204930179540256,
"learning_rate": 7.903757676528374e-06,
"loss": 1.5943958759307861,
"step": 1757
},
{
"epoch": 1.119032463399109,
"grad_norm": 28.708012637672688,
"learning_rate": 7.900741659092641e-06,
"loss": 1.8090311288833618,
"step": 1758
},
{
"epoch": 1.1196690006365373,
"grad_norm": 8.987710870395777,
"learning_rate": 7.897724049916534e-06,
"loss": 1.5836857557296753,
"step": 1759
},
{
"epoch": 1.1203055378739657,
"grad_norm": 20.438928041322313,
"learning_rate": 7.894704850655919e-06,
"loss": 2.117549419403076,
"step": 1760
},
{
"epoch": 1.120942075111394,
"grad_norm": 11.837901712992668,
"learning_rate": 7.891684062967539e-06,
"loss": 1.4354597330093384,
"step": 1761
},
{
"epoch": 1.1215786123488225,
"grad_norm": 12.643607966368892,
"learning_rate": 7.88866168850901e-06,
"loss": 1.7912664413452148,
"step": 1762
},
{
"epoch": 1.1222151495862507,
"grad_norm": 16.56441221996852,
"learning_rate": 7.885637728938815e-06,
"loss": 1.8154982328414917,
"step": 1763
},
{
"epoch": 1.122851686823679,
"grad_norm": 16.868727426573127,
"learning_rate": 7.882612185916308e-06,
"loss": 1.723301887512207,
"step": 1764
},
{
"epoch": 1.1234882240611075,
"grad_norm": 13.243390871390828,
"learning_rate": 7.879585061101715e-06,
"loss": 1.9012724161148071,
"step": 1765
},
{
"epoch": 1.124124761298536,
"grad_norm": 11.83742733644601,
"learning_rate": 7.876556356156124e-06,
"loss": 1.3822736740112305,
"step": 1766
},
{
"epoch": 1.1247612985359643,
"grad_norm": 10.513509466461578,
"learning_rate": 7.873526072741497e-06,
"loss": 1.6562281847000122,
"step": 1767
},
{
"epoch": 1.1253978357733927,
"grad_norm": 10.696287820390786,
"learning_rate": 7.870494212520653e-06,
"loss": 1.3009065389633179,
"step": 1768
},
{
"epoch": 1.126034373010821,
"grad_norm": 9.152413319145252,
"learning_rate": 7.867460777157289e-06,
"loss": 1.3122084140777588,
"step": 1769
},
{
"epoch": 1.1266709102482495,
"grad_norm": 13.990709454170132,
"learning_rate": 7.864425768315953e-06,
"loss": 1.8093345165252686,
"step": 1770
},
{
"epoch": 1.127307447485678,
"grad_norm": 22.660617128504835,
"learning_rate": 7.861389187662068e-06,
"loss": 1.0877732038497925,
"step": 1771
},
{
"epoch": 1.1279439847231063,
"grad_norm": 13.109659843410897,
"learning_rate": 7.858351036861908e-06,
"loss": 1.716296911239624,
"step": 1772
},
{
"epoch": 1.1285805219605347,
"grad_norm": 14.317353777136642,
"learning_rate": 7.85531131758262e-06,
"loss": 1.273411750793457,
"step": 1773
},
{
"epoch": 1.1292170591979631,
"grad_norm": 8.174385753265293,
"learning_rate": 7.852270031492201e-06,
"loss": 1.389054536819458,
"step": 1774
},
{
"epoch": 1.1298535964353915,
"grad_norm": 10.651345319559745,
"learning_rate": 7.849227180259517e-06,
"loss": 1.492347240447998,
"step": 1775
},
{
"epoch": 1.13049013367282,
"grad_norm": 10.040317898362389,
"learning_rate": 7.84618276555429e-06,
"loss": 1.1550723314285278,
"step": 1776
},
{
"epoch": 1.1311266709102483,
"grad_norm": 7.862519842844843,
"learning_rate": 7.843136789047097e-06,
"loss": 1.390535831451416,
"step": 1777
},
{
"epoch": 1.1317632081476767,
"grad_norm": 8.24074987762661,
"learning_rate": 7.840089252409374e-06,
"loss": 1.1107431650161743,
"step": 1778
},
{
"epoch": 1.1323997453851051,
"grad_norm": 21.117127745526005,
"learning_rate": 7.837040157313413e-06,
"loss": 1.3700048923492432,
"step": 1779
},
{
"epoch": 1.1330362826225335,
"grad_norm": 15.746327248024805,
"learning_rate": 7.83398950543236e-06,
"loss": 1.5517867803573608,
"step": 1780
},
{
"epoch": 1.1336728198599617,
"grad_norm": 18.013815785912914,
"learning_rate": 7.830937298440219e-06,
"loss": 2.208071708679199,
"step": 1781
},
{
"epoch": 1.1343093570973901,
"grad_norm": 8.300726112769016,
"learning_rate": 7.827883538011842e-06,
"loss": 1.2979140281677246,
"step": 1782
},
{
"epoch": 1.1349458943348185,
"grad_norm": 14.200131073264666,
"learning_rate": 7.82482822582294e-06,
"loss": 1.2123842239379883,
"step": 1783
},
{
"epoch": 1.135582431572247,
"grad_norm": 15.773698395658762,
"learning_rate": 7.821771363550067e-06,
"loss": 1.339749813079834,
"step": 1784
},
{
"epoch": 1.1362189688096753,
"grad_norm": 8.75008900624983,
"learning_rate": 7.818712952870637e-06,
"loss": 1.4869803190231323,
"step": 1785
},
{
"epoch": 1.1368555060471037,
"grad_norm": 13.827153229079332,
"learning_rate": 7.815652995462905e-06,
"loss": 2.4990031719207764,
"step": 1786
},
{
"epoch": 1.1374920432845321,
"grad_norm": 8.293429813915223,
"learning_rate": 7.812591493005982e-06,
"loss": 1.3112232685089111,
"step": 1787
},
{
"epoch": 1.1381285805219605,
"grad_norm": 10.883169553839318,
"learning_rate": 7.80952844717982e-06,
"loss": 1.0375416278839111,
"step": 1788
},
{
"epoch": 1.138765117759389,
"grad_norm": 19.383618591487423,
"learning_rate": 7.806463859665225e-06,
"loss": 1.0073294639587402,
"step": 1789
},
{
"epoch": 1.1394016549968173,
"grad_norm": 15.8668111613722,
"learning_rate": 7.803397732143843e-06,
"loss": 1.5781023502349854,
"step": 1790
},
{
"epoch": 1.1400381922342457,
"grad_norm": 17.26900887878871,
"learning_rate": 7.80033006629817e-06,
"loss": 1.7327053546905518,
"step": 1791
},
{
"epoch": 1.1406747294716741,
"grad_norm": 11.9573596402166,
"learning_rate": 7.79726086381154e-06,
"loss": 1.2857234477996826,
"step": 1792
},
{
"epoch": 1.1413112667091025,
"grad_norm": 11.302351050801876,
"learning_rate": 7.794190126368139e-06,
"loss": 1.0917978286743164,
"step": 1793
},
{
"epoch": 1.141947803946531,
"grad_norm": 12.837413206229778,
"learning_rate": 7.791117855652985e-06,
"loss": 1.8788642883300781,
"step": 1794
},
{
"epoch": 1.1425843411839594,
"grad_norm": 13.492431193858089,
"learning_rate": 7.788044053351943e-06,
"loss": 1.754347562789917,
"step": 1795
},
{
"epoch": 1.1432208784213875,
"grad_norm": 10.83367514175402,
"learning_rate": 7.784968721151722e-06,
"loss": 1.1002882719039917,
"step": 1796
},
{
"epoch": 1.143857415658816,
"grad_norm": 15.818914507619862,
"learning_rate": 7.781891860739863e-06,
"loss": 2.2377448081970215,
"step": 1797
},
{
"epoch": 1.1444939528962443,
"grad_norm": 13.614411287281241,
"learning_rate": 7.77881347380475e-06,
"loss": 2.431692123413086,
"step": 1798
},
{
"epoch": 1.1451304901336727,
"grad_norm": 25.435200541665722,
"learning_rate": 7.775733562035605e-06,
"loss": 2.7545251846313477,
"step": 1799
},
{
"epoch": 1.1457670273711011,
"grad_norm": 11.172948238391447,
"learning_rate": 7.772652127122482e-06,
"loss": 1.219679832458496,
"step": 1800
},
{
"epoch": 1.1464035646085295,
"grad_norm": 10.9219009801967,
"learning_rate": 7.769569170756277e-06,
"loss": 2.6224284172058105,
"step": 1801
},
{
"epoch": 1.147040101845958,
"grad_norm": 11.780326597680638,
"learning_rate": 7.766484694628715e-06,
"loss": 1.0525163412094116,
"step": 1802
},
{
"epoch": 1.1476766390833864,
"grad_norm": 17.373733744790346,
"learning_rate": 7.763398700432363e-06,
"loss": 1.9834010601043701,
"step": 1803
},
{
"epoch": 1.1483131763208148,
"grad_norm": 7.610458221030429,
"learning_rate": 7.760311189860613e-06,
"loss": 1.5376927852630615,
"step": 1804
},
{
"epoch": 1.1489497135582432,
"grad_norm": 9.622458596879596,
"learning_rate": 7.757222164607691e-06,
"loss": 1.308505654335022,
"step": 1805
},
{
"epoch": 1.1495862507956716,
"grad_norm": 15.326202689254016,
"learning_rate": 7.75413162636866e-06,
"loss": 1.2876720428466797,
"step": 1806
},
{
"epoch": 1.1502227880331,
"grad_norm": 15.444950525292843,
"learning_rate": 7.751039576839402e-06,
"loss": 1.4229402542114258,
"step": 1807
},
{
"epoch": 1.1508593252705284,
"grad_norm": 16.0730420050437,
"learning_rate": 7.74794601771664e-06,
"loss": 1.0552520751953125,
"step": 1808
},
{
"epoch": 1.1514958625079568,
"grad_norm": 26.543528058889677,
"learning_rate": 7.744850950697917e-06,
"loss": 2.448869228363037,
"step": 1809
},
{
"epoch": 1.1521323997453852,
"grad_norm": 15.795894360343198,
"learning_rate": 7.741754377481609e-06,
"loss": 1.8211095333099365,
"step": 1810
},
{
"epoch": 1.1527689369828136,
"grad_norm": 11.273729371681432,
"learning_rate": 7.738656299766916e-06,
"loss": 2.2845656871795654,
"step": 1811
},
{
"epoch": 1.153405474220242,
"grad_norm": 11.986781913527139,
"learning_rate": 7.73555671925386e-06,
"loss": 1.1179295778274536,
"step": 1812
},
{
"epoch": 1.1540420114576704,
"grad_norm": 14.604113835297118,
"learning_rate": 7.732455637643297e-06,
"loss": 1.9219005107879639,
"step": 1813
},
{
"epoch": 1.1546785486950988,
"grad_norm": 26.90837734397635,
"learning_rate": 7.729353056636898e-06,
"loss": 1.3155293464660645,
"step": 1814
},
{
"epoch": 1.155315085932527,
"grad_norm": 11.242979431752286,
"learning_rate": 7.726248977937156e-06,
"loss": 1.120147943496704,
"step": 1815
},
{
"epoch": 1.1559516231699554,
"grad_norm": 11.931042786205527,
"learning_rate": 7.723143403247397e-06,
"loss": 1.7911725044250488,
"step": 1816
},
{
"epoch": 1.1565881604073838,
"grad_norm": 8.92477471372061,
"learning_rate": 7.720036334271757e-06,
"loss": 1.445631980895996,
"step": 1817
},
{
"epoch": 1.1572246976448122,
"grad_norm": 11.739799772875195,
"learning_rate": 7.716927772715196e-06,
"loss": 1.9247690439224243,
"step": 1818
},
{
"epoch": 1.1578612348822406,
"grad_norm": 11.36316342840103,
"learning_rate": 7.713817720283491e-06,
"loss": 1.324549913406372,
"step": 1819
},
{
"epoch": 1.158497772119669,
"grad_norm": 10.467262638791574,
"learning_rate": 7.710706178683242e-06,
"loss": 1.5766773223876953,
"step": 1820
},
{
"epoch": 1.1591343093570974,
"grad_norm": 11.381685058632478,
"learning_rate": 7.70759314962186e-06,
"loss": 1.0806773900985718,
"step": 1821
},
{
"epoch": 1.1597708465945258,
"grad_norm": 13.481992728762078,
"learning_rate": 7.704478634807575e-06,
"loss": 1.5848543643951416,
"step": 1822
},
{
"epoch": 1.1604073838319542,
"grad_norm": 11.769840779789444,
"learning_rate": 7.701362635949433e-06,
"loss": 2.3119468688964844,
"step": 1823
},
{
"epoch": 1.1610439210693826,
"grad_norm": 10.926997952681917,
"learning_rate": 7.698245154757295e-06,
"loss": 1.5413258075714111,
"step": 1824
},
{
"epoch": 1.161680458306811,
"grad_norm": 19.13593601880582,
"learning_rate": 7.695126192941833e-06,
"loss": 1.515957236289978,
"step": 1825
},
{
"epoch": 1.1623169955442394,
"grad_norm": 11.667160824247867,
"learning_rate": 7.692005752214531e-06,
"loss": 1.2359853982925415,
"step": 1826
},
{
"epoch": 1.1629535327816678,
"grad_norm": 16.44512782464998,
"learning_rate": 7.688883834287689e-06,
"loss": 1.6302152872085571,
"step": 1827
},
{
"epoch": 1.1635900700190962,
"grad_norm": 13.784706775186404,
"learning_rate": 7.685760440874414e-06,
"loss": 1.7101160287857056,
"step": 1828
},
{
"epoch": 1.1642266072565246,
"grad_norm": 38.77274421757055,
"learning_rate": 7.682635573688621e-06,
"loss": 0.7238671779632568,
"step": 1829
},
{
"epoch": 1.1648631444939528,
"grad_norm": 11.771204327312155,
"learning_rate": 7.67950923444504e-06,
"loss": 1.0701311826705933,
"step": 1830
},
{
"epoch": 1.1654996817313812,
"grad_norm": 15.418361855175581,
"learning_rate": 7.6763814248592e-06,
"loss": 1.8999749422073364,
"step": 1831
},
{
"epoch": 1.1661362189688096,
"grad_norm": 8.98113563451655,
"learning_rate": 7.673252146647445e-06,
"loss": 1.3261454105377197,
"step": 1832
},
{
"epoch": 1.166772756206238,
"grad_norm": 17.45204550530026,
"learning_rate": 7.67012140152692e-06,
"loss": 1.6088931560516357,
"step": 1833
},
{
"epoch": 1.1674092934436664,
"grad_norm": 19.327759547982886,
"learning_rate": 7.666989191215577e-06,
"loss": 2.3624980449676514,
"step": 1834
},
{
"epoch": 1.1680458306810948,
"grad_norm": 14.35439210001662,
"learning_rate": 7.663855517432173e-06,
"loss": 1.3708419799804688,
"step": 1835
},
{
"epoch": 1.1686823679185232,
"grad_norm": 10.933628615499206,
"learning_rate": 7.660720381896262e-06,
"loss": 1.6434717178344727,
"step": 1836
},
{
"epoch": 1.1693189051559516,
"grad_norm": 8.645972322880423,
"learning_rate": 7.657583786328211e-06,
"loss": 1.540701150894165,
"step": 1837
},
{
"epoch": 1.16995544239338,
"grad_norm": 12.360248198918942,
"learning_rate": 7.654445732449178e-06,
"loss": 1.469840168952942,
"step": 1838
},
{
"epoch": 1.1705919796308084,
"grad_norm": 14.17181638673728,
"learning_rate": 7.651306221981125e-06,
"loss": 1.4820390939712524,
"step": 1839
},
{
"epoch": 1.1712285168682368,
"grad_norm": 13.19981255660452,
"learning_rate": 7.648165256646816e-06,
"loss": 1.763018250465393,
"step": 1840
},
{
"epoch": 1.1718650541056652,
"grad_norm": 19.96206173343936,
"learning_rate": 7.645022838169805e-06,
"loss": 1.2311797142028809,
"step": 1841
},
{
"epoch": 1.1725015913430936,
"grad_norm": 10.356742138040985,
"learning_rate": 7.641878968274455e-06,
"loss": 1.3975334167480469,
"step": 1842
},
{
"epoch": 1.173138128580522,
"grad_norm": 19.129275715059812,
"learning_rate": 7.638733648685919e-06,
"loss": 1.2975008487701416,
"step": 1843
},
{
"epoch": 1.1737746658179504,
"grad_norm": 11.905535019943983,
"learning_rate": 7.63558688113014e-06,
"loss": 2.2674379348754883,
"step": 1844
},
{
"epoch": 1.1744112030553788,
"grad_norm": 7.786688666985661,
"learning_rate": 7.63243866733387e-06,
"loss": 1.6974135637283325,
"step": 1845
},
{
"epoch": 1.1750477402928072,
"grad_norm": 12.52119689416767,
"learning_rate": 7.629289009024639e-06,
"loss": 1.4131574630737305,
"step": 1846
},
{
"epoch": 1.1756842775302356,
"grad_norm": 31.107198017749575,
"learning_rate": 7.626137907930782e-06,
"loss": 1.3976935148239136,
"step": 1847
},
{
"epoch": 1.1763208147676638,
"grad_norm": 10.351372658599026,
"learning_rate": 7.6229853657814166e-06,
"loss": 1.9820079803466797,
"step": 1848
},
{
"epoch": 1.1769573520050922,
"grad_norm": 10.707854928577772,
"learning_rate": 7.619831384306457e-06,
"loss": 1.7318838834762573,
"step": 1849
},
{
"epoch": 1.1775938892425206,
"grad_norm": 9.943147550417313,
"learning_rate": 7.616675965236606e-06,
"loss": 1.736013650894165,
"step": 1850
},
{
"epoch": 1.178230426479949,
"grad_norm": 12.006886202792714,
"learning_rate": 7.613519110303352e-06,
"loss": 1.7214933633804321,
"step": 1851
},
{
"epoch": 1.1788669637173774,
"grad_norm": 11.816337891212694,
"learning_rate": 7.610360821238978e-06,
"loss": 1.8942065238952637,
"step": 1852
},
{
"epoch": 1.1795035009548058,
"grad_norm": 10.752902737121095,
"learning_rate": 7.607201099776546e-06,
"loss": 1.3639030456542969,
"step": 1853
},
{
"epoch": 1.1801400381922342,
"grad_norm": 11.19879417637204,
"learning_rate": 7.604039947649909e-06,
"loss": 2.0798466205596924,
"step": 1854
},
{
"epoch": 1.1807765754296626,
"grad_norm": 8.51324032774973,
"learning_rate": 7.600877366593704e-06,
"loss": 2.0732204914093018,
"step": 1855
},
{
"epoch": 1.181413112667091,
"grad_norm": 12.805733964922954,
"learning_rate": 7.59771335834335e-06,
"loss": 1.4387568235397339,
"step": 1856
},
{
"epoch": 1.1820496499045194,
"grad_norm": 12.253794086756363,
"learning_rate": 7.594547924635056e-06,
"loss": 1.7229176759719849,
"step": 1857
},
{
"epoch": 1.1826861871419478,
"grad_norm": 17.225392838689363,
"learning_rate": 7.591381067205805e-06,
"loss": 1.971238613128662,
"step": 1858
},
{
"epoch": 1.1833227243793762,
"grad_norm": 11.216256085419301,
"learning_rate": 7.588212787793365e-06,
"loss": 1.4213664531707764,
"step": 1859
},
{
"epoch": 1.1839592616168046,
"grad_norm": 9.820068560590832,
"learning_rate": 7.585043088136283e-06,
"loss": 1.348130464553833,
"step": 1860
},
{
"epoch": 1.184595798854233,
"grad_norm": 15.906925938143027,
"learning_rate": 7.581871969973889e-06,
"loss": 1.6581814289093018,
"step": 1861
},
{
"epoch": 1.1852323360916615,
"grad_norm": 16.064981417373456,
"learning_rate": 7.578699435046286e-06,
"loss": 2.1315577030181885,
"step": 1862
},
{
"epoch": 1.1858688733290896,
"grad_norm": 9.032266355524344,
"learning_rate": 7.575525485094359e-06,
"loss": 2.0244717597961426,
"step": 1863
},
{
"epoch": 1.186505410566518,
"grad_norm": 9.942529375009329,
"learning_rate": 7.572350121859764e-06,
"loss": 1.2924580574035645,
"step": 1864
},
{
"epoch": 1.1871419478039464,
"grad_norm": 11.412308784951723,
"learning_rate": 7.569173347084939e-06,
"loss": 1.9106409549713135,
"step": 1865
},
{
"epoch": 1.1877784850413748,
"grad_norm": 11.939167987075383,
"learning_rate": 7.565995162513094e-06,
"loss": 1.6946731805801392,
"step": 1866
},
{
"epoch": 1.1884150222788032,
"grad_norm": 8.32850550886508,
"learning_rate": 7.562815569888211e-06,
"loss": 1.7525533437728882,
"step": 1867
},
{
"epoch": 1.1890515595162316,
"grad_norm": 13.010192788395944,
"learning_rate": 7.559634570955046e-06,
"loss": 2.2896313667297363,
"step": 1868
},
{
"epoch": 1.18968809675366,
"grad_norm": 15.350701716931825,
"learning_rate": 7.556452167459125e-06,
"loss": 1.0601682662963867,
"step": 1869
},
{
"epoch": 1.1903246339910885,
"grad_norm": 11.38436143738909,
"learning_rate": 7.5532683611467485e-06,
"loss": 1.624541997909546,
"step": 1870
},
{
"epoch": 1.1909611712285169,
"grad_norm": 19.38365810517717,
"learning_rate": 7.550083153764984e-06,
"loss": 1.4402720928192139,
"step": 1871
},
{
"epoch": 1.1915977084659453,
"grad_norm": 7.9509984357329255,
"learning_rate": 7.546896547061668e-06,
"loss": 1.712436556816101,
"step": 1872
},
{
"epoch": 1.1922342457033737,
"grad_norm": 13.016419894010188,
"learning_rate": 7.543708542785406e-06,
"loss": 1.6512826681137085,
"step": 1873
},
{
"epoch": 1.192870782940802,
"grad_norm": 16.13392114064012,
"learning_rate": 7.540519142685569e-06,
"loss": 1.245017409324646,
"step": 1874
},
{
"epoch": 1.1935073201782305,
"grad_norm": 13.50493833728495,
"learning_rate": 7.5373283485122954e-06,
"loss": 1.4898035526275635,
"step": 1875
},
{
"epoch": 1.1941438574156589,
"grad_norm": 20.855488269849733,
"learning_rate": 7.534136162016485e-06,
"loss": 3.2437474727630615,
"step": 1876
},
{
"epoch": 1.1947803946530873,
"grad_norm": 11.252751042792164,
"learning_rate": 7.530942584949807e-06,
"loss": 1.0865591764450073,
"step": 1877
},
{
"epoch": 1.1954169318905157,
"grad_norm": 13.032275349388597,
"learning_rate": 7.527747619064691e-06,
"loss": 1.626455545425415,
"step": 1878
},
{
"epoch": 1.196053469127944,
"grad_norm": 11.598490138086122,
"learning_rate": 7.524551266114328e-06,
"loss": 1.7297886610031128,
"step": 1879
},
{
"epoch": 1.1966900063653725,
"grad_norm": 10.894012669547493,
"learning_rate": 7.521353527852671e-06,
"loss": 1.2762043476104736,
"step": 1880
},
{
"epoch": 1.1973265436028009,
"grad_norm": 14.494653177716215,
"learning_rate": 7.518154406034431e-06,
"loss": 1.3345061540603638,
"step": 1881
},
{
"epoch": 1.197963080840229,
"grad_norm": 11.458183627356052,
"learning_rate": 7.514953902415083e-06,
"loss": 1.5401028394699097,
"step": 1882
},
{
"epoch": 1.1985996180776575,
"grad_norm": 6.092832911656193,
"learning_rate": 7.5117520187508575e-06,
"loss": 1.7554508447647095,
"step": 1883
},
{
"epoch": 1.1992361553150859,
"grad_norm": 11.981245755129045,
"learning_rate": 7.508548756798739e-06,
"loss": 1.2973220348358154,
"step": 1884
},
{
"epoch": 1.1998726925525143,
"grad_norm": 13.132185393244692,
"learning_rate": 7.505344118316475e-06,
"loss": 1.973362684249878,
"step": 1885
},
{
"epoch": 1.2005092297899427,
"grad_norm": 48.10276803054581,
"learning_rate": 7.5021381050625654e-06,
"loss": 1.0365291833877563,
"step": 1886
},
{
"epoch": 1.201145767027371,
"grad_norm": 16.014538047607346,
"learning_rate": 7.49893071879626e-06,
"loss": 1.9781842231750488,
"step": 1887
},
{
"epoch": 1.2017823042647995,
"grad_norm": 11.269003654897768,
"learning_rate": 7.495721961277569e-06,
"loss": 1.3941630125045776,
"step": 1888
},
{
"epoch": 1.2024188415022279,
"grad_norm": 11.591777344228708,
"learning_rate": 7.492511834267251e-06,
"loss": 1.9853066205978394,
"step": 1889
},
{
"epoch": 1.2030553787396563,
"grad_norm": 12.068802080673693,
"learning_rate": 7.489300339526817e-06,
"loss": 1.7171443700790405,
"step": 1890
},
{
"epoch": 1.2036919159770847,
"grad_norm": 22.920721006940184,
"learning_rate": 7.486087478818531e-06,
"loss": 2.167785167694092,
"step": 1891
},
{
"epoch": 1.204328453214513,
"grad_norm": 12.688299047469862,
"learning_rate": 7.4828732539054005e-06,
"loss": 1.8467473983764648,
"step": 1892
},
{
"epoch": 1.2049649904519415,
"grad_norm": 14.15099463149888,
"learning_rate": 7.479657666551188e-06,
"loss": 1.6865915060043335,
"step": 1893
},
{
"epoch": 1.20560152768937,
"grad_norm": 12.599599937093739,
"learning_rate": 7.4764407185204e-06,
"loss": 1.6907148361206055,
"step": 1894
},
{
"epoch": 1.2062380649267983,
"grad_norm": 10.347272305572426,
"learning_rate": 7.473222411578289e-06,
"loss": 1.4749281406402588,
"step": 1895
},
{
"epoch": 1.2068746021642265,
"grad_norm": 10.44069149604762,
"learning_rate": 7.47000274749086e-06,
"loss": 1.3367358446121216,
"step": 1896
},
{
"epoch": 1.2075111394016549,
"grad_norm": 18.336987946116935,
"learning_rate": 7.466781728024851e-06,
"loss": 1.7666252851486206,
"step": 1897
},
{
"epoch": 1.2081476766390833,
"grad_norm": 16.32370262672562,
"learning_rate": 7.463559354947755e-06,
"loss": 1.9802442789077759,
"step": 1898
},
{
"epoch": 1.2087842138765117,
"grad_norm": 22.8740603627606,
"learning_rate": 7.4603356300278e-06,
"loss": 2.067876100540161,
"step": 1899
},
{
"epoch": 1.20942075111394,
"grad_norm": 14.307070850765847,
"learning_rate": 7.45711055503396e-06,
"loss": 0.8940960168838501,
"step": 1900
},
{
"epoch": 1.2100572883513685,
"grad_norm": 9.175223965297342,
"learning_rate": 7.453884131735949e-06,
"loss": 1.8019193410873413,
"step": 1901
},
{
"epoch": 1.210693825588797,
"grad_norm": 15.949480275539605,
"learning_rate": 7.4506563619042205e-06,
"loss": 1.8527977466583252,
"step": 1902
},
{
"epoch": 1.2113303628262253,
"grad_norm": 9.419830167238736,
"learning_rate": 7.447427247309966e-06,
"loss": 1.4505186080932617,
"step": 1903
},
{
"epoch": 1.2119669000636537,
"grad_norm": 10.233950621374445,
"learning_rate": 7.444196789725117e-06,
"loss": 1.766714096069336,
"step": 1904
},
{
"epoch": 1.212603437301082,
"grad_norm": 8.783456450910034,
"learning_rate": 7.440964990922338e-06,
"loss": 1.0600299835205078,
"step": 1905
},
{
"epoch": 1.2132399745385105,
"grad_norm": 11.519901378517396,
"learning_rate": 7.437731852675036e-06,
"loss": 1.3673579692840576,
"step": 1906
},
{
"epoch": 1.213876511775939,
"grad_norm": 12.291262365507965,
"learning_rate": 7.434497376757347e-06,
"loss": 1.8734787702560425,
"step": 1907
},
{
"epoch": 1.2145130490133673,
"grad_norm": 10.393224808246323,
"learning_rate": 7.431261564944145e-06,
"loss": 0.9546700716018677,
"step": 1908
},
{
"epoch": 1.2151495862507957,
"grad_norm": 14.151477044961855,
"learning_rate": 7.428024419011033e-06,
"loss": 1.3177452087402344,
"step": 1909
},
{
"epoch": 1.2157861234882241,
"grad_norm": 7.269007351633296,
"learning_rate": 7.4247859407343495e-06,
"loss": 1.0580735206604004,
"step": 1910
},
{
"epoch": 1.2164226607256525,
"grad_norm": 8.089654185837398,
"learning_rate": 7.421546131891164e-06,
"loss": 1.882283329963684,
"step": 1911
},
{
"epoch": 1.217059197963081,
"grad_norm": 13.459427281395609,
"learning_rate": 7.418304994259273e-06,
"loss": 2.182218074798584,
"step": 1912
},
{
"epoch": 1.2176957352005093,
"grad_norm": 12.694279765624392,
"learning_rate": 7.415062529617208e-06,
"loss": 1.1375731229782104,
"step": 1913
},
{
"epoch": 1.2183322724379377,
"grad_norm": 10.727215231250955,
"learning_rate": 7.411818739744221e-06,
"loss": 1.3777339458465576,
"step": 1914
},
{
"epoch": 1.218968809675366,
"grad_norm": 9.732416797658074,
"learning_rate": 7.408573626420295e-06,
"loss": 1.4339038133621216,
"step": 1915
},
{
"epoch": 1.2196053469127943,
"grad_norm": 10.419394245073262,
"learning_rate": 7.405327191426142e-06,
"loss": 1.509937047958374,
"step": 1916
},
{
"epoch": 1.2202418841502227,
"grad_norm": 10.317576358758561,
"learning_rate": 7.402079436543195e-06,
"loss": 1.7624815702438354,
"step": 1917
},
{
"epoch": 1.2208784213876511,
"grad_norm": 12.729222656689798,
"learning_rate": 7.398830363553615e-06,
"loss": 1.4026525020599365,
"step": 1918
},
{
"epoch": 1.2215149586250795,
"grad_norm": 14.685361883429874,
"learning_rate": 7.3955799742402825e-06,
"loss": 1.026107907295227,
"step": 1919
},
{
"epoch": 1.222151495862508,
"grad_norm": 19.168456503917845,
"learning_rate": 7.392328270386801e-06,
"loss": 1.0268229246139526,
"step": 1920
},
{
"epoch": 1.2227880330999363,
"grad_norm": 9.54643154777968,
"learning_rate": 7.3890752537774975e-06,
"loss": 1.7518980503082275,
"step": 1921
},
{
"epoch": 1.2234245703373647,
"grad_norm": 10.395051446444308,
"learning_rate": 7.385820926197419e-06,
"loss": 1.529889464378357,
"step": 1922
},
{
"epoch": 1.2240611075747931,
"grad_norm": 18.2093731668895,
"learning_rate": 7.382565289432331e-06,
"loss": 1.613027572631836,
"step": 1923
},
{
"epoch": 1.2246976448122215,
"grad_norm": 23.82887472108906,
"learning_rate": 7.379308345268716e-06,
"loss": 1.5242559909820557,
"step": 1924
},
{
"epoch": 1.22533418204965,
"grad_norm": 13.673385908421059,
"learning_rate": 7.3760500954937765e-06,
"loss": 1.6235510110855103,
"step": 1925
},
{
"epoch": 1.2259707192870783,
"grad_norm": 9.144281260916904,
"learning_rate": 7.372790541895429e-06,
"loss": 1.1471562385559082,
"step": 1926
},
{
"epoch": 1.2266072565245068,
"grad_norm": 15.216430272604757,
"learning_rate": 7.369529686262309e-06,
"loss": 1.7793552875518799,
"step": 1927
},
{
"epoch": 1.2272437937619352,
"grad_norm": 8.894150277277527,
"learning_rate": 7.3662675303837625e-06,
"loss": 0.7938424944877625,
"step": 1928
},
{
"epoch": 1.2278803309993636,
"grad_norm": 15.264368220129498,
"learning_rate": 7.3630040760498526e-06,
"loss": 1.5442453622817993,
"step": 1929
},
{
"epoch": 1.2285168682367917,
"grad_norm": 11.649599369756318,
"learning_rate": 7.359739325051351e-06,
"loss": 1.6375675201416016,
"step": 1930
},
{
"epoch": 1.2291534054742201,
"grad_norm": 13.56701183347168,
"learning_rate": 7.356473279179743e-06,
"loss": 1.2604639530181885,
"step": 1931
},
{
"epoch": 1.2297899427116485,
"grad_norm": 12.737330913380749,
"learning_rate": 7.353205940227225e-06,
"loss": 1.1772258281707764,
"step": 1932
},
{
"epoch": 1.230426479949077,
"grad_norm": 8.291940590066453,
"learning_rate": 7.349937309986703e-06,
"loss": 1.5388436317443848,
"step": 1933
},
{
"epoch": 1.2310630171865053,
"grad_norm": 13.070371482826912,
"learning_rate": 7.3466673902517915e-06,
"loss": 1.3742997646331787,
"step": 1934
},
{
"epoch": 1.2316995544239338,
"grad_norm": 12.425396825123366,
"learning_rate": 7.34339618281681e-06,
"loss": 0.957462728023529,
"step": 1935
},
{
"epoch": 1.2323360916613622,
"grad_norm": 13.523298976937086,
"learning_rate": 7.340123689476788e-06,
"loss": 1.2024550437927246,
"step": 1936
},
{
"epoch": 1.2329726288987906,
"grad_norm": 11.391117925680897,
"learning_rate": 7.3368499120274595e-06,
"loss": 1.7216256856918335,
"step": 1937
},
{
"epoch": 1.233609166136219,
"grad_norm": 8.126973409603934,
"learning_rate": 7.333574852265261e-06,
"loss": 1.8681612014770508,
"step": 1938
},
{
"epoch": 1.2342457033736474,
"grad_norm": 9.012360390891047,
"learning_rate": 7.330298511987337e-06,
"loss": 1.4095933437347412,
"step": 1939
},
{
"epoch": 1.2348822406110758,
"grad_norm": 18.81487800771032,
"learning_rate": 7.327020892991531e-06,
"loss": 1.675499677658081,
"step": 1940
},
{
"epoch": 1.2355187778485042,
"grad_norm": 8.353732734462897,
"learning_rate": 7.32374199707639e-06,
"loss": 1.9631736278533936,
"step": 1941
},
{
"epoch": 1.2361553150859326,
"grad_norm": 11.817769020339089,
"learning_rate": 7.3204618260411606e-06,
"loss": 1.558953046798706,
"step": 1942
},
{
"epoch": 1.236791852323361,
"grad_norm": 14.333347533600476,
"learning_rate": 7.317180381685789e-06,
"loss": 1.446763515472412,
"step": 1943
},
{
"epoch": 1.2374283895607894,
"grad_norm": 11.845641737043543,
"learning_rate": 7.313897665810923e-06,
"loss": 1.7084661722183228,
"step": 1944
},
{
"epoch": 1.2380649267982178,
"grad_norm": 18.63755781167361,
"learning_rate": 7.310613680217901e-06,
"loss": 1.6522732973098755,
"step": 1945
},
{
"epoch": 1.2387014640356462,
"grad_norm": 6.604413600891718,
"learning_rate": 7.30732842670877e-06,
"loss": 1.3374499082565308,
"step": 1946
},
{
"epoch": 1.2393380012730746,
"grad_norm": 9.848894297617093,
"learning_rate": 7.304041907086262e-06,
"loss": 2.0001447200775146,
"step": 1947
},
{
"epoch": 1.239974538510503,
"grad_norm": 14.55893211755636,
"learning_rate": 7.300754123153806e-06,
"loss": 1.9936014413833618,
"step": 1948
},
{
"epoch": 1.2406110757479312,
"grad_norm": 15.810108156059623,
"learning_rate": 7.297465076715528e-06,
"loss": 2.4722843170166016,
"step": 1949
},
{
"epoch": 1.2412476129853596,
"grad_norm": 7.731696396145016,
"learning_rate": 7.294174769576243e-06,
"loss": 0.870339035987854,
"step": 1950
},
{
"epoch": 1.241884150222788,
"grad_norm": 16.16220622173208,
"learning_rate": 7.290883203541464e-06,
"loss": 1.5957900285720825,
"step": 1951
},
{
"epoch": 1.2425206874602164,
"grad_norm": 9.38622954000017,
"learning_rate": 7.287590380417389e-06,
"loss": 2.2406721115112305,
"step": 1952
},
{
"epoch": 1.2431572246976448,
"grad_norm": 11.29560243287352,
"learning_rate": 7.284296302010905e-06,
"loss": 1.4800920486450195,
"step": 1953
},
{
"epoch": 1.2437937619350732,
"grad_norm": 8.933884439359806,
"learning_rate": 7.281000970129593e-06,
"loss": 1.5932163000106812,
"step": 1954
},
{
"epoch": 1.2444302991725016,
"grad_norm": 12.318508004136184,
"learning_rate": 7.277704386581716e-06,
"loss": 1.2271223068237305,
"step": 1955
},
{
"epoch": 1.24506683640993,
"grad_norm": 12.675989142462607,
"learning_rate": 7.274406553176232e-06,
"loss": 1.360648512840271,
"step": 1956
},
{
"epoch": 1.2457033736473584,
"grad_norm": 15.242853964555625,
"learning_rate": 7.271107471722776e-06,
"loss": 1.1471834182739258,
"step": 1957
},
{
"epoch": 1.2463399108847868,
"grad_norm": 17.277403268443276,
"learning_rate": 7.267807144031671e-06,
"loss": 1.5737574100494385,
"step": 1958
},
{
"epoch": 1.2469764481222152,
"grad_norm": 15.151191106733325,
"learning_rate": 7.264505571913927e-06,
"loss": 1.2921802997589111,
"step": 1959
},
{
"epoch": 1.2476129853596436,
"grad_norm": 7.124277759919316,
"learning_rate": 7.2612027571812335e-06,
"loss": 1.5414897203445435,
"step": 1960
},
{
"epoch": 1.248249522597072,
"grad_norm": 7.852127681955942,
"learning_rate": 7.257898701645962e-06,
"loss": 1.8702853918075562,
"step": 1961
},
{
"epoch": 1.2488860598345004,
"grad_norm": 9.455603758918162,
"learning_rate": 7.2545934071211675e-06,
"loss": 1.3539278507232666,
"step": 1962
},
{
"epoch": 1.2495225970719286,
"grad_norm": 14.359858074624915,
"learning_rate": 7.25128687542058e-06,
"loss": 0.6792780160903931,
"step": 1963
},
{
"epoch": 1.250159134309357,
"grad_norm": 8.643654455985653,
"learning_rate": 7.247979108358615e-06,
"loss": 1.5339438915252686,
"step": 1964
},
{
"epoch": 1.2507956715467854,
"grad_norm": 10.896720766576127,
"learning_rate": 7.244670107750358e-06,
"loss": 1.652394413948059,
"step": 1965
},
{
"epoch": 1.2514322087842138,
"grad_norm": 13.721184303942685,
"learning_rate": 7.2413598754115785e-06,
"loss": 1.497282862663269,
"step": 1966
},
{
"epoch": 1.2520687460216422,
"grad_norm": 9.695875545347585,
"learning_rate": 7.238048413158718e-06,
"loss": 1.332318663597107,
"step": 1967
},
{
"epoch": 1.2527052832590706,
"grad_norm": 8.380761797235664,
"learning_rate": 7.234735722808895e-06,
"loss": 1.8001642227172852,
"step": 1968
},
{
"epoch": 1.253341820496499,
"grad_norm": 9.735036157652448,
"learning_rate": 7.231421806179899e-06,
"loss": 1.1970624923706055,
"step": 1969
},
{
"epoch": 1.2539783577339274,
"grad_norm": 13.537763020398728,
"learning_rate": 7.228106665090196e-06,
"loss": 1.8446029424667358,
"step": 1970
},
{
"epoch": 1.2546148949713558,
"grad_norm": 18.120197954506995,
"learning_rate": 7.22479030135892e-06,
"loss": 1.7946934700012207,
"step": 1971
},
{
"epoch": 1.2552514322087842,
"grad_norm": 10.984008655652975,
"learning_rate": 7.221472716805881e-06,
"loss": 2.002692461013794,
"step": 1972
},
{
"epoch": 1.2558879694462126,
"grad_norm": 14.064546620525391,
"learning_rate": 7.218153913251553e-06,
"loss": 1.6635444164276123,
"step": 1973
},
{
"epoch": 1.256524506683641,
"grad_norm": 11.49170384270617,
"learning_rate": 7.214833892517084e-06,
"loss": 2.171565294265747,
"step": 1974
},
{
"epoch": 1.2571610439210694,
"grad_norm": 12.582796268957058,
"learning_rate": 7.211512656424287e-06,
"loss": 1.2512736320495605,
"step": 1975
},
{
"epoch": 1.2577975811584978,
"grad_norm": 10.677004325528,
"learning_rate": 7.208190206795641e-06,
"loss": 1.4064788818359375,
"step": 1976
},
{
"epoch": 1.2584341183959262,
"grad_norm": 10.900151676996787,
"learning_rate": 7.2048665454542954e-06,
"loss": 1.9992822408676147,
"step": 1977
},
{
"epoch": 1.2590706556333546,
"grad_norm": 17.393340138680646,
"learning_rate": 7.2015416742240595e-06,
"loss": 3.2122464179992676,
"step": 1978
},
{
"epoch": 1.259707192870783,
"grad_norm": 15.261897561606352,
"learning_rate": 7.19821559492941e-06,
"loss": 2.054863214492798,
"step": 1979
},
{
"epoch": 1.2603437301082114,
"grad_norm": 15.896242229019826,
"learning_rate": 7.194888309395486e-06,
"loss": 1.4458142518997192,
"step": 1980
},
{
"epoch": 1.2609802673456398,
"grad_norm": 11.826879293543872,
"learning_rate": 7.191559819448086e-06,
"loss": 1.6558723449707031,
"step": 1981
},
{
"epoch": 1.2616168045830682,
"grad_norm": 12.788160019710507,
"learning_rate": 7.188230126913671e-06,
"loss": 1.4784185886383057,
"step": 1982
},
{
"epoch": 1.2622533418204964,
"grad_norm": 16.074729863093676,
"learning_rate": 7.184899233619362e-06,
"loss": 1.4221988916397095,
"step": 1983
},
{
"epoch": 1.2628898790579248,
"grad_norm": 8.801890393952833,
"learning_rate": 7.181567141392941e-06,
"loss": 1.517238736152649,
"step": 1984
},
{
"epoch": 1.2635264162953532,
"grad_norm": 10.54471122109457,
"learning_rate": 7.178233852062844e-06,
"loss": 1.365362524986267,
"step": 1985
},
{
"epoch": 1.2641629535327816,
"grad_norm": 11.962413012543848,
"learning_rate": 7.174899367458166e-06,
"loss": 1.3890221118927002,
"step": 1986
},
{
"epoch": 1.26479949077021,
"grad_norm": 25.494484636850306,
"learning_rate": 7.171563689408657e-06,
"loss": 1.5127917528152466,
"step": 1987
},
{
"epoch": 1.2654360280076384,
"grad_norm": 12.67963301824009,
"learning_rate": 7.168226819744723e-06,
"loss": 1.737844467163086,
"step": 1988
},
{
"epoch": 1.2660725652450668,
"grad_norm": 8.567371030578066,
"learning_rate": 7.1648887602974234e-06,
"loss": 1.6531749963760376,
"step": 1989
},
{
"epoch": 1.2667091024824952,
"grad_norm": 8.328964482464091,
"learning_rate": 7.161549512898472e-06,
"loss": 2.115635395050049,
"step": 1990
},
{
"epoch": 1.2673456397199236,
"grad_norm": 9.676401838353584,
"learning_rate": 7.1582090793802305e-06,
"loss": 1.2644509077072144,
"step": 1991
},
{
"epoch": 1.267982176957352,
"grad_norm": 12.357088545404718,
"learning_rate": 7.154867461575715e-06,
"loss": 1.1090812683105469,
"step": 1992
},
{
"epoch": 1.2686187141947805,
"grad_norm": 11.789160247345793,
"learning_rate": 7.151524661318591e-06,
"loss": 1.8918412923812866,
"step": 1993
},
{
"epoch": 1.2692552514322089,
"grad_norm": 12.369888357071828,
"learning_rate": 7.14818068044317e-06,
"loss": 1.4245556592941284,
"step": 1994
},
{
"epoch": 1.2698917886696373,
"grad_norm": 9.486321675919198,
"learning_rate": 7.144835520784416e-06,
"loss": 1.4290460348129272,
"step": 1995
},
{
"epoch": 1.2705283259070654,
"grad_norm": 7.9313436358179095,
"learning_rate": 7.141489184177934e-06,
"loss": 1.4305641651153564,
"step": 1996
},
{
"epoch": 1.2711648631444938,
"grad_norm": 15.104715008058536,
"learning_rate": 7.138141672459984e-06,
"loss": 1.949066400527954,
"step": 1997
},
{
"epoch": 1.2718014003819222,
"grad_norm": 15.101458585570494,
"learning_rate": 7.13479298746746e-06,
"loss": 1.6702244281768799,
"step": 1998
},
{
"epoch": 1.2724379376193506,
"grad_norm": 21.074675537203603,
"learning_rate": 7.131443131037906e-06,
"loss": 0.8868348598480225,
"step": 1999
},
{
"epoch": 1.273074474856779,
"grad_norm": 10.140580524034965,
"learning_rate": 7.128092105009509e-06,
"loss": 1.5796661376953125,
"step": 2000
},
{
"epoch": 1.2737110120942075,
"grad_norm": 12.149644745921481,
"learning_rate": 7.124739911221094e-06,
"loss": 1.8856110572814941,
"step": 2001
},
{
"epoch": 1.2743475493316359,
"grad_norm": 17.78423658720528,
"learning_rate": 7.1213865515121315e-06,
"loss": 1.5670487880706787,
"step": 2002
},
{
"epoch": 1.2749840865690643,
"grad_norm": 13.651830225927696,
"learning_rate": 7.118032027722729e-06,
"loss": 1.7737964391708374,
"step": 2003
},
{
"epoch": 1.2756206238064927,
"grad_norm": 9.323624641875844,
"learning_rate": 7.114676341693633e-06,
"loss": 1.735838770866394,
"step": 2004
},
{
"epoch": 1.276257161043921,
"grad_norm": 15.74868714614688,
"learning_rate": 7.111319495266228e-06,
"loss": 1.7061078548431396,
"step": 2005
},
{
"epoch": 1.2768936982813495,
"grad_norm": 14.577106488605871,
"learning_rate": 7.107961490282535e-06,
"loss": 1.8450149297714233,
"step": 2006
},
{
"epoch": 1.2775302355187779,
"grad_norm": 11.567185054926867,
"learning_rate": 7.104602328585213e-06,
"loss": 1.1935040950775146,
"step": 2007
},
{
"epoch": 1.2781667727562063,
"grad_norm": 29.766723610848548,
"learning_rate": 7.101242012017551e-06,
"loss": 3.633208751678467,
"step": 2008
},
{
"epoch": 1.2788033099936347,
"grad_norm": 12.374153481618576,
"learning_rate": 7.097880542423477e-06,
"loss": 1.8976576328277588,
"step": 2009
},
{
"epoch": 1.279439847231063,
"grad_norm": 9.798275521758873,
"learning_rate": 7.094517921647547e-06,
"loss": 1.4608005285263062,
"step": 2010
},
{
"epoch": 1.2800763844684915,
"grad_norm": 11.77683814782217,
"learning_rate": 7.091154151534953e-06,
"loss": 1.8218762874603271,
"step": 2011
},
{
"epoch": 1.2807129217059199,
"grad_norm": 7.679624917182098,
"learning_rate": 7.087789233931514e-06,
"loss": 1.4904704093933105,
"step": 2012
},
{
"epoch": 1.2813494589433483,
"grad_norm": 9.850412372734413,
"learning_rate": 7.08442317068368e-06,
"loss": 1.1225043535232544,
"step": 2013
},
{
"epoch": 1.2819859961807767,
"grad_norm": 22.3080237103038,
"learning_rate": 7.081055963638533e-06,
"loss": 1.6578782796859741,
"step": 2014
},
{
"epoch": 1.282622533418205,
"grad_norm": 23.354108651261082,
"learning_rate": 7.077687614643778e-06,
"loss": 1.5169906616210938,
"step": 2015
},
{
"epoch": 1.2832590706556333,
"grad_norm": 11.337491078888114,
"learning_rate": 7.074318125547745e-06,
"loss": 2.2514548301696777,
"step": 2016
},
{
"epoch": 1.2838956078930617,
"grad_norm": 11.375855346550463,
"learning_rate": 7.070947498199396e-06,
"loss": 1.5008771419525146,
"step": 2017
},
{
"epoch": 1.28453214513049,
"grad_norm": 9.429966713722655,
"learning_rate": 7.067575734448315e-06,
"loss": 1.3150955438613892,
"step": 2018
},
{
"epoch": 1.2851686823679185,
"grad_norm": 14.907133668929445,
"learning_rate": 7.064202836144707e-06,
"loss": 1.8760895729064941,
"step": 2019
},
{
"epoch": 1.2858052196053469,
"grad_norm": 9.708287072038424,
"learning_rate": 7.060828805139402e-06,
"loss": 1.8177990913391113,
"step": 2020
},
{
"epoch": 1.2864417568427753,
"grad_norm": 17.331700123350217,
"learning_rate": 7.057453643283851e-06,
"loss": 1.7386019229888916,
"step": 2021
},
{
"epoch": 1.2870782940802037,
"grad_norm": 11.728116844052103,
"learning_rate": 7.0540773524301275e-06,
"loss": 1.9298803806304932,
"step": 2022
},
{
"epoch": 1.287714831317632,
"grad_norm": 10.258038328070816,
"learning_rate": 7.0506999344309205e-06,
"loss": 1.8520281314849854,
"step": 2023
},
{
"epoch": 1.2883513685550605,
"grad_norm": 13.236169943162205,
"learning_rate": 7.04732139113954e-06,
"loss": 1.4344263076782227,
"step": 2024
},
{
"epoch": 1.288987905792489,
"grad_norm": 27.691402197217176,
"learning_rate": 7.043941724409915e-06,
"loss": 1.9201428890228271,
"step": 2025
},
{
"epoch": 1.2896244430299173,
"grad_norm": 9.082853874702812,
"learning_rate": 7.040560936096588e-06,
"loss": 1.872815728187561,
"step": 2026
},
{
"epoch": 1.2902609802673457,
"grad_norm": 9.519775269621894,
"learning_rate": 7.037179028054716e-06,
"loss": 1.6231926679611206,
"step": 2027
},
{
"epoch": 1.290897517504774,
"grad_norm": 12.265492071269582,
"learning_rate": 7.0337960021400755e-06,
"loss": 1.675493836402893,
"step": 2028
},
{
"epoch": 1.2915340547422023,
"grad_norm": 8.0998711496812,
"learning_rate": 7.030411860209052e-06,
"loss": 1.3420138359069824,
"step": 2029
},
{
"epoch": 1.2921705919796307,
"grad_norm": 14.83520231632638,
"learning_rate": 7.027026604118645e-06,
"loss": 1.67080557346344,
"step": 2030
},
{
"epoch": 1.292807129217059,
"grad_norm": 8.11513909549515,
"learning_rate": 7.023640235726467e-06,
"loss": 1.672710657119751,
"step": 2031
},
{
"epoch": 1.2934436664544875,
"grad_norm": 15.093764989824638,
"learning_rate": 7.020252756890736e-06,
"loss": 2.1060738563537598,
"step": 2032
},
{
"epoch": 1.294080203691916,
"grad_norm": 15.423871848235027,
"learning_rate": 7.016864169470284e-06,
"loss": 1.3418937921524048,
"step": 2033
},
{
"epoch": 1.2947167409293443,
"grad_norm": 12.45850837138449,
"learning_rate": 7.01347447532455e-06,
"loss": 1.6437814235687256,
"step": 2034
},
{
"epoch": 1.2953532781667727,
"grad_norm": 9.53408634455495,
"learning_rate": 7.01008367631358e-06,
"loss": 1.5858882665634155,
"step": 2035
},
{
"epoch": 1.295989815404201,
"grad_norm": 15.835859764318025,
"learning_rate": 7.006691774298025e-06,
"loss": 1.6233675479888916,
"step": 2036
},
{
"epoch": 1.2966263526416295,
"grad_norm": 13.247317583251935,
"learning_rate": 7.003298771139144e-06,
"loss": 2.148451328277588,
"step": 2037
},
{
"epoch": 1.297262889879058,
"grad_norm": 9.240344580069769,
"learning_rate": 6.999904668698799e-06,
"loss": 1.329669713973999,
"step": 2038
},
{
"epoch": 1.2978994271164863,
"grad_norm": 11.93373973334314,
"learning_rate": 6.996509468839453e-06,
"loss": 1.0661579370498657,
"step": 2039
},
{
"epoch": 1.2985359643539147,
"grad_norm": 11.492354390856546,
"learning_rate": 6.9931131734241766e-06,
"loss": 1.4517229795455933,
"step": 2040
},
{
"epoch": 1.2991725015913431,
"grad_norm": 8.709574489210635,
"learning_rate": 6.989715784316635e-06,
"loss": 1.35374116897583,
"step": 2041
},
{
"epoch": 1.2998090388287715,
"grad_norm": 15.839168012300563,
"learning_rate": 6.986317303381098e-06,
"loss": 1.1875379085540771,
"step": 2042
},
{
"epoch": 1.3004455760662,
"grad_norm": 12.002009511836185,
"learning_rate": 6.982917732482434e-06,
"loss": 1.3147296905517578,
"step": 2043
},
{
"epoch": 1.3010821133036283,
"grad_norm": 12.279551382216725,
"learning_rate": 6.979517073486107e-06,
"loss": 1.1447714567184448,
"step": 2044
},
{
"epoch": 1.3017186505410567,
"grad_norm": 7.876509462548768,
"learning_rate": 6.9761153282581804e-06,
"loss": 1.0903241634368896,
"step": 2045
},
{
"epoch": 1.3023551877784851,
"grad_norm": 11.317979083182502,
"learning_rate": 6.972712498665315e-06,
"loss": 1.6404948234558105,
"step": 2046
},
{
"epoch": 1.3029917250159135,
"grad_norm": 10.268270285673013,
"learning_rate": 6.969308586574763e-06,
"loss": 1.132559061050415,
"step": 2047
},
{
"epoch": 1.303628262253342,
"grad_norm": 10.786154234965135,
"learning_rate": 6.965903593854372e-06,
"loss": 1.2636125087738037,
"step": 2048
},
{
"epoch": 1.3042647994907703,
"grad_norm": 8.923504311774622,
"learning_rate": 6.962497522372584e-06,
"loss": 1.4736952781677246,
"step": 2049
},
{
"epoch": 1.3049013367281985,
"grad_norm": 8.6728743118788,
"learning_rate": 6.959090373998431e-06,
"loss": 1.9195170402526855,
"step": 2050
},
{
"epoch": 1.305537873965627,
"grad_norm": 7.665220601241584,
"learning_rate": 6.955682150601538e-06,
"loss": 1.4942491054534912,
"step": 2051
},
{
"epoch": 1.3061744112030553,
"grad_norm": 13.84671648780625,
"learning_rate": 6.9522728540521166e-06,
"loss": 2.2120413780212402,
"step": 2052
},
{
"epoch": 1.3068109484404837,
"grad_norm": 10.970140333694308,
"learning_rate": 6.9488624862209716e-06,
"loss": 1.000679850578308,
"step": 2053
},
{
"epoch": 1.3074474856779121,
"grad_norm": 14.96466821479489,
"learning_rate": 6.945451048979492e-06,
"loss": 0.9493977427482605,
"step": 2054
},
{
"epoch": 1.3080840229153405,
"grad_norm": 10.075101987656128,
"learning_rate": 6.9420385441996565e-06,
"loss": 1.9519625902175903,
"step": 2055
},
{
"epoch": 1.308720560152769,
"grad_norm": 10.501008247507887,
"learning_rate": 6.938624973754027e-06,
"loss": 0.9726603031158447,
"step": 2056
},
{
"epoch": 1.3093570973901973,
"grad_norm": 22.55639172223488,
"learning_rate": 6.93521033951575e-06,
"loss": 1.9709603786468506,
"step": 2057
},
{
"epoch": 1.3099936346276257,
"grad_norm": 9.24041575267706,
"learning_rate": 6.93179464335856e-06,
"loss": 1.548864722251892,
"step": 2058
},
{
"epoch": 1.3106301718650541,
"grad_norm": 10.444488265025456,
"learning_rate": 6.92837788715677e-06,
"loss": 1.6025577783584595,
"step": 2059
},
{
"epoch": 1.3112667091024826,
"grad_norm": 12.361006266083994,
"learning_rate": 6.924960072785274e-06,
"loss": 0.9072170257568359,
"step": 2060
},
{
"epoch": 1.311903246339911,
"grad_norm": 10.824248532783093,
"learning_rate": 6.921541202119552e-06,
"loss": 1.682476282119751,
"step": 2061
},
{
"epoch": 1.3125397835773391,
"grad_norm": 9.488210016660991,
"learning_rate": 6.918121277035657e-06,
"loss": 1.2035472393035889,
"step": 2062
},
{
"epoch": 1.3131763208147675,
"grad_norm": 8.83075244244878,
"learning_rate": 6.914700299410226e-06,
"loss": 1.960437536239624,
"step": 2063
},
{
"epoch": 1.313812858052196,
"grad_norm": 9.35789480959845,
"learning_rate": 6.9112782711204725e-06,
"loss": 1.5221984386444092,
"step": 2064
},
{
"epoch": 1.3144493952896243,
"grad_norm": 11.0167554457926,
"learning_rate": 6.9078551940441815e-06,
"loss": 1.681092619895935,
"step": 2065
},
{
"epoch": 1.3150859325270527,
"grad_norm": 8.03885310323271,
"learning_rate": 6.904431070059723e-06,
"loss": 1.3990423679351807,
"step": 2066
},
{
"epoch": 1.3157224697644812,
"grad_norm": 10.290753111505369,
"learning_rate": 6.901005901046031e-06,
"loss": 2.1758856773376465,
"step": 2067
},
{
"epoch": 1.3163590070019096,
"grad_norm": 14.92561642195631,
"learning_rate": 6.897579688882618e-06,
"loss": 1.563002586364746,
"step": 2068
},
{
"epoch": 1.316995544239338,
"grad_norm": 19.336800974196322,
"learning_rate": 6.894152435449572e-06,
"loss": 1.6746331453323364,
"step": 2069
},
{
"epoch": 1.3176320814767664,
"grad_norm": 12.397856385646229,
"learning_rate": 6.890724142627548e-06,
"loss": 1.199541687965393,
"step": 2070
},
{
"epoch": 1.3182686187141948,
"grad_norm": 11.45353796465258,
"learning_rate": 6.887294812297771e-06,
"loss": 1.6930968761444092,
"step": 2071
},
{
"epoch": 1.3189051559516232,
"grad_norm": 18.97514851516703,
"learning_rate": 6.883864446342036e-06,
"loss": 1.544563889503479,
"step": 2072
},
{
"epoch": 1.3195416931890516,
"grad_norm": 11.764993353068165,
"learning_rate": 6.880433046642712e-06,
"loss": 1.5668516159057617,
"step": 2073
},
{
"epoch": 1.32017823042648,
"grad_norm": 18.634138519650218,
"learning_rate": 6.877000615082726e-06,
"loss": 1.6925992965698242,
"step": 2074
},
{
"epoch": 1.3208147676639084,
"grad_norm": 10.291932073028669,
"learning_rate": 6.873567153545576e-06,
"loss": 1.4353834390640259,
"step": 2075
},
{
"epoch": 1.3214513049013368,
"grad_norm": 13.752283678046844,
"learning_rate": 6.870132663915328e-06,
"loss": 1.4297081232070923,
"step": 2076
},
{
"epoch": 1.3220878421387652,
"grad_norm": 10.23322896339913,
"learning_rate": 6.866697148076604e-06,
"loss": 1.1460442543029785,
"step": 2077
},
{
"epoch": 1.3227243793761936,
"grad_norm": 9.359470853685485,
"learning_rate": 6.863260607914597e-06,
"loss": 1.9953954219818115,
"step": 2078
},
{
"epoch": 1.323360916613622,
"grad_norm": 12.755737197521405,
"learning_rate": 6.859823045315059e-06,
"loss": 1.7688608169555664,
"step": 2079
},
{
"epoch": 1.3239974538510504,
"grad_norm": 9.255344407283879,
"learning_rate": 6.856384462164304e-06,
"loss": 1.6144315004348755,
"step": 2080
},
{
"epoch": 1.3246339910884788,
"grad_norm": 9.951025825254108,
"learning_rate": 6.8529448603492035e-06,
"loss": 1.6852055788040161,
"step": 2081
},
{
"epoch": 1.3252705283259072,
"grad_norm": 10.053643668777605,
"learning_rate": 6.84950424175719e-06,
"loss": 1.4977197647094727,
"step": 2082
},
{
"epoch": 1.3259070655633354,
"grad_norm": 14.38622341198003,
"learning_rate": 6.846062608276254e-06,
"loss": 1.5500648021697998,
"step": 2083
},
{
"epoch": 1.3265436028007638,
"grad_norm": 11.612784911737373,
"learning_rate": 6.842619961794943e-06,
"loss": 1.296177864074707,
"step": 2084
},
{
"epoch": 1.3271801400381922,
"grad_norm": 10.073476173831192,
"learning_rate": 6.839176304202357e-06,
"loss": 0.49733754992485046,
"step": 2085
},
{
"epoch": 1.3278166772756206,
"grad_norm": 8.403967414605898,
"learning_rate": 6.835731637388158e-06,
"loss": 1.5261521339416504,
"step": 2086
},
{
"epoch": 1.328453214513049,
"grad_norm": 14.621238975703363,
"learning_rate": 6.8322859632425545e-06,
"loss": 1.2459487915039062,
"step": 2087
},
{
"epoch": 1.3290897517504774,
"grad_norm": 9.749969165926782,
"learning_rate": 6.828839283656311e-06,
"loss": 1.4671571254730225,
"step": 2088
},
{
"epoch": 1.3297262889879058,
"grad_norm": 13.994804517959674,
"learning_rate": 6.825391600520746e-06,
"loss": 1.8795738220214844,
"step": 2089
},
{
"epoch": 1.3303628262253342,
"grad_norm": 15.225403118861353,
"learning_rate": 6.821942915727721e-06,
"loss": 1.9147197008132935,
"step": 2090
},
{
"epoch": 1.3309993634627626,
"grad_norm": 12.529091804970822,
"learning_rate": 6.818493231169659e-06,
"loss": 1.5957629680633545,
"step": 2091
},
{
"epoch": 1.331635900700191,
"grad_norm": 21.73659702613509,
"learning_rate": 6.81504254873952e-06,
"loss": 1.6237444877624512,
"step": 2092
},
{
"epoch": 1.3322724379376194,
"grad_norm": 16.648087289267234,
"learning_rate": 6.8115908703308175e-06,
"loss": 2.1958940029144287,
"step": 2093
},
{
"epoch": 1.3329089751750478,
"grad_norm": 14.81116786339602,
"learning_rate": 6.808138197837613e-06,
"loss": 2.1454992294311523,
"step": 2094
},
{
"epoch": 1.3335455124124762,
"grad_norm": 11.479577307549393,
"learning_rate": 6.804684533154506e-06,
"loss": 1.1923106908798218,
"step": 2095
},
{
"epoch": 1.3341820496499044,
"grad_norm": 28.788853303774175,
"learning_rate": 6.801229878176652e-06,
"loss": 3.3707993030548096,
"step": 2096
},
{
"epoch": 1.3348185868873328,
"grad_norm": 10.576484877317576,
"learning_rate": 6.797774234799739e-06,
"loss": 2.0627214908599854,
"step": 2097
},
{
"epoch": 1.3354551241247612,
"grad_norm": 13.257095358800573,
"learning_rate": 6.79431760492e-06,
"loss": 2.7560529708862305,
"step": 2098
},
{
"epoch": 1.3360916613621896,
"grad_norm": 14.989501136818301,
"learning_rate": 6.790859990434217e-06,
"loss": 1.19850492477417,
"step": 2099
},
{
"epoch": 1.336728198599618,
"grad_norm": 9.018102816625172,
"learning_rate": 6.787401393239701e-06,
"loss": 1.2823089361190796,
"step": 2100
},
{
"epoch": 1.3373647358370464,
"grad_norm": 7.995870948103689,
"learning_rate": 6.783941815234311e-06,
"loss": 1.873117446899414,
"step": 2101
},
{
"epoch": 1.3380012730744748,
"grad_norm": 7.519059288647954,
"learning_rate": 6.780481258316438e-06,
"loss": 1.185487985610962,
"step": 2102
},
{
"epoch": 1.3386378103119032,
"grad_norm": 17.16417414265568,
"learning_rate": 6.777019724385014e-06,
"loss": 1.7703580856323242,
"step": 2103
},
{
"epoch": 1.3392743475493316,
"grad_norm": 10.6109152574734,
"learning_rate": 6.773557215339508e-06,
"loss": 2.2608628273010254,
"step": 2104
},
{
"epoch": 1.33991088478676,
"grad_norm": 16.146306015694844,
"learning_rate": 6.770093733079919e-06,
"loss": 1.4796688556671143,
"step": 2105
},
{
"epoch": 1.3405474220241884,
"grad_norm": 15.417553824160835,
"learning_rate": 6.766629279506786e-06,
"loss": 1.3636486530303955,
"step": 2106
},
{
"epoch": 1.3411839592616168,
"grad_norm": 9.80590059967913,
"learning_rate": 6.763163856521178e-06,
"loss": 2.326904058456421,
"step": 2107
},
{
"epoch": 1.3418204964990452,
"grad_norm": 11.308190471866444,
"learning_rate": 6.7596974660246925e-06,
"loss": 1.6813642978668213,
"step": 2108
},
{
"epoch": 1.3424570337364736,
"grad_norm": 8.214793086651406,
"learning_rate": 6.7562301099194675e-06,
"loss": 2.079240083694458,
"step": 2109
},
{
"epoch": 1.343093570973902,
"grad_norm": 7.190351936622931,
"learning_rate": 6.75276179010816e-06,
"loss": 1.669417142868042,
"step": 2110
},
{
"epoch": 1.3437301082113304,
"grad_norm": 7.714811460919819,
"learning_rate": 6.749292508493962e-06,
"loss": 1.6795430183410645,
"step": 2111
},
{
"epoch": 1.3443666454487588,
"grad_norm": 9.850212126655698,
"learning_rate": 6.745822266980593e-06,
"loss": 1.6036384105682373,
"step": 2112
},
{
"epoch": 1.3450031826861872,
"grad_norm": 12.67826523216415,
"learning_rate": 6.742351067472297e-06,
"loss": 1.8061586618423462,
"step": 2113
},
{
"epoch": 1.3456397199236156,
"grad_norm": 9.403740555012115,
"learning_rate": 6.738878911873846e-06,
"loss": 1.4865294694900513,
"step": 2114
},
{
"epoch": 1.346276257161044,
"grad_norm": 9.326954129628175,
"learning_rate": 6.735405802090536e-06,
"loss": 1.6607012748718262,
"step": 2115
},
{
"epoch": 1.3469127943984724,
"grad_norm": 9.102535065466336,
"learning_rate": 6.731931740028184e-06,
"loss": 1.8555759191513062,
"step": 2116
},
{
"epoch": 1.3475493316359006,
"grad_norm": 8.085776555039383,
"learning_rate": 6.728456727593136e-06,
"loss": 1.4615516662597656,
"step": 2117
},
{
"epoch": 1.348185868873329,
"grad_norm": 8.35552783607606,
"learning_rate": 6.72498076669225e-06,
"loss": 1.8943145275115967,
"step": 2118
},
{
"epoch": 1.3488224061107574,
"grad_norm": 11.708244885366367,
"learning_rate": 6.7215038592329125e-06,
"loss": 1.4016022682189941,
"step": 2119
},
{
"epoch": 1.3494589433481858,
"grad_norm": 8.840274040091456,
"learning_rate": 6.718026007123026e-06,
"loss": 1.2076852321624756,
"step": 2120
},
{
"epoch": 1.3500954805856142,
"grad_norm": 8.43847580334136,
"learning_rate": 6.714547212271012e-06,
"loss": 1.924154281616211,
"step": 2121
},
{
"epoch": 1.3507320178230426,
"grad_norm": 18.779962057283058,
"learning_rate": 6.711067476585811e-06,
"loss": 1.948960304260254,
"step": 2122
},
{
"epoch": 1.351368555060471,
"grad_norm": 12.094242164729383,
"learning_rate": 6.707586801976873e-06,
"loss": 2.12758469581604,
"step": 2123
},
{
"epoch": 1.3520050922978994,
"grad_norm": 10.368550448608548,
"learning_rate": 6.7041051903541744e-06,
"loss": 1.8979034423828125,
"step": 2124
},
{
"epoch": 1.3526416295353278,
"grad_norm": 13.188567587338362,
"learning_rate": 6.700622643628196e-06,
"loss": 2.141956329345703,
"step": 2125
},
{
"epoch": 1.3532781667727563,
"grad_norm": 10.984995851433844,
"learning_rate": 6.697139163709936e-06,
"loss": 1.3657575845718384,
"step": 2126
},
{
"epoch": 1.3539147040101847,
"grad_norm": 11.289468170136036,
"learning_rate": 6.693654752510905e-06,
"loss": 1.7047542333602905,
"step": 2127
},
{
"epoch": 1.354551241247613,
"grad_norm": 12.636042984409194,
"learning_rate": 6.690169411943124e-06,
"loss": 1.6948938369750977,
"step": 2128
},
{
"epoch": 1.3551877784850412,
"grad_norm": 15.027155188825471,
"learning_rate": 6.686683143919119e-06,
"loss": 1.554497480392456,
"step": 2129
},
{
"epoch": 1.3558243157224696,
"grad_norm": 17.482166423106033,
"learning_rate": 6.683195950351937e-06,
"loss": 1.5829187631607056,
"step": 2130
},
{
"epoch": 1.356460852959898,
"grad_norm": 8.993795361126464,
"learning_rate": 6.679707833155119e-06,
"loss": 1.196561574935913,
"step": 2131
},
{
"epoch": 1.3570973901973264,
"grad_norm": 10.224631073276461,
"learning_rate": 6.676218794242724e-06,
"loss": 1.5029449462890625,
"step": 2132
},
{
"epoch": 1.3577339274347549,
"grad_norm": 13.622151145333707,
"learning_rate": 6.6727288355293085e-06,
"loss": 1.1759300231933594,
"step": 2133
},
{
"epoch": 1.3583704646721833,
"grad_norm": 13.787038924100134,
"learning_rate": 6.669237958929939e-06,
"loss": 1.5236375331878662,
"step": 2134
},
{
"epoch": 1.3590070019096117,
"grad_norm": 9.50872271068457,
"learning_rate": 6.665746166360185e-06,
"loss": 1.835353136062622,
"step": 2135
},
{
"epoch": 1.35964353914704,
"grad_norm": 8.86326023305628,
"learning_rate": 6.662253459736114e-06,
"loss": 1.880853533744812,
"step": 2136
},
{
"epoch": 1.3602800763844685,
"grad_norm": 12.350598605749058,
"learning_rate": 6.658759840974303e-06,
"loss": 2.347693681716919,
"step": 2137
},
{
"epoch": 1.3609166136218969,
"grad_norm": 10.603024134542418,
"learning_rate": 6.655265311991822e-06,
"loss": 1.2131357192993164,
"step": 2138
},
{
"epoch": 1.3615531508593253,
"grad_norm": 17.34616083937125,
"learning_rate": 6.651769874706245e-06,
"loss": 1.8799031972885132,
"step": 2139
},
{
"epoch": 1.3621896880967537,
"grad_norm": 12.15157410218963,
"learning_rate": 6.6482735310356425e-06,
"loss": 1.9390839338302612,
"step": 2140
},
{
"epoch": 1.362826225334182,
"grad_norm": 20.42026972304762,
"learning_rate": 6.644776282898584e-06,
"loss": 1.3343855142593384,
"step": 2141
},
{
"epoch": 1.3634627625716105,
"grad_norm": 13.438106280833992,
"learning_rate": 6.641278132214133e-06,
"loss": 1.5200254917144775,
"step": 2142
},
{
"epoch": 1.3640992998090389,
"grad_norm": 6.413930253727851,
"learning_rate": 6.637779080901851e-06,
"loss": 1.2077040672302246,
"step": 2143
},
{
"epoch": 1.3647358370464673,
"grad_norm": 9.180870332071997,
"learning_rate": 6.63427913088179e-06,
"loss": 2.02775239944458,
"step": 2144
},
{
"epoch": 1.3653723742838957,
"grad_norm": 11.304053679491917,
"learning_rate": 6.6307782840745e-06,
"loss": 1.7007734775543213,
"step": 2145
},
{
"epoch": 1.366008911521324,
"grad_norm": 8.518756502344205,
"learning_rate": 6.627276542401017e-06,
"loss": 1.2333950996398926,
"step": 2146
},
{
"epoch": 1.3666454487587525,
"grad_norm": 12.418587258954801,
"learning_rate": 6.623773907782876e-06,
"loss": 1.3447022438049316,
"step": 2147
},
{
"epoch": 1.367281985996181,
"grad_norm": 9.194698166497082,
"learning_rate": 6.6202703821420935e-06,
"loss": 1.5185067653656006,
"step": 2148
},
{
"epoch": 1.3679185232336093,
"grad_norm": 8.879987789360879,
"learning_rate": 6.61676596740118e-06,
"loss": 1.0394394397735596,
"step": 2149
},
{
"epoch": 1.3685550604710375,
"grad_norm": 7.841250549021613,
"learning_rate": 6.613260665483135e-06,
"loss": 1.3038004636764526,
"step": 2150
},
{
"epoch": 1.3691915977084659,
"grad_norm": 17.55731240460262,
"learning_rate": 6.6097544783114405e-06,
"loss": 1.7838655710220337,
"step": 2151
},
{
"epoch": 1.3698281349458943,
"grad_norm": 8.228606331506027,
"learning_rate": 6.606247407810067e-06,
"loss": 1.8675861358642578,
"step": 2152
},
{
"epoch": 1.3704646721833227,
"grad_norm": 7.467059484823506,
"learning_rate": 6.60273945590347e-06,
"loss": 1.3836982250213623,
"step": 2153
},
{
"epoch": 1.371101209420751,
"grad_norm": 12.485888251305484,
"learning_rate": 6.599230624516585e-06,
"loss": 1.5675023794174194,
"step": 2154
},
{
"epoch": 1.3717377466581795,
"grad_norm": 15.567851452398823,
"learning_rate": 6.5957209155748355e-06,
"loss": 1.7862575054168701,
"step": 2155
},
{
"epoch": 1.372374283895608,
"grad_norm": 13.981443969638242,
"learning_rate": 6.592210331004123e-06,
"loss": 1.1278631687164307,
"step": 2156
},
{
"epoch": 1.3730108211330363,
"grad_norm": 13.15212372653646,
"learning_rate": 6.588698872730831e-06,
"loss": 1.6467361450195312,
"step": 2157
},
{
"epoch": 1.3736473583704647,
"grad_norm": 15.616145367996944,
"learning_rate": 6.5851865426818215e-06,
"loss": 1.6315853595733643,
"step": 2158
},
{
"epoch": 1.374283895607893,
"grad_norm": 12.071659180855534,
"learning_rate": 6.581673342784435e-06,
"loss": 1.690008282661438,
"step": 2159
},
{
"epoch": 1.3749204328453215,
"grad_norm": 10.211327025135933,
"learning_rate": 6.578159274966489e-06,
"loss": 0.9187987446784973,
"step": 2160
},
{
"epoch": 1.37555697008275,
"grad_norm": 13.438711563797808,
"learning_rate": 6.57464434115628e-06,
"loss": 1.6902811527252197,
"step": 2161
},
{
"epoch": 1.3761935073201783,
"grad_norm": 12.453079163260508,
"learning_rate": 6.571128543282576e-06,
"loss": 1.4750566482543945,
"step": 2162
},
{
"epoch": 1.3768300445576065,
"grad_norm": 17.959531061122476,
"learning_rate": 6.5676118832746195e-06,
"loss": 1.9648330211639404,
"step": 2163
},
{
"epoch": 1.377466581795035,
"grad_norm": 12.300719608998236,
"learning_rate": 6.564094363062128e-06,
"loss": 1.2455105781555176,
"step": 2164
},
{
"epoch": 1.3781031190324633,
"grad_norm": 11.219402471677457,
"learning_rate": 6.5605759845752925e-06,
"loss": 1.2983028888702393,
"step": 2165
},
{
"epoch": 1.3787396562698917,
"grad_norm": 14.328811997520638,
"learning_rate": 6.55705674974477e-06,
"loss": 1.9251346588134766,
"step": 2166
},
{
"epoch": 1.37937619350732,
"grad_norm": 14.64826858357797,
"learning_rate": 6.553536660501691e-06,
"loss": 1.6900074481964111,
"step": 2167
},
{
"epoch": 1.3800127307447485,
"grad_norm": 8.528301843090656,
"learning_rate": 6.5500157187776555e-06,
"loss": 2.1877009868621826,
"step": 2168
},
{
"epoch": 1.380649267982177,
"grad_norm": 7.8651908301529225,
"learning_rate": 6.546493926504727e-06,
"loss": 1.7229034900665283,
"step": 2169
},
{
"epoch": 1.3812858052196053,
"grad_norm": 15.94456459374675,
"learning_rate": 6.54297128561544e-06,
"loss": 1.7511520385742188,
"step": 2170
},
{
"epoch": 1.3819223424570337,
"grad_norm": 14.878961962405457,
"learning_rate": 6.539447798042794e-06,
"loss": 2.0114879608154297,
"step": 2171
},
{
"epoch": 1.3825588796944621,
"grad_norm": 10.800956160351202,
"learning_rate": 6.535923465720249e-06,
"loss": 1.239030361175537,
"step": 2172
},
{
"epoch": 1.3831954169318905,
"grad_norm": 10.261961533103598,
"learning_rate": 6.532398290581736e-06,
"loss": 1.4675251245498657,
"step": 2173
},
{
"epoch": 1.383831954169319,
"grad_norm": 9.636887736053735,
"learning_rate": 6.528872274561641e-06,
"loss": 1.2436105012893677,
"step": 2174
},
{
"epoch": 1.3844684914067473,
"grad_norm": 11.54901510869167,
"learning_rate": 6.525345419594818e-06,
"loss": 1.1725226640701294,
"step": 2175
},
{
"epoch": 1.3851050286441757,
"grad_norm": 10.3832145763699,
"learning_rate": 6.521817727616574e-06,
"loss": 1.9142147302627563,
"step": 2176
},
{
"epoch": 1.3857415658816041,
"grad_norm": 10.929996772636745,
"learning_rate": 6.518289200562682e-06,
"loss": 1.3388845920562744,
"step": 2177
},
{
"epoch": 1.3863781031190325,
"grad_norm": 12.734890563619743,
"learning_rate": 6.51475984036937e-06,
"loss": 2.039224624633789,
"step": 2178
},
{
"epoch": 1.387014640356461,
"grad_norm": 14.31460829114896,
"learning_rate": 6.511229648973323e-06,
"loss": 1.4467695951461792,
"step": 2179
},
{
"epoch": 1.3876511775938893,
"grad_norm": 10.081285391928505,
"learning_rate": 6.507698628311686e-06,
"loss": 1.5399799346923828,
"step": 2180
},
{
"epoch": 1.3882877148313177,
"grad_norm": 18.571662120205282,
"learning_rate": 6.5041667803220535e-06,
"loss": 1.2376508712768555,
"step": 2181
},
{
"epoch": 1.3889242520687461,
"grad_norm": 14.5132149255744,
"learning_rate": 6.500634106942474e-06,
"loss": 1.773813247680664,
"step": 2182
},
{
"epoch": 1.3895607893061745,
"grad_norm": 8.728097996032005,
"learning_rate": 6.497100610111456e-06,
"loss": 1.359797477722168,
"step": 2183
},
{
"epoch": 1.3901973265436027,
"grad_norm": 14.700494437182511,
"learning_rate": 6.4935662917679546e-06,
"loss": 0.739691436290741,
"step": 2184
},
{
"epoch": 1.3908338637810311,
"grad_norm": 16.262194491870044,
"learning_rate": 6.490031153851373e-06,
"loss": 1.7340049743652344,
"step": 2185
},
{
"epoch": 1.3914704010184595,
"grad_norm": 10.227465908930895,
"learning_rate": 6.48649519830157e-06,
"loss": 1.7808657884597778,
"step": 2186
},
{
"epoch": 1.392106938255888,
"grad_norm": 14.236925681514215,
"learning_rate": 6.482958427058847e-06,
"loss": 1.6473512649536133,
"step": 2187
},
{
"epoch": 1.3927434754933163,
"grad_norm": 18.488163841989763,
"learning_rate": 6.479420842063963e-06,
"loss": 1.766308307647705,
"step": 2188
},
{
"epoch": 1.3933800127307447,
"grad_norm": 7.312654768912787,
"learning_rate": 6.47588244525811e-06,
"loss": 1.4837501049041748,
"step": 2189
},
{
"epoch": 1.3940165499681731,
"grad_norm": 11.066710488368418,
"learning_rate": 6.4723432385829384e-06,
"loss": 2.069955825805664,
"step": 2190
},
{
"epoch": 1.3946530872056015,
"grad_norm": 12.34520529771813,
"learning_rate": 6.468803223980534e-06,
"loss": 1.2535465955734253,
"step": 2191
},
{
"epoch": 1.39528962444303,
"grad_norm": 12.652184847236011,
"learning_rate": 6.465262403393429e-06,
"loss": 1.2388365268707275,
"step": 2192
},
{
"epoch": 1.3959261616804584,
"grad_norm": 10.567766751278663,
"learning_rate": 6.461720778764597e-06,
"loss": 1.4995468854904175,
"step": 2193
},
{
"epoch": 1.3965626989178868,
"grad_norm": 7.834618981007849,
"learning_rate": 6.458178352037459e-06,
"loss": 1.8272128105163574,
"step": 2194
},
{
"epoch": 1.3971992361553152,
"grad_norm": 12.325471764275232,
"learning_rate": 6.454635125155863e-06,
"loss": 1.2270289659500122,
"step": 2195
},
{
"epoch": 1.3978357733927433,
"grad_norm": 14.79567697031164,
"learning_rate": 6.45109110006411e-06,
"loss": 1.8408695459365845,
"step": 2196
},
{
"epoch": 1.3984723106301717,
"grad_norm": 7.701637127659964,
"learning_rate": 6.447546278706929e-06,
"loss": 1.272929072380066,
"step": 2197
},
{
"epoch": 1.3991088478676001,
"grad_norm": 7.0890368356852855,
"learning_rate": 6.444000663029494e-06,
"loss": 1.5190505981445312,
"step": 2198
},
{
"epoch": 1.3997453851050286,
"grad_norm": 23.48191736552914,
"learning_rate": 6.440454254977407e-06,
"loss": 1.439098834991455,
"step": 2199
},
{
"epoch": 1.400381922342457,
"grad_norm": 9.401723931268236,
"learning_rate": 6.43690705649671e-06,
"loss": 1.9440866708755493,
"step": 2200
},
{
"epoch": 1.4010184595798854,
"grad_norm": 14.025034385209763,
"learning_rate": 6.433359069533878e-06,
"loss": 2.302647352218628,
"step": 2201
},
{
"epoch": 1.4016549968173138,
"grad_norm": 14.723959105524486,
"learning_rate": 6.429810296035817e-06,
"loss": 1.3513097763061523,
"step": 2202
},
{
"epoch": 1.4022915340547422,
"grad_norm": 14.133590601314184,
"learning_rate": 6.426260737949866e-06,
"loss": 1.1011390686035156,
"step": 2203
},
{
"epoch": 1.4029280712921706,
"grad_norm": 17.586480443216193,
"learning_rate": 6.422710397223794e-06,
"loss": 1.9014208316802979,
"step": 2204
},
{
"epoch": 1.403564608529599,
"grad_norm": 12.757630956889631,
"learning_rate": 6.419159275805799e-06,
"loss": 2.630929708480835,
"step": 2205
},
{
"epoch": 1.4042011457670274,
"grad_norm": 13.732215912250094,
"learning_rate": 6.4156073756445084e-06,
"loss": 1.3734755516052246,
"step": 2206
},
{
"epoch": 1.4048376830044558,
"grad_norm": 32.171196888479926,
"learning_rate": 6.4120546986889755e-06,
"loss": 2.9236714839935303,
"step": 2207
},
{
"epoch": 1.4054742202418842,
"grad_norm": 14.712605726738637,
"learning_rate": 6.408501246888682e-06,
"loss": 1.4317361116409302,
"step": 2208
},
{
"epoch": 1.4061107574793126,
"grad_norm": 13.863448900221524,
"learning_rate": 6.404947022193535e-06,
"loss": 2.090886354446411,
"step": 2209
},
{
"epoch": 1.406747294716741,
"grad_norm": 7.631648416620053,
"learning_rate": 6.4013920265538595e-06,
"loss": 1.593640923500061,
"step": 2210
},
{
"epoch": 1.4073838319541694,
"grad_norm": 7.401858916293992,
"learning_rate": 6.397836261920415e-06,
"loss": 1.1235381364822388,
"step": 2211
},
{
"epoch": 1.4080203691915978,
"grad_norm": 9.132508483179071,
"learning_rate": 6.3942797302443706e-06,
"loss": 1.4245104789733887,
"step": 2212
},
{
"epoch": 1.4086569064290262,
"grad_norm": 10.062049180418628,
"learning_rate": 6.390722433477325e-06,
"loss": 1.3633389472961426,
"step": 2213
},
{
"epoch": 1.4092934436664546,
"grad_norm": 11.052650137533167,
"learning_rate": 6.387164373571293e-06,
"loss": 1.1453993320465088,
"step": 2214
},
{
"epoch": 1.409929980903883,
"grad_norm": 8.670156035931027,
"learning_rate": 6.383605552478709e-06,
"loss": 1.2008607387542725,
"step": 2215
},
{
"epoch": 1.4105665181413114,
"grad_norm": 10.963651499575823,
"learning_rate": 6.3800459721524255e-06,
"loss": 1.474987506866455,
"step": 2216
},
{
"epoch": 1.4112030553787396,
"grad_norm": 16.231420560110255,
"learning_rate": 6.37648563454571e-06,
"loss": 2.3234455585479736,
"step": 2217
},
{
"epoch": 1.411839592616168,
"grad_norm": 13.941402140619234,
"learning_rate": 6.372924541612248e-06,
"loss": 1.3550442457199097,
"step": 2218
},
{
"epoch": 1.4124761298535964,
"grad_norm": 7.991598436727154,
"learning_rate": 6.369362695306138e-06,
"loss": 1.4468458890914917,
"step": 2219
},
{
"epoch": 1.4131126670910248,
"grad_norm": 8.336015467632643,
"learning_rate": 6.36580009758189e-06,
"loss": 1.655072808265686,
"step": 2220
},
{
"epoch": 1.4137492043284532,
"grad_norm": 16.633901474263247,
"learning_rate": 6.362236750394431e-06,
"loss": 1.7641286849975586,
"step": 2221
},
{
"epoch": 1.4143857415658816,
"grad_norm": 19.725347224513143,
"learning_rate": 6.3586726556990955e-06,
"loss": 2.0049242973327637,
"step": 2222
},
{
"epoch": 1.41502227880331,
"grad_norm": 10.7435012585398,
"learning_rate": 6.355107815451629e-06,
"loss": 1.0433449745178223,
"step": 2223
},
{
"epoch": 1.4156588160407384,
"grad_norm": 19.339243493861588,
"learning_rate": 6.351542231608188e-06,
"loss": 1.3136436939239502,
"step": 2224
},
{
"epoch": 1.4162953532781668,
"grad_norm": 10.497933067797925,
"learning_rate": 6.3479759061253334e-06,
"loss": 0.8929241895675659,
"step": 2225
},
{
"epoch": 1.4169318905155952,
"grad_norm": 9.559182782439565,
"learning_rate": 6.344408840960037e-06,
"loss": 1.1983423233032227,
"step": 2226
},
{
"epoch": 1.4175684277530236,
"grad_norm": 9.529239589057827,
"learning_rate": 6.340841038069673e-06,
"loss": 1.0780221223831177,
"step": 2227
},
{
"epoch": 1.418204964990452,
"grad_norm": 9.608827881385222,
"learning_rate": 6.337272499412023e-06,
"loss": 1.1054682731628418,
"step": 2228
},
{
"epoch": 1.4188415022278804,
"grad_norm": 15.719206287796904,
"learning_rate": 6.3337032269452715e-06,
"loss": 2.25411319732666,
"step": 2229
},
{
"epoch": 1.4194780394653086,
"grad_norm": 17.794842478363268,
"learning_rate": 6.330133222628004e-06,
"loss": 1.542565941810608,
"step": 2230
},
{
"epoch": 1.420114576702737,
"grad_norm": 9.751934079958934,
"learning_rate": 6.326562488419213e-06,
"loss": 1.5496629476547241,
"step": 2231
},
{
"epoch": 1.4207511139401654,
"grad_norm": 22.19222644496835,
"learning_rate": 6.322991026278285e-06,
"loss": 1.441185474395752,
"step": 2232
},
{
"epoch": 1.4213876511775938,
"grad_norm": 10.510101925919152,
"learning_rate": 6.319418838165005e-06,
"loss": 1.432142734527588,
"step": 2233
},
{
"epoch": 1.4220241884150222,
"grad_norm": 13.36563236105199,
"learning_rate": 6.315845926039568e-06,
"loss": 1.7360641956329346,
"step": 2234
},
{
"epoch": 1.4226607256524506,
"grad_norm": 15.16706158597897,
"learning_rate": 6.312272291862553e-06,
"loss": 1.6843822002410889,
"step": 2235
},
{
"epoch": 1.423297262889879,
"grad_norm": 10.978306537880643,
"learning_rate": 6.308697937594942e-06,
"loss": 1.5882022380828857,
"step": 2236
},
{
"epoch": 1.4239338001273074,
"grad_norm": 11.26713450947295,
"learning_rate": 6.30512286519811e-06,
"loss": 1.4534211158752441,
"step": 2237
},
{
"epoch": 1.4245703373647358,
"grad_norm": 9.62433982888293,
"learning_rate": 6.301547076633825e-06,
"loss": 1.552571177482605,
"step": 2238
},
{
"epoch": 1.4252068746021642,
"grad_norm": 23.46147658043418,
"learning_rate": 6.2979705738642535e-06,
"loss": 2.4739670753479004,
"step": 2239
},
{
"epoch": 1.4258434118395926,
"grad_norm": 10.946123772411452,
"learning_rate": 6.294393358851947e-06,
"loss": 1.0432250499725342,
"step": 2240
},
{
"epoch": 1.426479949077021,
"grad_norm": 15.258792066809578,
"learning_rate": 6.290815433559853e-06,
"loss": 1.9332001209259033,
"step": 2241
},
{
"epoch": 1.4271164863144494,
"grad_norm": 14.441468370947723,
"learning_rate": 6.287236799951306e-06,
"loss": 2.2849225997924805,
"step": 2242
},
{
"epoch": 1.4277530235518778,
"grad_norm": 8.478215874496488,
"learning_rate": 6.283657459990028e-06,
"loss": 2.5756845474243164,
"step": 2243
},
{
"epoch": 1.4283895607893062,
"grad_norm": 10.33023886552243,
"learning_rate": 6.2800774156401334e-06,
"loss": 1.2622883319854736,
"step": 2244
},
{
"epoch": 1.4290260980267346,
"grad_norm": 10.23879794338887,
"learning_rate": 6.276496668866118e-06,
"loss": 1.4622156620025635,
"step": 2245
},
{
"epoch": 1.429662635264163,
"grad_norm": 11.428320725826136,
"learning_rate": 6.272915221632867e-06,
"loss": 2.4076485633850098,
"step": 2246
},
{
"epoch": 1.4302991725015914,
"grad_norm": 9.18367055531244,
"learning_rate": 6.269333075905647e-06,
"loss": 2.7986347675323486,
"step": 2247
},
{
"epoch": 1.4309357097390198,
"grad_norm": 10.718811501953986,
"learning_rate": 6.265750233650108e-06,
"loss": 1.0415111780166626,
"step": 2248
},
{
"epoch": 1.4315722469764482,
"grad_norm": 13.89304174407212,
"learning_rate": 6.262166696832288e-06,
"loss": 1.673966407775879,
"step": 2249
},
{
"epoch": 1.4322087842138764,
"grad_norm": 13.614234895048883,
"learning_rate": 6.258582467418596e-06,
"loss": 1.7757503986358643,
"step": 2250
},
{
"epoch": 1.4328453214513048,
"grad_norm": 8.229189188841083,
"learning_rate": 6.254997547375828e-06,
"loss": 1.5330592393875122,
"step": 2251
},
{
"epoch": 1.4334818586887332,
"grad_norm": 13.279023169672653,
"learning_rate": 6.251411938671159e-06,
"loss": 1.8557310104370117,
"step": 2252
},
{
"epoch": 1.4341183959261616,
"grad_norm": 12.420664420892885,
"learning_rate": 6.2478256432721395e-06,
"loss": 1.8852431774139404,
"step": 2253
},
{
"epoch": 1.43475493316359,
"grad_norm": 17.22679859354758,
"learning_rate": 6.244238663146698e-06,
"loss": 1.0919334888458252,
"step": 2254
},
{
"epoch": 1.4353914704010184,
"grad_norm": 11.259133977733073,
"learning_rate": 6.240651000263136e-06,
"loss": 1.5498840808868408,
"step": 2255
},
{
"epoch": 1.4360280076384468,
"grad_norm": 18.24539838096365,
"learning_rate": 6.237062656590135e-06,
"loss": 1.4166648387908936,
"step": 2256
},
{
"epoch": 1.4366645448758752,
"grad_norm": 8.713505921407725,
"learning_rate": 6.233473634096746e-06,
"loss": 1.3557902574539185,
"step": 2257
},
{
"epoch": 1.4373010821133037,
"grad_norm": 8.716074123775524,
"learning_rate": 6.229883934752393e-06,
"loss": 1.2707334756851196,
"step": 2258
},
{
"epoch": 1.437937619350732,
"grad_norm": 14.63987692349558,
"learning_rate": 6.226293560526875e-06,
"loss": 1.2643358707427979,
"step": 2259
},
{
"epoch": 1.4385741565881605,
"grad_norm": 10.401459052982707,
"learning_rate": 6.222702513390354e-06,
"loss": 2.7216434478759766,
"step": 2260
},
{
"epoch": 1.4392106938255889,
"grad_norm": 8.333916830423911,
"learning_rate": 6.219110795313368e-06,
"loss": 1.3003531694412231,
"step": 2261
},
{
"epoch": 1.4398472310630173,
"grad_norm": 11.773605144365478,
"learning_rate": 6.21551840826682e-06,
"loss": 1.5093822479248047,
"step": 2262
},
{
"epoch": 1.4404837683004454,
"grad_norm": 17.371610763815383,
"learning_rate": 6.211925354221981e-06,
"loss": 1.3118256330490112,
"step": 2263
},
{
"epoch": 1.4411203055378738,
"grad_norm": 8.882387238630173,
"learning_rate": 6.208331635150491e-06,
"loss": 1.1394437551498413,
"step": 2264
},
{
"epoch": 1.4417568427753022,
"grad_norm": 8.044599760767078,
"learning_rate": 6.204737253024347e-06,
"loss": 1.6412131786346436,
"step": 2265
},
{
"epoch": 1.4423933800127307,
"grad_norm": 10.922916029421351,
"learning_rate": 6.201142209815915e-06,
"loss": 0.9688152074813843,
"step": 2266
},
{
"epoch": 1.443029917250159,
"grad_norm": 12.828609189083918,
"learning_rate": 6.197546507497928e-06,
"loss": 1.4837284088134766,
"step": 2267
},
{
"epoch": 1.4436664544875875,
"grad_norm": 11.860294561287034,
"learning_rate": 6.193950148043473e-06,
"loss": 1.5708303451538086,
"step": 2268
},
{
"epoch": 1.4443029917250159,
"grad_norm": 7.952589988233202,
"learning_rate": 6.1903531334259985e-06,
"loss": 1.4329659938812256,
"step": 2269
},
{
"epoch": 1.4449395289624443,
"grad_norm": 10.539927680438874,
"learning_rate": 6.186755465619319e-06,
"loss": 2.3485629558563232,
"step": 2270
},
{
"epoch": 1.4455760661998727,
"grad_norm": 9.23104905413811,
"learning_rate": 6.183157146597599e-06,
"loss": 1.5098153352737427,
"step": 2271
},
{
"epoch": 1.446212603437301,
"grad_norm": 15.346346501941884,
"learning_rate": 6.179558178335367e-06,
"loss": 1.768712043762207,
"step": 2272
},
{
"epoch": 1.4468491406747295,
"grad_norm": 12.7331926715348,
"learning_rate": 6.1759585628075045e-06,
"loss": 2.09372615814209,
"step": 2273
},
{
"epoch": 1.4474856779121579,
"grad_norm": 10.14585137644003,
"learning_rate": 6.1723583019892504e-06,
"loss": 1.878859043121338,
"step": 2274
},
{
"epoch": 1.4481222151495863,
"grad_norm": 6.674020724819407,
"learning_rate": 6.168757397856194e-06,
"loss": 1.0077816247940063,
"step": 2275
},
{
"epoch": 1.4487587523870147,
"grad_norm": 8.66182461368363,
"learning_rate": 6.1651558523842804e-06,
"loss": 1.1360124349594116,
"step": 2276
},
{
"epoch": 1.449395289624443,
"grad_norm": 10.22036379463639,
"learning_rate": 6.161553667549807e-06,
"loss": 1.7001159191131592,
"step": 2277
},
{
"epoch": 1.4500318268618715,
"grad_norm": 11.501379211176305,
"learning_rate": 6.157950845329419e-06,
"loss": 1.5191199779510498,
"step": 2278
},
{
"epoch": 1.4506683640993,
"grad_norm": 13.489428655557099,
"learning_rate": 6.154347387700115e-06,
"loss": 0.9401975870132446,
"step": 2279
},
{
"epoch": 1.4513049013367283,
"grad_norm": 9.189412362825243,
"learning_rate": 6.150743296639241e-06,
"loss": 1.4657886028289795,
"step": 2280
},
{
"epoch": 1.4519414385741567,
"grad_norm": 7.063744317605319,
"learning_rate": 6.14713857412449e-06,
"loss": 1.6262617111206055,
"step": 2281
},
{
"epoch": 1.452577975811585,
"grad_norm": 7.319488479677199,
"learning_rate": 6.143533222133901e-06,
"loss": 1.9885435104370117,
"step": 2282
},
{
"epoch": 1.4532145130490135,
"grad_norm": 9.816619736767258,
"learning_rate": 6.139927242645859e-06,
"loss": 2.211883306503296,
"step": 2283
},
{
"epoch": 1.4538510502864417,
"grad_norm": 19.194333040452655,
"learning_rate": 6.136320637639094e-06,
"loss": 1.593003749847412,
"step": 2284
},
{
"epoch": 1.45448758752387,
"grad_norm": 9.861021206968063,
"learning_rate": 6.13271340909268e-06,
"loss": 1.4990943670272827,
"step": 2285
},
{
"epoch": 1.4551241247612985,
"grad_norm": 8.89575659198883,
"learning_rate": 6.12910555898603e-06,
"loss": 1.908895492553711,
"step": 2286
},
{
"epoch": 1.455760661998727,
"grad_norm": 11.68517641170503,
"learning_rate": 6.125497089298902e-06,
"loss": 1.4656141996383667,
"step": 2287
},
{
"epoch": 1.4563971992361553,
"grad_norm": 12.049955844153972,
"learning_rate": 6.121888002011389e-06,
"loss": 1.4878228902816772,
"step": 2288
},
{
"epoch": 1.4570337364735837,
"grad_norm": 12.72492012451628,
"learning_rate": 6.118278299103929e-06,
"loss": 1.8361848592758179,
"step": 2289
},
{
"epoch": 1.457670273711012,
"grad_norm": 9.191650634484828,
"learning_rate": 6.1146679825572945e-06,
"loss": 2.2196669578552246,
"step": 2290
},
{
"epoch": 1.4583068109484405,
"grad_norm": 7.759297972058429,
"learning_rate": 6.11105705435259e-06,
"loss": 1.926821231842041,
"step": 2291
},
{
"epoch": 1.458943348185869,
"grad_norm": 19.403444235316353,
"learning_rate": 6.107445516471268e-06,
"loss": 0.9025589227676392,
"step": 2292
},
{
"epoch": 1.4595798854232973,
"grad_norm": 6.93287755275663,
"learning_rate": 6.1038333708951025e-06,
"loss": 0.9668861627578735,
"step": 2293
},
{
"epoch": 1.4602164226607257,
"grad_norm": 14.934226720346048,
"learning_rate": 6.100220619606209e-06,
"loss": 1.6219764947891235,
"step": 2294
},
{
"epoch": 1.4608529598981541,
"grad_norm": 10.80098156770589,
"learning_rate": 6.096607264587032e-06,
"loss": 1.6168935298919678,
"step": 2295
},
{
"epoch": 1.4614894971355823,
"grad_norm": 11.977524581829597,
"learning_rate": 6.092993307820349e-06,
"loss": 1.3764303922653198,
"step": 2296
},
{
"epoch": 1.4621260343730107,
"grad_norm": 7.609612791648267,
"learning_rate": 6.089378751289268e-06,
"loss": 1.2406864166259766,
"step": 2297
},
{
"epoch": 1.462762571610439,
"grad_norm": 14.744503007862395,
"learning_rate": 6.085763596977222e-06,
"loss": 1.6459439992904663,
"step": 2298
},
{
"epoch": 1.4633991088478675,
"grad_norm": 11.516187438025026,
"learning_rate": 6.0821478468679766e-06,
"loss": 1.4948744773864746,
"step": 2299
},
{
"epoch": 1.464035646085296,
"grad_norm": 15.331463785959949,
"learning_rate": 6.078531502945624e-06,
"loss": 2.1930792331695557,
"step": 2300
},
{
"epoch": 1.4646721833227243,
"grad_norm": 9.429387511806071,
"learning_rate": 6.07491456719458e-06,
"loss": 1.778498888015747,
"step": 2301
},
{
"epoch": 1.4653087205601527,
"grad_norm": 16.032039111111153,
"learning_rate": 6.071297041599585e-06,
"loss": 2.0730414390563965,
"step": 2302
},
{
"epoch": 1.4659452577975811,
"grad_norm": 11.614462376707863,
"learning_rate": 6.067678928145707e-06,
"loss": 1.0565383434295654,
"step": 2303
},
{
"epoch": 1.4665817950350095,
"grad_norm": 14.730772994555107,
"learning_rate": 6.0640602288183315e-06,
"loss": 1.4123332500457764,
"step": 2304
},
{
"epoch": 1.467218332272438,
"grad_norm": 8.805644728306925,
"learning_rate": 6.06044094560317e-06,
"loss": 1.7742290496826172,
"step": 2305
},
{
"epoch": 1.4678548695098663,
"grad_norm": 10.025269858987773,
"learning_rate": 6.056821080486248e-06,
"loss": 1.5650653839111328,
"step": 2306
},
{
"epoch": 1.4684914067472947,
"grad_norm": 12.177960282142802,
"learning_rate": 6.0532006354539205e-06,
"loss": 1.2841260433197021,
"step": 2307
},
{
"epoch": 1.4691279439847231,
"grad_norm": 11.225977777200889,
"learning_rate": 6.049579612492851e-06,
"loss": 1.70684015750885,
"step": 2308
},
{
"epoch": 1.4697644812221515,
"grad_norm": 15.679589152504976,
"learning_rate": 6.045958013590024e-06,
"loss": 1.214942455291748,
"step": 2309
},
{
"epoch": 1.47040101845958,
"grad_norm": 11.248471711515633,
"learning_rate": 6.042335840732741e-06,
"loss": 1.8510022163391113,
"step": 2310
},
{
"epoch": 1.4710375556970083,
"grad_norm": 8.008516568916232,
"learning_rate": 6.038713095908617e-06,
"loss": 1.433444857597351,
"step": 2311
},
{
"epoch": 1.4716740929344367,
"grad_norm": 13.613960224083982,
"learning_rate": 6.035089781105581e-06,
"loss": 1.693634271621704,
"step": 2312
},
{
"epoch": 1.4723106301718651,
"grad_norm": 7.621590152687834,
"learning_rate": 6.031465898311877e-06,
"loss": 1.5318776369094849,
"step": 2313
},
{
"epoch": 1.4729471674092935,
"grad_norm": 18.121718920303067,
"learning_rate": 6.027841449516054e-06,
"loss": 1.6473984718322754,
"step": 2314
},
{
"epoch": 1.473583704646722,
"grad_norm": 8.14648709448568,
"learning_rate": 6.024216436706983e-06,
"loss": 1.8800264596939087,
"step": 2315
},
{
"epoch": 1.4742202418841504,
"grad_norm": 8.403442395625294,
"learning_rate": 6.020590861873832e-06,
"loss": 1.84152352809906,
"step": 2316
},
{
"epoch": 1.4748567791215785,
"grad_norm": 28.308542384872574,
"learning_rate": 6.016964727006084e-06,
"loss": 2.369072437286377,
"step": 2317
},
{
"epoch": 1.475493316359007,
"grad_norm": 10.266489194171298,
"learning_rate": 6.013338034093532e-06,
"loss": 1.7621761560440063,
"step": 2318
},
{
"epoch": 1.4761298535964353,
"grad_norm": 10.115881623135751,
"learning_rate": 6.009710785126267e-06,
"loss": 1.779378056526184,
"step": 2319
},
{
"epoch": 1.4767663908338637,
"grad_norm": 17.80621757863576,
"learning_rate": 6.006082982094695e-06,
"loss": 1.8648977279663086,
"step": 2320
},
{
"epoch": 1.4774029280712921,
"grad_norm": 14.094940079453211,
"learning_rate": 6.002454626989519e-06,
"loss": 1.298775315284729,
"step": 2321
},
{
"epoch": 1.4780394653087205,
"grad_norm": 9.400726504781899,
"learning_rate": 5.9988257218017435e-06,
"loss": 1.314895749092102,
"step": 2322
},
{
"epoch": 1.478676002546149,
"grad_norm": 9.726742378726325,
"learning_rate": 5.995196268522681e-06,
"loss": 1.7099711894989014,
"step": 2323
},
{
"epoch": 1.4793125397835774,
"grad_norm": 8.707329797886894,
"learning_rate": 5.991566269143941e-06,
"loss": 1.7361701726913452,
"step": 2324
},
{
"epoch": 1.4799490770210058,
"grad_norm": 12.476393055946213,
"learning_rate": 5.987935725657436e-06,
"loss": 0.9174911975860596,
"step": 2325
},
{
"epoch": 1.4805856142584342,
"grad_norm": 9.336089625750024,
"learning_rate": 5.984304640055373e-06,
"loss": 1.816239833831787,
"step": 2326
},
{
"epoch": 1.4812221514958626,
"grad_norm": 18.8249747839833,
"learning_rate": 5.980673014330256e-06,
"loss": 1.1981911659240723,
"step": 2327
},
{
"epoch": 1.481858688733291,
"grad_norm": 9.651018850571326,
"learning_rate": 5.977040850474891e-06,
"loss": 2.1803438663482666,
"step": 2328
},
{
"epoch": 1.4824952259707194,
"grad_norm": 6.872288607868434,
"learning_rate": 5.973408150482374e-06,
"loss": 1.6503804922103882,
"step": 2329
},
{
"epoch": 1.4831317632081475,
"grad_norm": 14.17087632352517,
"learning_rate": 5.969774916346097e-06,
"loss": 1.5935099124908447,
"step": 2330
},
{
"epoch": 1.483768300445576,
"grad_norm": 7.923951253168774,
"learning_rate": 5.966141150059745e-06,
"loss": 1.5434465408325195,
"step": 2331
},
{
"epoch": 1.4844048376830044,
"grad_norm": 10.317158939383916,
"learning_rate": 5.962506853617297e-06,
"loss": 1.6459558010101318,
"step": 2332
},
{
"epoch": 1.4850413749204328,
"grad_norm": 39.307323069970685,
"learning_rate": 5.958872029013019e-06,
"loss": 1.6799983978271484,
"step": 2333
},
{
"epoch": 1.4856779121578612,
"grad_norm": 16.928986573700584,
"learning_rate": 5.955236678241469e-06,
"loss": 1.854414939880371,
"step": 2334
},
{
"epoch": 1.4863144493952896,
"grad_norm": 12.311553995465173,
"learning_rate": 5.9516008032974944e-06,
"loss": 1.1528398990631104,
"step": 2335
},
{
"epoch": 1.486950986632718,
"grad_norm": 16.079098630901317,
"learning_rate": 5.94796440617623e-06,
"loss": 1.7140576839447021,
"step": 2336
},
{
"epoch": 1.4875875238701464,
"grad_norm": 12.817788151844667,
"learning_rate": 5.944327488873094e-06,
"loss": 1.3568150997161865,
"step": 2337
},
{
"epoch": 1.4882240611075748,
"grad_norm": 10.369456896344717,
"learning_rate": 5.940690053383795e-06,
"loss": 1.5332515239715576,
"step": 2338
},
{
"epoch": 1.4888605983450032,
"grad_norm": 12.169721343242498,
"learning_rate": 5.937052101704324e-06,
"loss": 1.5990650653839111,
"step": 2339
},
{
"epoch": 1.4894971355824316,
"grad_norm": 14.30655326063879,
"learning_rate": 5.933413635830953e-06,
"loss": 1.647970199584961,
"step": 2340
},
{
"epoch": 1.49013367281986,
"grad_norm": 19.416266648259818,
"learning_rate": 5.929774657760239e-06,
"loss": 1.4303776025772095,
"step": 2341
},
{
"epoch": 1.4907702100572884,
"grad_norm": 8.739110013653029,
"learning_rate": 5.9261351694890166e-06,
"loss": 1.4337981939315796,
"step": 2342
},
{
"epoch": 1.4914067472947168,
"grad_norm": 13.121068350218405,
"learning_rate": 5.922495173014406e-06,
"loss": 2.3536746501922607,
"step": 2343
},
{
"epoch": 1.4920432845321452,
"grad_norm": 13.086547441778615,
"learning_rate": 5.9188546703338024e-06,
"loss": 1.1129584312438965,
"step": 2344
},
{
"epoch": 1.4926798217695736,
"grad_norm": 12.599650011923513,
"learning_rate": 5.915213663444878e-06,
"loss": 1.907105565071106,
"step": 2345
},
{
"epoch": 1.493316359007002,
"grad_norm": 16.103481442488864,
"learning_rate": 5.911572154345584e-06,
"loss": 1.2642226219177246,
"step": 2346
},
{
"epoch": 1.4939528962444304,
"grad_norm": 6.702106812818325,
"learning_rate": 5.907930145034145e-06,
"loss": 0.8979045152664185,
"step": 2347
},
{
"epoch": 1.4945894334818588,
"grad_norm": 9.570102582838961,
"learning_rate": 5.904287637509065e-06,
"loss": 1.5047214031219482,
"step": 2348
},
{
"epoch": 1.4952259707192872,
"grad_norm": 14.410444106232045,
"learning_rate": 5.900644633769115e-06,
"loss": 2.863888740539551,
"step": 2349
},
{
"epoch": 1.4958625079567156,
"grad_norm": 10.733488379158382,
"learning_rate": 5.89700113581334e-06,
"loss": 1.1438448429107666,
"step": 2350
},
{
"epoch": 1.4964990451941438,
"grad_norm": 10.27487028623432,
"learning_rate": 5.893357145641059e-06,
"loss": 1.913692831993103,
"step": 2351
},
{
"epoch": 1.4971355824315722,
"grad_norm": 17.762700060064628,
"learning_rate": 5.889712665251859e-06,
"loss": 1.2852919101715088,
"step": 2352
},
{
"epoch": 1.4977721196690006,
"grad_norm": 8.948382406158954,
"learning_rate": 5.886067696645597e-06,
"loss": 1.6822280883789062,
"step": 2353
},
{
"epoch": 1.498408656906429,
"grad_norm": 17.53624658522359,
"learning_rate": 5.8824222418223965e-06,
"loss": 1.7187851667404175,
"step": 2354
},
{
"epoch": 1.4990451941438574,
"grad_norm": 9.515580296782518,
"learning_rate": 5.878776302782648e-06,
"loss": 1.5445634126663208,
"step": 2355
},
{
"epoch": 1.4996817313812858,
"grad_norm": 14.900367983746788,
"learning_rate": 5.87512988152701e-06,
"loss": 1.8200538158416748,
"step": 2356
},
{
"epoch": 1.5003182686187142,
"grad_norm": 11.66663984483443,
"learning_rate": 5.871482980056401e-06,
"loss": 1.5881679058074951,
"step": 2357
},
{
"epoch": 1.5009548058561426,
"grad_norm": 13.639376618960366,
"learning_rate": 5.867835600372008e-06,
"loss": 1.586029291152954,
"step": 2358
},
{
"epoch": 1.501591343093571,
"grad_norm": 13.085645464864266,
"learning_rate": 5.86418774447528e-06,
"loss": 0.9381835460662842,
"step": 2359
},
{
"epoch": 1.5022278803309994,
"grad_norm": 8.388082049487934,
"learning_rate": 5.860539414367921e-06,
"loss": 1.4036571979522705,
"step": 2360
},
{
"epoch": 1.5028644175684276,
"grad_norm": 11.032147030076258,
"learning_rate": 5.856890612051902e-06,
"loss": 1.1054632663726807,
"step": 2361
},
{
"epoch": 1.503500954805856,
"grad_norm": 6.449258628897456,
"learning_rate": 5.853241339529452e-06,
"loss": 1.6686400175094604,
"step": 2362
},
{
"epoch": 1.5041374920432844,
"grad_norm": 14.877905193213572,
"learning_rate": 5.849591598803056e-06,
"loss": 1.7445240020751953,
"step": 2363
},
{
"epoch": 1.5047740292807128,
"grad_norm": 13.48403607496307,
"learning_rate": 5.845941391875458e-06,
"loss": 1.478830337524414,
"step": 2364
},
{
"epoch": 1.5054105665181412,
"grad_norm": 12.624608988158526,
"learning_rate": 5.842290720749655e-06,
"loss": 1.498946189880371,
"step": 2365
},
{
"epoch": 1.5060471037555696,
"grad_norm": 10.248213528519813,
"learning_rate": 5.8386395874289e-06,
"loss": 1.4745010137557983,
"step": 2366
},
{
"epoch": 1.506683640992998,
"grad_norm": 15.927668266457836,
"learning_rate": 5.834987993916703e-06,
"loss": 2.136512279510498,
"step": 2367
},
{
"epoch": 1.5073201782304264,
"grad_norm": 10.552288301041084,
"learning_rate": 5.831335942216821e-06,
"loss": 0.8879401683807373,
"step": 2368
},
{
"epoch": 1.5079567154678548,
"grad_norm": 13.51108816180199,
"learning_rate": 5.827683434333266e-06,
"loss": 1.3464289903640747,
"step": 2369
},
{
"epoch": 1.5085932527052832,
"grad_norm": 7.8398218967830635,
"learning_rate": 5.824030472270298e-06,
"loss": 1.5729990005493164,
"step": 2370
},
{
"epoch": 1.5092297899427116,
"grad_norm": 16.613616433122516,
"learning_rate": 5.820377058032429e-06,
"loss": 1.59991455078125,
"step": 2371
},
{
"epoch": 1.50986632718014,
"grad_norm": 19.457630942072974,
"learning_rate": 5.816723193624419e-06,
"loss": 1.623780369758606,
"step": 2372
},
{
"epoch": 1.5105028644175684,
"grad_norm": 11.827239090313682,
"learning_rate": 5.81306888105127e-06,
"loss": 1.1282806396484375,
"step": 2373
},
{
"epoch": 1.5111394016549968,
"grad_norm": 11.807090303270067,
"learning_rate": 5.809414122318235e-06,
"loss": 1.6361347436904907,
"step": 2374
},
{
"epoch": 1.5117759388924252,
"grad_norm": 14.222246913731992,
"learning_rate": 5.805758919430812e-06,
"loss": 1.2901973724365234,
"step": 2375
},
{
"epoch": 1.5124124761298536,
"grad_norm": 9.621662642884392,
"learning_rate": 5.802103274394739e-06,
"loss": 1.0700773000717163,
"step": 2376
},
{
"epoch": 1.513049013367282,
"grad_norm": 12.445854880335943,
"learning_rate": 5.798447189216002e-06,
"loss": 1.4615952968597412,
"step": 2377
},
{
"epoch": 1.5136855506047104,
"grad_norm": 10.563388400964374,
"learning_rate": 5.79479066590082e-06,
"loss": 1.8412933349609375,
"step": 2378
},
{
"epoch": 1.5143220878421388,
"grad_norm": 10.105264788768947,
"learning_rate": 5.7911337064556635e-06,
"loss": 1.7908297777175903,
"step": 2379
},
{
"epoch": 1.5149586250795672,
"grad_norm": 15.031091034073702,
"learning_rate": 5.787476312887233e-06,
"loss": 1.5867691040039062,
"step": 2380
},
{
"epoch": 1.5155951623169956,
"grad_norm": 12.5518698581082,
"learning_rate": 5.783818487202472e-06,
"loss": 1.8068511486053467,
"step": 2381
},
{
"epoch": 1.516231699554424,
"grad_norm": 13.640365163199606,
"learning_rate": 5.780160231408559e-06,
"loss": 1.2256360054016113,
"step": 2382
},
{
"epoch": 1.5168682367918525,
"grad_norm": 16.753718900128565,
"learning_rate": 5.776501547512909e-06,
"loss": 1.6830406188964844,
"step": 2383
},
{
"epoch": 1.5175047740292809,
"grad_norm": 7.767233254984847,
"learning_rate": 5.772842437523175e-06,
"loss": 1.4654542207717896,
"step": 2384
},
{
"epoch": 1.5181413112667093,
"grad_norm": 13.135493185756758,
"learning_rate": 5.769182903447239e-06,
"loss": 1.823169231414795,
"step": 2385
},
{
"epoch": 1.5187778485041374,
"grad_norm": 11.192415782515821,
"learning_rate": 5.765522947293217e-06,
"loss": 1.5445574522018433,
"step": 2386
},
{
"epoch": 1.5194143857415658,
"grad_norm": 10.165697469491613,
"learning_rate": 5.76186257106946e-06,
"loss": 1.0204110145568848,
"step": 2387
},
{
"epoch": 1.5200509229789942,
"grad_norm": 15.312932633201065,
"learning_rate": 5.758201776784545e-06,
"loss": 1.7300519943237305,
"step": 2388
},
{
"epoch": 1.5206874602164226,
"grad_norm": 12.383392551599943,
"learning_rate": 5.7545405664472805e-06,
"loss": 0.9942628145217896,
"step": 2389
},
{
"epoch": 1.521323997453851,
"grad_norm": 15.634403404390396,
"learning_rate": 5.750878942066704e-06,
"loss": 1.2621705532073975,
"step": 2390
},
{
"epoch": 1.5219605346912795,
"grad_norm": 8.708812744078262,
"learning_rate": 5.74721690565208e-06,
"loss": 1.4440770149230957,
"step": 2391
},
{
"epoch": 1.5225970719287079,
"grad_norm": 21.633404286701108,
"learning_rate": 5.743554459212896e-06,
"loss": 1.27446448802948,
"step": 2392
},
{
"epoch": 1.5232336091661363,
"grad_norm": 13.130933163467803,
"learning_rate": 5.739891604758868e-06,
"loss": 1.325007438659668,
"step": 2393
},
{
"epoch": 1.5238701464035647,
"grad_norm": 12.790325736291626,
"learning_rate": 5.736228344299937e-06,
"loss": 1.3999392986297607,
"step": 2394
},
{
"epoch": 1.5245066836409928,
"grad_norm": 13.80507100462981,
"learning_rate": 5.73256467984626e-06,
"loss": 1.116119623184204,
"step": 2395
},
{
"epoch": 1.5251432208784212,
"grad_norm": 8.33182348678265,
"learning_rate": 5.728900613408225e-06,
"loss": 1.4327359199523926,
"step": 2396
},
{
"epoch": 1.5257797581158496,
"grad_norm": 10.471323325547097,
"learning_rate": 5.725236146996435e-06,
"loss": 1.56307053565979,
"step": 2397
},
{
"epoch": 1.526416295353278,
"grad_norm": 8.88639225392792,
"learning_rate": 5.72157128262171e-06,
"loss": 0.9844847917556763,
"step": 2398
},
{
"epoch": 1.5270528325907065,
"grad_norm": 9.56586278919019,
"learning_rate": 5.717906022295095e-06,
"loss": 1.6100908517837524,
"step": 2399
},
{
"epoch": 1.5276893698281349,
"grad_norm": 21.839175985650733,
"learning_rate": 5.714240368027849e-06,
"loss": 1.9893325567245483,
"step": 2400
},
{
"epoch": 1.5283259070655633,
"grad_norm": 8.588983401953769,
"learning_rate": 5.7105743218314445e-06,
"loss": 1.646536946296692,
"step": 2401
},
{
"epoch": 1.5289624443029917,
"grad_norm": 15.34499421303345,
"learning_rate": 5.706907885717577e-06,
"loss": 1.8654241561889648,
"step": 2402
},
{
"epoch": 1.52959898154042,
"grad_norm": 16.78750142696331,
"learning_rate": 5.703241061698144e-06,
"loss": 1.8605923652648926,
"step": 2403
},
{
"epoch": 1.5302355187778485,
"grad_norm": 8.32178861249778,
"learning_rate": 5.699573851785267e-06,
"loss": 1.4465360641479492,
"step": 2404
},
{
"epoch": 1.5308720560152769,
"grad_norm": 19.960113686351008,
"learning_rate": 5.6959062579912734e-06,
"loss": 1.6353950500488281,
"step": 2405
},
{
"epoch": 1.5315085932527053,
"grad_norm": 11.170449072193628,
"learning_rate": 5.692238282328704e-06,
"loss": 1.8010241985321045,
"step": 2406
},
{
"epoch": 1.5321451304901337,
"grad_norm": 10.21315576880138,
"learning_rate": 5.688569926810308e-06,
"loss": 1.8595669269561768,
"step": 2407
},
{
"epoch": 1.532781667727562,
"grad_norm": 16.1700537055414,
"learning_rate": 5.684901193449039e-06,
"loss": 1.5712897777557373,
"step": 2408
},
{
"epoch": 1.5334182049649905,
"grad_norm": 7.650369644512302,
"learning_rate": 5.681232084258067e-06,
"loss": 1.8795337677001953,
"step": 2409
},
{
"epoch": 1.5340547422024189,
"grad_norm": 10.860659163538914,
"learning_rate": 5.677562601250759e-06,
"loss": 1.3441723585128784,
"step": 2410
},
{
"epoch": 1.5346912794398473,
"grad_norm": 11.017657705757012,
"learning_rate": 5.673892746440693e-06,
"loss": 1.0673878192901611,
"step": 2411
},
{
"epoch": 1.5353278166772757,
"grad_norm": 11.87282419575077,
"learning_rate": 5.670222521841649e-06,
"loss": 2.402776002883911,
"step": 2412
},
{
"epoch": 1.535964353914704,
"grad_norm": 11.069682527119124,
"learning_rate": 5.666551929467609e-06,
"loss": 1.244581937789917,
"step": 2413
},
{
"epoch": 1.5366008911521325,
"grad_norm": 18.981452453693823,
"learning_rate": 5.662880971332761e-06,
"loss": 1.5043175220489502,
"step": 2414
},
{
"epoch": 1.537237428389561,
"grad_norm": 11.344567062763744,
"learning_rate": 5.6592096494514894e-06,
"loss": 2.5523786544799805,
"step": 2415
},
{
"epoch": 1.5378739656269893,
"grad_norm": 7.97452314720193,
"learning_rate": 5.655537965838376e-06,
"loss": 1.6291911602020264,
"step": 2416
},
{
"epoch": 1.5385105028644177,
"grad_norm": 10.080095765845345,
"learning_rate": 5.651865922508209e-06,
"loss": 2.0800302028656006,
"step": 2417
},
{
"epoch": 1.539147040101846,
"grad_norm": 34.72810949100083,
"learning_rate": 5.6481935214759665e-06,
"loss": 1.6141481399536133,
"step": 2418
},
{
"epoch": 1.5397835773392745,
"grad_norm": 12.48720555430153,
"learning_rate": 5.644520764756827e-06,
"loss": 2.1375699043273926,
"step": 2419
},
{
"epoch": 1.5404201145767027,
"grad_norm": 15.210646215132524,
"learning_rate": 5.640847654366164e-06,
"loss": 1.831993818283081,
"step": 2420
},
{
"epoch": 1.541056651814131,
"grad_norm": 15.851301537320069,
"learning_rate": 5.637174192319541e-06,
"loss": 1.8037670850753784,
"step": 2421
},
{
"epoch": 1.5416931890515595,
"grad_norm": 13.314051769818072,
"learning_rate": 5.633500380632723e-06,
"loss": 1.2829947471618652,
"step": 2422
},
{
"epoch": 1.542329726288988,
"grad_norm": 15.188898639694466,
"learning_rate": 5.629826221321657e-06,
"loss": 1.741872787475586,
"step": 2423
},
{
"epoch": 1.5429662635264163,
"grad_norm": 8.78353436178482,
"learning_rate": 5.626151716402486e-06,
"loss": 1.5937385559082031,
"step": 2424
},
{
"epoch": 1.5436028007638447,
"grad_norm": 10.59836028443109,
"learning_rate": 5.622476867891543e-06,
"loss": 1.3646893501281738,
"step": 2425
},
{
"epoch": 1.544239338001273,
"grad_norm": 9.844120096253837,
"learning_rate": 5.618801677805347e-06,
"loss": 1.365708589553833,
"step": 2426
},
{
"epoch": 1.5448758752387015,
"grad_norm": 11.504525050991903,
"learning_rate": 5.615126148160607e-06,
"loss": 1.3138048648834229,
"step": 2427
},
{
"epoch": 1.5455124124761297,
"grad_norm": 9.410993220717272,
"learning_rate": 5.611450280974218e-06,
"loss": 1.3601410388946533,
"step": 2428
},
{
"epoch": 1.546148949713558,
"grad_norm": 12.05500495032501,
"learning_rate": 5.607774078263256e-06,
"loss": 1.2759274244308472,
"step": 2429
},
{
"epoch": 1.5467854869509865,
"grad_norm": 9.13620329173272,
"learning_rate": 5.604097542044988e-06,
"loss": 1.2775843143463135,
"step": 2430
},
{
"epoch": 1.547422024188415,
"grad_norm": 13.704179908789458,
"learning_rate": 5.600420674336858e-06,
"loss": 1.4167678356170654,
"step": 2431
},
{
"epoch": 1.5480585614258433,
"grad_norm": 17.068954969593896,
"learning_rate": 5.596743477156496e-06,
"loss": 1.973123550415039,
"step": 2432
},
{
"epoch": 1.5486950986632717,
"grad_norm": 7.823725363411393,
"learning_rate": 5.593065952521712e-06,
"loss": 1.330206036567688,
"step": 2433
},
{
"epoch": 1.5493316359007,
"grad_norm": 11.234869141267719,
"learning_rate": 5.589388102450492e-06,
"loss": 1.5377652645111084,
"step": 2434
},
{
"epoch": 1.5499681731381285,
"grad_norm": 22.322730252272205,
"learning_rate": 5.585709928961007e-06,
"loss": 1.4899756908416748,
"step": 2435
},
{
"epoch": 1.550604710375557,
"grad_norm": 11.263512633909038,
"learning_rate": 5.582031434071598e-06,
"loss": 1.6189697980880737,
"step": 2436
},
{
"epoch": 1.5512412476129853,
"grad_norm": 9.78607233920165,
"learning_rate": 5.578352619800791e-06,
"loss": 1.5311734676361084,
"step": 2437
},
{
"epoch": 1.5518777848504137,
"grad_norm": 10.142816293555294,
"learning_rate": 5.574673488167279e-06,
"loss": 1.1967965364456177,
"step": 2438
},
{
"epoch": 1.5525143220878421,
"grad_norm": 19.320225330331976,
"learning_rate": 5.570994041189933e-06,
"loss": 1.6374616622924805,
"step": 2439
},
{
"epoch": 1.5531508593252705,
"grad_norm": 13.637618207149412,
"learning_rate": 5.5673142808877974e-06,
"loss": 1.9044063091278076,
"step": 2440
},
{
"epoch": 1.553787396562699,
"grad_norm": 11.081736434115335,
"learning_rate": 5.5636342092800895e-06,
"loss": 1.7417365312576294,
"step": 2441
},
{
"epoch": 1.5544239338001273,
"grad_norm": 10.429877995477352,
"learning_rate": 5.5599538283861944e-06,
"loss": 1.6746402978897095,
"step": 2442
},
{
"epoch": 1.5550604710375557,
"grad_norm": 24.387037640475516,
"learning_rate": 5.5562731402256666e-06,
"loss": 0.9348142147064209,
"step": 2443
},
{
"epoch": 1.5556970082749841,
"grad_norm": 15.02622300900033,
"learning_rate": 5.552592146818232e-06,
"loss": 1.704547643661499,
"step": 2444
},
{
"epoch": 1.5563335455124125,
"grad_norm": 11.53393011045004,
"learning_rate": 5.5489108501837855e-06,
"loss": 1.6509344577789307,
"step": 2445
},
{
"epoch": 1.556970082749841,
"grad_norm": 11.946294271093686,
"learning_rate": 5.545229252342381e-06,
"loss": 1.8902714252471924,
"step": 2446
},
{
"epoch": 1.5576066199872693,
"grad_norm": 8.345232669757912,
"learning_rate": 5.541547355314248e-06,
"loss": 1.7008066177368164,
"step": 2447
},
{
"epoch": 1.5582431572246978,
"grad_norm": 14.631995751980634,
"learning_rate": 5.53786516111977e-06,
"loss": 1.8355236053466797,
"step": 2448
},
{
"epoch": 1.5588796944621262,
"grad_norm": 10.202501297184629,
"learning_rate": 5.5341826717795e-06,
"loss": 1.3646094799041748,
"step": 2449
},
{
"epoch": 1.5595162316995546,
"grad_norm": 11.332643315382237,
"learning_rate": 5.530499889314152e-06,
"loss": 1.356947898864746,
"step": 2450
},
{
"epoch": 1.560152768936983,
"grad_norm": 18.041208568131353,
"learning_rate": 5.526816815744601e-06,
"loss": 1.8688534498214722,
"step": 2451
},
{
"epoch": 1.5607893061744114,
"grad_norm": 20.385089468087184,
"learning_rate": 5.523133453091877e-06,
"loss": 1.5583463907241821,
"step": 2452
},
{
"epoch": 1.5614258434118395,
"grad_norm": 10.911689392828249,
"learning_rate": 5.519449803377176e-06,
"loss": 1.2631714344024658,
"step": 2453
},
{
"epoch": 1.562062380649268,
"grad_norm": 12.433198469900724,
"learning_rate": 5.515765868621845e-06,
"loss": 1.6228747367858887,
"step": 2454
},
{
"epoch": 1.5626989178866963,
"grad_norm": 17.27844214745092,
"learning_rate": 5.512081650847393e-06,
"loss": 1.9856112003326416,
"step": 2455
},
{
"epoch": 1.5633354551241248,
"grad_norm": 7.399488641281761,
"learning_rate": 5.508397152075481e-06,
"loss": 0.9606994390487671,
"step": 2456
},
{
"epoch": 1.5639719923615532,
"grad_norm": 8.18254649238998,
"learning_rate": 5.504712374327923e-06,
"loss": 2.0466148853302,
"step": 2457
},
{
"epoch": 1.5646085295989816,
"grad_norm": 13.273908143367608,
"learning_rate": 5.501027319626693e-06,
"loss": 1.5686981678009033,
"step": 2458
},
{
"epoch": 1.56524506683641,
"grad_norm": 17.596294750850674,
"learning_rate": 5.497341989993904e-06,
"loss": 1.8726886510849,
"step": 2459
},
{
"epoch": 1.5658816040738384,
"grad_norm": 11.709153911604465,
"learning_rate": 5.493656387451834e-06,
"loss": 1.1663498878479004,
"step": 2460
},
{
"epoch": 1.5665181413112665,
"grad_norm": 12.373461059287882,
"learning_rate": 5.489970514022903e-06,
"loss": 1.1478195190429688,
"step": 2461
},
{
"epoch": 1.567154678548695,
"grad_norm": 12.126425305330907,
"learning_rate": 5.486284371729679e-06,
"loss": 1.1417909860610962,
"step": 2462
},
{
"epoch": 1.5677912157861233,
"grad_norm": 7.733955402071897,
"learning_rate": 5.482597962594883e-06,
"loss": 1.6889036893844604,
"step": 2463
},
{
"epoch": 1.5684277530235518,
"grad_norm": 7.730134513765488,
"learning_rate": 5.478911288641376e-06,
"loss": 1.6023591756820679,
"step": 2464
},
{
"epoch": 1.5690642902609802,
"grad_norm": 15.541811993059941,
"learning_rate": 5.475224351892171e-06,
"loss": 1.387468695640564,
"step": 2465
},
{
"epoch": 1.5697008274984086,
"grad_norm": 10.083374347297031,
"learning_rate": 5.4715371543704185e-06,
"loss": 1.9434430599212646,
"step": 2466
},
{
"epoch": 1.570337364735837,
"grad_norm": 16.39709004328844,
"learning_rate": 5.467849698099416e-06,
"loss": 1.7062137126922607,
"step": 2467
},
{
"epoch": 1.5709739019732654,
"grad_norm": 7.256369540200404,
"learning_rate": 5.464161985102604e-06,
"loss": 1.0557485818862915,
"step": 2468
},
{
"epoch": 1.5716104392106938,
"grad_norm": 8.870464485244176,
"learning_rate": 5.46047401740356e-06,
"loss": 1.541259765625,
"step": 2469
},
{
"epoch": 1.5722469764481222,
"grad_norm": 9.413632462509831,
"learning_rate": 5.456785797026004e-06,
"loss": 1.2600197792053223,
"step": 2470
},
{
"epoch": 1.5728835136855506,
"grad_norm": 10.456644794335347,
"learning_rate": 5.453097325993796e-06,
"loss": 1.3559761047363281,
"step": 2471
},
{
"epoch": 1.573520050922979,
"grad_norm": 13.882304324175811,
"learning_rate": 5.449408606330927e-06,
"loss": 1.2437595129013062,
"step": 2472
},
{
"epoch": 1.5741565881604074,
"grad_norm": 9.962278892638508,
"learning_rate": 5.445719640061533e-06,
"loss": 1.031199336051941,
"step": 2473
},
{
"epoch": 1.5747931253978358,
"grad_norm": 13.334460805236132,
"learning_rate": 5.442030429209881e-06,
"loss": 1.6973717212677002,
"step": 2474
},
{
"epoch": 1.5754296626352642,
"grad_norm": 11.320974865850614,
"learning_rate": 5.438340975800371e-06,
"loss": 1.422226905822754,
"step": 2475
},
{
"epoch": 1.5760661998726926,
"grad_norm": 14.987753764178102,
"learning_rate": 5.434651281857538e-06,
"loss": 1.8186506032943726,
"step": 2476
},
{
"epoch": 1.576702737110121,
"grad_norm": 10.98446346920428,
"learning_rate": 5.430961349406049e-06,
"loss": 1.5441179275512695,
"step": 2477
},
{
"epoch": 1.5773392743475494,
"grad_norm": 10.176140350382148,
"learning_rate": 5.427271180470701e-06,
"loss": 1.8851035833358765,
"step": 2478
},
{
"epoch": 1.5779758115849778,
"grad_norm": 10.319189885338636,
"learning_rate": 5.423580777076421e-06,
"loss": 1.3101853132247925,
"step": 2479
},
{
"epoch": 1.5786123488224062,
"grad_norm": 17.722473439859765,
"learning_rate": 5.419890141248267e-06,
"loss": 1.411855936050415,
"step": 2480
},
{
"epoch": 1.5792488860598346,
"grad_norm": 11.071651967569489,
"learning_rate": 5.416199275011421e-06,
"loss": 1.4189932346343994,
"step": 2481
},
{
"epoch": 1.579885423297263,
"grad_norm": 9.421983059684807,
"learning_rate": 5.412508180391192e-06,
"loss": 1.4346650838851929,
"step": 2482
},
{
"epoch": 1.5805219605346914,
"grad_norm": 11.813742057186015,
"learning_rate": 5.408816859413017e-06,
"loss": 1.2399206161499023,
"step": 2483
},
{
"epoch": 1.5811584977721198,
"grad_norm": 17.20658112273176,
"learning_rate": 5.405125314102454e-06,
"loss": 1.7983934879302979,
"step": 2484
},
{
"epoch": 1.5817950350095482,
"grad_norm": 12.90079430805244,
"learning_rate": 5.401433546485186e-06,
"loss": 1.5692299604415894,
"step": 2485
},
{
"epoch": 1.5824315722469766,
"grad_norm": 11.226849180867754,
"learning_rate": 5.397741558587019e-06,
"loss": 1.787600040435791,
"step": 2486
},
{
"epoch": 1.5830681094844048,
"grad_norm": 10.462778919604604,
"learning_rate": 5.3940493524338755e-06,
"loss": 1.2418570518493652,
"step": 2487
},
{
"epoch": 1.5837046467218332,
"grad_norm": 13.48895578831498,
"learning_rate": 5.390356930051803e-06,
"loss": 1.8791996240615845,
"step": 2488
},
{
"epoch": 1.5843411839592616,
"grad_norm": 8.943457777264197,
"learning_rate": 5.386664293466965e-06,
"loss": 1.3849817514419556,
"step": 2489
},
{
"epoch": 1.58497772119669,
"grad_norm": 7.084618489747768,
"learning_rate": 5.382971444705641e-06,
"loss": 1.5093352794647217,
"step": 2490
},
{
"epoch": 1.5856142584341184,
"grad_norm": 7.131231857259523,
"learning_rate": 5.379278385794231e-06,
"loss": 1.9903844594955444,
"step": 2491
},
{
"epoch": 1.5862507956715468,
"grad_norm": 9.948365368652288,
"learning_rate": 5.3755851187592476e-06,
"loss": 1.2966089248657227,
"step": 2492
},
{
"epoch": 1.5868873329089752,
"grad_norm": 9.351928892073133,
"learning_rate": 5.371891645627319e-06,
"loss": 1.3795301914215088,
"step": 2493
},
{
"epoch": 1.5875238701464036,
"grad_norm": 14.312100472969226,
"learning_rate": 5.3681979684251825e-06,
"loss": 1.3844339847564697,
"step": 2494
},
{
"epoch": 1.5881604073838318,
"grad_norm": 14.685176653500934,
"learning_rate": 5.364504089179693e-06,
"loss": 1.266568899154663,
"step": 2495
},
{
"epoch": 1.5887969446212602,
"grad_norm": 13.013869857277845,
"learning_rate": 5.3608100099178125e-06,
"loss": 1.5471580028533936,
"step": 2496
},
{
"epoch": 1.5894334818586886,
"grad_norm": 20.74069345556099,
"learning_rate": 5.357115732666616e-06,
"loss": 1.4884121417999268,
"step": 2497
},
{
"epoch": 1.590070019096117,
"grad_norm": 13.287116136135257,
"learning_rate": 5.353421259453283e-06,
"loss": 2.1138341426849365,
"step": 2498
},
{
"epoch": 1.5907065563335454,
"grad_norm": 9.592183280451499,
"learning_rate": 5.3497265923051035e-06,
"loss": 1.4981415271759033,
"step": 2499
},
{
"epoch": 1.5913430935709738,
"grad_norm": 9.406310225065061,
"learning_rate": 5.346031733249472e-06,
"loss": 0.8034936785697937,
"step": 2500
},
{
"epoch": 1.5919796308084022,
"grad_norm": 18.87792843664383,
"learning_rate": 5.342336684313893e-06,
"loss": 0.7998415231704712,
"step": 2501
},
{
"epoch": 1.5926161680458306,
"grad_norm": 10.08298596053083,
"learning_rate": 5.338641447525966e-06,
"loss": 1.3965051174163818,
"step": 2502
},
{
"epoch": 1.593252705283259,
"grad_norm": 14.175780476680286,
"learning_rate": 5.334946024913404e-06,
"loss": 1.2556719779968262,
"step": 2503
},
{
"epoch": 1.5938892425206874,
"grad_norm": 15.267074107268423,
"learning_rate": 5.3312504185040155e-06,
"loss": 2.0348596572875977,
"step": 2504
},
{
"epoch": 1.5945257797581158,
"grad_norm": 9.195903000153212,
"learning_rate": 5.327554630325711e-06,
"loss": 1.3694276809692383,
"step": 2505
},
{
"epoch": 1.5951623169955442,
"grad_norm": 12.841150269370587,
"learning_rate": 5.323858662406502e-06,
"loss": 1.2917990684509277,
"step": 2506
},
{
"epoch": 1.5957988542329726,
"grad_norm": 9.307524612673058,
"learning_rate": 5.3201625167744984e-06,
"loss": 1.1434298753738403,
"step": 2507
},
{
"epoch": 1.596435391470401,
"grad_norm": 8.718760930508182,
"learning_rate": 5.3164661954579045e-06,
"loss": 1.6025032997131348,
"step": 2508
},
{
"epoch": 1.5970719287078294,
"grad_norm": 12.735275486509599,
"learning_rate": 5.312769700485028e-06,
"loss": 1.7678284645080566,
"step": 2509
},
{
"epoch": 1.5977084659452578,
"grad_norm": 7.799973529517469,
"learning_rate": 5.309073033884262e-06,
"loss": 1.5079340934753418,
"step": 2510
},
{
"epoch": 1.5983450031826862,
"grad_norm": 8.154451083063064,
"learning_rate": 5.305376197684104e-06,
"loss": 1.562661051750183,
"step": 2511
},
{
"epoch": 1.5989815404201146,
"grad_norm": 8.15660927982753,
"learning_rate": 5.301679193913136e-06,
"loss": 1.6808500289916992,
"step": 2512
},
{
"epoch": 1.599618077657543,
"grad_norm": 9.094573426234467,
"learning_rate": 5.297982024600039e-06,
"loss": 1.566948413848877,
"step": 2513
},
{
"epoch": 1.6002546148949714,
"grad_norm": 34.31357429002888,
"learning_rate": 5.2942846917735816e-06,
"loss": 2.2005081176757812,
"step": 2514
},
{
"epoch": 1.6008911521323999,
"grad_norm": 13.782375923718162,
"learning_rate": 5.290587197462618e-06,
"loss": 1.8872666358947754,
"step": 2515
},
{
"epoch": 1.6015276893698283,
"grad_norm": 14.337836831748135,
"learning_rate": 5.2868895436961e-06,
"loss": 1.3187255859375,
"step": 2516
},
{
"epoch": 1.6021642266072567,
"grad_norm": 9.925545018631547,
"learning_rate": 5.28319173250306e-06,
"loss": 2.0606861114501953,
"step": 2517
},
{
"epoch": 1.602800763844685,
"grad_norm": 8.634133392617938,
"learning_rate": 5.279493765912618e-06,
"loss": 1.5563485622406006,
"step": 2518
},
{
"epoch": 1.6034373010821135,
"grad_norm": 14.27870926472679,
"learning_rate": 5.2757956459539824e-06,
"loss": 1.2816444635391235,
"step": 2519
},
{
"epoch": 1.6040738383195416,
"grad_norm": 7.42769119113961,
"learning_rate": 5.272097374656441e-06,
"loss": 1.4142775535583496,
"step": 2520
},
{
"epoch": 1.60471037555697,
"grad_norm": 10.609080901884434,
"learning_rate": 5.26839895404937e-06,
"loss": 1.1905722618103027,
"step": 2521
},
{
"epoch": 1.6053469127943985,
"grad_norm": 10.837566893071427,
"learning_rate": 5.264700386162221e-06,
"loss": 1.6138941049575806,
"step": 2522
},
{
"epoch": 1.6059834500318269,
"grad_norm": 10.63493174465722,
"learning_rate": 5.261001673024533e-06,
"loss": 1.6332387924194336,
"step": 2523
},
{
"epoch": 1.6066199872692553,
"grad_norm": 18.05268427884533,
"learning_rate": 5.257302816665921e-06,
"loss": 1.358574390411377,
"step": 2524
},
{
"epoch": 1.6072565245066837,
"grad_norm": 10.264455479855231,
"learning_rate": 5.2536038191160795e-06,
"loss": 1.794968843460083,
"step": 2525
},
{
"epoch": 1.607893061744112,
"grad_norm": 11.673489931037276,
"learning_rate": 5.24990468240478e-06,
"loss": 1.9878292083740234,
"step": 2526
},
{
"epoch": 1.6085295989815405,
"grad_norm": 9.934338495268458,
"learning_rate": 5.246205408561871e-06,
"loss": 1.2982629537582397,
"step": 2527
},
{
"epoch": 1.6091661362189686,
"grad_norm": 8.439367650438797,
"learning_rate": 5.242505999617275e-06,
"loss": 1.7368496656417847,
"step": 2528
},
{
"epoch": 1.609802673456397,
"grad_norm": 9.550554873364206,
"learning_rate": 5.23880645760099e-06,
"loss": 1.5525805950164795,
"step": 2529
},
{
"epoch": 1.6104392106938255,
"grad_norm": 19.304529934923032,
"learning_rate": 5.235106784543087e-06,
"loss": 1.269942045211792,
"step": 2530
},
{
"epoch": 1.6110757479312539,
"grad_norm": 15.007163801558022,
"learning_rate": 5.231406982473708e-06,
"loss": 1.7000211477279663,
"step": 2531
},
{
"epoch": 1.6117122851686823,
"grad_norm": 6.807140346409658,
"learning_rate": 5.2277070534230676e-06,
"loss": 1.319089412689209,
"step": 2532
},
{
"epoch": 1.6123488224061107,
"grad_norm": 11.928631242362574,
"learning_rate": 5.224006999421448e-06,
"loss": 1.4072086811065674,
"step": 2533
},
{
"epoch": 1.612985359643539,
"grad_norm": 11.328101118148659,
"learning_rate": 5.220306822499201e-06,
"loss": 1.5428425073623657,
"step": 2534
},
{
"epoch": 1.6136218968809675,
"grad_norm": 16.49286894082796,
"learning_rate": 5.216606524686744e-06,
"loss": 1.7508461475372314,
"step": 2535
},
{
"epoch": 1.6142584341183959,
"grad_norm": 16.35103594988865,
"learning_rate": 5.212906108014565e-06,
"loss": 1.1230452060699463,
"step": 2536
},
{
"epoch": 1.6148949713558243,
"grad_norm": 7.659799653499345,
"learning_rate": 5.209205574513213e-06,
"loss": 1.4330987930297852,
"step": 2537
},
{
"epoch": 1.6155315085932527,
"grad_norm": 10.221897463363504,
"learning_rate": 5.205504926213301e-06,
"loss": 1.6965951919555664,
"step": 2538
},
{
"epoch": 1.616168045830681,
"grad_norm": 13.112421908722954,
"learning_rate": 5.201804165145511e-06,
"loss": 1.099511742591858,
"step": 2539
},
{
"epoch": 1.6168045830681095,
"grad_norm": 10.853254721405618,
"learning_rate": 5.19810329334058e-06,
"loss": 1.3006181716918945,
"step": 2540
},
{
"epoch": 1.6174411203055379,
"grad_norm": 14.37833836842456,
"learning_rate": 5.194402312829308e-06,
"loss": 1.0019958019256592,
"step": 2541
},
{
"epoch": 1.6180776575429663,
"grad_norm": 17.54365619853365,
"learning_rate": 5.190701225642555e-06,
"loss": 1.5287961959838867,
"step": 2542
},
{
"epoch": 1.6187141947803947,
"grad_norm": 8.907175647988893,
"learning_rate": 5.18700003381124e-06,
"loss": 1.8033959865570068,
"step": 2543
},
{
"epoch": 1.619350732017823,
"grad_norm": 8.150764698477932,
"learning_rate": 5.183298739366339e-06,
"loss": 1.845013976097107,
"step": 2544
},
{
"epoch": 1.6199872692552515,
"grad_norm": 11.033085898851356,
"learning_rate": 5.1795973443388835e-06,
"loss": 1.9903559684753418,
"step": 2545
},
{
"epoch": 1.62062380649268,
"grad_norm": 17.502845998892802,
"learning_rate": 5.175895850759963e-06,
"loss": 1.7325098514556885,
"step": 2546
},
{
"epoch": 1.6212603437301083,
"grad_norm": 8.914388916100945,
"learning_rate": 5.172194260660716e-06,
"loss": 1.6158649921417236,
"step": 2547
},
{
"epoch": 1.6218968809675367,
"grad_norm": 23.376651275892883,
"learning_rate": 5.168492576072339e-06,
"loss": 1.8969463109970093,
"step": 2548
},
{
"epoch": 1.622533418204965,
"grad_norm": 7.7616691794319514,
"learning_rate": 5.164790799026078e-06,
"loss": 1.9469236135482788,
"step": 2549
},
{
"epoch": 1.6231699554423935,
"grad_norm": 11.448904373559774,
"learning_rate": 5.1610889315532305e-06,
"loss": 1.2270617485046387,
"step": 2550
},
{
"epoch": 1.623806492679822,
"grad_norm": 13.076404992383189,
"learning_rate": 5.157386975685141e-06,
"loss": 1.4642993211746216,
"step": 2551
},
{
"epoch": 1.6244430299172503,
"grad_norm": 8.66512216203333,
"learning_rate": 5.153684933453207e-06,
"loss": 1.241729974746704,
"step": 2552
},
{
"epoch": 1.6250795671546785,
"grad_norm": 11.325137785870561,
"learning_rate": 5.14998280688887e-06,
"loss": 1.555477261543274,
"step": 2553
},
{
"epoch": 1.625716104392107,
"grad_norm": 17.34661177920995,
"learning_rate": 5.14628059802362e-06,
"loss": 1.5977141857147217,
"step": 2554
},
{
"epoch": 1.6263526416295353,
"grad_norm": 12.576231546456594,
"learning_rate": 5.142578308888989e-06,
"loss": 1.2389638423919678,
"step": 2555
},
{
"epoch": 1.6269891788669637,
"grad_norm": 12.855957753677716,
"learning_rate": 5.138875941516557e-06,
"loss": 1.4821150302886963,
"step": 2556
},
{
"epoch": 1.627625716104392,
"grad_norm": 11.431309949656288,
"learning_rate": 5.1351734979379444e-06,
"loss": 1.5326590538024902,
"step": 2557
},
{
"epoch": 1.6282622533418205,
"grad_norm": 7.821937185891337,
"learning_rate": 5.1314709801848125e-06,
"loss": 1.4889559745788574,
"step": 2558
},
{
"epoch": 1.628898790579249,
"grad_norm": 9.49676349166614,
"learning_rate": 5.127768390288866e-06,
"loss": 1.6810004711151123,
"step": 2559
},
{
"epoch": 1.6295353278166773,
"grad_norm": 10.71161695931628,
"learning_rate": 5.124065730281851e-06,
"loss": 1.0240229368209839,
"step": 2560
},
{
"epoch": 1.6301718650541057,
"grad_norm": 11.268129447521497,
"learning_rate": 5.120363002195543e-06,
"loss": 1.7059662342071533,
"step": 2561
},
{
"epoch": 1.630808402291534,
"grad_norm": 14.221674921133687,
"learning_rate": 5.116660208061766e-06,
"loss": 1.7881436347961426,
"step": 2562
},
{
"epoch": 1.6314449395289623,
"grad_norm": 14.767107252921956,
"learning_rate": 5.1129573499123716e-06,
"loss": 2.1184029579162598,
"step": 2563
},
{
"epoch": 1.6320814767663907,
"grad_norm": 9.641730487301958,
"learning_rate": 5.109254429779253e-06,
"loss": 1.4992198944091797,
"step": 2564
},
{
"epoch": 1.632718014003819,
"grad_norm": 7.604590478701982,
"learning_rate": 5.105551449694333e-06,
"loss": 1.6534342765808105,
"step": 2565
},
{
"epoch": 1.6333545512412475,
"grad_norm": 9.55502878757529,
"learning_rate": 5.101848411689568e-06,
"loss": 1.5150446891784668,
"step": 2566
},
{
"epoch": 1.633991088478676,
"grad_norm": 9.899677596535001,
"learning_rate": 5.09814531779695e-06,
"loss": 1.0682098865509033,
"step": 2567
},
{
"epoch": 1.6346276257161043,
"grad_norm": 11.019149561544571,
"learning_rate": 5.0944421700484945e-06,
"loss": 1.5136057138442993,
"step": 2568
},
{
"epoch": 1.6352641629535327,
"grad_norm": 8.75181650552953,
"learning_rate": 5.090738970476254e-06,
"loss": 1.3633084297180176,
"step": 2569
},
{
"epoch": 1.6359007001909611,
"grad_norm": 15.60729853294145,
"learning_rate": 5.087035721112306e-06,
"loss": 1.7520954608917236,
"step": 2570
},
{
"epoch": 1.6365372374283895,
"grad_norm": 5.7399923227452225,
"learning_rate": 5.083332423988751e-06,
"loss": 0.6073510050773621,
"step": 2571
},
{
"epoch": 1.637173774665818,
"grad_norm": 8.930390639791247,
"learning_rate": 5.079629081137728e-06,
"loss": 1.3986365795135498,
"step": 2572
},
{
"epoch": 1.6378103119032463,
"grad_norm": 12.220633447031721,
"learning_rate": 5.075925694591387e-06,
"loss": 1.580613136291504,
"step": 2573
},
{
"epoch": 1.6384468491406747,
"grad_norm": 16.007715628823384,
"learning_rate": 5.07222226638191e-06,
"loss": 1.8403897285461426,
"step": 2574
},
{
"epoch": 1.6390833863781031,
"grad_norm": 16.745345585391913,
"learning_rate": 5.068518798541501e-06,
"loss": 1.9516756534576416,
"step": 2575
},
{
"epoch": 1.6397199236155315,
"grad_norm": 33.70732705282881,
"learning_rate": 5.064815293102383e-06,
"loss": 1.200810432434082,
"step": 2576
},
{
"epoch": 1.64035646085296,
"grad_norm": 12.366219401140018,
"learning_rate": 5.061111752096805e-06,
"loss": 0.7799091935157776,
"step": 2577
},
{
"epoch": 1.6409929980903883,
"grad_norm": 13.177154968084707,
"learning_rate": 5.057408177557028e-06,
"loss": 1.4837524890899658,
"step": 2578
},
{
"epoch": 1.6416295353278167,
"grad_norm": 9.513504824800297,
"learning_rate": 5.053704571515337e-06,
"loss": 1.6435632705688477,
"step": 2579
},
{
"epoch": 1.6422660725652451,
"grad_norm": 11.653974469164183,
"learning_rate": 5.0500009360040335e-06,
"loss": 1.476144790649414,
"step": 2580
},
{
"epoch": 1.6429026098026736,
"grad_norm": 14.882494814850851,
"learning_rate": 5.0462972730554316e-06,
"loss": 2.2405483722686768,
"step": 2581
},
{
"epoch": 1.643539147040102,
"grad_norm": 12.340221318641138,
"learning_rate": 5.042593584701867e-06,
"loss": 1.5619505643844604,
"step": 2582
},
{
"epoch": 1.6441756842775304,
"grad_norm": 12.679333565407244,
"learning_rate": 5.038889872975684e-06,
"loss": 2.5269551277160645,
"step": 2583
},
{
"epoch": 1.6448122215149588,
"grad_norm": 9.707074909772693,
"learning_rate": 5.035186139909239e-06,
"loss": 1.4109852313995361,
"step": 2584
},
{
"epoch": 1.6454487587523872,
"grad_norm": 9.739550403269806,
"learning_rate": 5.031482387534907e-06,
"loss": 2.023956060409546,
"step": 2585
},
{
"epoch": 1.6460852959898156,
"grad_norm": 10.453317414956889,
"learning_rate": 5.027778617885065e-06,
"loss": 1.4007673263549805,
"step": 2586
},
{
"epoch": 1.6467218332272437,
"grad_norm": 15.831567048732454,
"learning_rate": 5.024074832992105e-06,
"loss": 2.015324831008911,
"step": 2587
},
{
"epoch": 1.6473583704646722,
"grad_norm": 9.887873218708938,
"learning_rate": 5.020371034888426e-06,
"loss": 1.171660304069519,
"step": 2588
},
{
"epoch": 1.6479949077021006,
"grad_norm": 13.261114636419348,
"learning_rate": 5.016667225606434e-06,
"loss": 1.9868791103363037,
"step": 2589
},
{
"epoch": 1.648631444939529,
"grad_norm": 11.113892939147165,
"learning_rate": 5.01296340717854e-06,
"loss": 1.2732374668121338,
"step": 2590
},
{
"epoch": 1.6492679821769574,
"grad_norm": 9.880403389941325,
"learning_rate": 5.009259581637165e-06,
"loss": 1.531540036201477,
"step": 2591
},
{
"epoch": 1.6499045194143858,
"grad_norm": 10.854817982624324,
"learning_rate": 5.005555751014723e-06,
"loss": 1.3397401571273804,
"step": 2592
},
{
"epoch": 1.6505410566518142,
"grad_norm": 10.913040502838221,
"learning_rate": 5.001851917343647e-06,
"loss": 1.732617735862732,
"step": 2593
},
{
"epoch": 1.6511775938892426,
"grad_norm": 11.477416538848326,
"learning_rate": 4.998148082656356e-06,
"loss": 1.8442219495773315,
"step": 2594
},
{
"epoch": 1.6518141311266707,
"grad_norm": 9.798566088736951,
"learning_rate": 4.994444248985276e-06,
"loss": 1.4900766611099243,
"step": 2595
},
{
"epoch": 1.6524506683640992,
"grad_norm": 7.6013755458264365,
"learning_rate": 4.990740418362837e-06,
"loss": 0.8129755258560181,
"step": 2596
},
{
"epoch": 1.6530872056015276,
"grad_norm": 17.22161084772196,
"learning_rate": 4.987036592821459e-06,
"loss": 0.9461665153503418,
"step": 2597
},
{
"epoch": 1.653723742838956,
"grad_norm": 8.291848018121122,
"learning_rate": 4.983332774393568e-06,
"loss": 0.6947128176689148,
"step": 2598
},
{
"epoch": 1.6543602800763844,
"grad_norm": 11.197807245764695,
"learning_rate": 4.979628965111575e-06,
"loss": 1.4426134824752808,
"step": 2599
},
{
"epoch": 1.6549968173138128,
"grad_norm": 18.112945902640668,
"learning_rate": 4.9759251670078965e-06,
"loss": 1.5626939535140991,
"step": 2600
},
{
"epoch": 1.6556333545512412,
"grad_norm": 14.94374928312868,
"learning_rate": 4.972221382114936e-06,
"loss": 2.3489809036254883,
"step": 2601
},
{
"epoch": 1.6562698917886696,
"grad_norm": 13.568681793115239,
"learning_rate": 4.968517612465094e-06,
"loss": 1.7263020277023315,
"step": 2602
},
{
"epoch": 1.656906429026098,
"grad_norm": 7.3874521974438325,
"learning_rate": 4.964813860090763e-06,
"loss": 1.4950655698776245,
"step": 2603
},
{
"epoch": 1.6575429662635264,
"grad_norm": 11.905322353506724,
"learning_rate": 4.961110127024317e-06,
"loss": 1.5684951543807983,
"step": 2604
},
{
"epoch": 1.6581795035009548,
"grad_norm": 12.179213263352665,
"learning_rate": 4.9574064152981355e-06,
"loss": 1.3854238986968994,
"step": 2605
},
{
"epoch": 1.6588160407383832,
"grad_norm": 6.669421169405933,
"learning_rate": 4.95370272694457e-06,
"loss": 1.1074484586715698,
"step": 2606
},
{
"epoch": 1.6594525779758116,
"grad_norm": 11.107662432773974,
"learning_rate": 4.949999063995969e-06,
"loss": 1.2956738471984863,
"step": 2607
},
{
"epoch": 1.66008911521324,
"grad_norm": 8.723558893408683,
"learning_rate": 4.946295428484665e-06,
"loss": 1.7084920406341553,
"step": 2608
},
{
"epoch": 1.6607256524506684,
"grad_norm": 12.675058168280549,
"learning_rate": 4.942591822442975e-06,
"loss": 1.451248288154602,
"step": 2609
},
{
"epoch": 1.6613621896880968,
"grad_norm": 6.957939997001016,
"learning_rate": 4.938888247903196e-06,
"loss": 1.521910309791565,
"step": 2610
},
{
"epoch": 1.6619987269255252,
"grad_norm": 16.388122496129952,
"learning_rate": 4.935184706897619e-06,
"loss": 1.5063462257385254,
"step": 2611
},
{
"epoch": 1.6626352641629536,
"grad_norm": 12.627133798409567,
"learning_rate": 4.9314812014585e-06,
"loss": 2.344245433807373,
"step": 2612
},
{
"epoch": 1.663271801400382,
"grad_norm": 17.10431999242891,
"learning_rate": 4.927777733618092e-06,
"loss": 0.8642339110374451,
"step": 2613
},
{
"epoch": 1.6639083386378104,
"grad_norm": 8.360206683707844,
"learning_rate": 4.924074305408614e-06,
"loss": 1.2939438819885254,
"step": 2614
},
{
"epoch": 1.6645448758752388,
"grad_norm": 18.579337833934677,
"learning_rate": 4.920370918862274e-06,
"loss": 0.9887975454330444,
"step": 2615
},
{
"epoch": 1.6651814131126672,
"grad_norm": 11.852916789044158,
"learning_rate": 4.916667576011248e-06,
"loss": 1.2906113862991333,
"step": 2616
},
{
"epoch": 1.6658179503500956,
"grad_norm": 8.75643929770764,
"learning_rate": 4.912964278887696e-06,
"loss": 1.9068280458450317,
"step": 2617
},
{
"epoch": 1.666454487587524,
"grad_norm": 15.47526259851981,
"learning_rate": 4.9092610295237464e-06,
"loss": 1.1410505771636963,
"step": 2618
},
{
"epoch": 1.6670910248249524,
"grad_norm": 8.5861311208462,
"learning_rate": 4.905557829951506e-06,
"loss": 1.0436980724334717,
"step": 2619
},
{
"epoch": 1.6677275620623806,
"grad_norm": 13.336515755525518,
"learning_rate": 4.9018546822030535e-06,
"loss": 1.2206828594207764,
"step": 2620
},
{
"epoch": 1.668364099299809,
"grad_norm": 11.084065750932826,
"learning_rate": 4.8981515883104334e-06,
"loss": 1.3082199096679688,
"step": 2621
},
{
"epoch": 1.6690006365372374,
"grad_norm": 13.293188677416547,
"learning_rate": 4.894448550305669e-06,
"loss": 1.2076120376586914,
"step": 2622
},
{
"epoch": 1.6696371737746658,
"grad_norm": 13.921015014631717,
"learning_rate": 4.890745570220748e-06,
"loss": 2.084150552749634,
"step": 2623
},
{
"epoch": 1.6702737110120942,
"grad_norm": 10.879765103715789,
"learning_rate": 4.88704265008763e-06,
"loss": 1.5977354049682617,
"step": 2624
},
{
"epoch": 1.6709102482495226,
"grad_norm": 15.36979222502608,
"learning_rate": 4.883339791938236e-06,
"loss": 1.1671185493469238,
"step": 2625
},
{
"epoch": 1.671546785486951,
"grad_norm": 11.292821889433561,
"learning_rate": 4.8796369978044595e-06,
"loss": 1.3492268323898315,
"step": 2626
},
{
"epoch": 1.6721833227243794,
"grad_norm": 11.001488301329799,
"learning_rate": 4.875934269718151e-06,
"loss": 1.9842692613601685,
"step": 2627
},
{
"epoch": 1.6728198599618078,
"grad_norm": 8.484898229157698,
"learning_rate": 4.872231609711135e-06,
"loss": 1.5810469388961792,
"step": 2628
},
{
"epoch": 1.673456397199236,
"grad_norm": 9.00134511149233,
"learning_rate": 4.868529019815188e-06,
"loss": 1.4162614345550537,
"step": 2629
},
{
"epoch": 1.6740929344366644,
"grad_norm": 8.237923462320568,
"learning_rate": 4.864826502062058e-06,
"loss": 1.422102689743042,
"step": 2630
},
{
"epoch": 1.6747294716740928,
"grad_norm": 11.225714993475835,
"learning_rate": 4.861124058483444e-06,
"loss": 1.182719111442566,
"step": 2631
},
{
"epoch": 1.6753660089115212,
"grad_norm": 8.982227872237772,
"learning_rate": 4.857421691111012e-06,
"loss": 1.7780842781066895,
"step": 2632
},
{
"epoch": 1.6760025461489496,
"grad_norm": 16.986305284004324,
"learning_rate": 4.853719401976381e-06,
"loss": 2.028423547744751,
"step": 2633
},
{
"epoch": 1.676639083386378,
"grad_norm": 16.22236168730729,
"learning_rate": 4.850017193111132e-06,
"loss": 1.4137513637542725,
"step": 2634
},
{
"epoch": 1.6772756206238064,
"grad_norm": 17.063882116609896,
"learning_rate": 4.846315066546793e-06,
"loss": 1.62284517288208,
"step": 2635
},
{
"epoch": 1.6779121578612348,
"grad_norm": 14.463341304966352,
"learning_rate": 4.842613024314861e-06,
"loss": 1.4167145490646362,
"step": 2636
},
{
"epoch": 1.6785486950986632,
"grad_norm": 7.12265951247381,
"learning_rate": 4.838911068446772e-06,
"loss": 1.2699812650680542,
"step": 2637
},
{
"epoch": 1.6791852323360916,
"grad_norm": 11.560495425660083,
"learning_rate": 4.835209200973923e-06,
"loss": 1.7189140319824219,
"step": 2638
},
{
"epoch": 1.67982176957352,
"grad_norm": 9.284297454278853,
"learning_rate": 4.831507423927662e-06,
"loss": 1.8094840049743652,
"step": 2639
},
{
"epoch": 1.6804583068109484,
"grad_norm": 10.5547572694535,
"learning_rate": 4.8278057393392845e-06,
"loss": 1.5845067501068115,
"step": 2640
},
{
"epoch": 1.6810948440483768,
"grad_norm": 13.076491851412808,
"learning_rate": 4.82410414924004e-06,
"loss": 1.0932317972183228,
"step": 2641
},
{
"epoch": 1.6817313812858052,
"grad_norm": 15.369196486975776,
"learning_rate": 4.820402655661117e-06,
"loss": 2.6274807453155518,
"step": 2642
},
{
"epoch": 1.6823679185232336,
"grad_norm": 9.292207744571861,
"learning_rate": 4.816701260633663e-06,
"loss": 1.6815487146377563,
"step": 2643
},
{
"epoch": 1.683004455760662,
"grad_norm": 7.972676529087167,
"learning_rate": 4.812999966188761e-06,
"loss": 1.5698246955871582,
"step": 2644
},
{
"epoch": 1.6836409929980904,
"grad_norm": 8.171446802527788,
"learning_rate": 4.809298774357447e-06,
"loss": 1.4061400890350342,
"step": 2645
},
{
"epoch": 1.6842775302355188,
"grad_norm": 10.587272437483414,
"learning_rate": 4.805597687170693e-06,
"loss": 1.510296106338501,
"step": 2646
},
{
"epoch": 1.6849140674729473,
"grad_norm": 15.292650449536161,
"learning_rate": 4.801896706659421e-06,
"loss": 1.462324619293213,
"step": 2647
},
{
"epoch": 1.6855506047103757,
"grad_norm": 11.774977159376329,
"learning_rate": 4.7981958348544885e-06,
"loss": 1.1011741161346436,
"step": 2648
},
{
"epoch": 1.686187141947804,
"grad_norm": 9.09207607187413,
"learning_rate": 4.7944950737866995e-06,
"loss": 1.5601119995117188,
"step": 2649
},
{
"epoch": 1.6868236791852325,
"grad_norm": 7.4325759097753,
"learning_rate": 4.790794425486787e-06,
"loss": 1.452481746673584,
"step": 2650
},
{
"epoch": 1.6874602164226609,
"grad_norm": 5.863343330390682,
"learning_rate": 4.787093891985437e-06,
"loss": 1.1045328378677368,
"step": 2651
},
{
"epoch": 1.6880967536600893,
"grad_norm": 13.42455223072694,
"learning_rate": 4.7833934753132565e-06,
"loss": 1.9689677953720093,
"step": 2652
},
{
"epoch": 1.6887332908975177,
"grad_norm": 13.269272548547477,
"learning_rate": 4.7796931775008e-06,
"loss": 1.5194764137268066,
"step": 2653
},
{
"epoch": 1.6893698281349459,
"grad_norm": 7.962121367092392,
"learning_rate": 4.7759930005785545e-06,
"loss": 1.214949131011963,
"step": 2654
},
{
"epoch": 1.6900063653723743,
"grad_norm": 11.204364022067033,
"learning_rate": 4.772292946576933e-06,
"loss": 2.322883129119873,
"step": 2655
},
{
"epoch": 1.6906429026098027,
"grad_norm": 15.409102974042531,
"learning_rate": 4.768593017526294e-06,
"loss": 1.7185745239257812,
"step": 2656
},
{
"epoch": 1.691279439847231,
"grad_norm": 8.06740086429854,
"learning_rate": 4.764893215456915e-06,
"loss": 1.651207447052002,
"step": 2657
},
{
"epoch": 1.6919159770846595,
"grad_norm": 12.292151951284634,
"learning_rate": 4.7611935423990126e-06,
"loss": 1.7684534788131714,
"step": 2658
},
{
"epoch": 1.6925525143220879,
"grad_norm": 19.782795435990266,
"learning_rate": 4.757494000382728e-06,
"loss": 1.3692883253097534,
"step": 2659
},
{
"epoch": 1.6931890515595163,
"grad_norm": 11.36673899932888,
"learning_rate": 4.753794591438132e-06,
"loss": 1.336172342300415,
"step": 2660
},
{
"epoch": 1.6938255887969447,
"grad_norm": 14.702726471415264,
"learning_rate": 4.750095317595221e-06,
"loss": 1.4075257778167725,
"step": 2661
},
{
"epoch": 1.6944621260343729,
"grad_norm": 13.13559681156395,
"learning_rate": 4.746396180883922e-06,
"loss": 2.2185723781585693,
"step": 2662
},
{
"epoch": 1.6950986632718013,
"grad_norm": 13.993201185079574,
"learning_rate": 4.742697183334079e-06,
"loss": 1.713407278060913,
"step": 2663
},
{
"epoch": 1.6957352005092297,
"grad_norm": 11.250450854267283,
"learning_rate": 4.7389983269754685e-06,
"loss": 1.4078516960144043,
"step": 2664
},
{
"epoch": 1.696371737746658,
"grad_norm": 8.866806251529898,
"learning_rate": 4.7352996138377795e-06,
"loss": 1.0841351747512817,
"step": 2665
},
{
"epoch": 1.6970082749840865,
"grad_norm": 15.22251665328589,
"learning_rate": 4.731601045950632e-06,
"loss": 2.2208127975463867,
"step": 2666
},
{
"epoch": 1.6976448122215149,
"grad_norm": 12.755584434542216,
"learning_rate": 4.72790262534356e-06,
"loss": 1.541381597518921,
"step": 2667
},
{
"epoch": 1.6982813494589433,
"grad_norm": 12.973773040042227,
"learning_rate": 4.724204354046019e-06,
"loss": 4.1180739402771,
"step": 2668
},
{
"epoch": 1.6989178866963717,
"grad_norm": 14.865288813598779,
"learning_rate": 4.720506234087382e-06,
"loss": 1.2021582126617432,
"step": 2669
},
{
"epoch": 1.6995544239338,
"grad_norm": 9.45945210243453,
"learning_rate": 4.7168082674969414e-06,
"loss": 1.6852741241455078,
"step": 2670
},
{
"epoch": 1.7001909611712285,
"grad_norm": 11.74632445740345,
"learning_rate": 4.713110456303902e-06,
"loss": 1.4227819442749023,
"step": 2671
},
{
"epoch": 1.7008274984086569,
"grad_norm": 12.312044268760175,
"learning_rate": 4.709412802537383e-06,
"loss": 1.231907844543457,
"step": 2672
},
{
"epoch": 1.7014640356460853,
"grad_norm": 8.019791070796293,
"learning_rate": 4.705715308226422e-06,
"loss": 1.3580896854400635,
"step": 2673
},
{
"epoch": 1.7021005728835137,
"grad_norm": 11.25609712511326,
"learning_rate": 4.702017975399962e-06,
"loss": 1.5906083583831787,
"step": 2674
},
{
"epoch": 1.702737110120942,
"grad_norm": 13.351174211208962,
"learning_rate": 4.698320806086865e-06,
"loss": 1.5613493919372559,
"step": 2675
},
{
"epoch": 1.7033736473583705,
"grad_norm": 10.067603950433067,
"learning_rate": 4.694623802315897e-06,
"loss": 1.824951410293579,
"step": 2676
},
{
"epoch": 1.704010184595799,
"grad_norm": 8.619097293119305,
"learning_rate": 4.69092696611574e-06,
"loss": 1.9785186052322388,
"step": 2677
},
{
"epoch": 1.7046467218332273,
"grad_norm": 24.780299073551443,
"learning_rate": 4.687230299514973e-06,
"loss": 2.073528289794922,
"step": 2678
},
{
"epoch": 1.7052832590706557,
"grad_norm": 10.394760869831988,
"learning_rate": 4.683533804542096e-06,
"loss": 1.5442249774932861,
"step": 2679
},
{
"epoch": 1.705919796308084,
"grad_norm": 9.109112189999545,
"learning_rate": 4.679837483225502e-06,
"loss": 1.3770475387573242,
"step": 2680
},
{
"epoch": 1.7065563335455125,
"grad_norm": 11.288737093630047,
"learning_rate": 4.6761413375934985e-06,
"loss": 1.0954097509384155,
"step": 2681
},
{
"epoch": 1.707192870782941,
"grad_norm": 7.6022964620470885,
"learning_rate": 4.672445369674289e-06,
"loss": 1.6249165534973145,
"step": 2682
},
{
"epoch": 1.7078294080203693,
"grad_norm": 12.311050123793805,
"learning_rate": 4.668749581495985e-06,
"loss": 2.0466883182525635,
"step": 2683
},
{
"epoch": 1.7084659452577977,
"grad_norm": 9.652623133287916,
"learning_rate": 4.665053975086596e-06,
"loss": 1.3413220643997192,
"step": 2684
},
{
"epoch": 1.7091024824952261,
"grad_norm": 14.407945494916643,
"learning_rate": 4.661358552474035e-06,
"loss": 1.534101963043213,
"step": 2685
},
{
"epoch": 1.7097390197326545,
"grad_norm": 13.699681515021897,
"learning_rate": 4.657663315686108e-06,
"loss": 1.7639039754867554,
"step": 2686
},
{
"epoch": 1.7103755569700827,
"grad_norm": 9.835204331445263,
"learning_rate": 4.653968266750529e-06,
"loss": 1.663576364517212,
"step": 2687
},
{
"epoch": 1.711012094207511,
"grad_norm": 8.29213353900224,
"learning_rate": 4.650273407694899e-06,
"loss": 1.8401124477386475,
"step": 2688
},
{
"epoch": 1.7116486314449395,
"grad_norm": 16.21116667459892,
"learning_rate": 4.646578740546719e-06,
"loss": 2.051877498626709,
"step": 2689
},
{
"epoch": 1.712285168682368,
"grad_norm": 10.334816052918256,
"learning_rate": 4.642884267333386e-06,
"loss": 2.043306350708008,
"step": 2690
},
{
"epoch": 1.7129217059197963,
"grad_norm": 10.694960629551327,
"learning_rate": 4.639189990082188e-06,
"loss": 1.655669927597046,
"step": 2691
},
{
"epoch": 1.7135582431572247,
"grad_norm": 9.789722825125446,
"learning_rate": 4.63549591082031e-06,
"loss": 2.1007449626922607,
"step": 2692
},
{
"epoch": 1.7141947803946531,
"grad_norm": 10.841182097179676,
"learning_rate": 4.631802031574818e-06,
"loss": 1.9260590076446533,
"step": 2693
},
{
"epoch": 1.7148313176320815,
"grad_norm": 9.661695123917502,
"learning_rate": 4.628108354372684e-06,
"loss": 1.5422745943069458,
"step": 2694
},
{
"epoch": 1.7154678548695097,
"grad_norm": 11.278926682171598,
"learning_rate": 4.624414881240753e-06,
"loss": 1.5441336631774902,
"step": 2695
},
{
"epoch": 1.716104392106938,
"grad_norm": 9.370853240561832,
"learning_rate": 4.62072161420577e-06,
"loss": 1.4866554737091064,
"step": 2696
},
{
"epoch": 1.7167409293443665,
"grad_norm": 10.634180490901398,
"learning_rate": 4.6170285552943596e-06,
"loss": 1.3072073459625244,
"step": 2697
},
{
"epoch": 1.717377466581795,
"grad_norm": 10.154650081060982,
"learning_rate": 4.613335706533036e-06,
"loss": 1.5072894096374512,
"step": 2698
},
{
"epoch": 1.7180140038192233,
"grad_norm": 8.639145327939204,
"learning_rate": 4.609643069948198e-06,
"loss": 1.424002766609192,
"step": 2699
},
{
"epoch": 1.7186505410566517,
"grad_norm": 14.166335213503675,
"learning_rate": 4.605950647566126e-06,
"loss": 1.6846305131912231,
"step": 2700
},
{
"epoch": 1.7192870782940801,
"grad_norm": 9.192112419179182,
"learning_rate": 4.602258441412981e-06,
"loss": 1.3232005834579468,
"step": 2701
},
{
"epoch": 1.7199236155315085,
"grad_norm": 15.282658775457303,
"learning_rate": 4.598566453514815e-06,
"loss": 1.3494807481765747,
"step": 2702
},
{
"epoch": 1.720560152768937,
"grad_norm": 7.554112889870477,
"learning_rate": 4.594874685897547e-06,
"loss": 1.5970849990844727,
"step": 2703
},
{
"epoch": 1.7211966900063653,
"grad_norm": 12.5940463637466,
"learning_rate": 4.591183140586984e-06,
"loss": 1.9829490184783936,
"step": 2704
},
{
"epoch": 1.7218332272437937,
"grad_norm": 8.357527203466196,
"learning_rate": 4.58749181960881e-06,
"loss": 1.2931435108184814,
"step": 2705
},
{
"epoch": 1.7224697644812221,
"grad_norm": 13.079920830712595,
"learning_rate": 4.58380072498858e-06,
"loss": 1.723933219909668,
"step": 2706
},
{
"epoch": 1.7231063017186505,
"grad_norm": 15.043892445958704,
"learning_rate": 4.580109858751735e-06,
"loss": 1.5980844497680664,
"step": 2707
},
{
"epoch": 1.723742838956079,
"grad_norm": 13.773161210386057,
"learning_rate": 4.5764192229235806e-06,
"loss": 1.5282667875289917,
"step": 2708
},
{
"epoch": 1.7243793761935073,
"grad_norm": 13.30959014815685,
"learning_rate": 4.5727288195293015e-06,
"loss": 1.3275375366210938,
"step": 2709
},
{
"epoch": 1.7250159134309357,
"grad_norm": 12.88024093785115,
"learning_rate": 4.569038650593953e-06,
"loss": 1.5965644121170044,
"step": 2710
},
{
"epoch": 1.7256524506683641,
"grad_norm": 9.375123164985691,
"learning_rate": 4.565348718142464e-06,
"loss": 0.7428637742996216,
"step": 2711
},
{
"epoch": 1.7262889879057925,
"grad_norm": 15.354425070286716,
"learning_rate": 4.561659024199631e-06,
"loss": 1.6778018474578857,
"step": 2712
},
{
"epoch": 1.726925525143221,
"grad_norm": 10.325063708220172,
"learning_rate": 4.5579695707901206e-06,
"loss": 0.8608949780464172,
"step": 2713
},
{
"epoch": 1.7275620623806494,
"grad_norm": 9.69379985752633,
"learning_rate": 4.554280359938467e-06,
"loss": 1.8087621927261353,
"step": 2714
},
{
"epoch": 1.7281985996180778,
"grad_norm": 10.391679895250121,
"learning_rate": 4.550591393669075e-06,
"loss": 1.2912559509277344,
"step": 2715
},
{
"epoch": 1.7288351368555062,
"grad_norm": 9.509452341637738,
"learning_rate": 4.546902674006205e-06,
"loss": 1.325238823890686,
"step": 2716
},
{
"epoch": 1.7294716740929346,
"grad_norm": 10.031245400265288,
"learning_rate": 4.543214202973997e-06,
"loss": 1.3821557760238647,
"step": 2717
},
{
"epoch": 1.730108211330363,
"grad_norm": 7.8763825659782825,
"learning_rate": 4.539525982596441e-06,
"loss": 1.4219706058502197,
"step": 2718
},
{
"epoch": 1.7307447485677914,
"grad_norm": 8.131227606844485,
"learning_rate": 4.535838014897397e-06,
"loss": 1.3258090019226074,
"step": 2719
},
{
"epoch": 1.7313812858052198,
"grad_norm": 15.438585926443563,
"learning_rate": 4.5321503019005845e-06,
"loss": 1.742460012435913,
"step": 2720
},
{
"epoch": 1.732017823042648,
"grad_norm": 10.980552780427807,
"learning_rate": 4.528462845629582e-06,
"loss": 1.804673671722412,
"step": 2721
},
{
"epoch": 1.7326543602800764,
"grad_norm": 12.225911915985892,
"learning_rate": 4.5247756481078316e-06,
"loss": 1.4501913785934448,
"step": 2722
},
{
"epoch": 1.7332908975175048,
"grad_norm": 9.058366703392052,
"learning_rate": 4.521088711358625e-06,
"loss": 2.058661699295044,
"step": 2723
},
{
"epoch": 1.7339274347549332,
"grad_norm": 14.457279486215986,
"learning_rate": 4.5174020374051196e-06,
"loss": 2.465367317199707,
"step": 2724
},
{
"epoch": 1.7345639719923616,
"grad_norm": 9.968762813500518,
"learning_rate": 4.513715628270322e-06,
"loss": 1.7616548538208008,
"step": 2725
},
{
"epoch": 1.73520050922979,
"grad_norm": 19.23324342547291,
"learning_rate": 4.5100294859771e-06,
"loss": 1.5484445095062256,
"step": 2726
},
{
"epoch": 1.7358370464672184,
"grad_norm": 9.05027175359414,
"learning_rate": 4.506343612548167e-06,
"loss": 1.5154013633728027,
"step": 2727
},
{
"epoch": 1.7364735837046468,
"grad_norm": 16.351489583050718,
"learning_rate": 4.502658010006099e-06,
"loss": 1.9099916219711304,
"step": 2728
},
{
"epoch": 1.737110120942075,
"grad_norm": 14.012444498061342,
"learning_rate": 4.498972680373309e-06,
"loss": 1.546755075454712,
"step": 2729
},
{
"epoch": 1.7377466581795034,
"grad_norm": 9.658306319539678,
"learning_rate": 4.495287625672078e-06,
"loss": 1.8375014066696167,
"step": 2730
},
{
"epoch": 1.7383831954169318,
"grad_norm": 10.236324118990494,
"learning_rate": 4.49160284792452e-06,
"loss": 1.5359485149383545,
"step": 2731
},
{
"epoch": 1.7390197326543602,
"grad_norm": 15.876697672007449,
"learning_rate": 4.487918349152608e-06,
"loss": 1.550153136253357,
"step": 2732
},
{
"epoch": 1.7396562698917886,
"grad_norm": 10.035313981197161,
"learning_rate": 4.484234131378156e-06,
"loss": 1.5224084854125977,
"step": 2733
},
{
"epoch": 1.740292807129217,
"grad_norm": 16.188799288719043,
"learning_rate": 4.480550196622826e-06,
"loss": 1.6584681272506714,
"step": 2734
},
{
"epoch": 1.7409293443666454,
"grad_norm": 24.536301582125997,
"learning_rate": 4.476866546908123e-06,
"loss": 1.3278266191482544,
"step": 2735
},
{
"epoch": 1.7415658816040738,
"grad_norm": 13.660091693006471,
"learning_rate": 4.4731831842554005e-06,
"loss": 1.5143094062805176,
"step": 2736
},
{
"epoch": 1.7422024188415022,
"grad_norm": 12.097725864268526,
"learning_rate": 4.4695001106858476e-06,
"loss": 1.1598591804504395,
"step": 2737
},
{
"epoch": 1.7428389560789306,
"grad_norm": 6.817629496229504,
"learning_rate": 4.465817328220501e-06,
"loss": 1.4929516315460205,
"step": 2738
},
{
"epoch": 1.743475493316359,
"grad_norm": 12.298725463933952,
"learning_rate": 4.462134838880232e-06,
"loss": 1.676002025604248,
"step": 2739
},
{
"epoch": 1.7441120305537874,
"grad_norm": 12.790239051801278,
"learning_rate": 4.458452644685755e-06,
"loss": 1.4403189420700073,
"step": 2740
},
{
"epoch": 1.7447485677912158,
"grad_norm": 15.214184935419688,
"learning_rate": 4.454770747657621e-06,
"loss": 1.8207788467407227,
"step": 2741
},
{
"epoch": 1.7453851050286442,
"grad_norm": 18.147328398579585,
"learning_rate": 4.451089149816217e-06,
"loss": 1.6734029054641724,
"step": 2742
},
{
"epoch": 1.7460216422660726,
"grad_norm": 17.008129338089915,
"learning_rate": 4.44740785318177e-06,
"loss": 1.4844833612442017,
"step": 2743
},
{
"epoch": 1.746658179503501,
"grad_norm": 19.944149069153134,
"learning_rate": 4.443726859774335e-06,
"loss": 1.705742359161377,
"step": 2744
},
{
"epoch": 1.7472947167409294,
"grad_norm": 9.00735539820465,
"learning_rate": 4.440046171613809e-06,
"loss": 1.8046600818634033,
"step": 2745
},
{
"epoch": 1.7479312539783578,
"grad_norm": 11.165564361332601,
"learning_rate": 4.436365790719911e-06,
"loss": 1.7548118829727173,
"step": 2746
},
{
"epoch": 1.7485677912157862,
"grad_norm": 12.905787308633506,
"learning_rate": 4.432685719112203e-06,
"loss": 2.3659415245056152,
"step": 2747
},
{
"epoch": 1.7492043284532146,
"grad_norm": 10.468041323057008,
"learning_rate": 4.429005958810068e-06,
"loss": 1.8399016857147217,
"step": 2748
},
{
"epoch": 1.749840865690643,
"grad_norm": 16.12950367376264,
"learning_rate": 4.425326511832722e-06,
"loss": 1.7204582691192627,
"step": 2749
},
{
"epoch": 1.7504774029280714,
"grad_norm": 15.272007450644736,
"learning_rate": 4.421647380199209e-06,
"loss": 1.4919688701629639,
"step": 2750
},
{
"epoch": 1.7511139401654998,
"grad_norm": 7.684914591730195,
"learning_rate": 4.417968565928402e-06,
"loss": 1.036765694618225,
"step": 2751
},
{
"epoch": 1.7517504774029282,
"grad_norm": 14.096573359376926,
"learning_rate": 4.414290071038993e-06,
"loss": 1.4351688623428345,
"step": 2752
},
{
"epoch": 1.7523870146403566,
"grad_norm": 8.972784578658036,
"learning_rate": 4.410611897549508e-06,
"loss": 1.400472640991211,
"step": 2753
},
{
"epoch": 1.7530235518777848,
"grad_norm": 15.530583142097809,
"learning_rate": 4.406934047478289e-06,
"loss": 1.7428488731384277,
"step": 2754
},
{
"epoch": 1.7536600891152132,
"grad_norm": 9.999208053693803,
"learning_rate": 4.4032565228435045e-06,
"loss": 1.6773875951766968,
"step": 2755
},
{
"epoch": 1.7542966263526416,
"grad_norm": 13.141219762306736,
"learning_rate": 4.399579325663143e-06,
"loss": 1.8271172046661377,
"step": 2756
},
{
"epoch": 1.75493316359007,
"grad_norm": 9.646887729468569,
"learning_rate": 4.395902457955013e-06,
"loss": 1.2367219924926758,
"step": 2757
},
{
"epoch": 1.7555697008274984,
"grad_norm": 11.672356507759368,
"learning_rate": 4.392225921736747e-06,
"loss": 1.7831553220748901,
"step": 2758
},
{
"epoch": 1.7562062380649268,
"grad_norm": 11.740461262184539,
"learning_rate": 4.3885497190257846e-06,
"loss": 1.3299363851547241,
"step": 2759
},
{
"epoch": 1.7568427753023552,
"grad_norm": 14.282444391041624,
"learning_rate": 4.3848738518393955e-06,
"loss": 1.4146277904510498,
"step": 2760
},
{
"epoch": 1.7574793125397836,
"grad_norm": 11.946687790079677,
"learning_rate": 4.381198322194655e-06,
"loss": 1.8097727298736572,
"step": 2761
},
{
"epoch": 1.7581158497772118,
"grad_norm": 8.509222426900719,
"learning_rate": 4.37752313210846e-06,
"loss": 2.181358575820923,
"step": 2762
},
{
"epoch": 1.7587523870146402,
"grad_norm": 9.877851555069872,
"learning_rate": 4.373848283597515e-06,
"loss": 1.792051911354065,
"step": 2763
},
{
"epoch": 1.7593889242520686,
"grad_norm": 7.880989115735572,
"learning_rate": 4.370173778678345e-06,
"loss": 1.6314315795898438,
"step": 2764
},
{
"epoch": 1.760025461489497,
"grad_norm": 11.511721765097521,
"learning_rate": 4.366499619367278e-06,
"loss": 1.300513744354248,
"step": 2765
},
{
"epoch": 1.7606619987269254,
"grad_norm": 8.235853079907368,
"learning_rate": 4.36282580768046e-06,
"loss": 1.2249748706817627,
"step": 2766
},
{
"epoch": 1.7612985359643538,
"grad_norm": 15.517502556500824,
"learning_rate": 4.359152345633837e-06,
"loss": 1.568852186203003,
"step": 2767
},
{
"epoch": 1.7619350732017822,
"grad_norm": 13.196318433501105,
"learning_rate": 4.355479235243175e-06,
"loss": 1.5850822925567627,
"step": 2768
},
{
"epoch": 1.7625716104392106,
"grad_norm": 9.491915527963855,
"learning_rate": 4.351806478524034e-06,
"loss": 1.2834690809249878,
"step": 2769
},
{
"epoch": 1.763208147676639,
"grad_norm": 8.13351804237787,
"learning_rate": 4.348134077491793e-06,
"loss": 1.5589559078216553,
"step": 2770
},
{
"epoch": 1.7638446849140674,
"grad_norm": 9.71242858538455,
"learning_rate": 4.344462034161625e-06,
"loss": 1.8301423788070679,
"step": 2771
},
{
"epoch": 1.7644812221514958,
"grad_norm": 13.56209659158831,
"learning_rate": 4.340790350548512e-06,
"loss": 1.9303207397460938,
"step": 2772
},
{
"epoch": 1.7651177593889242,
"grad_norm": 9.37162818204778,
"learning_rate": 4.33711902866724e-06,
"loss": 1.2622904777526855,
"step": 2773
},
{
"epoch": 1.7657542966263526,
"grad_norm": 9.21554839176676,
"learning_rate": 4.3334480705323915e-06,
"loss": 1.9959380626678467,
"step": 2774
},
{
"epoch": 1.766390833863781,
"grad_norm": 7.683670140357001,
"learning_rate": 4.329777478158353e-06,
"loss": 1.663625955581665,
"step": 2775
},
{
"epoch": 1.7670273711012094,
"grad_norm": 11.91599494046607,
"learning_rate": 4.3261072535593085e-06,
"loss": 2.1813929080963135,
"step": 2776
},
{
"epoch": 1.7676639083386378,
"grad_norm": 13.039022995026968,
"learning_rate": 4.322437398749243e-06,
"loss": 1.7397382259368896,
"step": 2777
},
{
"epoch": 1.7683004455760662,
"grad_norm": 8.714024062774746,
"learning_rate": 4.318767915741935e-06,
"loss": 1.4001973867416382,
"step": 2778
},
{
"epoch": 1.7689369828134947,
"grad_norm": 9.16784933246916,
"learning_rate": 4.315098806550962e-06,
"loss": 1.0145443677902222,
"step": 2779
},
{
"epoch": 1.769573520050923,
"grad_norm": 10.226845400525429,
"learning_rate": 4.311430073189694e-06,
"loss": 1.6936746835708618,
"step": 2780
},
{
"epoch": 1.7702100572883515,
"grad_norm": 10.063227134440265,
"learning_rate": 4.307761717671298e-06,
"loss": 2.4931836128234863,
"step": 2781
},
{
"epoch": 1.7708465945257799,
"grad_norm": 16.45329187318938,
"learning_rate": 4.304093742008727e-06,
"loss": 1.7186734676361084,
"step": 2782
},
{
"epoch": 1.7714831317632083,
"grad_norm": 9.545672658717793,
"learning_rate": 4.300426148214735e-06,
"loss": 1.5424296855926514,
"step": 2783
},
{
"epoch": 1.7721196690006367,
"grad_norm": 12.51102086704486,
"learning_rate": 4.296758938301857e-06,
"loss": 1.3923466205596924,
"step": 2784
},
{
"epoch": 1.772756206238065,
"grad_norm": 14.011935924875702,
"learning_rate": 4.293092114282426e-06,
"loss": 2.013211727142334,
"step": 2785
},
{
"epoch": 1.7733927434754935,
"grad_norm": 7.779708386248659,
"learning_rate": 4.289425678168555e-06,
"loss": 1.4323763847351074,
"step": 2786
},
{
"epoch": 1.7740292807129217,
"grad_norm": 12.065057462565743,
"learning_rate": 4.285759631972152e-06,
"loss": 1.068676233291626,
"step": 2787
},
{
"epoch": 1.77466581795035,
"grad_norm": 14.248003851494179,
"learning_rate": 4.282093977704906e-06,
"loss": 1.8798476457595825,
"step": 2788
},
{
"epoch": 1.7753023551877785,
"grad_norm": 11.232685981221792,
"learning_rate": 4.278428717378292e-06,
"loss": 3.5437092781066895,
"step": 2789
},
{
"epoch": 1.7759388924252069,
"grad_norm": 11.183540202316731,
"learning_rate": 4.274763853003569e-06,
"loss": 1.4824597835540771,
"step": 2790
},
{
"epoch": 1.7765754296626353,
"grad_norm": 13.802222955664616,
"learning_rate": 4.271099386591776e-06,
"loss": 1.8571828603744507,
"step": 2791
},
{
"epoch": 1.7772119669000637,
"grad_norm": 9.628240361912486,
"learning_rate": 4.267435320153741e-06,
"loss": 1.421258807182312,
"step": 2792
},
{
"epoch": 1.777848504137492,
"grad_norm": 10.805433128164388,
"learning_rate": 4.263771655700065e-06,
"loss": 1.3578135967254639,
"step": 2793
},
{
"epoch": 1.7784850413749205,
"grad_norm": 9.825479299999373,
"learning_rate": 4.260108395241134e-06,
"loss": 1.8957493305206299,
"step": 2794
},
{
"epoch": 1.7791215786123489,
"grad_norm": 14.76523941209378,
"learning_rate": 4.256445540787105e-06,
"loss": 1.4083436727523804,
"step": 2795
},
{
"epoch": 1.779758115849777,
"grad_norm": 17.658230056553823,
"learning_rate": 4.252783094347923e-06,
"loss": 1.2712535858154297,
"step": 2796
},
{
"epoch": 1.7803946530872055,
"grad_norm": 12.532150521837378,
"learning_rate": 4.249121057933297e-06,
"loss": 1.4908297061920166,
"step": 2797
},
{
"epoch": 1.7810311903246339,
"grad_norm": 17.047390143890933,
"learning_rate": 4.245459433552721e-06,
"loss": 2.1099019050598145,
"step": 2798
},
{
"epoch": 1.7816677275620623,
"grad_norm": 17.241080570413338,
"learning_rate": 4.241798223215456e-06,
"loss": 1.5396265983581543,
"step": 2799
},
{
"epoch": 1.7823042647994907,
"grad_norm": 8.765223068782607,
"learning_rate": 4.238137428930542e-06,
"loss": 1.4360289573669434,
"step": 2800
},
{
"epoch": 1.782940802036919,
"grad_norm": 17.34606194623906,
"learning_rate": 4.234477052706784e-06,
"loss": 0.9871362447738647,
"step": 2801
},
{
"epoch": 1.7835773392743475,
"grad_norm": 12.55512283517256,
"learning_rate": 4.230817096552762e-06,
"loss": 1.7521235942840576,
"step": 2802
},
{
"epoch": 1.7842138765117759,
"grad_norm": 12.308565339677186,
"learning_rate": 4.227157562476826e-06,
"loss": 1.4013760089874268,
"step": 2803
},
{
"epoch": 1.7848504137492043,
"grad_norm": 10.723269316446883,
"learning_rate": 4.223498452487092e-06,
"loss": 1.6195989847183228,
"step": 2804
},
{
"epoch": 1.7854869509866327,
"grad_norm": 8.628473860906015,
"learning_rate": 4.2198397685914425e-06,
"loss": 1.5191609859466553,
"step": 2805
},
{
"epoch": 1.786123488224061,
"grad_norm": 9.935647980668826,
"learning_rate": 4.21618151279753e-06,
"loss": 1.8672910928726196,
"step": 2806
},
{
"epoch": 1.7867600254614895,
"grad_norm": 6.068502730361051,
"learning_rate": 4.212523687112769e-06,
"loss": 1.6413987874984741,
"step": 2807
},
{
"epoch": 1.787396562698918,
"grad_norm": 17.78448689223399,
"learning_rate": 4.208866293544338e-06,
"loss": 2.192814350128174,
"step": 2808
},
{
"epoch": 1.7880330999363463,
"grad_norm": 7.8161475752745435,
"learning_rate": 4.205209334099182e-06,
"loss": 2.1916379928588867,
"step": 2809
},
{
"epoch": 1.7886696371737747,
"grad_norm": 11.403672726304672,
"learning_rate": 4.2015528107839996e-06,
"loss": 1.607360601425171,
"step": 2810
},
{
"epoch": 1.789306174411203,
"grad_norm": 11.625093642522316,
"learning_rate": 4.197896725605263e-06,
"loss": 2.0406980514526367,
"step": 2811
},
{
"epoch": 1.7899427116486315,
"grad_norm": 13.724398599204527,
"learning_rate": 4.1942410805691896e-06,
"loss": 0.8421260118484497,
"step": 2812
},
{
"epoch": 1.79057924888606,
"grad_norm": 7.647726118571639,
"learning_rate": 4.190585877681766e-06,
"loss": 1.7024935483932495,
"step": 2813
},
{
"epoch": 1.7912157861234883,
"grad_norm": 12.4303655102848,
"learning_rate": 4.186931118948731e-06,
"loss": 1.6000210046768188,
"step": 2814
},
{
"epoch": 1.7918523233609167,
"grad_norm": 12.559554822963113,
"learning_rate": 4.183276806375584e-06,
"loss": 1.8290843963623047,
"step": 2815
},
{
"epoch": 1.7924888605983451,
"grad_norm": 10.582583743391472,
"learning_rate": 4.179622941967571e-06,
"loss": 1.9351472854614258,
"step": 2816
},
{
"epoch": 1.7931253978357735,
"grad_norm": 6.851608509117778,
"learning_rate": 4.175969527729704e-06,
"loss": 1.5427175760269165,
"step": 2817
},
{
"epoch": 1.793761935073202,
"grad_norm": 18.944559369270223,
"learning_rate": 4.172316565666735e-06,
"loss": 1.5095255374908447,
"step": 2818
},
{
"epoch": 1.7943984723106303,
"grad_norm": 13.99424007088511,
"learning_rate": 4.16866405778318e-06,
"loss": 1.5849189758300781,
"step": 2819
},
{
"epoch": 1.7950350095480587,
"grad_norm": 9.020531963095502,
"learning_rate": 4.165012006083298e-06,
"loss": 1.1962220668792725,
"step": 2820
},
{
"epoch": 1.795671546785487,
"grad_norm": 10.760225463876855,
"learning_rate": 4.161360412571101e-06,
"loss": 1.3352441787719727,
"step": 2821
},
{
"epoch": 1.7963080840229153,
"grad_norm": 9.095159526474035,
"learning_rate": 4.157709279250346e-06,
"loss": 1.5690834522247314,
"step": 2822
},
{
"epoch": 1.7969446212603437,
"grad_norm": 9.124062592636774,
"learning_rate": 4.154058608124543e-06,
"loss": 1.7572336196899414,
"step": 2823
},
{
"epoch": 1.7975811584977721,
"grad_norm": 11.155238440406327,
"learning_rate": 4.150408401196946e-06,
"loss": 1.1329994201660156,
"step": 2824
},
{
"epoch": 1.7982176957352005,
"grad_norm": 15.493019195835576,
"learning_rate": 4.14675866047055e-06,
"loss": 1.4053606986999512,
"step": 2825
},
{
"epoch": 1.798854232972629,
"grad_norm": 15.211171874329507,
"learning_rate": 4.1431093879481e-06,
"loss": 1.3623132705688477,
"step": 2826
},
{
"epoch": 1.7994907702100573,
"grad_norm": 13.133712319487321,
"learning_rate": 4.139460585632081e-06,
"loss": 1.4178318977355957,
"step": 2827
},
{
"epoch": 1.8001273074474857,
"grad_norm": 10.699256194063082,
"learning_rate": 4.135812255524723e-06,
"loss": 1.45753812789917,
"step": 2828
},
{
"epoch": 1.800763844684914,
"grad_norm": 26.809440286239152,
"learning_rate": 4.132164399627993e-06,
"loss": 1.496826410293579,
"step": 2829
},
{
"epoch": 1.8014003819223423,
"grad_norm": 12.57082790356433,
"learning_rate": 4.1285170199436e-06,
"loss": 1.5901217460632324,
"step": 2830
},
{
"epoch": 1.8020369191597707,
"grad_norm": 10.532688344602471,
"learning_rate": 4.124870118472992e-06,
"loss": 2.642929792404175,
"step": 2831
},
{
"epoch": 1.8026734563971991,
"grad_norm": 11.077436237440404,
"learning_rate": 4.121223697217354e-06,
"loss": 1.6081825494766235,
"step": 2832
},
{
"epoch": 1.8033099936346275,
"grad_norm": 12.664290795705691,
"learning_rate": 4.117577758177604e-06,
"loss": 1.4939444065093994,
"step": 2833
},
{
"epoch": 1.803946530872056,
"grad_norm": 11.856528120460341,
"learning_rate": 4.113932303354405e-06,
"loss": 1.7006382942199707,
"step": 2834
},
{
"epoch": 1.8045830681094843,
"grad_norm": 15.82601670872119,
"learning_rate": 4.110287334748141e-06,
"loss": 1.1850695610046387,
"step": 2835
},
{
"epoch": 1.8052196053469127,
"grad_norm": 9.943366762495765,
"learning_rate": 4.106642854358942e-06,
"loss": 1.621656894683838,
"step": 2836
},
{
"epoch": 1.8058561425843411,
"grad_norm": 9.600068209440105,
"learning_rate": 4.102998864186661e-06,
"loss": 1.1193110942840576,
"step": 2837
},
{
"epoch": 1.8064926798217695,
"grad_norm": 12.835685292632498,
"learning_rate": 4.099355366230887e-06,
"loss": 1.2406222820281982,
"step": 2838
},
{
"epoch": 1.807129217059198,
"grad_norm": 12.449219178421625,
"learning_rate": 4.095712362490935e-06,
"loss": 1.3829739093780518,
"step": 2839
},
{
"epoch": 1.8077657542966263,
"grad_norm": 41.33211392686159,
"learning_rate": 4.092069854965856e-06,
"loss": 3.00693678855896,
"step": 2840
},
{
"epoch": 1.8084022915340547,
"grad_norm": 15.632638203964227,
"learning_rate": 4.088427845654418e-06,
"loss": 1.4938067197799683,
"step": 2841
},
{
"epoch": 1.8090388287714831,
"grad_norm": 22.81446834451609,
"learning_rate": 4.084786336555124e-06,
"loss": 1.9294646978378296,
"step": 2842
},
{
"epoch": 1.8096753660089115,
"grad_norm": 18.78376746011658,
"learning_rate": 4.0811453296662e-06,
"loss": 1.6908161640167236,
"step": 2843
},
{
"epoch": 1.81031190324634,
"grad_norm": 8.078771065082595,
"learning_rate": 4.0775048269855955e-06,
"loss": 1.3115285634994507,
"step": 2844
},
{
"epoch": 1.8109484404837684,
"grad_norm": 8.175003735662003,
"learning_rate": 4.073864830510985e-06,
"loss": 1.3126105070114136,
"step": 2845
},
{
"epoch": 1.8115849777211968,
"grad_norm": 17.900707543853887,
"learning_rate": 4.070225342239763e-06,
"loss": 2.337632179260254,
"step": 2846
},
{
"epoch": 1.8122215149586252,
"grad_norm": 16.942782810165667,
"learning_rate": 4.066586364169049e-06,
"loss": 1.7859814167022705,
"step": 2847
},
{
"epoch": 1.8128580521960536,
"grad_norm": 9.346466278405702,
"learning_rate": 4.062947898295677e-06,
"loss": 1.3761142492294312,
"step": 2848
},
{
"epoch": 1.813494589433482,
"grad_norm": 10.113135117790584,
"learning_rate": 4.059309946616206e-06,
"loss": 1.356911063194275,
"step": 2849
},
{
"epoch": 1.8141311266709104,
"grad_norm": 13.562865840191234,
"learning_rate": 4.055672511126907e-06,
"loss": 1.37875497341156,
"step": 2850
},
{
"epoch": 1.8147676639083388,
"grad_norm": 7.885191606778201,
"learning_rate": 4.052035593823772e-06,
"loss": 1.5034464597702026,
"step": 2851
},
{
"epoch": 1.8154042011457672,
"grad_norm": 17.376643446227266,
"learning_rate": 4.048399196702506e-06,
"loss": 1.2730200290679932,
"step": 2852
},
{
"epoch": 1.8160407383831956,
"grad_norm": 23.47641940891411,
"learning_rate": 4.044763321758532e-06,
"loss": 1.4678232669830322,
"step": 2853
},
{
"epoch": 1.8166772756206238,
"grad_norm": 10.334364788346084,
"learning_rate": 4.041127970986981e-06,
"loss": 1.3934051990509033,
"step": 2854
},
{
"epoch": 1.8173138128580522,
"grad_norm": 17.405189949747346,
"learning_rate": 4.037493146382705e-06,
"loss": 1.7411506175994873,
"step": 2855
},
{
"epoch": 1.8179503500954806,
"grad_norm": 10.249084254626336,
"learning_rate": 4.0338588499402554e-06,
"loss": 1.3829255104064941,
"step": 2856
},
{
"epoch": 1.818586887332909,
"grad_norm": 9.37668767905887,
"learning_rate": 4.030225083653905e-06,
"loss": 2.6021153926849365,
"step": 2857
},
{
"epoch": 1.8192234245703374,
"grad_norm": 10.928394393961895,
"learning_rate": 4.026591849517628e-06,
"loss": 1.741642951965332,
"step": 2858
},
{
"epoch": 1.8198599618077658,
"grad_norm": 10.465955855411217,
"learning_rate": 4.02295914952511e-06,
"loss": 1.0957083702087402,
"step": 2859
},
{
"epoch": 1.8204964990451942,
"grad_norm": 11.356543633241605,
"learning_rate": 4.019326985669746e-06,
"loss": 2.1968047618865967,
"step": 2860
},
{
"epoch": 1.8211330362826226,
"grad_norm": 9.778457182295519,
"learning_rate": 4.015695359944628e-06,
"loss": 2.30609393119812,
"step": 2861
},
{
"epoch": 1.821769573520051,
"grad_norm": 13.123023423602369,
"learning_rate": 4.012064274342566e-06,
"loss": 1.8456538915634155,
"step": 2862
},
{
"epoch": 1.8224061107574792,
"grad_norm": 8.063328834273289,
"learning_rate": 4.0084337308560596e-06,
"loss": 1.5011341571807861,
"step": 2863
},
{
"epoch": 1.8230426479949076,
"grad_norm": 10.625179441651248,
"learning_rate": 4.004803731477321e-06,
"loss": 1.461496353149414,
"step": 2864
},
{
"epoch": 1.823679185232336,
"grad_norm": 18.777330914781412,
"learning_rate": 4.001174278198258e-06,
"loss": 1.1114155054092407,
"step": 2865
},
{
"epoch": 1.8243157224697644,
"grad_norm": 7.930945458759789,
"learning_rate": 3.997545373010484e-06,
"loss": 2.230154514312744,
"step": 2866
},
{
"epoch": 1.8249522597071928,
"grad_norm": 11.068666242334524,
"learning_rate": 3.993917017905306e-06,
"loss": 1.503533959388733,
"step": 2867
},
{
"epoch": 1.8255887969446212,
"grad_norm": 15.690509574737762,
"learning_rate": 3.990289214873734e-06,
"loss": 1.798210859298706,
"step": 2868
},
{
"epoch": 1.8262253341820496,
"grad_norm": 9.716017451785957,
"learning_rate": 3.986661965906469e-06,
"loss": 1.6863963603973389,
"step": 2869
},
{
"epoch": 1.826861871419478,
"grad_norm": 11.571783437092481,
"learning_rate": 3.983035272993918e-06,
"loss": 1.7966833114624023,
"step": 2870
},
{
"epoch": 1.8274984086569064,
"grad_norm": 19.255229113259986,
"learning_rate": 3.97940913812617e-06,
"loss": 1.4381554126739502,
"step": 2871
},
{
"epoch": 1.8281349458943348,
"grad_norm": 11.381610972405511,
"learning_rate": 3.97578356329302e-06,
"loss": 1.7153146266937256,
"step": 2872
},
{
"epoch": 1.8287714831317632,
"grad_norm": 11.386655212116958,
"learning_rate": 3.972158550483946e-06,
"loss": 1.6358150243759155,
"step": 2873
},
{
"epoch": 1.8294080203691916,
"grad_norm": 6.593922569952327,
"learning_rate": 3.9685341016881255e-06,
"loss": 2.0091328620910645,
"step": 2874
},
{
"epoch": 1.83004455760662,
"grad_norm": 10.038346284443978,
"learning_rate": 3.964910218894421e-06,
"loss": 1.77092707157135,
"step": 2875
},
{
"epoch": 1.8306810948440484,
"grad_norm": 9.160996970211745,
"learning_rate": 3.9612869040913834e-06,
"loss": 1.5817136764526367,
"step": 2876
},
{
"epoch": 1.8313176320814768,
"grad_norm": 10.907978103645188,
"learning_rate": 3.957664159267261e-06,
"loss": 1.5602571964263916,
"step": 2877
},
{
"epoch": 1.8319541693189052,
"grad_norm": 13.827821511690907,
"learning_rate": 3.954041986409978e-06,
"loss": 1.4133808612823486,
"step": 2878
},
{
"epoch": 1.8325907065563336,
"grad_norm": 13.478353814469886,
"learning_rate": 3.9504203875071515e-06,
"loss": 1.5663726329803467,
"step": 2879
},
{
"epoch": 1.833227243793762,
"grad_norm": 10.710541030666576,
"learning_rate": 3.946799364546081e-06,
"loss": 1.351537823677063,
"step": 2880
},
{
"epoch": 1.8338637810311904,
"grad_norm": 9.498717426202266,
"learning_rate": 3.943178919513753e-06,
"loss": 1.4036643505096436,
"step": 2881
},
{
"epoch": 1.8345003182686188,
"grad_norm": 10.248746115258475,
"learning_rate": 3.939559054396832e-06,
"loss": 1.1356933116912842,
"step": 2882
},
{
"epoch": 1.8351368555060472,
"grad_norm": 13.648579581067839,
"learning_rate": 3.93593977118167e-06,
"loss": 1.7145284414291382,
"step": 2883
},
{
"epoch": 1.8357733927434756,
"grad_norm": 11.237385330356203,
"learning_rate": 3.932321071854294e-06,
"loss": 1.3724491596221924,
"step": 2884
},
{
"epoch": 1.836409929980904,
"grad_norm": 15.40429795331263,
"learning_rate": 3.928702958400416e-06,
"loss": 1.3502287864685059,
"step": 2885
},
{
"epoch": 1.8370464672183324,
"grad_norm": 9.659071962495439,
"learning_rate": 3.925085432805422e-06,
"loss": 2.138554334640503,
"step": 2886
},
{
"epoch": 1.8376830044557608,
"grad_norm": 21.795752879699876,
"learning_rate": 3.921468497054378e-06,
"loss": 1.198209524154663,
"step": 2887
},
{
"epoch": 1.838319541693189,
"grad_norm": 9.759312699966467,
"learning_rate": 3.917852153132023e-06,
"loss": 0.8229266405105591,
"step": 2888
},
{
"epoch": 1.8389560789306174,
"grad_norm": 10.839219755866015,
"learning_rate": 3.914236403022779e-06,
"loss": 1.0355024337768555,
"step": 2889
},
{
"epoch": 1.8395926161680458,
"grad_norm": 8.041699784000208,
"learning_rate": 3.910621248710733e-06,
"loss": 1.4596374034881592,
"step": 2890
},
{
"epoch": 1.8402291534054742,
"grad_norm": 11.145251443689975,
"learning_rate": 3.9070066921796525e-06,
"loss": 1.5663981437683105,
"step": 2891
},
{
"epoch": 1.8408656906429026,
"grad_norm": 7.941898425454865,
"learning_rate": 3.903392735412969e-06,
"loss": 1.4436841011047363,
"step": 2892
},
{
"epoch": 1.841502227880331,
"grad_norm": 14.280872060726427,
"learning_rate": 3.899779380393793e-06,
"loss": 1.8422952890396118,
"step": 2893
},
{
"epoch": 1.8421387651177594,
"grad_norm": 14.615091656505317,
"learning_rate": 3.896166629104899e-06,
"loss": 1.3043195009231567,
"step": 2894
},
{
"epoch": 1.8427753023551878,
"grad_norm": 11.03565890218722,
"learning_rate": 3.892554483528734e-06,
"loss": 1.9933313131332397,
"step": 2895
},
{
"epoch": 1.843411839592616,
"grad_norm": 10.26711083737239,
"learning_rate": 3.8889429456474106e-06,
"loss": 1.9972119331359863,
"step": 2896
},
{
"epoch": 1.8440483768300444,
"grad_norm": 10.043013669477036,
"learning_rate": 3.885332017442708e-06,
"loss": 1.2068595886230469,
"step": 2897
},
{
"epoch": 1.8446849140674728,
"grad_norm": 16.03634140712494,
"learning_rate": 3.881721700896074e-06,
"loss": 1.071032166481018,
"step": 2898
},
{
"epoch": 1.8453214513049012,
"grad_norm": 19.28330328190598,
"learning_rate": 3.878111997988612e-06,
"loss": 1.5138368606567383,
"step": 2899
},
{
"epoch": 1.8459579885423296,
"grad_norm": 8.448239192095798,
"learning_rate": 3.874502910701101e-06,
"loss": 1.7184983491897583,
"step": 2900
},
{
"epoch": 1.846594525779758,
"grad_norm": 7.877076063101697,
"learning_rate": 3.870894441013971e-06,
"loss": 1.4885249137878418,
"step": 2901
},
{
"epoch": 1.8472310630171864,
"grad_norm": 27.704910370885152,
"learning_rate": 3.8672865909073214e-06,
"loss": 1.4520801305770874,
"step": 2902
},
{
"epoch": 1.8478676002546148,
"grad_norm": 7.779608946735297,
"learning_rate": 3.863679362360907e-06,
"loss": 1.6304466724395752,
"step": 2903
},
{
"epoch": 1.8485041374920432,
"grad_norm": 9.669324175312662,
"learning_rate": 3.860072757354143e-06,
"loss": 1.3839410543441772,
"step": 2904
},
{
"epoch": 1.8491406747294716,
"grad_norm": 10.378972470624392,
"learning_rate": 3.8564667778661e-06,
"loss": 2.3011062145233154,
"step": 2905
},
{
"epoch": 1.8497772119669,
"grad_norm": 8.17189708315968,
"learning_rate": 3.852861425875513e-06,
"loss": 0.993788480758667,
"step": 2906
},
{
"epoch": 1.8504137492043284,
"grad_norm": 15.400598985121874,
"learning_rate": 3.849256703360759e-06,
"loss": 1.5165927410125732,
"step": 2907
},
{
"epoch": 1.8510502864417568,
"grad_norm": 7.665762925712199,
"learning_rate": 3.845652612299886e-06,
"loss": 1.3429007530212402,
"step": 2908
},
{
"epoch": 1.8516868236791852,
"grad_norm": 9.251559888124305,
"learning_rate": 3.842049154670583e-06,
"loss": 1.6699916124343872,
"step": 2909
},
{
"epoch": 1.8523233609166136,
"grad_norm": 15.144716220358037,
"learning_rate": 3.838446332450195e-06,
"loss": 2.5920000076293945,
"step": 2910
},
{
"epoch": 1.852959898154042,
"grad_norm": 10.530008928040017,
"learning_rate": 3.834844147615722e-06,
"loss": 1.108059048652649,
"step": 2911
},
{
"epoch": 1.8535964353914705,
"grad_norm": 8.636412664373463,
"learning_rate": 3.831242602143807e-06,
"loss": 1.2285619974136353,
"step": 2912
},
{
"epoch": 1.8542329726288989,
"grad_norm": 9.758526949200924,
"learning_rate": 3.827641698010752e-06,
"loss": 2.0095975399017334,
"step": 2913
},
{
"epoch": 1.8548695098663273,
"grad_norm": 9.336569556525562,
"learning_rate": 3.824041437192496e-06,
"loss": 2.0980563163757324,
"step": 2914
},
{
"epoch": 1.8555060471037557,
"grad_norm": 10.296892917057084,
"learning_rate": 3.8204418216646344e-06,
"loss": 1.698615550994873,
"step": 2915
},
{
"epoch": 1.856142584341184,
"grad_norm": 9.196996996757687,
"learning_rate": 3.8168428534024024e-06,
"loss": 1.6235389709472656,
"step": 2916
},
{
"epoch": 1.8567791215786125,
"grad_norm": 8.807171550328754,
"learning_rate": 3.8132445343806834e-06,
"loss": 2.061776638031006,
"step": 2917
},
{
"epoch": 1.8574156588160409,
"grad_norm": 7.403972337272077,
"learning_rate": 3.8096468665740023e-06,
"loss": 1.6480553150177002,
"step": 2918
},
{
"epoch": 1.8580521960534693,
"grad_norm": 6.170162563114787,
"learning_rate": 3.806049851956529e-06,
"loss": 1.8125827312469482,
"step": 2919
},
{
"epoch": 1.8586887332908977,
"grad_norm": 14.151706780698929,
"learning_rate": 3.8024534925020723e-06,
"loss": 0.9944297075271606,
"step": 2920
},
{
"epoch": 1.8593252705283259,
"grad_norm": 13.473813351532165,
"learning_rate": 3.798857790184085e-06,
"loss": 1.1409094333648682,
"step": 2921
},
{
"epoch": 1.8599618077657543,
"grad_norm": 8.23206808050268,
"learning_rate": 3.795262746975654e-06,
"loss": 1.6605459451675415,
"step": 2922
},
{
"epoch": 1.8605983450031827,
"grad_norm": 17.797137157177552,
"learning_rate": 3.7916683648495106e-06,
"loss": 1.2091991901397705,
"step": 2923
},
{
"epoch": 1.861234882240611,
"grad_norm": 37.69629321952834,
"learning_rate": 3.788074645778018e-06,
"loss": 0.7815721035003662,
"step": 2924
},
{
"epoch": 1.8618714194780395,
"grad_norm": 13.372576382344619,
"learning_rate": 3.7844815917331805e-06,
"loss": 0.9410151839256287,
"step": 2925
},
{
"epoch": 1.8625079567154679,
"grad_norm": 10.587506192567798,
"learning_rate": 3.780889204686635e-06,
"loss": 2.008251190185547,
"step": 2926
},
{
"epoch": 1.8631444939528963,
"grad_norm": 15.339078608543764,
"learning_rate": 3.7772974866096467e-06,
"loss": 0.6460528373718262,
"step": 2927
},
{
"epoch": 1.8637810311903247,
"grad_norm": 15.296511645201427,
"learning_rate": 3.773706439473128e-06,
"loss": 1.3764185905456543,
"step": 2928
},
{
"epoch": 1.8644175684277529,
"grad_norm": 6.883611777851248,
"learning_rate": 3.7701160652476075e-06,
"loss": 1.6351902484893799,
"step": 2929
},
{
"epoch": 1.8650541056651813,
"grad_norm": 12.887349048316088,
"learning_rate": 3.7665263659032557e-06,
"loss": 2.1986546516418457,
"step": 2930
},
{
"epoch": 1.8656906429026097,
"grad_norm": 14.54225038195362,
"learning_rate": 3.7629373434098666e-06,
"loss": 2.2645087242126465,
"step": 2931
},
{
"epoch": 1.866327180140038,
"grad_norm": 7.771014705777784,
"learning_rate": 3.759348999736866e-06,
"loss": 0.7444299459457397,
"step": 2932
},
{
"epoch": 1.8669637173774665,
"grad_norm": 11.737576573508946,
"learning_rate": 3.755761336853304e-06,
"loss": 2.10587477684021,
"step": 2933
},
{
"epoch": 1.8676002546148949,
"grad_norm": 8.775640918155895,
"learning_rate": 3.752174356727863e-06,
"loss": 1.9457027912139893,
"step": 2934
},
{
"epoch": 1.8682367918523233,
"grad_norm": 13.515937867687345,
"learning_rate": 3.7485880613288417e-06,
"loss": 1.6708476543426514,
"step": 2935
},
{
"epoch": 1.8688733290897517,
"grad_norm": 23.093293304399715,
"learning_rate": 3.7450024526241736e-06,
"loss": 0.8383031487464905,
"step": 2936
},
{
"epoch": 1.86950986632718,
"grad_norm": 15.142424514156689,
"learning_rate": 3.7414175325814055e-06,
"loss": 1.975140929222107,
"step": 2937
},
{
"epoch": 1.8701464035646085,
"grad_norm": 8.808594675559215,
"learning_rate": 3.737833303167714e-06,
"loss": 1.585309386253357,
"step": 2938
},
{
"epoch": 1.8707829408020369,
"grad_norm": 12.54269974142991,
"learning_rate": 3.734249766349891e-06,
"loss": 2.28334903717041,
"step": 2939
},
{
"epoch": 1.8714194780394653,
"grad_norm": 12.039706631211335,
"learning_rate": 3.730666924094354e-06,
"loss": 1.6018445491790771,
"step": 2940
},
{
"epoch": 1.8720560152768937,
"grad_norm": 12.606822880989533,
"learning_rate": 3.727084778367133e-06,
"loss": 2.0158743858337402,
"step": 2941
},
{
"epoch": 1.872692552514322,
"grad_norm": 13.18481824489173,
"learning_rate": 3.7235033311338832e-06,
"loss": 0.8656675815582275,
"step": 2942
},
{
"epoch": 1.8733290897517505,
"grad_norm": 11.761203832027727,
"learning_rate": 3.719922584359869e-06,
"loss": 1.662010908126831,
"step": 2943
},
{
"epoch": 1.873965626989179,
"grad_norm": 11.055295697247734,
"learning_rate": 3.7163425400099734e-06,
"loss": 1.6481142044067383,
"step": 2944
},
{
"epoch": 1.8746021642266073,
"grad_norm": 8.184052509423914,
"learning_rate": 3.712763200048697e-06,
"loss": 1.3201755285263062,
"step": 2945
},
{
"epoch": 1.8752387014640357,
"grad_norm": 9.257340721993499,
"learning_rate": 3.709184566440148e-06,
"loss": 0.6050750613212585,
"step": 2946
},
{
"epoch": 1.875875238701464,
"grad_norm": 9.004885741171472,
"learning_rate": 3.7056066411480546e-06,
"loss": 0.8616651892662048,
"step": 2947
},
{
"epoch": 1.8765117759388925,
"grad_norm": 10.262472561157548,
"learning_rate": 3.702029426135748e-06,
"loss": 1.235988974571228,
"step": 2948
},
{
"epoch": 1.877148313176321,
"grad_norm": 10.006326012931487,
"learning_rate": 3.698452923366177e-06,
"loss": 1.6546168327331543,
"step": 2949
},
{
"epoch": 1.8777848504137493,
"grad_norm": 9.239380099845183,
"learning_rate": 3.694877134801892e-06,
"loss": 1.868090271949768,
"step": 2950
},
{
"epoch": 1.8784213876511777,
"grad_norm": 11.724873434767735,
"learning_rate": 3.6913020624050607e-06,
"loss": 1.8908902406692505,
"step": 2951
},
{
"epoch": 1.8790579248886061,
"grad_norm": 6.782205554830913,
"learning_rate": 3.6877277081374485e-06,
"loss": 1.5437768697738647,
"step": 2952
},
{
"epoch": 1.8796944621260345,
"grad_norm": 13.250147606465557,
"learning_rate": 3.6841540739604333e-06,
"loss": 1.6006748676300049,
"step": 2953
},
{
"epoch": 1.8803309993634627,
"grad_norm": 8.263585193238786,
"learning_rate": 3.680581161834994e-06,
"loss": 1.3121932744979858,
"step": 2954
},
{
"epoch": 1.880967536600891,
"grad_norm": 15.855281274860497,
"learning_rate": 3.6770089737217173e-06,
"loss": 1.5933866500854492,
"step": 2955
},
{
"epoch": 1.8816040738383195,
"grad_norm": 17.104636402653167,
"learning_rate": 3.6734375115807873e-06,
"loss": 1.366287112236023,
"step": 2956
},
{
"epoch": 1.882240611075748,
"grad_norm": 11.889323434402883,
"learning_rate": 3.6698667773719964e-06,
"loss": 1.087631344795227,
"step": 2957
},
{
"epoch": 1.8828771483131763,
"grad_norm": 10.28887515614746,
"learning_rate": 3.666296773054729e-06,
"loss": 1.2407673597335815,
"step": 2958
},
{
"epoch": 1.8835136855506047,
"grad_norm": 13.182129320643282,
"learning_rate": 3.6627275005879793e-06,
"loss": 1.1016771793365479,
"step": 2959
},
{
"epoch": 1.8841502227880331,
"grad_norm": 11.447446637699572,
"learning_rate": 3.659158961930329e-06,
"loss": 3.0205774307250977,
"step": 2960
},
{
"epoch": 1.8847867600254615,
"grad_norm": 11.336888264151233,
"learning_rate": 3.655591159039965e-06,
"loss": 1.6845563650131226,
"step": 2961
},
{
"epoch": 1.88542329726289,
"grad_norm": 8.513133099222893,
"learning_rate": 3.6520240938746686e-06,
"loss": 1.57521390914917,
"step": 2962
},
{
"epoch": 1.886059834500318,
"grad_norm": 13.843253725878345,
"learning_rate": 3.6484577683918137e-06,
"loss": 1.55642831325531,
"step": 2963
},
{
"epoch": 1.8866963717377465,
"grad_norm": 11.336427097104853,
"learning_rate": 3.644892184548373e-06,
"loss": 1.7204231023788452,
"step": 2964
},
{
"epoch": 1.887332908975175,
"grad_norm": 14.937415415408747,
"learning_rate": 3.6413273443009066e-06,
"loss": 1.8140678405761719,
"step": 2965
},
{
"epoch": 1.8879694462126033,
"grad_norm": 15.12413403522489,
"learning_rate": 3.6377632496055715e-06,
"loss": 1.4444818496704102,
"step": 2966
},
{
"epoch": 1.8886059834500317,
"grad_norm": 12.952422188153427,
"learning_rate": 3.6341999024181113e-06,
"loss": 1.0764448642730713,
"step": 2967
},
{
"epoch": 1.8892425206874601,
"grad_norm": 18.259926105660973,
"learning_rate": 3.6306373046938646e-06,
"loss": 1.1982852220535278,
"step": 2968
},
{
"epoch": 1.8898790579248885,
"grad_norm": 11.890515486530168,
"learning_rate": 3.627075458387753e-06,
"loss": 1.6283388137817383,
"step": 2969
},
{
"epoch": 1.890515595162317,
"grad_norm": 13.811771576252777,
"learning_rate": 3.623514365454291e-06,
"loss": 0.8901544213294983,
"step": 2970
},
{
"epoch": 1.8911521323997453,
"grad_norm": 9.679165937792929,
"learning_rate": 3.6199540278475753e-06,
"loss": 1.6047818660736084,
"step": 2971
},
{
"epoch": 1.8917886696371737,
"grad_norm": 20.126024292697785,
"learning_rate": 3.6163944475212925e-06,
"loss": 1.312869668006897,
"step": 2972
},
{
"epoch": 1.8924252068746021,
"grad_norm": 8.611093827796402,
"learning_rate": 3.612835626428707e-06,
"loss": 1.3583612442016602,
"step": 2973
},
{
"epoch": 1.8930617441120305,
"grad_norm": 9.155912381502747,
"learning_rate": 3.6092775665226766e-06,
"loss": 1.6458258628845215,
"step": 2974
},
{
"epoch": 1.893698281349459,
"grad_norm": 10.489661405812855,
"learning_rate": 3.6057202697556303e-06,
"loss": 1.483068823814392,
"step": 2975
},
{
"epoch": 1.8943348185868873,
"grad_norm": 5.669088167056578,
"learning_rate": 3.602163738079587e-06,
"loss": 1.5122544765472412,
"step": 2976
},
{
"epoch": 1.8949713558243158,
"grad_norm": 15.730385517328935,
"learning_rate": 3.598607973446142e-06,
"loss": 1.6851162910461426,
"step": 2977
},
{
"epoch": 1.8956078930617442,
"grad_norm": 9.754807356798125,
"learning_rate": 3.595052977806467e-06,
"loss": 1.4745590686798096,
"step": 2978
},
{
"epoch": 1.8962444302991726,
"grad_norm": 13.659337878420038,
"learning_rate": 3.59149875311132e-06,
"loss": 1.6734472513198853,
"step": 2979
},
{
"epoch": 1.896880967536601,
"grad_norm": 11.288157866459745,
"learning_rate": 3.5879453013110266e-06,
"loss": 1.503297209739685,
"step": 2980
},
{
"epoch": 1.8975175047740294,
"grad_norm": 9.833318826750947,
"learning_rate": 3.584392624355495e-06,
"loss": 1.2672520875930786,
"step": 2981
},
{
"epoch": 1.8981540420114578,
"grad_norm": 10.781652948334775,
"learning_rate": 3.5808407241942035e-06,
"loss": 1.4455342292785645,
"step": 2982
},
{
"epoch": 1.8987905792488862,
"grad_norm": 24.804714712071927,
"learning_rate": 3.577289602776208e-06,
"loss": 1.375553011894226,
"step": 2983
},
{
"epoch": 1.8994271164863146,
"grad_norm": 13.14656310606751,
"learning_rate": 3.5737392620501356e-06,
"loss": 2.576221466064453,
"step": 2984
},
{
"epoch": 1.900063653723743,
"grad_norm": 13.585164189074876,
"learning_rate": 3.5701897039641854e-06,
"loss": 1.90523099899292,
"step": 2985
},
{
"epoch": 1.9007001909611714,
"grad_norm": 17.586577936911645,
"learning_rate": 3.5666409304661226e-06,
"loss": 1.1571316719055176,
"step": 2986
},
{
"epoch": 1.9013367281985998,
"grad_norm": 6.545400747345549,
"learning_rate": 3.5630929435032915e-06,
"loss": 1.630649447441101,
"step": 2987
},
{
"epoch": 1.901973265436028,
"grad_norm": 9.512514849418265,
"learning_rate": 3.5595457450225944e-06,
"loss": 1.8141216039657593,
"step": 2988
},
{
"epoch": 1.9026098026734564,
"grad_norm": 8.506375900740151,
"learning_rate": 3.555999336970508e-06,
"loss": 1.11874258518219,
"step": 2989
},
{
"epoch": 1.9032463399108848,
"grad_norm": 13.794812449480618,
"learning_rate": 3.552453721293071e-06,
"loss": 1.619382619857788,
"step": 2990
},
{
"epoch": 1.9038828771483132,
"grad_norm": 16.651260707094043,
"learning_rate": 3.5489088999358916e-06,
"loss": 1.60298490524292,
"step": 2991
},
{
"epoch": 1.9045194143857416,
"grad_norm": 10.972801539111252,
"learning_rate": 3.545364874844137e-06,
"loss": 1.5508108139038086,
"step": 2992
},
{
"epoch": 1.90515595162317,
"grad_norm": 9.233405121504681,
"learning_rate": 3.5418216479625432e-06,
"loss": 1.0458866357803345,
"step": 2993
},
{
"epoch": 1.9057924888605984,
"grad_norm": 9.564752919579051,
"learning_rate": 3.5382792212354044e-06,
"loss": 1.7472103834152222,
"step": 2994
},
{
"epoch": 1.9064290260980268,
"grad_norm": 11.317185828966672,
"learning_rate": 3.5347375966065732e-06,
"loss": 1.9647533893585205,
"step": 2995
},
{
"epoch": 1.907065563335455,
"grad_norm": 16.039174800688794,
"learning_rate": 3.5311967760194685e-06,
"loss": 1.8768528699874878,
"step": 2996
},
{
"epoch": 1.9077021005728834,
"grad_norm": 9.69725685029296,
"learning_rate": 3.5276567614170632e-06,
"loss": 1.6337780952453613,
"step": 2997
},
{
"epoch": 1.9083386378103118,
"grad_norm": 13.84728746714734,
"learning_rate": 3.5241175547418905e-06,
"loss": 1.0726393461227417,
"step": 2998
},
{
"epoch": 1.9089751750477402,
"grad_norm": 12.253571745159757,
"learning_rate": 3.5205791579360386e-06,
"loss": 1.3294373750686646,
"step": 2999
},
{
"epoch": 1.9096117122851686,
"grad_norm": 7.145469680327638,
"learning_rate": 3.5170415729411544e-06,
"loss": 1.3319453001022339,
"step": 3000
},
{
"epoch": 1.910248249522597,
"grad_norm": 12.670397007583942,
"learning_rate": 3.513504801698432e-06,
"loss": 1.0952013731002808,
"step": 3001
},
{
"epoch": 1.9108847867600254,
"grad_norm": 19.599620263971225,
"learning_rate": 3.50996884614863e-06,
"loss": 1.1098661422729492,
"step": 3002
},
{
"epoch": 1.9115213239974538,
"grad_norm": 10.708770007076673,
"learning_rate": 3.5064337082320475e-06,
"loss": 2.0629000663757324,
"step": 3003
},
{
"epoch": 1.9121578612348822,
"grad_norm": 11.917806878874917,
"learning_rate": 3.502899389888545e-06,
"loss": 1.2926945686340332,
"step": 3004
},
{
"epoch": 1.9127943984723106,
"grad_norm": 14.183901970473984,
"learning_rate": 3.499365893057526e-06,
"loss": 1.500540018081665,
"step": 3005
},
{
"epoch": 1.913430935709739,
"grad_norm": 9.166386496776243,
"learning_rate": 3.4958332196779486e-06,
"loss": 1.163301706314087,
"step": 3006
},
{
"epoch": 1.9140674729471674,
"grad_norm": 12.255480602242926,
"learning_rate": 3.4923013716883146e-06,
"loss": 1.0257372856140137,
"step": 3007
},
{
"epoch": 1.9147040101845958,
"grad_norm": 13.22483419522713,
"learning_rate": 3.488770351026678e-06,
"loss": 1.455714225769043,
"step": 3008
},
{
"epoch": 1.9153405474220242,
"grad_norm": 10.179432970934371,
"learning_rate": 3.4852401596306306e-06,
"loss": 1.5575066804885864,
"step": 3009
},
{
"epoch": 1.9159770846594526,
"grad_norm": 20.12038533874464,
"learning_rate": 3.48171079943732e-06,
"loss": 1.757549524307251,
"step": 3010
},
{
"epoch": 1.916613621896881,
"grad_norm": 12.203643867997348,
"learning_rate": 3.4781822723834286e-06,
"loss": 1.7452330589294434,
"step": 3011
},
{
"epoch": 1.9172501591343094,
"grad_norm": 15.840159433670774,
"learning_rate": 3.4746545804051845e-06,
"loss": 1.858777403831482,
"step": 3012
},
{
"epoch": 1.9178866963717378,
"grad_norm": 16.066331909644493,
"learning_rate": 3.47112772543836e-06,
"loss": 1.0916454792022705,
"step": 3013
},
{
"epoch": 1.9185232336091662,
"grad_norm": 9.991258615289015,
"learning_rate": 3.4676017094182655e-06,
"loss": 1.8284687995910645,
"step": 3014
},
{
"epoch": 1.9191597708465946,
"grad_norm": 23.562108037135385,
"learning_rate": 3.464076534279753e-06,
"loss": 1.6296882629394531,
"step": 3015
},
{
"epoch": 1.919796308084023,
"grad_norm": 13.350276842306824,
"learning_rate": 3.4605522019572075e-06,
"loss": 2.1474852561950684,
"step": 3016
},
{
"epoch": 1.9204328453214514,
"grad_norm": 12.152677668203303,
"learning_rate": 3.4570287143845617e-06,
"loss": 1.6121530532836914,
"step": 3017
},
{
"epoch": 1.9210693825588798,
"grad_norm": 10.792615169339534,
"learning_rate": 3.453506073495274e-06,
"loss": 1.7401103973388672,
"step": 3018
},
{
"epoch": 1.9217059197963082,
"grad_norm": 19.617397599137732,
"learning_rate": 3.449984281222346e-06,
"loss": 2.0249862670898438,
"step": 3019
},
{
"epoch": 1.9223424570337366,
"grad_norm": 12.861936097372928,
"learning_rate": 3.4464633394983087e-06,
"loss": 1.87858247756958,
"step": 3020
},
{
"epoch": 1.9229789942711648,
"grad_norm": 12.821502062384148,
"learning_rate": 3.4429432502552306e-06,
"loss": 1.3402378559112549,
"step": 3021
},
{
"epoch": 1.9236155315085932,
"grad_norm": 12.271130860284014,
"learning_rate": 3.439424015424708e-06,
"loss": 1.3296496868133545,
"step": 3022
},
{
"epoch": 1.9242520687460216,
"grad_norm": 12.094984073587872,
"learning_rate": 3.435905636937873e-06,
"loss": 1.14638090133667,
"step": 3023
},
{
"epoch": 1.92488860598345,
"grad_norm": 9.547944000672503,
"learning_rate": 3.4323881167253804e-06,
"loss": 2.336252212524414,
"step": 3024
},
{
"epoch": 1.9255251432208784,
"grad_norm": 17.369817974746017,
"learning_rate": 3.4288714567174264e-06,
"loss": 1.4508349895477295,
"step": 3025
},
{
"epoch": 1.9261616804583068,
"grad_norm": 10.74696941864543,
"learning_rate": 3.4253556588437208e-06,
"loss": 1.2710459232330322,
"step": 3026
},
{
"epoch": 1.9267982176957352,
"grad_norm": 12.987178620919483,
"learning_rate": 3.421840725033512e-06,
"loss": 1.2776530981063843,
"step": 3027
},
{
"epoch": 1.9274347549331636,
"grad_norm": 7.520585806761156,
"learning_rate": 3.418326657215567e-06,
"loss": 1.1973588466644287,
"step": 3028
},
{
"epoch": 1.928071292170592,
"grad_norm": 13.139221862631066,
"learning_rate": 3.4148134573181798e-06,
"loss": 1.5048329830169678,
"step": 3029
},
{
"epoch": 1.9287078294080202,
"grad_norm": 14.009969949467193,
"learning_rate": 3.4113011272691708e-06,
"loss": 0.9765514731407166,
"step": 3030
},
{
"epoch": 1.9293443666454486,
"grad_norm": 9.474747894878966,
"learning_rate": 3.4077896689958782e-06,
"loss": 1.5028693675994873,
"step": 3031
},
{
"epoch": 1.929980903882877,
"grad_norm": 24.981700309160786,
"learning_rate": 3.4042790844251666e-06,
"loss": 2.431849956512451,
"step": 3032
},
{
"epoch": 1.9306174411203054,
"grad_norm": 12.405622244492198,
"learning_rate": 3.4007693754834166e-06,
"loss": 1.5558407306671143,
"step": 3033
},
{
"epoch": 1.9312539783577338,
"grad_norm": 8.277798441928566,
"learning_rate": 3.397260544096532e-06,
"loss": 1.9231243133544922,
"step": 3034
},
{
"epoch": 1.9318905155951622,
"grad_norm": 10.4446041402095,
"learning_rate": 3.393752592189934e-06,
"loss": 1.745375156402588,
"step": 3035
},
{
"epoch": 1.9325270528325906,
"grad_norm": 13.11851467672906,
"learning_rate": 3.3902455216885603e-06,
"loss": 1.7263861894607544,
"step": 3036
},
{
"epoch": 1.933163590070019,
"grad_norm": 10.810645515967366,
"learning_rate": 3.3867393345168653e-06,
"loss": 1.180358648300171,
"step": 3037
},
{
"epoch": 1.9338001273074474,
"grad_norm": 8.822196535546214,
"learning_rate": 3.3832340325988212e-06,
"loss": 2.118812084197998,
"step": 3038
},
{
"epoch": 1.9344366645448758,
"grad_norm": 11.163193376397869,
"learning_rate": 3.379729617857908e-06,
"loss": 1.6012519598007202,
"step": 3039
},
{
"epoch": 1.9350732017823042,
"grad_norm": 8.381398488181672,
"learning_rate": 3.3762260922171265e-06,
"loss": 1.4120349884033203,
"step": 3040
},
{
"epoch": 1.9357097390197326,
"grad_norm": 11.787336756078439,
"learning_rate": 3.372723457598983e-06,
"loss": 1.2969615459442139,
"step": 3041
},
{
"epoch": 1.936346276257161,
"grad_norm": 9.389599449868138,
"learning_rate": 3.3692217159255025e-06,
"loss": 1.8524794578552246,
"step": 3042
},
{
"epoch": 1.9369828134945895,
"grad_norm": 10.81217000988976,
"learning_rate": 3.3657208691182107e-06,
"loss": 0.9801638126373291,
"step": 3043
},
{
"epoch": 1.9376193507320179,
"grad_norm": 18.38620988657736,
"learning_rate": 3.3622209190981503e-06,
"loss": 1.663733720779419,
"step": 3044
},
{
"epoch": 1.9382558879694463,
"grad_norm": 11.710734609627233,
"learning_rate": 3.358721867785869e-06,
"loss": 3.6056973934173584,
"step": 3045
},
{
"epoch": 1.9388924252068747,
"grad_norm": 15.338078554184479,
"learning_rate": 3.355223717101418e-06,
"loss": 1.8417390584945679,
"step": 3046
},
{
"epoch": 1.939528962444303,
"grad_norm": 10.293351880303502,
"learning_rate": 3.351726468964359e-06,
"loss": 1.7809994220733643,
"step": 3047
},
{
"epoch": 1.9401654996817315,
"grad_norm": 13.24712762250747,
"learning_rate": 3.3482301252937564e-06,
"loss": 1.5533814430236816,
"step": 3048
},
{
"epoch": 1.9408020369191599,
"grad_norm": 11.022160859919824,
"learning_rate": 3.3447346880081798e-06,
"loss": 2.2012224197387695,
"step": 3049
},
{
"epoch": 1.9414385741565883,
"grad_norm": 8.570175454275299,
"learning_rate": 3.341240159025698e-06,
"loss": 1.6400574445724487,
"step": 3050
},
{
"epoch": 1.9420751113940167,
"grad_norm": 15.425440780971087,
"learning_rate": 3.3377465402638877e-06,
"loss": 2.379615306854248,
"step": 3051
},
{
"epoch": 1.942711648631445,
"grad_norm": 10.70244668276611,
"learning_rate": 3.3342538336398166e-06,
"loss": 1.0482215881347656,
"step": 3052
},
{
"epoch": 1.9433481858688735,
"grad_norm": 14.689318248877823,
"learning_rate": 3.3307620410700625e-06,
"loss": 1.3926039934158325,
"step": 3053
},
{
"epoch": 1.9439847231063019,
"grad_norm": 15.604591834753055,
"learning_rate": 3.3272711644706923e-06,
"loss": 1.5713223218917847,
"step": 3054
},
{
"epoch": 1.94462126034373,
"grad_norm": 11.132370985223409,
"learning_rate": 3.323781205757278e-06,
"loss": 1.5595935583114624,
"step": 3055
},
{
"epoch": 1.9452577975811585,
"grad_norm": 8.240956588375802,
"learning_rate": 3.320292166844881e-06,
"loss": 1.0932841300964355,
"step": 3056
},
{
"epoch": 1.9458943348185869,
"grad_norm": 13.688867206661676,
"learning_rate": 3.316804049648065e-06,
"loss": 1.921128749847412,
"step": 3057
},
{
"epoch": 1.9465308720560153,
"grad_norm": 11.620609053138507,
"learning_rate": 3.3133168560808805e-06,
"loss": 1.355882167816162,
"step": 3058
},
{
"epoch": 1.9471674092934437,
"grad_norm": 14.284538765313782,
"learning_rate": 3.3098305880568783e-06,
"loss": 1.9279327392578125,
"step": 3059
},
{
"epoch": 1.947803946530872,
"grad_norm": 9.855480881398547,
"learning_rate": 3.306345247489095e-06,
"loss": 1.4592701196670532,
"step": 3060
},
{
"epoch": 1.9484404837683005,
"grad_norm": 10.332419100270505,
"learning_rate": 3.3028608362900647e-06,
"loss": 1.398805856704712,
"step": 3061
},
{
"epoch": 1.9490770210057289,
"grad_norm": 13.959536418752435,
"learning_rate": 3.2993773563718057e-06,
"loss": 1.51752769947052,
"step": 3062
},
{
"epoch": 1.949713558243157,
"grad_norm": 16.848860128572603,
"learning_rate": 3.2958948096458272e-06,
"loss": 1.6075987815856934,
"step": 3063
},
{
"epoch": 1.9503500954805855,
"grad_norm": 10.370030263170925,
"learning_rate": 3.2924131980231276e-06,
"loss": 2.661520004272461,
"step": 3064
},
{
"epoch": 1.9509866327180139,
"grad_norm": 7.636469667188669,
"learning_rate": 3.288932523414191e-06,
"loss": 1.9577052593231201,
"step": 3065
},
{
"epoch": 1.9516231699554423,
"grad_norm": 10.240085727743649,
"learning_rate": 3.28545278772899e-06,
"loss": 1.8488285541534424,
"step": 3066
},
{
"epoch": 1.9522597071928707,
"grad_norm": 12.132299789528824,
"learning_rate": 3.2819739928769745e-06,
"loss": 2.030672550201416,
"step": 3067
},
{
"epoch": 1.952896244430299,
"grad_norm": 10.193505387271667,
"learning_rate": 3.2784961407670896e-06,
"loss": 2.250061511993408,
"step": 3068
},
{
"epoch": 1.9535327816677275,
"grad_norm": 12.017657693658537,
"learning_rate": 3.2750192333077514e-06,
"loss": 1.1749529838562012,
"step": 3069
},
{
"epoch": 1.9541693189051559,
"grad_norm": 12.250906336492893,
"learning_rate": 3.271543272406866e-06,
"loss": 1.6595430374145508,
"step": 3070
},
{
"epoch": 1.9548058561425843,
"grad_norm": 11.297035216753892,
"learning_rate": 3.2680682599718156e-06,
"loss": 1.1674996614456177,
"step": 3071
},
{
"epoch": 1.9554423933800127,
"grad_norm": 12.65734863075847,
"learning_rate": 3.264594197909465e-06,
"loss": 1.424347162246704,
"step": 3072
},
{
"epoch": 1.956078930617441,
"grad_norm": 10.226004926281766,
"learning_rate": 3.2611210881261535e-06,
"loss": 1.4201499223709106,
"step": 3073
},
{
"epoch": 1.9567154678548695,
"grad_norm": 7.916157632060394,
"learning_rate": 3.2576489325277045e-06,
"loss": 1.4614274501800537,
"step": 3074
},
{
"epoch": 1.957352005092298,
"grad_norm": 20.01032846884706,
"learning_rate": 3.2541777330194073e-06,
"loss": 0.9933788776397705,
"step": 3075
},
{
"epoch": 1.9579885423297263,
"grad_norm": 10.315125777877338,
"learning_rate": 3.2507074915060397e-06,
"loss": 1.8490878343582153,
"step": 3076
},
{
"epoch": 1.9586250795671547,
"grad_norm": 11.613095337512359,
"learning_rate": 3.2472382098918414e-06,
"loss": 1.0492528676986694,
"step": 3077
},
{
"epoch": 1.959261616804583,
"grad_norm": 5.274761670290617,
"learning_rate": 3.2437698900805346e-06,
"loss": 1.2276555299758911,
"step": 3078
},
{
"epoch": 1.9598981540420115,
"grad_norm": 10.485079102811254,
"learning_rate": 3.240302533975308e-06,
"loss": 1.5378557443618774,
"step": 3079
},
{
"epoch": 1.96053469127944,
"grad_norm": 8.610489273802843,
"learning_rate": 3.236836143478823e-06,
"loss": 1.1437630653381348,
"step": 3080
},
{
"epoch": 1.9611712285168683,
"grad_norm": 11.48334352465963,
"learning_rate": 3.2333707204932158e-06,
"loss": 1.811964988708496,
"step": 3081
},
{
"epoch": 1.9618077657542967,
"grad_norm": 11.60340422904036,
"learning_rate": 3.229906266920082e-06,
"loss": 1.6359905004501343,
"step": 3082
},
{
"epoch": 1.9624443029917251,
"grad_norm": 15.929546389831012,
"learning_rate": 3.226442784660494e-06,
"loss": 1.6629650592803955,
"step": 3083
},
{
"epoch": 1.9630808402291535,
"grad_norm": 16.669821865914205,
"learning_rate": 3.222980275614987e-06,
"loss": 2.0848677158355713,
"step": 3084
},
{
"epoch": 1.963717377466582,
"grad_norm": 8.99541983738079,
"learning_rate": 3.219518741683564e-06,
"loss": 1.6872613430023193,
"step": 3085
},
{
"epoch": 1.9643539147040103,
"grad_norm": 7.953084515602438,
"learning_rate": 3.2160581847656915e-06,
"loss": 1.7856699228286743,
"step": 3086
},
{
"epoch": 1.9649904519414387,
"grad_norm": 11.550514933921072,
"learning_rate": 3.2125986067603004e-06,
"loss": 1.3385860919952393,
"step": 3087
},
{
"epoch": 1.965626989178867,
"grad_norm": 10.070313074436045,
"learning_rate": 3.2091400095657842e-06,
"loss": 0.8561789393424988,
"step": 3088
},
{
"epoch": 1.9662635264162953,
"grad_norm": 10.382826957485825,
"learning_rate": 3.205682395080001e-06,
"loss": 1.1104825735092163,
"step": 3089
},
{
"epoch": 1.9669000636537237,
"grad_norm": 8.21773412283748,
"learning_rate": 3.202225765200262e-06,
"loss": 1.4783658981323242,
"step": 3090
},
{
"epoch": 1.9675366008911521,
"grad_norm": 17.873833279171105,
"learning_rate": 3.1987701218233496e-06,
"loss": 0.8753082752227783,
"step": 3091
},
{
"epoch": 1.9681731381285805,
"grad_norm": 10.414163992523427,
"learning_rate": 3.195315466845493e-06,
"loss": 1.0268055200576782,
"step": 3092
},
{
"epoch": 1.968809675366009,
"grad_norm": 8.412822041327617,
"learning_rate": 3.1918618021623885e-06,
"loss": 1.1761889457702637,
"step": 3093
},
{
"epoch": 1.9694462126034373,
"grad_norm": 8.725455364591456,
"learning_rate": 3.188409129669182e-06,
"loss": 1.1024329662322998,
"step": 3094
},
{
"epoch": 1.9700827498408657,
"grad_norm": 9.389550890164376,
"learning_rate": 3.1849574512604808e-06,
"loss": 1.2440528869628906,
"step": 3095
},
{
"epoch": 1.9707192870782941,
"grad_norm": 13.494648821582157,
"learning_rate": 3.181506768830344e-06,
"loss": 1.670837640762329,
"step": 3096
},
{
"epoch": 1.9713558243157223,
"grad_norm": 12.150357325567763,
"learning_rate": 3.1780570842722795e-06,
"loss": 1.5949982404708862,
"step": 3097
},
{
"epoch": 1.9719923615531507,
"grad_norm": 15.163911389676239,
"learning_rate": 3.1746083994792577e-06,
"loss": 1.7955188751220703,
"step": 3098
},
{
"epoch": 1.9726288987905791,
"grad_norm": 9.175712392609714,
"learning_rate": 3.1711607163436904e-06,
"loss": 1.4577592611312866,
"step": 3099
},
{
"epoch": 1.9732654360280075,
"grad_norm": 9.234930841935416,
"learning_rate": 3.1677140367574476e-06,
"loss": 2.5002384185791016,
"step": 3100
},
{
"epoch": 1.973901973265436,
"grad_norm": 11.337944959254775,
"learning_rate": 3.1642683626118442e-06,
"loss": 1.9067683219909668,
"step": 3101
},
{
"epoch": 1.9745385105028643,
"grad_norm": 10.149407145948285,
"learning_rate": 3.1608236957976445e-06,
"loss": 1.2545390129089355,
"step": 3102
},
{
"epoch": 1.9751750477402927,
"grad_norm": 9.27457318057026,
"learning_rate": 3.157380038205059e-06,
"loss": 1.3965643644332886,
"step": 3103
},
{
"epoch": 1.9758115849777211,
"grad_norm": 12.246186032090677,
"learning_rate": 3.153937391723748e-06,
"loss": 3.5177059173583984,
"step": 3104
},
{
"epoch": 1.9764481222151495,
"grad_norm": 10.391359576104954,
"learning_rate": 3.1504957582428115e-06,
"loss": 1.5710853338241577,
"step": 3105
},
{
"epoch": 1.977084659452578,
"grad_norm": 10.559254099194881,
"learning_rate": 3.147055139650798e-06,
"loss": 1.2918438911437988,
"step": 3106
},
{
"epoch": 1.9777211966900063,
"grad_norm": 10.52035800031223,
"learning_rate": 3.143615537835697e-06,
"loss": 1.754537582397461,
"step": 3107
},
{
"epoch": 1.9783577339274347,
"grad_norm": 31.605830555945424,
"learning_rate": 3.1401769546849414e-06,
"loss": 1.4454526901245117,
"step": 3108
},
{
"epoch": 1.9789942711648632,
"grad_norm": 9.062007515657388,
"learning_rate": 3.1367393920854027e-06,
"loss": 1.5038361549377441,
"step": 3109
},
{
"epoch": 1.9796308084022916,
"grad_norm": 10.276281144397306,
"learning_rate": 3.1333028519233964e-06,
"loss": 1.5616546869277954,
"step": 3110
},
{
"epoch": 1.98026734563972,
"grad_norm": 9.410325778375519,
"learning_rate": 3.129867336084673e-06,
"loss": 1.0828948020935059,
"step": 3111
},
{
"epoch": 1.9809038828771484,
"grad_norm": 15.13284583064293,
"learning_rate": 3.1264328464544253e-06,
"loss": 0.9905999898910522,
"step": 3112
},
{
"epoch": 1.9815404201145768,
"grad_norm": 10.574551454451893,
"learning_rate": 3.1229993849172764e-06,
"loss": 1.5945922136306763,
"step": 3113
},
{
"epoch": 1.9821769573520052,
"grad_norm": 12.976134401077557,
"learning_rate": 3.11956695335729e-06,
"loss": 1.8390474319458008,
"step": 3114
},
{
"epoch": 1.9828134945894336,
"grad_norm": 12.392306071093337,
"learning_rate": 3.1161355536579653e-06,
"loss": 1.447790265083313,
"step": 3115
},
{
"epoch": 1.983450031826862,
"grad_norm": 15.440907540898268,
"learning_rate": 3.1127051877022307e-06,
"loss": 0.8936029672622681,
"step": 3116
},
{
"epoch": 1.9840865690642904,
"grad_norm": 8.33224896953088,
"learning_rate": 3.1092758573724552e-06,
"loss": 1.157752275466919,
"step": 3117
},
{
"epoch": 1.9847231063017188,
"grad_norm": 10.907324824305018,
"learning_rate": 3.1058475645504284e-06,
"loss": 0.8514528870582581,
"step": 3118
},
{
"epoch": 1.9853596435391472,
"grad_norm": 10.654887083620297,
"learning_rate": 3.1024203111173833e-06,
"loss": 1.6049655675888062,
"step": 3119
},
{
"epoch": 1.9859961807765756,
"grad_norm": 6.115826190640891,
"learning_rate": 3.098994098953971e-06,
"loss": 0.8340649604797363,
"step": 3120
},
{
"epoch": 1.986632718014004,
"grad_norm": 11.47366728517658,
"learning_rate": 3.0955689299402793e-06,
"loss": 1.9429899454116821,
"step": 3121
},
{
"epoch": 1.9872692552514322,
"grad_norm": 9.459059050362953,
"learning_rate": 3.092144805955818e-06,
"loss": 1.7819583415985107,
"step": 3122
},
{
"epoch": 1.9879057924888606,
"grad_norm": 11.438816769213076,
"learning_rate": 3.0887217288795283e-06,
"loss": 1.437670350074768,
"step": 3123
},
{
"epoch": 1.988542329726289,
"grad_norm": 14.66202127305559,
"learning_rate": 3.0852997005897735e-06,
"loss": 1.8193809986114502,
"step": 3124
},
{
"epoch": 1.9891788669637174,
"grad_norm": 9.445926120985163,
"learning_rate": 3.0818787229643442e-06,
"loss": 1.6506184339523315,
"step": 3125
},
{
"epoch": 1.9898154042011458,
"grad_norm": 8.465231817536171,
"learning_rate": 3.078458797880449e-06,
"loss": 1.2958638668060303,
"step": 3126
},
{
"epoch": 1.9904519414385742,
"grad_norm": 9.799611362744931,
"learning_rate": 3.075039927214728e-06,
"loss": 1.4738852977752686,
"step": 3127
},
{
"epoch": 1.9910884786760026,
"grad_norm": 10.91720753319989,
"learning_rate": 3.071622112843232e-06,
"loss": 1.6265835762023926,
"step": 3128
},
{
"epoch": 1.991725015913431,
"grad_norm": 7.2470953939062035,
"learning_rate": 3.0682053566414416e-06,
"loss": 1.5042080879211426,
"step": 3129
},
{
"epoch": 1.9923615531508592,
"grad_norm": 15.139102548489543,
"learning_rate": 3.064789660484251e-06,
"loss": 2.352555274963379,
"step": 3130
},
{
"epoch": 1.9929980903882876,
"grad_norm": 9.044104466068717,
"learning_rate": 3.0613750262459753e-06,
"loss": 1.2178021669387817,
"step": 3131
},
{
"epoch": 1.993634627625716,
"grad_norm": 10.056864766304423,
"learning_rate": 3.057961455800347e-06,
"loss": 1.5974866151809692,
"step": 3132
},
{
"epoch": 1.9942711648631444,
"grad_norm": 23.906234837401477,
"learning_rate": 3.0545489510205083e-06,
"loss": 1.9503036737442017,
"step": 3133
},
{
"epoch": 1.9949077021005728,
"grad_norm": 10.64905333497331,
"learning_rate": 3.0511375137790305e-06,
"loss": 1.0582550764083862,
"step": 3134
},
{
"epoch": 1.9955442393380012,
"grad_norm": 11.165521278744388,
"learning_rate": 3.0477271459478847e-06,
"loss": 1.5541431903839111,
"step": 3135
},
{
"epoch": 1.9961807765754296,
"grad_norm": 9.471209935426868,
"learning_rate": 3.044317849398464e-06,
"loss": 1.5843894481658936,
"step": 3136
},
{
"epoch": 1.996817313812858,
"grad_norm": 10.442171241885108,
"learning_rate": 3.0409096260015703e-06,
"loss": 1.4579355716705322,
"step": 3137
},
{
"epoch": 1.9974538510502864,
"grad_norm": 18.94728789653089,
"learning_rate": 3.0375024776274178e-06,
"loss": 1.1820597648620605,
"step": 3138
},
{
"epoch": 1.9980903882877148,
"grad_norm": 18.230185244403344,
"learning_rate": 3.0340964061456284e-06,
"loss": 2.0564804077148438,
"step": 3139
},
{
"epoch": 1.9987269255251432,
"grad_norm": 12.652908441573445,
"learning_rate": 3.030691413425239e-06,
"loss": 1.3657358884811401,
"step": 3140
},
{
"epoch": 1.9993634627625716,
"grad_norm": 8.32228181735295,
"learning_rate": 3.0272875013346853e-06,
"loss": 2.350313425064087,
"step": 3141
},
{
"epoch": 2.0,
"grad_norm": 9.406506494829356,
"learning_rate": 3.023884671741821e-06,
"loss": 1.6760640144348145,
"step": 3142
},
{
"epoch": 2.0006365372374284,
"grad_norm": 12.46356256089401,
"learning_rate": 3.0204829265138947e-06,
"loss": 0.7003447413444519,
"step": 3143
},
{
"epoch": 2.001273074474857,
"grad_norm": 12.40322846590644,
"learning_rate": 3.017082267517568e-06,
"loss": 0.5960747003555298,
"step": 3144
},
{
"epoch": 2.001909611712285,
"grad_norm": 11.234549616820136,
"learning_rate": 3.0136826966189024e-06,
"loss": 0.43681585788726807,
"step": 3145
},
{
"epoch": 2.0025461489497136,
"grad_norm": 9.633365087032399,
"learning_rate": 3.0102842156833665e-06,
"loss": 0.6057643294334412,
"step": 3146
},
{
"epoch": 2.003182686187142,
"grad_norm": 12.766291712291087,
"learning_rate": 3.0068868265758242e-06,
"loss": 0.35702773928642273,
"step": 3147
},
{
"epoch": 2.0038192234245704,
"grad_norm": 6.618096264582037,
"learning_rate": 3.0034905311605485e-06,
"loss": 0.24602855741977692,
"step": 3148
},
{
"epoch": 2.004455760661999,
"grad_norm": 8.905638834371095,
"learning_rate": 3.0000953313012036e-06,
"loss": 0.5150216221809387,
"step": 3149
},
{
"epoch": 2.0050922978994272,
"grad_norm": 9.164941120941812,
"learning_rate": 2.9967012288608576e-06,
"loss": 0.4516746997833252,
"step": 3150
},
{
"epoch": 2.0057288351368556,
"grad_norm": 11.837563041422573,
"learning_rate": 2.9933082257019767e-06,
"loss": 0.2624654769897461,
"step": 3151
},
{
"epoch": 2.006365372374284,
"grad_norm": 11.487323607343468,
"learning_rate": 2.9899163236864215e-06,
"loss": 0.5795379877090454,
"step": 3152
},
{
"epoch": 2.0070019096117124,
"grad_norm": 10.46961916492163,
"learning_rate": 2.9865255246754516e-06,
"loss": 0.4388521909713745,
"step": 3153
},
{
"epoch": 2.007638446849141,
"grad_norm": 12.549862910935373,
"learning_rate": 2.9831358305297166e-06,
"loss": 1.4091285467147827,
"step": 3154
},
{
"epoch": 2.0082749840865692,
"grad_norm": 7.69507473690529,
"learning_rate": 2.979747243109267e-06,
"loss": 0.5970480442047119,
"step": 3155
},
{
"epoch": 2.0089115213239976,
"grad_norm": 11.892005033640343,
"learning_rate": 2.9763597642735355e-06,
"loss": 0.5806328654289246,
"step": 3156
},
{
"epoch": 2.009548058561426,
"grad_norm": 9.012121762112493,
"learning_rate": 2.9729733958813563e-06,
"loss": 0.622743546962738,
"step": 3157
},
{
"epoch": 2.0101845957988544,
"grad_norm": 9.039845428183092,
"learning_rate": 2.9695881397909485e-06,
"loss": 0.48474836349487305,
"step": 3158
},
{
"epoch": 2.0108211330362824,
"grad_norm": 12.999442613720388,
"learning_rate": 2.966203997859926e-06,
"loss": 0.5977911949157715,
"step": 3159
},
{
"epoch": 2.011457670273711,
"grad_norm": 9.388140845650517,
"learning_rate": 2.962820971945285e-06,
"loss": 0.5747494697570801,
"step": 3160
},
{
"epoch": 2.012094207511139,
"grad_norm": 12.801041945381415,
"learning_rate": 2.9594390639034143e-06,
"loss": 0.8811707496643066,
"step": 3161
},
{
"epoch": 2.0127307447485676,
"grad_norm": 11.13358117745356,
"learning_rate": 2.956058275590086e-06,
"loss": 0.5279093980789185,
"step": 3162
},
{
"epoch": 2.013367281985996,
"grad_norm": 13.612001915960958,
"learning_rate": 2.952678608860461e-06,
"loss": 0.6851604580879211,
"step": 3163
},
{
"epoch": 2.0140038192234244,
"grad_norm": 8.822751640844997,
"learning_rate": 2.9493000655690795e-06,
"loss": 0.4466755986213684,
"step": 3164
},
{
"epoch": 2.014640356460853,
"grad_norm": 17.17901738850658,
"learning_rate": 2.945922647569874e-06,
"loss": 2.6146445274353027,
"step": 3165
},
{
"epoch": 2.0152768936982812,
"grad_norm": 20.311318251889887,
"learning_rate": 2.9425463567161505e-06,
"loss": 0.5570971369743347,
"step": 3166
},
{
"epoch": 2.0159134309357096,
"grad_norm": 13.492459253820568,
"learning_rate": 2.9391711948605995e-06,
"loss": 1.6028361320495605,
"step": 3167
},
{
"epoch": 2.016549968173138,
"grad_norm": 17.20228134636775,
"learning_rate": 2.935797163855296e-06,
"loss": 0.524744987487793,
"step": 3168
},
{
"epoch": 2.0171865054105664,
"grad_norm": 7.376589765501107,
"learning_rate": 2.9324242655516864e-06,
"loss": 0.26688352227211,
"step": 3169
},
{
"epoch": 2.017823042647995,
"grad_norm": 11.092143023254515,
"learning_rate": 2.9290525018006054e-06,
"loss": 0.45281243324279785,
"step": 3170
},
{
"epoch": 2.0184595798854232,
"grad_norm": 9.938214285976413,
"learning_rate": 2.925681874452256e-06,
"loss": 0.3849692940711975,
"step": 3171
},
{
"epoch": 2.0190961171228516,
"grad_norm": 11.597512917048412,
"learning_rate": 2.922312385356225e-06,
"loss": 0.6820840239524841,
"step": 3172
},
{
"epoch": 2.01973265436028,
"grad_norm": 10.973755623049307,
"learning_rate": 2.9189440363614664e-06,
"loss": 0.3673578202724457,
"step": 3173
},
{
"epoch": 2.0203691915977084,
"grad_norm": 7.9700444777730315,
"learning_rate": 2.9155768293163213e-06,
"loss": 0.6118618845939636,
"step": 3174
},
{
"epoch": 2.021005728835137,
"grad_norm": 9.991332698180424,
"learning_rate": 2.912210766068486e-06,
"loss": 0.6370064616203308,
"step": 3175
},
{
"epoch": 2.0216422660725653,
"grad_norm": 18.42201627294179,
"learning_rate": 2.9088458484650485e-06,
"loss": 0.5225192308425903,
"step": 3176
},
{
"epoch": 2.0222788033099937,
"grad_norm": 17.312231651699243,
"learning_rate": 2.905482078352454e-06,
"loss": 0.6268512606620789,
"step": 3177
},
{
"epoch": 2.022915340547422,
"grad_norm": 8.952186497649738,
"learning_rate": 2.9021194575765257e-06,
"loss": 0.6104077100753784,
"step": 3178
},
{
"epoch": 2.0235518777848505,
"grad_norm": 15.475594532921043,
"learning_rate": 2.8987579879824492e-06,
"loss": 1.112720251083374,
"step": 3179
},
{
"epoch": 2.024188415022279,
"grad_norm": 16.902034475138702,
"learning_rate": 2.895397671414788e-06,
"loss": 0.472690224647522,
"step": 3180
},
{
"epoch": 2.0248249522597073,
"grad_norm": 9.839014247286904,
"learning_rate": 2.892038509717465e-06,
"loss": 0.4841421842575073,
"step": 3181
},
{
"epoch": 2.0254614894971357,
"grad_norm": 8.590762320274365,
"learning_rate": 2.8886805047337736e-06,
"loss": 0.518042266368866,
"step": 3182
},
{
"epoch": 2.026098026734564,
"grad_norm": 12.50307119973155,
"learning_rate": 2.8853236583063695e-06,
"loss": 0.30463069677352905,
"step": 3183
},
{
"epoch": 2.0267345639719925,
"grad_norm": 13.55415563944142,
"learning_rate": 2.8819679722772732e-06,
"loss": 0.4761585593223572,
"step": 3184
},
{
"epoch": 2.027371101209421,
"grad_norm": 10.323909824063984,
"learning_rate": 2.878613448487871e-06,
"loss": 0.3689367473125458,
"step": 3185
},
{
"epoch": 2.0280076384468493,
"grad_norm": 17.01497476426275,
"learning_rate": 2.875260088778907e-06,
"loss": 0.7028419375419617,
"step": 3186
},
{
"epoch": 2.0286441756842777,
"grad_norm": 13.542252184708316,
"learning_rate": 2.871907894990495e-06,
"loss": 0.6263160705566406,
"step": 3187
},
{
"epoch": 2.029280712921706,
"grad_norm": 7.3376341289827325,
"learning_rate": 2.8685568689620958e-06,
"loss": 0.6165609359741211,
"step": 3188
},
{
"epoch": 2.0299172501591345,
"grad_norm": 12.669897190649397,
"learning_rate": 2.8652070125325416e-06,
"loss": 0.9314853549003601,
"step": 3189
},
{
"epoch": 2.030553787396563,
"grad_norm": 8.7196582806334,
"learning_rate": 2.861858327540018e-06,
"loss": 0.6993128657341003,
"step": 3190
},
{
"epoch": 2.0311903246339913,
"grad_norm": 9.952062518974616,
"learning_rate": 2.8585108158220664e-06,
"loss": 0.6346950531005859,
"step": 3191
},
{
"epoch": 2.0318268618714193,
"grad_norm": 10.307825570615718,
"learning_rate": 2.8551644792155845e-06,
"loss": 0.4012235999107361,
"step": 3192
},
{
"epoch": 2.0324633991088477,
"grad_norm": 8.842722997144174,
"learning_rate": 2.8518193195568304e-06,
"loss": 0.4270016551017761,
"step": 3193
},
{
"epoch": 2.033099936346276,
"grad_norm": 12.341905472926273,
"learning_rate": 2.84847533868141e-06,
"loss": 0.40161120891571045,
"step": 3194
},
{
"epoch": 2.0337364735837045,
"grad_norm": 9.676443158290573,
"learning_rate": 2.845132538424286e-06,
"loss": 0.8682132363319397,
"step": 3195
},
{
"epoch": 2.034373010821133,
"grad_norm": 9.562213215313651,
"learning_rate": 2.841790920619769e-06,
"loss": 0.2562720775604248,
"step": 3196
},
{
"epoch": 2.0350095480585613,
"grad_norm": 11.518279123176042,
"learning_rate": 2.83845048710153e-06,
"loss": 0.4420645236968994,
"step": 3197
},
{
"epoch": 2.0356460852959897,
"grad_norm": 10.452049091263088,
"learning_rate": 2.835111239702576e-06,
"loss": 0.7477583885192871,
"step": 3198
},
{
"epoch": 2.036282622533418,
"grad_norm": 11.797998040714148,
"learning_rate": 2.8317731802552774e-06,
"loss": 0.6933112144470215,
"step": 3199
},
{
"epoch": 2.0369191597708465,
"grad_norm": 10.919498256245369,
"learning_rate": 2.8284363105913447e-06,
"loss": 0.6165541410446167,
"step": 3200
},
{
"epoch": 2.037555697008275,
"grad_norm": 9.858438891717784,
"learning_rate": 2.825100632541836e-06,
"loss": 0.783021867275238,
"step": 3201
},
{
"epoch": 2.0381922342457033,
"grad_norm": 11.84280267272239,
"learning_rate": 2.8217661479371585e-06,
"loss": 0.3734252452850342,
"step": 3202
},
{
"epoch": 2.0388287714831317,
"grad_norm": 17.567056788118094,
"learning_rate": 2.8184328586070596e-06,
"loss": 0.5697858929634094,
"step": 3203
},
{
"epoch": 2.03946530872056,
"grad_norm": 11.951047163545796,
"learning_rate": 2.81510076638064e-06,
"loss": 0.4297953248023987,
"step": 3204
},
{
"epoch": 2.0401018459579885,
"grad_norm": 7.165365957833696,
"learning_rate": 2.8117698730863302e-06,
"loss": 0.37496840953826904,
"step": 3205
},
{
"epoch": 2.040738383195417,
"grad_norm": 12.119192215519528,
"learning_rate": 2.808440180551916e-06,
"loss": 1.0547622442245483,
"step": 3206
},
{
"epoch": 2.0413749204328453,
"grad_norm": 15.33298908135013,
"learning_rate": 2.8051116906045154e-06,
"loss": 0.5565409064292908,
"step": 3207
},
{
"epoch": 2.0420114576702737,
"grad_norm": 11.897131902832205,
"learning_rate": 2.801784405070591e-06,
"loss": 0.7787500619888306,
"step": 3208
},
{
"epoch": 2.042647994907702,
"grad_norm": 9.393851599365979,
"learning_rate": 2.79845832577594e-06,
"loss": 0.41894006729125977,
"step": 3209
},
{
"epoch": 2.0432845321451305,
"grad_norm": 11.147627022876764,
"learning_rate": 2.795133454545707e-06,
"loss": 0.5602455139160156,
"step": 3210
},
{
"epoch": 2.043921069382559,
"grad_norm": 6.846635597479448,
"learning_rate": 2.7918097932043598e-06,
"loss": 0.2927100658416748,
"step": 3211
},
{
"epoch": 2.0445576066199873,
"grad_norm": 9.165171726398286,
"learning_rate": 2.788487343575715e-06,
"loss": 0.5903818011283875,
"step": 3212
},
{
"epoch": 2.0451941438574157,
"grad_norm": 23.476619403147776,
"learning_rate": 2.7851661074829176e-06,
"loss": 1.7944833040237427,
"step": 3213
},
{
"epoch": 2.045830681094844,
"grad_norm": 10.48520006726488,
"learning_rate": 2.7818460867484488e-06,
"loss": 0.48250359296798706,
"step": 3214
},
{
"epoch": 2.0464672183322725,
"grad_norm": 9.44127208339123,
"learning_rate": 2.7785272831941198e-06,
"loss": 0.4169970452785492,
"step": 3215
},
{
"epoch": 2.047103755569701,
"grad_norm": 16.499270916009138,
"learning_rate": 2.7752096986410802e-06,
"loss": 0.4681619703769684,
"step": 3216
},
{
"epoch": 2.0477402928071293,
"grad_norm": 15.458816531706349,
"learning_rate": 2.7718933349098077e-06,
"loss": 0.4672512412071228,
"step": 3217
},
{
"epoch": 2.0483768300445577,
"grad_norm": 10.022530098921154,
"learning_rate": 2.7685781938201024e-06,
"loss": 0.4352753460407257,
"step": 3218
},
{
"epoch": 2.049013367281986,
"grad_norm": 12.324950558960667,
"learning_rate": 2.765264277191107e-06,
"loss": 0.32542985677719116,
"step": 3219
},
{
"epoch": 2.0496499045194145,
"grad_norm": 10.715295354976668,
"learning_rate": 2.761951586841284e-06,
"loss": 0.3411189913749695,
"step": 3220
},
{
"epoch": 2.050286441756843,
"grad_norm": 15.631613837657154,
"learning_rate": 2.7586401245884236e-06,
"loss": 1.1421493291854858,
"step": 3221
},
{
"epoch": 2.0509229789942713,
"grad_norm": 11.130166870127876,
"learning_rate": 2.7553298922496423e-06,
"loss": 0.5865961909294128,
"step": 3222
},
{
"epoch": 2.0515595162316997,
"grad_norm": 7.13843922070195,
"learning_rate": 2.7520208916413866e-06,
"loss": 0.4451628625392914,
"step": 3223
},
{
"epoch": 2.052196053469128,
"grad_norm": 6.859986120553279,
"learning_rate": 2.7487131245794207e-06,
"loss": 0.29068124294281006,
"step": 3224
},
{
"epoch": 2.0528325907065565,
"grad_norm": 7.086139260244435,
"learning_rate": 2.745406592878834e-06,
"loss": 0.34774288535118103,
"step": 3225
},
{
"epoch": 2.0534691279439845,
"grad_norm": 7.444475437890723,
"learning_rate": 2.7421012983540384e-06,
"loss": 0.38049668073654175,
"step": 3226
},
{
"epoch": 2.054105665181413,
"grad_norm": 11.944953328297366,
"learning_rate": 2.738797242818768e-06,
"loss": 0.46898943185806274,
"step": 3227
},
{
"epoch": 2.0547422024188413,
"grad_norm": 8.084521244857383,
"learning_rate": 2.735494428086073e-06,
"loss": 0.3792363405227661,
"step": 3228
},
{
"epoch": 2.0553787396562697,
"grad_norm": 9.797542752171367,
"learning_rate": 2.7321928559683296e-06,
"loss": 0.38770341873168945,
"step": 3229
},
{
"epoch": 2.056015276893698,
"grad_norm": 9.792132348109558,
"learning_rate": 2.728892528277226e-06,
"loss": 1.0433180332183838,
"step": 3230
},
{
"epoch": 2.0566518141311265,
"grad_norm": 6.557964529559676,
"learning_rate": 2.72559344682377e-06,
"loss": 0.18909412622451782,
"step": 3231
},
{
"epoch": 2.057288351368555,
"grad_norm": 14.05273896444134,
"learning_rate": 2.7222956134182833e-06,
"loss": 0.49194207787513733,
"step": 3232
},
{
"epoch": 2.0579248886059833,
"grad_norm": 10.913357132899804,
"learning_rate": 2.7189990298704105e-06,
"loss": 0.5012593865394592,
"step": 3233
},
{
"epoch": 2.0585614258434117,
"grad_norm": 18.480467244427906,
"learning_rate": 2.7157036979890984e-06,
"loss": 0.30408960580825806,
"step": 3234
},
{
"epoch": 2.05919796308084,
"grad_norm": 12.692004595044567,
"learning_rate": 2.7124096195826133e-06,
"loss": 0.9789431095123291,
"step": 3235
},
{
"epoch": 2.0598345003182685,
"grad_norm": 6.891068890017745,
"learning_rate": 2.7091167964585374e-06,
"loss": 0.41833198070526123,
"step": 3236
},
{
"epoch": 2.060471037555697,
"grad_norm": 19.272741777843954,
"learning_rate": 2.7058252304237577e-06,
"loss": 0.42587608098983765,
"step": 3237
},
{
"epoch": 2.0611075747931253,
"grad_norm": 14.959014212111224,
"learning_rate": 2.7025349232844744e-06,
"loss": 0.4084237813949585,
"step": 3238
},
{
"epoch": 2.0617441120305537,
"grad_norm": 11.767916514265051,
"learning_rate": 2.6992458768461947e-06,
"loss": 0.42100873589515686,
"step": 3239
},
{
"epoch": 2.062380649267982,
"grad_norm": 13.732195873693154,
"learning_rate": 2.695958092913741e-06,
"loss": 0.3621373474597931,
"step": 3240
},
{
"epoch": 2.0630171865054105,
"grad_norm": 15.052316193112196,
"learning_rate": 2.69267157329123e-06,
"loss": 0.36196765303611755,
"step": 3241
},
{
"epoch": 2.063653723742839,
"grad_norm": 8.791123051446233,
"learning_rate": 2.6893863197820978e-06,
"loss": 0.733702540397644,
"step": 3242
},
{
"epoch": 2.0642902609802674,
"grad_norm": 7.475648812336085,
"learning_rate": 2.686102334189079e-06,
"loss": 0.42003414034843445,
"step": 3243
},
{
"epoch": 2.0649267982176958,
"grad_norm": 13.569102527684445,
"learning_rate": 2.6828196183142117e-06,
"loss": 0.3186565637588501,
"step": 3244
},
{
"epoch": 2.065563335455124,
"grad_norm": 9.324977184898154,
"learning_rate": 2.6795381739588394e-06,
"loss": 0.32224637269973755,
"step": 3245
},
{
"epoch": 2.0661998726925526,
"grad_norm": 6.415901405634741,
"learning_rate": 2.6762580029236102e-06,
"loss": 0.30635929107666016,
"step": 3246
},
{
"epoch": 2.066836409929981,
"grad_norm": 10.35372111155344,
"learning_rate": 2.6729791070084693e-06,
"loss": 0.8054674863815308,
"step": 3247
},
{
"epoch": 2.0674729471674094,
"grad_norm": 10.696306455544944,
"learning_rate": 2.669701488012664e-06,
"loss": 0.49849972128868103,
"step": 3248
},
{
"epoch": 2.0681094844048378,
"grad_norm": 8.263034695346395,
"learning_rate": 2.6664251477347404e-06,
"loss": 0.21850839257240295,
"step": 3249
},
{
"epoch": 2.068746021642266,
"grad_norm": 9.372937594365398,
"learning_rate": 2.6631500879725425e-06,
"loss": 0.3750782907009125,
"step": 3250
},
{
"epoch": 2.0693825588796946,
"grad_norm": 10.426364379097361,
"learning_rate": 2.659876310523214e-06,
"loss": 0.5895072221755981,
"step": 3251
},
{
"epoch": 2.070019096117123,
"grad_norm": 8.629619368306836,
"learning_rate": 2.6566038171831902e-06,
"loss": 0.18251235783100128,
"step": 3252
},
{
"epoch": 2.0706556333545514,
"grad_norm": 14.157748250145481,
"learning_rate": 2.6533326097482114e-06,
"loss": 0.6843599081039429,
"step": 3253
},
{
"epoch": 2.07129217059198,
"grad_norm": 10.639887975474291,
"learning_rate": 2.6500626900132974e-06,
"loss": 0.35676315426826477,
"step": 3254
},
{
"epoch": 2.071928707829408,
"grad_norm": 15.337959408542437,
"learning_rate": 2.646794059772776e-06,
"loss": 2.2118444442749023,
"step": 3255
},
{
"epoch": 2.0725652450668366,
"grad_norm": 12.644577074348945,
"learning_rate": 2.643526720820259e-06,
"loss": 0.6072080135345459,
"step": 3256
},
{
"epoch": 2.073201782304265,
"grad_norm": 11.434453441228607,
"learning_rate": 2.640260674948652e-06,
"loss": 0.3190002739429474,
"step": 3257
},
{
"epoch": 2.0738383195416934,
"grad_norm": 8.98137016879291,
"learning_rate": 2.6369959239501487e-06,
"loss": 0.19415044784545898,
"step": 3258
},
{
"epoch": 2.074474856779122,
"grad_norm": 11.308842338843283,
"learning_rate": 2.633732469616238e-06,
"loss": 0.28903019428253174,
"step": 3259
},
{
"epoch": 2.0751113940165498,
"grad_norm": 10.390526838135772,
"learning_rate": 2.630470313737692e-06,
"loss": 0.5196407437324524,
"step": 3260
},
{
"epoch": 2.075747931253978,
"grad_norm": 14.766330473913122,
"learning_rate": 2.627209458104572e-06,
"loss": 0.9248642921447754,
"step": 3261
},
{
"epoch": 2.0763844684914066,
"grad_norm": 7.277822898693093,
"learning_rate": 2.6239499045062235e-06,
"loss": 0.37658756971359253,
"step": 3262
},
{
"epoch": 2.077021005728835,
"grad_norm": 12.905872120715083,
"learning_rate": 2.6206916547312865e-06,
"loss": 0.3785715103149414,
"step": 3263
},
{
"epoch": 2.0776575429662634,
"grad_norm": 18.72481288464424,
"learning_rate": 2.61743471056767e-06,
"loss": 0.7707376480102539,
"step": 3264
},
{
"epoch": 2.0782940802036918,
"grad_norm": 8.587041867271958,
"learning_rate": 2.614179073802582e-06,
"loss": 0.3318308889865875,
"step": 3265
},
{
"epoch": 2.07893061744112,
"grad_norm": 7.771217491488594,
"learning_rate": 2.610924746222503e-06,
"loss": 0.43681594729423523,
"step": 3266
},
{
"epoch": 2.0795671546785486,
"grad_norm": 18.48845128764159,
"learning_rate": 2.6076717296132005e-06,
"loss": 0.5202630162239075,
"step": 3267
},
{
"epoch": 2.080203691915977,
"grad_norm": 12.7775423658601,
"learning_rate": 2.60442002575972e-06,
"loss": 1.0267428159713745,
"step": 3268
},
{
"epoch": 2.0808402291534054,
"grad_norm": 9.385733118886481,
"learning_rate": 2.6011696364463878e-06,
"loss": 0.2559840679168701,
"step": 3269
},
{
"epoch": 2.081476766390834,
"grad_norm": 17.94835737406477,
"learning_rate": 2.5979205634568073e-06,
"loss": 0.4922923147678375,
"step": 3270
},
{
"epoch": 2.082113303628262,
"grad_norm": 5.832914015146009,
"learning_rate": 2.5946728085738593e-06,
"loss": 0.13679368793964386,
"step": 3271
},
{
"epoch": 2.0827498408656906,
"grad_norm": 7.716837449346257,
"learning_rate": 2.591426373579706e-06,
"loss": 0.6853848695755005,
"step": 3272
},
{
"epoch": 2.083386378103119,
"grad_norm": 9.795383883477603,
"learning_rate": 2.588181260255782e-06,
"loss": 0.9720170497894287,
"step": 3273
},
{
"epoch": 2.0840229153405474,
"grad_norm": 5.503650578746022,
"learning_rate": 2.5849374703827947e-06,
"loss": 0.27854397892951965,
"step": 3274
},
{
"epoch": 2.084659452577976,
"grad_norm": 9.340122442262517,
"learning_rate": 2.5816950057407263e-06,
"loss": 0.6471270322799683,
"step": 3275
},
{
"epoch": 2.085295989815404,
"grad_norm": 12.855186955075286,
"learning_rate": 2.5784538681088377e-06,
"loss": 0.647802472114563,
"step": 3276
},
{
"epoch": 2.0859325270528326,
"grad_norm": 9.115571266909637,
"learning_rate": 2.5752140592656505e-06,
"loss": 0.2930002510547638,
"step": 3277
},
{
"epoch": 2.086569064290261,
"grad_norm": 7.823769640298533,
"learning_rate": 2.5719755809889678e-06,
"loss": 0.42180970311164856,
"step": 3278
},
{
"epoch": 2.0872056015276894,
"grad_norm": 12.014748704805303,
"learning_rate": 2.5687384350558566e-06,
"loss": 0.340411514043808,
"step": 3279
},
{
"epoch": 2.087842138765118,
"grad_norm": 13.492842536158898,
"learning_rate": 2.565502623242654e-06,
"loss": 0.5913677215576172,
"step": 3280
},
{
"epoch": 2.088478676002546,
"grad_norm": 9.916878464298009,
"learning_rate": 2.562268147324964e-06,
"loss": 0.7465711832046509,
"step": 3281
},
{
"epoch": 2.0891152132399746,
"grad_norm": 9.611117503273233,
"learning_rate": 2.5590350090776617e-06,
"loss": 0.6559357643127441,
"step": 3282
},
{
"epoch": 2.089751750477403,
"grad_norm": 8.058500342204235,
"learning_rate": 2.5558032102748852e-06,
"loss": 0.5747423768043518,
"step": 3283
},
{
"epoch": 2.0903882877148314,
"grad_norm": 10.742648777279273,
"learning_rate": 2.5525727526900356e-06,
"loss": 0.3125855624675751,
"step": 3284
},
{
"epoch": 2.09102482495226,
"grad_norm": 11.296454103945338,
"learning_rate": 2.5493436380957816e-06,
"loss": 0.514959454536438,
"step": 3285
},
{
"epoch": 2.0916613621896882,
"grad_norm": 9.074937939487064,
"learning_rate": 2.546115868264053e-06,
"loss": 0.7121496796607971,
"step": 3286
},
{
"epoch": 2.0922978994271166,
"grad_norm": 9.350015709223264,
"learning_rate": 2.5428894449660424e-06,
"loss": 0.39893782138824463,
"step": 3287
},
{
"epoch": 2.092934436664545,
"grad_norm": 10.172545873229877,
"learning_rate": 2.5396643699722014e-06,
"loss": 0.38090085983276367,
"step": 3288
},
{
"epoch": 2.0935709739019734,
"grad_norm": 7.161333925652746,
"learning_rate": 2.536440645052246e-06,
"loss": 0.47203075885772705,
"step": 3289
},
{
"epoch": 2.094207511139402,
"grad_norm": 8.98813571216817,
"learning_rate": 2.5332182719751497e-06,
"loss": 0.908381462097168,
"step": 3290
},
{
"epoch": 2.0948440483768302,
"grad_norm": 10.17741567098585,
"learning_rate": 2.529997252509142e-06,
"loss": 0.35994988679885864,
"step": 3291
},
{
"epoch": 2.0954805856142587,
"grad_norm": 13.046329332971709,
"learning_rate": 2.5267775884217116e-06,
"loss": 0.40062403678894043,
"step": 3292
},
{
"epoch": 2.0961171228516866,
"grad_norm": 10.984336231521858,
"learning_rate": 2.523559281479603e-06,
"loss": 0.29584938287734985,
"step": 3293
},
{
"epoch": 2.096753660089115,
"grad_norm": 8.825847942241639,
"learning_rate": 2.520342333448813e-06,
"loss": 0.21844874322414398,
"step": 3294
},
{
"epoch": 2.0973901973265434,
"grad_norm": 10.869370059987933,
"learning_rate": 2.5171267460946003e-06,
"loss": 0.36157089471817017,
"step": 3295
},
{
"epoch": 2.098026734563972,
"grad_norm": 12.479038780665515,
"learning_rate": 2.5139125211814706e-06,
"loss": 0.4132142961025238,
"step": 3296
},
{
"epoch": 2.0986632718014,
"grad_norm": 13.589806666910155,
"learning_rate": 2.5106996604731835e-06,
"loss": 0.49431464076042175,
"step": 3297
},
{
"epoch": 2.0992998090388286,
"grad_norm": 9.092134090342647,
"learning_rate": 2.5074881657327486e-06,
"loss": 0.24856360256671906,
"step": 3298
},
{
"epoch": 2.099936346276257,
"grad_norm": 11.59206153297446,
"learning_rate": 2.504278038722433e-06,
"loss": 0.2738165855407715,
"step": 3299
},
{
"epoch": 2.1005728835136854,
"grad_norm": 16.841788854770154,
"learning_rate": 2.5010692812037406e-06,
"loss": 0.5679492354393005,
"step": 3300
},
{
"epoch": 2.101209420751114,
"grad_norm": 13.049966543578787,
"learning_rate": 2.4978618949374362e-06,
"loss": 0.8120748996734619,
"step": 3301
},
{
"epoch": 2.1018459579885422,
"grad_norm": 10.370958796341869,
"learning_rate": 2.4946558816835246e-06,
"loss": 0.6072478294372559,
"step": 3302
},
{
"epoch": 2.1024824952259706,
"grad_norm": 16.288540373970964,
"learning_rate": 2.4914512432012614e-06,
"loss": 1.911726951599121,
"step": 3303
},
{
"epoch": 2.103119032463399,
"grad_norm": 12.68702746760304,
"learning_rate": 2.4882479812491446e-06,
"loss": 0.5572983026504517,
"step": 3304
},
{
"epoch": 2.1037555697008274,
"grad_norm": 9.826034235539806,
"learning_rate": 2.4850460975849167e-06,
"loss": 0.5541264414787292,
"step": 3305
},
{
"epoch": 2.104392106938256,
"grad_norm": 12.29051435892266,
"learning_rate": 2.481845593965571e-06,
"loss": 0.847062885761261,
"step": 3306
},
{
"epoch": 2.1050286441756842,
"grad_norm": 7.337326578570493,
"learning_rate": 2.4786464721473307e-06,
"loss": 0.33833038806915283,
"step": 3307
},
{
"epoch": 2.1056651814131127,
"grad_norm": 11.58552968150726,
"learning_rate": 2.4754487338856734e-06,
"loss": 0.49039170145988464,
"step": 3308
},
{
"epoch": 2.106301718650541,
"grad_norm": 12.422496920890385,
"learning_rate": 2.47225238093531e-06,
"loss": 0.5499177575111389,
"step": 3309
},
{
"epoch": 2.1069382558879695,
"grad_norm": 10.993418822033284,
"learning_rate": 2.469057415050194e-06,
"loss": 0.3142160177230835,
"step": 3310
},
{
"epoch": 2.107574793125398,
"grad_norm": 8.08307128642135,
"learning_rate": 2.465863837983515e-06,
"loss": 0.400633305311203,
"step": 3311
},
{
"epoch": 2.1082113303628263,
"grad_norm": 9.531634017421851,
"learning_rate": 2.4626716514877075e-06,
"loss": 0.4359270930290222,
"step": 3312
},
{
"epoch": 2.1088478676002547,
"grad_norm": 10.103996351256544,
"learning_rate": 2.459480857314431e-06,
"loss": 0.5022025108337402,
"step": 3313
},
{
"epoch": 2.109484404837683,
"grad_norm": 13.30724035553672,
"learning_rate": 2.4562914572145944e-06,
"loss": 0.4654034376144409,
"step": 3314
},
{
"epoch": 2.1101209420751115,
"grad_norm": 12.528962490919293,
"learning_rate": 2.453103452938333e-06,
"loss": 0.4383985996246338,
"step": 3315
},
{
"epoch": 2.11075747931254,
"grad_norm": 7.779359381296516,
"learning_rate": 2.4499168462350177e-06,
"loss": 0.534390926361084,
"step": 3316
},
{
"epoch": 2.1113940165499683,
"grad_norm": 11.26314805292291,
"learning_rate": 2.4467316388532514e-06,
"loss": 0.6711363196372986,
"step": 3317
},
{
"epoch": 2.1120305537873967,
"grad_norm": 12.586661952502734,
"learning_rate": 2.4435478325408757e-06,
"loss": 0.3104287385940552,
"step": 3318
},
{
"epoch": 2.112667091024825,
"grad_norm": 15.877232865075307,
"learning_rate": 2.4403654290449578e-06,
"loss": 0.5714265704154968,
"step": 3319
},
{
"epoch": 2.1133036282622535,
"grad_norm": 14.025053810912718,
"learning_rate": 2.4371844301117908e-06,
"loss": 0.7892574667930603,
"step": 3320
},
{
"epoch": 2.113940165499682,
"grad_norm": 5.492807502345089,
"learning_rate": 2.434004837486908e-06,
"loss": 0.17987748980522156,
"step": 3321
},
{
"epoch": 2.1145767027371103,
"grad_norm": 10.495775224170274,
"learning_rate": 2.4308266529150626e-06,
"loss": 0.3170955777168274,
"step": 3322
},
{
"epoch": 2.1152132399745387,
"grad_norm": 11.296350017331362,
"learning_rate": 2.427649878140238e-06,
"loss": 0.35575318336486816,
"step": 3323
},
{
"epoch": 2.115849777211967,
"grad_norm": 10.181222472050495,
"learning_rate": 2.4244745149056425e-06,
"loss": 0.3627958595752716,
"step": 3324
},
{
"epoch": 2.1164863144493955,
"grad_norm": 17.56340979043379,
"learning_rate": 2.4213005649537146e-06,
"loss": 0.43335339426994324,
"step": 3325
},
{
"epoch": 2.1171228516868235,
"grad_norm": 7.677899069873162,
"learning_rate": 2.418128030026112e-06,
"loss": 0.3178519010543823,
"step": 3326
},
{
"epoch": 2.117759388924252,
"grad_norm": 8.990077681724406,
"learning_rate": 2.414956911863717e-06,
"loss": 0.3949454128742218,
"step": 3327
},
{
"epoch": 2.1183959261616803,
"grad_norm": 9.751373077349003,
"learning_rate": 2.4117872122066342e-06,
"loss": 0.5346337556838989,
"step": 3328
},
{
"epoch": 2.1190324633991087,
"grad_norm": 14.43557995717087,
"learning_rate": 2.4086189327941965e-06,
"loss": 1.041166067123413,
"step": 3329
},
{
"epoch": 2.119669000636537,
"grad_norm": 11.471000549538617,
"learning_rate": 2.405452075364944e-06,
"loss": 0.2893381714820862,
"step": 3330
},
{
"epoch": 2.1203055378739655,
"grad_norm": 9.708549926564013,
"learning_rate": 2.40228664165665e-06,
"loss": 0.28888773918151855,
"step": 3331
},
{
"epoch": 2.120942075111394,
"grad_norm": 8.09289181475409,
"learning_rate": 2.399122633406298e-06,
"loss": 0.43209660053253174,
"step": 3332
},
{
"epoch": 2.1215786123488223,
"grad_norm": 13.020839598169353,
"learning_rate": 2.3959600523500935e-06,
"loss": 0.6120706796646118,
"step": 3333
},
{
"epoch": 2.1222151495862507,
"grad_norm": 11.919528418883417,
"learning_rate": 2.392798900223455e-06,
"loss": 0.5933191180229187,
"step": 3334
},
{
"epoch": 2.122851686823679,
"grad_norm": 16.538186311923443,
"learning_rate": 2.389639178761025e-06,
"loss": 0.3194655478000641,
"step": 3335
},
{
"epoch": 2.1234882240611075,
"grad_norm": 11.932840938324789,
"learning_rate": 2.3864808896966503e-06,
"loss": 0.4698467254638672,
"step": 3336
},
{
"epoch": 2.124124761298536,
"grad_norm": 11.383974444908038,
"learning_rate": 2.3833240347633955e-06,
"loss": 0.3712739646434784,
"step": 3337
},
{
"epoch": 2.1247612985359643,
"grad_norm": 14.064938826149863,
"learning_rate": 2.3801686156935445e-06,
"loss": 0.546394944190979,
"step": 3338
},
{
"epoch": 2.1253978357733927,
"grad_norm": 40.742558161554435,
"learning_rate": 2.377014634218585e-06,
"loss": 0.7850564122200012,
"step": 3339
},
{
"epoch": 2.126034373010821,
"grad_norm": 12.769398445637938,
"learning_rate": 2.3738620920692208e-06,
"loss": 0.4774132966995239,
"step": 3340
},
{
"epoch": 2.1266709102482495,
"grad_norm": 13.97263062864823,
"learning_rate": 2.3707109909753613e-06,
"loss": 0.7087494730949402,
"step": 3341
},
{
"epoch": 2.127307447485678,
"grad_norm": 14.869233235968746,
"learning_rate": 2.3675613326661333e-06,
"loss": 0.22911036014556885,
"step": 3342
},
{
"epoch": 2.1279439847231063,
"grad_norm": 12.025692409048506,
"learning_rate": 2.3644131188698598e-06,
"loss": 0.9791948199272156,
"step": 3343
},
{
"epoch": 2.1285805219605347,
"grad_norm": 13.08724211137776,
"learning_rate": 2.3612663513140834e-06,
"loss": 0.2889867126941681,
"step": 3344
},
{
"epoch": 2.129217059197963,
"grad_norm": 12.579778834984843,
"learning_rate": 2.358121031725546e-06,
"loss": 0.9773622751235962,
"step": 3345
},
{
"epoch": 2.1298535964353915,
"grad_norm": 10.412626731111317,
"learning_rate": 2.354977161830196e-06,
"loss": 0.6916447877883911,
"step": 3346
},
{
"epoch": 2.13049013367282,
"grad_norm": 16.34771903703682,
"learning_rate": 2.3518347433531852e-06,
"loss": 0.7564796209335327,
"step": 3347
},
{
"epoch": 2.1311266709102483,
"grad_norm": 9.657327417102435,
"learning_rate": 2.348693778018875e-06,
"loss": 0.4786078631877899,
"step": 3348
},
{
"epoch": 2.1317632081476767,
"grad_norm": 12.092130029235612,
"learning_rate": 2.3455542675508224e-06,
"loss": 0.29180699586868286,
"step": 3349
},
{
"epoch": 2.132399745385105,
"grad_norm": 10.57023457071009,
"learning_rate": 2.34241621367179e-06,
"loss": 0.5671373605728149,
"step": 3350
},
{
"epoch": 2.1330362826225335,
"grad_norm": 13.767941904011684,
"learning_rate": 2.3392796181037363e-06,
"loss": 0.9068795442581177,
"step": 3351
},
{
"epoch": 2.133672819859962,
"grad_norm": 20.86251906321779,
"learning_rate": 2.33614448256783e-06,
"loss": 2.0571038722991943,
"step": 3352
},
{
"epoch": 2.1343093570973903,
"grad_norm": 7.064031628252607,
"learning_rate": 2.333010808784425e-06,
"loss": 0.23232118785381317,
"step": 3353
},
{
"epoch": 2.1349458943348187,
"grad_norm": 10.490484673171892,
"learning_rate": 2.329878598473081e-06,
"loss": 0.23985019326210022,
"step": 3354
},
{
"epoch": 2.135582431572247,
"grad_norm": 10.871615354833349,
"learning_rate": 2.3267478533525588e-06,
"loss": 0.7772335410118103,
"step": 3355
},
{
"epoch": 2.1362189688096755,
"grad_norm": 7.113976929626125,
"learning_rate": 2.323618575140802e-06,
"loss": 0.3307351768016815,
"step": 3356
},
{
"epoch": 2.136855506047104,
"grad_norm": 15.063746459776494,
"learning_rate": 2.320490765554963e-06,
"loss": 0.4741772413253784,
"step": 3357
},
{
"epoch": 2.1374920432845324,
"grad_norm": 9.765039441767453,
"learning_rate": 2.317364426311381e-06,
"loss": 0.36270594596862793,
"step": 3358
},
{
"epoch": 2.1381285805219603,
"grad_norm": 10.138198927802664,
"learning_rate": 2.3142395591255886e-06,
"loss": 0.4879109859466553,
"step": 3359
},
{
"epoch": 2.138765117759389,
"grad_norm": 8.631337639937422,
"learning_rate": 2.311116165712311e-06,
"loss": 0.7709143161773682,
"step": 3360
},
{
"epoch": 2.139401654996817,
"grad_norm": 10.304133281355053,
"learning_rate": 2.307994247785469e-06,
"loss": 1.0132774114608765,
"step": 3361
},
{
"epoch": 2.1400381922342455,
"grad_norm": 10.165065906293025,
"learning_rate": 2.3048738070581684e-06,
"loss": 0.5290195941925049,
"step": 3362
},
{
"epoch": 2.140674729471674,
"grad_norm": 10.864930726399276,
"learning_rate": 2.3017548452427064e-06,
"loss": 0.6377224326133728,
"step": 3363
},
{
"epoch": 2.1413112667091023,
"grad_norm": 10.193155575179244,
"learning_rate": 2.2986373640505665e-06,
"loss": 0.2541917562484741,
"step": 3364
},
{
"epoch": 2.1419478039465307,
"grad_norm": 8.608283836651443,
"learning_rate": 2.2955213651924274e-06,
"loss": 0.5369041562080383,
"step": 3365
},
{
"epoch": 2.142584341183959,
"grad_norm": 14.625959790118241,
"learning_rate": 2.2924068503781417e-06,
"loss": 0.3130542039871216,
"step": 3366
},
{
"epoch": 2.1432208784213875,
"grad_norm": 14.18643094034974,
"learning_rate": 2.289293821316759e-06,
"loss": 0.8524119257926941,
"step": 3367
},
{
"epoch": 2.143857415658816,
"grad_norm": 9.696829447325998,
"learning_rate": 2.2861822797165094e-06,
"loss": 0.6507335901260376,
"step": 3368
},
{
"epoch": 2.1444939528962443,
"grad_norm": 13.733199118600341,
"learning_rate": 2.283072227284806e-06,
"loss": 0.4282124638557434,
"step": 3369
},
{
"epoch": 2.1451304901336727,
"grad_norm": 18.052391064109116,
"learning_rate": 2.2799636657282446e-06,
"loss": 0.31571298837661743,
"step": 3370
},
{
"epoch": 2.145767027371101,
"grad_norm": 10.979925381824847,
"learning_rate": 2.276856596752603e-06,
"loss": 0.2936651110649109,
"step": 3371
},
{
"epoch": 2.1464035646085295,
"grad_norm": 9.207625773915087,
"learning_rate": 2.2737510220628458e-06,
"loss": 1.6048418283462524,
"step": 3372
},
{
"epoch": 2.147040101845958,
"grad_norm": 9.29457373486712,
"learning_rate": 2.2706469433631053e-06,
"loss": 0.2723718285560608,
"step": 3373
},
{
"epoch": 2.1476766390833864,
"grad_norm": 10.815154334010828,
"learning_rate": 2.267544362356705e-06,
"loss": 0.6221505999565125,
"step": 3374
},
{
"epoch": 2.1483131763208148,
"grad_norm": 10.290402728477853,
"learning_rate": 2.2644432807461413e-06,
"loss": 0.4234142005443573,
"step": 3375
},
{
"epoch": 2.148949713558243,
"grad_norm": 19.12884671376245,
"learning_rate": 2.261343700233087e-06,
"loss": 0.3772827386856079,
"step": 3376
},
{
"epoch": 2.1495862507956716,
"grad_norm": 9.55602219342714,
"learning_rate": 2.2582456225183913e-06,
"loss": 0.3183250427246094,
"step": 3377
},
{
"epoch": 2.1502227880331,
"grad_norm": 10.238368888861226,
"learning_rate": 2.255149049302085e-06,
"loss": 0.903436005115509,
"step": 3378
},
{
"epoch": 2.1508593252705284,
"grad_norm": 11.501655274907222,
"learning_rate": 2.2520539822833615e-06,
"loss": 0.8668727278709412,
"step": 3379
},
{
"epoch": 2.1514958625079568,
"grad_norm": 10.771103520530469,
"learning_rate": 2.248960423160599e-06,
"loss": 0.4836471676826477,
"step": 3380
},
{
"epoch": 2.152132399745385,
"grad_norm": 16.828109760744976,
"learning_rate": 2.2458683736313423e-06,
"loss": 0.3216490149497986,
"step": 3381
},
{
"epoch": 2.1527689369828136,
"grad_norm": 13.887107466378085,
"learning_rate": 2.2427778353923098e-06,
"loss": 0.49047866463661194,
"step": 3382
},
{
"epoch": 2.153405474220242,
"grad_norm": 10.932175157083606,
"learning_rate": 2.239688810139387e-06,
"loss": 0.3700225353240967,
"step": 3383
},
{
"epoch": 2.1540420114576704,
"grad_norm": 13.566420319087964,
"learning_rate": 2.236601299567637e-06,
"loss": 0.306808203458786,
"step": 3384
},
{
"epoch": 2.154678548695099,
"grad_norm": 11.56158787347894,
"learning_rate": 2.233515305371285e-06,
"loss": 0.2995290458202362,
"step": 3385
},
{
"epoch": 2.155315085932527,
"grad_norm": 17.568356017949135,
"learning_rate": 2.230430829243725e-06,
"loss": 0.37916237115859985,
"step": 3386
},
{
"epoch": 2.1559516231699556,
"grad_norm": 11.5306787030788,
"learning_rate": 2.22734787287752e-06,
"loss": 0.3119584321975708,
"step": 3387
},
{
"epoch": 2.156588160407384,
"grad_norm": 16.14158189328427,
"learning_rate": 2.224266437964398e-06,
"loss": 0.8759464025497437,
"step": 3388
},
{
"epoch": 2.1572246976448124,
"grad_norm": 10.92577996358869,
"learning_rate": 2.221186526195252e-06,
"loss": 0.593475341796875,
"step": 3389
},
{
"epoch": 2.157861234882241,
"grad_norm": 9.259688538944122,
"learning_rate": 2.2181081392601373e-06,
"loss": 0.3868349492549896,
"step": 3390
},
{
"epoch": 2.158497772119669,
"grad_norm": 19.06674991929584,
"learning_rate": 2.2150312788482786e-06,
"loss": 1.1448237895965576,
"step": 3391
},
{
"epoch": 2.159134309357097,
"grad_norm": 15.166406304972211,
"learning_rate": 2.211955946648057e-06,
"loss": 0.47934362292289734,
"step": 3392
},
{
"epoch": 2.159770846594526,
"grad_norm": 7.844403336761152,
"learning_rate": 2.2088821443470173e-06,
"loss": 0.2168283313512802,
"step": 3393
},
{
"epoch": 2.160407383831954,
"grad_norm": 9.223131609246044,
"learning_rate": 2.205809873631862e-06,
"loss": 0.47109267115592957,
"step": 3394
},
{
"epoch": 2.1610439210693824,
"grad_norm": 8.042898406416613,
"learning_rate": 2.2027391361884616e-06,
"loss": 0.29720538854599,
"step": 3395
},
{
"epoch": 2.1616804583068108,
"grad_norm": 7.93343186703424,
"learning_rate": 2.1996699337018307e-06,
"loss": 0.44718751311302185,
"step": 3396
},
{
"epoch": 2.162316995544239,
"grad_norm": 12.236668570673348,
"learning_rate": 2.1966022678561573e-06,
"loss": 0.23177067935466766,
"step": 3397
},
{
"epoch": 2.1629535327816676,
"grad_norm": 11.421250218894645,
"learning_rate": 2.1935361403347767e-06,
"loss": 0.45731085538864136,
"step": 3398
},
{
"epoch": 2.163590070019096,
"grad_norm": 15.152537089028039,
"learning_rate": 2.1904715528201813e-06,
"loss": 0.6630115509033203,
"step": 3399
},
{
"epoch": 2.1642266072565244,
"grad_norm": 15.471967915610055,
"learning_rate": 2.187408506994019e-06,
"loss": 0.701779842376709,
"step": 3400
},
{
"epoch": 2.164863144493953,
"grad_norm": 10.448807375221385,
"learning_rate": 2.1843470045370975e-06,
"loss": 0.5520093441009521,
"step": 3401
},
{
"epoch": 2.165499681731381,
"grad_norm": 11.663133424177104,
"learning_rate": 2.1812870471293647e-06,
"loss": 0.44490212202072144,
"step": 3402
},
{
"epoch": 2.1661362189688096,
"grad_norm": 11.561579013164737,
"learning_rate": 2.178228636449934e-06,
"loss": 0.37317323684692383,
"step": 3403
},
{
"epoch": 2.166772756206238,
"grad_norm": 8.431268474419799,
"learning_rate": 2.1751717741770623e-06,
"loss": 0.2616347372531891,
"step": 3404
},
{
"epoch": 2.1674092934436664,
"grad_norm": 8.198634993492524,
"learning_rate": 2.172116461988159e-06,
"loss": 0.24132557213306427,
"step": 3405
},
{
"epoch": 2.168045830681095,
"grad_norm": 15.943977266921602,
"learning_rate": 2.169062701559783e-06,
"loss": 0.5138012766838074,
"step": 3406
},
{
"epoch": 2.168682367918523,
"grad_norm": 8.866395917936815,
"learning_rate": 2.1660104945676402e-06,
"loss": 0.3448604643344879,
"step": 3407
},
{
"epoch": 2.1693189051559516,
"grad_norm": 14.056501432611842,
"learning_rate": 2.16295984268659e-06,
"loss": 0.5287642478942871,
"step": 3408
},
{
"epoch": 2.16995544239338,
"grad_norm": 8.825882101897346,
"learning_rate": 2.159910747590627e-06,
"loss": 0.5530349016189575,
"step": 3409
},
{
"epoch": 2.1705919796308084,
"grad_norm": 5.441821345737116,
"learning_rate": 2.156863210952904e-06,
"loss": 0.2527669370174408,
"step": 3410
},
{
"epoch": 2.171228516868237,
"grad_norm": 8.826445358427215,
"learning_rate": 2.1538172344457105e-06,
"loss": 0.566216230392456,
"step": 3411
},
{
"epoch": 2.171865054105665,
"grad_norm": 9.262464590061974,
"learning_rate": 2.150772819740483e-06,
"loss": 0.247681125998497,
"step": 3412
},
{
"epoch": 2.1725015913430936,
"grad_norm": 7.934189991415934,
"learning_rate": 2.147729968507799e-06,
"loss": 0.20549309253692627,
"step": 3413
},
{
"epoch": 2.173138128580522,
"grad_norm": 9.28831714399594,
"learning_rate": 2.1446886824173825e-06,
"loss": 0.33926114439964294,
"step": 3414
},
{
"epoch": 2.1737746658179504,
"grad_norm": 14.385866919389695,
"learning_rate": 2.1416489631380933e-06,
"loss": 0.5584162473678589,
"step": 3415
},
{
"epoch": 2.174411203055379,
"grad_norm": 16.47689599944588,
"learning_rate": 2.1386108123379344e-06,
"loss": 0.6566636562347412,
"step": 3416
},
{
"epoch": 2.1750477402928072,
"grad_norm": 12.663563404690024,
"learning_rate": 2.135574231684046e-06,
"loss": 0.6372646689414978,
"step": 3417
},
{
"epoch": 2.1756842775302356,
"grad_norm": 11.914424264265422,
"learning_rate": 2.132539222842713e-06,
"loss": 0.3574853539466858,
"step": 3418
},
{
"epoch": 2.176320814767664,
"grad_norm": 7.612202698436665,
"learning_rate": 2.129505787479346e-06,
"loss": 0.38791799545288086,
"step": 3419
},
{
"epoch": 2.1769573520050924,
"grad_norm": 11.840601030317876,
"learning_rate": 2.126473927258505e-06,
"loss": 0.443769246339798,
"step": 3420
},
{
"epoch": 2.177593889242521,
"grad_norm": 15.072477442673236,
"learning_rate": 2.123443643843879e-06,
"loss": 0.5991731286048889,
"step": 3421
},
{
"epoch": 2.1782304264799492,
"grad_norm": 9.692132708178386,
"learning_rate": 2.120414938898287e-06,
"loss": 0.4930022358894348,
"step": 3422
},
{
"epoch": 2.1788669637173776,
"grad_norm": 15.720705301313773,
"learning_rate": 2.1173878140836935e-06,
"loss": 1.2701705694198608,
"step": 3423
},
{
"epoch": 2.179503500954806,
"grad_norm": 11.093115176269182,
"learning_rate": 2.1143622710611876e-06,
"loss": 0.4316216707229614,
"step": 3424
},
{
"epoch": 2.1801400381922345,
"grad_norm": 8.613318889664129,
"learning_rate": 2.111338311490993e-06,
"loss": 0.48520854115486145,
"step": 3425
},
{
"epoch": 2.180776575429663,
"grad_norm": 11.89834708055669,
"learning_rate": 2.1083159370324624e-06,
"loss": 0.7003180980682373,
"step": 3426
},
{
"epoch": 2.181413112667091,
"grad_norm": 10.511478621180155,
"learning_rate": 2.105295149344083e-06,
"loss": 0.31332698464393616,
"step": 3427
},
{
"epoch": 2.182049649904519,
"grad_norm": 8.056646325285707,
"learning_rate": 2.1022759500834683e-06,
"loss": 0.5531500577926636,
"step": 3428
},
{
"epoch": 2.1826861871419476,
"grad_norm": 9.398688177006418,
"learning_rate": 2.0992583409073595e-06,
"loss": 0.7712733149528503,
"step": 3429
},
{
"epoch": 2.183322724379376,
"grad_norm": 11.817155962412096,
"learning_rate": 2.0962423234716257e-06,
"loss": 0.3385595381259918,
"step": 3430
},
{
"epoch": 2.1839592616168044,
"grad_norm": 8.082736627038416,
"learning_rate": 2.093227899431268e-06,
"loss": 0.615544319152832,
"step": 3431
},
{
"epoch": 2.184595798854233,
"grad_norm": 7.406037118037002,
"learning_rate": 2.0902150704404005e-06,
"loss": 0.5680183172225952,
"step": 3432
},
{
"epoch": 2.1852323360916612,
"grad_norm": 8.77753492251007,
"learning_rate": 2.0872038381522756e-06,
"loss": 0.3414529860019684,
"step": 3433
},
{
"epoch": 2.1858688733290896,
"grad_norm": 14.233806480293957,
"learning_rate": 2.084194204219263e-06,
"loss": 0.4020627737045288,
"step": 3434
},
{
"epoch": 2.186505410566518,
"grad_norm": 14.750428545877757,
"learning_rate": 2.0811861702928554e-06,
"loss": 0.2982262969017029,
"step": 3435
},
{
"epoch": 2.1871419478039464,
"grad_norm": 9.438642641641179,
"learning_rate": 2.0781797380236664e-06,
"loss": 0.287892609834671,
"step": 3436
},
{
"epoch": 2.187778485041375,
"grad_norm": 15.392978053152753,
"learning_rate": 2.075174909061436e-06,
"loss": 0.5279977917671204,
"step": 3437
},
{
"epoch": 2.1884150222788032,
"grad_norm": 10.211965640112897,
"learning_rate": 2.072171685055021e-06,
"loss": 0.7710495591163635,
"step": 3438
},
{
"epoch": 2.1890515595162316,
"grad_norm": 17.000200447049338,
"learning_rate": 2.069170067652393e-06,
"loss": 0.6563599109649658,
"step": 3439
},
{
"epoch": 2.18968809675366,
"grad_norm": 12.776188039892988,
"learning_rate": 2.066170058500651e-06,
"loss": 0.1997983604669571,
"step": 3440
},
{
"epoch": 2.1903246339910885,
"grad_norm": 14.253037502020888,
"learning_rate": 2.063171659246006e-06,
"loss": 0.7213305830955505,
"step": 3441
},
{
"epoch": 2.190961171228517,
"grad_norm": 7.797513778541945,
"learning_rate": 2.0601748715337855e-06,
"loss": 0.30038130283355713,
"step": 3442
},
{
"epoch": 2.1915977084659453,
"grad_norm": 8.828566563812117,
"learning_rate": 2.0571796970084325e-06,
"loss": 0.3803648352622986,
"step": 3443
},
{
"epoch": 2.1922342457033737,
"grad_norm": 8.664138024964915,
"learning_rate": 2.054186137313512e-06,
"loss": 0.20640689134597778,
"step": 3444
},
{
"epoch": 2.192870782940802,
"grad_norm": 9.637871630571919,
"learning_rate": 2.0511941940916886e-06,
"loss": 0.4140985906124115,
"step": 3445
},
{
"epoch": 2.1935073201782305,
"grad_norm": 14.413703626032627,
"learning_rate": 2.0482038689847543e-06,
"loss": 0.5255602598190308,
"step": 3446
},
{
"epoch": 2.194143857415659,
"grad_norm": 10.79025408848945,
"learning_rate": 2.0452151636336056e-06,
"loss": 0.6795587539672852,
"step": 3447
},
{
"epoch": 2.1947803946530873,
"grad_norm": 22.437331601177345,
"learning_rate": 2.0422280796782506e-06,
"loss": 0.780468761920929,
"step": 3448
},
{
"epoch": 2.1954169318905157,
"grad_norm": 7.879027645388186,
"learning_rate": 2.0392426187578083e-06,
"loss": 0.36017906665802,
"step": 3449
},
{
"epoch": 2.196053469127944,
"grad_norm": 15.99673880556755,
"learning_rate": 2.0362587825105106e-06,
"loss": 0.9976467490196228,
"step": 3450
},
{
"epoch": 2.1966900063653725,
"grad_norm": 14.272801791911292,
"learning_rate": 2.033276572573693e-06,
"loss": 0.8597637414932251,
"step": 3451
},
{
"epoch": 2.197326543602801,
"grad_norm": 10.17845326878243,
"learning_rate": 2.0302959905838004e-06,
"loss": 0.7297912836074829,
"step": 3452
},
{
"epoch": 2.1979630808402293,
"grad_norm": 13.628742232472177,
"learning_rate": 2.0273170381763826e-06,
"loss": 0.41223394870758057,
"step": 3453
},
{
"epoch": 2.1985996180776577,
"grad_norm": 8.170955544592292,
"learning_rate": 2.0243397169861025e-06,
"loss": 0.632558286190033,
"step": 3454
},
{
"epoch": 2.199236155315086,
"grad_norm": 12.061276941034809,
"learning_rate": 2.021364028646716e-06,
"loss": 0.20762304961681366,
"step": 3455
},
{
"epoch": 2.1998726925525145,
"grad_norm": 11.979995957223165,
"learning_rate": 2.018389974791092e-06,
"loss": 0.6424295902252197,
"step": 3456
},
{
"epoch": 2.200509229789943,
"grad_norm": 9.486108115100198,
"learning_rate": 2.0154175570512e-06,
"loss": 0.6736075282096863,
"step": 3457
},
{
"epoch": 2.2011457670273713,
"grad_norm": 7.480055412604567,
"learning_rate": 2.012446777058113e-06,
"loss": 0.3194897174835205,
"step": 3458
},
{
"epoch": 2.2017823042647997,
"grad_norm": 10.170951742702815,
"learning_rate": 2.0094776364420023e-06,
"loss": 0.2124815732240677,
"step": 3459
},
{
"epoch": 2.2024188415022277,
"grad_norm": 10.127641246813296,
"learning_rate": 2.0065101368321393e-06,
"loss": 0.24739108979701996,
"step": 3460
},
{
"epoch": 2.203055378739656,
"grad_norm": 9.648299884470832,
"learning_rate": 2.0035442798569028e-06,
"loss": 0.4586668312549591,
"step": 3461
},
{
"epoch": 2.2036919159770845,
"grad_norm": 7.732486438234397,
"learning_rate": 2.000580067143757e-06,
"loss": 0.5461676120758057,
"step": 3462
},
{
"epoch": 2.204328453214513,
"grad_norm": 12.03816666748737,
"learning_rate": 1.997617500319276e-06,
"loss": 0.3828747570514679,
"step": 3463
},
{
"epoch": 2.2049649904519413,
"grad_norm": 10.413194518688728,
"learning_rate": 1.9946565810091243e-06,
"loss": 0.6122806072235107,
"step": 3464
},
{
"epoch": 2.2056015276893697,
"grad_norm": 11.951136610172114,
"learning_rate": 1.9916973108380643e-06,
"loss": 0.8122888803482056,
"step": 3465
},
{
"epoch": 2.206238064926798,
"grad_norm": 10.950814954022292,
"learning_rate": 1.988739691429951e-06,
"loss": 0.4455221891403198,
"step": 3466
},
{
"epoch": 2.2068746021642265,
"grad_norm": 12.47943814048296,
"learning_rate": 1.985783724407741e-06,
"loss": 0.36648666858673096,
"step": 3467
},
{
"epoch": 2.207511139401655,
"grad_norm": 9.063001941567858,
"learning_rate": 1.9828294113934713e-06,
"loss": 0.2546212673187256,
"step": 3468
},
{
"epoch": 2.2081476766390833,
"grad_norm": 16.061139222682293,
"learning_rate": 1.9798767540082853e-06,
"loss": 0.4800608158111572,
"step": 3469
},
{
"epoch": 2.2087842138765117,
"grad_norm": 9.170828154978512,
"learning_rate": 1.9769257538724077e-06,
"loss": 0.6345722079277039,
"step": 3470
},
{
"epoch": 2.20942075111394,
"grad_norm": 16.10928402086594,
"learning_rate": 1.9739764126051598e-06,
"loss": 0.5042457580566406,
"step": 3471
},
{
"epoch": 2.2100572883513685,
"grad_norm": 18.298420948761144,
"learning_rate": 1.9710287318249482e-06,
"loss": 0.6672847867012024,
"step": 3472
},
{
"epoch": 2.210693825588797,
"grad_norm": 12.030607632707705,
"learning_rate": 1.9680827131492698e-06,
"loss": 0.3506079912185669,
"step": 3473
},
{
"epoch": 2.2113303628262253,
"grad_norm": 12.246713934244447,
"learning_rate": 1.9651383581947147e-06,
"loss": 0.7041405439376831,
"step": 3474
},
{
"epoch": 2.2119669000636537,
"grad_norm": 14.179514272460512,
"learning_rate": 1.9621956685769493e-06,
"loss": 0.9863985180854797,
"step": 3475
},
{
"epoch": 2.212603437301082,
"grad_norm": 18.424120635555123,
"learning_rate": 1.9592546459107376e-06,
"loss": 1.1433643102645874,
"step": 3476
},
{
"epoch": 2.2132399745385105,
"grad_norm": 9.587679560238161,
"learning_rate": 1.9563152918099205e-06,
"loss": 1.1305607557296753,
"step": 3477
},
{
"epoch": 2.213876511775939,
"grad_norm": 19.40340803531654,
"learning_rate": 1.953377607887428e-06,
"loss": 0.3361985981464386,
"step": 3478
},
{
"epoch": 2.2145130490133673,
"grad_norm": 9.603731451813786,
"learning_rate": 1.950441595755269e-06,
"loss": 0.2718903124332428,
"step": 3479
},
{
"epoch": 2.2151495862507957,
"grad_norm": 9.83675170481703,
"learning_rate": 1.9475072570245423e-06,
"loss": 0.5077819228172302,
"step": 3480
},
{
"epoch": 2.215786123488224,
"grad_norm": 11.252206727126238,
"learning_rate": 1.9445745933054223e-06,
"loss": 0.483068585395813,
"step": 3481
},
{
"epoch": 2.2164226607256525,
"grad_norm": 9.441422576334581,
"learning_rate": 1.941643606207166e-06,
"loss": 0.32851487398147583,
"step": 3482
},
{
"epoch": 2.217059197963081,
"grad_norm": 9.857250124104858,
"learning_rate": 1.938714297338111e-06,
"loss": 0.3394015431404114,
"step": 3483
},
{
"epoch": 2.2176957352005093,
"grad_norm": 15.190433733779852,
"learning_rate": 1.935786668305672e-06,
"loss": 0.47739294171333313,
"step": 3484
},
{
"epoch": 2.2183322724379377,
"grad_norm": 12.466585032510054,
"learning_rate": 1.9328607207163434e-06,
"loss": 0.4199254810810089,
"step": 3485
},
{
"epoch": 2.218968809675366,
"grad_norm": 18.101362610018096,
"learning_rate": 1.9299364561757005e-06,
"loss": 1.0428451299667358,
"step": 3486
},
{
"epoch": 2.2196053469127945,
"grad_norm": 8.797168262568427,
"learning_rate": 1.9270138762883883e-06,
"loss": 0.35159802436828613,
"step": 3487
},
{
"epoch": 2.220241884150223,
"grad_norm": 7.294094135618442,
"learning_rate": 1.9240929826581324e-06,
"loss": 0.268513560295105,
"step": 3488
},
{
"epoch": 2.2208784213876513,
"grad_norm": 9.797229909640722,
"learning_rate": 1.921173776887729e-06,
"loss": 0.7970614433288574,
"step": 3489
},
{
"epoch": 2.2215149586250797,
"grad_norm": 13.17501761413796,
"learning_rate": 1.918256260579053e-06,
"loss": 0.4983261227607727,
"step": 3490
},
{
"epoch": 2.222151495862508,
"grad_norm": 11.075571985620726,
"learning_rate": 1.9153404353330474e-06,
"loss": 0.5156108140945435,
"step": 3491
},
{
"epoch": 2.2227880330999366,
"grad_norm": 9.991317988793591,
"learning_rate": 1.912426302749729e-06,
"loss": 0.7613148093223572,
"step": 3492
},
{
"epoch": 2.2234245703373645,
"grad_norm": 13.125191694729143,
"learning_rate": 1.9095138644281895e-06,
"loss": 0.3501238226890564,
"step": 3493
},
{
"epoch": 2.2240611075747934,
"grad_norm": 9.294124454311604,
"learning_rate": 1.906603121966586e-06,
"loss": 0.6417360305786133,
"step": 3494
},
{
"epoch": 2.2246976448122213,
"grad_norm": 9.351177396602285,
"learning_rate": 1.9036940769621464e-06,
"loss": 0.27995768189430237,
"step": 3495
},
{
"epoch": 2.2253341820496497,
"grad_norm": 14.206047223911522,
"learning_rate": 1.9007867310111655e-06,
"loss": 0.4409753382205963,
"step": 3496
},
{
"epoch": 2.225970719287078,
"grad_norm": 9.337434032529131,
"learning_rate": 1.8978810857090136e-06,
"loss": 0.3434121012687683,
"step": 3497
},
{
"epoch": 2.2266072565245065,
"grad_norm": 7.5565624664541735,
"learning_rate": 1.894977142650114e-06,
"loss": 0.40334582328796387,
"step": 3498
},
{
"epoch": 2.227243793761935,
"grad_norm": 12.59713529766472,
"learning_rate": 1.8920749034279695e-06,
"loss": 0.2577935457229614,
"step": 3499
},
{
"epoch": 2.2278803309993633,
"grad_norm": 12.102862449583023,
"learning_rate": 1.889174369635141e-06,
"loss": 0.5428228974342346,
"step": 3500
},
{
"epoch": 2.2285168682367917,
"grad_norm": 17.341617869002587,
"learning_rate": 1.886275542863254e-06,
"loss": 0.863734245300293,
"step": 3501
},
{
"epoch": 2.22915340547422,
"grad_norm": 9.91678128764048,
"learning_rate": 1.8833784247029968e-06,
"loss": 0.9740700721740723,
"step": 3502
},
{
"epoch": 2.2297899427116485,
"grad_norm": 8.32449044346867,
"learning_rate": 1.880483016744125e-06,
"loss": 0.4950059652328491,
"step": 3503
},
{
"epoch": 2.230426479949077,
"grad_norm": 10.52685661018625,
"learning_rate": 1.8775893205754503e-06,
"loss": 0.5759857892990112,
"step": 3504
},
{
"epoch": 2.2310630171865053,
"grad_norm": 10.75385839034482,
"learning_rate": 1.8746973377848465e-06,
"loss": 0.6030562520027161,
"step": 3505
},
{
"epoch": 2.2316995544239338,
"grad_norm": 9.66589218132674,
"learning_rate": 1.8718070699592482e-06,
"loss": 0.29806768894195557,
"step": 3506
},
{
"epoch": 2.232336091661362,
"grad_norm": 19.13089110715104,
"learning_rate": 1.8689185186846487e-06,
"loss": 0.7423710227012634,
"step": 3507
},
{
"epoch": 2.2329726288987906,
"grad_norm": 8.660819879371079,
"learning_rate": 1.866031685546098e-06,
"loss": 0.3044222295284271,
"step": 3508
},
{
"epoch": 2.233609166136219,
"grad_norm": 11.72671394723013,
"learning_rate": 1.8631465721277037e-06,
"loss": 0.24697977304458618,
"step": 3509
},
{
"epoch": 2.2342457033736474,
"grad_norm": 9.81009915238982,
"learning_rate": 1.8602631800126352e-06,
"loss": 0.33691731095314026,
"step": 3510
},
{
"epoch": 2.2348822406110758,
"grad_norm": 8.567721807499018,
"learning_rate": 1.8573815107831062e-06,
"loss": 0.6139578223228455,
"step": 3511
},
{
"epoch": 2.235518777848504,
"grad_norm": 10.927962860060859,
"learning_rate": 1.8545015660203952e-06,
"loss": 0.4013819098472595,
"step": 3512
},
{
"epoch": 2.2361553150859326,
"grad_norm": 11.724217216411903,
"learning_rate": 1.85162334730483e-06,
"loss": 0.5242598652839661,
"step": 3513
},
{
"epoch": 2.236791852323361,
"grad_norm": 16.38145871625454,
"learning_rate": 1.8487468562157917e-06,
"loss": 0.5505284070968628,
"step": 3514
},
{
"epoch": 2.2374283895607894,
"grad_norm": 13.25113768978638,
"learning_rate": 1.845872094331711e-06,
"loss": 0.7210999131202698,
"step": 3515
},
{
"epoch": 2.238064926798218,
"grad_norm": 11.672761794127785,
"learning_rate": 1.842999063230077e-06,
"loss": 0.5329117178916931,
"step": 3516
},
{
"epoch": 2.238701464035646,
"grad_norm": 11.357489189319121,
"learning_rate": 1.8401277644874216e-06,
"loss": 1.5066075325012207,
"step": 3517
},
{
"epoch": 2.2393380012730746,
"grad_norm": 7.904148975413503,
"learning_rate": 1.8372581996793287e-06,
"loss": 0.36226022243499756,
"step": 3518
},
{
"epoch": 2.239974538510503,
"grad_norm": 12.1270379916499,
"learning_rate": 1.83439037038043e-06,
"loss": 0.5828202366828918,
"step": 3519
},
{
"epoch": 2.2406110757479314,
"grad_norm": 14.331951695286904,
"learning_rate": 1.8315242781644099e-06,
"loss": 0.40014493465423584,
"step": 3520
},
{
"epoch": 2.24124761298536,
"grad_norm": 18.50762287265391,
"learning_rate": 1.8286599246039888e-06,
"loss": 0.5343925952911377,
"step": 3521
},
{
"epoch": 2.241884150222788,
"grad_norm": 11.85502060687402,
"learning_rate": 1.8257973112709453e-06,
"loss": 0.653959333896637,
"step": 3522
},
{
"epoch": 2.2425206874602166,
"grad_norm": 11.992880165168499,
"learning_rate": 1.8229364397360954e-06,
"loss": 0.31213849782943726,
"step": 3523
},
{
"epoch": 2.243157224697645,
"grad_norm": 12.159627487239916,
"learning_rate": 1.820077311569301e-06,
"loss": 0.4305686354637146,
"step": 3524
},
{
"epoch": 2.2437937619350734,
"grad_norm": 12.667597733768334,
"learning_rate": 1.8172199283394682e-06,
"loss": 0.49271827936172485,
"step": 3525
},
{
"epoch": 2.2444302991725014,
"grad_norm": 10.54382177570777,
"learning_rate": 1.8143642916145443e-06,
"loss": 0.7646178603172302,
"step": 3526
},
{
"epoch": 2.24506683640993,
"grad_norm": 10.696436714563873,
"learning_rate": 1.8115104029615194e-06,
"loss": 0.7205324769020081,
"step": 3527
},
{
"epoch": 2.245703373647358,
"grad_norm": 8.487431434568393,
"learning_rate": 1.8086582639464228e-06,
"loss": 0.6558152437210083,
"step": 3528
},
{
"epoch": 2.2463399108847866,
"grad_norm": 11.426331407500314,
"learning_rate": 1.8058078761343273e-06,
"loss": 0.43178167939186096,
"step": 3529
},
{
"epoch": 2.246976448122215,
"grad_norm": 10.959997262762583,
"learning_rate": 1.8029592410893414e-06,
"loss": 0.5075429677963257,
"step": 3530
},
{
"epoch": 2.2476129853596434,
"grad_norm": 8.905995860964222,
"learning_rate": 1.8001123603746123e-06,
"loss": 0.2820245921611786,
"step": 3531
},
{
"epoch": 2.248249522597072,
"grad_norm": 13.666972145092824,
"learning_rate": 1.7972672355523235e-06,
"loss": 0.6120468974113464,
"step": 3532
},
{
"epoch": 2.2488860598345,
"grad_norm": 10.946458503354657,
"learning_rate": 1.7944238681837012e-06,
"loss": 0.23049457371234894,
"step": 3533
},
{
"epoch": 2.2495225970719286,
"grad_norm": 8.114239993841824,
"learning_rate": 1.791582259828996e-06,
"loss": 0.3650050461292267,
"step": 3534
},
{
"epoch": 2.250159134309357,
"grad_norm": 12.544174489980014,
"learning_rate": 1.788742412047505e-06,
"loss": 0.5506513714790344,
"step": 3535
},
{
"epoch": 2.2507956715467854,
"grad_norm": 7.435459228814035,
"learning_rate": 1.7859043263975517e-06,
"loss": 0.5111548900604248,
"step": 3536
},
{
"epoch": 2.251432208784214,
"grad_norm": 9.983069033347435,
"learning_rate": 1.783068004436495e-06,
"loss": 0.4751511812210083,
"step": 3537
},
{
"epoch": 2.252068746021642,
"grad_norm": 11.77034056871515,
"learning_rate": 1.7802334477207238e-06,
"loss": 0.2234572470188141,
"step": 3538
},
{
"epoch": 2.2527052832590706,
"grad_norm": 9.735581359291695,
"learning_rate": 1.7774006578056652e-06,
"loss": 0.7333520650863647,
"step": 3539
},
{
"epoch": 2.253341820496499,
"grad_norm": 11.904991462573685,
"learning_rate": 1.774569636245771e-06,
"loss": 0.28692349791526794,
"step": 3540
},
{
"epoch": 2.2539783577339274,
"grad_norm": 9.358363138502039,
"learning_rate": 1.7717403845945185e-06,
"loss": 0.613878607749939,
"step": 3541
},
{
"epoch": 2.254614894971356,
"grad_norm": 12.091175156448605,
"learning_rate": 1.7689129044044245e-06,
"loss": 0.57314532995224,
"step": 3542
},
{
"epoch": 2.255251432208784,
"grad_norm": 10.635856312842957,
"learning_rate": 1.766087197227026e-06,
"loss": 0.48670288920402527,
"step": 3543
},
{
"epoch": 2.2558879694462126,
"grad_norm": 11.618410214495096,
"learning_rate": 1.76326326461289e-06,
"loss": 0.2895565629005432,
"step": 3544
},
{
"epoch": 2.256524506683641,
"grad_norm": 18.28761505687289,
"learning_rate": 1.760441108111607e-06,
"loss": 0.8712286353111267,
"step": 3545
},
{
"epoch": 2.2571610439210694,
"grad_norm": 12.308177743704901,
"learning_rate": 1.7576207292717979e-06,
"loss": 0.41069579124450684,
"step": 3546
},
{
"epoch": 2.257797581158498,
"grad_norm": 14.432734857390464,
"learning_rate": 1.7548021296411033e-06,
"loss": 0.4294593334197998,
"step": 3547
},
{
"epoch": 2.2584341183959262,
"grad_norm": 8.502333486055484,
"learning_rate": 1.7519853107661904e-06,
"loss": 0.23566997051239014,
"step": 3548
},
{
"epoch": 2.2590706556333546,
"grad_norm": 14.074281353047315,
"learning_rate": 1.7491702741927475e-06,
"loss": 0.49657586216926575,
"step": 3549
},
{
"epoch": 2.259707192870783,
"grad_norm": 9.910752879118258,
"learning_rate": 1.7463570214654852e-06,
"loss": 1.0669946670532227,
"step": 3550
},
{
"epoch": 2.2603437301082114,
"grad_norm": 10.602541481981014,
"learning_rate": 1.7435455541281343e-06,
"loss": 0.26584136486053467,
"step": 3551
},
{
"epoch": 2.26098026734564,
"grad_norm": 9.008666869195622,
"learning_rate": 1.7407358737234503e-06,
"loss": 0.7706364393234253,
"step": 3552
},
{
"epoch": 2.2616168045830682,
"grad_norm": 16.070066176018877,
"learning_rate": 1.7379279817932037e-06,
"loss": 1.0075418949127197,
"step": 3553
},
{
"epoch": 2.2622533418204966,
"grad_norm": 10.117869692663858,
"learning_rate": 1.7351218798781849e-06,
"loss": 1.0107684135437012,
"step": 3554
},
{
"epoch": 2.262889879057925,
"grad_norm": 9.314656091569422,
"learning_rate": 1.7323175695182e-06,
"loss": 0.6869802474975586,
"step": 3555
},
{
"epoch": 2.2635264162953534,
"grad_norm": 11.814939794631169,
"learning_rate": 1.7295150522520792e-06,
"loss": 0.528224527835846,
"step": 3556
},
{
"epoch": 2.264162953532782,
"grad_norm": 7.247212661080214,
"learning_rate": 1.726714329617659e-06,
"loss": 0.2727453112602234,
"step": 3557
},
{
"epoch": 2.2647994907702103,
"grad_norm": 9.98633554031247,
"learning_rate": 1.7239154031517957e-06,
"loss": 0.9306679368019104,
"step": 3558
},
{
"epoch": 2.265436028007638,
"grad_norm": 9.587220257942565,
"learning_rate": 1.7211182743903631e-06,
"loss": 0.4386458098888397,
"step": 3559
},
{
"epoch": 2.266072565245067,
"grad_norm": 13.838614818326626,
"learning_rate": 1.7183229448682436e-06,
"loss": 0.42543673515319824,
"step": 3560
},
{
"epoch": 2.266709102482495,
"grad_norm": 9.473609878611798,
"learning_rate": 1.7155294161193348e-06,
"loss": 0.6633802056312561,
"step": 3561
},
{
"epoch": 2.2673456397199234,
"grad_norm": 15.38725086141798,
"learning_rate": 1.7127376896765436e-06,
"loss": 0.4467209577560425,
"step": 3562
},
{
"epoch": 2.267982176957352,
"grad_norm": 13.185191378907108,
"learning_rate": 1.7099477670717946e-06,
"loss": 0.8797957301139832,
"step": 3563
},
{
"epoch": 2.2686187141947802,
"grad_norm": 15.28725312673086,
"learning_rate": 1.7071596498360116e-06,
"loss": 0.6036324501037598,
"step": 3564
},
{
"epoch": 2.2692552514322086,
"grad_norm": 12.901406334213773,
"learning_rate": 1.7043733394991386e-06,
"loss": 0.6327831745147705,
"step": 3565
},
{
"epoch": 2.269891788669637,
"grad_norm": 13.033898575857316,
"learning_rate": 1.7015888375901223e-06,
"loss": 0.4433327913284302,
"step": 3566
},
{
"epoch": 2.2705283259070654,
"grad_norm": 9.735752826770067,
"learning_rate": 1.698806145636917e-06,
"loss": 0.4037848114967346,
"step": 3567
},
{
"epoch": 2.271164863144494,
"grad_norm": 9.852589667866491,
"learning_rate": 1.6960252651664843e-06,
"loss": 0.5703111290931702,
"step": 3568
},
{
"epoch": 2.2718014003819222,
"grad_norm": 12.487549278669688,
"learning_rate": 1.6932461977047976e-06,
"loss": 0.390015184879303,
"step": 3569
},
{
"epoch": 2.2724379376193506,
"grad_norm": 14.724846728798648,
"learning_rate": 1.690468944776823e-06,
"loss": 0.9128044247627258,
"step": 3570
},
{
"epoch": 2.273074474856779,
"grad_norm": 12.091500775810264,
"learning_rate": 1.6876935079065438e-06,
"loss": 0.5277889966964722,
"step": 3571
},
{
"epoch": 2.2737110120942075,
"grad_norm": 8.654633805223162,
"learning_rate": 1.6849198886169389e-06,
"loss": 0.22507601976394653,
"step": 3572
},
{
"epoch": 2.274347549331636,
"grad_norm": 18.948358571595122,
"learning_rate": 1.6821480884299924e-06,
"loss": 0.3390110433101654,
"step": 3573
},
{
"epoch": 2.2749840865690643,
"grad_norm": 10.613780305031355,
"learning_rate": 1.67937810886669e-06,
"loss": 0.21446850895881653,
"step": 3574
},
{
"epoch": 2.2756206238064927,
"grad_norm": 14.447555420927046,
"learning_rate": 1.6766099514470163e-06,
"loss": 0.6864573955535889,
"step": 3575
},
{
"epoch": 2.276257161043921,
"grad_norm": 13.672767198982472,
"learning_rate": 1.673843617689963e-06,
"loss": 0.37464284896850586,
"step": 3576
},
{
"epoch": 2.2768936982813495,
"grad_norm": 9.297357998616375,
"learning_rate": 1.6710791091135097e-06,
"loss": 0.6171706914901733,
"step": 3577
},
{
"epoch": 2.277530235518778,
"grad_norm": 12.898598837075022,
"learning_rate": 1.6683164272346459e-06,
"loss": 0.5255354046821594,
"step": 3578
},
{
"epoch": 2.2781667727562063,
"grad_norm": 15.456525894320567,
"learning_rate": 1.665555573569352e-06,
"loss": 0.4737781882286072,
"step": 3579
},
{
"epoch": 2.2788033099936347,
"grad_norm": 12.7709637980077,
"learning_rate": 1.662796549632606e-06,
"loss": 0.46658074855804443,
"step": 3580
},
{
"epoch": 2.279439847231063,
"grad_norm": 13.404313326487049,
"learning_rate": 1.6600393569383817e-06,
"loss": 1.0031671524047852,
"step": 3581
},
{
"epoch": 2.2800763844684915,
"grad_norm": 16.7742953789968,
"learning_rate": 1.6572839969996524e-06,
"loss": 0.32030603289604187,
"step": 3582
},
{
"epoch": 2.28071292170592,
"grad_norm": 13.927633778218528,
"learning_rate": 1.6545304713283806e-06,
"loss": 0.3638037443161011,
"step": 3583
},
{
"epoch": 2.2813494589433483,
"grad_norm": 6.9188645768966675,
"learning_rate": 1.651778781435524e-06,
"loss": 0.15295016765594482,
"step": 3584
},
{
"epoch": 2.2819859961807767,
"grad_norm": 14.31366451095305,
"learning_rate": 1.6490289288310313e-06,
"loss": 0.4253908395767212,
"step": 3585
},
{
"epoch": 2.282622533418205,
"grad_norm": 13.682501342420357,
"learning_rate": 1.64628091502385e-06,
"loss": 0.46490031480789185,
"step": 3586
},
{
"epoch": 2.2832590706556335,
"grad_norm": 8.150257314012448,
"learning_rate": 1.643534741521906e-06,
"loss": 0.5571666359901428,
"step": 3587
},
{
"epoch": 2.283895607893062,
"grad_norm": 9.60175840484246,
"learning_rate": 1.6407904098321282e-06,
"loss": 0.22285878658294678,
"step": 3588
},
{
"epoch": 2.2845321451304903,
"grad_norm": 10.014175229236065,
"learning_rate": 1.6380479214604267e-06,
"loss": 0.32916271686553955,
"step": 3589
},
{
"epoch": 2.2851686823679187,
"grad_norm": 8.299736960226006,
"learning_rate": 1.6353072779117036e-06,
"loss": 0.24011826515197754,
"step": 3590
},
{
"epoch": 2.285805219605347,
"grad_norm": 9.357967429035314,
"learning_rate": 1.6325684806898468e-06,
"loss": 0.2705532908439636,
"step": 3591
},
{
"epoch": 2.286441756842775,
"grad_norm": 11.089414258649509,
"learning_rate": 1.6298315312977331e-06,
"loss": 0.8302360773086548,
"step": 3592
},
{
"epoch": 2.287078294080204,
"grad_norm": 6.278016252890907,
"learning_rate": 1.6270964312372234e-06,
"loss": 0.28905045986175537,
"step": 3593
},
{
"epoch": 2.287714831317632,
"grad_norm": 8.197033849058773,
"learning_rate": 1.6243631820091638e-06,
"loss": 0.20282121002674103,
"step": 3594
},
{
"epoch": 2.2883513685550607,
"grad_norm": 12.163942331346213,
"learning_rate": 1.6216317851133877e-06,
"loss": 0.42968446016311646,
"step": 3595
},
{
"epoch": 2.2889879057924887,
"grad_norm": 15.279159484889568,
"learning_rate": 1.6189022420487094e-06,
"loss": 0.6520296931266785,
"step": 3596
},
{
"epoch": 2.289624443029917,
"grad_norm": 20.072671279159017,
"learning_rate": 1.6161745543129254e-06,
"loss": 1.815822720527649,
"step": 3597
},
{
"epoch": 2.2902609802673455,
"grad_norm": 11.702856245846263,
"learning_rate": 1.6134487234028145e-06,
"loss": 0.49471431970596313,
"step": 3598
},
{
"epoch": 2.290897517504774,
"grad_norm": 9.614888447397261,
"learning_rate": 1.6107247508141427e-06,
"loss": 0.5017402172088623,
"step": 3599
},
{
"epoch": 2.2915340547422023,
"grad_norm": 13.845876963255792,
"learning_rate": 1.608002638041643e-06,
"loss": 0.7130351066589355,
"step": 3600
},
{
"epoch": 2.2921705919796307,
"grad_norm": 14.779216377796459,
"learning_rate": 1.6052823865790412e-06,
"loss": 0.3332129120826721,
"step": 3601
},
{
"epoch": 2.292807129217059,
"grad_norm": 15.587667274654402,
"learning_rate": 1.6025639979190344e-06,
"loss": 1.0253678560256958,
"step": 3602
},
{
"epoch": 2.2934436664544875,
"grad_norm": 9.645842356106199,
"learning_rate": 1.5998474735533004e-06,
"loss": 0.43014228343963623,
"step": 3603
},
{
"epoch": 2.294080203691916,
"grad_norm": 7.4465732312366715,
"learning_rate": 1.5971328149724901e-06,
"loss": 0.3042871356010437,
"step": 3604
},
{
"epoch": 2.2947167409293443,
"grad_norm": 12.106940306712147,
"learning_rate": 1.5944200236662372e-06,
"loss": 0.5175577402114868,
"step": 3605
},
{
"epoch": 2.2953532781667727,
"grad_norm": 8.721367854893396,
"learning_rate": 1.5917091011231455e-06,
"loss": 0.4424808621406555,
"step": 3606
},
{
"epoch": 2.295989815404201,
"grad_norm": 14.684383732784086,
"learning_rate": 1.589000048830795e-06,
"loss": 0.4613659977912903,
"step": 3607
},
{
"epoch": 2.2966263526416295,
"grad_norm": 15.347722509583157,
"learning_rate": 1.586292868275739e-06,
"loss": 0.3522476553916931,
"step": 3608
},
{
"epoch": 2.297262889879058,
"grad_norm": 9.360582647015702,
"learning_rate": 1.5835875609435042e-06,
"loss": 0.4199846684932709,
"step": 3609
},
{
"epoch": 2.2978994271164863,
"grad_norm": 19.05247901253659,
"learning_rate": 1.5808841283185888e-06,
"loss": 0.5410860776901245,
"step": 3610
},
{
"epoch": 2.2985359643539147,
"grad_norm": 9.043382369943455,
"learning_rate": 1.5781825718844612e-06,
"loss": 0.3832551836967468,
"step": 3611
},
{
"epoch": 2.299172501591343,
"grad_norm": 8.461003927432863,
"learning_rate": 1.5754828931235672e-06,
"loss": 0.6752854585647583,
"step": 3612
},
{
"epoch": 2.2998090388287715,
"grad_norm": 6.12202896232288,
"learning_rate": 1.5727850935173095e-06,
"loss": 0.14791876077651978,
"step": 3613
},
{
"epoch": 2.3004455760662,
"grad_norm": 16.87998184328102,
"learning_rate": 1.5700891745460717e-06,
"loss": 1.508462905883789,
"step": 3614
},
{
"epoch": 2.3010821133036283,
"grad_norm": 11.425713840851401,
"learning_rate": 1.5673951376891999e-06,
"loss": 0.6087247133255005,
"step": 3615
},
{
"epoch": 2.3017186505410567,
"grad_norm": 12.360023388162777,
"learning_rate": 1.5647029844250077e-06,
"loss": 0.44248509407043457,
"step": 3616
},
{
"epoch": 2.302355187778485,
"grad_norm": 12.79301570004959,
"learning_rate": 1.5620127162307741e-06,
"loss": 0.24971036612987518,
"step": 3617
},
{
"epoch": 2.3029917250159135,
"grad_norm": 11.71850432844077,
"learning_rate": 1.5593243345827485e-06,
"loss": 0.3522535562515259,
"step": 3618
},
{
"epoch": 2.303628262253342,
"grad_norm": 13.119297552670384,
"learning_rate": 1.5566378409561394e-06,
"loss": 0.3391970992088318,
"step": 3619
},
{
"epoch": 2.3042647994907703,
"grad_norm": 7.923977606616533,
"learning_rate": 1.5539532368251226e-06,
"loss": 0.14920666813850403,
"step": 3620
},
{
"epoch": 2.3049013367281987,
"grad_norm": 7.5987958105107385,
"learning_rate": 1.5512705236628344e-06,
"loss": 0.49967896938323975,
"step": 3621
},
{
"epoch": 2.305537873965627,
"grad_norm": 10.078837502478047,
"learning_rate": 1.5485897029413793e-06,
"loss": 0.24752558767795563,
"step": 3622
},
{
"epoch": 2.3061744112030556,
"grad_norm": 6.351868764551711,
"learning_rate": 1.5459107761318132e-06,
"loss": 0.31946277618408203,
"step": 3623
},
{
"epoch": 2.306810948440484,
"grad_norm": 9.89620046223258,
"learning_rate": 1.5432337447041634e-06,
"loss": 0.9290648102760315,
"step": 3624
},
{
"epoch": 2.307447485677912,
"grad_norm": 7.680230646555971,
"learning_rate": 1.5405586101274117e-06,
"loss": 0.3040928840637207,
"step": 3625
},
{
"epoch": 2.3080840229153408,
"grad_norm": 12.380153963373857,
"learning_rate": 1.5378853738694987e-06,
"loss": 0.6746101379394531,
"step": 3626
},
{
"epoch": 2.3087205601527687,
"grad_norm": 17.8068297838926,
"learning_rate": 1.535214037397325e-06,
"loss": 0.7092942595481873,
"step": 3627
},
{
"epoch": 2.3093570973901976,
"grad_norm": 12.020871176944416,
"learning_rate": 1.5325446021767465e-06,
"loss": 0.17868655920028687,
"step": 3628
},
{
"epoch": 2.3099936346276255,
"grad_norm": 11.454025233433736,
"learning_rate": 1.5298770696725824e-06,
"loss": 0.5282299518585205,
"step": 3629
},
{
"epoch": 2.310630171865054,
"grad_norm": 26.109461817262574,
"learning_rate": 1.527211441348596e-06,
"loss": 1.2998456954956055,
"step": 3630
},
{
"epoch": 2.3112667091024823,
"grad_norm": 10.725439607251838,
"learning_rate": 1.5245477186675184e-06,
"loss": 0.38805341720581055,
"step": 3631
},
{
"epoch": 2.3119032463399107,
"grad_norm": 7.801657132057306,
"learning_rate": 1.5218859030910266e-06,
"loss": 0.46178191900253296,
"step": 3632
},
{
"epoch": 2.312539783577339,
"grad_norm": 10.629175987996685,
"learning_rate": 1.5192259960797546e-06,
"loss": 0.3144703209400177,
"step": 3633
},
{
"epoch": 2.3131763208147675,
"grad_norm": 14.413967861963577,
"learning_rate": 1.5165679990932857e-06,
"loss": 0.41735124588012695,
"step": 3634
},
{
"epoch": 2.313812858052196,
"grad_norm": 9.713189535062696,
"learning_rate": 1.5139119135901632e-06,
"loss": 0.4353427290916443,
"step": 3635
},
{
"epoch": 2.3144493952896243,
"grad_norm": 14.915821662254057,
"learning_rate": 1.511257741027869e-06,
"loss": 0.6386640071868896,
"step": 3636
},
{
"epoch": 2.3150859325270527,
"grad_norm": 7.073726796970359,
"learning_rate": 1.508605482862847e-06,
"loss": 0.14773771166801453,
"step": 3637
},
{
"epoch": 2.315722469764481,
"grad_norm": 14.52182409740128,
"learning_rate": 1.5059551405504846e-06,
"loss": 0.6305415034294128,
"step": 3638
},
{
"epoch": 2.3163590070019096,
"grad_norm": 12.06601036037594,
"learning_rate": 1.5033067155451186e-06,
"loss": 0.46238037943840027,
"step": 3639
},
{
"epoch": 2.316995544239338,
"grad_norm": 14.773896058423377,
"learning_rate": 1.500660209300034e-06,
"loss": 0.4040284752845764,
"step": 3640
},
{
"epoch": 2.3176320814767664,
"grad_norm": 12.757647220731817,
"learning_rate": 1.4980156232674641e-06,
"loss": 0.7795870304107666,
"step": 3641
},
{
"epoch": 2.3182686187141948,
"grad_norm": 19.24362559783828,
"learning_rate": 1.4953729588985894e-06,
"loss": 0.9090286493301392,
"step": 3642
},
{
"epoch": 2.318905155951623,
"grad_norm": 15.672532528484432,
"learning_rate": 1.4927322176435288e-06,
"loss": 0.3751720190048218,
"step": 3643
},
{
"epoch": 2.3195416931890516,
"grad_norm": 18.742423426587592,
"learning_rate": 1.4900934009513558e-06,
"loss": 0.6430118680000305,
"step": 3644
},
{
"epoch": 2.32017823042648,
"grad_norm": 14.611731063893258,
"learning_rate": 1.487456510270081e-06,
"loss": 0.6180001497268677,
"step": 3645
},
{
"epoch": 2.3208147676639084,
"grad_norm": 15.521679418580392,
"learning_rate": 1.4848215470466614e-06,
"loss": 0.38686472177505493,
"step": 3646
},
{
"epoch": 2.3214513049013368,
"grad_norm": 16.252399738789812,
"learning_rate": 1.4821885127269936e-06,
"loss": 0.3846510946750641,
"step": 3647
},
{
"epoch": 2.322087842138765,
"grad_norm": 11.871510763089457,
"learning_rate": 1.47955740875592e-06,
"loss": 1.2616004943847656,
"step": 3648
},
{
"epoch": 2.3227243793761936,
"grad_norm": 8.79403623074544,
"learning_rate": 1.4769282365772196e-06,
"loss": 0.625005841255188,
"step": 3649
},
{
"epoch": 2.323360916613622,
"grad_norm": 10.253647287150587,
"learning_rate": 1.4743009976336132e-06,
"loss": 0.5820122361183167,
"step": 3650
},
{
"epoch": 2.3239974538510504,
"grad_norm": 9.848322146907023,
"learning_rate": 1.4716756933667592e-06,
"loss": 0.23914802074432373,
"step": 3651
},
{
"epoch": 2.324633991088479,
"grad_norm": 10.340995805837212,
"learning_rate": 1.4690523252172595e-06,
"loss": 0.21634802222251892,
"step": 3652
},
{
"epoch": 2.325270528325907,
"grad_norm": 5.760077510283241,
"learning_rate": 1.4664308946246441e-06,
"loss": 0.19866140186786652,
"step": 3653
},
{
"epoch": 2.3259070655633356,
"grad_norm": 22.605378194422197,
"learning_rate": 1.4638114030273903e-06,
"loss": 0.843527615070343,
"step": 3654
},
{
"epoch": 2.326543602800764,
"grad_norm": 12.217049871532764,
"learning_rate": 1.4611938518629048e-06,
"loss": 0.4112418591976166,
"step": 3655
},
{
"epoch": 2.3271801400381924,
"grad_norm": 17.596310727776576,
"learning_rate": 1.458578242567531e-06,
"loss": 0.7754092216491699,
"step": 3656
},
{
"epoch": 2.327816677275621,
"grad_norm": 11.569529283786105,
"learning_rate": 1.4559645765765452e-06,
"loss": 0.45173966884613037,
"step": 3657
},
{
"epoch": 2.328453214513049,
"grad_norm": 6.008874695167993,
"learning_rate": 1.4533528553241643e-06,
"loss": 0.3209451735019684,
"step": 3658
},
{
"epoch": 2.3290897517504776,
"grad_norm": 11.975411718365635,
"learning_rate": 1.4507430802435285e-06,
"loss": 0.6683560013771057,
"step": 3659
},
{
"epoch": 2.3297262889879056,
"grad_norm": 13.102032681202644,
"learning_rate": 1.4481352527667136e-06,
"loss": 0.3309083580970764,
"step": 3660
},
{
"epoch": 2.3303628262253344,
"grad_norm": 9.721885000749575,
"learning_rate": 1.4455293743247313e-06,
"loss": 0.664567232131958,
"step": 3661
},
{
"epoch": 2.3309993634627624,
"grad_norm": 10.822122824742895,
"learning_rate": 1.4429254463475179e-06,
"loss": 0.27729907631874084,
"step": 3662
},
{
"epoch": 2.3316359007001908,
"grad_norm": 10.841564476697524,
"learning_rate": 1.4403234702639418e-06,
"loss": 0.5381125211715698,
"step": 3663
},
{
"epoch": 2.332272437937619,
"grad_norm": 14.675376700233853,
"learning_rate": 1.4377234475017987e-06,
"loss": 0.738660991191864,
"step": 3664
},
{
"epoch": 2.3329089751750476,
"grad_norm": 9.306934376497246,
"learning_rate": 1.4351253794878184e-06,
"loss": 0.7385612726211548,
"step": 3665
},
{
"epoch": 2.333545512412476,
"grad_norm": 10.892424385174278,
"learning_rate": 1.4325292676476471e-06,
"loss": 0.20372740924358368,
"step": 3666
},
{
"epoch": 2.3341820496499044,
"grad_norm": 10.257811251772715,
"learning_rate": 1.4299351134058686e-06,
"loss": 0.5008155703544617,
"step": 3667
},
{
"epoch": 2.334818586887333,
"grad_norm": 13.887540797057827,
"learning_rate": 1.4273429181859861e-06,
"loss": 0.3023715913295746,
"step": 3668
},
{
"epoch": 2.335455124124761,
"grad_norm": 8.847549201264535,
"learning_rate": 1.4247526834104303e-06,
"loss": 0.3329947888851166,
"step": 3669
},
{
"epoch": 2.3360916613621896,
"grad_norm": 5.95348852151552,
"learning_rate": 1.4221644105005528e-06,
"loss": 0.29144102334976196,
"step": 3670
},
{
"epoch": 2.336728198599618,
"grad_norm": 14.170395427490837,
"learning_rate": 1.4195781008766345e-06,
"loss": 0.6279107332229614,
"step": 3671
},
{
"epoch": 2.3373647358370464,
"grad_norm": 7.372939737730689,
"learning_rate": 1.416993755957875e-06,
"loss": 0.215074822306633,
"step": 3672
},
{
"epoch": 2.338001273074475,
"grad_norm": 12.742643327109723,
"learning_rate": 1.4144113771623957e-06,
"loss": 0.496043860912323,
"step": 3673
},
{
"epoch": 2.338637810311903,
"grad_norm": 10.232372103690514,
"learning_rate": 1.4118309659072387e-06,
"loss": 0.9400693774223328,
"step": 3674
},
{
"epoch": 2.3392743475493316,
"grad_norm": 9.038286266911594,
"learning_rate": 1.4092525236083721e-06,
"loss": 0.42702949047088623,
"step": 3675
},
{
"epoch": 2.33991088478676,
"grad_norm": 15.18231475470037,
"learning_rate": 1.406676051680675e-06,
"loss": 0.3502260446548462,
"step": 3676
},
{
"epoch": 2.3405474220241884,
"grad_norm": 21.9055595938898,
"learning_rate": 1.4041015515379486e-06,
"loss": 1.3659839630126953,
"step": 3677
},
{
"epoch": 2.341183959261617,
"grad_norm": 9.562326576124976,
"learning_rate": 1.401529024592918e-06,
"loss": 0.4016745090484619,
"step": 3678
},
{
"epoch": 2.3418204964990452,
"grad_norm": 8.122059425645102,
"learning_rate": 1.3989584722572143e-06,
"loss": 0.2181989550590515,
"step": 3679
},
{
"epoch": 2.3424570337364736,
"grad_norm": 8.775741385145263,
"learning_rate": 1.3963898959413958e-06,
"loss": 0.5251392126083374,
"step": 3680
},
{
"epoch": 2.343093570973902,
"grad_norm": 12.806934367488545,
"learning_rate": 1.393823297054931e-06,
"loss": 0.3657844066619873,
"step": 3681
},
{
"epoch": 2.3437301082113304,
"grad_norm": 11.410244101635918,
"learning_rate": 1.3912586770062024e-06,
"loss": 0.6960375905036926,
"step": 3682
},
{
"epoch": 2.344366645448759,
"grad_norm": 9.2192755074986,
"learning_rate": 1.3886960372025093e-06,
"loss": 0.5058915019035339,
"step": 3683
},
{
"epoch": 2.3450031826861872,
"grad_norm": 12.820251241137715,
"learning_rate": 1.3861353790500648e-06,
"loss": 0.48139941692352295,
"step": 3684
},
{
"epoch": 2.3456397199236156,
"grad_norm": 9.015032547221407,
"learning_rate": 1.3835767039539927e-06,
"loss": 0.39420944452285767,
"step": 3685
},
{
"epoch": 2.346276257161044,
"grad_norm": 11.20665558473499,
"learning_rate": 1.3810200133183294e-06,
"loss": 0.2258508801460266,
"step": 3686
},
{
"epoch": 2.3469127943984724,
"grad_norm": 16.92148946423977,
"learning_rate": 1.3784653085460198e-06,
"loss": 0.7787349224090576,
"step": 3687
},
{
"epoch": 2.347549331635901,
"grad_norm": 9.930702156235032,
"learning_rate": 1.3759125910389265e-06,
"loss": 0.4799356460571289,
"step": 3688
},
{
"epoch": 2.3481858688733293,
"grad_norm": 9.677716675408867,
"learning_rate": 1.3733618621978106e-06,
"loss": 0.8267190456390381,
"step": 3689
},
{
"epoch": 2.3488224061107577,
"grad_norm": 10.166112821165283,
"learning_rate": 1.3708131234223526e-06,
"loss": 0.39371243119239807,
"step": 3690
},
{
"epoch": 2.349458943348186,
"grad_norm": 10.575227773552253,
"learning_rate": 1.3682663761111348e-06,
"loss": 0.26244211196899414,
"step": 3691
},
{
"epoch": 2.3500954805856145,
"grad_norm": 9.545223561877174,
"learning_rate": 1.3657216216616476e-06,
"loss": 0.35921522974967957,
"step": 3692
},
{
"epoch": 2.3507320178230424,
"grad_norm": 15.089034021230734,
"learning_rate": 1.3631788614702896e-06,
"loss": 0.3103064000606537,
"step": 3693
},
{
"epoch": 2.3513685550604713,
"grad_norm": 12.757867285846213,
"learning_rate": 1.3606380969323625e-06,
"loss": 0.8469514846801758,
"step": 3694
},
{
"epoch": 2.3520050922978992,
"grad_norm": 10.219037032437837,
"learning_rate": 1.3580993294420775e-06,
"loss": 0.25363725423812866,
"step": 3695
},
{
"epoch": 2.3526416295353276,
"grad_norm": 9.395656924052952,
"learning_rate": 1.355562560392542e-06,
"loss": 0.2070430964231491,
"step": 3696
},
{
"epoch": 2.353278166772756,
"grad_norm": 13.838000505289177,
"learning_rate": 1.3530277911757756e-06,
"loss": 0.3234326243400574,
"step": 3697
},
{
"epoch": 2.3539147040101844,
"grad_norm": 8.125871300091289,
"learning_rate": 1.3504950231826958e-06,
"loss": 0.40520572662353516,
"step": 3698
},
{
"epoch": 2.354551241247613,
"grad_norm": 9.23480525202698,
"learning_rate": 1.3479642578031216e-06,
"loss": 0.22022618353366852,
"step": 3699
},
{
"epoch": 2.3551877784850412,
"grad_norm": 14.856183006106253,
"learning_rate": 1.3454354964257737e-06,
"loss": 0.5824393033981323,
"step": 3700
},
{
"epoch": 2.3558243157224696,
"grad_norm": 9.315125647702207,
"learning_rate": 1.3429087404382773e-06,
"loss": 0.44092607498168945,
"step": 3701
},
{
"epoch": 2.356460852959898,
"grad_norm": 13.949875737013524,
"learning_rate": 1.3403839912271482e-06,
"loss": 1.0444612503051758,
"step": 3702
},
{
"epoch": 2.3570973901973264,
"grad_norm": 14.252694461991577,
"learning_rate": 1.337861250177811e-06,
"loss": 0.6289519667625427,
"step": 3703
},
{
"epoch": 2.357733927434755,
"grad_norm": 10.892049820100306,
"learning_rate": 1.335340518674581e-06,
"loss": 0.4143725335597992,
"step": 3704
},
{
"epoch": 2.3583704646721833,
"grad_norm": 7.999663674342672,
"learning_rate": 1.3328217981006742e-06,
"loss": 0.28366735577583313,
"step": 3705
},
{
"epoch": 2.3590070019096117,
"grad_norm": 7.264284970868845,
"learning_rate": 1.3303050898382015e-06,
"loss": 0.26763850450515747,
"step": 3706
},
{
"epoch": 2.35964353914704,
"grad_norm": 8.267938251337856,
"learning_rate": 1.327790395268172e-06,
"loss": 0.29509538412094116,
"step": 3707
},
{
"epoch": 2.3602800763844685,
"grad_norm": 17.361067481080763,
"learning_rate": 1.3252777157704876e-06,
"loss": 0.35680949687957764,
"step": 3708
},
{
"epoch": 2.360916613621897,
"grad_norm": 10.301732022860866,
"learning_rate": 1.3227670527239455e-06,
"loss": 0.30002152919769287,
"step": 3709
},
{
"epoch": 2.3615531508593253,
"grad_norm": 14.611437530806262,
"learning_rate": 1.320258407506236e-06,
"loss": 0.4250519871711731,
"step": 3710
},
{
"epoch": 2.3621896880967537,
"grad_norm": 12.930521859583482,
"learning_rate": 1.3177517814939411e-06,
"loss": 0.5760447978973389,
"step": 3711
},
{
"epoch": 2.362826225334182,
"grad_norm": 10.870235332190978,
"learning_rate": 1.3152471760625368e-06,
"loss": 0.2658051550388336,
"step": 3712
},
{
"epoch": 2.3634627625716105,
"grad_norm": 7.405506116873889,
"learning_rate": 1.3127445925863875e-06,
"loss": 0.23447053134441376,
"step": 3713
},
{
"epoch": 2.364099299809039,
"grad_norm": 16.75744131230429,
"learning_rate": 1.3102440324387534e-06,
"loss": 0.5982019901275635,
"step": 3714
},
{
"epoch": 2.3647358370464673,
"grad_norm": 10.673282715242074,
"learning_rate": 1.307745496991779e-06,
"loss": 0.5700613260269165,
"step": 3715
},
{
"epoch": 2.3653723742838957,
"grad_norm": 8.21014487686139,
"learning_rate": 1.3052489876165004e-06,
"loss": 0.41640716791152954,
"step": 3716
},
{
"epoch": 2.366008911521324,
"grad_norm": 9.297449050392139,
"learning_rate": 1.3027545056828395e-06,
"loss": 0.34303605556488037,
"step": 3717
},
{
"epoch": 2.3666454487587525,
"grad_norm": 8.899360190569867,
"learning_rate": 1.3002620525596116e-06,
"loss": 0.4346969425678253,
"step": 3718
},
{
"epoch": 2.367281985996181,
"grad_norm": 8.992667699431827,
"learning_rate": 1.2977716296145093e-06,
"loss": 0.4370039999485016,
"step": 3719
},
{
"epoch": 2.3679185232336093,
"grad_norm": 11.313717968304134,
"learning_rate": 1.2952832382141207e-06,
"loss": 0.47830891609191895,
"step": 3720
},
{
"epoch": 2.3685550604710377,
"grad_norm": 10.377423833062291,
"learning_rate": 1.2927968797239133e-06,
"loss": 0.36754053831100464,
"step": 3721
},
{
"epoch": 2.369191597708466,
"grad_norm": 12.771258128785176,
"learning_rate": 1.2903125555082402e-06,
"loss": 0.35375094413757324,
"step": 3722
},
{
"epoch": 2.3698281349458945,
"grad_norm": 9.387597548109925,
"learning_rate": 1.2878302669303377e-06,
"loss": 0.7419187426567078,
"step": 3723
},
{
"epoch": 2.370464672183323,
"grad_norm": 14.36263963222831,
"learning_rate": 1.2853500153523308e-06,
"loss": 0.433469295501709,
"step": 3724
},
{
"epoch": 2.3711012094207513,
"grad_norm": 15.116553305119123,
"learning_rate": 1.2828718021352155e-06,
"loss": 0.8447028398513794,
"step": 3725
},
{
"epoch": 2.3717377466581793,
"grad_norm": 16.418494635265372,
"learning_rate": 1.2803956286388808e-06,
"loss": 0.2990822196006775,
"step": 3726
},
{
"epoch": 2.372374283895608,
"grad_norm": 12.237072180575987,
"learning_rate": 1.2779214962220888e-06,
"loss": 0.6836670637130737,
"step": 3727
},
{
"epoch": 2.373010821133036,
"grad_norm": 12.27843950813826,
"learning_rate": 1.2754494062424854e-06,
"loss": 0.403628408908844,
"step": 3728
},
{
"epoch": 2.373647358370465,
"grad_norm": 11.134214718224426,
"learning_rate": 1.2729793600565937e-06,
"loss": 0.38044747710227966,
"step": 3729
},
{
"epoch": 2.374283895607893,
"grad_norm": 15.165916020979232,
"learning_rate": 1.2705113590198155e-06,
"loss": 1.098738670349121,
"step": 3730
},
{
"epoch": 2.3749204328453213,
"grad_norm": 8.261495854160689,
"learning_rate": 1.2680454044864342e-06,
"loss": 0.2793627977371216,
"step": 3731
},
{
"epoch": 2.3755569700827497,
"grad_norm": 7.863877940829437,
"learning_rate": 1.265581497809602e-06,
"loss": 0.3842894434928894,
"step": 3732
},
{
"epoch": 2.376193507320178,
"grad_norm": 11.758657961547582,
"learning_rate": 1.2631196403413565e-06,
"loss": 1.5793788433074951,
"step": 3733
},
{
"epoch": 2.3768300445576065,
"grad_norm": 17.24367253985176,
"learning_rate": 1.2606598334326049e-06,
"loss": 0.8001893162727356,
"step": 3734
},
{
"epoch": 2.377466581795035,
"grad_norm": 15.980789517537973,
"learning_rate": 1.2582020784331318e-06,
"loss": 0.6164204478263855,
"step": 3735
},
{
"epoch": 2.3781031190324633,
"grad_norm": 15.81210521828585,
"learning_rate": 1.2557463766915922e-06,
"loss": 0.4665430784225464,
"step": 3736
},
{
"epoch": 2.3787396562698917,
"grad_norm": 10.837136433926592,
"learning_rate": 1.2532927295555214e-06,
"loss": 0.31216520071029663,
"step": 3737
},
{
"epoch": 2.37937619350732,
"grad_norm": 15.59644961979668,
"learning_rate": 1.250841138371321e-06,
"loss": 0.23173931241035461,
"step": 3738
},
{
"epoch": 2.3800127307447485,
"grad_norm": 27.273050075745154,
"learning_rate": 1.2483916044842665e-06,
"loss": 0.5167461037635803,
"step": 3739
},
{
"epoch": 2.380649267982177,
"grad_norm": 8.845762808947617,
"learning_rate": 1.2459441292385049e-06,
"loss": 0.5043585300445557,
"step": 3740
},
{
"epoch": 2.3812858052196053,
"grad_norm": 9.78578565378147,
"learning_rate": 1.2434987139770522e-06,
"loss": 0.3009123206138611,
"step": 3741
},
{
"epoch": 2.3819223424570337,
"grad_norm": 10.419282093700664,
"learning_rate": 1.2410553600417946e-06,
"loss": 1.1111235618591309,
"step": 3742
},
{
"epoch": 2.382558879694462,
"grad_norm": 12.761737956411759,
"learning_rate": 1.2386140687734898e-06,
"loss": 1.0053675174713135,
"step": 3743
},
{
"epoch": 2.3831954169318905,
"grad_norm": 7.132611172936865,
"learning_rate": 1.2361748415117619e-06,
"loss": 0.28074517846107483,
"step": 3744
},
{
"epoch": 2.383831954169319,
"grad_norm": 12.054551595891324,
"learning_rate": 1.2337376795950967e-06,
"loss": 0.26301613450050354,
"step": 3745
},
{
"epoch": 2.3844684914067473,
"grad_norm": 21.629165559579967,
"learning_rate": 1.2313025843608566e-06,
"loss": 0.4631958603858948,
"step": 3746
},
{
"epoch": 2.3851050286441757,
"grad_norm": 11.758611509943327,
"learning_rate": 1.2288695571452636e-06,
"loss": 0.5927736163139343,
"step": 3747
},
{
"epoch": 2.385741565881604,
"grad_norm": 10.403057055436687,
"learning_rate": 1.2264385992834072e-06,
"loss": 0.4360220432281494,
"step": 3748
},
{
"epoch": 2.3863781031190325,
"grad_norm": 13.989298831570544,
"learning_rate": 1.2240097121092382e-06,
"loss": 0.23291362822055817,
"step": 3749
},
{
"epoch": 2.387014640356461,
"grad_norm": 18.028698512975225,
"learning_rate": 1.2215828969555771e-06,
"loss": 0.2985227108001709,
"step": 3750
},
{
"epoch": 2.3876511775938893,
"grad_norm": 10.613505530236429,
"learning_rate": 1.219158155154102e-06,
"loss": 0.44736921787261963,
"step": 3751
},
{
"epoch": 2.3882877148313177,
"grad_norm": 12.855100341515485,
"learning_rate": 1.216735488035356e-06,
"loss": 0.44182783365249634,
"step": 3752
},
{
"epoch": 2.388924252068746,
"grad_norm": 10.414754482940225,
"learning_rate": 1.2143148969287405e-06,
"loss": 0.4356725215911865,
"step": 3753
},
{
"epoch": 2.3895607893061745,
"grad_norm": 8.769305233582422,
"learning_rate": 1.2118963831625252e-06,
"loss": 0.43675944209098816,
"step": 3754
},
{
"epoch": 2.390197326543603,
"grad_norm": 14.660117839785551,
"learning_rate": 1.2094799480638287e-06,
"loss": 0.7107505798339844,
"step": 3755
},
{
"epoch": 2.3908338637810314,
"grad_norm": 11.421187401800076,
"learning_rate": 1.2070655929586395e-06,
"loss": 0.4475456476211548,
"step": 3756
},
{
"epoch": 2.3914704010184598,
"grad_norm": 9.641057919738367,
"learning_rate": 1.2046533191717985e-06,
"loss": 0.5334263443946838,
"step": 3757
},
{
"epoch": 2.392106938255888,
"grad_norm": 9.625328248093474,
"learning_rate": 1.2022431280270075e-06,
"loss": 0.4311234652996063,
"step": 3758
},
{
"epoch": 2.392743475493316,
"grad_norm": 16.962406680330254,
"learning_rate": 1.1998350208468217e-06,
"loss": 0.8091346621513367,
"step": 3759
},
{
"epoch": 2.393380012730745,
"grad_norm": 13.138031971976169,
"learning_rate": 1.197428998952659e-06,
"loss": 0.5169646143913269,
"step": 3760
},
{
"epoch": 2.394016549968173,
"grad_norm": 22.08801596951749,
"learning_rate": 1.1950250636647887e-06,
"loss": 0.6854633092880249,
"step": 3761
},
{
"epoch": 2.3946530872056018,
"grad_norm": 11.698573413964109,
"learning_rate": 1.192623216302332e-06,
"loss": 0.44086697697639465,
"step": 3762
},
{
"epoch": 2.3952896244430297,
"grad_norm": 15.886156214297149,
"learning_rate": 1.1902234581832723e-06,
"loss": 0.6274378895759583,
"step": 3763
},
{
"epoch": 2.395926161680458,
"grad_norm": 12.206434564474439,
"learning_rate": 1.1878257906244412e-06,
"loss": 0.2722568213939667,
"step": 3764
},
{
"epoch": 2.3965626989178865,
"grad_norm": 12.071911618002728,
"learning_rate": 1.1854302149415242e-06,
"loss": 0.7645981907844543,
"step": 3765
},
{
"epoch": 2.397199236155315,
"grad_norm": 16.724975946030447,
"learning_rate": 1.1830367324490577e-06,
"loss": 1.381920576095581,
"step": 3766
},
{
"epoch": 2.3978357733927433,
"grad_norm": 13.074704894380814,
"learning_rate": 1.1806453444604354e-06,
"loss": 0.23027241230010986,
"step": 3767
},
{
"epoch": 2.3984723106301717,
"grad_norm": 7.501926380582122,
"learning_rate": 1.178256052287891e-06,
"loss": 0.773628830909729,
"step": 3768
},
{
"epoch": 2.3991088478676,
"grad_norm": 10.976355752611195,
"learning_rate": 1.1758688572425191e-06,
"loss": 0.28537803888320923,
"step": 3769
},
{
"epoch": 2.3997453851050286,
"grad_norm": 15.216087889854332,
"learning_rate": 1.173483760634257e-06,
"loss": 0.3113643527030945,
"step": 3770
},
{
"epoch": 2.400381922342457,
"grad_norm": 9.511957746561064,
"learning_rate": 1.1711007637718925e-06,
"loss": 0.25821590423583984,
"step": 3771
},
{
"epoch": 2.4010184595798854,
"grad_norm": 13.948635189961193,
"learning_rate": 1.1687198679630586e-06,
"loss": 0.37554019689559937,
"step": 3772
},
{
"epoch": 2.4016549968173138,
"grad_norm": 9.362461620607593,
"learning_rate": 1.1663410745142416e-06,
"loss": 0.2878197133541107,
"step": 3773
},
{
"epoch": 2.402291534054742,
"grad_norm": 9.39243982324823,
"learning_rate": 1.1639643847307685e-06,
"loss": 0.3661644160747528,
"step": 3774
},
{
"epoch": 2.4029280712921706,
"grad_norm": 10.569202056049091,
"learning_rate": 1.161589799916814e-06,
"loss": 0.588375449180603,
"step": 3775
},
{
"epoch": 2.403564608529599,
"grad_norm": 9.96661944037226,
"learning_rate": 1.159217321375396e-06,
"loss": 0.5742862224578857,
"step": 3776
},
{
"epoch": 2.4042011457670274,
"grad_norm": 22.19818033196068,
"learning_rate": 1.1568469504083818e-06,
"loss": 0.49878042936325073,
"step": 3777
},
{
"epoch": 2.4048376830044558,
"grad_norm": 17.590500198044325,
"learning_rate": 1.1544786883164743e-06,
"loss": 0.4632866382598877,
"step": 3778
},
{
"epoch": 2.405474220241884,
"grad_norm": 14.108065744139283,
"learning_rate": 1.152112536399224e-06,
"loss": 0.38641777634620667,
"step": 3779
},
{
"epoch": 2.4061107574793126,
"grad_norm": 8.107163245572407,
"learning_rate": 1.1497484959550254e-06,
"loss": 0.35480377078056335,
"step": 3780
},
{
"epoch": 2.406747294716741,
"grad_norm": 10.467818303600838,
"learning_rate": 1.1473865682811097e-06,
"loss": 0.4747023582458496,
"step": 3781
},
{
"epoch": 2.4073838319541694,
"grad_norm": 12.019573075340698,
"learning_rate": 1.1450267546735516e-06,
"loss": 0.8633090257644653,
"step": 3782
},
{
"epoch": 2.408020369191598,
"grad_norm": 7.77339547449022,
"learning_rate": 1.1426690564272647e-06,
"loss": 0.5159689784049988,
"step": 3783
},
{
"epoch": 2.408656906429026,
"grad_norm": 12.864002851351529,
"learning_rate": 1.1403134748360023e-06,
"loss": 0.561131477355957,
"step": 3784
},
{
"epoch": 2.4092934436664546,
"grad_norm": 9.417541984936822,
"learning_rate": 1.1379600111923538e-06,
"loss": 0.14377526938915253,
"step": 3785
},
{
"epoch": 2.409929980903883,
"grad_norm": 14.314802212997668,
"learning_rate": 1.1356086667877526e-06,
"loss": 0.7224687337875366,
"step": 3786
},
{
"epoch": 2.4105665181413114,
"grad_norm": 11.384630848147092,
"learning_rate": 1.1332594429124633e-06,
"loss": 0.20363056659698486,
"step": 3787
},
{
"epoch": 2.41120305537874,
"grad_norm": 9.607657429807452,
"learning_rate": 1.1309123408555883e-06,
"loss": 0.5656764507293701,
"step": 3788
},
{
"epoch": 2.411839592616168,
"grad_norm": 9.635754273335122,
"learning_rate": 1.1285673619050657e-06,
"loss": 0.3478263318538666,
"step": 3789
},
{
"epoch": 2.4124761298535966,
"grad_norm": 8.287801158986275,
"learning_rate": 1.1262245073476725e-06,
"loss": 0.3498222231864929,
"step": 3790
},
{
"epoch": 2.413112667091025,
"grad_norm": 10.369047085113845,
"learning_rate": 1.123883778469011e-06,
"loss": 0.40166744589805603,
"step": 3791
},
{
"epoch": 2.413749204328453,
"grad_norm": 10.61015162872025,
"learning_rate": 1.1215451765535273e-06,
"loss": 0.3910064697265625,
"step": 3792
},
{
"epoch": 2.414385741565882,
"grad_norm": 9.242009512504998,
"learning_rate": 1.1192087028844945e-06,
"loss": 0.7114223837852478,
"step": 3793
},
{
"epoch": 2.4150222788033098,
"grad_norm": 16.541659582866036,
"learning_rate": 1.1168743587440179e-06,
"loss": 1.0183207988739014,
"step": 3794
},
{
"epoch": 2.4156588160407386,
"grad_norm": 14.033990169863475,
"learning_rate": 1.1145421454130361e-06,
"loss": 0.44376140832901,
"step": 3795
},
{
"epoch": 2.4162953532781666,
"grad_norm": 11.656556714232906,
"learning_rate": 1.1122120641713159e-06,
"loss": 0.25253862142562866,
"step": 3796
},
{
"epoch": 2.416931890515595,
"grad_norm": 12.314637203602851,
"learning_rate": 1.1098841162974605e-06,
"loss": 0.4938369393348694,
"step": 3797
},
{
"epoch": 2.4175684277530234,
"grad_norm": 12.14700600555856,
"learning_rate": 1.1075583030688924e-06,
"loss": 0.28885698318481445,
"step": 3798
},
{
"epoch": 2.418204964990452,
"grad_norm": 11.805020765397705,
"learning_rate": 1.1052346257618724e-06,
"loss": 0.3935962915420532,
"step": 3799
},
{
"epoch": 2.41884150222788,
"grad_norm": 14.522669527435724,
"learning_rate": 1.1029130856514835e-06,
"loss": 0.2798091769218445,
"step": 3800
},
{
"epoch": 2.4194780394653086,
"grad_norm": 17.99675844279146,
"learning_rate": 1.1005936840116377e-06,
"loss": 2.819949150085449,
"step": 3801
},
{
"epoch": 2.420114576702737,
"grad_norm": 14.060814447743827,
"learning_rate": 1.0982764221150715e-06,
"loss": 0.412301629781723,
"step": 3802
},
{
"epoch": 2.4207511139401654,
"grad_norm": 11.723081349594906,
"learning_rate": 1.0959613012333526e-06,
"loss": 0.25898265838623047,
"step": 3803
},
{
"epoch": 2.421387651177594,
"grad_norm": 7.855819697294254,
"learning_rate": 1.0936483226368684e-06,
"loss": 0.16626277565956116,
"step": 3804
},
{
"epoch": 2.422024188415022,
"grad_norm": 8.328223383028789,
"learning_rate": 1.0913374875948329e-06,
"loss": 0.28895729780197144,
"step": 3805
},
{
"epoch": 2.4226607256524506,
"grad_norm": 10.557735560596633,
"learning_rate": 1.0890287973752845e-06,
"loss": 0.5577061176300049,
"step": 3806
},
{
"epoch": 2.423297262889879,
"grad_norm": 8.996233245177725,
"learning_rate": 1.0867222532450823e-06,
"loss": 1.2347557544708252,
"step": 3807
},
{
"epoch": 2.4239338001273074,
"grad_norm": 14.016253040281782,
"learning_rate": 1.0844178564699092e-06,
"loss": 1.2900872230529785,
"step": 3808
},
{
"epoch": 2.424570337364736,
"grad_norm": 10.886693344909176,
"learning_rate": 1.082115608314272e-06,
"loss": 0.40640875697135925,
"step": 3809
},
{
"epoch": 2.425206874602164,
"grad_norm": 12.222734334172792,
"learning_rate": 1.0798155100414953e-06,
"loss": 0.427412748336792,
"step": 3810
},
{
"epoch": 2.4258434118395926,
"grad_norm": 8.353529674874979,
"learning_rate": 1.0775175629137252e-06,
"loss": 0.2718370258808136,
"step": 3811
},
{
"epoch": 2.426479949077021,
"grad_norm": 13.897497422723054,
"learning_rate": 1.0752217681919263e-06,
"loss": 0.5335069894790649,
"step": 3812
},
{
"epoch": 2.4271164863144494,
"grad_norm": 15.600924585359062,
"learning_rate": 1.0729281271358838e-06,
"loss": 0.4916003346443176,
"step": 3813
},
{
"epoch": 2.427753023551878,
"grad_norm": 8.757702880845267,
"learning_rate": 1.0706366410042006e-06,
"loss": 0.2865592837333679,
"step": 3814
},
{
"epoch": 2.4283895607893062,
"grad_norm": 12.820178121170283,
"learning_rate": 1.0683473110542946e-06,
"loss": 0.29609590768814087,
"step": 3815
},
{
"epoch": 2.4290260980267346,
"grad_norm": 7.820138710598236,
"learning_rate": 1.0660601385424063e-06,
"loss": 0.5924516916275024,
"step": 3816
},
{
"epoch": 2.429662635264163,
"grad_norm": 11.94479420786885,
"learning_rate": 1.0637751247235866e-06,
"loss": 0.7252217531204224,
"step": 3817
},
{
"epoch": 2.4302991725015914,
"grad_norm": 9.864533670982793,
"learning_rate": 1.061492270851705e-06,
"loss": 0.22076858580112457,
"step": 3818
},
{
"epoch": 2.43093570973902,
"grad_norm": 6.5697555037533775,
"learning_rate": 1.0592115781794427e-06,
"loss": 0.22088488936424255,
"step": 3819
},
{
"epoch": 2.4315722469764482,
"grad_norm": 11.366776949775208,
"learning_rate": 1.0569330479583019e-06,
"loss": 0.31297528743743896,
"step": 3820
},
{
"epoch": 2.4322087842138767,
"grad_norm": 14.907722172487633,
"learning_rate": 1.0546566814385866e-06,
"loss": 0.6428528428077698,
"step": 3821
},
{
"epoch": 2.432845321451305,
"grad_norm": 9.9467471887832,
"learning_rate": 1.052382479869426e-06,
"loss": 0.6574569940567017,
"step": 3822
},
{
"epoch": 2.4334818586887335,
"grad_norm": 17.2034045866165,
"learning_rate": 1.0501104444987536e-06,
"loss": 0.49759557843208313,
"step": 3823
},
{
"epoch": 2.434118395926162,
"grad_norm": 12.884996001729155,
"learning_rate": 1.0478405765733157e-06,
"loss": 0.33131590485572815,
"step": 3824
},
{
"epoch": 2.4347549331635903,
"grad_norm": 9.493850689262331,
"learning_rate": 1.0455728773386691e-06,
"loss": 0.675175130367279,
"step": 3825
},
{
"epoch": 2.4353914704010187,
"grad_norm": 17.20863386024244,
"learning_rate": 1.0433073480391848e-06,
"loss": 0.3058862090110779,
"step": 3826
},
{
"epoch": 2.4360280076384466,
"grad_norm": 10.466162475421772,
"learning_rate": 1.0410439899180347e-06,
"loss": 0.3168693482875824,
"step": 3827
},
{
"epoch": 2.4366645448758755,
"grad_norm": 13.166745994255525,
"learning_rate": 1.0387828042172072e-06,
"loss": 1.440507411956787,
"step": 3828
},
{
"epoch": 2.4373010821133034,
"grad_norm": 10.833093993085129,
"learning_rate": 1.0365237921774952e-06,
"loss": 0.30918723344802856,
"step": 3829
},
{
"epoch": 2.437937619350732,
"grad_norm": 11.116131242470681,
"learning_rate": 1.0342669550384982e-06,
"loss": 0.428306519985199,
"step": 3830
},
{
"epoch": 2.4385741565881602,
"grad_norm": 10.634908492128861,
"learning_rate": 1.032012294038624e-06,
"loss": 0.19637051224708557,
"step": 3831
},
{
"epoch": 2.4392106938255886,
"grad_norm": 9.013619391139507,
"learning_rate": 1.0297598104150836e-06,
"loss": 0.37704598903656006,
"step": 3832
},
{
"epoch": 2.439847231063017,
"grad_norm": 9.922329764110929,
"learning_rate": 1.0275095054038998e-06,
"loss": 0.25277867913246155,
"step": 3833
},
{
"epoch": 2.4404837683004454,
"grad_norm": 9.473928918102837,
"learning_rate": 1.0252613802398887e-06,
"loss": 0.5325944423675537,
"step": 3834
},
{
"epoch": 2.441120305537874,
"grad_norm": 11.327363252811761,
"learning_rate": 1.023015436156682e-06,
"loss": 0.3226351737976074,
"step": 3835
},
{
"epoch": 2.4417568427753022,
"grad_norm": 12.635263437424054,
"learning_rate": 1.020771674386707e-06,
"loss": 0.3378393352031708,
"step": 3836
},
{
"epoch": 2.4423933800127307,
"grad_norm": 8.004090298554114,
"learning_rate": 1.0185300961611965e-06,
"loss": 0.3406223654747009,
"step": 3837
},
{
"epoch": 2.443029917250159,
"grad_norm": 8.953996713880791,
"learning_rate": 1.0162907027101826e-06,
"loss": 0.5684438943862915,
"step": 3838
},
{
"epoch": 2.4436664544875875,
"grad_norm": 8.149317314762413,
"learning_rate": 1.014053495262503e-06,
"loss": 0.1832207292318344,
"step": 3839
},
{
"epoch": 2.444302991725016,
"grad_norm": 13.813658586031599,
"learning_rate": 1.011818475045792e-06,
"loss": 0.2514493465423584,
"step": 3840
},
{
"epoch": 2.4449395289624443,
"grad_norm": 9.074076999622344,
"learning_rate": 1.0095856432864847e-06,
"loss": 0.30189287662506104,
"step": 3841
},
{
"epoch": 2.4455760661998727,
"grad_norm": 13.241646581537935,
"learning_rate": 1.007355001209815e-06,
"loss": 0.4534422755241394,
"step": 3842
},
{
"epoch": 2.446212603437301,
"grad_norm": 6.572917170512304,
"learning_rate": 1.0051265500398183e-06,
"loss": 0.3295198678970337,
"step": 3843
},
{
"epoch": 2.4468491406747295,
"grad_norm": 10.286209203796565,
"learning_rate": 1.0029002909993207e-06,
"loss": 0.2722637951374054,
"step": 3844
},
{
"epoch": 2.447485677912158,
"grad_norm": 11.733757980427388,
"learning_rate": 1.0006762253099544e-06,
"loss": 0.42865797877311707,
"step": 3845
},
{
"epoch": 2.4481222151495863,
"grad_norm": 9.607900959176765,
"learning_rate": 9.984543541921414e-07,
"loss": 0.7750614285469055,
"step": 3846
},
{
"epoch": 2.4487587523870147,
"grad_norm": 14.099006354407399,
"learning_rate": 9.962346788651024e-07,
"loss": 0.3322318196296692,
"step": 3847
},
{
"epoch": 2.449395289624443,
"grad_norm": 9.773312345373958,
"learning_rate": 9.940172005468513e-07,
"loss": 0.2014181762933731,
"step": 3848
},
{
"epoch": 2.4500318268618715,
"grad_norm": 11.983172594501728,
"learning_rate": 9.918019204541979e-07,
"loss": 0.28286194801330566,
"step": 3849
},
{
"epoch": 2.4506683640993,
"grad_norm": 13.473466908382198,
"learning_rate": 9.89588839802746e-07,
"loss": 0.6655032634735107,
"step": 3850
},
{
"epoch": 2.4513049013367283,
"grad_norm": 6.089497199138919,
"learning_rate": 9.8737795980689e-07,
"loss": 0.15333639085292816,
"step": 3851
},
{
"epoch": 2.4519414385741567,
"grad_norm": 10.061578182572855,
"learning_rate": 9.851692816798214e-07,
"loss": 0.7303205132484436,
"step": 3852
},
{
"epoch": 2.452577975811585,
"grad_norm": 10.28287128520656,
"learning_rate": 9.829628066335196e-07,
"loss": 0.23664222657680511,
"step": 3853
},
{
"epoch": 2.4532145130490135,
"grad_norm": 11.125450201620936,
"learning_rate": 9.807585358787552e-07,
"loss": 0.7476584911346436,
"step": 3854
},
{
"epoch": 2.453851050286442,
"grad_norm": 14.15945730067247,
"learning_rate": 9.785564706250895e-07,
"loss": 0.39169731736183167,
"step": 3855
},
{
"epoch": 2.4544875875238703,
"grad_norm": 11.623734157450794,
"learning_rate": 9.763566120808788e-07,
"loss": 0.30617618560791016,
"step": 3856
},
{
"epoch": 2.4551241247612987,
"grad_norm": 17.45937790047464,
"learning_rate": 9.74158961453257e-07,
"loss": 0.501999020576477,
"step": 3857
},
{
"epoch": 2.455760661998727,
"grad_norm": 11.718341948276963,
"learning_rate": 9.719635199481586e-07,
"loss": 0.41996297240257263,
"step": 3858
},
{
"epoch": 2.4563971992361555,
"grad_norm": 9.199374022804443,
"learning_rate": 9.697702887702992e-07,
"loss": 0.5343721508979797,
"step": 3859
},
{
"epoch": 2.4570337364735835,
"grad_norm": 10.281772022429909,
"learning_rate": 9.675792691231829e-07,
"loss": 0.45031020045280457,
"step": 3860
},
{
"epoch": 2.4576702737110123,
"grad_norm": 11.539028460864573,
"learning_rate": 9.653904622090988e-07,
"loss": 0.7029082775115967,
"step": 3861
},
{
"epoch": 2.4583068109484403,
"grad_norm": 8.411805511544255,
"learning_rate": 9.632038692291257e-07,
"loss": 0.54490065574646,
"step": 3862
},
{
"epoch": 2.4589433481858687,
"grad_norm": 11.488121758279865,
"learning_rate": 9.61019491383125e-07,
"loss": 0.18793101608753204,
"step": 3863
},
{
"epoch": 2.459579885423297,
"grad_norm": 11.843548707182343,
"learning_rate": 9.588373298697396e-07,
"loss": 1.120024561882019,
"step": 3864
},
{
"epoch": 2.4602164226607255,
"grad_norm": 10.143041543062624,
"learning_rate": 9.56657385886403e-07,
"loss": 0.35088586807250977,
"step": 3865
},
{
"epoch": 2.460852959898154,
"grad_norm": 11.223033111281344,
"learning_rate": 9.544796606293267e-07,
"loss": 0.3293212056159973,
"step": 3866
},
{
"epoch": 2.4614894971355823,
"grad_norm": 8.76476499373512,
"learning_rate": 9.523041552935058e-07,
"loss": 0.41763240098953247,
"step": 3867
},
{
"epoch": 2.4621260343730107,
"grad_norm": 14.93547195452655,
"learning_rate": 9.501308710727169e-07,
"loss": 0.41176217794418335,
"step": 3868
},
{
"epoch": 2.462762571610439,
"grad_norm": 7.530577228729636,
"learning_rate": 9.479598091595221e-07,
"loss": 0.3675522208213806,
"step": 3869
},
{
"epoch": 2.4633991088478675,
"grad_norm": 8.908702620139834,
"learning_rate": 9.457909707452556e-07,
"loss": 0.30302858352661133,
"step": 3870
},
{
"epoch": 2.464035646085296,
"grad_norm": 12.533662515672638,
"learning_rate": 9.436243570200404e-07,
"loss": 0.3298501968383789,
"step": 3871
},
{
"epoch": 2.4646721833227243,
"grad_norm": 11.650253720807704,
"learning_rate": 9.414599691727728e-07,
"loss": 0.22967742383480072,
"step": 3872
},
{
"epoch": 2.4653087205601527,
"grad_norm": 8.163958695083027,
"learning_rate": 9.392978083911303e-07,
"loss": 0.14559996128082275,
"step": 3873
},
{
"epoch": 2.465945257797581,
"grad_norm": 10.821015630406567,
"learning_rate": 9.371378758615657e-07,
"loss": 0.2332065999507904,
"step": 3874
},
{
"epoch": 2.4665817950350095,
"grad_norm": 23.81465779995915,
"learning_rate": 9.349801727693137e-07,
"loss": 0.48943302035331726,
"step": 3875
},
{
"epoch": 2.467218332272438,
"grad_norm": 18.46479765305656,
"learning_rate": 9.328247002983815e-07,
"loss": 0.4668262004852295,
"step": 3876
},
{
"epoch": 2.4678548695098663,
"grad_norm": 14.125759122355031,
"learning_rate": 9.306714596315547e-07,
"loss": 0.5885692834854126,
"step": 3877
},
{
"epoch": 2.4684914067472947,
"grad_norm": 9.984601554196471,
"learning_rate": 9.285204519503904e-07,
"loss": 0.43786299228668213,
"step": 3878
},
{
"epoch": 2.469127943984723,
"grad_norm": 10.915351114585736,
"learning_rate": 9.263716784352284e-07,
"loss": 0.4853839874267578,
"step": 3879
},
{
"epoch": 2.4697644812221515,
"grad_norm": 14.445961340018746,
"learning_rate": 9.242251402651731e-07,
"loss": 0.7993488311767578,
"step": 3880
},
{
"epoch": 2.47040101845958,
"grad_norm": 18.093932768325843,
"learning_rate": 9.220808386181058e-07,
"loss": 0.5057962536811829,
"step": 3881
},
{
"epoch": 2.4710375556970083,
"grad_norm": 8.153569956664871,
"learning_rate": 9.19938774670685e-07,
"loss": 0.5910652279853821,
"step": 3882
},
{
"epoch": 2.4716740929344367,
"grad_norm": 8.917882167784342,
"learning_rate": 9.177989495983353e-07,
"loss": 0.5409796237945557,
"step": 3883
},
{
"epoch": 2.472310630171865,
"grad_norm": 11.998262245548794,
"learning_rate": 9.156613645752554e-07,
"loss": 0.3271867632865906,
"step": 3884
},
{
"epoch": 2.4729471674092935,
"grad_norm": 15.912088236186927,
"learning_rate": 9.135260207744134e-07,
"loss": 0.4743756055831909,
"step": 3885
},
{
"epoch": 2.473583704646722,
"grad_norm": 13.545048782327063,
"learning_rate": 9.11392919367552e-07,
"loss": 0.45293328166007996,
"step": 3886
},
{
"epoch": 2.4742202418841504,
"grad_norm": 17.78342517288051,
"learning_rate": 9.092620615251752e-07,
"loss": 0.5736050605773926,
"step": 3887
},
{
"epoch": 2.4748567791215788,
"grad_norm": 11.884262782314487,
"learning_rate": 9.071334484165639e-07,
"loss": 1.0076842308044434,
"step": 3888
},
{
"epoch": 2.475493316359007,
"grad_norm": 11.20110060220873,
"learning_rate": 9.050070812097639e-07,
"loss": 0.7217289805412292,
"step": 3889
},
{
"epoch": 2.4761298535964356,
"grad_norm": 12.016217792995912,
"learning_rate": 9.028829610715872e-07,
"loss": 0.7828927040100098,
"step": 3890
},
{
"epoch": 2.476766390833864,
"grad_norm": 12.341574031143532,
"learning_rate": 9.007610891676144e-07,
"loss": 0.6456081867218018,
"step": 3891
},
{
"epoch": 2.4774029280712924,
"grad_norm": 12.92072370891837,
"learning_rate": 8.986414666621951e-07,
"loss": 0.8124405145645142,
"step": 3892
},
{
"epoch": 2.4780394653087203,
"grad_norm": 12.145863675913166,
"learning_rate": 8.965240947184373e-07,
"loss": 0.8363019824028015,
"step": 3893
},
{
"epoch": 2.478676002546149,
"grad_norm": 7.295683922551025,
"learning_rate": 8.94408974498222e-07,
"loss": 0.35038474202156067,
"step": 3894
},
{
"epoch": 2.479312539783577,
"grad_norm": 9.352470171523942,
"learning_rate": 8.9229610716219e-07,
"loss": 0.3922230005264282,
"step": 3895
},
{
"epoch": 2.479949077021006,
"grad_norm": 10.36961540397416,
"learning_rate": 8.901854938697463e-07,
"loss": 0.2653026282787323,
"step": 3896
},
{
"epoch": 2.480585614258434,
"grad_norm": 10.90727526950787,
"learning_rate": 8.880771357790608e-07,
"loss": 0.4123893976211548,
"step": 3897
},
{
"epoch": 2.4812221514958623,
"grad_norm": 16.136954113514566,
"learning_rate": 8.859710340470623e-07,
"loss": 1.1528195142745972,
"step": 3898
},
{
"epoch": 2.4818586887332907,
"grad_norm": 24.286155538119257,
"learning_rate": 8.838671898294482e-07,
"loss": 0.6724939942359924,
"step": 3899
},
{
"epoch": 2.482495225970719,
"grad_norm": 8.850487740533744,
"learning_rate": 8.817656042806683e-07,
"loss": 0.44073688983917236,
"step": 3900
},
{
"epoch": 2.4831317632081475,
"grad_norm": 8.418386604147216,
"learning_rate": 8.7966627855394e-07,
"loss": 0.3672140836715698,
"step": 3901
},
{
"epoch": 2.483768300445576,
"grad_norm": 11.04770614810734,
"learning_rate": 8.775692138012387e-07,
"loss": 0.48692113161087036,
"step": 3902
},
{
"epoch": 2.4844048376830044,
"grad_norm": 10.974564403318755,
"learning_rate": 8.754744111732972e-07,
"loss": 0.9503442645072937,
"step": 3903
},
{
"epoch": 2.4850413749204328,
"grad_norm": 13.990625782968438,
"learning_rate": 8.733818718196075e-07,
"loss": 0.7163272500038147,
"step": 3904
},
{
"epoch": 2.485677912157861,
"grad_norm": 12.00443011795822,
"learning_rate": 8.712915968884234e-07,
"loss": 0.5029400587081909,
"step": 3905
},
{
"epoch": 2.4863144493952896,
"grad_norm": 17.965271833041577,
"learning_rate": 8.692035875267519e-07,
"loss": 0.41578686237335205,
"step": 3906
},
{
"epoch": 2.486950986632718,
"grad_norm": 12.673551002344931,
"learning_rate": 8.67117844880358e-07,
"loss": 0.7005525827407837,
"step": 3907
},
{
"epoch": 2.4875875238701464,
"grad_norm": 21.309574822893687,
"learning_rate": 8.650343700937614e-07,
"loss": 0.46643948554992676,
"step": 3908
},
{
"epoch": 2.4882240611075748,
"grad_norm": 9.894789904552644,
"learning_rate": 8.62953164310244e-07,
"loss": 0.5438557267189026,
"step": 3909
},
{
"epoch": 2.488860598345003,
"grad_norm": 8.866708788641482,
"learning_rate": 8.608742286718314e-07,
"loss": 0.5588964819908142,
"step": 3910
},
{
"epoch": 2.4894971355824316,
"grad_norm": 11.462387009170216,
"learning_rate": 8.587975643193136e-07,
"loss": 0.3707104027271271,
"step": 3911
},
{
"epoch": 2.49013367281986,
"grad_norm": 12.067292862429397,
"learning_rate": 8.567231723922298e-07,
"loss": 1.8560024499893188,
"step": 3912
},
{
"epoch": 2.4907702100572884,
"grad_norm": 8.383572870077932,
"learning_rate": 8.546510540288728e-07,
"loss": 0.2791942059993744,
"step": 3913
},
{
"epoch": 2.491406747294717,
"grad_norm": 12.672973099526894,
"learning_rate": 8.525812103662872e-07,
"loss": 0.7258465886116028,
"step": 3914
},
{
"epoch": 2.492043284532145,
"grad_norm": 10.568179289112296,
"learning_rate": 8.505136425402704e-07,
"loss": 0.45907819271087646,
"step": 3915
},
{
"epoch": 2.4926798217695736,
"grad_norm": 13.027772226849727,
"learning_rate": 8.484483516853703e-07,
"loss": 2.177560806274414,
"step": 3916
},
{
"epoch": 2.493316359007002,
"grad_norm": 8.846560603648301,
"learning_rate": 8.463853389348847e-07,
"loss": 0.21484342217445374,
"step": 3917
},
{
"epoch": 2.4939528962444304,
"grad_norm": 10.11725067405563,
"learning_rate": 8.443246054208642e-07,
"loss": 0.5265414714813232,
"step": 3918
},
{
"epoch": 2.494589433481859,
"grad_norm": 28.25200085103127,
"learning_rate": 8.422661522741065e-07,
"loss": 0.648182213306427,
"step": 3919
},
{
"epoch": 2.495225970719287,
"grad_norm": 11.453150987186422,
"learning_rate": 8.402099806241576e-07,
"loss": 0.6163341999053955,
"step": 3920
},
{
"epoch": 2.4958625079567156,
"grad_norm": 12.861579324596951,
"learning_rate": 8.381560915993109e-07,
"loss": 0.23936185240745544,
"step": 3921
},
{
"epoch": 2.496499045194144,
"grad_norm": 9.511261670278138,
"learning_rate": 8.361044863266127e-07,
"loss": 0.6329669952392578,
"step": 3922
},
{
"epoch": 2.4971355824315724,
"grad_norm": 11.293623223865943,
"learning_rate": 8.340551659318463e-07,
"loss": 0.5023452043533325,
"step": 3923
},
{
"epoch": 2.497772119669001,
"grad_norm": 20.634585799349573,
"learning_rate": 8.320081315395512e-07,
"loss": 0.6741424798965454,
"step": 3924
},
{
"epoch": 2.498408656906429,
"grad_norm": 25.022550487994334,
"learning_rate": 8.299633842730065e-07,
"loss": 0.32343339920043945,
"step": 3925
},
{
"epoch": 2.499045194143857,
"grad_norm": 11.903436268800743,
"learning_rate": 8.27920925254238e-07,
"loss": 0.32865163683891296,
"step": 3926
},
{
"epoch": 2.499681731381286,
"grad_norm": 14.19780664980985,
"learning_rate": 8.258807556040149e-07,
"loss": 0.8066063523292542,
"step": 3927
},
{
"epoch": 2.500318268618714,
"grad_norm": 12.088809442154751,
"learning_rate": 8.238428764418532e-07,
"loss": 0.9081128835678101,
"step": 3928
},
{
"epoch": 2.500954805856143,
"grad_norm": 15.293012723065615,
"learning_rate": 8.21807288886009e-07,
"loss": 0.8295964598655701,
"step": 3929
},
{
"epoch": 2.501591343093571,
"grad_norm": 15.73977461196448,
"learning_rate": 8.197739940534815e-07,
"loss": 0.43273162841796875,
"step": 3930
},
{
"epoch": 2.5022278803309996,
"grad_norm": 13.338943731268314,
"learning_rate": 8.177429930600117e-07,
"loss": 0.27769073843955994,
"step": 3931
},
{
"epoch": 2.5028644175684276,
"grad_norm": 14.220792861448592,
"learning_rate": 8.15714287020083e-07,
"loss": 0.49164503812789917,
"step": 3932
},
{
"epoch": 2.503500954805856,
"grad_norm": 12.05284287892093,
"learning_rate": 8.136878770469181e-07,
"loss": 0.48813530802726746,
"step": 3933
},
{
"epoch": 2.5041374920432844,
"grad_norm": 8.556958885253536,
"learning_rate": 8.116637642524794e-07,
"loss": 0.26071983575820923,
"step": 3934
},
{
"epoch": 2.504774029280713,
"grad_norm": 8.225598932794536,
"learning_rate": 8.096419497474739e-07,
"loss": 0.2683258652687073,
"step": 3935
},
{
"epoch": 2.505410566518141,
"grad_norm": 19.262279699824376,
"learning_rate": 8.076224346413375e-07,
"loss": 0.4536847770214081,
"step": 3936
},
{
"epoch": 2.5060471037555696,
"grad_norm": 17.66581177773771,
"learning_rate": 8.056052200422543e-07,
"loss": 0.41516709327697754,
"step": 3937
},
{
"epoch": 2.506683640992998,
"grad_norm": 11.601108596142094,
"learning_rate": 8.035903070571411e-07,
"loss": 0.27830269932746887,
"step": 3938
},
{
"epoch": 2.5073201782304264,
"grad_norm": 7.974001864271789,
"learning_rate": 8.015776967916517e-07,
"loss": 0.3432263135910034,
"step": 3939
},
{
"epoch": 2.507956715467855,
"grad_norm": 10.27169560967598,
"learning_rate": 7.995673903501766e-07,
"loss": 0.25255173444747925,
"step": 3940
},
{
"epoch": 2.508593252705283,
"grad_norm": 9.636318330866025,
"learning_rate": 7.97559388835844e-07,
"loss": 0.32532811164855957,
"step": 3941
},
{
"epoch": 2.5092297899427116,
"grad_norm": 11.27672640424502,
"learning_rate": 7.955536933505159e-07,
"loss": 0.39307156205177307,
"step": 3942
},
{
"epoch": 2.50986632718014,
"grad_norm": 11.312664236327247,
"learning_rate": 7.935503049947885e-07,
"loss": 0.606777548789978,
"step": 3943
},
{
"epoch": 2.5105028644175684,
"grad_norm": 10.63463186456689,
"learning_rate": 7.915492248679912e-07,
"loss": 0.3355543613433838,
"step": 3944
},
{
"epoch": 2.511139401654997,
"grad_norm": 10.974903445083752,
"learning_rate": 7.895504540681914e-07,
"loss": 0.32796362042427063,
"step": 3945
},
{
"epoch": 2.5117759388924252,
"grad_norm": 12.012493693189512,
"learning_rate": 7.875539936921811e-07,
"loss": 0.33286190032958984,
"step": 3946
},
{
"epoch": 2.5124124761298536,
"grad_norm": 7.76592086315613,
"learning_rate": 7.855598448354935e-07,
"loss": 0.4606126844882965,
"step": 3947
},
{
"epoch": 2.513049013367282,
"grad_norm": 13.569980323182346,
"learning_rate": 7.835680085923874e-07,
"loss": 0.4506418704986572,
"step": 3948
},
{
"epoch": 2.5136855506047104,
"grad_norm": 7.968347232818058,
"learning_rate": 7.815784860558545e-07,
"loss": 0.40884724259376526,
"step": 3949
},
{
"epoch": 2.514322087842139,
"grad_norm": 8.849704702994723,
"learning_rate": 7.795912783176169e-07,
"loss": 0.2936815917491913,
"step": 3950
},
{
"epoch": 2.5149586250795672,
"grad_norm": 8.899514894978735,
"learning_rate": 7.776063864681249e-07,
"loss": 0.44772791862487793,
"step": 3951
},
{
"epoch": 2.5155951623169956,
"grad_norm": 7.9871753424606675,
"learning_rate": 7.756238115965631e-07,
"loss": 0.5370556116104126,
"step": 3952
},
{
"epoch": 2.516231699554424,
"grad_norm": 10.282090886763076,
"learning_rate": 7.736435547908367e-07,
"loss": 0.7182104587554932,
"step": 3953
},
{
"epoch": 2.5168682367918525,
"grad_norm": 11.921670321742583,
"learning_rate": 7.716656171375858e-07,
"loss": 0.2773297131061554,
"step": 3954
},
{
"epoch": 2.517504774029281,
"grad_norm": 10.810132079204251,
"learning_rate": 7.696899997221752e-07,
"loss": 0.9390149712562561,
"step": 3955
},
{
"epoch": 2.5181413112667093,
"grad_norm": 14.82963570830732,
"learning_rate": 7.677167036286959e-07,
"loss": 0.36737754940986633,
"step": 3956
},
{
"epoch": 2.518777848504137,
"grad_norm": 9.67913638860974,
"learning_rate": 7.657457299399645e-07,
"loss": 0.6483880281448364,
"step": 3957
},
{
"epoch": 2.519414385741566,
"grad_norm": 17.581677424639686,
"learning_rate": 7.637770797375288e-07,
"loss": 0.6671686172485352,
"step": 3958
},
{
"epoch": 2.520050922978994,
"grad_norm": 8.585393533998928,
"learning_rate": 7.618107541016523e-07,
"loss": 0.22666820883750916,
"step": 3959
},
{
"epoch": 2.520687460216423,
"grad_norm": 9.979454509852893,
"learning_rate": 7.598467541113319e-07,
"loss": 0.1724715530872345,
"step": 3960
},
{
"epoch": 2.521323997453851,
"grad_norm": 12.356985596453415,
"learning_rate": 7.578850808442823e-07,
"loss": 0.3336951732635498,
"step": 3961
},
{
"epoch": 2.5219605346912797,
"grad_norm": 11.86635527683857,
"learning_rate": 7.559257353769445e-07,
"loss": 0.7841365933418274,
"step": 3962
},
{
"epoch": 2.5225970719287076,
"grad_norm": 11.404116510648365,
"learning_rate": 7.539687187844791e-07,
"loss": 0.3960840404033661,
"step": 3963
},
{
"epoch": 2.5232336091661365,
"grad_norm": 13.485822498869954,
"learning_rate": 7.520140321407743e-07,
"loss": 0.6474733352661133,
"step": 3964
},
{
"epoch": 2.5238701464035644,
"grad_norm": 10.513039577614204,
"learning_rate": 7.50061676518436e-07,
"loss": 0.44929003715515137,
"step": 3965
},
{
"epoch": 2.524506683640993,
"grad_norm": 9.096772031182493,
"learning_rate": 7.481116529887872e-07,
"loss": 0.1913546770811081,
"step": 3966
},
{
"epoch": 2.5251432208784212,
"grad_norm": 10.32703529747825,
"learning_rate": 7.461639626218797e-07,
"loss": 0.6519386768341064,
"step": 3967
},
{
"epoch": 2.5257797581158496,
"grad_norm": 10.775277765817435,
"learning_rate": 7.442186064864787e-07,
"loss": 0.49530237913131714,
"step": 3968
},
{
"epoch": 2.526416295353278,
"grad_norm": 12.445278410113824,
"learning_rate": 7.422755856500713e-07,
"loss": 0.3465782105922699,
"step": 3969
},
{
"epoch": 2.5270528325907065,
"grad_norm": 7.851044789090474,
"learning_rate": 7.403349011788608e-07,
"loss": 0.6121078729629517,
"step": 3970
},
{
"epoch": 2.527689369828135,
"grad_norm": 10.571941989295352,
"learning_rate": 7.383965541377719e-07,
"loss": 0.30977076292037964,
"step": 3971
},
{
"epoch": 2.5283259070655633,
"grad_norm": 14.761430912459188,
"learning_rate": 7.364605455904439e-07,
"loss": 0.4765622615814209,
"step": 3972
},
{
"epoch": 2.5289624443029917,
"grad_norm": 13.96702196193006,
"learning_rate": 7.345268765992342e-07,
"loss": 0.4707704782485962,
"step": 3973
},
{
"epoch": 2.52959898154042,
"grad_norm": 7.610110479363033,
"learning_rate": 7.325955482252139e-07,
"loss": 0.15941064059734344,
"step": 3974
},
{
"epoch": 2.5302355187778485,
"grad_norm": 14.639664463905218,
"learning_rate": 7.306665615281755e-07,
"loss": 0.37662458419799805,
"step": 3975
},
{
"epoch": 2.530872056015277,
"grad_norm": 5.972874401662598,
"learning_rate": 7.287399175666187e-07,
"loss": 0.08262288570404053,
"step": 3976
},
{
"epoch": 2.5315085932527053,
"grad_norm": 8.333552733177665,
"learning_rate": 7.268156173977642e-07,
"loss": 0.4235314130783081,
"step": 3977
},
{
"epoch": 2.5321451304901337,
"grad_norm": 34.24994602674094,
"learning_rate": 7.248936620775438e-07,
"loss": 1.9823403358459473,
"step": 3978
},
{
"epoch": 2.532781667727562,
"grad_norm": 7.633287923802357,
"learning_rate": 7.229740526606027e-07,
"loss": 0.3385160565376282,
"step": 3979
},
{
"epoch": 2.5334182049649905,
"grad_norm": 11.401673992594684,
"learning_rate": 7.210567902002979e-07,
"loss": 0.36040711402893066,
"step": 3980
},
{
"epoch": 2.534054742202419,
"grad_norm": 10.234848659750531,
"learning_rate": 7.191418757487029e-07,
"loss": 0.7135156393051147,
"step": 3981
},
{
"epoch": 2.5346912794398473,
"grad_norm": 18.309217691170605,
"learning_rate": 7.172293103565963e-07,
"loss": 0.623892068862915,
"step": 3982
},
{
"epoch": 2.5353278166772757,
"grad_norm": 14.302095093065146,
"learning_rate": 7.153190950734707e-07,
"loss": 0.8380212187767029,
"step": 3983
},
{
"epoch": 2.535964353914704,
"grad_norm": 11.761961408069393,
"learning_rate": 7.134112309475316e-07,
"loss": 0.48324182629585266,
"step": 3984
},
{
"epoch": 2.5366008911521325,
"grad_norm": 14.556210987287836,
"learning_rate": 7.115057190256913e-07,
"loss": 0.606602668762207,
"step": 3985
},
{
"epoch": 2.537237428389561,
"grad_norm": 6.9981359977166715,
"learning_rate": 7.096025603535722e-07,
"loss": 0.3626210689544678,
"step": 3986
},
{
"epoch": 2.5378739656269893,
"grad_norm": 13.52001722625275,
"learning_rate": 7.07701755975504e-07,
"loss": 0.6667066216468811,
"step": 3987
},
{
"epoch": 2.5385105028644177,
"grad_norm": 8.004767110842625,
"learning_rate": 7.058033069345288e-07,
"loss": 0.723233163356781,
"step": 3988
},
{
"epoch": 2.539147040101846,
"grad_norm": 15.23538216180776,
"learning_rate": 7.03907214272389e-07,
"loss": 0.4600798487663269,
"step": 3989
},
{
"epoch": 2.5397835773392745,
"grad_norm": 7.956737760053174,
"learning_rate": 7.020134790295419e-07,
"loss": 0.3741765320301056,
"step": 3990
},
{
"epoch": 2.540420114576703,
"grad_norm": 11.974922232816839,
"learning_rate": 7.001221022451466e-07,
"loss": 0.39778295159339905,
"step": 3991
},
{
"epoch": 2.541056651814131,
"grad_norm": 7.161783117632343,
"learning_rate": 6.982330849570684e-07,
"loss": 0.666446328163147,
"step": 3992
},
{
"epoch": 2.5416931890515597,
"grad_norm": 9.27701058935869,
"learning_rate": 6.963464282018773e-07,
"loss": 0.2722229063510895,
"step": 3993
},
{
"epoch": 2.5423297262889877,
"grad_norm": 13.649664330044764,
"learning_rate": 6.944621330148521e-07,
"loss": 0.35371774435043335,
"step": 3994
},
{
"epoch": 2.5429662635264165,
"grad_norm": 12.557999458691365,
"learning_rate": 6.925802004299709e-07,
"loss": 0.5170055627822876,
"step": 3995
},
{
"epoch": 2.5436028007638445,
"grad_norm": 12.669537478796821,
"learning_rate": 6.90700631479918e-07,
"loss": 0.7935366630554199,
"step": 3996
},
{
"epoch": 2.5442393380012733,
"grad_norm": 15.257197141295174,
"learning_rate": 6.888234271960786e-07,
"loss": 0.35724225640296936,
"step": 3997
},
{
"epoch": 2.5448758752387013,
"grad_norm": 10.843805919979113,
"learning_rate": 6.869485886085431e-07,
"loss": 0.18914970755577087,
"step": 3998
},
{
"epoch": 2.5455124124761297,
"grad_norm": 8.336491374798458,
"learning_rate": 6.850761167461012e-07,
"loss": 0.24970906972885132,
"step": 3999
},
{
"epoch": 2.546148949713558,
"grad_norm": 8.864669665117166,
"learning_rate": 6.832060126362444e-07,
"loss": 0.4097861647605896,
"step": 4000
},
{
"epoch": 2.5467854869509865,
"grad_norm": 12.519767174722935,
"learning_rate": 6.813382773051686e-07,
"loss": 0.30255192518234253,
"step": 4001
},
{
"epoch": 2.547422024188415,
"grad_norm": 10.04382326079557,
"learning_rate": 6.794729117777626e-07,
"loss": 0.35676640272140503,
"step": 4002
},
{
"epoch": 2.5480585614258433,
"grad_norm": 26.03594131936014,
"learning_rate": 6.776099170776224e-07,
"loss": 1.2304989099502563,
"step": 4003
},
{
"epoch": 2.5486950986632717,
"grad_norm": 18.41409836626958,
"learning_rate": 6.757492942270377e-07,
"loss": 0.7692633867263794,
"step": 4004
},
{
"epoch": 2.5493316359007,
"grad_norm": 11.782444142211297,
"learning_rate": 6.738910442469993e-07,
"loss": 0.6399699449539185,
"step": 4005
},
{
"epoch": 2.5499681731381285,
"grad_norm": 10.106840689907994,
"learning_rate": 6.72035168157194e-07,
"loss": 0.5657060146331787,
"step": 4006
},
{
"epoch": 2.550604710375557,
"grad_norm": 7.183838333427905,
"learning_rate": 6.701816669760097e-07,
"loss": 0.23255078494548798,
"step": 4007
},
{
"epoch": 2.5512412476129853,
"grad_norm": 8.659732129996089,
"learning_rate": 6.683305417205266e-07,
"loss": 0.22366073727607727,
"step": 4008
},
{
"epoch": 2.5518777848504137,
"grad_norm": 12.520710299607957,
"learning_rate": 6.664817934065237e-07,
"loss": 0.3836514949798584,
"step": 4009
},
{
"epoch": 2.552514322087842,
"grad_norm": 12.39630940270907,
"learning_rate": 6.646354230484741e-07,
"loss": 0.2722182273864746,
"step": 4010
},
{
"epoch": 2.5531508593252705,
"grad_norm": 12.83135676896471,
"learning_rate": 6.627914316595508e-07,
"loss": 0.32897594571113586,
"step": 4011
},
{
"epoch": 2.553787396562699,
"grad_norm": 10.065896376482925,
"learning_rate": 6.609498202516119e-07,
"loss": 0.7564833164215088,
"step": 4012
},
{
"epoch": 2.5544239338001273,
"grad_norm": 9.221994387866406,
"learning_rate": 6.591105898352195e-07,
"loss": 0.3179935812950134,
"step": 4013
},
{
"epoch": 2.5550604710375557,
"grad_norm": 15.666651912373274,
"learning_rate": 6.57273741419624e-07,
"loss": 0.42274990677833557,
"step": 4014
},
{
"epoch": 2.555697008274984,
"grad_norm": 15.109293361481123,
"learning_rate": 6.554392760127687e-07,
"loss": 0.4832228422164917,
"step": 4015
},
{
"epoch": 2.5563335455124125,
"grad_norm": 10.385417905551119,
"learning_rate": 6.536071946212908e-07,
"loss": 0.8315209746360779,
"step": 4016
},
{
"epoch": 2.556970082749841,
"grad_norm": 10.986378584095577,
"learning_rate": 6.517774982505176e-07,
"loss": 0.43238815665245056,
"step": 4017
},
{
"epoch": 2.5576066199872693,
"grad_norm": 14.03450280081931,
"learning_rate": 6.499501879044723e-07,
"loss": 0.6071986556053162,
"step": 4018
},
{
"epoch": 2.5582431572246978,
"grad_norm": 10.179249658471798,
"learning_rate": 6.481252645858599e-07,
"loss": 0.4217533767223358,
"step": 4019
},
{
"epoch": 2.558879694462126,
"grad_norm": 25.390846667098813,
"learning_rate": 6.463027292960849e-07,
"loss": 0.6482077836990356,
"step": 4020
},
{
"epoch": 2.5595162316995546,
"grad_norm": 15.746180799660124,
"learning_rate": 6.444825830352363e-07,
"loss": 0.27979302406311035,
"step": 4021
},
{
"epoch": 2.560152768936983,
"grad_norm": 10.620053353234267,
"learning_rate": 6.42664826802093e-07,
"loss": 0.3468106687068939,
"step": 4022
},
{
"epoch": 2.5607893061744114,
"grad_norm": 11.187646474061209,
"learning_rate": 6.408494615941219e-07,
"loss": 0.30796951055526733,
"step": 4023
},
{
"epoch": 2.5614258434118398,
"grad_norm": 11.421373336232325,
"learning_rate": 6.390364884074812e-07,
"loss": 0.4836077094078064,
"step": 4024
},
{
"epoch": 2.5620623806492677,
"grad_norm": 12.070982990539402,
"learning_rate": 6.372259082370103e-07,
"loss": 0.649453341960907,
"step": 4025
},
{
"epoch": 2.5626989178866966,
"grad_norm": 16.80272281678233,
"learning_rate": 6.35417722076242e-07,
"loss": 0.45360037684440613,
"step": 4026
},
{
"epoch": 2.5633354551241245,
"grad_norm": 15.968912437067937,
"learning_rate": 6.336119309173922e-07,
"loss": 0.43244290351867676,
"step": 4027
},
{
"epoch": 2.5639719923615534,
"grad_norm": 9.986780279856935,
"learning_rate": 6.318085357513615e-07,
"loss": 0.5531265139579773,
"step": 4028
},
{
"epoch": 2.5646085295989813,
"grad_norm": 9.755041940144544,
"learning_rate": 6.300075375677367e-07,
"loss": 0.5455867052078247,
"step": 4029
},
{
"epoch": 2.56524506683641,
"grad_norm": 15.749565956253951,
"learning_rate": 6.282089373547922e-07,
"loss": 0.7256792783737183,
"step": 4030
},
{
"epoch": 2.565881604073838,
"grad_norm": 10.14669687129771,
"learning_rate": 6.264127360994832e-07,
"loss": 0.3116908073425293,
"step": 4031
},
{
"epoch": 2.5665181413112665,
"grad_norm": 23.012494647304095,
"learning_rate": 6.246189347874482e-07,
"loss": 0.5365034341812134,
"step": 4032
},
{
"epoch": 2.567154678548695,
"grad_norm": 10.798557794766793,
"learning_rate": 6.22827534403011e-07,
"loss": 0.5954269170761108,
"step": 4033
},
{
"epoch": 2.5677912157861233,
"grad_norm": 12.057567307557491,
"learning_rate": 6.210385359291765e-07,
"loss": 0.3405521810054779,
"step": 4034
},
{
"epoch": 2.5684277530235518,
"grad_norm": 8.084289365850049,
"learning_rate": 6.192519403476316e-07,
"loss": 0.5951095223426819,
"step": 4035
},
{
"epoch": 2.56906429026098,
"grad_norm": 10.125116903279523,
"learning_rate": 6.174677486387448e-07,
"loss": 0.6498456001281738,
"step": 4036
},
{
"epoch": 2.5697008274984086,
"grad_norm": 11.203961418863967,
"learning_rate": 6.156859617815675e-07,
"loss": 0.250361829996109,
"step": 4037
},
{
"epoch": 2.570337364735837,
"grad_norm": 11.947080547454908,
"learning_rate": 6.13906580753828e-07,
"loss": 0.4113607406616211,
"step": 4038
},
{
"epoch": 2.5709739019732654,
"grad_norm": 7.409866854811151,
"learning_rate": 6.121296065319365e-07,
"loss": 0.31993091106414795,
"step": 4039
},
{
"epoch": 2.5716104392106938,
"grad_norm": 9.538678239747178,
"learning_rate": 6.103550400909824e-07,
"loss": 0.2650909721851349,
"step": 4040
},
{
"epoch": 2.572246976448122,
"grad_norm": 11.683157176185997,
"learning_rate": 6.085828824047336e-07,
"loss": 0.26428112387657166,
"step": 4041
},
{
"epoch": 2.5728835136855506,
"grad_norm": 17.138762476077595,
"learning_rate": 6.068131344456346e-07,
"loss": 0.5150033235549927,
"step": 4042
},
{
"epoch": 2.573520050922979,
"grad_norm": 8.654326928252814,
"learning_rate": 6.050457971848117e-07,
"loss": 0.5108681321144104,
"step": 4043
},
{
"epoch": 2.5741565881604074,
"grad_norm": 7.773533322785913,
"learning_rate": 6.032808715920646e-07,
"loss": 0.17469176650047302,
"step": 4044
},
{
"epoch": 2.574793125397836,
"grad_norm": 10.35576578065918,
"learning_rate": 6.015183586358714e-07,
"loss": 0.21947219967842102,
"step": 4045
},
{
"epoch": 2.575429662635264,
"grad_norm": 7.9682456882626775,
"learning_rate": 5.997582592833839e-07,
"loss": 0.4151739776134491,
"step": 4046
},
{
"epoch": 2.5760661998726926,
"grad_norm": 9.959392351046564,
"learning_rate": 5.980005745004352e-07,
"loss": 0.3670939803123474,
"step": 4047
},
{
"epoch": 2.576702737110121,
"grad_norm": 9.984677466828005,
"learning_rate": 5.962453052515255e-07,
"loss": 0.5253440737724304,
"step": 4048
},
{
"epoch": 2.5773392743475494,
"grad_norm": 11.23299850265963,
"learning_rate": 5.944924524998363e-07,
"loss": 0.7628803253173828,
"step": 4049
},
{
"epoch": 2.577975811584978,
"grad_norm": 15.965633055637232,
"learning_rate": 5.927420172072195e-07,
"loss": 0.7902500629425049,
"step": 4050
},
{
"epoch": 2.578612348822406,
"grad_norm": 10.850945173909906,
"learning_rate": 5.909940003342018e-07,
"loss": 0.39650219678878784,
"step": 4051
},
{
"epoch": 2.5792488860598346,
"grad_norm": 7.319122127601429,
"learning_rate": 5.89248402839982e-07,
"loss": 0.1823950707912445,
"step": 4052
},
{
"epoch": 2.579885423297263,
"grad_norm": 17.984587464302077,
"learning_rate": 5.875052256824304e-07,
"loss": 0.4608056843280792,
"step": 4053
},
{
"epoch": 2.5805219605346914,
"grad_norm": 9.775152121470637,
"learning_rate": 5.857644698180937e-07,
"loss": 0.24857516586780548,
"step": 4054
},
{
"epoch": 2.58115849777212,
"grad_norm": 7.72330608213562,
"learning_rate": 5.840261362021831e-07,
"loss": 0.4499109089374542,
"step": 4055
},
{
"epoch": 2.581795035009548,
"grad_norm": 8.649994562620037,
"learning_rate": 5.822902257885865e-07,
"loss": 0.18724137544631958,
"step": 4056
},
{
"epoch": 2.5824315722469766,
"grad_norm": 10.508979666019846,
"learning_rate": 5.805567395298594e-07,
"loss": 0.44745534658432007,
"step": 4057
},
{
"epoch": 2.5830681094844046,
"grad_norm": 9.752479468668094,
"learning_rate": 5.788256783772272e-07,
"loss": 0.5347998142242432,
"step": 4058
},
{
"epoch": 2.5837046467218334,
"grad_norm": 13.769343454022025,
"learning_rate": 5.770970432805844e-07,
"loss": 0.38834691047668457,
"step": 4059
},
{
"epoch": 2.5843411839592614,
"grad_norm": 12.757115175870183,
"learning_rate": 5.753708351884963e-07,
"loss": 0.6713066101074219,
"step": 4060
},
{
"epoch": 2.5849777211966902,
"grad_norm": 18.32754619739884,
"learning_rate": 5.736470550481938e-07,
"loss": 0.7586411237716675,
"step": 4061
},
{
"epoch": 2.585614258434118,
"grad_norm": 10.271474583245858,
"learning_rate": 5.719257038055764e-07,
"loss": 0.30766427516937256,
"step": 4062
},
{
"epoch": 2.586250795671547,
"grad_norm": 15.201939737404972,
"learning_rate": 5.702067824052116e-07,
"loss": 1.1935421228408813,
"step": 4063
},
{
"epoch": 2.586887332908975,
"grad_norm": 24.59159721596722,
"learning_rate": 5.684902917903318e-07,
"loss": 0.9108860492706299,
"step": 4064
},
{
"epoch": 2.587523870146404,
"grad_norm": 9.04995185422225,
"learning_rate": 5.667762329028364e-07,
"loss": 0.2907896637916565,
"step": 4065
},
{
"epoch": 2.588160407383832,
"grad_norm": 10.678201116420878,
"learning_rate": 5.650646066832921e-07,
"loss": 0.3620736598968506,
"step": 4066
},
{
"epoch": 2.58879694462126,
"grad_norm": 7.9690751317708,
"learning_rate": 5.633554140709302e-07,
"loss": 0.7071490287780762,
"step": 4067
},
{
"epoch": 2.5894334818586886,
"grad_norm": 12.722277605000698,
"learning_rate": 5.616486560036416e-07,
"loss": 0.835293710231781,
"step": 4068
},
{
"epoch": 2.590070019096117,
"grad_norm": 7.3315614431992415,
"learning_rate": 5.599443334179889e-07,
"loss": 0.6301496028900146,
"step": 4069
},
{
"epoch": 2.5907065563335454,
"grad_norm": 15.104010990271298,
"learning_rate": 5.582424472491927e-07,
"loss": 0.6260243654251099,
"step": 4070
},
{
"epoch": 2.591343093570974,
"grad_norm": 10.351975406104097,
"learning_rate": 5.565429984311399e-07,
"loss": 0.8103821277618408,
"step": 4071
},
{
"epoch": 2.591979630808402,
"grad_norm": 14.33078262299076,
"learning_rate": 5.548459878963774e-07,
"loss": 0.38548845052719116,
"step": 4072
},
{
"epoch": 2.5926161680458306,
"grad_norm": 9.073929636402251,
"learning_rate": 5.531514165761164e-07,
"loss": 0.2681104242801666,
"step": 4073
},
{
"epoch": 2.593252705283259,
"grad_norm": 9.403328586706515,
"learning_rate": 5.514592854002287e-07,
"loss": 0.6098455786705017,
"step": 4074
},
{
"epoch": 2.5938892425206874,
"grad_norm": 9.82235715727146,
"learning_rate": 5.497695952972471e-07,
"loss": 0.38505712151527405,
"step": 4075
},
{
"epoch": 2.594525779758116,
"grad_norm": 11.536931030268624,
"learning_rate": 5.480823471943625e-07,
"loss": 0.6019818782806396,
"step": 4076
},
{
"epoch": 2.5951623169955442,
"grad_norm": 8.008487992794725,
"learning_rate": 5.463975420174327e-07,
"loss": 0.5317175984382629,
"step": 4077
},
{
"epoch": 2.5957988542329726,
"grad_norm": 18.47173478084601,
"learning_rate": 5.447151806909651e-07,
"loss": 0.29498717188835144,
"step": 4078
},
{
"epoch": 2.596435391470401,
"grad_norm": 11.995029436681856,
"learning_rate": 5.430352641381348e-07,
"loss": 0.456617146730423,
"step": 4079
},
{
"epoch": 2.5970719287078294,
"grad_norm": 7.926238739595407,
"learning_rate": 5.413577932807712e-07,
"loss": 0.3908483684062958,
"step": 4080
},
{
"epoch": 2.597708465945258,
"grad_norm": 4.227522381328757,
"learning_rate": 5.396827690393624e-07,
"loss": 0.1724707931280136,
"step": 4081
},
{
"epoch": 2.5983450031826862,
"grad_norm": 11.12669868267013,
"learning_rate": 5.380101923330522e-07,
"loss": 0.504496693611145,
"step": 4082
},
{
"epoch": 2.5989815404201146,
"grad_norm": 8.202957526577867,
"learning_rate": 5.363400640796467e-07,
"loss": 0.3466747999191284,
"step": 4083
},
{
"epoch": 2.599618077657543,
"grad_norm": 11.685783801431908,
"learning_rate": 5.346723851956015e-07,
"loss": 0.7704594135284424,
"step": 4084
},
{
"epoch": 2.6002546148949714,
"grad_norm": 11.803081780401964,
"learning_rate": 5.330071565960315e-07,
"loss": 0.24229168891906738,
"step": 4085
},
{
"epoch": 2.6008911521324,
"grad_norm": 12.095848705382494,
"learning_rate": 5.313443791947093e-07,
"loss": 0.28725600242614746,
"step": 4086
},
{
"epoch": 2.6015276893698283,
"grad_norm": 6.946883519139256,
"learning_rate": 5.296840539040593e-07,
"loss": 0.35463210940361023,
"step": 4087
},
{
"epoch": 2.6021642266072567,
"grad_norm": 7.713586628251217,
"learning_rate": 5.280261816351606e-07,
"loss": 0.6036189794540405,
"step": 4088
},
{
"epoch": 2.602800763844685,
"grad_norm": 12.353688937807355,
"learning_rate": 5.26370763297746e-07,
"loss": 0.3834241032600403,
"step": 4089
},
{
"epoch": 2.6034373010821135,
"grad_norm": 20.077134309396904,
"learning_rate": 5.247177998002057e-07,
"loss": 0.7139850854873657,
"step": 4090
},
{
"epoch": 2.6040738383195414,
"grad_norm": 12.63580826802718,
"learning_rate": 5.230672920495755e-07,
"loss": 0.38600242137908936,
"step": 4091
},
{
"epoch": 2.6047103755569703,
"grad_norm": 11.58134646523883,
"learning_rate": 5.214192409515512e-07,
"loss": 0.43848711252212524,
"step": 4092
},
{
"epoch": 2.6053469127943982,
"grad_norm": 11.317996434790762,
"learning_rate": 5.197736474104759e-07,
"loss": 0.2798628807067871,
"step": 4093
},
{
"epoch": 2.605983450031827,
"grad_norm": 7.499908830645163,
"learning_rate": 5.181305123293457e-07,
"loss": 0.18069583177566528,
"step": 4094
},
{
"epoch": 2.606619987269255,
"grad_norm": 7.327706335983863,
"learning_rate": 5.164898366098065e-07,
"loss": 0.40995126962661743,
"step": 4095
},
{
"epoch": 2.607256524506684,
"grad_norm": 13.765019235257453,
"learning_rate": 5.148516211521571e-07,
"loss": 0.24555763602256775,
"step": 4096
},
{
"epoch": 2.607893061744112,
"grad_norm": 10.689174968770708,
"learning_rate": 5.132158668553439e-07,
"loss": 0.39908403158187866,
"step": 4097
},
{
"epoch": 2.6085295989815407,
"grad_norm": 14.212210735050636,
"learning_rate": 5.11582574616964e-07,
"loss": 0.4648662209510803,
"step": 4098
},
{
"epoch": 2.6091661362189686,
"grad_norm": 9.805160053637175,
"learning_rate": 5.099517453332614e-07,
"loss": 0.32188141345977783,
"step": 4099
},
{
"epoch": 2.609802673456397,
"grad_norm": 12.411383679871745,
"learning_rate": 5.083233798991333e-07,
"loss": 0.41573289036750793,
"step": 4100
},
{
"epoch": 2.6104392106938255,
"grad_norm": 11.748131290202561,
"learning_rate": 5.066974792081197e-07,
"loss": 0.24640752375125885,
"step": 4101
},
{
"epoch": 2.611075747931254,
"grad_norm": 13.19669454957644,
"learning_rate": 5.050740441524099e-07,
"loss": 0.4697602093219757,
"step": 4102
},
{
"epoch": 2.6117122851686823,
"grad_norm": 10.841348257912811,
"learning_rate": 5.034530756228423e-07,
"loss": 0.26399198174476624,
"step": 4103
},
{
"epoch": 2.6123488224061107,
"grad_norm": 9.624692964681238,
"learning_rate": 5.018345745088993e-07,
"loss": 0.5276693105697632,
"step": 4104
},
{
"epoch": 2.612985359643539,
"grad_norm": 11.806129449246459,
"learning_rate": 5.002185416987104e-07,
"loss": 1.066301941871643,
"step": 4105
},
{
"epoch": 2.6136218968809675,
"grad_norm": 8.46730554469622,
"learning_rate": 4.986049780790502e-07,
"loss": 0.2760232090950012,
"step": 4106
},
{
"epoch": 2.614258434118396,
"grad_norm": 18.65712952851169,
"learning_rate": 4.969938845353384e-07,
"loss": 0.48304522037506104,
"step": 4107
},
{
"epoch": 2.6148949713558243,
"grad_norm": 7.4577480127183495,
"learning_rate": 4.953852619516386e-07,
"loss": 0.1664918065071106,
"step": 4108
},
{
"epoch": 2.6155315085932527,
"grad_norm": 8.603236740899478,
"learning_rate": 4.937791112106616e-07,
"loss": 0.22398081421852112,
"step": 4109
},
{
"epoch": 2.616168045830681,
"grad_norm": 9.607031452896308,
"learning_rate": 4.921754331937584e-07,
"loss": 0.6455321311950684,
"step": 4110
},
{
"epoch": 2.6168045830681095,
"grad_norm": 10.595930480633525,
"learning_rate": 4.905742287809246e-07,
"loss": 0.5751909613609314,
"step": 4111
},
{
"epoch": 2.617441120305538,
"grad_norm": 8.102143643941776,
"learning_rate": 4.889754988507967e-07,
"loss": 0.255344033241272,
"step": 4112
},
{
"epoch": 2.6180776575429663,
"grad_norm": 10.077475170586256,
"learning_rate": 4.87379244280658e-07,
"loss": 0.2505664825439453,
"step": 4113
},
{
"epoch": 2.6187141947803947,
"grad_norm": 14.044138182783676,
"learning_rate": 4.857854659464262e-07,
"loss": 0.7108798623085022,
"step": 4114
},
{
"epoch": 2.619350732017823,
"grad_norm": 6.653144230731202,
"learning_rate": 4.841941647226672e-07,
"loss": 0.39315658807754517,
"step": 4115
},
{
"epoch": 2.6199872692552515,
"grad_norm": 8.55804768661834,
"learning_rate": 4.826053414825844e-07,
"loss": 0.30764633417129517,
"step": 4116
},
{
"epoch": 2.62062380649268,
"grad_norm": 13.600367121111642,
"learning_rate": 4.810189970980211e-07,
"loss": 0.31536000967025757,
"step": 4117
},
{
"epoch": 2.6212603437301083,
"grad_norm": 14.969748889955568,
"learning_rate": 4.794351324394614e-07,
"loss": 0.455342561006546,
"step": 4118
},
{
"epoch": 2.6218968809675367,
"grad_norm": 10.651339465849556,
"learning_rate": 4.778537483760271e-07,
"loss": 0.36839210987091064,
"step": 4119
},
{
"epoch": 2.622533418204965,
"grad_norm": 18.23953235309341,
"learning_rate": 4.7627484577548234e-07,
"loss": 0.932968020439148,
"step": 4120
},
{
"epoch": 2.6231699554423935,
"grad_norm": 11.322514949690214,
"learning_rate": 4.746984255042247e-07,
"loss": 0.8573684096336365,
"step": 4121
},
{
"epoch": 2.623806492679822,
"grad_norm": 9.967012324875226,
"learning_rate": 4.731244884272945e-07,
"loss": 0.1748356968164444,
"step": 4122
},
{
"epoch": 2.6244430299172503,
"grad_norm": 15.446225825378987,
"learning_rate": 4.715530354083658e-07,
"loss": 1.1506575345993042,
"step": 4123
},
{
"epoch": 2.6250795671546783,
"grad_norm": 20.610873169385787,
"learning_rate": 4.699840673097511e-07,
"loss": 0.32537931203842163,
"step": 4124
},
{
"epoch": 2.625716104392107,
"grad_norm": 11.03342668393611,
"learning_rate": 4.6841758499239887e-07,
"loss": 0.6731318235397339,
"step": 4125
},
{
"epoch": 2.626352641629535,
"grad_norm": 7.615237564334669,
"learning_rate": 4.668535893158954e-07,
"loss": 0.5569811463356018,
"step": 4126
},
{
"epoch": 2.626989178866964,
"grad_norm": 9.731183305210399,
"learning_rate": 4.6529208113845816e-07,
"loss": 0.2950124740600586,
"step": 4127
},
{
"epoch": 2.627625716104392,
"grad_norm": 9.03741441722945,
"learning_rate": 4.6373306131694493e-07,
"loss": 0.8522462844848633,
"step": 4128
},
{
"epoch": 2.6282622533418207,
"grad_norm": 11.08357409031455,
"learning_rate": 4.621765307068443e-07,
"loss": 0.4782252311706543,
"step": 4129
},
{
"epoch": 2.6288987905792487,
"grad_norm": 17.39278566316733,
"learning_rate": 4.606224901622797e-07,
"loss": 2.614020347595215,
"step": 4130
},
{
"epoch": 2.6295353278166775,
"grad_norm": 9.915714779805283,
"learning_rate": 4.5907094053600887e-07,
"loss": 0.5547182559967041,
"step": 4131
},
{
"epoch": 2.6301718650541055,
"grad_norm": 7.416640257462956,
"learning_rate": 4.575218826794231e-07,
"loss": 0.42412108182907104,
"step": 4132
},
{
"epoch": 2.630808402291534,
"grad_norm": 8.435811343186867,
"learning_rate": 4.5597531744254575e-07,
"loss": 0.2449091523885727,
"step": 4133
},
{
"epoch": 2.6314449395289623,
"grad_norm": 12.655023181613203,
"learning_rate": 4.544312456740313e-07,
"loss": 0.3620641827583313,
"step": 4134
},
{
"epoch": 2.6320814767663907,
"grad_norm": 9.13939931506395,
"learning_rate": 4.5288966822116807e-07,
"loss": 0.5226321816444397,
"step": 4135
},
{
"epoch": 2.632718014003819,
"grad_norm": 13.019124127750661,
"learning_rate": 4.5135058592987333e-07,
"loss": 0.37915101647377014,
"step": 4136
},
{
"epoch": 2.6333545512412475,
"grad_norm": 10.808420778477727,
"learning_rate": 4.498139996446976e-07,
"loss": 0.6647825837135315,
"step": 4137
},
{
"epoch": 2.633991088478676,
"grad_norm": 11.57786296368533,
"learning_rate": 4.482799102088187e-07,
"loss": 0.3755315840244293,
"step": 4138
},
{
"epoch": 2.6346276257161043,
"grad_norm": 8.875610773562085,
"learning_rate": 4.4674831846404863e-07,
"loss": 0.5440626740455627,
"step": 4139
},
{
"epoch": 2.6352641629535327,
"grad_norm": 8.752354832583505,
"learning_rate": 4.4521922525082526e-07,
"loss": 0.3065177798271179,
"step": 4140
},
{
"epoch": 2.635900700190961,
"grad_norm": 11.179851780551138,
"learning_rate": 4.436926314082163e-07,
"loss": 0.5731112957000732,
"step": 4141
},
{
"epoch": 2.6365372374283895,
"grad_norm": 9.757327221220137,
"learning_rate": 4.421685377739182e-07,
"loss": 0.2809770703315735,
"step": 4142
},
{
"epoch": 2.637173774665818,
"grad_norm": 14.298061338871626,
"learning_rate": 4.406469451842571e-07,
"loss": 0.4565696120262146,
"step": 4143
},
{
"epoch": 2.6378103119032463,
"grad_norm": 8.291246701697267,
"learning_rate": 4.3912785447418227e-07,
"loss": 0.5066615343093872,
"step": 4144
},
{
"epoch": 2.6384468491406747,
"grad_norm": 7.975907034830099,
"learning_rate": 4.376112664772747e-07,
"loss": 0.3097657561302185,
"step": 4145
},
{
"epoch": 2.639083386378103,
"grad_norm": 10.15791583988077,
"learning_rate": 4.360971820257409e-07,
"loss": 0.5922439694404602,
"step": 4146
},
{
"epoch": 2.6397199236155315,
"grad_norm": 14.877454156446959,
"learning_rate": 4.345856019504119e-07,
"loss": 0.8842718601226807,
"step": 4147
},
{
"epoch": 2.64035646085296,
"grad_norm": 10.4929514799719,
"learning_rate": 4.3307652708074465e-07,
"loss": 0.3251538872718811,
"step": 4148
},
{
"epoch": 2.6409929980903883,
"grad_norm": 14.58847937445859,
"learning_rate": 4.315699582448257e-07,
"loss": 1.2450225353240967,
"step": 4149
},
{
"epoch": 2.6416295353278167,
"grad_norm": 8.125947860689633,
"learning_rate": 4.300658962693588e-07,
"loss": 0.48094889521598816,
"step": 4150
},
{
"epoch": 2.642266072565245,
"grad_norm": 16.411867779273845,
"learning_rate": 4.285643419796798e-07,
"loss": 0.7015743851661682,
"step": 4151
},
{
"epoch": 2.6429026098026736,
"grad_norm": 9.545006001252185,
"learning_rate": 4.2706529619974345e-07,
"loss": 0.665250301361084,
"step": 4152
},
{
"epoch": 2.643539147040102,
"grad_norm": 14.62170973169928,
"learning_rate": 4.255687597521302e-07,
"loss": 0.5696743726730347,
"step": 4153
},
{
"epoch": 2.6441756842775304,
"grad_norm": 16.869967323632615,
"learning_rate": 4.240747334580425e-07,
"loss": 0.5156615376472473,
"step": 4154
},
{
"epoch": 2.6448122215149588,
"grad_norm": 11.516758088803265,
"learning_rate": 4.225832181373052e-07,
"loss": 0.2613343596458435,
"step": 4155
},
{
"epoch": 2.645448758752387,
"grad_norm": 14.348577838654752,
"learning_rate": 4.210942146083685e-07,
"loss": 0.4780927300453186,
"step": 4156
},
{
"epoch": 2.6460852959898156,
"grad_norm": 12.026397828025917,
"learning_rate": 4.1960772368829837e-07,
"loss": 0.2942112684249878,
"step": 4157
},
{
"epoch": 2.646721833227244,
"grad_norm": 10.600619397365858,
"learning_rate": 4.1812374619278785e-07,
"loss": 0.20896536111831665,
"step": 4158
},
{
"epoch": 2.647358370464672,
"grad_norm": 10.57984621363277,
"learning_rate": 4.166422829361477e-07,
"loss": 0.592284083366394,
"step": 4159
},
{
"epoch": 2.6479949077021008,
"grad_norm": 7.535197852927329,
"learning_rate": 4.1516333473130886e-07,
"loss": 0.201694056391716,
"step": 4160
},
{
"epoch": 2.6486314449395287,
"grad_norm": 7.352041767336546,
"learning_rate": 4.1368690238982323e-07,
"loss": 0.34570127725601196,
"step": 4161
},
{
"epoch": 2.6492679821769576,
"grad_norm": 9.610957600875114,
"learning_rate": 4.122129867218627e-07,
"loss": 0.5658895969390869,
"step": 4162
},
{
"epoch": 2.6499045194143855,
"grad_norm": 8.540151651713604,
"learning_rate": 4.1074158853621704e-07,
"loss": 0.5704756379127502,
"step": 4163
},
{
"epoch": 2.6505410566518144,
"grad_norm": 14.217319486489174,
"learning_rate": 4.092727086402942e-07,
"loss": 0.2871258556842804,
"step": 4164
},
{
"epoch": 2.6511775938892423,
"grad_norm": 11.297679532923324,
"learning_rate": 4.078063478401212e-07,
"loss": 0.6045399904251099,
"step": 4165
},
{
"epoch": 2.6518141311266707,
"grad_norm": 12.20227079381005,
"learning_rate": 4.063425069403437e-07,
"loss": 1.1303468942642212,
"step": 4166
},
{
"epoch": 2.652450668364099,
"grad_norm": 9.686851935712701,
"learning_rate": 4.048811867442215e-07,
"loss": 0.31758686900138855,
"step": 4167
},
{
"epoch": 2.6530872056015276,
"grad_norm": 8.068110064996098,
"learning_rate": 4.034223880536342e-07,
"loss": 0.31641390919685364,
"step": 4168
},
{
"epoch": 2.653723742838956,
"grad_norm": 8.874594750065006,
"learning_rate": 4.0196611166907764e-07,
"loss": 0.3589363098144531,
"step": 4169
},
{
"epoch": 2.6543602800763844,
"grad_norm": 14.345723374561224,
"learning_rate": 4.0051235838965973e-07,
"loss": 0.3416450023651123,
"step": 4170
},
{
"epoch": 2.6549968173138128,
"grad_norm": 11.228361720735702,
"learning_rate": 3.990611290131091e-07,
"loss": 0.5151109099388123,
"step": 4171
},
{
"epoch": 2.655633354551241,
"grad_norm": 8.780694002750788,
"learning_rate": 3.9761242433576595e-07,
"loss": 0.31821924448013306,
"step": 4172
},
{
"epoch": 2.6562698917886696,
"grad_norm": 18.539933039403408,
"learning_rate": 3.961662451525872e-07,
"loss": 0.5817490220069885,
"step": 4173
},
{
"epoch": 2.656906429026098,
"grad_norm": 13.025868195763747,
"learning_rate": 3.9472259225714127e-07,
"loss": 0.27606385946273804,
"step": 4174
},
{
"epoch": 2.6575429662635264,
"grad_norm": 8.506420635098863,
"learning_rate": 3.9328146644161403e-07,
"loss": 0.28132539987564087,
"step": 4175
},
{
"epoch": 2.6581795035009548,
"grad_norm": 12.089889238567629,
"learning_rate": 3.918428684968012e-07,
"loss": 0.41118401288986206,
"step": 4176
},
{
"epoch": 2.658816040738383,
"grad_norm": 14.3359741660011,
"learning_rate": 3.9040679921211356e-07,
"loss": 0.8642361760139465,
"step": 4177
},
{
"epoch": 2.6594525779758116,
"grad_norm": 10.318794332910182,
"learning_rate": 3.889732593755724e-07,
"loss": 0.7048256993293762,
"step": 4178
},
{
"epoch": 2.66008911521324,
"grad_norm": 8.504319948271643,
"learning_rate": 3.875422497738146e-07,
"loss": 0.5382933616638184,
"step": 4179
},
{
"epoch": 2.6607256524506684,
"grad_norm": 9.842494108262397,
"learning_rate": 3.8611377119208206e-07,
"loss": 0.29906243085861206,
"step": 4180
},
{
"epoch": 2.661362189688097,
"grad_norm": 11.093492258647938,
"learning_rate": 3.8468782441423604e-07,
"loss": 0.30749762058258057,
"step": 4181
},
{
"epoch": 2.661998726925525,
"grad_norm": 8.551960698376003,
"learning_rate": 3.832644102227423e-07,
"loss": 0.37611305713653564,
"step": 4182
},
{
"epoch": 2.6626352641629536,
"grad_norm": 10.240978386480732,
"learning_rate": 3.8184352939867885e-07,
"loss": 0.3026156723499298,
"step": 4183
},
{
"epoch": 2.663271801400382,
"grad_norm": 12.51845793159237,
"learning_rate": 3.8042518272173366e-07,
"loss": 0.391543984413147,
"step": 4184
},
{
"epoch": 2.6639083386378104,
"grad_norm": 13.412053496998116,
"learning_rate": 3.790093709702053e-07,
"loss": 0.32294613122940063,
"step": 4185
},
{
"epoch": 2.664544875875239,
"grad_norm": 12.542049477092295,
"learning_rate": 3.775960949209995e-07,
"loss": 0.687692403793335,
"step": 4186
},
{
"epoch": 2.665181413112667,
"grad_norm": 13.725429694562523,
"learning_rate": 3.761853553496314e-07,
"loss": 0.4429126977920532,
"step": 4187
},
{
"epoch": 2.6658179503500956,
"grad_norm": 17.929115713521966,
"learning_rate": 3.7477715303022353e-07,
"loss": 0.8667738437652588,
"step": 4188
},
{
"epoch": 2.666454487587524,
"grad_norm": 9.416903155413737,
"learning_rate": 3.733714887355072e-07,
"loss": 0.44168245792388916,
"step": 4189
},
{
"epoch": 2.6670910248249524,
"grad_norm": 15.632077153500358,
"learning_rate": 3.719683632368204e-07,
"loss": 0.6333277225494385,
"step": 4190
},
{
"epoch": 2.667727562062381,
"grad_norm": 11.312890890394607,
"learning_rate": 3.7056777730410664e-07,
"loss": 0.9854741096496582,
"step": 4191
},
{
"epoch": 2.668364099299809,
"grad_norm": 7.932323143278352,
"learning_rate": 3.6916973170592117e-07,
"loss": 0.3559843897819519,
"step": 4192
},
{
"epoch": 2.6690006365372376,
"grad_norm": 8.839586522841298,
"learning_rate": 3.6777422720941704e-07,
"loss": 0.4284633696079254,
"step": 4193
},
{
"epoch": 2.6696371737746656,
"grad_norm": 11.759073839521456,
"learning_rate": 3.663812645803605e-07,
"loss": 0.7548391819000244,
"step": 4194
},
{
"epoch": 2.6702737110120944,
"grad_norm": 10.435700302664355,
"learning_rate": 3.6499084458311853e-07,
"loss": 0.5070865750312805,
"step": 4195
},
{
"epoch": 2.6709102482495224,
"grad_norm": 13.789427295975079,
"learning_rate": 3.6360296798066464e-07,
"loss": 0.5341179966926575,
"step": 4196
},
{
"epoch": 2.6715467854869512,
"grad_norm": 12.21163817391416,
"learning_rate": 3.622176355345747e-07,
"loss": 0.3842467963695526,
"step": 4197
},
{
"epoch": 2.672183322724379,
"grad_norm": 14.848081871873784,
"learning_rate": 3.6083484800503163e-07,
"loss": 0.8403598070144653,
"step": 4198
},
{
"epoch": 2.672819859961808,
"grad_norm": 11.371899569255133,
"learning_rate": 3.594546061508203e-07,
"loss": 0.3693830370903015,
"step": 4199
},
{
"epoch": 2.673456397199236,
"grad_norm": 17.943021819553415,
"learning_rate": 3.5807691072932803e-07,
"loss": 0.7598447799682617,
"step": 4200
},
{
"epoch": 2.6740929344366644,
"grad_norm": 28.16227522314146,
"learning_rate": 3.567017624965452e-07,
"loss": 0.6277492046356201,
"step": 4201
},
{
"epoch": 2.674729471674093,
"grad_norm": 16.85459217174598,
"learning_rate": 3.553291622070659e-07,
"loss": 0.45080533623695374,
"step": 4202
},
{
"epoch": 2.675366008911521,
"grad_norm": 7.95395586381539,
"learning_rate": 3.539591106140827e-07,
"loss": 0.2938351631164551,
"step": 4203
},
{
"epoch": 2.6760025461489496,
"grad_norm": 12.044280872279797,
"learning_rate": 3.5259160846939423e-07,
"loss": 0.2124682068824768,
"step": 4204
},
{
"epoch": 2.676639083386378,
"grad_norm": 14.932533595284871,
"learning_rate": 3.5122665652339574e-07,
"loss": 0.4682321548461914,
"step": 4205
},
{
"epoch": 2.6772756206238064,
"grad_norm": 10.475507395361465,
"learning_rate": 3.498642555250864e-07,
"loss": 0.21980136632919312,
"step": 4206
},
{
"epoch": 2.677912157861235,
"grad_norm": 14.50840416951671,
"learning_rate": 3.4850440622206326e-07,
"loss": 0.731866717338562,
"step": 4207
},
{
"epoch": 2.6785486950986632,
"grad_norm": 12.09856820155982,
"learning_rate": 3.471471093605233e-07,
"loss": 0.272088885307312,
"step": 4208
},
{
"epoch": 2.6791852323360916,
"grad_norm": 11.76734698312069,
"learning_rate": 3.4579236568526684e-07,
"loss": 0.9739639759063721,
"step": 4209
},
{
"epoch": 2.67982176957352,
"grad_norm": 14.33233822163741,
"learning_rate": 3.4444017593968647e-07,
"loss": 1.1476022005081177,
"step": 4210
},
{
"epoch": 2.6804583068109484,
"grad_norm": 12.152674084089535,
"learning_rate": 3.430905408657792e-07,
"loss": 0.39027106761932373,
"step": 4211
},
{
"epoch": 2.681094844048377,
"grad_norm": 10.30755416028074,
"learning_rate": 3.417434612041376e-07,
"loss": 0.7856820225715637,
"step": 4212
},
{
"epoch": 2.6817313812858052,
"grad_norm": 8.167847327382658,
"learning_rate": 3.403989376939515e-07,
"loss": 0.32235854864120483,
"step": 4213
},
{
"epoch": 2.6823679185232336,
"grad_norm": 13.740937395800405,
"learning_rate": 3.390569710730085e-07,
"loss": 0.30362454056739807,
"step": 4214
},
{
"epoch": 2.683004455760662,
"grad_norm": 9.93572791846366,
"learning_rate": 3.3771756207769567e-07,
"loss": 0.4014972448348999,
"step": 4215
},
{
"epoch": 2.6836409929980904,
"grad_norm": 16.83983652036534,
"learning_rate": 3.3638071144299176e-07,
"loss": 0.42258787155151367,
"step": 4216
},
{
"epoch": 2.684277530235519,
"grad_norm": 8.89934446769384,
"learning_rate": 3.3504641990247724e-07,
"loss": 0.5550803542137146,
"step": 4217
},
{
"epoch": 2.6849140674729473,
"grad_norm": 11.28235054693943,
"learning_rate": 3.3371468818832295e-07,
"loss": 0.3023662269115448,
"step": 4218
},
{
"epoch": 2.6855506047103757,
"grad_norm": 10.598756471075994,
"learning_rate": 3.3238551703129894e-07,
"loss": 0.5563198328018188,
"step": 4219
},
{
"epoch": 2.686187141947804,
"grad_norm": 10.170743643512592,
"learning_rate": 3.310589071607678e-07,
"loss": 0.2445223331451416,
"step": 4220
},
{
"epoch": 2.6868236791852325,
"grad_norm": 16.22936474685597,
"learning_rate": 3.2973485930468953e-07,
"loss": 0.3825610876083374,
"step": 4221
},
{
"epoch": 2.687460216422661,
"grad_norm": 9.90879097876087,
"learning_rate": 3.284133741896156e-07,
"loss": 0.3859259784221649,
"step": 4222
},
{
"epoch": 2.6880967536600893,
"grad_norm": 11.884020046231964,
"learning_rate": 3.2709445254068993e-07,
"loss": 0.42776572704315186,
"step": 4223
},
{
"epoch": 2.6887332908975177,
"grad_norm": 14.177835263166626,
"learning_rate": 3.257780950816547e-07,
"loss": 0.34132957458496094,
"step": 4224
},
{
"epoch": 2.6893698281349456,
"grad_norm": 10.709825078803748,
"learning_rate": 3.244643025348404e-07,
"loss": 0.45440587401390076,
"step": 4225
},
{
"epoch": 2.6900063653723745,
"grad_norm": 10.881942645660274,
"learning_rate": 3.2315307562117316e-07,
"loss": 0.45042353868484497,
"step": 4226
},
{
"epoch": 2.6906429026098024,
"grad_norm": 12.177737578153689,
"learning_rate": 3.2184441506016793e-07,
"loss": 0.26608598232269287,
"step": 4227
},
{
"epoch": 2.6912794398472313,
"grad_norm": 9.865083306888648,
"learning_rate": 3.205383215699359e-07,
"loss": 0.20638345181941986,
"step": 4228
},
{
"epoch": 2.6919159770846592,
"grad_norm": 9.930389339260044,
"learning_rate": 3.19234795867176e-07,
"loss": 0.5358873605728149,
"step": 4229
},
{
"epoch": 2.692552514322088,
"grad_norm": 8.006867762632641,
"learning_rate": 3.179338386671788e-07,
"loss": 0.24539242684841156,
"step": 4230
},
{
"epoch": 2.693189051559516,
"grad_norm": 11.262185953320438,
"learning_rate": 3.1663545068382597e-07,
"loss": 0.24088186025619507,
"step": 4231
},
{
"epoch": 2.693825588796945,
"grad_norm": 11.996875378641526,
"learning_rate": 3.15339632629591e-07,
"loss": 0.6157453656196594,
"step": 4232
},
{
"epoch": 2.694462126034373,
"grad_norm": 12.755239058966195,
"learning_rate": 3.140463852155329e-07,
"loss": 0.6274869441986084,
"step": 4233
},
{
"epoch": 2.6950986632718013,
"grad_norm": 9.94810465609412,
"learning_rate": 3.127557091513045e-07,
"loss": 0.5132277011871338,
"step": 4234
},
{
"epoch": 2.6957352005092297,
"grad_norm": 10.532205631405654,
"learning_rate": 3.1146760514514483e-07,
"loss": 0.721315324306488,
"step": 4235
},
{
"epoch": 2.696371737746658,
"grad_norm": 12.06851584523107,
"learning_rate": 3.1018207390388357e-07,
"loss": 0.5017930865287781,
"step": 4236
},
{
"epoch": 2.6970082749840865,
"grad_norm": 9.68712290443339,
"learning_rate": 3.088991161329352e-07,
"loss": 0.6609745025634766,
"step": 4237
},
{
"epoch": 2.697644812221515,
"grad_norm": 6.394919682690512,
"learning_rate": 3.076187325363078e-07,
"loss": 0.17533256113529205,
"step": 4238
},
{
"epoch": 2.6982813494589433,
"grad_norm": 10.203306521485871,
"learning_rate": 3.06340923816591e-07,
"loss": 0.24491241574287415,
"step": 4239
},
{
"epoch": 2.6989178866963717,
"grad_norm": 15.974156481461375,
"learning_rate": 3.050656906749638e-07,
"loss": 0.36442863941192627,
"step": 4240
},
{
"epoch": 2.6995544239338,
"grad_norm": 8.844978133477396,
"learning_rate": 3.0379303381119386e-07,
"loss": 0.3554074168205261,
"step": 4241
},
{
"epoch": 2.7001909611712285,
"grad_norm": 11.394578376951932,
"learning_rate": 3.025229539236324e-07,
"loss": 0.4078906774520874,
"step": 4242
},
{
"epoch": 2.700827498408657,
"grad_norm": 7.115448073721455,
"learning_rate": 3.0125545170921756e-07,
"loss": 0.29125985503196716,
"step": 4243
},
{
"epoch": 2.7014640356460853,
"grad_norm": 14.513988491089108,
"learning_rate": 2.9999052786347236e-07,
"loss": 0.7354227304458618,
"step": 4244
},
{
"epoch": 2.7021005728835137,
"grad_norm": 12.27832873637522,
"learning_rate": 2.987281830805078e-07,
"loss": 0.5535342693328857,
"step": 4245
},
{
"epoch": 2.702737110120942,
"grad_norm": 10.623514225123696,
"learning_rate": 2.9746841805301464e-07,
"loss": 0.4273834824562073,
"step": 4246
},
{
"epoch": 2.7033736473583705,
"grad_norm": 7.716892779508859,
"learning_rate": 2.9621123347227275e-07,
"loss": 0.9452025890350342,
"step": 4247
},
{
"epoch": 2.704010184595799,
"grad_norm": 6.920436913369039,
"learning_rate": 2.9495663002814367e-07,
"loss": 0.1669454574584961,
"step": 4248
},
{
"epoch": 2.7046467218332273,
"grad_norm": 9.880936132825969,
"learning_rate": 2.937046084090733e-07,
"loss": 0.5540735721588135,
"step": 4249
},
{
"epoch": 2.7052832590706557,
"grad_norm": 9.150729785239252,
"learning_rate": 2.924551693020894e-07,
"loss": 0.33344244956970215,
"step": 4250
},
{
"epoch": 2.705919796308084,
"grad_norm": 9.323586923127733,
"learning_rate": 2.9120831339280506e-07,
"loss": 0.668574333190918,
"step": 4251
},
{
"epoch": 2.7065563335455125,
"grad_norm": 6.744282662840181,
"learning_rate": 2.8996404136541423e-07,
"loss": 0.1677035689353943,
"step": 4252
},
{
"epoch": 2.707192870782941,
"grad_norm": 11.268284309249525,
"learning_rate": 2.887223539026934e-07,
"loss": 1.3707690238952637,
"step": 4253
},
{
"epoch": 2.7078294080203693,
"grad_norm": 13.204599760495604,
"learning_rate": 2.874832516859999e-07,
"loss": 0.48145592212677,
"step": 4254
},
{
"epoch": 2.7084659452577977,
"grad_norm": 11.43020400431346,
"learning_rate": 2.862467353952747e-07,
"loss": 0.342043936252594,
"step": 4255
},
{
"epoch": 2.709102482495226,
"grad_norm": 10.141017242865624,
"learning_rate": 2.8501280570903735e-07,
"loss": 0.2240038812160492,
"step": 4256
},
{
"epoch": 2.7097390197326545,
"grad_norm": 12.12872544336196,
"learning_rate": 2.8378146330438836e-07,
"loss": 0.5272125601768494,
"step": 4257
},
{
"epoch": 2.7103755569700825,
"grad_norm": 6.964906546317959,
"learning_rate": 2.8255270885701235e-07,
"loss": 0.3645150363445282,
"step": 4258
},
{
"epoch": 2.7110120942075113,
"grad_norm": 9.799382317823582,
"learning_rate": 2.813265430411666e-07,
"loss": 0.617933988571167,
"step": 4259
},
{
"epoch": 2.7116486314449393,
"grad_norm": 15.829285612557603,
"learning_rate": 2.801029665296945e-07,
"loss": 0.46067652106285095,
"step": 4260
},
{
"epoch": 2.712285168682368,
"grad_norm": 8.74206547069849,
"learning_rate": 2.7888197999401624e-07,
"loss": 0.4388101100921631,
"step": 4261
},
{
"epoch": 2.712921705919796,
"grad_norm": 10.588534515353642,
"learning_rate": 2.776635841041292e-07,
"loss": 0.2808324992656708,
"step": 4262
},
{
"epoch": 2.713558243157225,
"grad_norm": 9.079775389707791,
"learning_rate": 2.7644777952861135e-07,
"loss": 0.6054136753082275,
"step": 4263
},
{
"epoch": 2.714194780394653,
"grad_norm": 8.092659107903318,
"learning_rate": 2.752345669346185e-07,
"loss": 0.6175906658172607,
"step": 4264
},
{
"epoch": 2.7148313176320817,
"grad_norm": 6.813250221303636,
"learning_rate": 2.740239469878836e-07,
"loss": 0.6342043876647949,
"step": 4265
},
{
"epoch": 2.7154678548695097,
"grad_norm": 12.294422459487345,
"learning_rate": 2.7281592035271643e-07,
"loss": 0.6270891427993774,
"step": 4266
},
{
"epoch": 2.716104392106938,
"grad_norm": 23.360853471957164,
"learning_rate": 2.7161048769200337e-07,
"loss": 0.4364585280418396,
"step": 4267
},
{
"epoch": 2.7167409293443665,
"grad_norm": 10.32664074659213,
"learning_rate": 2.7040764966721144e-07,
"loss": 0.5619296431541443,
"step": 4268
},
{
"epoch": 2.717377466581795,
"grad_norm": 18.76663824164998,
"learning_rate": 2.692074069383771e-07,
"loss": 0.8377590179443359,
"step": 4269
},
{
"epoch": 2.7180140038192233,
"grad_norm": 12.19355629888269,
"learning_rate": 2.6800976016411904e-07,
"loss": 0.4755368232727051,
"step": 4270
},
{
"epoch": 2.7186505410566517,
"grad_norm": 11.71763158935508,
"learning_rate": 2.668147100016272e-07,
"loss": 0.24313849210739136,
"step": 4271
},
{
"epoch": 2.71928707829408,
"grad_norm": 20.84674385099233,
"learning_rate": 2.6562225710666855e-07,
"loss": 0.8417501449584961,
"step": 4272
},
{
"epoch": 2.7199236155315085,
"grad_norm": 8.944673335448611,
"learning_rate": 2.644324021335853e-07,
"loss": 0.24403586983680725,
"step": 4273
},
{
"epoch": 2.720560152768937,
"grad_norm": 15.081005478574195,
"learning_rate": 2.6324514573529135e-07,
"loss": 0.3433034121990204,
"step": 4274
},
{
"epoch": 2.7211966900063653,
"grad_norm": 12.717091166830789,
"learning_rate": 2.620604885632799e-07,
"loss": 0.30776041746139526,
"step": 4275
},
{
"epoch": 2.7218332272437937,
"grad_norm": 9.233628456958835,
"learning_rate": 2.608784312676105e-07,
"loss": 0.6768572330474854,
"step": 4276
},
{
"epoch": 2.722469764481222,
"grad_norm": 8.716046991275995,
"learning_rate": 2.596989744969236e-07,
"loss": 0.30308881402015686,
"step": 4277
},
{
"epoch": 2.7231063017186505,
"grad_norm": 9.640686155457239,
"learning_rate": 2.5852211889842717e-07,
"loss": 0.2957676947116852,
"step": 4278
},
{
"epoch": 2.723742838956079,
"grad_norm": 14.9960780673145,
"learning_rate": 2.5734786511790513e-07,
"loss": 0.3485487103462219,
"step": 4279
},
{
"epoch": 2.7243793761935073,
"grad_norm": 12.30043852886103,
"learning_rate": 2.561762137997109e-07,
"loss": 0.557969331741333,
"step": 4280
},
{
"epoch": 2.7250159134309357,
"grad_norm": 13.250766622765129,
"learning_rate": 2.5500716558677307e-07,
"loss": 0.5280587673187256,
"step": 4281
},
{
"epoch": 2.725652450668364,
"grad_norm": 8.648265992310122,
"learning_rate": 2.5384072112058845e-07,
"loss": 0.3716697692871094,
"step": 4282
},
{
"epoch": 2.7262889879057925,
"grad_norm": 7.810171136937785,
"learning_rate": 2.5267688104122777e-07,
"loss": 0.2588565945625305,
"step": 4283
},
{
"epoch": 2.726925525143221,
"grad_norm": 11.040988609947247,
"learning_rate": 2.5151564598733135e-07,
"loss": 0.6797652244567871,
"step": 4284
},
{
"epoch": 2.7275620623806494,
"grad_norm": 9.569822238643798,
"learning_rate": 2.5035701659611e-07,
"loss": 1.268112063407898,
"step": 4285
},
{
"epoch": 2.7281985996180778,
"grad_norm": 10.1928489327968,
"learning_rate": 2.4920099350334513e-07,
"loss": 0.5419979095458984,
"step": 4286
},
{
"epoch": 2.728835136855506,
"grad_norm": 13.988082334266707,
"learning_rate": 2.480475773433877e-07,
"loss": 0.7767527103424072,
"step": 4287
},
{
"epoch": 2.7294716740929346,
"grad_norm": 12.854143263048222,
"learning_rate": 2.4689676874915913e-07,
"loss": 0.9870243072509766,
"step": 4288
},
{
"epoch": 2.730108211330363,
"grad_norm": 18.57218071967357,
"learning_rate": 2.457485683521482e-07,
"loss": 1.311352252960205,
"step": 4289
},
{
"epoch": 2.7307447485677914,
"grad_norm": 12.492913638813363,
"learning_rate": 2.4460297678241376e-07,
"loss": 0.7109584808349609,
"step": 4290
},
{
"epoch": 2.7313812858052198,
"grad_norm": 12.692789088860808,
"learning_rate": 2.434599946685823e-07,
"loss": 0.26433488726615906,
"step": 4291
},
{
"epoch": 2.732017823042648,
"grad_norm": 17.792228239365656,
"learning_rate": 2.4231962263784934e-07,
"loss": 0.3746406137943268,
"step": 4292
},
{
"epoch": 2.732654360280076,
"grad_norm": 17.318376070131,
"learning_rate": 2.4118186131597666e-07,
"loss": 0.3980744183063507,
"step": 4293
},
{
"epoch": 2.733290897517505,
"grad_norm": 10.790409083983436,
"learning_rate": 2.400467113272964e-07,
"loss": 0.5603748559951782,
"step": 4294
},
{
"epoch": 2.733927434754933,
"grad_norm": 10.64101332957832,
"learning_rate": 2.389141732947048e-07,
"loss": 0.45091766119003296,
"step": 4295
},
{
"epoch": 2.734563971992362,
"grad_norm": 10.629186706240326,
"learning_rate": 2.377842478396658e-07,
"loss": 0.516888439655304,
"step": 4296
},
{
"epoch": 2.7352005092297897,
"grad_norm": 7.721465776912218,
"learning_rate": 2.3665693558220958e-07,
"loss": 0.3436046838760376,
"step": 4297
},
{
"epoch": 2.7358370464672186,
"grad_norm": 20.292564735940314,
"learning_rate": 2.3553223714093298e-07,
"loss": 0.2613334655761719,
"step": 4298
},
{
"epoch": 2.7364735837046466,
"grad_norm": 12.793194491300461,
"learning_rate": 2.3441015313299732e-07,
"loss": 0.4410665035247803,
"step": 4299
},
{
"epoch": 2.737110120942075,
"grad_norm": 9.111984906586548,
"learning_rate": 2.3329068417413114e-07,
"loss": 0.3262574374675751,
"step": 4300
},
{
"epoch": 2.7377466581795034,
"grad_norm": 18.06494995847896,
"learning_rate": 2.32173830878627e-07,
"loss": 0.48437198996543884,
"step": 4301
},
{
"epoch": 2.7383831954169318,
"grad_norm": 9.779436167696465,
"learning_rate": 2.310595938593413e-07,
"loss": 0.19420596957206726,
"step": 4302
},
{
"epoch": 2.73901973265436,
"grad_norm": 6.850524200944048,
"learning_rate": 2.299479737276955e-07,
"loss": 0.2850743532180786,
"step": 4303
},
{
"epoch": 2.7396562698917886,
"grad_norm": 19.076556050860997,
"learning_rate": 2.2883897109367724e-07,
"loss": 0.6600810885429382,
"step": 4304
},
{
"epoch": 2.740292807129217,
"grad_norm": 13.227385928964791,
"learning_rate": 2.2773258656583307e-07,
"loss": 1.904581904411316,
"step": 4305
},
{
"epoch": 2.7409293443666454,
"grad_norm": 19.440778689480968,
"learning_rate": 2.2662882075127735e-07,
"loss": 0.40808939933776855,
"step": 4306
},
{
"epoch": 2.7415658816040738,
"grad_norm": 11.79851520394274,
"learning_rate": 2.25527674255685e-07,
"loss": 0.41511595249176025,
"step": 4307
},
{
"epoch": 2.742202418841502,
"grad_norm": 10.102280558043457,
"learning_rate": 2.2442914768329492e-07,
"loss": 0.38626226782798767,
"step": 4308
},
{
"epoch": 2.7428389560789306,
"grad_norm": 17.005846309084074,
"learning_rate": 2.2333324163690716e-07,
"loss": 1.135148286819458,
"step": 4309
},
{
"epoch": 2.743475493316359,
"grad_norm": 7.747987474427863,
"learning_rate": 2.2223995671788457e-07,
"loss": 0.502388060092926,
"step": 4310
},
{
"epoch": 2.7441120305537874,
"grad_norm": 14.17202773113162,
"learning_rate": 2.2114929352615277e-07,
"loss": 0.414657324552536,
"step": 4311
},
{
"epoch": 2.744748567791216,
"grad_norm": 10.539192399359145,
"learning_rate": 2.2006125266019528e-07,
"loss": 0.27732551097869873,
"step": 4312
},
{
"epoch": 2.745385105028644,
"grad_norm": 10.561237554141723,
"learning_rate": 2.189758347170612e-07,
"loss": 0.6421482563018799,
"step": 4313
},
{
"epoch": 2.7460216422660726,
"grad_norm": 11.135916872443241,
"learning_rate": 2.1789304029235624e-07,
"loss": 0.7741884589195251,
"step": 4314
},
{
"epoch": 2.746658179503501,
"grad_norm": 16.655163146149526,
"learning_rate": 2.1681286998024963e-07,
"loss": 0.4049973785877228,
"step": 4315
},
{
"epoch": 2.7472947167409294,
"grad_norm": 9.368393370456381,
"learning_rate": 2.1573532437346834e-07,
"loss": 0.36269906163215637,
"step": 4316
},
{
"epoch": 2.747931253978358,
"grad_norm": 14.5726052197661,
"learning_rate": 2.1466040406330102e-07,
"loss": 0.3802472949028015,
"step": 4317
},
{
"epoch": 2.748567791215786,
"grad_norm": 7.329278276781925,
"learning_rate": 2.135881096395942e-07,
"loss": 0.2172292321920395,
"step": 4318
},
{
"epoch": 2.7492043284532146,
"grad_norm": 7.354116971460457,
"learning_rate": 2.1251844169075442e-07,
"loss": 0.25726011395454407,
"step": 4319
},
{
"epoch": 2.749840865690643,
"grad_norm": 10.02657240608222,
"learning_rate": 2.1145140080374604e-07,
"loss": 0.7946832180023193,
"step": 4320
},
{
"epoch": 2.7504774029280714,
"grad_norm": 17.6762639881742,
"learning_rate": 2.1038698756409292e-07,
"loss": 0.5637105703353882,
"step": 4321
},
{
"epoch": 2.7511139401655,
"grad_norm": 10.778476221349027,
"learning_rate": 2.093252025558762e-07,
"loss": 0.5554542541503906,
"step": 4322
},
{
"epoch": 2.751750477402928,
"grad_norm": 12.393549129861261,
"learning_rate": 2.082660463617353e-07,
"loss": 0.4928439259529114,
"step": 4323
},
{
"epoch": 2.7523870146403566,
"grad_norm": 8.142573967633252,
"learning_rate": 2.0720951956286818e-07,
"loss": 0.1775183379650116,
"step": 4324
},
{
"epoch": 2.753023551877785,
"grad_norm": 10.425979217267406,
"learning_rate": 2.0615562273902602e-07,
"loss": 0.40366294980049133,
"step": 4325
},
{
"epoch": 2.753660089115213,
"grad_norm": 13.655468798230945,
"learning_rate": 2.0510435646852177e-07,
"loss": 0.5020191073417664,
"step": 4326
},
{
"epoch": 2.754296626352642,
"grad_norm": 10.402700144189954,
"learning_rate": 2.0405572132822239e-07,
"loss": 0.5099070072174072,
"step": 4327
},
{
"epoch": 2.75493316359007,
"grad_norm": 9.91157832192849,
"learning_rate": 2.0300971789355138e-07,
"loss": 0.4746452867984772,
"step": 4328
},
{
"epoch": 2.7555697008274986,
"grad_norm": 12.25888997540068,
"learning_rate": 2.0196634673848693e-07,
"loss": 0.312614381313324,
"step": 4329
},
{
"epoch": 2.7562062380649266,
"grad_norm": 15.773800201853582,
"learning_rate": 2.009256084355654e-07,
"loss": 0.5852431058883667,
"step": 4330
},
{
"epoch": 2.7568427753023554,
"grad_norm": 10.578932486634834,
"learning_rate": 1.9988750355587728e-07,
"loss": 0.374070405960083,
"step": 4331
},
{
"epoch": 2.7574793125397834,
"grad_norm": 10.559529194780191,
"learning_rate": 1.9885203266906627e-07,
"loss": 0.34271422028541565,
"step": 4332
},
{
"epoch": 2.758115849777212,
"grad_norm": 8.559469427860115,
"learning_rate": 1.9781919634333237e-07,
"loss": 0.5302675366401672,
"step": 4333
},
{
"epoch": 2.75875238701464,
"grad_norm": 9.92723889513059,
"learning_rate": 1.967889951454316e-07,
"loss": 0.4150368869304657,
"step": 4334
},
{
"epoch": 2.7593889242520686,
"grad_norm": 10.751289855859332,
"learning_rate": 1.9576142964066957e-07,
"loss": 0.17283464968204498,
"step": 4335
},
{
"epoch": 2.760025461489497,
"grad_norm": 8.631591277855032,
"learning_rate": 1.947365003929097e-07,
"loss": 0.3527897000312805,
"step": 4336
},
{
"epoch": 2.7606619987269254,
"grad_norm": 15.585220226368758,
"learning_rate": 1.9371420796456707e-07,
"loss": 0.49996358156204224,
"step": 4337
},
{
"epoch": 2.761298535964354,
"grad_norm": 9.14196026995527,
"learning_rate": 1.926945529166091e-07,
"loss": 0.8745436668395996,
"step": 4338
},
{
"epoch": 2.761935073201782,
"grad_norm": 19.59578373154723,
"learning_rate": 1.9167753580855763e-07,
"loss": 0.3348027467727661,
"step": 4339
},
{
"epoch": 2.7625716104392106,
"grad_norm": 12.28375878643904,
"learning_rate": 1.9066315719848682e-07,
"loss": 0.3094848096370697,
"step": 4340
},
{
"epoch": 2.763208147676639,
"grad_norm": 8.846129210433077,
"learning_rate": 1.8965141764302087e-07,
"loss": 0.29138296842575073,
"step": 4341
},
{
"epoch": 2.7638446849140674,
"grad_norm": 13.503871417296063,
"learning_rate": 1.886423176973373e-07,
"loss": 0.5916569232940674,
"step": 4342
},
{
"epoch": 2.764481222151496,
"grad_norm": 8.279942974037798,
"learning_rate": 1.8763585791516715e-07,
"loss": 0.338371217250824,
"step": 4343
},
{
"epoch": 2.7651177593889242,
"grad_norm": 7.552973688877967,
"learning_rate": 1.8663203884878967e-07,
"loss": 0.36693233251571655,
"step": 4344
},
{
"epoch": 2.7657542966263526,
"grad_norm": 12.881044660588703,
"learning_rate": 1.8563086104903537e-07,
"loss": 0.584098756313324,
"step": 4345
},
{
"epoch": 2.766390833863781,
"grad_norm": 8.476461186413237,
"learning_rate": 1.8463232506528705e-07,
"loss": 0.5080705285072327,
"step": 4346
},
{
"epoch": 2.7670273711012094,
"grad_norm": 8.899640042190644,
"learning_rate": 1.8363643144547805e-07,
"loss": 0.19185948371887207,
"step": 4347
},
{
"epoch": 2.767663908338638,
"grad_norm": 9.801897987444429,
"learning_rate": 1.8264318073608843e-07,
"loss": 0.6385388970375061,
"step": 4348
},
{
"epoch": 2.7683004455760662,
"grad_norm": 11.978459962825934,
"learning_rate": 1.816525734821517e-07,
"loss": 0.5047708749771118,
"step": 4349
},
{
"epoch": 2.7689369828134947,
"grad_norm": 12.717043613698428,
"learning_rate": 1.8066461022724967e-07,
"loss": 0.3372824192047119,
"step": 4350
},
{
"epoch": 2.769573520050923,
"grad_norm": 11.568553357345325,
"learning_rate": 1.7967929151351206e-07,
"loss": 0.39070644974708557,
"step": 4351
},
{
"epoch": 2.7702100572883515,
"grad_norm": 14.984865987869235,
"learning_rate": 1.7869661788161795e-07,
"loss": 0.3570117950439453,
"step": 4352
},
{
"epoch": 2.77084659452578,
"grad_norm": 11.783701810628614,
"learning_rate": 1.7771658987079722e-07,
"loss": 0.4199962019920349,
"step": 4353
},
{
"epoch": 2.7714831317632083,
"grad_norm": 7.5642975673352,
"learning_rate": 1.7673920801882517e-07,
"loss": 0.4335901737213135,
"step": 4354
},
{
"epoch": 2.7721196690006367,
"grad_norm": 15.375826355953931,
"learning_rate": 1.7576447286202615e-07,
"loss": 1.3275525569915771,
"step": 4355
},
{
"epoch": 2.772756206238065,
"grad_norm": 10.102903902947347,
"learning_rate": 1.7479238493527119e-07,
"loss": 0.7213789820671082,
"step": 4356
},
{
"epoch": 2.7733927434754935,
"grad_norm": 11.805411010215806,
"learning_rate": 1.7382294477198193e-07,
"loss": 0.4079378843307495,
"step": 4357
},
{
"epoch": 2.774029280712922,
"grad_norm": 10.400630313568238,
"learning_rate": 1.7285615290412227e-07,
"loss": 0.39478975534439087,
"step": 4358
},
{
"epoch": 2.77466581795035,
"grad_norm": 7.97993204598414,
"learning_rate": 1.7189200986220621e-07,
"loss": 0.3212743103504181,
"step": 4359
},
{
"epoch": 2.7753023551877787,
"grad_norm": 15.573333045289687,
"learning_rate": 1.709305161752939e-07,
"loss": 0.30908530950546265,
"step": 4360
},
{
"epoch": 2.7759388924252066,
"grad_norm": 14.914602988528788,
"learning_rate": 1.69971672370991e-07,
"loss": 0.7659950852394104,
"step": 4361
},
{
"epoch": 2.7765754296626355,
"grad_norm": 15.287999147369128,
"learning_rate": 1.690154789754489e-07,
"loss": 0.8660287857055664,
"step": 4362
},
{
"epoch": 2.7772119669000634,
"grad_norm": 14.065918614954981,
"learning_rate": 1.6806193651336567e-07,
"loss": 0.5355532765388489,
"step": 4363
},
{
"epoch": 2.7778485041374923,
"grad_norm": 10.71175139279742,
"learning_rate": 1.6711104550798329e-07,
"loss": 0.40091973543167114,
"step": 4364
},
{
"epoch": 2.7784850413749203,
"grad_norm": 15.497332132924607,
"learning_rate": 1.6616280648108995e-07,
"loss": 0.22542725503444672,
"step": 4365
},
{
"epoch": 2.779121578612349,
"grad_norm": 11.0783049415082,
"learning_rate": 1.652172199530183e-07,
"loss": 0.5950685739517212,
"step": 4366
},
{
"epoch": 2.779758115849777,
"grad_norm": 9.073428141803928,
"learning_rate": 1.6427428644264553e-07,
"loss": 0.5838128924369812,
"step": 4367
},
{
"epoch": 2.7803946530872055,
"grad_norm": 8.66730110289837,
"learning_rate": 1.6333400646739217e-07,
"loss": 0.4302659034729004,
"step": 4368
},
{
"epoch": 2.781031190324634,
"grad_norm": 16.11172081698985,
"learning_rate": 1.6239638054322382e-07,
"loss": 0.567771315574646,
"step": 4369
},
{
"epoch": 2.7816677275620623,
"grad_norm": 13.93911649098347,
"learning_rate": 1.6146140918465003e-07,
"loss": 0.4877224862575531,
"step": 4370
},
{
"epoch": 2.7823042647994907,
"grad_norm": 8.422358165698013,
"learning_rate": 1.605290929047215e-07,
"loss": 0.2488918900489807,
"step": 4371
},
{
"epoch": 2.782940802036919,
"grad_norm": 12.34100509614224,
"learning_rate": 1.5959943221503403e-07,
"loss": 0.38559985160827637,
"step": 4372
},
{
"epoch": 2.7835773392743475,
"grad_norm": 9.202840878020266,
"learning_rate": 1.586724276257262e-07,
"loss": 0.576676607131958,
"step": 4373
},
{
"epoch": 2.784213876511776,
"grad_norm": 31.45421870136579,
"learning_rate": 1.577480796454778e-07,
"loss": 1.7332954406738281,
"step": 4374
},
{
"epoch": 2.7848504137492043,
"grad_norm": 7.66025647326814,
"learning_rate": 1.5682638878151136e-07,
"loss": 0.5894250869750977,
"step": 4375
},
{
"epoch": 2.7854869509866327,
"grad_norm": 9.533588932793453,
"learning_rate": 1.559073555395918e-07,
"loss": 0.7092304229736328,
"step": 4376
},
{
"epoch": 2.786123488224061,
"grad_norm": 9.16700802908481,
"learning_rate": 1.5499098042402628e-07,
"loss": 0.32052963972091675,
"step": 4377
},
{
"epoch": 2.7867600254614895,
"grad_norm": 9.352889099815966,
"learning_rate": 1.5407726393766032e-07,
"loss": 0.25640130043029785,
"step": 4378
},
{
"epoch": 2.787396562698918,
"grad_norm": 8.143726963086726,
"learning_rate": 1.5316620658188452e-07,
"loss": 0.6448952555656433,
"step": 4379
},
{
"epoch": 2.7880330999363463,
"grad_norm": 5.935753301265953,
"learning_rate": 1.5225780885662843e-07,
"loss": 0.13304725289344788,
"step": 4380
},
{
"epoch": 2.7886696371737747,
"grad_norm": 12.077162673939155,
"learning_rate": 1.5135207126036166e-07,
"loss": 0.4364404082298279,
"step": 4381
},
{
"epoch": 2.789306174411203,
"grad_norm": 14.210781282286607,
"learning_rate": 1.5044899429009497e-07,
"loss": 0.2122376412153244,
"step": 4382
},
{
"epoch": 2.7899427116486315,
"grad_norm": 8.561175375474564,
"learning_rate": 1.495485784413797e-07,
"loss": 0.4473179578781128,
"step": 4383
},
{
"epoch": 2.79057924888606,
"grad_norm": 12.795212797822536,
"learning_rate": 1.48650824208304e-07,
"loss": 0.4171499013900757,
"step": 4384
},
{
"epoch": 2.7912157861234883,
"grad_norm": 10.259963311843471,
"learning_rate": 1.4775573208349992e-07,
"loss": 0.5803985595703125,
"step": 4385
},
{
"epoch": 2.7918523233609167,
"grad_norm": 12.184224660402068,
"learning_rate": 1.4686330255813507e-07,
"loss": 0.3893691301345825,
"step": 4386
},
{
"epoch": 2.792488860598345,
"grad_norm": 15.039144206017967,
"learning_rate": 1.4597353612191834e-07,
"loss": 0.35126081109046936,
"step": 4387
},
{
"epoch": 2.7931253978357735,
"grad_norm": 13.367470953116724,
"learning_rate": 1.450864332630947e-07,
"loss": 0.36777427792549133,
"step": 4388
},
{
"epoch": 2.793761935073202,
"grad_norm": 18.182113804637456,
"learning_rate": 1.4420199446845084e-07,
"loss": 0.2519111633300781,
"step": 4389
},
{
"epoch": 2.7943984723106303,
"grad_norm": 12.261095206471172,
"learning_rate": 1.4332022022330917e-07,
"loss": 0.22938407957553864,
"step": 4390
},
{
"epoch": 2.7950350095480587,
"grad_norm": 9.392276560302708,
"learning_rate": 1.4244111101153035e-07,
"loss": 0.43280625343322754,
"step": 4391
},
{
"epoch": 2.7956715467854867,
"grad_norm": 7.890062323305005,
"learning_rate": 1.4156466731551355e-07,
"loss": 0.36694902181625366,
"step": 4392
},
{
"epoch": 2.7963080840229155,
"grad_norm": 12.172257145089155,
"learning_rate": 1.4069088961619404e-07,
"loss": 0.31940770149230957,
"step": 4393
},
{
"epoch": 2.7969446212603435,
"grad_norm": 11.744985066780853,
"learning_rate": 1.398197783930455e-07,
"loss": 0.8017423152923584,
"step": 4394
},
{
"epoch": 2.7975811584977723,
"grad_norm": 6.8405376068039505,
"learning_rate": 1.3895133412407667e-07,
"loss": 0.2951199412345886,
"step": 4395
},
{
"epoch": 2.7982176957352003,
"grad_norm": 12.042007738455073,
"learning_rate": 1.3808555728583528e-07,
"loss": 0.6661902070045471,
"step": 4396
},
{
"epoch": 2.798854232972629,
"grad_norm": 12.779626795387536,
"learning_rate": 1.3722244835340292e-07,
"loss": 0.2595215141773224,
"step": 4397
},
{
"epoch": 2.799490770210057,
"grad_norm": 5.861522223792799,
"learning_rate": 1.363620078003991e-07,
"loss": 0.5347214937210083,
"step": 4398
},
{
"epoch": 2.800127307447486,
"grad_norm": 12.83984882710008,
"learning_rate": 1.3550423609897668e-07,
"loss": 0.6801286935806274,
"step": 4399
},
{
"epoch": 2.800763844684914,
"grad_norm": 10.006507456419525,
"learning_rate": 1.3464913371982745e-07,
"loss": 0.7041571140289307,
"step": 4400
},
{
"epoch": 2.8014003819223423,
"grad_norm": 15.455367002004685,
"learning_rate": 1.33796701132175e-07,
"loss": 0.529301643371582,
"step": 4401
},
{
"epoch": 2.8020369191597707,
"grad_norm": 9.709162767265719,
"learning_rate": 1.3294693880378074e-07,
"loss": 0.34043580293655396,
"step": 4402
},
{
"epoch": 2.802673456397199,
"grad_norm": 8.926447006722517,
"learning_rate": 1.3209984720093882e-07,
"loss": 0.30980032682418823,
"step": 4403
},
{
"epoch": 2.8033099936346275,
"grad_norm": 14.853184439285796,
"learning_rate": 1.3125542678847912e-07,
"loss": 0.7388812303543091,
"step": 4404
},
{
"epoch": 2.803946530872056,
"grad_norm": 7.8258107218619966,
"learning_rate": 1.3041367802976436e-07,
"loss": 0.4440646767616272,
"step": 4405
},
{
"epoch": 2.8045830681094843,
"grad_norm": 17.863623310914267,
"learning_rate": 1.295746013866933e-07,
"loss": 0.6827105283737183,
"step": 4406
},
{
"epoch": 2.8052196053469127,
"grad_norm": 12.058619449369857,
"learning_rate": 1.287381973196955e-07,
"loss": 0.32942473888397217,
"step": 4407
},
{
"epoch": 2.805856142584341,
"grad_norm": 9.213035106753438,
"learning_rate": 1.2790446628773768e-07,
"loss": 0.1439140886068344,
"step": 4408
},
{
"epoch": 2.8064926798217695,
"grad_norm": 14.420382113247083,
"learning_rate": 1.2707340874831664e-07,
"loss": 0.5104187726974487,
"step": 4409
},
{
"epoch": 2.807129217059198,
"grad_norm": 17.32293520025481,
"learning_rate": 1.2624502515746317e-07,
"loss": 0.26788467168807983,
"step": 4410
},
{
"epoch": 2.8077657542966263,
"grad_norm": 11.73405977666017,
"learning_rate": 1.25419315969742e-07,
"loss": 0.7914992570877075,
"step": 4411
},
{
"epoch": 2.8084022915340547,
"grad_norm": 9.259963725798652,
"learning_rate": 1.2459628163824732e-07,
"loss": 0.6886008381843567,
"step": 4412
},
{
"epoch": 2.809038828771483,
"grad_norm": 13.705961703248914,
"learning_rate": 1.2377592261460959e-07,
"loss": 0.6125488877296448,
"step": 4413
},
{
"epoch": 2.8096753660089115,
"grad_norm": 14.013193224784624,
"learning_rate": 1.2295823934898697e-07,
"loss": 1.1255991458892822,
"step": 4414
},
{
"epoch": 2.81031190324634,
"grad_norm": 14.636493982016376,
"learning_rate": 1.2214323229007284e-07,
"loss": 0.44004884362220764,
"step": 4415
},
{
"epoch": 2.8109484404837684,
"grad_norm": 13.468519102553497,
"learning_rate": 1.2133090188509056e-07,
"loss": 0.34507501125335693,
"step": 4416
},
{
"epoch": 2.8115849777211968,
"grad_norm": 12.549436518383526,
"learning_rate": 1.2052124857979408e-07,
"loss": 1.268148422241211,
"step": 4417
},
{
"epoch": 2.812221514958625,
"grad_norm": 9.770274877561667,
"learning_rate": 1.1971427281846913e-07,
"loss": 0.35538095235824585,
"step": 4418
},
{
"epoch": 2.8128580521960536,
"grad_norm": 15.965965459171379,
"learning_rate": 1.1890997504393265e-07,
"loss": 0.6932916045188904,
"step": 4419
},
{
"epoch": 2.813494589433482,
"grad_norm": 12.382519919401737,
"learning_rate": 1.1810835569753099e-07,
"loss": 0.6585299968719482,
"step": 4420
},
{
"epoch": 2.8141311266709104,
"grad_norm": 13.007575538585334,
"learning_rate": 1.1730941521914175e-07,
"loss": 0.4269363284111023,
"step": 4421
},
{
"epoch": 2.8147676639083388,
"grad_norm": 12.545204648672675,
"learning_rate": 1.1651315404717145e-07,
"loss": 0.41912248730659485,
"step": 4422
},
{
"epoch": 2.815404201145767,
"grad_norm": 13.506300554886902,
"learning_rate": 1.1571957261855837e-07,
"loss": 0.49605053663253784,
"step": 4423
},
{
"epoch": 2.8160407383831956,
"grad_norm": 12.247286636282382,
"learning_rate": 1.1492867136876696e-07,
"loss": 0.8589668869972229,
"step": 4424
},
{
"epoch": 2.8166772756206235,
"grad_norm": 9.49353796946391,
"learning_rate": 1.1414045073179392e-07,
"loss": 0.2600698471069336,
"step": 4425
},
{
"epoch": 2.8173138128580524,
"grad_norm": 13.340490605059793,
"learning_rate": 1.133549111401644e-07,
"loss": 0.2603069543838501,
"step": 4426
},
{
"epoch": 2.8179503500954803,
"grad_norm": 8.527047807044804,
"learning_rate": 1.1257205302493135e-07,
"loss": 0.6665278673171997,
"step": 4427
},
{
"epoch": 2.818586887332909,
"grad_norm": 5.083032832097961,
"learning_rate": 1.1179187681567671e-07,
"loss": 0.1172168105840683,
"step": 4428
},
{
"epoch": 2.819223424570337,
"grad_norm": 13.398556136651946,
"learning_rate": 1.1101438294051193e-07,
"loss": 0.22658070921897888,
"step": 4429
},
{
"epoch": 2.819859961807766,
"grad_norm": 12.701150729171005,
"learning_rate": 1.1023957182607515e-07,
"loss": 0.651612401008606,
"step": 4430
},
{
"epoch": 2.820496499045194,
"grad_norm": 7.146573089855455,
"learning_rate": 1.0946744389753239e-07,
"loss": 0.31838878989219666,
"step": 4431
},
{
"epoch": 2.821133036282623,
"grad_norm": 8.654357939781463,
"learning_rate": 1.0869799957857808e-07,
"loss": 0.2821580171585083,
"step": 4432
},
{
"epoch": 2.8217695735200508,
"grad_norm": 10.506211168917828,
"learning_rate": 1.0793123929143445e-07,
"loss": 0.6332144737243652,
"step": 4433
},
{
"epoch": 2.822406110757479,
"grad_norm": 14.490206968208446,
"learning_rate": 1.0716716345684941e-07,
"loss": 0.5318081378936768,
"step": 4434
},
{
"epoch": 2.8230426479949076,
"grad_norm": 9.92884904499283,
"learning_rate": 1.0640577249409811e-07,
"loss": 0.34390687942504883,
"step": 4435
},
{
"epoch": 2.823679185232336,
"grad_norm": 14.743175828458485,
"learning_rate": 1.0564706682098525e-07,
"loss": 0.3705610930919647,
"step": 4436
},
{
"epoch": 2.8243157224697644,
"grad_norm": 14.384452616369366,
"learning_rate": 1.0489104685383666e-07,
"loss": 0.3779531717300415,
"step": 4437
},
{
"epoch": 2.8249522597071928,
"grad_norm": 9.363966887729818,
"learning_rate": 1.0413771300750942e-07,
"loss": 0.38987523317337036,
"step": 4438
},
{
"epoch": 2.825588796944621,
"grad_norm": 7.020404932998415,
"learning_rate": 1.0338706569538392e-07,
"loss": 0.18849973380565643,
"step": 4439
},
{
"epoch": 2.8262253341820496,
"grad_norm": 12.0655847741128,
"learning_rate": 1.0263910532936738e-07,
"loss": 0.3690700829029083,
"step": 4440
},
{
"epoch": 2.826861871419478,
"grad_norm": 9.57987101996461,
"learning_rate": 1.0189383231989202e-07,
"loss": 0.4701506495475769,
"step": 4441
},
{
"epoch": 2.8274984086569064,
"grad_norm": 11.950188096317811,
"learning_rate": 1.0115124707591628e-07,
"loss": 0.291561484336853,
"step": 4442
},
{
"epoch": 2.828134945894335,
"grad_norm": 13.548603120880893,
"learning_rate": 1.0041135000492309e-07,
"loss": 0.5485737323760986,
"step": 4443
},
{
"epoch": 2.828771483131763,
"grad_norm": 7.931899406269338,
"learning_rate": 9.967414151291932e-08,
"loss": 0.3453434109687805,
"step": 4444
},
{
"epoch": 2.8294080203691916,
"grad_norm": 15.473453832173739,
"learning_rate": 9.893962200443863e-08,
"loss": 0.4912480115890503,
"step": 4445
},
{
"epoch": 2.83004455760662,
"grad_norm": 5.9488169805606,
"learning_rate": 9.820779188253748e-08,
"loss": 0.09553024172782898,
"step": 4446
},
{
"epoch": 2.8306810948440484,
"grad_norm": 13.764654400798912,
"learning_rate": 9.747865154879798e-08,
"loss": 0.5168312191963196,
"step": 4447
},
{
"epoch": 2.831317632081477,
"grad_norm": 15.331341215166258,
"learning_rate": 9.675220140332452e-08,
"loss": 0.35934263467788696,
"step": 4448
},
{
"epoch": 2.831954169318905,
"grad_norm": 7.198756293350672,
"learning_rate": 9.602844184474714e-08,
"loss": 0.15390601754188538,
"step": 4449
},
{
"epoch": 2.8325907065563336,
"grad_norm": 8.836555196785069,
"learning_rate": 9.5307373270217e-08,
"loss": 0.20732107758522034,
"step": 4450
},
{
"epoch": 2.833227243793762,
"grad_norm": 11.160982633499165,
"learning_rate": 9.45889960754115e-08,
"loss": 0.5410911440849304,
"step": 4451
},
{
"epoch": 2.8338637810311904,
"grad_norm": 12.726741562345024,
"learning_rate": 9.387331065452921e-08,
"loss": 0.9187192916870117,
"step": 4452
},
{
"epoch": 2.834500318268619,
"grad_norm": 9.604486032470266,
"learning_rate": 9.316031740029264e-08,
"loss": 0.40101706981658936,
"step": 4453
},
{
"epoch": 2.835136855506047,
"grad_norm": 5.062763001665208,
"learning_rate": 9.245001670394604e-08,
"loss": 0.2110881507396698,
"step": 4454
},
{
"epoch": 2.8357733927434756,
"grad_norm": 16.827513675147042,
"learning_rate": 9.17424089552571e-08,
"loss": 0.7232325077056885,
"step": 4455
},
{
"epoch": 2.836409929980904,
"grad_norm": 17.956751990784042,
"learning_rate": 9.103749454251576e-08,
"loss": 0.5139992237091064,
"step": 4456
},
{
"epoch": 2.8370464672183324,
"grad_norm": 15.380880382871782,
"learning_rate": 9.033527385253316e-08,
"loss": 0.4737415909767151,
"step": 4457
},
{
"epoch": 2.837683004455761,
"grad_norm": 11.139858245851308,
"learning_rate": 8.963574727064273e-08,
"loss": 0.22228547930717468,
"step": 4458
},
{
"epoch": 2.8383195416931892,
"grad_norm": 11.395808275607928,
"learning_rate": 8.893891518070075e-08,
"loss": 0.21647092700004578,
"step": 4459
},
{
"epoch": 2.838956078930617,
"grad_norm": 11.593829700153915,
"learning_rate": 8.824477796508247e-08,
"loss": 0.18228834867477417,
"step": 4460
},
{
"epoch": 2.839592616168046,
"grad_norm": 8.503341670307945,
"learning_rate": 8.755333600468596e-08,
"loss": 0.5319182276725769,
"step": 4461
},
{
"epoch": 2.840229153405474,
"grad_norm": 8.25358143487463,
"learning_rate": 8.686458967893052e-08,
"loss": 0.3258248269557953,
"step": 4462
},
{
"epoch": 2.840865690642903,
"grad_norm": 22.093221809127197,
"learning_rate": 8.617853936575604e-08,
"loss": 1.1194725036621094,
"step": 4463
},
{
"epoch": 2.841502227880331,
"grad_norm": 8.837268868104667,
"learning_rate": 8.549518544162194e-08,
"loss": 0.28002914786338806,
"step": 4464
},
{
"epoch": 2.8421387651177596,
"grad_norm": 17.381718385312954,
"learning_rate": 8.481452828150938e-08,
"loss": 0.8821957111358643,
"step": 4465
},
{
"epoch": 2.8427753023551876,
"grad_norm": 10.12248178665113,
"learning_rate": 8.413656825891958e-08,
"loss": 0.6032583713531494,
"step": 4466
},
{
"epoch": 2.843411839592616,
"grad_norm": 7.600963808353676,
"learning_rate": 8.346130574587275e-08,
"loss": 0.40028008818626404,
"step": 4467
},
{
"epoch": 2.8440483768300444,
"grad_norm": 11.117287975623862,
"learning_rate": 8.278874111290969e-08,
"loss": 0.5357126593589783,
"step": 4468
},
{
"epoch": 2.844684914067473,
"grad_norm": 10.871594098621824,
"learning_rate": 8.211887472909074e-08,
"loss": 0.32229459285736084,
"step": 4469
},
{
"epoch": 2.845321451304901,
"grad_norm": 9.347649770819665,
"learning_rate": 8.145170696199578e-08,
"loss": 0.3124019503593445,
"step": 4470
},
{
"epoch": 2.8459579885423296,
"grad_norm": 9.641620728344396,
"learning_rate": 8.078723817772249e-08,
"loss": 0.3217499256134033,
"step": 4471
},
{
"epoch": 2.846594525779758,
"grad_norm": 7.362273253859314,
"learning_rate": 8.012546874088978e-08,
"loss": 0.610997200012207,
"step": 4472
},
{
"epoch": 2.8472310630171864,
"grad_norm": 8.47987539393995,
"learning_rate": 7.946639901463272e-08,
"loss": 0.2376602590084076,
"step": 4473
},
{
"epoch": 2.847867600254615,
"grad_norm": 7.9456884869410445,
"learning_rate": 7.88100293606081e-08,
"loss": 0.3960516154766083,
"step": 4474
},
{
"epoch": 2.8485041374920432,
"grad_norm": 9.140345764203408,
"learning_rate": 7.815636013898842e-08,
"loss": 0.1851504147052765,
"step": 4475
},
{
"epoch": 2.8491406747294716,
"grad_norm": 14.128717881268285,
"learning_rate": 7.750539170846505e-08,
"loss": 0.48731547594070435,
"step": 4476
},
{
"epoch": 2.8497772119669,
"grad_norm": 17.627662686809067,
"learning_rate": 7.685712442624838e-08,
"loss": 0.45498213171958923,
"step": 4477
},
{
"epoch": 2.8504137492043284,
"grad_norm": 14.841619557061877,
"learning_rate": 7.621155864806495e-08,
"loss": 0.368963360786438,
"step": 4478
},
{
"epoch": 2.851050286441757,
"grad_norm": 9.3619052997257,
"learning_rate": 7.556869472816031e-08,
"loss": 0.26591670513153076,
"step": 4479
},
{
"epoch": 2.8516868236791852,
"grad_norm": 14.375253108276187,
"learning_rate": 7.492853301929615e-08,
"loss": 1.0872457027435303,
"step": 4480
},
{
"epoch": 2.8523233609166136,
"grad_norm": 12.391381272569898,
"learning_rate": 7.429107387275259e-08,
"loss": 0.38050276041030884,
"step": 4481
},
{
"epoch": 2.852959898154042,
"grad_norm": 9.970198127704585,
"learning_rate": 7.365631763832648e-08,
"loss": 0.236327663064003,
"step": 4482
},
{
"epoch": 2.8535964353914705,
"grad_norm": 10.51881185040378,
"learning_rate": 7.302426466433032e-08,
"loss": 0.4160456955432892,
"step": 4483
},
{
"epoch": 2.854232972628899,
"grad_norm": 18.06197119086671,
"learning_rate": 7.239491529759445e-08,
"loss": 0.6310904026031494,
"step": 4484
},
{
"epoch": 2.8548695098663273,
"grad_norm": 9.489537695208638,
"learning_rate": 7.176826988346597e-08,
"loss": 0.6764538884162903,
"step": 4485
},
{
"epoch": 2.8555060471037557,
"grad_norm": 15.862886500570081,
"learning_rate": 7.114432876580646e-08,
"loss": 0.3962682783603668,
"step": 4486
},
{
"epoch": 2.856142584341184,
"grad_norm": 7.742437758829943,
"learning_rate": 7.052309228699594e-08,
"loss": 0.5590054392814636,
"step": 4487
},
{
"epoch": 2.8567791215786125,
"grad_norm": 9.470130875072385,
"learning_rate": 6.990456078792729e-08,
"loss": 0.3280986547470093,
"step": 4488
},
{
"epoch": 2.857415658816041,
"grad_norm": 10.316280864701609,
"learning_rate": 6.928873460801288e-08,
"loss": 0.454135000705719,
"step": 4489
},
{
"epoch": 2.8580521960534693,
"grad_norm": 15.569255253727679,
"learning_rate": 6.867561408517631e-08,
"loss": 0.5000993609428406,
"step": 4490
},
{
"epoch": 2.8586887332908977,
"grad_norm": 11.534845057933433,
"learning_rate": 6.806519955586066e-08,
"loss": 0.2902962863445282,
"step": 4491
},
{
"epoch": 2.859325270528326,
"grad_norm": 8.14551359453101,
"learning_rate": 6.745749135502077e-08,
"loss": 0.2559558153152466,
"step": 4492
},
{
"epoch": 2.859961807765754,
"grad_norm": 8.109206483058342,
"learning_rate": 6.68524898161288e-08,
"loss": 0.16547997295856476,
"step": 4493
},
{
"epoch": 2.860598345003183,
"grad_norm": 12.540330946147781,
"learning_rate": 6.625019527116972e-08,
"loss": 0.2976877987384796,
"step": 4494
},
{
"epoch": 2.861234882240611,
"grad_norm": 9.735846164542485,
"learning_rate": 6.565060805064471e-08,
"loss": 0.9716310501098633,
"step": 4495
},
{
"epoch": 2.8618714194780397,
"grad_norm": 27.405946650731224,
"learning_rate": 6.505372848356838e-08,
"loss": 1.2291693687438965,
"step": 4496
},
{
"epoch": 2.8625079567154676,
"grad_norm": 10.291637457435106,
"learning_rate": 6.44595568974693e-08,
"loss": 0.6201648712158203,
"step": 4497
},
{
"epoch": 2.8631444939528965,
"grad_norm": 16.41300650112146,
"learning_rate": 6.386809361839163e-08,
"loss": 1.1137560606002808,
"step": 4498
},
{
"epoch": 2.8637810311903245,
"grad_norm": 9.763512408580295,
"learning_rate": 6.327933897089245e-08,
"loss": 0.46922871470451355,
"step": 4499
},
{
"epoch": 2.864417568427753,
"grad_norm": 10.325504020447568,
"learning_rate": 6.269329327804107e-08,
"loss": 0.49104800820350647,
"step": 4500
},
{
"epoch": 2.8650541056651813,
"grad_norm": 8.177469348115752,
"learning_rate": 6.210995686142252e-08,
"loss": 0.5692614912986755,
"step": 4501
},
{
"epoch": 2.8656906429026097,
"grad_norm": 9.886226559193878,
"learning_rate": 6.15293300411346e-08,
"loss": 0.20902472734451294,
"step": 4502
},
{
"epoch": 2.866327180140038,
"grad_norm": 13.619568394311024,
"learning_rate": 6.09514131357869e-08,
"loss": 0.40585821866989136,
"step": 4503
},
{
"epoch": 2.8669637173774665,
"grad_norm": 10.789705295549705,
"learning_rate": 6.037620646250464e-08,
"loss": 0.6018178462982178,
"step": 4504
},
{
"epoch": 2.867600254614895,
"grad_norm": 14.345999572237337,
"learning_rate": 5.980371033692257e-08,
"loss": 0.670649528503418,
"step": 4505
},
{
"epoch": 2.8682367918523233,
"grad_norm": 9.990127169021587,
"learning_rate": 5.923392507319104e-08,
"loss": 0.17083537578582764,
"step": 4506
},
{
"epoch": 2.8688733290897517,
"grad_norm": 9.976647046836414,
"learning_rate": 5.866685098396996e-08,
"loss": 0.3060707151889801,
"step": 4507
},
{
"epoch": 2.86950986632718,
"grad_norm": 11.948991226308062,
"learning_rate": 5.8102488380434866e-08,
"loss": 0.6045752167701721,
"step": 4508
},
{
"epoch": 2.8701464035646085,
"grad_norm": 9.162474701421623,
"learning_rate": 5.75408375722708e-08,
"loss": 0.8474628329277039,
"step": 4509
},
{
"epoch": 2.870782940802037,
"grad_norm": 12.94460002971908,
"learning_rate": 5.698189886767569e-08,
"loss": 0.4504960775375366,
"step": 4510
},
{
"epoch": 2.8714194780394653,
"grad_norm": 12.159960350468108,
"learning_rate": 5.642567257335918e-08,
"loss": 0.7247211337089539,
"step": 4511
},
{
"epoch": 2.8720560152768937,
"grad_norm": 12.917950196781288,
"learning_rate": 5.587215899454268e-08,
"loss": 0.6724990606307983,
"step": 4512
},
{
"epoch": 2.872692552514322,
"grad_norm": 13.821904656241426,
"learning_rate": 5.532135843495767e-08,
"loss": 0.488506019115448,
"step": 4513
},
{
"epoch": 2.8733290897517505,
"grad_norm": 9.892853582240695,
"learning_rate": 5.477327119684961e-08,
"loss": 0.6169945597648621,
"step": 4514
},
{
"epoch": 2.873965626989179,
"grad_norm": 10.54057102665935,
"learning_rate": 5.422789758097291e-08,
"loss": 0.588119626045227,
"step": 4515
},
{
"epoch": 2.8746021642266073,
"grad_norm": 10.328070081405057,
"learning_rate": 5.3685237886592635e-08,
"loss": 0.5582101345062256,
"step": 4516
},
{
"epoch": 2.8752387014640357,
"grad_norm": 7.769263307122998,
"learning_rate": 5.314529241148669e-08,
"loss": 0.32208502292633057,
"step": 4517
},
{
"epoch": 2.875875238701464,
"grad_norm": 8.19608496412307,
"learning_rate": 5.260806145194253e-08,
"loss": 0.3067604899406433,
"step": 4518
},
{
"epoch": 2.8765117759388925,
"grad_norm": 10.461016766778824,
"learning_rate": 5.20735453027571e-08,
"loss": 0.19890695810317993,
"step": 4519
},
{
"epoch": 2.877148313176321,
"grad_norm": 15.077438205436966,
"learning_rate": 5.1541744257238014e-08,
"loss": 0.7313632965087891,
"step": 4520
},
{
"epoch": 2.8777848504137493,
"grad_norm": 13.788361009118347,
"learning_rate": 5.101265860720517e-08,
"loss": 0.5750613808631897,
"step": 4521
},
{
"epoch": 2.8784213876511777,
"grad_norm": 8.771392030295573,
"learning_rate": 5.048628864298577e-08,
"loss": 0.5299889445304871,
"step": 4522
},
{
"epoch": 2.879057924888606,
"grad_norm": 13.156372264193747,
"learning_rate": 4.9962634653417664e-08,
"loss": 0.8597928881645203,
"step": 4523
},
{
"epoch": 2.8796944621260345,
"grad_norm": 6.407389889901951,
"learning_rate": 4.9441696925848214e-08,
"loss": 0.206428661942482,
"step": 4524
},
{
"epoch": 2.880330999363463,
"grad_norm": 9.55087603823779,
"learning_rate": 4.892347574613599e-08,
"loss": 0.26703721284866333,
"step": 4525
},
{
"epoch": 2.880967536600891,
"grad_norm": 12.118321419929345,
"learning_rate": 4.840797139864517e-08,
"loss": 0.34286391735076904,
"step": 4526
},
{
"epoch": 2.8816040738383197,
"grad_norm": 10.712130402413909,
"learning_rate": 4.789518416625338e-08,
"loss": 0.27562084794044495,
"step": 4527
},
{
"epoch": 2.8822406110757477,
"grad_norm": 9.872198012445521,
"learning_rate": 4.738511433034443e-08,
"loss": 0.6138008832931519,
"step": 4528
},
{
"epoch": 2.8828771483131765,
"grad_norm": 13.11905700699076,
"learning_rate": 4.6877762170811633e-08,
"loss": 0.45805659890174866,
"step": 4529
},
{
"epoch": 2.8835136855506045,
"grad_norm": 7.1092825683274405,
"learning_rate": 4.6373127966057863e-08,
"loss": 0.34670984745025635,
"step": 4530
},
{
"epoch": 2.8841502227880333,
"grad_norm": 10.35525817290448,
"learning_rate": 4.587121199299272e-08,
"loss": 0.9458390474319458,
"step": 4531
},
{
"epoch": 2.8847867600254613,
"grad_norm": 8.325769927238404,
"learning_rate": 4.537201452703699e-08,
"loss": 0.35492727160453796,
"step": 4532
},
{
"epoch": 2.88542329726289,
"grad_norm": 12.974471443984363,
"learning_rate": 4.487553584211657e-08,
"loss": 0.5512891411781311,
"step": 4533
},
{
"epoch": 2.886059834500318,
"grad_norm": 11.181319485427325,
"learning_rate": 4.438177621066797e-08,
"loss": 0.3274085223674774,
"step": 4534
},
{
"epoch": 2.8866963717377465,
"grad_norm": 13.108270074939265,
"learning_rate": 4.3890735903634465e-08,
"loss": 0.23640866577625275,
"step": 4535
},
{
"epoch": 2.887332908975175,
"grad_norm": 8.616832133111684,
"learning_rate": 4.3402415190467174e-08,
"loss": 0.19059917330741882,
"step": 4536
},
{
"epoch": 2.8879694462126033,
"grad_norm": 10.438912659103528,
"learning_rate": 4.291681433912509e-08,
"loss": 0.7314203977584839,
"step": 4537
},
{
"epoch": 2.8886059834500317,
"grad_norm": 7.2983698418945915,
"learning_rate": 4.243393361607562e-08,
"loss": 0.4773388206958771,
"step": 4538
},
{
"epoch": 2.88924252068746,
"grad_norm": 11.003429159610093,
"learning_rate": 4.1953773286291243e-08,
"loss": 0.512122631072998,
"step": 4539
},
{
"epoch": 2.8898790579248885,
"grad_norm": 7.072640879091521,
"learning_rate": 4.147633361325454e-08,
"loss": 0.7262274622917175,
"step": 4540
},
{
"epoch": 2.890515595162317,
"grad_norm": 28.306193355621552,
"learning_rate": 4.1001614858952046e-08,
"loss": 0.5705623030662537,
"step": 4541
},
{
"epoch": 2.8911521323997453,
"grad_norm": 12.629888537952215,
"learning_rate": 4.0529617283880386e-08,
"loss": 0.26210007071495056,
"step": 4542
},
{
"epoch": 2.8917886696371737,
"grad_norm": 11.907186782578643,
"learning_rate": 4.006034114704016e-08,
"loss": 0.387427419424057,
"step": 4543
},
{
"epoch": 2.892425206874602,
"grad_norm": 13.79877895212065,
"learning_rate": 3.959378670594094e-08,
"loss": 0.3232886493206024,
"step": 4544
},
{
"epoch": 2.8930617441120305,
"grad_norm": 9.935636510229395,
"learning_rate": 3.9129954216597375e-08,
"loss": 0.34298163652420044,
"step": 4545
},
{
"epoch": 2.893698281349459,
"grad_norm": 13.409272917447078,
"learning_rate": 3.866884393353087e-08,
"loss": 1.2276716232299805,
"step": 4546
},
{
"epoch": 2.8943348185868873,
"grad_norm": 9.455407692868997,
"learning_rate": 3.8210456109769034e-08,
"loss": 1.2836456298828125,
"step": 4547
},
{
"epoch": 2.8949713558243158,
"grad_norm": 12.842722851236312,
"learning_rate": 3.775479099684565e-08,
"loss": 0.4995691776275635,
"step": 4548
},
{
"epoch": 2.895607893061744,
"grad_norm": 13.569094031140875,
"learning_rate": 3.7301848844800145e-08,
"loss": 0.3900608718395233,
"step": 4549
},
{
"epoch": 2.8962444302991726,
"grad_norm": 12.442618503205223,
"learning_rate": 3.685162990217816e-08,
"loss": 0.6684539914131165,
"step": 4550
},
{
"epoch": 2.896880967536601,
"grad_norm": 7.420570756299823,
"learning_rate": 3.6404134416031524e-08,
"loss": 0.23755618929862976,
"step": 4551
},
{
"epoch": 2.8975175047740294,
"grad_norm": 22.238530919928944,
"learning_rate": 3.595936263191657e-08,
"loss": 0.41127052903175354,
"step": 4552
},
{
"epoch": 2.8981540420114578,
"grad_norm": 8.114414533211912,
"learning_rate": 3.551731479389531e-08,
"loss": 0.2637931704521179,
"step": 4553
},
{
"epoch": 2.898790579248886,
"grad_norm": 13.86090703061008,
"learning_rate": 3.507799114453481e-08,
"loss": 0.6877958178520203,
"step": 4554
},
{
"epoch": 2.8994271164863146,
"grad_norm": 10.643344179517298,
"learning_rate": 3.4641391924908344e-08,
"loss": 0.4997013509273529,
"step": 4555
},
{
"epoch": 2.900063653723743,
"grad_norm": 8.078577324109752,
"learning_rate": 3.420751737459316e-08,
"loss": 0.408937931060791,
"step": 4556
},
{
"epoch": 2.9007001909611714,
"grad_norm": 11.094398976129265,
"learning_rate": 3.377636773167159e-08,
"loss": 0.5586139559745789,
"step": 4557
},
{
"epoch": 2.9013367281986,
"grad_norm": 9.230475613217141,
"learning_rate": 3.3347943232730495e-08,
"loss": 0.2931652069091797,
"step": 4558
},
{
"epoch": 2.9019732654360277,
"grad_norm": 20.033575353557268,
"learning_rate": 3.2922244112862376e-08,
"loss": 1.0695240497589111,
"step": 4559
},
{
"epoch": 2.9026098026734566,
"grad_norm": 10.139050366427407,
"learning_rate": 3.249927060566316e-08,
"loss": 0.9362115859985352,
"step": 4560
},
{
"epoch": 2.9032463399108845,
"grad_norm": 9.012167247191057,
"learning_rate": 3.207902294323384e-08,
"loss": 0.37736958265304565,
"step": 4561
},
{
"epoch": 2.9038828771483134,
"grad_norm": 11.550500708163534,
"learning_rate": 3.166150135617885e-08,
"loss": 0.39598530530929565,
"step": 4562
},
{
"epoch": 2.9045194143857413,
"grad_norm": 12.595159471499244,
"learning_rate": 3.124670607360714e-08,
"loss": 0.6241990327835083,
"step": 4563
},
{
"epoch": 2.90515595162317,
"grad_norm": 13.267944266639695,
"learning_rate": 3.083463732313163e-08,
"loss": 0.1851673275232315,
"step": 4564
},
{
"epoch": 2.905792488860598,
"grad_norm": 10.521859273531112,
"learning_rate": 3.04252953308698e-08,
"loss": 0.580502986907959,
"step": 4565
},
{
"epoch": 2.906429026098027,
"grad_norm": 16.216781262611818,
"learning_rate": 3.0018680321441396e-08,
"loss": 0.3015652596950531,
"step": 4566
},
{
"epoch": 2.907065563335455,
"grad_norm": 15.73240355724326,
"learning_rate": 2.9614792517970726e-08,
"loss": 0.6678402423858643,
"step": 4567
},
{
"epoch": 2.9077021005728834,
"grad_norm": 9.54454044413332,
"learning_rate": 2.9213632142086612e-08,
"loss": 0.4288226366043091,
"step": 4568
},
{
"epoch": 2.9083386378103118,
"grad_norm": 8.183428493587117,
"learning_rate": 2.881519941391797e-08,
"loss": 0.27657654881477356,
"step": 4569
},
{
"epoch": 2.90897517504774,
"grad_norm": 12.805530169715631,
"learning_rate": 2.8419494552101025e-08,
"loss": 1.0283787250518799,
"step": 4570
},
{
"epoch": 2.9096117122851686,
"grad_norm": 9.02253089877856,
"learning_rate": 2.802651777377152e-08,
"loss": 0.5577377676963806,
"step": 4571
},
{
"epoch": 2.910248249522597,
"grad_norm": 13.027508164723109,
"learning_rate": 2.763626929457086e-08,
"loss": 0.5071287155151367,
"step": 4572
},
{
"epoch": 2.9108847867600254,
"grad_norm": 11.013042426784692,
"learning_rate": 2.7248749328642187e-08,
"loss": 0.2327755093574524,
"step": 4573
},
{
"epoch": 2.911521323997454,
"grad_norm": 8.90380140679578,
"learning_rate": 2.6863958088630958e-08,
"loss": 0.57288658618927,
"step": 4574
},
{
"epoch": 2.912157861234882,
"grad_norm": 7.094931858902456,
"learning_rate": 2.6481895785686607e-08,
"loss": 0.1919298768043518,
"step": 4575
},
{
"epoch": 2.9127943984723106,
"grad_norm": 12.198952033207512,
"learning_rate": 2.610256262945976e-08,
"loss": 0.35462987422943115,
"step": 4576
},
{
"epoch": 2.913430935709739,
"grad_norm": 12.269093207476155,
"learning_rate": 2.572595882810447e-08,
"loss": 0.36865872144699097,
"step": 4577
},
{
"epoch": 2.9140674729471674,
"grad_norm": 10.972251642161854,
"learning_rate": 2.5352084588275984e-08,
"loss": 0.7054241895675659,
"step": 4578
},
{
"epoch": 2.914704010184596,
"grad_norm": 12.250179674659089,
"learning_rate": 2.4980940115133524e-08,
"loss": 0.7186670303344727,
"step": 4579
},
{
"epoch": 2.915340547422024,
"grad_norm": 9.284270543081451,
"learning_rate": 2.46125256123364e-08,
"loss": 0.6131752133369446,
"step": 4580
},
{
"epoch": 2.9159770846594526,
"grad_norm": 9.888657702691015,
"learning_rate": 2.4246841282046795e-08,
"loss": 0.3839394450187683,
"step": 4581
},
{
"epoch": 2.916613621896881,
"grad_norm": 14.768578185654794,
"learning_rate": 2.3883887324929188e-08,
"loss": 0.7487176060676575,
"step": 4582
},
{
"epoch": 2.9172501591343094,
"grad_norm": 9.897920758430246,
"learning_rate": 2.3523663940149266e-08,
"loss": 0.35754120349884033,
"step": 4583
},
{
"epoch": 2.917886696371738,
"grad_norm": 11.68323652294888,
"learning_rate": 2.3166171325374464e-08,
"loss": 0.6378008723258972,
"step": 4584
},
{
"epoch": 2.918523233609166,
"grad_norm": 9.558492839347917,
"learning_rate": 2.2811409676773422e-08,
"loss": 0.4749261438846588,
"step": 4585
},
{
"epoch": 2.9191597708465946,
"grad_norm": 7.966784517798446,
"learning_rate": 2.2459379189016528e-08,
"loss": 0.3044714033603668,
"step": 4586
},
{
"epoch": 2.919796308084023,
"grad_norm": 16.523261646513284,
"learning_rate": 2.2110080055275375e-08,
"loss": 0.32255470752716064,
"step": 4587
},
{
"epoch": 2.9204328453214514,
"grad_norm": 10.563321431499633,
"learning_rate": 2.176351246722386e-08,
"loss": 0.6167469024658203,
"step": 4588
},
{
"epoch": 2.92106938255888,
"grad_norm": 11.348854001473573,
"learning_rate": 2.1419676615034857e-08,
"loss": 0.3167179822921753,
"step": 4589
},
{
"epoch": 2.9217059197963082,
"grad_norm": 12.836393825008408,
"learning_rate": 2.1078572687383558e-08,
"loss": 1.1272660493850708,
"step": 4590
},
{
"epoch": 2.9223424570337366,
"grad_norm": 8.765601042318105,
"learning_rate": 2.074020087144635e-08,
"loss": 0.2598417401313782,
"step": 4591
},
{
"epoch": 2.9229789942711646,
"grad_norm": 9.30002751072817,
"learning_rate": 2.0404561352899144e-08,
"loss": 0.5057159066200256,
"step": 4592
},
{
"epoch": 2.9236155315085934,
"grad_norm": 6.063826370602459,
"learning_rate": 2.0071654315920176e-08,
"loss": 0.5362277626991272,
"step": 4593
},
{
"epoch": 2.9242520687460214,
"grad_norm": 9.056439821206423,
"learning_rate": 1.9741479943186647e-08,
"loss": 0.6873083710670471,
"step": 4594
},
{
"epoch": 2.9248886059834502,
"grad_norm": 14.735859378043989,
"learning_rate": 1.9414038415877523e-08,
"loss": 0.3906959295272827,
"step": 4595
},
{
"epoch": 2.925525143220878,
"grad_norm": 10.403426445842749,
"learning_rate": 1.908932991367185e-08,
"loss": 0.5455116033554077,
"step": 4596
},
{
"epoch": 2.926161680458307,
"grad_norm": 13.158173803457993,
"learning_rate": 1.8767354614747657e-08,
"loss": 0.3528488278388977,
"step": 4597
},
{
"epoch": 2.926798217695735,
"grad_norm": 11.653583575239574,
"learning_rate": 1.8448112695785282e-08,
"loss": 0.5714707970619202,
"step": 4598
},
{
"epoch": 2.927434754933164,
"grad_norm": 12.06362024924125,
"learning_rate": 1.813160433196348e-08,
"loss": 0.6397544145584106,
"step": 4599
},
{
"epoch": 2.928071292170592,
"grad_norm": 7.940811517503116,
"learning_rate": 1.7817829696962773e-08,
"loss": 0.29710057377815247,
"step": 4600
},
{
"epoch": 2.92870782940802,
"grad_norm": 9.856410599398739,
"learning_rate": 1.750678896296154e-08,
"loss": 0.4119393825531006,
"step": 4601
},
{
"epoch": 2.9293443666454486,
"grad_norm": 14.80908541737794,
"learning_rate": 1.7198482300638808e-08,
"loss": 0.5728417634963989,
"step": 4602
},
{
"epoch": 2.929980903882877,
"grad_norm": 10.862940523560596,
"learning_rate": 1.689290987917369e-08,
"loss": 0.49775296449661255,
"step": 4603
},
{
"epoch": 2.9306174411203054,
"grad_norm": 10.420456319482795,
"learning_rate": 1.6590071866245393e-08,
"loss": 0.34582358598709106,
"step": 4604
},
{
"epoch": 2.931253978357734,
"grad_norm": 10.049706056612026,
"learning_rate": 1.6289968428030433e-08,
"loss": 0.4532100558280945,
"step": 4605
},
{
"epoch": 2.9318905155951622,
"grad_norm": 6.757820628953149,
"learning_rate": 1.5992599729207638e-08,
"loss": 0.3634646534919739,
"step": 4606
},
{
"epoch": 2.9325270528325906,
"grad_norm": 8.555784308495546,
"learning_rate": 1.5697965932953694e-08,
"loss": 0.26822400093078613,
"step": 4607
},
{
"epoch": 2.933163590070019,
"grad_norm": 11.672770541356844,
"learning_rate": 1.540606720094373e-08,
"loss": 0.651909351348877,
"step": 4608
},
{
"epoch": 2.9338001273074474,
"grad_norm": 9.979001378964576,
"learning_rate": 1.51169036933535e-08,
"loss": 0.4629194140434265,
"step": 4609
},
{
"epoch": 2.934436664544876,
"grad_norm": 11.251043223072452,
"learning_rate": 1.4830475568857749e-08,
"loss": 2.339359998703003,
"step": 4610
},
{
"epoch": 2.9350732017823042,
"grad_norm": 11.403279780930562,
"learning_rate": 1.4546782984629082e-08,
"loss": 0.3291216492652893,
"step": 4611
},
{
"epoch": 2.9357097390197326,
"grad_norm": 12.874134226120445,
"learning_rate": 1.4265826096340197e-08,
"loss": 0.4676075577735901,
"step": 4612
},
{
"epoch": 2.936346276257161,
"grad_norm": 20.105035086696223,
"learning_rate": 1.3987605058162213e-08,
"loss": 0.4496423006057739,
"step": 4613
},
{
"epoch": 2.9369828134945895,
"grad_norm": 7.423854726260795,
"learning_rate": 1.3712120022764119e-08,
"loss": 0.5345197916030884,
"step": 4614
},
{
"epoch": 2.937619350732018,
"grad_norm": 23.551985114198768,
"learning_rate": 1.3439371141315549e-08,
"loss": 0.47419023513793945,
"step": 4615
},
{
"epoch": 2.9382558879694463,
"grad_norm": 8.284858958714235,
"learning_rate": 1.3169358563482893e-08,
"loss": 0.5698477029800415,
"step": 4616
},
{
"epoch": 2.9388924252068747,
"grad_norm": 12.162989842432431,
"learning_rate": 1.2902082437432074e-08,
"loss": 0.7193920612335205,
"step": 4617
},
{
"epoch": 2.939528962444303,
"grad_norm": 10.31378049105075,
"learning_rate": 1.2637542909826329e-08,
"loss": 0.3350120782852173,
"step": 4618
},
{
"epoch": 2.9401654996817315,
"grad_norm": 11.33761404680134,
"learning_rate": 1.2375740125828428e-08,
"loss": 0.4467247426509857,
"step": 4619
},
{
"epoch": 2.94080203691916,
"grad_norm": 13.919944337539462,
"learning_rate": 1.2116674229099012e-08,
"loss": 0.4478244185447693,
"step": 4620
},
{
"epoch": 2.9414385741565883,
"grad_norm": 9.019832744100302,
"learning_rate": 1.1860345361796032e-08,
"loss": 0.3990923762321472,
"step": 4621
},
{
"epoch": 2.9420751113940167,
"grad_norm": 15.089779838926905,
"learning_rate": 1.160675366457753e-08,
"loss": 0.20606833696365356,
"step": 4622
},
{
"epoch": 2.942711648631445,
"grad_norm": 9.342650066134949,
"learning_rate": 1.1355899276597193e-08,
"loss": 0.25931254029273987,
"step": 4623
},
{
"epoch": 2.9433481858688735,
"grad_norm": 9.576070508434526,
"learning_rate": 1.1107782335508244e-08,
"loss": 0.3865275979042053,
"step": 4624
},
{
"epoch": 2.943984723106302,
"grad_norm": 10.708802698444359,
"learning_rate": 1.0862402977461217e-08,
"loss": 0.44095659255981445,
"step": 4625
},
{
"epoch": 2.9446212603437303,
"grad_norm": 7.8796593248007065,
"learning_rate": 1.0619761337104517e-08,
"loss": 0.5816665291786194,
"step": 4626
},
{
"epoch": 2.9452577975811582,
"grad_norm": 19.374681129891762,
"learning_rate": 1.0379857547584415e-08,
"loss": 0.408510684967041,
"step": 4627
},
{
"epoch": 2.945894334818587,
"grad_norm": 9.477790975890397,
"learning_rate": 1.0142691740544497e-08,
"loss": 0.2748204469680786,
"step": 4628
},
{
"epoch": 2.946530872056015,
"grad_norm": 13.022074413201983,
"learning_rate": 9.908264046126215e-09,
"loss": 0.7232655882835388,
"step": 4629
},
{
"epoch": 2.947167409293444,
"grad_norm": 10.553840607174726,
"learning_rate": 9.67657459296778e-09,
"loss": 0.255391925573349,
"step": 4630
},
{
"epoch": 2.947803946530872,
"grad_norm": 11.646437471889387,
"learning_rate": 9.447623508206383e-09,
"loss": 0.6574585437774658,
"step": 4631
},
{
"epoch": 2.9484404837683007,
"grad_norm": 11.192933249385815,
"learning_rate": 9.221410917475415e-09,
"loss": 0.6308625340461731,
"step": 4632
},
{
"epoch": 2.9490770210057287,
"grad_norm": 16.47639477707107,
"learning_rate": 8.997936944905583e-09,
"loss": 0.7614609003067017,
"step": 4633
},
{
"epoch": 2.949713558243157,
"grad_norm": 18.821031093522826,
"learning_rate": 8.777201713124906e-09,
"loss": 0.3736048936843872,
"step": 4634
},
{
"epoch": 2.9503500954805855,
"grad_norm": 14.956951920215104,
"learning_rate": 8.55920534325927e-09,
"loss": 0.380316823720932,
"step": 4635
},
{
"epoch": 2.950986632718014,
"grad_norm": 11.998666501834945,
"learning_rate": 8.343947954930209e-09,
"loss": 0.7638375163078308,
"step": 4636
},
{
"epoch": 2.9516231699554423,
"grad_norm": 9.74268219477764,
"learning_rate": 8.131429666257685e-09,
"loss": 0.2994990050792694,
"step": 4637
},
{
"epoch": 2.9522597071928707,
"grad_norm": 9.755158696628564,
"learning_rate": 7.921650593857854e-09,
"loss": 0.6474143266677856,
"step": 4638
},
{
"epoch": 2.952896244430299,
"grad_norm": 8.125385774870677,
"learning_rate": 7.714610852844195e-09,
"loss": 0.2557169795036316,
"step": 4639
},
{
"epoch": 2.9535327816677275,
"grad_norm": 11.572466470400945,
"learning_rate": 7.510310556826384e-09,
"loss": 0.5875788927078247,
"step": 4640
},
{
"epoch": 2.954169318905156,
"grad_norm": 13.952905422739963,
"learning_rate": 7.308749817911409e-09,
"loss": 0.549329936504364,
"step": 4641
},
{
"epoch": 2.9548058561425843,
"grad_norm": 10.413226557769837,
"learning_rate": 7.109928746703576e-09,
"loss": 0.27711164951324463,
"step": 4642
},
{
"epoch": 2.9554423933800127,
"grad_norm": 8.048674249875848,
"learning_rate": 6.91384745230228e-09,
"loss": 0.548971951007843,
"step": 4643
},
{
"epoch": 2.956078930617441,
"grad_norm": 10.925300197821286,
"learning_rate": 6.720506042304231e-09,
"loss": 0.7924700379371643,
"step": 4644
},
{
"epoch": 2.9567154678548695,
"grad_norm": 9.54046390077054,
"learning_rate": 6.529904622803451e-09,
"loss": 0.5511906743049622,
"step": 4645
},
{
"epoch": 2.957352005092298,
"grad_norm": 6.960396340630606,
"learning_rate": 6.342043298389055e-09,
"loss": 0.30417490005493164,
"step": 4646
},
{
"epoch": 2.9579885423297263,
"grad_norm": 11.312873173337188,
"learning_rate": 6.156922172147472e-09,
"loss": 0.8745008707046509,
"step": 4647
},
{
"epoch": 2.9586250795671547,
"grad_norm": 9.28198695690304,
"learning_rate": 5.974541345661333e-09,
"loss": 0.7359851598739624,
"step": 4648
},
{
"epoch": 2.959261616804583,
"grad_norm": 17.70072258269222,
"learning_rate": 5.794900919009472e-09,
"loss": 0.5813353657722473,
"step": 4649
},
{
"epoch": 2.9598981540420115,
"grad_norm": 13.46079452508331,
"learning_rate": 5.61800099076748e-09,
"loss": 0.33794909715652466,
"step": 4650
},
{
"epoch": 2.96053469127944,
"grad_norm": 8.172576505106319,
"learning_rate": 5.4438416580054885e-09,
"loss": 0.39185667037963867,
"step": 4651
},
{
"epoch": 2.9611712285168683,
"grad_norm": 10.144199515250063,
"learning_rate": 5.27242301629205e-09,
"loss": 0.4409201443195343,
"step": 4652
},
{
"epoch": 2.9618077657542967,
"grad_norm": 9.141650159655349,
"learning_rate": 5.103745159690254e-09,
"loss": 0.4678126871585846,
"step": 4653
},
{
"epoch": 2.962444302991725,
"grad_norm": 9.289043350192085,
"learning_rate": 4.937808180759951e-09,
"loss": 0.3816818594932556,
"step": 4654
},
{
"epoch": 2.9630808402291535,
"grad_norm": 8.214529228244643,
"learning_rate": 4.774612170556081e-09,
"loss": 0.4803459346294403,
"step": 4655
},
{
"epoch": 2.963717377466582,
"grad_norm": 7.198439299523344,
"learning_rate": 4.614157218630344e-09,
"loss": 0.40270543098449707,
"step": 4656
},
{
"epoch": 2.9643539147040103,
"grad_norm": 11.045642870577113,
"learning_rate": 4.456443413030087e-09,
"loss": 0.8374072313308716,
"step": 4657
},
{
"epoch": 2.9649904519414387,
"grad_norm": 6.684385318568651,
"learning_rate": 4.30147084029886e-09,
"loss": 0.3140571415424347,
"step": 4658
},
{
"epoch": 2.965626989178867,
"grad_norm": 12.318220516683906,
"learning_rate": 4.149239585475306e-09,
"loss": 0.45646950602531433,
"step": 4659
},
{
"epoch": 2.966263526416295,
"grad_norm": 9.625743666116774,
"learning_rate": 3.999749732093716e-09,
"loss": 0.5095931887626648,
"step": 4660
},
{
"epoch": 2.966900063653724,
"grad_norm": 9.296466825780978,
"learning_rate": 3.853001362185693e-09,
"loss": 0.46697038412094116,
"step": 4661
},
{
"epoch": 2.967536600891152,
"grad_norm": 14.602637922669667,
"learning_rate": 3.7089945562762687e-09,
"loss": 0.5334930419921875,
"step": 4662
},
{
"epoch": 2.9681731381285807,
"grad_norm": 8.338407899437348,
"learning_rate": 3.5677293933877865e-09,
"loss": 0.4201638698577881,
"step": 4663
},
{
"epoch": 2.9688096753660087,
"grad_norm": 15.004695387156493,
"learning_rate": 3.429205951036574e-09,
"loss": 0.47473764419555664,
"step": 4664
},
{
"epoch": 2.9694462126034376,
"grad_norm": 11.349277045893329,
"learning_rate": 3.29342430523627e-09,
"loss": 0.577049732208252,
"step": 4665
},
{
"epoch": 2.9700827498408655,
"grad_norm": 13.624833672125106,
"learning_rate": 3.160384530494498e-09,
"loss": 0.3924509286880493,
"step": 4666
},
{
"epoch": 2.9707192870782944,
"grad_norm": 14.59513753642718,
"learning_rate": 3.0300866998156376e-09,
"loss": 0.19112181663513184,
"step": 4667
},
{
"epoch": 2.9713558243157223,
"grad_norm": 11.858530858795417,
"learning_rate": 2.9025308846980514e-09,
"loss": 0.962306022644043,
"step": 4668
},
{
"epoch": 2.9719923615531507,
"grad_norm": 12.201211180370539,
"learning_rate": 2.7777171551363058e-09,
"loss": 0.4326895475387573,
"step": 4669
},
{
"epoch": 2.972628898790579,
"grad_norm": 9.78405868292641,
"learning_rate": 2.655645579620614e-09,
"loss": 0.34527263045310974,
"step": 4670
},
{
"epoch": 2.9732654360280075,
"grad_norm": 9.898585483398586,
"learning_rate": 2.5363162251357265e-09,
"loss": 0.36358243227005005,
"step": 4671
},
{
"epoch": 2.973901973265436,
"grad_norm": 10.108074306615567,
"learning_rate": 2.419729157161488e-09,
"loss": 0.6575568318367004,
"step": 4672
},
{
"epoch": 2.9745385105028643,
"grad_norm": 13.735777287176315,
"learning_rate": 2.3058844396739443e-09,
"loss": 1.230634331703186,
"step": 4673
},
{
"epoch": 2.9751750477402927,
"grad_norm": 12.500055991788804,
"learning_rate": 2.1947821351431252e-09,
"loss": 0.5693601369857788,
"step": 4674
},
{
"epoch": 2.975811584977721,
"grad_norm": 10.532481489192321,
"learning_rate": 2.086422304535818e-09,
"loss": 0.4035893678665161,
"step": 4675
},
{
"epoch": 2.9764481222151495,
"grad_norm": 17.158646254901626,
"learning_rate": 1.9808050073122365e-09,
"loss": 1.1364099979400635,
"step": 4676
},
{
"epoch": 2.977084659452578,
"grad_norm": 10.770126703442818,
"learning_rate": 1.8779303014282436e-09,
"loss": 0.31037622690200806,
"step": 4677
},
{
"epoch": 2.9777211966900063,
"grad_norm": 5.2507568097299995,
"learning_rate": 1.7777982433353492e-09,
"loss": 0.13051451742649078,
"step": 4678
},
{
"epoch": 2.9783577339274347,
"grad_norm": 8.059077860358032,
"learning_rate": 1.680408887979046e-09,
"loss": 0.5374547839164734,
"step": 4679
},
{
"epoch": 2.978994271164863,
"grad_norm": 8.361687841450028,
"learning_rate": 1.5857622888010294e-09,
"loss": 0.2703975439071655,
"step": 4680
},
{
"epoch": 2.9796308084022916,
"grad_norm": 10.860569488784169,
"learning_rate": 1.4938584977364223e-09,
"loss": 0.5047432780265808,
"step": 4681
},
{
"epoch": 2.98026734563972,
"grad_norm": 10.641551737562104,
"learning_rate": 1.4046975652165506e-09,
"loss": 0.38319021463394165,
"step": 4682
},
{
"epoch": 2.9809038828771484,
"grad_norm": 9.358560198175164,
"learning_rate": 1.3182795401678328e-09,
"loss": 0.6337284445762634,
"step": 4683
},
{
"epoch": 2.9815404201145768,
"grad_norm": 7.857281843436642,
"learning_rate": 1.2346044700095594e-09,
"loss": 0.7023638486862183,
"step": 4684
},
{
"epoch": 2.982176957352005,
"grad_norm": 21.27162566836479,
"learning_rate": 1.1536724006583344e-09,
"loss": 0.36084359884262085,
"step": 4685
},
{
"epoch": 2.9828134945894336,
"grad_norm": 19.295798481613037,
"learning_rate": 1.075483376524189e-09,
"loss": 0.6320829391479492,
"step": 4686
},
{
"epoch": 2.983450031826862,
"grad_norm": 8.970561560053307,
"learning_rate": 1.000037440511692e-09,
"loss": 0.35005611181259155,
"step": 4687
},
{
"epoch": 2.9840865690642904,
"grad_norm": 9.301973166109358,
"learning_rate": 9.273346340216149e-10,
"loss": 0.1751422882080078,
"step": 4688
},
{
"epoch": 2.9847231063017188,
"grad_norm": 20.854029071881634,
"learning_rate": 8.57374996947602e-10,
"loss": 0.6614738702774048,
"step": 4689
},
{
"epoch": 2.985359643539147,
"grad_norm": 9.719595901457362,
"learning_rate": 7.90158567680055e-10,
"loss": 0.6357887387275696,
"step": 4690
},
{
"epoch": 2.9859961807765756,
"grad_norm": 11.102547776524217,
"learning_rate": 7.256853831022481e-10,
"loss": 0.6367413997650146,
"step": 4691
},
{
"epoch": 2.986632718014004,
"grad_norm": 15.364075839766894,
"learning_rate": 6.639554785931035e-10,
"loss": 0.4574640393257141,
"step": 4692
},
{
"epoch": 2.987269255251432,
"grad_norm": 12.457287020917468,
"learning_rate": 6.049688880260807e-10,
"loss": 0.36697688698768616,
"step": 4693
},
{
"epoch": 2.987905792488861,
"grad_norm": 9.161327121663907,
"learning_rate": 5.487256437691768e-10,
"loss": 0.5309689044952393,
"step": 4694
},
{
"epoch": 2.9885423297262887,
"grad_norm": 10.882506683089108,
"learning_rate": 4.952257766849266e-10,
"loss": 0.5090453624725342,
"step": 4695
},
{
"epoch": 2.9891788669637176,
"grad_norm": 11.67842769259947,
"learning_rate": 4.444693161309577e-10,
"loss": 0.7584124803543091,
"step": 4696
},
{
"epoch": 2.9898154042011456,
"grad_norm": 8.416305458186724,
"learning_rate": 3.9645628995943487e-10,
"loss": 0.7738195657730103,
"step": 4697
},
{
"epoch": 2.9904519414385744,
"grad_norm": 19.47558414095772,
"learning_rate": 3.511867245159506e-10,
"loss": 0.9212406277656555,
"step": 4698
},
{
"epoch": 2.9910884786760024,
"grad_norm": 8.161966078157366,
"learning_rate": 3.0866064464174505e-10,
"loss": 0.9757088422775269,
"step": 4699
},
{
"epoch": 2.991725015913431,
"grad_norm": 10.453815620055869,
"learning_rate": 2.68878073672596e-10,
"loss": 0.5119154453277588,
"step": 4700
},
{
"epoch": 2.992361553150859,
"grad_norm": 20.832447435219233,
"learning_rate": 2.3183903343881875e-10,
"loss": 0.6462416052818298,
"step": 4701
},
{
"epoch": 2.9929980903882876,
"grad_norm": 7.854065037269155,
"learning_rate": 1.9754354426526623e-10,
"loss": 0.5890966653823853,
"step": 4702
},
{
"epoch": 2.993634627625716,
"grad_norm": 14.755030293269593,
"learning_rate": 1.659916249702187e-10,
"loss": 0.7764922380447388,
"step": 4703
},
{
"epoch": 2.9942711648631444,
"grad_norm": 13.240081469391923,
"learning_rate": 1.3718329286760423e-10,
"loss": 0.39418211579322815,
"step": 4704
},
{
"epoch": 2.9949077021005728,
"grad_norm": 10.974899505583934,
"learning_rate": 1.1111856376644359e-10,
"loss": 0.4528464078903198,
"step": 4705
},
{
"epoch": 2.995544239338001,
"grad_norm": 13.295350963853561,
"learning_rate": 8.779745196807465e-11,
"loss": 0.4834592640399933,
"step": 4706
},
{
"epoch": 2.9961807765754296,
"grad_norm": 13.441788607828286,
"learning_rate": 6.721997027059335e-11,
"loss": 0.3847227990627289,
"step": 4707
},
{
"epoch": 2.996817313812858,
"grad_norm": 9.842613528414857,
"learning_rate": 4.938612996552294e-11,
"loss": 0.3487985134124756,
"step": 4708
},
{
"epoch": 2.9974538510502864,
"grad_norm": 17.369804534311466,
"learning_rate": 3.4295940838369176e-11,
"loss": 0.33690494298934937,
"step": 4709
},
{
"epoch": 2.998090388287715,
"grad_norm": 9.474979710917523,
"learning_rate": 2.1949411169730484e-11,
"loss": 0.6540824174880981,
"step": 4710
},
{
"epoch": 2.998726925525143,
"grad_norm": 10.217589047260072,
"learning_rate": 1.2346547735297975e-11,
"loss": 0.3194640874862671,
"step": 4711
},
{
"epoch": 2.9993634627625716,
"grad_norm": 11.528238108079135,
"learning_rate": 5.487355803635019e-12,
"loss": 0.6700233817100525,
"step": 4712
},
{
"epoch": 3.0,
"grad_norm": 10.094211639717656,
"learning_rate": 1.3718391389527796e-12,
"loss": 0.3551580309867859,
"step": 4713
},
{
"epoch": 3.0,
"step": 4713,
"total_flos": 12614518824960.0,
"train_loss": 1.6530290696965189,
"train_runtime": 5359.1808,
"train_samples_per_second": 3.517,
"train_steps_per_second": 0.879
}
],
"logging_steps": 1,
"max_steps": 4713,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 12614518824960.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}