33035 lines
852 KiB
JSON
33035 lines
852 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 3.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4713,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0006365372374283895,
|
|
"grad_norm": 39.59088810381954,
|
|
"learning_rate": 0.0,
|
|
"loss": 4.035896301269531,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.001273074474856779,
|
|
"grad_norm": 33.95388657769443,
|
|
"learning_rate": 2.1186440677966104e-08,
|
|
"loss": 4.143885612487793,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0019096117122851686,
|
|
"grad_norm": 39.650793384067434,
|
|
"learning_rate": 4.237288135593221e-08,
|
|
"loss": 4.37307596206665,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.002546148949713558,
|
|
"grad_norm": 38.44073023933529,
|
|
"learning_rate": 6.355932203389831e-08,
|
|
"loss": 4.574411392211914,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.003182686187141948,
|
|
"grad_norm": 34.55309692208958,
|
|
"learning_rate": 8.474576271186442e-08,
|
|
"loss": 3.7624588012695312,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.0038192234245703373,
|
|
"grad_norm": 42.562598379702074,
|
|
"learning_rate": 1.0593220338983051e-07,
|
|
"loss": 4.798104763031006,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.004455760661998727,
|
|
"grad_norm": 37.89472823315551,
|
|
"learning_rate": 1.2711864406779662e-07,
|
|
"loss": 3.6923468112945557,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.005092297899427116,
|
|
"grad_norm": 42.462788278686155,
|
|
"learning_rate": 1.4830508474576274e-07,
|
|
"loss": 4.457640171051025,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.005728835136855506,
|
|
"grad_norm": 30.085931923114636,
|
|
"learning_rate": 1.6949152542372883e-07,
|
|
"loss": 4.173328876495361,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.006365372374283896,
|
|
"grad_norm": 35.73217315240775,
|
|
"learning_rate": 1.9067796610169495e-07,
|
|
"loss": 3.9059009552001953,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.007001909611712286,
|
|
"grad_norm": 40.04548602304375,
|
|
"learning_rate": 2.1186440677966102e-07,
|
|
"loss": 4.167541027069092,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.0076384468491406746,
|
|
"grad_norm": 42.683295384244765,
|
|
"learning_rate": 2.3305084745762714e-07,
|
|
"loss": 4.3580641746521,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.008274984086569064,
|
|
"grad_norm": 41.38080814598698,
|
|
"learning_rate": 2.5423728813559323e-07,
|
|
"loss": 3.817225217819214,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.008911521323997454,
|
|
"grad_norm": 43.28513684057297,
|
|
"learning_rate": 2.7542372881355935e-07,
|
|
"loss": 4.469485282897949,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.009548058561425843,
|
|
"grad_norm": 37.357571431767745,
|
|
"learning_rate": 2.966101694915255e-07,
|
|
"loss": 4.299037933349609,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.010184595798854232,
|
|
"grad_norm": 43.5541276589533,
|
|
"learning_rate": 3.1779661016949154e-07,
|
|
"loss": 4.044004917144775,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.010821133036282623,
|
|
"grad_norm": 33.41877568658477,
|
|
"learning_rate": 3.3898305084745766e-07,
|
|
"loss": 3.832738161087036,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.011457670273711012,
|
|
"grad_norm": 38.34028230142563,
|
|
"learning_rate": 3.601694915254238e-07,
|
|
"loss": 4.182341575622559,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.0120942075111394,
|
|
"grad_norm": 31.12436344380363,
|
|
"learning_rate": 3.813559322033899e-07,
|
|
"loss": 3.7764952182769775,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.012730744748567792,
|
|
"grad_norm": 43.26168495696408,
|
|
"learning_rate": 4.025423728813559e-07,
|
|
"loss": 4.379842281341553,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.01336728198599618,
|
|
"grad_norm": 38.6827307460478,
|
|
"learning_rate": 4.2372881355932204e-07,
|
|
"loss": 4.382342338562012,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.014003819223424571,
|
|
"grad_norm": 33.078543164747124,
|
|
"learning_rate": 4.4491525423728816e-07,
|
|
"loss": 4.717368125915527,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.01464035646085296,
|
|
"grad_norm": 35.76103525996946,
|
|
"learning_rate": 4.661016949152543e-07,
|
|
"loss": 3.935464382171631,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.015276893698281349,
|
|
"grad_norm": 40.65497068724489,
|
|
"learning_rate": 4.872881355932204e-07,
|
|
"loss": 4.350847244262695,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.015913430935709738,
|
|
"grad_norm": 38.848863637549826,
|
|
"learning_rate": 5.084745762711865e-07,
|
|
"loss": 3.9706592559814453,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.016549968173138127,
|
|
"grad_norm": 30.366224160895303,
|
|
"learning_rate": 5.296610169491525e-07,
|
|
"loss": 4.436844825744629,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.01718650541056652,
|
|
"grad_norm": 44.05414632376414,
|
|
"learning_rate": 5.508474576271187e-07,
|
|
"loss": 4.4211554527282715,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.01782304264799491,
|
|
"grad_norm": 32.334770588920634,
|
|
"learning_rate": 5.720338983050848e-07,
|
|
"loss": 4.006234169006348,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.018459579885423297,
|
|
"grad_norm": 30.367874935733056,
|
|
"learning_rate": 5.93220338983051e-07,
|
|
"loss": 3.741302490234375,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.019096117122851686,
|
|
"grad_norm": 37.81163169963108,
|
|
"learning_rate": 6.14406779661017e-07,
|
|
"loss": 4.7005791664123535,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.019732654360280075,
|
|
"grad_norm": 25.301865125654622,
|
|
"learning_rate": 6.355932203389831e-07,
|
|
"loss": 3.332984447479248,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.020369191597708464,
|
|
"grad_norm": 27.638898241178488,
|
|
"learning_rate": 6.567796610169493e-07,
|
|
"loss": 3.882140636444092,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.021005728835136857,
|
|
"grad_norm": 25.13937296259451,
|
|
"learning_rate": 6.779661016949153e-07,
|
|
"loss": 3.568941831588745,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.021642266072565246,
|
|
"grad_norm": 21.604747939074876,
|
|
"learning_rate": 6.991525423728814e-07,
|
|
"loss": 2.9004387855529785,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.022278803309993635,
|
|
"grad_norm": 22.890488241425363,
|
|
"learning_rate": 7.203389830508476e-07,
|
|
"loss": 3.6715078353881836,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.022915340547422024,
|
|
"grad_norm": 21.55591651651768,
|
|
"learning_rate": 7.415254237288136e-07,
|
|
"loss": 3.8014559745788574,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.023551877784850413,
|
|
"grad_norm": 17.678790230951982,
|
|
"learning_rate": 7.627118644067798e-07,
|
|
"loss": 3.3839111328125,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.0241884150222788,
|
|
"grad_norm": 27.699179563792164,
|
|
"learning_rate": 7.838983050847458e-07,
|
|
"loss": 3.6236789226531982,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.024824952259707194,
|
|
"grad_norm": 20.74523751782202,
|
|
"learning_rate": 8.050847457627118e-07,
|
|
"loss": 3.4830198287963867,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.025461489497135583,
|
|
"grad_norm": 22.527501846528462,
|
|
"learning_rate": 8.26271186440678e-07,
|
|
"loss": 3.575878620147705,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.026098026734563972,
|
|
"grad_norm": 19.274175358858386,
|
|
"learning_rate": 8.474576271186441e-07,
|
|
"loss": 3.8505752086639404,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.02673456397199236,
|
|
"grad_norm": 15.568017150909261,
|
|
"learning_rate": 8.686440677966103e-07,
|
|
"loss": 3.506964683532715,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.02737110120942075,
|
|
"grad_norm": 12.75278847276089,
|
|
"learning_rate": 8.898305084745763e-07,
|
|
"loss": 3.152952194213867,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.028007638446849142,
|
|
"grad_norm": 28.932237833127076,
|
|
"learning_rate": 9.110169491525424e-07,
|
|
"loss": 3.9635744094848633,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.02864417568427753,
|
|
"grad_norm": 16.928064417217087,
|
|
"learning_rate": 9.322033898305086e-07,
|
|
"loss": 3.3759422302246094,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.02928071292170592,
|
|
"grad_norm": 14.603814518550873,
|
|
"learning_rate": 9.533898305084746e-07,
|
|
"loss": 2.9782557487487793,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.02991725015913431,
|
|
"grad_norm": 15.836946438005068,
|
|
"learning_rate": 9.745762711864408e-07,
|
|
"loss": 2.773993968963623,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.030553787396562698,
|
|
"grad_norm": 21.03323938551633,
|
|
"learning_rate": 9.957627118644069e-07,
|
|
"loss": 3.337113857269287,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.031190324633991087,
|
|
"grad_norm": 17.099584020068214,
|
|
"learning_rate": 1.016949152542373e-06,
|
|
"loss": 3.631762981414795,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.031826861871419476,
|
|
"grad_norm": 18.911972887238512,
|
|
"learning_rate": 1.038135593220339e-06,
|
|
"loss": 3.622838020324707,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.032463399108847865,
|
|
"grad_norm": 19.659886788763284,
|
|
"learning_rate": 1.059322033898305e-06,
|
|
"loss": 3.443356990814209,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.033099936346276254,
|
|
"grad_norm": 21.357907922654753,
|
|
"learning_rate": 1.0805084745762714e-06,
|
|
"loss": 3.409188747406006,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.03373647358370465,
|
|
"grad_norm": 12.567319854338075,
|
|
"learning_rate": 1.1016949152542374e-06,
|
|
"loss": 2.806626796722412,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.03437301082113304,
|
|
"grad_norm": 14.440684929731635,
|
|
"learning_rate": 1.1228813559322035e-06,
|
|
"loss": 3.726706027984619,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.03500954805856143,
|
|
"grad_norm": 13.848312040485071,
|
|
"learning_rate": 1.1440677966101696e-06,
|
|
"loss": 3.378289222717285,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.03564608529598982,
|
|
"grad_norm": 21.80998498242552,
|
|
"learning_rate": 1.1652542372881356e-06,
|
|
"loss": 3.6709766387939453,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.036282622533418206,
|
|
"grad_norm": 24.52282633397571,
|
|
"learning_rate": 1.186440677966102e-06,
|
|
"loss": 3.545236110687256,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.036919159770846595,
|
|
"grad_norm": 15.228617435626584,
|
|
"learning_rate": 1.207627118644068e-06,
|
|
"loss": 3.3617208003997803,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.037555697008274984,
|
|
"grad_norm": 15.97183299589106,
|
|
"learning_rate": 1.228813559322034e-06,
|
|
"loss": 3.2228782176971436,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.03819223424570337,
|
|
"grad_norm": 19.003049600129348,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": 3.9675958156585693,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.03882877148313176,
|
|
"grad_norm": 12.655134824362944,
|
|
"learning_rate": 1.2711864406779662e-06,
|
|
"loss": 2.9336557388305664,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.03946530872056015,
|
|
"grad_norm": 11.09126231961165,
|
|
"learning_rate": 1.2923728813559322e-06,
|
|
"loss": 3.391512870788574,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.04010184595798854,
|
|
"grad_norm": 10.472726831882387,
|
|
"learning_rate": 1.3135593220338985e-06,
|
|
"loss": 3.2284183502197266,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.04073838319541693,
|
|
"grad_norm": 12.588960784459385,
|
|
"learning_rate": 1.3347457627118646e-06,
|
|
"loss": 3.676628589630127,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.041374920432845325,
|
|
"grad_norm": 15.535810556617445,
|
|
"learning_rate": 1.3559322033898307e-06,
|
|
"loss": 3.5504660606384277,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.042011457670273714,
|
|
"grad_norm": 11.190845261977694,
|
|
"learning_rate": 1.3771186440677967e-06,
|
|
"loss": 2.8842861652374268,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.0426479949077021,
|
|
"grad_norm": 15.172609013946255,
|
|
"learning_rate": 1.3983050847457628e-06,
|
|
"loss": 2.523017406463623,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.04328453214513049,
|
|
"grad_norm": 16.3482667287732,
|
|
"learning_rate": 1.419491525423729e-06,
|
|
"loss": 4.006606101989746,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.04392106938255888,
|
|
"grad_norm": 10.951360933671682,
|
|
"learning_rate": 1.4406779661016951e-06,
|
|
"loss": 3.1742300987243652,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.04455760661998727,
|
|
"grad_norm": 14.28121281093357,
|
|
"learning_rate": 1.4618644067796612e-06,
|
|
"loss": 3.2154297828674316,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.04519414385741566,
|
|
"grad_norm": 12.175471088449958,
|
|
"learning_rate": 1.4830508474576273e-06,
|
|
"loss": 3.3929946422576904,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.04583068109484405,
|
|
"grad_norm": 15.38019255288318,
|
|
"learning_rate": 1.5042372881355931e-06,
|
|
"loss": 3.283337354660034,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.046467218332272436,
|
|
"grad_norm": 14.003505937351898,
|
|
"learning_rate": 1.5254237288135596e-06,
|
|
"loss": 3.858952045440674,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.047103755569700825,
|
|
"grad_norm": 13.489946161626735,
|
|
"learning_rate": 1.5466101694915257e-06,
|
|
"loss": 3.017693519592285,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.047740292807129214,
|
|
"grad_norm": 12.05605526116894,
|
|
"learning_rate": 1.5677966101694915e-06,
|
|
"loss": 3.2235941886901855,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.0483768300445576,
|
|
"grad_norm": 14.773077715980815,
|
|
"learning_rate": 1.5889830508474576e-06,
|
|
"loss": 3.0594072341918945,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.049013367281986,
|
|
"grad_norm": 11.069301477529606,
|
|
"learning_rate": 1.6101694915254237e-06,
|
|
"loss": 3.198401927947998,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.04964990451941439,
|
|
"grad_norm": 9.895147990284883,
|
|
"learning_rate": 1.63135593220339e-06,
|
|
"loss": 3.6801962852478027,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.05028644175684278,
|
|
"grad_norm": 10.978847446356875,
|
|
"learning_rate": 1.652542372881356e-06,
|
|
"loss": 2.884700059890747,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.050922978994271166,
|
|
"grad_norm": 9.941976716243797,
|
|
"learning_rate": 1.673728813559322e-06,
|
|
"loss": 3.1672863960266113,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.051559516231699555,
|
|
"grad_norm": 8.355979527089534,
|
|
"learning_rate": 1.6949152542372882e-06,
|
|
"loss": 2.9366581439971924,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.052196053469127944,
|
|
"grad_norm": 12.47555798769808,
|
|
"learning_rate": 1.7161016949152542e-06,
|
|
"loss": 2.9476046562194824,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.05283259070655633,
|
|
"grad_norm": 11.376509533386537,
|
|
"learning_rate": 1.7372881355932205e-06,
|
|
"loss": 3.0564022064208984,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.05346912794398472,
|
|
"grad_norm": 10.246748605896254,
|
|
"learning_rate": 1.7584745762711866e-06,
|
|
"loss": 3.380188226699829,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.05410566518141311,
|
|
"grad_norm": 7.143523650475739,
|
|
"learning_rate": 1.7796610169491526e-06,
|
|
"loss": 2.8444905281066895,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.0547422024188415,
|
|
"grad_norm": 11.871565488249749,
|
|
"learning_rate": 1.8008474576271187e-06,
|
|
"loss": 4.042019844055176,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.05537873965626989,
|
|
"grad_norm": 10.33040691524808,
|
|
"learning_rate": 1.8220338983050848e-06,
|
|
"loss": 3.086951732635498,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.056015276893698285,
|
|
"grad_norm": 12.662422362320376,
|
|
"learning_rate": 1.843220338983051e-06,
|
|
"loss": 3.7293930053710938,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.056651814131126674,
|
|
"grad_norm": 45.45569964927334,
|
|
"learning_rate": 1.8644067796610171e-06,
|
|
"loss": 2.7154955863952637,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.05728835136855506,
|
|
"grad_norm": 8.894617344554897,
|
|
"learning_rate": 1.8855932203389832e-06,
|
|
"loss": 3.0096163749694824,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.05792488860598345,
|
|
"grad_norm": 9.333847276631833,
|
|
"learning_rate": 1.9067796610169493e-06,
|
|
"loss": 3.182313919067383,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.05856142584341184,
|
|
"grad_norm": 12.328514043388411,
|
|
"learning_rate": 1.9279661016949157e-06,
|
|
"loss": 2.5885255336761475,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.05919796308084023,
|
|
"grad_norm": 14.47210902997613,
|
|
"learning_rate": 1.9491525423728816e-06,
|
|
"loss": 2.846513509750366,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.05983450031826862,
|
|
"grad_norm": 14.1904181336431,
|
|
"learning_rate": 1.9703389830508475e-06,
|
|
"loss": 2.99960994720459,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.06047103755569701,
|
|
"grad_norm": 10.037634547267674,
|
|
"learning_rate": 1.9915254237288137e-06,
|
|
"loss": 3.055741786956787,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.061107574793125397,
|
|
"grad_norm": 9.063588281861795,
|
|
"learning_rate": 2.0127118644067796e-06,
|
|
"loss": 3.3117358684539795,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.061744112030553785,
|
|
"grad_norm": 11.734408626893863,
|
|
"learning_rate": 2.033898305084746e-06,
|
|
"loss": 3.515754222869873,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.062380649267982174,
|
|
"grad_norm": 15.579342259561264,
|
|
"learning_rate": 2.055084745762712e-06,
|
|
"loss": 2.7893433570861816,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.06301718650541056,
|
|
"grad_norm": 14.002678700389755,
|
|
"learning_rate": 2.076271186440678e-06,
|
|
"loss": 3.548750877380371,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.06365372374283895,
|
|
"grad_norm": 14.08928760330149,
|
|
"learning_rate": 2.0974576271186443e-06,
|
|
"loss": 3.7734217643737793,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.06429026098026734,
|
|
"grad_norm": 10.865641175484704,
|
|
"learning_rate": 2.11864406779661e-06,
|
|
"loss": 2.918238878250122,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.06492679821769573,
|
|
"grad_norm": 9.385929922096402,
|
|
"learning_rate": 2.1398305084745764e-06,
|
|
"loss": 3.298204183578491,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.06556333545512412,
|
|
"grad_norm": 9.54293780239165,
|
|
"learning_rate": 2.1610169491525427e-06,
|
|
"loss": 3.274136781692505,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.06619987269255251,
|
|
"grad_norm": 12.0008027622266,
|
|
"learning_rate": 2.1822033898305086e-06,
|
|
"loss": 2.752487897872925,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.0668364099299809,
|
|
"grad_norm": 9.039075799265685,
|
|
"learning_rate": 2.203389830508475e-06,
|
|
"loss": 3.3141207695007324,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.0674729471674093,
|
|
"grad_norm": 21.970990311790214,
|
|
"learning_rate": 2.2245762711864407e-06,
|
|
"loss": 3.47676420211792,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.06810948440483769,
|
|
"grad_norm": 15.184316115178678,
|
|
"learning_rate": 2.245762711864407e-06,
|
|
"loss": 3.7983956336975098,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.06874602164226608,
|
|
"grad_norm": 9.243450593109065,
|
|
"learning_rate": 2.2669491525423732e-06,
|
|
"loss": 2.919459342956543,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.06938255887969447,
|
|
"grad_norm": 8.560492177295222,
|
|
"learning_rate": 2.288135593220339e-06,
|
|
"loss": 3.0816235542297363,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.07001909611712286,
|
|
"grad_norm": 13.125075871676552,
|
|
"learning_rate": 2.3093220338983054e-06,
|
|
"loss": 3.1430845260620117,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.07065563335455124,
|
|
"grad_norm": 7.213511595375255,
|
|
"learning_rate": 2.3305084745762712e-06,
|
|
"loss": 3.198106288909912,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.07129217059197963,
|
|
"grad_norm": 9.725007194186064,
|
|
"learning_rate": 2.3516949152542375e-06,
|
|
"loss": 3.343648672103882,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.07192870782940802,
|
|
"grad_norm": 17.716815300327376,
|
|
"learning_rate": 2.372881355932204e-06,
|
|
"loss": 3.273315191268921,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.07256524506683641,
|
|
"grad_norm": 15.090511908740678,
|
|
"learning_rate": 2.3940677966101697e-06,
|
|
"loss": 3.6860384941101074,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.0732017823042648,
|
|
"grad_norm": 23.619875049161756,
|
|
"learning_rate": 2.415254237288136e-06,
|
|
"loss": 3.4047603607177734,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.07383831954169319,
|
|
"grad_norm": 11.085596287501351,
|
|
"learning_rate": 2.436440677966102e-06,
|
|
"loss": 3.36086368560791,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.07447485677912158,
|
|
"grad_norm": 10.401067566452673,
|
|
"learning_rate": 2.457627118644068e-06,
|
|
"loss": 2.746903419494629,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.07511139401654997,
|
|
"grad_norm": 8.167729784535762,
|
|
"learning_rate": 2.4788135593220343e-06,
|
|
"loss": 2.8697047233581543,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.07574793125397836,
|
|
"grad_norm": 18.19405515066229,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 2.6754255294799805,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.07638446849140675,
|
|
"grad_norm": 8.32936145746687,
|
|
"learning_rate": 2.521186440677966e-06,
|
|
"loss": 2.7582414150238037,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.07702100572883513,
|
|
"grad_norm": 13.676158548645208,
|
|
"learning_rate": 2.5423728813559323e-06,
|
|
"loss": 2.912391185760498,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.07765754296626352,
|
|
"grad_norm": 11.723743727298041,
|
|
"learning_rate": 2.563559322033898e-06,
|
|
"loss": 3.0973339080810547,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.07829408020369191,
|
|
"grad_norm": 8.264572543716843,
|
|
"learning_rate": 2.5847457627118645e-06,
|
|
"loss": 3.3936641216278076,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.0789306174411203,
|
|
"grad_norm": 13.1592378849208,
|
|
"learning_rate": 2.605932203389831e-06,
|
|
"loss": 2.7813830375671387,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.07956715467854869,
|
|
"grad_norm": 15.192533604335532,
|
|
"learning_rate": 2.627118644067797e-06,
|
|
"loss": 3.2265381813049316,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.08020369191597708,
|
|
"grad_norm": 10.566445314436896,
|
|
"learning_rate": 2.648305084745763e-06,
|
|
"loss": 3.334951162338257,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.08084022915340547,
|
|
"grad_norm": 12.255041094999912,
|
|
"learning_rate": 2.669491525423729e-06,
|
|
"loss": 3.0126452445983887,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.08147676639083386,
|
|
"grad_norm": 9.566517458586116,
|
|
"learning_rate": 2.690677966101695e-06,
|
|
"loss": 2.845134735107422,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.08211330362826226,
|
|
"grad_norm": 8.235351971330045,
|
|
"learning_rate": 2.7118644067796613e-06,
|
|
"loss": 2.729961633682251,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.08274984086569065,
|
|
"grad_norm": 19.066643065863207,
|
|
"learning_rate": 2.733050847457627e-06,
|
|
"loss": 3.170295238494873,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.08338637810311904,
|
|
"grad_norm": 13.82108602310928,
|
|
"learning_rate": 2.7542372881355934e-06,
|
|
"loss": 2.8606677055358887,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.08402291534054743,
|
|
"grad_norm": 9.485675344751035,
|
|
"learning_rate": 2.7754237288135593e-06,
|
|
"loss": 2.759913444519043,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.08465945257797582,
|
|
"grad_norm": 7.561287712062645,
|
|
"learning_rate": 2.7966101694915256e-06,
|
|
"loss": 3.171691417694092,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.0852959898154042,
|
|
"grad_norm": 15.081790970276133,
|
|
"learning_rate": 2.817796610169492e-06,
|
|
"loss": 3.1571059226989746,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.0859325270528326,
|
|
"grad_norm": 17.49080914956136,
|
|
"learning_rate": 2.838983050847458e-06,
|
|
"loss": 3.1260931491851807,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.08656906429026098,
|
|
"grad_norm": 10.831529056783937,
|
|
"learning_rate": 2.860169491525424e-06,
|
|
"loss": 3.4544715881347656,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.08720560152768937,
|
|
"grad_norm": 12.650862284069477,
|
|
"learning_rate": 2.8813559322033903e-06,
|
|
"loss": 2.1055846214294434,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.08784213876511776,
|
|
"grad_norm": 10.937242513866803,
|
|
"learning_rate": 2.902542372881356e-06,
|
|
"loss": 3.1760659217834473,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.08847867600254615,
|
|
"grad_norm": 16.562684519262675,
|
|
"learning_rate": 2.9237288135593224e-06,
|
|
"loss": 3.499706506729126,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.08911521323997454,
|
|
"grad_norm": 14.663338858011086,
|
|
"learning_rate": 2.9449152542372883e-06,
|
|
"loss": 3.097550630569458,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.08975175047740293,
|
|
"grad_norm": 8.782288227621974,
|
|
"learning_rate": 2.9661016949152545e-06,
|
|
"loss": 2.998491048812866,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.09038828771483132,
|
|
"grad_norm": 14.102110477033168,
|
|
"learning_rate": 2.9872881355932204e-06,
|
|
"loss": 2.690117835998535,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.0910248249522597,
|
|
"grad_norm": 12.496914670356034,
|
|
"learning_rate": 3.0084745762711862e-06,
|
|
"loss": 2.811756134033203,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.0916613621896881,
|
|
"grad_norm": 12.822381579853225,
|
|
"learning_rate": 3.029661016949153e-06,
|
|
"loss": 3.9626450538635254,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.09229789942711648,
|
|
"grad_norm": 12.582879172097762,
|
|
"learning_rate": 3.0508474576271192e-06,
|
|
"loss": 3.199486255645752,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.09293443666454487,
|
|
"grad_norm": 18.02029221942558,
|
|
"learning_rate": 3.072033898305085e-06,
|
|
"loss": 3.243107557296753,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.09357097390197326,
|
|
"grad_norm": 13.581114680729833,
|
|
"learning_rate": 3.0932203389830514e-06,
|
|
"loss": 3.156430959701538,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.09420751113940165,
|
|
"grad_norm": 12.683469403176712,
|
|
"learning_rate": 3.1144067796610172e-06,
|
|
"loss": 3.62229323387146,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.09484404837683004,
|
|
"grad_norm": 11.866601635486056,
|
|
"learning_rate": 3.135593220338983e-06,
|
|
"loss": 2.9691965579986572,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.09548058561425843,
|
|
"grad_norm": 12.329303680517521,
|
|
"learning_rate": 3.1567796610169494e-06,
|
|
"loss": 3.507720470428467,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.09611712285168682,
|
|
"grad_norm": 12.864876607372913,
|
|
"learning_rate": 3.1779661016949152e-06,
|
|
"loss": 3.1271495819091797,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.0967536600891152,
|
|
"grad_norm": 8.330358552705086,
|
|
"learning_rate": 3.1991525423728815e-06,
|
|
"loss": 2.7351913452148438,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.09739019732654361,
|
|
"grad_norm": 13.856684572001178,
|
|
"learning_rate": 3.2203389830508473e-06,
|
|
"loss": 3.1276650428771973,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.098026734563972,
|
|
"grad_norm": 6.978723672843207,
|
|
"learning_rate": 3.241525423728814e-06,
|
|
"loss": 2.640872001647949,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.09866327180140039,
|
|
"grad_norm": 15.254420137587498,
|
|
"learning_rate": 3.26271186440678e-06,
|
|
"loss": 3.469313859939575,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.09929980903882878,
|
|
"grad_norm": 13.603149074385739,
|
|
"learning_rate": 3.283898305084746e-06,
|
|
"loss": 2.619128942489624,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.09993634627625717,
|
|
"grad_norm": 17.31160612407412,
|
|
"learning_rate": 3.305084745762712e-06,
|
|
"loss": 3.1266517639160156,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.10057288351368555,
|
|
"grad_norm": 19.03803760262164,
|
|
"learning_rate": 3.3262711864406783e-06,
|
|
"loss": 3.0743002891540527,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.10120942075111394,
|
|
"grad_norm": 19.269861713711055,
|
|
"learning_rate": 3.347457627118644e-06,
|
|
"loss": 3.1711010932922363,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.10184595798854233,
|
|
"grad_norm": 13.692244884511693,
|
|
"learning_rate": 3.3686440677966105e-06,
|
|
"loss": 3.3521604537963867,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.10248249522597072,
|
|
"grad_norm": 10.14050843079561,
|
|
"learning_rate": 3.3898305084745763e-06,
|
|
"loss": 2.723728895187378,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.10311903246339911,
|
|
"grad_norm": 18.8592821942506,
|
|
"learning_rate": 3.4110169491525426e-06,
|
|
"loss": 2.988776206970215,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.1037555697008275,
|
|
"grad_norm": 11.99779759520072,
|
|
"learning_rate": 3.4322033898305084e-06,
|
|
"loss": 3.3126285076141357,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.10439210693825589,
|
|
"grad_norm": 9.02626861062283,
|
|
"learning_rate": 3.453389830508475e-06,
|
|
"loss": 2.866410255432129,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.10502864417568428,
|
|
"grad_norm": 12.382144134507604,
|
|
"learning_rate": 3.474576271186441e-06,
|
|
"loss": 3.0566554069519043,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.10566518141311267,
|
|
"grad_norm": 16.03984892684878,
|
|
"learning_rate": 3.4957627118644073e-06,
|
|
"loss": 3.033036708831787,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.10630171865054105,
|
|
"grad_norm": 12.078141339396046,
|
|
"learning_rate": 3.516949152542373e-06,
|
|
"loss": 3.273803234100342,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.10693825588796944,
|
|
"grad_norm": 9.884875875484646,
|
|
"learning_rate": 3.5381355932203394e-06,
|
|
"loss": 2.768066167831421,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.10757479312539783,
|
|
"grad_norm": 19.838989040044574,
|
|
"learning_rate": 3.5593220338983053e-06,
|
|
"loss": 2.800044536590576,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.10821133036282622,
|
|
"grad_norm": 33.923812377724516,
|
|
"learning_rate": 3.5805084745762716e-06,
|
|
"loss": 3.8336315155029297,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.10884786760025461,
|
|
"grad_norm": 15.309054680459798,
|
|
"learning_rate": 3.6016949152542374e-06,
|
|
"loss": 2.6189794540405273,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.109484404837683,
|
|
"grad_norm": 10.169867188606077,
|
|
"learning_rate": 3.6228813559322033e-06,
|
|
"loss": 3.249156951904297,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.11012094207511139,
|
|
"grad_norm": 9.366559092798653,
|
|
"learning_rate": 3.6440677966101695e-06,
|
|
"loss": 2.6109180450439453,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.11075747931253978,
|
|
"grad_norm": 7.839500435241882,
|
|
"learning_rate": 3.6652542372881362e-06,
|
|
"loss": 3.592599391937256,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.11139401654996817,
|
|
"grad_norm": 10.071594229699002,
|
|
"learning_rate": 3.686440677966102e-06,
|
|
"loss": 2.9701881408691406,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.11203055378739657,
|
|
"grad_norm": 36.967183531556714,
|
|
"learning_rate": 3.7076271186440684e-06,
|
|
"loss": 3.1862998008728027,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.11266709102482496,
|
|
"grad_norm": 10.128382960721483,
|
|
"learning_rate": 3.7288135593220342e-06,
|
|
"loss": 2.8713183403015137,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.11330362826225335,
|
|
"grad_norm": 7.899179110880989,
|
|
"learning_rate": 3.7500000000000005e-06,
|
|
"loss": 2.440099000930786,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.11394016549968174,
|
|
"grad_norm": 10.555616860338874,
|
|
"learning_rate": 3.7711864406779664e-06,
|
|
"loss": 2.952479362487793,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.11457670273711013,
|
|
"grad_norm": 17.067174677468486,
|
|
"learning_rate": 3.7923728813559322e-06,
|
|
"loss": 3.5981414318084717,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.11521323997453851,
|
|
"grad_norm": 16.822699566862887,
|
|
"learning_rate": 3.8135593220338985e-06,
|
|
"loss": 2.943324327468872,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.1158497772119669,
|
|
"grad_norm": 21.844528101489537,
|
|
"learning_rate": 3.834745762711865e-06,
|
|
"loss": 3.8883442878723145,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.11648631444939529,
|
|
"grad_norm": 13.662656301441373,
|
|
"learning_rate": 3.8559322033898315e-06,
|
|
"loss": 3.2004919052124023,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.11712285168682368,
|
|
"grad_norm": 10.436671362782297,
|
|
"learning_rate": 3.877118644067797e-06,
|
|
"loss": 3.233124017715454,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.11775938892425207,
|
|
"grad_norm": 15.712145612017746,
|
|
"learning_rate": 3.898305084745763e-06,
|
|
"loss": 3.511937141418457,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.11839592616168046,
|
|
"grad_norm": 6.192919383191328,
|
|
"learning_rate": 3.919491525423729e-06,
|
|
"loss": 2.831325054168701,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.11903246339910885,
|
|
"grad_norm": 19.21846235119949,
|
|
"learning_rate": 3.940677966101695e-06,
|
|
"loss": 3.607250690460205,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.11966900063653724,
|
|
"grad_norm": 15.562894252261083,
|
|
"learning_rate": 3.961864406779662e-06,
|
|
"loss": 2.9302172660827637,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.12030553787396563,
|
|
"grad_norm": 18.97211358017462,
|
|
"learning_rate": 3.9830508474576275e-06,
|
|
"loss": 3.5242438316345215,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.12094207511139402,
|
|
"grad_norm": 11.677567589260672,
|
|
"learning_rate": 4.004237288135593e-06,
|
|
"loss": 3.025068998336792,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.1215786123488224,
|
|
"grad_norm": 11.681417522086605,
|
|
"learning_rate": 4.025423728813559e-06,
|
|
"loss": 2.592844009399414,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.12221514958625079,
|
|
"grad_norm": 10.442960645780184,
|
|
"learning_rate": 4.046610169491526e-06,
|
|
"loss": 2.919527530670166,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.12285168682367918,
|
|
"grad_norm": 9.591281451399857,
|
|
"learning_rate": 4.067796610169492e-06,
|
|
"loss": 2.864609956741333,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.12348822406110757,
|
|
"grad_norm": 9.553179683924215,
|
|
"learning_rate": 4.0889830508474584e-06,
|
|
"loss": 2.886277198791504,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.12412476129853596,
|
|
"grad_norm": 8.9114375203598,
|
|
"learning_rate": 4.110169491525424e-06,
|
|
"loss": 2.700643301010132,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.12476129853596435,
|
|
"grad_norm": 11.804094590244754,
|
|
"learning_rate": 4.13135593220339e-06,
|
|
"loss": 3.145249366760254,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.12539783577339275,
|
|
"grad_norm": 15.860462809303401,
|
|
"learning_rate": 4.152542372881356e-06,
|
|
"loss": 2.9896278381347656,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.12603437301082113,
|
|
"grad_norm": 13.152319755016842,
|
|
"learning_rate": 4.173728813559323e-06,
|
|
"loss": 2.631516456604004,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.12667091024824953,
|
|
"grad_norm": 16.436797093248362,
|
|
"learning_rate": 4.1949152542372886e-06,
|
|
"loss": 2.742974281311035,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.1273074474856779,
|
|
"grad_norm": 8.75887823731754,
|
|
"learning_rate": 4.2161016949152544e-06,
|
|
"loss": 2.4858527183532715,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.1279439847231063,
|
|
"grad_norm": 11.313045010110079,
|
|
"learning_rate": 4.23728813559322e-06,
|
|
"loss": 3.0495362281799316,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.12858052196053468,
|
|
"grad_norm": 10.37774972184091,
|
|
"learning_rate": 4.258474576271186e-06,
|
|
"loss": 2.8722662925720215,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.12921705919796309,
|
|
"grad_norm": 15.002975107696502,
|
|
"learning_rate": 4.279661016949153e-06,
|
|
"loss": 3.10701322555542,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.12985359643539146,
|
|
"grad_norm": 14.296117051079547,
|
|
"learning_rate": 4.3008474576271195e-06,
|
|
"loss": 2.8308165073394775,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.13049013367281986,
|
|
"grad_norm": 10.289126134210136,
|
|
"learning_rate": 4.322033898305085e-06,
|
|
"loss": 3.036111354827881,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.13112667091024824,
|
|
"grad_norm": 16.829371148640362,
|
|
"learning_rate": 4.343220338983051e-06,
|
|
"loss": 3.3032469749450684,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.13176320814767664,
|
|
"grad_norm": 11.15761088911933,
|
|
"learning_rate": 4.364406779661017e-06,
|
|
"loss": 3.1203994750976562,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.13239974538510502,
|
|
"grad_norm": 8.624902828702124,
|
|
"learning_rate": 4.385593220338983e-06,
|
|
"loss": 2.4616947174072266,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.13303628262253342,
|
|
"grad_norm": 14.652551487527845,
|
|
"learning_rate": 4.40677966101695e-06,
|
|
"loss": 3.2015225887298584,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.1336728198599618,
|
|
"grad_norm": 17.85769559664722,
|
|
"learning_rate": 4.4279661016949155e-06,
|
|
"loss": 3.392197370529175,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.1343093570973902,
|
|
"grad_norm": 14.662673921570757,
|
|
"learning_rate": 4.449152542372881e-06,
|
|
"loss": 3.2083888053894043,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.1349458943348186,
|
|
"grad_norm": 17.467302319248383,
|
|
"learning_rate": 4.470338983050847e-06,
|
|
"loss": 3.1518492698669434,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.13558243157224698,
|
|
"grad_norm": 10.51838123835325,
|
|
"learning_rate": 4.491525423728814e-06,
|
|
"loss": 2.855264902114868,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.13621896880967538,
|
|
"grad_norm": 14.762402460834231,
|
|
"learning_rate": 4.51271186440678e-06,
|
|
"loss": 2.360743522644043,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.13685550604710375,
|
|
"grad_norm": 13.047537180129066,
|
|
"learning_rate": 4.5338983050847465e-06,
|
|
"loss": 3.403221368789673,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.13749204328453216,
|
|
"grad_norm": 11.286423215824348,
|
|
"learning_rate": 4.555084745762712e-06,
|
|
"loss": 2.9744622707366943,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.13812858052196053,
|
|
"grad_norm": 22.128236162508518,
|
|
"learning_rate": 4.576271186440678e-06,
|
|
"loss": 3.943368673324585,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.13876511775938893,
|
|
"grad_norm": 11.945689425899966,
|
|
"learning_rate": 4.597457627118644e-06,
|
|
"loss": 2.857276201248169,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.1394016549968173,
|
|
"grad_norm": 8.384366498024004,
|
|
"learning_rate": 4.618644067796611e-06,
|
|
"loss": 2.535996913909912,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.1400381922342457,
|
|
"grad_norm": 15.461923137664042,
|
|
"learning_rate": 4.639830508474577e-06,
|
|
"loss": 3.1761062145233154,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.1406747294716741,
|
|
"grad_norm": 8.509254573006608,
|
|
"learning_rate": 4.6610169491525425e-06,
|
|
"loss": 2.7607967853546143,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.1413112667091025,
|
|
"grad_norm": 9.170873253831214,
|
|
"learning_rate": 4.682203389830508e-06,
|
|
"loss": 3.2002480030059814,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.14194780394653086,
|
|
"grad_norm": 7.339418444481694,
|
|
"learning_rate": 4.703389830508475e-06,
|
|
"loss": 2.5810189247131348,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.14258434118395927,
|
|
"grad_norm": 10.766721995996047,
|
|
"learning_rate": 4.724576271186441e-06,
|
|
"loss": 2.8315610885620117,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.14322087842138764,
|
|
"grad_norm": 13.548350579282785,
|
|
"learning_rate": 4.745762711864408e-06,
|
|
"loss": 2.837813377380371,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.14385741565881605,
|
|
"grad_norm": 34.5958667853913,
|
|
"learning_rate": 4.7669491525423735e-06,
|
|
"loss": 2.95943546295166,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.14449395289624442,
|
|
"grad_norm": 11.725989147517986,
|
|
"learning_rate": 4.788135593220339e-06,
|
|
"loss": 2.85552978515625,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.14513049013367282,
|
|
"grad_norm": 12.307115633222764,
|
|
"learning_rate": 4.809322033898305e-06,
|
|
"loss": 2.4775352478027344,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.1457670273711012,
|
|
"grad_norm": 10.895472196267837,
|
|
"learning_rate": 4.830508474576272e-06,
|
|
"loss": 3.1042962074279785,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.1464035646085296,
|
|
"grad_norm": 18.15671039636725,
|
|
"learning_rate": 4.851694915254238e-06,
|
|
"loss": 3.226940631866455,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.14704010184595798,
|
|
"grad_norm": 17.865544568948273,
|
|
"learning_rate": 4.872881355932204e-06,
|
|
"loss": 2.958282947540283,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.14767663908338638,
|
|
"grad_norm": 13.075581071672993,
|
|
"learning_rate": 4.8940677966101694e-06,
|
|
"loss": 2.860640287399292,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.14831317632081475,
|
|
"grad_norm": 7.3011973358144155,
|
|
"learning_rate": 4.915254237288136e-06,
|
|
"loss": 1.8848614692687988,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.14894971355824316,
|
|
"grad_norm": 8.961754059399695,
|
|
"learning_rate": 4.936440677966102e-06,
|
|
"loss": 2.6384811401367188,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.14958625079567156,
|
|
"grad_norm": 6.695575838860698,
|
|
"learning_rate": 4.957627118644069e-06,
|
|
"loss": 2.6513447761535645,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.15022278803309994,
|
|
"grad_norm": 10.172999182069105,
|
|
"learning_rate": 4.9788135593220346e-06,
|
|
"loss": 3.049452304840088,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.15085932527052834,
|
|
"grad_norm": 10.885031573274539,
|
|
"learning_rate": 5e-06,
|
|
"loss": 3.013901710510254,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.1514958625079567,
|
|
"grad_norm": 9.789951162121023,
|
|
"learning_rate": 5.021186440677966e-06,
|
|
"loss": 2.4011707305908203,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.15213239974538512,
|
|
"grad_norm": 29.10099418080209,
|
|
"learning_rate": 5.042372881355932e-06,
|
|
"loss": 3.279197931289673,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.1527689369828135,
|
|
"grad_norm": 10.18710797249541,
|
|
"learning_rate": 5.063559322033899e-06,
|
|
"loss": 2.9297866821289062,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.1534054742202419,
|
|
"grad_norm": 16.259716765444157,
|
|
"learning_rate": 5.084745762711865e-06,
|
|
"loss": 2.587176561355591,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.15404201145767027,
|
|
"grad_norm": 12.250110022036033,
|
|
"learning_rate": 5.1059322033898305e-06,
|
|
"loss": 2.983497142791748,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.15467854869509867,
|
|
"grad_norm": 13.257585716683908,
|
|
"learning_rate": 5.127118644067796e-06,
|
|
"loss": 3.0938868522644043,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.15531508593252705,
|
|
"grad_norm": 8.009985272716898,
|
|
"learning_rate": 5.148305084745763e-06,
|
|
"loss": 2.4970593452453613,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.15595162316995545,
|
|
"grad_norm": 13.706175661301124,
|
|
"learning_rate": 5.169491525423729e-06,
|
|
"loss": 3.3828718662261963,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.15658816040738383,
|
|
"grad_norm": 7.54785337703857,
|
|
"learning_rate": 5.190677966101695e-06,
|
|
"loss": 2.363739490509033,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.15722469764481223,
|
|
"grad_norm": 8.535146691547306,
|
|
"learning_rate": 5.211864406779662e-06,
|
|
"loss": 2.6143975257873535,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.1578612348822406,
|
|
"grad_norm": 6.4085304399908685,
|
|
"learning_rate": 5.233050847457628e-06,
|
|
"loss": 2.821204900741577,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.158497772119669,
|
|
"grad_norm": 10.0578421471266,
|
|
"learning_rate": 5.254237288135594e-06,
|
|
"loss": 2.8076894283294678,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.15913430935709738,
|
|
"grad_norm": 9.687543277555026,
|
|
"learning_rate": 5.27542372881356e-06,
|
|
"loss": 2.650265693664551,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.15977084659452578,
|
|
"grad_norm": 8.634396293605054,
|
|
"learning_rate": 5.296610169491526e-06,
|
|
"loss": 2.70159912109375,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.16040738383195416,
|
|
"grad_norm": 12.644669612153189,
|
|
"learning_rate": 5.3177966101694925e-06,
|
|
"loss": 3.199131965637207,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.16104392106938256,
|
|
"grad_norm": 10.563494009915301,
|
|
"learning_rate": 5.338983050847458e-06,
|
|
"loss": 2.6978464126586914,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.16168045830681094,
|
|
"grad_norm": 10.910365311666162,
|
|
"learning_rate": 5.360169491525424e-06,
|
|
"loss": 2.7021255493164062,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.16231699554423934,
|
|
"grad_norm": 7.3941820229008846,
|
|
"learning_rate": 5.38135593220339e-06,
|
|
"loss": 3.066408395767212,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.16295353278166771,
|
|
"grad_norm": 10.356816747079062,
|
|
"learning_rate": 5.402542372881357e-06,
|
|
"loss": 3.2458343505859375,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.16359007001909612,
|
|
"grad_norm": 17.605242578677803,
|
|
"learning_rate": 5.423728813559323e-06,
|
|
"loss": 2.8568882942199707,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.16422660725652452,
|
|
"grad_norm": 14.926319024507961,
|
|
"learning_rate": 5.4449152542372885e-06,
|
|
"loss": 2.7378273010253906,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.1648631444939529,
|
|
"grad_norm": 10.554202554221286,
|
|
"learning_rate": 5.466101694915254e-06,
|
|
"loss": 2.680108070373535,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.1654996817313813,
|
|
"grad_norm": 10.015652391833274,
|
|
"learning_rate": 5.487288135593221e-06,
|
|
"loss": 3.29970645904541,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.16613621896880967,
|
|
"grad_norm": 9.58361415978751,
|
|
"learning_rate": 5.508474576271187e-06,
|
|
"loss": 2.9083588123321533,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.16677275620623808,
|
|
"grad_norm": 21.535696433326326,
|
|
"learning_rate": 5.529661016949153e-06,
|
|
"loss": 3.120295524597168,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.16740929344366645,
|
|
"grad_norm": 13.25929140074107,
|
|
"learning_rate": 5.550847457627119e-06,
|
|
"loss": 2.9800517559051514,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.16804583068109485,
|
|
"grad_norm": 9.757729544531495,
|
|
"learning_rate": 5.5720338983050844e-06,
|
|
"loss": 3.061345100402832,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.16868236791852323,
|
|
"grad_norm": 12.95629265024167,
|
|
"learning_rate": 5.593220338983051e-06,
|
|
"loss": 2.787202835083008,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.16931890515595163,
|
|
"grad_norm": 10.982801430256657,
|
|
"learning_rate": 5.614406779661017e-06,
|
|
"loss": 3.1923770904541016,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.16995544239338,
|
|
"grad_norm": 8.224155321892544,
|
|
"learning_rate": 5.635593220338984e-06,
|
|
"loss": 2.5791826248168945,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.1705919796308084,
|
|
"grad_norm": 10.460293961838643,
|
|
"learning_rate": 5.65677966101695e-06,
|
|
"loss": 2.779829740524292,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.17122851686823679,
|
|
"grad_norm": 10.261645668961702,
|
|
"learning_rate": 5.677966101694916e-06,
|
|
"loss": 3.2863683700561523,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.1718650541056652,
|
|
"grad_norm": 6.860353021754464,
|
|
"learning_rate": 5.699152542372882e-06,
|
|
"loss": 2.6788125038146973,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.17250159134309356,
|
|
"grad_norm": 13.718261035931205,
|
|
"learning_rate": 5.720338983050848e-06,
|
|
"loss": 2.84999942779541,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.17313812858052197,
|
|
"grad_norm": 9.450110201931974,
|
|
"learning_rate": 5.741525423728815e-06,
|
|
"loss": 2.3188841342926025,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.17377466581795034,
|
|
"grad_norm": 24.399578162992768,
|
|
"learning_rate": 5.7627118644067805e-06,
|
|
"loss": 2.3342952728271484,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.17441120305537874,
|
|
"grad_norm": 14.342609925306267,
|
|
"learning_rate": 5.783898305084746e-06,
|
|
"loss": 3.366730213165283,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.17504774029280712,
|
|
"grad_norm": 7.275306607886118,
|
|
"learning_rate": 5.805084745762712e-06,
|
|
"loss": 2.290006637573242,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.17568427753023552,
|
|
"grad_norm": 9.939524487285707,
|
|
"learning_rate": 5.826271186440678e-06,
|
|
"loss": 2.8823177814483643,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.1763208147676639,
|
|
"grad_norm": 8.669514074833224,
|
|
"learning_rate": 5.847457627118645e-06,
|
|
"loss": 2.6550536155700684,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.1769573520050923,
|
|
"grad_norm": 11.02825638441472,
|
|
"learning_rate": 5.868644067796611e-06,
|
|
"loss": 2.757049798965454,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.17759388924252067,
|
|
"grad_norm": 21.597046178374484,
|
|
"learning_rate": 5.8898305084745765e-06,
|
|
"loss": 3.2479753494262695,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.17823042647994908,
|
|
"grad_norm": 10.172094762492458,
|
|
"learning_rate": 5.911016949152542e-06,
|
|
"loss": 2.887638568878174,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.17886696371737745,
|
|
"grad_norm": 9.39680421509699,
|
|
"learning_rate": 5.932203389830509e-06,
|
|
"loss": 3.185732841491699,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.17950350095480586,
|
|
"grad_norm": 11.4576049744317,
|
|
"learning_rate": 5.953389830508475e-06,
|
|
"loss": 3.0998311042785645,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.18014003819223426,
|
|
"grad_norm": 11.683392046372845,
|
|
"learning_rate": 5.974576271186441e-06,
|
|
"loss": 2.983189582824707,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.18077657542966263,
|
|
"grad_norm": 11.11153403684068,
|
|
"learning_rate": 5.995762711864407e-06,
|
|
"loss": 2.9110331535339355,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.18141311266709104,
|
|
"grad_norm": 10.259066032322758,
|
|
"learning_rate": 6.0169491525423725e-06,
|
|
"loss": 3.0029218196868896,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.1820496499045194,
|
|
"grad_norm": 9.504956577472365,
|
|
"learning_rate": 6.038135593220339e-06,
|
|
"loss": 2.7357711791992188,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.18268618714194781,
|
|
"grad_norm": 12.30922909428048,
|
|
"learning_rate": 6.059322033898306e-06,
|
|
"loss": 2.705357551574707,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.1833227243793762,
|
|
"grad_norm": 12.708040498692581,
|
|
"learning_rate": 6.080508474576272e-06,
|
|
"loss": 2.4068801403045654,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.1839592616168046,
|
|
"grad_norm": 7.5926474714142245,
|
|
"learning_rate": 6.1016949152542385e-06,
|
|
"loss": 2.8826093673706055,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.18459579885423297,
|
|
"grad_norm": 16.415575077576264,
|
|
"learning_rate": 6.122881355932204e-06,
|
|
"loss": 3.080456256866455,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.18523233609166137,
|
|
"grad_norm": 10.94938631452902,
|
|
"learning_rate": 6.14406779661017e-06,
|
|
"loss": 2.8359527587890625,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.18586887332908975,
|
|
"grad_norm": 18.625269551728508,
|
|
"learning_rate": 6.165254237288136e-06,
|
|
"loss": 3.1971914768218994,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.18650541056651815,
|
|
"grad_norm": 16.038218558942276,
|
|
"learning_rate": 6.186440677966103e-06,
|
|
"loss": 2.786512851715088,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.18714194780394652,
|
|
"grad_norm": 30.036590776335498,
|
|
"learning_rate": 6.207627118644069e-06,
|
|
"loss": 2.433791160583496,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.18777848504137493,
|
|
"grad_norm": 7.181996858345177,
|
|
"learning_rate": 6.2288135593220344e-06,
|
|
"loss": 3.180842876434326,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.1884150222788033,
|
|
"grad_norm": 12.848065653568986,
|
|
"learning_rate": 6.25e-06,
|
|
"loss": 2.833150863647461,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.1890515595162317,
|
|
"grad_norm": 10.249632541183296,
|
|
"learning_rate": 6.271186440677966e-06,
|
|
"loss": 3.3664321899414062,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.18968809675366008,
|
|
"grad_norm": 9.17490216034036,
|
|
"learning_rate": 6.292372881355933e-06,
|
|
"loss": 3.034820556640625,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.19032463399108848,
|
|
"grad_norm": 12.29588017430244,
|
|
"learning_rate": 6.313559322033899e-06,
|
|
"loss": 3.113218307495117,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.19096117122851686,
|
|
"grad_norm": 10.551364939017459,
|
|
"learning_rate": 6.3347457627118646e-06,
|
|
"loss": 2.583988666534424,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.19159770846594526,
|
|
"grad_norm": 9.118649476570695,
|
|
"learning_rate": 6.3559322033898304e-06,
|
|
"loss": 2.780367374420166,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.19223424570337364,
|
|
"grad_norm": 17.380906168062385,
|
|
"learning_rate": 6.377118644067797e-06,
|
|
"loss": 2.361020088195801,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.19287078294080204,
|
|
"grad_norm": 16.52007281181012,
|
|
"learning_rate": 6.398305084745763e-06,
|
|
"loss": 3.059126377105713,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.1935073201782304,
|
|
"grad_norm": 15.360216329621078,
|
|
"learning_rate": 6.419491525423729e-06,
|
|
"loss": 2.1948864459991455,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.19414385741565882,
|
|
"grad_norm": 13.924681412606185,
|
|
"learning_rate": 6.440677966101695e-06,
|
|
"loss": 2.9051995277404785,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.19478039465308722,
|
|
"grad_norm": 20.45103908918982,
|
|
"learning_rate": 6.461864406779662e-06,
|
|
"loss": 2.8492469787597656,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.1954169318905156,
|
|
"grad_norm": 11.272264147744535,
|
|
"learning_rate": 6.483050847457628e-06,
|
|
"loss": 2.796387195587158,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.196053469127944,
|
|
"grad_norm": 7.491254613489933,
|
|
"learning_rate": 6.504237288135594e-06,
|
|
"loss": 2.8268957138061523,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.19669000636537237,
|
|
"grad_norm": 11.248178380854952,
|
|
"learning_rate": 6.52542372881356e-06,
|
|
"loss": 2.594062566757202,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.19732654360280077,
|
|
"grad_norm": 8.812198808149372,
|
|
"learning_rate": 6.5466101694915265e-06,
|
|
"loss": 2.788079023361206,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.19796308084022915,
|
|
"grad_norm": 18.203946006098864,
|
|
"learning_rate": 6.567796610169492e-06,
|
|
"loss": 2.060673713684082,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.19859961807765755,
|
|
"grad_norm": 10.21084736659449,
|
|
"learning_rate": 6.588983050847458e-06,
|
|
"loss": 2.218992233276367,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.19923615531508593,
|
|
"grad_norm": 24.98324005759942,
|
|
"learning_rate": 6.610169491525424e-06,
|
|
"loss": 3.5444846153259277,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.19987269255251433,
|
|
"grad_norm": 13.942804366495361,
|
|
"learning_rate": 6.631355932203391e-06,
|
|
"loss": 2.5274245738983154,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.2005092297899427,
|
|
"grad_norm": 7.447244363066533,
|
|
"learning_rate": 6.652542372881357e-06,
|
|
"loss": 2.453437089920044,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.2011457670273711,
|
|
"grad_norm": 8.780622079696341,
|
|
"learning_rate": 6.6737288135593225e-06,
|
|
"loss": 2.773866891860962,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.20178230426479948,
|
|
"grad_norm": 11.604249557547087,
|
|
"learning_rate": 6.694915254237288e-06,
|
|
"loss": 3.253352165222168,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.2024188415022279,
|
|
"grad_norm": 15.879127615597556,
|
|
"learning_rate": 6.716101694915255e-06,
|
|
"loss": 3.153146266937256,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.20305537873965626,
|
|
"grad_norm": 13.86771255756771,
|
|
"learning_rate": 6.737288135593221e-06,
|
|
"loss": 3.3321497440338135,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.20369191597708466,
|
|
"grad_norm": 7.73155294931535,
|
|
"learning_rate": 6.758474576271187e-06,
|
|
"loss": 3.2387685775756836,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.20432845321451304,
|
|
"grad_norm": 11.810915669641348,
|
|
"learning_rate": 6.779661016949153e-06,
|
|
"loss": 3.321180582046509,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.20496499045194144,
|
|
"grad_norm": 9.777026830947202,
|
|
"learning_rate": 6.8008474576271185e-06,
|
|
"loss": 2.50833797454834,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.20560152768936982,
|
|
"grad_norm": 20.25164388427156,
|
|
"learning_rate": 6.822033898305085e-06,
|
|
"loss": 3.063729763031006,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.20623806492679822,
|
|
"grad_norm": 7.211655129625353,
|
|
"learning_rate": 6.843220338983051e-06,
|
|
"loss": 2.7206499576568604,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.2068746021642266,
|
|
"grad_norm": 12.00457664613567,
|
|
"learning_rate": 6.864406779661017e-06,
|
|
"loss": 2.948394775390625,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.207511139401655,
|
|
"grad_norm": 14.193817895558638,
|
|
"learning_rate": 6.8855932203389844e-06,
|
|
"loss": 2.6999831199645996,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.20814767663908337,
|
|
"grad_norm": 9.22847901312038,
|
|
"learning_rate": 6.90677966101695e-06,
|
|
"loss": 2.798079013824463,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.20878421387651178,
|
|
"grad_norm": 14.274035645282098,
|
|
"learning_rate": 6.927966101694916e-06,
|
|
"loss": 2.9096508026123047,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.20942075111394018,
|
|
"grad_norm": 13.806618597816842,
|
|
"learning_rate": 6.949152542372882e-06,
|
|
"loss": 3.3791441917419434,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.21005728835136855,
|
|
"grad_norm": 9.200499313513514,
|
|
"learning_rate": 6.970338983050849e-06,
|
|
"loss": 2.008662223815918,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.21069382558879696,
|
|
"grad_norm": 19.573127652755893,
|
|
"learning_rate": 6.9915254237288146e-06,
|
|
"loss": 3.0151891708374023,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.21133036282622533,
|
|
"grad_norm": 9.570429551637792,
|
|
"learning_rate": 7.0127118644067804e-06,
|
|
"loss": 2.319244623184204,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.21196690006365373,
|
|
"grad_norm": 9.731842053626478,
|
|
"learning_rate": 7.033898305084746e-06,
|
|
"loss": 2.5693533420562744,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.2126034373010821,
|
|
"grad_norm": 12.27977297948714,
|
|
"learning_rate": 7.055084745762712e-06,
|
|
"loss": 3.262115478515625,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.2132399745385105,
|
|
"grad_norm": 9.818826469267037,
|
|
"learning_rate": 7.076271186440679e-06,
|
|
"loss": 3.6047706604003906,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.2138765117759389,
|
|
"grad_norm": 12.754731870818834,
|
|
"learning_rate": 7.097457627118645e-06,
|
|
"loss": 3.1616177558898926,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.2145130490133673,
|
|
"grad_norm": 9.426972453147444,
|
|
"learning_rate": 7.1186440677966106e-06,
|
|
"loss": 3.418569564819336,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.21514958625079567,
|
|
"grad_norm": 13.087753204446434,
|
|
"learning_rate": 7.139830508474576e-06,
|
|
"loss": 3.402482509613037,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.21578612348822407,
|
|
"grad_norm": 30.225108315818193,
|
|
"learning_rate": 7.161016949152543e-06,
|
|
"loss": 3.0675745010375977,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.21642266072565244,
|
|
"grad_norm": 16.509686833125368,
|
|
"learning_rate": 7.182203389830509e-06,
|
|
"loss": 3.3344573974609375,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.21705919796308085,
|
|
"grad_norm": 10.449599766088038,
|
|
"learning_rate": 7.203389830508475e-06,
|
|
"loss": 2.42911434173584,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.21769573520050922,
|
|
"grad_norm": 10.412822022342818,
|
|
"learning_rate": 7.224576271186441e-06,
|
|
"loss": 2.7670388221740723,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.21833227243793762,
|
|
"grad_norm": 9.912292165640567,
|
|
"learning_rate": 7.2457627118644065e-06,
|
|
"loss": 1.9683561325073242,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.218968809675366,
|
|
"grad_norm": 10.749636171231584,
|
|
"learning_rate": 7.266949152542373e-06,
|
|
"loss": 2.915195941925049,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.2196053469127944,
|
|
"grad_norm": 28.50950758853137,
|
|
"learning_rate": 7.288135593220339e-06,
|
|
"loss": 3.151520252227783,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.22024188415022278,
|
|
"grad_norm": 24.28887681473371,
|
|
"learning_rate": 7.309322033898306e-06,
|
|
"loss": 2.0973618030548096,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.22087842138765118,
|
|
"grad_norm": 9.132240644095473,
|
|
"learning_rate": 7.3305084745762725e-06,
|
|
"loss": 2.961150884628296,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.22151495862507956,
|
|
"grad_norm": 16.8031595245657,
|
|
"learning_rate": 7.351694915254238e-06,
|
|
"loss": 2.981980800628662,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.22215149586250796,
|
|
"grad_norm": 14.149484197365297,
|
|
"learning_rate": 7.372881355932204e-06,
|
|
"loss": 2.886014938354492,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.22278803309993633,
|
|
"grad_norm": 10.654240894425453,
|
|
"learning_rate": 7.39406779661017e-06,
|
|
"loss": 2.521888017654419,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.22342457033736474,
|
|
"grad_norm": 14.061149508322904,
|
|
"learning_rate": 7.415254237288137e-06,
|
|
"loss": 3.571470260620117,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.22406110757479314,
|
|
"grad_norm": 10.714801517941787,
|
|
"learning_rate": 7.436440677966103e-06,
|
|
"loss": 3.155707359313965,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.22469764481222151,
|
|
"grad_norm": 31.60341838506027,
|
|
"learning_rate": 7.4576271186440685e-06,
|
|
"loss": 2.7946484088897705,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.22533418204964992,
|
|
"grad_norm": 9.672907182159392,
|
|
"learning_rate": 7.478813559322034e-06,
|
|
"loss": 2.146177291870117,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.2259707192870783,
|
|
"grad_norm": 9.098969239754291,
|
|
"learning_rate": 7.500000000000001e-06,
|
|
"loss": 2.894664764404297,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.2266072565245067,
|
|
"grad_norm": 8.885785452611078,
|
|
"learning_rate": 7.521186440677967e-06,
|
|
"loss": 2.8311991691589355,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.22724379376193507,
|
|
"grad_norm": 8.736221842860376,
|
|
"learning_rate": 7.542372881355933e-06,
|
|
"loss": 2.7373971939086914,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.22788033099936347,
|
|
"grad_norm": 15.8309467636366,
|
|
"learning_rate": 7.563559322033899e-06,
|
|
"loss": 2.6248059272766113,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.22851686823679185,
|
|
"grad_norm": 6.802455424970265,
|
|
"learning_rate": 7.5847457627118645e-06,
|
|
"loss": 2.6633729934692383,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.22915340547422025,
|
|
"grad_norm": 10.659381205942202,
|
|
"learning_rate": 7.605932203389831e-06,
|
|
"loss": 2.8522844314575195,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.22978994271164863,
|
|
"grad_norm": 28.091313355141022,
|
|
"learning_rate": 7.627118644067797e-06,
|
|
"loss": 2.4526000022888184,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.23042647994907703,
|
|
"grad_norm": 19.06538512702582,
|
|
"learning_rate": 7.648305084745763e-06,
|
|
"loss": 2.7997803688049316,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.2310630171865054,
|
|
"grad_norm": 12.427334341770118,
|
|
"learning_rate": 7.66949152542373e-06,
|
|
"loss": 3.680173873901367,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.2316995544239338,
|
|
"grad_norm": 8.06715078576492,
|
|
"learning_rate": 7.690677966101695e-06,
|
|
"loss": 2.341188907623291,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.23233609166136218,
|
|
"grad_norm": 36.55034307823031,
|
|
"learning_rate": 7.711864406779663e-06,
|
|
"loss": 2.418898105621338,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.23297262889879058,
|
|
"grad_norm": 17.870010953346153,
|
|
"learning_rate": 7.733050847457628e-06,
|
|
"loss": 3.0440874099731445,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.23360916613621896,
|
|
"grad_norm": 15.645822085927204,
|
|
"learning_rate": 7.754237288135595e-06,
|
|
"loss": 2.9087657928466797,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.23424570337364736,
|
|
"grad_norm": 21.73213532363426,
|
|
"learning_rate": 7.77542372881356e-06,
|
|
"loss": 2.755650520324707,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.23488224061107574,
|
|
"grad_norm": 10.353282795706956,
|
|
"learning_rate": 7.796610169491526e-06,
|
|
"loss": 3.011885643005371,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.23551877784850414,
|
|
"grad_norm": 8.286035315592235,
|
|
"learning_rate": 7.817796610169493e-06,
|
|
"loss": 2.9681286811828613,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.23615531508593252,
|
|
"grad_norm": 7.304703005206985,
|
|
"learning_rate": 7.838983050847458e-06,
|
|
"loss": 2.7695226669311523,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.23679185232336092,
|
|
"grad_norm": 13.132082630543904,
|
|
"learning_rate": 7.860169491525425e-06,
|
|
"loss": 2.765014410018921,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.2374283895607893,
|
|
"grad_norm": 12.186426941036258,
|
|
"learning_rate": 7.88135593220339e-06,
|
|
"loss": 3.3698410987854004,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.2380649267982177,
|
|
"grad_norm": 7.52386088621253,
|
|
"learning_rate": 7.902542372881357e-06,
|
|
"loss": 2.7172579765319824,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.23870146403564607,
|
|
"grad_norm": 10.996801834275313,
|
|
"learning_rate": 7.923728813559323e-06,
|
|
"loss": 3.2583115100860596,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.23933800127307447,
|
|
"grad_norm": 17.864622869491424,
|
|
"learning_rate": 7.944915254237288e-06,
|
|
"loss": 3.1072745323181152,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.23997453851050288,
|
|
"grad_norm": 13.01921262197225,
|
|
"learning_rate": 7.966101694915255e-06,
|
|
"loss": 2.8089587688446045,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.24061107574793125,
|
|
"grad_norm": 10.776360085944695,
|
|
"learning_rate": 7.987288135593222e-06,
|
|
"loss": 3.1854512691497803,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.24124761298535966,
|
|
"grad_norm": 13.463580198251714,
|
|
"learning_rate": 8.008474576271187e-06,
|
|
"loss": 3.1022424697875977,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.24188415022278803,
|
|
"grad_norm": 26.22940889508125,
|
|
"learning_rate": 8.029661016949153e-06,
|
|
"loss": 2.5203371047973633,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.24252068746021643,
|
|
"grad_norm": 30.178839640571866,
|
|
"learning_rate": 8.050847457627118e-06,
|
|
"loss": 4.099800109863281,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.2431572246976448,
|
|
"grad_norm": 19.990288591582754,
|
|
"learning_rate": 8.072033898305085e-06,
|
|
"loss": 3.4770667552948,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.2437937619350732,
|
|
"grad_norm": 18.299023986163313,
|
|
"learning_rate": 8.093220338983052e-06,
|
|
"loss": 3.0465381145477295,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.24443029917250159,
|
|
"grad_norm": 15.49218350727859,
|
|
"learning_rate": 8.114406779661017e-06,
|
|
"loss": 2.6916580200195312,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.24506683640993,
|
|
"grad_norm": 8.89218904507717,
|
|
"learning_rate": 8.135593220338983e-06,
|
|
"loss": 3.0373010635375977,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.24570337364735836,
|
|
"grad_norm": 17.202047794875796,
|
|
"learning_rate": 8.15677966101695e-06,
|
|
"loss": 2.984710216522217,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.24633991088478677,
|
|
"grad_norm": 11.366110782522894,
|
|
"learning_rate": 8.177966101694917e-06,
|
|
"loss": 2.7723522186279297,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.24697644812221514,
|
|
"grad_norm": 14.78172131810989,
|
|
"learning_rate": 8.199152542372882e-06,
|
|
"loss": 2.9807076454162598,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.24761298535964354,
|
|
"grad_norm": 8.448025825266503,
|
|
"learning_rate": 8.220338983050849e-06,
|
|
"loss": 2.8512401580810547,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.24824952259707192,
|
|
"grad_norm": 13.018778265333706,
|
|
"learning_rate": 8.241525423728815e-06,
|
|
"loss": 2.7343480587005615,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.24888605983450032,
|
|
"grad_norm": 11.046371381945525,
|
|
"learning_rate": 8.26271186440678e-06,
|
|
"loss": 2.8503451347351074,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.2495225970719287,
|
|
"grad_norm": 8.81851252595491,
|
|
"learning_rate": 8.283898305084747e-06,
|
|
"loss": 2.6752705574035645,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.2501591343093571,
|
|
"grad_norm": 8.522653693459269,
|
|
"learning_rate": 8.305084745762712e-06,
|
|
"loss": 2.9626564979553223,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.2507956715467855,
|
|
"grad_norm": 8.503352134234069,
|
|
"learning_rate": 8.326271186440679e-06,
|
|
"loss": 3.0923047065734863,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.25143220878421385,
|
|
"grad_norm": 17.595169204707954,
|
|
"learning_rate": 8.347457627118645e-06,
|
|
"loss": 3.0121350288391113,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.25206874602164225,
|
|
"grad_norm": 10.995988483462757,
|
|
"learning_rate": 8.36864406779661e-06,
|
|
"loss": 3.194448947906494,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.25270528325907066,
|
|
"grad_norm": 11.93319408618761,
|
|
"learning_rate": 8.389830508474577e-06,
|
|
"loss": 2.894463539123535,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.25334182049649906,
|
|
"grad_norm": 9.689945795143238,
|
|
"learning_rate": 8.411016949152542e-06,
|
|
"loss": 2.6376705169677734,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.25397835773392746,
|
|
"grad_norm": 13.926989612744364,
|
|
"learning_rate": 8.432203389830509e-06,
|
|
"loss": 3.0703635215759277,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.2546148949713558,
|
|
"grad_norm": 13.351847195447283,
|
|
"learning_rate": 8.453389830508476e-06,
|
|
"loss": 2.6202921867370605,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.2552514322087842,
|
|
"grad_norm": 8.133830219500812,
|
|
"learning_rate": 8.47457627118644e-06,
|
|
"loss": 2.5352962017059326,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.2558879694462126,
|
|
"grad_norm": 13.975975633984332,
|
|
"learning_rate": 8.495762711864407e-06,
|
|
"loss": 2.9092273712158203,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.256524506683641,
|
|
"grad_norm": 14.933245520629898,
|
|
"learning_rate": 8.516949152542372e-06,
|
|
"loss": 3.0582923889160156,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.25716104392106937,
|
|
"grad_norm": 10.687432366545924,
|
|
"learning_rate": 8.538135593220339e-06,
|
|
"loss": 2.8868885040283203,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.25779758115849777,
|
|
"grad_norm": 34.34936840711408,
|
|
"learning_rate": 8.559322033898306e-06,
|
|
"loss": 3.094916343688965,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.25843411839592617,
|
|
"grad_norm": 8.011256674559252,
|
|
"learning_rate": 8.580508474576272e-06,
|
|
"loss": 2.123218536376953,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.2590706556333546,
|
|
"grad_norm": 10.024564423138338,
|
|
"learning_rate": 8.601694915254239e-06,
|
|
"loss": 2.763322114944458,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.2597071928707829,
|
|
"grad_norm": 8.52914664622448,
|
|
"learning_rate": 8.622881355932204e-06,
|
|
"loss": 3.241649627685547,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.2603437301082113,
|
|
"grad_norm": 17.156374018350956,
|
|
"learning_rate": 8.64406779661017e-06,
|
|
"loss": 2.307551383972168,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.2609802673456397,
|
|
"grad_norm": 8.445989772507607,
|
|
"learning_rate": 8.665254237288136e-06,
|
|
"loss": 2.7405078411102295,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.26161680458306813,
|
|
"grad_norm": 16.246864152111847,
|
|
"learning_rate": 8.686440677966103e-06,
|
|
"loss": 2.4398441314697266,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.2622533418204965,
|
|
"grad_norm": 17.23025344721073,
|
|
"learning_rate": 8.70762711864407e-06,
|
|
"loss": 2.6711883544921875,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.2628898790579249,
|
|
"grad_norm": 9.665913856397358,
|
|
"learning_rate": 8.728813559322034e-06,
|
|
"loss": 3.1612720489501953,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.2635264162953533,
|
|
"grad_norm": 9.039243412902335,
|
|
"learning_rate": 8.750000000000001e-06,
|
|
"loss": 2.9666695594787598,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.2641629535327817,
|
|
"grad_norm": 14.904274886523316,
|
|
"learning_rate": 8.771186440677966e-06,
|
|
"loss": 2.4938879013061523,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.26479949077021003,
|
|
"grad_norm": 14.327685487799815,
|
|
"learning_rate": 8.792372881355933e-06,
|
|
"loss": 2.7676544189453125,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.26543602800763844,
|
|
"grad_norm": 12.777936146844038,
|
|
"learning_rate": 8.8135593220339e-06,
|
|
"loss": 2.7289962768554688,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.26607256524506684,
|
|
"grad_norm": 11.135318077153878,
|
|
"learning_rate": 8.834745762711864e-06,
|
|
"loss": 2.8916702270507812,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.26670910248249524,
|
|
"grad_norm": 14.64604027349478,
|
|
"learning_rate": 8.855932203389831e-06,
|
|
"loss": 2.8258821964263916,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.2673456397199236,
|
|
"grad_norm": 11.557535360945662,
|
|
"learning_rate": 8.877118644067798e-06,
|
|
"loss": 2.5489492416381836,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.267982176957352,
|
|
"grad_norm": 10.935033235337775,
|
|
"learning_rate": 8.898305084745763e-06,
|
|
"loss": 3.2372262477874756,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.2686187141947804,
|
|
"grad_norm": 12.059942367454795,
|
|
"learning_rate": 8.91949152542373e-06,
|
|
"loss": 2.420478582382202,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.2692552514322088,
|
|
"grad_norm": 9.62013973863689,
|
|
"learning_rate": 8.940677966101694e-06,
|
|
"loss": 2.610492467880249,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.2698917886696372,
|
|
"grad_norm": 16.215747002933785,
|
|
"learning_rate": 8.961864406779663e-06,
|
|
"loss": 2.431087017059326,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.27052832590706555,
|
|
"grad_norm": 23.718620449268744,
|
|
"learning_rate": 8.983050847457628e-06,
|
|
"loss": 3.0121328830718994,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.27116486314449395,
|
|
"grad_norm": 13.427577259890885,
|
|
"learning_rate": 9.004237288135595e-06,
|
|
"loss": 2.930263042449951,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.27180140038192235,
|
|
"grad_norm": 7.896740529131796,
|
|
"learning_rate": 9.02542372881356e-06,
|
|
"loss": 2.788511276245117,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.27243793761935076,
|
|
"grad_norm": 10.768266885604065,
|
|
"learning_rate": 9.046610169491526e-06,
|
|
"loss": 2.880270481109619,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.2730744748567791,
|
|
"grad_norm": 10.236726722596405,
|
|
"learning_rate": 9.067796610169493e-06,
|
|
"loss": 3.149750232696533,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.2737110120942075,
|
|
"grad_norm": 8.867595980322418,
|
|
"learning_rate": 9.088983050847458e-06,
|
|
"loss": 1.7043266296386719,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.2743475493316359,
|
|
"grad_norm": 12.618029867336706,
|
|
"learning_rate": 9.110169491525425e-06,
|
|
"loss": 2.908963680267334,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.2749840865690643,
|
|
"grad_norm": 11.301320525059047,
|
|
"learning_rate": 9.131355932203391e-06,
|
|
"loss": 2.958331346511841,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.27562062380649266,
|
|
"grad_norm": 11.392544145128007,
|
|
"learning_rate": 9.152542372881356e-06,
|
|
"loss": 3.156254768371582,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.27625716104392106,
|
|
"grad_norm": 11.513492123693869,
|
|
"learning_rate": 9.173728813559323e-06,
|
|
"loss": 2.940934658050537,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.27689369828134947,
|
|
"grad_norm": 7.947649134277705,
|
|
"learning_rate": 9.194915254237288e-06,
|
|
"loss": 1.6323720216751099,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.27753023551877787,
|
|
"grad_norm": 39.70214629314905,
|
|
"learning_rate": 9.216101694915255e-06,
|
|
"loss": 2.9174299240112305,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.2781667727562062,
|
|
"grad_norm": 13.98217055446752,
|
|
"learning_rate": 9.237288135593222e-06,
|
|
"loss": 2.852600574493408,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.2788033099936346,
|
|
"grad_norm": 10.160783020020252,
|
|
"learning_rate": 9.258474576271187e-06,
|
|
"loss": 2.575179100036621,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.279439847231063,
|
|
"grad_norm": 9.146012963974533,
|
|
"learning_rate": 9.279661016949153e-06,
|
|
"loss": 3.156705379486084,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.2800763844684914,
|
|
"grad_norm": 11.31879412709326,
|
|
"learning_rate": 9.300847457627118e-06,
|
|
"loss": 3.304896116256714,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.28071292170591977,
|
|
"grad_norm": 31.10169595912864,
|
|
"learning_rate": 9.322033898305085e-06,
|
|
"loss": 2.4765782356262207,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.2813494589433482,
|
|
"grad_norm": 14.496768399170477,
|
|
"learning_rate": 9.343220338983052e-06,
|
|
"loss": 2.4921059608459473,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.2819859961807766,
|
|
"grad_norm": 11.883722809358867,
|
|
"learning_rate": 9.364406779661017e-06,
|
|
"loss": 2.52455472946167,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.282622533418205,
|
|
"grad_norm": 9.844759935775308,
|
|
"learning_rate": 9.385593220338985e-06,
|
|
"loss": 3.400085687637329,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.2832590706556333,
|
|
"grad_norm": 24.414897984043137,
|
|
"learning_rate": 9.40677966101695e-06,
|
|
"loss": 2.9252350330352783,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.28389560789306173,
|
|
"grad_norm": 8.332066822030134,
|
|
"learning_rate": 9.427966101694917e-06,
|
|
"loss": 3.3196964263916016,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.28453214513049013,
|
|
"grad_norm": 7.5710032108323615,
|
|
"learning_rate": 9.449152542372882e-06,
|
|
"loss": 2.261486768722534,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.28516868236791854,
|
|
"grad_norm": 8.54694090543886,
|
|
"learning_rate": 9.470338983050848e-06,
|
|
"loss": 2.7075605392456055,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.28580521960534694,
|
|
"grad_norm": 8.80921600047023,
|
|
"learning_rate": 9.491525423728815e-06,
|
|
"loss": 2.6770546436309814,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.2864417568427753,
|
|
"grad_norm": 13.060286286479855,
|
|
"learning_rate": 9.51271186440678e-06,
|
|
"loss": 3.233011245727539,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.2870782940802037,
|
|
"grad_norm": 21.014549565777614,
|
|
"learning_rate": 9.533898305084747e-06,
|
|
"loss": 3.1268162727355957,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.2877148313176321,
|
|
"grad_norm": 9.74840063951361,
|
|
"learning_rate": 9.555084745762712e-06,
|
|
"loss": 3.2135486602783203,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.2883513685550605,
|
|
"grad_norm": 15.179806457385586,
|
|
"learning_rate": 9.576271186440679e-06,
|
|
"loss": 3.2502529621124268,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.28898790579248884,
|
|
"grad_norm": 7.967595752995628,
|
|
"learning_rate": 9.597457627118645e-06,
|
|
"loss": 2.67390513420105,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.28962444302991724,
|
|
"grad_norm": 33.20858419977011,
|
|
"learning_rate": 9.61864406779661e-06,
|
|
"loss": 3.2899956703186035,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.29026098026734565,
|
|
"grad_norm": 25.325568051510427,
|
|
"learning_rate": 9.639830508474577e-06,
|
|
"loss": 2.495117664337158,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.29089751750477405,
|
|
"grad_norm": 11.262876751379158,
|
|
"learning_rate": 9.661016949152544e-06,
|
|
"loss": 2.460306167602539,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.2915340547422024,
|
|
"grad_norm": 12.122573664434807,
|
|
"learning_rate": 9.682203389830509e-06,
|
|
"loss": 2.653489351272583,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.2921705919796308,
|
|
"grad_norm": 15.897150153182967,
|
|
"learning_rate": 9.703389830508475e-06,
|
|
"loss": 2.756795883178711,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.2928071292170592,
|
|
"grad_norm": 14.52269368653367,
|
|
"learning_rate": 9.72457627118644e-06,
|
|
"loss": 2.753854513168335,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.2934436664544876,
|
|
"grad_norm": 12.896301614550035,
|
|
"learning_rate": 9.745762711864407e-06,
|
|
"loss": 2.9368858337402344,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.29408020369191595,
|
|
"grad_norm": 9.860006651102344,
|
|
"learning_rate": 9.766949152542374e-06,
|
|
"loss": 2.6420769691467285,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.29471674092934436,
|
|
"grad_norm": 7.025532866083078,
|
|
"learning_rate": 9.788135593220339e-06,
|
|
"loss": 2.633246421813965,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.29535327816677276,
|
|
"grad_norm": 12.667326337177768,
|
|
"learning_rate": 9.809322033898306e-06,
|
|
"loss": 3.018620729446411,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.29598981540420116,
|
|
"grad_norm": 18.77679465997328,
|
|
"learning_rate": 9.830508474576272e-06,
|
|
"loss": 2.952024459838867,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.2966263526416295,
|
|
"grad_norm": 8.310045697764552,
|
|
"learning_rate": 9.851694915254239e-06,
|
|
"loss": 2.5029094219207764,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.2972628898790579,
|
|
"grad_norm": 8.826548109977605,
|
|
"learning_rate": 9.872881355932204e-06,
|
|
"loss": 2.9038829803466797,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.2978994271164863,
|
|
"grad_norm": 9.884818861483527,
|
|
"learning_rate": 9.89406779661017e-06,
|
|
"loss": 2.832332134246826,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.2985359643539147,
|
|
"grad_norm": 6.665482253383969,
|
|
"learning_rate": 9.915254237288137e-06,
|
|
"loss": 2.741182327270508,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.2991725015913431,
|
|
"grad_norm": 15.653183149015302,
|
|
"learning_rate": 9.936440677966102e-06,
|
|
"loss": 2.4737367630004883,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.29980903882877147,
|
|
"grad_norm": 14.824362566450842,
|
|
"learning_rate": 9.957627118644069e-06,
|
|
"loss": 2.59440279006958,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.30044557606619987,
|
|
"grad_norm": 23.18730021521652,
|
|
"learning_rate": 9.978813559322034e-06,
|
|
"loss": 2.371671438217163,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.3010821133036283,
|
|
"grad_norm": 10.391918996320895,
|
|
"learning_rate": 1e-05,
|
|
"loss": 3.30546498298645,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.3017186505410567,
|
|
"grad_norm": 31.66023743421455,
|
|
"learning_rate": 9.999998628160862e-06,
|
|
"loss": 2.7432923316955566,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.302355187778485,
|
|
"grad_norm": 9.588630315588624,
|
|
"learning_rate": 9.999994512644197e-06,
|
|
"loss": 2.9125497341156006,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.3029917250159134,
|
|
"grad_norm": 16.592800570861797,
|
|
"learning_rate": 9.999987653452265e-06,
|
|
"loss": 3.4428765773773193,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.30362826225334183,
|
|
"grad_norm": 14.76583786906148,
|
|
"learning_rate": 9.999978050588832e-06,
|
|
"loss": 2.7113728523254395,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.30426479949077023,
|
|
"grad_norm": 10.841929591033558,
|
|
"learning_rate": 9.999965704059163e-06,
|
|
"loss": 2.9176578521728516,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.3049013367281986,
|
|
"grad_norm": 10.33304861879112,
|
|
"learning_rate": 9.999950613870036e-06,
|
|
"loss": 2.924442768096924,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.305537873965627,
|
|
"grad_norm": 10.096621224894694,
|
|
"learning_rate": 9.99993278002973e-06,
|
|
"loss": 3.140645980834961,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.3061744112030554,
|
|
"grad_norm": 10.4212452713938,
|
|
"learning_rate": 9.999912202548033e-06,
|
|
"loss": 2.9142580032348633,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.3068109484404838,
|
|
"grad_norm": 12.045245392438261,
|
|
"learning_rate": 9.999888881436235e-06,
|
|
"loss": 2.679471015930176,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.30744748567791214,
|
|
"grad_norm": 13.404477364271433,
|
|
"learning_rate": 9.999862816707133e-06,
|
|
"loss": 2.6989357471466064,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.30808402291534054,
|
|
"grad_norm": 24.878929931157536,
|
|
"learning_rate": 9.999834008375032e-06,
|
|
"loss": 2.761178493499756,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.30872056015276894,
|
|
"grad_norm": 8.567649269925306,
|
|
"learning_rate": 9.999802456455736e-06,
|
|
"loss": 3.2187318801879883,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.30935709739019734,
|
|
"grad_norm": 8.764111500182137,
|
|
"learning_rate": 9.999768160966561e-06,
|
|
"loss": 2.8718371391296387,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.3099936346276257,
|
|
"grad_norm": 11.748973884828251,
|
|
"learning_rate": 9.999731121926329e-06,
|
|
"loss": 2.8609938621520996,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.3106301718650541,
|
|
"grad_norm": 9.107045063551201,
|
|
"learning_rate": 9.99969133935536e-06,
|
|
"loss": 2.7174265384674072,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.3112667091024825,
|
|
"grad_norm": 9.583605472207113,
|
|
"learning_rate": 9.999648813275484e-06,
|
|
"loss": 3.1644539833068848,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.3119032463399109,
|
|
"grad_norm": 12.364365943398141,
|
|
"learning_rate": 9.999603543710042e-06,
|
|
"loss": 2.8273873329162598,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.31253978357733925,
|
|
"grad_norm": 18.171015419037342,
|
|
"learning_rate": 9.99955553068387e-06,
|
|
"loss": 2.6150119304656982,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.31317632081476765,
|
|
"grad_norm": 10.619980186641529,
|
|
"learning_rate": 9.999504774223317e-06,
|
|
"loss": 2.663158416748047,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.31381285805219605,
|
|
"grad_norm": 12.381455556494492,
|
|
"learning_rate": 9.99945127435623e-06,
|
|
"loss": 2.661107063293457,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.31444939528962446,
|
|
"grad_norm": 12.757876969453523,
|
|
"learning_rate": 9.999395031111975e-06,
|
|
"loss": 2.856588363647461,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.31508593252705286,
|
|
"grad_norm": 14.213876144934218,
|
|
"learning_rate": 9.999336044521408e-06,
|
|
"loss": 2.613553524017334,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.3157224697644812,
|
|
"grad_norm": 15.726970016629625,
|
|
"learning_rate": 9.999274314616898e-06,
|
|
"loss": 3.4927902221679688,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.3163590070019096,
|
|
"grad_norm": 12.253250019487384,
|
|
"learning_rate": 9.99920984143232e-06,
|
|
"loss": 2.8653440475463867,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.316995544239338,
|
|
"grad_norm": 9.617755498584044,
|
|
"learning_rate": 9.999142625003054e-06,
|
|
"loss": 2.6097002029418945,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.3176320814767664,
|
|
"grad_norm": 7.086589541534136,
|
|
"learning_rate": 9.999072665365978e-06,
|
|
"loss": 2.7578887939453125,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.31826861871419476,
|
|
"grad_norm": 13.344826735283398,
|
|
"learning_rate": 9.998999962559489e-06,
|
|
"loss": 3.936290740966797,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.31890515595162316,
|
|
"grad_norm": 14.140754100517357,
|
|
"learning_rate": 9.998924516623476e-06,
|
|
"loss": 2.5598175525665283,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.31954169318905157,
|
|
"grad_norm": 10.289825379300616,
|
|
"learning_rate": 9.998846327599343e-06,
|
|
"loss": 2.9495205879211426,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.32017823042647997,
|
|
"grad_norm": 17.17421690989167,
|
|
"learning_rate": 9.998765395529991e-06,
|
|
"loss": 2.556317090988159,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.3208147676639083,
|
|
"grad_norm": 7.900979015012702,
|
|
"learning_rate": 9.998681720459832e-06,
|
|
"loss": 3.07352876663208,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.3214513049013367,
|
|
"grad_norm": 9.178466790864046,
|
|
"learning_rate": 9.998595302434783e-06,
|
|
"loss": 3.1477761268615723,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.3220878421387651,
|
|
"grad_norm": 11.158747107423268,
|
|
"learning_rate": 9.998506141502264e-06,
|
|
"loss": 2.565523624420166,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.3227243793761935,
|
|
"grad_norm": 9.745259368574294,
|
|
"learning_rate": 9.9984142377112e-06,
|
|
"loss": 2.4731647968292236,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.3233609166136219,
|
|
"grad_norm": 10.504835167489151,
|
|
"learning_rate": 9.998319591112023e-06,
|
|
"loss": 2.833559036254883,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.3239974538510503,
|
|
"grad_norm": 8.475445634985794,
|
|
"learning_rate": 9.998222201756665e-06,
|
|
"loss": 2.8836066722869873,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.3246339910884787,
|
|
"grad_norm": 19.237070041175475,
|
|
"learning_rate": 9.998122069698572e-06,
|
|
"loss": 2.7430686950683594,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.3252705283259071,
|
|
"grad_norm": 14.958651715813355,
|
|
"learning_rate": 9.998019194992689e-06,
|
|
"loss": 2.396372079849243,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.32590706556333543,
|
|
"grad_norm": 16.29851203620696,
|
|
"learning_rate": 9.997913577695466e-06,
|
|
"loss": 3.14420747756958,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.32654360280076383,
|
|
"grad_norm": 14.00413096447635,
|
|
"learning_rate": 9.997805217864858e-06,
|
|
"loss": 3.0623974800109863,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.32718014003819224,
|
|
"grad_norm": 11.661727203641812,
|
|
"learning_rate": 9.997694115560327e-06,
|
|
"loss": 2.466407537460327,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.32781667727562064,
|
|
"grad_norm": 9.217472463327738,
|
|
"learning_rate": 9.99758027084284e-06,
|
|
"loss": 3.4243903160095215,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.32845321451304904,
|
|
"grad_norm": 8.627597567343209,
|
|
"learning_rate": 9.997463683774866e-06,
|
|
"loss": 2.910386562347412,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.3290897517504774,
|
|
"grad_norm": 26.295612271979316,
|
|
"learning_rate": 9.99734435442038e-06,
|
|
"loss": 2.6467678546905518,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.3297262889879058,
|
|
"grad_norm": 20.16174022986734,
|
|
"learning_rate": 9.997222282844865e-06,
|
|
"loss": 2.6588282585144043,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.3303628262253342,
|
|
"grad_norm": 6.755631493856407,
|
|
"learning_rate": 9.997097469115303e-06,
|
|
"loss": 1.3066935539245605,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.3309993634627626,
|
|
"grad_norm": 14.766691885632358,
|
|
"learning_rate": 9.996969913300185e-06,
|
|
"loss": 2.514312267303467,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.33163590070019094,
|
|
"grad_norm": 77.17893427003337,
|
|
"learning_rate": 9.996839615469507e-06,
|
|
"loss": 2.473106622695923,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.33227243793761935,
|
|
"grad_norm": 11.151217304145346,
|
|
"learning_rate": 9.996706575694764e-06,
|
|
"loss": 2.7616677284240723,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.33290897517504775,
|
|
"grad_norm": 21.575010036196733,
|
|
"learning_rate": 9.996570794048964e-06,
|
|
"loss": 2.2915472984313965,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.33354551241247615,
|
|
"grad_norm": 36.58605310330633,
|
|
"learning_rate": 9.996432270606614e-06,
|
|
"loss": 3.331940174102783,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.3341820496499045,
|
|
"grad_norm": 19.316844663679245,
|
|
"learning_rate": 9.996291005443725e-06,
|
|
"loss": 2.7368221282958984,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.3348185868873329,
|
|
"grad_norm": 18.255012442611356,
|
|
"learning_rate": 9.996146998637814e-06,
|
|
"loss": 3.4562439918518066,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.3354551241247613,
|
|
"grad_norm": 9.180146555325413,
|
|
"learning_rate": 9.996000250267908e-06,
|
|
"loss": 2.8834753036499023,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.3360916613621897,
|
|
"grad_norm": 14.17213899043496,
|
|
"learning_rate": 9.995850760414525e-06,
|
|
"loss": 2.6626763343811035,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.33672819859961806,
|
|
"grad_norm": 14.437809542755755,
|
|
"learning_rate": 9.995698529159701e-06,
|
|
"loss": 3.103738307952881,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.33736473583704646,
|
|
"grad_norm": 12.49658370440524,
|
|
"learning_rate": 9.995543556586971e-06,
|
|
"loss": 2.893144369125366,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.33800127307447486,
|
|
"grad_norm": 11.91200029152462,
|
|
"learning_rate": 9.99538584278137e-06,
|
|
"loss": 2.814668893814087,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.33863781031190326,
|
|
"grad_norm": 11.029866774072888,
|
|
"learning_rate": 9.995225387829446e-06,
|
|
"loss": 2.77158784866333,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.3392743475493316,
|
|
"grad_norm": 9.409247027470174,
|
|
"learning_rate": 9.99506219181924e-06,
|
|
"loss": 3.173114061355591,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.33991088478676,
|
|
"grad_norm": 19.82056407187092,
|
|
"learning_rate": 9.99489625484031e-06,
|
|
"loss": 3.1451973915100098,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.3405474220241884,
|
|
"grad_norm": 14.168695873290911,
|
|
"learning_rate": 9.994727576983709e-06,
|
|
"loss": 2.498263120651245,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.3411839592616168,
|
|
"grad_norm": 9.430546621447903,
|
|
"learning_rate": 9.994556158341995e-06,
|
|
"loss": 2.7113990783691406,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.34182049649904517,
|
|
"grad_norm": 19.540585641156934,
|
|
"learning_rate": 9.994381999009234e-06,
|
|
"loss": 2.5611259937286377,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.34245703373647357,
|
|
"grad_norm": 14.771991055023605,
|
|
"learning_rate": 9.994205099080992e-06,
|
|
"loss": 2.760798692703247,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.343093570973902,
|
|
"grad_norm": 7.95913180934273,
|
|
"learning_rate": 9.994025458654339e-06,
|
|
"loss": 2.6646268367767334,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.3437301082113304,
|
|
"grad_norm": 15.474046292497501,
|
|
"learning_rate": 9.993843077827854e-06,
|
|
"loss": 2.42655611038208,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.3443666454487588,
|
|
"grad_norm": 12.187750005188336,
|
|
"learning_rate": 9.993657956701613e-06,
|
|
"loss": 2.930875301361084,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.3450031826861871,
|
|
"grad_norm": 13.272631871315637,
|
|
"learning_rate": 9.993470095377199e-06,
|
|
"loss": 2.7495932579040527,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.34563971992361553,
|
|
"grad_norm": 7.456501849105595,
|
|
"learning_rate": 9.993279493957698e-06,
|
|
"loss": 2.7832741737365723,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.34627625716104393,
|
|
"grad_norm": 20.699299594098285,
|
|
"learning_rate": 9.993086152547699e-06,
|
|
"loss": 3.2728452682495117,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.34691279439847234,
|
|
"grad_norm": 8.257838784063091,
|
|
"learning_rate": 9.992890071253297e-06,
|
|
"loss": 3.3546133041381836,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.3475493316359007,
|
|
"grad_norm": 9.102418071358285,
|
|
"learning_rate": 9.99269125018209e-06,
|
|
"loss": 2.539501190185547,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.3481858688733291,
|
|
"grad_norm": 40.908770676555775,
|
|
"learning_rate": 9.992489689443175e-06,
|
|
"loss": 4.694552421569824,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.3488224061107575,
|
|
"grad_norm": 11.033322133394357,
|
|
"learning_rate": 9.992285389147156e-06,
|
|
"loss": 2.7217929363250732,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.3494589433481859,
|
|
"grad_norm": 33.19531232099363,
|
|
"learning_rate": 9.992078349406143e-06,
|
|
"loss": 2.987252712249756,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.35009548058561424,
|
|
"grad_norm": 10.09416414307797,
|
|
"learning_rate": 9.991868570333742e-06,
|
|
"loss": 3.0493078231811523,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.35073201782304264,
|
|
"grad_norm": 11.205696352596215,
|
|
"learning_rate": 9.991656052045071e-06,
|
|
"loss": 3.0120527744293213,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.35136855506047104,
|
|
"grad_norm": 8.706664163773446,
|
|
"learning_rate": 9.991440794656741e-06,
|
|
"loss": 2.504729747772217,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.35200509229789945,
|
|
"grad_norm": 16.08095287730785,
|
|
"learning_rate": 9.991222798286876e-06,
|
|
"loss": 2.8735032081604004,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.3526416295353278,
|
|
"grad_norm": 16.445657787827177,
|
|
"learning_rate": 9.991002063055095e-06,
|
|
"loss": 2.7720909118652344,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.3532781667727562,
|
|
"grad_norm": 21.854022860671776,
|
|
"learning_rate": 9.990778589082527e-06,
|
|
"loss": 2.3629848957061768,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.3539147040101846,
|
|
"grad_norm": 10.944655744451333,
|
|
"learning_rate": 9.990552376491794e-06,
|
|
"loss": 2.9007742404937744,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.354551241247613,
|
|
"grad_norm": 12.410073833010728,
|
|
"learning_rate": 9.990323425407034e-06,
|
|
"loss": 2.83327054977417,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.35518777848504135,
|
|
"grad_norm": 8.95949921903947,
|
|
"learning_rate": 9.990091735953875e-06,
|
|
"loss": 3.135423183441162,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.35582431572246975,
|
|
"grad_norm": 7.162605329310381,
|
|
"learning_rate": 9.989857308259457e-06,
|
|
"loss": 2.7775559425354004,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.35646085295989816,
|
|
"grad_norm": 8.52004662486134,
|
|
"learning_rate": 9.989620142452417e-06,
|
|
"loss": 2.7399826049804688,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.35709739019732656,
|
|
"grad_norm": 17.744651939267612,
|
|
"learning_rate": 9.989380238662897e-06,
|
|
"loss": 2.7530295848846436,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.3577339274347549,
|
|
"grad_norm": 15.84289335198656,
|
|
"learning_rate": 9.98913759702254e-06,
|
|
"loss": 2.881882429122925,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.3583704646721833,
|
|
"grad_norm": 11.55565022140253,
|
|
"learning_rate": 9.988892217664492e-06,
|
|
"loss": 2.9270741939544678,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.3590070019096117,
|
|
"grad_norm": 11.03340184827849,
|
|
"learning_rate": 9.988644100723403e-06,
|
|
"loss": 2.8461787700653076,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.3596435391470401,
|
|
"grad_norm": 14.578234992013659,
|
|
"learning_rate": 9.988393246335424e-06,
|
|
"loss": 2.517241954803467,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.3602800763844685,
|
|
"grad_norm": 8.127008050680834,
|
|
"learning_rate": 9.988139654638204e-06,
|
|
"loss": 2.966914653778076,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.36091661362189686,
|
|
"grad_norm": 9.216070318677676,
|
|
"learning_rate": 9.987883325770903e-06,
|
|
"loss": 2.6558094024658203,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.36155315085932527,
|
|
"grad_norm": 17.169149508599855,
|
|
"learning_rate": 9.987624259874172e-06,
|
|
"loss": 2.323289155960083,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.36218968809675367,
|
|
"grad_norm": 9.166183587477484,
|
|
"learning_rate": 9.987362457090174e-06,
|
|
"loss": 2.409285306930542,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.3628262253341821,
|
|
"grad_norm": 11.13012364580221,
|
|
"learning_rate": 9.98709791756257e-06,
|
|
"loss": 2.4598522186279297,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.3634627625716104,
|
|
"grad_norm": 11.942641315185213,
|
|
"learning_rate": 9.986830641436519e-06,
|
|
"loss": 2.520512819290161,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.3640992998090388,
|
|
"grad_norm": 19.551435054071653,
|
|
"learning_rate": 9.986560628858686e-06,
|
|
"loss": 2.8555023670196533,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.3647358370464672,
|
|
"grad_norm": 13.766408048514078,
|
|
"learning_rate": 9.986287879977237e-06,
|
|
"loss": 2.6136066913604736,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.36537237428389563,
|
|
"grad_norm": 12.105556637811555,
|
|
"learning_rate": 9.98601239494184e-06,
|
|
"loss": 2.950223684310913,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.366008911521324,
|
|
"grad_norm": 7.891574119862333,
|
|
"learning_rate": 9.98573417390366e-06,
|
|
"loss": 2.742581367492676,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.3666454487587524,
|
|
"grad_norm": 16.58863121732629,
|
|
"learning_rate": 9.985453217015371e-06,
|
|
"loss": 2.458749294281006,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.3672819859961808,
|
|
"grad_norm": 20.69997579999368,
|
|
"learning_rate": 9.985169524431143e-06,
|
|
"loss": 2.656479835510254,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.3679185232336092,
|
|
"grad_norm": 14.165292656115142,
|
|
"learning_rate": 9.984883096306648e-06,
|
|
"loss": 2.546654462814331,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.36855506047103753,
|
|
"grad_norm": 7.15022029441597,
|
|
"learning_rate": 9.984593932799057e-06,
|
|
"loss": 2.6810786724090576,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.36919159770846594,
|
|
"grad_norm": 6.180154264135546,
|
|
"learning_rate": 9.984302034067048e-06,
|
|
"loss": 2.5700953006744385,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.36982813494589434,
|
|
"grad_norm": 11.703039591161678,
|
|
"learning_rate": 9.984007400270793e-06,
|
|
"loss": 2.629293918609619,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.37046467218332274,
|
|
"grad_norm": 7.005997624163455,
|
|
"learning_rate": 9.98371003157197e-06,
|
|
"loss": 2.742830991744995,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.3711012094207511,
|
|
"grad_norm": 11.272756626497822,
|
|
"learning_rate": 9.983409928133756e-06,
|
|
"loss": 3.019237995147705,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.3717377466581795,
|
|
"grad_norm": 88.39848337199575,
|
|
"learning_rate": 9.983107090120828e-06,
|
|
"loss": 2.901144027709961,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.3723742838956079,
|
|
"grad_norm": 10.809523763851594,
|
|
"learning_rate": 9.982801517699363e-06,
|
|
"loss": 2.8116211891174316,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.3730108211330363,
|
|
"grad_norm": 10.52028232851953,
|
|
"learning_rate": 9.98249321103704e-06,
|
|
"loss": 2.6679322719573975,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.3736473583704647,
|
|
"grad_norm": 14.30042583766645,
|
|
"learning_rate": 9.982182170303038e-06,
|
|
"loss": 2.7962963581085205,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.37428389560789305,
|
|
"grad_norm": 21.373418854152547,
|
|
"learning_rate": 9.981868395668037e-06,
|
|
"loss": 2.6593198776245117,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.37492043284532145,
|
|
"grad_norm": 10.357703767017133,
|
|
"learning_rate": 9.981551887304214e-06,
|
|
"loss": 2.869109630584717,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.37555697008274985,
|
|
"grad_norm": 11.762580385143742,
|
|
"learning_rate": 9.981232645385253e-06,
|
|
"loss": 2.3226709365844727,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.37619350732017826,
|
|
"grad_norm": 14.93629322268585,
|
|
"learning_rate": 9.98091067008633e-06,
|
|
"loss": 3.0382394790649414,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.3768300445576066,
|
|
"grad_norm": 12.06044711282513,
|
|
"learning_rate": 9.980585961584124e-06,
|
|
"loss": 2.413412094116211,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.377466581795035,
|
|
"grad_norm": 13.507343372488394,
|
|
"learning_rate": 9.980258520056813e-06,
|
|
"loss": 2.8003458976745605,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.3781031190324634,
|
|
"grad_norm": 8.617531636027781,
|
|
"learning_rate": 9.97992834568408e-06,
|
|
"loss": 2.4336965084075928,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.3787396562698918,
|
|
"grad_norm": 10.284383480836185,
|
|
"learning_rate": 9.979595438647101e-06,
|
|
"loss": 2.6842195987701416,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.37937619350732016,
|
|
"grad_norm": 12.604083500248924,
|
|
"learning_rate": 9.979259799128554e-06,
|
|
"loss": 2.740729331970215,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.38001273074474856,
|
|
"grad_norm": 21.70938956261165,
|
|
"learning_rate": 9.978921427312617e-06,
|
|
"loss": 2.901966094970703,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.38064926798217696,
|
|
"grad_norm": 17.052471808172093,
|
|
"learning_rate": 9.978580323384965e-06,
|
|
"loss": 2.6793160438537598,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.38128580521960537,
|
|
"grad_norm": 9.38891050745183,
|
|
"learning_rate": 9.978236487532778e-06,
|
|
"loss": 2.7052338123321533,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.3819223424570337,
|
|
"grad_norm": 11.226494849053653,
|
|
"learning_rate": 9.977889919944725e-06,
|
|
"loss": 2.664078712463379,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.3825588796944621,
|
|
"grad_norm": 26.16925716656446,
|
|
"learning_rate": 9.977540620810984e-06,
|
|
"loss": 2.1137261390686035,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.3831954169318905,
|
|
"grad_norm": 14.849349743588643,
|
|
"learning_rate": 9.977188590323228e-06,
|
|
"loss": 3.0612120628356934,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.3838319541693189,
|
|
"grad_norm": 8.819966272718052,
|
|
"learning_rate": 9.976833828674627e-06,
|
|
"loss": 3.0031580924987793,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.38446849140674727,
|
|
"grad_norm": 13.720082436634515,
|
|
"learning_rate": 9.976476336059852e-06,
|
|
"loss": 2.5077881813049316,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.3851050286441757,
|
|
"grad_norm": 12.654287287694025,
|
|
"learning_rate": 9.976116112675072e-06,
|
|
"loss": 2.7970845699310303,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.3857415658816041,
|
|
"grad_norm": 8.055594500030699,
|
|
"learning_rate": 9.975753158717954e-06,
|
|
"loss": 2.961820602416992,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.3863781031190325,
|
|
"grad_norm": 15.703236546101197,
|
|
"learning_rate": 9.975387474387664e-06,
|
|
"loss": 2.8235034942626953,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.3870146403564608,
|
|
"grad_norm": 10.555876441922358,
|
|
"learning_rate": 9.975019059884867e-06,
|
|
"loss": 2.211468458175659,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.38765117759388923,
|
|
"grad_norm": 16.658200678841155,
|
|
"learning_rate": 9.974647915411725e-06,
|
|
"loss": 3.1141555309295654,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.38828771483131763,
|
|
"grad_norm": 9.53456679210011,
|
|
"learning_rate": 9.974274041171896e-06,
|
|
"loss": 2.7841145992279053,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.38892425206874603,
|
|
"grad_norm": 11.411951635683534,
|
|
"learning_rate": 9.97389743737054e-06,
|
|
"loss": 2.8071258068084717,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.38956078930617444,
|
|
"grad_norm": 22.36863147283409,
|
|
"learning_rate": 9.973518104214315e-06,
|
|
"loss": 3.1157443523406982,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.3901973265436028,
|
|
"grad_norm": 24.276390785654325,
|
|
"learning_rate": 9.97313604191137e-06,
|
|
"loss": 2.271000385284424,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.3908338637810312,
|
|
"grad_norm": 7.016670503494138,
|
|
"learning_rate": 9.972751250671359e-06,
|
|
"loss": 2.9037952423095703,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.3914704010184596,
|
|
"grad_norm": 7.376550389326394,
|
|
"learning_rate": 9.97236373070543e-06,
|
|
"loss": 2.7015273571014404,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.392106938255888,
|
|
"grad_norm": 15.295151798888124,
|
|
"learning_rate": 9.97197348222623e-06,
|
|
"loss": 3.1652095317840576,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.39274347549331634,
|
|
"grad_norm": 13.765527468480125,
|
|
"learning_rate": 9.9715805054479e-06,
|
|
"loss": 2.8960907459259033,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.39338001273074474,
|
|
"grad_norm": 8.971602174986844,
|
|
"learning_rate": 9.971184800586082e-06,
|
|
"loss": 3.0302138328552246,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.39401654996817315,
|
|
"grad_norm": 16.81102951947778,
|
|
"learning_rate": 9.970786367857914e-06,
|
|
"loss": 2.9071855545043945,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.39465308720560155,
|
|
"grad_norm": 7.906382550378165,
|
|
"learning_rate": 9.97038520748203e-06,
|
|
"loss": 2.3067169189453125,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.3952896244430299,
|
|
"grad_norm": 13.34748819688486,
|
|
"learning_rate": 9.969981319678558e-06,
|
|
"loss": 3.124617576599121,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.3959261616804583,
|
|
"grad_norm": 11.989181453339398,
|
|
"learning_rate": 9.969574704669132e-06,
|
|
"loss": 2.90457820892334,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.3965626989178867,
|
|
"grad_norm": 10.967880077200139,
|
|
"learning_rate": 9.969165362676869e-06,
|
|
"loss": 2.6836605072021484,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.3971992361553151,
|
|
"grad_norm": 10.674495764110725,
|
|
"learning_rate": 9.968753293926394e-06,
|
|
"loss": 2.4494035243988037,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.39783577339274345,
|
|
"grad_norm": 11.439167548793653,
|
|
"learning_rate": 9.968338498643822e-06,
|
|
"loss": 2.9132814407348633,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.39847231063017186,
|
|
"grad_norm": 12.866268633011725,
|
|
"learning_rate": 9.967920977056767e-06,
|
|
"loss": 3.1518666744232178,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.39910884786760026,
|
|
"grad_norm": 9.872952578923261,
|
|
"learning_rate": 9.967500729394337e-06,
|
|
"loss": 3.1968982219696045,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.39974538510502866,
|
|
"grad_norm": 20.085831417612255,
|
|
"learning_rate": 9.967077755887137e-06,
|
|
"loss": 2.6791770458221436,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.400381922342457,
|
|
"grad_norm": 14.205638034512429,
|
|
"learning_rate": 9.96665205676727e-06,
|
|
"loss": 2.7240817546844482,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.4010184595798854,
|
|
"grad_norm": 7.919533080851466,
|
|
"learning_rate": 9.966223632268329e-06,
|
|
"loss": 2.643958806991577,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.4016549968173138,
|
|
"grad_norm": 18.29736046843817,
|
|
"learning_rate": 9.965792482625408e-06,
|
|
"loss": 3.0323963165283203,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.4022915340547422,
|
|
"grad_norm": 8.696314110576866,
|
|
"learning_rate": 9.965358608075093e-06,
|
|
"loss": 2.7381138801574707,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.40292807129217056,
|
|
"grad_norm": 18.829773132087,
|
|
"learning_rate": 9.964922008855467e-06,
|
|
"loss": 3.537816286087036,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.40356460852959897,
|
|
"grad_norm": 21.751986969184298,
|
|
"learning_rate": 9.964482685206105e-06,
|
|
"loss": 2.8908934593200684,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.40420114576702737,
|
|
"grad_norm": 6.860374488872398,
|
|
"learning_rate": 9.964040637368084e-06,
|
|
"loss": 2.899407386779785,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.4048376830044558,
|
|
"grad_norm": 9.023760950668839,
|
|
"learning_rate": 9.963595865583969e-06,
|
|
"loss": 2.890510320663452,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.4054742202418842,
|
|
"grad_norm": 6.053230462493601,
|
|
"learning_rate": 9.963148370097822e-06,
|
|
"loss": 2.758610248565674,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.4061107574793125,
|
|
"grad_norm": 7.3110311511029655,
|
|
"learning_rate": 9.962698151155201e-06,
|
|
"loss": 2.8290634155273438,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.4067472947167409,
|
|
"grad_norm": 6.097610481770767,
|
|
"learning_rate": 9.962245209003156e-06,
|
|
"loss": 2.7542171478271484,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.40738383195416933,
|
|
"grad_norm": 15.90993244320391,
|
|
"learning_rate": 9.961789543890232e-06,
|
|
"loss": 2.9697728157043457,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.40802036919159773,
|
|
"grad_norm": 10.735451573885806,
|
|
"learning_rate": 9.96133115606647e-06,
|
|
"loss": 2.8094568252563477,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.4086569064290261,
|
|
"grad_norm": 8.813848610851771,
|
|
"learning_rate": 9.960870045783404e-06,
|
|
"loss": 2.468761920928955,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.4092934436664545,
|
|
"grad_norm": 14.91989636635262,
|
|
"learning_rate": 9.96040621329406e-06,
|
|
"loss": 3.4151830673217773,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.4099299809038829,
|
|
"grad_norm": 10.110691044294901,
|
|
"learning_rate": 9.959939658852961e-06,
|
|
"loss": 2.3592095375061035,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.4105665181413113,
|
|
"grad_norm": 14.067433272395801,
|
|
"learning_rate": 9.959470382716121e-06,
|
|
"loss": 3.0757241249084473,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.41120305537873963,
|
|
"grad_norm": 17.355318698645497,
|
|
"learning_rate": 9.958998385141048e-06,
|
|
"loss": 4.224935054779053,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.41183959261616804,
|
|
"grad_norm": 14.524481722394714,
|
|
"learning_rate": 9.958523666386746e-06,
|
|
"loss": 2.840717315673828,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.41247612985359644,
|
|
"grad_norm": 9.864751867360656,
|
|
"learning_rate": 9.958046226713709e-06,
|
|
"loss": 1.4242722988128662,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.41311266709102484,
|
|
"grad_norm": 10.183096885150817,
|
|
"learning_rate": 9.957566066383925e-06,
|
|
"loss": 2.89633846282959,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.4137492043284532,
|
|
"grad_norm": 13.838812863625304,
|
|
"learning_rate": 9.957083185660876e-06,
|
|
"loss": 2.4605355262756348,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.4143857415658816,
|
|
"grad_norm": 20.649038029654793,
|
|
"learning_rate": 9.956597584809533e-06,
|
|
"loss": 2.7936930656433105,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.41502227880331,
|
|
"grad_norm": 10.011543784442619,
|
|
"learning_rate": 9.956109264096368e-06,
|
|
"loss": 3.692840337753296,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.4156588160407384,
|
|
"grad_norm": 9.515737004335836,
|
|
"learning_rate": 9.955618223789334e-06,
|
|
"loss": 3.034600257873535,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.41629535327816675,
|
|
"grad_norm": 16.094413428203612,
|
|
"learning_rate": 9.955124464157883e-06,
|
|
"loss": 3.0032362937927246,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.41693189051559515,
|
|
"grad_norm": 15.849849486662814,
|
|
"learning_rate": 9.954627985472964e-06,
|
|
"loss": 2.671009063720703,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.41756842775302355,
|
|
"grad_norm": 13.141905364366565,
|
|
"learning_rate": 9.954128788007007e-06,
|
|
"loss": 2.6174559593200684,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.41820496499045196,
|
|
"grad_norm": 12.43941337981924,
|
|
"learning_rate": 9.953626872033943e-06,
|
|
"loss": 2.541435718536377,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.41884150222788036,
|
|
"grad_norm": 10.743486385474622,
|
|
"learning_rate": 9.953122237829189e-06,
|
|
"loss": 2.7599356174468994,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.4194780394653087,
|
|
"grad_norm": 10.12403135229293,
|
|
"learning_rate": 9.952614885669656e-06,
|
|
"loss": 2.745006561279297,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.4201145767027371,
|
|
"grad_norm": 12.487193950798103,
|
|
"learning_rate": 9.952104815833747e-06,
|
|
"loss": 2.7711846828460693,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.4207511139401655,
|
|
"grad_norm": 8.37166640608455,
|
|
"learning_rate": 9.951592028601356e-06,
|
|
"loss": 2.6092634201049805,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.4213876511775939,
|
|
"grad_norm": 8.707068362495248,
|
|
"learning_rate": 9.951076524253866e-06,
|
|
"loss": 2.708832263946533,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.42202418841502226,
|
|
"grad_norm": 11.391734001053349,
|
|
"learning_rate": 9.950558303074151e-06,
|
|
"loss": 3.039702892303467,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.42266072565245066,
|
|
"grad_norm": 13.764788229417277,
|
|
"learning_rate": 9.950037365346583e-06,
|
|
"loss": 3.0277504920959473,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.42329726288987907,
|
|
"grad_norm": 9.324681051738402,
|
|
"learning_rate": 9.949513711357015e-06,
|
|
"loss": 2.3469479084014893,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.42393380012730747,
|
|
"grad_norm": 23.944453285208347,
|
|
"learning_rate": 9.948987341392794e-06,
|
|
"loss": 3.179440975189209,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.4245703373647358,
|
|
"grad_norm": 10.989252593514273,
|
|
"learning_rate": 9.948458255742764e-06,
|
|
"loss": 2.5995612144470215,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.4252068746021642,
|
|
"grad_norm": 14.886237899360502,
|
|
"learning_rate": 9.947926454697245e-06,
|
|
"loss": 2.4212164878845215,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.4258434118395926,
|
|
"grad_norm": 10.460721255755393,
|
|
"learning_rate": 9.947391938548058e-06,
|
|
"loss": 2.848560094833374,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.426479949077021,
|
|
"grad_norm": 15.285012142076349,
|
|
"learning_rate": 9.946854707588514e-06,
|
|
"loss": 3.141273021697998,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.4271164863144494,
|
|
"grad_norm": 9.873965430465756,
|
|
"learning_rate": 9.946314762113408e-06,
|
|
"loss": 1.6153454780578613,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.4277530235518778,
|
|
"grad_norm": 8.24353593993326,
|
|
"learning_rate": 9.945772102419028e-06,
|
|
"loss": 3.2559409141540527,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.4283895607893062,
|
|
"grad_norm": 17.188774446865704,
|
|
"learning_rate": 9.945226728803152e-06,
|
|
"loss": 3.3109982013702393,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.4290260980267346,
|
|
"grad_norm": 26.23000486353143,
|
|
"learning_rate": 9.944678641565043e-06,
|
|
"loss": 2.5540897846221924,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.42966263526416293,
|
|
"grad_norm": 10.949099660874174,
|
|
"learning_rate": 9.944127841005458e-06,
|
|
"loss": 2.8961610794067383,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.43029917250159133,
|
|
"grad_norm": 22.67851337735571,
|
|
"learning_rate": 9.943574327426642e-06,
|
|
"loss": 2.8129518032073975,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.43093570973901973,
|
|
"grad_norm": 18.30572432799705,
|
|
"learning_rate": 9.943018101132324e-06,
|
|
"loss": 3.113130569458008,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.43157224697644814,
|
|
"grad_norm": 12.785299949548826,
|
|
"learning_rate": 9.94245916242773e-06,
|
|
"loss": 2.912236213684082,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.4322087842138765,
|
|
"grad_norm": 7.682688420962503,
|
|
"learning_rate": 9.941897511619566e-06,
|
|
"loss": 2.8037664890289307,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.4328453214513049,
|
|
"grad_norm": 16.30450802410894,
|
|
"learning_rate": 9.94133314901603e-06,
|
|
"loss": 2.6482651233673096,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.4334818586887333,
|
|
"grad_norm": 7.005472690303165,
|
|
"learning_rate": 9.940766074926812e-06,
|
|
"loss": 2.8123576641082764,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.4341183959261617,
|
|
"grad_norm": 15.53144189825324,
|
|
"learning_rate": 9.940196289663078e-06,
|
|
"loss": 4.070590972900391,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.4347549331635901,
|
|
"grad_norm": 10.903253979305704,
|
|
"learning_rate": 9.939623793537496e-06,
|
|
"loss": 2.9066810607910156,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.43539147040101844,
|
|
"grad_norm": 12.756922386256557,
|
|
"learning_rate": 9.939048586864213e-06,
|
|
"loss": 2.80411696434021,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.43602800763844685,
|
|
"grad_norm": 16.621654802617265,
|
|
"learning_rate": 9.938470669958866e-06,
|
|
"loss": 3.1619603633880615,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.43666454487587525,
|
|
"grad_norm": 11.49900426573002,
|
|
"learning_rate": 9.937890043138578e-06,
|
|
"loss": 3.234433174133301,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.43730108211330365,
|
|
"grad_norm": 7.860862381899233,
|
|
"learning_rate": 9.93730670672196e-06,
|
|
"loss": 3.456599712371826,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.437937619350732,
|
|
"grad_norm": 17.170411030564026,
|
|
"learning_rate": 9.936720661029109e-06,
|
|
"loss": 3.1284303665161133,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.4385741565881604,
|
|
"grad_norm": 16.663238705856116,
|
|
"learning_rate": 9.93613190638161e-06,
|
|
"loss": 2.4781811237335205,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.4392106938255888,
|
|
"grad_norm": 12.371678503540068,
|
|
"learning_rate": 9.935540443102531e-06,
|
|
"loss": 2.9643073081970215,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.4398472310630172,
|
|
"grad_norm": 8.250209786550368,
|
|
"learning_rate": 9.934946271516433e-06,
|
|
"loss": 2.8749005794525146,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.44048376830044556,
|
|
"grad_norm": 12.74480203323608,
|
|
"learning_rate": 9.934349391949356e-06,
|
|
"loss": 2.5457563400268555,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.44112030553787396,
|
|
"grad_norm": 14.380859550838787,
|
|
"learning_rate": 9.933749804728832e-06,
|
|
"loss": 2.69622802734375,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.44175684277530236,
|
|
"grad_norm": 12.535761815732146,
|
|
"learning_rate": 9.933147510183872e-06,
|
|
"loss": 2.3187177181243896,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 0.44239338001273076,
|
|
"grad_norm": 16.108853721123644,
|
|
"learning_rate": 9.93254250864498e-06,
|
|
"loss": 2.924973726272583,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.4430299172501591,
|
|
"grad_norm": 9.691182175468388,
|
|
"learning_rate": 9.931934800444141e-06,
|
|
"loss": 3.213982105255127,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 0.4436664544875875,
|
|
"grad_norm": 6.131677330440111,
|
|
"learning_rate": 9.931324385914824e-06,
|
|
"loss": 2.690908670425415,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 0.4443029917250159,
|
|
"grad_norm": 13.510963421325568,
|
|
"learning_rate": 9.930711265391988e-06,
|
|
"loss": 2.8179783821105957,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 0.4449395289624443,
|
|
"grad_norm": 14.225533631814386,
|
|
"learning_rate": 9.930095439212073e-06,
|
|
"loss": 2.7640280723571777,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 0.44557606619987267,
|
|
"grad_norm": 12.185149829665308,
|
|
"learning_rate": 9.929476907713005e-06,
|
|
"loss": 2.298121452331543,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.44621260343730107,
|
|
"grad_norm": 16.24845527493673,
|
|
"learning_rate": 9.928855671234194e-06,
|
|
"loss": 3.070202350616455,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 0.4468491406747295,
|
|
"grad_norm": 9.20682146322305,
|
|
"learning_rate": 9.928231730116535e-06,
|
|
"loss": 2.338399648666382,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 0.4474856779121579,
|
|
"grad_norm": 8.226678303992347,
|
|
"learning_rate": 9.927605084702407e-06,
|
|
"loss": 2.8359477519989014,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 0.4481222151495863,
|
|
"grad_norm": 32.5617697214951,
|
|
"learning_rate": 9.926975735335671e-06,
|
|
"loss": 2.9955806732177734,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 0.4487587523870146,
|
|
"grad_norm": 13.246120214661767,
|
|
"learning_rate": 9.926343682361675e-06,
|
|
"loss": 2.639787197113037,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.44939528962444303,
|
|
"grad_norm": 26.332193602580052,
|
|
"learning_rate": 9.925708926127248e-06,
|
|
"loss": 2.940730094909668,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 0.45003182686187143,
|
|
"grad_norm": 19.297488474218106,
|
|
"learning_rate": 9.925071466980705e-06,
|
|
"loss": 3.354499340057373,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 0.45066836409929983,
|
|
"grad_norm": 11.74488535410552,
|
|
"learning_rate": 9.92443130527184e-06,
|
|
"loss": 2.925593614578247,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 0.4513049013367282,
|
|
"grad_norm": 12.733023497209542,
|
|
"learning_rate": 9.923788441351935e-06,
|
|
"loss": 2.8628973960876465,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 0.4519414385741566,
|
|
"grad_norm": 9.947678922714372,
|
|
"learning_rate": 9.923142875573753e-06,
|
|
"loss": 2.3758437633514404,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.452577975811585,
|
|
"grad_norm": 18.26488793608698,
|
|
"learning_rate": 9.922494608291535e-06,
|
|
"loss": 3.158717632293701,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 0.4532145130490134,
|
|
"grad_norm": 16.815761519714297,
|
|
"learning_rate": 9.921843639861012e-06,
|
|
"loss": 2.8898587226867676,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 0.45385105028644174,
|
|
"grad_norm": 14.557286967833983,
|
|
"learning_rate": 9.921189970639394e-06,
|
|
"loss": 2.966440200805664,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 0.45448758752387014,
|
|
"grad_norm": 12.302618152679335,
|
|
"learning_rate": 9.920533600985368e-06,
|
|
"loss": 2.9261152744293213,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 0.45512412476129854,
|
|
"grad_norm": 13.768228767235051,
|
|
"learning_rate": 9.919874531259112e-06,
|
|
"loss": 2.9286997318267822,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.45576066199872695,
|
|
"grad_norm": 10.564120236385856,
|
|
"learning_rate": 9.919212761822279e-06,
|
|
"loss": 2.805097818374634,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 0.4563971992361553,
|
|
"grad_norm": 7.490544863271503,
|
|
"learning_rate": 9.918548293038007e-06,
|
|
"loss": 1.9821001291275024,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 0.4570337364735837,
|
|
"grad_norm": 14.085964886136328,
|
|
"learning_rate": 9.91788112527091e-06,
|
|
"loss": 3.0433640480041504,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 0.4576702737110121,
|
|
"grad_norm": 14.829040046446302,
|
|
"learning_rate": 9.917211258887091e-06,
|
|
"loss": 2.8717331886291504,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 0.4583068109484405,
|
|
"grad_norm": 10.565377930992518,
|
|
"learning_rate": 9.916538694254127e-06,
|
|
"loss": 2.2944936752319336,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.45894334818586885,
|
|
"grad_norm": 9.174823180165214,
|
|
"learning_rate": 9.915863431741082e-06,
|
|
"loss": 2.902968406677246,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 0.45957988542329725,
|
|
"grad_norm": 9.891210211114071,
|
|
"learning_rate": 9.915185471718491e-06,
|
|
"loss": 2.9564785957336426,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 0.46021642266072565,
|
|
"grad_norm": 7.411473503493354,
|
|
"learning_rate": 9.914504814558379e-06,
|
|
"loss": 2.7194151878356934,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 0.46085295989815406,
|
|
"grad_norm": 13.449222461916701,
|
|
"learning_rate": 9.913821460634246e-06,
|
|
"loss": 2.8414273262023926,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 0.4614894971355824,
|
|
"grad_norm": 7.288076499119426,
|
|
"learning_rate": 9.913135410321072e-06,
|
|
"loss": 2.5523955821990967,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.4621260343730108,
|
|
"grad_norm": 7.746636346731804,
|
|
"learning_rate": 9.912446663995315e-06,
|
|
"loss": 2.893989086151123,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 0.4627625716104392,
|
|
"grad_norm": 11.28743653597152,
|
|
"learning_rate": 9.911755222034919e-06,
|
|
"loss": 3.0573744773864746,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 0.4633991088478676,
|
|
"grad_norm": 7.5053281354024595,
|
|
"learning_rate": 9.9110610848193e-06,
|
|
"loss": 2.850208282470703,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 0.464035646085296,
|
|
"grad_norm": 9.480537984079172,
|
|
"learning_rate": 9.910364252729357e-06,
|
|
"loss": 2.0863776206970215,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 0.46467218332272436,
|
|
"grad_norm": 12.382920487266663,
|
|
"learning_rate": 9.909664726147467e-06,
|
|
"loss": 2.8669216632843018,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.46530872056015277,
|
|
"grad_norm": 7.000986070921776,
|
|
"learning_rate": 9.908962505457484e-06,
|
|
"loss": 2.857827663421631,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 0.46594525779758117,
|
|
"grad_norm": 9.848954673114715,
|
|
"learning_rate": 9.908257591044742e-06,
|
|
"loss": 2.8766777515411377,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 0.46658179503500957,
|
|
"grad_norm": 15.293732536165164,
|
|
"learning_rate": 9.907549983296054e-06,
|
|
"loss": 3.4860715866088867,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 0.4672183322724379,
|
|
"grad_norm": 10.331704044040437,
|
|
"learning_rate": 9.906839682599708e-06,
|
|
"loss": 2.6472580432891846,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 0.4678548695098663,
|
|
"grad_norm": 16.276006167657393,
|
|
"learning_rate": 9.906126689345471e-06,
|
|
"loss": 2.6463623046875,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.4684914067472947,
|
|
"grad_norm": 12.940481515444338,
|
|
"learning_rate": 9.90541100392459e-06,
|
|
"loss": 3.2068190574645996,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 0.46912794398472313,
|
|
"grad_norm": 9.008600298107018,
|
|
"learning_rate": 9.904692626729784e-06,
|
|
"loss": 2.37585186958313,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 0.4697644812221515,
|
|
"grad_norm": 16.266152128877287,
|
|
"learning_rate": 9.903971558155253e-06,
|
|
"loss": 3.045964002609253,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 0.4704010184595799,
|
|
"grad_norm": 9.267594041537762,
|
|
"learning_rate": 9.903247798596677e-06,
|
|
"loss": 2.497579574584961,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 0.4710375556970083,
|
|
"grad_norm": 13.422704581590162,
|
|
"learning_rate": 9.902521348451202e-06,
|
|
"loss": 3.0034847259521484,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.4716740929344367,
|
|
"grad_norm": 13.641050524632726,
|
|
"learning_rate": 9.901792208117463e-06,
|
|
"loss": 2.625041961669922,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 0.47231063017186503,
|
|
"grad_norm": 9.295405384635396,
|
|
"learning_rate": 9.901060377995562e-06,
|
|
"loss": 2.9143404960632324,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 0.47294716740929343,
|
|
"grad_norm": 21.799358663541497,
|
|
"learning_rate": 9.900325858487082e-06,
|
|
"loss": 2.649592876434326,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 0.47358370464672184,
|
|
"grad_norm": 16.81058879968057,
|
|
"learning_rate": 9.899588649995077e-06,
|
|
"loss": 2.91402006149292,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 0.47422024188415024,
|
|
"grad_norm": 9.29700232907205,
|
|
"learning_rate": 9.898848752924084e-06,
|
|
"loss": 3.0308291912078857,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.4748567791215786,
|
|
"grad_norm": 12.310748128320457,
|
|
"learning_rate": 9.898106167680108e-06,
|
|
"loss": 2.667323112487793,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 0.475493316359007,
|
|
"grad_norm": 15.21271639587925,
|
|
"learning_rate": 9.897360894670634e-06,
|
|
"loss": 2.8982882499694824,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 0.4761298535964354,
|
|
"grad_norm": 14.951213153498294,
|
|
"learning_rate": 9.896612934304618e-06,
|
|
"loss": 3.133816719055176,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 0.4767663908338638,
|
|
"grad_norm": 14.808614861733782,
|
|
"learning_rate": 9.895862286992493e-06,
|
|
"loss": 2.8779456615448,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 0.47740292807129214,
|
|
"grad_norm": 13.234676558337618,
|
|
"learning_rate": 9.895108953146165e-06,
|
|
"loss": 2.416349411010742,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.47803946530872055,
|
|
"grad_norm": 15.70764287472319,
|
|
"learning_rate": 9.894352933179017e-06,
|
|
"loss": 2.63266658782959,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 0.47867600254614895,
|
|
"grad_norm": 16.09723215871181,
|
|
"learning_rate": 9.893594227505902e-06,
|
|
"loss": 2.9402222633361816,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 0.47931253978357735,
|
|
"grad_norm": 10.85603282705008,
|
|
"learning_rate": 9.892832836543151e-06,
|
|
"loss": 2.929889678955078,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 0.47994907702100575,
|
|
"grad_norm": 7.658332157080424,
|
|
"learning_rate": 9.892068760708566e-06,
|
|
"loss": 2.414501190185547,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 0.4805856142584341,
|
|
"grad_norm": 7.061927223311403,
|
|
"learning_rate": 9.891302000421423e-06,
|
|
"loss": 2.4067840576171875,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.4812221514958625,
|
|
"grad_norm": 15.166517750731245,
|
|
"learning_rate": 9.890532556102468e-06,
|
|
"loss": 2.6367745399475098,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 0.4818586887332909,
|
|
"grad_norm": 21.45726864766029,
|
|
"learning_rate": 9.889760428173927e-06,
|
|
"loss": 2.8288209438323975,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 0.4824952259707193,
|
|
"grad_norm": 13.462199082179964,
|
|
"learning_rate": 9.88898561705949e-06,
|
|
"loss": 2.6893558502197266,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 0.48313176320814766,
|
|
"grad_norm": 7.9536278817913155,
|
|
"learning_rate": 9.888208123184325e-06,
|
|
"loss": 2.590125322341919,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 0.48376830044557606,
|
|
"grad_norm": 12.01274821779052,
|
|
"learning_rate": 9.88742794697507e-06,
|
|
"loss": 2.795884370803833,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.48440483768300446,
|
|
"grad_norm": 10.370167179771329,
|
|
"learning_rate": 9.886645088859837e-06,
|
|
"loss": 2.782790422439575,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 0.48504137492043287,
|
|
"grad_norm": 10.86893918590165,
|
|
"learning_rate": 9.885859549268206e-06,
|
|
"loss": 2.7205393314361572,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 0.4856779121578612,
|
|
"grad_norm": 17.66342960824175,
|
|
"learning_rate": 9.885071328631233e-06,
|
|
"loss": 2.9844560623168945,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 0.4863144493952896,
|
|
"grad_norm": 20.865322483392422,
|
|
"learning_rate": 9.884280427381442e-06,
|
|
"loss": 2.2549309730529785,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 0.486950986632718,
|
|
"grad_norm": 10.188536008150164,
|
|
"learning_rate": 9.883486845952829e-06,
|
|
"loss": 2.3112339973449707,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.4875875238701464,
|
|
"grad_norm": 10.749696325917178,
|
|
"learning_rate": 9.88269058478086e-06,
|
|
"loss": 3.1069111824035645,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 0.48822406110757477,
|
|
"grad_norm": 31.00622065512355,
|
|
"learning_rate": 9.88189164430247e-06,
|
|
"loss": 3.462052583694458,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 0.48886059834500317,
|
|
"grad_norm": 30.79585810187201,
|
|
"learning_rate": 9.881090024956068e-06,
|
|
"loss": 2.8704326152801514,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 0.4894971355824316,
|
|
"grad_norm": 9.06903211486752,
|
|
"learning_rate": 9.88028572718153e-06,
|
|
"loss": 2.5768370628356934,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 0.49013367281986,
|
|
"grad_norm": 19.877000304609403,
|
|
"learning_rate": 9.879478751420208e-06,
|
|
"loss": 2.7790379524230957,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.4907702100572883,
|
|
"grad_norm": 9.541041502021342,
|
|
"learning_rate": 9.87866909811491e-06,
|
|
"loss": 3.077629804611206,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 0.49140674729471673,
|
|
"grad_norm": 16.724559952167805,
|
|
"learning_rate": 9.877856767709928e-06,
|
|
"loss": 2.3151135444641113,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 0.49204328453214513,
|
|
"grad_norm": 14.827081502963878,
|
|
"learning_rate": 9.877041760651014e-06,
|
|
"loss": 1.7901434898376465,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 0.49267982176957353,
|
|
"grad_norm": 19.18607133804805,
|
|
"learning_rate": 9.876224077385392e-06,
|
|
"loss": 2.463925361633301,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 0.49331635900700194,
|
|
"grad_norm": 15.711551704422293,
|
|
"learning_rate": 9.875403718361753e-06,
|
|
"loss": 3.406130313873291,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.4939528962444303,
|
|
"grad_norm": 8.00068256562752,
|
|
"learning_rate": 9.87458068403026e-06,
|
|
"loss": 2.3577256202697754,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 0.4945894334818587,
|
|
"grad_norm": 6.881720407233433,
|
|
"learning_rate": 9.873754974842537e-06,
|
|
"loss": 2.3718245029449463,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 0.4952259707192871,
|
|
"grad_norm": 13.003329103082704,
|
|
"learning_rate": 9.872926591251684e-06,
|
|
"loss": 3.0024499893188477,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 0.4958625079567155,
|
|
"grad_norm": 11.43543882830748,
|
|
"learning_rate": 9.872095533712263e-06,
|
|
"loss": 2.6259024143218994,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 0.49649904519414384,
|
|
"grad_norm": 12.53610972874729,
|
|
"learning_rate": 9.871261802680305e-06,
|
|
"loss": 2.6501283645629883,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.49713558243157224,
|
|
"grad_norm": 12.420994645703136,
|
|
"learning_rate": 9.870425398613308e-06,
|
|
"loss": 2.614926815032959,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 0.49777211966900065,
|
|
"grad_norm": 8.036488508872816,
|
|
"learning_rate": 9.869586321970237e-06,
|
|
"loss": 2.8055872917175293,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 0.49840865690642905,
|
|
"grad_norm": 8.231379769197854,
|
|
"learning_rate": 9.868744573211522e-06,
|
|
"loss": 2.818807363510132,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 0.4990451941438574,
|
|
"grad_norm": 10.730920165600457,
|
|
"learning_rate": 9.867900152799061e-06,
|
|
"loss": 2.8013768196105957,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 0.4996817313812858,
|
|
"grad_norm": 9.970979928602242,
|
|
"learning_rate": 9.86705306119622e-06,
|
|
"loss": 2.3457491397857666,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.5003182686187142,
|
|
"grad_norm": 13.470490571372912,
|
|
"learning_rate": 9.866203298867825e-06,
|
|
"loss": 3.1120364665985107,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 0.5009548058561426,
|
|
"grad_norm": 11.28766870904382,
|
|
"learning_rate": 9.865350866280174e-06,
|
|
"loss": 2.7780635356903076,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 0.501591343093571,
|
|
"grad_norm": 15.678352974832473,
|
|
"learning_rate": 9.864495763901024e-06,
|
|
"loss": 2.325300693511963,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 0.5022278803309994,
|
|
"grad_norm": 17.138791740026022,
|
|
"learning_rate": 9.863637992199601e-06,
|
|
"loss": 3.143601179122925,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 0.5028644175684277,
|
|
"grad_norm": 9.663510119121984,
|
|
"learning_rate": 9.862777551646599e-06,
|
|
"loss": 2.5356907844543457,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.5035009548058561,
|
|
"grad_norm": 14.12414506218859,
|
|
"learning_rate": 9.861914442714165e-06,
|
|
"loss": 2.4630978107452393,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 0.5041374920432845,
|
|
"grad_norm": 9.924069328992848,
|
|
"learning_rate": 9.861048665875924e-06,
|
|
"loss": 2.6629276275634766,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 0.5047740292807129,
|
|
"grad_norm": 19.79033509372283,
|
|
"learning_rate": 9.860180221606956e-06,
|
|
"loss": 3.092639923095703,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 0.5054105665181413,
|
|
"grad_norm": 22.25388725246896,
|
|
"learning_rate": 9.859309110383808e-06,
|
|
"loss": 3.434253215789795,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 0.5060471037555697,
|
|
"grad_norm": 6.684954672164134,
|
|
"learning_rate": 9.858435332684488e-06,
|
|
"loss": 2.879507064819336,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.5066836409929981,
|
|
"grad_norm": 9.31451468087128,
|
|
"learning_rate": 9.85755888898847e-06,
|
|
"loss": 2.7654552459716797,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 0.5073201782304265,
|
|
"grad_norm": 7.756931597209781,
|
|
"learning_rate": 9.856679779776692e-06,
|
|
"loss": 2.353954792022705,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 0.5079567154678549,
|
|
"grad_norm": 11.146460893348015,
|
|
"learning_rate": 9.85579800553155e-06,
|
|
"loss": 2.53048038482666,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 0.5085932527052832,
|
|
"grad_norm": 16.141401070616066,
|
|
"learning_rate": 9.854913566736906e-06,
|
|
"loss": 2.05796480178833,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 0.5092297899427116,
|
|
"grad_norm": 5.988900255080467,
|
|
"learning_rate": 9.854026463878083e-06,
|
|
"loss": 1.685745120048523,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.50986632718014,
|
|
"grad_norm": 6.3572999477726,
|
|
"learning_rate": 9.853136697441866e-06,
|
|
"loss": 2.7123618125915527,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 0.5105028644175684,
|
|
"grad_norm": 8.290859697152273,
|
|
"learning_rate": 9.852244267916502e-06,
|
|
"loss": 2.608476161956787,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 0.5111394016549968,
|
|
"grad_norm": 11.46663507794697,
|
|
"learning_rate": 9.851349175791697e-06,
|
|
"loss": 3.4248664379119873,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 0.5117759388924252,
|
|
"grad_norm": 14.950475213614503,
|
|
"learning_rate": 9.850451421558622e-06,
|
|
"loss": 2.7767040729522705,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 0.5124124761298536,
|
|
"grad_norm": 6.741305480732476,
|
|
"learning_rate": 9.849551005709906e-06,
|
|
"loss": 1.7822707891464233,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.513049013367282,
|
|
"grad_norm": 10.883114124452293,
|
|
"learning_rate": 9.848647928739639e-06,
|
|
"loss": 2.5189385414123535,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 0.5136855506047103,
|
|
"grad_norm": 6.732295304938295,
|
|
"learning_rate": 9.847742191143373e-06,
|
|
"loss": 2.514547824859619,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 0.5143220878421387,
|
|
"grad_norm": 5.9711797832121745,
|
|
"learning_rate": 9.846833793418115e-06,
|
|
"loss": 2.482044219970703,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 0.5149586250795671,
|
|
"grad_norm": 14.400383245002256,
|
|
"learning_rate": 9.845922736062341e-06,
|
|
"loss": 2.887598991394043,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 0.5155951623169955,
|
|
"grad_norm": 11.630919644932487,
|
|
"learning_rate": 9.845009019575974e-06,
|
|
"loss": 3.5304949283599854,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.5162316995544239,
|
|
"grad_norm": 22.73996741336589,
|
|
"learning_rate": 9.84409264446041e-06,
|
|
"loss": 2.7991387844085693,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 0.5168682367918523,
|
|
"grad_norm": 8.436400591965535,
|
|
"learning_rate": 9.843173611218489e-06,
|
|
"loss": 2.507692813873291,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 0.5175047740292807,
|
|
"grad_norm": 7.6339677846969956,
|
|
"learning_rate": 9.842251920354523e-06,
|
|
"loss": 1.9077794551849365,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 0.5181413112667091,
|
|
"grad_norm": 19.745421791939673,
|
|
"learning_rate": 9.841327572374275e-06,
|
|
"loss": 3.050415277481079,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 0.5187778485041374,
|
|
"grad_norm": 19.317035595884978,
|
|
"learning_rate": 9.840400567784966e-06,
|
|
"loss": 2.524138927459717,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.5194143857415658,
|
|
"grad_norm": 7.540364687402807,
|
|
"learning_rate": 9.839470907095279e-06,
|
|
"loss": 3.2449207305908203,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 0.5200509229789942,
|
|
"grad_norm": 5.854218704116551,
|
|
"learning_rate": 9.83853859081535e-06,
|
|
"loss": 2.8554091453552246,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 0.5206874602164226,
|
|
"grad_norm": 7.311068476898229,
|
|
"learning_rate": 9.837603619456777e-06,
|
|
"loss": 2.0652270317077637,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 0.521323997453851,
|
|
"grad_norm": 13.146507027324729,
|
|
"learning_rate": 9.836665993532608e-06,
|
|
"loss": 2.597984790802002,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 0.5219605346912795,
|
|
"grad_norm": 10.059988055991992,
|
|
"learning_rate": 9.835725713557356e-06,
|
|
"loss": 2.4166486263275146,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.5225970719287079,
|
|
"grad_norm": 13.784036388869687,
|
|
"learning_rate": 9.834782780046983e-06,
|
|
"loss": 2.607042074203491,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 0.5232336091661363,
|
|
"grad_norm": 13.905566237616293,
|
|
"learning_rate": 9.833837193518912e-06,
|
|
"loss": 2.538478136062622,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 0.5238701464035647,
|
|
"grad_norm": 11.112091320017088,
|
|
"learning_rate": 9.832888954492018e-06,
|
|
"loss": 2.88309907913208,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 0.524506683640993,
|
|
"grad_norm": 22.507543127539847,
|
|
"learning_rate": 9.831938063486636e-06,
|
|
"loss": 3.4071497917175293,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 0.5251432208784214,
|
|
"grad_norm": 22.28789638263004,
|
|
"learning_rate": 9.830984521024551e-06,
|
|
"loss": 2.59210205078125,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.5257797581158498,
|
|
"grad_norm": 6.5223751894038084,
|
|
"learning_rate": 9.830028327629009e-06,
|
|
"loss": 2.6576318740844727,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 0.5264162953532782,
|
|
"grad_norm": 7.5901390591435645,
|
|
"learning_rate": 9.829069483824707e-06,
|
|
"loss": 3.3522415161132812,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 0.5270528325907066,
|
|
"grad_norm": 14.8866670545974,
|
|
"learning_rate": 9.828107990137794e-06,
|
|
"loss": 2.4617838859558105,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 0.527689369828135,
|
|
"grad_norm": 8.130441007275053,
|
|
"learning_rate": 9.827143847095879e-06,
|
|
"loss": 2.8005857467651367,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 0.5283259070655634,
|
|
"grad_norm": 11.192259552237669,
|
|
"learning_rate": 9.826177055228018e-06,
|
|
"loss": 3.020345449447632,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.5289624443029918,
|
|
"grad_norm": 12.587075512510381,
|
|
"learning_rate": 9.82520761506473e-06,
|
|
"loss": 2.8561697006225586,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 0.5295989815404201,
|
|
"grad_norm": 10.730495935245877,
|
|
"learning_rate": 9.824235527137975e-06,
|
|
"loss": 2.875257968902588,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 0.5302355187778485,
|
|
"grad_norm": 14.975132541702031,
|
|
"learning_rate": 9.823260791981174e-06,
|
|
"loss": 2.9994492530822754,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 0.5308720560152769,
|
|
"grad_norm": 10.025600576297306,
|
|
"learning_rate": 9.822283410129204e-06,
|
|
"loss": 2.6414132118225098,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 0.5315085932527053,
|
|
"grad_norm": 17.164757722920452,
|
|
"learning_rate": 9.821303382118382e-06,
|
|
"loss": 2.8303494453430176,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.5321451304901337,
|
|
"grad_norm": 28.000081297657083,
|
|
"learning_rate": 9.82032070848649e-06,
|
|
"loss": 3.836611032485962,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 0.5327816677275621,
|
|
"grad_norm": 8.688852482490768,
|
|
"learning_rate": 9.819335389772751e-06,
|
|
"loss": 2.7299723625183105,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 0.5334182049649905,
|
|
"grad_norm": 14.423747949313897,
|
|
"learning_rate": 9.81834742651785e-06,
|
|
"loss": 2.899153232574463,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 0.5340547422024189,
|
|
"grad_norm": 11.071276785005018,
|
|
"learning_rate": 9.817356819263912e-06,
|
|
"loss": 2.9401285648345947,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 0.5346912794398472,
|
|
"grad_norm": 7.119354016240981,
|
|
"learning_rate": 9.816363568554523e-06,
|
|
"loss": 2.84574294090271,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.5353278166772756,
|
|
"grad_norm": 13.480709693656054,
|
|
"learning_rate": 9.815367674934713e-06,
|
|
"loss": 3.2451748847961426,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 0.535964353914704,
|
|
"grad_norm": 8.436170064658976,
|
|
"learning_rate": 9.814369138950965e-06,
|
|
"loss": 2.8468217849731445,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 0.5366008911521324,
|
|
"grad_norm": 16.629327310426202,
|
|
"learning_rate": 9.813367961151212e-06,
|
|
"loss": 3.0509352684020996,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 0.5372374283895608,
|
|
"grad_norm": 7.767734513755592,
|
|
"learning_rate": 9.812364142084833e-06,
|
|
"loss": 2.8520469665527344,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 0.5378739656269892,
|
|
"grad_norm": 9.647175482826967,
|
|
"learning_rate": 9.811357682302664e-06,
|
|
"loss": 2.677677631378174,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.5385105028644176,
|
|
"grad_norm": 6.7647162407733985,
|
|
"learning_rate": 9.81034858235698e-06,
|
|
"loss": 2.339890241622925,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 0.539147040101846,
|
|
"grad_norm": 33.107560376267216,
|
|
"learning_rate": 9.809336842801514e-06,
|
|
"loss": 2.5347745418548584,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 0.5397835773392744,
|
|
"grad_norm": 21.429620888652362,
|
|
"learning_rate": 9.808322464191444e-06,
|
|
"loss": 3.6045989990234375,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 0.5404201145767027,
|
|
"grad_norm": 31.986307429772406,
|
|
"learning_rate": 9.807305447083392e-06,
|
|
"loss": 2.769184112548828,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 0.5410566518141311,
|
|
"grad_norm": 21.509681978929585,
|
|
"learning_rate": 9.806285792035435e-06,
|
|
"loss": 3.472559928894043,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.5416931890515595,
|
|
"grad_norm": 7.898482917664102,
|
|
"learning_rate": 9.805263499607091e-06,
|
|
"loss": 2.580059289932251,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 0.5423297262889879,
|
|
"grad_norm": 8.556659491857221,
|
|
"learning_rate": 9.804238570359331e-06,
|
|
"loss": 2.169471025466919,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 0.5429662635264163,
|
|
"grad_norm": 10.484489660238392,
|
|
"learning_rate": 9.803211004854569e-06,
|
|
"loss": 2.7918176651000977,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 0.5436028007638447,
|
|
"grad_norm": 6.744177575899762,
|
|
"learning_rate": 9.802180803656667e-06,
|
|
"loss": 2.4259371757507324,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 0.5442393380012731,
|
|
"grad_norm": 18.8934287974784,
|
|
"learning_rate": 9.801147967330935e-06,
|
|
"loss": 2.8702199459075928,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.5448758752387015,
|
|
"grad_norm": 24.583047129270803,
|
|
"learning_rate": 9.800112496444124e-06,
|
|
"loss": 2.676443576812744,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 0.5455124124761298,
|
|
"grad_norm": 16.929109118145526,
|
|
"learning_rate": 9.799074391564436e-06,
|
|
"loss": 2.5846240520477295,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 0.5461489497135582,
|
|
"grad_norm": 12.033435100538304,
|
|
"learning_rate": 9.798033653261515e-06,
|
|
"loss": 2.775334358215332,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 0.5467854869509866,
|
|
"grad_norm": 9.943603717670662,
|
|
"learning_rate": 9.79699028210645e-06,
|
|
"loss": 2.725459337234497,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 0.547422024188415,
|
|
"grad_norm": 14.828949430654744,
|
|
"learning_rate": 9.795944278671779e-06,
|
|
"loss": 2.655045986175537,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.5480585614258434,
|
|
"grad_norm": 16.830141958946086,
|
|
"learning_rate": 9.794895643531479e-06,
|
|
"loss": 2.982860803604126,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 0.5486950986632718,
|
|
"grad_norm": 10.861165616915436,
|
|
"learning_rate": 9.793844377260975e-06,
|
|
"loss": 2.0161194801330566,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 0.5493316359007002,
|
|
"grad_norm": 58.85838285619661,
|
|
"learning_rate": 9.792790480437133e-06,
|
|
"loss": 2.948622703552246,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 0.5499681731381286,
|
|
"grad_norm": 9.757855439261121,
|
|
"learning_rate": 9.791733953638264e-06,
|
|
"loss": 3.0861761569976807,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 0.5506047103755569,
|
|
"grad_norm": 16.381773991834848,
|
|
"learning_rate": 9.790674797444125e-06,
|
|
"loss": 2.9444048404693604,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.5512412476129853,
|
|
"grad_norm": 11.160265952315564,
|
|
"learning_rate": 9.789613012435908e-06,
|
|
"loss": 2.9539167881011963,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 0.5518777848504137,
|
|
"grad_norm": 14.1635863067067,
|
|
"learning_rate": 9.788548599196254e-06,
|
|
"loss": 2.430659294128418,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 0.5525143220878421,
|
|
"grad_norm": 12.908142057583989,
|
|
"learning_rate": 9.787481558309247e-06,
|
|
"loss": 2.671661853790283,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 0.5531508593252705,
|
|
"grad_norm": 8.43503045093665,
|
|
"learning_rate": 9.786411890360407e-06,
|
|
"loss": 2.872176170349121,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 0.5537873965626989,
|
|
"grad_norm": 9.16301389740033,
|
|
"learning_rate": 9.7853395959367e-06,
|
|
"loss": 2.8996071815490723,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.5544239338001273,
|
|
"grad_norm": 16.856257304408356,
|
|
"learning_rate": 9.784264675626532e-06,
|
|
"loss": 4.690608024597168,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 0.5550604710375557,
|
|
"grad_norm": 20.926523578798506,
|
|
"learning_rate": 9.783187130019751e-06,
|
|
"loss": 3.114534854888916,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 0.5556970082749841,
|
|
"grad_norm": 10.52875239635774,
|
|
"learning_rate": 9.782106959707644e-06,
|
|
"loss": 2.5665369033813477,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 0.5563335455124124,
|
|
"grad_norm": 13.848634211652863,
|
|
"learning_rate": 9.781024165282939e-06,
|
|
"loss": 3.06032133102417,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 0.5569700827498408,
|
|
"grad_norm": 12.6790668481712,
|
|
"learning_rate": 9.779938747339805e-06,
|
|
"loss": 3.3275718688964844,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.5576066199872692,
|
|
"grad_norm": 16.91386790155097,
|
|
"learning_rate": 9.778850706473847e-06,
|
|
"loss": 2.9962024688720703,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 0.5582431572246976,
|
|
"grad_norm": 14.213173107920003,
|
|
"learning_rate": 9.777760043282117e-06,
|
|
"loss": 3.2456562519073486,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 0.558879694462126,
|
|
"grad_norm": 8.098999526064096,
|
|
"learning_rate": 9.776666758363093e-06,
|
|
"loss": 3.231693983078003,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 0.5595162316995544,
|
|
"grad_norm": 7.154392240607928,
|
|
"learning_rate": 9.775570852316706e-06,
|
|
"loss": 2.285749673843384,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 0.5601527689369828,
|
|
"grad_norm": 8.402539117834204,
|
|
"learning_rate": 9.774472325744315e-06,
|
|
"loss": 2.0394725799560547,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.5607893061744113,
|
|
"grad_norm": 17.14200855773335,
|
|
"learning_rate": 9.773371179248724e-06,
|
|
"loss": 2.8273448944091797,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 0.5614258434118395,
|
|
"grad_norm": 16.82522755670069,
|
|
"learning_rate": 9.772267413434167e-06,
|
|
"loss": 3.1758463382720947,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 0.562062380649268,
|
|
"grad_norm": 10.466562159025571,
|
|
"learning_rate": 9.771161028906325e-06,
|
|
"loss": 2.5195975303649902,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 0.5626989178866963,
|
|
"grad_norm": 7.869239737887258,
|
|
"learning_rate": 9.770052026272306e-06,
|
|
"loss": 2.8067564964294434,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 0.5633354551241248,
|
|
"grad_norm": 19.44302133114353,
|
|
"learning_rate": 9.768940406140658e-06,
|
|
"loss": 2.5152382850646973,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.5639719923615532,
|
|
"grad_norm": 12.238428636395993,
|
|
"learning_rate": 9.767826169121374e-06,
|
|
"loss": 2.4085590839385986,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 0.5646085295989816,
|
|
"grad_norm": 9.563967483322969,
|
|
"learning_rate": 9.766709315825869e-06,
|
|
"loss": 2.646481513977051,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 0.56524506683641,
|
|
"grad_norm": 18.87493687296846,
|
|
"learning_rate": 9.765589846867003e-06,
|
|
"loss": 3.2062935829162598,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 0.5658816040738384,
|
|
"grad_norm": 14.302660604413548,
|
|
"learning_rate": 9.76446776285907e-06,
|
|
"loss": 3.0545549392700195,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 0.5665181413112667,
|
|
"grad_norm": 8.132199904211502,
|
|
"learning_rate": 9.763343064417792e-06,
|
|
"loss": 3.130021095275879,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.5671546785486951,
|
|
"grad_norm": 11.562566038181126,
|
|
"learning_rate": 9.762215752160335e-06,
|
|
"loss": 2.8721697330474854,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 0.5677912157861235,
|
|
"grad_norm": 17.44011854244868,
|
|
"learning_rate": 9.761085826705296e-06,
|
|
"loss": 2.7695465087890625,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 0.5684277530235519,
|
|
"grad_norm": 35.124551871281746,
|
|
"learning_rate": 9.759953288672704e-06,
|
|
"loss": 3.2944228649139404,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 0.5690642902609803,
|
|
"grad_norm": 7.512069889218877,
|
|
"learning_rate": 9.758818138684023e-06,
|
|
"loss": 2.6480884552001953,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 0.5697008274984087,
|
|
"grad_norm": 21.641091074007168,
|
|
"learning_rate": 9.757680377362152e-06,
|
|
"loss": 3.268338680267334,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.5703373647358371,
|
|
"grad_norm": 14.399202436529807,
|
|
"learning_rate": 9.756540005331418e-06,
|
|
"loss": 2.1671676635742188,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 0.5709739019732655,
|
|
"grad_norm": 11.623304830160155,
|
|
"learning_rate": 9.755397023217588e-06,
|
|
"loss": 2.6696858406066895,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 0.5716104392106939,
|
|
"grad_norm": 19.233964426153847,
|
|
"learning_rate": 9.754251431647853e-06,
|
|
"loss": 3.2551486492156982,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 0.5722469764481222,
|
|
"grad_norm": 7.270503407460572,
|
|
"learning_rate": 9.753103231250841e-06,
|
|
"loss": 2.863981246948242,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 0.5728835136855506,
|
|
"grad_norm": 10.975237208385844,
|
|
"learning_rate": 9.751952422656613e-06,
|
|
"loss": 2.517639636993408,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.573520050922979,
|
|
"grad_norm": 8.652240634932285,
|
|
"learning_rate": 9.750799006496657e-06,
|
|
"loss": 2.6670122146606445,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 0.5741565881604074,
|
|
"grad_norm": 7.744172147062743,
|
|
"learning_rate": 9.74964298340389e-06,
|
|
"loss": 2.9616057872772217,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 0.5747931253978358,
|
|
"grad_norm": 15.501557926799377,
|
|
"learning_rate": 9.748484354012671e-06,
|
|
"loss": 3.0340139865875244,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 0.5754296626352642,
|
|
"grad_norm": 15.054369483127644,
|
|
"learning_rate": 9.747323118958774e-06,
|
|
"loss": 3.0973074436187744,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 0.5760661998726926,
|
|
"grad_norm": 7.461008632303077,
|
|
"learning_rate": 9.746159278879412e-06,
|
|
"loss": 2.881272315979004,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.576702737110121,
|
|
"grad_norm": 19.756922229382635,
|
|
"learning_rate": 9.744992834413227e-06,
|
|
"loss": 2.6304001808166504,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 0.5773392743475493,
|
|
"grad_norm": 7.575054718020066,
|
|
"learning_rate": 9.74382378620029e-06,
|
|
"loss": 2.5317938327789307,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 0.5779758115849777,
|
|
"grad_norm": 8.897515039328026,
|
|
"learning_rate": 9.742652134882095e-06,
|
|
"loss": 2.1269655227661133,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 0.5786123488224061,
|
|
"grad_norm": 16.927725495237198,
|
|
"learning_rate": 9.741477881101573e-06,
|
|
"loss": 3.230391502380371,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 0.5792488860598345,
|
|
"grad_norm": 10.379257456934797,
|
|
"learning_rate": 9.740301025503078e-06,
|
|
"loss": 2.900475263595581,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.5798854232972629,
|
|
"grad_norm": 15.1456457698115,
|
|
"learning_rate": 9.73912156873239e-06,
|
|
"loss": 3.2446060180664062,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 0.5805219605346913,
|
|
"grad_norm": 9.067027801427905,
|
|
"learning_rate": 9.737939511436722e-06,
|
|
"loss": 3.015232563018799,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 0.5811584977721197,
|
|
"grad_norm": 23.831019820426956,
|
|
"learning_rate": 9.73675485426471e-06,
|
|
"loss": 2.4809412956237793,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 0.5817950350095481,
|
|
"grad_norm": 8.97624293130639,
|
|
"learning_rate": 9.735567597866415e-06,
|
|
"loss": 2.744965076446533,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 0.5824315722469765,
|
|
"grad_norm": 25.53509484031687,
|
|
"learning_rate": 9.734377742893333e-06,
|
|
"loss": 2.4732604026794434,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.5830681094844048,
|
|
"grad_norm": 13.15529474155569,
|
|
"learning_rate": 9.733185289998373e-06,
|
|
"loss": 3.0583267211914062,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 0.5837046467218332,
|
|
"grad_norm": 8.932012421437175,
|
|
"learning_rate": 9.731990239835882e-06,
|
|
"loss": 2.5329785346984863,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 0.5843411839592616,
|
|
"grad_norm": 12.273886175504305,
|
|
"learning_rate": 9.730792593061624e-06,
|
|
"loss": 2.9934682846069336,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 0.58497772119669,
|
|
"grad_norm": 25.24523050203931,
|
|
"learning_rate": 9.72959235033279e-06,
|
|
"loss": 2.8382387161254883,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 0.5856142584341184,
|
|
"grad_norm": 6.725669150751144,
|
|
"learning_rate": 9.728389512307996e-06,
|
|
"loss": 2.401772975921631,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.5862507956715468,
|
|
"grad_norm": 11.789649229172577,
|
|
"learning_rate": 9.727184079647284e-06,
|
|
"loss": 2.4895544052124023,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 0.5868873329089752,
|
|
"grad_norm": 10.58143888341926,
|
|
"learning_rate": 9.725976053012117e-06,
|
|
"loss": 2.4084954261779785,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 0.5875238701464036,
|
|
"grad_norm": 12.445412046081634,
|
|
"learning_rate": 9.724765433065382e-06,
|
|
"loss": 2.738898992538452,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 0.5881604073838319,
|
|
"grad_norm": 12.001087768801614,
|
|
"learning_rate": 9.72355222047139e-06,
|
|
"loss": 2.6212992668151855,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 0.5887969446212603,
|
|
"grad_norm": 10.360026449336509,
|
|
"learning_rate": 9.722336415895873e-06,
|
|
"loss": 2.7570643424987793,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.5894334818586887,
|
|
"grad_norm": 10.389681022297916,
|
|
"learning_rate": 9.721118020005985e-06,
|
|
"loss": 2.8604931831359863,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 0.5900700190961171,
|
|
"grad_norm": 7.173012423210488,
|
|
"learning_rate": 9.719897033470307e-06,
|
|
"loss": 2.645080327987671,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 0.5907065563335455,
|
|
"grad_norm": 9.77458249204749,
|
|
"learning_rate": 9.718673456958834e-06,
|
|
"loss": 2.715708017349243,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 0.5913430935709739,
|
|
"grad_norm": 11.542551453098346,
|
|
"learning_rate": 9.71744729114299e-06,
|
|
"loss": 2.594083309173584,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 0.5919796308084023,
|
|
"grad_norm": 11.834252345490496,
|
|
"learning_rate": 9.716218536695611e-06,
|
|
"loss": 3.1679861545562744,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.5926161680458307,
|
|
"grad_norm": 11.12942908667238,
|
|
"learning_rate": 9.714987194290963e-06,
|
|
"loss": 3.133230686187744,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 0.593252705283259,
|
|
"grad_norm": 11.039033615390888,
|
|
"learning_rate": 9.713753264604726e-06,
|
|
"loss": 2.560100555419922,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 0.5938892425206874,
|
|
"grad_norm": 11.582517006358362,
|
|
"learning_rate": 9.712516748314001e-06,
|
|
"loss": 2.733274459838867,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 0.5945257797581158,
|
|
"grad_norm": 11.971564179710118,
|
|
"learning_rate": 9.711277646097308e-06,
|
|
"loss": 2.7829017639160156,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 0.5951623169955442,
|
|
"grad_norm": 10.570919690785455,
|
|
"learning_rate": 9.710035958634587e-06,
|
|
"loss": 2.2792487144470215,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.5957988542329726,
|
|
"grad_norm": 12.995799525018636,
|
|
"learning_rate": 9.708791686607195e-06,
|
|
"loss": 3.31978702545166,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 0.596435391470401,
|
|
"grad_norm": 13.201972697733703,
|
|
"learning_rate": 9.707544830697912e-06,
|
|
"loss": 2.9213595390319824,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 0.5970719287078294,
|
|
"grad_norm": 12.013554515053972,
|
|
"learning_rate": 9.706295391590928e-06,
|
|
"loss": 2.7369606494903564,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 0.5977084659452578,
|
|
"grad_norm": 14.418462050130708,
|
|
"learning_rate": 9.705043369971857e-06,
|
|
"loss": 2.67887282371521,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 0.5983450031826862,
|
|
"grad_norm": 10.116254580416458,
|
|
"learning_rate": 9.703788766527728e-06,
|
|
"loss": 2.2887253761291504,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.5989815404201145,
|
|
"grad_norm": 29.1755959070319,
|
|
"learning_rate": 9.702531581946985e-06,
|
|
"loss": 3.5381555557250977,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 0.5996180776575429,
|
|
"grad_norm": 7.5457809902099315,
|
|
"learning_rate": 9.701271816919493e-06,
|
|
"loss": 2.7492623329162598,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 0.6002546148949713,
|
|
"grad_norm": 9.666136274144206,
|
|
"learning_rate": 9.700009472136527e-06,
|
|
"loss": 3.052298069000244,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 0.6008911521323997,
|
|
"grad_norm": 11.284501524470071,
|
|
"learning_rate": 9.698744548290784e-06,
|
|
"loss": 3.2382991313934326,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 0.6015276893698281,
|
|
"grad_norm": 20.447480527022496,
|
|
"learning_rate": 9.697477046076368e-06,
|
|
"loss": 2.696122646331787,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.6021642266072565,
|
|
"grad_norm": 14.817358593665878,
|
|
"learning_rate": 9.696206966188808e-06,
|
|
"loss": 2.5964112281799316,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 0.602800763844685,
|
|
"grad_norm": 7.632477261909105,
|
|
"learning_rate": 9.694934309325037e-06,
|
|
"loss": 3.009591579437256,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 0.6034373010821134,
|
|
"grad_norm": 12.63016437996808,
|
|
"learning_rate": 9.69365907618341e-06,
|
|
"loss": 2.727325201034546,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 0.6040738383195416,
|
|
"grad_norm": 7.407783999732234,
|
|
"learning_rate": 9.692381267463693e-06,
|
|
"loss": 2.722775459289551,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 0.60471037555697,
|
|
"grad_norm": 29.043660573819057,
|
|
"learning_rate": 9.691100883867065e-06,
|
|
"loss": 3.696639060974121,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.6053469127943985,
|
|
"grad_norm": 13.532160636970735,
|
|
"learning_rate": 9.689817926096117e-06,
|
|
"loss": 2.8891990184783936,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 0.6059834500318269,
|
|
"grad_norm": 9.623235170481506,
|
|
"learning_rate": 9.688532394854855e-06,
|
|
"loss": 2.2772269248962402,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 0.6066199872692553,
|
|
"grad_norm": 6.814780336997041,
|
|
"learning_rate": 9.687244290848696e-06,
|
|
"loss": 2.648423433303833,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 0.6072565245066837,
|
|
"grad_norm": 10.864208391067539,
|
|
"learning_rate": 9.685953614784468e-06,
|
|
"loss": 2.5183968544006348,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 0.6078930617441121,
|
|
"grad_norm": 13.659964858560148,
|
|
"learning_rate": 9.68466036737041e-06,
|
|
"loss": 2.199960947036743,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.6085295989815405,
|
|
"grad_norm": 20.25190674068994,
|
|
"learning_rate": 9.683364549316175e-06,
|
|
"loss": 2.9352359771728516,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 0.6091661362189688,
|
|
"grad_norm": 11.730022670663669,
|
|
"learning_rate": 9.68206616133282e-06,
|
|
"loss": 2.289551258087158,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 0.6098026734563972,
|
|
"grad_norm": 10.590887207972477,
|
|
"learning_rate": 9.680765204132824e-06,
|
|
"loss": 2.6397624015808105,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 0.6104392106938256,
|
|
"grad_norm": 14.6562312186882,
|
|
"learning_rate": 9.679461678430064e-06,
|
|
"loss": 2.5100960731506348,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 0.611075747931254,
|
|
"grad_norm": 6.842906166218895,
|
|
"learning_rate": 9.678155584939832e-06,
|
|
"loss": 2.798889636993408,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.6117122851686824,
|
|
"grad_norm": 8.101430276051339,
|
|
"learning_rate": 9.676846924378829e-06,
|
|
"loss": 2.937227487564087,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 0.6123488224061108,
|
|
"grad_norm": 8.938006978202733,
|
|
"learning_rate": 9.67553569746516e-06,
|
|
"loss": 3.0781333446502686,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 0.6129853596435392,
|
|
"grad_norm": 10.016307273177329,
|
|
"learning_rate": 9.674221904918346e-06,
|
|
"loss": 2.077643394470215,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 0.6136218968809676,
|
|
"grad_norm": 11.847324449864272,
|
|
"learning_rate": 9.67290554745931e-06,
|
|
"loss": 2.616429567337036,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 0.614258434118396,
|
|
"grad_norm": 9.245571883498446,
|
|
"learning_rate": 9.671586625810387e-06,
|
|
"loss": 2.694222927093506,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.6148949713558243,
|
|
"grad_norm": 19.46466262879445,
|
|
"learning_rate": 9.670265140695312e-06,
|
|
"loss": 3.4693071842193604,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 0.6155315085932527,
|
|
"grad_norm": 17.95113213590351,
|
|
"learning_rate": 9.668941092839233e-06,
|
|
"loss": 3.5103704929351807,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 0.6161680458306811,
|
|
"grad_norm": 12.472820424341016,
|
|
"learning_rate": 9.667614482968702e-06,
|
|
"loss": 3.2685065269470215,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 0.6168045830681095,
|
|
"grad_norm": 8.937493281166018,
|
|
"learning_rate": 9.666285311811678e-06,
|
|
"loss": 2.4610326290130615,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 0.6174411203055379,
|
|
"grad_norm": 6.988745721519354,
|
|
"learning_rate": 9.664953580097525e-06,
|
|
"loss": 2.848910331726074,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.6180776575429663,
|
|
"grad_norm": 21.585861018039346,
|
|
"learning_rate": 9.663619288557009e-06,
|
|
"loss": 2.805069923400879,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 0.6187141947803947,
|
|
"grad_norm": 16.996625729938586,
|
|
"learning_rate": 9.662282437922305e-06,
|
|
"loss": 2.859009265899658,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 0.6193507320178231,
|
|
"grad_norm": 57.42435078890917,
|
|
"learning_rate": 9.660943028926992e-06,
|
|
"loss": 3.6436121463775635,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 0.6199872692552514,
|
|
"grad_norm": 14.889729750770474,
|
|
"learning_rate": 9.65960106230605e-06,
|
|
"loss": 4.130721092224121,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 0.6206238064926798,
|
|
"grad_norm": 17.925553905210514,
|
|
"learning_rate": 9.658256538795864e-06,
|
|
"loss": 3.6854283809661865,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.6212603437301082,
|
|
"grad_norm": 10.893711830450949,
|
|
"learning_rate": 9.656909459134221e-06,
|
|
"loss": 2.419802665710449,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 0.6218968809675366,
|
|
"grad_norm": 5.808124910458566,
|
|
"learning_rate": 9.655559824060315e-06,
|
|
"loss": 2.861011028289795,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 0.622533418204965,
|
|
"grad_norm": 9.59566663231614,
|
|
"learning_rate": 9.654207634314735e-06,
|
|
"loss": 2.3466222286224365,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 0.6231699554423934,
|
|
"grad_norm": 11.957468337787667,
|
|
"learning_rate": 9.652852890639476e-06,
|
|
"loss": 2.4655849933624268,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 0.6238064926798218,
|
|
"grad_norm": 13.671944162679766,
|
|
"learning_rate": 9.651495593777938e-06,
|
|
"loss": 2.838242292404175,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.6244430299172502,
|
|
"grad_norm": 7.5068812433289835,
|
|
"learning_rate": 9.650135744474916e-06,
|
|
"loss": 2.5288822650909424,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 0.6250795671546785,
|
|
"grad_norm": 10.547951834836692,
|
|
"learning_rate": 9.648773343476605e-06,
|
|
"loss": 2.060285806655884,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 0.6257161043921069,
|
|
"grad_norm": 14.1063328514092,
|
|
"learning_rate": 9.647408391530606e-06,
|
|
"loss": 2.828702926635742,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 0.6263526416295353,
|
|
"grad_norm": 8.410368601907516,
|
|
"learning_rate": 9.646040889385918e-06,
|
|
"loss": 2.6558516025543213,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 0.6269891788669637,
|
|
"grad_norm": 17.404941932367286,
|
|
"learning_rate": 9.644670837792935e-06,
|
|
"loss": 2.8424549102783203,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.6276257161043921,
|
|
"grad_norm": 19.693506640938104,
|
|
"learning_rate": 9.643298237503455e-06,
|
|
"loss": 3.578190565109253,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 0.6282622533418205,
|
|
"grad_norm": 12.247945521255543,
|
|
"learning_rate": 9.641923089270672e-06,
|
|
"loss": 2.4419023990631104,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 0.6288987905792489,
|
|
"grad_norm": 18.897474817120475,
|
|
"learning_rate": 9.64054539384918e-06,
|
|
"loss": 3.304727792739868,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 0.6295353278166773,
|
|
"grad_norm": 9.666539700787844,
|
|
"learning_rate": 9.639165151994968e-06,
|
|
"loss": 3.081479072570801,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 0.6301718650541057,
|
|
"grad_norm": 13.300604337026657,
|
|
"learning_rate": 9.637782364465426e-06,
|
|
"loss": 2.7969231605529785,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.630808402291534,
|
|
"grad_norm": 13.544924066767441,
|
|
"learning_rate": 9.636397032019336e-06,
|
|
"loss": 2.6117305755615234,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 0.6314449395289624,
|
|
"grad_norm": 8.34701314140355,
|
|
"learning_rate": 9.635009155416883e-06,
|
|
"loss": 2.816713571548462,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 0.6320814767663908,
|
|
"grad_norm": 17.956217540623307,
|
|
"learning_rate": 9.63361873541964e-06,
|
|
"loss": 2.4624338150024414,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 0.6327180140038192,
|
|
"grad_norm": 21.573934589758643,
|
|
"learning_rate": 9.632225772790585e-06,
|
|
"loss": 2.5641918182373047,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 0.6333545512412476,
|
|
"grad_norm": 10.965534948552364,
|
|
"learning_rate": 9.63083026829408e-06,
|
|
"loss": 2.498544216156006,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.633991088478676,
|
|
"grad_norm": 8.197591538777314,
|
|
"learning_rate": 9.629432222695894e-06,
|
|
"loss": 2.6886236667633057,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 0.6346276257161044,
|
|
"grad_norm": 8.450584454061838,
|
|
"learning_rate": 9.628031636763181e-06,
|
|
"loss": 2.6613171100616455,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 0.6352641629535328,
|
|
"grad_norm": 9.322665846807503,
|
|
"learning_rate": 9.626628511264495e-06,
|
|
"loss": 2.8803696632385254,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 0.6359007001909611,
|
|
"grad_norm": 11.80233014986357,
|
|
"learning_rate": 9.625222846969778e-06,
|
|
"loss": 2.804666519165039,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 0.6365372374283895,
|
|
"grad_norm": 105.30794617750519,
|
|
"learning_rate": 9.623814644650368e-06,
|
|
"loss": 2.4511563777923584,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.6371737746658179,
|
|
"grad_norm": 14.243059144369466,
|
|
"learning_rate": 9.622403905079002e-06,
|
|
"loss": 2.575465202331543,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 0.6378103119032463,
|
|
"grad_norm": 25.870916372150262,
|
|
"learning_rate": 9.620990629029795e-06,
|
|
"loss": 2.641998052597046,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 0.6384468491406747,
|
|
"grad_norm": 46.348127357952244,
|
|
"learning_rate": 9.619574817278266e-06,
|
|
"loss": 3.3244099617004395,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 0.6390833863781031,
|
|
"grad_norm": 16.95867609025848,
|
|
"learning_rate": 9.618156470601323e-06,
|
|
"loss": 2.8683221340179443,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 0.6397199236155315,
|
|
"grad_norm": 5.463313912510345,
|
|
"learning_rate": 9.61673558977726e-06,
|
|
"loss": 2.685462713241577,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.6403564608529599,
|
|
"grad_norm": 21.998438759448344,
|
|
"learning_rate": 9.615312175585765e-06,
|
|
"loss": 1.7907297611236572,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 0.6409929980903882,
|
|
"grad_norm": 17.84617329993066,
|
|
"learning_rate": 9.613886228807919e-06,
|
|
"loss": 2.961916446685791,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 0.6416295353278166,
|
|
"grad_norm": 11.342330696063822,
|
|
"learning_rate": 9.612457750226188e-06,
|
|
"loss": 2.5012712478637695,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 0.642266072565245,
|
|
"grad_norm": 12.458024468303618,
|
|
"learning_rate": 9.611026740624428e-06,
|
|
"loss": 2.8131322860717773,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 0.6429026098026734,
|
|
"grad_norm": 13.48943331389402,
|
|
"learning_rate": 9.609593200787887e-06,
|
|
"loss": 2.596027135848999,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.6435391470401018,
|
|
"grad_norm": 11.278368207207846,
|
|
"learning_rate": 9.6081571315032e-06,
|
|
"loss": 3.150757074356079,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 0.6441756842775302,
|
|
"grad_norm": 9.32868713743566,
|
|
"learning_rate": 9.606718533558386e-06,
|
|
"loss": 2.443676710128784,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 0.6448122215149587,
|
|
"grad_norm": 9.53734309113041,
|
|
"learning_rate": 9.60527740774286e-06,
|
|
"loss": 2.869262218475342,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 0.645448758752387,
|
|
"grad_norm": 15.34769784613947,
|
|
"learning_rate": 9.603833754847414e-06,
|
|
"loss": 2.650500774383545,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 0.6460852959898155,
|
|
"grad_norm": 9.355525595561428,
|
|
"learning_rate": 9.602387575664236e-06,
|
|
"loss": 2.9539999961853027,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.6467218332272437,
|
|
"grad_norm": 7.6381224701693915,
|
|
"learning_rate": 9.600938870986892e-06,
|
|
"loss": 2.8956141471862793,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 0.6473583704646722,
|
|
"grad_norm": 15.916159840938626,
|
|
"learning_rate": 9.59948764161034e-06,
|
|
"loss": 2.505089282989502,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 0.6479949077021006,
|
|
"grad_norm": 14.391975673606755,
|
|
"learning_rate": 9.598033888330925e-06,
|
|
"loss": 3.1346678733825684,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 0.648631444939529,
|
|
"grad_norm": 10.184848624541882,
|
|
"learning_rate": 9.596577611946367e-06,
|
|
"loss": 2.797295093536377,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 0.6492679821769574,
|
|
"grad_norm": 12.211496912574797,
|
|
"learning_rate": 9.595118813255779e-06,
|
|
"loss": 3.117927074432373,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.6499045194143858,
|
|
"grad_norm": 7.0076195792319025,
|
|
"learning_rate": 9.593657493059657e-06,
|
|
"loss": 2.616992473602295,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 0.6505410566518142,
|
|
"grad_norm": 17.164091830210232,
|
|
"learning_rate": 9.592193652159879e-06,
|
|
"loss": 1.9837325811386108,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 0.6511775938892426,
|
|
"grad_norm": 12.128964402426474,
|
|
"learning_rate": 9.590727291359706e-06,
|
|
"loss": 3.1724250316619873,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 0.6518141311266709,
|
|
"grad_norm": 14.328040852937985,
|
|
"learning_rate": 9.589258411463784e-06,
|
|
"loss": 3.0149502754211426,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 0.6524506683640993,
|
|
"grad_norm": 20.823373719756965,
|
|
"learning_rate": 9.587787013278139e-06,
|
|
"loss": 1.9769032001495361,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.6530872056015277,
|
|
"grad_norm": 8.90460619737646,
|
|
"learning_rate": 9.586313097610178e-06,
|
|
"loss": 2.537348508834839,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 0.6537237428389561,
|
|
"grad_norm": 11.089614753035884,
|
|
"learning_rate": 9.584836665268693e-06,
|
|
"loss": 2.513148307800293,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 0.6543602800763845,
|
|
"grad_norm": 9.366911537113666,
|
|
"learning_rate": 9.583357717063854e-06,
|
|
"loss": 2.6030631065368652,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 0.6549968173138129,
|
|
"grad_norm": 9.853856674039763,
|
|
"learning_rate": 9.581876253807214e-06,
|
|
"loss": 2.9064598083496094,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 0.6556333545512413,
|
|
"grad_norm": 30.320994571634927,
|
|
"learning_rate": 9.580392276311702e-06,
|
|
"loss": 2.4541501998901367,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.6562698917886697,
|
|
"grad_norm": 11.556472751008794,
|
|
"learning_rate": 9.578905785391633e-06,
|
|
"loss": 2.508260726928711,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 0.6569064290260981,
|
|
"grad_norm": 8.359823622259187,
|
|
"learning_rate": 9.577416781862696e-06,
|
|
"loss": 3.3850183486938477,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 0.6575429662635264,
|
|
"grad_norm": 19.05734669637527,
|
|
"learning_rate": 9.575925266541959e-06,
|
|
"loss": 3.4974966049194336,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 0.6581795035009548,
|
|
"grad_norm": 29.14312544087796,
|
|
"learning_rate": 9.57443124024787e-06,
|
|
"loss": 3.006150245666504,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 0.6588160407383832,
|
|
"grad_norm": 14.691926004444978,
|
|
"learning_rate": 9.572934703800258e-06,
|
|
"loss": 3.4365720748901367,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.6594525779758116,
|
|
"grad_norm": 14.807081676008671,
|
|
"learning_rate": 9.57143565802032e-06,
|
|
"loss": 2.8804702758789062,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 0.66008911521324,
|
|
"grad_norm": 11.654120918304145,
|
|
"learning_rate": 9.569934103730642e-06,
|
|
"loss": 2.5688960552215576,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 0.6607256524506684,
|
|
"grad_norm": 6.234164356177007,
|
|
"learning_rate": 9.568430041755175e-06,
|
|
"loss": 2.5211703777313232,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 0.6613621896880968,
|
|
"grad_norm": 32.671300998365844,
|
|
"learning_rate": 9.566923472919256e-06,
|
|
"loss": 2.8633322715759277,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 0.6619987269255252,
|
|
"grad_norm": 21.709034107636633,
|
|
"learning_rate": 9.56541439804959e-06,
|
|
"loss": 3.4090006351470947,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.6626352641629535,
|
|
"grad_norm": 7.764859129161498,
|
|
"learning_rate": 9.56390281797426e-06,
|
|
"loss": 2.351412296295166,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 0.6632718014003819,
|
|
"grad_norm": 12.587310805196983,
|
|
"learning_rate": 9.562388733522727e-06,
|
|
"loss": 3.3595516681671143,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 0.6639083386378103,
|
|
"grad_norm": 30.16334384136349,
|
|
"learning_rate": 9.560872145525819e-06,
|
|
"loss": 2.237828016281128,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 0.6645448758752387,
|
|
"grad_norm": 13.365520517631683,
|
|
"learning_rate": 9.559353054815744e-06,
|
|
"loss": 2.6913130283355713,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 0.6651814131126671,
|
|
"grad_norm": 10.191037178015522,
|
|
"learning_rate": 9.557831462226083e-06,
|
|
"loss": 2.3729376792907715,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.6658179503500955,
|
|
"grad_norm": 10.584728421237422,
|
|
"learning_rate": 9.556307368591784e-06,
|
|
"loss": 3.0043184757232666,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 0.6664544875875239,
|
|
"grad_norm": 11.97516253252263,
|
|
"learning_rate": 9.554780774749176e-06,
|
|
"loss": 2.6922152042388916,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 0.6670910248249523,
|
|
"grad_norm": 5.019123938332489,
|
|
"learning_rate": 9.553251681535953e-06,
|
|
"loss": 2.967332363128662,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 0.6677275620623806,
|
|
"grad_norm": 12.709829778529743,
|
|
"learning_rate": 9.551720089791183e-06,
|
|
"loss": 2.6839001178741455,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 0.668364099299809,
|
|
"grad_norm": 61.530954347092546,
|
|
"learning_rate": 9.550186000355304e-06,
|
|
"loss": 4.202565670013428,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.6690006365372374,
|
|
"grad_norm": 26.613941491404976,
|
|
"learning_rate": 9.548649414070128e-06,
|
|
"loss": 2.917819023132324,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 0.6696371737746658,
|
|
"grad_norm": 11.67392234440942,
|
|
"learning_rate": 9.547110331778832e-06,
|
|
"loss": 2.832442283630371,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 0.6702737110120942,
|
|
"grad_norm": 10.380615672059479,
|
|
"learning_rate": 9.545568754325968e-06,
|
|
"loss": 2.8541481494903564,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 0.6709102482495226,
|
|
"grad_norm": 8.866291751004375,
|
|
"learning_rate": 9.544024682557456e-06,
|
|
"loss": 3.009812593460083,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 0.671546785486951,
|
|
"grad_norm": 10.313225718417016,
|
|
"learning_rate": 9.542478117320577e-06,
|
|
"loss": 2.984719753265381,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.6721833227243794,
|
|
"grad_norm": 14.315658019076201,
|
|
"learning_rate": 9.540929059463993e-06,
|
|
"loss": 3.14715576171875,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 0.6728198599618078,
|
|
"grad_norm": 16.66994713084257,
|
|
"learning_rate": 9.539377509837723e-06,
|
|
"loss": 2.7449615001678467,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 0.6734563971992361,
|
|
"grad_norm": 13.715914989308832,
|
|
"learning_rate": 9.537823469293156e-06,
|
|
"loss": 2.9749996662139893,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 0.6740929344366645,
|
|
"grad_norm": 13.04283331969029,
|
|
"learning_rate": 9.536266938683056e-06,
|
|
"loss": 2.70072865486145,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 0.6747294716740929,
|
|
"grad_norm": 7.154339431473497,
|
|
"learning_rate": 9.534707918861544e-06,
|
|
"loss": 2.8494629859924316,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.6753660089115213,
|
|
"grad_norm": 16.606313397601085,
|
|
"learning_rate": 9.533146410684107e-06,
|
|
"loss": 3.6407909393310547,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 0.6760025461489497,
|
|
"grad_norm": 19.74189912335196,
|
|
"learning_rate": 9.531582415007602e-06,
|
|
"loss": 2.9390859603881836,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 0.6766390833863781,
|
|
"grad_norm": 11.167525562480838,
|
|
"learning_rate": 9.530015932690251e-06,
|
|
"loss": 2.607184410095215,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 0.6772756206238065,
|
|
"grad_norm": 9.308043099018203,
|
|
"learning_rate": 9.528446964591636e-06,
|
|
"loss": 2.301570177078247,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 0.6779121578612349,
|
|
"grad_norm": 9.454496131504424,
|
|
"learning_rate": 9.526875511572706e-06,
|
|
"loss": 2.7193057537078857,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.6785486950986632,
|
|
"grad_norm": 16.5191053949389,
|
|
"learning_rate": 9.525301574495776e-06,
|
|
"loss": 2.7189886569976807,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 0.6791852323360916,
|
|
"grad_norm": 11.651824766272222,
|
|
"learning_rate": 9.523725154224518e-06,
|
|
"loss": 2.7225799560546875,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 0.67982176957352,
|
|
"grad_norm": 8.086333384701465,
|
|
"learning_rate": 9.522146251623974e-06,
|
|
"loss": 2.777008533477783,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 0.6804583068109484,
|
|
"grad_norm": 11.185613683095509,
|
|
"learning_rate": 9.52056486756054e-06,
|
|
"loss": 2.9254977703094482,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 0.6810948440483768,
|
|
"grad_norm": 11.274061507401917,
|
|
"learning_rate": 9.51898100290198e-06,
|
|
"loss": 3.084275484085083,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.6817313812858052,
|
|
"grad_norm": 28.518984093764796,
|
|
"learning_rate": 9.517394658517416e-06,
|
|
"loss": 2.595210075378418,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 0.6823679185232336,
|
|
"grad_norm": 16.67925796088532,
|
|
"learning_rate": 9.515805835277334e-06,
|
|
"loss": 1.6448781490325928,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 0.683004455760662,
|
|
"grad_norm": 12.092154100602222,
|
|
"learning_rate": 9.514214534053575e-06,
|
|
"loss": 2.535048246383667,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 0.6836409929980903,
|
|
"grad_norm": 9.646276725987144,
|
|
"learning_rate": 9.512620755719344e-06,
|
|
"loss": 2.979246139526367,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 0.6842775302355187,
|
|
"grad_norm": 10.015633260078708,
|
|
"learning_rate": 9.511024501149205e-06,
|
|
"loss": 2.9097678661346436,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.6849140674729471,
|
|
"grad_norm": 10.332398060011624,
|
|
"learning_rate": 9.509425771219076e-06,
|
|
"loss": 2.832192897796631,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 0.6855506047103755,
|
|
"grad_norm": 21.906498330389713,
|
|
"learning_rate": 9.507824566806243e-06,
|
|
"loss": 2.8919320106506348,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 0.686187141947804,
|
|
"grad_norm": 18.16755948802436,
|
|
"learning_rate": 9.506220888789339e-06,
|
|
"loss": 3.3066060543060303,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 0.6868236791852323,
|
|
"grad_norm": 8.896605672805526,
|
|
"learning_rate": 9.504614738048363e-06,
|
|
"loss": 3.0294253826141357,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 0.6874602164226608,
|
|
"grad_norm": 9.587432718346571,
|
|
"learning_rate": 9.503006115464663e-06,
|
|
"loss": 3.116400718688965,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.6880967536600892,
|
|
"grad_norm": 15.001096439974274,
|
|
"learning_rate": 9.501395021920952e-06,
|
|
"loss": 2.7199864387512207,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 0.6887332908975176,
|
|
"grad_norm": 10.840538675762422,
|
|
"learning_rate": 9.49978145830129e-06,
|
|
"loss": 2.873979091644287,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 0.6893698281349459,
|
|
"grad_norm": 22.937429004050184,
|
|
"learning_rate": 9.498165425491101e-06,
|
|
"loss": 2.8262085914611816,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 0.6900063653723743,
|
|
"grad_norm": 7.69599132567219,
|
|
"learning_rate": 9.49654692437716e-06,
|
|
"loss": 2.6464715003967285,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 0.6906429026098027,
|
|
"grad_norm": 9.192389428527143,
|
|
"learning_rate": 9.494925955847592e-06,
|
|
"loss": 2.8342204093933105,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.6912794398472311,
|
|
"grad_norm": 17.28956378461768,
|
|
"learning_rate": 9.493302520791882e-06,
|
|
"loss": 2.3837647438049316,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 0.6919159770846595,
|
|
"grad_norm": 9.197528424140573,
|
|
"learning_rate": 9.491676620100868e-06,
|
|
"loss": 2.9350028038024902,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 0.6925525143220879,
|
|
"grad_norm": 10.823400516266032,
|
|
"learning_rate": 9.490048254666739e-06,
|
|
"loss": 2.621924877166748,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 0.6931890515595163,
|
|
"grad_norm": 9.534846604119528,
|
|
"learning_rate": 9.488417425383038e-06,
|
|
"loss": 2.387388229370117,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 0.6938255887969447,
|
|
"grad_norm": 7.087019594894608,
|
|
"learning_rate": 9.486784133144658e-06,
|
|
"loss": 2.721595287322998,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.694462126034373,
|
|
"grad_norm": 10.574013511195234,
|
|
"learning_rate": 9.485148378847844e-06,
|
|
"loss": 2.258244037628174,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 0.6950986632718014,
|
|
"grad_norm": 9.11464724688561,
|
|
"learning_rate": 9.483510163390195e-06,
|
|
"loss": 2.755533456802368,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 0.6957352005092298,
|
|
"grad_norm": 23.335375102792593,
|
|
"learning_rate": 9.481869487670656e-06,
|
|
"loss": 2.5640640258789062,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 0.6963717377466582,
|
|
"grad_norm": 7.944933327304487,
|
|
"learning_rate": 9.480226352589525e-06,
|
|
"loss": 2.8003828525543213,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 0.6970082749840866,
|
|
"grad_norm": 16.110349833581328,
|
|
"learning_rate": 9.47858075904845e-06,
|
|
"loss": 3.6288437843322754,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.697644812221515,
|
|
"grad_norm": 10.757773901045345,
|
|
"learning_rate": 9.476932707950425e-06,
|
|
"loss": 2.5156314373016357,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 0.6982813494589434,
|
|
"grad_norm": 8.398479467906514,
|
|
"learning_rate": 9.475282200199796e-06,
|
|
"loss": 2.8104264736175537,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 0.6989178866963718,
|
|
"grad_norm": 10.303948490621195,
|
|
"learning_rate": 9.473629236702256e-06,
|
|
"loss": 2.428539276123047,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 0.6995544239338001,
|
|
"grad_norm": 22.128001640346156,
|
|
"learning_rate": 9.47197381836484e-06,
|
|
"loss": 2.9523701667785645,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 0.7001909611712285,
|
|
"grad_norm": 11.573226008669508,
|
|
"learning_rate": 9.470315946095943e-06,
|
|
"loss": 2.7546629905700684,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.7008274984086569,
|
|
"grad_norm": 8.90312500544347,
|
|
"learning_rate": 9.468655620805292e-06,
|
|
"loss": 2.6183578968048096,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 0.7014640356460853,
|
|
"grad_norm": 14.678059850042777,
|
|
"learning_rate": 9.46699284340397e-06,
|
|
"loss": 2.7603936195373535,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 0.7021005728835137,
|
|
"grad_norm": 17.25477344357432,
|
|
"learning_rate": 9.4653276148044e-06,
|
|
"loss": 3.5560851097106934,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 0.7027371101209421,
|
|
"grad_norm": 8.082436193059547,
|
|
"learning_rate": 9.463659935920354e-06,
|
|
"loss": 3.1456332206726074,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 0.7033736473583705,
|
|
"grad_norm": 6.9670496222287435,
|
|
"learning_rate": 9.461989807666949e-06,
|
|
"loss": 2.6217257976531982,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.7040101845957989,
|
|
"grad_norm": 7.4847329907513895,
|
|
"learning_rate": 9.460317230960638e-06,
|
|
"loss": 2.6880407333374023,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 0.7046467218332273,
|
|
"grad_norm": 11.371556771803995,
|
|
"learning_rate": 9.45864220671923e-06,
|
|
"loss": 2.5942535400390625,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 0.7052832590706556,
|
|
"grad_norm": 8.785336056701258,
|
|
"learning_rate": 9.456964735861866e-06,
|
|
"loss": 2.8641157150268555,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 0.705919796308084,
|
|
"grad_norm": 7.219188900852088,
|
|
"learning_rate": 9.455284819309036e-06,
|
|
"loss": 2.8831381797790527,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 0.7065563335455124,
|
|
"grad_norm": 13.683598183394244,
|
|
"learning_rate": 9.453602457982569e-06,
|
|
"loss": 2.969987154006958,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.7071928707829408,
|
|
"grad_norm": 12.315788555192363,
|
|
"learning_rate": 9.451917652805638e-06,
|
|
"loss": 3.027716636657715,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 0.7078294080203692,
|
|
"grad_norm": 36.19197415120843,
|
|
"learning_rate": 9.450230404702754e-06,
|
|
"loss": 3.1672916412353516,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 0.7084659452577976,
|
|
"grad_norm": 12.316134019779161,
|
|
"learning_rate": 9.448540714599772e-06,
|
|
"loss": 2.8427469730377197,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 0.709102482495226,
|
|
"grad_norm": 8.576093057041657,
|
|
"learning_rate": 9.446848583423884e-06,
|
|
"loss": 2.7625861167907715,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 0.7097390197326544,
|
|
"grad_norm": 12.739162574810516,
|
|
"learning_rate": 9.445154012103623e-06,
|
|
"loss": 2.662567615509033,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 0.7103755569700827,
|
|
"grad_norm": 10.806407126421561,
|
|
"learning_rate": 9.44345700156886e-06,
|
|
"loss": 2.995030641555786,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 0.7110120942075111,
|
|
"grad_norm": 7.97895001806647,
|
|
"learning_rate": 9.441757552750808e-06,
|
|
"loss": 2.9475135803222656,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 0.7116486314449395,
|
|
"grad_norm": 8.043717440661599,
|
|
"learning_rate": 9.440055666582014e-06,
|
|
"loss": 2.9263863563537598,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 0.7122851686823679,
|
|
"grad_norm": 23.862671167213218,
|
|
"learning_rate": 9.438351343996358e-06,
|
|
"loss": 3.3885464668273926,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 0.7129217059197963,
|
|
"grad_norm": 7.42878869816598,
|
|
"learning_rate": 9.43664458592907e-06,
|
|
"loss": 1.4678406715393066,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.7135582431572247,
|
|
"grad_norm": 8.834797638802678,
|
|
"learning_rate": 9.434935393316709e-06,
|
|
"loss": 3.115852117538452,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 0.7141947803946531,
|
|
"grad_norm": 8.062053836270886,
|
|
"learning_rate": 9.433223767097163e-06,
|
|
"loss": 2.804098129272461,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 0.7148313176320815,
|
|
"grad_norm": 8.62380241880142,
|
|
"learning_rate": 9.431509708209669e-06,
|
|
"loss": 2.8543026447296143,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 0.7154678548695098,
|
|
"grad_norm": 10.171759338761628,
|
|
"learning_rate": 9.42979321759479e-06,
|
|
"loss": 3.0115180015563965,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 0.7161043921069382,
|
|
"grad_norm": 7.720748428345774,
|
|
"learning_rate": 9.428074296194426e-06,
|
|
"loss": 2.703876495361328,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.7167409293443666,
|
|
"grad_norm": 15.75954335417896,
|
|
"learning_rate": 9.426352944951806e-06,
|
|
"loss": 2.066390037536621,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 0.717377466581795,
|
|
"grad_norm": 30.210470512783385,
|
|
"learning_rate": 9.424629164811506e-06,
|
|
"loss": 2.41465425491333,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 0.7180140038192234,
|
|
"grad_norm": 29.886494199621875,
|
|
"learning_rate": 9.422902956719416e-06,
|
|
"loss": 3.1158735752105713,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 0.7186505410566518,
|
|
"grad_norm": 9.160744179358328,
|
|
"learning_rate": 9.421174321622775e-06,
|
|
"loss": 3.1245779991149902,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 0.7192870782940802,
|
|
"grad_norm": 10.036965295273822,
|
|
"learning_rate": 9.419443260470142e-06,
|
|
"loss": 3.2130062580108643,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.7199236155315086,
|
|
"grad_norm": 11.262756434590441,
|
|
"learning_rate": 9.417709774211415e-06,
|
|
"loss": 2.304853916168213,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 0.720560152768937,
|
|
"grad_norm": 6.95100006508944,
|
|
"learning_rate": 9.415973863797819e-06,
|
|
"loss": 3.0001373291015625,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 0.7211966900063653,
|
|
"grad_norm": 6.779094526020992,
|
|
"learning_rate": 9.414235530181907e-06,
|
|
"loss": 3.058368444442749,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 0.7218332272437937,
|
|
"grad_norm": 7.288056439154986,
|
|
"learning_rate": 9.412494774317571e-06,
|
|
"loss": 2.1561214923858643,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 0.7224697644812221,
|
|
"grad_norm": 18.231587786618007,
|
|
"learning_rate": 9.41075159716002e-06,
|
|
"loss": 3.1413750648498535,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 0.7231063017186505,
|
|
"grad_norm": 27.046129491897492,
|
|
"learning_rate": 9.409005999665799e-06,
|
|
"loss": 2.6081013679504395,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 0.7237428389560789,
|
|
"grad_norm": 12.010714101061575,
|
|
"learning_rate": 9.40725798279278e-06,
|
|
"loss": 2.7399230003356934,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 0.7243793761935073,
|
|
"grad_norm": 16.747765104249073,
|
|
"learning_rate": 9.405507547500165e-06,
|
|
"loss": 3.929919481277466,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 0.7250159134309357,
|
|
"grad_norm": 5.850715995419746,
|
|
"learning_rate": 9.403754694748475e-06,
|
|
"loss": 2.792790651321411,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 0.7256524506683641,
|
|
"grad_norm": 18.78460343789035,
|
|
"learning_rate": 9.401999425499565e-06,
|
|
"loss": 3.219160556793213,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.7262889879057924,
|
|
"grad_norm": 8.65575509331915,
|
|
"learning_rate": 9.400241740716617e-06,
|
|
"loss": 2.7919883728027344,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 0.7269255251432208,
|
|
"grad_norm": 12.096316324170619,
|
|
"learning_rate": 9.39848164136413e-06,
|
|
"loss": 2.685350179672241,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 0.7275620623806492,
|
|
"grad_norm": 8.389734545374605,
|
|
"learning_rate": 9.396719128407936e-06,
|
|
"loss": 2.8102335929870605,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 0.7281985996180776,
|
|
"grad_norm": 7.9309970738400075,
|
|
"learning_rate": 9.39495420281519e-06,
|
|
"loss": 2.8420093059539795,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 0.728835136855506,
|
|
"grad_norm": 19.095733546546768,
|
|
"learning_rate": 9.393186865554366e-06,
|
|
"loss": 2.548253297805786,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 0.7294716740929345,
|
|
"grad_norm": 10.026448629988439,
|
|
"learning_rate": 9.391417117595269e-06,
|
|
"loss": 2.791952610015869,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 0.7301082113303629,
|
|
"grad_norm": 10.751990454708412,
|
|
"learning_rate": 9.38964495990902e-06,
|
|
"loss": 3.065579414367676,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 0.7307447485677913,
|
|
"grad_norm": 7.276288885514958,
|
|
"learning_rate": 9.387870393468064e-06,
|
|
"loss": 2.558170795440674,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 0.7313812858052197,
|
|
"grad_norm": 9.52733291220368,
|
|
"learning_rate": 9.386093419246175e-06,
|
|
"loss": 2.5268654823303223,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 0.732017823042648,
|
|
"grad_norm": 7.579137019575734,
|
|
"learning_rate": 9.384314038218434e-06,
|
|
"loss": 2.5803568363189697,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.7326543602800764,
|
|
"grad_norm": 26.13725565815998,
|
|
"learning_rate": 9.382532251361257e-06,
|
|
"loss": 2.5109944343566895,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 0.7332908975175048,
|
|
"grad_norm": 9.713395534743217,
|
|
"learning_rate": 9.38074805965237e-06,
|
|
"loss": 2.518691062927246,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 0.7339274347549332,
|
|
"grad_norm": 10.806958111464304,
|
|
"learning_rate": 9.378961464070825e-06,
|
|
"loss": 2.5075459480285645,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 0.7345639719923616,
|
|
"grad_norm": 17.77282123835234,
|
|
"learning_rate": 9.377172465596992e-06,
|
|
"loss": 3.975275993347168,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 0.73520050922979,
|
|
"grad_norm": 9.82543914517833,
|
|
"learning_rate": 9.375381065212551e-06,
|
|
"loss": 2.5041916370391846,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 0.7358370464672184,
|
|
"grad_norm": 8.599307048137147,
|
|
"learning_rate": 9.373587263900518e-06,
|
|
"loss": 2.904417037963867,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 0.7364735837046468,
|
|
"grad_norm": 18.94314466908295,
|
|
"learning_rate": 9.371791062645208e-06,
|
|
"loss": 3.084871768951416,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 0.7371101209420751,
|
|
"grad_norm": 17.099900159090645,
|
|
"learning_rate": 9.369992462432264e-06,
|
|
"loss": 2.809452772140503,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 0.7377466581795035,
|
|
"grad_norm": 9.567564299680873,
|
|
"learning_rate": 9.36819146424864e-06,
|
|
"loss": 2.633657932281494,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 0.7383831954169319,
|
|
"grad_norm": 7.416023977531236,
|
|
"learning_rate": 9.366388069082609e-06,
|
|
"loss": 3.0897791385650635,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.7390197326543603,
|
|
"grad_norm": 9.546210002606603,
|
|
"learning_rate": 9.364582277923759e-06,
|
|
"loss": 2.208278179168701,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 0.7396562698917887,
|
|
"grad_norm": 10.933237630516508,
|
|
"learning_rate": 9.362774091762991e-06,
|
|
"loss": 2.7841458320617676,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 0.7402928071292171,
|
|
"grad_norm": 9.645369764277728,
|
|
"learning_rate": 9.36096351159252e-06,
|
|
"loss": 2.597827911376953,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 0.7409293443666455,
|
|
"grad_norm": 9.38784523263509,
|
|
"learning_rate": 9.35915053840588e-06,
|
|
"loss": 2.2382140159606934,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 0.7415658816040739,
|
|
"grad_norm": 8.964585442610392,
|
|
"learning_rate": 9.357335173197907e-06,
|
|
"loss": 2.700472354888916,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 0.7422024188415022,
|
|
"grad_norm": 82.45417581534511,
|
|
"learning_rate": 9.355517416964766e-06,
|
|
"loss": 2.7609753608703613,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 0.7428389560789306,
|
|
"grad_norm": 13.459972566387188,
|
|
"learning_rate": 9.353697270703917e-06,
|
|
"loss": 2.068809986114502,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 0.743475493316359,
|
|
"grad_norm": 9.022372778306588,
|
|
"learning_rate": 9.351874735414142e-06,
|
|
"loss": 3.0394768714904785,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 0.7441120305537874,
|
|
"grad_norm": 9.369511630945446,
|
|
"learning_rate": 9.35004981209553e-06,
|
|
"loss": 2.525172233581543,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 0.7447485677912158,
|
|
"grad_norm": 6.383815758559997,
|
|
"learning_rate": 9.348222501749482e-06,
|
|
"loss": 2.437615394592285,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.7453851050286442,
|
|
"grad_norm": 12.229040190394912,
|
|
"learning_rate": 9.34639280537871e-06,
|
|
"loss": 2.1296939849853516,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 0.7460216422660726,
|
|
"grad_norm": 9.876802802320743,
|
|
"learning_rate": 9.344560723987233e-06,
|
|
"loss": 3.275932550430298,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 0.746658179503501,
|
|
"grad_norm": 7.334313364656516,
|
|
"learning_rate": 9.342726258580377e-06,
|
|
"loss": 2.3332691192626953,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 0.7472947167409294,
|
|
"grad_norm": 16.286215753465054,
|
|
"learning_rate": 9.340889410164782e-06,
|
|
"loss": 2.85125994682312,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 0.7479312539783577,
|
|
"grad_norm": 8.035551862241554,
|
|
"learning_rate": 9.339050179748387e-06,
|
|
"loss": 3.0281364917755127,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.7485677912157861,
|
|
"grad_norm": 19.96642755772481,
|
|
"learning_rate": 9.33720856834045e-06,
|
|
"loss": 2.666728973388672,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 0.7492043284532145,
|
|
"grad_norm": 14.636540466976383,
|
|
"learning_rate": 9.335364576951527e-06,
|
|
"loss": 2.8794403076171875,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 0.7498408656906429,
|
|
"grad_norm": 10.583978621968955,
|
|
"learning_rate": 9.333518206593478e-06,
|
|
"loss": 2.5915474891662598,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 0.7504774029280713,
|
|
"grad_norm": 8.361868690386437,
|
|
"learning_rate": 9.331669458279474e-06,
|
|
"loss": 2.6375017166137695,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 0.7511139401654997,
|
|
"grad_norm": 39.954423796317634,
|
|
"learning_rate": 9.329818333023991e-06,
|
|
"loss": 2.1379952430725098,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.7517504774029281,
|
|
"grad_norm": 10.740761853769259,
|
|
"learning_rate": 9.327964831842807e-06,
|
|
"loss": 2.44290828704834,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 0.7523870146403565,
|
|
"grad_norm": 11.805613866789095,
|
|
"learning_rate": 9.326108955753001e-06,
|
|
"loss": 2.77449893951416,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 0.7530235518777848,
|
|
"grad_norm": 28.55282517673017,
|
|
"learning_rate": 9.324250705772964e-06,
|
|
"loss": 2.7647769451141357,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 0.7536600891152132,
|
|
"grad_norm": 12.002513981835214,
|
|
"learning_rate": 9.32239008292238e-06,
|
|
"loss": 2.573099136352539,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 0.7542966263526416,
|
|
"grad_norm": 8.43108586151736,
|
|
"learning_rate": 9.320527088222238e-06,
|
|
"loss": 3.139158010482788,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 0.75493316359007,
|
|
"grad_norm": 12.307148227097715,
|
|
"learning_rate": 9.318661722694832e-06,
|
|
"loss": 2.6034903526306152,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 0.7555697008274984,
|
|
"grad_norm": 15.0918032800552,
|
|
"learning_rate": 9.316793987363756e-06,
|
|
"loss": 3.005361557006836,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 0.7562062380649268,
|
|
"grad_norm": 8.91516719160434,
|
|
"learning_rate": 9.3149238832539e-06,
|
|
"loss": 2.7192862033843994,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 0.7568427753023552,
|
|
"grad_norm": 8.34591431880029,
|
|
"learning_rate": 9.313051411391458e-06,
|
|
"loss": 3.159897804260254,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 0.7574793125397836,
|
|
"grad_norm": 13.632637030695298,
|
|
"learning_rate": 9.311176572803922e-06,
|
|
"loss": 3.081416368484497,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.7581158497772119,
|
|
"grad_norm": 17.878514996352035,
|
|
"learning_rate": 9.309299368520084e-06,
|
|
"loss": 2.8336782455444336,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 0.7587523870146403,
|
|
"grad_norm": 18.65260863756698,
|
|
"learning_rate": 9.30741979957003e-06,
|
|
"loss": 2.881049871444702,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 0.7593889242520687,
|
|
"grad_norm": 29.42335411588454,
|
|
"learning_rate": 9.305537866985148e-06,
|
|
"loss": 3.6260194778442383,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 0.7600254614894971,
|
|
"grad_norm": 7.110804111133651,
|
|
"learning_rate": 9.303653571798124e-06,
|
|
"loss": 2.459531307220459,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 0.7606619987269255,
|
|
"grad_norm": 7.2639112639482875,
|
|
"learning_rate": 9.301766915042934e-06,
|
|
"loss": 2.501810073852539,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 0.7612985359643539,
|
|
"grad_norm": 7.162170089670997,
|
|
"learning_rate": 9.299877897754855e-06,
|
|
"loss": 2.7279324531555176,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 0.7619350732017823,
|
|
"grad_norm": 10.673649917991852,
|
|
"learning_rate": 9.297986520970458e-06,
|
|
"loss": 2.7421388626098633,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 0.7625716104392107,
|
|
"grad_norm": 12.13990360629329,
|
|
"learning_rate": 9.296092785727612e-06,
|
|
"loss": 2.960937023162842,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 0.7632081476766391,
|
|
"grad_norm": 10.83061394650552,
|
|
"learning_rate": 9.294196693065474e-06,
|
|
"loss": 2.5112879276275635,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 0.7638446849140674,
|
|
"grad_norm": 12.340066397980847,
|
|
"learning_rate": 9.292298244024497e-06,
|
|
"loss": 3.506455183029175,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.7644812221514958,
|
|
"grad_norm": 13.4782121228838,
|
|
"learning_rate": 9.290397439646429e-06,
|
|
"loss": 2.788090229034424,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 0.7651177593889242,
|
|
"grad_norm": 7.4373661345385615,
|
|
"learning_rate": 9.28849428097431e-06,
|
|
"loss": 2.7960195541381836,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 0.7657542966263526,
|
|
"grad_norm": 16.03603811943839,
|
|
"learning_rate": 9.286588769052469e-06,
|
|
"loss": 2.8688406944274902,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 0.766390833863781,
|
|
"grad_norm": 6.974990491610596,
|
|
"learning_rate": 9.28468090492653e-06,
|
|
"loss": 2.451228618621826,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 0.7670273711012094,
|
|
"grad_norm": 16.16256255384718,
|
|
"learning_rate": 9.282770689643406e-06,
|
|
"loss": 2.9304986000061035,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 0.7676639083386378,
|
|
"grad_norm": 14.651508634400052,
|
|
"learning_rate": 9.2808581242513e-06,
|
|
"loss": 2.8623952865600586,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 0.7683004455760662,
|
|
"grad_norm": 12.761068791969821,
|
|
"learning_rate": 9.278943209799703e-06,
|
|
"loss": 2.48644757270813,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 0.7689369828134945,
|
|
"grad_norm": 13.89816323154538,
|
|
"learning_rate": 9.277025947339398e-06,
|
|
"loss": 2.7891924381256104,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 0.7695735200509229,
|
|
"grad_norm": 7.760896895883094,
|
|
"learning_rate": 9.275106337922458e-06,
|
|
"loss": 2.4967544078826904,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 0.7702100572883513,
|
|
"grad_norm": 9.291985160258172,
|
|
"learning_rate": 9.273184382602237e-06,
|
|
"loss": 2.770961284637451,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.7708465945257797,
|
|
"grad_norm": 11.32569160257647,
|
|
"learning_rate": 9.271260082433381e-06,
|
|
"loss": 2.617584228515625,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 0.7714831317632082,
|
|
"grad_norm": 12.730047256965866,
|
|
"learning_rate": 9.269333438471826e-06,
|
|
"loss": 2.486670732498169,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 0.7721196690006366,
|
|
"grad_norm": 20.55652485105194,
|
|
"learning_rate": 9.267404451774787e-06,
|
|
"loss": 2.6450343132019043,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 0.772756206238065,
|
|
"grad_norm": 15.363179234075059,
|
|
"learning_rate": 9.265473123400768e-06,
|
|
"loss": 3.0391898155212402,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 0.7733927434754934,
|
|
"grad_norm": 11.067307771157337,
|
|
"learning_rate": 9.263539454409556e-06,
|
|
"loss": 2.699723243713379,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 0.7740292807129217,
|
|
"grad_norm": 18.131580548315544,
|
|
"learning_rate": 9.261603445862229e-06,
|
|
"loss": 2.382633686065674,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 0.77466581795035,
|
|
"grad_norm": 9.521834048728877,
|
|
"learning_rate": 9.25966509882114e-06,
|
|
"loss": 2.558096408843994,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 0.7753023551877785,
|
|
"grad_norm": 12.828514116759349,
|
|
"learning_rate": 9.25772441434993e-06,
|
|
"loss": 2.796450614929199,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 0.7759388924252069,
|
|
"grad_norm": 48.60647380704801,
|
|
"learning_rate": 9.255781393513523e-06,
|
|
"loss": 2.49711012840271,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 0.7765754296626353,
|
|
"grad_norm": 7.8399530210561865,
|
|
"learning_rate": 9.253836037378122e-06,
|
|
"loss": 2.9384589195251465,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.7772119669000637,
|
|
"grad_norm": 8.124337203909667,
|
|
"learning_rate": 9.251888347011214e-06,
|
|
"loss": 3.1889712810516357,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 0.7778485041374921,
|
|
"grad_norm": 8.605963861661598,
|
|
"learning_rate": 9.249938323481566e-06,
|
|
"loss": 2.405120372772217,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 0.7784850413749205,
|
|
"grad_norm": 10.301738676212663,
|
|
"learning_rate": 9.247985967859225e-06,
|
|
"loss": 3.1040446758270264,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 0.7791215786123489,
|
|
"grad_norm": 17.131686017400916,
|
|
"learning_rate": 9.246031281215522e-06,
|
|
"loss": 2.957456350326538,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 0.7797581158497772,
|
|
"grad_norm": 8.429220498236724,
|
|
"learning_rate": 9.244074264623058e-06,
|
|
"loss": 2.8081865310668945,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.7803946530872056,
|
|
"grad_norm": 7.907723815277553,
|
|
"learning_rate": 9.242114919155718e-06,
|
|
"loss": 2.852213144302368,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 0.781031190324634,
|
|
"grad_norm": 6.567851790415435,
|
|
"learning_rate": 9.24015324588867e-06,
|
|
"loss": 2.73412823677063,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 0.7816677275620624,
|
|
"grad_norm": 8.213853688073232,
|
|
"learning_rate": 9.238189245898348e-06,
|
|
"loss": 2.8029489517211914,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 0.7823042647994908,
|
|
"grad_norm": 8.365629205047716,
|
|
"learning_rate": 9.236222920262473e-06,
|
|
"loss": 2.9127280712127686,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 0.7829408020369192,
|
|
"grad_norm": 9.801297553132422,
|
|
"learning_rate": 9.234254270060036e-06,
|
|
"loss": 2.470463991165161,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.7835773392743476,
|
|
"grad_norm": 6.687872116603668,
|
|
"learning_rate": 9.232283296371305e-06,
|
|
"loss": 2.604677438735962,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 0.784213876511776,
|
|
"grad_norm": 15.611478833788476,
|
|
"learning_rate": 9.230310000277826e-06,
|
|
"loss": 2.8246655464172363,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 0.7848504137492043,
|
|
"grad_norm": 10.496722310569893,
|
|
"learning_rate": 9.228334382862415e-06,
|
|
"loss": 2.9619932174682617,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 0.7854869509866327,
|
|
"grad_norm": 21.111577164492267,
|
|
"learning_rate": 9.226356445209164e-06,
|
|
"loss": 3.1211252212524414,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 0.7861234882240611,
|
|
"grad_norm": 5.650042844459213,
|
|
"learning_rate": 9.224376188403438e-06,
|
|
"loss": 2.3700907230377197,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 0.7867600254614895,
|
|
"grad_norm": 11.614984343188375,
|
|
"learning_rate": 9.222393613531875e-06,
|
|
"loss": 2.7831969261169434,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 0.7873965626989179,
|
|
"grad_norm": 14.451173649063868,
|
|
"learning_rate": 9.220408721682384e-06,
|
|
"loss": 2.5475869178771973,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 0.7880330999363463,
|
|
"grad_norm": 8.626604462271105,
|
|
"learning_rate": 9.218421513944146e-06,
|
|
"loss": 2.251643180847168,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 0.7886696371737747,
|
|
"grad_norm": 10.541006366514067,
|
|
"learning_rate": 9.216431991407614e-06,
|
|
"loss": 2.9884228706359863,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 0.7893061744112031,
|
|
"grad_norm": 10.3895400900976,
|
|
"learning_rate": 9.214440155164509e-06,
|
|
"loss": 2.6941347122192383,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.7899427116486314,
|
|
"grad_norm": 15.634802726402812,
|
|
"learning_rate": 9.21244600630782e-06,
|
|
"loss": 3.2982771396636963,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 0.7905792488860598,
|
|
"grad_norm": 12.391044593349184,
|
|
"learning_rate": 9.21044954593181e-06,
|
|
"loss": 2.489248752593994,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 0.7912157861234882,
|
|
"grad_norm": 7.369020917300009,
|
|
"learning_rate": 9.20845077513201e-06,
|
|
"loss": 3.02134108543396,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 0.7918523233609166,
|
|
"grad_norm": 12.967257970395574,
|
|
"learning_rate": 9.206449695005214e-06,
|
|
"loss": 2.752878189086914,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 0.792488860598345,
|
|
"grad_norm": 22.412338612647503,
|
|
"learning_rate": 9.204446306649485e-06,
|
|
"loss": 2.832862615585327,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 0.7931253978357734,
|
|
"grad_norm": 13.838732237567118,
|
|
"learning_rate": 9.202440611164156e-06,
|
|
"loss": 2.739649772644043,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 0.7937619350732018,
|
|
"grad_norm": 13.15421166907182,
|
|
"learning_rate": 9.200432609649826e-06,
|
|
"loss": 3.1091763973236084,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 0.7943984723106302,
|
|
"grad_norm": 25.083539759246076,
|
|
"learning_rate": 9.198422303208349e-06,
|
|
"loss": 3.1699438095092773,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 0.7950350095480586,
|
|
"grad_norm": 12.736876776944172,
|
|
"learning_rate": 9.19640969294286e-06,
|
|
"loss": 2.7697696685791016,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 0.7956715467854869,
|
|
"grad_norm": 8.601857286692102,
|
|
"learning_rate": 9.194394779957746e-06,
|
|
"loss": 3.2362310886383057,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.7963080840229153,
|
|
"grad_norm": 13.459429768113056,
|
|
"learning_rate": 9.192377565358664e-06,
|
|
"loss": 2.5905492305755615,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 0.7969446212603437,
|
|
"grad_norm": 16.871641087210467,
|
|
"learning_rate": 9.190358050252528e-06,
|
|
"loss": 2.7865467071533203,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 0.7975811584977721,
|
|
"grad_norm": 9.844720147880034,
|
|
"learning_rate": 9.188336235747521e-06,
|
|
"loss": 2.3726232051849365,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 0.7982176957352005,
|
|
"grad_norm": 11.23669122084876,
|
|
"learning_rate": 9.186312122953083e-06,
|
|
"loss": 3.300795555114746,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 0.7988542329726289,
|
|
"grad_norm": 16.360634620929737,
|
|
"learning_rate": 9.184285712979919e-06,
|
|
"loss": 2.935802698135376,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 0.7994907702100573,
|
|
"grad_norm": 13.509288456680997,
|
|
"learning_rate": 9.182257006939989e-06,
|
|
"loss": 2.9251818656921387,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 0.8001273074474857,
|
|
"grad_norm": 9.639067408736738,
|
|
"learning_rate": 9.18022600594652e-06,
|
|
"loss": 2.9768736362457275,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 0.800763844684914,
|
|
"grad_norm": 10.675066476296655,
|
|
"learning_rate": 9.178192711113991e-06,
|
|
"loss": 2.221604585647583,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 0.8014003819223424,
|
|
"grad_norm": 19.08622224944117,
|
|
"learning_rate": 9.176157123558147e-06,
|
|
"loss": 2.6881661415100098,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 0.8020369191597708,
|
|
"grad_norm": 6.681873457109386,
|
|
"learning_rate": 9.174119244395984e-06,
|
|
"loss": 2.6460700035095215,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.8026734563971992,
|
|
"grad_norm": 9.120975168940044,
|
|
"learning_rate": 9.172079074745764e-06,
|
|
"loss": 2.587338924407959,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 0.8033099936346276,
|
|
"grad_norm": 8.470241427468848,
|
|
"learning_rate": 9.170036615726995e-06,
|
|
"loss": 2.9558582305908203,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 0.803946530872056,
|
|
"grad_norm": 21.420413651089113,
|
|
"learning_rate": 9.167991868460451e-06,
|
|
"loss": 2.270899772644043,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 0.8045830681094844,
|
|
"grad_norm": 10.088992613200938,
|
|
"learning_rate": 9.165944834068154e-06,
|
|
"loss": 2.834348678588867,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 0.8052196053469128,
|
|
"grad_norm": 17.773011919639547,
|
|
"learning_rate": 9.163895513673388e-06,
|
|
"loss": 2.6700775623321533,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 0.8058561425843411,
|
|
"grad_norm": 6.313584556413577,
|
|
"learning_rate": 9.16184390840069e-06,
|
|
"loss": 3.119436264038086,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 0.8064926798217695,
|
|
"grad_norm": 10.683183886824269,
|
|
"learning_rate": 9.159790019375844e-06,
|
|
"loss": 2.6771233081817627,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 0.8071292170591979,
|
|
"grad_norm": 17.039075471902997,
|
|
"learning_rate": 9.157733847725895e-06,
|
|
"loss": 2.8111491203308105,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 0.8077657542966263,
|
|
"grad_norm": 9.61025446735227,
|
|
"learning_rate": 9.155675394579137e-06,
|
|
"loss": 3.3145298957824707,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 0.8084022915340547,
|
|
"grad_norm": 13.026516215465113,
|
|
"learning_rate": 9.153614661065115e-06,
|
|
"loss": 3.323643922805786,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.8090388287714831,
|
|
"grad_norm": 13.865669627803625,
|
|
"learning_rate": 9.151551648314632e-06,
|
|
"loss": 2.976489782333374,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 0.8096753660089115,
|
|
"grad_norm": 23.93851468624548,
|
|
"learning_rate": 9.149486357459731e-06,
|
|
"loss": 3.009335517883301,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 0.81031190324634,
|
|
"grad_norm": 10.469437347525474,
|
|
"learning_rate": 9.147418789633715e-06,
|
|
"loss": 2.6073620319366455,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 0.8109484404837684,
|
|
"grad_norm": 10.853234944446672,
|
|
"learning_rate": 9.145348945971129e-06,
|
|
"loss": 2.799849510192871,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 0.8115849777211966,
|
|
"grad_norm": 9.688487286126236,
|
|
"learning_rate": 9.143276827607772e-06,
|
|
"loss": 2.2954964637756348,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 0.812221514958625,
|
|
"grad_norm": 10.669955887868204,
|
|
"learning_rate": 9.141202435680687e-06,
|
|
"loss": 2.92189359664917,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 0.8128580521960534,
|
|
"grad_norm": 11.626387329071216,
|
|
"learning_rate": 9.13912577132817e-06,
|
|
"loss": 2.646479845046997,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 0.8134945894334819,
|
|
"grad_norm": 10.031668621938243,
|
|
"learning_rate": 9.137046835689758e-06,
|
|
"loss": 2.1151552200317383,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 0.8141311266709103,
|
|
"grad_norm": 15.09847410739582,
|
|
"learning_rate": 9.134965629906238e-06,
|
|
"loss": 2.561397075653076,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 0.8147676639083387,
|
|
"grad_norm": 18.197636178571923,
|
|
"learning_rate": 9.132882155119645e-06,
|
|
"loss": 2.14336895942688,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.8154042011457671,
|
|
"grad_norm": 9.844441612822695,
|
|
"learning_rate": 9.13079641247325e-06,
|
|
"loss": 2.4377920627593994,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 0.8160407383831955,
|
|
"grad_norm": 7.707595564529393,
|
|
"learning_rate": 9.128708403111577e-06,
|
|
"loss": 2.7710394859313965,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 0.8166772756206238,
|
|
"grad_norm": 10.049701606083062,
|
|
"learning_rate": 9.126618128180394e-06,
|
|
"loss": 3.043046474456787,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 0.8173138128580522,
|
|
"grad_norm": 9.019335112227964,
|
|
"learning_rate": 9.124525588826706e-06,
|
|
"loss": 2.1620230674743652,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 0.8179503500954806,
|
|
"grad_norm": 8.67035035284991,
|
|
"learning_rate": 9.122430786198763e-06,
|
|
"loss": 2.6001639366149902,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 0.818586887332909,
|
|
"grad_norm": 10.043588823477533,
|
|
"learning_rate": 9.12033372144606e-06,
|
|
"loss": 1.823639988899231,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 0.8192234245703374,
|
|
"grad_norm": 9.22606774169981,
|
|
"learning_rate": 9.118234395719332e-06,
|
|
"loss": 2.6349215507507324,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 0.8198599618077658,
|
|
"grad_norm": 27.4129637623895,
|
|
"learning_rate": 9.116132810170554e-06,
|
|
"loss": 3.1643121242523193,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 0.8204964990451942,
|
|
"grad_norm": 13.522827712587821,
|
|
"learning_rate": 9.114028965952939e-06,
|
|
"loss": 2.936291217803955,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 0.8211330362826226,
|
|
"grad_norm": 14.31455799494892,
|
|
"learning_rate": 9.111922864220942e-06,
|
|
"loss": 2.8982882499694824,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.821769573520051,
|
|
"grad_norm": 30.02027998642032,
|
|
"learning_rate": 9.109814506130255e-06,
|
|
"loss": 3.107332706451416,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 0.8224061107574793,
|
|
"grad_norm": 16.261932973577068,
|
|
"learning_rate": 9.107703892837812e-06,
|
|
"loss": 2.811025619506836,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 0.8230426479949077,
|
|
"grad_norm": 18.45679320046287,
|
|
"learning_rate": 9.105591025501779e-06,
|
|
"loss": 2.2192745208740234,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 0.8236791852323361,
|
|
"grad_norm": 7.736250035717551,
|
|
"learning_rate": 9.103475905281563e-06,
|
|
"loss": 3.3386051654815674,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 0.8243157224697645,
|
|
"grad_norm": 13.766465002122054,
|
|
"learning_rate": 9.101358533337808e-06,
|
|
"loss": 2.681990385055542,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 0.8249522597071929,
|
|
"grad_norm": 12.720474806758402,
|
|
"learning_rate": 9.099238910832387e-06,
|
|
"loss": 2.671180009841919,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 0.8255887969446213,
|
|
"grad_norm": 11.870978572789058,
|
|
"learning_rate": 9.097117038928414e-06,
|
|
"loss": 2.5459225177764893,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 0.8262253341820497,
|
|
"grad_norm": 14.051712105259883,
|
|
"learning_rate": 9.094992918790238e-06,
|
|
"loss": 2.7658138275146484,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 0.8268618714194781,
|
|
"grad_norm": 9.78623844647995,
|
|
"learning_rate": 9.092866551583436e-06,
|
|
"loss": 2.522017240524292,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 0.8274984086569064,
|
|
"grad_norm": 16.121189036643578,
|
|
"learning_rate": 9.090737938474825e-06,
|
|
"loss": 3.155287265777588,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.8281349458943348,
|
|
"grad_norm": 8.482916829059928,
|
|
"learning_rate": 9.08860708063245e-06,
|
|
"loss": 2.536132574081421,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 0.8287714831317632,
|
|
"grad_norm": 12.819378495105811,
|
|
"learning_rate": 9.086473979225588e-06,
|
|
"loss": 2.7831597328186035,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 0.8294080203691916,
|
|
"grad_norm": 12.130627293232624,
|
|
"learning_rate": 9.084338635424745e-06,
|
|
"loss": 3.048809051513672,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 0.83004455760662,
|
|
"grad_norm": 11.074352418134843,
|
|
"learning_rate": 9.082201050401666e-06,
|
|
"loss": 2.6596789360046387,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 0.8306810948440484,
|
|
"grad_norm": 8.317566117685402,
|
|
"learning_rate": 9.080061225329317e-06,
|
|
"loss": 2.4225730895996094,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 0.8313176320814768,
|
|
"grad_norm": 12.478378501589457,
|
|
"learning_rate": 9.077919161381894e-06,
|
|
"loss": 2.435547113418579,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 0.8319541693189052,
|
|
"grad_norm": 8.20518520638,
|
|
"learning_rate": 9.075774859734829e-06,
|
|
"loss": 2.624713659286499,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 0.8325907065563335,
|
|
"grad_norm": 13.114609407099252,
|
|
"learning_rate": 9.073628321564773e-06,
|
|
"loss": 2.7705373764038086,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 0.8332272437937619,
|
|
"grad_norm": 11.553709399956617,
|
|
"learning_rate": 9.07147954804961e-06,
|
|
"loss": 2.776597023010254,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 0.8338637810311903,
|
|
"grad_norm": 14.484349806596981,
|
|
"learning_rate": 9.069328540368448e-06,
|
|
"loss": 2.5158157348632812,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.8345003182686187,
|
|
"grad_norm": 9.077943384505767,
|
|
"learning_rate": 9.067175299701619e-06,
|
|
"loss": 2.854553699493408,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 0.8351368555060471,
|
|
"grad_norm": 20.600143979613403,
|
|
"learning_rate": 9.065019827230688e-06,
|
|
"loss": 2.805817127227783,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 0.8357733927434755,
|
|
"grad_norm": 20.94123910800333,
|
|
"learning_rate": 9.062862124138435e-06,
|
|
"loss": 3.1979176998138428,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 0.8364099299809039,
|
|
"grad_norm": 10.511034456544738,
|
|
"learning_rate": 9.060702191608873e-06,
|
|
"loss": 2.674525022506714,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 0.8370464672183323,
|
|
"grad_norm": 13.282101608961089,
|
|
"learning_rate": 9.058540030827228e-06,
|
|
"loss": 3.2750120162963867,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 0.8376830044557607,
|
|
"grad_norm": 12.468491160804797,
|
|
"learning_rate": 9.056375642979961e-06,
|
|
"loss": 2.586822032928467,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 0.838319541693189,
|
|
"grad_norm": 12.825356591572639,
|
|
"learning_rate": 9.054209029254746e-06,
|
|
"loss": 2.5680291652679443,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 0.8389560789306174,
|
|
"grad_norm": 7.0251047845826236,
|
|
"learning_rate": 9.05204019084048e-06,
|
|
"loss": 2.9343976974487305,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 0.8395926161680458,
|
|
"grad_norm": 10.35830665819943,
|
|
"learning_rate": 9.049869128927284e-06,
|
|
"loss": 2.5775201320648193,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 0.8402291534054742,
|
|
"grad_norm": 12.074121871920275,
|
|
"learning_rate": 9.047695844706496e-06,
|
|
"loss": 3.1281425952911377,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.8408656906429026,
|
|
"grad_norm": 13.46786151972881,
|
|
"learning_rate": 9.045520339370675e-06,
|
|
"loss": 2.9933152198791504,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 0.841502227880331,
|
|
"grad_norm": 9.191417658963479,
|
|
"learning_rate": 9.0433426141136e-06,
|
|
"loss": 2.0610406398773193,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 0.8421387651177594,
|
|
"grad_norm": 20.31098015489218,
|
|
"learning_rate": 9.041162670130262e-06,
|
|
"loss": 2.398113250732422,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 0.8427753023551878,
|
|
"grad_norm": 7.247621506671208,
|
|
"learning_rate": 9.038980508616877e-06,
|
|
"loss": 2.683847427368164,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 0.8434118395926161,
|
|
"grad_norm": 21.14208058029866,
|
|
"learning_rate": 9.036796130770876e-06,
|
|
"loss": 3.447007656097412,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 0.8440483768300445,
|
|
"grad_norm": 11.308599379337744,
|
|
"learning_rate": 9.034609537790901e-06,
|
|
"loss": 2.7181715965270996,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 0.8446849140674729,
|
|
"grad_norm": 10.468205576836482,
|
|
"learning_rate": 9.032420730876819e-06,
|
|
"loss": 3.347914934158325,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 0.8453214513049013,
|
|
"grad_norm": 13.696870519850068,
|
|
"learning_rate": 9.030229711229701e-06,
|
|
"loss": 2.550179958343506,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 0.8459579885423297,
|
|
"grad_norm": 9.140413413821953,
|
|
"learning_rate": 9.028036480051843e-06,
|
|
"loss": 2.393416404724121,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 0.8465945257797581,
|
|
"grad_norm": 12.087748946308038,
|
|
"learning_rate": 9.025841038546743e-06,
|
|
"loss": 2.280996799468994,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.8472310630171865,
|
|
"grad_norm": 7.168215986872914,
|
|
"learning_rate": 9.023643387919123e-06,
|
|
"loss": 3.248619318008423,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 0.8478676002546149,
|
|
"grad_norm": 13.387201526610566,
|
|
"learning_rate": 9.021443529374912e-06,
|
|
"loss": 2.639176368713379,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 0.8485041374920432,
|
|
"grad_norm": 18.594921898086263,
|
|
"learning_rate": 9.019241464121246e-06,
|
|
"loss": 2.147660255432129,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 0.8491406747294716,
|
|
"grad_norm": 37.31900502048071,
|
|
"learning_rate": 9.017037193366483e-06,
|
|
"loss": 4.2564802169799805,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 0.8497772119669,
|
|
"grad_norm": 15.112857114582322,
|
|
"learning_rate": 9.01483071832018e-06,
|
|
"loss": 3.1673173904418945,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 0.8504137492043284,
|
|
"grad_norm": 21.676381708582444,
|
|
"learning_rate": 9.01262204019311e-06,
|
|
"loss": 3.0029163360595703,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 0.8510502864417568,
|
|
"grad_norm": 14.870952033681172,
|
|
"learning_rate": 9.010411160197257e-06,
|
|
"loss": 2.3692140579223633,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 0.8516868236791852,
|
|
"grad_norm": 9.74821280635995,
|
|
"learning_rate": 9.008198079545805e-06,
|
|
"loss": 2.133089542388916,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 0.8523233609166136,
|
|
"grad_norm": 15.469018293158362,
|
|
"learning_rate": 9.00598279945315e-06,
|
|
"loss": 2.728011131286621,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 0.852959898154042,
|
|
"grad_norm": 8.185856482659961,
|
|
"learning_rate": 9.0037653211349e-06,
|
|
"loss": 2.749013662338257,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.8535964353914705,
|
|
"grad_norm": 19.98457423207181,
|
|
"learning_rate": 9.00154564580786e-06,
|
|
"loss": 3.011110782623291,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 0.8542329726288987,
|
|
"grad_norm": 9.592951858163861,
|
|
"learning_rate": 8.999323774690047e-06,
|
|
"loss": 2.5260725021362305,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 0.8548695098663271,
|
|
"grad_norm": 18.64522308186827,
|
|
"learning_rate": 8.99709970900068e-06,
|
|
"loss": 1.9496815204620361,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 0.8555060471037556,
|
|
"grad_norm": 12.076529092259971,
|
|
"learning_rate": 8.994873449960184e-06,
|
|
"loss": 3.127403736114502,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 0.856142584341184,
|
|
"grad_norm": 10.462164153573513,
|
|
"learning_rate": 8.992644998790185e-06,
|
|
"loss": 2.9761838912963867,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 0.8567791215786124,
|
|
"grad_norm": 12.99995423386628,
|
|
"learning_rate": 8.990414356713517e-06,
|
|
"loss": 3.0213937759399414,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 0.8574156588160408,
|
|
"grad_norm": 8.95455484422871,
|
|
"learning_rate": 8.98818152495421e-06,
|
|
"loss": 2.820939779281616,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 0.8580521960534692,
|
|
"grad_norm": 11.266491420640211,
|
|
"learning_rate": 8.985946504737498e-06,
|
|
"loss": 2.9036173820495605,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 0.8586887332908976,
|
|
"grad_norm": 34.454539446401014,
|
|
"learning_rate": 8.983709297289818e-06,
|
|
"loss": 2.1190078258514404,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 0.8593252705283259,
|
|
"grad_norm": 12.587663196042058,
|
|
"learning_rate": 8.981469903838806e-06,
|
|
"loss": 2.672821044921875,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.8599618077657543,
|
|
"grad_norm": 17.88028573907417,
|
|
"learning_rate": 8.979228325613294e-06,
|
|
"loss": 2.6679329872131348,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 0.8605983450031827,
|
|
"grad_norm": 8.982580712828911,
|
|
"learning_rate": 8.97698456384332e-06,
|
|
"loss": 2.5248546600341797,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 0.8612348822406111,
|
|
"grad_norm": 16.158320589562617,
|
|
"learning_rate": 8.974738619760112e-06,
|
|
"loss": 3.293686866760254,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 0.8618714194780395,
|
|
"grad_norm": 21.576743503101472,
|
|
"learning_rate": 8.972490494596103e-06,
|
|
"loss": 3.087799549102783,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 0.8625079567154679,
|
|
"grad_norm": 20.43473137721611,
|
|
"learning_rate": 8.970240189584917e-06,
|
|
"loss": 2.5691702365875244,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 0.8631444939528963,
|
|
"grad_norm": 13.286826836661218,
|
|
"learning_rate": 8.967987705961379e-06,
|
|
"loss": 2.7788045406341553,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 0.8637810311903247,
|
|
"grad_norm": 10.982787627190925,
|
|
"learning_rate": 8.965733044961503e-06,
|
|
"loss": 2.638690948486328,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 0.864417568427753,
|
|
"grad_norm": 21.56517749749914,
|
|
"learning_rate": 8.963476207822506e-06,
|
|
"loss": 2.9436588287353516,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 0.8650541056651814,
|
|
"grad_norm": 7.846050590025499,
|
|
"learning_rate": 8.961217195782794e-06,
|
|
"loss": 2.5019116401672363,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 0.8656906429026098,
|
|
"grad_norm": 8.88665701771191,
|
|
"learning_rate": 8.958956010081967e-06,
|
|
"loss": 2.64182186126709,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.8663271801400382,
|
|
"grad_norm": 7.9286584819096015,
|
|
"learning_rate": 8.956692651960817e-06,
|
|
"loss": 2.5709879398345947,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 0.8669637173774666,
|
|
"grad_norm": 35.852572977236775,
|
|
"learning_rate": 8.95442712266133e-06,
|
|
"loss": 3.238485813140869,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 0.867600254614895,
|
|
"grad_norm": 13.061886199348878,
|
|
"learning_rate": 8.952159423426685e-06,
|
|
"loss": 2.509033203125,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 0.8682367918523234,
|
|
"grad_norm": 9.757063645996611,
|
|
"learning_rate": 8.949889555501248e-06,
|
|
"loss": 2.698711395263672,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 0.8688733290897518,
|
|
"grad_norm": 15.904767375002612,
|
|
"learning_rate": 8.947617520130575e-06,
|
|
"loss": 2.8830275535583496,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 0.8695098663271802,
|
|
"grad_norm": 9.859340776191049,
|
|
"learning_rate": 8.945343318561415e-06,
|
|
"loss": 2.7333996295928955,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 0.8701464035646085,
|
|
"grad_norm": 29.90621729856709,
|
|
"learning_rate": 8.9430669520417e-06,
|
|
"loss": 2.312429666519165,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 0.8707829408020369,
|
|
"grad_norm": 15.774793322102777,
|
|
"learning_rate": 8.940788421820557e-06,
|
|
"loss": 1.8989139795303345,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 0.8714194780394653,
|
|
"grad_norm": 26.079386831668614,
|
|
"learning_rate": 8.938507729148297e-06,
|
|
"loss": 3.147024631500244,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 0.8720560152768937,
|
|
"grad_norm": 25.93833059043368,
|
|
"learning_rate": 8.936224875276415e-06,
|
|
"loss": 3.548795223236084,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.8726925525143221,
|
|
"grad_norm": 15.127123147983411,
|
|
"learning_rate": 8.933939861457594e-06,
|
|
"loss": 2.9894320964813232,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 0.8733290897517505,
|
|
"grad_norm": 14.870429368132742,
|
|
"learning_rate": 8.931652688945706e-06,
|
|
"loss": 2.587772846221924,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 0.8739656269891789,
|
|
"grad_norm": 11.875634881012301,
|
|
"learning_rate": 8.929363358995802e-06,
|
|
"loss": 2.8910446166992188,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 0.8746021642266073,
|
|
"grad_norm": 13.721192137682051,
|
|
"learning_rate": 8.927071872864119e-06,
|
|
"loss": 2.673334836959839,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 0.8752387014640356,
|
|
"grad_norm": 12.460130848575641,
|
|
"learning_rate": 8.924778231808075e-06,
|
|
"loss": 2.914884090423584,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 0.875875238701464,
|
|
"grad_norm": 14.042103142396336,
|
|
"learning_rate": 8.922482437086276e-06,
|
|
"loss": 2.707538366317749,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 0.8765117759388924,
|
|
"grad_norm": 15.981995867165395,
|
|
"learning_rate": 8.920184489958505e-06,
|
|
"loss": 2.903834104537964,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 0.8771483131763208,
|
|
"grad_norm": 8.218622342353424,
|
|
"learning_rate": 8.917884391685729e-06,
|
|
"loss": 2.9739201068878174,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 0.8777848504137492,
|
|
"grad_norm": 12.842525285476123,
|
|
"learning_rate": 8.915582143530091e-06,
|
|
"loss": 2.8159000873565674,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 0.8784213876511776,
|
|
"grad_norm": 9.770965768425329,
|
|
"learning_rate": 8.91327774675492e-06,
|
|
"loss": 2.6709249019622803,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.879057924888606,
|
|
"grad_norm": 10.943758929362236,
|
|
"learning_rate": 8.910971202624717e-06,
|
|
"loss": 2.720698833465576,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 0.8796944621260344,
|
|
"grad_norm": 11.14834211284623,
|
|
"learning_rate": 8.90866251240517e-06,
|
|
"loss": 2.7545647621154785,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 0.8803309993634627,
|
|
"grad_norm": 6.992842046540137,
|
|
"learning_rate": 8.906351677363133e-06,
|
|
"loss": 2.6223673820495605,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 0.8809675366008911,
|
|
"grad_norm": 9.969659175978776,
|
|
"learning_rate": 8.904038698766649e-06,
|
|
"loss": 2.7305006980895996,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 0.8816040738383195,
|
|
"grad_norm": 11.375758939160075,
|
|
"learning_rate": 8.90172357788493e-06,
|
|
"loss": 2.7022647857666016,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 0.8822406110757479,
|
|
"grad_norm": 15.765826665284663,
|
|
"learning_rate": 8.899406315988363e-06,
|
|
"loss": 2.983297109603882,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 0.8828771483131763,
|
|
"grad_norm": 8.208433575257724,
|
|
"learning_rate": 8.897086914348519e-06,
|
|
"loss": 3.147146224975586,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 0.8835136855506047,
|
|
"grad_norm": 33.796645109789594,
|
|
"learning_rate": 8.894765374238129e-06,
|
|
"loss": 3.396644115447998,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 0.8841502227880331,
|
|
"grad_norm": 8.069088660483963,
|
|
"learning_rate": 8.89244169693111e-06,
|
|
"loss": 2.0669808387756348,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 0.8847867600254615,
|
|
"grad_norm": 14.457108495239018,
|
|
"learning_rate": 8.890115883702541e-06,
|
|
"loss": 2.5612125396728516,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.8854232972628899,
|
|
"grad_norm": 10.12258697850429,
|
|
"learning_rate": 8.887787935828684e-06,
|
|
"loss": 2.816615104675293,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 0.8860598345003182,
|
|
"grad_norm": 18.87692819262475,
|
|
"learning_rate": 8.885457854586966e-06,
|
|
"loss": 3.1916136741638184,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 0.8866963717377466,
|
|
"grad_norm": 15.834949119750588,
|
|
"learning_rate": 8.883125641255983e-06,
|
|
"loss": 3.166013717651367,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 0.887332908975175,
|
|
"grad_norm": 8.248272140916804,
|
|
"learning_rate": 8.880791297115507e-06,
|
|
"loss": 2.665917158126831,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 0.8879694462126034,
|
|
"grad_norm": 7.997372015258678,
|
|
"learning_rate": 8.878454823446474e-06,
|
|
"loss": 3.2195849418640137,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 0.8886059834500318,
|
|
"grad_norm": 8.305528328608894,
|
|
"learning_rate": 8.87611622153099e-06,
|
|
"loss": 2.7519631385803223,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 0.8892425206874602,
|
|
"grad_norm": 10.925992673424062,
|
|
"learning_rate": 8.87377549265233e-06,
|
|
"loss": 3.0465550422668457,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 0.8898790579248886,
|
|
"grad_norm": 5.948482134120402,
|
|
"learning_rate": 8.871432638094934e-06,
|
|
"loss": 3.009675979614258,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 0.890515595162317,
|
|
"grad_norm": 11.127002967272093,
|
|
"learning_rate": 8.869087659144413e-06,
|
|
"loss": 2.685904026031494,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 0.8911521323997453,
|
|
"grad_norm": 8.97087685278395,
|
|
"learning_rate": 8.866740557087539e-06,
|
|
"loss": 2.706536293029785,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.8917886696371737,
|
|
"grad_norm": 12.335864123989294,
|
|
"learning_rate": 8.864391333212248e-06,
|
|
"loss": 2.7309699058532715,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 0.8924252068746021,
|
|
"grad_norm": 11.930723853868875,
|
|
"learning_rate": 8.862039988807647e-06,
|
|
"loss": 2.5690298080444336,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 0.8930617441120305,
|
|
"grad_norm": 16.887269252746883,
|
|
"learning_rate": 8.859686525164e-06,
|
|
"loss": 3.1849586963653564,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 0.893698281349459,
|
|
"grad_norm": 10.744619526984696,
|
|
"learning_rate": 8.857330943572737e-06,
|
|
"loss": 2.9309535026550293,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 0.8943348185868873,
|
|
"grad_norm": 10.942121115571394,
|
|
"learning_rate": 8.854973245326451e-06,
|
|
"loss": 2.506758451461792,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 0.8949713558243158,
|
|
"grad_norm": 10.889820092495796,
|
|
"learning_rate": 8.852613431718891e-06,
|
|
"loss": 3.1235971450805664,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 0.8956078930617442,
|
|
"grad_norm": 18.299621145317612,
|
|
"learning_rate": 8.850251504044975e-06,
|
|
"loss": 2.1713199615478516,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 0.8962444302991726,
|
|
"grad_norm": 8.906668963816784,
|
|
"learning_rate": 8.847887463600778e-06,
|
|
"loss": 2.947065830230713,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 0.8968809675366008,
|
|
"grad_norm": 13.573712364791167,
|
|
"learning_rate": 8.845521311683528e-06,
|
|
"loss": 3.0217905044555664,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 0.8975175047740293,
|
|
"grad_norm": 8.352908652813237,
|
|
"learning_rate": 8.84315304959162e-06,
|
|
"loss": 2.857410192489624,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.8981540420114577,
|
|
"grad_norm": 16.935545380125163,
|
|
"learning_rate": 8.840782678624604e-06,
|
|
"loss": 2.941894292831421,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 0.8987905792488861,
|
|
"grad_norm": 9.37896485158657,
|
|
"learning_rate": 8.838410200083188e-06,
|
|
"loss": 2.708009719848633,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 0.8994271164863145,
|
|
"grad_norm": 11.051573410175154,
|
|
"learning_rate": 8.836035615269231e-06,
|
|
"loss": 2.9529104232788086,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 0.9000636537237429,
|
|
"grad_norm": 9.180184437257068,
|
|
"learning_rate": 8.833658925485759e-06,
|
|
"loss": 2.0678229331970215,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 0.9007001909611713,
|
|
"grad_norm": 12.008331322969955,
|
|
"learning_rate": 8.83128013203694e-06,
|
|
"loss": 2.78902006149292,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 0.9013367281985997,
|
|
"grad_norm": 15.298097773896862,
|
|
"learning_rate": 8.82889923622811e-06,
|
|
"loss": 3.3059544563293457,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 0.901973265436028,
|
|
"grad_norm": 12.92119685958626,
|
|
"learning_rate": 8.826516239365744e-06,
|
|
"loss": 3.177778720855713,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 0.9026098026734564,
|
|
"grad_norm": 7.231262540738608,
|
|
"learning_rate": 8.824131142757482e-06,
|
|
"loss": 2.2111966609954834,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 0.9032463399108848,
|
|
"grad_norm": 30.231236682369715,
|
|
"learning_rate": 8.82174394771211e-06,
|
|
"loss": 4.152851104736328,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 0.9038828771483132,
|
|
"grad_norm": 6.955888883185339,
|
|
"learning_rate": 8.819354655539567e-06,
|
|
"loss": 2.9363958835601807,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.9045194143857416,
|
|
"grad_norm": 6.221227295523858,
|
|
"learning_rate": 8.816963267550943e-06,
|
|
"loss": 2.814230442047119,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 0.90515595162317,
|
|
"grad_norm": 20.576731164909663,
|
|
"learning_rate": 8.814569785058478e-06,
|
|
"loss": 3.2863783836364746,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 0.9057924888605984,
|
|
"grad_norm": 10.124190036592271,
|
|
"learning_rate": 8.812174209375561e-06,
|
|
"loss": 2.850348949432373,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 0.9064290260980268,
|
|
"grad_norm": 15.761874926578889,
|
|
"learning_rate": 8.809776541816728e-06,
|
|
"loss": 2.9629907608032227,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 0.9070655633354551,
|
|
"grad_norm": 11.721717574823254,
|
|
"learning_rate": 8.80737678369767e-06,
|
|
"loss": 3.1664376258850098,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 0.9077021005728835,
|
|
"grad_norm": 17.22977161945999,
|
|
"learning_rate": 8.804974936335213e-06,
|
|
"loss": 2.2064313888549805,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 0.9083386378103119,
|
|
"grad_norm": 10.830257301815648,
|
|
"learning_rate": 8.802571001047343e-06,
|
|
"loss": 2.7995364665985107,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 0.9089751750477403,
|
|
"grad_norm": 6.327442683683713,
|
|
"learning_rate": 8.80016497915318e-06,
|
|
"loss": 2.714388370513916,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 0.9096117122851687,
|
|
"grad_norm": 8.555210132299838,
|
|
"learning_rate": 8.797756871972994e-06,
|
|
"loss": 2.8293895721435547,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 0.9102482495225971,
|
|
"grad_norm": 13.3626090821256,
|
|
"learning_rate": 8.795346680828203e-06,
|
|
"loss": 2.70363712310791,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.9108847867600255,
|
|
"grad_norm": 9.989760346933297,
|
|
"learning_rate": 8.79293440704136e-06,
|
|
"loss": 2.878157138824463,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 0.9115213239974539,
|
|
"grad_norm": 17.574273760979565,
|
|
"learning_rate": 8.790520051936172e-06,
|
|
"loss": 3.03108811378479,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 0.9121578612348823,
|
|
"grad_norm": 8.686800566695476,
|
|
"learning_rate": 8.788103616837476e-06,
|
|
"loss": 2.6360678672790527,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 0.9127943984723106,
|
|
"grad_norm": 9.07875542348685,
|
|
"learning_rate": 8.78568510307126e-06,
|
|
"loss": 2.9197378158569336,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 0.913430935709739,
|
|
"grad_norm": 10.295645175866365,
|
|
"learning_rate": 8.783264511964646e-06,
|
|
"loss": 2.4602155685424805,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 0.9140674729471674,
|
|
"grad_norm": 8.86363992461272,
|
|
"learning_rate": 8.7808418448459e-06,
|
|
"loss": 2.739168643951416,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 0.9147040101845958,
|
|
"grad_norm": 17.086921316775623,
|
|
"learning_rate": 8.778417103044423e-06,
|
|
"loss": 2.7610721588134766,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 0.9153405474220242,
|
|
"grad_norm": 10.019750837918233,
|
|
"learning_rate": 8.775990287890762e-06,
|
|
"loss": 2.855961322784424,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 0.9159770846594526,
|
|
"grad_norm": 12.118780675229214,
|
|
"learning_rate": 8.773561400716595e-06,
|
|
"loss": 2.713207483291626,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 0.916613621896881,
|
|
"grad_norm": 23.77219006288666,
|
|
"learning_rate": 8.771130442854739e-06,
|
|
"loss": 3.4736533164978027,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.9172501591343094,
|
|
"grad_norm": 16.064787393783657,
|
|
"learning_rate": 8.768697415639145e-06,
|
|
"loss": 1.408898949623108,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 0.9178866963717377,
|
|
"grad_norm": 21.290176402581753,
|
|
"learning_rate": 8.766262320404905e-06,
|
|
"loss": 2.7757620811462402,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 0.9185232336091661,
|
|
"grad_norm": 6.002070652945452,
|
|
"learning_rate": 8.76382515848824e-06,
|
|
"loss": 1.9027442932128906,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 0.9191597708465945,
|
|
"grad_norm": 21.26932231262647,
|
|
"learning_rate": 8.761385931226512e-06,
|
|
"loss": 3.4795098304748535,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 0.9197963080840229,
|
|
"grad_norm": 11.06161342307327,
|
|
"learning_rate": 8.758944639958205e-06,
|
|
"loss": 2.4269492626190186,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 0.9204328453214513,
|
|
"grad_norm": 11.03285367018562,
|
|
"learning_rate": 8.75650128602295e-06,
|
|
"loss": 2.875896453857422,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 0.9210693825588797,
|
|
"grad_norm": 10.287848138362884,
|
|
"learning_rate": 8.754055870761496e-06,
|
|
"loss": 2.742246389389038,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 0.9217059197963081,
|
|
"grad_norm": 12.590892977618639,
|
|
"learning_rate": 8.751608395515736e-06,
|
|
"loss": 2.545811891555786,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 0.9223424570337365,
|
|
"grad_norm": 9.673003668850864,
|
|
"learning_rate": 8.749158861628681e-06,
|
|
"loss": 2.839231014251709,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 0.9229789942711648,
|
|
"grad_norm": 10.17123811958482,
|
|
"learning_rate": 8.746707270444479e-06,
|
|
"loss": 3.0635018348693848,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.9236155315085932,
|
|
"grad_norm": 8.243523500914826,
|
|
"learning_rate": 8.744253623308407e-06,
|
|
"loss": 2.7437732219696045,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 0.9242520687460216,
|
|
"grad_norm": 17.156068983624365,
|
|
"learning_rate": 8.74179792156687e-06,
|
|
"loss": 2.2365593910217285,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 0.92488860598345,
|
|
"grad_norm": 9.491232000478549,
|
|
"learning_rate": 8.739340166567397e-06,
|
|
"loss": 2.858064651489258,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 0.9255251432208784,
|
|
"grad_norm": 11.029283599258722,
|
|
"learning_rate": 8.736880359658644e-06,
|
|
"loss": 2.6800594329833984,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 0.9261616804583068,
|
|
"grad_norm": 15.387910212162224,
|
|
"learning_rate": 8.734418502190398e-06,
|
|
"loss": 2.687188148498535,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 0.9267982176957352,
|
|
"grad_norm": 12.536653251071481,
|
|
"learning_rate": 8.731954595513567e-06,
|
|
"loss": 2.1078858375549316,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 0.9274347549331636,
|
|
"grad_norm": 9.031328925760082,
|
|
"learning_rate": 8.729488640980184e-06,
|
|
"loss": 2.6077308654785156,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 0.928071292170592,
|
|
"grad_norm": 12.265888896157241,
|
|
"learning_rate": 8.727020639943408e-06,
|
|
"loss": 2.6413002014160156,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 0.9287078294080203,
|
|
"grad_norm": 20.994279033111983,
|
|
"learning_rate": 8.724550593757515e-06,
|
|
"loss": 2.8199195861816406,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 0.9293443666454487,
|
|
"grad_norm": 8.39466199019947,
|
|
"learning_rate": 8.722078503777913e-06,
|
|
"loss": 2.7334635257720947,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.9299809038828771,
|
|
"grad_norm": 32.385117522692184,
|
|
"learning_rate": 8.71960437136112e-06,
|
|
"loss": 2.6803812980651855,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 0.9306174411203055,
|
|
"grad_norm": 13.34959832293905,
|
|
"learning_rate": 8.717128197864786e-06,
|
|
"loss": 2.946092128753662,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 0.9312539783577339,
|
|
"grad_norm": 6.930835219331372,
|
|
"learning_rate": 8.714649984647671e-06,
|
|
"loss": 2.860358715057373,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 0.9318905155951623,
|
|
"grad_norm": 13.35982857598474,
|
|
"learning_rate": 8.712169733069661e-06,
|
|
"loss": 3.013518810272217,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 0.9325270528325907,
|
|
"grad_norm": 21.64470981198118,
|
|
"learning_rate": 8.70968744449176e-06,
|
|
"loss": 2.914074182510376,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 0.9331635900700191,
|
|
"grad_norm": 10.043908040113202,
|
|
"learning_rate": 8.707203120276088e-06,
|
|
"loss": 2.9579620361328125,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 0.9338001273074474,
|
|
"grad_norm": 12.852889894909499,
|
|
"learning_rate": 8.704716761785881e-06,
|
|
"loss": 2.7109644412994385,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 0.9344366645448758,
|
|
"grad_norm": 20.892577897821393,
|
|
"learning_rate": 8.702228370385491e-06,
|
|
"loss": 2.1865947246551514,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 0.9350732017823042,
|
|
"grad_norm": 5.255934140558565,
|
|
"learning_rate": 8.699737947440389e-06,
|
|
"loss": 1.8131765127182007,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 0.9357097390197326,
|
|
"grad_norm": 10.931141906832528,
|
|
"learning_rate": 8.697245494317161e-06,
|
|
"loss": 2.68973970413208,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.936346276257161,
|
|
"grad_norm": 7.480666948740592,
|
|
"learning_rate": 8.6947510123835e-06,
|
|
"loss": 2.608941078186035,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 0.9369828134945895,
|
|
"grad_norm": 9.197846809716948,
|
|
"learning_rate": 8.692254503008221e-06,
|
|
"loss": 2.719707489013672,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 0.9376193507320179,
|
|
"grad_norm": 17.65088384619828,
|
|
"learning_rate": 8.689755967561248e-06,
|
|
"loss": 3.246102809906006,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 0.9382558879694463,
|
|
"grad_norm": 6.859171370041902,
|
|
"learning_rate": 8.687255407413612e-06,
|
|
"loss": 2.854102373123169,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 0.9388924252068745,
|
|
"grad_norm": 15.917817820678835,
|
|
"learning_rate": 8.684752823937466e-06,
|
|
"loss": 2.3989462852478027,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 0.939528962444303,
|
|
"grad_norm": 13.405391328912662,
|
|
"learning_rate": 8.682248218506061e-06,
|
|
"loss": 2.702059745788574,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 0.9401654996817314,
|
|
"grad_norm": 11.22720816995638,
|
|
"learning_rate": 8.679741592493766e-06,
|
|
"loss": 2.9737422466278076,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 0.9408020369191598,
|
|
"grad_norm": 24.210110695277212,
|
|
"learning_rate": 8.677232947276056e-06,
|
|
"loss": 2.9821133613586426,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 0.9414385741565882,
|
|
"grad_norm": 9.828711134527046,
|
|
"learning_rate": 8.674722284229514e-06,
|
|
"loss": 3.1264657974243164,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 0.9420751113940166,
|
|
"grad_norm": 12.506343891248635,
|
|
"learning_rate": 8.672209604731828e-06,
|
|
"loss": 2.760064125061035,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.942711648631445,
|
|
"grad_norm": 19.506236940040612,
|
|
"learning_rate": 8.669694910161799e-06,
|
|
"loss": 2.769021511077881,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 0.9433481858688734,
|
|
"grad_norm": 17.374370719201394,
|
|
"learning_rate": 8.667178201899326e-06,
|
|
"loss": 3.542201042175293,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 0.9439847231063018,
|
|
"grad_norm": 13.011819122669904,
|
|
"learning_rate": 8.66465948132542e-06,
|
|
"loss": 2.921635150909424,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 0.9446212603437301,
|
|
"grad_norm": 12.177558375703372,
|
|
"learning_rate": 8.662138749822191e-06,
|
|
"loss": 2.766899585723877,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 0.9452577975811585,
|
|
"grad_norm": 21.145265923147296,
|
|
"learning_rate": 8.659616008772854e-06,
|
|
"loss": 2.9893975257873535,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 0.9458943348185869,
|
|
"grad_norm": 31.766007666900276,
|
|
"learning_rate": 8.657091259561725e-06,
|
|
"loss": 2.5439658164978027,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 0.9465308720560153,
|
|
"grad_norm": 40.732855528912786,
|
|
"learning_rate": 8.654564503574228e-06,
|
|
"loss": 2.271054983139038,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 0.9471674092934437,
|
|
"grad_norm": 12.740859681461453,
|
|
"learning_rate": 8.65203574219688e-06,
|
|
"loss": 2.9355921745300293,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 0.9478039465308721,
|
|
"grad_norm": 14.131184418169383,
|
|
"learning_rate": 8.649504976817306e-06,
|
|
"loss": 2.431492567062378,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 0.9484404837683005,
|
|
"grad_norm": 12.272107098901829,
|
|
"learning_rate": 8.646972208824225e-06,
|
|
"loss": 2.5882158279418945,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.9490770210057289,
|
|
"grad_norm": 10.001110098598382,
|
|
"learning_rate": 8.64443743960746e-06,
|
|
"loss": 2.8265151977539062,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 0.9497135582431572,
|
|
"grad_norm": 7.870281496081134,
|
|
"learning_rate": 8.641900670557925e-06,
|
|
"loss": 2.6009325981140137,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 0.9503500954805856,
|
|
"grad_norm": 6.606186180448112,
|
|
"learning_rate": 8.639361903067638e-06,
|
|
"loss": 2.490417003631592,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 0.950986632718014,
|
|
"grad_norm": 16.32749021566751,
|
|
"learning_rate": 8.636821138529712e-06,
|
|
"loss": 2.382847547531128,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 0.9516231699554424,
|
|
"grad_norm": 18.58714970509347,
|
|
"learning_rate": 8.634278378338355e-06,
|
|
"loss": 1.9837441444396973,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 0.9522597071928708,
|
|
"grad_norm": 9.236163048934394,
|
|
"learning_rate": 8.631733623888867e-06,
|
|
"loss": 2.772226095199585,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 0.9528962444302992,
|
|
"grad_norm": 8.323499539185473,
|
|
"learning_rate": 8.629186876577648e-06,
|
|
"loss": 3.0804758071899414,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 0.9535327816677276,
|
|
"grad_norm": 13.627861487868559,
|
|
"learning_rate": 8.62663813780219e-06,
|
|
"loss": 2.870920181274414,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 0.954169318905156,
|
|
"grad_norm": 8.187420035976405,
|
|
"learning_rate": 8.624087408961075e-06,
|
|
"loss": 2.554389476776123,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 0.9548058561425843,
|
|
"grad_norm": 26.241958745271468,
|
|
"learning_rate": 8.621534691453981e-06,
|
|
"loss": 2.704555034637451,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.9554423933800127,
|
|
"grad_norm": 7.5367818384376175,
|
|
"learning_rate": 8.618979986681673e-06,
|
|
"loss": 2.818161725997925,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 0.9560789306174411,
|
|
"grad_norm": 8.177986099961563,
|
|
"learning_rate": 8.616423296046008e-06,
|
|
"loss": 2.6251158714294434,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 0.9567154678548695,
|
|
"grad_norm": 13.83128265339274,
|
|
"learning_rate": 8.613864620949937e-06,
|
|
"loss": 2.8486170768737793,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 0.9573520050922979,
|
|
"grad_norm": 9.839905952517228,
|
|
"learning_rate": 8.61130396279749e-06,
|
|
"loss": 2.5475683212280273,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 0.9579885423297263,
|
|
"grad_norm": 12.113119789871057,
|
|
"learning_rate": 8.608741322993798e-06,
|
|
"loss": 2.8252108097076416,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 0.9586250795671547,
|
|
"grad_norm": 8.472124174735304,
|
|
"learning_rate": 8.60617670294507e-06,
|
|
"loss": 3.012871265411377,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 0.9592616168045831,
|
|
"grad_norm": 11.311776521607479,
|
|
"learning_rate": 8.603610104058605e-06,
|
|
"loss": 2.7057697772979736,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 0.9598981540420115,
|
|
"grad_norm": 11.951505741269612,
|
|
"learning_rate": 8.601041527742787e-06,
|
|
"loss": 3.0853965282440186,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 0.9605346912794398,
|
|
"grad_norm": 11.891976254889936,
|
|
"learning_rate": 8.598470975407084e-06,
|
|
"loss": 2.3195412158966064,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 0.9611712285168682,
|
|
"grad_norm": 14.493443942962644,
|
|
"learning_rate": 8.595898448462053e-06,
|
|
"loss": 2.4563865661621094,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.9618077657542966,
|
|
"grad_norm": 20.022326730150414,
|
|
"learning_rate": 8.593323948319327e-06,
|
|
"loss": 3.090728759765625,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 0.962444302991725,
|
|
"grad_norm": 8.803355787881419,
|
|
"learning_rate": 8.59074747639163e-06,
|
|
"loss": 2.791250467300415,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 0.9630808402291534,
|
|
"grad_norm": 9.726485152075368,
|
|
"learning_rate": 8.588169034092761e-06,
|
|
"loss": 2.771373748779297,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 0.9637173774665818,
|
|
"grad_norm": 17.172151469542165,
|
|
"learning_rate": 8.585588622837606e-06,
|
|
"loss": 2.77121901512146,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 0.9643539147040102,
|
|
"grad_norm": 10.659171167179752,
|
|
"learning_rate": 8.583006244042126e-06,
|
|
"loss": 2.3193166255950928,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 0.9649904519414386,
|
|
"grad_norm": 7.045433564971674,
|
|
"learning_rate": 8.580421899123365e-06,
|
|
"loss": 2.8459174633026123,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 0.9656269891788669,
|
|
"grad_norm": 7.611562545836365,
|
|
"learning_rate": 8.577835589499448e-06,
|
|
"loss": 2.073949098587036,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 0.9662635264162953,
|
|
"grad_norm": 9.501402373558316,
|
|
"learning_rate": 8.575247316589573e-06,
|
|
"loss": 2.8364038467407227,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 0.9669000636537237,
|
|
"grad_norm": 10.663972357593154,
|
|
"learning_rate": 8.572657081814015e-06,
|
|
"loss": 2.4350342750549316,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 0.9675366008911521,
|
|
"grad_norm": 7.670157567002363,
|
|
"learning_rate": 8.570064886594132e-06,
|
|
"loss": 2.678783416748047,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.9681731381285805,
|
|
"grad_norm": 12.829595385967687,
|
|
"learning_rate": 8.567470732352354e-06,
|
|
"loss": 2.789569854736328,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 0.9688096753660089,
|
|
"grad_norm": 14.896042959437638,
|
|
"learning_rate": 8.564874620512184e-06,
|
|
"loss": 2.594637870788574,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 0.9694462126034373,
|
|
"grad_norm": 7.643925149013311,
|
|
"learning_rate": 8.562276552498201e-06,
|
|
"loss": 2.6070942878723145,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 0.9700827498408657,
|
|
"grad_norm": 11.0729438128728,
|
|
"learning_rate": 8.55967652973606e-06,
|
|
"loss": 3.331258535385132,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 0.9707192870782941,
|
|
"grad_norm": 10.651256199318965,
|
|
"learning_rate": 8.557074553652483e-06,
|
|
"loss": 4.254055976867676,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 0.9713558243157224,
|
|
"grad_norm": 12.234073637186876,
|
|
"learning_rate": 8.554470625675271e-06,
|
|
"loss": 2.9486451148986816,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 0.9719923615531508,
|
|
"grad_norm": 11.010439780773314,
|
|
"learning_rate": 8.551864747233288e-06,
|
|
"loss": 2.861355781555176,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 0.9726288987905792,
|
|
"grad_norm": 12.227179490251034,
|
|
"learning_rate": 8.549256919756475e-06,
|
|
"loss": 3.217989921569824,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 0.9732654360280076,
|
|
"grad_norm": 10.441061006667185,
|
|
"learning_rate": 8.546647144675837e-06,
|
|
"loss": 2.611697196960449,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 0.973901973265436,
|
|
"grad_norm": 7.047109217495742,
|
|
"learning_rate": 8.544035423423455e-06,
|
|
"loss": 2.819429874420166,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.9745385105028644,
|
|
"grad_norm": 14.928594244386037,
|
|
"learning_rate": 8.54142175743247e-06,
|
|
"loss": 2.841555118560791,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 0.9751750477402928,
|
|
"grad_norm": 15.51848853672532,
|
|
"learning_rate": 8.538806148137096e-06,
|
|
"loss": 2.815037250518799,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 0.9758115849777212,
|
|
"grad_norm": 11.219557094986436,
|
|
"learning_rate": 8.53618859697261e-06,
|
|
"loss": 2.755141019821167,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 0.9764481222151495,
|
|
"grad_norm": 9.756635768103603,
|
|
"learning_rate": 8.533569105375357e-06,
|
|
"loss": 2.5859339237213135,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 0.9770846594525779,
|
|
"grad_norm": 16.576216282836434,
|
|
"learning_rate": 8.530947674782741e-06,
|
|
"loss": 1.9348076581954956,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 0.9777211966900063,
|
|
"grad_norm": 9.797743250760059,
|
|
"learning_rate": 8.528324306633242e-06,
|
|
"loss": 2.7046377658843994,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 0.9783577339274347,
|
|
"grad_norm": 23.995662617500987,
|
|
"learning_rate": 8.525699002366387e-06,
|
|
"loss": 2.7023534774780273,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 0.9789942711648632,
|
|
"grad_norm": 10.395050557255956,
|
|
"learning_rate": 8.523071763422783e-06,
|
|
"loss": 2.550647735595703,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 0.9796308084022916,
|
|
"grad_norm": 9.417150815396226,
|
|
"learning_rate": 8.520442591244082e-06,
|
|
"loss": 2.7410709857940674,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 0.98026734563972,
|
|
"grad_norm": 8.895620373795214,
|
|
"learning_rate": 8.517811487273006e-06,
|
|
"loss": 2.1507298946380615,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.9809038828771484,
|
|
"grad_norm": 12.658307430881369,
|
|
"learning_rate": 8.515178452953341e-06,
|
|
"loss": 2.679276943206787,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 0.9815404201145767,
|
|
"grad_norm": 16.42879642738362,
|
|
"learning_rate": 8.512543489729921e-06,
|
|
"loss": 3.140566110610962,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 0.982176957352005,
|
|
"grad_norm": 8.84247180780308,
|
|
"learning_rate": 8.509906599048645e-06,
|
|
"loss": 2.7825236320495605,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 0.9828134945894335,
|
|
"grad_norm": 17.36356516865007,
|
|
"learning_rate": 8.507267782356471e-06,
|
|
"loss": 2.2732324600219727,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 0.9834500318268619,
|
|
"grad_norm": 29.981262242077374,
|
|
"learning_rate": 8.504627041101414e-06,
|
|
"loss": 3.3028225898742676,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 0.9840865690642903,
|
|
"grad_norm": 9.68713457137553,
|
|
"learning_rate": 8.501984376732535e-06,
|
|
"loss": 2.693692207336426,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 0.9847231063017187,
|
|
"grad_norm": 6.352198735843667,
|
|
"learning_rate": 8.499339790699967e-06,
|
|
"loss": 2.297257900238037,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 0.9853596435391471,
|
|
"grad_norm": 11.840233932732097,
|
|
"learning_rate": 8.496693284454882e-06,
|
|
"loss": 2.068459987640381,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 0.9859961807765755,
|
|
"grad_norm": 6.504503565462898,
|
|
"learning_rate": 8.494044859449518e-06,
|
|
"loss": 2.554231643676758,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 0.9866327180140039,
|
|
"grad_norm": 16.154139207848132,
|
|
"learning_rate": 8.491394517137153e-06,
|
|
"loss": 2.6632418632507324,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.9872692552514322,
|
|
"grad_norm": 8.142231355844261,
|
|
"learning_rate": 8.488742258972132e-06,
|
|
"loss": 2.740466356277466,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 0.9879057924888606,
|
|
"grad_norm": 9.893905854679373,
|
|
"learning_rate": 8.486088086409838e-06,
|
|
"loss": 2.541133403778076,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 0.988542329726289,
|
|
"grad_norm": 8.076922941798173,
|
|
"learning_rate": 8.483432000906715e-06,
|
|
"loss": 2.548953056335449,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 0.9891788669637174,
|
|
"grad_norm": 13.7974032727787,
|
|
"learning_rate": 8.480774003920247e-06,
|
|
"loss": 2.322848320007324,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 0.9898154042011458,
|
|
"grad_norm": 13.016432084972447,
|
|
"learning_rate": 8.478114096908974e-06,
|
|
"loss": 2.960254430770874,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 0.9904519414385742,
|
|
"grad_norm": 10.281067028596047,
|
|
"learning_rate": 8.475452281332484e-06,
|
|
"loss": 2.9151744842529297,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 0.9910884786760026,
|
|
"grad_norm": 13.783399482618753,
|
|
"learning_rate": 8.472788558651405e-06,
|
|
"loss": 2.9045119285583496,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 0.991725015913431,
|
|
"grad_norm": 8.37074340276371,
|
|
"learning_rate": 8.47012293032742e-06,
|
|
"loss": 1.4143991470336914,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 0.9923615531508593,
|
|
"grad_norm": 7.656752111840836,
|
|
"learning_rate": 8.467455397823254e-06,
|
|
"loss": 2.999601364135742,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 0.9929980903882877,
|
|
"grad_norm": 8.887901058027566,
|
|
"learning_rate": 8.464785962602678e-06,
|
|
"loss": 2.7708587646484375,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.9936346276257161,
|
|
"grad_norm": 9.58320143577911,
|
|
"learning_rate": 8.462114626130503e-06,
|
|
"loss": 3.1646625995635986,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 0.9942711648631445,
|
|
"grad_norm": 15.311980878917554,
|
|
"learning_rate": 8.459441389872589e-06,
|
|
"loss": 2.4998202323913574,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 0.9949077021005729,
|
|
"grad_norm": 18.622779764340642,
|
|
"learning_rate": 8.456766255295837e-06,
|
|
"loss": 2.445733070373535,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 0.9955442393380013,
|
|
"grad_norm": 29.80278794741722,
|
|
"learning_rate": 8.454089223868186e-06,
|
|
"loss": 2.949950695037842,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 0.9961807765754297,
|
|
"grad_norm": 8.981348515972892,
|
|
"learning_rate": 8.451410297058623e-06,
|
|
"loss": 2.407602548599243,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 0.9968173138128581,
|
|
"grad_norm": 10.86318288054507,
|
|
"learning_rate": 8.448729476337166e-06,
|
|
"loss": 1.6942147016525269,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 0.9974538510502864,
|
|
"grad_norm": 6.849075208924991,
|
|
"learning_rate": 8.446046763174877e-06,
|
|
"loss": 2.9985971450805664,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 0.9980903882877148,
|
|
"grad_norm": 19.64826633587671,
|
|
"learning_rate": 8.443362159043862e-06,
|
|
"loss": 2.626286029815674,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 0.9987269255251432,
|
|
"grad_norm": 12.44239321093429,
|
|
"learning_rate": 8.440675665417252e-06,
|
|
"loss": 2.616281032562256,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 0.9993634627625716,
|
|
"grad_norm": 5.899105846398005,
|
|
"learning_rate": 8.437987283769226e-06,
|
|
"loss": 2.447068452835083,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 27.94940618217712,
|
|
"learning_rate": 8.435297015574993e-06,
|
|
"loss": 2.3542513847351074,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 1.0006365372374284,
|
|
"grad_norm": 15.685358641118798,
|
|
"learning_rate": 8.432604862310803e-06,
|
|
"loss": 1.9478979110717773,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 1.0012730744748568,
|
|
"grad_norm": 8.85788052553538,
|
|
"learning_rate": 8.42991082545393e-06,
|
|
"loss": 2.1175429821014404,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 1.0019096117122852,
|
|
"grad_norm": 6.592589839240197,
|
|
"learning_rate": 8.427214906482693e-06,
|
|
"loss": 1.3762362003326416,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 1.0025461489497136,
|
|
"grad_norm": 8.129854163091727,
|
|
"learning_rate": 8.424517106876436e-06,
|
|
"loss": 1.7788302898406982,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 1.003182686187142,
|
|
"grad_norm": 14.092814327955969,
|
|
"learning_rate": 8.42181742811554e-06,
|
|
"loss": 1.1838836669921875,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 1.0038192234245704,
|
|
"grad_norm": 12.036099446001996,
|
|
"learning_rate": 8.419115871681414e-06,
|
|
"loss": 1.7293897867202759,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 1.0044557606619988,
|
|
"grad_norm": 9.88203344495813,
|
|
"learning_rate": 8.416412439056498e-06,
|
|
"loss": 1.3909540176391602,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 1.0050922978994272,
|
|
"grad_norm": 12.304474894364937,
|
|
"learning_rate": 8.413707131724263e-06,
|
|
"loss": 1.5465770959854126,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 1.0057288351368554,
|
|
"grad_norm": 9.143181239083816,
|
|
"learning_rate": 8.410999951169206e-06,
|
|
"loss": 1.8051213026046753,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 1.0063653723742838,
|
|
"grad_norm": 14.138006600362315,
|
|
"learning_rate": 8.408290898876856e-06,
|
|
"loss": 1.6632486581802368,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 1.0070019096117122,
|
|
"grad_norm": 18.78978923745861,
|
|
"learning_rate": 8.405579976333764e-06,
|
|
"loss": 2.232478141784668,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 1.0076384468491406,
|
|
"grad_norm": 9.46237728283357,
|
|
"learning_rate": 8.40286718502751e-06,
|
|
"loss": 1.7277331352233887,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 1.008274984086569,
|
|
"grad_norm": 13.544314021896062,
|
|
"learning_rate": 8.400152526446701e-06,
|
|
"loss": 1.2284635305404663,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 1.0089115213239974,
|
|
"grad_norm": 13.307320270591866,
|
|
"learning_rate": 8.397436002080967e-06,
|
|
"loss": 1.753080129623413,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 1.0095480585614258,
|
|
"grad_norm": 8.320353525601858,
|
|
"learning_rate": 8.39471761342096e-06,
|
|
"loss": 2.0175912380218506,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 1.0101845957988542,
|
|
"grad_norm": 8.850243754059989,
|
|
"learning_rate": 8.391997361958358e-06,
|
|
"loss": 1.502495527267456,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 1.0108211330362826,
|
|
"grad_norm": 12.157825237703662,
|
|
"learning_rate": 8.389275249185859e-06,
|
|
"loss": 1.617289423942566,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 1.011457670273711,
|
|
"grad_norm": 12.327022025899392,
|
|
"learning_rate": 8.386551276597186e-06,
|
|
"loss": 0.9483035206794739,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 1.0120942075111394,
|
|
"grad_norm": 12.697210098026716,
|
|
"learning_rate": 8.383825445687078e-06,
|
|
"loss": 1.5856382846832275,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 1.0127307447485678,
|
|
"grad_norm": 13.618734707796046,
|
|
"learning_rate": 8.381097757951294e-06,
|
|
"loss": 1.6918671131134033,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 1.0133672819859962,
|
|
"grad_norm": 8.663336283678763,
|
|
"learning_rate": 8.378368214886614e-06,
|
|
"loss": 1.5759856700897217,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 1.0140038192234246,
|
|
"grad_norm": 12.74485422998351,
|
|
"learning_rate": 8.375636817990837e-06,
|
|
"loss": 1.5827107429504395,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 1.014640356460853,
|
|
"grad_norm": 12.249941353369072,
|
|
"learning_rate": 8.372903568762779e-06,
|
|
"loss": 1.51044762134552,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 1.0152768936982814,
|
|
"grad_norm": 12.163581079496975,
|
|
"learning_rate": 8.370168468702269e-06,
|
|
"loss": 1.3908805847167969,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 1.0159134309357096,
|
|
"grad_norm": 12.64406261605538,
|
|
"learning_rate": 8.367431519310154e-06,
|
|
"loss": 1.3298909664154053,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 1.016549968173138,
|
|
"grad_norm": 7.839462555287826,
|
|
"learning_rate": 8.364692722088297e-06,
|
|
"loss": 1.5802236795425415,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 1.0171865054105664,
|
|
"grad_norm": 8.839530298728409,
|
|
"learning_rate": 8.361952078539574e-06,
|
|
"loss": 1.6365337371826172,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 1.0178230426479948,
|
|
"grad_norm": 15.198183689244685,
|
|
"learning_rate": 8.359209590167874e-06,
|
|
"loss": 2.3807125091552734,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 1.0184595798854232,
|
|
"grad_norm": 7.8077619973182735,
|
|
"learning_rate": 8.356465258478095e-06,
|
|
"loss": 1.4906675815582275,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 1.0190961171228516,
|
|
"grad_norm": 12.662926431534332,
|
|
"learning_rate": 8.353719084976152e-06,
|
|
"loss": 1.9658763408660889,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 1.01973265436028,
|
|
"grad_norm": 9.823848422637573,
|
|
"learning_rate": 8.350971071168968e-06,
|
|
"loss": 1.9681274890899658,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 1.0203691915977084,
|
|
"grad_norm": 10.064730309571255,
|
|
"learning_rate": 8.348221218564477e-06,
|
|
"loss": 1.8324413299560547,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 1.0210057288351368,
|
|
"grad_norm": 11.942685996288617,
|
|
"learning_rate": 8.34546952867162e-06,
|
|
"loss": 1.2514326572418213,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 1.0216422660725653,
|
|
"grad_norm": 10.182151962700415,
|
|
"learning_rate": 8.34271600300035e-06,
|
|
"loss": 1.5857393741607666,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 1.0222788033099937,
|
|
"grad_norm": 10.063217245392764,
|
|
"learning_rate": 8.33996064306162e-06,
|
|
"loss": 1.670034646987915,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 1.022915340547422,
|
|
"grad_norm": 46.41291108988851,
|
|
"learning_rate": 8.337203450367396e-06,
|
|
"loss": 1.8335425853729248,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 1.0235518777848505,
|
|
"grad_norm": 10.28045855353841,
|
|
"learning_rate": 8.33444442643065e-06,
|
|
"loss": 1.368062138557434,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 1.0241884150222789,
|
|
"grad_norm": 7.572962609232563,
|
|
"learning_rate": 8.331683572765355e-06,
|
|
"loss": 1.1756280660629272,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 1.0248249522597073,
|
|
"grad_norm": 12.38812491043937,
|
|
"learning_rate": 8.328920890886491e-06,
|
|
"loss": 1.6752097606658936,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 1.0254614894971357,
|
|
"grad_norm": 10.205420290092412,
|
|
"learning_rate": 8.32615638231004e-06,
|
|
"loss": 1.743167757987976,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 1.026098026734564,
|
|
"grad_norm": 13.275152342780736,
|
|
"learning_rate": 8.323390048552984e-06,
|
|
"loss": 2.1024320125579834,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 1.0267345639719923,
|
|
"grad_norm": 14.043989902284169,
|
|
"learning_rate": 8.320621891133313e-06,
|
|
"loss": 1.6133620738983154,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 1.0273711012094207,
|
|
"grad_norm": 7.852264686561906,
|
|
"learning_rate": 8.31785191157001e-06,
|
|
"loss": 1.6739814281463623,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 1.028007638446849,
|
|
"grad_norm": 9.716595275602375,
|
|
"learning_rate": 8.315080111383062e-06,
|
|
"loss": 1.868904948234558,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 1.0286441756842775,
|
|
"grad_norm": 13.483793704840371,
|
|
"learning_rate": 8.312306492093457e-06,
|
|
"loss": 1.848407506942749,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 1.0292807129217059,
|
|
"grad_norm": 15.349042363205411,
|
|
"learning_rate": 8.309531055223177e-06,
|
|
"loss": 1.406929850578308,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 1.0299172501591343,
|
|
"grad_norm": 17.03207340688238,
|
|
"learning_rate": 8.306753802295204e-06,
|
|
"loss": 1.5864617824554443,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 1.0305537873965627,
|
|
"grad_norm": 9.135669188167283,
|
|
"learning_rate": 8.303974734833516e-06,
|
|
"loss": 1.3825290203094482,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 1.031190324633991,
|
|
"grad_norm": 11.185379992251864,
|
|
"learning_rate": 8.301193854363084e-06,
|
|
"loss": 1.4131884574890137,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 1.0318268618714195,
|
|
"grad_norm": 13.57316677525489,
|
|
"learning_rate": 8.298411162409879e-06,
|
|
"loss": 1.4866886138916016,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 1.0324633991088479,
|
|
"grad_norm": 15.475000913383688,
|
|
"learning_rate": 8.295626660500861e-06,
|
|
"loss": 1.2877317667007446,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 1.0330999363462763,
|
|
"grad_norm": 11.315482981271101,
|
|
"learning_rate": 8.29284035016399e-06,
|
|
"loss": 1.6080381870269775,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 1.0337364735837047,
|
|
"grad_norm": 8.886044421157376,
|
|
"learning_rate": 8.290052232928207e-06,
|
|
"loss": 1.622658610343933,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 1.034373010821133,
|
|
"grad_norm": 8.647345686560026,
|
|
"learning_rate": 8.287262310323457e-06,
|
|
"loss": 1.5690397024154663,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 1.0350095480585615,
|
|
"grad_norm": 14.808459194207277,
|
|
"learning_rate": 8.284470583880666e-06,
|
|
"loss": 0.9358468055725098,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 1.03564608529599,
|
|
"grad_norm": 14.594537374935992,
|
|
"learning_rate": 8.281677055131758e-06,
|
|
"loss": 0.9582507610321045,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 1.0362826225334183,
|
|
"grad_norm": 9.592079100648048,
|
|
"learning_rate": 8.278881725609637e-06,
|
|
"loss": 1.7575924396514893,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 1.0369191597708467,
|
|
"grad_norm": 17.559436776381975,
|
|
"learning_rate": 8.276084596848205e-06,
|
|
"loss": 1.4164583683013916,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 1.0375556970082749,
|
|
"grad_norm": 8.268171667234254,
|
|
"learning_rate": 8.273285670382342e-06,
|
|
"loss": 1.7452517747879028,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 1.0381922342457033,
|
|
"grad_norm": 12.883950420509974,
|
|
"learning_rate": 8.270484947747924e-06,
|
|
"loss": 1.6780678033828735,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 1.0388287714831317,
|
|
"grad_norm": 10.997414804461346,
|
|
"learning_rate": 8.2676824304818e-06,
|
|
"loss": 2.120938301086426,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 1.03946530872056,
|
|
"grad_norm": 23.64960364916769,
|
|
"learning_rate": 8.264878120121816e-06,
|
|
"loss": 1.6598933935165405,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 1.0401018459579885,
|
|
"grad_norm": 9.024975550352027,
|
|
"learning_rate": 8.262072018206797e-06,
|
|
"loss": 1.4836094379425049,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 1.040738383195417,
|
|
"grad_norm": 9.132486507595646,
|
|
"learning_rate": 8.25926412627655e-06,
|
|
"loss": 1.0962557792663574,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 1.0413749204328453,
|
|
"grad_norm": 8.5793107306121,
|
|
"learning_rate": 8.256454445871866e-06,
|
|
"loss": 1.4689795970916748,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 1.0420114576702737,
|
|
"grad_norm": 15.016592288950207,
|
|
"learning_rate": 8.253642978534517e-06,
|
|
"loss": 1.6325409412384033,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 1.042647994907702,
|
|
"grad_norm": 13.412850714810107,
|
|
"learning_rate": 8.250829725807254e-06,
|
|
"loss": 1.6299569606781006,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 1.0432845321451305,
|
|
"grad_norm": 8.820604892388392,
|
|
"learning_rate": 8.24801468923381e-06,
|
|
"loss": 1.2900068759918213,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 1.043921069382559,
|
|
"grad_norm": 20.395329531383943,
|
|
"learning_rate": 8.245197870358898e-06,
|
|
"loss": 0.8589210510253906,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 1.0445576066199873,
|
|
"grad_norm": 9.708335028804578,
|
|
"learning_rate": 8.242379270728203e-06,
|
|
"loss": 1.7297940254211426,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 1.0451941438574157,
|
|
"grad_norm": 8.801661655986257,
|
|
"learning_rate": 8.239558891888393e-06,
|
|
"loss": 1.6582874059677124,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 1.0458306810948441,
|
|
"grad_norm": 12.963233181543366,
|
|
"learning_rate": 8.236736735387112e-06,
|
|
"loss": 1.2064335346221924,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 1.0464672183322725,
|
|
"grad_norm": 13.620795112093358,
|
|
"learning_rate": 8.233912802772976e-06,
|
|
"loss": 1.1542505025863647,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 1.047103755569701,
|
|
"grad_norm": 7.104123287679158,
|
|
"learning_rate": 8.231087095595579e-06,
|
|
"loss": 1.7787470817565918,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 1.0477402928071293,
|
|
"grad_norm": 13.901048416335506,
|
|
"learning_rate": 8.228259615405483e-06,
|
|
"loss": 2.1882848739624023,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 1.0483768300445575,
|
|
"grad_norm": 9.753015737304835,
|
|
"learning_rate": 8.225430363754231e-06,
|
|
"loss": 1.0799856185913086,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 1.049013367281986,
|
|
"grad_norm": 10.008718057996257,
|
|
"learning_rate": 8.222599342194335e-06,
|
|
"loss": 1.6267973184585571,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 1.0496499045194143,
|
|
"grad_norm": 9.611378096682964,
|
|
"learning_rate": 8.219766552279276e-06,
|
|
"loss": 1.9469648599624634,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 1.0502864417568427,
|
|
"grad_norm": 17.05947987283305,
|
|
"learning_rate": 8.216931995563506e-06,
|
|
"loss": 1.1864004135131836,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 1.0509229789942711,
|
|
"grad_norm": 13.146496163651747,
|
|
"learning_rate": 8.21409567360245e-06,
|
|
"loss": 1.4743739366531372,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 1.0515595162316995,
|
|
"grad_norm": 7.7948899872850514,
|
|
"learning_rate": 8.211257587952495e-06,
|
|
"loss": 1.2133256196975708,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 1.052196053469128,
|
|
"grad_norm": 11.999777880354147,
|
|
"learning_rate": 8.208417740171004e-06,
|
|
"loss": 2.0245907306671143,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 1.0528325907065563,
|
|
"grad_norm": 13.810605113801204,
|
|
"learning_rate": 8.205576131816302e-06,
|
|
"loss": 1.4931745529174805,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 1.0534691279439847,
|
|
"grad_norm": 9.44986492555049,
|
|
"learning_rate": 8.202732764447676e-06,
|
|
"loss": 1.8107151985168457,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 1.0541056651814131,
|
|
"grad_norm": 9.301071037131026,
|
|
"learning_rate": 8.199887639625389e-06,
|
|
"loss": 1.9895278215408325,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 1.0547422024188415,
|
|
"grad_norm": 15.019405626502477,
|
|
"learning_rate": 8.19704075891066e-06,
|
|
"loss": 1.9410228729248047,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 1.05537873965627,
|
|
"grad_norm": 8.383353222270486,
|
|
"learning_rate": 8.194192123865673e-06,
|
|
"loss": 1.5013635158538818,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 1.0560152768936983,
|
|
"grad_norm": 8.328580997876143,
|
|
"learning_rate": 8.191341736053577e-06,
|
|
"loss": 1.2550219297409058,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 1.0566518141311267,
|
|
"grad_norm": 11.050002560420085,
|
|
"learning_rate": 8.188489597038482e-06,
|
|
"loss": 1.3909871578216553,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 1.0572883513685551,
|
|
"grad_norm": 11.143182923060522,
|
|
"learning_rate": 8.185635708385457e-06,
|
|
"loss": 1.5867688655853271,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 1.0579248886059835,
|
|
"grad_norm": 10.20222436417481,
|
|
"learning_rate": 8.182780071660535e-06,
|
|
"loss": 1.4897205829620361,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 1.0585614258434117,
|
|
"grad_norm": 8.931242975370665,
|
|
"learning_rate": 8.1799226884307e-06,
|
|
"loss": 1.809645414352417,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 1.0591979630808401,
|
|
"grad_norm": 6.970685917982441,
|
|
"learning_rate": 8.177063560263906e-06,
|
|
"loss": 1.7880017757415771,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 1.0598345003182685,
|
|
"grad_norm": 15.792839546078376,
|
|
"learning_rate": 8.174202688729057e-06,
|
|
"loss": 1.8639477491378784,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 1.060471037555697,
|
|
"grad_norm": 17.502707465480373,
|
|
"learning_rate": 8.171340075396012e-06,
|
|
"loss": 1.335056185722351,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 1.0611075747931253,
|
|
"grad_norm": 9.727716405794908,
|
|
"learning_rate": 8.168475721835592e-06,
|
|
"loss": 1.384705901145935,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 1.0617441120305537,
|
|
"grad_norm": 11.962255893736563,
|
|
"learning_rate": 8.165609629619571e-06,
|
|
"loss": 2.0106027126312256,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 1.0623806492679821,
|
|
"grad_norm": 14.464330599123075,
|
|
"learning_rate": 8.162741800320672e-06,
|
|
"loss": 1.2054011821746826,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 1.0630171865054105,
|
|
"grad_norm": 9.253641747409125,
|
|
"learning_rate": 8.159872235512581e-06,
|
|
"loss": 1.8153573274612427,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 1.063653723742839,
|
|
"grad_norm": 11.975176677076705,
|
|
"learning_rate": 8.157000936769923e-06,
|
|
"loss": 1.0125563144683838,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 1.0642902609802674,
|
|
"grad_norm": 10.428104840908096,
|
|
"learning_rate": 8.154127905668289e-06,
|
|
"loss": 1.7303546667099,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 1.0649267982176958,
|
|
"grad_norm": 8.080398278681482,
|
|
"learning_rate": 8.15125314378421e-06,
|
|
"loss": 1.1772122383117676,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 1.0655633354551242,
|
|
"grad_norm": 7.161770853050351,
|
|
"learning_rate": 8.148376652695172e-06,
|
|
"loss": 1.873197078704834,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 1.0661998726925526,
|
|
"grad_norm": 14.207244484786004,
|
|
"learning_rate": 8.145498433979605e-06,
|
|
"loss": 1.9405423402786255,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 1.066836409929981,
|
|
"grad_norm": 88.9360573943596,
|
|
"learning_rate": 8.142618489216896e-06,
|
|
"loss": 1.2580645084381104,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 1.0674729471674094,
|
|
"grad_norm": 17.09333933215392,
|
|
"learning_rate": 8.139736819987368e-06,
|
|
"loss": 1.4222946166992188,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 1.0681094844048378,
|
|
"grad_norm": 11.66708175206413,
|
|
"learning_rate": 8.136853427872298e-06,
|
|
"loss": 1.4385242462158203,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 1.0687460216422662,
|
|
"grad_norm": 21.08110180359124,
|
|
"learning_rate": 8.133968314453903e-06,
|
|
"loss": 1.3539273738861084,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 1.0693825588796946,
|
|
"grad_norm": 11.332922309922932,
|
|
"learning_rate": 8.131081481315353e-06,
|
|
"loss": 1.4039275646209717,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 1.0700190961171228,
|
|
"grad_norm": 11.822415386227963,
|
|
"learning_rate": 8.128192930040752e-06,
|
|
"loss": 1.3482317924499512,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 1.0706556333545512,
|
|
"grad_norm": 11.300193815902606,
|
|
"learning_rate": 8.125302662215156e-06,
|
|
"loss": 1.556139588356018,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 1.0712921705919796,
|
|
"grad_norm": 11.001260944554838,
|
|
"learning_rate": 8.12241067942455e-06,
|
|
"loss": 1.6594297885894775,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 1.071928707829408,
|
|
"grad_norm": 17.26006568095415,
|
|
"learning_rate": 8.119516983255875e-06,
|
|
"loss": 1.8062752485275269,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 1.0725652450668364,
|
|
"grad_norm": 8.880410724554594,
|
|
"learning_rate": 8.116621575297004e-06,
|
|
"loss": 1.5717209577560425,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 1.0732017823042648,
|
|
"grad_norm": 9.979602337699953,
|
|
"learning_rate": 8.113724457136747e-06,
|
|
"loss": 1.7199119329452515,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 1.0738383195416932,
|
|
"grad_norm": 12.614829392114382,
|
|
"learning_rate": 8.11082563036486e-06,
|
|
"loss": 1.873417615890503,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 1.0744748567791216,
|
|
"grad_norm": 12.185931739938681,
|
|
"learning_rate": 8.107925096572031e-06,
|
|
"loss": 1.826694369316101,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 1.07511139401655,
|
|
"grad_norm": 20.925554372417256,
|
|
"learning_rate": 8.105022857349886e-06,
|
|
"loss": 1.0506352186203003,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 1.0757479312539784,
|
|
"grad_norm": 10.185233251622277,
|
|
"learning_rate": 8.102118914290988e-06,
|
|
"loss": 1.5804609060287476,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 1.0763844684914068,
|
|
"grad_norm": 9.775951299246545,
|
|
"learning_rate": 8.099213268988835e-06,
|
|
"loss": 1.5286052227020264,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 1.0770210057288352,
|
|
"grad_norm": 8.04895628585496,
|
|
"learning_rate": 8.096305923037857e-06,
|
|
"loss": 2.2917532920837402,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 1.0776575429662636,
|
|
"grad_norm": 12.396131316168823,
|
|
"learning_rate": 8.093396878033416e-06,
|
|
"loss": 1.8517321348190308,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 1.078294080203692,
|
|
"grad_norm": 11.839840208595898,
|
|
"learning_rate": 8.090486135571811e-06,
|
|
"loss": 1.7966738939285278,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 1.0789306174411204,
|
|
"grad_norm": 10.538205100213034,
|
|
"learning_rate": 8.087573697250271e-06,
|
|
"loss": 1.6291019916534424,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 1.0795671546785486,
|
|
"grad_norm": 12.542598375280967,
|
|
"learning_rate": 8.084659564666954e-06,
|
|
"loss": 1.483119249343872,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 1.080203691915977,
|
|
"grad_norm": 17.92790634167383,
|
|
"learning_rate": 8.081743739420949e-06,
|
|
"loss": 1.184516191482544,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 1.0808402291534054,
|
|
"grad_norm": 8.120614428265217,
|
|
"learning_rate": 8.078826223112272e-06,
|
|
"loss": 1.1689471006393433,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 1.0814767663908338,
|
|
"grad_norm": 10.39273268340443,
|
|
"learning_rate": 8.075907017341869e-06,
|
|
"loss": 1.3193955421447754,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 1.0821133036282622,
|
|
"grad_norm": 11.749209026655937,
|
|
"learning_rate": 8.072986123711612e-06,
|
|
"loss": 1.7868832349777222,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 1.0827498408656906,
|
|
"grad_norm": 11.9206432112175,
|
|
"learning_rate": 8.0700635438243e-06,
|
|
"loss": 1.9372731447219849,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 1.083386378103119,
|
|
"grad_norm": 16.76571690643751,
|
|
"learning_rate": 8.067139279283657e-06,
|
|
"loss": 1.4213175773620605,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 1.0840229153405474,
|
|
"grad_norm": 11.541077397920052,
|
|
"learning_rate": 8.06421333169433e-06,
|
|
"loss": 1.4775488376617432,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 1.0846594525779758,
|
|
"grad_norm": 15.4708223985343,
|
|
"learning_rate": 8.061285702661891e-06,
|
|
"loss": 1.1895475387573242,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 1.0852959898154042,
|
|
"grad_norm": 9.385116394727838,
|
|
"learning_rate": 8.058356393792836e-06,
|
|
"loss": 1.3861514329910278,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 1.0859325270528326,
|
|
"grad_norm": 12.674100329040792,
|
|
"learning_rate": 8.05542540669458e-06,
|
|
"loss": 1.6155014038085938,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 1.086569064290261,
|
|
"grad_norm": 11.967887698809248,
|
|
"learning_rate": 8.052492742975457e-06,
|
|
"loss": 1.6911782026290894,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 1.0872056015276894,
|
|
"grad_norm": 15.376295836200836,
|
|
"learning_rate": 8.049558404244731e-06,
|
|
"loss": 1.6580021381378174,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 1.0878421387651178,
|
|
"grad_norm": 17.518359669332686,
|
|
"learning_rate": 8.046622392112575e-06,
|
|
"loss": 1.4573746919631958,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 1.0884786760025462,
|
|
"grad_norm": 10.174914731743716,
|
|
"learning_rate": 8.04368470819008e-06,
|
|
"loss": 1.7645249366760254,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 1.0891152132399746,
|
|
"grad_norm": 14.816749816494264,
|
|
"learning_rate": 8.040745354089264e-06,
|
|
"loss": 1.3396403789520264,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 1.089751750477403,
|
|
"grad_norm": 8.500623260090219,
|
|
"learning_rate": 8.03780433142305e-06,
|
|
"loss": 1.8244812488555908,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 1.0903882877148314,
|
|
"grad_norm": 13.835563353359229,
|
|
"learning_rate": 8.034861641805287e-06,
|
|
"loss": 1.9595309495925903,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 1.0910248249522596,
|
|
"grad_norm": 7.6032087361840945,
|
|
"learning_rate": 8.03191728685073e-06,
|
|
"loss": 1.6191723346710205,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 1.091661362189688,
|
|
"grad_norm": 9.612118436341923,
|
|
"learning_rate": 8.028971268175054e-06,
|
|
"loss": 1.7245670557022095,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 1.0922978994271164,
|
|
"grad_norm": 7.733082218539383,
|
|
"learning_rate": 8.026023587394842e-06,
|
|
"loss": 1.4619736671447754,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 1.0929344366645448,
|
|
"grad_norm": 9.701026061431111,
|
|
"learning_rate": 8.023074246127593e-06,
|
|
"loss": 1.5184788703918457,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 1.0935709739019732,
|
|
"grad_norm": 11.363088864289784,
|
|
"learning_rate": 8.020123245991716e-06,
|
|
"loss": 1.9125618934631348,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 1.0942075111394016,
|
|
"grad_norm": 11.886266396828022,
|
|
"learning_rate": 8.017170588606529e-06,
|
|
"loss": 1.4592000246047974,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 1.09484404837683,
|
|
"grad_norm": 15.953624175177238,
|
|
"learning_rate": 8.014216275592262e-06,
|
|
"loss": 2.6135425567626953,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 1.0954805856142584,
|
|
"grad_norm": 12.383683402706158,
|
|
"learning_rate": 8.011260308570048e-06,
|
|
"loss": 1.8721458911895752,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 1.0961171228516868,
|
|
"grad_norm": 21.712072561809958,
|
|
"learning_rate": 8.008302689161938e-06,
|
|
"loss": 2.3193273544311523,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 1.0967536600891152,
|
|
"grad_norm": 12.745679385393263,
|
|
"learning_rate": 8.005343418990877e-06,
|
|
"loss": 1.9192438125610352,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 1.0973901973265436,
|
|
"grad_norm": 11.49317536790788,
|
|
"learning_rate": 8.002382499680725e-06,
|
|
"loss": 1.9271581172943115,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 1.098026734563972,
|
|
"grad_norm": 11.47631910729549,
|
|
"learning_rate": 7.999419932856245e-06,
|
|
"loss": 1.7452729940414429,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 1.0986632718014004,
|
|
"grad_norm": 13.548063560780626,
|
|
"learning_rate": 7.9964557201431e-06,
|
|
"loss": 1.7192658185958862,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 1.0992998090388288,
|
|
"grad_norm": 14.44945874753324,
|
|
"learning_rate": 7.99348986316786e-06,
|
|
"loss": 1.4745674133300781,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 1.0999363462762572,
|
|
"grad_norm": 8.790939478068589,
|
|
"learning_rate": 7.990522363558e-06,
|
|
"loss": 1.7271530628204346,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 1.1005728835136857,
|
|
"grad_norm": 13.294298649309285,
|
|
"learning_rate": 7.987553222941888e-06,
|
|
"loss": 2.079482078552246,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 1.1012094207511138,
|
|
"grad_norm": 8.955186343290276,
|
|
"learning_rate": 7.9845824429488e-06,
|
|
"loss": 1.9009345769882202,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 1.1018459579885422,
|
|
"grad_norm": 11.965178893767549,
|
|
"learning_rate": 7.98161002520891e-06,
|
|
"loss": 1.871706485748291,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 1.1024824952259706,
|
|
"grad_norm": 12.071102366896111,
|
|
"learning_rate": 7.978635971353285e-06,
|
|
"loss": 1.2385210990905762,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 1.103119032463399,
|
|
"grad_norm": 10.402210828323987,
|
|
"learning_rate": 7.9756602830139e-06,
|
|
"loss": 1.4987719058990479,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 1.1037555697008274,
|
|
"grad_norm": 15.882877710866513,
|
|
"learning_rate": 7.972682961823618e-06,
|
|
"loss": 1.0441172122955322,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 1.1043921069382558,
|
|
"grad_norm": 10.44535984277186,
|
|
"learning_rate": 7.969704009416201e-06,
|
|
"loss": 1.6794166564941406,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 1.1050286441756842,
|
|
"grad_norm": 8.294825851080079,
|
|
"learning_rate": 7.966723427426309e-06,
|
|
"loss": 1.5168843269348145,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 1.1056651814131127,
|
|
"grad_norm": 8.750430308096748,
|
|
"learning_rate": 7.963741217489489e-06,
|
|
"loss": 0.831028938293457,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 1.106301718650541,
|
|
"grad_norm": 15.572356503260961,
|
|
"learning_rate": 7.960757381242192e-06,
|
|
"loss": 1.599800944328308,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 1.1069382558879695,
|
|
"grad_norm": 12.139791785945542,
|
|
"learning_rate": 7.95777192032175e-06,
|
|
"loss": 1.6609857082366943,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 1.1075747931253979,
|
|
"grad_norm": 73.58212966042169,
|
|
"learning_rate": 7.954784836366395e-06,
|
|
"loss": 2.646315097808838,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 1.1082113303628263,
|
|
"grad_norm": 12.359273737070797,
|
|
"learning_rate": 7.951796131015246e-06,
|
|
"loss": 2.328749179840088,
|
|
"step": 1741
|
|
},
|
|
{
|
|
"epoch": 1.1088478676002547,
|
|
"grad_norm": 13.229289004438948,
|
|
"learning_rate": 7.948805805908313e-06,
|
|
"loss": 1.6222223043441772,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 1.109484404837683,
|
|
"grad_norm": 6.561897383325581,
|
|
"learning_rate": 7.94581386268649e-06,
|
|
"loss": 1.1249849796295166,
|
|
"step": 1743
|
|
},
|
|
{
|
|
"epoch": 1.1101209420751115,
|
|
"grad_norm": 12.860463321432597,
|
|
"learning_rate": 7.942820302991569e-06,
|
|
"loss": 1.9121992588043213,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 1.1107574793125399,
|
|
"grad_norm": 9.986315850505079,
|
|
"learning_rate": 7.939825128466216e-06,
|
|
"loss": 1.5456956624984741,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 1.1113940165499683,
|
|
"grad_norm": 9.239962459355732,
|
|
"learning_rate": 7.936828340753996e-06,
|
|
"loss": 1.7931642532348633,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 1.1120305537873967,
|
|
"grad_norm": 12.404016054106627,
|
|
"learning_rate": 7.93382994149935e-06,
|
|
"loss": 1.4237359762191772,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 1.1126670910248249,
|
|
"grad_norm": 18.822826852660583,
|
|
"learning_rate": 7.930829932347608e-06,
|
|
"loss": 1.8694027662277222,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 1.1133036282622533,
|
|
"grad_norm": 8.086491923138064,
|
|
"learning_rate": 7.92782831494498e-06,
|
|
"loss": 1.4876083135604858,
|
|
"step": 1749
|
|
},
|
|
{
|
|
"epoch": 1.1139401654996817,
|
|
"grad_norm": 16.538632235045654,
|
|
"learning_rate": 7.924825090938564e-06,
|
|
"loss": 1.4833011627197266,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 1.11457670273711,
|
|
"grad_norm": 7.1295183448037935,
|
|
"learning_rate": 7.921820261976334e-06,
|
|
"loss": 1.4614789485931396,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 1.1152132399745385,
|
|
"grad_norm": 10.045674188034987,
|
|
"learning_rate": 7.918813829707146e-06,
|
|
"loss": 1.6283634901046753,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 1.1158497772119669,
|
|
"grad_norm": 7.969780060386495,
|
|
"learning_rate": 7.915805795780737e-06,
|
|
"loss": 1.2911505699157715,
|
|
"step": 1753
|
|
},
|
|
{
|
|
"epoch": 1.1164863144493953,
|
|
"grad_norm": 9.928915387567933,
|
|
"learning_rate": 7.912796161847724e-06,
|
|
"loss": 1.5987910032272339,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 1.1171228516868237,
|
|
"grad_norm": 10.269427265068847,
|
|
"learning_rate": 7.9097849295596e-06,
|
|
"loss": 1.5262665748596191,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 1.117759388924252,
|
|
"grad_norm": 10.51806130174475,
|
|
"learning_rate": 7.906772100568734e-06,
|
|
"loss": 1.4935064315795898,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 1.1183959261616805,
|
|
"grad_norm": 18.204930179540256,
|
|
"learning_rate": 7.903757676528374e-06,
|
|
"loss": 1.5943958759307861,
|
|
"step": 1757
|
|
},
|
|
{
|
|
"epoch": 1.119032463399109,
|
|
"grad_norm": 28.708012637672688,
|
|
"learning_rate": 7.900741659092641e-06,
|
|
"loss": 1.8090311288833618,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 1.1196690006365373,
|
|
"grad_norm": 8.987710870395777,
|
|
"learning_rate": 7.897724049916534e-06,
|
|
"loss": 1.5836857557296753,
|
|
"step": 1759
|
|
},
|
|
{
|
|
"epoch": 1.1203055378739657,
|
|
"grad_norm": 20.438928041322313,
|
|
"learning_rate": 7.894704850655919e-06,
|
|
"loss": 2.117549419403076,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 1.120942075111394,
|
|
"grad_norm": 11.837901712992668,
|
|
"learning_rate": 7.891684062967539e-06,
|
|
"loss": 1.4354597330093384,
|
|
"step": 1761
|
|
},
|
|
{
|
|
"epoch": 1.1215786123488225,
|
|
"grad_norm": 12.643607966368892,
|
|
"learning_rate": 7.88866168850901e-06,
|
|
"loss": 1.7912664413452148,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 1.1222151495862507,
|
|
"grad_norm": 16.56441221996852,
|
|
"learning_rate": 7.885637728938815e-06,
|
|
"loss": 1.8154982328414917,
|
|
"step": 1763
|
|
},
|
|
{
|
|
"epoch": 1.122851686823679,
|
|
"grad_norm": 16.868727426573127,
|
|
"learning_rate": 7.882612185916308e-06,
|
|
"loss": 1.723301887512207,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 1.1234882240611075,
|
|
"grad_norm": 13.243390871390828,
|
|
"learning_rate": 7.879585061101715e-06,
|
|
"loss": 1.9012724161148071,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 1.124124761298536,
|
|
"grad_norm": 11.83742733644601,
|
|
"learning_rate": 7.876556356156124e-06,
|
|
"loss": 1.3822736740112305,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 1.1247612985359643,
|
|
"grad_norm": 10.513509466461578,
|
|
"learning_rate": 7.873526072741497e-06,
|
|
"loss": 1.6562281847000122,
|
|
"step": 1767
|
|
},
|
|
{
|
|
"epoch": 1.1253978357733927,
|
|
"grad_norm": 10.696287820390786,
|
|
"learning_rate": 7.870494212520653e-06,
|
|
"loss": 1.3009065389633179,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 1.126034373010821,
|
|
"grad_norm": 9.152413319145252,
|
|
"learning_rate": 7.867460777157289e-06,
|
|
"loss": 1.3122084140777588,
|
|
"step": 1769
|
|
},
|
|
{
|
|
"epoch": 1.1266709102482495,
|
|
"grad_norm": 13.990709454170132,
|
|
"learning_rate": 7.864425768315953e-06,
|
|
"loss": 1.8093345165252686,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 1.127307447485678,
|
|
"grad_norm": 22.660617128504835,
|
|
"learning_rate": 7.861389187662068e-06,
|
|
"loss": 1.0877732038497925,
|
|
"step": 1771
|
|
},
|
|
{
|
|
"epoch": 1.1279439847231063,
|
|
"grad_norm": 13.109659843410897,
|
|
"learning_rate": 7.858351036861908e-06,
|
|
"loss": 1.716296911239624,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 1.1285805219605347,
|
|
"grad_norm": 14.317353777136642,
|
|
"learning_rate": 7.85531131758262e-06,
|
|
"loss": 1.273411750793457,
|
|
"step": 1773
|
|
},
|
|
{
|
|
"epoch": 1.1292170591979631,
|
|
"grad_norm": 8.174385753265293,
|
|
"learning_rate": 7.852270031492201e-06,
|
|
"loss": 1.389054536819458,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 1.1298535964353915,
|
|
"grad_norm": 10.651345319559745,
|
|
"learning_rate": 7.849227180259517e-06,
|
|
"loss": 1.492347240447998,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 1.13049013367282,
|
|
"grad_norm": 10.040317898362389,
|
|
"learning_rate": 7.84618276555429e-06,
|
|
"loss": 1.1550723314285278,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 1.1311266709102483,
|
|
"grad_norm": 7.862519842844843,
|
|
"learning_rate": 7.843136789047097e-06,
|
|
"loss": 1.390535831451416,
|
|
"step": 1777
|
|
},
|
|
{
|
|
"epoch": 1.1317632081476767,
|
|
"grad_norm": 8.24074987762661,
|
|
"learning_rate": 7.840089252409374e-06,
|
|
"loss": 1.1107431650161743,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 1.1323997453851051,
|
|
"grad_norm": 21.117127745526005,
|
|
"learning_rate": 7.837040157313413e-06,
|
|
"loss": 1.3700048923492432,
|
|
"step": 1779
|
|
},
|
|
{
|
|
"epoch": 1.1330362826225335,
|
|
"grad_norm": 15.746327248024805,
|
|
"learning_rate": 7.83398950543236e-06,
|
|
"loss": 1.5517867803573608,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 1.1336728198599617,
|
|
"grad_norm": 18.013815785912914,
|
|
"learning_rate": 7.830937298440219e-06,
|
|
"loss": 2.208071708679199,
|
|
"step": 1781
|
|
},
|
|
{
|
|
"epoch": 1.1343093570973901,
|
|
"grad_norm": 8.300726112769016,
|
|
"learning_rate": 7.827883538011842e-06,
|
|
"loss": 1.2979140281677246,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 1.1349458943348185,
|
|
"grad_norm": 14.200131073264666,
|
|
"learning_rate": 7.82482822582294e-06,
|
|
"loss": 1.2123842239379883,
|
|
"step": 1783
|
|
},
|
|
{
|
|
"epoch": 1.135582431572247,
|
|
"grad_norm": 15.773698395658762,
|
|
"learning_rate": 7.821771363550067e-06,
|
|
"loss": 1.339749813079834,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 1.1362189688096753,
|
|
"grad_norm": 8.75008900624983,
|
|
"learning_rate": 7.818712952870637e-06,
|
|
"loss": 1.4869803190231323,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 1.1368555060471037,
|
|
"grad_norm": 13.827153229079332,
|
|
"learning_rate": 7.815652995462905e-06,
|
|
"loss": 2.4990031719207764,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 1.1374920432845321,
|
|
"grad_norm": 8.293429813915223,
|
|
"learning_rate": 7.812591493005982e-06,
|
|
"loss": 1.3112232685089111,
|
|
"step": 1787
|
|
},
|
|
{
|
|
"epoch": 1.1381285805219605,
|
|
"grad_norm": 10.883169553839318,
|
|
"learning_rate": 7.80952844717982e-06,
|
|
"loss": 1.0375416278839111,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 1.138765117759389,
|
|
"grad_norm": 19.383618591487423,
|
|
"learning_rate": 7.806463859665225e-06,
|
|
"loss": 1.0073294639587402,
|
|
"step": 1789
|
|
},
|
|
{
|
|
"epoch": 1.1394016549968173,
|
|
"grad_norm": 15.8668111613722,
|
|
"learning_rate": 7.803397732143843e-06,
|
|
"loss": 1.5781023502349854,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 1.1400381922342457,
|
|
"grad_norm": 17.26900887878871,
|
|
"learning_rate": 7.80033006629817e-06,
|
|
"loss": 1.7327053546905518,
|
|
"step": 1791
|
|
},
|
|
{
|
|
"epoch": 1.1406747294716741,
|
|
"grad_norm": 11.9573596402166,
|
|
"learning_rate": 7.79726086381154e-06,
|
|
"loss": 1.2857234477996826,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 1.1413112667091025,
|
|
"grad_norm": 11.302351050801876,
|
|
"learning_rate": 7.794190126368139e-06,
|
|
"loss": 1.0917978286743164,
|
|
"step": 1793
|
|
},
|
|
{
|
|
"epoch": 1.141947803946531,
|
|
"grad_norm": 12.837413206229778,
|
|
"learning_rate": 7.791117855652985e-06,
|
|
"loss": 1.8788642883300781,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 1.1425843411839594,
|
|
"grad_norm": 13.492431193858089,
|
|
"learning_rate": 7.788044053351943e-06,
|
|
"loss": 1.754347562789917,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 1.1432208784213875,
|
|
"grad_norm": 10.83367514175402,
|
|
"learning_rate": 7.784968721151722e-06,
|
|
"loss": 1.1002882719039917,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 1.143857415658816,
|
|
"grad_norm": 15.818914507619862,
|
|
"learning_rate": 7.781891860739863e-06,
|
|
"loss": 2.2377448081970215,
|
|
"step": 1797
|
|
},
|
|
{
|
|
"epoch": 1.1444939528962443,
|
|
"grad_norm": 13.614411287281241,
|
|
"learning_rate": 7.77881347380475e-06,
|
|
"loss": 2.431692123413086,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 1.1451304901336727,
|
|
"grad_norm": 25.435200541665722,
|
|
"learning_rate": 7.775733562035605e-06,
|
|
"loss": 2.7545251846313477,
|
|
"step": 1799
|
|
},
|
|
{
|
|
"epoch": 1.1457670273711011,
|
|
"grad_norm": 11.172948238391447,
|
|
"learning_rate": 7.772652127122482e-06,
|
|
"loss": 1.219679832458496,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 1.1464035646085295,
|
|
"grad_norm": 10.9219009801967,
|
|
"learning_rate": 7.769569170756277e-06,
|
|
"loss": 2.6224284172058105,
|
|
"step": 1801
|
|
},
|
|
{
|
|
"epoch": 1.147040101845958,
|
|
"grad_norm": 11.780326597680638,
|
|
"learning_rate": 7.766484694628715e-06,
|
|
"loss": 1.0525163412094116,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 1.1476766390833864,
|
|
"grad_norm": 17.373733744790346,
|
|
"learning_rate": 7.763398700432363e-06,
|
|
"loss": 1.9834010601043701,
|
|
"step": 1803
|
|
},
|
|
{
|
|
"epoch": 1.1483131763208148,
|
|
"grad_norm": 7.610458221030429,
|
|
"learning_rate": 7.760311189860613e-06,
|
|
"loss": 1.5376927852630615,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 1.1489497135582432,
|
|
"grad_norm": 9.622458596879596,
|
|
"learning_rate": 7.757222164607691e-06,
|
|
"loss": 1.308505654335022,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 1.1495862507956716,
|
|
"grad_norm": 15.326202689254016,
|
|
"learning_rate": 7.75413162636866e-06,
|
|
"loss": 1.2876720428466797,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 1.1502227880331,
|
|
"grad_norm": 15.444950525292843,
|
|
"learning_rate": 7.751039576839402e-06,
|
|
"loss": 1.4229402542114258,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 1.1508593252705284,
|
|
"grad_norm": 16.0730420050437,
|
|
"learning_rate": 7.74794601771664e-06,
|
|
"loss": 1.0552520751953125,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 1.1514958625079568,
|
|
"grad_norm": 26.543528058889677,
|
|
"learning_rate": 7.744850950697917e-06,
|
|
"loss": 2.448869228363037,
|
|
"step": 1809
|
|
},
|
|
{
|
|
"epoch": 1.1521323997453852,
|
|
"grad_norm": 15.795894360343198,
|
|
"learning_rate": 7.741754377481609e-06,
|
|
"loss": 1.8211095333099365,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 1.1527689369828136,
|
|
"grad_norm": 11.273729371681432,
|
|
"learning_rate": 7.738656299766916e-06,
|
|
"loss": 2.2845656871795654,
|
|
"step": 1811
|
|
},
|
|
{
|
|
"epoch": 1.153405474220242,
|
|
"grad_norm": 11.986781913527139,
|
|
"learning_rate": 7.73555671925386e-06,
|
|
"loss": 1.1179295778274536,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 1.1540420114576704,
|
|
"grad_norm": 14.604113835297118,
|
|
"learning_rate": 7.732455637643297e-06,
|
|
"loss": 1.9219005107879639,
|
|
"step": 1813
|
|
},
|
|
{
|
|
"epoch": 1.1546785486950988,
|
|
"grad_norm": 26.90837734397635,
|
|
"learning_rate": 7.729353056636898e-06,
|
|
"loss": 1.3155293464660645,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 1.155315085932527,
|
|
"grad_norm": 11.242979431752286,
|
|
"learning_rate": 7.726248977937156e-06,
|
|
"loss": 1.120147943496704,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 1.1559516231699554,
|
|
"grad_norm": 11.931042786205527,
|
|
"learning_rate": 7.723143403247397e-06,
|
|
"loss": 1.7911725044250488,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 1.1565881604073838,
|
|
"grad_norm": 8.92477471372061,
|
|
"learning_rate": 7.720036334271757e-06,
|
|
"loss": 1.445631980895996,
|
|
"step": 1817
|
|
},
|
|
{
|
|
"epoch": 1.1572246976448122,
|
|
"grad_norm": 11.739799772875195,
|
|
"learning_rate": 7.716927772715196e-06,
|
|
"loss": 1.9247690439224243,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 1.1578612348822406,
|
|
"grad_norm": 11.36316342840103,
|
|
"learning_rate": 7.713817720283491e-06,
|
|
"loss": 1.324549913406372,
|
|
"step": 1819
|
|
},
|
|
{
|
|
"epoch": 1.158497772119669,
|
|
"grad_norm": 10.467262638791574,
|
|
"learning_rate": 7.710706178683242e-06,
|
|
"loss": 1.5766773223876953,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 1.1591343093570974,
|
|
"grad_norm": 11.381685058632478,
|
|
"learning_rate": 7.70759314962186e-06,
|
|
"loss": 1.0806773900985718,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 1.1597708465945258,
|
|
"grad_norm": 13.481992728762078,
|
|
"learning_rate": 7.704478634807575e-06,
|
|
"loss": 1.5848543643951416,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 1.1604073838319542,
|
|
"grad_norm": 11.769840779789444,
|
|
"learning_rate": 7.701362635949433e-06,
|
|
"loss": 2.3119468688964844,
|
|
"step": 1823
|
|
},
|
|
{
|
|
"epoch": 1.1610439210693826,
|
|
"grad_norm": 10.926997952681917,
|
|
"learning_rate": 7.698245154757295e-06,
|
|
"loss": 1.5413258075714111,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 1.161680458306811,
|
|
"grad_norm": 19.13593601880582,
|
|
"learning_rate": 7.695126192941833e-06,
|
|
"loss": 1.515957236289978,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 1.1623169955442394,
|
|
"grad_norm": 11.667160824247867,
|
|
"learning_rate": 7.692005752214531e-06,
|
|
"loss": 1.2359853982925415,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 1.1629535327816678,
|
|
"grad_norm": 16.44512782464998,
|
|
"learning_rate": 7.688883834287689e-06,
|
|
"loss": 1.6302152872085571,
|
|
"step": 1827
|
|
},
|
|
{
|
|
"epoch": 1.1635900700190962,
|
|
"grad_norm": 13.784706775186404,
|
|
"learning_rate": 7.685760440874414e-06,
|
|
"loss": 1.7101160287857056,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 1.1642266072565246,
|
|
"grad_norm": 38.77274421757055,
|
|
"learning_rate": 7.682635573688621e-06,
|
|
"loss": 0.7238671779632568,
|
|
"step": 1829
|
|
},
|
|
{
|
|
"epoch": 1.1648631444939528,
|
|
"grad_norm": 11.771204327312155,
|
|
"learning_rate": 7.67950923444504e-06,
|
|
"loss": 1.0701311826705933,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 1.1654996817313812,
|
|
"grad_norm": 15.418361855175581,
|
|
"learning_rate": 7.6763814248592e-06,
|
|
"loss": 1.8999749422073364,
|
|
"step": 1831
|
|
},
|
|
{
|
|
"epoch": 1.1661362189688096,
|
|
"grad_norm": 8.98113563451655,
|
|
"learning_rate": 7.673252146647445e-06,
|
|
"loss": 1.3261454105377197,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 1.166772756206238,
|
|
"grad_norm": 17.45204550530026,
|
|
"learning_rate": 7.67012140152692e-06,
|
|
"loss": 1.6088931560516357,
|
|
"step": 1833
|
|
},
|
|
{
|
|
"epoch": 1.1674092934436664,
|
|
"grad_norm": 19.327759547982886,
|
|
"learning_rate": 7.666989191215577e-06,
|
|
"loss": 2.3624980449676514,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 1.1680458306810948,
|
|
"grad_norm": 14.35439210001662,
|
|
"learning_rate": 7.663855517432173e-06,
|
|
"loss": 1.3708419799804688,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 1.1686823679185232,
|
|
"grad_norm": 10.933628615499206,
|
|
"learning_rate": 7.660720381896262e-06,
|
|
"loss": 1.6434717178344727,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 1.1693189051559516,
|
|
"grad_norm": 8.645972322880423,
|
|
"learning_rate": 7.657583786328211e-06,
|
|
"loss": 1.540701150894165,
|
|
"step": 1837
|
|
},
|
|
{
|
|
"epoch": 1.16995544239338,
|
|
"grad_norm": 12.360248198918942,
|
|
"learning_rate": 7.654445732449178e-06,
|
|
"loss": 1.469840168952942,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 1.1705919796308084,
|
|
"grad_norm": 14.17181638673728,
|
|
"learning_rate": 7.651306221981125e-06,
|
|
"loss": 1.4820390939712524,
|
|
"step": 1839
|
|
},
|
|
{
|
|
"epoch": 1.1712285168682368,
|
|
"grad_norm": 13.19981255660452,
|
|
"learning_rate": 7.648165256646816e-06,
|
|
"loss": 1.763018250465393,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 1.1718650541056652,
|
|
"grad_norm": 19.96206173343936,
|
|
"learning_rate": 7.645022838169805e-06,
|
|
"loss": 1.2311797142028809,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 1.1725015913430936,
|
|
"grad_norm": 10.356742138040985,
|
|
"learning_rate": 7.641878968274455e-06,
|
|
"loss": 1.3975334167480469,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 1.173138128580522,
|
|
"grad_norm": 19.129275715059812,
|
|
"learning_rate": 7.638733648685919e-06,
|
|
"loss": 1.2975008487701416,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 1.1737746658179504,
|
|
"grad_norm": 11.905535019943983,
|
|
"learning_rate": 7.63558688113014e-06,
|
|
"loss": 2.2674379348754883,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 1.1744112030553788,
|
|
"grad_norm": 7.786688666985661,
|
|
"learning_rate": 7.63243866733387e-06,
|
|
"loss": 1.6974135637283325,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 1.1750477402928072,
|
|
"grad_norm": 12.52119689416767,
|
|
"learning_rate": 7.629289009024639e-06,
|
|
"loss": 1.4131574630737305,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 1.1756842775302356,
|
|
"grad_norm": 31.107198017749575,
|
|
"learning_rate": 7.626137907930782e-06,
|
|
"loss": 1.3976935148239136,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 1.1763208147676638,
|
|
"grad_norm": 10.351372658599026,
|
|
"learning_rate": 7.6229853657814166e-06,
|
|
"loss": 1.9820079803466797,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 1.1769573520050922,
|
|
"grad_norm": 10.707854928577772,
|
|
"learning_rate": 7.619831384306457e-06,
|
|
"loss": 1.7318838834762573,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 1.1775938892425206,
|
|
"grad_norm": 9.943147550417313,
|
|
"learning_rate": 7.616675965236606e-06,
|
|
"loss": 1.736013650894165,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 1.178230426479949,
|
|
"grad_norm": 12.006886202792714,
|
|
"learning_rate": 7.613519110303352e-06,
|
|
"loss": 1.7214933633804321,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 1.1788669637173774,
|
|
"grad_norm": 11.816337891212694,
|
|
"learning_rate": 7.610360821238978e-06,
|
|
"loss": 1.8942065238952637,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 1.1795035009548058,
|
|
"grad_norm": 10.752902737121095,
|
|
"learning_rate": 7.607201099776546e-06,
|
|
"loss": 1.3639030456542969,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 1.1801400381922342,
|
|
"grad_norm": 11.19879417637204,
|
|
"learning_rate": 7.604039947649909e-06,
|
|
"loss": 2.0798466205596924,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 1.1807765754296626,
|
|
"grad_norm": 8.51324032774973,
|
|
"learning_rate": 7.600877366593704e-06,
|
|
"loss": 2.0732204914093018,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 1.181413112667091,
|
|
"grad_norm": 12.805733964922954,
|
|
"learning_rate": 7.59771335834335e-06,
|
|
"loss": 1.4387568235397339,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 1.1820496499045194,
|
|
"grad_norm": 12.253794086756363,
|
|
"learning_rate": 7.594547924635056e-06,
|
|
"loss": 1.7229176759719849,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 1.1826861871419478,
|
|
"grad_norm": 17.225392838689363,
|
|
"learning_rate": 7.591381067205805e-06,
|
|
"loss": 1.971238613128662,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 1.1833227243793762,
|
|
"grad_norm": 11.216256085419301,
|
|
"learning_rate": 7.588212787793365e-06,
|
|
"loss": 1.4213664531707764,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 1.1839592616168046,
|
|
"grad_norm": 9.820068560590832,
|
|
"learning_rate": 7.585043088136283e-06,
|
|
"loss": 1.348130464553833,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 1.184595798854233,
|
|
"grad_norm": 15.906925938143027,
|
|
"learning_rate": 7.581871969973889e-06,
|
|
"loss": 1.6581814289093018,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 1.1852323360916615,
|
|
"grad_norm": 16.064981417373456,
|
|
"learning_rate": 7.578699435046286e-06,
|
|
"loss": 2.1315577030181885,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 1.1858688733290896,
|
|
"grad_norm": 9.032266355524344,
|
|
"learning_rate": 7.575525485094359e-06,
|
|
"loss": 2.0244717597961426,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 1.186505410566518,
|
|
"grad_norm": 9.942529375009329,
|
|
"learning_rate": 7.572350121859764e-06,
|
|
"loss": 1.2924580574035645,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 1.1871419478039464,
|
|
"grad_norm": 11.412308784951723,
|
|
"learning_rate": 7.569173347084939e-06,
|
|
"loss": 1.9106409549713135,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 1.1877784850413748,
|
|
"grad_norm": 11.939167987075383,
|
|
"learning_rate": 7.565995162513094e-06,
|
|
"loss": 1.6946731805801392,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 1.1884150222788032,
|
|
"grad_norm": 8.32850550886508,
|
|
"learning_rate": 7.562815569888211e-06,
|
|
"loss": 1.7525533437728882,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 1.1890515595162316,
|
|
"grad_norm": 13.010192788395944,
|
|
"learning_rate": 7.559634570955046e-06,
|
|
"loss": 2.2896313667297363,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 1.18968809675366,
|
|
"grad_norm": 15.350701716931825,
|
|
"learning_rate": 7.556452167459125e-06,
|
|
"loss": 1.0601682662963867,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 1.1903246339910885,
|
|
"grad_norm": 11.38436143738909,
|
|
"learning_rate": 7.5532683611467485e-06,
|
|
"loss": 1.624541997909546,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 1.1909611712285169,
|
|
"grad_norm": 19.38365810517717,
|
|
"learning_rate": 7.550083153764984e-06,
|
|
"loss": 1.4402720928192139,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 1.1915977084659453,
|
|
"grad_norm": 7.9509984357329255,
|
|
"learning_rate": 7.546896547061668e-06,
|
|
"loss": 1.712436556816101,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 1.1922342457033737,
|
|
"grad_norm": 13.016419894010188,
|
|
"learning_rate": 7.543708542785406e-06,
|
|
"loss": 1.6512826681137085,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 1.192870782940802,
|
|
"grad_norm": 16.13392114064012,
|
|
"learning_rate": 7.540519142685569e-06,
|
|
"loss": 1.245017409324646,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 1.1935073201782305,
|
|
"grad_norm": 13.50493833728495,
|
|
"learning_rate": 7.5373283485122954e-06,
|
|
"loss": 1.4898035526275635,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 1.1941438574156589,
|
|
"grad_norm": 20.855488269849733,
|
|
"learning_rate": 7.534136162016485e-06,
|
|
"loss": 3.2437474727630615,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 1.1947803946530873,
|
|
"grad_norm": 11.252751042792164,
|
|
"learning_rate": 7.530942584949807e-06,
|
|
"loss": 1.0865591764450073,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 1.1954169318905157,
|
|
"grad_norm": 13.032275349388597,
|
|
"learning_rate": 7.527747619064691e-06,
|
|
"loss": 1.626455545425415,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 1.196053469127944,
|
|
"grad_norm": 11.598490138086122,
|
|
"learning_rate": 7.524551266114328e-06,
|
|
"loss": 1.7297886610031128,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 1.1966900063653725,
|
|
"grad_norm": 10.894012669547493,
|
|
"learning_rate": 7.521353527852671e-06,
|
|
"loss": 1.2762043476104736,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 1.1973265436028009,
|
|
"grad_norm": 14.494653177716215,
|
|
"learning_rate": 7.518154406034431e-06,
|
|
"loss": 1.3345061540603638,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 1.197963080840229,
|
|
"grad_norm": 11.458183627356052,
|
|
"learning_rate": 7.514953902415083e-06,
|
|
"loss": 1.5401028394699097,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 1.1985996180776575,
|
|
"grad_norm": 6.092832911656193,
|
|
"learning_rate": 7.5117520187508575e-06,
|
|
"loss": 1.7554508447647095,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 1.1992361553150859,
|
|
"grad_norm": 11.981245755129045,
|
|
"learning_rate": 7.508548756798739e-06,
|
|
"loss": 1.2973220348358154,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 1.1998726925525143,
|
|
"grad_norm": 13.132185393244692,
|
|
"learning_rate": 7.505344118316475e-06,
|
|
"loss": 1.973362684249878,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 1.2005092297899427,
|
|
"grad_norm": 48.10276803054581,
|
|
"learning_rate": 7.5021381050625654e-06,
|
|
"loss": 1.0365291833877563,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 1.201145767027371,
|
|
"grad_norm": 16.014538047607346,
|
|
"learning_rate": 7.49893071879626e-06,
|
|
"loss": 1.9781842231750488,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 1.2017823042647995,
|
|
"grad_norm": 11.269003654897768,
|
|
"learning_rate": 7.495721961277569e-06,
|
|
"loss": 1.3941630125045776,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 1.2024188415022279,
|
|
"grad_norm": 11.591777344228708,
|
|
"learning_rate": 7.492511834267251e-06,
|
|
"loss": 1.9853066205978394,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 1.2030553787396563,
|
|
"grad_norm": 12.068802080673693,
|
|
"learning_rate": 7.489300339526817e-06,
|
|
"loss": 1.7171443700790405,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 1.2036919159770847,
|
|
"grad_norm": 22.920721006940184,
|
|
"learning_rate": 7.486087478818531e-06,
|
|
"loss": 2.167785167694092,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 1.204328453214513,
|
|
"grad_norm": 12.688299047469862,
|
|
"learning_rate": 7.4828732539054005e-06,
|
|
"loss": 1.8467473983764648,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 1.2049649904519415,
|
|
"grad_norm": 14.15099463149888,
|
|
"learning_rate": 7.479657666551188e-06,
|
|
"loss": 1.6865915060043335,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 1.20560152768937,
|
|
"grad_norm": 12.599599937093739,
|
|
"learning_rate": 7.4764407185204e-06,
|
|
"loss": 1.6907148361206055,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 1.2062380649267983,
|
|
"grad_norm": 10.347272305572426,
|
|
"learning_rate": 7.473222411578289e-06,
|
|
"loss": 1.4749281406402588,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 1.2068746021642265,
|
|
"grad_norm": 10.44069149604762,
|
|
"learning_rate": 7.47000274749086e-06,
|
|
"loss": 1.3367358446121216,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 1.2075111394016549,
|
|
"grad_norm": 18.336987946116935,
|
|
"learning_rate": 7.466781728024851e-06,
|
|
"loss": 1.7666252851486206,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 1.2081476766390833,
|
|
"grad_norm": 16.32370262672562,
|
|
"learning_rate": 7.463559354947755e-06,
|
|
"loss": 1.9802442789077759,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 1.2087842138765117,
|
|
"grad_norm": 22.8740603627606,
|
|
"learning_rate": 7.4603356300278e-06,
|
|
"loss": 2.067876100540161,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 1.20942075111394,
|
|
"grad_norm": 14.307070850765847,
|
|
"learning_rate": 7.45711055503396e-06,
|
|
"loss": 0.8940960168838501,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 1.2100572883513685,
|
|
"grad_norm": 9.175223965297342,
|
|
"learning_rate": 7.453884131735949e-06,
|
|
"loss": 1.8019193410873413,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 1.210693825588797,
|
|
"grad_norm": 15.949480275539605,
|
|
"learning_rate": 7.4506563619042205e-06,
|
|
"loss": 1.8527977466583252,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 1.2113303628262253,
|
|
"grad_norm": 9.419830167238736,
|
|
"learning_rate": 7.447427247309966e-06,
|
|
"loss": 1.4505186080932617,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 1.2119669000636537,
|
|
"grad_norm": 10.233950621374445,
|
|
"learning_rate": 7.444196789725117e-06,
|
|
"loss": 1.766714096069336,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 1.212603437301082,
|
|
"grad_norm": 8.783456450910034,
|
|
"learning_rate": 7.440964990922338e-06,
|
|
"loss": 1.0600299835205078,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 1.2132399745385105,
|
|
"grad_norm": 11.519901378517396,
|
|
"learning_rate": 7.437731852675036e-06,
|
|
"loss": 1.3673579692840576,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 1.213876511775939,
|
|
"grad_norm": 12.291262365507965,
|
|
"learning_rate": 7.434497376757347e-06,
|
|
"loss": 1.8734787702560425,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 1.2145130490133673,
|
|
"grad_norm": 10.393224808246323,
|
|
"learning_rate": 7.431261564944145e-06,
|
|
"loss": 0.9546700716018677,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 1.2151495862507957,
|
|
"grad_norm": 14.151477044961855,
|
|
"learning_rate": 7.428024419011033e-06,
|
|
"loss": 1.3177452087402344,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 1.2157861234882241,
|
|
"grad_norm": 7.269007351633296,
|
|
"learning_rate": 7.4247859407343495e-06,
|
|
"loss": 1.0580735206604004,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 1.2164226607256525,
|
|
"grad_norm": 8.089654185837398,
|
|
"learning_rate": 7.421546131891164e-06,
|
|
"loss": 1.882283329963684,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 1.217059197963081,
|
|
"grad_norm": 13.459427281395609,
|
|
"learning_rate": 7.418304994259273e-06,
|
|
"loss": 2.182218074798584,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 1.2176957352005093,
|
|
"grad_norm": 12.694279765624392,
|
|
"learning_rate": 7.415062529617208e-06,
|
|
"loss": 1.1375731229782104,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 1.2183322724379377,
|
|
"grad_norm": 10.727215231250955,
|
|
"learning_rate": 7.411818739744221e-06,
|
|
"loss": 1.3777339458465576,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 1.218968809675366,
|
|
"grad_norm": 9.732416797658074,
|
|
"learning_rate": 7.408573626420295e-06,
|
|
"loss": 1.4339038133621216,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 1.2196053469127943,
|
|
"grad_norm": 10.419394245073262,
|
|
"learning_rate": 7.405327191426142e-06,
|
|
"loss": 1.509937047958374,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 1.2202418841502227,
|
|
"grad_norm": 10.317576358758561,
|
|
"learning_rate": 7.402079436543195e-06,
|
|
"loss": 1.7624815702438354,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 1.2208784213876511,
|
|
"grad_norm": 12.729222656689798,
|
|
"learning_rate": 7.398830363553615e-06,
|
|
"loss": 1.4026525020599365,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 1.2215149586250795,
|
|
"grad_norm": 14.685361883429874,
|
|
"learning_rate": 7.3955799742402825e-06,
|
|
"loss": 1.026107907295227,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 1.222151495862508,
|
|
"grad_norm": 19.168456503917845,
|
|
"learning_rate": 7.392328270386801e-06,
|
|
"loss": 1.0268229246139526,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 1.2227880330999363,
|
|
"grad_norm": 9.54643154777968,
|
|
"learning_rate": 7.3890752537774975e-06,
|
|
"loss": 1.7518980503082275,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 1.2234245703373647,
|
|
"grad_norm": 10.395051446444308,
|
|
"learning_rate": 7.385820926197419e-06,
|
|
"loss": 1.529889464378357,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 1.2240611075747931,
|
|
"grad_norm": 18.2093731668895,
|
|
"learning_rate": 7.382565289432331e-06,
|
|
"loss": 1.613027572631836,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 1.2246976448122215,
|
|
"grad_norm": 23.82887472108906,
|
|
"learning_rate": 7.379308345268716e-06,
|
|
"loss": 1.5242559909820557,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 1.22533418204965,
|
|
"grad_norm": 13.673385908421059,
|
|
"learning_rate": 7.3760500954937765e-06,
|
|
"loss": 1.6235510110855103,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 1.2259707192870783,
|
|
"grad_norm": 9.144281260916904,
|
|
"learning_rate": 7.372790541895429e-06,
|
|
"loss": 1.1471562385559082,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 1.2266072565245068,
|
|
"grad_norm": 15.216430272604757,
|
|
"learning_rate": 7.369529686262309e-06,
|
|
"loss": 1.7793552875518799,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 1.2272437937619352,
|
|
"grad_norm": 8.894150277277527,
|
|
"learning_rate": 7.3662675303837625e-06,
|
|
"loss": 0.7938424944877625,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 1.2278803309993636,
|
|
"grad_norm": 15.264368220129498,
|
|
"learning_rate": 7.3630040760498526e-06,
|
|
"loss": 1.5442453622817993,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 1.2285168682367917,
|
|
"grad_norm": 11.649599369756318,
|
|
"learning_rate": 7.359739325051351e-06,
|
|
"loss": 1.6375675201416016,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 1.2291534054742201,
|
|
"grad_norm": 13.56701183347168,
|
|
"learning_rate": 7.356473279179743e-06,
|
|
"loss": 1.2604639530181885,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 1.2297899427116485,
|
|
"grad_norm": 12.737330913380749,
|
|
"learning_rate": 7.353205940227225e-06,
|
|
"loss": 1.1772258281707764,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 1.230426479949077,
|
|
"grad_norm": 8.291940590066453,
|
|
"learning_rate": 7.349937309986703e-06,
|
|
"loss": 1.5388436317443848,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 1.2310630171865053,
|
|
"grad_norm": 13.070371482826912,
|
|
"learning_rate": 7.3466673902517915e-06,
|
|
"loss": 1.3742997646331787,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 1.2316995544239338,
|
|
"grad_norm": 12.425396825123366,
|
|
"learning_rate": 7.34339618281681e-06,
|
|
"loss": 0.957462728023529,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 1.2323360916613622,
|
|
"grad_norm": 13.523298976937086,
|
|
"learning_rate": 7.340123689476788e-06,
|
|
"loss": 1.2024550437927246,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 1.2329726288987906,
|
|
"grad_norm": 11.391117925680897,
|
|
"learning_rate": 7.3368499120274595e-06,
|
|
"loss": 1.7216256856918335,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 1.233609166136219,
|
|
"grad_norm": 8.126973409603934,
|
|
"learning_rate": 7.333574852265261e-06,
|
|
"loss": 1.8681612014770508,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 1.2342457033736474,
|
|
"grad_norm": 9.012360390891047,
|
|
"learning_rate": 7.330298511987337e-06,
|
|
"loss": 1.4095933437347412,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 1.2348822406110758,
|
|
"grad_norm": 18.81487800771032,
|
|
"learning_rate": 7.327020892991531e-06,
|
|
"loss": 1.675499677658081,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 1.2355187778485042,
|
|
"grad_norm": 8.353732734462897,
|
|
"learning_rate": 7.32374199707639e-06,
|
|
"loss": 1.9631736278533936,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 1.2361553150859326,
|
|
"grad_norm": 11.817769020339089,
|
|
"learning_rate": 7.3204618260411606e-06,
|
|
"loss": 1.558953046798706,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 1.236791852323361,
|
|
"grad_norm": 14.333347533600476,
|
|
"learning_rate": 7.317180381685789e-06,
|
|
"loss": 1.446763515472412,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 1.2374283895607894,
|
|
"grad_norm": 11.845641737043543,
|
|
"learning_rate": 7.313897665810923e-06,
|
|
"loss": 1.7084661722183228,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 1.2380649267982178,
|
|
"grad_norm": 18.63755781167361,
|
|
"learning_rate": 7.310613680217901e-06,
|
|
"loss": 1.6522732973098755,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 1.2387014640356462,
|
|
"grad_norm": 6.604413600891718,
|
|
"learning_rate": 7.30732842670877e-06,
|
|
"loss": 1.3374499082565308,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 1.2393380012730746,
|
|
"grad_norm": 9.848894297617093,
|
|
"learning_rate": 7.304041907086262e-06,
|
|
"loss": 2.0001447200775146,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 1.239974538510503,
|
|
"grad_norm": 14.55893211755636,
|
|
"learning_rate": 7.300754123153806e-06,
|
|
"loss": 1.9936014413833618,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 1.2406110757479312,
|
|
"grad_norm": 15.810108156059623,
|
|
"learning_rate": 7.297465076715528e-06,
|
|
"loss": 2.4722843170166016,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 1.2412476129853596,
|
|
"grad_norm": 7.731696396145016,
|
|
"learning_rate": 7.294174769576243e-06,
|
|
"loss": 0.870339035987854,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 1.241884150222788,
|
|
"grad_norm": 16.16220622173208,
|
|
"learning_rate": 7.290883203541464e-06,
|
|
"loss": 1.5957900285720825,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 1.2425206874602164,
|
|
"grad_norm": 9.38622954000017,
|
|
"learning_rate": 7.287590380417389e-06,
|
|
"loss": 2.2406721115112305,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 1.2431572246976448,
|
|
"grad_norm": 11.29560243287352,
|
|
"learning_rate": 7.284296302010905e-06,
|
|
"loss": 1.4800920486450195,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 1.2437937619350732,
|
|
"grad_norm": 8.933884439359806,
|
|
"learning_rate": 7.281000970129593e-06,
|
|
"loss": 1.5932163000106812,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 1.2444302991725016,
|
|
"grad_norm": 12.318508004136184,
|
|
"learning_rate": 7.277704386581716e-06,
|
|
"loss": 1.2271223068237305,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 1.24506683640993,
|
|
"grad_norm": 12.675989142462607,
|
|
"learning_rate": 7.274406553176232e-06,
|
|
"loss": 1.360648512840271,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 1.2457033736473584,
|
|
"grad_norm": 15.242853964555625,
|
|
"learning_rate": 7.271107471722776e-06,
|
|
"loss": 1.1471834182739258,
|
|
"step": 1957
|
|
},
|
|
{
|
|
"epoch": 1.2463399108847868,
|
|
"grad_norm": 17.277403268443276,
|
|
"learning_rate": 7.267807144031671e-06,
|
|
"loss": 1.5737574100494385,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 1.2469764481222152,
|
|
"grad_norm": 15.151191106733325,
|
|
"learning_rate": 7.264505571913927e-06,
|
|
"loss": 1.2921802997589111,
|
|
"step": 1959
|
|
},
|
|
{
|
|
"epoch": 1.2476129853596436,
|
|
"grad_norm": 7.124277759919316,
|
|
"learning_rate": 7.2612027571812335e-06,
|
|
"loss": 1.5414897203445435,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 1.248249522597072,
|
|
"grad_norm": 7.852127681955942,
|
|
"learning_rate": 7.257898701645962e-06,
|
|
"loss": 1.8702853918075562,
|
|
"step": 1961
|
|
},
|
|
{
|
|
"epoch": 1.2488860598345004,
|
|
"grad_norm": 9.455603758918162,
|
|
"learning_rate": 7.2545934071211675e-06,
|
|
"loss": 1.3539278507232666,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 1.2495225970719286,
|
|
"grad_norm": 14.359858074624915,
|
|
"learning_rate": 7.25128687542058e-06,
|
|
"loss": 0.6792780160903931,
|
|
"step": 1963
|
|
},
|
|
{
|
|
"epoch": 1.250159134309357,
|
|
"grad_norm": 8.643654455985653,
|
|
"learning_rate": 7.247979108358615e-06,
|
|
"loss": 1.5339438915252686,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 1.2507956715467854,
|
|
"grad_norm": 10.896720766576127,
|
|
"learning_rate": 7.244670107750358e-06,
|
|
"loss": 1.652394413948059,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 1.2514322087842138,
|
|
"grad_norm": 13.721184303942685,
|
|
"learning_rate": 7.2413598754115785e-06,
|
|
"loss": 1.497282862663269,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 1.2520687460216422,
|
|
"grad_norm": 9.695875545347585,
|
|
"learning_rate": 7.238048413158718e-06,
|
|
"loss": 1.332318663597107,
|
|
"step": 1967
|
|
},
|
|
{
|
|
"epoch": 1.2527052832590706,
|
|
"grad_norm": 8.380761797235664,
|
|
"learning_rate": 7.234735722808895e-06,
|
|
"loss": 1.8001642227172852,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 1.253341820496499,
|
|
"grad_norm": 9.735036157652448,
|
|
"learning_rate": 7.231421806179899e-06,
|
|
"loss": 1.1970624923706055,
|
|
"step": 1969
|
|
},
|
|
{
|
|
"epoch": 1.2539783577339274,
|
|
"grad_norm": 13.537763020398728,
|
|
"learning_rate": 7.228106665090196e-06,
|
|
"loss": 1.8446029424667358,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 1.2546148949713558,
|
|
"grad_norm": 18.120197954506995,
|
|
"learning_rate": 7.22479030135892e-06,
|
|
"loss": 1.7946934700012207,
|
|
"step": 1971
|
|
},
|
|
{
|
|
"epoch": 1.2552514322087842,
|
|
"grad_norm": 10.984008655652975,
|
|
"learning_rate": 7.221472716805881e-06,
|
|
"loss": 2.002692461013794,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 1.2558879694462126,
|
|
"grad_norm": 14.064546620525391,
|
|
"learning_rate": 7.218153913251553e-06,
|
|
"loss": 1.6635444164276123,
|
|
"step": 1973
|
|
},
|
|
{
|
|
"epoch": 1.256524506683641,
|
|
"grad_norm": 11.49170384270617,
|
|
"learning_rate": 7.214833892517084e-06,
|
|
"loss": 2.171565294265747,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 1.2571610439210694,
|
|
"grad_norm": 12.582796268957058,
|
|
"learning_rate": 7.211512656424287e-06,
|
|
"loss": 1.2512736320495605,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 1.2577975811584978,
|
|
"grad_norm": 10.677004325528,
|
|
"learning_rate": 7.208190206795641e-06,
|
|
"loss": 1.4064788818359375,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 1.2584341183959262,
|
|
"grad_norm": 10.900151676996787,
|
|
"learning_rate": 7.2048665454542954e-06,
|
|
"loss": 1.9992822408676147,
|
|
"step": 1977
|
|
},
|
|
{
|
|
"epoch": 1.2590706556333546,
|
|
"grad_norm": 17.393340138680646,
|
|
"learning_rate": 7.2015416742240595e-06,
|
|
"loss": 3.2122464179992676,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 1.259707192870783,
|
|
"grad_norm": 15.261897561606352,
|
|
"learning_rate": 7.19821559492941e-06,
|
|
"loss": 2.054863214492798,
|
|
"step": 1979
|
|
},
|
|
{
|
|
"epoch": 1.2603437301082114,
|
|
"grad_norm": 15.896242229019826,
|
|
"learning_rate": 7.194888309395486e-06,
|
|
"loss": 1.4458142518997192,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 1.2609802673456398,
|
|
"grad_norm": 11.826879293543872,
|
|
"learning_rate": 7.191559819448086e-06,
|
|
"loss": 1.6558723449707031,
|
|
"step": 1981
|
|
},
|
|
{
|
|
"epoch": 1.2616168045830682,
|
|
"grad_norm": 12.788160019710507,
|
|
"learning_rate": 7.188230126913671e-06,
|
|
"loss": 1.4784185886383057,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 1.2622533418204964,
|
|
"grad_norm": 16.074729863093676,
|
|
"learning_rate": 7.184899233619362e-06,
|
|
"loss": 1.4221988916397095,
|
|
"step": 1983
|
|
},
|
|
{
|
|
"epoch": 1.2628898790579248,
|
|
"grad_norm": 8.801890393952833,
|
|
"learning_rate": 7.181567141392941e-06,
|
|
"loss": 1.517238736152649,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 1.2635264162953532,
|
|
"grad_norm": 10.54471122109457,
|
|
"learning_rate": 7.178233852062844e-06,
|
|
"loss": 1.365362524986267,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 1.2641629535327816,
|
|
"grad_norm": 11.962413012543848,
|
|
"learning_rate": 7.174899367458166e-06,
|
|
"loss": 1.3890221118927002,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 1.26479949077021,
|
|
"grad_norm": 25.494484636850306,
|
|
"learning_rate": 7.171563689408657e-06,
|
|
"loss": 1.5127917528152466,
|
|
"step": 1987
|
|
},
|
|
{
|
|
"epoch": 1.2654360280076384,
|
|
"grad_norm": 12.67963301824009,
|
|
"learning_rate": 7.168226819744723e-06,
|
|
"loss": 1.737844467163086,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 1.2660725652450668,
|
|
"grad_norm": 8.567371030578066,
|
|
"learning_rate": 7.1648887602974234e-06,
|
|
"loss": 1.6531749963760376,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 1.2667091024824952,
|
|
"grad_norm": 8.328964482464091,
|
|
"learning_rate": 7.161549512898472e-06,
|
|
"loss": 2.115635395050049,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 1.2673456397199236,
|
|
"grad_norm": 9.676401838353584,
|
|
"learning_rate": 7.1582090793802305e-06,
|
|
"loss": 1.2644509077072144,
|
|
"step": 1991
|
|
},
|
|
{
|
|
"epoch": 1.267982176957352,
|
|
"grad_norm": 12.357088545404718,
|
|
"learning_rate": 7.154867461575715e-06,
|
|
"loss": 1.1090812683105469,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 1.2686187141947805,
|
|
"grad_norm": 11.789160247345793,
|
|
"learning_rate": 7.151524661318591e-06,
|
|
"loss": 1.8918412923812866,
|
|
"step": 1993
|
|
},
|
|
{
|
|
"epoch": 1.2692552514322089,
|
|
"grad_norm": 12.369888357071828,
|
|
"learning_rate": 7.14818068044317e-06,
|
|
"loss": 1.4245556592941284,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 1.2698917886696373,
|
|
"grad_norm": 9.486321675919198,
|
|
"learning_rate": 7.144835520784416e-06,
|
|
"loss": 1.4290460348129272,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 1.2705283259070654,
|
|
"grad_norm": 7.9313436358179095,
|
|
"learning_rate": 7.141489184177934e-06,
|
|
"loss": 1.4305641651153564,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 1.2711648631444938,
|
|
"grad_norm": 15.104715008058536,
|
|
"learning_rate": 7.138141672459984e-06,
|
|
"loss": 1.949066400527954,
|
|
"step": 1997
|
|
},
|
|
{
|
|
"epoch": 1.2718014003819222,
|
|
"grad_norm": 15.101458585570494,
|
|
"learning_rate": 7.13479298746746e-06,
|
|
"loss": 1.6702244281768799,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 1.2724379376193506,
|
|
"grad_norm": 21.074675537203603,
|
|
"learning_rate": 7.131443131037906e-06,
|
|
"loss": 0.8868348598480225,
|
|
"step": 1999
|
|
},
|
|
{
|
|
"epoch": 1.273074474856779,
|
|
"grad_norm": 10.140580524034965,
|
|
"learning_rate": 7.128092105009509e-06,
|
|
"loss": 1.5796661376953125,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 1.2737110120942075,
|
|
"grad_norm": 12.149644745921481,
|
|
"learning_rate": 7.124739911221094e-06,
|
|
"loss": 1.8856110572814941,
|
|
"step": 2001
|
|
},
|
|
{
|
|
"epoch": 1.2743475493316359,
|
|
"grad_norm": 17.78423658720528,
|
|
"learning_rate": 7.1213865515121315e-06,
|
|
"loss": 1.5670487880706787,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 1.2749840865690643,
|
|
"grad_norm": 13.651830225927696,
|
|
"learning_rate": 7.118032027722729e-06,
|
|
"loss": 1.7737964391708374,
|
|
"step": 2003
|
|
},
|
|
{
|
|
"epoch": 1.2756206238064927,
|
|
"grad_norm": 9.323624641875844,
|
|
"learning_rate": 7.114676341693633e-06,
|
|
"loss": 1.735838770866394,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 1.276257161043921,
|
|
"grad_norm": 15.74868714614688,
|
|
"learning_rate": 7.111319495266228e-06,
|
|
"loss": 1.7061078548431396,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 1.2768936982813495,
|
|
"grad_norm": 14.577106488605871,
|
|
"learning_rate": 7.107961490282535e-06,
|
|
"loss": 1.8450149297714233,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 1.2775302355187779,
|
|
"grad_norm": 11.567185054926867,
|
|
"learning_rate": 7.104602328585213e-06,
|
|
"loss": 1.1935040950775146,
|
|
"step": 2007
|
|
},
|
|
{
|
|
"epoch": 1.2781667727562063,
|
|
"grad_norm": 29.766723610848548,
|
|
"learning_rate": 7.101242012017551e-06,
|
|
"loss": 3.633208751678467,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 1.2788033099936347,
|
|
"grad_norm": 12.374153481618576,
|
|
"learning_rate": 7.097880542423477e-06,
|
|
"loss": 1.8976576328277588,
|
|
"step": 2009
|
|
},
|
|
{
|
|
"epoch": 1.279439847231063,
|
|
"grad_norm": 9.798275521758873,
|
|
"learning_rate": 7.094517921647547e-06,
|
|
"loss": 1.4608005285263062,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 1.2800763844684915,
|
|
"grad_norm": 11.77683814782217,
|
|
"learning_rate": 7.091154151534953e-06,
|
|
"loss": 1.8218762874603271,
|
|
"step": 2011
|
|
},
|
|
{
|
|
"epoch": 1.2807129217059199,
|
|
"grad_norm": 7.679624917182098,
|
|
"learning_rate": 7.087789233931514e-06,
|
|
"loss": 1.4904704093933105,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 1.2813494589433483,
|
|
"grad_norm": 9.850412372734413,
|
|
"learning_rate": 7.08442317068368e-06,
|
|
"loss": 1.1225043535232544,
|
|
"step": 2013
|
|
},
|
|
{
|
|
"epoch": 1.2819859961807767,
|
|
"grad_norm": 22.3080237103038,
|
|
"learning_rate": 7.081055963638533e-06,
|
|
"loss": 1.6578782796859741,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 1.282622533418205,
|
|
"grad_norm": 23.354108651261082,
|
|
"learning_rate": 7.077687614643778e-06,
|
|
"loss": 1.5169906616210938,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 1.2832590706556333,
|
|
"grad_norm": 11.337491078888114,
|
|
"learning_rate": 7.074318125547745e-06,
|
|
"loss": 2.2514548301696777,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 1.2838956078930617,
|
|
"grad_norm": 11.375855346550463,
|
|
"learning_rate": 7.070947498199396e-06,
|
|
"loss": 1.5008771419525146,
|
|
"step": 2017
|
|
},
|
|
{
|
|
"epoch": 1.28453214513049,
|
|
"grad_norm": 9.429966713722655,
|
|
"learning_rate": 7.067575734448315e-06,
|
|
"loss": 1.3150955438613892,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 1.2851686823679185,
|
|
"grad_norm": 14.907133668929445,
|
|
"learning_rate": 7.064202836144707e-06,
|
|
"loss": 1.8760895729064941,
|
|
"step": 2019
|
|
},
|
|
{
|
|
"epoch": 1.2858052196053469,
|
|
"grad_norm": 9.708287072038424,
|
|
"learning_rate": 7.060828805139402e-06,
|
|
"loss": 1.8177990913391113,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 1.2864417568427753,
|
|
"grad_norm": 17.331700123350217,
|
|
"learning_rate": 7.057453643283851e-06,
|
|
"loss": 1.7386019229888916,
|
|
"step": 2021
|
|
},
|
|
{
|
|
"epoch": 1.2870782940802037,
|
|
"grad_norm": 11.728116844052103,
|
|
"learning_rate": 7.0540773524301275e-06,
|
|
"loss": 1.9298803806304932,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 1.287714831317632,
|
|
"grad_norm": 10.258038328070816,
|
|
"learning_rate": 7.0506999344309205e-06,
|
|
"loss": 1.8520281314849854,
|
|
"step": 2023
|
|
},
|
|
{
|
|
"epoch": 1.2883513685550605,
|
|
"grad_norm": 13.236169943162205,
|
|
"learning_rate": 7.04732139113954e-06,
|
|
"loss": 1.4344263076782227,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 1.288987905792489,
|
|
"grad_norm": 27.691402197217176,
|
|
"learning_rate": 7.043941724409915e-06,
|
|
"loss": 1.9201428890228271,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 1.2896244430299173,
|
|
"grad_norm": 9.082853874702812,
|
|
"learning_rate": 7.040560936096588e-06,
|
|
"loss": 1.872815728187561,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 1.2902609802673457,
|
|
"grad_norm": 9.519775269621894,
|
|
"learning_rate": 7.037179028054716e-06,
|
|
"loss": 1.6231926679611206,
|
|
"step": 2027
|
|
},
|
|
{
|
|
"epoch": 1.290897517504774,
|
|
"grad_norm": 12.265492071269582,
|
|
"learning_rate": 7.0337960021400755e-06,
|
|
"loss": 1.675493836402893,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 1.2915340547422023,
|
|
"grad_norm": 8.0998711496812,
|
|
"learning_rate": 7.030411860209052e-06,
|
|
"loss": 1.3420138359069824,
|
|
"step": 2029
|
|
},
|
|
{
|
|
"epoch": 1.2921705919796307,
|
|
"grad_norm": 14.83520231632638,
|
|
"learning_rate": 7.027026604118645e-06,
|
|
"loss": 1.67080557346344,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 1.292807129217059,
|
|
"grad_norm": 8.11513909549515,
|
|
"learning_rate": 7.023640235726467e-06,
|
|
"loss": 1.672710657119751,
|
|
"step": 2031
|
|
},
|
|
{
|
|
"epoch": 1.2934436664544875,
|
|
"grad_norm": 15.093764989824638,
|
|
"learning_rate": 7.020252756890736e-06,
|
|
"loss": 2.1060738563537598,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 1.294080203691916,
|
|
"grad_norm": 15.423871848235027,
|
|
"learning_rate": 7.016864169470284e-06,
|
|
"loss": 1.3418937921524048,
|
|
"step": 2033
|
|
},
|
|
{
|
|
"epoch": 1.2947167409293443,
|
|
"grad_norm": 12.45850837138449,
|
|
"learning_rate": 7.01347447532455e-06,
|
|
"loss": 1.6437814235687256,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 1.2953532781667727,
|
|
"grad_norm": 9.53408634455495,
|
|
"learning_rate": 7.01008367631358e-06,
|
|
"loss": 1.5858882665634155,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 1.295989815404201,
|
|
"grad_norm": 15.835859764318025,
|
|
"learning_rate": 7.006691774298025e-06,
|
|
"loss": 1.6233675479888916,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 1.2966263526416295,
|
|
"grad_norm": 13.247317583251935,
|
|
"learning_rate": 7.003298771139144e-06,
|
|
"loss": 2.148451328277588,
|
|
"step": 2037
|
|
},
|
|
{
|
|
"epoch": 1.297262889879058,
|
|
"grad_norm": 9.240344580069769,
|
|
"learning_rate": 6.999904668698799e-06,
|
|
"loss": 1.329669713973999,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 1.2978994271164863,
|
|
"grad_norm": 11.93373973334314,
|
|
"learning_rate": 6.996509468839453e-06,
|
|
"loss": 1.0661579370498657,
|
|
"step": 2039
|
|
},
|
|
{
|
|
"epoch": 1.2985359643539147,
|
|
"grad_norm": 11.492354390856546,
|
|
"learning_rate": 6.9931131734241766e-06,
|
|
"loss": 1.4517229795455933,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 1.2991725015913431,
|
|
"grad_norm": 8.709574489210635,
|
|
"learning_rate": 6.989715784316635e-06,
|
|
"loss": 1.35374116897583,
|
|
"step": 2041
|
|
},
|
|
{
|
|
"epoch": 1.2998090388287715,
|
|
"grad_norm": 15.839168012300563,
|
|
"learning_rate": 6.986317303381098e-06,
|
|
"loss": 1.1875379085540771,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 1.3004455760662,
|
|
"grad_norm": 12.002009511836185,
|
|
"learning_rate": 6.982917732482434e-06,
|
|
"loss": 1.3147296905517578,
|
|
"step": 2043
|
|
},
|
|
{
|
|
"epoch": 1.3010821133036283,
|
|
"grad_norm": 12.279551382216725,
|
|
"learning_rate": 6.979517073486107e-06,
|
|
"loss": 1.1447714567184448,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 1.3017186505410567,
|
|
"grad_norm": 7.876509462548768,
|
|
"learning_rate": 6.9761153282581804e-06,
|
|
"loss": 1.0903241634368896,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 1.3023551877784851,
|
|
"grad_norm": 11.317979083182502,
|
|
"learning_rate": 6.972712498665315e-06,
|
|
"loss": 1.6404948234558105,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 1.3029917250159135,
|
|
"grad_norm": 10.268270285673013,
|
|
"learning_rate": 6.969308586574763e-06,
|
|
"loss": 1.132559061050415,
|
|
"step": 2047
|
|
},
|
|
{
|
|
"epoch": 1.303628262253342,
|
|
"grad_norm": 10.786154234965135,
|
|
"learning_rate": 6.965903593854372e-06,
|
|
"loss": 1.2636125087738037,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 1.3042647994907703,
|
|
"grad_norm": 8.923504311774622,
|
|
"learning_rate": 6.962497522372584e-06,
|
|
"loss": 1.4736952781677246,
|
|
"step": 2049
|
|
},
|
|
{
|
|
"epoch": 1.3049013367281985,
|
|
"grad_norm": 8.6728743118788,
|
|
"learning_rate": 6.959090373998431e-06,
|
|
"loss": 1.9195170402526855,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 1.305537873965627,
|
|
"grad_norm": 7.665220601241584,
|
|
"learning_rate": 6.955682150601538e-06,
|
|
"loss": 1.4942491054534912,
|
|
"step": 2051
|
|
},
|
|
{
|
|
"epoch": 1.3061744112030553,
|
|
"grad_norm": 13.84671648780625,
|
|
"learning_rate": 6.9522728540521166e-06,
|
|
"loss": 2.2120413780212402,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 1.3068109484404837,
|
|
"grad_norm": 10.970140333694308,
|
|
"learning_rate": 6.9488624862209716e-06,
|
|
"loss": 1.000679850578308,
|
|
"step": 2053
|
|
},
|
|
{
|
|
"epoch": 1.3074474856779121,
|
|
"grad_norm": 14.96466821479489,
|
|
"learning_rate": 6.945451048979492e-06,
|
|
"loss": 0.9493977427482605,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 1.3080840229153405,
|
|
"grad_norm": 10.075101987656128,
|
|
"learning_rate": 6.9420385441996565e-06,
|
|
"loss": 1.9519625902175903,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 1.308720560152769,
|
|
"grad_norm": 10.501008247507887,
|
|
"learning_rate": 6.938624973754027e-06,
|
|
"loss": 0.9726603031158447,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 1.3093570973901973,
|
|
"grad_norm": 22.55639172223488,
|
|
"learning_rate": 6.93521033951575e-06,
|
|
"loss": 1.9709603786468506,
|
|
"step": 2057
|
|
},
|
|
{
|
|
"epoch": 1.3099936346276257,
|
|
"grad_norm": 9.24041575267706,
|
|
"learning_rate": 6.93179464335856e-06,
|
|
"loss": 1.548864722251892,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 1.3106301718650541,
|
|
"grad_norm": 10.444488265025456,
|
|
"learning_rate": 6.92837788715677e-06,
|
|
"loss": 1.6025577783584595,
|
|
"step": 2059
|
|
},
|
|
{
|
|
"epoch": 1.3112667091024826,
|
|
"grad_norm": 12.361006266083994,
|
|
"learning_rate": 6.924960072785274e-06,
|
|
"loss": 0.9072170257568359,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 1.311903246339911,
|
|
"grad_norm": 10.824248532783093,
|
|
"learning_rate": 6.921541202119552e-06,
|
|
"loss": 1.682476282119751,
|
|
"step": 2061
|
|
},
|
|
{
|
|
"epoch": 1.3125397835773391,
|
|
"grad_norm": 9.488210016660991,
|
|
"learning_rate": 6.918121277035657e-06,
|
|
"loss": 1.2035472393035889,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 1.3131763208147675,
|
|
"grad_norm": 8.83075244244878,
|
|
"learning_rate": 6.914700299410226e-06,
|
|
"loss": 1.960437536239624,
|
|
"step": 2063
|
|
},
|
|
{
|
|
"epoch": 1.313812858052196,
|
|
"grad_norm": 9.35789480959845,
|
|
"learning_rate": 6.9112782711204725e-06,
|
|
"loss": 1.5221984386444092,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 1.3144493952896243,
|
|
"grad_norm": 11.0167554457926,
|
|
"learning_rate": 6.9078551940441815e-06,
|
|
"loss": 1.681092619895935,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 1.3150859325270527,
|
|
"grad_norm": 8.03885310323271,
|
|
"learning_rate": 6.904431070059723e-06,
|
|
"loss": 1.3990423679351807,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 1.3157224697644812,
|
|
"grad_norm": 10.290753111505369,
|
|
"learning_rate": 6.901005901046031e-06,
|
|
"loss": 2.1758856773376465,
|
|
"step": 2067
|
|
},
|
|
{
|
|
"epoch": 1.3163590070019096,
|
|
"grad_norm": 14.92561642195631,
|
|
"learning_rate": 6.897579688882618e-06,
|
|
"loss": 1.563002586364746,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 1.316995544239338,
|
|
"grad_norm": 19.336800974196322,
|
|
"learning_rate": 6.894152435449572e-06,
|
|
"loss": 1.6746331453323364,
|
|
"step": 2069
|
|
},
|
|
{
|
|
"epoch": 1.3176320814767664,
|
|
"grad_norm": 12.397856385646229,
|
|
"learning_rate": 6.890724142627548e-06,
|
|
"loss": 1.199541687965393,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 1.3182686187141948,
|
|
"grad_norm": 11.45353796465258,
|
|
"learning_rate": 6.887294812297771e-06,
|
|
"loss": 1.6930968761444092,
|
|
"step": 2071
|
|
},
|
|
{
|
|
"epoch": 1.3189051559516232,
|
|
"grad_norm": 18.97514851516703,
|
|
"learning_rate": 6.883864446342036e-06,
|
|
"loss": 1.544563889503479,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 1.3195416931890516,
|
|
"grad_norm": 11.764993353068165,
|
|
"learning_rate": 6.880433046642712e-06,
|
|
"loss": 1.5668516159057617,
|
|
"step": 2073
|
|
},
|
|
{
|
|
"epoch": 1.32017823042648,
|
|
"grad_norm": 18.634138519650218,
|
|
"learning_rate": 6.877000615082726e-06,
|
|
"loss": 1.6925992965698242,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 1.3208147676639084,
|
|
"grad_norm": 10.291932073028669,
|
|
"learning_rate": 6.873567153545576e-06,
|
|
"loss": 1.4353834390640259,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 1.3214513049013368,
|
|
"grad_norm": 13.752283678046844,
|
|
"learning_rate": 6.870132663915328e-06,
|
|
"loss": 1.4297081232070923,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 1.3220878421387652,
|
|
"grad_norm": 10.23322896339913,
|
|
"learning_rate": 6.866697148076604e-06,
|
|
"loss": 1.1460442543029785,
|
|
"step": 2077
|
|
},
|
|
{
|
|
"epoch": 1.3227243793761936,
|
|
"grad_norm": 9.359470853685485,
|
|
"learning_rate": 6.863260607914597e-06,
|
|
"loss": 1.9953954219818115,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 1.323360916613622,
|
|
"grad_norm": 12.755737197521405,
|
|
"learning_rate": 6.859823045315059e-06,
|
|
"loss": 1.7688608169555664,
|
|
"step": 2079
|
|
},
|
|
{
|
|
"epoch": 1.3239974538510504,
|
|
"grad_norm": 9.255344407283879,
|
|
"learning_rate": 6.856384462164304e-06,
|
|
"loss": 1.6144315004348755,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 1.3246339910884788,
|
|
"grad_norm": 9.951025825254108,
|
|
"learning_rate": 6.8529448603492035e-06,
|
|
"loss": 1.6852055788040161,
|
|
"step": 2081
|
|
},
|
|
{
|
|
"epoch": 1.3252705283259072,
|
|
"grad_norm": 10.053643668777605,
|
|
"learning_rate": 6.84950424175719e-06,
|
|
"loss": 1.4977197647094727,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 1.3259070655633354,
|
|
"grad_norm": 14.38622341198003,
|
|
"learning_rate": 6.846062608276254e-06,
|
|
"loss": 1.5500648021697998,
|
|
"step": 2083
|
|
},
|
|
{
|
|
"epoch": 1.3265436028007638,
|
|
"grad_norm": 11.612784911737373,
|
|
"learning_rate": 6.842619961794943e-06,
|
|
"loss": 1.296177864074707,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 1.3271801400381922,
|
|
"grad_norm": 10.073476173831192,
|
|
"learning_rate": 6.839176304202357e-06,
|
|
"loss": 0.49733754992485046,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 1.3278166772756206,
|
|
"grad_norm": 8.403967414605898,
|
|
"learning_rate": 6.835731637388158e-06,
|
|
"loss": 1.5261521339416504,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 1.328453214513049,
|
|
"grad_norm": 14.621238975703363,
|
|
"learning_rate": 6.8322859632425545e-06,
|
|
"loss": 1.2459487915039062,
|
|
"step": 2087
|
|
},
|
|
{
|
|
"epoch": 1.3290897517504774,
|
|
"grad_norm": 9.749969165926782,
|
|
"learning_rate": 6.828839283656311e-06,
|
|
"loss": 1.4671571254730225,
|
|
"step": 2088
|
|
},
|
|
{
|
|
"epoch": 1.3297262889879058,
|
|
"grad_norm": 13.994804517959674,
|
|
"learning_rate": 6.825391600520746e-06,
|
|
"loss": 1.8795738220214844,
|
|
"step": 2089
|
|
},
|
|
{
|
|
"epoch": 1.3303628262253342,
|
|
"grad_norm": 15.225403118861353,
|
|
"learning_rate": 6.821942915727721e-06,
|
|
"loss": 1.9147197008132935,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 1.3309993634627626,
|
|
"grad_norm": 12.529091804970822,
|
|
"learning_rate": 6.818493231169659e-06,
|
|
"loss": 1.5957629680633545,
|
|
"step": 2091
|
|
},
|
|
{
|
|
"epoch": 1.331635900700191,
|
|
"grad_norm": 21.73659702613509,
|
|
"learning_rate": 6.81504254873952e-06,
|
|
"loss": 1.6237444877624512,
|
|
"step": 2092
|
|
},
|
|
{
|
|
"epoch": 1.3322724379376194,
|
|
"grad_norm": 16.648087289267234,
|
|
"learning_rate": 6.8115908703308175e-06,
|
|
"loss": 2.1958940029144287,
|
|
"step": 2093
|
|
},
|
|
{
|
|
"epoch": 1.3329089751750478,
|
|
"grad_norm": 14.81116786339602,
|
|
"learning_rate": 6.808138197837613e-06,
|
|
"loss": 2.1454992294311523,
|
|
"step": 2094
|
|
},
|
|
{
|
|
"epoch": 1.3335455124124762,
|
|
"grad_norm": 11.479577307549393,
|
|
"learning_rate": 6.804684533154506e-06,
|
|
"loss": 1.1923106908798218,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 1.3341820496499044,
|
|
"grad_norm": 28.788853303774175,
|
|
"learning_rate": 6.801229878176652e-06,
|
|
"loss": 3.3707993030548096,
|
|
"step": 2096
|
|
},
|
|
{
|
|
"epoch": 1.3348185868873328,
|
|
"grad_norm": 10.576484877317576,
|
|
"learning_rate": 6.797774234799739e-06,
|
|
"loss": 2.0627214908599854,
|
|
"step": 2097
|
|
},
|
|
{
|
|
"epoch": 1.3354551241247612,
|
|
"grad_norm": 13.257095358800573,
|
|
"learning_rate": 6.79431760492e-06,
|
|
"loss": 2.7560529708862305,
|
|
"step": 2098
|
|
},
|
|
{
|
|
"epoch": 1.3360916613621896,
|
|
"grad_norm": 14.989501136818301,
|
|
"learning_rate": 6.790859990434217e-06,
|
|
"loss": 1.19850492477417,
|
|
"step": 2099
|
|
},
|
|
{
|
|
"epoch": 1.336728198599618,
|
|
"grad_norm": 9.018102816625172,
|
|
"learning_rate": 6.787401393239701e-06,
|
|
"loss": 1.2823089361190796,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 1.3373647358370464,
|
|
"grad_norm": 7.995870948103689,
|
|
"learning_rate": 6.783941815234311e-06,
|
|
"loss": 1.873117446899414,
|
|
"step": 2101
|
|
},
|
|
{
|
|
"epoch": 1.3380012730744748,
|
|
"grad_norm": 7.519059288647954,
|
|
"learning_rate": 6.780481258316438e-06,
|
|
"loss": 1.185487985610962,
|
|
"step": 2102
|
|
},
|
|
{
|
|
"epoch": 1.3386378103119032,
|
|
"grad_norm": 17.16417414265568,
|
|
"learning_rate": 6.777019724385014e-06,
|
|
"loss": 1.7703580856323242,
|
|
"step": 2103
|
|
},
|
|
{
|
|
"epoch": 1.3392743475493316,
|
|
"grad_norm": 10.6109152574734,
|
|
"learning_rate": 6.773557215339508e-06,
|
|
"loss": 2.2608628273010254,
|
|
"step": 2104
|
|
},
|
|
{
|
|
"epoch": 1.33991088478676,
|
|
"grad_norm": 16.146306015694844,
|
|
"learning_rate": 6.770093733079919e-06,
|
|
"loss": 1.4796688556671143,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 1.3405474220241884,
|
|
"grad_norm": 15.417553824160835,
|
|
"learning_rate": 6.766629279506786e-06,
|
|
"loss": 1.3636486530303955,
|
|
"step": 2106
|
|
},
|
|
{
|
|
"epoch": 1.3411839592616168,
|
|
"grad_norm": 9.80590059967913,
|
|
"learning_rate": 6.763163856521178e-06,
|
|
"loss": 2.326904058456421,
|
|
"step": 2107
|
|
},
|
|
{
|
|
"epoch": 1.3418204964990452,
|
|
"grad_norm": 11.308190471866444,
|
|
"learning_rate": 6.7596974660246925e-06,
|
|
"loss": 1.6813642978668213,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 1.3424570337364736,
|
|
"grad_norm": 8.214793086651406,
|
|
"learning_rate": 6.7562301099194675e-06,
|
|
"loss": 2.079240083694458,
|
|
"step": 2109
|
|
},
|
|
{
|
|
"epoch": 1.343093570973902,
|
|
"grad_norm": 7.190351936622931,
|
|
"learning_rate": 6.75276179010816e-06,
|
|
"loss": 1.669417142868042,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 1.3437301082113304,
|
|
"grad_norm": 7.714811460919819,
|
|
"learning_rate": 6.749292508493962e-06,
|
|
"loss": 1.6795430183410645,
|
|
"step": 2111
|
|
},
|
|
{
|
|
"epoch": 1.3443666454487588,
|
|
"grad_norm": 9.850212126655698,
|
|
"learning_rate": 6.745822266980593e-06,
|
|
"loss": 1.6036384105682373,
|
|
"step": 2112
|
|
},
|
|
{
|
|
"epoch": 1.3450031826861872,
|
|
"grad_norm": 12.67826523216415,
|
|
"learning_rate": 6.742351067472297e-06,
|
|
"loss": 1.8061586618423462,
|
|
"step": 2113
|
|
},
|
|
{
|
|
"epoch": 1.3456397199236156,
|
|
"grad_norm": 9.403740555012115,
|
|
"learning_rate": 6.738878911873846e-06,
|
|
"loss": 1.4865294694900513,
|
|
"step": 2114
|
|
},
|
|
{
|
|
"epoch": 1.346276257161044,
|
|
"grad_norm": 9.326954129628175,
|
|
"learning_rate": 6.735405802090536e-06,
|
|
"loss": 1.6607012748718262,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 1.3469127943984724,
|
|
"grad_norm": 9.102535065466336,
|
|
"learning_rate": 6.731931740028184e-06,
|
|
"loss": 1.8555759191513062,
|
|
"step": 2116
|
|
},
|
|
{
|
|
"epoch": 1.3475493316359006,
|
|
"grad_norm": 8.085776555039383,
|
|
"learning_rate": 6.728456727593136e-06,
|
|
"loss": 1.4615516662597656,
|
|
"step": 2117
|
|
},
|
|
{
|
|
"epoch": 1.348185868873329,
|
|
"grad_norm": 8.35552783607606,
|
|
"learning_rate": 6.72498076669225e-06,
|
|
"loss": 1.8943145275115967,
|
|
"step": 2118
|
|
},
|
|
{
|
|
"epoch": 1.3488224061107574,
|
|
"grad_norm": 11.708244885366367,
|
|
"learning_rate": 6.7215038592329125e-06,
|
|
"loss": 1.4016022682189941,
|
|
"step": 2119
|
|
},
|
|
{
|
|
"epoch": 1.3494589433481858,
|
|
"grad_norm": 8.840274040091456,
|
|
"learning_rate": 6.718026007123026e-06,
|
|
"loss": 1.2076852321624756,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 1.3500954805856142,
|
|
"grad_norm": 8.43847580334136,
|
|
"learning_rate": 6.714547212271012e-06,
|
|
"loss": 1.924154281616211,
|
|
"step": 2121
|
|
},
|
|
{
|
|
"epoch": 1.3507320178230426,
|
|
"grad_norm": 18.779962057283058,
|
|
"learning_rate": 6.711067476585811e-06,
|
|
"loss": 1.948960304260254,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 1.351368555060471,
|
|
"grad_norm": 12.094242164729383,
|
|
"learning_rate": 6.707586801976873e-06,
|
|
"loss": 2.12758469581604,
|
|
"step": 2123
|
|
},
|
|
{
|
|
"epoch": 1.3520050922978994,
|
|
"grad_norm": 10.368550448608548,
|
|
"learning_rate": 6.7041051903541744e-06,
|
|
"loss": 1.8979034423828125,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 1.3526416295353278,
|
|
"grad_norm": 13.188567587338362,
|
|
"learning_rate": 6.700622643628196e-06,
|
|
"loss": 2.141956329345703,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 1.3532781667727563,
|
|
"grad_norm": 10.984995851433844,
|
|
"learning_rate": 6.697139163709936e-06,
|
|
"loss": 1.3657575845718384,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 1.3539147040101847,
|
|
"grad_norm": 11.289468170136036,
|
|
"learning_rate": 6.693654752510905e-06,
|
|
"loss": 1.7047542333602905,
|
|
"step": 2127
|
|
},
|
|
{
|
|
"epoch": 1.354551241247613,
|
|
"grad_norm": 12.636042984409194,
|
|
"learning_rate": 6.690169411943124e-06,
|
|
"loss": 1.6948938369750977,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 1.3551877784850412,
|
|
"grad_norm": 15.027155188825471,
|
|
"learning_rate": 6.686683143919119e-06,
|
|
"loss": 1.554497480392456,
|
|
"step": 2129
|
|
},
|
|
{
|
|
"epoch": 1.3558243157224696,
|
|
"grad_norm": 17.482166423106033,
|
|
"learning_rate": 6.683195950351937e-06,
|
|
"loss": 1.5829187631607056,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 1.356460852959898,
|
|
"grad_norm": 8.993795361126464,
|
|
"learning_rate": 6.679707833155119e-06,
|
|
"loss": 1.196561574935913,
|
|
"step": 2131
|
|
},
|
|
{
|
|
"epoch": 1.3570973901973264,
|
|
"grad_norm": 10.224631073276461,
|
|
"learning_rate": 6.676218794242724e-06,
|
|
"loss": 1.5029449462890625,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 1.3577339274347549,
|
|
"grad_norm": 13.622151145333707,
|
|
"learning_rate": 6.6727288355293085e-06,
|
|
"loss": 1.1759300231933594,
|
|
"step": 2133
|
|
},
|
|
{
|
|
"epoch": 1.3583704646721833,
|
|
"grad_norm": 13.787038924100134,
|
|
"learning_rate": 6.669237958929939e-06,
|
|
"loss": 1.5236375331878662,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 1.3590070019096117,
|
|
"grad_norm": 9.50872271068457,
|
|
"learning_rate": 6.665746166360185e-06,
|
|
"loss": 1.835353136062622,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 1.35964353914704,
|
|
"grad_norm": 8.86326023305628,
|
|
"learning_rate": 6.662253459736114e-06,
|
|
"loss": 1.880853533744812,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 1.3602800763844685,
|
|
"grad_norm": 12.350598605749058,
|
|
"learning_rate": 6.658759840974303e-06,
|
|
"loss": 2.347693681716919,
|
|
"step": 2137
|
|
},
|
|
{
|
|
"epoch": 1.3609166136218969,
|
|
"grad_norm": 10.603024134542418,
|
|
"learning_rate": 6.655265311991822e-06,
|
|
"loss": 1.2131357192993164,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 1.3615531508593253,
|
|
"grad_norm": 17.34616083937125,
|
|
"learning_rate": 6.651769874706245e-06,
|
|
"loss": 1.8799031972885132,
|
|
"step": 2139
|
|
},
|
|
{
|
|
"epoch": 1.3621896880967537,
|
|
"grad_norm": 12.15157410218963,
|
|
"learning_rate": 6.6482735310356425e-06,
|
|
"loss": 1.9390839338302612,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 1.362826225334182,
|
|
"grad_norm": 20.42026972304762,
|
|
"learning_rate": 6.644776282898584e-06,
|
|
"loss": 1.3343855142593384,
|
|
"step": 2141
|
|
},
|
|
{
|
|
"epoch": 1.3634627625716105,
|
|
"grad_norm": 13.438106280833992,
|
|
"learning_rate": 6.641278132214133e-06,
|
|
"loss": 1.5200254917144775,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 1.3640992998090389,
|
|
"grad_norm": 6.413930253727851,
|
|
"learning_rate": 6.637779080901851e-06,
|
|
"loss": 1.2077040672302246,
|
|
"step": 2143
|
|
},
|
|
{
|
|
"epoch": 1.3647358370464673,
|
|
"grad_norm": 9.180870332071997,
|
|
"learning_rate": 6.63427913088179e-06,
|
|
"loss": 2.02775239944458,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 1.3653723742838957,
|
|
"grad_norm": 11.304053679491917,
|
|
"learning_rate": 6.6307782840745e-06,
|
|
"loss": 1.7007734775543213,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 1.366008911521324,
|
|
"grad_norm": 8.518756502344205,
|
|
"learning_rate": 6.627276542401017e-06,
|
|
"loss": 1.2333950996398926,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 1.3666454487587525,
|
|
"grad_norm": 12.418587258954801,
|
|
"learning_rate": 6.623773907782876e-06,
|
|
"loss": 1.3447022438049316,
|
|
"step": 2147
|
|
},
|
|
{
|
|
"epoch": 1.367281985996181,
|
|
"grad_norm": 9.194698166497082,
|
|
"learning_rate": 6.6202703821420935e-06,
|
|
"loss": 1.5185067653656006,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 1.3679185232336093,
|
|
"grad_norm": 8.879987789360879,
|
|
"learning_rate": 6.61676596740118e-06,
|
|
"loss": 1.0394394397735596,
|
|
"step": 2149
|
|
},
|
|
{
|
|
"epoch": 1.3685550604710375,
|
|
"grad_norm": 7.841250549021613,
|
|
"learning_rate": 6.613260665483135e-06,
|
|
"loss": 1.3038004636764526,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 1.3691915977084659,
|
|
"grad_norm": 17.55731240460262,
|
|
"learning_rate": 6.6097544783114405e-06,
|
|
"loss": 1.7838655710220337,
|
|
"step": 2151
|
|
},
|
|
{
|
|
"epoch": 1.3698281349458943,
|
|
"grad_norm": 8.228606331506027,
|
|
"learning_rate": 6.606247407810067e-06,
|
|
"loss": 1.8675861358642578,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 1.3704646721833227,
|
|
"grad_norm": 7.467059484823506,
|
|
"learning_rate": 6.60273945590347e-06,
|
|
"loss": 1.3836982250213623,
|
|
"step": 2153
|
|
},
|
|
{
|
|
"epoch": 1.371101209420751,
|
|
"grad_norm": 12.485888251305484,
|
|
"learning_rate": 6.599230624516585e-06,
|
|
"loss": 1.5675023794174194,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 1.3717377466581795,
|
|
"grad_norm": 15.567851452398823,
|
|
"learning_rate": 6.5957209155748355e-06,
|
|
"loss": 1.7862575054168701,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 1.372374283895608,
|
|
"grad_norm": 13.981443969638242,
|
|
"learning_rate": 6.592210331004123e-06,
|
|
"loss": 1.1278631687164307,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 1.3730108211330363,
|
|
"grad_norm": 13.15212372653646,
|
|
"learning_rate": 6.588698872730831e-06,
|
|
"loss": 1.6467361450195312,
|
|
"step": 2157
|
|
},
|
|
{
|
|
"epoch": 1.3736473583704647,
|
|
"grad_norm": 15.616145367996944,
|
|
"learning_rate": 6.5851865426818215e-06,
|
|
"loss": 1.6315853595733643,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 1.374283895607893,
|
|
"grad_norm": 12.071659180855534,
|
|
"learning_rate": 6.581673342784435e-06,
|
|
"loss": 1.690008282661438,
|
|
"step": 2159
|
|
},
|
|
{
|
|
"epoch": 1.3749204328453215,
|
|
"grad_norm": 10.211327025135933,
|
|
"learning_rate": 6.578159274966489e-06,
|
|
"loss": 0.9187987446784973,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 1.37555697008275,
|
|
"grad_norm": 13.438711563797808,
|
|
"learning_rate": 6.57464434115628e-06,
|
|
"loss": 1.6902811527252197,
|
|
"step": 2161
|
|
},
|
|
{
|
|
"epoch": 1.3761935073201783,
|
|
"grad_norm": 12.453079163260508,
|
|
"learning_rate": 6.571128543282576e-06,
|
|
"loss": 1.4750566482543945,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 1.3768300445576065,
|
|
"grad_norm": 17.959531061122476,
|
|
"learning_rate": 6.5676118832746195e-06,
|
|
"loss": 1.9648330211639404,
|
|
"step": 2163
|
|
},
|
|
{
|
|
"epoch": 1.377466581795035,
|
|
"grad_norm": 12.300719608998236,
|
|
"learning_rate": 6.564094363062128e-06,
|
|
"loss": 1.2455105781555176,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 1.3781031190324633,
|
|
"grad_norm": 11.219402471677457,
|
|
"learning_rate": 6.5605759845752925e-06,
|
|
"loss": 1.2983028888702393,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 1.3787396562698917,
|
|
"grad_norm": 14.328811997520638,
|
|
"learning_rate": 6.55705674974477e-06,
|
|
"loss": 1.9251346588134766,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 1.37937619350732,
|
|
"grad_norm": 14.64826858357797,
|
|
"learning_rate": 6.553536660501691e-06,
|
|
"loss": 1.6900074481964111,
|
|
"step": 2167
|
|
},
|
|
{
|
|
"epoch": 1.3800127307447485,
|
|
"grad_norm": 8.528301843090656,
|
|
"learning_rate": 6.5500157187776555e-06,
|
|
"loss": 2.1877009868621826,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 1.380649267982177,
|
|
"grad_norm": 7.8651908301529225,
|
|
"learning_rate": 6.546493926504727e-06,
|
|
"loss": 1.7229034900665283,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 1.3812858052196053,
|
|
"grad_norm": 15.94456459374675,
|
|
"learning_rate": 6.54297128561544e-06,
|
|
"loss": 1.7511520385742188,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 1.3819223424570337,
|
|
"grad_norm": 14.878961962405457,
|
|
"learning_rate": 6.539447798042794e-06,
|
|
"loss": 2.0114879608154297,
|
|
"step": 2171
|
|
},
|
|
{
|
|
"epoch": 1.3825588796944621,
|
|
"grad_norm": 10.800956160351202,
|
|
"learning_rate": 6.535923465720249e-06,
|
|
"loss": 1.239030361175537,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 1.3831954169318905,
|
|
"grad_norm": 10.261961533103598,
|
|
"learning_rate": 6.532398290581736e-06,
|
|
"loss": 1.4675251245498657,
|
|
"step": 2173
|
|
},
|
|
{
|
|
"epoch": 1.383831954169319,
|
|
"grad_norm": 9.636887736053735,
|
|
"learning_rate": 6.528872274561641e-06,
|
|
"loss": 1.2436105012893677,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 1.3844684914067473,
|
|
"grad_norm": 11.54901510869167,
|
|
"learning_rate": 6.525345419594818e-06,
|
|
"loss": 1.1725226640701294,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 1.3851050286441757,
|
|
"grad_norm": 10.3832145763699,
|
|
"learning_rate": 6.521817727616574e-06,
|
|
"loss": 1.9142147302627563,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 1.3857415658816041,
|
|
"grad_norm": 10.929996772636745,
|
|
"learning_rate": 6.518289200562682e-06,
|
|
"loss": 1.3388845920562744,
|
|
"step": 2177
|
|
},
|
|
{
|
|
"epoch": 1.3863781031190325,
|
|
"grad_norm": 12.734890563619743,
|
|
"learning_rate": 6.51475984036937e-06,
|
|
"loss": 2.039224624633789,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 1.387014640356461,
|
|
"grad_norm": 14.31460829114896,
|
|
"learning_rate": 6.511229648973323e-06,
|
|
"loss": 1.4467695951461792,
|
|
"step": 2179
|
|
},
|
|
{
|
|
"epoch": 1.3876511775938893,
|
|
"grad_norm": 10.081285391928505,
|
|
"learning_rate": 6.507698628311686e-06,
|
|
"loss": 1.5399799346923828,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 1.3882877148313177,
|
|
"grad_norm": 18.571662120205282,
|
|
"learning_rate": 6.5041667803220535e-06,
|
|
"loss": 1.2376508712768555,
|
|
"step": 2181
|
|
},
|
|
{
|
|
"epoch": 1.3889242520687461,
|
|
"grad_norm": 14.5132149255744,
|
|
"learning_rate": 6.500634106942474e-06,
|
|
"loss": 1.773813247680664,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 1.3895607893061745,
|
|
"grad_norm": 8.728097996032005,
|
|
"learning_rate": 6.497100610111456e-06,
|
|
"loss": 1.359797477722168,
|
|
"step": 2183
|
|
},
|
|
{
|
|
"epoch": 1.3901973265436027,
|
|
"grad_norm": 14.700494437182511,
|
|
"learning_rate": 6.4935662917679546e-06,
|
|
"loss": 0.739691436290741,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 1.3908338637810311,
|
|
"grad_norm": 16.262194491870044,
|
|
"learning_rate": 6.490031153851373e-06,
|
|
"loss": 1.7340049743652344,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 1.3914704010184595,
|
|
"grad_norm": 10.227465908930895,
|
|
"learning_rate": 6.48649519830157e-06,
|
|
"loss": 1.7808657884597778,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 1.392106938255888,
|
|
"grad_norm": 14.236925681514215,
|
|
"learning_rate": 6.482958427058847e-06,
|
|
"loss": 1.6473512649536133,
|
|
"step": 2187
|
|
},
|
|
{
|
|
"epoch": 1.3927434754933163,
|
|
"grad_norm": 18.488163841989763,
|
|
"learning_rate": 6.479420842063963e-06,
|
|
"loss": 1.766308307647705,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 1.3933800127307447,
|
|
"grad_norm": 7.312654768912787,
|
|
"learning_rate": 6.47588244525811e-06,
|
|
"loss": 1.4837501049041748,
|
|
"step": 2189
|
|
},
|
|
{
|
|
"epoch": 1.3940165499681731,
|
|
"grad_norm": 11.066710488368418,
|
|
"learning_rate": 6.4723432385829384e-06,
|
|
"loss": 2.069955825805664,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 1.3946530872056015,
|
|
"grad_norm": 12.34520529771813,
|
|
"learning_rate": 6.468803223980534e-06,
|
|
"loss": 1.2535465955734253,
|
|
"step": 2191
|
|
},
|
|
{
|
|
"epoch": 1.39528962444303,
|
|
"grad_norm": 12.652184847236011,
|
|
"learning_rate": 6.465262403393429e-06,
|
|
"loss": 1.2388365268707275,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 1.3959261616804584,
|
|
"grad_norm": 10.567766751278663,
|
|
"learning_rate": 6.461720778764597e-06,
|
|
"loss": 1.4995468854904175,
|
|
"step": 2193
|
|
},
|
|
{
|
|
"epoch": 1.3965626989178868,
|
|
"grad_norm": 7.834618981007849,
|
|
"learning_rate": 6.458178352037459e-06,
|
|
"loss": 1.8272128105163574,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 1.3971992361553152,
|
|
"grad_norm": 12.325471764275232,
|
|
"learning_rate": 6.454635125155863e-06,
|
|
"loss": 1.2270289659500122,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 1.3978357733927433,
|
|
"grad_norm": 14.79567697031164,
|
|
"learning_rate": 6.45109110006411e-06,
|
|
"loss": 1.8408695459365845,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 1.3984723106301717,
|
|
"grad_norm": 7.701637127659964,
|
|
"learning_rate": 6.447546278706929e-06,
|
|
"loss": 1.272929072380066,
|
|
"step": 2197
|
|
},
|
|
{
|
|
"epoch": 1.3991088478676001,
|
|
"grad_norm": 7.0890368356852855,
|
|
"learning_rate": 6.444000663029494e-06,
|
|
"loss": 1.5190505981445312,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 1.3997453851050286,
|
|
"grad_norm": 23.48191736552914,
|
|
"learning_rate": 6.440454254977407e-06,
|
|
"loss": 1.439098834991455,
|
|
"step": 2199
|
|
},
|
|
{
|
|
"epoch": 1.400381922342457,
|
|
"grad_norm": 9.401723931268236,
|
|
"learning_rate": 6.43690705649671e-06,
|
|
"loss": 1.9440866708755493,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 1.4010184595798854,
|
|
"grad_norm": 14.025034385209763,
|
|
"learning_rate": 6.433359069533878e-06,
|
|
"loss": 2.302647352218628,
|
|
"step": 2201
|
|
},
|
|
{
|
|
"epoch": 1.4016549968173138,
|
|
"grad_norm": 14.723959105524486,
|
|
"learning_rate": 6.429810296035817e-06,
|
|
"loss": 1.3513097763061523,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 1.4022915340547422,
|
|
"grad_norm": 14.133590601314184,
|
|
"learning_rate": 6.426260737949866e-06,
|
|
"loss": 1.1011390686035156,
|
|
"step": 2203
|
|
},
|
|
{
|
|
"epoch": 1.4029280712921706,
|
|
"grad_norm": 17.586480443216193,
|
|
"learning_rate": 6.422710397223794e-06,
|
|
"loss": 1.9014208316802979,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 1.403564608529599,
|
|
"grad_norm": 12.757630956889631,
|
|
"learning_rate": 6.419159275805799e-06,
|
|
"loss": 2.630929708480835,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 1.4042011457670274,
|
|
"grad_norm": 13.732215912250094,
|
|
"learning_rate": 6.4156073756445084e-06,
|
|
"loss": 1.3734755516052246,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 1.4048376830044558,
|
|
"grad_norm": 32.171196888479926,
|
|
"learning_rate": 6.4120546986889755e-06,
|
|
"loss": 2.9236714839935303,
|
|
"step": 2207
|
|
},
|
|
{
|
|
"epoch": 1.4054742202418842,
|
|
"grad_norm": 14.712605726738637,
|
|
"learning_rate": 6.408501246888682e-06,
|
|
"loss": 1.4317361116409302,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 1.4061107574793126,
|
|
"grad_norm": 13.863448900221524,
|
|
"learning_rate": 6.404947022193535e-06,
|
|
"loss": 2.090886354446411,
|
|
"step": 2209
|
|
},
|
|
{
|
|
"epoch": 1.406747294716741,
|
|
"grad_norm": 7.631648416620053,
|
|
"learning_rate": 6.4013920265538595e-06,
|
|
"loss": 1.593640923500061,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 1.4073838319541694,
|
|
"grad_norm": 7.401858916293992,
|
|
"learning_rate": 6.397836261920415e-06,
|
|
"loss": 1.1235381364822388,
|
|
"step": 2211
|
|
},
|
|
{
|
|
"epoch": 1.4080203691915978,
|
|
"grad_norm": 9.132508483179071,
|
|
"learning_rate": 6.3942797302443706e-06,
|
|
"loss": 1.4245104789733887,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 1.4086569064290262,
|
|
"grad_norm": 10.062049180418628,
|
|
"learning_rate": 6.390722433477325e-06,
|
|
"loss": 1.3633389472961426,
|
|
"step": 2213
|
|
},
|
|
{
|
|
"epoch": 1.4092934436664546,
|
|
"grad_norm": 11.052650137533167,
|
|
"learning_rate": 6.387164373571293e-06,
|
|
"loss": 1.1453993320465088,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 1.409929980903883,
|
|
"grad_norm": 8.670156035931027,
|
|
"learning_rate": 6.383605552478709e-06,
|
|
"loss": 1.2008607387542725,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 1.4105665181413114,
|
|
"grad_norm": 10.963651499575823,
|
|
"learning_rate": 6.3800459721524255e-06,
|
|
"loss": 1.474987506866455,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 1.4112030553787396,
|
|
"grad_norm": 16.231420560110255,
|
|
"learning_rate": 6.37648563454571e-06,
|
|
"loss": 2.3234455585479736,
|
|
"step": 2217
|
|
},
|
|
{
|
|
"epoch": 1.411839592616168,
|
|
"grad_norm": 13.941402140619234,
|
|
"learning_rate": 6.372924541612248e-06,
|
|
"loss": 1.3550442457199097,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 1.4124761298535964,
|
|
"grad_norm": 7.991598436727154,
|
|
"learning_rate": 6.369362695306138e-06,
|
|
"loss": 1.4468458890914917,
|
|
"step": 2219
|
|
},
|
|
{
|
|
"epoch": 1.4131126670910248,
|
|
"grad_norm": 8.336015467632643,
|
|
"learning_rate": 6.36580009758189e-06,
|
|
"loss": 1.655072808265686,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 1.4137492043284532,
|
|
"grad_norm": 16.633901474263247,
|
|
"learning_rate": 6.362236750394431e-06,
|
|
"loss": 1.7641286849975586,
|
|
"step": 2221
|
|
},
|
|
{
|
|
"epoch": 1.4143857415658816,
|
|
"grad_norm": 19.725347224513143,
|
|
"learning_rate": 6.3586726556990955e-06,
|
|
"loss": 2.0049242973327637,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 1.41502227880331,
|
|
"grad_norm": 10.7435012585398,
|
|
"learning_rate": 6.355107815451629e-06,
|
|
"loss": 1.0433449745178223,
|
|
"step": 2223
|
|
},
|
|
{
|
|
"epoch": 1.4156588160407384,
|
|
"grad_norm": 19.339243493861588,
|
|
"learning_rate": 6.351542231608188e-06,
|
|
"loss": 1.3136436939239502,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 1.4162953532781668,
|
|
"grad_norm": 10.497933067797925,
|
|
"learning_rate": 6.3479759061253334e-06,
|
|
"loss": 0.8929241895675659,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 1.4169318905155952,
|
|
"grad_norm": 9.559182782439565,
|
|
"learning_rate": 6.344408840960037e-06,
|
|
"loss": 1.1983423233032227,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 1.4175684277530236,
|
|
"grad_norm": 9.529239589057827,
|
|
"learning_rate": 6.340841038069673e-06,
|
|
"loss": 1.0780221223831177,
|
|
"step": 2227
|
|
},
|
|
{
|
|
"epoch": 1.418204964990452,
|
|
"grad_norm": 9.608827881385222,
|
|
"learning_rate": 6.337272499412023e-06,
|
|
"loss": 1.1054682731628418,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 1.4188415022278804,
|
|
"grad_norm": 15.719206287796904,
|
|
"learning_rate": 6.3337032269452715e-06,
|
|
"loss": 2.25411319732666,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 1.4194780394653086,
|
|
"grad_norm": 17.794842478363268,
|
|
"learning_rate": 6.330133222628004e-06,
|
|
"loss": 1.542565941810608,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 1.420114576702737,
|
|
"grad_norm": 9.751934079958934,
|
|
"learning_rate": 6.326562488419213e-06,
|
|
"loss": 1.5496629476547241,
|
|
"step": 2231
|
|
},
|
|
{
|
|
"epoch": 1.4207511139401654,
|
|
"grad_norm": 22.19222644496835,
|
|
"learning_rate": 6.322991026278285e-06,
|
|
"loss": 1.441185474395752,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 1.4213876511775938,
|
|
"grad_norm": 10.510101925919152,
|
|
"learning_rate": 6.319418838165005e-06,
|
|
"loss": 1.432142734527588,
|
|
"step": 2233
|
|
},
|
|
{
|
|
"epoch": 1.4220241884150222,
|
|
"grad_norm": 13.36563236105199,
|
|
"learning_rate": 6.315845926039568e-06,
|
|
"loss": 1.7360641956329346,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 1.4226607256524506,
|
|
"grad_norm": 15.16706158597897,
|
|
"learning_rate": 6.312272291862553e-06,
|
|
"loss": 1.6843822002410889,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 1.423297262889879,
|
|
"grad_norm": 10.978306537880643,
|
|
"learning_rate": 6.308697937594942e-06,
|
|
"loss": 1.5882022380828857,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 1.4239338001273074,
|
|
"grad_norm": 11.26713450947295,
|
|
"learning_rate": 6.30512286519811e-06,
|
|
"loss": 1.4534211158752441,
|
|
"step": 2237
|
|
},
|
|
{
|
|
"epoch": 1.4245703373647358,
|
|
"grad_norm": 9.62433982888293,
|
|
"learning_rate": 6.301547076633825e-06,
|
|
"loss": 1.552571177482605,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 1.4252068746021642,
|
|
"grad_norm": 23.46147658043418,
|
|
"learning_rate": 6.2979705738642535e-06,
|
|
"loss": 2.4739670753479004,
|
|
"step": 2239
|
|
},
|
|
{
|
|
"epoch": 1.4258434118395926,
|
|
"grad_norm": 10.946123772411452,
|
|
"learning_rate": 6.294393358851947e-06,
|
|
"loss": 1.0432250499725342,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 1.426479949077021,
|
|
"grad_norm": 15.258792066809578,
|
|
"learning_rate": 6.290815433559853e-06,
|
|
"loss": 1.9332001209259033,
|
|
"step": 2241
|
|
},
|
|
{
|
|
"epoch": 1.4271164863144494,
|
|
"grad_norm": 14.441468370947723,
|
|
"learning_rate": 6.287236799951306e-06,
|
|
"loss": 2.2849225997924805,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 1.4277530235518778,
|
|
"grad_norm": 8.478215874496488,
|
|
"learning_rate": 6.283657459990028e-06,
|
|
"loss": 2.5756845474243164,
|
|
"step": 2243
|
|
},
|
|
{
|
|
"epoch": 1.4283895607893062,
|
|
"grad_norm": 10.33023886552243,
|
|
"learning_rate": 6.2800774156401334e-06,
|
|
"loss": 1.2622883319854736,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 1.4290260980267346,
|
|
"grad_norm": 10.23879794338887,
|
|
"learning_rate": 6.276496668866118e-06,
|
|
"loss": 1.4622156620025635,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 1.429662635264163,
|
|
"grad_norm": 11.428320725826136,
|
|
"learning_rate": 6.272915221632867e-06,
|
|
"loss": 2.4076485633850098,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 1.4302991725015914,
|
|
"grad_norm": 9.18367055531244,
|
|
"learning_rate": 6.269333075905647e-06,
|
|
"loss": 2.7986347675323486,
|
|
"step": 2247
|
|
},
|
|
{
|
|
"epoch": 1.4309357097390198,
|
|
"grad_norm": 10.718811501953986,
|
|
"learning_rate": 6.265750233650108e-06,
|
|
"loss": 1.0415111780166626,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 1.4315722469764482,
|
|
"grad_norm": 13.89304174407212,
|
|
"learning_rate": 6.262166696832288e-06,
|
|
"loss": 1.673966407775879,
|
|
"step": 2249
|
|
},
|
|
{
|
|
"epoch": 1.4322087842138764,
|
|
"grad_norm": 13.614234895048883,
|
|
"learning_rate": 6.258582467418596e-06,
|
|
"loss": 1.7757503986358643,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 1.4328453214513048,
|
|
"grad_norm": 8.229189188841083,
|
|
"learning_rate": 6.254997547375828e-06,
|
|
"loss": 1.5330592393875122,
|
|
"step": 2251
|
|
},
|
|
{
|
|
"epoch": 1.4334818586887332,
|
|
"grad_norm": 13.279023169672653,
|
|
"learning_rate": 6.251411938671159e-06,
|
|
"loss": 1.8557310104370117,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 1.4341183959261616,
|
|
"grad_norm": 12.420664420892885,
|
|
"learning_rate": 6.2478256432721395e-06,
|
|
"loss": 1.8852431774139404,
|
|
"step": 2253
|
|
},
|
|
{
|
|
"epoch": 1.43475493316359,
|
|
"grad_norm": 17.22679859354758,
|
|
"learning_rate": 6.244238663146698e-06,
|
|
"loss": 1.0919334888458252,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 1.4353914704010184,
|
|
"grad_norm": 11.259133977733073,
|
|
"learning_rate": 6.240651000263136e-06,
|
|
"loss": 1.5498840808868408,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 1.4360280076384468,
|
|
"grad_norm": 18.24539838096365,
|
|
"learning_rate": 6.237062656590135e-06,
|
|
"loss": 1.4166648387908936,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 1.4366645448758752,
|
|
"grad_norm": 8.713505921407725,
|
|
"learning_rate": 6.233473634096746e-06,
|
|
"loss": 1.3557902574539185,
|
|
"step": 2257
|
|
},
|
|
{
|
|
"epoch": 1.4373010821133037,
|
|
"grad_norm": 8.716074123775524,
|
|
"learning_rate": 6.229883934752393e-06,
|
|
"loss": 1.2707334756851196,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 1.437937619350732,
|
|
"grad_norm": 14.63987692349558,
|
|
"learning_rate": 6.226293560526875e-06,
|
|
"loss": 1.2643358707427979,
|
|
"step": 2259
|
|
},
|
|
{
|
|
"epoch": 1.4385741565881605,
|
|
"grad_norm": 10.401459052982707,
|
|
"learning_rate": 6.222702513390354e-06,
|
|
"loss": 2.7216434478759766,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 1.4392106938255889,
|
|
"grad_norm": 8.333916830423911,
|
|
"learning_rate": 6.219110795313368e-06,
|
|
"loss": 1.3003531694412231,
|
|
"step": 2261
|
|
},
|
|
{
|
|
"epoch": 1.4398472310630173,
|
|
"grad_norm": 11.773605144365478,
|
|
"learning_rate": 6.21551840826682e-06,
|
|
"loss": 1.5093822479248047,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 1.4404837683004454,
|
|
"grad_norm": 17.371610763815383,
|
|
"learning_rate": 6.211925354221981e-06,
|
|
"loss": 1.3118256330490112,
|
|
"step": 2263
|
|
},
|
|
{
|
|
"epoch": 1.4411203055378738,
|
|
"grad_norm": 8.882387238630173,
|
|
"learning_rate": 6.208331635150491e-06,
|
|
"loss": 1.1394437551498413,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 1.4417568427753022,
|
|
"grad_norm": 8.044599760767078,
|
|
"learning_rate": 6.204737253024347e-06,
|
|
"loss": 1.6412131786346436,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 1.4423933800127307,
|
|
"grad_norm": 10.922916029421351,
|
|
"learning_rate": 6.201142209815915e-06,
|
|
"loss": 0.9688152074813843,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 1.443029917250159,
|
|
"grad_norm": 12.828609189083918,
|
|
"learning_rate": 6.197546507497928e-06,
|
|
"loss": 1.4837284088134766,
|
|
"step": 2267
|
|
},
|
|
{
|
|
"epoch": 1.4436664544875875,
|
|
"grad_norm": 11.860294561287034,
|
|
"learning_rate": 6.193950148043473e-06,
|
|
"loss": 1.5708303451538086,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 1.4443029917250159,
|
|
"grad_norm": 7.952589988233202,
|
|
"learning_rate": 6.1903531334259985e-06,
|
|
"loss": 1.4329659938812256,
|
|
"step": 2269
|
|
},
|
|
{
|
|
"epoch": 1.4449395289624443,
|
|
"grad_norm": 10.539927680438874,
|
|
"learning_rate": 6.186755465619319e-06,
|
|
"loss": 2.3485629558563232,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 1.4455760661998727,
|
|
"grad_norm": 9.23104905413811,
|
|
"learning_rate": 6.183157146597599e-06,
|
|
"loss": 1.5098153352737427,
|
|
"step": 2271
|
|
},
|
|
{
|
|
"epoch": 1.446212603437301,
|
|
"grad_norm": 15.346346501941884,
|
|
"learning_rate": 6.179558178335367e-06,
|
|
"loss": 1.768712043762207,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 1.4468491406747295,
|
|
"grad_norm": 12.7331926715348,
|
|
"learning_rate": 6.1759585628075045e-06,
|
|
"loss": 2.09372615814209,
|
|
"step": 2273
|
|
},
|
|
{
|
|
"epoch": 1.4474856779121579,
|
|
"grad_norm": 10.14585137644003,
|
|
"learning_rate": 6.1723583019892504e-06,
|
|
"loss": 1.878859043121338,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 1.4481222151495863,
|
|
"grad_norm": 6.674020724819407,
|
|
"learning_rate": 6.168757397856194e-06,
|
|
"loss": 1.0077816247940063,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 1.4487587523870147,
|
|
"grad_norm": 8.66182461368363,
|
|
"learning_rate": 6.1651558523842804e-06,
|
|
"loss": 1.1360124349594116,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 1.449395289624443,
|
|
"grad_norm": 10.22036379463639,
|
|
"learning_rate": 6.161553667549807e-06,
|
|
"loss": 1.7001159191131592,
|
|
"step": 2277
|
|
},
|
|
{
|
|
"epoch": 1.4500318268618715,
|
|
"grad_norm": 11.501379211176305,
|
|
"learning_rate": 6.157950845329419e-06,
|
|
"loss": 1.5191199779510498,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 1.4506683640993,
|
|
"grad_norm": 13.489428655557099,
|
|
"learning_rate": 6.154347387700115e-06,
|
|
"loss": 0.9401975870132446,
|
|
"step": 2279
|
|
},
|
|
{
|
|
"epoch": 1.4513049013367283,
|
|
"grad_norm": 9.189412362825243,
|
|
"learning_rate": 6.150743296639241e-06,
|
|
"loss": 1.4657886028289795,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 1.4519414385741567,
|
|
"grad_norm": 7.063744317605319,
|
|
"learning_rate": 6.14713857412449e-06,
|
|
"loss": 1.6262617111206055,
|
|
"step": 2281
|
|
},
|
|
{
|
|
"epoch": 1.452577975811585,
|
|
"grad_norm": 7.319488479677199,
|
|
"learning_rate": 6.143533222133901e-06,
|
|
"loss": 1.9885435104370117,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 1.4532145130490135,
|
|
"grad_norm": 9.816619736767258,
|
|
"learning_rate": 6.139927242645859e-06,
|
|
"loss": 2.211883306503296,
|
|
"step": 2283
|
|
},
|
|
{
|
|
"epoch": 1.4538510502864417,
|
|
"grad_norm": 19.194333040452655,
|
|
"learning_rate": 6.136320637639094e-06,
|
|
"loss": 1.593003749847412,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 1.45448758752387,
|
|
"grad_norm": 9.861021206968063,
|
|
"learning_rate": 6.13271340909268e-06,
|
|
"loss": 1.4990943670272827,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 1.4551241247612985,
|
|
"grad_norm": 8.89575659198883,
|
|
"learning_rate": 6.12910555898603e-06,
|
|
"loss": 1.908895492553711,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 1.455760661998727,
|
|
"grad_norm": 11.68517641170503,
|
|
"learning_rate": 6.125497089298902e-06,
|
|
"loss": 1.4656141996383667,
|
|
"step": 2287
|
|
},
|
|
{
|
|
"epoch": 1.4563971992361553,
|
|
"grad_norm": 12.049955844153972,
|
|
"learning_rate": 6.121888002011389e-06,
|
|
"loss": 1.4878228902816772,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 1.4570337364735837,
|
|
"grad_norm": 12.72492012451628,
|
|
"learning_rate": 6.118278299103929e-06,
|
|
"loss": 1.8361848592758179,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 1.457670273711012,
|
|
"grad_norm": 9.191650634484828,
|
|
"learning_rate": 6.1146679825572945e-06,
|
|
"loss": 2.2196669578552246,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 1.4583068109484405,
|
|
"grad_norm": 7.759297972058429,
|
|
"learning_rate": 6.11105705435259e-06,
|
|
"loss": 1.926821231842041,
|
|
"step": 2291
|
|
},
|
|
{
|
|
"epoch": 1.458943348185869,
|
|
"grad_norm": 19.403444235316353,
|
|
"learning_rate": 6.107445516471268e-06,
|
|
"loss": 0.9025589227676392,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 1.4595798854232973,
|
|
"grad_norm": 6.93287755275663,
|
|
"learning_rate": 6.1038333708951025e-06,
|
|
"loss": 0.9668861627578735,
|
|
"step": 2293
|
|
},
|
|
{
|
|
"epoch": 1.4602164226607257,
|
|
"grad_norm": 14.934226720346048,
|
|
"learning_rate": 6.100220619606209e-06,
|
|
"loss": 1.6219764947891235,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 1.4608529598981541,
|
|
"grad_norm": 10.80098156770589,
|
|
"learning_rate": 6.096607264587032e-06,
|
|
"loss": 1.6168935298919678,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 1.4614894971355823,
|
|
"grad_norm": 11.977524581829597,
|
|
"learning_rate": 6.092993307820349e-06,
|
|
"loss": 1.3764303922653198,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 1.4621260343730107,
|
|
"grad_norm": 7.609612791648267,
|
|
"learning_rate": 6.089378751289268e-06,
|
|
"loss": 1.2406864166259766,
|
|
"step": 2297
|
|
},
|
|
{
|
|
"epoch": 1.462762571610439,
|
|
"grad_norm": 14.744503007862395,
|
|
"learning_rate": 6.085763596977222e-06,
|
|
"loss": 1.6459439992904663,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 1.4633991088478675,
|
|
"grad_norm": 11.516187438025026,
|
|
"learning_rate": 6.0821478468679766e-06,
|
|
"loss": 1.4948744773864746,
|
|
"step": 2299
|
|
},
|
|
{
|
|
"epoch": 1.464035646085296,
|
|
"grad_norm": 15.331463785959949,
|
|
"learning_rate": 6.078531502945624e-06,
|
|
"loss": 2.1930792331695557,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 1.4646721833227243,
|
|
"grad_norm": 9.429387511806071,
|
|
"learning_rate": 6.07491456719458e-06,
|
|
"loss": 1.778498888015747,
|
|
"step": 2301
|
|
},
|
|
{
|
|
"epoch": 1.4653087205601527,
|
|
"grad_norm": 16.032039111111153,
|
|
"learning_rate": 6.071297041599585e-06,
|
|
"loss": 2.0730414390563965,
|
|
"step": 2302
|
|
},
|
|
{
|
|
"epoch": 1.4659452577975811,
|
|
"grad_norm": 11.614462376707863,
|
|
"learning_rate": 6.067678928145707e-06,
|
|
"loss": 1.0565383434295654,
|
|
"step": 2303
|
|
},
|
|
{
|
|
"epoch": 1.4665817950350095,
|
|
"grad_norm": 14.730772994555107,
|
|
"learning_rate": 6.0640602288183315e-06,
|
|
"loss": 1.4123332500457764,
|
|
"step": 2304
|
|
},
|
|
{
|
|
"epoch": 1.467218332272438,
|
|
"grad_norm": 8.805644728306925,
|
|
"learning_rate": 6.06044094560317e-06,
|
|
"loss": 1.7742290496826172,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 1.4678548695098663,
|
|
"grad_norm": 10.025269858987773,
|
|
"learning_rate": 6.056821080486248e-06,
|
|
"loss": 1.5650653839111328,
|
|
"step": 2306
|
|
},
|
|
{
|
|
"epoch": 1.4684914067472947,
|
|
"grad_norm": 12.177960282142802,
|
|
"learning_rate": 6.0532006354539205e-06,
|
|
"loss": 1.2841260433197021,
|
|
"step": 2307
|
|
},
|
|
{
|
|
"epoch": 1.4691279439847231,
|
|
"grad_norm": 11.225977777200889,
|
|
"learning_rate": 6.049579612492851e-06,
|
|
"loss": 1.70684015750885,
|
|
"step": 2308
|
|
},
|
|
{
|
|
"epoch": 1.4697644812221515,
|
|
"grad_norm": 15.679589152504976,
|
|
"learning_rate": 6.045958013590024e-06,
|
|
"loss": 1.214942455291748,
|
|
"step": 2309
|
|
},
|
|
{
|
|
"epoch": 1.47040101845958,
|
|
"grad_norm": 11.248471711515633,
|
|
"learning_rate": 6.042335840732741e-06,
|
|
"loss": 1.8510022163391113,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 1.4710375556970083,
|
|
"grad_norm": 8.008516568916232,
|
|
"learning_rate": 6.038713095908617e-06,
|
|
"loss": 1.433444857597351,
|
|
"step": 2311
|
|
},
|
|
{
|
|
"epoch": 1.4716740929344367,
|
|
"grad_norm": 13.613960224083982,
|
|
"learning_rate": 6.035089781105581e-06,
|
|
"loss": 1.693634271621704,
|
|
"step": 2312
|
|
},
|
|
{
|
|
"epoch": 1.4723106301718651,
|
|
"grad_norm": 7.621590152687834,
|
|
"learning_rate": 6.031465898311877e-06,
|
|
"loss": 1.5318776369094849,
|
|
"step": 2313
|
|
},
|
|
{
|
|
"epoch": 1.4729471674092935,
|
|
"grad_norm": 18.121718920303067,
|
|
"learning_rate": 6.027841449516054e-06,
|
|
"loss": 1.6473984718322754,
|
|
"step": 2314
|
|
},
|
|
{
|
|
"epoch": 1.473583704646722,
|
|
"grad_norm": 8.14648709448568,
|
|
"learning_rate": 6.024216436706983e-06,
|
|
"loss": 1.8800264596939087,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 1.4742202418841504,
|
|
"grad_norm": 8.403442395625294,
|
|
"learning_rate": 6.020590861873832e-06,
|
|
"loss": 1.84152352809906,
|
|
"step": 2316
|
|
},
|
|
{
|
|
"epoch": 1.4748567791215785,
|
|
"grad_norm": 28.308542384872574,
|
|
"learning_rate": 6.016964727006084e-06,
|
|
"loss": 2.369072437286377,
|
|
"step": 2317
|
|
},
|
|
{
|
|
"epoch": 1.475493316359007,
|
|
"grad_norm": 10.266489194171298,
|
|
"learning_rate": 6.013338034093532e-06,
|
|
"loss": 1.7621761560440063,
|
|
"step": 2318
|
|
},
|
|
{
|
|
"epoch": 1.4761298535964353,
|
|
"grad_norm": 10.115881623135751,
|
|
"learning_rate": 6.009710785126267e-06,
|
|
"loss": 1.779378056526184,
|
|
"step": 2319
|
|
},
|
|
{
|
|
"epoch": 1.4767663908338637,
|
|
"grad_norm": 17.80621757863576,
|
|
"learning_rate": 6.006082982094695e-06,
|
|
"loss": 1.8648977279663086,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 1.4774029280712921,
|
|
"grad_norm": 14.094940079453211,
|
|
"learning_rate": 6.002454626989519e-06,
|
|
"loss": 1.298775315284729,
|
|
"step": 2321
|
|
},
|
|
{
|
|
"epoch": 1.4780394653087205,
|
|
"grad_norm": 9.400726504781899,
|
|
"learning_rate": 5.9988257218017435e-06,
|
|
"loss": 1.314895749092102,
|
|
"step": 2322
|
|
},
|
|
{
|
|
"epoch": 1.478676002546149,
|
|
"grad_norm": 9.726742378726325,
|
|
"learning_rate": 5.995196268522681e-06,
|
|
"loss": 1.7099711894989014,
|
|
"step": 2323
|
|
},
|
|
{
|
|
"epoch": 1.4793125397835774,
|
|
"grad_norm": 8.707329797886894,
|
|
"learning_rate": 5.991566269143941e-06,
|
|
"loss": 1.7361701726913452,
|
|
"step": 2324
|
|
},
|
|
{
|
|
"epoch": 1.4799490770210058,
|
|
"grad_norm": 12.476393055946213,
|
|
"learning_rate": 5.987935725657436e-06,
|
|
"loss": 0.9174911975860596,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 1.4805856142584342,
|
|
"grad_norm": 9.336089625750024,
|
|
"learning_rate": 5.984304640055373e-06,
|
|
"loss": 1.816239833831787,
|
|
"step": 2326
|
|
},
|
|
{
|
|
"epoch": 1.4812221514958626,
|
|
"grad_norm": 18.8249747839833,
|
|
"learning_rate": 5.980673014330256e-06,
|
|
"loss": 1.1981911659240723,
|
|
"step": 2327
|
|
},
|
|
{
|
|
"epoch": 1.481858688733291,
|
|
"grad_norm": 9.651018850571326,
|
|
"learning_rate": 5.977040850474891e-06,
|
|
"loss": 2.1803438663482666,
|
|
"step": 2328
|
|
},
|
|
{
|
|
"epoch": 1.4824952259707194,
|
|
"grad_norm": 6.872288607868434,
|
|
"learning_rate": 5.973408150482374e-06,
|
|
"loss": 1.6503804922103882,
|
|
"step": 2329
|
|
},
|
|
{
|
|
"epoch": 1.4831317632081475,
|
|
"grad_norm": 14.17087632352517,
|
|
"learning_rate": 5.969774916346097e-06,
|
|
"loss": 1.5935099124908447,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 1.483768300445576,
|
|
"grad_norm": 7.923951253168774,
|
|
"learning_rate": 5.966141150059745e-06,
|
|
"loss": 1.5434465408325195,
|
|
"step": 2331
|
|
},
|
|
{
|
|
"epoch": 1.4844048376830044,
|
|
"grad_norm": 10.317158939383916,
|
|
"learning_rate": 5.962506853617297e-06,
|
|
"loss": 1.6459558010101318,
|
|
"step": 2332
|
|
},
|
|
{
|
|
"epoch": 1.4850413749204328,
|
|
"grad_norm": 39.307323069970685,
|
|
"learning_rate": 5.958872029013019e-06,
|
|
"loss": 1.6799983978271484,
|
|
"step": 2333
|
|
},
|
|
{
|
|
"epoch": 1.4856779121578612,
|
|
"grad_norm": 16.928986573700584,
|
|
"learning_rate": 5.955236678241469e-06,
|
|
"loss": 1.854414939880371,
|
|
"step": 2334
|
|
},
|
|
{
|
|
"epoch": 1.4863144493952896,
|
|
"grad_norm": 12.311553995465173,
|
|
"learning_rate": 5.9516008032974944e-06,
|
|
"loss": 1.1528398990631104,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 1.486950986632718,
|
|
"grad_norm": 16.079098630901317,
|
|
"learning_rate": 5.94796440617623e-06,
|
|
"loss": 1.7140576839447021,
|
|
"step": 2336
|
|
},
|
|
{
|
|
"epoch": 1.4875875238701464,
|
|
"grad_norm": 12.817788151844667,
|
|
"learning_rate": 5.944327488873094e-06,
|
|
"loss": 1.3568150997161865,
|
|
"step": 2337
|
|
},
|
|
{
|
|
"epoch": 1.4882240611075748,
|
|
"grad_norm": 10.369456896344717,
|
|
"learning_rate": 5.940690053383795e-06,
|
|
"loss": 1.5332515239715576,
|
|
"step": 2338
|
|
},
|
|
{
|
|
"epoch": 1.4888605983450032,
|
|
"grad_norm": 12.169721343242498,
|
|
"learning_rate": 5.937052101704324e-06,
|
|
"loss": 1.5990650653839111,
|
|
"step": 2339
|
|
},
|
|
{
|
|
"epoch": 1.4894971355824316,
|
|
"grad_norm": 14.30655326063879,
|
|
"learning_rate": 5.933413635830953e-06,
|
|
"loss": 1.647970199584961,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 1.49013367281986,
|
|
"grad_norm": 19.416266648259818,
|
|
"learning_rate": 5.929774657760239e-06,
|
|
"loss": 1.4303776025772095,
|
|
"step": 2341
|
|
},
|
|
{
|
|
"epoch": 1.4907702100572884,
|
|
"grad_norm": 8.739110013653029,
|
|
"learning_rate": 5.9261351694890166e-06,
|
|
"loss": 1.4337981939315796,
|
|
"step": 2342
|
|
},
|
|
{
|
|
"epoch": 1.4914067472947168,
|
|
"grad_norm": 13.121068350218405,
|
|
"learning_rate": 5.922495173014406e-06,
|
|
"loss": 2.3536746501922607,
|
|
"step": 2343
|
|
},
|
|
{
|
|
"epoch": 1.4920432845321452,
|
|
"grad_norm": 13.086547441778615,
|
|
"learning_rate": 5.9188546703338024e-06,
|
|
"loss": 1.1129584312438965,
|
|
"step": 2344
|
|
},
|
|
{
|
|
"epoch": 1.4926798217695736,
|
|
"grad_norm": 12.599650011923513,
|
|
"learning_rate": 5.915213663444878e-06,
|
|
"loss": 1.907105565071106,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 1.493316359007002,
|
|
"grad_norm": 16.103481442488864,
|
|
"learning_rate": 5.911572154345584e-06,
|
|
"loss": 1.2642226219177246,
|
|
"step": 2346
|
|
},
|
|
{
|
|
"epoch": 1.4939528962444304,
|
|
"grad_norm": 6.702106812818325,
|
|
"learning_rate": 5.907930145034145e-06,
|
|
"loss": 0.8979045152664185,
|
|
"step": 2347
|
|
},
|
|
{
|
|
"epoch": 1.4945894334818588,
|
|
"grad_norm": 9.570102582838961,
|
|
"learning_rate": 5.904287637509065e-06,
|
|
"loss": 1.5047214031219482,
|
|
"step": 2348
|
|
},
|
|
{
|
|
"epoch": 1.4952259707192872,
|
|
"grad_norm": 14.410444106232045,
|
|
"learning_rate": 5.900644633769115e-06,
|
|
"loss": 2.863888740539551,
|
|
"step": 2349
|
|
},
|
|
{
|
|
"epoch": 1.4958625079567156,
|
|
"grad_norm": 10.733488379158382,
|
|
"learning_rate": 5.89700113581334e-06,
|
|
"loss": 1.1438448429107666,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 1.4964990451941438,
|
|
"grad_norm": 10.27487028623432,
|
|
"learning_rate": 5.893357145641059e-06,
|
|
"loss": 1.913692831993103,
|
|
"step": 2351
|
|
},
|
|
{
|
|
"epoch": 1.4971355824315722,
|
|
"grad_norm": 17.762700060064628,
|
|
"learning_rate": 5.889712665251859e-06,
|
|
"loss": 1.2852919101715088,
|
|
"step": 2352
|
|
},
|
|
{
|
|
"epoch": 1.4977721196690006,
|
|
"grad_norm": 8.948382406158954,
|
|
"learning_rate": 5.886067696645597e-06,
|
|
"loss": 1.6822280883789062,
|
|
"step": 2353
|
|
},
|
|
{
|
|
"epoch": 1.498408656906429,
|
|
"grad_norm": 17.53624658522359,
|
|
"learning_rate": 5.8824222418223965e-06,
|
|
"loss": 1.7187851667404175,
|
|
"step": 2354
|
|
},
|
|
{
|
|
"epoch": 1.4990451941438574,
|
|
"grad_norm": 9.515580296782518,
|
|
"learning_rate": 5.878776302782648e-06,
|
|
"loss": 1.5445634126663208,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 1.4996817313812858,
|
|
"grad_norm": 14.900367983746788,
|
|
"learning_rate": 5.87512988152701e-06,
|
|
"loss": 1.8200538158416748,
|
|
"step": 2356
|
|
},
|
|
{
|
|
"epoch": 1.5003182686187142,
|
|
"grad_norm": 11.66663984483443,
|
|
"learning_rate": 5.871482980056401e-06,
|
|
"loss": 1.5881679058074951,
|
|
"step": 2357
|
|
},
|
|
{
|
|
"epoch": 1.5009548058561426,
|
|
"grad_norm": 13.639376618960366,
|
|
"learning_rate": 5.867835600372008e-06,
|
|
"loss": 1.586029291152954,
|
|
"step": 2358
|
|
},
|
|
{
|
|
"epoch": 1.501591343093571,
|
|
"grad_norm": 13.085645464864266,
|
|
"learning_rate": 5.86418774447528e-06,
|
|
"loss": 0.9381835460662842,
|
|
"step": 2359
|
|
},
|
|
{
|
|
"epoch": 1.5022278803309994,
|
|
"grad_norm": 8.388082049487934,
|
|
"learning_rate": 5.860539414367921e-06,
|
|
"loss": 1.4036571979522705,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 1.5028644175684276,
|
|
"grad_norm": 11.032147030076258,
|
|
"learning_rate": 5.856890612051902e-06,
|
|
"loss": 1.1054632663726807,
|
|
"step": 2361
|
|
},
|
|
{
|
|
"epoch": 1.503500954805856,
|
|
"grad_norm": 6.449258628897456,
|
|
"learning_rate": 5.853241339529452e-06,
|
|
"loss": 1.6686400175094604,
|
|
"step": 2362
|
|
},
|
|
{
|
|
"epoch": 1.5041374920432844,
|
|
"grad_norm": 14.877905193213572,
|
|
"learning_rate": 5.849591598803056e-06,
|
|
"loss": 1.7445240020751953,
|
|
"step": 2363
|
|
},
|
|
{
|
|
"epoch": 1.5047740292807128,
|
|
"grad_norm": 13.48403607496307,
|
|
"learning_rate": 5.845941391875458e-06,
|
|
"loss": 1.478830337524414,
|
|
"step": 2364
|
|
},
|
|
{
|
|
"epoch": 1.5054105665181412,
|
|
"grad_norm": 12.624608988158526,
|
|
"learning_rate": 5.842290720749655e-06,
|
|
"loss": 1.498946189880371,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 1.5060471037555696,
|
|
"grad_norm": 10.248213528519813,
|
|
"learning_rate": 5.8386395874289e-06,
|
|
"loss": 1.4745010137557983,
|
|
"step": 2366
|
|
},
|
|
{
|
|
"epoch": 1.506683640992998,
|
|
"grad_norm": 15.927668266457836,
|
|
"learning_rate": 5.834987993916703e-06,
|
|
"loss": 2.136512279510498,
|
|
"step": 2367
|
|
},
|
|
{
|
|
"epoch": 1.5073201782304264,
|
|
"grad_norm": 10.552288301041084,
|
|
"learning_rate": 5.831335942216821e-06,
|
|
"loss": 0.8879401683807373,
|
|
"step": 2368
|
|
},
|
|
{
|
|
"epoch": 1.5079567154678548,
|
|
"grad_norm": 13.51108816180199,
|
|
"learning_rate": 5.827683434333266e-06,
|
|
"loss": 1.3464289903640747,
|
|
"step": 2369
|
|
},
|
|
{
|
|
"epoch": 1.5085932527052832,
|
|
"grad_norm": 7.8398218967830635,
|
|
"learning_rate": 5.824030472270298e-06,
|
|
"loss": 1.5729990005493164,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 1.5092297899427116,
|
|
"grad_norm": 16.613616433122516,
|
|
"learning_rate": 5.820377058032429e-06,
|
|
"loss": 1.59991455078125,
|
|
"step": 2371
|
|
},
|
|
{
|
|
"epoch": 1.50986632718014,
|
|
"grad_norm": 19.457630942072974,
|
|
"learning_rate": 5.816723193624419e-06,
|
|
"loss": 1.623780369758606,
|
|
"step": 2372
|
|
},
|
|
{
|
|
"epoch": 1.5105028644175684,
|
|
"grad_norm": 11.827239090313682,
|
|
"learning_rate": 5.81306888105127e-06,
|
|
"loss": 1.1282806396484375,
|
|
"step": 2373
|
|
},
|
|
{
|
|
"epoch": 1.5111394016549968,
|
|
"grad_norm": 11.807090303270067,
|
|
"learning_rate": 5.809414122318235e-06,
|
|
"loss": 1.6361347436904907,
|
|
"step": 2374
|
|
},
|
|
{
|
|
"epoch": 1.5117759388924252,
|
|
"grad_norm": 14.222246913731992,
|
|
"learning_rate": 5.805758919430812e-06,
|
|
"loss": 1.2901973724365234,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 1.5124124761298536,
|
|
"grad_norm": 9.621662642884392,
|
|
"learning_rate": 5.802103274394739e-06,
|
|
"loss": 1.0700773000717163,
|
|
"step": 2376
|
|
},
|
|
{
|
|
"epoch": 1.513049013367282,
|
|
"grad_norm": 12.445854880335943,
|
|
"learning_rate": 5.798447189216002e-06,
|
|
"loss": 1.4615952968597412,
|
|
"step": 2377
|
|
},
|
|
{
|
|
"epoch": 1.5136855506047104,
|
|
"grad_norm": 10.563388400964374,
|
|
"learning_rate": 5.79479066590082e-06,
|
|
"loss": 1.8412933349609375,
|
|
"step": 2378
|
|
},
|
|
{
|
|
"epoch": 1.5143220878421388,
|
|
"grad_norm": 10.105264788768947,
|
|
"learning_rate": 5.7911337064556635e-06,
|
|
"loss": 1.7908297777175903,
|
|
"step": 2379
|
|
},
|
|
{
|
|
"epoch": 1.5149586250795672,
|
|
"grad_norm": 15.031091034073702,
|
|
"learning_rate": 5.787476312887233e-06,
|
|
"loss": 1.5867691040039062,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 1.5155951623169956,
|
|
"grad_norm": 12.5518698581082,
|
|
"learning_rate": 5.783818487202472e-06,
|
|
"loss": 1.8068511486053467,
|
|
"step": 2381
|
|
},
|
|
{
|
|
"epoch": 1.516231699554424,
|
|
"grad_norm": 13.640365163199606,
|
|
"learning_rate": 5.780160231408559e-06,
|
|
"loss": 1.2256360054016113,
|
|
"step": 2382
|
|
},
|
|
{
|
|
"epoch": 1.5168682367918525,
|
|
"grad_norm": 16.753718900128565,
|
|
"learning_rate": 5.776501547512909e-06,
|
|
"loss": 1.6830406188964844,
|
|
"step": 2383
|
|
},
|
|
{
|
|
"epoch": 1.5175047740292809,
|
|
"grad_norm": 7.767233254984847,
|
|
"learning_rate": 5.772842437523175e-06,
|
|
"loss": 1.4654542207717896,
|
|
"step": 2384
|
|
},
|
|
{
|
|
"epoch": 1.5181413112667093,
|
|
"grad_norm": 13.135493185756758,
|
|
"learning_rate": 5.769182903447239e-06,
|
|
"loss": 1.823169231414795,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 1.5187778485041374,
|
|
"grad_norm": 11.192415782515821,
|
|
"learning_rate": 5.765522947293217e-06,
|
|
"loss": 1.5445574522018433,
|
|
"step": 2386
|
|
},
|
|
{
|
|
"epoch": 1.5194143857415658,
|
|
"grad_norm": 10.165697469491613,
|
|
"learning_rate": 5.76186257106946e-06,
|
|
"loss": 1.0204110145568848,
|
|
"step": 2387
|
|
},
|
|
{
|
|
"epoch": 1.5200509229789942,
|
|
"grad_norm": 15.312932633201065,
|
|
"learning_rate": 5.758201776784545e-06,
|
|
"loss": 1.7300519943237305,
|
|
"step": 2388
|
|
},
|
|
{
|
|
"epoch": 1.5206874602164226,
|
|
"grad_norm": 12.383392551599943,
|
|
"learning_rate": 5.7545405664472805e-06,
|
|
"loss": 0.9942628145217896,
|
|
"step": 2389
|
|
},
|
|
{
|
|
"epoch": 1.521323997453851,
|
|
"grad_norm": 15.634403404390396,
|
|
"learning_rate": 5.750878942066704e-06,
|
|
"loss": 1.2621705532073975,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 1.5219605346912795,
|
|
"grad_norm": 8.708812744078262,
|
|
"learning_rate": 5.74721690565208e-06,
|
|
"loss": 1.4440770149230957,
|
|
"step": 2391
|
|
},
|
|
{
|
|
"epoch": 1.5225970719287079,
|
|
"grad_norm": 21.633404286701108,
|
|
"learning_rate": 5.743554459212896e-06,
|
|
"loss": 1.27446448802948,
|
|
"step": 2392
|
|
},
|
|
{
|
|
"epoch": 1.5232336091661363,
|
|
"grad_norm": 13.130933163467803,
|
|
"learning_rate": 5.739891604758868e-06,
|
|
"loss": 1.325007438659668,
|
|
"step": 2393
|
|
},
|
|
{
|
|
"epoch": 1.5238701464035647,
|
|
"grad_norm": 12.790325736291626,
|
|
"learning_rate": 5.736228344299937e-06,
|
|
"loss": 1.3999392986297607,
|
|
"step": 2394
|
|
},
|
|
{
|
|
"epoch": 1.5245066836409928,
|
|
"grad_norm": 13.80507100462981,
|
|
"learning_rate": 5.73256467984626e-06,
|
|
"loss": 1.116119623184204,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 1.5251432208784212,
|
|
"grad_norm": 8.33182348678265,
|
|
"learning_rate": 5.728900613408225e-06,
|
|
"loss": 1.4327359199523926,
|
|
"step": 2396
|
|
},
|
|
{
|
|
"epoch": 1.5257797581158496,
|
|
"grad_norm": 10.471323325547097,
|
|
"learning_rate": 5.725236146996435e-06,
|
|
"loss": 1.56307053565979,
|
|
"step": 2397
|
|
},
|
|
{
|
|
"epoch": 1.526416295353278,
|
|
"grad_norm": 8.88639225392792,
|
|
"learning_rate": 5.72157128262171e-06,
|
|
"loss": 0.9844847917556763,
|
|
"step": 2398
|
|
},
|
|
{
|
|
"epoch": 1.5270528325907065,
|
|
"grad_norm": 9.56586278919019,
|
|
"learning_rate": 5.717906022295095e-06,
|
|
"loss": 1.6100908517837524,
|
|
"step": 2399
|
|
},
|
|
{
|
|
"epoch": 1.5276893698281349,
|
|
"grad_norm": 21.839175985650733,
|
|
"learning_rate": 5.714240368027849e-06,
|
|
"loss": 1.9893325567245483,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 1.5283259070655633,
|
|
"grad_norm": 8.588983401953769,
|
|
"learning_rate": 5.7105743218314445e-06,
|
|
"loss": 1.646536946296692,
|
|
"step": 2401
|
|
},
|
|
{
|
|
"epoch": 1.5289624443029917,
|
|
"grad_norm": 15.34499421303345,
|
|
"learning_rate": 5.706907885717577e-06,
|
|
"loss": 1.8654241561889648,
|
|
"step": 2402
|
|
},
|
|
{
|
|
"epoch": 1.52959898154042,
|
|
"grad_norm": 16.78750142696331,
|
|
"learning_rate": 5.703241061698144e-06,
|
|
"loss": 1.8605923652648926,
|
|
"step": 2403
|
|
},
|
|
{
|
|
"epoch": 1.5302355187778485,
|
|
"grad_norm": 8.32178861249778,
|
|
"learning_rate": 5.699573851785267e-06,
|
|
"loss": 1.4465360641479492,
|
|
"step": 2404
|
|
},
|
|
{
|
|
"epoch": 1.5308720560152769,
|
|
"grad_norm": 19.960113686351008,
|
|
"learning_rate": 5.6959062579912734e-06,
|
|
"loss": 1.6353950500488281,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 1.5315085932527053,
|
|
"grad_norm": 11.170449072193628,
|
|
"learning_rate": 5.692238282328704e-06,
|
|
"loss": 1.8010241985321045,
|
|
"step": 2406
|
|
},
|
|
{
|
|
"epoch": 1.5321451304901337,
|
|
"grad_norm": 10.21315576880138,
|
|
"learning_rate": 5.688569926810308e-06,
|
|
"loss": 1.8595669269561768,
|
|
"step": 2407
|
|
},
|
|
{
|
|
"epoch": 1.532781667727562,
|
|
"grad_norm": 16.1700537055414,
|
|
"learning_rate": 5.684901193449039e-06,
|
|
"loss": 1.5712897777557373,
|
|
"step": 2408
|
|
},
|
|
{
|
|
"epoch": 1.5334182049649905,
|
|
"grad_norm": 7.650369644512302,
|
|
"learning_rate": 5.681232084258067e-06,
|
|
"loss": 1.8795337677001953,
|
|
"step": 2409
|
|
},
|
|
{
|
|
"epoch": 1.5340547422024189,
|
|
"grad_norm": 10.860659163538914,
|
|
"learning_rate": 5.677562601250759e-06,
|
|
"loss": 1.3441723585128784,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 1.5346912794398473,
|
|
"grad_norm": 11.017657705757012,
|
|
"learning_rate": 5.673892746440693e-06,
|
|
"loss": 1.0673878192901611,
|
|
"step": 2411
|
|
},
|
|
{
|
|
"epoch": 1.5353278166772757,
|
|
"grad_norm": 11.87282419575077,
|
|
"learning_rate": 5.670222521841649e-06,
|
|
"loss": 2.402776002883911,
|
|
"step": 2412
|
|
},
|
|
{
|
|
"epoch": 1.535964353914704,
|
|
"grad_norm": 11.069682527119124,
|
|
"learning_rate": 5.666551929467609e-06,
|
|
"loss": 1.244581937789917,
|
|
"step": 2413
|
|
},
|
|
{
|
|
"epoch": 1.5366008911521325,
|
|
"grad_norm": 18.981452453693823,
|
|
"learning_rate": 5.662880971332761e-06,
|
|
"loss": 1.5043175220489502,
|
|
"step": 2414
|
|
},
|
|
{
|
|
"epoch": 1.537237428389561,
|
|
"grad_norm": 11.344567062763744,
|
|
"learning_rate": 5.6592096494514894e-06,
|
|
"loss": 2.5523786544799805,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 1.5378739656269893,
|
|
"grad_norm": 7.97452314720193,
|
|
"learning_rate": 5.655537965838376e-06,
|
|
"loss": 1.6291911602020264,
|
|
"step": 2416
|
|
},
|
|
{
|
|
"epoch": 1.5385105028644177,
|
|
"grad_norm": 10.080095765845345,
|
|
"learning_rate": 5.651865922508209e-06,
|
|
"loss": 2.0800302028656006,
|
|
"step": 2417
|
|
},
|
|
{
|
|
"epoch": 1.539147040101846,
|
|
"grad_norm": 34.72810949100083,
|
|
"learning_rate": 5.6481935214759665e-06,
|
|
"loss": 1.6141481399536133,
|
|
"step": 2418
|
|
},
|
|
{
|
|
"epoch": 1.5397835773392745,
|
|
"grad_norm": 12.48720555430153,
|
|
"learning_rate": 5.644520764756827e-06,
|
|
"loss": 2.1375699043273926,
|
|
"step": 2419
|
|
},
|
|
{
|
|
"epoch": 1.5404201145767027,
|
|
"grad_norm": 15.210646215132524,
|
|
"learning_rate": 5.640847654366164e-06,
|
|
"loss": 1.831993818283081,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 1.541056651814131,
|
|
"grad_norm": 15.851301537320069,
|
|
"learning_rate": 5.637174192319541e-06,
|
|
"loss": 1.8037670850753784,
|
|
"step": 2421
|
|
},
|
|
{
|
|
"epoch": 1.5416931890515595,
|
|
"grad_norm": 13.314051769818072,
|
|
"learning_rate": 5.633500380632723e-06,
|
|
"loss": 1.2829947471618652,
|
|
"step": 2422
|
|
},
|
|
{
|
|
"epoch": 1.542329726288988,
|
|
"grad_norm": 15.188898639694466,
|
|
"learning_rate": 5.629826221321657e-06,
|
|
"loss": 1.741872787475586,
|
|
"step": 2423
|
|
},
|
|
{
|
|
"epoch": 1.5429662635264163,
|
|
"grad_norm": 8.78353436178482,
|
|
"learning_rate": 5.626151716402486e-06,
|
|
"loss": 1.5937385559082031,
|
|
"step": 2424
|
|
},
|
|
{
|
|
"epoch": 1.5436028007638447,
|
|
"grad_norm": 10.59836028443109,
|
|
"learning_rate": 5.622476867891543e-06,
|
|
"loss": 1.3646893501281738,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 1.544239338001273,
|
|
"grad_norm": 9.844120096253837,
|
|
"learning_rate": 5.618801677805347e-06,
|
|
"loss": 1.365708589553833,
|
|
"step": 2426
|
|
},
|
|
{
|
|
"epoch": 1.5448758752387015,
|
|
"grad_norm": 11.504525050991903,
|
|
"learning_rate": 5.615126148160607e-06,
|
|
"loss": 1.3138048648834229,
|
|
"step": 2427
|
|
},
|
|
{
|
|
"epoch": 1.5455124124761297,
|
|
"grad_norm": 9.410993220717272,
|
|
"learning_rate": 5.611450280974218e-06,
|
|
"loss": 1.3601410388946533,
|
|
"step": 2428
|
|
},
|
|
{
|
|
"epoch": 1.546148949713558,
|
|
"grad_norm": 12.05500495032501,
|
|
"learning_rate": 5.607774078263256e-06,
|
|
"loss": 1.2759274244308472,
|
|
"step": 2429
|
|
},
|
|
{
|
|
"epoch": 1.5467854869509865,
|
|
"grad_norm": 9.13620329173272,
|
|
"learning_rate": 5.604097542044988e-06,
|
|
"loss": 1.2775843143463135,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 1.547422024188415,
|
|
"grad_norm": 13.704179908789458,
|
|
"learning_rate": 5.600420674336858e-06,
|
|
"loss": 1.4167678356170654,
|
|
"step": 2431
|
|
},
|
|
{
|
|
"epoch": 1.5480585614258433,
|
|
"grad_norm": 17.068954969593896,
|
|
"learning_rate": 5.596743477156496e-06,
|
|
"loss": 1.973123550415039,
|
|
"step": 2432
|
|
},
|
|
{
|
|
"epoch": 1.5486950986632717,
|
|
"grad_norm": 7.823725363411393,
|
|
"learning_rate": 5.593065952521712e-06,
|
|
"loss": 1.330206036567688,
|
|
"step": 2433
|
|
},
|
|
{
|
|
"epoch": 1.5493316359007,
|
|
"grad_norm": 11.234869141267719,
|
|
"learning_rate": 5.589388102450492e-06,
|
|
"loss": 1.5377652645111084,
|
|
"step": 2434
|
|
},
|
|
{
|
|
"epoch": 1.5499681731381285,
|
|
"grad_norm": 22.322730252272205,
|
|
"learning_rate": 5.585709928961007e-06,
|
|
"loss": 1.4899756908416748,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 1.550604710375557,
|
|
"grad_norm": 11.263512633909038,
|
|
"learning_rate": 5.582031434071598e-06,
|
|
"loss": 1.6189697980880737,
|
|
"step": 2436
|
|
},
|
|
{
|
|
"epoch": 1.5512412476129853,
|
|
"grad_norm": 9.78607233920165,
|
|
"learning_rate": 5.578352619800791e-06,
|
|
"loss": 1.5311734676361084,
|
|
"step": 2437
|
|
},
|
|
{
|
|
"epoch": 1.5518777848504137,
|
|
"grad_norm": 10.142816293555294,
|
|
"learning_rate": 5.574673488167279e-06,
|
|
"loss": 1.1967965364456177,
|
|
"step": 2438
|
|
},
|
|
{
|
|
"epoch": 1.5525143220878421,
|
|
"grad_norm": 19.320225330331976,
|
|
"learning_rate": 5.570994041189933e-06,
|
|
"loss": 1.6374616622924805,
|
|
"step": 2439
|
|
},
|
|
{
|
|
"epoch": 1.5531508593252705,
|
|
"grad_norm": 13.637618207149412,
|
|
"learning_rate": 5.5673142808877974e-06,
|
|
"loss": 1.9044063091278076,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 1.553787396562699,
|
|
"grad_norm": 11.081736434115335,
|
|
"learning_rate": 5.5636342092800895e-06,
|
|
"loss": 1.7417365312576294,
|
|
"step": 2441
|
|
},
|
|
{
|
|
"epoch": 1.5544239338001273,
|
|
"grad_norm": 10.429877995477352,
|
|
"learning_rate": 5.5599538283861944e-06,
|
|
"loss": 1.6746402978897095,
|
|
"step": 2442
|
|
},
|
|
{
|
|
"epoch": 1.5550604710375557,
|
|
"grad_norm": 24.387037640475516,
|
|
"learning_rate": 5.5562731402256666e-06,
|
|
"loss": 0.9348142147064209,
|
|
"step": 2443
|
|
},
|
|
{
|
|
"epoch": 1.5556970082749841,
|
|
"grad_norm": 15.02622300900033,
|
|
"learning_rate": 5.552592146818232e-06,
|
|
"loss": 1.704547643661499,
|
|
"step": 2444
|
|
},
|
|
{
|
|
"epoch": 1.5563335455124125,
|
|
"grad_norm": 11.53393011045004,
|
|
"learning_rate": 5.5489108501837855e-06,
|
|
"loss": 1.6509344577789307,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 1.556970082749841,
|
|
"grad_norm": 11.946294271093686,
|
|
"learning_rate": 5.545229252342381e-06,
|
|
"loss": 1.8902714252471924,
|
|
"step": 2446
|
|
},
|
|
{
|
|
"epoch": 1.5576066199872693,
|
|
"grad_norm": 8.345232669757912,
|
|
"learning_rate": 5.541547355314248e-06,
|
|
"loss": 1.7008066177368164,
|
|
"step": 2447
|
|
},
|
|
{
|
|
"epoch": 1.5582431572246978,
|
|
"grad_norm": 14.631995751980634,
|
|
"learning_rate": 5.53786516111977e-06,
|
|
"loss": 1.8355236053466797,
|
|
"step": 2448
|
|
},
|
|
{
|
|
"epoch": 1.5588796944621262,
|
|
"grad_norm": 10.202501297184629,
|
|
"learning_rate": 5.5341826717795e-06,
|
|
"loss": 1.3646094799041748,
|
|
"step": 2449
|
|
},
|
|
{
|
|
"epoch": 1.5595162316995546,
|
|
"grad_norm": 11.332643315382237,
|
|
"learning_rate": 5.530499889314152e-06,
|
|
"loss": 1.356947898864746,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 1.560152768936983,
|
|
"grad_norm": 18.041208568131353,
|
|
"learning_rate": 5.526816815744601e-06,
|
|
"loss": 1.8688534498214722,
|
|
"step": 2451
|
|
},
|
|
{
|
|
"epoch": 1.5607893061744114,
|
|
"grad_norm": 20.385089468087184,
|
|
"learning_rate": 5.523133453091877e-06,
|
|
"loss": 1.5583463907241821,
|
|
"step": 2452
|
|
},
|
|
{
|
|
"epoch": 1.5614258434118395,
|
|
"grad_norm": 10.911689392828249,
|
|
"learning_rate": 5.519449803377176e-06,
|
|
"loss": 1.2631714344024658,
|
|
"step": 2453
|
|
},
|
|
{
|
|
"epoch": 1.562062380649268,
|
|
"grad_norm": 12.433198469900724,
|
|
"learning_rate": 5.515765868621845e-06,
|
|
"loss": 1.6228747367858887,
|
|
"step": 2454
|
|
},
|
|
{
|
|
"epoch": 1.5626989178866963,
|
|
"grad_norm": 17.27844214745092,
|
|
"learning_rate": 5.512081650847393e-06,
|
|
"loss": 1.9856112003326416,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 1.5633354551241248,
|
|
"grad_norm": 7.399488641281761,
|
|
"learning_rate": 5.508397152075481e-06,
|
|
"loss": 0.9606994390487671,
|
|
"step": 2456
|
|
},
|
|
{
|
|
"epoch": 1.5639719923615532,
|
|
"grad_norm": 8.18254649238998,
|
|
"learning_rate": 5.504712374327923e-06,
|
|
"loss": 2.0466148853302,
|
|
"step": 2457
|
|
},
|
|
{
|
|
"epoch": 1.5646085295989816,
|
|
"grad_norm": 13.273908143367608,
|
|
"learning_rate": 5.501027319626693e-06,
|
|
"loss": 1.5686981678009033,
|
|
"step": 2458
|
|
},
|
|
{
|
|
"epoch": 1.56524506683641,
|
|
"grad_norm": 17.596294750850674,
|
|
"learning_rate": 5.497341989993904e-06,
|
|
"loss": 1.8726886510849,
|
|
"step": 2459
|
|
},
|
|
{
|
|
"epoch": 1.5658816040738384,
|
|
"grad_norm": 11.709153911604465,
|
|
"learning_rate": 5.493656387451834e-06,
|
|
"loss": 1.1663498878479004,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 1.5665181413112665,
|
|
"grad_norm": 12.373461059287882,
|
|
"learning_rate": 5.489970514022903e-06,
|
|
"loss": 1.1478195190429688,
|
|
"step": 2461
|
|
},
|
|
{
|
|
"epoch": 1.567154678548695,
|
|
"grad_norm": 12.126425305330907,
|
|
"learning_rate": 5.486284371729679e-06,
|
|
"loss": 1.1417909860610962,
|
|
"step": 2462
|
|
},
|
|
{
|
|
"epoch": 1.5677912157861233,
|
|
"grad_norm": 7.733955402071897,
|
|
"learning_rate": 5.482597962594883e-06,
|
|
"loss": 1.6889036893844604,
|
|
"step": 2463
|
|
},
|
|
{
|
|
"epoch": 1.5684277530235518,
|
|
"grad_norm": 7.730134513765488,
|
|
"learning_rate": 5.478911288641376e-06,
|
|
"loss": 1.6023591756820679,
|
|
"step": 2464
|
|
},
|
|
{
|
|
"epoch": 1.5690642902609802,
|
|
"grad_norm": 15.541811993059941,
|
|
"learning_rate": 5.475224351892171e-06,
|
|
"loss": 1.387468695640564,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 1.5697008274984086,
|
|
"grad_norm": 10.083374347297031,
|
|
"learning_rate": 5.4715371543704185e-06,
|
|
"loss": 1.9434430599212646,
|
|
"step": 2466
|
|
},
|
|
{
|
|
"epoch": 1.570337364735837,
|
|
"grad_norm": 16.39709004328844,
|
|
"learning_rate": 5.467849698099416e-06,
|
|
"loss": 1.7062137126922607,
|
|
"step": 2467
|
|
},
|
|
{
|
|
"epoch": 1.5709739019732654,
|
|
"grad_norm": 7.256369540200404,
|
|
"learning_rate": 5.464161985102604e-06,
|
|
"loss": 1.0557485818862915,
|
|
"step": 2468
|
|
},
|
|
{
|
|
"epoch": 1.5716104392106938,
|
|
"grad_norm": 8.870464485244176,
|
|
"learning_rate": 5.46047401740356e-06,
|
|
"loss": 1.541259765625,
|
|
"step": 2469
|
|
},
|
|
{
|
|
"epoch": 1.5722469764481222,
|
|
"grad_norm": 9.413632462509831,
|
|
"learning_rate": 5.456785797026004e-06,
|
|
"loss": 1.2600197792053223,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 1.5728835136855506,
|
|
"grad_norm": 10.456644794335347,
|
|
"learning_rate": 5.453097325993796e-06,
|
|
"loss": 1.3559761047363281,
|
|
"step": 2471
|
|
},
|
|
{
|
|
"epoch": 1.573520050922979,
|
|
"grad_norm": 13.882304324175811,
|
|
"learning_rate": 5.449408606330927e-06,
|
|
"loss": 1.2437595129013062,
|
|
"step": 2472
|
|
},
|
|
{
|
|
"epoch": 1.5741565881604074,
|
|
"grad_norm": 9.962278892638508,
|
|
"learning_rate": 5.445719640061533e-06,
|
|
"loss": 1.031199336051941,
|
|
"step": 2473
|
|
},
|
|
{
|
|
"epoch": 1.5747931253978358,
|
|
"grad_norm": 13.334460805236132,
|
|
"learning_rate": 5.442030429209881e-06,
|
|
"loss": 1.6973717212677002,
|
|
"step": 2474
|
|
},
|
|
{
|
|
"epoch": 1.5754296626352642,
|
|
"grad_norm": 11.320974865850614,
|
|
"learning_rate": 5.438340975800371e-06,
|
|
"loss": 1.422226905822754,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 1.5760661998726926,
|
|
"grad_norm": 14.987753764178102,
|
|
"learning_rate": 5.434651281857538e-06,
|
|
"loss": 1.8186506032943726,
|
|
"step": 2476
|
|
},
|
|
{
|
|
"epoch": 1.576702737110121,
|
|
"grad_norm": 10.98446346920428,
|
|
"learning_rate": 5.430961349406049e-06,
|
|
"loss": 1.5441179275512695,
|
|
"step": 2477
|
|
},
|
|
{
|
|
"epoch": 1.5773392743475494,
|
|
"grad_norm": 10.176140350382148,
|
|
"learning_rate": 5.427271180470701e-06,
|
|
"loss": 1.8851035833358765,
|
|
"step": 2478
|
|
},
|
|
{
|
|
"epoch": 1.5779758115849778,
|
|
"grad_norm": 10.319189885338636,
|
|
"learning_rate": 5.423580777076421e-06,
|
|
"loss": 1.3101853132247925,
|
|
"step": 2479
|
|
},
|
|
{
|
|
"epoch": 1.5786123488224062,
|
|
"grad_norm": 17.722473439859765,
|
|
"learning_rate": 5.419890141248267e-06,
|
|
"loss": 1.411855936050415,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 1.5792488860598346,
|
|
"grad_norm": 11.071651967569489,
|
|
"learning_rate": 5.416199275011421e-06,
|
|
"loss": 1.4189932346343994,
|
|
"step": 2481
|
|
},
|
|
{
|
|
"epoch": 1.579885423297263,
|
|
"grad_norm": 9.421983059684807,
|
|
"learning_rate": 5.412508180391192e-06,
|
|
"loss": 1.4346650838851929,
|
|
"step": 2482
|
|
},
|
|
{
|
|
"epoch": 1.5805219605346914,
|
|
"grad_norm": 11.813742057186015,
|
|
"learning_rate": 5.408816859413017e-06,
|
|
"loss": 1.2399206161499023,
|
|
"step": 2483
|
|
},
|
|
{
|
|
"epoch": 1.5811584977721198,
|
|
"grad_norm": 17.20658112273176,
|
|
"learning_rate": 5.405125314102454e-06,
|
|
"loss": 1.7983934879302979,
|
|
"step": 2484
|
|
},
|
|
{
|
|
"epoch": 1.5817950350095482,
|
|
"grad_norm": 12.90079430805244,
|
|
"learning_rate": 5.401433546485186e-06,
|
|
"loss": 1.5692299604415894,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 1.5824315722469766,
|
|
"grad_norm": 11.226849180867754,
|
|
"learning_rate": 5.397741558587019e-06,
|
|
"loss": 1.787600040435791,
|
|
"step": 2486
|
|
},
|
|
{
|
|
"epoch": 1.5830681094844048,
|
|
"grad_norm": 10.462778919604604,
|
|
"learning_rate": 5.3940493524338755e-06,
|
|
"loss": 1.2418570518493652,
|
|
"step": 2487
|
|
},
|
|
{
|
|
"epoch": 1.5837046467218332,
|
|
"grad_norm": 13.48895578831498,
|
|
"learning_rate": 5.390356930051803e-06,
|
|
"loss": 1.8791996240615845,
|
|
"step": 2488
|
|
},
|
|
{
|
|
"epoch": 1.5843411839592616,
|
|
"grad_norm": 8.943457777264197,
|
|
"learning_rate": 5.386664293466965e-06,
|
|
"loss": 1.3849817514419556,
|
|
"step": 2489
|
|
},
|
|
{
|
|
"epoch": 1.58497772119669,
|
|
"grad_norm": 7.084618489747768,
|
|
"learning_rate": 5.382971444705641e-06,
|
|
"loss": 1.5093352794647217,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 1.5856142584341184,
|
|
"grad_norm": 7.131231857259523,
|
|
"learning_rate": 5.379278385794231e-06,
|
|
"loss": 1.9903844594955444,
|
|
"step": 2491
|
|
},
|
|
{
|
|
"epoch": 1.5862507956715468,
|
|
"grad_norm": 9.948365368652288,
|
|
"learning_rate": 5.3755851187592476e-06,
|
|
"loss": 1.2966089248657227,
|
|
"step": 2492
|
|
},
|
|
{
|
|
"epoch": 1.5868873329089752,
|
|
"grad_norm": 9.351928892073133,
|
|
"learning_rate": 5.371891645627319e-06,
|
|
"loss": 1.3795301914215088,
|
|
"step": 2493
|
|
},
|
|
{
|
|
"epoch": 1.5875238701464036,
|
|
"grad_norm": 14.312100472969226,
|
|
"learning_rate": 5.3681979684251825e-06,
|
|
"loss": 1.3844339847564697,
|
|
"step": 2494
|
|
},
|
|
{
|
|
"epoch": 1.5881604073838318,
|
|
"grad_norm": 14.685176653500934,
|
|
"learning_rate": 5.364504089179693e-06,
|
|
"loss": 1.266568899154663,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 1.5887969446212602,
|
|
"grad_norm": 13.013869857277845,
|
|
"learning_rate": 5.3608100099178125e-06,
|
|
"loss": 1.5471580028533936,
|
|
"step": 2496
|
|
},
|
|
{
|
|
"epoch": 1.5894334818586886,
|
|
"grad_norm": 20.74069345556099,
|
|
"learning_rate": 5.357115732666616e-06,
|
|
"loss": 1.4884121417999268,
|
|
"step": 2497
|
|
},
|
|
{
|
|
"epoch": 1.590070019096117,
|
|
"grad_norm": 13.287116136135257,
|
|
"learning_rate": 5.353421259453283e-06,
|
|
"loss": 2.1138341426849365,
|
|
"step": 2498
|
|
},
|
|
{
|
|
"epoch": 1.5907065563335454,
|
|
"grad_norm": 9.592183280451499,
|
|
"learning_rate": 5.3497265923051035e-06,
|
|
"loss": 1.4981415271759033,
|
|
"step": 2499
|
|
},
|
|
{
|
|
"epoch": 1.5913430935709738,
|
|
"grad_norm": 9.406310225065061,
|
|
"learning_rate": 5.346031733249472e-06,
|
|
"loss": 0.8034936785697937,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 1.5919796308084022,
|
|
"grad_norm": 18.87792843664383,
|
|
"learning_rate": 5.342336684313893e-06,
|
|
"loss": 0.7998415231704712,
|
|
"step": 2501
|
|
},
|
|
{
|
|
"epoch": 1.5926161680458306,
|
|
"grad_norm": 10.08298596053083,
|
|
"learning_rate": 5.338641447525966e-06,
|
|
"loss": 1.3965051174163818,
|
|
"step": 2502
|
|
},
|
|
{
|
|
"epoch": 1.593252705283259,
|
|
"grad_norm": 14.175780476680286,
|
|
"learning_rate": 5.334946024913404e-06,
|
|
"loss": 1.2556719779968262,
|
|
"step": 2503
|
|
},
|
|
{
|
|
"epoch": 1.5938892425206874,
|
|
"grad_norm": 15.267074107268423,
|
|
"learning_rate": 5.3312504185040155e-06,
|
|
"loss": 2.0348596572875977,
|
|
"step": 2504
|
|
},
|
|
{
|
|
"epoch": 1.5945257797581158,
|
|
"grad_norm": 9.195903000153212,
|
|
"learning_rate": 5.327554630325711e-06,
|
|
"loss": 1.3694276809692383,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 1.5951623169955442,
|
|
"grad_norm": 12.841150269370587,
|
|
"learning_rate": 5.323858662406502e-06,
|
|
"loss": 1.2917990684509277,
|
|
"step": 2506
|
|
},
|
|
{
|
|
"epoch": 1.5957988542329726,
|
|
"grad_norm": 9.307524612673058,
|
|
"learning_rate": 5.3201625167744984e-06,
|
|
"loss": 1.1434298753738403,
|
|
"step": 2507
|
|
},
|
|
{
|
|
"epoch": 1.596435391470401,
|
|
"grad_norm": 8.718760930508182,
|
|
"learning_rate": 5.3164661954579045e-06,
|
|
"loss": 1.6025032997131348,
|
|
"step": 2508
|
|
},
|
|
{
|
|
"epoch": 1.5970719287078294,
|
|
"grad_norm": 12.735275486509599,
|
|
"learning_rate": 5.312769700485028e-06,
|
|
"loss": 1.7678284645080566,
|
|
"step": 2509
|
|
},
|
|
{
|
|
"epoch": 1.5977084659452578,
|
|
"grad_norm": 7.799973529517469,
|
|
"learning_rate": 5.309073033884262e-06,
|
|
"loss": 1.5079340934753418,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 1.5983450031826862,
|
|
"grad_norm": 8.154451083063064,
|
|
"learning_rate": 5.305376197684104e-06,
|
|
"loss": 1.562661051750183,
|
|
"step": 2511
|
|
},
|
|
{
|
|
"epoch": 1.5989815404201146,
|
|
"grad_norm": 8.15660927982753,
|
|
"learning_rate": 5.301679193913136e-06,
|
|
"loss": 1.6808500289916992,
|
|
"step": 2512
|
|
},
|
|
{
|
|
"epoch": 1.599618077657543,
|
|
"grad_norm": 9.094573426234467,
|
|
"learning_rate": 5.297982024600039e-06,
|
|
"loss": 1.566948413848877,
|
|
"step": 2513
|
|
},
|
|
{
|
|
"epoch": 1.6002546148949714,
|
|
"grad_norm": 34.31357429002888,
|
|
"learning_rate": 5.2942846917735816e-06,
|
|
"loss": 2.2005081176757812,
|
|
"step": 2514
|
|
},
|
|
{
|
|
"epoch": 1.6008911521323999,
|
|
"grad_norm": 13.782375923718162,
|
|
"learning_rate": 5.290587197462618e-06,
|
|
"loss": 1.8872666358947754,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 1.6015276893698283,
|
|
"grad_norm": 14.337836831748135,
|
|
"learning_rate": 5.2868895436961e-06,
|
|
"loss": 1.3187255859375,
|
|
"step": 2516
|
|
},
|
|
{
|
|
"epoch": 1.6021642266072567,
|
|
"grad_norm": 9.925545018631547,
|
|
"learning_rate": 5.28319173250306e-06,
|
|
"loss": 2.0606861114501953,
|
|
"step": 2517
|
|
},
|
|
{
|
|
"epoch": 1.602800763844685,
|
|
"grad_norm": 8.634133392617938,
|
|
"learning_rate": 5.279493765912618e-06,
|
|
"loss": 1.5563485622406006,
|
|
"step": 2518
|
|
},
|
|
{
|
|
"epoch": 1.6034373010821135,
|
|
"grad_norm": 14.27870926472679,
|
|
"learning_rate": 5.2757956459539824e-06,
|
|
"loss": 1.2816444635391235,
|
|
"step": 2519
|
|
},
|
|
{
|
|
"epoch": 1.6040738383195416,
|
|
"grad_norm": 7.42769119113961,
|
|
"learning_rate": 5.272097374656441e-06,
|
|
"loss": 1.4142775535583496,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 1.60471037555697,
|
|
"grad_norm": 10.609080901884434,
|
|
"learning_rate": 5.26839895404937e-06,
|
|
"loss": 1.1905722618103027,
|
|
"step": 2521
|
|
},
|
|
{
|
|
"epoch": 1.6053469127943985,
|
|
"grad_norm": 10.837566893071427,
|
|
"learning_rate": 5.264700386162221e-06,
|
|
"loss": 1.6138941049575806,
|
|
"step": 2522
|
|
},
|
|
{
|
|
"epoch": 1.6059834500318269,
|
|
"grad_norm": 10.63493174465722,
|
|
"learning_rate": 5.261001673024533e-06,
|
|
"loss": 1.6332387924194336,
|
|
"step": 2523
|
|
},
|
|
{
|
|
"epoch": 1.6066199872692553,
|
|
"grad_norm": 18.05268427884533,
|
|
"learning_rate": 5.257302816665921e-06,
|
|
"loss": 1.358574390411377,
|
|
"step": 2524
|
|
},
|
|
{
|
|
"epoch": 1.6072565245066837,
|
|
"grad_norm": 10.264455479855231,
|
|
"learning_rate": 5.2536038191160795e-06,
|
|
"loss": 1.794968843460083,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 1.607893061744112,
|
|
"grad_norm": 11.673489931037276,
|
|
"learning_rate": 5.24990468240478e-06,
|
|
"loss": 1.9878292083740234,
|
|
"step": 2526
|
|
},
|
|
{
|
|
"epoch": 1.6085295989815405,
|
|
"grad_norm": 9.934338495268458,
|
|
"learning_rate": 5.246205408561871e-06,
|
|
"loss": 1.2982629537582397,
|
|
"step": 2527
|
|
},
|
|
{
|
|
"epoch": 1.6091661362189686,
|
|
"grad_norm": 8.439367650438797,
|
|
"learning_rate": 5.242505999617275e-06,
|
|
"loss": 1.7368496656417847,
|
|
"step": 2528
|
|
},
|
|
{
|
|
"epoch": 1.609802673456397,
|
|
"grad_norm": 9.550554873364206,
|
|
"learning_rate": 5.23880645760099e-06,
|
|
"loss": 1.5525805950164795,
|
|
"step": 2529
|
|
},
|
|
{
|
|
"epoch": 1.6104392106938255,
|
|
"grad_norm": 19.304529934923032,
|
|
"learning_rate": 5.235106784543087e-06,
|
|
"loss": 1.269942045211792,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 1.6110757479312539,
|
|
"grad_norm": 15.007163801558022,
|
|
"learning_rate": 5.231406982473708e-06,
|
|
"loss": 1.7000211477279663,
|
|
"step": 2531
|
|
},
|
|
{
|
|
"epoch": 1.6117122851686823,
|
|
"grad_norm": 6.807140346409658,
|
|
"learning_rate": 5.2277070534230676e-06,
|
|
"loss": 1.319089412689209,
|
|
"step": 2532
|
|
},
|
|
{
|
|
"epoch": 1.6123488224061107,
|
|
"grad_norm": 11.928631242362574,
|
|
"learning_rate": 5.224006999421448e-06,
|
|
"loss": 1.4072086811065674,
|
|
"step": 2533
|
|
},
|
|
{
|
|
"epoch": 1.612985359643539,
|
|
"grad_norm": 11.328101118148659,
|
|
"learning_rate": 5.220306822499201e-06,
|
|
"loss": 1.5428425073623657,
|
|
"step": 2534
|
|
},
|
|
{
|
|
"epoch": 1.6136218968809675,
|
|
"grad_norm": 16.49286894082796,
|
|
"learning_rate": 5.216606524686744e-06,
|
|
"loss": 1.7508461475372314,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 1.6142584341183959,
|
|
"grad_norm": 16.35103594988865,
|
|
"learning_rate": 5.212906108014565e-06,
|
|
"loss": 1.1230452060699463,
|
|
"step": 2536
|
|
},
|
|
{
|
|
"epoch": 1.6148949713558243,
|
|
"grad_norm": 7.659799653499345,
|
|
"learning_rate": 5.209205574513213e-06,
|
|
"loss": 1.4330987930297852,
|
|
"step": 2537
|
|
},
|
|
{
|
|
"epoch": 1.6155315085932527,
|
|
"grad_norm": 10.221897463363504,
|
|
"learning_rate": 5.205504926213301e-06,
|
|
"loss": 1.6965951919555664,
|
|
"step": 2538
|
|
},
|
|
{
|
|
"epoch": 1.616168045830681,
|
|
"grad_norm": 13.112421908722954,
|
|
"learning_rate": 5.201804165145511e-06,
|
|
"loss": 1.099511742591858,
|
|
"step": 2539
|
|
},
|
|
{
|
|
"epoch": 1.6168045830681095,
|
|
"grad_norm": 10.853254721405618,
|
|
"learning_rate": 5.19810329334058e-06,
|
|
"loss": 1.3006181716918945,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 1.6174411203055379,
|
|
"grad_norm": 14.37833836842456,
|
|
"learning_rate": 5.194402312829308e-06,
|
|
"loss": 1.0019958019256592,
|
|
"step": 2541
|
|
},
|
|
{
|
|
"epoch": 1.6180776575429663,
|
|
"grad_norm": 17.54365619853365,
|
|
"learning_rate": 5.190701225642555e-06,
|
|
"loss": 1.5287961959838867,
|
|
"step": 2542
|
|
},
|
|
{
|
|
"epoch": 1.6187141947803947,
|
|
"grad_norm": 8.907175647988893,
|
|
"learning_rate": 5.18700003381124e-06,
|
|
"loss": 1.8033959865570068,
|
|
"step": 2543
|
|
},
|
|
{
|
|
"epoch": 1.619350732017823,
|
|
"grad_norm": 8.150764698477932,
|
|
"learning_rate": 5.183298739366339e-06,
|
|
"loss": 1.845013976097107,
|
|
"step": 2544
|
|
},
|
|
{
|
|
"epoch": 1.6199872692552515,
|
|
"grad_norm": 11.033085898851356,
|
|
"learning_rate": 5.1795973443388835e-06,
|
|
"loss": 1.9903559684753418,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 1.62062380649268,
|
|
"grad_norm": 17.502845998892802,
|
|
"learning_rate": 5.175895850759963e-06,
|
|
"loss": 1.7325098514556885,
|
|
"step": 2546
|
|
},
|
|
{
|
|
"epoch": 1.6212603437301083,
|
|
"grad_norm": 8.914388916100945,
|
|
"learning_rate": 5.172194260660716e-06,
|
|
"loss": 1.6158649921417236,
|
|
"step": 2547
|
|
},
|
|
{
|
|
"epoch": 1.6218968809675367,
|
|
"grad_norm": 23.376651275892883,
|
|
"learning_rate": 5.168492576072339e-06,
|
|
"loss": 1.8969463109970093,
|
|
"step": 2548
|
|
},
|
|
{
|
|
"epoch": 1.622533418204965,
|
|
"grad_norm": 7.7616691794319514,
|
|
"learning_rate": 5.164790799026078e-06,
|
|
"loss": 1.9469236135482788,
|
|
"step": 2549
|
|
},
|
|
{
|
|
"epoch": 1.6231699554423935,
|
|
"grad_norm": 11.448904373559774,
|
|
"learning_rate": 5.1610889315532305e-06,
|
|
"loss": 1.2270617485046387,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 1.623806492679822,
|
|
"grad_norm": 13.076404992383189,
|
|
"learning_rate": 5.157386975685141e-06,
|
|
"loss": 1.4642993211746216,
|
|
"step": 2551
|
|
},
|
|
{
|
|
"epoch": 1.6244430299172503,
|
|
"grad_norm": 8.66512216203333,
|
|
"learning_rate": 5.153684933453207e-06,
|
|
"loss": 1.241729974746704,
|
|
"step": 2552
|
|
},
|
|
{
|
|
"epoch": 1.6250795671546785,
|
|
"grad_norm": 11.325137785870561,
|
|
"learning_rate": 5.14998280688887e-06,
|
|
"loss": 1.555477261543274,
|
|
"step": 2553
|
|
},
|
|
{
|
|
"epoch": 1.625716104392107,
|
|
"grad_norm": 17.34661177920995,
|
|
"learning_rate": 5.14628059802362e-06,
|
|
"loss": 1.5977141857147217,
|
|
"step": 2554
|
|
},
|
|
{
|
|
"epoch": 1.6263526416295353,
|
|
"grad_norm": 12.576231546456594,
|
|
"learning_rate": 5.142578308888989e-06,
|
|
"loss": 1.2389638423919678,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 1.6269891788669637,
|
|
"grad_norm": 12.855957753677716,
|
|
"learning_rate": 5.138875941516557e-06,
|
|
"loss": 1.4821150302886963,
|
|
"step": 2556
|
|
},
|
|
{
|
|
"epoch": 1.627625716104392,
|
|
"grad_norm": 11.431309949656288,
|
|
"learning_rate": 5.1351734979379444e-06,
|
|
"loss": 1.5326590538024902,
|
|
"step": 2557
|
|
},
|
|
{
|
|
"epoch": 1.6282622533418205,
|
|
"grad_norm": 7.821937185891337,
|
|
"learning_rate": 5.1314709801848125e-06,
|
|
"loss": 1.4889559745788574,
|
|
"step": 2558
|
|
},
|
|
{
|
|
"epoch": 1.628898790579249,
|
|
"grad_norm": 9.49676349166614,
|
|
"learning_rate": 5.127768390288866e-06,
|
|
"loss": 1.6810004711151123,
|
|
"step": 2559
|
|
},
|
|
{
|
|
"epoch": 1.6295353278166773,
|
|
"grad_norm": 10.71161695931628,
|
|
"learning_rate": 5.124065730281851e-06,
|
|
"loss": 1.0240229368209839,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 1.6301718650541057,
|
|
"grad_norm": 11.268129447521497,
|
|
"learning_rate": 5.120363002195543e-06,
|
|
"loss": 1.7059662342071533,
|
|
"step": 2561
|
|
},
|
|
{
|
|
"epoch": 1.630808402291534,
|
|
"grad_norm": 14.221674921133687,
|
|
"learning_rate": 5.116660208061766e-06,
|
|
"loss": 1.7881436347961426,
|
|
"step": 2562
|
|
},
|
|
{
|
|
"epoch": 1.6314449395289623,
|
|
"grad_norm": 14.767107252921956,
|
|
"learning_rate": 5.1129573499123716e-06,
|
|
"loss": 2.1184029579162598,
|
|
"step": 2563
|
|
},
|
|
{
|
|
"epoch": 1.6320814767663907,
|
|
"grad_norm": 9.641730487301958,
|
|
"learning_rate": 5.109254429779253e-06,
|
|
"loss": 1.4992198944091797,
|
|
"step": 2564
|
|
},
|
|
{
|
|
"epoch": 1.632718014003819,
|
|
"grad_norm": 7.604590478701982,
|
|
"learning_rate": 5.105551449694333e-06,
|
|
"loss": 1.6534342765808105,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 1.6333545512412475,
|
|
"grad_norm": 9.55502878757529,
|
|
"learning_rate": 5.101848411689568e-06,
|
|
"loss": 1.5150446891784668,
|
|
"step": 2566
|
|
},
|
|
{
|
|
"epoch": 1.633991088478676,
|
|
"grad_norm": 9.899677596535001,
|
|
"learning_rate": 5.09814531779695e-06,
|
|
"loss": 1.0682098865509033,
|
|
"step": 2567
|
|
},
|
|
{
|
|
"epoch": 1.6346276257161043,
|
|
"grad_norm": 11.019149561544571,
|
|
"learning_rate": 5.0944421700484945e-06,
|
|
"loss": 1.5136057138442993,
|
|
"step": 2568
|
|
},
|
|
{
|
|
"epoch": 1.6352641629535327,
|
|
"grad_norm": 8.75181650552953,
|
|
"learning_rate": 5.090738970476254e-06,
|
|
"loss": 1.3633084297180176,
|
|
"step": 2569
|
|
},
|
|
{
|
|
"epoch": 1.6359007001909611,
|
|
"grad_norm": 15.60729853294145,
|
|
"learning_rate": 5.087035721112306e-06,
|
|
"loss": 1.7520954608917236,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 1.6365372374283895,
|
|
"grad_norm": 5.7399923227452225,
|
|
"learning_rate": 5.083332423988751e-06,
|
|
"loss": 0.6073510050773621,
|
|
"step": 2571
|
|
},
|
|
{
|
|
"epoch": 1.637173774665818,
|
|
"grad_norm": 8.930390639791247,
|
|
"learning_rate": 5.079629081137728e-06,
|
|
"loss": 1.3986365795135498,
|
|
"step": 2572
|
|
},
|
|
{
|
|
"epoch": 1.6378103119032463,
|
|
"grad_norm": 12.220633447031721,
|
|
"learning_rate": 5.075925694591387e-06,
|
|
"loss": 1.580613136291504,
|
|
"step": 2573
|
|
},
|
|
{
|
|
"epoch": 1.6384468491406747,
|
|
"grad_norm": 16.007715628823384,
|
|
"learning_rate": 5.07222226638191e-06,
|
|
"loss": 1.8403897285461426,
|
|
"step": 2574
|
|
},
|
|
{
|
|
"epoch": 1.6390833863781031,
|
|
"grad_norm": 16.745345585391913,
|
|
"learning_rate": 5.068518798541501e-06,
|
|
"loss": 1.9516756534576416,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 1.6397199236155315,
|
|
"grad_norm": 33.70732705282881,
|
|
"learning_rate": 5.064815293102383e-06,
|
|
"loss": 1.200810432434082,
|
|
"step": 2576
|
|
},
|
|
{
|
|
"epoch": 1.64035646085296,
|
|
"grad_norm": 12.366219401140018,
|
|
"learning_rate": 5.061111752096805e-06,
|
|
"loss": 0.7799091935157776,
|
|
"step": 2577
|
|
},
|
|
{
|
|
"epoch": 1.6409929980903883,
|
|
"grad_norm": 13.177154968084707,
|
|
"learning_rate": 5.057408177557028e-06,
|
|
"loss": 1.4837524890899658,
|
|
"step": 2578
|
|
},
|
|
{
|
|
"epoch": 1.6416295353278167,
|
|
"grad_norm": 9.513504824800297,
|
|
"learning_rate": 5.053704571515337e-06,
|
|
"loss": 1.6435632705688477,
|
|
"step": 2579
|
|
},
|
|
{
|
|
"epoch": 1.6422660725652451,
|
|
"grad_norm": 11.653974469164183,
|
|
"learning_rate": 5.0500009360040335e-06,
|
|
"loss": 1.476144790649414,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 1.6429026098026736,
|
|
"grad_norm": 14.882494814850851,
|
|
"learning_rate": 5.0462972730554316e-06,
|
|
"loss": 2.2405483722686768,
|
|
"step": 2581
|
|
},
|
|
{
|
|
"epoch": 1.643539147040102,
|
|
"grad_norm": 12.340221318641138,
|
|
"learning_rate": 5.042593584701867e-06,
|
|
"loss": 1.5619505643844604,
|
|
"step": 2582
|
|
},
|
|
{
|
|
"epoch": 1.6441756842775304,
|
|
"grad_norm": 12.679333565407244,
|
|
"learning_rate": 5.038889872975684e-06,
|
|
"loss": 2.5269551277160645,
|
|
"step": 2583
|
|
},
|
|
{
|
|
"epoch": 1.6448122215149588,
|
|
"grad_norm": 9.707074909772693,
|
|
"learning_rate": 5.035186139909239e-06,
|
|
"loss": 1.4109852313995361,
|
|
"step": 2584
|
|
},
|
|
{
|
|
"epoch": 1.6454487587523872,
|
|
"grad_norm": 9.739550403269806,
|
|
"learning_rate": 5.031482387534907e-06,
|
|
"loss": 2.023956060409546,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 1.6460852959898156,
|
|
"grad_norm": 10.453317414956889,
|
|
"learning_rate": 5.027778617885065e-06,
|
|
"loss": 1.4007673263549805,
|
|
"step": 2586
|
|
},
|
|
{
|
|
"epoch": 1.6467218332272437,
|
|
"grad_norm": 15.831567048732454,
|
|
"learning_rate": 5.024074832992105e-06,
|
|
"loss": 2.015324831008911,
|
|
"step": 2587
|
|
},
|
|
{
|
|
"epoch": 1.6473583704646722,
|
|
"grad_norm": 9.887873218708938,
|
|
"learning_rate": 5.020371034888426e-06,
|
|
"loss": 1.171660304069519,
|
|
"step": 2588
|
|
},
|
|
{
|
|
"epoch": 1.6479949077021006,
|
|
"grad_norm": 13.261114636419348,
|
|
"learning_rate": 5.016667225606434e-06,
|
|
"loss": 1.9868791103363037,
|
|
"step": 2589
|
|
},
|
|
{
|
|
"epoch": 1.648631444939529,
|
|
"grad_norm": 11.113892939147165,
|
|
"learning_rate": 5.01296340717854e-06,
|
|
"loss": 1.2732374668121338,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 1.6492679821769574,
|
|
"grad_norm": 9.880403389941325,
|
|
"learning_rate": 5.009259581637165e-06,
|
|
"loss": 1.531540036201477,
|
|
"step": 2591
|
|
},
|
|
{
|
|
"epoch": 1.6499045194143858,
|
|
"grad_norm": 10.854817982624324,
|
|
"learning_rate": 5.005555751014723e-06,
|
|
"loss": 1.3397401571273804,
|
|
"step": 2592
|
|
},
|
|
{
|
|
"epoch": 1.6505410566518142,
|
|
"grad_norm": 10.913040502838221,
|
|
"learning_rate": 5.001851917343647e-06,
|
|
"loss": 1.732617735862732,
|
|
"step": 2593
|
|
},
|
|
{
|
|
"epoch": 1.6511775938892426,
|
|
"grad_norm": 11.477416538848326,
|
|
"learning_rate": 4.998148082656356e-06,
|
|
"loss": 1.8442219495773315,
|
|
"step": 2594
|
|
},
|
|
{
|
|
"epoch": 1.6518141311266707,
|
|
"grad_norm": 9.798566088736951,
|
|
"learning_rate": 4.994444248985276e-06,
|
|
"loss": 1.4900766611099243,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 1.6524506683640992,
|
|
"grad_norm": 7.6013755458264365,
|
|
"learning_rate": 4.990740418362837e-06,
|
|
"loss": 0.8129755258560181,
|
|
"step": 2596
|
|
},
|
|
{
|
|
"epoch": 1.6530872056015276,
|
|
"grad_norm": 17.22161084772196,
|
|
"learning_rate": 4.987036592821459e-06,
|
|
"loss": 0.9461665153503418,
|
|
"step": 2597
|
|
},
|
|
{
|
|
"epoch": 1.653723742838956,
|
|
"grad_norm": 8.291848018121122,
|
|
"learning_rate": 4.983332774393568e-06,
|
|
"loss": 0.6947128176689148,
|
|
"step": 2598
|
|
},
|
|
{
|
|
"epoch": 1.6543602800763844,
|
|
"grad_norm": 11.197807245764695,
|
|
"learning_rate": 4.979628965111575e-06,
|
|
"loss": 1.4426134824752808,
|
|
"step": 2599
|
|
},
|
|
{
|
|
"epoch": 1.6549968173138128,
|
|
"grad_norm": 18.112945902640668,
|
|
"learning_rate": 4.9759251670078965e-06,
|
|
"loss": 1.5626939535140991,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 1.6556333545512412,
|
|
"grad_norm": 14.94374928312868,
|
|
"learning_rate": 4.972221382114936e-06,
|
|
"loss": 2.3489809036254883,
|
|
"step": 2601
|
|
},
|
|
{
|
|
"epoch": 1.6562698917886696,
|
|
"grad_norm": 13.568681793115239,
|
|
"learning_rate": 4.968517612465094e-06,
|
|
"loss": 1.7263020277023315,
|
|
"step": 2602
|
|
},
|
|
{
|
|
"epoch": 1.656906429026098,
|
|
"grad_norm": 7.3874521974438325,
|
|
"learning_rate": 4.964813860090763e-06,
|
|
"loss": 1.4950655698776245,
|
|
"step": 2603
|
|
},
|
|
{
|
|
"epoch": 1.6575429662635264,
|
|
"grad_norm": 11.905322353506724,
|
|
"learning_rate": 4.961110127024317e-06,
|
|
"loss": 1.5684951543807983,
|
|
"step": 2604
|
|
},
|
|
{
|
|
"epoch": 1.6581795035009548,
|
|
"grad_norm": 12.179213263352665,
|
|
"learning_rate": 4.9574064152981355e-06,
|
|
"loss": 1.3854238986968994,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 1.6588160407383832,
|
|
"grad_norm": 6.669421169405933,
|
|
"learning_rate": 4.95370272694457e-06,
|
|
"loss": 1.1074484586715698,
|
|
"step": 2606
|
|
},
|
|
{
|
|
"epoch": 1.6594525779758116,
|
|
"grad_norm": 11.107662432773974,
|
|
"learning_rate": 4.949999063995969e-06,
|
|
"loss": 1.2956738471984863,
|
|
"step": 2607
|
|
},
|
|
{
|
|
"epoch": 1.66008911521324,
|
|
"grad_norm": 8.723558893408683,
|
|
"learning_rate": 4.946295428484665e-06,
|
|
"loss": 1.7084920406341553,
|
|
"step": 2608
|
|
},
|
|
{
|
|
"epoch": 1.6607256524506684,
|
|
"grad_norm": 12.675058168280549,
|
|
"learning_rate": 4.942591822442975e-06,
|
|
"loss": 1.451248288154602,
|
|
"step": 2609
|
|
},
|
|
{
|
|
"epoch": 1.6613621896880968,
|
|
"grad_norm": 6.957939997001016,
|
|
"learning_rate": 4.938888247903196e-06,
|
|
"loss": 1.521910309791565,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 1.6619987269255252,
|
|
"grad_norm": 16.388122496129952,
|
|
"learning_rate": 4.935184706897619e-06,
|
|
"loss": 1.5063462257385254,
|
|
"step": 2611
|
|
},
|
|
{
|
|
"epoch": 1.6626352641629536,
|
|
"grad_norm": 12.627133798409567,
|
|
"learning_rate": 4.9314812014585e-06,
|
|
"loss": 2.344245433807373,
|
|
"step": 2612
|
|
},
|
|
{
|
|
"epoch": 1.663271801400382,
|
|
"grad_norm": 17.10431999242891,
|
|
"learning_rate": 4.927777733618092e-06,
|
|
"loss": 0.8642339110374451,
|
|
"step": 2613
|
|
},
|
|
{
|
|
"epoch": 1.6639083386378104,
|
|
"grad_norm": 8.360206683707844,
|
|
"learning_rate": 4.924074305408614e-06,
|
|
"loss": 1.2939438819885254,
|
|
"step": 2614
|
|
},
|
|
{
|
|
"epoch": 1.6645448758752388,
|
|
"grad_norm": 18.579337833934677,
|
|
"learning_rate": 4.920370918862274e-06,
|
|
"loss": 0.9887975454330444,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 1.6651814131126672,
|
|
"grad_norm": 11.852916789044158,
|
|
"learning_rate": 4.916667576011248e-06,
|
|
"loss": 1.2906113862991333,
|
|
"step": 2616
|
|
},
|
|
{
|
|
"epoch": 1.6658179503500956,
|
|
"grad_norm": 8.75643929770764,
|
|
"learning_rate": 4.912964278887696e-06,
|
|
"loss": 1.9068280458450317,
|
|
"step": 2617
|
|
},
|
|
{
|
|
"epoch": 1.666454487587524,
|
|
"grad_norm": 15.47526259851981,
|
|
"learning_rate": 4.9092610295237464e-06,
|
|
"loss": 1.1410505771636963,
|
|
"step": 2618
|
|
},
|
|
{
|
|
"epoch": 1.6670910248249524,
|
|
"grad_norm": 8.5861311208462,
|
|
"learning_rate": 4.905557829951506e-06,
|
|
"loss": 1.0436980724334717,
|
|
"step": 2619
|
|
},
|
|
{
|
|
"epoch": 1.6677275620623806,
|
|
"grad_norm": 13.336515755525518,
|
|
"learning_rate": 4.9018546822030535e-06,
|
|
"loss": 1.2206828594207764,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 1.668364099299809,
|
|
"grad_norm": 11.084065750932826,
|
|
"learning_rate": 4.8981515883104334e-06,
|
|
"loss": 1.3082199096679688,
|
|
"step": 2621
|
|
},
|
|
{
|
|
"epoch": 1.6690006365372374,
|
|
"grad_norm": 13.293188677416547,
|
|
"learning_rate": 4.894448550305669e-06,
|
|
"loss": 1.2076120376586914,
|
|
"step": 2622
|
|
},
|
|
{
|
|
"epoch": 1.6696371737746658,
|
|
"grad_norm": 13.921015014631717,
|
|
"learning_rate": 4.890745570220748e-06,
|
|
"loss": 2.084150552749634,
|
|
"step": 2623
|
|
},
|
|
{
|
|
"epoch": 1.6702737110120942,
|
|
"grad_norm": 10.879765103715789,
|
|
"learning_rate": 4.88704265008763e-06,
|
|
"loss": 1.5977354049682617,
|
|
"step": 2624
|
|
},
|
|
{
|
|
"epoch": 1.6709102482495226,
|
|
"grad_norm": 15.36979222502608,
|
|
"learning_rate": 4.883339791938236e-06,
|
|
"loss": 1.1671185493469238,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 1.671546785486951,
|
|
"grad_norm": 11.292821889433561,
|
|
"learning_rate": 4.8796369978044595e-06,
|
|
"loss": 1.3492268323898315,
|
|
"step": 2626
|
|
},
|
|
{
|
|
"epoch": 1.6721833227243794,
|
|
"grad_norm": 11.001488301329799,
|
|
"learning_rate": 4.875934269718151e-06,
|
|
"loss": 1.9842692613601685,
|
|
"step": 2627
|
|
},
|
|
{
|
|
"epoch": 1.6728198599618078,
|
|
"grad_norm": 8.484898229157698,
|
|
"learning_rate": 4.872231609711135e-06,
|
|
"loss": 1.5810469388961792,
|
|
"step": 2628
|
|
},
|
|
{
|
|
"epoch": 1.673456397199236,
|
|
"grad_norm": 9.00134511149233,
|
|
"learning_rate": 4.868529019815188e-06,
|
|
"loss": 1.4162614345550537,
|
|
"step": 2629
|
|
},
|
|
{
|
|
"epoch": 1.6740929344366644,
|
|
"grad_norm": 8.237923462320568,
|
|
"learning_rate": 4.864826502062058e-06,
|
|
"loss": 1.422102689743042,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 1.6747294716740928,
|
|
"grad_norm": 11.225714993475835,
|
|
"learning_rate": 4.861124058483444e-06,
|
|
"loss": 1.182719111442566,
|
|
"step": 2631
|
|
},
|
|
{
|
|
"epoch": 1.6753660089115212,
|
|
"grad_norm": 8.982227872237772,
|
|
"learning_rate": 4.857421691111012e-06,
|
|
"loss": 1.7780842781066895,
|
|
"step": 2632
|
|
},
|
|
{
|
|
"epoch": 1.6760025461489496,
|
|
"grad_norm": 16.986305284004324,
|
|
"learning_rate": 4.853719401976381e-06,
|
|
"loss": 2.028423547744751,
|
|
"step": 2633
|
|
},
|
|
{
|
|
"epoch": 1.676639083386378,
|
|
"grad_norm": 16.22236168730729,
|
|
"learning_rate": 4.850017193111132e-06,
|
|
"loss": 1.4137513637542725,
|
|
"step": 2634
|
|
},
|
|
{
|
|
"epoch": 1.6772756206238064,
|
|
"grad_norm": 17.063882116609896,
|
|
"learning_rate": 4.846315066546793e-06,
|
|
"loss": 1.62284517288208,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 1.6779121578612348,
|
|
"grad_norm": 14.463341304966352,
|
|
"learning_rate": 4.842613024314861e-06,
|
|
"loss": 1.4167145490646362,
|
|
"step": 2636
|
|
},
|
|
{
|
|
"epoch": 1.6785486950986632,
|
|
"grad_norm": 7.12265951247381,
|
|
"learning_rate": 4.838911068446772e-06,
|
|
"loss": 1.2699812650680542,
|
|
"step": 2637
|
|
},
|
|
{
|
|
"epoch": 1.6791852323360916,
|
|
"grad_norm": 11.560495425660083,
|
|
"learning_rate": 4.835209200973923e-06,
|
|
"loss": 1.7189140319824219,
|
|
"step": 2638
|
|
},
|
|
{
|
|
"epoch": 1.67982176957352,
|
|
"grad_norm": 9.284297454278853,
|
|
"learning_rate": 4.831507423927662e-06,
|
|
"loss": 1.8094840049743652,
|
|
"step": 2639
|
|
},
|
|
{
|
|
"epoch": 1.6804583068109484,
|
|
"grad_norm": 10.5547572694535,
|
|
"learning_rate": 4.8278057393392845e-06,
|
|
"loss": 1.5845067501068115,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 1.6810948440483768,
|
|
"grad_norm": 13.076491851412808,
|
|
"learning_rate": 4.82410414924004e-06,
|
|
"loss": 1.0932317972183228,
|
|
"step": 2641
|
|
},
|
|
{
|
|
"epoch": 1.6817313812858052,
|
|
"grad_norm": 15.369196486975776,
|
|
"learning_rate": 4.820402655661117e-06,
|
|
"loss": 2.6274807453155518,
|
|
"step": 2642
|
|
},
|
|
{
|
|
"epoch": 1.6823679185232336,
|
|
"grad_norm": 9.292207744571861,
|
|
"learning_rate": 4.816701260633663e-06,
|
|
"loss": 1.6815487146377563,
|
|
"step": 2643
|
|
},
|
|
{
|
|
"epoch": 1.683004455760662,
|
|
"grad_norm": 7.972676529087167,
|
|
"learning_rate": 4.812999966188761e-06,
|
|
"loss": 1.5698246955871582,
|
|
"step": 2644
|
|
},
|
|
{
|
|
"epoch": 1.6836409929980904,
|
|
"grad_norm": 8.171446802527788,
|
|
"learning_rate": 4.809298774357447e-06,
|
|
"loss": 1.4061400890350342,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 1.6842775302355188,
|
|
"grad_norm": 10.587272437483414,
|
|
"learning_rate": 4.805597687170693e-06,
|
|
"loss": 1.510296106338501,
|
|
"step": 2646
|
|
},
|
|
{
|
|
"epoch": 1.6849140674729473,
|
|
"grad_norm": 15.292650449536161,
|
|
"learning_rate": 4.801896706659421e-06,
|
|
"loss": 1.462324619293213,
|
|
"step": 2647
|
|
},
|
|
{
|
|
"epoch": 1.6855506047103757,
|
|
"grad_norm": 11.774977159376329,
|
|
"learning_rate": 4.7981958348544885e-06,
|
|
"loss": 1.1011741161346436,
|
|
"step": 2648
|
|
},
|
|
{
|
|
"epoch": 1.686187141947804,
|
|
"grad_norm": 9.09207607187413,
|
|
"learning_rate": 4.7944950737866995e-06,
|
|
"loss": 1.5601119995117188,
|
|
"step": 2649
|
|
},
|
|
{
|
|
"epoch": 1.6868236791852325,
|
|
"grad_norm": 7.4325759097753,
|
|
"learning_rate": 4.790794425486787e-06,
|
|
"loss": 1.452481746673584,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 1.6874602164226609,
|
|
"grad_norm": 5.863343330390682,
|
|
"learning_rate": 4.787093891985437e-06,
|
|
"loss": 1.1045328378677368,
|
|
"step": 2651
|
|
},
|
|
{
|
|
"epoch": 1.6880967536600893,
|
|
"grad_norm": 13.42455223072694,
|
|
"learning_rate": 4.7833934753132565e-06,
|
|
"loss": 1.9689677953720093,
|
|
"step": 2652
|
|
},
|
|
{
|
|
"epoch": 1.6887332908975177,
|
|
"grad_norm": 13.269272548547477,
|
|
"learning_rate": 4.7796931775008e-06,
|
|
"loss": 1.5194764137268066,
|
|
"step": 2653
|
|
},
|
|
{
|
|
"epoch": 1.6893698281349459,
|
|
"grad_norm": 7.962121367092392,
|
|
"learning_rate": 4.7759930005785545e-06,
|
|
"loss": 1.214949131011963,
|
|
"step": 2654
|
|
},
|
|
{
|
|
"epoch": 1.6900063653723743,
|
|
"grad_norm": 11.204364022067033,
|
|
"learning_rate": 4.772292946576933e-06,
|
|
"loss": 2.322883129119873,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 1.6906429026098027,
|
|
"grad_norm": 15.409102974042531,
|
|
"learning_rate": 4.768593017526294e-06,
|
|
"loss": 1.7185745239257812,
|
|
"step": 2656
|
|
},
|
|
{
|
|
"epoch": 1.691279439847231,
|
|
"grad_norm": 8.06740086429854,
|
|
"learning_rate": 4.764893215456915e-06,
|
|
"loss": 1.651207447052002,
|
|
"step": 2657
|
|
},
|
|
{
|
|
"epoch": 1.6919159770846595,
|
|
"grad_norm": 12.292151951284634,
|
|
"learning_rate": 4.7611935423990126e-06,
|
|
"loss": 1.7684534788131714,
|
|
"step": 2658
|
|
},
|
|
{
|
|
"epoch": 1.6925525143220879,
|
|
"grad_norm": 19.782795435990266,
|
|
"learning_rate": 4.757494000382728e-06,
|
|
"loss": 1.3692883253097534,
|
|
"step": 2659
|
|
},
|
|
{
|
|
"epoch": 1.6931890515595163,
|
|
"grad_norm": 11.36673899932888,
|
|
"learning_rate": 4.753794591438132e-06,
|
|
"loss": 1.336172342300415,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 1.6938255887969447,
|
|
"grad_norm": 14.702726471415264,
|
|
"learning_rate": 4.750095317595221e-06,
|
|
"loss": 1.4075257778167725,
|
|
"step": 2661
|
|
},
|
|
{
|
|
"epoch": 1.6944621260343729,
|
|
"grad_norm": 13.13559681156395,
|
|
"learning_rate": 4.746396180883922e-06,
|
|
"loss": 2.2185723781585693,
|
|
"step": 2662
|
|
},
|
|
{
|
|
"epoch": 1.6950986632718013,
|
|
"grad_norm": 13.993201185079574,
|
|
"learning_rate": 4.742697183334079e-06,
|
|
"loss": 1.713407278060913,
|
|
"step": 2663
|
|
},
|
|
{
|
|
"epoch": 1.6957352005092297,
|
|
"grad_norm": 11.250450854267283,
|
|
"learning_rate": 4.7389983269754685e-06,
|
|
"loss": 1.4078516960144043,
|
|
"step": 2664
|
|
},
|
|
{
|
|
"epoch": 1.696371737746658,
|
|
"grad_norm": 8.866806251529898,
|
|
"learning_rate": 4.7352996138377795e-06,
|
|
"loss": 1.0841351747512817,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 1.6970082749840865,
|
|
"grad_norm": 15.22251665328589,
|
|
"learning_rate": 4.731601045950632e-06,
|
|
"loss": 2.2208127975463867,
|
|
"step": 2666
|
|
},
|
|
{
|
|
"epoch": 1.6976448122215149,
|
|
"grad_norm": 12.755584434542216,
|
|
"learning_rate": 4.72790262534356e-06,
|
|
"loss": 1.541381597518921,
|
|
"step": 2667
|
|
},
|
|
{
|
|
"epoch": 1.6982813494589433,
|
|
"grad_norm": 12.973773040042227,
|
|
"learning_rate": 4.724204354046019e-06,
|
|
"loss": 4.1180739402771,
|
|
"step": 2668
|
|
},
|
|
{
|
|
"epoch": 1.6989178866963717,
|
|
"grad_norm": 14.865288813598779,
|
|
"learning_rate": 4.720506234087382e-06,
|
|
"loss": 1.2021582126617432,
|
|
"step": 2669
|
|
},
|
|
{
|
|
"epoch": 1.6995544239338,
|
|
"grad_norm": 9.45945210243453,
|
|
"learning_rate": 4.7168082674969414e-06,
|
|
"loss": 1.6852741241455078,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 1.7001909611712285,
|
|
"grad_norm": 11.74632445740345,
|
|
"learning_rate": 4.713110456303902e-06,
|
|
"loss": 1.4227819442749023,
|
|
"step": 2671
|
|
},
|
|
{
|
|
"epoch": 1.7008274984086569,
|
|
"grad_norm": 12.312044268760175,
|
|
"learning_rate": 4.709412802537383e-06,
|
|
"loss": 1.231907844543457,
|
|
"step": 2672
|
|
},
|
|
{
|
|
"epoch": 1.7014640356460853,
|
|
"grad_norm": 8.019791070796293,
|
|
"learning_rate": 4.705715308226422e-06,
|
|
"loss": 1.3580896854400635,
|
|
"step": 2673
|
|
},
|
|
{
|
|
"epoch": 1.7021005728835137,
|
|
"grad_norm": 11.25609712511326,
|
|
"learning_rate": 4.702017975399962e-06,
|
|
"loss": 1.5906083583831787,
|
|
"step": 2674
|
|
},
|
|
{
|
|
"epoch": 1.702737110120942,
|
|
"grad_norm": 13.351174211208962,
|
|
"learning_rate": 4.698320806086865e-06,
|
|
"loss": 1.5613493919372559,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 1.7033736473583705,
|
|
"grad_norm": 10.067603950433067,
|
|
"learning_rate": 4.694623802315897e-06,
|
|
"loss": 1.824951410293579,
|
|
"step": 2676
|
|
},
|
|
{
|
|
"epoch": 1.704010184595799,
|
|
"grad_norm": 8.619097293119305,
|
|
"learning_rate": 4.69092696611574e-06,
|
|
"loss": 1.9785186052322388,
|
|
"step": 2677
|
|
},
|
|
{
|
|
"epoch": 1.7046467218332273,
|
|
"grad_norm": 24.780299073551443,
|
|
"learning_rate": 4.687230299514973e-06,
|
|
"loss": 2.073528289794922,
|
|
"step": 2678
|
|
},
|
|
{
|
|
"epoch": 1.7052832590706557,
|
|
"grad_norm": 10.394760869831988,
|
|
"learning_rate": 4.683533804542096e-06,
|
|
"loss": 1.5442249774932861,
|
|
"step": 2679
|
|
},
|
|
{
|
|
"epoch": 1.705919796308084,
|
|
"grad_norm": 9.109112189999545,
|
|
"learning_rate": 4.679837483225502e-06,
|
|
"loss": 1.3770475387573242,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 1.7065563335455125,
|
|
"grad_norm": 11.288737093630047,
|
|
"learning_rate": 4.6761413375934985e-06,
|
|
"loss": 1.0954097509384155,
|
|
"step": 2681
|
|
},
|
|
{
|
|
"epoch": 1.707192870782941,
|
|
"grad_norm": 7.6022964620470885,
|
|
"learning_rate": 4.672445369674289e-06,
|
|
"loss": 1.6249165534973145,
|
|
"step": 2682
|
|
},
|
|
{
|
|
"epoch": 1.7078294080203693,
|
|
"grad_norm": 12.311050123793805,
|
|
"learning_rate": 4.668749581495985e-06,
|
|
"loss": 2.0466883182525635,
|
|
"step": 2683
|
|
},
|
|
{
|
|
"epoch": 1.7084659452577977,
|
|
"grad_norm": 9.652623133287916,
|
|
"learning_rate": 4.665053975086596e-06,
|
|
"loss": 1.3413220643997192,
|
|
"step": 2684
|
|
},
|
|
{
|
|
"epoch": 1.7091024824952261,
|
|
"grad_norm": 14.407945494916643,
|
|
"learning_rate": 4.661358552474035e-06,
|
|
"loss": 1.534101963043213,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 1.7097390197326545,
|
|
"grad_norm": 13.699681515021897,
|
|
"learning_rate": 4.657663315686108e-06,
|
|
"loss": 1.7639039754867554,
|
|
"step": 2686
|
|
},
|
|
{
|
|
"epoch": 1.7103755569700827,
|
|
"grad_norm": 9.835204331445263,
|
|
"learning_rate": 4.653968266750529e-06,
|
|
"loss": 1.663576364517212,
|
|
"step": 2687
|
|
},
|
|
{
|
|
"epoch": 1.711012094207511,
|
|
"grad_norm": 8.29213353900224,
|
|
"learning_rate": 4.650273407694899e-06,
|
|
"loss": 1.8401124477386475,
|
|
"step": 2688
|
|
},
|
|
{
|
|
"epoch": 1.7116486314449395,
|
|
"grad_norm": 16.21116667459892,
|
|
"learning_rate": 4.646578740546719e-06,
|
|
"loss": 2.051877498626709,
|
|
"step": 2689
|
|
},
|
|
{
|
|
"epoch": 1.712285168682368,
|
|
"grad_norm": 10.334816052918256,
|
|
"learning_rate": 4.642884267333386e-06,
|
|
"loss": 2.043306350708008,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 1.7129217059197963,
|
|
"grad_norm": 10.694960629551327,
|
|
"learning_rate": 4.639189990082188e-06,
|
|
"loss": 1.655669927597046,
|
|
"step": 2691
|
|
},
|
|
{
|
|
"epoch": 1.7135582431572247,
|
|
"grad_norm": 9.789722825125446,
|
|
"learning_rate": 4.63549591082031e-06,
|
|
"loss": 2.1007449626922607,
|
|
"step": 2692
|
|
},
|
|
{
|
|
"epoch": 1.7141947803946531,
|
|
"grad_norm": 10.841182097179676,
|
|
"learning_rate": 4.631802031574818e-06,
|
|
"loss": 1.9260590076446533,
|
|
"step": 2693
|
|
},
|
|
{
|
|
"epoch": 1.7148313176320815,
|
|
"grad_norm": 9.661695123917502,
|
|
"learning_rate": 4.628108354372684e-06,
|
|
"loss": 1.5422745943069458,
|
|
"step": 2694
|
|
},
|
|
{
|
|
"epoch": 1.7154678548695097,
|
|
"grad_norm": 11.278926682171598,
|
|
"learning_rate": 4.624414881240753e-06,
|
|
"loss": 1.5441336631774902,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 1.716104392106938,
|
|
"grad_norm": 9.370853240561832,
|
|
"learning_rate": 4.62072161420577e-06,
|
|
"loss": 1.4866554737091064,
|
|
"step": 2696
|
|
},
|
|
{
|
|
"epoch": 1.7167409293443665,
|
|
"grad_norm": 10.634180490901398,
|
|
"learning_rate": 4.6170285552943596e-06,
|
|
"loss": 1.3072073459625244,
|
|
"step": 2697
|
|
},
|
|
{
|
|
"epoch": 1.717377466581795,
|
|
"grad_norm": 10.154650081060982,
|
|
"learning_rate": 4.613335706533036e-06,
|
|
"loss": 1.5072894096374512,
|
|
"step": 2698
|
|
},
|
|
{
|
|
"epoch": 1.7180140038192233,
|
|
"grad_norm": 8.639145327939204,
|
|
"learning_rate": 4.609643069948198e-06,
|
|
"loss": 1.424002766609192,
|
|
"step": 2699
|
|
},
|
|
{
|
|
"epoch": 1.7186505410566517,
|
|
"grad_norm": 14.166335213503675,
|
|
"learning_rate": 4.605950647566126e-06,
|
|
"loss": 1.6846305131912231,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 1.7192870782940801,
|
|
"grad_norm": 9.192112419179182,
|
|
"learning_rate": 4.602258441412981e-06,
|
|
"loss": 1.3232005834579468,
|
|
"step": 2701
|
|
},
|
|
{
|
|
"epoch": 1.7199236155315085,
|
|
"grad_norm": 15.282658775457303,
|
|
"learning_rate": 4.598566453514815e-06,
|
|
"loss": 1.3494807481765747,
|
|
"step": 2702
|
|
},
|
|
{
|
|
"epoch": 1.720560152768937,
|
|
"grad_norm": 7.554112889870477,
|
|
"learning_rate": 4.594874685897547e-06,
|
|
"loss": 1.5970849990844727,
|
|
"step": 2703
|
|
},
|
|
{
|
|
"epoch": 1.7211966900063653,
|
|
"grad_norm": 12.5940463637466,
|
|
"learning_rate": 4.591183140586984e-06,
|
|
"loss": 1.9829490184783936,
|
|
"step": 2704
|
|
},
|
|
{
|
|
"epoch": 1.7218332272437937,
|
|
"grad_norm": 8.357527203466196,
|
|
"learning_rate": 4.58749181960881e-06,
|
|
"loss": 1.2931435108184814,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 1.7224697644812221,
|
|
"grad_norm": 13.079920830712595,
|
|
"learning_rate": 4.58380072498858e-06,
|
|
"loss": 1.723933219909668,
|
|
"step": 2706
|
|
},
|
|
{
|
|
"epoch": 1.7231063017186505,
|
|
"grad_norm": 15.043892445958704,
|
|
"learning_rate": 4.580109858751735e-06,
|
|
"loss": 1.5980844497680664,
|
|
"step": 2707
|
|
},
|
|
{
|
|
"epoch": 1.723742838956079,
|
|
"grad_norm": 13.773161210386057,
|
|
"learning_rate": 4.5764192229235806e-06,
|
|
"loss": 1.5282667875289917,
|
|
"step": 2708
|
|
},
|
|
{
|
|
"epoch": 1.7243793761935073,
|
|
"grad_norm": 13.30959014815685,
|
|
"learning_rate": 4.5727288195293015e-06,
|
|
"loss": 1.3275375366210938,
|
|
"step": 2709
|
|
},
|
|
{
|
|
"epoch": 1.7250159134309357,
|
|
"grad_norm": 12.88024093785115,
|
|
"learning_rate": 4.569038650593953e-06,
|
|
"loss": 1.5965644121170044,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 1.7256524506683641,
|
|
"grad_norm": 9.375123164985691,
|
|
"learning_rate": 4.565348718142464e-06,
|
|
"loss": 0.7428637742996216,
|
|
"step": 2711
|
|
},
|
|
{
|
|
"epoch": 1.7262889879057925,
|
|
"grad_norm": 15.354425070286716,
|
|
"learning_rate": 4.561659024199631e-06,
|
|
"loss": 1.6778018474578857,
|
|
"step": 2712
|
|
},
|
|
{
|
|
"epoch": 1.726925525143221,
|
|
"grad_norm": 10.325063708220172,
|
|
"learning_rate": 4.5579695707901206e-06,
|
|
"loss": 0.8608949780464172,
|
|
"step": 2713
|
|
},
|
|
{
|
|
"epoch": 1.7275620623806494,
|
|
"grad_norm": 9.69379985752633,
|
|
"learning_rate": 4.554280359938467e-06,
|
|
"loss": 1.8087621927261353,
|
|
"step": 2714
|
|
},
|
|
{
|
|
"epoch": 1.7281985996180778,
|
|
"grad_norm": 10.391679895250121,
|
|
"learning_rate": 4.550591393669075e-06,
|
|
"loss": 1.2912559509277344,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 1.7288351368555062,
|
|
"grad_norm": 9.509452341637738,
|
|
"learning_rate": 4.546902674006205e-06,
|
|
"loss": 1.325238823890686,
|
|
"step": 2716
|
|
},
|
|
{
|
|
"epoch": 1.7294716740929346,
|
|
"grad_norm": 10.031245400265288,
|
|
"learning_rate": 4.543214202973997e-06,
|
|
"loss": 1.3821557760238647,
|
|
"step": 2717
|
|
},
|
|
{
|
|
"epoch": 1.730108211330363,
|
|
"grad_norm": 7.8763825659782825,
|
|
"learning_rate": 4.539525982596441e-06,
|
|
"loss": 1.4219706058502197,
|
|
"step": 2718
|
|
},
|
|
{
|
|
"epoch": 1.7307447485677914,
|
|
"grad_norm": 8.131227606844485,
|
|
"learning_rate": 4.535838014897397e-06,
|
|
"loss": 1.3258090019226074,
|
|
"step": 2719
|
|
},
|
|
{
|
|
"epoch": 1.7313812858052198,
|
|
"grad_norm": 15.438585926443563,
|
|
"learning_rate": 4.5321503019005845e-06,
|
|
"loss": 1.742460012435913,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 1.732017823042648,
|
|
"grad_norm": 10.980552780427807,
|
|
"learning_rate": 4.528462845629582e-06,
|
|
"loss": 1.804673671722412,
|
|
"step": 2721
|
|
},
|
|
{
|
|
"epoch": 1.7326543602800764,
|
|
"grad_norm": 12.225911915985892,
|
|
"learning_rate": 4.5247756481078316e-06,
|
|
"loss": 1.4501913785934448,
|
|
"step": 2722
|
|
},
|
|
{
|
|
"epoch": 1.7332908975175048,
|
|
"grad_norm": 9.058366703392052,
|
|
"learning_rate": 4.521088711358625e-06,
|
|
"loss": 2.058661699295044,
|
|
"step": 2723
|
|
},
|
|
{
|
|
"epoch": 1.7339274347549332,
|
|
"grad_norm": 14.457279486215986,
|
|
"learning_rate": 4.5174020374051196e-06,
|
|
"loss": 2.465367317199707,
|
|
"step": 2724
|
|
},
|
|
{
|
|
"epoch": 1.7345639719923616,
|
|
"grad_norm": 9.968762813500518,
|
|
"learning_rate": 4.513715628270322e-06,
|
|
"loss": 1.7616548538208008,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 1.73520050922979,
|
|
"grad_norm": 19.23324342547291,
|
|
"learning_rate": 4.5100294859771e-06,
|
|
"loss": 1.5484445095062256,
|
|
"step": 2726
|
|
},
|
|
{
|
|
"epoch": 1.7358370464672184,
|
|
"grad_norm": 9.05027175359414,
|
|
"learning_rate": 4.506343612548167e-06,
|
|
"loss": 1.5154013633728027,
|
|
"step": 2727
|
|
},
|
|
{
|
|
"epoch": 1.7364735837046468,
|
|
"grad_norm": 16.351489583050718,
|
|
"learning_rate": 4.502658010006099e-06,
|
|
"loss": 1.9099916219711304,
|
|
"step": 2728
|
|
},
|
|
{
|
|
"epoch": 1.737110120942075,
|
|
"grad_norm": 14.012444498061342,
|
|
"learning_rate": 4.498972680373309e-06,
|
|
"loss": 1.546755075454712,
|
|
"step": 2729
|
|
},
|
|
{
|
|
"epoch": 1.7377466581795034,
|
|
"grad_norm": 9.658306319539678,
|
|
"learning_rate": 4.495287625672078e-06,
|
|
"loss": 1.8375014066696167,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 1.7383831954169318,
|
|
"grad_norm": 10.236324118990494,
|
|
"learning_rate": 4.49160284792452e-06,
|
|
"loss": 1.5359485149383545,
|
|
"step": 2731
|
|
},
|
|
{
|
|
"epoch": 1.7390197326543602,
|
|
"grad_norm": 15.876697672007449,
|
|
"learning_rate": 4.487918349152608e-06,
|
|
"loss": 1.550153136253357,
|
|
"step": 2732
|
|
},
|
|
{
|
|
"epoch": 1.7396562698917886,
|
|
"grad_norm": 10.035313981197161,
|
|
"learning_rate": 4.484234131378156e-06,
|
|
"loss": 1.5224084854125977,
|
|
"step": 2733
|
|
},
|
|
{
|
|
"epoch": 1.740292807129217,
|
|
"grad_norm": 16.188799288719043,
|
|
"learning_rate": 4.480550196622826e-06,
|
|
"loss": 1.6584681272506714,
|
|
"step": 2734
|
|
},
|
|
{
|
|
"epoch": 1.7409293443666454,
|
|
"grad_norm": 24.536301582125997,
|
|
"learning_rate": 4.476866546908123e-06,
|
|
"loss": 1.3278266191482544,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 1.7415658816040738,
|
|
"grad_norm": 13.660091693006471,
|
|
"learning_rate": 4.4731831842554005e-06,
|
|
"loss": 1.5143094062805176,
|
|
"step": 2736
|
|
},
|
|
{
|
|
"epoch": 1.7422024188415022,
|
|
"grad_norm": 12.097725864268526,
|
|
"learning_rate": 4.4695001106858476e-06,
|
|
"loss": 1.1598591804504395,
|
|
"step": 2737
|
|
},
|
|
{
|
|
"epoch": 1.7428389560789306,
|
|
"grad_norm": 6.817629496229504,
|
|
"learning_rate": 4.465817328220501e-06,
|
|
"loss": 1.4929516315460205,
|
|
"step": 2738
|
|
},
|
|
{
|
|
"epoch": 1.743475493316359,
|
|
"grad_norm": 12.298725463933952,
|
|
"learning_rate": 4.462134838880232e-06,
|
|
"loss": 1.676002025604248,
|
|
"step": 2739
|
|
},
|
|
{
|
|
"epoch": 1.7441120305537874,
|
|
"grad_norm": 12.790239051801278,
|
|
"learning_rate": 4.458452644685755e-06,
|
|
"loss": 1.4403189420700073,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 1.7447485677912158,
|
|
"grad_norm": 15.214184935419688,
|
|
"learning_rate": 4.454770747657621e-06,
|
|
"loss": 1.8207788467407227,
|
|
"step": 2741
|
|
},
|
|
{
|
|
"epoch": 1.7453851050286442,
|
|
"grad_norm": 18.147328398579585,
|
|
"learning_rate": 4.451089149816217e-06,
|
|
"loss": 1.6734029054641724,
|
|
"step": 2742
|
|
},
|
|
{
|
|
"epoch": 1.7460216422660726,
|
|
"grad_norm": 17.008129338089915,
|
|
"learning_rate": 4.44740785318177e-06,
|
|
"loss": 1.4844833612442017,
|
|
"step": 2743
|
|
},
|
|
{
|
|
"epoch": 1.746658179503501,
|
|
"grad_norm": 19.944149069153134,
|
|
"learning_rate": 4.443726859774335e-06,
|
|
"loss": 1.705742359161377,
|
|
"step": 2744
|
|
},
|
|
{
|
|
"epoch": 1.7472947167409294,
|
|
"grad_norm": 9.00735539820465,
|
|
"learning_rate": 4.440046171613809e-06,
|
|
"loss": 1.8046600818634033,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 1.7479312539783578,
|
|
"grad_norm": 11.165564361332601,
|
|
"learning_rate": 4.436365790719911e-06,
|
|
"loss": 1.7548118829727173,
|
|
"step": 2746
|
|
},
|
|
{
|
|
"epoch": 1.7485677912157862,
|
|
"grad_norm": 12.905787308633506,
|
|
"learning_rate": 4.432685719112203e-06,
|
|
"loss": 2.3659415245056152,
|
|
"step": 2747
|
|
},
|
|
{
|
|
"epoch": 1.7492043284532146,
|
|
"grad_norm": 10.468041323057008,
|
|
"learning_rate": 4.429005958810068e-06,
|
|
"loss": 1.8399016857147217,
|
|
"step": 2748
|
|
},
|
|
{
|
|
"epoch": 1.749840865690643,
|
|
"grad_norm": 16.12950367376264,
|
|
"learning_rate": 4.425326511832722e-06,
|
|
"loss": 1.7204582691192627,
|
|
"step": 2749
|
|
},
|
|
{
|
|
"epoch": 1.7504774029280714,
|
|
"grad_norm": 15.272007450644736,
|
|
"learning_rate": 4.421647380199209e-06,
|
|
"loss": 1.4919688701629639,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 1.7511139401654998,
|
|
"grad_norm": 7.684914591730195,
|
|
"learning_rate": 4.417968565928402e-06,
|
|
"loss": 1.036765694618225,
|
|
"step": 2751
|
|
},
|
|
{
|
|
"epoch": 1.7517504774029282,
|
|
"grad_norm": 14.096573359376926,
|
|
"learning_rate": 4.414290071038993e-06,
|
|
"loss": 1.4351688623428345,
|
|
"step": 2752
|
|
},
|
|
{
|
|
"epoch": 1.7523870146403566,
|
|
"grad_norm": 8.972784578658036,
|
|
"learning_rate": 4.410611897549508e-06,
|
|
"loss": 1.400472640991211,
|
|
"step": 2753
|
|
},
|
|
{
|
|
"epoch": 1.7530235518777848,
|
|
"grad_norm": 15.530583142097809,
|
|
"learning_rate": 4.406934047478289e-06,
|
|
"loss": 1.7428488731384277,
|
|
"step": 2754
|
|
},
|
|
{
|
|
"epoch": 1.7536600891152132,
|
|
"grad_norm": 9.999208053693803,
|
|
"learning_rate": 4.4032565228435045e-06,
|
|
"loss": 1.6773875951766968,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 1.7542966263526416,
|
|
"grad_norm": 13.141219762306736,
|
|
"learning_rate": 4.399579325663143e-06,
|
|
"loss": 1.8271172046661377,
|
|
"step": 2756
|
|
},
|
|
{
|
|
"epoch": 1.75493316359007,
|
|
"grad_norm": 9.646887729468569,
|
|
"learning_rate": 4.395902457955013e-06,
|
|
"loss": 1.2367219924926758,
|
|
"step": 2757
|
|
},
|
|
{
|
|
"epoch": 1.7555697008274984,
|
|
"grad_norm": 11.672356507759368,
|
|
"learning_rate": 4.392225921736747e-06,
|
|
"loss": 1.7831553220748901,
|
|
"step": 2758
|
|
},
|
|
{
|
|
"epoch": 1.7562062380649268,
|
|
"grad_norm": 11.740461262184539,
|
|
"learning_rate": 4.3885497190257846e-06,
|
|
"loss": 1.3299363851547241,
|
|
"step": 2759
|
|
},
|
|
{
|
|
"epoch": 1.7568427753023552,
|
|
"grad_norm": 14.282444391041624,
|
|
"learning_rate": 4.3848738518393955e-06,
|
|
"loss": 1.4146277904510498,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 1.7574793125397836,
|
|
"grad_norm": 11.946687790079677,
|
|
"learning_rate": 4.381198322194655e-06,
|
|
"loss": 1.8097727298736572,
|
|
"step": 2761
|
|
},
|
|
{
|
|
"epoch": 1.7581158497772118,
|
|
"grad_norm": 8.509222426900719,
|
|
"learning_rate": 4.37752313210846e-06,
|
|
"loss": 2.181358575820923,
|
|
"step": 2762
|
|
},
|
|
{
|
|
"epoch": 1.7587523870146402,
|
|
"grad_norm": 9.877851555069872,
|
|
"learning_rate": 4.373848283597515e-06,
|
|
"loss": 1.792051911354065,
|
|
"step": 2763
|
|
},
|
|
{
|
|
"epoch": 1.7593889242520686,
|
|
"grad_norm": 7.880989115735572,
|
|
"learning_rate": 4.370173778678345e-06,
|
|
"loss": 1.6314315795898438,
|
|
"step": 2764
|
|
},
|
|
{
|
|
"epoch": 1.760025461489497,
|
|
"grad_norm": 11.511721765097521,
|
|
"learning_rate": 4.366499619367278e-06,
|
|
"loss": 1.300513744354248,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 1.7606619987269254,
|
|
"grad_norm": 8.235853079907368,
|
|
"learning_rate": 4.36282580768046e-06,
|
|
"loss": 1.2249748706817627,
|
|
"step": 2766
|
|
},
|
|
{
|
|
"epoch": 1.7612985359643538,
|
|
"grad_norm": 15.517502556500824,
|
|
"learning_rate": 4.359152345633837e-06,
|
|
"loss": 1.568852186203003,
|
|
"step": 2767
|
|
},
|
|
{
|
|
"epoch": 1.7619350732017822,
|
|
"grad_norm": 13.196318433501105,
|
|
"learning_rate": 4.355479235243175e-06,
|
|
"loss": 1.5850822925567627,
|
|
"step": 2768
|
|
},
|
|
{
|
|
"epoch": 1.7625716104392106,
|
|
"grad_norm": 9.491915527963855,
|
|
"learning_rate": 4.351806478524034e-06,
|
|
"loss": 1.2834690809249878,
|
|
"step": 2769
|
|
},
|
|
{
|
|
"epoch": 1.763208147676639,
|
|
"grad_norm": 8.13351804237787,
|
|
"learning_rate": 4.348134077491793e-06,
|
|
"loss": 1.5589559078216553,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 1.7638446849140674,
|
|
"grad_norm": 9.71242858538455,
|
|
"learning_rate": 4.344462034161625e-06,
|
|
"loss": 1.8301423788070679,
|
|
"step": 2771
|
|
},
|
|
{
|
|
"epoch": 1.7644812221514958,
|
|
"grad_norm": 13.56209659158831,
|
|
"learning_rate": 4.340790350548512e-06,
|
|
"loss": 1.9303207397460938,
|
|
"step": 2772
|
|
},
|
|
{
|
|
"epoch": 1.7651177593889242,
|
|
"grad_norm": 9.37162818204778,
|
|
"learning_rate": 4.33711902866724e-06,
|
|
"loss": 1.2622904777526855,
|
|
"step": 2773
|
|
},
|
|
{
|
|
"epoch": 1.7657542966263526,
|
|
"grad_norm": 9.21554839176676,
|
|
"learning_rate": 4.3334480705323915e-06,
|
|
"loss": 1.9959380626678467,
|
|
"step": 2774
|
|
},
|
|
{
|
|
"epoch": 1.766390833863781,
|
|
"grad_norm": 7.683670140357001,
|
|
"learning_rate": 4.329777478158353e-06,
|
|
"loss": 1.663625955581665,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 1.7670273711012094,
|
|
"grad_norm": 11.91599494046607,
|
|
"learning_rate": 4.3261072535593085e-06,
|
|
"loss": 2.1813929080963135,
|
|
"step": 2776
|
|
},
|
|
{
|
|
"epoch": 1.7676639083386378,
|
|
"grad_norm": 13.039022995026968,
|
|
"learning_rate": 4.322437398749243e-06,
|
|
"loss": 1.7397382259368896,
|
|
"step": 2777
|
|
},
|
|
{
|
|
"epoch": 1.7683004455760662,
|
|
"grad_norm": 8.714024062774746,
|
|
"learning_rate": 4.318767915741935e-06,
|
|
"loss": 1.4001973867416382,
|
|
"step": 2778
|
|
},
|
|
{
|
|
"epoch": 1.7689369828134947,
|
|
"grad_norm": 9.16784933246916,
|
|
"learning_rate": 4.315098806550962e-06,
|
|
"loss": 1.0145443677902222,
|
|
"step": 2779
|
|
},
|
|
{
|
|
"epoch": 1.769573520050923,
|
|
"grad_norm": 10.226845400525429,
|
|
"learning_rate": 4.311430073189694e-06,
|
|
"loss": 1.6936746835708618,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 1.7702100572883515,
|
|
"grad_norm": 10.063227134440265,
|
|
"learning_rate": 4.307761717671298e-06,
|
|
"loss": 2.4931836128234863,
|
|
"step": 2781
|
|
},
|
|
{
|
|
"epoch": 1.7708465945257799,
|
|
"grad_norm": 16.45329187318938,
|
|
"learning_rate": 4.304093742008727e-06,
|
|
"loss": 1.7186734676361084,
|
|
"step": 2782
|
|
},
|
|
{
|
|
"epoch": 1.7714831317632083,
|
|
"grad_norm": 9.545672658717793,
|
|
"learning_rate": 4.300426148214735e-06,
|
|
"loss": 1.5424296855926514,
|
|
"step": 2783
|
|
},
|
|
{
|
|
"epoch": 1.7721196690006367,
|
|
"grad_norm": 12.51102086704486,
|
|
"learning_rate": 4.296758938301857e-06,
|
|
"loss": 1.3923466205596924,
|
|
"step": 2784
|
|
},
|
|
{
|
|
"epoch": 1.772756206238065,
|
|
"grad_norm": 14.011935924875702,
|
|
"learning_rate": 4.293092114282426e-06,
|
|
"loss": 2.013211727142334,
|
|
"step": 2785
|
|
},
|
|
{
|
|
"epoch": 1.7733927434754935,
|
|
"grad_norm": 7.779708386248659,
|
|
"learning_rate": 4.289425678168555e-06,
|
|
"loss": 1.4323763847351074,
|
|
"step": 2786
|
|
},
|
|
{
|
|
"epoch": 1.7740292807129217,
|
|
"grad_norm": 12.065057462565743,
|
|
"learning_rate": 4.285759631972152e-06,
|
|
"loss": 1.068676233291626,
|
|
"step": 2787
|
|
},
|
|
{
|
|
"epoch": 1.77466581795035,
|
|
"grad_norm": 14.248003851494179,
|
|
"learning_rate": 4.282093977704906e-06,
|
|
"loss": 1.8798476457595825,
|
|
"step": 2788
|
|
},
|
|
{
|
|
"epoch": 1.7753023551877785,
|
|
"grad_norm": 11.232685981221792,
|
|
"learning_rate": 4.278428717378292e-06,
|
|
"loss": 3.5437092781066895,
|
|
"step": 2789
|
|
},
|
|
{
|
|
"epoch": 1.7759388924252069,
|
|
"grad_norm": 11.183540202316731,
|
|
"learning_rate": 4.274763853003569e-06,
|
|
"loss": 1.4824597835540771,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 1.7765754296626353,
|
|
"grad_norm": 13.802222955664616,
|
|
"learning_rate": 4.271099386591776e-06,
|
|
"loss": 1.8571828603744507,
|
|
"step": 2791
|
|
},
|
|
{
|
|
"epoch": 1.7772119669000637,
|
|
"grad_norm": 9.628240361912486,
|
|
"learning_rate": 4.267435320153741e-06,
|
|
"loss": 1.421258807182312,
|
|
"step": 2792
|
|
},
|
|
{
|
|
"epoch": 1.777848504137492,
|
|
"grad_norm": 10.805433128164388,
|
|
"learning_rate": 4.263771655700065e-06,
|
|
"loss": 1.3578135967254639,
|
|
"step": 2793
|
|
},
|
|
{
|
|
"epoch": 1.7784850413749205,
|
|
"grad_norm": 9.825479299999373,
|
|
"learning_rate": 4.260108395241134e-06,
|
|
"loss": 1.8957493305206299,
|
|
"step": 2794
|
|
},
|
|
{
|
|
"epoch": 1.7791215786123489,
|
|
"grad_norm": 14.76523941209378,
|
|
"learning_rate": 4.256445540787105e-06,
|
|
"loss": 1.4083436727523804,
|
|
"step": 2795
|
|
},
|
|
{
|
|
"epoch": 1.779758115849777,
|
|
"grad_norm": 17.658230056553823,
|
|
"learning_rate": 4.252783094347923e-06,
|
|
"loss": 1.2712535858154297,
|
|
"step": 2796
|
|
},
|
|
{
|
|
"epoch": 1.7803946530872055,
|
|
"grad_norm": 12.532150521837378,
|
|
"learning_rate": 4.249121057933297e-06,
|
|
"loss": 1.4908297061920166,
|
|
"step": 2797
|
|
},
|
|
{
|
|
"epoch": 1.7810311903246339,
|
|
"grad_norm": 17.047390143890933,
|
|
"learning_rate": 4.245459433552721e-06,
|
|
"loss": 2.1099019050598145,
|
|
"step": 2798
|
|
},
|
|
{
|
|
"epoch": 1.7816677275620623,
|
|
"grad_norm": 17.241080570413338,
|
|
"learning_rate": 4.241798223215456e-06,
|
|
"loss": 1.5396265983581543,
|
|
"step": 2799
|
|
},
|
|
{
|
|
"epoch": 1.7823042647994907,
|
|
"grad_norm": 8.765223068782607,
|
|
"learning_rate": 4.238137428930542e-06,
|
|
"loss": 1.4360289573669434,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 1.782940802036919,
|
|
"grad_norm": 17.34606194623906,
|
|
"learning_rate": 4.234477052706784e-06,
|
|
"loss": 0.9871362447738647,
|
|
"step": 2801
|
|
},
|
|
{
|
|
"epoch": 1.7835773392743475,
|
|
"grad_norm": 12.55512283517256,
|
|
"learning_rate": 4.230817096552762e-06,
|
|
"loss": 1.7521235942840576,
|
|
"step": 2802
|
|
},
|
|
{
|
|
"epoch": 1.7842138765117759,
|
|
"grad_norm": 12.308565339677186,
|
|
"learning_rate": 4.227157562476826e-06,
|
|
"loss": 1.4013760089874268,
|
|
"step": 2803
|
|
},
|
|
{
|
|
"epoch": 1.7848504137492043,
|
|
"grad_norm": 10.723269316446883,
|
|
"learning_rate": 4.223498452487092e-06,
|
|
"loss": 1.6195989847183228,
|
|
"step": 2804
|
|
},
|
|
{
|
|
"epoch": 1.7854869509866327,
|
|
"grad_norm": 8.628473860906015,
|
|
"learning_rate": 4.2198397685914425e-06,
|
|
"loss": 1.5191609859466553,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 1.786123488224061,
|
|
"grad_norm": 9.935647980668826,
|
|
"learning_rate": 4.21618151279753e-06,
|
|
"loss": 1.8672910928726196,
|
|
"step": 2806
|
|
},
|
|
{
|
|
"epoch": 1.7867600254614895,
|
|
"grad_norm": 6.068502730361051,
|
|
"learning_rate": 4.212523687112769e-06,
|
|
"loss": 1.6413987874984741,
|
|
"step": 2807
|
|
},
|
|
{
|
|
"epoch": 1.787396562698918,
|
|
"grad_norm": 17.78448689223399,
|
|
"learning_rate": 4.208866293544338e-06,
|
|
"loss": 2.192814350128174,
|
|
"step": 2808
|
|
},
|
|
{
|
|
"epoch": 1.7880330999363463,
|
|
"grad_norm": 7.8161475752745435,
|
|
"learning_rate": 4.205209334099182e-06,
|
|
"loss": 2.1916379928588867,
|
|
"step": 2809
|
|
},
|
|
{
|
|
"epoch": 1.7886696371737747,
|
|
"grad_norm": 11.403672726304672,
|
|
"learning_rate": 4.2015528107839996e-06,
|
|
"loss": 1.607360601425171,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 1.789306174411203,
|
|
"grad_norm": 11.625093642522316,
|
|
"learning_rate": 4.197896725605263e-06,
|
|
"loss": 2.0406980514526367,
|
|
"step": 2811
|
|
},
|
|
{
|
|
"epoch": 1.7899427116486315,
|
|
"grad_norm": 13.724398599204527,
|
|
"learning_rate": 4.1942410805691896e-06,
|
|
"loss": 0.8421260118484497,
|
|
"step": 2812
|
|
},
|
|
{
|
|
"epoch": 1.79057924888606,
|
|
"grad_norm": 7.647726118571639,
|
|
"learning_rate": 4.190585877681766e-06,
|
|
"loss": 1.7024935483932495,
|
|
"step": 2813
|
|
},
|
|
{
|
|
"epoch": 1.7912157861234883,
|
|
"grad_norm": 12.4303655102848,
|
|
"learning_rate": 4.186931118948731e-06,
|
|
"loss": 1.6000210046768188,
|
|
"step": 2814
|
|
},
|
|
{
|
|
"epoch": 1.7918523233609167,
|
|
"grad_norm": 12.559554822963113,
|
|
"learning_rate": 4.183276806375584e-06,
|
|
"loss": 1.8290843963623047,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 1.7924888605983451,
|
|
"grad_norm": 10.582583743391472,
|
|
"learning_rate": 4.179622941967571e-06,
|
|
"loss": 1.9351472854614258,
|
|
"step": 2816
|
|
},
|
|
{
|
|
"epoch": 1.7931253978357735,
|
|
"grad_norm": 6.851608509117778,
|
|
"learning_rate": 4.175969527729704e-06,
|
|
"loss": 1.5427175760269165,
|
|
"step": 2817
|
|
},
|
|
{
|
|
"epoch": 1.793761935073202,
|
|
"grad_norm": 18.944559369270223,
|
|
"learning_rate": 4.172316565666735e-06,
|
|
"loss": 1.5095255374908447,
|
|
"step": 2818
|
|
},
|
|
{
|
|
"epoch": 1.7943984723106303,
|
|
"grad_norm": 13.99424007088511,
|
|
"learning_rate": 4.16866405778318e-06,
|
|
"loss": 1.5849189758300781,
|
|
"step": 2819
|
|
},
|
|
{
|
|
"epoch": 1.7950350095480587,
|
|
"grad_norm": 9.020531963095502,
|
|
"learning_rate": 4.165012006083298e-06,
|
|
"loss": 1.1962220668792725,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 1.795671546785487,
|
|
"grad_norm": 10.760225463876855,
|
|
"learning_rate": 4.161360412571101e-06,
|
|
"loss": 1.3352441787719727,
|
|
"step": 2821
|
|
},
|
|
{
|
|
"epoch": 1.7963080840229153,
|
|
"grad_norm": 9.095159526474035,
|
|
"learning_rate": 4.157709279250346e-06,
|
|
"loss": 1.5690834522247314,
|
|
"step": 2822
|
|
},
|
|
{
|
|
"epoch": 1.7969446212603437,
|
|
"grad_norm": 9.124062592636774,
|
|
"learning_rate": 4.154058608124543e-06,
|
|
"loss": 1.7572336196899414,
|
|
"step": 2823
|
|
},
|
|
{
|
|
"epoch": 1.7975811584977721,
|
|
"grad_norm": 11.155238440406327,
|
|
"learning_rate": 4.150408401196946e-06,
|
|
"loss": 1.1329994201660156,
|
|
"step": 2824
|
|
},
|
|
{
|
|
"epoch": 1.7982176957352005,
|
|
"grad_norm": 15.493019195835576,
|
|
"learning_rate": 4.14675866047055e-06,
|
|
"loss": 1.4053606986999512,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 1.798854232972629,
|
|
"grad_norm": 15.211171874329507,
|
|
"learning_rate": 4.1431093879481e-06,
|
|
"loss": 1.3623132705688477,
|
|
"step": 2826
|
|
},
|
|
{
|
|
"epoch": 1.7994907702100573,
|
|
"grad_norm": 13.133712319487321,
|
|
"learning_rate": 4.139460585632081e-06,
|
|
"loss": 1.4178318977355957,
|
|
"step": 2827
|
|
},
|
|
{
|
|
"epoch": 1.8001273074474857,
|
|
"grad_norm": 10.699256194063082,
|
|
"learning_rate": 4.135812255524723e-06,
|
|
"loss": 1.45753812789917,
|
|
"step": 2828
|
|
},
|
|
{
|
|
"epoch": 1.800763844684914,
|
|
"grad_norm": 26.809440286239152,
|
|
"learning_rate": 4.132164399627993e-06,
|
|
"loss": 1.496826410293579,
|
|
"step": 2829
|
|
},
|
|
{
|
|
"epoch": 1.8014003819223423,
|
|
"grad_norm": 12.57082790356433,
|
|
"learning_rate": 4.1285170199436e-06,
|
|
"loss": 1.5901217460632324,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 1.8020369191597707,
|
|
"grad_norm": 10.532688344602471,
|
|
"learning_rate": 4.124870118472992e-06,
|
|
"loss": 2.642929792404175,
|
|
"step": 2831
|
|
},
|
|
{
|
|
"epoch": 1.8026734563971991,
|
|
"grad_norm": 11.077436237440404,
|
|
"learning_rate": 4.121223697217354e-06,
|
|
"loss": 1.6081825494766235,
|
|
"step": 2832
|
|
},
|
|
{
|
|
"epoch": 1.8033099936346275,
|
|
"grad_norm": 12.664290795705691,
|
|
"learning_rate": 4.117577758177604e-06,
|
|
"loss": 1.4939444065093994,
|
|
"step": 2833
|
|
},
|
|
{
|
|
"epoch": 1.803946530872056,
|
|
"grad_norm": 11.856528120460341,
|
|
"learning_rate": 4.113932303354405e-06,
|
|
"loss": 1.7006382942199707,
|
|
"step": 2834
|
|
},
|
|
{
|
|
"epoch": 1.8045830681094843,
|
|
"grad_norm": 15.82601670872119,
|
|
"learning_rate": 4.110287334748141e-06,
|
|
"loss": 1.1850695610046387,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 1.8052196053469127,
|
|
"grad_norm": 9.943366762495765,
|
|
"learning_rate": 4.106642854358942e-06,
|
|
"loss": 1.621656894683838,
|
|
"step": 2836
|
|
},
|
|
{
|
|
"epoch": 1.8058561425843411,
|
|
"grad_norm": 9.600068209440105,
|
|
"learning_rate": 4.102998864186661e-06,
|
|
"loss": 1.1193110942840576,
|
|
"step": 2837
|
|
},
|
|
{
|
|
"epoch": 1.8064926798217695,
|
|
"grad_norm": 12.835685292632498,
|
|
"learning_rate": 4.099355366230887e-06,
|
|
"loss": 1.2406222820281982,
|
|
"step": 2838
|
|
},
|
|
{
|
|
"epoch": 1.807129217059198,
|
|
"grad_norm": 12.449219178421625,
|
|
"learning_rate": 4.095712362490935e-06,
|
|
"loss": 1.3829739093780518,
|
|
"step": 2839
|
|
},
|
|
{
|
|
"epoch": 1.8077657542966263,
|
|
"grad_norm": 41.33211392686159,
|
|
"learning_rate": 4.092069854965856e-06,
|
|
"loss": 3.00693678855896,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 1.8084022915340547,
|
|
"grad_norm": 15.632638203964227,
|
|
"learning_rate": 4.088427845654418e-06,
|
|
"loss": 1.4938067197799683,
|
|
"step": 2841
|
|
},
|
|
{
|
|
"epoch": 1.8090388287714831,
|
|
"grad_norm": 22.81446834451609,
|
|
"learning_rate": 4.084786336555124e-06,
|
|
"loss": 1.9294646978378296,
|
|
"step": 2842
|
|
},
|
|
{
|
|
"epoch": 1.8096753660089115,
|
|
"grad_norm": 18.78376746011658,
|
|
"learning_rate": 4.0811453296662e-06,
|
|
"loss": 1.6908161640167236,
|
|
"step": 2843
|
|
},
|
|
{
|
|
"epoch": 1.81031190324634,
|
|
"grad_norm": 8.078771065082595,
|
|
"learning_rate": 4.0775048269855955e-06,
|
|
"loss": 1.3115285634994507,
|
|
"step": 2844
|
|
},
|
|
{
|
|
"epoch": 1.8109484404837684,
|
|
"grad_norm": 8.175003735662003,
|
|
"learning_rate": 4.073864830510985e-06,
|
|
"loss": 1.3126105070114136,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 1.8115849777211968,
|
|
"grad_norm": 17.900707543853887,
|
|
"learning_rate": 4.070225342239763e-06,
|
|
"loss": 2.337632179260254,
|
|
"step": 2846
|
|
},
|
|
{
|
|
"epoch": 1.8122215149586252,
|
|
"grad_norm": 16.942782810165667,
|
|
"learning_rate": 4.066586364169049e-06,
|
|
"loss": 1.7859814167022705,
|
|
"step": 2847
|
|
},
|
|
{
|
|
"epoch": 1.8128580521960536,
|
|
"grad_norm": 9.346466278405702,
|
|
"learning_rate": 4.062947898295677e-06,
|
|
"loss": 1.3761142492294312,
|
|
"step": 2848
|
|
},
|
|
{
|
|
"epoch": 1.813494589433482,
|
|
"grad_norm": 10.113135117790584,
|
|
"learning_rate": 4.059309946616206e-06,
|
|
"loss": 1.356911063194275,
|
|
"step": 2849
|
|
},
|
|
{
|
|
"epoch": 1.8141311266709104,
|
|
"grad_norm": 13.562865840191234,
|
|
"learning_rate": 4.055672511126907e-06,
|
|
"loss": 1.37875497341156,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 1.8147676639083388,
|
|
"grad_norm": 7.885191606778201,
|
|
"learning_rate": 4.052035593823772e-06,
|
|
"loss": 1.5034464597702026,
|
|
"step": 2851
|
|
},
|
|
{
|
|
"epoch": 1.8154042011457672,
|
|
"grad_norm": 17.376643446227266,
|
|
"learning_rate": 4.048399196702506e-06,
|
|
"loss": 1.2730200290679932,
|
|
"step": 2852
|
|
},
|
|
{
|
|
"epoch": 1.8160407383831956,
|
|
"grad_norm": 23.47641940891411,
|
|
"learning_rate": 4.044763321758532e-06,
|
|
"loss": 1.4678232669830322,
|
|
"step": 2853
|
|
},
|
|
{
|
|
"epoch": 1.8166772756206238,
|
|
"grad_norm": 10.334364788346084,
|
|
"learning_rate": 4.041127970986981e-06,
|
|
"loss": 1.3934051990509033,
|
|
"step": 2854
|
|
},
|
|
{
|
|
"epoch": 1.8173138128580522,
|
|
"grad_norm": 17.405189949747346,
|
|
"learning_rate": 4.037493146382705e-06,
|
|
"loss": 1.7411506175994873,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 1.8179503500954806,
|
|
"grad_norm": 10.249084254626336,
|
|
"learning_rate": 4.0338588499402554e-06,
|
|
"loss": 1.3829255104064941,
|
|
"step": 2856
|
|
},
|
|
{
|
|
"epoch": 1.818586887332909,
|
|
"grad_norm": 9.37668767905887,
|
|
"learning_rate": 4.030225083653905e-06,
|
|
"loss": 2.6021153926849365,
|
|
"step": 2857
|
|
},
|
|
{
|
|
"epoch": 1.8192234245703374,
|
|
"grad_norm": 10.928394393961895,
|
|
"learning_rate": 4.026591849517628e-06,
|
|
"loss": 1.741642951965332,
|
|
"step": 2858
|
|
},
|
|
{
|
|
"epoch": 1.8198599618077658,
|
|
"grad_norm": 10.465955855411217,
|
|
"learning_rate": 4.02295914952511e-06,
|
|
"loss": 1.0957083702087402,
|
|
"step": 2859
|
|
},
|
|
{
|
|
"epoch": 1.8204964990451942,
|
|
"grad_norm": 11.356543633241605,
|
|
"learning_rate": 4.019326985669746e-06,
|
|
"loss": 2.1968047618865967,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 1.8211330362826226,
|
|
"grad_norm": 9.778457182295519,
|
|
"learning_rate": 4.015695359944628e-06,
|
|
"loss": 2.30609393119812,
|
|
"step": 2861
|
|
},
|
|
{
|
|
"epoch": 1.821769573520051,
|
|
"grad_norm": 13.123023423602369,
|
|
"learning_rate": 4.012064274342566e-06,
|
|
"loss": 1.8456538915634155,
|
|
"step": 2862
|
|
},
|
|
{
|
|
"epoch": 1.8224061107574792,
|
|
"grad_norm": 8.063328834273289,
|
|
"learning_rate": 4.0084337308560596e-06,
|
|
"loss": 1.5011341571807861,
|
|
"step": 2863
|
|
},
|
|
{
|
|
"epoch": 1.8230426479949076,
|
|
"grad_norm": 10.625179441651248,
|
|
"learning_rate": 4.004803731477321e-06,
|
|
"loss": 1.461496353149414,
|
|
"step": 2864
|
|
},
|
|
{
|
|
"epoch": 1.823679185232336,
|
|
"grad_norm": 18.777330914781412,
|
|
"learning_rate": 4.001174278198258e-06,
|
|
"loss": 1.1114155054092407,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 1.8243157224697644,
|
|
"grad_norm": 7.930945458759789,
|
|
"learning_rate": 3.997545373010484e-06,
|
|
"loss": 2.230154514312744,
|
|
"step": 2866
|
|
},
|
|
{
|
|
"epoch": 1.8249522597071928,
|
|
"grad_norm": 11.068666242334524,
|
|
"learning_rate": 3.993917017905306e-06,
|
|
"loss": 1.503533959388733,
|
|
"step": 2867
|
|
},
|
|
{
|
|
"epoch": 1.8255887969446212,
|
|
"grad_norm": 15.690509574737762,
|
|
"learning_rate": 3.990289214873734e-06,
|
|
"loss": 1.798210859298706,
|
|
"step": 2868
|
|
},
|
|
{
|
|
"epoch": 1.8262253341820496,
|
|
"grad_norm": 9.716017451785957,
|
|
"learning_rate": 3.986661965906469e-06,
|
|
"loss": 1.6863963603973389,
|
|
"step": 2869
|
|
},
|
|
{
|
|
"epoch": 1.826861871419478,
|
|
"grad_norm": 11.571783437092481,
|
|
"learning_rate": 3.983035272993918e-06,
|
|
"loss": 1.7966833114624023,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 1.8274984086569064,
|
|
"grad_norm": 19.255229113259986,
|
|
"learning_rate": 3.97940913812617e-06,
|
|
"loss": 1.4381554126739502,
|
|
"step": 2871
|
|
},
|
|
{
|
|
"epoch": 1.8281349458943348,
|
|
"grad_norm": 11.381610972405511,
|
|
"learning_rate": 3.97578356329302e-06,
|
|
"loss": 1.7153146266937256,
|
|
"step": 2872
|
|
},
|
|
{
|
|
"epoch": 1.8287714831317632,
|
|
"grad_norm": 11.386655212116958,
|
|
"learning_rate": 3.972158550483946e-06,
|
|
"loss": 1.6358150243759155,
|
|
"step": 2873
|
|
},
|
|
{
|
|
"epoch": 1.8294080203691916,
|
|
"grad_norm": 6.593922569952327,
|
|
"learning_rate": 3.9685341016881255e-06,
|
|
"loss": 2.0091328620910645,
|
|
"step": 2874
|
|
},
|
|
{
|
|
"epoch": 1.83004455760662,
|
|
"grad_norm": 10.038346284443978,
|
|
"learning_rate": 3.964910218894421e-06,
|
|
"loss": 1.77092707157135,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 1.8306810948440484,
|
|
"grad_norm": 9.160996970211745,
|
|
"learning_rate": 3.9612869040913834e-06,
|
|
"loss": 1.5817136764526367,
|
|
"step": 2876
|
|
},
|
|
{
|
|
"epoch": 1.8313176320814768,
|
|
"grad_norm": 10.907978103645188,
|
|
"learning_rate": 3.957664159267261e-06,
|
|
"loss": 1.5602571964263916,
|
|
"step": 2877
|
|
},
|
|
{
|
|
"epoch": 1.8319541693189052,
|
|
"grad_norm": 13.827821511690907,
|
|
"learning_rate": 3.954041986409978e-06,
|
|
"loss": 1.4133808612823486,
|
|
"step": 2878
|
|
},
|
|
{
|
|
"epoch": 1.8325907065563336,
|
|
"grad_norm": 13.478353814469886,
|
|
"learning_rate": 3.9504203875071515e-06,
|
|
"loss": 1.5663726329803467,
|
|
"step": 2879
|
|
},
|
|
{
|
|
"epoch": 1.833227243793762,
|
|
"grad_norm": 10.710541030666576,
|
|
"learning_rate": 3.946799364546081e-06,
|
|
"loss": 1.351537823677063,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 1.8338637810311904,
|
|
"grad_norm": 9.498717426202266,
|
|
"learning_rate": 3.943178919513753e-06,
|
|
"loss": 1.4036643505096436,
|
|
"step": 2881
|
|
},
|
|
{
|
|
"epoch": 1.8345003182686188,
|
|
"grad_norm": 10.248746115258475,
|
|
"learning_rate": 3.939559054396832e-06,
|
|
"loss": 1.1356933116912842,
|
|
"step": 2882
|
|
},
|
|
{
|
|
"epoch": 1.8351368555060472,
|
|
"grad_norm": 13.648579581067839,
|
|
"learning_rate": 3.93593977118167e-06,
|
|
"loss": 1.7145284414291382,
|
|
"step": 2883
|
|
},
|
|
{
|
|
"epoch": 1.8357733927434756,
|
|
"grad_norm": 11.237385330356203,
|
|
"learning_rate": 3.932321071854294e-06,
|
|
"loss": 1.3724491596221924,
|
|
"step": 2884
|
|
},
|
|
{
|
|
"epoch": 1.836409929980904,
|
|
"grad_norm": 15.40429795331263,
|
|
"learning_rate": 3.928702958400416e-06,
|
|
"loss": 1.3502287864685059,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 1.8370464672183324,
|
|
"grad_norm": 9.659071962495439,
|
|
"learning_rate": 3.925085432805422e-06,
|
|
"loss": 2.138554334640503,
|
|
"step": 2886
|
|
},
|
|
{
|
|
"epoch": 1.8376830044557608,
|
|
"grad_norm": 21.795752879699876,
|
|
"learning_rate": 3.921468497054378e-06,
|
|
"loss": 1.198209524154663,
|
|
"step": 2887
|
|
},
|
|
{
|
|
"epoch": 1.838319541693189,
|
|
"grad_norm": 9.759312699966467,
|
|
"learning_rate": 3.917852153132023e-06,
|
|
"loss": 0.8229266405105591,
|
|
"step": 2888
|
|
},
|
|
{
|
|
"epoch": 1.8389560789306174,
|
|
"grad_norm": 10.839219755866015,
|
|
"learning_rate": 3.914236403022779e-06,
|
|
"loss": 1.0355024337768555,
|
|
"step": 2889
|
|
},
|
|
{
|
|
"epoch": 1.8395926161680458,
|
|
"grad_norm": 8.041699784000208,
|
|
"learning_rate": 3.910621248710733e-06,
|
|
"loss": 1.4596374034881592,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 1.8402291534054742,
|
|
"grad_norm": 11.145251443689975,
|
|
"learning_rate": 3.9070066921796525e-06,
|
|
"loss": 1.5663981437683105,
|
|
"step": 2891
|
|
},
|
|
{
|
|
"epoch": 1.8408656906429026,
|
|
"grad_norm": 7.941898425454865,
|
|
"learning_rate": 3.903392735412969e-06,
|
|
"loss": 1.4436841011047363,
|
|
"step": 2892
|
|
},
|
|
{
|
|
"epoch": 1.841502227880331,
|
|
"grad_norm": 14.280872060726427,
|
|
"learning_rate": 3.899779380393793e-06,
|
|
"loss": 1.8422952890396118,
|
|
"step": 2893
|
|
},
|
|
{
|
|
"epoch": 1.8421387651177594,
|
|
"grad_norm": 14.615091656505317,
|
|
"learning_rate": 3.896166629104899e-06,
|
|
"loss": 1.3043195009231567,
|
|
"step": 2894
|
|
},
|
|
{
|
|
"epoch": 1.8427753023551878,
|
|
"grad_norm": 11.03565890218722,
|
|
"learning_rate": 3.892554483528734e-06,
|
|
"loss": 1.9933313131332397,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 1.843411839592616,
|
|
"grad_norm": 10.26711083737239,
|
|
"learning_rate": 3.8889429456474106e-06,
|
|
"loss": 1.9972119331359863,
|
|
"step": 2896
|
|
},
|
|
{
|
|
"epoch": 1.8440483768300444,
|
|
"grad_norm": 10.043013669477036,
|
|
"learning_rate": 3.885332017442708e-06,
|
|
"loss": 1.2068595886230469,
|
|
"step": 2897
|
|
},
|
|
{
|
|
"epoch": 1.8446849140674728,
|
|
"grad_norm": 16.03634140712494,
|
|
"learning_rate": 3.881721700896074e-06,
|
|
"loss": 1.071032166481018,
|
|
"step": 2898
|
|
},
|
|
{
|
|
"epoch": 1.8453214513049012,
|
|
"grad_norm": 19.28330328190598,
|
|
"learning_rate": 3.878111997988612e-06,
|
|
"loss": 1.5138368606567383,
|
|
"step": 2899
|
|
},
|
|
{
|
|
"epoch": 1.8459579885423296,
|
|
"grad_norm": 8.448239192095798,
|
|
"learning_rate": 3.874502910701101e-06,
|
|
"loss": 1.7184983491897583,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 1.846594525779758,
|
|
"grad_norm": 7.877076063101697,
|
|
"learning_rate": 3.870894441013971e-06,
|
|
"loss": 1.4885249137878418,
|
|
"step": 2901
|
|
},
|
|
{
|
|
"epoch": 1.8472310630171864,
|
|
"grad_norm": 27.704910370885152,
|
|
"learning_rate": 3.8672865909073214e-06,
|
|
"loss": 1.4520801305770874,
|
|
"step": 2902
|
|
},
|
|
{
|
|
"epoch": 1.8478676002546148,
|
|
"grad_norm": 7.779608946735297,
|
|
"learning_rate": 3.863679362360907e-06,
|
|
"loss": 1.6304466724395752,
|
|
"step": 2903
|
|
},
|
|
{
|
|
"epoch": 1.8485041374920432,
|
|
"grad_norm": 9.669324175312662,
|
|
"learning_rate": 3.860072757354143e-06,
|
|
"loss": 1.3839410543441772,
|
|
"step": 2904
|
|
},
|
|
{
|
|
"epoch": 1.8491406747294716,
|
|
"grad_norm": 10.378972470624392,
|
|
"learning_rate": 3.8564667778661e-06,
|
|
"loss": 2.3011062145233154,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 1.8497772119669,
|
|
"grad_norm": 8.17189708315968,
|
|
"learning_rate": 3.852861425875513e-06,
|
|
"loss": 0.993788480758667,
|
|
"step": 2906
|
|
},
|
|
{
|
|
"epoch": 1.8504137492043284,
|
|
"grad_norm": 15.400598985121874,
|
|
"learning_rate": 3.849256703360759e-06,
|
|
"loss": 1.5165927410125732,
|
|
"step": 2907
|
|
},
|
|
{
|
|
"epoch": 1.8510502864417568,
|
|
"grad_norm": 7.665762925712199,
|
|
"learning_rate": 3.845652612299886e-06,
|
|
"loss": 1.3429007530212402,
|
|
"step": 2908
|
|
},
|
|
{
|
|
"epoch": 1.8516868236791852,
|
|
"grad_norm": 9.251559888124305,
|
|
"learning_rate": 3.842049154670583e-06,
|
|
"loss": 1.6699916124343872,
|
|
"step": 2909
|
|
},
|
|
{
|
|
"epoch": 1.8523233609166136,
|
|
"grad_norm": 15.144716220358037,
|
|
"learning_rate": 3.838446332450195e-06,
|
|
"loss": 2.5920000076293945,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 1.852959898154042,
|
|
"grad_norm": 10.530008928040017,
|
|
"learning_rate": 3.834844147615722e-06,
|
|
"loss": 1.108059048652649,
|
|
"step": 2911
|
|
},
|
|
{
|
|
"epoch": 1.8535964353914705,
|
|
"grad_norm": 8.636412664373463,
|
|
"learning_rate": 3.831242602143807e-06,
|
|
"loss": 1.2285619974136353,
|
|
"step": 2912
|
|
},
|
|
{
|
|
"epoch": 1.8542329726288989,
|
|
"grad_norm": 9.758526949200924,
|
|
"learning_rate": 3.827641698010752e-06,
|
|
"loss": 2.0095975399017334,
|
|
"step": 2913
|
|
},
|
|
{
|
|
"epoch": 1.8548695098663273,
|
|
"grad_norm": 9.336569556525562,
|
|
"learning_rate": 3.824041437192496e-06,
|
|
"loss": 2.0980563163757324,
|
|
"step": 2914
|
|
},
|
|
{
|
|
"epoch": 1.8555060471037557,
|
|
"grad_norm": 10.296892917057084,
|
|
"learning_rate": 3.8204418216646344e-06,
|
|
"loss": 1.698615550994873,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 1.856142584341184,
|
|
"grad_norm": 9.196996996757687,
|
|
"learning_rate": 3.8168428534024024e-06,
|
|
"loss": 1.6235389709472656,
|
|
"step": 2916
|
|
},
|
|
{
|
|
"epoch": 1.8567791215786125,
|
|
"grad_norm": 8.807171550328754,
|
|
"learning_rate": 3.8132445343806834e-06,
|
|
"loss": 2.061776638031006,
|
|
"step": 2917
|
|
},
|
|
{
|
|
"epoch": 1.8574156588160409,
|
|
"grad_norm": 7.403972337272077,
|
|
"learning_rate": 3.8096468665740023e-06,
|
|
"loss": 1.6480553150177002,
|
|
"step": 2918
|
|
},
|
|
{
|
|
"epoch": 1.8580521960534693,
|
|
"grad_norm": 6.170162563114787,
|
|
"learning_rate": 3.806049851956529e-06,
|
|
"loss": 1.8125827312469482,
|
|
"step": 2919
|
|
},
|
|
{
|
|
"epoch": 1.8586887332908977,
|
|
"grad_norm": 14.151706780698929,
|
|
"learning_rate": 3.8024534925020723e-06,
|
|
"loss": 0.9944297075271606,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 1.8593252705283259,
|
|
"grad_norm": 13.473813351532165,
|
|
"learning_rate": 3.798857790184085e-06,
|
|
"loss": 1.1409094333648682,
|
|
"step": 2921
|
|
},
|
|
{
|
|
"epoch": 1.8599618077657543,
|
|
"grad_norm": 8.23206808050268,
|
|
"learning_rate": 3.795262746975654e-06,
|
|
"loss": 1.6605459451675415,
|
|
"step": 2922
|
|
},
|
|
{
|
|
"epoch": 1.8605983450031827,
|
|
"grad_norm": 17.797137157177552,
|
|
"learning_rate": 3.7916683648495106e-06,
|
|
"loss": 1.2091991901397705,
|
|
"step": 2923
|
|
},
|
|
{
|
|
"epoch": 1.861234882240611,
|
|
"grad_norm": 37.69629321952834,
|
|
"learning_rate": 3.788074645778018e-06,
|
|
"loss": 0.7815721035003662,
|
|
"step": 2924
|
|
},
|
|
{
|
|
"epoch": 1.8618714194780395,
|
|
"grad_norm": 13.372576382344619,
|
|
"learning_rate": 3.7844815917331805e-06,
|
|
"loss": 0.9410151839256287,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 1.8625079567154679,
|
|
"grad_norm": 10.587506192567798,
|
|
"learning_rate": 3.780889204686635e-06,
|
|
"loss": 2.008251190185547,
|
|
"step": 2926
|
|
},
|
|
{
|
|
"epoch": 1.8631444939528963,
|
|
"grad_norm": 15.339078608543764,
|
|
"learning_rate": 3.7772974866096467e-06,
|
|
"loss": 0.6460528373718262,
|
|
"step": 2927
|
|
},
|
|
{
|
|
"epoch": 1.8637810311903247,
|
|
"grad_norm": 15.296511645201427,
|
|
"learning_rate": 3.773706439473128e-06,
|
|
"loss": 1.3764185905456543,
|
|
"step": 2928
|
|
},
|
|
{
|
|
"epoch": 1.8644175684277529,
|
|
"grad_norm": 6.883611777851248,
|
|
"learning_rate": 3.7701160652476075e-06,
|
|
"loss": 1.6351902484893799,
|
|
"step": 2929
|
|
},
|
|
{
|
|
"epoch": 1.8650541056651813,
|
|
"grad_norm": 12.887349048316088,
|
|
"learning_rate": 3.7665263659032557e-06,
|
|
"loss": 2.1986546516418457,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 1.8656906429026097,
|
|
"grad_norm": 14.54225038195362,
|
|
"learning_rate": 3.7629373434098666e-06,
|
|
"loss": 2.2645087242126465,
|
|
"step": 2931
|
|
},
|
|
{
|
|
"epoch": 1.866327180140038,
|
|
"grad_norm": 7.771014705777784,
|
|
"learning_rate": 3.759348999736866e-06,
|
|
"loss": 0.7444299459457397,
|
|
"step": 2932
|
|
},
|
|
{
|
|
"epoch": 1.8669637173774665,
|
|
"grad_norm": 11.737576573508946,
|
|
"learning_rate": 3.755761336853304e-06,
|
|
"loss": 2.10587477684021,
|
|
"step": 2933
|
|
},
|
|
{
|
|
"epoch": 1.8676002546148949,
|
|
"grad_norm": 8.775640918155895,
|
|
"learning_rate": 3.752174356727863e-06,
|
|
"loss": 1.9457027912139893,
|
|
"step": 2934
|
|
},
|
|
{
|
|
"epoch": 1.8682367918523233,
|
|
"grad_norm": 13.515937867687345,
|
|
"learning_rate": 3.7485880613288417e-06,
|
|
"loss": 1.6708476543426514,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 1.8688733290897517,
|
|
"grad_norm": 23.093293304399715,
|
|
"learning_rate": 3.7450024526241736e-06,
|
|
"loss": 0.8383031487464905,
|
|
"step": 2936
|
|
},
|
|
{
|
|
"epoch": 1.86950986632718,
|
|
"grad_norm": 15.142424514156689,
|
|
"learning_rate": 3.7414175325814055e-06,
|
|
"loss": 1.975140929222107,
|
|
"step": 2937
|
|
},
|
|
{
|
|
"epoch": 1.8701464035646085,
|
|
"grad_norm": 8.808594675559215,
|
|
"learning_rate": 3.737833303167714e-06,
|
|
"loss": 1.585309386253357,
|
|
"step": 2938
|
|
},
|
|
{
|
|
"epoch": 1.8707829408020369,
|
|
"grad_norm": 12.54269974142991,
|
|
"learning_rate": 3.734249766349891e-06,
|
|
"loss": 2.28334903717041,
|
|
"step": 2939
|
|
},
|
|
{
|
|
"epoch": 1.8714194780394653,
|
|
"grad_norm": 12.039706631211335,
|
|
"learning_rate": 3.730666924094354e-06,
|
|
"loss": 1.6018445491790771,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 1.8720560152768937,
|
|
"grad_norm": 12.606822880989533,
|
|
"learning_rate": 3.727084778367133e-06,
|
|
"loss": 2.0158743858337402,
|
|
"step": 2941
|
|
},
|
|
{
|
|
"epoch": 1.872692552514322,
|
|
"grad_norm": 13.18481824489173,
|
|
"learning_rate": 3.7235033311338832e-06,
|
|
"loss": 0.8656675815582275,
|
|
"step": 2942
|
|
},
|
|
{
|
|
"epoch": 1.8733290897517505,
|
|
"grad_norm": 11.761203832027727,
|
|
"learning_rate": 3.719922584359869e-06,
|
|
"loss": 1.662010908126831,
|
|
"step": 2943
|
|
},
|
|
{
|
|
"epoch": 1.873965626989179,
|
|
"grad_norm": 11.055295697247734,
|
|
"learning_rate": 3.7163425400099734e-06,
|
|
"loss": 1.6481142044067383,
|
|
"step": 2944
|
|
},
|
|
{
|
|
"epoch": 1.8746021642266073,
|
|
"grad_norm": 8.184052509423914,
|
|
"learning_rate": 3.712763200048697e-06,
|
|
"loss": 1.3201755285263062,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 1.8752387014640357,
|
|
"grad_norm": 9.257340721993499,
|
|
"learning_rate": 3.709184566440148e-06,
|
|
"loss": 0.6050750613212585,
|
|
"step": 2946
|
|
},
|
|
{
|
|
"epoch": 1.875875238701464,
|
|
"grad_norm": 9.004885741171472,
|
|
"learning_rate": 3.7056066411480546e-06,
|
|
"loss": 0.8616651892662048,
|
|
"step": 2947
|
|
},
|
|
{
|
|
"epoch": 1.8765117759388925,
|
|
"grad_norm": 10.262472561157548,
|
|
"learning_rate": 3.702029426135748e-06,
|
|
"loss": 1.235988974571228,
|
|
"step": 2948
|
|
},
|
|
{
|
|
"epoch": 1.877148313176321,
|
|
"grad_norm": 10.006326012931487,
|
|
"learning_rate": 3.698452923366177e-06,
|
|
"loss": 1.6546168327331543,
|
|
"step": 2949
|
|
},
|
|
{
|
|
"epoch": 1.8777848504137493,
|
|
"grad_norm": 9.239380099845183,
|
|
"learning_rate": 3.694877134801892e-06,
|
|
"loss": 1.868090271949768,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 1.8784213876511777,
|
|
"grad_norm": 11.724873434767735,
|
|
"learning_rate": 3.6913020624050607e-06,
|
|
"loss": 1.8908902406692505,
|
|
"step": 2951
|
|
},
|
|
{
|
|
"epoch": 1.8790579248886061,
|
|
"grad_norm": 6.782205554830913,
|
|
"learning_rate": 3.6877277081374485e-06,
|
|
"loss": 1.5437768697738647,
|
|
"step": 2952
|
|
},
|
|
{
|
|
"epoch": 1.8796944621260345,
|
|
"grad_norm": 13.250147606465557,
|
|
"learning_rate": 3.6841540739604333e-06,
|
|
"loss": 1.6006748676300049,
|
|
"step": 2953
|
|
},
|
|
{
|
|
"epoch": 1.8803309993634627,
|
|
"grad_norm": 8.263585193238786,
|
|
"learning_rate": 3.680581161834994e-06,
|
|
"loss": 1.3121932744979858,
|
|
"step": 2954
|
|
},
|
|
{
|
|
"epoch": 1.880967536600891,
|
|
"grad_norm": 15.855281274860497,
|
|
"learning_rate": 3.6770089737217173e-06,
|
|
"loss": 1.5933866500854492,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 1.8816040738383195,
|
|
"grad_norm": 17.104636402653167,
|
|
"learning_rate": 3.6734375115807873e-06,
|
|
"loss": 1.366287112236023,
|
|
"step": 2956
|
|
},
|
|
{
|
|
"epoch": 1.882240611075748,
|
|
"grad_norm": 11.889323434402883,
|
|
"learning_rate": 3.6698667773719964e-06,
|
|
"loss": 1.087631344795227,
|
|
"step": 2957
|
|
},
|
|
{
|
|
"epoch": 1.8828771483131763,
|
|
"grad_norm": 10.28887515614746,
|
|
"learning_rate": 3.666296773054729e-06,
|
|
"loss": 1.2407673597335815,
|
|
"step": 2958
|
|
},
|
|
{
|
|
"epoch": 1.8835136855506047,
|
|
"grad_norm": 13.182129320643282,
|
|
"learning_rate": 3.6627275005879793e-06,
|
|
"loss": 1.1016771793365479,
|
|
"step": 2959
|
|
},
|
|
{
|
|
"epoch": 1.8841502227880331,
|
|
"grad_norm": 11.447446637699572,
|
|
"learning_rate": 3.659158961930329e-06,
|
|
"loss": 3.0205774307250977,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 1.8847867600254615,
|
|
"grad_norm": 11.336888264151233,
|
|
"learning_rate": 3.655591159039965e-06,
|
|
"loss": 1.6845563650131226,
|
|
"step": 2961
|
|
},
|
|
{
|
|
"epoch": 1.88542329726289,
|
|
"grad_norm": 8.513133099222893,
|
|
"learning_rate": 3.6520240938746686e-06,
|
|
"loss": 1.57521390914917,
|
|
"step": 2962
|
|
},
|
|
{
|
|
"epoch": 1.886059834500318,
|
|
"grad_norm": 13.843253725878345,
|
|
"learning_rate": 3.6484577683918137e-06,
|
|
"loss": 1.55642831325531,
|
|
"step": 2963
|
|
},
|
|
{
|
|
"epoch": 1.8866963717377465,
|
|
"grad_norm": 11.336427097104853,
|
|
"learning_rate": 3.644892184548373e-06,
|
|
"loss": 1.7204231023788452,
|
|
"step": 2964
|
|
},
|
|
{
|
|
"epoch": 1.887332908975175,
|
|
"grad_norm": 14.937415415408747,
|
|
"learning_rate": 3.6413273443009066e-06,
|
|
"loss": 1.8140678405761719,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 1.8879694462126033,
|
|
"grad_norm": 15.12413403522489,
|
|
"learning_rate": 3.6377632496055715e-06,
|
|
"loss": 1.4444818496704102,
|
|
"step": 2966
|
|
},
|
|
{
|
|
"epoch": 1.8886059834500317,
|
|
"grad_norm": 12.952422188153427,
|
|
"learning_rate": 3.6341999024181113e-06,
|
|
"loss": 1.0764448642730713,
|
|
"step": 2967
|
|
},
|
|
{
|
|
"epoch": 1.8892425206874601,
|
|
"grad_norm": 18.259926105660973,
|
|
"learning_rate": 3.6306373046938646e-06,
|
|
"loss": 1.1982852220535278,
|
|
"step": 2968
|
|
},
|
|
{
|
|
"epoch": 1.8898790579248885,
|
|
"grad_norm": 11.890515486530168,
|
|
"learning_rate": 3.627075458387753e-06,
|
|
"loss": 1.6283388137817383,
|
|
"step": 2969
|
|
},
|
|
{
|
|
"epoch": 1.890515595162317,
|
|
"grad_norm": 13.811771576252777,
|
|
"learning_rate": 3.623514365454291e-06,
|
|
"loss": 0.8901544213294983,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 1.8911521323997453,
|
|
"grad_norm": 9.679165937792929,
|
|
"learning_rate": 3.6199540278475753e-06,
|
|
"loss": 1.6047818660736084,
|
|
"step": 2971
|
|
},
|
|
{
|
|
"epoch": 1.8917886696371737,
|
|
"grad_norm": 20.126024292697785,
|
|
"learning_rate": 3.6163944475212925e-06,
|
|
"loss": 1.312869668006897,
|
|
"step": 2972
|
|
},
|
|
{
|
|
"epoch": 1.8924252068746021,
|
|
"grad_norm": 8.611093827796402,
|
|
"learning_rate": 3.612835626428707e-06,
|
|
"loss": 1.3583612442016602,
|
|
"step": 2973
|
|
},
|
|
{
|
|
"epoch": 1.8930617441120305,
|
|
"grad_norm": 9.155912381502747,
|
|
"learning_rate": 3.6092775665226766e-06,
|
|
"loss": 1.6458258628845215,
|
|
"step": 2974
|
|
},
|
|
{
|
|
"epoch": 1.893698281349459,
|
|
"grad_norm": 10.489661405812855,
|
|
"learning_rate": 3.6057202697556303e-06,
|
|
"loss": 1.483068823814392,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 1.8943348185868873,
|
|
"grad_norm": 5.669088167056578,
|
|
"learning_rate": 3.602163738079587e-06,
|
|
"loss": 1.5122544765472412,
|
|
"step": 2976
|
|
},
|
|
{
|
|
"epoch": 1.8949713558243158,
|
|
"grad_norm": 15.730385517328935,
|
|
"learning_rate": 3.598607973446142e-06,
|
|
"loss": 1.6851162910461426,
|
|
"step": 2977
|
|
},
|
|
{
|
|
"epoch": 1.8956078930617442,
|
|
"grad_norm": 9.754807356798125,
|
|
"learning_rate": 3.595052977806467e-06,
|
|
"loss": 1.4745590686798096,
|
|
"step": 2978
|
|
},
|
|
{
|
|
"epoch": 1.8962444302991726,
|
|
"grad_norm": 13.659337878420038,
|
|
"learning_rate": 3.59149875311132e-06,
|
|
"loss": 1.6734472513198853,
|
|
"step": 2979
|
|
},
|
|
{
|
|
"epoch": 1.896880967536601,
|
|
"grad_norm": 11.288157866459745,
|
|
"learning_rate": 3.5879453013110266e-06,
|
|
"loss": 1.503297209739685,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 1.8975175047740294,
|
|
"grad_norm": 9.833318826750947,
|
|
"learning_rate": 3.584392624355495e-06,
|
|
"loss": 1.2672520875930786,
|
|
"step": 2981
|
|
},
|
|
{
|
|
"epoch": 1.8981540420114578,
|
|
"grad_norm": 10.781652948334775,
|
|
"learning_rate": 3.5808407241942035e-06,
|
|
"loss": 1.4455342292785645,
|
|
"step": 2982
|
|
},
|
|
{
|
|
"epoch": 1.8987905792488862,
|
|
"grad_norm": 24.804714712071927,
|
|
"learning_rate": 3.577289602776208e-06,
|
|
"loss": 1.375553011894226,
|
|
"step": 2983
|
|
},
|
|
{
|
|
"epoch": 1.8994271164863146,
|
|
"grad_norm": 13.14656310606751,
|
|
"learning_rate": 3.5737392620501356e-06,
|
|
"loss": 2.576221466064453,
|
|
"step": 2984
|
|
},
|
|
{
|
|
"epoch": 1.900063653723743,
|
|
"grad_norm": 13.585164189074876,
|
|
"learning_rate": 3.5701897039641854e-06,
|
|
"loss": 1.90523099899292,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 1.9007001909611714,
|
|
"grad_norm": 17.586577936911645,
|
|
"learning_rate": 3.5666409304661226e-06,
|
|
"loss": 1.1571316719055176,
|
|
"step": 2986
|
|
},
|
|
{
|
|
"epoch": 1.9013367281985998,
|
|
"grad_norm": 6.545400747345549,
|
|
"learning_rate": 3.5630929435032915e-06,
|
|
"loss": 1.630649447441101,
|
|
"step": 2987
|
|
},
|
|
{
|
|
"epoch": 1.901973265436028,
|
|
"grad_norm": 9.512514849418265,
|
|
"learning_rate": 3.5595457450225944e-06,
|
|
"loss": 1.8141216039657593,
|
|
"step": 2988
|
|
},
|
|
{
|
|
"epoch": 1.9026098026734564,
|
|
"grad_norm": 8.506375900740151,
|
|
"learning_rate": 3.555999336970508e-06,
|
|
"loss": 1.11874258518219,
|
|
"step": 2989
|
|
},
|
|
{
|
|
"epoch": 1.9032463399108848,
|
|
"grad_norm": 13.794812449480618,
|
|
"learning_rate": 3.552453721293071e-06,
|
|
"loss": 1.619382619857788,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 1.9038828771483132,
|
|
"grad_norm": 16.651260707094043,
|
|
"learning_rate": 3.5489088999358916e-06,
|
|
"loss": 1.60298490524292,
|
|
"step": 2991
|
|
},
|
|
{
|
|
"epoch": 1.9045194143857416,
|
|
"grad_norm": 10.972801539111252,
|
|
"learning_rate": 3.545364874844137e-06,
|
|
"loss": 1.5508108139038086,
|
|
"step": 2992
|
|
},
|
|
{
|
|
"epoch": 1.90515595162317,
|
|
"grad_norm": 9.233405121504681,
|
|
"learning_rate": 3.5418216479625432e-06,
|
|
"loss": 1.0458866357803345,
|
|
"step": 2993
|
|
},
|
|
{
|
|
"epoch": 1.9057924888605984,
|
|
"grad_norm": 9.564752919579051,
|
|
"learning_rate": 3.5382792212354044e-06,
|
|
"loss": 1.7472103834152222,
|
|
"step": 2994
|
|
},
|
|
{
|
|
"epoch": 1.9064290260980268,
|
|
"grad_norm": 11.317185828966672,
|
|
"learning_rate": 3.5347375966065732e-06,
|
|
"loss": 1.9647533893585205,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 1.907065563335455,
|
|
"grad_norm": 16.039174800688794,
|
|
"learning_rate": 3.5311967760194685e-06,
|
|
"loss": 1.8768528699874878,
|
|
"step": 2996
|
|
},
|
|
{
|
|
"epoch": 1.9077021005728834,
|
|
"grad_norm": 9.69725685029296,
|
|
"learning_rate": 3.5276567614170632e-06,
|
|
"loss": 1.6337780952453613,
|
|
"step": 2997
|
|
},
|
|
{
|
|
"epoch": 1.9083386378103118,
|
|
"grad_norm": 13.84728746714734,
|
|
"learning_rate": 3.5241175547418905e-06,
|
|
"loss": 1.0726393461227417,
|
|
"step": 2998
|
|
},
|
|
{
|
|
"epoch": 1.9089751750477402,
|
|
"grad_norm": 12.253571745159757,
|
|
"learning_rate": 3.5205791579360386e-06,
|
|
"loss": 1.3294373750686646,
|
|
"step": 2999
|
|
},
|
|
{
|
|
"epoch": 1.9096117122851686,
|
|
"grad_norm": 7.145469680327638,
|
|
"learning_rate": 3.5170415729411544e-06,
|
|
"loss": 1.3319453001022339,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 1.910248249522597,
|
|
"grad_norm": 12.670397007583942,
|
|
"learning_rate": 3.513504801698432e-06,
|
|
"loss": 1.0952013731002808,
|
|
"step": 3001
|
|
},
|
|
{
|
|
"epoch": 1.9108847867600254,
|
|
"grad_norm": 19.599620263971225,
|
|
"learning_rate": 3.50996884614863e-06,
|
|
"loss": 1.1098661422729492,
|
|
"step": 3002
|
|
},
|
|
{
|
|
"epoch": 1.9115213239974538,
|
|
"grad_norm": 10.708770007076673,
|
|
"learning_rate": 3.5064337082320475e-06,
|
|
"loss": 2.0629000663757324,
|
|
"step": 3003
|
|
},
|
|
{
|
|
"epoch": 1.9121578612348822,
|
|
"grad_norm": 11.917806878874917,
|
|
"learning_rate": 3.502899389888545e-06,
|
|
"loss": 1.2926945686340332,
|
|
"step": 3004
|
|
},
|
|
{
|
|
"epoch": 1.9127943984723106,
|
|
"grad_norm": 14.183901970473984,
|
|
"learning_rate": 3.499365893057526e-06,
|
|
"loss": 1.500540018081665,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 1.913430935709739,
|
|
"grad_norm": 9.166386496776243,
|
|
"learning_rate": 3.4958332196779486e-06,
|
|
"loss": 1.163301706314087,
|
|
"step": 3006
|
|
},
|
|
{
|
|
"epoch": 1.9140674729471674,
|
|
"grad_norm": 12.255480602242926,
|
|
"learning_rate": 3.4923013716883146e-06,
|
|
"loss": 1.0257372856140137,
|
|
"step": 3007
|
|
},
|
|
{
|
|
"epoch": 1.9147040101845958,
|
|
"grad_norm": 13.22483419522713,
|
|
"learning_rate": 3.488770351026678e-06,
|
|
"loss": 1.455714225769043,
|
|
"step": 3008
|
|
},
|
|
{
|
|
"epoch": 1.9153405474220242,
|
|
"grad_norm": 10.179432970934371,
|
|
"learning_rate": 3.4852401596306306e-06,
|
|
"loss": 1.5575066804885864,
|
|
"step": 3009
|
|
},
|
|
{
|
|
"epoch": 1.9159770846594526,
|
|
"grad_norm": 20.12038533874464,
|
|
"learning_rate": 3.48171079943732e-06,
|
|
"loss": 1.757549524307251,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 1.916613621896881,
|
|
"grad_norm": 12.203643867997348,
|
|
"learning_rate": 3.4781822723834286e-06,
|
|
"loss": 1.7452330589294434,
|
|
"step": 3011
|
|
},
|
|
{
|
|
"epoch": 1.9172501591343094,
|
|
"grad_norm": 15.840159433670774,
|
|
"learning_rate": 3.4746545804051845e-06,
|
|
"loss": 1.858777403831482,
|
|
"step": 3012
|
|
},
|
|
{
|
|
"epoch": 1.9178866963717378,
|
|
"grad_norm": 16.066331909644493,
|
|
"learning_rate": 3.47112772543836e-06,
|
|
"loss": 1.0916454792022705,
|
|
"step": 3013
|
|
},
|
|
{
|
|
"epoch": 1.9185232336091662,
|
|
"grad_norm": 9.991258615289015,
|
|
"learning_rate": 3.4676017094182655e-06,
|
|
"loss": 1.8284687995910645,
|
|
"step": 3014
|
|
},
|
|
{
|
|
"epoch": 1.9191597708465946,
|
|
"grad_norm": 23.562108037135385,
|
|
"learning_rate": 3.464076534279753e-06,
|
|
"loss": 1.6296882629394531,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 1.919796308084023,
|
|
"grad_norm": 13.350276842306824,
|
|
"learning_rate": 3.4605522019572075e-06,
|
|
"loss": 2.1474852561950684,
|
|
"step": 3016
|
|
},
|
|
{
|
|
"epoch": 1.9204328453214514,
|
|
"grad_norm": 12.152677668203303,
|
|
"learning_rate": 3.4570287143845617e-06,
|
|
"loss": 1.6121530532836914,
|
|
"step": 3017
|
|
},
|
|
{
|
|
"epoch": 1.9210693825588798,
|
|
"grad_norm": 10.792615169339534,
|
|
"learning_rate": 3.453506073495274e-06,
|
|
"loss": 1.7401103973388672,
|
|
"step": 3018
|
|
},
|
|
{
|
|
"epoch": 1.9217059197963082,
|
|
"grad_norm": 19.617397599137732,
|
|
"learning_rate": 3.449984281222346e-06,
|
|
"loss": 2.0249862670898438,
|
|
"step": 3019
|
|
},
|
|
{
|
|
"epoch": 1.9223424570337366,
|
|
"grad_norm": 12.861936097372928,
|
|
"learning_rate": 3.4464633394983087e-06,
|
|
"loss": 1.87858247756958,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 1.9229789942711648,
|
|
"grad_norm": 12.821502062384148,
|
|
"learning_rate": 3.4429432502552306e-06,
|
|
"loss": 1.3402378559112549,
|
|
"step": 3021
|
|
},
|
|
{
|
|
"epoch": 1.9236155315085932,
|
|
"grad_norm": 12.271130860284014,
|
|
"learning_rate": 3.439424015424708e-06,
|
|
"loss": 1.3296496868133545,
|
|
"step": 3022
|
|
},
|
|
{
|
|
"epoch": 1.9242520687460216,
|
|
"grad_norm": 12.094984073587872,
|
|
"learning_rate": 3.435905636937873e-06,
|
|
"loss": 1.14638090133667,
|
|
"step": 3023
|
|
},
|
|
{
|
|
"epoch": 1.92488860598345,
|
|
"grad_norm": 9.547944000672503,
|
|
"learning_rate": 3.4323881167253804e-06,
|
|
"loss": 2.336252212524414,
|
|
"step": 3024
|
|
},
|
|
{
|
|
"epoch": 1.9255251432208784,
|
|
"grad_norm": 17.369817974746017,
|
|
"learning_rate": 3.4288714567174264e-06,
|
|
"loss": 1.4508349895477295,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 1.9261616804583068,
|
|
"grad_norm": 10.74696941864543,
|
|
"learning_rate": 3.4253556588437208e-06,
|
|
"loss": 1.2710459232330322,
|
|
"step": 3026
|
|
},
|
|
{
|
|
"epoch": 1.9267982176957352,
|
|
"grad_norm": 12.987178620919483,
|
|
"learning_rate": 3.421840725033512e-06,
|
|
"loss": 1.2776530981063843,
|
|
"step": 3027
|
|
},
|
|
{
|
|
"epoch": 1.9274347549331636,
|
|
"grad_norm": 7.520585806761156,
|
|
"learning_rate": 3.418326657215567e-06,
|
|
"loss": 1.1973588466644287,
|
|
"step": 3028
|
|
},
|
|
{
|
|
"epoch": 1.928071292170592,
|
|
"grad_norm": 13.139221862631066,
|
|
"learning_rate": 3.4148134573181798e-06,
|
|
"loss": 1.5048329830169678,
|
|
"step": 3029
|
|
},
|
|
{
|
|
"epoch": 1.9287078294080202,
|
|
"grad_norm": 14.009969949467193,
|
|
"learning_rate": 3.4113011272691708e-06,
|
|
"loss": 0.9765514731407166,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 1.9293443666454486,
|
|
"grad_norm": 9.474747894878966,
|
|
"learning_rate": 3.4077896689958782e-06,
|
|
"loss": 1.5028693675994873,
|
|
"step": 3031
|
|
},
|
|
{
|
|
"epoch": 1.929980903882877,
|
|
"grad_norm": 24.981700309160786,
|
|
"learning_rate": 3.4042790844251666e-06,
|
|
"loss": 2.431849956512451,
|
|
"step": 3032
|
|
},
|
|
{
|
|
"epoch": 1.9306174411203054,
|
|
"grad_norm": 12.405622244492198,
|
|
"learning_rate": 3.4007693754834166e-06,
|
|
"loss": 1.5558407306671143,
|
|
"step": 3033
|
|
},
|
|
{
|
|
"epoch": 1.9312539783577338,
|
|
"grad_norm": 8.277798441928566,
|
|
"learning_rate": 3.397260544096532e-06,
|
|
"loss": 1.9231243133544922,
|
|
"step": 3034
|
|
},
|
|
{
|
|
"epoch": 1.9318905155951622,
|
|
"grad_norm": 10.4446041402095,
|
|
"learning_rate": 3.393752592189934e-06,
|
|
"loss": 1.745375156402588,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 1.9325270528325906,
|
|
"grad_norm": 13.11851467672906,
|
|
"learning_rate": 3.3902455216885603e-06,
|
|
"loss": 1.7263861894607544,
|
|
"step": 3036
|
|
},
|
|
{
|
|
"epoch": 1.933163590070019,
|
|
"grad_norm": 10.810645515967366,
|
|
"learning_rate": 3.3867393345168653e-06,
|
|
"loss": 1.180358648300171,
|
|
"step": 3037
|
|
},
|
|
{
|
|
"epoch": 1.9338001273074474,
|
|
"grad_norm": 8.822196535546214,
|
|
"learning_rate": 3.3832340325988212e-06,
|
|
"loss": 2.118812084197998,
|
|
"step": 3038
|
|
},
|
|
{
|
|
"epoch": 1.9344366645448758,
|
|
"grad_norm": 11.163193376397869,
|
|
"learning_rate": 3.379729617857908e-06,
|
|
"loss": 1.6012519598007202,
|
|
"step": 3039
|
|
},
|
|
{
|
|
"epoch": 1.9350732017823042,
|
|
"grad_norm": 8.381398488181672,
|
|
"learning_rate": 3.3762260922171265e-06,
|
|
"loss": 1.4120349884033203,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 1.9357097390197326,
|
|
"grad_norm": 11.787336756078439,
|
|
"learning_rate": 3.372723457598983e-06,
|
|
"loss": 1.2969615459442139,
|
|
"step": 3041
|
|
},
|
|
{
|
|
"epoch": 1.936346276257161,
|
|
"grad_norm": 9.389599449868138,
|
|
"learning_rate": 3.3692217159255025e-06,
|
|
"loss": 1.8524794578552246,
|
|
"step": 3042
|
|
},
|
|
{
|
|
"epoch": 1.9369828134945895,
|
|
"grad_norm": 10.81217000988976,
|
|
"learning_rate": 3.3657208691182107e-06,
|
|
"loss": 0.9801638126373291,
|
|
"step": 3043
|
|
},
|
|
{
|
|
"epoch": 1.9376193507320179,
|
|
"grad_norm": 18.38620988657736,
|
|
"learning_rate": 3.3622209190981503e-06,
|
|
"loss": 1.663733720779419,
|
|
"step": 3044
|
|
},
|
|
{
|
|
"epoch": 1.9382558879694463,
|
|
"grad_norm": 11.710734609627233,
|
|
"learning_rate": 3.358721867785869e-06,
|
|
"loss": 3.6056973934173584,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 1.9388924252068747,
|
|
"grad_norm": 15.338078554184479,
|
|
"learning_rate": 3.355223717101418e-06,
|
|
"loss": 1.8417390584945679,
|
|
"step": 3046
|
|
},
|
|
{
|
|
"epoch": 1.939528962444303,
|
|
"grad_norm": 10.293351880303502,
|
|
"learning_rate": 3.351726468964359e-06,
|
|
"loss": 1.7809994220733643,
|
|
"step": 3047
|
|
},
|
|
{
|
|
"epoch": 1.9401654996817315,
|
|
"grad_norm": 13.24712762250747,
|
|
"learning_rate": 3.3482301252937564e-06,
|
|
"loss": 1.5533814430236816,
|
|
"step": 3048
|
|
},
|
|
{
|
|
"epoch": 1.9408020369191599,
|
|
"grad_norm": 11.022160859919824,
|
|
"learning_rate": 3.3447346880081798e-06,
|
|
"loss": 2.2012224197387695,
|
|
"step": 3049
|
|
},
|
|
{
|
|
"epoch": 1.9414385741565883,
|
|
"grad_norm": 8.570175454275299,
|
|
"learning_rate": 3.341240159025698e-06,
|
|
"loss": 1.6400574445724487,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 1.9420751113940167,
|
|
"grad_norm": 15.425440780971087,
|
|
"learning_rate": 3.3377465402638877e-06,
|
|
"loss": 2.379615306854248,
|
|
"step": 3051
|
|
},
|
|
{
|
|
"epoch": 1.942711648631445,
|
|
"grad_norm": 10.70244668276611,
|
|
"learning_rate": 3.3342538336398166e-06,
|
|
"loss": 1.0482215881347656,
|
|
"step": 3052
|
|
},
|
|
{
|
|
"epoch": 1.9433481858688735,
|
|
"grad_norm": 14.689318248877823,
|
|
"learning_rate": 3.3307620410700625e-06,
|
|
"loss": 1.3926039934158325,
|
|
"step": 3053
|
|
},
|
|
{
|
|
"epoch": 1.9439847231063019,
|
|
"grad_norm": 15.604591834753055,
|
|
"learning_rate": 3.3272711644706923e-06,
|
|
"loss": 1.5713223218917847,
|
|
"step": 3054
|
|
},
|
|
{
|
|
"epoch": 1.94462126034373,
|
|
"grad_norm": 11.132370985223409,
|
|
"learning_rate": 3.323781205757278e-06,
|
|
"loss": 1.5595935583114624,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 1.9452577975811585,
|
|
"grad_norm": 8.240956588375802,
|
|
"learning_rate": 3.320292166844881e-06,
|
|
"loss": 1.0932841300964355,
|
|
"step": 3056
|
|
},
|
|
{
|
|
"epoch": 1.9458943348185869,
|
|
"grad_norm": 13.688867206661676,
|
|
"learning_rate": 3.316804049648065e-06,
|
|
"loss": 1.921128749847412,
|
|
"step": 3057
|
|
},
|
|
{
|
|
"epoch": 1.9465308720560153,
|
|
"grad_norm": 11.620609053138507,
|
|
"learning_rate": 3.3133168560808805e-06,
|
|
"loss": 1.355882167816162,
|
|
"step": 3058
|
|
},
|
|
{
|
|
"epoch": 1.9471674092934437,
|
|
"grad_norm": 14.284538765313782,
|
|
"learning_rate": 3.3098305880568783e-06,
|
|
"loss": 1.9279327392578125,
|
|
"step": 3059
|
|
},
|
|
{
|
|
"epoch": 1.947803946530872,
|
|
"grad_norm": 9.855480881398547,
|
|
"learning_rate": 3.306345247489095e-06,
|
|
"loss": 1.4592701196670532,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 1.9484404837683005,
|
|
"grad_norm": 10.332419100270505,
|
|
"learning_rate": 3.3028608362900647e-06,
|
|
"loss": 1.398805856704712,
|
|
"step": 3061
|
|
},
|
|
{
|
|
"epoch": 1.9490770210057289,
|
|
"grad_norm": 13.959536418752435,
|
|
"learning_rate": 3.2993773563718057e-06,
|
|
"loss": 1.51752769947052,
|
|
"step": 3062
|
|
},
|
|
{
|
|
"epoch": 1.949713558243157,
|
|
"grad_norm": 16.848860128572603,
|
|
"learning_rate": 3.2958948096458272e-06,
|
|
"loss": 1.6075987815856934,
|
|
"step": 3063
|
|
},
|
|
{
|
|
"epoch": 1.9503500954805855,
|
|
"grad_norm": 10.370030263170925,
|
|
"learning_rate": 3.2924131980231276e-06,
|
|
"loss": 2.661520004272461,
|
|
"step": 3064
|
|
},
|
|
{
|
|
"epoch": 1.9509866327180139,
|
|
"grad_norm": 7.636469667188669,
|
|
"learning_rate": 3.288932523414191e-06,
|
|
"loss": 1.9577052593231201,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 1.9516231699554423,
|
|
"grad_norm": 10.240085727743649,
|
|
"learning_rate": 3.28545278772899e-06,
|
|
"loss": 1.8488285541534424,
|
|
"step": 3066
|
|
},
|
|
{
|
|
"epoch": 1.9522597071928707,
|
|
"grad_norm": 12.132299789528824,
|
|
"learning_rate": 3.2819739928769745e-06,
|
|
"loss": 2.030672550201416,
|
|
"step": 3067
|
|
},
|
|
{
|
|
"epoch": 1.952896244430299,
|
|
"grad_norm": 10.193505387271667,
|
|
"learning_rate": 3.2784961407670896e-06,
|
|
"loss": 2.250061511993408,
|
|
"step": 3068
|
|
},
|
|
{
|
|
"epoch": 1.9535327816677275,
|
|
"grad_norm": 12.017657693658537,
|
|
"learning_rate": 3.2750192333077514e-06,
|
|
"loss": 1.1749529838562012,
|
|
"step": 3069
|
|
},
|
|
{
|
|
"epoch": 1.9541693189051559,
|
|
"grad_norm": 12.250906336492893,
|
|
"learning_rate": 3.271543272406866e-06,
|
|
"loss": 1.6595430374145508,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 1.9548058561425843,
|
|
"grad_norm": 11.297035216753892,
|
|
"learning_rate": 3.2680682599718156e-06,
|
|
"loss": 1.1674996614456177,
|
|
"step": 3071
|
|
},
|
|
{
|
|
"epoch": 1.9554423933800127,
|
|
"grad_norm": 12.65734863075847,
|
|
"learning_rate": 3.264594197909465e-06,
|
|
"loss": 1.424347162246704,
|
|
"step": 3072
|
|
},
|
|
{
|
|
"epoch": 1.956078930617441,
|
|
"grad_norm": 10.226004926281766,
|
|
"learning_rate": 3.2611210881261535e-06,
|
|
"loss": 1.4201499223709106,
|
|
"step": 3073
|
|
},
|
|
{
|
|
"epoch": 1.9567154678548695,
|
|
"grad_norm": 7.916157632060394,
|
|
"learning_rate": 3.2576489325277045e-06,
|
|
"loss": 1.4614274501800537,
|
|
"step": 3074
|
|
},
|
|
{
|
|
"epoch": 1.957352005092298,
|
|
"grad_norm": 20.01032846884706,
|
|
"learning_rate": 3.2541777330194073e-06,
|
|
"loss": 0.9933788776397705,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 1.9579885423297263,
|
|
"grad_norm": 10.315125777877338,
|
|
"learning_rate": 3.2507074915060397e-06,
|
|
"loss": 1.8490878343582153,
|
|
"step": 3076
|
|
},
|
|
{
|
|
"epoch": 1.9586250795671547,
|
|
"grad_norm": 11.613095337512359,
|
|
"learning_rate": 3.2472382098918414e-06,
|
|
"loss": 1.0492528676986694,
|
|
"step": 3077
|
|
},
|
|
{
|
|
"epoch": 1.959261616804583,
|
|
"grad_norm": 5.274761670290617,
|
|
"learning_rate": 3.2437698900805346e-06,
|
|
"loss": 1.2276555299758911,
|
|
"step": 3078
|
|
},
|
|
{
|
|
"epoch": 1.9598981540420115,
|
|
"grad_norm": 10.485079102811254,
|
|
"learning_rate": 3.240302533975308e-06,
|
|
"loss": 1.5378557443618774,
|
|
"step": 3079
|
|
},
|
|
{
|
|
"epoch": 1.96053469127944,
|
|
"grad_norm": 8.610489273802843,
|
|
"learning_rate": 3.236836143478823e-06,
|
|
"loss": 1.1437630653381348,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 1.9611712285168683,
|
|
"grad_norm": 11.48334352465963,
|
|
"learning_rate": 3.2333707204932158e-06,
|
|
"loss": 1.811964988708496,
|
|
"step": 3081
|
|
},
|
|
{
|
|
"epoch": 1.9618077657542967,
|
|
"grad_norm": 11.60340422904036,
|
|
"learning_rate": 3.229906266920082e-06,
|
|
"loss": 1.6359905004501343,
|
|
"step": 3082
|
|
},
|
|
{
|
|
"epoch": 1.9624443029917251,
|
|
"grad_norm": 15.929546389831012,
|
|
"learning_rate": 3.226442784660494e-06,
|
|
"loss": 1.6629650592803955,
|
|
"step": 3083
|
|
},
|
|
{
|
|
"epoch": 1.9630808402291535,
|
|
"grad_norm": 16.669821865914205,
|
|
"learning_rate": 3.222980275614987e-06,
|
|
"loss": 2.0848677158355713,
|
|
"step": 3084
|
|
},
|
|
{
|
|
"epoch": 1.963717377466582,
|
|
"grad_norm": 8.99541983738079,
|
|
"learning_rate": 3.219518741683564e-06,
|
|
"loss": 1.6872613430023193,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 1.9643539147040103,
|
|
"grad_norm": 7.953084515602438,
|
|
"learning_rate": 3.2160581847656915e-06,
|
|
"loss": 1.7856699228286743,
|
|
"step": 3086
|
|
},
|
|
{
|
|
"epoch": 1.9649904519414387,
|
|
"grad_norm": 11.550514933921072,
|
|
"learning_rate": 3.2125986067603004e-06,
|
|
"loss": 1.3385860919952393,
|
|
"step": 3087
|
|
},
|
|
{
|
|
"epoch": 1.965626989178867,
|
|
"grad_norm": 10.070313074436045,
|
|
"learning_rate": 3.2091400095657842e-06,
|
|
"loss": 0.8561789393424988,
|
|
"step": 3088
|
|
},
|
|
{
|
|
"epoch": 1.9662635264162953,
|
|
"grad_norm": 10.382826957485825,
|
|
"learning_rate": 3.205682395080001e-06,
|
|
"loss": 1.1104825735092163,
|
|
"step": 3089
|
|
},
|
|
{
|
|
"epoch": 1.9669000636537237,
|
|
"grad_norm": 8.21773412283748,
|
|
"learning_rate": 3.202225765200262e-06,
|
|
"loss": 1.4783658981323242,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 1.9675366008911521,
|
|
"grad_norm": 17.873833279171105,
|
|
"learning_rate": 3.1987701218233496e-06,
|
|
"loss": 0.8753082752227783,
|
|
"step": 3091
|
|
},
|
|
{
|
|
"epoch": 1.9681731381285805,
|
|
"grad_norm": 10.414163992523427,
|
|
"learning_rate": 3.195315466845493e-06,
|
|
"loss": 1.0268055200576782,
|
|
"step": 3092
|
|
},
|
|
{
|
|
"epoch": 1.968809675366009,
|
|
"grad_norm": 8.412822041327617,
|
|
"learning_rate": 3.1918618021623885e-06,
|
|
"loss": 1.1761889457702637,
|
|
"step": 3093
|
|
},
|
|
{
|
|
"epoch": 1.9694462126034373,
|
|
"grad_norm": 8.725455364591456,
|
|
"learning_rate": 3.188409129669182e-06,
|
|
"loss": 1.1024329662322998,
|
|
"step": 3094
|
|
},
|
|
{
|
|
"epoch": 1.9700827498408657,
|
|
"grad_norm": 9.389550890164376,
|
|
"learning_rate": 3.1849574512604808e-06,
|
|
"loss": 1.2440528869628906,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 1.9707192870782941,
|
|
"grad_norm": 13.494648821582157,
|
|
"learning_rate": 3.181506768830344e-06,
|
|
"loss": 1.670837640762329,
|
|
"step": 3096
|
|
},
|
|
{
|
|
"epoch": 1.9713558243157223,
|
|
"grad_norm": 12.150357325567763,
|
|
"learning_rate": 3.1780570842722795e-06,
|
|
"loss": 1.5949982404708862,
|
|
"step": 3097
|
|
},
|
|
{
|
|
"epoch": 1.9719923615531507,
|
|
"grad_norm": 15.163911389676239,
|
|
"learning_rate": 3.1746083994792577e-06,
|
|
"loss": 1.7955188751220703,
|
|
"step": 3098
|
|
},
|
|
{
|
|
"epoch": 1.9726288987905791,
|
|
"grad_norm": 9.175712392609714,
|
|
"learning_rate": 3.1711607163436904e-06,
|
|
"loss": 1.4577592611312866,
|
|
"step": 3099
|
|
},
|
|
{
|
|
"epoch": 1.9732654360280075,
|
|
"grad_norm": 9.234930841935416,
|
|
"learning_rate": 3.1677140367574476e-06,
|
|
"loss": 2.5002384185791016,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 1.973901973265436,
|
|
"grad_norm": 11.337944959254775,
|
|
"learning_rate": 3.1642683626118442e-06,
|
|
"loss": 1.9067683219909668,
|
|
"step": 3101
|
|
},
|
|
{
|
|
"epoch": 1.9745385105028643,
|
|
"grad_norm": 10.149407145948285,
|
|
"learning_rate": 3.1608236957976445e-06,
|
|
"loss": 1.2545390129089355,
|
|
"step": 3102
|
|
},
|
|
{
|
|
"epoch": 1.9751750477402927,
|
|
"grad_norm": 9.27457318057026,
|
|
"learning_rate": 3.157380038205059e-06,
|
|
"loss": 1.3965643644332886,
|
|
"step": 3103
|
|
},
|
|
{
|
|
"epoch": 1.9758115849777211,
|
|
"grad_norm": 12.246186032090677,
|
|
"learning_rate": 3.153937391723748e-06,
|
|
"loss": 3.5177059173583984,
|
|
"step": 3104
|
|
},
|
|
{
|
|
"epoch": 1.9764481222151495,
|
|
"grad_norm": 10.391359576104954,
|
|
"learning_rate": 3.1504957582428115e-06,
|
|
"loss": 1.5710853338241577,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 1.977084659452578,
|
|
"grad_norm": 10.559254099194881,
|
|
"learning_rate": 3.147055139650798e-06,
|
|
"loss": 1.2918438911437988,
|
|
"step": 3106
|
|
},
|
|
{
|
|
"epoch": 1.9777211966900063,
|
|
"grad_norm": 10.52035800031223,
|
|
"learning_rate": 3.143615537835697e-06,
|
|
"loss": 1.754537582397461,
|
|
"step": 3107
|
|
},
|
|
{
|
|
"epoch": 1.9783577339274347,
|
|
"grad_norm": 31.605830555945424,
|
|
"learning_rate": 3.1401769546849414e-06,
|
|
"loss": 1.4454526901245117,
|
|
"step": 3108
|
|
},
|
|
{
|
|
"epoch": 1.9789942711648632,
|
|
"grad_norm": 9.062007515657388,
|
|
"learning_rate": 3.1367393920854027e-06,
|
|
"loss": 1.5038361549377441,
|
|
"step": 3109
|
|
},
|
|
{
|
|
"epoch": 1.9796308084022916,
|
|
"grad_norm": 10.276281144397306,
|
|
"learning_rate": 3.1333028519233964e-06,
|
|
"loss": 1.5616546869277954,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 1.98026734563972,
|
|
"grad_norm": 9.410325778375519,
|
|
"learning_rate": 3.129867336084673e-06,
|
|
"loss": 1.0828948020935059,
|
|
"step": 3111
|
|
},
|
|
{
|
|
"epoch": 1.9809038828771484,
|
|
"grad_norm": 15.13284583064293,
|
|
"learning_rate": 3.1264328464544253e-06,
|
|
"loss": 0.9905999898910522,
|
|
"step": 3112
|
|
},
|
|
{
|
|
"epoch": 1.9815404201145768,
|
|
"grad_norm": 10.574551454451893,
|
|
"learning_rate": 3.1229993849172764e-06,
|
|
"loss": 1.5945922136306763,
|
|
"step": 3113
|
|
},
|
|
{
|
|
"epoch": 1.9821769573520052,
|
|
"grad_norm": 12.976134401077557,
|
|
"learning_rate": 3.11956695335729e-06,
|
|
"loss": 1.8390474319458008,
|
|
"step": 3114
|
|
},
|
|
{
|
|
"epoch": 1.9828134945894336,
|
|
"grad_norm": 12.392306071093337,
|
|
"learning_rate": 3.1161355536579653e-06,
|
|
"loss": 1.447790265083313,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 1.983450031826862,
|
|
"grad_norm": 15.440907540898268,
|
|
"learning_rate": 3.1127051877022307e-06,
|
|
"loss": 0.8936029672622681,
|
|
"step": 3116
|
|
},
|
|
{
|
|
"epoch": 1.9840865690642904,
|
|
"grad_norm": 8.33224896953088,
|
|
"learning_rate": 3.1092758573724552e-06,
|
|
"loss": 1.157752275466919,
|
|
"step": 3117
|
|
},
|
|
{
|
|
"epoch": 1.9847231063017188,
|
|
"grad_norm": 10.907324824305018,
|
|
"learning_rate": 3.1058475645504284e-06,
|
|
"loss": 0.8514528870582581,
|
|
"step": 3118
|
|
},
|
|
{
|
|
"epoch": 1.9853596435391472,
|
|
"grad_norm": 10.654887083620297,
|
|
"learning_rate": 3.1024203111173833e-06,
|
|
"loss": 1.6049655675888062,
|
|
"step": 3119
|
|
},
|
|
{
|
|
"epoch": 1.9859961807765756,
|
|
"grad_norm": 6.115826190640891,
|
|
"learning_rate": 3.098994098953971e-06,
|
|
"loss": 0.8340649604797363,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 1.986632718014004,
|
|
"grad_norm": 11.47366728517658,
|
|
"learning_rate": 3.0955689299402793e-06,
|
|
"loss": 1.9429899454116821,
|
|
"step": 3121
|
|
},
|
|
{
|
|
"epoch": 1.9872692552514322,
|
|
"grad_norm": 9.459059050362953,
|
|
"learning_rate": 3.092144805955818e-06,
|
|
"loss": 1.7819583415985107,
|
|
"step": 3122
|
|
},
|
|
{
|
|
"epoch": 1.9879057924888606,
|
|
"grad_norm": 11.438816769213076,
|
|
"learning_rate": 3.0887217288795283e-06,
|
|
"loss": 1.437670350074768,
|
|
"step": 3123
|
|
},
|
|
{
|
|
"epoch": 1.988542329726289,
|
|
"grad_norm": 14.66202127305559,
|
|
"learning_rate": 3.0852997005897735e-06,
|
|
"loss": 1.8193809986114502,
|
|
"step": 3124
|
|
},
|
|
{
|
|
"epoch": 1.9891788669637174,
|
|
"grad_norm": 9.445926120985163,
|
|
"learning_rate": 3.0818787229643442e-06,
|
|
"loss": 1.6506184339523315,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 1.9898154042011458,
|
|
"grad_norm": 8.465231817536171,
|
|
"learning_rate": 3.078458797880449e-06,
|
|
"loss": 1.2958638668060303,
|
|
"step": 3126
|
|
},
|
|
{
|
|
"epoch": 1.9904519414385742,
|
|
"grad_norm": 9.799611362744931,
|
|
"learning_rate": 3.075039927214728e-06,
|
|
"loss": 1.4738852977752686,
|
|
"step": 3127
|
|
},
|
|
{
|
|
"epoch": 1.9910884786760026,
|
|
"grad_norm": 10.91720753319989,
|
|
"learning_rate": 3.071622112843232e-06,
|
|
"loss": 1.6265835762023926,
|
|
"step": 3128
|
|
},
|
|
{
|
|
"epoch": 1.991725015913431,
|
|
"grad_norm": 7.2470953939062035,
|
|
"learning_rate": 3.0682053566414416e-06,
|
|
"loss": 1.5042080879211426,
|
|
"step": 3129
|
|
},
|
|
{
|
|
"epoch": 1.9923615531508592,
|
|
"grad_norm": 15.139102548489543,
|
|
"learning_rate": 3.064789660484251e-06,
|
|
"loss": 2.352555274963379,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 1.9929980903882876,
|
|
"grad_norm": 9.044104466068717,
|
|
"learning_rate": 3.0613750262459753e-06,
|
|
"loss": 1.2178021669387817,
|
|
"step": 3131
|
|
},
|
|
{
|
|
"epoch": 1.993634627625716,
|
|
"grad_norm": 10.056864766304423,
|
|
"learning_rate": 3.057961455800347e-06,
|
|
"loss": 1.5974866151809692,
|
|
"step": 3132
|
|
},
|
|
{
|
|
"epoch": 1.9942711648631444,
|
|
"grad_norm": 23.906234837401477,
|
|
"learning_rate": 3.0545489510205083e-06,
|
|
"loss": 1.9503036737442017,
|
|
"step": 3133
|
|
},
|
|
{
|
|
"epoch": 1.9949077021005728,
|
|
"grad_norm": 10.64905333497331,
|
|
"learning_rate": 3.0511375137790305e-06,
|
|
"loss": 1.0582550764083862,
|
|
"step": 3134
|
|
},
|
|
{
|
|
"epoch": 1.9955442393380012,
|
|
"grad_norm": 11.165521278744388,
|
|
"learning_rate": 3.0477271459478847e-06,
|
|
"loss": 1.5541431903839111,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 1.9961807765754296,
|
|
"grad_norm": 9.471209935426868,
|
|
"learning_rate": 3.044317849398464e-06,
|
|
"loss": 1.5843894481658936,
|
|
"step": 3136
|
|
},
|
|
{
|
|
"epoch": 1.996817313812858,
|
|
"grad_norm": 10.442171241885108,
|
|
"learning_rate": 3.0409096260015703e-06,
|
|
"loss": 1.4579355716705322,
|
|
"step": 3137
|
|
},
|
|
{
|
|
"epoch": 1.9974538510502864,
|
|
"grad_norm": 18.94728789653089,
|
|
"learning_rate": 3.0375024776274178e-06,
|
|
"loss": 1.1820597648620605,
|
|
"step": 3138
|
|
},
|
|
{
|
|
"epoch": 1.9980903882877148,
|
|
"grad_norm": 18.230185244403344,
|
|
"learning_rate": 3.0340964061456284e-06,
|
|
"loss": 2.0564804077148438,
|
|
"step": 3139
|
|
},
|
|
{
|
|
"epoch": 1.9987269255251432,
|
|
"grad_norm": 12.652908441573445,
|
|
"learning_rate": 3.030691413425239e-06,
|
|
"loss": 1.3657358884811401,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 1.9993634627625716,
|
|
"grad_norm": 8.32228181735295,
|
|
"learning_rate": 3.0272875013346853e-06,
|
|
"loss": 2.350313425064087,
|
|
"step": 3141
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 9.406506494829356,
|
|
"learning_rate": 3.023884671741821e-06,
|
|
"loss": 1.6760640144348145,
|
|
"step": 3142
|
|
},
|
|
{
|
|
"epoch": 2.0006365372374284,
|
|
"grad_norm": 12.46356256089401,
|
|
"learning_rate": 3.0204829265138947e-06,
|
|
"loss": 0.7003447413444519,
|
|
"step": 3143
|
|
},
|
|
{
|
|
"epoch": 2.001273074474857,
|
|
"grad_norm": 12.40322846590644,
|
|
"learning_rate": 3.017082267517568e-06,
|
|
"loss": 0.5960747003555298,
|
|
"step": 3144
|
|
},
|
|
{
|
|
"epoch": 2.001909611712285,
|
|
"grad_norm": 11.234549616820136,
|
|
"learning_rate": 3.0136826966189024e-06,
|
|
"loss": 0.43681585788726807,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 2.0025461489497136,
|
|
"grad_norm": 9.633365087032399,
|
|
"learning_rate": 3.0102842156833665e-06,
|
|
"loss": 0.6057643294334412,
|
|
"step": 3146
|
|
},
|
|
{
|
|
"epoch": 2.003182686187142,
|
|
"grad_norm": 12.766291712291087,
|
|
"learning_rate": 3.0068868265758242e-06,
|
|
"loss": 0.35702773928642273,
|
|
"step": 3147
|
|
},
|
|
{
|
|
"epoch": 2.0038192234245704,
|
|
"grad_norm": 6.618096264582037,
|
|
"learning_rate": 3.0034905311605485e-06,
|
|
"loss": 0.24602855741977692,
|
|
"step": 3148
|
|
},
|
|
{
|
|
"epoch": 2.004455760661999,
|
|
"grad_norm": 8.905638834371095,
|
|
"learning_rate": 3.0000953313012036e-06,
|
|
"loss": 0.5150216221809387,
|
|
"step": 3149
|
|
},
|
|
{
|
|
"epoch": 2.0050922978994272,
|
|
"grad_norm": 9.164941120941812,
|
|
"learning_rate": 2.9967012288608576e-06,
|
|
"loss": 0.4516746997833252,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 2.0057288351368556,
|
|
"grad_norm": 11.837563041422573,
|
|
"learning_rate": 2.9933082257019767e-06,
|
|
"loss": 0.2624654769897461,
|
|
"step": 3151
|
|
},
|
|
{
|
|
"epoch": 2.006365372374284,
|
|
"grad_norm": 11.487323607343468,
|
|
"learning_rate": 2.9899163236864215e-06,
|
|
"loss": 0.5795379877090454,
|
|
"step": 3152
|
|
},
|
|
{
|
|
"epoch": 2.0070019096117124,
|
|
"grad_norm": 10.46961916492163,
|
|
"learning_rate": 2.9865255246754516e-06,
|
|
"loss": 0.4388521909713745,
|
|
"step": 3153
|
|
},
|
|
{
|
|
"epoch": 2.007638446849141,
|
|
"grad_norm": 12.549862910935373,
|
|
"learning_rate": 2.9831358305297166e-06,
|
|
"loss": 1.4091285467147827,
|
|
"step": 3154
|
|
},
|
|
{
|
|
"epoch": 2.0082749840865692,
|
|
"grad_norm": 7.69507473690529,
|
|
"learning_rate": 2.979747243109267e-06,
|
|
"loss": 0.5970480442047119,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 2.0089115213239976,
|
|
"grad_norm": 11.892005033640343,
|
|
"learning_rate": 2.9763597642735355e-06,
|
|
"loss": 0.5806328654289246,
|
|
"step": 3156
|
|
},
|
|
{
|
|
"epoch": 2.009548058561426,
|
|
"grad_norm": 9.012121762112493,
|
|
"learning_rate": 2.9729733958813563e-06,
|
|
"loss": 0.622743546962738,
|
|
"step": 3157
|
|
},
|
|
{
|
|
"epoch": 2.0101845957988544,
|
|
"grad_norm": 9.039845428183092,
|
|
"learning_rate": 2.9695881397909485e-06,
|
|
"loss": 0.48474836349487305,
|
|
"step": 3158
|
|
},
|
|
{
|
|
"epoch": 2.0108211330362824,
|
|
"grad_norm": 12.999442613720388,
|
|
"learning_rate": 2.966203997859926e-06,
|
|
"loss": 0.5977911949157715,
|
|
"step": 3159
|
|
},
|
|
{
|
|
"epoch": 2.011457670273711,
|
|
"grad_norm": 9.388140845650517,
|
|
"learning_rate": 2.962820971945285e-06,
|
|
"loss": 0.5747494697570801,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 2.012094207511139,
|
|
"grad_norm": 12.801041945381415,
|
|
"learning_rate": 2.9594390639034143e-06,
|
|
"loss": 0.8811707496643066,
|
|
"step": 3161
|
|
},
|
|
{
|
|
"epoch": 2.0127307447485676,
|
|
"grad_norm": 11.13358117745356,
|
|
"learning_rate": 2.956058275590086e-06,
|
|
"loss": 0.5279093980789185,
|
|
"step": 3162
|
|
},
|
|
{
|
|
"epoch": 2.013367281985996,
|
|
"grad_norm": 13.612001915960958,
|
|
"learning_rate": 2.952678608860461e-06,
|
|
"loss": 0.6851604580879211,
|
|
"step": 3163
|
|
},
|
|
{
|
|
"epoch": 2.0140038192234244,
|
|
"grad_norm": 8.822751640844997,
|
|
"learning_rate": 2.9493000655690795e-06,
|
|
"loss": 0.4466755986213684,
|
|
"step": 3164
|
|
},
|
|
{
|
|
"epoch": 2.014640356460853,
|
|
"grad_norm": 17.17901738850658,
|
|
"learning_rate": 2.945922647569874e-06,
|
|
"loss": 2.6146445274353027,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 2.0152768936982812,
|
|
"grad_norm": 20.311318251889887,
|
|
"learning_rate": 2.9425463567161505e-06,
|
|
"loss": 0.5570971369743347,
|
|
"step": 3166
|
|
},
|
|
{
|
|
"epoch": 2.0159134309357096,
|
|
"grad_norm": 13.492459253820568,
|
|
"learning_rate": 2.9391711948605995e-06,
|
|
"loss": 1.6028361320495605,
|
|
"step": 3167
|
|
},
|
|
{
|
|
"epoch": 2.016549968173138,
|
|
"grad_norm": 17.20228134636775,
|
|
"learning_rate": 2.935797163855296e-06,
|
|
"loss": 0.524744987487793,
|
|
"step": 3168
|
|
},
|
|
{
|
|
"epoch": 2.0171865054105664,
|
|
"grad_norm": 7.376589765501107,
|
|
"learning_rate": 2.9324242655516864e-06,
|
|
"loss": 0.26688352227211,
|
|
"step": 3169
|
|
},
|
|
{
|
|
"epoch": 2.017823042647995,
|
|
"grad_norm": 11.092143023254515,
|
|
"learning_rate": 2.9290525018006054e-06,
|
|
"loss": 0.45281243324279785,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 2.0184595798854232,
|
|
"grad_norm": 9.938214285976413,
|
|
"learning_rate": 2.925681874452256e-06,
|
|
"loss": 0.3849692940711975,
|
|
"step": 3171
|
|
},
|
|
{
|
|
"epoch": 2.0190961171228516,
|
|
"grad_norm": 11.597512917048412,
|
|
"learning_rate": 2.922312385356225e-06,
|
|
"loss": 0.6820840239524841,
|
|
"step": 3172
|
|
},
|
|
{
|
|
"epoch": 2.01973265436028,
|
|
"grad_norm": 10.973755623049307,
|
|
"learning_rate": 2.9189440363614664e-06,
|
|
"loss": 0.3673578202724457,
|
|
"step": 3173
|
|
},
|
|
{
|
|
"epoch": 2.0203691915977084,
|
|
"grad_norm": 7.9700444777730315,
|
|
"learning_rate": 2.9155768293163213e-06,
|
|
"loss": 0.6118618845939636,
|
|
"step": 3174
|
|
},
|
|
{
|
|
"epoch": 2.021005728835137,
|
|
"grad_norm": 9.991332698180424,
|
|
"learning_rate": 2.912210766068486e-06,
|
|
"loss": 0.6370064616203308,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 2.0216422660725653,
|
|
"grad_norm": 18.42201627294179,
|
|
"learning_rate": 2.9088458484650485e-06,
|
|
"loss": 0.5225192308425903,
|
|
"step": 3176
|
|
},
|
|
{
|
|
"epoch": 2.0222788033099937,
|
|
"grad_norm": 17.312231651699243,
|
|
"learning_rate": 2.905482078352454e-06,
|
|
"loss": 0.6268512606620789,
|
|
"step": 3177
|
|
},
|
|
{
|
|
"epoch": 2.022915340547422,
|
|
"grad_norm": 8.952186497649738,
|
|
"learning_rate": 2.9021194575765257e-06,
|
|
"loss": 0.6104077100753784,
|
|
"step": 3178
|
|
},
|
|
{
|
|
"epoch": 2.0235518777848505,
|
|
"grad_norm": 15.475594532921043,
|
|
"learning_rate": 2.8987579879824492e-06,
|
|
"loss": 1.112720251083374,
|
|
"step": 3179
|
|
},
|
|
{
|
|
"epoch": 2.024188415022279,
|
|
"grad_norm": 16.902034475138702,
|
|
"learning_rate": 2.895397671414788e-06,
|
|
"loss": 0.472690224647522,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 2.0248249522597073,
|
|
"grad_norm": 9.839014247286904,
|
|
"learning_rate": 2.892038509717465e-06,
|
|
"loss": 0.4841421842575073,
|
|
"step": 3181
|
|
},
|
|
{
|
|
"epoch": 2.0254614894971357,
|
|
"grad_norm": 8.590762320274365,
|
|
"learning_rate": 2.8886805047337736e-06,
|
|
"loss": 0.518042266368866,
|
|
"step": 3182
|
|
},
|
|
{
|
|
"epoch": 2.026098026734564,
|
|
"grad_norm": 12.50307119973155,
|
|
"learning_rate": 2.8853236583063695e-06,
|
|
"loss": 0.30463069677352905,
|
|
"step": 3183
|
|
},
|
|
{
|
|
"epoch": 2.0267345639719925,
|
|
"grad_norm": 13.55415563944142,
|
|
"learning_rate": 2.8819679722772732e-06,
|
|
"loss": 0.4761585593223572,
|
|
"step": 3184
|
|
},
|
|
{
|
|
"epoch": 2.027371101209421,
|
|
"grad_norm": 10.323909824063984,
|
|
"learning_rate": 2.878613448487871e-06,
|
|
"loss": 0.3689367473125458,
|
|
"step": 3185
|
|
},
|
|
{
|
|
"epoch": 2.0280076384468493,
|
|
"grad_norm": 17.01497476426275,
|
|
"learning_rate": 2.875260088778907e-06,
|
|
"loss": 0.7028419375419617,
|
|
"step": 3186
|
|
},
|
|
{
|
|
"epoch": 2.0286441756842777,
|
|
"grad_norm": 13.542252184708316,
|
|
"learning_rate": 2.871907894990495e-06,
|
|
"loss": 0.6263160705566406,
|
|
"step": 3187
|
|
},
|
|
{
|
|
"epoch": 2.029280712921706,
|
|
"grad_norm": 7.3376341289827325,
|
|
"learning_rate": 2.8685568689620958e-06,
|
|
"loss": 0.6165609359741211,
|
|
"step": 3188
|
|
},
|
|
{
|
|
"epoch": 2.0299172501591345,
|
|
"grad_norm": 12.669897190649397,
|
|
"learning_rate": 2.8652070125325416e-06,
|
|
"loss": 0.9314853549003601,
|
|
"step": 3189
|
|
},
|
|
{
|
|
"epoch": 2.030553787396563,
|
|
"grad_norm": 8.7196582806334,
|
|
"learning_rate": 2.861858327540018e-06,
|
|
"loss": 0.6993128657341003,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 2.0311903246339913,
|
|
"grad_norm": 9.952062518974616,
|
|
"learning_rate": 2.8585108158220664e-06,
|
|
"loss": 0.6346950531005859,
|
|
"step": 3191
|
|
},
|
|
{
|
|
"epoch": 2.0318268618714193,
|
|
"grad_norm": 10.307825570615718,
|
|
"learning_rate": 2.8551644792155845e-06,
|
|
"loss": 0.4012235999107361,
|
|
"step": 3192
|
|
},
|
|
{
|
|
"epoch": 2.0324633991088477,
|
|
"grad_norm": 8.842722997144174,
|
|
"learning_rate": 2.8518193195568304e-06,
|
|
"loss": 0.4270016551017761,
|
|
"step": 3193
|
|
},
|
|
{
|
|
"epoch": 2.033099936346276,
|
|
"grad_norm": 12.341905472926273,
|
|
"learning_rate": 2.84847533868141e-06,
|
|
"loss": 0.40161120891571045,
|
|
"step": 3194
|
|
},
|
|
{
|
|
"epoch": 2.0337364735837045,
|
|
"grad_norm": 9.676443158290573,
|
|
"learning_rate": 2.845132538424286e-06,
|
|
"loss": 0.8682132363319397,
|
|
"step": 3195
|
|
},
|
|
{
|
|
"epoch": 2.034373010821133,
|
|
"grad_norm": 9.562213215313651,
|
|
"learning_rate": 2.841790920619769e-06,
|
|
"loss": 0.2562720775604248,
|
|
"step": 3196
|
|
},
|
|
{
|
|
"epoch": 2.0350095480585613,
|
|
"grad_norm": 11.518279123176042,
|
|
"learning_rate": 2.83845048710153e-06,
|
|
"loss": 0.4420645236968994,
|
|
"step": 3197
|
|
},
|
|
{
|
|
"epoch": 2.0356460852959897,
|
|
"grad_norm": 10.452049091263088,
|
|
"learning_rate": 2.835111239702576e-06,
|
|
"loss": 0.7477583885192871,
|
|
"step": 3198
|
|
},
|
|
{
|
|
"epoch": 2.036282622533418,
|
|
"grad_norm": 11.797998040714148,
|
|
"learning_rate": 2.8317731802552774e-06,
|
|
"loss": 0.6933112144470215,
|
|
"step": 3199
|
|
},
|
|
{
|
|
"epoch": 2.0369191597708465,
|
|
"grad_norm": 10.919498256245369,
|
|
"learning_rate": 2.8284363105913447e-06,
|
|
"loss": 0.6165541410446167,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 2.037555697008275,
|
|
"grad_norm": 9.858438891717784,
|
|
"learning_rate": 2.825100632541836e-06,
|
|
"loss": 0.783021867275238,
|
|
"step": 3201
|
|
},
|
|
{
|
|
"epoch": 2.0381922342457033,
|
|
"grad_norm": 11.84280267272239,
|
|
"learning_rate": 2.8217661479371585e-06,
|
|
"loss": 0.3734252452850342,
|
|
"step": 3202
|
|
},
|
|
{
|
|
"epoch": 2.0388287714831317,
|
|
"grad_norm": 17.567056788118094,
|
|
"learning_rate": 2.8184328586070596e-06,
|
|
"loss": 0.5697858929634094,
|
|
"step": 3203
|
|
},
|
|
{
|
|
"epoch": 2.03946530872056,
|
|
"grad_norm": 11.951047163545796,
|
|
"learning_rate": 2.81510076638064e-06,
|
|
"loss": 0.4297953248023987,
|
|
"step": 3204
|
|
},
|
|
{
|
|
"epoch": 2.0401018459579885,
|
|
"grad_norm": 7.165365957833696,
|
|
"learning_rate": 2.8117698730863302e-06,
|
|
"loss": 0.37496840953826904,
|
|
"step": 3205
|
|
},
|
|
{
|
|
"epoch": 2.040738383195417,
|
|
"grad_norm": 12.119192215519528,
|
|
"learning_rate": 2.808440180551916e-06,
|
|
"loss": 1.0547622442245483,
|
|
"step": 3206
|
|
},
|
|
{
|
|
"epoch": 2.0413749204328453,
|
|
"grad_norm": 15.33298908135013,
|
|
"learning_rate": 2.8051116906045154e-06,
|
|
"loss": 0.5565409064292908,
|
|
"step": 3207
|
|
},
|
|
{
|
|
"epoch": 2.0420114576702737,
|
|
"grad_norm": 11.897131902832205,
|
|
"learning_rate": 2.801784405070591e-06,
|
|
"loss": 0.7787500619888306,
|
|
"step": 3208
|
|
},
|
|
{
|
|
"epoch": 2.042647994907702,
|
|
"grad_norm": 9.393851599365979,
|
|
"learning_rate": 2.79845832577594e-06,
|
|
"loss": 0.41894006729125977,
|
|
"step": 3209
|
|
},
|
|
{
|
|
"epoch": 2.0432845321451305,
|
|
"grad_norm": 11.147627022876764,
|
|
"learning_rate": 2.795133454545707e-06,
|
|
"loss": 0.5602455139160156,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 2.043921069382559,
|
|
"grad_norm": 6.846635597479448,
|
|
"learning_rate": 2.7918097932043598e-06,
|
|
"loss": 0.2927100658416748,
|
|
"step": 3211
|
|
},
|
|
{
|
|
"epoch": 2.0445576066199873,
|
|
"grad_norm": 9.165171726398286,
|
|
"learning_rate": 2.788487343575715e-06,
|
|
"loss": 0.5903818011283875,
|
|
"step": 3212
|
|
},
|
|
{
|
|
"epoch": 2.0451941438574157,
|
|
"grad_norm": 23.476619403147776,
|
|
"learning_rate": 2.7851661074829176e-06,
|
|
"loss": 1.7944833040237427,
|
|
"step": 3213
|
|
},
|
|
{
|
|
"epoch": 2.045830681094844,
|
|
"grad_norm": 10.48520006726488,
|
|
"learning_rate": 2.7818460867484488e-06,
|
|
"loss": 0.48250359296798706,
|
|
"step": 3214
|
|
},
|
|
{
|
|
"epoch": 2.0464672183322725,
|
|
"grad_norm": 9.44127208339123,
|
|
"learning_rate": 2.7785272831941198e-06,
|
|
"loss": 0.4169970452785492,
|
|
"step": 3215
|
|
},
|
|
{
|
|
"epoch": 2.047103755569701,
|
|
"grad_norm": 16.499270916009138,
|
|
"learning_rate": 2.7752096986410802e-06,
|
|
"loss": 0.4681619703769684,
|
|
"step": 3216
|
|
},
|
|
{
|
|
"epoch": 2.0477402928071293,
|
|
"grad_norm": 15.458816531706349,
|
|
"learning_rate": 2.7718933349098077e-06,
|
|
"loss": 0.4672512412071228,
|
|
"step": 3217
|
|
},
|
|
{
|
|
"epoch": 2.0483768300445577,
|
|
"grad_norm": 10.022530098921154,
|
|
"learning_rate": 2.7685781938201024e-06,
|
|
"loss": 0.4352753460407257,
|
|
"step": 3218
|
|
},
|
|
{
|
|
"epoch": 2.049013367281986,
|
|
"grad_norm": 12.324950558960667,
|
|
"learning_rate": 2.765264277191107e-06,
|
|
"loss": 0.32542985677719116,
|
|
"step": 3219
|
|
},
|
|
{
|
|
"epoch": 2.0496499045194145,
|
|
"grad_norm": 10.715295354976668,
|
|
"learning_rate": 2.761951586841284e-06,
|
|
"loss": 0.3411189913749695,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 2.050286441756843,
|
|
"grad_norm": 15.631613837657154,
|
|
"learning_rate": 2.7586401245884236e-06,
|
|
"loss": 1.1421493291854858,
|
|
"step": 3221
|
|
},
|
|
{
|
|
"epoch": 2.0509229789942713,
|
|
"grad_norm": 11.130166870127876,
|
|
"learning_rate": 2.7553298922496423e-06,
|
|
"loss": 0.5865961909294128,
|
|
"step": 3222
|
|
},
|
|
{
|
|
"epoch": 2.0515595162316997,
|
|
"grad_norm": 7.13843922070195,
|
|
"learning_rate": 2.7520208916413866e-06,
|
|
"loss": 0.4451628625392914,
|
|
"step": 3223
|
|
},
|
|
{
|
|
"epoch": 2.052196053469128,
|
|
"grad_norm": 6.859986120553279,
|
|
"learning_rate": 2.7487131245794207e-06,
|
|
"loss": 0.29068124294281006,
|
|
"step": 3224
|
|
},
|
|
{
|
|
"epoch": 2.0528325907065565,
|
|
"grad_norm": 7.086139260244435,
|
|
"learning_rate": 2.745406592878834e-06,
|
|
"loss": 0.34774288535118103,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 2.0534691279439845,
|
|
"grad_norm": 7.444475437890723,
|
|
"learning_rate": 2.7421012983540384e-06,
|
|
"loss": 0.38049668073654175,
|
|
"step": 3226
|
|
},
|
|
{
|
|
"epoch": 2.054105665181413,
|
|
"grad_norm": 11.944953328297366,
|
|
"learning_rate": 2.738797242818768e-06,
|
|
"loss": 0.46898943185806274,
|
|
"step": 3227
|
|
},
|
|
{
|
|
"epoch": 2.0547422024188413,
|
|
"grad_norm": 8.084521244857383,
|
|
"learning_rate": 2.735494428086073e-06,
|
|
"loss": 0.3792363405227661,
|
|
"step": 3228
|
|
},
|
|
{
|
|
"epoch": 2.0553787396562697,
|
|
"grad_norm": 9.797542752171367,
|
|
"learning_rate": 2.7321928559683296e-06,
|
|
"loss": 0.38770341873168945,
|
|
"step": 3229
|
|
},
|
|
{
|
|
"epoch": 2.056015276893698,
|
|
"grad_norm": 9.792132348109558,
|
|
"learning_rate": 2.728892528277226e-06,
|
|
"loss": 1.0433180332183838,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 2.0566518141311265,
|
|
"grad_norm": 6.557964529559676,
|
|
"learning_rate": 2.72559344682377e-06,
|
|
"loss": 0.18909412622451782,
|
|
"step": 3231
|
|
},
|
|
{
|
|
"epoch": 2.057288351368555,
|
|
"grad_norm": 14.05273896444134,
|
|
"learning_rate": 2.7222956134182833e-06,
|
|
"loss": 0.49194207787513733,
|
|
"step": 3232
|
|
},
|
|
{
|
|
"epoch": 2.0579248886059833,
|
|
"grad_norm": 10.913357132899804,
|
|
"learning_rate": 2.7189990298704105e-06,
|
|
"loss": 0.5012593865394592,
|
|
"step": 3233
|
|
},
|
|
{
|
|
"epoch": 2.0585614258434117,
|
|
"grad_norm": 18.480467244427906,
|
|
"learning_rate": 2.7157036979890984e-06,
|
|
"loss": 0.30408960580825806,
|
|
"step": 3234
|
|
},
|
|
{
|
|
"epoch": 2.05919796308084,
|
|
"grad_norm": 12.692004595044567,
|
|
"learning_rate": 2.7124096195826133e-06,
|
|
"loss": 0.9789431095123291,
|
|
"step": 3235
|
|
},
|
|
{
|
|
"epoch": 2.0598345003182685,
|
|
"grad_norm": 6.891068890017745,
|
|
"learning_rate": 2.7091167964585374e-06,
|
|
"loss": 0.41833198070526123,
|
|
"step": 3236
|
|
},
|
|
{
|
|
"epoch": 2.060471037555697,
|
|
"grad_norm": 19.272741777843954,
|
|
"learning_rate": 2.7058252304237577e-06,
|
|
"loss": 0.42587608098983765,
|
|
"step": 3237
|
|
},
|
|
{
|
|
"epoch": 2.0611075747931253,
|
|
"grad_norm": 14.959014212111224,
|
|
"learning_rate": 2.7025349232844744e-06,
|
|
"loss": 0.4084237813949585,
|
|
"step": 3238
|
|
},
|
|
{
|
|
"epoch": 2.0617441120305537,
|
|
"grad_norm": 11.767916514265051,
|
|
"learning_rate": 2.6992458768461947e-06,
|
|
"loss": 0.42100873589515686,
|
|
"step": 3239
|
|
},
|
|
{
|
|
"epoch": 2.062380649267982,
|
|
"grad_norm": 13.732195873693154,
|
|
"learning_rate": 2.695958092913741e-06,
|
|
"loss": 0.3621373474597931,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 2.0630171865054105,
|
|
"grad_norm": 15.052316193112196,
|
|
"learning_rate": 2.69267157329123e-06,
|
|
"loss": 0.36196765303611755,
|
|
"step": 3241
|
|
},
|
|
{
|
|
"epoch": 2.063653723742839,
|
|
"grad_norm": 8.791123051446233,
|
|
"learning_rate": 2.6893863197820978e-06,
|
|
"loss": 0.733702540397644,
|
|
"step": 3242
|
|
},
|
|
{
|
|
"epoch": 2.0642902609802674,
|
|
"grad_norm": 7.475648812336085,
|
|
"learning_rate": 2.686102334189079e-06,
|
|
"loss": 0.42003414034843445,
|
|
"step": 3243
|
|
},
|
|
{
|
|
"epoch": 2.0649267982176958,
|
|
"grad_norm": 13.569102527684445,
|
|
"learning_rate": 2.6828196183142117e-06,
|
|
"loss": 0.3186565637588501,
|
|
"step": 3244
|
|
},
|
|
{
|
|
"epoch": 2.065563335455124,
|
|
"grad_norm": 9.324977184898154,
|
|
"learning_rate": 2.6795381739588394e-06,
|
|
"loss": 0.32224637269973755,
|
|
"step": 3245
|
|
},
|
|
{
|
|
"epoch": 2.0661998726925526,
|
|
"grad_norm": 6.415901405634741,
|
|
"learning_rate": 2.6762580029236102e-06,
|
|
"loss": 0.30635929107666016,
|
|
"step": 3246
|
|
},
|
|
{
|
|
"epoch": 2.066836409929981,
|
|
"grad_norm": 10.35372111155344,
|
|
"learning_rate": 2.6729791070084693e-06,
|
|
"loss": 0.8054674863815308,
|
|
"step": 3247
|
|
},
|
|
{
|
|
"epoch": 2.0674729471674094,
|
|
"grad_norm": 10.696306455544944,
|
|
"learning_rate": 2.669701488012664e-06,
|
|
"loss": 0.49849972128868103,
|
|
"step": 3248
|
|
},
|
|
{
|
|
"epoch": 2.0681094844048378,
|
|
"grad_norm": 8.263034695346395,
|
|
"learning_rate": 2.6664251477347404e-06,
|
|
"loss": 0.21850839257240295,
|
|
"step": 3249
|
|
},
|
|
{
|
|
"epoch": 2.068746021642266,
|
|
"grad_norm": 9.372937594365398,
|
|
"learning_rate": 2.6631500879725425e-06,
|
|
"loss": 0.3750782907009125,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 2.0693825588796946,
|
|
"grad_norm": 10.426364379097361,
|
|
"learning_rate": 2.659876310523214e-06,
|
|
"loss": 0.5895072221755981,
|
|
"step": 3251
|
|
},
|
|
{
|
|
"epoch": 2.070019096117123,
|
|
"grad_norm": 8.629619368306836,
|
|
"learning_rate": 2.6566038171831902e-06,
|
|
"loss": 0.18251235783100128,
|
|
"step": 3252
|
|
},
|
|
{
|
|
"epoch": 2.0706556333545514,
|
|
"grad_norm": 14.157748250145481,
|
|
"learning_rate": 2.6533326097482114e-06,
|
|
"loss": 0.6843599081039429,
|
|
"step": 3253
|
|
},
|
|
{
|
|
"epoch": 2.07129217059198,
|
|
"grad_norm": 10.639887975474291,
|
|
"learning_rate": 2.6500626900132974e-06,
|
|
"loss": 0.35676315426826477,
|
|
"step": 3254
|
|
},
|
|
{
|
|
"epoch": 2.071928707829408,
|
|
"grad_norm": 15.337959408542437,
|
|
"learning_rate": 2.646794059772776e-06,
|
|
"loss": 2.2118444442749023,
|
|
"step": 3255
|
|
},
|
|
{
|
|
"epoch": 2.0725652450668366,
|
|
"grad_norm": 12.644577074348945,
|
|
"learning_rate": 2.643526720820259e-06,
|
|
"loss": 0.6072080135345459,
|
|
"step": 3256
|
|
},
|
|
{
|
|
"epoch": 2.073201782304265,
|
|
"grad_norm": 11.434453441228607,
|
|
"learning_rate": 2.640260674948652e-06,
|
|
"loss": 0.3190002739429474,
|
|
"step": 3257
|
|
},
|
|
{
|
|
"epoch": 2.0738383195416934,
|
|
"grad_norm": 8.98137016879291,
|
|
"learning_rate": 2.6369959239501487e-06,
|
|
"loss": 0.19415044784545898,
|
|
"step": 3258
|
|
},
|
|
{
|
|
"epoch": 2.074474856779122,
|
|
"grad_norm": 11.308842338843283,
|
|
"learning_rate": 2.633732469616238e-06,
|
|
"loss": 0.28903019428253174,
|
|
"step": 3259
|
|
},
|
|
{
|
|
"epoch": 2.0751113940165498,
|
|
"grad_norm": 10.390526838135772,
|
|
"learning_rate": 2.630470313737692e-06,
|
|
"loss": 0.5196407437324524,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 2.075747931253978,
|
|
"grad_norm": 14.766330473913122,
|
|
"learning_rate": 2.627209458104572e-06,
|
|
"loss": 0.9248642921447754,
|
|
"step": 3261
|
|
},
|
|
{
|
|
"epoch": 2.0763844684914066,
|
|
"grad_norm": 7.277822898693093,
|
|
"learning_rate": 2.6239499045062235e-06,
|
|
"loss": 0.37658756971359253,
|
|
"step": 3262
|
|
},
|
|
{
|
|
"epoch": 2.077021005728835,
|
|
"grad_norm": 12.905872120715083,
|
|
"learning_rate": 2.6206916547312865e-06,
|
|
"loss": 0.3785715103149414,
|
|
"step": 3263
|
|
},
|
|
{
|
|
"epoch": 2.0776575429662634,
|
|
"grad_norm": 18.72481288464424,
|
|
"learning_rate": 2.61743471056767e-06,
|
|
"loss": 0.7707376480102539,
|
|
"step": 3264
|
|
},
|
|
{
|
|
"epoch": 2.0782940802036918,
|
|
"grad_norm": 8.587041867271958,
|
|
"learning_rate": 2.614179073802582e-06,
|
|
"loss": 0.3318308889865875,
|
|
"step": 3265
|
|
},
|
|
{
|
|
"epoch": 2.07893061744112,
|
|
"grad_norm": 7.771217491488594,
|
|
"learning_rate": 2.610924746222503e-06,
|
|
"loss": 0.43681594729423523,
|
|
"step": 3266
|
|
},
|
|
{
|
|
"epoch": 2.0795671546785486,
|
|
"grad_norm": 18.48845128764159,
|
|
"learning_rate": 2.6076717296132005e-06,
|
|
"loss": 0.5202630162239075,
|
|
"step": 3267
|
|
},
|
|
{
|
|
"epoch": 2.080203691915977,
|
|
"grad_norm": 12.7775423658601,
|
|
"learning_rate": 2.60442002575972e-06,
|
|
"loss": 1.0267428159713745,
|
|
"step": 3268
|
|
},
|
|
{
|
|
"epoch": 2.0808402291534054,
|
|
"grad_norm": 9.385733118886481,
|
|
"learning_rate": 2.6011696364463878e-06,
|
|
"loss": 0.2559840679168701,
|
|
"step": 3269
|
|
},
|
|
{
|
|
"epoch": 2.081476766390834,
|
|
"grad_norm": 17.94835737406477,
|
|
"learning_rate": 2.5979205634568073e-06,
|
|
"loss": 0.4922923147678375,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 2.082113303628262,
|
|
"grad_norm": 5.832914015146009,
|
|
"learning_rate": 2.5946728085738593e-06,
|
|
"loss": 0.13679368793964386,
|
|
"step": 3271
|
|
},
|
|
{
|
|
"epoch": 2.0827498408656906,
|
|
"grad_norm": 7.716837449346257,
|
|
"learning_rate": 2.591426373579706e-06,
|
|
"loss": 0.6853848695755005,
|
|
"step": 3272
|
|
},
|
|
{
|
|
"epoch": 2.083386378103119,
|
|
"grad_norm": 9.795383883477603,
|
|
"learning_rate": 2.588181260255782e-06,
|
|
"loss": 0.9720170497894287,
|
|
"step": 3273
|
|
},
|
|
{
|
|
"epoch": 2.0840229153405474,
|
|
"grad_norm": 5.503650578746022,
|
|
"learning_rate": 2.5849374703827947e-06,
|
|
"loss": 0.27854397892951965,
|
|
"step": 3274
|
|
},
|
|
{
|
|
"epoch": 2.084659452577976,
|
|
"grad_norm": 9.340122442262517,
|
|
"learning_rate": 2.5816950057407263e-06,
|
|
"loss": 0.6471270322799683,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 2.085295989815404,
|
|
"grad_norm": 12.855186955075286,
|
|
"learning_rate": 2.5784538681088377e-06,
|
|
"loss": 0.647802472114563,
|
|
"step": 3276
|
|
},
|
|
{
|
|
"epoch": 2.0859325270528326,
|
|
"grad_norm": 9.115571266909637,
|
|
"learning_rate": 2.5752140592656505e-06,
|
|
"loss": 0.2930002510547638,
|
|
"step": 3277
|
|
},
|
|
{
|
|
"epoch": 2.086569064290261,
|
|
"grad_norm": 7.823769640298533,
|
|
"learning_rate": 2.5719755809889678e-06,
|
|
"loss": 0.42180970311164856,
|
|
"step": 3278
|
|
},
|
|
{
|
|
"epoch": 2.0872056015276894,
|
|
"grad_norm": 12.014748704805303,
|
|
"learning_rate": 2.5687384350558566e-06,
|
|
"loss": 0.340411514043808,
|
|
"step": 3279
|
|
},
|
|
{
|
|
"epoch": 2.087842138765118,
|
|
"grad_norm": 13.492842536158898,
|
|
"learning_rate": 2.565502623242654e-06,
|
|
"loss": 0.5913677215576172,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 2.088478676002546,
|
|
"grad_norm": 9.916878464298009,
|
|
"learning_rate": 2.562268147324964e-06,
|
|
"loss": 0.7465711832046509,
|
|
"step": 3281
|
|
},
|
|
{
|
|
"epoch": 2.0891152132399746,
|
|
"grad_norm": 9.611117503273233,
|
|
"learning_rate": 2.5590350090776617e-06,
|
|
"loss": 0.6559357643127441,
|
|
"step": 3282
|
|
},
|
|
{
|
|
"epoch": 2.089751750477403,
|
|
"grad_norm": 8.058500342204235,
|
|
"learning_rate": 2.5558032102748852e-06,
|
|
"loss": 0.5747423768043518,
|
|
"step": 3283
|
|
},
|
|
{
|
|
"epoch": 2.0903882877148314,
|
|
"grad_norm": 10.742648777279273,
|
|
"learning_rate": 2.5525727526900356e-06,
|
|
"loss": 0.3125855624675751,
|
|
"step": 3284
|
|
},
|
|
{
|
|
"epoch": 2.09102482495226,
|
|
"grad_norm": 11.296454103945338,
|
|
"learning_rate": 2.5493436380957816e-06,
|
|
"loss": 0.514959454536438,
|
|
"step": 3285
|
|
},
|
|
{
|
|
"epoch": 2.0916613621896882,
|
|
"grad_norm": 9.074937939487064,
|
|
"learning_rate": 2.546115868264053e-06,
|
|
"loss": 0.7121496796607971,
|
|
"step": 3286
|
|
},
|
|
{
|
|
"epoch": 2.0922978994271166,
|
|
"grad_norm": 9.350015709223264,
|
|
"learning_rate": 2.5428894449660424e-06,
|
|
"loss": 0.39893782138824463,
|
|
"step": 3287
|
|
},
|
|
{
|
|
"epoch": 2.092934436664545,
|
|
"grad_norm": 10.172545873229877,
|
|
"learning_rate": 2.5396643699722014e-06,
|
|
"loss": 0.38090085983276367,
|
|
"step": 3288
|
|
},
|
|
{
|
|
"epoch": 2.0935709739019734,
|
|
"grad_norm": 7.161333925652746,
|
|
"learning_rate": 2.536440645052246e-06,
|
|
"loss": 0.47203075885772705,
|
|
"step": 3289
|
|
},
|
|
{
|
|
"epoch": 2.094207511139402,
|
|
"grad_norm": 8.98813571216817,
|
|
"learning_rate": 2.5332182719751497e-06,
|
|
"loss": 0.908381462097168,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 2.0948440483768302,
|
|
"grad_norm": 10.17741567098585,
|
|
"learning_rate": 2.529997252509142e-06,
|
|
"loss": 0.35994988679885864,
|
|
"step": 3291
|
|
},
|
|
{
|
|
"epoch": 2.0954805856142587,
|
|
"grad_norm": 13.046329332971709,
|
|
"learning_rate": 2.5267775884217116e-06,
|
|
"loss": 0.40062403678894043,
|
|
"step": 3292
|
|
},
|
|
{
|
|
"epoch": 2.0961171228516866,
|
|
"grad_norm": 10.984336231521858,
|
|
"learning_rate": 2.523559281479603e-06,
|
|
"loss": 0.29584938287734985,
|
|
"step": 3293
|
|
},
|
|
{
|
|
"epoch": 2.096753660089115,
|
|
"grad_norm": 8.825847942241639,
|
|
"learning_rate": 2.520342333448813e-06,
|
|
"loss": 0.21844874322414398,
|
|
"step": 3294
|
|
},
|
|
{
|
|
"epoch": 2.0973901973265434,
|
|
"grad_norm": 10.869370059987933,
|
|
"learning_rate": 2.5171267460946003e-06,
|
|
"loss": 0.36157089471817017,
|
|
"step": 3295
|
|
},
|
|
{
|
|
"epoch": 2.098026734563972,
|
|
"grad_norm": 12.479038780665515,
|
|
"learning_rate": 2.5139125211814706e-06,
|
|
"loss": 0.4132142961025238,
|
|
"step": 3296
|
|
},
|
|
{
|
|
"epoch": 2.0986632718014,
|
|
"grad_norm": 13.589806666910155,
|
|
"learning_rate": 2.5106996604731835e-06,
|
|
"loss": 0.49431464076042175,
|
|
"step": 3297
|
|
},
|
|
{
|
|
"epoch": 2.0992998090388286,
|
|
"grad_norm": 9.092134090342647,
|
|
"learning_rate": 2.5074881657327486e-06,
|
|
"loss": 0.24856360256671906,
|
|
"step": 3298
|
|
},
|
|
{
|
|
"epoch": 2.099936346276257,
|
|
"grad_norm": 11.59206153297446,
|
|
"learning_rate": 2.504278038722433e-06,
|
|
"loss": 0.2738165855407715,
|
|
"step": 3299
|
|
},
|
|
{
|
|
"epoch": 2.1005728835136854,
|
|
"grad_norm": 16.841788854770154,
|
|
"learning_rate": 2.5010692812037406e-06,
|
|
"loss": 0.5679492354393005,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 2.101209420751114,
|
|
"grad_norm": 13.049966543578787,
|
|
"learning_rate": 2.4978618949374362e-06,
|
|
"loss": 0.8120748996734619,
|
|
"step": 3301
|
|
},
|
|
{
|
|
"epoch": 2.1018459579885422,
|
|
"grad_norm": 10.370958796341869,
|
|
"learning_rate": 2.4946558816835246e-06,
|
|
"loss": 0.6072478294372559,
|
|
"step": 3302
|
|
},
|
|
{
|
|
"epoch": 2.1024824952259706,
|
|
"grad_norm": 16.288540373970964,
|
|
"learning_rate": 2.4914512432012614e-06,
|
|
"loss": 1.911726951599121,
|
|
"step": 3303
|
|
},
|
|
{
|
|
"epoch": 2.103119032463399,
|
|
"grad_norm": 12.68702746760304,
|
|
"learning_rate": 2.4882479812491446e-06,
|
|
"loss": 0.5572983026504517,
|
|
"step": 3304
|
|
},
|
|
{
|
|
"epoch": 2.1037555697008274,
|
|
"grad_norm": 9.826034235539806,
|
|
"learning_rate": 2.4850460975849167e-06,
|
|
"loss": 0.5541264414787292,
|
|
"step": 3305
|
|
},
|
|
{
|
|
"epoch": 2.104392106938256,
|
|
"grad_norm": 12.29051435892266,
|
|
"learning_rate": 2.481845593965571e-06,
|
|
"loss": 0.847062885761261,
|
|
"step": 3306
|
|
},
|
|
{
|
|
"epoch": 2.1050286441756842,
|
|
"grad_norm": 7.337326578570493,
|
|
"learning_rate": 2.4786464721473307e-06,
|
|
"loss": 0.33833038806915283,
|
|
"step": 3307
|
|
},
|
|
{
|
|
"epoch": 2.1056651814131127,
|
|
"grad_norm": 11.58552968150726,
|
|
"learning_rate": 2.4754487338856734e-06,
|
|
"loss": 0.49039170145988464,
|
|
"step": 3308
|
|
},
|
|
{
|
|
"epoch": 2.106301718650541,
|
|
"grad_norm": 12.422496920890385,
|
|
"learning_rate": 2.47225238093531e-06,
|
|
"loss": 0.5499177575111389,
|
|
"step": 3309
|
|
},
|
|
{
|
|
"epoch": 2.1069382558879695,
|
|
"grad_norm": 10.993418822033284,
|
|
"learning_rate": 2.469057415050194e-06,
|
|
"loss": 0.3142160177230835,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 2.107574793125398,
|
|
"grad_norm": 8.08307128642135,
|
|
"learning_rate": 2.465863837983515e-06,
|
|
"loss": 0.400633305311203,
|
|
"step": 3311
|
|
},
|
|
{
|
|
"epoch": 2.1082113303628263,
|
|
"grad_norm": 9.531634017421851,
|
|
"learning_rate": 2.4626716514877075e-06,
|
|
"loss": 0.4359270930290222,
|
|
"step": 3312
|
|
},
|
|
{
|
|
"epoch": 2.1088478676002547,
|
|
"grad_norm": 10.103996351256544,
|
|
"learning_rate": 2.459480857314431e-06,
|
|
"loss": 0.5022025108337402,
|
|
"step": 3313
|
|
},
|
|
{
|
|
"epoch": 2.109484404837683,
|
|
"grad_norm": 13.30724035553672,
|
|
"learning_rate": 2.4562914572145944e-06,
|
|
"loss": 0.4654034376144409,
|
|
"step": 3314
|
|
},
|
|
{
|
|
"epoch": 2.1101209420751115,
|
|
"grad_norm": 12.528962490919293,
|
|
"learning_rate": 2.453103452938333e-06,
|
|
"loss": 0.4383985996246338,
|
|
"step": 3315
|
|
},
|
|
{
|
|
"epoch": 2.11075747931254,
|
|
"grad_norm": 7.779359381296516,
|
|
"learning_rate": 2.4499168462350177e-06,
|
|
"loss": 0.534390926361084,
|
|
"step": 3316
|
|
},
|
|
{
|
|
"epoch": 2.1113940165499683,
|
|
"grad_norm": 11.26314805292291,
|
|
"learning_rate": 2.4467316388532514e-06,
|
|
"loss": 0.6711363196372986,
|
|
"step": 3317
|
|
},
|
|
{
|
|
"epoch": 2.1120305537873967,
|
|
"grad_norm": 12.586661952502734,
|
|
"learning_rate": 2.4435478325408757e-06,
|
|
"loss": 0.3104287385940552,
|
|
"step": 3318
|
|
},
|
|
{
|
|
"epoch": 2.112667091024825,
|
|
"grad_norm": 15.877232865075307,
|
|
"learning_rate": 2.4403654290449578e-06,
|
|
"loss": 0.5714265704154968,
|
|
"step": 3319
|
|
},
|
|
{
|
|
"epoch": 2.1133036282622535,
|
|
"grad_norm": 14.025053810912718,
|
|
"learning_rate": 2.4371844301117908e-06,
|
|
"loss": 0.7892574667930603,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 2.113940165499682,
|
|
"grad_norm": 5.492807502345089,
|
|
"learning_rate": 2.434004837486908e-06,
|
|
"loss": 0.17987748980522156,
|
|
"step": 3321
|
|
},
|
|
{
|
|
"epoch": 2.1145767027371103,
|
|
"grad_norm": 10.495775224170274,
|
|
"learning_rate": 2.4308266529150626e-06,
|
|
"loss": 0.3170955777168274,
|
|
"step": 3322
|
|
},
|
|
{
|
|
"epoch": 2.1152132399745387,
|
|
"grad_norm": 11.296350017331362,
|
|
"learning_rate": 2.427649878140238e-06,
|
|
"loss": 0.35575318336486816,
|
|
"step": 3323
|
|
},
|
|
{
|
|
"epoch": 2.115849777211967,
|
|
"grad_norm": 10.181222472050495,
|
|
"learning_rate": 2.4244745149056425e-06,
|
|
"loss": 0.3627958595752716,
|
|
"step": 3324
|
|
},
|
|
{
|
|
"epoch": 2.1164863144493955,
|
|
"grad_norm": 17.56340979043379,
|
|
"learning_rate": 2.4213005649537146e-06,
|
|
"loss": 0.43335339426994324,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 2.1171228516868235,
|
|
"grad_norm": 7.677899069873162,
|
|
"learning_rate": 2.418128030026112e-06,
|
|
"loss": 0.3178519010543823,
|
|
"step": 3326
|
|
},
|
|
{
|
|
"epoch": 2.117759388924252,
|
|
"grad_norm": 8.990077681724406,
|
|
"learning_rate": 2.414956911863717e-06,
|
|
"loss": 0.3949454128742218,
|
|
"step": 3327
|
|
},
|
|
{
|
|
"epoch": 2.1183959261616803,
|
|
"grad_norm": 9.751373077349003,
|
|
"learning_rate": 2.4117872122066342e-06,
|
|
"loss": 0.5346337556838989,
|
|
"step": 3328
|
|
},
|
|
{
|
|
"epoch": 2.1190324633991087,
|
|
"grad_norm": 14.43557995717087,
|
|
"learning_rate": 2.4086189327941965e-06,
|
|
"loss": 1.041166067123413,
|
|
"step": 3329
|
|
},
|
|
{
|
|
"epoch": 2.119669000636537,
|
|
"grad_norm": 11.471000549538617,
|
|
"learning_rate": 2.405452075364944e-06,
|
|
"loss": 0.2893381714820862,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 2.1203055378739655,
|
|
"grad_norm": 9.708549926564013,
|
|
"learning_rate": 2.40228664165665e-06,
|
|
"loss": 0.28888773918151855,
|
|
"step": 3331
|
|
},
|
|
{
|
|
"epoch": 2.120942075111394,
|
|
"grad_norm": 8.09289181475409,
|
|
"learning_rate": 2.399122633406298e-06,
|
|
"loss": 0.43209660053253174,
|
|
"step": 3332
|
|
},
|
|
{
|
|
"epoch": 2.1215786123488223,
|
|
"grad_norm": 13.020839598169353,
|
|
"learning_rate": 2.3959600523500935e-06,
|
|
"loss": 0.6120706796646118,
|
|
"step": 3333
|
|
},
|
|
{
|
|
"epoch": 2.1222151495862507,
|
|
"grad_norm": 11.919528418883417,
|
|
"learning_rate": 2.392798900223455e-06,
|
|
"loss": 0.5933191180229187,
|
|
"step": 3334
|
|
},
|
|
{
|
|
"epoch": 2.122851686823679,
|
|
"grad_norm": 16.538186311923443,
|
|
"learning_rate": 2.389639178761025e-06,
|
|
"loss": 0.3194655478000641,
|
|
"step": 3335
|
|
},
|
|
{
|
|
"epoch": 2.1234882240611075,
|
|
"grad_norm": 11.932840938324789,
|
|
"learning_rate": 2.3864808896966503e-06,
|
|
"loss": 0.4698467254638672,
|
|
"step": 3336
|
|
},
|
|
{
|
|
"epoch": 2.124124761298536,
|
|
"grad_norm": 11.383974444908038,
|
|
"learning_rate": 2.3833240347633955e-06,
|
|
"loss": 0.3712739646434784,
|
|
"step": 3337
|
|
},
|
|
{
|
|
"epoch": 2.1247612985359643,
|
|
"grad_norm": 14.064938826149863,
|
|
"learning_rate": 2.3801686156935445e-06,
|
|
"loss": 0.546394944190979,
|
|
"step": 3338
|
|
},
|
|
{
|
|
"epoch": 2.1253978357733927,
|
|
"grad_norm": 40.742558161554435,
|
|
"learning_rate": 2.377014634218585e-06,
|
|
"loss": 0.7850564122200012,
|
|
"step": 3339
|
|
},
|
|
{
|
|
"epoch": 2.126034373010821,
|
|
"grad_norm": 12.769398445637938,
|
|
"learning_rate": 2.3738620920692208e-06,
|
|
"loss": 0.4774132966995239,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 2.1266709102482495,
|
|
"grad_norm": 13.97263062864823,
|
|
"learning_rate": 2.3707109909753613e-06,
|
|
"loss": 0.7087494730949402,
|
|
"step": 3341
|
|
},
|
|
{
|
|
"epoch": 2.127307447485678,
|
|
"grad_norm": 14.869233235968746,
|
|
"learning_rate": 2.3675613326661333e-06,
|
|
"loss": 0.22911036014556885,
|
|
"step": 3342
|
|
},
|
|
{
|
|
"epoch": 2.1279439847231063,
|
|
"grad_norm": 12.025692409048506,
|
|
"learning_rate": 2.3644131188698598e-06,
|
|
"loss": 0.9791948199272156,
|
|
"step": 3343
|
|
},
|
|
{
|
|
"epoch": 2.1285805219605347,
|
|
"grad_norm": 13.08724211137776,
|
|
"learning_rate": 2.3612663513140834e-06,
|
|
"loss": 0.2889867126941681,
|
|
"step": 3344
|
|
},
|
|
{
|
|
"epoch": 2.129217059197963,
|
|
"grad_norm": 12.579778834984843,
|
|
"learning_rate": 2.358121031725546e-06,
|
|
"loss": 0.9773622751235962,
|
|
"step": 3345
|
|
},
|
|
{
|
|
"epoch": 2.1298535964353915,
|
|
"grad_norm": 10.412626731111317,
|
|
"learning_rate": 2.354977161830196e-06,
|
|
"loss": 0.6916447877883911,
|
|
"step": 3346
|
|
},
|
|
{
|
|
"epoch": 2.13049013367282,
|
|
"grad_norm": 16.34771903703682,
|
|
"learning_rate": 2.3518347433531852e-06,
|
|
"loss": 0.7564796209335327,
|
|
"step": 3347
|
|
},
|
|
{
|
|
"epoch": 2.1311266709102483,
|
|
"grad_norm": 9.657327417102435,
|
|
"learning_rate": 2.348693778018875e-06,
|
|
"loss": 0.4786078631877899,
|
|
"step": 3348
|
|
},
|
|
{
|
|
"epoch": 2.1317632081476767,
|
|
"grad_norm": 12.092130029235612,
|
|
"learning_rate": 2.3455542675508224e-06,
|
|
"loss": 0.29180699586868286,
|
|
"step": 3349
|
|
},
|
|
{
|
|
"epoch": 2.132399745385105,
|
|
"grad_norm": 10.57023457071009,
|
|
"learning_rate": 2.34241621367179e-06,
|
|
"loss": 0.5671373605728149,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 2.1330362826225335,
|
|
"grad_norm": 13.767941904011684,
|
|
"learning_rate": 2.3392796181037363e-06,
|
|
"loss": 0.9068795442581177,
|
|
"step": 3351
|
|
},
|
|
{
|
|
"epoch": 2.133672819859962,
|
|
"grad_norm": 20.86251906321779,
|
|
"learning_rate": 2.33614448256783e-06,
|
|
"loss": 2.0571038722991943,
|
|
"step": 3352
|
|
},
|
|
{
|
|
"epoch": 2.1343093570973903,
|
|
"grad_norm": 7.064031628252607,
|
|
"learning_rate": 2.333010808784425e-06,
|
|
"loss": 0.23232118785381317,
|
|
"step": 3353
|
|
},
|
|
{
|
|
"epoch": 2.1349458943348187,
|
|
"grad_norm": 10.490484673171892,
|
|
"learning_rate": 2.329878598473081e-06,
|
|
"loss": 0.23985019326210022,
|
|
"step": 3354
|
|
},
|
|
{
|
|
"epoch": 2.135582431572247,
|
|
"grad_norm": 10.871615354833349,
|
|
"learning_rate": 2.3267478533525588e-06,
|
|
"loss": 0.7772335410118103,
|
|
"step": 3355
|
|
},
|
|
{
|
|
"epoch": 2.1362189688096755,
|
|
"grad_norm": 7.113976929626125,
|
|
"learning_rate": 2.323618575140802e-06,
|
|
"loss": 0.3307351768016815,
|
|
"step": 3356
|
|
},
|
|
{
|
|
"epoch": 2.136855506047104,
|
|
"grad_norm": 15.063746459776494,
|
|
"learning_rate": 2.320490765554963e-06,
|
|
"loss": 0.4741772413253784,
|
|
"step": 3357
|
|
},
|
|
{
|
|
"epoch": 2.1374920432845324,
|
|
"grad_norm": 9.765039441767453,
|
|
"learning_rate": 2.317364426311381e-06,
|
|
"loss": 0.36270594596862793,
|
|
"step": 3358
|
|
},
|
|
{
|
|
"epoch": 2.1381285805219603,
|
|
"grad_norm": 10.138198927802664,
|
|
"learning_rate": 2.3142395591255886e-06,
|
|
"loss": 0.4879109859466553,
|
|
"step": 3359
|
|
},
|
|
{
|
|
"epoch": 2.138765117759389,
|
|
"grad_norm": 8.631337639937422,
|
|
"learning_rate": 2.311116165712311e-06,
|
|
"loss": 0.7709143161773682,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 2.139401654996817,
|
|
"grad_norm": 10.304133281355053,
|
|
"learning_rate": 2.307994247785469e-06,
|
|
"loss": 1.0132774114608765,
|
|
"step": 3361
|
|
},
|
|
{
|
|
"epoch": 2.1400381922342455,
|
|
"grad_norm": 10.165065906293025,
|
|
"learning_rate": 2.3048738070581684e-06,
|
|
"loss": 0.5290195941925049,
|
|
"step": 3362
|
|
},
|
|
{
|
|
"epoch": 2.140674729471674,
|
|
"grad_norm": 10.864930726399276,
|
|
"learning_rate": 2.3017548452427064e-06,
|
|
"loss": 0.6377224326133728,
|
|
"step": 3363
|
|
},
|
|
{
|
|
"epoch": 2.1413112667091023,
|
|
"grad_norm": 10.193155575179244,
|
|
"learning_rate": 2.2986373640505665e-06,
|
|
"loss": 0.2541917562484741,
|
|
"step": 3364
|
|
},
|
|
{
|
|
"epoch": 2.1419478039465307,
|
|
"grad_norm": 8.608283836651443,
|
|
"learning_rate": 2.2955213651924274e-06,
|
|
"loss": 0.5369041562080383,
|
|
"step": 3365
|
|
},
|
|
{
|
|
"epoch": 2.142584341183959,
|
|
"grad_norm": 14.625959790118241,
|
|
"learning_rate": 2.2924068503781417e-06,
|
|
"loss": 0.3130542039871216,
|
|
"step": 3366
|
|
},
|
|
{
|
|
"epoch": 2.1432208784213875,
|
|
"grad_norm": 14.18643094034974,
|
|
"learning_rate": 2.289293821316759e-06,
|
|
"loss": 0.8524119257926941,
|
|
"step": 3367
|
|
},
|
|
{
|
|
"epoch": 2.143857415658816,
|
|
"grad_norm": 9.696829447325998,
|
|
"learning_rate": 2.2861822797165094e-06,
|
|
"loss": 0.6507335901260376,
|
|
"step": 3368
|
|
},
|
|
{
|
|
"epoch": 2.1444939528962443,
|
|
"grad_norm": 13.733199118600341,
|
|
"learning_rate": 2.283072227284806e-06,
|
|
"loss": 0.4282124638557434,
|
|
"step": 3369
|
|
},
|
|
{
|
|
"epoch": 2.1451304901336727,
|
|
"grad_norm": 18.052391064109116,
|
|
"learning_rate": 2.2799636657282446e-06,
|
|
"loss": 0.31571298837661743,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 2.145767027371101,
|
|
"grad_norm": 10.979925381824847,
|
|
"learning_rate": 2.276856596752603e-06,
|
|
"loss": 0.2936651110649109,
|
|
"step": 3371
|
|
},
|
|
{
|
|
"epoch": 2.1464035646085295,
|
|
"grad_norm": 9.207625773915087,
|
|
"learning_rate": 2.2737510220628458e-06,
|
|
"loss": 1.6048418283462524,
|
|
"step": 3372
|
|
},
|
|
{
|
|
"epoch": 2.147040101845958,
|
|
"grad_norm": 9.29457373486712,
|
|
"learning_rate": 2.2706469433631053e-06,
|
|
"loss": 0.2723718285560608,
|
|
"step": 3373
|
|
},
|
|
{
|
|
"epoch": 2.1476766390833864,
|
|
"grad_norm": 10.815154334010828,
|
|
"learning_rate": 2.267544362356705e-06,
|
|
"loss": 0.6221505999565125,
|
|
"step": 3374
|
|
},
|
|
{
|
|
"epoch": 2.1483131763208148,
|
|
"grad_norm": 10.290402728477853,
|
|
"learning_rate": 2.2644432807461413e-06,
|
|
"loss": 0.4234142005443573,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 2.148949713558243,
|
|
"grad_norm": 19.12884671376245,
|
|
"learning_rate": 2.261343700233087e-06,
|
|
"loss": 0.3772827386856079,
|
|
"step": 3376
|
|
},
|
|
{
|
|
"epoch": 2.1495862507956716,
|
|
"grad_norm": 9.55602219342714,
|
|
"learning_rate": 2.2582456225183913e-06,
|
|
"loss": 0.3183250427246094,
|
|
"step": 3377
|
|
},
|
|
{
|
|
"epoch": 2.1502227880331,
|
|
"grad_norm": 10.238368888861226,
|
|
"learning_rate": 2.255149049302085e-06,
|
|
"loss": 0.903436005115509,
|
|
"step": 3378
|
|
},
|
|
{
|
|
"epoch": 2.1508593252705284,
|
|
"grad_norm": 11.501655274907222,
|
|
"learning_rate": 2.2520539822833615e-06,
|
|
"loss": 0.8668727278709412,
|
|
"step": 3379
|
|
},
|
|
{
|
|
"epoch": 2.1514958625079568,
|
|
"grad_norm": 10.771103520530469,
|
|
"learning_rate": 2.248960423160599e-06,
|
|
"loss": 0.4836471676826477,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 2.152132399745385,
|
|
"grad_norm": 16.828109760744976,
|
|
"learning_rate": 2.2458683736313423e-06,
|
|
"loss": 0.3216490149497986,
|
|
"step": 3381
|
|
},
|
|
{
|
|
"epoch": 2.1527689369828136,
|
|
"grad_norm": 13.887107466378085,
|
|
"learning_rate": 2.2427778353923098e-06,
|
|
"loss": 0.49047866463661194,
|
|
"step": 3382
|
|
},
|
|
{
|
|
"epoch": 2.153405474220242,
|
|
"grad_norm": 10.932175157083606,
|
|
"learning_rate": 2.239688810139387e-06,
|
|
"loss": 0.3700225353240967,
|
|
"step": 3383
|
|
},
|
|
{
|
|
"epoch": 2.1540420114576704,
|
|
"grad_norm": 13.566420319087964,
|
|
"learning_rate": 2.236601299567637e-06,
|
|
"loss": 0.306808203458786,
|
|
"step": 3384
|
|
},
|
|
{
|
|
"epoch": 2.154678548695099,
|
|
"grad_norm": 11.56158787347894,
|
|
"learning_rate": 2.233515305371285e-06,
|
|
"loss": 0.2995290458202362,
|
|
"step": 3385
|
|
},
|
|
{
|
|
"epoch": 2.155315085932527,
|
|
"grad_norm": 17.568356017949135,
|
|
"learning_rate": 2.230430829243725e-06,
|
|
"loss": 0.37916237115859985,
|
|
"step": 3386
|
|
},
|
|
{
|
|
"epoch": 2.1559516231699556,
|
|
"grad_norm": 11.5306787030788,
|
|
"learning_rate": 2.22734787287752e-06,
|
|
"loss": 0.3119584321975708,
|
|
"step": 3387
|
|
},
|
|
{
|
|
"epoch": 2.156588160407384,
|
|
"grad_norm": 16.14158189328427,
|
|
"learning_rate": 2.224266437964398e-06,
|
|
"loss": 0.8759464025497437,
|
|
"step": 3388
|
|
},
|
|
{
|
|
"epoch": 2.1572246976448124,
|
|
"grad_norm": 10.92577996358869,
|
|
"learning_rate": 2.221186526195252e-06,
|
|
"loss": 0.593475341796875,
|
|
"step": 3389
|
|
},
|
|
{
|
|
"epoch": 2.157861234882241,
|
|
"grad_norm": 9.259688538944122,
|
|
"learning_rate": 2.2181081392601373e-06,
|
|
"loss": 0.3868349492549896,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 2.158497772119669,
|
|
"grad_norm": 19.06674991929584,
|
|
"learning_rate": 2.2150312788482786e-06,
|
|
"loss": 1.1448237895965576,
|
|
"step": 3391
|
|
},
|
|
{
|
|
"epoch": 2.159134309357097,
|
|
"grad_norm": 15.166406304972211,
|
|
"learning_rate": 2.211955946648057e-06,
|
|
"loss": 0.47934362292289734,
|
|
"step": 3392
|
|
},
|
|
{
|
|
"epoch": 2.159770846594526,
|
|
"grad_norm": 7.844403336761152,
|
|
"learning_rate": 2.2088821443470173e-06,
|
|
"loss": 0.2168283313512802,
|
|
"step": 3393
|
|
},
|
|
{
|
|
"epoch": 2.160407383831954,
|
|
"grad_norm": 9.223131609246044,
|
|
"learning_rate": 2.205809873631862e-06,
|
|
"loss": 0.47109267115592957,
|
|
"step": 3394
|
|
},
|
|
{
|
|
"epoch": 2.1610439210693824,
|
|
"grad_norm": 8.042898406416613,
|
|
"learning_rate": 2.2027391361884616e-06,
|
|
"loss": 0.29720538854599,
|
|
"step": 3395
|
|
},
|
|
{
|
|
"epoch": 2.1616804583068108,
|
|
"grad_norm": 7.93343186703424,
|
|
"learning_rate": 2.1996699337018307e-06,
|
|
"loss": 0.44718751311302185,
|
|
"step": 3396
|
|
},
|
|
{
|
|
"epoch": 2.162316995544239,
|
|
"grad_norm": 12.236668570673348,
|
|
"learning_rate": 2.1966022678561573e-06,
|
|
"loss": 0.23177067935466766,
|
|
"step": 3397
|
|
},
|
|
{
|
|
"epoch": 2.1629535327816676,
|
|
"grad_norm": 11.421250218894645,
|
|
"learning_rate": 2.1935361403347767e-06,
|
|
"loss": 0.45731085538864136,
|
|
"step": 3398
|
|
},
|
|
{
|
|
"epoch": 2.163590070019096,
|
|
"grad_norm": 15.152537089028039,
|
|
"learning_rate": 2.1904715528201813e-06,
|
|
"loss": 0.6630115509033203,
|
|
"step": 3399
|
|
},
|
|
{
|
|
"epoch": 2.1642266072565244,
|
|
"grad_norm": 15.471967915610055,
|
|
"learning_rate": 2.187408506994019e-06,
|
|
"loss": 0.701779842376709,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 2.164863144493953,
|
|
"grad_norm": 10.448807375221385,
|
|
"learning_rate": 2.1843470045370975e-06,
|
|
"loss": 0.5520093441009521,
|
|
"step": 3401
|
|
},
|
|
{
|
|
"epoch": 2.165499681731381,
|
|
"grad_norm": 11.663133424177104,
|
|
"learning_rate": 2.1812870471293647e-06,
|
|
"loss": 0.44490212202072144,
|
|
"step": 3402
|
|
},
|
|
{
|
|
"epoch": 2.1661362189688096,
|
|
"grad_norm": 11.561579013164737,
|
|
"learning_rate": 2.178228636449934e-06,
|
|
"loss": 0.37317323684692383,
|
|
"step": 3403
|
|
},
|
|
{
|
|
"epoch": 2.166772756206238,
|
|
"grad_norm": 8.431268474419799,
|
|
"learning_rate": 2.1751717741770623e-06,
|
|
"loss": 0.2616347372531891,
|
|
"step": 3404
|
|
},
|
|
{
|
|
"epoch": 2.1674092934436664,
|
|
"grad_norm": 8.198634993492524,
|
|
"learning_rate": 2.172116461988159e-06,
|
|
"loss": 0.24132557213306427,
|
|
"step": 3405
|
|
},
|
|
{
|
|
"epoch": 2.168045830681095,
|
|
"grad_norm": 15.943977266921602,
|
|
"learning_rate": 2.169062701559783e-06,
|
|
"loss": 0.5138012766838074,
|
|
"step": 3406
|
|
},
|
|
{
|
|
"epoch": 2.168682367918523,
|
|
"grad_norm": 8.866395917936815,
|
|
"learning_rate": 2.1660104945676402e-06,
|
|
"loss": 0.3448604643344879,
|
|
"step": 3407
|
|
},
|
|
{
|
|
"epoch": 2.1693189051559516,
|
|
"grad_norm": 14.056501432611842,
|
|
"learning_rate": 2.16295984268659e-06,
|
|
"loss": 0.5287642478942871,
|
|
"step": 3408
|
|
},
|
|
{
|
|
"epoch": 2.16995544239338,
|
|
"grad_norm": 8.825882101897346,
|
|
"learning_rate": 2.159910747590627e-06,
|
|
"loss": 0.5530349016189575,
|
|
"step": 3409
|
|
},
|
|
{
|
|
"epoch": 2.1705919796308084,
|
|
"grad_norm": 5.441821345737116,
|
|
"learning_rate": 2.156863210952904e-06,
|
|
"loss": 0.2527669370174408,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 2.171228516868237,
|
|
"grad_norm": 8.826445358427215,
|
|
"learning_rate": 2.1538172344457105e-06,
|
|
"loss": 0.566216230392456,
|
|
"step": 3411
|
|
},
|
|
{
|
|
"epoch": 2.171865054105665,
|
|
"grad_norm": 9.262464590061974,
|
|
"learning_rate": 2.150772819740483e-06,
|
|
"loss": 0.247681125998497,
|
|
"step": 3412
|
|
},
|
|
{
|
|
"epoch": 2.1725015913430936,
|
|
"grad_norm": 7.934189991415934,
|
|
"learning_rate": 2.147729968507799e-06,
|
|
"loss": 0.20549309253692627,
|
|
"step": 3413
|
|
},
|
|
{
|
|
"epoch": 2.173138128580522,
|
|
"grad_norm": 9.28831714399594,
|
|
"learning_rate": 2.1446886824173825e-06,
|
|
"loss": 0.33926114439964294,
|
|
"step": 3414
|
|
},
|
|
{
|
|
"epoch": 2.1737746658179504,
|
|
"grad_norm": 14.385866919389695,
|
|
"learning_rate": 2.1416489631380933e-06,
|
|
"loss": 0.5584162473678589,
|
|
"step": 3415
|
|
},
|
|
{
|
|
"epoch": 2.174411203055379,
|
|
"grad_norm": 16.47689599944588,
|
|
"learning_rate": 2.1386108123379344e-06,
|
|
"loss": 0.6566636562347412,
|
|
"step": 3416
|
|
},
|
|
{
|
|
"epoch": 2.1750477402928072,
|
|
"grad_norm": 12.663563404690024,
|
|
"learning_rate": 2.135574231684046e-06,
|
|
"loss": 0.6372646689414978,
|
|
"step": 3417
|
|
},
|
|
{
|
|
"epoch": 2.1756842775302356,
|
|
"grad_norm": 11.914424264265422,
|
|
"learning_rate": 2.132539222842713e-06,
|
|
"loss": 0.3574853539466858,
|
|
"step": 3418
|
|
},
|
|
{
|
|
"epoch": 2.176320814767664,
|
|
"grad_norm": 7.612202698436665,
|
|
"learning_rate": 2.129505787479346e-06,
|
|
"loss": 0.38791799545288086,
|
|
"step": 3419
|
|
},
|
|
{
|
|
"epoch": 2.1769573520050924,
|
|
"grad_norm": 11.840601030317876,
|
|
"learning_rate": 2.126473927258505e-06,
|
|
"loss": 0.443769246339798,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 2.177593889242521,
|
|
"grad_norm": 15.072477442673236,
|
|
"learning_rate": 2.123443643843879e-06,
|
|
"loss": 0.5991731286048889,
|
|
"step": 3421
|
|
},
|
|
{
|
|
"epoch": 2.1782304264799492,
|
|
"grad_norm": 9.692132708178386,
|
|
"learning_rate": 2.120414938898287e-06,
|
|
"loss": 0.4930022358894348,
|
|
"step": 3422
|
|
},
|
|
{
|
|
"epoch": 2.1788669637173776,
|
|
"grad_norm": 15.720705301313773,
|
|
"learning_rate": 2.1173878140836935e-06,
|
|
"loss": 1.2701705694198608,
|
|
"step": 3423
|
|
},
|
|
{
|
|
"epoch": 2.179503500954806,
|
|
"grad_norm": 11.093115176269182,
|
|
"learning_rate": 2.1143622710611876e-06,
|
|
"loss": 0.4316216707229614,
|
|
"step": 3424
|
|
},
|
|
{
|
|
"epoch": 2.1801400381922345,
|
|
"grad_norm": 8.613318889664129,
|
|
"learning_rate": 2.111338311490993e-06,
|
|
"loss": 0.48520854115486145,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 2.180776575429663,
|
|
"grad_norm": 11.89834708055669,
|
|
"learning_rate": 2.1083159370324624e-06,
|
|
"loss": 0.7003180980682373,
|
|
"step": 3426
|
|
},
|
|
{
|
|
"epoch": 2.181413112667091,
|
|
"grad_norm": 10.511478621180155,
|
|
"learning_rate": 2.105295149344083e-06,
|
|
"loss": 0.31332698464393616,
|
|
"step": 3427
|
|
},
|
|
{
|
|
"epoch": 2.182049649904519,
|
|
"grad_norm": 8.056646325285707,
|
|
"learning_rate": 2.1022759500834683e-06,
|
|
"loss": 0.5531500577926636,
|
|
"step": 3428
|
|
},
|
|
{
|
|
"epoch": 2.1826861871419476,
|
|
"grad_norm": 9.398688177006418,
|
|
"learning_rate": 2.0992583409073595e-06,
|
|
"loss": 0.7712733149528503,
|
|
"step": 3429
|
|
},
|
|
{
|
|
"epoch": 2.183322724379376,
|
|
"grad_norm": 11.817155962412096,
|
|
"learning_rate": 2.0962423234716257e-06,
|
|
"loss": 0.3385595381259918,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 2.1839592616168044,
|
|
"grad_norm": 8.082736627038416,
|
|
"learning_rate": 2.093227899431268e-06,
|
|
"loss": 0.615544319152832,
|
|
"step": 3431
|
|
},
|
|
{
|
|
"epoch": 2.184595798854233,
|
|
"grad_norm": 7.406037118037002,
|
|
"learning_rate": 2.0902150704404005e-06,
|
|
"loss": 0.5680183172225952,
|
|
"step": 3432
|
|
},
|
|
{
|
|
"epoch": 2.1852323360916612,
|
|
"grad_norm": 8.77753492251007,
|
|
"learning_rate": 2.0872038381522756e-06,
|
|
"loss": 0.3414529860019684,
|
|
"step": 3433
|
|
},
|
|
{
|
|
"epoch": 2.1858688733290896,
|
|
"grad_norm": 14.233806480293957,
|
|
"learning_rate": 2.084194204219263e-06,
|
|
"loss": 0.4020627737045288,
|
|
"step": 3434
|
|
},
|
|
{
|
|
"epoch": 2.186505410566518,
|
|
"grad_norm": 14.750428545877757,
|
|
"learning_rate": 2.0811861702928554e-06,
|
|
"loss": 0.2982262969017029,
|
|
"step": 3435
|
|
},
|
|
{
|
|
"epoch": 2.1871419478039464,
|
|
"grad_norm": 9.438642641641179,
|
|
"learning_rate": 2.0781797380236664e-06,
|
|
"loss": 0.287892609834671,
|
|
"step": 3436
|
|
},
|
|
{
|
|
"epoch": 2.187778485041375,
|
|
"grad_norm": 15.392978053152753,
|
|
"learning_rate": 2.075174909061436e-06,
|
|
"loss": 0.5279977917671204,
|
|
"step": 3437
|
|
},
|
|
{
|
|
"epoch": 2.1884150222788032,
|
|
"grad_norm": 10.211965640112897,
|
|
"learning_rate": 2.072171685055021e-06,
|
|
"loss": 0.7710495591163635,
|
|
"step": 3438
|
|
},
|
|
{
|
|
"epoch": 2.1890515595162316,
|
|
"grad_norm": 17.000200447049338,
|
|
"learning_rate": 2.069170067652393e-06,
|
|
"loss": 0.6563599109649658,
|
|
"step": 3439
|
|
},
|
|
{
|
|
"epoch": 2.18968809675366,
|
|
"grad_norm": 12.776188039892988,
|
|
"learning_rate": 2.066170058500651e-06,
|
|
"loss": 0.1997983604669571,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 2.1903246339910885,
|
|
"grad_norm": 14.253037502020888,
|
|
"learning_rate": 2.063171659246006e-06,
|
|
"loss": 0.7213305830955505,
|
|
"step": 3441
|
|
},
|
|
{
|
|
"epoch": 2.190961171228517,
|
|
"grad_norm": 7.797513778541945,
|
|
"learning_rate": 2.0601748715337855e-06,
|
|
"loss": 0.30038130283355713,
|
|
"step": 3442
|
|
},
|
|
{
|
|
"epoch": 2.1915977084659453,
|
|
"grad_norm": 8.828566563812117,
|
|
"learning_rate": 2.0571796970084325e-06,
|
|
"loss": 0.3803648352622986,
|
|
"step": 3443
|
|
},
|
|
{
|
|
"epoch": 2.1922342457033737,
|
|
"grad_norm": 8.664138024964915,
|
|
"learning_rate": 2.054186137313512e-06,
|
|
"loss": 0.20640689134597778,
|
|
"step": 3444
|
|
},
|
|
{
|
|
"epoch": 2.192870782940802,
|
|
"grad_norm": 9.637871630571919,
|
|
"learning_rate": 2.0511941940916886e-06,
|
|
"loss": 0.4140985906124115,
|
|
"step": 3445
|
|
},
|
|
{
|
|
"epoch": 2.1935073201782305,
|
|
"grad_norm": 14.413703626032627,
|
|
"learning_rate": 2.0482038689847543e-06,
|
|
"loss": 0.5255602598190308,
|
|
"step": 3446
|
|
},
|
|
{
|
|
"epoch": 2.194143857415659,
|
|
"grad_norm": 10.79025408848945,
|
|
"learning_rate": 2.0452151636336056e-06,
|
|
"loss": 0.6795587539672852,
|
|
"step": 3447
|
|
},
|
|
{
|
|
"epoch": 2.1947803946530873,
|
|
"grad_norm": 22.437331601177345,
|
|
"learning_rate": 2.0422280796782506e-06,
|
|
"loss": 0.780468761920929,
|
|
"step": 3448
|
|
},
|
|
{
|
|
"epoch": 2.1954169318905157,
|
|
"grad_norm": 7.879027645388186,
|
|
"learning_rate": 2.0392426187578083e-06,
|
|
"loss": 0.36017906665802,
|
|
"step": 3449
|
|
},
|
|
{
|
|
"epoch": 2.196053469127944,
|
|
"grad_norm": 15.99673880556755,
|
|
"learning_rate": 2.0362587825105106e-06,
|
|
"loss": 0.9976467490196228,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 2.1966900063653725,
|
|
"grad_norm": 14.272801791911292,
|
|
"learning_rate": 2.033276572573693e-06,
|
|
"loss": 0.8597637414932251,
|
|
"step": 3451
|
|
},
|
|
{
|
|
"epoch": 2.197326543602801,
|
|
"grad_norm": 10.17845326878243,
|
|
"learning_rate": 2.0302959905838004e-06,
|
|
"loss": 0.7297912836074829,
|
|
"step": 3452
|
|
},
|
|
{
|
|
"epoch": 2.1979630808402293,
|
|
"grad_norm": 13.628742232472177,
|
|
"learning_rate": 2.0273170381763826e-06,
|
|
"loss": 0.41223394870758057,
|
|
"step": 3453
|
|
},
|
|
{
|
|
"epoch": 2.1985996180776577,
|
|
"grad_norm": 8.170955544592292,
|
|
"learning_rate": 2.0243397169861025e-06,
|
|
"loss": 0.632558286190033,
|
|
"step": 3454
|
|
},
|
|
{
|
|
"epoch": 2.199236155315086,
|
|
"grad_norm": 12.061276941034809,
|
|
"learning_rate": 2.021364028646716e-06,
|
|
"loss": 0.20762304961681366,
|
|
"step": 3455
|
|
},
|
|
{
|
|
"epoch": 2.1998726925525145,
|
|
"grad_norm": 11.979995957223165,
|
|
"learning_rate": 2.018389974791092e-06,
|
|
"loss": 0.6424295902252197,
|
|
"step": 3456
|
|
},
|
|
{
|
|
"epoch": 2.200509229789943,
|
|
"grad_norm": 9.486108115100198,
|
|
"learning_rate": 2.0154175570512e-06,
|
|
"loss": 0.6736075282096863,
|
|
"step": 3457
|
|
},
|
|
{
|
|
"epoch": 2.2011457670273713,
|
|
"grad_norm": 7.480055412604567,
|
|
"learning_rate": 2.012446777058113e-06,
|
|
"loss": 0.3194897174835205,
|
|
"step": 3458
|
|
},
|
|
{
|
|
"epoch": 2.2017823042647997,
|
|
"grad_norm": 10.170951742702815,
|
|
"learning_rate": 2.0094776364420023e-06,
|
|
"loss": 0.2124815732240677,
|
|
"step": 3459
|
|
},
|
|
{
|
|
"epoch": 2.2024188415022277,
|
|
"grad_norm": 10.127641246813296,
|
|
"learning_rate": 2.0065101368321393e-06,
|
|
"loss": 0.24739108979701996,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 2.203055378739656,
|
|
"grad_norm": 9.648299884470832,
|
|
"learning_rate": 2.0035442798569028e-06,
|
|
"loss": 0.4586668312549591,
|
|
"step": 3461
|
|
},
|
|
{
|
|
"epoch": 2.2036919159770845,
|
|
"grad_norm": 7.732486438234397,
|
|
"learning_rate": 2.000580067143757e-06,
|
|
"loss": 0.5461676120758057,
|
|
"step": 3462
|
|
},
|
|
{
|
|
"epoch": 2.204328453214513,
|
|
"grad_norm": 12.03816666748737,
|
|
"learning_rate": 1.997617500319276e-06,
|
|
"loss": 0.3828747570514679,
|
|
"step": 3463
|
|
},
|
|
{
|
|
"epoch": 2.2049649904519413,
|
|
"grad_norm": 10.413194518688728,
|
|
"learning_rate": 1.9946565810091243e-06,
|
|
"loss": 0.6122806072235107,
|
|
"step": 3464
|
|
},
|
|
{
|
|
"epoch": 2.2056015276893697,
|
|
"grad_norm": 11.951136610172114,
|
|
"learning_rate": 1.9916973108380643e-06,
|
|
"loss": 0.8122888803482056,
|
|
"step": 3465
|
|
},
|
|
{
|
|
"epoch": 2.206238064926798,
|
|
"grad_norm": 10.950814954022292,
|
|
"learning_rate": 1.988739691429951e-06,
|
|
"loss": 0.4455221891403198,
|
|
"step": 3466
|
|
},
|
|
{
|
|
"epoch": 2.2068746021642265,
|
|
"grad_norm": 12.47943814048296,
|
|
"learning_rate": 1.985783724407741e-06,
|
|
"loss": 0.36648666858673096,
|
|
"step": 3467
|
|
},
|
|
{
|
|
"epoch": 2.207511139401655,
|
|
"grad_norm": 9.063001941567858,
|
|
"learning_rate": 1.9828294113934713e-06,
|
|
"loss": 0.2546212673187256,
|
|
"step": 3468
|
|
},
|
|
{
|
|
"epoch": 2.2081476766390833,
|
|
"grad_norm": 16.061139222682293,
|
|
"learning_rate": 1.9798767540082853e-06,
|
|
"loss": 0.4800608158111572,
|
|
"step": 3469
|
|
},
|
|
{
|
|
"epoch": 2.2087842138765117,
|
|
"grad_norm": 9.170828154978512,
|
|
"learning_rate": 1.9769257538724077e-06,
|
|
"loss": 0.6345722079277039,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 2.20942075111394,
|
|
"grad_norm": 16.10928402086594,
|
|
"learning_rate": 1.9739764126051598e-06,
|
|
"loss": 0.5042457580566406,
|
|
"step": 3471
|
|
},
|
|
{
|
|
"epoch": 2.2100572883513685,
|
|
"grad_norm": 18.298420948761144,
|
|
"learning_rate": 1.9710287318249482e-06,
|
|
"loss": 0.6672847867012024,
|
|
"step": 3472
|
|
},
|
|
{
|
|
"epoch": 2.210693825588797,
|
|
"grad_norm": 12.030607632707705,
|
|
"learning_rate": 1.9680827131492698e-06,
|
|
"loss": 0.3506079912185669,
|
|
"step": 3473
|
|
},
|
|
{
|
|
"epoch": 2.2113303628262253,
|
|
"grad_norm": 12.246713934244447,
|
|
"learning_rate": 1.9651383581947147e-06,
|
|
"loss": 0.7041405439376831,
|
|
"step": 3474
|
|
},
|
|
{
|
|
"epoch": 2.2119669000636537,
|
|
"grad_norm": 14.179514272460512,
|
|
"learning_rate": 1.9621956685769493e-06,
|
|
"loss": 0.9863985180854797,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 2.212603437301082,
|
|
"grad_norm": 18.424120635555123,
|
|
"learning_rate": 1.9592546459107376e-06,
|
|
"loss": 1.1433643102645874,
|
|
"step": 3476
|
|
},
|
|
{
|
|
"epoch": 2.2132399745385105,
|
|
"grad_norm": 9.587679560238161,
|
|
"learning_rate": 1.9563152918099205e-06,
|
|
"loss": 1.1305607557296753,
|
|
"step": 3477
|
|
},
|
|
{
|
|
"epoch": 2.213876511775939,
|
|
"grad_norm": 19.40340803531654,
|
|
"learning_rate": 1.953377607887428e-06,
|
|
"loss": 0.3361985981464386,
|
|
"step": 3478
|
|
},
|
|
{
|
|
"epoch": 2.2145130490133673,
|
|
"grad_norm": 9.603731451813786,
|
|
"learning_rate": 1.950441595755269e-06,
|
|
"loss": 0.2718903124332428,
|
|
"step": 3479
|
|
},
|
|
{
|
|
"epoch": 2.2151495862507957,
|
|
"grad_norm": 9.83675170481703,
|
|
"learning_rate": 1.9475072570245423e-06,
|
|
"loss": 0.5077819228172302,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 2.215786123488224,
|
|
"grad_norm": 11.252206727126238,
|
|
"learning_rate": 1.9445745933054223e-06,
|
|
"loss": 0.483068585395813,
|
|
"step": 3481
|
|
},
|
|
{
|
|
"epoch": 2.2164226607256525,
|
|
"grad_norm": 9.441422576334581,
|
|
"learning_rate": 1.941643606207166e-06,
|
|
"loss": 0.32851487398147583,
|
|
"step": 3482
|
|
},
|
|
{
|
|
"epoch": 2.217059197963081,
|
|
"grad_norm": 9.857250124104858,
|
|
"learning_rate": 1.938714297338111e-06,
|
|
"loss": 0.3394015431404114,
|
|
"step": 3483
|
|
},
|
|
{
|
|
"epoch": 2.2176957352005093,
|
|
"grad_norm": 15.190433733779852,
|
|
"learning_rate": 1.935786668305672e-06,
|
|
"loss": 0.47739294171333313,
|
|
"step": 3484
|
|
},
|
|
{
|
|
"epoch": 2.2183322724379377,
|
|
"grad_norm": 12.466585032510054,
|
|
"learning_rate": 1.9328607207163434e-06,
|
|
"loss": 0.4199254810810089,
|
|
"step": 3485
|
|
},
|
|
{
|
|
"epoch": 2.218968809675366,
|
|
"grad_norm": 18.101362610018096,
|
|
"learning_rate": 1.9299364561757005e-06,
|
|
"loss": 1.0428451299667358,
|
|
"step": 3486
|
|
},
|
|
{
|
|
"epoch": 2.2196053469127945,
|
|
"grad_norm": 8.797168262568427,
|
|
"learning_rate": 1.9270138762883883e-06,
|
|
"loss": 0.35159802436828613,
|
|
"step": 3487
|
|
},
|
|
{
|
|
"epoch": 2.220241884150223,
|
|
"grad_norm": 7.294094135618442,
|
|
"learning_rate": 1.9240929826581324e-06,
|
|
"loss": 0.268513560295105,
|
|
"step": 3488
|
|
},
|
|
{
|
|
"epoch": 2.2208784213876513,
|
|
"grad_norm": 9.797229909640722,
|
|
"learning_rate": 1.921173776887729e-06,
|
|
"loss": 0.7970614433288574,
|
|
"step": 3489
|
|
},
|
|
{
|
|
"epoch": 2.2215149586250797,
|
|
"grad_norm": 13.17501761413796,
|
|
"learning_rate": 1.918256260579053e-06,
|
|
"loss": 0.4983261227607727,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 2.222151495862508,
|
|
"grad_norm": 11.075571985620726,
|
|
"learning_rate": 1.9153404353330474e-06,
|
|
"loss": 0.5156108140945435,
|
|
"step": 3491
|
|
},
|
|
{
|
|
"epoch": 2.2227880330999366,
|
|
"grad_norm": 9.991317988793591,
|
|
"learning_rate": 1.912426302749729e-06,
|
|
"loss": 0.7613148093223572,
|
|
"step": 3492
|
|
},
|
|
{
|
|
"epoch": 2.2234245703373645,
|
|
"grad_norm": 13.125191694729143,
|
|
"learning_rate": 1.9095138644281895e-06,
|
|
"loss": 0.3501238226890564,
|
|
"step": 3493
|
|
},
|
|
{
|
|
"epoch": 2.2240611075747934,
|
|
"grad_norm": 9.294124454311604,
|
|
"learning_rate": 1.906603121966586e-06,
|
|
"loss": 0.6417360305786133,
|
|
"step": 3494
|
|
},
|
|
{
|
|
"epoch": 2.2246976448122213,
|
|
"grad_norm": 9.351177396602285,
|
|
"learning_rate": 1.9036940769621464e-06,
|
|
"loss": 0.27995768189430237,
|
|
"step": 3495
|
|
},
|
|
{
|
|
"epoch": 2.2253341820496497,
|
|
"grad_norm": 14.206047223911522,
|
|
"learning_rate": 1.9007867310111655e-06,
|
|
"loss": 0.4409753382205963,
|
|
"step": 3496
|
|
},
|
|
{
|
|
"epoch": 2.225970719287078,
|
|
"grad_norm": 9.337434032529131,
|
|
"learning_rate": 1.8978810857090136e-06,
|
|
"loss": 0.3434121012687683,
|
|
"step": 3497
|
|
},
|
|
{
|
|
"epoch": 2.2266072565245065,
|
|
"grad_norm": 7.5565624664541735,
|
|
"learning_rate": 1.894977142650114e-06,
|
|
"loss": 0.40334582328796387,
|
|
"step": 3498
|
|
},
|
|
{
|
|
"epoch": 2.227243793761935,
|
|
"grad_norm": 12.59713529766472,
|
|
"learning_rate": 1.8920749034279695e-06,
|
|
"loss": 0.2577935457229614,
|
|
"step": 3499
|
|
},
|
|
{
|
|
"epoch": 2.2278803309993633,
|
|
"grad_norm": 12.102862449583023,
|
|
"learning_rate": 1.889174369635141e-06,
|
|
"loss": 0.5428228974342346,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 2.2285168682367917,
|
|
"grad_norm": 17.341617869002587,
|
|
"learning_rate": 1.886275542863254e-06,
|
|
"loss": 0.863734245300293,
|
|
"step": 3501
|
|
},
|
|
{
|
|
"epoch": 2.22915340547422,
|
|
"grad_norm": 9.91678128764048,
|
|
"learning_rate": 1.8833784247029968e-06,
|
|
"loss": 0.9740700721740723,
|
|
"step": 3502
|
|
},
|
|
{
|
|
"epoch": 2.2297899427116485,
|
|
"grad_norm": 8.32449044346867,
|
|
"learning_rate": 1.880483016744125e-06,
|
|
"loss": 0.4950059652328491,
|
|
"step": 3503
|
|
},
|
|
{
|
|
"epoch": 2.230426479949077,
|
|
"grad_norm": 10.52685661018625,
|
|
"learning_rate": 1.8775893205754503e-06,
|
|
"loss": 0.5759857892990112,
|
|
"step": 3504
|
|
},
|
|
{
|
|
"epoch": 2.2310630171865053,
|
|
"grad_norm": 10.75385839034482,
|
|
"learning_rate": 1.8746973377848465e-06,
|
|
"loss": 0.6030562520027161,
|
|
"step": 3505
|
|
},
|
|
{
|
|
"epoch": 2.2316995544239338,
|
|
"grad_norm": 9.66589218132674,
|
|
"learning_rate": 1.8718070699592482e-06,
|
|
"loss": 0.29806768894195557,
|
|
"step": 3506
|
|
},
|
|
{
|
|
"epoch": 2.232336091661362,
|
|
"grad_norm": 19.13089110715104,
|
|
"learning_rate": 1.8689185186846487e-06,
|
|
"loss": 0.7423710227012634,
|
|
"step": 3507
|
|
},
|
|
{
|
|
"epoch": 2.2329726288987906,
|
|
"grad_norm": 8.660819879371079,
|
|
"learning_rate": 1.866031685546098e-06,
|
|
"loss": 0.3044222295284271,
|
|
"step": 3508
|
|
},
|
|
{
|
|
"epoch": 2.233609166136219,
|
|
"grad_norm": 11.72671394723013,
|
|
"learning_rate": 1.8631465721277037e-06,
|
|
"loss": 0.24697977304458618,
|
|
"step": 3509
|
|
},
|
|
{
|
|
"epoch": 2.2342457033736474,
|
|
"grad_norm": 9.81009915238982,
|
|
"learning_rate": 1.8602631800126352e-06,
|
|
"loss": 0.33691731095314026,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 2.2348822406110758,
|
|
"grad_norm": 8.567721807499018,
|
|
"learning_rate": 1.8573815107831062e-06,
|
|
"loss": 0.6139578223228455,
|
|
"step": 3511
|
|
},
|
|
{
|
|
"epoch": 2.235518777848504,
|
|
"grad_norm": 10.927962860060859,
|
|
"learning_rate": 1.8545015660203952e-06,
|
|
"loss": 0.4013819098472595,
|
|
"step": 3512
|
|
},
|
|
{
|
|
"epoch": 2.2361553150859326,
|
|
"grad_norm": 11.724217216411903,
|
|
"learning_rate": 1.85162334730483e-06,
|
|
"loss": 0.5242598652839661,
|
|
"step": 3513
|
|
},
|
|
{
|
|
"epoch": 2.236791852323361,
|
|
"grad_norm": 16.38145871625454,
|
|
"learning_rate": 1.8487468562157917e-06,
|
|
"loss": 0.5505284070968628,
|
|
"step": 3514
|
|
},
|
|
{
|
|
"epoch": 2.2374283895607894,
|
|
"grad_norm": 13.25113768978638,
|
|
"learning_rate": 1.845872094331711e-06,
|
|
"loss": 0.7210999131202698,
|
|
"step": 3515
|
|
},
|
|
{
|
|
"epoch": 2.238064926798218,
|
|
"grad_norm": 11.672761794127785,
|
|
"learning_rate": 1.842999063230077e-06,
|
|
"loss": 0.5329117178916931,
|
|
"step": 3516
|
|
},
|
|
{
|
|
"epoch": 2.238701464035646,
|
|
"grad_norm": 11.357489189319121,
|
|
"learning_rate": 1.8401277644874216e-06,
|
|
"loss": 1.5066075325012207,
|
|
"step": 3517
|
|
},
|
|
{
|
|
"epoch": 2.2393380012730746,
|
|
"grad_norm": 7.904148975413503,
|
|
"learning_rate": 1.8372581996793287e-06,
|
|
"loss": 0.36226022243499756,
|
|
"step": 3518
|
|
},
|
|
{
|
|
"epoch": 2.239974538510503,
|
|
"grad_norm": 12.1270379916499,
|
|
"learning_rate": 1.83439037038043e-06,
|
|
"loss": 0.5828202366828918,
|
|
"step": 3519
|
|
},
|
|
{
|
|
"epoch": 2.2406110757479314,
|
|
"grad_norm": 14.331951695286904,
|
|
"learning_rate": 1.8315242781644099e-06,
|
|
"loss": 0.40014493465423584,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 2.24124761298536,
|
|
"grad_norm": 18.50762287265391,
|
|
"learning_rate": 1.8286599246039888e-06,
|
|
"loss": 0.5343925952911377,
|
|
"step": 3521
|
|
},
|
|
{
|
|
"epoch": 2.241884150222788,
|
|
"grad_norm": 11.85502060687402,
|
|
"learning_rate": 1.8257973112709453e-06,
|
|
"loss": 0.653959333896637,
|
|
"step": 3522
|
|
},
|
|
{
|
|
"epoch": 2.2425206874602166,
|
|
"grad_norm": 11.992880165168499,
|
|
"learning_rate": 1.8229364397360954e-06,
|
|
"loss": 0.31213849782943726,
|
|
"step": 3523
|
|
},
|
|
{
|
|
"epoch": 2.243157224697645,
|
|
"grad_norm": 12.159627487239916,
|
|
"learning_rate": 1.820077311569301e-06,
|
|
"loss": 0.4305686354637146,
|
|
"step": 3524
|
|
},
|
|
{
|
|
"epoch": 2.2437937619350734,
|
|
"grad_norm": 12.667597733768334,
|
|
"learning_rate": 1.8172199283394682e-06,
|
|
"loss": 0.49271827936172485,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 2.2444302991725014,
|
|
"grad_norm": 10.54382177570777,
|
|
"learning_rate": 1.8143642916145443e-06,
|
|
"loss": 0.7646178603172302,
|
|
"step": 3526
|
|
},
|
|
{
|
|
"epoch": 2.24506683640993,
|
|
"grad_norm": 10.696436714563873,
|
|
"learning_rate": 1.8115104029615194e-06,
|
|
"loss": 0.7205324769020081,
|
|
"step": 3527
|
|
},
|
|
{
|
|
"epoch": 2.245703373647358,
|
|
"grad_norm": 8.487431434568393,
|
|
"learning_rate": 1.8086582639464228e-06,
|
|
"loss": 0.6558152437210083,
|
|
"step": 3528
|
|
},
|
|
{
|
|
"epoch": 2.2463399108847866,
|
|
"grad_norm": 11.426331407500314,
|
|
"learning_rate": 1.8058078761343273e-06,
|
|
"loss": 0.43178167939186096,
|
|
"step": 3529
|
|
},
|
|
{
|
|
"epoch": 2.246976448122215,
|
|
"grad_norm": 10.959997262762583,
|
|
"learning_rate": 1.8029592410893414e-06,
|
|
"loss": 0.5075429677963257,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 2.2476129853596434,
|
|
"grad_norm": 8.905995860964222,
|
|
"learning_rate": 1.8001123603746123e-06,
|
|
"loss": 0.2820245921611786,
|
|
"step": 3531
|
|
},
|
|
{
|
|
"epoch": 2.248249522597072,
|
|
"grad_norm": 13.666972145092824,
|
|
"learning_rate": 1.7972672355523235e-06,
|
|
"loss": 0.6120468974113464,
|
|
"step": 3532
|
|
},
|
|
{
|
|
"epoch": 2.2488860598345,
|
|
"grad_norm": 10.946458503354657,
|
|
"learning_rate": 1.7944238681837012e-06,
|
|
"loss": 0.23049457371234894,
|
|
"step": 3533
|
|
},
|
|
{
|
|
"epoch": 2.2495225970719286,
|
|
"grad_norm": 8.114239993841824,
|
|
"learning_rate": 1.791582259828996e-06,
|
|
"loss": 0.3650050461292267,
|
|
"step": 3534
|
|
},
|
|
{
|
|
"epoch": 2.250159134309357,
|
|
"grad_norm": 12.544174489980014,
|
|
"learning_rate": 1.788742412047505e-06,
|
|
"loss": 0.5506513714790344,
|
|
"step": 3535
|
|
},
|
|
{
|
|
"epoch": 2.2507956715467854,
|
|
"grad_norm": 7.435459228814035,
|
|
"learning_rate": 1.7859043263975517e-06,
|
|
"loss": 0.5111548900604248,
|
|
"step": 3536
|
|
},
|
|
{
|
|
"epoch": 2.251432208784214,
|
|
"grad_norm": 9.983069033347435,
|
|
"learning_rate": 1.783068004436495e-06,
|
|
"loss": 0.4751511812210083,
|
|
"step": 3537
|
|
},
|
|
{
|
|
"epoch": 2.252068746021642,
|
|
"grad_norm": 11.77034056871515,
|
|
"learning_rate": 1.7802334477207238e-06,
|
|
"loss": 0.2234572470188141,
|
|
"step": 3538
|
|
},
|
|
{
|
|
"epoch": 2.2527052832590706,
|
|
"grad_norm": 9.735581359291695,
|
|
"learning_rate": 1.7774006578056652e-06,
|
|
"loss": 0.7333520650863647,
|
|
"step": 3539
|
|
},
|
|
{
|
|
"epoch": 2.253341820496499,
|
|
"grad_norm": 11.904991462573685,
|
|
"learning_rate": 1.774569636245771e-06,
|
|
"loss": 0.28692349791526794,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 2.2539783577339274,
|
|
"grad_norm": 9.358363138502039,
|
|
"learning_rate": 1.7717403845945185e-06,
|
|
"loss": 0.613878607749939,
|
|
"step": 3541
|
|
},
|
|
{
|
|
"epoch": 2.254614894971356,
|
|
"grad_norm": 12.091175156448605,
|
|
"learning_rate": 1.7689129044044245e-06,
|
|
"loss": 0.57314532995224,
|
|
"step": 3542
|
|
},
|
|
{
|
|
"epoch": 2.255251432208784,
|
|
"grad_norm": 10.635856312842957,
|
|
"learning_rate": 1.766087197227026e-06,
|
|
"loss": 0.48670288920402527,
|
|
"step": 3543
|
|
},
|
|
{
|
|
"epoch": 2.2558879694462126,
|
|
"grad_norm": 11.618410214495096,
|
|
"learning_rate": 1.76326326461289e-06,
|
|
"loss": 0.2895565629005432,
|
|
"step": 3544
|
|
},
|
|
{
|
|
"epoch": 2.256524506683641,
|
|
"grad_norm": 18.28761505687289,
|
|
"learning_rate": 1.760441108111607e-06,
|
|
"loss": 0.8712286353111267,
|
|
"step": 3545
|
|
},
|
|
{
|
|
"epoch": 2.2571610439210694,
|
|
"grad_norm": 12.308177743704901,
|
|
"learning_rate": 1.7576207292717979e-06,
|
|
"loss": 0.41069579124450684,
|
|
"step": 3546
|
|
},
|
|
{
|
|
"epoch": 2.257797581158498,
|
|
"grad_norm": 14.432734857390464,
|
|
"learning_rate": 1.7548021296411033e-06,
|
|
"loss": 0.4294593334197998,
|
|
"step": 3547
|
|
},
|
|
{
|
|
"epoch": 2.2584341183959262,
|
|
"grad_norm": 8.502333486055484,
|
|
"learning_rate": 1.7519853107661904e-06,
|
|
"loss": 0.23566997051239014,
|
|
"step": 3548
|
|
},
|
|
{
|
|
"epoch": 2.2590706556333546,
|
|
"grad_norm": 14.074281353047315,
|
|
"learning_rate": 1.7491702741927475e-06,
|
|
"loss": 0.49657586216926575,
|
|
"step": 3549
|
|
},
|
|
{
|
|
"epoch": 2.259707192870783,
|
|
"grad_norm": 9.910752879118258,
|
|
"learning_rate": 1.7463570214654852e-06,
|
|
"loss": 1.0669946670532227,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 2.2603437301082114,
|
|
"grad_norm": 10.602541481981014,
|
|
"learning_rate": 1.7435455541281343e-06,
|
|
"loss": 0.26584136486053467,
|
|
"step": 3551
|
|
},
|
|
{
|
|
"epoch": 2.26098026734564,
|
|
"grad_norm": 9.008666869195622,
|
|
"learning_rate": 1.7407358737234503e-06,
|
|
"loss": 0.7706364393234253,
|
|
"step": 3552
|
|
},
|
|
{
|
|
"epoch": 2.2616168045830682,
|
|
"grad_norm": 16.070066176018877,
|
|
"learning_rate": 1.7379279817932037e-06,
|
|
"loss": 1.0075418949127197,
|
|
"step": 3553
|
|
},
|
|
{
|
|
"epoch": 2.2622533418204966,
|
|
"grad_norm": 10.117869692663858,
|
|
"learning_rate": 1.7351218798781849e-06,
|
|
"loss": 1.0107684135437012,
|
|
"step": 3554
|
|
},
|
|
{
|
|
"epoch": 2.262889879057925,
|
|
"grad_norm": 9.314656091569422,
|
|
"learning_rate": 1.7323175695182e-06,
|
|
"loss": 0.6869802474975586,
|
|
"step": 3555
|
|
},
|
|
{
|
|
"epoch": 2.2635264162953534,
|
|
"grad_norm": 11.814939794631169,
|
|
"learning_rate": 1.7295150522520792e-06,
|
|
"loss": 0.528224527835846,
|
|
"step": 3556
|
|
},
|
|
{
|
|
"epoch": 2.264162953532782,
|
|
"grad_norm": 7.247212661080214,
|
|
"learning_rate": 1.726714329617659e-06,
|
|
"loss": 0.2727453112602234,
|
|
"step": 3557
|
|
},
|
|
{
|
|
"epoch": 2.2647994907702103,
|
|
"grad_norm": 9.98633554031247,
|
|
"learning_rate": 1.7239154031517957e-06,
|
|
"loss": 0.9306679368019104,
|
|
"step": 3558
|
|
},
|
|
{
|
|
"epoch": 2.265436028007638,
|
|
"grad_norm": 9.587220257942565,
|
|
"learning_rate": 1.7211182743903631e-06,
|
|
"loss": 0.4386458098888397,
|
|
"step": 3559
|
|
},
|
|
{
|
|
"epoch": 2.266072565245067,
|
|
"grad_norm": 13.838614818326626,
|
|
"learning_rate": 1.7183229448682436e-06,
|
|
"loss": 0.42543673515319824,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 2.266709102482495,
|
|
"grad_norm": 9.473609878611798,
|
|
"learning_rate": 1.7155294161193348e-06,
|
|
"loss": 0.6633802056312561,
|
|
"step": 3561
|
|
},
|
|
{
|
|
"epoch": 2.2673456397199234,
|
|
"grad_norm": 15.38725086141798,
|
|
"learning_rate": 1.7127376896765436e-06,
|
|
"loss": 0.4467209577560425,
|
|
"step": 3562
|
|
},
|
|
{
|
|
"epoch": 2.267982176957352,
|
|
"grad_norm": 13.185191378907108,
|
|
"learning_rate": 1.7099477670717946e-06,
|
|
"loss": 0.8797957301139832,
|
|
"step": 3563
|
|
},
|
|
{
|
|
"epoch": 2.2686187141947802,
|
|
"grad_norm": 15.28725312673086,
|
|
"learning_rate": 1.7071596498360116e-06,
|
|
"loss": 0.6036324501037598,
|
|
"step": 3564
|
|
},
|
|
{
|
|
"epoch": 2.2692552514322086,
|
|
"grad_norm": 12.901406334213773,
|
|
"learning_rate": 1.7043733394991386e-06,
|
|
"loss": 0.6327831745147705,
|
|
"step": 3565
|
|
},
|
|
{
|
|
"epoch": 2.269891788669637,
|
|
"grad_norm": 13.033898575857316,
|
|
"learning_rate": 1.7015888375901223e-06,
|
|
"loss": 0.4433327913284302,
|
|
"step": 3566
|
|
},
|
|
{
|
|
"epoch": 2.2705283259070654,
|
|
"grad_norm": 9.735752826770067,
|
|
"learning_rate": 1.698806145636917e-06,
|
|
"loss": 0.4037848114967346,
|
|
"step": 3567
|
|
},
|
|
{
|
|
"epoch": 2.271164863144494,
|
|
"grad_norm": 9.852589667866491,
|
|
"learning_rate": 1.6960252651664843e-06,
|
|
"loss": 0.5703111290931702,
|
|
"step": 3568
|
|
},
|
|
{
|
|
"epoch": 2.2718014003819222,
|
|
"grad_norm": 12.487549278669688,
|
|
"learning_rate": 1.6932461977047976e-06,
|
|
"loss": 0.390015184879303,
|
|
"step": 3569
|
|
},
|
|
{
|
|
"epoch": 2.2724379376193506,
|
|
"grad_norm": 14.724846728798648,
|
|
"learning_rate": 1.690468944776823e-06,
|
|
"loss": 0.9128044247627258,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 2.273074474856779,
|
|
"grad_norm": 12.091500775810264,
|
|
"learning_rate": 1.6876935079065438e-06,
|
|
"loss": 0.5277889966964722,
|
|
"step": 3571
|
|
},
|
|
{
|
|
"epoch": 2.2737110120942075,
|
|
"grad_norm": 8.654633805223162,
|
|
"learning_rate": 1.6849198886169389e-06,
|
|
"loss": 0.22507601976394653,
|
|
"step": 3572
|
|
},
|
|
{
|
|
"epoch": 2.274347549331636,
|
|
"grad_norm": 18.948358571595122,
|
|
"learning_rate": 1.6821480884299924e-06,
|
|
"loss": 0.3390110433101654,
|
|
"step": 3573
|
|
},
|
|
{
|
|
"epoch": 2.2749840865690643,
|
|
"grad_norm": 10.613780305031355,
|
|
"learning_rate": 1.67937810886669e-06,
|
|
"loss": 0.21446850895881653,
|
|
"step": 3574
|
|
},
|
|
{
|
|
"epoch": 2.2756206238064927,
|
|
"grad_norm": 14.447555420927046,
|
|
"learning_rate": 1.6766099514470163e-06,
|
|
"loss": 0.6864573955535889,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 2.276257161043921,
|
|
"grad_norm": 13.672767198982472,
|
|
"learning_rate": 1.673843617689963e-06,
|
|
"loss": 0.37464284896850586,
|
|
"step": 3576
|
|
},
|
|
{
|
|
"epoch": 2.2768936982813495,
|
|
"grad_norm": 9.297357998616375,
|
|
"learning_rate": 1.6710791091135097e-06,
|
|
"loss": 0.6171706914901733,
|
|
"step": 3577
|
|
},
|
|
{
|
|
"epoch": 2.277530235518778,
|
|
"grad_norm": 12.898598837075022,
|
|
"learning_rate": 1.6683164272346459e-06,
|
|
"loss": 0.5255354046821594,
|
|
"step": 3578
|
|
},
|
|
{
|
|
"epoch": 2.2781667727562063,
|
|
"grad_norm": 15.456525894320567,
|
|
"learning_rate": 1.665555573569352e-06,
|
|
"loss": 0.4737781882286072,
|
|
"step": 3579
|
|
},
|
|
{
|
|
"epoch": 2.2788033099936347,
|
|
"grad_norm": 12.7709637980077,
|
|
"learning_rate": 1.662796549632606e-06,
|
|
"loss": 0.46658074855804443,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 2.279439847231063,
|
|
"grad_norm": 13.404313326487049,
|
|
"learning_rate": 1.6600393569383817e-06,
|
|
"loss": 1.0031671524047852,
|
|
"step": 3581
|
|
},
|
|
{
|
|
"epoch": 2.2800763844684915,
|
|
"grad_norm": 16.7742953789968,
|
|
"learning_rate": 1.6572839969996524e-06,
|
|
"loss": 0.32030603289604187,
|
|
"step": 3582
|
|
},
|
|
{
|
|
"epoch": 2.28071292170592,
|
|
"grad_norm": 13.927633778218528,
|
|
"learning_rate": 1.6545304713283806e-06,
|
|
"loss": 0.3638037443161011,
|
|
"step": 3583
|
|
},
|
|
{
|
|
"epoch": 2.2813494589433483,
|
|
"grad_norm": 6.9188645768966675,
|
|
"learning_rate": 1.651778781435524e-06,
|
|
"loss": 0.15295016765594482,
|
|
"step": 3584
|
|
},
|
|
{
|
|
"epoch": 2.2819859961807767,
|
|
"grad_norm": 14.31366451095305,
|
|
"learning_rate": 1.6490289288310313e-06,
|
|
"loss": 0.4253908395767212,
|
|
"step": 3585
|
|
},
|
|
{
|
|
"epoch": 2.282622533418205,
|
|
"grad_norm": 13.682501342420357,
|
|
"learning_rate": 1.64628091502385e-06,
|
|
"loss": 0.46490031480789185,
|
|
"step": 3586
|
|
},
|
|
{
|
|
"epoch": 2.2832590706556335,
|
|
"grad_norm": 8.150257314012448,
|
|
"learning_rate": 1.643534741521906e-06,
|
|
"loss": 0.5571666359901428,
|
|
"step": 3587
|
|
},
|
|
{
|
|
"epoch": 2.283895607893062,
|
|
"grad_norm": 9.60175840484246,
|
|
"learning_rate": 1.6407904098321282e-06,
|
|
"loss": 0.22285878658294678,
|
|
"step": 3588
|
|
},
|
|
{
|
|
"epoch": 2.2845321451304903,
|
|
"grad_norm": 10.014175229236065,
|
|
"learning_rate": 1.6380479214604267e-06,
|
|
"loss": 0.32916271686553955,
|
|
"step": 3589
|
|
},
|
|
{
|
|
"epoch": 2.2851686823679187,
|
|
"grad_norm": 8.299736960226006,
|
|
"learning_rate": 1.6353072779117036e-06,
|
|
"loss": 0.24011826515197754,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 2.285805219605347,
|
|
"grad_norm": 9.357967429035314,
|
|
"learning_rate": 1.6325684806898468e-06,
|
|
"loss": 0.2705532908439636,
|
|
"step": 3591
|
|
},
|
|
{
|
|
"epoch": 2.286441756842775,
|
|
"grad_norm": 11.089414258649509,
|
|
"learning_rate": 1.6298315312977331e-06,
|
|
"loss": 0.8302360773086548,
|
|
"step": 3592
|
|
},
|
|
{
|
|
"epoch": 2.287078294080204,
|
|
"grad_norm": 6.278016252890907,
|
|
"learning_rate": 1.6270964312372234e-06,
|
|
"loss": 0.28905045986175537,
|
|
"step": 3593
|
|
},
|
|
{
|
|
"epoch": 2.287714831317632,
|
|
"grad_norm": 8.197033849058773,
|
|
"learning_rate": 1.6243631820091638e-06,
|
|
"loss": 0.20282121002674103,
|
|
"step": 3594
|
|
},
|
|
{
|
|
"epoch": 2.2883513685550607,
|
|
"grad_norm": 12.163942331346213,
|
|
"learning_rate": 1.6216317851133877e-06,
|
|
"loss": 0.42968446016311646,
|
|
"step": 3595
|
|
},
|
|
{
|
|
"epoch": 2.2889879057924887,
|
|
"grad_norm": 15.279159484889568,
|
|
"learning_rate": 1.6189022420487094e-06,
|
|
"loss": 0.6520296931266785,
|
|
"step": 3596
|
|
},
|
|
{
|
|
"epoch": 2.289624443029917,
|
|
"grad_norm": 20.072671279159017,
|
|
"learning_rate": 1.6161745543129254e-06,
|
|
"loss": 1.815822720527649,
|
|
"step": 3597
|
|
},
|
|
{
|
|
"epoch": 2.2902609802673455,
|
|
"grad_norm": 11.702856245846263,
|
|
"learning_rate": 1.6134487234028145e-06,
|
|
"loss": 0.49471431970596313,
|
|
"step": 3598
|
|
},
|
|
{
|
|
"epoch": 2.290897517504774,
|
|
"grad_norm": 9.614888447397261,
|
|
"learning_rate": 1.6107247508141427e-06,
|
|
"loss": 0.5017402172088623,
|
|
"step": 3599
|
|
},
|
|
{
|
|
"epoch": 2.2915340547422023,
|
|
"grad_norm": 13.845876963255792,
|
|
"learning_rate": 1.608002638041643e-06,
|
|
"loss": 0.7130351066589355,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 2.2921705919796307,
|
|
"grad_norm": 14.779216377796459,
|
|
"learning_rate": 1.6052823865790412e-06,
|
|
"loss": 0.3332129120826721,
|
|
"step": 3601
|
|
},
|
|
{
|
|
"epoch": 2.292807129217059,
|
|
"grad_norm": 15.587667274654402,
|
|
"learning_rate": 1.6025639979190344e-06,
|
|
"loss": 1.0253678560256958,
|
|
"step": 3602
|
|
},
|
|
{
|
|
"epoch": 2.2934436664544875,
|
|
"grad_norm": 9.645842356106199,
|
|
"learning_rate": 1.5998474735533004e-06,
|
|
"loss": 0.43014228343963623,
|
|
"step": 3603
|
|
},
|
|
{
|
|
"epoch": 2.294080203691916,
|
|
"grad_norm": 7.4465732312366715,
|
|
"learning_rate": 1.5971328149724901e-06,
|
|
"loss": 0.3042871356010437,
|
|
"step": 3604
|
|
},
|
|
{
|
|
"epoch": 2.2947167409293443,
|
|
"grad_norm": 12.106940306712147,
|
|
"learning_rate": 1.5944200236662372e-06,
|
|
"loss": 0.5175577402114868,
|
|
"step": 3605
|
|
},
|
|
{
|
|
"epoch": 2.2953532781667727,
|
|
"grad_norm": 8.721367854893396,
|
|
"learning_rate": 1.5917091011231455e-06,
|
|
"loss": 0.4424808621406555,
|
|
"step": 3606
|
|
},
|
|
{
|
|
"epoch": 2.295989815404201,
|
|
"grad_norm": 14.684383732784086,
|
|
"learning_rate": 1.589000048830795e-06,
|
|
"loss": 0.4613659977912903,
|
|
"step": 3607
|
|
},
|
|
{
|
|
"epoch": 2.2966263526416295,
|
|
"grad_norm": 15.347722509583157,
|
|
"learning_rate": 1.586292868275739e-06,
|
|
"loss": 0.3522476553916931,
|
|
"step": 3608
|
|
},
|
|
{
|
|
"epoch": 2.297262889879058,
|
|
"grad_norm": 9.360582647015702,
|
|
"learning_rate": 1.5835875609435042e-06,
|
|
"loss": 0.4199846684932709,
|
|
"step": 3609
|
|
},
|
|
{
|
|
"epoch": 2.2978994271164863,
|
|
"grad_norm": 19.05247901253659,
|
|
"learning_rate": 1.5808841283185888e-06,
|
|
"loss": 0.5410860776901245,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 2.2985359643539147,
|
|
"grad_norm": 9.043382369943455,
|
|
"learning_rate": 1.5781825718844612e-06,
|
|
"loss": 0.3832551836967468,
|
|
"step": 3611
|
|
},
|
|
{
|
|
"epoch": 2.299172501591343,
|
|
"grad_norm": 8.461003927432863,
|
|
"learning_rate": 1.5754828931235672e-06,
|
|
"loss": 0.6752854585647583,
|
|
"step": 3612
|
|
},
|
|
{
|
|
"epoch": 2.2998090388287715,
|
|
"grad_norm": 6.12202896232288,
|
|
"learning_rate": 1.5727850935173095e-06,
|
|
"loss": 0.14791876077651978,
|
|
"step": 3613
|
|
},
|
|
{
|
|
"epoch": 2.3004455760662,
|
|
"grad_norm": 16.87998184328102,
|
|
"learning_rate": 1.5700891745460717e-06,
|
|
"loss": 1.508462905883789,
|
|
"step": 3614
|
|
},
|
|
{
|
|
"epoch": 2.3010821133036283,
|
|
"grad_norm": 11.425713840851401,
|
|
"learning_rate": 1.5673951376891999e-06,
|
|
"loss": 0.6087247133255005,
|
|
"step": 3615
|
|
},
|
|
{
|
|
"epoch": 2.3017186505410567,
|
|
"grad_norm": 12.360023388162777,
|
|
"learning_rate": 1.5647029844250077e-06,
|
|
"loss": 0.44248509407043457,
|
|
"step": 3616
|
|
},
|
|
{
|
|
"epoch": 2.302355187778485,
|
|
"grad_norm": 12.79301570004959,
|
|
"learning_rate": 1.5620127162307741e-06,
|
|
"loss": 0.24971036612987518,
|
|
"step": 3617
|
|
},
|
|
{
|
|
"epoch": 2.3029917250159135,
|
|
"grad_norm": 11.71850432844077,
|
|
"learning_rate": 1.5593243345827485e-06,
|
|
"loss": 0.3522535562515259,
|
|
"step": 3618
|
|
},
|
|
{
|
|
"epoch": 2.303628262253342,
|
|
"grad_norm": 13.119297552670384,
|
|
"learning_rate": 1.5566378409561394e-06,
|
|
"loss": 0.3391970992088318,
|
|
"step": 3619
|
|
},
|
|
{
|
|
"epoch": 2.3042647994907703,
|
|
"grad_norm": 7.923977606616533,
|
|
"learning_rate": 1.5539532368251226e-06,
|
|
"loss": 0.14920666813850403,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 2.3049013367281987,
|
|
"grad_norm": 7.5987958105107385,
|
|
"learning_rate": 1.5512705236628344e-06,
|
|
"loss": 0.49967896938323975,
|
|
"step": 3621
|
|
},
|
|
{
|
|
"epoch": 2.305537873965627,
|
|
"grad_norm": 10.078837502478047,
|
|
"learning_rate": 1.5485897029413793e-06,
|
|
"loss": 0.24752558767795563,
|
|
"step": 3622
|
|
},
|
|
{
|
|
"epoch": 2.3061744112030556,
|
|
"grad_norm": 6.351868764551711,
|
|
"learning_rate": 1.5459107761318132e-06,
|
|
"loss": 0.31946277618408203,
|
|
"step": 3623
|
|
},
|
|
{
|
|
"epoch": 2.306810948440484,
|
|
"grad_norm": 9.89620046223258,
|
|
"learning_rate": 1.5432337447041634e-06,
|
|
"loss": 0.9290648102760315,
|
|
"step": 3624
|
|
},
|
|
{
|
|
"epoch": 2.307447485677912,
|
|
"grad_norm": 7.680230646555971,
|
|
"learning_rate": 1.5405586101274117e-06,
|
|
"loss": 0.3040928840637207,
|
|
"step": 3625
|
|
},
|
|
{
|
|
"epoch": 2.3080840229153408,
|
|
"grad_norm": 12.380153963373857,
|
|
"learning_rate": 1.5378853738694987e-06,
|
|
"loss": 0.6746101379394531,
|
|
"step": 3626
|
|
},
|
|
{
|
|
"epoch": 2.3087205601527687,
|
|
"grad_norm": 17.8068297838926,
|
|
"learning_rate": 1.535214037397325e-06,
|
|
"loss": 0.7092942595481873,
|
|
"step": 3627
|
|
},
|
|
{
|
|
"epoch": 2.3093570973901976,
|
|
"grad_norm": 12.020871176944416,
|
|
"learning_rate": 1.5325446021767465e-06,
|
|
"loss": 0.17868655920028687,
|
|
"step": 3628
|
|
},
|
|
{
|
|
"epoch": 2.3099936346276255,
|
|
"grad_norm": 11.454025233433736,
|
|
"learning_rate": 1.5298770696725824e-06,
|
|
"loss": 0.5282299518585205,
|
|
"step": 3629
|
|
},
|
|
{
|
|
"epoch": 2.310630171865054,
|
|
"grad_norm": 26.109461817262574,
|
|
"learning_rate": 1.527211441348596e-06,
|
|
"loss": 1.2998456954956055,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 2.3112667091024823,
|
|
"grad_norm": 10.725439607251838,
|
|
"learning_rate": 1.5245477186675184e-06,
|
|
"loss": 0.38805341720581055,
|
|
"step": 3631
|
|
},
|
|
{
|
|
"epoch": 2.3119032463399107,
|
|
"grad_norm": 7.801657132057306,
|
|
"learning_rate": 1.5218859030910266e-06,
|
|
"loss": 0.46178191900253296,
|
|
"step": 3632
|
|
},
|
|
{
|
|
"epoch": 2.312539783577339,
|
|
"grad_norm": 10.629175987996685,
|
|
"learning_rate": 1.5192259960797546e-06,
|
|
"loss": 0.3144703209400177,
|
|
"step": 3633
|
|
},
|
|
{
|
|
"epoch": 2.3131763208147675,
|
|
"grad_norm": 14.413967861963577,
|
|
"learning_rate": 1.5165679990932857e-06,
|
|
"loss": 0.41735124588012695,
|
|
"step": 3634
|
|
},
|
|
{
|
|
"epoch": 2.313812858052196,
|
|
"grad_norm": 9.713189535062696,
|
|
"learning_rate": 1.5139119135901632e-06,
|
|
"loss": 0.4353427290916443,
|
|
"step": 3635
|
|
},
|
|
{
|
|
"epoch": 2.3144493952896243,
|
|
"grad_norm": 14.915821662254057,
|
|
"learning_rate": 1.511257741027869e-06,
|
|
"loss": 0.6386640071868896,
|
|
"step": 3636
|
|
},
|
|
{
|
|
"epoch": 2.3150859325270527,
|
|
"grad_norm": 7.073726796970359,
|
|
"learning_rate": 1.508605482862847e-06,
|
|
"loss": 0.14773771166801453,
|
|
"step": 3637
|
|
},
|
|
{
|
|
"epoch": 2.315722469764481,
|
|
"grad_norm": 14.52182409740128,
|
|
"learning_rate": 1.5059551405504846e-06,
|
|
"loss": 0.6305415034294128,
|
|
"step": 3638
|
|
},
|
|
{
|
|
"epoch": 2.3163590070019096,
|
|
"grad_norm": 12.06601036037594,
|
|
"learning_rate": 1.5033067155451186e-06,
|
|
"loss": 0.46238037943840027,
|
|
"step": 3639
|
|
},
|
|
{
|
|
"epoch": 2.316995544239338,
|
|
"grad_norm": 14.773896058423377,
|
|
"learning_rate": 1.500660209300034e-06,
|
|
"loss": 0.4040284752845764,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 2.3176320814767664,
|
|
"grad_norm": 12.757647220731817,
|
|
"learning_rate": 1.4980156232674641e-06,
|
|
"loss": 0.7795870304107666,
|
|
"step": 3641
|
|
},
|
|
{
|
|
"epoch": 2.3182686187141948,
|
|
"grad_norm": 19.24362559783828,
|
|
"learning_rate": 1.4953729588985894e-06,
|
|
"loss": 0.9090286493301392,
|
|
"step": 3642
|
|
},
|
|
{
|
|
"epoch": 2.318905155951623,
|
|
"grad_norm": 15.672532528484432,
|
|
"learning_rate": 1.4927322176435288e-06,
|
|
"loss": 0.3751720190048218,
|
|
"step": 3643
|
|
},
|
|
{
|
|
"epoch": 2.3195416931890516,
|
|
"grad_norm": 18.742423426587592,
|
|
"learning_rate": 1.4900934009513558e-06,
|
|
"loss": 0.6430118680000305,
|
|
"step": 3644
|
|
},
|
|
{
|
|
"epoch": 2.32017823042648,
|
|
"grad_norm": 14.611731063893258,
|
|
"learning_rate": 1.487456510270081e-06,
|
|
"loss": 0.6180001497268677,
|
|
"step": 3645
|
|
},
|
|
{
|
|
"epoch": 2.3208147676639084,
|
|
"grad_norm": 15.521679418580392,
|
|
"learning_rate": 1.4848215470466614e-06,
|
|
"loss": 0.38686472177505493,
|
|
"step": 3646
|
|
},
|
|
{
|
|
"epoch": 2.3214513049013368,
|
|
"grad_norm": 16.252399738789812,
|
|
"learning_rate": 1.4821885127269936e-06,
|
|
"loss": 0.3846510946750641,
|
|
"step": 3647
|
|
},
|
|
{
|
|
"epoch": 2.322087842138765,
|
|
"grad_norm": 11.871510763089457,
|
|
"learning_rate": 1.47955740875592e-06,
|
|
"loss": 1.2616004943847656,
|
|
"step": 3648
|
|
},
|
|
{
|
|
"epoch": 2.3227243793761936,
|
|
"grad_norm": 8.79403623074544,
|
|
"learning_rate": 1.4769282365772196e-06,
|
|
"loss": 0.625005841255188,
|
|
"step": 3649
|
|
},
|
|
{
|
|
"epoch": 2.323360916613622,
|
|
"grad_norm": 10.253647287150587,
|
|
"learning_rate": 1.4743009976336132e-06,
|
|
"loss": 0.5820122361183167,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 2.3239974538510504,
|
|
"grad_norm": 9.848322146907023,
|
|
"learning_rate": 1.4716756933667592e-06,
|
|
"loss": 0.23914802074432373,
|
|
"step": 3651
|
|
},
|
|
{
|
|
"epoch": 2.324633991088479,
|
|
"grad_norm": 10.340995805837212,
|
|
"learning_rate": 1.4690523252172595e-06,
|
|
"loss": 0.21634802222251892,
|
|
"step": 3652
|
|
},
|
|
{
|
|
"epoch": 2.325270528325907,
|
|
"grad_norm": 5.760077510283241,
|
|
"learning_rate": 1.4664308946246441e-06,
|
|
"loss": 0.19866140186786652,
|
|
"step": 3653
|
|
},
|
|
{
|
|
"epoch": 2.3259070655633356,
|
|
"grad_norm": 22.605378194422197,
|
|
"learning_rate": 1.4638114030273903e-06,
|
|
"loss": 0.843527615070343,
|
|
"step": 3654
|
|
},
|
|
{
|
|
"epoch": 2.326543602800764,
|
|
"grad_norm": 12.217049871532764,
|
|
"learning_rate": 1.4611938518629048e-06,
|
|
"loss": 0.4112418591976166,
|
|
"step": 3655
|
|
},
|
|
{
|
|
"epoch": 2.3271801400381924,
|
|
"grad_norm": 17.596310727776576,
|
|
"learning_rate": 1.458578242567531e-06,
|
|
"loss": 0.7754092216491699,
|
|
"step": 3656
|
|
},
|
|
{
|
|
"epoch": 2.327816677275621,
|
|
"grad_norm": 11.569529283786105,
|
|
"learning_rate": 1.4559645765765452e-06,
|
|
"loss": 0.45173966884613037,
|
|
"step": 3657
|
|
},
|
|
{
|
|
"epoch": 2.328453214513049,
|
|
"grad_norm": 6.008874695167993,
|
|
"learning_rate": 1.4533528553241643e-06,
|
|
"loss": 0.3209451735019684,
|
|
"step": 3658
|
|
},
|
|
{
|
|
"epoch": 2.3290897517504776,
|
|
"grad_norm": 11.975411718365635,
|
|
"learning_rate": 1.4507430802435285e-06,
|
|
"loss": 0.6683560013771057,
|
|
"step": 3659
|
|
},
|
|
{
|
|
"epoch": 2.3297262889879056,
|
|
"grad_norm": 13.102032681202644,
|
|
"learning_rate": 1.4481352527667136e-06,
|
|
"loss": 0.3309083580970764,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 2.3303628262253344,
|
|
"grad_norm": 9.721885000749575,
|
|
"learning_rate": 1.4455293743247313e-06,
|
|
"loss": 0.664567232131958,
|
|
"step": 3661
|
|
},
|
|
{
|
|
"epoch": 2.3309993634627624,
|
|
"grad_norm": 10.822122824742895,
|
|
"learning_rate": 1.4429254463475179e-06,
|
|
"loss": 0.27729907631874084,
|
|
"step": 3662
|
|
},
|
|
{
|
|
"epoch": 2.3316359007001908,
|
|
"grad_norm": 10.841564476697524,
|
|
"learning_rate": 1.4403234702639418e-06,
|
|
"loss": 0.5381125211715698,
|
|
"step": 3663
|
|
},
|
|
{
|
|
"epoch": 2.332272437937619,
|
|
"grad_norm": 14.675376700233853,
|
|
"learning_rate": 1.4377234475017987e-06,
|
|
"loss": 0.738660991191864,
|
|
"step": 3664
|
|
},
|
|
{
|
|
"epoch": 2.3329089751750476,
|
|
"grad_norm": 9.306934376497246,
|
|
"learning_rate": 1.4351253794878184e-06,
|
|
"loss": 0.7385612726211548,
|
|
"step": 3665
|
|
},
|
|
{
|
|
"epoch": 2.333545512412476,
|
|
"grad_norm": 10.892424385174278,
|
|
"learning_rate": 1.4325292676476471e-06,
|
|
"loss": 0.20372740924358368,
|
|
"step": 3666
|
|
},
|
|
{
|
|
"epoch": 2.3341820496499044,
|
|
"grad_norm": 10.257811251772715,
|
|
"learning_rate": 1.4299351134058686e-06,
|
|
"loss": 0.5008155703544617,
|
|
"step": 3667
|
|
},
|
|
{
|
|
"epoch": 2.334818586887333,
|
|
"grad_norm": 13.887540797057827,
|
|
"learning_rate": 1.4273429181859861e-06,
|
|
"loss": 0.3023715913295746,
|
|
"step": 3668
|
|
},
|
|
{
|
|
"epoch": 2.335455124124761,
|
|
"grad_norm": 8.847549201264535,
|
|
"learning_rate": 1.4247526834104303e-06,
|
|
"loss": 0.3329947888851166,
|
|
"step": 3669
|
|
},
|
|
{
|
|
"epoch": 2.3360916613621896,
|
|
"grad_norm": 5.95348852151552,
|
|
"learning_rate": 1.4221644105005528e-06,
|
|
"loss": 0.29144102334976196,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 2.336728198599618,
|
|
"grad_norm": 14.170395427490837,
|
|
"learning_rate": 1.4195781008766345e-06,
|
|
"loss": 0.6279107332229614,
|
|
"step": 3671
|
|
},
|
|
{
|
|
"epoch": 2.3373647358370464,
|
|
"grad_norm": 7.372939737730689,
|
|
"learning_rate": 1.416993755957875e-06,
|
|
"loss": 0.215074822306633,
|
|
"step": 3672
|
|
},
|
|
{
|
|
"epoch": 2.338001273074475,
|
|
"grad_norm": 12.742643327109723,
|
|
"learning_rate": 1.4144113771623957e-06,
|
|
"loss": 0.496043860912323,
|
|
"step": 3673
|
|
},
|
|
{
|
|
"epoch": 2.338637810311903,
|
|
"grad_norm": 10.232372103690514,
|
|
"learning_rate": 1.4118309659072387e-06,
|
|
"loss": 0.9400693774223328,
|
|
"step": 3674
|
|
},
|
|
{
|
|
"epoch": 2.3392743475493316,
|
|
"grad_norm": 9.038286266911594,
|
|
"learning_rate": 1.4092525236083721e-06,
|
|
"loss": 0.42702949047088623,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 2.33991088478676,
|
|
"grad_norm": 15.18231475470037,
|
|
"learning_rate": 1.406676051680675e-06,
|
|
"loss": 0.3502260446548462,
|
|
"step": 3676
|
|
},
|
|
{
|
|
"epoch": 2.3405474220241884,
|
|
"grad_norm": 21.9055595938898,
|
|
"learning_rate": 1.4041015515379486e-06,
|
|
"loss": 1.3659839630126953,
|
|
"step": 3677
|
|
},
|
|
{
|
|
"epoch": 2.341183959261617,
|
|
"grad_norm": 9.562326576124976,
|
|
"learning_rate": 1.401529024592918e-06,
|
|
"loss": 0.4016745090484619,
|
|
"step": 3678
|
|
},
|
|
{
|
|
"epoch": 2.3418204964990452,
|
|
"grad_norm": 8.122059425645102,
|
|
"learning_rate": 1.3989584722572143e-06,
|
|
"loss": 0.2181989550590515,
|
|
"step": 3679
|
|
},
|
|
{
|
|
"epoch": 2.3424570337364736,
|
|
"grad_norm": 8.775741385145263,
|
|
"learning_rate": 1.3963898959413958e-06,
|
|
"loss": 0.5251392126083374,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 2.343093570973902,
|
|
"grad_norm": 12.806934367488545,
|
|
"learning_rate": 1.393823297054931e-06,
|
|
"loss": 0.3657844066619873,
|
|
"step": 3681
|
|
},
|
|
{
|
|
"epoch": 2.3437301082113304,
|
|
"grad_norm": 11.410244101635918,
|
|
"learning_rate": 1.3912586770062024e-06,
|
|
"loss": 0.6960375905036926,
|
|
"step": 3682
|
|
},
|
|
{
|
|
"epoch": 2.344366645448759,
|
|
"grad_norm": 9.2192755074986,
|
|
"learning_rate": 1.3886960372025093e-06,
|
|
"loss": 0.5058915019035339,
|
|
"step": 3683
|
|
},
|
|
{
|
|
"epoch": 2.3450031826861872,
|
|
"grad_norm": 12.820251241137715,
|
|
"learning_rate": 1.3861353790500648e-06,
|
|
"loss": 0.48139941692352295,
|
|
"step": 3684
|
|
},
|
|
{
|
|
"epoch": 2.3456397199236156,
|
|
"grad_norm": 9.015032547221407,
|
|
"learning_rate": 1.3835767039539927e-06,
|
|
"loss": 0.39420944452285767,
|
|
"step": 3685
|
|
},
|
|
{
|
|
"epoch": 2.346276257161044,
|
|
"grad_norm": 11.20665558473499,
|
|
"learning_rate": 1.3810200133183294e-06,
|
|
"loss": 0.2258508801460266,
|
|
"step": 3686
|
|
},
|
|
{
|
|
"epoch": 2.3469127943984724,
|
|
"grad_norm": 16.92148946423977,
|
|
"learning_rate": 1.3784653085460198e-06,
|
|
"loss": 0.7787349224090576,
|
|
"step": 3687
|
|
},
|
|
{
|
|
"epoch": 2.347549331635901,
|
|
"grad_norm": 9.930702156235032,
|
|
"learning_rate": 1.3759125910389265e-06,
|
|
"loss": 0.4799356460571289,
|
|
"step": 3688
|
|
},
|
|
{
|
|
"epoch": 2.3481858688733293,
|
|
"grad_norm": 9.677716675408867,
|
|
"learning_rate": 1.3733618621978106e-06,
|
|
"loss": 0.8267190456390381,
|
|
"step": 3689
|
|
},
|
|
{
|
|
"epoch": 2.3488224061107577,
|
|
"grad_norm": 10.166112821165283,
|
|
"learning_rate": 1.3708131234223526e-06,
|
|
"loss": 0.39371243119239807,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 2.349458943348186,
|
|
"grad_norm": 10.575227773552253,
|
|
"learning_rate": 1.3682663761111348e-06,
|
|
"loss": 0.26244211196899414,
|
|
"step": 3691
|
|
},
|
|
{
|
|
"epoch": 2.3500954805856145,
|
|
"grad_norm": 9.545223561877174,
|
|
"learning_rate": 1.3657216216616476e-06,
|
|
"loss": 0.35921522974967957,
|
|
"step": 3692
|
|
},
|
|
{
|
|
"epoch": 2.3507320178230424,
|
|
"grad_norm": 15.089034021230734,
|
|
"learning_rate": 1.3631788614702896e-06,
|
|
"loss": 0.3103064000606537,
|
|
"step": 3693
|
|
},
|
|
{
|
|
"epoch": 2.3513685550604713,
|
|
"grad_norm": 12.757867285846213,
|
|
"learning_rate": 1.3606380969323625e-06,
|
|
"loss": 0.8469514846801758,
|
|
"step": 3694
|
|
},
|
|
{
|
|
"epoch": 2.3520050922978992,
|
|
"grad_norm": 10.219037032437837,
|
|
"learning_rate": 1.3580993294420775e-06,
|
|
"loss": 0.25363725423812866,
|
|
"step": 3695
|
|
},
|
|
{
|
|
"epoch": 2.3526416295353276,
|
|
"grad_norm": 9.395656924052952,
|
|
"learning_rate": 1.355562560392542e-06,
|
|
"loss": 0.2070430964231491,
|
|
"step": 3696
|
|
},
|
|
{
|
|
"epoch": 2.353278166772756,
|
|
"grad_norm": 13.838000505289177,
|
|
"learning_rate": 1.3530277911757756e-06,
|
|
"loss": 0.3234326243400574,
|
|
"step": 3697
|
|
},
|
|
{
|
|
"epoch": 2.3539147040101844,
|
|
"grad_norm": 8.125871300091289,
|
|
"learning_rate": 1.3504950231826958e-06,
|
|
"loss": 0.40520572662353516,
|
|
"step": 3698
|
|
},
|
|
{
|
|
"epoch": 2.354551241247613,
|
|
"grad_norm": 9.23480525202698,
|
|
"learning_rate": 1.3479642578031216e-06,
|
|
"loss": 0.22022618353366852,
|
|
"step": 3699
|
|
},
|
|
{
|
|
"epoch": 2.3551877784850412,
|
|
"grad_norm": 14.856183006106253,
|
|
"learning_rate": 1.3454354964257737e-06,
|
|
"loss": 0.5824393033981323,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 2.3558243157224696,
|
|
"grad_norm": 9.315125647702207,
|
|
"learning_rate": 1.3429087404382773e-06,
|
|
"loss": 0.44092607498168945,
|
|
"step": 3701
|
|
},
|
|
{
|
|
"epoch": 2.356460852959898,
|
|
"grad_norm": 13.949875737013524,
|
|
"learning_rate": 1.3403839912271482e-06,
|
|
"loss": 1.0444612503051758,
|
|
"step": 3702
|
|
},
|
|
{
|
|
"epoch": 2.3570973901973264,
|
|
"grad_norm": 14.252694461991577,
|
|
"learning_rate": 1.337861250177811e-06,
|
|
"loss": 0.6289519667625427,
|
|
"step": 3703
|
|
},
|
|
{
|
|
"epoch": 2.357733927434755,
|
|
"grad_norm": 10.892049820100306,
|
|
"learning_rate": 1.335340518674581e-06,
|
|
"loss": 0.4143725335597992,
|
|
"step": 3704
|
|
},
|
|
{
|
|
"epoch": 2.3583704646721833,
|
|
"grad_norm": 7.999663674342672,
|
|
"learning_rate": 1.3328217981006742e-06,
|
|
"loss": 0.28366735577583313,
|
|
"step": 3705
|
|
},
|
|
{
|
|
"epoch": 2.3590070019096117,
|
|
"grad_norm": 7.264284970868845,
|
|
"learning_rate": 1.3303050898382015e-06,
|
|
"loss": 0.26763850450515747,
|
|
"step": 3706
|
|
},
|
|
{
|
|
"epoch": 2.35964353914704,
|
|
"grad_norm": 8.267938251337856,
|
|
"learning_rate": 1.327790395268172e-06,
|
|
"loss": 0.29509538412094116,
|
|
"step": 3707
|
|
},
|
|
{
|
|
"epoch": 2.3602800763844685,
|
|
"grad_norm": 17.361067481080763,
|
|
"learning_rate": 1.3252777157704876e-06,
|
|
"loss": 0.35680949687957764,
|
|
"step": 3708
|
|
},
|
|
{
|
|
"epoch": 2.360916613621897,
|
|
"grad_norm": 10.301732022860866,
|
|
"learning_rate": 1.3227670527239455e-06,
|
|
"loss": 0.30002152919769287,
|
|
"step": 3709
|
|
},
|
|
{
|
|
"epoch": 2.3615531508593253,
|
|
"grad_norm": 14.611437530806262,
|
|
"learning_rate": 1.320258407506236e-06,
|
|
"loss": 0.4250519871711731,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 2.3621896880967537,
|
|
"grad_norm": 12.930521859583482,
|
|
"learning_rate": 1.3177517814939411e-06,
|
|
"loss": 0.5760447978973389,
|
|
"step": 3711
|
|
},
|
|
{
|
|
"epoch": 2.362826225334182,
|
|
"grad_norm": 10.870235332190978,
|
|
"learning_rate": 1.3152471760625368e-06,
|
|
"loss": 0.2658051550388336,
|
|
"step": 3712
|
|
},
|
|
{
|
|
"epoch": 2.3634627625716105,
|
|
"grad_norm": 7.405506116873889,
|
|
"learning_rate": 1.3127445925863875e-06,
|
|
"loss": 0.23447053134441376,
|
|
"step": 3713
|
|
},
|
|
{
|
|
"epoch": 2.364099299809039,
|
|
"grad_norm": 16.75744131230429,
|
|
"learning_rate": 1.3102440324387534e-06,
|
|
"loss": 0.5982019901275635,
|
|
"step": 3714
|
|
},
|
|
{
|
|
"epoch": 2.3647358370464673,
|
|
"grad_norm": 10.673282715242074,
|
|
"learning_rate": 1.307745496991779e-06,
|
|
"loss": 0.5700613260269165,
|
|
"step": 3715
|
|
},
|
|
{
|
|
"epoch": 2.3653723742838957,
|
|
"grad_norm": 8.21014487686139,
|
|
"learning_rate": 1.3052489876165004e-06,
|
|
"loss": 0.41640716791152954,
|
|
"step": 3716
|
|
},
|
|
{
|
|
"epoch": 2.366008911521324,
|
|
"grad_norm": 9.297449050392139,
|
|
"learning_rate": 1.3027545056828395e-06,
|
|
"loss": 0.34303605556488037,
|
|
"step": 3717
|
|
},
|
|
{
|
|
"epoch": 2.3666454487587525,
|
|
"grad_norm": 8.899360190569867,
|
|
"learning_rate": 1.3002620525596116e-06,
|
|
"loss": 0.4346969425678253,
|
|
"step": 3718
|
|
},
|
|
{
|
|
"epoch": 2.367281985996181,
|
|
"grad_norm": 8.992667699431827,
|
|
"learning_rate": 1.2977716296145093e-06,
|
|
"loss": 0.4370039999485016,
|
|
"step": 3719
|
|
},
|
|
{
|
|
"epoch": 2.3679185232336093,
|
|
"grad_norm": 11.313717968304134,
|
|
"learning_rate": 1.2952832382141207e-06,
|
|
"loss": 0.47830891609191895,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 2.3685550604710377,
|
|
"grad_norm": 10.377423833062291,
|
|
"learning_rate": 1.2927968797239133e-06,
|
|
"loss": 0.36754053831100464,
|
|
"step": 3721
|
|
},
|
|
{
|
|
"epoch": 2.369191597708466,
|
|
"grad_norm": 12.771258128785176,
|
|
"learning_rate": 1.2903125555082402e-06,
|
|
"loss": 0.35375094413757324,
|
|
"step": 3722
|
|
},
|
|
{
|
|
"epoch": 2.3698281349458945,
|
|
"grad_norm": 9.387597548109925,
|
|
"learning_rate": 1.2878302669303377e-06,
|
|
"loss": 0.7419187426567078,
|
|
"step": 3723
|
|
},
|
|
{
|
|
"epoch": 2.370464672183323,
|
|
"grad_norm": 14.36263963222831,
|
|
"learning_rate": 1.2853500153523308e-06,
|
|
"loss": 0.433469295501709,
|
|
"step": 3724
|
|
},
|
|
{
|
|
"epoch": 2.3711012094207513,
|
|
"grad_norm": 15.116553305119123,
|
|
"learning_rate": 1.2828718021352155e-06,
|
|
"loss": 0.8447028398513794,
|
|
"step": 3725
|
|
},
|
|
{
|
|
"epoch": 2.3717377466581793,
|
|
"grad_norm": 16.418494635265372,
|
|
"learning_rate": 1.2803956286388808e-06,
|
|
"loss": 0.2990822196006775,
|
|
"step": 3726
|
|
},
|
|
{
|
|
"epoch": 2.372374283895608,
|
|
"grad_norm": 12.237072180575987,
|
|
"learning_rate": 1.2779214962220888e-06,
|
|
"loss": 0.6836670637130737,
|
|
"step": 3727
|
|
},
|
|
{
|
|
"epoch": 2.373010821133036,
|
|
"grad_norm": 12.27843950813826,
|
|
"learning_rate": 1.2754494062424854e-06,
|
|
"loss": 0.403628408908844,
|
|
"step": 3728
|
|
},
|
|
{
|
|
"epoch": 2.373647358370465,
|
|
"grad_norm": 11.134214718224426,
|
|
"learning_rate": 1.2729793600565937e-06,
|
|
"loss": 0.38044747710227966,
|
|
"step": 3729
|
|
},
|
|
{
|
|
"epoch": 2.374283895607893,
|
|
"grad_norm": 15.165916020979232,
|
|
"learning_rate": 1.2705113590198155e-06,
|
|
"loss": 1.098738670349121,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 2.3749204328453213,
|
|
"grad_norm": 8.261495854160689,
|
|
"learning_rate": 1.2680454044864342e-06,
|
|
"loss": 0.2793627977371216,
|
|
"step": 3731
|
|
},
|
|
{
|
|
"epoch": 2.3755569700827497,
|
|
"grad_norm": 7.863877940829437,
|
|
"learning_rate": 1.265581497809602e-06,
|
|
"loss": 0.3842894434928894,
|
|
"step": 3732
|
|
},
|
|
{
|
|
"epoch": 2.376193507320178,
|
|
"grad_norm": 11.758657961547582,
|
|
"learning_rate": 1.2631196403413565e-06,
|
|
"loss": 1.5793788433074951,
|
|
"step": 3733
|
|
},
|
|
{
|
|
"epoch": 2.3768300445576065,
|
|
"grad_norm": 17.24367253985176,
|
|
"learning_rate": 1.2606598334326049e-06,
|
|
"loss": 0.8001893162727356,
|
|
"step": 3734
|
|
},
|
|
{
|
|
"epoch": 2.377466581795035,
|
|
"grad_norm": 15.980789517537973,
|
|
"learning_rate": 1.2582020784331318e-06,
|
|
"loss": 0.6164204478263855,
|
|
"step": 3735
|
|
},
|
|
{
|
|
"epoch": 2.3781031190324633,
|
|
"grad_norm": 15.81210521828585,
|
|
"learning_rate": 1.2557463766915922e-06,
|
|
"loss": 0.4665430784225464,
|
|
"step": 3736
|
|
},
|
|
{
|
|
"epoch": 2.3787396562698917,
|
|
"grad_norm": 10.837136433926592,
|
|
"learning_rate": 1.2532927295555214e-06,
|
|
"loss": 0.31216520071029663,
|
|
"step": 3737
|
|
},
|
|
{
|
|
"epoch": 2.37937619350732,
|
|
"grad_norm": 15.59644961979668,
|
|
"learning_rate": 1.250841138371321e-06,
|
|
"loss": 0.23173931241035461,
|
|
"step": 3738
|
|
},
|
|
{
|
|
"epoch": 2.3800127307447485,
|
|
"grad_norm": 27.273050075745154,
|
|
"learning_rate": 1.2483916044842665e-06,
|
|
"loss": 0.5167461037635803,
|
|
"step": 3739
|
|
},
|
|
{
|
|
"epoch": 2.380649267982177,
|
|
"grad_norm": 8.845762808947617,
|
|
"learning_rate": 1.2459441292385049e-06,
|
|
"loss": 0.5043585300445557,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 2.3812858052196053,
|
|
"grad_norm": 9.78578565378147,
|
|
"learning_rate": 1.2434987139770522e-06,
|
|
"loss": 0.3009123206138611,
|
|
"step": 3741
|
|
},
|
|
{
|
|
"epoch": 2.3819223424570337,
|
|
"grad_norm": 10.419282093700664,
|
|
"learning_rate": 1.2410553600417946e-06,
|
|
"loss": 1.1111235618591309,
|
|
"step": 3742
|
|
},
|
|
{
|
|
"epoch": 2.382558879694462,
|
|
"grad_norm": 12.761737956411759,
|
|
"learning_rate": 1.2386140687734898e-06,
|
|
"loss": 1.0053675174713135,
|
|
"step": 3743
|
|
},
|
|
{
|
|
"epoch": 2.3831954169318905,
|
|
"grad_norm": 7.132611172936865,
|
|
"learning_rate": 1.2361748415117619e-06,
|
|
"loss": 0.28074517846107483,
|
|
"step": 3744
|
|
},
|
|
{
|
|
"epoch": 2.383831954169319,
|
|
"grad_norm": 12.054551595891324,
|
|
"learning_rate": 1.2337376795950967e-06,
|
|
"loss": 0.26301613450050354,
|
|
"step": 3745
|
|
},
|
|
{
|
|
"epoch": 2.3844684914067473,
|
|
"grad_norm": 21.629165559579967,
|
|
"learning_rate": 1.2313025843608566e-06,
|
|
"loss": 0.4631958603858948,
|
|
"step": 3746
|
|
},
|
|
{
|
|
"epoch": 2.3851050286441757,
|
|
"grad_norm": 11.758611509943327,
|
|
"learning_rate": 1.2288695571452636e-06,
|
|
"loss": 0.5927736163139343,
|
|
"step": 3747
|
|
},
|
|
{
|
|
"epoch": 2.385741565881604,
|
|
"grad_norm": 10.403057055436687,
|
|
"learning_rate": 1.2264385992834072e-06,
|
|
"loss": 0.4360220432281494,
|
|
"step": 3748
|
|
},
|
|
{
|
|
"epoch": 2.3863781031190325,
|
|
"grad_norm": 13.989298831570544,
|
|
"learning_rate": 1.2240097121092382e-06,
|
|
"loss": 0.23291362822055817,
|
|
"step": 3749
|
|
},
|
|
{
|
|
"epoch": 2.387014640356461,
|
|
"grad_norm": 18.028698512975225,
|
|
"learning_rate": 1.2215828969555771e-06,
|
|
"loss": 0.2985227108001709,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 2.3876511775938893,
|
|
"grad_norm": 10.613505530236429,
|
|
"learning_rate": 1.219158155154102e-06,
|
|
"loss": 0.44736921787261963,
|
|
"step": 3751
|
|
},
|
|
{
|
|
"epoch": 2.3882877148313177,
|
|
"grad_norm": 12.855100341515485,
|
|
"learning_rate": 1.216735488035356e-06,
|
|
"loss": 0.44182783365249634,
|
|
"step": 3752
|
|
},
|
|
{
|
|
"epoch": 2.388924252068746,
|
|
"grad_norm": 10.414754482940225,
|
|
"learning_rate": 1.2143148969287405e-06,
|
|
"loss": 0.4356725215911865,
|
|
"step": 3753
|
|
},
|
|
{
|
|
"epoch": 2.3895607893061745,
|
|
"grad_norm": 8.769305233582422,
|
|
"learning_rate": 1.2118963831625252e-06,
|
|
"loss": 0.43675944209098816,
|
|
"step": 3754
|
|
},
|
|
{
|
|
"epoch": 2.390197326543603,
|
|
"grad_norm": 14.660117839785551,
|
|
"learning_rate": 1.2094799480638287e-06,
|
|
"loss": 0.7107505798339844,
|
|
"step": 3755
|
|
},
|
|
{
|
|
"epoch": 2.3908338637810314,
|
|
"grad_norm": 11.421187401800076,
|
|
"learning_rate": 1.2070655929586395e-06,
|
|
"loss": 0.4475456476211548,
|
|
"step": 3756
|
|
},
|
|
{
|
|
"epoch": 2.3914704010184598,
|
|
"grad_norm": 9.641057919738367,
|
|
"learning_rate": 1.2046533191717985e-06,
|
|
"loss": 0.5334263443946838,
|
|
"step": 3757
|
|
},
|
|
{
|
|
"epoch": 2.392106938255888,
|
|
"grad_norm": 9.625328248093474,
|
|
"learning_rate": 1.2022431280270075e-06,
|
|
"loss": 0.4311234652996063,
|
|
"step": 3758
|
|
},
|
|
{
|
|
"epoch": 2.392743475493316,
|
|
"grad_norm": 16.962406680330254,
|
|
"learning_rate": 1.1998350208468217e-06,
|
|
"loss": 0.8091346621513367,
|
|
"step": 3759
|
|
},
|
|
{
|
|
"epoch": 2.393380012730745,
|
|
"grad_norm": 13.138031971976169,
|
|
"learning_rate": 1.197428998952659e-06,
|
|
"loss": 0.5169646143913269,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 2.394016549968173,
|
|
"grad_norm": 22.08801596951749,
|
|
"learning_rate": 1.1950250636647887e-06,
|
|
"loss": 0.6854633092880249,
|
|
"step": 3761
|
|
},
|
|
{
|
|
"epoch": 2.3946530872056018,
|
|
"grad_norm": 11.698573413964109,
|
|
"learning_rate": 1.192623216302332e-06,
|
|
"loss": 0.44086697697639465,
|
|
"step": 3762
|
|
},
|
|
{
|
|
"epoch": 2.3952896244430297,
|
|
"grad_norm": 15.886156214297149,
|
|
"learning_rate": 1.1902234581832723e-06,
|
|
"loss": 0.6274378895759583,
|
|
"step": 3763
|
|
},
|
|
{
|
|
"epoch": 2.395926161680458,
|
|
"grad_norm": 12.206434564474439,
|
|
"learning_rate": 1.1878257906244412e-06,
|
|
"loss": 0.2722568213939667,
|
|
"step": 3764
|
|
},
|
|
{
|
|
"epoch": 2.3965626989178865,
|
|
"grad_norm": 12.071911618002728,
|
|
"learning_rate": 1.1854302149415242e-06,
|
|
"loss": 0.7645981907844543,
|
|
"step": 3765
|
|
},
|
|
{
|
|
"epoch": 2.397199236155315,
|
|
"grad_norm": 16.724975946030447,
|
|
"learning_rate": 1.1830367324490577e-06,
|
|
"loss": 1.381920576095581,
|
|
"step": 3766
|
|
},
|
|
{
|
|
"epoch": 2.3978357733927433,
|
|
"grad_norm": 13.074704894380814,
|
|
"learning_rate": 1.1806453444604354e-06,
|
|
"loss": 0.23027241230010986,
|
|
"step": 3767
|
|
},
|
|
{
|
|
"epoch": 2.3984723106301717,
|
|
"grad_norm": 7.501926380582122,
|
|
"learning_rate": 1.178256052287891e-06,
|
|
"loss": 0.773628830909729,
|
|
"step": 3768
|
|
},
|
|
{
|
|
"epoch": 2.3991088478676,
|
|
"grad_norm": 10.976355752611195,
|
|
"learning_rate": 1.1758688572425191e-06,
|
|
"loss": 0.28537803888320923,
|
|
"step": 3769
|
|
},
|
|
{
|
|
"epoch": 2.3997453851050286,
|
|
"grad_norm": 15.216087889854332,
|
|
"learning_rate": 1.173483760634257e-06,
|
|
"loss": 0.3113643527030945,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 2.400381922342457,
|
|
"grad_norm": 9.511957746561064,
|
|
"learning_rate": 1.1711007637718925e-06,
|
|
"loss": 0.25821590423583984,
|
|
"step": 3771
|
|
},
|
|
{
|
|
"epoch": 2.4010184595798854,
|
|
"grad_norm": 13.948635189961193,
|
|
"learning_rate": 1.1687198679630586e-06,
|
|
"loss": 0.37554019689559937,
|
|
"step": 3772
|
|
},
|
|
{
|
|
"epoch": 2.4016549968173138,
|
|
"grad_norm": 9.362461620607593,
|
|
"learning_rate": 1.1663410745142416e-06,
|
|
"loss": 0.2878197133541107,
|
|
"step": 3773
|
|
},
|
|
{
|
|
"epoch": 2.402291534054742,
|
|
"grad_norm": 9.39243982324823,
|
|
"learning_rate": 1.1639643847307685e-06,
|
|
"loss": 0.3661644160747528,
|
|
"step": 3774
|
|
},
|
|
{
|
|
"epoch": 2.4029280712921706,
|
|
"grad_norm": 10.569202056049091,
|
|
"learning_rate": 1.161589799916814e-06,
|
|
"loss": 0.588375449180603,
|
|
"step": 3775
|
|
},
|
|
{
|
|
"epoch": 2.403564608529599,
|
|
"grad_norm": 9.96661944037226,
|
|
"learning_rate": 1.159217321375396e-06,
|
|
"loss": 0.5742862224578857,
|
|
"step": 3776
|
|
},
|
|
{
|
|
"epoch": 2.4042011457670274,
|
|
"grad_norm": 22.19818033196068,
|
|
"learning_rate": 1.1568469504083818e-06,
|
|
"loss": 0.49878042936325073,
|
|
"step": 3777
|
|
},
|
|
{
|
|
"epoch": 2.4048376830044558,
|
|
"grad_norm": 17.590500198044325,
|
|
"learning_rate": 1.1544786883164743e-06,
|
|
"loss": 0.4632866382598877,
|
|
"step": 3778
|
|
},
|
|
{
|
|
"epoch": 2.405474220241884,
|
|
"grad_norm": 14.108065744139283,
|
|
"learning_rate": 1.152112536399224e-06,
|
|
"loss": 0.38641777634620667,
|
|
"step": 3779
|
|
},
|
|
{
|
|
"epoch": 2.4061107574793126,
|
|
"grad_norm": 8.107163245572407,
|
|
"learning_rate": 1.1497484959550254e-06,
|
|
"loss": 0.35480377078056335,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 2.406747294716741,
|
|
"grad_norm": 10.467818303600838,
|
|
"learning_rate": 1.1473865682811097e-06,
|
|
"loss": 0.4747023582458496,
|
|
"step": 3781
|
|
},
|
|
{
|
|
"epoch": 2.4073838319541694,
|
|
"grad_norm": 12.019573075340698,
|
|
"learning_rate": 1.1450267546735516e-06,
|
|
"loss": 0.8633090257644653,
|
|
"step": 3782
|
|
},
|
|
{
|
|
"epoch": 2.408020369191598,
|
|
"grad_norm": 7.77339547449022,
|
|
"learning_rate": 1.1426690564272647e-06,
|
|
"loss": 0.5159689784049988,
|
|
"step": 3783
|
|
},
|
|
{
|
|
"epoch": 2.408656906429026,
|
|
"grad_norm": 12.864002851351529,
|
|
"learning_rate": 1.1403134748360023e-06,
|
|
"loss": 0.561131477355957,
|
|
"step": 3784
|
|
},
|
|
{
|
|
"epoch": 2.4092934436664546,
|
|
"grad_norm": 9.417541984936822,
|
|
"learning_rate": 1.1379600111923538e-06,
|
|
"loss": 0.14377526938915253,
|
|
"step": 3785
|
|
},
|
|
{
|
|
"epoch": 2.409929980903883,
|
|
"grad_norm": 14.314802212997668,
|
|
"learning_rate": 1.1356086667877526e-06,
|
|
"loss": 0.7224687337875366,
|
|
"step": 3786
|
|
},
|
|
{
|
|
"epoch": 2.4105665181413114,
|
|
"grad_norm": 11.384630848147092,
|
|
"learning_rate": 1.1332594429124633e-06,
|
|
"loss": 0.20363056659698486,
|
|
"step": 3787
|
|
},
|
|
{
|
|
"epoch": 2.41120305537874,
|
|
"grad_norm": 9.607657429807452,
|
|
"learning_rate": 1.1309123408555883e-06,
|
|
"loss": 0.5656764507293701,
|
|
"step": 3788
|
|
},
|
|
{
|
|
"epoch": 2.411839592616168,
|
|
"grad_norm": 9.635754273335122,
|
|
"learning_rate": 1.1285673619050657e-06,
|
|
"loss": 0.3478263318538666,
|
|
"step": 3789
|
|
},
|
|
{
|
|
"epoch": 2.4124761298535966,
|
|
"grad_norm": 8.287801158986275,
|
|
"learning_rate": 1.1262245073476725e-06,
|
|
"loss": 0.3498222231864929,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 2.413112667091025,
|
|
"grad_norm": 10.369047085113845,
|
|
"learning_rate": 1.123883778469011e-06,
|
|
"loss": 0.40166744589805603,
|
|
"step": 3791
|
|
},
|
|
{
|
|
"epoch": 2.413749204328453,
|
|
"grad_norm": 10.61015162872025,
|
|
"learning_rate": 1.1215451765535273e-06,
|
|
"loss": 0.3910064697265625,
|
|
"step": 3792
|
|
},
|
|
{
|
|
"epoch": 2.414385741565882,
|
|
"grad_norm": 9.242009512504998,
|
|
"learning_rate": 1.1192087028844945e-06,
|
|
"loss": 0.7114223837852478,
|
|
"step": 3793
|
|
},
|
|
{
|
|
"epoch": 2.4150222788033098,
|
|
"grad_norm": 16.541659582866036,
|
|
"learning_rate": 1.1168743587440179e-06,
|
|
"loss": 1.0183207988739014,
|
|
"step": 3794
|
|
},
|
|
{
|
|
"epoch": 2.4156588160407386,
|
|
"grad_norm": 14.033990169863475,
|
|
"learning_rate": 1.1145421454130361e-06,
|
|
"loss": 0.44376140832901,
|
|
"step": 3795
|
|
},
|
|
{
|
|
"epoch": 2.4162953532781666,
|
|
"grad_norm": 11.656556714232906,
|
|
"learning_rate": 1.1122120641713159e-06,
|
|
"loss": 0.25253862142562866,
|
|
"step": 3796
|
|
},
|
|
{
|
|
"epoch": 2.416931890515595,
|
|
"grad_norm": 12.314637203602851,
|
|
"learning_rate": 1.1098841162974605e-06,
|
|
"loss": 0.4938369393348694,
|
|
"step": 3797
|
|
},
|
|
{
|
|
"epoch": 2.4175684277530234,
|
|
"grad_norm": 12.14700600555856,
|
|
"learning_rate": 1.1075583030688924e-06,
|
|
"loss": 0.28885698318481445,
|
|
"step": 3798
|
|
},
|
|
{
|
|
"epoch": 2.418204964990452,
|
|
"grad_norm": 11.805020765397705,
|
|
"learning_rate": 1.1052346257618724e-06,
|
|
"loss": 0.3935962915420532,
|
|
"step": 3799
|
|
},
|
|
{
|
|
"epoch": 2.41884150222788,
|
|
"grad_norm": 14.522669527435724,
|
|
"learning_rate": 1.1029130856514835e-06,
|
|
"loss": 0.2798091769218445,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 2.4194780394653086,
|
|
"grad_norm": 17.99675844279146,
|
|
"learning_rate": 1.1005936840116377e-06,
|
|
"loss": 2.819949150085449,
|
|
"step": 3801
|
|
},
|
|
{
|
|
"epoch": 2.420114576702737,
|
|
"grad_norm": 14.060814447743827,
|
|
"learning_rate": 1.0982764221150715e-06,
|
|
"loss": 0.412301629781723,
|
|
"step": 3802
|
|
},
|
|
{
|
|
"epoch": 2.4207511139401654,
|
|
"grad_norm": 11.723081349594906,
|
|
"learning_rate": 1.0959613012333526e-06,
|
|
"loss": 0.25898265838623047,
|
|
"step": 3803
|
|
},
|
|
{
|
|
"epoch": 2.421387651177594,
|
|
"grad_norm": 7.855819697294254,
|
|
"learning_rate": 1.0936483226368684e-06,
|
|
"loss": 0.16626277565956116,
|
|
"step": 3804
|
|
},
|
|
{
|
|
"epoch": 2.422024188415022,
|
|
"grad_norm": 8.328223383028789,
|
|
"learning_rate": 1.0913374875948329e-06,
|
|
"loss": 0.28895729780197144,
|
|
"step": 3805
|
|
},
|
|
{
|
|
"epoch": 2.4226607256524506,
|
|
"grad_norm": 10.557735560596633,
|
|
"learning_rate": 1.0890287973752845e-06,
|
|
"loss": 0.5577061176300049,
|
|
"step": 3806
|
|
},
|
|
{
|
|
"epoch": 2.423297262889879,
|
|
"grad_norm": 8.996233245177725,
|
|
"learning_rate": 1.0867222532450823e-06,
|
|
"loss": 1.2347557544708252,
|
|
"step": 3807
|
|
},
|
|
{
|
|
"epoch": 2.4239338001273074,
|
|
"grad_norm": 14.016253040281782,
|
|
"learning_rate": 1.0844178564699092e-06,
|
|
"loss": 1.2900872230529785,
|
|
"step": 3808
|
|
},
|
|
{
|
|
"epoch": 2.424570337364736,
|
|
"grad_norm": 10.886693344909176,
|
|
"learning_rate": 1.082115608314272e-06,
|
|
"loss": 0.40640875697135925,
|
|
"step": 3809
|
|
},
|
|
{
|
|
"epoch": 2.425206874602164,
|
|
"grad_norm": 12.222734334172792,
|
|
"learning_rate": 1.0798155100414953e-06,
|
|
"loss": 0.427412748336792,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 2.4258434118395926,
|
|
"grad_norm": 8.353529674874979,
|
|
"learning_rate": 1.0775175629137252e-06,
|
|
"loss": 0.2718370258808136,
|
|
"step": 3811
|
|
},
|
|
{
|
|
"epoch": 2.426479949077021,
|
|
"grad_norm": 13.897497422723054,
|
|
"learning_rate": 1.0752217681919263e-06,
|
|
"loss": 0.5335069894790649,
|
|
"step": 3812
|
|
},
|
|
{
|
|
"epoch": 2.4271164863144494,
|
|
"grad_norm": 15.600924585359062,
|
|
"learning_rate": 1.0729281271358838e-06,
|
|
"loss": 0.4916003346443176,
|
|
"step": 3813
|
|
},
|
|
{
|
|
"epoch": 2.427753023551878,
|
|
"grad_norm": 8.757702880845267,
|
|
"learning_rate": 1.0706366410042006e-06,
|
|
"loss": 0.2865592837333679,
|
|
"step": 3814
|
|
},
|
|
{
|
|
"epoch": 2.4283895607893062,
|
|
"grad_norm": 12.820178121170283,
|
|
"learning_rate": 1.0683473110542946e-06,
|
|
"loss": 0.29609590768814087,
|
|
"step": 3815
|
|
},
|
|
{
|
|
"epoch": 2.4290260980267346,
|
|
"grad_norm": 7.820138710598236,
|
|
"learning_rate": 1.0660601385424063e-06,
|
|
"loss": 0.5924516916275024,
|
|
"step": 3816
|
|
},
|
|
{
|
|
"epoch": 2.429662635264163,
|
|
"grad_norm": 11.94479420786885,
|
|
"learning_rate": 1.0637751247235866e-06,
|
|
"loss": 0.7252217531204224,
|
|
"step": 3817
|
|
},
|
|
{
|
|
"epoch": 2.4302991725015914,
|
|
"grad_norm": 9.864533670982793,
|
|
"learning_rate": 1.061492270851705e-06,
|
|
"loss": 0.22076858580112457,
|
|
"step": 3818
|
|
},
|
|
{
|
|
"epoch": 2.43093570973902,
|
|
"grad_norm": 6.5697555037533775,
|
|
"learning_rate": 1.0592115781794427e-06,
|
|
"loss": 0.22088488936424255,
|
|
"step": 3819
|
|
},
|
|
{
|
|
"epoch": 2.4315722469764482,
|
|
"grad_norm": 11.366776949775208,
|
|
"learning_rate": 1.0569330479583019e-06,
|
|
"loss": 0.31297528743743896,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 2.4322087842138767,
|
|
"grad_norm": 14.907722172487633,
|
|
"learning_rate": 1.0546566814385866e-06,
|
|
"loss": 0.6428528428077698,
|
|
"step": 3821
|
|
},
|
|
{
|
|
"epoch": 2.432845321451305,
|
|
"grad_norm": 9.9467471887832,
|
|
"learning_rate": 1.052382479869426e-06,
|
|
"loss": 0.6574569940567017,
|
|
"step": 3822
|
|
},
|
|
{
|
|
"epoch": 2.4334818586887335,
|
|
"grad_norm": 17.2034045866165,
|
|
"learning_rate": 1.0501104444987536e-06,
|
|
"loss": 0.49759557843208313,
|
|
"step": 3823
|
|
},
|
|
{
|
|
"epoch": 2.434118395926162,
|
|
"grad_norm": 12.884996001729155,
|
|
"learning_rate": 1.0478405765733157e-06,
|
|
"loss": 0.33131590485572815,
|
|
"step": 3824
|
|
},
|
|
{
|
|
"epoch": 2.4347549331635903,
|
|
"grad_norm": 9.493850689262331,
|
|
"learning_rate": 1.0455728773386691e-06,
|
|
"loss": 0.675175130367279,
|
|
"step": 3825
|
|
},
|
|
{
|
|
"epoch": 2.4353914704010187,
|
|
"grad_norm": 17.20863386024244,
|
|
"learning_rate": 1.0433073480391848e-06,
|
|
"loss": 0.3058862090110779,
|
|
"step": 3826
|
|
},
|
|
{
|
|
"epoch": 2.4360280076384466,
|
|
"grad_norm": 10.466162475421772,
|
|
"learning_rate": 1.0410439899180347e-06,
|
|
"loss": 0.3168693482875824,
|
|
"step": 3827
|
|
},
|
|
{
|
|
"epoch": 2.4366645448758755,
|
|
"grad_norm": 13.166745994255525,
|
|
"learning_rate": 1.0387828042172072e-06,
|
|
"loss": 1.440507411956787,
|
|
"step": 3828
|
|
},
|
|
{
|
|
"epoch": 2.4373010821133034,
|
|
"grad_norm": 10.833093993085129,
|
|
"learning_rate": 1.0365237921774952e-06,
|
|
"loss": 0.30918723344802856,
|
|
"step": 3829
|
|
},
|
|
{
|
|
"epoch": 2.437937619350732,
|
|
"grad_norm": 11.116131242470681,
|
|
"learning_rate": 1.0342669550384982e-06,
|
|
"loss": 0.428306519985199,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 2.4385741565881602,
|
|
"grad_norm": 10.634908492128861,
|
|
"learning_rate": 1.032012294038624e-06,
|
|
"loss": 0.19637051224708557,
|
|
"step": 3831
|
|
},
|
|
{
|
|
"epoch": 2.4392106938255886,
|
|
"grad_norm": 9.013619391139507,
|
|
"learning_rate": 1.0297598104150836e-06,
|
|
"loss": 0.37704598903656006,
|
|
"step": 3832
|
|
},
|
|
{
|
|
"epoch": 2.439847231063017,
|
|
"grad_norm": 9.922329764110929,
|
|
"learning_rate": 1.0275095054038998e-06,
|
|
"loss": 0.25277867913246155,
|
|
"step": 3833
|
|
},
|
|
{
|
|
"epoch": 2.4404837683004454,
|
|
"grad_norm": 9.473928918102837,
|
|
"learning_rate": 1.0252613802398887e-06,
|
|
"loss": 0.5325944423675537,
|
|
"step": 3834
|
|
},
|
|
{
|
|
"epoch": 2.441120305537874,
|
|
"grad_norm": 11.327363252811761,
|
|
"learning_rate": 1.023015436156682e-06,
|
|
"loss": 0.3226351737976074,
|
|
"step": 3835
|
|
},
|
|
{
|
|
"epoch": 2.4417568427753022,
|
|
"grad_norm": 12.635263437424054,
|
|
"learning_rate": 1.020771674386707e-06,
|
|
"loss": 0.3378393352031708,
|
|
"step": 3836
|
|
},
|
|
{
|
|
"epoch": 2.4423933800127307,
|
|
"grad_norm": 8.004090298554114,
|
|
"learning_rate": 1.0185300961611965e-06,
|
|
"loss": 0.3406223654747009,
|
|
"step": 3837
|
|
},
|
|
{
|
|
"epoch": 2.443029917250159,
|
|
"grad_norm": 8.953996713880791,
|
|
"learning_rate": 1.0162907027101826e-06,
|
|
"loss": 0.5684438943862915,
|
|
"step": 3838
|
|
},
|
|
{
|
|
"epoch": 2.4436664544875875,
|
|
"grad_norm": 8.149317314762413,
|
|
"learning_rate": 1.014053495262503e-06,
|
|
"loss": 0.1832207292318344,
|
|
"step": 3839
|
|
},
|
|
{
|
|
"epoch": 2.444302991725016,
|
|
"grad_norm": 13.813658586031599,
|
|
"learning_rate": 1.011818475045792e-06,
|
|
"loss": 0.2514493465423584,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 2.4449395289624443,
|
|
"grad_norm": 9.074076999622344,
|
|
"learning_rate": 1.0095856432864847e-06,
|
|
"loss": 0.30189287662506104,
|
|
"step": 3841
|
|
},
|
|
{
|
|
"epoch": 2.4455760661998727,
|
|
"grad_norm": 13.241646581537935,
|
|
"learning_rate": 1.007355001209815e-06,
|
|
"loss": 0.4534422755241394,
|
|
"step": 3842
|
|
},
|
|
{
|
|
"epoch": 2.446212603437301,
|
|
"grad_norm": 6.572917170512304,
|
|
"learning_rate": 1.0051265500398183e-06,
|
|
"loss": 0.3295198678970337,
|
|
"step": 3843
|
|
},
|
|
{
|
|
"epoch": 2.4468491406747295,
|
|
"grad_norm": 10.286209203796565,
|
|
"learning_rate": 1.0029002909993207e-06,
|
|
"loss": 0.2722637951374054,
|
|
"step": 3844
|
|
},
|
|
{
|
|
"epoch": 2.447485677912158,
|
|
"grad_norm": 11.733757980427388,
|
|
"learning_rate": 1.0006762253099544e-06,
|
|
"loss": 0.42865797877311707,
|
|
"step": 3845
|
|
},
|
|
{
|
|
"epoch": 2.4481222151495863,
|
|
"grad_norm": 9.607900959176765,
|
|
"learning_rate": 9.984543541921414e-07,
|
|
"loss": 0.7750614285469055,
|
|
"step": 3846
|
|
},
|
|
{
|
|
"epoch": 2.4487587523870147,
|
|
"grad_norm": 14.099006354407399,
|
|
"learning_rate": 9.962346788651024e-07,
|
|
"loss": 0.3322318196296692,
|
|
"step": 3847
|
|
},
|
|
{
|
|
"epoch": 2.449395289624443,
|
|
"grad_norm": 9.773312345373958,
|
|
"learning_rate": 9.940172005468513e-07,
|
|
"loss": 0.2014181762933731,
|
|
"step": 3848
|
|
},
|
|
{
|
|
"epoch": 2.4500318268618715,
|
|
"grad_norm": 11.983172594501728,
|
|
"learning_rate": 9.918019204541979e-07,
|
|
"loss": 0.28286194801330566,
|
|
"step": 3849
|
|
},
|
|
{
|
|
"epoch": 2.4506683640993,
|
|
"grad_norm": 13.473466908382198,
|
|
"learning_rate": 9.89588839802746e-07,
|
|
"loss": 0.6655032634735107,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 2.4513049013367283,
|
|
"grad_norm": 6.089497199138919,
|
|
"learning_rate": 9.8737795980689e-07,
|
|
"loss": 0.15333639085292816,
|
|
"step": 3851
|
|
},
|
|
{
|
|
"epoch": 2.4519414385741567,
|
|
"grad_norm": 10.061578182572855,
|
|
"learning_rate": 9.851692816798214e-07,
|
|
"loss": 0.7303205132484436,
|
|
"step": 3852
|
|
},
|
|
{
|
|
"epoch": 2.452577975811585,
|
|
"grad_norm": 10.28287128520656,
|
|
"learning_rate": 9.829628066335196e-07,
|
|
"loss": 0.23664222657680511,
|
|
"step": 3853
|
|
},
|
|
{
|
|
"epoch": 2.4532145130490135,
|
|
"grad_norm": 11.125450201620936,
|
|
"learning_rate": 9.807585358787552e-07,
|
|
"loss": 0.7476584911346436,
|
|
"step": 3854
|
|
},
|
|
{
|
|
"epoch": 2.453851050286442,
|
|
"grad_norm": 14.15945730067247,
|
|
"learning_rate": 9.785564706250895e-07,
|
|
"loss": 0.39169731736183167,
|
|
"step": 3855
|
|
},
|
|
{
|
|
"epoch": 2.4544875875238703,
|
|
"grad_norm": 11.623734157450794,
|
|
"learning_rate": 9.763566120808788e-07,
|
|
"loss": 0.30617618560791016,
|
|
"step": 3856
|
|
},
|
|
{
|
|
"epoch": 2.4551241247612987,
|
|
"grad_norm": 17.45937790047464,
|
|
"learning_rate": 9.74158961453257e-07,
|
|
"loss": 0.501999020576477,
|
|
"step": 3857
|
|
},
|
|
{
|
|
"epoch": 2.455760661998727,
|
|
"grad_norm": 11.718341948276963,
|
|
"learning_rate": 9.719635199481586e-07,
|
|
"loss": 0.41996297240257263,
|
|
"step": 3858
|
|
},
|
|
{
|
|
"epoch": 2.4563971992361555,
|
|
"grad_norm": 9.199374022804443,
|
|
"learning_rate": 9.697702887702992e-07,
|
|
"loss": 0.5343721508979797,
|
|
"step": 3859
|
|
},
|
|
{
|
|
"epoch": 2.4570337364735835,
|
|
"grad_norm": 10.281772022429909,
|
|
"learning_rate": 9.675792691231829e-07,
|
|
"loss": 0.45031020045280457,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 2.4576702737110123,
|
|
"grad_norm": 11.539028460864573,
|
|
"learning_rate": 9.653904622090988e-07,
|
|
"loss": 0.7029082775115967,
|
|
"step": 3861
|
|
},
|
|
{
|
|
"epoch": 2.4583068109484403,
|
|
"grad_norm": 8.411805511544255,
|
|
"learning_rate": 9.632038692291257e-07,
|
|
"loss": 0.54490065574646,
|
|
"step": 3862
|
|
},
|
|
{
|
|
"epoch": 2.4589433481858687,
|
|
"grad_norm": 11.488121758279865,
|
|
"learning_rate": 9.61019491383125e-07,
|
|
"loss": 0.18793101608753204,
|
|
"step": 3863
|
|
},
|
|
{
|
|
"epoch": 2.459579885423297,
|
|
"grad_norm": 11.843548707182343,
|
|
"learning_rate": 9.588373298697396e-07,
|
|
"loss": 1.120024561882019,
|
|
"step": 3864
|
|
},
|
|
{
|
|
"epoch": 2.4602164226607255,
|
|
"grad_norm": 10.143041543062624,
|
|
"learning_rate": 9.56657385886403e-07,
|
|
"loss": 0.35088586807250977,
|
|
"step": 3865
|
|
},
|
|
{
|
|
"epoch": 2.460852959898154,
|
|
"grad_norm": 11.223033111281344,
|
|
"learning_rate": 9.544796606293267e-07,
|
|
"loss": 0.3293212056159973,
|
|
"step": 3866
|
|
},
|
|
{
|
|
"epoch": 2.4614894971355823,
|
|
"grad_norm": 8.76476499373512,
|
|
"learning_rate": 9.523041552935058e-07,
|
|
"loss": 0.41763240098953247,
|
|
"step": 3867
|
|
},
|
|
{
|
|
"epoch": 2.4621260343730107,
|
|
"grad_norm": 14.93547195452655,
|
|
"learning_rate": 9.501308710727169e-07,
|
|
"loss": 0.41176217794418335,
|
|
"step": 3868
|
|
},
|
|
{
|
|
"epoch": 2.462762571610439,
|
|
"grad_norm": 7.530577228729636,
|
|
"learning_rate": 9.479598091595221e-07,
|
|
"loss": 0.3675522208213806,
|
|
"step": 3869
|
|
},
|
|
{
|
|
"epoch": 2.4633991088478675,
|
|
"grad_norm": 8.908702620139834,
|
|
"learning_rate": 9.457909707452556e-07,
|
|
"loss": 0.30302858352661133,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 2.464035646085296,
|
|
"grad_norm": 12.533662515672638,
|
|
"learning_rate": 9.436243570200404e-07,
|
|
"loss": 0.3298501968383789,
|
|
"step": 3871
|
|
},
|
|
{
|
|
"epoch": 2.4646721833227243,
|
|
"grad_norm": 11.650253720807704,
|
|
"learning_rate": 9.414599691727728e-07,
|
|
"loss": 0.22967742383480072,
|
|
"step": 3872
|
|
},
|
|
{
|
|
"epoch": 2.4653087205601527,
|
|
"grad_norm": 8.163958695083027,
|
|
"learning_rate": 9.392978083911303e-07,
|
|
"loss": 0.14559996128082275,
|
|
"step": 3873
|
|
},
|
|
{
|
|
"epoch": 2.465945257797581,
|
|
"grad_norm": 10.821015630406567,
|
|
"learning_rate": 9.371378758615657e-07,
|
|
"loss": 0.2332065999507904,
|
|
"step": 3874
|
|
},
|
|
{
|
|
"epoch": 2.4665817950350095,
|
|
"grad_norm": 23.81465779995915,
|
|
"learning_rate": 9.349801727693137e-07,
|
|
"loss": 0.48943302035331726,
|
|
"step": 3875
|
|
},
|
|
{
|
|
"epoch": 2.467218332272438,
|
|
"grad_norm": 18.46479765305656,
|
|
"learning_rate": 9.328247002983815e-07,
|
|
"loss": 0.4668262004852295,
|
|
"step": 3876
|
|
},
|
|
{
|
|
"epoch": 2.4678548695098663,
|
|
"grad_norm": 14.125759122355031,
|
|
"learning_rate": 9.306714596315547e-07,
|
|
"loss": 0.5885692834854126,
|
|
"step": 3877
|
|
},
|
|
{
|
|
"epoch": 2.4684914067472947,
|
|
"grad_norm": 9.984601554196471,
|
|
"learning_rate": 9.285204519503904e-07,
|
|
"loss": 0.43786299228668213,
|
|
"step": 3878
|
|
},
|
|
{
|
|
"epoch": 2.469127943984723,
|
|
"grad_norm": 10.915351114585736,
|
|
"learning_rate": 9.263716784352284e-07,
|
|
"loss": 0.4853839874267578,
|
|
"step": 3879
|
|
},
|
|
{
|
|
"epoch": 2.4697644812221515,
|
|
"grad_norm": 14.445961340018746,
|
|
"learning_rate": 9.242251402651731e-07,
|
|
"loss": 0.7993488311767578,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 2.47040101845958,
|
|
"grad_norm": 18.093932768325843,
|
|
"learning_rate": 9.220808386181058e-07,
|
|
"loss": 0.5057962536811829,
|
|
"step": 3881
|
|
},
|
|
{
|
|
"epoch": 2.4710375556970083,
|
|
"grad_norm": 8.153569956664871,
|
|
"learning_rate": 9.19938774670685e-07,
|
|
"loss": 0.5910652279853821,
|
|
"step": 3882
|
|
},
|
|
{
|
|
"epoch": 2.4716740929344367,
|
|
"grad_norm": 8.917882167784342,
|
|
"learning_rate": 9.177989495983353e-07,
|
|
"loss": 0.5409796237945557,
|
|
"step": 3883
|
|
},
|
|
{
|
|
"epoch": 2.472310630171865,
|
|
"grad_norm": 11.998262245548794,
|
|
"learning_rate": 9.156613645752554e-07,
|
|
"loss": 0.3271867632865906,
|
|
"step": 3884
|
|
},
|
|
{
|
|
"epoch": 2.4729471674092935,
|
|
"grad_norm": 15.912088236186927,
|
|
"learning_rate": 9.135260207744134e-07,
|
|
"loss": 0.4743756055831909,
|
|
"step": 3885
|
|
},
|
|
{
|
|
"epoch": 2.473583704646722,
|
|
"grad_norm": 13.545048782327063,
|
|
"learning_rate": 9.11392919367552e-07,
|
|
"loss": 0.45293328166007996,
|
|
"step": 3886
|
|
},
|
|
{
|
|
"epoch": 2.4742202418841504,
|
|
"grad_norm": 17.78342517288051,
|
|
"learning_rate": 9.092620615251752e-07,
|
|
"loss": 0.5736050605773926,
|
|
"step": 3887
|
|
},
|
|
{
|
|
"epoch": 2.4748567791215788,
|
|
"grad_norm": 11.884262782314487,
|
|
"learning_rate": 9.071334484165639e-07,
|
|
"loss": 1.0076842308044434,
|
|
"step": 3888
|
|
},
|
|
{
|
|
"epoch": 2.475493316359007,
|
|
"grad_norm": 11.20110060220873,
|
|
"learning_rate": 9.050070812097639e-07,
|
|
"loss": 0.7217289805412292,
|
|
"step": 3889
|
|
},
|
|
{
|
|
"epoch": 2.4761298535964356,
|
|
"grad_norm": 12.016217792995912,
|
|
"learning_rate": 9.028829610715872e-07,
|
|
"loss": 0.7828927040100098,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 2.476766390833864,
|
|
"grad_norm": 12.341574031143532,
|
|
"learning_rate": 9.007610891676144e-07,
|
|
"loss": 0.6456081867218018,
|
|
"step": 3891
|
|
},
|
|
{
|
|
"epoch": 2.4774029280712924,
|
|
"grad_norm": 12.92072370891837,
|
|
"learning_rate": 8.986414666621951e-07,
|
|
"loss": 0.8124405145645142,
|
|
"step": 3892
|
|
},
|
|
{
|
|
"epoch": 2.4780394653087203,
|
|
"grad_norm": 12.145863675913166,
|
|
"learning_rate": 8.965240947184373e-07,
|
|
"loss": 0.8363019824028015,
|
|
"step": 3893
|
|
},
|
|
{
|
|
"epoch": 2.478676002546149,
|
|
"grad_norm": 7.295683922551025,
|
|
"learning_rate": 8.94408974498222e-07,
|
|
"loss": 0.35038474202156067,
|
|
"step": 3894
|
|
},
|
|
{
|
|
"epoch": 2.479312539783577,
|
|
"grad_norm": 9.352470171523942,
|
|
"learning_rate": 8.9229610716219e-07,
|
|
"loss": 0.3922230005264282,
|
|
"step": 3895
|
|
},
|
|
{
|
|
"epoch": 2.479949077021006,
|
|
"grad_norm": 10.36961540397416,
|
|
"learning_rate": 8.901854938697463e-07,
|
|
"loss": 0.2653026282787323,
|
|
"step": 3896
|
|
},
|
|
{
|
|
"epoch": 2.480585614258434,
|
|
"grad_norm": 10.90727526950787,
|
|
"learning_rate": 8.880771357790608e-07,
|
|
"loss": 0.4123893976211548,
|
|
"step": 3897
|
|
},
|
|
{
|
|
"epoch": 2.4812221514958623,
|
|
"grad_norm": 16.136954113514566,
|
|
"learning_rate": 8.859710340470623e-07,
|
|
"loss": 1.1528195142745972,
|
|
"step": 3898
|
|
},
|
|
{
|
|
"epoch": 2.4818586887332907,
|
|
"grad_norm": 24.286155538119257,
|
|
"learning_rate": 8.838671898294482e-07,
|
|
"loss": 0.6724939942359924,
|
|
"step": 3899
|
|
},
|
|
{
|
|
"epoch": 2.482495225970719,
|
|
"grad_norm": 8.850487740533744,
|
|
"learning_rate": 8.817656042806683e-07,
|
|
"loss": 0.44073688983917236,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 2.4831317632081475,
|
|
"grad_norm": 8.418386604147216,
|
|
"learning_rate": 8.7966627855394e-07,
|
|
"loss": 0.3672140836715698,
|
|
"step": 3901
|
|
},
|
|
{
|
|
"epoch": 2.483768300445576,
|
|
"grad_norm": 11.04770614810734,
|
|
"learning_rate": 8.775692138012387e-07,
|
|
"loss": 0.48692113161087036,
|
|
"step": 3902
|
|
},
|
|
{
|
|
"epoch": 2.4844048376830044,
|
|
"grad_norm": 10.974564403318755,
|
|
"learning_rate": 8.754744111732972e-07,
|
|
"loss": 0.9503442645072937,
|
|
"step": 3903
|
|
},
|
|
{
|
|
"epoch": 2.4850413749204328,
|
|
"grad_norm": 13.990625782968438,
|
|
"learning_rate": 8.733818718196075e-07,
|
|
"loss": 0.7163272500038147,
|
|
"step": 3904
|
|
},
|
|
{
|
|
"epoch": 2.485677912157861,
|
|
"grad_norm": 12.00443011795822,
|
|
"learning_rate": 8.712915968884234e-07,
|
|
"loss": 0.5029400587081909,
|
|
"step": 3905
|
|
},
|
|
{
|
|
"epoch": 2.4863144493952896,
|
|
"grad_norm": 17.965271833041577,
|
|
"learning_rate": 8.692035875267519e-07,
|
|
"loss": 0.41578686237335205,
|
|
"step": 3906
|
|
},
|
|
{
|
|
"epoch": 2.486950986632718,
|
|
"grad_norm": 12.673551002344931,
|
|
"learning_rate": 8.67117844880358e-07,
|
|
"loss": 0.7005525827407837,
|
|
"step": 3907
|
|
},
|
|
{
|
|
"epoch": 2.4875875238701464,
|
|
"grad_norm": 21.309574822893687,
|
|
"learning_rate": 8.650343700937614e-07,
|
|
"loss": 0.46643948554992676,
|
|
"step": 3908
|
|
},
|
|
{
|
|
"epoch": 2.4882240611075748,
|
|
"grad_norm": 9.894789904552644,
|
|
"learning_rate": 8.62953164310244e-07,
|
|
"loss": 0.5438557267189026,
|
|
"step": 3909
|
|
},
|
|
{
|
|
"epoch": 2.488860598345003,
|
|
"grad_norm": 8.866708788641482,
|
|
"learning_rate": 8.608742286718314e-07,
|
|
"loss": 0.5588964819908142,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 2.4894971355824316,
|
|
"grad_norm": 11.462387009170216,
|
|
"learning_rate": 8.587975643193136e-07,
|
|
"loss": 0.3707104027271271,
|
|
"step": 3911
|
|
},
|
|
{
|
|
"epoch": 2.49013367281986,
|
|
"grad_norm": 12.067292862429397,
|
|
"learning_rate": 8.567231723922298e-07,
|
|
"loss": 1.8560024499893188,
|
|
"step": 3912
|
|
},
|
|
{
|
|
"epoch": 2.4907702100572884,
|
|
"grad_norm": 8.383572870077932,
|
|
"learning_rate": 8.546510540288728e-07,
|
|
"loss": 0.2791942059993744,
|
|
"step": 3913
|
|
},
|
|
{
|
|
"epoch": 2.491406747294717,
|
|
"grad_norm": 12.672973099526894,
|
|
"learning_rate": 8.525812103662872e-07,
|
|
"loss": 0.7258465886116028,
|
|
"step": 3914
|
|
},
|
|
{
|
|
"epoch": 2.492043284532145,
|
|
"grad_norm": 10.568179289112296,
|
|
"learning_rate": 8.505136425402704e-07,
|
|
"loss": 0.45907819271087646,
|
|
"step": 3915
|
|
},
|
|
{
|
|
"epoch": 2.4926798217695736,
|
|
"grad_norm": 13.027772226849727,
|
|
"learning_rate": 8.484483516853703e-07,
|
|
"loss": 2.177560806274414,
|
|
"step": 3916
|
|
},
|
|
{
|
|
"epoch": 2.493316359007002,
|
|
"grad_norm": 8.846560603648301,
|
|
"learning_rate": 8.463853389348847e-07,
|
|
"loss": 0.21484342217445374,
|
|
"step": 3917
|
|
},
|
|
{
|
|
"epoch": 2.4939528962444304,
|
|
"grad_norm": 10.11725067405563,
|
|
"learning_rate": 8.443246054208642e-07,
|
|
"loss": 0.5265414714813232,
|
|
"step": 3918
|
|
},
|
|
{
|
|
"epoch": 2.494589433481859,
|
|
"grad_norm": 28.25200085103127,
|
|
"learning_rate": 8.422661522741065e-07,
|
|
"loss": 0.648182213306427,
|
|
"step": 3919
|
|
},
|
|
{
|
|
"epoch": 2.495225970719287,
|
|
"grad_norm": 11.453150987186422,
|
|
"learning_rate": 8.402099806241576e-07,
|
|
"loss": 0.6163341999053955,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 2.4958625079567156,
|
|
"grad_norm": 12.861579324596951,
|
|
"learning_rate": 8.381560915993109e-07,
|
|
"loss": 0.23936185240745544,
|
|
"step": 3921
|
|
},
|
|
{
|
|
"epoch": 2.496499045194144,
|
|
"grad_norm": 9.511261670278138,
|
|
"learning_rate": 8.361044863266127e-07,
|
|
"loss": 0.6329669952392578,
|
|
"step": 3922
|
|
},
|
|
{
|
|
"epoch": 2.4971355824315724,
|
|
"grad_norm": 11.293623223865943,
|
|
"learning_rate": 8.340551659318463e-07,
|
|
"loss": 0.5023452043533325,
|
|
"step": 3923
|
|
},
|
|
{
|
|
"epoch": 2.497772119669001,
|
|
"grad_norm": 20.634585799349573,
|
|
"learning_rate": 8.320081315395512e-07,
|
|
"loss": 0.6741424798965454,
|
|
"step": 3924
|
|
},
|
|
{
|
|
"epoch": 2.498408656906429,
|
|
"grad_norm": 25.022550487994334,
|
|
"learning_rate": 8.299633842730065e-07,
|
|
"loss": 0.32343339920043945,
|
|
"step": 3925
|
|
},
|
|
{
|
|
"epoch": 2.499045194143857,
|
|
"grad_norm": 11.903436268800743,
|
|
"learning_rate": 8.27920925254238e-07,
|
|
"loss": 0.32865163683891296,
|
|
"step": 3926
|
|
},
|
|
{
|
|
"epoch": 2.499681731381286,
|
|
"grad_norm": 14.19780664980985,
|
|
"learning_rate": 8.258807556040149e-07,
|
|
"loss": 0.8066063523292542,
|
|
"step": 3927
|
|
},
|
|
{
|
|
"epoch": 2.500318268618714,
|
|
"grad_norm": 12.088809442154751,
|
|
"learning_rate": 8.238428764418532e-07,
|
|
"loss": 0.9081128835678101,
|
|
"step": 3928
|
|
},
|
|
{
|
|
"epoch": 2.500954805856143,
|
|
"grad_norm": 15.293012723065615,
|
|
"learning_rate": 8.21807288886009e-07,
|
|
"loss": 0.8295964598655701,
|
|
"step": 3929
|
|
},
|
|
{
|
|
"epoch": 2.501591343093571,
|
|
"grad_norm": 15.73977461196448,
|
|
"learning_rate": 8.197739940534815e-07,
|
|
"loss": 0.43273162841796875,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 2.5022278803309996,
|
|
"grad_norm": 13.338943731268314,
|
|
"learning_rate": 8.177429930600117e-07,
|
|
"loss": 0.27769073843955994,
|
|
"step": 3931
|
|
},
|
|
{
|
|
"epoch": 2.5028644175684276,
|
|
"grad_norm": 14.220792861448592,
|
|
"learning_rate": 8.15714287020083e-07,
|
|
"loss": 0.49164503812789917,
|
|
"step": 3932
|
|
},
|
|
{
|
|
"epoch": 2.503500954805856,
|
|
"grad_norm": 12.05284287892093,
|
|
"learning_rate": 8.136878770469181e-07,
|
|
"loss": 0.48813530802726746,
|
|
"step": 3933
|
|
},
|
|
{
|
|
"epoch": 2.5041374920432844,
|
|
"grad_norm": 8.556958885253536,
|
|
"learning_rate": 8.116637642524794e-07,
|
|
"loss": 0.26071983575820923,
|
|
"step": 3934
|
|
},
|
|
{
|
|
"epoch": 2.504774029280713,
|
|
"grad_norm": 8.225598932794536,
|
|
"learning_rate": 8.096419497474739e-07,
|
|
"loss": 0.2683258652687073,
|
|
"step": 3935
|
|
},
|
|
{
|
|
"epoch": 2.505410566518141,
|
|
"grad_norm": 19.262279699824376,
|
|
"learning_rate": 8.076224346413375e-07,
|
|
"loss": 0.4536847770214081,
|
|
"step": 3936
|
|
},
|
|
{
|
|
"epoch": 2.5060471037555696,
|
|
"grad_norm": 17.66581177773771,
|
|
"learning_rate": 8.056052200422543e-07,
|
|
"loss": 0.41516709327697754,
|
|
"step": 3937
|
|
},
|
|
{
|
|
"epoch": 2.506683640992998,
|
|
"grad_norm": 11.601108596142094,
|
|
"learning_rate": 8.035903070571411e-07,
|
|
"loss": 0.27830269932746887,
|
|
"step": 3938
|
|
},
|
|
{
|
|
"epoch": 2.5073201782304264,
|
|
"grad_norm": 7.974001864271789,
|
|
"learning_rate": 8.015776967916517e-07,
|
|
"loss": 0.3432263135910034,
|
|
"step": 3939
|
|
},
|
|
{
|
|
"epoch": 2.507956715467855,
|
|
"grad_norm": 10.27169560967598,
|
|
"learning_rate": 7.995673903501766e-07,
|
|
"loss": 0.25255173444747925,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 2.508593252705283,
|
|
"grad_norm": 9.636318330866025,
|
|
"learning_rate": 7.97559388835844e-07,
|
|
"loss": 0.32532811164855957,
|
|
"step": 3941
|
|
},
|
|
{
|
|
"epoch": 2.5092297899427116,
|
|
"grad_norm": 11.27672640424502,
|
|
"learning_rate": 7.955536933505159e-07,
|
|
"loss": 0.39307156205177307,
|
|
"step": 3942
|
|
},
|
|
{
|
|
"epoch": 2.50986632718014,
|
|
"grad_norm": 11.312664236327247,
|
|
"learning_rate": 7.935503049947885e-07,
|
|
"loss": 0.606777548789978,
|
|
"step": 3943
|
|
},
|
|
{
|
|
"epoch": 2.5105028644175684,
|
|
"grad_norm": 10.63463186456689,
|
|
"learning_rate": 7.915492248679912e-07,
|
|
"loss": 0.3355543613433838,
|
|
"step": 3944
|
|
},
|
|
{
|
|
"epoch": 2.511139401654997,
|
|
"grad_norm": 10.974903445083752,
|
|
"learning_rate": 7.895504540681914e-07,
|
|
"loss": 0.32796362042427063,
|
|
"step": 3945
|
|
},
|
|
{
|
|
"epoch": 2.5117759388924252,
|
|
"grad_norm": 12.012493693189512,
|
|
"learning_rate": 7.875539936921811e-07,
|
|
"loss": 0.33286190032958984,
|
|
"step": 3946
|
|
},
|
|
{
|
|
"epoch": 2.5124124761298536,
|
|
"grad_norm": 7.76592086315613,
|
|
"learning_rate": 7.855598448354935e-07,
|
|
"loss": 0.4606126844882965,
|
|
"step": 3947
|
|
},
|
|
{
|
|
"epoch": 2.513049013367282,
|
|
"grad_norm": 13.569980323182346,
|
|
"learning_rate": 7.835680085923874e-07,
|
|
"loss": 0.4506418704986572,
|
|
"step": 3948
|
|
},
|
|
{
|
|
"epoch": 2.5136855506047104,
|
|
"grad_norm": 7.968347232818058,
|
|
"learning_rate": 7.815784860558545e-07,
|
|
"loss": 0.40884724259376526,
|
|
"step": 3949
|
|
},
|
|
{
|
|
"epoch": 2.514322087842139,
|
|
"grad_norm": 8.849704702994723,
|
|
"learning_rate": 7.795912783176169e-07,
|
|
"loss": 0.2936815917491913,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 2.5149586250795672,
|
|
"grad_norm": 8.899514894978735,
|
|
"learning_rate": 7.776063864681249e-07,
|
|
"loss": 0.44772791862487793,
|
|
"step": 3951
|
|
},
|
|
{
|
|
"epoch": 2.5155951623169956,
|
|
"grad_norm": 7.9871753424606675,
|
|
"learning_rate": 7.756238115965631e-07,
|
|
"loss": 0.5370556116104126,
|
|
"step": 3952
|
|
},
|
|
{
|
|
"epoch": 2.516231699554424,
|
|
"grad_norm": 10.282090886763076,
|
|
"learning_rate": 7.736435547908367e-07,
|
|
"loss": 0.7182104587554932,
|
|
"step": 3953
|
|
},
|
|
{
|
|
"epoch": 2.5168682367918525,
|
|
"grad_norm": 11.921670321742583,
|
|
"learning_rate": 7.716656171375858e-07,
|
|
"loss": 0.2773297131061554,
|
|
"step": 3954
|
|
},
|
|
{
|
|
"epoch": 2.517504774029281,
|
|
"grad_norm": 10.810132079204251,
|
|
"learning_rate": 7.696899997221752e-07,
|
|
"loss": 0.9390149712562561,
|
|
"step": 3955
|
|
},
|
|
{
|
|
"epoch": 2.5181413112667093,
|
|
"grad_norm": 14.82963570830732,
|
|
"learning_rate": 7.677167036286959e-07,
|
|
"loss": 0.36737754940986633,
|
|
"step": 3956
|
|
},
|
|
{
|
|
"epoch": 2.518777848504137,
|
|
"grad_norm": 9.67913638860974,
|
|
"learning_rate": 7.657457299399645e-07,
|
|
"loss": 0.6483880281448364,
|
|
"step": 3957
|
|
},
|
|
{
|
|
"epoch": 2.519414385741566,
|
|
"grad_norm": 17.581677424639686,
|
|
"learning_rate": 7.637770797375288e-07,
|
|
"loss": 0.6671686172485352,
|
|
"step": 3958
|
|
},
|
|
{
|
|
"epoch": 2.520050922978994,
|
|
"grad_norm": 8.585393533998928,
|
|
"learning_rate": 7.618107541016523e-07,
|
|
"loss": 0.22666820883750916,
|
|
"step": 3959
|
|
},
|
|
{
|
|
"epoch": 2.520687460216423,
|
|
"grad_norm": 9.979454509852893,
|
|
"learning_rate": 7.598467541113319e-07,
|
|
"loss": 0.1724715530872345,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 2.521323997453851,
|
|
"grad_norm": 12.356985596453415,
|
|
"learning_rate": 7.578850808442823e-07,
|
|
"loss": 0.3336951732635498,
|
|
"step": 3961
|
|
},
|
|
{
|
|
"epoch": 2.5219605346912797,
|
|
"grad_norm": 11.86635527683857,
|
|
"learning_rate": 7.559257353769445e-07,
|
|
"loss": 0.7841365933418274,
|
|
"step": 3962
|
|
},
|
|
{
|
|
"epoch": 2.5225970719287076,
|
|
"grad_norm": 11.404116510648365,
|
|
"learning_rate": 7.539687187844791e-07,
|
|
"loss": 0.3960840404033661,
|
|
"step": 3963
|
|
},
|
|
{
|
|
"epoch": 2.5232336091661365,
|
|
"grad_norm": 13.485822498869954,
|
|
"learning_rate": 7.520140321407743e-07,
|
|
"loss": 0.6474733352661133,
|
|
"step": 3964
|
|
},
|
|
{
|
|
"epoch": 2.5238701464035644,
|
|
"grad_norm": 10.513039577614204,
|
|
"learning_rate": 7.50061676518436e-07,
|
|
"loss": 0.44929003715515137,
|
|
"step": 3965
|
|
},
|
|
{
|
|
"epoch": 2.524506683640993,
|
|
"grad_norm": 9.096772031182493,
|
|
"learning_rate": 7.481116529887872e-07,
|
|
"loss": 0.1913546770811081,
|
|
"step": 3966
|
|
},
|
|
{
|
|
"epoch": 2.5251432208784212,
|
|
"grad_norm": 10.32703529747825,
|
|
"learning_rate": 7.461639626218797e-07,
|
|
"loss": 0.6519386768341064,
|
|
"step": 3967
|
|
},
|
|
{
|
|
"epoch": 2.5257797581158496,
|
|
"grad_norm": 10.775277765817435,
|
|
"learning_rate": 7.442186064864787e-07,
|
|
"loss": 0.49530237913131714,
|
|
"step": 3968
|
|
},
|
|
{
|
|
"epoch": 2.526416295353278,
|
|
"grad_norm": 12.445278410113824,
|
|
"learning_rate": 7.422755856500713e-07,
|
|
"loss": 0.3465782105922699,
|
|
"step": 3969
|
|
},
|
|
{
|
|
"epoch": 2.5270528325907065,
|
|
"grad_norm": 7.851044789090474,
|
|
"learning_rate": 7.403349011788608e-07,
|
|
"loss": 0.6121078729629517,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 2.527689369828135,
|
|
"grad_norm": 10.571941989295352,
|
|
"learning_rate": 7.383965541377719e-07,
|
|
"loss": 0.30977076292037964,
|
|
"step": 3971
|
|
},
|
|
{
|
|
"epoch": 2.5283259070655633,
|
|
"grad_norm": 14.761430912459188,
|
|
"learning_rate": 7.364605455904439e-07,
|
|
"loss": 0.4765622615814209,
|
|
"step": 3972
|
|
},
|
|
{
|
|
"epoch": 2.5289624443029917,
|
|
"grad_norm": 13.96702196193006,
|
|
"learning_rate": 7.345268765992342e-07,
|
|
"loss": 0.4707704782485962,
|
|
"step": 3973
|
|
},
|
|
{
|
|
"epoch": 2.52959898154042,
|
|
"grad_norm": 7.610110479363033,
|
|
"learning_rate": 7.325955482252139e-07,
|
|
"loss": 0.15941064059734344,
|
|
"step": 3974
|
|
},
|
|
{
|
|
"epoch": 2.5302355187778485,
|
|
"grad_norm": 14.639664463905218,
|
|
"learning_rate": 7.306665615281755e-07,
|
|
"loss": 0.37662458419799805,
|
|
"step": 3975
|
|
},
|
|
{
|
|
"epoch": 2.530872056015277,
|
|
"grad_norm": 5.972874401662598,
|
|
"learning_rate": 7.287399175666187e-07,
|
|
"loss": 0.08262288570404053,
|
|
"step": 3976
|
|
},
|
|
{
|
|
"epoch": 2.5315085932527053,
|
|
"grad_norm": 8.333552733177665,
|
|
"learning_rate": 7.268156173977642e-07,
|
|
"loss": 0.4235314130783081,
|
|
"step": 3977
|
|
},
|
|
{
|
|
"epoch": 2.5321451304901337,
|
|
"grad_norm": 34.24994602674094,
|
|
"learning_rate": 7.248936620775438e-07,
|
|
"loss": 1.9823403358459473,
|
|
"step": 3978
|
|
},
|
|
{
|
|
"epoch": 2.532781667727562,
|
|
"grad_norm": 7.633287923802357,
|
|
"learning_rate": 7.229740526606027e-07,
|
|
"loss": 0.3385160565376282,
|
|
"step": 3979
|
|
},
|
|
{
|
|
"epoch": 2.5334182049649905,
|
|
"grad_norm": 11.401673992594684,
|
|
"learning_rate": 7.210567902002979e-07,
|
|
"loss": 0.36040711402893066,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 2.534054742202419,
|
|
"grad_norm": 10.234848659750531,
|
|
"learning_rate": 7.191418757487029e-07,
|
|
"loss": 0.7135156393051147,
|
|
"step": 3981
|
|
},
|
|
{
|
|
"epoch": 2.5346912794398473,
|
|
"grad_norm": 18.309217691170605,
|
|
"learning_rate": 7.172293103565963e-07,
|
|
"loss": 0.623892068862915,
|
|
"step": 3982
|
|
},
|
|
{
|
|
"epoch": 2.5353278166772757,
|
|
"grad_norm": 14.302095093065146,
|
|
"learning_rate": 7.153190950734707e-07,
|
|
"loss": 0.8380212187767029,
|
|
"step": 3983
|
|
},
|
|
{
|
|
"epoch": 2.535964353914704,
|
|
"grad_norm": 11.761961408069393,
|
|
"learning_rate": 7.134112309475316e-07,
|
|
"loss": 0.48324182629585266,
|
|
"step": 3984
|
|
},
|
|
{
|
|
"epoch": 2.5366008911521325,
|
|
"grad_norm": 14.556210987287836,
|
|
"learning_rate": 7.115057190256913e-07,
|
|
"loss": 0.606602668762207,
|
|
"step": 3985
|
|
},
|
|
{
|
|
"epoch": 2.537237428389561,
|
|
"grad_norm": 6.9981359977166715,
|
|
"learning_rate": 7.096025603535722e-07,
|
|
"loss": 0.3626210689544678,
|
|
"step": 3986
|
|
},
|
|
{
|
|
"epoch": 2.5378739656269893,
|
|
"grad_norm": 13.52001722625275,
|
|
"learning_rate": 7.07701755975504e-07,
|
|
"loss": 0.6667066216468811,
|
|
"step": 3987
|
|
},
|
|
{
|
|
"epoch": 2.5385105028644177,
|
|
"grad_norm": 8.004767110842625,
|
|
"learning_rate": 7.058033069345288e-07,
|
|
"loss": 0.723233163356781,
|
|
"step": 3988
|
|
},
|
|
{
|
|
"epoch": 2.539147040101846,
|
|
"grad_norm": 15.23538216180776,
|
|
"learning_rate": 7.03907214272389e-07,
|
|
"loss": 0.4600798487663269,
|
|
"step": 3989
|
|
},
|
|
{
|
|
"epoch": 2.5397835773392745,
|
|
"grad_norm": 7.956737760053174,
|
|
"learning_rate": 7.020134790295419e-07,
|
|
"loss": 0.3741765320301056,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 2.540420114576703,
|
|
"grad_norm": 11.974922232816839,
|
|
"learning_rate": 7.001221022451466e-07,
|
|
"loss": 0.39778295159339905,
|
|
"step": 3991
|
|
},
|
|
{
|
|
"epoch": 2.541056651814131,
|
|
"grad_norm": 7.161783117632343,
|
|
"learning_rate": 6.982330849570684e-07,
|
|
"loss": 0.666446328163147,
|
|
"step": 3992
|
|
},
|
|
{
|
|
"epoch": 2.5416931890515597,
|
|
"grad_norm": 9.27701058935869,
|
|
"learning_rate": 6.963464282018773e-07,
|
|
"loss": 0.2722229063510895,
|
|
"step": 3993
|
|
},
|
|
{
|
|
"epoch": 2.5423297262889877,
|
|
"grad_norm": 13.649664330044764,
|
|
"learning_rate": 6.944621330148521e-07,
|
|
"loss": 0.35371774435043335,
|
|
"step": 3994
|
|
},
|
|
{
|
|
"epoch": 2.5429662635264165,
|
|
"grad_norm": 12.557999458691365,
|
|
"learning_rate": 6.925802004299709e-07,
|
|
"loss": 0.5170055627822876,
|
|
"step": 3995
|
|
},
|
|
{
|
|
"epoch": 2.5436028007638445,
|
|
"grad_norm": 12.669537478796821,
|
|
"learning_rate": 6.90700631479918e-07,
|
|
"loss": 0.7935366630554199,
|
|
"step": 3996
|
|
},
|
|
{
|
|
"epoch": 2.5442393380012733,
|
|
"grad_norm": 15.257197141295174,
|
|
"learning_rate": 6.888234271960786e-07,
|
|
"loss": 0.35724225640296936,
|
|
"step": 3997
|
|
},
|
|
{
|
|
"epoch": 2.5448758752387013,
|
|
"grad_norm": 10.843805919979113,
|
|
"learning_rate": 6.869485886085431e-07,
|
|
"loss": 0.18914970755577087,
|
|
"step": 3998
|
|
},
|
|
{
|
|
"epoch": 2.5455124124761297,
|
|
"grad_norm": 8.336491374798458,
|
|
"learning_rate": 6.850761167461012e-07,
|
|
"loss": 0.24970906972885132,
|
|
"step": 3999
|
|
},
|
|
{
|
|
"epoch": 2.546148949713558,
|
|
"grad_norm": 8.864669665117166,
|
|
"learning_rate": 6.832060126362444e-07,
|
|
"loss": 0.4097861647605896,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 2.5467854869509865,
|
|
"grad_norm": 12.519767174722935,
|
|
"learning_rate": 6.813382773051686e-07,
|
|
"loss": 0.30255192518234253,
|
|
"step": 4001
|
|
},
|
|
{
|
|
"epoch": 2.547422024188415,
|
|
"grad_norm": 10.04382326079557,
|
|
"learning_rate": 6.794729117777626e-07,
|
|
"loss": 0.35676640272140503,
|
|
"step": 4002
|
|
},
|
|
{
|
|
"epoch": 2.5480585614258433,
|
|
"grad_norm": 26.03594131936014,
|
|
"learning_rate": 6.776099170776224e-07,
|
|
"loss": 1.2304989099502563,
|
|
"step": 4003
|
|
},
|
|
{
|
|
"epoch": 2.5486950986632717,
|
|
"grad_norm": 18.41409836626958,
|
|
"learning_rate": 6.757492942270377e-07,
|
|
"loss": 0.7692633867263794,
|
|
"step": 4004
|
|
},
|
|
{
|
|
"epoch": 2.5493316359007,
|
|
"grad_norm": 11.782444142211297,
|
|
"learning_rate": 6.738910442469993e-07,
|
|
"loss": 0.6399699449539185,
|
|
"step": 4005
|
|
},
|
|
{
|
|
"epoch": 2.5499681731381285,
|
|
"grad_norm": 10.106840689907994,
|
|
"learning_rate": 6.72035168157194e-07,
|
|
"loss": 0.5657060146331787,
|
|
"step": 4006
|
|
},
|
|
{
|
|
"epoch": 2.550604710375557,
|
|
"grad_norm": 7.183838333427905,
|
|
"learning_rate": 6.701816669760097e-07,
|
|
"loss": 0.23255078494548798,
|
|
"step": 4007
|
|
},
|
|
{
|
|
"epoch": 2.5512412476129853,
|
|
"grad_norm": 8.659732129996089,
|
|
"learning_rate": 6.683305417205266e-07,
|
|
"loss": 0.22366073727607727,
|
|
"step": 4008
|
|
},
|
|
{
|
|
"epoch": 2.5518777848504137,
|
|
"grad_norm": 12.520710299607957,
|
|
"learning_rate": 6.664817934065237e-07,
|
|
"loss": 0.3836514949798584,
|
|
"step": 4009
|
|
},
|
|
{
|
|
"epoch": 2.552514322087842,
|
|
"grad_norm": 12.39630940270907,
|
|
"learning_rate": 6.646354230484741e-07,
|
|
"loss": 0.2722182273864746,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 2.5531508593252705,
|
|
"grad_norm": 12.83135676896471,
|
|
"learning_rate": 6.627914316595508e-07,
|
|
"loss": 0.32897594571113586,
|
|
"step": 4011
|
|
},
|
|
{
|
|
"epoch": 2.553787396562699,
|
|
"grad_norm": 10.065896376482925,
|
|
"learning_rate": 6.609498202516119e-07,
|
|
"loss": 0.7564833164215088,
|
|
"step": 4012
|
|
},
|
|
{
|
|
"epoch": 2.5544239338001273,
|
|
"grad_norm": 9.221994387866406,
|
|
"learning_rate": 6.591105898352195e-07,
|
|
"loss": 0.3179935812950134,
|
|
"step": 4013
|
|
},
|
|
{
|
|
"epoch": 2.5550604710375557,
|
|
"grad_norm": 15.666651912373274,
|
|
"learning_rate": 6.57273741419624e-07,
|
|
"loss": 0.42274990677833557,
|
|
"step": 4014
|
|
},
|
|
{
|
|
"epoch": 2.555697008274984,
|
|
"grad_norm": 15.109293361481123,
|
|
"learning_rate": 6.554392760127687e-07,
|
|
"loss": 0.4832228422164917,
|
|
"step": 4015
|
|
},
|
|
{
|
|
"epoch": 2.5563335455124125,
|
|
"grad_norm": 10.385417905551119,
|
|
"learning_rate": 6.536071946212908e-07,
|
|
"loss": 0.8315209746360779,
|
|
"step": 4016
|
|
},
|
|
{
|
|
"epoch": 2.556970082749841,
|
|
"grad_norm": 10.986378584095577,
|
|
"learning_rate": 6.517774982505176e-07,
|
|
"loss": 0.43238815665245056,
|
|
"step": 4017
|
|
},
|
|
{
|
|
"epoch": 2.5576066199872693,
|
|
"grad_norm": 14.03450280081931,
|
|
"learning_rate": 6.499501879044723e-07,
|
|
"loss": 0.6071986556053162,
|
|
"step": 4018
|
|
},
|
|
{
|
|
"epoch": 2.5582431572246978,
|
|
"grad_norm": 10.179249658471798,
|
|
"learning_rate": 6.481252645858599e-07,
|
|
"loss": 0.4217533767223358,
|
|
"step": 4019
|
|
},
|
|
{
|
|
"epoch": 2.558879694462126,
|
|
"grad_norm": 25.390846667098813,
|
|
"learning_rate": 6.463027292960849e-07,
|
|
"loss": 0.6482077836990356,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 2.5595162316995546,
|
|
"grad_norm": 15.746180799660124,
|
|
"learning_rate": 6.444825830352363e-07,
|
|
"loss": 0.27979302406311035,
|
|
"step": 4021
|
|
},
|
|
{
|
|
"epoch": 2.560152768936983,
|
|
"grad_norm": 10.620053353234267,
|
|
"learning_rate": 6.42664826802093e-07,
|
|
"loss": 0.3468106687068939,
|
|
"step": 4022
|
|
},
|
|
{
|
|
"epoch": 2.5607893061744114,
|
|
"grad_norm": 11.187646474061209,
|
|
"learning_rate": 6.408494615941219e-07,
|
|
"loss": 0.30796951055526733,
|
|
"step": 4023
|
|
},
|
|
{
|
|
"epoch": 2.5614258434118398,
|
|
"grad_norm": 11.421373336232325,
|
|
"learning_rate": 6.390364884074812e-07,
|
|
"loss": 0.4836077094078064,
|
|
"step": 4024
|
|
},
|
|
{
|
|
"epoch": 2.5620623806492677,
|
|
"grad_norm": 12.070982990539402,
|
|
"learning_rate": 6.372259082370103e-07,
|
|
"loss": 0.649453341960907,
|
|
"step": 4025
|
|
},
|
|
{
|
|
"epoch": 2.5626989178866966,
|
|
"grad_norm": 16.80272281678233,
|
|
"learning_rate": 6.35417722076242e-07,
|
|
"loss": 0.45360037684440613,
|
|
"step": 4026
|
|
},
|
|
{
|
|
"epoch": 2.5633354551241245,
|
|
"grad_norm": 15.968912437067937,
|
|
"learning_rate": 6.336119309173922e-07,
|
|
"loss": 0.43244290351867676,
|
|
"step": 4027
|
|
},
|
|
{
|
|
"epoch": 2.5639719923615534,
|
|
"grad_norm": 9.986780279856935,
|
|
"learning_rate": 6.318085357513615e-07,
|
|
"loss": 0.5531265139579773,
|
|
"step": 4028
|
|
},
|
|
{
|
|
"epoch": 2.5646085295989813,
|
|
"grad_norm": 9.755041940144544,
|
|
"learning_rate": 6.300075375677367e-07,
|
|
"loss": 0.5455867052078247,
|
|
"step": 4029
|
|
},
|
|
{
|
|
"epoch": 2.56524506683641,
|
|
"grad_norm": 15.749565956253951,
|
|
"learning_rate": 6.282089373547922e-07,
|
|
"loss": 0.7256792783737183,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 2.565881604073838,
|
|
"grad_norm": 10.14669687129771,
|
|
"learning_rate": 6.264127360994832e-07,
|
|
"loss": 0.3116908073425293,
|
|
"step": 4031
|
|
},
|
|
{
|
|
"epoch": 2.5665181413112665,
|
|
"grad_norm": 23.012494647304095,
|
|
"learning_rate": 6.246189347874482e-07,
|
|
"loss": 0.5365034341812134,
|
|
"step": 4032
|
|
},
|
|
{
|
|
"epoch": 2.567154678548695,
|
|
"grad_norm": 10.798557794766793,
|
|
"learning_rate": 6.22827534403011e-07,
|
|
"loss": 0.5954269170761108,
|
|
"step": 4033
|
|
},
|
|
{
|
|
"epoch": 2.5677912157861233,
|
|
"grad_norm": 12.057567307557491,
|
|
"learning_rate": 6.210385359291765e-07,
|
|
"loss": 0.3405521810054779,
|
|
"step": 4034
|
|
},
|
|
{
|
|
"epoch": 2.5684277530235518,
|
|
"grad_norm": 8.084289365850049,
|
|
"learning_rate": 6.192519403476316e-07,
|
|
"loss": 0.5951095223426819,
|
|
"step": 4035
|
|
},
|
|
{
|
|
"epoch": 2.56906429026098,
|
|
"grad_norm": 10.125116903279523,
|
|
"learning_rate": 6.174677486387448e-07,
|
|
"loss": 0.6498456001281738,
|
|
"step": 4036
|
|
},
|
|
{
|
|
"epoch": 2.5697008274984086,
|
|
"grad_norm": 11.203961418863967,
|
|
"learning_rate": 6.156859617815675e-07,
|
|
"loss": 0.250361829996109,
|
|
"step": 4037
|
|
},
|
|
{
|
|
"epoch": 2.570337364735837,
|
|
"grad_norm": 11.947080547454908,
|
|
"learning_rate": 6.13906580753828e-07,
|
|
"loss": 0.4113607406616211,
|
|
"step": 4038
|
|
},
|
|
{
|
|
"epoch": 2.5709739019732654,
|
|
"grad_norm": 7.409866854811151,
|
|
"learning_rate": 6.121296065319365e-07,
|
|
"loss": 0.31993091106414795,
|
|
"step": 4039
|
|
},
|
|
{
|
|
"epoch": 2.5716104392106938,
|
|
"grad_norm": 9.538678239747178,
|
|
"learning_rate": 6.103550400909824e-07,
|
|
"loss": 0.2650909721851349,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 2.572246976448122,
|
|
"grad_norm": 11.683157176185997,
|
|
"learning_rate": 6.085828824047336e-07,
|
|
"loss": 0.26428112387657166,
|
|
"step": 4041
|
|
},
|
|
{
|
|
"epoch": 2.5728835136855506,
|
|
"grad_norm": 17.138762476077595,
|
|
"learning_rate": 6.068131344456346e-07,
|
|
"loss": 0.5150033235549927,
|
|
"step": 4042
|
|
},
|
|
{
|
|
"epoch": 2.573520050922979,
|
|
"grad_norm": 8.654326928252814,
|
|
"learning_rate": 6.050457971848117e-07,
|
|
"loss": 0.5108681321144104,
|
|
"step": 4043
|
|
},
|
|
{
|
|
"epoch": 2.5741565881604074,
|
|
"grad_norm": 7.773533322785913,
|
|
"learning_rate": 6.032808715920646e-07,
|
|
"loss": 0.17469176650047302,
|
|
"step": 4044
|
|
},
|
|
{
|
|
"epoch": 2.574793125397836,
|
|
"grad_norm": 10.35576578065918,
|
|
"learning_rate": 6.015183586358714e-07,
|
|
"loss": 0.21947219967842102,
|
|
"step": 4045
|
|
},
|
|
{
|
|
"epoch": 2.575429662635264,
|
|
"grad_norm": 7.9682456882626775,
|
|
"learning_rate": 5.997582592833839e-07,
|
|
"loss": 0.4151739776134491,
|
|
"step": 4046
|
|
},
|
|
{
|
|
"epoch": 2.5760661998726926,
|
|
"grad_norm": 9.959392351046564,
|
|
"learning_rate": 5.980005745004352e-07,
|
|
"loss": 0.3670939803123474,
|
|
"step": 4047
|
|
},
|
|
{
|
|
"epoch": 2.576702737110121,
|
|
"grad_norm": 9.984677466828005,
|
|
"learning_rate": 5.962453052515255e-07,
|
|
"loss": 0.5253440737724304,
|
|
"step": 4048
|
|
},
|
|
{
|
|
"epoch": 2.5773392743475494,
|
|
"grad_norm": 11.23299850265963,
|
|
"learning_rate": 5.944924524998363e-07,
|
|
"loss": 0.7628803253173828,
|
|
"step": 4049
|
|
},
|
|
{
|
|
"epoch": 2.577975811584978,
|
|
"grad_norm": 15.965633055637232,
|
|
"learning_rate": 5.927420172072195e-07,
|
|
"loss": 0.7902500629425049,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 2.578612348822406,
|
|
"grad_norm": 10.850945173909906,
|
|
"learning_rate": 5.909940003342018e-07,
|
|
"loss": 0.39650219678878784,
|
|
"step": 4051
|
|
},
|
|
{
|
|
"epoch": 2.5792488860598346,
|
|
"grad_norm": 7.319122127601429,
|
|
"learning_rate": 5.89248402839982e-07,
|
|
"loss": 0.1823950707912445,
|
|
"step": 4052
|
|
},
|
|
{
|
|
"epoch": 2.579885423297263,
|
|
"grad_norm": 17.984587464302077,
|
|
"learning_rate": 5.875052256824304e-07,
|
|
"loss": 0.4608056843280792,
|
|
"step": 4053
|
|
},
|
|
{
|
|
"epoch": 2.5805219605346914,
|
|
"grad_norm": 9.775152121470637,
|
|
"learning_rate": 5.857644698180937e-07,
|
|
"loss": 0.24857516586780548,
|
|
"step": 4054
|
|
},
|
|
{
|
|
"epoch": 2.58115849777212,
|
|
"grad_norm": 7.72330608213562,
|
|
"learning_rate": 5.840261362021831e-07,
|
|
"loss": 0.4499109089374542,
|
|
"step": 4055
|
|
},
|
|
{
|
|
"epoch": 2.581795035009548,
|
|
"grad_norm": 8.649994562620037,
|
|
"learning_rate": 5.822902257885865e-07,
|
|
"loss": 0.18724137544631958,
|
|
"step": 4056
|
|
},
|
|
{
|
|
"epoch": 2.5824315722469766,
|
|
"grad_norm": 10.508979666019846,
|
|
"learning_rate": 5.805567395298594e-07,
|
|
"loss": 0.44745534658432007,
|
|
"step": 4057
|
|
},
|
|
{
|
|
"epoch": 2.5830681094844046,
|
|
"grad_norm": 9.752479468668094,
|
|
"learning_rate": 5.788256783772272e-07,
|
|
"loss": 0.5347998142242432,
|
|
"step": 4058
|
|
},
|
|
{
|
|
"epoch": 2.5837046467218334,
|
|
"grad_norm": 13.769343454022025,
|
|
"learning_rate": 5.770970432805844e-07,
|
|
"loss": 0.38834691047668457,
|
|
"step": 4059
|
|
},
|
|
{
|
|
"epoch": 2.5843411839592614,
|
|
"grad_norm": 12.757115175870183,
|
|
"learning_rate": 5.753708351884963e-07,
|
|
"loss": 0.6713066101074219,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 2.5849777211966902,
|
|
"grad_norm": 18.32754619739884,
|
|
"learning_rate": 5.736470550481938e-07,
|
|
"loss": 0.7586411237716675,
|
|
"step": 4061
|
|
},
|
|
{
|
|
"epoch": 2.585614258434118,
|
|
"grad_norm": 10.271474583245858,
|
|
"learning_rate": 5.719257038055764e-07,
|
|
"loss": 0.30766427516937256,
|
|
"step": 4062
|
|
},
|
|
{
|
|
"epoch": 2.586250795671547,
|
|
"grad_norm": 15.201939737404972,
|
|
"learning_rate": 5.702067824052116e-07,
|
|
"loss": 1.1935421228408813,
|
|
"step": 4063
|
|
},
|
|
{
|
|
"epoch": 2.586887332908975,
|
|
"grad_norm": 24.59159721596722,
|
|
"learning_rate": 5.684902917903318e-07,
|
|
"loss": 0.9108860492706299,
|
|
"step": 4064
|
|
},
|
|
{
|
|
"epoch": 2.587523870146404,
|
|
"grad_norm": 9.04995185422225,
|
|
"learning_rate": 5.667762329028364e-07,
|
|
"loss": 0.2907896637916565,
|
|
"step": 4065
|
|
},
|
|
{
|
|
"epoch": 2.588160407383832,
|
|
"grad_norm": 10.678201116420878,
|
|
"learning_rate": 5.650646066832921e-07,
|
|
"loss": 0.3620736598968506,
|
|
"step": 4066
|
|
},
|
|
{
|
|
"epoch": 2.58879694462126,
|
|
"grad_norm": 7.9690751317708,
|
|
"learning_rate": 5.633554140709302e-07,
|
|
"loss": 0.7071490287780762,
|
|
"step": 4067
|
|
},
|
|
{
|
|
"epoch": 2.5894334818586886,
|
|
"grad_norm": 12.722277605000698,
|
|
"learning_rate": 5.616486560036416e-07,
|
|
"loss": 0.835293710231781,
|
|
"step": 4068
|
|
},
|
|
{
|
|
"epoch": 2.590070019096117,
|
|
"grad_norm": 7.3315614431992415,
|
|
"learning_rate": 5.599443334179889e-07,
|
|
"loss": 0.6301496028900146,
|
|
"step": 4069
|
|
},
|
|
{
|
|
"epoch": 2.5907065563335454,
|
|
"grad_norm": 15.104010990271298,
|
|
"learning_rate": 5.582424472491927e-07,
|
|
"loss": 0.6260243654251099,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 2.591343093570974,
|
|
"grad_norm": 10.351975406104097,
|
|
"learning_rate": 5.565429984311399e-07,
|
|
"loss": 0.8103821277618408,
|
|
"step": 4071
|
|
},
|
|
{
|
|
"epoch": 2.591979630808402,
|
|
"grad_norm": 14.33078262299076,
|
|
"learning_rate": 5.548459878963774e-07,
|
|
"loss": 0.38548845052719116,
|
|
"step": 4072
|
|
},
|
|
{
|
|
"epoch": 2.5926161680458306,
|
|
"grad_norm": 9.073929636402251,
|
|
"learning_rate": 5.531514165761164e-07,
|
|
"loss": 0.2681104242801666,
|
|
"step": 4073
|
|
},
|
|
{
|
|
"epoch": 2.593252705283259,
|
|
"grad_norm": 9.403328586706515,
|
|
"learning_rate": 5.514592854002287e-07,
|
|
"loss": 0.6098455786705017,
|
|
"step": 4074
|
|
},
|
|
{
|
|
"epoch": 2.5938892425206874,
|
|
"grad_norm": 9.82235715727146,
|
|
"learning_rate": 5.497695952972471e-07,
|
|
"loss": 0.38505712151527405,
|
|
"step": 4075
|
|
},
|
|
{
|
|
"epoch": 2.594525779758116,
|
|
"grad_norm": 11.536931030268624,
|
|
"learning_rate": 5.480823471943625e-07,
|
|
"loss": 0.6019818782806396,
|
|
"step": 4076
|
|
},
|
|
{
|
|
"epoch": 2.5951623169955442,
|
|
"grad_norm": 8.008487992794725,
|
|
"learning_rate": 5.463975420174327e-07,
|
|
"loss": 0.5317175984382629,
|
|
"step": 4077
|
|
},
|
|
{
|
|
"epoch": 2.5957988542329726,
|
|
"grad_norm": 18.47173478084601,
|
|
"learning_rate": 5.447151806909651e-07,
|
|
"loss": 0.29498717188835144,
|
|
"step": 4078
|
|
},
|
|
{
|
|
"epoch": 2.596435391470401,
|
|
"grad_norm": 11.995029436681856,
|
|
"learning_rate": 5.430352641381348e-07,
|
|
"loss": 0.456617146730423,
|
|
"step": 4079
|
|
},
|
|
{
|
|
"epoch": 2.5970719287078294,
|
|
"grad_norm": 7.926238739595407,
|
|
"learning_rate": 5.413577932807712e-07,
|
|
"loss": 0.3908483684062958,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 2.597708465945258,
|
|
"grad_norm": 4.227522381328757,
|
|
"learning_rate": 5.396827690393624e-07,
|
|
"loss": 0.1724707931280136,
|
|
"step": 4081
|
|
},
|
|
{
|
|
"epoch": 2.5983450031826862,
|
|
"grad_norm": 11.12669868267013,
|
|
"learning_rate": 5.380101923330522e-07,
|
|
"loss": 0.504496693611145,
|
|
"step": 4082
|
|
},
|
|
{
|
|
"epoch": 2.5989815404201146,
|
|
"grad_norm": 8.202957526577867,
|
|
"learning_rate": 5.363400640796467e-07,
|
|
"loss": 0.3466747999191284,
|
|
"step": 4083
|
|
},
|
|
{
|
|
"epoch": 2.599618077657543,
|
|
"grad_norm": 11.685783801431908,
|
|
"learning_rate": 5.346723851956015e-07,
|
|
"loss": 0.7704594135284424,
|
|
"step": 4084
|
|
},
|
|
{
|
|
"epoch": 2.6002546148949714,
|
|
"grad_norm": 11.803081780401964,
|
|
"learning_rate": 5.330071565960315e-07,
|
|
"loss": 0.24229168891906738,
|
|
"step": 4085
|
|
},
|
|
{
|
|
"epoch": 2.6008911521324,
|
|
"grad_norm": 12.095848705382494,
|
|
"learning_rate": 5.313443791947093e-07,
|
|
"loss": 0.28725600242614746,
|
|
"step": 4086
|
|
},
|
|
{
|
|
"epoch": 2.6015276893698283,
|
|
"grad_norm": 6.946883519139256,
|
|
"learning_rate": 5.296840539040593e-07,
|
|
"loss": 0.35463210940361023,
|
|
"step": 4087
|
|
},
|
|
{
|
|
"epoch": 2.6021642266072567,
|
|
"grad_norm": 7.713586628251217,
|
|
"learning_rate": 5.280261816351606e-07,
|
|
"loss": 0.6036189794540405,
|
|
"step": 4088
|
|
},
|
|
{
|
|
"epoch": 2.602800763844685,
|
|
"grad_norm": 12.353688937807355,
|
|
"learning_rate": 5.26370763297746e-07,
|
|
"loss": 0.3834241032600403,
|
|
"step": 4089
|
|
},
|
|
{
|
|
"epoch": 2.6034373010821135,
|
|
"grad_norm": 20.077134309396904,
|
|
"learning_rate": 5.247177998002057e-07,
|
|
"loss": 0.7139850854873657,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 2.6040738383195414,
|
|
"grad_norm": 12.63580826802718,
|
|
"learning_rate": 5.230672920495755e-07,
|
|
"loss": 0.38600242137908936,
|
|
"step": 4091
|
|
},
|
|
{
|
|
"epoch": 2.6047103755569703,
|
|
"grad_norm": 11.58134646523883,
|
|
"learning_rate": 5.214192409515512e-07,
|
|
"loss": 0.43848711252212524,
|
|
"step": 4092
|
|
},
|
|
{
|
|
"epoch": 2.6053469127943982,
|
|
"grad_norm": 11.317996434790762,
|
|
"learning_rate": 5.197736474104759e-07,
|
|
"loss": 0.2798628807067871,
|
|
"step": 4093
|
|
},
|
|
{
|
|
"epoch": 2.605983450031827,
|
|
"grad_norm": 7.499908830645163,
|
|
"learning_rate": 5.181305123293457e-07,
|
|
"loss": 0.18069583177566528,
|
|
"step": 4094
|
|
},
|
|
{
|
|
"epoch": 2.606619987269255,
|
|
"grad_norm": 7.327706335983863,
|
|
"learning_rate": 5.164898366098065e-07,
|
|
"loss": 0.40995126962661743,
|
|
"step": 4095
|
|
},
|
|
{
|
|
"epoch": 2.607256524506684,
|
|
"grad_norm": 13.765019235257453,
|
|
"learning_rate": 5.148516211521571e-07,
|
|
"loss": 0.24555763602256775,
|
|
"step": 4096
|
|
},
|
|
{
|
|
"epoch": 2.607893061744112,
|
|
"grad_norm": 10.689174968770708,
|
|
"learning_rate": 5.132158668553439e-07,
|
|
"loss": 0.39908403158187866,
|
|
"step": 4097
|
|
},
|
|
{
|
|
"epoch": 2.6085295989815407,
|
|
"grad_norm": 14.212210735050636,
|
|
"learning_rate": 5.11582574616964e-07,
|
|
"loss": 0.4648662209510803,
|
|
"step": 4098
|
|
},
|
|
{
|
|
"epoch": 2.6091661362189686,
|
|
"grad_norm": 9.805160053637175,
|
|
"learning_rate": 5.099517453332614e-07,
|
|
"loss": 0.32188141345977783,
|
|
"step": 4099
|
|
},
|
|
{
|
|
"epoch": 2.609802673456397,
|
|
"grad_norm": 12.411383679871745,
|
|
"learning_rate": 5.083233798991333e-07,
|
|
"loss": 0.41573289036750793,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 2.6104392106938255,
|
|
"grad_norm": 11.748131290202561,
|
|
"learning_rate": 5.066974792081197e-07,
|
|
"loss": 0.24640752375125885,
|
|
"step": 4101
|
|
},
|
|
{
|
|
"epoch": 2.611075747931254,
|
|
"grad_norm": 13.19669454957644,
|
|
"learning_rate": 5.050740441524099e-07,
|
|
"loss": 0.4697602093219757,
|
|
"step": 4102
|
|
},
|
|
{
|
|
"epoch": 2.6117122851686823,
|
|
"grad_norm": 10.841348257912811,
|
|
"learning_rate": 5.034530756228423e-07,
|
|
"loss": 0.26399198174476624,
|
|
"step": 4103
|
|
},
|
|
{
|
|
"epoch": 2.6123488224061107,
|
|
"grad_norm": 9.624692964681238,
|
|
"learning_rate": 5.018345745088993e-07,
|
|
"loss": 0.5276693105697632,
|
|
"step": 4104
|
|
},
|
|
{
|
|
"epoch": 2.612985359643539,
|
|
"grad_norm": 11.806129449246459,
|
|
"learning_rate": 5.002185416987104e-07,
|
|
"loss": 1.066301941871643,
|
|
"step": 4105
|
|
},
|
|
{
|
|
"epoch": 2.6136218968809675,
|
|
"grad_norm": 8.46730554469622,
|
|
"learning_rate": 4.986049780790502e-07,
|
|
"loss": 0.2760232090950012,
|
|
"step": 4106
|
|
},
|
|
{
|
|
"epoch": 2.614258434118396,
|
|
"grad_norm": 18.65712952851169,
|
|
"learning_rate": 4.969938845353384e-07,
|
|
"loss": 0.48304522037506104,
|
|
"step": 4107
|
|
},
|
|
{
|
|
"epoch": 2.6148949713558243,
|
|
"grad_norm": 7.4577480127183495,
|
|
"learning_rate": 4.953852619516386e-07,
|
|
"loss": 0.1664918065071106,
|
|
"step": 4108
|
|
},
|
|
{
|
|
"epoch": 2.6155315085932527,
|
|
"grad_norm": 8.603236740899478,
|
|
"learning_rate": 4.937791112106616e-07,
|
|
"loss": 0.22398081421852112,
|
|
"step": 4109
|
|
},
|
|
{
|
|
"epoch": 2.616168045830681,
|
|
"grad_norm": 9.607031452896308,
|
|
"learning_rate": 4.921754331937584e-07,
|
|
"loss": 0.6455321311950684,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 2.6168045830681095,
|
|
"grad_norm": 10.595930480633525,
|
|
"learning_rate": 4.905742287809246e-07,
|
|
"loss": 0.5751909613609314,
|
|
"step": 4111
|
|
},
|
|
{
|
|
"epoch": 2.617441120305538,
|
|
"grad_norm": 8.102143643941776,
|
|
"learning_rate": 4.889754988507967e-07,
|
|
"loss": 0.255344033241272,
|
|
"step": 4112
|
|
},
|
|
{
|
|
"epoch": 2.6180776575429663,
|
|
"grad_norm": 10.077475170586256,
|
|
"learning_rate": 4.87379244280658e-07,
|
|
"loss": 0.2505664825439453,
|
|
"step": 4113
|
|
},
|
|
{
|
|
"epoch": 2.6187141947803947,
|
|
"grad_norm": 14.044138182783676,
|
|
"learning_rate": 4.857854659464262e-07,
|
|
"loss": 0.7108798623085022,
|
|
"step": 4114
|
|
},
|
|
{
|
|
"epoch": 2.619350732017823,
|
|
"grad_norm": 6.653144230731202,
|
|
"learning_rate": 4.841941647226672e-07,
|
|
"loss": 0.39315658807754517,
|
|
"step": 4115
|
|
},
|
|
{
|
|
"epoch": 2.6199872692552515,
|
|
"grad_norm": 8.55804768661834,
|
|
"learning_rate": 4.826053414825844e-07,
|
|
"loss": 0.30764633417129517,
|
|
"step": 4116
|
|
},
|
|
{
|
|
"epoch": 2.62062380649268,
|
|
"grad_norm": 13.600367121111642,
|
|
"learning_rate": 4.810189970980211e-07,
|
|
"loss": 0.31536000967025757,
|
|
"step": 4117
|
|
},
|
|
{
|
|
"epoch": 2.6212603437301083,
|
|
"grad_norm": 14.969748889955568,
|
|
"learning_rate": 4.794351324394614e-07,
|
|
"loss": 0.455342561006546,
|
|
"step": 4118
|
|
},
|
|
{
|
|
"epoch": 2.6218968809675367,
|
|
"grad_norm": 10.651339465849556,
|
|
"learning_rate": 4.778537483760271e-07,
|
|
"loss": 0.36839210987091064,
|
|
"step": 4119
|
|
},
|
|
{
|
|
"epoch": 2.622533418204965,
|
|
"grad_norm": 18.23953235309341,
|
|
"learning_rate": 4.7627484577548234e-07,
|
|
"loss": 0.932968020439148,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 2.6231699554423935,
|
|
"grad_norm": 11.322514949690214,
|
|
"learning_rate": 4.746984255042247e-07,
|
|
"loss": 0.8573684096336365,
|
|
"step": 4121
|
|
},
|
|
{
|
|
"epoch": 2.623806492679822,
|
|
"grad_norm": 9.967012324875226,
|
|
"learning_rate": 4.731244884272945e-07,
|
|
"loss": 0.1748356968164444,
|
|
"step": 4122
|
|
},
|
|
{
|
|
"epoch": 2.6244430299172503,
|
|
"grad_norm": 15.446225825378987,
|
|
"learning_rate": 4.715530354083658e-07,
|
|
"loss": 1.1506575345993042,
|
|
"step": 4123
|
|
},
|
|
{
|
|
"epoch": 2.6250795671546783,
|
|
"grad_norm": 20.610873169385787,
|
|
"learning_rate": 4.699840673097511e-07,
|
|
"loss": 0.32537931203842163,
|
|
"step": 4124
|
|
},
|
|
{
|
|
"epoch": 2.625716104392107,
|
|
"grad_norm": 11.03342668393611,
|
|
"learning_rate": 4.6841758499239887e-07,
|
|
"loss": 0.6731318235397339,
|
|
"step": 4125
|
|
},
|
|
{
|
|
"epoch": 2.626352641629535,
|
|
"grad_norm": 7.615237564334669,
|
|
"learning_rate": 4.668535893158954e-07,
|
|
"loss": 0.5569811463356018,
|
|
"step": 4126
|
|
},
|
|
{
|
|
"epoch": 2.626989178866964,
|
|
"grad_norm": 9.731183305210399,
|
|
"learning_rate": 4.6529208113845816e-07,
|
|
"loss": 0.2950124740600586,
|
|
"step": 4127
|
|
},
|
|
{
|
|
"epoch": 2.627625716104392,
|
|
"grad_norm": 9.03741441722945,
|
|
"learning_rate": 4.6373306131694493e-07,
|
|
"loss": 0.8522462844848633,
|
|
"step": 4128
|
|
},
|
|
{
|
|
"epoch": 2.6282622533418207,
|
|
"grad_norm": 11.08357409031455,
|
|
"learning_rate": 4.621765307068443e-07,
|
|
"loss": 0.4782252311706543,
|
|
"step": 4129
|
|
},
|
|
{
|
|
"epoch": 2.6288987905792487,
|
|
"grad_norm": 17.39278566316733,
|
|
"learning_rate": 4.606224901622797e-07,
|
|
"loss": 2.614020347595215,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 2.6295353278166775,
|
|
"grad_norm": 9.915714779805283,
|
|
"learning_rate": 4.5907094053600887e-07,
|
|
"loss": 0.5547182559967041,
|
|
"step": 4131
|
|
},
|
|
{
|
|
"epoch": 2.6301718650541055,
|
|
"grad_norm": 7.416640257462956,
|
|
"learning_rate": 4.575218826794231e-07,
|
|
"loss": 0.42412108182907104,
|
|
"step": 4132
|
|
},
|
|
{
|
|
"epoch": 2.630808402291534,
|
|
"grad_norm": 8.435811343186867,
|
|
"learning_rate": 4.5597531744254575e-07,
|
|
"loss": 0.2449091523885727,
|
|
"step": 4133
|
|
},
|
|
{
|
|
"epoch": 2.6314449395289623,
|
|
"grad_norm": 12.655023181613203,
|
|
"learning_rate": 4.544312456740313e-07,
|
|
"loss": 0.3620641827583313,
|
|
"step": 4134
|
|
},
|
|
{
|
|
"epoch": 2.6320814767663907,
|
|
"grad_norm": 9.13939931506395,
|
|
"learning_rate": 4.5288966822116807e-07,
|
|
"loss": 0.5226321816444397,
|
|
"step": 4135
|
|
},
|
|
{
|
|
"epoch": 2.632718014003819,
|
|
"grad_norm": 13.019124127750661,
|
|
"learning_rate": 4.5135058592987333e-07,
|
|
"loss": 0.37915101647377014,
|
|
"step": 4136
|
|
},
|
|
{
|
|
"epoch": 2.6333545512412475,
|
|
"grad_norm": 10.808420778477727,
|
|
"learning_rate": 4.498139996446976e-07,
|
|
"loss": 0.6647825837135315,
|
|
"step": 4137
|
|
},
|
|
{
|
|
"epoch": 2.633991088478676,
|
|
"grad_norm": 11.57786296368533,
|
|
"learning_rate": 4.482799102088187e-07,
|
|
"loss": 0.3755315840244293,
|
|
"step": 4138
|
|
},
|
|
{
|
|
"epoch": 2.6346276257161043,
|
|
"grad_norm": 8.875610773562085,
|
|
"learning_rate": 4.4674831846404863e-07,
|
|
"loss": 0.5440626740455627,
|
|
"step": 4139
|
|
},
|
|
{
|
|
"epoch": 2.6352641629535327,
|
|
"grad_norm": 8.752354832583505,
|
|
"learning_rate": 4.4521922525082526e-07,
|
|
"loss": 0.3065177798271179,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 2.635900700190961,
|
|
"grad_norm": 11.179851780551138,
|
|
"learning_rate": 4.436926314082163e-07,
|
|
"loss": 0.5731112957000732,
|
|
"step": 4141
|
|
},
|
|
{
|
|
"epoch": 2.6365372374283895,
|
|
"grad_norm": 9.757327221220137,
|
|
"learning_rate": 4.421685377739182e-07,
|
|
"loss": 0.2809770703315735,
|
|
"step": 4142
|
|
},
|
|
{
|
|
"epoch": 2.637173774665818,
|
|
"grad_norm": 14.298061338871626,
|
|
"learning_rate": 4.406469451842571e-07,
|
|
"loss": 0.4565696120262146,
|
|
"step": 4143
|
|
},
|
|
{
|
|
"epoch": 2.6378103119032463,
|
|
"grad_norm": 8.291246701697267,
|
|
"learning_rate": 4.3912785447418227e-07,
|
|
"loss": 0.5066615343093872,
|
|
"step": 4144
|
|
},
|
|
{
|
|
"epoch": 2.6384468491406747,
|
|
"grad_norm": 7.975907034830099,
|
|
"learning_rate": 4.376112664772747e-07,
|
|
"loss": 0.3097657561302185,
|
|
"step": 4145
|
|
},
|
|
{
|
|
"epoch": 2.639083386378103,
|
|
"grad_norm": 10.15791583988077,
|
|
"learning_rate": 4.360971820257409e-07,
|
|
"loss": 0.5922439694404602,
|
|
"step": 4146
|
|
},
|
|
{
|
|
"epoch": 2.6397199236155315,
|
|
"grad_norm": 14.877454156446959,
|
|
"learning_rate": 4.345856019504119e-07,
|
|
"loss": 0.8842718601226807,
|
|
"step": 4147
|
|
},
|
|
{
|
|
"epoch": 2.64035646085296,
|
|
"grad_norm": 10.4929514799719,
|
|
"learning_rate": 4.3307652708074465e-07,
|
|
"loss": 0.3251538872718811,
|
|
"step": 4148
|
|
},
|
|
{
|
|
"epoch": 2.6409929980903883,
|
|
"grad_norm": 14.58847937445859,
|
|
"learning_rate": 4.315699582448257e-07,
|
|
"loss": 1.2450225353240967,
|
|
"step": 4149
|
|
},
|
|
{
|
|
"epoch": 2.6416295353278167,
|
|
"grad_norm": 8.125947860689633,
|
|
"learning_rate": 4.300658962693588e-07,
|
|
"loss": 0.48094889521598816,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 2.642266072565245,
|
|
"grad_norm": 16.411867779273845,
|
|
"learning_rate": 4.285643419796798e-07,
|
|
"loss": 0.7015743851661682,
|
|
"step": 4151
|
|
},
|
|
{
|
|
"epoch": 2.6429026098026736,
|
|
"grad_norm": 9.545006001252185,
|
|
"learning_rate": 4.2706529619974345e-07,
|
|
"loss": 0.665250301361084,
|
|
"step": 4152
|
|
},
|
|
{
|
|
"epoch": 2.643539147040102,
|
|
"grad_norm": 14.62170973169928,
|
|
"learning_rate": 4.255687597521302e-07,
|
|
"loss": 0.5696743726730347,
|
|
"step": 4153
|
|
},
|
|
{
|
|
"epoch": 2.6441756842775304,
|
|
"grad_norm": 16.869967323632615,
|
|
"learning_rate": 4.240747334580425e-07,
|
|
"loss": 0.5156615376472473,
|
|
"step": 4154
|
|
},
|
|
{
|
|
"epoch": 2.6448122215149588,
|
|
"grad_norm": 11.516758088803265,
|
|
"learning_rate": 4.225832181373052e-07,
|
|
"loss": 0.2613343596458435,
|
|
"step": 4155
|
|
},
|
|
{
|
|
"epoch": 2.645448758752387,
|
|
"grad_norm": 14.348577838654752,
|
|
"learning_rate": 4.210942146083685e-07,
|
|
"loss": 0.4780927300453186,
|
|
"step": 4156
|
|
},
|
|
{
|
|
"epoch": 2.6460852959898156,
|
|
"grad_norm": 12.026397828025917,
|
|
"learning_rate": 4.1960772368829837e-07,
|
|
"loss": 0.2942112684249878,
|
|
"step": 4157
|
|
},
|
|
{
|
|
"epoch": 2.646721833227244,
|
|
"grad_norm": 10.600619397365858,
|
|
"learning_rate": 4.1812374619278785e-07,
|
|
"loss": 0.20896536111831665,
|
|
"step": 4158
|
|
},
|
|
{
|
|
"epoch": 2.647358370464672,
|
|
"grad_norm": 10.57984621363277,
|
|
"learning_rate": 4.166422829361477e-07,
|
|
"loss": 0.592284083366394,
|
|
"step": 4159
|
|
},
|
|
{
|
|
"epoch": 2.6479949077021008,
|
|
"grad_norm": 7.535197852927329,
|
|
"learning_rate": 4.1516333473130886e-07,
|
|
"loss": 0.201694056391716,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 2.6486314449395287,
|
|
"grad_norm": 7.352041767336546,
|
|
"learning_rate": 4.1368690238982323e-07,
|
|
"loss": 0.34570127725601196,
|
|
"step": 4161
|
|
},
|
|
{
|
|
"epoch": 2.6492679821769576,
|
|
"grad_norm": 9.610957600875114,
|
|
"learning_rate": 4.122129867218627e-07,
|
|
"loss": 0.5658895969390869,
|
|
"step": 4162
|
|
},
|
|
{
|
|
"epoch": 2.6499045194143855,
|
|
"grad_norm": 8.540151651713604,
|
|
"learning_rate": 4.1074158853621704e-07,
|
|
"loss": 0.5704756379127502,
|
|
"step": 4163
|
|
},
|
|
{
|
|
"epoch": 2.6505410566518144,
|
|
"grad_norm": 14.217319486489174,
|
|
"learning_rate": 4.092727086402942e-07,
|
|
"loss": 0.2871258556842804,
|
|
"step": 4164
|
|
},
|
|
{
|
|
"epoch": 2.6511775938892423,
|
|
"grad_norm": 11.297679532923324,
|
|
"learning_rate": 4.078063478401212e-07,
|
|
"loss": 0.6045399904251099,
|
|
"step": 4165
|
|
},
|
|
{
|
|
"epoch": 2.6518141311266707,
|
|
"grad_norm": 12.20227079381005,
|
|
"learning_rate": 4.063425069403437e-07,
|
|
"loss": 1.1303468942642212,
|
|
"step": 4166
|
|
},
|
|
{
|
|
"epoch": 2.652450668364099,
|
|
"grad_norm": 9.686851935712701,
|
|
"learning_rate": 4.048811867442215e-07,
|
|
"loss": 0.31758686900138855,
|
|
"step": 4167
|
|
},
|
|
{
|
|
"epoch": 2.6530872056015276,
|
|
"grad_norm": 8.068110064996098,
|
|
"learning_rate": 4.034223880536342e-07,
|
|
"loss": 0.31641390919685364,
|
|
"step": 4168
|
|
},
|
|
{
|
|
"epoch": 2.653723742838956,
|
|
"grad_norm": 8.874594750065006,
|
|
"learning_rate": 4.0196611166907764e-07,
|
|
"loss": 0.3589363098144531,
|
|
"step": 4169
|
|
},
|
|
{
|
|
"epoch": 2.6543602800763844,
|
|
"grad_norm": 14.345723374561224,
|
|
"learning_rate": 4.0051235838965973e-07,
|
|
"loss": 0.3416450023651123,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 2.6549968173138128,
|
|
"grad_norm": 11.228361720735702,
|
|
"learning_rate": 3.990611290131091e-07,
|
|
"loss": 0.5151109099388123,
|
|
"step": 4171
|
|
},
|
|
{
|
|
"epoch": 2.655633354551241,
|
|
"grad_norm": 8.780694002750788,
|
|
"learning_rate": 3.9761242433576595e-07,
|
|
"loss": 0.31821924448013306,
|
|
"step": 4172
|
|
},
|
|
{
|
|
"epoch": 2.6562698917886696,
|
|
"grad_norm": 18.539933039403408,
|
|
"learning_rate": 3.961662451525872e-07,
|
|
"loss": 0.5817490220069885,
|
|
"step": 4173
|
|
},
|
|
{
|
|
"epoch": 2.656906429026098,
|
|
"grad_norm": 13.025868195763747,
|
|
"learning_rate": 3.9472259225714127e-07,
|
|
"loss": 0.27606385946273804,
|
|
"step": 4174
|
|
},
|
|
{
|
|
"epoch": 2.6575429662635264,
|
|
"grad_norm": 8.506420635098863,
|
|
"learning_rate": 3.9328146644161403e-07,
|
|
"loss": 0.28132539987564087,
|
|
"step": 4175
|
|
},
|
|
{
|
|
"epoch": 2.6581795035009548,
|
|
"grad_norm": 12.089889238567629,
|
|
"learning_rate": 3.918428684968012e-07,
|
|
"loss": 0.41118401288986206,
|
|
"step": 4176
|
|
},
|
|
{
|
|
"epoch": 2.658816040738383,
|
|
"grad_norm": 14.3359741660011,
|
|
"learning_rate": 3.9040679921211356e-07,
|
|
"loss": 0.8642361760139465,
|
|
"step": 4177
|
|
},
|
|
{
|
|
"epoch": 2.6594525779758116,
|
|
"grad_norm": 10.318794332910182,
|
|
"learning_rate": 3.889732593755724e-07,
|
|
"loss": 0.7048256993293762,
|
|
"step": 4178
|
|
},
|
|
{
|
|
"epoch": 2.66008911521324,
|
|
"grad_norm": 8.504319948271643,
|
|
"learning_rate": 3.875422497738146e-07,
|
|
"loss": 0.5382933616638184,
|
|
"step": 4179
|
|
},
|
|
{
|
|
"epoch": 2.6607256524506684,
|
|
"grad_norm": 9.842494108262397,
|
|
"learning_rate": 3.8611377119208206e-07,
|
|
"loss": 0.29906243085861206,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 2.661362189688097,
|
|
"grad_norm": 11.093492258647938,
|
|
"learning_rate": 3.8468782441423604e-07,
|
|
"loss": 0.30749762058258057,
|
|
"step": 4181
|
|
},
|
|
{
|
|
"epoch": 2.661998726925525,
|
|
"grad_norm": 8.551960698376003,
|
|
"learning_rate": 3.832644102227423e-07,
|
|
"loss": 0.37611305713653564,
|
|
"step": 4182
|
|
},
|
|
{
|
|
"epoch": 2.6626352641629536,
|
|
"grad_norm": 10.240978386480732,
|
|
"learning_rate": 3.8184352939867885e-07,
|
|
"loss": 0.3026156723499298,
|
|
"step": 4183
|
|
},
|
|
{
|
|
"epoch": 2.663271801400382,
|
|
"grad_norm": 12.51845793159237,
|
|
"learning_rate": 3.8042518272173366e-07,
|
|
"loss": 0.391543984413147,
|
|
"step": 4184
|
|
},
|
|
{
|
|
"epoch": 2.6639083386378104,
|
|
"grad_norm": 13.412053496998116,
|
|
"learning_rate": 3.790093709702053e-07,
|
|
"loss": 0.32294613122940063,
|
|
"step": 4185
|
|
},
|
|
{
|
|
"epoch": 2.664544875875239,
|
|
"grad_norm": 12.542049477092295,
|
|
"learning_rate": 3.775960949209995e-07,
|
|
"loss": 0.687692403793335,
|
|
"step": 4186
|
|
},
|
|
{
|
|
"epoch": 2.665181413112667,
|
|
"grad_norm": 13.725429694562523,
|
|
"learning_rate": 3.761853553496314e-07,
|
|
"loss": 0.4429126977920532,
|
|
"step": 4187
|
|
},
|
|
{
|
|
"epoch": 2.6658179503500956,
|
|
"grad_norm": 17.929115713521966,
|
|
"learning_rate": 3.7477715303022353e-07,
|
|
"loss": 0.8667738437652588,
|
|
"step": 4188
|
|
},
|
|
{
|
|
"epoch": 2.666454487587524,
|
|
"grad_norm": 9.416903155413737,
|
|
"learning_rate": 3.733714887355072e-07,
|
|
"loss": 0.44168245792388916,
|
|
"step": 4189
|
|
},
|
|
{
|
|
"epoch": 2.6670910248249524,
|
|
"grad_norm": 15.632077153500358,
|
|
"learning_rate": 3.719683632368204e-07,
|
|
"loss": 0.6333277225494385,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 2.667727562062381,
|
|
"grad_norm": 11.312890890394607,
|
|
"learning_rate": 3.7056777730410664e-07,
|
|
"loss": 0.9854741096496582,
|
|
"step": 4191
|
|
},
|
|
{
|
|
"epoch": 2.668364099299809,
|
|
"grad_norm": 7.932323143278352,
|
|
"learning_rate": 3.6916973170592117e-07,
|
|
"loss": 0.3559843897819519,
|
|
"step": 4192
|
|
},
|
|
{
|
|
"epoch": 2.6690006365372376,
|
|
"grad_norm": 8.839586522841298,
|
|
"learning_rate": 3.6777422720941704e-07,
|
|
"loss": 0.4284633696079254,
|
|
"step": 4193
|
|
},
|
|
{
|
|
"epoch": 2.6696371737746656,
|
|
"grad_norm": 11.759073839521456,
|
|
"learning_rate": 3.663812645803605e-07,
|
|
"loss": 0.7548391819000244,
|
|
"step": 4194
|
|
},
|
|
{
|
|
"epoch": 2.6702737110120944,
|
|
"grad_norm": 10.435700302664355,
|
|
"learning_rate": 3.6499084458311853e-07,
|
|
"loss": 0.5070865750312805,
|
|
"step": 4195
|
|
},
|
|
{
|
|
"epoch": 2.6709102482495224,
|
|
"grad_norm": 13.789427295975079,
|
|
"learning_rate": 3.6360296798066464e-07,
|
|
"loss": 0.5341179966926575,
|
|
"step": 4196
|
|
},
|
|
{
|
|
"epoch": 2.6715467854869512,
|
|
"grad_norm": 12.21163817391416,
|
|
"learning_rate": 3.622176355345747e-07,
|
|
"loss": 0.3842467963695526,
|
|
"step": 4197
|
|
},
|
|
{
|
|
"epoch": 2.672183322724379,
|
|
"grad_norm": 14.848081871873784,
|
|
"learning_rate": 3.6083484800503163e-07,
|
|
"loss": 0.8403598070144653,
|
|
"step": 4198
|
|
},
|
|
{
|
|
"epoch": 2.672819859961808,
|
|
"grad_norm": 11.371899569255133,
|
|
"learning_rate": 3.594546061508203e-07,
|
|
"loss": 0.3693830370903015,
|
|
"step": 4199
|
|
},
|
|
{
|
|
"epoch": 2.673456397199236,
|
|
"grad_norm": 17.943021819553415,
|
|
"learning_rate": 3.5807691072932803e-07,
|
|
"loss": 0.7598447799682617,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 2.6740929344366644,
|
|
"grad_norm": 28.16227522314146,
|
|
"learning_rate": 3.567017624965452e-07,
|
|
"loss": 0.6277492046356201,
|
|
"step": 4201
|
|
},
|
|
{
|
|
"epoch": 2.674729471674093,
|
|
"grad_norm": 16.85459217174598,
|
|
"learning_rate": 3.553291622070659e-07,
|
|
"loss": 0.45080533623695374,
|
|
"step": 4202
|
|
},
|
|
{
|
|
"epoch": 2.675366008911521,
|
|
"grad_norm": 7.95395586381539,
|
|
"learning_rate": 3.539591106140827e-07,
|
|
"loss": 0.2938351631164551,
|
|
"step": 4203
|
|
},
|
|
{
|
|
"epoch": 2.6760025461489496,
|
|
"grad_norm": 12.044280872279797,
|
|
"learning_rate": 3.5259160846939423e-07,
|
|
"loss": 0.2124682068824768,
|
|
"step": 4204
|
|
},
|
|
{
|
|
"epoch": 2.676639083386378,
|
|
"grad_norm": 14.932533595284871,
|
|
"learning_rate": 3.5122665652339574e-07,
|
|
"loss": 0.4682321548461914,
|
|
"step": 4205
|
|
},
|
|
{
|
|
"epoch": 2.6772756206238064,
|
|
"grad_norm": 10.475507395361465,
|
|
"learning_rate": 3.498642555250864e-07,
|
|
"loss": 0.21980136632919312,
|
|
"step": 4206
|
|
},
|
|
{
|
|
"epoch": 2.677912157861235,
|
|
"grad_norm": 14.50840416951671,
|
|
"learning_rate": 3.4850440622206326e-07,
|
|
"loss": 0.731866717338562,
|
|
"step": 4207
|
|
},
|
|
{
|
|
"epoch": 2.6785486950986632,
|
|
"grad_norm": 12.09856820155982,
|
|
"learning_rate": 3.471471093605233e-07,
|
|
"loss": 0.272088885307312,
|
|
"step": 4208
|
|
},
|
|
{
|
|
"epoch": 2.6791852323360916,
|
|
"grad_norm": 11.76734698312069,
|
|
"learning_rate": 3.4579236568526684e-07,
|
|
"loss": 0.9739639759063721,
|
|
"step": 4209
|
|
},
|
|
{
|
|
"epoch": 2.67982176957352,
|
|
"grad_norm": 14.33233822163741,
|
|
"learning_rate": 3.4444017593968647e-07,
|
|
"loss": 1.1476022005081177,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 2.6804583068109484,
|
|
"grad_norm": 12.152674084089535,
|
|
"learning_rate": 3.430905408657792e-07,
|
|
"loss": 0.39027106761932373,
|
|
"step": 4211
|
|
},
|
|
{
|
|
"epoch": 2.681094844048377,
|
|
"grad_norm": 10.30755416028074,
|
|
"learning_rate": 3.417434612041376e-07,
|
|
"loss": 0.7856820225715637,
|
|
"step": 4212
|
|
},
|
|
{
|
|
"epoch": 2.6817313812858052,
|
|
"grad_norm": 8.167847327382658,
|
|
"learning_rate": 3.403989376939515e-07,
|
|
"loss": 0.32235854864120483,
|
|
"step": 4213
|
|
},
|
|
{
|
|
"epoch": 2.6823679185232336,
|
|
"grad_norm": 13.740937395800405,
|
|
"learning_rate": 3.390569710730085e-07,
|
|
"loss": 0.30362454056739807,
|
|
"step": 4214
|
|
},
|
|
{
|
|
"epoch": 2.683004455760662,
|
|
"grad_norm": 9.93572791846366,
|
|
"learning_rate": 3.3771756207769567e-07,
|
|
"loss": 0.4014972448348999,
|
|
"step": 4215
|
|
},
|
|
{
|
|
"epoch": 2.6836409929980904,
|
|
"grad_norm": 16.83983652036534,
|
|
"learning_rate": 3.3638071144299176e-07,
|
|
"loss": 0.42258787155151367,
|
|
"step": 4216
|
|
},
|
|
{
|
|
"epoch": 2.684277530235519,
|
|
"grad_norm": 8.89934446769384,
|
|
"learning_rate": 3.3504641990247724e-07,
|
|
"loss": 0.5550803542137146,
|
|
"step": 4217
|
|
},
|
|
{
|
|
"epoch": 2.6849140674729473,
|
|
"grad_norm": 11.28235054693943,
|
|
"learning_rate": 3.3371468818832295e-07,
|
|
"loss": 0.3023662269115448,
|
|
"step": 4218
|
|
},
|
|
{
|
|
"epoch": 2.6855506047103757,
|
|
"grad_norm": 10.598756471075994,
|
|
"learning_rate": 3.3238551703129894e-07,
|
|
"loss": 0.5563198328018188,
|
|
"step": 4219
|
|
},
|
|
{
|
|
"epoch": 2.686187141947804,
|
|
"grad_norm": 10.170743643512592,
|
|
"learning_rate": 3.310589071607678e-07,
|
|
"loss": 0.2445223331451416,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 2.6868236791852325,
|
|
"grad_norm": 16.22936474685597,
|
|
"learning_rate": 3.2973485930468953e-07,
|
|
"loss": 0.3825610876083374,
|
|
"step": 4221
|
|
},
|
|
{
|
|
"epoch": 2.687460216422661,
|
|
"grad_norm": 9.90879097876087,
|
|
"learning_rate": 3.284133741896156e-07,
|
|
"loss": 0.3859259784221649,
|
|
"step": 4222
|
|
},
|
|
{
|
|
"epoch": 2.6880967536600893,
|
|
"grad_norm": 11.884020046231964,
|
|
"learning_rate": 3.2709445254068993e-07,
|
|
"loss": 0.42776572704315186,
|
|
"step": 4223
|
|
},
|
|
{
|
|
"epoch": 2.6887332908975177,
|
|
"grad_norm": 14.177835263166626,
|
|
"learning_rate": 3.257780950816547e-07,
|
|
"loss": 0.34132957458496094,
|
|
"step": 4224
|
|
},
|
|
{
|
|
"epoch": 2.6893698281349456,
|
|
"grad_norm": 10.709825078803748,
|
|
"learning_rate": 3.244643025348404e-07,
|
|
"loss": 0.45440587401390076,
|
|
"step": 4225
|
|
},
|
|
{
|
|
"epoch": 2.6900063653723745,
|
|
"grad_norm": 10.881942645660274,
|
|
"learning_rate": 3.2315307562117316e-07,
|
|
"loss": 0.45042353868484497,
|
|
"step": 4226
|
|
},
|
|
{
|
|
"epoch": 2.6906429026098024,
|
|
"grad_norm": 12.177737578153689,
|
|
"learning_rate": 3.2184441506016793e-07,
|
|
"loss": 0.26608598232269287,
|
|
"step": 4227
|
|
},
|
|
{
|
|
"epoch": 2.6912794398472313,
|
|
"grad_norm": 9.865083306888648,
|
|
"learning_rate": 3.205383215699359e-07,
|
|
"loss": 0.20638345181941986,
|
|
"step": 4228
|
|
},
|
|
{
|
|
"epoch": 2.6919159770846592,
|
|
"grad_norm": 9.930389339260044,
|
|
"learning_rate": 3.19234795867176e-07,
|
|
"loss": 0.5358873605728149,
|
|
"step": 4229
|
|
},
|
|
{
|
|
"epoch": 2.692552514322088,
|
|
"grad_norm": 8.006867762632641,
|
|
"learning_rate": 3.179338386671788e-07,
|
|
"loss": 0.24539242684841156,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 2.693189051559516,
|
|
"grad_norm": 11.262185953320438,
|
|
"learning_rate": 3.1663545068382597e-07,
|
|
"loss": 0.24088186025619507,
|
|
"step": 4231
|
|
},
|
|
{
|
|
"epoch": 2.693825588796945,
|
|
"grad_norm": 11.996875378641526,
|
|
"learning_rate": 3.15339632629591e-07,
|
|
"loss": 0.6157453656196594,
|
|
"step": 4232
|
|
},
|
|
{
|
|
"epoch": 2.694462126034373,
|
|
"grad_norm": 12.755239058966195,
|
|
"learning_rate": 3.140463852155329e-07,
|
|
"loss": 0.6274869441986084,
|
|
"step": 4233
|
|
},
|
|
{
|
|
"epoch": 2.6950986632718013,
|
|
"grad_norm": 9.94810465609412,
|
|
"learning_rate": 3.127557091513045e-07,
|
|
"loss": 0.5132277011871338,
|
|
"step": 4234
|
|
},
|
|
{
|
|
"epoch": 2.6957352005092297,
|
|
"grad_norm": 10.532205631405654,
|
|
"learning_rate": 3.1146760514514483e-07,
|
|
"loss": 0.721315324306488,
|
|
"step": 4235
|
|
},
|
|
{
|
|
"epoch": 2.696371737746658,
|
|
"grad_norm": 12.06851584523107,
|
|
"learning_rate": 3.1018207390388357e-07,
|
|
"loss": 0.5017930865287781,
|
|
"step": 4236
|
|
},
|
|
{
|
|
"epoch": 2.6970082749840865,
|
|
"grad_norm": 9.68712290443339,
|
|
"learning_rate": 3.088991161329352e-07,
|
|
"loss": 0.6609745025634766,
|
|
"step": 4237
|
|
},
|
|
{
|
|
"epoch": 2.697644812221515,
|
|
"grad_norm": 6.394919682690512,
|
|
"learning_rate": 3.076187325363078e-07,
|
|
"loss": 0.17533256113529205,
|
|
"step": 4238
|
|
},
|
|
{
|
|
"epoch": 2.6982813494589433,
|
|
"grad_norm": 10.203306521485871,
|
|
"learning_rate": 3.06340923816591e-07,
|
|
"loss": 0.24491241574287415,
|
|
"step": 4239
|
|
},
|
|
{
|
|
"epoch": 2.6989178866963717,
|
|
"grad_norm": 15.974156481461375,
|
|
"learning_rate": 3.050656906749638e-07,
|
|
"loss": 0.36442863941192627,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 2.6995544239338,
|
|
"grad_norm": 8.844978133477396,
|
|
"learning_rate": 3.0379303381119386e-07,
|
|
"loss": 0.3554074168205261,
|
|
"step": 4241
|
|
},
|
|
{
|
|
"epoch": 2.7001909611712285,
|
|
"grad_norm": 11.394578376951932,
|
|
"learning_rate": 3.025229539236324e-07,
|
|
"loss": 0.4078906774520874,
|
|
"step": 4242
|
|
},
|
|
{
|
|
"epoch": 2.700827498408657,
|
|
"grad_norm": 7.115448073721455,
|
|
"learning_rate": 3.0125545170921756e-07,
|
|
"loss": 0.29125985503196716,
|
|
"step": 4243
|
|
},
|
|
{
|
|
"epoch": 2.7014640356460853,
|
|
"grad_norm": 14.513988491089108,
|
|
"learning_rate": 2.9999052786347236e-07,
|
|
"loss": 0.7354227304458618,
|
|
"step": 4244
|
|
},
|
|
{
|
|
"epoch": 2.7021005728835137,
|
|
"grad_norm": 12.27832873637522,
|
|
"learning_rate": 2.987281830805078e-07,
|
|
"loss": 0.5535342693328857,
|
|
"step": 4245
|
|
},
|
|
{
|
|
"epoch": 2.702737110120942,
|
|
"grad_norm": 10.623514225123696,
|
|
"learning_rate": 2.9746841805301464e-07,
|
|
"loss": 0.4273834824562073,
|
|
"step": 4246
|
|
},
|
|
{
|
|
"epoch": 2.7033736473583705,
|
|
"grad_norm": 7.716892779508859,
|
|
"learning_rate": 2.9621123347227275e-07,
|
|
"loss": 0.9452025890350342,
|
|
"step": 4247
|
|
},
|
|
{
|
|
"epoch": 2.704010184595799,
|
|
"grad_norm": 6.920436913369039,
|
|
"learning_rate": 2.9495663002814367e-07,
|
|
"loss": 0.1669454574584961,
|
|
"step": 4248
|
|
},
|
|
{
|
|
"epoch": 2.7046467218332273,
|
|
"grad_norm": 9.880936132825969,
|
|
"learning_rate": 2.937046084090733e-07,
|
|
"loss": 0.5540735721588135,
|
|
"step": 4249
|
|
},
|
|
{
|
|
"epoch": 2.7052832590706557,
|
|
"grad_norm": 9.150729785239252,
|
|
"learning_rate": 2.924551693020894e-07,
|
|
"loss": 0.33344244956970215,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 2.705919796308084,
|
|
"grad_norm": 9.323586923127733,
|
|
"learning_rate": 2.9120831339280506e-07,
|
|
"loss": 0.668574333190918,
|
|
"step": 4251
|
|
},
|
|
{
|
|
"epoch": 2.7065563335455125,
|
|
"grad_norm": 6.744282662840181,
|
|
"learning_rate": 2.8996404136541423e-07,
|
|
"loss": 0.1677035689353943,
|
|
"step": 4252
|
|
},
|
|
{
|
|
"epoch": 2.707192870782941,
|
|
"grad_norm": 11.268284309249525,
|
|
"learning_rate": 2.887223539026934e-07,
|
|
"loss": 1.3707690238952637,
|
|
"step": 4253
|
|
},
|
|
{
|
|
"epoch": 2.7078294080203693,
|
|
"grad_norm": 13.204599760495604,
|
|
"learning_rate": 2.874832516859999e-07,
|
|
"loss": 0.48145592212677,
|
|
"step": 4254
|
|
},
|
|
{
|
|
"epoch": 2.7084659452577977,
|
|
"grad_norm": 11.43020400431346,
|
|
"learning_rate": 2.862467353952747e-07,
|
|
"loss": 0.342043936252594,
|
|
"step": 4255
|
|
},
|
|
{
|
|
"epoch": 2.709102482495226,
|
|
"grad_norm": 10.141017242865624,
|
|
"learning_rate": 2.8501280570903735e-07,
|
|
"loss": 0.2240038812160492,
|
|
"step": 4256
|
|
},
|
|
{
|
|
"epoch": 2.7097390197326545,
|
|
"grad_norm": 12.12872544336196,
|
|
"learning_rate": 2.8378146330438836e-07,
|
|
"loss": 0.5272125601768494,
|
|
"step": 4257
|
|
},
|
|
{
|
|
"epoch": 2.7103755569700825,
|
|
"grad_norm": 6.964906546317959,
|
|
"learning_rate": 2.8255270885701235e-07,
|
|
"loss": 0.3645150363445282,
|
|
"step": 4258
|
|
},
|
|
{
|
|
"epoch": 2.7110120942075113,
|
|
"grad_norm": 9.799382317823582,
|
|
"learning_rate": 2.813265430411666e-07,
|
|
"loss": 0.617933988571167,
|
|
"step": 4259
|
|
},
|
|
{
|
|
"epoch": 2.7116486314449393,
|
|
"grad_norm": 15.829285612557603,
|
|
"learning_rate": 2.801029665296945e-07,
|
|
"loss": 0.46067652106285095,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 2.712285168682368,
|
|
"grad_norm": 8.74206547069849,
|
|
"learning_rate": 2.7888197999401624e-07,
|
|
"loss": 0.4388101100921631,
|
|
"step": 4261
|
|
},
|
|
{
|
|
"epoch": 2.712921705919796,
|
|
"grad_norm": 10.588534515353642,
|
|
"learning_rate": 2.776635841041292e-07,
|
|
"loss": 0.2808324992656708,
|
|
"step": 4262
|
|
},
|
|
{
|
|
"epoch": 2.713558243157225,
|
|
"grad_norm": 9.079775389707791,
|
|
"learning_rate": 2.7644777952861135e-07,
|
|
"loss": 0.6054136753082275,
|
|
"step": 4263
|
|
},
|
|
{
|
|
"epoch": 2.714194780394653,
|
|
"grad_norm": 8.092659107903318,
|
|
"learning_rate": 2.752345669346185e-07,
|
|
"loss": 0.6175906658172607,
|
|
"step": 4264
|
|
},
|
|
{
|
|
"epoch": 2.7148313176320817,
|
|
"grad_norm": 6.813250221303636,
|
|
"learning_rate": 2.740239469878836e-07,
|
|
"loss": 0.6342043876647949,
|
|
"step": 4265
|
|
},
|
|
{
|
|
"epoch": 2.7154678548695097,
|
|
"grad_norm": 12.294422459487345,
|
|
"learning_rate": 2.7281592035271643e-07,
|
|
"loss": 0.6270891427993774,
|
|
"step": 4266
|
|
},
|
|
{
|
|
"epoch": 2.716104392106938,
|
|
"grad_norm": 23.360853471957164,
|
|
"learning_rate": 2.7161048769200337e-07,
|
|
"loss": 0.4364585280418396,
|
|
"step": 4267
|
|
},
|
|
{
|
|
"epoch": 2.7167409293443665,
|
|
"grad_norm": 10.32664074659213,
|
|
"learning_rate": 2.7040764966721144e-07,
|
|
"loss": 0.5619296431541443,
|
|
"step": 4268
|
|
},
|
|
{
|
|
"epoch": 2.717377466581795,
|
|
"grad_norm": 18.76663824164998,
|
|
"learning_rate": 2.692074069383771e-07,
|
|
"loss": 0.8377590179443359,
|
|
"step": 4269
|
|
},
|
|
{
|
|
"epoch": 2.7180140038192233,
|
|
"grad_norm": 12.19355629888269,
|
|
"learning_rate": 2.6800976016411904e-07,
|
|
"loss": 0.4755368232727051,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 2.7186505410566517,
|
|
"grad_norm": 11.71763158935508,
|
|
"learning_rate": 2.668147100016272e-07,
|
|
"loss": 0.24313849210739136,
|
|
"step": 4271
|
|
},
|
|
{
|
|
"epoch": 2.71928707829408,
|
|
"grad_norm": 20.84674385099233,
|
|
"learning_rate": 2.6562225710666855e-07,
|
|
"loss": 0.8417501449584961,
|
|
"step": 4272
|
|
},
|
|
{
|
|
"epoch": 2.7199236155315085,
|
|
"grad_norm": 8.944673335448611,
|
|
"learning_rate": 2.644324021335853e-07,
|
|
"loss": 0.24403586983680725,
|
|
"step": 4273
|
|
},
|
|
{
|
|
"epoch": 2.720560152768937,
|
|
"grad_norm": 15.081005478574195,
|
|
"learning_rate": 2.6324514573529135e-07,
|
|
"loss": 0.3433034121990204,
|
|
"step": 4274
|
|
},
|
|
{
|
|
"epoch": 2.7211966900063653,
|
|
"grad_norm": 12.717091166830789,
|
|
"learning_rate": 2.620604885632799e-07,
|
|
"loss": 0.30776041746139526,
|
|
"step": 4275
|
|
},
|
|
{
|
|
"epoch": 2.7218332272437937,
|
|
"grad_norm": 9.233628456958835,
|
|
"learning_rate": 2.608784312676105e-07,
|
|
"loss": 0.6768572330474854,
|
|
"step": 4276
|
|
},
|
|
{
|
|
"epoch": 2.722469764481222,
|
|
"grad_norm": 8.716046991275995,
|
|
"learning_rate": 2.596989744969236e-07,
|
|
"loss": 0.30308881402015686,
|
|
"step": 4277
|
|
},
|
|
{
|
|
"epoch": 2.7231063017186505,
|
|
"grad_norm": 9.640686155457239,
|
|
"learning_rate": 2.5852211889842717e-07,
|
|
"loss": 0.2957676947116852,
|
|
"step": 4278
|
|
},
|
|
{
|
|
"epoch": 2.723742838956079,
|
|
"grad_norm": 14.9960780673145,
|
|
"learning_rate": 2.5734786511790513e-07,
|
|
"loss": 0.3485487103462219,
|
|
"step": 4279
|
|
},
|
|
{
|
|
"epoch": 2.7243793761935073,
|
|
"grad_norm": 12.30043852886103,
|
|
"learning_rate": 2.561762137997109e-07,
|
|
"loss": 0.557969331741333,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 2.7250159134309357,
|
|
"grad_norm": 13.250766622765129,
|
|
"learning_rate": 2.5500716558677307e-07,
|
|
"loss": 0.5280587673187256,
|
|
"step": 4281
|
|
},
|
|
{
|
|
"epoch": 2.725652450668364,
|
|
"grad_norm": 8.648265992310122,
|
|
"learning_rate": 2.5384072112058845e-07,
|
|
"loss": 0.3716697692871094,
|
|
"step": 4282
|
|
},
|
|
{
|
|
"epoch": 2.7262889879057925,
|
|
"grad_norm": 7.810171136937785,
|
|
"learning_rate": 2.5267688104122777e-07,
|
|
"loss": 0.2588565945625305,
|
|
"step": 4283
|
|
},
|
|
{
|
|
"epoch": 2.726925525143221,
|
|
"grad_norm": 11.040988609947247,
|
|
"learning_rate": 2.5151564598733135e-07,
|
|
"loss": 0.6797652244567871,
|
|
"step": 4284
|
|
},
|
|
{
|
|
"epoch": 2.7275620623806494,
|
|
"grad_norm": 9.569822238643798,
|
|
"learning_rate": 2.5035701659611e-07,
|
|
"loss": 1.268112063407898,
|
|
"step": 4285
|
|
},
|
|
{
|
|
"epoch": 2.7281985996180778,
|
|
"grad_norm": 10.1928489327968,
|
|
"learning_rate": 2.4920099350334513e-07,
|
|
"loss": 0.5419979095458984,
|
|
"step": 4286
|
|
},
|
|
{
|
|
"epoch": 2.728835136855506,
|
|
"grad_norm": 13.988082334266707,
|
|
"learning_rate": 2.480475773433877e-07,
|
|
"loss": 0.7767527103424072,
|
|
"step": 4287
|
|
},
|
|
{
|
|
"epoch": 2.7294716740929346,
|
|
"grad_norm": 12.854143263048222,
|
|
"learning_rate": 2.4689676874915913e-07,
|
|
"loss": 0.9870243072509766,
|
|
"step": 4288
|
|
},
|
|
{
|
|
"epoch": 2.730108211330363,
|
|
"grad_norm": 18.57218071967357,
|
|
"learning_rate": 2.457485683521482e-07,
|
|
"loss": 1.311352252960205,
|
|
"step": 4289
|
|
},
|
|
{
|
|
"epoch": 2.7307447485677914,
|
|
"grad_norm": 12.492913638813363,
|
|
"learning_rate": 2.4460297678241376e-07,
|
|
"loss": 0.7109584808349609,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 2.7313812858052198,
|
|
"grad_norm": 12.692789088860808,
|
|
"learning_rate": 2.434599946685823e-07,
|
|
"loss": 0.26433488726615906,
|
|
"step": 4291
|
|
},
|
|
{
|
|
"epoch": 2.732017823042648,
|
|
"grad_norm": 17.792228239365656,
|
|
"learning_rate": 2.4231962263784934e-07,
|
|
"loss": 0.3746406137943268,
|
|
"step": 4292
|
|
},
|
|
{
|
|
"epoch": 2.732654360280076,
|
|
"grad_norm": 17.318376070131,
|
|
"learning_rate": 2.4118186131597666e-07,
|
|
"loss": 0.3980744183063507,
|
|
"step": 4293
|
|
},
|
|
{
|
|
"epoch": 2.733290897517505,
|
|
"grad_norm": 10.790409083983436,
|
|
"learning_rate": 2.400467113272964e-07,
|
|
"loss": 0.5603748559951782,
|
|
"step": 4294
|
|
},
|
|
{
|
|
"epoch": 2.733927434754933,
|
|
"grad_norm": 10.64101332957832,
|
|
"learning_rate": 2.389141732947048e-07,
|
|
"loss": 0.45091766119003296,
|
|
"step": 4295
|
|
},
|
|
{
|
|
"epoch": 2.734563971992362,
|
|
"grad_norm": 10.629186706240326,
|
|
"learning_rate": 2.377842478396658e-07,
|
|
"loss": 0.516888439655304,
|
|
"step": 4296
|
|
},
|
|
{
|
|
"epoch": 2.7352005092297897,
|
|
"grad_norm": 7.721465776912218,
|
|
"learning_rate": 2.3665693558220958e-07,
|
|
"loss": 0.3436046838760376,
|
|
"step": 4297
|
|
},
|
|
{
|
|
"epoch": 2.7358370464672186,
|
|
"grad_norm": 20.292564735940314,
|
|
"learning_rate": 2.3553223714093298e-07,
|
|
"loss": 0.2613334655761719,
|
|
"step": 4298
|
|
},
|
|
{
|
|
"epoch": 2.7364735837046466,
|
|
"grad_norm": 12.793194491300461,
|
|
"learning_rate": 2.3441015313299732e-07,
|
|
"loss": 0.4410665035247803,
|
|
"step": 4299
|
|
},
|
|
{
|
|
"epoch": 2.737110120942075,
|
|
"grad_norm": 9.111984906586548,
|
|
"learning_rate": 2.3329068417413114e-07,
|
|
"loss": 0.3262574374675751,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 2.7377466581795034,
|
|
"grad_norm": 18.06494995847896,
|
|
"learning_rate": 2.32173830878627e-07,
|
|
"loss": 0.48437198996543884,
|
|
"step": 4301
|
|
},
|
|
{
|
|
"epoch": 2.7383831954169318,
|
|
"grad_norm": 9.779436167696465,
|
|
"learning_rate": 2.310595938593413e-07,
|
|
"loss": 0.19420596957206726,
|
|
"step": 4302
|
|
},
|
|
{
|
|
"epoch": 2.73901973265436,
|
|
"grad_norm": 6.850524200944048,
|
|
"learning_rate": 2.299479737276955e-07,
|
|
"loss": 0.2850743532180786,
|
|
"step": 4303
|
|
},
|
|
{
|
|
"epoch": 2.7396562698917886,
|
|
"grad_norm": 19.076556050860997,
|
|
"learning_rate": 2.2883897109367724e-07,
|
|
"loss": 0.6600810885429382,
|
|
"step": 4304
|
|
},
|
|
{
|
|
"epoch": 2.740292807129217,
|
|
"grad_norm": 13.227385928964791,
|
|
"learning_rate": 2.2773258656583307e-07,
|
|
"loss": 1.904581904411316,
|
|
"step": 4305
|
|
},
|
|
{
|
|
"epoch": 2.7409293443666454,
|
|
"grad_norm": 19.440778689480968,
|
|
"learning_rate": 2.2662882075127735e-07,
|
|
"loss": 0.40808939933776855,
|
|
"step": 4306
|
|
},
|
|
{
|
|
"epoch": 2.7415658816040738,
|
|
"grad_norm": 11.79851520394274,
|
|
"learning_rate": 2.25527674255685e-07,
|
|
"loss": 0.41511595249176025,
|
|
"step": 4307
|
|
},
|
|
{
|
|
"epoch": 2.742202418841502,
|
|
"grad_norm": 10.102280558043457,
|
|
"learning_rate": 2.2442914768329492e-07,
|
|
"loss": 0.38626226782798767,
|
|
"step": 4308
|
|
},
|
|
{
|
|
"epoch": 2.7428389560789306,
|
|
"grad_norm": 17.005846309084074,
|
|
"learning_rate": 2.2333324163690716e-07,
|
|
"loss": 1.135148286819458,
|
|
"step": 4309
|
|
},
|
|
{
|
|
"epoch": 2.743475493316359,
|
|
"grad_norm": 7.747987474427863,
|
|
"learning_rate": 2.2223995671788457e-07,
|
|
"loss": 0.502388060092926,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 2.7441120305537874,
|
|
"grad_norm": 14.17202773113162,
|
|
"learning_rate": 2.2114929352615277e-07,
|
|
"loss": 0.414657324552536,
|
|
"step": 4311
|
|
},
|
|
{
|
|
"epoch": 2.744748567791216,
|
|
"grad_norm": 10.539192399359145,
|
|
"learning_rate": 2.2006125266019528e-07,
|
|
"loss": 0.27732551097869873,
|
|
"step": 4312
|
|
},
|
|
{
|
|
"epoch": 2.745385105028644,
|
|
"grad_norm": 10.561237554141723,
|
|
"learning_rate": 2.189758347170612e-07,
|
|
"loss": 0.6421482563018799,
|
|
"step": 4313
|
|
},
|
|
{
|
|
"epoch": 2.7460216422660726,
|
|
"grad_norm": 11.135916872443241,
|
|
"learning_rate": 2.1789304029235624e-07,
|
|
"loss": 0.7741884589195251,
|
|
"step": 4314
|
|
},
|
|
{
|
|
"epoch": 2.746658179503501,
|
|
"grad_norm": 16.655163146149526,
|
|
"learning_rate": 2.1681286998024963e-07,
|
|
"loss": 0.4049973785877228,
|
|
"step": 4315
|
|
},
|
|
{
|
|
"epoch": 2.7472947167409294,
|
|
"grad_norm": 9.368393370456381,
|
|
"learning_rate": 2.1573532437346834e-07,
|
|
"loss": 0.36269906163215637,
|
|
"step": 4316
|
|
},
|
|
{
|
|
"epoch": 2.747931253978358,
|
|
"grad_norm": 14.5726052197661,
|
|
"learning_rate": 2.1466040406330102e-07,
|
|
"loss": 0.3802472949028015,
|
|
"step": 4317
|
|
},
|
|
{
|
|
"epoch": 2.748567791215786,
|
|
"grad_norm": 7.329278276781925,
|
|
"learning_rate": 2.135881096395942e-07,
|
|
"loss": 0.2172292321920395,
|
|
"step": 4318
|
|
},
|
|
{
|
|
"epoch": 2.7492043284532146,
|
|
"grad_norm": 7.354116971460457,
|
|
"learning_rate": 2.1251844169075442e-07,
|
|
"loss": 0.25726011395454407,
|
|
"step": 4319
|
|
},
|
|
{
|
|
"epoch": 2.749840865690643,
|
|
"grad_norm": 10.02657240608222,
|
|
"learning_rate": 2.1145140080374604e-07,
|
|
"loss": 0.7946832180023193,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 2.7504774029280714,
|
|
"grad_norm": 17.6762639881742,
|
|
"learning_rate": 2.1038698756409292e-07,
|
|
"loss": 0.5637105703353882,
|
|
"step": 4321
|
|
},
|
|
{
|
|
"epoch": 2.7511139401655,
|
|
"grad_norm": 10.778476221349027,
|
|
"learning_rate": 2.093252025558762e-07,
|
|
"loss": 0.5554542541503906,
|
|
"step": 4322
|
|
},
|
|
{
|
|
"epoch": 2.751750477402928,
|
|
"grad_norm": 12.393549129861261,
|
|
"learning_rate": 2.082660463617353e-07,
|
|
"loss": 0.4928439259529114,
|
|
"step": 4323
|
|
},
|
|
{
|
|
"epoch": 2.7523870146403566,
|
|
"grad_norm": 8.142573967633252,
|
|
"learning_rate": 2.0720951956286818e-07,
|
|
"loss": 0.1775183379650116,
|
|
"step": 4324
|
|
},
|
|
{
|
|
"epoch": 2.753023551877785,
|
|
"grad_norm": 10.425979217267406,
|
|
"learning_rate": 2.0615562273902602e-07,
|
|
"loss": 0.40366294980049133,
|
|
"step": 4325
|
|
},
|
|
{
|
|
"epoch": 2.753660089115213,
|
|
"grad_norm": 13.655468798230945,
|
|
"learning_rate": 2.0510435646852177e-07,
|
|
"loss": 0.5020191073417664,
|
|
"step": 4326
|
|
},
|
|
{
|
|
"epoch": 2.754296626352642,
|
|
"grad_norm": 10.402700144189954,
|
|
"learning_rate": 2.0405572132822239e-07,
|
|
"loss": 0.5099070072174072,
|
|
"step": 4327
|
|
},
|
|
{
|
|
"epoch": 2.75493316359007,
|
|
"grad_norm": 9.91157832192849,
|
|
"learning_rate": 2.0300971789355138e-07,
|
|
"loss": 0.4746452867984772,
|
|
"step": 4328
|
|
},
|
|
{
|
|
"epoch": 2.7555697008274986,
|
|
"grad_norm": 12.25888997540068,
|
|
"learning_rate": 2.0196634673848693e-07,
|
|
"loss": 0.312614381313324,
|
|
"step": 4329
|
|
},
|
|
{
|
|
"epoch": 2.7562062380649266,
|
|
"grad_norm": 15.773800201853582,
|
|
"learning_rate": 2.009256084355654e-07,
|
|
"loss": 0.5852431058883667,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 2.7568427753023554,
|
|
"grad_norm": 10.578932486634834,
|
|
"learning_rate": 1.9988750355587728e-07,
|
|
"loss": 0.374070405960083,
|
|
"step": 4331
|
|
},
|
|
{
|
|
"epoch": 2.7574793125397834,
|
|
"grad_norm": 10.559529194780191,
|
|
"learning_rate": 1.9885203266906627e-07,
|
|
"loss": 0.34271422028541565,
|
|
"step": 4332
|
|
},
|
|
{
|
|
"epoch": 2.758115849777212,
|
|
"grad_norm": 8.559469427860115,
|
|
"learning_rate": 1.9781919634333237e-07,
|
|
"loss": 0.5302675366401672,
|
|
"step": 4333
|
|
},
|
|
{
|
|
"epoch": 2.75875238701464,
|
|
"grad_norm": 9.92723889513059,
|
|
"learning_rate": 1.967889951454316e-07,
|
|
"loss": 0.4150368869304657,
|
|
"step": 4334
|
|
},
|
|
{
|
|
"epoch": 2.7593889242520686,
|
|
"grad_norm": 10.751289855859332,
|
|
"learning_rate": 1.9576142964066957e-07,
|
|
"loss": 0.17283464968204498,
|
|
"step": 4335
|
|
},
|
|
{
|
|
"epoch": 2.760025461489497,
|
|
"grad_norm": 8.631591277855032,
|
|
"learning_rate": 1.947365003929097e-07,
|
|
"loss": 0.3527897000312805,
|
|
"step": 4336
|
|
},
|
|
{
|
|
"epoch": 2.7606619987269254,
|
|
"grad_norm": 15.585220226368758,
|
|
"learning_rate": 1.9371420796456707e-07,
|
|
"loss": 0.49996358156204224,
|
|
"step": 4337
|
|
},
|
|
{
|
|
"epoch": 2.761298535964354,
|
|
"grad_norm": 9.14196026995527,
|
|
"learning_rate": 1.926945529166091e-07,
|
|
"loss": 0.8745436668395996,
|
|
"step": 4338
|
|
},
|
|
{
|
|
"epoch": 2.761935073201782,
|
|
"grad_norm": 19.59578373154723,
|
|
"learning_rate": 1.9167753580855763e-07,
|
|
"loss": 0.3348027467727661,
|
|
"step": 4339
|
|
},
|
|
{
|
|
"epoch": 2.7625716104392106,
|
|
"grad_norm": 12.28375878643904,
|
|
"learning_rate": 1.9066315719848682e-07,
|
|
"loss": 0.3094848096370697,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 2.763208147676639,
|
|
"grad_norm": 8.846129210433077,
|
|
"learning_rate": 1.8965141764302087e-07,
|
|
"loss": 0.29138296842575073,
|
|
"step": 4341
|
|
},
|
|
{
|
|
"epoch": 2.7638446849140674,
|
|
"grad_norm": 13.503871417296063,
|
|
"learning_rate": 1.886423176973373e-07,
|
|
"loss": 0.5916569232940674,
|
|
"step": 4342
|
|
},
|
|
{
|
|
"epoch": 2.764481222151496,
|
|
"grad_norm": 8.279942974037798,
|
|
"learning_rate": 1.8763585791516715e-07,
|
|
"loss": 0.338371217250824,
|
|
"step": 4343
|
|
},
|
|
{
|
|
"epoch": 2.7651177593889242,
|
|
"grad_norm": 7.552973688877967,
|
|
"learning_rate": 1.8663203884878967e-07,
|
|
"loss": 0.36693233251571655,
|
|
"step": 4344
|
|
},
|
|
{
|
|
"epoch": 2.7657542966263526,
|
|
"grad_norm": 12.881044660588703,
|
|
"learning_rate": 1.8563086104903537e-07,
|
|
"loss": 0.584098756313324,
|
|
"step": 4345
|
|
},
|
|
{
|
|
"epoch": 2.766390833863781,
|
|
"grad_norm": 8.476461186413237,
|
|
"learning_rate": 1.8463232506528705e-07,
|
|
"loss": 0.5080705285072327,
|
|
"step": 4346
|
|
},
|
|
{
|
|
"epoch": 2.7670273711012094,
|
|
"grad_norm": 8.899640042190644,
|
|
"learning_rate": 1.8363643144547805e-07,
|
|
"loss": 0.19185948371887207,
|
|
"step": 4347
|
|
},
|
|
{
|
|
"epoch": 2.767663908338638,
|
|
"grad_norm": 9.801897987444429,
|
|
"learning_rate": 1.8264318073608843e-07,
|
|
"loss": 0.6385388970375061,
|
|
"step": 4348
|
|
},
|
|
{
|
|
"epoch": 2.7683004455760662,
|
|
"grad_norm": 11.978459962825934,
|
|
"learning_rate": 1.816525734821517e-07,
|
|
"loss": 0.5047708749771118,
|
|
"step": 4349
|
|
},
|
|
{
|
|
"epoch": 2.7689369828134947,
|
|
"grad_norm": 12.717043613698428,
|
|
"learning_rate": 1.8066461022724967e-07,
|
|
"loss": 0.3372824192047119,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 2.769573520050923,
|
|
"grad_norm": 11.568553357345325,
|
|
"learning_rate": 1.7967929151351206e-07,
|
|
"loss": 0.39070644974708557,
|
|
"step": 4351
|
|
},
|
|
{
|
|
"epoch": 2.7702100572883515,
|
|
"grad_norm": 14.984865987869235,
|
|
"learning_rate": 1.7869661788161795e-07,
|
|
"loss": 0.3570117950439453,
|
|
"step": 4352
|
|
},
|
|
{
|
|
"epoch": 2.77084659452578,
|
|
"grad_norm": 11.783701810628614,
|
|
"learning_rate": 1.7771658987079722e-07,
|
|
"loss": 0.4199962019920349,
|
|
"step": 4353
|
|
},
|
|
{
|
|
"epoch": 2.7714831317632083,
|
|
"grad_norm": 7.5642975673352,
|
|
"learning_rate": 1.7673920801882517e-07,
|
|
"loss": 0.4335901737213135,
|
|
"step": 4354
|
|
},
|
|
{
|
|
"epoch": 2.7721196690006367,
|
|
"grad_norm": 15.375826355953931,
|
|
"learning_rate": 1.7576447286202615e-07,
|
|
"loss": 1.3275525569915771,
|
|
"step": 4355
|
|
},
|
|
{
|
|
"epoch": 2.772756206238065,
|
|
"grad_norm": 10.102903902947347,
|
|
"learning_rate": 1.7479238493527119e-07,
|
|
"loss": 0.7213789820671082,
|
|
"step": 4356
|
|
},
|
|
{
|
|
"epoch": 2.7733927434754935,
|
|
"grad_norm": 11.805411010215806,
|
|
"learning_rate": 1.7382294477198193e-07,
|
|
"loss": 0.4079378843307495,
|
|
"step": 4357
|
|
},
|
|
{
|
|
"epoch": 2.774029280712922,
|
|
"grad_norm": 10.400630313568238,
|
|
"learning_rate": 1.7285615290412227e-07,
|
|
"loss": 0.39478975534439087,
|
|
"step": 4358
|
|
},
|
|
{
|
|
"epoch": 2.77466581795035,
|
|
"grad_norm": 7.97993204598414,
|
|
"learning_rate": 1.7189200986220621e-07,
|
|
"loss": 0.3212743103504181,
|
|
"step": 4359
|
|
},
|
|
{
|
|
"epoch": 2.7753023551877787,
|
|
"grad_norm": 15.573333045289687,
|
|
"learning_rate": 1.709305161752939e-07,
|
|
"loss": 0.30908530950546265,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 2.7759388924252066,
|
|
"grad_norm": 14.914602988528788,
|
|
"learning_rate": 1.69971672370991e-07,
|
|
"loss": 0.7659950852394104,
|
|
"step": 4361
|
|
},
|
|
{
|
|
"epoch": 2.7765754296626355,
|
|
"grad_norm": 15.287999147369128,
|
|
"learning_rate": 1.690154789754489e-07,
|
|
"loss": 0.8660287857055664,
|
|
"step": 4362
|
|
},
|
|
{
|
|
"epoch": 2.7772119669000634,
|
|
"grad_norm": 14.065918614954981,
|
|
"learning_rate": 1.6806193651336567e-07,
|
|
"loss": 0.5355532765388489,
|
|
"step": 4363
|
|
},
|
|
{
|
|
"epoch": 2.7778485041374923,
|
|
"grad_norm": 10.71175139279742,
|
|
"learning_rate": 1.6711104550798329e-07,
|
|
"loss": 0.40091973543167114,
|
|
"step": 4364
|
|
},
|
|
{
|
|
"epoch": 2.7784850413749203,
|
|
"grad_norm": 15.497332132924607,
|
|
"learning_rate": 1.6616280648108995e-07,
|
|
"loss": 0.22542725503444672,
|
|
"step": 4365
|
|
},
|
|
{
|
|
"epoch": 2.779121578612349,
|
|
"grad_norm": 11.0783049415082,
|
|
"learning_rate": 1.652172199530183e-07,
|
|
"loss": 0.5950685739517212,
|
|
"step": 4366
|
|
},
|
|
{
|
|
"epoch": 2.779758115849777,
|
|
"grad_norm": 9.073428141803928,
|
|
"learning_rate": 1.6427428644264553e-07,
|
|
"loss": 0.5838128924369812,
|
|
"step": 4367
|
|
},
|
|
{
|
|
"epoch": 2.7803946530872055,
|
|
"grad_norm": 8.66730110289837,
|
|
"learning_rate": 1.6333400646739217e-07,
|
|
"loss": 0.4302659034729004,
|
|
"step": 4368
|
|
},
|
|
{
|
|
"epoch": 2.781031190324634,
|
|
"grad_norm": 16.11172081698985,
|
|
"learning_rate": 1.6239638054322382e-07,
|
|
"loss": 0.567771315574646,
|
|
"step": 4369
|
|
},
|
|
{
|
|
"epoch": 2.7816677275620623,
|
|
"grad_norm": 13.93911649098347,
|
|
"learning_rate": 1.6146140918465003e-07,
|
|
"loss": 0.4877224862575531,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 2.7823042647994907,
|
|
"grad_norm": 8.422358165698013,
|
|
"learning_rate": 1.605290929047215e-07,
|
|
"loss": 0.2488918900489807,
|
|
"step": 4371
|
|
},
|
|
{
|
|
"epoch": 2.782940802036919,
|
|
"grad_norm": 12.34100509614224,
|
|
"learning_rate": 1.5959943221503403e-07,
|
|
"loss": 0.38559985160827637,
|
|
"step": 4372
|
|
},
|
|
{
|
|
"epoch": 2.7835773392743475,
|
|
"grad_norm": 9.202840878020266,
|
|
"learning_rate": 1.586724276257262e-07,
|
|
"loss": 0.576676607131958,
|
|
"step": 4373
|
|
},
|
|
{
|
|
"epoch": 2.784213876511776,
|
|
"grad_norm": 31.45421870136579,
|
|
"learning_rate": 1.577480796454778e-07,
|
|
"loss": 1.7332954406738281,
|
|
"step": 4374
|
|
},
|
|
{
|
|
"epoch": 2.7848504137492043,
|
|
"grad_norm": 7.66025647326814,
|
|
"learning_rate": 1.5682638878151136e-07,
|
|
"loss": 0.5894250869750977,
|
|
"step": 4375
|
|
},
|
|
{
|
|
"epoch": 2.7854869509866327,
|
|
"grad_norm": 9.533588932793453,
|
|
"learning_rate": 1.559073555395918e-07,
|
|
"loss": 0.7092304229736328,
|
|
"step": 4376
|
|
},
|
|
{
|
|
"epoch": 2.786123488224061,
|
|
"grad_norm": 9.16700802908481,
|
|
"learning_rate": 1.5499098042402628e-07,
|
|
"loss": 0.32052963972091675,
|
|
"step": 4377
|
|
},
|
|
{
|
|
"epoch": 2.7867600254614895,
|
|
"grad_norm": 9.352889099815966,
|
|
"learning_rate": 1.5407726393766032e-07,
|
|
"loss": 0.25640130043029785,
|
|
"step": 4378
|
|
},
|
|
{
|
|
"epoch": 2.787396562698918,
|
|
"grad_norm": 8.143726963086726,
|
|
"learning_rate": 1.5316620658188452e-07,
|
|
"loss": 0.6448952555656433,
|
|
"step": 4379
|
|
},
|
|
{
|
|
"epoch": 2.7880330999363463,
|
|
"grad_norm": 5.935753301265953,
|
|
"learning_rate": 1.5225780885662843e-07,
|
|
"loss": 0.13304725289344788,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 2.7886696371737747,
|
|
"grad_norm": 12.077162673939155,
|
|
"learning_rate": 1.5135207126036166e-07,
|
|
"loss": 0.4364404082298279,
|
|
"step": 4381
|
|
},
|
|
{
|
|
"epoch": 2.789306174411203,
|
|
"grad_norm": 14.210781282286607,
|
|
"learning_rate": 1.5044899429009497e-07,
|
|
"loss": 0.2122376412153244,
|
|
"step": 4382
|
|
},
|
|
{
|
|
"epoch": 2.7899427116486315,
|
|
"grad_norm": 8.561175375474564,
|
|
"learning_rate": 1.495485784413797e-07,
|
|
"loss": 0.4473179578781128,
|
|
"step": 4383
|
|
},
|
|
{
|
|
"epoch": 2.79057924888606,
|
|
"grad_norm": 12.795212797822536,
|
|
"learning_rate": 1.48650824208304e-07,
|
|
"loss": 0.4171499013900757,
|
|
"step": 4384
|
|
},
|
|
{
|
|
"epoch": 2.7912157861234883,
|
|
"grad_norm": 10.259963311843471,
|
|
"learning_rate": 1.4775573208349992e-07,
|
|
"loss": 0.5803985595703125,
|
|
"step": 4385
|
|
},
|
|
{
|
|
"epoch": 2.7918523233609167,
|
|
"grad_norm": 12.184224660402068,
|
|
"learning_rate": 1.4686330255813507e-07,
|
|
"loss": 0.3893691301345825,
|
|
"step": 4386
|
|
},
|
|
{
|
|
"epoch": 2.792488860598345,
|
|
"grad_norm": 15.039144206017967,
|
|
"learning_rate": 1.4597353612191834e-07,
|
|
"loss": 0.35126081109046936,
|
|
"step": 4387
|
|
},
|
|
{
|
|
"epoch": 2.7931253978357735,
|
|
"grad_norm": 13.367470953116724,
|
|
"learning_rate": 1.450864332630947e-07,
|
|
"loss": 0.36777427792549133,
|
|
"step": 4388
|
|
},
|
|
{
|
|
"epoch": 2.793761935073202,
|
|
"grad_norm": 18.182113804637456,
|
|
"learning_rate": 1.4420199446845084e-07,
|
|
"loss": 0.2519111633300781,
|
|
"step": 4389
|
|
},
|
|
{
|
|
"epoch": 2.7943984723106303,
|
|
"grad_norm": 12.261095206471172,
|
|
"learning_rate": 1.4332022022330917e-07,
|
|
"loss": 0.22938407957553864,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 2.7950350095480587,
|
|
"grad_norm": 9.392276560302708,
|
|
"learning_rate": 1.4244111101153035e-07,
|
|
"loss": 0.43280625343322754,
|
|
"step": 4391
|
|
},
|
|
{
|
|
"epoch": 2.7956715467854867,
|
|
"grad_norm": 7.890062323305005,
|
|
"learning_rate": 1.4156466731551355e-07,
|
|
"loss": 0.36694902181625366,
|
|
"step": 4392
|
|
},
|
|
{
|
|
"epoch": 2.7963080840229155,
|
|
"grad_norm": 12.172257145089155,
|
|
"learning_rate": 1.4069088961619404e-07,
|
|
"loss": 0.31940770149230957,
|
|
"step": 4393
|
|
},
|
|
{
|
|
"epoch": 2.7969446212603435,
|
|
"grad_norm": 11.744985066780853,
|
|
"learning_rate": 1.398197783930455e-07,
|
|
"loss": 0.8017423152923584,
|
|
"step": 4394
|
|
},
|
|
{
|
|
"epoch": 2.7975811584977723,
|
|
"grad_norm": 6.8405376068039505,
|
|
"learning_rate": 1.3895133412407667e-07,
|
|
"loss": 0.2951199412345886,
|
|
"step": 4395
|
|
},
|
|
{
|
|
"epoch": 2.7982176957352003,
|
|
"grad_norm": 12.042007738455073,
|
|
"learning_rate": 1.3808555728583528e-07,
|
|
"loss": 0.6661902070045471,
|
|
"step": 4396
|
|
},
|
|
{
|
|
"epoch": 2.798854232972629,
|
|
"grad_norm": 12.779626795387536,
|
|
"learning_rate": 1.3722244835340292e-07,
|
|
"loss": 0.2595215141773224,
|
|
"step": 4397
|
|
},
|
|
{
|
|
"epoch": 2.799490770210057,
|
|
"grad_norm": 5.861522223792799,
|
|
"learning_rate": 1.363620078003991e-07,
|
|
"loss": 0.5347214937210083,
|
|
"step": 4398
|
|
},
|
|
{
|
|
"epoch": 2.800127307447486,
|
|
"grad_norm": 12.83984882710008,
|
|
"learning_rate": 1.3550423609897668e-07,
|
|
"loss": 0.6801286935806274,
|
|
"step": 4399
|
|
},
|
|
{
|
|
"epoch": 2.800763844684914,
|
|
"grad_norm": 10.006507456419525,
|
|
"learning_rate": 1.3464913371982745e-07,
|
|
"loss": 0.7041571140289307,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 2.8014003819223423,
|
|
"grad_norm": 15.455367002004685,
|
|
"learning_rate": 1.33796701132175e-07,
|
|
"loss": 0.529301643371582,
|
|
"step": 4401
|
|
},
|
|
{
|
|
"epoch": 2.8020369191597707,
|
|
"grad_norm": 9.709162767265719,
|
|
"learning_rate": 1.3294693880378074e-07,
|
|
"loss": 0.34043580293655396,
|
|
"step": 4402
|
|
},
|
|
{
|
|
"epoch": 2.802673456397199,
|
|
"grad_norm": 8.926447006722517,
|
|
"learning_rate": 1.3209984720093882e-07,
|
|
"loss": 0.30980032682418823,
|
|
"step": 4403
|
|
},
|
|
{
|
|
"epoch": 2.8033099936346275,
|
|
"grad_norm": 14.853184439285796,
|
|
"learning_rate": 1.3125542678847912e-07,
|
|
"loss": 0.7388812303543091,
|
|
"step": 4404
|
|
},
|
|
{
|
|
"epoch": 2.803946530872056,
|
|
"grad_norm": 7.8258107218619966,
|
|
"learning_rate": 1.3041367802976436e-07,
|
|
"loss": 0.4440646767616272,
|
|
"step": 4405
|
|
},
|
|
{
|
|
"epoch": 2.8045830681094843,
|
|
"grad_norm": 17.863623310914267,
|
|
"learning_rate": 1.295746013866933e-07,
|
|
"loss": 0.6827105283737183,
|
|
"step": 4406
|
|
},
|
|
{
|
|
"epoch": 2.8052196053469127,
|
|
"grad_norm": 12.058619449369857,
|
|
"learning_rate": 1.287381973196955e-07,
|
|
"loss": 0.32942473888397217,
|
|
"step": 4407
|
|
},
|
|
{
|
|
"epoch": 2.805856142584341,
|
|
"grad_norm": 9.213035106753438,
|
|
"learning_rate": 1.2790446628773768e-07,
|
|
"loss": 0.1439140886068344,
|
|
"step": 4408
|
|
},
|
|
{
|
|
"epoch": 2.8064926798217695,
|
|
"grad_norm": 14.420382113247083,
|
|
"learning_rate": 1.2707340874831664e-07,
|
|
"loss": 0.5104187726974487,
|
|
"step": 4409
|
|
},
|
|
{
|
|
"epoch": 2.807129217059198,
|
|
"grad_norm": 17.32293520025481,
|
|
"learning_rate": 1.2624502515746317e-07,
|
|
"loss": 0.26788467168807983,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 2.8077657542966263,
|
|
"grad_norm": 11.73405977666017,
|
|
"learning_rate": 1.25419315969742e-07,
|
|
"loss": 0.7914992570877075,
|
|
"step": 4411
|
|
},
|
|
{
|
|
"epoch": 2.8084022915340547,
|
|
"grad_norm": 9.259963725798652,
|
|
"learning_rate": 1.2459628163824732e-07,
|
|
"loss": 0.6886008381843567,
|
|
"step": 4412
|
|
},
|
|
{
|
|
"epoch": 2.809038828771483,
|
|
"grad_norm": 13.705961703248914,
|
|
"learning_rate": 1.2377592261460959e-07,
|
|
"loss": 0.6125488877296448,
|
|
"step": 4413
|
|
},
|
|
{
|
|
"epoch": 2.8096753660089115,
|
|
"grad_norm": 14.013193224784624,
|
|
"learning_rate": 1.2295823934898697e-07,
|
|
"loss": 1.1255991458892822,
|
|
"step": 4414
|
|
},
|
|
{
|
|
"epoch": 2.81031190324634,
|
|
"grad_norm": 14.636493982016376,
|
|
"learning_rate": 1.2214323229007284e-07,
|
|
"loss": 0.44004884362220764,
|
|
"step": 4415
|
|
},
|
|
{
|
|
"epoch": 2.8109484404837684,
|
|
"grad_norm": 13.468519102553497,
|
|
"learning_rate": 1.2133090188509056e-07,
|
|
"loss": 0.34507501125335693,
|
|
"step": 4416
|
|
},
|
|
{
|
|
"epoch": 2.8115849777211968,
|
|
"grad_norm": 12.549436518383526,
|
|
"learning_rate": 1.2052124857979408e-07,
|
|
"loss": 1.268148422241211,
|
|
"step": 4417
|
|
},
|
|
{
|
|
"epoch": 2.812221514958625,
|
|
"grad_norm": 9.770274877561667,
|
|
"learning_rate": 1.1971427281846913e-07,
|
|
"loss": 0.35538095235824585,
|
|
"step": 4418
|
|
},
|
|
{
|
|
"epoch": 2.8128580521960536,
|
|
"grad_norm": 15.965965459171379,
|
|
"learning_rate": 1.1890997504393265e-07,
|
|
"loss": 0.6932916045188904,
|
|
"step": 4419
|
|
},
|
|
{
|
|
"epoch": 2.813494589433482,
|
|
"grad_norm": 12.382519919401737,
|
|
"learning_rate": 1.1810835569753099e-07,
|
|
"loss": 0.6585299968719482,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 2.8141311266709104,
|
|
"grad_norm": 13.007575538585334,
|
|
"learning_rate": 1.1730941521914175e-07,
|
|
"loss": 0.4269363284111023,
|
|
"step": 4421
|
|
},
|
|
{
|
|
"epoch": 2.8147676639083388,
|
|
"grad_norm": 12.545204648672675,
|
|
"learning_rate": 1.1651315404717145e-07,
|
|
"loss": 0.41912248730659485,
|
|
"step": 4422
|
|
},
|
|
{
|
|
"epoch": 2.815404201145767,
|
|
"grad_norm": 13.506300554886902,
|
|
"learning_rate": 1.1571957261855837e-07,
|
|
"loss": 0.49605053663253784,
|
|
"step": 4423
|
|
},
|
|
{
|
|
"epoch": 2.8160407383831956,
|
|
"grad_norm": 12.247286636282382,
|
|
"learning_rate": 1.1492867136876696e-07,
|
|
"loss": 0.8589668869972229,
|
|
"step": 4424
|
|
},
|
|
{
|
|
"epoch": 2.8166772756206235,
|
|
"grad_norm": 9.49353796946391,
|
|
"learning_rate": 1.1414045073179392e-07,
|
|
"loss": 0.2600698471069336,
|
|
"step": 4425
|
|
},
|
|
{
|
|
"epoch": 2.8173138128580524,
|
|
"grad_norm": 13.340490605059793,
|
|
"learning_rate": 1.133549111401644e-07,
|
|
"loss": 0.2603069543838501,
|
|
"step": 4426
|
|
},
|
|
{
|
|
"epoch": 2.8179503500954803,
|
|
"grad_norm": 8.527047807044804,
|
|
"learning_rate": 1.1257205302493135e-07,
|
|
"loss": 0.6665278673171997,
|
|
"step": 4427
|
|
},
|
|
{
|
|
"epoch": 2.818586887332909,
|
|
"grad_norm": 5.083032832097961,
|
|
"learning_rate": 1.1179187681567671e-07,
|
|
"loss": 0.1172168105840683,
|
|
"step": 4428
|
|
},
|
|
{
|
|
"epoch": 2.819223424570337,
|
|
"grad_norm": 13.398556136651946,
|
|
"learning_rate": 1.1101438294051193e-07,
|
|
"loss": 0.22658070921897888,
|
|
"step": 4429
|
|
},
|
|
{
|
|
"epoch": 2.819859961807766,
|
|
"grad_norm": 12.701150729171005,
|
|
"learning_rate": 1.1023957182607515e-07,
|
|
"loss": 0.651612401008606,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 2.820496499045194,
|
|
"grad_norm": 7.146573089855455,
|
|
"learning_rate": 1.0946744389753239e-07,
|
|
"loss": 0.31838878989219666,
|
|
"step": 4431
|
|
},
|
|
{
|
|
"epoch": 2.821133036282623,
|
|
"grad_norm": 8.654357939781463,
|
|
"learning_rate": 1.0869799957857808e-07,
|
|
"loss": 0.2821580171585083,
|
|
"step": 4432
|
|
},
|
|
{
|
|
"epoch": 2.8217695735200508,
|
|
"grad_norm": 10.506211168917828,
|
|
"learning_rate": 1.0793123929143445e-07,
|
|
"loss": 0.6332144737243652,
|
|
"step": 4433
|
|
},
|
|
{
|
|
"epoch": 2.822406110757479,
|
|
"grad_norm": 14.490206968208446,
|
|
"learning_rate": 1.0716716345684941e-07,
|
|
"loss": 0.5318081378936768,
|
|
"step": 4434
|
|
},
|
|
{
|
|
"epoch": 2.8230426479949076,
|
|
"grad_norm": 9.92884904499283,
|
|
"learning_rate": 1.0640577249409811e-07,
|
|
"loss": 0.34390687942504883,
|
|
"step": 4435
|
|
},
|
|
{
|
|
"epoch": 2.823679185232336,
|
|
"grad_norm": 14.743175828458485,
|
|
"learning_rate": 1.0564706682098525e-07,
|
|
"loss": 0.3705610930919647,
|
|
"step": 4436
|
|
},
|
|
{
|
|
"epoch": 2.8243157224697644,
|
|
"grad_norm": 14.384452616369366,
|
|
"learning_rate": 1.0489104685383666e-07,
|
|
"loss": 0.3779531717300415,
|
|
"step": 4437
|
|
},
|
|
{
|
|
"epoch": 2.8249522597071928,
|
|
"grad_norm": 9.363966887729818,
|
|
"learning_rate": 1.0413771300750942e-07,
|
|
"loss": 0.38987523317337036,
|
|
"step": 4438
|
|
},
|
|
{
|
|
"epoch": 2.825588796944621,
|
|
"grad_norm": 7.020404932998415,
|
|
"learning_rate": 1.0338706569538392e-07,
|
|
"loss": 0.18849973380565643,
|
|
"step": 4439
|
|
},
|
|
{
|
|
"epoch": 2.8262253341820496,
|
|
"grad_norm": 12.0655847741128,
|
|
"learning_rate": 1.0263910532936738e-07,
|
|
"loss": 0.3690700829029083,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 2.826861871419478,
|
|
"grad_norm": 9.57987101996461,
|
|
"learning_rate": 1.0189383231989202e-07,
|
|
"loss": 0.4701506495475769,
|
|
"step": 4441
|
|
},
|
|
{
|
|
"epoch": 2.8274984086569064,
|
|
"grad_norm": 11.950188096317811,
|
|
"learning_rate": 1.0115124707591628e-07,
|
|
"loss": 0.291561484336853,
|
|
"step": 4442
|
|
},
|
|
{
|
|
"epoch": 2.828134945894335,
|
|
"grad_norm": 13.548603120880893,
|
|
"learning_rate": 1.0041135000492309e-07,
|
|
"loss": 0.5485737323760986,
|
|
"step": 4443
|
|
},
|
|
{
|
|
"epoch": 2.828771483131763,
|
|
"grad_norm": 7.931899406269338,
|
|
"learning_rate": 9.967414151291932e-08,
|
|
"loss": 0.3453434109687805,
|
|
"step": 4444
|
|
},
|
|
{
|
|
"epoch": 2.8294080203691916,
|
|
"grad_norm": 15.473453832173739,
|
|
"learning_rate": 9.893962200443863e-08,
|
|
"loss": 0.4912480115890503,
|
|
"step": 4445
|
|
},
|
|
{
|
|
"epoch": 2.83004455760662,
|
|
"grad_norm": 5.9488169805606,
|
|
"learning_rate": 9.820779188253748e-08,
|
|
"loss": 0.09553024172782898,
|
|
"step": 4446
|
|
},
|
|
{
|
|
"epoch": 2.8306810948440484,
|
|
"grad_norm": 13.764654400798912,
|
|
"learning_rate": 9.747865154879798e-08,
|
|
"loss": 0.5168312191963196,
|
|
"step": 4447
|
|
},
|
|
{
|
|
"epoch": 2.831317632081477,
|
|
"grad_norm": 15.331341215166258,
|
|
"learning_rate": 9.675220140332452e-08,
|
|
"loss": 0.35934263467788696,
|
|
"step": 4448
|
|
},
|
|
{
|
|
"epoch": 2.831954169318905,
|
|
"grad_norm": 7.198756293350672,
|
|
"learning_rate": 9.602844184474714e-08,
|
|
"loss": 0.15390601754188538,
|
|
"step": 4449
|
|
},
|
|
{
|
|
"epoch": 2.8325907065563336,
|
|
"grad_norm": 8.836555196785069,
|
|
"learning_rate": 9.5307373270217e-08,
|
|
"loss": 0.20732107758522034,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 2.833227243793762,
|
|
"grad_norm": 11.160982633499165,
|
|
"learning_rate": 9.45889960754115e-08,
|
|
"loss": 0.5410911440849304,
|
|
"step": 4451
|
|
},
|
|
{
|
|
"epoch": 2.8338637810311904,
|
|
"grad_norm": 12.726741562345024,
|
|
"learning_rate": 9.387331065452921e-08,
|
|
"loss": 0.9187192916870117,
|
|
"step": 4452
|
|
},
|
|
{
|
|
"epoch": 2.834500318268619,
|
|
"grad_norm": 9.604486032470266,
|
|
"learning_rate": 9.316031740029264e-08,
|
|
"loss": 0.40101706981658936,
|
|
"step": 4453
|
|
},
|
|
{
|
|
"epoch": 2.835136855506047,
|
|
"grad_norm": 5.062763001665208,
|
|
"learning_rate": 9.245001670394604e-08,
|
|
"loss": 0.2110881507396698,
|
|
"step": 4454
|
|
},
|
|
{
|
|
"epoch": 2.8357733927434756,
|
|
"grad_norm": 16.827513675147042,
|
|
"learning_rate": 9.17424089552571e-08,
|
|
"loss": 0.7232325077056885,
|
|
"step": 4455
|
|
},
|
|
{
|
|
"epoch": 2.836409929980904,
|
|
"grad_norm": 17.956751990784042,
|
|
"learning_rate": 9.103749454251576e-08,
|
|
"loss": 0.5139992237091064,
|
|
"step": 4456
|
|
},
|
|
{
|
|
"epoch": 2.8370464672183324,
|
|
"grad_norm": 15.380880382871782,
|
|
"learning_rate": 9.033527385253316e-08,
|
|
"loss": 0.4737415909767151,
|
|
"step": 4457
|
|
},
|
|
{
|
|
"epoch": 2.837683004455761,
|
|
"grad_norm": 11.139858245851308,
|
|
"learning_rate": 8.963574727064273e-08,
|
|
"loss": 0.22228547930717468,
|
|
"step": 4458
|
|
},
|
|
{
|
|
"epoch": 2.8383195416931892,
|
|
"grad_norm": 11.395808275607928,
|
|
"learning_rate": 8.893891518070075e-08,
|
|
"loss": 0.21647092700004578,
|
|
"step": 4459
|
|
},
|
|
{
|
|
"epoch": 2.838956078930617,
|
|
"grad_norm": 11.593829700153915,
|
|
"learning_rate": 8.824477796508247e-08,
|
|
"loss": 0.18228834867477417,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 2.839592616168046,
|
|
"grad_norm": 8.503341670307945,
|
|
"learning_rate": 8.755333600468596e-08,
|
|
"loss": 0.5319182276725769,
|
|
"step": 4461
|
|
},
|
|
{
|
|
"epoch": 2.840229153405474,
|
|
"grad_norm": 8.25358143487463,
|
|
"learning_rate": 8.686458967893052e-08,
|
|
"loss": 0.3258248269557953,
|
|
"step": 4462
|
|
},
|
|
{
|
|
"epoch": 2.840865690642903,
|
|
"grad_norm": 22.093221809127197,
|
|
"learning_rate": 8.617853936575604e-08,
|
|
"loss": 1.1194725036621094,
|
|
"step": 4463
|
|
},
|
|
{
|
|
"epoch": 2.841502227880331,
|
|
"grad_norm": 8.837268868104667,
|
|
"learning_rate": 8.549518544162194e-08,
|
|
"loss": 0.28002914786338806,
|
|
"step": 4464
|
|
},
|
|
{
|
|
"epoch": 2.8421387651177596,
|
|
"grad_norm": 17.381718385312954,
|
|
"learning_rate": 8.481452828150938e-08,
|
|
"loss": 0.8821957111358643,
|
|
"step": 4465
|
|
},
|
|
{
|
|
"epoch": 2.8427753023551876,
|
|
"grad_norm": 10.12248178665113,
|
|
"learning_rate": 8.413656825891958e-08,
|
|
"loss": 0.6032583713531494,
|
|
"step": 4466
|
|
},
|
|
{
|
|
"epoch": 2.843411839592616,
|
|
"grad_norm": 7.600963808353676,
|
|
"learning_rate": 8.346130574587275e-08,
|
|
"loss": 0.40028008818626404,
|
|
"step": 4467
|
|
},
|
|
{
|
|
"epoch": 2.8440483768300444,
|
|
"grad_norm": 11.117287975623862,
|
|
"learning_rate": 8.278874111290969e-08,
|
|
"loss": 0.5357126593589783,
|
|
"step": 4468
|
|
},
|
|
{
|
|
"epoch": 2.844684914067473,
|
|
"grad_norm": 10.871594098621824,
|
|
"learning_rate": 8.211887472909074e-08,
|
|
"loss": 0.32229459285736084,
|
|
"step": 4469
|
|
},
|
|
{
|
|
"epoch": 2.845321451304901,
|
|
"grad_norm": 9.347649770819665,
|
|
"learning_rate": 8.145170696199578e-08,
|
|
"loss": 0.3124019503593445,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 2.8459579885423296,
|
|
"grad_norm": 9.641620728344396,
|
|
"learning_rate": 8.078723817772249e-08,
|
|
"loss": 0.3217499256134033,
|
|
"step": 4471
|
|
},
|
|
{
|
|
"epoch": 2.846594525779758,
|
|
"grad_norm": 7.362273253859314,
|
|
"learning_rate": 8.012546874088978e-08,
|
|
"loss": 0.610997200012207,
|
|
"step": 4472
|
|
},
|
|
{
|
|
"epoch": 2.8472310630171864,
|
|
"grad_norm": 8.47987539393995,
|
|
"learning_rate": 7.946639901463272e-08,
|
|
"loss": 0.2376602590084076,
|
|
"step": 4473
|
|
},
|
|
{
|
|
"epoch": 2.847867600254615,
|
|
"grad_norm": 7.9456884869410445,
|
|
"learning_rate": 7.88100293606081e-08,
|
|
"loss": 0.3960516154766083,
|
|
"step": 4474
|
|
},
|
|
{
|
|
"epoch": 2.8485041374920432,
|
|
"grad_norm": 9.140345764203408,
|
|
"learning_rate": 7.815636013898842e-08,
|
|
"loss": 0.1851504147052765,
|
|
"step": 4475
|
|
},
|
|
{
|
|
"epoch": 2.8491406747294716,
|
|
"grad_norm": 14.128717881268285,
|
|
"learning_rate": 7.750539170846505e-08,
|
|
"loss": 0.48731547594070435,
|
|
"step": 4476
|
|
},
|
|
{
|
|
"epoch": 2.8497772119669,
|
|
"grad_norm": 17.627662686809067,
|
|
"learning_rate": 7.685712442624838e-08,
|
|
"loss": 0.45498213171958923,
|
|
"step": 4477
|
|
},
|
|
{
|
|
"epoch": 2.8504137492043284,
|
|
"grad_norm": 14.841619557061877,
|
|
"learning_rate": 7.621155864806495e-08,
|
|
"loss": 0.368963360786438,
|
|
"step": 4478
|
|
},
|
|
{
|
|
"epoch": 2.851050286441757,
|
|
"grad_norm": 9.3619052997257,
|
|
"learning_rate": 7.556869472816031e-08,
|
|
"loss": 0.26591670513153076,
|
|
"step": 4479
|
|
},
|
|
{
|
|
"epoch": 2.8516868236791852,
|
|
"grad_norm": 14.375253108276187,
|
|
"learning_rate": 7.492853301929615e-08,
|
|
"loss": 1.0872457027435303,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 2.8523233609166136,
|
|
"grad_norm": 12.391381272569898,
|
|
"learning_rate": 7.429107387275259e-08,
|
|
"loss": 0.38050276041030884,
|
|
"step": 4481
|
|
},
|
|
{
|
|
"epoch": 2.852959898154042,
|
|
"grad_norm": 9.970198127704585,
|
|
"learning_rate": 7.365631763832648e-08,
|
|
"loss": 0.236327663064003,
|
|
"step": 4482
|
|
},
|
|
{
|
|
"epoch": 2.8535964353914705,
|
|
"grad_norm": 10.51881185040378,
|
|
"learning_rate": 7.302426466433032e-08,
|
|
"loss": 0.4160456955432892,
|
|
"step": 4483
|
|
},
|
|
{
|
|
"epoch": 2.854232972628899,
|
|
"grad_norm": 18.06197119086671,
|
|
"learning_rate": 7.239491529759445e-08,
|
|
"loss": 0.6310904026031494,
|
|
"step": 4484
|
|
},
|
|
{
|
|
"epoch": 2.8548695098663273,
|
|
"grad_norm": 9.489537695208638,
|
|
"learning_rate": 7.176826988346597e-08,
|
|
"loss": 0.6764538884162903,
|
|
"step": 4485
|
|
},
|
|
{
|
|
"epoch": 2.8555060471037557,
|
|
"grad_norm": 15.862886500570081,
|
|
"learning_rate": 7.114432876580646e-08,
|
|
"loss": 0.3962682783603668,
|
|
"step": 4486
|
|
},
|
|
{
|
|
"epoch": 2.856142584341184,
|
|
"grad_norm": 7.742437758829943,
|
|
"learning_rate": 7.052309228699594e-08,
|
|
"loss": 0.5590054392814636,
|
|
"step": 4487
|
|
},
|
|
{
|
|
"epoch": 2.8567791215786125,
|
|
"grad_norm": 9.470130875072385,
|
|
"learning_rate": 6.990456078792729e-08,
|
|
"loss": 0.3280986547470093,
|
|
"step": 4488
|
|
},
|
|
{
|
|
"epoch": 2.857415658816041,
|
|
"grad_norm": 10.316280864701609,
|
|
"learning_rate": 6.928873460801288e-08,
|
|
"loss": 0.454135000705719,
|
|
"step": 4489
|
|
},
|
|
{
|
|
"epoch": 2.8580521960534693,
|
|
"grad_norm": 15.569255253727679,
|
|
"learning_rate": 6.867561408517631e-08,
|
|
"loss": 0.5000993609428406,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 2.8586887332908977,
|
|
"grad_norm": 11.534845057933433,
|
|
"learning_rate": 6.806519955586066e-08,
|
|
"loss": 0.2902962863445282,
|
|
"step": 4491
|
|
},
|
|
{
|
|
"epoch": 2.859325270528326,
|
|
"grad_norm": 8.14551359453101,
|
|
"learning_rate": 6.745749135502077e-08,
|
|
"loss": 0.2559558153152466,
|
|
"step": 4492
|
|
},
|
|
{
|
|
"epoch": 2.859961807765754,
|
|
"grad_norm": 8.109206483058342,
|
|
"learning_rate": 6.68524898161288e-08,
|
|
"loss": 0.16547997295856476,
|
|
"step": 4493
|
|
},
|
|
{
|
|
"epoch": 2.860598345003183,
|
|
"grad_norm": 12.540330946147781,
|
|
"learning_rate": 6.625019527116972e-08,
|
|
"loss": 0.2976877987384796,
|
|
"step": 4494
|
|
},
|
|
{
|
|
"epoch": 2.861234882240611,
|
|
"grad_norm": 9.735846164542485,
|
|
"learning_rate": 6.565060805064471e-08,
|
|
"loss": 0.9716310501098633,
|
|
"step": 4495
|
|
},
|
|
{
|
|
"epoch": 2.8618714194780397,
|
|
"grad_norm": 27.405946650731224,
|
|
"learning_rate": 6.505372848356838e-08,
|
|
"loss": 1.2291693687438965,
|
|
"step": 4496
|
|
},
|
|
{
|
|
"epoch": 2.8625079567154676,
|
|
"grad_norm": 10.291637457435106,
|
|
"learning_rate": 6.44595568974693e-08,
|
|
"loss": 0.6201648712158203,
|
|
"step": 4497
|
|
},
|
|
{
|
|
"epoch": 2.8631444939528965,
|
|
"grad_norm": 16.41300650112146,
|
|
"learning_rate": 6.386809361839163e-08,
|
|
"loss": 1.1137560606002808,
|
|
"step": 4498
|
|
},
|
|
{
|
|
"epoch": 2.8637810311903245,
|
|
"grad_norm": 9.763512408580295,
|
|
"learning_rate": 6.327933897089245e-08,
|
|
"loss": 0.46922871470451355,
|
|
"step": 4499
|
|
},
|
|
{
|
|
"epoch": 2.864417568427753,
|
|
"grad_norm": 10.325504020447568,
|
|
"learning_rate": 6.269329327804107e-08,
|
|
"loss": 0.49104800820350647,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 2.8650541056651813,
|
|
"grad_norm": 8.177469348115752,
|
|
"learning_rate": 6.210995686142252e-08,
|
|
"loss": 0.5692614912986755,
|
|
"step": 4501
|
|
},
|
|
{
|
|
"epoch": 2.8656906429026097,
|
|
"grad_norm": 9.886226559193878,
|
|
"learning_rate": 6.15293300411346e-08,
|
|
"loss": 0.20902472734451294,
|
|
"step": 4502
|
|
},
|
|
{
|
|
"epoch": 2.866327180140038,
|
|
"grad_norm": 13.619568394311024,
|
|
"learning_rate": 6.09514131357869e-08,
|
|
"loss": 0.40585821866989136,
|
|
"step": 4503
|
|
},
|
|
{
|
|
"epoch": 2.8669637173774665,
|
|
"grad_norm": 10.789705295549705,
|
|
"learning_rate": 6.037620646250464e-08,
|
|
"loss": 0.6018178462982178,
|
|
"step": 4504
|
|
},
|
|
{
|
|
"epoch": 2.867600254614895,
|
|
"grad_norm": 14.345999572237337,
|
|
"learning_rate": 5.980371033692257e-08,
|
|
"loss": 0.670649528503418,
|
|
"step": 4505
|
|
},
|
|
{
|
|
"epoch": 2.8682367918523233,
|
|
"grad_norm": 9.990127169021587,
|
|
"learning_rate": 5.923392507319104e-08,
|
|
"loss": 0.17083537578582764,
|
|
"step": 4506
|
|
},
|
|
{
|
|
"epoch": 2.8688733290897517,
|
|
"grad_norm": 9.976647046836414,
|
|
"learning_rate": 5.866685098396996e-08,
|
|
"loss": 0.3060707151889801,
|
|
"step": 4507
|
|
},
|
|
{
|
|
"epoch": 2.86950986632718,
|
|
"grad_norm": 11.948991226308062,
|
|
"learning_rate": 5.8102488380434866e-08,
|
|
"loss": 0.6045752167701721,
|
|
"step": 4508
|
|
},
|
|
{
|
|
"epoch": 2.8701464035646085,
|
|
"grad_norm": 9.162474701421623,
|
|
"learning_rate": 5.75408375722708e-08,
|
|
"loss": 0.8474628329277039,
|
|
"step": 4509
|
|
},
|
|
{
|
|
"epoch": 2.870782940802037,
|
|
"grad_norm": 12.94460002971908,
|
|
"learning_rate": 5.698189886767569e-08,
|
|
"loss": 0.4504960775375366,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 2.8714194780394653,
|
|
"grad_norm": 12.159960350468108,
|
|
"learning_rate": 5.642567257335918e-08,
|
|
"loss": 0.7247211337089539,
|
|
"step": 4511
|
|
},
|
|
{
|
|
"epoch": 2.8720560152768937,
|
|
"grad_norm": 12.917950196781288,
|
|
"learning_rate": 5.587215899454268e-08,
|
|
"loss": 0.6724990606307983,
|
|
"step": 4512
|
|
},
|
|
{
|
|
"epoch": 2.872692552514322,
|
|
"grad_norm": 13.821904656241426,
|
|
"learning_rate": 5.532135843495767e-08,
|
|
"loss": 0.488506019115448,
|
|
"step": 4513
|
|
},
|
|
{
|
|
"epoch": 2.8733290897517505,
|
|
"grad_norm": 9.892853582240695,
|
|
"learning_rate": 5.477327119684961e-08,
|
|
"loss": 0.6169945597648621,
|
|
"step": 4514
|
|
},
|
|
{
|
|
"epoch": 2.873965626989179,
|
|
"grad_norm": 10.54057102665935,
|
|
"learning_rate": 5.422789758097291e-08,
|
|
"loss": 0.588119626045227,
|
|
"step": 4515
|
|
},
|
|
{
|
|
"epoch": 2.8746021642266073,
|
|
"grad_norm": 10.328070081405057,
|
|
"learning_rate": 5.3685237886592635e-08,
|
|
"loss": 0.5582101345062256,
|
|
"step": 4516
|
|
},
|
|
{
|
|
"epoch": 2.8752387014640357,
|
|
"grad_norm": 7.769263307122998,
|
|
"learning_rate": 5.314529241148669e-08,
|
|
"loss": 0.32208502292633057,
|
|
"step": 4517
|
|
},
|
|
{
|
|
"epoch": 2.875875238701464,
|
|
"grad_norm": 8.19608496412307,
|
|
"learning_rate": 5.260806145194253e-08,
|
|
"loss": 0.3067604899406433,
|
|
"step": 4518
|
|
},
|
|
{
|
|
"epoch": 2.8765117759388925,
|
|
"grad_norm": 10.461016766778824,
|
|
"learning_rate": 5.20735453027571e-08,
|
|
"loss": 0.19890695810317993,
|
|
"step": 4519
|
|
},
|
|
{
|
|
"epoch": 2.877148313176321,
|
|
"grad_norm": 15.077438205436966,
|
|
"learning_rate": 5.1541744257238014e-08,
|
|
"loss": 0.7313632965087891,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 2.8777848504137493,
|
|
"grad_norm": 13.788361009118347,
|
|
"learning_rate": 5.101265860720517e-08,
|
|
"loss": 0.5750613808631897,
|
|
"step": 4521
|
|
},
|
|
{
|
|
"epoch": 2.8784213876511777,
|
|
"grad_norm": 8.771392030295573,
|
|
"learning_rate": 5.048628864298577e-08,
|
|
"loss": 0.5299889445304871,
|
|
"step": 4522
|
|
},
|
|
{
|
|
"epoch": 2.879057924888606,
|
|
"grad_norm": 13.156372264193747,
|
|
"learning_rate": 4.9962634653417664e-08,
|
|
"loss": 0.8597928881645203,
|
|
"step": 4523
|
|
},
|
|
{
|
|
"epoch": 2.8796944621260345,
|
|
"grad_norm": 6.407389889901951,
|
|
"learning_rate": 4.9441696925848214e-08,
|
|
"loss": 0.206428661942482,
|
|
"step": 4524
|
|
},
|
|
{
|
|
"epoch": 2.880330999363463,
|
|
"grad_norm": 9.55087603823779,
|
|
"learning_rate": 4.892347574613599e-08,
|
|
"loss": 0.26703721284866333,
|
|
"step": 4525
|
|
},
|
|
{
|
|
"epoch": 2.880967536600891,
|
|
"grad_norm": 12.118321419929345,
|
|
"learning_rate": 4.840797139864517e-08,
|
|
"loss": 0.34286391735076904,
|
|
"step": 4526
|
|
},
|
|
{
|
|
"epoch": 2.8816040738383197,
|
|
"grad_norm": 10.712130402413909,
|
|
"learning_rate": 4.789518416625338e-08,
|
|
"loss": 0.27562084794044495,
|
|
"step": 4527
|
|
},
|
|
{
|
|
"epoch": 2.8822406110757477,
|
|
"grad_norm": 9.872198012445521,
|
|
"learning_rate": 4.738511433034443e-08,
|
|
"loss": 0.6138008832931519,
|
|
"step": 4528
|
|
},
|
|
{
|
|
"epoch": 2.8828771483131765,
|
|
"grad_norm": 13.11905700699076,
|
|
"learning_rate": 4.6877762170811633e-08,
|
|
"loss": 0.45805659890174866,
|
|
"step": 4529
|
|
},
|
|
{
|
|
"epoch": 2.8835136855506045,
|
|
"grad_norm": 7.1092825683274405,
|
|
"learning_rate": 4.6373127966057863e-08,
|
|
"loss": 0.34670984745025635,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 2.8841502227880333,
|
|
"grad_norm": 10.35525817290448,
|
|
"learning_rate": 4.587121199299272e-08,
|
|
"loss": 0.9458390474319458,
|
|
"step": 4531
|
|
},
|
|
{
|
|
"epoch": 2.8847867600254613,
|
|
"grad_norm": 8.325769927238404,
|
|
"learning_rate": 4.537201452703699e-08,
|
|
"loss": 0.35492727160453796,
|
|
"step": 4532
|
|
},
|
|
{
|
|
"epoch": 2.88542329726289,
|
|
"grad_norm": 12.974471443984363,
|
|
"learning_rate": 4.487553584211657e-08,
|
|
"loss": 0.5512891411781311,
|
|
"step": 4533
|
|
},
|
|
{
|
|
"epoch": 2.886059834500318,
|
|
"grad_norm": 11.181319485427325,
|
|
"learning_rate": 4.438177621066797e-08,
|
|
"loss": 0.3274085223674774,
|
|
"step": 4534
|
|
},
|
|
{
|
|
"epoch": 2.8866963717377465,
|
|
"grad_norm": 13.108270074939265,
|
|
"learning_rate": 4.3890735903634465e-08,
|
|
"loss": 0.23640866577625275,
|
|
"step": 4535
|
|
},
|
|
{
|
|
"epoch": 2.887332908975175,
|
|
"grad_norm": 8.616832133111684,
|
|
"learning_rate": 4.3402415190467174e-08,
|
|
"loss": 0.19059917330741882,
|
|
"step": 4536
|
|
},
|
|
{
|
|
"epoch": 2.8879694462126033,
|
|
"grad_norm": 10.438912659103528,
|
|
"learning_rate": 4.291681433912509e-08,
|
|
"loss": 0.7314203977584839,
|
|
"step": 4537
|
|
},
|
|
{
|
|
"epoch": 2.8886059834500317,
|
|
"grad_norm": 7.2983698418945915,
|
|
"learning_rate": 4.243393361607562e-08,
|
|
"loss": 0.4773388206958771,
|
|
"step": 4538
|
|
},
|
|
{
|
|
"epoch": 2.88924252068746,
|
|
"grad_norm": 11.003429159610093,
|
|
"learning_rate": 4.1953773286291243e-08,
|
|
"loss": 0.512122631072998,
|
|
"step": 4539
|
|
},
|
|
{
|
|
"epoch": 2.8898790579248885,
|
|
"grad_norm": 7.072640879091521,
|
|
"learning_rate": 4.147633361325454e-08,
|
|
"loss": 0.7262274622917175,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 2.890515595162317,
|
|
"grad_norm": 28.306193355621552,
|
|
"learning_rate": 4.1001614858952046e-08,
|
|
"loss": 0.5705623030662537,
|
|
"step": 4541
|
|
},
|
|
{
|
|
"epoch": 2.8911521323997453,
|
|
"grad_norm": 12.629888537952215,
|
|
"learning_rate": 4.0529617283880386e-08,
|
|
"loss": 0.26210007071495056,
|
|
"step": 4542
|
|
},
|
|
{
|
|
"epoch": 2.8917886696371737,
|
|
"grad_norm": 11.907186782578643,
|
|
"learning_rate": 4.006034114704016e-08,
|
|
"loss": 0.387427419424057,
|
|
"step": 4543
|
|
},
|
|
{
|
|
"epoch": 2.892425206874602,
|
|
"grad_norm": 13.79877895212065,
|
|
"learning_rate": 3.959378670594094e-08,
|
|
"loss": 0.3232886493206024,
|
|
"step": 4544
|
|
},
|
|
{
|
|
"epoch": 2.8930617441120305,
|
|
"grad_norm": 9.935636510229395,
|
|
"learning_rate": 3.9129954216597375e-08,
|
|
"loss": 0.34298163652420044,
|
|
"step": 4545
|
|
},
|
|
{
|
|
"epoch": 2.893698281349459,
|
|
"grad_norm": 13.409272917447078,
|
|
"learning_rate": 3.866884393353087e-08,
|
|
"loss": 1.2276716232299805,
|
|
"step": 4546
|
|
},
|
|
{
|
|
"epoch": 2.8943348185868873,
|
|
"grad_norm": 9.455407692868997,
|
|
"learning_rate": 3.8210456109769034e-08,
|
|
"loss": 1.2836456298828125,
|
|
"step": 4547
|
|
},
|
|
{
|
|
"epoch": 2.8949713558243158,
|
|
"grad_norm": 12.842722851236312,
|
|
"learning_rate": 3.775479099684565e-08,
|
|
"loss": 0.4995691776275635,
|
|
"step": 4548
|
|
},
|
|
{
|
|
"epoch": 2.895607893061744,
|
|
"grad_norm": 13.569094031140875,
|
|
"learning_rate": 3.7301848844800145e-08,
|
|
"loss": 0.3900608718395233,
|
|
"step": 4549
|
|
},
|
|
{
|
|
"epoch": 2.8962444302991726,
|
|
"grad_norm": 12.442618503205223,
|
|
"learning_rate": 3.685162990217816e-08,
|
|
"loss": 0.6684539914131165,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 2.896880967536601,
|
|
"grad_norm": 7.420570756299823,
|
|
"learning_rate": 3.6404134416031524e-08,
|
|
"loss": 0.23755618929862976,
|
|
"step": 4551
|
|
},
|
|
{
|
|
"epoch": 2.8975175047740294,
|
|
"grad_norm": 22.238530919928944,
|
|
"learning_rate": 3.595936263191657e-08,
|
|
"loss": 0.41127052903175354,
|
|
"step": 4552
|
|
},
|
|
{
|
|
"epoch": 2.8981540420114578,
|
|
"grad_norm": 8.114414533211912,
|
|
"learning_rate": 3.551731479389531e-08,
|
|
"loss": 0.2637931704521179,
|
|
"step": 4553
|
|
},
|
|
{
|
|
"epoch": 2.898790579248886,
|
|
"grad_norm": 13.86090703061008,
|
|
"learning_rate": 3.507799114453481e-08,
|
|
"loss": 0.6877958178520203,
|
|
"step": 4554
|
|
},
|
|
{
|
|
"epoch": 2.8994271164863146,
|
|
"grad_norm": 10.643344179517298,
|
|
"learning_rate": 3.4641391924908344e-08,
|
|
"loss": 0.4997013509273529,
|
|
"step": 4555
|
|
},
|
|
{
|
|
"epoch": 2.900063653723743,
|
|
"grad_norm": 8.078577324109752,
|
|
"learning_rate": 3.420751737459316e-08,
|
|
"loss": 0.408937931060791,
|
|
"step": 4556
|
|
},
|
|
{
|
|
"epoch": 2.9007001909611714,
|
|
"grad_norm": 11.094398976129265,
|
|
"learning_rate": 3.377636773167159e-08,
|
|
"loss": 0.5586139559745789,
|
|
"step": 4557
|
|
},
|
|
{
|
|
"epoch": 2.9013367281986,
|
|
"grad_norm": 9.230475613217141,
|
|
"learning_rate": 3.3347943232730495e-08,
|
|
"loss": 0.2931652069091797,
|
|
"step": 4558
|
|
},
|
|
{
|
|
"epoch": 2.9019732654360277,
|
|
"grad_norm": 20.033575353557268,
|
|
"learning_rate": 3.2922244112862376e-08,
|
|
"loss": 1.0695240497589111,
|
|
"step": 4559
|
|
},
|
|
{
|
|
"epoch": 2.9026098026734566,
|
|
"grad_norm": 10.139050366427407,
|
|
"learning_rate": 3.249927060566316e-08,
|
|
"loss": 0.9362115859985352,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 2.9032463399108845,
|
|
"grad_norm": 9.012167247191057,
|
|
"learning_rate": 3.207902294323384e-08,
|
|
"loss": 0.37736958265304565,
|
|
"step": 4561
|
|
},
|
|
{
|
|
"epoch": 2.9038828771483134,
|
|
"grad_norm": 11.550500708163534,
|
|
"learning_rate": 3.166150135617885e-08,
|
|
"loss": 0.39598530530929565,
|
|
"step": 4562
|
|
},
|
|
{
|
|
"epoch": 2.9045194143857413,
|
|
"grad_norm": 12.595159471499244,
|
|
"learning_rate": 3.124670607360714e-08,
|
|
"loss": 0.6241990327835083,
|
|
"step": 4563
|
|
},
|
|
{
|
|
"epoch": 2.90515595162317,
|
|
"grad_norm": 13.267944266639695,
|
|
"learning_rate": 3.083463732313163e-08,
|
|
"loss": 0.1851673275232315,
|
|
"step": 4564
|
|
},
|
|
{
|
|
"epoch": 2.905792488860598,
|
|
"grad_norm": 10.521859273531112,
|
|
"learning_rate": 3.04252953308698e-08,
|
|
"loss": 0.580502986907959,
|
|
"step": 4565
|
|
},
|
|
{
|
|
"epoch": 2.906429026098027,
|
|
"grad_norm": 16.216781262611818,
|
|
"learning_rate": 3.0018680321441396e-08,
|
|
"loss": 0.3015652596950531,
|
|
"step": 4566
|
|
},
|
|
{
|
|
"epoch": 2.907065563335455,
|
|
"grad_norm": 15.73240355724326,
|
|
"learning_rate": 2.9614792517970726e-08,
|
|
"loss": 0.6678402423858643,
|
|
"step": 4567
|
|
},
|
|
{
|
|
"epoch": 2.9077021005728834,
|
|
"grad_norm": 9.54454044413332,
|
|
"learning_rate": 2.9213632142086612e-08,
|
|
"loss": 0.4288226366043091,
|
|
"step": 4568
|
|
},
|
|
{
|
|
"epoch": 2.9083386378103118,
|
|
"grad_norm": 8.183428493587117,
|
|
"learning_rate": 2.881519941391797e-08,
|
|
"loss": 0.27657654881477356,
|
|
"step": 4569
|
|
},
|
|
{
|
|
"epoch": 2.90897517504774,
|
|
"grad_norm": 12.805530169715631,
|
|
"learning_rate": 2.8419494552101025e-08,
|
|
"loss": 1.0283787250518799,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 2.9096117122851686,
|
|
"grad_norm": 9.02253089877856,
|
|
"learning_rate": 2.802651777377152e-08,
|
|
"loss": 0.5577377676963806,
|
|
"step": 4571
|
|
},
|
|
{
|
|
"epoch": 2.910248249522597,
|
|
"grad_norm": 13.027508164723109,
|
|
"learning_rate": 2.763626929457086e-08,
|
|
"loss": 0.5071287155151367,
|
|
"step": 4572
|
|
},
|
|
{
|
|
"epoch": 2.9108847867600254,
|
|
"grad_norm": 11.013042426784692,
|
|
"learning_rate": 2.7248749328642187e-08,
|
|
"loss": 0.2327755093574524,
|
|
"step": 4573
|
|
},
|
|
{
|
|
"epoch": 2.911521323997454,
|
|
"grad_norm": 8.90380140679578,
|
|
"learning_rate": 2.6863958088630958e-08,
|
|
"loss": 0.57288658618927,
|
|
"step": 4574
|
|
},
|
|
{
|
|
"epoch": 2.912157861234882,
|
|
"grad_norm": 7.094931858902456,
|
|
"learning_rate": 2.6481895785686607e-08,
|
|
"loss": 0.1919298768043518,
|
|
"step": 4575
|
|
},
|
|
{
|
|
"epoch": 2.9127943984723106,
|
|
"grad_norm": 12.198952033207512,
|
|
"learning_rate": 2.610256262945976e-08,
|
|
"loss": 0.35462987422943115,
|
|
"step": 4576
|
|
},
|
|
{
|
|
"epoch": 2.913430935709739,
|
|
"grad_norm": 12.269093207476155,
|
|
"learning_rate": 2.572595882810447e-08,
|
|
"loss": 0.36865872144699097,
|
|
"step": 4577
|
|
},
|
|
{
|
|
"epoch": 2.9140674729471674,
|
|
"grad_norm": 10.972251642161854,
|
|
"learning_rate": 2.5352084588275984e-08,
|
|
"loss": 0.7054241895675659,
|
|
"step": 4578
|
|
},
|
|
{
|
|
"epoch": 2.914704010184596,
|
|
"grad_norm": 12.250179674659089,
|
|
"learning_rate": 2.4980940115133524e-08,
|
|
"loss": 0.7186670303344727,
|
|
"step": 4579
|
|
},
|
|
{
|
|
"epoch": 2.915340547422024,
|
|
"grad_norm": 9.284270543081451,
|
|
"learning_rate": 2.46125256123364e-08,
|
|
"loss": 0.6131752133369446,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 2.9159770846594526,
|
|
"grad_norm": 9.888657702691015,
|
|
"learning_rate": 2.4246841282046795e-08,
|
|
"loss": 0.3839394450187683,
|
|
"step": 4581
|
|
},
|
|
{
|
|
"epoch": 2.916613621896881,
|
|
"grad_norm": 14.768578185654794,
|
|
"learning_rate": 2.3883887324929188e-08,
|
|
"loss": 0.7487176060676575,
|
|
"step": 4582
|
|
},
|
|
{
|
|
"epoch": 2.9172501591343094,
|
|
"grad_norm": 9.897920758430246,
|
|
"learning_rate": 2.3523663940149266e-08,
|
|
"loss": 0.35754120349884033,
|
|
"step": 4583
|
|
},
|
|
{
|
|
"epoch": 2.917886696371738,
|
|
"grad_norm": 11.68323652294888,
|
|
"learning_rate": 2.3166171325374464e-08,
|
|
"loss": 0.6378008723258972,
|
|
"step": 4584
|
|
},
|
|
{
|
|
"epoch": 2.918523233609166,
|
|
"grad_norm": 9.558492839347917,
|
|
"learning_rate": 2.2811409676773422e-08,
|
|
"loss": 0.4749261438846588,
|
|
"step": 4585
|
|
},
|
|
{
|
|
"epoch": 2.9191597708465946,
|
|
"grad_norm": 7.966784517798446,
|
|
"learning_rate": 2.2459379189016528e-08,
|
|
"loss": 0.3044714033603668,
|
|
"step": 4586
|
|
},
|
|
{
|
|
"epoch": 2.919796308084023,
|
|
"grad_norm": 16.523261646513284,
|
|
"learning_rate": 2.2110080055275375e-08,
|
|
"loss": 0.32255470752716064,
|
|
"step": 4587
|
|
},
|
|
{
|
|
"epoch": 2.9204328453214514,
|
|
"grad_norm": 10.563321431499633,
|
|
"learning_rate": 2.176351246722386e-08,
|
|
"loss": 0.6167469024658203,
|
|
"step": 4588
|
|
},
|
|
{
|
|
"epoch": 2.92106938255888,
|
|
"grad_norm": 11.348854001473573,
|
|
"learning_rate": 2.1419676615034857e-08,
|
|
"loss": 0.3167179822921753,
|
|
"step": 4589
|
|
},
|
|
{
|
|
"epoch": 2.9217059197963082,
|
|
"grad_norm": 12.836393825008408,
|
|
"learning_rate": 2.1078572687383558e-08,
|
|
"loss": 1.1272660493850708,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 2.9223424570337366,
|
|
"grad_norm": 8.765601042318105,
|
|
"learning_rate": 2.074020087144635e-08,
|
|
"loss": 0.2598417401313782,
|
|
"step": 4591
|
|
},
|
|
{
|
|
"epoch": 2.9229789942711646,
|
|
"grad_norm": 9.30002751072817,
|
|
"learning_rate": 2.0404561352899144e-08,
|
|
"loss": 0.5057159066200256,
|
|
"step": 4592
|
|
},
|
|
{
|
|
"epoch": 2.9236155315085934,
|
|
"grad_norm": 6.063826370602459,
|
|
"learning_rate": 2.0071654315920176e-08,
|
|
"loss": 0.5362277626991272,
|
|
"step": 4593
|
|
},
|
|
{
|
|
"epoch": 2.9242520687460214,
|
|
"grad_norm": 9.056439821206423,
|
|
"learning_rate": 1.9741479943186647e-08,
|
|
"loss": 0.6873083710670471,
|
|
"step": 4594
|
|
},
|
|
{
|
|
"epoch": 2.9248886059834502,
|
|
"grad_norm": 14.735859378043989,
|
|
"learning_rate": 1.9414038415877523e-08,
|
|
"loss": 0.3906959295272827,
|
|
"step": 4595
|
|
},
|
|
{
|
|
"epoch": 2.925525143220878,
|
|
"grad_norm": 10.403426445842749,
|
|
"learning_rate": 1.908932991367185e-08,
|
|
"loss": 0.5455116033554077,
|
|
"step": 4596
|
|
},
|
|
{
|
|
"epoch": 2.926161680458307,
|
|
"grad_norm": 13.158173803457993,
|
|
"learning_rate": 1.8767354614747657e-08,
|
|
"loss": 0.3528488278388977,
|
|
"step": 4597
|
|
},
|
|
{
|
|
"epoch": 2.926798217695735,
|
|
"grad_norm": 11.653583575239574,
|
|
"learning_rate": 1.8448112695785282e-08,
|
|
"loss": 0.5714707970619202,
|
|
"step": 4598
|
|
},
|
|
{
|
|
"epoch": 2.927434754933164,
|
|
"grad_norm": 12.06362024924125,
|
|
"learning_rate": 1.813160433196348e-08,
|
|
"loss": 0.6397544145584106,
|
|
"step": 4599
|
|
},
|
|
{
|
|
"epoch": 2.928071292170592,
|
|
"grad_norm": 7.940811517503116,
|
|
"learning_rate": 1.7817829696962773e-08,
|
|
"loss": 0.29710057377815247,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 2.92870782940802,
|
|
"grad_norm": 9.856410599398739,
|
|
"learning_rate": 1.750678896296154e-08,
|
|
"loss": 0.4119393825531006,
|
|
"step": 4601
|
|
},
|
|
{
|
|
"epoch": 2.9293443666454486,
|
|
"grad_norm": 14.80908541737794,
|
|
"learning_rate": 1.7198482300638808e-08,
|
|
"loss": 0.5728417634963989,
|
|
"step": 4602
|
|
},
|
|
{
|
|
"epoch": 2.929980903882877,
|
|
"grad_norm": 10.862940523560596,
|
|
"learning_rate": 1.689290987917369e-08,
|
|
"loss": 0.49775296449661255,
|
|
"step": 4603
|
|
},
|
|
{
|
|
"epoch": 2.9306174411203054,
|
|
"grad_norm": 10.420456319482795,
|
|
"learning_rate": 1.6590071866245393e-08,
|
|
"loss": 0.34582358598709106,
|
|
"step": 4604
|
|
},
|
|
{
|
|
"epoch": 2.931253978357734,
|
|
"grad_norm": 10.049706056612026,
|
|
"learning_rate": 1.6289968428030433e-08,
|
|
"loss": 0.4532100558280945,
|
|
"step": 4605
|
|
},
|
|
{
|
|
"epoch": 2.9318905155951622,
|
|
"grad_norm": 6.757820628953149,
|
|
"learning_rate": 1.5992599729207638e-08,
|
|
"loss": 0.3634646534919739,
|
|
"step": 4606
|
|
},
|
|
{
|
|
"epoch": 2.9325270528325906,
|
|
"grad_norm": 8.555784308495546,
|
|
"learning_rate": 1.5697965932953694e-08,
|
|
"loss": 0.26822400093078613,
|
|
"step": 4607
|
|
},
|
|
{
|
|
"epoch": 2.933163590070019,
|
|
"grad_norm": 11.672770541356844,
|
|
"learning_rate": 1.540606720094373e-08,
|
|
"loss": 0.651909351348877,
|
|
"step": 4608
|
|
},
|
|
{
|
|
"epoch": 2.9338001273074474,
|
|
"grad_norm": 9.979001378964576,
|
|
"learning_rate": 1.51169036933535e-08,
|
|
"loss": 0.4629194140434265,
|
|
"step": 4609
|
|
},
|
|
{
|
|
"epoch": 2.934436664544876,
|
|
"grad_norm": 11.251043223072452,
|
|
"learning_rate": 1.4830475568857749e-08,
|
|
"loss": 2.339359998703003,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 2.9350732017823042,
|
|
"grad_norm": 11.403279780930562,
|
|
"learning_rate": 1.4546782984629082e-08,
|
|
"loss": 0.3291216492652893,
|
|
"step": 4611
|
|
},
|
|
{
|
|
"epoch": 2.9357097390197326,
|
|
"grad_norm": 12.874134226120445,
|
|
"learning_rate": 1.4265826096340197e-08,
|
|
"loss": 0.4676075577735901,
|
|
"step": 4612
|
|
},
|
|
{
|
|
"epoch": 2.936346276257161,
|
|
"grad_norm": 20.105035086696223,
|
|
"learning_rate": 1.3987605058162213e-08,
|
|
"loss": 0.4496423006057739,
|
|
"step": 4613
|
|
},
|
|
{
|
|
"epoch": 2.9369828134945895,
|
|
"grad_norm": 7.423854726260795,
|
|
"learning_rate": 1.3712120022764119e-08,
|
|
"loss": 0.5345197916030884,
|
|
"step": 4614
|
|
},
|
|
{
|
|
"epoch": 2.937619350732018,
|
|
"grad_norm": 23.551985114198768,
|
|
"learning_rate": 1.3439371141315549e-08,
|
|
"loss": 0.47419023513793945,
|
|
"step": 4615
|
|
},
|
|
{
|
|
"epoch": 2.9382558879694463,
|
|
"grad_norm": 8.284858958714235,
|
|
"learning_rate": 1.3169358563482893e-08,
|
|
"loss": 0.5698477029800415,
|
|
"step": 4616
|
|
},
|
|
{
|
|
"epoch": 2.9388924252068747,
|
|
"grad_norm": 12.162989842432431,
|
|
"learning_rate": 1.2902082437432074e-08,
|
|
"loss": 0.7193920612335205,
|
|
"step": 4617
|
|
},
|
|
{
|
|
"epoch": 2.939528962444303,
|
|
"grad_norm": 10.31378049105075,
|
|
"learning_rate": 1.2637542909826329e-08,
|
|
"loss": 0.3350120782852173,
|
|
"step": 4618
|
|
},
|
|
{
|
|
"epoch": 2.9401654996817315,
|
|
"grad_norm": 11.33761404680134,
|
|
"learning_rate": 1.2375740125828428e-08,
|
|
"loss": 0.4467247426509857,
|
|
"step": 4619
|
|
},
|
|
{
|
|
"epoch": 2.94080203691916,
|
|
"grad_norm": 13.919944337539462,
|
|
"learning_rate": 1.2116674229099012e-08,
|
|
"loss": 0.4478244185447693,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 2.9414385741565883,
|
|
"grad_norm": 9.019832744100302,
|
|
"learning_rate": 1.1860345361796032e-08,
|
|
"loss": 0.3990923762321472,
|
|
"step": 4621
|
|
},
|
|
{
|
|
"epoch": 2.9420751113940167,
|
|
"grad_norm": 15.089779838926905,
|
|
"learning_rate": 1.160675366457753e-08,
|
|
"loss": 0.20606833696365356,
|
|
"step": 4622
|
|
},
|
|
{
|
|
"epoch": 2.942711648631445,
|
|
"grad_norm": 9.342650066134949,
|
|
"learning_rate": 1.1355899276597193e-08,
|
|
"loss": 0.25931254029273987,
|
|
"step": 4623
|
|
},
|
|
{
|
|
"epoch": 2.9433481858688735,
|
|
"grad_norm": 9.576070508434526,
|
|
"learning_rate": 1.1107782335508244e-08,
|
|
"loss": 0.3865275979042053,
|
|
"step": 4624
|
|
},
|
|
{
|
|
"epoch": 2.943984723106302,
|
|
"grad_norm": 10.708802698444359,
|
|
"learning_rate": 1.0862402977461217e-08,
|
|
"loss": 0.44095659255981445,
|
|
"step": 4625
|
|
},
|
|
{
|
|
"epoch": 2.9446212603437303,
|
|
"grad_norm": 7.8796593248007065,
|
|
"learning_rate": 1.0619761337104517e-08,
|
|
"loss": 0.5816665291786194,
|
|
"step": 4626
|
|
},
|
|
{
|
|
"epoch": 2.9452577975811582,
|
|
"grad_norm": 19.374681129891762,
|
|
"learning_rate": 1.0379857547584415e-08,
|
|
"loss": 0.408510684967041,
|
|
"step": 4627
|
|
},
|
|
{
|
|
"epoch": 2.945894334818587,
|
|
"grad_norm": 9.477790975890397,
|
|
"learning_rate": 1.0142691740544497e-08,
|
|
"loss": 0.2748204469680786,
|
|
"step": 4628
|
|
},
|
|
{
|
|
"epoch": 2.946530872056015,
|
|
"grad_norm": 13.022074413201983,
|
|
"learning_rate": 9.908264046126215e-09,
|
|
"loss": 0.7232655882835388,
|
|
"step": 4629
|
|
},
|
|
{
|
|
"epoch": 2.947167409293444,
|
|
"grad_norm": 10.553840607174726,
|
|
"learning_rate": 9.67657459296778e-09,
|
|
"loss": 0.255391925573349,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 2.947803946530872,
|
|
"grad_norm": 11.646437471889387,
|
|
"learning_rate": 9.447623508206383e-09,
|
|
"loss": 0.6574585437774658,
|
|
"step": 4631
|
|
},
|
|
{
|
|
"epoch": 2.9484404837683007,
|
|
"grad_norm": 11.192933249385815,
|
|
"learning_rate": 9.221410917475415e-09,
|
|
"loss": 0.6308625340461731,
|
|
"step": 4632
|
|
},
|
|
{
|
|
"epoch": 2.9490770210057287,
|
|
"grad_norm": 16.47639477707107,
|
|
"learning_rate": 8.997936944905583e-09,
|
|
"loss": 0.7614609003067017,
|
|
"step": 4633
|
|
},
|
|
{
|
|
"epoch": 2.949713558243157,
|
|
"grad_norm": 18.821031093522826,
|
|
"learning_rate": 8.777201713124906e-09,
|
|
"loss": 0.3736048936843872,
|
|
"step": 4634
|
|
},
|
|
{
|
|
"epoch": 2.9503500954805855,
|
|
"grad_norm": 14.956951920215104,
|
|
"learning_rate": 8.55920534325927e-09,
|
|
"loss": 0.380316823720932,
|
|
"step": 4635
|
|
},
|
|
{
|
|
"epoch": 2.950986632718014,
|
|
"grad_norm": 11.998666501834945,
|
|
"learning_rate": 8.343947954930209e-09,
|
|
"loss": 0.7638375163078308,
|
|
"step": 4636
|
|
},
|
|
{
|
|
"epoch": 2.9516231699554423,
|
|
"grad_norm": 9.74268219477764,
|
|
"learning_rate": 8.131429666257685e-09,
|
|
"loss": 0.2994990050792694,
|
|
"step": 4637
|
|
},
|
|
{
|
|
"epoch": 2.9522597071928707,
|
|
"grad_norm": 9.755158696628564,
|
|
"learning_rate": 7.921650593857854e-09,
|
|
"loss": 0.6474143266677856,
|
|
"step": 4638
|
|
},
|
|
{
|
|
"epoch": 2.952896244430299,
|
|
"grad_norm": 8.125385774870677,
|
|
"learning_rate": 7.714610852844195e-09,
|
|
"loss": 0.2557169795036316,
|
|
"step": 4639
|
|
},
|
|
{
|
|
"epoch": 2.9535327816677275,
|
|
"grad_norm": 11.572466470400945,
|
|
"learning_rate": 7.510310556826384e-09,
|
|
"loss": 0.5875788927078247,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 2.954169318905156,
|
|
"grad_norm": 13.952905422739963,
|
|
"learning_rate": 7.308749817911409e-09,
|
|
"loss": 0.549329936504364,
|
|
"step": 4641
|
|
},
|
|
{
|
|
"epoch": 2.9548058561425843,
|
|
"grad_norm": 10.413226557769837,
|
|
"learning_rate": 7.109928746703576e-09,
|
|
"loss": 0.27711164951324463,
|
|
"step": 4642
|
|
},
|
|
{
|
|
"epoch": 2.9554423933800127,
|
|
"grad_norm": 8.048674249875848,
|
|
"learning_rate": 6.91384745230228e-09,
|
|
"loss": 0.548971951007843,
|
|
"step": 4643
|
|
},
|
|
{
|
|
"epoch": 2.956078930617441,
|
|
"grad_norm": 10.925300197821286,
|
|
"learning_rate": 6.720506042304231e-09,
|
|
"loss": 0.7924700379371643,
|
|
"step": 4644
|
|
},
|
|
{
|
|
"epoch": 2.9567154678548695,
|
|
"grad_norm": 9.54046390077054,
|
|
"learning_rate": 6.529904622803451e-09,
|
|
"loss": 0.5511906743049622,
|
|
"step": 4645
|
|
},
|
|
{
|
|
"epoch": 2.957352005092298,
|
|
"grad_norm": 6.960396340630606,
|
|
"learning_rate": 6.342043298389055e-09,
|
|
"loss": 0.30417490005493164,
|
|
"step": 4646
|
|
},
|
|
{
|
|
"epoch": 2.9579885423297263,
|
|
"grad_norm": 11.312873173337188,
|
|
"learning_rate": 6.156922172147472e-09,
|
|
"loss": 0.8745008707046509,
|
|
"step": 4647
|
|
},
|
|
{
|
|
"epoch": 2.9586250795671547,
|
|
"grad_norm": 9.28198695690304,
|
|
"learning_rate": 5.974541345661333e-09,
|
|
"loss": 0.7359851598739624,
|
|
"step": 4648
|
|
},
|
|
{
|
|
"epoch": 2.959261616804583,
|
|
"grad_norm": 17.70072258269222,
|
|
"learning_rate": 5.794900919009472e-09,
|
|
"loss": 0.5813353657722473,
|
|
"step": 4649
|
|
},
|
|
{
|
|
"epoch": 2.9598981540420115,
|
|
"grad_norm": 13.46079452508331,
|
|
"learning_rate": 5.61800099076748e-09,
|
|
"loss": 0.33794909715652466,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 2.96053469127944,
|
|
"grad_norm": 8.172576505106319,
|
|
"learning_rate": 5.4438416580054885e-09,
|
|
"loss": 0.39185667037963867,
|
|
"step": 4651
|
|
},
|
|
{
|
|
"epoch": 2.9611712285168683,
|
|
"grad_norm": 10.144199515250063,
|
|
"learning_rate": 5.27242301629205e-09,
|
|
"loss": 0.4409201443195343,
|
|
"step": 4652
|
|
},
|
|
{
|
|
"epoch": 2.9618077657542967,
|
|
"grad_norm": 9.141650159655349,
|
|
"learning_rate": 5.103745159690254e-09,
|
|
"loss": 0.4678126871585846,
|
|
"step": 4653
|
|
},
|
|
{
|
|
"epoch": 2.962444302991725,
|
|
"grad_norm": 9.289043350192085,
|
|
"learning_rate": 4.937808180759951e-09,
|
|
"loss": 0.3816818594932556,
|
|
"step": 4654
|
|
},
|
|
{
|
|
"epoch": 2.9630808402291535,
|
|
"grad_norm": 8.214529228244643,
|
|
"learning_rate": 4.774612170556081e-09,
|
|
"loss": 0.4803459346294403,
|
|
"step": 4655
|
|
},
|
|
{
|
|
"epoch": 2.963717377466582,
|
|
"grad_norm": 7.198439299523344,
|
|
"learning_rate": 4.614157218630344e-09,
|
|
"loss": 0.40270543098449707,
|
|
"step": 4656
|
|
},
|
|
{
|
|
"epoch": 2.9643539147040103,
|
|
"grad_norm": 11.045642870577113,
|
|
"learning_rate": 4.456443413030087e-09,
|
|
"loss": 0.8374072313308716,
|
|
"step": 4657
|
|
},
|
|
{
|
|
"epoch": 2.9649904519414387,
|
|
"grad_norm": 6.684385318568651,
|
|
"learning_rate": 4.30147084029886e-09,
|
|
"loss": 0.3140571415424347,
|
|
"step": 4658
|
|
},
|
|
{
|
|
"epoch": 2.965626989178867,
|
|
"grad_norm": 12.318220516683906,
|
|
"learning_rate": 4.149239585475306e-09,
|
|
"loss": 0.45646950602531433,
|
|
"step": 4659
|
|
},
|
|
{
|
|
"epoch": 2.966263526416295,
|
|
"grad_norm": 9.625743666116774,
|
|
"learning_rate": 3.999749732093716e-09,
|
|
"loss": 0.5095931887626648,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 2.966900063653724,
|
|
"grad_norm": 9.296466825780978,
|
|
"learning_rate": 3.853001362185693e-09,
|
|
"loss": 0.46697038412094116,
|
|
"step": 4661
|
|
},
|
|
{
|
|
"epoch": 2.967536600891152,
|
|
"grad_norm": 14.602637922669667,
|
|
"learning_rate": 3.7089945562762687e-09,
|
|
"loss": 0.5334930419921875,
|
|
"step": 4662
|
|
},
|
|
{
|
|
"epoch": 2.9681731381285807,
|
|
"grad_norm": 8.338407899437348,
|
|
"learning_rate": 3.5677293933877865e-09,
|
|
"loss": 0.4201638698577881,
|
|
"step": 4663
|
|
},
|
|
{
|
|
"epoch": 2.9688096753660087,
|
|
"grad_norm": 15.004695387156493,
|
|
"learning_rate": 3.429205951036574e-09,
|
|
"loss": 0.47473764419555664,
|
|
"step": 4664
|
|
},
|
|
{
|
|
"epoch": 2.9694462126034376,
|
|
"grad_norm": 11.349277045893329,
|
|
"learning_rate": 3.29342430523627e-09,
|
|
"loss": 0.577049732208252,
|
|
"step": 4665
|
|
},
|
|
{
|
|
"epoch": 2.9700827498408655,
|
|
"grad_norm": 13.624833672125106,
|
|
"learning_rate": 3.160384530494498e-09,
|
|
"loss": 0.3924509286880493,
|
|
"step": 4666
|
|
},
|
|
{
|
|
"epoch": 2.9707192870782944,
|
|
"grad_norm": 14.59513753642718,
|
|
"learning_rate": 3.0300866998156376e-09,
|
|
"loss": 0.19112181663513184,
|
|
"step": 4667
|
|
},
|
|
{
|
|
"epoch": 2.9713558243157223,
|
|
"grad_norm": 11.858530858795417,
|
|
"learning_rate": 2.9025308846980514e-09,
|
|
"loss": 0.962306022644043,
|
|
"step": 4668
|
|
},
|
|
{
|
|
"epoch": 2.9719923615531507,
|
|
"grad_norm": 12.201211180370539,
|
|
"learning_rate": 2.7777171551363058e-09,
|
|
"loss": 0.4326895475387573,
|
|
"step": 4669
|
|
},
|
|
{
|
|
"epoch": 2.972628898790579,
|
|
"grad_norm": 9.78405868292641,
|
|
"learning_rate": 2.655645579620614e-09,
|
|
"loss": 0.34527263045310974,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 2.9732654360280075,
|
|
"grad_norm": 9.898585483398586,
|
|
"learning_rate": 2.5363162251357265e-09,
|
|
"loss": 0.36358243227005005,
|
|
"step": 4671
|
|
},
|
|
{
|
|
"epoch": 2.973901973265436,
|
|
"grad_norm": 10.108074306615567,
|
|
"learning_rate": 2.419729157161488e-09,
|
|
"loss": 0.6575568318367004,
|
|
"step": 4672
|
|
},
|
|
{
|
|
"epoch": 2.9745385105028643,
|
|
"grad_norm": 13.735777287176315,
|
|
"learning_rate": 2.3058844396739443e-09,
|
|
"loss": 1.230634331703186,
|
|
"step": 4673
|
|
},
|
|
{
|
|
"epoch": 2.9751750477402927,
|
|
"grad_norm": 12.500055991788804,
|
|
"learning_rate": 2.1947821351431252e-09,
|
|
"loss": 0.5693601369857788,
|
|
"step": 4674
|
|
},
|
|
{
|
|
"epoch": 2.975811584977721,
|
|
"grad_norm": 10.532481489192321,
|
|
"learning_rate": 2.086422304535818e-09,
|
|
"loss": 0.4035893678665161,
|
|
"step": 4675
|
|
},
|
|
{
|
|
"epoch": 2.9764481222151495,
|
|
"grad_norm": 17.158646254901626,
|
|
"learning_rate": 1.9808050073122365e-09,
|
|
"loss": 1.1364099979400635,
|
|
"step": 4676
|
|
},
|
|
{
|
|
"epoch": 2.977084659452578,
|
|
"grad_norm": 10.770126703442818,
|
|
"learning_rate": 1.8779303014282436e-09,
|
|
"loss": 0.31037622690200806,
|
|
"step": 4677
|
|
},
|
|
{
|
|
"epoch": 2.9777211966900063,
|
|
"grad_norm": 5.2507568097299995,
|
|
"learning_rate": 1.7777982433353492e-09,
|
|
"loss": 0.13051451742649078,
|
|
"step": 4678
|
|
},
|
|
{
|
|
"epoch": 2.9783577339274347,
|
|
"grad_norm": 8.059077860358032,
|
|
"learning_rate": 1.680408887979046e-09,
|
|
"loss": 0.5374547839164734,
|
|
"step": 4679
|
|
},
|
|
{
|
|
"epoch": 2.978994271164863,
|
|
"grad_norm": 8.361687841450028,
|
|
"learning_rate": 1.5857622888010294e-09,
|
|
"loss": 0.2703975439071655,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 2.9796308084022916,
|
|
"grad_norm": 10.860569488784169,
|
|
"learning_rate": 1.4938584977364223e-09,
|
|
"loss": 0.5047432780265808,
|
|
"step": 4681
|
|
},
|
|
{
|
|
"epoch": 2.98026734563972,
|
|
"grad_norm": 10.641551737562104,
|
|
"learning_rate": 1.4046975652165506e-09,
|
|
"loss": 0.38319021463394165,
|
|
"step": 4682
|
|
},
|
|
{
|
|
"epoch": 2.9809038828771484,
|
|
"grad_norm": 9.358560198175164,
|
|
"learning_rate": 1.3182795401678328e-09,
|
|
"loss": 0.6337284445762634,
|
|
"step": 4683
|
|
},
|
|
{
|
|
"epoch": 2.9815404201145768,
|
|
"grad_norm": 7.857281843436642,
|
|
"learning_rate": 1.2346044700095594e-09,
|
|
"loss": 0.7023638486862183,
|
|
"step": 4684
|
|
},
|
|
{
|
|
"epoch": 2.982176957352005,
|
|
"grad_norm": 21.27162566836479,
|
|
"learning_rate": 1.1536724006583344e-09,
|
|
"loss": 0.36084359884262085,
|
|
"step": 4685
|
|
},
|
|
{
|
|
"epoch": 2.9828134945894336,
|
|
"grad_norm": 19.295798481613037,
|
|
"learning_rate": 1.075483376524189e-09,
|
|
"loss": 0.6320829391479492,
|
|
"step": 4686
|
|
},
|
|
{
|
|
"epoch": 2.983450031826862,
|
|
"grad_norm": 8.970561560053307,
|
|
"learning_rate": 1.000037440511692e-09,
|
|
"loss": 0.35005611181259155,
|
|
"step": 4687
|
|
},
|
|
{
|
|
"epoch": 2.9840865690642904,
|
|
"grad_norm": 9.301973166109358,
|
|
"learning_rate": 9.273346340216149e-10,
|
|
"loss": 0.1751422882080078,
|
|
"step": 4688
|
|
},
|
|
{
|
|
"epoch": 2.9847231063017188,
|
|
"grad_norm": 20.854029071881634,
|
|
"learning_rate": 8.57374996947602e-10,
|
|
"loss": 0.6614738702774048,
|
|
"step": 4689
|
|
},
|
|
{
|
|
"epoch": 2.985359643539147,
|
|
"grad_norm": 9.719595901457362,
|
|
"learning_rate": 7.90158567680055e-10,
|
|
"loss": 0.6357887387275696,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 2.9859961807765756,
|
|
"grad_norm": 11.102547776524217,
|
|
"learning_rate": 7.256853831022481e-10,
|
|
"loss": 0.6367413997650146,
|
|
"step": 4691
|
|
},
|
|
{
|
|
"epoch": 2.986632718014004,
|
|
"grad_norm": 15.364075839766894,
|
|
"learning_rate": 6.639554785931035e-10,
|
|
"loss": 0.4574640393257141,
|
|
"step": 4692
|
|
},
|
|
{
|
|
"epoch": 2.987269255251432,
|
|
"grad_norm": 12.457287020917468,
|
|
"learning_rate": 6.049688880260807e-10,
|
|
"loss": 0.36697688698768616,
|
|
"step": 4693
|
|
},
|
|
{
|
|
"epoch": 2.987905792488861,
|
|
"grad_norm": 9.161327121663907,
|
|
"learning_rate": 5.487256437691768e-10,
|
|
"loss": 0.5309689044952393,
|
|
"step": 4694
|
|
},
|
|
{
|
|
"epoch": 2.9885423297262887,
|
|
"grad_norm": 10.882506683089108,
|
|
"learning_rate": 4.952257766849266e-10,
|
|
"loss": 0.5090453624725342,
|
|
"step": 4695
|
|
},
|
|
{
|
|
"epoch": 2.9891788669637176,
|
|
"grad_norm": 11.67842769259947,
|
|
"learning_rate": 4.444693161309577e-10,
|
|
"loss": 0.7584124803543091,
|
|
"step": 4696
|
|
},
|
|
{
|
|
"epoch": 2.9898154042011456,
|
|
"grad_norm": 8.416305458186724,
|
|
"learning_rate": 3.9645628995943487e-10,
|
|
"loss": 0.7738195657730103,
|
|
"step": 4697
|
|
},
|
|
{
|
|
"epoch": 2.9904519414385744,
|
|
"grad_norm": 19.47558414095772,
|
|
"learning_rate": 3.511867245159506e-10,
|
|
"loss": 0.9212406277656555,
|
|
"step": 4698
|
|
},
|
|
{
|
|
"epoch": 2.9910884786760024,
|
|
"grad_norm": 8.161966078157366,
|
|
"learning_rate": 3.0866064464174505e-10,
|
|
"loss": 0.9757088422775269,
|
|
"step": 4699
|
|
},
|
|
{
|
|
"epoch": 2.991725015913431,
|
|
"grad_norm": 10.453815620055869,
|
|
"learning_rate": 2.68878073672596e-10,
|
|
"loss": 0.5119154453277588,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 2.992361553150859,
|
|
"grad_norm": 20.832447435219233,
|
|
"learning_rate": 2.3183903343881875e-10,
|
|
"loss": 0.6462416052818298,
|
|
"step": 4701
|
|
},
|
|
{
|
|
"epoch": 2.9929980903882876,
|
|
"grad_norm": 7.854065037269155,
|
|
"learning_rate": 1.9754354426526623e-10,
|
|
"loss": 0.5890966653823853,
|
|
"step": 4702
|
|
},
|
|
{
|
|
"epoch": 2.993634627625716,
|
|
"grad_norm": 14.755030293269593,
|
|
"learning_rate": 1.659916249702187e-10,
|
|
"loss": 0.7764922380447388,
|
|
"step": 4703
|
|
},
|
|
{
|
|
"epoch": 2.9942711648631444,
|
|
"grad_norm": 13.240081469391923,
|
|
"learning_rate": 1.3718329286760423e-10,
|
|
"loss": 0.39418211579322815,
|
|
"step": 4704
|
|
},
|
|
{
|
|
"epoch": 2.9949077021005728,
|
|
"grad_norm": 10.974899505583934,
|
|
"learning_rate": 1.1111856376644359e-10,
|
|
"loss": 0.4528464078903198,
|
|
"step": 4705
|
|
},
|
|
{
|
|
"epoch": 2.995544239338001,
|
|
"grad_norm": 13.295350963853561,
|
|
"learning_rate": 8.779745196807465e-11,
|
|
"loss": 0.4834592640399933,
|
|
"step": 4706
|
|
},
|
|
{
|
|
"epoch": 2.9961807765754296,
|
|
"grad_norm": 13.441788607828286,
|
|
"learning_rate": 6.721997027059335e-11,
|
|
"loss": 0.3847227990627289,
|
|
"step": 4707
|
|
},
|
|
{
|
|
"epoch": 2.996817313812858,
|
|
"grad_norm": 9.842613528414857,
|
|
"learning_rate": 4.938612996552294e-11,
|
|
"loss": 0.3487985134124756,
|
|
"step": 4708
|
|
},
|
|
{
|
|
"epoch": 2.9974538510502864,
|
|
"grad_norm": 17.369804534311466,
|
|
"learning_rate": 3.4295940838369176e-11,
|
|
"loss": 0.33690494298934937,
|
|
"step": 4709
|
|
},
|
|
{
|
|
"epoch": 2.998090388287715,
|
|
"grad_norm": 9.474979710917523,
|
|
"learning_rate": 2.1949411169730484e-11,
|
|
"loss": 0.6540824174880981,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 2.998726925525143,
|
|
"grad_norm": 10.217589047260072,
|
|
"learning_rate": 1.2346547735297975e-11,
|
|
"loss": 0.3194640874862671,
|
|
"step": 4711
|
|
},
|
|
{
|
|
"epoch": 2.9993634627625716,
|
|
"grad_norm": 11.528238108079135,
|
|
"learning_rate": 5.487355803635019e-12,
|
|
"loss": 0.6700233817100525,
|
|
"step": 4712
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 10.094211639717656,
|
|
"learning_rate": 1.3718391389527796e-12,
|
|
"loss": 0.3551580309867859,
|
|
"step": 4713
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"step": 4713,
|
|
"total_flos": 12614518824960.0,
|
|
"train_loss": 1.6530290696965189,
|
|
"train_runtime": 5359.1808,
|
|
"train_samples_per_second": 3.517,
|
|
"train_steps_per_second": 0.879
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 4713,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 12614518824960.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|