{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1044,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.014406627048442283,
      "grad_norm": 2.235235207512626,
      "learning_rate": 3.8095238095238102e-06,
      "loss": 0.7567409992218017,
      "step": 5
    },
    {
      "epoch": 0.028813254096884566,
      "grad_norm": 3.1220907369972015,
      "learning_rate": 8.571428571428573e-06,
      "loss": 0.6131507873535156,
      "step": 10
    },
    {
      "epoch": 0.04321988114532685,
      "grad_norm": 1.26071593847269,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.4174152374267578,
      "step": 15
    },
    {
      "epoch": 0.05762650819376913,
      "grad_norm": 0.7001329999687771,
      "learning_rate": 1.8095238095238094e-05,
      "loss": 0.3269367218017578,
      "step": 20
    },
    {
      "epoch": 0.07203313524221142,
      "grad_norm": 0.45760881472851334,
      "learning_rate": 2.2857142857142858e-05,
      "loss": 0.2579814910888672,
      "step": 25
    },
    {
      "epoch": 0.0864397622906537,
      "grad_norm": 0.3195758164531423,
      "learning_rate": 2.7619047619047622e-05,
      "loss": 0.22403466701507568,
      "step": 30
    },
    {
      "epoch": 0.10084638933909598,
      "grad_norm": 0.3937202043873978,
      "learning_rate": 3.2380952380952386e-05,
      "loss": 0.19430849552154542,
      "step": 35
    },
    {
      "epoch": 0.11525301638753827,
      "grad_norm": 0.4622992203768602,
      "learning_rate": 3.7142857142857143e-05,
      "loss": 0.17950987815856934,
      "step": 40
    },
    {
      "epoch": 0.12965964343598055,
      "grad_norm": 0.6153699707363115,
      "learning_rate": 4.190476190476191e-05,
      "loss": 0.16267883777618408,
      "step": 45
    },
    {
      "epoch": 0.14406627048442283,
      "grad_norm": 0.44308873981945024,
      "learning_rate": 4.666666666666667e-05,
      "loss": 0.15465893745422363,
      "step": 50
    },
    {
      "epoch": 0.15847289753286511,
      "grad_norm": 0.2667361601014973,
      "learning_rate": 5.142857142857143e-05,
      "loss": 0.1339721441268921,
      "step": 55
    },
    {
      "epoch": 0.1728795245813074,
      "grad_norm": 0.2856336328456399,
      "learning_rate": 5.619047619047619e-05,
      "loss": 0.12487690448760987,
      "step": 60
    },
    {
      "epoch": 0.18728615162974968,
      "grad_norm": 0.25342260570898284,
      "learning_rate": 6.0952380952380964e-05,
      "loss": 0.12374393939971924,
      "step": 65
    },
    {
      "epoch": 0.20169277867819196,
      "grad_norm": 0.30856178034874804,
      "learning_rate": 6.571428571428571e-05,
      "loss": 0.11958651542663574,
      "step": 70
    },
    {
      "epoch": 0.21609940572663425,
      "grad_norm": 0.37526519775047024,
      "learning_rate": 7.047619047619048e-05,
      "loss": 0.1187552571296692,
      "step": 75
    },
    {
      "epoch": 0.23050603277507653,
      "grad_norm": 0.23950000695755433,
      "learning_rate": 7.523809523809524e-05,
      "loss": 0.11327266693115234,
      "step": 80
    },
    {
      "epoch": 0.2449126598235188,
      "grad_norm": 0.2205068195876155,
      "learning_rate": 8e-05,
      "loss": 0.10409235954284668,
      "step": 85
    },
    {
      "epoch": 0.2593192868719611,
      "grad_norm": 0.38702416724389993,
      "learning_rate": 8.476190476190477e-05,
      "loss": 0.10459071397781372,
      "step": 90
    },
    {
      "epoch": 0.2737259139204034,
      "grad_norm": 1.5049886602582672,
      "learning_rate": 8.952380952380953e-05,
      "loss": 0.10260200500488281,
      "step": 95
    },
    {
      "epoch": 0.28813254096884566,
      "grad_norm": 0.7639386034441172,
      "learning_rate": 9.428571428571429e-05,
      "loss": 0.15102967023849487,
      "step": 100
    },
    {
      "epoch": 0.302539168017288,
      "grad_norm": 1.2918337466488035,
      "learning_rate": 9.904761904761905e-05,
      "loss": 0.21126885414123536,
      "step": 105
    },
    {
      "epoch": 0.31694579506573023,
      "grad_norm": 0.3993035259049567,
      "learning_rate": 9.99955226394288e-05,
      "loss": 0.1489308714866638,
      "step": 110
    },
    {
      "epoch": 0.33135242211417254,
      "grad_norm": 0.4337702538884852,
      "learning_rate": 9.997733473639876e-05,
      "loss": 0.1149595022201538,
      "step": 115
    },
    {
      "epoch": 0.3457590491626148,
      "grad_norm": 0.19718286454816564,
      "learning_rate": 9.994516154152849e-05,
      "loss": 0.10596739053726197,
      "step": 120
    },
    {
      "epoch": 0.3601656762110571,
      "grad_norm": 0.20133933569463766,
      "learning_rate": 9.989901205792952e-05,
      "loss": 0.09920316338539123,
      "step": 125
    },
    {
      "epoch": 0.37457230325949936,
      "grad_norm": 0.1627871886908846,
      "learning_rate": 9.983889919973586e-05,
      "loss": 0.08917503952980041,
      "step": 130
    },
    {
      "epoch": 0.3889789303079417,
      "grad_norm": 0.1732609558706655,
      "learning_rate": 9.976483978849007e-05,
      "loss": 0.08957574367523194,
      "step": 135
    },
    {
      "epoch": 0.40338555735638393,
      "grad_norm": 0.17068559939024425,
      "learning_rate": 9.967685454843618e-05,
      "loss": 0.08561774492263793,
      "step": 140
    },
    {
      "epoch": 0.41779218440482624,
      "grad_norm": 0.1627969165227149,
      "learning_rate": 9.957496810072027e-05,
      "loss": 0.083004629611969,
      "step": 145
    },
    {
      "epoch": 0.4321988114532685,
      "grad_norm": 0.1467656284113813,
      "learning_rate": 9.945920895650071e-05,
      "loss": 0.0808147668838501,
      "step": 150
    },
    {
      "epoch": 0.4466054385017108,
      "grad_norm": 0.13322475158297778,
      "learning_rate": 9.932960950896981e-05,
      "loss": 0.07847496271133422,
      "step": 155
    },
    {
      "epoch": 0.46101206555015306,
      "grad_norm": 0.12265959792803287,
      "learning_rate": 9.918620602428915e-05,
      "loss": 0.07879123687744141,
      "step": 160
    },
    {
      "epoch": 0.47541869259859537,
      "grad_norm": 0.11616840330675544,
      "learning_rate": 9.902903863144107e-05,
      "loss": 0.07581273913383484,
      "step": 165
    },
    {
      "epoch": 0.4898253196470376,
      "grad_norm": 0.12203330893770489,
      "learning_rate": 9.885815131099934e-05,
      "loss": 0.07368603944778443,
      "step": 170
    },
    {
      "epoch": 0.5042319466954799,
      "grad_norm": 0.1546308315564488,
      "learning_rate": 9.867359188282192e-05,
      "loss": 0.06976621150970459,
      "step": 175
    },
    {
      "epoch": 0.5186385737439222,
      "grad_norm": 0.2238419658216969,
      "learning_rate": 9.847541199266941e-05,
      "loss": 0.07270271778106689,
      "step": 180
    },
    {
      "epoch": 0.5330452007923645,
      "grad_norm": 0.1778940838233776,
      "learning_rate": 9.826366709775286e-05,
      "loss": 0.06899308562278747,
      "step": 185
    },
    {
      "epoch": 0.5474518278408068,
      "grad_norm": 0.1236118996782577,
      "learning_rate": 9.803841645121504e-05,
      "loss": 0.06641653776168824,
      "step": 190
    },
    {
      "epoch": 0.561858454889249,
      "grad_norm": 0.20583115161000629,
      "learning_rate": 9.779972308554952e-05,
      "loss": 0.06647136211395263,
      "step": 195
    },
    {
      "epoch": 0.5762650819376913,
      "grad_norm": 0.13075349423709637,
      "learning_rate": 9.754765379496202e-05,
      "loss": 0.06856078505516053,
      "step": 200
    },
    {
      "epoch": 0.5906717089861336,
      "grad_norm": 0.17339943774041067,
      "learning_rate": 9.728227911667934e-05,
      "loss": 0.06724534034729004,
      "step": 205
    },
    {
      "epoch": 0.605078336034576,
      "grad_norm": 0.14182602625367816,
      "learning_rate": 9.700367331121054e-05,
      "loss": 0.06738802194595336,
      "step": 210
    },
    {
      "epoch": 0.6194849630830181,
      "grad_norm": 0.16393619983216387,
      "learning_rate": 9.67119143415667e-05,
      "loss": 0.07073606252670288,
      "step": 215
    },
    {
      "epoch": 0.6338915901314605,
      "grad_norm": 0.1328841830481596,
      "learning_rate": 9.640708385144403e-05,
      "loss": 0.06382153034210206,
      "step": 220
    },
    {
      "epoch": 0.6482982171799028,
      "grad_norm": 0.13571266215544603,
      "learning_rate": 9.608926714237754e-05,
      "loss": 0.06776301860809326,
      "step": 225
    },
    {
      "epoch": 0.6627048442283451,
      "grad_norm": 0.12351937744356929,
      "learning_rate": 9.575855314987068e-05,
      "loss": 0.06309446096420288,
      "step": 230
    },
    {
      "epoch": 0.6771114712767873,
      "grad_norm": 0.12290269065618897,
      "learning_rate": 9.541503441850843e-05,
      "loss": 0.06422497630119324,
      "step": 235
    },
    {
      "epoch": 0.6915180983252296,
      "grad_norm": 0.12486344744276894,
      "learning_rate": 9.505880707606024e-05,
      "loss": 0.06324135661125183,
      "step": 240
    },
    {
      "epoch": 0.7059247253736719,
      "grad_norm": 0.1371721876913286,
      "learning_rate": 9.468997080658031e-05,
      "loss": 0.06205494403839111,
      "step": 245
    },
    {
      "epoch": 0.7203313524221142,
      "grad_norm": 0.12440196659006258,
      "learning_rate": 9.430862882251278e-05,
      "loss": 0.057729125022888184,
      "step": 250
    },
    {
      "epoch": 0.7347379794705564,
      "grad_norm": 0.11085006544539791,
      "learning_rate": 9.391488783580955e-05,
      "loss": 0.059876751899719236,
      "step": 255
    },
    {
      "epoch": 0.7491446065189987,
      "grad_norm": 0.11611256342361528,
      "learning_rate": 9.350885802806863e-05,
      "loss": 0.05882802605628967,
      "step": 260
    },
    {
      "epoch": 0.763551233567441,
      "grad_norm": 0.1279259846460798,
      "learning_rate": 9.309065301970193e-05,
      "loss": 0.06077917814254761,
      "step": 265
    },
    {
      "epoch": 0.7779578606158833,
      "grad_norm": 0.11105876561542377,
      "learning_rate": 9.266038983814039e-05,
      "loss": 0.05303559303283691,
      "step": 270
    },
    {
      "epoch": 0.7923644876643255,
      "grad_norm": 0.11671310410423168,
      "learning_rate": 9.221818888508602e-05,
      "loss": 0.06124954223632813,
      "step": 275
    },
    {
      "epoch": 0.8067711147127679,
      "grad_norm": 0.11537085211038406,
      "learning_rate": 9.176417390281944e-05,
      "loss": 0.055888807773590087,
      "step": 280
    },
    {
      "epoch": 0.8211777417612102,
      "grad_norm": 0.1480823536245831,
      "learning_rate": 9.129847193957282e-05,
      "loss": 0.056972581148147586,
      "step": 285
    },
    {
      "epoch": 0.8355843688096525,
      "grad_norm": 0.15744268133880865,
      "learning_rate": 9.08212133139776e-05,
      "loss": 0.05824898481369019,
      "step": 290
    },
    {
      "epoch": 0.8499909958580947,
      "grad_norm": 0.1397867333597395,
      "learning_rate": 9.033253157859714e-05,
      "loss": 0.05415785312652588,
      "step": 295
    },
    {
      "epoch": 0.864397622906537,
      "grad_norm": 0.12034022108734013,
      "learning_rate": 8.983256348255423e-05,
      "loss": 0.05467197895050049,
      "step": 300
    },
    {
      "epoch": 0.8788042499549793,
      "grad_norm": 0.12682573622924756,
      "learning_rate": 8.932144893326432e-05,
      "loss": 0.06181464791297912,
      "step": 305
    },
    {
      "epoch": 0.8932108770034216,
      "grad_norm": 0.11321366531316682,
      "learning_rate": 8.879933095728485e-05,
      "loss": 0.05511963367462158,
      "step": 310
    },
    {
      "epoch": 0.9076175040518638,
      "grad_norm": 0.1076394497380973,
      "learning_rate": 8.826635566029166e-05,
      "loss": 0.05229709148406982,
      "step": 315
    },
    {
      "epoch": 0.9220241311003061,
      "grad_norm": 0.11249447920151531,
      "learning_rate": 8.772267218619388e-05,
      "loss": 0.05275582075119019,
      "step": 320
    },
    {
      "epoch": 0.9364307581487484,
      "grad_norm": 0.11401150417345533,
      "learning_rate": 8.716843267539869e-05,
      "loss": 0.05470834374427795,
      "step": 325
    },
    {
      "epoch": 0.9508373851971907,
      "grad_norm": 0.13321527980254963,
      "learning_rate": 8.660379222223727e-05,
      "loss": 0.05563476085662842,
      "step": 330
    },
    {
      "epoch": 0.9652440122456329,
      "grad_norm": 0.10771804020098895,
      "learning_rate": 8.602890883156454e-05,
      "loss": 0.054843342304229735,
      "step": 335
    },
    {
      "epoch": 0.9796506392940753,
      "grad_norm": 0.12601833333178913,
      "learning_rate": 8.544394337454409e-05,
      "loss": 0.05721263885498047,
      "step": 340
    },
    {
      "epoch": 0.9940572663425176,
      "grad_norm": 0.12322820499048608,
      "learning_rate": 8.484905954363123e-05,
      "loss": 0.05096786618232727,
      "step": 345
    },
    {
      "epoch": 1.0057626508193769,
      "grad_norm": 0.14089468629356533,
      "learning_rate": 8.424442380676647e-05,
      "loss": 0.05167339444160461,
      "step": 350
    },
    {
      "epoch": 1.0201692778678193,
      "grad_norm": 0.1705872004915626,
      "learning_rate": 8.363020536079239e-05,
      "loss": 0.05249757170677185,
      "step": 355
    },
    {
      "epoch": 1.0345759049162615,
      "grad_norm": 0.15358181481824462,
      "learning_rate": 8.300657608410678e-05,
      "loss": 0.05038872957229614,
      "step": 360
    },
    {
      "epoch": 1.0489825319647037,
      "grad_norm": 0.13895400680332037,
      "learning_rate": 8.237371048856546e-05,
      "loss": 0.050058400630950926,
      "step": 365
    },
    {
      "epoch": 1.0633891590131461,
      "grad_norm": 0.09560889181658183,
      "learning_rate": 8.17317856706482e-05,
      "loss": 0.04919912219047547,
      "step": 370
    },
    {
      "epoch": 1.0777957860615883,
      "grad_norm": 0.10951811698505555,
      "learning_rate": 8.108098126190129e-05,
      "loss": 0.04963598847389221,
      "step": 375
    },
    {
      "epoch": 1.0922024131100305,
      "grad_norm": 0.09853927812254934,
      "learning_rate": 8.042147937867079e-05,
      "loss": 0.046415746212005615,
      "step": 380
    },
    {
      "epoch": 1.106609040158473,
      "grad_norm": 0.09238299590671381,
      "learning_rate": 7.975346457114034e-05,
      "loss": 0.04439312219619751,
      "step": 385
    },
    {
      "epoch": 1.1210156672069151,
      "grad_norm": 0.10940030307745394,
      "learning_rate": 7.907712377168817e-05,
      "loss": 0.051634716987609866,
      "step": 390
    },
    {
      "epoch": 1.1354222942553576,
      "grad_norm": 0.09338579936215781,
      "learning_rate": 7.839264624257712e-05,
      "loss": 0.04415662288665771,
      "step": 395
    },
    {
      "epoch": 1.1498289213037998,
      "grad_norm": 0.10999587309136662,
      "learning_rate": 7.770022352299293e-05,
      "loss": 0.047378170490264895,
      "step": 400
    },
    {
      "epoch": 1.164235548352242,
      "grad_norm": 0.10109309983264758,
      "learning_rate": 7.700004937544542e-05,
      "loss": 0.04249417781829834,
      "step": 405
    },
    {
      "epoch": 1.1786421754006844,
      "grad_norm": 0.10231496239314469,
      "learning_rate": 7.629231973154725e-05,
      "loss": 0.04593285918235779,
      "step": 410
    },
    {
      "epoch": 1.1930488024491266,
      "grad_norm": 0.1000912342655061,
      "learning_rate": 7.557723263718596e-05,
      "loss": 0.05370241403579712,
      "step": 415
    },
    {
      "epoch": 1.2074554294975688,
      "grad_norm": 0.08355578823714238,
      "learning_rate": 7.485498819710417e-05,
      "loss": 0.04612640142440796,
      "step": 420
    },
    {
      "epoch": 1.2218620565460112,
      "grad_norm": 0.087036754767847,
      "learning_rate": 7.412578851890384e-05,
      "loss": 0.043773263692855835,
      "step": 425
    },
    {
      "epoch": 1.2362686835944534,
      "grad_norm": 0.09341830589519805,
      "learning_rate": 7.338983765648985e-05,
      "loss": 0.046638333797454835,
      "step": 430
    },
    {
      "epoch": 1.2506753106428956,
      "grad_norm": 0.09163918271970233,
      "learning_rate": 7.264734155296912e-05,
      "loss": 0.045640939474105836,
      "step": 435
    },
    {
      "epoch": 1.265081937691338,
      "grad_norm": 0.09623135416486957,
      "learning_rate": 7.189850798302099e-05,
      "loss": 0.04710923135280609,
      "step": 440
    },
    {
      "epoch": 1.2794885647397802,
      "grad_norm": 0.09010925699278292,
      "learning_rate": 7.114354649475499e-05,
      "loss": 0.04437531530857086,
      "step": 445
    },
    {
      "epoch": 1.2938951917882227,
      "grad_norm": 0.09828854110045074,
      "learning_rate": 7.038266835107257e-05,
      "loss": 0.04155453443527222,
      "step": 450
    },
    {
      "epoch": 1.3083018188366649,
      "grad_norm": 0.09261388252893078,
      "learning_rate": 6.961608647054873e-05,
      "loss": 0.04477185308933258,
      "step": 455
    },
    {
      "epoch": 1.322708445885107,
      "grad_norm": 0.09199618999958105,
      "learning_rate": 6.884401536785045e-05,
      "loss": 0.045587533712387086,
      "step": 460
    },
    {
      "epoch": 1.3371150729335495,
      "grad_norm": 0.10296954226773448,
      "learning_rate": 6.806667109370853e-05,
      "loss": 0.04496743679046631,
      "step": 465
    },
    {
      "epoch": 1.3515216999819917,
      "grad_norm": 0.0991741419475408,
      "learning_rate": 6.728427117445948e-05,
      "loss": 0.04124987423419953,
      "step": 470
    },
    {
      "epoch": 1.365928327030434,
      "grad_norm": 0.08767468242608127,
      "learning_rate": 6.649703455117458e-05,
      "loss": 0.044256627559661865,
      "step": 475
    },
    {
      "epoch": 1.3803349540788763,
      "grad_norm": 0.08419233546507805,
      "learning_rate": 6.5705181518393e-05,
      "loss": 0.047923988103866576,
      "step": 480
    },
    {
      "epoch": 1.3947415811273185,
      "grad_norm": 0.15529323580619178,
      "learning_rate": 6.490893366247612e-05,
      "loss": 0.040982422232627866,
      "step": 485
    },
    {
      "epoch": 1.409148208175761,
      "grad_norm": 0.08719252163236856,
      "learning_rate": 6.41085137996006e-05,
      "loss": 0.0431306004524231,
      "step": 490
    },
    {
      "epoch": 1.4235548352242031,
      "grad_norm": 0.09381117178448978,
      "learning_rate": 6.330414591340689e-05,
      "loss": 0.039784133434295654,
      "step": 495
    },
    {
      "epoch": 1.4379614622726455,
      "grad_norm": 0.08334433128110437,
      "learning_rate": 6.249605509232149e-05,
      "loss": 0.04327746033668518,
      "step": 500
    },
    {
      "epoch": 1.4523680893210877,
      "grad_norm": 0.09141409005562276,
      "learning_rate": 6.168446746656973e-05,
      "loss": 0.04065501093864441,
      "step": 505
    },
    {
      "epoch": 1.46677471636953,
      "grad_norm": 0.10836927533553822,
      "learning_rate": 6.0869610144897215e-05,
      "loss": 0.040621763467788695,
      "step": 510
    },
    {
      "epoch": 1.4811813434179721,
      "grad_norm": 0.11429670482454558,
      "learning_rate": 6.005171115101735e-05,
      "loss": 0.042708945274353025,
      "step": 515
    },
    {
      "epoch": 1.4955879704664146,
      "grad_norm": 0.10265027708777795,
      "learning_rate": 5.9230999359802784e-05,
      "loss": 0.03845831751823425,
      "step": 520
    },
    {
      "epoch": 1.509994597514857,
      "grad_norm": 0.0937825232136341,
      "learning_rate": 5.84077044332389e-05,
      "loss": 0.04369714856147766,
      "step": 525
    },
    {
      "epoch": 1.5244012245632992,
      "grad_norm": 0.14710934296521627,
      "learning_rate": 5.7582056756156665e-05,
      "loss": 0.04057990908622742,
      "step": 530
    },
    {
      "epoch": 1.5388078516117414,
      "grad_norm": 0.08557873748617338,
      "learning_rate": 5.675428737176367e-05,
      "loss": 0.03988811373710632,
      "step": 535
    },
    {
      "epoch": 1.5532144786601836,
      "grad_norm": 0.08304731519894865,
      "learning_rate": 5.5924627916990446e-05,
      "loss": 0.040156081318855286,
      "step": 540
    },
    {
      "epoch": 1.567621105708626,
      "grad_norm": 0.09009100140646863,
      "learning_rate": 5.5093310557671074e-05,
      "loss": 0.04313129186630249,
      "step": 545
    },
    {
      "epoch": 1.5820277327570682,
      "grad_norm": 0.09229023810015868,
      "learning_rate": 5.426056792357551e-05,
      "loss": 0.04041691720485687,
      "step": 550
    },
    {
      "epoch": 1.5964343598055106,
      "grad_norm": 0.08400211717158966,
      "learning_rate": 5.342663304331211e-05,
      "loss": 0.04093085825443268,
      "step": 555
    },
    {
      "epoch": 1.6108409868539528,
      "grad_norm": 0.09614326424875454,
      "learning_rate": 5.25917392791188e-05,
      "loss": 0.039686673879623414,
      "step": 560
    },
    {
      "epoch": 1.625247613902395,
      "grad_norm": 0.1067845470194038,
      "learning_rate": 5.1756120261560446e-05,
      "loss": 0.039973828196525577,
      "step": 565
    },
    {
      "epoch": 1.6396542409508372,
      "grad_norm": 0.08943621090417164,
      "learning_rate": 5.092000982415162e-05,
      "loss": 0.03885244131088257,
      "step": 570
    },
    {
      "epoch": 1.6540608679992796,
      "grad_norm": 0.08753082979407804,
      "learning_rate": 5.0083641937922145e-05,
      "loss": 0.03913732171058655,
      "step": 575
    },
    {
      "epoch": 1.668467495047722,
      "grad_norm": 0.09803669811995008,
      "learning_rate": 4.924725064594447e-05,
      "loss": 0.038859084248542786,
      "step": 580
    },
    {
      "epoch": 1.6828741220961643,
      "grad_norm": 0.08541143736458823,
      "learning_rate": 4.8411069997840756e-05,
      "loss": 0.037244629859924314,
      "step": 585
    },
    {
      "epoch": 1.6972807491446065,
      "grad_norm": 0.08650694144802851,
      "learning_rate": 4.757533398428812e-05,
      "loss": 0.04225952625274658,
      "step": 590
    },
    {
      "epoch": 1.7116873761930487,
      "grad_norm": 0.09490787276668022,
      "learning_rate": 4.674027647154037e-05,
      "loss": 0.03874731659889221,
      "step": 595
    },
    {
      "epoch": 1.726094003241491,
      "grad_norm": 0.07772058542302925,
      "learning_rate": 4.590613113598461e-05,
      "loss": 0.03750569224357605,
      "step": 600
    },
    {
      "epoch": 1.7405006302899335,
      "grad_norm": 0.07856101825582532,
      "learning_rate": 4.507313139875102e-05,
      "loss": 0.03765683174133301,
      "step": 605
    },
    {
      "epoch": 1.7549072573383757,
      "grad_norm": 0.07088260858693515,
      "learning_rate": 4.4241510360393804e-05,
      "loss": 0.03841148316860199,
      "step": 610
    },
    {
      "epoch": 1.769313884386818,
      "grad_norm": 0.08315598782355023,
      "learning_rate": 4.341150073566227e-05,
      "loss": 0.03978689610958099,
      "step": 615
    },
    {
      "epoch": 1.7837205114352601,
      "grad_norm": 0.08933153255691949,
      "learning_rate": 4.258333478837947e-05,
      "loss": 0.038895291090011594,
      "step": 620
    },
    {
      "epoch": 1.7981271384837025,
      "grad_norm": 0.08396668543385523,
      "learning_rate": 4.1757244266447245e-05,
      "loss": 0.04072596728801727,
      "step": 625
    },
    {
      "epoch": 1.8125337655321447,
      "grad_norm": 0.07957802106126194,
      "learning_rate": 4.093346033699557e-05,
      "loss": 0.03865320086479187,
      "step": 630
    },
    {
      "epoch": 1.8269403925805872,
      "grad_norm": 0.08958406118221353,
      "learning_rate": 4.011221352169447e-05,
      "loss": 0.04185936748981476,
      "step": 635
    },
    {
      "epoch": 1.8413470196290294,
      "grad_norm": 0.08961676019198377,
      "learning_rate": 3.9293733632246544e-05,
      "loss": 0.04408974051475525,
      "step": 640
    },
    {
      "epoch": 1.8557536466774716,
      "grad_norm": 0.07858278806552751,
      "learning_rate": 3.847824970607797e-05,
      "loss": 0.04014042019844055,
      "step": 645
    },
    {
      "epoch": 1.8701602737259138,
      "grad_norm": 0.07419667584622487,
      "learning_rate": 3.7665989942246625e-05,
      "loss": 0.03581300973892212,
      "step": 650
    },
    {
      "epoch": 1.8845669007743562,
      "grad_norm": 0.08037951897237189,
      "learning_rate": 3.685718163758427e-05,
      "loss": 0.04189331531524658,
      "step": 655
    },
    {
      "epoch": 1.8989735278227986,
      "grad_norm": 0.08133067284522653,
      "learning_rate": 3.6052051123091634e-05,
      "loss": 0.03912949562072754,
      "step": 660
    },
    {
      "epoch": 1.9133801548712408,
      "grad_norm": 0.08974888658045152,
      "learning_rate": 3.5250823700603496e-05,
      "loss": 0.03808005452156067,
      "step": 665
    },
    {
      "epoch": 1.927786781919683,
      "grad_norm": 0.07193212698550007,
      "learning_rate": 3.445372357974194e-05,
      "loss": 0.03524368405342102,
      "step": 670
    },
    {
      "epoch": 1.9421934089681252,
      "grad_norm": 0.07439568567213939,
      "learning_rate": 3.3660973815175165e-05,
      "loss": 0.03650209903717041,
      "step": 675
    },
    {
      "epoch": 1.9566000360165676,
      "grad_norm": 0.07586041788325688,
      "learning_rate": 3.287279624419945e-05,
      "loss": 0.036546701192855836,
      "step": 680
    },
    {
      "epoch": 1.97100666306501,
      "grad_norm": 0.08294122441026296,
      "learning_rate": 3.208941142466187e-05,
      "loss": 0.03591431975364685,
      "step": 685
    },
    {
      "epoch": 1.9854132901134522,
      "grad_norm": 0.08528763303850583,
      "learning_rate": 3.1311038573240975e-05,
      "loss": 0.03485568761825562,
      "step": 690
    },
    {
      "epoch": 1.9998199171618944,
      "grad_norm": 0.0756456466151007,
      "learning_rate": 3.0537895504102874e-05,
      "loss": 0.037538421154022214,
      "step": 695
    },
    {
      "epoch": 2.0115253016387538,
      "grad_norm": 0.0987258257656567,
      "learning_rate": 2.9770198567949546e-05,
      "loss": 0.027647560834884642,
      "step": 700
    },
    {
      "epoch": 2.025931928687196,
      "grad_norm": 0.10342059226496335,
      "learning_rate": 2.900816259147705e-05,
      "loss": 0.03239924311637878,
      "step": 705
    },
    {
      "epoch": 2.0403385557356386,
      "grad_norm": 0.08947622183974005,
      "learning_rate": 2.8252000817259837e-05,
      "loss": 0.02974867820739746,
      "step": 710
    },
    {
      "epoch": 2.054745182784081,
      "grad_norm": 0.07819720124564082,
      "learning_rate": 2.7501924844078534e-05,
      "loss": 0.027856966853141783,
      "step": 715
    },
    {
      "epoch": 2.069151809832523,
      "grad_norm": 0.07255651027166257,
      "learning_rate": 2.6758144567707754e-05,
      "loss": 0.028209209442138672,
      "step": 720
    },
    {
      "epoch": 2.083558436880965,
      "grad_norm": 0.0777676865315773,
      "learning_rate": 2.6020868122180385e-05,
      "loss": 0.02793322205543518,
      "step": 725
    },
    {
      "epoch": 2.0979650639294074,
      "grad_norm": 0.08664972293238134,
      "learning_rate": 2.5290301821544825e-05,
      "loss": 0.02801375389099121,
      "step": 730
    },
    {
      "epoch": 2.1123716909778496,
      "grad_norm": 0.08559466896073407,
      "learning_rate": 2.4566650102131573e-05,
      "loss": 0.02737850546836853,
      "step": 735
    },
    {
      "epoch": 2.1267783180262922,
      "grad_norm": 0.07852535239386964,
      "learning_rate": 2.3850115465345324e-05,
      "loss": 0.030919501185417177,
      "step": 740
    },
    {
      "epoch": 2.1411849450747344,
      "grad_norm": 0.08182892636530964,
      "learning_rate": 2.3140898420998426e-05,
      "loss": 0.028718733787536622,
      "step": 745
    },
    {
      "epoch": 2.1555915721231766,
      "grad_norm": 0.07295529971805709,
      "learning_rate": 2.2439197431201646e-05,
      "loss": 0.028903046250343324,
      "step": 750
    },
    {
      "epoch": 2.169998199171619,
      "grad_norm": 0.07624400365106067,
      "learning_rate": 2.1745208854828058e-05,
      "loss": 0.024923816323280334,
      "step": 755
    },
    {
      "epoch": 2.184404826220061,
      "grad_norm": 0.07567603422035397,
      "learning_rate": 2.105912689256533e-05,
      "loss": 0.026013752818107604,
      "step": 760
    },
    {
      "epoch": 2.1988114532685037,
      "grad_norm": 0.07427613549699529,
      "learning_rate": 2.0381143532572082e-05,
      "loss": 0.026708921790122984,
      "step": 765
    },
    {
      "epoch": 2.213218080316946,
      "grad_norm": 0.0721068508797536,
      "learning_rate": 1.9711448496753297e-05,
      "loss": 0.02909781038761139,
      "step": 770
    },
    {
      "epoch": 2.227624707365388,
      "grad_norm": 0.09841381262275949,
      "learning_rate": 1.905022918766995e-05,
      "loss": 0.027940624952316286,
      "step": 775
    },
    {
      "epoch": 2.2420313344138303,
      "grad_norm": 0.0816958462956758,
      "learning_rate": 1.8397670636097636e-05,
      "loss": 0.026423072814941405,
      "step": 780
    },
    {
      "epoch": 2.2564379614622725,
      "grad_norm": 0.07936813973695164,
      "learning_rate": 1.775395544924885e-05,
      "loss": 0.028386065363883974,
      "step": 785
    },
    {
      "epoch": 2.270844588510715,
      "grad_norm": 0.07710097062295308,
      "learning_rate": 1.7119263759673675e-05,
      "loss": 0.02769894599914551,
      "step": 790
    },
    {
      "epoch": 2.2852512155591573,
      "grad_norm": 0.08498281330072474,
      "learning_rate": 1.6493773174852673e-05,
      "loss": 0.02839537858963013,
      "step": 795
    },
    {
      "epoch": 2.2996578426075995,
      "grad_norm": 0.07674813377075432,
      "learning_rate": 1.587765872749649e-05,
      "loss": 0.02569463849067688,
      "step": 800
    },
    {
      "epoch": 2.3140644696560417,
      "grad_norm": 0.06662948325098497,
      "learning_rate": 1.527109282656611e-05,
      "loss": 0.028371796011924744,
      "step": 805
    },
    {
      "epoch": 2.328471096704484,
      "grad_norm": 0.08015839069477317,
      "learning_rate": 1.4674245209027066e-05,
      "loss": 0.026229003071784975,
      "step": 810
    },
    {
      "epoch": 2.3428777237529266,
      "grad_norm": 0.08019588118318016,
      "learning_rate": 1.4087282892351623e-05,
      "loss": 0.029995208978652953,
      "step": 815
    },
    {
      "epoch": 2.3572843508013688,
      "grad_norm": 0.08221863155956374,
      "learning_rate": 1.3510370127781635e-05,
      "loss": 0.029001206159591675,
      "step": 820
    },
    {
      "epoch": 2.371690977849811,
      "grad_norm": 0.07480678399512465,
      "learning_rate": 1.2943668354365878e-05,
      "loss": 0.02766028940677643,
      "step": 825
    },
    {
      "epoch": 2.386097604898253,
      "grad_norm": 0.07477452302806815,
      "learning_rate": 1.2387336153784018e-05,
      "loss": 0.02593517005443573,
      "step": 830
    },
    {
      "epoch": 2.4005042319466954,
      "grad_norm": 0.07081183958851973,
      "learning_rate": 1.184152920597028e-05,
      "loss": 0.026943469047546388,
      "step": 835
    },
    {
      "epoch": 2.4149108589951376,
      "grad_norm": 0.07536754957279856,
      "learning_rate": 1.1306400245549158e-05,
      "loss": 0.024954386055469513,
      "step": 840
    },
    {
      "epoch": 2.42931748604358,
      "grad_norm": 0.06344152496317775,
      "learning_rate": 1.0782099019095238e-05,
      "loss": 0.028272977471351622,
      "step": 845
    },
    {
      "epoch": 2.4437241130920224,
      "grad_norm": 0.0644553682371491,
      "learning_rate": 1.026877224322923e-05,
      "loss": 0.02370927333831787,
      "step": 850
    },
    {
      "epoch": 2.4581307401404646,
      "grad_norm": 0.07529675849595874,
      "learning_rate": 9.766563563561799e-06,
      "loss": 0.025498074293136597,
      "step": 855
    },
    {
      "epoch": 2.472537367188907,
      "grad_norm": 0.08420954265091966,
      "learning_rate": 9.275613514496977e-06,
      "loss": 0.02770912051200867,
      "step": 860
    },
    {
      "epoch": 2.486943994237349,
      "grad_norm": 0.0744332415489311,
      "learning_rate": 8.7960594799059e-06,
      "loss": 0.027615338563919067,
      "step": 865
    },
    {
      "epoch": 2.501350621285791,
      "grad_norm": 0.07212967627396147,
      "learning_rate": 8.328035654682325e-06,
      "loss": 0.027428582310676575,
      "step": 870
    },
    {
      "epoch": 2.515757248334234,
      "grad_norm": 0.08246547759863139,
      "learning_rate": 7.871673007190599e-06,
      "loss": 0.026888126134872438,
      "step": 875
    },
    {
      "epoch": 2.530163875382676,
      "grad_norm": 0.06863337011207567,
      "learning_rate": 7.427099242616348e-06,
      "loss": 0.025411182641983034,
      "step": 880
    },
    {
      "epoch": 2.5445705024311183,
      "grad_norm": 0.06777467806972155,
      "learning_rate": 6.994438767230466e-06,
      "loss": 0.024811127781867982,
      "step": 885
    },
    {
      "epoch": 2.5589771294795605,
      "grad_norm": 0.07029495896606512,
      "learning_rate": 6.573812653576062e-06,
      "loss": 0.02613699436187744,
      "step": 890
    },
    {
      "epoch": 2.5733837565280027,
      "grad_norm": 0.07134936463967867,
      "learning_rate": 6.1653386065885165e-06,
      "loss": 0.026964515447616577,
      "step": 895
    },
    {
      "epoch": 2.5877903835764453,
      "grad_norm": 0.07711841632882044,
      "learning_rate": 5.769130930657734e-06,
      "loss": 0.028112486004829407,
      "step": 900
    },
    {
      "epoch": 2.6021970106248875,
      "grad_norm": 0.08360128959008864,
      "learning_rate": 5.38530049764206e-06,
      "loss": 0.02626214623451233,
      "step": 905
    },
    {
      "epoch": 2.6166036376733297,
      "grad_norm": 0.07456201121764428,
      "learning_rate": 5.0139547158427e-06,
      "loss": 0.02669944763183594,
      "step": 910
    },
    {
      "epoch": 2.631010264721772,
      "grad_norm": 0.07740576081667884,
      "learning_rate": 4.655197499947378e-06,
      "loss": 0.029006192088127138,
      "step": 915
    },
    {
      "epoch": 2.645416891770214,
      "grad_norm": 0.06845350619031464,
      "learning_rate": 4.309129241951587e-06,
      "loss": 0.02491077184677124,
      "step": 920
    },
    {
      "epoch": 2.6598235188186568,
      "grad_norm": 0.07501903308333313,
      "learning_rate": 3.975846783065662e-06,
      "loss": 0.026326572895050047,
      "step": 925
    },
    {
      "epoch": 2.674230145867099,
      "grad_norm": 0.07580375293031513,
      "learning_rate": 3.6554433866154036e-06,
      "loss": 0.026823589205741884,
      "step": 930
    },
    {
      "epoch": 2.688636772915541,
      "grad_norm": 0.06969116474563261,
      "learning_rate": 3.3480087119440063e-06,
      "loss": 0.025913709402084352,
      "step": 935
    },
    {
      "epoch": 2.7030433999639834,
      "grad_norm": 0.0714630826160477,
      "learning_rate": 3.0536287893223604e-06,
      "loss": 0.026928871870040894,
      "step": 940
    },
    {
      "epoch": 2.7174500270124256,
      "grad_norm": 0.07358152299227637,
      "learning_rate": 2.7723859958750486e-06,
      "loss": 0.02748822569847107,
      "step": 945
    },
    {
      "epoch": 2.731856654060868,
      "grad_norm": 0.06838564316740577,
      "learning_rate": 2.5043590325285195e-06,
      "loss": 0.025952500104904175,
      "step": 950
    },
    {
      "epoch": 2.7462632811093104,
      "grad_norm": 0.07787109185214655,
      "learning_rate": 2.249622901987963e-06,
      "loss": 0.02589995265007019,
      "step": 955
    },
    {
      "epoch": 2.7606699081577526,
      "grad_norm": 0.07156945963749864,
      "learning_rate": 2.0082488877491033e-06,
      "loss": 0.027577921748161316,
      "step": 960
    },
    {
      "epoch": 2.775076535206195,
      "grad_norm": 0.06514188446012159,
      "learning_rate": 1.7803045341507952e-06,
      "loss": 0.025488072633743288,
      "step": 965
    },
    {
      "epoch": 2.789483162254637,
      "grad_norm": 0.0712195602884753,
      "learning_rate": 1.5658536274738621e-06,
      "loss": 0.02348570078611374,
      "step": 970
    },
    {
      "epoch": 2.8038897893030796,
      "grad_norm": 0.0680133235009968,
      "learning_rate": 1.3649561780916199e-06,
      "loss": 0.02316732406616211,
      "step": 975
    },
    {
      "epoch": 2.818296416351522,
      "grad_norm": 0.0824565977146897,
      "learning_rate": 1.1776684036770347e-06,
      "loss": 0.02901957035064697,
      "step": 980
    },
    {
      "epoch": 2.832703043399964,
      "grad_norm": 0.08111572063117606,
      "learning_rate": 1.004042713471165e-06,
      "loss": 0.02710677683353424,
      "step": 985
    },
    {
      "epoch": 2.8471096704484062,
      "grad_norm": 0.07416113908713114,
      "learning_rate": 8.441276936173193e-07,
      "loss": 0.024537976086139678,
      "step": 990
    },
    {
      "epoch": 2.8615162974968484,
      "grad_norm": 0.06645937685734804,
      "learning_rate": 6.9796809356511e-07,
      "loss": 0.025470972061157227,
      "step": 995
    },
    {
      "epoch": 2.875922924545291,
      "grad_norm": 0.07056688302520532,
      "learning_rate": 5.656048135480763e-07,
      "loss": 0.025230163335800172,
      "step": 1000
    },
    {
      "epoch": 2.8903295515937333,
      "grad_norm": 0.07480029198072068,
      "learning_rate": 4.470748931384494e-07,
      "loss": 0.026770299673080443,
      "step": 1005
    },
    {
      "epoch": 2.9047361786421755,
      "grad_norm": 0.06476290220031579,
      "learning_rate": 3.424115008822726e-07,
      "loss": 0.026645660400390625,
      "step": 1010
    },
    {
      "epoch": 2.9191428056906177,
      "grad_norm": 0.07374044092567203,
      "learning_rate": 2.5164392501777487e-07,
      "loss": 0.025820019841194152,
      "step": 1015
    },
    {
      "epoch": 2.93354943273906,
      "grad_norm": 0.07098709082144111,
      "learning_rate": 1.7479756527955527e-07,
      "loss": 0.025720816850662232,
      "step": 1020
    },
    {
      "epoch": 2.9479560597875025,
      "grad_norm": 0.07593395611493338,
      "learning_rate": 1.1189392579090129e-07,
      "loss": 0.024733534455299376,
      "step": 1025
    },
    {
      "epoch": 2.9623626868359443,
      "grad_norm": 0.07179585283776127,
      "learning_rate": 6.295060904623617e-08,
      "loss": 0.02832019031047821,
      "step": 1030
    },
    {
      "epoch": 2.976769313884387,
      "grad_norm": 0.06802635060193646,
      "learning_rate": 2.7981310985369935e-08,
      "loss": 0.025465887784957886,
      "step": 1035
    },
    {
      "epoch": 2.991175940932829,
      "grad_norm": 0.0759224455019542,
      "learning_rate": 6.995817160920792e-09,
      "loss": 0.0264853298664093,
      "step": 1040
    },
    {
      "epoch": 3.0,
      "step": 1044,
      "total_flos": 1577088536150016.0,
      "train_loss": 0.06165807318099385,
      "train_runtime": 23128.4215,
      "train_samples_per_second": 2.881,
      "train_steps_per_second": 0.045
    }
  ],
  "logging_steps": 5,
  "max_steps": 1044,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 207,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1577088536150016.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}