16315 lines
422 KiB
JSON
16315 lines
422 KiB
JSON
{
|
|
"best_global_step": 1380,
|
|
"best_metric": 0.7464115023612976,
|
|
"best_model_checkpoint": "saves/qwen3-1.7B/Qwen3-1.7B-SFT-science-2e-5/checkpoint-1380",
|
|
"epoch": 3.0,
|
|
"eval_steps": 230,
|
|
"global_step": 2313,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0012977269501388974,
|
|
"grad_norm": 18.96442413330078,
|
|
"learning_rate": 0.0,
|
|
"loss": 1.341123104095459,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0025954539002777948,
|
|
"grad_norm": 17.5643310546875,
|
|
"learning_rate": 1.7241379310344828e-07,
|
|
"loss": 1.240975022315979,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.003893180850416692,
|
|
"grad_norm": 18.22071075439453,
|
|
"learning_rate": 3.4482758620689656e-07,
|
|
"loss": 1.3369407653808594,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.0051909078005555895,
|
|
"grad_norm": 19.40529441833496,
|
|
"learning_rate": 5.172413793103449e-07,
|
|
"loss": 1.4051162004470825,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.006488634750694487,
|
|
"grad_norm": 17.282682418823242,
|
|
"learning_rate": 6.896551724137931e-07,
|
|
"loss": 1.318056344985962,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.007786361700833384,
|
|
"grad_norm": 18.145490646362305,
|
|
"learning_rate": 8.620689655172415e-07,
|
|
"loss": 1.3011627197265625,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.009084088650972282,
|
|
"grad_norm": 18.944950103759766,
|
|
"learning_rate": 1.0344827586206898e-06,
|
|
"loss": 1.2762426137924194,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.010381815601111179,
|
|
"grad_norm": 16.987550735473633,
|
|
"learning_rate": 1.2068965517241381e-06,
|
|
"loss": 1.2320008277893066,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.011679542551250076,
|
|
"grad_norm": 15.374279975891113,
|
|
"learning_rate": 1.3793103448275862e-06,
|
|
"loss": 1.1568862199783325,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.012977269501388973,
|
|
"grad_norm": 15.470294952392578,
|
|
"learning_rate": 1.5517241379310346e-06,
|
|
"loss": 1.2633228302001953,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.01427499645152787,
|
|
"grad_norm": 13.62917709350586,
|
|
"learning_rate": 1.724137931034483e-06,
|
|
"loss": 1.2120124101638794,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.015572723401666768,
|
|
"grad_norm": 11.841530799865723,
|
|
"learning_rate": 1.896551724137931e-06,
|
|
"loss": 1.15806245803833,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.016870450351805667,
|
|
"grad_norm": 11.673654556274414,
|
|
"learning_rate": 2.0689655172413796e-06,
|
|
"loss": 1.1886231899261475,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.018168177301944564,
|
|
"grad_norm": 11.115256309509277,
|
|
"learning_rate": 2.241379310344828e-06,
|
|
"loss": 1.1659168004989624,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.01946590425208346,
|
|
"grad_norm": 8.34097671508789,
|
|
"learning_rate": 2.4137931034482762e-06,
|
|
"loss": 1.1347044706344604,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.020763631202222358,
|
|
"grad_norm": 6.3707804679870605,
|
|
"learning_rate": 2.5862068965517246e-06,
|
|
"loss": 1.097546935081482,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.022061358152361255,
|
|
"grad_norm": 6.07731294631958,
|
|
"learning_rate": 2.7586206896551725e-06,
|
|
"loss": 1.1303181648254395,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.023359085102500152,
|
|
"grad_norm": 5.143428802490234,
|
|
"learning_rate": 2.931034482758621e-06,
|
|
"loss": 1.087995171546936,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.02465681205263905,
|
|
"grad_norm": 5.108595371246338,
|
|
"learning_rate": 3.103448275862069e-06,
|
|
"loss": 1.09377121925354,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.025954539002777947,
|
|
"grad_norm": 4.329593658447266,
|
|
"learning_rate": 3.2758620689655175e-06,
|
|
"loss": 0.9835488200187683,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.027252265952916844,
|
|
"grad_norm": 2.5329697132110596,
|
|
"learning_rate": 3.448275862068966e-06,
|
|
"loss": 1.1068130731582642,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.02854999290305574,
|
|
"grad_norm": 2.4052135944366455,
|
|
"learning_rate": 3.620689655172414e-06,
|
|
"loss": 0.9785792827606201,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.029847719853194638,
|
|
"grad_norm": 2.2059929370880127,
|
|
"learning_rate": 3.793103448275862e-06,
|
|
"loss": 1.0071507692337036,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.031145446803333535,
|
|
"grad_norm": 1.962939977645874,
|
|
"learning_rate": 3.96551724137931e-06,
|
|
"loss": 0.9504339694976807,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.032443173753472436,
|
|
"grad_norm": 1.8630015850067139,
|
|
"learning_rate": 4.137931034482759e-06,
|
|
"loss": 0.9488564133644104,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.03374090070361133,
|
|
"grad_norm": 1.9074621200561523,
|
|
"learning_rate": 4.310344827586207e-06,
|
|
"loss": 0.9918304681777954,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.03503862765375023,
|
|
"grad_norm": 1.7486937046051025,
|
|
"learning_rate": 4.482758620689656e-06,
|
|
"loss": 0.9598171710968018,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.03633635460388913,
|
|
"grad_norm": 1.5654025077819824,
|
|
"learning_rate": 4.655172413793104e-06,
|
|
"loss": 0.9875293970108032,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.037634081554028025,
|
|
"grad_norm": 1.5146547555923462,
|
|
"learning_rate": 4.8275862068965525e-06,
|
|
"loss": 0.9899477958679199,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.03893180850416692,
|
|
"grad_norm": 1.4136415719985962,
|
|
"learning_rate": 5e-06,
|
|
"loss": 1.0122514963150024,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.04022953545430582,
|
|
"grad_norm": 1.3606868982315063,
|
|
"learning_rate": 5.172413793103449e-06,
|
|
"loss": 0.9211847186088562,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.041527262404444716,
|
|
"grad_norm": 1.1916248798370361,
|
|
"learning_rate": 5.344827586206896e-06,
|
|
"loss": 0.9429690837860107,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.04282498935458361,
|
|
"grad_norm": 1.1089906692504883,
|
|
"learning_rate": 5.517241379310345e-06,
|
|
"loss": 0.9432889819145203,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.04412271630472251,
|
|
"grad_norm": 1.0991381406784058,
|
|
"learning_rate": 5.689655172413794e-06,
|
|
"loss": 0.8937160968780518,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.04542044325486141,
|
|
"grad_norm": 1.1420905590057373,
|
|
"learning_rate": 5.862068965517242e-06,
|
|
"loss": 0.9616763591766357,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.046718170205000305,
|
|
"grad_norm": 1.22003972530365,
|
|
"learning_rate": 6.03448275862069e-06,
|
|
"loss": 0.991248369216919,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.0480158971551392,
|
|
"grad_norm": 1.0027211904525757,
|
|
"learning_rate": 6.206896551724138e-06,
|
|
"loss": 0.8961243033409119,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.0493136241052781,
|
|
"grad_norm": 0.948948085308075,
|
|
"learning_rate": 6.379310344827587e-06,
|
|
"loss": 0.8873807787895203,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.050611351055416996,
|
|
"grad_norm": 0.906653106212616,
|
|
"learning_rate": 6.551724137931035e-06,
|
|
"loss": 0.9843493103981018,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.05190907800555589,
|
|
"grad_norm": 0.9032185077667236,
|
|
"learning_rate": 6.724137931034484e-06,
|
|
"loss": 0.9521259069442749,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.05320680495569479,
|
|
"grad_norm": 0.9004918336868286,
|
|
"learning_rate": 6.896551724137932e-06,
|
|
"loss": 0.9388642311096191,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.05450453190583369,
|
|
"grad_norm": 0.9163469672203064,
|
|
"learning_rate": 7.0689655172413796e-06,
|
|
"loss": 0.8808169364929199,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.055802258855972585,
|
|
"grad_norm": 0.8777008056640625,
|
|
"learning_rate": 7.241379310344828e-06,
|
|
"loss": 0.8969473242759705,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.05709998580611148,
|
|
"grad_norm": 0.8831114768981934,
|
|
"learning_rate": 7.413793103448277e-06,
|
|
"loss": 0.8995171189308167,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.05839771275625038,
|
|
"grad_norm": 0.8527185320854187,
|
|
"learning_rate": 7.586206896551724e-06,
|
|
"loss": 0.9566978216171265,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.059695439706389276,
|
|
"grad_norm": 0.8445229530334473,
|
|
"learning_rate": 7.758620689655173e-06,
|
|
"loss": 0.8870581388473511,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.060993166656528174,
|
|
"grad_norm": 0.7909572720527649,
|
|
"learning_rate": 7.93103448275862e-06,
|
|
"loss": 0.839882493019104,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.06229089360666707,
|
|
"grad_norm": 0.9035473465919495,
|
|
"learning_rate": 8.103448275862069e-06,
|
|
"loss": 0.9470881223678589,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.06358862055680597,
|
|
"grad_norm": 0.812706708908081,
|
|
"learning_rate": 8.275862068965518e-06,
|
|
"loss": 0.9084426760673523,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.06488634750694487,
|
|
"grad_norm": 0.7788446545600891,
|
|
"learning_rate": 8.448275862068966e-06,
|
|
"loss": 0.9100271463394165,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.06618407445708377,
|
|
"grad_norm": 0.7733594179153442,
|
|
"learning_rate": 8.620689655172414e-06,
|
|
"loss": 0.9046688675880432,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.06748180140722267,
|
|
"grad_norm": 0.8074057698249817,
|
|
"learning_rate": 8.793103448275862e-06,
|
|
"loss": 0.9495884776115417,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.06877952835736156,
|
|
"grad_norm": 0.7883110642433167,
|
|
"learning_rate": 8.965517241379312e-06,
|
|
"loss": 0.944835901260376,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.07007725530750046,
|
|
"grad_norm": 0.7795141935348511,
|
|
"learning_rate": 9.13793103448276e-06,
|
|
"loss": 0.8827984929084778,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.07137498225763936,
|
|
"grad_norm": 0.7496516704559326,
|
|
"learning_rate": 9.310344827586207e-06,
|
|
"loss": 0.8837717771530151,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.07267270920777825,
|
|
"grad_norm": 0.7296638488769531,
|
|
"learning_rate": 9.482758620689655e-06,
|
|
"loss": 0.9134169220924377,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.07397043615791715,
|
|
"grad_norm": 0.7594932913780212,
|
|
"learning_rate": 9.655172413793105e-06,
|
|
"loss": 0.8602768182754517,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.07526816310805605,
|
|
"grad_norm": 0.7925019264221191,
|
|
"learning_rate": 9.827586206896553e-06,
|
|
"loss": 0.9638795852661133,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.07656589005819495,
|
|
"grad_norm": 0.7823756337165833,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.9325800538063049,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.07786361700833384,
|
|
"grad_norm": 0.7671526074409485,
|
|
"learning_rate": 1.0172413793103449e-05,
|
|
"loss": 0.8490806221961975,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.07916134395847274,
|
|
"grad_norm": 0.7950026392936707,
|
|
"learning_rate": 1.0344827586206898e-05,
|
|
"loss": 0.8811596632003784,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.08045907090861164,
|
|
"grad_norm": 0.7760382294654846,
|
|
"learning_rate": 1.0517241379310346e-05,
|
|
"loss": 0.9363852739334106,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.08175679785875054,
|
|
"grad_norm": 0.7695664763450623,
|
|
"learning_rate": 1.0689655172413792e-05,
|
|
"loss": 0.9032339453697205,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.08305452480888943,
|
|
"grad_norm": 0.7472826838493347,
|
|
"learning_rate": 1.0862068965517242e-05,
|
|
"loss": 0.9319165349006653,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.08435225175902833,
|
|
"grad_norm": 0.7492451667785645,
|
|
"learning_rate": 1.103448275862069e-05,
|
|
"loss": 0.9181802272796631,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.08564997870916723,
|
|
"grad_norm": 0.7906931042671204,
|
|
"learning_rate": 1.1206896551724138e-05,
|
|
"loss": 0.9204844236373901,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.08694770565930612,
|
|
"grad_norm": 0.7987682223320007,
|
|
"learning_rate": 1.1379310344827587e-05,
|
|
"loss": 0.9132669568061829,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.08824543260944502,
|
|
"grad_norm": 0.7293349504470825,
|
|
"learning_rate": 1.1551724137931035e-05,
|
|
"loss": 0.840244472026825,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.08954315955958392,
|
|
"grad_norm": 0.7649659514427185,
|
|
"learning_rate": 1.1724137931034483e-05,
|
|
"loss": 0.9429194331169128,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.09084088650972282,
|
|
"grad_norm": 0.7362731695175171,
|
|
"learning_rate": 1.1896551724137933e-05,
|
|
"loss": 0.910248339176178,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.09213861345986171,
|
|
"grad_norm": 0.7714956402778625,
|
|
"learning_rate": 1.206896551724138e-05,
|
|
"loss": 0.9148205518722534,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.09343634041000061,
|
|
"grad_norm": 0.8190087676048279,
|
|
"learning_rate": 1.2241379310344827e-05,
|
|
"loss": 1.0036617517471313,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.0947340673601395,
|
|
"grad_norm": 0.7508696913719177,
|
|
"learning_rate": 1.2413793103448277e-05,
|
|
"loss": 0.8585586547851562,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.0960317943102784,
|
|
"grad_norm": 0.7731637358665466,
|
|
"learning_rate": 1.2586206896551725e-05,
|
|
"loss": 0.8797649145126343,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.0973295212604173,
|
|
"grad_norm": 0.7766374349594116,
|
|
"learning_rate": 1.2758620689655174e-05,
|
|
"loss": 0.8823714852333069,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.0986272482105562,
|
|
"grad_norm": 0.7738403677940369,
|
|
"learning_rate": 1.2931034482758622e-05,
|
|
"loss": 0.9374374747276306,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.0999249751606951,
|
|
"grad_norm": 0.7996422648429871,
|
|
"learning_rate": 1.310344827586207e-05,
|
|
"loss": 0.8985888957977295,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.10122270211083399,
|
|
"grad_norm": 0.8077470064163208,
|
|
"learning_rate": 1.327586206896552e-05,
|
|
"loss": 0.8687019944190979,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.10252042906097289,
|
|
"grad_norm": 0.7868083715438843,
|
|
"learning_rate": 1.3448275862068967e-05,
|
|
"loss": 0.9471523761749268,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.10381815601111179,
|
|
"grad_norm": 0.7429269552230835,
|
|
"learning_rate": 1.3620689655172414e-05,
|
|
"loss": 0.8650257587432861,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.10511588296125068,
|
|
"grad_norm": 0.736170768737793,
|
|
"learning_rate": 1.3793103448275863e-05,
|
|
"loss": 0.8755403757095337,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.10641360991138958,
|
|
"grad_norm": 0.7359841465950012,
|
|
"learning_rate": 1.3965517241379311e-05,
|
|
"loss": 0.8383484482765198,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.10771133686152848,
|
|
"grad_norm": 0.7211300730705261,
|
|
"learning_rate": 1.4137931034482759e-05,
|
|
"loss": 0.8565696477890015,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.10900906381166738,
|
|
"grad_norm": 0.7671189308166504,
|
|
"learning_rate": 1.4310344827586209e-05,
|
|
"loss": 0.9218558073043823,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.11030679076180627,
|
|
"grad_norm": 0.816425085067749,
|
|
"learning_rate": 1.4482758620689657e-05,
|
|
"loss": 0.870709240436554,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.11160451771194517,
|
|
"grad_norm": 0.7335647940635681,
|
|
"learning_rate": 1.4655172413793105e-05,
|
|
"loss": 0.8868783116340637,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.11290224466208407,
|
|
"grad_norm": 0.7765848636627197,
|
|
"learning_rate": 1.4827586206896554e-05,
|
|
"loss": 0.8968692421913147,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.11419997161222296,
|
|
"grad_norm": 0.7707907557487488,
|
|
"learning_rate": 1.5000000000000002e-05,
|
|
"loss": 0.8512423634529114,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.11549769856236186,
|
|
"grad_norm": 0.7698812484741211,
|
|
"learning_rate": 1.5172413793103448e-05,
|
|
"loss": 0.9038546085357666,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.11679542551250076,
|
|
"grad_norm": 0.7673100829124451,
|
|
"learning_rate": 1.5344827586206898e-05,
|
|
"loss": 0.9032548666000366,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.11809315246263966,
|
|
"grad_norm": 0.7782520055770874,
|
|
"learning_rate": 1.5517241379310346e-05,
|
|
"loss": 0.8969484567642212,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.11939087941277855,
|
|
"grad_norm": 0.7486196756362915,
|
|
"learning_rate": 1.5689655172413794e-05,
|
|
"loss": 0.9460266828536987,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.12068860636291745,
|
|
"grad_norm": 0.7591387033462524,
|
|
"learning_rate": 1.586206896551724e-05,
|
|
"loss": 0.8913143277168274,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.12198633331305635,
|
|
"grad_norm": 0.7186006903648376,
|
|
"learning_rate": 1.603448275862069e-05,
|
|
"loss": 0.817532479763031,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.12328406026319524,
|
|
"grad_norm": 0.8398354053497314,
|
|
"learning_rate": 1.6206896551724137e-05,
|
|
"loss": 0.9849364161491394,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.12458178721333414,
|
|
"grad_norm": 0.7659850120544434,
|
|
"learning_rate": 1.637931034482759e-05,
|
|
"loss": 0.8463207483291626,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.12587951416347304,
|
|
"grad_norm": 0.7916679978370667,
|
|
"learning_rate": 1.6551724137931037e-05,
|
|
"loss": 0.87321537733078,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.12717724111361195,
|
|
"grad_norm": 0.7151588201522827,
|
|
"learning_rate": 1.6724137931034485e-05,
|
|
"loss": 0.8810160160064697,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.12847496806375083,
|
|
"grad_norm": 0.7750177383422852,
|
|
"learning_rate": 1.6896551724137932e-05,
|
|
"loss": 0.7909659147262573,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.12977269501388974,
|
|
"grad_norm": 0.7832080125808716,
|
|
"learning_rate": 1.706896551724138e-05,
|
|
"loss": 0.9595565795898438,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.13107042196402863,
|
|
"grad_norm": 0.764074444770813,
|
|
"learning_rate": 1.7241379310344828e-05,
|
|
"loss": 0.9244315028190613,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.13236814891416754,
|
|
"grad_norm": 0.8302505016326904,
|
|
"learning_rate": 1.7413793103448276e-05,
|
|
"loss": 0.8567872643470764,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.13366587586430642,
|
|
"grad_norm": 0.7476164102554321,
|
|
"learning_rate": 1.7586206896551724e-05,
|
|
"loss": 0.8335643410682678,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.13496360281444533,
|
|
"grad_norm": 0.7683222889900208,
|
|
"learning_rate": 1.7758620689655175e-05,
|
|
"loss": 0.92899489402771,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.13626132976458422,
|
|
"grad_norm": 0.8164420127868652,
|
|
"learning_rate": 1.7931034482758623e-05,
|
|
"loss": 0.9577179551124573,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.13755905671472313,
|
|
"grad_norm": 0.7937741279602051,
|
|
"learning_rate": 1.810344827586207e-05,
|
|
"loss": 0.9404830932617188,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.138856783664862,
|
|
"grad_norm": 0.7443995475769043,
|
|
"learning_rate": 1.827586206896552e-05,
|
|
"loss": 0.8533992171287537,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.14015451061500092,
|
|
"grad_norm": 0.7239556312561035,
|
|
"learning_rate": 1.8448275862068967e-05,
|
|
"loss": 0.8692059516906738,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.1414522375651398,
|
|
"grad_norm": 0.7722207903862,
|
|
"learning_rate": 1.8620689655172415e-05,
|
|
"loss": 0.9231195449829102,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.14274996451527872,
|
|
"grad_norm": 0.8155950307846069,
|
|
"learning_rate": 1.8793103448275863e-05,
|
|
"loss": 0.9769394397735596,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.1440476914654176,
|
|
"grad_norm": 0.8122441172599792,
|
|
"learning_rate": 1.896551724137931e-05,
|
|
"loss": 0.9506130218505859,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.1453454184155565,
|
|
"grad_norm": 0.748271644115448,
|
|
"learning_rate": 1.913793103448276e-05,
|
|
"loss": 0.8314372897148132,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.1466431453656954,
|
|
"grad_norm": 0.7835760712623596,
|
|
"learning_rate": 1.931034482758621e-05,
|
|
"loss": 0.9071435332298279,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.1479408723158343,
|
|
"grad_norm": 0.7403405904769897,
|
|
"learning_rate": 1.9482758620689658e-05,
|
|
"loss": 0.8897596597671509,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.1492385992659732,
|
|
"grad_norm": 0.8157104849815369,
|
|
"learning_rate": 1.9655172413793106e-05,
|
|
"loss": 0.8683630228042603,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.1505363262161121,
|
|
"grad_norm": 0.8036532402038574,
|
|
"learning_rate": 1.9827586206896554e-05,
|
|
"loss": 0.8975539207458496,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.15183405316625098,
|
|
"grad_norm": 0.7673157453536987,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.938015341758728,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.1531317801163899,
|
|
"grad_norm": 0.8311364650726318,
|
|
"learning_rate": 1.999998977626552e-05,
|
|
"loss": 0.927339494228363,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.15442950706652878,
|
|
"grad_norm": 0.8438189029693604,
|
|
"learning_rate": 1.999995910508299e-05,
|
|
"loss": 0.8367739319801331,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.1557272340166677,
|
|
"grad_norm": 0.7619196176528931,
|
|
"learning_rate": 1.999990798651512e-05,
|
|
"loss": 0.8823627829551697,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.15702496096680657,
|
|
"grad_norm": 0.8044223785400391,
|
|
"learning_rate": 1.9999836420666438e-05,
|
|
"loss": 0.9462600350379944,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.15832268791694548,
|
|
"grad_norm": 0.7767183780670166,
|
|
"learning_rate": 1.999974440768327e-05,
|
|
"loss": 0.8584571480751038,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.15962041486708436,
|
|
"grad_norm": 0.8261749148368835,
|
|
"learning_rate": 1.9999631947753776e-05,
|
|
"loss": 0.8864863514900208,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.16091814181722328,
|
|
"grad_norm": 0.7884521484375,
|
|
"learning_rate": 1.999949904110789e-05,
|
|
"loss": 0.9228469133377075,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.16221586876736216,
|
|
"grad_norm": 0.7482346296310425,
|
|
"learning_rate": 1.999934568801738e-05,
|
|
"loss": 0.8749440908432007,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.16351359571750107,
|
|
"grad_norm": 0.7735321521759033,
|
|
"learning_rate": 1.999917188879582e-05,
|
|
"loss": 0.8487443327903748,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.16481132266763995,
|
|
"grad_norm": 0.7950016856193542,
|
|
"learning_rate": 1.9998977643798572e-05,
|
|
"loss": 0.8879282474517822,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.16610904961777886,
|
|
"grad_norm": 0.7628664374351501,
|
|
"learning_rate": 1.999876295342283e-05,
|
|
"loss": 0.8263102173805237,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.16740677656791775,
|
|
"grad_norm": 0.7986794114112854,
|
|
"learning_rate": 1.9998527818107577e-05,
|
|
"loss": 0.8462676405906677,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.16870450351805666,
|
|
"grad_norm": 0.7867287993431091,
|
|
"learning_rate": 1.9998272238333606e-05,
|
|
"loss": 0.8144584894180298,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.17000223046819554,
|
|
"grad_norm": 0.7938011288642883,
|
|
"learning_rate": 1.9997996214623515e-05,
|
|
"loss": 0.9469823837280273,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.17129995741833445,
|
|
"grad_norm": 0.7824422717094421,
|
|
"learning_rate": 1.9997699747541698e-05,
|
|
"loss": 0.8819964528083801,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.17259768436847334,
|
|
"grad_norm": 0.7831183075904846,
|
|
"learning_rate": 1.9997382837694355e-05,
|
|
"loss": 0.8070334196090698,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.17389541131861225,
|
|
"grad_norm": 0.7970272302627563,
|
|
"learning_rate": 1.999704548572949e-05,
|
|
"loss": 0.9148434996604919,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.17519313826875113,
|
|
"grad_norm": 0.7763343453407288,
|
|
"learning_rate": 1.9996687692336896e-05,
|
|
"loss": 0.8732989430427551,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.17649086521889004,
|
|
"grad_norm": 0.7826754450798035,
|
|
"learning_rate": 1.9996309458248184e-05,
|
|
"loss": 0.8220726847648621,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.17778859216902893,
|
|
"grad_norm": 0.761687159538269,
|
|
"learning_rate": 1.999591078423673e-05,
|
|
"loss": 0.8763125538825989,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.17908631911916784,
|
|
"grad_norm": 0.7728819251060486,
|
|
"learning_rate": 1.9995491671117734e-05,
|
|
"loss": 0.804518461227417,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.18038404606930672,
|
|
"grad_norm": 0.7697947025299072,
|
|
"learning_rate": 1.999505211974817e-05,
|
|
"loss": 0.8979027271270752,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.18168177301944563,
|
|
"grad_norm": 0.7905195951461792,
|
|
"learning_rate": 1.999459213102681e-05,
|
|
"loss": 0.8996750116348267,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.1829794999695845,
|
|
"grad_norm": 0.7597678899765015,
|
|
"learning_rate": 1.9994111705894218e-05,
|
|
"loss": 0.9672253727912903,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.18427722691972342,
|
|
"grad_norm": 0.7724127769470215,
|
|
"learning_rate": 1.9993610845332734e-05,
|
|
"loss": 0.9037659764289856,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.1855749538698623,
|
|
"grad_norm": 0.8090096712112427,
|
|
"learning_rate": 1.99930895503665e-05,
|
|
"loss": 0.9177453517913818,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.18687268082000122,
|
|
"grad_norm": 0.7363874316215515,
|
|
"learning_rate": 1.9992547822061427e-05,
|
|
"loss": 0.8449195027351379,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.1881704077701401,
|
|
"grad_norm": 0.8058642745018005,
|
|
"learning_rate": 1.9991985661525217e-05,
|
|
"loss": 0.998737096786499,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.189468134720279,
|
|
"grad_norm": 0.7756547927856445,
|
|
"learning_rate": 1.999140306990734e-05,
|
|
"loss": 0.8317436575889587,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.1907658616704179,
|
|
"grad_norm": 0.7556934952735901,
|
|
"learning_rate": 1.999080004839905e-05,
|
|
"loss": 0.8867667317390442,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.1920635886205568,
|
|
"grad_norm": 0.8031500577926636,
|
|
"learning_rate": 1.999017659823338e-05,
|
|
"loss": 0.9501492381095886,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.1933613155706957,
|
|
"grad_norm": 0.7905899882316589,
|
|
"learning_rate": 1.9989532720685115e-05,
|
|
"loss": 0.9475319981575012,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.1946590425208346,
|
|
"grad_norm": 0.7352354526519775,
|
|
"learning_rate": 1.998886841707083e-05,
|
|
"loss": 0.8857019543647766,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.19595676947097349,
|
|
"grad_norm": 0.7715173363685608,
|
|
"learning_rate": 1.9988183688748862e-05,
|
|
"loss": 0.9451955556869507,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.1972544964211124,
|
|
"grad_norm": 0.7771379351615906,
|
|
"learning_rate": 1.9987478537119297e-05,
|
|
"loss": 0.9485697150230408,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.19855222337125128,
|
|
"grad_norm": 0.7867424488067627,
|
|
"learning_rate": 1.9986752963624002e-05,
|
|
"loss": 0.9234886169433594,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.1998499503213902,
|
|
"grad_norm": 0.8710278272628784,
|
|
"learning_rate": 1.998600696974658e-05,
|
|
"loss": 0.9107885956764221,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.20114767727152907,
|
|
"grad_norm": 0.7554876208305359,
|
|
"learning_rate": 1.9985240557012406e-05,
|
|
"loss": 0.9065303206443787,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.20244540422166798,
|
|
"grad_norm": 0.7357529997825623,
|
|
"learning_rate": 1.99844537269886e-05,
|
|
"loss": 0.7701905965805054,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.20374313117180687,
|
|
"grad_norm": 0.8202847242355347,
|
|
"learning_rate": 1.9983646481284028e-05,
|
|
"loss": 0.992992103099823,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.20504085812194578,
|
|
"grad_norm": 0.7828136682510376,
|
|
"learning_rate": 1.9982818821549308e-05,
|
|
"loss": 0.9072571992874146,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.2063385850720847,
|
|
"grad_norm": 0.7381945252418518,
|
|
"learning_rate": 1.9981970749476792e-05,
|
|
"loss": 0.8416173458099365,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.20763631202222357,
|
|
"grad_norm": 0.7436814308166504,
|
|
"learning_rate": 1.998110226680057e-05,
|
|
"loss": 0.860198438167572,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.20893403897236248,
|
|
"grad_norm": 0.7724810242652893,
|
|
"learning_rate": 1.9980213375296468e-05,
|
|
"loss": 0.8358607292175293,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.21023176592250137,
|
|
"grad_norm": 0.7248872518539429,
|
|
"learning_rate": 1.997930407678205e-05,
|
|
"loss": 0.8103194236755371,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.21152949287264028,
|
|
"grad_norm": 0.7623717784881592,
|
|
"learning_rate": 1.99783743731166e-05,
|
|
"loss": 0.8410395383834839,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.21282721982277916,
|
|
"grad_norm": 0.7665237188339233,
|
|
"learning_rate": 1.9977424266201126e-05,
|
|
"loss": 0.9623262286186218,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.21412494677291807,
|
|
"grad_norm": 0.7374143600463867,
|
|
"learning_rate": 1.9976453757978355e-05,
|
|
"loss": 0.8592593669891357,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.21542267372305696,
|
|
"grad_norm": 0.7116683721542358,
|
|
"learning_rate": 1.997546285043273e-05,
|
|
"loss": 0.7682055234909058,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.21672040067319587,
|
|
"grad_norm": 0.8028838038444519,
|
|
"learning_rate": 1.9974451545590407e-05,
|
|
"loss": 0.9229005575180054,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.21801812762333475,
|
|
"grad_norm": 0.8015571236610413,
|
|
"learning_rate": 1.997341984551925e-05,
|
|
"loss": 0.8815708756446838,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.21931585457347366,
|
|
"grad_norm": 0.7032439708709717,
|
|
"learning_rate": 1.9972367752328824e-05,
|
|
"loss": 0.7823411822319031,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.22061358152361255,
|
|
"grad_norm": 0.7352714538574219,
|
|
"learning_rate": 1.9971295268170393e-05,
|
|
"loss": 0.8304542899131775,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.22191130847375146,
|
|
"grad_norm": 0.7774588465690613,
|
|
"learning_rate": 1.9970202395236913e-05,
|
|
"loss": 0.8442955017089844,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.22320903542389034,
|
|
"grad_norm": 0.8193069696426392,
|
|
"learning_rate": 1.996908913576304e-05,
|
|
"loss": 0.8395213484764099,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.22450676237402925,
|
|
"grad_norm": 0.805517852306366,
|
|
"learning_rate": 1.9967955492025094e-05,
|
|
"loss": 0.8934487104415894,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.22580448932416813,
|
|
"grad_norm": 0.7246384620666504,
|
|
"learning_rate": 1.9966801466341107e-05,
|
|
"loss": 0.8137494325637817,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.22710221627430704,
|
|
"grad_norm": 0.7587799429893494,
|
|
"learning_rate": 1.9965627061070755e-05,
|
|
"loss": 0.8050680756568909,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.22839994322444593,
|
|
"grad_norm": 0.744683027267456,
|
|
"learning_rate": 1.996443227861541e-05,
|
|
"loss": 0.9190195798873901,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.22969767017458484,
|
|
"grad_norm": 0.7057942748069763,
|
|
"learning_rate": 1.996321712141809e-05,
|
|
"loss": 0.771306574344635,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.23099539712472372,
|
|
"grad_norm": 0.758804440498352,
|
|
"learning_rate": 1.9961981591963494e-05,
|
|
"loss": 0.9052093029022217,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.23229312407486263,
|
|
"grad_norm": 0.761832058429718,
|
|
"learning_rate": 1.9960725692777956e-05,
|
|
"loss": 0.8963150382041931,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.23359085102500152,
|
|
"grad_norm": 0.7698036432266235,
|
|
"learning_rate": 1.995944942642948e-05,
|
|
"loss": 0.879082202911377,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.23488857797514043,
|
|
"grad_norm": 0.7247833013534546,
|
|
"learning_rate": 1.9958152795527706e-05,
|
|
"loss": 0.8330357074737549,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.2361863049252793,
|
|
"grad_norm": 0.8077431321144104,
|
|
"learning_rate": 1.9956835802723916e-05,
|
|
"loss": 0.94368577003479,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.23748403187541822,
|
|
"grad_norm": 0.7545983195304871,
|
|
"learning_rate": 1.9955498450711026e-05,
|
|
"loss": 0.8294435739517212,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.2387817588255571,
|
|
"grad_norm": 0.7249157428741455,
|
|
"learning_rate": 1.9954140742223586e-05,
|
|
"loss": 0.8432042598724365,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.24007948577569602,
|
|
"grad_norm": 0.7442438006401062,
|
|
"learning_rate": 1.9952762680037758e-05,
|
|
"loss": 0.8805173635482788,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.2413772127258349,
|
|
"grad_norm": 0.7329111695289612,
|
|
"learning_rate": 1.995136426697134e-05,
|
|
"loss": 0.863207221031189,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.2426749396759738,
|
|
"grad_norm": 0.716304361820221,
|
|
"learning_rate": 1.9949945505883723e-05,
|
|
"loss": 0.8094059824943542,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.2439726666261127,
|
|
"grad_norm": 0.7312113046646118,
|
|
"learning_rate": 1.994850639967592e-05,
|
|
"loss": 0.9180686473846436,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.2452703935762516,
|
|
"grad_norm": 0.7700150609016418,
|
|
"learning_rate": 1.994704695129054e-05,
|
|
"loss": 0.8603487610816956,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.2465681205263905,
|
|
"grad_norm": 0.7655259370803833,
|
|
"learning_rate": 1.9945567163711788e-05,
|
|
"loss": 0.8780601620674133,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.2478658474765294,
|
|
"grad_norm": 0.7268514633178711,
|
|
"learning_rate": 1.9944067039965445e-05,
|
|
"loss": 0.8242926001548767,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.24916357442666828,
|
|
"grad_norm": 0.7264497876167297,
|
|
"learning_rate": 1.9942546583118894e-05,
|
|
"loss": 0.894584596157074,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.25046130137680717,
|
|
"grad_norm": 0.773765504360199,
|
|
"learning_rate": 1.994100579628108e-05,
|
|
"loss": 0.8504235744476318,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.2517590283269461,
|
|
"grad_norm": 0.6867210865020752,
|
|
"learning_rate": 1.9939444682602522e-05,
|
|
"loss": 0.7794942259788513,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.253056755277085,
|
|
"grad_norm": 0.7574644684791565,
|
|
"learning_rate": 1.9937863245275303e-05,
|
|
"loss": 0.8992743492126465,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.2543544822272239,
|
|
"grad_norm": 0.7294052243232727,
|
|
"learning_rate": 1.9936261487533066e-05,
|
|
"loss": 0.8371526002883911,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.25565220917736275,
|
|
"grad_norm": 0.7199873924255371,
|
|
"learning_rate": 1.993463941265099e-05,
|
|
"loss": 0.8135456442832947,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.25694993612750167,
|
|
"grad_norm": 0.7726846933364868,
|
|
"learning_rate": 1.993299702394582e-05,
|
|
"loss": 0.8241779804229736,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.2582476630776406,
|
|
"grad_norm": 0.7929345369338989,
|
|
"learning_rate": 1.9931334324775817e-05,
|
|
"loss": 0.9309947490692139,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.2595453900277795,
|
|
"grad_norm": 0.7434781193733215,
|
|
"learning_rate": 1.9929651318540783e-05,
|
|
"loss": 0.8470789790153503,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.26084311697791834,
|
|
"grad_norm": 0.8077720403671265,
|
|
"learning_rate": 1.9927948008682038e-05,
|
|
"loss": 0.8455624580383301,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.26214084392805725,
|
|
"grad_norm": 0.7723199725151062,
|
|
"learning_rate": 1.9926224398682424e-05,
|
|
"loss": 0.8877855539321899,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.26343857087819617,
|
|
"grad_norm": 0.723115861415863,
|
|
"learning_rate": 1.992448049206628e-05,
|
|
"loss": 0.7923484444618225,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.2647362978283351,
|
|
"grad_norm": 0.7819997072219849,
|
|
"learning_rate": 1.9922716292399458e-05,
|
|
"loss": 0.8195080757141113,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.26603402477847393,
|
|
"grad_norm": 0.7534734010696411,
|
|
"learning_rate": 1.9920931803289302e-05,
|
|
"loss": 0.8843890428543091,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.26733175172861284,
|
|
"grad_norm": 0.6980569362640381,
|
|
"learning_rate": 1.9919127028384634e-05,
|
|
"loss": 0.841879665851593,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.26862947867875175,
|
|
"grad_norm": 0.7415062189102173,
|
|
"learning_rate": 1.9917301971375767e-05,
|
|
"loss": 0.910488486289978,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.26992720562889067,
|
|
"grad_norm": 0.7163265347480774,
|
|
"learning_rate": 1.991545663599448e-05,
|
|
"loss": 0.8969396948814392,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.2712249325790295,
|
|
"grad_norm": 0.7287595868110657,
|
|
"learning_rate": 1.9913591026014016e-05,
|
|
"loss": 0.8557533621788025,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.27252265952916843,
|
|
"grad_norm": 0.8144972324371338,
|
|
"learning_rate": 1.9911705145249076e-05,
|
|
"loss": 0.9075403809547424,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.27382038647930734,
|
|
"grad_norm": 0.6856523156166077,
|
|
"learning_rate": 1.9909798997555806e-05,
|
|
"loss": 0.9015495777130127,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.27511811342944625,
|
|
"grad_norm": 0.7224120497703552,
|
|
"learning_rate": 1.99078725868318e-05,
|
|
"loss": 0.8107393383979797,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.2764158403795851,
|
|
"grad_norm": 0.783104419708252,
|
|
"learning_rate": 1.9905925917016077e-05,
|
|
"loss": 0.831728458404541,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.277713567329724,
|
|
"grad_norm": 0.765583872795105,
|
|
"learning_rate": 1.9903958992089087e-05,
|
|
"loss": 0.872807502746582,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.27901129427986293,
|
|
"grad_norm": 0.7342137098312378,
|
|
"learning_rate": 1.990197181607269e-05,
|
|
"loss": 0.8797867298126221,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.28030902123000184,
|
|
"grad_norm": 0.7050272822380066,
|
|
"learning_rate": 1.989996439303016e-05,
|
|
"loss": 0.8417098522186279,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.2816067481801407,
|
|
"grad_norm": 0.7334570288658142,
|
|
"learning_rate": 1.989793672706617e-05,
|
|
"loss": 0.8433218598365784,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.2829044751302796,
|
|
"grad_norm": 0.7583123445510864,
|
|
"learning_rate": 1.9895888822326783e-05,
|
|
"loss": 0.8300482034683228,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.2842022020804185,
|
|
"grad_norm": 0.7325905561447144,
|
|
"learning_rate": 1.9893820682999444e-05,
|
|
"loss": 0.8698530197143555,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.28549992903055743,
|
|
"grad_norm": 0.7196786403656006,
|
|
"learning_rate": 1.9891732313312973e-05,
|
|
"loss": 0.8875235915184021,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.2867976559806963,
|
|
"grad_norm": 0.7486999034881592,
|
|
"learning_rate": 1.9889623717537564e-05,
|
|
"loss": 0.8711264729499817,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.2880953829308352,
|
|
"grad_norm": 0.7866005897521973,
|
|
"learning_rate": 1.9887494899984757e-05,
|
|
"loss": 0.9035714268684387,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.2893931098809741,
|
|
"grad_norm": 0.698315441608429,
|
|
"learning_rate": 1.9885345865007444e-05,
|
|
"loss": 0.873035728931427,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.290690836831113,
|
|
"grad_norm": 0.7287175059318542,
|
|
"learning_rate": 1.9883176616999863e-05,
|
|
"loss": 0.9040322303771973,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.2919885637812519,
|
|
"grad_norm": 0.6973027586936951,
|
|
"learning_rate": 1.9880987160397573e-05,
|
|
"loss": 0.8214952349662781,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.2932862907313908,
|
|
"grad_norm": 0.7529054880142212,
|
|
"learning_rate": 1.987877749967746e-05,
|
|
"loss": 0.8002289533615112,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.2945840176815297,
|
|
"grad_norm": 0.7562571167945862,
|
|
"learning_rate": 1.987654763935772e-05,
|
|
"loss": 0.8632272481918335,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.2958817446316686,
|
|
"grad_norm": 0.7309690713882446,
|
|
"learning_rate": 1.9874297583997852e-05,
|
|
"loss": 0.835785984992981,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.29717947158180746,
|
|
"grad_norm": 0.7542479038238525,
|
|
"learning_rate": 1.9872027338198652e-05,
|
|
"loss": 0.8635554909706116,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.2984771985319464,
|
|
"grad_norm": 0.743453860282898,
|
|
"learning_rate": 1.98697369066022e-05,
|
|
"loss": 0.918680727481842,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.2984771985319464,
|
|
"eval_loss": 0.818739116191864,
|
|
"eval_runtime": 153.6061,
|
|
"eval_samples_per_second": 33.801,
|
|
"eval_steps_per_second": 8.45,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.2997749254820853,
|
|
"grad_norm": 0.766386091709137,
|
|
"learning_rate": 1.986742629389184e-05,
|
|
"loss": 0.8685123324394226,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.3010726524322242,
|
|
"grad_norm": 0.7218268513679504,
|
|
"learning_rate": 1.98650955047922e-05,
|
|
"loss": 0.8525049090385437,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.30237037938236305,
|
|
"grad_norm": 0.7203767895698547,
|
|
"learning_rate": 1.9862744544069146e-05,
|
|
"loss": 0.867932915687561,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.30366810633250196,
|
|
"grad_norm": 0.7556924819946289,
|
|
"learning_rate": 1.9860373416529804e-05,
|
|
"loss": 0.8170772790908813,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.3049658332826409,
|
|
"grad_norm": 0.7739233374595642,
|
|
"learning_rate": 1.9857982127022527e-05,
|
|
"loss": 0.8461399674415588,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.3062635602327798,
|
|
"grad_norm": 0.7455801367759705,
|
|
"learning_rate": 1.9855570680436896e-05,
|
|
"loss": 0.8253067135810852,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.3075612871829187,
|
|
"grad_norm": 0.7704318761825562,
|
|
"learning_rate": 1.9853139081703712e-05,
|
|
"loss": 0.9142767786979675,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.30885901413305755,
|
|
"grad_norm": 0.7740578651428223,
|
|
"learning_rate": 1.9850687335794974e-05,
|
|
"loss": 0.8383587002754211,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.31015674108319646,
|
|
"grad_norm": 0.7392247319221497,
|
|
"learning_rate": 1.9848215447723888e-05,
|
|
"loss": 0.8735100030899048,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.3114544680333354,
|
|
"grad_norm": 0.7605814337730408,
|
|
"learning_rate": 1.9845723422544834e-05,
|
|
"loss": 0.9212141633033752,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.3127521949834743,
|
|
"grad_norm": 0.7394529581069946,
|
|
"learning_rate": 1.9843211265353376e-05,
|
|
"loss": 0.8197087049484253,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.31404992193361314,
|
|
"grad_norm": 0.6981598138809204,
|
|
"learning_rate": 1.9840678981286237e-05,
|
|
"loss": 0.77371746301651,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.31534764888375205,
|
|
"grad_norm": 0.6841283440589905,
|
|
"learning_rate": 1.98381265755213e-05,
|
|
"loss": 0.7815872430801392,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.31664537583389096,
|
|
"grad_norm": 0.7323400974273682,
|
|
"learning_rate": 1.9835554053277587e-05,
|
|
"loss": 0.8495661616325378,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.3179431027840299,
|
|
"grad_norm": 0.7340859174728394,
|
|
"learning_rate": 1.9832961419815253e-05,
|
|
"loss": 0.7806031107902527,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.31924082973416873,
|
|
"grad_norm": 0.7229768633842468,
|
|
"learning_rate": 1.983034868043558e-05,
|
|
"loss": 0.8009724617004395,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.32053855668430764,
|
|
"grad_norm": 0.7510941624641418,
|
|
"learning_rate": 1.9827715840480962e-05,
|
|
"loss": 0.9413229823112488,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.32183628363444655,
|
|
"grad_norm": 0.6999549269676208,
|
|
"learning_rate": 1.9825062905334883e-05,
|
|
"loss": 0.7988513112068176,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.32313401058458546,
|
|
"grad_norm": 0.7060723304748535,
|
|
"learning_rate": 1.9822389880421927e-05,
|
|
"loss": 0.8266105651855469,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.3244317375347243,
|
|
"grad_norm": 0.7090180516242981,
|
|
"learning_rate": 1.9819696771207756e-05,
|
|
"loss": 0.8882022500038147,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.32572946448486323,
|
|
"grad_norm": 0.7266640663146973,
|
|
"learning_rate": 1.981698358319909e-05,
|
|
"loss": 0.8313782215118408,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.32702719143500214,
|
|
"grad_norm": 0.7484982013702393,
|
|
"learning_rate": 1.981425032194372e-05,
|
|
"loss": 0.9093562960624695,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.32832491838514105,
|
|
"grad_norm": 0.7394732236862183,
|
|
"learning_rate": 1.981149699303047e-05,
|
|
"loss": 0.8808751106262207,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.3296226453352799,
|
|
"grad_norm": 0.7643232345581055,
|
|
"learning_rate": 1.9808723602089198e-05,
|
|
"loss": 0.9079170823097229,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.3309203722854188,
|
|
"grad_norm": 0.7218993902206421,
|
|
"learning_rate": 1.980593015479079e-05,
|
|
"loss": 0.8374384641647339,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.33221809923555773,
|
|
"grad_norm": 0.7780535221099854,
|
|
"learning_rate": 1.9803116656847136e-05,
|
|
"loss": 0.9171014428138733,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.33351582618569664,
|
|
"grad_norm": 0.7390936613082886,
|
|
"learning_rate": 1.9800283114011134e-05,
|
|
"loss": 0.8307523131370544,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.3348135531358355,
|
|
"grad_norm": 0.7285546064376831,
|
|
"learning_rate": 1.9797429532076652e-05,
|
|
"loss": 0.8579209446907043,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.3361112800859744,
|
|
"grad_norm": 0.7298453450202942,
|
|
"learning_rate": 1.9794555916878548e-05,
|
|
"loss": 0.9177393317222595,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.3374090070361133,
|
|
"grad_norm": 0.7240604758262634,
|
|
"learning_rate": 1.9791662274292638e-05,
|
|
"loss": 0.8674473166465759,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.33870673398625223,
|
|
"grad_norm": 0.6959360241889954,
|
|
"learning_rate": 1.978874861023569e-05,
|
|
"loss": 0.8340597152709961,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.3400044609363911,
|
|
"grad_norm": 0.711373507976532,
|
|
"learning_rate": 1.9785814930665404e-05,
|
|
"loss": 0.8793005347251892,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.34130218788653,
|
|
"grad_norm": 0.721527099609375,
|
|
"learning_rate": 1.9782861241580417e-05,
|
|
"loss": 0.7826907634735107,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.3425999148366689,
|
|
"grad_norm": 0.7333364486694336,
|
|
"learning_rate": 1.9779887549020273e-05,
|
|
"loss": 0.8747556209564209,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.3438976417868078,
|
|
"grad_norm": 0.6954993605613708,
|
|
"learning_rate": 1.9776893859065424e-05,
|
|
"loss": 0.825065553188324,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.3451953687369467,
|
|
"grad_norm": 0.7496482729911804,
|
|
"learning_rate": 1.9773880177837202e-05,
|
|
"loss": 0.8960598111152649,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.3464930956870856,
|
|
"grad_norm": 0.7554039359092712,
|
|
"learning_rate": 1.9770846511497833e-05,
|
|
"loss": 0.8298478722572327,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.3477908226372245,
|
|
"grad_norm": 0.7233474850654602,
|
|
"learning_rate": 1.9767792866250386e-05,
|
|
"loss": 0.8535934090614319,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.3490885495873634,
|
|
"grad_norm": 0.7677019238471985,
|
|
"learning_rate": 1.97647192483388e-05,
|
|
"loss": 0.8413315415382385,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.35038627653750226,
|
|
"grad_norm": 0.7146613597869873,
|
|
"learning_rate": 1.976162566404784e-05,
|
|
"loss": 0.7900301814079285,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.3516840034876412,
|
|
"grad_norm": 0.7061136364936829,
|
|
"learning_rate": 1.9758512119703106e-05,
|
|
"loss": 0.8699895739555359,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.3529817304377801,
|
|
"grad_norm": 0.7685773968696594,
|
|
"learning_rate": 1.9755378621671006e-05,
|
|
"loss": 0.9059665203094482,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.354279457387919,
|
|
"grad_norm": 0.7667369842529297,
|
|
"learning_rate": 1.9752225176358757e-05,
|
|
"loss": 0.8284919857978821,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.35557718433805785,
|
|
"grad_norm": 0.7389227151870728,
|
|
"learning_rate": 1.974905179021435e-05,
|
|
"loss": 0.8445216417312622,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.35687491128819676,
|
|
"grad_norm": 0.7373800873756409,
|
|
"learning_rate": 1.9745858469726555e-05,
|
|
"loss": 0.8499696254730225,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.35817263823833567,
|
|
"grad_norm": 0.6966509222984314,
|
|
"learning_rate": 1.9742645221424905e-05,
|
|
"loss": 0.7845723032951355,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.3594703651884746,
|
|
"grad_norm": 0.7133153080940247,
|
|
"learning_rate": 1.9739412051879686e-05,
|
|
"loss": 0.7712838053703308,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.36076809213861344,
|
|
"grad_norm": 0.7376941442489624,
|
|
"learning_rate": 1.973615896770191e-05,
|
|
"loss": 0.8497350811958313,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.36206581908875235,
|
|
"grad_norm": 0.7676963806152344,
|
|
"learning_rate": 1.97328859755433e-05,
|
|
"loss": 0.8830881714820862,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.36336354603889126,
|
|
"grad_norm": 0.7721049785614014,
|
|
"learning_rate": 1.972959308209631e-05,
|
|
"loss": 0.9047907590866089,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.36466127298903017,
|
|
"grad_norm": 0.7234658598899841,
|
|
"learning_rate": 1.9726280294094067e-05,
|
|
"loss": 0.8566961288452148,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.365958999939169,
|
|
"grad_norm": 0.7352125644683838,
|
|
"learning_rate": 1.9722947618310384e-05,
|
|
"loss": 0.8019842505455017,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.36725672688930794,
|
|
"grad_norm": 0.7341601848602295,
|
|
"learning_rate": 1.9719595061559742e-05,
|
|
"loss": 0.7666940093040466,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.36855445383944685,
|
|
"grad_norm": 0.7719873785972595,
|
|
"learning_rate": 1.9716222630697266e-05,
|
|
"loss": 0.8902671933174133,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.36985218078958576,
|
|
"grad_norm": 0.754192054271698,
|
|
"learning_rate": 1.971283033261873e-05,
|
|
"loss": 0.8718546628952026,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.3711499077397246,
|
|
"grad_norm": 0.7254419922828674,
|
|
"learning_rate": 1.9709418174260523e-05,
|
|
"loss": 0.8636943101882935,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.3724476346898635,
|
|
"grad_norm": 0.7372341156005859,
|
|
"learning_rate": 1.9705986162599642e-05,
|
|
"loss": 0.8579723238945007,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.37374536164000244,
|
|
"grad_norm": 0.7488671541213989,
|
|
"learning_rate": 1.9702534304653685e-05,
|
|
"loss": 0.8281093835830688,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.37504308859014135,
|
|
"grad_norm": 0.8016876578330994,
|
|
"learning_rate": 1.9699062607480827e-05,
|
|
"loss": 0.8639754056930542,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.3763408155402802,
|
|
"grad_norm": 0.732269823551178,
|
|
"learning_rate": 1.969557107817981e-05,
|
|
"loss": 0.8395862579345703,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.3776385424904191,
|
|
"grad_norm": 0.7406111359596252,
|
|
"learning_rate": 1.9692059723889927e-05,
|
|
"loss": 0.8540798425674438,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.378936269440558,
|
|
"grad_norm": 0.7769038677215576,
|
|
"learning_rate": 1.968852855179101e-05,
|
|
"loss": 0.8707680106163025,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.38023399639069694,
|
|
"grad_norm": 0.7666140198707581,
|
|
"learning_rate": 1.9684977569103415e-05,
|
|
"loss": 0.8578312993049622,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.3815317233408358,
|
|
"grad_norm": 0.7852650284767151,
|
|
"learning_rate": 1.9681406783087998e-05,
|
|
"loss": 0.7673178911209106,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.3828294502909747,
|
|
"grad_norm": 0.6789321899414062,
|
|
"learning_rate": 1.9677816201046113e-05,
|
|
"loss": 0.7785404324531555,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.3841271772411136,
|
|
"grad_norm": 0.7129622101783752,
|
|
"learning_rate": 1.9674205830319594e-05,
|
|
"loss": 0.7908732295036316,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.3854249041912525,
|
|
"grad_norm": 0.7952395081520081,
|
|
"learning_rate": 1.9670575678290732e-05,
|
|
"loss": 0.905153751373291,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.3867226311413914,
|
|
"grad_norm": 0.7407474517822266,
|
|
"learning_rate": 1.9666925752382275e-05,
|
|
"loss": 0.8455154895782471,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.3880203580915303,
|
|
"grad_norm": 0.7149595022201538,
|
|
"learning_rate": 1.9663256060057395e-05,
|
|
"loss": 0.7669047117233276,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.3893180850416692,
|
|
"grad_norm": 0.724448025226593,
|
|
"learning_rate": 1.9659566608819677e-05,
|
|
"loss": 0.827459990978241,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.3906158119918081,
|
|
"grad_norm": 0.7544072270393372,
|
|
"learning_rate": 1.9655857406213124e-05,
|
|
"loss": 0.8931189775466919,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.39191353894194697,
|
|
"grad_norm": 0.7281385064125061,
|
|
"learning_rate": 1.9652128459822113e-05,
|
|
"loss": 0.8091886639595032,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.3932112658920859,
|
|
"grad_norm": 0.7316269874572754,
|
|
"learning_rate": 1.9648379777271397e-05,
|
|
"loss": 0.7829949855804443,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.3945089928422248,
|
|
"grad_norm": 0.7421220541000366,
|
|
"learning_rate": 1.964461136622608e-05,
|
|
"loss": 0.8580082058906555,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.3958067197923637,
|
|
"grad_norm": 0.7127732038497925,
|
|
"learning_rate": 1.9640823234391614e-05,
|
|
"loss": 0.7645027041435242,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.39710444674250256,
|
|
"grad_norm": 0.7605704665184021,
|
|
"learning_rate": 1.9637015389513765e-05,
|
|
"loss": 0.8976550698280334,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.39840217369264147,
|
|
"grad_norm": 0.7157081365585327,
|
|
"learning_rate": 1.963318783937861e-05,
|
|
"loss": 0.7898974418640137,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.3996999006427804,
|
|
"grad_norm": 0.694803774356842,
|
|
"learning_rate": 1.962934059181253e-05,
|
|
"loss": 0.8454594612121582,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.4009976275929193,
|
|
"grad_norm": 0.7790278792381287,
|
|
"learning_rate": 1.962547365468216e-05,
|
|
"loss": 0.8850522041320801,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.40229535454305815,
|
|
"grad_norm": 0.7630907893180847,
|
|
"learning_rate": 1.962158703589442e-05,
|
|
"loss": 0.7932512760162354,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.40359308149319706,
|
|
"grad_norm": 0.7254197597503662,
|
|
"learning_rate": 1.9617680743396452e-05,
|
|
"loss": 0.8825772404670715,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.40489080844333597,
|
|
"grad_norm": 0.6837211847305298,
|
|
"learning_rate": 1.961375478517564e-05,
|
|
"loss": 0.787892758846283,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.4061885353934749,
|
|
"grad_norm": 0.8057960867881775,
|
|
"learning_rate": 1.9609809169259573e-05,
|
|
"loss": 0.8797285556793213,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.40748626234361374,
|
|
"grad_norm": 0.7656168341636658,
|
|
"learning_rate": 1.960584390371604e-05,
|
|
"loss": 0.8403958678245544,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.40878398929375265,
|
|
"grad_norm": 0.7079064249992371,
|
|
"learning_rate": 1.9601858996653004e-05,
|
|
"loss": 0.8279827237129211,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.41008171624389156,
|
|
"grad_norm": 0.7371337413787842,
|
|
"learning_rate": 1.9597854456218588e-05,
|
|
"loss": 0.8244680166244507,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.41137944319403047,
|
|
"grad_norm": 0.7662513256072998,
|
|
"learning_rate": 1.9593830290601067e-05,
|
|
"loss": 0.8895809650421143,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.4126771701441694,
|
|
"grad_norm": 0.7431499361991882,
|
|
"learning_rate": 1.9589786508028842e-05,
|
|
"loss": 0.8213914632797241,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.41397489709430824,
|
|
"grad_norm": 0.7631136178970337,
|
|
"learning_rate": 1.9585723116770425e-05,
|
|
"loss": 0.8473777770996094,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.41527262404444715,
|
|
"grad_norm": 0.7579299807548523,
|
|
"learning_rate": 1.9581640125134415e-05,
|
|
"loss": 0.8756963014602661,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.41657035099458606,
|
|
"grad_norm": 0.75262850522995,
|
|
"learning_rate": 1.9577537541469506e-05,
|
|
"loss": 0.8210287094116211,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.41786807794472497,
|
|
"grad_norm": 0.7107104063034058,
|
|
"learning_rate": 1.957341537416444e-05,
|
|
"loss": 0.7835584878921509,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.4191658048948638,
|
|
"grad_norm": 0.7898051738739014,
|
|
"learning_rate": 1.9569273631648005e-05,
|
|
"loss": 0.8497559428215027,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.42046353184500274,
|
|
"grad_norm": 0.7612116932868958,
|
|
"learning_rate": 1.9565112322389017e-05,
|
|
"loss": 0.8350054621696472,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.42176125879514165,
|
|
"grad_norm": 0.7677422761917114,
|
|
"learning_rate": 1.95609314548963e-05,
|
|
"loss": 0.8192890286445618,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.42305898574528056,
|
|
"grad_norm": 0.7246314883232117,
|
|
"learning_rate": 1.955673103771867e-05,
|
|
"loss": 0.7340703010559082,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.4243567126954194,
|
|
"grad_norm": 0.7684205770492554,
|
|
"learning_rate": 1.9552511079444914e-05,
|
|
"loss": 0.8853901028633118,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.4256544396455583,
|
|
"grad_norm": 0.7860892415046692,
|
|
"learning_rate": 1.9548271588703783e-05,
|
|
"loss": 0.8821452856063843,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.42695216659569724,
|
|
"grad_norm": 0.6936531662940979,
|
|
"learning_rate": 1.954401257416396e-05,
|
|
"loss": 0.7570967674255371,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.42824989354583615,
|
|
"grad_norm": 0.7630011439323425,
|
|
"learning_rate": 1.9539734044534057e-05,
|
|
"loss": 0.8907523155212402,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.429547620495975,
|
|
"grad_norm": 0.7460386753082275,
|
|
"learning_rate": 1.9535436008562576e-05,
|
|
"loss": 0.8264608383178711,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.4308453474461139,
|
|
"grad_norm": 0.6788963675498962,
|
|
"learning_rate": 1.9531118475037916e-05,
|
|
"loss": 0.7674898505210876,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.4321430743962528,
|
|
"grad_norm": 0.7098816633224487,
|
|
"learning_rate": 1.9526781452788342e-05,
|
|
"loss": 0.8403605818748474,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.43344080134639174,
|
|
"grad_norm": 0.7769349813461304,
|
|
"learning_rate": 1.9522424950681964e-05,
|
|
"loss": 0.8386063575744629,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.4347385282965306,
|
|
"grad_norm": 0.7037668824195862,
|
|
"learning_rate": 1.951804897762673e-05,
|
|
"loss": 0.7852950096130371,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.4360362552466695,
|
|
"grad_norm": 0.6976593136787415,
|
|
"learning_rate": 1.951365354257039e-05,
|
|
"loss": 0.7828155159950256,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.4373339821968084,
|
|
"grad_norm": 0.6809433698654175,
|
|
"learning_rate": 1.9509238654500505e-05,
|
|
"loss": 0.7821134924888611,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.4386317091469473,
|
|
"grad_norm": 0.7023005485534668,
|
|
"learning_rate": 1.95048043224444e-05,
|
|
"loss": 0.8137397766113281,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.4399294360970862,
|
|
"grad_norm": 0.709460973739624,
|
|
"learning_rate": 1.9500350555469164e-05,
|
|
"loss": 0.8287125825881958,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.4412271630472251,
|
|
"grad_norm": 0.7066413760185242,
|
|
"learning_rate": 1.9495877362681613e-05,
|
|
"loss": 0.7227614521980286,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.442524889997364,
|
|
"grad_norm": 0.7095454335212708,
|
|
"learning_rate": 1.9491384753228308e-05,
|
|
"loss": 0.8386364579200745,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.4438226169475029,
|
|
"grad_norm": 0.704826831817627,
|
|
"learning_rate": 1.948687273629549e-05,
|
|
"loss": 0.7332904934883118,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.44512034389764177,
|
|
"grad_norm": 0.7315965294837952,
|
|
"learning_rate": 1.9482341321109096e-05,
|
|
"loss": 0.8262498378753662,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.4464180708477807,
|
|
"grad_norm": 0.7236066460609436,
|
|
"learning_rate": 1.947779051693472e-05,
|
|
"loss": 0.8105201721191406,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.4477157977979196,
|
|
"grad_norm": 0.7457305192947388,
|
|
"learning_rate": 1.9473220333077604e-05,
|
|
"loss": 0.9067633748054504,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.4490135247480585,
|
|
"grad_norm": 0.7768529653549194,
|
|
"learning_rate": 1.946863077888262e-05,
|
|
"loss": 0.9473153352737427,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.45031125169819736,
|
|
"grad_norm": 0.7324157357215881,
|
|
"learning_rate": 1.946402186373424e-05,
|
|
"loss": 0.8552070260047913,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.45160897864833627,
|
|
"grad_norm": 0.7343083024024963,
|
|
"learning_rate": 1.9459393597056536e-05,
|
|
"loss": 0.7906739115715027,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.4529067055984752,
|
|
"grad_norm": 0.7099336385726929,
|
|
"learning_rate": 1.9454745988313135e-05,
|
|
"loss": 0.7985537052154541,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.4542044325486141,
|
|
"grad_norm": 0.7202642560005188,
|
|
"learning_rate": 1.945007904700723e-05,
|
|
"loss": 0.8377722501754761,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.45550215949875295,
|
|
"grad_norm": 0.7456194162368774,
|
|
"learning_rate": 1.9445392782681523e-05,
|
|
"loss": 0.7578713893890381,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.45679988644889186,
|
|
"grad_norm": 0.6951096653938293,
|
|
"learning_rate": 1.9440687204918245e-05,
|
|
"loss": 0.8215861320495605,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.45809761339903077,
|
|
"grad_norm": 0.6824142932891846,
|
|
"learning_rate": 1.943596232333911e-05,
|
|
"loss": 0.7992759346961975,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.4593953403491697,
|
|
"grad_norm": 0.7076693773269653,
|
|
"learning_rate": 1.9431218147605307e-05,
|
|
"loss": 0.889447033405304,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.46069306729930853,
|
|
"grad_norm": 0.7202051877975464,
|
|
"learning_rate": 1.9426454687417474e-05,
|
|
"loss": 0.7953578233718872,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.46199079424944745,
|
|
"grad_norm": 0.6777750253677368,
|
|
"learning_rate": 1.942167195251568e-05,
|
|
"loss": 0.7135353088378906,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.46328852119958636,
|
|
"grad_norm": 0.7169584035873413,
|
|
"learning_rate": 1.941686995267941e-05,
|
|
"loss": 0.8654831051826477,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.46458624814972527,
|
|
"grad_norm": 0.7217689752578735,
|
|
"learning_rate": 1.941204869772753e-05,
|
|
"loss": 0.8449923992156982,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.4658839750998641,
|
|
"grad_norm": 0.7016704678535461,
|
|
"learning_rate": 1.9407208197518296e-05,
|
|
"loss": 0.8285680413246155,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.46718170205000303,
|
|
"grad_norm": 0.7271103262901306,
|
|
"learning_rate": 1.94023484619493e-05,
|
|
"loss": 0.788341760635376,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.46847942900014194,
|
|
"grad_norm": 0.7725624442100525,
|
|
"learning_rate": 1.9397469500957478e-05,
|
|
"loss": 0.8492755889892578,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.46977715595028086,
|
|
"grad_norm": 0.737015962600708,
|
|
"learning_rate": 1.939257132451906e-05,
|
|
"loss": 0.8843685388565063,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.4710748829004197,
|
|
"grad_norm": 0.7315338850021362,
|
|
"learning_rate": 1.9387653942649586e-05,
|
|
"loss": 0.8183721899986267,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.4723726098505586,
|
|
"grad_norm": 0.7253148555755615,
|
|
"learning_rate": 1.9382717365403854e-05,
|
|
"loss": 0.8446192145347595,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.47367033680069753,
|
|
"grad_norm": 0.7184107303619385,
|
|
"learning_rate": 1.9377761602875913e-05,
|
|
"loss": 0.8196067214012146,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.47496806375083644,
|
|
"grad_norm": 0.7668046355247498,
|
|
"learning_rate": 1.937278666519905e-05,
|
|
"loss": 0.8784077167510986,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.4762657907009753,
|
|
"grad_norm": 0.7028603553771973,
|
|
"learning_rate": 1.9367792562545744e-05,
|
|
"loss": 0.8172916769981384,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.4775635176511142,
|
|
"grad_norm": 0.7071288824081421,
|
|
"learning_rate": 1.9362779305127674e-05,
|
|
"loss": 0.7726463079452515,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.4788612446012531,
|
|
"grad_norm": 0.744328498840332,
|
|
"learning_rate": 1.9357746903195686e-05,
|
|
"loss": 0.8223643898963928,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.48015897155139203,
|
|
"grad_norm": 0.7051971554756165,
|
|
"learning_rate": 1.9352695367039764e-05,
|
|
"loss": 0.7989709973335266,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.4814566985015309,
|
|
"grad_norm": 0.6921087503433228,
|
|
"learning_rate": 1.9347624706989026e-05,
|
|
"loss": 0.8276992440223694,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.4827544254516698,
|
|
"grad_norm": 0.775720477104187,
|
|
"learning_rate": 1.9342534933411683e-05,
|
|
"loss": 0.8847764730453491,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.4840521524018087,
|
|
"grad_norm": 0.7056650519371033,
|
|
"learning_rate": 1.9337426056715036e-05,
|
|
"loss": 0.8185163736343384,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.4853498793519476,
|
|
"grad_norm": 0.746159017086029,
|
|
"learning_rate": 1.9332298087345447e-05,
|
|
"loss": 0.8038766980171204,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.4866476063020865,
|
|
"grad_norm": 0.7275370359420776,
|
|
"learning_rate": 1.932715103578831e-05,
|
|
"loss": 0.8622571229934692,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.4879453332522254,
|
|
"grad_norm": 0.6875770688056946,
|
|
"learning_rate": 1.9321984912568048e-05,
|
|
"loss": 0.7297530770301819,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.4892430602023643,
|
|
"grad_norm": 0.7196366190910339,
|
|
"learning_rate": 1.9316799728248074e-05,
|
|
"loss": 0.8098776340484619,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.4905407871525032,
|
|
"grad_norm": 0.8017922043800354,
|
|
"learning_rate": 1.9311595493430776e-05,
|
|
"loss": 0.8927175998687744,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.49183851410264207,
|
|
"grad_norm": 0.752349317073822,
|
|
"learning_rate": 1.93063722187575e-05,
|
|
"loss": 0.8595757484436035,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.493136241052781,
|
|
"grad_norm": 0.7166591882705688,
|
|
"learning_rate": 1.9301129914908516e-05,
|
|
"loss": 0.8619329333305359,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.4944339680029199,
|
|
"grad_norm": 0.7622588872909546,
|
|
"learning_rate": 1.9295868592603012e-05,
|
|
"loss": 0.9877883195877075,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.4957316949530588,
|
|
"grad_norm": 0.738442063331604,
|
|
"learning_rate": 1.929058826259906e-05,
|
|
"loss": 0.8450830578804016,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.49702942190319765,
|
|
"grad_norm": 0.7250852584838867,
|
|
"learning_rate": 1.9285288935693597e-05,
|
|
"loss": 0.8014863133430481,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.49832714885333657,
|
|
"grad_norm": 0.7121626138687134,
|
|
"learning_rate": 1.9279970622722403e-05,
|
|
"loss": 0.8381094932556152,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.4996248758034755,
|
|
"grad_norm": 0.7626416087150574,
|
|
"learning_rate": 1.927463333456009e-05,
|
|
"loss": 0.8965335488319397,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.5009226027536143,
|
|
"grad_norm": 0.7094375491142273,
|
|
"learning_rate": 1.9269277082120053e-05,
|
|
"loss": 0.8557580709457397,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.5022203297037533,
|
|
"grad_norm": 0.7018871903419495,
|
|
"learning_rate": 1.926390187635448e-05,
|
|
"loss": 0.8587688207626343,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.5035180566538922,
|
|
"grad_norm": 0.7267133593559265,
|
|
"learning_rate": 1.92585077282543e-05,
|
|
"loss": 0.8346423506736755,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.504815783604031,
|
|
"grad_norm": 0.7274966835975647,
|
|
"learning_rate": 1.9253094648849183e-05,
|
|
"loss": 0.8169071078300476,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.50611351055417,
|
|
"grad_norm": 0.7901791334152222,
|
|
"learning_rate": 1.924766264920751e-05,
|
|
"loss": 0.9163885116577148,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.5074112375043088,
|
|
"grad_norm": 0.7128793001174927,
|
|
"learning_rate": 1.9242211740436335e-05,
|
|
"loss": 0.8264936804771423,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.5087089644544478,
|
|
"grad_norm": 0.7791725993156433,
|
|
"learning_rate": 1.9236741933681396e-05,
|
|
"loss": 0.830746054649353,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.5100066914045867,
|
|
"grad_norm": 0.7333115339279175,
|
|
"learning_rate": 1.9231253240127062e-05,
|
|
"loss": 0.7689610719680786,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.5113044183547255,
|
|
"grad_norm": 0.722161591053009,
|
|
"learning_rate": 1.922574567099632e-05,
|
|
"loss": 0.8242402076721191,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.5126021453048645,
|
|
"grad_norm": 0.7445337176322937,
|
|
"learning_rate": 1.9220219237550757e-05,
|
|
"loss": 0.8102379441261292,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.5138998722550033,
|
|
"grad_norm": 0.6720981001853943,
|
|
"learning_rate": 1.921467395109053e-05,
|
|
"loss": 0.7922290563583374,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.5151975992051423,
|
|
"grad_norm": 0.793062686920166,
|
|
"learning_rate": 1.9209109822954345e-05,
|
|
"loss": 0.8537084460258484,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.5164953261552812,
|
|
"grad_norm": 0.7766822576522827,
|
|
"learning_rate": 1.9203526864519432e-05,
|
|
"loss": 0.8576462864875793,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.51779305310542,
|
|
"grad_norm": 0.7053048610687256,
|
|
"learning_rate": 1.919792508720154e-05,
|
|
"loss": 0.7955272197723389,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.519090780055559,
|
|
"grad_norm": 0.7525441646575928,
|
|
"learning_rate": 1.9192304502454876e-05,
|
|
"loss": 0.7955189347267151,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.5203885070056978,
|
|
"grad_norm": 0.7097117304801941,
|
|
"learning_rate": 1.918666512177211e-05,
|
|
"loss": 0.8108992576599121,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.5216862339558367,
|
|
"grad_norm": 0.7281200885772705,
|
|
"learning_rate": 1.918100695668436e-05,
|
|
"loss": 0.7774943113327026,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.5229839609059757,
|
|
"grad_norm": 0.6979084610939026,
|
|
"learning_rate": 1.917533001876113e-05,
|
|
"loss": 0.8288201093673706,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.5242816878561145,
|
|
"grad_norm": 0.7136226892471313,
|
|
"learning_rate": 1.916963431961033e-05,
|
|
"loss": 0.8710139393806458,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.5255794148062535,
|
|
"grad_norm": 0.6950761079788208,
|
|
"learning_rate": 1.916391987087822e-05,
|
|
"loss": 0.82500821352005,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.5268771417563923,
|
|
"grad_norm": 0.7169130444526672,
|
|
"learning_rate": 1.9158186684249397e-05,
|
|
"loss": 0.8732189536094666,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.5281748687065312,
|
|
"grad_norm": 0.71788489818573,
|
|
"learning_rate": 1.9152434771446783e-05,
|
|
"loss": 0.7809304594993591,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.5294725956566702,
|
|
"grad_norm": 0.7155045866966248,
|
|
"learning_rate": 1.914666414423158e-05,
|
|
"loss": 0.7732210159301758,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.530770322606809,
|
|
"grad_norm": 0.6769919991493225,
|
|
"learning_rate": 1.914087481440326e-05,
|
|
"loss": 0.8261522650718689,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.5320680495569479,
|
|
"grad_norm": 0.7309243679046631,
|
|
"learning_rate": 1.9135066793799538e-05,
|
|
"loss": 0.7936241626739502,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.5333657765070868,
|
|
"grad_norm": 0.6851993203163147,
|
|
"learning_rate": 1.912924009429635e-05,
|
|
"loss": 0.8394724130630493,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.5346635034572257,
|
|
"grad_norm": 0.7112469673156738,
|
|
"learning_rate": 1.9123394727807816e-05,
|
|
"loss": 0.8659080862998962,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.5359612304073647,
|
|
"grad_norm": 0.8407036066055298,
|
|
"learning_rate": 1.9117530706286232e-05,
|
|
"loss": 0.8815537095069885,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.5372589573575035,
|
|
"grad_norm": 0.7725140452384949,
|
|
"learning_rate": 1.9111648041722044e-05,
|
|
"loss": 0.8264433741569519,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.5385566843076424,
|
|
"grad_norm": 0.7106306552886963,
|
|
"learning_rate": 1.91057467461438e-05,
|
|
"loss": 0.8120384812355042,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.5398544112577813,
|
|
"grad_norm": 0.7314519882202148,
|
|
"learning_rate": 1.9099826831618168e-05,
|
|
"loss": 0.7814322113990784,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.5411521382079202,
|
|
"grad_norm": 0.7492959499359131,
|
|
"learning_rate": 1.909388831024987e-05,
|
|
"loss": 0.8211044669151306,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.542449865158059,
|
|
"grad_norm": 0.7524264454841614,
|
|
"learning_rate": 1.908793119418168e-05,
|
|
"loss": 0.831349790096283,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.543747592108198,
|
|
"grad_norm": 0.768027663230896,
|
|
"learning_rate": 1.9081955495594388e-05,
|
|
"loss": 0.777296245098114,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.5450453190583369,
|
|
"grad_norm": 0.6683104038238525,
|
|
"learning_rate": 1.9075961226706784e-05,
|
|
"loss": 0.8545945882797241,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.5463430460084758,
|
|
"grad_norm": 0.7471824288368225,
|
|
"learning_rate": 1.906994839977564e-05,
|
|
"loss": 0.8631961941719055,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.5476407729586147,
|
|
"grad_norm": 0.7404365539550781,
|
|
"learning_rate": 1.9063917027095664e-05,
|
|
"loss": 0.8402459025382996,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.5489384999087535,
|
|
"grad_norm": 0.790240466594696,
|
|
"learning_rate": 1.905786712099948e-05,
|
|
"loss": 0.8911325335502625,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.5502362268588925,
|
|
"grad_norm": 0.7139849662780762,
|
|
"learning_rate": 1.9051798693857617e-05,
|
|
"loss": 0.8359181880950928,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.5515339538090314,
|
|
"grad_norm": 0.7506136894226074,
|
|
"learning_rate": 1.904571175807848e-05,
|
|
"loss": 0.8717991709709167,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.5528316807591702,
|
|
"grad_norm": 0.7033493518829346,
|
|
"learning_rate": 1.9039606326108297e-05,
|
|
"loss": 0.808268666267395,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.5541294077093092,
|
|
"grad_norm": 0.7442057132720947,
|
|
"learning_rate": 1.903348241043114e-05,
|
|
"loss": 0.8272799849510193,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.555427134659448,
|
|
"grad_norm": 0.7257173657417297,
|
|
"learning_rate": 1.902734002356887e-05,
|
|
"loss": 0.8194448947906494,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.556724861609587,
|
|
"grad_norm": 0.7403514385223389,
|
|
"learning_rate": 1.9021179178081107e-05,
|
|
"loss": 0.7172797322273254,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.5580225885597259,
|
|
"grad_norm": 0.7432394623756409,
|
|
"learning_rate": 1.9014999886565226e-05,
|
|
"loss": 0.7437801361083984,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.5593203155098647,
|
|
"grad_norm": 0.6978660225868225,
|
|
"learning_rate": 1.9008802161656308e-05,
|
|
"loss": 0.7967916131019592,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.5606180424600037,
|
|
"grad_norm": 0.7165699005126953,
|
|
"learning_rate": 1.9002586016027136e-05,
|
|
"loss": 0.8070824146270752,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.5619157694101425,
|
|
"grad_norm": 0.7089285254478455,
|
|
"learning_rate": 1.8996351462388153e-05,
|
|
"loss": 0.8515596389770508,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.5632134963602814,
|
|
"grad_norm": 0.7979022860527039,
|
|
"learning_rate": 1.8990098513487447e-05,
|
|
"loss": 0.8934742212295532,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.5645112233104204,
|
|
"grad_norm": 0.6929235458374023,
|
|
"learning_rate": 1.898382718211071e-05,
|
|
"loss": 0.7550987601280212,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.5658089502605592,
|
|
"grad_norm": 0.7286235094070435,
|
|
"learning_rate": 1.897753748108123e-05,
|
|
"loss": 0.8770807981491089,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.5671066772106982,
|
|
"grad_norm": 0.7233553528785706,
|
|
"learning_rate": 1.8971229423259855e-05,
|
|
"loss": 0.7454729080200195,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.568404404160837,
|
|
"grad_norm": 0.7452800869941711,
|
|
"learning_rate": 1.8964903021544964e-05,
|
|
"loss": 0.8079807758331299,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.5697021311109759,
|
|
"grad_norm": 0.696835994720459,
|
|
"learning_rate": 1.895855828887245e-05,
|
|
"loss": 0.8501238226890564,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.5709998580611149,
|
|
"grad_norm": 0.6924627423286438,
|
|
"learning_rate": 1.895219523821568e-05,
|
|
"loss": 0.7888904213905334,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.5722975850112537,
|
|
"grad_norm": 0.764805793762207,
|
|
"learning_rate": 1.894581388258549e-05,
|
|
"loss": 0.8138964772224426,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.5735953119613926,
|
|
"grad_norm": 0.8151068091392517,
|
|
"learning_rate": 1.8939414235030137e-05,
|
|
"loss": 0.8286200165748596,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.5748930389115315,
|
|
"grad_norm": 0.739456832408905,
|
|
"learning_rate": 1.893299630863527e-05,
|
|
"loss": 0.7820205092430115,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.5761907658616704,
|
|
"grad_norm": 0.7076554298400879,
|
|
"learning_rate": 1.892656011652393e-05,
|
|
"loss": 0.8406723737716675,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.5774884928118094,
|
|
"grad_norm": 0.6758636832237244,
|
|
"learning_rate": 1.8920105671856507e-05,
|
|
"loss": 0.793111264705658,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.5787862197619482,
|
|
"grad_norm": 0.7238133549690247,
|
|
"learning_rate": 1.89136329878307e-05,
|
|
"loss": 0.7582585215568542,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.5800839467120871,
|
|
"grad_norm": 0.7192074656486511,
|
|
"learning_rate": 1.890714207768151e-05,
|
|
"loss": 0.7284867763519287,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.581381673662226,
|
|
"grad_norm": 0.7265046834945679,
|
|
"learning_rate": 1.8900632954681203e-05,
|
|
"loss": 0.836294412612915,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.5826794006123649,
|
|
"grad_norm": 0.7325915098190308,
|
|
"learning_rate": 1.8894105632139296e-05,
|
|
"loss": 0.7910576462745667,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.5839771275625038,
|
|
"grad_norm": 0.7702357172966003,
|
|
"learning_rate": 1.8887560123402505e-05,
|
|
"loss": 0.8775222301483154,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.5852748545126427,
|
|
"grad_norm": 0.7335582971572876,
|
|
"learning_rate": 1.888099644185474e-05,
|
|
"loss": 0.8012707829475403,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.5865725814627816,
|
|
"grad_norm": 0.733706533908844,
|
|
"learning_rate": 1.887441460091707e-05,
|
|
"loss": 0.7948039174079895,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.5878703084129205,
|
|
"grad_norm": 0.7587592005729675,
|
|
"learning_rate": 1.886781461404769e-05,
|
|
"loss": 0.804535448551178,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.5891680353630594,
|
|
"grad_norm": 0.7819000482559204,
|
|
"learning_rate": 1.886119649474191e-05,
|
|
"loss": 0.7766174077987671,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.5904657623131983,
|
|
"grad_norm": 0.69929039478302,
|
|
"learning_rate": 1.8854560256532098e-05,
|
|
"loss": 0.7503871321678162,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.5917634892633372,
|
|
"grad_norm": 0.742264449596405,
|
|
"learning_rate": 1.8847905912987693e-05,
|
|
"loss": 0.7669814229011536,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.5930612162134761,
|
|
"grad_norm": 0.7957385182380676,
|
|
"learning_rate": 1.8841233477715136e-05,
|
|
"loss": 0.7808370590209961,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.5943589431636149,
|
|
"grad_norm": 0.7357493042945862,
|
|
"learning_rate": 1.8834542964357875e-05,
|
|
"loss": 0.8638509511947632,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.5956566701137539,
|
|
"grad_norm": 0.6800391674041748,
|
|
"learning_rate": 1.8827834386596306e-05,
|
|
"loss": 0.8268325924873352,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.5969543970638927,
|
|
"grad_norm": 0.6685859560966492,
|
|
"learning_rate": 1.882110775814778e-05,
|
|
"loss": 0.7641065716743469,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.5969543970638927,
|
|
"eval_loss": 0.788587212562561,
|
|
"eval_runtime": 140.6113,
|
|
"eval_samples_per_second": 36.924,
|
|
"eval_steps_per_second": 9.231,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.5982521240140317,
|
|
"grad_norm": 0.7249795794487,
|
|
"learning_rate": 1.881436309276655e-05,
|
|
"loss": 0.8106693625450134,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.5995498509641706,
|
|
"grad_norm": 0.7279155254364014,
|
|
"learning_rate": 1.8807600404243746e-05,
|
|
"loss": 0.7669492363929749,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.6008475779143094,
|
|
"grad_norm": 0.6802601218223572,
|
|
"learning_rate": 1.8800819706407355e-05,
|
|
"loss": 0.7968916296958923,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.6021453048644484,
|
|
"grad_norm": 0.6981019973754883,
|
|
"learning_rate": 1.879402101312219e-05,
|
|
"loss": 0.736625075340271,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.6034430318145872,
|
|
"grad_norm": 0.7771289944648743,
|
|
"learning_rate": 1.8787204338289858e-05,
|
|
"loss": 0.8314676284790039,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.6047407587647261,
|
|
"grad_norm": 0.7184056043624878,
|
|
"learning_rate": 1.8780369695848733e-05,
|
|
"loss": 0.7979223132133484,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.6060384857148651,
|
|
"grad_norm": 0.7473218441009521,
|
|
"learning_rate": 1.8773517099773927e-05,
|
|
"loss": 0.858469545841217,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.6073362126650039,
|
|
"grad_norm": 0.683022141456604,
|
|
"learning_rate": 1.8766646564077265e-05,
|
|
"loss": 0.8193258047103882,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.6086339396151429,
|
|
"grad_norm": 0.7081974148750305,
|
|
"learning_rate": 1.8759758102807253e-05,
|
|
"loss": 0.7676112055778503,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.6099316665652817,
|
|
"grad_norm": 0.7614895105361938,
|
|
"learning_rate": 1.8752851730049055e-05,
|
|
"loss": 0.8635563254356384,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.6112293935154206,
|
|
"grad_norm": 0.7243057489395142,
|
|
"learning_rate": 1.8745927459924454e-05,
|
|
"loss": 0.9161559343338013,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.6125271204655596,
|
|
"grad_norm": 0.6948226690292358,
|
|
"learning_rate": 1.8738985306591826e-05,
|
|
"loss": 0.7749679684638977,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.6138248474156984,
|
|
"grad_norm": 0.7040874361991882,
|
|
"learning_rate": 1.8732025284246122e-05,
|
|
"loss": 0.79802006483078,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.6151225743658374,
|
|
"grad_norm": 0.7108686566352844,
|
|
"learning_rate": 1.8725047407118823e-05,
|
|
"loss": 0.7963647246360779,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.6164203013159762,
|
|
"grad_norm": 0.6806232333183289,
|
|
"learning_rate": 1.8718051689477923e-05,
|
|
"loss": 0.8362119197845459,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.6177180282661151,
|
|
"grad_norm": 0.7135924696922302,
|
|
"learning_rate": 1.8711038145627893e-05,
|
|
"loss": 0.8811363577842712,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.6190157552162541,
|
|
"grad_norm": 0.7035737633705139,
|
|
"learning_rate": 1.8704006789909654e-05,
|
|
"loss": 0.839409351348877,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.6203134821663929,
|
|
"grad_norm": 0.6822429299354553,
|
|
"learning_rate": 1.8696957636700555e-05,
|
|
"loss": 0.8191482424736023,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.6216112091165318,
|
|
"grad_norm": 0.731574296951294,
|
|
"learning_rate": 1.868989070041432e-05,
|
|
"loss": 0.853705108165741,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.6229089360666707,
|
|
"grad_norm": 0.7717382907867432,
|
|
"learning_rate": 1.8682805995501052e-05,
|
|
"loss": 0.7867730259895325,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.6242066630168096,
|
|
"grad_norm": 0.7173001170158386,
|
|
"learning_rate": 1.8675703536447178e-05,
|
|
"loss": 0.8229404091835022,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.6255043899669486,
|
|
"grad_norm": 0.7436506748199463,
|
|
"learning_rate": 1.866858333777543e-05,
|
|
"loss": 0.8175429105758667,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.6268021169170874,
|
|
"grad_norm": 0.6823157072067261,
|
|
"learning_rate": 1.8661445414044813e-05,
|
|
"loss": 0.8235064148902893,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.6280998438672263,
|
|
"grad_norm": 0.6958295702934265,
|
|
"learning_rate": 1.865428977985057e-05,
|
|
"loss": 0.8292087316513062,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.6293975708173652,
|
|
"grad_norm": 0.7212422490119934,
|
|
"learning_rate": 1.8647116449824165e-05,
|
|
"loss": 0.8680652379989624,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.6306952977675041,
|
|
"grad_norm": 0.692675769329071,
|
|
"learning_rate": 1.8639925438633243e-05,
|
|
"loss": 0.8230209350585938,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.631993024717643,
|
|
"grad_norm": 0.7433279752731323,
|
|
"learning_rate": 1.86327167609816e-05,
|
|
"loss": 0.7730977535247803,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.6332907516677819,
|
|
"grad_norm": 0.7101516723632812,
|
|
"learning_rate": 1.8625490431609154e-05,
|
|
"loss": 0.9187572002410889,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.6345884786179208,
|
|
"grad_norm": 0.7050445675849915,
|
|
"learning_rate": 1.8618246465291925e-05,
|
|
"loss": 0.8063424229621887,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.6358862055680597,
|
|
"grad_norm": 0.7434412240982056,
|
|
"learning_rate": 1.861098487684199e-05,
|
|
"loss": 0.7892963290214539,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.6371839325181986,
|
|
"grad_norm": 0.7191323041915894,
|
|
"learning_rate": 1.8603705681107456e-05,
|
|
"loss": 0.7660176157951355,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.6384816594683375,
|
|
"grad_norm": 0.7202406525611877,
|
|
"learning_rate": 1.8596408892972442e-05,
|
|
"loss": 0.8213373422622681,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.6397793864184764,
|
|
"grad_norm": 0.6945679783821106,
|
|
"learning_rate": 1.858909452735703e-05,
|
|
"loss": 0.7523878216743469,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.6410771133686153,
|
|
"grad_norm": 0.8023699522018433,
|
|
"learning_rate": 1.858176259921724e-05,
|
|
"loss": 0.8551954030990601,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.6423748403187541,
|
|
"grad_norm": 0.728702962398529,
|
|
"learning_rate": 1.857441312354502e-05,
|
|
"loss": 0.7901893854141235,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.6436725672688931,
|
|
"grad_norm": 0.7125030755996704,
|
|
"learning_rate": 1.856704611536818e-05,
|
|
"loss": 0.8292658925056458,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.644970294219032,
|
|
"grad_norm": 0.748110294342041,
|
|
"learning_rate": 1.8559661589750387e-05,
|
|
"loss": 0.8110982179641724,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.6462680211691709,
|
|
"grad_norm": 0.7424649000167847,
|
|
"learning_rate": 1.8552259561791133e-05,
|
|
"loss": 0.7920522689819336,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.6475657481193098,
|
|
"grad_norm": 0.7908960580825806,
|
|
"learning_rate": 1.8544840046625686e-05,
|
|
"loss": 0.9255160093307495,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.6488634750694486,
|
|
"grad_norm": 0.7190539240837097,
|
|
"learning_rate": 1.8537403059425082e-05,
|
|
"loss": 0.8494732975959778,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.6501612020195876,
|
|
"grad_norm": 0.7224424481391907,
|
|
"learning_rate": 1.852994861539607e-05,
|
|
"loss": 0.7837664484977722,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.6514589289697265,
|
|
"grad_norm": 0.7687528729438782,
|
|
"learning_rate": 1.8522476729781106e-05,
|
|
"loss": 0.8091537952423096,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.6527566559198653,
|
|
"grad_norm": 0.7272804379463196,
|
|
"learning_rate": 1.8514987417858306e-05,
|
|
"loss": 0.8691030740737915,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.6540543828700043,
|
|
"grad_norm": 0.7369651794433594,
|
|
"learning_rate": 1.8507480694941416e-05,
|
|
"loss": 0.8802081346511841,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.6553521098201431,
|
|
"grad_norm": 0.7450799942016602,
|
|
"learning_rate": 1.849995657637978e-05,
|
|
"loss": 0.8451288342475891,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.6566498367702821,
|
|
"grad_norm": 0.723861813545227,
|
|
"learning_rate": 1.8492415077558325e-05,
|
|
"loss": 0.8779444694519043,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.657947563720421,
|
|
"grad_norm": 0.6959301829338074,
|
|
"learning_rate": 1.8484856213897496e-05,
|
|
"loss": 0.8489083647727966,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.6592452906705598,
|
|
"grad_norm": 0.7295985817909241,
|
|
"learning_rate": 1.847728000085327e-05,
|
|
"loss": 0.8433302044868469,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.6605430176206988,
|
|
"grad_norm": 0.6785035133361816,
|
|
"learning_rate": 1.8469686453917074e-05,
|
|
"loss": 0.7844301462173462,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.6618407445708376,
|
|
"grad_norm": 0.7163369059562683,
|
|
"learning_rate": 1.846207558861579e-05,
|
|
"loss": 0.8518480658531189,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.6631384715209765,
|
|
"grad_norm": 0.6807128190994263,
|
|
"learning_rate": 1.845444742051172e-05,
|
|
"loss": 0.8048978447914124,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.6644361984711155,
|
|
"grad_norm": 0.7018458247184753,
|
|
"learning_rate": 1.8446801965202524e-05,
|
|
"loss": 0.7482452392578125,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.6657339254212543,
|
|
"grad_norm": 0.7418568134307861,
|
|
"learning_rate": 1.8439139238321235e-05,
|
|
"loss": 0.8263827562332153,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.6670316523713933,
|
|
"grad_norm": 0.7616980075836182,
|
|
"learning_rate": 1.8431459255536185e-05,
|
|
"loss": 0.8845346570014954,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.6683293793215321,
|
|
"grad_norm": 0.7437636852264404,
|
|
"learning_rate": 1.8423762032551e-05,
|
|
"loss": 0.7848752737045288,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.669627106271671,
|
|
"grad_norm": 0.6855003833770752,
|
|
"learning_rate": 1.841604758510454e-05,
|
|
"loss": 0.7946106195449829,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.67092483322181,
|
|
"grad_norm": 0.7443661689758301,
|
|
"learning_rate": 1.840831592897091e-05,
|
|
"loss": 0.8530216813087463,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.6722225601719488,
|
|
"grad_norm": 0.7664664387702942,
|
|
"learning_rate": 1.8400567079959383e-05,
|
|
"loss": 0.836358368396759,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.6735202871220877,
|
|
"grad_norm": 0.722017228603363,
|
|
"learning_rate": 1.8392801053914396e-05,
|
|
"loss": 0.8537322878837585,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.6748180140722266,
|
|
"grad_norm": 0.7312494516372681,
|
|
"learning_rate": 1.8385017866715507e-05,
|
|
"loss": 0.8338693380355835,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.6761157410223655,
|
|
"grad_norm": 0.7151913642883301,
|
|
"learning_rate": 1.8377217534277365e-05,
|
|
"loss": 0.879010021686554,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.6774134679725045,
|
|
"grad_norm": 0.8348478674888611,
|
|
"learning_rate": 1.8369400072549674e-05,
|
|
"loss": 0.8499034643173218,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.6787111949226433,
|
|
"grad_norm": 0.7662613987922668,
|
|
"learning_rate": 1.8361565497517166e-05,
|
|
"loss": 0.8573883175849915,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.6800089218727822,
|
|
"grad_norm": 0.7006996870040894,
|
|
"learning_rate": 1.835371382519956e-05,
|
|
"loss": 0.8768547773361206,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.6813066488229211,
|
|
"grad_norm": 0.6807017922401428,
|
|
"learning_rate": 1.8345845071651543e-05,
|
|
"loss": 0.7412630915641785,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.68260437577306,
|
|
"grad_norm": 0.7801376581192017,
|
|
"learning_rate": 1.8337959252962728e-05,
|
|
"loss": 0.7919901609420776,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.6839021027231988,
|
|
"grad_norm": 0.7031033635139465,
|
|
"learning_rate": 1.8330056385257607e-05,
|
|
"loss": 0.7936250567436218,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.6851998296733378,
|
|
"grad_norm": 0.67047518491745,
|
|
"learning_rate": 1.8322136484695553e-05,
|
|
"loss": 0.7688592076301575,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.6864975566234767,
|
|
"grad_norm": 0.7209057211875916,
|
|
"learning_rate": 1.8314199567470755e-05,
|
|
"loss": 0.7531197667121887,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.6877952835736156,
|
|
"grad_norm": 0.7783409357070923,
|
|
"learning_rate": 1.83062456498122e-05,
|
|
"loss": 0.8060978055000305,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.6890930105237545,
|
|
"grad_norm": 0.7646079659461975,
|
|
"learning_rate": 1.8298274747983638e-05,
|
|
"loss": 0.9013359546661377,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.6903907374738933,
|
|
"grad_norm": 0.6973395943641663,
|
|
"learning_rate": 1.8290286878283542e-05,
|
|
"loss": 0.789779543876648,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.6916884644240323,
|
|
"grad_norm": 0.7242528796195984,
|
|
"learning_rate": 1.8282282057045087e-05,
|
|
"loss": 0.8460395336151123,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.6929861913741712,
|
|
"grad_norm": 0.7025911211967468,
|
|
"learning_rate": 1.827426030063611e-05,
|
|
"loss": 0.7623457312583923,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.69428391832431,
|
|
"grad_norm": 0.6914080381393433,
|
|
"learning_rate": 1.8266221625459064e-05,
|
|
"loss": 0.8142719864845276,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.695581645274449,
|
|
"grad_norm": 0.7013720870018005,
|
|
"learning_rate": 1.825816604795101e-05,
|
|
"loss": 0.7999016642570496,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.6968793722245878,
|
|
"grad_norm": 0.7201952934265137,
|
|
"learning_rate": 1.8250093584583567e-05,
|
|
"loss": 0.8158777952194214,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.6981770991747268,
|
|
"grad_norm": 0.6993263363838196,
|
|
"learning_rate": 1.8242004251862872e-05,
|
|
"loss": 0.7727892994880676,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.6994748261248657,
|
|
"grad_norm": 0.7411354780197144,
|
|
"learning_rate": 1.823389806632957e-05,
|
|
"loss": 0.8402857184410095,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.7007725530750045,
|
|
"grad_norm": 0.717903733253479,
|
|
"learning_rate": 1.8225775044558757e-05,
|
|
"loss": 0.8313778042793274,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.7020702800251435,
|
|
"grad_norm": 0.7139982581138611,
|
|
"learning_rate": 1.8217635203159957e-05,
|
|
"loss": 0.8449199795722961,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.7033680069752823,
|
|
"grad_norm": 0.7448502779006958,
|
|
"learning_rate": 1.8209478558777084e-05,
|
|
"loss": 0.8782564997673035,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.7046657339254212,
|
|
"grad_norm": 0.7237476110458374,
|
|
"learning_rate": 1.8201305128088412e-05,
|
|
"loss": 0.8148598670959473,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.7059634608755602,
|
|
"grad_norm": 0.7190750241279602,
|
|
"learning_rate": 1.819311492780654e-05,
|
|
"loss": 0.8512831926345825,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.707261187825699,
|
|
"grad_norm": 0.6827414035797119,
|
|
"learning_rate": 1.8184907974678348e-05,
|
|
"loss": 0.7911166548728943,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.708558914775838,
|
|
"grad_norm": 0.7072880864143372,
|
|
"learning_rate": 1.8176684285484985e-05,
|
|
"loss": 0.7934311032295227,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.7098566417259768,
|
|
"grad_norm": 0.6981719136238098,
|
|
"learning_rate": 1.816844387704181e-05,
|
|
"loss": 0.7569193840026855,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.7111543686761157,
|
|
"grad_norm": 0.6892895102500916,
|
|
"learning_rate": 1.8160186766198375e-05,
|
|
"loss": 0.8187867999076843,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.7124520956262547,
|
|
"grad_norm": 0.6689103245735168,
|
|
"learning_rate": 1.815191296983838e-05,
|
|
"loss": 0.8214238882064819,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.7137498225763935,
|
|
"grad_norm": 0.7005360722541809,
|
|
"learning_rate": 1.8143622504879647e-05,
|
|
"loss": 0.7808399796485901,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.7150475495265324,
|
|
"grad_norm": 0.6692766547203064,
|
|
"learning_rate": 1.8135315388274075e-05,
|
|
"loss": 0.8118186593055725,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.7163452764766713,
|
|
"grad_norm": 0.7556451559066772,
|
|
"learning_rate": 1.8126991637007618e-05,
|
|
"loss": 0.8829076290130615,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.7176430034268102,
|
|
"grad_norm": 0.7057021856307983,
|
|
"learning_rate": 1.8118651268100235e-05,
|
|
"loss": 0.8323896527290344,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.7189407303769492,
|
|
"grad_norm": 0.6931277513504028,
|
|
"learning_rate": 1.811029429860588e-05,
|
|
"loss": 0.8186264038085938,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.720238457327088,
|
|
"grad_norm": 0.6943070292472839,
|
|
"learning_rate": 1.810192074561243e-05,
|
|
"loss": 0.7884860634803772,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.7215361842772269,
|
|
"grad_norm": 0.7362954616546631,
|
|
"learning_rate": 1.8093530626241684e-05,
|
|
"loss": 0.8730647563934326,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.7228339112273658,
|
|
"grad_norm": 0.7225231528282166,
|
|
"learning_rate": 1.8085123957649315e-05,
|
|
"loss": 0.8629934787750244,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.7241316381775047,
|
|
"grad_norm": 0.6993386745452881,
|
|
"learning_rate": 1.8076700757024833e-05,
|
|
"loss": 0.8742365837097168,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.7254293651276437,
|
|
"grad_norm": 0.7013887166976929,
|
|
"learning_rate": 1.8068261041591548e-05,
|
|
"loss": 0.8042615056037903,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.7267270920777825,
|
|
"grad_norm": 0.7084468007087708,
|
|
"learning_rate": 1.8059804828606545e-05,
|
|
"loss": 0.8460750579833984,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.7280248190279214,
|
|
"grad_norm": 0.6864623427391052,
|
|
"learning_rate": 1.8051332135360637e-05,
|
|
"loss": 0.7461860179901123,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.7293225459780603,
|
|
"grad_norm": 0.7570308446884155,
|
|
"learning_rate": 1.8042842979178338e-05,
|
|
"loss": 0.8015311360359192,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.7306202729281992,
|
|
"grad_norm": 0.6948541402816772,
|
|
"learning_rate": 1.8034337377417826e-05,
|
|
"loss": 0.7483975887298584,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.731917999878338,
|
|
"grad_norm": 0.6935976147651672,
|
|
"learning_rate": 1.80258153474709e-05,
|
|
"loss": 0.8245661854743958,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.733215726828477,
|
|
"grad_norm": 0.713844895362854,
|
|
"learning_rate": 1.8017276906762955e-05,
|
|
"loss": 0.7062139511108398,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.7345134537786159,
|
|
"grad_norm": 0.7592107653617859,
|
|
"learning_rate": 1.8008722072752943e-05,
|
|
"loss": 0.9009630680084229,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.7358111807287548,
|
|
"grad_norm": 0.7252402901649475,
|
|
"learning_rate": 1.8000150862933335e-05,
|
|
"loss": 0.8240823745727539,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.7371089076788937,
|
|
"grad_norm": 0.6888589262962341,
|
|
"learning_rate": 1.7991563294830083e-05,
|
|
"loss": 0.7797961235046387,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.7384066346290326,
|
|
"grad_norm": 0.6920890808105469,
|
|
"learning_rate": 1.7982959386002592e-05,
|
|
"loss": 0.8363062739372253,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.7397043615791715,
|
|
"grad_norm": 0.7188555002212524,
|
|
"learning_rate": 1.7974339154043677e-05,
|
|
"loss": 0.8217660784721375,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.7410020885293104,
|
|
"grad_norm": 0.6754209995269775,
|
|
"learning_rate": 1.796570261657953e-05,
|
|
"loss": 0.8851417899131775,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.7422998154794492,
|
|
"grad_norm": 0.7101492881774902,
|
|
"learning_rate": 1.7957049791269684e-05,
|
|
"loss": 0.8277086615562439,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.7435975424295882,
|
|
"grad_norm": 0.7085975408554077,
|
|
"learning_rate": 1.7948380695806983e-05,
|
|
"loss": 0.8054807186126709,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.744895269379727,
|
|
"grad_norm": 0.6522380709648132,
|
|
"learning_rate": 1.793969534791752e-05,
|
|
"loss": 0.749293327331543,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.746192996329866,
|
|
"grad_norm": 0.753157377243042,
|
|
"learning_rate": 1.7930993765360644e-05,
|
|
"loss": 0.86817467212677,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.7474907232800049,
|
|
"grad_norm": 0.6874333024024963,
|
|
"learning_rate": 1.792227596592889e-05,
|
|
"loss": 0.7839986085891724,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.7487884502301437,
|
|
"grad_norm": 0.690792977809906,
|
|
"learning_rate": 1.791354196744794e-05,
|
|
"loss": 0.8275938630104065,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.7500861771802827,
|
|
"grad_norm": 0.7033665180206299,
|
|
"learning_rate": 1.790479178777662e-05,
|
|
"loss": 0.8231739401817322,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.7513839041304216,
|
|
"grad_norm": 0.7290453314781189,
|
|
"learning_rate": 1.7896025444806834e-05,
|
|
"loss": 0.8637040257453918,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.7526816310805604,
|
|
"grad_norm": 0.7544882893562317,
|
|
"learning_rate": 1.7887242956463528e-05,
|
|
"loss": 0.8368648886680603,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.7539793580306994,
|
|
"grad_norm": 0.6997877955436707,
|
|
"learning_rate": 1.7878444340704666e-05,
|
|
"loss": 0.8118851184844971,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.7552770849808382,
|
|
"grad_norm": 0.6926761269569397,
|
|
"learning_rate": 1.78696296155212e-05,
|
|
"loss": 0.7650015354156494,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.7565748119309772,
|
|
"grad_norm": 0.7061843872070312,
|
|
"learning_rate": 1.7860798798937e-05,
|
|
"loss": 0.7908979654312134,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.757872538881116,
|
|
"grad_norm": 0.687125563621521,
|
|
"learning_rate": 1.7851951909008864e-05,
|
|
"loss": 0.7617890238761902,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.7591702658312549,
|
|
"grad_norm": 0.7391111254692078,
|
|
"learning_rate": 1.7843088963826437e-05,
|
|
"loss": 0.7612465023994446,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.7604679927813939,
|
|
"grad_norm": 0.7583956122398376,
|
|
"learning_rate": 1.783420998151219e-05,
|
|
"loss": 0.8573638200759888,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.7617657197315327,
|
|
"grad_norm": 0.721450686454773,
|
|
"learning_rate": 1.782531498022141e-05,
|
|
"loss": 0.7986845970153809,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.7630634466816716,
|
|
"grad_norm": 0.7499017119407654,
|
|
"learning_rate": 1.781640397814211e-05,
|
|
"loss": 0.8502310514450073,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.7643611736318106,
|
|
"grad_norm": 0.705142617225647,
|
|
"learning_rate": 1.7807476993495047e-05,
|
|
"loss": 0.8705092668533325,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.7656589005819494,
|
|
"grad_norm": 0.689218282699585,
|
|
"learning_rate": 1.779853404453363e-05,
|
|
"loss": 0.8186284899711609,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.7669566275320884,
|
|
"grad_norm": 0.6828286647796631,
|
|
"learning_rate": 1.7789575149543936e-05,
|
|
"loss": 0.7887763381004333,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.7682543544822272,
|
|
"grad_norm": 0.7451944351196289,
|
|
"learning_rate": 1.7780600326844638e-05,
|
|
"loss": 0.8204880952835083,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.7695520814323661,
|
|
"grad_norm": 0.7414618730545044,
|
|
"learning_rate": 1.7771609594786968e-05,
|
|
"loss": 0.8183786869049072,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.770849808382505,
|
|
"grad_norm": 0.7165583968162537,
|
|
"learning_rate": 1.776260297175471e-05,
|
|
"loss": 0.860834002494812,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.7721475353326439,
|
|
"grad_norm": 0.6954268217086792,
|
|
"learning_rate": 1.775358047616412e-05,
|
|
"loss": 0.7466313242912292,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.7734452622827828,
|
|
"grad_norm": 0.7495166063308716,
|
|
"learning_rate": 1.774454212646392e-05,
|
|
"loss": 0.8352164626121521,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.7747429892329217,
|
|
"grad_norm": 0.7836682796478271,
|
|
"learning_rate": 1.773548794113525e-05,
|
|
"loss": 0.7596052885055542,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.7760407161830606,
|
|
"grad_norm": 0.7165281176567078,
|
|
"learning_rate": 1.772641793869162e-05,
|
|
"loss": 0.8770286440849304,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.7773384431331996,
|
|
"grad_norm": 0.7152581810951233,
|
|
"learning_rate": 1.7717332137678895e-05,
|
|
"loss": 0.7514005899429321,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.7786361700833384,
|
|
"grad_norm": 0.7103503942489624,
|
|
"learning_rate": 1.770823055667524e-05,
|
|
"loss": 0.8051580190658569,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.7799338970334773,
|
|
"grad_norm": 0.6738602519035339,
|
|
"learning_rate": 1.7699113214291082e-05,
|
|
"loss": 0.7153568267822266,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.7812316239836162,
|
|
"grad_norm": 0.7472966909408569,
|
|
"learning_rate": 1.768998012916908e-05,
|
|
"loss": 0.8714797496795654,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.7825293509337551,
|
|
"grad_norm": 0.6653077602386475,
|
|
"learning_rate": 1.7680831319984077e-05,
|
|
"loss": 0.7944467663764954,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.7838270778838939,
|
|
"grad_norm": 0.6959863305091858,
|
|
"learning_rate": 1.7671666805443076e-05,
|
|
"loss": 0.8018844127655029,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.7851248048340329,
|
|
"grad_norm": 0.7507782578468323,
|
|
"learning_rate": 1.766248660428519e-05,
|
|
"loss": 0.8342332243919373,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.7864225317841718,
|
|
"grad_norm": 0.685041069984436,
|
|
"learning_rate": 1.7653290735281605e-05,
|
|
"loss": 0.8430291414260864,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.7877202587343107,
|
|
"grad_norm": 0.7120122313499451,
|
|
"learning_rate": 1.7644079217235547e-05,
|
|
"loss": 0.8382185697555542,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.7890179856844496,
|
|
"grad_norm": 0.6778322458267212,
|
|
"learning_rate": 1.763485206898224e-05,
|
|
"loss": 0.7327848672866821,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.7903157126345884,
|
|
"grad_norm": 0.6974225044250488,
|
|
"learning_rate": 1.762560930938886e-05,
|
|
"loss": 0.8788211941719055,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.7916134395847274,
|
|
"grad_norm": 0.7211731672286987,
|
|
"learning_rate": 1.7616350957354523e-05,
|
|
"loss": 0.788176417350769,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.7929111665348663,
|
|
"grad_norm": 0.7053602337837219,
|
|
"learning_rate": 1.7607077031810204e-05,
|
|
"loss": 0.7817824482917786,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.7942088934850051,
|
|
"grad_norm": 0.7248443961143494,
|
|
"learning_rate": 1.759778755171874e-05,
|
|
"loss": 0.8502725958824158,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.7955066204351441,
|
|
"grad_norm": 0.7390884160995483,
|
|
"learning_rate": 1.758848253607476e-05,
|
|
"loss": 0.8086085319519043,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.7968043473852829,
|
|
"grad_norm": 0.7250061631202698,
|
|
"learning_rate": 1.7579162003904678e-05,
|
|
"loss": 0.8245308995246887,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.7981020743354219,
|
|
"grad_norm": 0.7186241149902344,
|
|
"learning_rate": 1.756982597426661e-05,
|
|
"loss": 0.8296452760696411,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.7993998012855608,
|
|
"grad_norm": 0.6929823160171509,
|
|
"learning_rate": 1.756047446625038e-05,
|
|
"loss": 0.804393470287323,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.8006975282356996,
|
|
"grad_norm": 0.6644824743270874,
|
|
"learning_rate": 1.7551107498977458e-05,
|
|
"loss": 0.7272558808326721,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.8019952551858386,
|
|
"grad_norm": 0.6946169137954712,
|
|
"learning_rate": 1.7541725091600918e-05,
|
|
"loss": 0.7725887894630432,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.8032929821359774,
|
|
"grad_norm": 0.7124983668327332,
|
|
"learning_rate": 1.7532327263305405e-05,
|
|
"loss": 0.8026424646377563,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.8045907090861163,
|
|
"grad_norm": 0.7041330337524414,
|
|
"learning_rate": 1.75229140333071e-05,
|
|
"loss": 0.8723938465118408,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.8058884360362553,
|
|
"grad_norm": 0.7211349606513977,
|
|
"learning_rate": 1.7513485420853683e-05,
|
|
"loss": 0.7833378911018372,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.8071861629863941,
|
|
"grad_norm": 0.7067847847938538,
|
|
"learning_rate": 1.750404144522427e-05,
|
|
"loss": 0.8030161261558533,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.8084838899365331,
|
|
"grad_norm": 0.7632414102554321,
|
|
"learning_rate": 1.7494582125729408e-05,
|
|
"loss": 0.8390699625015259,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.8097816168866719,
|
|
"grad_norm": 0.6906073689460754,
|
|
"learning_rate": 1.7485107481711014e-05,
|
|
"loss": 0.7584885954856873,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.8110793438368108,
|
|
"grad_norm": 0.7074705362319946,
|
|
"learning_rate": 1.7475617532542325e-05,
|
|
"loss": 0.7802140116691589,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.8123770707869498,
|
|
"grad_norm": 0.7454720735549927,
|
|
"learning_rate": 1.7466112297627894e-05,
|
|
"loss": 0.8060036897659302,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.8136747977370886,
|
|
"grad_norm": 0.720340371131897,
|
|
"learning_rate": 1.7456591796403525e-05,
|
|
"loss": 0.8245412707328796,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.8149725246872275,
|
|
"grad_norm": 0.6765140295028687,
|
|
"learning_rate": 1.744705604833622e-05,
|
|
"loss": 0.7529839277267456,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.8162702516373664,
|
|
"grad_norm": 0.7487897872924805,
|
|
"learning_rate": 1.7437505072924177e-05,
|
|
"loss": 0.8539460897445679,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.8175679785875053,
|
|
"grad_norm": 0.819340169429779,
|
|
"learning_rate": 1.742793888969673e-05,
|
|
"loss": 0.9023832082748413,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.8188657055376443,
|
|
"grad_norm": 0.6978700757026672,
|
|
"learning_rate": 1.741835751821429e-05,
|
|
"loss": 0.8347563743591309,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.8201634324877831,
|
|
"grad_norm": 0.6633133888244629,
|
|
"learning_rate": 1.7408760978068343e-05,
|
|
"loss": 0.7656944394111633,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.821461159437922,
|
|
"grad_norm": 0.779058039188385,
|
|
"learning_rate": 1.739914928888139e-05,
|
|
"loss": 0.8407497406005859,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.8227588863880609,
|
|
"grad_norm": 0.7178354263305664,
|
|
"learning_rate": 1.7389522470306892e-05,
|
|
"loss": 0.8489883542060852,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.8240566133381998,
|
|
"grad_norm": 0.6867073774337769,
|
|
"learning_rate": 1.7379880542029263e-05,
|
|
"loss": 0.8083344101905823,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.8253543402883388,
|
|
"grad_norm": 0.7312392592430115,
|
|
"learning_rate": 1.7370223523763804e-05,
|
|
"loss": 0.8478159308433533,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.8266520672384776,
|
|
"grad_norm": 0.7201517224311829,
|
|
"learning_rate": 1.7360551435256673e-05,
|
|
"loss": 0.8310608863830566,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.8279497941886165,
|
|
"grad_norm": 0.7189190983772278,
|
|
"learning_rate": 1.7350864296284846e-05,
|
|
"loss": 0.8333780765533447,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.8292475211387554,
|
|
"grad_norm": 0.686372697353363,
|
|
"learning_rate": 1.7341162126656063e-05,
|
|
"loss": 0.774347722530365,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.8305452480888943,
|
|
"grad_norm": 0.7090693712234497,
|
|
"learning_rate": 1.7331444946208815e-05,
|
|
"loss": 0.7772883772850037,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.8318429750390332,
|
|
"grad_norm": 0.7179540991783142,
|
|
"learning_rate": 1.732171277481227e-05,
|
|
"loss": 0.8045225739479065,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.8331407019891721,
|
|
"grad_norm": 0.7238140106201172,
|
|
"learning_rate": 1.7311965632366254e-05,
|
|
"loss": 0.816831648349762,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.834438428939311,
|
|
"grad_norm": 0.7198631167411804,
|
|
"learning_rate": 1.7302203538801212e-05,
|
|
"loss": 0.8121675252914429,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.8357361558894499,
|
|
"grad_norm": 0.743016242980957,
|
|
"learning_rate": 1.729242651407815e-05,
|
|
"loss": 0.8649178743362427,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.8370338828395888,
|
|
"grad_norm": 0.7449317574501038,
|
|
"learning_rate": 1.7282634578188612e-05,
|
|
"loss": 0.823853611946106,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.8383316097897276,
|
|
"grad_norm": 0.725826621055603,
|
|
"learning_rate": 1.7272827751154627e-05,
|
|
"loss": 0.8356031179428101,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.8396293367398666,
|
|
"grad_norm": 0.7286955118179321,
|
|
"learning_rate": 1.7263006053028674e-05,
|
|
"loss": 0.7678595781326294,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.8409270636900055,
|
|
"grad_norm": 0.7141085863113403,
|
|
"learning_rate": 1.7253169503893637e-05,
|
|
"loss": 0.819695770740509,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.8422247906401443,
|
|
"grad_norm": 0.7320179343223572,
|
|
"learning_rate": 1.7243318123862777e-05,
|
|
"loss": 0.7937145233154297,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.8435225175902833,
|
|
"grad_norm": 0.677760124206543,
|
|
"learning_rate": 1.7233451933079663e-05,
|
|
"loss": 0.7791966199874878,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.8448202445404221,
|
|
"grad_norm": 0.7462013363838196,
|
|
"learning_rate": 1.7223570951718166e-05,
|
|
"loss": 0.7947529554367065,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.8461179714905611,
|
|
"grad_norm": 0.7482285499572754,
|
|
"learning_rate": 1.7213675199982388e-05,
|
|
"loss": 0.8657369613647461,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.8474156984407,
|
|
"grad_norm": 0.7175538539886475,
|
|
"learning_rate": 1.7203764698106636e-05,
|
|
"loss": 0.8233255743980408,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.8487134253908388,
|
|
"grad_norm": 0.687630295753479,
|
|
"learning_rate": 1.7193839466355383e-05,
|
|
"loss": 0.730807363986969,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.8500111523409778,
|
|
"grad_norm": 0.7357272505760193,
|
|
"learning_rate": 1.7183899525023212e-05,
|
|
"loss": 0.7798961997032166,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.8513088792911166,
|
|
"grad_norm": 0.7003471851348877,
|
|
"learning_rate": 1.7173944894434783e-05,
|
|
"loss": 0.752636730670929,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.8526066062412555,
|
|
"grad_norm": 0.72862708568573,
|
|
"learning_rate": 1.7163975594944807e-05,
|
|
"loss": 0.8516281247138977,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.8539043331913945,
|
|
"grad_norm": 0.7155880928039551,
|
|
"learning_rate": 1.715399164693797e-05,
|
|
"loss": 0.8015654683113098,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.8552020601415333,
|
|
"grad_norm": 0.6752951145172119,
|
|
"learning_rate": 1.7143993070828913e-05,
|
|
"loss": 0.7704746127128601,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.8564997870916723,
|
|
"grad_norm": 0.7284151315689087,
|
|
"learning_rate": 1.713397988706221e-05,
|
|
"loss": 0.8053057789802551,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.8577975140418111,
|
|
"grad_norm": 0.7367468476295471,
|
|
"learning_rate": 1.7123952116112275e-05,
|
|
"loss": 0.8107625246047974,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.85909524099195,
|
|
"grad_norm": 0.7247380018234253,
|
|
"learning_rate": 1.7113909778483364e-05,
|
|
"loss": 0.8168917894363403,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.860392967942089,
|
|
"grad_norm": 0.708310067653656,
|
|
"learning_rate": 1.7103852894709517e-05,
|
|
"loss": 0.765848696231842,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.8616906948922278,
|
|
"grad_norm": 0.746276319026947,
|
|
"learning_rate": 1.7093781485354517e-05,
|
|
"loss": 0.7557209730148315,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.8629884218423667,
|
|
"grad_norm": 0.7245295643806458,
|
|
"learning_rate": 1.7083695571011842e-05,
|
|
"loss": 0.8230986595153809,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.8642861487925056,
|
|
"grad_norm": 0.6767184734344482,
|
|
"learning_rate": 1.707359517230464e-05,
|
|
"loss": 0.7791951894760132,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.8655838757426445,
|
|
"grad_norm": 0.7177157402038574,
|
|
"learning_rate": 1.7063480309885668e-05,
|
|
"loss": 0.7597481608390808,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.8668816026927835,
|
|
"grad_norm": 0.7185314297676086,
|
|
"learning_rate": 1.7053351004437258e-05,
|
|
"loss": 0.7932897210121155,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.8681793296429223,
|
|
"grad_norm": 0.6925249695777893,
|
|
"learning_rate": 1.7043207276671276e-05,
|
|
"loss": 0.8076404333114624,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.8694770565930612,
|
|
"grad_norm": 0.6706543564796448,
|
|
"learning_rate": 1.7033049147329077e-05,
|
|
"loss": 0.8299864530563354,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.8707747835432001,
|
|
"grad_norm": 0.6854607462882996,
|
|
"learning_rate": 1.702287663718147e-05,
|
|
"loss": 0.7249770760536194,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.872072510493339,
|
|
"grad_norm": 0.6870327591896057,
|
|
"learning_rate": 1.7012689767028656e-05,
|
|
"loss": 0.770750880241394,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.8733702374434779,
|
|
"grad_norm": 0.7077570557594299,
|
|
"learning_rate": 1.700248855770021e-05,
|
|
"loss": 0.887006402015686,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.8746679643936168,
|
|
"grad_norm": 0.7156735062599182,
|
|
"learning_rate": 1.6992273030055022e-05,
|
|
"loss": 0.793735921382904,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.8759656913437557,
|
|
"grad_norm": 0.7201855182647705,
|
|
"learning_rate": 1.6982043204981264e-05,
|
|
"loss": 0.7955703139305115,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.8772634182938946,
|
|
"grad_norm": 0.7118475437164307,
|
|
"learning_rate": 1.6971799103396332e-05,
|
|
"loss": 0.7845295667648315,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.8785611452440335,
|
|
"grad_norm": 0.7221444845199585,
|
|
"learning_rate": 1.696154074624683e-05,
|
|
"loss": 0.824984610080719,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.8798588721941724,
|
|
"grad_norm": 0.6542083621025085,
|
|
"learning_rate": 1.6951268154508497e-05,
|
|
"loss": 0.8094558119773865,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.8811565991443113,
|
|
"grad_norm": 0.7080230116844177,
|
|
"learning_rate": 1.6940981349186182e-05,
|
|
"loss": 0.8446075916290283,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.8824543260944502,
|
|
"grad_norm": 0.7394174933433533,
|
|
"learning_rate": 1.69306803513138e-05,
|
|
"loss": 0.8166599273681641,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.883752053044589,
|
|
"grad_norm": 0.6939387321472168,
|
|
"learning_rate": 1.6920365181954284e-05,
|
|
"loss": 0.8320161700248718,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.885049779994728,
|
|
"grad_norm": 0.7184001803398132,
|
|
"learning_rate": 1.6910035862199545e-05,
|
|
"loss": 0.7950330376625061,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.8863475069448669,
|
|
"grad_norm": 0.6943792700767517,
|
|
"learning_rate": 1.6899692413170422e-05,
|
|
"loss": 0.8061293363571167,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.8876452338950058,
|
|
"grad_norm": 0.7242916822433472,
|
|
"learning_rate": 1.688933485601666e-05,
|
|
"loss": 0.799871563911438,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.8889429608451447,
|
|
"grad_norm": 0.6802281141281128,
|
|
"learning_rate": 1.6878963211916833e-05,
|
|
"loss": 0.8111347556114197,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.8902406877952835,
|
|
"grad_norm": 0.7344982028007507,
|
|
"learning_rate": 1.6868577502078336e-05,
|
|
"loss": 0.818919837474823,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.8915384147454225,
|
|
"grad_norm": 0.7255212664604187,
|
|
"learning_rate": 1.6858177747737312e-05,
|
|
"loss": 0.8595883846282959,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.8928361416955614,
|
|
"grad_norm": 0.6713986992835999,
|
|
"learning_rate": 1.684776397015863e-05,
|
|
"loss": 0.7319802045822144,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.8941338686457002,
|
|
"grad_norm": 0.6508772373199463,
|
|
"learning_rate": 1.6837336190635824e-05,
|
|
"loss": 0.7525233626365662,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.8954315955958392,
|
|
"grad_norm": 0.6712636947631836,
|
|
"learning_rate": 1.682689443049107e-05,
|
|
"loss": 0.785638689994812,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.8954315955958392,
|
|
"eval_loss": 0.7686871290206909,
|
|
"eval_runtime": 143.0434,
|
|
"eval_samples_per_second": 36.297,
|
|
"eval_steps_per_second": 9.074,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.896729322545978,
|
|
"grad_norm": 0.7040373682975769,
|
|
"learning_rate": 1.6816438711075114e-05,
|
|
"loss": 0.8052287101745605,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.898027049496117,
|
|
"grad_norm": 0.6796557903289795,
|
|
"learning_rate": 1.680596905376727e-05,
|
|
"loss": 0.8128867745399475,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.8993247764462559,
|
|
"grad_norm": 0.689491868019104,
|
|
"learning_rate": 1.6795485479975327e-05,
|
|
"loss": 0.7731098532676697,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.9006225033963947,
|
|
"grad_norm": 0.6846652030944824,
|
|
"learning_rate": 1.6784988011135546e-05,
|
|
"loss": 0.8001493811607361,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 0.9019202303465337,
|
|
"grad_norm": 0.7072511911392212,
|
|
"learning_rate": 1.6774476668712587e-05,
|
|
"loss": 0.7856433391571045,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.9032179572966725,
|
|
"grad_norm": 0.6913763880729675,
|
|
"learning_rate": 1.676395147419949e-05,
|
|
"loss": 0.8246166110038757,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 0.9045156842468114,
|
|
"grad_norm": 0.7131518721580505,
|
|
"learning_rate": 1.6753412449117615e-05,
|
|
"loss": 0.8256362080574036,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 0.9058134111969504,
|
|
"grad_norm": 0.6939201951026917,
|
|
"learning_rate": 1.67428596150166e-05,
|
|
"loss": 0.8615972399711609,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 0.9071111381470892,
|
|
"grad_norm": 0.7194769382476807,
|
|
"learning_rate": 1.6732292993474316e-05,
|
|
"loss": 0.7931585907936096,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 0.9084088650972282,
|
|
"grad_norm": 0.6878808736801147,
|
|
"learning_rate": 1.6721712606096833e-05,
|
|
"loss": 0.7722562551498413,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.909706592047367,
|
|
"grad_norm": 0.6713901162147522,
|
|
"learning_rate": 1.6711118474518363e-05,
|
|
"loss": 0.7399365901947021,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 0.9110043189975059,
|
|
"grad_norm": 0.6830242872238159,
|
|
"learning_rate": 1.6700510620401223e-05,
|
|
"loss": 0.7681128978729248,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 0.9123020459476449,
|
|
"grad_norm": 0.7052934169769287,
|
|
"learning_rate": 1.6689889065435796e-05,
|
|
"loss": 0.8287486433982849,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 0.9135997728977837,
|
|
"grad_norm": 0.7426304817199707,
|
|
"learning_rate": 1.667925383134047e-05,
|
|
"loss": 0.7236632108688354,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 0.9148974998479226,
|
|
"grad_norm": 0.7284197807312012,
|
|
"learning_rate": 1.66686049398616e-05,
|
|
"loss": 0.8001005053520203,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.9161952267980615,
|
|
"grad_norm": 0.7305144667625427,
|
|
"learning_rate": 1.6657942412773484e-05,
|
|
"loss": 0.816078245639801,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 0.9174929537482004,
|
|
"grad_norm": 0.7396757006645203,
|
|
"learning_rate": 1.664726627187829e-05,
|
|
"loss": 0.8432518243789673,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 0.9187906806983394,
|
|
"grad_norm": 0.7043930292129517,
|
|
"learning_rate": 1.6636576539006015e-05,
|
|
"loss": 0.8011447787284851,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 0.9200884076484782,
|
|
"grad_norm": 0.6750434637069702,
|
|
"learning_rate": 1.6625873236014464e-05,
|
|
"loss": 0.8111026883125305,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 0.9213861345986171,
|
|
"grad_norm": 0.6968750953674316,
|
|
"learning_rate": 1.6615156384789185e-05,
|
|
"loss": 0.7856196165084839,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.922683861548756,
|
|
"grad_norm": 0.6756315231323242,
|
|
"learning_rate": 1.660442600724342e-05,
|
|
"loss": 0.7796693444252014,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 0.9239815884988949,
|
|
"grad_norm": 0.7280746102333069,
|
|
"learning_rate": 1.659368212531808e-05,
|
|
"loss": 0.8190441131591797,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 0.9252793154490339,
|
|
"grad_norm": 0.6771341562271118,
|
|
"learning_rate": 1.6582924760981683e-05,
|
|
"loss": 0.7919082641601562,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 0.9265770423991727,
|
|
"grad_norm": 0.7019714713096619,
|
|
"learning_rate": 1.6572153936230316e-05,
|
|
"loss": 0.7387243509292603,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 0.9278747693493116,
|
|
"grad_norm": 0.7424118518829346,
|
|
"learning_rate": 1.6561369673087588e-05,
|
|
"loss": 0.8694776892662048,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.9291724962994505,
|
|
"grad_norm": 0.6909191012382507,
|
|
"learning_rate": 1.6550571993604587e-05,
|
|
"loss": 0.8239873647689819,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 0.9304702232495894,
|
|
"grad_norm": 0.7481014728546143,
|
|
"learning_rate": 1.6539760919859838e-05,
|
|
"loss": 0.8004978895187378,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 0.9317679501997282,
|
|
"grad_norm": 0.6954971551895142,
|
|
"learning_rate": 1.6528936473959253e-05,
|
|
"loss": 0.8122729659080505,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 0.9330656771498672,
|
|
"grad_norm": 0.7150570154190063,
|
|
"learning_rate": 1.6518098678036073e-05,
|
|
"loss": 0.8382218480110168,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 0.9343634041000061,
|
|
"grad_norm": 0.7469287514686584,
|
|
"learning_rate": 1.650724755425086e-05,
|
|
"loss": 0.8599920868873596,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.935661131050145,
|
|
"grad_norm": 0.7064406275749207,
|
|
"learning_rate": 1.6496383124791406e-05,
|
|
"loss": 0.7755042314529419,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 0.9369588580002839,
|
|
"grad_norm": 0.7173776626586914,
|
|
"learning_rate": 1.6485505411872725e-05,
|
|
"loss": 0.8066536784172058,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 0.9382565849504227,
|
|
"grad_norm": 0.717430591583252,
|
|
"learning_rate": 1.6474614437736986e-05,
|
|
"loss": 0.8112089037895203,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 0.9395543119005617,
|
|
"grad_norm": 0.696087658405304,
|
|
"learning_rate": 1.6463710224653477e-05,
|
|
"loss": 0.7918620705604553,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 0.9408520388507006,
|
|
"grad_norm": 0.6923975944519043,
|
|
"learning_rate": 1.6452792794918545e-05,
|
|
"loss": 0.8037642240524292,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.9421497658008394,
|
|
"grad_norm": 0.7063742280006409,
|
|
"learning_rate": 1.644186217085558e-05,
|
|
"loss": 0.7934796810150146,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 0.9434474927509784,
|
|
"grad_norm": 0.6965203881263733,
|
|
"learning_rate": 1.6430918374814937e-05,
|
|
"loss": 0.8489659428596497,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 0.9447452197011172,
|
|
"grad_norm": 0.7392389178276062,
|
|
"learning_rate": 1.641996142917391e-05,
|
|
"loss": 0.8604154586791992,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 0.9460429466512562,
|
|
"grad_norm": 0.7131820321083069,
|
|
"learning_rate": 1.640899135633668e-05,
|
|
"loss": 0.8199344277381897,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 0.9473406736013951,
|
|
"grad_norm": 0.7163403630256653,
|
|
"learning_rate": 1.6398008178734272e-05,
|
|
"loss": 0.8697142004966736,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.9486384005515339,
|
|
"grad_norm": 0.6607118844985962,
|
|
"learning_rate": 1.6387011918824493e-05,
|
|
"loss": 0.7900056838989258,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 0.9499361275016729,
|
|
"grad_norm": 0.669420599937439,
|
|
"learning_rate": 1.6376002599091925e-05,
|
|
"loss": 0.8032844066619873,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 0.9512338544518117,
|
|
"grad_norm": 0.7059581279754639,
|
|
"learning_rate": 1.6364980242047835e-05,
|
|
"loss": 0.8048977851867676,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 0.9525315814019506,
|
|
"grad_norm": 0.7329293489456177,
|
|
"learning_rate": 1.635394487023015e-05,
|
|
"loss": 0.8311731815338135,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 0.9538293083520896,
|
|
"grad_norm": 0.7057397961616516,
|
|
"learning_rate": 1.634289650620342e-05,
|
|
"loss": 0.8411611318588257,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.9551270353022284,
|
|
"grad_norm": 0.653426468372345,
|
|
"learning_rate": 1.633183517255875e-05,
|
|
"loss": 0.758813738822937,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 0.9564247622523674,
|
|
"grad_norm": 0.7300577759742737,
|
|
"learning_rate": 1.632076089191376e-05,
|
|
"loss": 0.8028651475906372,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 0.9577224892025062,
|
|
"grad_norm": 0.6757684350013733,
|
|
"learning_rate": 1.630967368691256e-05,
|
|
"loss": 0.8133585453033447,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 0.9590202161526451,
|
|
"grad_norm": 0.6894041299819946,
|
|
"learning_rate": 1.6298573580225676e-05,
|
|
"loss": 0.766591489315033,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 0.9603179431027841,
|
|
"grad_norm": 0.7034198641777039,
|
|
"learning_rate": 1.6287460594550017e-05,
|
|
"loss": 0.778566837310791,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.9616156700529229,
|
|
"grad_norm": 0.6629794239997864,
|
|
"learning_rate": 1.6276334752608823e-05,
|
|
"loss": 0.7911474704742432,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 0.9629133970030618,
|
|
"grad_norm": 0.6980583667755127,
|
|
"learning_rate": 1.6265196077151627e-05,
|
|
"loss": 0.7445369958877563,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 0.9642111239532007,
|
|
"grad_norm": 0.7294824719429016,
|
|
"learning_rate": 1.62540445909542e-05,
|
|
"loss": 0.86620032787323,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 0.9655088509033396,
|
|
"grad_norm": 0.7365493774414062,
|
|
"learning_rate": 1.624288031681851e-05,
|
|
"loss": 0.810501754283905,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 0.9668065778534786,
|
|
"grad_norm": 0.737711489200592,
|
|
"learning_rate": 1.623170327757267e-05,
|
|
"loss": 0.8520309329032898,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.9681043048036174,
|
|
"grad_norm": 0.683699905872345,
|
|
"learning_rate": 1.62205134960709e-05,
|
|
"loss": 0.7950071096420288,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 0.9694020317537563,
|
|
"grad_norm": 0.7092661261558533,
|
|
"learning_rate": 1.620931099519347e-05,
|
|
"loss": 0.8340073823928833,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 0.9706997587038952,
|
|
"grad_norm": 0.7204828262329102,
|
|
"learning_rate": 1.619809579784665e-05,
|
|
"loss": 0.7778469324111938,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 0.9719974856540341,
|
|
"grad_norm": 0.6977567076683044,
|
|
"learning_rate": 1.6186867926962695e-05,
|
|
"loss": 0.797735869884491,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 0.973295212604173,
|
|
"grad_norm": 0.6957900524139404,
|
|
"learning_rate": 1.6175627405499746e-05,
|
|
"loss": 0.7967561483383179,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.9745929395543119,
|
|
"grad_norm": 0.6862889528274536,
|
|
"learning_rate": 1.6164374256441837e-05,
|
|
"loss": 0.8016502261161804,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 0.9758906665044508,
|
|
"grad_norm": 0.6969533562660217,
|
|
"learning_rate": 1.6153108502798796e-05,
|
|
"loss": 0.8099682331085205,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 0.9771883934545897,
|
|
"grad_norm": 0.6920532584190369,
|
|
"learning_rate": 1.614183016760625e-05,
|
|
"loss": 0.8007751107215881,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 0.9784861204047286,
|
|
"grad_norm": 0.6825345158576965,
|
|
"learning_rate": 1.613053927392553e-05,
|
|
"loss": 0.8570786118507385,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 0.9797838473548675,
|
|
"grad_norm": 0.7230255603790283,
|
|
"learning_rate": 1.6119235844843664e-05,
|
|
"loss": 0.7779375910758972,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.9810815743050064,
|
|
"grad_norm": 0.68338543176651,
|
|
"learning_rate": 1.6107919903473294e-05,
|
|
"loss": 0.7894657850265503,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 0.9823793012551453,
|
|
"grad_norm": 0.7132012248039246,
|
|
"learning_rate": 1.6096591472952664e-05,
|
|
"loss": 0.8401795625686646,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 0.9836770282052841,
|
|
"grad_norm": 0.681077241897583,
|
|
"learning_rate": 1.6085250576445548e-05,
|
|
"loss": 0.7692939043045044,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 0.9849747551554231,
|
|
"grad_norm": 0.6817126870155334,
|
|
"learning_rate": 1.6073897237141203e-05,
|
|
"loss": 0.7555439472198486,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 0.986272482105562,
|
|
"grad_norm": 0.6702454090118408,
|
|
"learning_rate": 1.6062531478254333e-05,
|
|
"loss": 0.7115926742553711,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.9875702090557009,
|
|
"grad_norm": 0.6700429320335388,
|
|
"learning_rate": 1.605115332302505e-05,
|
|
"loss": 0.7557807564735413,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 0.9888679360058398,
|
|
"grad_norm": 0.6891334652900696,
|
|
"learning_rate": 1.603976279471879e-05,
|
|
"loss": 0.8077662587165833,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 0.9901656629559786,
|
|
"grad_norm": 0.767073929309845,
|
|
"learning_rate": 1.6028359916626308e-05,
|
|
"loss": 0.7964708805084229,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 0.9914633899061176,
|
|
"grad_norm": 0.660102367401123,
|
|
"learning_rate": 1.601694471206359e-05,
|
|
"loss": 0.7086456418037415,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 0.9927611168562565,
|
|
"grad_norm": 0.6949501037597656,
|
|
"learning_rate": 1.600551720437186e-05,
|
|
"loss": 0.7723450660705566,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.9940588438063953,
|
|
"grad_norm": 0.7149574756622314,
|
|
"learning_rate": 1.599407741691746e-05,
|
|
"loss": 0.8286278247833252,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 0.9953565707565343,
|
|
"grad_norm": 0.6776000261306763,
|
|
"learning_rate": 1.5982625373091877e-05,
|
|
"loss": 0.7701430320739746,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 0.9966542977066731,
|
|
"grad_norm": 0.7129999399185181,
|
|
"learning_rate": 1.5971161096311628e-05,
|
|
"loss": 0.8104744553565979,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 0.9979520246568121,
|
|
"grad_norm": 0.6826761960983276,
|
|
"learning_rate": 1.5959684610018267e-05,
|
|
"loss": 0.7398239970207214,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 0.999249751606951,
|
|
"grad_norm": 0.7236920595169067,
|
|
"learning_rate": 1.5948195937678297e-05,
|
|
"loss": 0.7627758383750916,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.9062820672988892,
|
|
"learning_rate": 1.5936695102783148e-05,
|
|
"loss": 0.7684851288795471,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 1.0012977269501389,
|
|
"grad_norm": 1.0222225189208984,
|
|
"learning_rate": 1.5925182128849116e-05,
|
|
"loss": 0.7260036468505859,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 1.0025954539002777,
|
|
"grad_norm": 0.8933354020118713,
|
|
"learning_rate": 1.591365703941732e-05,
|
|
"loss": 0.6952782869338989,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 1.0038931808504168,
|
|
"grad_norm": 0.8150500059127808,
|
|
"learning_rate": 1.5902119858053652e-05,
|
|
"loss": 0.708466649055481,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 1.0051909078005556,
|
|
"grad_norm": 0.677733838558197,
|
|
"learning_rate": 1.589057060834872e-05,
|
|
"loss": 0.714854896068573,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 1.0064886347506945,
|
|
"grad_norm": 0.8115158677101135,
|
|
"learning_rate": 1.5879009313917826e-05,
|
|
"loss": 0.7126277089118958,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 1.0077863617008334,
|
|
"grad_norm": 0.9660588502883911,
|
|
"learning_rate": 1.5867435998400885e-05,
|
|
"loss": 0.8123319149017334,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 1.0090840886509722,
|
|
"grad_norm": 0.8912333846092224,
|
|
"learning_rate": 1.5855850685462404e-05,
|
|
"loss": 0.7480561137199402,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 1.0103818156011113,
|
|
"grad_norm": 0.9120140075683594,
|
|
"learning_rate": 1.584425339879141e-05,
|
|
"loss": 0.7480191588401794,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 1.0116795425512501,
|
|
"grad_norm": 0.8324950337409973,
|
|
"learning_rate": 1.5832644162101417e-05,
|
|
"loss": 0.7069035172462463,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 1.012977269501389,
|
|
"grad_norm": 0.7601868510246277,
|
|
"learning_rate": 1.5821022999130385e-05,
|
|
"loss": 0.646752655506134,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 1.0142749964515279,
|
|
"grad_norm": 0.7213713526725769,
|
|
"learning_rate": 1.580938993364064e-05,
|
|
"loss": 0.6728400588035583,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 1.0155727234016667,
|
|
"grad_norm": 0.8234879374504089,
|
|
"learning_rate": 1.579774498941886e-05,
|
|
"loss": 0.6997194886207581,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 1.0168704503518056,
|
|
"grad_norm": 0.794476330280304,
|
|
"learning_rate": 1.578608819027602e-05,
|
|
"loss": 0.6844808459281921,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 1.0181681773019446,
|
|
"grad_norm": 0.8356218338012695,
|
|
"learning_rate": 1.5774419560047303e-05,
|
|
"loss": 0.7501406073570251,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 1.0194659042520835,
|
|
"grad_norm": 0.7794895172119141,
|
|
"learning_rate": 1.5762739122592123e-05,
|
|
"loss": 0.7650024890899658,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 1.0207636312022224,
|
|
"grad_norm": 0.7471200227737427,
|
|
"learning_rate": 1.5751046901794008e-05,
|
|
"loss": 0.7121275067329407,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 1.0220613581523612,
|
|
"grad_norm": 0.7541830539703369,
|
|
"learning_rate": 1.5739342921560593e-05,
|
|
"loss": 0.7205899357795715,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 1.0233590851025,
|
|
"grad_norm": 0.8261748552322388,
|
|
"learning_rate": 1.5727627205823554e-05,
|
|
"loss": 0.6890494227409363,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 1.0246568120526391,
|
|
"grad_norm": 0.7363404035568237,
|
|
"learning_rate": 1.571589977853857e-05,
|
|
"loss": 0.7250495553016663,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 1.025954539002778,
|
|
"grad_norm": 0.7666418552398682,
|
|
"learning_rate": 1.5704160663685254e-05,
|
|
"loss": 0.6565474271774292,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 1.0272522659529169,
|
|
"grad_norm": 0.707535982131958,
|
|
"learning_rate": 1.5692409885267127e-05,
|
|
"loss": 0.8307659029960632,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 1.0285499929030557,
|
|
"grad_norm": 0.7528367638587952,
|
|
"learning_rate": 1.568064746731156e-05,
|
|
"loss": 0.734372615814209,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 1.0298477198531946,
|
|
"grad_norm": 0.7138853073120117,
|
|
"learning_rate": 1.5668873433869718e-05,
|
|
"loss": 0.6305298805236816,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 1.0311454468033336,
|
|
"grad_norm": 0.7478009462356567,
|
|
"learning_rate": 1.5657087809016517e-05,
|
|
"loss": 0.6923752427101135,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 1.0324431737534725,
|
|
"grad_norm": 0.7364891171455383,
|
|
"learning_rate": 1.564529061685058e-05,
|
|
"loss": 0.7163046598434448,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 1.0337409007036114,
|
|
"grad_norm": 0.7004992365837097,
|
|
"learning_rate": 1.5633481881494178e-05,
|
|
"loss": 0.6700119972229004,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 1.0350386276537502,
|
|
"grad_norm": 0.749292254447937,
|
|
"learning_rate": 1.562166162709319e-05,
|
|
"loss": 0.6811234951019287,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 1.036336354603889,
|
|
"grad_norm": 0.7418084740638733,
|
|
"learning_rate": 1.560982987781704e-05,
|
|
"loss": 0.7332763075828552,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 1.037634081554028,
|
|
"grad_norm": 0.6867294907569885,
|
|
"learning_rate": 1.5597986657858656e-05,
|
|
"loss": 0.7094939351081848,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 1.038931808504167,
|
|
"grad_norm": 0.6801954507827759,
|
|
"learning_rate": 1.5586131991434434e-05,
|
|
"loss": 0.7229615449905396,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 1.0402295354543059,
|
|
"grad_norm": 0.6919074654579163,
|
|
"learning_rate": 1.5574265902784163e-05,
|
|
"loss": 0.6745041012763977,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 1.0415272624044447,
|
|
"grad_norm": 0.7064636945724487,
|
|
"learning_rate": 1.556238841617099e-05,
|
|
"loss": 0.7311556935310364,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 1.0428249893545836,
|
|
"grad_norm": 0.7400867938995361,
|
|
"learning_rate": 1.555049955588137e-05,
|
|
"loss": 0.7360319495201111,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 1.0441227163047224,
|
|
"grad_norm": 0.7186093330383301,
|
|
"learning_rate": 1.5538599346225013e-05,
|
|
"loss": 0.6791881918907166,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 1.0454204432548615,
|
|
"grad_norm": 0.7080870866775513,
|
|
"learning_rate": 1.552668781153484e-05,
|
|
"loss": 0.6935555338859558,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 1.0467181702050004,
|
|
"grad_norm": 0.7288933396339417,
|
|
"learning_rate": 1.5514764976166916e-05,
|
|
"loss": 0.7893433570861816,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 1.0480158971551392,
|
|
"grad_norm": 0.7090301513671875,
|
|
"learning_rate": 1.5502830864500426e-05,
|
|
"loss": 0.7087657451629639,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 1.049313624105278,
|
|
"grad_norm": 0.7548444271087646,
|
|
"learning_rate": 1.5490885500937606e-05,
|
|
"loss": 0.72869473695755,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 1.050611351055417,
|
|
"grad_norm": 0.7161403894424438,
|
|
"learning_rate": 1.5478928909903705e-05,
|
|
"loss": 0.7281824946403503,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 1.051909078005556,
|
|
"grad_norm": 0.6805386543273926,
|
|
"learning_rate": 1.5466961115846927e-05,
|
|
"loss": 0.6523677110671997,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 1.0532068049556949,
|
|
"grad_norm": 0.7339995503425598,
|
|
"learning_rate": 1.545498214323837e-05,
|
|
"loss": 0.7160875797271729,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 1.0545045319058337,
|
|
"grad_norm": 0.6826195120811462,
|
|
"learning_rate": 1.544299201657202e-05,
|
|
"loss": 0.7368515133857727,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 1.0558022588559726,
|
|
"grad_norm": 0.7545201182365417,
|
|
"learning_rate": 1.543099076036463e-05,
|
|
"loss": 0.7098448276519775,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 1.0570999858061114,
|
|
"grad_norm": 0.6874995827674866,
|
|
"learning_rate": 1.5418978399155748e-05,
|
|
"loss": 0.6643248200416565,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 1.0583977127562503,
|
|
"grad_norm": 0.7067052125930786,
|
|
"learning_rate": 1.54069549575076e-05,
|
|
"loss": 0.7022271752357483,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 1.0596954397063894,
|
|
"grad_norm": 0.7168053388595581,
|
|
"learning_rate": 1.539492046000509e-05,
|
|
"loss": 0.6977633237838745,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 1.0609931666565282,
|
|
"grad_norm": 0.7110093235969543,
|
|
"learning_rate": 1.5382874931255717e-05,
|
|
"loss": 0.7410083413124084,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 1.062290893606667,
|
|
"grad_norm": 0.6772004961967468,
|
|
"learning_rate": 1.5370818395889536e-05,
|
|
"loss": 0.6744326949119568,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 1.063588620556806,
|
|
"grad_norm": 0.7344289422035217,
|
|
"learning_rate": 1.5358750878559113e-05,
|
|
"loss": 0.7128704190254211,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 1.0648863475069448,
|
|
"grad_norm": 0.7206461429595947,
|
|
"learning_rate": 1.5346672403939465e-05,
|
|
"loss": 0.7533354759216309,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 1.0661840744570839,
|
|
"grad_norm": 0.7541556358337402,
|
|
"learning_rate": 1.5334582996728017e-05,
|
|
"loss": 0.7774013876914978,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 1.0674818014072227,
|
|
"grad_norm": 0.7579377293586731,
|
|
"learning_rate": 1.532248268164455e-05,
|
|
"loss": 0.7790758609771729,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 1.0687795283573616,
|
|
"grad_norm": 0.7289340496063232,
|
|
"learning_rate": 1.5310371483431138e-05,
|
|
"loss": 0.7054307460784912,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 1.0700772553075004,
|
|
"grad_norm": 0.7037842869758606,
|
|
"learning_rate": 1.529824942685212e-05,
|
|
"loss": 0.7457549571990967,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 1.0713749822576393,
|
|
"grad_norm": 0.7253069877624512,
|
|
"learning_rate": 1.528611653669403e-05,
|
|
"loss": 0.7203331589698792,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 1.0726727092077784,
|
|
"grad_norm": 0.7243335247039795,
|
|
"learning_rate": 1.5273972837765566e-05,
|
|
"loss": 0.7370164394378662,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 1.0739704361579172,
|
|
"grad_norm": 0.6802127957344055,
|
|
"learning_rate": 1.526181835489751e-05,
|
|
"loss": 0.7022003531455994,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 1.075268163108056,
|
|
"grad_norm": 0.7470188736915588,
|
|
"learning_rate": 1.5249653112942708e-05,
|
|
"loss": 0.7355238795280457,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 1.076565890058195,
|
|
"grad_norm": 0.7139303684234619,
|
|
"learning_rate": 1.5237477136776e-05,
|
|
"loss": 0.6995757222175598,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 1.0778636170083338,
|
|
"grad_norm": 0.6893638372421265,
|
|
"learning_rate": 1.5225290451294173e-05,
|
|
"loss": 0.6514896750450134,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 1.0791613439584729,
|
|
"grad_norm": 0.7205830812454224,
|
|
"learning_rate": 1.521309308141592e-05,
|
|
"loss": 0.6881433725357056,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 1.0804590709086117,
|
|
"grad_norm": 0.7569621205329895,
|
|
"learning_rate": 1.5200885052081767e-05,
|
|
"loss": 0.7357972264289856,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 1.0817567978587506,
|
|
"grad_norm": 0.7436279654502869,
|
|
"learning_rate": 1.518866638825405e-05,
|
|
"loss": 0.758313775062561,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 1.0830545248088894,
|
|
"grad_norm": 0.7273634076118469,
|
|
"learning_rate": 1.517643711491684e-05,
|
|
"loss": 0.6798244714736938,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 1.0843522517590283,
|
|
"grad_norm": 0.6966442465782166,
|
|
"learning_rate": 1.516419725707591e-05,
|
|
"loss": 0.7077891826629639,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 1.0856499787091671,
|
|
"grad_norm": 0.6794623732566833,
|
|
"learning_rate": 1.5151946839758673e-05,
|
|
"loss": 0.6736932992935181,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 1.0869477056593062,
|
|
"grad_norm": 0.7189822196960449,
|
|
"learning_rate": 1.5139685888014123e-05,
|
|
"loss": 0.7594777345657349,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 1.088245432609445,
|
|
"grad_norm": 0.7691319584846497,
|
|
"learning_rate": 1.512741442691281e-05,
|
|
"loss": 0.7986084818840027,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 1.089543159559584,
|
|
"grad_norm": 0.7428483366966248,
|
|
"learning_rate": 1.5115132481546763e-05,
|
|
"loss": 0.7112255096435547,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 1.0908408865097228,
|
|
"grad_norm": 0.7567489743232727,
|
|
"learning_rate": 1.5102840077029452e-05,
|
|
"loss": 0.647540807723999,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 1.0921386134598616,
|
|
"grad_norm": 0.7548873424530029,
|
|
"learning_rate": 1.509053723849574e-05,
|
|
"loss": 0.776237428188324,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 1.0934363404100007,
|
|
"grad_norm": 0.7588720917701721,
|
|
"learning_rate": 1.5078223991101805e-05,
|
|
"loss": 0.6855933666229248,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 1.0947340673601396,
|
|
"grad_norm": 0.7549242973327637,
|
|
"learning_rate": 1.5065900360025128e-05,
|
|
"loss": 0.7288146615028381,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 1.0960317943102784,
|
|
"grad_norm": 0.7281069755554199,
|
|
"learning_rate": 1.5053566370464416e-05,
|
|
"loss": 0.7359070777893066,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 1.0973295212604173,
|
|
"grad_norm": 0.709331750869751,
|
|
"learning_rate": 1.5041222047639558e-05,
|
|
"loss": 0.718718945980072,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 1.0986272482105561,
|
|
"grad_norm": 0.684161365032196,
|
|
"learning_rate": 1.5028867416791566e-05,
|
|
"loss": 0.6832801699638367,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 1.099924975160695,
|
|
"grad_norm": 0.7570529580116272,
|
|
"learning_rate": 1.5016502503182533e-05,
|
|
"loss": 0.712772786617279,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 1.101222702110834,
|
|
"grad_norm": 0.7224586606025696,
|
|
"learning_rate": 1.5004127332095579e-05,
|
|
"loss": 0.72933429479599,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 1.102520429060973,
|
|
"grad_norm": 0.7530233263969421,
|
|
"learning_rate": 1.49917419288348e-05,
|
|
"loss": 0.7607170343399048,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 1.1038181560111118,
|
|
"grad_norm": 0.7433916926383972,
|
|
"learning_rate": 1.4979346318725203e-05,
|
|
"loss": 0.7284337282180786,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 1.1051158829612506,
|
|
"grad_norm": 0.7271002531051636,
|
|
"learning_rate": 1.4966940527112679e-05,
|
|
"loss": 0.7452124357223511,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 1.1064136099113895,
|
|
"grad_norm": 0.7177510857582092,
|
|
"learning_rate": 1.4954524579363932e-05,
|
|
"loss": 0.7781730890274048,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 1.1077113368615286,
|
|
"grad_norm": 0.7278553247451782,
|
|
"learning_rate": 1.4942098500866428e-05,
|
|
"loss": 0.760970413684845,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 1.1090090638116674,
|
|
"grad_norm": 0.7369382977485657,
|
|
"learning_rate": 1.4929662317028359e-05,
|
|
"loss": 0.7270724177360535,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 1.1103067907618063,
|
|
"grad_norm": 0.7529125213623047,
|
|
"learning_rate": 1.491721605327857e-05,
|
|
"loss": 0.6972394585609436,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 1.1116045177119451,
|
|
"grad_norm": 0.8102325201034546,
|
|
"learning_rate": 1.490475973506652e-05,
|
|
"loss": 0.7593643069267273,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 1.112902244662084,
|
|
"grad_norm": 0.7033381462097168,
|
|
"learning_rate": 1.4892293387862221e-05,
|
|
"loss": 0.750421404838562,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 1.114199971612223,
|
|
"grad_norm": 0.7504622340202332,
|
|
"learning_rate": 1.487981703715621e-05,
|
|
"loss": 0.7422147989273071,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 1.115497698562362,
|
|
"grad_norm": 0.7424933910369873,
|
|
"learning_rate": 1.4867330708459463e-05,
|
|
"loss": 0.7375016212463379,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 1.1167954255125008,
|
|
"grad_norm": 0.73978191614151,
|
|
"learning_rate": 1.4854834427303353e-05,
|
|
"loss": 0.7315906286239624,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 1.1180931524626396,
|
|
"grad_norm": 0.7480568289756775,
|
|
"learning_rate": 1.4842328219239618e-05,
|
|
"loss": 0.7146769762039185,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 1.1193908794127785,
|
|
"grad_norm": 0.6838370561599731,
|
|
"learning_rate": 1.4829812109840291e-05,
|
|
"loss": 0.6863071918487549,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 1.1206886063629176,
|
|
"grad_norm": 0.69765305519104,
|
|
"learning_rate": 1.4817286124697647e-05,
|
|
"loss": 0.6740079522132874,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 1.1219863333130564,
|
|
"grad_norm": 0.7375463843345642,
|
|
"learning_rate": 1.480475028942415e-05,
|
|
"loss": 0.7721714973449707,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 1.1232840602631953,
|
|
"grad_norm": 0.7765669226646423,
|
|
"learning_rate": 1.4792204629652414e-05,
|
|
"loss": 0.6988716125488281,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 1.1245817872133341,
|
|
"grad_norm": 0.6921293139457703,
|
|
"learning_rate": 1.4779649171035138e-05,
|
|
"loss": 0.7338443398475647,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 1.125879514163473,
|
|
"grad_norm": 0.7645788192749023,
|
|
"learning_rate": 1.4767083939245055e-05,
|
|
"loss": 0.7597560882568359,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 1.1271772411136118,
|
|
"grad_norm": 0.7806273698806763,
|
|
"learning_rate": 1.475450895997489e-05,
|
|
"loss": 0.7360360026359558,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 1.128474968063751,
|
|
"grad_norm": 0.7329487204551697,
|
|
"learning_rate": 1.4741924258937283e-05,
|
|
"loss": 0.694042980670929,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 1.1297726950138898,
|
|
"grad_norm": 0.7490030527114868,
|
|
"learning_rate": 1.472932986186477e-05,
|
|
"loss": 0.771519660949707,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 1.1310704219640286,
|
|
"grad_norm": 0.7821305990219116,
|
|
"learning_rate": 1.47167257945097e-05,
|
|
"loss": 0.7572095990180969,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 1.1323681489141675,
|
|
"grad_norm": 0.745883584022522,
|
|
"learning_rate": 1.4704112082644207e-05,
|
|
"loss": 0.7173527479171753,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 1.1336658758643063,
|
|
"grad_norm": 0.7457818984985352,
|
|
"learning_rate": 1.4691488752060132e-05,
|
|
"loss": 0.7411136031150818,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 1.1349636028144454,
|
|
"grad_norm": 0.7116679549217224,
|
|
"learning_rate": 1.4678855828568996e-05,
|
|
"loss": 0.6630608439445496,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 1.1362613297645843,
|
|
"grad_norm": 0.7429471611976624,
|
|
"learning_rate": 1.4666213338001929e-05,
|
|
"loss": 0.6890819668769836,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 1.1375590567147231,
|
|
"grad_norm": 0.7173399925231934,
|
|
"learning_rate": 1.4653561306209625e-05,
|
|
"loss": 0.7061414122581482,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 1.138856783664862,
|
|
"grad_norm": 0.7341779470443726,
|
|
"learning_rate": 1.4640899759062285e-05,
|
|
"loss": 0.7564276456832886,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 1.1401545106150008,
|
|
"grad_norm": 0.73567795753479,
|
|
"learning_rate": 1.462822872244957e-05,
|
|
"loss": 0.7193140983581543,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 1.1414522375651397,
|
|
"grad_norm": 0.7359784841537476,
|
|
"learning_rate": 1.461554822228054e-05,
|
|
"loss": 0.724113941192627,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 1.1427499645152788,
|
|
"grad_norm": 0.6934400200843811,
|
|
"learning_rate": 1.460285828448361e-05,
|
|
"loss": 0.6648344397544861,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 1.1440476914654176,
|
|
"grad_norm": 0.6720191836357117,
|
|
"learning_rate": 1.4590158935006494e-05,
|
|
"loss": 0.6355569362640381,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 1.1453454184155565,
|
|
"grad_norm": 0.7342029809951782,
|
|
"learning_rate": 1.4577450199816142e-05,
|
|
"loss": 0.7470182180404663,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 1.1466431453656953,
|
|
"grad_norm": 0.7566630244255066,
|
|
"learning_rate": 1.4564732104898702e-05,
|
|
"loss": 0.7848218679428101,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 1.1479408723158344,
|
|
"grad_norm": 0.6953855752944946,
|
|
"learning_rate": 1.4552004676259462e-05,
|
|
"loss": 0.7087516784667969,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 1.1492385992659733,
|
|
"grad_norm": 0.7306509613990784,
|
|
"learning_rate": 1.453926793992279e-05,
|
|
"loss": 0.7669079303741455,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 1.1505363262161121,
|
|
"grad_norm": 0.7278076410293579,
|
|
"learning_rate": 1.4526521921932091e-05,
|
|
"loss": 0.7629184722900391,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 1.151834053166251,
|
|
"grad_norm": 0.7405791878700256,
|
|
"learning_rate": 1.4513766648349742e-05,
|
|
"loss": 0.6739349961280823,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 1.1531317801163898,
|
|
"grad_norm": 0.7238565683364868,
|
|
"learning_rate": 1.4501002145257048e-05,
|
|
"loss": 0.7271534204483032,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 1.1544295070665287,
|
|
"grad_norm": 0.6887433528900146,
|
|
"learning_rate": 1.4488228438754191e-05,
|
|
"loss": 0.7166074514389038,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 1.1557272340166678,
|
|
"grad_norm": 0.7274357676506042,
|
|
"learning_rate": 1.4475445554960166e-05,
|
|
"loss": 0.7644513845443726,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 1.1570249609668066,
|
|
"grad_norm": 0.7332258224487305,
|
|
"learning_rate": 1.4462653520012736e-05,
|
|
"loss": 0.7806090116500854,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 1.1583226879169455,
|
|
"grad_norm": 0.7651371359825134,
|
|
"learning_rate": 1.4449852360068372e-05,
|
|
"loss": 0.774925947189331,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 1.1596204148670843,
|
|
"grad_norm": 0.718445897102356,
|
|
"learning_rate": 1.4437042101302212e-05,
|
|
"loss": 0.7388082146644592,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 1.1609181418172232,
|
|
"grad_norm": 0.7201905250549316,
|
|
"learning_rate": 1.4424222769907985e-05,
|
|
"loss": 0.6872411966323853,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 1.1622158687673623,
|
|
"grad_norm": 0.7322660088539124,
|
|
"learning_rate": 1.4411394392097985e-05,
|
|
"loss": 0.7020053267478943,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 1.1635135957175011,
|
|
"grad_norm": 0.7322126626968384,
|
|
"learning_rate": 1.4398556994102996e-05,
|
|
"loss": 0.746367335319519,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 1.16481132266764,
|
|
"grad_norm": 0.7316040992736816,
|
|
"learning_rate": 1.4385710602172245e-05,
|
|
"loss": 0.7530633807182312,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 1.1661090496177788,
|
|
"grad_norm": 0.7623510360717773,
|
|
"learning_rate": 1.4372855242573356e-05,
|
|
"loss": 0.7122158408164978,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 1.1674067765679177,
|
|
"grad_norm": 0.7587069869041443,
|
|
"learning_rate": 1.4359990941592283e-05,
|
|
"loss": 0.7452347278594971,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 1.1687045035180565,
|
|
"grad_norm": 0.7146732807159424,
|
|
"learning_rate": 1.4347117725533269e-05,
|
|
"loss": 0.670911431312561,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 1.1700022304681956,
|
|
"grad_norm": 0.6925002932548523,
|
|
"learning_rate": 1.4334235620718774e-05,
|
|
"loss": 0.6600379943847656,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 1.1712999574183345,
|
|
"grad_norm": 0.7344015836715698,
|
|
"learning_rate": 1.4321344653489453e-05,
|
|
"loss": 0.7038690447807312,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 1.1725976843684733,
|
|
"grad_norm": 0.7387973070144653,
|
|
"learning_rate": 1.4308444850204066e-05,
|
|
"loss": 0.7008363604545593,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 1.1738954113186122,
|
|
"grad_norm": 0.7728487849235535,
|
|
"learning_rate": 1.4295536237239445e-05,
|
|
"loss": 0.7336927652359009,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 1.175193138268751,
|
|
"grad_norm": 0.7491990923881531,
|
|
"learning_rate": 1.4282618840990438e-05,
|
|
"loss": 0.7324055433273315,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 1.1764908652188901,
|
|
"grad_norm": 0.723862886428833,
|
|
"learning_rate": 1.4269692687869849e-05,
|
|
"loss": 0.7677553296089172,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 1.177788592169029,
|
|
"grad_norm": 0.7578226923942566,
|
|
"learning_rate": 1.425675780430839e-05,
|
|
"loss": 0.7772313356399536,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 1.1790863191191678,
|
|
"grad_norm": 0.7269909977912903,
|
|
"learning_rate": 1.4243814216754626e-05,
|
|
"loss": 0.7330427765846252,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 1.1803840460693067,
|
|
"grad_norm": 0.7582956552505493,
|
|
"learning_rate": 1.4230861951674914e-05,
|
|
"loss": 0.7717634439468384,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 1.1816817730194455,
|
|
"grad_norm": 0.7162467837333679,
|
|
"learning_rate": 1.421790103555336e-05,
|
|
"loss": 0.7092885375022888,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 1.1829794999695844,
|
|
"grad_norm": 0.743224024772644,
|
|
"learning_rate": 1.4204931494891759e-05,
|
|
"loss": 0.7082977294921875,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 1.1842772269197235,
|
|
"grad_norm": 0.7687066197395325,
|
|
"learning_rate": 1.4191953356209535e-05,
|
|
"loss": 0.7173585295677185,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 1.1855749538698623,
|
|
"grad_norm": 0.7276656627655029,
|
|
"learning_rate": 1.4178966646043702e-05,
|
|
"loss": 0.6923103928565979,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 1.1868726808200012,
|
|
"grad_norm": 0.7307775020599365,
|
|
"learning_rate": 1.4165971390948787e-05,
|
|
"loss": 0.7817268967628479,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 1.18817040777014,
|
|
"grad_norm": 0.7706684470176697,
|
|
"learning_rate": 1.4152967617496805e-05,
|
|
"loss": 0.7029048800468445,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 1.1894681347202791,
|
|
"grad_norm": 0.7382630705833435,
|
|
"learning_rate": 1.4139955352277176e-05,
|
|
"loss": 0.6833078265190125,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 1.190765861670418,
|
|
"grad_norm": 0.6961492300033569,
|
|
"learning_rate": 1.4126934621896692e-05,
|
|
"loss": 0.6633516550064087,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 1.1920635886205568,
|
|
"grad_norm": 0.7289763689041138,
|
|
"learning_rate": 1.4113905452979455e-05,
|
|
"loss": 0.7273116707801819,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 1.1933613155706957,
|
|
"grad_norm": 0.6953696608543396,
|
|
"learning_rate": 1.410086787216681e-05,
|
|
"loss": 0.6880172491073608,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 1.1933613155706957,
|
|
"eval_loss": 0.7621704339981079,
|
|
"eval_runtime": 143.9146,
|
|
"eval_samples_per_second": 36.077,
|
|
"eval_steps_per_second": 9.019,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 1.1946590425208345,
|
|
"grad_norm": 0.6652716398239136,
|
|
"learning_rate": 1.4087821906117314e-05,
|
|
"loss": 0.6670587658882141,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 1.1959567694709734,
|
|
"grad_norm": 0.7497081756591797,
|
|
"learning_rate": 1.4074767581506666e-05,
|
|
"loss": 0.7381057739257812,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 1.1972544964211125,
|
|
"grad_norm": 0.710457444190979,
|
|
"learning_rate": 1.4061704925027653e-05,
|
|
"loss": 0.6957287192344666,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 1.1985522233712513,
|
|
"grad_norm": 0.7493513226509094,
|
|
"learning_rate": 1.4048633963390105e-05,
|
|
"loss": 0.6821112036705017,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 1.1998499503213902,
|
|
"grad_norm": 0.7443753480911255,
|
|
"learning_rate": 1.4035554723320828e-05,
|
|
"loss": 0.7110794186592102,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 1.201147677271529,
|
|
"grad_norm": 0.6964433789253235,
|
|
"learning_rate": 1.4022467231563554e-05,
|
|
"loss": 0.6899577379226685,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 1.202445404221668,
|
|
"grad_norm": 0.718528687953949,
|
|
"learning_rate": 1.4009371514878898e-05,
|
|
"loss": 0.7851035594940186,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 1.203743131171807,
|
|
"grad_norm": 0.7249849438667297,
|
|
"learning_rate": 1.399626760004428e-05,
|
|
"loss": 0.7298780679702759,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 1.2050408581219458,
|
|
"grad_norm": 0.6934380531311035,
|
|
"learning_rate": 1.3983155513853897e-05,
|
|
"loss": 0.7791250944137573,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 1.2063385850720847,
|
|
"grad_norm": 0.704552173614502,
|
|
"learning_rate": 1.3970035283118639e-05,
|
|
"loss": 0.7045942544937134,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 1.2076363120222235,
|
|
"grad_norm": 0.748252809047699,
|
|
"learning_rate": 1.3956906934666056e-05,
|
|
"loss": 0.7210633158683777,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 1.2089340389723624,
|
|
"grad_norm": 0.7162604331970215,
|
|
"learning_rate": 1.3943770495340307e-05,
|
|
"loss": 0.7707422375679016,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 1.2102317659225013,
|
|
"grad_norm": 0.6919230222702026,
|
|
"learning_rate": 1.3930625992002076e-05,
|
|
"loss": 0.7039645910263062,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 1.2115294928726403,
|
|
"grad_norm": 0.7416049242019653,
|
|
"learning_rate": 1.391747345152855e-05,
|
|
"loss": 0.7351235747337341,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 1.2128272198227792,
|
|
"grad_norm": 0.7046512961387634,
|
|
"learning_rate": 1.3904312900813345e-05,
|
|
"loss": 0.659813642501831,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 1.214124946772918,
|
|
"grad_norm": 0.6865445971488953,
|
|
"learning_rate": 1.3891144366766457e-05,
|
|
"loss": 0.6879123449325562,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 1.215422673723057,
|
|
"grad_norm": 0.7112798094749451,
|
|
"learning_rate": 1.3877967876314205e-05,
|
|
"loss": 0.745692789554596,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 1.216720400673196,
|
|
"grad_norm": 0.7131559252738953,
|
|
"learning_rate": 1.3864783456399174e-05,
|
|
"loss": 0.7047199010848999,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 1.2180181276233348,
|
|
"grad_norm": 0.7183334231376648,
|
|
"learning_rate": 1.3851591133980167e-05,
|
|
"loss": 0.7335140109062195,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 1.2193158545734737,
|
|
"grad_norm": 0.7161308526992798,
|
|
"learning_rate": 1.3838390936032146e-05,
|
|
"loss": 0.6805643439292908,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 1.2206135815236125,
|
|
"grad_norm": 0.6899462938308716,
|
|
"learning_rate": 1.3825182889546173e-05,
|
|
"loss": 0.6711665391921997,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 1.2219113084737514,
|
|
"grad_norm": 0.7179728150367737,
|
|
"learning_rate": 1.3811967021529362e-05,
|
|
"loss": 0.730987012386322,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 1.2232090354238903,
|
|
"grad_norm": 0.7028578519821167,
|
|
"learning_rate": 1.3798743359004816e-05,
|
|
"loss": 0.7164129614830017,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 1.2245067623740293,
|
|
"grad_norm": 0.7241238355636597,
|
|
"learning_rate": 1.378551192901158e-05,
|
|
"loss": 0.6604956984519958,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 1.2258044893241682,
|
|
"grad_norm": 0.6871349215507507,
|
|
"learning_rate": 1.3772272758604576e-05,
|
|
"loss": 0.705906093120575,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 1.227102216274307,
|
|
"grad_norm": 0.7182629108428955,
|
|
"learning_rate": 1.375902587485456e-05,
|
|
"loss": 0.6978931427001953,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 1.228399943224446,
|
|
"grad_norm": 0.7523950934410095,
|
|
"learning_rate": 1.3745771304848056e-05,
|
|
"loss": 0.669691264629364,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 1.2296976701745848,
|
|
"grad_norm": 0.736535906791687,
|
|
"learning_rate": 1.3732509075687302e-05,
|
|
"loss": 0.6971163749694824,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 1.2309953971247238,
|
|
"grad_norm": 0.773280143737793,
|
|
"learning_rate": 1.3719239214490203e-05,
|
|
"loss": 0.7307339906692505,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 1.2322931240748627,
|
|
"grad_norm": 0.7597857713699341,
|
|
"learning_rate": 1.3705961748390264e-05,
|
|
"loss": 0.6916163563728333,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 1.2335908510250015,
|
|
"grad_norm": 0.7426233291625977,
|
|
"learning_rate": 1.3692676704536547e-05,
|
|
"loss": 0.7779046297073364,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 1.2348885779751404,
|
|
"grad_norm": 0.7428677082061768,
|
|
"learning_rate": 1.3679384110093601e-05,
|
|
"loss": 0.7056743502616882,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 1.2361863049252793,
|
|
"grad_norm": 0.7308823466300964,
|
|
"learning_rate": 1.3666083992241414e-05,
|
|
"loss": 0.7445065379142761,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 1.2374840318754181,
|
|
"grad_norm": 0.7000466585159302,
|
|
"learning_rate": 1.3652776378175366e-05,
|
|
"loss": 0.7621708512306213,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 1.2387817588255572,
|
|
"grad_norm": 0.7069138288497925,
|
|
"learning_rate": 1.3639461295106157e-05,
|
|
"loss": 0.6963789463043213,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 1.240079485775696,
|
|
"grad_norm": 0.7114101052284241,
|
|
"learning_rate": 1.3626138770259765e-05,
|
|
"loss": 0.6562871932983398,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 1.241377212725835,
|
|
"grad_norm": 0.7246086597442627,
|
|
"learning_rate": 1.3612808830877377e-05,
|
|
"loss": 0.6914277672767639,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 1.2426749396759738,
|
|
"grad_norm": 0.7212405800819397,
|
|
"learning_rate": 1.3599471504215347e-05,
|
|
"loss": 0.7332183122634888,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 1.2439726666261126,
|
|
"grad_norm": 0.725243866443634,
|
|
"learning_rate": 1.358612681754513e-05,
|
|
"loss": 0.7095848321914673,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 1.2452703935762517,
|
|
"grad_norm": 0.7690359354019165,
|
|
"learning_rate": 1.357277479815324e-05,
|
|
"loss": 0.7376914024353027,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 1.2465681205263905,
|
|
"grad_norm": 0.7036330699920654,
|
|
"learning_rate": 1.355941547334117e-05,
|
|
"loss": 0.6845636367797852,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 1.2478658474765294,
|
|
"grad_norm": 0.7338976860046387,
|
|
"learning_rate": 1.3546048870425356e-05,
|
|
"loss": 0.6979953050613403,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 1.2491635744266683,
|
|
"grad_norm": 0.7343106865882874,
|
|
"learning_rate": 1.3532675016737127e-05,
|
|
"loss": 0.7461492419242859,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 1.250461301376807,
|
|
"grad_norm": 0.7208863496780396,
|
|
"learning_rate": 1.3519293939622622e-05,
|
|
"loss": 0.8038127422332764,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 1.251759028326946,
|
|
"grad_norm": 0.7410427331924438,
|
|
"learning_rate": 1.3505905666442757e-05,
|
|
"loss": 0.7741251587867737,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 1.253056755277085,
|
|
"grad_norm": 0.711874783039093,
|
|
"learning_rate": 1.3492510224573165e-05,
|
|
"loss": 0.6908672451972961,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 1.254354482227224,
|
|
"grad_norm": 0.6897700428962708,
|
|
"learning_rate": 1.3479107641404134e-05,
|
|
"loss": 0.6856587529182434,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 1.2556522091773628,
|
|
"grad_norm": 0.6764082908630371,
|
|
"learning_rate": 1.3465697944340552e-05,
|
|
"loss": 0.6477972865104675,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 1.2569499361275016,
|
|
"grad_norm": 0.7004117965698242,
|
|
"learning_rate": 1.3452281160801856e-05,
|
|
"loss": 0.7135658264160156,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 1.2582476630776407,
|
|
"grad_norm": 0.7178849577903748,
|
|
"learning_rate": 1.3438857318221974e-05,
|
|
"loss": 0.7354244589805603,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 1.2595453900277795,
|
|
"grad_norm": 0.7121056318283081,
|
|
"learning_rate": 1.3425426444049265e-05,
|
|
"loss": 0.7121109962463379,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 1.2608431169779184,
|
|
"grad_norm": 0.8285553455352783,
|
|
"learning_rate": 1.3411988565746467e-05,
|
|
"loss": 0.7759053111076355,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 1.2621408439280573,
|
|
"grad_norm": 0.6977941989898682,
|
|
"learning_rate": 1.3398543710790642e-05,
|
|
"loss": 0.7189201712608337,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 1.263438570878196,
|
|
"grad_norm": 0.7547982931137085,
|
|
"learning_rate": 1.3385091906673115e-05,
|
|
"loss": 0.7352871298789978,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 1.264736297828335,
|
|
"grad_norm": 0.7178804278373718,
|
|
"learning_rate": 1.3371633180899417e-05,
|
|
"loss": 0.7920108437538147,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 1.2660340247784738,
|
|
"grad_norm": 0.7035505771636963,
|
|
"learning_rate": 1.335816756098924e-05,
|
|
"loss": 0.7362672090530396,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 1.267331751728613,
|
|
"grad_norm": 0.7581067681312561,
|
|
"learning_rate": 1.3344695074476365e-05,
|
|
"loss": 0.7702075839042664,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 1.2686294786787518,
|
|
"grad_norm": 0.7533540725708008,
|
|
"learning_rate": 1.3331215748908622e-05,
|
|
"loss": 0.7555018067359924,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 1.2699272056288906,
|
|
"grad_norm": 0.7056939601898193,
|
|
"learning_rate": 1.3317729611847818e-05,
|
|
"loss": 0.7297285795211792,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 1.2712249325790295,
|
|
"grad_norm": 0.7933931946754456,
|
|
"learning_rate": 1.3304236690869688e-05,
|
|
"loss": 0.7637395262718201,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 1.2725226595291685,
|
|
"grad_norm": 0.7511240243911743,
|
|
"learning_rate": 1.329073701356384e-05,
|
|
"loss": 0.7278518676757812,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 1.2738203864793074,
|
|
"grad_norm": 0.6915922164916992,
|
|
"learning_rate": 1.3277230607533698e-05,
|
|
"loss": 0.6694924831390381,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 1.2751181134294463,
|
|
"grad_norm": 0.7327374219894409,
|
|
"learning_rate": 1.3263717500396446e-05,
|
|
"loss": 0.714762806892395,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 1.276415840379585,
|
|
"grad_norm": 0.7382856607437134,
|
|
"learning_rate": 1.3250197719782966e-05,
|
|
"loss": 0.7134686708450317,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 1.277713567329724,
|
|
"grad_norm": 0.7472854256629944,
|
|
"learning_rate": 1.3236671293337788e-05,
|
|
"loss": 0.7220948934555054,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 1.2790112942798628,
|
|
"grad_norm": 0.7201051712036133,
|
|
"learning_rate": 1.3223138248719032e-05,
|
|
"loss": 0.7394418120384216,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 1.280309021230002,
|
|
"grad_norm": 0.7629786133766174,
|
|
"learning_rate": 1.3209598613598344e-05,
|
|
"loss": 0.7015069127082825,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 1.2816067481801408,
|
|
"grad_norm": 0.7126546502113342,
|
|
"learning_rate": 1.3196052415660856e-05,
|
|
"loss": 0.7289220690727234,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 1.2829044751302796,
|
|
"grad_norm": 0.7296859622001648,
|
|
"learning_rate": 1.318249968260511e-05,
|
|
"loss": 0.7893659472465515,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 1.2842022020804185,
|
|
"grad_norm": 0.7498401403427124,
|
|
"learning_rate": 1.316894044214302e-05,
|
|
"loss": 0.7200069427490234,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 1.2854999290305575,
|
|
"grad_norm": 0.7126410603523254,
|
|
"learning_rate": 1.3155374721999797e-05,
|
|
"loss": 0.7033067345619202,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 1.2867976559806964,
|
|
"grad_norm": 0.7097041606903076,
|
|
"learning_rate": 1.3141802549913907e-05,
|
|
"loss": 0.7358456254005432,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 1.2880953829308353,
|
|
"grad_norm": 0.6961123943328857,
|
|
"learning_rate": 1.3128223953637003e-05,
|
|
"loss": 0.6741704940795898,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 1.289393109880974,
|
|
"grad_norm": 0.7323908805847168,
|
|
"learning_rate": 1.3114638960933883e-05,
|
|
"loss": 0.8081434965133667,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 1.290690836831113,
|
|
"grad_norm": 0.713190495967865,
|
|
"learning_rate": 1.3101047599582415e-05,
|
|
"loss": 0.7475412487983704,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 1.2919885637812518,
|
|
"grad_norm": 0.7204756140708923,
|
|
"learning_rate": 1.3087449897373494e-05,
|
|
"loss": 0.7166237831115723,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 1.2932862907313907,
|
|
"grad_norm": 0.7209048271179199,
|
|
"learning_rate": 1.307384588211098e-05,
|
|
"loss": 0.7091537117958069,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 1.2945840176815298,
|
|
"grad_norm": 0.7139458656311035,
|
|
"learning_rate": 1.306023558161164e-05,
|
|
"loss": 0.7146654725074768,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 1.2958817446316686,
|
|
"grad_norm": 0.7128956317901611,
|
|
"learning_rate": 1.3046619023705095e-05,
|
|
"loss": 0.821353018283844,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 1.2971794715818075,
|
|
"grad_norm": 0.7287904620170593,
|
|
"learning_rate": 1.3032996236233756e-05,
|
|
"loss": 0.7813044786453247,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 1.2984771985319463,
|
|
"grad_norm": 0.7277258038520813,
|
|
"learning_rate": 1.3019367247052781e-05,
|
|
"loss": 0.7448681592941284,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 1.2997749254820854,
|
|
"grad_norm": 0.7179688811302185,
|
|
"learning_rate": 1.300573208403e-05,
|
|
"loss": 0.6965285539627075,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 1.3010726524322243,
|
|
"grad_norm": 0.7211664319038391,
|
|
"learning_rate": 1.2992090775045868e-05,
|
|
"loss": 0.7049282789230347,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 1.302370379382363,
|
|
"grad_norm": 0.6898071765899658,
|
|
"learning_rate": 1.2978443347993415e-05,
|
|
"loss": 0.6415733695030212,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 1.303668106332502,
|
|
"grad_norm": 0.7255175709724426,
|
|
"learning_rate": 1.296478983077817e-05,
|
|
"loss": 0.708603024482727,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 1.3049658332826408,
|
|
"grad_norm": 0.7339725494384766,
|
|
"learning_rate": 1.2951130251318125e-05,
|
|
"loss": 0.73588627576828,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 1.3062635602327797,
|
|
"grad_norm": 0.6914424300193787,
|
|
"learning_rate": 1.2937464637543655e-05,
|
|
"loss": 0.7236727476119995,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 1.3075612871829188,
|
|
"grad_norm": 0.6850101351737976,
|
|
"learning_rate": 1.2923793017397488e-05,
|
|
"loss": 0.6565558910369873,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 1.3088590141330576,
|
|
"grad_norm": 0.6893193125724792,
|
|
"learning_rate": 1.2910115418834624e-05,
|
|
"loss": 0.6460487246513367,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 1.3101567410831965,
|
|
"grad_norm": 0.7375558018684387,
|
|
"learning_rate": 1.289643186982229e-05,
|
|
"loss": 0.8016327619552612,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 1.3114544680333353,
|
|
"grad_norm": 0.7113102078437805,
|
|
"learning_rate": 1.2882742398339884e-05,
|
|
"loss": 0.6883566975593567,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 1.3127521949834744,
|
|
"grad_norm": 0.7452290058135986,
|
|
"learning_rate": 1.2869047032378905e-05,
|
|
"loss": 0.7325704097747803,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 1.3140499219336133,
|
|
"grad_norm": 0.6935728192329407,
|
|
"learning_rate": 1.2855345799942915e-05,
|
|
"loss": 0.689193606376648,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 1.315347648883752,
|
|
"grad_norm": 0.7144383192062378,
|
|
"learning_rate": 1.2841638729047463e-05,
|
|
"loss": 0.6948485374450684,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 1.316645375833891,
|
|
"grad_norm": 0.6706473231315613,
|
|
"learning_rate": 1.2827925847720041e-05,
|
|
"loss": 0.7062092423439026,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 1.3179431027840298,
|
|
"grad_norm": 0.7125740051269531,
|
|
"learning_rate": 1.2814207184000018e-05,
|
|
"loss": 0.6752945780754089,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 1.3192408297341687,
|
|
"grad_norm": 0.7221876978874207,
|
|
"learning_rate": 1.2800482765938594e-05,
|
|
"loss": 0.7700286507606506,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 1.3205385566843075,
|
|
"grad_norm": 0.6877630949020386,
|
|
"learning_rate": 1.2786752621598726e-05,
|
|
"loss": 0.7289664149284363,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 1.3218362836344466,
|
|
"grad_norm": 0.7257193922996521,
|
|
"learning_rate": 1.2773016779055089e-05,
|
|
"loss": 0.6938936710357666,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 1.3231340105845855,
|
|
"grad_norm": 0.6880965828895569,
|
|
"learning_rate": 1.2759275266393998e-05,
|
|
"loss": 0.6982592344284058,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 1.3244317375347243,
|
|
"grad_norm": 0.683870792388916,
|
|
"learning_rate": 1.2745528111713373e-05,
|
|
"loss": 0.6983235478401184,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 1.3257294644848632,
|
|
"grad_norm": 0.7127654552459717,
|
|
"learning_rate": 1.2731775343122663e-05,
|
|
"loss": 0.7544030547142029,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 1.3270271914350023,
|
|
"grad_norm": 0.7284364104270935,
|
|
"learning_rate": 1.2718016988742799e-05,
|
|
"loss": 0.7375183701515198,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 1.328324918385141,
|
|
"grad_norm": 0.6857113838195801,
|
|
"learning_rate": 1.270425307670614e-05,
|
|
"loss": 0.6983596682548523,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 1.32962264533528,
|
|
"grad_norm": 0.7102038860321045,
|
|
"learning_rate": 1.2690483635156392e-05,
|
|
"loss": 0.7385768294334412,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 1.3309203722854188,
|
|
"grad_norm": 0.7345147728919983,
|
|
"learning_rate": 1.2676708692248583e-05,
|
|
"loss": 0.6854493618011475,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 1.3322180992355577,
|
|
"grad_norm": 0.7039386630058289,
|
|
"learning_rate": 1.2662928276148985e-05,
|
|
"loss": 0.7170513868331909,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 1.3335158261856965,
|
|
"grad_norm": 0.6941388845443726,
|
|
"learning_rate": 1.264914241503506e-05,
|
|
"loss": 0.7566976547241211,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 1.3348135531358354,
|
|
"grad_norm": 0.6874922513961792,
|
|
"learning_rate": 1.2635351137095408e-05,
|
|
"loss": 0.6834582686424255,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 1.3361112800859745,
|
|
"grad_norm": 0.7201216220855713,
|
|
"learning_rate": 1.2621554470529698e-05,
|
|
"loss": 0.734821617603302,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 1.3374090070361133,
|
|
"grad_norm": 0.7032731175422668,
|
|
"learning_rate": 1.2607752443548622e-05,
|
|
"loss": 0.7255396842956543,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 1.3387067339862522,
|
|
"grad_norm": 0.7893847823143005,
|
|
"learning_rate": 1.259394508437383e-05,
|
|
"loss": 0.7393696308135986,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 1.340004460936391,
|
|
"grad_norm": 0.7231351137161255,
|
|
"learning_rate": 1.2580132421237883e-05,
|
|
"loss": 0.7424145340919495,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 1.34130218788653,
|
|
"grad_norm": 0.7326940298080444,
|
|
"learning_rate": 1.2566314482384174e-05,
|
|
"loss": 0.7439311742782593,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 1.342599914836669,
|
|
"grad_norm": 0.775790810585022,
|
|
"learning_rate": 1.2552491296066895e-05,
|
|
"loss": 0.7325758934020996,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 1.3438976417868078,
|
|
"grad_norm": 0.7467171549797058,
|
|
"learning_rate": 1.2538662890550959e-05,
|
|
"loss": 0.7975653409957886,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 1.3451953687369467,
|
|
"grad_norm": 0.762482225894928,
|
|
"learning_rate": 1.252482929411196e-05,
|
|
"loss": 0.7613498568534851,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 1.3464930956870855,
|
|
"grad_norm": 0.6938416957855225,
|
|
"learning_rate": 1.25109905350361e-05,
|
|
"loss": 0.691423773765564,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 1.3477908226372244,
|
|
"grad_norm": 0.7459502816200256,
|
|
"learning_rate": 1.249714664162014e-05,
|
|
"loss": 0.7226969003677368,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 1.3490885495873635,
|
|
"grad_norm": 0.7236127853393555,
|
|
"learning_rate": 1.2483297642171332e-05,
|
|
"loss": 0.7204033732414246,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 1.3503862765375023,
|
|
"grad_norm": 0.7287815809249878,
|
|
"learning_rate": 1.246944356500738e-05,
|
|
"loss": 0.7803208231925964,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 1.3516840034876412,
|
|
"grad_norm": 0.7607238292694092,
|
|
"learning_rate": 1.2455584438456366e-05,
|
|
"loss": 0.7617399096488953,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 1.35298173043778,
|
|
"grad_norm": 0.707085907459259,
|
|
"learning_rate": 1.2441720290856694e-05,
|
|
"loss": 0.7277243733406067,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 1.354279457387919,
|
|
"grad_norm": 0.7148833274841309,
|
|
"learning_rate": 1.2427851150557036e-05,
|
|
"loss": 0.7467551231384277,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 1.355577184338058,
|
|
"grad_norm": 0.7209689617156982,
|
|
"learning_rate": 1.241397704591627e-05,
|
|
"loss": 0.6694290637969971,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 1.3568749112881968,
|
|
"grad_norm": 0.7720620036125183,
|
|
"learning_rate": 1.2400098005303436e-05,
|
|
"loss": 0.7658464312553406,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 1.3581726382383357,
|
|
"grad_norm": 0.68074631690979,
|
|
"learning_rate": 1.238621405709766e-05,
|
|
"loss": 0.6357854008674622,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 1.3594703651884745,
|
|
"grad_norm": 0.7629329562187195,
|
|
"learning_rate": 1.2372325229688093e-05,
|
|
"loss": 0.7309067249298096,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 1.3607680921386134,
|
|
"grad_norm": 0.7004507184028625,
|
|
"learning_rate": 1.235843155147388e-05,
|
|
"loss": 0.6715525388717651,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 1.3620658190887522,
|
|
"grad_norm": 0.6997591853141785,
|
|
"learning_rate": 1.2344533050864071e-05,
|
|
"loss": 0.6700186729431152,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 1.3633635460388913,
|
|
"grad_norm": 0.7181966304779053,
|
|
"learning_rate": 1.2330629756277588e-05,
|
|
"loss": 0.6444705724716187,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 1.3646612729890302,
|
|
"grad_norm": 0.780085563659668,
|
|
"learning_rate": 1.2316721696143141e-05,
|
|
"loss": 0.7659810185432434,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 1.365958999939169,
|
|
"grad_norm": 0.690724790096283,
|
|
"learning_rate": 1.23028088988992e-05,
|
|
"loss": 0.6315090656280518,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 1.3672567268893079,
|
|
"grad_norm": 0.7686077356338501,
|
|
"learning_rate": 1.228889139299391e-05,
|
|
"loss": 0.8060528039932251,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 1.368554453839447,
|
|
"grad_norm": 0.7056965827941895,
|
|
"learning_rate": 1.2274969206885048e-05,
|
|
"loss": 0.6794640421867371,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 1.3698521807895858,
|
|
"grad_norm": 0.7886383533477783,
|
|
"learning_rate": 1.2261042369039966e-05,
|
|
"loss": 0.7453962564468384,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 1.3711499077397247,
|
|
"grad_norm": 0.6753075122833252,
|
|
"learning_rate": 1.2247110907935518e-05,
|
|
"loss": 0.6878754496574402,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 1.3724476346898635,
|
|
"grad_norm": 0.670427143573761,
|
|
"learning_rate": 1.2233174852058015e-05,
|
|
"loss": 0.6822103261947632,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 1.3737453616400024,
|
|
"grad_norm": 0.725235641002655,
|
|
"learning_rate": 1.2219234229903163e-05,
|
|
"loss": 0.7130811810493469,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 1.3750430885901412,
|
|
"grad_norm": 0.7341755032539368,
|
|
"learning_rate": 1.2205289069976012e-05,
|
|
"loss": 0.6956161856651306,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 1.37634081554028,
|
|
"grad_norm": 0.7005776166915894,
|
|
"learning_rate": 1.2191339400790881e-05,
|
|
"loss": 0.6915519833564758,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 1.3776385424904192,
|
|
"grad_norm": 0.7250275015830994,
|
|
"learning_rate": 1.2177385250871312e-05,
|
|
"loss": 0.7210217118263245,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 1.378936269440558,
|
|
"grad_norm": 0.7169617414474487,
|
|
"learning_rate": 1.2163426648750009e-05,
|
|
"loss": 0.7050390839576721,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 1.3802339963906969,
|
|
"grad_norm": 0.7458826303482056,
|
|
"learning_rate": 1.2149463622968782e-05,
|
|
"loss": 0.7116800546646118,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 1.3815317233408357,
|
|
"grad_norm": 0.7212430834770203,
|
|
"learning_rate": 1.2135496202078487e-05,
|
|
"loss": 0.658031165599823,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 1.3828294502909748,
|
|
"grad_norm": 0.7072278261184692,
|
|
"learning_rate": 1.2121524414638958e-05,
|
|
"loss": 0.7117524147033691,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 1.3841271772411137,
|
|
"grad_norm": 0.7267945408821106,
|
|
"learning_rate": 1.2107548289218968e-05,
|
|
"loss": 0.690047025680542,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 1.3854249041912525,
|
|
"grad_norm": 0.7326766848564148,
|
|
"learning_rate": 1.2093567854396158e-05,
|
|
"loss": 0.7240371704101562,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 1.3867226311413914,
|
|
"grad_norm": 0.6955649256706238,
|
|
"learning_rate": 1.2079583138756976e-05,
|
|
"loss": 0.7229723334312439,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 1.3880203580915302,
|
|
"grad_norm": 0.6991240978240967,
|
|
"learning_rate": 1.206559417089663e-05,
|
|
"loss": 0.7131638526916504,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 1.389318085041669,
|
|
"grad_norm": 0.7009238600730896,
|
|
"learning_rate": 1.205160097941901e-05,
|
|
"loss": 0.7577610611915588,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 1.3906158119918082,
|
|
"grad_norm": 0.7368999719619751,
|
|
"learning_rate": 1.2037603592936656e-05,
|
|
"loss": 0.7876178026199341,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 1.391913538941947,
|
|
"grad_norm": 0.7627021670341492,
|
|
"learning_rate": 1.2023602040070679e-05,
|
|
"loss": 0.8456990718841553,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 1.3932112658920859,
|
|
"grad_norm": 0.7341564893722534,
|
|
"learning_rate": 1.2009596349450717e-05,
|
|
"loss": 0.7692890167236328,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 1.3945089928422247,
|
|
"grad_norm": 0.706305205821991,
|
|
"learning_rate": 1.1995586549714855e-05,
|
|
"loss": 0.7290987372398376,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 1.3958067197923638,
|
|
"grad_norm": 0.7150030136108398,
|
|
"learning_rate": 1.198157266950959e-05,
|
|
"loss": 0.7904977202415466,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 1.3971044467425027,
|
|
"grad_norm": 0.6936087608337402,
|
|
"learning_rate": 1.1967554737489762e-05,
|
|
"loss": 0.7233096361160278,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 1.3984021736926415,
|
|
"grad_norm": 0.705502450466156,
|
|
"learning_rate": 1.1953532782318491e-05,
|
|
"loss": 0.6974169015884399,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 1.3996999006427804,
|
|
"grad_norm": 0.7046432495117188,
|
|
"learning_rate": 1.1939506832667129e-05,
|
|
"loss": 0.7049128413200378,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 1.4009976275929192,
|
|
"grad_norm": 0.7448377013206482,
|
|
"learning_rate": 1.1925476917215191e-05,
|
|
"loss": 0.7288391590118408,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 1.402295354543058,
|
|
"grad_norm": 0.7215666174888611,
|
|
"learning_rate": 1.1911443064650301e-05,
|
|
"loss": 0.7517431974411011,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 1.403593081493197,
|
|
"grad_norm": 0.7152860164642334,
|
|
"learning_rate": 1.189740530366814e-05,
|
|
"loss": 0.7353943586349487,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 1.404890808443336,
|
|
"grad_norm": 0.7322341203689575,
|
|
"learning_rate": 1.1883363662972375e-05,
|
|
"loss": 0.7282765507698059,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 1.4061885353934749,
|
|
"grad_norm": 0.7007766962051392,
|
|
"learning_rate": 1.1869318171274606e-05,
|
|
"loss": 0.6773781776428223,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 1.4074862623436137,
|
|
"grad_norm": 0.6969038248062134,
|
|
"learning_rate": 1.1855268857294308e-05,
|
|
"loss": 0.7106554508209229,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 1.4087839892937526,
|
|
"grad_norm": 0.7315483093261719,
|
|
"learning_rate": 1.1841215749758774e-05,
|
|
"loss": 0.7127244472503662,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 1.4100817162438917,
|
|
"grad_norm": 0.7427330613136292,
|
|
"learning_rate": 1.182715887740305e-05,
|
|
"loss": 0.7914733290672302,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 1.4113794431940305,
|
|
"grad_norm": 0.7135612964630127,
|
|
"learning_rate": 1.1813098268969886e-05,
|
|
"loss": 0.7351382374763489,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 1.4126771701441694,
|
|
"grad_norm": 0.6763968467712402,
|
|
"learning_rate": 1.1799033953209664e-05,
|
|
"loss": 0.7243238687515259,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 1.4139748970943082,
|
|
"grad_norm": 0.6963580250740051,
|
|
"learning_rate": 1.178496595888035e-05,
|
|
"loss": 0.718358039855957,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 1.415272624044447,
|
|
"grad_norm": 0.7186612486839294,
|
|
"learning_rate": 1.1770894314747433e-05,
|
|
"loss": 0.7567769885063171,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 1.416570350994586,
|
|
"grad_norm": 0.7769639492034912,
|
|
"learning_rate": 1.1756819049583861e-05,
|
|
"loss": 0.6931068301200867,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 1.417868077944725,
|
|
"grad_norm": 0.6902489066123962,
|
|
"learning_rate": 1.1742740192169995e-05,
|
|
"loss": 0.7427462339401245,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 1.4191658048948639,
|
|
"grad_norm": 0.7374582886695862,
|
|
"learning_rate": 1.1728657771293529e-05,
|
|
"loss": 0.7023187279701233,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 1.4204635318450027,
|
|
"grad_norm": 0.7119615077972412,
|
|
"learning_rate": 1.171457181574945e-05,
|
|
"loss": 0.7274259328842163,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 1.4217612587951416,
|
|
"grad_norm": 0.7346155047416687,
|
|
"learning_rate": 1.1700482354339972e-05,
|
|
"loss": 0.7683991193771362,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 1.4230589857452807,
|
|
"grad_norm": 0.7501071095466614,
|
|
"learning_rate": 1.168638941587448e-05,
|
|
"loss": 0.7191241979598999,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 1.4243567126954195,
|
|
"grad_norm": 0.7470526695251465,
|
|
"learning_rate": 1.1672293029169466e-05,
|
|
"loss": 0.6885469555854797,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 1.4256544396455584,
|
|
"grad_norm": 0.7323938608169556,
|
|
"learning_rate": 1.165819322304847e-05,
|
|
"loss": 0.7280178666114807,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 1.4269521665956972,
|
|
"grad_norm": 0.735260546207428,
|
|
"learning_rate": 1.164409002634203e-05,
|
|
"loss": 0.7417027354240417,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 1.428249893545836,
|
|
"grad_norm": 0.6863338351249695,
|
|
"learning_rate": 1.162998346788761e-05,
|
|
"loss": 0.7153418660163879,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 1.429547620495975,
|
|
"grad_norm": 0.6918323636054993,
|
|
"learning_rate": 1.1615873576529556e-05,
|
|
"loss": 0.7203163504600525,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 1.4308453474461138,
|
|
"grad_norm": 0.6796247363090515,
|
|
"learning_rate": 1.1601760381119022e-05,
|
|
"loss": 0.6820694208145142,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 1.4321430743962529,
|
|
"grad_norm": 0.7495130896568298,
|
|
"learning_rate": 1.158764391051392e-05,
|
|
"loss": 0.8182595372200012,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 1.4334408013463917,
|
|
"grad_norm": 0.702680766582489,
|
|
"learning_rate": 1.1573524193578863e-05,
|
|
"loss": 0.6952674984931946,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 1.4347385282965306,
|
|
"grad_norm": 0.7394551634788513,
|
|
"learning_rate": 1.1559401259185095e-05,
|
|
"loss": 0.7986393570899963,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 1.4360362552466694,
|
|
"grad_norm": 0.7024036049842834,
|
|
"learning_rate": 1.1545275136210441e-05,
|
|
"loss": 0.7037473917007446,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 1.4373339821968085,
|
|
"grad_norm": 0.7654225826263428,
|
|
"learning_rate": 1.153114585353925e-05,
|
|
"loss": 0.788162350654602,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 1.4386317091469474,
|
|
"grad_norm": 0.7220718264579773,
|
|
"learning_rate": 1.1517013440062326e-05,
|
|
"loss": 0.677041232585907,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 1.4399294360970862,
|
|
"grad_norm": 0.636647641658783,
|
|
"learning_rate": 1.1502877924676881e-05,
|
|
"loss": 0.6478151679039001,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 1.441227163047225,
|
|
"grad_norm": 0.7449962496757507,
|
|
"learning_rate": 1.1488739336286467e-05,
|
|
"loss": 0.7527351975440979,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 1.442524889997364,
|
|
"grad_norm": 0.6970670819282532,
|
|
"learning_rate": 1.1474597703800915e-05,
|
|
"loss": 0.7169626951217651,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 1.4438226169475028,
|
|
"grad_norm": 0.7441650032997131,
|
|
"learning_rate": 1.1460453056136285e-05,
|
|
"loss": 0.750106930732727,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 1.4451203438976417,
|
|
"grad_norm": 0.7144120335578918,
|
|
"learning_rate": 1.14463054222148e-05,
|
|
"loss": 0.7835033535957336,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 1.4464180708477807,
|
|
"grad_norm": 0.7178052663803101,
|
|
"learning_rate": 1.1432154830964796e-05,
|
|
"loss": 0.755246639251709,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 1.4477157977979196,
|
|
"grad_norm": 0.7312644720077515,
|
|
"learning_rate": 1.1418001311320649e-05,
|
|
"loss": 0.7156558632850647,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 1.4490135247480584,
|
|
"grad_norm": 0.6545835137367249,
|
|
"learning_rate": 1.1403844892222717e-05,
|
|
"loss": 0.6448360085487366,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 1.4503112516981973,
|
|
"grad_norm": 0.7543350458145142,
|
|
"learning_rate": 1.1389685602617302e-05,
|
|
"loss": 0.7119275331497192,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 1.4516089786483364,
|
|
"grad_norm": 0.6919403672218323,
|
|
"learning_rate": 1.1375523471456564e-05,
|
|
"loss": 0.6998506188392639,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 1.4529067055984752,
|
|
"grad_norm": 0.7320676445960999,
|
|
"learning_rate": 1.1361358527698481e-05,
|
|
"loss": 0.7184922099113464,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 1.454204432548614,
|
|
"grad_norm": 0.672732949256897,
|
|
"learning_rate": 1.134719080030677e-05,
|
|
"loss": 0.6867491006851196,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 1.455502159498753,
|
|
"grad_norm": 0.6875948309898376,
|
|
"learning_rate": 1.1333020318250854e-05,
|
|
"loss": 0.7337048053741455,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 1.4567998864488918,
|
|
"grad_norm": 0.6922927498817444,
|
|
"learning_rate": 1.131884711050578e-05,
|
|
"loss": 0.6915356516838074,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 1.4580976133990307,
|
|
"grad_norm": 0.6755322217941284,
|
|
"learning_rate": 1.1304671206052168e-05,
|
|
"loss": 0.6491101980209351,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 1.4593953403491697,
|
|
"grad_norm": 0.698635995388031,
|
|
"learning_rate": 1.1290492633876164e-05,
|
|
"loss": 0.7431061267852783,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 1.4606930672993086,
|
|
"grad_norm": 0.6657348871231079,
|
|
"learning_rate": 1.1276311422969349e-05,
|
|
"loss": 0.7039294838905334,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 1.4619907942494474,
|
|
"grad_norm": 0.7172051072120667,
|
|
"learning_rate": 1.1262127602328712e-05,
|
|
"loss": 0.7308294773101807,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 1.4632885211995863,
|
|
"grad_norm": 0.6960781812667847,
|
|
"learning_rate": 1.124794120095658e-05,
|
|
"loss": 0.693443238735199,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 1.4645862481497254,
|
|
"grad_norm": 0.759774386882782,
|
|
"learning_rate": 1.1233752247860549e-05,
|
|
"loss": 0.7438464760780334,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 1.4658839750998642,
|
|
"grad_norm": 0.7278202772140503,
|
|
"learning_rate": 1.1219560772053442e-05,
|
|
"loss": 0.7231059074401855,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 1.467181702050003,
|
|
"grad_norm": 0.7277034521102905,
|
|
"learning_rate": 1.1205366802553231e-05,
|
|
"loss": 0.6796480417251587,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 1.468479429000142,
|
|
"grad_norm": 0.7773372530937195,
|
|
"learning_rate": 1.1191170368382992e-05,
|
|
"loss": 0.7957556247711182,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 1.4697771559502808,
|
|
"grad_norm": 0.7063891887664795,
|
|
"learning_rate": 1.117697149857084e-05,
|
|
"loss": 0.7295725345611572,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 1.4710748829004197,
|
|
"grad_norm": 0.7076992988586426,
|
|
"learning_rate": 1.1162770222149873e-05,
|
|
"loss": 0.7353643178939819,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 1.4723726098505585,
|
|
"grad_norm": 0.7097960710525513,
|
|
"learning_rate": 1.1148566568158099e-05,
|
|
"loss": 0.6855234503746033,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 1.4736703368006976,
|
|
"grad_norm": 0.7133991122245789,
|
|
"learning_rate": 1.1134360565638402e-05,
|
|
"loss": 0.7381144762039185,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 1.4749680637508364,
|
|
"grad_norm": 0.6666829586029053,
|
|
"learning_rate": 1.1120152243638457e-05,
|
|
"loss": 0.7571398019790649,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 1.4762657907009753,
|
|
"grad_norm": 0.7138345837593079,
|
|
"learning_rate": 1.1105941631210694e-05,
|
|
"loss": 0.7363887429237366,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 1.4775635176511142,
|
|
"grad_norm": 0.6881229877471924,
|
|
"learning_rate": 1.1091728757412212e-05,
|
|
"loss": 0.6838353276252747,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 1.4788612446012532,
|
|
"grad_norm": 0.6954206824302673,
|
|
"learning_rate": 1.107751365130474e-05,
|
|
"loss": 0.6892279386520386,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 1.480158971551392,
|
|
"grad_norm": 0.7325204014778137,
|
|
"learning_rate": 1.1063296341954577e-05,
|
|
"loss": 0.7068898677825928,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 1.481456698501531,
|
|
"grad_norm": 0.7389767169952393,
|
|
"learning_rate": 1.1049076858432517e-05,
|
|
"loss": 0.7737511396408081,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 1.4827544254516698,
|
|
"grad_norm": 0.7286487817764282,
|
|
"learning_rate": 1.1034855229813812e-05,
|
|
"loss": 0.7521780729293823,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 1.4840521524018087,
|
|
"grad_norm": 0.7211914658546448,
|
|
"learning_rate": 1.1020631485178084e-05,
|
|
"loss": 0.7648857831954956,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 1.4853498793519475,
|
|
"grad_norm": 0.6989269852638245,
|
|
"learning_rate": 1.1006405653609295e-05,
|
|
"loss": 0.7818325161933899,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 1.4866476063020864,
|
|
"grad_norm": 0.7269567251205444,
|
|
"learning_rate": 1.0992177764195671e-05,
|
|
"loss": 0.7369544506072998,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 1.4879453332522254,
|
|
"grad_norm": 0.7193188071250916,
|
|
"learning_rate": 1.0977947846029642e-05,
|
|
"loss": 0.7326228022575378,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 1.4892430602023643,
|
|
"grad_norm": 0.6688587665557861,
|
|
"learning_rate": 1.0963715928207795e-05,
|
|
"loss": 0.6900015473365784,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 1.4905407871525032,
|
|
"grad_norm": 0.7130873203277588,
|
|
"learning_rate": 1.094948203983079e-05,
|
|
"loss": 0.7647519707679749,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 1.491838514102642,
|
|
"grad_norm": 0.7038359642028809,
|
|
"learning_rate": 1.0935246210003334e-05,
|
|
"loss": 0.7078969478607178,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 1.491838514102642,
|
|
"eval_loss": 0.7540779113769531,
|
|
"eval_runtime": 144.3473,
|
|
"eval_samples_per_second": 35.969,
|
|
"eval_steps_per_second": 8.992,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 1.493136241052781,
|
|
"grad_norm": 0.7373347878456116,
|
|
"learning_rate": 1.0921008467834094e-05,
|
|
"loss": 0.7495899200439453,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 1.49443396800292,
|
|
"grad_norm": 0.7285864949226379,
|
|
"learning_rate": 1.0906768842435647e-05,
|
|
"loss": 0.7451608777046204,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 1.4957316949530588,
|
|
"grad_norm": 0.7112108469009399,
|
|
"learning_rate": 1.0892527362924426e-05,
|
|
"loss": 0.6732929944992065,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 1.4970294219031977,
|
|
"grad_norm": 0.7155210971832275,
|
|
"learning_rate": 1.0878284058420647e-05,
|
|
"loss": 0.7473354339599609,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 1.4983271488533365,
|
|
"grad_norm": 0.7318425178527832,
|
|
"learning_rate": 1.0864038958048267e-05,
|
|
"loss": 0.6648399829864502,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 1.4996248758034754,
|
|
"grad_norm": 0.6885069012641907,
|
|
"learning_rate": 1.084979209093491e-05,
|
|
"loss": 0.7034338712692261,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 1.5009226027536142,
|
|
"grad_norm": 0.7019109129905701,
|
|
"learning_rate": 1.0835543486211815e-05,
|
|
"loss": 0.7674492001533508,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 1.5022203297037533,
|
|
"grad_norm": 0.7262328267097473,
|
|
"learning_rate": 1.0821293173013769e-05,
|
|
"loss": 0.7348574995994568,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 1.5035180566538922,
|
|
"grad_norm": 0.6678932905197144,
|
|
"learning_rate": 1.0807041180479054e-05,
|
|
"loss": 0.6102491617202759,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 1.504815783604031,
|
|
"grad_norm": 0.7315651178359985,
|
|
"learning_rate": 1.0792787537749392e-05,
|
|
"loss": 0.7893344163894653,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 1.50611351055417,
|
|
"grad_norm": 0.7274885773658752,
|
|
"learning_rate": 1.0778532273969877e-05,
|
|
"loss": 0.6995629072189331,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 1.507411237504309,
|
|
"grad_norm": 0.6988937258720398,
|
|
"learning_rate": 1.0764275418288908e-05,
|
|
"loss": 0.753483772277832,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 1.5087089644544478,
|
|
"grad_norm": 0.714952290058136,
|
|
"learning_rate": 1.0750016999858151e-05,
|
|
"loss": 0.7254124283790588,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 1.5100066914045867,
|
|
"grad_norm": 0.7081964015960693,
|
|
"learning_rate": 1.0735757047832461e-05,
|
|
"loss": 0.7344964146614075,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 1.5113044183547255,
|
|
"grad_norm": 0.6843774914741516,
|
|
"learning_rate": 1.0721495591369832e-05,
|
|
"loss": 0.6407060623168945,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 1.5126021453048644,
|
|
"grad_norm": 0.7179701924324036,
|
|
"learning_rate": 1.0707232659631333e-05,
|
|
"loss": 0.7781057357788086,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 1.5138998722550032,
|
|
"grad_norm": 0.7363991141319275,
|
|
"learning_rate": 1.0692968281781046e-05,
|
|
"loss": 0.6866899132728577,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 1.5151975992051423,
|
|
"grad_norm": 0.6679601669311523,
|
|
"learning_rate": 1.0678702486986016e-05,
|
|
"loss": 0.6717002391815186,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 1.5164953261552812,
|
|
"grad_norm": 0.6931522488594055,
|
|
"learning_rate": 1.0664435304416185e-05,
|
|
"loss": 0.6953310966491699,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 1.51779305310542,
|
|
"grad_norm": 0.738691508769989,
|
|
"learning_rate": 1.065016676324433e-05,
|
|
"loss": 0.7797529101371765,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 1.519090780055559,
|
|
"grad_norm": 0.6795670390129089,
|
|
"learning_rate": 1.0635896892645998e-05,
|
|
"loss": 0.652160108089447,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 1.520388507005698,
|
|
"grad_norm": 0.7034809589385986,
|
|
"learning_rate": 1.0621625721799473e-05,
|
|
"loss": 0.7155415415763855,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 1.5216862339558368,
|
|
"grad_norm": 0.7075764536857605,
|
|
"learning_rate": 1.0607353279885682e-05,
|
|
"loss": 0.6893566846847534,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 1.5229839609059757,
|
|
"grad_norm": 0.696140468120575,
|
|
"learning_rate": 1.0593079596088155e-05,
|
|
"loss": 0.6836467981338501,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 1.5242816878561145,
|
|
"grad_norm": 0.7141397595405579,
|
|
"learning_rate": 1.0578804699592968e-05,
|
|
"loss": 0.7246308326721191,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 1.5255794148062534,
|
|
"grad_norm": 0.6880807280540466,
|
|
"learning_rate": 1.0564528619588668e-05,
|
|
"loss": 0.6564866304397583,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 1.5268771417563922,
|
|
"grad_norm": 0.6661361455917358,
|
|
"learning_rate": 1.0550251385266223e-05,
|
|
"loss": 0.6993754506111145,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 1.528174868706531,
|
|
"grad_norm": 0.7442536950111389,
|
|
"learning_rate": 1.0535973025818969e-05,
|
|
"loss": 0.7055092453956604,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 1.5294725956566702,
|
|
"grad_norm": 0.7330362200737,
|
|
"learning_rate": 1.0521693570442533e-05,
|
|
"loss": 0.7582162022590637,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 1.530770322606809,
|
|
"grad_norm": 0.722161591053009,
|
|
"learning_rate": 1.050741304833479e-05,
|
|
"loss": 0.7415435314178467,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 1.5320680495569479,
|
|
"grad_norm": 0.69851154088974,
|
|
"learning_rate": 1.0493131488695789e-05,
|
|
"loss": 0.6807332038879395,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 1.533365776507087,
|
|
"grad_norm": 0.7059313654899597,
|
|
"learning_rate": 1.0478848920727707e-05,
|
|
"loss": 0.7028640508651733,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 1.5346635034572258,
|
|
"grad_norm": 0.6546805500984192,
|
|
"learning_rate": 1.0464565373634784e-05,
|
|
"loss": 0.6459164619445801,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 1.5359612304073647,
|
|
"grad_norm": 0.6890950202941895,
|
|
"learning_rate": 1.0450280876623253e-05,
|
|
"loss": 0.7195508480072021,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 1.5372589573575035,
|
|
"grad_norm": 0.6886339783668518,
|
|
"learning_rate": 1.0435995458901298e-05,
|
|
"loss": 0.7041788697242737,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 1.5385566843076424,
|
|
"grad_norm": 0.7007988095283508,
|
|
"learning_rate": 1.042170914967898e-05,
|
|
"loss": 0.6726493835449219,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 1.5398544112577812,
|
|
"grad_norm": 0.7152829766273499,
|
|
"learning_rate": 1.0407421978168186e-05,
|
|
"loss": 0.7684251666069031,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 1.54115213820792,
|
|
"grad_norm": 0.7171955108642578,
|
|
"learning_rate": 1.0393133973582572e-05,
|
|
"loss": 0.7586410045623779,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 1.542449865158059,
|
|
"grad_norm": 0.7011827230453491,
|
|
"learning_rate": 1.0378845165137483e-05,
|
|
"loss": 0.6837091445922852,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 1.543747592108198,
|
|
"grad_norm": 0.7295593619346619,
|
|
"learning_rate": 1.0364555582049917e-05,
|
|
"loss": 0.7210373282432556,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 1.5450453190583369,
|
|
"grad_norm": 0.7250920534133911,
|
|
"learning_rate": 1.0350265253538458e-05,
|
|
"loss": 0.7209242582321167,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 1.546343046008476,
|
|
"grad_norm": 0.7172147631645203,
|
|
"learning_rate": 1.033597420882321e-05,
|
|
"loss": 0.771355390548706,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 1.5476407729586148,
|
|
"grad_norm": 0.7234722375869751,
|
|
"learning_rate": 1.0321682477125743e-05,
|
|
"loss": 0.7173848152160645,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 1.5489384999087537,
|
|
"grad_norm": 0.7182676792144775,
|
|
"learning_rate": 1.0307390087669026e-05,
|
|
"loss": 0.6971171498298645,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 1.5502362268588925,
|
|
"grad_norm": 0.711088240146637,
|
|
"learning_rate": 1.0293097069677382e-05,
|
|
"loss": 0.7250340580940247,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 1.5515339538090314,
|
|
"grad_norm": 0.7057585120201111,
|
|
"learning_rate": 1.0278803452376416e-05,
|
|
"loss": 0.6538138389587402,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 1.5528316807591702,
|
|
"grad_norm": 0.7198209166526794,
|
|
"learning_rate": 1.0264509264992954e-05,
|
|
"loss": 0.7397878170013428,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 1.554129407709309,
|
|
"grad_norm": 0.7141586542129517,
|
|
"learning_rate": 1.0250214536754996e-05,
|
|
"loss": 0.7416911125183105,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 1.555427134659448,
|
|
"grad_norm": 0.6700720191001892,
|
|
"learning_rate": 1.0235919296891641e-05,
|
|
"loss": 0.6646735072135925,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 1.556724861609587,
|
|
"grad_norm": 0.7620872855186462,
|
|
"learning_rate": 1.0221623574633035e-05,
|
|
"loss": 0.7746062874794006,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 1.5580225885597259,
|
|
"grad_norm": 0.7291470766067505,
|
|
"learning_rate": 1.0207327399210311e-05,
|
|
"loss": 0.7022420167922974,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 1.5593203155098647,
|
|
"grad_norm": 0.7325419783592224,
|
|
"learning_rate": 1.0193030799855534e-05,
|
|
"loss": 0.6780503988265991,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 1.5606180424600038,
|
|
"grad_norm": 0.7144452929496765,
|
|
"learning_rate": 1.0178733805801626e-05,
|
|
"loss": 0.7393384575843811,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 1.5619157694101427,
|
|
"grad_norm": 0.7362129092216492,
|
|
"learning_rate": 1.0164436446282324e-05,
|
|
"loss": 0.7512763142585754,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 1.5632134963602815,
|
|
"grad_norm": 0.6746947765350342,
|
|
"learning_rate": 1.015013875053211e-05,
|
|
"loss": 0.6646847128868103,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 1.5645112233104204,
|
|
"grad_norm": 0.6834600567817688,
|
|
"learning_rate": 1.013584074778615e-05,
|
|
"loss": 0.6130549311637878,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 1.5658089502605592,
|
|
"grad_norm": 0.7684876918792725,
|
|
"learning_rate": 1.0121542467280245e-05,
|
|
"loss": 0.7241174578666687,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 1.567106677210698,
|
|
"grad_norm": 0.7327429056167603,
|
|
"learning_rate": 1.0107243938250755e-05,
|
|
"loss": 0.6390076875686646,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 1.568404404160837,
|
|
"grad_norm": 0.6959134340286255,
|
|
"learning_rate": 1.0092945189934558e-05,
|
|
"loss": 0.7467840909957886,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 1.5697021311109758,
|
|
"grad_norm": 0.7259625792503357,
|
|
"learning_rate": 1.007864625156897e-05,
|
|
"loss": 0.7787569165229797,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 1.5709998580611149,
|
|
"grad_norm": 0.7313428521156311,
|
|
"learning_rate": 1.0064347152391703e-05,
|
|
"loss": 0.7091028690338135,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 1.5722975850112537,
|
|
"grad_norm": 0.7232116460800171,
|
|
"learning_rate": 1.0050047921640797e-05,
|
|
"loss": 0.6815755367279053,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 1.5735953119613926,
|
|
"grad_norm": 0.7286602854728699,
|
|
"learning_rate": 1.003574858855456e-05,
|
|
"loss": 0.72878098487854,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 1.5748930389115317,
|
|
"grad_norm": 0.6926529407501221,
|
|
"learning_rate": 1.0021449182371504e-05,
|
|
"loss": 0.6855754852294922,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 1.5761907658616705,
|
|
"grad_norm": 0.7037234306335449,
|
|
"learning_rate": 1.0007149732330299e-05,
|
|
"loss": 0.6827071309089661,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 1.5774884928118094,
|
|
"grad_norm": 0.7107639908790588,
|
|
"learning_rate": 9.992850267669703e-06,
|
|
"loss": 0.7649542093276978,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 1.5787862197619482,
|
|
"grad_norm": 0.737821102142334,
|
|
"learning_rate": 9.978550817628501e-06,
|
|
"loss": 0.6636335849761963,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 1.580083946712087,
|
|
"grad_norm": 0.7441766858100891,
|
|
"learning_rate": 9.964251411445444e-06,
|
|
"loss": 0.7413192391395569,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 1.581381673662226,
|
|
"grad_norm": 0.750579833984375,
|
|
"learning_rate": 9.949952078359208e-06,
|
|
"loss": 0.7131896018981934,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 1.5826794006123648,
|
|
"grad_norm": 0.7051860690116882,
|
|
"learning_rate": 9.935652847608302e-06,
|
|
"loss": 0.7157960534095764,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 1.5839771275625036,
|
|
"grad_norm": 0.6900631785392761,
|
|
"learning_rate": 9.921353748431036e-06,
|
|
"loss": 0.6898972392082214,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 1.5852748545126427,
|
|
"grad_norm": 0.7243295907974243,
|
|
"learning_rate": 9.907054810065446e-06,
|
|
"loss": 0.6597715616226196,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 1.5865725814627816,
|
|
"grad_norm": 0.6974424123764038,
|
|
"learning_rate": 9.89275606174925e-06,
|
|
"loss": 0.6871618032455444,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 1.5878703084129207,
|
|
"grad_norm": 0.6947103142738342,
|
|
"learning_rate": 9.878457532719757e-06,
|
|
"loss": 0.680080235004425,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 1.5891680353630595,
|
|
"grad_norm": 0.7873682975769043,
|
|
"learning_rate": 9.864159252213852e-06,
|
|
"loss": 0.7676745057106018,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 1.5904657623131984,
|
|
"grad_norm": 0.7117084860801697,
|
|
"learning_rate": 9.849861249467893e-06,
|
|
"loss": 0.7582260370254517,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 1.5917634892633372,
|
|
"grad_norm": 0.7120140194892883,
|
|
"learning_rate": 9.83556355371768e-06,
|
|
"loss": 0.7325617074966431,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 1.593061216213476,
|
|
"grad_norm": 0.8112825155258179,
|
|
"learning_rate": 9.821266194198375e-06,
|
|
"loss": 0.704188883304596,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 1.594358943163615,
|
|
"grad_norm": 0.6812202334403992,
|
|
"learning_rate": 9.806969200144471e-06,
|
|
"loss": 0.6495468616485596,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 1.5956566701137538,
|
|
"grad_norm": 0.672173261642456,
|
|
"learning_rate": 9.79267260078969e-06,
|
|
"loss": 0.7104700207710266,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 1.5969543970638926,
|
|
"grad_norm": 0.7402030229568481,
|
|
"learning_rate": 9.778376425366967e-06,
|
|
"loss": 0.7161640524864197,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 1.5982521240140317,
|
|
"grad_norm": 0.7105618119239807,
|
|
"learning_rate": 9.764080703108362e-06,
|
|
"loss": 0.7429479956626892,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 1.5995498509641706,
|
|
"grad_norm": 0.7068690657615662,
|
|
"learning_rate": 9.749785463245006e-06,
|
|
"loss": 0.7453438639640808,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 1.6008475779143094,
|
|
"grad_norm": 0.7170218825340271,
|
|
"learning_rate": 9.735490735007047e-06,
|
|
"loss": 0.7229534387588501,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 1.6021453048644485,
|
|
"grad_norm": 0.6783753633499146,
|
|
"learning_rate": 9.721196547623585e-06,
|
|
"loss": 0.7175101041793823,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 1.6034430318145874,
|
|
"grad_norm": 0.7113945484161377,
|
|
"learning_rate": 9.706902930322621e-06,
|
|
"loss": 0.7054000496864319,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 1.6047407587647262,
|
|
"grad_norm": 0.7143622636795044,
|
|
"learning_rate": 9.692609912330975e-06,
|
|
"loss": 0.7337828278541565,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 1.606038485714865,
|
|
"grad_norm": 0.7191219329833984,
|
|
"learning_rate": 9.67831752287426e-06,
|
|
"loss": 0.7462741136550903,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 1.607336212665004,
|
|
"grad_norm": 0.6787925362586975,
|
|
"learning_rate": 9.66402579117679e-06,
|
|
"loss": 0.6983505487442017,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 1.6086339396151428,
|
|
"grad_norm": 0.7183864712715149,
|
|
"learning_rate": 9.649734746461544e-06,
|
|
"loss": 0.7454296350479126,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 1.6099316665652816,
|
|
"grad_norm": 0.7119743227958679,
|
|
"learning_rate": 9.635444417950083e-06,
|
|
"loss": 0.6732832193374634,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 1.6112293935154205,
|
|
"grad_norm": 0.7184067368507385,
|
|
"learning_rate": 9.62115483486252e-06,
|
|
"loss": 0.6472535729408264,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 1.6125271204655596,
|
|
"grad_norm": 0.693452000617981,
|
|
"learning_rate": 9.606866026417431e-06,
|
|
"loss": 0.7115393877029419,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 1.6138248474156984,
|
|
"grad_norm": 0.749234139919281,
|
|
"learning_rate": 9.592578021831817e-06,
|
|
"loss": 0.775533139705658,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 1.6151225743658375,
|
|
"grad_norm": 0.7310823798179626,
|
|
"learning_rate": 9.578290850321023e-06,
|
|
"loss": 0.7301318645477295,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 1.6164203013159764,
|
|
"grad_norm": 0.7240172028541565,
|
|
"learning_rate": 9.564004541098709e-06,
|
|
"loss": 0.6760499477386475,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 1.6177180282661152,
|
|
"grad_norm": 0.7192076444625854,
|
|
"learning_rate": 9.549719123376749e-06,
|
|
"loss": 0.8106221556663513,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 1.619015755216254,
|
|
"grad_norm": 0.763373851776123,
|
|
"learning_rate": 9.535434626365221e-06,
|
|
"loss": 0.7758911848068237,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 1.620313482166393,
|
|
"grad_norm": 0.766298234462738,
|
|
"learning_rate": 9.521151079272295e-06,
|
|
"loss": 0.8113157749176025,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 1.6216112091165318,
|
|
"grad_norm": 0.7206328511238098,
|
|
"learning_rate": 9.506868511304216e-06,
|
|
"loss": 0.7105867266654968,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 1.6229089360666706,
|
|
"grad_norm": 0.7417821288108826,
|
|
"learning_rate": 9.492586951665214e-06,
|
|
"loss": 0.7875233888626099,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 1.6242066630168095,
|
|
"grad_norm": 0.6913713812828064,
|
|
"learning_rate": 9.47830642955747e-06,
|
|
"loss": 0.6810760498046875,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 1.6255043899669486,
|
|
"grad_norm": 0.7151052355766296,
|
|
"learning_rate": 9.464026974181035e-06,
|
|
"loss": 0.7549710869789124,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 1.6268021169170874,
|
|
"grad_norm": 0.6772926449775696,
|
|
"learning_rate": 9.44974861473378e-06,
|
|
"loss": 0.6992902159690857,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 1.6280998438672263,
|
|
"grad_norm": 0.734398365020752,
|
|
"learning_rate": 9.435471380411335e-06,
|
|
"loss": 0.7508738040924072,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 1.6293975708173654,
|
|
"grad_norm": 0.6922202706336975,
|
|
"learning_rate": 9.421195300407035e-06,
|
|
"loss": 0.6657233834266663,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 1.6306952977675042,
|
|
"grad_norm": 0.6931065917015076,
|
|
"learning_rate": 9.406920403911848e-06,
|
|
"loss": 0.7156346440315247,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 1.631993024717643,
|
|
"grad_norm": 0.6905820369720459,
|
|
"learning_rate": 9.392646720114325e-06,
|
|
"loss": 0.7550724744796753,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 1.633290751667782,
|
|
"grad_norm": 0.6891010403633118,
|
|
"learning_rate": 9.37837427820053e-06,
|
|
"loss": 0.7689525485038757,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 1.6345884786179208,
|
|
"grad_norm": 0.6997367739677429,
|
|
"learning_rate": 9.364103107354002e-06,
|
|
"loss": 0.6940702795982361,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 1.6358862055680596,
|
|
"grad_norm": 0.7232581973075867,
|
|
"learning_rate": 9.349833236755675e-06,
|
|
"loss": 0.708733856678009,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 1.6371839325181985,
|
|
"grad_norm": 0.7156563997268677,
|
|
"learning_rate": 9.335564695583816e-06,
|
|
"loss": 0.7080838680267334,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 1.6384816594683373,
|
|
"grad_norm": 0.7129452228546143,
|
|
"learning_rate": 9.321297513013987e-06,
|
|
"loss": 0.7160661816596985,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 1.6397793864184764,
|
|
"grad_norm": 0.7260149717330933,
|
|
"learning_rate": 9.307031718218956e-06,
|
|
"loss": 0.7261675000190735,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 1.6410771133686153,
|
|
"grad_norm": 0.7252016067504883,
|
|
"learning_rate": 9.292767340368672e-06,
|
|
"loss": 0.7626814842224121,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 1.6423748403187541,
|
|
"grad_norm": 0.7192304134368896,
|
|
"learning_rate": 9.278504408630171e-06,
|
|
"loss": 0.7479438781738281,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 1.6436725672688932,
|
|
"grad_norm": 0.7067307829856873,
|
|
"learning_rate": 9.264242952167544e-06,
|
|
"loss": 0.7229454517364502,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 1.644970294219032,
|
|
"grad_norm": 0.69132000207901,
|
|
"learning_rate": 9.24998300014185e-06,
|
|
"loss": 0.7404082417488098,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 1.646268021169171,
|
|
"grad_norm": 0.7199667096138,
|
|
"learning_rate": 9.235724581711096e-06,
|
|
"loss": 0.6846930384635925,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 1.6475657481193098,
|
|
"grad_norm": 2.456246852874756,
|
|
"learning_rate": 9.221467726030126e-06,
|
|
"loss": 0.7993893623352051,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 1.6488634750694486,
|
|
"grad_norm": 0.6726557016372681,
|
|
"learning_rate": 9.207212462250611e-06,
|
|
"loss": 0.6635693311691284,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 1.6501612020195875,
|
|
"grad_norm": 0.6767668128013611,
|
|
"learning_rate": 9.192958819520948e-06,
|
|
"loss": 0.6265630722045898,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 1.6514589289697263,
|
|
"grad_norm": 0.660176157951355,
|
|
"learning_rate": 9.178706826986236e-06,
|
|
"loss": 0.7039428353309631,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 1.6527566559198652,
|
|
"grad_norm": 0.710209846496582,
|
|
"learning_rate": 9.164456513788186e-06,
|
|
"loss": 0.712166965007782,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 1.6540543828700043,
|
|
"grad_norm": 0.7239776849746704,
|
|
"learning_rate": 9.150207909065093e-06,
|
|
"loss": 0.7487761378288269,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 1.6553521098201431,
|
|
"grad_norm": 0.6918028593063354,
|
|
"learning_rate": 9.135961041951735e-06,
|
|
"loss": 0.6682979464530945,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 1.6566498367702822,
|
|
"grad_norm": 0.7262064218521118,
|
|
"learning_rate": 9.121715941579358e-06,
|
|
"loss": 0.6650745868682861,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 1.657947563720421,
|
|
"grad_norm": 0.6805858612060547,
|
|
"learning_rate": 9.107472637075578e-06,
|
|
"loss": 0.7332329750061035,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 1.65924529067056,
|
|
"grad_norm": 0.7414560914039612,
|
|
"learning_rate": 9.093231157564357e-06,
|
|
"loss": 0.7112785577774048,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 1.6605430176206988,
|
|
"grad_norm": 0.6898860335350037,
|
|
"learning_rate": 9.078991532165911e-06,
|
|
"loss": 0.6940746307373047,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 1.6618407445708376,
|
|
"grad_norm": 0.734137773513794,
|
|
"learning_rate": 9.06475378999667e-06,
|
|
"loss": 0.7100757956504822,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 1.6631384715209765,
|
|
"grad_norm": 0.7733497023582458,
|
|
"learning_rate": 9.050517960169211e-06,
|
|
"loss": 0.7518686056137085,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 1.6644361984711153,
|
|
"grad_norm": 0.709705650806427,
|
|
"learning_rate": 9.036284071792212e-06,
|
|
"loss": 0.7964266538619995,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 1.6657339254212542,
|
|
"grad_norm": 0.711685836315155,
|
|
"learning_rate": 9.022052153970361e-06,
|
|
"loss": 0.7170289158821106,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 1.6670316523713933,
|
|
"grad_norm": 0.7108113169670105,
|
|
"learning_rate": 9.007822235804334e-06,
|
|
"loss": 0.7257951498031616,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 1.6683293793215321,
|
|
"grad_norm": 0.727200984954834,
|
|
"learning_rate": 8.993594346390709e-06,
|
|
"loss": 0.7011697888374329,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 1.669627106271671,
|
|
"grad_norm": 0.682969868183136,
|
|
"learning_rate": 8.979368514821917e-06,
|
|
"loss": 0.6846626996994019,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 1.67092483322181,
|
|
"grad_norm": 0.7197726964950562,
|
|
"learning_rate": 8.965144770186192e-06,
|
|
"loss": 0.7460110783576965,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 1.672222560171949,
|
|
"grad_norm": 0.7024762630462646,
|
|
"learning_rate": 8.950923141567482e-06,
|
|
"loss": 0.6903531551361084,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 1.6735202871220878,
|
|
"grad_norm": 0.7416940927505493,
|
|
"learning_rate": 8.936703658045426e-06,
|
|
"loss": 0.8462705612182617,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 1.6748180140722266,
|
|
"grad_norm": 0.749668538570404,
|
|
"learning_rate": 8.92248634869526e-06,
|
|
"loss": 0.7686569690704346,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 1.6761157410223655,
|
|
"grad_norm": 0.6500091552734375,
|
|
"learning_rate": 8.90827124258779e-06,
|
|
"loss": 0.7148120403289795,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 1.6774134679725043,
|
|
"grad_norm": 0.6878598928451538,
|
|
"learning_rate": 8.894058368789308e-06,
|
|
"loss": 0.5954074263572693,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 1.6787111949226432,
|
|
"grad_norm": 0.687202513217926,
|
|
"learning_rate": 8.879847756361544e-06,
|
|
"loss": 0.6912335753440857,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 1.680008921872782,
|
|
"grad_norm": 0.7027560472488403,
|
|
"learning_rate": 8.8656394343616e-06,
|
|
"loss": 0.6989542245864868,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 1.6813066488229211,
|
|
"grad_norm": 0.6999865770339966,
|
|
"learning_rate": 8.851433431841904e-06,
|
|
"loss": 0.7319304347038269,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 1.68260437577306,
|
|
"grad_norm": 0.7822436690330505,
|
|
"learning_rate": 8.837229777850129e-06,
|
|
"loss": 0.7571746110916138,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 1.6839021027231988,
|
|
"grad_norm": 0.6928126215934753,
|
|
"learning_rate": 8.823028501429161e-06,
|
|
"loss": 0.7471798062324524,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 1.685199829673338,
|
|
"grad_norm": 0.6795255541801453,
|
|
"learning_rate": 8.808829631617009e-06,
|
|
"loss": 0.6901456117630005,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 1.6864975566234768,
|
|
"grad_norm": 0.7609167695045471,
|
|
"learning_rate": 8.79463319744677e-06,
|
|
"loss": 0.782101035118103,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 1.6877952835736156,
|
|
"grad_norm": 0.7111324667930603,
|
|
"learning_rate": 8.78043922794656e-06,
|
|
"loss": 0.7500295042991638,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 1.6890930105237545,
|
|
"grad_norm": 0.7332251667976379,
|
|
"learning_rate": 8.766247752139453e-06,
|
|
"loss": 0.7808182835578918,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 1.6903907374738933,
|
|
"grad_norm": 0.7156122922897339,
|
|
"learning_rate": 8.752058799043422e-06,
|
|
"loss": 0.748470664024353,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 1.6916884644240322,
|
|
"grad_norm": 0.7189647555351257,
|
|
"learning_rate": 8.737872397671293e-06,
|
|
"loss": 0.7072033882141113,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 1.692986191374171,
|
|
"grad_norm": 0.719592809677124,
|
|
"learning_rate": 8.723688577030655e-06,
|
|
"loss": 0.7256566286087036,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 1.69428391832431,
|
|
"grad_norm": 0.7151191234588623,
|
|
"learning_rate": 8.709507366123841e-06,
|
|
"loss": 0.7216327786445618,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 1.695581645274449,
|
|
"grad_norm": 0.6929178833961487,
|
|
"learning_rate": 8.695328793947833e-06,
|
|
"loss": 0.6505569815635681,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 1.6968793722245878,
|
|
"grad_norm": 0.7117684483528137,
|
|
"learning_rate": 8.681152889494227e-06,
|
|
"loss": 0.750861644744873,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 1.698177099174727,
|
|
"grad_norm": 0.8048399090766907,
|
|
"learning_rate": 8.66697968174915e-06,
|
|
"loss": 0.7125011682510376,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 1.6994748261248658,
|
|
"grad_norm": 0.7594026923179626,
|
|
"learning_rate": 8.652809199693236e-06,
|
|
"loss": 0.6821706295013428,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 1.7007725530750046,
|
|
"grad_norm": 0.696814775466919,
|
|
"learning_rate": 8.638641472301524e-06,
|
|
"loss": 0.7341318726539612,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 1.7020702800251435,
|
|
"grad_norm": 0.6953744292259216,
|
|
"learning_rate": 8.624476528543439e-06,
|
|
"loss": 0.7471984028816223,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 1.7033680069752823,
|
|
"grad_norm": 0.7624510526657104,
|
|
"learning_rate": 8.610314397382701e-06,
|
|
"loss": 0.7660402655601501,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 1.7046657339254212,
|
|
"grad_norm": 0.7193018198013306,
|
|
"learning_rate": 8.596155107777288e-06,
|
|
"loss": 0.7213659882545471,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 1.70596346087556,
|
|
"grad_norm": 0.703834593296051,
|
|
"learning_rate": 8.581998688679356e-06,
|
|
"loss": 0.7187014818191528,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 1.707261187825699,
|
|
"grad_norm": 0.7352998852729797,
|
|
"learning_rate": 8.567845169035205e-06,
|
|
"loss": 0.7381072044372559,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 1.708558914775838,
|
|
"grad_norm": 0.7008899450302124,
|
|
"learning_rate": 8.553694577785201e-06,
|
|
"loss": 0.6953420639038086,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 1.7098566417259768,
|
|
"grad_norm": 0.6997075080871582,
|
|
"learning_rate": 8.539546943863717e-06,
|
|
"loss": 0.721794605255127,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 1.7111543686761157,
|
|
"grad_norm": 0.7531685829162598,
|
|
"learning_rate": 8.525402296199089e-06,
|
|
"loss": 0.763767421245575,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 1.7124520956262548,
|
|
"grad_norm": 0.686306357383728,
|
|
"learning_rate": 8.511260663713537e-06,
|
|
"loss": 0.6505174040794373,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 1.7137498225763936,
|
|
"grad_norm": 0.6891371607780457,
|
|
"learning_rate": 8.497122075323122e-06,
|
|
"loss": 0.6535521745681763,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 1.7150475495265325,
|
|
"grad_norm": 0.6797356009483337,
|
|
"learning_rate": 8.482986559937676e-06,
|
|
"loss": 0.711966872215271,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 1.7163452764766713,
|
|
"grad_norm": 0.6834943890571594,
|
|
"learning_rate": 8.468854146460754e-06,
|
|
"loss": 0.6898146271705627,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 1.7176430034268102,
|
|
"grad_norm": 0.6787711381912231,
|
|
"learning_rate": 8.45472486378956e-06,
|
|
"loss": 0.7132437825202942,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 1.718940730376949,
|
|
"grad_norm": 0.731886088848114,
|
|
"learning_rate": 8.440598740814909e-06,
|
|
"loss": 0.767355740070343,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 1.720238457327088,
|
|
"grad_norm": 0.6801634430885315,
|
|
"learning_rate": 8.426475806421139e-06,
|
|
"loss": 0.728312611579895,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 1.7215361842772268,
|
|
"grad_norm": 0.6922846436500549,
|
|
"learning_rate": 8.412356089486082e-06,
|
|
"loss": 0.6810072064399719,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 1.7228339112273658,
|
|
"grad_norm": 0.7422820329666138,
|
|
"learning_rate": 8.39823961888098e-06,
|
|
"loss": 0.7293540835380554,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 1.7241316381775047,
|
|
"grad_norm": 0.9656670689582825,
|
|
"learning_rate": 8.384126423470447e-06,
|
|
"loss": 0.7158606648445129,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 1.7254293651276438,
|
|
"grad_norm": 0.704413652420044,
|
|
"learning_rate": 8.37001653211239e-06,
|
|
"loss": 0.6522120833396912,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 1.7267270920777826,
|
|
"grad_norm": 0.7198591828346252,
|
|
"learning_rate": 8.355909973657975e-06,
|
|
"loss": 0.7289344072341919,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 1.7280248190279215,
|
|
"grad_norm": 0.7069032192230225,
|
|
"learning_rate": 8.341806776951532e-06,
|
|
"loss": 0.7365983724594116,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 1.7293225459780603,
|
|
"grad_norm": 0.7014702558517456,
|
|
"learning_rate": 8.327706970830537e-06,
|
|
"loss": 0.7173565030097961,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 1.7306202729281992,
|
|
"grad_norm": 0.7151576280593872,
|
|
"learning_rate": 8.313610584125523e-06,
|
|
"loss": 0.7827293872833252,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 1.731917999878338,
|
|
"grad_norm": 0.7050095796585083,
|
|
"learning_rate": 8.299517645660033e-06,
|
|
"loss": 0.681469202041626,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 1.733215726828477,
|
|
"grad_norm": 0.6885892152786255,
|
|
"learning_rate": 8.285428184250554e-06,
|
|
"loss": 0.6469728946685791,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 1.7345134537786158,
|
|
"grad_norm": 0.7026622891426086,
|
|
"learning_rate": 8.271342228706478e-06,
|
|
"loss": 0.76534104347229,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 1.7358111807287548,
|
|
"grad_norm": 0.6556008458137512,
|
|
"learning_rate": 8.257259807830009e-06,
|
|
"loss": 0.6358019113540649,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 1.7371089076788937,
|
|
"grad_norm": 0.6949118971824646,
|
|
"learning_rate": 8.243180950416142e-06,
|
|
"loss": 0.7216454148292542,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 1.7384066346290326,
|
|
"grad_norm": 0.6842135190963745,
|
|
"learning_rate": 8.22910568525257e-06,
|
|
"loss": 0.7009142637252808,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 1.7397043615791716,
|
|
"grad_norm": 0.7473326921463013,
|
|
"learning_rate": 8.215034041119655e-06,
|
|
"loss": 0.7074841856956482,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 1.7410020885293105,
|
|
"grad_norm": 0.6532716751098633,
|
|
"learning_rate": 8.200966046790339e-06,
|
|
"loss": 0.7174238562583923,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 1.7422998154794493,
|
|
"grad_norm": 0.672916829586029,
|
|
"learning_rate": 8.186901731030117e-06,
|
|
"loss": 0.71747887134552,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 1.7435975424295882,
|
|
"grad_norm": 0.7592087388038635,
|
|
"learning_rate": 8.172841122596951e-06,
|
|
"loss": 0.8052394390106201,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 1.744895269379727,
|
|
"grad_norm": 0.6938197016716003,
|
|
"learning_rate": 8.158784250241226e-06,
|
|
"loss": 0.7313718795776367,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 1.746192996329866,
|
|
"grad_norm": 0.6459118723869324,
|
|
"learning_rate": 8.144731142705693e-06,
|
|
"loss": 0.632814884185791,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 1.7474907232800048,
|
|
"grad_norm": 0.6704484820365906,
|
|
"learning_rate": 8.130681828725394e-06,
|
|
"loss": 0.6906111836433411,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 1.7487884502301436,
|
|
"grad_norm": 0.6933112144470215,
|
|
"learning_rate": 8.116636337027626e-06,
|
|
"loss": 0.6973313093185425,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 1.7500861771802827,
|
|
"grad_norm": 0.6778403520584106,
|
|
"learning_rate": 8.10259469633186e-06,
|
|
"loss": 0.7237393260002136,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 1.7513839041304216,
|
|
"grad_norm": 0.7188864946365356,
|
|
"learning_rate": 8.0885569353497e-06,
|
|
"loss": 0.694682776927948,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 1.7526816310805604,
|
|
"grad_norm": 0.7305310368537903,
|
|
"learning_rate": 8.07452308278481e-06,
|
|
"loss": 0.7369967103004456,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 1.7539793580306995,
|
|
"grad_norm": 0.6883519291877747,
|
|
"learning_rate": 8.060493167332874e-06,
|
|
"loss": 0.6693746447563171,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 1.7552770849808383,
|
|
"grad_norm": 0.6861468553543091,
|
|
"learning_rate": 8.04646721768151e-06,
|
|
"loss": 0.7269149422645569,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 1.7565748119309772,
|
|
"grad_norm": 0.6963792443275452,
|
|
"learning_rate": 8.032445262510241e-06,
|
|
"loss": 0.7375723123550415,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 1.757872538881116,
|
|
"grad_norm": 0.70611572265625,
|
|
"learning_rate": 8.018427330490411e-06,
|
|
"loss": 0.6536609530448914,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 1.759170265831255,
|
|
"grad_norm": 0.6833199262619019,
|
|
"learning_rate": 8.004413450285147e-06,
|
|
"loss": 0.7803836464881897,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 1.7604679927813938,
|
|
"grad_norm": 0.7367565631866455,
|
|
"learning_rate": 7.990403650549285e-06,
|
|
"loss": 0.7431750893592834,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 1.7617657197315326,
|
|
"grad_norm": 0.7168142795562744,
|
|
"learning_rate": 7.976397959929324e-06,
|
|
"loss": 0.708920955657959,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 1.7630634466816715,
|
|
"grad_norm": 0.7081824541091919,
|
|
"learning_rate": 7.962396407063346e-06,
|
|
"loss": 0.7360220551490784,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 1.7643611736318106,
|
|
"grad_norm": 0.7008010149002075,
|
|
"learning_rate": 7.948399020580995e-06,
|
|
"loss": 0.6721465587615967,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 1.7656589005819494,
|
|
"grad_norm": 0.7550066709518433,
|
|
"learning_rate": 7.934405829103376e-06,
|
|
"loss": 0.7266613245010376,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 1.7669566275320885,
|
|
"grad_norm": 0.713932454586029,
|
|
"learning_rate": 7.920416861243028e-06,
|
|
"loss": 0.7003293037414551,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 1.7682543544822273,
|
|
"grad_norm": 0.6848137378692627,
|
|
"learning_rate": 7.906432145603844e-06,
|
|
"loss": 0.7255281805992126,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 1.7695520814323662,
|
|
"grad_norm": 0.7302910685539246,
|
|
"learning_rate": 7.892451710781035e-06,
|
|
"loss": 0.7285719513893127,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 1.770849808382505,
|
|
"grad_norm": 0.7387238144874573,
|
|
"learning_rate": 7.878475585361045e-06,
|
|
"loss": 0.7333699464797974,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 1.772147535332644,
|
|
"grad_norm": 0.7755225300788879,
|
|
"learning_rate": 7.864503797921518e-06,
|
|
"loss": 0.7592843770980835,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 1.7734452622827828,
|
|
"grad_norm": 0.6892391443252563,
|
|
"learning_rate": 7.850536377031221e-06,
|
|
"loss": 0.7412334084510803,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 1.7747429892329216,
|
|
"grad_norm": 0.7299293279647827,
|
|
"learning_rate": 7.836573351249996e-06,
|
|
"loss": 0.7442951798439026,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 1.7760407161830605,
|
|
"grad_norm": 0.6848152875900269,
|
|
"learning_rate": 7.822614749128692e-06,
|
|
"loss": 0.6193121671676636,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 1.7773384431331996,
|
|
"grad_norm": 0.6931573748588562,
|
|
"learning_rate": 7.808660599209124e-06,
|
|
"loss": 0.7440711259841919,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 1.7786361700833384,
|
|
"grad_norm": 0.7260693907737732,
|
|
"learning_rate": 7.794710930023993e-06,
|
|
"loss": 0.7359597682952881,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 1.7799338970334773,
|
|
"grad_norm": 0.705436646938324,
|
|
"learning_rate": 7.78076577009684e-06,
|
|
"loss": 0.6207844614982605,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 1.7812316239836163,
|
|
"grad_norm": 0.6740301847457886,
|
|
"learning_rate": 7.76682514794199e-06,
|
|
"loss": 0.6975910663604736,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 1.7825293509337552,
|
|
"grad_norm": 0.6805901527404785,
|
|
"learning_rate": 7.752889092064484e-06,
|
|
"loss": 0.671751081943512,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 1.783827077883894,
|
|
"grad_norm": 0.7223953604698181,
|
|
"learning_rate": 7.738957630960037e-06,
|
|
"loss": 0.6885688900947571,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 1.785124804834033,
|
|
"grad_norm": 0.6852001547813416,
|
|
"learning_rate": 7.725030793114952e-06,
|
|
"loss": 0.7190781831741333,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 1.7864225317841718,
|
|
"grad_norm": 0.7344854474067688,
|
|
"learning_rate": 7.711108607006094e-06,
|
|
"loss": 0.7325436472892761,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 1.7877202587343106,
|
|
"grad_norm": 0.7047913670539856,
|
|
"learning_rate": 7.697191101100802e-06,
|
|
"loss": 0.7324240803718567,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 1.7890179856844495,
|
|
"grad_norm": 0.7197734713554382,
|
|
"learning_rate": 7.683278303856862e-06,
|
|
"loss": 0.7601778507232666,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 1.7903157126345883,
|
|
"grad_norm": 0.6842553615570068,
|
|
"learning_rate": 7.669370243722415e-06,
|
|
"loss": 0.7301578521728516,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 1.7903157126345883,
|
|
"eval_loss": 0.7464115023612976,
|
|
"eval_runtime": 143.4981,
|
|
"eval_samples_per_second": 36.182,
|
|
"eval_steps_per_second": 9.045,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 1.7916134395847274,
|
|
"grad_norm": 0.6962341666221619,
|
|
"learning_rate": 7.655466949135932e-06,
|
|
"loss": 0.7249746918678284,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 1.7929111665348663,
|
|
"grad_norm": 0.6840744018554688,
|
|
"learning_rate": 7.641568448526122e-06,
|
|
"loss": 0.6648120880126953,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 1.7942088934850051,
|
|
"grad_norm": 0.7047871947288513,
|
|
"learning_rate": 7.627674770311909e-06,
|
|
"loss": 0.6969434022903442,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 1.7955066204351442,
|
|
"grad_norm": 0.716124951839447,
|
|
"learning_rate": 7.613785942902343e-06,
|
|
"loss": 0.7197269201278687,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 1.796804347385283,
|
|
"grad_norm": 0.6727207899093628,
|
|
"learning_rate": 7.599901994696566e-06,
|
|
"loss": 0.6794359683990479,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 1.798102074335422,
|
|
"grad_norm": 0.6976568698883057,
|
|
"learning_rate": 7.586022954083731e-06,
|
|
"loss": 0.6372778415679932,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 1.7993998012855608,
|
|
"grad_norm": 0.683164656162262,
|
|
"learning_rate": 7.572148849442971e-06,
|
|
"loss": 0.6731259226799011,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 1.8006975282356996,
|
|
"grad_norm": 0.6801917552947998,
|
|
"learning_rate": 7.5582797091433105e-06,
|
|
"loss": 0.6921297907829285,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 1.8019952551858385,
|
|
"grad_norm": 0.7587413191795349,
|
|
"learning_rate": 7.544415561543639e-06,
|
|
"loss": 0.7684265971183777,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 1.8032929821359773,
|
|
"grad_norm": 0.7493230700492859,
|
|
"learning_rate": 7.5305564349926215e-06,
|
|
"loss": 0.6984431147575378,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 1.8045907090861162,
|
|
"grad_norm": 0.6897554993629456,
|
|
"learning_rate": 7.516702357828672e-06,
|
|
"loss": 0.739819347858429,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 1.8058884360362553,
|
|
"grad_norm": 0.6832559704780579,
|
|
"learning_rate": 7.502853358379865e-06,
|
|
"loss": 0.6518275141716003,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 1.8071861629863941,
|
|
"grad_norm": 0.7185218334197998,
|
|
"learning_rate": 7.489009464963903e-06,
|
|
"loss": 0.7867194414138794,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 1.8084838899365332,
|
|
"grad_norm": 0.6737310886383057,
|
|
"learning_rate": 7.475170705888042e-06,
|
|
"loss": 0.6979063749313354,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 1.809781616886672,
|
|
"grad_norm": 0.713076651096344,
|
|
"learning_rate": 7.461337109449045e-06,
|
|
"loss": 0.7293301224708557,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 1.811079343836811,
|
|
"grad_norm": 0.700568675994873,
|
|
"learning_rate": 7.447508703933109e-06,
|
|
"loss": 0.6935805678367615,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 1.8123770707869498,
|
|
"grad_norm": 0.7034053802490234,
|
|
"learning_rate": 7.433685517615831e-06,
|
|
"loss": 0.7284054160118103,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 1.8136747977370886,
|
|
"grad_norm": 0.6562127470970154,
|
|
"learning_rate": 7.4198675787621185e-06,
|
|
"loss": 0.721833348274231,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 1.8149725246872275,
|
|
"grad_norm": 0.6957826614379883,
|
|
"learning_rate": 7.406054915626172e-06,
|
|
"loss": 0.6763690114021301,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 1.8162702516373663,
|
|
"grad_norm": 0.758056104183197,
|
|
"learning_rate": 7.392247556451382e-06,
|
|
"loss": 0.7644186615943909,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 1.8175679785875052,
|
|
"grad_norm": 0.6855806708335876,
|
|
"learning_rate": 7.378445529470303e-06,
|
|
"loss": 0.7499503493309021,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 1.8188657055376443,
|
|
"grad_norm": 0.7280805706977844,
|
|
"learning_rate": 7.364648862904593e-06,
|
|
"loss": 0.7766327261924744,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 1.8201634324877831,
|
|
"grad_norm": 0.7023898959159851,
|
|
"learning_rate": 7.35085758496494e-06,
|
|
"loss": 0.6799028515815735,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 1.821461159437922,
|
|
"grad_norm": 0.696554958820343,
|
|
"learning_rate": 7.337071723851018e-06,
|
|
"loss": 0.6930332183837891,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 1.822758886388061,
|
|
"grad_norm": 0.7462826371192932,
|
|
"learning_rate": 7.323291307751418e-06,
|
|
"loss": 0.7603926658630371,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 1.8240566133382,
|
|
"grad_norm": 0.6899564266204834,
|
|
"learning_rate": 7.3095163648436115e-06,
|
|
"loss": 0.6602949500083923,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 1.8253543402883388,
|
|
"grad_norm": 0.7230206727981567,
|
|
"learning_rate": 7.295746923293865e-06,
|
|
"loss": 0.7429470419883728,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 1.8266520672384776,
|
|
"grad_norm": 0.6691879034042358,
|
|
"learning_rate": 7.2819830112572035e-06,
|
|
"loss": 0.7018039226531982,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 1.8279497941886165,
|
|
"grad_norm": 0.7611459493637085,
|
|
"learning_rate": 7.268224656877339e-06,
|
|
"loss": 0.7324895262718201,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 1.8292475211387553,
|
|
"grad_norm": 0.7313300967216492,
|
|
"learning_rate": 7.25447188828663e-06,
|
|
"loss": 0.7643807530403137,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 1.8305452480888942,
|
|
"grad_norm": 0.7345109581947327,
|
|
"learning_rate": 7.240724733606002e-06,
|
|
"loss": 0.7648757696151733,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 1.831842975039033,
|
|
"grad_norm": 0.6995144486427307,
|
|
"learning_rate": 7.2269832209449145e-06,
|
|
"loss": 0.6826534271240234,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 1.8331407019891721,
|
|
"grad_norm": 0.6842563152313232,
|
|
"learning_rate": 7.213247378401274e-06,
|
|
"loss": 0.7718407511711121,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 1.834438428939311,
|
|
"grad_norm": 0.6925626397132874,
|
|
"learning_rate": 7.199517234061408e-06,
|
|
"loss": 0.7063374519348145,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 1.83573615588945,
|
|
"grad_norm": 0.7153764963150024,
|
|
"learning_rate": 7.1857928159999814e-06,
|
|
"loss": 0.7116506695747375,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 1.837033882839589,
|
|
"grad_norm": 0.7008180022239685,
|
|
"learning_rate": 7.172074152279963e-06,
|
|
"loss": 0.6926634311676025,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 1.8383316097897278,
|
|
"grad_norm": 0.695785641670227,
|
|
"learning_rate": 7.1583612709525405e-06,
|
|
"loss": 0.7824428081512451,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 1.8396293367398666,
|
|
"grad_norm": 0.7137957215309143,
|
|
"learning_rate": 7.14465420005709e-06,
|
|
"loss": 0.7480607032775879,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 1.8409270636900055,
|
|
"grad_norm": 0.6970608234405518,
|
|
"learning_rate": 7.130952967621096e-06,
|
|
"loss": 0.6973427534103394,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 1.8422247906401443,
|
|
"grad_norm": 0.7116836309432983,
|
|
"learning_rate": 7.11725760166012e-06,
|
|
"loss": 0.7084696292877197,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 1.8435225175902832,
|
|
"grad_norm": 0.7125561833381653,
|
|
"learning_rate": 7.103568130177713e-06,
|
|
"loss": 0.6803657412528992,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 1.844820244540422,
|
|
"grad_norm": 0.66914963722229,
|
|
"learning_rate": 7.089884581165382e-06,
|
|
"loss": 0.6364957690238953,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 1.8461179714905611,
|
|
"grad_norm": 0.7396631240844727,
|
|
"learning_rate": 7.076206982602516e-06,
|
|
"loss": 0.7236162424087524,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 1.8474156984407,
|
|
"grad_norm": 0.7191373109817505,
|
|
"learning_rate": 7.06253536245635e-06,
|
|
"loss": 0.7462475895881653,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 1.8487134253908388,
|
|
"grad_norm": 0.7262799143791199,
|
|
"learning_rate": 7.048869748681879e-06,
|
|
"loss": 0.7678788900375366,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 1.850011152340978,
|
|
"grad_norm": 0.7085245847702026,
|
|
"learning_rate": 7.035210169221834e-06,
|
|
"loss": 0.7576820850372314,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 1.8513088792911168,
|
|
"grad_norm": 0.7027114629745483,
|
|
"learning_rate": 7.021556652006588e-06,
|
|
"loss": 0.755644679069519,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 1.8526066062412556,
|
|
"grad_norm": 0.6858870387077332,
|
|
"learning_rate": 7.007909224954135e-06,
|
|
"loss": 0.7338079810142517,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 1.8539043331913945,
|
|
"grad_norm": 0.7013359069824219,
|
|
"learning_rate": 6.994267915970003e-06,
|
|
"loss": 0.7038964033126831,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 1.8552020601415333,
|
|
"grad_norm": 0.7172896265983582,
|
|
"learning_rate": 6.980632752947221e-06,
|
|
"loss": 0.7479324340820312,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 1.8564997870916722,
|
|
"grad_norm": 0.7214548587799072,
|
|
"learning_rate": 6.967003763766247e-06,
|
|
"loss": 0.7139613032341003,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 1.857797514041811,
|
|
"grad_norm": 0.730970025062561,
|
|
"learning_rate": 6.953380976294907e-06,
|
|
"loss": 0.765926718711853,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 1.85909524099195,
|
|
"grad_norm": 0.6703609824180603,
|
|
"learning_rate": 6.9397644183883616e-06,
|
|
"loss": 0.7193933129310608,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 1.860392967942089,
|
|
"grad_norm": 0.6499923467636108,
|
|
"learning_rate": 6.926154117889022e-06,
|
|
"loss": 0.6723966002464294,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 1.8616906948922278,
|
|
"grad_norm": 0.7143534421920776,
|
|
"learning_rate": 6.91255010262651e-06,
|
|
"loss": 0.7171000838279724,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 1.8629884218423667,
|
|
"grad_norm": 0.6932517290115356,
|
|
"learning_rate": 6.898952400417587e-06,
|
|
"loss": 0.6997263431549072,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 1.8642861487925058,
|
|
"grad_norm": 0.7429547905921936,
|
|
"learning_rate": 6.885361039066121e-06,
|
|
"loss": 0.780619204044342,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 1.8655838757426446,
|
|
"grad_norm": 0.7190982699394226,
|
|
"learning_rate": 6.8717760463629965e-06,
|
|
"loss": 0.7348355054855347,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 1.8668816026927835,
|
|
"grad_norm": 0.7007834315299988,
|
|
"learning_rate": 6.858197450086097e-06,
|
|
"loss": 0.7280945181846619,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 1.8681793296429223,
|
|
"grad_norm": 0.7208773493766785,
|
|
"learning_rate": 6.844625278000205e-06,
|
|
"loss": 0.775151252746582,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 1.8694770565930612,
|
|
"grad_norm": 0.6837726831436157,
|
|
"learning_rate": 6.831059557856984e-06,
|
|
"loss": 0.7308005094528198,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 1.8707747835432,
|
|
"grad_norm": 0.6819126009941101,
|
|
"learning_rate": 6.81750031739489e-06,
|
|
"loss": 0.6529159545898438,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 1.872072510493339,
|
|
"grad_norm": 0.6784840226173401,
|
|
"learning_rate": 6.803947584339148e-06,
|
|
"loss": 0.6919572949409485,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 1.8733702374434777,
|
|
"grad_norm": 0.6869913935661316,
|
|
"learning_rate": 6.79040138640166e-06,
|
|
"loss": 0.6871669888496399,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 1.8746679643936168,
|
|
"grad_norm": 0.7124300599098206,
|
|
"learning_rate": 6.7768617512809745e-06,
|
|
"loss": 0.7206623554229736,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 1.8759656913437557,
|
|
"grad_norm": 0.71539306640625,
|
|
"learning_rate": 6.763328706662214e-06,
|
|
"loss": 0.7108519673347473,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 1.8772634182938948,
|
|
"grad_norm": 0.7159188985824585,
|
|
"learning_rate": 6.749802280217037e-06,
|
|
"loss": 0.7131993770599365,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 1.8785611452440336,
|
|
"grad_norm": 0.722147524356842,
|
|
"learning_rate": 6.7362824996035545e-06,
|
|
"loss": 0.6998387575149536,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 1.8798588721941725,
|
|
"grad_norm": 0.7286826968193054,
|
|
"learning_rate": 6.722769392466304e-06,
|
|
"loss": 0.7367603778839111,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 1.8811565991443113,
|
|
"grad_norm": 0.7212167382240295,
|
|
"learning_rate": 6.709262986436162e-06,
|
|
"loss": 0.7357022762298584,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 1.8824543260944502,
|
|
"grad_norm": 0.7026610374450684,
|
|
"learning_rate": 6.695763309130318e-06,
|
|
"loss": 0.7126086354255676,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 1.883752053044589,
|
|
"grad_norm": 0.7145894169807434,
|
|
"learning_rate": 6.682270388152185e-06,
|
|
"loss": 0.6773615479469299,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 1.885049779994728,
|
|
"grad_norm": 0.7049593925476074,
|
|
"learning_rate": 6.668784251091381e-06,
|
|
"loss": 0.6776928305625916,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 1.8863475069448667,
|
|
"grad_norm": 0.699505627155304,
|
|
"learning_rate": 6.655304925523635e-06,
|
|
"loss": 0.6610416173934937,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 1.8876452338950058,
|
|
"grad_norm": 0.7056293487548828,
|
|
"learning_rate": 6.641832439010765e-06,
|
|
"loss": 0.6919702291488647,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 1.8889429608451447,
|
|
"grad_norm": 0.702669084072113,
|
|
"learning_rate": 6.628366819100586e-06,
|
|
"loss": 0.682940661907196,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 1.8902406877952835,
|
|
"grad_norm": 0.6931704878807068,
|
|
"learning_rate": 6.614908093326891e-06,
|
|
"loss": 0.7477650046348572,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 1.8915384147454226,
|
|
"grad_norm": 0.7257412075996399,
|
|
"learning_rate": 6.601456289209362e-06,
|
|
"loss": 0.774404764175415,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 1.8928361416955615,
|
|
"grad_norm": 0.6645631194114685,
|
|
"learning_rate": 6.588011434253534e-06,
|
|
"loss": 0.647753119468689,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 1.8941338686457003,
|
|
"grad_norm": 0.70735102891922,
|
|
"learning_rate": 6.574573555950738e-06,
|
|
"loss": 0.6710544228553772,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 1.8954315955958392,
|
|
"grad_norm": 0.7064939141273499,
|
|
"learning_rate": 6.561142681778027e-06,
|
|
"loss": 0.6929414868354797,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 1.896729322545978,
|
|
"grad_norm": 0.6896395683288574,
|
|
"learning_rate": 6.547718839198145e-06,
|
|
"loss": 0.6804373264312744,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 1.898027049496117,
|
|
"grad_norm": 0.726024329662323,
|
|
"learning_rate": 6.53430205565945e-06,
|
|
"loss": 0.7252693772315979,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 1.8993247764462557,
|
|
"grad_norm": 0.7104306817054749,
|
|
"learning_rate": 6.520892358595869e-06,
|
|
"loss": 0.7321268916130066,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 1.9006225033963946,
|
|
"grad_norm": 0.680915892124176,
|
|
"learning_rate": 6.507489775426834e-06,
|
|
"loss": 0.7166538238525391,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 1.9019202303465337,
|
|
"grad_norm": 0.7132366895675659,
|
|
"learning_rate": 6.494094333557243e-06,
|
|
"loss": 0.708162784576416,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 1.9032179572966725,
|
|
"grad_norm": 0.6515333652496338,
|
|
"learning_rate": 6.4807060603773795e-06,
|
|
"loss": 0.7163029313087463,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 1.9045156842468114,
|
|
"grad_norm": 0.7042413353919983,
|
|
"learning_rate": 6.467324983262877e-06,
|
|
"loss": 0.6881014704704285,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 1.9058134111969505,
|
|
"grad_norm": 0.6660881042480469,
|
|
"learning_rate": 6.453951129574644e-06,
|
|
"loss": 0.678939938545227,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 1.9071111381470893,
|
|
"grad_norm": 0.7373862266540527,
|
|
"learning_rate": 6.4405845266588356e-06,
|
|
"loss": 0.7181136608123779,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 1.9084088650972282,
|
|
"grad_norm": 0.7122411727905273,
|
|
"learning_rate": 6.427225201846763e-06,
|
|
"loss": 0.6904677748680115,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 1.909706592047367,
|
|
"grad_norm": 0.7414330244064331,
|
|
"learning_rate": 6.413873182454873e-06,
|
|
"loss": 0.7363246083259583,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 1.911004318997506,
|
|
"grad_norm": 0.6871086359024048,
|
|
"learning_rate": 6.4005284957846546e-06,
|
|
"loss": 0.6799793243408203,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 1.9123020459476447,
|
|
"grad_norm": 0.7056854963302612,
|
|
"learning_rate": 6.3871911691226276e-06,
|
|
"loss": 0.7036612033843994,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 1.9135997728977836,
|
|
"grad_norm": 0.7454568147659302,
|
|
"learning_rate": 6.373861229740237e-06,
|
|
"loss": 0.7416712045669556,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 1.9148974998479225,
|
|
"grad_norm": 0.6941256523132324,
|
|
"learning_rate": 6.360538704893845e-06,
|
|
"loss": 0.6659767031669617,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 1.9161952267980615,
|
|
"grad_norm": 0.7420505881309509,
|
|
"learning_rate": 6.3472236218246366e-06,
|
|
"loss": 0.7747020721435547,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 1.9174929537482004,
|
|
"grad_norm": 0.7113460302352905,
|
|
"learning_rate": 6.333916007758591e-06,
|
|
"loss": 0.7053021788597107,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 1.9187906806983395,
|
|
"grad_norm": 0.7145473957061768,
|
|
"learning_rate": 6.320615889906403e-06,
|
|
"loss": 0.7014235258102417,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 1.9200884076484783,
|
|
"grad_norm": 0.7099266052246094,
|
|
"learning_rate": 6.307323295463457e-06,
|
|
"loss": 0.7599897980690002,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 1.9213861345986172,
|
|
"grad_norm": 0.7005822062492371,
|
|
"learning_rate": 6.294038251609738e-06,
|
|
"loss": 0.6990090608596802,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 1.922683861548756,
|
|
"grad_norm": 0.6796419620513916,
|
|
"learning_rate": 6.280760785509802e-06,
|
|
"loss": 0.6529797911643982,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 1.923981588498895,
|
|
"grad_norm": 0.7199534773826599,
|
|
"learning_rate": 6.2674909243127e-06,
|
|
"loss": 0.714480459690094,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 1.9252793154490337,
|
|
"grad_norm": 0.7127954959869385,
|
|
"learning_rate": 6.254228695151949e-06,
|
|
"loss": 0.7583557367324829,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 1.9265770423991726,
|
|
"grad_norm": 0.7017828226089478,
|
|
"learning_rate": 6.240974125145443e-06,
|
|
"loss": 0.6976377367973328,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 1.9278747693493115,
|
|
"grad_norm": 0.6956459283828735,
|
|
"learning_rate": 6.227727241395429e-06,
|
|
"loss": 0.7237988114356995,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 1.9291724962994505,
|
|
"grad_norm": 0.7250760197639465,
|
|
"learning_rate": 6.214488070988424e-06,
|
|
"loss": 0.705412745475769,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 1.9304702232495894,
|
|
"grad_norm": 0.72161465883255,
|
|
"learning_rate": 6.201256640995184e-06,
|
|
"loss": 0.6755847930908203,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 1.9317679501997282,
|
|
"grad_norm": 0.6741456389427185,
|
|
"learning_rate": 6.188032978470639e-06,
|
|
"loss": 0.7194631099700928,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 1.9330656771498673,
|
|
"grad_norm": 0.6884588003158569,
|
|
"learning_rate": 6.174817110453828e-06,
|
|
"loss": 0.6863330006599426,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 1.9343634041000062,
|
|
"grad_norm": 0.7027184963226318,
|
|
"learning_rate": 6.161609063967857e-06,
|
|
"loss": 0.7379326224327087,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 1.935661131050145,
|
|
"grad_norm": 0.7299201488494873,
|
|
"learning_rate": 6.1484088660198325e-06,
|
|
"loss": 0.7956094145774841,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 1.936958858000284,
|
|
"grad_norm": 0.7009000182151794,
|
|
"learning_rate": 6.135216543600828e-06,
|
|
"loss": 0.7050310373306274,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 1.9382565849504227,
|
|
"grad_norm": 0.7212353944778442,
|
|
"learning_rate": 6.1220321236857974e-06,
|
|
"loss": 0.7898357510566711,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 1.9395543119005616,
|
|
"grad_norm": 0.7044717669487,
|
|
"learning_rate": 6.108855633233546e-06,
|
|
"loss": 0.7022029757499695,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 1.9408520388507005,
|
|
"grad_norm": 0.6811977624893188,
|
|
"learning_rate": 6.0956870991866545e-06,
|
|
"loss": 0.6920107007026672,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 1.9421497658008393,
|
|
"grad_norm": 0.6873610019683838,
|
|
"learning_rate": 6.0825265484714526e-06,
|
|
"loss": 0.6889206767082214,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 1.9434474927509784,
|
|
"grad_norm": 0.7255538702011108,
|
|
"learning_rate": 6.0693740079979235e-06,
|
|
"loss": 0.763762891292572,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 1.9447452197011172,
|
|
"grad_norm": 0.6617857217788696,
|
|
"learning_rate": 6.056229504659696e-06,
|
|
"loss": 0.65453040599823,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 1.9460429466512563,
|
|
"grad_norm": 0.7204879522323608,
|
|
"learning_rate": 6.043093065333945e-06,
|
|
"loss": 0.6839476823806763,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 1.9473406736013952,
|
|
"grad_norm": 0.695447564125061,
|
|
"learning_rate": 6.029964716881367e-06,
|
|
"loss": 0.6658032536506653,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 1.948638400551534,
|
|
"grad_norm": 0.6816181540489197,
|
|
"learning_rate": 6.016844486146106e-06,
|
|
"loss": 0.7248274087905884,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 1.9499361275016729,
|
|
"grad_norm": 0.7379606366157532,
|
|
"learning_rate": 6.003732399955722e-06,
|
|
"loss": 0.6768795251846313,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 1.9512338544518117,
|
|
"grad_norm": 0.6998269557952881,
|
|
"learning_rate": 5.990628485121106e-06,
|
|
"loss": 0.6504592895507812,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 1.9525315814019506,
|
|
"grad_norm": 0.7351219654083252,
|
|
"learning_rate": 5.97753276843645e-06,
|
|
"loss": 0.7741858959197998,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 1.9538293083520895,
|
|
"grad_norm": 0.6803948283195496,
|
|
"learning_rate": 5.964445276679176e-06,
|
|
"loss": 0.6615405678749084,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 1.9551270353022283,
|
|
"grad_norm": 0.7318346500396729,
|
|
"learning_rate": 5.9513660366099005e-06,
|
|
"loss": 0.7087497115135193,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 1.9564247622523674,
|
|
"grad_norm": 0.7118584513664246,
|
|
"learning_rate": 5.93829507497235e-06,
|
|
"loss": 0.647581934928894,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 1.9577224892025062,
|
|
"grad_norm": 0.7135505080223083,
|
|
"learning_rate": 5.925232418493338e-06,
|
|
"loss": 0.7108398079872131,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 1.959020216152645,
|
|
"grad_norm": 0.6982471346855164,
|
|
"learning_rate": 5.912178093882688e-06,
|
|
"loss": 0.7022315859794617,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 1.9603179431027842,
|
|
"grad_norm": 0.7076136469841003,
|
|
"learning_rate": 5.8991321278331934e-06,
|
|
"loss": 0.6406600475311279,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 1.961615670052923,
|
|
"grad_norm": 0.7392069101333618,
|
|
"learning_rate": 5.8860945470205466e-06,
|
|
"loss": 0.7887027859687805,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 1.9629133970030619,
|
|
"grad_norm": 0.7483602166175842,
|
|
"learning_rate": 5.8730653781033085e-06,
|
|
"loss": 0.7219119668006897,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 1.9642111239532007,
|
|
"grad_norm": 0.7024926543235779,
|
|
"learning_rate": 5.860044647722827e-06,
|
|
"loss": 0.7041683793067932,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 1.9655088509033396,
|
|
"grad_norm": 0.6939775347709656,
|
|
"learning_rate": 5.847032382503202e-06,
|
|
"loss": 0.6798254251480103,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 1.9668065778534785,
|
|
"grad_norm": 0.700524628162384,
|
|
"learning_rate": 5.834028609051218e-06,
|
|
"loss": 0.731053352355957,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 1.9681043048036173,
|
|
"grad_norm": 0.7189422845840454,
|
|
"learning_rate": 5.8210333539563e-06,
|
|
"loss": 0.6871148347854614,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 1.9694020317537562,
|
|
"grad_norm": 0.7418919205665588,
|
|
"learning_rate": 5.808046643790468e-06,
|
|
"loss": 0.7469598054885864,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 1.9706997587038952,
|
|
"grad_norm": 0.6783238649368286,
|
|
"learning_rate": 5.795068505108243e-06,
|
|
"loss": 0.6897709369659424,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 1.971997485654034,
|
|
"grad_norm": 0.7016989588737488,
|
|
"learning_rate": 5.782098964446641e-06,
|
|
"loss": 0.6978930830955505,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 1.973295212604173,
|
|
"grad_norm": 0.6924634575843811,
|
|
"learning_rate": 5.769138048325087e-06,
|
|
"loss": 0.6557913422584534,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 1.974592939554312,
|
|
"grad_norm": 0.6980036497116089,
|
|
"learning_rate": 5.756185783245376e-06,
|
|
"loss": 0.6883926391601562,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 1.9758906665044509,
|
|
"grad_norm": 0.6666119694709778,
|
|
"learning_rate": 5.743242195691612e-06,
|
|
"loss": 0.696445107460022,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 1.9771883934545897,
|
|
"grad_norm": 0.7082392573356628,
|
|
"learning_rate": 5.730307312130152e-06,
|
|
"loss": 0.7830109596252441,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 1.9784861204047286,
|
|
"grad_norm": 0.7415315508842468,
|
|
"learning_rate": 5.717381159009563e-06,
|
|
"loss": 0.6982215642929077,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 1.9797838473548675,
|
|
"grad_norm": 0.7484350800514221,
|
|
"learning_rate": 5.704463762760559e-06,
|
|
"loss": 0.727252721786499,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 1.9810815743050063,
|
|
"grad_norm": 0.6809999346733093,
|
|
"learning_rate": 5.691555149795933e-06,
|
|
"loss": 0.794657826423645,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 1.9823793012551452,
|
|
"grad_norm": 0.7138223648071289,
|
|
"learning_rate": 5.678655346510549e-06,
|
|
"loss": 0.7287296056747437,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 1.983677028205284,
|
|
"grad_norm": 0.6818944215774536,
|
|
"learning_rate": 5.6657643792812265e-06,
|
|
"loss": 0.6768350601196289,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 1.984974755155423,
|
|
"grad_norm": 0.7276642918586731,
|
|
"learning_rate": 5.652882274466736e-06,
|
|
"loss": 0.7598171830177307,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 1.986272482105562,
|
|
"grad_norm": 0.6802821159362793,
|
|
"learning_rate": 5.640009058407719e-06,
|
|
"loss": 0.682623028755188,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 1.987570209055701,
|
|
"grad_norm": 0.7515146732330322,
|
|
"learning_rate": 5.627144757426647e-06,
|
|
"loss": 0.7861851453781128,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 1.9888679360058399,
|
|
"grad_norm": 0.7353605628013611,
|
|
"learning_rate": 5.614289397827757e-06,
|
|
"loss": 0.7634737491607666,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 1.9901656629559787,
|
|
"grad_norm": 0.7560073137283325,
|
|
"learning_rate": 5.601443005897012e-06,
|
|
"loss": 0.7616620659828186,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 1.9914633899061176,
|
|
"grad_norm": 0.7289350628852844,
|
|
"learning_rate": 5.588605607902017e-06,
|
|
"loss": 0.7190179824829102,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 1.9927611168562565,
|
|
"grad_norm": 0.7019691467285156,
|
|
"learning_rate": 5.57577723009202e-06,
|
|
"loss": 0.671945333480835,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 1.9940588438063953,
|
|
"grad_norm": 0.6952185034751892,
|
|
"learning_rate": 5.5629578986977894e-06,
|
|
"loss": 0.7416089177131653,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 1.9953565707565342,
|
|
"grad_norm": 0.7558557987213135,
|
|
"learning_rate": 5.550147639931631e-06,
|
|
"loss": 0.7460814714431763,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 1.996654297706673,
|
|
"grad_norm": 0.6997542381286621,
|
|
"learning_rate": 5.537346479987269e-06,
|
|
"loss": 0.7162995338439941,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 1.997952024656812,
|
|
"grad_norm": 0.7319507002830505,
|
|
"learning_rate": 5.524554445039838e-06,
|
|
"loss": 0.7580918669700623,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 1.999249751606951,
|
|
"grad_norm": 0.7187158465385437,
|
|
"learning_rate": 5.511771561245813e-06,
|
|
"loss": 0.6829614043235779,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.8756005167961121,
|
|
"learning_rate": 5.498997854742956e-06,
|
|
"loss": 0.654055118560791,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 2.001297726950139,
|
|
"grad_norm": 0.884756326675415,
|
|
"learning_rate": 5.4862333516502634e-06,
|
|
"loss": 0.6550735831260681,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 2.0025954539002777,
|
|
"grad_norm": 0.8835470080375671,
|
|
"learning_rate": 5.473478078067913e-06,
|
|
"loss": 0.7326578497886658,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 2.0038931808504166,
|
|
"grad_norm": 0.8778272867202759,
|
|
"learning_rate": 5.460732060077212e-06,
|
|
"loss": 0.6050289273262024,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 2.0051909078005554,
|
|
"grad_norm": 0.7473064661026001,
|
|
"learning_rate": 5.44799532374054e-06,
|
|
"loss": 0.6881033182144165,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 2.0064886347506943,
|
|
"grad_norm": 0.7708891034126282,
|
|
"learning_rate": 5.435267895101303e-06,
|
|
"loss": 0.6227023005485535,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 2.0077863617008336,
|
|
"grad_norm": 0.7482177019119263,
|
|
"learning_rate": 5.422549800183861e-06,
|
|
"loss": 0.6618348360061646,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 2.0090840886509724,
|
|
"grad_norm": 0.7345021963119507,
|
|
"learning_rate": 5.409841064993512e-06,
|
|
"loss": 0.6520942449569702,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 2.0103818156011113,
|
|
"grad_norm": 0.7631828188896179,
|
|
"learning_rate": 5.39714171551639e-06,
|
|
"loss": 0.6233668923377991,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 2.01167954255125,
|
|
"grad_norm": 0.813840925693512,
|
|
"learning_rate": 5.384451777719464e-06,
|
|
"loss": 0.7311254739761353,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 2.012977269501389,
|
|
"grad_norm": 0.8413859009742737,
|
|
"learning_rate": 5.371771277550432e-06,
|
|
"loss": 0.7018522024154663,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 2.014274996451528,
|
|
"grad_norm": 0.7750846147537231,
|
|
"learning_rate": 5.359100240937717e-06,
|
|
"loss": 0.6850703954696655,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 2.0155727234016667,
|
|
"grad_norm": 0.7778939604759216,
|
|
"learning_rate": 5.3464386937903764e-06,
|
|
"loss": 0.6811778545379639,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 2.0168704503518056,
|
|
"grad_norm": 0.7875815033912659,
|
|
"learning_rate": 5.33378666199807e-06,
|
|
"loss": 0.6062582731246948,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 2.0181681773019444,
|
|
"grad_norm": 0.8213943839073181,
|
|
"learning_rate": 5.321144171431003e-06,
|
|
"loss": 0.6217991709709167,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 2.0194659042520833,
|
|
"grad_norm": 0.8762441873550415,
|
|
"learning_rate": 5.308511247939872e-06,
|
|
"loss": 0.6675798296928406,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 2.0207636312022226,
|
|
"grad_norm": 0.7665208578109741,
|
|
"learning_rate": 5.295887917355794e-06,
|
|
"loss": 0.6503481268882751,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 2.0220613581523614,
|
|
"grad_norm": 0.7740142941474915,
|
|
"learning_rate": 5.283274205490303e-06,
|
|
"loss": 0.6113878488540649,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 2.0233590851025003,
|
|
"grad_norm": 0.7948552966117859,
|
|
"learning_rate": 5.270670138135234e-06,
|
|
"loss": 0.7041577100753784,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 2.024656812052639,
|
|
"grad_norm": 0.732266366481781,
|
|
"learning_rate": 5.25807574106272e-06,
|
|
"loss": 0.683874785900116,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 2.025954539002778,
|
|
"grad_norm": 0.7335087060928345,
|
|
"learning_rate": 5.245491040025115e-06,
|
|
"loss": 0.6318987011909485,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 2.027252265952917,
|
|
"grad_norm": 0.7172908186912537,
|
|
"learning_rate": 5.232916060754947e-06,
|
|
"loss": 0.6631210446357727,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 2.0285499929030557,
|
|
"grad_norm": 0.7232309579849243,
|
|
"learning_rate": 5.220350828964865e-06,
|
|
"loss": 0.6236647367477417,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 2.0298477198531946,
|
|
"grad_norm": 0.727989137172699,
|
|
"learning_rate": 5.207795370347588e-06,
|
|
"loss": 0.6853646039962769,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 2.0311454468033334,
|
|
"grad_norm": 0.7468066215515137,
|
|
"learning_rate": 5.195249710575853e-06,
|
|
"loss": 0.6544186472892761,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 2.0324431737534723,
|
|
"grad_norm": 0.7399063110351562,
|
|
"learning_rate": 5.182713875302361e-06,
|
|
"loss": 0.6106476783752441,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 2.033740900703611,
|
|
"grad_norm": 0.7420501708984375,
|
|
"learning_rate": 5.1701878901597106e-06,
|
|
"loss": 0.715307891368866,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 2.0350386276537504,
|
|
"grad_norm": 0.7202077507972717,
|
|
"learning_rate": 5.157671780760385e-06,
|
|
"loss": 0.6406188607215881,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 2.0363363546038893,
|
|
"grad_norm": 0.7133172154426575,
|
|
"learning_rate": 5.145165572696652e-06,
|
|
"loss": 0.6294587850570679,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 2.037634081554028,
|
|
"grad_norm": 0.7211350798606873,
|
|
"learning_rate": 5.132669291540544e-06,
|
|
"loss": 0.6074943542480469,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 2.038931808504167,
|
|
"grad_norm": 0.7271124124526978,
|
|
"learning_rate": 5.1201829628437926e-06,
|
|
"loss": 0.6158304214477539,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 2.040229535454306,
|
|
"grad_norm": 0.7051241397857666,
|
|
"learning_rate": 5.107706612137776e-06,
|
|
"loss": 0.6632368564605713,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 2.0415272624044447,
|
|
"grad_norm": 0.7206335067749023,
|
|
"learning_rate": 5.095240264933486e-06,
|
|
"loss": 0.6133254766464233,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 2.0428249893545836,
|
|
"grad_norm": 0.7106805443763733,
|
|
"learning_rate": 5.082783946721434e-06,
|
|
"loss": 0.629423201084137,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 2.0441227163047224,
|
|
"grad_norm": 0.7104700207710266,
|
|
"learning_rate": 5.070337682971642e-06,
|
|
"loss": 0.6985434293746948,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 2.0454204432548613,
|
|
"grad_norm": 0.6845932006835938,
|
|
"learning_rate": 5.057901499133573e-06,
|
|
"loss": 0.6254795789718628,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 2.046718170205,
|
|
"grad_norm": 0.7214529514312744,
|
|
"learning_rate": 5.0454754206360705e-06,
|
|
"loss": 0.6072602868080139,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 2.048015897155139,
|
|
"grad_norm": 0.71996009349823,
|
|
"learning_rate": 5.033059472887322e-06,
|
|
"loss": 0.6534575819969177,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 2.0493136241052783,
|
|
"grad_norm": 0.7217608690261841,
|
|
"learning_rate": 5.0206536812748004e-06,
|
|
"loss": 0.6317112445831299,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 2.050611351055417,
|
|
"grad_norm": 0.7069404125213623,
|
|
"learning_rate": 5.008258071165202e-06,
|
|
"loss": 0.6474272608757019,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 2.051909078005556,
|
|
"grad_norm": 0.7298946976661682,
|
|
"learning_rate": 4.995872667904424e-06,
|
|
"loss": 0.6893925666809082,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 2.053206804955695,
|
|
"grad_norm": 0.750266432762146,
|
|
"learning_rate": 4.98349749681747e-06,
|
|
"loss": 0.6087015271186829,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 2.0545045319058337,
|
|
"grad_norm": 0.7133123278617859,
|
|
"learning_rate": 4.971132583208438e-06,
|
|
"loss": 0.624868631362915,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 2.0558022588559726,
|
|
"grad_norm": 0.7388240694999695,
|
|
"learning_rate": 4.958777952360445e-06,
|
|
"loss": 0.6425670981407166,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 2.0570999858061114,
|
|
"grad_norm": 0.7531347870826721,
|
|
"learning_rate": 4.946433629535585e-06,
|
|
"loss": 0.6272885799407959,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 2.0583977127562503,
|
|
"grad_norm": 0.7500084042549133,
|
|
"learning_rate": 4.934099639974874e-06,
|
|
"loss": 0.6620087027549744,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 2.059695439706389,
|
|
"grad_norm": 0.708791196346283,
|
|
"learning_rate": 4.921776008898198e-06,
|
|
"loss": 0.5606707334518433,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 2.060993166656528,
|
|
"grad_norm": 0.7260934114456177,
|
|
"learning_rate": 4.909462761504264e-06,
|
|
"loss": 0.67381352186203,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 2.0622908936066673,
|
|
"grad_norm": 0.6928997039794922,
|
|
"learning_rate": 4.897159922970551e-06,
|
|
"loss": 0.6307032704353333,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 2.063588620556806,
|
|
"grad_norm": 0.7362192869186401,
|
|
"learning_rate": 4.884867518453238e-06,
|
|
"loss": 0.6901969313621521,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 2.064886347506945,
|
|
"grad_norm": 0.722802460193634,
|
|
"learning_rate": 4.872585573087195e-06,
|
|
"loss": 0.7266512513160706,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 2.066184074457084,
|
|
"grad_norm": 0.7570728659629822,
|
|
"learning_rate": 4.860314111985881e-06,
|
|
"loss": 0.7014977335929871,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 2.0674818014072227,
|
|
"grad_norm": 0.7065424919128418,
|
|
"learning_rate": 4.848053160241333e-06,
|
|
"loss": 0.623349130153656,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 2.0687795283573616,
|
|
"grad_norm": 0.7208600044250488,
|
|
"learning_rate": 4.835802742924091e-06,
|
|
"loss": 0.6265473961830139,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 2.0700772553075004,
|
|
"grad_norm": 0.7267877459526062,
|
|
"learning_rate": 4.823562885083161e-06,
|
|
"loss": 0.6631119251251221,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 2.0713749822576393,
|
|
"grad_norm": 0.7265859842300415,
|
|
"learning_rate": 4.811333611745953e-06,
|
|
"loss": 0.655154824256897,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 2.072672709207778,
|
|
"grad_norm": 0.7422747015953064,
|
|
"learning_rate": 4.799114947918238e-06,
|
|
"loss": 0.6400114297866821,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 2.073970436157917,
|
|
"grad_norm": 0.7720977663993835,
|
|
"learning_rate": 4.786906918584083e-06,
|
|
"loss": 0.6592541337013245,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 2.075268163108056,
|
|
"grad_norm": 0.741809606552124,
|
|
"learning_rate": 4.774709548705831e-06,
|
|
"loss": 0.6636130213737488,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 2.076565890058195,
|
|
"grad_norm": 0.7521026730537415,
|
|
"learning_rate": 4.762522863224001e-06,
|
|
"loss": 0.6645440459251404,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 2.077863617008334,
|
|
"grad_norm": 0.735471248626709,
|
|
"learning_rate": 4.750346887057292e-06,
|
|
"loss": 0.6191429495811462,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 2.079161343958473,
|
|
"grad_norm": 0.7346929907798767,
|
|
"learning_rate": 4.738181645102493e-06,
|
|
"loss": 0.616767406463623,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 2.0804590709086117,
|
|
"grad_norm": 0.7322461605072021,
|
|
"learning_rate": 4.726027162234434e-06,
|
|
"loss": 0.6997534036636353,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 2.0817567978587506,
|
|
"grad_norm": 0.7436448335647583,
|
|
"learning_rate": 4.713883463305972e-06,
|
|
"loss": 0.6780825853347778,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 2.0830545248088894,
|
|
"grad_norm": 0.7452847361564636,
|
|
"learning_rate": 4.701750573147885e-06,
|
|
"loss": 0.6652136445045471,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 2.0843522517590283,
|
|
"grad_norm": 0.7359369993209839,
|
|
"learning_rate": 4.689628516568866e-06,
|
|
"loss": 0.676584780216217,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 2.085649978709167,
|
|
"grad_norm": 0.7257094979286194,
|
|
"learning_rate": 4.677517318355455e-06,
|
|
"loss": 0.6461347937583923,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 2.086947705659306,
|
|
"grad_norm": 0.7261176705360413,
|
|
"learning_rate": 4.6654170032719825e-06,
|
|
"loss": 0.6190035343170166,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 2.088245432609445,
|
|
"grad_norm": 0.7273695468902588,
|
|
"learning_rate": 4.6533275960605355e-06,
|
|
"loss": 0.6539610028266907,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 2.088245432609445,
|
|
"eval_loss": 0.7521457076072693,
|
|
"eval_runtime": 140.3222,
|
|
"eval_samples_per_second": 37.001,
|
|
"eval_steps_per_second": 9.25,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 2.089543159559584,
|
|
"grad_norm": 0.7120246291160583,
|
|
"learning_rate": 4.641249121440892e-06,
|
|
"loss": 0.6520042419433594,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 2.090840886509723,
|
|
"grad_norm": 0.7543119788169861,
|
|
"learning_rate": 4.629181604110464e-06,
|
|
"loss": 0.6681778430938721,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 2.092138613459862,
|
|
"grad_norm": 0.7003790736198425,
|
|
"learning_rate": 4.617125068744288e-06,
|
|
"loss": 0.5710310935974121,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 2.0934363404100007,
|
|
"grad_norm": 0.7836804986000061,
|
|
"learning_rate": 4.605079539994911e-06,
|
|
"loss": 0.686365008354187,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 2.0947340673601396,
|
|
"grad_norm": 0.7372239828109741,
|
|
"learning_rate": 4.593045042492404e-06,
|
|
"loss": 0.684090256690979,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 2.0960317943102784,
|
|
"grad_norm": 0.7506935000419617,
|
|
"learning_rate": 4.581021600844258e-06,
|
|
"loss": 0.6425600647926331,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 2.0973295212604173,
|
|
"grad_norm": 0.7384741306304932,
|
|
"learning_rate": 4.569009239635374e-06,
|
|
"loss": 0.675249457359314,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 2.098627248210556,
|
|
"grad_norm": 0.7220048308372498,
|
|
"learning_rate": 4.557007983427987e-06,
|
|
"loss": 0.6857472658157349,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 2.099924975160695,
|
|
"grad_norm": 0.7698497772216797,
|
|
"learning_rate": 4.54501785676163e-06,
|
|
"loss": 0.6067232489585876,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 2.101222702110834,
|
|
"grad_norm": 0.7213151454925537,
|
|
"learning_rate": 4.533038884153077e-06,
|
|
"loss": 0.7489792704582214,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 2.1025204290609727,
|
|
"grad_norm": 0.7353917956352234,
|
|
"learning_rate": 4.521071090096298e-06,
|
|
"loss": 0.6004921793937683,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 2.103818156011112,
|
|
"grad_norm": 0.712821364402771,
|
|
"learning_rate": 4.509114499062393e-06,
|
|
"loss": 0.632519006729126,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 2.105115882961251,
|
|
"grad_norm": 0.7335408926010132,
|
|
"learning_rate": 4.4971691354995795e-06,
|
|
"loss": 0.6487690210342407,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 2.1064136099113897,
|
|
"grad_norm": 0.7657801508903503,
|
|
"learning_rate": 4.485235023833087e-06,
|
|
"loss": 0.7272740602493286,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 2.1077113368615286,
|
|
"grad_norm": 0.7787186503410339,
|
|
"learning_rate": 4.4733121884651665e-06,
|
|
"loss": 0.6530774235725403,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 2.1090090638116674,
|
|
"grad_norm": 0.7693159580230713,
|
|
"learning_rate": 4.46140065377499e-06,
|
|
"loss": 0.6131106019020081,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 2.1103067907618063,
|
|
"grad_norm": 0.7225230932235718,
|
|
"learning_rate": 4.449500444118633e-06,
|
|
"loss": 0.6403114199638367,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 2.111604517711945,
|
|
"grad_norm": 0.7100993990898132,
|
|
"learning_rate": 4.437611583829014e-06,
|
|
"loss": 0.6448891162872314,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 2.112902244662084,
|
|
"grad_norm": 0.6913020610809326,
|
|
"learning_rate": 4.42573409721584e-06,
|
|
"loss": 0.6105331778526306,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 2.114199971612223,
|
|
"grad_norm": 0.7184289693832397,
|
|
"learning_rate": 4.413868008565569e-06,
|
|
"loss": 0.6300491690635681,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 2.1154976985623617,
|
|
"grad_norm": 0.7327896356582642,
|
|
"learning_rate": 4.402013342141347e-06,
|
|
"loss": 0.5891982316970825,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 2.1167954255125006,
|
|
"grad_norm": 0.7524354457855225,
|
|
"learning_rate": 4.390170122182965e-06,
|
|
"loss": 0.6236910820007324,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 2.11809315246264,
|
|
"grad_norm": 0.69328373670578,
|
|
"learning_rate": 4.378338372906813e-06,
|
|
"loss": 0.6320694088935852,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 2.1193908794127787,
|
|
"grad_norm": 0.7765412926673889,
|
|
"learning_rate": 4.3665181185058255e-06,
|
|
"loss": 0.6867218613624573,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 2.1206886063629176,
|
|
"grad_norm": 0.7132006883621216,
|
|
"learning_rate": 4.354709383149421e-06,
|
|
"loss": 0.6264625787734985,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 2.1219863333130564,
|
|
"grad_norm": 0.7659435272216797,
|
|
"learning_rate": 4.342912190983487e-06,
|
|
"loss": 0.7046580910682678,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 2.1232840602631953,
|
|
"grad_norm": 0.7297986149787903,
|
|
"learning_rate": 4.331126566130284e-06,
|
|
"loss": 0.7077990174293518,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 2.124581787213334,
|
|
"grad_norm": 0.7537614107131958,
|
|
"learning_rate": 4.319352532688444e-06,
|
|
"loss": 0.652155876159668,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 2.125879514163473,
|
|
"grad_norm": 0.7315341234207153,
|
|
"learning_rate": 4.3075901147328745e-06,
|
|
"loss": 0.6733738780021667,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 2.127177241113612,
|
|
"grad_norm": 0.7361832857131958,
|
|
"learning_rate": 4.295839336314749e-06,
|
|
"loss": 0.635147750377655,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 2.1284749680637507,
|
|
"grad_norm": 0.7507902383804321,
|
|
"learning_rate": 4.284100221461432e-06,
|
|
"loss": 0.6047714948654175,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 2.1297726950138895,
|
|
"grad_norm": 0.7528434991836548,
|
|
"learning_rate": 4.272372794176446e-06,
|
|
"loss": 0.7513724565505981,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 2.1310704219640284,
|
|
"grad_norm": 0.7637490034103394,
|
|
"learning_rate": 4.260657078439409e-06,
|
|
"loss": 0.67987060546875,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 2.1323681489141677,
|
|
"grad_norm": 0.7283375859260559,
|
|
"learning_rate": 4.248953098205997e-06,
|
|
"loss": 0.6341656446456909,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 2.1336658758643066,
|
|
"grad_norm": 0.7419525980949402,
|
|
"learning_rate": 4.237260877407878e-06,
|
|
"loss": 0.6832218766212463,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 2.1349636028144454,
|
|
"grad_norm": 0.7223761081695557,
|
|
"learning_rate": 4.225580439952699e-06,
|
|
"loss": 0.6866045594215393,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 2.1362613297645843,
|
|
"grad_norm": 0.7388637065887451,
|
|
"learning_rate": 4.213911809723987e-06,
|
|
"loss": 0.6384668350219727,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 2.137559056714723,
|
|
"grad_norm": 0.755170464515686,
|
|
"learning_rate": 4.20225501058114e-06,
|
|
"loss": 0.6708781123161316,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 2.138856783664862,
|
|
"grad_norm": 0.7287908792495728,
|
|
"learning_rate": 4.190610066359364e-06,
|
|
"loss": 0.6631587743759155,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 2.140154510615001,
|
|
"grad_norm": 0.7358418107032776,
|
|
"learning_rate": 4.1789770008696205e-06,
|
|
"loss": 0.6789165735244751,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 2.1414522375651397,
|
|
"grad_norm": 0.7651984691619873,
|
|
"learning_rate": 4.167355837898585e-06,
|
|
"loss": 0.7314514517784119,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 2.1427499645152785,
|
|
"grad_norm": 0.7463676333427429,
|
|
"learning_rate": 4.155746601208594e-06,
|
|
"loss": 0.6692876219749451,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 2.1440476914654174,
|
|
"grad_norm": 0.7222311496734619,
|
|
"learning_rate": 4.144149314537599e-06,
|
|
"loss": 0.6298620104789734,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 2.1453454184155567,
|
|
"grad_norm": 0.6989638805389404,
|
|
"learning_rate": 4.1325640015991185e-06,
|
|
"loss": 0.6444326043128967,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 2.1466431453656956,
|
|
"grad_norm": 0.7494760155677795,
|
|
"learning_rate": 4.120990686082174e-06,
|
|
"loss": 0.6625097990036011,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 2.1479408723158344,
|
|
"grad_norm": 0.7078225016593933,
|
|
"learning_rate": 4.109429391651283e-06,
|
|
"loss": 0.5881250500679016,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 2.1492385992659733,
|
|
"grad_norm": 0.767970621585846,
|
|
"learning_rate": 4.097880141946354e-06,
|
|
"loss": 0.6296786665916443,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 2.150536326216112,
|
|
"grad_norm": 0.7743704319000244,
|
|
"learning_rate": 4.08634296058268e-06,
|
|
"loss": 0.6085373759269714,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 2.151834053166251,
|
|
"grad_norm": 0.7132009267807007,
|
|
"learning_rate": 4.074817871150887e-06,
|
|
"loss": 0.6695290803909302,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 2.15313178011639,
|
|
"grad_norm": 0.7174614667892456,
|
|
"learning_rate": 4.063304897216856e-06,
|
|
"loss": 0.6345046758651733,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 2.1544295070665287,
|
|
"grad_norm": 0.756147027015686,
|
|
"learning_rate": 4.051804062321706e-06,
|
|
"loss": 0.6537505388259888,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 2.1557272340166675,
|
|
"grad_norm": 0.7213236093521118,
|
|
"learning_rate": 4.040315389981736e-06,
|
|
"loss": 0.702519953250885,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 2.1570249609668064,
|
|
"grad_norm": 0.7155364751815796,
|
|
"learning_rate": 4.028838903688372e-06,
|
|
"loss": 0.681422770023346,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 2.1583226879169457,
|
|
"grad_norm": 0.7463889122009277,
|
|
"learning_rate": 4.017374626908125e-06,
|
|
"loss": 0.6635671854019165,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 2.1596204148670846,
|
|
"grad_norm": 0.7302799820899963,
|
|
"learning_rate": 4.005922583082538e-06,
|
|
"loss": 0.6605507731437683,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 2.1609181418172234,
|
|
"grad_norm": 0.7709221243858337,
|
|
"learning_rate": 3.994482795628142e-06,
|
|
"loss": 0.6744245290756226,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 2.1622158687673623,
|
|
"grad_norm": 0.7545700669288635,
|
|
"learning_rate": 3.983055287936411e-06,
|
|
"loss": 0.7104499340057373,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 2.163513595717501,
|
|
"grad_norm": 0.7296931743621826,
|
|
"learning_rate": 3.971640083373696e-06,
|
|
"loss": 0.6586728096008301,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 2.16481132266764,
|
|
"grad_norm": 0.7653056383132935,
|
|
"learning_rate": 3.960237205281213e-06,
|
|
"loss": 0.6596845388412476,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 2.166109049617779,
|
|
"grad_norm": 0.740091860294342,
|
|
"learning_rate": 3.948846676974953e-06,
|
|
"loss": 0.6983301043510437,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 2.1674067765679177,
|
|
"grad_norm": 0.7317189574241638,
|
|
"learning_rate": 3.937468521745666e-06,
|
|
"loss": 0.6039131879806519,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 2.1687045035180565,
|
|
"grad_norm": 0.7543178200721741,
|
|
"learning_rate": 3.9261027628588e-06,
|
|
"loss": 0.7082279324531555,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 2.1700022304681954,
|
|
"grad_norm": 0.7396308779716492,
|
|
"learning_rate": 3.9147494235544544e-06,
|
|
"loss": 0.6432596445083618,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 2.1712999574183343,
|
|
"grad_norm": 0.7311068177223206,
|
|
"learning_rate": 3.903408527047336e-06,
|
|
"loss": 0.6383781433105469,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 2.1725976843684736,
|
|
"grad_norm": 0.7544176578521729,
|
|
"learning_rate": 3.892080096526707e-06,
|
|
"loss": 0.6584154367446899,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 2.1738954113186124,
|
|
"grad_norm": 0.7279508113861084,
|
|
"learning_rate": 3.880764155156339e-06,
|
|
"loss": 0.6078423261642456,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 2.1751931382687513,
|
|
"grad_norm": 0.7655706405639648,
|
|
"learning_rate": 3.8694607260744745e-06,
|
|
"loss": 0.716061532497406,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 2.17649086521889,
|
|
"grad_norm": 0.7374406456947327,
|
|
"learning_rate": 3.858169832393752e-06,
|
|
"loss": 0.6383547782897949,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 2.177788592169029,
|
|
"grad_norm": 0.7599214911460876,
|
|
"learning_rate": 3.846891497201206e-06,
|
|
"loss": 0.734661340713501,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 2.179086319119168,
|
|
"grad_norm": 0.7564613819122314,
|
|
"learning_rate": 3.835625743558168e-06,
|
|
"loss": 0.6974920630455017,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 2.1803840460693067,
|
|
"grad_norm": 0.7368860244750977,
|
|
"learning_rate": 3.824372594500256e-06,
|
|
"loss": 0.7153822183609009,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 2.1816817730194455,
|
|
"grad_norm": 0.7436947226524353,
|
|
"learning_rate": 3.813132073037309e-06,
|
|
"loss": 0.6690018773078918,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 2.1829794999695844,
|
|
"grad_norm": 0.7441128492355347,
|
|
"learning_rate": 3.8019042021533513e-06,
|
|
"loss": 0.6398620009422302,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 2.1842772269197233,
|
|
"grad_norm": 0.7101579308509827,
|
|
"learning_rate": 3.7906890048065358e-06,
|
|
"loss": 0.6713053584098816,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 2.185574953869862,
|
|
"grad_norm": 0.7423803210258484,
|
|
"learning_rate": 3.779486503929106e-06,
|
|
"loss": 0.6554515957832336,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 2.1868726808200014,
|
|
"grad_norm": 0.7913647890090942,
|
|
"learning_rate": 3.7682967224273317e-06,
|
|
"loss": 0.6829732656478882,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 2.1881704077701403,
|
|
"grad_norm": 0.7406657338142395,
|
|
"learning_rate": 3.757119683181493e-06,
|
|
"loss": 0.6207722425460815,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 2.189468134720279,
|
|
"grad_norm": 0.755535900592804,
|
|
"learning_rate": 3.7459554090458018e-06,
|
|
"loss": 0.5663500428199768,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 2.190765861670418,
|
|
"grad_norm": 0.736067533493042,
|
|
"learning_rate": 3.7348039228483758e-06,
|
|
"loss": 0.6010056734085083,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 2.192063588620557,
|
|
"grad_norm": 0.7262256741523743,
|
|
"learning_rate": 3.7236652473911817e-06,
|
|
"loss": 0.6251591444015503,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 2.1933613155706957,
|
|
"grad_norm": 0.7204144597053528,
|
|
"learning_rate": 3.7125394054499843e-06,
|
|
"loss": 0.6580095887184143,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 2.1946590425208345,
|
|
"grad_norm": 0.7472013235092163,
|
|
"learning_rate": 3.7014264197743267e-06,
|
|
"loss": 0.6532347798347473,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 2.1959567694709734,
|
|
"grad_norm": 0.7987051010131836,
|
|
"learning_rate": 3.6903263130874423e-06,
|
|
"loss": 0.7221670746803284,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 2.1972544964211123,
|
|
"grad_norm": 0.6925249695777893,
|
|
"learning_rate": 3.679239108086241e-06,
|
|
"loss": 0.6809045672416687,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 2.198552223371251,
|
|
"grad_norm": 0.7587743997573853,
|
|
"learning_rate": 3.668164827441254e-06,
|
|
"loss": 0.6878798007965088,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 2.19984995032139,
|
|
"grad_norm": 0.7842516899108887,
|
|
"learning_rate": 3.657103493796581e-06,
|
|
"loss": 0.6502532958984375,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 2.2011476772715293,
|
|
"grad_norm": 0.7169952392578125,
|
|
"learning_rate": 3.6460551297698486e-06,
|
|
"loss": 0.6481271386146545,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 2.202445404221668,
|
|
"grad_norm": 0.7124336957931519,
|
|
"learning_rate": 3.6350197579521696e-06,
|
|
"loss": 0.6550193428993225,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 2.203743131171807,
|
|
"grad_norm": 0.7990091443061829,
|
|
"learning_rate": 3.6239974009080746e-06,
|
|
"loss": 0.6425266265869141,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 2.205040858121946,
|
|
"grad_norm": 0.7323048114776611,
|
|
"learning_rate": 3.6129880811755093e-06,
|
|
"loss": 0.6682150959968567,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 2.2063385850720847,
|
|
"grad_norm": 0.7515720129013062,
|
|
"learning_rate": 3.601991821265731e-06,
|
|
"loss": 0.6324195265769958,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 2.2076363120222235,
|
|
"grad_norm": 0.7524798512458801,
|
|
"learning_rate": 3.591008643663323e-06,
|
|
"loss": 0.6398360729217529,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 2.2089340389723624,
|
|
"grad_norm": 0.7255743741989136,
|
|
"learning_rate": 3.580038570826093e-06,
|
|
"loss": 0.6324408650398254,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 2.2102317659225013,
|
|
"grad_norm": 0.7248579263687134,
|
|
"learning_rate": 3.5690816251850657e-06,
|
|
"loss": 0.6215530037879944,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 2.21152949287264,
|
|
"grad_norm": 0.7395302057266235,
|
|
"learning_rate": 3.5581378291444223e-06,
|
|
"loss": 0.6551209092140198,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 2.212827219822779,
|
|
"grad_norm": 0.7041357755661011,
|
|
"learning_rate": 3.5472072050814565e-06,
|
|
"loss": 0.5609908103942871,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 2.2141249467729183,
|
|
"grad_norm": 0.7290234565734863,
|
|
"learning_rate": 3.5362897753465265e-06,
|
|
"loss": 0.6203784346580505,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 2.215422673723057,
|
|
"grad_norm": 0.7435030341148376,
|
|
"learning_rate": 3.5253855622630174e-06,
|
|
"loss": 0.6926784515380859,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 2.216720400673196,
|
|
"grad_norm": 0.8078302145004272,
|
|
"learning_rate": 3.514494588127275e-06,
|
|
"loss": 0.7228481769561768,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 2.218018127623335,
|
|
"grad_norm": 0.7225632667541504,
|
|
"learning_rate": 3.5036168752085977e-06,
|
|
"loss": 0.6265015006065369,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 2.2193158545734737,
|
|
"grad_norm": 0.7306722402572632,
|
|
"learning_rate": 3.4927524457491456e-06,
|
|
"loss": 0.6289119720458984,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 2.2206135815236125,
|
|
"grad_norm": 0.7898452281951904,
|
|
"learning_rate": 3.4819013219639295e-06,
|
|
"loss": 0.597404420375824,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 2.2219113084737514,
|
|
"grad_norm": 0.6890703439712524,
|
|
"learning_rate": 3.471063526040752e-06,
|
|
"loss": 0.6129499673843384,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 2.2232090354238903,
|
|
"grad_norm": 0.710536777973175,
|
|
"learning_rate": 3.460239080140163e-06,
|
|
"loss": 0.5661106109619141,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 2.224506762374029,
|
|
"grad_norm": 0.7644726634025574,
|
|
"learning_rate": 3.4494280063954146e-06,
|
|
"loss": 0.6964048147201538,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 2.225804489324168,
|
|
"grad_norm": 0.7347561120986938,
|
|
"learning_rate": 3.4386303269124142e-06,
|
|
"loss": 0.6240056157112122,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 2.2271022162743073,
|
|
"grad_norm": 0.7397733330726624,
|
|
"learning_rate": 3.4278460637696865e-06,
|
|
"loss": 0.6740396022796631,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 2.228399943224446,
|
|
"grad_norm": 0.7311684489250183,
|
|
"learning_rate": 3.4170752390183183e-06,
|
|
"loss": 0.666801929473877,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 2.229697670174585,
|
|
"grad_norm": 0.7383760213851929,
|
|
"learning_rate": 3.4063178746819193e-06,
|
|
"loss": 0.6334900259971619,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 2.230995397124724,
|
|
"grad_norm": 0.7332467436790466,
|
|
"learning_rate": 3.395573992756579e-06,
|
|
"loss": 0.6466909646987915,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 2.2322931240748627,
|
|
"grad_norm": 0.7475365996360779,
|
|
"learning_rate": 3.384843615210819e-06,
|
|
"loss": 0.6753822565078735,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 2.2335908510250015,
|
|
"grad_norm": 0.7616447806358337,
|
|
"learning_rate": 3.3741267639855345e-06,
|
|
"loss": 0.7791091203689575,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 2.2348885779751404,
|
|
"grad_norm": 0.7229276299476624,
|
|
"learning_rate": 3.3634234609939888e-06,
|
|
"loss": 0.6403383016586304,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 2.2361863049252793,
|
|
"grad_norm": 0.7077613472938538,
|
|
"learning_rate": 3.352733728121712e-06,
|
|
"loss": 0.6446459889411926,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 2.237484031875418,
|
|
"grad_norm": 0.6968312859535217,
|
|
"learning_rate": 3.3420575872265184e-06,
|
|
"loss": 0.5743072032928467,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 2.238781758825557,
|
|
"grad_norm": 0.7185531854629517,
|
|
"learning_rate": 3.3313950601384016e-06,
|
|
"loss": 0.6074244379997253,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 2.240079485775696,
|
|
"grad_norm": 0.7392717599868774,
|
|
"learning_rate": 3.320746168659534e-06,
|
|
"loss": 0.7010684609413147,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 2.241377212725835,
|
|
"grad_norm": 0.7549191117286682,
|
|
"learning_rate": 3.3101109345642056e-06,
|
|
"loss": 0.6260566115379333,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 2.242674939675974,
|
|
"grad_norm": 0.7569594383239746,
|
|
"learning_rate": 3.299489379598777e-06,
|
|
"loss": 0.6684094667434692,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 2.243972666626113,
|
|
"grad_norm": 0.7654653787612915,
|
|
"learning_rate": 3.288881525481639e-06,
|
|
"loss": 0.6516446471214294,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 2.2452703935762517,
|
|
"grad_norm": 0.7150068879127502,
|
|
"learning_rate": 3.278287393903172e-06,
|
|
"loss": 0.6244807839393616,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 2.2465681205263905,
|
|
"grad_norm": 0.7367082238197327,
|
|
"learning_rate": 3.2677070065256855e-06,
|
|
"loss": 0.6541182398796082,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 2.2478658474765294,
|
|
"grad_norm": 0.7309427857398987,
|
|
"learning_rate": 3.257140384983405e-06,
|
|
"loss": 0.6608707308769226,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 2.2491635744266683,
|
|
"grad_norm": 0.7438578009605408,
|
|
"learning_rate": 3.2465875508823876e-06,
|
|
"loss": 0.6337431073188782,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 2.250461301376807,
|
|
"grad_norm": 0.7018159627914429,
|
|
"learning_rate": 3.2360485258005115e-06,
|
|
"loss": 0.614033043384552,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 2.251759028326946,
|
|
"grad_norm": 0.7361255884170532,
|
|
"learning_rate": 3.2255233312874155e-06,
|
|
"loss": 0.6730838418006897,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 2.253056755277085,
|
|
"grad_norm": 0.7623570561408997,
|
|
"learning_rate": 3.2150119888644594e-06,
|
|
"loss": 0.659545361995697,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 2.2543544822272237,
|
|
"grad_norm": 0.6926621198654175,
|
|
"learning_rate": 3.2045145200246763e-06,
|
|
"loss": 0.5896809697151184,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 2.255652209177363,
|
|
"grad_norm": 0.7644792795181274,
|
|
"learning_rate": 3.1940309462327334e-06,
|
|
"loss": 0.688497006893158,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 2.256949936127502,
|
|
"grad_norm": 0.7479227185249329,
|
|
"learning_rate": 3.1835612889248868e-06,
|
|
"loss": 0.6612273454666138,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 2.2582476630776407,
|
|
"grad_norm": 0.7315995693206787,
|
|
"learning_rate": 3.1731055695089384e-06,
|
|
"loss": 0.5924808382987976,
|
|
"step": 1741
|
|
},
|
|
{
|
|
"epoch": 2.2595453900277795,
|
|
"grad_norm": 0.7356354594230652,
|
|
"learning_rate": 3.162663809364178e-06,
|
|
"loss": 0.6635130047798157,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 2.2608431169779184,
|
|
"grad_norm": 0.7253445982933044,
|
|
"learning_rate": 3.152236029841376e-06,
|
|
"loss": 0.6303724646568298,
|
|
"step": 1743
|
|
},
|
|
{
|
|
"epoch": 2.2621408439280573,
|
|
"grad_norm": 0.7351011037826538,
|
|
"learning_rate": 3.1418222522626907e-06,
|
|
"loss": 0.720777153968811,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 2.263438570878196,
|
|
"grad_norm": 0.7059449553489685,
|
|
"learning_rate": 3.1314224979216633e-06,
|
|
"loss": 0.598090648651123,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 2.264736297828335,
|
|
"grad_norm": 0.7039961218833923,
|
|
"learning_rate": 3.1210367880831684e-06,
|
|
"loss": 0.5808880925178528,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 2.266034024778474,
|
|
"grad_norm": 0.7747211456298828,
|
|
"learning_rate": 3.1106651439833434e-06,
|
|
"loss": 0.6428390741348267,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 2.2673317517286127,
|
|
"grad_norm": 0.7529793381690979,
|
|
"learning_rate": 3.1003075868295794e-06,
|
|
"loss": 0.6959705352783203,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 2.2686294786787515,
|
|
"grad_norm": 0.7145947813987732,
|
|
"learning_rate": 3.0899641378004596e-06,
|
|
"loss": 0.6403526663780212,
|
|
"step": 1749
|
|
},
|
|
{
|
|
"epoch": 2.269927205628891,
|
|
"grad_norm": 0.7092662453651428,
|
|
"learning_rate": 3.079634818045719e-06,
|
|
"loss": 0.5681431889533997,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 2.2712249325790297,
|
|
"grad_norm": 0.7515605688095093,
|
|
"learning_rate": 3.069319648686202e-06,
|
|
"loss": 0.633612334728241,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 2.2725226595291685,
|
|
"grad_norm": 0.7028906941413879,
|
|
"learning_rate": 3.0590186508138186e-06,
|
|
"loss": 0.6241360902786255,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 2.2738203864793074,
|
|
"grad_norm": 0.7183363437652588,
|
|
"learning_rate": 3.048731845491504e-06,
|
|
"loss": 0.5909807085990906,
|
|
"step": 1753
|
|
},
|
|
{
|
|
"epoch": 2.2751181134294463,
|
|
"grad_norm": 0.7331669926643372,
|
|
"learning_rate": 3.038459253753172e-06,
|
|
"loss": 0.6321236491203308,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 2.276415840379585,
|
|
"grad_norm": 0.6997974514961243,
|
|
"learning_rate": 3.0282008966036647e-06,
|
|
"loss": 0.6245713829994202,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 2.277713567329724,
|
|
"grad_norm": 0.7051255702972412,
|
|
"learning_rate": 3.0179567950187396e-06,
|
|
"loss": 0.6196664571762085,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 2.279011294279863,
|
|
"grad_norm": 0.7281318306922913,
|
|
"learning_rate": 3.0077269699449795e-06,
|
|
"loss": 0.6078094840049744,
|
|
"step": 1757
|
|
},
|
|
{
|
|
"epoch": 2.2803090212300017,
|
|
"grad_norm": 0.7404606938362122,
|
|
"learning_rate": 2.9975114422997932e-06,
|
|
"loss": 0.6296783685684204,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 2.2816067481801405,
|
|
"grad_norm": 0.7832150459289551,
|
|
"learning_rate": 2.9873102329713478e-06,
|
|
"loss": 0.6518726348876953,
|
|
"step": 1759
|
|
},
|
|
{
|
|
"epoch": 2.2829044751302794,
|
|
"grad_norm": 0.715710461139679,
|
|
"learning_rate": 2.9771233628185346e-06,
|
|
"loss": 0.5865130424499512,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 2.2842022020804187,
|
|
"grad_norm": 0.7315993309020996,
|
|
"learning_rate": 2.9669508526709256e-06,
|
|
"loss": 0.7027003765106201,
|
|
"step": 1761
|
|
},
|
|
{
|
|
"epoch": 2.2854999290305575,
|
|
"grad_norm": 0.7398679852485657,
|
|
"learning_rate": 2.9567927233287307e-06,
|
|
"loss": 0.6710663437843323,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 2.2867976559806964,
|
|
"grad_norm": 0.7295849323272705,
|
|
"learning_rate": 2.9466489955627452e-06,
|
|
"loss": 0.7136781811714172,
|
|
"step": 1763
|
|
},
|
|
{
|
|
"epoch": 2.2880953829308353,
|
|
"grad_norm": 0.7286946773529053,
|
|
"learning_rate": 2.936519690114338e-06,
|
|
"loss": 0.6223260760307312,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 2.289393109880974,
|
|
"grad_norm": 0.7104554772377014,
|
|
"learning_rate": 2.9264048276953606e-06,
|
|
"loss": 0.6340541839599609,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 2.290690836831113,
|
|
"grad_norm": 0.7115781903266907,
|
|
"learning_rate": 2.9163044289881604e-06,
|
|
"loss": 0.6645469069480896,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 2.291988563781252,
|
|
"grad_norm": 0.733094334602356,
|
|
"learning_rate": 2.906218514645487e-06,
|
|
"loss": 0.6235517859458923,
|
|
"step": 1767
|
|
},
|
|
{
|
|
"epoch": 2.2932862907313907,
|
|
"grad_norm": 0.7436304688453674,
|
|
"learning_rate": 2.8961471052904855e-06,
|
|
"loss": 0.66838139295578,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 2.2945840176815295,
|
|
"grad_norm": 0.7022131681442261,
|
|
"learning_rate": 2.8860902215166374e-06,
|
|
"loss": 0.6098725199699402,
|
|
"step": 1769
|
|
},
|
|
{
|
|
"epoch": 2.295881744631669,
|
|
"grad_norm": 0.725817859172821,
|
|
"learning_rate": 2.876047883887727e-06,
|
|
"loss": 0.7111449837684631,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 2.2971794715818072,
|
|
"grad_norm": 0.7336429357528687,
|
|
"learning_rate": 2.866020112937792e-06,
|
|
"loss": 0.6535848379135132,
|
|
"step": 1771
|
|
},
|
|
{
|
|
"epoch": 2.2984771985319465,
|
|
"grad_norm": 0.743033230304718,
|
|
"learning_rate": 2.8560069291710857e-06,
|
|
"loss": 0.6946330070495605,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 2.2997749254820854,
|
|
"grad_norm": 0.7527621388435364,
|
|
"learning_rate": 2.8460083530620342e-06,
|
|
"loss": 0.67728191614151,
|
|
"step": 1773
|
|
},
|
|
{
|
|
"epoch": 2.3010726524322243,
|
|
"grad_norm": 0.7036607265472412,
|
|
"learning_rate": 2.8360244050551943e-06,
|
|
"loss": 0.5508571267127991,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 2.302370379382363,
|
|
"grad_norm": 0.698133647441864,
|
|
"learning_rate": 2.8260551055652154e-06,
|
|
"loss": 0.680967390537262,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 2.303668106332502,
|
|
"grad_norm": 0.7584355473518372,
|
|
"learning_rate": 2.8161004749767893e-06,
|
|
"loss": 0.6776391863822937,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 2.304965833282641,
|
|
"grad_norm": 0.7389799356460571,
|
|
"learning_rate": 2.8061605336446194e-06,
|
|
"loss": 0.6526666879653931,
|
|
"step": 1777
|
|
},
|
|
{
|
|
"epoch": 2.3062635602327797,
|
|
"grad_norm": 0.7454041242599487,
|
|
"learning_rate": 2.796235301893362e-06,
|
|
"loss": 0.6357724666595459,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 2.3075612871829185,
|
|
"grad_norm": 0.745415210723877,
|
|
"learning_rate": 2.7863248000176146e-06,
|
|
"loss": 0.6145803928375244,
|
|
"step": 1779
|
|
},
|
|
{
|
|
"epoch": 2.3088590141330574,
|
|
"grad_norm": 0.7515760660171509,
|
|
"learning_rate": 2.776429048281837e-06,
|
|
"loss": 0.6784413456916809,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 2.3101567410831967,
|
|
"grad_norm": 0.7618042230606079,
|
|
"learning_rate": 2.7665480669203383e-06,
|
|
"loss": 0.6697713136672974,
|
|
"step": 1781
|
|
},
|
|
{
|
|
"epoch": 2.3114544680333355,
|
|
"grad_norm": 0.69931560754776,
|
|
"learning_rate": 2.756681876137227e-06,
|
|
"loss": 0.5977004766464233,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 2.3127521949834744,
|
|
"grad_norm": 0.7272830605506897,
|
|
"learning_rate": 2.7468304961063642e-06,
|
|
"loss": 0.6867664456367493,
|
|
"step": 1783
|
|
},
|
|
{
|
|
"epoch": 2.3140499219336133,
|
|
"grad_norm": 0.7531746029853821,
|
|
"learning_rate": 2.736993946971329e-06,
|
|
"loss": 0.6313377022743225,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 2.315347648883752,
|
|
"grad_norm": 0.7396632432937622,
|
|
"learning_rate": 2.727172248845378e-06,
|
|
"loss": 0.6548261642456055,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 2.316645375833891,
|
|
"grad_norm": 0.7558153867721558,
|
|
"learning_rate": 2.717365421811389e-06,
|
|
"loss": 0.6362917423248291,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 2.31794310278403,
|
|
"grad_norm": 0.7348777055740356,
|
|
"learning_rate": 2.7075734859218526e-06,
|
|
"loss": 0.617246150970459,
|
|
"step": 1787
|
|
},
|
|
{
|
|
"epoch": 2.3192408297341687,
|
|
"grad_norm": 0.7107247710227966,
|
|
"learning_rate": 2.6977964611987885e-06,
|
|
"loss": 0.6115847229957581,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 2.3205385566843075,
|
|
"grad_norm": 0.7372192740440369,
|
|
"learning_rate": 2.6880343676337485e-06,
|
|
"loss": 0.653107762336731,
|
|
"step": 1789
|
|
},
|
|
{
|
|
"epoch": 2.3218362836344464,
|
|
"grad_norm": 0.7087644338607788,
|
|
"learning_rate": 2.6782872251877347e-06,
|
|
"loss": 0.6624957919120789,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 2.3231340105845852,
|
|
"grad_norm": 0.7231054902076721,
|
|
"learning_rate": 2.6685550537911886e-06,
|
|
"loss": 0.6585568189620972,
|
|
"step": 1791
|
|
},
|
|
{
|
|
"epoch": 2.3244317375347245,
|
|
"grad_norm": 0.7619837522506714,
|
|
"learning_rate": 2.658837873343938e-06,
|
|
"loss": 0.6406753063201904,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 2.3257294644848634,
|
|
"grad_norm": 0.7381089329719543,
|
|
"learning_rate": 2.6491357037151565e-06,
|
|
"loss": 0.6516512036323547,
|
|
"step": 1793
|
|
},
|
|
{
|
|
"epoch": 2.3270271914350023,
|
|
"grad_norm": 0.7420887351036072,
|
|
"learning_rate": 2.639448564743328e-06,
|
|
"loss": 0.6555370688438416,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 2.328324918385141,
|
|
"grad_norm": 0.7358477115631104,
|
|
"learning_rate": 2.6297764762362e-06,
|
|
"loss": 0.6229339838027954,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 2.32962264533528,
|
|
"grad_norm": 0.7449919581413269,
|
|
"learning_rate": 2.6201194579707377e-06,
|
|
"loss": 0.6487348675727844,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 2.330920372285419,
|
|
"grad_norm": 0.755095362663269,
|
|
"learning_rate": 2.6104775296931118e-06,
|
|
"loss": 0.709601640701294,
|
|
"step": 1797
|
|
},
|
|
{
|
|
"epoch": 2.3322180992355577,
|
|
"grad_norm": 0.7726845145225525,
|
|
"learning_rate": 2.6008507111186142e-06,
|
|
"loss": 0.6235072016716003,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 2.3335158261856965,
|
|
"grad_norm": 0.7045385241508484,
|
|
"learning_rate": 2.5912390219316573e-06,
|
|
"loss": 0.5908339619636536,
|
|
"step": 1799
|
|
},
|
|
{
|
|
"epoch": 2.3348135531358354,
|
|
"grad_norm": 0.7490655779838562,
|
|
"learning_rate": 2.5816424817857122e-06,
|
|
"loss": 0.7369755506515503,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 2.3361112800859742,
|
|
"grad_norm": 0.7135450839996338,
|
|
"learning_rate": 2.572061110303271e-06,
|
|
"loss": 0.6987670063972473,
|
|
"step": 1801
|
|
},
|
|
{
|
|
"epoch": 2.337409007036113,
|
|
"grad_norm": 0.7187747359275818,
|
|
"learning_rate": 2.562494927075824e-06,
|
|
"loss": 0.5778123140335083,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 2.3387067339862524,
|
|
"grad_norm": 0.7786324620246887,
|
|
"learning_rate": 2.552943951663782e-06,
|
|
"loss": 0.6605340838432312,
|
|
"step": 1803
|
|
},
|
|
{
|
|
"epoch": 2.3400044609363913,
|
|
"grad_norm": 0.785906195640564,
|
|
"learning_rate": 2.543408203596479e-06,
|
|
"loss": 0.6925969123840332,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 2.34130218788653,
|
|
"grad_norm": 0.7589930891990662,
|
|
"learning_rate": 2.5338877023721055e-06,
|
|
"loss": 0.6296513676643372,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 2.342599914836669,
|
|
"grad_norm": 0.6791945695877075,
|
|
"learning_rate": 2.5243824674576743e-06,
|
|
"loss": 0.6128097176551819,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 2.343897641786808,
|
|
"grad_norm": 0.737198531627655,
|
|
"learning_rate": 2.514892518288988e-06,
|
|
"loss": 0.60391765832901,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 2.3451953687369467,
|
|
"grad_norm": 0.7078155279159546,
|
|
"learning_rate": 2.5054178742705936e-06,
|
|
"loss": 0.6364641189575195,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 2.3464930956870855,
|
|
"grad_norm": 0.7275543808937073,
|
|
"learning_rate": 2.4959585547757294e-06,
|
|
"loss": 0.6722849011421204,
|
|
"step": 1809
|
|
},
|
|
{
|
|
"epoch": 2.3477908226372244,
|
|
"grad_norm": 0.8179038166999817,
|
|
"learning_rate": 2.486514579146322e-06,
|
|
"loss": 0.6581687927246094,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 2.3490885495873632,
|
|
"grad_norm": 0.766876757144928,
|
|
"learning_rate": 2.4770859666929027e-06,
|
|
"loss": 0.6003885865211487,
|
|
"step": 1811
|
|
},
|
|
{
|
|
"epoch": 2.350386276537502,
|
|
"grad_norm": 0.7353731989860535,
|
|
"learning_rate": 2.4676727366945995e-06,
|
|
"loss": 0.6582502722740173,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 2.351684003487641,
|
|
"grad_norm": 0.7552323341369629,
|
|
"learning_rate": 2.4582749083990875e-06,
|
|
"loss": 0.6586010456085205,
|
|
"step": 1813
|
|
},
|
|
{
|
|
"epoch": 2.3529817304377803,
|
|
"grad_norm": 0.7750751376152039,
|
|
"learning_rate": 2.448892501022544e-06,
|
|
"loss": 0.6576810479164124,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 2.354279457387919,
|
|
"grad_norm": 0.755615770816803,
|
|
"learning_rate": 2.4395255337496202e-06,
|
|
"loss": 0.6574745178222656,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 2.355577184338058,
|
|
"grad_norm": 0.7417405843734741,
|
|
"learning_rate": 2.4301740257333918e-06,
|
|
"loss": 0.6290728449821472,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 2.356874911288197,
|
|
"grad_norm": 0.7301021814346313,
|
|
"learning_rate": 2.4208379960953255e-06,
|
|
"loss": 0.6600069403648376,
|
|
"step": 1817
|
|
},
|
|
{
|
|
"epoch": 2.3581726382383357,
|
|
"grad_norm": 0.7170204520225525,
|
|
"learning_rate": 2.4115174639252425e-06,
|
|
"loss": 0.5834653973579407,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 2.3594703651884745,
|
|
"grad_norm": 0.7591288089752197,
|
|
"learning_rate": 2.4022124482812627e-06,
|
|
"loss": 0.6460838913917542,
|
|
"step": 1819
|
|
},
|
|
{
|
|
"epoch": 2.3607680921386134,
|
|
"grad_norm": 0.7465713024139404,
|
|
"learning_rate": 2.3929229681898005e-06,
|
|
"loss": 0.670021116733551,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 2.3620658190887522,
|
|
"grad_norm": 0.7204452753067017,
|
|
"learning_rate": 2.3836490426454816e-06,
|
|
"loss": 0.6367021799087524,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 2.363363546038891,
|
|
"grad_norm": 0.7174842357635498,
|
|
"learning_rate": 2.3743906906111415e-06,
|
|
"loss": 0.6825685501098633,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 2.3646612729890304,
|
|
"grad_norm": 0.6899293065071106,
|
|
"learning_rate": 2.365147931017764e-06,
|
|
"loss": 0.642341673374176,
|
|
"step": 1823
|
|
},
|
|
{
|
|
"epoch": 2.365958999939169,
|
|
"grad_norm": 0.7295400500297546,
|
|
"learning_rate": 2.355920782764455e-06,
|
|
"loss": 0.6189469695091248,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 2.367256726889308,
|
|
"grad_norm": 0.7334946393966675,
|
|
"learning_rate": 2.3467092647183962e-06,
|
|
"loss": 0.642494261264801,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 2.368554453839447,
|
|
"grad_norm": 0.727120041847229,
|
|
"learning_rate": 2.337513395714812e-06,
|
|
"loss": 0.6564252972602844,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 2.369852180789586,
|
|
"grad_norm": 0.7781887650489807,
|
|
"learning_rate": 2.3283331945569256e-06,
|
|
"loss": 0.7230110764503479,
|
|
"step": 1827
|
|
},
|
|
{
|
|
"epoch": 2.3711499077397247,
|
|
"grad_norm": 0.7318363189697266,
|
|
"learning_rate": 2.3191686800159272e-06,
|
|
"loss": 0.6312495470046997,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 2.3724476346898635,
|
|
"grad_norm": 0.7348397374153137,
|
|
"learning_rate": 2.310019870830923e-06,
|
|
"loss": 0.6707776784896851,
|
|
"step": 1829
|
|
},
|
|
{
|
|
"epoch": 2.3737453616400024,
|
|
"grad_norm": 0.7550859451293945,
|
|
"learning_rate": 2.300886785708919e-06,
|
|
"loss": 0.6729933023452759,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 2.3750430885901412,
|
|
"grad_norm": 0.724520206451416,
|
|
"learning_rate": 2.2917694433247626e-06,
|
|
"loss": 0.6436410546302795,
|
|
"step": 1831
|
|
},
|
|
{
|
|
"epoch": 2.37634081554028,
|
|
"grad_norm": 0.7761313319206238,
|
|
"learning_rate": 2.282667862321104e-06,
|
|
"loss": 0.6961484551429749,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 2.377638542490419,
|
|
"grad_norm": 0.7718027234077454,
|
|
"learning_rate": 2.2735820613083837e-06,
|
|
"loss": 0.731279194355011,
|
|
"step": 1833
|
|
},
|
|
{
|
|
"epoch": 2.3789362694405582,
|
|
"grad_norm": 0.7511587738990784,
|
|
"learning_rate": 2.264512058864755e-06,
|
|
"loss": 0.6527747511863708,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 2.380233996390697,
|
|
"grad_norm": 0.7314983010292053,
|
|
"learning_rate": 2.2554578735360823e-06,
|
|
"loss": 0.6660367846488953,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 2.381531723340836,
|
|
"grad_norm": 0.7481415867805481,
|
|
"learning_rate": 2.246419523835882e-06,
|
|
"loss": 0.6034996509552002,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 2.382829450290975,
|
|
"grad_norm": 0.7201923131942749,
|
|
"learning_rate": 2.2373970282452916e-06,
|
|
"loss": 0.618115246295929,
|
|
"step": 1837
|
|
},
|
|
{
|
|
"epoch": 2.3841271772411137,
|
|
"grad_norm": 0.7333959341049194,
|
|
"learning_rate": 2.2283904052130313e-06,
|
|
"loss": 0.679516077041626,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 2.3854249041912525,
|
|
"grad_norm": 0.7144783735275269,
|
|
"learning_rate": 2.2193996731553656e-06,
|
|
"loss": 0.6412646174430847,
|
|
"step": 1839
|
|
},
|
|
{
|
|
"epoch": 2.3867226311413914,
|
|
"grad_norm": 0.7374799251556396,
|
|
"learning_rate": 2.2104248504560643e-06,
|
|
"loss": 0.6004337072372437,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 2.3867226311413914,
|
|
"eval_loss": 0.7504242062568665,
|
|
"eval_runtime": 140.6905,
|
|
"eval_samples_per_second": 36.904,
|
|
"eval_steps_per_second": 9.226,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 2.3880203580915302,
|
|
"grad_norm": 0.7108725905418396,
|
|
"learning_rate": 2.2014659554663732e-06,
|
|
"loss": 0.6515002250671387,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 2.389318085041669,
|
|
"grad_norm": 0.744311511516571,
|
|
"learning_rate": 2.192523006504956e-06,
|
|
"loss": 0.5911805033683777,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 2.390615811991808,
|
|
"grad_norm": 0.7513126730918884,
|
|
"learning_rate": 2.183596021857891e-06,
|
|
"loss": 0.5855857133865356,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 2.391913538941947,
|
|
"grad_norm": 0.7308302521705627,
|
|
"learning_rate": 2.1746850197785928e-06,
|
|
"loss": 0.6079833507537842,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 2.393211265892086,
|
|
"grad_norm": 0.7567104697227478,
|
|
"learning_rate": 2.16579001848781e-06,
|
|
"loss": 0.6419387459754944,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 2.394508992842225,
|
|
"grad_norm": 0.7667451500892639,
|
|
"learning_rate": 2.156911036173568e-06,
|
|
"loss": 0.6022201776504517,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 2.395806719792364,
|
|
"grad_norm": 0.700553297996521,
|
|
"learning_rate": 2.1480480909911384e-06,
|
|
"loss": 0.6151991486549377,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 2.3971044467425027,
|
|
"grad_norm": 0.7488269209861755,
|
|
"learning_rate": 2.139201201062999e-06,
|
|
"loss": 0.6688805222511292,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 2.3984021736926415,
|
|
"grad_norm": 0.7348271608352661,
|
|
"learning_rate": 2.130370384478807e-06,
|
|
"loss": 0.6284016370773315,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 2.3996999006427804,
|
|
"grad_norm": 0.7548435926437378,
|
|
"learning_rate": 2.1215556592953357e-06,
|
|
"loss": 0.6753513216972351,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 2.4009976275929192,
|
|
"grad_norm": 0.7015430927276611,
|
|
"learning_rate": 2.11275704353648e-06,
|
|
"loss": 0.5835912823677063,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 2.402295354543058,
|
|
"grad_norm": 0.732021689414978,
|
|
"learning_rate": 2.10397455519317e-06,
|
|
"loss": 0.645444929599762,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 2.403593081493197,
|
|
"grad_norm": 0.7345272302627563,
|
|
"learning_rate": 2.095208212223383e-06,
|
|
"loss": 0.666027843952179,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 2.404890808443336,
|
|
"grad_norm": 0.694179356098175,
|
|
"learning_rate": 2.0864580325520623e-06,
|
|
"loss": 0.6171280145645142,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 2.4061885353934747,
|
|
"grad_norm": 0.7522391080856323,
|
|
"learning_rate": 2.077724034071116e-06,
|
|
"loss": 0.6551393270492554,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 2.407486262343614,
|
|
"grad_norm": 0.731461226940155,
|
|
"learning_rate": 2.069006234639357e-06,
|
|
"loss": 0.5965202450752258,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 2.408783989293753,
|
|
"grad_norm": 0.7376645803451538,
|
|
"learning_rate": 2.060304652082481e-06,
|
|
"loss": 0.6684772372245789,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 2.4100817162438917,
|
|
"grad_norm": 0.8123404383659363,
|
|
"learning_rate": 2.051619304193022e-06,
|
|
"loss": 0.726719856262207,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 2.4113794431940305,
|
|
"grad_norm": 0.723229169845581,
|
|
"learning_rate": 2.0429502087303164e-06,
|
|
"loss": 0.6310455799102783,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 2.4126771701441694,
|
|
"grad_norm": 0.7440442442893982,
|
|
"learning_rate": 2.0342973834204715e-06,
|
|
"loss": 0.6147751808166504,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 2.4139748970943082,
|
|
"grad_norm": 0.7190000414848328,
|
|
"learning_rate": 2.0256608459563244e-06,
|
|
"loss": 0.6343541741371155,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 2.415272624044447,
|
|
"grad_norm": 0.7396417260169983,
|
|
"learning_rate": 2.017040613997412e-06,
|
|
"loss": 0.6213467121124268,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 2.416570350994586,
|
|
"grad_norm": 0.7148772478103638,
|
|
"learning_rate": 2.008436705169917e-06,
|
|
"loss": 0.5708230137825012,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 2.417868077944725,
|
|
"grad_norm": 0.7284368872642517,
|
|
"learning_rate": 1.9998491370666684e-06,
|
|
"loss": 0.5845701098442078,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 2.4191658048948637,
|
|
"grad_norm": 0.7286568284034729,
|
|
"learning_rate": 1.991277927247056e-06,
|
|
"loss": 0.636822521686554,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 2.4204635318450025,
|
|
"grad_norm": 0.741385817527771,
|
|
"learning_rate": 1.9827230932370467e-06,
|
|
"loss": 0.6635302305221558,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 2.421761258795142,
|
|
"grad_norm": 0.7097977995872498,
|
|
"learning_rate": 1.9741846525291033e-06,
|
|
"loss": 0.5913397669792175,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 2.4230589857452807,
|
|
"grad_norm": 0.748805582523346,
|
|
"learning_rate": 1.9656626225821774e-06,
|
|
"loss": 0.6394146680831909,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 2.4243567126954195,
|
|
"grad_norm": 0.7540968656539917,
|
|
"learning_rate": 1.957157020821664e-06,
|
|
"loss": 0.6580138802528381,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 2.4256544396455584,
|
|
"grad_norm": 0.7199598550796509,
|
|
"learning_rate": 1.9486678646393654e-06,
|
|
"loss": 0.6445693969726562,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 2.4269521665956972,
|
|
"grad_norm": 0.722776472568512,
|
|
"learning_rate": 1.9401951713934574e-06,
|
|
"loss": 0.6294406056404114,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 2.428249893545836,
|
|
"grad_norm": 0.776488184928894,
|
|
"learning_rate": 1.931738958408457e-06,
|
|
"loss": 0.6513455510139465,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 2.429547620495975,
|
|
"grad_norm": 0.751055121421814,
|
|
"learning_rate": 1.9232992429751694e-06,
|
|
"loss": 0.6255248785018921,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 2.430845347446114,
|
|
"grad_norm": 0.7133703827857971,
|
|
"learning_rate": 1.9148760423506884e-06,
|
|
"loss": 0.5895485281944275,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 2.4321430743962527,
|
|
"grad_norm": 0.7120479941368103,
|
|
"learning_rate": 1.9064693737583173e-06,
|
|
"loss": 0.6799072027206421,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 2.433440801346392,
|
|
"grad_norm": 0.7090493440628052,
|
|
"learning_rate": 1.8980792543875758e-06,
|
|
"loss": 0.6845042705535889,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 2.4347385282965304,
|
|
"grad_norm": 0.7474452257156372,
|
|
"learning_rate": 1.8897057013941256e-06,
|
|
"loss": 0.6170677542686462,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 2.4360362552466697,
|
|
"grad_norm": 0.7024904489517212,
|
|
"learning_rate": 1.8813487318997658e-06,
|
|
"loss": 0.6431372165679932,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 2.4373339821968085,
|
|
"grad_norm": 0.7497063875198364,
|
|
"learning_rate": 1.8730083629923857e-06,
|
|
"loss": 0.6090019345283508,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 2.4386317091469474,
|
|
"grad_norm": 0.7273635268211365,
|
|
"learning_rate": 1.8646846117259277e-06,
|
|
"loss": 0.6302788257598877,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 2.4399294360970862,
|
|
"grad_norm": 0.745716392993927,
|
|
"learning_rate": 1.856377495120355e-06,
|
|
"loss": 0.6740216612815857,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 2.441227163047225,
|
|
"grad_norm": 0.6912100911140442,
|
|
"learning_rate": 1.8480870301616227e-06,
|
|
"loss": 0.6371436715126038,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 2.442524889997364,
|
|
"grad_norm": 0.73276287317276,
|
|
"learning_rate": 1.839813233801626e-06,
|
|
"loss": 0.6914728283882141,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 2.443822616947503,
|
|
"grad_norm": 0.6954025626182556,
|
|
"learning_rate": 1.8315561229581925e-06,
|
|
"loss": 0.6365620493888855,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 2.4451203438976417,
|
|
"grad_norm": 0.7226231098175049,
|
|
"learning_rate": 1.8233157145150183e-06,
|
|
"loss": 0.6907994151115417,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 2.4464180708477805,
|
|
"grad_norm": 0.7429067492485046,
|
|
"learning_rate": 1.8150920253216542e-06,
|
|
"loss": 0.6867068409919739,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 2.44771579779792,
|
|
"grad_norm": 0.7071108818054199,
|
|
"learning_rate": 1.8068850721934639e-06,
|
|
"loss": 0.6865320205688477,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 2.4490135247480587,
|
|
"grad_norm": 0.7338579893112183,
|
|
"learning_rate": 1.7986948719115872e-06,
|
|
"loss": 0.6243481636047363,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 2.4503112516981975,
|
|
"grad_norm": 0.727736234664917,
|
|
"learning_rate": 1.7905214412229177e-06,
|
|
"loss": 0.6568608283996582,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 2.4516089786483364,
|
|
"grad_norm": 0.7110669612884521,
|
|
"learning_rate": 1.7823647968400437e-06,
|
|
"loss": 0.6400637626647949,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 2.4529067055984752,
|
|
"grad_norm": 0.7366207242012024,
|
|
"learning_rate": 1.7742249554412426e-06,
|
|
"loss": 0.6992728114128113,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 2.454204432548614,
|
|
"grad_norm": 0.7760360836982727,
|
|
"learning_rate": 1.76610193367043e-06,
|
|
"loss": 0.660463809967041,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 2.455502159498753,
|
|
"grad_norm": 0.7349168658256531,
|
|
"learning_rate": 1.757995748137129e-06,
|
|
"loss": 0.6087374091148376,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 2.456799886448892,
|
|
"grad_norm": 0.7244678139686584,
|
|
"learning_rate": 1.7499064154164358e-06,
|
|
"loss": 0.6310493350028992,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 2.4580976133990307,
|
|
"grad_norm": 0.735069215297699,
|
|
"learning_rate": 1.7418339520489936e-06,
|
|
"loss": 0.6924616098403931,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 2.4593953403491695,
|
|
"grad_norm": 0.7370489239692688,
|
|
"learning_rate": 1.7337783745409363e-06,
|
|
"loss": 0.6034020781517029,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 2.4606930672993084,
|
|
"grad_norm": 0.7326070666313171,
|
|
"learning_rate": 1.7257396993638942e-06,
|
|
"loss": 0.6212228536605835,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 2.4619907942494477,
|
|
"grad_norm": 0.6936232447624207,
|
|
"learning_rate": 1.717717942954914e-06,
|
|
"loss": 0.705615758895874,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 2.4632885211995865,
|
|
"grad_norm": 0.7247579097747803,
|
|
"learning_rate": 1.7097131217164598e-06,
|
|
"loss": 0.6505810618400574,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 2.4645862481497254,
|
|
"grad_norm": 0.7129016518592834,
|
|
"learning_rate": 1.7017252520163652e-06,
|
|
"loss": 0.637854814529419,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 2.4658839750998642,
|
|
"grad_norm": 0.7215719819068909,
|
|
"learning_rate": 1.6937543501878018e-06,
|
|
"loss": 0.6486891508102417,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 2.467181702050003,
|
|
"grad_norm": 0.7112030386924744,
|
|
"learning_rate": 1.6858004325292466e-06,
|
|
"loss": 0.6466121673583984,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 2.468479429000142,
|
|
"grad_norm": 0.7482553124427795,
|
|
"learning_rate": 1.6778635153044486e-06,
|
|
"loss": 0.6906379461288452,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 2.469777155950281,
|
|
"grad_norm": 0.7411786317825317,
|
|
"learning_rate": 1.6699436147423942e-06,
|
|
"loss": 0.613003134727478,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 2.4710748829004197,
|
|
"grad_norm": 0.7285057902336121,
|
|
"learning_rate": 1.662040747037277e-06,
|
|
"loss": 0.7423882484436035,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 2.4723726098505585,
|
|
"grad_norm": 0.7251142859458923,
|
|
"learning_rate": 1.654154928348455e-06,
|
|
"loss": 0.6890588402748108,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 2.4736703368006974,
|
|
"grad_norm": 0.7212609052658081,
|
|
"learning_rate": 1.646286174800441e-06,
|
|
"loss": 0.6591873168945312,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 2.4749680637508362,
|
|
"grad_norm": 0.7344200611114502,
|
|
"learning_rate": 1.6384345024828374e-06,
|
|
"loss": 0.6354522705078125,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 2.4762657907009755,
|
|
"grad_norm": 0.7125760316848755,
|
|
"learning_rate": 1.6305999274503282e-06,
|
|
"loss": 0.6043302416801453,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 2.4775635176511144,
|
|
"grad_norm": 0.7003780603408813,
|
|
"learning_rate": 1.6227824657226366e-06,
|
|
"loss": 0.5772091150283813,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 2.4788612446012532,
|
|
"grad_norm": 0.7161146998405457,
|
|
"learning_rate": 1.614982133284495e-06,
|
|
"loss": 0.6129906177520752,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 2.480158971551392,
|
|
"grad_norm": 0.7459210157394409,
|
|
"learning_rate": 1.6071989460856063e-06,
|
|
"loss": 0.6741005182266235,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 2.481456698501531,
|
|
"grad_norm": 0.7306010723114014,
|
|
"learning_rate": 1.5994329200406223e-06,
|
|
"loss": 0.6048024296760559,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 2.48275442545167,
|
|
"grad_norm": 0.7296182513237,
|
|
"learning_rate": 1.5916840710290937e-06,
|
|
"loss": 0.6497235298156738,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 2.4840521524018087,
|
|
"grad_norm": 0.7177472114562988,
|
|
"learning_rate": 1.5839524148954622e-06,
|
|
"loss": 0.5927858352661133,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 2.4853498793519475,
|
|
"grad_norm": 0.7376892566680908,
|
|
"learning_rate": 1.5762379674490048e-06,
|
|
"loss": 0.591650128364563,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 2.4866476063020864,
|
|
"grad_norm": 0.7759072780609131,
|
|
"learning_rate": 1.5685407444638146e-06,
|
|
"loss": 0.686072051525116,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 2.487945333252225,
|
|
"grad_norm": 0.7239146828651428,
|
|
"learning_rate": 1.5608607616787663e-06,
|
|
"loss": 0.6082277297973633,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 2.489243060202364,
|
|
"grad_norm": 0.7537539005279541,
|
|
"learning_rate": 1.553198034797474e-06,
|
|
"loss": 0.7451168298721313,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 2.4905407871525034,
|
|
"grad_norm": 0.7346340417861938,
|
|
"learning_rate": 1.5455525794882841e-06,
|
|
"loss": 0.611229658126831,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 2.4918385141026422,
|
|
"grad_norm": 0.731436550617218,
|
|
"learning_rate": 1.5379244113842106e-06,
|
|
"loss": 0.659216582775116,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 2.493136241052781,
|
|
"grad_norm": 0.7185493111610413,
|
|
"learning_rate": 1.53031354608293e-06,
|
|
"loss": 0.7043588161468506,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 2.49443396800292,
|
|
"grad_norm": 0.7525856494903564,
|
|
"learning_rate": 1.5227199991467335e-06,
|
|
"loss": 0.6584152579307556,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 2.495731694953059,
|
|
"grad_norm": 0.7316333055496216,
|
|
"learning_rate": 1.5151437861025032e-06,
|
|
"loss": 0.5660229921340942,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 2.4970294219031977,
|
|
"grad_norm": 0.7230735421180725,
|
|
"learning_rate": 1.5075849224416783e-06,
|
|
"loss": 0.6512929201126099,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 2.4983271488533365,
|
|
"grad_norm": 0.7257496118545532,
|
|
"learning_rate": 1.5000434236202211e-06,
|
|
"loss": 0.665654718875885,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 2.4996248758034754,
|
|
"grad_norm": 0.7206733226776123,
|
|
"learning_rate": 1.4925193050585873e-06,
|
|
"loss": 0.656543493270874,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 2.500922602753614,
|
|
"grad_norm": 0.7368682026863098,
|
|
"learning_rate": 1.4850125821416983e-06,
|
|
"loss": 0.6262930035591125,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 2.5022203297037535,
|
|
"grad_norm": 0.7327122092247009,
|
|
"learning_rate": 1.4775232702188947e-06,
|
|
"loss": 0.6124476790428162,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 2.503518056653892,
|
|
"grad_norm": 0.7396702170372009,
|
|
"learning_rate": 1.4700513846039332e-06,
|
|
"loss": 0.5858893990516663,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 2.5048157836040312,
|
|
"grad_norm": 0.7264795899391174,
|
|
"learning_rate": 1.4625969405749218e-06,
|
|
"loss": 0.6673074960708618,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 2.50611351055417,
|
|
"grad_norm": 0.7444024085998535,
|
|
"learning_rate": 1.4551599533743155e-06,
|
|
"loss": 0.6632063388824463,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 2.507411237504309,
|
|
"grad_norm": 0.7873533964157104,
|
|
"learning_rate": 1.4477404382088689e-06,
|
|
"loss": 0.6932485103607178,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 2.508708964454448,
|
|
"grad_norm": 0.7218677997589111,
|
|
"learning_rate": 1.4403384102496132e-06,
|
|
"loss": 0.6060501933097839,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 2.5100066914045867,
|
|
"grad_norm": 0.7189037203788757,
|
|
"learning_rate": 1.4329538846318225e-06,
|
|
"loss": 0.6672825217247009,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 2.5113044183547255,
|
|
"grad_norm": 0.7413656115531921,
|
|
"learning_rate": 1.4255868764549852e-06,
|
|
"loss": 0.6226930022239685,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 2.5126021453048644,
|
|
"grad_norm": 0.7134820222854614,
|
|
"learning_rate": 1.4182374007827605e-06,
|
|
"loss": 0.6670020818710327,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 2.513899872255003,
|
|
"grad_norm": 0.7409310340881348,
|
|
"learning_rate": 1.410905472642975e-06,
|
|
"loss": 0.6528188586235046,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 2.515197599205142,
|
|
"grad_norm": 0.7328957319259644,
|
|
"learning_rate": 1.4035911070275576e-06,
|
|
"loss": 0.6440276503562927,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 2.5164953261552814,
|
|
"grad_norm": 0.7795917391777039,
|
|
"learning_rate": 1.3962943188925438e-06,
|
|
"loss": 0.6895844340324402,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 2.51779305310542,
|
|
"grad_norm": 0.7205235958099365,
|
|
"learning_rate": 1.3890151231580117e-06,
|
|
"loss": 0.6578382253646851,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 2.519090780055559,
|
|
"grad_norm": 0.7230272889137268,
|
|
"learning_rate": 1.3817535347080768e-06,
|
|
"loss": 0.6839146614074707,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 2.520388507005698,
|
|
"grad_norm": 0.7740436792373657,
|
|
"learning_rate": 1.3745095683908482e-06,
|
|
"loss": 0.6639747619628906,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 2.521686233955837,
|
|
"grad_norm": 0.7473544478416443,
|
|
"learning_rate": 1.3672832390184042e-06,
|
|
"loss": 0.6539671421051025,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 2.5229839609059757,
|
|
"grad_norm": 0.7322369813919067,
|
|
"learning_rate": 1.3600745613667598e-06,
|
|
"loss": 0.6508328318595886,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 2.5242816878561145,
|
|
"grad_norm": 0.7107250094413757,
|
|
"learning_rate": 1.3528835501758365e-06,
|
|
"loss": 0.6462997198104858,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 2.5255794148062534,
|
|
"grad_norm": 0.7492804527282715,
|
|
"learning_rate": 1.345710220149431e-06,
|
|
"loss": 0.6402596235275269,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 2.526877141756392,
|
|
"grad_norm": 0.7333636283874512,
|
|
"learning_rate": 1.3385545859551886e-06,
|
|
"loss": 0.6897069811820984,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 2.528174868706531,
|
|
"grad_norm": 0.7276363372802734,
|
|
"learning_rate": 1.3314166622245717e-06,
|
|
"loss": 0.6612985134124756,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 2.52947259565667,
|
|
"grad_norm": 0.7273007035255432,
|
|
"learning_rate": 1.324296463552821e-06,
|
|
"loss": 0.6120861172676086,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 2.5307703226068092,
|
|
"grad_norm": 0.7370741963386536,
|
|
"learning_rate": 1.3171940044989495e-06,
|
|
"loss": 0.7364912033081055,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 2.5320680495569476,
|
|
"grad_norm": 0.7171733379364014,
|
|
"learning_rate": 1.3101092995856802e-06,
|
|
"loss": 0.6327986121177673,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 2.533365776507087,
|
|
"grad_norm": 0.7327584028244019,
|
|
"learning_rate": 1.3030423632994493e-06,
|
|
"loss": 0.6383181810379028,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 2.534663503457226,
|
|
"grad_norm": 0.7271527051925659,
|
|
"learning_rate": 1.2959932100903472e-06,
|
|
"loss": 0.6336721777915955,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 2.5359612304073647,
|
|
"grad_norm": 0.7524319887161255,
|
|
"learning_rate": 1.2889618543721094e-06,
|
|
"loss": 0.662846028804779,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 2.5372589573575035,
|
|
"grad_norm": 0.7470775842666626,
|
|
"learning_rate": 1.2819483105220798e-06,
|
|
"loss": 0.6556363105773926,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 2.5385566843076424,
|
|
"grad_norm": 0.7219761610031128,
|
|
"learning_rate": 1.274952592881179e-06,
|
|
"loss": 0.6259469389915466,
|
|
"step": 1957
|
|
},
|
|
{
|
|
"epoch": 2.539854411257781,
|
|
"grad_norm": 0.7156399488449097,
|
|
"learning_rate": 1.2679747157538801e-06,
|
|
"loss": 0.6495680212974548,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 2.54115213820792,
|
|
"grad_norm": 0.7380321621894836,
|
|
"learning_rate": 1.2610146934081768e-06,
|
|
"loss": 0.6329517960548401,
|
|
"step": 1959
|
|
},
|
|
{
|
|
"epoch": 2.542449865158059,
|
|
"grad_norm": 0.7332315444946289,
|
|
"learning_rate": 1.2540725400755472e-06,
|
|
"loss": 0.7250087261199951,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 2.543747592108198,
|
|
"grad_norm": 0.6943919658660889,
|
|
"learning_rate": 1.2471482699509463e-06,
|
|
"loss": 0.6895512938499451,
|
|
"step": 1961
|
|
},
|
|
{
|
|
"epoch": 2.545045319058337,
|
|
"grad_norm": 0.7061095237731934,
|
|
"learning_rate": 1.2402418971927487e-06,
|
|
"loss": 0.6665888428688049,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 2.546343046008476,
|
|
"grad_norm": 0.7387134432792664,
|
|
"learning_rate": 1.2333534359227383e-06,
|
|
"loss": 0.6526239514350891,
|
|
"step": 1963
|
|
},
|
|
{
|
|
"epoch": 2.547640772958615,
|
|
"grad_norm": 0.7360694408416748,
|
|
"learning_rate": 1.226482900226077e-06,
|
|
"loss": 0.6126471161842346,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 2.5489384999087537,
|
|
"grad_norm": 0.7157735824584961,
|
|
"learning_rate": 1.2196303041512714e-06,
|
|
"loss": 0.6631340384483337,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 2.5502362268588925,
|
|
"grad_norm": 0.7504985332489014,
|
|
"learning_rate": 1.2127956617101445e-06,
|
|
"loss": 0.6746035218238831,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 2.5515339538090314,
|
|
"grad_norm": 0.7058922648429871,
|
|
"learning_rate": 1.2059789868778116e-06,
|
|
"loss": 0.641784131526947,
|
|
"step": 1967
|
|
},
|
|
{
|
|
"epoch": 2.55283168075917,
|
|
"grad_norm": 0.7049847841262817,
|
|
"learning_rate": 1.1991802935926455e-06,
|
|
"loss": 0.5715856552124023,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 2.554129407709309,
|
|
"grad_norm": 0.7680399417877197,
|
|
"learning_rate": 1.1923995957562585e-06,
|
|
"loss": 0.6144214272499084,
|
|
"step": 1969
|
|
},
|
|
{
|
|
"epoch": 2.555427134659448,
|
|
"grad_norm": 0.7535842657089233,
|
|
"learning_rate": 1.1856369072334517e-06,
|
|
"loss": 0.6755169630050659,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 2.5567248616095872,
|
|
"grad_norm": 0.7342673540115356,
|
|
"learning_rate": 1.178892241852222e-06,
|
|
"loss": 0.6000391244888306,
|
|
"step": 1971
|
|
},
|
|
{
|
|
"epoch": 2.5580225885597256,
|
|
"grad_norm": 0.7472249865531921,
|
|
"learning_rate": 1.1721656134036962e-06,
|
|
"loss": 0.6413825750350952,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 2.559320315509865,
|
|
"grad_norm": 0.7509233355522156,
|
|
"learning_rate": 1.165457035642128e-06,
|
|
"loss": 0.662197470664978,
|
|
"step": 1973
|
|
},
|
|
{
|
|
"epoch": 2.560618042460004,
|
|
"grad_norm": 0.7827663421630859,
|
|
"learning_rate": 1.1587665222848643e-06,
|
|
"loss": 0.6412524580955505,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 2.5619157694101427,
|
|
"grad_norm": 0.7427447438240051,
|
|
"learning_rate": 1.1520940870123065e-06,
|
|
"loss": 0.6249580979347229,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 2.5632134963602815,
|
|
"grad_norm": 0.7329998016357422,
|
|
"learning_rate": 1.1454397434679022e-06,
|
|
"loss": 0.67298424243927,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 2.5645112233104204,
|
|
"grad_norm": 0.7379522919654846,
|
|
"learning_rate": 1.1388035052580936e-06,
|
|
"loss": 0.6553415060043335,
|
|
"step": 1977
|
|
},
|
|
{
|
|
"epoch": 2.565808950260559,
|
|
"grad_norm": 0.7228721380233765,
|
|
"learning_rate": 1.1321853859523113e-06,
|
|
"loss": 0.6369103193283081,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 2.567106677210698,
|
|
"grad_norm": 0.7016708850860596,
|
|
"learning_rate": 1.1255853990829323e-06,
|
|
"loss": 0.5797883868217468,
|
|
"step": 1979
|
|
},
|
|
{
|
|
"epoch": 2.568404404160837,
|
|
"grad_norm": 0.7308626174926758,
|
|
"learning_rate": 1.119003558145262e-06,
|
|
"loss": 0.6397665143013,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 2.569702131110976,
|
|
"grad_norm": 0.7535097599029541,
|
|
"learning_rate": 1.1124398765974976e-06,
|
|
"loss": 0.6552141308784485,
|
|
"step": 1981
|
|
},
|
|
{
|
|
"epoch": 2.570999858061115,
|
|
"grad_norm": 0.7034752368927002,
|
|
"learning_rate": 1.1058943678607082e-06,
|
|
"loss": 0.5966861844062805,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 2.5722975850112535,
|
|
"grad_norm": 0.7308294177055359,
|
|
"learning_rate": 1.0993670453187965e-06,
|
|
"loss": 0.678621768951416,
|
|
"step": 1983
|
|
},
|
|
{
|
|
"epoch": 2.573595311961393,
|
|
"grad_norm": 0.7100163698196411,
|
|
"learning_rate": 1.0928579223184943e-06,
|
|
"loss": 0.629210889339447,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 2.5748930389115317,
|
|
"grad_norm": 0.715771496295929,
|
|
"learning_rate": 1.0863670121693037e-06,
|
|
"loss": 0.6395845413208008,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 2.5761907658616705,
|
|
"grad_norm": 0.7279219627380371,
|
|
"learning_rate": 1.0798943281434958e-06,
|
|
"loss": 0.6864475607872009,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 2.5774884928118094,
|
|
"grad_norm": 0.7253682613372803,
|
|
"learning_rate": 1.0734398834760695e-06,
|
|
"loss": 0.613013505935669,
|
|
"step": 1987
|
|
},
|
|
{
|
|
"epoch": 2.578786219761948,
|
|
"grad_norm": 0.7802004814147949,
|
|
"learning_rate": 1.067003691364733e-06,
|
|
"loss": 0.686352014541626,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 2.580083946712087,
|
|
"grad_norm": 0.7534424066543579,
|
|
"learning_rate": 1.060585764969867e-06,
|
|
"loss": 0.7019538283348083,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 2.581381673662226,
|
|
"grad_norm": 0.7177249789237976,
|
|
"learning_rate": 1.0541861174145097e-06,
|
|
"loss": 0.6038709282875061,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 2.582679400612365,
|
|
"grad_norm": 0.7184469103813171,
|
|
"learning_rate": 1.047804761784319e-06,
|
|
"loss": 0.6142391562461853,
|
|
"step": 1991
|
|
},
|
|
{
|
|
"epoch": 2.5839771275625036,
|
|
"grad_norm": 0.7472144961357117,
|
|
"learning_rate": 1.0414417111275533e-06,
|
|
"loss": 0.6911140084266663,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 2.585274854512643,
|
|
"grad_norm": 0.7293811440467834,
|
|
"learning_rate": 1.0350969784550368e-06,
|
|
"loss": 0.6472504138946533,
|
|
"step": 1993
|
|
},
|
|
{
|
|
"epoch": 2.5865725814627814,
|
|
"grad_norm": 0.7172240018844604,
|
|
"learning_rate": 1.028770576740148e-06,
|
|
"loss": 0.674932599067688,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 2.5878703084129207,
|
|
"grad_norm": 0.70241379737854,
|
|
"learning_rate": 1.022462518918772e-06,
|
|
"loss": 0.5798804759979248,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 2.5891680353630595,
|
|
"grad_norm": 0.7364243865013123,
|
|
"learning_rate": 1.0161728178892928e-06,
|
|
"loss": 0.5872079133987427,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 2.5904657623131984,
|
|
"grad_norm": 0.7111935615539551,
|
|
"learning_rate": 1.0099014865125557e-06,
|
|
"loss": 0.609887421131134,
|
|
"step": 1997
|
|
},
|
|
{
|
|
"epoch": 2.591763489263337,
|
|
"grad_norm": 0.7527702450752258,
|
|
"learning_rate": 1.0036485376118477e-06,
|
|
"loss": 0.7164459824562073,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 2.593061216213476,
|
|
"grad_norm": 0.7354010939598083,
|
|
"learning_rate": 9.974139839728658e-07,
|
|
"loss": 0.7024336457252502,
|
|
"step": 1999
|
|
},
|
|
{
|
|
"epoch": 2.594358943163615,
|
|
"grad_norm": 0.7463487982749939,
|
|
"learning_rate": 9.91197838343696e-07,
|
|
"loss": 0.6939477324485779,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 2.595656670113754,
|
|
"grad_norm": 0.736788809299469,
|
|
"learning_rate": 9.850001134347765e-07,
|
|
"loss": 0.6644649505615234,
|
|
"step": 2001
|
|
},
|
|
{
|
|
"epoch": 2.5969543970638926,
|
|
"grad_norm": 0.7293047904968262,
|
|
"learning_rate": 9.788208219188932e-07,
|
|
"loss": 0.6119586825370789,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 2.5982521240140315,
|
|
"grad_norm": 0.7182607054710388,
|
|
"learning_rate": 9.726599764311318e-07,
|
|
"loss": 0.611649215221405,
|
|
"step": 2003
|
|
},
|
|
{
|
|
"epoch": 2.599549850964171,
|
|
"grad_norm": 0.7259273529052734,
|
|
"learning_rate": 9.665175895688594e-07,
|
|
"loss": 0.6101284623146057,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 2.600847577914309,
|
|
"grad_norm": 0.701677680015564,
|
|
"learning_rate": 9.603936738917063e-07,
|
|
"loss": 0.6807554364204407,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 2.6021453048644485,
|
|
"grad_norm": 0.7464570999145508,
|
|
"learning_rate": 9.54288241921525e-07,
|
|
"loss": 0.6781387329101562,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 2.6034430318145874,
|
|
"grad_norm": 0.7273631691932678,
|
|
"learning_rate": 9.482013061423833e-07,
|
|
"loss": 0.6723061203956604,
|
|
"step": 2007
|
|
},
|
|
{
|
|
"epoch": 2.604740758764726,
|
|
"grad_norm": 0.7473943829536438,
|
|
"learning_rate": 9.421328790005213e-07,
|
|
"loss": 0.6500118970870972,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 2.606038485714865,
|
|
"grad_norm": 0.7298744320869446,
|
|
"learning_rate": 9.360829729043375e-07,
|
|
"loss": 0.647000789642334,
|
|
"step": 2009
|
|
},
|
|
{
|
|
"epoch": 2.607336212665004,
|
|
"grad_norm": 0.7570067644119263,
|
|
"learning_rate": 9.300516002243587e-07,
|
|
"loss": 0.658997118473053,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 2.608633939615143,
|
|
"grad_norm": 0.7472216486930847,
|
|
"learning_rate": 9.240387732932155e-07,
|
|
"loss": 0.6748676300048828,
|
|
"step": 2011
|
|
},
|
|
{
|
|
"epoch": 2.6099316665652816,
|
|
"grad_norm": 0.7370826005935669,
|
|
"learning_rate": 9.180445044056164e-07,
|
|
"loss": 0.6571428179740906,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 2.6112293935154205,
|
|
"grad_norm": 0.7431361675262451,
|
|
"learning_rate": 9.120688058183269e-07,
|
|
"loss": 0.6858744025230408,
|
|
"step": 2013
|
|
},
|
|
{
|
|
"epoch": 2.6125271204655593,
|
|
"grad_norm": 0.7619893550872803,
|
|
"learning_rate": 9.061116897501321e-07,
|
|
"loss": 0.6860224008560181,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 2.6138248474156986,
|
|
"grad_norm": 0.6949592232704163,
|
|
"learning_rate": 9.001731683818338e-07,
|
|
"loss": 0.6436545848846436,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 2.6151225743658375,
|
|
"grad_norm": 0.7831428647041321,
|
|
"learning_rate": 8.942532538561988e-07,
|
|
"loss": 0.7231192588806152,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 2.6164203013159764,
|
|
"grad_norm": 0.7632724046707153,
|
|
"learning_rate": 8.883519582779598e-07,
|
|
"loss": 0.7117716073989868,
|
|
"step": 2017
|
|
},
|
|
{
|
|
"epoch": 2.617718028266115,
|
|
"grad_norm": 0.7610095739364624,
|
|
"learning_rate": 8.82469293713768e-07,
|
|
"loss": 0.6059130430221558,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 2.619015755216254,
|
|
"grad_norm": 0.7569096684455872,
|
|
"learning_rate": 8.766052721921858e-07,
|
|
"loss": 0.6521672010421753,
|
|
"step": 2019
|
|
},
|
|
{
|
|
"epoch": 2.620313482166393,
|
|
"grad_norm": 0.7089208960533142,
|
|
"learning_rate": 8.70759905703652e-07,
|
|
"loss": 0.6266563534736633,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 2.621611209116532,
|
|
"grad_norm": 0.7617636919021606,
|
|
"learning_rate": 8.649332062004622e-07,
|
|
"loss": 0.6242752075195312,
|
|
"step": 2021
|
|
},
|
|
{
|
|
"epoch": 2.6229089360666706,
|
|
"grad_norm": 0.7356528043746948,
|
|
"learning_rate": 8.59125185596742e-07,
|
|
"loss": 0.6804662942886353,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 2.6242066630168095,
|
|
"grad_norm": 0.730805516242981,
|
|
"learning_rate": 8.533358557684246e-07,
|
|
"loss": 0.6591053605079651,
|
|
"step": 2023
|
|
},
|
|
{
|
|
"epoch": 2.625504389966949,
|
|
"grad_norm": 0.740450382232666,
|
|
"learning_rate": 8.475652285532199e-07,
|
|
"loss": 0.6597458720207214,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 2.626802116917087,
|
|
"grad_norm": 0.7419881224632263,
|
|
"learning_rate": 8.41813315750607e-07,
|
|
"loss": 0.6208306550979614,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 2.6280998438672265,
|
|
"grad_norm": 0.7380879521369934,
|
|
"learning_rate": 8.360801291217835e-07,
|
|
"loss": 0.6311178803443909,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 2.6293975708173654,
|
|
"grad_norm": 0.6968350410461426,
|
|
"learning_rate": 8.303656803896731e-07,
|
|
"loss": 0.6126903891563416,
|
|
"step": 2027
|
|
},
|
|
{
|
|
"epoch": 2.630695297767504,
|
|
"grad_norm": 0.6993783712387085,
|
|
"learning_rate": 8.246699812388714e-07,
|
|
"loss": 0.6219539642333984,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 2.631993024717643,
|
|
"grad_norm": 0.7296315431594849,
|
|
"learning_rate": 8.189930433156424e-07,
|
|
"loss": 0.6454072594642639,
|
|
"step": 2029
|
|
},
|
|
{
|
|
"epoch": 2.633290751667782,
|
|
"grad_norm": 0.7435656785964966,
|
|
"learning_rate": 8.133348782278916e-07,
|
|
"loss": 0.640640139579773,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 2.634588478617921,
|
|
"grad_norm": 0.7254202961921692,
|
|
"learning_rate": 8.07695497545129e-07,
|
|
"loss": 0.574336588382721,
|
|
"step": 2031
|
|
},
|
|
{
|
|
"epoch": 2.6358862055680596,
|
|
"grad_norm": 0.7589125037193298,
|
|
"learning_rate": 8.020749127984629e-07,
|
|
"loss": 0.6744675636291504,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 2.6371839325181985,
|
|
"grad_norm": 0.7237491011619568,
|
|
"learning_rate": 7.964731354805677e-07,
|
|
"loss": 0.6050382852554321,
|
|
"step": 2033
|
|
},
|
|
{
|
|
"epoch": 2.6384816594683373,
|
|
"grad_norm": 0.736615777015686,
|
|
"learning_rate": 7.908901770456579e-07,
|
|
"loss": 0.6752466559410095,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 2.6397793864184766,
|
|
"grad_norm": 0.7375562787055969,
|
|
"learning_rate": 7.853260489094727e-07,
|
|
"loss": 0.6168178915977478,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 2.641077113368615,
|
|
"grad_norm": 0.7463002800941467,
|
|
"learning_rate": 7.79780762449246e-07,
|
|
"loss": 0.6608278751373291,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 2.6423748403187544,
|
|
"grad_norm": 0.7306200861930847,
|
|
"learning_rate": 7.742543290036797e-07,
|
|
"loss": 0.6231617331504822,
|
|
"step": 2037
|
|
},
|
|
{
|
|
"epoch": 2.643672567268893,
|
|
"grad_norm": 0.7191357612609863,
|
|
"learning_rate": 7.687467598729403e-07,
|
|
"loss": 0.6745753884315491,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 2.644970294219032,
|
|
"grad_norm": 0.6983992457389832,
|
|
"learning_rate": 7.63258066318604e-07,
|
|
"loss": 0.6209067702293396,
|
|
"step": 2039
|
|
},
|
|
{
|
|
"epoch": 2.646268021169171,
|
|
"grad_norm": 0.7191793322563171,
|
|
"learning_rate": 7.577882595636665e-07,
|
|
"loss": 0.6866878867149353,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 2.64756574811931,
|
|
"grad_norm": 0.7254435420036316,
|
|
"learning_rate": 7.523373507924947e-07,
|
|
"loss": 0.6178576946258545,
|
|
"step": 2041
|
|
},
|
|
{
|
|
"epoch": 2.6488634750694486,
|
|
"grad_norm": 0.7166338562965393,
|
|
"learning_rate": 7.469053511508184e-07,
|
|
"loss": 0.6005609035491943,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 2.6501612020195875,
|
|
"grad_norm": 0.7637789249420166,
|
|
"learning_rate": 7.414922717457018e-07,
|
|
"loss": 0.718099057674408,
|
|
"step": 2043
|
|
},
|
|
{
|
|
"epoch": 2.6514589289697263,
|
|
"grad_norm": 0.7439664006233215,
|
|
"learning_rate": 7.360981236455222e-07,
|
|
"loss": 0.6896740198135376,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 2.652756655919865,
|
|
"grad_norm": 0.7089899182319641,
|
|
"learning_rate": 7.307229178799469e-07,
|
|
"loss": 0.6416285634040833,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 2.6540543828700045,
|
|
"grad_norm": 0.7403551340103149,
|
|
"learning_rate": 7.253666654399128e-07,
|
|
"loss": 0.6686422824859619,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 2.655352109820143,
|
|
"grad_norm": 0.7438167333602905,
|
|
"learning_rate": 7.200293772775968e-07,
|
|
"loss": 0.6786326766014099,
|
|
"step": 2047
|
|
},
|
|
{
|
|
"epoch": 2.656649836770282,
|
|
"grad_norm": 0.7066054344177246,
|
|
"learning_rate": 7.14711064306407e-07,
|
|
"loss": 0.6346741318702698,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 2.657947563720421,
|
|
"grad_norm": 0.7646064758300781,
|
|
"learning_rate": 7.094117374009446e-07,
|
|
"loss": 0.67086261510849,
|
|
"step": 2049
|
|
},
|
|
{
|
|
"epoch": 2.65924529067056,
|
|
"grad_norm": 0.7251279950141907,
|
|
"learning_rate": 7.041314073969918e-07,
|
|
"loss": 0.6325028538703918,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 2.660543017620699,
|
|
"grad_norm": 0.7678724527359009,
|
|
"learning_rate": 6.988700850914876e-07,
|
|
"loss": 0.6267367005348206,
|
|
"step": 2051
|
|
},
|
|
{
|
|
"epoch": 2.6618407445708376,
|
|
"grad_norm": 0.7265689969062805,
|
|
"learning_rate": 6.93627781242504e-07,
|
|
"loss": 0.6617064476013184,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 2.6631384715209765,
|
|
"grad_norm": 0.7217026352882385,
|
|
"learning_rate": 6.884045065692257e-07,
|
|
"loss": 0.6587082743644714,
|
|
"step": 2053
|
|
},
|
|
{
|
|
"epoch": 2.6644361984711153,
|
|
"grad_norm": 0.7629426121711731,
|
|
"learning_rate": 6.83200271751927e-07,
|
|
"loss": 0.692336916923523,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 2.665733925421254,
|
|
"grad_norm": 0.7733954191207886,
|
|
"learning_rate": 6.780150874319524e-07,
|
|
"loss": 0.6802124381065369,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 2.667031652371393,
|
|
"grad_norm": 0.7317995429039001,
|
|
"learning_rate": 6.72848964211692e-07,
|
|
"loss": 0.6866804957389832,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 2.6683293793215324,
|
|
"grad_norm": 0.7314664721488953,
|
|
"learning_rate": 6.677019126545548e-07,
|
|
"loss": 0.6293746829032898,
|
|
"step": 2057
|
|
},
|
|
{
|
|
"epoch": 2.6696271062716708,
|
|
"grad_norm": 0.7272669076919556,
|
|
"learning_rate": 6.625739432849643e-07,
|
|
"loss": 0.673871636390686,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 2.67092483322181,
|
|
"grad_norm": 0.7291983962059021,
|
|
"learning_rate": 6.574650665883197e-07,
|
|
"loss": 0.6971457004547119,
|
|
"step": 2059
|
|
},
|
|
{
|
|
"epoch": 2.672222560171949,
|
|
"grad_norm": 0.746300458908081,
|
|
"learning_rate": 6.523752930109761e-07,
|
|
"loss": 0.6644643545150757,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 2.673520287122088,
|
|
"grad_norm": 0.7214688062667847,
|
|
"learning_rate": 6.473046329602384e-07,
|
|
"loss": 0.579256534576416,
|
|
"step": 2061
|
|
},
|
|
{
|
|
"epoch": 2.6748180140722266,
|
|
"grad_norm": 0.7157896757125854,
|
|
"learning_rate": 6.422530968043173e-07,
|
|
"loss": 0.6934089660644531,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 2.6761157410223655,
|
|
"grad_norm": 0.7446689605712891,
|
|
"learning_rate": 6.372206948723292e-07,
|
|
"loss": 0.6685813665390015,
|
|
"step": 2063
|
|
},
|
|
{
|
|
"epoch": 2.6774134679725043,
|
|
"grad_norm": 0.7324274182319641,
|
|
"learning_rate": 6.322074374542608e-07,
|
|
"loss": 0.6548044085502625,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 2.678711194922643,
|
|
"grad_norm": 0.7366431951522827,
|
|
"learning_rate": 6.272133348009546e-07,
|
|
"loss": 0.6561753153800964,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 2.680008921872782,
|
|
"grad_norm": 0.6906739473342896,
|
|
"learning_rate": 6.222383971240875e-07,
|
|
"loss": 0.6162272095680237,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 2.681306648822921,
|
|
"grad_norm": 0.7250291109085083,
|
|
"learning_rate": 6.17282634596148e-07,
|
|
"loss": 0.6417672038078308,
|
|
"step": 2067
|
|
},
|
|
{
|
|
"epoch": 2.68260437577306,
|
|
"grad_norm": 0.7425340414047241,
|
|
"learning_rate": 6.123460573504147e-07,
|
|
"loss": 0.6258097887039185,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 2.6839021027231986,
|
|
"grad_norm": 0.7179927825927734,
|
|
"learning_rate": 6.074286754809411e-07,
|
|
"loss": 0.6689911484718323,
|
|
"step": 2069
|
|
},
|
|
{
|
|
"epoch": 2.685199829673338,
|
|
"grad_norm": 0.7198472619056702,
|
|
"learning_rate": 6.025304990425241e-07,
|
|
"loss": 0.6711916923522949,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 2.685199829673338,
|
|
"eval_loss": 0.7492260932922363,
|
|
"eval_runtime": 145.3339,
|
|
"eval_samples_per_second": 35.725,
|
|
"eval_steps_per_second": 8.931,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 2.686497556623477,
|
|
"grad_norm": 0.7170226573944092,
|
|
"learning_rate": 5.976515380507008e-07,
|
|
"loss": 0.6783643960952759,
|
|
"step": 2071
|
|
},
|
|
{
|
|
"epoch": 2.6877952835736156,
|
|
"grad_norm": 0.7576429843902588,
|
|
"learning_rate": 5.927918024817059e-07,
|
|
"loss": 0.7274392247200012,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 2.6890930105237545,
|
|
"grad_norm": 0.7014567255973816,
|
|
"learning_rate": 5.879513022724714e-07,
|
|
"loss": 0.6101505160331726,
|
|
"step": 2073
|
|
},
|
|
{
|
|
"epoch": 2.6903907374738933,
|
|
"grad_norm": 0.7218198180198669,
|
|
"learning_rate": 5.831300473205948e-07,
|
|
"loss": 0.6697475910186768,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 2.691688464424032,
|
|
"grad_norm": 0.7351176738739014,
|
|
"learning_rate": 5.783280474843222e-07,
|
|
"loss": 0.6683188080787659,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 2.692986191374171,
|
|
"grad_norm": 0.7387964129447937,
|
|
"learning_rate": 5.735453125825275e-07,
|
|
"loss": 0.6495317220687866,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 2.69428391832431,
|
|
"grad_norm": 0.7699364423751831,
|
|
"learning_rate": 5.687818523946931e-07,
|
|
"loss": 0.6670310497283936,
|
|
"step": 2077
|
|
},
|
|
{
|
|
"epoch": 2.6955816452744488,
|
|
"grad_norm": 0.7399834394454956,
|
|
"learning_rate": 5.640376766608902e-07,
|
|
"loss": 0.6311538219451904,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 2.696879372224588,
|
|
"grad_norm": 0.7210641503334045,
|
|
"learning_rate": 5.593127950817579e-07,
|
|
"loss": 0.6419323682785034,
|
|
"step": 2079
|
|
},
|
|
{
|
|
"epoch": 2.698177099174727,
|
|
"grad_norm": 0.7432581186294556,
|
|
"learning_rate": 5.546072173184791e-07,
|
|
"loss": 0.6984769701957703,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 2.699474826124866,
|
|
"grad_norm": 0.7039175629615784,
|
|
"learning_rate": 5.499209529927751e-07,
|
|
"loss": 0.6130697727203369,
|
|
"step": 2081
|
|
},
|
|
{
|
|
"epoch": 2.7007725530750046,
|
|
"grad_norm": 0.7450562715530396,
|
|
"learning_rate": 5.452540116868654e-07,
|
|
"loss": 0.709285318851471,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 2.7020702800251435,
|
|
"grad_norm": 0.7391056418418884,
|
|
"learning_rate": 5.406064029434666e-07,
|
|
"loss": 0.7196047306060791,
|
|
"step": 2083
|
|
},
|
|
{
|
|
"epoch": 2.7033680069752823,
|
|
"grad_norm": 0.7550768852233887,
|
|
"learning_rate": 5.359781362657623e-07,
|
|
"loss": 0.6528761982917786,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 2.704665733925421,
|
|
"grad_norm": 0.7071364521980286,
|
|
"learning_rate": 5.313692211173838e-07,
|
|
"loss": 0.664832353591919,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 2.70596346087556,
|
|
"grad_norm": 0.7408220171928406,
|
|
"learning_rate": 5.26779666922399e-07,
|
|
"loss": 0.6972253322601318,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 2.707261187825699,
|
|
"grad_norm": 0.706516683101654,
|
|
"learning_rate": 5.222094830652835e-07,
|
|
"loss": 0.6413928866386414,
|
|
"step": 2087
|
|
},
|
|
{
|
|
"epoch": 2.708558914775838,
|
|
"grad_norm": 0.6609142422676086,
|
|
"learning_rate": 5.176586788909066e-07,
|
|
"loss": 0.61426842212677,
|
|
"step": 2088
|
|
},
|
|
{
|
|
"epoch": 2.7098566417259766,
|
|
"grad_norm": 0.7437728047370911,
|
|
"learning_rate": 5.131272637045104e-07,
|
|
"loss": 0.7072603106498718,
|
|
"step": 2089
|
|
},
|
|
{
|
|
"epoch": 2.711154368676116,
|
|
"grad_norm": 0.7043668627738953,
|
|
"learning_rate": 5.086152467716932e-07,
|
|
"loss": 0.6285822987556458,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 2.7124520956262548,
|
|
"grad_norm": 0.740922212600708,
|
|
"learning_rate": 5.041226373183861e-07,
|
|
"loss": 0.6565816402435303,
|
|
"step": 2091
|
|
},
|
|
{
|
|
"epoch": 2.7137498225763936,
|
|
"grad_norm": 0.716456949710846,
|
|
"learning_rate": 4.996494445308409e-07,
|
|
"loss": 0.6037598848342896,
|
|
"step": 2092
|
|
},
|
|
{
|
|
"epoch": 2.7150475495265325,
|
|
"grad_norm": 0.7253233194351196,
|
|
"learning_rate": 4.951956775556e-07,
|
|
"loss": 0.6392321586608887,
|
|
"step": 2093
|
|
},
|
|
{
|
|
"epoch": 2.7163452764766713,
|
|
"grad_norm": 0.7206777334213257,
|
|
"learning_rate": 4.907613454994964e-07,
|
|
"loss": 0.6381296515464783,
|
|
"step": 2094
|
|
},
|
|
{
|
|
"epoch": 2.71764300342681,
|
|
"grad_norm": 0.7042269110679626,
|
|
"learning_rate": 4.863464574296106e-07,
|
|
"loss": 0.6764304041862488,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 2.718940730376949,
|
|
"grad_norm": 0.7474066019058228,
|
|
"learning_rate": 4.819510223732738e-07,
|
|
"loss": 0.710769534111023,
|
|
"step": 2096
|
|
},
|
|
{
|
|
"epoch": 2.720238457327088,
|
|
"grad_norm": 0.7537234425544739,
|
|
"learning_rate": 4.775750493180386e-07,
|
|
"loss": 0.6200648546218872,
|
|
"step": 2097
|
|
},
|
|
{
|
|
"epoch": 2.7215361842772268,
|
|
"grad_norm": 0.7299405336380005,
|
|
"learning_rate": 4.7321854721166127e-07,
|
|
"loss": 0.6677811741828918,
|
|
"step": 2098
|
|
},
|
|
{
|
|
"epoch": 2.722833911227366,
|
|
"grad_norm": 0.6883127093315125,
|
|
"learning_rate": 4.6888152496208593e-07,
|
|
"loss": 0.5572382211685181,
|
|
"step": 2099
|
|
},
|
|
{
|
|
"epoch": 2.7241316381775045,
|
|
"grad_norm": 0.730640709400177,
|
|
"learning_rate": 4.645639914374278e-07,
|
|
"loss": 0.6930029392242432,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 2.7254293651276438,
|
|
"grad_norm": 0.7166103720664978,
|
|
"learning_rate": 4.602659554659461e-07,
|
|
"loss": 0.5943949818611145,
|
|
"step": 2101
|
|
},
|
|
{
|
|
"epoch": 2.7267270920777826,
|
|
"grad_norm": 0.7555888295173645,
|
|
"learning_rate": 4.559874258360408e-07,
|
|
"loss": 0.6563291549682617,
|
|
"step": 2102
|
|
},
|
|
{
|
|
"epoch": 2.7280248190279215,
|
|
"grad_norm": 0.7199954390525818,
|
|
"learning_rate": 4.5172841129621726e-07,
|
|
"loss": 0.6438056826591492,
|
|
"step": 2103
|
|
},
|
|
{
|
|
"epoch": 2.7293225459780603,
|
|
"grad_norm": 0.7394102811813354,
|
|
"learning_rate": 4.474889205550881e-07,
|
|
"loss": 0.6618061065673828,
|
|
"step": 2104
|
|
},
|
|
{
|
|
"epoch": 2.730620272928199,
|
|
"grad_norm": 0.7350549697875977,
|
|
"learning_rate": 4.4326896228133354e-07,
|
|
"loss": 0.6392850875854492,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 2.731917999878338,
|
|
"grad_norm": 0.7010295391082764,
|
|
"learning_rate": 4.3906854510370245e-07,
|
|
"loss": 0.6507184505462646,
|
|
"step": 2106
|
|
},
|
|
{
|
|
"epoch": 2.733215726828477,
|
|
"grad_norm": 0.7381558418273926,
|
|
"learning_rate": 4.348876776109856e-07,
|
|
"loss": 0.6545774936676025,
|
|
"step": 2107
|
|
},
|
|
{
|
|
"epoch": 2.7345134537786158,
|
|
"grad_norm": 0.7013775110244751,
|
|
"learning_rate": 4.307263683519969e-07,
|
|
"loss": 0.6212908625602722,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 2.7358111807287546,
|
|
"grad_norm": 0.7366412878036499,
|
|
"learning_rate": 4.2658462583556216e-07,
|
|
"loss": 0.684171736240387,
|
|
"step": 2109
|
|
},
|
|
{
|
|
"epoch": 2.737108907678894,
|
|
"grad_norm": 0.7112710475921631,
|
|
"learning_rate": 4.2246245853049706e-07,
|
|
"loss": 0.6173405051231384,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 2.7384066346290323,
|
|
"grad_norm": 0.7728049159049988,
|
|
"learning_rate": 4.1835987486558595e-07,
|
|
"loss": 0.6173956990242004,
|
|
"step": 2111
|
|
},
|
|
{
|
|
"epoch": 2.7397043615791716,
|
|
"grad_norm": 0.6931276321411133,
|
|
"learning_rate": 4.142768832295807e-07,
|
|
"loss": 0.6579814553260803,
|
|
"step": 2112
|
|
},
|
|
{
|
|
"epoch": 2.7410020885293105,
|
|
"grad_norm": 0.7127827405929565,
|
|
"learning_rate": 4.102134919711609e-07,
|
|
"loss": 0.6169605255126953,
|
|
"step": 2113
|
|
},
|
|
{
|
|
"epoch": 2.7422998154794493,
|
|
"grad_norm": 0.7167375683784485,
|
|
"learning_rate": 4.061697093989347e-07,
|
|
"loss": 0.6766916513442993,
|
|
"step": 2114
|
|
},
|
|
{
|
|
"epoch": 2.743597542429588,
|
|
"grad_norm": 0.7316383719444275,
|
|
"learning_rate": 4.021455437814148e-07,
|
|
"loss": 0.6033115983009338,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 2.744895269379727,
|
|
"grad_norm": 0.7062050104141235,
|
|
"learning_rate": 3.981410033469979e-07,
|
|
"loss": 0.6221883296966553,
|
|
"step": 2116
|
|
},
|
|
{
|
|
"epoch": 2.746192996329866,
|
|
"grad_norm": 0.7120285630226135,
|
|
"learning_rate": 3.941560962839619e-07,
|
|
"loss": 0.6118264198303223,
|
|
"step": 2117
|
|
},
|
|
{
|
|
"epoch": 2.7474907232800048,
|
|
"grad_norm": 0.7053149938583374,
|
|
"learning_rate": 3.9019083074042784e-07,
|
|
"loss": 0.5848374962806702,
|
|
"step": 2118
|
|
},
|
|
{
|
|
"epoch": 2.7487884502301436,
|
|
"grad_norm": 0.7223408818244934,
|
|
"learning_rate": 3.862452148243623e-07,
|
|
"loss": 0.6187662482261658,
|
|
"step": 2119
|
|
},
|
|
{
|
|
"epoch": 2.7500861771802825,
|
|
"grad_norm": 0.7368988394737244,
|
|
"learning_rate": 3.823192566035494e-07,
|
|
"loss": 0.647794783115387,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 2.7513839041304218,
|
|
"grad_norm": 0.7369173765182495,
|
|
"learning_rate": 3.7841296410558225e-07,
|
|
"loss": 0.6177867650985718,
|
|
"step": 2121
|
|
},
|
|
{
|
|
"epoch": 2.75268163108056,
|
|
"grad_norm": 0.7405387759208679,
|
|
"learning_rate": 3.7452634531783935e-07,
|
|
"loss": 0.6547641754150391,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 2.7539793580306995,
|
|
"grad_norm": 0.7224996089935303,
|
|
"learning_rate": 3.706594081874737e-07,
|
|
"loss": 0.6353644132614136,
|
|
"step": 2123
|
|
},
|
|
{
|
|
"epoch": 2.7552770849808383,
|
|
"grad_norm": 0.7474029660224915,
|
|
"learning_rate": 3.6681216062138923e-07,
|
|
"loss": 0.682817816734314,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 2.756574811930977,
|
|
"grad_norm": 0.7351192235946655,
|
|
"learning_rate": 3.6298461048623887e-07,
|
|
"loss": 0.6670258641242981,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 2.757872538881116,
|
|
"grad_norm": 0.6816844344139099,
|
|
"learning_rate": 3.5917676560838775e-07,
|
|
"loss": 0.609431803226471,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 2.759170265831255,
|
|
"grad_norm": 0.7361696362495422,
|
|
"learning_rate": 3.5538863377392095e-07,
|
|
"loss": 0.6345561742782593,
|
|
"step": 2127
|
|
},
|
|
{
|
|
"epoch": 2.7604679927813938,
|
|
"grad_norm": 0.750041663646698,
|
|
"learning_rate": 3.5162022272860475e-07,
|
|
"loss": 0.6858513951301575,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 2.7617657197315326,
|
|
"grad_norm": 0.7399468421936035,
|
|
"learning_rate": 3.478715401778876e-07,
|
|
"loss": 0.6643052697181702,
|
|
"step": 2129
|
|
},
|
|
{
|
|
"epoch": 2.7630634466816715,
|
|
"grad_norm": 0.764750063419342,
|
|
"learning_rate": 3.44142593786877e-07,
|
|
"loss": 0.7398065328598022,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 2.7643611736318103,
|
|
"grad_norm": 0.7458817958831787,
|
|
"learning_rate": 3.404333911803237e-07,
|
|
"loss": 0.6310020685195923,
|
|
"step": 2131
|
|
},
|
|
{
|
|
"epoch": 2.7656589005819496,
|
|
"grad_norm": 0.7141246199607849,
|
|
"learning_rate": 3.367439399426087e-07,
|
|
"loss": 0.6750156879425049,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 2.7669566275320885,
|
|
"grad_norm": 0.7121133804321289,
|
|
"learning_rate": 3.330742476177273e-07,
|
|
"loss": 0.6371780037879944,
|
|
"step": 2133
|
|
},
|
|
{
|
|
"epoch": 2.7682543544822273,
|
|
"grad_norm": 0.7298391461372375,
|
|
"learning_rate": 3.2942432170926743e-07,
|
|
"loss": 0.5725361108779907,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 2.769552081432366,
|
|
"grad_norm": 0.742504358291626,
|
|
"learning_rate": 3.257941696804079e-07,
|
|
"loss": 0.6555971503257751,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 2.770849808382505,
|
|
"grad_norm": 0.7092410922050476,
|
|
"learning_rate": 3.2218379895388896e-07,
|
|
"loss": 0.5985562205314636,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 2.772147535332644,
|
|
"grad_norm": 0.7868666648864746,
|
|
"learning_rate": 3.185932169120043e-07,
|
|
"loss": 0.6679819226264954,
|
|
"step": 2137
|
|
},
|
|
{
|
|
"epoch": 2.7734452622827828,
|
|
"grad_norm": 0.7421088814735413,
|
|
"learning_rate": 3.150224308965866e-07,
|
|
"loss": 0.6530116200447083,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 2.7747429892329216,
|
|
"grad_norm": 0.8364231586456299,
|
|
"learning_rate": 3.114714482089898e-07,
|
|
"loss": 0.7263075709342957,
|
|
"step": 2139
|
|
},
|
|
{
|
|
"epoch": 2.7760407161830605,
|
|
"grad_norm": 0.7070637345314026,
|
|
"learning_rate": 3.079402761100736e-07,
|
|
"loss": 0.5931848883628845,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 2.7773384431331998,
|
|
"grad_norm": 0.715865433216095,
|
|
"learning_rate": 3.0442892182019236e-07,
|
|
"loss": 0.5411802530288696,
|
|
"step": 2141
|
|
},
|
|
{
|
|
"epoch": 2.778636170083338,
|
|
"grad_norm": 0.7688911557197571,
|
|
"learning_rate": 3.00937392519175e-07,
|
|
"loss": 0.6958683133125305,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 2.7799338970334775,
|
|
"grad_norm": 0.7352038621902466,
|
|
"learning_rate": 2.974656953463173e-07,
|
|
"loss": 0.5754610896110535,
|
|
"step": 2143
|
|
},
|
|
{
|
|
"epoch": 2.7812316239836163,
|
|
"grad_norm": 0.7284995913505554,
|
|
"learning_rate": 2.9401383740035983e-07,
|
|
"loss": 0.6452664136886597,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 2.782529350933755,
|
|
"grad_norm": 0.7445150017738342,
|
|
"learning_rate": 2.905818257394799e-07,
|
|
"loss": 0.6866068243980408,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 2.783827077883894,
|
|
"grad_norm": 0.7142398357391357,
|
|
"learning_rate": 2.871696673812718e-07,
|
|
"loss": 0.6363600492477417,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 2.785124804834033,
|
|
"grad_norm": 0.7269803285598755,
|
|
"learning_rate": 2.837773693027346e-07,
|
|
"loss": 0.6741392612457275,
|
|
"step": 2147
|
|
},
|
|
{
|
|
"epoch": 2.7864225317841718,
|
|
"grad_norm": 0.7683520317077637,
|
|
"learning_rate": 2.8040493844026185e-07,
|
|
"loss": 0.6339127421379089,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 2.7877202587343106,
|
|
"grad_norm": 0.7308069467544556,
|
|
"learning_rate": 2.7705238168961867e-07,
|
|
"loss": 0.6009587049484253,
|
|
"step": 2149
|
|
},
|
|
{
|
|
"epoch": 2.7890179856844495,
|
|
"grad_norm": 0.7165871858596802,
|
|
"learning_rate": 2.7371970590593597e-07,
|
|
"loss": 0.6652488708496094,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 2.7903157126345883,
|
|
"grad_norm": 0.7490328550338745,
|
|
"learning_rate": 2.7040691790369165e-07,
|
|
"loss": 0.6180223226547241,
|
|
"step": 2151
|
|
},
|
|
{
|
|
"epoch": 2.7916134395847276,
|
|
"grad_norm": 0.729664146900177,
|
|
"learning_rate": 2.671140244567005e-07,
|
|
"loss": 0.6324159502983093,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 2.792911166534866,
|
|
"grad_norm": 0.728609025478363,
|
|
"learning_rate": 2.6384103229809445e-07,
|
|
"loss": 0.6185531616210938,
|
|
"step": 2153
|
|
},
|
|
{
|
|
"epoch": 2.7942088934850053,
|
|
"grad_norm": 0.7523699402809143,
|
|
"learning_rate": 2.605879481203144e-07,
|
|
"loss": 0.6833655834197998,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 2.795506620435144,
|
|
"grad_norm": 0.7207692265510559,
|
|
"learning_rate": 2.5735477857509406e-07,
|
|
"loss": 0.6240508556365967,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 2.796804347385283,
|
|
"grad_norm": 0.7327904105186462,
|
|
"learning_rate": 2.5414153027344846e-07,
|
|
"loss": 0.6517814993858337,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 2.798102074335422,
|
|
"grad_norm": 0.7405744194984436,
|
|
"learning_rate": 2.5094820978565416e-07,
|
|
"loss": 0.6217131614685059,
|
|
"step": 2157
|
|
},
|
|
{
|
|
"epoch": 2.7993998012855608,
|
|
"grad_norm": 0.7404962182044983,
|
|
"learning_rate": 2.4777482364124695e-07,
|
|
"loss": 0.6210229992866516,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 2.8006975282356996,
|
|
"grad_norm": 0.7105421423912048,
|
|
"learning_rate": 2.446213783289941e-07,
|
|
"loss": 0.6224609613418579,
|
|
"step": 2159
|
|
},
|
|
{
|
|
"epoch": 2.8019952551858385,
|
|
"grad_norm": 0.777541995048523,
|
|
"learning_rate": 2.4148788029689565e-07,
|
|
"loss": 0.6957967877388,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 2.8032929821359773,
|
|
"grad_norm": 0.7556023001670837,
|
|
"learning_rate": 2.3837433595216174e-07,
|
|
"loss": 0.6769660115242004,
|
|
"step": 2161
|
|
},
|
|
{
|
|
"epoch": 2.804590709086116,
|
|
"grad_norm": 0.7225756049156189,
|
|
"learning_rate": 2.3528075166120323e-07,
|
|
"loss": 0.6382290124893188,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 2.8058884360362555,
|
|
"grad_norm": 0.7236006259918213,
|
|
"learning_rate": 2.3220713374961457e-07,
|
|
"loss": 0.6584991216659546,
|
|
"step": 2163
|
|
},
|
|
{
|
|
"epoch": 2.807186162986394,
|
|
"grad_norm": 0.7643389701843262,
|
|
"learning_rate": 2.2915348850216955e-07,
|
|
"loss": 0.6372033953666687,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 2.808483889936533,
|
|
"grad_norm": 0.6990427374839783,
|
|
"learning_rate": 2.2611982216279693e-07,
|
|
"loss": 0.6647629141807556,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 2.809781616886672,
|
|
"grad_norm": 0.7442436814308167,
|
|
"learning_rate": 2.2310614093457917e-07,
|
|
"loss": 0.6188019514083862,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 2.811079343836811,
|
|
"grad_norm": 0.7379173040390015,
|
|
"learning_rate": 2.2011245097972812e-07,
|
|
"loss": 0.643206000328064,
|
|
"step": 2167
|
|
},
|
|
{
|
|
"epoch": 2.8123770707869498,
|
|
"grad_norm": 0.7450693249702454,
|
|
"learning_rate": 2.171387584195861e-07,
|
|
"loss": 0.6626617312431335,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 2.8136747977370886,
|
|
"grad_norm": 0.7376441359519958,
|
|
"learning_rate": 2.1418506933459926e-07,
|
|
"loss": 0.6287381052970886,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 2.8149725246872275,
|
|
"grad_norm": 0.7581092715263367,
|
|
"learning_rate": 2.1125138976431425e-07,
|
|
"loss": 0.6942882537841797,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 2.8162702516373663,
|
|
"grad_norm": 0.7551229596138,
|
|
"learning_rate": 2.0833772570736376e-07,
|
|
"loss": 0.6641190052032471,
|
|
"step": 2171
|
|
},
|
|
{
|
|
"epoch": 2.817567978587505,
|
|
"grad_norm": 0.723896861076355,
|
|
"learning_rate": 2.0544408312145325e-07,
|
|
"loss": 0.6406188607215881,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 2.818865705537644,
|
|
"grad_norm": 0.7154518961906433,
|
|
"learning_rate": 2.025704679233498e-07,
|
|
"loss": 0.6102049946784973,
|
|
"step": 2173
|
|
},
|
|
{
|
|
"epoch": 2.8201634324877833,
|
|
"grad_norm": 0.7203720808029175,
|
|
"learning_rate": 1.9971688598886874e-07,
|
|
"loss": 0.6299295425415039,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 2.8214611594379218,
|
|
"grad_norm": 0.7477232217788696,
|
|
"learning_rate": 1.9688334315286383e-07,
|
|
"loss": 0.657807469367981,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 2.822758886388061,
|
|
"grad_norm": 0.7149349451065063,
|
|
"learning_rate": 1.9406984520921156e-07,
|
|
"loss": 0.6447558999061584,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 2.8240566133382,
|
|
"grad_norm": 0.7502943277359009,
|
|
"learning_rate": 1.9127639791080345e-07,
|
|
"loss": 0.7339900732040405,
|
|
"step": 2177
|
|
},
|
|
{
|
|
"epoch": 2.8253543402883388,
|
|
"grad_norm": 0.7233054637908936,
|
|
"learning_rate": 1.885030069695326e-07,
|
|
"loss": 0.668261706829071,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 2.8266520672384776,
|
|
"grad_norm": 0.7234363555908203,
|
|
"learning_rate": 1.8574967805628174e-07,
|
|
"loss": 0.6577302813529968,
|
|
"step": 2179
|
|
},
|
|
{
|
|
"epoch": 2.8279497941886165,
|
|
"grad_norm": 0.7601407766342163,
|
|
"learning_rate": 1.8301641680090965e-07,
|
|
"loss": 0.6615520715713501,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 2.8292475211387553,
|
|
"grad_norm": 0.7155176401138306,
|
|
"learning_rate": 1.8030322879224792e-07,
|
|
"loss": 0.6732202768325806,
|
|
"step": 2181
|
|
},
|
|
{
|
|
"epoch": 2.830545248088894,
|
|
"grad_norm": 0.7071481347084045,
|
|
"learning_rate": 1.7761011957807439e-07,
|
|
"loss": 0.6781343817710876,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 2.831842975039033,
|
|
"grad_norm": 0.7136833071708679,
|
|
"learning_rate": 1.7493709466511965e-07,
|
|
"loss": 0.6390227675437927,
|
|
"step": 2183
|
|
},
|
|
{
|
|
"epoch": 2.833140701989172,
|
|
"grad_norm": 0.741337239742279,
|
|
"learning_rate": 1.7228415951904165e-07,
|
|
"loss": 0.6472516059875488,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 2.834438428939311,
|
|
"grad_norm": 0.732276976108551,
|
|
"learning_rate": 1.6965131956442004e-07,
|
|
"loss": 0.6666471362113953,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 2.83573615588945,
|
|
"grad_norm": 0.7136049866676331,
|
|
"learning_rate": 1.670385801847485e-07,
|
|
"loss": 0.6376191973686218,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 2.837033882839589,
|
|
"grad_norm": 0.7336399555206299,
|
|
"learning_rate": 1.6444594672241688e-07,
|
|
"loss": 0.6784384846687317,
|
|
"step": 2187
|
|
},
|
|
{
|
|
"epoch": 2.8383316097897278,
|
|
"grad_norm": 0.7359493374824524,
|
|
"learning_rate": 1.6187342447870235e-07,
|
|
"loss": 0.6160508394241333,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 2.8396293367398666,
|
|
"grad_norm": 0.7054331302642822,
|
|
"learning_rate": 1.5932101871376503e-07,
|
|
"loss": 0.6256083846092224,
|
|
"step": 2189
|
|
},
|
|
{
|
|
"epoch": 2.8409270636900055,
|
|
"grad_norm": 0.7195982336997986,
|
|
"learning_rate": 1.567887346466257e-07,
|
|
"loss": 0.5842984318733215,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 2.8422247906401443,
|
|
"grad_norm": 0.7330359220504761,
|
|
"learning_rate": 1.54276577455168e-07,
|
|
"loss": 0.655302882194519,
|
|
"step": 2191
|
|
},
|
|
{
|
|
"epoch": 2.843522517590283,
|
|
"grad_norm": 0.7195461392402649,
|
|
"learning_rate": 1.517845522761141e-07,
|
|
"loss": 0.695612370967865,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 2.844820244540422,
|
|
"grad_norm": 0.7142940759658813,
|
|
"learning_rate": 1.4931266420502687e-07,
|
|
"loss": 0.671156108379364,
|
|
"step": 2193
|
|
},
|
|
{
|
|
"epoch": 2.8461179714905613,
|
|
"grad_norm": 0.7329767346382141,
|
|
"learning_rate": 1.468609182962899e-07,
|
|
"loss": 0.6843516230583191,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 2.8474156984406997,
|
|
"grad_norm": 0.7575559616088867,
|
|
"learning_rate": 1.4442931956310525e-07,
|
|
"loss": 0.6152229309082031,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 2.848713425390839,
|
|
"grad_norm": 0.7627936005592346,
|
|
"learning_rate": 1.420178729774746e-07,
|
|
"loss": 0.6545628905296326,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 2.850011152340978,
|
|
"grad_norm": 0.7592964768409729,
|
|
"learning_rate": 1.3962658347019819e-07,
|
|
"loss": 0.7087745666503906,
|
|
"step": 2197
|
|
},
|
|
{
|
|
"epoch": 2.8513088792911168,
|
|
"grad_norm": 0.7184759974479675,
|
|
"learning_rate": 1.372554559308559e-07,
|
|
"loss": 0.6886664032936096,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 2.8526066062412556,
|
|
"grad_norm": 0.7686153054237366,
|
|
"learning_rate": 1.3490449520780492e-07,
|
|
"loss": 0.65256667137146,
|
|
"step": 2199
|
|
},
|
|
{
|
|
"epoch": 2.8539043331913945,
|
|
"grad_norm": 0.722467839717865,
|
|
"learning_rate": 1.3257370610816333e-07,
|
|
"loss": 0.6053767800331116,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 2.8552020601415333,
|
|
"grad_norm": 0.7348204255104065,
|
|
"learning_rate": 1.3026309339780442e-07,
|
|
"loss": 0.57970130443573,
|
|
"step": 2201
|
|
},
|
|
{
|
|
"epoch": 2.856499787091672,
|
|
"grad_norm": 0.724539041519165,
|
|
"learning_rate": 1.2797266180134994e-07,
|
|
"loss": 0.6097747087478638,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 2.857797514041811,
|
|
"grad_norm": 0.7563627362251282,
|
|
"learning_rate": 1.2570241600214805e-07,
|
|
"loss": 0.6322290897369385,
|
|
"step": 2203
|
|
},
|
|
{
|
|
"epoch": 2.85909524099195,
|
|
"grad_norm": 0.7333301901817322,
|
|
"learning_rate": 1.2345236064228216e-07,
|
|
"loss": 0.6172837615013123,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 2.860392967942089,
|
|
"grad_norm": 0.7645448446273804,
|
|
"learning_rate": 1.212225003225409e-07,
|
|
"loss": 0.6847653388977051,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 2.8616906948922276,
|
|
"grad_norm": 0.7139600515365601,
|
|
"learning_rate": 1.1901283960242704e-07,
|
|
"loss": 0.641283392906189,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 2.862988421842367,
|
|
"grad_norm": 0.7192294597625732,
|
|
"learning_rate": 1.168233830001364e-07,
|
|
"loss": 0.6558660864830017,
|
|
"step": 2207
|
|
},
|
|
{
|
|
"epoch": 2.8642861487925058,
|
|
"grad_norm": 0.7247057557106018,
|
|
"learning_rate": 1.1465413499255452e-07,
|
|
"loss": 0.648059070110321,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 2.8655838757426446,
|
|
"grad_norm": 0.7141038179397583,
|
|
"learning_rate": 1.1250510001524329e-07,
|
|
"loss": 0.7089075446128845,
|
|
"step": 2209
|
|
},
|
|
{
|
|
"epoch": 2.8668816026927835,
|
|
"grad_norm": 0.7448967099189758,
|
|
"learning_rate": 1.103762824624377e-07,
|
|
"loss": 0.655659019947052,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 2.8681793296429223,
|
|
"grad_norm": 0.7217125296592712,
|
|
"learning_rate": 1.0826768668702691e-07,
|
|
"loss": 0.6335598826408386,
|
|
"step": 2211
|
|
},
|
|
{
|
|
"epoch": 2.869477056593061,
|
|
"grad_norm": 0.7432066202163696,
|
|
"learning_rate": 1.0617931700055984e-07,
|
|
"loss": 0.6629352569580078,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 2.8707747835432,
|
|
"grad_norm": 0.759253740310669,
|
|
"learning_rate": 1.0411117767322065e-07,
|
|
"loss": 0.6971714496612549,
|
|
"step": 2213
|
|
},
|
|
{
|
|
"epoch": 2.872072510493339,
|
|
"grad_norm": 0.7214189171791077,
|
|
"learning_rate": 1.0206327293383222e-07,
|
|
"loss": 0.6498401165008545,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 2.8733702374434777,
|
|
"grad_norm": 0.7300909161567688,
|
|
"learning_rate": 1.000356069698416e-07,
|
|
"loss": 0.6666358113288879,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 2.874667964393617,
|
|
"grad_norm": 0.7169894576072693,
|
|
"learning_rate": 9.802818392731117e-08,
|
|
"loss": 0.6067378520965576,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 2.8759656913437555,
|
|
"grad_norm": 0.7870055437088013,
|
|
"learning_rate": 9.60410079109153e-08,
|
|
"loss": 0.7164538502693176,
|
|
"step": 2217
|
|
},
|
|
{
|
|
"epoch": 2.8772634182938948,
|
|
"grad_norm": 0.731452465057373,
|
|
"learning_rate": 9.407408298392373e-08,
|
|
"loss": 0.6627915501594543,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 2.8785611452440336,
|
|
"grad_norm": 0.7452148795127869,
|
|
"learning_rate": 9.212741316820039e-08,
|
|
"loss": 0.6090914607048035,
|
|
"step": 2219
|
|
},
|
|
{
|
|
"epoch": 2.8798588721941725,
|
|
"grad_norm": 0.7165141701698303,
|
|
"learning_rate": 9.020100244419461e-08,
|
|
"loss": 0.7527438998222351,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 2.8811565991443113,
|
|
"grad_norm": 0.7165322303771973,
|
|
"learning_rate": 8.829485475092548e-08,
|
|
"loss": 0.663241446018219,
|
|
"step": 2221
|
|
},
|
|
{
|
|
"epoch": 2.88245432609445,
|
|
"grad_norm": 0.8054161667823792,
|
|
"learning_rate": 8.640897398598525e-08,
|
|
"loss": 0.765292227268219,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 2.883752053044589,
|
|
"grad_norm": 0.7372357249259949,
|
|
"learning_rate": 8.454336400552154e-08,
|
|
"loss": 0.6321142911911011,
|
|
"step": 2223
|
|
},
|
|
{
|
|
"epoch": 2.885049779994728,
|
|
"grad_norm": 0.7551286220550537,
|
|
"learning_rate": 8.269802862423405e-08,
|
|
"loss": 0.6694223880767822,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 2.8863475069448667,
|
|
"grad_norm": 0.6954628825187683,
|
|
"learning_rate": 8.087297161536778e-08,
|
|
"loss": 0.650575578212738,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 2.8876452338950056,
|
|
"grad_norm": 0.6984097957611084,
|
|
"learning_rate": 7.906819671070098e-08,
|
|
"loss": 0.6023176908493042,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 2.888942960845145,
|
|
"grad_norm": 0.7234562635421753,
|
|
"learning_rate": 7.728370760054283e-08,
|
|
"loss": 0.6330822110176086,
|
|
"step": 2227
|
|
},
|
|
{
|
|
"epoch": 2.8902406877952833,
|
|
"grad_norm": 0.7173102498054504,
|
|
"learning_rate": 7.55195079337212e-08,
|
|
"loss": 0.6250259876251221,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 2.8915384147454226,
|
|
"grad_norm": 0.7292760610580444,
|
|
"learning_rate": 7.377560131757832e-08,
|
|
"loss": 0.6211444139480591,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 2.8928361416955615,
|
|
"grad_norm": 0.7143842577934265,
|
|
"learning_rate": 7.205199131796182e-08,
|
|
"loss": 0.6102809906005859,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 2.8941338686457003,
|
|
"grad_norm": 0.7200958132743835,
|
|
"learning_rate": 7.034868145921802e-08,
|
|
"loss": 0.6820523142814636,
|
|
"step": 2231
|
|
},
|
|
{
|
|
"epoch": 2.895431595595839,
|
|
"grad_norm": 0.7009389400482178,
|
|
"learning_rate": 6.866567522418322e-08,
|
|
"loss": 0.6737648248672485,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 2.896729322545978,
|
|
"grad_norm": 0.7720589637756348,
|
|
"learning_rate": 6.700297605418127e-08,
|
|
"loss": 0.6236926317214966,
|
|
"step": 2233
|
|
},
|
|
{
|
|
"epoch": 2.898027049496117,
|
|
"grad_norm": 0.7273607850074768,
|
|
"learning_rate": 6.53605873490093e-08,
|
|
"loss": 0.673498272895813,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 2.8993247764462557,
|
|
"grad_norm": 0.7236337065696716,
|
|
"learning_rate": 6.373851246693763e-08,
|
|
"loss": 0.6256372928619385,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 2.9006225033963946,
|
|
"grad_norm": 0.7014041543006897,
|
|
"learning_rate": 6.21367547246976e-08,
|
|
"loss": 0.6363632678985596,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 2.9019202303465335,
|
|
"grad_norm": 0.7210372686386108,
|
|
"learning_rate": 6.055531739747933e-08,
|
|
"loss": 0.6491326689720154,
|
|
"step": 2237
|
|
},
|
|
{
|
|
"epoch": 2.9032179572966728,
|
|
"grad_norm": 0.766070544719696,
|
|
"learning_rate": 5.899420371892173e-08,
|
|
"loss": 0.606798529624939,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 2.904515684246811,
|
|
"grad_norm": 0.7013832330703735,
|
|
"learning_rate": 5.745341688110806e-08,
|
|
"loss": 0.6418301463127136,
|
|
"step": 2239
|
|
},
|
|
{
|
|
"epoch": 2.9058134111969505,
|
|
"grad_norm": 0.7240904569625854,
|
|
"learning_rate": 5.593296003455595e-08,
|
|
"loss": 0.6093890070915222,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 2.9071111381470893,
|
|
"grad_norm": 0.7125054001808167,
|
|
"learning_rate": 5.4432836288215165e-08,
|
|
"loss": 0.6541129350662231,
|
|
"step": 2241
|
|
},
|
|
{
|
|
"epoch": 2.908408865097228,
|
|
"grad_norm": 0.7161985635757446,
|
|
"learning_rate": 5.2953048709459834e-08,
|
|
"loss": 0.617908239364624,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 2.909706592047367,
|
|
"grad_norm": 0.737856388092041,
|
|
"learning_rate": 5.1493600324080684e-08,
|
|
"loss": 0.649212121963501,
|
|
"step": 2243
|
|
},
|
|
{
|
|
"epoch": 2.911004318997506,
|
|
"grad_norm": 0.7285069227218628,
|
|
"learning_rate": 5.0054494116279497e-08,
|
|
"loss": 0.6526796221733093,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 2.9123020459476447,
|
|
"grad_norm": 0.715974748134613,
|
|
"learning_rate": 4.8635733028664644e-08,
|
|
"loss": 0.6148603558540344,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 2.9135997728977836,
|
|
"grad_norm": 0.7559519410133362,
|
|
"learning_rate": 4.723731996224446e-08,
|
|
"loss": 0.6750462055206299,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 2.9148974998479225,
|
|
"grad_norm": 0.7167734503746033,
|
|
"learning_rate": 4.585925777641831e-08,
|
|
"loss": 0.6933612823486328,
|
|
"step": 2247
|
|
},
|
|
{
|
|
"epoch": 2.9161952267980613,
|
|
"grad_norm": 0.7255918383598328,
|
|
"learning_rate": 4.450154928897443e-08,
|
|
"loss": 0.6560993194580078,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 2.9174929537482006,
|
|
"grad_norm": 0.7656079530715942,
|
|
"learning_rate": 4.316419727608434e-08,
|
|
"loss": 0.6685020923614502,
|
|
"step": 2249
|
|
},
|
|
{
|
|
"epoch": 2.9187906806983395,
|
|
"grad_norm": 0.7287185788154602,
|
|
"learning_rate": 4.1847204472293954e-08,
|
|
"loss": 0.646466851234436,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 2.9200884076484783,
|
|
"grad_norm": 0.7272042036056519,
|
|
"learning_rate": 4.055057357052139e-08,
|
|
"loss": 0.6481143236160278,
|
|
"step": 2251
|
|
},
|
|
{
|
|
"epoch": 2.921386134598617,
|
|
"grad_norm": 0.7513357996940613,
|
|
"learning_rate": 3.927430722204473e-08,
|
|
"loss": 0.6382118463516235,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 2.922683861548756,
|
|
"grad_norm": 0.7202178239822388,
|
|
"learning_rate": 3.801840803651091e-08,
|
|
"loss": 0.6208593845367432,
|
|
"step": 2253
|
|
},
|
|
{
|
|
"epoch": 2.923981588498895,
|
|
"grad_norm": 0.7391272783279419,
|
|
"learning_rate": 3.678287858191132e-08,
|
|
"loss": 0.62124103307724,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 2.9252793154490337,
|
|
"grad_norm": 0.7046197056770325,
|
|
"learning_rate": 3.5567721384593965e-08,
|
|
"loss": 0.6635320782661438,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 2.9265770423991726,
|
|
"grad_norm": 0.7366517782211304,
|
|
"learning_rate": 3.437293892924576e-08,
|
|
"loss": 0.657387912273407,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 2.9278747693493115,
|
|
"grad_norm": 0.7833458781242371,
|
|
"learning_rate": 3.3198533658895804e-08,
|
|
"loss": 0.681797981262207,
|
|
"step": 2257
|
|
},
|
|
{
|
|
"epoch": 2.9291724962994508,
|
|
"grad_norm": 0.7216890454292297,
|
|
"learning_rate": 3.2044507974905433e-08,
|
|
"loss": 0.5936287641525269,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 2.930470223249589,
|
|
"grad_norm": 0.736221969127655,
|
|
"learning_rate": 3.091086423696377e-08,
|
|
"loss": 0.6654385328292847,
|
|
"step": 2259
|
|
},
|
|
{
|
|
"epoch": 2.9317679501997285,
|
|
"grad_norm": 0.7042406797409058,
|
|
"learning_rate": 2.9797604763087684e-08,
|
|
"loss": 0.6541644930839539,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 2.9330656771498673,
|
|
"grad_norm": 0.7537480592727661,
|
|
"learning_rate": 2.8704731829609643e-08,
|
|
"loss": 0.6462427377700806,
|
|
"step": 2261
|
|
},
|
|
{
|
|
"epoch": 2.934363404100006,
|
|
"grad_norm": 0.748501718044281,
|
|
"learning_rate": 2.763224767117767e-08,
|
|
"loss": 0.6837744116783142,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 2.935661131050145,
|
|
"grad_norm": 0.7571681141853333,
|
|
"learning_rate": 2.6580154480750907e-08,
|
|
"loss": 0.6494276523590088,
|
|
"step": 2263
|
|
},
|
|
{
|
|
"epoch": 2.936958858000284,
|
|
"grad_norm": 0.7051231265068054,
|
|
"learning_rate": 2.554845440959408e-08,
|
|
"loss": 0.6642428040504456,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 2.9382565849504227,
|
|
"grad_norm": 0.7481043934822083,
|
|
"learning_rate": 2.4537149567271935e-08,
|
|
"loss": 0.7524136900901794,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 2.9395543119005616,
|
|
"grad_norm": 0.7172916531562805,
|
|
"learning_rate": 2.3546242021648126e-08,
|
|
"loss": 0.6545467972755432,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 2.9408520388507005,
|
|
"grad_norm": 0.7390909790992737,
|
|
"learning_rate": 2.2575733798876342e-08,
|
|
"loss": 0.6789126396179199,
|
|
"step": 2267
|
|
},
|
|
{
|
|
"epoch": 2.9421497658008393,
|
|
"grad_norm": 0.6911484003067017,
|
|
"learning_rate": 2.162562688340142e-08,
|
|
"loss": 0.5900536775588989,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 2.9434474927509786,
|
|
"grad_norm": 0.7650425434112549,
|
|
"learning_rate": 2.0695923217950442e-08,
|
|
"loss": 0.6601477861404419,
|
|
"step": 2269
|
|
},
|
|
{
|
|
"epoch": 2.944745219701117,
|
|
"grad_norm": 0.7415356040000916,
|
|
"learning_rate": 1.9786624703532764e-08,
|
|
"loss": 0.7132882475852966,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 2.9460429466512563,
|
|
"grad_norm": 0.7267791032791138,
|
|
"learning_rate": 1.8897733199434443e-08,
|
|
"loss": 0.6234641075134277,
|
|
"step": 2271
|
|
},
|
|
{
|
|
"epoch": 2.947340673601395,
|
|
"grad_norm": 0.7090092897415161,
|
|
"learning_rate": 1.8029250523211582e-08,
|
|
"loss": 0.6485676765441895,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 2.948638400551534,
|
|
"grad_norm": 0.7129170298576355,
|
|
"learning_rate": 1.718117845069367e-08,
|
|
"loss": 0.6410534977912903,
|
|
"step": 2273
|
|
},
|
|
{
|
|
"epoch": 2.949936127501673,
|
|
"grad_norm": 0.7186943292617798,
|
|
"learning_rate": 1.635351871597246e-08,
|
|
"loss": 0.7133535146713257,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 2.9512338544518117,
|
|
"grad_norm": 0.7258438467979431,
|
|
"learning_rate": 1.554627301140199e-08,
|
|
"loss": 0.5933857560157776,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 2.9525315814019506,
|
|
"grad_norm": 0.7135540843009949,
|
|
"learning_rate": 1.4759442987596351e-08,
|
|
"loss": 0.6514700055122375,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 2.9538293083520895,
|
|
"grad_norm": 0.7308082580566406,
|
|
"learning_rate": 1.3993030253423023e-08,
|
|
"loss": 0.6132031679153442,
|
|
"step": 2277
|
|
},
|
|
{
|
|
"epoch": 2.9551270353022283,
|
|
"grad_norm": 0.7810271382331848,
|
|
"learning_rate": 1.3247036376002886e-08,
|
|
"loss": 0.654386043548584,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 2.956424762252367,
|
|
"grad_norm": 0.761455237865448,
|
|
"learning_rate": 1.252146288070355e-08,
|
|
"loss": 0.6730161309242249,
|
|
"step": 2279
|
|
},
|
|
{
|
|
"epoch": 2.9577224892025065,
|
|
"grad_norm": 0.7196770906448364,
|
|
"learning_rate": 1.1816311251140466e-08,
|
|
"loss": 0.6393716931343079,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 2.959020216152645,
|
|
"grad_norm": 0.6943092346191406,
|
|
"learning_rate": 1.113158292916916e-08,
|
|
"loss": 0.6582570672035217,
|
|
"step": 2281
|
|
},
|
|
{
|
|
"epoch": 2.960317943102784,
|
|
"grad_norm": 0.7215139865875244,
|
|
"learning_rate": 1.0467279314886336e-08,
|
|
"loss": 0.6728758215904236,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 2.961615670052923,
|
|
"grad_norm": 0.7100042700767517,
|
|
"learning_rate": 9.82340176662433e-09,
|
|
"loss": 0.6192055344581604,
|
|
"step": 2283
|
|
},
|
|
{
|
|
"epoch": 2.962913397003062,
|
|
"grad_norm": 0.772715151309967,
|
|
"learning_rate": 9.199951600951106e-09,
|
|
"loss": 0.6373339295387268,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 2.9642111239532007,
|
|
"grad_norm": 0.6952692866325378,
|
|
"learning_rate": 8.596930092662493e-09,
|
|
"loss": 0.6480576992034912,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 2.9655088509033396,
|
|
"grad_norm": 0.729654848575592,
|
|
"learning_rate": 8.014338474785499e-09,
|
|
"loss": 0.5901361107826233,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 2.9668065778534785,
|
|
"grad_norm": 0.7037022709846497,
|
|
"learning_rate": 7.45217793857389e-09,
|
|
"loss": 0.6541380882263184,
|
|
"step": 2287
|
|
},
|
|
{
|
|
"epoch": 2.9681043048036173,
|
|
"grad_norm": 0.7359015941619873,
|
|
"learning_rate": 6.910449633501515e-09,
|
|
"loss": 0.6508733630180359,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 2.969402031753756,
|
|
"grad_norm": 0.6860209703445435,
|
|
"learning_rate": 6.389154667266751e-09,
|
|
"loss": 0.6324610710144043,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 2.970699758703895,
|
|
"grad_norm": 0.7234740257263184,
|
|
"learning_rate": 5.888294105785841e-09,
|
|
"loss": 0.6781293749809265,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 2.9719974856540343,
|
|
"grad_norm": 0.748229444026947,
|
|
"learning_rate": 5.407868973191788e-09,
|
|
"loss": 0.7036339640617371,
|
|
"step": 2291
|
|
},
|
|
{
|
|
"epoch": 2.9732952126041727,
|
|
"grad_norm": 0.7085629105567932,
|
|
"learning_rate": 4.947880251832127e-09,
|
|
"loss": 0.6461360454559326,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 2.974592939554312,
|
|
"grad_norm": 0.7789812088012695,
|
|
"learning_rate": 4.508328882268931e-09,
|
|
"loss": 0.6448870897293091,
|
|
"step": 2293
|
|
},
|
|
{
|
|
"epoch": 2.975890666504451,
|
|
"grad_norm": 0.7379918694496155,
|
|
"learning_rate": 4.089215763271037e-09,
|
|
"loss": 0.5733003616333008,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 2.9771883934545897,
|
|
"grad_norm": 0.700847864151001,
|
|
"learning_rate": 3.6905417518195985e-09,
|
|
"loss": 0.6530927419662476,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 2.9784861204047286,
|
|
"grad_norm": 0.7081441879272461,
|
|
"learning_rate": 3.312307663103642e-09,
|
|
"loss": 0.643142819404602,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 2.9797838473548675,
|
|
"grad_norm": 0.7461786270141602,
|
|
"learning_rate": 2.954514270513409e-09,
|
|
"loss": 0.6704539060592651,
|
|
"step": 2297
|
|
},
|
|
{
|
|
"epoch": 2.9810815743050063,
|
|
"grad_norm": 0.7500106692314148,
|
|
"learning_rate": 2.6171623056481245e-09,
|
|
"loss": 0.6799619197845459,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 2.982379301255145,
|
|
"grad_norm": 0.7831278443336487,
|
|
"learning_rate": 2.300252458306007e-09,
|
|
"loss": 0.6943190097808838,
|
|
"step": 2299
|
|
},
|
|
{
|
|
"epoch": 2.983677028205284,
|
|
"grad_norm": 0.7213168740272522,
|
|
"learning_rate": 2.0037853764887096e-09,
|
|
"loss": 0.677469789981842,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 2.983677028205284,
|
|
"eval_loss": 0.748992383480072,
|
|
"eval_runtime": 142.0816,
|
|
"eval_samples_per_second": 36.542,
|
|
"eval_steps_per_second": 9.136,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 2.984974755155423,
|
|
"grad_norm": 0.7276439070701599,
|
|
"learning_rate": 1.7277616663946562e-09,
|
|
"loss": 0.6558234095573425,
|
|
"step": 2301
|
|
},
|
|
{
|
|
"epoch": 2.986272482105562,
|
|
"grad_norm": 0.7110369205474854,
|
|
"learning_rate": 1.4721818924223752e-09,
|
|
"loss": 0.6696679592132568,
|
|
"step": 2302
|
|
},
|
|
{
|
|
"epoch": 2.987570209055701,
|
|
"grad_norm": 0.7226536273956299,
|
|
"learning_rate": 1.2370465771693874e-09,
|
|
"loss": 0.6655494570732117,
|
|
"step": 2303
|
|
},
|
|
{
|
|
"epoch": 2.98886793600584,
|
|
"grad_norm": 0.7148182988166809,
|
|
"learning_rate": 1.0223562014277654e-09,
|
|
"loss": 0.6355955600738525,
|
|
"step": 2304
|
|
},
|
|
{
|
|
"epoch": 2.9901656629559787,
|
|
"grad_norm": 0.7424213886260986,
|
|
"learning_rate": 8.281112041841343e-10,
|
|
"loss": 0.6586095094680786,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 2.9914633899061176,
|
|
"grad_norm": 0.742540717124939,
|
|
"learning_rate": 6.543119826207811e-10,
|
|
"loss": 0.6475944519042969,
|
|
"step": 2306
|
|
},
|
|
{
|
|
"epoch": 2.9927611168562565,
|
|
"grad_norm": 0.7507250905036926,
|
|
"learning_rate": 5.009588921123243e-10,
|
|
"loss": 0.6415051221847534,
|
|
"step": 2307
|
|
},
|
|
{
|
|
"epoch": 2.9940588438063953,
|
|
"grad_norm": 0.7377761602401733,
|
|
"learning_rate": 3.680522462279346e-10,
|
|
"loss": 0.6111840009689331,
|
|
"step": 2308
|
|
},
|
|
{
|
|
"epoch": 2.995356570756534,
|
|
"grad_norm": 0.735808789730072,
|
|
"learning_rate": 2.555923167291141e-10,
|
|
"loss": 0.6714158058166504,
|
|
"step": 2309
|
|
},
|
|
{
|
|
"epoch": 2.996654297706673,
|
|
"grad_norm": 0.7448306083679199,
|
|
"learning_rate": 1.635793335652558e-10,
|
|
"loss": 0.7029042840003967,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 2.9979520246568123,
|
|
"grad_norm": 0.761702299118042,
|
|
"learning_rate": 9.20134848814147e-11,
|
|
"loss": 0.6207424998283386,
|
|
"step": 2311
|
|
},
|
|
{
|
|
"epoch": 2.9992497516069507,
|
|
"grad_norm": 0.76186203956604,
|
|
"learning_rate": 4.08949170105366e-11,
|
|
"loss": 0.6319787502288818,
|
|
"step": 2312
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.9660639762878418,
|
|
"learning_rate": 1.022373447900904e-11,
|
|
"loss": 0.7546765804290771,
|
|
"step": 2313
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"step": 2313,
|
|
"total_flos": 4.1917370482093916e+18,
|
|
"train_loss": 0.06833103949818527,
|
|
"train_runtime": 3477.4713,
|
|
"train_samples_per_second": 85.09,
|
|
"train_steps_per_second": 0.665
|
|
}
|
|
],
|
|
"logging_steps": 1.0,
|
|
"max_steps": 2313,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 230,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 4.1917370482093916e+18,
|
|
"train_batch_size": 2,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|