{ "best_global_step": 1380, "best_metric": 0.7464115023612976, "best_model_checkpoint": "saves/qwen3-1.7B/Qwen3-1.7B-SFT-science-2e-5/checkpoint-1380", "epoch": 3.0, "eval_steps": 230, "global_step": 2313, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0012977269501388974, "grad_norm": 18.96442413330078, "learning_rate": 0.0, "loss": 1.341123104095459, "step": 1 }, { "epoch": 0.0025954539002777948, "grad_norm": 17.5643310546875, "learning_rate": 1.7241379310344828e-07, "loss": 1.240975022315979, "step": 2 }, { "epoch": 0.003893180850416692, "grad_norm": 18.22071075439453, "learning_rate": 3.4482758620689656e-07, "loss": 1.3369407653808594, "step": 3 }, { "epoch": 0.0051909078005555895, "grad_norm": 19.40529441833496, "learning_rate": 5.172413793103449e-07, "loss": 1.4051162004470825, "step": 4 }, { "epoch": 0.006488634750694487, "grad_norm": 17.282682418823242, "learning_rate": 6.896551724137931e-07, "loss": 1.318056344985962, "step": 5 }, { "epoch": 0.007786361700833384, "grad_norm": 18.145490646362305, "learning_rate": 8.620689655172415e-07, "loss": 1.3011627197265625, "step": 6 }, { "epoch": 0.009084088650972282, "grad_norm": 18.944950103759766, "learning_rate": 1.0344827586206898e-06, "loss": 1.2762426137924194, "step": 7 }, { "epoch": 0.010381815601111179, "grad_norm": 16.987550735473633, "learning_rate": 1.2068965517241381e-06, "loss": 1.2320008277893066, "step": 8 }, { "epoch": 0.011679542551250076, "grad_norm": 15.374279975891113, "learning_rate": 1.3793103448275862e-06, "loss": 1.1568862199783325, "step": 9 }, { "epoch": 0.012977269501388973, "grad_norm": 15.470294952392578, "learning_rate": 1.5517241379310346e-06, "loss": 1.2633228302001953, "step": 10 }, { "epoch": 0.01427499645152787, "grad_norm": 13.62917709350586, "learning_rate": 1.724137931034483e-06, "loss": 1.2120124101638794, "step": 11 }, { "epoch": 0.015572723401666768, "grad_norm": 11.841530799865723, "learning_rate": 1.896551724137931e-06, "loss": 1.15806245803833, "step": 12 }, { "epoch": 0.016870450351805667, "grad_norm": 11.673654556274414, "learning_rate": 2.0689655172413796e-06, "loss": 1.1886231899261475, "step": 13 }, { "epoch": 0.018168177301944564, "grad_norm": 11.115256309509277, "learning_rate": 2.241379310344828e-06, "loss": 1.1659168004989624, "step": 14 }, { "epoch": 0.01946590425208346, "grad_norm": 8.34097671508789, "learning_rate": 2.4137931034482762e-06, "loss": 1.1347044706344604, "step": 15 }, { "epoch": 0.020763631202222358, "grad_norm": 6.3707804679870605, "learning_rate": 2.5862068965517246e-06, "loss": 1.097546935081482, "step": 16 }, { "epoch": 0.022061358152361255, "grad_norm": 6.07731294631958, "learning_rate": 2.7586206896551725e-06, "loss": 1.1303181648254395, "step": 17 }, { "epoch": 0.023359085102500152, "grad_norm": 5.143428802490234, "learning_rate": 2.931034482758621e-06, "loss": 1.087995171546936, "step": 18 }, { "epoch": 0.02465681205263905, "grad_norm": 5.108595371246338, "learning_rate": 3.103448275862069e-06, "loss": 1.09377121925354, "step": 19 }, { "epoch": 0.025954539002777947, "grad_norm": 4.329593658447266, "learning_rate": 3.2758620689655175e-06, "loss": 0.9835488200187683, "step": 20 }, { "epoch": 0.027252265952916844, "grad_norm": 2.5329697132110596, "learning_rate": 3.448275862068966e-06, "loss": 1.1068130731582642, "step": 21 }, { "epoch": 0.02854999290305574, "grad_norm": 2.4052135944366455, "learning_rate": 3.620689655172414e-06, "loss": 0.9785792827606201, "step": 22 }, { "epoch": 0.029847719853194638, "grad_norm": 2.2059929370880127, "learning_rate": 3.793103448275862e-06, "loss": 1.0071507692337036, "step": 23 }, { "epoch": 0.031145446803333535, "grad_norm": 1.962939977645874, "learning_rate": 3.96551724137931e-06, "loss": 0.9504339694976807, "step": 24 }, { "epoch": 0.032443173753472436, "grad_norm": 1.8630015850067139, "learning_rate": 4.137931034482759e-06, "loss": 0.9488564133644104, "step": 25 }, { "epoch": 0.03374090070361133, "grad_norm": 1.9074621200561523, "learning_rate": 4.310344827586207e-06, "loss": 0.9918304681777954, "step": 26 }, { "epoch": 0.03503862765375023, "grad_norm": 1.7486937046051025, "learning_rate": 4.482758620689656e-06, "loss": 0.9598171710968018, "step": 27 }, { "epoch": 0.03633635460388913, "grad_norm": 1.5654025077819824, "learning_rate": 4.655172413793104e-06, "loss": 0.9875293970108032, "step": 28 }, { "epoch": 0.037634081554028025, "grad_norm": 1.5146547555923462, "learning_rate": 4.8275862068965525e-06, "loss": 0.9899477958679199, "step": 29 }, { "epoch": 0.03893180850416692, "grad_norm": 1.4136415719985962, "learning_rate": 5e-06, "loss": 1.0122514963150024, "step": 30 }, { "epoch": 0.04022953545430582, "grad_norm": 1.3606868982315063, "learning_rate": 5.172413793103449e-06, "loss": 0.9211847186088562, "step": 31 }, { "epoch": 0.041527262404444716, "grad_norm": 1.1916248798370361, "learning_rate": 5.344827586206896e-06, "loss": 0.9429690837860107, "step": 32 }, { "epoch": 0.04282498935458361, "grad_norm": 1.1089906692504883, "learning_rate": 5.517241379310345e-06, "loss": 0.9432889819145203, "step": 33 }, { "epoch": 0.04412271630472251, "grad_norm": 1.0991381406784058, "learning_rate": 5.689655172413794e-06, "loss": 0.8937160968780518, "step": 34 }, { "epoch": 0.04542044325486141, "grad_norm": 1.1420905590057373, "learning_rate": 5.862068965517242e-06, "loss": 0.9616763591766357, "step": 35 }, { "epoch": 0.046718170205000305, "grad_norm": 1.22003972530365, "learning_rate": 6.03448275862069e-06, "loss": 0.991248369216919, "step": 36 }, { "epoch": 0.0480158971551392, "grad_norm": 1.0027211904525757, "learning_rate": 6.206896551724138e-06, "loss": 0.8961243033409119, "step": 37 }, { "epoch": 0.0493136241052781, "grad_norm": 0.948948085308075, "learning_rate": 6.379310344827587e-06, "loss": 0.8873807787895203, "step": 38 }, { "epoch": 0.050611351055416996, "grad_norm": 0.906653106212616, "learning_rate": 6.551724137931035e-06, "loss": 0.9843493103981018, "step": 39 }, { "epoch": 0.05190907800555589, "grad_norm": 0.9032185077667236, "learning_rate": 6.724137931034484e-06, "loss": 0.9521259069442749, "step": 40 }, { "epoch": 0.05320680495569479, "grad_norm": 0.9004918336868286, "learning_rate": 6.896551724137932e-06, "loss": 0.9388642311096191, "step": 41 }, { "epoch": 0.05450453190583369, "grad_norm": 0.9163469672203064, "learning_rate": 7.0689655172413796e-06, "loss": 0.8808169364929199, "step": 42 }, { "epoch": 0.055802258855972585, "grad_norm": 0.8777008056640625, "learning_rate": 7.241379310344828e-06, "loss": 0.8969473242759705, "step": 43 }, { "epoch": 0.05709998580611148, "grad_norm": 0.8831114768981934, "learning_rate": 7.413793103448277e-06, "loss": 0.8995171189308167, "step": 44 }, { "epoch": 0.05839771275625038, "grad_norm": 0.8527185320854187, "learning_rate": 7.586206896551724e-06, "loss": 0.9566978216171265, "step": 45 }, { "epoch": 0.059695439706389276, "grad_norm": 0.8445229530334473, "learning_rate": 7.758620689655173e-06, "loss": 0.8870581388473511, "step": 46 }, { "epoch": 0.060993166656528174, "grad_norm": 0.7909572720527649, "learning_rate": 7.93103448275862e-06, "loss": 0.839882493019104, "step": 47 }, { "epoch": 0.06229089360666707, "grad_norm": 0.9035473465919495, "learning_rate": 8.103448275862069e-06, "loss": 0.9470881223678589, "step": 48 }, { "epoch": 0.06358862055680597, "grad_norm": 0.812706708908081, "learning_rate": 8.275862068965518e-06, "loss": 0.9084426760673523, "step": 49 }, { "epoch": 0.06488634750694487, "grad_norm": 0.7788446545600891, "learning_rate": 8.448275862068966e-06, "loss": 0.9100271463394165, "step": 50 }, { "epoch": 0.06618407445708377, "grad_norm": 0.7733594179153442, "learning_rate": 8.620689655172414e-06, "loss": 0.9046688675880432, "step": 51 }, { "epoch": 0.06748180140722267, "grad_norm": 0.8074057698249817, "learning_rate": 8.793103448275862e-06, "loss": 0.9495884776115417, "step": 52 }, { "epoch": 0.06877952835736156, "grad_norm": 0.7883110642433167, "learning_rate": 8.965517241379312e-06, "loss": 0.944835901260376, "step": 53 }, { "epoch": 0.07007725530750046, "grad_norm": 0.7795141935348511, "learning_rate": 9.13793103448276e-06, "loss": 0.8827984929084778, "step": 54 }, { "epoch": 0.07137498225763936, "grad_norm": 0.7496516704559326, "learning_rate": 9.310344827586207e-06, "loss": 0.8837717771530151, "step": 55 }, { "epoch": 0.07267270920777825, "grad_norm": 0.7296638488769531, "learning_rate": 9.482758620689655e-06, "loss": 0.9134169220924377, "step": 56 }, { "epoch": 0.07397043615791715, "grad_norm": 0.7594932913780212, "learning_rate": 9.655172413793105e-06, "loss": 0.8602768182754517, "step": 57 }, { "epoch": 0.07526816310805605, "grad_norm": 0.7925019264221191, "learning_rate": 9.827586206896553e-06, "loss": 0.9638795852661133, "step": 58 }, { "epoch": 0.07656589005819495, "grad_norm": 0.7823756337165833, "learning_rate": 1e-05, "loss": 0.9325800538063049, "step": 59 }, { "epoch": 0.07786361700833384, "grad_norm": 0.7671526074409485, "learning_rate": 1.0172413793103449e-05, "loss": 0.8490806221961975, "step": 60 }, { "epoch": 0.07916134395847274, "grad_norm": 0.7950026392936707, "learning_rate": 1.0344827586206898e-05, "loss": 0.8811596632003784, "step": 61 }, { "epoch": 0.08045907090861164, "grad_norm": 0.7760382294654846, "learning_rate": 1.0517241379310346e-05, "loss": 0.9363852739334106, "step": 62 }, { "epoch": 0.08175679785875054, "grad_norm": 0.7695664763450623, "learning_rate": 1.0689655172413792e-05, "loss": 0.9032339453697205, "step": 63 }, { "epoch": 0.08305452480888943, "grad_norm": 0.7472826838493347, "learning_rate": 1.0862068965517242e-05, "loss": 0.9319165349006653, "step": 64 }, { "epoch": 0.08435225175902833, "grad_norm": 0.7492451667785645, "learning_rate": 1.103448275862069e-05, "loss": 0.9181802272796631, "step": 65 }, { "epoch": 0.08564997870916723, "grad_norm": 0.7906931042671204, "learning_rate": 1.1206896551724138e-05, "loss": 0.9204844236373901, "step": 66 }, { "epoch": 0.08694770565930612, "grad_norm": 0.7987682223320007, "learning_rate": 1.1379310344827587e-05, "loss": 0.9132669568061829, "step": 67 }, { "epoch": 0.08824543260944502, "grad_norm": 0.7293349504470825, "learning_rate": 1.1551724137931035e-05, "loss": 0.840244472026825, "step": 68 }, { "epoch": 0.08954315955958392, "grad_norm": 0.7649659514427185, "learning_rate": 1.1724137931034483e-05, "loss": 0.9429194331169128, "step": 69 }, { "epoch": 0.09084088650972282, "grad_norm": 0.7362731695175171, "learning_rate": 1.1896551724137933e-05, "loss": 0.910248339176178, "step": 70 }, { "epoch": 0.09213861345986171, "grad_norm": 0.7714956402778625, "learning_rate": 1.206896551724138e-05, "loss": 0.9148205518722534, "step": 71 }, { "epoch": 0.09343634041000061, "grad_norm": 0.8190087676048279, "learning_rate": 1.2241379310344827e-05, "loss": 1.0036617517471313, "step": 72 }, { "epoch": 0.0947340673601395, "grad_norm": 0.7508696913719177, "learning_rate": 1.2413793103448277e-05, "loss": 0.8585586547851562, "step": 73 }, { "epoch": 0.0960317943102784, "grad_norm": 0.7731637358665466, "learning_rate": 1.2586206896551725e-05, "loss": 0.8797649145126343, "step": 74 }, { "epoch": 0.0973295212604173, "grad_norm": 0.7766374349594116, "learning_rate": 1.2758620689655174e-05, "loss": 0.8823714852333069, "step": 75 }, { "epoch": 0.0986272482105562, "grad_norm": 0.7738403677940369, "learning_rate": 1.2931034482758622e-05, "loss": 0.9374374747276306, "step": 76 }, { "epoch": 0.0999249751606951, "grad_norm": 0.7996422648429871, "learning_rate": 1.310344827586207e-05, "loss": 0.8985888957977295, "step": 77 }, { "epoch": 0.10122270211083399, "grad_norm": 0.8077470064163208, "learning_rate": 1.327586206896552e-05, "loss": 0.8687019944190979, "step": 78 }, { "epoch": 0.10252042906097289, "grad_norm": 0.7868083715438843, "learning_rate": 1.3448275862068967e-05, "loss": 0.9471523761749268, "step": 79 }, { "epoch": 0.10381815601111179, "grad_norm": 0.7429269552230835, "learning_rate": 1.3620689655172414e-05, "loss": 0.8650257587432861, "step": 80 }, { "epoch": 0.10511588296125068, "grad_norm": 0.736170768737793, "learning_rate": 1.3793103448275863e-05, "loss": 0.8755403757095337, "step": 81 }, { "epoch": 0.10641360991138958, "grad_norm": 0.7359841465950012, "learning_rate": 1.3965517241379311e-05, "loss": 0.8383484482765198, "step": 82 }, { "epoch": 0.10771133686152848, "grad_norm": 0.7211300730705261, "learning_rate": 1.4137931034482759e-05, "loss": 0.8565696477890015, "step": 83 }, { "epoch": 0.10900906381166738, "grad_norm": 0.7671189308166504, "learning_rate": 1.4310344827586209e-05, "loss": 0.9218558073043823, "step": 84 }, { "epoch": 0.11030679076180627, "grad_norm": 0.816425085067749, "learning_rate": 1.4482758620689657e-05, "loss": 0.870709240436554, "step": 85 }, { "epoch": 0.11160451771194517, "grad_norm": 0.7335647940635681, "learning_rate": 1.4655172413793105e-05, "loss": 0.8868783116340637, "step": 86 }, { "epoch": 0.11290224466208407, "grad_norm": 0.7765848636627197, "learning_rate": 1.4827586206896554e-05, "loss": 0.8968692421913147, "step": 87 }, { "epoch": 0.11419997161222296, "grad_norm": 0.7707907557487488, "learning_rate": 1.5000000000000002e-05, "loss": 0.8512423634529114, "step": 88 }, { "epoch": 0.11549769856236186, "grad_norm": 0.7698812484741211, "learning_rate": 1.5172413793103448e-05, "loss": 0.9038546085357666, "step": 89 }, { "epoch": 0.11679542551250076, "grad_norm": 0.7673100829124451, "learning_rate": 1.5344827586206898e-05, "loss": 0.9032548666000366, "step": 90 }, { "epoch": 0.11809315246263966, "grad_norm": 0.7782520055770874, "learning_rate": 1.5517241379310346e-05, "loss": 0.8969484567642212, "step": 91 }, { "epoch": 0.11939087941277855, "grad_norm": 0.7486196756362915, "learning_rate": 1.5689655172413794e-05, "loss": 0.9460266828536987, "step": 92 }, { "epoch": 0.12068860636291745, "grad_norm": 0.7591387033462524, "learning_rate": 1.586206896551724e-05, "loss": 0.8913143277168274, "step": 93 }, { "epoch": 0.12198633331305635, "grad_norm": 0.7186006903648376, "learning_rate": 1.603448275862069e-05, "loss": 0.817532479763031, "step": 94 }, { "epoch": 0.12328406026319524, "grad_norm": 0.8398354053497314, "learning_rate": 1.6206896551724137e-05, "loss": 0.9849364161491394, "step": 95 }, { "epoch": 0.12458178721333414, "grad_norm": 0.7659850120544434, "learning_rate": 1.637931034482759e-05, "loss": 0.8463207483291626, "step": 96 }, { "epoch": 0.12587951416347304, "grad_norm": 0.7916679978370667, "learning_rate": 1.6551724137931037e-05, "loss": 0.87321537733078, "step": 97 }, { "epoch": 0.12717724111361195, "grad_norm": 0.7151588201522827, "learning_rate": 1.6724137931034485e-05, "loss": 0.8810160160064697, "step": 98 }, { "epoch": 0.12847496806375083, "grad_norm": 0.7750177383422852, "learning_rate": 1.6896551724137932e-05, "loss": 0.7909659147262573, "step": 99 }, { "epoch": 0.12977269501388974, "grad_norm": 0.7832080125808716, "learning_rate": 1.706896551724138e-05, "loss": 0.9595565795898438, "step": 100 }, { "epoch": 0.13107042196402863, "grad_norm": 0.764074444770813, "learning_rate": 1.7241379310344828e-05, "loss": 0.9244315028190613, "step": 101 }, { "epoch": 0.13236814891416754, "grad_norm": 0.8302505016326904, "learning_rate": 1.7413793103448276e-05, "loss": 0.8567872643470764, "step": 102 }, { "epoch": 0.13366587586430642, "grad_norm": 0.7476164102554321, "learning_rate": 1.7586206896551724e-05, "loss": 0.8335643410682678, "step": 103 }, { "epoch": 0.13496360281444533, "grad_norm": 0.7683222889900208, "learning_rate": 1.7758620689655175e-05, "loss": 0.92899489402771, "step": 104 }, { "epoch": 0.13626132976458422, "grad_norm": 0.8164420127868652, "learning_rate": 1.7931034482758623e-05, "loss": 0.9577179551124573, "step": 105 }, { "epoch": 0.13755905671472313, "grad_norm": 0.7937741279602051, "learning_rate": 1.810344827586207e-05, "loss": 0.9404830932617188, "step": 106 }, { "epoch": 0.138856783664862, "grad_norm": 0.7443995475769043, "learning_rate": 1.827586206896552e-05, "loss": 0.8533992171287537, "step": 107 }, { "epoch": 0.14015451061500092, "grad_norm": 0.7239556312561035, "learning_rate": 1.8448275862068967e-05, "loss": 0.8692059516906738, "step": 108 }, { "epoch": 0.1414522375651398, "grad_norm": 0.7722207903862, "learning_rate": 1.8620689655172415e-05, "loss": 0.9231195449829102, "step": 109 }, { "epoch": 0.14274996451527872, "grad_norm": 0.8155950307846069, "learning_rate": 1.8793103448275863e-05, "loss": 0.9769394397735596, "step": 110 }, { "epoch": 0.1440476914654176, "grad_norm": 0.8122441172599792, "learning_rate": 1.896551724137931e-05, "loss": 0.9506130218505859, "step": 111 }, { "epoch": 0.1453454184155565, "grad_norm": 0.748271644115448, "learning_rate": 1.913793103448276e-05, "loss": 0.8314372897148132, "step": 112 }, { "epoch": 0.1466431453656954, "grad_norm": 0.7835760712623596, "learning_rate": 1.931034482758621e-05, "loss": 0.9071435332298279, "step": 113 }, { "epoch": 0.1479408723158343, "grad_norm": 0.7403405904769897, "learning_rate": 1.9482758620689658e-05, "loss": 0.8897596597671509, "step": 114 }, { "epoch": 0.1492385992659732, "grad_norm": 0.8157104849815369, "learning_rate": 1.9655172413793106e-05, "loss": 0.8683630228042603, "step": 115 }, { "epoch": 0.1505363262161121, "grad_norm": 0.8036532402038574, "learning_rate": 1.9827586206896554e-05, "loss": 0.8975539207458496, "step": 116 }, { "epoch": 0.15183405316625098, "grad_norm": 0.7673157453536987, "learning_rate": 2e-05, "loss": 0.938015341758728, "step": 117 }, { "epoch": 0.1531317801163899, "grad_norm": 0.8311364650726318, "learning_rate": 1.999998977626552e-05, "loss": 0.927339494228363, "step": 118 }, { "epoch": 0.15442950706652878, "grad_norm": 0.8438189029693604, "learning_rate": 1.999995910508299e-05, "loss": 0.8367739319801331, "step": 119 }, { "epoch": 0.1557272340166677, "grad_norm": 0.7619196176528931, "learning_rate": 1.999990798651512e-05, "loss": 0.8823627829551697, "step": 120 }, { "epoch": 0.15702496096680657, "grad_norm": 0.8044223785400391, "learning_rate": 1.9999836420666438e-05, "loss": 0.9462600350379944, "step": 121 }, { "epoch": 0.15832268791694548, "grad_norm": 0.7767183780670166, "learning_rate": 1.999974440768327e-05, "loss": 0.8584571480751038, "step": 122 }, { "epoch": 0.15962041486708436, "grad_norm": 0.8261749148368835, "learning_rate": 1.9999631947753776e-05, "loss": 0.8864863514900208, "step": 123 }, { "epoch": 0.16091814181722328, "grad_norm": 0.7884521484375, "learning_rate": 1.999949904110789e-05, "loss": 0.9228469133377075, "step": 124 }, { "epoch": 0.16221586876736216, "grad_norm": 0.7482346296310425, "learning_rate": 1.999934568801738e-05, "loss": 0.8749440908432007, "step": 125 }, { "epoch": 0.16351359571750107, "grad_norm": 0.7735321521759033, "learning_rate": 1.999917188879582e-05, "loss": 0.8487443327903748, "step": 126 }, { "epoch": 0.16481132266763995, "grad_norm": 0.7950016856193542, "learning_rate": 1.9998977643798572e-05, "loss": 0.8879282474517822, "step": 127 }, { "epoch": 0.16610904961777886, "grad_norm": 0.7628664374351501, "learning_rate": 1.999876295342283e-05, "loss": 0.8263102173805237, "step": 128 }, { "epoch": 0.16740677656791775, "grad_norm": 0.7986794114112854, "learning_rate": 1.9998527818107577e-05, "loss": 0.8462676405906677, "step": 129 }, { "epoch": 0.16870450351805666, "grad_norm": 0.7867287993431091, "learning_rate": 1.9998272238333606e-05, "loss": 0.8144584894180298, "step": 130 }, { "epoch": 0.17000223046819554, "grad_norm": 0.7938011288642883, "learning_rate": 1.9997996214623515e-05, "loss": 0.9469823837280273, "step": 131 }, { "epoch": 0.17129995741833445, "grad_norm": 0.7824422717094421, "learning_rate": 1.9997699747541698e-05, "loss": 0.8819964528083801, "step": 132 }, { "epoch": 0.17259768436847334, "grad_norm": 0.7831183075904846, "learning_rate": 1.9997382837694355e-05, "loss": 0.8070334196090698, "step": 133 }, { "epoch": 0.17389541131861225, "grad_norm": 0.7970272302627563, "learning_rate": 1.999704548572949e-05, "loss": 0.9148434996604919, "step": 134 }, { "epoch": 0.17519313826875113, "grad_norm": 0.7763343453407288, "learning_rate": 1.9996687692336896e-05, "loss": 0.8732989430427551, "step": 135 }, { "epoch": 0.17649086521889004, "grad_norm": 0.7826754450798035, "learning_rate": 1.9996309458248184e-05, "loss": 0.8220726847648621, "step": 136 }, { "epoch": 0.17778859216902893, "grad_norm": 0.761687159538269, "learning_rate": 1.999591078423673e-05, "loss": 0.8763125538825989, "step": 137 }, { "epoch": 0.17908631911916784, "grad_norm": 0.7728819251060486, "learning_rate": 1.9995491671117734e-05, "loss": 0.804518461227417, "step": 138 }, { "epoch": 0.18038404606930672, "grad_norm": 0.7697947025299072, "learning_rate": 1.999505211974817e-05, "loss": 0.8979027271270752, "step": 139 }, { "epoch": 0.18168177301944563, "grad_norm": 0.7905195951461792, "learning_rate": 1.999459213102681e-05, "loss": 0.8996750116348267, "step": 140 }, { "epoch": 0.1829794999695845, "grad_norm": 0.7597678899765015, "learning_rate": 1.9994111705894218e-05, "loss": 0.9672253727912903, "step": 141 }, { "epoch": 0.18427722691972342, "grad_norm": 0.7724127769470215, "learning_rate": 1.9993610845332734e-05, "loss": 0.9037659764289856, "step": 142 }, { "epoch": 0.1855749538698623, "grad_norm": 0.8090096712112427, "learning_rate": 1.99930895503665e-05, "loss": 0.9177453517913818, "step": 143 }, { "epoch": 0.18687268082000122, "grad_norm": 0.7363874316215515, "learning_rate": 1.9992547822061427e-05, "loss": 0.8449195027351379, "step": 144 }, { "epoch": 0.1881704077701401, "grad_norm": 0.8058642745018005, "learning_rate": 1.9991985661525217e-05, "loss": 0.998737096786499, "step": 145 }, { "epoch": 0.189468134720279, "grad_norm": 0.7756547927856445, "learning_rate": 1.999140306990734e-05, "loss": 0.8317436575889587, "step": 146 }, { "epoch": 0.1907658616704179, "grad_norm": 0.7556934952735901, "learning_rate": 1.999080004839905e-05, "loss": 0.8867667317390442, "step": 147 }, { "epoch": 0.1920635886205568, "grad_norm": 0.8031500577926636, "learning_rate": 1.999017659823338e-05, "loss": 0.9501492381095886, "step": 148 }, { "epoch": 0.1933613155706957, "grad_norm": 0.7905899882316589, "learning_rate": 1.9989532720685115e-05, "loss": 0.9475319981575012, "step": 149 }, { "epoch": 0.1946590425208346, "grad_norm": 0.7352354526519775, "learning_rate": 1.998886841707083e-05, "loss": 0.8857019543647766, "step": 150 }, { "epoch": 0.19595676947097349, "grad_norm": 0.7715173363685608, "learning_rate": 1.9988183688748862e-05, "loss": 0.9451955556869507, "step": 151 }, { "epoch": 0.1972544964211124, "grad_norm": 0.7771379351615906, "learning_rate": 1.9987478537119297e-05, "loss": 0.9485697150230408, "step": 152 }, { "epoch": 0.19855222337125128, "grad_norm": 0.7867424488067627, "learning_rate": 1.9986752963624002e-05, "loss": 0.9234886169433594, "step": 153 }, { "epoch": 0.1998499503213902, "grad_norm": 0.8710278272628784, "learning_rate": 1.998600696974658e-05, "loss": 0.9107885956764221, "step": 154 }, { "epoch": 0.20114767727152907, "grad_norm": 0.7554876208305359, "learning_rate": 1.9985240557012406e-05, "loss": 0.9065303206443787, "step": 155 }, { "epoch": 0.20244540422166798, "grad_norm": 0.7357529997825623, "learning_rate": 1.99844537269886e-05, "loss": 0.7701905965805054, "step": 156 }, { "epoch": 0.20374313117180687, "grad_norm": 0.8202847242355347, "learning_rate": 1.9983646481284028e-05, "loss": 0.992992103099823, "step": 157 }, { "epoch": 0.20504085812194578, "grad_norm": 0.7828136682510376, "learning_rate": 1.9982818821549308e-05, "loss": 0.9072571992874146, "step": 158 }, { "epoch": 0.2063385850720847, "grad_norm": 0.7381945252418518, "learning_rate": 1.9981970749476792e-05, "loss": 0.8416173458099365, "step": 159 }, { "epoch": 0.20763631202222357, "grad_norm": 0.7436814308166504, "learning_rate": 1.998110226680057e-05, "loss": 0.860198438167572, "step": 160 }, { "epoch": 0.20893403897236248, "grad_norm": 0.7724810242652893, "learning_rate": 1.9980213375296468e-05, "loss": 0.8358607292175293, "step": 161 }, { "epoch": 0.21023176592250137, "grad_norm": 0.7248872518539429, "learning_rate": 1.997930407678205e-05, "loss": 0.8103194236755371, "step": 162 }, { "epoch": 0.21152949287264028, "grad_norm": 0.7623717784881592, "learning_rate": 1.99783743731166e-05, "loss": 0.8410395383834839, "step": 163 }, { "epoch": 0.21282721982277916, "grad_norm": 0.7665237188339233, "learning_rate": 1.9977424266201126e-05, "loss": 0.9623262286186218, "step": 164 }, { "epoch": 0.21412494677291807, "grad_norm": 0.7374143600463867, "learning_rate": 1.9976453757978355e-05, "loss": 0.8592593669891357, "step": 165 }, { "epoch": 0.21542267372305696, "grad_norm": 0.7116683721542358, "learning_rate": 1.997546285043273e-05, "loss": 0.7682055234909058, "step": 166 }, { "epoch": 0.21672040067319587, "grad_norm": 0.8028838038444519, "learning_rate": 1.9974451545590407e-05, "loss": 0.9229005575180054, "step": 167 }, { "epoch": 0.21801812762333475, "grad_norm": 0.8015571236610413, "learning_rate": 1.997341984551925e-05, "loss": 0.8815708756446838, "step": 168 }, { "epoch": 0.21931585457347366, "grad_norm": 0.7032439708709717, "learning_rate": 1.9972367752328824e-05, "loss": 0.7823411822319031, "step": 169 }, { "epoch": 0.22061358152361255, "grad_norm": 0.7352714538574219, "learning_rate": 1.9971295268170393e-05, "loss": 0.8304542899131775, "step": 170 }, { "epoch": 0.22191130847375146, "grad_norm": 0.7774588465690613, "learning_rate": 1.9970202395236913e-05, "loss": 0.8442955017089844, "step": 171 }, { "epoch": 0.22320903542389034, "grad_norm": 0.8193069696426392, "learning_rate": 1.996908913576304e-05, "loss": 0.8395213484764099, "step": 172 }, { "epoch": 0.22450676237402925, "grad_norm": 0.805517852306366, "learning_rate": 1.9967955492025094e-05, "loss": 0.8934487104415894, "step": 173 }, { "epoch": 0.22580448932416813, "grad_norm": 0.7246384620666504, "learning_rate": 1.9966801466341107e-05, "loss": 0.8137494325637817, "step": 174 }, { "epoch": 0.22710221627430704, "grad_norm": 0.7587799429893494, "learning_rate": 1.9965627061070755e-05, "loss": 0.8050680756568909, "step": 175 }, { "epoch": 0.22839994322444593, "grad_norm": 0.744683027267456, "learning_rate": 1.996443227861541e-05, "loss": 0.9190195798873901, "step": 176 }, { "epoch": 0.22969767017458484, "grad_norm": 0.7057942748069763, "learning_rate": 1.996321712141809e-05, "loss": 0.771306574344635, "step": 177 }, { "epoch": 0.23099539712472372, "grad_norm": 0.758804440498352, "learning_rate": 1.9961981591963494e-05, "loss": 0.9052093029022217, "step": 178 }, { "epoch": 0.23229312407486263, "grad_norm": 0.761832058429718, "learning_rate": 1.9960725692777956e-05, "loss": 0.8963150382041931, "step": 179 }, { "epoch": 0.23359085102500152, "grad_norm": 0.7698036432266235, "learning_rate": 1.995944942642948e-05, "loss": 0.879082202911377, "step": 180 }, { "epoch": 0.23488857797514043, "grad_norm": 0.7247833013534546, "learning_rate": 1.9958152795527706e-05, "loss": 0.8330357074737549, "step": 181 }, { "epoch": 0.2361863049252793, "grad_norm": 0.8077431321144104, "learning_rate": 1.9956835802723916e-05, "loss": 0.94368577003479, "step": 182 }, { "epoch": 0.23748403187541822, "grad_norm": 0.7545983195304871, "learning_rate": 1.9955498450711026e-05, "loss": 0.8294435739517212, "step": 183 }, { "epoch": 0.2387817588255571, "grad_norm": 0.7249157428741455, "learning_rate": 1.9954140742223586e-05, "loss": 0.8432042598724365, "step": 184 }, { "epoch": 0.24007948577569602, "grad_norm": 0.7442438006401062, "learning_rate": 1.9952762680037758e-05, "loss": 0.8805173635482788, "step": 185 }, { "epoch": 0.2413772127258349, "grad_norm": 0.7329111695289612, "learning_rate": 1.995136426697134e-05, "loss": 0.863207221031189, "step": 186 }, { "epoch": 0.2426749396759738, "grad_norm": 0.716304361820221, "learning_rate": 1.9949945505883723e-05, "loss": 0.8094059824943542, "step": 187 }, { "epoch": 0.2439726666261127, "grad_norm": 0.7312113046646118, "learning_rate": 1.994850639967592e-05, "loss": 0.9180686473846436, "step": 188 }, { "epoch": 0.2452703935762516, "grad_norm": 0.7700150609016418, "learning_rate": 1.994704695129054e-05, "loss": 0.8603487610816956, "step": 189 }, { "epoch": 0.2465681205263905, "grad_norm": 0.7655259370803833, "learning_rate": 1.9945567163711788e-05, "loss": 0.8780601620674133, "step": 190 }, { "epoch": 0.2478658474765294, "grad_norm": 0.7268514633178711, "learning_rate": 1.9944067039965445e-05, "loss": 0.8242926001548767, "step": 191 }, { "epoch": 0.24916357442666828, "grad_norm": 0.7264497876167297, "learning_rate": 1.9942546583118894e-05, "loss": 0.894584596157074, "step": 192 }, { "epoch": 0.25046130137680717, "grad_norm": 0.773765504360199, "learning_rate": 1.994100579628108e-05, "loss": 0.8504235744476318, "step": 193 }, { "epoch": 0.2517590283269461, "grad_norm": 0.6867210865020752, "learning_rate": 1.9939444682602522e-05, "loss": 0.7794942259788513, "step": 194 }, { "epoch": 0.253056755277085, "grad_norm": 0.7574644684791565, "learning_rate": 1.9937863245275303e-05, "loss": 0.8992743492126465, "step": 195 }, { "epoch": 0.2543544822272239, "grad_norm": 0.7294052243232727, "learning_rate": 1.9936261487533066e-05, "loss": 0.8371526002883911, "step": 196 }, { "epoch": 0.25565220917736275, "grad_norm": 0.7199873924255371, "learning_rate": 1.993463941265099e-05, "loss": 0.8135456442832947, "step": 197 }, { "epoch": 0.25694993612750167, "grad_norm": 0.7726846933364868, "learning_rate": 1.993299702394582e-05, "loss": 0.8241779804229736, "step": 198 }, { "epoch": 0.2582476630776406, "grad_norm": 0.7929345369338989, "learning_rate": 1.9931334324775817e-05, "loss": 0.9309947490692139, "step": 199 }, { "epoch": 0.2595453900277795, "grad_norm": 0.7434781193733215, "learning_rate": 1.9929651318540783e-05, "loss": 0.8470789790153503, "step": 200 }, { "epoch": 0.26084311697791834, "grad_norm": 0.8077720403671265, "learning_rate": 1.9927948008682038e-05, "loss": 0.8455624580383301, "step": 201 }, { "epoch": 0.26214084392805725, "grad_norm": 0.7723199725151062, "learning_rate": 1.9926224398682424e-05, "loss": 0.8877855539321899, "step": 202 }, { "epoch": 0.26343857087819617, "grad_norm": 0.723115861415863, "learning_rate": 1.992448049206628e-05, "loss": 0.7923484444618225, "step": 203 }, { "epoch": 0.2647362978283351, "grad_norm": 0.7819997072219849, "learning_rate": 1.9922716292399458e-05, "loss": 0.8195080757141113, "step": 204 }, { "epoch": 0.26603402477847393, "grad_norm": 0.7534734010696411, "learning_rate": 1.9920931803289302e-05, "loss": 0.8843890428543091, "step": 205 }, { "epoch": 0.26733175172861284, "grad_norm": 0.6980569362640381, "learning_rate": 1.9919127028384634e-05, "loss": 0.841879665851593, "step": 206 }, { "epoch": 0.26862947867875175, "grad_norm": 0.7415062189102173, "learning_rate": 1.9917301971375767e-05, "loss": 0.910488486289978, "step": 207 }, { "epoch": 0.26992720562889067, "grad_norm": 0.7163265347480774, "learning_rate": 1.991545663599448e-05, "loss": 0.8969396948814392, "step": 208 }, { "epoch": 0.2712249325790295, "grad_norm": 0.7287595868110657, "learning_rate": 1.9913591026014016e-05, "loss": 0.8557533621788025, "step": 209 }, { "epoch": 0.27252265952916843, "grad_norm": 0.8144972324371338, "learning_rate": 1.9911705145249076e-05, "loss": 0.9075403809547424, "step": 210 }, { "epoch": 0.27382038647930734, "grad_norm": 0.6856523156166077, "learning_rate": 1.9909798997555806e-05, "loss": 0.9015495777130127, "step": 211 }, { "epoch": 0.27511811342944625, "grad_norm": 0.7224120497703552, "learning_rate": 1.99078725868318e-05, "loss": 0.8107393383979797, "step": 212 }, { "epoch": 0.2764158403795851, "grad_norm": 0.783104419708252, "learning_rate": 1.9905925917016077e-05, "loss": 0.831728458404541, "step": 213 }, { "epoch": 0.277713567329724, "grad_norm": 0.765583872795105, "learning_rate": 1.9903958992089087e-05, "loss": 0.872807502746582, "step": 214 }, { "epoch": 0.27901129427986293, "grad_norm": 0.7342137098312378, "learning_rate": 1.990197181607269e-05, "loss": 0.8797867298126221, "step": 215 }, { "epoch": 0.28030902123000184, "grad_norm": 0.7050272822380066, "learning_rate": 1.989996439303016e-05, "loss": 0.8417098522186279, "step": 216 }, { "epoch": 0.2816067481801407, "grad_norm": 0.7334570288658142, "learning_rate": 1.989793672706617e-05, "loss": 0.8433218598365784, "step": 217 }, { "epoch": 0.2829044751302796, "grad_norm": 0.7583123445510864, "learning_rate": 1.9895888822326783e-05, "loss": 0.8300482034683228, "step": 218 }, { "epoch": 0.2842022020804185, "grad_norm": 0.7325905561447144, "learning_rate": 1.9893820682999444e-05, "loss": 0.8698530197143555, "step": 219 }, { "epoch": 0.28549992903055743, "grad_norm": 0.7196786403656006, "learning_rate": 1.9891732313312973e-05, "loss": 0.8875235915184021, "step": 220 }, { "epoch": 0.2867976559806963, "grad_norm": 0.7486999034881592, "learning_rate": 1.9889623717537564e-05, "loss": 0.8711264729499817, "step": 221 }, { "epoch": 0.2880953829308352, "grad_norm": 0.7866005897521973, "learning_rate": 1.9887494899984757e-05, "loss": 0.9035714268684387, "step": 222 }, { "epoch": 0.2893931098809741, "grad_norm": 0.698315441608429, "learning_rate": 1.9885345865007444e-05, "loss": 0.873035728931427, "step": 223 }, { "epoch": 0.290690836831113, "grad_norm": 0.7287175059318542, "learning_rate": 1.9883176616999863e-05, "loss": 0.9040322303771973, "step": 224 }, { "epoch": 0.2919885637812519, "grad_norm": 0.6973027586936951, "learning_rate": 1.9880987160397573e-05, "loss": 0.8214952349662781, "step": 225 }, { "epoch": 0.2932862907313908, "grad_norm": 0.7529054880142212, "learning_rate": 1.987877749967746e-05, "loss": 0.8002289533615112, "step": 226 }, { "epoch": 0.2945840176815297, "grad_norm": 0.7562571167945862, "learning_rate": 1.987654763935772e-05, "loss": 0.8632272481918335, "step": 227 }, { "epoch": 0.2958817446316686, "grad_norm": 0.7309690713882446, "learning_rate": 1.9874297583997852e-05, "loss": 0.835785984992981, "step": 228 }, { "epoch": 0.29717947158180746, "grad_norm": 0.7542479038238525, "learning_rate": 1.9872027338198652e-05, "loss": 0.8635554909706116, "step": 229 }, { "epoch": 0.2984771985319464, "grad_norm": 0.743453860282898, "learning_rate": 1.98697369066022e-05, "loss": 0.918680727481842, "step": 230 }, { "epoch": 0.2984771985319464, "eval_loss": 0.818739116191864, "eval_runtime": 153.6061, "eval_samples_per_second": 33.801, "eval_steps_per_second": 8.45, "step": 230 }, { "epoch": 0.2997749254820853, "grad_norm": 0.766386091709137, "learning_rate": 1.986742629389184e-05, "loss": 0.8685123324394226, "step": 231 }, { "epoch": 0.3010726524322242, "grad_norm": 0.7218268513679504, "learning_rate": 1.98650955047922e-05, "loss": 0.8525049090385437, "step": 232 }, { "epoch": 0.30237037938236305, "grad_norm": 0.7203767895698547, "learning_rate": 1.9862744544069146e-05, "loss": 0.867932915687561, "step": 233 }, { "epoch": 0.30366810633250196, "grad_norm": 0.7556924819946289, "learning_rate": 1.9860373416529804e-05, "loss": 0.8170772790908813, "step": 234 }, { "epoch": 0.3049658332826409, "grad_norm": 0.7739233374595642, "learning_rate": 1.9857982127022527e-05, "loss": 0.8461399674415588, "step": 235 }, { "epoch": 0.3062635602327798, "grad_norm": 0.7455801367759705, "learning_rate": 1.9855570680436896e-05, "loss": 0.8253067135810852, "step": 236 }, { "epoch": 0.3075612871829187, "grad_norm": 0.7704318761825562, "learning_rate": 1.9853139081703712e-05, "loss": 0.9142767786979675, "step": 237 }, { "epoch": 0.30885901413305755, "grad_norm": 0.7740578651428223, "learning_rate": 1.9850687335794974e-05, "loss": 0.8383587002754211, "step": 238 }, { "epoch": 0.31015674108319646, "grad_norm": 0.7392247319221497, "learning_rate": 1.9848215447723888e-05, "loss": 0.8735100030899048, "step": 239 }, { "epoch": 0.3114544680333354, "grad_norm": 0.7605814337730408, "learning_rate": 1.9845723422544834e-05, "loss": 0.9212141633033752, "step": 240 }, { "epoch": 0.3127521949834743, "grad_norm": 0.7394529581069946, "learning_rate": 1.9843211265353376e-05, "loss": 0.8197087049484253, "step": 241 }, { "epoch": 0.31404992193361314, "grad_norm": 0.6981598138809204, "learning_rate": 1.9840678981286237e-05, "loss": 0.77371746301651, "step": 242 }, { "epoch": 0.31534764888375205, "grad_norm": 0.6841283440589905, "learning_rate": 1.98381265755213e-05, "loss": 0.7815872430801392, "step": 243 }, { "epoch": 0.31664537583389096, "grad_norm": 0.7323400974273682, "learning_rate": 1.9835554053277587e-05, "loss": 0.8495661616325378, "step": 244 }, { "epoch": 0.3179431027840299, "grad_norm": 0.7340859174728394, "learning_rate": 1.9832961419815253e-05, "loss": 0.7806031107902527, "step": 245 }, { "epoch": 0.31924082973416873, "grad_norm": 0.7229768633842468, "learning_rate": 1.983034868043558e-05, "loss": 0.8009724617004395, "step": 246 }, { "epoch": 0.32053855668430764, "grad_norm": 0.7510941624641418, "learning_rate": 1.9827715840480962e-05, "loss": 0.9413229823112488, "step": 247 }, { "epoch": 0.32183628363444655, "grad_norm": 0.6999549269676208, "learning_rate": 1.9825062905334883e-05, "loss": 0.7988513112068176, "step": 248 }, { "epoch": 0.32313401058458546, "grad_norm": 0.7060723304748535, "learning_rate": 1.9822389880421927e-05, "loss": 0.8266105651855469, "step": 249 }, { "epoch": 0.3244317375347243, "grad_norm": 0.7090180516242981, "learning_rate": 1.9819696771207756e-05, "loss": 0.8882022500038147, "step": 250 }, { "epoch": 0.32572946448486323, "grad_norm": 0.7266640663146973, "learning_rate": 1.981698358319909e-05, "loss": 0.8313782215118408, "step": 251 }, { "epoch": 0.32702719143500214, "grad_norm": 0.7484982013702393, "learning_rate": 1.981425032194372e-05, "loss": 0.9093562960624695, "step": 252 }, { "epoch": 0.32832491838514105, "grad_norm": 0.7394732236862183, "learning_rate": 1.981149699303047e-05, "loss": 0.8808751106262207, "step": 253 }, { "epoch": 0.3296226453352799, "grad_norm": 0.7643232345581055, "learning_rate": 1.9808723602089198e-05, "loss": 0.9079170823097229, "step": 254 }, { "epoch": 0.3309203722854188, "grad_norm": 0.7218993902206421, "learning_rate": 1.980593015479079e-05, "loss": 0.8374384641647339, "step": 255 }, { "epoch": 0.33221809923555773, "grad_norm": 0.7780535221099854, "learning_rate": 1.9803116656847136e-05, "loss": 0.9171014428138733, "step": 256 }, { "epoch": 0.33351582618569664, "grad_norm": 0.7390936613082886, "learning_rate": 1.9800283114011134e-05, "loss": 0.8307523131370544, "step": 257 }, { "epoch": 0.3348135531358355, "grad_norm": 0.7285546064376831, "learning_rate": 1.9797429532076652e-05, "loss": 0.8579209446907043, "step": 258 }, { "epoch": 0.3361112800859744, "grad_norm": 0.7298453450202942, "learning_rate": 1.9794555916878548e-05, "loss": 0.9177393317222595, "step": 259 }, { "epoch": 0.3374090070361133, "grad_norm": 0.7240604758262634, "learning_rate": 1.9791662274292638e-05, "loss": 0.8674473166465759, "step": 260 }, { "epoch": 0.33870673398625223, "grad_norm": 0.6959360241889954, "learning_rate": 1.978874861023569e-05, "loss": 0.8340597152709961, "step": 261 }, { "epoch": 0.3400044609363911, "grad_norm": 0.711373507976532, "learning_rate": 1.9785814930665404e-05, "loss": 0.8793005347251892, "step": 262 }, { "epoch": 0.34130218788653, "grad_norm": 0.721527099609375, "learning_rate": 1.9782861241580417e-05, "loss": 0.7826907634735107, "step": 263 }, { "epoch": 0.3425999148366689, "grad_norm": 0.7333364486694336, "learning_rate": 1.9779887549020273e-05, "loss": 0.8747556209564209, "step": 264 }, { "epoch": 0.3438976417868078, "grad_norm": 0.6954993605613708, "learning_rate": 1.9776893859065424e-05, "loss": 0.825065553188324, "step": 265 }, { "epoch": 0.3451953687369467, "grad_norm": 0.7496482729911804, "learning_rate": 1.9773880177837202e-05, "loss": 0.8960598111152649, "step": 266 }, { "epoch": 0.3464930956870856, "grad_norm": 0.7554039359092712, "learning_rate": 1.9770846511497833e-05, "loss": 0.8298478722572327, "step": 267 }, { "epoch": 0.3477908226372245, "grad_norm": 0.7233474850654602, "learning_rate": 1.9767792866250386e-05, "loss": 0.8535934090614319, "step": 268 }, { "epoch": 0.3490885495873634, "grad_norm": 0.7677019238471985, "learning_rate": 1.97647192483388e-05, "loss": 0.8413315415382385, "step": 269 }, { "epoch": 0.35038627653750226, "grad_norm": 0.7146613597869873, "learning_rate": 1.976162566404784e-05, "loss": 0.7900301814079285, "step": 270 }, { "epoch": 0.3516840034876412, "grad_norm": 0.7061136364936829, "learning_rate": 1.9758512119703106e-05, "loss": 0.8699895739555359, "step": 271 }, { "epoch": 0.3529817304377801, "grad_norm": 0.7685773968696594, "learning_rate": 1.9755378621671006e-05, "loss": 0.9059665203094482, "step": 272 }, { "epoch": 0.354279457387919, "grad_norm": 0.7667369842529297, "learning_rate": 1.9752225176358757e-05, "loss": 0.8284919857978821, "step": 273 }, { "epoch": 0.35557718433805785, "grad_norm": 0.7389227151870728, "learning_rate": 1.974905179021435e-05, "loss": 0.8445216417312622, "step": 274 }, { "epoch": 0.35687491128819676, "grad_norm": 0.7373800873756409, "learning_rate": 1.9745858469726555e-05, "loss": 0.8499696254730225, "step": 275 }, { "epoch": 0.35817263823833567, "grad_norm": 0.6966509222984314, "learning_rate": 1.9742645221424905e-05, "loss": 0.7845723032951355, "step": 276 }, { "epoch": 0.3594703651884746, "grad_norm": 0.7133153080940247, "learning_rate": 1.9739412051879686e-05, "loss": 0.7712838053703308, "step": 277 }, { "epoch": 0.36076809213861344, "grad_norm": 0.7376941442489624, "learning_rate": 1.973615896770191e-05, "loss": 0.8497350811958313, "step": 278 }, { "epoch": 0.36206581908875235, "grad_norm": 0.7676963806152344, "learning_rate": 1.97328859755433e-05, "loss": 0.8830881714820862, "step": 279 }, { "epoch": 0.36336354603889126, "grad_norm": 0.7721049785614014, "learning_rate": 1.972959308209631e-05, "loss": 0.9047907590866089, "step": 280 }, { "epoch": 0.36466127298903017, "grad_norm": 0.7234658598899841, "learning_rate": 1.9726280294094067e-05, "loss": 0.8566961288452148, "step": 281 }, { "epoch": 0.365958999939169, "grad_norm": 0.7352125644683838, "learning_rate": 1.9722947618310384e-05, "loss": 0.8019842505455017, "step": 282 }, { "epoch": 0.36725672688930794, "grad_norm": 0.7341601848602295, "learning_rate": 1.9719595061559742e-05, "loss": 0.7666940093040466, "step": 283 }, { "epoch": 0.36855445383944685, "grad_norm": 0.7719873785972595, "learning_rate": 1.9716222630697266e-05, "loss": 0.8902671933174133, "step": 284 }, { "epoch": 0.36985218078958576, "grad_norm": 0.754192054271698, "learning_rate": 1.971283033261873e-05, "loss": 0.8718546628952026, "step": 285 }, { "epoch": 0.3711499077397246, "grad_norm": 0.7254419922828674, "learning_rate": 1.9709418174260523e-05, "loss": 0.8636943101882935, "step": 286 }, { "epoch": 0.3724476346898635, "grad_norm": 0.7372341156005859, "learning_rate": 1.9705986162599642e-05, "loss": 0.8579723238945007, "step": 287 }, { "epoch": 0.37374536164000244, "grad_norm": 0.7488671541213989, "learning_rate": 1.9702534304653685e-05, "loss": 0.8281093835830688, "step": 288 }, { "epoch": 0.37504308859014135, "grad_norm": 0.8016876578330994, "learning_rate": 1.9699062607480827e-05, "loss": 0.8639754056930542, "step": 289 }, { "epoch": 0.3763408155402802, "grad_norm": 0.732269823551178, "learning_rate": 1.969557107817981e-05, "loss": 0.8395862579345703, "step": 290 }, { "epoch": 0.3776385424904191, "grad_norm": 0.7406111359596252, "learning_rate": 1.9692059723889927e-05, "loss": 0.8540798425674438, "step": 291 }, { "epoch": 0.378936269440558, "grad_norm": 0.7769038677215576, "learning_rate": 1.968852855179101e-05, "loss": 0.8707680106163025, "step": 292 }, { "epoch": 0.38023399639069694, "grad_norm": 0.7666140198707581, "learning_rate": 1.9684977569103415e-05, "loss": 0.8578312993049622, "step": 293 }, { "epoch": 0.3815317233408358, "grad_norm": 0.7852650284767151, "learning_rate": 1.9681406783087998e-05, "loss": 0.7673178911209106, "step": 294 }, { "epoch": 0.3828294502909747, "grad_norm": 0.6789321899414062, "learning_rate": 1.9677816201046113e-05, "loss": 0.7785404324531555, "step": 295 }, { "epoch": 0.3841271772411136, "grad_norm": 0.7129622101783752, "learning_rate": 1.9674205830319594e-05, "loss": 0.7908732295036316, "step": 296 }, { "epoch": 0.3854249041912525, "grad_norm": 0.7952395081520081, "learning_rate": 1.9670575678290732e-05, "loss": 0.905153751373291, "step": 297 }, { "epoch": 0.3867226311413914, "grad_norm": 0.7407474517822266, "learning_rate": 1.9666925752382275e-05, "loss": 0.8455154895782471, "step": 298 }, { "epoch": 0.3880203580915303, "grad_norm": 0.7149595022201538, "learning_rate": 1.9663256060057395e-05, "loss": 0.7669047117233276, "step": 299 }, { "epoch": 0.3893180850416692, "grad_norm": 0.724448025226593, "learning_rate": 1.9659566608819677e-05, "loss": 0.827459990978241, "step": 300 }, { "epoch": 0.3906158119918081, "grad_norm": 0.7544072270393372, "learning_rate": 1.9655857406213124e-05, "loss": 0.8931189775466919, "step": 301 }, { "epoch": 0.39191353894194697, "grad_norm": 0.7281385064125061, "learning_rate": 1.9652128459822113e-05, "loss": 0.8091886639595032, "step": 302 }, { "epoch": 0.3932112658920859, "grad_norm": 0.7316269874572754, "learning_rate": 1.9648379777271397e-05, "loss": 0.7829949855804443, "step": 303 }, { "epoch": 0.3945089928422248, "grad_norm": 0.7421220541000366, "learning_rate": 1.964461136622608e-05, "loss": 0.8580082058906555, "step": 304 }, { "epoch": 0.3958067197923637, "grad_norm": 0.7127732038497925, "learning_rate": 1.9640823234391614e-05, "loss": 0.7645027041435242, "step": 305 }, { "epoch": 0.39710444674250256, "grad_norm": 0.7605704665184021, "learning_rate": 1.9637015389513765e-05, "loss": 0.8976550698280334, "step": 306 }, { "epoch": 0.39840217369264147, "grad_norm": 0.7157081365585327, "learning_rate": 1.963318783937861e-05, "loss": 0.7898974418640137, "step": 307 }, { "epoch": 0.3996999006427804, "grad_norm": 0.694803774356842, "learning_rate": 1.962934059181253e-05, "loss": 0.8454594612121582, "step": 308 }, { "epoch": 0.4009976275929193, "grad_norm": 0.7790278792381287, "learning_rate": 1.962547365468216e-05, "loss": 0.8850522041320801, "step": 309 }, { "epoch": 0.40229535454305815, "grad_norm": 0.7630907893180847, "learning_rate": 1.962158703589442e-05, "loss": 0.7932512760162354, "step": 310 }, { "epoch": 0.40359308149319706, "grad_norm": 0.7254197597503662, "learning_rate": 1.9617680743396452e-05, "loss": 0.8825772404670715, "step": 311 }, { "epoch": 0.40489080844333597, "grad_norm": 0.6837211847305298, "learning_rate": 1.961375478517564e-05, "loss": 0.787892758846283, "step": 312 }, { "epoch": 0.4061885353934749, "grad_norm": 0.8057960867881775, "learning_rate": 1.9609809169259573e-05, "loss": 0.8797285556793213, "step": 313 }, { "epoch": 0.40748626234361374, "grad_norm": 0.7656168341636658, "learning_rate": 1.960584390371604e-05, "loss": 0.8403958678245544, "step": 314 }, { "epoch": 0.40878398929375265, "grad_norm": 0.7079064249992371, "learning_rate": 1.9601858996653004e-05, "loss": 0.8279827237129211, "step": 315 }, { "epoch": 0.41008171624389156, "grad_norm": 0.7371337413787842, "learning_rate": 1.9597854456218588e-05, "loss": 0.8244680166244507, "step": 316 }, { "epoch": 0.41137944319403047, "grad_norm": 0.7662513256072998, "learning_rate": 1.9593830290601067e-05, "loss": 0.8895809650421143, "step": 317 }, { "epoch": 0.4126771701441694, "grad_norm": 0.7431499361991882, "learning_rate": 1.9589786508028842e-05, "loss": 0.8213914632797241, "step": 318 }, { "epoch": 0.41397489709430824, "grad_norm": 0.7631136178970337, "learning_rate": 1.9585723116770425e-05, "loss": 0.8473777770996094, "step": 319 }, { "epoch": 0.41527262404444715, "grad_norm": 0.7579299807548523, "learning_rate": 1.9581640125134415e-05, "loss": 0.8756963014602661, "step": 320 }, { "epoch": 0.41657035099458606, "grad_norm": 0.75262850522995, "learning_rate": 1.9577537541469506e-05, "loss": 0.8210287094116211, "step": 321 }, { "epoch": 0.41786807794472497, "grad_norm": 0.7107104063034058, "learning_rate": 1.957341537416444e-05, "loss": 0.7835584878921509, "step": 322 }, { "epoch": 0.4191658048948638, "grad_norm": 0.7898051738739014, "learning_rate": 1.9569273631648005e-05, "loss": 0.8497559428215027, "step": 323 }, { "epoch": 0.42046353184500274, "grad_norm": 0.7612116932868958, "learning_rate": 1.9565112322389017e-05, "loss": 0.8350054621696472, "step": 324 }, { "epoch": 0.42176125879514165, "grad_norm": 0.7677422761917114, "learning_rate": 1.95609314548963e-05, "loss": 0.8192890286445618, "step": 325 }, { "epoch": 0.42305898574528056, "grad_norm": 0.7246314883232117, "learning_rate": 1.955673103771867e-05, "loss": 0.7340703010559082, "step": 326 }, { "epoch": 0.4243567126954194, "grad_norm": 0.7684205770492554, "learning_rate": 1.9552511079444914e-05, "loss": 0.8853901028633118, "step": 327 }, { "epoch": 0.4256544396455583, "grad_norm": 0.7860892415046692, "learning_rate": 1.9548271588703783e-05, "loss": 0.8821452856063843, "step": 328 }, { "epoch": 0.42695216659569724, "grad_norm": 0.6936531662940979, "learning_rate": 1.954401257416396e-05, "loss": 0.7570967674255371, "step": 329 }, { "epoch": 0.42824989354583615, "grad_norm": 0.7630011439323425, "learning_rate": 1.9539734044534057e-05, "loss": 0.8907523155212402, "step": 330 }, { "epoch": 0.429547620495975, "grad_norm": 0.7460386753082275, "learning_rate": 1.9535436008562576e-05, "loss": 0.8264608383178711, "step": 331 }, { "epoch": 0.4308453474461139, "grad_norm": 0.6788963675498962, "learning_rate": 1.9531118475037916e-05, "loss": 0.7674898505210876, "step": 332 }, { "epoch": 0.4321430743962528, "grad_norm": 0.7098816633224487, "learning_rate": 1.9526781452788342e-05, "loss": 0.8403605818748474, "step": 333 }, { "epoch": 0.43344080134639174, "grad_norm": 0.7769349813461304, "learning_rate": 1.9522424950681964e-05, "loss": 0.8386063575744629, "step": 334 }, { "epoch": 0.4347385282965306, "grad_norm": 0.7037668824195862, "learning_rate": 1.951804897762673e-05, "loss": 0.7852950096130371, "step": 335 }, { "epoch": 0.4360362552466695, "grad_norm": 0.6976593136787415, "learning_rate": 1.951365354257039e-05, "loss": 0.7828155159950256, "step": 336 }, { "epoch": 0.4373339821968084, "grad_norm": 0.6809433698654175, "learning_rate": 1.9509238654500505e-05, "loss": 0.7821134924888611, "step": 337 }, { "epoch": 0.4386317091469473, "grad_norm": 0.7023005485534668, "learning_rate": 1.95048043224444e-05, "loss": 0.8137397766113281, "step": 338 }, { "epoch": 0.4399294360970862, "grad_norm": 0.709460973739624, "learning_rate": 1.9500350555469164e-05, "loss": 0.8287125825881958, "step": 339 }, { "epoch": 0.4412271630472251, "grad_norm": 0.7066413760185242, "learning_rate": 1.9495877362681613e-05, "loss": 0.7227614521980286, "step": 340 }, { "epoch": 0.442524889997364, "grad_norm": 0.7095454335212708, "learning_rate": 1.9491384753228308e-05, "loss": 0.8386364579200745, "step": 341 }, { "epoch": 0.4438226169475029, "grad_norm": 0.704826831817627, "learning_rate": 1.948687273629549e-05, "loss": 0.7332904934883118, "step": 342 }, { "epoch": 0.44512034389764177, "grad_norm": 0.7315965294837952, "learning_rate": 1.9482341321109096e-05, "loss": 0.8262498378753662, "step": 343 }, { "epoch": 0.4464180708477807, "grad_norm": 0.7236066460609436, "learning_rate": 1.947779051693472e-05, "loss": 0.8105201721191406, "step": 344 }, { "epoch": 0.4477157977979196, "grad_norm": 0.7457305192947388, "learning_rate": 1.9473220333077604e-05, "loss": 0.9067633748054504, "step": 345 }, { "epoch": 0.4490135247480585, "grad_norm": 0.7768529653549194, "learning_rate": 1.946863077888262e-05, "loss": 0.9473153352737427, "step": 346 }, { "epoch": 0.45031125169819736, "grad_norm": 0.7324157357215881, "learning_rate": 1.946402186373424e-05, "loss": 0.8552070260047913, "step": 347 }, { "epoch": 0.45160897864833627, "grad_norm": 0.7343083024024963, "learning_rate": 1.9459393597056536e-05, "loss": 0.7906739115715027, "step": 348 }, { "epoch": 0.4529067055984752, "grad_norm": 0.7099336385726929, "learning_rate": 1.9454745988313135e-05, "loss": 0.7985537052154541, "step": 349 }, { "epoch": 0.4542044325486141, "grad_norm": 0.7202642560005188, "learning_rate": 1.945007904700723e-05, "loss": 0.8377722501754761, "step": 350 }, { "epoch": 0.45550215949875295, "grad_norm": 0.7456194162368774, "learning_rate": 1.9445392782681523e-05, "loss": 0.7578713893890381, "step": 351 }, { "epoch": 0.45679988644889186, "grad_norm": 0.6951096653938293, "learning_rate": 1.9440687204918245e-05, "loss": 0.8215861320495605, "step": 352 }, { "epoch": 0.45809761339903077, "grad_norm": 0.6824142932891846, "learning_rate": 1.943596232333911e-05, "loss": 0.7992759346961975, "step": 353 }, { "epoch": 0.4593953403491697, "grad_norm": 0.7076693773269653, "learning_rate": 1.9431218147605307e-05, "loss": 0.889447033405304, "step": 354 }, { "epoch": 0.46069306729930853, "grad_norm": 0.7202051877975464, "learning_rate": 1.9426454687417474e-05, "loss": 0.7953578233718872, "step": 355 }, { "epoch": 0.46199079424944745, "grad_norm": 0.6777750253677368, "learning_rate": 1.942167195251568e-05, "loss": 0.7135353088378906, "step": 356 }, { "epoch": 0.46328852119958636, "grad_norm": 0.7169584035873413, "learning_rate": 1.941686995267941e-05, "loss": 0.8654831051826477, "step": 357 }, { "epoch": 0.46458624814972527, "grad_norm": 0.7217689752578735, "learning_rate": 1.941204869772753e-05, "loss": 0.8449923992156982, "step": 358 }, { "epoch": 0.4658839750998641, "grad_norm": 0.7016704678535461, "learning_rate": 1.9407208197518296e-05, "loss": 0.8285680413246155, "step": 359 }, { "epoch": 0.46718170205000303, "grad_norm": 0.7271103262901306, "learning_rate": 1.94023484619493e-05, "loss": 0.788341760635376, "step": 360 }, { "epoch": 0.46847942900014194, "grad_norm": 0.7725624442100525, "learning_rate": 1.9397469500957478e-05, "loss": 0.8492755889892578, "step": 361 }, { "epoch": 0.46977715595028086, "grad_norm": 0.737015962600708, "learning_rate": 1.939257132451906e-05, "loss": 0.8843685388565063, "step": 362 }, { "epoch": 0.4710748829004197, "grad_norm": 0.7315338850021362, "learning_rate": 1.9387653942649586e-05, "loss": 0.8183721899986267, "step": 363 }, { "epoch": 0.4723726098505586, "grad_norm": 0.7253148555755615, "learning_rate": 1.9382717365403854e-05, "loss": 0.8446192145347595, "step": 364 }, { "epoch": 0.47367033680069753, "grad_norm": 0.7184107303619385, "learning_rate": 1.9377761602875913e-05, "loss": 0.8196067214012146, "step": 365 }, { "epoch": 0.47496806375083644, "grad_norm": 0.7668046355247498, "learning_rate": 1.937278666519905e-05, "loss": 0.8784077167510986, "step": 366 }, { "epoch": 0.4762657907009753, "grad_norm": 0.7028603553771973, "learning_rate": 1.9367792562545744e-05, "loss": 0.8172916769981384, "step": 367 }, { "epoch": 0.4775635176511142, "grad_norm": 0.7071288824081421, "learning_rate": 1.9362779305127674e-05, "loss": 0.7726463079452515, "step": 368 }, { "epoch": 0.4788612446012531, "grad_norm": 0.744328498840332, "learning_rate": 1.9357746903195686e-05, "loss": 0.8223643898963928, "step": 369 }, { "epoch": 0.48015897155139203, "grad_norm": 0.7051971554756165, "learning_rate": 1.9352695367039764e-05, "loss": 0.7989709973335266, "step": 370 }, { "epoch": 0.4814566985015309, "grad_norm": 0.6921087503433228, "learning_rate": 1.9347624706989026e-05, "loss": 0.8276992440223694, "step": 371 }, { "epoch": 0.4827544254516698, "grad_norm": 0.775720477104187, "learning_rate": 1.9342534933411683e-05, "loss": 0.8847764730453491, "step": 372 }, { "epoch": 0.4840521524018087, "grad_norm": 0.7056650519371033, "learning_rate": 1.9337426056715036e-05, "loss": 0.8185163736343384, "step": 373 }, { "epoch": 0.4853498793519476, "grad_norm": 0.746159017086029, "learning_rate": 1.9332298087345447e-05, "loss": 0.8038766980171204, "step": 374 }, { "epoch": 0.4866476063020865, "grad_norm": 0.7275370359420776, "learning_rate": 1.932715103578831e-05, "loss": 0.8622571229934692, "step": 375 }, { "epoch": 0.4879453332522254, "grad_norm": 0.6875770688056946, "learning_rate": 1.9321984912568048e-05, "loss": 0.7297530770301819, "step": 376 }, { "epoch": 0.4892430602023643, "grad_norm": 0.7196366190910339, "learning_rate": 1.9316799728248074e-05, "loss": 0.8098776340484619, "step": 377 }, { "epoch": 0.4905407871525032, "grad_norm": 0.8017922043800354, "learning_rate": 1.9311595493430776e-05, "loss": 0.8927175998687744, "step": 378 }, { "epoch": 0.49183851410264207, "grad_norm": 0.752349317073822, "learning_rate": 1.93063722187575e-05, "loss": 0.8595757484436035, "step": 379 }, { "epoch": 0.493136241052781, "grad_norm": 0.7166591882705688, "learning_rate": 1.9301129914908516e-05, "loss": 0.8619329333305359, "step": 380 }, { "epoch": 0.4944339680029199, "grad_norm": 0.7622588872909546, "learning_rate": 1.9295868592603012e-05, "loss": 0.9877883195877075, "step": 381 }, { "epoch": 0.4957316949530588, "grad_norm": 0.738442063331604, "learning_rate": 1.929058826259906e-05, "loss": 0.8450830578804016, "step": 382 }, { "epoch": 0.49702942190319765, "grad_norm": 0.7250852584838867, "learning_rate": 1.9285288935693597e-05, "loss": 0.8014863133430481, "step": 383 }, { "epoch": 0.49832714885333657, "grad_norm": 0.7121626138687134, "learning_rate": 1.9279970622722403e-05, "loss": 0.8381094932556152, "step": 384 }, { "epoch": 0.4996248758034755, "grad_norm": 0.7626416087150574, "learning_rate": 1.927463333456009e-05, "loss": 0.8965335488319397, "step": 385 }, { "epoch": 0.5009226027536143, "grad_norm": 0.7094375491142273, "learning_rate": 1.9269277082120053e-05, "loss": 0.8557580709457397, "step": 386 }, { "epoch": 0.5022203297037533, "grad_norm": 0.7018871903419495, "learning_rate": 1.926390187635448e-05, "loss": 0.8587688207626343, "step": 387 }, { "epoch": 0.5035180566538922, "grad_norm": 0.7267133593559265, "learning_rate": 1.92585077282543e-05, "loss": 0.8346423506736755, "step": 388 }, { "epoch": 0.504815783604031, "grad_norm": 0.7274966835975647, "learning_rate": 1.9253094648849183e-05, "loss": 0.8169071078300476, "step": 389 }, { "epoch": 0.50611351055417, "grad_norm": 0.7901791334152222, "learning_rate": 1.924766264920751e-05, "loss": 0.9163885116577148, "step": 390 }, { "epoch": 0.5074112375043088, "grad_norm": 0.7128793001174927, "learning_rate": 1.9242211740436335e-05, "loss": 0.8264936804771423, "step": 391 }, { "epoch": 0.5087089644544478, "grad_norm": 0.7791725993156433, "learning_rate": 1.9236741933681396e-05, "loss": 0.830746054649353, "step": 392 }, { "epoch": 0.5100066914045867, "grad_norm": 0.7333115339279175, "learning_rate": 1.9231253240127062e-05, "loss": 0.7689610719680786, "step": 393 }, { "epoch": 0.5113044183547255, "grad_norm": 0.722161591053009, "learning_rate": 1.922574567099632e-05, "loss": 0.8242402076721191, "step": 394 }, { "epoch": 0.5126021453048645, "grad_norm": 0.7445337176322937, "learning_rate": 1.9220219237550757e-05, "loss": 0.8102379441261292, "step": 395 }, { "epoch": 0.5138998722550033, "grad_norm": 0.6720981001853943, "learning_rate": 1.921467395109053e-05, "loss": 0.7922290563583374, "step": 396 }, { "epoch": 0.5151975992051423, "grad_norm": 0.793062686920166, "learning_rate": 1.9209109822954345e-05, "loss": 0.8537084460258484, "step": 397 }, { "epoch": 0.5164953261552812, "grad_norm": 0.7766822576522827, "learning_rate": 1.9203526864519432e-05, "loss": 0.8576462864875793, "step": 398 }, { "epoch": 0.51779305310542, "grad_norm": 0.7053048610687256, "learning_rate": 1.919792508720154e-05, "loss": 0.7955272197723389, "step": 399 }, { "epoch": 0.519090780055559, "grad_norm": 0.7525441646575928, "learning_rate": 1.9192304502454876e-05, "loss": 0.7955189347267151, "step": 400 }, { "epoch": 0.5203885070056978, "grad_norm": 0.7097117304801941, "learning_rate": 1.918666512177211e-05, "loss": 0.8108992576599121, "step": 401 }, { "epoch": 0.5216862339558367, "grad_norm": 0.7281200885772705, "learning_rate": 1.918100695668436e-05, "loss": 0.7774943113327026, "step": 402 }, { "epoch": 0.5229839609059757, "grad_norm": 0.6979084610939026, "learning_rate": 1.917533001876113e-05, "loss": 0.8288201093673706, "step": 403 }, { "epoch": 0.5242816878561145, "grad_norm": 0.7136226892471313, "learning_rate": 1.916963431961033e-05, "loss": 0.8710139393806458, "step": 404 }, { "epoch": 0.5255794148062535, "grad_norm": 0.6950761079788208, "learning_rate": 1.916391987087822e-05, "loss": 0.82500821352005, "step": 405 }, { "epoch": 0.5268771417563923, "grad_norm": 0.7169130444526672, "learning_rate": 1.9158186684249397e-05, "loss": 0.8732189536094666, "step": 406 }, { "epoch": 0.5281748687065312, "grad_norm": 0.71788489818573, "learning_rate": 1.9152434771446783e-05, "loss": 0.7809304594993591, "step": 407 }, { "epoch": 0.5294725956566702, "grad_norm": 0.7155045866966248, "learning_rate": 1.914666414423158e-05, "loss": 0.7732210159301758, "step": 408 }, { "epoch": 0.530770322606809, "grad_norm": 0.6769919991493225, "learning_rate": 1.914087481440326e-05, "loss": 0.8261522650718689, "step": 409 }, { "epoch": 0.5320680495569479, "grad_norm": 0.7309243679046631, "learning_rate": 1.9135066793799538e-05, "loss": 0.7936241626739502, "step": 410 }, { "epoch": 0.5333657765070868, "grad_norm": 0.6851993203163147, "learning_rate": 1.912924009429635e-05, "loss": 0.8394724130630493, "step": 411 }, { "epoch": 0.5346635034572257, "grad_norm": 0.7112469673156738, "learning_rate": 1.9123394727807816e-05, "loss": 0.8659080862998962, "step": 412 }, { "epoch": 0.5359612304073647, "grad_norm": 0.8407036066055298, "learning_rate": 1.9117530706286232e-05, "loss": 0.8815537095069885, "step": 413 }, { "epoch": 0.5372589573575035, "grad_norm": 0.7725140452384949, "learning_rate": 1.9111648041722044e-05, "loss": 0.8264433741569519, "step": 414 }, { "epoch": 0.5385566843076424, "grad_norm": 0.7106306552886963, "learning_rate": 1.91057467461438e-05, "loss": 0.8120384812355042, "step": 415 }, { "epoch": 0.5398544112577813, "grad_norm": 0.7314519882202148, "learning_rate": 1.9099826831618168e-05, "loss": 0.7814322113990784, "step": 416 }, { "epoch": 0.5411521382079202, "grad_norm": 0.7492959499359131, "learning_rate": 1.909388831024987e-05, "loss": 0.8211044669151306, "step": 417 }, { "epoch": 0.542449865158059, "grad_norm": 0.7524264454841614, "learning_rate": 1.908793119418168e-05, "loss": 0.831349790096283, "step": 418 }, { "epoch": 0.543747592108198, "grad_norm": 0.768027663230896, "learning_rate": 1.9081955495594388e-05, "loss": 0.777296245098114, "step": 419 }, { "epoch": 0.5450453190583369, "grad_norm": 0.6683104038238525, "learning_rate": 1.9075961226706784e-05, "loss": 0.8545945882797241, "step": 420 }, { "epoch": 0.5463430460084758, "grad_norm": 0.7471824288368225, "learning_rate": 1.906994839977564e-05, "loss": 0.8631961941719055, "step": 421 }, { "epoch": 0.5476407729586147, "grad_norm": 0.7404365539550781, "learning_rate": 1.9063917027095664e-05, "loss": 0.8402459025382996, "step": 422 }, { "epoch": 0.5489384999087535, "grad_norm": 0.790240466594696, "learning_rate": 1.905786712099948e-05, "loss": 0.8911325335502625, "step": 423 }, { "epoch": 0.5502362268588925, "grad_norm": 0.7139849662780762, "learning_rate": 1.9051798693857617e-05, "loss": 0.8359181880950928, "step": 424 }, { "epoch": 0.5515339538090314, "grad_norm": 0.7506136894226074, "learning_rate": 1.904571175807848e-05, "loss": 0.8717991709709167, "step": 425 }, { "epoch": 0.5528316807591702, "grad_norm": 0.7033493518829346, "learning_rate": 1.9039606326108297e-05, "loss": 0.808268666267395, "step": 426 }, { "epoch": 0.5541294077093092, "grad_norm": 0.7442057132720947, "learning_rate": 1.903348241043114e-05, "loss": 0.8272799849510193, "step": 427 }, { "epoch": 0.555427134659448, "grad_norm": 0.7257173657417297, "learning_rate": 1.902734002356887e-05, "loss": 0.8194448947906494, "step": 428 }, { "epoch": 0.556724861609587, "grad_norm": 0.7403514385223389, "learning_rate": 1.9021179178081107e-05, "loss": 0.7172797322273254, "step": 429 }, { "epoch": 0.5580225885597259, "grad_norm": 0.7432394623756409, "learning_rate": 1.9014999886565226e-05, "loss": 0.7437801361083984, "step": 430 }, { "epoch": 0.5593203155098647, "grad_norm": 0.6978660225868225, "learning_rate": 1.9008802161656308e-05, "loss": 0.7967916131019592, "step": 431 }, { "epoch": 0.5606180424600037, "grad_norm": 0.7165699005126953, "learning_rate": 1.9002586016027136e-05, "loss": 0.8070824146270752, "step": 432 }, { "epoch": 0.5619157694101425, "grad_norm": 0.7089285254478455, "learning_rate": 1.8996351462388153e-05, "loss": 0.8515596389770508, "step": 433 }, { "epoch": 0.5632134963602814, "grad_norm": 0.7979022860527039, "learning_rate": 1.8990098513487447e-05, "loss": 0.8934742212295532, "step": 434 }, { "epoch": 0.5645112233104204, "grad_norm": 0.6929235458374023, "learning_rate": 1.898382718211071e-05, "loss": 0.7550987601280212, "step": 435 }, { "epoch": 0.5658089502605592, "grad_norm": 0.7286235094070435, "learning_rate": 1.897753748108123e-05, "loss": 0.8770807981491089, "step": 436 }, { "epoch": 0.5671066772106982, "grad_norm": 0.7233553528785706, "learning_rate": 1.8971229423259855e-05, "loss": 0.7454729080200195, "step": 437 }, { "epoch": 0.568404404160837, "grad_norm": 0.7452800869941711, "learning_rate": 1.8964903021544964e-05, "loss": 0.8079807758331299, "step": 438 }, { "epoch": 0.5697021311109759, "grad_norm": 0.696835994720459, "learning_rate": 1.895855828887245e-05, "loss": 0.8501238226890564, "step": 439 }, { "epoch": 0.5709998580611149, "grad_norm": 0.6924627423286438, "learning_rate": 1.895219523821568e-05, "loss": 0.7888904213905334, "step": 440 }, { "epoch": 0.5722975850112537, "grad_norm": 0.764805793762207, "learning_rate": 1.894581388258549e-05, "loss": 0.8138964772224426, "step": 441 }, { "epoch": 0.5735953119613926, "grad_norm": 0.8151068091392517, "learning_rate": 1.8939414235030137e-05, "loss": 0.8286200165748596, "step": 442 }, { "epoch": 0.5748930389115315, "grad_norm": 0.739456832408905, "learning_rate": 1.893299630863527e-05, "loss": 0.7820205092430115, "step": 443 }, { "epoch": 0.5761907658616704, "grad_norm": 0.7076554298400879, "learning_rate": 1.892656011652393e-05, "loss": 0.8406723737716675, "step": 444 }, { "epoch": 0.5774884928118094, "grad_norm": 0.6758636832237244, "learning_rate": 1.8920105671856507e-05, "loss": 0.793111264705658, "step": 445 }, { "epoch": 0.5787862197619482, "grad_norm": 0.7238133549690247, "learning_rate": 1.89136329878307e-05, "loss": 0.7582585215568542, "step": 446 }, { "epoch": 0.5800839467120871, "grad_norm": 0.7192074656486511, "learning_rate": 1.890714207768151e-05, "loss": 0.7284867763519287, "step": 447 }, { "epoch": 0.581381673662226, "grad_norm": 0.7265046834945679, "learning_rate": 1.8900632954681203e-05, "loss": 0.836294412612915, "step": 448 }, { "epoch": 0.5826794006123649, "grad_norm": 0.7325915098190308, "learning_rate": 1.8894105632139296e-05, "loss": 0.7910576462745667, "step": 449 }, { "epoch": 0.5839771275625038, "grad_norm": 0.7702357172966003, "learning_rate": 1.8887560123402505e-05, "loss": 0.8775222301483154, "step": 450 }, { "epoch": 0.5852748545126427, "grad_norm": 0.7335582971572876, "learning_rate": 1.888099644185474e-05, "loss": 0.8012707829475403, "step": 451 }, { "epoch": 0.5865725814627816, "grad_norm": 0.733706533908844, "learning_rate": 1.887441460091707e-05, "loss": 0.7948039174079895, "step": 452 }, { "epoch": 0.5878703084129205, "grad_norm": 0.7587592005729675, "learning_rate": 1.886781461404769e-05, "loss": 0.804535448551178, "step": 453 }, { "epoch": 0.5891680353630594, "grad_norm": 0.7819000482559204, "learning_rate": 1.886119649474191e-05, "loss": 0.7766174077987671, "step": 454 }, { "epoch": 0.5904657623131983, "grad_norm": 0.69929039478302, "learning_rate": 1.8854560256532098e-05, "loss": 0.7503871321678162, "step": 455 }, { "epoch": 0.5917634892633372, "grad_norm": 0.742264449596405, "learning_rate": 1.8847905912987693e-05, "loss": 0.7669814229011536, "step": 456 }, { "epoch": 0.5930612162134761, "grad_norm": 0.7957385182380676, "learning_rate": 1.8841233477715136e-05, "loss": 0.7808370590209961, "step": 457 }, { "epoch": 0.5943589431636149, "grad_norm": 0.7357493042945862, "learning_rate": 1.8834542964357875e-05, "loss": 0.8638509511947632, "step": 458 }, { "epoch": 0.5956566701137539, "grad_norm": 0.6800391674041748, "learning_rate": 1.8827834386596306e-05, "loss": 0.8268325924873352, "step": 459 }, { "epoch": 0.5969543970638927, "grad_norm": 0.6685859560966492, "learning_rate": 1.882110775814778e-05, "loss": 0.7641065716743469, "step": 460 }, { "epoch": 0.5969543970638927, "eval_loss": 0.788587212562561, "eval_runtime": 140.6113, "eval_samples_per_second": 36.924, "eval_steps_per_second": 9.231, "step": 460 }, { "epoch": 0.5982521240140317, "grad_norm": 0.7249795794487, "learning_rate": 1.881436309276655e-05, "loss": 0.8106693625450134, "step": 461 }, { "epoch": 0.5995498509641706, "grad_norm": 0.7279155254364014, "learning_rate": 1.8807600404243746e-05, "loss": 0.7669492363929749, "step": 462 }, { "epoch": 0.6008475779143094, "grad_norm": 0.6802601218223572, "learning_rate": 1.8800819706407355e-05, "loss": 0.7968916296958923, "step": 463 }, { "epoch": 0.6021453048644484, "grad_norm": 0.6981019973754883, "learning_rate": 1.879402101312219e-05, "loss": 0.736625075340271, "step": 464 }, { "epoch": 0.6034430318145872, "grad_norm": 0.7771289944648743, "learning_rate": 1.8787204338289858e-05, "loss": 0.8314676284790039, "step": 465 }, { "epoch": 0.6047407587647261, "grad_norm": 0.7184056043624878, "learning_rate": 1.8780369695848733e-05, "loss": 0.7979223132133484, "step": 466 }, { "epoch": 0.6060384857148651, "grad_norm": 0.7473218441009521, "learning_rate": 1.8773517099773927e-05, "loss": 0.858469545841217, "step": 467 }, { "epoch": 0.6073362126650039, "grad_norm": 0.683022141456604, "learning_rate": 1.8766646564077265e-05, "loss": 0.8193258047103882, "step": 468 }, { "epoch": 0.6086339396151429, "grad_norm": 0.7081974148750305, "learning_rate": 1.8759758102807253e-05, "loss": 0.7676112055778503, "step": 469 }, { "epoch": 0.6099316665652817, "grad_norm": 0.7614895105361938, "learning_rate": 1.8752851730049055e-05, "loss": 0.8635563254356384, "step": 470 }, { "epoch": 0.6112293935154206, "grad_norm": 0.7243057489395142, "learning_rate": 1.8745927459924454e-05, "loss": 0.9161559343338013, "step": 471 }, { "epoch": 0.6125271204655596, "grad_norm": 0.6948226690292358, "learning_rate": 1.8738985306591826e-05, "loss": 0.7749679684638977, "step": 472 }, { "epoch": 0.6138248474156984, "grad_norm": 0.7040874361991882, "learning_rate": 1.8732025284246122e-05, "loss": 0.79802006483078, "step": 473 }, { "epoch": 0.6151225743658374, "grad_norm": 0.7108686566352844, "learning_rate": 1.8725047407118823e-05, "loss": 0.7963647246360779, "step": 474 }, { "epoch": 0.6164203013159762, "grad_norm": 0.6806232333183289, "learning_rate": 1.8718051689477923e-05, "loss": 0.8362119197845459, "step": 475 }, { "epoch": 0.6177180282661151, "grad_norm": 0.7135924696922302, "learning_rate": 1.8711038145627893e-05, "loss": 0.8811363577842712, "step": 476 }, { "epoch": 0.6190157552162541, "grad_norm": 0.7035737633705139, "learning_rate": 1.8704006789909654e-05, "loss": 0.839409351348877, "step": 477 }, { "epoch": 0.6203134821663929, "grad_norm": 0.6822429299354553, "learning_rate": 1.8696957636700555e-05, "loss": 0.8191482424736023, "step": 478 }, { "epoch": 0.6216112091165318, "grad_norm": 0.731574296951294, "learning_rate": 1.868989070041432e-05, "loss": 0.853705108165741, "step": 479 }, { "epoch": 0.6229089360666707, "grad_norm": 0.7717382907867432, "learning_rate": 1.8682805995501052e-05, "loss": 0.7867730259895325, "step": 480 }, { "epoch": 0.6242066630168096, "grad_norm": 0.7173001170158386, "learning_rate": 1.8675703536447178e-05, "loss": 0.8229404091835022, "step": 481 }, { "epoch": 0.6255043899669486, "grad_norm": 0.7436506748199463, "learning_rate": 1.866858333777543e-05, "loss": 0.8175429105758667, "step": 482 }, { "epoch": 0.6268021169170874, "grad_norm": 0.6823157072067261, "learning_rate": 1.8661445414044813e-05, "loss": 0.8235064148902893, "step": 483 }, { "epoch": 0.6280998438672263, "grad_norm": 0.6958295702934265, "learning_rate": 1.865428977985057e-05, "loss": 0.8292087316513062, "step": 484 }, { "epoch": 0.6293975708173652, "grad_norm": 0.7212422490119934, "learning_rate": 1.8647116449824165e-05, "loss": 0.8680652379989624, "step": 485 }, { "epoch": 0.6306952977675041, "grad_norm": 0.692675769329071, "learning_rate": 1.8639925438633243e-05, "loss": 0.8230209350585938, "step": 486 }, { "epoch": 0.631993024717643, "grad_norm": 0.7433279752731323, "learning_rate": 1.86327167609816e-05, "loss": 0.7730977535247803, "step": 487 }, { "epoch": 0.6332907516677819, "grad_norm": 0.7101516723632812, "learning_rate": 1.8625490431609154e-05, "loss": 0.9187572002410889, "step": 488 }, { "epoch": 0.6345884786179208, "grad_norm": 0.7050445675849915, "learning_rate": 1.8618246465291925e-05, "loss": 0.8063424229621887, "step": 489 }, { "epoch": 0.6358862055680597, "grad_norm": 0.7434412240982056, "learning_rate": 1.861098487684199e-05, "loss": 0.7892963290214539, "step": 490 }, { "epoch": 0.6371839325181986, "grad_norm": 0.7191323041915894, "learning_rate": 1.8603705681107456e-05, "loss": 0.7660176157951355, "step": 491 }, { "epoch": 0.6384816594683375, "grad_norm": 0.7202406525611877, "learning_rate": 1.8596408892972442e-05, "loss": 0.8213373422622681, "step": 492 }, { "epoch": 0.6397793864184764, "grad_norm": 0.6945679783821106, "learning_rate": 1.858909452735703e-05, "loss": 0.7523878216743469, "step": 493 }, { "epoch": 0.6410771133686153, "grad_norm": 0.8023699522018433, "learning_rate": 1.858176259921724e-05, "loss": 0.8551954030990601, "step": 494 }, { "epoch": 0.6423748403187541, "grad_norm": 0.728702962398529, "learning_rate": 1.857441312354502e-05, "loss": 0.7901893854141235, "step": 495 }, { "epoch": 0.6436725672688931, "grad_norm": 0.7125030755996704, "learning_rate": 1.856704611536818e-05, "loss": 0.8292658925056458, "step": 496 }, { "epoch": 0.644970294219032, "grad_norm": 0.748110294342041, "learning_rate": 1.8559661589750387e-05, "loss": 0.8110982179641724, "step": 497 }, { "epoch": 0.6462680211691709, "grad_norm": 0.7424649000167847, "learning_rate": 1.8552259561791133e-05, "loss": 0.7920522689819336, "step": 498 }, { "epoch": 0.6475657481193098, "grad_norm": 0.7908960580825806, "learning_rate": 1.8544840046625686e-05, "loss": 0.9255160093307495, "step": 499 }, { "epoch": 0.6488634750694486, "grad_norm": 0.7190539240837097, "learning_rate": 1.8537403059425082e-05, "loss": 0.8494732975959778, "step": 500 }, { "epoch": 0.6501612020195876, "grad_norm": 0.7224424481391907, "learning_rate": 1.852994861539607e-05, "loss": 0.7837664484977722, "step": 501 }, { "epoch": 0.6514589289697265, "grad_norm": 0.7687528729438782, "learning_rate": 1.8522476729781106e-05, "loss": 0.8091537952423096, "step": 502 }, { "epoch": 0.6527566559198653, "grad_norm": 0.7272804379463196, "learning_rate": 1.8514987417858306e-05, "loss": 0.8691030740737915, "step": 503 }, { "epoch": 0.6540543828700043, "grad_norm": 0.7369651794433594, "learning_rate": 1.8507480694941416e-05, "loss": 0.8802081346511841, "step": 504 }, { "epoch": 0.6553521098201431, "grad_norm": 0.7450799942016602, "learning_rate": 1.849995657637978e-05, "loss": 0.8451288342475891, "step": 505 }, { "epoch": 0.6566498367702821, "grad_norm": 0.723861813545227, "learning_rate": 1.8492415077558325e-05, "loss": 0.8779444694519043, "step": 506 }, { "epoch": 0.657947563720421, "grad_norm": 0.6959301829338074, "learning_rate": 1.8484856213897496e-05, "loss": 0.8489083647727966, "step": 507 }, { "epoch": 0.6592452906705598, "grad_norm": 0.7295985817909241, "learning_rate": 1.847728000085327e-05, "loss": 0.8433302044868469, "step": 508 }, { "epoch": 0.6605430176206988, "grad_norm": 0.6785035133361816, "learning_rate": 1.8469686453917074e-05, "loss": 0.7844301462173462, "step": 509 }, { "epoch": 0.6618407445708376, "grad_norm": 0.7163369059562683, "learning_rate": 1.846207558861579e-05, "loss": 0.8518480658531189, "step": 510 }, { "epoch": 0.6631384715209765, "grad_norm": 0.6807128190994263, "learning_rate": 1.845444742051172e-05, "loss": 0.8048978447914124, "step": 511 }, { "epoch": 0.6644361984711155, "grad_norm": 0.7018458247184753, "learning_rate": 1.8446801965202524e-05, "loss": 0.7482452392578125, "step": 512 }, { "epoch": 0.6657339254212543, "grad_norm": 0.7418568134307861, "learning_rate": 1.8439139238321235e-05, "loss": 0.8263827562332153, "step": 513 }, { "epoch": 0.6670316523713933, "grad_norm": 0.7616980075836182, "learning_rate": 1.8431459255536185e-05, "loss": 0.8845346570014954, "step": 514 }, { "epoch": 0.6683293793215321, "grad_norm": 0.7437636852264404, "learning_rate": 1.8423762032551e-05, "loss": 0.7848752737045288, "step": 515 }, { "epoch": 0.669627106271671, "grad_norm": 0.6855003833770752, "learning_rate": 1.841604758510454e-05, "loss": 0.7946106195449829, "step": 516 }, { "epoch": 0.67092483322181, "grad_norm": 0.7443661689758301, "learning_rate": 1.840831592897091e-05, "loss": 0.8530216813087463, "step": 517 }, { "epoch": 0.6722225601719488, "grad_norm": 0.7664664387702942, "learning_rate": 1.8400567079959383e-05, "loss": 0.836358368396759, "step": 518 }, { "epoch": 0.6735202871220877, "grad_norm": 0.722017228603363, "learning_rate": 1.8392801053914396e-05, "loss": 0.8537322878837585, "step": 519 }, { "epoch": 0.6748180140722266, "grad_norm": 0.7312494516372681, "learning_rate": 1.8385017866715507e-05, "loss": 0.8338693380355835, "step": 520 }, { "epoch": 0.6761157410223655, "grad_norm": 0.7151913642883301, "learning_rate": 1.8377217534277365e-05, "loss": 0.879010021686554, "step": 521 }, { "epoch": 0.6774134679725045, "grad_norm": 0.8348478674888611, "learning_rate": 1.8369400072549674e-05, "loss": 0.8499034643173218, "step": 522 }, { "epoch": 0.6787111949226433, "grad_norm": 0.7662613987922668, "learning_rate": 1.8361565497517166e-05, "loss": 0.8573883175849915, "step": 523 }, { "epoch": 0.6800089218727822, "grad_norm": 0.7006996870040894, "learning_rate": 1.835371382519956e-05, "loss": 0.8768547773361206, "step": 524 }, { "epoch": 0.6813066488229211, "grad_norm": 0.6807017922401428, "learning_rate": 1.8345845071651543e-05, "loss": 0.7412630915641785, "step": 525 }, { "epoch": 0.68260437577306, "grad_norm": 0.7801376581192017, "learning_rate": 1.8337959252962728e-05, "loss": 0.7919901609420776, "step": 526 }, { "epoch": 0.6839021027231988, "grad_norm": 0.7031033635139465, "learning_rate": 1.8330056385257607e-05, "loss": 0.7936250567436218, "step": 527 }, { "epoch": 0.6851998296733378, "grad_norm": 0.67047518491745, "learning_rate": 1.8322136484695553e-05, "loss": 0.7688592076301575, "step": 528 }, { "epoch": 0.6864975566234767, "grad_norm": 0.7209057211875916, "learning_rate": 1.8314199567470755e-05, "loss": 0.7531197667121887, "step": 529 }, { "epoch": 0.6877952835736156, "grad_norm": 0.7783409357070923, "learning_rate": 1.83062456498122e-05, "loss": 0.8060978055000305, "step": 530 }, { "epoch": 0.6890930105237545, "grad_norm": 0.7646079659461975, "learning_rate": 1.8298274747983638e-05, "loss": 0.9013359546661377, "step": 531 }, { "epoch": 0.6903907374738933, "grad_norm": 0.6973395943641663, "learning_rate": 1.8290286878283542e-05, "loss": 0.789779543876648, "step": 532 }, { "epoch": 0.6916884644240323, "grad_norm": 0.7242528796195984, "learning_rate": 1.8282282057045087e-05, "loss": 0.8460395336151123, "step": 533 }, { "epoch": 0.6929861913741712, "grad_norm": 0.7025911211967468, "learning_rate": 1.827426030063611e-05, "loss": 0.7623457312583923, "step": 534 }, { "epoch": 0.69428391832431, "grad_norm": 0.6914080381393433, "learning_rate": 1.8266221625459064e-05, "loss": 0.8142719864845276, "step": 535 }, { "epoch": 0.695581645274449, "grad_norm": 0.7013720870018005, "learning_rate": 1.825816604795101e-05, "loss": 0.7999016642570496, "step": 536 }, { "epoch": 0.6968793722245878, "grad_norm": 0.7201952934265137, "learning_rate": 1.8250093584583567e-05, "loss": 0.8158777952194214, "step": 537 }, { "epoch": 0.6981770991747268, "grad_norm": 0.6993263363838196, "learning_rate": 1.8242004251862872e-05, "loss": 0.7727892994880676, "step": 538 }, { "epoch": 0.6994748261248657, "grad_norm": 0.7411354780197144, "learning_rate": 1.823389806632957e-05, "loss": 0.8402857184410095, "step": 539 }, { "epoch": 0.7007725530750045, "grad_norm": 0.717903733253479, "learning_rate": 1.8225775044558757e-05, "loss": 0.8313778042793274, "step": 540 }, { "epoch": 0.7020702800251435, "grad_norm": 0.7139982581138611, "learning_rate": 1.8217635203159957e-05, "loss": 0.8449199795722961, "step": 541 }, { "epoch": 0.7033680069752823, "grad_norm": 0.7448502779006958, "learning_rate": 1.8209478558777084e-05, "loss": 0.8782564997673035, "step": 542 }, { "epoch": 0.7046657339254212, "grad_norm": 0.7237476110458374, "learning_rate": 1.8201305128088412e-05, "loss": 0.8148598670959473, "step": 543 }, { "epoch": 0.7059634608755602, "grad_norm": 0.7190750241279602, "learning_rate": 1.819311492780654e-05, "loss": 0.8512831926345825, "step": 544 }, { "epoch": 0.707261187825699, "grad_norm": 0.6827414035797119, "learning_rate": 1.8184907974678348e-05, "loss": 0.7911166548728943, "step": 545 }, { "epoch": 0.708558914775838, "grad_norm": 0.7072880864143372, "learning_rate": 1.8176684285484985e-05, "loss": 0.7934311032295227, "step": 546 }, { "epoch": 0.7098566417259768, "grad_norm": 0.6981719136238098, "learning_rate": 1.816844387704181e-05, "loss": 0.7569193840026855, "step": 547 }, { "epoch": 0.7111543686761157, "grad_norm": 0.6892895102500916, "learning_rate": 1.8160186766198375e-05, "loss": 0.8187867999076843, "step": 548 }, { "epoch": 0.7124520956262547, "grad_norm": 0.6689103245735168, "learning_rate": 1.815191296983838e-05, "loss": 0.8214238882064819, "step": 549 }, { "epoch": 0.7137498225763935, "grad_norm": 0.7005360722541809, "learning_rate": 1.8143622504879647e-05, "loss": 0.7808399796485901, "step": 550 }, { "epoch": 0.7150475495265324, "grad_norm": 0.6692766547203064, "learning_rate": 1.8135315388274075e-05, "loss": 0.8118186593055725, "step": 551 }, { "epoch": 0.7163452764766713, "grad_norm": 0.7556451559066772, "learning_rate": 1.8126991637007618e-05, "loss": 0.8829076290130615, "step": 552 }, { "epoch": 0.7176430034268102, "grad_norm": 0.7057021856307983, "learning_rate": 1.8118651268100235e-05, "loss": 0.8323896527290344, "step": 553 }, { "epoch": 0.7189407303769492, "grad_norm": 0.6931277513504028, "learning_rate": 1.811029429860588e-05, "loss": 0.8186264038085938, "step": 554 }, { "epoch": 0.720238457327088, "grad_norm": 0.6943070292472839, "learning_rate": 1.810192074561243e-05, "loss": 0.7884860634803772, "step": 555 }, { "epoch": 0.7215361842772269, "grad_norm": 0.7362954616546631, "learning_rate": 1.8093530626241684e-05, "loss": 0.8730647563934326, "step": 556 }, { "epoch": 0.7228339112273658, "grad_norm": 0.7225231528282166, "learning_rate": 1.8085123957649315e-05, "loss": 0.8629934787750244, "step": 557 }, { "epoch": 0.7241316381775047, "grad_norm": 0.6993386745452881, "learning_rate": 1.8076700757024833e-05, "loss": 0.8742365837097168, "step": 558 }, { "epoch": 0.7254293651276437, "grad_norm": 0.7013887166976929, "learning_rate": 1.8068261041591548e-05, "loss": 0.8042615056037903, "step": 559 }, { "epoch": 0.7267270920777825, "grad_norm": 0.7084468007087708, "learning_rate": 1.8059804828606545e-05, "loss": 0.8460750579833984, "step": 560 }, { "epoch": 0.7280248190279214, "grad_norm": 0.6864623427391052, "learning_rate": 1.8051332135360637e-05, "loss": 0.7461860179901123, "step": 561 }, { "epoch": 0.7293225459780603, "grad_norm": 0.7570308446884155, "learning_rate": 1.8042842979178338e-05, "loss": 0.8015311360359192, "step": 562 }, { "epoch": 0.7306202729281992, "grad_norm": 0.6948541402816772, "learning_rate": 1.8034337377417826e-05, "loss": 0.7483975887298584, "step": 563 }, { "epoch": 0.731917999878338, "grad_norm": 0.6935976147651672, "learning_rate": 1.80258153474709e-05, "loss": 0.8245661854743958, "step": 564 }, { "epoch": 0.733215726828477, "grad_norm": 0.713844895362854, "learning_rate": 1.8017276906762955e-05, "loss": 0.7062139511108398, "step": 565 }, { "epoch": 0.7345134537786159, "grad_norm": 0.7592107653617859, "learning_rate": 1.8008722072752943e-05, "loss": 0.9009630680084229, "step": 566 }, { "epoch": 0.7358111807287548, "grad_norm": 0.7252402901649475, "learning_rate": 1.8000150862933335e-05, "loss": 0.8240823745727539, "step": 567 }, { "epoch": 0.7371089076788937, "grad_norm": 0.6888589262962341, "learning_rate": 1.7991563294830083e-05, "loss": 0.7797961235046387, "step": 568 }, { "epoch": 0.7384066346290326, "grad_norm": 0.6920890808105469, "learning_rate": 1.7982959386002592e-05, "loss": 0.8363062739372253, "step": 569 }, { "epoch": 0.7397043615791715, "grad_norm": 0.7188555002212524, "learning_rate": 1.7974339154043677e-05, "loss": 0.8217660784721375, "step": 570 }, { "epoch": 0.7410020885293104, "grad_norm": 0.6754209995269775, "learning_rate": 1.796570261657953e-05, "loss": 0.8851417899131775, "step": 571 }, { "epoch": 0.7422998154794492, "grad_norm": 0.7101492881774902, "learning_rate": 1.7957049791269684e-05, "loss": 0.8277086615562439, "step": 572 }, { "epoch": 0.7435975424295882, "grad_norm": 0.7085975408554077, "learning_rate": 1.7948380695806983e-05, "loss": 0.8054807186126709, "step": 573 }, { "epoch": 0.744895269379727, "grad_norm": 0.6522380709648132, "learning_rate": 1.793969534791752e-05, "loss": 0.749293327331543, "step": 574 }, { "epoch": 0.746192996329866, "grad_norm": 0.753157377243042, "learning_rate": 1.7930993765360644e-05, "loss": 0.86817467212677, "step": 575 }, { "epoch": 0.7474907232800049, "grad_norm": 0.6874333024024963, "learning_rate": 1.792227596592889e-05, "loss": 0.7839986085891724, "step": 576 }, { "epoch": 0.7487884502301437, "grad_norm": 0.690792977809906, "learning_rate": 1.791354196744794e-05, "loss": 0.8275938630104065, "step": 577 }, { "epoch": 0.7500861771802827, "grad_norm": 0.7033665180206299, "learning_rate": 1.790479178777662e-05, "loss": 0.8231739401817322, "step": 578 }, { "epoch": 0.7513839041304216, "grad_norm": 0.7290453314781189, "learning_rate": 1.7896025444806834e-05, "loss": 0.8637040257453918, "step": 579 }, { "epoch": 0.7526816310805604, "grad_norm": 0.7544882893562317, "learning_rate": 1.7887242956463528e-05, "loss": 0.8368648886680603, "step": 580 }, { "epoch": 0.7539793580306994, "grad_norm": 0.6997877955436707, "learning_rate": 1.7878444340704666e-05, "loss": 0.8118851184844971, "step": 581 }, { "epoch": 0.7552770849808382, "grad_norm": 0.6926761269569397, "learning_rate": 1.78696296155212e-05, "loss": 0.7650015354156494, "step": 582 }, { "epoch": 0.7565748119309772, "grad_norm": 0.7061843872070312, "learning_rate": 1.7860798798937e-05, "loss": 0.7908979654312134, "step": 583 }, { "epoch": 0.757872538881116, "grad_norm": 0.687125563621521, "learning_rate": 1.7851951909008864e-05, "loss": 0.7617890238761902, "step": 584 }, { "epoch": 0.7591702658312549, "grad_norm": 0.7391111254692078, "learning_rate": 1.7843088963826437e-05, "loss": 0.7612465023994446, "step": 585 }, { "epoch": 0.7604679927813939, "grad_norm": 0.7583956122398376, "learning_rate": 1.783420998151219e-05, "loss": 0.8573638200759888, "step": 586 }, { "epoch": 0.7617657197315327, "grad_norm": 0.721450686454773, "learning_rate": 1.782531498022141e-05, "loss": 0.7986845970153809, "step": 587 }, { "epoch": 0.7630634466816716, "grad_norm": 0.7499017119407654, "learning_rate": 1.781640397814211e-05, "loss": 0.8502310514450073, "step": 588 }, { "epoch": 0.7643611736318106, "grad_norm": 0.705142617225647, "learning_rate": 1.7807476993495047e-05, "loss": 0.8705092668533325, "step": 589 }, { "epoch": 0.7656589005819494, "grad_norm": 0.689218282699585, "learning_rate": 1.779853404453363e-05, "loss": 0.8186284899711609, "step": 590 }, { "epoch": 0.7669566275320884, "grad_norm": 0.6828286647796631, "learning_rate": 1.7789575149543936e-05, "loss": 0.7887763381004333, "step": 591 }, { "epoch": 0.7682543544822272, "grad_norm": 0.7451944351196289, "learning_rate": 1.7780600326844638e-05, "loss": 0.8204880952835083, "step": 592 }, { "epoch": 0.7695520814323661, "grad_norm": 0.7414618730545044, "learning_rate": 1.7771609594786968e-05, "loss": 0.8183786869049072, "step": 593 }, { "epoch": 0.770849808382505, "grad_norm": 0.7165583968162537, "learning_rate": 1.776260297175471e-05, "loss": 0.860834002494812, "step": 594 }, { "epoch": 0.7721475353326439, "grad_norm": 0.6954268217086792, "learning_rate": 1.775358047616412e-05, "loss": 0.7466313242912292, "step": 595 }, { "epoch": 0.7734452622827828, "grad_norm": 0.7495166063308716, "learning_rate": 1.774454212646392e-05, "loss": 0.8352164626121521, "step": 596 }, { "epoch": 0.7747429892329217, "grad_norm": 0.7836682796478271, "learning_rate": 1.773548794113525e-05, "loss": 0.7596052885055542, "step": 597 }, { "epoch": 0.7760407161830606, "grad_norm": 0.7165281176567078, "learning_rate": 1.772641793869162e-05, "loss": 0.8770286440849304, "step": 598 }, { "epoch": 0.7773384431331996, "grad_norm": 0.7152581810951233, "learning_rate": 1.7717332137678895e-05, "loss": 0.7514005899429321, "step": 599 }, { "epoch": 0.7786361700833384, "grad_norm": 0.7103503942489624, "learning_rate": 1.770823055667524e-05, "loss": 0.8051580190658569, "step": 600 }, { "epoch": 0.7799338970334773, "grad_norm": 0.6738602519035339, "learning_rate": 1.7699113214291082e-05, "loss": 0.7153568267822266, "step": 601 }, { "epoch": 0.7812316239836162, "grad_norm": 0.7472966909408569, "learning_rate": 1.768998012916908e-05, "loss": 0.8714797496795654, "step": 602 }, { "epoch": 0.7825293509337551, "grad_norm": 0.6653077602386475, "learning_rate": 1.7680831319984077e-05, "loss": 0.7944467663764954, "step": 603 }, { "epoch": 0.7838270778838939, "grad_norm": 0.6959863305091858, "learning_rate": 1.7671666805443076e-05, "loss": 0.8018844127655029, "step": 604 }, { "epoch": 0.7851248048340329, "grad_norm": 0.7507782578468323, "learning_rate": 1.766248660428519e-05, "loss": 0.8342332243919373, "step": 605 }, { "epoch": 0.7864225317841718, "grad_norm": 0.685041069984436, "learning_rate": 1.7653290735281605e-05, "loss": 0.8430291414260864, "step": 606 }, { "epoch": 0.7877202587343107, "grad_norm": 0.7120122313499451, "learning_rate": 1.7644079217235547e-05, "loss": 0.8382185697555542, "step": 607 }, { "epoch": 0.7890179856844496, "grad_norm": 0.6778322458267212, "learning_rate": 1.763485206898224e-05, "loss": 0.7327848672866821, "step": 608 }, { "epoch": 0.7903157126345884, "grad_norm": 0.6974225044250488, "learning_rate": 1.762560930938886e-05, "loss": 0.8788211941719055, "step": 609 }, { "epoch": 0.7916134395847274, "grad_norm": 0.7211731672286987, "learning_rate": 1.7616350957354523e-05, "loss": 0.788176417350769, "step": 610 }, { "epoch": 0.7929111665348663, "grad_norm": 0.7053602337837219, "learning_rate": 1.7607077031810204e-05, "loss": 0.7817824482917786, "step": 611 }, { "epoch": 0.7942088934850051, "grad_norm": 0.7248443961143494, "learning_rate": 1.759778755171874e-05, "loss": 0.8502725958824158, "step": 612 }, { "epoch": 0.7955066204351441, "grad_norm": 0.7390884160995483, "learning_rate": 1.758848253607476e-05, "loss": 0.8086085319519043, "step": 613 }, { "epoch": 0.7968043473852829, "grad_norm": 0.7250061631202698, "learning_rate": 1.7579162003904678e-05, "loss": 0.8245308995246887, "step": 614 }, { "epoch": 0.7981020743354219, "grad_norm": 0.7186241149902344, "learning_rate": 1.756982597426661e-05, "loss": 0.8296452760696411, "step": 615 }, { "epoch": 0.7993998012855608, "grad_norm": 0.6929823160171509, "learning_rate": 1.756047446625038e-05, "loss": 0.804393470287323, "step": 616 }, { "epoch": 0.8006975282356996, "grad_norm": 0.6644824743270874, "learning_rate": 1.7551107498977458e-05, "loss": 0.7272558808326721, "step": 617 }, { "epoch": 0.8019952551858386, "grad_norm": 0.6946169137954712, "learning_rate": 1.7541725091600918e-05, "loss": 0.7725887894630432, "step": 618 }, { "epoch": 0.8032929821359774, "grad_norm": 0.7124983668327332, "learning_rate": 1.7532327263305405e-05, "loss": 0.8026424646377563, "step": 619 }, { "epoch": 0.8045907090861163, "grad_norm": 0.7041330337524414, "learning_rate": 1.75229140333071e-05, "loss": 0.8723938465118408, "step": 620 }, { "epoch": 0.8058884360362553, "grad_norm": 0.7211349606513977, "learning_rate": 1.7513485420853683e-05, "loss": 0.7833378911018372, "step": 621 }, { "epoch": 0.8071861629863941, "grad_norm": 0.7067847847938538, "learning_rate": 1.750404144522427e-05, "loss": 0.8030161261558533, "step": 622 }, { "epoch": 0.8084838899365331, "grad_norm": 0.7632414102554321, "learning_rate": 1.7494582125729408e-05, "loss": 0.8390699625015259, "step": 623 }, { "epoch": 0.8097816168866719, "grad_norm": 0.6906073689460754, "learning_rate": 1.7485107481711014e-05, "loss": 0.7584885954856873, "step": 624 }, { "epoch": 0.8110793438368108, "grad_norm": 0.7074705362319946, "learning_rate": 1.7475617532542325e-05, "loss": 0.7802140116691589, "step": 625 }, { "epoch": 0.8123770707869498, "grad_norm": 0.7454720735549927, "learning_rate": 1.7466112297627894e-05, "loss": 0.8060036897659302, "step": 626 }, { "epoch": 0.8136747977370886, "grad_norm": 0.720340371131897, "learning_rate": 1.7456591796403525e-05, "loss": 0.8245412707328796, "step": 627 }, { "epoch": 0.8149725246872275, "grad_norm": 0.6765140295028687, "learning_rate": 1.744705604833622e-05, "loss": 0.7529839277267456, "step": 628 }, { "epoch": 0.8162702516373664, "grad_norm": 0.7487897872924805, "learning_rate": 1.7437505072924177e-05, "loss": 0.8539460897445679, "step": 629 }, { "epoch": 0.8175679785875053, "grad_norm": 0.819340169429779, "learning_rate": 1.742793888969673e-05, "loss": 0.9023832082748413, "step": 630 }, { "epoch": 0.8188657055376443, "grad_norm": 0.6978700757026672, "learning_rate": 1.741835751821429e-05, "loss": 0.8347563743591309, "step": 631 }, { "epoch": 0.8201634324877831, "grad_norm": 0.6633133888244629, "learning_rate": 1.7408760978068343e-05, "loss": 0.7656944394111633, "step": 632 }, { "epoch": 0.821461159437922, "grad_norm": 0.779058039188385, "learning_rate": 1.739914928888139e-05, "loss": 0.8407497406005859, "step": 633 }, { "epoch": 0.8227588863880609, "grad_norm": 0.7178354263305664, "learning_rate": 1.7389522470306892e-05, "loss": 0.8489883542060852, "step": 634 }, { "epoch": 0.8240566133381998, "grad_norm": 0.6867073774337769, "learning_rate": 1.7379880542029263e-05, "loss": 0.8083344101905823, "step": 635 }, { "epoch": 0.8253543402883388, "grad_norm": 0.7312392592430115, "learning_rate": 1.7370223523763804e-05, "loss": 0.8478159308433533, "step": 636 }, { "epoch": 0.8266520672384776, "grad_norm": 0.7201517224311829, "learning_rate": 1.7360551435256673e-05, "loss": 0.8310608863830566, "step": 637 }, { "epoch": 0.8279497941886165, "grad_norm": 0.7189190983772278, "learning_rate": 1.7350864296284846e-05, "loss": 0.8333780765533447, "step": 638 }, { "epoch": 0.8292475211387554, "grad_norm": 0.686372697353363, "learning_rate": 1.7341162126656063e-05, "loss": 0.774347722530365, "step": 639 }, { "epoch": 0.8305452480888943, "grad_norm": 0.7090693712234497, "learning_rate": 1.7331444946208815e-05, "loss": 0.7772883772850037, "step": 640 }, { "epoch": 0.8318429750390332, "grad_norm": 0.7179540991783142, "learning_rate": 1.732171277481227e-05, "loss": 0.8045225739479065, "step": 641 }, { "epoch": 0.8331407019891721, "grad_norm": 0.7238140106201172, "learning_rate": 1.7311965632366254e-05, "loss": 0.816831648349762, "step": 642 }, { "epoch": 0.834438428939311, "grad_norm": 0.7198631167411804, "learning_rate": 1.7302203538801212e-05, "loss": 0.8121675252914429, "step": 643 }, { "epoch": 0.8357361558894499, "grad_norm": 0.743016242980957, "learning_rate": 1.729242651407815e-05, "loss": 0.8649178743362427, "step": 644 }, { "epoch": 0.8370338828395888, "grad_norm": 0.7449317574501038, "learning_rate": 1.7282634578188612e-05, "loss": 0.823853611946106, "step": 645 }, { "epoch": 0.8383316097897276, "grad_norm": 0.725826621055603, "learning_rate": 1.7272827751154627e-05, "loss": 0.8356031179428101, "step": 646 }, { "epoch": 0.8396293367398666, "grad_norm": 0.7286955118179321, "learning_rate": 1.7263006053028674e-05, "loss": 0.7678595781326294, "step": 647 }, { "epoch": 0.8409270636900055, "grad_norm": 0.7141085863113403, "learning_rate": 1.7253169503893637e-05, "loss": 0.819695770740509, "step": 648 }, { "epoch": 0.8422247906401443, "grad_norm": 0.7320179343223572, "learning_rate": 1.7243318123862777e-05, "loss": 0.7937145233154297, "step": 649 }, { "epoch": 0.8435225175902833, "grad_norm": 0.677760124206543, "learning_rate": 1.7233451933079663e-05, "loss": 0.7791966199874878, "step": 650 }, { "epoch": 0.8448202445404221, "grad_norm": 0.7462013363838196, "learning_rate": 1.7223570951718166e-05, "loss": 0.7947529554367065, "step": 651 }, { "epoch": 0.8461179714905611, "grad_norm": 0.7482285499572754, "learning_rate": 1.7213675199982388e-05, "loss": 0.8657369613647461, "step": 652 }, { "epoch": 0.8474156984407, "grad_norm": 0.7175538539886475, "learning_rate": 1.7203764698106636e-05, "loss": 0.8233255743980408, "step": 653 }, { "epoch": 0.8487134253908388, "grad_norm": 0.687630295753479, "learning_rate": 1.7193839466355383e-05, "loss": 0.730807363986969, "step": 654 }, { "epoch": 0.8500111523409778, "grad_norm": 0.7357272505760193, "learning_rate": 1.7183899525023212e-05, "loss": 0.7798961997032166, "step": 655 }, { "epoch": 0.8513088792911166, "grad_norm": 0.7003471851348877, "learning_rate": 1.7173944894434783e-05, "loss": 0.752636730670929, "step": 656 }, { "epoch": 0.8526066062412555, "grad_norm": 0.72862708568573, "learning_rate": 1.7163975594944807e-05, "loss": 0.8516281247138977, "step": 657 }, { "epoch": 0.8539043331913945, "grad_norm": 0.7155880928039551, "learning_rate": 1.715399164693797e-05, "loss": 0.8015654683113098, "step": 658 }, { "epoch": 0.8552020601415333, "grad_norm": 0.6752951145172119, "learning_rate": 1.7143993070828913e-05, "loss": 0.7704746127128601, "step": 659 }, { "epoch": 0.8564997870916723, "grad_norm": 0.7284151315689087, "learning_rate": 1.713397988706221e-05, "loss": 0.8053057789802551, "step": 660 }, { "epoch": 0.8577975140418111, "grad_norm": 0.7367468476295471, "learning_rate": 1.7123952116112275e-05, "loss": 0.8107625246047974, "step": 661 }, { "epoch": 0.85909524099195, "grad_norm": 0.7247380018234253, "learning_rate": 1.7113909778483364e-05, "loss": 0.8168917894363403, "step": 662 }, { "epoch": 0.860392967942089, "grad_norm": 0.708310067653656, "learning_rate": 1.7103852894709517e-05, "loss": 0.765848696231842, "step": 663 }, { "epoch": 0.8616906948922278, "grad_norm": 0.746276319026947, "learning_rate": 1.7093781485354517e-05, "loss": 0.7557209730148315, "step": 664 }, { "epoch": 0.8629884218423667, "grad_norm": 0.7245295643806458, "learning_rate": 1.7083695571011842e-05, "loss": 0.8230986595153809, "step": 665 }, { "epoch": 0.8642861487925056, "grad_norm": 0.6767184734344482, "learning_rate": 1.707359517230464e-05, "loss": 0.7791951894760132, "step": 666 }, { "epoch": 0.8655838757426445, "grad_norm": 0.7177157402038574, "learning_rate": 1.7063480309885668e-05, "loss": 0.7597481608390808, "step": 667 }, { "epoch": 0.8668816026927835, "grad_norm": 0.7185314297676086, "learning_rate": 1.7053351004437258e-05, "loss": 0.7932897210121155, "step": 668 }, { "epoch": 0.8681793296429223, "grad_norm": 0.6925249695777893, "learning_rate": 1.7043207276671276e-05, "loss": 0.8076404333114624, "step": 669 }, { "epoch": 0.8694770565930612, "grad_norm": 0.6706543564796448, "learning_rate": 1.7033049147329077e-05, "loss": 0.8299864530563354, "step": 670 }, { "epoch": 0.8707747835432001, "grad_norm": 0.6854607462882996, "learning_rate": 1.702287663718147e-05, "loss": 0.7249770760536194, "step": 671 }, { "epoch": 0.872072510493339, "grad_norm": 0.6870327591896057, "learning_rate": 1.7012689767028656e-05, "loss": 0.770750880241394, "step": 672 }, { "epoch": 0.8733702374434779, "grad_norm": 0.7077570557594299, "learning_rate": 1.700248855770021e-05, "loss": 0.887006402015686, "step": 673 }, { "epoch": 0.8746679643936168, "grad_norm": 0.7156735062599182, "learning_rate": 1.6992273030055022e-05, "loss": 0.793735921382904, "step": 674 }, { "epoch": 0.8759656913437557, "grad_norm": 0.7201855182647705, "learning_rate": 1.6982043204981264e-05, "loss": 0.7955703139305115, "step": 675 }, { "epoch": 0.8772634182938946, "grad_norm": 0.7118475437164307, "learning_rate": 1.6971799103396332e-05, "loss": 0.7845295667648315, "step": 676 }, { "epoch": 0.8785611452440335, "grad_norm": 0.7221444845199585, "learning_rate": 1.696154074624683e-05, "loss": 0.824984610080719, "step": 677 }, { "epoch": 0.8798588721941724, "grad_norm": 0.6542083621025085, "learning_rate": 1.6951268154508497e-05, "loss": 0.8094558119773865, "step": 678 }, { "epoch": 0.8811565991443113, "grad_norm": 0.7080230116844177, "learning_rate": 1.6940981349186182e-05, "loss": 0.8446075916290283, "step": 679 }, { "epoch": 0.8824543260944502, "grad_norm": 0.7394174933433533, "learning_rate": 1.69306803513138e-05, "loss": 0.8166599273681641, "step": 680 }, { "epoch": 0.883752053044589, "grad_norm": 0.6939387321472168, "learning_rate": 1.6920365181954284e-05, "loss": 0.8320161700248718, "step": 681 }, { "epoch": 0.885049779994728, "grad_norm": 0.7184001803398132, "learning_rate": 1.6910035862199545e-05, "loss": 0.7950330376625061, "step": 682 }, { "epoch": 0.8863475069448669, "grad_norm": 0.6943792700767517, "learning_rate": 1.6899692413170422e-05, "loss": 0.8061293363571167, "step": 683 }, { "epoch": 0.8876452338950058, "grad_norm": 0.7242916822433472, "learning_rate": 1.688933485601666e-05, "loss": 0.799871563911438, "step": 684 }, { "epoch": 0.8889429608451447, "grad_norm": 0.6802281141281128, "learning_rate": 1.6878963211916833e-05, "loss": 0.8111347556114197, "step": 685 }, { "epoch": 0.8902406877952835, "grad_norm": 0.7344982028007507, "learning_rate": 1.6868577502078336e-05, "loss": 0.818919837474823, "step": 686 }, { "epoch": 0.8915384147454225, "grad_norm": 0.7255212664604187, "learning_rate": 1.6858177747737312e-05, "loss": 0.8595883846282959, "step": 687 }, { "epoch": 0.8928361416955614, "grad_norm": 0.6713986992835999, "learning_rate": 1.684776397015863e-05, "loss": 0.7319802045822144, "step": 688 }, { "epoch": 0.8941338686457002, "grad_norm": 0.6508772373199463, "learning_rate": 1.6837336190635824e-05, "loss": 0.7525233626365662, "step": 689 }, { "epoch": 0.8954315955958392, "grad_norm": 0.6712636947631836, "learning_rate": 1.682689443049107e-05, "loss": 0.785638689994812, "step": 690 }, { "epoch": 0.8954315955958392, "eval_loss": 0.7686871290206909, "eval_runtime": 143.0434, "eval_samples_per_second": 36.297, "eval_steps_per_second": 9.074, "step": 690 }, { "epoch": 0.896729322545978, "grad_norm": 0.7040373682975769, "learning_rate": 1.6816438711075114e-05, "loss": 0.8052287101745605, "step": 691 }, { "epoch": 0.898027049496117, "grad_norm": 0.6796557903289795, "learning_rate": 1.680596905376727e-05, "loss": 0.8128867745399475, "step": 692 }, { "epoch": 0.8993247764462559, "grad_norm": 0.689491868019104, "learning_rate": 1.6795485479975327e-05, "loss": 0.7731098532676697, "step": 693 }, { "epoch": 0.9006225033963947, "grad_norm": 0.6846652030944824, "learning_rate": 1.6784988011135546e-05, "loss": 0.8001493811607361, "step": 694 }, { "epoch": 0.9019202303465337, "grad_norm": 0.7072511911392212, "learning_rate": 1.6774476668712587e-05, "loss": 0.7856433391571045, "step": 695 }, { "epoch": 0.9032179572966725, "grad_norm": 0.6913763880729675, "learning_rate": 1.676395147419949e-05, "loss": 0.8246166110038757, "step": 696 }, { "epoch": 0.9045156842468114, "grad_norm": 0.7131518721580505, "learning_rate": 1.6753412449117615e-05, "loss": 0.8256362080574036, "step": 697 }, { "epoch": 0.9058134111969504, "grad_norm": 0.6939201951026917, "learning_rate": 1.67428596150166e-05, "loss": 0.8615972399711609, "step": 698 }, { "epoch": 0.9071111381470892, "grad_norm": 0.7194769382476807, "learning_rate": 1.6732292993474316e-05, "loss": 0.7931585907936096, "step": 699 }, { "epoch": 0.9084088650972282, "grad_norm": 0.6878808736801147, "learning_rate": 1.6721712606096833e-05, "loss": 0.7722562551498413, "step": 700 }, { "epoch": 0.909706592047367, "grad_norm": 0.6713901162147522, "learning_rate": 1.6711118474518363e-05, "loss": 0.7399365901947021, "step": 701 }, { "epoch": 0.9110043189975059, "grad_norm": 0.6830242872238159, "learning_rate": 1.6700510620401223e-05, "loss": 0.7681128978729248, "step": 702 }, { "epoch": 0.9123020459476449, "grad_norm": 0.7052934169769287, "learning_rate": 1.6689889065435796e-05, "loss": 0.8287486433982849, "step": 703 }, { "epoch": 0.9135997728977837, "grad_norm": 0.7426304817199707, "learning_rate": 1.667925383134047e-05, "loss": 0.7236632108688354, "step": 704 }, { "epoch": 0.9148974998479226, "grad_norm": 0.7284197807312012, "learning_rate": 1.66686049398616e-05, "loss": 0.8001005053520203, "step": 705 }, { "epoch": 0.9161952267980615, "grad_norm": 0.7305144667625427, "learning_rate": 1.6657942412773484e-05, "loss": 0.816078245639801, "step": 706 }, { "epoch": 0.9174929537482004, "grad_norm": 0.7396757006645203, "learning_rate": 1.664726627187829e-05, "loss": 0.8432518243789673, "step": 707 }, { "epoch": 0.9187906806983394, "grad_norm": 0.7043930292129517, "learning_rate": 1.6636576539006015e-05, "loss": 0.8011447787284851, "step": 708 }, { "epoch": 0.9200884076484782, "grad_norm": 0.6750434637069702, "learning_rate": 1.6625873236014464e-05, "loss": 0.8111026883125305, "step": 709 }, { "epoch": 0.9213861345986171, "grad_norm": 0.6968750953674316, "learning_rate": 1.6615156384789185e-05, "loss": 0.7856196165084839, "step": 710 }, { "epoch": 0.922683861548756, "grad_norm": 0.6756315231323242, "learning_rate": 1.660442600724342e-05, "loss": 0.7796693444252014, "step": 711 }, { "epoch": 0.9239815884988949, "grad_norm": 0.7280746102333069, "learning_rate": 1.659368212531808e-05, "loss": 0.8190441131591797, "step": 712 }, { "epoch": 0.9252793154490339, "grad_norm": 0.6771341562271118, "learning_rate": 1.6582924760981683e-05, "loss": 0.7919082641601562, "step": 713 }, { "epoch": 0.9265770423991727, "grad_norm": 0.7019714713096619, "learning_rate": 1.6572153936230316e-05, "loss": 0.7387243509292603, "step": 714 }, { "epoch": 0.9278747693493116, "grad_norm": 0.7424118518829346, "learning_rate": 1.6561369673087588e-05, "loss": 0.8694776892662048, "step": 715 }, { "epoch": 0.9291724962994505, "grad_norm": 0.6909191012382507, "learning_rate": 1.6550571993604587e-05, "loss": 0.8239873647689819, "step": 716 }, { "epoch": 0.9304702232495894, "grad_norm": 0.7481014728546143, "learning_rate": 1.6539760919859838e-05, "loss": 0.8004978895187378, "step": 717 }, { "epoch": 0.9317679501997282, "grad_norm": 0.6954971551895142, "learning_rate": 1.6528936473959253e-05, "loss": 0.8122729659080505, "step": 718 }, { "epoch": 0.9330656771498672, "grad_norm": 0.7150570154190063, "learning_rate": 1.6518098678036073e-05, "loss": 0.8382218480110168, "step": 719 }, { "epoch": 0.9343634041000061, "grad_norm": 0.7469287514686584, "learning_rate": 1.650724755425086e-05, "loss": 0.8599920868873596, "step": 720 }, { "epoch": 0.935661131050145, "grad_norm": 0.7064406275749207, "learning_rate": 1.6496383124791406e-05, "loss": 0.7755042314529419, "step": 721 }, { "epoch": 0.9369588580002839, "grad_norm": 0.7173776626586914, "learning_rate": 1.6485505411872725e-05, "loss": 0.8066536784172058, "step": 722 }, { "epoch": 0.9382565849504227, "grad_norm": 0.717430591583252, "learning_rate": 1.6474614437736986e-05, "loss": 0.8112089037895203, "step": 723 }, { "epoch": 0.9395543119005617, "grad_norm": 0.696087658405304, "learning_rate": 1.6463710224653477e-05, "loss": 0.7918620705604553, "step": 724 }, { "epoch": 0.9408520388507006, "grad_norm": 0.6923975944519043, "learning_rate": 1.6452792794918545e-05, "loss": 0.8037642240524292, "step": 725 }, { "epoch": 0.9421497658008394, "grad_norm": 0.7063742280006409, "learning_rate": 1.644186217085558e-05, "loss": 0.7934796810150146, "step": 726 }, { "epoch": 0.9434474927509784, "grad_norm": 0.6965203881263733, "learning_rate": 1.6430918374814937e-05, "loss": 0.8489659428596497, "step": 727 }, { "epoch": 0.9447452197011172, "grad_norm": 0.7392389178276062, "learning_rate": 1.641996142917391e-05, "loss": 0.8604154586791992, "step": 728 }, { "epoch": 0.9460429466512562, "grad_norm": 0.7131820321083069, "learning_rate": 1.640899135633668e-05, "loss": 0.8199344277381897, "step": 729 }, { "epoch": 0.9473406736013951, "grad_norm": 0.7163403630256653, "learning_rate": 1.6398008178734272e-05, "loss": 0.8697142004966736, "step": 730 }, { "epoch": 0.9486384005515339, "grad_norm": 0.6607118844985962, "learning_rate": 1.6387011918824493e-05, "loss": 0.7900056838989258, "step": 731 }, { "epoch": 0.9499361275016729, "grad_norm": 0.669420599937439, "learning_rate": 1.6376002599091925e-05, "loss": 0.8032844066619873, "step": 732 }, { "epoch": 0.9512338544518117, "grad_norm": 0.7059581279754639, "learning_rate": 1.6364980242047835e-05, "loss": 0.8048977851867676, "step": 733 }, { "epoch": 0.9525315814019506, "grad_norm": 0.7329293489456177, "learning_rate": 1.635394487023015e-05, "loss": 0.8311731815338135, "step": 734 }, { "epoch": 0.9538293083520896, "grad_norm": 0.7057397961616516, "learning_rate": 1.634289650620342e-05, "loss": 0.8411611318588257, "step": 735 }, { "epoch": 0.9551270353022284, "grad_norm": 0.653426468372345, "learning_rate": 1.633183517255875e-05, "loss": 0.758813738822937, "step": 736 }, { "epoch": 0.9564247622523674, "grad_norm": 0.7300577759742737, "learning_rate": 1.632076089191376e-05, "loss": 0.8028651475906372, "step": 737 }, { "epoch": 0.9577224892025062, "grad_norm": 0.6757684350013733, "learning_rate": 1.630967368691256e-05, "loss": 0.8133585453033447, "step": 738 }, { "epoch": 0.9590202161526451, "grad_norm": 0.6894041299819946, "learning_rate": 1.6298573580225676e-05, "loss": 0.766591489315033, "step": 739 }, { "epoch": 0.9603179431027841, "grad_norm": 0.7034198641777039, "learning_rate": 1.6287460594550017e-05, "loss": 0.778566837310791, "step": 740 }, { "epoch": 0.9616156700529229, "grad_norm": 0.6629794239997864, "learning_rate": 1.6276334752608823e-05, "loss": 0.7911474704742432, "step": 741 }, { "epoch": 0.9629133970030618, "grad_norm": 0.6980583667755127, "learning_rate": 1.6265196077151627e-05, "loss": 0.7445369958877563, "step": 742 }, { "epoch": 0.9642111239532007, "grad_norm": 0.7294824719429016, "learning_rate": 1.62540445909542e-05, "loss": 0.86620032787323, "step": 743 }, { "epoch": 0.9655088509033396, "grad_norm": 0.7365493774414062, "learning_rate": 1.624288031681851e-05, "loss": 0.810501754283905, "step": 744 }, { "epoch": 0.9668065778534786, "grad_norm": 0.737711489200592, "learning_rate": 1.623170327757267e-05, "loss": 0.8520309329032898, "step": 745 }, { "epoch": 0.9681043048036174, "grad_norm": 0.683699905872345, "learning_rate": 1.62205134960709e-05, "loss": 0.7950071096420288, "step": 746 }, { "epoch": 0.9694020317537563, "grad_norm": 0.7092661261558533, "learning_rate": 1.620931099519347e-05, "loss": 0.8340073823928833, "step": 747 }, { "epoch": 0.9706997587038952, "grad_norm": 0.7204828262329102, "learning_rate": 1.619809579784665e-05, "loss": 0.7778469324111938, "step": 748 }, { "epoch": 0.9719974856540341, "grad_norm": 0.6977567076683044, "learning_rate": 1.6186867926962695e-05, "loss": 0.797735869884491, "step": 749 }, { "epoch": 0.973295212604173, "grad_norm": 0.6957900524139404, "learning_rate": 1.6175627405499746e-05, "loss": 0.7967561483383179, "step": 750 }, { "epoch": 0.9745929395543119, "grad_norm": 0.6862889528274536, "learning_rate": 1.6164374256441837e-05, "loss": 0.8016502261161804, "step": 751 }, { "epoch": 0.9758906665044508, "grad_norm": 0.6969533562660217, "learning_rate": 1.6153108502798796e-05, "loss": 0.8099682331085205, "step": 752 }, { "epoch": 0.9771883934545897, "grad_norm": 0.6920532584190369, "learning_rate": 1.614183016760625e-05, "loss": 0.8007751107215881, "step": 753 }, { "epoch": 0.9784861204047286, "grad_norm": 0.6825345158576965, "learning_rate": 1.613053927392553e-05, "loss": 0.8570786118507385, "step": 754 }, { "epoch": 0.9797838473548675, "grad_norm": 0.7230255603790283, "learning_rate": 1.6119235844843664e-05, "loss": 0.7779375910758972, "step": 755 }, { "epoch": 0.9810815743050064, "grad_norm": 0.68338543176651, "learning_rate": 1.6107919903473294e-05, "loss": 0.7894657850265503, "step": 756 }, { "epoch": 0.9823793012551453, "grad_norm": 0.7132012248039246, "learning_rate": 1.6096591472952664e-05, "loss": 0.8401795625686646, "step": 757 }, { "epoch": 0.9836770282052841, "grad_norm": 0.681077241897583, "learning_rate": 1.6085250576445548e-05, "loss": 0.7692939043045044, "step": 758 }, { "epoch": 0.9849747551554231, "grad_norm": 0.6817126870155334, "learning_rate": 1.6073897237141203e-05, "loss": 0.7555439472198486, "step": 759 }, { "epoch": 0.986272482105562, "grad_norm": 0.6702454090118408, "learning_rate": 1.6062531478254333e-05, "loss": 0.7115926742553711, "step": 760 }, { "epoch": 0.9875702090557009, "grad_norm": 0.6700429320335388, "learning_rate": 1.605115332302505e-05, "loss": 0.7557807564735413, "step": 761 }, { "epoch": 0.9888679360058398, "grad_norm": 0.6891334652900696, "learning_rate": 1.603976279471879e-05, "loss": 0.8077662587165833, "step": 762 }, { "epoch": 0.9901656629559786, "grad_norm": 0.767073929309845, "learning_rate": 1.6028359916626308e-05, "loss": 0.7964708805084229, "step": 763 }, { "epoch": 0.9914633899061176, "grad_norm": 0.660102367401123, "learning_rate": 1.601694471206359e-05, "loss": 0.7086456418037415, "step": 764 }, { "epoch": 0.9927611168562565, "grad_norm": 0.6949501037597656, "learning_rate": 1.600551720437186e-05, "loss": 0.7723450660705566, "step": 765 }, { "epoch": 0.9940588438063953, "grad_norm": 0.7149574756622314, "learning_rate": 1.599407741691746e-05, "loss": 0.8286278247833252, "step": 766 }, { "epoch": 0.9953565707565343, "grad_norm": 0.6776000261306763, "learning_rate": 1.5982625373091877e-05, "loss": 0.7701430320739746, "step": 767 }, { "epoch": 0.9966542977066731, "grad_norm": 0.7129999399185181, "learning_rate": 1.5971161096311628e-05, "loss": 0.8104744553565979, "step": 768 }, { "epoch": 0.9979520246568121, "grad_norm": 0.6826761960983276, "learning_rate": 1.5959684610018267e-05, "loss": 0.7398239970207214, "step": 769 }, { "epoch": 0.999249751606951, "grad_norm": 0.7236920595169067, "learning_rate": 1.5948195937678297e-05, "loss": 0.7627758383750916, "step": 770 }, { "epoch": 1.0, "grad_norm": 0.9062820672988892, "learning_rate": 1.5936695102783148e-05, "loss": 0.7684851288795471, "step": 771 }, { "epoch": 1.0012977269501389, "grad_norm": 1.0222225189208984, "learning_rate": 1.5925182128849116e-05, "loss": 0.7260036468505859, "step": 772 }, { "epoch": 1.0025954539002777, "grad_norm": 0.8933354020118713, "learning_rate": 1.591365703941732e-05, "loss": 0.6952782869338989, "step": 773 }, { "epoch": 1.0038931808504168, "grad_norm": 0.8150500059127808, "learning_rate": 1.5902119858053652e-05, "loss": 0.708466649055481, "step": 774 }, { "epoch": 1.0051909078005556, "grad_norm": 0.677733838558197, "learning_rate": 1.589057060834872e-05, "loss": 0.714854896068573, "step": 775 }, { "epoch": 1.0064886347506945, "grad_norm": 0.8115158677101135, "learning_rate": 1.5879009313917826e-05, "loss": 0.7126277089118958, "step": 776 }, { "epoch": 1.0077863617008334, "grad_norm": 0.9660588502883911, "learning_rate": 1.5867435998400885e-05, "loss": 0.8123319149017334, "step": 777 }, { "epoch": 1.0090840886509722, "grad_norm": 0.8912333846092224, "learning_rate": 1.5855850685462404e-05, "loss": 0.7480561137199402, "step": 778 }, { "epoch": 1.0103818156011113, "grad_norm": 0.9120140075683594, "learning_rate": 1.584425339879141e-05, "loss": 0.7480191588401794, "step": 779 }, { "epoch": 1.0116795425512501, "grad_norm": 0.8324950337409973, "learning_rate": 1.5832644162101417e-05, "loss": 0.7069035172462463, "step": 780 }, { "epoch": 1.012977269501389, "grad_norm": 0.7601868510246277, "learning_rate": 1.5821022999130385e-05, "loss": 0.646752655506134, "step": 781 }, { "epoch": 1.0142749964515279, "grad_norm": 0.7213713526725769, "learning_rate": 1.580938993364064e-05, "loss": 0.6728400588035583, "step": 782 }, { "epoch": 1.0155727234016667, "grad_norm": 0.8234879374504089, "learning_rate": 1.579774498941886e-05, "loss": 0.6997194886207581, "step": 783 }, { "epoch": 1.0168704503518056, "grad_norm": 0.794476330280304, "learning_rate": 1.578608819027602e-05, "loss": 0.6844808459281921, "step": 784 }, { "epoch": 1.0181681773019446, "grad_norm": 0.8356218338012695, "learning_rate": 1.5774419560047303e-05, "loss": 0.7501406073570251, "step": 785 }, { "epoch": 1.0194659042520835, "grad_norm": 0.7794895172119141, "learning_rate": 1.5762739122592123e-05, "loss": 0.7650024890899658, "step": 786 }, { "epoch": 1.0207636312022224, "grad_norm": 0.7471200227737427, "learning_rate": 1.5751046901794008e-05, "loss": 0.7121275067329407, "step": 787 }, { "epoch": 1.0220613581523612, "grad_norm": 0.7541830539703369, "learning_rate": 1.5739342921560593e-05, "loss": 0.7205899357795715, "step": 788 }, { "epoch": 1.0233590851025, "grad_norm": 0.8261748552322388, "learning_rate": 1.5727627205823554e-05, "loss": 0.6890494227409363, "step": 789 }, { "epoch": 1.0246568120526391, "grad_norm": 0.7363404035568237, "learning_rate": 1.571589977853857e-05, "loss": 0.7250495553016663, "step": 790 }, { "epoch": 1.025954539002778, "grad_norm": 0.7666418552398682, "learning_rate": 1.5704160663685254e-05, "loss": 0.6565474271774292, "step": 791 }, { "epoch": 1.0272522659529169, "grad_norm": 0.707535982131958, "learning_rate": 1.5692409885267127e-05, "loss": 0.8307659029960632, "step": 792 }, { "epoch": 1.0285499929030557, "grad_norm": 0.7528367638587952, "learning_rate": 1.568064746731156e-05, "loss": 0.734372615814209, "step": 793 }, { "epoch": 1.0298477198531946, "grad_norm": 0.7138853073120117, "learning_rate": 1.5668873433869718e-05, "loss": 0.6305298805236816, "step": 794 }, { "epoch": 1.0311454468033336, "grad_norm": 0.7478009462356567, "learning_rate": 1.5657087809016517e-05, "loss": 0.6923752427101135, "step": 795 }, { "epoch": 1.0324431737534725, "grad_norm": 0.7364891171455383, "learning_rate": 1.564529061685058e-05, "loss": 0.7163046598434448, "step": 796 }, { "epoch": 1.0337409007036114, "grad_norm": 0.7004992365837097, "learning_rate": 1.5633481881494178e-05, "loss": 0.6700119972229004, "step": 797 }, { "epoch": 1.0350386276537502, "grad_norm": 0.749292254447937, "learning_rate": 1.562166162709319e-05, "loss": 0.6811234951019287, "step": 798 }, { "epoch": 1.036336354603889, "grad_norm": 0.7418084740638733, "learning_rate": 1.560982987781704e-05, "loss": 0.7332763075828552, "step": 799 }, { "epoch": 1.037634081554028, "grad_norm": 0.6867294907569885, "learning_rate": 1.5597986657858656e-05, "loss": 0.7094939351081848, "step": 800 }, { "epoch": 1.038931808504167, "grad_norm": 0.6801954507827759, "learning_rate": 1.5586131991434434e-05, "loss": 0.7229615449905396, "step": 801 }, { "epoch": 1.0402295354543059, "grad_norm": 0.6919074654579163, "learning_rate": 1.5574265902784163e-05, "loss": 0.6745041012763977, "step": 802 }, { "epoch": 1.0415272624044447, "grad_norm": 0.7064636945724487, "learning_rate": 1.556238841617099e-05, "loss": 0.7311556935310364, "step": 803 }, { "epoch": 1.0428249893545836, "grad_norm": 0.7400867938995361, "learning_rate": 1.555049955588137e-05, "loss": 0.7360319495201111, "step": 804 }, { "epoch": 1.0441227163047224, "grad_norm": 0.7186093330383301, "learning_rate": 1.5538599346225013e-05, "loss": 0.6791881918907166, "step": 805 }, { "epoch": 1.0454204432548615, "grad_norm": 0.7080870866775513, "learning_rate": 1.552668781153484e-05, "loss": 0.6935555338859558, "step": 806 }, { "epoch": 1.0467181702050004, "grad_norm": 0.7288933396339417, "learning_rate": 1.5514764976166916e-05, "loss": 0.7893433570861816, "step": 807 }, { "epoch": 1.0480158971551392, "grad_norm": 0.7090301513671875, "learning_rate": 1.5502830864500426e-05, "loss": 0.7087657451629639, "step": 808 }, { "epoch": 1.049313624105278, "grad_norm": 0.7548444271087646, "learning_rate": 1.5490885500937606e-05, "loss": 0.72869473695755, "step": 809 }, { "epoch": 1.050611351055417, "grad_norm": 0.7161403894424438, "learning_rate": 1.5478928909903705e-05, "loss": 0.7281824946403503, "step": 810 }, { "epoch": 1.051909078005556, "grad_norm": 0.6805386543273926, "learning_rate": 1.5466961115846927e-05, "loss": 0.6523677110671997, "step": 811 }, { "epoch": 1.0532068049556949, "grad_norm": 0.7339995503425598, "learning_rate": 1.545498214323837e-05, "loss": 0.7160875797271729, "step": 812 }, { "epoch": 1.0545045319058337, "grad_norm": 0.6826195120811462, "learning_rate": 1.544299201657202e-05, "loss": 0.7368515133857727, "step": 813 }, { "epoch": 1.0558022588559726, "grad_norm": 0.7545201182365417, "learning_rate": 1.543099076036463e-05, "loss": 0.7098448276519775, "step": 814 }, { "epoch": 1.0570999858061114, "grad_norm": 0.6874995827674866, "learning_rate": 1.5418978399155748e-05, "loss": 0.6643248200416565, "step": 815 }, { "epoch": 1.0583977127562503, "grad_norm": 0.7067052125930786, "learning_rate": 1.54069549575076e-05, "loss": 0.7022271752357483, "step": 816 }, { "epoch": 1.0596954397063894, "grad_norm": 0.7168053388595581, "learning_rate": 1.539492046000509e-05, "loss": 0.6977633237838745, "step": 817 }, { "epoch": 1.0609931666565282, "grad_norm": 0.7110093235969543, "learning_rate": 1.5382874931255717e-05, "loss": 0.7410083413124084, "step": 818 }, { "epoch": 1.062290893606667, "grad_norm": 0.6772004961967468, "learning_rate": 1.5370818395889536e-05, "loss": 0.6744326949119568, "step": 819 }, { "epoch": 1.063588620556806, "grad_norm": 0.7344289422035217, "learning_rate": 1.5358750878559113e-05, "loss": 0.7128704190254211, "step": 820 }, { "epoch": 1.0648863475069448, "grad_norm": 0.7206461429595947, "learning_rate": 1.5346672403939465e-05, "loss": 0.7533354759216309, "step": 821 }, { "epoch": 1.0661840744570839, "grad_norm": 0.7541556358337402, "learning_rate": 1.5334582996728017e-05, "loss": 0.7774013876914978, "step": 822 }, { "epoch": 1.0674818014072227, "grad_norm": 0.7579377293586731, "learning_rate": 1.532248268164455e-05, "loss": 0.7790758609771729, "step": 823 }, { "epoch": 1.0687795283573616, "grad_norm": 0.7289340496063232, "learning_rate": 1.5310371483431138e-05, "loss": 0.7054307460784912, "step": 824 }, { "epoch": 1.0700772553075004, "grad_norm": 0.7037842869758606, "learning_rate": 1.529824942685212e-05, "loss": 0.7457549571990967, "step": 825 }, { "epoch": 1.0713749822576393, "grad_norm": 0.7253069877624512, "learning_rate": 1.528611653669403e-05, "loss": 0.7203331589698792, "step": 826 }, { "epoch": 1.0726727092077784, "grad_norm": 0.7243335247039795, "learning_rate": 1.5273972837765566e-05, "loss": 0.7370164394378662, "step": 827 }, { "epoch": 1.0739704361579172, "grad_norm": 0.6802127957344055, "learning_rate": 1.526181835489751e-05, "loss": 0.7022003531455994, "step": 828 }, { "epoch": 1.075268163108056, "grad_norm": 0.7470188736915588, "learning_rate": 1.5249653112942708e-05, "loss": 0.7355238795280457, "step": 829 }, { "epoch": 1.076565890058195, "grad_norm": 0.7139303684234619, "learning_rate": 1.5237477136776e-05, "loss": 0.6995757222175598, "step": 830 }, { "epoch": 1.0778636170083338, "grad_norm": 0.6893638372421265, "learning_rate": 1.5225290451294173e-05, "loss": 0.6514896750450134, "step": 831 }, { "epoch": 1.0791613439584729, "grad_norm": 0.7205830812454224, "learning_rate": 1.521309308141592e-05, "loss": 0.6881433725357056, "step": 832 }, { "epoch": 1.0804590709086117, "grad_norm": 0.7569621205329895, "learning_rate": 1.5200885052081767e-05, "loss": 0.7357972264289856, "step": 833 }, { "epoch": 1.0817567978587506, "grad_norm": 0.7436279654502869, "learning_rate": 1.518866638825405e-05, "loss": 0.758313775062561, "step": 834 }, { "epoch": 1.0830545248088894, "grad_norm": 0.7273634076118469, "learning_rate": 1.517643711491684e-05, "loss": 0.6798244714736938, "step": 835 }, { "epoch": 1.0843522517590283, "grad_norm": 0.6966442465782166, "learning_rate": 1.516419725707591e-05, "loss": 0.7077891826629639, "step": 836 }, { "epoch": 1.0856499787091671, "grad_norm": 0.6794623732566833, "learning_rate": 1.5151946839758673e-05, "loss": 0.6736932992935181, "step": 837 }, { "epoch": 1.0869477056593062, "grad_norm": 0.7189822196960449, "learning_rate": 1.5139685888014123e-05, "loss": 0.7594777345657349, "step": 838 }, { "epoch": 1.088245432609445, "grad_norm": 0.7691319584846497, "learning_rate": 1.512741442691281e-05, "loss": 0.7986084818840027, "step": 839 }, { "epoch": 1.089543159559584, "grad_norm": 0.7428483366966248, "learning_rate": 1.5115132481546763e-05, "loss": 0.7112255096435547, "step": 840 }, { "epoch": 1.0908408865097228, "grad_norm": 0.7567489743232727, "learning_rate": 1.5102840077029452e-05, "loss": 0.647540807723999, "step": 841 }, { "epoch": 1.0921386134598616, "grad_norm": 0.7548873424530029, "learning_rate": 1.509053723849574e-05, "loss": 0.776237428188324, "step": 842 }, { "epoch": 1.0934363404100007, "grad_norm": 0.7588720917701721, "learning_rate": 1.5078223991101805e-05, "loss": 0.6855933666229248, "step": 843 }, { "epoch": 1.0947340673601396, "grad_norm": 0.7549242973327637, "learning_rate": 1.5065900360025128e-05, "loss": 0.7288146615028381, "step": 844 }, { "epoch": 1.0960317943102784, "grad_norm": 0.7281069755554199, "learning_rate": 1.5053566370464416e-05, "loss": 0.7359070777893066, "step": 845 }, { "epoch": 1.0973295212604173, "grad_norm": 0.709331750869751, "learning_rate": 1.5041222047639558e-05, "loss": 0.718718945980072, "step": 846 }, { "epoch": 1.0986272482105561, "grad_norm": 0.684161365032196, "learning_rate": 1.5028867416791566e-05, "loss": 0.6832801699638367, "step": 847 }, { "epoch": 1.099924975160695, "grad_norm": 0.7570529580116272, "learning_rate": 1.5016502503182533e-05, "loss": 0.712772786617279, "step": 848 }, { "epoch": 1.101222702110834, "grad_norm": 0.7224586606025696, "learning_rate": 1.5004127332095579e-05, "loss": 0.72933429479599, "step": 849 }, { "epoch": 1.102520429060973, "grad_norm": 0.7530233263969421, "learning_rate": 1.49917419288348e-05, "loss": 0.7607170343399048, "step": 850 }, { "epoch": 1.1038181560111118, "grad_norm": 0.7433916926383972, "learning_rate": 1.4979346318725203e-05, "loss": 0.7284337282180786, "step": 851 }, { "epoch": 1.1051158829612506, "grad_norm": 0.7271002531051636, "learning_rate": 1.4966940527112679e-05, "loss": 0.7452124357223511, "step": 852 }, { "epoch": 1.1064136099113895, "grad_norm": 0.7177510857582092, "learning_rate": 1.4954524579363932e-05, "loss": 0.7781730890274048, "step": 853 }, { "epoch": 1.1077113368615286, "grad_norm": 0.7278553247451782, "learning_rate": 1.4942098500866428e-05, "loss": 0.760970413684845, "step": 854 }, { "epoch": 1.1090090638116674, "grad_norm": 0.7369382977485657, "learning_rate": 1.4929662317028359e-05, "loss": 0.7270724177360535, "step": 855 }, { "epoch": 1.1103067907618063, "grad_norm": 0.7529125213623047, "learning_rate": 1.491721605327857e-05, "loss": 0.6972394585609436, "step": 856 }, { "epoch": 1.1116045177119451, "grad_norm": 0.8102325201034546, "learning_rate": 1.490475973506652e-05, "loss": 0.7593643069267273, "step": 857 }, { "epoch": 1.112902244662084, "grad_norm": 0.7033381462097168, "learning_rate": 1.4892293387862221e-05, "loss": 0.750421404838562, "step": 858 }, { "epoch": 1.114199971612223, "grad_norm": 0.7504622340202332, "learning_rate": 1.487981703715621e-05, "loss": 0.7422147989273071, "step": 859 }, { "epoch": 1.115497698562362, "grad_norm": 0.7424933910369873, "learning_rate": 1.4867330708459463e-05, "loss": 0.7375016212463379, "step": 860 }, { "epoch": 1.1167954255125008, "grad_norm": 0.73978191614151, "learning_rate": 1.4854834427303353e-05, "loss": 0.7315906286239624, "step": 861 }, { "epoch": 1.1180931524626396, "grad_norm": 0.7480568289756775, "learning_rate": 1.4842328219239618e-05, "loss": 0.7146769762039185, "step": 862 }, { "epoch": 1.1193908794127785, "grad_norm": 0.6838370561599731, "learning_rate": 1.4829812109840291e-05, "loss": 0.6863071918487549, "step": 863 }, { "epoch": 1.1206886063629176, "grad_norm": 0.69765305519104, "learning_rate": 1.4817286124697647e-05, "loss": 0.6740079522132874, "step": 864 }, { "epoch": 1.1219863333130564, "grad_norm": 0.7375463843345642, "learning_rate": 1.480475028942415e-05, "loss": 0.7721714973449707, "step": 865 }, { "epoch": 1.1232840602631953, "grad_norm": 0.7765669226646423, "learning_rate": 1.4792204629652414e-05, "loss": 0.6988716125488281, "step": 866 }, { "epoch": 1.1245817872133341, "grad_norm": 0.6921293139457703, "learning_rate": 1.4779649171035138e-05, "loss": 0.7338443398475647, "step": 867 }, { "epoch": 1.125879514163473, "grad_norm": 0.7645788192749023, "learning_rate": 1.4767083939245055e-05, "loss": 0.7597560882568359, "step": 868 }, { "epoch": 1.1271772411136118, "grad_norm": 0.7806273698806763, "learning_rate": 1.475450895997489e-05, "loss": 0.7360360026359558, "step": 869 }, { "epoch": 1.128474968063751, "grad_norm": 0.7329487204551697, "learning_rate": 1.4741924258937283e-05, "loss": 0.694042980670929, "step": 870 }, { "epoch": 1.1297726950138898, "grad_norm": 0.7490030527114868, "learning_rate": 1.472932986186477e-05, "loss": 0.771519660949707, "step": 871 }, { "epoch": 1.1310704219640286, "grad_norm": 0.7821305990219116, "learning_rate": 1.47167257945097e-05, "loss": 0.7572095990180969, "step": 872 }, { "epoch": 1.1323681489141675, "grad_norm": 0.745883584022522, "learning_rate": 1.4704112082644207e-05, "loss": 0.7173527479171753, "step": 873 }, { "epoch": 1.1336658758643063, "grad_norm": 0.7457818984985352, "learning_rate": 1.4691488752060132e-05, "loss": 0.7411136031150818, "step": 874 }, { "epoch": 1.1349636028144454, "grad_norm": 0.7116679549217224, "learning_rate": 1.4678855828568996e-05, "loss": 0.6630608439445496, "step": 875 }, { "epoch": 1.1362613297645843, "grad_norm": 0.7429471611976624, "learning_rate": 1.4666213338001929e-05, "loss": 0.6890819668769836, "step": 876 }, { "epoch": 1.1375590567147231, "grad_norm": 0.7173399925231934, "learning_rate": 1.4653561306209625e-05, "loss": 0.7061414122581482, "step": 877 }, { "epoch": 1.138856783664862, "grad_norm": 0.7341779470443726, "learning_rate": 1.4640899759062285e-05, "loss": 0.7564276456832886, "step": 878 }, { "epoch": 1.1401545106150008, "grad_norm": 0.73567795753479, "learning_rate": 1.462822872244957e-05, "loss": 0.7193140983581543, "step": 879 }, { "epoch": 1.1414522375651397, "grad_norm": 0.7359784841537476, "learning_rate": 1.461554822228054e-05, "loss": 0.724113941192627, "step": 880 }, { "epoch": 1.1427499645152788, "grad_norm": 0.6934400200843811, "learning_rate": 1.460285828448361e-05, "loss": 0.6648344397544861, "step": 881 }, { "epoch": 1.1440476914654176, "grad_norm": 0.6720191836357117, "learning_rate": 1.4590158935006494e-05, "loss": 0.6355569362640381, "step": 882 }, { "epoch": 1.1453454184155565, "grad_norm": 0.7342029809951782, "learning_rate": 1.4577450199816142e-05, "loss": 0.7470182180404663, "step": 883 }, { "epoch": 1.1466431453656953, "grad_norm": 0.7566630244255066, "learning_rate": 1.4564732104898702e-05, "loss": 0.7848218679428101, "step": 884 }, { "epoch": 1.1479408723158344, "grad_norm": 0.6953855752944946, "learning_rate": 1.4552004676259462e-05, "loss": 0.7087516784667969, "step": 885 }, { "epoch": 1.1492385992659733, "grad_norm": 0.7306509613990784, "learning_rate": 1.453926793992279e-05, "loss": 0.7669079303741455, "step": 886 }, { "epoch": 1.1505363262161121, "grad_norm": 0.7278076410293579, "learning_rate": 1.4526521921932091e-05, "loss": 0.7629184722900391, "step": 887 }, { "epoch": 1.151834053166251, "grad_norm": 0.7405791878700256, "learning_rate": 1.4513766648349742e-05, "loss": 0.6739349961280823, "step": 888 }, { "epoch": 1.1531317801163898, "grad_norm": 0.7238565683364868, "learning_rate": 1.4501002145257048e-05, "loss": 0.7271534204483032, "step": 889 }, { "epoch": 1.1544295070665287, "grad_norm": 0.6887433528900146, "learning_rate": 1.4488228438754191e-05, "loss": 0.7166074514389038, "step": 890 }, { "epoch": 1.1557272340166678, "grad_norm": 0.7274357676506042, "learning_rate": 1.4475445554960166e-05, "loss": 0.7644513845443726, "step": 891 }, { "epoch": 1.1570249609668066, "grad_norm": 0.7332258224487305, "learning_rate": 1.4462653520012736e-05, "loss": 0.7806090116500854, "step": 892 }, { "epoch": 1.1583226879169455, "grad_norm": 0.7651371359825134, "learning_rate": 1.4449852360068372e-05, "loss": 0.774925947189331, "step": 893 }, { "epoch": 1.1596204148670843, "grad_norm": 0.718445897102356, "learning_rate": 1.4437042101302212e-05, "loss": 0.7388082146644592, "step": 894 }, { "epoch": 1.1609181418172232, "grad_norm": 0.7201905250549316, "learning_rate": 1.4424222769907985e-05, "loss": 0.6872411966323853, "step": 895 }, { "epoch": 1.1622158687673623, "grad_norm": 0.7322660088539124, "learning_rate": 1.4411394392097985e-05, "loss": 0.7020053267478943, "step": 896 }, { "epoch": 1.1635135957175011, "grad_norm": 0.7322126626968384, "learning_rate": 1.4398556994102996e-05, "loss": 0.746367335319519, "step": 897 }, { "epoch": 1.16481132266764, "grad_norm": 0.7316040992736816, "learning_rate": 1.4385710602172245e-05, "loss": 0.7530633807182312, "step": 898 }, { "epoch": 1.1661090496177788, "grad_norm": 0.7623510360717773, "learning_rate": 1.4372855242573356e-05, "loss": 0.7122158408164978, "step": 899 }, { "epoch": 1.1674067765679177, "grad_norm": 0.7587069869041443, "learning_rate": 1.4359990941592283e-05, "loss": 0.7452347278594971, "step": 900 }, { "epoch": 1.1687045035180565, "grad_norm": 0.7146732807159424, "learning_rate": 1.4347117725533269e-05, "loss": 0.670911431312561, "step": 901 }, { "epoch": 1.1700022304681956, "grad_norm": 0.6925002932548523, "learning_rate": 1.4334235620718774e-05, "loss": 0.6600379943847656, "step": 902 }, { "epoch": 1.1712999574183345, "grad_norm": 0.7344015836715698, "learning_rate": 1.4321344653489453e-05, "loss": 0.7038690447807312, "step": 903 }, { "epoch": 1.1725976843684733, "grad_norm": 0.7387973070144653, "learning_rate": 1.4308444850204066e-05, "loss": 0.7008363604545593, "step": 904 }, { "epoch": 1.1738954113186122, "grad_norm": 0.7728487849235535, "learning_rate": 1.4295536237239445e-05, "loss": 0.7336927652359009, "step": 905 }, { "epoch": 1.175193138268751, "grad_norm": 0.7491990923881531, "learning_rate": 1.4282618840990438e-05, "loss": 0.7324055433273315, "step": 906 }, { "epoch": 1.1764908652188901, "grad_norm": 0.723862886428833, "learning_rate": 1.4269692687869849e-05, "loss": 0.7677553296089172, "step": 907 }, { "epoch": 1.177788592169029, "grad_norm": 0.7578226923942566, "learning_rate": 1.425675780430839e-05, "loss": 0.7772313356399536, "step": 908 }, { "epoch": 1.1790863191191678, "grad_norm": 0.7269909977912903, "learning_rate": 1.4243814216754626e-05, "loss": 0.7330427765846252, "step": 909 }, { "epoch": 1.1803840460693067, "grad_norm": 0.7582956552505493, "learning_rate": 1.4230861951674914e-05, "loss": 0.7717634439468384, "step": 910 }, { "epoch": 1.1816817730194455, "grad_norm": 0.7162467837333679, "learning_rate": 1.421790103555336e-05, "loss": 0.7092885375022888, "step": 911 }, { "epoch": 1.1829794999695844, "grad_norm": 0.743224024772644, "learning_rate": 1.4204931494891759e-05, "loss": 0.7082977294921875, "step": 912 }, { "epoch": 1.1842772269197235, "grad_norm": 0.7687066197395325, "learning_rate": 1.4191953356209535e-05, "loss": 0.7173585295677185, "step": 913 }, { "epoch": 1.1855749538698623, "grad_norm": 0.7276656627655029, "learning_rate": 1.4178966646043702e-05, "loss": 0.6923103928565979, "step": 914 }, { "epoch": 1.1868726808200012, "grad_norm": 0.7307775020599365, "learning_rate": 1.4165971390948787e-05, "loss": 0.7817268967628479, "step": 915 }, { "epoch": 1.18817040777014, "grad_norm": 0.7706684470176697, "learning_rate": 1.4152967617496805e-05, "loss": 0.7029048800468445, "step": 916 }, { "epoch": 1.1894681347202791, "grad_norm": 0.7382630705833435, "learning_rate": 1.4139955352277176e-05, "loss": 0.6833078265190125, "step": 917 }, { "epoch": 1.190765861670418, "grad_norm": 0.6961492300033569, "learning_rate": 1.4126934621896692e-05, "loss": 0.6633516550064087, "step": 918 }, { "epoch": 1.1920635886205568, "grad_norm": 0.7289763689041138, "learning_rate": 1.4113905452979455e-05, "loss": 0.7273116707801819, "step": 919 }, { "epoch": 1.1933613155706957, "grad_norm": 0.6953696608543396, "learning_rate": 1.410086787216681e-05, "loss": 0.6880172491073608, "step": 920 }, { "epoch": 1.1933613155706957, "eval_loss": 0.7621704339981079, "eval_runtime": 143.9146, "eval_samples_per_second": 36.077, "eval_steps_per_second": 9.019, "step": 920 }, { "epoch": 1.1946590425208345, "grad_norm": 0.6652716398239136, "learning_rate": 1.4087821906117314e-05, "loss": 0.6670587658882141, "step": 921 }, { "epoch": 1.1959567694709734, "grad_norm": 0.7497081756591797, "learning_rate": 1.4074767581506666e-05, "loss": 0.7381057739257812, "step": 922 }, { "epoch": 1.1972544964211125, "grad_norm": 0.710457444190979, "learning_rate": 1.4061704925027653e-05, "loss": 0.6957287192344666, "step": 923 }, { "epoch": 1.1985522233712513, "grad_norm": 0.7493513226509094, "learning_rate": 1.4048633963390105e-05, "loss": 0.6821112036705017, "step": 924 }, { "epoch": 1.1998499503213902, "grad_norm": 0.7443753480911255, "learning_rate": 1.4035554723320828e-05, "loss": 0.7110794186592102, "step": 925 }, { "epoch": 1.201147677271529, "grad_norm": 0.6964433789253235, "learning_rate": 1.4022467231563554e-05, "loss": 0.6899577379226685, "step": 926 }, { "epoch": 1.202445404221668, "grad_norm": 0.718528687953949, "learning_rate": 1.4009371514878898e-05, "loss": 0.7851035594940186, "step": 927 }, { "epoch": 1.203743131171807, "grad_norm": 0.7249849438667297, "learning_rate": 1.399626760004428e-05, "loss": 0.7298780679702759, "step": 928 }, { "epoch": 1.2050408581219458, "grad_norm": 0.6934380531311035, "learning_rate": 1.3983155513853897e-05, "loss": 0.7791250944137573, "step": 929 }, { "epoch": 1.2063385850720847, "grad_norm": 0.704552173614502, "learning_rate": 1.3970035283118639e-05, "loss": 0.7045942544937134, "step": 930 }, { "epoch": 1.2076363120222235, "grad_norm": 0.748252809047699, "learning_rate": 1.3956906934666056e-05, "loss": 0.7210633158683777, "step": 931 }, { "epoch": 1.2089340389723624, "grad_norm": 0.7162604331970215, "learning_rate": 1.3943770495340307e-05, "loss": 0.7707422375679016, "step": 932 }, { "epoch": 1.2102317659225013, "grad_norm": 0.6919230222702026, "learning_rate": 1.3930625992002076e-05, "loss": 0.7039645910263062, "step": 933 }, { "epoch": 1.2115294928726403, "grad_norm": 0.7416049242019653, "learning_rate": 1.391747345152855e-05, "loss": 0.7351235747337341, "step": 934 }, { "epoch": 1.2128272198227792, "grad_norm": 0.7046512961387634, "learning_rate": 1.3904312900813345e-05, "loss": 0.659813642501831, "step": 935 }, { "epoch": 1.214124946772918, "grad_norm": 0.6865445971488953, "learning_rate": 1.3891144366766457e-05, "loss": 0.6879123449325562, "step": 936 }, { "epoch": 1.215422673723057, "grad_norm": 0.7112798094749451, "learning_rate": 1.3877967876314205e-05, "loss": 0.745692789554596, "step": 937 }, { "epoch": 1.216720400673196, "grad_norm": 0.7131559252738953, "learning_rate": 1.3864783456399174e-05, "loss": 0.7047199010848999, "step": 938 }, { "epoch": 1.2180181276233348, "grad_norm": 0.7183334231376648, "learning_rate": 1.3851591133980167e-05, "loss": 0.7335140109062195, "step": 939 }, { "epoch": 1.2193158545734737, "grad_norm": 0.7161308526992798, "learning_rate": 1.3838390936032146e-05, "loss": 0.6805643439292908, "step": 940 }, { "epoch": 1.2206135815236125, "grad_norm": 0.6899462938308716, "learning_rate": 1.3825182889546173e-05, "loss": 0.6711665391921997, "step": 941 }, { "epoch": 1.2219113084737514, "grad_norm": 0.7179728150367737, "learning_rate": 1.3811967021529362e-05, "loss": 0.730987012386322, "step": 942 }, { "epoch": 1.2232090354238903, "grad_norm": 0.7028578519821167, "learning_rate": 1.3798743359004816e-05, "loss": 0.7164129614830017, "step": 943 }, { "epoch": 1.2245067623740293, "grad_norm": 0.7241238355636597, "learning_rate": 1.378551192901158e-05, "loss": 0.6604956984519958, "step": 944 }, { "epoch": 1.2258044893241682, "grad_norm": 0.6871349215507507, "learning_rate": 1.3772272758604576e-05, "loss": 0.705906093120575, "step": 945 }, { "epoch": 1.227102216274307, "grad_norm": 0.7182629108428955, "learning_rate": 1.375902587485456e-05, "loss": 0.6978931427001953, "step": 946 }, { "epoch": 1.228399943224446, "grad_norm": 0.7523950934410095, "learning_rate": 1.3745771304848056e-05, "loss": 0.669691264629364, "step": 947 }, { "epoch": 1.2296976701745848, "grad_norm": 0.736535906791687, "learning_rate": 1.3732509075687302e-05, "loss": 0.6971163749694824, "step": 948 }, { "epoch": 1.2309953971247238, "grad_norm": 0.773280143737793, "learning_rate": 1.3719239214490203e-05, "loss": 0.7307339906692505, "step": 949 }, { "epoch": 1.2322931240748627, "grad_norm": 0.7597857713699341, "learning_rate": 1.3705961748390264e-05, "loss": 0.6916163563728333, "step": 950 }, { "epoch": 1.2335908510250015, "grad_norm": 0.7426233291625977, "learning_rate": 1.3692676704536547e-05, "loss": 0.7779046297073364, "step": 951 }, { "epoch": 1.2348885779751404, "grad_norm": 0.7428677082061768, "learning_rate": 1.3679384110093601e-05, "loss": 0.7056743502616882, "step": 952 }, { "epoch": 1.2361863049252793, "grad_norm": 0.7308823466300964, "learning_rate": 1.3666083992241414e-05, "loss": 0.7445065379142761, "step": 953 }, { "epoch": 1.2374840318754181, "grad_norm": 0.7000466585159302, "learning_rate": 1.3652776378175366e-05, "loss": 0.7621708512306213, "step": 954 }, { "epoch": 1.2387817588255572, "grad_norm": 0.7069138288497925, "learning_rate": 1.3639461295106157e-05, "loss": 0.6963789463043213, "step": 955 }, { "epoch": 1.240079485775696, "grad_norm": 0.7114101052284241, "learning_rate": 1.3626138770259765e-05, "loss": 0.6562871932983398, "step": 956 }, { "epoch": 1.241377212725835, "grad_norm": 0.7246086597442627, "learning_rate": 1.3612808830877377e-05, "loss": 0.6914277672767639, "step": 957 }, { "epoch": 1.2426749396759738, "grad_norm": 0.7212405800819397, "learning_rate": 1.3599471504215347e-05, "loss": 0.7332183122634888, "step": 958 }, { "epoch": 1.2439726666261126, "grad_norm": 0.725243866443634, "learning_rate": 1.358612681754513e-05, "loss": 0.7095848321914673, "step": 959 }, { "epoch": 1.2452703935762517, "grad_norm": 0.7690359354019165, "learning_rate": 1.357277479815324e-05, "loss": 0.7376914024353027, "step": 960 }, { "epoch": 1.2465681205263905, "grad_norm": 0.7036330699920654, "learning_rate": 1.355941547334117e-05, "loss": 0.6845636367797852, "step": 961 }, { "epoch": 1.2478658474765294, "grad_norm": 0.7338976860046387, "learning_rate": 1.3546048870425356e-05, "loss": 0.6979953050613403, "step": 962 }, { "epoch": 1.2491635744266683, "grad_norm": 0.7343106865882874, "learning_rate": 1.3532675016737127e-05, "loss": 0.7461492419242859, "step": 963 }, { "epoch": 1.250461301376807, "grad_norm": 0.7208863496780396, "learning_rate": 1.3519293939622622e-05, "loss": 0.8038127422332764, "step": 964 }, { "epoch": 1.251759028326946, "grad_norm": 0.7410427331924438, "learning_rate": 1.3505905666442757e-05, "loss": 0.7741251587867737, "step": 965 }, { "epoch": 1.253056755277085, "grad_norm": 0.711874783039093, "learning_rate": 1.3492510224573165e-05, "loss": 0.6908672451972961, "step": 966 }, { "epoch": 1.254354482227224, "grad_norm": 0.6897700428962708, "learning_rate": 1.3479107641404134e-05, "loss": 0.6856587529182434, "step": 967 }, { "epoch": 1.2556522091773628, "grad_norm": 0.6764082908630371, "learning_rate": 1.3465697944340552e-05, "loss": 0.6477972865104675, "step": 968 }, { "epoch": 1.2569499361275016, "grad_norm": 0.7004117965698242, "learning_rate": 1.3452281160801856e-05, "loss": 0.7135658264160156, "step": 969 }, { "epoch": 1.2582476630776407, "grad_norm": 0.7178849577903748, "learning_rate": 1.3438857318221974e-05, "loss": 0.7354244589805603, "step": 970 }, { "epoch": 1.2595453900277795, "grad_norm": 0.7121056318283081, "learning_rate": 1.3425426444049265e-05, "loss": 0.7121109962463379, "step": 971 }, { "epoch": 1.2608431169779184, "grad_norm": 0.8285553455352783, "learning_rate": 1.3411988565746467e-05, "loss": 0.7759053111076355, "step": 972 }, { "epoch": 1.2621408439280573, "grad_norm": 0.6977941989898682, "learning_rate": 1.3398543710790642e-05, "loss": 0.7189201712608337, "step": 973 }, { "epoch": 1.263438570878196, "grad_norm": 0.7547982931137085, "learning_rate": 1.3385091906673115e-05, "loss": 0.7352871298789978, "step": 974 }, { "epoch": 1.264736297828335, "grad_norm": 0.7178804278373718, "learning_rate": 1.3371633180899417e-05, "loss": 0.7920108437538147, "step": 975 }, { "epoch": 1.2660340247784738, "grad_norm": 0.7035505771636963, "learning_rate": 1.335816756098924e-05, "loss": 0.7362672090530396, "step": 976 }, { "epoch": 1.267331751728613, "grad_norm": 0.7581067681312561, "learning_rate": 1.3344695074476365e-05, "loss": 0.7702075839042664, "step": 977 }, { "epoch": 1.2686294786787518, "grad_norm": 0.7533540725708008, "learning_rate": 1.3331215748908622e-05, "loss": 0.7555018067359924, "step": 978 }, { "epoch": 1.2699272056288906, "grad_norm": 0.7056939601898193, "learning_rate": 1.3317729611847818e-05, "loss": 0.7297285795211792, "step": 979 }, { "epoch": 1.2712249325790295, "grad_norm": 0.7933931946754456, "learning_rate": 1.3304236690869688e-05, "loss": 0.7637395262718201, "step": 980 }, { "epoch": 1.2725226595291685, "grad_norm": 0.7511240243911743, "learning_rate": 1.329073701356384e-05, "loss": 0.7278518676757812, "step": 981 }, { "epoch": 1.2738203864793074, "grad_norm": 0.6915922164916992, "learning_rate": 1.3277230607533698e-05, "loss": 0.6694924831390381, "step": 982 }, { "epoch": 1.2751181134294463, "grad_norm": 0.7327374219894409, "learning_rate": 1.3263717500396446e-05, "loss": 0.714762806892395, "step": 983 }, { "epoch": 1.276415840379585, "grad_norm": 0.7382856607437134, "learning_rate": 1.3250197719782966e-05, "loss": 0.7134686708450317, "step": 984 }, { "epoch": 1.277713567329724, "grad_norm": 0.7472854256629944, "learning_rate": 1.3236671293337788e-05, "loss": 0.7220948934555054, "step": 985 }, { "epoch": 1.2790112942798628, "grad_norm": 0.7201051712036133, "learning_rate": 1.3223138248719032e-05, "loss": 0.7394418120384216, "step": 986 }, { "epoch": 1.280309021230002, "grad_norm": 0.7629786133766174, "learning_rate": 1.3209598613598344e-05, "loss": 0.7015069127082825, "step": 987 }, { "epoch": 1.2816067481801408, "grad_norm": 0.7126546502113342, "learning_rate": 1.3196052415660856e-05, "loss": 0.7289220690727234, "step": 988 }, { "epoch": 1.2829044751302796, "grad_norm": 0.7296859622001648, "learning_rate": 1.318249968260511e-05, "loss": 0.7893659472465515, "step": 989 }, { "epoch": 1.2842022020804185, "grad_norm": 0.7498401403427124, "learning_rate": 1.316894044214302e-05, "loss": 0.7200069427490234, "step": 990 }, { "epoch": 1.2854999290305575, "grad_norm": 0.7126410603523254, "learning_rate": 1.3155374721999797e-05, "loss": 0.7033067345619202, "step": 991 }, { "epoch": 1.2867976559806964, "grad_norm": 0.7097041606903076, "learning_rate": 1.3141802549913907e-05, "loss": 0.7358456254005432, "step": 992 }, { "epoch": 1.2880953829308353, "grad_norm": 0.6961123943328857, "learning_rate": 1.3128223953637003e-05, "loss": 0.6741704940795898, "step": 993 }, { "epoch": 1.289393109880974, "grad_norm": 0.7323908805847168, "learning_rate": 1.3114638960933883e-05, "loss": 0.8081434965133667, "step": 994 }, { "epoch": 1.290690836831113, "grad_norm": 0.713190495967865, "learning_rate": 1.3101047599582415e-05, "loss": 0.7475412487983704, "step": 995 }, { "epoch": 1.2919885637812518, "grad_norm": 0.7204756140708923, "learning_rate": 1.3087449897373494e-05, "loss": 0.7166237831115723, "step": 996 }, { "epoch": 1.2932862907313907, "grad_norm": 0.7209048271179199, "learning_rate": 1.307384588211098e-05, "loss": 0.7091537117958069, "step": 997 }, { "epoch": 1.2945840176815298, "grad_norm": 0.7139458656311035, "learning_rate": 1.306023558161164e-05, "loss": 0.7146654725074768, "step": 998 }, { "epoch": 1.2958817446316686, "grad_norm": 0.7128956317901611, "learning_rate": 1.3046619023705095e-05, "loss": 0.821353018283844, "step": 999 }, { "epoch": 1.2971794715818075, "grad_norm": 0.7287904620170593, "learning_rate": 1.3032996236233756e-05, "loss": 0.7813044786453247, "step": 1000 }, { "epoch": 1.2984771985319463, "grad_norm": 0.7277258038520813, "learning_rate": 1.3019367247052781e-05, "loss": 0.7448681592941284, "step": 1001 }, { "epoch": 1.2997749254820854, "grad_norm": 0.7179688811302185, "learning_rate": 1.300573208403e-05, "loss": 0.6965285539627075, "step": 1002 }, { "epoch": 1.3010726524322243, "grad_norm": 0.7211664319038391, "learning_rate": 1.2992090775045868e-05, "loss": 0.7049282789230347, "step": 1003 }, { "epoch": 1.302370379382363, "grad_norm": 0.6898071765899658, "learning_rate": 1.2978443347993415e-05, "loss": 0.6415733695030212, "step": 1004 }, { "epoch": 1.303668106332502, "grad_norm": 0.7255175709724426, "learning_rate": 1.296478983077817e-05, "loss": 0.708603024482727, "step": 1005 }, { "epoch": 1.3049658332826408, "grad_norm": 0.7339725494384766, "learning_rate": 1.2951130251318125e-05, "loss": 0.73588627576828, "step": 1006 }, { "epoch": 1.3062635602327797, "grad_norm": 0.6914424300193787, "learning_rate": 1.2937464637543655e-05, "loss": 0.7236727476119995, "step": 1007 }, { "epoch": 1.3075612871829188, "grad_norm": 0.6850101351737976, "learning_rate": 1.2923793017397488e-05, "loss": 0.6565558910369873, "step": 1008 }, { "epoch": 1.3088590141330576, "grad_norm": 0.6893193125724792, "learning_rate": 1.2910115418834624e-05, "loss": 0.6460487246513367, "step": 1009 }, { "epoch": 1.3101567410831965, "grad_norm": 0.7375558018684387, "learning_rate": 1.289643186982229e-05, "loss": 0.8016327619552612, "step": 1010 }, { "epoch": 1.3114544680333353, "grad_norm": 0.7113102078437805, "learning_rate": 1.2882742398339884e-05, "loss": 0.6883566975593567, "step": 1011 }, { "epoch": 1.3127521949834744, "grad_norm": 0.7452290058135986, "learning_rate": 1.2869047032378905e-05, "loss": 0.7325704097747803, "step": 1012 }, { "epoch": 1.3140499219336133, "grad_norm": 0.6935728192329407, "learning_rate": 1.2855345799942915e-05, "loss": 0.689193606376648, "step": 1013 }, { "epoch": 1.315347648883752, "grad_norm": 0.7144383192062378, "learning_rate": 1.2841638729047463e-05, "loss": 0.6948485374450684, "step": 1014 }, { "epoch": 1.316645375833891, "grad_norm": 0.6706473231315613, "learning_rate": 1.2827925847720041e-05, "loss": 0.7062092423439026, "step": 1015 }, { "epoch": 1.3179431027840298, "grad_norm": 0.7125740051269531, "learning_rate": 1.2814207184000018e-05, "loss": 0.6752945780754089, "step": 1016 }, { "epoch": 1.3192408297341687, "grad_norm": 0.7221876978874207, "learning_rate": 1.2800482765938594e-05, "loss": 0.7700286507606506, "step": 1017 }, { "epoch": 1.3205385566843075, "grad_norm": 0.6877630949020386, "learning_rate": 1.2786752621598726e-05, "loss": 0.7289664149284363, "step": 1018 }, { "epoch": 1.3218362836344466, "grad_norm": 0.7257193922996521, "learning_rate": 1.2773016779055089e-05, "loss": 0.6938936710357666, "step": 1019 }, { "epoch": 1.3231340105845855, "grad_norm": 0.6880965828895569, "learning_rate": 1.2759275266393998e-05, "loss": 0.6982592344284058, "step": 1020 }, { "epoch": 1.3244317375347243, "grad_norm": 0.683870792388916, "learning_rate": 1.2745528111713373e-05, "loss": 0.6983235478401184, "step": 1021 }, { "epoch": 1.3257294644848632, "grad_norm": 0.7127654552459717, "learning_rate": 1.2731775343122663e-05, "loss": 0.7544030547142029, "step": 1022 }, { "epoch": 1.3270271914350023, "grad_norm": 0.7284364104270935, "learning_rate": 1.2718016988742799e-05, "loss": 0.7375183701515198, "step": 1023 }, { "epoch": 1.328324918385141, "grad_norm": 0.6857113838195801, "learning_rate": 1.270425307670614e-05, "loss": 0.6983596682548523, "step": 1024 }, { "epoch": 1.32962264533528, "grad_norm": 0.7102038860321045, "learning_rate": 1.2690483635156392e-05, "loss": 0.7385768294334412, "step": 1025 }, { "epoch": 1.3309203722854188, "grad_norm": 0.7345147728919983, "learning_rate": 1.2676708692248583e-05, "loss": 0.6854493618011475, "step": 1026 }, { "epoch": 1.3322180992355577, "grad_norm": 0.7039386630058289, "learning_rate": 1.2662928276148985e-05, "loss": 0.7170513868331909, "step": 1027 }, { "epoch": 1.3335158261856965, "grad_norm": 0.6941388845443726, "learning_rate": 1.264914241503506e-05, "loss": 0.7566976547241211, "step": 1028 }, { "epoch": 1.3348135531358354, "grad_norm": 0.6874922513961792, "learning_rate": 1.2635351137095408e-05, "loss": 0.6834582686424255, "step": 1029 }, { "epoch": 1.3361112800859745, "grad_norm": 0.7201216220855713, "learning_rate": 1.2621554470529698e-05, "loss": 0.734821617603302, "step": 1030 }, { "epoch": 1.3374090070361133, "grad_norm": 0.7032731175422668, "learning_rate": 1.2607752443548622e-05, "loss": 0.7255396842956543, "step": 1031 }, { "epoch": 1.3387067339862522, "grad_norm": 0.7893847823143005, "learning_rate": 1.259394508437383e-05, "loss": 0.7393696308135986, "step": 1032 }, { "epoch": 1.340004460936391, "grad_norm": 0.7231351137161255, "learning_rate": 1.2580132421237883e-05, "loss": 0.7424145340919495, "step": 1033 }, { "epoch": 1.34130218788653, "grad_norm": 0.7326940298080444, "learning_rate": 1.2566314482384174e-05, "loss": 0.7439311742782593, "step": 1034 }, { "epoch": 1.342599914836669, "grad_norm": 0.775790810585022, "learning_rate": 1.2552491296066895e-05, "loss": 0.7325758934020996, "step": 1035 }, { "epoch": 1.3438976417868078, "grad_norm": 0.7467171549797058, "learning_rate": 1.2538662890550959e-05, "loss": 0.7975653409957886, "step": 1036 }, { "epoch": 1.3451953687369467, "grad_norm": 0.762482225894928, "learning_rate": 1.252482929411196e-05, "loss": 0.7613498568534851, "step": 1037 }, { "epoch": 1.3464930956870855, "grad_norm": 0.6938416957855225, "learning_rate": 1.25109905350361e-05, "loss": 0.691423773765564, "step": 1038 }, { "epoch": 1.3477908226372244, "grad_norm": 0.7459502816200256, "learning_rate": 1.249714664162014e-05, "loss": 0.7226969003677368, "step": 1039 }, { "epoch": 1.3490885495873635, "grad_norm": 0.7236127853393555, "learning_rate": 1.2483297642171332e-05, "loss": 0.7204033732414246, "step": 1040 }, { "epoch": 1.3503862765375023, "grad_norm": 0.7287815809249878, "learning_rate": 1.246944356500738e-05, "loss": 0.7803208231925964, "step": 1041 }, { "epoch": 1.3516840034876412, "grad_norm": 0.7607238292694092, "learning_rate": 1.2455584438456366e-05, "loss": 0.7617399096488953, "step": 1042 }, { "epoch": 1.35298173043778, "grad_norm": 0.707085907459259, "learning_rate": 1.2441720290856694e-05, "loss": 0.7277243733406067, "step": 1043 }, { "epoch": 1.354279457387919, "grad_norm": 0.7148833274841309, "learning_rate": 1.2427851150557036e-05, "loss": 0.7467551231384277, "step": 1044 }, { "epoch": 1.355577184338058, "grad_norm": 0.7209689617156982, "learning_rate": 1.241397704591627e-05, "loss": 0.6694290637969971, "step": 1045 }, { "epoch": 1.3568749112881968, "grad_norm": 0.7720620036125183, "learning_rate": 1.2400098005303436e-05, "loss": 0.7658464312553406, "step": 1046 }, { "epoch": 1.3581726382383357, "grad_norm": 0.68074631690979, "learning_rate": 1.238621405709766e-05, "loss": 0.6357854008674622, "step": 1047 }, { "epoch": 1.3594703651884745, "grad_norm": 0.7629329562187195, "learning_rate": 1.2372325229688093e-05, "loss": 0.7309067249298096, "step": 1048 }, { "epoch": 1.3607680921386134, "grad_norm": 0.7004507184028625, "learning_rate": 1.235843155147388e-05, "loss": 0.6715525388717651, "step": 1049 }, { "epoch": 1.3620658190887522, "grad_norm": 0.6997591853141785, "learning_rate": 1.2344533050864071e-05, "loss": 0.6700186729431152, "step": 1050 }, { "epoch": 1.3633635460388913, "grad_norm": 0.7181966304779053, "learning_rate": 1.2330629756277588e-05, "loss": 0.6444705724716187, "step": 1051 }, { "epoch": 1.3646612729890302, "grad_norm": 0.780085563659668, "learning_rate": 1.2316721696143141e-05, "loss": 0.7659810185432434, "step": 1052 }, { "epoch": 1.365958999939169, "grad_norm": 0.690724790096283, "learning_rate": 1.23028088988992e-05, "loss": 0.6315090656280518, "step": 1053 }, { "epoch": 1.3672567268893079, "grad_norm": 0.7686077356338501, "learning_rate": 1.228889139299391e-05, "loss": 0.8060528039932251, "step": 1054 }, { "epoch": 1.368554453839447, "grad_norm": 0.7056965827941895, "learning_rate": 1.2274969206885048e-05, "loss": 0.6794640421867371, "step": 1055 }, { "epoch": 1.3698521807895858, "grad_norm": 0.7886383533477783, "learning_rate": 1.2261042369039966e-05, "loss": 0.7453962564468384, "step": 1056 }, { "epoch": 1.3711499077397247, "grad_norm": 0.6753075122833252, "learning_rate": 1.2247110907935518e-05, "loss": 0.6878754496574402, "step": 1057 }, { "epoch": 1.3724476346898635, "grad_norm": 0.670427143573761, "learning_rate": 1.2233174852058015e-05, "loss": 0.6822103261947632, "step": 1058 }, { "epoch": 1.3737453616400024, "grad_norm": 0.725235641002655, "learning_rate": 1.2219234229903163e-05, "loss": 0.7130811810493469, "step": 1059 }, { "epoch": 1.3750430885901412, "grad_norm": 0.7341755032539368, "learning_rate": 1.2205289069976012e-05, "loss": 0.6956161856651306, "step": 1060 }, { "epoch": 1.37634081554028, "grad_norm": 0.7005776166915894, "learning_rate": 1.2191339400790881e-05, "loss": 0.6915519833564758, "step": 1061 }, { "epoch": 1.3776385424904192, "grad_norm": 0.7250275015830994, "learning_rate": 1.2177385250871312e-05, "loss": 0.7210217118263245, "step": 1062 }, { "epoch": 1.378936269440558, "grad_norm": 0.7169617414474487, "learning_rate": 1.2163426648750009e-05, "loss": 0.7050390839576721, "step": 1063 }, { "epoch": 1.3802339963906969, "grad_norm": 0.7458826303482056, "learning_rate": 1.2149463622968782e-05, "loss": 0.7116800546646118, "step": 1064 }, { "epoch": 1.3815317233408357, "grad_norm": 0.7212430834770203, "learning_rate": 1.2135496202078487e-05, "loss": 0.658031165599823, "step": 1065 }, { "epoch": 1.3828294502909748, "grad_norm": 0.7072278261184692, "learning_rate": 1.2121524414638958e-05, "loss": 0.7117524147033691, "step": 1066 }, { "epoch": 1.3841271772411137, "grad_norm": 0.7267945408821106, "learning_rate": 1.2107548289218968e-05, "loss": 0.690047025680542, "step": 1067 }, { "epoch": 1.3854249041912525, "grad_norm": 0.7326766848564148, "learning_rate": 1.2093567854396158e-05, "loss": 0.7240371704101562, "step": 1068 }, { "epoch": 1.3867226311413914, "grad_norm": 0.6955649256706238, "learning_rate": 1.2079583138756976e-05, "loss": 0.7229723334312439, "step": 1069 }, { "epoch": 1.3880203580915302, "grad_norm": 0.6991240978240967, "learning_rate": 1.206559417089663e-05, "loss": 0.7131638526916504, "step": 1070 }, { "epoch": 1.389318085041669, "grad_norm": 0.7009238600730896, "learning_rate": 1.205160097941901e-05, "loss": 0.7577610611915588, "step": 1071 }, { "epoch": 1.3906158119918082, "grad_norm": 0.7368999719619751, "learning_rate": 1.2037603592936656e-05, "loss": 0.7876178026199341, "step": 1072 }, { "epoch": 1.391913538941947, "grad_norm": 0.7627021670341492, "learning_rate": 1.2023602040070679e-05, "loss": 0.8456990718841553, "step": 1073 }, { "epoch": 1.3932112658920859, "grad_norm": 0.7341564893722534, "learning_rate": 1.2009596349450717e-05, "loss": 0.7692890167236328, "step": 1074 }, { "epoch": 1.3945089928422247, "grad_norm": 0.706305205821991, "learning_rate": 1.1995586549714855e-05, "loss": 0.7290987372398376, "step": 1075 }, { "epoch": 1.3958067197923638, "grad_norm": 0.7150030136108398, "learning_rate": 1.198157266950959e-05, "loss": 0.7904977202415466, "step": 1076 }, { "epoch": 1.3971044467425027, "grad_norm": 0.6936087608337402, "learning_rate": 1.1967554737489762e-05, "loss": 0.7233096361160278, "step": 1077 }, { "epoch": 1.3984021736926415, "grad_norm": 0.705502450466156, "learning_rate": 1.1953532782318491e-05, "loss": 0.6974169015884399, "step": 1078 }, { "epoch": 1.3996999006427804, "grad_norm": 0.7046432495117188, "learning_rate": 1.1939506832667129e-05, "loss": 0.7049128413200378, "step": 1079 }, { "epoch": 1.4009976275929192, "grad_norm": 0.7448377013206482, "learning_rate": 1.1925476917215191e-05, "loss": 0.7288391590118408, "step": 1080 }, { "epoch": 1.402295354543058, "grad_norm": 0.7215666174888611, "learning_rate": 1.1911443064650301e-05, "loss": 0.7517431974411011, "step": 1081 }, { "epoch": 1.403593081493197, "grad_norm": 0.7152860164642334, "learning_rate": 1.189740530366814e-05, "loss": 0.7353943586349487, "step": 1082 }, { "epoch": 1.404890808443336, "grad_norm": 0.7322341203689575, "learning_rate": 1.1883363662972375e-05, "loss": 0.7282765507698059, "step": 1083 }, { "epoch": 1.4061885353934749, "grad_norm": 0.7007766962051392, "learning_rate": 1.1869318171274606e-05, "loss": 0.6773781776428223, "step": 1084 }, { "epoch": 1.4074862623436137, "grad_norm": 0.6969038248062134, "learning_rate": 1.1855268857294308e-05, "loss": 0.7106554508209229, "step": 1085 }, { "epoch": 1.4087839892937526, "grad_norm": 0.7315483093261719, "learning_rate": 1.1841215749758774e-05, "loss": 0.7127244472503662, "step": 1086 }, { "epoch": 1.4100817162438917, "grad_norm": 0.7427330613136292, "learning_rate": 1.182715887740305e-05, "loss": 0.7914733290672302, "step": 1087 }, { "epoch": 1.4113794431940305, "grad_norm": 0.7135612964630127, "learning_rate": 1.1813098268969886e-05, "loss": 0.7351382374763489, "step": 1088 }, { "epoch": 1.4126771701441694, "grad_norm": 0.6763968467712402, "learning_rate": 1.1799033953209664e-05, "loss": 0.7243238687515259, "step": 1089 }, { "epoch": 1.4139748970943082, "grad_norm": 0.6963580250740051, "learning_rate": 1.178496595888035e-05, "loss": 0.718358039855957, "step": 1090 }, { "epoch": 1.415272624044447, "grad_norm": 0.7186612486839294, "learning_rate": 1.1770894314747433e-05, "loss": 0.7567769885063171, "step": 1091 }, { "epoch": 1.416570350994586, "grad_norm": 0.7769639492034912, "learning_rate": 1.1756819049583861e-05, "loss": 0.6931068301200867, "step": 1092 }, { "epoch": 1.417868077944725, "grad_norm": 0.6902489066123962, "learning_rate": 1.1742740192169995e-05, "loss": 0.7427462339401245, "step": 1093 }, { "epoch": 1.4191658048948639, "grad_norm": 0.7374582886695862, "learning_rate": 1.1728657771293529e-05, "loss": 0.7023187279701233, "step": 1094 }, { "epoch": 1.4204635318450027, "grad_norm": 0.7119615077972412, "learning_rate": 1.171457181574945e-05, "loss": 0.7274259328842163, "step": 1095 }, { "epoch": 1.4217612587951416, "grad_norm": 0.7346155047416687, "learning_rate": 1.1700482354339972e-05, "loss": 0.7683991193771362, "step": 1096 }, { "epoch": 1.4230589857452807, "grad_norm": 0.7501071095466614, "learning_rate": 1.168638941587448e-05, "loss": 0.7191241979598999, "step": 1097 }, { "epoch": 1.4243567126954195, "grad_norm": 0.7470526695251465, "learning_rate": 1.1672293029169466e-05, "loss": 0.6885469555854797, "step": 1098 }, { "epoch": 1.4256544396455584, "grad_norm": 0.7323938608169556, "learning_rate": 1.165819322304847e-05, "loss": 0.7280178666114807, "step": 1099 }, { "epoch": 1.4269521665956972, "grad_norm": 0.735260546207428, "learning_rate": 1.164409002634203e-05, "loss": 0.7417027354240417, "step": 1100 }, { "epoch": 1.428249893545836, "grad_norm": 0.6863338351249695, "learning_rate": 1.162998346788761e-05, "loss": 0.7153418660163879, "step": 1101 }, { "epoch": 1.429547620495975, "grad_norm": 0.6918323636054993, "learning_rate": 1.1615873576529556e-05, "loss": 0.7203163504600525, "step": 1102 }, { "epoch": 1.4308453474461138, "grad_norm": 0.6796247363090515, "learning_rate": 1.1601760381119022e-05, "loss": 0.6820694208145142, "step": 1103 }, { "epoch": 1.4321430743962529, "grad_norm": 0.7495130896568298, "learning_rate": 1.158764391051392e-05, "loss": 0.8182595372200012, "step": 1104 }, { "epoch": 1.4334408013463917, "grad_norm": 0.702680766582489, "learning_rate": 1.1573524193578863e-05, "loss": 0.6952674984931946, "step": 1105 }, { "epoch": 1.4347385282965306, "grad_norm": 0.7394551634788513, "learning_rate": 1.1559401259185095e-05, "loss": 0.7986393570899963, "step": 1106 }, { "epoch": 1.4360362552466694, "grad_norm": 0.7024036049842834, "learning_rate": 1.1545275136210441e-05, "loss": 0.7037473917007446, "step": 1107 }, { "epoch": 1.4373339821968085, "grad_norm": 0.7654225826263428, "learning_rate": 1.153114585353925e-05, "loss": 0.788162350654602, "step": 1108 }, { "epoch": 1.4386317091469474, "grad_norm": 0.7220718264579773, "learning_rate": 1.1517013440062326e-05, "loss": 0.677041232585907, "step": 1109 }, { "epoch": 1.4399294360970862, "grad_norm": 0.636647641658783, "learning_rate": 1.1502877924676881e-05, "loss": 0.6478151679039001, "step": 1110 }, { "epoch": 1.441227163047225, "grad_norm": 0.7449962496757507, "learning_rate": 1.1488739336286467e-05, "loss": 0.7527351975440979, "step": 1111 }, { "epoch": 1.442524889997364, "grad_norm": 0.6970670819282532, "learning_rate": 1.1474597703800915e-05, "loss": 0.7169626951217651, "step": 1112 }, { "epoch": 1.4438226169475028, "grad_norm": 0.7441650032997131, "learning_rate": 1.1460453056136285e-05, "loss": 0.750106930732727, "step": 1113 }, { "epoch": 1.4451203438976417, "grad_norm": 0.7144120335578918, "learning_rate": 1.14463054222148e-05, "loss": 0.7835033535957336, "step": 1114 }, { "epoch": 1.4464180708477807, "grad_norm": 0.7178052663803101, "learning_rate": 1.1432154830964796e-05, "loss": 0.755246639251709, "step": 1115 }, { "epoch": 1.4477157977979196, "grad_norm": 0.7312644720077515, "learning_rate": 1.1418001311320649e-05, "loss": 0.7156558632850647, "step": 1116 }, { "epoch": 1.4490135247480584, "grad_norm": 0.6545835137367249, "learning_rate": 1.1403844892222717e-05, "loss": 0.6448360085487366, "step": 1117 }, { "epoch": 1.4503112516981973, "grad_norm": 0.7543350458145142, "learning_rate": 1.1389685602617302e-05, "loss": 0.7119275331497192, "step": 1118 }, { "epoch": 1.4516089786483364, "grad_norm": 0.6919403672218323, "learning_rate": 1.1375523471456564e-05, "loss": 0.6998506188392639, "step": 1119 }, { "epoch": 1.4529067055984752, "grad_norm": 0.7320676445960999, "learning_rate": 1.1361358527698481e-05, "loss": 0.7184922099113464, "step": 1120 }, { "epoch": 1.454204432548614, "grad_norm": 0.672732949256897, "learning_rate": 1.134719080030677e-05, "loss": 0.6867491006851196, "step": 1121 }, { "epoch": 1.455502159498753, "grad_norm": 0.6875948309898376, "learning_rate": 1.1333020318250854e-05, "loss": 0.7337048053741455, "step": 1122 }, { "epoch": 1.4567998864488918, "grad_norm": 0.6922927498817444, "learning_rate": 1.131884711050578e-05, "loss": 0.6915356516838074, "step": 1123 }, { "epoch": 1.4580976133990307, "grad_norm": 0.6755322217941284, "learning_rate": 1.1304671206052168e-05, "loss": 0.6491101980209351, "step": 1124 }, { "epoch": 1.4593953403491697, "grad_norm": 0.698635995388031, "learning_rate": 1.1290492633876164e-05, "loss": 0.7431061267852783, "step": 1125 }, { "epoch": 1.4606930672993086, "grad_norm": 0.6657348871231079, "learning_rate": 1.1276311422969349e-05, "loss": 0.7039294838905334, "step": 1126 }, { "epoch": 1.4619907942494474, "grad_norm": 0.7172051072120667, "learning_rate": 1.1262127602328712e-05, "loss": 0.7308294773101807, "step": 1127 }, { "epoch": 1.4632885211995863, "grad_norm": 0.6960781812667847, "learning_rate": 1.124794120095658e-05, "loss": 0.693443238735199, "step": 1128 }, { "epoch": 1.4645862481497254, "grad_norm": 0.759774386882782, "learning_rate": 1.1233752247860549e-05, "loss": 0.7438464760780334, "step": 1129 }, { "epoch": 1.4658839750998642, "grad_norm": 0.7278202772140503, "learning_rate": 1.1219560772053442e-05, "loss": 0.7231059074401855, "step": 1130 }, { "epoch": 1.467181702050003, "grad_norm": 0.7277034521102905, "learning_rate": 1.1205366802553231e-05, "loss": 0.6796480417251587, "step": 1131 }, { "epoch": 1.468479429000142, "grad_norm": 0.7773372530937195, "learning_rate": 1.1191170368382992e-05, "loss": 0.7957556247711182, "step": 1132 }, { "epoch": 1.4697771559502808, "grad_norm": 0.7063891887664795, "learning_rate": 1.117697149857084e-05, "loss": 0.7295725345611572, "step": 1133 }, { "epoch": 1.4710748829004197, "grad_norm": 0.7076992988586426, "learning_rate": 1.1162770222149873e-05, "loss": 0.7353643178939819, "step": 1134 }, { "epoch": 1.4723726098505585, "grad_norm": 0.7097960710525513, "learning_rate": 1.1148566568158099e-05, "loss": 0.6855234503746033, "step": 1135 }, { "epoch": 1.4736703368006976, "grad_norm": 0.7133991122245789, "learning_rate": 1.1134360565638402e-05, "loss": 0.7381144762039185, "step": 1136 }, { "epoch": 1.4749680637508364, "grad_norm": 0.6666829586029053, "learning_rate": 1.1120152243638457e-05, "loss": 0.7571398019790649, "step": 1137 }, { "epoch": 1.4762657907009753, "grad_norm": 0.7138345837593079, "learning_rate": 1.1105941631210694e-05, "loss": 0.7363887429237366, "step": 1138 }, { "epoch": 1.4775635176511142, "grad_norm": 0.6881229877471924, "learning_rate": 1.1091728757412212e-05, "loss": 0.6838353276252747, "step": 1139 }, { "epoch": 1.4788612446012532, "grad_norm": 0.6954206824302673, "learning_rate": 1.107751365130474e-05, "loss": 0.6892279386520386, "step": 1140 }, { "epoch": 1.480158971551392, "grad_norm": 0.7325204014778137, "learning_rate": 1.1063296341954577e-05, "loss": 0.7068898677825928, "step": 1141 }, { "epoch": 1.481456698501531, "grad_norm": 0.7389767169952393, "learning_rate": 1.1049076858432517e-05, "loss": 0.7737511396408081, "step": 1142 }, { "epoch": 1.4827544254516698, "grad_norm": 0.7286487817764282, "learning_rate": 1.1034855229813812e-05, "loss": 0.7521780729293823, "step": 1143 }, { "epoch": 1.4840521524018087, "grad_norm": 0.7211914658546448, "learning_rate": 1.1020631485178084e-05, "loss": 0.7648857831954956, "step": 1144 }, { "epoch": 1.4853498793519475, "grad_norm": 0.6989269852638245, "learning_rate": 1.1006405653609295e-05, "loss": 0.7818325161933899, "step": 1145 }, { "epoch": 1.4866476063020864, "grad_norm": 0.7269567251205444, "learning_rate": 1.0992177764195671e-05, "loss": 0.7369544506072998, "step": 1146 }, { "epoch": 1.4879453332522254, "grad_norm": 0.7193188071250916, "learning_rate": 1.0977947846029642e-05, "loss": 0.7326228022575378, "step": 1147 }, { "epoch": 1.4892430602023643, "grad_norm": 0.6688587665557861, "learning_rate": 1.0963715928207795e-05, "loss": 0.6900015473365784, "step": 1148 }, { "epoch": 1.4905407871525032, "grad_norm": 0.7130873203277588, "learning_rate": 1.094948203983079e-05, "loss": 0.7647519707679749, "step": 1149 }, { "epoch": 1.491838514102642, "grad_norm": 0.7038359642028809, "learning_rate": 1.0935246210003334e-05, "loss": 0.7078969478607178, "step": 1150 }, { "epoch": 1.491838514102642, "eval_loss": 0.7540779113769531, "eval_runtime": 144.3473, "eval_samples_per_second": 35.969, "eval_steps_per_second": 8.992, "step": 1150 }, { "epoch": 1.493136241052781, "grad_norm": 0.7373347878456116, "learning_rate": 1.0921008467834094e-05, "loss": 0.7495899200439453, "step": 1151 }, { "epoch": 1.49443396800292, "grad_norm": 0.7285864949226379, "learning_rate": 1.0906768842435647e-05, "loss": 0.7451608777046204, "step": 1152 }, { "epoch": 1.4957316949530588, "grad_norm": 0.7112108469009399, "learning_rate": 1.0892527362924426e-05, "loss": 0.6732929944992065, "step": 1153 }, { "epoch": 1.4970294219031977, "grad_norm": 0.7155210971832275, "learning_rate": 1.0878284058420647e-05, "loss": 0.7473354339599609, "step": 1154 }, { "epoch": 1.4983271488533365, "grad_norm": 0.7318425178527832, "learning_rate": 1.0864038958048267e-05, "loss": 0.6648399829864502, "step": 1155 }, { "epoch": 1.4996248758034754, "grad_norm": 0.6885069012641907, "learning_rate": 1.084979209093491e-05, "loss": 0.7034338712692261, "step": 1156 }, { "epoch": 1.5009226027536142, "grad_norm": 0.7019109129905701, "learning_rate": 1.0835543486211815e-05, "loss": 0.7674492001533508, "step": 1157 }, { "epoch": 1.5022203297037533, "grad_norm": 0.7262328267097473, "learning_rate": 1.0821293173013769e-05, "loss": 0.7348574995994568, "step": 1158 }, { "epoch": 1.5035180566538922, "grad_norm": 0.6678932905197144, "learning_rate": 1.0807041180479054e-05, "loss": 0.6102491617202759, "step": 1159 }, { "epoch": 1.504815783604031, "grad_norm": 0.7315651178359985, "learning_rate": 1.0792787537749392e-05, "loss": 0.7893344163894653, "step": 1160 }, { "epoch": 1.50611351055417, "grad_norm": 0.7274885773658752, "learning_rate": 1.0778532273969877e-05, "loss": 0.6995629072189331, "step": 1161 }, { "epoch": 1.507411237504309, "grad_norm": 0.6988937258720398, "learning_rate": 1.0764275418288908e-05, "loss": 0.753483772277832, "step": 1162 }, { "epoch": 1.5087089644544478, "grad_norm": 0.714952290058136, "learning_rate": 1.0750016999858151e-05, "loss": 0.7254124283790588, "step": 1163 }, { "epoch": 1.5100066914045867, "grad_norm": 0.7081964015960693, "learning_rate": 1.0735757047832461e-05, "loss": 0.7344964146614075, "step": 1164 }, { "epoch": 1.5113044183547255, "grad_norm": 0.6843774914741516, "learning_rate": 1.0721495591369832e-05, "loss": 0.6407060623168945, "step": 1165 }, { "epoch": 1.5126021453048644, "grad_norm": 0.7179701924324036, "learning_rate": 1.0707232659631333e-05, "loss": 0.7781057357788086, "step": 1166 }, { "epoch": 1.5138998722550032, "grad_norm": 0.7363991141319275, "learning_rate": 1.0692968281781046e-05, "loss": 0.6866899132728577, "step": 1167 }, { "epoch": 1.5151975992051423, "grad_norm": 0.6679601669311523, "learning_rate": 1.0678702486986016e-05, "loss": 0.6717002391815186, "step": 1168 }, { "epoch": 1.5164953261552812, "grad_norm": 0.6931522488594055, "learning_rate": 1.0664435304416185e-05, "loss": 0.6953310966491699, "step": 1169 }, { "epoch": 1.51779305310542, "grad_norm": 0.738691508769989, "learning_rate": 1.065016676324433e-05, "loss": 0.7797529101371765, "step": 1170 }, { "epoch": 1.519090780055559, "grad_norm": 0.6795670390129089, "learning_rate": 1.0635896892645998e-05, "loss": 0.652160108089447, "step": 1171 }, { "epoch": 1.520388507005698, "grad_norm": 0.7034809589385986, "learning_rate": 1.0621625721799473e-05, "loss": 0.7155415415763855, "step": 1172 }, { "epoch": 1.5216862339558368, "grad_norm": 0.7075764536857605, "learning_rate": 1.0607353279885682e-05, "loss": 0.6893566846847534, "step": 1173 }, { "epoch": 1.5229839609059757, "grad_norm": 0.696140468120575, "learning_rate": 1.0593079596088155e-05, "loss": 0.6836467981338501, "step": 1174 }, { "epoch": 1.5242816878561145, "grad_norm": 0.7141397595405579, "learning_rate": 1.0578804699592968e-05, "loss": 0.7246308326721191, "step": 1175 }, { "epoch": 1.5255794148062534, "grad_norm": 0.6880807280540466, "learning_rate": 1.0564528619588668e-05, "loss": 0.6564866304397583, "step": 1176 }, { "epoch": 1.5268771417563922, "grad_norm": 0.6661361455917358, "learning_rate": 1.0550251385266223e-05, "loss": 0.6993754506111145, "step": 1177 }, { "epoch": 1.528174868706531, "grad_norm": 0.7442536950111389, "learning_rate": 1.0535973025818969e-05, "loss": 0.7055092453956604, "step": 1178 }, { "epoch": 1.5294725956566702, "grad_norm": 0.7330362200737, "learning_rate": 1.0521693570442533e-05, "loss": 0.7582162022590637, "step": 1179 }, { "epoch": 1.530770322606809, "grad_norm": 0.722161591053009, "learning_rate": 1.050741304833479e-05, "loss": 0.7415435314178467, "step": 1180 }, { "epoch": 1.5320680495569479, "grad_norm": 0.69851154088974, "learning_rate": 1.0493131488695789e-05, "loss": 0.6807332038879395, "step": 1181 }, { "epoch": 1.533365776507087, "grad_norm": 0.7059313654899597, "learning_rate": 1.0478848920727707e-05, "loss": 0.7028640508651733, "step": 1182 }, { "epoch": 1.5346635034572258, "grad_norm": 0.6546805500984192, "learning_rate": 1.0464565373634784e-05, "loss": 0.6459164619445801, "step": 1183 }, { "epoch": 1.5359612304073647, "grad_norm": 0.6890950202941895, "learning_rate": 1.0450280876623253e-05, "loss": 0.7195508480072021, "step": 1184 }, { "epoch": 1.5372589573575035, "grad_norm": 0.6886339783668518, "learning_rate": 1.0435995458901298e-05, "loss": 0.7041788697242737, "step": 1185 }, { "epoch": 1.5385566843076424, "grad_norm": 0.7007988095283508, "learning_rate": 1.042170914967898e-05, "loss": 0.6726493835449219, "step": 1186 }, { "epoch": 1.5398544112577812, "grad_norm": 0.7152829766273499, "learning_rate": 1.0407421978168186e-05, "loss": 0.7684251666069031, "step": 1187 }, { "epoch": 1.54115213820792, "grad_norm": 0.7171955108642578, "learning_rate": 1.0393133973582572e-05, "loss": 0.7586410045623779, "step": 1188 }, { "epoch": 1.542449865158059, "grad_norm": 0.7011827230453491, "learning_rate": 1.0378845165137483e-05, "loss": 0.6837091445922852, "step": 1189 }, { "epoch": 1.543747592108198, "grad_norm": 0.7295593619346619, "learning_rate": 1.0364555582049917e-05, "loss": 0.7210373282432556, "step": 1190 }, { "epoch": 1.5450453190583369, "grad_norm": 0.7250920534133911, "learning_rate": 1.0350265253538458e-05, "loss": 0.7209242582321167, "step": 1191 }, { "epoch": 1.546343046008476, "grad_norm": 0.7172147631645203, "learning_rate": 1.033597420882321e-05, "loss": 0.771355390548706, "step": 1192 }, { "epoch": 1.5476407729586148, "grad_norm": 0.7234722375869751, "learning_rate": 1.0321682477125743e-05, "loss": 0.7173848152160645, "step": 1193 }, { "epoch": 1.5489384999087537, "grad_norm": 0.7182676792144775, "learning_rate": 1.0307390087669026e-05, "loss": 0.6971171498298645, "step": 1194 }, { "epoch": 1.5502362268588925, "grad_norm": 0.711088240146637, "learning_rate": 1.0293097069677382e-05, "loss": 0.7250340580940247, "step": 1195 }, { "epoch": 1.5515339538090314, "grad_norm": 0.7057585120201111, "learning_rate": 1.0278803452376416e-05, "loss": 0.6538138389587402, "step": 1196 }, { "epoch": 1.5528316807591702, "grad_norm": 0.7198209166526794, "learning_rate": 1.0264509264992954e-05, "loss": 0.7397878170013428, "step": 1197 }, { "epoch": 1.554129407709309, "grad_norm": 0.7141586542129517, "learning_rate": 1.0250214536754996e-05, "loss": 0.7416911125183105, "step": 1198 }, { "epoch": 1.555427134659448, "grad_norm": 0.6700720191001892, "learning_rate": 1.0235919296891641e-05, "loss": 0.6646735072135925, "step": 1199 }, { "epoch": 1.556724861609587, "grad_norm": 0.7620872855186462, "learning_rate": 1.0221623574633035e-05, "loss": 0.7746062874794006, "step": 1200 }, { "epoch": 1.5580225885597259, "grad_norm": 0.7291470766067505, "learning_rate": 1.0207327399210311e-05, "loss": 0.7022420167922974, "step": 1201 }, { "epoch": 1.5593203155098647, "grad_norm": 0.7325419783592224, "learning_rate": 1.0193030799855534e-05, "loss": 0.6780503988265991, "step": 1202 }, { "epoch": 1.5606180424600038, "grad_norm": 0.7144452929496765, "learning_rate": 1.0178733805801626e-05, "loss": 0.7393384575843811, "step": 1203 }, { "epoch": 1.5619157694101427, "grad_norm": 0.7362129092216492, "learning_rate": 1.0164436446282324e-05, "loss": 0.7512763142585754, "step": 1204 }, { "epoch": 1.5632134963602815, "grad_norm": 0.6746947765350342, "learning_rate": 1.015013875053211e-05, "loss": 0.6646847128868103, "step": 1205 }, { "epoch": 1.5645112233104204, "grad_norm": 0.6834600567817688, "learning_rate": 1.013584074778615e-05, "loss": 0.6130549311637878, "step": 1206 }, { "epoch": 1.5658089502605592, "grad_norm": 0.7684876918792725, "learning_rate": 1.0121542467280245e-05, "loss": 0.7241174578666687, "step": 1207 }, { "epoch": 1.567106677210698, "grad_norm": 0.7327429056167603, "learning_rate": 1.0107243938250755e-05, "loss": 0.6390076875686646, "step": 1208 }, { "epoch": 1.568404404160837, "grad_norm": 0.6959134340286255, "learning_rate": 1.0092945189934558e-05, "loss": 0.7467840909957886, "step": 1209 }, { "epoch": 1.5697021311109758, "grad_norm": 0.7259625792503357, "learning_rate": 1.007864625156897e-05, "loss": 0.7787569165229797, "step": 1210 }, { "epoch": 1.5709998580611149, "grad_norm": 0.7313428521156311, "learning_rate": 1.0064347152391703e-05, "loss": 0.7091028690338135, "step": 1211 }, { "epoch": 1.5722975850112537, "grad_norm": 0.7232116460800171, "learning_rate": 1.0050047921640797e-05, "loss": 0.6815755367279053, "step": 1212 }, { "epoch": 1.5735953119613926, "grad_norm": 0.7286602854728699, "learning_rate": 1.003574858855456e-05, "loss": 0.72878098487854, "step": 1213 }, { "epoch": 1.5748930389115317, "grad_norm": 0.6926529407501221, "learning_rate": 1.0021449182371504e-05, "loss": 0.6855754852294922, "step": 1214 }, { "epoch": 1.5761907658616705, "grad_norm": 0.7037234306335449, "learning_rate": 1.0007149732330299e-05, "loss": 0.6827071309089661, "step": 1215 }, { "epoch": 1.5774884928118094, "grad_norm": 0.7107639908790588, "learning_rate": 9.992850267669703e-06, "loss": 0.7649542093276978, "step": 1216 }, { "epoch": 1.5787862197619482, "grad_norm": 0.737821102142334, "learning_rate": 9.978550817628501e-06, "loss": 0.6636335849761963, "step": 1217 }, { "epoch": 1.580083946712087, "grad_norm": 0.7441766858100891, "learning_rate": 9.964251411445444e-06, "loss": 0.7413192391395569, "step": 1218 }, { "epoch": 1.581381673662226, "grad_norm": 0.750579833984375, "learning_rate": 9.949952078359208e-06, "loss": 0.7131896018981934, "step": 1219 }, { "epoch": 1.5826794006123648, "grad_norm": 0.7051860690116882, "learning_rate": 9.935652847608302e-06, "loss": 0.7157960534095764, "step": 1220 }, { "epoch": 1.5839771275625036, "grad_norm": 0.6900631785392761, "learning_rate": 9.921353748431036e-06, "loss": 0.6898972392082214, "step": 1221 }, { "epoch": 1.5852748545126427, "grad_norm": 0.7243295907974243, "learning_rate": 9.907054810065446e-06, "loss": 0.6597715616226196, "step": 1222 }, { "epoch": 1.5865725814627816, "grad_norm": 0.6974424123764038, "learning_rate": 9.89275606174925e-06, "loss": 0.6871618032455444, "step": 1223 }, { "epoch": 1.5878703084129207, "grad_norm": 0.6947103142738342, "learning_rate": 9.878457532719757e-06, "loss": 0.680080235004425, "step": 1224 }, { "epoch": 1.5891680353630595, "grad_norm": 0.7873682975769043, "learning_rate": 9.864159252213852e-06, "loss": 0.7676745057106018, "step": 1225 }, { "epoch": 1.5904657623131984, "grad_norm": 0.7117084860801697, "learning_rate": 9.849861249467893e-06, "loss": 0.7582260370254517, "step": 1226 }, { "epoch": 1.5917634892633372, "grad_norm": 0.7120140194892883, "learning_rate": 9.83556355371768e-06, "loss": 0.7325617074966431, "step": 1227 }, { "epoch": 1.593061216213476, "grad_norm": 0.8112825155258179, "learning_rate": 9.821266194198375e-06, "loss": 0.704188883304596, "step": 1228 }, { "epoch": 1.594358943163615, "grad_norm": 0.6812202334403992, "learning_rate": 9.806969200144471e-06, "loss": 0.6495468616485596, "step": 1229 }, { "epoch": 1.5956566701137538, "grad_norm": 0.672173261642456, "learning_rate": 9.79267260078969e-06, "loss": 0.7104700207710266, "step": 1230 }, { "epoch": 1.5969543970638926, "grad_norm": 0.7402030229568481, "learning_rate": 9.778376425366967e-06, "loss": 0.7161640524864197, "step": 1231 }, { "epoch": 1.5982521240140317, "grad_norm": 0.7105618119239807, "learning_rate": 9.764080703108362e-06, "loss": 0.7429479956626892, "step": 1232 }, { "epoch": 1.5995498509641706, "grad_norm": 0.7068690657615662, "learning_rate": 9.749785463245006e-06, "loss": 0.7453438639640808, "step": 1233 }, { "epoch": 1.6008475779143094, "grad_norm": 0.7170218825340271, "learning_rate": 9.735490735007047e-06, "loss": 0.7229534387588501, "step": 1234 }, { "epoch": 1.6021453048644485, "grad_norm": 0.6783753633499146, "learning_rate": 9.721196547623585e-06, "loss": 0.7175101041793823, "step": 1235 }, { "epoch": 1.6034430318145874, "grad_norm": 0.7113945484161377, "learning_rate": 9.706902930322621e-06, "loss": 0.7054000496864319, "step": 1236 }, { "epoch": 1.6047407587647262, "grad_norm": 0.7143622636795044, "learning_rate": 9.692609912330975e-06, "loss": 0.7337828278541565, "step": 1237 }, { "epoch": 1.606038485714865, "grad_norm": 0.7191219329833984, "learning_rate": 9.67831752287426e-06, "loss": 0.7462741136550903, "step": 1238 }, { "epoch": 1.607336212665004, "grad_norm": 0.6787925362586975, "learning_rate": 9.66402579117679e-06, "loss": 0.6983505487442017, "step": 1239 }, { "epoch": 1.6086339396151428, "grad_norm": 0.7183864712715149, "learning_rate": 9.649734746461544e-06, "loss": 0.7454296350479126, "step": 1240 }, { "epoch": 1.6099316665652816, "grad_norm": 0.7119743227958679, "learning_rate": 9.635444417950083e-06, "loss": 0.6732832193374634, "step": 1241 }, { "epoch": 1.6112293935154205, "grad_norm": 0.7184067368507385, "learning_rate": 9.62115483486252e-06, "loss": 0.6472535729408264, "step": 1242 }, { "epoch": 1.6125271204655596, "grad_norm": 0.693452000617981, "learning_rate": 9.606866026417431e-06, "loss": 0.7115393877029419, "step": 1243 }, { "epoch": 1.6138248474156984, "grad_norm": 0.749234139919281, "learning_rate": 9.592578021831817e-06, "loss": 0.775533139705658, "step": 1244 }, { "epoch": 1.6151225743658375, "grad_norm": 0.7310823798179626, "learning_rate": 9.578290850321023e-06, "loss": 0.7301318645477295, "step": 1245 }, { "epoch": 1.6164203013159764, "grad_norm": 0.7240172028541565, "learning_rate": 9.564004541098709e-06, "loss": 0.6760499477386475, "step": 1246 }, { "epoch": 1.6177180282661152, "grad_norm": 0.7192076444625854, "learning_rate": 9.549719123376749e-06, "loss": 0.8106221556663513, "step": 1247 }, { "epoch": 1.619015755216254, "grad_norm": 0.763373851776123, "learning_rate": 9.535434626365221e-06, "loss": 0.7758911848068237, "step": 1248 }, { "epoch": 1.620313482166393, "grad_norm": 0.766298234462738, "learning_rate": 9.521151079272295e-06, "loss": 0.8113157749176025, "step": 1249 }, { "epoch": 1.6216112091165318, "grad_norm": 0.7206328511238098, "learning_rate": 9.506868511304216e-06, "loss": 0.7105867266654968, "step": 1250 }, { "epoch": 1.6229089360666706, "grad_norm": 0.7417821288108826, "learning_rate": 9.492586951665214e-06, "loss": 0.7875233888626099, "step": 1251 }, { "epoch": 1.6242066630168095, "grad_norm": 0.6913713812828064, "learning_rate": 9.47830642955747e-06, "loss": 0.6810760498046875, "step": 1252 }, { "epoch": 1.6255043899669486, "grad_norm": 0.7151052355766296, "learning_rate": 9.464026974181035e-06, "loss": 0.7549710869789124, "step": 1253 }, { "epoch": 1.6268021169170874, "grad_norm": 0.6772926449775696, "learning_rate": 9.44974861473378e-06, "loss": 0.6992902159690857, "step": 1254 }, { "epoch": 1.6280998438672263, "grad_norm": 0.734398365020752, "learning_rate": 9.435471380411335e-06, "loss": 0.7508738040924072, "step": 1255 }, { "epoch": 1.6293975708173654, "grad_norm": 0.6922202706336975, "learning_rate": 9.421195300407035e-06, "loss": 0.6657233834266663, "step": 1256 }, { "epoch": 1.6306952977675042, "grad_norm": 0.6931065917015076, "learning_rate": 9.406920403911848e-06, "loss": 0.7156346440315247, "step": 1257 }, { "epoch": 1.631993024717643, "grad_norm": 0.6905820369720459, "learning_rate": 9.392646720114325e-06, "loss": 0.7550724744796753, "step": 1258 }, { "epoch": 1.633290751667782, "grad_norm": 0.6891010403633118, "learning_rate": 9.37837427820053e-06, "loss": 0.7689525485038757, "step": 1259 }, { "epoch": 1.6345884786179208, "grad_norm": 0.6997367739677429, "learning_rate": 9.364103107354002e-06, "loss": 0.6940702795982361, "step": 1260 }, { "epoch": 1.6358862055680596, "grad_norm": 0.7232581973075867, "learning_rate": 9.349833236755675e-06, "loss": 0.708733856678009, "step": 1261 }, { "epoch": 1.6371839325181985, "grad_norm": 0.7156563997268677, "learning_rate": 9.335564695583816e-06, "loss": 0.7080838680267334, "step": 1262 }, { "epoch": 1.6384816594683373, "grad_norm": 0.7129452228546143, "learning_rate": 9.321297513013987e-06, "loss": 0.7160661816596985, "step": 1263 }, { "epoch": 1.6397793864184764, "grad_norm": 0.7260149717330933, "learning_rate": 9.307031718218956e-06, "loss": 0.7261675000190735, "step": 1264 }, { "epoch": 1.6410771133686153, "grad_norm": 0.7252016067504883, "learning_rate": 9.292767340368672e-06, "loss": 0.7626814842224121, "step": 1265 }, { "epoch": 1.6423748403187541, "grad_norm": 0.7192304134368896, "learning_rate": 9.278504408630171e-06, "loss": 0.7479438781738281, "step": 1266 }, { "epoch": 1.6436725672688932, "grad_norm": 0.7067307829856873, "learning_rate": 9.264242952167544e-06, "loss": 0.7229454517364502, "step": 1267 }, { "epoch": 1.644970294219032, "grad_norm": 0.69132000207901, "learning_rate": 9.24998300014185e-06, "loss": 0.7404082417488098, "step": 1268 }, { "epoch": 1.646268021169171, "grad_norm": 0.7199667096138, "learning_rate": 9.235724581711096e-06, "loss": 0.6846930384635925, "step": 1269 }, { "epoch": 1.6475657481193098, "grad_norm": 2.456246852874756, "learning_rate": 9.221467726030126e-06, "loss": 0.7993893623352051, "step": 1270 }, { "epoch": 1.6488634750694486, "grad_norm": 0.6726557016372681, "learning_rate": 9.207212462250611e-06, "loss": 0.6635693311691284, "step": 1271 }, { "epoch": 1.6501612020195875, "grad_norm": 0.6767668128013611, "learning_rate": 9.192958819520948e-06, "loss": 0.6265630722045898, "step": 1272 }, { "epoch": 1.6514589289697263, "grad_norm": 0.660176157951355, "learning_rate": 9.178706826986236e-06, "loss": 0.7039428353309631, "step": 1273 }, { "epoch": 1.6527566559198652, "grad_norm": 0.710209846496582, "learning_rate": 9.164456513788186e-06, "loss": 0.712166965007782, "step": 1274 }, { "epoch": 1.6540543828700043, "grad_norm": 0.7239776849746704, "learning_rate": 9.150207909065093e-06, "loss": 0.7487761378288269, "step": 1275 }, { "epoch": 1.6553521098201431, "grad_norm": 0.6918028593063354, "learning_rate": 9.135961041951735e-06, "loss": 0.6682979464530945, "step": 1276 }, { "epoch": 1.6566498367702822, "grad_norm": 0.7262064218521118, "learning_rate": 9.121715941579358e-06, "loss": 0.6650745868682861, "step": 1277 }, { "epoch": 1.657947563720421, "grad_norm": 0.6805858612060547, "learning_rate": 9.107472637075578e-06, "loss": 0.7332329750061035, "step": 1278 }, { "epoch": 1.65924529067056, "grad_norm": 0.7414560914039612, "learning_rate": 9.093231157564357e-06, "loss": 0.7112785577774048, "step": 1279 }, { "epoch": 1.6605430176206988, "grad_norm": 0.6898860335350037, "learning_rate": 9.078991532165911e-06, "loss": 0.6940746307373047, "step": 1280 }, { "epoch": 1.6618407445708376, "grad_norm": 0.734137773513794, "learning_rate": 9.06475378999667e-06, "loss": 0.7100757956504822, "step": 1281 }, { "epoch": 1.6631384715209765, "grad_norm": 0.7733497023582458, "learning_rate": 9.050517960169211e-06, "loss": 0.7518686056137085, "step": 1282 }, { "epoch": 1.6644361984711153, "grad_norm": 0.709705650806427, "learning_rate": 9.036284071792212e-06, "loss": 0.7964266538619995, "step": 1283 }, { "epoch": 1.6657339254212542, "grad_norm": 0.711685836315155, "learning_rate": 9.022052153970361e-06, "loss": 0.7170289158821106, "step": 1284 }, { "epoch": 1.6670316523713933, "grad_norm": 0.7108113169670105, "learning_rate": 9.007822235804334e-06, "loss": 0.7257951498031616, "step": 1285 }, { "epoch": 1.6683293793215321, "grad_norm": 0.727200984954834, "learning_rate": 8.993594346390709e-06, "loss": 0.7011697888374329, "step": 1286 }, { "epoch": 1.669627106271671, "grad_norm": 0.682969868183136, "learning_rate": 8.979368514821917e-06, "loss": 0.6846626996994019, "step": 1287 }, { "epoch": 1.67092483322181, "grad_norm": 0.7197726964950562, "learning_rate": 8.965144770186192e-06, "loss": 0.7460110783576965, "step": 1288 }, { "epoch": 1.672222560171949, "grad_norm": 0.7024762630462646, "learning_rate": 8.950923141567482e-06, "loss": 0.6903531551361084, "step": 1289 }, { "epoch": 1.6735202871220878, "grad_norm": 0.7416940927505493, "learning_rate": 8.936703658045426e-06, "loss": 0.8462705612182617, "step": 1290 }, { "epoch": 1.6748180140722266, "grad_norm": 0.749668538570404, "learning_rate": 8.92248634869526e-06, "loss": 0.7686569690704346, "step": 1291 }, { "epoch": 1.6761157410223655, "grad_norm": 0.6500091552734375, "learning_rate": 8.90827124258779e-06, "loss": 0.7148120403289795, "step": 1292 }, { "epoch": 1.6774134679725043, "grad_norm": 0.6878598928451538, "learning_rate": 8.894058368789308e-06, "loss": 0.5954074263572693, "step": 1293 }, { "epoch": 1.6787111949226432, "grad_norm": 0.687202513217926, "learning_rate": 8.879847756361544e-06, "loss": 0.6912335753440857, "step": 1294 }, { "epoch": 1.680008921872782, "grad_norm": 0.7027560472488403, "learning_rate": 8.8656394343616e-06, "loss": 0.6989542245864868, "step": 1295 }, { "epoch": 1.6813066488229211, "grad_norm": 0.6999865770339966, "learning_rate": 8.851433431841904e-06, "loss": 0.7319304347038269, "step": 1296 }, { "epoch": 1.68260437577306, "grad_norm": 0.7822436690330505, "learning_rate": 8.837229777850129e-06, "loss": 0.7571746110916138, "step": 1297 }, { "epoch": 1.6839021027231988, "grad_norm": 0.6928126215934753, "learning_rate": 8.823028501429161e-06, "loss": 0.7471798062324524, "step": 1298 }, { "epoch": 1.685199829673338, "grad_norm": 0.6795255541801453, "learning_rate": 8.808829631617009e-06, "loss": 0.6901456117630005, "step": 1299 }, { "epoch": 1.6864975566234768, "grad_norm": 0.7609167695045471, "learning_rate": 8.79463319744677e-06, "loss": 0.782101035118103, "step": 1300 }, { "epoch": 1.6877952835736156, "grad_norm": 0.7111324667930603, "learning_rate": 8.78043922794656e-06, "loss": 0.7500295042991638, "step": 1301 }, { "epoch": 1.6890930105237545, "grad_norm": 0.7332251667976379, "learning_rate": 8.766247752139453e-06, "loss": 0.7808182835578918, "step": 1302 }, { "epoch": 1.6903907374738933, "grad_norm": 0.7156122922897339, "learning_rate": 8.752058799043422e-06, "loss": 0.748470664024353, "step": 1303 }, { "epoch": 1.6916884644240322, "grad_norm": 0.7189647555351257, "learning_rate": 8.737872397671293e-06, "loss": 0.7072033882141113, "step": 1304 }, { "epoch": 1.692986191374171, "grad_norm": 0.719592809677124, "learning_rate": 8.723688577030655e-06, "loss": 0.7256566286087036, "step": 1305 }, { "epoch": 1.69428391832431, "grad_norm": 0.7151191234588623, "learning_rate": 8.709507366123841e-06, "loss": 0.7216327786445618, "step": 1306 }, { "epoch": 1.695581645274449, "grad_norm": 0.6929178833961487, "learning_rate": 8.695328793947833e-06, "loss": 0.6505569815635681, "step": 1307 }, { "epoch": 1.6968793722245878, "grad_norm": 0.7117684483528137, "learning_rate": 8.681152889494227e-06, "loss": 0.750861644744873, "step": 1308 }, { "epoch": 1.698177099174727, "grad_norm": 0.8048399090766907, "learning_rate": 8.66697968174915e-06, "loss": 0.7125011682510376, "step": 1309 }, { "epoch": 1.6994748261248658, "grad_norm": 0.7594026923179626, "learning_rate": 8.652809199693236e-06, "loss": 0.6821706295013428, "step": 1310 }, { "epoch": 1.7007725530750046, "grad_norm": 0.696814775466919, "learning_rate": 8.638641472301524e-06, "loss": 0.7341318726539612, "step": 1311 }, { "epoch": 1.7020702800251435, "grad_norm": 0.6953744292259216, "learning_rate": 8.624476528543439e-06, "loss": 0.7471984028816223, "step": 1312 }, { "epoch": 1.7033680069752823, "grad_norm": 0.7624510526657104, "learning_rate": 8.610314397382701e-06, "loss": 0.7660402655601501, "step": 1313 }, { "epoch": 1.7046657339254212, "grad_norm": 0.7193018198013306, "learning_rate": 8.596155107777288e-06, "loss": 0.7213659882545471, "step": 1314 }, { "epoch": 1.70596346087556, "grad_norm": 0.703834593296051, "learning_rate": 8.581998688679356e-06, "loss": 0.7187014818191528, "step": 1315 }, { "epoch": 1.707261187825699, "grad_norm": 0.7352998852729797, "learning_rate": 8.567845169035205e-06, "loss": 0.7381072044372559, "step": 1316 }, { "epoch": 1.708558914775838, "grad_norm": 0.7008899450302124, "learning_rate": 8.553694577785201e-06, "loss": 0.6953420639038086, "step": 1317 }, { "epoch": 1.7098566417259768, "grad_norm": 0.6997075080871582, "learning_rate": 8.539546943863717e-06, "loss": 0.721794605255127, "step": 1318 }, { "epoch": 1.7111543686761157, "grad_norm": 0.7531685829162598, "learning_rate": 8.525402296199089e-06, "loss": 0.763767421245575, "step": 1319 }, { "epoch": 1.7124520956262548, "grad_norm": 0.686306357383728, "learning_rate": 8.511260663713537e-06, "loss": 0.6505174040794373, "step": 1320 }, { "epoch": 1.7137498225763936, "grad_norm": 0.6891371607780457, "learning_rate": 8.497122075323122e-06, "loss": 0.6535521745681763, "step": 1321 }, { "epoch": 1.7150475495265325, "grad_norm": 0.6797356009483337, "learning_rate": 8.482986559937676e-06, "loss": 0.711966872215271, "step": 1322 }, { "epoch": 1.7163452764766713, "grad_norm": 0.6834943890571594, "learning_rate": 8.468854146460754e-06, "loss": 0.6898146271705627, "step": 1323 }, { "epoch": 1.7176430034268102, "grad_norm": 0.6787711381912231, "learning_rate": 8.45472486378956e-06, "loss": 0.7132437825202942, "step": 1324 }, { "epoch": 1.718940730376949, "grad_norm": 0.731886088848114, "learning_rate": 8.440598740814909e-06, "loss": 0.767355740070343, "step": 1325 }, { "epoch": 1.720238457327088, "grad_norm": 0.6801634430885315, "learning_rate": 8.426475806421139e-06, "loss": 0.728312611579895, "step": 1326 }, { "epoch": 1.7215361842772268, "grad_norm": 0.6922846436500549, "learning_rate": 8.412356089486082e-06, "loss": 0.6810072064399719, "step": 1327 }, { "epoch": 1.7228339112273658, "grad_norm": 0.7422820329666138, "learning_rate": 8.39823961888098e-06, "loss": 0.7293540835380554, "step": 1328 }, { "epoch": 1.7241316381775047, "grad_norm": 0.9656670689582825, "learning_rate": 8.384126423470447e-06, "loss": 0.7158606648445129, "step": 1329 }, { "epoch": 1.7254293651276438, "grad_norm": 0.704413652420044, "learning_rate": 8.37001653211239e-06, "loss": 0.6522120833396912, "step": 1330 }, { "epoch": 1.7267270920777826, "grad_norm": 0.7198591828346252, "learning_rate": 8.355909973657975e-06, "loss": 0.7289344072341919, "step": 1331 }, { "epoch": 1.7280248190279215, "grad_norm": 0.7069032192230225, "learning_rate": 8.341806776951532e-06, "loss": 0.7365983724594116, "step": 1332 }, { "epoch": 1.7293225459780603, "grad_norm": 0.7014702558517456, "learning_rate": 8.327706970830537e-06, "loss": 0.7173565030097961, "step": 1333 }, { "epoch": 1.7306202729281992, "grad_norm": 0.7151576280593872, "learning_rate": 8.313610584125523e-06, "loss": 0.7827293872833252, "step": 1334 }, { "epoch": 1.731917999878338, "grad_norm": 0.7050095796585083, "learning_rate": 8.299517645660033e-06, "loss": 0.681469202041626, "step": 1335 }, { "epoch": 1.733215726828477, "grad_norm": 0.6885892152786255, "learning_rate": 8.285428184250554e-06, "loss": 0.6469728946685791, "step": 1336 }, { "epoch": 1.7345134537786158, "grad_norm": 0.7026622891426086, "learning_rate": 8.271342228706478e-06, "loss": 0.76534104347229, "step": 1337 }, { "epoch": 1.7358111807287548, "grad_norm": 0.6556008458137512, "learning_rate": 8.257259807830009e-06, "loss": 0.6358019113540649, "step": 1338 }, { "epoch": 1.7371089076788937, "grad_norm": 0.6949118971824646, "learning_rate": 8.243180950416142e-06, "loss": 0.7216454148292542, "step": 1339 }, { "epoch": 1.7384066346290326, "grad_norm": 0.6842135190963745, "learning_rate": 8.22910568525257e-06, "loss": 0.7009142637252808, "step": 1340 }, { "epoch": 1.7397043615791716, "grad_norm": 0.7473326921463013, "learning_rate": 8.215034041119655e-06, "loss": 0.7074841856956482, "step": 1341 }, { "epoch": 1.7410020885293105, "grad_norm": 0.6532716751098633, "learning_rate": 8.200966046790339e-06, "loss": 0.7174238562583923, "step": 1342 }, { "epoch": 1.7422998154794493, "grad_norm": 0.672916829586029, "learning_rate": 8.186901731030117e-06, "loss": 0.71747887134552, "step": 1343 }, { "epoch": 1.7435975424295882, "grad_norm": 0.7592087388038635, "learning_rate": 8.172841122596951e-06, "loss": 0.8052394390106201, "step": 1344 }, { "epoch": 1.744895269379727, "grad_norm": 0.6938197016716003, "learning_rate": 8.158784250241226e-06, "loss": 0.7313718795776367, "step": 1345 }, { "epoch": 1.746192996329866, "grad_norm": 0.6459118723869324, "learning_rate": 8.144731142705693e-06, "loss": 0.632814884185791, "step": 1346 }, { "epoch": 1.7474907232800048, "grad_norm": 0.6704484820365906, "learning_rate": 8.130681828725394e-06, "loss": 0.6906111836433411, "step": 1347 }, { "epoch": 1.7487884502301436, "grad_norm": 0.6933112144470215, "learning_rate": 8.116636337027626e-06, "loss": 0.6973313093185425, "step": 1348 }, { "epoch": 1.7500861771802827, "grad_norm": 0.6778403520584106, "learning_rate": 8.10259469633186e-06, "loss": 0.7237393260002136, "step": 1349 }, { "epoch": 1.7513839041304216, "grad_norm": 0.7188864946365356, "learning_rate": 8.0885569353497e-06, "loss": 0.694682776927948, "step": 1350 }, { "epoch": 1.7526816310805604, "grad_norm": 0.7305310368537903, "learning_rate": 8.07452308278481e-06, "loss": 0.7369967103004456, "step": 1351 }, { "epoch": 1.7539793580306995, "grad_norm": 0.6883519291877747, "learning_rate": 8.060493167332874e-06, "loss": 0.6693746447563171, "step": 1352 }, { "epoch": 1.7552770849808383, "grad_norm": 0.6861468553543091, "learning_rate": 8.04646721768151e-06, "loss": 0.7269149422645569, "step": 1353 }, { "epoch": 1.7565748119309772, "grad_norm": 0.6963792443275452, "learning_rate": 8.032445262510241e-06, "loss": 0.7375723123550415, "step": 1354 }, { "epoch": 1.757872538881116, "grad_norm": 0.70611572265625, "learning_rate": 8.018427330490411e-06, "loss": 0.6536609530448914, "step": 1355 }, { "epoch": 1.759170265831255, "grad_norm": 0.6833199262619019, "learning_rate": 8.004413450285147e-06, "loss": 0.7803836464881897, "step": 1356 }, { "epoch": 1.7604679927813938, "grad_norm": 0.7367565631866455, "learning_rate": 7.990403650549285e-06, "loss": 0.7431750893592834, "step": 1357 }, { "epoch": 1.7617657197315326, "grad_norm": 0.7168142795562744, "learning_rate": 7.976397959929324e-06, "loss": 0.708920955657959, "step": 1358 }, { "epoch": 1.7630634466816715, "grad_norm": 0.7081824541091919, "learning_rate": 7.962396407063346e-06, "loss": 0.7360220551490784, "step": 1359 }, { "epoch": 1.7643611736318106, "grad_norm": 0.7008010149002075, "learning_rate": 7.948399020580995e-06, "loss": 0.6721465587615967, "step": 1360 }, { "epoch": 1.7656589005819494, "grad_norm": 0.7550066709518433, "learning_rate": 7.934405829103376e-06, "loss": 0.7266613245010376, "step": 1361 }, { "epoch": 1.7669566275320885, "grad_norm": 0.713932454586029, "learning_rate": 7.920416861243028e-06, "loss": 0.7003293037414551, "step": 1362 }, { "epoch": 1.7682543544822273, "grad_norm": 0.6848137378692627, "learning_rate": 7.906432145603844e-06, "loss": 0.7255281805992126, "step": 1363 }, { "epoch": 1.7695520814323662, "grad_norm": 0.7302910685539246, "learning_rate": 7.892451710781035e-06, "loss": 0.7285719513893127, "step": 1364 }, { "epoch": 1.770849808382505, "grad_norm": 0.7387238144874573, "learning_rate": 7.878475585361045e-06, "loss": 0.7333699464797974, "step": 1365 }, { "epoch": 1.772147535332644, "grad_norm": 0.7755225300788879, "learning_rate": 7.864503797921518e-06, "loss": 0.7592843770980835, "step": 1366 }, { "epoch": 1.7734452622827828, "grad_norm": 0.6892391443252563, "learning_rate": 7.850536377031221e-06, "loss": 0.7412334084510803, "step": 1367 }, { "epoch": 1.7747429892329216, "grad_norm": 0.7299293279647827, "learning_rate": 7.836573351249996e-06, "loss": 0.7442951798439026, "step": 1368 }, { "epoch": 1.7760407161830605, "grad_norm": 0.6848152875900269, "learning_rate": 7.822614749128692e-06, "loss": 0.6193121671676636, "step": 1369 }, { "epoch": 1.7773384431331996, "grad_norm": 0.6931573748588562, "learning_rate": 7.808660599209124e-06, "loss": 0.7440711259841919, "step": 1370 }, { "epoch": 1.7786361700833384, "grad_norm": 0.7260693907737732, "learning_rate": 7.794710930023993e-06, "loss": 0.7359597682952881, "step": 1371 }, { "epoch": 1.7799338970334773, "grad_norm": 0.705436646938324, "learning_rate": 7.78076577009684e-06, "loss": 0.6207844614982605, "step": 1372 }, { "epoch": 1.7812316239836163, "grad_norm": 0.6740301847457886, "learning_rate": 7.76682514794199e-06, "loss": 0.6975910663604736, "step": 1373 }, { "epoch": 1.7825293509337552, "grad_norm": 0.6805901527404785, "learning_rate": 7.752889092064484e-06, "loss": 0.671751081943512, "step": 1374 }, { "epoch": 1.783827077883894, "grad_norm": 0.7223953604698181, "learning_rate": 7.738957630960037e-06, "loss": 0.6885688900947571, "step": 1375 }, { "epoch": 1.785124804834033, "grad_norm": 0.6852001547813416, "learning_rate": 7.725030793114952e-06, "loss": 0.7190781831741333, "step": 1376 }, { "epoch": 1.7864225317841718, "grad_norm": 0.7344854474067688, "learning_rate": 7.711108607006094e-06, "loss": 0.7325436472892761, "step": 1377 }, { "epoch": 1.7877202587343106, "grad_norm": 0.7047913670539856, "learning_rate": 7.697191101100802e-06, "loss": 0.7324240803718567, "step": 1378 }, { "epoch": 1.7890179856844495, "grad_norm": 0.7197734713554382, "learning_rate": 7.683278303856862e-06, "loss": 0.7601778507232666, "step": 1379 }, { "epoch": 1.7903157126345883, "grad_norm": 0.6842553615570068, "learning_rate": 7.669370243722415e-06, "loss": 0.7301578521728516, "step": 1380 }, { "epoch": 1.7903157126345883, "eval_loss": 0.7464115023612976, "eval_runtime": 143.4981, "eval_samples_per_second": 36.182, "eval_steps_per_second": 9.045, "step": 1380 }, { "epoch": 1.7916134395847274, "grad_norm": 0.6962341666221619, "learning_rate": 7.655466949135932e-06, "loss": 0.7249746918678284, "step": 1381 }, { "epoch": 1.7929111665348663, "grad_norm": 0.6840744018554688, "learning_rate": 7.641568448526122e-06, "loss": 0.6648120880126953, "step": 1382 }, { "epoch": 1.7942088934850051, "grad_norm": 0.7047871947288513, "learning_rate": 7.627674770311909e-06, "loss": 0.6969434022903442, "step": 1383 }, { "epoch": 1.7955066204351442, "grad_norm": 0.716124951839447, "learning_rate": 7.613785942902343e-06, "loss": 0.7197269201278687, "step": 1384 }, { "epoch": 1.796804347385283, "grad_norm": 0.6727207899093628, "learning_rate": 7.599901994696566e-06, "loss": 0.6794359683990479, "step": 1385 }, { "epoch": 1.798102074335422, "grad_norm": 0.6976568698883057, "learning_rate": 7.586022954083731e-06, "loss": 0.6372778415679932, "step": 1386 }, { "epoch": 1.7993998012855608, "grad_norm": 0.683164656162262, "learning_rate": 7.572148849442971e-06, "loss": 0.6731259226799011, "step": 1387 }, { "epoch": 1.8006975282356996, "grad_norm": 0.6801917552947998, "learning_rate": 7.5582797091433105e-06, "loss": 0.6921297907829285, "step": 1388 }, { "epoch": 1.8019952551858385, "grad_norm": 0.7587413191795349, "learning_rate": 7.544415561543639e-06, "loss": 0.7684265971183777, "step": 1389 }, { "epoch": 1.8032929821359773, "grad_norm": 0.7493230700492859, "learning_rate": 7.5305564349926215e-06, "loss": 0.6984431147575378, "step": 1390 }, { "epoch": 1.8045907090861162, "grad_norm": 0.6897554993629456, "learning_rate": 7.516702357828672e-06, "loss": 0.739819347858429, "step": 1391 }, { "epoch": 1.8058884360362553, "grad_norm": 0.6832559704780579, "learning_rate": 7.502853358379865e-06, "loss": 0.6518275141716003, "step": 1392 }, { "epoch": 1.8071861629863941, "grad_norm": 0.7185218334197998, "learning_rate": 7.489009464963903e-06, "loss": 0.7867194414138794, "step": 1393 }, { "epoch": 1.8084838899365332, "grad_norm": 0.6737310886383057, "learning_rate": 7.475170705888042e-06, "loss": 0.6979063749313354, "step": 1394 }, { "epoch": 1.809781616886672, "grad_norm": 0.713076651096344, "learning_rate": 7.461337109449045e-06, "loss": 0.7293301224708557, "step": 1395 }, { "epoch": 1.811079343836811, "grad_norm": 0.700568675994873, "learning_rate": 7.447508703933109e-06, "loss": 0.6935805678367615, "step": 1396 }, { "epoch": 1.8123770707869498, "grad_norm": 0.7034053802490234, "learning_rate": 7.433685517615831e-06, "loss": 0.7284054160118103, "step": 1397 }, { "epoch": 1.8136747977370886, "grad_norm": 0.6562127470970154, "learning_rate": 7.4198675787621185e-06, "loss": 0.721833348274231, "step": 1398 }, { "epoch": 1.8149725246872275, "grad_norm": 0.6957826614379883, "learning_rate": 7.406054915626172e-06, "loss": 0.6763690114021301, "step": 1399 }, { "epoch": 1.8162702516373663, "grad_norm": 0.758056104183197, "learning_rate": 7.392247556451382e-06, "loss": 0.7644186615943909, "step": 1400 }, { "epoch": 1.8175679785875052, "grad_norm": 0.6855806708335876, "learning_rate": 7.378445529470303e-06, "loss": 0.7499503493309021, "step": 1401 }, { "epoch": 1.8188657055376443, "grad_norm": 0.7280805706977844, "learning_rate": 7.364648862904593e-06, "loss": 0.7766327261924744, "step": 1402 }, { "epoch": 1.8201634324877831, "grad_norm": 0.7023898959159851, "learning_rate": 7.35085758496494e-06, "loss": 0.6799028515815735, "step": 1403 }, { "epoch": 1.821461159437922, "grad_norm": 0.696554958820343, "learning_rate": 7.337071723851018e-06, "loss": 0.6930332183837891, "step": 1404 }, { "epoch": 1.822758886388061, "grad_norm": 0.7462826371192932, "learning_rate": 7.323291307751418e-06, "loss": 0.7603926658630371, "step": 1405 }, { "epoch": 1.8240566133382, "grad_norm": 0.6899564266204834, "learning_rate": 7.3095163648436115e-06, "loss": 0.6602949500083923, "step": 1406 }, { "epoch": 1.8253543402883388, "grad_norm": 0.7230206727981567, "learning_rate": 7.295746923293865e-06, "loss": 0.7429470419883728, "step": 1407 }, { "epoch": 1.8266520672384776, "grad_norm": 0.6691879034042358, "learning_rate": 7.2819830112572035e-06, "loss": 0.7018039226531982, "step": 1408 }, { "epoch": 1.8279497941886165, "grad_norm": 0.7611459493637085, "learning_rate": 7.268224656877339e-06, "loss": 0.7324895262718201, "step": 1409 }, { "epoch": 1.8292475211387553, "grad_norm": 0.7313300967216492, "learning_rate": 7.25447188828663e-06, "loss": 0.7643807530403137, "step": 1410 }, { "epoch": 1.8305452480888942, "grad_norm": 0.7345109581947327, "learning_rate": 7.240724733606002e-06, "loss": 0.7648757696151733, "step": 1411 }, { "epoch": 1.831842975039033, "grad_norm": 0.6995144486427307, "learning_rate": 7.2269832209449145e-06, "loss": 0.6826534271240234, "step": 1412 }, { "epoch": 1.8331407019891721, "grad_norm": 0.6842563152313232, "learning_rate": 7.213247378401274e-06, "loss": 0.7718407511711121, "step": 1413 }, { "epoch": 1.834438428939311, "grad_norm": 0.6925626397132874, "learning_rate": 7.199517234061408e-06, "loss": 0.7063374519348145, "step": 1414 }, { "epoch": 1.83573615588945, "grad_norm": 0.7153764963150024, "learning_rate": 7.1857928159999814e-06, "loss": 0.7116506695747375, "step": 1415 }, { "epoch": 1.837033882839589, "grad_norm": 0.7008180022239685, "learning_rate": 7.172074152279963e-06, "loss": 0.6926634311676025, "step": 1416 }, { "epoch": 1.8383316097897278, "grad_norm": 0.695785641670227, "learning_rate": 7.1583612709525405e-06, "loss": 0.7824428081512451, "step": 1417 }, { "epoch": 1.8396293367398666, "grad_norm": 0.7137957215309143, "learning_rate": 7.14465420005709e-06, "loss": 0.7480607032775879, "step": 1418 }, { "epoch": 1.8409270636900055, "grad_norm": 0.6970608234405518, "learning_rate": 7.130952967621096e-06, "loss": 0.6973427534103394, "step": 1419 }, { "epoch": 1.8422247906401443, "grad_norm": 0.7116836309432983, "learning_rate": 7.11725760166012e-06, "loss": 0.7084696292877197, "step": 1420 }, { "epoch": 1.8435225175902832, "grad_norm": 0.7125561833381653, "learning_rate": 7.103568130177713e-06, "loss": 0.6803657412528992, "step": 1421 }, { "epoch": 1.844820244540422, "grad_norm": 0.66914963722229, "learning_rate": 7.089884581165382e-06, "loss": 0.6364957690238953, "step": 1422 }, { "epoch": 1.8461179714905611, "grad_norm": 0.7396631240844727, "learning_rate": 7.076206982602516e-06, "loss": 0.7236162424087524, "step": 1423 }, { "epoch": 1.8474156984407, "grad_norm": 0.7191373109817505, "learning_rate": 7.06253536245635e-06, "loss": 0.7462475895881653, "step": 1424 }, { "epoch": 1.8487134253908388, "grad_norm": 0.7262799143791199, "learning_rate": 7.048869748681879e-06, "loss": 0.7678788900375366, "step": 1425 }, { "epoch": 1.850011152340978, "grad_norm": 0.7085245847702026, "learning_rate": 7.035210169221834e-06, "loss": 0.7576820850372314, "step": 1426 }, { "epoch": 1.8513088792911168, "grad_norm": 0.7027114629745483, "learning_rate": 7.021556652006588e-06, "loss": 0.755644679069519, "step": 1427 }, { "epoch": 1.8526066062412556, "grad_norm": 0.6858870387077332, "learning_rate": 7.007909224954135e-06, "loss": 0.7338079810142517, "step": 1428 }, { "epoch": 1.8539043331913945, "grad_norm": 0.7013359069824219, "learning_rate": 6.994267915970003e-06, "loss": 0.7038964033126831, "step": 1429 }, { "epoch": 1.8552020601415333, "grad_norm": 0.7172896265983582, "learning_rate": 6.980632752947221e-06, "loss": 0.7479324340820312, "step": 1430 }, { "epoch": 1.8564997870916722, "grad_norm": 0.7214548587799072, "learning_rate": 6.967003763766247e-06, "loss": 0.7139613032341003, "step": 1431 }, { "epoch": 1.857797514041811, "grad_norm": 0.730970025062561, "learning_rate": 6.953380976294907e-06, "loss": 0.765926718711853, "step": 1432 }, { "epoch": 1.85909524099195, "grad_norm": 0.6703609824180603, "learning_rate": 6.9397644183883616e-06, "loss": 0.7193933129310608, "step": 1433 }, { "epoch": 1.860392967942089, "grad_norm": 0.6499923467636108, "learning_rate": 6.926154117889022e-06, "loss": 0.6723966002464294, "step": 1434 }, { "epoch": 1.8616906948922278, "grad_norm": 0.7143534421920776, "learning_rate": 6.91255010262651e-06, "loss": 0.7171000838279724, "step": 1435 }, { "epoch": 1.8629884218423667, "grad_norm": 0.6932517290115356, "learning_rate": 6.898952400417587e-06, "loss": 0.6997263431549072, "step": 1436 }, { "epoch": 1.8642861487925058, "grad_norm": 0.7429547905921936, "learning_rate": 6.885361039066121e-06, "loss": 0.780619204044342, "step": 1437 }, { "epoch": 1.8655838757426446, "grad_norm": 0.7190982699394226, "learning_rate": 6.8717760463629965e-06, "loss": 0.7348355054855347, "step": 1438 }, { "epoch": 1.8668816026927835, "grad_norm": 0.7007834315299988, "learning_rate": 6.858197450086097e-06, "loss": 0.7280945181846619, "step": 1439 }, { "epoch": 1.8681793296429223, "grad_norm": 0.7208773493766785, "learning_rate": 6.844625278000205e-06, "loss": 0.775151252746582, "step": 1440 }, { "epoch": 1.8694770565930612, "grad_norm": 0.6837726831436157, "learning_rate": 6.831059557856984e-06, "loss": 0.7308005094528198, "step": 1441 }, { "epoch": 1.8707747835432, "grad_norm": 0.6819126009941101, "learning_rate": 6.81750031739489e-06, "loss": 0.6529159545898438, "step": 1442 }, { "epoch": 1.872072510493339, "grad_norm": 0.6784840226173401, "learning_rate": 6.803947584339148e-06, "loss": 0.6919572949409485, "step": 1443 }, { "epoch": 1.8733702374434777, "grad_norm": 0.6869913935661316, "learning_rate": 6.79040138640166e-06, "loss": 0.6871669888496399, "step": 1444 }, { "epoch": 1.8746679643936168, "grad_norm": 0.7124300599098206, "learning_rate": 6.7768617512809745e-06, "loss": 0.7206623554229736, "step": 1445 }, { "epoch": 1.8759656913437557, "grad_norm": 0.71539306640625, "learning_rate": 6.763328706662214e-06, "loss": 0.7108519673347473, "step": 1446 }, { "epoch": 1.8772634182938948, "grad_norm": 0.7159188985824585, "learning_rate": 6.749802280217037e-06, "loss": 0.7131993770599365, "step": 1447 }, { "epoch": 1.8785611452440336, "grad_norm": 0.722147524356842, "learning_rate": 6.7362824996035545e-06, "loss": 0.6998387575149536, "step": 1448 }, { "epoch": 1.8798588721941725, "grad_norm": 0.7286826968193054, "learning_rate": 6.722769392466304e-06, "loss": 0.7367603778839111, "step": 1449 }, { "epoch": 1.8811565991443113, "grad_norm": 0.7212167382240295, "learning_rate": 6.709262986436162e-06, "loss": 0.7357022762298584, "step": 1450 }, { "epoch": 1.8824543260944502, "grad_norm": 0.7026610374450684, "learning_rate": 6.695763309130318e-06, "loss": 0.7126086354255676, "step": 1451 }, { "epoch": 1.883752053044589, "grad_norm": 0.7145894169807434, "learning_rate": 6.682270388152185e-06, "loss": 0.6773615479469299, "step": 1452 }, { "epoch": 1.885049779994728, "grad_norm": 0.7049593925476074, "learning_rate": 6.668784251091381e-06, "loss": 0.6776928305625916, "step": 1453 }, { "epoch": 1.8863475069448667, "grad_norm": 0.699505627155304, "learning_rate": 6.655304925523635e-06, "loss": 0.6610416173934937, "step": 1454 }, { "epoch": 1.8876452338950058, "grad_norm": 0.7056293487548828, "learning_rate": 6.641832439010765e-06, "loss": 0.6919702291488647, "step": 1455 }, { "epoch": 1.8889429608451447, "grad_norm": 0.702669084072113, "learning_rate": 6.628366819100586e-06, "loss": 0.682940661907196, "step": 1456 }, { "epoch": 1.8902406877952835, "grad_norm": 0.6931704878807068, "learning_rate": 6.614908093326891e-06, "loss": 0.7477650046348572, "step": 1457 }, { "epoch": 1.8915384147454226, "grad_norm": 0.7257412075996399, "learning_rate": 6.601456289209362e-06, "loss": 0.774404764175415, "step": 1458 }, { "epoch": 1.8928361416955615, "grad_norm": 0.6645631194114685, "learning_rate": 6.588011434253534e-06, "loss": 0.647753119468689, "step": 1459 }, { "epoch": 1.8941338686457003, "grad_norm": 0.70735102891922, "learning_rate": 6.574573555950738e-06, "loss": 0.6710544228553772, "step": 1460 }, { "epoch": 1.8954315955958392, "grad_norm": 0.7064939141273499, "learning_rate": 6.561142681778027e-06, "loss": 0.6929414868354797, "step": 1461 }, { "epoch": 1.896729322545978, "grad_norm": 0.6896395683288574, "learning_rate": 6.547718839198145e-06, "loss": 0.6804373264312744, "step": 1462 }, { "epoch": 1.898027049496117, "grad_norm": 0.726024329662323, "learning_rate": 6.53430205565945e-06, "loss": 0.7252693772315979, "step": 1463 }, { "epoch": 1.8993247764462557, "grad_norm": 0.7104306817054749, "learning_rate": 6.520892358595869e-06, "loss": 0.7321268916130066, "step": 1464 }, { "epoch": 1.9006225033963946, "grad_norm": 0.680915892124176, "learning_rate": 6.507489775426834e-06, "loss": 0.7166538238525391, "step": 1465 }, { "epoch": 1.9019202303465337, "grad_norm": 0.7132366895675659, "learning_rate": 6.494094333557243e-06, "loss": 0.708162784576416, "step": 1466 }, { "epoch": 1.9032179572966725, "grad_norm": 0.6515333652496338, "learning_rate": 6.4807060603773795e-06, "loss": 0.7163029313087463, "step": 1467 }, { "epoch": 1.9045156842468114, "grad_norm": 0.7042413353919983, "learning_rate": 6.467324983262877e-06, "loss": 0.6881014704704285, "step": 1468 }, { "epoch": 1.9058134111969505, "grad_norm": 0.6660881042480469, "learning_rate": 6.453951129574644e-06, "loss": 0.678939938545227, "step": 1469 }, { "epoch": 1.9071111381470893, "grad_norm": 0.7373862266540527, "learning_rate": 6.4405845266588356e-06, "loss": 0.7181136608123779, "step": 1470 }, { "epoch": 1.9084088650972282, "grad_norm": 0.7122411727905273, "learning_rate": 6.427225201846763e-06, "loss": 0.6904677748680115, "step": 1471 }, { "epoch": 1.909706592047367, "grad_norm": 0.7414330244064331, "learning_rate": 6.413873182454873e-06, "loss": 0.7363246083259583, "step": 1472 }, { "epoch": 1.911004318997506, "grad_norm": 0.6871086359024048, "learning_rate": 6.4005284957846546e-06, "loss": 0.6799793243408203, "step": 1473 }, { "epoch": 1.9123020459476447, "grad_norm": 0.7056854963302612, "learning_rate": 6.3871911691226276e-06, "loss": 0.7036612033843994, "step": 1474 }, { "epoch": 1.9135997728977836, "grad_norm": 0.7454568147659302, "learning_rate": 6.373861229740237e-06, "loss": 0.7416712045669556, "step": 1475 }, { "epoch": 1.9148974998479225, "grad_norm": 0.6941256523132324, "learning_rate": 6.360538704893845e-06, "loss": 0.6659767031669617, "step": 1476 }, { "epoch": 1.9161952267980615, "grad_norm": 0.7420505881309509, "learning_rate": 6.3472236218246366e-06, "loss": 0.7747020721435547, "step": 1477 }, { "epoch": 1.9174929537482004, "grad_norm": 0.7113460302352905, "learning_rate": 6.333916007758591e-06, "loss": 0.7053021788597107, "step": 1478 }, { "epoch": 1.9187906806983395, "grad_norm": 0.7145473957061768, "learning_rate": 6.320615889906403e-06, "loss": 0.7014235258102417, "step": 1479 }, { "epoch": 1.9200884076484783, "grad_norm": 0.7099266052246094, "learning_rate": 6.307323295463457e-06, "loss": 0.7599897980690002, "step": 1480 }, { "epoch": 1.9213861345986172, "grad_norm": 0.7005822062492371, "learning_rate": 6.294038251609738e-06, "loss": 0.6990090608596802, "step": 1481 }, { "epoch": 1.922683861548756, "grad_norm": 0.6796419620513916, "learning_rate": 6.280760785509802e-06, "loss": 0.6529797911643982, "step": 1482 }, { "epoch": 1.923981588498895, "grad_norm": 0.7199534773826599, "learning_rate": 6.2674909243127e-06, "loss": 0.714480459690094, "step": 1483 }, { "epoch": 1.9252793154490337, "grad_norm": 0.7127954959869385, "learning_rate": 6.254228695151949e-06, "loss": 0.7583557367324829, "step": 1484 }, { "epoch": 1.9265770423991726, "grad_norm": 0.7017828226089478, "learning_rate": 6.240974125145443e-06, "loss": 0.6976377367973328, "step": 1485 }, { "epoch": 1.9278747693493115, "grad_norm": 0.6956459283828735, "learning_rate": 6.227727241395429e-06, "loss": 0.7237988114356995, "step": 1486 }, { "epoch": 1.9291724962994505, "grad_norm": 0.7250760197639465, "learning_rate": 6.214488070988424e-06, "loss": 0.705412745475769, "step": 1487 }, { "epoch": 1.9304702232495894, "grad_norm": 0.72161465883255, "learning_rate": 6.201256640995184e-06, "loss": 0.6755847930908203, "step": 1488 }, { "epoch": 1.9317679501997282, "grad_norm": 0.6741456389427185, "learning_rate": 6.188032978470639e-06, "loss": 0.7194631099700928, "step": 1489 }, { "epoch": 1.9330656771498673, "grad_norm": 0.6884588003158569, "learning_rate": 6.174817110453828e-06, "loss": 0.6863330006599426, "step": 1490 }, { "epoch": 1.9343634041000062, "grad_norm": 0.7027184963226318, "learning_rate": 6.161609063967857e-06, "loss": 0.7379326224327087, "step": 1491 }, { "epoch": 1.935661131050145, "grad_norm": 0.7299201488494873, "learning_rate": 6.1484088660198325e-06, "loss": 0.7956094145774841, "step": 1492 }, { "epoch": 1.936958858000284, "grad_norm": 0.7009000182151794, "learning_rate": 6.135216543600828e-06, "loss": 0.7050310373306274, "step": 1493 }, { "epoch": 1.9382565849504227, "grad_norm": 0.7212353944778442, "learning_rate": 6.1220321236857974e-06, "loss": 0.7898357510566711, "step": 1494 }, { "epoch": 1.9395543119005616, "grad_norm": 0.7044717669487, "learning_rate": 6.108855633233546e-06, "loss": 0.7022029757499695, "step": 1495 }, { "epoch": 1.9408520388507005, "grad_norm": 0.6811977624893188, "learning_rate": 6.0956870991866545e-06, "loss": 0.6920107007026672, "step": 1496 }, { "epoch": 1.9421497658008393, "grad_norm": 0.6873610019683838, "learning_rate": 6.0825265484714526e-06, "loss": 0.6889206767082214, "step": 1497 }, { "epoch": 1.9434474927509784, "grad_norm": 0.7255538702011108, "learning_rate": 6.0693740079979235e-06, "loss": 0.763762891292572, "step": 1498 }, { "epoch": 1.9447452197011172, "grad_norm": 0.6617857217788696, "learning_rate": 6.056229504659696e-06, "loss": 0.65453040599823, "step": 1499 }, { "epoch": 1.9460429466512563, "grad_norm": 0.7204879522323608, "learning_rate": 6.043093065333945e-06, "loss": 0.6839476823806763, "step": 1500 }, { "epoch": 1.9473406736013952, "grad_norm": 0.695447564125061, "learning_rate": 6.029964716881367e-06, "loss": 0.6658032536506653, "step": 1501 }, { "epoch": 1.948638400551534, "grad_norm": 0.6816181540489197, "learning_rate": 6.016844486146106e-06, "loss": 0.7248274087905884, "step": 1502 }, { "epoch": 1.9499361275016729, "grad_norm": 0.7379606366157532, "learning_rate": 6.003732399955722e-06, "loss": 0.6768795251846313, "step": 1503 }, { "epoch": 1.9512338544518117, "grad_norm": 0.6998269557952881, "learning_rate": 5.990628485121106e-06, "loss": 0.6504592895507812, "step": 1504 }, { "epoch": 1.9525315814019506, "grad_norm": 0.7351219654083252, "learning_rate": 5.97753276843645e-06, "loss": 0.7741858959197998, "step": 1505 }, { "epoch": 1.9538293083520895, "grad_norm": 0.6803948283195496, "learning_rate": 5.964445276679176e-06, "loss": 0.6615405678749084, "step": 1506 }, { "epoch": 1.9551270353022283, "grad_norm": 0.7318346500396729, "learning_rate": 5.9513660366099005e-06, "loss": 0.7087497115135193, "step": 1507 }, { "epoch": 1.9564247622523674, "grad_norm": 0.7118584513664246, "learning_rate": 5.93829507497235e-06, "loss": 0.647581934928894, "step": 1508 }, { "epoch": 1.9577224892025062, "grad_norm": 0.7135505080223083, "learning_rate": 5.925232418493338e-06, "loss": 0.7108398079872131, "step": 1509 }, { "epoch": 1.959020216152645, "grad_norm": 0.6982471346855164, "learning_rate": 5.912178093882688e-06, "loss": 0.7022315859794617, "step": 1510 }, { "epoch": 1.9603179431027842, "grad_norm": 0.7076136469841003, "learning_rate": 5.8991321278331934e-06, "loss": 0.6406600475311279, "step": 1511 }, { "epoch": 1.961615670052923, "grad_norm": 0.7392069101333618, "learning_rate": 5.8860945470205466e-06, "loss": 0.7887027859687805, "step": 1512 }, { "epoch": 1.9629133970030619, "grad_norm": 0.7483602166175842, "learning_rate": 5.8730653781033085e-06, "loss": 0.7219119668006897, "step": 1513 }, { "epoch": 1.9642111239532007, "grad_norm": 0.7024926543235779, "learning_rate": 5.860044647722827e-06, "loss": 0.7041683793067932, "step": 1514 }, { "epoch": 1.9655088509033396, "grad_norm": 0.6939775347709656, "learning_rate": 5.847032382503202e-06, "loss": 0.6798254251480103, "step": 1515 }, { "epoch": 1.9668065778534785, "grad_norm": 0.700524628162384, "learning_rate": 5.834028609051218e-06, "loss": 0.731053352355957, "step": 1516 }, { "epoch": 1.9681043048036173, "grad_norm": 0.7189422845840454, "learning_rate": 5.8210333539563e-06, "loss": 0.6871148347854614, "step": 1517 }, { "epoch": 1.9694020317537562, "grad_norm": 0.7418919205665588, "learning_rate": 5.808046643790468e-06, "loss": 0.7469598054885864, "step": 1518 }, { "epoch": 1.9706997587038952, "grad_norm": 0.6783238649368286, "learning_rate": 5.795068505108243e-06, "loss": 0.6897709369659424, "step": 1519 }, { "epoch": 1.971997485654034, "grad_norm": 0.7016989588737488, "learning_rate": 5.782098964446641e-06, "loss": 0.6978930830955505, "step": 1520 }, { "epoch": 1.973295212604173, "grad_norm": 0.6924634575843811, "learning_rate": 5.769138048325087e-06, "loss": 0.6557913422584534, "step": 1521 }, { "epoch": 1.974592939554312, "grad_norm": 0.6980036497116089, "learning_rate": 5.756185783245376e-06, "loss": 0.6883926391601562, "step": 1522 }, { "epoch": 1.9758906665044509, "grad_norm": 0.6666119694709778, "learning_rate": 5.743242195691612e-06, "loss": 0.696445107460022, "step": 1523 }, { "epoch": 1.9771883934545897, "grad_norm": 0.7082392573356628, "learning_rate": 5.730307312130152e-06, "loss": 0.7830109596252441, "step": 1524 }, { "epoch": 1.9784861204047286, "grad_norm": 0.7415315508842468, "learning_rate": 5.717381159009563e-06, "loss": 0.6982215642929077, "step": 1525 }, { "epoch": 1.9797838473548675, "grad_norm": 0.7484350800514221, "learning_rate": 5.704463762760559e-06, "loss": 0.727252721786499, "step": 1526 }, { "epoch": 1.9810815743050063, "grad_norm": 0.6809999346733093, "learning_rate": 5.691555149795933e-06, "loss": 0.794657826423645, "step": 1527 }, { "epoch": 1.9823793012551452, "grad_norm": 0.7138223648071289, "learning_rate": 5.678655346510549e-06, "loss": 0.7287296056747437, "step": 1528 }, { "epoch": 1.983677028205284, "grad_norm": 0.6818944215774536, "learning_rate": 5.6657643792812265e-06, "loss": 0.6768350601196289, "step": 1529 }, { "epoch": 1.984974755155423, "grad_norm": 0.7276642918586731, "learning_rate": 5.652882274466736e-06, "loss": 0.7598171830177307, "step": 1530 }, { "epoch": 1.986272482105562, "grad_norm": 0.6802821159362793, "learning_rate": 5.640009058407719e-06, "loss": 0.682623028755188, "step": 1531 }, { "epoch": 1.987570209055701, "grad_norm": 0.7515146732330322, "learning_rate": 5.627144757426647e-06, "loss": 0.7861851453781128, "step": 1532 }, { "epoch": 1.9888679360058399, "grad_norm": 0.7353605628013611, "learning_rate": 5.614289397827757e-06, "loss": 0.7634737491607666, "step": 1533 }, { "epoch": 1.9901656629559787, "grad_norm": 0.7560073137283325, "learning_rate": 5.601443005897012e-06, "loss": 0.7616620659828186, "step": 1534 }, { "epoch": 1.9914633899061176, "grad_norm": 0.7289350628852844, "learning_rate": 5.588605607902017e-06, "loss": 0.7190179824829102, "step": 1535 }, { "epoch": 1.9927611168562565, "grad_norm": 0.7019691467285156, "learning_rate": 5.57577723009202e-06, "loss": 0.671945333480835, "step": 1536 }, { "epoch": 1.9940588438063953, "grad_norm": 0.6952185034751892, "learning_rate": 5.5629578986977894e-06, "loss": 0.7416089177131653, "step": 1537 }, { "epoch": 1.9953565707565342, "grad_norm": 0.7558557987213135, "learning_rate": 5.550147639931631e-06, "loss": 0.7460814714431763, "step": 1538 }, { "epoch": 1.996654297706673, "grad_norm": 0.6997542381286621, "learning_rate": 5.537346479987269e-06, "loss": 0.7162995338439941, "step": 1539 }, { "epoch": 1.997952024656812, "grad_norm": 0.7319507002830505, "learning_rate": 5.524554445039838e-06, "loss": 0.7580918669700623, "step": 1540 }, { "epoch": 1.999249751606951, "grad_norm": 0.7187158465385437, "learning_rate": 5.511771561245813e-06, "loss": 0.6829614043235779, "step": 1541 }, { "epoch": 2.0, "grad_norm": 0.8756005167961121, "learning_rate": 5.498997854742956e-06, "loss": 0.654055118560791, "step": 1542 }, { "epoch": 2.001297726950139, "grad_norm": 0.884756326675415, "learning_rate": 5.4862333516502634e-06, "loss": 0.6550735831260681, "step": 1543 }, { "epoch": 2.0025954539002777, "grad_norm": 0.8835470080375671, "learning_rate": 5.473478078067913e-06, "loss": 0.7326578497886658, "step": 1544 }, { "epoch": 2.0038931808504166, "grad_norm": 0.8778272867202759, "learning_rate": 5.460732060077212e-06, "loss": 0.6050289273262024, "step": 1545 }, { "epoch": 2.0051909078005554, "grad_norm": 0.7473064661026001, "learning_rate": 5.44799532374054e-06, "loss": 0.6881033182144165, "step": 1546 }, { "epoch": 2.0064886347506943, "grad_norm": 0.7708891034126282, "learning_rate": 5.435267895101303e-06, "loss": 0.6227023005485535, "step": 1547 }, { "epoch": 2.0077863617008336, "grad_norm": 0.7482177019119263, "learning_rate": 5.422549800183861e-06, "loss": 0.6618348360061646, "step": 1548 }, { "epoch": 2.0090840886509724, "grad_norm": 0.7345021963119507, "learning_rate": 5.409841064993512e-06, "loss": 0.6520942449569702, "step": 1549 }, { "epoch": 2.0103818156011113, "grad_norm": 0.7631828188896179, "learning_rate": 5.39714171551639e-06, "loss": 0.6233668923377991, "step": 1550 }, { "epoch": 2.01167954255125, "grad_norm": 0.813840925693512, "learning_rate": 5.384451777719464e-06, "loss": 0.7311254739761353, "step": 1551 }, { "epoch": 2.012977269501389, "grad_norm": 0.8413859009742737, "learning_rate": 5.371771277550432e-06, "loss": 0.7018522024154663, "step": 1552 }, { "epoch": 2.014274996451528, "grad_norm": 0.7750846147537231, "learning_rate": 5.359100240937717e-06, "loss": 0.6850703954696655, "step": 1553 }, { "epoch": 2.0155727234016667, "grad_norm": 0.7778939604759216, "learning_rate": 5.3464386937903764e-06, "loss": 0.6811778545379639, "step": 1554 }, { "epoch": 2.0168704503518056, "grad_norm": 0.7875815033912659, "learning_rate": 5.33378666199807e-06, "loss": 0.6062582731246948, "step": 1555 }, { "epoch": 2.0181681773019444, "grad_norm": 0.8213943839073181, "learning_rate": 5.321144171431003e-06, "loss": 0.6217991709709167, "step": 1556 }, { "epoch": 2.0194659042520833, "grad_norm": 0.8762441873550415, "learning_rate": 5.308511247939872e-06, "loss": 0.6675798296928406, "step": 1557 }, { "epoch": 2.0207636312022226, "grad_norm": 0.7665208578109741, "learning_rate": 5.295887917355794e-06, "loss": 0.6503481268882751, "step": 1558 }, { "epoch": 2.0220613581523614, "grad_norm": 0.7740142941474915, "learning_rate": 5.283274205490303e-06, "loss": 0.6113878488540649, "step": 1559 }, { "epoch": 2.0233590851025003, "grad_norm": 0.7948552966117859, "learning_rate": 5.270670138135234e-06, "loss": 0.7041577100753784, "step": 1560 }, { "epoch": 2.024656812052639, "grad_norm": 0.732266366481781, "learning_rate": 5.25807574106272e-06, "loss": 0.683874785900116, "step": 1561 }, { "epoch": 2.025954539002778, "grad_norm": 0.7335087060928345, "learning_rate": 5.245491040025115e-06, "loss": 0.6318987011909485, "step": 1562 }, { "epoch": 2.027252265952917, "grad_norm": 0.7172908186912537, "learning_rate": 5.232916060754947e-06, "loss": 0.6631210446357727, "step": 1563 }, { "epoch": 2.0285499929030557, "grad_norm": 0.7232309579849243, "learning_rate": 5.220350828964865e-06, "loss": 0.6236647367477417, "step": 1564 }, { "epoch": 2.0298477198531946, "grad_norm": 0.727989137172699, "learning_rate": 5.207795370347588e-06, "loss": 0.6853646039962769, "step": 1565 }, { "epoch": 2.0311454468033334, "grad_norm": 0.7468066215515137, "learning_rate": 5.195249710575853e-06, "loss": 0.6544186472892761, "step": 1566 }, { "epoch": 2.0324431737534723, "grad_norm": 0.7399063110351562, "learning_rate": 5.182713875302361e-06, "loss": 0.6106476783752441, "step": 1567 }, { "epoch": 2.033740900703611, "grad_norm": 0.7420501708984375, "learning_rate": 5.1701878901597106e-06, "loss": 0.715307891368866, "step": 1568 }, { "epoch": 2.0350386276537504, "grad_norm": 0.7202077507972717, "learning_rate": 5.157671780760385e-06, "loss": 0.6406188607215881, "step": 1569 }, { "epoch": 2.0363363546038893, "grad_norm": 0.7133172154426575, "learning_rate": 5.145165572696652e-06, "loss": 0.6294587850570679, "step": 1570 }, { "epoch": 2.037634081554028, "grad_norm": 0.7211350798606873, "learning_rate": 5.132669291540544e-06, "loss": 0.6074943542480469, "step": 1571 }, { "epoch": 2.038931808504167, "grad_norm": 0.7271124124526978, "learning_rate": 5.1201829628437926e-06, "loss": 0.6158304214477539, "step": 1572 }, { "epoch": 2.040229535454306, "grad_norm": 0.7051241397857666, "learning_rate": 5.107706612137776e-06, "loss": 0.6632368564605713, "step": 1573 }, { "epoch": 2.0415272624044447, "grad_norm": 0.7206335067749023, "learning_rate": 5.095240264933486e-06, "loss": 0.6133254766464233, "step": 1574 }, { "epoch": 2.0428249893545836, "grad_norm": 0.7106805443763733, "learning_rate": 5.082783946721434e-06, "loss": 0.629423201084137, "step": 1575 }, { "epoch": 2.0441227163047224, "grad_norm": 0.7104700207710266, "learning_rate": 5.070337682971642e-06, "loss": 0.6985434293746948, "step": 1576 }, { "epoch": 2.0454204432548613, "grad_norm": 0.6845932006835938, "learning_rate": 5.057901499133573e-06, "loss": 0.6254795789718628, "step": 1577 }, { "epoch": 2.046718170205, "grad_norm": 0.7214529514312744, "learning_rate": 5.0454754206360705e-06, "loss": 0.6072602868080139, "step": 1578 }, { "epoch": 2.048015897155139, "grad_norm": 0.71996009349823, "learning_rate": 5.033059472887322e-06, "loss": 0.6534575819969177, "step": 1579 }, { "epoch": 2.0493136241052783, "grad_norm": 0.7217608690261841, "learning_rate": 5.0206536812748004e-06, "loss": 0.6317112445831299, "step": 1580 }, { "epoch": 2.050611351055417, "grad_norm": 0.7069404125213623, "learning_rate": 5.008258071165202e-06, "loss": 0.6474272608757019, "step": 1581 }, { "epoch": 2.051909078005556, "grad_norm": 0.7298946976661682, "learning_rate": 4.995872667904424e-06, "loss": 0.6893925666809082, "step": 1582 }, { "epoch": 2.053206804955695, "grad_norm": 0.750266432762146, "learning_rate": 4.98349749681747e-06, "loss": 0.6087015271186829, "step": 1583 }, { "epoch": 2.0545045319058337, "grad_norm": 0.7133123278617859, "learning_rate": 4.971132583208438e-06, "loss": 0.624868631362915, "step": 1584 }, { "epoch": 2.0558022588559726, "grad_norm": 0.7388240694999695, "learning_rate": 4.958777952360445e-06, "loss": 0.6425670981407166, "step": 1585 }, { "epoch": 2.0570999858061114, "grad_norm": 0.7531347870826721, "learning_rate": 4.946433629535585e-06, "loss": 0.6272885799407959, "step": 1586 }, { "epoch": 2.0583977127562503, "grad_norm": 0.7500084042549133, "learning_rate": 4.934099639974874e-06, "loss": 0.6620087027549744, "step": 1587 }, { "epoch": 2.059695439706389, "grad_norm": 0.708791196346283, "learning_rate": 4.921776008898198e-06, "loss": 0.5606707334518433, "step": 1588 }, { "epoch": 2.060993166656528, "grad_norm": 0.7260934114456177, "learning_rate": 4.909462761504264e-06, "loss": 0.67381352186203, "step": 1589 }, { "epoch": 2.0622908936066673, "grad_norm": 0.6928997039794922, "learning_rate": 4.897159922970551e-06, "loss": 0.6307032704353333, "step": 1590 }, { "epoch": 2.063588620556806, "grad_norm": 0.7362192869186401, "learning_rate": 4.884867518453238e-06, "loss": 0.6901969313621521, "step": 1591 }, { "epoch": 2.064886347506945, "grad_norm": 0.722802460193634, "learning_rate": 4.872585573087195e-06, "loss": 0.7266512513160706, "step": 1592 }, { "epoch": 2.066184074457084, "grad_norm": 0.7570728659629822, "learning_rate": 4.860314111985881e-06, "loss": 0.7014977335929871, "step": 1593 }, { "epoch": 2.0674818014072227, "grad_norm": 0.7065424919128418, "learning_rate": 4.848053160241333e-06, "loss": 0.623349130153656, "step": 1594 }, { "epoch": 2.0687795283573616, "grad_norm": 0.7208600044250488, "learning_rate": 4.835802742924091e-06, "loss": 0.6265473961830139, "step": 1595 }, { "epoch": 2.0700772553075004, "grad_norm": 0.7267877459526062, "learning_rate": 4.823562885083161e-06, "loss": 0.6631119251251221, "step": 1596 }, { "epoch": 2.0713749822576393, "grad_norm": 0.7265859842300415, "learning_rate": 4.811333611745953e-06, "loss": 0.655154824256897, "step": 1597 }, { "epoch": 2.072672709207778, "grad_norm": 0.7422747015953064, "learning_rate": 4.799114947918238e-06, "loss": 0.6400114297866821, "step": 1598 }, { "epoch": 2.073970436157917, "grad_norm": 0.7720977663993835, "learning_rate": 4.786906918584083e-06, "loss": 0.6592541337013245, "step": 1599 }, { "epoch": 2.075268163108056, "grad_norm": 0.741809606552124, "learning_rate": 4.774709548705831e-06, "loss": 0.6636130213737488, "step": 1600 }, { "epoch": 2.076565890058195, "grad_norm": 0.7521026730537415, "learning_rate": 4.762522863224001e-06, "loss": 0.6645440459251404, "step": 1601 }, { "epoch": 2.077863617008334, "grad_norm": 0.735471248626709, "learning_rate": 4.750346887057292e-06, "loss": 0.6191429495811462, "step": 1602 }, { "epoch": 2.079161343958473, "grad_norm": 0.7346929907798767, "learning_rate": 4.738181645102493e-06, "loss": 0.616767406463623, "step": 1603 }, { "epoch": 2.0804590709086117, "grad_norm": 0.7322461605072021, "learning_rate": 4.726027162234434e-06, "loss": 0.6997534036636353, "step": 1604 }, { "epoch": 2.0817567978587506, "grad_norm": 0.7436448335647583, "learning_rate": 4.713883463305972e-06, "loss": 0.6780825853347778, "step": 1605 }, { "epoch": 2.0830545248088894, "grad_norm": 0.7452847361564636, "learning_rate": 4.701750573147885e-06, "loss": 0.6652136445045471, "step": 1606 }, { "epoch": 2.0843522517590283, "grad_norm": 0.7359369993209839, "learning_rate": 4.689628516568866e-06, "loss": 0.676584780216217, "step": 1607 }, { "epoch": 2.085649978709167, "grad_norm": 0.7257094979286194, "learning_rate": 4.677517318355455e-06, "loss": 0.6461347937583923, "step": 1608 }, { "epoch": 2.086947705659306, "grad_norm": 0.7261176705360413, "learning_rate": 4.6654170032719825e-06, "loss": 0.6190035343170166, "step": 1609 }, { "epoch": 2.088245432609445, "grad_norm": 0.7273695468902588, "learning_rate": 4.6533275960605355e-06, "loss": 0.6539610028266907, "step": 1610 }, { "epoch": 2.088245432609445, "eval_loss": 0.7521457076072693, "eval_runtime": 140.3222, "eval_samples_per_second": 37.001, "eval_steps_per_second": 9.25, "step": 1610 }, { "epoch": 2.089543159559584, "grad_norm": 0.7120246291160583, "learning_rate": 4.641249121440892e-06, "loss": 0.6520042419433594, "step": 1611 }, { "epoch": 2.090840886509723, "grad_norm": 0.7543119788169861, "learning_rate": 4.629181604110464e-06, "loss": 0.6681778430938721, "step": 1612 }, { "epoch": 2.092138613459862, "grad_norm": 0.7003790736198425, "learning_rate": 4.617125068744288e-06, "loss": 0.5710310935974121, "step": 1613 }, { "epoch": 2.0934363404100007, "grad_norm": 0.7836804986000061, "learning_rate": 4.605079539994911e-06, "loss": 0.686365008354187, "step": 1614 }, { "epoch": 2.0947340673601396, "grad_norm": 0.7372239828109741, "learning_rate": 4.593045042492404e-06, "loss": 0.684090256690979, "step": 1615 }, { "epoch": 2.0960317943102784, "grad_norm": 0.7506935000419617, "learning_rate": 4.581021600844258e-06, "loss": 0.6425600647926331, "step": 1616 }, { "epoch": 2.0973295212604173, "grad_norm": 0.7384741306304932, "learning_rate": 4.569009239635374e-06, "loss": 0.675249457359314, "step": 1617 }, { "epoch": 2.098627248210556, "grad_norm": 0.7220048308372498, "learning_rate": 4.557007983427987e-06, "loss": 0.6857472658157349, "step": 1618 }, { "epoch": 2.099924975160695, "grad_norm": 0.7698497772216797, "learning_rate": 4.54501785676163e-06, "loss": 0.6067232489585876, "step": 1619 }, { "epoch": 2.101222702110834, "grad_norm": 0.7213151454925537, "learning_rate": 4.533038884153077e-06, "loss": 0.7489792704582214, "step": 1620 }, { "epoch": 2.1025204290609727, "grad_norm": 0.7353917956352234, "learning_rate": 4.521071090096298e-06, "loss": 0.6004921793937683, "step": 1621 }, { "epoch": 2.103818156011112, "grad_norm": 0.712821364402771, "learning_rate": 4.509114499062393e-06, "loss": 0.632519006729126, "step": 1622 }, { "epoch": 2.105115882961251, "grad_norm": 0.7335408926010132, "learning_rate": 4.4971691354995795e-06, "loss": 0.6487690210342407, "step": 1623 }, { "epoch": 2.1064136099113897, "grad_norm": 0.7657801508903503, "learning_rate": 4.485235023833087e-06, "loss": 0.7272740602493286, "step": 1624 }, { "epoch": 2.1077113368615286, "grad_norm": 0.7787186503410339, "learning_rate": 4.4733121884651665e-06, "loss": 0.6530774235725403, "step": 1625 }, { "epoch": 2.1090090638116674, "grad_norm": 0.7693159580230713, "learning_rate": 4.46140065377499e-06, "loss": 0.6131106019020081, "step": 1626 }, { "epoch": 2.1103067907618063, "grad_norm": 0.7225230932235718, "learning_rate": 4.449500444118633e-06, "loss": 0.6403114199638367, "step": 1627 }, { "epoch": 2.111604517711945, "grad_norm": 0.7100993990898132, "learning_rate": 4.437611583829014e-06, "loss": 0.6448891162872314, "step": 1628 }, { "epoch": 2.112902244662084, "grad_norm": 0.6913020610809326, "learning_rate": 4.42573409721584e-06, "loss": 0.6105331778526306, "step": 1629 }, { "epoch": 2.114199971612223, "grad_norm": 0.7184289693832397, "learning_rate": 4.413868008565569e-06, "loss": 0.6300491690635681, "step": 1630 }, { "epoch": 2.1154976985623617, "grad_norm": 0.7327896356582642, "learning_rate": 4.402013342141347e-06, "loss": 0.5891982316970825, "step": 1631 }, { "epoch": 2.1167954255125006, "grad_norm": 0.7524354457855225, "learning_rate": 4.390170122182965e-06, "loss": 0.6236910820007324, "step": 1632 }, { "epoch": 2.11809315246264, "grad_norm": 0.69328373670578, "learning_rate": 4.378338372906813e-06, "loss": 0.6320694088935852, "step": 1633 }, { "epoch": 2.1193908794127787, "grad_norm": 0.7765412926673889, "learning_rate": 4.3665181185058255e-06, "loss": 0.6867218613624573, "step": 1634 }, { "epoch": 2.1206886063629176, "grad_norm": 0.7132006883621216, "learning_rate": 4.354709383149421e-06, "loss": 0.6264625787734985, "step": 1635 }, { "epoch": 2.1219863333130564, "grad_norm": 0.7659435272216797, "learning_rate": 4.342912190983487e-06, "loss": 0.7046580910682678, "step": 1636 }, { "epoch": 2.1232840602631953, "grad_norm": 0.7297986149787903, "learning_rate": 4.331126566130284e-06, "loss": 0.7077990174293518, "step": 1637 }, { "epoch": 2.124581787213334, "grad_norm": 0.7537614107131958, "learning_rate": 4.319352532688444e-06, "loss": 0.652155876159668, "step": 1638 }, { "epoch": 2.125879514163473, "grad_norm": 0.7315341234207153, "learning_rate": 4.3075901147328745e-06, "loss": 0.6733738780021667, "step": 1639 }, { "epoch": 2.127177241113612, "grad_norm": 0.7361832857131958, "learning_rate": 4.295839336314749e-06, "loss": 0.635147750377655, "step": 1640 }, { "epoch": 2.1284749680637507, "grad_norm": 0.7507902383804321, "learning_rate": 4.284100221461432e-06, "loss": 0.6047714948654175, "step": 1641 }, { "epoch": 2.1297726950138895, "grad_norm": 0.7528434991836548, "learning_rate": 4.272372794176446e-06, "loss": 0.7513724565505981, "step": 1642 }, { "epoch": 2.1310704219640284, "grad_norm": 0.7637490034103394, "learning_rate": 4.260657078439409e-06, "loss": 0.67987060546875, "step": 1643 }, { "epoch": 2.1323681489141677, "grad_norm": 0.7283375859260559, "learning_rate": 4.248953098205997e-06, "loss": 0.6341656446456909, "step": 1644 }, { "epoch": 2.1336658758643066, "grad_norm": 0.7419525980949402, "learning_rate": 4.237260877407878e-06, "loss": 0.6832218766212463, "step": 1645 }, { "epoch": 2.1349636028144454, "grad_norm": 0.7223761081695557, "learning_rate": 4.225580439952699e-06, "loss": 0.6866045594215393, "step": 1646 }, { "epoch": 2.1362613297645843, "grad_norm": 0.7388637065887451, "learning_rate": 4.213911809723987e-06, "loss": 0.6384668350219727, "step": 1647 }, { "epoch": 2.137559056714723, "grad_norm": 0.755170464515686, "learning_rate": 4.20225501058114e-06, "loss": 0.6708781123161316, "step": 1648 }, { "epoch": 2.138856783664862, "grad_norm": 0.7287908792495728, "learning_rate": 4.190610066359364e-06, "loss": 0.6631587743759155, "step": 1649 }, { "epoch": 2.140154510615001, "grad_norm": 0.7358418107032776, "learning_rate": 4.1789770008696205e-06, "loss": 0.6789165735244751, "step": 1650 }, { "epoch": 2.1414522375651397, "grad_norm": 0.7651984691619873, "learning_rate": 4.167355837898585e-06, "loss": 0.7314514517784119, "step": 1651 }, { "epoch": 2.1427499645152785, "grad_norm": 0.7463676333427429, "learning_rate": 4.155746601208594e-06, "loss": 0.6692876219749451, "step": 1652 }, { "epoch": 2.1440476914654174, "grad_norm": 0.7222311496734619, "learning_rate": 4.144149314537599e-06, "loss": 0.6298620104789734, "step": 1653 }, { "epoch": 2.1453454184155567, "grad_norm": 0.6989638805389404, "learning_rate": 4.1325640015991185e-06, "loss": 0.6444326043128967, "step": 1654 }, { "epoch": 2.1466431453656956, "grad_norm": 0.7494760155677795, "learning_rate": 4.120990686082174e-06, "loss": 0.6625097990036011, "step": 1655 }, { "epoch": 2.1479408723158344, "grad_norm": 0.7078225016593933, "learning_rate": 4.109429391651283e-06, "loss": 0.5881250500679016, "step": 1656 }, { "epoch": 2.1492385992659733, "grad_norm": 0.767970621585846, "learning_rate": 4.097880141946354e-06, "loss": 0.6296786665916443, "step": 1657 }, { "epoch": 2.150536326216112, "grad_norm": 0.7743704319000244, "learning_rate": 4.08634296058268e-06, "loss": 0.6085373759269714, "step": 1658 }, { "epoch": 2.151834053166251, "grad_norm": 0.7132009267807007, "learning_rate": 4.074817871150887e-06, "loss": 0.6695290803909302, "step": 1659 }, { "epoch": 2.15313178011639, "grad_norm": 0.7174614667892456, "learning_rate": 4.063304897216856e-06, "loss": 0.6345046758651733, "step": 1660 }, { "epoch": 2.1544295070665287, "grad_norm": 0.756147027015686, "learning_rate": 4.051804062321706e-06, "loss": 0.6537505388259888, "step": 1661 }, { "epoch": 2.1557272340166675, "grad_norm": 0.7213236093521118, "learning_rate": 4.040315389981736e-06, "loss": 0.702519953250885, "step": 1662 }, { "epoch": 2.1570249609668064, "grad_norm": 0.7155364751815796, "learning_rate": 4.028838903688372e-06, "loss": 0.681422770023346, "step": 1663 }, { "epoch": 2.1583226879169457, "grad_norm": 0.7463889122009277, "learning_rate": 4.017374626908125e-06, "loss": 0.6635671854019165, "step": 1664 }, { "epoch": 2.1596204148670846, "grad_norm": 0.7302799820899963, "learning_rate": 4.005922583082538e-06, "loss": 0.6605507731437683, "step": 1665 }, { "epoch": 2.1609181418172234, "grad_norm": 0.7709221243858337, "learning_rate": 3.994482795628142e-06, "loss": 0.6744245290756226, "step": 1666 }, { "epoch": 2.1622158687673623, "grad_norm": 0.7545700669288635, "learning_rate": 3.983055287936411e-06, "loss": 0.7104499340057373, "step": 1667 }, { "epoch": 2.163513595717501, "grad_norm": 0.7296931743621826, "learning_rate": 3.971640083373696e-06, "loss": 0.6586728096008301, "step": 1668 }, { "epoch": 2.16481132266764, "grad_norm": 0.7653056383132935, "learning_rate": 3.960237205281213e-06, "loss": 0.6596845388412476, "step": 1669 }, { "epoch": 2.166109049617779, "grad_norm": 0.740091860294342, "learning_rate": 3.948846676974953e-06, "loss": 0.6983301043510437, "step": 1670 }, { "epoch": 2.1674067765679177, "grad_norm": 0.7317189574241638, "learning_rate": 3.937468521745666e-06, "loss": 0.6039131879806519, "step": 1671 }, { "epoch": 2.1687045035180565, "grad_norm": 0.7543178200721741, "learning_rate": 3.9261027628588e-06, "loss": 0.7082279324531555, "step": 1672 }, { "epoch": 2.1700022304681954, "grad_norm": 0.7396308779716492, "learning_rate": 3.9147494235544544e-06, "loss": 0.6432596445083618, "step": 1673 }, { "epoch": 2.1712999574183343, "grad_norm": 0.7311068177223206, "learning_rate": 3.903408527047336e-06, "loss": 0.6383781433105469, "step": 1674 }, { "epoch": 2.1725976843684736, "grad_norm": 0.7544176578521729, "learning_rate": 3.892080096526707e-06, "loss": 0.6584154367446899, "step": 1675 }, { "epoch": 2.1738954113186124, "grad_norm": 0.7279508113861084, "learning_rate": 3.880764155156339e-06, "loss": 0.6078423261642456, "step": 1676 }, { "epoch": 2.1751931382687513, "grad_norm": 0.7655706405639648, "learning_rate": 3.8694607260744745e-06, "loss": 0.716061532497406, "step": 1677 }, { "epoch": 2.17649086521889, "grad_norm": 0.7374406456947327, "learning_rate": 3.858169832393752e-06, "loss": 0.6383547782897949, "step": 1678 }, { "epoch": 2.177788592169029, "grad_norm": 0.7599214911460876, "learning_rate": 3.846891497201206e-06, "loss": 0.734661340713501, "step": 1679 }, { "epoch": 2.179086319119168, "grad_norm": 0.7564613819122314, "learning_rate": 3.835625743558168e-06, "loss": 0.6974920630455017, "step": 1680 }, { "epoch": 2.1803840460693067, "grad_norm": 0.7368860244750977, "learning_rate": 3.824372594500256e-06, "loss": 0.7153822183609009, "step": 1681 }, { "epoch": 2.1816817730194455, "grad_norm": 0.7436947226524353, "learning_rate": 3.813132073037309e-06, "loss": 0.6690018773078918, "step": 1682 }, { "epoch": 2.1829794999695844, "grad_norm": 0.7441128492355347, "learning_rate": 3.8019042021533513e-06, "loss": 0.6398620009422302, "step": 1683 }, { "epoch": 2.1842772269197233, "grad_norm": 0.7101579308509827, "learning_rate": 3.7906890048065358e-06, "loss": 0.6713053584098816, "step": 1684 }, { "epoch": 2.185574953869862, "grad_norm": 0.7423803210258484, "learning_rate": 3.779486503929106e-06, "loss": 0.6554515957832336, "step": 1685 }, { "epoch": 2.1868726808200014, "grad_norm": 0.7913647890090942, "learning_rate": 3.7682967224273317e-06, "loss": 0.6829732656478882, "step": 1686 }, { "epoch": 2.1881704077701403, "grad_norm": 0.7406657338142395, "learning_rate": 3.757119683181493e-06, "loss": 0.6207722425460815, "step": 1687 }, { "epoch": 2.189468134720279, "grad_norm": 0.755535900592804, "learning_rate": 3.7459554090458018e-06, "loss": 0.5663500428199768, "step": 1688 }, { "epoch": 2.190765861670418, "grad_norm": 0.736067533493042, "learning_rate": 3.7348039228483758e-06, "loss": 0.6010056734085083, "step": 1689 }, { "epoch": 2.192063588620557, "grad_norm": 0.7262256741523743, "learning_rate": 3.7236652473911817e-06, "loss": 0.6251591444015503, "step": 1690 }, { "epoch": 2.1933613155706957, "grad_norm": 0.7204144597053528, "learning_rate": 3.7125394054499843e-06, "loss": 0.6580095887184143, "step": 1691 }, { "epoch": 2.1946590425208345, "grad_norm": 0.7472013235092163, "learning_rate": 3.7014264197743267e-06, "loss": 0.6532347798347473, "step": 1692 }, { "epoch": 2.1959567694709734, "grad_norm": 0.7987051010131836, "learning_rate": 3.6903263130874423e-06, "loss": 0.7221670746803284, "step": 1693 }, { "epoch": 2.1972544964211123, "grad_norm": 0.6925249695777893, "learning_rate": 3.679239108086241e-06, "loss": 0.6809045672416687, "step": 1694 }, { "epoch": 2.198552223371251, "grad_norm": 0.7587743997573853, "learning_rate": 3.668164827441254e-06, "loss": 0.6878798007965088, "step": 1695 }, { "epoch": 2.19984995032139, "grad_norm": 0.7842516899108887, "learning_rate": 3.657103493796581e-06, "loss": 0.6502532958984375, "step": 1696 }, { "epoch": 2.2011476772715293, "grad_norm": 0.7169952392578125, "learning_rate": 3.6460551297698486e-06, "loss": 0.6481271386146545, "step": 1697 }, { "epoch": 2.202445404221668, "grad_norm": 0.7124336957931519, "learning_rate": 3.6350197579521696e-06, "loss": 0.6550193428993225, "step": 1698 }, { "epoch": 2.203743131171807, "grad_norm": 0.7990091443061829, "learning_rate": 3.6239974009080746e-06, "loss": 0.6425266265869141, "step": 1699 }, { "epoch": 2.205040858121946, "grad_norm": 0.7323048114776611, "learning_rate": 3.6129880811755093e-06, "loss": 0.6682150959968567, "step": 1700 }, { "epoch": 2.2063385850720847, "grad_norm": 0.7515720129013062, "learning_rate": 3.601991821265731e-06, "loss": 0.6324195265769958, "step": 1701 }, { "epoch": 2.2076363120222235, "grad_norm": 0.7524798512458801, "learning_rate": 3.591008643663323e-06, "loss": 0.6398360729217529, "step": 1702 }, { "epoch": 2.2089340389723624, "grad_norm": 0.7255743741989136, "learning_rate": 3.580038570826093e-06, "loss": 0.6324408650398254, "step": 1703 }, { "epoch": 2.2102317659225013, "grad_norm": 0.7248579263687134, "learning_rate": 3.5690816251850657e-06, "loss": 0.6215530037879944, "step": 1704 }, { "epoch": 2.21152949287264, "grad_norm": 0.7395302057266235, "learning_rate": 3.5581378291444223e-06, "loss": 0.6551209092140198, "step": 1705 }, { "epoch": 2.212827219822779, "grad_norm": 0.7041357755661011, "learning_rate": 3.5472072050814565e-06, "loss": 0.5609908103942871, "step": 1706 }, { "epoch": 2.2141249467729183, "grad_norm": 0.7290234565734863, "learning_rate": 3.5362897753465265e-06, "loss": 0.6203784346580505, "step": 1707 }, { "epoch": 2.215422673723057, "grad_norm": 0.7435030341148376, "learning_rate": 3.5253855622630174e-06, "loss": 0.6926784515380859, "step": 1708 }, { "epoch": 2.216720400673196, "grad_norm": 0.8078302145004272, "learning_rate": 3.514494588127275e-06, "loss": 0.7228481769561768, "step": 1709 }, { "epoch": 2.218018127623335, "grad_norm": 0.7225632667541504, "learning_rate": 3.5036168752085977e-06, "loss": 0.6265015006065369, "step": 1710 }, { "epoch": 2.2193158545734737, "grad_norm": 0.7306722402572632, "learning_rate": 3.4927524457491456e-06, "loss": 0.6289119720458984, "step": 1711 }, { "epoch": 2.2206135815236125, "grad_norm": 0.7898452281951904, "learning_rate": 3.4819013219639295e-06, "loss": 0.597404420375824, "step": 1712 }, { "epoch": 2.2219113084737514, "grad_norm": 0.6890703439712524, "learning_rate": 3.471063526040752e-06, "loss": 0.6129499673843384, "step": 1713 }, { "epoch": 2.2232090354238903, "grad_norm": 0.710536777973175, "learning_rate": 3.460239080140163e-06, "loss": 0.5661106109619141, "step": 1714 }, { "epoch": 2.224506762374029, "grad_norm": 0.7644726634025574, "learning_rate": 3.4494280063954146e-06, "loss": 0.6964048147201538, "step": 1715 }, { "epoch": 2.225804489324168, "grad_norm": 0.7347561120986938, "learning_rate": 3.4386303269124142e-06, "loss": 0.6240056157112122, "step": 1716 }, { "epoch": 2.2271022162743073, "grad_norm": 0.7397733330726624, "learning_rate": 3.4278460637696865e-06, "loss": 0.6740396022796631, "step": 1717 }, { "epoch": 2.228399943224446, "grad_norm": 0.7311684489250183, "learning_rate": 3.4170752390183183e-06, "loss": 0.666801929473877, "step": 1718 }, { "epoch": 2.229697670174585, "grad_norm": 0.7383760213851929, "learning_rate": 3.4063178746819193e-06, "loss": 0.6334900259971619, "step": 1719 }, { "epoch": 2.230995397124724, "grad_norm": 0.7332467436790466, "learning_rate": 3.395573992756579e-06, "loss": 0.6466909646987915, "step": 1720 }, { "epoch": 2.2322931240748627, "grad_norm": 0.7475365996360779, "learning_rate": 3.384843615210819e-06, "loss": 0.6753822565078735, "step": 1721 }, { "epoch": 2.2335908510250015, "grad_norm": 0.7616447806358337, "learning_rate": 3.3741267639855345e-06, "loss": 0.7791091203689575, "step": 1722 }, { "epoch": 2.2348885779751404, "grad_norm": 0.7229276299476624, "learning_rate": 3.3634234609939888e-06, "loss": 0.6403383016586304, "step": 1723 }, { "epoch": 2.2361863049252793, "grad_norm": 0.7077613472938538, "learning_rate": 3.352733728121712e-06, "loss": 0.6446459889411926, "step": 1724 }, { "epoch": 2.237484031875418, "grad_norm": 0.6968312859535217, "learning_rate": 3.3420575872265184e-06, "loss": 0.5743072032928467, "step": 1725 }, { "epoch": 2.238781758825557, "grad_norm": 0.7185531854629517, "learning_rate": 3.3313950601384016e-06, "loss": 0.6074244379997253, "step": 1726 }, { "epoch": 2.240079485775696, "grad_norm": 0.7392717599868774, "learning_rate": 3.320746168659534e-06, "loss": 0.7010684609413147, "step": 1727 }, { "epoch": 2.241377212725835, "grad_norm": 0.7549191117286682, "learning_rate": 3.3101109345642056e-06, "loss": 0.6260566115379333, "step": 1728 }, { "epoch": 2.242674939675974, "grad_norm": 0.7569594383239746, "learning_rate": 3.299489379598777e-06, "loss": 0.6684094667434692, "step": 1729 }, { "epoch": 2.243972666626113, "grad_norm": 0.7654653787612915, "learning_rate": 3.288881525481639e-06, "loss": 0.6516446471214294, "step": 1730 }, { "epoch": 2.2452703935762517, "grad_norm": 0.7150068879127502, "learning_rate": 3.278287393903172e-06, "loss": 0.6244807839393616, "step": 1731 }, { "epoch": 2.2465681205263905, "grad_norm": 0.7367082238197327, "learning_rate": 3.2677070065256855e-06, "loss": 0.6541182398796082, "step": 1732 }, { "epoch": 2.2478658474765294, "grad_norm": 0.7309427857398987, "learning_rate": 3.257140384983405e-06, "loss": 0.6608707308769226, "step": 1733 }, { "epoch": 2.2491635744266683, "grad_norm": 0.7438578009605408, "learning_rate": 3.2465875508823876e-06, "loss": 0.6337431073188782, "step": 1734 }, { "epoch": 2.250461301376807, "grad_norm": 0.7018159627914429, "learning_rate": 3.2360485258005115e-06, "loss": 0.614033043384552, "step": 1735 }, { "epoch": 2.251759028326946, "grad_norm": 0.7361255884170532, "learning_rate": 3.2255233312874155e-06, "loss": 0.6730838418006897, "step": 1736 }, { "epoch": 2.253056755277085, "grad_norm": 0.7623570561408997, "learning_rate": 3.2150119888644594e-06, "loss": 0.659545361995697, "step": 1737 }, { "epoch": 2.2543544822272237, "grad_norm": 0.6926621198654175, "learning_rate": 3.2045145200246763e-06, "loss": 0.5896809697151184, "step": 1738 }, { "epoch": 2.255652209177363, "grad_norm": 0.7644792795181274, "learning_rate": 3.1940309462327334e-06, "loss": 0.688497006893158, "step": 1739 }, { "epoch": 2.256949936127502, "grad_norm": 0.7479227185249329, "learning_rate": 3.1835612889248868e-06, "loss": 0.6612273454666138, "step": 1740 }, { "epoch": 2.2582476630776407, "grad_norm": 0.7315995693206787, "learning_rate": 3.1731055695089384e-06, "loss": 0.5924808382987976, "step": 1741 }, { "epoch": 2.2595453900277795, "grad_norm": 0.7356354594230652, "learning_rate": 3.162663809364178e-06, "loss": 0.6635130047798157, "step": 1742 }, { "epoch": 2.2608431169779184, "grad_norm": 0.7253445982933044, "learning_rate": 3.152236029841376e-06, "loss": 0.6303724646568298, "step": 1743 }, { "epoch": 2.2621408439280573, "grad_norm": 0.7351011037826538, "learning_rate": 3.1418222522626907e-06, "loss": 0.720777153968811, "step": 1744 }, { "epoch": 2.263438570878196, "grad_norm": 0.7059449553489685, "learning_rate": 3.1314224979216633e-06, "loss": 0.598090648651123, "step": 1745 }, { "epoch": 2.264736297828335, "grad_norm": 0.7039961218833923, "learning_rate": 3.1210367880831684e-06, "loss": 0.5808880925178528, "step": 1746 }, { "epoch": 2.266034024778474, "grad_norm": 0.7747211456298828, "learning_rate": 3.1106651439833434e-06, "loss": 0.6428390741348267, "step": 1747 }, { "epoch": 2.2673317517286127, "grad_norm": 0.7529793381690979, "learning_rate": 3.1003075868295794e-06, "loss": 0.6959705352783203, "step": 1748 }, { "epoch": 2.2686294786787515, "grad_norm": 0.7145947813987732, "learning_rate": 3.0899641378004596e-06, "loss": 0.6403526663780212, "step": 1749 }, { "epoch": 2.269927205628891, "grad_norm": 0.7092662453651428, "learning_rate": 3.079634818045719e-06, "loss": 0.5681431889533997, "step": 1750 }, { "epoch": 2.2712249325790297, "grad_norm": 0.7515605688095093, "learning_rate": 3.069319648686202e-06, "loss": 0.633612334728241, "step": 1751 }, { "epoch": 2.2725226595291685, "grad_norm": 0.7028906941413879, "learning_rate": 3.0590186508138186e-06, "loss": 0.6241360902786255, "step": 1752 }, { "epoch": 2.2738203864793074, "grad_norm": 0.7183363437652588, "learning_rate": 3.048731845491504e-06, "loss": 0.5909807085990906, "step": 1753 }, { "epoch": 2.2751181134294463, "grad_norm": 0.7331669926643372, "learning_rate": 3.038459253753172e-06, "loss": 0.6321236491203308, "step": 1754 }, { "epoch": 2.276415840379585, "grad_norm": 0.6997974514961243, "learning_rate": 3.0282008966036647e-06, "loss": 0.6245713829994202, "step": 1755 }, { "epoch": 2.277713567329724, "grad_norm": 0.7051255702972412, "learning_rate": 3.0179567950187396e-06, "loss": 0.6196664571762085, "step": 1756 }, { "epoch": 2.279011294279863, "grad_norm": 0.7281318306922913, "learning_rate": 3.0077269699449795e-06, "loss": 0.6078094840049744, "step": 1757 }, { "epoch": 2.2803090212300017, "grad_norm": 0.7404606938362122, "learning_rate": 2.9975114422997932e-06, "loss": 0.6296783685684204, "step": 1758 }, { "epoch": 2.2816067481801405, "grad_norm": 0.7832150459289551, "learning_rate": 2.9873102329713478e-06, "loss": 0.6518726348876953, "step": 1759 }, { "epoch": 2.2829044751302794, "grad_norm": 0.715710461139679, "learning_rate": 2.9771233628185346e-06, "loss": 0.5865130424499512, "step": 1760 }, { "epoch": 2.2842022020804187, "grad_norm": 0.7315993309020996, "learning_rate": 2.9669508526709256e-06, "loss": 0.7027003765106201, "step": 1761 }, { "epoch": 2.2854999290305575, "grad_norm": 0.7398679852485657, "learning_rate": 2.9567927233287307e-06, "loss": 0.6710663437843323, "step": 1762 }, { "epoch": 2.2867976559806964, "grad_norm": 0.7295849323272705, "learning_rate": 2.9466489955627452e-06, "loss": 0.7136781811714172, "step": 1763 }, { "epoch": 2.2880953829308353, "grad_norm": 0.7286946773529053, "learning_rate": 2.936519690114338e-06, "loss": 0.6223260760307312, "step": 1764 }, { "epoch": 2.289393109880974, "grad_norm": 0.7104554772377014, "learning_rate": 2.9264048276953606e-06, "loss": 0.6340541839599609, "step": 1765 }, { "epoch": 2.290690836831113, "grad_norm": 0.7115781903266907, "learning_rate": 2.9163044289881604e-06, "loss": 0.6645469069480896, "step": 1766 }, { "epoch": 2.291988563781252, "grad_norm": 0.733094334602356, "learning_rate": 2.906218514645487e-06, "loss": 0.6235517859458923, "step": 1767 }, { "epoch": 2.2932862907313907, "grad_norm": 0.7436304688453674, "learning_rate": 2.8961471052904855e-06, "loss": 0.66838139295578, "step": 1768 }, { "epoch": 2.2945840176815295, "grad_norm": 0.7022131681442261, "learning_rate": 2.8860902215166374e-06, "loss": 0.6098725199699402, "step": 1769 }, { "epoch": 2.295881744631669, "grad_norm": 0.725817859172821, "learning_rate": 2.876047883887727e-06, "loss": 0.7111449837684631, "step": 1770 }, { "epoch": 2.2971794715818072, "grad_norm": 0.7336429357528687, "learning_rate": 2.866020112937792e-06, "loss": 0.6535848379135132, "step": 1771 }, { "epoch": 2.2984771985319465, "grad_norm": 0.743033230304718, "learning_rate": 2.8560069291710857e-06, "loss": 0.6946330070495605, "step": 1772 }, { "epoch": 2.2997749254820854, "grad_norm": 0.7527621388435364, "learning_rate": 2.8460083530620342e-06, "loss": 0.67728191614151, "step": 1773 }, { "epoch": 2.3010726524322243, "grad_norm": 0.7036607265472412, "learning_rate": 2.8360244050551943e-06, "loss": 0.5508571267127991, "step": 1774 }, { "epoch": 2.302370379382363, "grad_norm": 0.698133647441864, "learning_rate": 2.8260551055652154e-06, "loss": 0.680967390537262, "step": 1775 }, { "epoch": 2.303668106332502, "grad_norm": 0.7584355473518372, "learning_rate": 2.8161004749767893e-06, "loss": 0.6776391863822937, "step": 1776 }, { "epoch": 2.304965833282641, "grad_norm": 0.7389799356460571, "learning_rate": 2.8061605336446194e-06, "loss": 0.6526666879653931, "step": 1777 }, { "epoch": 2.3062635602327797, "grad_norm": 0.7454041242599487, "learning_rate": 2.796235301893362e-06, "loss": 0.6357724666595459, "step": 1778 }, { "epoch": 2.3075612871829185, "grad_norm": 0.745415210723877, "learning_rate": 2.7863248000176146e-06, "loss": 0.6145803928375244, "step": 1779 }, { "epoch": 2.3088590141330574, "grad_norm": 0.7515760660171509, "learning_rate": 2.776429048281837e-06, "loss": 0.6784413456916809, "step": 1780 }, { "epoch": 2.3101567410831967, "grad_norm": 0.7618042230606079, "learning_rate": 2.7665480669203383e-06, "loss": 0.6697713136672974, "step": 1781 }, { "epoch": 2.3114544680333355, "grad_norm": 0.69931560754776, "learning_rate": 2.756681876137227e-06, "loss": 0.5977004766464233, "step": 1782 }, { "epoch": 2.3127521949834744, "grad_norm": 0.7272830605506897, "learning_rate": 2.7468304961063642e-06, "loss": 0.6867664456367493, "step": 1783 }, { "epoch": 2.3140499219336133, "grad_norm": 0.7531746029853821, "learning_rate": 2.736993946971329e-06, "loss": 0.6313377022743225, "step": 1784 }, { "epoch": 2.315347648883752, "grad_norm": 0.7396632432937622, "learning_rate": 2.727172248845378e-06, "loss": 0.6548261642456055, "step": 1785 }, { "epoch": 2.316645375833891, "grad_norm": 0.7558153867721558, "learning_rate": 2.717365421811389e-06, "loss": 0.6362917423248291, "step": 1786 }, { "epoch": 2.31794310278403, "grad_norm": 0.7348777055740356, "learning_rate": 2.7075734859218526e-06, "loss": 0.617246150970459, "step": 1787 }, { "epoch": 2.3192408297341687, "grad_norm": 0.7107247710227966, "learning_rate": 2.6977964611987885e-06, "loss": 0.6115847229957581, "step": 1788 }, { "epoch": 2.3205385566843075, "grad_norm": 0.7372192740440369, "learning_rate": 2.6880343676337485e-06, "loss": 0.653107762336731, "step": 1789 }, { "epoch": 2.3218362836344464, "grad_norm": 0.7087644338607788, "learning_rate": 2.6782872251877347e-06, "loss": 0.6624957919120789, "step": 1790 }, { "epoch": 2.3231340105845852, "grad_norm": 0.7231054902076721, "learning_rate": 2.6685550537911886e-06, "loss": 0.6585568189620972, "step": 1791 }, { "epoch": 2.3244317375347245, "grad_norm": 0.7619837522506714, "learning_rate": 2.658837873343938e-06, "loss": 0.6406753063201904, "step": 1792 }, { "epoch": 2.3257294644848634, "grad_norm": 0.7381089329719543, "learning_rate": 2.6491357037151565e-06, "loss": 0.6516512036323547, "step": 1793 }, { "epoch": 2.3270271914350023, "grad_norm": 0.7420887351036072, "learning_rate": 2.639448564743328e-06, "loss": 0.6555370688438416, "step": 1794 }, { "epoch": 2.328324918385141, "grad_norm": 0.7358477115631104, "learning_rate": 2.6297764762362e-06, "loss": 0.6229339838027954, "step": 1795 }, { "epoch": 2.32962264533528, "grad_norm": 0.7449919581413269, "learning_rate": 2.6201194579707377e-06, "loss": 0.6487348675727844, "step": 1796 }, { "epoch": 2.330920372285419, "grad_norm": 0.755095362663269, "learning_rate": 2.6104775296931118e-06, "loss": 0.709601640701294, "step": 1797 }, { "epoch": 2.3322180992355577, "grad_norm": 0.7726845145225525, "learning_rate": 2.6008507111186142e-06, "loss": 0.6235072016716003, "step": 1798 }, { "epoch": 2.3335158261856965, "grad_norm": 0.7045385241508484, "learning_rate": 2.5912390219316573e-06, "loss": 0.5908339619636536, "step": 1799 }, { "epoch": 2.3348135531358354, "grad_norm": 0.7490655779838562, "learning_rate": 2.5816424817857122e-06, "loss": 0.7369755506515503, "step": 1800 }, { "epoch": 2.3361112800859742, "grad_norm": 0.7135450839996338, "learning_rate": 2.572061110303271e-06, "loss": 0.6987670063972473, "step": 1801 }, { "epoch": 2.337409007036113, "grad_norm": 0.7187747359275818, "learning_rate": 2.562494927075824e-06, "loss": 0.5778123140335083, "step": 1802 }, { "epoch": 2.3387067339862524, "grad_norm": 0.7786324620246887, "learning_rate": 2.552943951663782e-06, "loss": 0.6605340838432312, "step": 1803 }, { "epoch": 2.3400044609363913, "grad_norm": 0.785906195640564, "learning_rate": 2.543408203596479e-06, "loss": 0.6925969123840332, "step": 1804 }, { "epoch": 2.34130218788653, "grad_norm": 0.7589930891990662, "learning_rate": 2.5338877023721055e-06, "loss": 0.6296513676643372, "step": 1805 }, { "epoch": 2.342599914836669, "grad_norm": 0.6791945695877075, "learning_rate": 2.5243824674576743e-06, "loss": 0.6128097176551819, "step": 1806 }, { "epoch": 2.343897641786808, "grad_norm": 0.737198531627655, "learning_rate": 2.514892518288988e-06, "loss": 0.60391765832901, "step": 1807 }, { "epoch": 2.3451953687369467, "grad_norm": 0.7078155279159546, "learning_rate": 2.5054178742705936e-06, "loss": 0.6364641189575195, "step": 1808 }, { "epoch": 2.3464930956870855, "grad_norm": 0.7275543808937073, "learning_rate": 2.4959585547757294e-06, "loss": 0.6722849011421204, "step": 1809 }, { "epoch": 2.3477908226372244, "grad_norm": 0.8179038166999817, "learning_rate": 2.486514579146322e-06, "loss": 0.6581687927246094, "step": 1810 }, { "epoch": 2.3490885495873632, "grad_norm": 0.766876757144928, "learning_rate": 2.4770859666929027e-06, "loss": 0.6003885865211487, "step": 1811 }, { "epoch": 2.350386276537502, "grad_norm": 0.7353731989860535, "learning_rate": 2.4676727366945995e-06, "loss": 0.6582502722740173, "step": 1812 }, { "epoch": 2.351684003487641, "grad_norm": 0.7552323341369629, "learning_rate": 2.4582749083990875e-06, "loss": 0.6586010456085205, "step": 1813 }, { "epoch": 2.3529817304377803, "grad_norm": 0.7750751376152039, "learning_rate": 2.448892501022544e-06, "loss": 0.6576810479164124, "step": 1814 }, { "epoch": 2.354279457387919, "grad_norm": 0.755615770816803, "learning_rate": 2.4395255337496202e-06, "loss": 0.6574745178222656, "step": 1815 }, { "epoch": 2.355577184338058, "grad_norm": 0.7417405843734741, "learning_rate": 2.4301740257333918e-06, "loss": 0.6290728449821472, "step": 1816 }, { "epoch": 2.356874911288197, "grad_norm": 0.7301021814346313, "learning_rate": 2.4208379960953255e-06, "loss": 0.6600069403648376, "step": 1817 }, { "epoch": 2.3581726382383357, "grad_norm": 0.7170204520225525, "learning_rate": 2.4115174639252425e-06, "loss": 0.5834653973579407, "step": 1818 }, { "epoch": 2.3594703651884745, "grad_norm": 0.7591288089752197, "learning_rate": 2.4022124482812627e-06, "loss": 0.6460838913917542, "step": 1819 }, { "epoch": 2.3607680921386134, "grad_norm": 0.7465713024139404, "learning_rate": 2.3929229681898005e-06, "loss": 0.670021116733551, "step": 1820 }, { "epoch": 2.3620658190887522, "grad_norm": 0.7204452753067017, "learning_rate": 2.3836490426454816e-06, "loss": 0.6367021799087524, "step": 1821 }, { "epoch": 2.363363546038891, "grad_norm": 0.7174842357635498, "learning_rate": 2.3743906906111415e-06, "loss": 0.6825685501098633, "step": 1822 }, { "epoch": 2.3646612729890304, "grad_norm": 0.6899293065071106, "learning_rate": 2.365147931017764e-06, "loss": 0.642341673374176, "step": 1823 }, { "epoch": 2.365958999939169, "grad_norm": 0.7295400500297546, "learning_rate": 2.355920782764455e-06, "loss": 0.6189469695091248, "step": 1824 }, { "epoch": 2.367256726889308, "grad_norm": 0.7334946393966675, "learning_rate": 2.3467092647183962e-06, "loss": 0.642494261264801, "step": 1825 }, { "epoch": 2.368554453839447, "grad_norm": 0.727120041847229, "learning_rate": 2.337513395714812e-06, "loss": 0.6564252972602844, "step": 1826 }, { "epoch": 2.369852180789586, "grad_norm": 0.7781887650489807, "learning_rate": 2.3283331945569256e-06, "loss": 0.7230110764503479, "step": 1827 }, { "epoch": 2.3711499077397247, "grad_norm": 0.7318363189697266, "learning_rate": 2.3191686800159272e-06, "loss": 0.6312495470046997, "step": 1828 }, { "epoch": 2.3724476346898635, "grad_norm": 0.7348397374153137, "learning_rate": 2.310019870830923e-06, "loss": 0.6707776784896851, "step": 1829 }, { "epoch": 2.3737453616400024, "grad_norm": 0.7550859451293945, "learning_rate": 2.300886785708919e-06, "loss": 0.6729933023452759, "step": 1830 }, { "epoch": 2.3750430885901412, "grad_norm": 0.724520206451416, "learning_rate": 2.2917694433247626e-06, "loss": 0.6436410546302795, "step": 1831 }, { "epoch": 2.37634081554028, "grad_norm": 0.7761313319206238, "learning_rate": 2.282667862321104e-06, "loss": 0.6961484551429749, "step": 1832 }, { "epoch": 2.377638542490419, "grad_norm": 0.7718027234077454, "learning_rate": 2.2735820613083837e-06, "loss": 0.731279194355011, "step": 1833 }, { "epoch": 2.3789362694405582, "grad_norm": 0.7511587738990784, "learning_rate": 2.264512058864755e-06, "loss": 0.6527747511863708, "step": 1834 }, { "epoch": 2.380233996390697, "grad_norm": 0.7314983010292053, "learning_rate": 2.2554578735360823e-06, "loss": 0.6660367846488953, "step": 1835 }, { "epoch": 2.381531723340836, "grad_norm": 0.7481415867805481, "learning_rate": 2.246419523835882e-06, "loss": 0.6034996509552002, "step": 1836 }, { "epoch": 2.382829450290975, "grad_norm": 0.7201923131942749, "learning_rate": 2.2373970282452916e-06, "loss": 0.618115246295929, "step": 1837 }, { "epoch": 2.3841271772411137, "grad_norm": 0.7333959341049194, "learning_rate": 2.2283904052130313e-06, "loss": 0.679516077041626, "step": 1838 }, { "epoch": 2.3854249041912525, "grad_norm": 0.7144783735275269, "learning_rate": 2.2193996731553656e-06, "loss": 0.6412646174430847, "step": 1839 }, { "epoch": 2.3867226311413914, "grad_norm": 0.7374799251556396, "learning_rate": 2.2104248504560643e-06, "loss": 0.6004337072372437, "step": 1840 }, { "epoch": 2.3867226311413914, "eval_loss": 0.7504242062568665, "eval_runtime": 140.6905, "eval_samples_per_second": 36.904, "eval_steps_per_second": 9.226, "step": 1840 }, { "epoch": 2.3880203580915302, "grad_norm": 0.7108725905418396, "learning_rate": 2.2014659554663732e-06, "loss": 0.6515002250671387, "step": 1841 }, { "epoch": 2.389318085041669, "grad_norm": 0.744311511516571, "learning_rate": 2.192523006504956e-06, "loss": 0.5911805033683777, "step": 1842 }, { "epoch": 2.390615811991808, "grad_norm": 0.7513126730918884, "learning_rate": 2.183596021857891e-06, "loss": 0.5855857133865356, "step": 1843 }, { "epoch": 2.391913538941947, "grad_norm": 0.7308302521705627, "learning_rate": 2.1746850197785928e-06, "loss": 0.6079833507537842, "step": 1844 }, { "epoch": 2.393211265892086, "grad_norm": 0.7567104697227478, "learning_rate": 2.16579001848781e-06, "loss": 0.6419387459754944, "step": 1845 }, { "epoch": 2.394508992842225, "grad_norm": 0.7667451500892639, "learning_rate": 2.156911036173568e-06, "loss": 0.6022201776504517, "step": 1846 }, { "epoch": 2.395806719792364, "grad_norm": 0.700553297996521, "learning_rate": 2.1480480909911384e-06, "loss": 0.6151991486549377, "step": 1847 }, { "epoch": 2.3971044467425027, "grad_norm": 0.7488269209861755, "learning_rate": 2.139201201062999e-06, "loss": 0.6688805222511292, "step": 1848 }, { "epoch": 2.3984021736926415, "grad_norm": 0.7348271608352661, "learning_rate": 2.130370384478807e-06, "loss": 0.6284016370773315, "step": 1849 }, { "epoch": 2.3996999006427804, "grad_norm": 0.7548435926437378, "learning_rate": 2.1215556592953357e-06, "loss": 0.6753513216972351, "step": 1850 }, { "epoch": 2.4009976275929192, "grad_norm": 0.7015430927276611, "learning_rate": 2.11275704353648e-06, "loss": 0.5835912823677063, "step": 1851 }, { "epoch": 2.402295354543058, "grad_norm": 0.732021689414978, "learning_rate": 2.10397455519317e-06, "loss": 0.645444929599762, "step": 1852 }, { "epoch": 2.403593081493197, "grad_norm": 0.7345272302627563, "learning_rate": 2.095208212223383e-06, "loss": 0.666027843952179, "step": 1853 }, { "epoch": 2.404890808443336, "grad_norm": 0.694179356098175, "learning_rate": 2.0864580325520623e-06, "loss": 0.6171280145645142, "step": 1854 }, { "epoch": 2.4061885353934747, "grad_norm": 0.7522391080856323, "learning_rate": 2.077724034071116e-06, "loss": 0.6551393270492554, "step": 1855 }, { "epoch": 2.407486262343614, "grad_norm": 0.731461226940155, "learning_rate": 2.069006234639357e-06, "loss": 0.5965202450752258, "step": 1856 }, { "epoch": 2.408783989293753, "grad_norm": 0.7376645803451538, "learning_rate": 2.060304652082481e-06, "loss": 0.6684772372245789, "step": 1857 }, { "epoch": 2.4100817162438917, "grad_norm": 0.8123404383659363, "learning_rate": 2.051619304193022e-06, "loss": 0.726719856262207, "step": 1858 }, { "epoch": 2.4113794431940305, "grad_norm": 0.723229169845581, "learning_rate": 2.0429502087303164e-06, "loss": 0.6310455799102783, "step": 1859 }, { "epoch": 2.4126771701441694, "grad_norm": 0.7440442442893982, "learning_rate": 2.0342973834204715e-06, "loss": 0.6147751808166504, "step": 1860 }, { "epoch": 2.4139748970943082, "grad_norm": 0.7190000414848328, "learning_rate": 2.0256608459563244e-06, "loss": 0.6343541741371155, "step": 1861 }, { "epoch": 2.415272624044447, "grad_norm": 0.7396417260169983, "learning_rate": 2.017040613997412e-06, "loss": 0.6213467121124268, "step": 1862 }, { "epoch": 2.416570350994586, "grad_norm": 0.7148772478103638, "learning_rate": 2.008436705169917e-06, "loss": 0.5708230137825012, "step": 1863 }, { "epoch": 2.417868077944725, "grad_norm": 0.7284368872642517, "learning_rate": 1.9998491370666684e-06, "loss": 0.5845701098442078, "step": 1864 }, { "epoch": 2.4191658048948637, "grad_norm": 0.7286568284034729, "learning_rate": 1.991277927247056e-06, "loss": 0.636822521686554, "step": 1865 }, { "epoch": 2.4204635318450025, "grad_norm": 0.741385817527771, "learning_rate": 1.9827230932370467e-06, "loss": 0.6635302305221558, "step": 1866 }, { "epoch": 2.421761258795142, "grad_norm": 0.7097977995872498, "learning_rate": 1.9741846525291033e-06, "loss": 0.5913397669792175, "step": 1867 }, { "epoch": 2.4230589857452807, "grad_norm": 0.748805582523346, "learning_rate": 1.9656626225821774e-06, "loss": 0.6394146680831909, "step": 1868 }, { "epoch": 2.4243567126954195, "grad_norm": 0.7540968656539917, "learning_rate": 1.957157020821664e-06, "loss": 0.6580138802528381, "step": 1869 }, { "epoch": 2.4256544396455584, "grad_norm": 0.7199598550796509, "learning_rate": 1.9486678646393654e-06, "loss": 0.6445693969726562, "step": 1870 }, { "epoch": 2.4269521665956972, "grad_norm": 0.722776472568512, "learning_rate": 1.9401951713934574e-06, "loss": 0.6294406056404114, "step": 1871 }, { "epoch": 2.428249893545836, "grad_norm": 0.776488184928894, "learning_rate": 1.931738958408457e-06, "loss": 0.6513455510139465, "step": 1872 }, { "epoch": 2.429547620495975, "grad_norm": 0.751055121421814, "learning_rate": 1.9232992429751694e-06, "loss": 0.6255248785018921, "step": 1873 }, { "epoch": 2.430845347446114, "grad_norm": 0.7133703827857971, "learning_rate": 1.9148760423506884e-06, "loss": 0.5895485281944275, "step": 1874 }, { "epoch": 2.4321430743962527, "grad_norm": 0.7120479941368103, "learning_rate": 1.9064693737583173e-06, "loss": 0.6799072027206421, "step": 1875 }, { "epoch": 2.433440801346392, "grad_norm": 0.7090493440628052, "learning_rate": 1.8980792543875758e-06, "loss": 0.6845042705535889, "step": 1876 }, { "epoch": 2.4347385282965304, "grad_norm": 0.7474452257156372, "learning_rate": 1.8897057013941256e-06, "loss": 0.6170677542686462, "step": 1877 }, { "epoch": 2.4360362552466697, "grad_norm": 0.7024904489517212, "learning_rate": 1.8813487318997658e-06, "loss": 0.6431372165679932, "step": 1878 }, { "epoch": 2.4373339821968085, "grad_norm": 0.7497063875198364, "learning_rate": 1.8730083629923857e-06, "loss": 0.6090019345283508, "step": 1879 }, { "epoch": 2.4386317091469474, "grad_norm": 0.7273635268211365, "learning_rate": 1.8646846117259277e-06, "loss": 0.6302788257598877, "step": 1880 }, { "epoch": 2.4399294360970862, "grad_norm": 0.745716392993927, "learning_rate": 1.856377495120355e-06, "loss": 0.6740216612815857, "step": 1881 }, { "epoch": 2.441227163047225, "grad_norm": 0.6912100911140442, "learning_rate": 1.8480870301616227e-06, "loss": 0.6371436715126038, "step": 1882 }, { "epoch": 2.442524889997364, "grad_norm": 0.73276287317276, "learning_rate": 1.839813233801626e-06, "loss": 0.6914728283882141, "step": 1883 }, { "epoch": 2.443822616947503, "grad_norm": 0.6954025626182556, "learning_rate": 1.8315561229581925e-06, "loss": 0.6365620493888855, "step": 1884 }, { "epoch": 2.4451203438976417, "grad_norm": 0.7226231098175049, "learning_rate": 1.8233157145150183e-06, "loss": 0.6907994151115417, "step": 1885 }, { "epoch": 2.4464180708477805, "grad_norm": 0.7429067492485046, "learning_rate": 1.8150920253216542e-06, "loss": 0.6867068409919739, "step": 1886 }, { "epoch": 2.44771579779792, "grad_norm": 0.7071108818054199, "learning_rate": 1.8068850721934639e-06, "loss": 0.6865320205688477, "step": 1887 }, { "epoch": 2.4490135247480587, "grad_norm": 0.7338579893112183, "learning_rate": 1.7986948719115872e-06, "loss": 0.6243481636047363, "step": 1888 }, { "epoch": 2.4503112516981975, "grad_norm": 0.727736234664917, "learning_rate": 1.7905214412229177e-06, "loss": 0.6568608283996582, "step": 1889 }, { "epoch": 2.4516089786483364, "grad_norm": 0.7110669612884521, "learning_rate": 1.7823647968400437e-06, "loss": 0.6400637626647949, "step": 1890 }, { "epoch": 2.4529067055984752, "grad_norm": 0.7366207242012024, "learning_rate": 1.7742249554412426e-06, "loss": 0.6992728114128113, "step": 1891 }, { "epoch": 2.454204432548614, "grad_norm": 0.7760360836982727, "learning_rate": 1.76610193367043e-06, "loss": 0.660463809967041, "step": 1892 }, { "epoch": 2.455502159498753, "grad_norm": 0.7349168658256531, "learning_rate": 1.757995748137129e-06, "loss": 0.6087374091148376, "step": 1893 }, { "epoch": 2.456799886448892, "grad_norm": 0.7244678139686584, "learning_rate": 1.7499064154164358e-06, "loss": 0.6310493350028992, "step": 1894 }, { "epoch": 2.4580976133990307, "grad_norm": 0.735069215297699, "learning_rate": 1.7418339520489936e-06, "loss": 0.6924616098403931, "step": 1895 }, { "epoch": 2.4593953403491695, "grad_norm": 0.7370489239692688, "learning_rate": 1.7337783745409363e-06, "loss": 0.6034020781517029, "step": 1896 }, { "epoch": 2.4606930672993084, "grad_norm": 0.7326070666313171, "learning_rate": 1.7257396993638942e-06, "loss": 0.6212228536605835, "step": 1897 }, { "epoch": 2.4619907942494477, "grad_norm": 0.6936232447624207, "learning_rate": 1.717717942954914e-06, "loss": 0.705615758895874, "step": 1898 }, { "epoch": 2.4632885211995865, "grad_norm": 0.7247579097747803, "learning_rate": 1.7097131217164598e-06, "loss": 0.6505810618400574, "step": 1899 }, { "epoch": 2.4645862481497254, "grad_norm": 0.7129016518592834, "learning_rate": 1.7017252520163652e-06, "loss": 0.637854814529419, "step": 1900 }, { "epoch": 2.4658839750998642, "grad_norm": 0.7215719819068909, "learning_rate": 1.6937543501878018e-06, "loss": 0.6486891508102417, "step": 1901 }, { "epoch": 2.467181702050003, "grad_norm": 0.7112030386924744, "learning_rate": 1.6858004325292466e-06, "loss": 0.6466121673583984, "step": 1902 }, { "epoch": 2.468479429000142, "grad_norm": 0.7482553124427795, "learning_rate": 1.6778635153044486e-06, "loss": 0.6906379461288452, "step": 1903 }, { "epoch": 2.469777155950281, "grad_norm": 0.7411786317825317, "learning_rate": 1.6699436147423942e-06, "loss": 0.613003134727478, "step": 1904 }, { "epoch": 2.4710748829004197, "grad_norm": 0.7285057902336121, "learning_rate": 1.662040747037277e-06, "loss": 0.7423882484436035, "step": 1905 }, { "epoch": 2.4723726098505585, "grad_norm": 0.7251142859458923, "learning_rate": 1.654154928348455e-06, "loss": 0.6890588402748108, "step": 1906 }, { "epoch": 2.4736703368006974, "grad_norm": 0.7212609052658081, "learning_rate": 1.646286174800441e-06, "loss": 0.6591873168945312, "step": 1907 }, { "epoch": 2.4749680637508362, "grad_norm": 0.7344200611114502, "learning_rate": 1.6384345024828374e-06, "loss": 0.6354522705078125, "step": 1908 }, { "epoch": 2.4762657907009755, "grad_norm": 0.7125760316848755, "learning_rate": 1.6305999274503282e-06, "loss": 0.6043302416801453, "step": 1909 }, { "epoch": 2.4775635176511144, "grad_norm": 0.7003780603408813, "learning_rate": 1.6227824657226366e-06, "loss": 0.5772091150283813, "step": 1910 }, { "epoch": 2.4788612446012532, "grad_norm": 0.7161146998405457, "learning_rate": 1.614982133284495e-06, "loss": 0.6129906177520752, "step": 1911 }, { "epoch": 2.480158971551392, "grad_norm": 0.7459210157394409, "learning_rate": 1.6071989460856063e-06, "loss": 0.6741005182266235, "step": 1912 }, { "epoch": 2.481456698501531, "grad_norm": 0.7306010723114014, "learning_rate": 1.5994329200406223e-06, "loss": 0.6048024296760559, "step": 1913 }, { "epoch": 2.48275442545167, "grad_norm": 0.7296182513237, "learning_rate": 1.5916840710290937e-06, "loss": 0.6497235298156738, "step": 1914 }, { "epoch": 2.4840521524018087, "grad_norm": 0.7177472114562988, "learning_rate": 1.5839524148954622e-06, "loss": 0.5927858352661133, "step": 1915 }, { "epoch": 2.4853498793519475, "grad_norm": 0.7376892566680908, "learning_rate": 1.5762379674490048e-06, "loss": 0.591650128364563, "step": 1916 }, { "epoch": 2.4866476063020864, "grad_norm": 0.7759072780609131, "learning_rate": 1.5685407444638146e-06, "loss": 0.686072051525116, "step": 1917 }, { "epoch": 2.487945333252225, "grad_norm": 0.7239146828651428, "learning_rate": 1.5608607616787663e-06, "loss": 0.6082277297973633, "step": 1918 }, { "epoch": 2.489243060202364, "grad_norm": 0.7537539005279541, "learning_rate": 1.553198034797474e-06, "loss": 0.7451168298721313, "step": 1919 }, { "epoch": 2.4905407871525034, "grad_norm": 0.7346340417861938, "learning_rate": 1.5455525794882841e-06, "loss": 0.611229658126831, "step": 1920 }, { "epoch": 2.4918385141026422, "grad_norm": 0.731436550617218, "learning_rate": 1.5379244113842106e-06, "loss": 0.659216582775116, "step": 1921 }, { "epoch": 2.493136241052781, "grad_norm": 0.7185493111610413, "learning_rate": 1.53031354608293e-06, "loss": 0.7043588161468506, "step": 1922 }, { "epoch": 2.49443396800292, "grad_norm": 0.7525856494903564, "learning_rate": 1.5227199991467335e-06, "loss": 0.6584152579307556, "step": 1923 }, { "epoch": 2.495731694953059, "grad_norm": 0.7316333055496216, "learning_rate": 1.5151437861025032e-06, "loss": 0.5660229921340942, "step": 1924 }, { "epoch": 2.4970294219031977, "grad_norm": 0.7230735421180725, "learning_rate": 1.5075849224416783e-06, "loss": 0.6512929201126099, "step": 1925 }, { "epoch": 2.4983271488533365, "grad_norm": 0.7257496118545532, "learning_rate": 1.5000434236202211e-06, "loss": 0.665654718875885, "step": 1926 }, { "epoch": 2.4996248758034754, "grad_norm": 0.7206733226776123, "learning_rate": 1.4925193050585873e-06, "loss": 0.656543493270874, "step": 1927 }, { "epoch": 2.500922602753614, "grad_norm": 0.7368682026863098, "learning_rate": 1.4850125821416983e-06, "loss": 0.6262930035591125, "step": 1928 }, { "epoch": 2.5022203297037535, "grad_norm": 0.7327122092247009, "learning_rate": 1.4775232702188947e-06, "loss": 0.6124476790428162, "step": 1929 }, { "epoch": 2.503518056653892, "grad_norm": 0.7396702170372009, "learning_rate": 1.4700513846039332e-06, "loss": 0.5858893990516663, "step": 1930 }, { "epoch": 2.5048157836040312, "grad_norm": 0.7264795899391174, "learning_rate": 1.4625969405749218e-06, "loss": 0.6673074960708618, "step": 1931 }, { "epoch": 2.50611351055417, "grad_norm": 0.7444024085998535, "learning_rate": 1.4551599533743155e-06, "loss": 0.6632063388824463, "step": 1932 }, { "epoch": 2.507411237504309, "grad_norm": 0.7873533964157104, "learning_rate": 1.4477404382088689e-06, "loss": 0.6932485103607178, "step": 1933 }, { "epoch": 2.508708964454448, "grad_norm": 0.7218677997589111, "learning_rate": 1.4403384102496132e-06, "loss": 0.6060501933097839, "step": 1934 }, { "epoch": 2.5100066914045867, "grad_norm": 0.7189037203788757, "learning_rate": 1.4329538846318225e-06, "loss": 0.6672825217247009, "step": 1935 }, { "epoch": 2.5113044183547255, "grad_norm": 0.7413656115531921, "learning_rate": 1.4255868764549852e-06, "loss": 0.6226930022239685, "step": 1936 }, { "epoch": 2.5126021453048644, "grad_norm": 0.7134820222854614, "learning_rate": 1.4182374007827605e-06, "loss": 0.6670020818710327, "step": 1937 }, { "epoch": 2.513899872255003, "grad_norm": 0.7409310340881348, "learning_rate": 1.410905472642975e-06, "loss": 0.6528188586235046, "step": 1938 }, { "epoch": 2.515197599205142, "grad_norm": 0.7328957319259644, "learning_rate": 1.4035911070275576e-06, "loss": 0.6440276503562927, "step": 1939 }, { "epoch": 2.5164953261552814, "grad_norm": 0.7795917391777039, "learning_rate": 1.3962943188925438e-06, "loss": 0.6895844340324402, "step": 1940 }, { "epoch": 2.51779305310542, "grad_norm": 0.7205235958099365, "learning_rate": 1.3890151231580117e-06, "loss": 0.6578382253646851, "step": 1941 }, { "epoch": 2.519090780055559, "grad_norm": 0.7230272889137268, "learning_rate": 1.3817535347080768e-06, "loss": 0.6839146614074707, "step": 1942 }, { "epoch": 2.520388507005698, "grad_norm": 0.7740436792373657, "learning_rate": 1.3745095683908482e-06, "loss": 0.6639747619628906, "step": 1943 }, { "epoch": 2.521686233955837, "grad_norm": 0.7473544478416443, "learning_rate": 1.3672832390184042e-06, "loss": 0.6539671421051025, "step": 1944 }, { "epoch": 2.5229839609059757, "grad_norm": 0.7322369813919067, "learning_rate": 1.3600745613667598e-06, "loss": 0.6508328318595886, "step": 1945 }, { "epoch": 2.5242816878561145, "grad_norm": 0.7107250094413757, "learning_rate": 1.3528835501758365e-06, "loss": 0.6462997198104858, "step": 1946 }, { "epoch": 2.5255794148062534, "grad_norm": 0.7492804527282715, "learning_rate": 1.345710220149431e-06, "loss": 0.6402596235275269, "step": 1947 }, { "epoch": 2.526877141756392, "grad_norm": 0.7333636283874512, "learning_rate": 1.3385545859551886e-06, "loss": 0.6897069811820984, "step": 1948 }, { "epoch": 2.528174868706531, "grad_norm": 0.7276363372802734, "learning_rate": 1.3314166622245717e-06, "loss": 0.6612985134124756, "step": 1949 }, { "epoch": 2.52947259565667, "grad_norm": 0.7273007035255432, "learning_rate": 1.324296463552821e-06, "loss": 0.6120861172676086, "step": 1950 }, { "epoch": 2.5307703226068092, "grad_norm": 0.7370741963386536, "learning_rate": 1.3171940044989495e-06, "loss": 0.7364912033081055, "step": 1951 }, { "epoch": 2.5320680495569476, "grad_norm": 0.7171733379364014, "learning_rate": 1.3101092995856802e-06, "loss": 0.6327986121177673, "step": 1952 }, { "epoch": 2.533365776507087, "grad_norm": 0.7327584028244019, "learning_rate": 1.3030423632994493e-06, "loss": 0.6383181810379028, "step": 1953 }, { "epoch": 2.534663503457226, "grad_norm": 0.7271527051925659, "learning_rate": 1.2959932100903472e-06, "loss": 0.6336721777915955, "step": 1954 }, { "epoch": 2.5359612304073647, "grad_norm": 0.7524319887161255, "learning_rate": 1.2889618543721094e-06, "loss": 0.662846028804779, "step": 1955 }, { "epoch": 2.5372589573575035, "grad_norm": 0.7470775842666626, "learning_rate": 1.2819483105220798e-06, "loss": 0.6556363105773926, "step": 1956 }, { "epoch": 2.5385566843076424, "grad_norm": 0.7219761610031128, "learning_rate": 1.274952592881179e-06, "loss": 0.6259469389915466, "step": 1957 }, { "epoch": 2.539854411257781, "grad_norm": 0.7156399488449097, "learning_rate": 1.2679747157538801e-06, "loss": 0.6495680212974548, "step": 1958 }, { "epoch": 2.54115213820792, "grad_norm": 0.7380321621894836, "learning_rate": 1.2610146934081768e-06, "loss": 0.6329517960548401, "step": 1959 }, { "epoch": 2.542449865158059, "grad_norm": 0.7332315444946289, "learning_rate": 1.2540725400755472e-06, "loss": 0.7250087261199951, "step": 1960 }, { "epoch": 2.543747592108198, "grad_norm": 0.6943919658660889, "learning_rate": 1.2471482699509463e-06, "loss": 0.6895512938499451, "step": 1961 }, { "epoch": 2.545045319058337, "grad_norm": 0.7061095237731934, "learning_rate": 1.2402418971927487e-06, "loss": 0.6665888428688049, "step": 1962 }, { "epoch": 2.546343046008476, "grad_norm": 0.7387134432792664, "learning_rate": 1.2333534359227383e-06, "loss": 0.6526239514350891, "step": 1963 }, { "epoch": 2.547640772958615, "grad_norm": 0.7360694408416748, "learning_rate": 1.226482900226077e-06, "loss": 0.6126471161842346, "step": 1964 }, { "epoch": 2.5489384999087537, "grad_norm": 0.7157735824584961, "learning_rate": 1.2196303041512714e-06, "loss": 0.6631340384483337, "step": 1965 }, { "epoch": 2.5502362268588925, "grad_norm": 0.7504985332489014, "learning_rate": 1.2127956617101445e-06, "loss": 0.6746035218238831, "step": 1966 }, { "epoch": 2.5515339538090314, "grad_norm": 0.7058922648429871, "learning_rate": 1.2059789868778116e-06, "loss": 0.641784131526947, "step": 1967 }, { "epoch": 2.55283168075917, "grad_norm": 0.7049847841262817, "learning_rate": 1.1991802935926455e-06, "loss": 0.5715856552124023, "step": 1968 }, { "epoch": 2.554129407709309, "grad_norm": 0.7680399417877197, "learning_rate": 1.1923995957562585e-06, "loss": 0.6144214272499084, "step": 1969 }, { "epoch": 2.555427134659448, "grad_norm": 0.7535842657089233, "learning_rate": 1.1856369072334517e-06, "loss": 0.6755169630050659, "step": 1970 }, { "epoch": 2.5567248616095872, "grad_norm": 0.7342673540115356, "learning_rate": 1.178892241852222e-06, "loss": 0.6000391244888306, "step": 1971 }, { "epoch": 2.5580225885597256, "grad_norm": 0.7472249865531921, "learning_rate": 1.1721656134036962e-06, "loss": 0.6413825750350952, "step": 1972 }, { "epoch": 2.559320315509865, "grad_norm": 0.7509233355522156, "learning_rate": 1.165457035642128e-06, "loss": 0.662197470664978, "step": 1973 }, { "epoch": 2.560618042460004, "grad_norm": 0.7827663421630859, "learning_rate": 1.1587665222848643e-06, "loss": 0.6412524580955505, "step": 1974 }, { "epoch": 2.5619157694101427, "grad_norm": 0.7427447438240051, "learning_rate": 1.1520940870123065e-06, "loss": 0.6249580979347229, "step": 1975 }, { "epoch": 2.5632134963602815, "grad_norm": 0.7329998016357422, "learning_rate": 1.1454397434679022e-06, "loss": 0.67298424243927, "step": 1976 }, { "epoch": 2.5645112233104204, "grad_norm": 0.7379522919654846, "learning_rate": 1.1388035052580936e-06, "loss": 0.6553415060043335, "step": 1977 }, { "epoch": 2.565808950260559, "grad_norm": 0.7228721380233765, "learning_rate": 1.1321853859523113e-06, "loss": 0.6369103193283081, "step": 1978 }, { "epoch": 2.567106677210698, "grad_norm": 0.7016708850860596, "learning_rate": 1.1255853990829323e-06, "loss": 0.5797883868217468, "step": 1979 }, { "epoch": 2.568404404160837, "grad_norm": 0.7308626174926758, "learning_rate": 1.119003558145262e-06, "loss": 0.6397665143013, "step": 1980 }, { "epoch": 2.569702131110976, "grad_norm": 0.7535097599029541, "learning_rate": 1.1124398765974976e-06, "loss": 0.6552141308784485, "step": 1981 }, { "epoch": 2.570999858061115, "grad_norm": 0.7034752368927002, "learning_rate": 1.1058943678607082e-06, "loss": 0.5966861844062805, "step": 1982 }, { "epoch": 2.5722975850112535, "grad_norm": 0.7308294177055359, "learning_rate": 1.0993670453187965e-06, "loss": 0.678621768951416, "step": 1983 }, { "epoch": 2.573595311961393, "grad_norm": 0.7100163698196411, "learning_rate": 1.0928579223184943e-06, "loss": 0.629210889339447, "step": 1984 }, { "epoch": 2.5748930389115317, "grad_norm": 0.715771496295929, "learning_rate": 1.0863670121693037e-06, "loss": 0.6395845413208008, "step": 1985 }, { "epoch": 2.5761907658616705, "grad_norm": 0.7279219627380371, "learning_rate": 1.0798943281434958e-06, "loss": 0.6864475607872009, "step": 1986 }, { "epoch": 2.5774884928118094, "grad_norm": 0.7253682613372803, "learning_rate": 1.0734398834760695e-06, "loss": 0.613013505935669, "step": 1987 }, { "epoch": 2.578786219761948, "grad_norm": 0.7802004814147949, "learning_rate": 1.067003691364733e-06, "loss": 0.686352014541626, "step": 1988 }, { "epoch": 2.580083946712087, "grad_norm": 0.7534424066543579, "learning_rate": 1.060585764969867e-06, "loss": 0.7019538283348083, "step": 1989 }, { "epoch": 2.581381673662226, "grad_norm": 0.7177249789237976, "learning_rate": 1.0541861174145097e-06, "loss": 0.6038709282875061, "step": 1990 }, { "epoch": 2.582679400612365, "grad_norm": 0.7184469103813171, "learning_rate": 1.047804761784319e-06, "loss": 0.6142391562461853, "step": 1991 }, { "epoch": 2.5839771275625036, "grad_norm": 0.7472144961357117, "learning_rate": 1.0414417111275533e-06, "loss": 0.6911140084266663, "step": 1992 }, { "epoch": 2.585274854512643, "grad_norm": 0.7293811440467834, "learning_rate": 1.0350969784550368e-06, "loss": 0.6472504138946533, "step": 1993 }, { "epoch": 2.5865725814627814, "grad_norm": 0.7172240018844604, "learning_rate": 1.028770576740148e-06, "loss": 0.674932599067688, "step": 1994 }, { "epoch": 2.5878703084129207, "grad_norm": 0.70241379737854, "learning_rate": 1.022462518918772e-06, "loss": 0.5798804759979248, "step": 1995 }, { "epoch": 2.5891680353630595, "grad_norm": 0.7364243865013123, "learning_rate": 1.0161728178892928e-06, "loss": 0.5872079133987427, "step": 1996 }, { "epoch": 2.5904657623131984, "grad_norm": 0.7111935615539551, "learning_rate": 1.0099014865125557e-06, "loss": 0.609887421131134, "step": 1997 }, { "epoch": 2.591763489263337, "grad_norm": 0.7527702450752258, "learning_rate": 1.0036485376118477e-06, "loss": 0.7164459824562073, "step": 1998 }, { "epoch": 2.593061216213476, "grad_norm": 0.7354010939598083, "learning_rate": 9.974139839728658e-07, "loss": 0.7024336457252502, "step": 1999 }, { "epoch": 2.594358943163615, "grad_norm": 0.7463487982749939, "learning_rate": 9.91197838343696e-07, "loss": 0.6939477324485779, "step": 2000 }, { "epoch": 2.595656670113754, "grad_norm": 0.736788809299469, "learning_rate": 9.850001134347765e-07, "loss": 0.6644649505615234, "step": 2001 }, { "epoch": 2.5969543970638926, "grad_norm": 0.7293047904968262, "learning_rate": 9.788208219188932e-07, "loss": 0.6119586825370789, "step": 2002 }, { "epoch": 2.5982521240140315, "grad_norm": 0.7182607054710388, "learning_rate": 9.726599764311318e-07, "loss": 0.611649215221405, "step": 2003 }, { "epoch": 2.599549850964171, "grad_norm": 0.7259273529052734, "learning_rate": 9.665175895688594e-07, "loss": 0.6101284623146057, "step": 2004 }, { "epoch": 2.600847577914309, "grad_norm": 0.701677680015564, "learning_rate": 9.603936738917063e-07, "loss": 0.6807554364204407, "step": 2005 }, { "epoch": 2.6021453048644485, "grad_norm": 0.7464570999145508, "learning_rate": 9.54288241921525e-07, "loss": 0.6781387329101562, "step": 2006 }, { "epoch": 2.6034430318145874, "grad_norm": 0.7273631691932678, "learning_rate": 9.482013061423833e-07, "loss": 0.6723061203956604, "step": 2007 }, { "epoch": 2.604740758764726, "grad_norm": 0.7473943829536438, "learning_rate": 9.421328790005213e-07, "loss": 0.6500118970870972, "step": 2008 }, { "epoch": 2.606038485714865, "grad_norm": 0.7298744320869446, "learning_rate": 9.360829729043375e-07, "loss": 0.647000789642334, "step": 2009 }, { "epoch": 2.607336212665004, "grad_norm": 0.7570067644119263, "learning_rate": 9.300516002243587e-07, "loss": 0.658997118473053, "step": 2010 }, { "epoch": 2.608633939615143, "grad_norm": 0.7472216486930847, "learning_rate": 9.240387732932155e-07, "loss": 0.6748676300048828, "step": 2011 }, { "epoch": 2.6099316665652816, "grad_norm": 0.7370826005935669, "learning_rate": 9.180445044056164e-07, "loss": 0.6571428179740906, "step": 2012 }, { "epoch": 2.6112293935154205, "grad_norm": 0.7431361675262451, "learning_rate": 9.120688058183269e-07, "loss": 0.6858744025230408, "step": 2013 }, { "epoch": 2.6125271204655593, "grad_norm": 0.7619893550872803, "learning_rate": 9.061116897501321e-07, "loss": 0.6860224008560181, "step": 2014 }, { "epoch": 2.6138248474156986, "grad_norm": 0.6949592232704163, "learning_rate": 9.001731683818338e-07, "loss": 0.6436545848846436, "step": 2015 }, { "epoch": 2.6151225743658375, "grad_norm": 0.7831428647041321, "learning_rate": 8.942532538561988e-07, "loss": 0.7231192588806152, "step": 2016 }, { "epoch": 2.6164203013159764, "grad_norm": 0.7632724046707153, "learning_rate": 8.883519582779598e-07, "loss": 0.7117716073989868, "step": 2017 }, { "epoch": 2.617718028266115, "grad_norm": 0.7610095739364624, "learning_rate": 8.82469293713768e-07, "loss": 0.6059130430221558, "step": 2018 }, { "epoch": 2.619015755216254, "grad_norm": 0.7569096684455872, "learning_rate": 8.766052721921858e-07, "loss": 0.6521672010421753, "step": 2019 }, { "epoch": 2.620313482166393, "grad_norm": 0.7089208960533142, "learning_rate": 8.70759905703652e-07, "loss": 0.6266563534736633, "step": 2020 }, { "epoch": 2.621611209116532, "grad_norm": 0.7617636919021606, "learning_rate": 8.649332062004622e-07, "loss": 0.6242752075195312, "step": 2021 }, { "epoch": 2.6229089360666706, "grad_norm": 0.7356528043746948, "learning_rate": 8.59125185596742e-07, "loss": 0.6804662942886353, "step": 2022 }, { "epoch": 2.6242066630168095, "grad_norm": 0.730805516242981, "learning_rate": 8.533358557684246e-07, "loss": 0.6591053605079651, "step": 2023 }, { "epoch": 2.625504389966949, "grad_norm": 0.740450382232666, "learning_rate": 8.475652285532199e-07, "loss": 0.6597458720207214, "step": 2024 }, { "epoch": 2.626802116917087, "grad_norm": 0.7419881224632263, "learning_rate": 8.41813315750607e-07, "loss": 0.6208306550979614, "step": 2025 }, { "epoch": 2.6280998438672265, "grad_norm": 0.7380879521369934, "learning_rate": 8.360801291217835e-07, "loss": 0.6311178803443909, "step": 2026 }, { "epoch": 2.6293975708173654, "grad_norm": 0.6968350410461426, "learning_rate": 8.303656803896731e-07, "loss": 0.6126903891563416, "step": 2027 }, { "epoch": 2.630695297767504, "grad_norm": 0.6993783712387085, "learning_rate": 8.246699812388714e-07, "loss": 0.6219539642333984, "step": 2028 }, { "epoch": 2.631993024717643, "grad_norm": 0.7296315431594849, "learning_rate": 8.189930433156424e-07, "loss": 0.6454072594642639, "step": 2029 }, { "epoch": 2.633290751667782, "grad_norm": 0.7435656785964966, "learning_rate": 8.133348782278916e-07, "loss": 0.640640139579773, "step": 2030 }, { "epoch": 2.634588478617921, "grad_norm": 0.7254202961921692, "learning_rate": 8.07695497545129e-07, "loss": 0.574336588382721, "step": 2031 }, { "epoch": 2.6358862055680596, "grad_norm": 0.7589125037193298, "learning_rate": 8.020749127984629e-07, "loss": 0.6744675636291504, "step": 2032 }, { "epoch": 2.6371839325181985, "grad_norm": 0.7237491011619568, "learning_rate": 7.964731354805677e-07, "loss": 0.6050382852554321, "step": 2033 }, { "epoch": 2.6384816594683373, "grad_norm": 0.736615777015686, "learning_rate": 7.908901770456579e-07, "loss": 0.6752466559410095, "step": 2034 }, { "epoch": 2.6397793864184766, "grad_norm": 0.7375562787055969, "learning_rate": 7.853260489094727e-07, "loss": 0.6168178915977478, "step": 2035 }, { "epoch": 2.641077113368615, "grad_norm": 0.7463002800941467, "learning_rate": 7.79780762449246e-07, "loss": 0.6608278751373291, "step": 2036 }, { "epoch": 2.6423748403187544, "grad_norm": 0.7306200861930847, "learning_rate": 7.742543290036797e-07, "loss": 0.6231617331504822, "step": 2037 }, { "epoch": 2.643672567268893, "grad_norm": 0.7191357612609863, "learning_rate": 7.687467598729403e-07, "loss": 0.6745753884315491, "step": 2038 }, { "epoch": 2.644970294219032, "grad_norm": 0.6983992457389832, "learning_rate": 7.63258066318604e-07, "loss": 0.6209067702293396, "step": 2039 }, { "epoch": 2.646268021169171, "grad_norm": 0.7191793322563171, "learning_rate": 7.577882595636665e-07, "loss": 0.6866878867149353, "step": 2040 }, { "epoch": 2.64756574811931, "grad_norm": 0.7254435420036316, "learning_rate": 7.523373507924947e-07, "loss": 0.6178576946258545, "step": 2041 }, { "epoch": 2.6488634750694486, "grad_norm": 0.7166338562965393, "learning_rate": 7.469053511508184e-07, "loss": 0.6005609035491943, "step": 2042 }, { "epoch": 2.6501612020195875, "grad_norm": 0.7637789249420166, "learning_rate": 7.414922717457018e-07, "loss": 0.718099057674408, "step": 2043 }, { "epoch": 2.6514589289697263, "grad_norm": 0.7439664006233215, "learning_rate": 7.360981236455222e-07, "loss": 0.6896740198135376, "step": 2044 }, { "epoch": 2.652756655919865, "grad_norm": 0.7089899182319641, "learning_rate": 7.307229178799469e-07, "loss": 0.6416285634040833, "step": 2045 }, { "epoch": 2.6540543828700045, "grad_norm": 0.7403551340103149, "learning_rate": 7.253666654399128e-07, "loss": 0.6686422824859619, "step": 2046 }, { "epoch": 2.655352109820143, "grad_norm": 0.7438167333602905, "learning_rate": 7.200293772775968e-07, "loss": 0.6786326766014099, "step": 2047 }, { "epoch": 2.656649836770282, "grad_norm": 0.7066054344177246, "learning_rate": 7.14711064306407e-07, "loss": 0.6346741318702698, "step": 2048 }, { "epoch": 2.657947563720421, "grad_norm": 0.7646064758300781, "learning_rate": 7.094117374009446e-07, "loss": 0.67086261510849, "step": 2049 }, { "epoch": 2.65924529067056, "grad_norm": 0.7251279950141907, "learning_rate": 7.041314073969918e-07, "loss": 0.6325028538703918, "step": 2050 }, { "epoch": 2.660543017620699, "grad_norm": 0.7678724527359009, "learning_rate": 6.988700850914876e-07, "loss": 0.6267367005348206, "step": 2051 }, { "epoch": 2.6618407445708376, "grad_norm": 0.7265689969062805, "learning_rate": 6.93627781242504e-07, "loss": 0.6617064476013184, "step": 2052 }, { "epoch": 2.6631384715209765, "grad_norm": 0.7217026352882385, "learning_rate": 6.884045065692257e-07, "loss": 0.6587082743644714, "step": 2053 }, { "epoch": 2.6644361984711153, "grad_norm": 0.7629426121711731, "learning_rate": 6.83200271751927e-07, "loss": 0.692336916923523, "step": 2054 }, { "epoch": 2.665733925421254, "grad_norm": 0.7733954191207886, "learning_rate": 6.780150874319524e-07, "loss": 0.6802124381065369, "step": 2055 }, { "epoch": 2.667031652371393, "grad_norm": 0.7317995429039001, "learning_rate": 6.72848964211692e-07, "loss": 0.6866804957389832, "step": 2056 }, { "epoch": 2.6683293793215324, "grad_norm": 0.7314664721488953, "learning_rate": 6.677019126545548e-07, "loss": 0.6293746829032898, "step": 2057 }, { "epoch": 2.6696271062716708, "grad_norm": 0.7272669076919556, "learning_rate": 6.625739432849643e-07, "loss": 0.673871636390686, "step": 2058 }, { "epoch": 2.67092483322181, "grad_norm": 0.7291983962059021, "learning_rate": 6.574650665883197e-07, "loss": 0.6971457004547119, "step": 2059 }, { "epoch": 2.672222560171949, "grad_norm": 0.746300458908081, "learning_rate": 6.523752930109761e-07, "loss": 0.6644643545150757, "step": 2060 }, { "epoch": 2.673520287122088, "grad_norm": 0.7214688062667847, "learning_rate": 6.473046329602384e-07, "loss": 0.579256534576416, "step": 2061 }, { "epoch": 2.6748180140722266, "grad_norm": 0.7157896757125854, "learning_rate": 6.422530968043173e-07, "loss": 0.6934089660644531, "step": 2062 }, { "epoch": 2.6761157410223655, "grad_norm": 0.7446689605712891, "learning_rate": 6.372206948723292e-07, "loss": 0.6685813665390015, "step": 2063 }, { "epoch": 2.6774134679725043, "grad_norm": 0.7324274182319641, "learning_rate": 6.322074374542608e-07, "loss": 0.6548044085502625, "step": 2064 }, { "epoch": 2.678711194922643, "grad_norm": 0.7366431951522827, "learning_rate": 6.272133348009546e-07, "loss": 0.6561753153800964, "step": 2065 }, { "epoch": 2.680008921872782, "grad_norm": 0.6906739473342896, "learning_rate": 6.222383971240875e-07, "loss": 0.6162272095680237, "step": 2066 }, { "epoch": 2.681306648822921, "grad_norm": 0.7250291109085083, "learning_rate": 6.17282634596148e-07, "loss": 0.6417672038078308, "step": 2067 }, { "epoch": 2.68260437577306, "grad_norm": 0.7425340414047241, "learning_rate": 6.123460573504147e-07, "loss": 0.6258097887039185, "step": 2068 }, { "epoch": 2.6839021027231986, "grad_norm": 0.7179927825927734, "learning_rate": 6.074286754809411e-07, "loss": 0.6689911484718323, "step": 2069 }, { "epoch": 2.685199829673338, "grad_norm": 0.7198472619056702, "learning_rate": 6.025304990425241e-07, "loss": 0.6711916923522949, "step": 2070 }, { "epoch": 2.685199829673338, "eval_loss": 0.7492260932922363, "eval_runtime": 145.3339, "eval_samples_per_second": 35.725, "eval_steps_per_second": 8.931, "step": 2070 }, { "epoch": 2.686497556623477, "grad_norm": 0.7170226573944092, "learning_rate": 5.976515380507008e-07, "loss": 0.6783643960952759, "step": 2071 }, { "epoch": 2.6877952835736156, "grad_norm": 0.7576429843902588, "learning_rate": 5.927918024817059e-07, "loss": 0.7274392247200012, "step": 2072 }, { "epoch": 2.6890930105237545, "grad_norm": 0.7014567255973816, "learning_rate": 5.879513022724714e-07, "loss": 0.6101505160331726, "step": 2073 }, { "epoch": 2.6903907374738933, "grad_norm": 0.7218198180198669, "learning_rate": 5.831300473205948e-07, "loss": 0.6697475910186768, "step": 2074 }, { "epoch": 2.691688464424032, "grad_norm": 0.7351176738739014, "learning_rate": 5.783280474843222e-07, "loss": 0.6683188080787659, "step": 2075 }, { "epoch": 2.692986191374171, "grad_norm": 0.7387964129447937, "learning_rate": 5.735453125825275e-07, "loss": 0.6495317220687866, "step": 2076 }, { "epoch": 2.69428391832431, "grad_norm": 0.7699364423751831, "learning_rate": 5.687818523946931e-07, "loss": 0.6670310497283936, "step": 2077 }, { "epoch": 2.6955816452744488, "grad_norm": 0.7399834394454956, "learning_rate": 5.640376766608902e-07, "loss": 0.6311538219451904, "step": 2078 }, { "epoch": 2.696879372224588, "grad_norm": 0.7210641503334045, "learning_rate": 5.593127950817579e-07, "loss": 0.6419323682785034, "step": 2079 }, { "epoch": 2.698177099174727, "grad_norm": 0.7432581186294556, "learning_rate": 5.546072173184791e-07, "loss": 0.6984769701957703, "step": 2080 }, { "epoch": 2.699474826124866, "grad_norm": 0.7039175629615784, "learning_rate": 5.499209529927751e-07, "loss": 0.6130697727203369, "step": 2081 }, { "epoch": 2.7007725530750046, "grad_norm": 0.7450562715530396, "learning_rate": 5.452540116868654e-07, "loss": 0.709285318851471, "step": 2082 }, { "epoch": 2.7020702800251435, "grad_norm": 0.7391056418418884, "learning_rate": 5.406064029434666e-07, "loss": 0.7196047306060791, "step": 2083 }, { "epoch": 2.7033680069752823, "grad_norm": 0.7550768852233887, "learning_rate": 5.359781362657623e-07, "loss": 0.6528761982917786, "step": 2084 }, { "epoch": 2.704665733925421, "grad_norm": 0.7071364521980286, "learning_rate": 5.313692211173838e-07, "loss": 0.664832353591919, "step": 2085 }, { "epoch": 2.70596346087556, "grad_norm": 0.7408220171928406, "learning_rate": 5.26779666922399e-07, "loss": 0.6972253322601318, "step": 2086 }, { "epoch": 2.707261187825699, "grad_norm": 0.706516683101654, "learning_rate": 5.222094830652835e-07, "loss": 0.6413928866386414, "step": 2087 }, { "epoch": 2.708558914775838, "grad_norm": 0.6609142422676086, "learning_rate": 5.176586788909066e-07, "loss": 0.61426842212677, "step": 2088 }, { "epoch": 2.7098566417259766, "grad_norm": 0.7437728047370911, "learning_rate": 5.131272637045104e-07, "loss": 0.7072603106498718, "step": 2089 }, { "epoch": 2.711154368676116, "grad_norm": 0.7043668627738953, "learning_rate": 5.086152467716932e-07, "loss": 0.6285822987556458, "step": 2090 }, { "epoch": 2.7124520956262548, "grad_norm": 0.740922212600708, "learning_rate": 5.041226373183861e-07, "loss": 0.6565816402435303, "step": 2091 }, { "epoch": 2.7137498225763936, "grad_norm": 0.716456949710846, "learning_rate": 4.996494445308409e-07, "loss": 0.6037598848342896, "step": 2092 }, { "epoch": 2.7150475495265325, "grad_norm": 0.7253233194351196, "learning_rate": 4.951956775556e-07, "loss": 0.6392321586608887, "step": 2093 }, { "epoch": 2.7163452764766713, "grad_norm": 0.7206777334213257, "learning_rate": 4.907613454994964e-07, "loss": 0.6381296515464783, "step": 2094 }, { "epoch": 2.71764300342681, "grad_norm": 0.7042269110679626, "learning_rate": 4.863464574296106e-07, "loss": 0.6764304041862488, "step": 2095 }, { "epoch": 2.718940730376949, "grad_norm": 0.7474066019058228, "learning_rate": 4.819510223732738e-07, "loss": 0.710769534111023, "step": 2096 }, { "epoch": 2.720238457327088, "grad_norm": 0.7537234425544739, "learning_rate": 4.775750493180386e-07, "loss": 0.6200648546218872, "step": 2097 }, { "epoch": 2.7215361842772268, "grad_norm": 0.7299405336380005, "learning_rate": 4.7321854721166127e-07, "loss": 0.6677811741828918, "step": 2098 }, { "epoch": 2.722833911227366, "grad_norm": 0.6883127093315125, "learning_rate": 4.6888152496208593e-07, "loss": 0.5572382211685181, "step": 2099 }, { "epoch": 2.7241316381775045, "grad_norm": 0.730640709400177, "learning_rate": 4.645639914374278e-07, "loss": 0.6930029392242432, "step": 2100 }, { "epoch": 2.7254293651276438, "grad_norm": 0.7166103720664978, "learning_rate": 4.602659554659461e-07, "loss": 0.5943949818611145, "step": 2101 }, { "epoch": 2.7267270920777826, "grad_norm": 0.7555888295173645, "learning_rate": 4.559874258360408e-07, "loss": 0.6563291549682617, "step": 2102 }, { "epoch": 2.7280248190279215, "grad_norm": 0.7199954390525818, "learning_rate": 4.5172841129621726e-07, "loss": 0.6438056826591492, "step": 2103 }, { "epoch": 2.7293225459780603, "grad_norm": 0.7394102811813354, "learning_rate": 4.474889205550881e-07, "loss": 0.6618061065673828, "step": 2104 }, { "epoch": 2.730620272928199, "grad_norm": 0.7350549697875977, "learning_rate": 4.4326896228133354e-07, "loss": 0.6392850875854492, "step": 2105 }, { "epoch": 2.731917999878338, "grad_norm": 0.7010295391082764, "learning_rate": 4.3906854510370245e-07, "loss": 0.6507184505462646, "step": 2106 }, { "epoch": 2.733215726828477, "grad_norm": 0.7381558418273926, "learning_rate": 4.348876776109856e-07, "loss": 0.6545774936676025, "step": 2107 }, { "epoch": 2.7345134537786158, "grad_norm": 0.7013775110244751, "learning_rate": 4.307263683519969e-07, "loss": 0.6212908625602722, "step": 2108 }, { "epoch": 2.7358111807287546, "grad_norm": 0.7366412878036499, "learning_rate": 4.2658462583556216e-07, "loss": 0.684171736240387, "step": 2109 }, { "epoch": 2.737108907678894, "grad_norm": 0.7112710475921631, "learning_rate": 4.2246245853049706e-07, "loss": 0.6173405051231384, "step": 2110 }, { "epoch": 2.7384066346290323, "grad_norm": 0.7728049159049988, "learning_rate": 4.1835987486558595e-07, "loss": 0.6173956990242004, "step": 2111 }, { "epoch": 2.7397043615791716, "grad_norm": 0.6931276321411133, "learning_rate": 4.142768832295807e-07, "loss": 0.6579814553260803, "step": 2112 }, { "epoch": 2.7410020885293105, "grad_norm": 0.7127827405929565, "learning_rate": 4.102134919711609e-07, "loss": 0.6169605255126953, "step": 2113 }, { "epoch": 2.7422998154794493, "grad_norm": 0.7167375683784485, "learning_rate": 4.061697093989347e-07, "loss": 0.6766916513442993, "step": 2114 }, { "epoch": 2.743597542429588, "grad_norm": 0.7316383719444275, "learning_rate": 4.021455437814148e-07, "loss": 0.6033115983009338, "step": 2115 }, { "epoch": 2.744895269379727, "grad_norm": 0.7062050104141235, "learning_rate": 3.981410033469979e-07, "loss": 0.6221883296966553, "step": 2116 }, { "epoch": 2.746192996329866, "grad_norm": 0.7120285630226135, "learning_rate": 3.941560962839619e-07, "loss": 0.6118264198303223, "step": 2117 }, { "epoch": 2.7474907232800048, "grad_norm": 0.7053149938583374, "learning_rate": 3.9019083074042784e-07, "loss": 0.5848374962806702, "step": 2118 }, { "epoch": 2.7487884502301436, "grad_norm": 0.7223408818244934, "learning_rate": 3.862452148243623e-07, "loss": 0.6187662482261658, "step": 2119 }, { "epoch": 2.7500861771802825, "grad_norm": 0.7368988394737244, "learning_rate": 3.823192566035494e-07, "loss": 0.647794783115387, "step": 2120 }, { "epoch": 2.7513839041304218, "grad_norm": 0.7369173765182495, "learning_rate": 3.7841296410558225e-07, "loss": 0.6177867650985718, "step": 2121 }, { "epoch": 2.75268163108056, "grad_norm": 0.7405387759208679, "learning_rate": 3.7452634531783935e-07, "loss": 0.6547641754150391, "step": 2122 }, { "epoch": 2.7539793580306995, "grad_norm": 0.7224996089935303, "learning_rate": 3.706594081874737e-07, "loss": 0.6353644132614136, "step": 2123 }, { "epoch": 2.7552770849808383, "grad_norm": 0.7474029660224915, "learning_rate": 3.6681216062138923e-07, "loss": 0.682817816734314, "step": 2124 }, { "epoch": 2.756574811930977, "grad_norm": 0.7351192235946655, "learning_rate": 3.6298461048623887e-07, "loss": 0.6670258641242981, "step": 2125 }, { "epoch": 2.757872538881116, "grad_norm": 0.6816844344139099, "learning_rate": 3.5917676560838775e-07, "loss": 0.609431803226471, "step": 2126 }, { "epoch": 2.759170265831255, "grad_norm": 0.7361696362495422, "learning_rate": 3.5538863377392095e-07, "loss": 0.6345561742782593, "step": 2127 }, { "epoch": 2.7604679927813938, "grad_norm": 0.750041663646698, "learning_rate": 3.5162022272860475e-07, "loss": 0.6858513951301575, "step": 2128 }, { "epoch": 2.7617657197315326, "grad_norm": 0.7399468421936035, "learning_rate": 3.478715401778876e-07, "loss": 0.6643052697181702, "step": 2129 }, { "epoch": 2.7630634466816715, "grad_norm": 0.764750063419342, "learning_rate": 3.44142593786877e-07, "loss": 0.7398065328598022, "step": 2130 }, { "epoch": 2.7643611736318103, "grad_norm": 0.7458817958831787, "learning_rate": 3.404333911803237e-07, "loss": 0.6310020685195923, "step": 2131 }, { "epoch": 2.7656589005819496, "grad_norm": 0.7141246199607849, "learning_rate": 3.367439399426087e-07, "loss": 0.6750156879425049, "step": 2132 }, { "epoch": 2.7669566275320885, "grad_norm": 0.7121133804321289, "learning_rate": 3.330742476177273e-07, "loss": 0.6371780037879944, "step": 2133 }, { "epoch": 2.7682543544822273, "grad_norm": 0.7298391461372375, "learning_rate": 3.2942432170926743e-07, "loss": 0.5725361108779907, "step": 2134 }, { "epoch": 2.769552081432366, "grad_norm": 0.742504358291626, "learning_rate": 3.257941696804079e-07, "loss": 0.6555971503257751, "step": 2135 }, { "epoch": 2.770849808382505, "grad_norm": 0.7092410922050476, "learning_rate": 3.2218379895388896e-07, "loss": 0.5985562205314636, "step": 2136 }, { "epoch": 2.772147535332644, "grad_norm": 0.7868666648864746, "learning_rate": 3.185932169120043e-07, "loss": 0.6679819226264954, "step": 2137 }, { "epoch": 2.7734452622827828, "grad_norm": 0.7421088814735413, "learning_rate": 3.150224308965866e-07, "loss": 0.6530116200447083, "step": 2138 }, { "epoch": 2.7747429892329216, "grad_norm": 0.8364231586456299, "learning_rate": 3.114714482089898e-07, "loss": 0.7263075709342957, "step": 2139 }, { "epoch": 2.7760407161830605, "grad_norm": 0.7070637345314026, "learning_rate": 3.079402761100736e-07, "loss": 0.5931848883628845, "step": 2140 }, { "epoch": 2.7773384431331998, "grad_norm": 0.715865433216095, "learning_rate": 3.0442892182019236e-07, "loss": 0.5411802530288696, "step": 2141 }, { "epoch": 2.778636170083338, "grad_norm": 0.7688911557197571, "learning_rate": 3.00937392519175e-07, "loss": 0.6958683133125305, "step": 2142 }, { "epoch": 2.7799338970334775, "grad_norm": 0.7352038621902466, "learning_rate": 2.974656953463173e-07, "loss": 0.5754610896110535, "step": 2143 }, { "epoch": 2.7812316239836163, "grad_norm": 0.7284995913505554, "learning_rate": 2.9401383740035983e-07, "loss": 0.6452664136886597, "step": 2144 }, { "epoch": 2.782529350933755, "grad_norm": 0.7445150017738342, "learning_rate": 2.905818257394799e-07, "loss": 0.6866068243980408, "step": 2145 }, { "epoch": 2.783827077883894, "grad_norm": 0.7142398357391357, "learning_rate": 2.871696673812718e-07, "loss": 0.6363600492477417, "step": 2146 }, { "epoch": 2.785124804834033, "grad_norm": 0.7269803285598755, "learning_rate": 2.837773693027346e-07, "loss": 0.6741392612457275, "step": 2147 }, { "epoch": 2.7864225317841718, "grad_norm": 0.7683520317077637, "learning_rate": 2.8040493844026185e-07, "loss": 0.6339127421379089, "step": 2148 }, { "epoch": 2.7877202587343106, "grad_norm": 0.7308069467544556, "learning_rate": 2.7705238168961867e-07, "loss": 0.6009587049484253, "step": 2149 }, { "epoch": 2.7890179856844495, "grad_norm": 0.7165871858596802, "learning_rate": 2.7371970590593597e-07, "loss": 0.6652488708496094, "step": 2150 }, { "epoch": 2.7903157126345883, "grad_norm": 0.7490328550338745, "learning_rate": 2.7040691790369165e-07, "loss": 0.6180223226547241, "step": 2151 }, { "epoch": 2.7916134395847276, "grad_norm": 0.729664146900177, "learning_rate": 2.671140244567005e-07, "loss": 0.6324159502983093, "step": 2152 }, { "epoch": 2.792911166534866, "grad_norm": 0.728609025478363, "learning_rate": 2.6384103229809445e-07, "loss": 0.6185531616210938, "step": 2153 }, { "epoch": 2.7942088934850053, "grad_norm": 0.7523699402809143, "learning_rate": 2.605879481203144e-07, "loss": 0.6833655834197998, "step": 2154 }, { "epoch": 2.795506620435144, "grad_norm": 0.7207692265510559, "learning_rate": 2.5735477857509406e-07, "loss": 0.6240508556365967, "step": 2155 }, { "epoch": 2.796804347385283, "grad_norm": 0.7327904105186462, "learning_rate": 2.5414153027344846e-07, "loss": 0.6517814993858337, "step": 2156 }, { "epoch": 2.798102074335422, "grad_norm": 0.7405744194984436, "learning_rate": 2.5094820978565416e-07, "loss": 0.6217131614685059, "step": 2157 }, { "epoch": 2.7993998012855608, "grad_norm": 0.7404962182044983, "learning_rate": 2.4777482364124695e-07, "loss": 0.6210229992866516, "step": 2158 }, { "epoch": 2.8006975282356996, "grad_norm": 0.7105421423912048, "learning_rate": 2.446213783289941e-07, "loss": 0.6224609613418579, "step": 2159 }, { "epoch": 2.8019952551858385, "grad_norm": 0.777541995048523, "learning_rate": 2.4148788029689565e-07, "loss": 0.6957967877388, "step": 2160 }, { "epoch": 2.8032929821359773, "grad_norm": 0.7556023001670837, "learning_rate": 2.3837433595216174e-07, "loss": 0.6769660115242004, "step": 2161 }, { "epoch": 2.804590709086116, "grad_norm": 0.7225756049156189, "learning_rate": 2.3528075166120323e-07, "loss": 0.6382290124893188, "step": 2162 }, { "epoch": 2.8058884360362555, "grad_norm": 0.7236006259918213, "learning_rate": 2.3220713374961457e-07, "loss": 0.6584991216659546, "step": 2163 }, { "epoch": 2.807186162986394, "grad_norm": 0.7643389701843262, "learning_rate": 2.2915348850216955e-07, "loss": 0.6372033953666687, "step": 2164 }, { "epoch": 2.808483889936533, "grad_norm": 0.6990427374839783, "learning_rate": 2.2611982216279693e-07, "loss": 0.6647629141807556, "step": 2165 }, { "epoch": 2.809781616886672, "grad_norm": 0.7442436814308167, "learning_rate": 2.2310614093457917e-07, "loss": 0.6188019514083862, "step": 2166 }, { "epoch": 2.811079343836811, "grad_norm": 0.7379173040390015, "learning_rate": 2.2011245097972812e-07, "loss": 0.643206000328064, "step": 2167 }, { "epoch": 2.8123770707869498, "grad_norm": 0.7450693249702454, "learning_rate": 2.171387584195861e-07, "loss": 0.6626617312431335, "step": 2168 }, { "epoch": 2.8136747977370886, "grad_norm": 0.7376441359519958, "learning_rate": 2.1418506933459926e-07, "loss": 0.6287381052970886, "step": 2169 }, { "epoch": 2.8149725246872275, "grad_norm": 0.7581092715263367, "learning_rate": 2.1125138976431425e-07, "loss": 0.6942882537841797, "step": 2170 }, { "epoch": 2.8162702516373663, "grad_norm": 0.7551229596138, "learning_rate": 2.0833772570736376e-07, "loss": 0.6641190052032471, "step": 2171 }, { "epoch": 2.817567978587505, "grad_norm": 0.723896861076355, "learning_rate": 2.0544408312145325e-07, "loss": 0.6406188607215881, "step": 2172 }, { "epoch": 2.818865705537644, "grad_norm": 0.7154518961906433, "learning_rate": 2.025704679233498e-07, "loss": 0.6102049946784973, "step": 2173 }, { "epoch": 2.8201634324877833, "grad_norm": 0.7203720808029175, "learning_rate": 1.9971688598886874e-07, "loss": 0.6299295425415039, "step": 2174 }, { "epoch": 2.8214611594379218, "grad_norm": 0.7477232217788696, "learning_rate": 1.9688334315286383e-07, "loss": 0.657807469367981, "step": 2175 }, { "epoch": 2.822758886388061, "grad_norm": 0.7149349451065063, "learning_rate": 1.9406984520921156e-07, "loss": 0.6447558999061584, "step": 2176 }, { "epoch": 2.8240566133382, "grad_norm": 0.7502943277359009, "learning_rate": 1.9127639791080345e-07, "loss": 0.7339900732040405, "step": 2177 }, { "epoch": 2.8253543402883388, "grad_norm": 0.7233054637908936, "learning_rate": 1.885030069695326e-07, "loss": 0.668261706829071, "step": 2178 }, { "epoch": 2.8266520672384776, "grad_norm": 0.7234363555908203, "learning_rate": 1.8574967805628174e-07, "loss": 0.6577302813529968, "step": 2179 }, { "epoch": 2.8279497941886165, "grad_norm": 0.7601407766342163, "learning_rate": 1.8301641680090965e-07, "loss": 0.6615520715713501, "step": 2180 }, { "epoch": 2.8292475211387553, "grad_norm": 0.7155176401138306, "learning_rate": 1.8030322879224792e-07, "loss": 0.6732202768325806, "step": 2181 }, { "epoch": 2.830545248088894, "grad_norm": 0.7071481347084045, "learning_rate": 1.7761011957807439e-07, "loss": 0.6781343817710876, "step": 2182 }, { "epoch": 2.831842975039033, "grad_norm": 0.7136833071708679, "learning_rate": 1.7493709466511965e-07, "loss": 0.6390227675437927, "step": 2183 }, { "epoch": 2.833140701989172, "grad_norm": 0.741337239742279, "learning_rate": 1.7228415951904165e-07, "loss": 0.6472516059875488, "step": 2184 }, { "epoch": 2.834438428939311, "grad_norm": 0.732276976108551, "learning_rate": 1.6965131956442004e-07, "loss": 0.6666471362113953, "step": 2185 }, { "epoch": 2.83573615588945, "grad_norm": 0.7136049866676331, "learning_rate": 1.670385801847485e-07, "loss": 0.6376191973686218, "step": 2186 }, { "epoch": 2.837033882839589, "grad_norm": 0.7336399555206299, "learning_rate": 1.6444594672241688e-07, "loss": 0.6784384846687317, "step": 2187 }, { "epoch": 2.8383316097897278, "grad_norm": 0.7359493374824524, "learning_rate": 1.6187342447870235e-07, "loss": 0.6160508394241333, "step": 2188 }, { "epoch": 2.8396293367398666, "grad_norm": 0.7054331302642822, "learning_rate": 1.5932101871376503e-07, "loss": 0.6256083846092224, "step": 2189 }, { "epoch": 2.8409270636900055, "grad_norm": 0.7195982336997986, "learning_rate": 1.567887346466257e-07, "loss": 0.5842984318733215, "step": 2190 }, { "epoch": 2.8422247906401443, "grad_norm": 0.7330359220504761, "learning_rate": 1.54276577455168e-07, "loss": 0.655302882194519, "step": 2191 }, { "epoch": 2.843522517590283, "grad_norm": 0.7195461392402649, "learning_rate": 1.517845522761141e-07, "loss": 0.695612370967865, "step": 2192 }, { "epoch": 2.844820244540422, "grad_norm": 0.7142940759658813, "learning_rate": 1.4931266420502687e-07, "loss": 0.671156108379364, "step": 2193 }, { "epoch": 2.8461179714905613, "grad_norm": 0.7329767346382141, "learning_rate": 1.468609182962899e-07, "loss": 0.6843516230583191, "step": 2194 }, { "epoch": 2.8474156984406997, "grad_norm": 0.7575559616088867, "learning_rate": 1.4442931956310525e-07, "loss": 0.6152229309082031, "step": 2195 }, { "epoch": 2.848713425390839, "grad_norm": 0.7627936005592346, "learning_rate": 1.420178729774746e-07, "loss": 0.6545628905296326, "step": 2196 }, { "epoch": 2.850011152340978, "grad_norm": 0.7592964768409729, "learning_rate": 1.3962658347019819e-07, "loss": 0.7087745666503906, "step": 2197 }, { "epoch": 2.8513088792911168, "grad_norm": 0.7184759974479675, "learning_rate": 1.372554559308559e-07, "loss": 0.6886664032936096, "step": 2198 }, { "epoch": 2.8526066062412556, "grad_norm": 0.7686153054237366, "learning_rate": 1.3490449520780492e-07, "loss": 0.65256667137146, "step": 2199 }, { "epoch": 2.8539043331913945, "grad_norm": 0.722467839717865, "learning_rate": 1.3257370610816333e-07, "loss": 0.6053767800331116, "step": 2200 }, { "epoch": 2.8552020601415333, "grad_norm": 0.7348204255104065, "learning_rate": 1.3026309339780442e-07, "loss": 0.57970130443573, "step": 2201 }, { "epoch": 2.856499787091672, "grad_norm": 0.724539041519165, "learning_rate": 1.2797266180134994e-07, "loss": 0.6097747087478638, "step": 2202 }, { "epoch": 2.857797514041811, "grad_norm": 0.7563627362251282, "learning_rate": 1.2570241600214805e-07, "loss": 0.6322290897369385, "step": 2203 }, { "epoch": 2.85909524099195, "grad_norm": 0.7333301901817322, "learning_rate": 1.2345236064228216e-07, "loss": 0.6172837615013123, "step": 2204 }, { "epoch": 2.860392967942089, "grad_norm": 0.7645448446273804, "learning_rate": 1.212225003225409e-07, "loss": 0.6847653388977051, "step": 2205 }, { "epoch": 2.8616906948922276, "grad_norm": 0.7139600515365601, "learning_rate": 1.1901283960242704e-07, "loss": 0.641283392906189, "step": 2206 }, { "epoch": 2.862988421842367, "grad_norm": 0.7192294597625732, "learning_rate": 1.168233830001364e-07, "loss": 0.6558660864830017, "step": 2207 }, { "epoch": 2.8642861487925058, "grad_norm": 0.7247057557106018, "learning_rate": 1.1465413499255452e-07, "loss": 0.648059070110321, "step": 2208 }, { "epoch": 2.8655838757426446, "grad_norm": 0.7141038179397583, "learning_rate": 1.1250510001524329e-07, "loss": 0.7089075446128845, "step": 2209 }, { "epoch": 2.8668816026927835, "grad_norm": 0.7448967099189758, "learning_rate": 1.103762824624377e-07, "loss": 0.655659019947052, "step": 2210 }, { "epoch": 2.8681793296429223, "grad_norm": 0.7217125296592712, "learning_rate": 1.0826768668702691e-07, "loss": 0.6335598826408386, "step": 2211 }, { "epoch": 2.869477056593061, "grad_norm": 0.7432066202163696, "learning_rate": 1.0617931700055984e-07, "loss": 0.6629352569580078, "step": 2212 }, { "epoch": 2.8707747835432, "grad_norm": 0.759253740310669, "learning_rate": 1.0411117767322065e-07, "loss": 0.6971714496612549, "step": 2213 }, { "epoch": 2.872072510493339, "grad_norm": 0.7214189171791077, "learning_rate": 1.0206327293383222e-07, "loss": 0.6498401165008545, "step": 2214 }, { "epoch": 2.8733702374434777, "grad_norm": 0.7300909161567688, "learning_rate": 1.000356069698416e-07, "loss": 0.6666358113288879, "step": 2215 }, { "epoch": 2.874667964393617, "grad_norm": 0.7169894576072693, "learning_rate": 9.802818392731117e-08, "loss": 0.6067378520965576, "step": 2216 }, { "epoch": 2.8759656913437555, "grad_norm": 0.7870055437088013, "learning_rate": 9.60410079109153e-08, "loss": 0.7164538502693176, "step": 2217 }, { "epoch": 2.8772634182938948, "grad_norm": 0.731452465057373, "learning_rate": 9.407408298392373e-08, "loss": 0.6627915501594543, "step": 2218 }, { "epoch": 2.8785611452440336, "grad_norm": 0.7452148795127869, "learning_rate": 9.212741316820039e-08, "loss": 0.6090914607048035, "step": 2219 }, { "epoch": 2.8798588721941725, "grad_norm": 0.7165141701698303, "learning_rate": 9.020100244419461e-08, "loss": 0.7527438998222351, "step": 2220 }, { "epoch": 2.8811565991443113, "grad_norm": 0.7165322303771973, "learning_rate": 8.829485475092548e-08, "loss": 0.663241446018219, "step": 2221 }, { "epoch": 2.88245432609445, "grad_norm": 0.8054161667823792, "learning_rate": 8.640897398598525e-08, "loss": 0.765292227268219, "step": 2222 }, { "epoch": 2.883752053044589, "grad_norm": 0.7372357249259949, "learning_rate": 8.454336400552154e-08, "loss": 0.6321142911911011, "step": 2223 }, { "epoch": 2.885049779994728, "grad_norm": 0.7551286220550537, "learning_rate": 8.269802862423405e-08, "loss": 0.6694223880767822, "step": 2224 }, { "epoch": 2.8863475069448667, "grad_norm": 0.6954628825187683, "learning_rate": 8.087297161536778e-08, "loss": 0.650575578212738, "step": 2225 }, { "epoch": 2.8876452338950056, "grad_norm": 0.6984097957611084, "learning_rate": 7.906819671070098e-08, "loss": 0.6023176908493042, "step": 2226 }, { "epoch": 2.888942960845145, "grad_norm": 0.7234562635421753, "learning_rate": 7.728370760054283e-08, "loss": 0.6330822110176086, "step": 2227 }, { "epoch": 2.8902406877952833, "grad_norm": 0.7173102498054504, "learning_rate": 7.55195079337212e-08, "loss": 0.6250259876251221, "step": 2228 }, { "epoch": 2.8915384147454226, "grad_norm": 0.7292760610580444, "learning_rate": 7.377560131757832e-08, "loss": 0.6211444139480591, "step": 2229 }, { "epoch": 2.8928361416955615, "grad_norm": 0.7143842577934265, "learning_rate": 7.205199131796182e-08, "loss": 0.6102809906005859, "step": 2230 }, { "epoch": 2.8941338686457003, "grad_norm": 0.7200958132743835, "learning_rate": 7.034868145921802e-08, "loss": 0.6820523142814636, "step": 2231 }, { "epoch": 2.895431595595839, "grad_norm": 0.7009389400482178, "learning_rate": 6.866567522418322e-08, "loss": 0.6737648248672485, "step": 2232 }, { "epoch": 2.896729322545978, "grad_norm": 0.7720589637756348, "learning_rate": 6.700297605418127e-08, "loss": 0.6236926317214966, "step": 2233 }, { "epoch": 2.898027049496117, "grad_norm": 0.7273607850074768, "learning_rate": 6.53605873490093e-08, "loss": 0.673498272895813, "step": 2234 }, { "epoch": 2.8993247764462557, "grad_norm": 0.7236337065696716, "learning_rate": 6.373851246693763e-08, "loss": 0.6256372928619385, "step": 2235 }, { "epoch": 2.9006225033963946, "grad_norm": 0.7014041543006897, "learning_rate": 6.21367547246976e-08, "loss": 0.6363632678985596, "step": 2236 }, { "epoch": 2.9019202303465335, "grad_norm": 0.7210372686386108, "learning_rate": 6.055531739747933e-08, "loss": 0.6491326689720154, "step": 2237 }, { "epoch": 2.9032179572966728, "grad_norm": 0.766070544719696, "learning_rate": 5.899420371892173e-08, "loss": 0.606798529624939, "step": 2238 }, { "epoch": 2.904515684246811, "grad_norm": 0.7013832330703735, "learning_rate": 5.745341688110806e-08, "loss": 0.6418301463127136, "step": 2239 }, { "epoch": 2.9058134111969505, "grad_norm": 0.7240904569625854, "learning_rate": 5.593296003455595e-08, "loss": 0.6093890070915222, "step": 2240 }, { "epoch": 2.9071111381470893, "grad_norm": 0.7125054001808167, "learning_rate": 5.4432836288215165e-08, "loss": 0.6541129350662231, "step": 2241 }, { "epoch": 2.908408865097228, "grad_norm": 0.7161985635757446, "learning_rate": 5.2953048709459834e-08, "loss": 0.617908239364624, "step": 2242 }, { "epoch": 2.909706592047367, "grad_norm": 0.737856388092041, "learning_rate": 5.1493600324080684e-08, "loss": 0.649212121963501, "step": 2243 }, { "epoch": 2.911004318997506, "grad_norm": 0.7285069227218628, "learning_rate": 5.0054494116279497e-08, "loss": 0.6526796221733093, "step": 2244 }, { "epoch": 2.9123020459476447, "grad_norm": 0.715974748134613, "learning_rate": 4.8635733028664644e-08, "loss": 0.6148603558540344, "step": 2245 }, { "epoch": 2.9135997728977836, "grad_norm": 0.7559519410133362, "learning_rate": 4.723731996224446e-08, "loss": 0.6750462055206299, "step": 2246 }, { "epoch": 2.9148974998479225, "grad_norm": 0.7167734503746033, "learning_rate": 4.585925777641831e-08, "loss": 0.6933612823486328, "step": 2247 }, { "epoch": 2.9161952267980613, "grad_norm": 0.7255918383598328, "learning_rate": 4.450154928897443e-08, "loss": 0.6560993194580078, "step": 2248 }, { "epoch": 2.9174929537482006, "grad_norm": 0.7656079530715942, "learning_rate": 4.316419727608434e-08, "loss": 0.6685020923614502, "step": 2249 }, { "epoch": 2.9187906806983395, "grad_norm": 0.7287185788154602, "learning_rate": 4.1847204472293954e-08, "loss": 0.646466851234436, "step": 2250 }, { "epoch": 2.9200884076484783, "grad_norm": 0.7272042036056519, "learning_rate": 4.055057357052139e-08, "loss": 0.6481143236160278, "step": 2251 }, { "epoch": 2.921386134598617, "grad_norm": 0.7513357996940613, "learning_rate": 3.927430722204473e-08, "loss": 0.6382118463516235, "step": 2252 }, { "epoch": 2.922683861548756, "grad_norm": 0.7202178239822388, "learning_rate": 3.801840803651091e-08, "loss": 0.6208593845367432, "step": 2253 }, { "epoch": 2.923981588498895, "grad_norm": 0.7391272783279419, "learning_rate": 3.678287858191132e-08, "loss": 0.62124103307724, "step": 2254 }, { "epoch": 2.9252793154490337, "grad_norm": 0.7046197056770325, "learning_rate": 3.5567721384593965e-08, "loss": 0.6635320782661438, "step": 2255 }, { "epoch": 2.9265770423991726, "grad_norm": 0.7366517782211304, "learning_rate": 3.437293892924576e-08, "loss": 0.657387912273407, "step": 2256 }, { "epoch": 2.9278747693493115, "grad_norm": 0.7833458781242371, "learning_rate": 3.3198533658895804e-08, "loss": 0.681797981262207, "step": 2257 }, { "epoch": 2.9291724962994508, "grad_norm": 0.7216890454292297, "learning_rate": 3.2044507974905433e-08, "loss": 0.5936287641525269, "step": 2258 }, { "epoch": 2.930470223249589, "grad_norm": 0.736221969127655, "learning_rate": 3.091086423696377e-08, "loss": 0.6654385328292847, "step": 2259 }, { "epoch": 2.9317679501997285, "grad_norm": 0.7042406797409058, "learning_rate": 2.9797604763087684e-08, "loss": 0.6541644930839539, "step": 2260 }, { "epoch": 2.9330656771498673, "grad_norm": 0.7537480592727661, "learning_rate": 2.8704731829609643e-08, "loss": 0.6462427377700806, "step": 2261 }, { "epoch": 2.934363404100006, "grad_norm": 0.748501718044281, "learning_rate": 2.763224767117767e-08, "loss": 0.6837744116783142, "step": 2262 }, { "epoch": 2.935661131050145, "grad_norm": 0.7571681141853333, "learning_rate": 2.6580154480750907e-08, "loss": 0.6494276523590088, "step": 2263 }, { "epoch": 2.936958858000284, "grad_norm": 0.7051231265068054, "learning_rate": 2.554845440959408e-08, "loss": 0.6642428040504456, "step": 2264 }, { "epoch": 2.9382565849504227, "grad_norm": 0.7481043934822083, "learning_rate": 2.4537149567271935e-08, "loss": 0.7524136900901794, "step": 2265 }, { "epoch": 2.9395543119005616, "grad_norm": 0.7172916531562805, "learning_rate": 2.3546242021648126e-08, "loss": 0.6545467972755432, "step": 2266 }, { "epoch": 2.9408520388507005, "grad_norm": 0.7390909790992737, "learning_rate": 2.2575733798876342e-08, "loss": 0.6789126396179199, "step": 2267 }, { "epoch": 2.9421497658008393, "grad_norm": 0.6911484003067017, "learning_rate": 2.162562688340142e-08, "loss": 0.5900536775588989, "step": 2268 }, { "epoch": 2.9434474927509786, "grad_norm": 0.7650425434112549, "learning_rate": 2.0695923217950442e-08, "loss": 0.6601477861404419, "step": 2269 }, { "epoch": 2.944745219701117, "grad_norm": 0.7415356040000916, "learning_rate": 1.9786624703532764e-08, "loss": 0.7132882475852966, "step": 2270 }, { "epoch": 2.9460429466512563, "grad_norm": 0.7267791032791138, "learning_rate": 1.8897733199434443e-08, "loss": 0.6234641075134277, "step": 2271 }, { "epoch": 2.947340673601395, "grad_norm": 0.7090092897415161, "learning_rate": 1.8029250523211582e-08, "loss": 0.6485676765441895, "step": 2272 }, { "epoch": 2.948638400551534, "grad_norm": 0.7129170298576355, "learning_rate": 1.718117845069367e-08, "loss": 0.6410534977912903, "step": 2273 }, { "epoch": 2.949936127501673, "grad_norm": 0.7186943292617798, "learning_rate": 1.635351871597246e-08, "loss": 0.7133535146713257, "step": 2274 }, { "epoch": 2.9512338544518117, "grad_norm": 0.7258438467979431, "learning_rate": 1.554627301140199e-08, "loss": 0.5933857560157776, "step": 2275 }, { "epoch": 2.9525315814019506, "grad_norm": 0.7135540843009949, "learning_rate": 1.4759442987596351e-08, "loss": 0.6514700055122375, "step": 2276 }, { "epoch": 2.9538293083520895, "grad_norm": 0.7308082580566406, "learning_rate": 1.3993030253423023e-08, "loss": 0.6132031679153442, "step": 2277 }, { "epoch": 2.9551270353022283, "grad_norm": 0.7810271382331848, "learning_rate": 1.3247036376002886e-08, "loss": 0.654386043548584, "step": 2278 }, { "epoch": 2.956424762252367, "grad_norm": 0.761455237865448, "learning_rate": 1.252146288070355e-08, "loss": 0.6730161309242249, "step": 2279 }, { "epoch": 2.9577224892025065, "grad_norm": 0.7196770906448364, "learning_rate": 1.1816311251140466e-08, "loss": 0.6393716931343079, "step": 2280 }, { "epoch": 2.959020216152645, "grad_norm": 0.6943092346191406, "learning_rate": 1.113158292916916e-08, "loss": 0.6582570672035217, "step": 2281 }, { "epoch": 2.960317943102784, "grad_norm": 0.7215139865875244, "learning_rate": 1.0467279314886336e-08, "loss": 0.6728758215904236, "step": 2282 }, { "epoch": 2.961615670052923, "grad_norm": 0.7100042700767517, "learning_rate": 9.82340176662433e-09, "loss": 0.6192055344581604, "step": 2283 }, { "epoch": 2.962913397003062, "grad_norm": 0.772715151309967, "learning_rate": 9.199951600951106e-09, "loss": 0.6373339295387268, "step": 2284 }, { "epoch": 2.9642111239532007, "grad_norm": 0.6952692866325378, "learning_rate": 8.596930092662493e-09, "loss": 0.6480576992034912, "step": 2285 }, { "epoch": 2.9655088509033396, "grad_norm": 0.729654848575592, "learning_rate": 8.014338474785499e-09, "loss": 0.5901361107826233, "step": 2286 }, { "epoch": 2.9668065778534785, "grad_norm": 0.7037022709846497, "learning_rate": 7.45217793857389e-09, "loss": 0.6541380882263184, "step": 2287 }, { "epoch": 2.9681043048036173, "grad_norm": 0.7359015941619873, "learning_rate": 6.910449633501515e-09, "loss": 0.6508733630180359, "step": 2288 }, { "epoch": 2.969402031753756, "grad_norm": 0.6860209703445435, "learning_rate": 6.389154667266751e-09, "loss": 0.6324610710144043, "step": 2289 }, { "epoch": 2.970699758703895, "grad_norm": 0.7234740257263184, "learning_rate": 5.888294105785841e-09, "loss": 0.6781293749809265, "step": 2290 }, { "epoch": 2.9719974856540343, "grad_norm": 0.748229444026947, "learning_rate": 5.407868973191788e-09, "loss": 0.7036339640617371, "step": 2291 }, { "epoch": 2.9732952126041727, "grad_norm": 0.7085629105567932, "learning_rate": 4.947880251832127e-09, "loss": 0.6461360454559326, "step": 2292 }, { "epoch": 2.974592939554312, "grad_norm": 0.7789812088012695, "learning_rate": 4.508328882268931e-09, "loss": 0.6448870897293091, "step": 2293 }, { "epoch": 2.975890666504451, "grad_norm": 0.7379918694496155, "learning_rate": 4.089215763271037e-09, "loss": 0.5733003616333008, "step": 2294 }, { "epoch": 2.9771883934545897, "grad_norm": 0.700847864151001, "learning_rate": 3.6905417518195985e-09, "loss": 0.6530927419662476, "step": 2295 }, { "epoch": 2.9784861204047286, "grad_norm": 0.7081441879272461, "learning_rate": 3.312307663103642e-09, "loss": 0.643142819404602, "step": 2296 }, { "epoch": 2.9797838473548675, "grad_norm": 0.7461786270141602, "learning_rate": 2.954514270513409e-09, "loss": 0.6704539060592651, "step": 2297 }, { "epoch": 2.9810815743050063, "grad_norm": 0.7500106692314148, "learning_rate": 2.6171623056481245e-09, "loss": 0.6799619197845459, "step": 2298 }, { "epoch": 2.982379301255145, "grad_norm": 0.7831278443336487, "learning_rate": 2.300252458306007e-09, "loss": 0.6943190097808838, "step": 2299 }, { "epoch": 2.983677028205284, "grad_norm": 0.7213168740272522, "learning_rate": 2.0037853764887096e-09, "loss": 0.677469789981842, "step": 2300 }, { "epoch": 2.983677028205284, "eval_loss": 0.748992383480072, "eval_runtime": 142.0816, "eval_samples_per_second": 36.542, "eval_steps_per_second": 9.136, "step": 2300 }, { "epoch": 2.984974755155423, "grad_norm": 0.7276439070701599, "learning_rate": 1.7277616663946562e-09, "loss": 0.6558234095573425, "step": 2301 }, { "epoch": 2.986272482105562, "grad_norm": 0.7110369205474854, "learning_rate": 1.4721818924223752e-09, "loss": 0.6696679592132568, "step": 2302 }, { "epoch": 2.987570209055701, "grad_norm": 0.7226536273956299, "learning_rate": 1.2370465771693874e-09, "loss": 0.6655494570732117, "step": 2303 }, { "epoch": 2.98886793600584, "grad_norm": 0.7148182988166809, "learning_rate": 1.0223562014277654e-09, "loss": 0.6355955600738525, "step": 2304 }, { "epoch": 2.9901656629559787, "grad_norm": 0.7424213886260986, "learning_rate": 8.281112041841343e-10, "loss": 0.6586095094680786, "step": 2305 }, { "epoch": 2.9914633899061176, "grad_norm": 0.742540717124939, "learning_rate": 6.543119826207811e-10, "loss": 0.6475944519042969, "step": 2306 }, { "epoch": 2.9927611168562565, "grad_norm": 0.7507250905036926, "learning_rate": 5.009588921123243e-10, "loss": 0.6415051221847534, "step": 2307 }, { "epoch": 2.9940588438063953, "grad_norm": 0.7377761602401733, "learning_rate": 3.680522462279346e-10, "loss": 0.6111840009689331, "step": 2308 }, { "epoch": 2.995356570756534, "grad_norm": 0.735808789730072, "learning_rate": 2.555923167291141e-10, "loss": 0.6714158058166504, "step": 2309 }, { "epoch": 2.996654297706673, "grad_norm": 0.7448306083679199, "learning_rate": 1.635793335652558e-10, "loss": 0.7029042840003967, "step": 2310 }, { "epoch": 2.9979520246568123, "grad_norm": 0.761702299118042, "learning_rate": 9.20134848814147e-11, "loss": 0.6207424998283386, "step": 2311 }, { "epoch": 2.9992497516069507, "grad_norm": 0.76186203956604, "learning_rate": 4.08949170105366e-11, "loss": 0.6319787502288818, "step": 2312 }, { "epoch": 3.0, "grad_norm": 0.9660639762878418, "learning_rate": 1.022373447900904e-11, "loss": 0.7546765804290771, "step": 2313 }, { "epoch": 3.0, "step": 2313, "total_flos": 4.1917370482093916e+18, "train_loss": 0.06833103949818527, "train_runtime": 3477.4713, "train_samples_per_second": 85.09, "train_steps_per_second": 0.665 } ], "logging_steps": 1.0, "max_steps": 2313, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 230, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.1917370482093916e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }