767 lines
20 KiB
JSON
767 lines
20 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.4999036237471087,
|
|
"eval_steps": 5187,
|
|
"global_step": 5187,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00481881264456438,
|
|
"grad_norm": 1.6099064350128174,
|
|
"learning_rate": 6.294155427103405e-07,
|
|
"loss": 0.095972900390625,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.00963762528912876,
|
|
"grad_norm": 1.0794726610183716,
|
|
"learning_rate": 1.2716763005780348e-06,
|
|
"loss": 0.03401387691497803,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.014456437933693137,
|
|
"grad_norm": 0.9826000928878784,
|
|
"learning_rate": 1.9139370584457295e-06,
|
|
"loss": 0.021355185508728027,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.01927525057825752,
|
|
"grad_norm": 1.6832308769226074,
|
|
"learning_rate": 2.5561978163134233e-06,
|
|
"loss": 0.016741816997528077,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.024094063222821895,
|
|
"grad_norm": 1.745717167854309,
|
|
"learning_rate": 3.198458574181118e-06,
|
|
"loss": 0.014754180908203124,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.028912875867386275,
|
|
"grad_norm": 0.41400647163391113,
|
|
"learning_rate": 3.8407193320488126e-06,
|
|
"loss": 0.014074199199676514,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.03373168851195066,
|
|
"grad_norm": 0.7941911220550537,
|
|
"learning_rate": 4.482980089916507e-06,
|
|
"loss": 0.013227691650390625,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.03855050115651504,
|
|
"grad_norm": 0.5189383029937744,
|
|
"learning_rate": 5.125240847784201e-06,
|
|
"loss": 0.013188705444335938,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.04336931380107941,
|
|
"grad_norm": 1.6657729148864746,
|
|
"learning_rate": 5.767501605651895e-06,
|
|
"loss": 0.012030971050262452,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.04818812644564379,
|
|
"grad_norm": 0.32716143131256104,
|
|
"learning_rate": 6.4097623635195895e-06,
|
|
"loss": 0.012086995840072633,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.05300693909020817,
|
|
"grad_norm": 0.3107227385044098,
|
|
"learning_rate": 7.052023121387284e-06,
|
|
"loss": 0.011651687622070313,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.05782575173477255,
|
|
"grad_norm": 0.6159315705299377,
|
|
"learning_rate": 7.694283879254977e-06,
|
|
"loss": 0.011140645742416381,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.06264456437933694,
|
|
"grad_norm": 2.1143224239349365,
|
|
"learning_rate": 8.336544637122673e-06,
|
|
"loss": 0.012795639038085938,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.06746337702390132,
|
|
"grad_norm": 2.545966863632202,
|
|
"learning_rate": 8.978805394990367e-06,
|
|
"loss": 0.013084233999252319,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.0722821896684657,
|
|
"grad_norm": 1.8246541023254395,
|
|
"learning_rate": 9.621066152858061e-06,
|
|
"loss": 0.012490972280502319,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.07710100231303008,
|
|
"grad_norm": 0.959894597530365,
|
|
"learning_rate": 1.0263326910725756e-05,
|
|
"loss": 0.011189931631088256,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.08191981495759446,
|
|
"grad_norm": 4.178706645965576,
|
|
"learning_rate": 1.090558766859345e-05,
|
|
"loss": 0.012091522216796874,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.08673862760215882,
|
|
"grad_norm": 0.7208101153373718,
|
|
"learning_rate": 1.1547848426461144e-05,
|
|
"loss": 0.012419841289520263,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.0915574402467232,
|
|
"grad_norm": 1.3128774166107178,
|
|
"learning_rate": 1.2190109184328838e-05,
|
|
"loss": 0.011686071157455444,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.09637625289128758,
|
|
"grad_norm": 1.2861932516098022,
|
|
"learning_rate": 1.2832369942196533e-05,
|
|
"loss": 0.011539828777313233,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.10119506553585196,
|
|
"grad_norm": 1.6700119972229004,
|
|
"learning_rate": 1.3474630700064227e-05,
|
|
"loss": 0.012701009511947631,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.10601387818041634,
|
|
"grad_norm": 1.6825134754180908,
|
|
"learning_rate": 1.4116891457931921e-05,
|
|
"loss": 0.0122796630859375,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.11083269082498072,
|
|
"grad_norm": 0.18270175158977509,
|
|
"learning_rate": 1.4759152215799615e-05,
|
|
"loss": 0.014598617553710938,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.1156515034695451,
|
|
"grad_norm": 2.148013114929199,
|
|
"learning_rate": 1.540141297366731e-05,
|
|
"loss": 0.012004268169403077,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.12047031611410948,
|
|
"grad_norm": 0.2672475278377533,
|
|
"learning_rate": 1.6043673731535007e-05,
|
|
"loss": 0.011746572256088257,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.12528912875867387,
|
|
"grad_norm": 1.4434212446212769,
|
|
"learning_rate": 1.66859344894027e-05,
|
|
"loss": 0.012035726308822632,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.13010794140323825,
|
|
"grad_norm": 0.4635275602340698,
|
|
"learning_rate": 1.7328195247270396e-05,
|
|
"loss": 0.012548294067382813,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.13492675404780263,
|
|
"grad_norm": 0.548039436340332,
|
|
"learning_rate": 1.7970456005138088e-05,
|
|
"loss": 0.012786407470703125,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.139745566692367,
|
|
"grad_norm": 0.7193094491958618,
|
|
"learning_rate": 1.8612716763005784e-05,
|
|
"loss": 0.011991348266601563,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.1445643793369314,
|
|
"grad_norm": 0.5346310138702393,
|
|
"learning_rate": 1.9254977520873477e-05,
|
|
"loss": 0.012789205312728882,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.14938319198149577,
|
|
"grad_norm": 0.40978488326072693,
|
|
"learning_rate": 1.9897238278741172e-05,
|
|
"loss": 0.012897975444793701,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.15420200462606015,
|
|
"grad_norm": 1.7101497650146484,
|
|
"learning_rate": 1.9999900451301277e-05,
|
|
"loss": 0.014085414409637452,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.15902081727062453,
|
|
"grad_norm": 0.2999866306781769,
|
|
"learning_rate": 1.9999522349843378e-05,
|
|
"loss": 0.01345428466796875,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.1638396299151889,
|
|
"grad_norm": 0.3239404857158661,
|
|
"learning_rate": 1.9998862094545145e-05,
|
|
"loss": 0.012794520854949951,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.16865844255975326,
|
|
"grad_norm": 0.1947634220123291,
|
|
"learning_rate": 1.999791970403682e-05,
|
|
"loss": 0.013333181142807007,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.17347725520431764,
|
|
"grad_norm": 0.8663123846054077,
|
|
"learning_rate": 1.9996695204909593e-05,
|
|
"loss": 0.012868322134017944,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.17829606784888202,
|
|
"grad_norm": 0.504265546798706,
|
|
"learning_rate": 1.9995188631714816e-05,
|
|
"loss": 0.013335164785385132,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.1831148804934464,
|
|
"grad_norm": 0.36884820461273193,
|
|
"learning_rate": 1.9993400026963072e-05,
|
|
"loss": 0.013257879018783569,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.18793369313801078,
|
|
"grad_norm": 0.6779909729957581,
|
|
"learning_rate": 1.9991329441122937e-05,
|
|
"loss": 0.012929306030273438,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.19275250578257516,
|
|
"grad_norm": 0.19276919960975647,
|
|
"learning_rate": 1.9988976932619574e-05,
|
|
"loss": 0.012471644878387452,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.19757131842713954,
|
|
"grad_norm": 2.5498015880584717,
|
|
"learning_rate": 1.9986342567833087e-05,
|
|
"loss": 0.013909963369369506,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.20239013107170392,
|
|
"grad_norm": 0.7871809005737305,
|
|
"learning_rate": 1.9983426421096636e-05,
|
|
"loss": 0.013362987041473389,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.2072089437162683,
|
|
"grad_norm": 0.5824525356292725,
|
|
"learning_rate": 1.9980228574694357e-05,
|
|
"loss": 0.012392985820770263,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.21202775636083268,
|
|
"grad_norm": 1.331589698791504,
|
|
"learning_rate": 1.9976749118859023e-05,
|
|
"loss": 0.012818679809570313,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.21684656900539706,
|
|
"grad_norm": 0.22093236446380615,
|
|
"learning_rate": 1.9972988151769507e-05,
|
|
"loss": 0.012426936626434326,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.22166538164996144,
|
|
"grad_norm": 1.7017446756362915,
|
|
"learning_rate": 1.9968945779548007e-05,
|
|
"loss": 0.013253505229949952,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.22648419429452582,
|
|
"grad_norm": 0.16596011817455292,
|
|
"learning_rate": 1.9964622116257056e-05,
|
|
"loss": 0.012612838745117188,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.2313030069390902,
|
|
"grad_norm": 0.2315380871295929,
|
|
"learning_rate": 1.99600172838963e-05,
|
|
"loss": 0.012747344970703125,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.23612181958365458,
|
|
"grad_norm": 0.3689921796321869,
|
|
"learning_rate": 1.9955131412399064e-05,
|
|
"loss": 0.012891719341278076,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.24094063222821896,
|
|
"grad_norm": 0.5337355732917786,
|
|
"learning_rate": 1.994996463962867e-05,
|
|
"loss": 0.012232768535614013,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.24575944487278334,
|
|
"grad_norm": 0.1625846028327942,
|
|
"learning_rate": 1.9944517111374558e-05,
|
|
"loss": 0.013072433471679688,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.25057825751734775,
|
|
"grad_norm": 0.7666917443275452,
|
|
"learning_rate": 1.9938788981348175e-05,
|
|
"loss": 0.011926066875457764,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.2553970701619121,
|
|
"grad_norm": 0.5212653279304504,
|
|
"learning_rate": 1.9932780411178628e-05,
|
|
"loss": 0.012572301626205444,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.2602158828064765,
|
|
"grad_norm": 1.3501203060150146,
|
|
"learning_rate": 1.9926491570408126e-05,
|
|
"loss": 0.012271144390106202,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.26503469545104086,
|
|
"grad_norm": 0.1507686972618103,
|
|
"learning_rate": 1.991992263648721e-05,
|
|
"loss": 0.013356069326400757,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.26985350809560527,
|
|
"grad_norm": 1.450133204460144,
|
|
"learning_rate": 1.9913073794769727e-05,
|
|
"loss": 0.012582473754882813,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.2746723207401696,
|
|
"grad_norm": 0.20175831019878387,
|
|
"learning_rate": 1.9905945238507597e-05,
|
|
"loss": 0.012466964721679687,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.279491133384734,
|
|
"grad_norm": 0.9705828428268433,
|
|
"learning_rate": 1.989853716884539e-05,
|
|
"loss": 0.012320556640625,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.2843099460292984,
|
|
"grad_norm": 0.29664674401283264,
|
|
"learning_rate": 1.9890849794814616e-05,
|
|
"loss": 0.012661590576171874,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.2891287586738628,
|
|
"grad_norm": 0.7184270620346069,
|
|
"learning_rate": 1.9882883333327844e-05,
|
|
"loss": 0.012468541860580445,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.29394757131842714,
|
|
"grad_norm": 1.2583141326904297,
|
|
"learning_rate": 1.987463800917259e-05,
|
|
"loss": 0.013047130107879638,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.29876638396299154,
|
|
"grad_norm": 0.3179360628128052,
|
|
"learning_rate": 1.986611405500495e-05,
|
|
"loss": 0.012662353515625,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.3035851966075559,
|
|
"grad_norm": 0.4840896427631378,
|
|
"learning_rate": 1.9857311711343047e-05,
|
|
"loss": 0.012455928325653075,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.3084040092521203,
|
|
"grad_norm": 0.8195740580558777,
|
|
"learning_rate": 1.984823122656026e-05,
|
|
"loss": 0.012638804912567138,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.31322282189668466,
|
|
"grad_norm": 0.1526927649974823,
|
|
"learning_rate": 1.9838872856878185e-05,
|
|
"loss": 0.01197858214378357,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.31804163454124906,
|
|
"grad_norm": 0.27761492133140564,
|
|
"learning_rate": 1.982923686635944e-05,
|
|
"loss": 0.012119649648666382,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.3228604471858134,
|
|
"grad_norm": 1.5864442586898804,
|
|
"learning_rate": 1.981932352690017e-05,
|
|
"loss": 0.012395553588867188,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.3276792598303778,
|
|
"grad_norm": 1.2578856945037842,
|
|
"learning_rate": 1.980913311822243e-05,
|
|
"loss": 0.01248263120651245,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.3324980724749422,
|
|
"grad_norm": 0.30967798829078674,
|
|
"learning_rate": 1.979866592786624e-05,
|
|
"loss": 0.012674357891082764,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.3373168851195065,
|
|
"grad_norm": 1.2549265623092651,
|
|
"learning_rate": 1.9787922251181513e-05,
|
|
"loss": 0.012856496572494507,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.34213569776407093,
|
|
"grad_norm": 0.8616346120834351,
|
|
"learning_rate": 1.977690239131968e-05,
|
|
"loss": 0.012417705059051513,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.3469545104086353,
|
|
"grad_norm": 0.27554193139076233,
|
|
"learning_rate": 1.976560665922518e-05,
|
|
"loss": 0.012434184551239014,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.3517733230531997,
|
|
"grad_norm": 0.6482635140419006,
|
|
"learning_rate": 1.9754035373626646e-05,
|
|
"loss": 0.012548320293426514,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.35659213569776405,
|
|
"grad_norm": 0.8016761541366577,
|
|
"learning_rate": 1.9742188861027957e-05,
|
|
"loss": 0.012607015371322632,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.36141094834232845,
|
|
"grad_norm": 0.3415991961956024,
|
|
"learning_rate": 1.9730067455698964e-05,
|
|
"loss": 0.012693126201629639,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.3662297609868928,
|
|
"grad_norm": 1.8076531887054443,
|
|
"learning_rate": 1.9717671499666125e-05,
|
|
"loss": 0.01259676694869995,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.3710485736314572,
|
|
"grad_norm": 0.1274661123752594,
|
|
"learning_rate": 1.97050013427028e-05,
|
|
"loss": 0.012547200918197632,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.37586738627602156,
|
|
"grad_norm": 0.20902210474014282,
|
|
"learning_rate": 1.9692057342319407e-05,
|
|
"loss": 0.0122760009765625,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.38068619892058597,
|
|
"grad_norm": 0.5315442085266113,
|
|
"learning_rate": 1.9678839863753336e-05,
|
|
"loss": 0.012463277578353882,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.3855050115651503,
|
|
"grad_norm": 1.2004033327102661,
|
|
"learning_rate": 1.966534927995864e-05,
|
|
"loss": 0.012470932006835937,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.39032382420971473,
|
|
"grad_norm": 0.1404499113559723,
|
|
"learning_rate": 1.9651585971595494e-05,
|
|
"loss": 0.012283198833465576,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.3951426368542791,
|
|
"grad_norm": 0.4698476791381836,
|
|
"learning_rate": 1.9637550327019488e-05,
|
|
"loss": 0.012424596548080445,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.3999614494988435,
|
|
"grad_norm": 0.11542811989784241,
|
|
"learning_rate": 1.9623242742270635e-05,
|
|
"loss": 0.012442626953125,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.40478026214340784,
|
|
"grad_norm": 0.309451699256897,
|
|
"learning_rate": 1.9608663621062222e-05,
|
|
"loss": 0.011946996450424194,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.40959907478797225,
|
|
"grad_norm": 0.17752103507518768,
|
|
"learning_rate": 1.9593813374769396e-05,
|
|
"loss": 0.012281291484832764,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.4144178874325366,
|
|
"grad_norm": 0.9642850160598755,
|
|
"learning_rate": 1.9578692422417578e-05,
|
|
"loss": 0.011960487365722656,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.419236700077101,
|
|
"grad_norm": 0.38358408212661743,
|
|
"learning_rate": 1.9563301190670625e-05,
|
|
"loss": 0.012084554433822631,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 0.42405551272166536,
|
|
"grad_norm": 0.3695308566093445,
|
|
"learning_rate": 1.954764011381879e-05,
|
|
"loss": 0.011912307739257812,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.42887432536622977,
|
|
"grad_norm": 0.1504460573196411,
|
|
"learning_rate": 1.9531709633766486e-05,
|
|
"loss": 0.0125970721244812,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 0.4336931380107941,
|
|
"grad_norm": 0.7031795382499695,
|
|
"learning_rate": 1.951551020001979e-05,
|
|
"loss": 0.011779887676239014,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.43851195065535853,
|
|
"grad_norm": 0.9349226355552673,
|
|
"learning_rate": 1.9499042269673785e-05,
|
|
"loss": 0.012475408315658569,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 0.4433307632999229,
|
|
"grad_norm": 1.299560308456421,
|
|
"learning_rate": 1.9482306307399642e-05,
|
|
"loss": 0.012063064575195313,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.4481495759444873,
|
|
"grad_norm": 0.3574764132499695,
|
|
"learning_rate": 1.9465302785431518e-05,
|
|
"loss": 0.012038066387176513,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 0.45296838858905164,
|
|
"grad_norm": 0.39971208572387695,
|
|
"learning_rate": 1.9448032183553237e-05,
|
|
"loss": 0.012331008911132812,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.45778720123361605,
|
|
"grad_norm": 0.7214897274971008,
|
|
"learning_rate": 1.9430494989084733e-05,
|
|
"loss": 0.012144622802734375,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 0.4626060138781804,
|
|
"grad_norm": 0.5722167491912842,
|
|
"learning_rate": 1.9412691696868327e-05,
|
|
"loss": 0.012291641235351562,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.4674248265227448,
|
|
"grad_norm": 0.193996861577034,
|
|
"learning_rate": 1.9394622809254735e-05,
|
|
"loss": 0.012149810791015625,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 0.47224363916730916,
|
|
"grad_norm": 0.7098087072372437,
|
|
"learning_rate": 1.9376288836088916e-05,
|
|
"loss": 0.012204360961914063,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.47706245181187357,
|
|
"grad_norm": 0.5333502292633057,
|
|
"learning_rate": 1.9357690294695673e-05,
|
|
"loss": 0.012274196147918701,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 0.4818812644564379,
|
|
"grad_norm": 0.13141588866710663,
|
|
"learning_rate": 1.9338827709865064e-05,
|
|
"loss": 0.012214864492416383,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.4867000771010023,
|
|
"grad_norm": 0.2113286256790161,
|
|
"learning_rate": 1.9319701613837577e-05,
|
|
"loss": 0.012197240591049194,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 0.4915188897455667,
|
|
"grad_norm": 0.7589179873466492,
|
|
"learning_rate": 1.9300312546289144e-05,
|
|
"loss": 0.012238616943359376,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.4963377023901311,
|
|
"grad_norm": 0.47019609808921814,
|
|
"learning_rate": 1.928066105431588e-05,
|
|
"loss": 0.012136790752410889,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 0.4999036237471087,
|
|
"eval_loss": 0.007605554535984993,
|
|
"eval_mae": 0.06400828063488007,
|
|
"eval_mse": 0.007605642545968294,
|
|
"eval_runtime": 4371.2139,
|
|
"eval_samples_per_second": 105.223,
|
|
"eval_spearman": 0.9365585006265831,
|
|
"eval_steps_per_second": 0.548,
|
|
"step": 5187
|
|
}
|
|
],
|
|
"logging_steps": 50,
|
|
"max_steps": 31128,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 5187,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 3.704276921907937e+19,
|
|
"train_batch_size": 32,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|