16315 lines
422 KiB
JSON
16315 lines
422 KiB
JSON
{
|
|
"best_global_step": 1380,
|
|
"best_metric": 0.6777992248535156,
|
|
"best_model_checkpoint": "saves/qwen3-4B/Qwen3-4B-SFT-science-1e-5/checkpoint-1380",
|
|
"epoch": 3.0,
|
|
"eval_steps": 230,
|
|
"global_step": 2313,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0012976480129764801,
|
|
"grad_norm": 8.159334182739258,
|
|
"learning_rate": 0.0,
|
|
"loss": 1.117659091949463,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0025952960259529602,
|
|
"grad_norm": 7.678379535675049,
|
|
"learning_rate": 8.620689655172414e-08,
|
|
"loss": 1.0263863801956177,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0038929440389294406,
|
|
"grad_norm": 8.245121002197266,
|
|
"learning_rate": 1.7241379310344828e-07,
|
|
"loss": 1.1220793724060059,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.0051905920519059205,
|
|
"grad_norm": 8.546252250671387,
|
|
"learning_rate": 2.5862068965517245e-07,
|
|
"loss": 1.18021821975708,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.006488240064882401,
|
|
"grad_norm": 7.886499404907227,
|
|
"learning_rate": 3.4482758620689656e-07,
|
|
"loss": 1.107445240020752,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.007785888077858881,
|
|
"grad_norm": 10.850175857543945,
|
|
"learning_rate": 4.3103448275862073e-07,
|
|
"loss": 1.099359393119812,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.009083536090835361,
|
|
"grad_norm": 8.097647666931152,
|
|
"learning_rate": 5.172413793103449e-07,
|
|
"loss": 1.0631245374679565,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.010381184103811841,
|
|
"grad_norm": 7.725368022918701,
|
|
"learning_rate": 6.034482758620691e-07,
|
|
"loss": 1.0364526510238647,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.01167883211678832,
|
|
"grad_norm": 7.084433555603027,
|
|
"learning_rate": 6.896551724137931e-07,
|
|
"loss": 0.977345883846283,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.012976480129764802,
|
|
"grad_norm": 7.370170593261719,
|
|
"learning_rate": 7.758620689655173e-07,
|
|
"loss": 1.0759401321411133,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.014274128142741281,
|
|
"grad_norm": 7.056736469268799,
|
|
"learning_rate": 8.620689655172415e-07,
|
|
"loss": 1.051821231842041,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.015571776155717762,
|
|
"grad_norm": 6.129208564758301,
|
|
"learning_rate": 9.482758620689655e-07,
|
|
"loss": 1.008002758026123,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.01686942416869424,
|
|
"grad_norm": 6.331120491027832,
|
|
"learning_rate": 1.0344827586206898e-06,
|
|
"loss": 1.0412415266036987,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.018167072181670723,
|
|
"grad_norm": 5.9186625480651855,
|
|
"learning_rate": 1.120689655172414e-06,
|
|
"loss": 1.0198311805725098,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.019464720194647202,
|
|
"grad_norm": 5.27198600769043,
|
|
"learning_rate": 1.2068965517241381e-06,
|
|
"loss": 1.0152095556259155,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.020762368207623682,
|
|
"grad_norm": 4.562581539154053,
|
|
"learning_rate": 1.2931034482758623e-06,
|
|
"loss": 0.9857317805290222,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.02206001622060016,
|
|
"grad_norm": 4.586100101470947,
|
|
"learning_rate": 1.3793103448275862e-06,
|
|
"loss": 1.0225930213928223,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.02335766423357664,
|
|
"grad_norm": 4.058810234069824,
|
|
"learning_rate": 1.4655172413793104e-06,
|
|
"loss": 0.9823198318481445,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.024655312246553124,
|
|
"grad_norm": 4.066655158996582,
|
|
"learning_rate": 1.5517241379310346e-06,
|
|
"loss": 0.9863596558570862,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.025952960259529603,
|
|
"grad_norm": 3.7554173469543457,
|
|
"learning_rate": 1.6379310344827587e-06,
|
|
"loss": 0.9025828838348389,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.027250608272506083,
|
|
"grad_norm": 2.6631460189819336,
|
|
"learning_rate": 1.724137931034483e-06,
|
|
"loss": 0.9907147288322449,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.028548256285482562,
|
|
"grad_norm": 2.3198695182800293,
|
|
"learning_rate": 1.810344827586207e-06,
|
|
"loss": 0.872843861579895,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.02984590429845904,
|
|
"grad_norm": 2.0851941108703613,
|
|
"learning_rate": 1.896551724137931e-06,
|
|
"loss": 0.896687388420105,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.031143552311435525,
|
|
"grad_norm": 1.9391196966171265,
|
|
"learning_rate": 1.982758620689655e-06,
|
|
"loss": 0.8471081852912903,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.032441200324412,
|
|
"grad_norm": 1.705809473991394,
|
|
"learning_rate": 2.0689655172413796e-06,
|
|
"loss": 0.8402453064918518,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.03373884833738848,
|
|
"grad_norm": 1.6870861053466797,
|
|
"learning_rate": 2.1551724137931035e-06,
|
|
"loss": 0.8823003768920898,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.035036496350364967,
|
|
"grad_norm": 1.63539719581604,
|
|
"learning_rate": 2.241379310344828e-06,
|
|
"loss": 0.8520861864089966,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.036334144363341446,
|
|
"grad_norm": 1.215566873550415,
|
|
"learning_rate": 2.327586206896552e-06,
|
|
"loss": 0.8638472557067871,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.037631792376317925,
|
|
"grad_norm": 1.3506380319595337,
|
|
"learning_rate": 2.4137931034482762e-06,
|
|
"loss": 0.8670657277107239,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.038929440389294405,
|
|
"grad_norm": 1.3755369186401367,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 0.8863908648490906,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.040227088402270884,
|
|
"grad_norm": 1.3734447956085205,
|
|
"learning_rate": 2.5862068965517246e-06,
|
|
"loss": 0.8129177093505859,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.041524736415247364,
|
|
"grad_norm": 1.294492244720459,
|
|
"learning_rate": 2.672413793103448e-06,
|
|
"loss": 0.8296308517456055,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.04282238442822384,
|
|
"grad_norm": 1.0568984746932983,
|
|
"learning_rate": 2.7586206896551725e-06,
|
|
"loss": 0.8287128210067749,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.04412003244120032,
|
|
"grad_norm": 0.9133521914482117,
|
|
"learning_rate": 2.844827586206897e-06,
|
|
"loss": 0.7776259183883667,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.0454176804541768,
|
|
"grad_norm": 0.9421447515487671,
|
|
"learning_rate": 2.931034482758621e-06,
|
|
"loss": 0.8422408103942871,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.04671532846715328,
|
|
"grad_norm": 0.9022809863090515,
|
|
"learning_rate": 3.017241379310345e-06,
|
|
"loss": 0.8629512190818787,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.04801297648012977,
|
|
"grad_norm": 0.780587375164032,
|
|
"learning_rate": 3.103448275862069e-06,
|
|
"loss": 0.781667947769165,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.04931062449310625,
|
|
"grad_norm": 0.7616261839866638,
|
|
"learning_rate": 3.1896551724137935e-06,
|
|
"loss": 0.7705612182617188,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.05060827250608273,
|
|
"grad_norm": 0.7669604420661926,
|
|
"learning_rate": 3.2758620689655175e-06,
|
|
"loss": 0.8600028157234192,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.05190592051905921,
|
|
"grad_norm": 0.8013553619384766,
|
|
"learning_rate": 3.362068965517242e-06,
|
|
"loss": 0.8316032886505127,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.053203568532035686,
|
|
"grad_norm": 0.760819673538208,
|
|
"learning_rate": 3.448275862068966e-06,
|
|
"loss": 0.8170580863952637,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.054501216545012166,
|
|
"grad_norm": 0.719124436378479,
|
|
"learning_rate": 3.5344827586206898e-06,
|
|
"loss": 0.7726640701293945,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.055798864557988645,
|
|
"grad_norm": 0.7333022952079773,
|
|
"learning_rate": 3.620689655172414e-06,
|
|
"loss": 0.7847077250480652,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.057096512570965124,
|
|
"grad_norm": 0.7520370483398438,
|
|
"learning_rate": 3.7068965517241385e-06,
|
|
"loss": 0.7839537858963013,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.058394160583941604,
|
|
"grad_norm": 0.7901465892791748,
|
|
"learning_rate": 3.793103448275862e-06,
|
|
"loss": 0.8387829065322876,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.05969180859691808,
|
|
"grad_norm": 0.7442818284034729,
|
|
"learning_rate": 3.8793103448275865e-06,
|
|
"loss": 0.7767361402511597,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.06098945660989456,
|
|
"grad_norm": 0.6601076722145081,
|
|
"learning_rate": 3.96551724137931e-06,
|
|
"loss": 0.7320765256881714,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.06228710462287105,
|
|
"grad_norm": 0.6948726773262024,
|
|
"learning_rate": 4.051724137931034e-06,
|
|
"loss": 0.8290716409683228,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.06358475263584752,
|
|
"grad_norm": 0.6669663190841675,
|
|
"learning_rate": 4.137931034482759e-06,
|
|
"loss": 0.7917401790618896,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.064882400648824,
|
|
"grad_norm": 0.6616993546485901,
|
|
"learning_rate": 4.224137931034483e-06,
|
|
"loss": 0.7917563915252686,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.06618004866180048,
|
|
"grad_norm": 0.6595159769058228,
|
|
"learning_rate": 4.310344827586207e-06,
|
|
"loss": 0.7899826765060425,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.06747769667477696,
|
|
"grad_norm": 0.6776856184005737,
|
|
"learning_rate": 4.396551724137931e-06,
|
|
"loss": 0.8258700370788574,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.06877534468775345,
|
|
"grad_norm": 0.7086785435676575,
|
|
"learning_rate": 4.482758620689656e-06,
|
|
"loss": 0.8281067609786987,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.07007299270072993,
|
|
"grad_norm": 0.6362385153770447,
|
|
"learning_rate": 4.56896551724138e-06,
|
|
"loss": 0.7703720331192017,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.07137064071370641,
|
|
"grad_norm": 1.0633333921432495,
|
|
"learning_rate": 4.655172413793104e-06,
|
|
"loss": 0.7698659896850586,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.07266828872668289,
|
|
"grad_norm": 0.6450533270835876,
|
|
"learning_rate": 4.741379310344828e-06,
|
|
"loss": 0.7988396286964417,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.07396593673965937,
|
|
"grad_norm": 0.6176488995552063,
|
|
"learning_rate": 4.8275862068965525e-06,
|
|
"loss": 0.7486166954040527,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.07526358475263585,
|
|
"grad_norm": 0.6564953327178955,
|
|
"learning_rate": 4.9137931034482765e-06,
|
|
"loss": 0.8380484580993652,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.07656123276561233,
|
|
"grad_norm": 1.4383426904678345,
|
|
"learning_rate": 5e-06,
|
|
"loss": 0.8178592920303345,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.07785888077858881,
|
|
"grad_norm": 0.6065345406532288,
|
|
"learning_rate": 5.086206896551724e-06,
|
|
"loss": 0.7402592897415161,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.07915652879156529,
|
|
"grad_norm": 0.6361149549484253,
|
|
"learning_rate": 5.172413793103449e-06,
|
|
"loss": 0.7722781300544739,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.08045417680454177,
|
|
"grad_norm": 0.6287536025047302,
|
|
"learning_rate": 5.258620689655173e-06,
|
|
"loss": 0.8166277408599854,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.08175182481751825,
|
|
"grad_norm": 0.6238293051719666,
|
|
"learning_rate": 5.344827586206896e-06,
|
|
"loss": 0.7863017320632935,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.08304947283049473,
|
|
"grad_norm": 0.6116371750831604,
|
|
"learning_rate": 5.431034482758621e-06,
|
|
"loss": 0.8139179944992065,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.08434712084347121,
|
|
"grad_norm": 0.6211651563644409,
|
|
"learning_rate": 5.517241379310345e-06,
|
|
"loss": 0.802246630191803,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.08564476885644769,
|
|
"grad_norm": 0.6179801821708679,
|
|
"learning_rate": 5.603448275862069e-06,
|
|
"loss": 0.8019924163818359,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.08694241686942417,
|
|
"grad_norm": 0.6304736733436584,
|
|
"learning_rate": 5.689655172413794e-06,
|
|
"loss": 0.797938346862793,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.08824006488240065,
|
|
"grad_norm": 0.5991215705871582,
|
|
"learning_rate": 5.775862068965518e-06,
|
|
"loss": 0.7311227917671204,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.08953771289537713,
|
|
"grad_norm": 0.6336483955383301,
|
|
"learning_rate": 5.862068965517242e-06,
|
|
"loss": 0.8222885131835938,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.0908353609083536,
|
|
"grad_norm": 0.6269424557685852,
|
|
"learning_rate": 5.9482758620689665e-06,
|
|
"loss": 0.7962170839309692,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.09213300892133008,
|
|
"grad_norm": 0.6373898983001709,
|
|
"learning_rate": 6.03448275862069e-06,
|
|
"loss": 0.8021715879440308,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.09343065693430656,
|
|
"grad_norm": 0.6345935463905334,
|
|
"learning_rate": 6.1206896551724135e-06,
|
|
"loss": 0.8776074647903442,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.09472830494728304,
|
|
"grad_norm": 0.6083796620368958,
|
|
"learning_rate": 6.206896551724138e-06,
|
|
"loss": 0.7513650059700012,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.09602595296025954,
|
|
"grad_norm": 0.6068538427352905,
|
|
"learning_rate": 6.293103448275862e-06,
|
|
"loss": 0.7684041857719421,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.09732360097323602,
|
|
"grad_norm": 0.6176103949546814,
|
|
"learning_rate": 6.379310344827587e-06,
|
|
"loss": 0.7645843029022217,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.0986212489862125,
|
|
"grad_norm": 0.6182767152786255,
|
|
"learning_rate": 6.465517241379311e-06,
|
|
"loss": 0.8177169561386108,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.09991889699918897,
|
|
"grad_norm": 0.6175945997238159,
|
|
"learning_rate": 6.551724137931035e-06,
|
|
"loss": 0.7822265625,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.10121654501216545,
|
|
"grad_norm": 0.6050496101379395,
|
|
"learning_rate": 6.63793103448276e-06,
|
|
"loss": 0.7576093673706055,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.10251419302514193,
|
|
"grad_norm": 0.7123962640762329,
|
|
"learning_rate": 6.724137931034484e-06,
|
|
"loss": 0.8231764435768127,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.10381184103811841,
|
|
"grad_norm": 0.61634361743927,
|
|
"learning_rate": 6.810344827586207e-06,
|
|
"loss": 0.7479314804077148,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.10510948905109489,
|
|
"grad_norm": 0.5944046378135681,
|
|
"learning_rate": 6.896551724137932e-06,
|
|
"loss": 0.7602187395095825,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.10640713706407137,
|
|
"grad_norm": 1.9641212224960327,
|
|
"learning_rate": 6.982758620689656e-06,
|
|
"loss": 0.7291417121887207,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.10770478507704785,
|
|
"grad_norm": 0.6604083776473999,
|
|
"learning_rate": 7.0689655172413796e-06,
|
|
"loss": 0.7462600469589233,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.10900243309002433,
|
|
"grad_norm": 0.6202764511108398,
|
|
"learning_rate": 7.155172413793104e-06,
|
|
"loss": 0.8041630983352661,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.11030008110300081,
|
|
"grad_norm": 0.6278896331787109,
|
|
"learning_rate": 7.241379310344828e-06,
|
|
"loss": 0.7589733600616455,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.11159772911597729,
|
|
"grad_norm": 0.5918757915496826,
|
|
"learning_rate": 7.327586206896552e-06,
|
|
"loss": 0.7733231782913208,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.11289537712895377,
|
|
"grad_norm": 0.6275747418403625,
|
|
"learning_rate": 7.413793103448277e-06,
|
|
"loss": 0.7821832299232483,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.11419302514193025,
|
|
"grad_norm": 0.5935595631599426,
|
|
"learning_rate": 7.500000000000001e-06,
|
|
"loss": 0.7410198450088501,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.11549067315490673,
|
|
"grad_norm": 0.6088429093360901,
|
|
"learning_rate": 7.586206896551724e-06,
|
|
"loss": 0.78556227684021,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.11678832116788321,
|
|
"grad_norm": 0.6014888286590576,
|
|
"learning_rate": 7.672413793103449e-06,
|
|
"loss": 0.7850443124771118,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.11808596918085969,
|
|
"grad_norm": 0.644192636013031,
|
|
"learning_rate": 7.758620689655173e-06,
|
|
"loss": 0.7852208614349365,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.11938361719383617,
|
|
"grad_norm": 0.6681314706802368,
|
|
"learning_rate": 7.844827586206897e-06,
|
|
"loss": 0.8286585211753845,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.12068126520681265,
|
|
"grad_norm": 0.6156536936759949,
|
|
"learning_rate": 7.93103448275862e-06,
|
|
"loss": 0.7740339040756226,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.12197891321978913,
|
|
"grad_norm": 0.5617393255233765,
|
|
"learning_rate": 8.017241379310345e-06,
|
|
"loss": 0.7133764028549194,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.12327656123276562,
|
|
"grad_norm": 0.6284353733062744,
|
|
"learning_rate": 8.103448275862069e-06,
|
|
"loss": 0.8572052121162415,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.1245742092457421,
|
|
"grad_norm": 0.6048849821090698,
|
|
"learning_rate": 8.189655172413794e-06,
|
|
"loss": 0.7354931831359863,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.12587185725871858,
|
|
"grad_norm": 0.717276930809021,
|
|
"learning_rate": 8.275862068965518e-06,
|
|
"loss": 0.7633223533630371,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.12716950527169504,
|
|
"grad_norm": 0.5850024223327637,
|
|
"learning_rate": 8.362068965517242e-06,
|
|
"loss": 0.7660566568374634,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.12846715328467154,
|
|
"grad_norm": 0.6040444374084473,
|
|
"learning_rate": 8.448275862068966e-06,
|
|
"loss": 0.687772274017334,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.129764801297648,
|
|
"grad_norm": 0.635793924331665,
|
|
"learning_rate": 8.53448275862069e-06,
|
|
"loss": 0.8365746736526489,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.1310624493106245,
|
|
"grad_norm": 0.681013822555542,
|
|
"learning_rate": 8.620689655172414e-06,
|
|
"loss": 0.8097229599952698,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.13236009732360096,
|
|
"grad_norm": 0.5976776480674744,
|
|
"learning_rate": 8.706896551724138e-06,
|
|
"loss": 0.7460197806358337,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.13365774533657745,
|
|
"grad_norm": 0.5931348204612732,
|
|
"learning_rate": 8.793103448275862e-06,
|
|
"loss": 0.7234000563621521,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.13495539334955392,
|
|
"grad_norm": 0.6787912845611572,
|
|
"learning_rate": 8.879310344827588e-06,
|
|
"loss": 0.8100739121437073,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.1362530413625304,
|
|
"grad_norm": 0.6532299518585205,
|
|
"learning_rate": 8.965517241379312e-06,
|
|
"loss": 0.8400453925132751,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.1375506893755069,
|
|
"grad_norm": 0.6569010615348816,
|
|
"learning_rate": 9.051724137931036e-06,
|
|
"loss": 0.8247137069702148,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.13884833738848337,
|
|
"grad_norm": 0.6199808716773987,
|
|
"learning_rate": 9.13793103448276e-06,
|
|
"loss": 0.7428423166275024,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.14014598540145987,
|
|
"grad_norm": 0.6075517535209656,
|
|
"learning_rate": 9.224137931034484e-06,
|
|
"loss": 0.7575728893280029,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.14144363341443633,
|
|
"grad_norm": 0.6420115232467651,
|
|
"learning_rate": 9.310344827586207e-06,
|
|
"loss": 0.8051052093505859,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.14274128142741282,
|
|
"grad_norm": 0.6138091683387756,
|
|
"learning_rate": 9.396551724137931e-06,
|
|
"loss": 0.8522422313690186,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.1440389294403893,
|
|
"grad_norm": 0.650187075138092,
|
|
"learning_rate": 9.482758620689655e-06,
|
|
"loss": 0.8301827907562256,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.14533657745336578,
|
|
"grad_norm": 0.6030973196029663,
|
|
"learning_rate": 9.56896551724138e-06,
|
|
"loss": 0.7207387089729309,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.14663422546634225,
|
|
"grad_norm": 0.622131884098053,
|
|
"learning_rate": 9.655172413793105e-06,
|
|
"loss": 0.7915451526641846,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.14793187347931874,
|
|
"grad_norm": 0.6085039377212524,
|
|
"learning_rate": 9.741379310344829e-06,
|
|
"loss": 0.7769342064857483,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.1492295214922952,
|
|
"grad_norm": 0.6578651666641235,
|
|
"learning_rate": 9.827586206896553e-06,
|
|
"loss": 0.7566852569580078,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.1505271695052717,
|
|
"grad_norm": 0.6066433787345886,
|
|
"learning_rate": 9.913793103448277e-06,
|
|
"loss": 0.7825925350189209,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.15182481751824817,
|
|
"grad_norm": 0.6409288644790649,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.8247882127761841,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.15312246553122466,
|
|
"grad_norm": 0.6675072312355042,
|
|
"learning_rate": 9.99999488813276e-06,
|
|
"loss": 0.8096261024475098,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.15442011354420113,
|
|
"grad_norm": 0.6444228887557983,
|
|
"learning_rate": 9.999979552541496e-06,
|
|
"loss": 0.7326732873916626,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.15571776155717762,
|
|
"grad_norm": 0.6155293583869934,
|
|
"learning_rate": 9.99995399325756e-06,
|
|
"loss": 0.7694077491760254,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.15701540957015409,
|
|
"grad_norm": 0.6370646953582764,
|
|
"learning_rate": 9.999918210333219e-06,
|
|
"loss": 0.8235340118408203,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.15831305758313058,
|
|
"grad_norm": 0.6056079864501953,
|
|
"learning_rate": 9.999872203841635e-06,
|
|
"loss": 0.7498428821563721,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.15961070559610704,
|
|
"grad_norm": 0.6514161825180054,
|
|
"learning_rate": 9.999815973876888e-06,
|
|
"loss": 0.772469162940979,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.16090835360908354,
|
|
"grad_norm": 0.6417706608772278,
|
|
"learning_rate": 9.999749520553945e-06,
|
|
"loss": 0.8074150085449219,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.16220600162206,
|
|
"grad_norm": 0.6162619590759277,
|
|
"learning_rate": 9.99967284400869e-06,
|
|
"loss": 0.7649105191230774,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.1635036496350365,
|
|
"grad_norm": 0.6231618523597717,
|
|
"learning_rate": 9.99958594439791e-06,
|
|
"loss": 0.7435484528541565,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.164801297648013,
|
|
"grad_norm": 0.6211341023445129,
|
|
"learning_rate": 9.999488821899286e-06,
|
|
"loss": 0.7700725793838501,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.16609894566098946,
|
|
"grad_norm": 0.6546758413314819,
|
|
"learning_rate": 9.999381476711416e-06,
|
|
"loss": 0.7208442091941833,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.16739659367396595,
|
|
"grad_norm": 0.6165010333061218,
|
|
"learning_rate": 9.999263909053789e-06,
|
|
"loss": 0.7380815148353577,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.16869424168694241,
|
|
"grad_norm": 0.7457146048545837,
|
|
"learning_rate": 9.999136119166803e-06,
|
|
"loss": 0.7085788249969482,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.1699918896999189,
|
|
"grad_norm": 0.6893863677978516,
|
|
"learning_rate": 9.998998107311758e-06,
|
|
"loss": 0.8248496055603027,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.17128953771289537,
|
|
"grad_norm": 0.6099883317947388,
|
|
"learning_rate": 9.998849873770849e-06,
|
|
"loss": 0.7661588191986084,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.17258718572587187,
|
|
"grad_norm": 0.5964142084121704,
|
|
"learning_rate": 9.998691418847177e-06,
|
|
"loss": 0.7037764191627502,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.17388483373884833,
|
|
"grad_norm": 0.6277547478675842,
|
|
"learning_rate": 9.998522742864745e-06,
|
|
"loss": 0.8015055060386658,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.17518248175182483,
|
|
"grad_norm": 0.6385223865509033,
|
|
"learning_rate": 9.998343846168448e-06,
|
|
"loss": 0.7598564028739929,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.1764801297648013,
|
|
"grad_norm": 0.6057168245315552,
|
|
"learning_rate": 9.998154729124092e-06,
|
|
"loss": 0.7190810441970825,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.17777777777777778,
|
|
"grad_norm": 0.6524573564529419,
|
|
"learning_rate": 9.997955392118365e-06,
|
|
"loss": 0.7655267715454102,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.17907542579075425,
|
|
"grad_norm": 0.593307614326477,
|
|
"learning_rate": 9.997745835558867e-06,
|
|
"loss": 0.6991128921508789,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.18037307380373074,
|
|
"grad_norm": 0.6667762994766235,
|
|
"learning_rate": 9.997526059874086e-06,
|
|
"loss": 0.7836197018623352,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.1816707218167072,
|
|
"grad_norm": 0.6364095211029053,
|
|
"learning_rate": 9.997296065513405e-06,
|
|
"loss": 0.7866847515106201,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.1829683698296837,
|
|
"grad_norm": 0.6693204641342163,
|
|
"learning_rate": 9.997055852947109e-06,
|
|
"loss": 0.8498630523681641,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.18426601784266017,
|
|
"grad_norm": 0.6703641414642334,
|
|
"learning_rate": 9.996805422666367e-06,
|
|
"loss": 0.7902424335479736,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.18556366585563666,
|
|
"grad_norm": 0.6226605772972107,
|
|
"learning_rate": 9.99654477518325e-06,
|
|
"loss": 0.7982854843139648,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.18686131386861313,
|
|
"grad_norm": 0.5963988304138184,
|
|
"learning_rate": 9.996273911030714e-06,
|
|
"loss": 0.7364012598991394,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.18815896188158962,
|
|
"grad_norm": 3.2399189472198486,
|
|
"learning_rate": 9.995992830762608e-06,
|
|
"loss": 0.8748813271522522,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.18945660989456609,
|
|
"grad_norm": 0.6035348773002625,
|
|
"learning_rate": 9.99570153495367e-06,
|
|
"loss": 0.7249287366867065,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.19075425790754258,
|
|
"grad_norm": 0.6258792877197266,
|
|
"learning_rate": 9.995400024199526e-06,
|
|
"loss": 0.7734540700912476,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.19205190592051907,
|
|
"grad_norm": 0.6568045020103455,
|
|
"learning_rate": 9.99508829911669e-06,
|
|
"loss": 0.8293142318725586,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.19334955393349554,
|
|
"grad_norm": 0.8624785542488098,
|
|
"learning_rate": 9.994766360342557e-06,
|
|
"loss": 0.8258950710296631,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.19464720194647203,
|
|
"grad_norm": 0.591865599155426,
|
|
"learning_rate": 9.994434208535415e-06,
|
|
"loss": 0.7743998765945435,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.1959448499594485,
|
|
"grad_norm": 0.6273242831230164,
|
|
"learning_rate": 9.994091844374431e-06,
|
|
"loss": 0.8304177522659302,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.197242497972425,
|
|
"grad_norm": 0.6169039011001587,
|
|
"learning_rate": 9.993739268559648e-06,
|
|
"loss": 0.8317509889602661,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.19854014598540146,
|
|
"grad_norm": 0.6500508785247803,
|
|
"learning_rate": 9.993376481812001e-06,
|
|
"loss": 0.8074177503585815,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.19983779399837795,
|
|
"grad_norm": 0.691698431968689,
|
|
"learning_rate": 9.99300348487329e-06,
|
|
"loss": 0.7966357469558716,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.20113544201135442,
|
|
"grad_norm": 0.6341277956962585,
|
|
"learning_rate": 9.992620278506203e-06,
|
|
"loss": 0.7922544479370117,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.2024330900243309,
|
|
"grad_norm": 0.5936447381973267,
|
|
"learning_rate": 9.9922268634943e-06,
|
|
"loss": 0.6732587218284607,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.20373073803730737,
|
|
"grad_norm": 0.6575024127960205,
|
|
"learning_rate": 9.991823240642014e-06,
|
|
"loss": 0.8733258247375488,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.20502838605028387,
|
|
"grad_norm": 0.6686046719551086,
|
|
"learning_rate": 9.991409410774654e-06,
|
|
"loss": 0.790815532207489,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.20632603406326033,
|
|
"grad_norm": 1.4253793954849243,
|
|
"learning_rate": 9.990985374738396e-06,
|
|
"loss": 0.7325870990753174,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.20762368207623683,
|
|
"grad_norm": 0.6524296998977661,
|
|
"learning_rate": 9.990551133400284e-06,
|
|
"loss": 0.7516152858734131,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.2089213300892133,
|
|
"grad_norm": 0.6569153666496277,
|
|
"learning_rate": 9.990106687648234e-06,
|
|
"loss": 0.7317984104156494,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.21021897810218979,
|
|
"grad_norm": 0.5729793906211853,
|
|
"learning_rate": 9.989652038391025e-06,
|
|
"loss": 0.7050694227218628,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.21151662611516625,
|
|
"grad_norm": 0.5924677848815918,
|
|
"learning_rate": 9.9891871865583e-06,
|
|
"loss": 0.7387759685516357,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.21281427412814274,
|
|
"grad_norm": 0.9845248460769653,
|
|
"learning_rate": 9.988712133100563e-06,
|
|
"loss": 0.8402718305587769,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.2141119221411192,
|
|
"grad_norm": 0.6559567451477051,
|
|
"learning_rate": 9.988226878989178e-06,
|
|
"loss": 0.7516730427742004,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.2154095701540957,
|
|
"grad_norm": 0.603742778301239,
|
|
"learning_rate": 9.987731425216364e-06,
|
|
"loss": 0.6687497496604919,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.21670721816707217,
|
|
"grad_norm": 0.6345369815826416,
|
|
"learning_rate": 9.987225772795204e-06,
|
|
"loss": 0.8063400387763977,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.21800486618004866,
|
|
"grad_norm": 0.6372174024581909,
|
|
"learning_rate": 9.986709922759626e-06,
|
|
"loss": 0.7703537940979004,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.21930251419302516,
|
|
"grad_norm": 0.607814371585846,
|
|
"learning_rate": 9.986183876164412e-06,
|
|
"loss": 0.6834731101989746,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.22060016220600162,
|
|
"grad_norm": 0.5630145072937012,
|
|
"learning_rate": 9.985647634085197e-06,
|
|
"loss": 0.7261765599250793,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.22189781021897811,
|
|
"grad_norm": 0.6719157695770264,
|
|
"learning_rate": 9.985101197618456e-06,
|
|
"loss": 0.7341983318328857,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.22319545823195458,
|
|
"grad_norm": 0.6283457279205322,
|
|
"learning_rate": 9.98454456788152e-06,
|
|
"loss": 0.7351614832878113,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.22449310624493107,
|
|
"grad_norm": 0.6344905495643616,
|
|
"learning_rate": 9.983977746012547e-06,
|
|
"loss": 0.7843720316886902,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.22579075425790754,
|
|
"grad_norm": 0.605237603187561,
|
|
"learning_rate": 9.983400733170553e-06,
|
|
"loss": 0.7114173769950867,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.22708840227088403,
|
|
"grad_norm": 0.626672089099884,
|
|
"learning_rate": 9.982813530535377e-06,
|
|
"loss": 0.7024215459823608,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.2283860502838605,
|
|
"grad_norm": 0.6185852885246277,
|
|
"learning_rate": 9.982216139307705e-06,
|
|
"loss": 0.8043787479400635,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.229683698296837,
|
|
"grad_norm": 0.5857049226760864,
|
|
"learning_rate": 9.981608560709044e-06,
|
|
"loss": 0.6755383014678955,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.23098134630981346,
|
|
"grad_norm": 0.6019972562789917,
|
|
"learning_rate": 9.980990795981747e-06,
|
|
"loss": 0.7932974100112915,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.23227899432278995,
|
|
"grad_norm": 0.6226310729980469,
|
|
"learning_rate": 9.980362846388978e-06,
|
|
"loss": 0.784454882144928,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.23357664233576642,
|
|
"grad_norm": 0.643936812877655,
|
|
"learning_rate": 9.97972471321474e-06,
|
|
"loss": 0.768436849117279,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.2348742903487429,
|
|
"grad_norm": 0.629254162311554,
|
|
"learning_rate": 9.979076397763853e-06,
|
|
"loss": 0.7261864542961121,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.23617193836171937,
|
|
"grad_norm": 0.6138353943824768,
|
|
"learning_rate": 9.978417901361958e-06,
|
|
"loss": 0.8290830254554749,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.23746958637469587,
|
|
"grad_norm": 0.6166982054710388,
|
|
"learning_rate": 9.977749225355513e-06,
|
|
"loss": 0.7295878529548645,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.23876723438767233,
|
|
"grad_norm": 0.5729910731315613,
|
|
"learning_rate": 9.977070371111793e-06,
|
|
"loss": 0.7391046285629272,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.24006488240064883,
|
|
"grad_norm": 0.6283906102180481,
|
|
"learning_rate": 9.976381340018879e-06,
|
|
"loss": 0.7741225957870483,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.2413625304136253,
|
|
"grad_norm": 0.5742847919464111,
|
|
"learning_rate": 9.97568213348567e-06,
|
|
"loss": 0.7565523386001587,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.24266017842660179,
|
|
"grad_norm": 0.5885831713676453,
|
|
"learning_rate": 9.974972752941861e-06,
|
|
"loss": 0.7079343199729919,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.24395782643957825,
|
|
"grad_norm": 0.6233158707618713,
|
|
"learning_rate": 9.97425319983796e-06,
|
|
"loss": 0.802773118019104,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.24525547445255474,
|
|
"grad_norm": 0.6107950210571289,
|
|
"learning_rate": 9.97352347564527e-06,
|
|
"loss": 0.7514665126800537,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.24655312246553124,
|
|
"grad_norm": 0.6127108335494995,
|
|
"learning_rate": 9.972783581855894e-06,
|
|
"loss": 0.766715943813324,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.2478507704785077,
|
|
"grad_norm": 0.5911589860916138,
|
|
"learning_rate": 9.972033519982722e-06,
|
|
"loss": 0.719687283039093,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.2491484184914842,
|
|
"grad_norm": 0.7104600071907043,
|
|
"learning_rate": 9.971273291559447e-06,
|
|
"loss": 0.7840068340301514,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.25044606650446066,
|
|
"grad_norm": 1.2322938442230225,
|
|
"learning_rate": 9.97050289814054e-06,
|
|
"loss": 0.7457755208015442,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.25174371451743716,
|
|
"grad_norm": 0.568343460559845,
|
|
"learning_rate": 9.969722341301261e-06,
|
|
"loss": 0.6806910037994385,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.25304136253041365,
|
|
"grad_norm": 0.6099660396575928,
|
|
"learning_rate": 9.968931622637652e-06,
|
|
"loss": 0.7885247468948364,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.2543390105433901,
|
|
"grad_norm": 0.5906837582588196,
|
|
"learning_rate": 9.968130743766533e-06,
|
|
"loss": 0.7320465445518494,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.2556366585563666,
|
|
"grad_norm": 0.5778429508209229,
|
|
"learning_rate": 9.967319706325495e-06,
|
|
"loss": 0.7082957029342651,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.2569343065693431,
|
|
"grad_norm": 0.5944257974624634,
|
|
"learning_rate": 9.96649851197291e-06,
|
|
"loss": 0.7171834707260132,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.25823195458231957,
|
|
"grad_norm": 0.8729922771453857,
|
|
"learning_rate": 9.965667162387908e-06,
|
|
"loss": 0.8201053142547607,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.259529602595296,
|
|
"grad_norm": 0.6156542897224426,
|
|
"learning_rate": 9.964825659270391e-06,
|
|
"loss": 0.7408115863800049,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.2608272506082725,
|
|
"grad_norm": 0.5976687669754028,
|
|
"learning_rate": 9.963974004341019e-06,
|
|
"loss": 0.7426021099090576,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.262124898621249,
|
|
"grad_norm": 0.6217131018638611,
|
|
"learning_rate": 9.963112199341212e-06,
|
|
"loss": 0.7804723978042603,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.2634225466342255,
|
|
"grad_norm": 0.5792650580406189,
|
|
"learning_rate": 9.96224024603314e-06,
|
|
"loss": 0.6894349455833435,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.2647201946472019,
|
|
"grad_norm": 0.6177152395248413,
|
|
"learning_rate": 9.961358146199729e-06,
|
|
"loss": 0.717537522315979,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.2660178426601784,
|
|
"grad_norm": 0.6125051975250244,
|
|
"learning_rate": 9.960465901644651e-06,
|
|
"loss": 0.774456799030304,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.2673154906731549,
|
|
"grad_norm": 0.6172115206718445,
|
|
"learning_rate": 9.959563514192317e-06,
|
|
"loss": 0.7355530261993408,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.2686131386861314,
|
|
"grad_norm": 0.6835010051727295,
|
|
"learning_rate": 9.958650985687884e-06,
|
|
"loss": 0.8002670407295227,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.26991078669910784,
|
|
"grad_norm": 0.6039808392524719,
|
|
"learning_rate": 9.95772831799724e-06,
|
|
"loss": 0.784502387046814,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.27120843471208433,
|
|
"grad_norm": 3.698056936264038,
|
|
"learning_rate": 9.956795513007008e-06,
|
|
"loss": 0.7473998069763184,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.2725060827250608,
|
|
"grad_norm": 0.6423486471176147,
|
|
"learning_rate": 9.955852572624538e-06,
|
|
"loss": 0.7945725917816162,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.2738037307380373,
|
|
"grad_norm": 0.5756685137748718,
|
|
"learning_rate": 9.954899498777903e-06,
|
|
"loss": 0.7909812927246094,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.2751013787510138,
|
|
"grad_norm": 0.5984244346618652,
|
|
"learning_rate": 9.9539362934159e-06,
|
|
"loss": 0.7091703414916992,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.27639902676399025,
|
|
"grad_norm": 0.6023333072662354,
|
|
"learning_rate": 9.952962958508038e-06,
|
|
"loss": 0.7251565456390381,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.27769667477696675,
|
|
"grad_norm": 0.6191360950469971,
|
|
"learning_rate": 9.951979496044544e-06,
|
|
"loss": 0.7646386027336121,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.27899432278994324,
|
|
"grad_norm": 0.6032703518867493,
|
|
"learning_rate": 9.950985908036346e-06,
|
|
"loss": 0.76767897605896,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.28029197080291973,
|
|
"grad_norm": 0.5847381949424744,
|
|
"learning_rate": 9.94998219651508e-06,
|
|
"loss": 0.7368282079696655,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.28158961881589617,
|
|
"grad_norm": 0.6057823896408081,
|
|
"learning_rate": 9.948968363533085e-06,
|
|
"loss": 0.7350323796272278,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.28288726682887266,
|
|
"grad_norm": 0.6186010241508484,
|
|
"learning_rate": 9.947944411163391e-06,
|
|
"loss": 0.7249234318733215,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.28418491484184916,
|
|
"grad_norm": 0.6159788370132446,
|
|
"learning_rate": 9.946910341499722e-06,
|
|
"loss": 0.761109471321106,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.28548256285482565,
|
|
"grad_norm": 0.5817273259162903,
|
|
"learning_rate": 9.945866156656487e-06,
|
|
"loss": 0.7725365161895752,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.2867802108678021,
|
|
"grad_norm": 0.655717134475708,
|
|
"learning_rate": 9.944811858768782e-06,
|
|
"loss": 0.7668634057044983,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.2880778588807786,
|
|
"grad_norm": 0.6457056403160095,
|
|
"learning_rate": 9.943747449992379e-06,
|
|
"loss": 0.7912311553955078,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.2893755068937551,
|
|
"grad_norm": 0.5742535591125488,
|
|
"learning_rate": 9.942672932503722e-06,
|
|
"loss": 0.7619901299476624,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.29067315490673157,
|
|
"grad_norm": 0.5950078964233398,
|
|
"learning_rate": 9.941588308499932e-06,
|
|
"loss": 0.7898773550987244,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.291970802919708,
|
|
"grad_norm": 0.6142423152923584,
|
|
"learning_rate": 9.940493580198787e-06,
|
|
"loss": 0.7200186252593994,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.2932684509326845,
|
|
"grad_norm": 0.6070595979690552,
|
|
"learning_rate": 9.93938874983873e-06,
|
|
"loss": 0.6990747451782227,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.294566098945661,
|
|
"grad_norm": 0.6014435887336731,
|
|
"learning_rate": 9.93827381967886e-06,
|
|
"loss": 0.7597475647926331,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.2958637469586375,
|
|
"grad_norm": 0.5983416438102722,
|
|
"learning_rate": 9.937148791998926e-06,
|
|
"loss": 0.738788366317749,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.2971613949716139,
|
|
"grad_norm": 2.7879600524902344,
|
|
"learning_rate": 9.936013669099326e-06,
|
|
"loss": 0.7541340589523315,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.2984590429845904,
|
|
"grad_norm": 0.6435497403144836,
|
|
"learning_rate": 9.9348684533011e-06,
|
|
"loss": 0.8065454959869385,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.2984590429845904,
|
|
"eval_loss": 0.7250053882598877,
|
|
"eval_runtime": 73.3232,
|
|
"eval_samples_per_second": 70.81,
|
|
"eval_steps_per_second": 8.851,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.2997566909975669,
|
|
"grad_norm": 2.4210150241851807,
|
|
"learning_rate": 9.93371314694592e-06,
|
|
"loss": 0.7646887302398682,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.3010543390105434,
|
|
"grad_norm": 0.601508617401123,
|
|
"learning_rate": 9.9325477523961e-06,
|
|
"loss": 0.7489044070243835,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.3023519870235199,
|
|
"grad_norm": 0.5808404684066772,
|
|
"learning_rate": 9.931372272034573e-06,
|
|
"loss": 0.7624624371528625,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.30364963503649633,
|
|
"grad_norm": 1.0590876340866089,
|
|
"learning_rate": 9.930186708264902e-06,
|
|
"loss": 0.7188542485237122,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.30494728304947283,
|
|
"grad_norm": 0.6582311391830444,
|
|
"learning_rate": 9.928991063511264e-06,
|
|
"loss": 0.7417193055152893,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.3062449310624493,
|
|
"grad_norm": 0.5886158347129822,
|
|
"learning_rate": 9.927785340218448e-06,
|
|
"loss": 0.7227447032928467,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.3075425790754258,
|
|
"grad_norm": 0.8434078693389893,
|
|
"learning_rate": 9.926569540851856e-06,
|
|
"loss": 0.8079698085784912,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.30884022708840225,
|
|
"grad_norm": 0.7032890915870667,
|
|
"learning_rate": 9.925343667897487e-06,
|
|
"loss": 0.730448842048645,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.31013787510137875,
|
|
"grad_norm": 0.5958182215690613,
|
|
"learning_rate": 9.924107723861944e-06,
|
|
"loss": 0.7622323036193848,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.31143552311435524,
|
|
"grad_norm": 0.7387073040008545,
|
|
"learning_rate": 9.922861711272417e-06,
|
|
"loss": 0.8103834390640259,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.31273317112733173,
|
|
"grad_norm": 0.589846134185791,
|
|
"learning_rate": 9.921605632676688e-06,
|
|
"loss": 0.7218436002731323,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.31403081914030817,
|
|
"grad_norm": 1.18753182888031,
|
|
"learning_rate": 9.920339490643119e-06,
|
|
"loss": 0.6769864559173584,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.31532846715328466,
|
|
"grad_norm": 0.6063650250434875,
|
|
"learning_rate": 9.91906328776065e-06,
|
|
"loss": 0.6872894763946533,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.31662611516626116,
|
|
"grad_norm": 0.6060184240341187,
|
|
"learning_rate": 9.917777026638794e-06,
|
|
"loss": 0.7477156519889832,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.31792376317923765,
|
|
"grad_norm": 0.5981388092041016,
|
|
"learning_rate": 9.916480709907626e-06,
|
|
"loss": 0.6859747767448425,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.3192214111922141,
|
|
"grad_norm": 0.5809654593467712,
|
|
"learning_rate": 9.91517434021779e-06,
|
|
"loss": 0.7025295495986938,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.3205190592051906,
|
|
"grad_norm": 0.6036680340766907,
|
|
"learning_rate": 9.913857920240481e-06,
|
|
"loss": 0.8275207877159119,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.3218167072181671,
|
|
"grad_norm": 0.5851848125457764,
|
|
"learning_rate": 9.912531452667441e-06,
|
|
"loss": 0.7031136155128479,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.32311435523114357,
|
|
"grad_norm": 0.5534024238586426,
|
|
"learning_rate": 9.911194940210964e-06,
|
|
"loss": 0.7281129956245422,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.32441200324412,
|
|
"grad_norm": 0.6152268052101135,
|
|
"learning_rate": 9.909848385603878e-06,
|
|
"loss": 0.7846366167068481,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.3257096512570965,
|
|
"grad_norm": 0.5951406359672546,
|
|
"learning_rate": 9.908491791599546e-06,
|
|
"loss": 0.7278503179550171,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.327007299270073,
|
|
"grad_norm": 0.6011956334114075,
|
|
"learning_rate": 9.90712516097186e-06,
|
|
"loss": 0.7939674854278564,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.3283049472830495,
|
|
"grad_norm": 0.6651070713996887,
|
|
"learning_rate": 9.905748496515235e-06,
|
|
"loss": 0.772196888923645,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.329602595296026,
|
|
"grad_norm": 0.617461085319519,
|
|
"learning_rate": 9.904361801044599e-06,
|
|
"loss": 0.7933390140533447,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.3309002433090024,
|
|
"grad_norm": 0.5844789147377014,
|
|
"learning_rate": 9.902965077395395e-06,
|
|
"loss": 0.7286657691001892,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.3321978913219789,
|
|
"grad_norm": 0.6185967326164246,
|
|
"learning_rate": 9.901558328423568e-06,
|
|
"loss": 0.8058604001998901,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.3334955393349554,
|
|
"grad_norm": 0.6511676907539368,
|
|
"learning_rate": 9.900141557005567e-06,
|
|
"loss": 0.7281938195228577,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.3347931873479319,
|
|
"grad_norm": 0.6114381551742554,
|
|
"learning_rate": 9.898714766038326e-06,
|
|
"loss": 0.7546758651733398,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.33609083536090834,
|
|
"grad_norm": 0.5931724905967712,
|
|
"learning_rate": 9.897277958439274e-06,
|
|
"loss": 0.811058759689331,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.33738848337388483,
|
|
"grad_norm": 0.5811541080474854,
|
|
"learning_rate": 9.895831137146319e-06,
|
|
"loss": 0.764075517654419,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.3386861313868613,
|
|
"grad_norm": 0.5857120156288147,
|
|
"learning_rate": 9.894374305117844e-06,
|
|
"loss": 0.730948805809021,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.3399837793998378,
|
|
"grad_norm": 0.5755126476287842,
|
|
"learning_rate": 9.892907465332702e-06,
|
|
"loss": 0.7732649445533752,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.34128142741281425,
|
|
"grad_norm": 0.5852351784706116,
|
|
"learning_rate": 9.891430620790208e-06,
|
|
"loss": 0.6883482933044434,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.34257907542579075,
|
|
"grad_norm": 0.5931571125984192,
|
|
"learning_rate": 9.889943774510136e-06,
|
|
"loss": 0.7685630321502686,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.34387672343876724,
|
|
"grad_norm": 0.7222980260848999,
|
|
"learning_rate": 9.888446929532712e-06,
|
|
"loss": 0.7235557436943054,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.34517437145174373,
|
|
"grad_norm": 0.6728655695915222,
|
|
"learning_rate": 9.886940088918601e-06,
|
|
"loss": 0.7901487350463867,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.34647201946472017,
|
|
"grad_norm": 0.5990903973579407,
|
|
"learning_rate": 9.885423255748916e-06,
|
|
"loss": 0.7315446138381958,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.34776966747769666,
|
|
"grad_norm": 0.6058611869812012,
|
|
"learning_rate": 9.883896433125193e-06,
|
|
"loss": 0.748113751411438,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.34906731549067316,
|
|
"grad_norm": 0.6079699397087097,
|
|
"learning_rate": 9.8823596241694e-06,
|
|
"loss": 0.7346718907356262,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.35036496350364965,
|
|
"grad_norm": 0.5837222337722778,
|
|
"learning_rate": 9.88081283202392e-06,
|
|
"loss": 0.6944899559020996,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.3516626115166261,
|
|
"grad_norm": 0.5878487229347229,
|
|
"learning_rate": 9.879256059851553e-06,
|
|
"loss": 0.766356885433197,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.3529602595296026,
|
|
"grad_norm": 0.605903685092926,
|
|
"learning_rate": 9.877689310835503e-06,
|
|
"loss": 0.7980437278747559,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.3542579075425791,
|
|
"grad_norm": 0.5946698784828186,
|
|
"learning_rate": 9.876112588179378e-06,
|
|
"loss": 0.7276085019111633,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.35555555555555557,
|
|
"grad_norm": 0.5997035503387451,
|
|
"learning_rate": 9.874525895107175e-06,
|
|
"loss": 0.7429395318031311,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.35685320356853206,
|
|
"grad_norm": 0.5639536380767822,
|
|
"learning_rate": 9.872929234863277e-06,
|
|
"loss": 0.7452772855758667,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.3581508515815085,
|
|
"grad_norm": 0.5665518641471863,
|
|
"learning_rate": 9.871322610712452e-06,
|
|
"loss": 0.6850217580795288,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.359448499594485,
|
|
"grad_norm": 0.5540530681610107,
|
|
"learning_rate": 9.869706025939843e-06,
|
|
"loss": 0.6755887269973755,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.3607461476074615,
|
|
"grad_norm": 0.5980620384216309,
|
|
"learning_rate": 9.868079483850955e-06,
|
|
"loss": 0.7464824914932251,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.362043795620438,
|
|
"grad_norm": 0.619748055934906,
|
|
"learning_rate": 9.86644298777165e-06,
|
|
"loss": 0.778630793094635,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.3633414436334144,
|
|
"grad_norm": 0.5898886919021606,
|
|
"learning_rate": 9.864796541048155e-06,
|
|
"loss": 0.7965477705001831,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.3646390916463909,
|
|
"grad_norm": 0.5768588185310364,
|
|
"learning_rate": 9.863140147047034e-06,
|
|
"loss": 0.7540180087089539,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.3659367396593674,
|
|
"grad_norm": 0.6073225140571594,
|
|
"learning_rate": 9.861473809155192e-06,
|
|
"loss": 0.7069481015205383,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.3672343876723439,
|
|
"grad_norm": 0.853999137878418,
|
|
"learning_rate": 9.859797530779871e-06,
|
|
"loss": 0.6730421185493469,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.36853203568532034,
|
|
"grad_norm": 0.5999425649642944,
|
|
"learning_rate": 9.858111315348633e-06,
|
|
"loss": 0.7877826690673828,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.36982968369829683,
|
|
"grad_norm": 0.9857465624809265,
|
|
"learning_rate": 9.856415166309365e-06,
|
|
"loss": 0.7664862871170044,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.3711273317112733,
|
|
"grad_norm": 0.6046482920646667,
|
|
"learning_rate": 9.854709087130261e-06,
|
|
"loss": 0.7595510482788086,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.3724249797242498,
|
|
"grad_norm": 0.6335992217063904,
|
|
"learning_rate": 9.852993081299821e-06,
|
|
"loss": 0.7546533346176147,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.37372262773722625,
|
|
"grad_norm": 0.6080864667892456,
|
|
"learning_rate": 9.851267152326842e-06,
|
|
"loss": 0.7263352870941162,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.37502027575020275,
|
|
"grad_norm": 0.6323843598365784,
|
|
"learning_rate": 9.849531303740414e-06,
|
|
"loss": 0.7602711915969849,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.37631792376317924,
|
|
"grad_norm": 0.6081179976463318,
|
|
"learning_rate": 9.847785539089904e-06,
|
|
"loss": 0.740424633026123,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.37761557177615573,
|
|
"grad_norm": 0.6082411408424377,
|
|
"learning_rate": 9.846029861944964e-06,
|
|
"loss": 0.7497418522834778,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.37891321978913217,
|
|
"grad_norm": 2.8806638717651367,
|
|
"learning_rate": 9.844264275895505e-06,
|
|
"loss": 0.7668443918228149,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.38021086780210867,
|
|
"grad_norm": 0.6383978128433228,
|
|
"learning_rate": 9.842488784551707e-06,
|
|
"loss": 0.7615733742713928,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.38150851581508516,
|
|
"grad_norm": 0.589131772518158,
|
|
"learning_rate": 9.840703391543999e-06,
|
|
"loss": 0.6759642362594604,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.38280616382806165,
|
|
"grad_norm": 0.5658035278320312,
|
|
"learning_rate": 9.838908100523056e-06,
|
|
"loss": 0.6837214231491089,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.38410381184103815,
|
|
"grad_norm": 0.7991520166397095,
|
|
"learning_rate": 9.837102915159797e-06,
|
|
"loss": 0.6950873732566833,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.3854014598540146,
|
|
"grad_norm": 0.6660937666893005,
|
|
"learning_rate": 9.835287839145366e-06,
|
|
"loss": 0.7929595708847046,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.3866991078669911,
|
|
"grad_norm": 0.5755690336227417,
|
|
"learning_rate": 9.833462876191138e-06,
|
|
"loss": 0.7429145574569702,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.38799675587996757,
|
|
"grad_norm": 0.5845285654067993,
|
|
"learning_rate": 9.831628030028698e-06,
|
|
"loss": 0.673062801361084,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.38929440389294406,
|
|
"grad_norm": 0.6984291672706604,
|
|
"learning_rate": 9.829783304409838e-06,
|
|
"loss": 0.7271926403045654,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.3905920519059205,
|
|
"grad_norm": 0.6314187049865723,
|
|
"learning_rate": 9.827928703106562e-06,
|
|
"loss": 0.7842410206794739,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.391889699918897,
|
|
"grad_norm": 0.5774804353713989,
|
|
"learning_rate": 9.826064229911056e-06,
|
|
"loss": 0.7108284831047058,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.3931873479318735,
|
|
"grad_norm": 0.5863385200500488,
|
|
"learning_rate": 9.824189888635699e-06,
|
|
"loss": 0.6845728158950806,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.39448499594485,
|
|
"grad_norm": 0.6258076429367065,
|
|
"learning_rate": 9.82230568311304e-06,
|
|
"loss": 0.7528674602508545,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.3957826439578264,
|
|
"grad_norm": 0.5792856216430664,
|
|
"learning_rate": 9.820411617195807e-06,
|
|
"loss": 0.6762325763702393,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.3970802919708029,
|
|
"grad_norm": 0.6361887454986572,
|
|
"learning_rate": 9.818507694756883e-06,
|
|
"loss": 0.7917072176933289,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.3983779399837794,
|
|
"grad_norm": 0.5518248677253723,
|
|
"learning_rate": 9.816593919689305e-06,
|
|
"loss": 0.6964313387870789,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.3996755879967559,
|
|
"grad_norm": 0.5932815670967102,
|
|
"learning_rate": 9.814670295906265e-06,
|
|
"loss": 0.7426280975341797,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.40097323600973234,
|
|
"grad_norm": 0.6102697253227234,
|
|
"learning_rate": 9.81273682734108e-06,
|
|
"loss": 0.7797576189041138,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.40227088402270883,
|
|
"grad_norm": 0.5859159827232361,
|
|
"learning_rate": 9.81079351794721e-06,
|
|
"loss": 0.6963766813278198,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.4035685320356853,
|
|
"grad_norm": 0.6081574559211731,
|
|
"learning_rate": 9.808840371698226e-06,
|
|
"loss": 0.7762277722358704,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.4048661800486618,
|
|
"grad_norm": 0.5929109454154968,
|
|
"learning_rate": 9.80687739258782e-06,
|
|
"loss": 0.6928838491439819,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.40616382806163825,
|
|
"grad_norm": 0.6156943440437317,
|
|
"learning_rate": 9.804904584629786e-06,
|
|
"loss": 0.7755375504493713,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.40746147607461475,
|
|
"grad_norm": 0.6252034306526184,
|
|
"learning_rate": 9.80292195185802e-06,
|
|
"loss": 0.7410427927970886,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.40875912408759124,
|
|
"grad_norm": 0.5801575183868408,
|
|
"learning_rate": 9.800929498326502e-06,
|
|
"loss": 0.7257661819458008,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.41005677210056773,
|
|
"grad_norm": 0.6071752309799194,
|
|
"learning_rate": 9.798927228109294e-06,
|
|
"loss": 0.72821044921875,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.41135442011354423,
|
|
"grad_norm": 0.6007112264633179,
|
|
"learning_rate": 9.796915145300534e-06,
|
|
"loss": 0.7845569849014282,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.41265206812652067,
|
|
"grad_norm": 0.5841884016990662,
|
|
"learning_rate": 9.794893254014421e-06,
|
|
"loss": 0.7238840460777283,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.41394971613949716,
|
|
"grad_norm": 0.7773919701576233,
|
|
"learning_rate": 9.792861558385212e-06,
|
|
"loss": 0.7452490329742432,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.41524736415247365,
|
|
"grad_norm": 0.6115602254867554,
|
|
"learning_rate": 9.790820062567208e-06,
|
|
"loss": 0.769629716873169,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.41654501216545015,
|
|
"grad_norm": 0.597138524055481,
|
|
"learning_rate": 9.788768770734753e-06,
|
|
"loss": 0.7215956449508667,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.4178426601784266,
|
|
"grad_norm": 0.5886080265045166,
|
|
"learning_rate": 9.78670768708222e-06,
|
|
"loss": 0.6885201930999756,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.4191403081914031,
|
|
"grad_norm": 0.6041279435157776,
|
|
"learning_rate": 9.784636815824003e-06,
|
|
"loss": 0.748660147190094,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.42043795620437957,
|
|
"grad_norm": 0.6275052428245544,
|
|
"learning_rate": 9.782556161194508e-06,
|
|
"loss": 0.7351919412612915,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.42173560421735606,
|
|
"grad_norm": 0.6083272695541382,
|
|
"learning_rate": 9.78046572744815e-06,
|
|
"loss": 0.7183579206466675,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.4230332522303325,
|
|
"grad_norm": 0.5836600065231323,
|
|
"learning_rate": 9.778365518859334e-06,
|
|
"loss": 0.6470940113067627,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.424330900243309,
|
|
"grad_norm": 0.611179769039154,
|
|
"learning_rate": 9.776255539722457e-06,
|
|
"loss": 0.7807853817939758,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.4256285482562855,
|
|
"grad_norm": 0.5962700843811035,
|
|
"learning_rate": 9.774135794351892e-06,
|
|
"loss": 0.7775930166244507,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.426926196269262,
|
|
"grad_norm": 0.5820413827896118,
|
|
"learning_rate": 9.77200628708198e-06,
|
|
"loss": 0.6654623746871948,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.4282238442822384,
|
|
"grad_norm": 0.5713212490081787,
|
|
"learning_rate": 9.769867022267028e-06,
|
|
"loss": 0.7844803333282471,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.4295214922952149,
|
|
"grad_norm": 0.6236836314201355,
|
|
"learning_rate": 9.767718004281288e-06,
|
|
"loss": 0.7271528244018555,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.4308191403081914,
|
|
"grad_norm": 0.5810200572013855,
|
|
"learning_rate": 9.765559237518958e-06,
|
|
"loss": 0.6717958450317383,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.4321167883211679,
|
|
"grad_norm": 0.5980990529060364,
|
|
"learning_rate": 9.763390726394171e-06,
|
|
"loss": 0.7378814220428467,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.43341443633414434,
|
|
"grad_norm": 0.620817244052887,
|
|
"learning_rate": 9.761212475340982e-06,
|
|
"loss": 0.7411800026893616,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.43471208434712083,
|
|
"grad_norm": 0.5831018686294556,
|
|
"learning_rate": 9.759024488813364e-06,
|
|
"loss": 0.6943602561950684,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.4360097323600973,
|
|
"grad_norm": 0.6330239176750183,
|
|
"learning_rate": 9.756826771285195e-06,
|
|
"loss": 0.6916518211364746,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.4373073803730738,
|
|
"grad_norm": 0.5482841730117798,
|
|
"learning_rate": 9.754619327250253e-06,
|
|
"loss": 0.6894945502281189,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.4386050283860503,
|
|
"grad_norm": 0.5814421772956848,
|
|
"learning_rate": 9.7524021612222e-06,
|
|
"loss": 0.7126766443252563,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.43990267639902675,
|
|
"grad_norm": 0.6360822916030884,
|
|
"learning_rate": 9.750175277734582e-06,
|
|
"loss": 0.7301243543624878,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.44120032441200324,
|
|
"grad_norm": 0.5673643946647644,
|
|
"learning_rate": 9.747938681340807e-06,
|
|
"loss": 0.632249116897583,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.44249797242497974,
|
|
"grad_norm": 0.59381103515625,
|
|
"learning_rate": 9.745692376614154e-06,
|
|
"loss": 0.7363812923431396,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.44379562043795623,
|
|
"grad_norm": 0.5689446926116943,
|
|
"learning_rate": 9.743436368147745e-06,
|
|
"loss": 0.6463121175765991,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.44509326845093267,
|
|
"grad_norm": 0.5716972351074219,
|
|
"learning_rate": 9.741170660554548e-06,
|
|
"loss": 0.726833701133728,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.44639091646390916,
|
|
"grad_norm": 0.6090091466903687,
|
|
"learning_rate": 9.73889525846736e-06,
|
|
"loss": 0.7105214595794678,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.44768856447688565,
|
|
"grad_norm": 0.6220769286155701,
|
|
"learning_rate": 9.736610166538802e-06,
|
|
"loss": 0.7986119389533997,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.44898621248986215,
|
|
"grad_norm": 0.6415942311286926,
|
|
"learning_rate": 9.73431538944131e-06,
|
|
"loss": 0.8365704417228699,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.4502838605028386,
|
|
"grad_norm": 0.6018549203872681,
|
|
"learning_rate": 9.73201093186712e-06,
|
|
"loss": 0.754788875579834,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.4515815085158151,
|
|
"grad_norm": 0.6342391967773438,
|
|
"learning_rate": 9.729696798528268e-06,
|
|
"loss": 0.6986638307571411,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.45287915652879157,
|
|
"grad_norm": 0.6728231906890869,
|
|
"learning_rate": 9.727372994156568e-06,
|
|
"loss": 0.7003589272499084,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.45417680454176806,
|
|
"grad_norm": 0.5958974957466125,
|
|
"learning_rate": 9.725039523503615e-06,
|
|
"loss": 0.7366368770599365,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.4554744525547445,
|
|
"grad_norm": 0.5878227353096008,
|
|
"learning_rate": 9.722696391340762e-06,
|
|
"loss": 0.6686346530914307,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.456772100567721,
|
|
"grad_norm": 0.5995833277702332,
|
|
"learning_rate": 9.720343602459123e-06,
|
|
"loss": 0.720341682434082,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.4580697485806975,
|
|
"grad_norm": 0.5677472352981567,
|
|
"learning_rate": 9.717981161669556e-06,
|
|
"loss": 0.7040742039680481,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.459367396593674,
|
|
"grad_norm": 0.5821993350982666,
|
|
"learning_rate": 9.715609073802653e-06,
|
|
"loss": 0.7871376276016235,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.4606650446066504,
|
|
"grad_norm": 0.6043302416801453,
|
|
"learning_rate": 9.713227343708737e-06,
|
|
"loss": 0.6964189410209656,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.4619626926196269,
|
|
"grad_norm": 0.5885515213012695,
|
|
"learning_rate": 9.71083597625784e-06,
|
|
"loss": 0.6288225054740906,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.4632603406326034,
|
|
"grad_norm": 0.5931031703948975,
|
|
"learning_rate": 9.708434976339704e-06,
|
|
"loss": 0.7654111981391907,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.4645579886455799,
|
|
"grad_norm": 0.5929883122444153,
|
|
"learning_rate": 9.706024348863766e-06,
|
|
"loss": 0.7472108602523804,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.4658556366585564,
|
|
"grad_norm": 0.6003252267837524,
|
|
"learning_rate": 9.703604098759148e-06,
|
|
"loss": 0.7266678810119629,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.46715328467153283,
|
|
"grad_norm": 0.6148797869682312,
|
|
"learning_rate": 9.70117423097465e-06,
|
|
"loss": 0.6877753734588623,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.4684509326845093,
|
|
"grad_norm": 0.632279634475708,
|
|
"learning_rate": 9.698734750478739e-06,
|
|
"loss": 0.7512223720550537,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.4697485806974858,
|
|
"grad_norm": 0.5888375639915466,
|
|
"learning_rate": 9.69628566225953e-06,
|
|
"loss": 0.7822796702384949,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.4710462287104623,
|
|
"grad_norm": 0.6794424057006836,
|
|
"learning_rate": 9.693826971324793e-06,
|
|
"loss": 0.7204307317733765,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.47234387672343875,
|
|
"grad_norm": 0.5850203633308411,
|
|
"learning_rate": 9.691358682701927e-06,
|
|
"loss": 0.7395058870315552,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.47364152473641524,
|
|
"grad_norm": 0.947333574295044,
|
|
"learning_rate": 9.688880801437957e-06,
|
|
"loss": 0.7230464220046997,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.47493917274939174,
|
|
"grad_norm": 0.6044790744781494,
|
|
"learning_rate": 9.686393332599525e-06,
|
|
"loss": 0.7762792110443115,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.47623682076236823,
|
|
"grad_norm": 0.558193027973175,
|
|
"learning_rate": 9.683896281272872e-06,
|
|
"loss": 0.7202603816986084,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.47753446877534467,
|
|
"grad_norm": 0.6356004476547241,
|
|
"learning_rate": 9.681389652563837e-06,
|
|
"loss": 0.6806402206420898,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.47883211678832116,
|
|
"grad_norm": 0.5731885433197021,
|
|
"learning_rate": 9.678873451597843e-06,
|
|
"loss": 0.7234804630279541,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.48012976480129765,
|
|
"grad_norm": 0.6563818454742432,
|
|
"learning_rate": 9.676347683519882e-06,
|
|
"loss": 0.7021783590316772,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.48142741281427415,
|
|
"grad_norm": 0.632475733757019,
|
|
"learning_rate": 9.673812353494513e-06,
|
|
"loss": 0.7313486337661743,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.4827250608272506,
|
|
"grad_norm": 0.6746646761894226,
|
|
"learning_rate": 9.671267466705841e-06,
|
|
"loss": 0.7820821404457092,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.4840227088402271,
|
|
"grad_norm": 0.558120608329773,
|
|
"learning_rate": 9.668713028357518e-06,
|
|
"loss": 0.7215161323547363,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.48532035685320357,
|
|
"grad_norm": 0.5888929963111877,
|
|
"learning_rate": 9.666149043672724e-06,
|
|
"loss": 0.7091335654258728,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.48661800486618007,
|
|
"grad_norm": 7.202490329742432,
|
|
"learning_rate": 9.663575517894155e-06,
|
|
"loss": 0.7597553133964539,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.4879156528791565,
|
|
"grad_norm": 0.6477593183517456,
|
|
"learning_rate": 9.660992456284024e-06,
|
|
"loss": 0.6395682692527771,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.489213300892133,
|
|
"grad_norm": 0.6040880680084229,
|
|
"learning_rate": 9.658399864124037e-06,
|
|
"loss": 0.7132856249809265,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.4905109489051095,
|
|
"grad_norm": 0.6065711379051208,
|
|
"learning_rate": 9.655797746715388e-06,
|
|
"loss": 0.7926105260848999,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.491808596918086,
|
|
"grad_norm": 0.6568942666053772,
|
|
"learning_rate": 9.65318610937875e-06,
|
|
"loss": 0.7595465183258057,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.4931062449310625,
|
|
"grad_norm": 0.5950395464897156,
|
|
"learning_rate": 9.650564957454258e-06,
|
|
"loss": 0.7643356919288635,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.4944038929440389,
|
|
"grad_norm": 0.608245313167572,
|
|
"learning_rate": 9.647934296301506e-06,
|
|
"loss": 0.8734641075134277,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.4957015409570154,
|
|
"grad_norm": 0.6461122632026672,
|
|
"learning_rate": 9.64529413129953e-06,
|
|
"loss": 0.7460113167762756,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.4969991889699919,
|
|
"grad_norm": 0.5779212117195129,
|
|
"learning_rate": 9.642644467846799e-06,
|
|
"loss": 0.707379937171936,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.4982968369829684,
|
|
"grad_norm": 0.5882854461669922,
|
|
"learning_rate": 9.639985311361202e-06,
|
|
"loss": 0.74379563331604,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.49959448499594483,
|
|
"grad_norm": 0.6086680293083191,
|
|
"learning_rate": 9.637316667280046e-06,
|
|
"loss": 0.7925621271133423,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.5008921330089213,
|
|
"grad_norm": 0.5651184916496277,
|
|
"learning_rate": 9.634638541060027e-06,
|
|
"loss": 0.7554738521575928,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.5021897810218978,
|
|
"grad_norm": 0.5808055400848389,
|
|
"learning_rate": 9.63195093817724e-06,
|
|
"loss": 0.7644078731536865,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.5034874290348743,
|
|
"grad_norm": 0.6111287474632263,
|
|
"learning_rate": 9.62925386412715e-06,
|
|
"loss": 0.7364607453346252,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.5047850770478508,
|
|
"grad_norm": 0.6057661175727844,
|
|
"learning_rate": 9.626547324424592e-06,
|
|
"loss": 0.7212823629379272,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.5060827250608273,
|
|
"grad_norm": 0.6477599740028381,
|
|
"learning_rate": 9.623831324603755e-06,
|
|
"loss": 0.813086748123169,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.5073803730738037,
|
|
"grad_norm": 0.5950746536254883,
|
|
"learning_rate": 9.621105870218167e-06,
|
|
"loss": 0.7306693196296692,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.5086780210867802,
|
|
"grad_norm": 0.6298786401748657,
|
|
"learning_rate": 9.618370966840698e-06,
|
|
"loss": 0.7335579991340637,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.5099756690997567,
|
|
"grad_norm": 0.5998733639717102,
|
|
"learning_rate": 9.615626620063531e-06,
|
|
"loss": 0.6837765574455261,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.5112733171127332,
|
|
"grad_norm": 0.6094253659248352,
|
|
"learning_rate": 9.61287283549816e-06,
|
|
"loss": 0.7273898720741272,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.5125709651257097,
|
|
"grad_norm": 0.5919696092605591,
|
|
"learning_rate": 9.610109618775379e-06,
|
|
"loss": 0.7142295241355896,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.5138686131386861,
|
|
"grad_norm": 0.5768521428108215,
|
|
"learning_rate": 9.607336975545264e-06,
|
|
"loss": 0.6993876695632935,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.5151662611516626,
|
|
"grad_norm": 0.6359198689460754,
|
|
"learning_rate": 9.604554911477173e-06,
|
|
"loss": 0.751734733581543,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.5164639091646391,
|
|
"grad_norm": 0.612307071685791,
|
|
"learning_rate": 9.601763432259716e-06,
|
|
"loss": 0.7581944465637207,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.5177615571776155,
|
|
"grad_norm": 0.5969548225402832,
|
|
"learning_rate": 9.59896254360077e-06,
|
|
"loss": 0.7034813165664673,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.519059205190592,
|
|
"grad_norm": 0.5891065001487732,
|
|
"learning_rate": 9.596152251227438e-06,
|
|
"loss": 0.7002313137054443,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.5203568532035685,
|
|
"grad_norm": 0.5791100263595581,
|
|
"learning_rate": 9.593332560886055e-06,
|
|
"loss": 0.7138193845748901,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.521654501216545,
|
|
"grad_norm": 0.7952408790588379,
|
|
"learning_rate": 9.59050347834218e-06,
|
|
"loss": 0.6865421533584595,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.5229521492295215,
|
|
"grad_norm": 0.6096974015235901,
|
|
"learning_rate": 9.587665009380565e-06,
|
|
"loss": 0.7312819957733154,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.524249797242498,
|
|
"grad_norm": 0.6021596789360046,
|
|
"learning_rate": 9.584817159805164e-06,
|
|
"loss": 0.7670427560806274,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.5255474452554745,
|
|
"grad_norm": 0.6113924980163574,
|
|
"learning_rate": 9.58195993543911e-06,
|
|
"loss": 0.7259009480476379,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.526845093268451,
|
|
"grad_norm": 0.6386753916740417,
|
|
"learning_rate": 9.579093342124699e-06,
|
|
"loss": 0.7742621898651123,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.5281427412814275,
|
|
"grad_norm": 0.5846640467643738,
|
|
"learning_rate": 9.576217385723391e-06,
|
|
"loss": 0.6874604225158691,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.5294403892944038,
|
|
"grad_norm": 0.5714486241340637,
|
|
"learning_rate": 9.57333207211579e-06,
|
|
"loss": 0.6830397844314575,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.5307380373073803,
|
|
"grad_norm": 0.5846112370491028,
|
|
"learning_rate": 9.57043740720163e-06,
|
|
"loss": 0.7333765029907227,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.5320356853203568,
|
|
"grad_norm": 0.6309279799461365,
|
|
"learning_rate": 9.567533396899769e-06,
|
|
"loss": 0.698890209197998,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.5333333333333333,
|
|
"grad_norm": 0.5987696647644043,
|
|
"learning_rate": 9.564620047148174e-06,
|
|
"loss": 0.7424242496490479,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.5346309813463098,
|
|
"grad_norm": 0.5915178656578064,
|
|
"learning_rate": 9.561697363903908e-06,
|
|
"loss": 0.7625330090522766,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.5359286293592863,
|
|
"grad_norm": 0.6682938933372498,
|
|
"learning_rate": 9.558765353143116e-06,
|
|
"loss": 0.7808880805969238,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.5372262773722628,
|
|
"grad_norm": 0.5921300649642944,
|
|
"learning_rate": 9.555824020861022e-06,
|
|
"loss": 0.7293972969055176,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.5385239253852393,
|
|
"grad_norm": 0.6055417060852051,
|
|
"learning_rate": 9.5528733730719e-06,
|
|
"loss": 0.7130710482597351,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.5398215733982157,
|
|
"grad_norm": 1.2821067571640015,
|
|
"learning_rate": 9.549913415809084e-06,
|
|
"loss": 0.6902526617050171,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.5411192214111922,
|
|
"grad_norm": 0.5723661184310913,
|
|
"learning_rate": 9.546944155124935e-06,
|
|
"loss": 0.7237967252731323,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.5424168694241687,
|
|
"grad_norm": 0.5984989404678345,
|
|
"learning_rate": 9.54396559709084e-06,
|
|
"loss": 0.7385105490684509,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.5437145174371452,
|
|
"grad_norm": 0.6114164590835571,
|
|
"learning_rate": 9.540977747797194e-06,
|
|
"loss": 0.6872152090072632,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.5450121654501217,
|
|
"grad_norm": 0.585870087146759,
|
|
"learning_rate": 9.537980613353392e-06,
|
|
"loss": 0.7558926343917847,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.5463098134630981,
|
|
"grad_norm": 0.5969951748847961,
|
|
"learning_rate": 9.53497419988782e-06,
|
|
"loss": 0.7628536224365234,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.5476074614760746,
|
|
"grad_norm": 0.6526360511779785,
|
|
"learning_rate": 9.531958513547832e-06,
|
|
"loss": 0.7417917251586914,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.5489051094890511,
|
|
"grad_norm": 0.6217682361602783,
|
|
"learning_rate": 9.52893356049974e-06,
|
|
"loss": 0.7846866846084595,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.5502027575020276,
|
|
"grad_norm": 0.6098693013191223,
|
|
"learning_rate": 9.525899346928809e-06,
|
|
"loss": 0.7403139472007751,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.551500405515004,
|
|
"grad_norm": 0.6113680005073547,
|
|
"learning_rate": 9.52285587903924e-06,
|
|
"loss": 0.7699853181838989,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.5527980535279805,
|
|
"grad_norm": 0.5491748452186584,
|
|
"learning_rate": 9.519803163054149e-06,
|
|
"loss": 0.7141760587692261,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.554095701540957,
|
|
"grad_norm": 0.6018276214599609,
|
|
"learning_rate": 9.51674120521557e-06,
|
|
"loss": 0.7314755916595459,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.5553933495539335,
|
|
"grad_norm": 0.6114900708198547,
|
|
"learning_rate": 9.513670011784435e-06,
|
|
"loss": 0.7220840454101562,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.55669099756691,
|
|
"grad_norm": 0.5553966760635376,
|
|
"learning_rate": 9.510589589040554e-06,
|
|
"loss": 0.630115270614624,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.5579886455798865,
|
|
"grad_norm": 0.5907071232795715,
|
|
"learning_rate": 9.507499943282613e-06,
|
|
"loss": 0.6516691446304321,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.559286293592863,
|
|
"grad_norm": 0.5842899084091187,
|
|
"learning_rate": 9.504401080828154e-06,
|
|
"loss": 0.7031220197677612,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.5605839416058395,
|
|
"grad_norm": 0.5828782916069031,
|
|
"learning_rate": 9.501293008013568e-06,
|
|
"loss": 0.7107349038124084,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.5618815896188158,
|
|
"grad_norm": 0.5939279198646545,
|
|
"learning_rate": 9.498175731194077e-06,
|
|
"loss": 0.7517828941345215,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.5631792376317923,
|
|
"grad_norm": 0.6058377623558044,
|
|
"learning_rate": 9.495049256743723e-06,
|
|
"loss": 0.7890589237213135,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.5644768856447688,
|
|
"grad_norm": 0.6133562922477722,
|
|
"learning_rate": 9.491913591055356e-06,
|
|
"loss": 0.6695548892021179,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.5657745336577453,
|
|
"grad_norm": 0.6204050183296204,
|
|
"learning_rate": 9.488768740540615e-06,
|
|
"loss": 0.7749900817871094,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.5670721816707218,
|
|
"grad_norm": 0.5636538863182068,
|
|
"learning_rate": 9.485614711629927e-06,
|
|
"loss": 0.6592154502868652,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.5683698296836983,
|
|
"grad_norm": 0.5660319328308105,
|
|
"learning_rate": 9.482451510772482e-06,
|
|
"loss": 0.7120122313499451,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.5696674776966748,
|
|
"grad_norm": 0.574423611164093,
|
|
"learning_rate": 9.479279144436224e-06,
|
|
"loss": 0.7538824081420898,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.5709651257096513,
|
|
"grad_norm": 0.5769577622413635,
|
|
"learning_rate": 9.47609761910784e-06,
|
|
"loss": 0.6975010633468628,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.5722627737226277,
|
|
"grad_norm": 1.1428693532943726,
|
|
"learning_rate": 9.472906941292746e-06,
|
|
"loss": 0.7184154987335205,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.5735604217356042,
|
|
"grad_norm": 0.6155918836593628,
|
|
"learning_rate": 9.469707117515068e-06,
|
|
"loss": 0.7325999140739441,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.5748580697485807,
|
|
"grad_norm": 0.6040661931037903,
|
|
"learning_rate": 9.466498154317635e-06,
|
|
"loss": 0.6905105113983154,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.5761557177615572,
|
|
"grad_norm": 0.6275285482406616,
|
|
"learning_rate": 9.463280058261965e-06,
|
|
"loss": 0.7441266775131226,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.5774533657745337,
|
|
"grad_norm": 0.5689868927001953,
|
|
"learning_rate": 9.460052835928254e-06,
|
|
"loss": 0.6997857093811035,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.5787510137875101,
|
|
"grad_norm": 0.5860233902931213,
|
|
"learning_rate": 9.45681649391535e-06,
|
|
"loss": 0.6657996773719788,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.5800486618004866,
|
|
"grad_norm": 0.5518195629119873,
|
|
"learning_rate": 9.453571038840755e-06,
|
|
"loss": 0.6410640478134155,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.5813463098134631,
|
|
"grad_norm": 0.7139276266098022,
|
|
"learning_rate": 9.450316477340602e-06,
|
|
"loss": 0.7444489598274231,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.5826439578264396,
|
|
"grad_norm": 0.6063182950019836,
|
|
"learning_rate": 9.447052816069648e-06,
|
|
"loss": 0.7016487121582031,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.583941605839416,
|
|
"grad_norm": 0.5990587472915649,
|
|
"learning_rate": 9.443780061701252e-06,
|
|
"loss": 0.7742944359779358,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.5852392538523925,
|
|
"grad_norm": 0.5863263010978699,
|
|
"learning_rate": 9.44049822092737e-06,
|
|
"loss": 0.7078189253807068,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.586536901865369,
|
|
"grad_norm": 0.5597153902053833,
|
|
"learning_rate": 9.437207300458535e-06,
|
|
"loss": 0.7037616968154907,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.5878345498783455,
|
|
"grad_norm": 0.5865596532821655,
|
|
"learning_rate": 9.433907307023845e-06,
|
|
"loss": 0.7111040353775024,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.589132197891322,
|
|
"grad_norm": 0.595535397529602,
|
|
"learning_rate": 9.430598247370955e-06,
|
|
"loss": 0.6840265393257141,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.5904298459042985,
|
|
"grad_norm": 0.6209713816642761,
|
|
"learning_rate": 9.427280128266049e-06,
|
|
"loss": 0.6608985066413879,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.591727493917275,
|
|
"grad_norm": 0.7749186158180237,
|
|
"learning_rate": 9.423952956493846e-06,
|
|
"loss": 0.6757811307907104,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.5930251419302515,
|
|
"grad_norm": 0.6284626126289368,
|
|
"learning_rate": 9.420616738857568e-06,
|
|
"loss": 0.6912366151809692,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.5943227899432278,
|
|
"grad_norm": 0.5830085277557373,
|
|
"learning_rate": 9.417271482178938e-06,
|
|
"loss": 0.7678932547569275,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.5956204379562043,
|
|
"grad_norm": 0.5680383443832397,
|
|
"learning_rate": 9.413917193298153e-06,
|
|
"loss": 0.7322279810905457,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.5969180859691808,
|
|
"grad_norm": 0.5904244184494019,
|
|
"learning_rate": 9.41055387907389e-06,
|
|
"loss": 0.6763080358505249,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.5969180859691808,
|
|
"eval_loss": 0.7040426731109619,
|
|
"eval_runtime": 73.0729,
|
|
"eval_samples_per_second": 71.052,
|
|
"eval_steps_per_second": 8.882,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.5982157339821573,
|
|
"grad_norm": 0.5804091691970825,
|
|
"learning_rate": 9.407181546383275e-06,
|
|
"loss": 0.7188655138015747,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.5995133819951338,
|
|
"grad_norm": 0.5912026166915894,
|
|
"learning_rate": 9.403800202121873e-06,
|
|
"loss": 0.6785882711410522,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.6008110300081103,
|
|
"grad_norm": 0.5554898381233215,
|
|
"learning_rate": 9.400409853203677e-06,
|
|
"loss": 0.7052475214004517,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.6021086780210868,
|
|
"grad_norm": 0.6723419427871704,
|
|
"learning_rate": 9.397010506561096e-06,
|
|
"loss": 0.6488598585128784,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.6034063260340633,
|
|
"grad_norm": 0.5925308465957642,
|
|
"learning_rate": 9.393602169144929e-06,
|
|
"loss": 0.7316585779190063,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.6047039740470398,
|
|
"grad_norm": 0.7151989936828613,
|
|
"learning_rate": 9.390184847924366e-06,
|
|
"loss": 0.7060757875442505,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.6060016220600162,
|
|
"grad_norm": 0.5946957468986511,
|
|
"learning_rate": 9.386758549886964e-06,
|
|
"loss": 0.7584104537963867,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.6072992700729927,
|
|
"grad_norm": 0.568766176700592,
|
|
"learning_rate": 9.383323282038632e-06,
|
|
"loss": 0.725806713104248,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.6085969180859692,
|
|
"grad_norm": 0.5797498226165771,
|
|
"learning_rate": 9.379879051403627e-06,
|
|
"loss": 0.6769331693649292,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.6098945660989457,
|
|
"grad_norm": 0.7914499640464783,
|
|
"learning_rate": 9.376425865024527e-06,
|
|
"loss": 0.7631534934043884,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.6111922141119221,
|
|
"grad_norm": 0.601610004901886,
|
|
"learning_rate": 9.372963729962227e-06,
|
|
"loss": 0.8109684586524963,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.6124898621248986,
|
|
"grad_norm": 0.6191813349723816,
|
|
"learning_rate": 9.369492653295913e-06,
|
|
"loss": 0.6854857206344604,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.6137875101378751,
|
|
"grad_norm": 0.8444225192070007,
|
|
"learning_rate": 9.366012642123061e-06,
|
|
"loss": 0.7072763442993164,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.6150851581508516,
|
|
"grad_norm": 0.5926432609558105,
|
|
"learning_rate": 9.362523703559412e-06,
|
|
"loss": 0.7057541012763977,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.616382806163828,
|
|
"grad_norm": 0.5982694625854492,
|
|
"learning_rate": 9.359025844738962e-06,
|
|
"loss": 0.7388914823532104,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.6176804541768045,
|
|
"grad_norm": 0.6068631410598755,
|
|
"learning_rate": 9.355519072813946e-06,
|
|
"loss": 0.7815642356872559,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.618978102189781,
|
|
"grad_norm": 0.5807543396949768,
|
|
"learning_rate": 9.352003394954827e-06,
|
|
"loss": 0.7441459894180298,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.6202757502027575,
|
|
"grad_norm": 0.5668230056762695,
|
|
"learning_rate": 9.348478818350277e-06,
|
|
"loss": 0.7281776666641235,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.621573398215734,
|
|
"grad_norm": 0.6428498029708862,
|
|
"learning_rate": 9.34494535020716e-06,
|
|
"loss": 0.754060685634613,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.6228710462287105,
|
|
"grad_norm": 0.6553912162780762,
|
|
"learning_rate": 9.341402997750526e-06,
|
|
"loss": 0.6970114707946777,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.624168694241687,
|
|
"grad_norm": 0.5876368880271912,
|
|
"learning_rate": 9.337851768223589e-06,
|
|
"loss": 0.7278268933296204,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.6254663422546635,
|
|
"grad_norm": 0.6632186770439148,
|
|
"learning_rate": 9.334291668887716e-06,
|
|
"loss": 0.724956750869751,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.6267639902676398,
|
|
"grad_norm": 0.582115113735199,
|
|
"learning_rate": 9.330722707022406e-06,
|
|
"loss": 0.7292401790618896,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.6280616382806163,
|
|
"grad_norm": 0.5983607769012451,
|
|
"learning_rate": 9.327144889925286e-06,
|
|
"loss": 0.7359820604324341,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.6293592862935928,
|
|
"grad_norm": 0.594374418258667,
|
|
"learning_rate": 9.323558224912083e-06,
|
|
"loss": 0.7724255323410034,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.6306569343065693,
|
|
"grad_norm": 0.5669406056404114,
|
|
"learning_rate": 9.319962719316621e-06,
|
|
"loss": 0.7348428964614868,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.6319545823195458,
|
|
"grad_norm": 0.6060366630554199,
|
|
"learning_rate": 9.3163583804908e-06,
|
|
"loss": 0.682552695274353,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.6332522303325223,
|
|
"grad_norm": 0.6307089328765869,
|
|
"learning_rate": 9.312745215804577e-06,
|
|
"loss": 0.8117605447769165,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.6345498783454988,
|
|
"grad_norm": 0.5955522656440735,
|
|
"learning_rate": 9.309123232645963e-06,
|
|
"loss": 0.7129393219947815,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.6358475263584753,
|
|
"grad_norm": 0.6481534242630005,
|
|
"learning_rate": 9.305492438420995e-06,
|
|
"loss": 0.6988842487335205,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.6371451743714518,
|
|
"grad_norm": 0.5734648108482361,
|
|
"learning_rate": 9.301852840553728e-06,
|
|
"loss": 0.678565263748169,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.6384428223844282,
|
|
"grad_norm": 0.5938750505447388,
|
|
"learning_rate": 9.298204446486221e-06,
|
|
"loss": 0.7267583608627319,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.6397404703974047,
|
|
"grad_norm": 0.5493259429931641,
|
|
"learning_rate": 9.294547263678515e-06,
|
|
"loss": 0.665608286857605,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.6410381184103812,
|
|
"grad_norm": 0.6349811553955078,
|
|
"learning_rate": 9.29088129960862e-06,
|
|
"loss": 0.7591350078582764,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.6423357664233577,
|
|
"grad_norm": 0.5922753214836121,
|
|
"learning_rate": 9.28720656177251e-06,
|
|
"loss": 0.6984656453132629,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.6436334144363342,
|
|
"grad_norm": 0.5910064578056335,
|
|
"learning_rate": 9.28352305768409e-06,
|
|
"loss": 0.7371819019317627,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.6449310624493106,
|
|
"grad_norm": 0.5690438151359558,
|
|
"learning_rate": 9.279830794875194e-06,
|
|
"loss": 0.7185039520263672,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.6462287104622871,
|
|
"grad_norm": 0.6163010597229004,
|
|
"learning_rate": 9.276129780895566e-06,
|
|
"loss": 0.6993834972381592,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.6475263584752636,
|
|
"grad_norm": 0.6288541555404663,
|
|
"learning_rate": 9.272420023312843e-06,
|
|
"loss": 0.8217408657073975,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.64882400648824,
|
|
"grad_norm": 0.620994508266449,
|
|
"learning_rate": 9.268701529712541e-06,
|
|
"loss": 0.7522677779197693,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.6501216545012165,
|
|
"grad_norm": 0.5998205542564392,
|
|
"learning_rate": 9.264974307698034e-06,
|
|
"loss": 0.6935300827026367,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.651419302514193,
|
|
"grad_norm": 0.8760928511619568,
|
|
"learning_rate": 9.261238364890553e-06,
|
|
"loss": 0.7158179879188538,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.6527169505271695,
|
|
"grad_norm": 0.6253861784934998,
|
|
"learning_rate": 9.257493708929153e-06,
|
|
"loss": 0.7684556841850281,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.654014598540146,
|
|
"grad_norm": 0.6935423016548157,
|
|
"learning_rate": 9.253740347470708e-06,
|
|
"loss": 0.778200626373291,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.6553122465531225,
|
|
"grad_norm": 0.6469247937202454,
|
|
"learning_rate": 9.24997828818989e-06,
|
|
"loss": 0.7509121894836426,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.656609894566099,
|
|
"grad_norm": 0.6015416979789734,
|
|
"learning_rate": 9.246207538779162e-06,
|
|
"loss": 0.7778556942939758,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.6579075425790755,
|
|
"grad_norm": 0.5774285793304443,
|
|
"learning_rate": 9.242428106948748e-06,
|
|
"loss": 0.7515290975570679,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.659205190592052,
|
|
"grad_norm": 0.5681214332580566,
|
|
"learning_rate": 9.238640000426635e-06,
|
|
"loss": 0.7492050528526306,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.6605028386050283,
|
|
"grad_norm": 0.5640445351600647,
|
|
"learning_rate": 9.234843226958537e-06,
|
|
"loss": 0.6927063465118408,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.6618004866180048,
|
|
"grad_norm": 0.6083568334579468,
|
|
"learning_rate": 9.231037794307896e-06,
|
|
"loss": 0.7587168216705322,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.6630981346309813,
|
|
"grad_norm": 0.5821657776832581,
|
|
"learning_rate": 9.22722371025586e-06,
|
|
"loss": 0.7126904726028442,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.6643957826439578,
|
|
"grad_norm": 2.4457342624664307,
|
|
"learning_rate": 9.223400982601262e-06,
|
|
"loss": 0.6615161895751953,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.6656934306569343,
|
|
"grad_norm": 0.6009355187416077,
|
|
"learning_rate": 9.219569619160618e-06,
|
|
"loss": 0.7299069166183472,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.6669910786699108,
|
|
"grad_norm": 0.6069469451904297,
|
|
"learning_rate": 9.215729627768093e-06,
|
|
"loss": 0.7864600419998169,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.6682887266828873,
|
|
"grad_norm": 0.6514759659767151,
|
|
"learning_rate": 9.2118810162755e-06,
|
|
"loss": 0.6937267184257507,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.6695863746958638,
|
|
"grad_norm": 0.795812726020813,
|
|
"learning_rate": 9.20802379255227e-06,
|
|
"loss": 0.704431414604187,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.6708840227088402,
|
|
"grad_norm": 0.6042063236236572,
|
|
"learning_rate": 9.204157964485454e-06,
|
|
"loss": 0.7550405263900757,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.6721816707218167,
|
|
"grad_norm": 0.6756092309951782,
|
|
"learning_rate": 9.200283539979691e-06,
|
|
"loss": 0.7409992218017578,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.6734793187347932,
|
|
"grad_norm": 0.7710636854171753,
|
|
"learning_rate": 9.196400526957198e-06,
|
|
"loss": 0.7560484409332275,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.6747769667477697,
|
|
"grad_norm": 0.7084681987762451,
|
|
"learning_rate": 9.192508933357753e-06,
|
|
"loss": 0.7406056523323059,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.6760746147607462,
|
|
"grad_norm": 0.6131231188774109,
|
|
"learning_rate": 9.188608767138683e-06,
|
|
"loss": 0.7801857590675354,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.6773722627737226,
|
|
"grad_norm": 0.6520926356315613,
|
|
"learning_rate": 9.184700036274837e-06,
|
|
"loss": 0.7538937926292419,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.6786699107866991,
|
|
"grad_norm": 0.5901785492897034,
|
|
"learning_rate": 9.180782748758583e-06,
|
|
"loss": 0.7579227089881897,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.6799675587996756,
|
|
"grad_norm": 0.5867577195167542,
|
|
"learning_rate": 9.17685691259978e-06,
|
|
"loss": 0.7785968780517578,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.681265206812652,
|
|
"grad_norm": 0.6682732105255127,
|
|
"learning_rate": 9.172922535825772e-06,
|
|
"loss": 0.6564942598342896,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.6825628548256285,
|
|
"grad_norm": 0.5923816561698914,
|
|
"learning_rate": 9.168979626481364e-06,
|
|
"loss": 0.7041895985603333,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.683860502838605,
|
|
"grad_norm": 0.5651242136955261,
|
|
"learning_rate": 9.165028192628803e-06,
|
|
"loss": 0.7024134397506714,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.6851581508515815,
|
|
"grad_norm": 0.6138148307800293,
|
|
"learning_rate": 9.161068242347777e-06,
|
|
"loss": 0.680936872959137,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.686455798864558,
|
|
"grad_norm": 0.5655775666236877,
|
|
"learning_rate": 9.157099783735378e-06,
|
|
"loss": 0.6618273854255676,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.6877534468775345,
|
|
"grad_norm": 0.6033377051353455,
|
|
"learning_rate": 9.1531228249061e-06,
|
|
"loss": 0.7136421203613281,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.689051094890511,
|
|
"grad_norm": 0.7331950068473816,
|
|
"learning_rate": 9.149137373991819e-06,
|
|
"loss": 0.7970547676086426,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.6903487429034875,
|
|
"grad_norm": 0.5791338682174683,
|
|
"learning_rate": 9.145143439141771e-06,
|
|
"loss": 0.6997847557067871,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.691646390916464,
|
|
"grad_norm": 0.578549325466156,
|
|
"learning_rate": 9.141141028522544e-06,
|
|
"loss": 0.7562875151634216,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.6929440389294403,
|
|
"grad_norm": 1.920037865638733,
|
|
"learning_rate": 9.137130150318055e-06,
|
|
"loss": 0.6756929755210876,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.6942416869424168,
|
|
"grad_norm": 0.6300271153450012,
|
|
"learning_rate": 9.133110812729532e-06,
|
|
"loss": 0.7216504216194153,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.6955393349553933,
|
|
"grad_norm": 0.6114068031311035,
|
|
"learning_rate": 9.129083023975505e-06,
|
|
"loss": 0.7115483283996582,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.6968369829683698,
|
|
"grad_norm": 0.6002055406570435,
|
|
"learning_rate": 9.125046792291784e-06,
|
|
"loss": 0.7236282229423523,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.6981346309813463,
|
|
"grad_norm": 0.6047035455703735,
|
|
"learning_rate": 9.121002125931436e-06,
|
|
"loss": 0.6811922788619995,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.6994322789943228,
|
|
"grad_norm": 0.6067850589752197,
|
|
"learning_rate": 9.116949033164785e-06,
|
|
"loss": 0.7463216781616211,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.7007299270072993,
|
|
"grad_norm": 0.5822233557701111,
|
|
"learning_rate": 9.112887522279378e-06,
|
|
"loss": 0.7334940433502197,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.7020275750202758,
|
|
"grad_norm": 0.5947557687759399,
|
|
"learning_rate": 9.108817601579978e-06,
|
|
"loss": 0.7504947185516357,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.7033252230332522,
|
|
"grad_norm": 0.6123725771903992,
|
|
"learning_rate": 9.104739279388542e-06,
|
|
"loss": 0.7778276205062866,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.7046228710462287,
|
|
"grad_norm": 0.6185777187347412,
|
|
"learning_rate": 9.100652564044206e-06,
|
|
"loss": 0.7200486660003662,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.7059205190592052,
|
|
"grad_norm": 1.0739803314208984,
|
|
"learning_rate": 9.09655746390327e-06,
|
|
"loss": 0.7538056969642639,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.7072181670721817,
|
|
"grad_norm": 0.5895283818244934,
|
|
"learning_rate": 9.092453987339174e-06,
|
|
"loss": 0.6963307857513428,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.7085158150851582,
|
|
"grad_norm": 0.5688499212265015,
|
|
"learning_rate": 9.088342142742493e-06,
|
|
"loss": 0.7032905220985413,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.7098134630981346,
|
|
"grad_norm": 0.6233918070793152,
|
|
"learning_rate": 9.084221938520906e-06,
|
|
"loss": 0.6713303923606873,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.7111111111111111,
|
|
"grad_norm": 0.7095353007316589,
|
|
"learning_rate": 9.080093383099187e-06,
|
|
"loss": 0.7268386483192444,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.7124087591240876,
|
|
"grad_norm": 0.6135478019714355,
|
|
"learning_rate": 9.07595648491919e-06,
|
|
"loss": 0.7246679663658142,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.7137064071370641,
|
|
"grad_norm": 0.582713782787323,
|
|
"learning_rate": 9.071811252439823e-06,
|
|
"loss": 0.691692590713501,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.7150040551500405,
|
|
"grad_norm": 0.542813241481781,
|
|
"learning_rate": 9.067657694137038e-06,
|
|
"loss": 0.7191475629806519,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.716301703163017,
|
|
"grad_norm": 0.6026738286018372,
|
|
"learning_rate": 9.063495818503809e-06,
|
|
"loss": 0.7817606925964355,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.7175993511759935,
|
|
"grad_norm": 0.8981631398200989,
|
|
"learning_rate": 9.059325634050118e-06,
|
|
"loss": 0.7415137887001038,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.71889699918897,
|
|
"grad_norm": 0.624947190284729,
|
|
"learning_rate": 9.05514714930294e-06,
|
|
"loss": 0.7271240949630737,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.7201946472019465,
|
|
"grad_norm": 0.5546719431877136,
|
|
"learning_rate": 9.050960372806214e-06,
|
|
"loss": 0.698599100112915,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.721492295214923,
|
|
"grad_norm": 0.5948834419250488,
|
|
"learning_rate": 9.046765313120842e-06,
|
|
"loss": 0.7756059169769287,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.7227899432278995,
|
|
"grad_norm": 0.5877026915550232,
|
|
"learning_rate": 9.042561978824657e-06,
|
|
"loss": 0.7625119090080261,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.724087591240876,
|
|
"grad_norm": 0.6063138246536255,
|
|
"learning_rate": 9.038350378512417e-06,
|
|
"loss": 0.7803001403808594,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.7253852392538523,
|
|
"grad_norm": 0.5974534153938293,
|
|
"learning_rate": 9.034130520795774e-06,
|
|
"loss": 0.716859757900238,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.7266828872668288,
|
|
"grad_norm": 0.5728408694267273,
|
|
"learning_rate": 9.029902414303273e-06,
|
|
"loss": 0.749966561794281,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.7279805352798053,
|
|
"grad_norm": 0.5723510384559631,
|
|
"learning_rate": 9.025666067680319e-06,
|
|
"loss": 0.6597641706466675,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.7292781832927818,
|
|
"grad_norm": 0.6084505915641785,
|
|
"learning_rate": 9.021421489589169e-06,
|
|
"loss": 0.710649847984314,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.7305758313057583,
|
|
"grad_norm": 0.5824548006057739,
|
|
"learning_rate": 9.017168688708913e-06,
|
|
"loss": 0.6628729104995728,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.7318734793187348,
|
|
"grad_norm": 0.594218373298645,
|
|
"learning_rate": 9.01290767373545e-06,
|
|
"loss": 0.730206310749054,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.7331711273317113,
|
|
"grad_norm": 0.7261629700660706,
|
|
"learning_rate": 9.008638453381477e-06,
|
|
"loss": 0.6241463422775269,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.7344687753446878,
|
|
"grad_norm": 0.6365723609924316,
|
|
"learning_rate": 9.004361036376472e-06,
|
|
"loss": 0.7979130148887634,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.7357664233576642,
|
|
"grad_norm": 0.6350899934768677,
|
|
"learning_rate": 9.000075431466668e-06,
|
|
"loss": 0.7318904399871826,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.7370640713706407,
|
|
"grad_norm": 0.5833107829093933,
|
|
"learning_rate": 8.995781647415041e-06,
|
|
"loss": 0.6889808177947998,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.7383617193836172,
|
|
"grad_norm": 1.110663652420044,
|
|
"learning_rate": 8.991479693001296e-06,
|
|
"loss": 0.7418273687362671,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.7396593673965937,
|
|
"grad_norm": 0.5860966444015503,
|
|
"learning_rate": 8.987169577021838e-06,
|
|
"loss": 0.7295401096343994,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.7409570154095702,
|
|
"grad_norm": 2.7430782318115234,
|
|
"learning_rate": 8.982851308289765e-06,
|
|
"loss": 0.7898417711257935,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.7422546634225466,
|
|
"grad_norm": 0.6228799223899841,
|
|
"learning_rate": 8.978524895634842e-06,
|
|
"loss": 0.7360432147979736,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.7435523114355231,
|
|
"grad_norm": 0.6052027344703674,
|
|
"learning_rate": 8.974190347903491e-06,
|
|
"loss": 0.7148642539978027,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.7448499594484996,
|
|
"grad_norm": 0.5462301969528198,
|
|
"learning_rate": 8.96984767395876e-06,
|
|
"loss": 0.6608201861381531,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.7461476074614761,
|
|
"grad_norm": 0.6186708211898804,
|
|
"learning_rate": 8.965496882680322e-06,
|
|
"loss": 0.7763011455535889,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.7474452554744525,
|
|
"grad_norm": 0.5678666830062866,
|
|
"learning_rate": 8.961137982964445e-06,
|
|
"loss": 0.6967377662658691,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.748742903487429,
|
|
"grad_norm": 0.5985408425331116,
|
|
"learning_rate": 8.95677098372397e-06,
|
|
"loss": 0.7348828911781311,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.7500405515004055,
|
|
"grad_norm": 0.5867311954498291,
|
|
"learning_rate": 8.95239589388831e-06,
|
|
"loss": 0.7279753684997559,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.751338199513382,
|
|
"grad_norm": 0.5872586369514465,
|
|
"learning_rate": 8.948012722403417e-06,
|
|
"loss": 0.7667936086654663,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.7526358475263585,
|
|
"grad_norm": 0.6062989234924316,
|
|
"learning_rate": 8.943621478231764e-06,
|
|
"loss": 0.7433009147644043,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.753933495539335,
|
|
"grad_norm": 0.5952759981155396,
|
|
"learning_rate": 8.939222170352333e-06,
|
|
"loss": 0.7213162183761597,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.7552311435523115,
|
|
"grad_norm": 0.6251077651977539,
|
|
"learning_rate": 8.9348148077606e-06,
|
|
"loss": 0.6798166632652283,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.756528791565288,
|
|
"grad_norm": 0.6643015742301941,
|
|
"learning_rate": 8.9303993994685e-06,
|
|
"loss": 0.697973370552063,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.7578264395782643,
|
|
"grad_norm": 0.614818274974823,
|
|
"learning_rate": 8.925975954504432e-06,
|
|
"loss": 0.6740398406982422,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.7591240875912408,
|
|
"grad_norm": 0.5874298214912415,
|
|
"learning_rate": 8.921544481913218e-06,
|
|
"loss": 0.6789122819900513,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.7604217356042173,
|
|
"grad_norm": 0.5964909791946411,
|
|
"learning_rate": 8.917104990756096e-06,
|
|
"loss": 0.7620725631713867,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.7617193836171938,
|
|
"grad_norm": 0.6049628853797913,
|
|
"learning_rate": 8.912657490110705e-06,
|
|
"loss": 0.7080841064453125,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.7630170316301703,
|
|
"grad_norm": 0.5781946778297424,
|
|
"learning_rate": 8.908201989071055e-06,
|
|
"loss": 0.7524607181549072,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.7643146796431468,
|
|
"grad_norm": 0.585602879524231,
|
|
"learning_rate": 8.903738496747523e-06,
|
|
"loss": 0.775031566619873,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.7656123276561233,
|
|
"grad_norm": 0.5722633004188538,
|
|
"learning_rate": 8.899267022266815e-06,
|
|
"loss": 0.7250426411628723,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.7669099756690998,
|
|
"grad_norm": 0.5955145359039307,
|
|
"learning_rate": 8.894787574771968e-06,
|
|
"loss": 0.7013397216796875,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.7682076236820763,
|
|
"grad_norm": 0.5935817956924438,
|
|
"learning_rate": 8.890300163422319e-06,
|
|
"loss": 0.7290763854980469,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.7695052716950527,
|
|
"grad_norm": 0.5822441577911377,
|
|
"learning_rate": 8.885804797393484e-06,
|
|
"loss": 0.7267876863479614,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.7708029197080292,
|
|
"grad_norm": 0.6610195636749268,
|
|
"learning_rate": 8.881301485877355e-06,
|
|
"loss": 0.7642419338226318,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.7721005677210057,
|
|
"grad_norm": 0.5827111005783081,
|
|
"learning_rate": 8.87679023808206e-06,
|
|
"loss": 0.6633021831512451,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.7733982157339822,
|
|
"grad_norm": 0.5982354283332825,
|
|
"learning_rate": 8.87227106323196e-06,
|
|
"loss": 0.7427453994750977,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.7746958637469586,
|
|
"grad_norm": 0.5927367210388184,
|
|
"learning_rate": 8.867743970567625e-06,
|
|
"loss": 0.6740269660949707,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.7759935117599351,
|
|
"grad_norm": 0.5812351703643799,
|
|
"learning_rate": 8.86320896934581e-06,
|
|
"loss": 0.7781720161437988,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.7772911597729116,
|
|
"grad_norm": 0.5589850544929504,
|
|
"learning_rate": 8.858666068839447e-06,
|
|
"loss": 0.6646384000778198,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.7785888077858881,
|
|
"grad_norm": 0.6152946352958679,
|
|
"learning_rate": 8.85411527833762e-06,
|
|
"loss": 0.7158241868019104,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.7798864557988645,
|
|
"grad_norm": 0.6571215987205505,
|
|
"learning_rate": 8.849556607145541e-06,
|
|
"loss": 0.6301259994506836,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.781184103811841,
|
|
"grad_norm": 0.650355339050293,
|
|
"learning_rate": 8.84499006458454e-06,
|
|
"loss": 0.7729838490486145,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.7824817518248175,
|
|
"grad_norm": 0.5668020844459534,
|
|
"learning_rate": 8.840415659992038e-06,
|
|
"loss": 0.7071006298065186,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.783779399837794,
|
|
"grad_norm": 0.5940731763839722,
|
|
"learning_rate": 8.835833402721538e-06,
|
|
"loss": 0.709991991519928,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.7850770478507705,
|
|
"grad_norm": 0.6069549918174744,
|
|
"learning_rate": 8.831243302142595e-06,
|
|
"loss": 0.7425503730773926,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.786374695863747,
|
|
"grad_norm": 0.6917547583580017,
|
|
"learning_rate": 8.826645367640803e-06,
|
|
"loss": 0.7509415149688721,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.7876723438767235,
|
|
"grad_norm": 0.5669399499893188,
|
|
"learning_rate": 8.822039608617773e-06,
|
|
"loss": 0.7422374486923218,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.7889699918897,
|
|
"grad_norm": 0.5998254418373108,
|
|
"learning_rate": 8.81742603449112e-06,
|
|
"loss": 0.6498250961303711,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.7902676399026763,
|
|
"grad_norm": 0.5784206390380859,
|
|
"learning_rate": 8.81280465469443e-06,
|
|
"loss": 0.7794440388679504,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.7915652879156528,
|
|
"grad_norm": 0.5644393563270569,
|
|
"learning_rate": 8.808175478677261e-06,
|
|
"loss": 0.697083055973053,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.7928629359286293,
|
|
"grad_norm": 0.5781574249267578,
|
|
"learning_rate": 8.803538515905102e-06,
|
|
"loss": 0.6970184445381165,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.7941605839416058,
|
|
"grad_norm": 0.585652768611908,
|
|
"learning_rate": 8.79889377585937e-06,
|
|
"loss": 0.7602633833885193,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.7954582319545823,
|
|
"grad_norm": 0.5716352462768555,
|
|
"learning_rate": 8.79424126803738e-06,
|
|
"loss": 0.717863142490387,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.7967558799675588,
|
|
"grad_norm": 0.5922728776931763,
|
|
"learning_rate": 8.789581001952339e-06,
|
|
"loss": 0.7333586812019348,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.7980535279805353,
|
|
"grad_norm": 0.7918326258659363,
|
|
"learning_rate": 8.784912987133305e-06,
|
|
"loss": 0.7329719066619873,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.7993511759935118,
|
|
"grad_norm": 0.6318597793579102,
|
|
"learning_rate": 8.78023723312519e-06,
|
|
"loss": 0.71714848279953,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.8006488240064883,
|
|
"grad_norm": 0.5931165814399719,
|
|
"learning_rate": 8.775553749488729e-06,
|
|
"loss": 0.6446089744567871,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.8019464720194647,
|
|
"grad_norm": 0.5699899196624756,
|
|
"learning_rate": 8.770862545800459e-06,
|
|
"loss": 0.6896922588348389,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.8032441200324412,
|
|
"grad_norm": 0.5788043141365051,
|
|
"learning_rate": 8.766163631652702e-06,
|
|
"loss": 0.7116216421127319,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.8045417680454177,
|
|
"grad_norm": 0.6152717471122742,
|
|
"learning_rate": 8.76145701665355e-06,
|
|
"loss": 0.7757282853126526,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.8058394160583942,
|
|
"grad_norm": 0.6117092967033386,
|
|
"learning_rate": 8.756742710426842e-06,
|
|
"loss": 0.6977071166038513,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.8071370640713706,
|
|
"grad_norm": 0.5893334150314331,
|
|
"learning_rate": 8.752020722612135e-06,
|
|
"loss": 0.7122848033905029,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.8084347120843471,
|
|
"grad_norm": 0.613097608089447,
|
|
"learning_rate": 8.747291062864704e-06,
|
|
"loss": 0.7448244094848633,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.8097323600973236,
|
|
"grad_norm": 0.5860653519630432,
|
|
"learning_rate": 8.742553740855507e-06,
|
|
"loss": 0.6702634692192078,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.8110300081103001,
|
|
"grad_norm": 0.6024116277694702,
|
|
"learning_rate": 8.737808766271163e-06,
|
|
"loss": 0.6898221969604492,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.8123276561232765,
|
|
"grad_norm": 0.5622679591178894,
|
|
"learning_rate": 8.733056148813947e-06,
|
|
"loss": 0.7181109189987183,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.813625304136253,
|
|
"grad_norm": 0.595656156539917,
|
|
"learning_rate": 8.728295898201762e-06,
|
|
"loss": 0.7352790832519531,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.8149229521492295,
|
|
"grad_norm": 0.5798142552375793,
|
|
"learning_rate": 8.72352802416811e-06,
|
|
"loss": 0.6691849231719971,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.816220600162206,
|
|
"grad_norm": 0.6328383088111877,
|
|
"learning_rate": 8.718752536462089e-06,
|
|
"loss": 0.7578571438789368,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.8175182481751825,
|
|
"grad_norm": 0.6140182018280029,
|
|
"learning_rate": 8.713969444848365e-06,
|
|
"loss": 0.8000912666320801,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.818815896188159,
|
|
"grad_norm": 0.5924091935157776,
|
|
"learning_rate": 8.709178759107146e-06,
|
|
"loss": 0.7412709593772888,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.8201135442011355,
|
|
"grad_norm": 0.5865992903709412,
|
|
"learning_rate": 8.704380489034172e-06,
|
|
"loss": 0.6817134022712708,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.821411192214112,
|
|
"grad_norm": 0.6066908240318298,
|
|
"learning_rate": 8.699574644440696e-06,
|
|
"loss": 0.7462890148162842,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.8227088402270885,
|
|
"grad_norm": 0.6996213793754578,
|
|
"learning_rate": 8.694761235153446e-06,
|
|
"loss": 0.7541388273239136,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.8240064882400648,
|
|
"grad_norm": 0.5837500691413879,
|
|
"learning_rate": 8.689940271014631e-06,
|
|
"loss": 0.7211518883705139,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.8253041362530413,
|
|
"grad_norm": 0.6041287183761597,
|
|
"learning_rate": 8.685111761881902e-06,
|
|
"loss": 0.7510079741477966,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.8266017842660178,
|
|
"grad_norm": 0.5609418153762817,
|
|
"learning_rate": 8.680275717628336e-06,
|
|
"loss": 0.7399103045463562,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.8278994322789943,
|
|
"grad_norm": 0.6362541913986206,
|
|
"learning_rate": 8.675432148142423e-06,
|
|
"loss": 0.7379388809204102,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.8291970802919708,
|
|
"grad_norm": 0.555855393409729,
|
|
"learning_rate": 8.670581063328031e-06,
|
|
"loss": 0.6878998279571533,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.8304947283049473,
|
|
"grad_norm": 0.5522022843360901,
|
|
"learning_rate": 8.665722473104407e-06,
|
|
"loss": 0.6912398338317871,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.8317923763179238,
|
|
"grad_norm": 0.6348553895950317,
|
|
"learning_rate": 8.660856387406134e-06,
|
|
"loss": 0.7144729495048523,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.8330900243309003,
|
|
"grad_norm": 0.5787035226821899,
|
|
"learning_rate": 8.655982816183127e-06,
|
|
"loss": 0.7252941727638245,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.8343876723438767,
|
|
"grad_norm": 1.6580746173858643,
|
|
"learning_rate": 8.651101769400606e-06,
|
|
"loss": 0.7200146913528442,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.8356853203568532,
|
|
"grad_norm": 1.0832597017288208,
|
|
"learning_rate": 8.646213257039076e-06,
|
|
"loss": 0.7684627771377563,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.8369829683698297,
|
|
"grad_norm": 1.513912320137024,
|
|
"learning_rate": 8.641317289094306e-06,
|
|
"loss": 0.7325241565704346,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.8382806163828062,
|
|
"grad_norm": 0.6023765802383423,
|
|
"learning_rate": 8.636413875577314e-06,
|
|
"loss": 0.74098801612854,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.8395782643957826,
|
|
"grad_norm": 0.6051165461540222,
|
|
"learning_rate": 8.631503026514337e-06,
|
|
"loss": 0.6847478151321411,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.8408759124087591,
|
|
"grad_norm": 0.5932079553604126,
|
|
"learning_rate": 8.626584751946818e-06,
|
|
"loss": 0.731514036655426,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.8421735604217356,
|
|
"grad_norm": 0.592435359954834,
|
|
"learning_rate": 8.621659061931389e-06,
|
|
"loss": 0.7055472731590271,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.8434712084347121,
|
|
"grad_norm": 2.370189905166626,
|
|
"learning_rate": 8.616725966539831e-06,
|
|
"loss": 0.6948425769805908,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.8447688564476885,
|
|
"grad_norm": 0.6067817807197571,
|
|
"learning_rate": 8.611785475859083e-06,
|
|
"loss": 0.7035855650901794,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.846066504460665,
|
|
"grad_norm": 0.6086214780807495,
|
|
"learning_rate": 8.606837599991194e-06,
|
|
"loss": 0.7720967531204224,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.8473641524736415,
|
|
"grad_norm": 0.5939242243766785,
|
|
"learning_rate": 8.601882349053318e-06,
|
|
"loss": 0.7347517609596252,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.848661800486618,
|
|
"grad_norm": 0.6451635360717773,
|
|
"learning_rate": 8.596919733177692e-06,
|
|
"loss": 0.6510732173919678,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.8499594484995945,
|
|
"grad_norm": 0.6460222601890564,
|
|
"learning_rate": 8.591949762511606e-06,
|
|
"loss": 0.6970388293266296,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.851257096512571,
|
|
"grad_norm": 0.5829662084579468,
|
|
"learning_rate": 8.586972447217392e-06,
|
|
"loss": 0.6706767678260803,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.8525547445255475,
|
|
"grad_norm": 0.5833383798599243,
|
|
"learning_rate": 8.581987797472404e-06,
|
|
"loss": 0.7589589357376099,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.853852392538524,
|
|
"grad_norm": 0.5842010974884033,
|
|
"learning_rate": 8.576995823468984e-06,
|
|
"loss": 0.7162166833877563,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.8551500405515005,
|
|
"grad_norm": 0.5614502429962158,
|
|
"learning_rate": 8.571996535414457e-06,
|
|
"loss": 0.6840311288833618,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.8564476885644768,
|
|
"grad_norm": 0.5722468495368958,
|
|
"learning_rate": 8.566989943531106e-06,
|
|
"loss": 0.7161433100700378,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.8577453365774533,
|
|
"grad_norm": 0.6029196977615356,
|
|
"learning_rate": 8.561976058056138e-06,
|
|
"loss": 0.7230268716812134,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.8590429845904298,
|
|
"grad_norm": 0.5787186622619629,
|
|
"learning_rate": 8.556954889241682e-06,
|
|
"loss": 0.7280833721160889,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.8603406326034063,
|
|
"grad_norm": 0.6488873362541199,
|
|
"learning_rate": 8.551926447354759e-06,
|
|
"loss": 0.6804985404014587,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.8616382806163828,
|
|
"grad_norm": 0.5842364430427551,
|
|
"learning_rate": 8.546890742677259e-06,
|
|
"loss": 0.669411301612854,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.8629359286293593,
|
|
"grad_norm": 0.5956006646156311,
|
|
"learning_rate": 8.541847785505921e-06,
|
|
"loss": 0.7321279048919678,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.8642335766423358,
|
|
"grad_norm": 3.8146164417266846,
|
|
"learning_rate": 8.53679758615232e-06,
|
|
"loss": 0.693459153175354,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.8655312246553123,
|
|
"grad_norm": 0.7075020670890808,
|
|
"learning_rate": 8.531740154942834e-06,
|
|
"loss": 0.6751031875610352,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.8668288726682887,
|
|
"grad_norm": 0.5840404629707336,
|
|
"learning_rate": 8.526675502218629e-06,
|
|
"loss": 0.7010972499847412,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.8681265206812652,
|
|
"grad_norm": 0.5663997530937195,
|
|
"learning_rate": 8.521603638335638e-06,
|
|
"loss": 0.7152513265609741,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.8694241686942417,
|
|
"grad_norm": 0.58479243516922,
|
|
"learning_rate": 8.516524573664539e-06,
|
|
"loss": 0.7431036233901978,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.8707218167072182,
|
|
"grad_norm": 0.5867894887924194,
|
|
"learning_rate": 8.511438318590735e-06,
|
|
"loss": 0.6411721706390381,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.8720194647201946,
|
|
"grad_norm": 0.595013439655304,
|
|
"learning_rate": 8.506344883514328e-06,
|
|
"loss": 0.6847820281982422,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.8733171127331711,
|
|
"grad_norm": 0.6092846989631653,
|
|
"learning_rate": 8.501244278850105e-06,
|
|
"loss": 0.7914074659347534,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.8746147607461476,
|
|
"grad_norm": 0.6108312606811523,
|
|
"learning_rate": 8.496136515027511e-06,
|
|
"loss": 0.7064344882965088,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.8759124087591241,
|
|
"grad_norm": 0.6098673343658447,
|
|
"learning_rate": 8.491021602490632e-06,
|
|
"loss": 0.7082339525222778,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.8772100567721006,
|
|
"grad_norm": 0.5852345824241638,
|
|
"learning_rate": 8.485899551698166e-06,
|
|
"loss": 0.6980363130569458,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.878507704785077,
|
|
"grad_norm": 0.60945725440979,
|
|
"learning_rate": 8.480770373123415e-06,
|
|
"loss": 0.7337608933448792,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.8798053527980535,
|
|
"grad_norm": 0.5622206926345825,
|
|
"learning_rate": 8.475634077254248e-06,
|
|
"loss": 0.7212387919425964,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.88110300081103,
|
|
"grad_norm": 1.9474778175354004,
|
|
"learning_rate": 8.470490674593091e-06,
|
|
"loss": 0.7507941722869873,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.8824006488240065,
|
|
"grad_norm": 0.5891706943511963,
|
|
"learning_rate": 8.4653401756569e-06,
|
|
"loss": 0.72685706615448,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.883698296836983,
|
|
"grad_norm": 0.5848804116249084,
|
|
"learning_rate": 8.460182590977142e-06,
|
|
"loss": 0.7391736507415771,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.8849959448499595,
|
|
"grad_norm": 0.5995469093322754,
|
|
"learning_rate": 8.455017931099772e-06,
|
|
"loss": 0.7077188491821289,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.886293592862936,
|
|
"grad_norm": 0.5778690576553345,
|
|
"learning_rate": 8.449846206585211e-06,
|
|
"loss": 0.7160015106201172,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.8875912408759125,
|
|
"grad_norm": 0.6114044785499573,
|
|
"learning_rate": 8.44466742800833e-06,
|
|
"loss": 0.7118149995803833,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.8888888888888888,
|
|
"grad_norm": 0.5748172998428345,
|
|
"learning_rate": 8.439481605958416e-06,
|
|
"loss": 0.7232242822647095,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.8901865369018653,
|
|
"grad_norm": 0.9608264565467834,
|
|
"learning_rate": 8.434288751039168e-06,
|
|
"loss": 0.7293300032615662,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.8914841849148418,
|
|
"grad_norm": 0.5927110910415649,
|
|
"learning_rate": 8.429088873868656e-06,
|
|
"loss": 0.7629004716873169,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.8927818329278183,
|
|
"grad_norm": 0.5677574872970581,
|
|
"learning_rate": 8.423881985079315e-06,
|
|
"loss": 0.6493050456047058,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.8940794809407948,
|
|
"grad_norm": 0.5510875582695007,
|
|
"learning_rate": 8.418668095317912e-06,
|
|
"loss": 0.6685976386070251,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.8953771289537713,
|
|
"grad_norm": 0.5691307187080383,
|
|
"learning_rate": 8.413447215245534e-06,
|
|
"loss": 0.7029674053192139,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.8953771289537713,
|
|
"eval_loss": 0.6914051175117493,
|
|
"eval_runtime": 73.0841,
|
|
"eval_samples_per_second": 71.041,
|
|
"eval_steps_per_second": 8.88,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.8966747769667478,
|
|
"grad_norm": 0.5947213172912598,
|
|
"learning_rate": 8.408219355537557e-06,
|
|
"loss": 0.7144750356674194,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.8979724249797243,
|
|
"grad_norm": 0.6758149266242981,
|
|
"learning_rate": 8.402984526883635e-06,
|
|
"loss": 0.7232916355133057,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.8992700729927007,
|
|
"grad_norm": 0.6068633198738098,
|
|
"learning_rate": 8.397742739987664e-06,
|
|
"loss": 0.6896466612815857,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.9005677210056772,
|
|
"grad_norm": 0.5855746865272522,
|
|
"learning_rate": 8.392494005567773e-06,
|
|
"loss": 0.7137375473976135,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 0.9018653690186537,
|
|
"grad_norm": 0.6378610134124756,
|
|
"learning_rate": 8.387238334356294e-06,
|
|
"loss": 0.6991242170333862,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.9031630170316302,
|
|
"grad_norm": 0.5615161657333374,
|
|
"learning_rate": 8.381975737099745e-06,
|
|
"loss": 0.7315720319747925,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 0.9044606650446066,
|
|
"grad_norm": 0.5945183634757996,
|
|
"learning_rate": 8.376706224558807e-06,
|
|
"loss": 0.7387629151344299,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 0.9057583130575831,
|
|
"grad_norm": 0.5757802724838257,
|
|
"learning_rate": 8.3714298075083e-06,
|
|
"loss": 0.769163191318512,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 0.9070559610705596,
|
|
"grad_norm": 0.6023557186126709,
|
|
"learning_rate": 8.366146496737158e-06,
|
|
"loss": 0.7032333016395569,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 0.9083536090835361,
|
|
"grad_norm": 0.5623191595077515,
|
|
"learning_rate": 8.360856303048417e-06,
|
|
"loss": 0.688059389591217,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.9096512570965126,
|
|
"grad_norm": 0.6660424470901489,
|
|
"learning_rate": 8.355559237259181e-06,
|
|
"loss": 0.6570596098899841,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 0.910948905109489,
|
|
"grad_norm": 0.6358682513237,
|
|
"learning_rate": 8.350255310200611e-06,
|
|
"loss": 0.6851440668106079,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 0.9122465531224655,
|
|
"grad_norm": 0.5915968418121338,
|
|
"learning_rate": 8.344944532717898e-06,
|
|
"loss": 0.7370898127555847,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 0.913544201135442,
|
|
"grad_norm": 0.6724914908409119,
|
|
"learning_rate": 8.339626915670234e-06,
|
|
"loss": 0.6419695615768433,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 0.9148418491484185,
|
|
"grad_norm": 0.5758830308914185,
|
|
"learning_rate": 8.3343024699308e-06,
|
|
"loss": 0.7100552320480347,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.916139497161395,
|
|
"grad_norm": 0.5856196284294128,
|
|
"learning_rate": 8.328971206386742e-06,
|
|
"loss": 0.7285655736923218,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 0.9174371451743715,
|
|
"grad_norm": 0.6096091270446777,
|
|
"learning_rate": 8.323633135939145e-06,
|
|
"loss": 0.7508881092071533,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 0.918734793187348,
|
|
"grad_norm": 0.5876352787017822,
|
|
"learning_rate": 8.318288269503007e-06,
|
|
"loss": 0.7147477865219116,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 0.9200324412003245,
|
|
"grad_norm": 0.5633363127708435,
|
|
"learning_rate": 8.312936618007232e-06,
|
|
"loss": 0.7191579937934875,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 0.9213300892133008,
|
|
"grad_norm": 0.6324480772018433,
|
|
"learning_rate": 8.307578192394592e-06,
|
|
"loss": 0.6980431079864502,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.9226277372262773,
|
|
"grad_norm": 0.559508740901947,
|
|
"learning_rate": 8.30221300362171e-06,
|
|
"loss": 0.6977928280830383,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 0.9239253852392538,
|
|
"grad_norm": 0.5924115180969238,
|
|
"learning_rate": 8.29684106265904e-06,
|
|
"loss": 0.7254680395126343,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 0.9252230332522303,
|
|
"grad_norm": 0.5572075843811035,
|
|
"learning_rate": 8.291462380490842e-06,
|
|
"loss": 0.7060861587524414,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 0.9265206812652068,
|
|
"grad_norm": 0.5710304975509644,
|
|
"learning_rate": 8.286076968115158e-06,
|
|
"loss": 0.6528699398040771,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 0.9278183292781833,
|
|
"grad_norm": 0.7677385210990906,
|
|
"learning_rate": 8.280684836543794e-06,
|
|
"loss": 0.7742418646812439,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.9291159772911598,
|
|
"grad_norm": 0.5909350514411926,
|
|
"learning_rate": 8.275285996802293e-06,
|
|
"loss": 0.7355895042419434,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 0.9304136253041363,
|
|
"grad_norm": 0.6246051788330078,
|
|
"learning_rate": 8.269880459929919e-06,
|
|
"loss": 0.7119331955909729,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 0.9317112733171128,
|
|
"grad_norm": 1.3237872123718262,
|
|
"learning_rate": 8.264468236979626e-06,
|
|
"loss": 0.724329948425293,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 0.9330089213300892,
|
|
"grad_norm": 0.6042487621307373,
|
|
"learning_rate": 8.259049339018036e-06,
|
|
"loss": 0.7507586479187012,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 0.9343065693430657,
|
|
"grad_norm": 0.6646915078163147,
|
|
"learning_rate": 8.25362377712543e-06,
|
|
"loss": 0.7630937695503235,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.9356042173560422,
|
|
"grad_norm": 1.2076338529586792,
|
|
"learning_rate": 8.248191562395703e-06,
|
|
"loss": 0.6889426708221436,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 0.9369018653690186,
|
|
"grad_norm": 0.7128719091415405,
|
|
"learning_rate": 8.242752705936363e-06,
|
|
"loss": 0.7193243503570557,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 0.9381995133819951,
|
|
"grad_norm": 0.5779634714126587,
|
|
"learning_rate": 8.237307218868493e-06,
|
|
"loss": 0.7252578735351562,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 0.9394971613949716,
|
|
"grad_norm": 0.5774085521697998,
|
|
"learning_rate": 8.231855112326738e-06,
|
|
"loss": 0.7056664228439331,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 0.9407948094079481,
|
|
"grad_norm": 0.5759864449501038,
|
|
"learning_rate": 8.226396397459272e-06,
|
|
"loss": 0.7182119488716125,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.9420924574209246,
|
|
"grad_norm": 0.5475362539291382,
|
|
"learning_rate": 8.22093108542779e-06,
|
|
"loss": 0.7100398540496826,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 0.943390105433901,
|
|
"grad_norm": 0.6080360412597656,
|
|
"learning_rate": 8.215459187407468e-06,
|
|
"loss": 0.7540023326873779,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 0.9446877534468775,
|
|
"grad_norm": 0.5985339283943176,
|
|
"learning_rate": 8.209980714586955e-06,
|
|
"loss": 0.7655041217803955,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 0.945985401459854,
|
|
"grad_norm": 0.5587835311889648,
|
|
"learning_rate": 8.20449567816834e-06,
|
|
"loss": 0.7308551669120789,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 0.9472830494728305,
|
|
"grad_norm": 0.5767388939857483,
|
|
"learning_rate": 8.199004089367136e-06,
|
|
"loss": 0.7747267484664917,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.948580697485807,
|
|
"grad_norm": 0.5542681217193604,
|
|
"learning_rate": 8.193505959412246e-06,
|
|
"loss": 0.7009122371673584,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 0.9498783454987835,
|
|
"grad_norm": 0.7035977244377136,
|
|
"learning_rate": 8.188001299545963e-06,
|
|
"loss": 0.7160595655441284,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 0.95117599351176,
|
|
"grad_norm": 3.6369824409484863,
|
|
"learning_rate": 8.182490121023918e-06,
|
|
"loss": 0.7146700620651245,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 0.9524736415247365,
|
|
"grad_norm": 0.6017202734947205,
|
|
"learning_rate": 8.176972435115075e-06,
|
|
"loss": 0.7427970170974731,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 0.9537712895377128,
|
|
"grad_norm": 0.5797709822654724,
|
|
"learning_rate": 8.17144825310171e-06,
|
|
"loss": 0.7534258365631104,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.9550689375506893,
|
|
"grad_norm": 0.6132066249847412,
|
|
"learning_rate": 8.165917586279374e-06,
|
|
"loss": 0.6742781400680542,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 0.9563665855636658,
|
|
"grad_norm": 0.5700656175613403,
|
|
"learning_rate": 8.16038044595688e-06,
|
|
"loss": 0.7190455794334412,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 0.9576642335766423,
|
|
"grad_norm": 0.5793234705924988,
|
|
"learning_rate": 8.15483684345628e-06,
|
|
"loss": 0.7258193492889404,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 0.9589618815896188,
|
|
"grad_norm": 0.589043378829956,
|
|
"learning_rate": 8.149286790112838e-06,
|
|
"loss": 0.6817978620529175,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 0.9602595296025953,
|
|
"grad_norm": 0.5883787870407104,
|
|
"learning_rate": 8.143730297275008e-06,
|
|
"loss": 0.6951944828033447,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.9615571776155718,
|
|
"grad_norm": 0.6058008074760437,
|
|
"learning_rate": 8.138167376304411e-06,
|
|
"loss": 0.7065063118934631,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 0.9628548256285483,
|
|
"grad_norm": 0.5645580291748047,
|
|
"learning_rate": 8.132598038575814e-06,
|
|
"loss": 0.6607494354248047,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 0.9641524736415248,
|
|
"grad_norm": 0.5984307527542114,
|
|
"learning_rate": 8.1270222954771e-06,
|
|
"loss": 0.7731702327728271,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 0.9654501216545012,
|
|
"grad_norm": 0.5940436124801636,
|
|
"learning_rate": 8.121440158409255e-06,
|
|
"loss": 0.7217580080032349,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 0.9667477696674777,
|
|
"grad_norm": 0.6139102578163147,
|
|
"learning_rate": 8.115851638786335e-06,
|
|
"loss": 0.761775553226471,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.9680454176804542,
|
|
"grad_norm": 0.5621196627616882,
|
|
"learning_rate": 8.11025674803545e-06,
|
|
"loss": 0.7084890007972717,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 0.9693430656934306,
|
|
"grad_norm": 0.634238064289093,
|
|
"learning_rate": 8.104655497596734e-06,
|
|
"loss": 0.7413675785064697,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 0.9706407137064071,
|
|
"grad_norm": 0.6062578558921814,
|
|
"learning_rate": 8.099047898923326e-06,
|
|
"loss": 0.6940469741821289,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 0.9719383617193836,
|
|
"grad_norm": 1.2983204126358032,
|
|
"learning_rate": 8.093433963481348e-06,
|
|
"loss": 0.7091077566146851,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 0.9732360097323601,
|
|
"grad_norm": 0.5655047297477722,
|
|
"learning_rate": 8.087813702749873e-06,
|
|
"loss": 0.7066688537597656,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.9745336577453366,
|
|
"grad_norm": 0.6067200303077698,
|
|
"learning_rate": 8.082187128220918e-06,
|
|
"loss": 0.7150874137878418,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 0.975831305758313,
|
|
"grad_norm": 0.5860595107078552,
|
|
"learning_rate": 8.076554251399398e-06,
|
|
"loss": 0.7268061637878418,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 0.9771289537712895,
|
|
"grad_norm": 0.5691843628883362,
|
|
"learning_rate": 8.070915083803124e-06,
|
|
"loss": 0.7130003571510315,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 0.978426601784266,
|
|
"grad_norm": 0.5511523485183716,
|
|
"learning_rate": 8.065269636962765e-06,
|
|
"loss": 0.7632818222045898,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 0.9797242497972425,
|
|
"grad_norm": 0.9720051884651184,
|
|
"learning_rate": 8.059617922421832e-06,
|
|
"loss": 0.6920190453529358,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.981021897810219,
|
|
"grad_norm": 0.9689953327178955,
|
|
"learning_rate": 8.053959951736647e-06,
|
|
"loss": 0.7026671171188354,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 0.9823195458231955,
|
|
"grad_norm": 0.5877639055252075,
|
|
"learning_rate": 8.048295736476332e-06,
|
|
"loss": 0.7458422780036926,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 0.983617193836172,
|
|
"grad_norm": 0.5555517077445984,
|
|
"learning_rate": 8.042625288222774e-06,
|
|
"loss": 0.6832958459854126,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 0.9849148418491485,
|
|
"grad_norm": 0.5778935551643372,
|
|
"learning_rate": 8.036948618570601e-06,
|
|
"loss": 0.6715413331985474,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 0.986212489862125,
|
|
"grad_norm": 0.5913302898406982,
|
|
"learning_rate": 8.031265739127167e-06,
|
|
"loss": 0.6345862150192261,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.9875101378751013,
|
|
"grad_norm": 0.5491726994514465,
|
|
"learning_rate": 8.025576661512524e-06,
|
|
"loss": 0.6723500490188599,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 0.9888077858880778,
|
|
"grad_norm": 0.5520846247673035,
|
|
"learning_rate": 8.019881397359395e-06,
|
|
"loss": 0.7205091118812561,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 0.9901054339010543,
|
|
"grad_norm": 0.5902574062347412,
|
|
"learning_rate": 8.014179958313154e-06,
|
|
"loss": 0.7127419114112854,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 0.9914030819140308,
|
|
"grad_norm": 0.5558638572692871,
|
|
"learning_rate": 8.008472356031795e-06,
|
|
"loss": 0.6300485134124756,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 0.9927007299270073,
|
|
"grad_norm": 0.5584984421730042,
|
|
"learning_rate": 8.00275860218593e-06,
|
|
"loss": 0.6915569305419922,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.9939983779399838,
|
|
"grad_norm": 0.5804587006568909,
|
|
"learning_rate": 7.99703870845873e-06,
|
|
"loss": 0.7401936054229736,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 0.9952960259529603,
|
|
"grad_norm": 0.562065064907074,
|
|
"learning_rate": 7.991312686545939e-06,
|
|
"loss": 0.6845479011535645,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 0.9965936739659368,
|
|
"grad_norm": 0.5887646079063416,
|
|
"learning_rate": 7.985580548155814e-06,
|
|
"loss": 0.7238905429840088,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 0.9978913219789132,
|
|
"grad_norm": 0.70610111951828,
|
|
"learning_rate": 7.979842305009133e-06,
|
|
"loss": 0.6573514342308044,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 0.9991889699918897,
|
|
"grad_norm": 0.5765895843505859,
|
|
"learning_rate": 7.974097968839149e-06,
|
|
"loss": 0.6816248297691345,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.7250688672065735,
|
|
"learning_rate": 7.968347551391574e-06,
|
|
"loss": 0.6674489974975586,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 1.0012976480129765,
|
|
"grad_norm": 0.6792595982551575,
|
|
"learning_rate": 7.962591064424558e-06,
|
|
"loss": 0.6514409184455872,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 1.002595296025953,
|
|
"grad_norm": 0.7125512361526489,
|
|
"learning_rate": 7.95682851970866e-06,
|
|
"loss": 0.6212759613990784,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 1.0038929440389295,
|
|
"grad_norm": 0.6438767313957214,
|
|
"learning_rate": 7.951059929026826e-06,
|
|
"loss": 0.6282512545585632,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 1.005190592051906,
|
|
"grad_norm": 0.572353720664978,
|
|
"learning_rate": 7.94528530417436e-06,
|
|
"loss": 0.6370311379432678,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 1.0064882400648825,
|
|
"grad_norm": 0.5794159173965454,
|
|
"learning_rate": 7.939504656958913e-06,
|
|
"loss": 0.6351627707481384,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 1.007785888077859,
|
|
"grad_norm": 0.6709707379341125,
|
|
"learning_rate": 7.933717999200442e-06,
|
|
"loss": 0.7240197658538818,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 1.0090835360908355,
|
|
"grad_norm": 0.7591879963874817,
|
|
"learning_rate": 7.927925342731202e-06,
|
|
"loss": 0.662930428981781,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 1.010381184103812,
|
|
"grad_norm": 0.6731166243553162,
|
|
"learning_rate": 7.922126699395705e-06,
|
|
"loss": 0.6665748357772827,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 1.0116788321167882,
|
|
"grad_norm": 0.6249240040779114,
|
|
"learning_rate": 7.916322081050708e-06,
|
|
"loss": 0.6313880681991577,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 1.0129764801297647,
|
|
"grad_norm": 0.6070784330368042,
|
|
"learning_rate": 7.910511499565192e-06,
|
|
"loss": 0.5778607130050659,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 1.0142741281427412,
|
|
"grad_norm": 0.5682867765426636,
|
|
"learning_rate": 7.90469496682032e-06,
|
|
"loss": 0.5984998941421509,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 1.0155717761557177,
|
|
"grad_norm": 0.5944799184799194,
|
|
"learning_rate": 7.89887249470943e-06,
|
|
"loss": 0.6242648363113403,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 1.0168694241686942,
|
|
"grad_norm": 0.8286924958229065,
|
|
"learning_rate": 7.89304409513801e-06,
|
|
"loss": 0.612074613571167,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 1.0181670721816707,
|
|
"grad_norm": 0.6117927432060242,
|
|
"learning_rate": 7.887209780023652e-06,
|
|
"loss": 0.6674654483795166,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 1.0194647201946472,
|
|
"grad_norm": 0.6768798828125,
|
|
"learning_rate": 7.881369561296061e-06,
|
|
"loss": 0.6811670660972595,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 1.0207623682076237,
|
|
"grad_norm": 0.6664367914199829,
|
|
"learning_rate": 7.875523450897004e-06,
|
|
"loss": 0.638746440410614,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 1.0220600162206002,
|
|
"grad_norm": 1.1638799905776978,
|
|
"learning_rate": 7.869671460780297e-06,
|
|
"loss": 0.6403613090515137,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 1.0233576642335767,
|
|
"grad_norm": 0.5986616015434265,
|
|
"learning_rate": 7.863813602911777e-06,
|
|
"loss": 0.6099958419799805,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 1.0246553122465532,
|
|
"grad_norm": 1.8672071695327759,
|
|
"learning_rate": 7.857949889269285e-06,
|
|
"loss": 0.6486390829086304,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 1.0259529602595296,
|
|
"grad_norm": 0.6674206852912903,
|
|
"learning_rate": 7.852080331842627e-06,
|
|
"loss": 0.5824840664863586,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 1.0272506082725061,
|
|
"grad_norm": 0.6552616953849792,
|
|
"learning_rate": 7.846204942633564e-06,
|
|
"loss": 0.7385782599449158,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 1.0285482562854826,
|
|
"grad_norm": 0.636968195438385,
|
|
"learning_rate": 7.84032373365578e-06,
|
|
"loss": 0.6557282209396362,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 1.0298459042984591,
|
|
"grad_norm": 0.5769335627555847,
|
|
"learning_rate": 7.834436716934859e-06,
|
|
"loss": 0.5607404708862305,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 1.0311435523114356,
|
|
"grad_norm": 0.6747480034828186,
|
|
"learning_rate": 7.828543904508258e-06,
|
|
"loss": 0.6176875829696655,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 1.0324412003244121,
|
|
"grad_norm": 0.5826826691627502,
|
|
"learning_rate": 7.82264530842529e-06,
|
|
"loss": 0.6352604627609253,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 1.0337388483373884,
|
|
"grad_norm": 0.5748003721237183,
|
|
"learning_rate": 7.816740940747089e-06,
|
|
"loss": 0.5930640697479248,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 1.0350364963503649,
|
|
"grad_norm": 0.5976374745368958,
|
|
"learning_rate": 7.810830813546594e-06,
|
|
"loss": 0.6040553450584412,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 1.0363341443633414,
|
|
"grad_norm": 0.5924686789512634,
|
|
"learning_rate": 7.80491493890852e-06,
|
|
"loss": 0.6496337652206421,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 1.0376317923763179,
|
|
"grad_norm": 0.5696931481361389,
|
|
"learning_rate": 7.798993328929328e-06,
|
|
"loss": 0.6347925662994385,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 1.0389294403892944,
|
|
"grad_norm": 0.5750864148139954,
|
|
"learning_rate": 7.793065995717217e-06,
|
|
"loss": 0.6404843330383301,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 1.0402270884022708,
|
|
"grad_norm": 0.5975061058998108,
|
|
"learning_rate": 7.787132951392082e-06,
|
|
"loss": 0.5997766256332397,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 1.0415247364152473,
|
|
"grad_norm": 0.6157170534133911,
|
|
"learning_rate": 7.781194208085495e-06,
|
|
"loss": 0.6501672267913818,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 1.0428223844282238,
|
|
"grad_norm": 0.6032687425613403,
|
|
"learning_rate": 7.775249777940685e-06,
|
|
"loss": 0.6564816832542419,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 1.0441200324412003,
|
|
"grad_norm": 0.5874586701393127,
|
|
"learning_rate": 7.769299673112507e-06,
|
|
"loss": 0.6064618825912476,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 1.0454176804541768,
|
|
"grad_norm": 0.6239724159240723,
|
|
"learning_rate": 7.76334390576742e-06,
|
|
"loss": 0.6170182228088379,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 1.0467153284671533,
|
|
"grad_norm": 0.6056293845176697,
|
|
"learning_rate": 7.757382488083458e-06,
|
|
"loss": 0.7019131183624268,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 1.0480129764801298,
|
|
"grad_norm": 0.5994875431060791,
|
|
"learning_rate": 7.751415432250213e-06,
|
|
"loss": 0.6316931247711182,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 1.0493106244931063,
|
|
"grad_norm": 0.6516374945640564,
|
|
"learning_rate": 7.745442750468803e-06,
|
|
"loss": 0.649019718170166,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 1.0506082725060828,
|
|
"grad_norm": 0.5792532563209534,
|
|
"learning_rate": 7.739464454951853e-06,
|
|
"loss": 0.6500118374824524,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 1.0519059205190593,
|
|
"grad_norm": 0.745469331741333,
|
|
"learning_rate": 7.733480557923464e-06,
|
|
"loss": 0.5821675658226013,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 1.0532035685320358,
|
|
"grad_norm": 0.6124119162559509,
|
|
"learning_rate": 7.727491071619186e-06,
|
|
"loss": 0.6384508609771729,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 1.0545012165450123,
|
|
"grad_norm": 0.5831156969070435,
|
|
"learning_rate": 7.72149600828601e-06,
|
|
"loss": 0.6578410267829895,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 1.0557988645579885,
|
|
"grad_norm": 0.605689287185669,
|
|
"learning_rate": 7.715495380182314e-06,
|
|
"loss": 0.6352893710136414,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 1.057096512570965,
|
|
"grad_norm": 0.5769819617271423,
|
|
"learning_rate": 7.709489199577874e-06,
|
|
"loss": 0.5956138372421265,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 1.0583941605839415,
|
|
"grad_norm": 1.2673306465148926,
|
|
"learning_rate": 7.7034774787538e-06,
|
|
"loss": 0.6302381753921509,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 1.059691808596918,
|
|
"grad_norm": 0.5970334410667419,
|
|
"learning_rate": 7.697460230002545e-06,
|
|
"loss": 0.6213703751564026,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 1.0609894566098945,
|
|
"grad_norm": 0.5932973623275757,
|
|
"learning_rate": 7.691437465627859e-06,
|
|
"loss": 0.6656537652015686,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 1.062287104622871,
|
|
"grad_norm": 0.5778910517692566,
|
|
"learning_rate": 7.685409197944768e-06,
|
|
"loss": 0.6016901135444641,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 1.0635847526358475,
|
|
"grad_norm": 0.6970887780189514,
|
|
"learning_rate": 7.679375439279557e-06,
|
|
"loss": 0.6404139995574951,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 1.064882400648824,
|
|
"grad_norm": 0.8317319750785828,
|
|
"learning_rate": 7.673336201969733e-06,
|
|
"loss": 0.670491099357605,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 1.0661800486618005,
|
|
"grad_norm": 0.5904209613800049,
|
|
"learning_rate": 7.667291498364009e-06,
|
|
"loss": 0.697813868522644,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 1.067477696674777,
|
|
"grad_norm": 0.6368371844291687,
|
|
"learning_rate": 7.661241340822274e-06,
|
|
"loss": 0.6957151889801025,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 1.0687753446877535,
|
|
"grad_norm": 0.6323496103286743,
|
|
"learning_rate": 7.655185741715569e-06,
|
|
"loss": 0.6282387375831604,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 1.07007299270073,
|
|
"grad_norm": 0.582459568977356,
|
|
"learning_rate": 7.64912471342606e-06,
|
|
"loss": 0.6632883548736572,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 1.0713706407137065,
|
|
"grad_norm": 0.5815753936767578,
|
|
"learning_rate": 7.643058268347015e-06,
|
|
"loss": 0.6437957882881165,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 1.072668288726683,
|
|
"grad_norm": 0.5913931131362915,
|
|
"learning_rate": 7.636986418882783e-06,
|
|
"loss": 0.6558079719543457,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 1.0739659367396595,
|
|
"grad_norm": 0.5545955300331116,
|
|
"learning_rate": 7.630909177448755e-06,
|
|
"loss": 0.6246286630630493,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 1.075263584752636,
|
|
"grad_norm": 0.5951606631278992,
|
|
"learning_rate": 7.624826556471354e-06,
|
|
"loss": 0.6540351510047913,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 1.0765612327656124,
|
|
"grad_norm": 0.6533515453338623,
|
|
"learning_rate": 7.618738568388e-06,
|
|
"loss": 0.6222127676010132,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 1.0778588807785887,
|
|
"grad_norm": 0.5797233581542969,
|
|
"learning_rate": 7.612645225647086e-06,
|
|
"loss": 0.5815407037734985,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 1.0791565287915652,
|
|
"grad_norm": 0.6024124622344971,
|
|
"learning_rate": 7.60654654070796e-06,
|
|
"loss": 0.609170138835907,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 1.0804541768045417,
|
|
"grad_norm": 0.6007437109947205,
|
|
"learning_rate": 7.600442526040883e-06,
|
|
"loss": 0.6566615104675293,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 1.0817518248175182,
|
|
"grad_norm": 0.6132609844207764,
|
|
"learning_rate": 7.594333194127025e-06,
|
|
"loss": 0.6762999892234802,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 1.0830494728304947,
|
|
"grad_norm": 0.6206640005111694,
|
|
"learning_rate": 7.58821855745842e-06,
|
|
"loss": 0.6008488535881042,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 1.0843471208434712,
|
|
"grad_norm": 0.5727500319480896,
|
|
"learning_rate": 7.582098628537955e-06,
|
|
"loss": 0.6291306018829346,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 1.0856447688564477,
|
|
"grad_norm": 0.5835679769515991,
|
|
"learning_rate": 7.5759734198793365e-06,
|
|
"loss": 0.598922848701477,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 1.0869424168694242,
|
|
"grad_norm": 0.6435012817382812,
|
|
"learning_rate": 7.5698429440070616e-06,
|
|
"loss": 0.6742567420005798,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 1.0882400648824007,
|
|
"grad_norm": 0.6521117687225342,
|
|
"learning_rate": 7.563707213456405e-06,
|
|
"loss": 0.7133705615997314,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 1.0895377128953772,
|
|
"grad_norm": 0.6230207085609436,
|
|
"learning_rate": 7.5575662407733815e-06,
|
|
"loss": 0.6346240043640137,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 1.0908353609083536,
|
|
"grad_norm": 0.6041070818901062,
|
|
"learning_rate": 7.551420038514726e-06,
|
|
"loss": 0.5786027908325195,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 1.0921330089213301,
|
|
"grad_norm": 0.6142879724502563,
|
|
"learning_rate": 7.54526861924787e-06,
|
|
"loss": 0.689670205116272,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 1.0934306569343066,
|
|
"grad_norm": 0.5727767944335938,
|
|
"learning_rate": 7.5391119955509026e-06,
|
|
"loss": 0.6093534827232361,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 1.0947283049472831,
|
|
"grad_norm": 0.5920162796974182,
|
|
"learning_rate": 7.532950180012564e-06,
|
|
"loss": 0.6508292555809021,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 1.0960259529602596,
|
|
"grad_norm": 0.6140349507331848,
|
|
"learning_rate": 7.526783185232208e-06,
|
|
"loss": 0.6522685885429382,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 1.0973236009732361,
|
|
"grad_norm": 0.6111754179000854,
|
|
"learning_rate": 7.520611023819779e-06,
|
|
"loss": 0.6456558704376221,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 1.0986212489862126,
|
|
"grad_norm": 0.5693365931510925,
|
|
"learning_rate": 7.514433708395783e-06,
|
|
"loss": 0.6057475805282593,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 1.0999188969991889,
|
|
"grad_norm": 0.6043863892555237,
|
|
"learning_rate": 7.508251251591266e-06,
|
|
"loss": 0.6344411969184875,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 1.1012165450121654,
|
|
"grad_norm": 0.6892386078834534,
|
|
"learning_rate": 7.5020636660477894e-06,
|
|
"loss": 0.6500993371009827,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 1.1025141930251419,
|
|
"grad_norm": 0.6054773926734924,
|
|
"learning_rate": 7.4958709644174e-06,
|
|
"loss": 0.6792426109313965,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 1.1038118410381184,
|
|
"grad_norm": 0.6106455326080322,
|
|
"learning_rate": 7.4896731593626015e-06,
|
|
"loss": 0.648511528968811,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 1.1051094890510949,
|
|
"grad_norm": 0.5832105875015259,
|
|
"learning_rate": 7.4834702635563395e-06,
|
|
"loss": 0.6617711782455444,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 1.1064071370640713,
|
|
"grad_norm": 0.668353259563446,
|
|
"learning_rate": 7.477262289681966e-06,
|
|
"loss": 0.6955296397209167,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 1.1077047850770478,
|
|
"grad_norm": 0.5962719917297363,
|
|
"learning_rate": 7.471049250433214e-06,
|
|
"loss": 0.680686354637146,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 1.1090024330900243,
|
|
"grad_norm": 0.6140416860580444,
|
|
"learning_rate": 7.464831158514179e-06,
|
|
"loss": 0.6445127725601196,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 1.1103000811030008,
|
|
"grad_norm": 0.6690049171447754,
|
|
"learning_rate": 7.458608026639285e-06,
|
|
"loss": 0.6185108423233032,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 1.1115977291159773,
|
|
"grad_norm": 0.7241218090057373,
|
|
"learning_rate": 7.45237986753326e-06,
|
|
"loss": 0.6828392744064331,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 1.1128953771289538,
|
|
"grad_norm": 0.6075162887573242,
|
|
"learning_rate": 7.446146693931111e-06,
|
|
"loss": 0.6688688397407532,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 1.1141930251419303,
|
|
"grad_norm": 0.7877935767173767,
|
|
"learning_rate": 7.439908518578105e-06,
|
|
"loss": 0.6596081852912903,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 1.1154906731549068,
|
|
"grad_norm": 0.5754934549331665,
|
|
"learning_rate": 7.433665354229731e-06,
|
|
"loss": 0.655542254447937,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 1.1167883211678833,
|
|
"grad_norm": 0.6457986831665039,
|
|
"learning_rate": 7.4274172136516766e-06,
|
|
"loss": 0.6543152928352356,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 1.1180859691808598,
|
|
"grad_norm": 0.5904266238212585,
|
|
"learning_rate": 7.421164109619809e-06,
|
|
"loss": 0.6421469449996948,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 1.119383617193836,
|
|
"grad_norm": 0.5537955164909363,
|
|
"learning_rate": 7.4149060549201455e-06,
|
|
"loss": 0.609650194644928,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 1.1206812652068125,
|
|
"grad_norm": 0.5964105129241943,
|
|
"learning_rate": 7.408643062348824e-06,
|
|
"loss": 0.6043794751167297,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 1.121978913219789,
|
|
"grad_norm": 0.5994772911071777,
|
|
"learning_rate": 7.402375144712075e-06,
|
|
"loss": 0.6849918365478516,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 1.1232765612327655,
|
|
"grad_norm": 0.6322051286697388,
|
|
"learning_rate": 7.396102314826207e-06,
|
|
"loss": 0.6219741106033325,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 1.124574209245742,
|
|
"grad_norm": 0.5794394016265869,
|
|
"learning_rate": 7.389824585517569e-06,
|
|
"loss": 0.6507738828659058,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 1.1258718572587185,
|
|
"grad_norm": 0.6662233471870422,
|
|
"learning_rate": 7.3835419696225275e-06,
|
|
"loss": 0.6731002330780029,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 1.127169505271695,
|
|
"grad_norm": 0.5842033624649048,
|
|
"learning_rate": 7.377254479987445e-06,
|
|
"loss": 0.6546036005020142,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 1.1284671532846715,
|
|
"grad_norm": 2.6347815990448,
|
|
"learning_rate": 7.370962129468642e-06,
|
|
"loss": 0.61831134557724,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 1.129764801297648,
|
|
"grad_norm": 0.6191915273666382,
|
|
"learning_rate": 7.364664930932385e-06,
|
|
"loss": 0.682953953742981,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 1.1310624493106245,
|
|
"grad_norm": 0.6216323375701904,
|
|
"learning_rate": 7.35836289725485e-06,
|
|
"loss": 0.6735019087791443,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 1.132360097323601,
|
|
"grad_norm": 0.5958914756774902,
|
|
"learning_rate": 7.352056041322103e-06,
|
|
"loss": 0.6420754194259644,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 1.1336577453365775,
|
|
"grad_norm": 0.5970807671546936,
|
|
"learning_rate": 7.345744376030066e-06,
|
|
"loss": 0.6589509844779968,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 1.134955393349554,
|
|
"grad_norm": 0.6387295126914978,
|
|
"learning_rate": 7.339427914284498e-06,
|
|
"loss": 0.5913777351379395,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 1.1362530413625305,
|
|
"grad_norm": 1.3676766157150269,
|
|
"learning_rate": 7.3331066690009644e-06,
|
|
"loss": 0.6156778931617737,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 1.137550689375507,
|
|
"grad_norm": 0.5990293025970459,
|
|
"learning_rate": 7.326780653104813e-06,
|
|
"loss": 0.6320254802703857,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 1.1388483373884835,
|
|
"grad_norm": 0.6619262099266052,
|
|
"learning_rate": 7.320449879531143e-06,
|
|
"loss": 0.6741781830787659,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 1.14014598540146,
|
|
"grad_norm": 0.6091610193252563,
|
|
"learning_rate": 7.314114361224785e-06,
|
|
"loss": 0.6403502821922302,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 1.1414436334144362,
|
|
"grad_norm": 0.6015101075172424,
|
|
"learning_rate": 7.30777411114027e-06,
|
|
"loss": 0.6477581858634949,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 1.142741281427413,
|
|
"grad_norm": 0.5771135687828064,
|
|
"learning_rate": 7.301429142241805e-06,
|
|
"loss": 0.5903566479682922,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 1.1440389294403892,
|
|
"grad_norm": 0.571612536907196,
|
|
"learning_rate": 7.295079467503247e-06,
|
|
"loss": 0.5671682357788086,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 1.1453365774533657,
|
|
"grad_norm": 0.7478623390197754,
|
|
"learning_rate": 7.288725099908071e-06,
|
|
"loss": 0.6659491658210754,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 1.1466342254663422,
|
|
"grad_norm": 0.6303284764289856,
|
|
"learning_rate": 7.282366052449351e-06,
|
|
"loss": 0.7001731395721436,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 1.1479318734793187,
|
|
"grad_norm": 0.5829930901527405,
|
|
"learning_rate": 7.276002338129731e-06,
|
|
"loss": 0.632986843585968,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 1.1492295214922952,
|
|
"grad_norm": 0.6018064022064209,
|
|
"learning_rate": 7.269633969961395e-06,
|
|
"loss": 0.6848266124725342,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 1.1505271695052717,
|
|
"grad_norm": 0.7479543089866638,
|
|
"learning_rate": 7.2632609609660456e-06,
|
|
"loss": 0.6810072064399719,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 1.1518248175182482,
|
|
"grad_norm": 0.5979959964752197,
|
|
"learning_rate": 7.256883324174871e-06,
|
|
"loss": 0.59900963306427,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 1.1531224655312247,
|
|
"grad_norm": 0.608985424041748,
|
|
"learning_rate": 7.250501072628524e-06,
|
|
"loss": 0.6502770185470581,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 1.1544201135442012,
|
|
"grad_norm": 0.5771687626838684,
|
|
"learning_rate": 7.2441142193770955e-06,
|
|
"loss": 0.6427179574966431,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 1.1557177615571776,
|
|
"grad_norm": 0.7472683787345886,
|
|
"learning_rate": 7.237722777480083e-06,
|
|
"loss": 0.6853768825531006,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 1.1570154095701541,
|
|
"grad_norm": 0.5946991443634033,
|
|
"learning_rate": 7.231326760006368e-06,
|
|
"loss": 0.6969834566116333,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 1.1583130575831306,
|
|
"grad_norm": 0.6238925457000732,
|
|
"learning_rate": 7.224926180034186e-06,
|
|
"loss": 0.6919976472854614,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 1.1596107055961071,
|
|
"grad_norm": 0.6162919402122498,
|
|
"learning_rate": 7.218521050651106e-06,
|
|
"loss": 0.6636837720870972,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 1.1609083536090836,
|
|
"grad_norm": 0.5723338723182678,
|
|
"learning_rate": 7.212111384953993e-06,
|
|
"loss": 0.6149659156799316,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 1.1622060016220601,
|
|
"grad_norm": 0.6074439883232117,
|
|
"learning_rate": 7.205697196048992e-06,
|
|
"loss": 0.6255541443824768,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 1.1635036496350364,
|
|
"grad_norm": 0.6277779936790466,
|
|
"learning_rate": 7.199278497051498e-06,
|
|
"loss": 0.6648150086402893,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 1.164801297648013,
|
|
"grad_norm": 0.6254341006278992,
|
|
"learning_rate": 7.192855301086123e-06,
|
|
"loss": 0.6707339882850647,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 1.1660989456609894,
|
|
"grad_norm": 0.6244154572486877,
|
|
"learning_rate": 7.186427621286678e-06,
|
|
"loss": 0.6344256401062012,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 1.1673965936739659,
|
|
"grad_norm": 0.6074284911155701,
|
|
"learning_rate": 7.179995470796141e-06,
|
|
"loss": 0.6663004159927368,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 1.1686942416869424,
|
|
"grad_norm": 0.6512662768363953,
|
|
"learning_rate": 7.1735588627666346e-06,
|
|
"loss": 0.6009752154350281,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 1.1699918896999189,
|
|
"grad_norm": 0.6028872132301331,
|
|
"learning_rate": 7.167117810359387e-06,
|
|
"loss": 0.5874291062355042,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 1.1712895377128953,
|
|
"grad_norm": 0.6266588568687439,
|
|
"learning_rate": 7.160672326744726e-06,
|
|
"loss": 0.6230692267417908,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 1.1725871857258718,
|
|
"grad_norm": 3.8021433353424072,
|
|
"learning_rate": 7.154222425102033e-06,
|
|
"loss": 0.6242640018463135,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 1.1738848337388483,
|
|
"grad_norm": 0.6971346735954285,
|
|
"learning_rate": 7.1477681186197225e-06,
|
|
"loss": 0.6548742651939392,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 1.1751824817518248,
|
|
"grad_norm": 0.612678587436676,
|
|
"learning_rate": 7.141309420495219e-06,
|
|
"loss": 0.6528737545013428,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 1.1764801297648013,
|
|
"grad_norm": 0.6218580007553101,
|
|
"learning_rate": 7.134846343934924e-06,
|
|
"loss": 0.6845676898956299,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 1.1777777777777778,
|
|
"grad_norm": 0.6113817691802979,
|
|
"learning_rate": 7.128378902154195e-06,
|
|
"loss": 0.6958880424499512,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 1.1790754257907543,
|
|
"grad_norm": 0.6120286583900452,
|
|
"learning_rate": 7.121907108377313e-06,
|
|
"loss": 0.6543635725975037,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 1.1803730738037308,
|
|
"grad_norm": 0.6076055765151978,
|
|
"learning_rate": 7.115430975837457e-06,
|
|
"loss": 0.6869640946388245,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 1.1816707218167073,
|
|
"grad_norm": 0.6232397556304932,
|
|
"learning_rate": 7.10895051777668e-06,
|
|
"loss": 0.6338291764259338,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 1.1829683698296838,
|
|
"grad_norm": 0.6153266429901123,
|
|
"learning_rate": 7.1024657474458795e-06,
|
|
"loss": 0.6337912678718567,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 1.1842660178426603,
|
|
"grad_norm": 0.6057350039482117,
|
|
"learning_rate": 7.095976678104768e-06,
|
|
"loss": 0.6359199285507202,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 1.1855636658556366,
|
|
"grad_norm": 0.6107894778251648,
|
|
"learning_rate": 7.089483323021851e-06,
|
|
"loss": 0.6233211755752563,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 1.186861313868613,
|
|
"grad_norm": 0.5987040400505066,
|
|
"learning_rate": 7.082985695474394e-06,
|
|
"loss": 0.6974512338638306,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 1.1881589618815895,
|
|
"grad_norm": 0.5928195118904114,
|
|
"learning_rate": 7.076483808748402e-06,
|
|
"loss": 0.6281331777572632,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 1.189456609894566,
|
|
"grad_norm": 0.751203179359436,
|
|
"learning_rate": 7.069977676138588e-06,
|
|
"loss": 0.6113827228546143,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 1.1907542579075425,
|
|
"grad_norm": 0.6335259079933167,
|
|
"learning_rate": 7.063467310948346e-06,
|
|
"loss": 0.5900315046310425,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 1.192051905920519,
|
|
"grad_norm": 0.6231621503829956,
|
|
"learning_rate": 7.0569527264897275e-06,
|
|
"loss": 0.6505625247955322,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 1.1933495539334955,
|
|
"grad_norm": 0.6135134696960449,
|
|
"learning_rate": 7.050433936083405e-06,
|
|
"loss": 0.6122363805770874,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 1.1933495539334955,
|
|
"eval_loss": 0.68769770860672,
|
|
"eval_runtime": 73.0979,
|
|
"eval_samples_per_second": 71.028,
|
|
"eval_steps_per_second": 8.879,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 1.194647201946472,
|
|
"grad_norm": 0.5773142576217651,
|
|
"learning_rate": 7.043910953058657e-06,
|
|
"loss": 0.5964255332946777,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 1.1959448499594485,
|
|
"grad_norm": 0.6031613945960999,
|
|
"learning_rate": 7.037383790753333e-06,
|
|
"loss": 0.662893533706665,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 1.197242497972425,
|
|
"grad_norm": 0.6189724206924438,
|
|
"learning_rate": 7.030852462513827e-06,
|
|
"loss": 0.6189711093902588,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 1.1985401459854015,
|
|
"grad_norm": 0.6367059946060181,
|
|
"learning_rate": 7.024316981695053e-06,
|
|
"loss": 0.6123430132865906,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 1.199837793998378,
|
|
"grad_norm": 0.6039940118789673,
|
|
"learning_rate": 7.017777361660414e-06,
|
|
"loss": 0.6341007947921753,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 1.2011354420113545,
|
|
"grad_norm": 0.7465354204177856,
|
|
"learning_rate": 7.011233615781777e-06,
|
|
"loss": 0.6174352765083313,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 1.202433090024331,
|
|
"grad_norm": 0.6807838678359985,
|
|
"learning_rate": 7.004685757439449e-06,
|
|
"loss": 0.7061627507209778,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 1.2037307380373075,
|
|
"grad_norm": 0.5960806012153625,
|
|
"learning_rate": 6.99813380002214e-06,
|
|
"loss": 0.6526781320571899,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 1.205028386050284,
|
|
"grad_norm": 0.5771905183792114,
|
|
"learning_rate": 6.991577756926948e-06,
|
|
"loss": 0.6951519250869751,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 1.2063260340632604,
|
|
"grad_norm": 0.632168710231781,
|
|
"learning_rate": 6.9850176415593195e-06,
|
|
"loss": 0.6279127597808838,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 1.2076236820762367,
|
|
"grad_norm": 0.6110833287239075,
|
|
"learning_rate": 6.978453467333028e-06,
|
|
"loss": 0.6424981355667114,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 1.2089213300892132,
|
|
"grad_norm": 0.5829861164093018,
|
|
"learning_rate": 6.9718852476701535e-06,
|
|
"loss": 0.6850586533546448,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 1.2102189781021897,
|
|
"grad_norm": 0.6042872071266174,
|
|
"learning_rate": 6.965312996001038e-06,
|
|
"loss": 0.628888726234436,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 1.2115166261151662,
|
|
"grad_norm": 0.641800045967102,
|
|
"learning_rate": 6.958736725764275e-06,
|
|
"loss": 0.6589823961257935,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 1.2128142741281427,
|
|
"grad_norm": 0.5857986211776733,
|
|
"learning_rate": 6.952156450406673e-06,
|
|
"loss": 0.5867838859558105,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 1.2141119221411192,
|
|
"grad_norm": 0.6070905923843384,
|
|
"learning_rate": 6.945572183383229e-06,
|
|
"loss": 0.6120666265487671,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 1.2154095701540957,
|
|
"grad_norm": 0.620799720287323,
|
|
"learning_rate": 6.9389839381571025e-06,
|
|
"loss": 0.6689779758453369,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 1.2167072181670722,
|
|
"grad_norm": 3.69341778755188,
|
|
"learning_rate": 6.932391728199587e-06,
|
|
"loss": 0.6268787384033203,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 1.2180048661800487,
|
|
"grad_norm": 0.6159505248069763,
|
|
"learning_rate": 6.925795566990083e-06,
|
|
"loss": 0.6517162322998047,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 1.2193025141930252,
|
|
"grad_norm": 0.6000729203224182,
|
|
"learning_rate": 6.919195468016073e-06,
|
|
"loss": 0.6077402234077454,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 1.2206001622060016,
|
|
"grad_norm": 0.5589438080787659,
|
|
"learning_rate": 6.9125914447730865e-06,
|
|
"loss": 0.596868634223938,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 1.2218978102189781,
|
|
"grad_norm": 2.3887641429901123,
|
|
"learning_rate": 6.905983510764681e-06,
|
|
"loss": 0.6510117053985596,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 1.2231954582319546,
|
|
"grad_norm": 0.5905357003211975,
|
|
"learning_rate": 6.899371679502408e-06,
|
|
"loss": 0.6385715007781982,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 1.2244931062449311,
|
|
"grad_norm": 0.6210343837738037,
|
|
"learning_rate": 6.89275596450579e-06,
|
|
"loss": 0.5893187522888184,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 1.2257907542579076,
|
|
"grad_norm": 0.5834376215934753,
|
|
"learning_rate": 6.886136379302288e-06,
|
|
"loss": 0.6301822662353516,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 1.2270884022708841,
|
|
"grad_norm": 0.6120421886444092,
|
|
"learning_rate": 6.87951293742728e-06,
|
|
"loss": 0.6227176189422607,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 1.2283860502838606,
|
|
"grad_norm": 0.5846749544143677,
|
|
"learning_rate": 6.872885652424028e-06,
|
|
"loss": 0.5956023931503296,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 1.2296836982968369,
|
|
"grad_norm": 0.6237694025039673,
|
|
"learning_rate": 6.866254537843651e-06,
|
|
"loss": 0.619324266910553,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 1.2309813463098134,
|
|
"grad_norm": 0.6295216679573059,
|
|
"learning_rate": 6.859619607245102e-06,
|
|
"loss": 0.6520287990570068,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 1.2322789943227899,
|
|
"grad_norm": 0.6216979026794434,
|
|
"learning_rate": 6.852980874195132e-06,
|
|
"loss": 0.6138555407524109,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 1.2335766423357664,
|
|
"grad_norm": 0.59978848695755,
|
|
"learning_rate": 6.846338352268273e-06,
|
|
"loss": 0.6959421038627625,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 1.2348742903487429,
|
|
"grad_norm": 0.6199280619621277,
|
|
"learning_rate": 6.839692055046801e-06,
|
|
"loss": 0.6330957412719727,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 1.2361719383617193,
|
|
"grad_norm": 0.6078975200653076,
|
|
"learning_rate": 6.833041996120707e-06,
|
|
"loss": 0.6647271513938904,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 1.2374695863746958,
|
|
"grad_norm": 0.6505293846130371,
|
|
"learning_rate": 6.826388189087683e-06,
|
|
"loss": 0.6796462535858154,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 1.2387672343876723,
|
|
"grad_norm": 2.935091257095337,
|
|
"learning_rate": 6.819730647553079e-06,
|
|
"loss": 0.6220841407775879,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 1.2400648824006488,
|
|
"grad_norm": 0.6445925831794739,
|
|
"learning_rate": 6.813069385129883e-06,
|
|
"loss": 0.5865710973739624,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 1.2413625304136253,
|
|
"grad_norm": 0.5919390320777893,
|
|
"learning_rate": 6.806404415438689e-06,
|
|
"loss": 0.6186652779579163,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 1.2426601784266018,
|
|
"grad_norm": 0.601252019405365,
|
|
"learning_rate": 6.7997357521076735e-06,
|
|
"loss": 0.6536276340484619,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 1.2439578264395783,
|
|
"grad_norm": 1.1728289127349854,
|
|
"learning_rate": 6.793063408772565e-06,
|
|
"loss": 0.6327337026596069,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 1.2452554744525548,
|
|
"grad_norm": 0.6600290536880493,
|
|
"learning_rate": 6.78638739907662e-06,
|
|
"loss": 0.6598416566848755,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 1.2465531224655313,
|
|
"grad_norm": 0.6247118711471558,
|
|
"learning_rate": 6.779707736670585e-06,
|
|
"loss": 0.6106679439544678,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 1.2478507704785078,
|
|
"grad_norm": 0.588431179523468,
|
|
"learning_rate": 6.773024435212678e-06,
|
|
"loss": 0.6234384775161743,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 1.2491484184914843,
|
|
"grad_norm": 0.6060811281204224,
|
|
"learning_rate": 6.7663375083685635e-06,
|
|
"loss": 0.6653448343276978,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 1.2504460665044608,
|
|
"grad_norm": 0.7780699729919434,
|
|
"learning_rate": 6.759646969811311e-06,
|
|
"loss": 0.7183551788330078,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 1.251743714517437,
|
|
"grad_norm": 0.6161801815032959,
|
|
"learning_rate": 6.752952833221379e-06,
|
|
"loss": 0.693482518196106,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 1.2530413625304138,
|
|
"grad_norm": 0.5934755802154541,
|
|
"learning_rate": 6.7462551122865825e-06,
|
|
"loss": 0.6136157512664795,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 1.25433901054339,
|
|
"grad_norm": 0.5638807415962219,
|
|
"learning_rate": 6.739553820702067e-06,
|
|
"loss": 0.6110460758209229,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 1.2556366585563665,
|
|
"grad_norm": 2.232645273208618,
|
|
"learning_rate": 6.732848972170276e-06,
|
|
"loss": 0.5771392583847046,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 1.256934306569343,
|
|
"grad_norm": 0.5793489217758179,
|
|
"learning_rate": 6.726140580400928e-06,
|
|
"loss": 0.637577474117279,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 1.2582319545823195,
|
|
"grad_norm": 0.6198015213012695,
|
|
"learning_rate": 6.719428659110987e-06,
|
|
"loss": 0.6566798686981201,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 1.259529602595296,
|
|
"grad_norm": 8.447957992553711,
|
|
"learning_rate": 6.712713222024633e-06,
|
|
"loss": 0.6350081562995911,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 1.2608272506082725,
|
|
"grad_norm": 0.6281896233558655,
|
|
"learning_rate": 6.705994282873233e-06,
|
|
"loss": 0.6955903172492981,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 1.262124898621249,
|
|
"grad_norm": 0.5929207801818848,
|
|
"learning_rate": 6.699271855395321e-06,
|
|
"loss": 0.6420506834983826,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 1.2634225466342255,
|
|
"grad_norm": 0.6053920388221741,
|
|
"learning_rate": 6.6925459533365576e-06,
|
|
"loss": 0.6596835851669312,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 1.264720194647202,
|
|
"grad_norm": 0.6256871819496155,
|
|
"learning_rate": 6.685816590449708e-06,
|
|
"loss": 0.7071737051010132,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 1.2660178426601785,
|
|
"grad_norm": 0.5950897336006165,
|
|
"learning_rate": 6.67908378049462e-06,
|
|
"loss": 0.656615674495697,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 1.267315490673155,
|
|
"grad_norm": 0.6450179815292358,
|
|
"learning_rate": 6.672347537238183e-06,
|
|
"loss": 0.6895189881324768,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 1.2686131386861315,
|
|
"grad_norm": 0.6535899639129639,
|
|
"learning_rate": 6.665607874454311e-06,
|
|
"loss": 0.6748580932617188,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 1.269910786699108,
|
|
"grad_norm": 3.30841326713562,
|
|
"learning_rate": 6.658864805923909e-06,
|
|
"loss": 0.6493468284606934,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 1.2712084347120842,
|
|
"grad_norm": 0.6671776175498962,
|
|
"learning_rate": 6.652118345434844e-06,
|
|
"loss": 0.6867607235908508,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 1.272506082725061,
|
|
"grad_norm": 0.623457670211792,
|
|
"learning_rate": 6.64536850678192e-06,
|
|
"loss": 0.6442928314208984,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 1.2738037307380372,
|
|
"grad_norm": 0.5984421372413635,
|
|
"learning_rate": 6.638615303766849e-06,
|
|
"loss": 0.5990972518920898,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 1.275101378751014,
|
|
"grad_norm": 0.7166045904159546,
|
|
"learning_rate": 6.631858750198223e-06,
|
|
"loss": 0.6415522694587708,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 1.2763990267639902,
|
|
"grad_norm": 0.6510207056999207,
|
|
"learning_rate": 6.625098859891483e-06,
|
|
"loss": 0.6367224454879761,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 1.2776966747769667,
|
|
"grad_norm": 0.6455490589141846,
|
|
"learning_rate": 6.618335646668894e-06,
|
|
"loss": 0.6474705934524536,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 1.2789943227899432,
|
|
"grad_norm": 0.6324385404586792,
|
|
"learning_rate": 6.611569124359516e-06,
|
|
"loss": 0.6616948843002319,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 1.2802919708029197,
|
|
"grad_norm": 0.6118378043174744,
|
|
"learning_rate": 6.604799306799172e-06,
|
|
"loss": 0.628074586391449,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 1.2815896188158962,
|
|
"grad_norm": 0.5939401984214783,
|
|
"learning_rate": 6.598026207830428e-06,
|
|
"loss": 0.6460234522819519,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 1.2828872668288727,
|
|
"grad_norm": 0.5931558609008789,
|
|
"learning_rate": 6.591249841302555e-06,
|
|
"loss": 0.7053772211074829,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 1.2841849148418492,
|
|
"grad_norm": 0.6080952882766724,
|
|
"learning_rate": 6.58447022107151e-06,
|
|
"loss": 0.6465653777122498,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 1.2854825628548256,
|
|
"grad_norm": 0.5909331440925598,
|
|
"learning_rate": 6.577687360999898e-06,
|
|
"loss": 0.6280587911605835,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 1.2867802108678021,
|
|
"grad_norm": 0.6082817912101746,
|
|
"learning_rate": 6.5709012749569535e-06,
|
|
"loss": 0.6570587158203125,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 1.2880778588807786,
|
|
"grad_norm": 0.5879994630813599,
|
|
"learning_rate": 6.564111976818501e-06,
|
|
"loss": 0.6010950803756714,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 1.2893755068937551,
|
|
"grad_norm": 0.6213524341583252,
|
|
"learning_rate": 6.5573194804669416e-06,
|
|
"loss": 0.7210543751716614,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 1.2906731549067316,
|
|
"grad_norm": 0.8193002343177795,
|
|
"learning_rate": 6.550523799791207e-06,
|
|
"loss": 0.6705042123794556,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 1.2919708029197081,
|
|
"grad_norm": 0.6038559079170227,
|
|
"learning_rate": 6.543724948686747e-06,
|
|
"loss": 0.6417216062545776,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 1.2932684509326844,
|
|
"grad_norm": 0.6030299067497253,
|
|
"learning_rate": 6.53692294105549e-06,
|
|
"loss": 0.6307570338249207,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 1.294566098945661,
|
|
"grad_norm": 0.6002436876296997,
|
|
"learning_rate": 6.53011779080582e-06,
|
|
"loss": 0.6394779086112976,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 1.2958637469586374,
|
|
"grad_norm": 0.6847420334815979,
|
|
"learning_rate": 6.523309511852547e-06,
|
|
"loss": 0.7355165481567383,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 1.2971613949716139,
|
|
"grad_norm": 0.6133946180343628,
|
|
"learning_rate": 6.516498118116878e-06,
|
|
"loss": 0.6960593461990356,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 1.2984590429845904,
|
|
"grad_norm": 0.6106923222541809,
|
|
"learning_rate": 6.5096836235263904e-06,
|
|
"loss": 0.6673066020011902,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 1.2997566909975669,
|
|
"grad_norm": 0.6132566928863525,
|
|
"learning_rate": 6.502866042015e-06,
|
|
"loss": 0.6237598657608032,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 1.3010543390105433,
|
|
"grad_norm": 0.8997653126716614,
|
|
"learning_rate": 6.496045387522934e-06,
|
|
"loss": 0.6304394006729126,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 1.3023519870235198,
|
|
"grad_norm": 0.5679188966751099,
|
|
"learning_rate": 6.489221673996708e-06,
|
|
"loss": 0.575568675994873,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 1.3036496350364963,
|
|
"grad_norm": 0.6406558752059937,
|
|
"learning_rate": 6.482394915389085e-06,
|
|
"loss": 0.632392406463623,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 1.3049472830494728,
|
|
"grad_norm": 0.6094868183135986,
|
|
"learning_rate": 6.475565125659063e-06,
|
|
"loss": 0.6548421382904053,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 1.3062449310624493,
|
|
"grad_norm": 0.5837537050247192,
|
|
"learning_rate": 6.4687323187718276e-06,
|
|
"loss": 0.6500783562660217,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 1.3075425790754258,
|
|
"grad_norm": 0.5676296353340149,
|
|
"learning_rate": 6.461896508698744e-06,
|
|
"loss": 0.5843409299850464,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 1.3088402270884023,
|
|
"grad_norm": 0.5929064154624939,
|
|
"learning_rate": 6.455057709417312e-06,
|
|
"loss": 0.5786738395690918,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 1.3101378751013788,
|
|
"grad_norm": 0.6186608672142029,
|
|
"learning_rate": 6.448215934911145e-06,
|
|
"loss": 0.7198565006256104,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 1.3114355231143553,
|
|
"grad_norm": 0.573298454284668,
|
|
"learning_rate": 6.441371199169942e-06,
|
|
"loss": 0.6153538227081299,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 1.3127331711273318,
|
|
"grad_norm": 0.6731165051460266,
|
|
"learning_rate": 6.434523516189453e-06,
|
|
"loss": 0.6571598052978516,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 1.3140308191403083,
|
|
"grad_norm": 0.5842266082763672,
|
|
"learning_rate": 6.427672899971457e-06,
|
|
"loss": 0.6164257526397705,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 1.3153284671532846,
|
|
"grad_norm": 0.6072558760643005,
|
|
"learning_rate": 6.4208193645237314e-06,
|
|
"loss": 0.6229099035263062,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 1.3166261151662613,
|
|
"grad_norm": 0.6617994904518127,
|
|
"learning_rate": 6.413962923860021e-06,
|
|
"loss": 0.634198009967804,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 1.3179237631792375,
|
|
"grad_norm": 5.200798511505127,
|
|
"learning_rate": 6.407103592000009e-06,
|
|
"loss": 0.6058683395385742,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 1.319221411192214,
|
|
"grad_norm": 0.5821889042854309,
|
|
"learning_rate": 6.400241382969297e-06,
|
|
"loss": 0.6865833401679993,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 1.3205190592051905,
|
|
"grad_norm": 0.5700265169143677,
|
|
"learning_rate": 6.393376310799363e-06,
|
|
"loss": 0.6534625291824341,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 1.321816707218167,
|
|
"grad_norm": 0.5971737504005432,
|
|
"learning_rate": 6.386508389527544e-06,
|
|
"loss": 0.6178575158119202,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 1.3231143552311435,
|
|
"grad_norm": 0.5835508108139038,
|
|
"learning_rate": 6.379637633196999e-06,
|
|
"loss": 0.6270486116409302,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 1.32441200324412,
|
|
"grad_norm": 0.5576135516166687,
|
|
"learning_rate": 6.3727640558566865e-06,
|
|
"loss": 0.6197627782821655,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 1.3257096512570965,
|
|
"grad_norm": 0.6085971593856812,
|
|
"learning_rate": 6.3658876715613315e-06,
|
|
"loss": 0.6738483309745789,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 1.327007299270073,
|
|
"grad_norm": 0.6080042719841003,
|
|
"learning_rate": 6.3590084943713995e-06,
|
|
"loss": 0.6581575870513916,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 1.3283049472830495,
|
|
"grad_norm": 0.5854855179786682,
|
|
"learning_rate": 6.35212653835307e-06,
|
|
"loss": 0.6252275705337524,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 1.329602595296026,
|
|
"grad_norm": 0.5838765501976013,
|
|
"learning_rate": 6.345241817578196e-06,
|
|
"loss": 0.6577827334403992,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 1.3309002433090025,
|
|
"grad_norm": 0.5933322310447693,
|
|
"learning_rate": 6.3383543461242914e-06,
|
|
"loss": 0.6144447326660156,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 1.332197891321979,
|
|
"grad_norm": 0.6093854904174805,
|
|
"learning_rate": 6.331464138074493e-06,
|
|
"loss": 0.6428185701370239,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 1.3334955393349555,
|
|
"grad_norm": 0.6086922287940979,
|
|
"learning_rate": 6.32457120751753e-06,
|
|
"loss": 0.6779354810714722,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 1.334793187347932,
|
|
"grad_norm": 0.5752759575843811,
|
|
"learning_rate": 6.317675568547704e-06,
|
|
"loss": 0.6089493036270142,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 1.3360908353609084,
|
|
"grad_norm": 0.5942736268043518,
|
|
"learning_rate": 6.310777235264849e-06,
|
|
"loss": 0.6579400300979614,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 1.3373884833738847,
|
|
"grad_norm": 0.5779575705528259,
|
|
"learning_rate": 6.303876221774311e-06,
|
|
"loss": 0.6444313526153564,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 1.3386861313868614,
|
|
"grad_norm": 0.6055609583854675,
|
|
"learning_rate": 6.296972542186915e-06,
|
|
"loss": 0.6654270887374878,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 1.3399837793998377,
|
|
"grad_norm": 0.60945063829422,
|
|
"learning_rate": 6.2900662106189415e-06,
|
|
"loss": 0.661444902420044,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 1.3412814274128142,
|
|
"grad_norm": 1.2685060501098633,
|
|
"learning_rate": 6.283157241192087e-06,
|
|
"loss": 0.6629235148429871,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 1.3425790754257907,
|
|
"grad_norm": 0.6141192317008972,
|
|
"learning_rate": 6.276245648033447e-06,
|
|
"loss": 0.6560642719268799,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 1.3438767234387672,
|
|
"grad_norm": 0.5949526429176331,
|
|
"learning_rate": 6.2693314452754796e-06,
|
|
"loss": 0.7151345014572144,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 1.3451743714517437,
|
|
"grad_norm": 0.6198956370353699,
|
|
"learning_rate": 6.26241464705598e-06,
|
|
"loss": 0.6870914101600647,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 1.3464720194647202,
|
|
"grad_norm": 0.6193254590034485,
|
|
"learning_rate": 6.25549526751805e-06,
|
|
"loss": 0.6167775392532349,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 1.3477696674776967,
|
|
"grad_norm": 0.582747757434845,
|
|
"learning_rate": 6.24857332081007e-06,
|
|
"loss": 0.6421079635620117,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 1.3490673154906732,
|
|
"grad_norm": 0.6055523753166199,
|
|
"learning_rate": 6.241648821085666e-06,
|
|
"loss": 0.642772376537323,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 1.3503649635036497,
|
|
"grad_norm": 0.5949704051017761,
|
|
"learning_rate": 6.23472178250369e-06,
|
|
"loss": 0.6979063153266907,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 1.3516626115166261,
|
|
"grad_norm": 0.6037271022796631,
|
|
"learning_rate": 6.227792219228183e-06,
|
|
"loss": 0.6815102100372314,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 1.3529602595296026,
|
|
"grad_norm": 0.5929699540138245,
|
|
"learning_rate": 6.220860145428347e-06,
|
|
"loss": 0.6474612951278687,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 1.3542579075425791,
|
|
"grad_norm": 0.612301230430603,
|
|
"learning_rate": 6.213925575278518e-06,
|
|
"loss": 0.669405460357666,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 1.3555555555555556,
|
|
"grad_norm": 0.5837467908859253,
|
|
"learning_rate": 6.206988522958135e-06,
|
|
"loss": 0.5990941524505615,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 1.3568532035685321,
|
|
"grad_norm": 0.614405632019043,
|
|
"learning_rate": 6.200049002651718e-06,
|
|
"loss": 0.6845515370368958,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 1.3581508515815086,
|
|
"grad_norm": 0.713435709476471,
|
|
"learning_rate": 6.19310702854883e-06,
|
|
"loss": 0.5659395456314087,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 1.3594484995944849,
|
|
"grad_norm": 0.6173283457756042,
|
|
"learning_rate": 6.186162614844047e-06,
|
|
"loss": 0.6531370282173157,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 1.3607461476074616,
|
|
"grad_norm": 0.6224690675735474,
|
|
"learning_rate": 6.17921577573694e-06,
|
|
"loss": 0.6006350517272949,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 1.3620437956204379,
|
|
"grad_norm": 0.5716680288314819,
|
|
"learning_rate": 6.172266525432036e-06,
|
|
"loss": 0.6007007360458374,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 1.3633414436334144,
|
|
"grad_norm": 0.964508593082428,
|
|
"learning_rate": 6.165314878138794e-06,
|
|
"loss": 0.5759468674659729,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 1.3646390916463909,
|
|
"grad_norm": 1.8147951364517212,
|
|
"learning_rate": 6.1583608480715705e-06,
|
|
"loss": 0.6763917207717896,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 1.3659367396593673,
|
|
"grad_norm": 0.5682212710380554,
|
|
"learning_rate": 6.1514044494496e-06,
|
|
"loss": 0.5627442002296448,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 1.3672343876723438,
|
|
"grad_norm": 0.6249387860298157,
|
|
"learning_rate": 6.144445696496955e-06,
|
|
"loss": 0.7233635187149048,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 1.3685320356853203,
|
|
"grad_norm": 0.5967603921890259,
|
|
"learning_rate": 6.137484603442524e-06,
|
|
"loss": 0.60671067237854,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 1.3698296836982968,
|
|
"grad_norm": 0.9533456563949585,
|
|
"learning_rate": 6.130521184519983e-06,
|
|
"loss": 0.6718368530273438,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 1.3711273317112733,
|
|
"grad_norm": 0.577439546585083,
|
|
"learning_rate": 6.123555453967759e-06,
|
|
"loss": 0.6093976497650146,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 1.3724249797242498,
|
|
"grad_norm": 0.5558829307556152,
|
|
"learning_rate": 6.1165874260290074e-06,
|
|
"loss": 0.6086419224739075,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 1.3737226277372263,
|
|
"grad_norm": 0.6080211400985718,
|
|
"learning_rate": 6.109617114951581e-06,
|
|
"loss": 0.6369859576225281,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 1.3750202757502028,
|
|
"grad_norm": 0.70982426404953,
|
|
"learning_rate": 6.102644534988006e-06,
|
|
"loss": 0.6179996728897095,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 1.3763179237631793,
|
|
"grad_norm": 0.6002638936042786,
|
|
"learning_rate": 6.0956697003954404e-06,
|
|
"loss": 0.6171343326568604,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 1.3776155717761558,
|
|
"grad_norm": 0.74629145860672,
|
|
"learning_rate": 6.088692625435656e-06,
|
|
"loss": 0.64389967918396,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 1.378913219789132,
|
|
"grad_norm": 0.5946625471115112,
|
|
"learning_rate": 6.0817133243750046e-06,
|
|
"loss": 0.6315205097198486,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 1.3802108678021088,
|
|
"grad_norm": 0.6307440996170044,
|
|
"learning_rate": 6.074731811484391e-06,
|
|
"loss": 0.6365832090377808,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 1.381508515815085,
|
|
"grad_norm": 0.958493173122406,
|
|
"learning_rate": 6.067748101039243e-06,
|
|
"loss": 0.588029146194458,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 1.3828061638280618,
|
|
"grad_norm": 2.589282512664795,
|
|
"learning_rate": 6.060762207319479e-06,
|
|
"loss": 0.6348222494125366,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 1.384103811841038,
|
|
"grad_norm": 0.6122376322746277,
|
|
"learning_rate": 6.053774144609484e-06,
|
|
"loss": 0.6187014579772949,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 1.3854014598540145,
|
|
"grad_norm": 0.6017574071884155,
|
|
"learning_rate": 6.046783927198079e-06,
|
|
"loss": 0.646289587020874,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 1.386699107866991,
|
|
"grad_norm": 0.5894978046417236,
|
|
"learning_rate": 6.039791569378488e-06,
|
|
"loss": 0.6435679197311401,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 1.3879967558799675,
|
|
"grad_norm": 0.5931923389434814,
|
|
"learning_rate": 6.032797085448315e-06,
|
|
"loss": 0.6404111981391907,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 1.389294403892944,
|
|
"grad_norm": 0.5930508971214294,
|
|
"learning_rate": 6.025800489709505e-06,
|
|
"loss": 0.6763365268707275,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 1.3905920519059205,
|
|
"grad_norm": 0.621198832988739,
|
|
"learning_rate": 6.018801796468328e-06,
|
|
"loss": 0.7032692432403564,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 1.391889699918897,
|
|
"grad_norm": 0.6337350606918335,
|
|
"learning_rate": 6.0118010200353396e-06,
|
|
"loss": 0.7524909973144531,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 1.3931873479318735,
|
|
"grad_norm": 0.5976428389549255,
|
|
"learning_rate": 6.004798174725358e-06,
|
|
"loss": 0.6851296424865723,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 1.39448499594485,
|
|
"grad_norm": 0.9778940081596375,
|
|
"learning_rate": 5.997793274857427e-06,
|
|
"loss": 0.6498898267745972,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 1.3957826439578265,
|
|
"grad_norm": 0.638106644153595,
|
|
"learning_rate": 5.990786334754795e-06,
|
|
"loss": 0.707371711730957,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 1.397080291970803,
|
|
"grad_norm": 4.414336204528809,
|
|
"learning_rate": 5.983777368744881e-06,
|
|
"loss": 0.6448768973350525,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 1.3983779399837795,
|
|
"grad_norm": 0.5891152620315552,
|
|
"learning_rate": 5.9767663911592454e-06,
|
|
"loss": 0.6236732602119446,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 1.399675587996756,
|
|
"grad_norm": 0.59264075756073,
|
|
"learning_rate": 5.9697534163335645e-06,
|
|
"loss": 0.6284846663475037,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 1.4009732360097322,
|
|
"grad_norm": 0.6076551675796509,
|
|
"learning_rate": 5.9627384586075954e-06,
|
|
"loss": 0.6464221477508545,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 1.402270884022709,
|
|
"grad_norm": 0.6048544645309448,
|
|
"learning_rate": 5.955721532325151e-06,
|
|
"loss": 0.6747769713401794,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 1.4035685320356852,
|
|
"grad_norm": 1.4065572023391724,
|
|
"learning_rate": 5.94870265183407e-06,
|
|
"loss": 0.6566921472549438,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 1.404866180048662,
|
|
"grad_norm": 0.5989380478858948,
|
|
"learning_rate": 5.941681831486188e-06,
|
|
"loss": 0.65166175365448,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 1.4061638280616382,
|
|
"grad_norm": 0.566685676574707,
|
|
"learning_rate": 5.934659085637303e-06,
|
|
"loss": 0.6065230369567871,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 1.4074614760746147,
|
|
"grad_norm": 0.5942695736885071,
|
|
"learning_rate": 5.927634428647154e-06,
|
|
"loss": 0.6362863183021545,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 1.4087591240875912,
|
|
"grad_norm": 0.6072388887405396,
|
|
"learning_rate": 5.920607874879387e-06,
|
|
"loss": 0.6389554738998413,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 1.4100567721005677,
|
|
"grad_norm": 0.6188286542892456,
|
|
"learning_rate": 5.913579438701525e-06,
|
|
"loss": 0.7114623188972473,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 1.4113544201135442,
|
|
"grad_norm": 0.639775812625885,
|
|
"learning_rate": 5.906549134484943e-06,
|
|
"loss": 0.6554163694381714,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 1.4126520681265207,
|
|
"grad_norm": 0.5889431238174438,
|
|
"learning_rate": 5.899516976604832e-06,
|
|
"loss": 0.6516610383987427,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 1.4139497161394972,
|
|
"grad_norm": 0.5683200359344482,
|
|
"learning_rate": 5.892482979440175e-06,
|
|
"loss": 0.6421197652816772,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 1.4152473641524737,
|
|
"grad_norm": 0.6089890003204346,
|
|
"learning_rate": 5.885447157373716e-06,
|
|
"loss": 0.6774452924728394,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 1.4165450121654501,
|
|
"grad_norm": 0.6220733523368835,
|
|
"learning_rate": 5.878409524791931e-06,
|
|
"loss": 0.6213857531547546,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 1.4178426601784266,
|
|
"grad_norm": 0.6474610567092896,
|
|
"learning_rate": 5.871370096084997e-06,
|
|
"loss": 0.6641533970832825,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 1.4191403081914031,
|
|
"grad_norm": 0.5982186198234558,
|
|
"learning_rate": 5.864328885646764e-06,
|
|
"loss": 0.6307400465011597,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 1.4204379562043796,
|
|
"grad_norm": 1.0146141052246094,
|
|
"learning_rate": 5.857285907874725e-06,
|
|
"loss": 0.6501115560531616,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 1.4217356042173561,
|
|
"grad_norm": 0.6026197671890259,
|
|
"learning_rate": 5.850241177169986e-06,
|
|
"loss": 0.6877589225769043,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 1.4230332522303324,
|
|
"grad_norm": 0.6162115931510925,
|
|
"learning_rate": 5.84319470793724e-06,
|
|
"loss": 0.6401875019073486,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 1.424330900243309,
|
|
"grad_norm": 0.5684193968772888,
|
|
"learning_rate": 5.836146514584733e-06,
|
|
"loss": 0.6159685850143433,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 1.4256285482562854,
|
|
"grad_norm": 0.6261927485466003,
|
|
"learning_rate": 5.829096611524235e-06,
|
|
"loss": 0.6478676199913025,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 1.426926196269262,
|
|
"grad_norm": 0.6026703119277954,
|
|
"learning_rate": 5.822045013171015e-06,
|
|
"loss": 0.6607078313827515,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 1.4282238442822384,
|
|
"grad_norm": 0.602356493473053,
|
|
"learning_rate": 5.814991733943805e-06,
|
|
"loss": 0.6449368000030518,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 1.4295214922952149,
|
|
"grad_norm": 0.5841164588928223,
|
|
"learning_rate": 5.807936788264778e-06,
|
|
"loss": 0.6442397236824036,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 1.4308191403081914,
|
|
"grad_norm": 0.6046425104141235,
|
|
"learning_rate": 5.800880190559511e-06,
|
|
"loss": 0.6141000986099243,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 1.4321167883211678,
|
|
"grad_norm": 0.6180353760719299,
|
|
"learning_rate": 5.79382195525696e-06,
|
|
"loss": 0.7307353019714355,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 1.4334144363341443,
|
|
"grad_norm": 0.5996196269989014,
|
|
"learning_rate": 5.786762096789431e-06,
|
|
"loss": 0.6220886707305908,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 1.4347120843471208,
|
|
"grad_norm": 0.6037473678588867,
|
|
"learning_rate": 5.779700629592547e-06,
|
|
"loss": 0.7145535945892334,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 1.4360097323600973,
|
|
"grad_norm": 0.5726904273033142,
|
|
"learning_rate": 5.7726375681052205e-06,
|
|
"loss": 0.6307674646377563,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 1.4373073803730738,
|
|
"grad_norm": 0.6289665102958679,
|
|
"learning_rate": 5.765572926769625e-06,
|
|
"loss": 0.7094706296920776,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 1.4386050283860503,
|
|
"grad_norm": 0.5811914801597595,
|
|
"learning_rate": 5.758506720031163e-06,
|
|
"loss": 0.6041115522384644,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 1.4399026763990268,
|
|
"grad_norm": 0.5376439094543457,
|
|
"learning_rate": 5.751438962338441e-06,
|
|
"loss": 0.5803889036178589,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 1.4412003244120033,
|
|
"grad_norm": 0.5952728390693665,
|
|
"learning_rate": 5.744369668143233e-06,
|
|
"loss": 0.6684442758560181,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 1.4424979724249798,
|
|
"grad_norm": 0.5791693329811096,
|
|
"learning_rate": 5.737298851900457e-06,
|
|
"loss": 0.6404840350151062,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 1.4437956204379563,
|
|
"grad_norm": 0.6007118225097656,
|
|
"learning_rate": 5.730226528068142e-06,
|
|
"loss": 0.6698148846626282,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 1.4450932684509326,
|
|
"grad_norm": 0.613433301448822,
|
|
"learning_rate": 5.7231527111074e-06,
|
|
"loss": 0.7007705569267273,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 1.4463909164639093,
|
|
"grad_norm": 0.5919564962387085,
|
|
"learning_rate": 5.716077415482398e-06,
|
|
"loss": 0.6769901514053345,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 1.4476885644768855,
|
|
"grad_norm": 0.5912166833877563,
|
|
"learning_rate": 5.709000655660324e-06,
|
|
"loss": 0.6436672210693359,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 1.4489862124898623,
|
|
"grad_norm": 0.5603325366973877,
|
|
"learning_rate": 5.7019224461113585e-06,
|
|
"loss": 0.5793130993843079,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 1.4502838605028385,
|
|
"grad_norm": 0.611814558506012,
|
|
"learning_rate": 5.694842801308651e-06,
|
|
"loss": 0.6368833780288696,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 1.451581508515815,
|
|
"grad_norm": 0.5689136385917664,
|
|
"learning_rate": 5.687761735728282e-06,
|
|
"loss": 0.6261428594589233,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 1.4528791565287915,
|
|
"grad_norm": 0.6117684245109558,
|
|
"learning_rate": 5.680679263849241e-06,
|
|
"loss": 0.6463526487350464,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 1.454176804541768,
|
|
"grad_norm": 0.5878109931945801,
|
|
"learning_rate": 5.673595400153385e-06,
|
|
"loss": 0.6132445335388184,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 1.4554744525547445,
|
|
"grad_norm": 0.5826682448387146,
|
|
"learning_rate": 5.666510159125427e-06,
|
|
"loss": 0.6556754112243652,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 1.456772100567721,
|
|
"grad_norm": 0.5753729939460754,
|
|
"learning_rate": 5.65942355525289e-06,
|
|
"loss": 0.6176761388778687,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 1.4580697485806975,
|
|
"grad_norm": 0.7028788924217224,
|
|
"learning_rate": 5.652335603026084e-06,
|
|
"loss": 0.5802330374717712,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 1.459367396593674,
|
|
"grad_norm": 0.5847388505935669,
|
|
"learning_rate": 5.645246316938082e-06,
|
|
"loss": 0.6626067161560059,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 1.4606650446066505,
|
|
"grad_norm": 1.399409294128418,
|
|
"learning_rate": 5.638155711484674e-06,
|
|
"loss": 0.6308712959289551,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 1.461962692619627,
|
|
"grad_norm": 0.602827250957489,
|
|
"learning_rate": 5.631063801164356e-06,
|
|
"loss": 0.6493173241615295,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 1.4632603406326035,
|
|
"grad_norm": 0.7403953075408936,
|
|
"learning_rate": 5.62397060047829e-06,
|
|
"loss": 0.620072603225708,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 1.46455798864558,
|
|
"grad_norm": 0.6334176063537598,
|
|
"learning_rate": 5.6168761239302745e-06,
|
|
"loss": 0.665931761264801,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 1.4658556366585564,
|
|
"grad_norm": 0.6131840944290161,
|
|
"learning_rate": 5.609780386026721e-06,
|
|
"loss": 0.6492164731025696,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 1.4671532846715327,
|
|
"grad_norm": 0.6045870780944824,
|
|
"learning_rate": 5.6026834012766155e-06,
|
|
"loss": 0.6135592460632324,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 1.4684509326845094,
|
|
"grad_norm": 0.650088906288147,
|
|
"learning_rate": 5.595585184191496e-06,
|
|
"loss": 0.7170080542564392,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 1.4697485806974857,
|
|
"grad_norm": 0.5771186351776123,
|
|
"learning_rate": 5.58848574928542e-06,
|
|
"loss": 0.6513093709945679,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 1.4710462287104624,
|
|
"grad_norm": 0.7128145694732666,
|
|
"learning_rate": 5.5813851110749365e-06,
|
|
"loss": 0.6579954624176025,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 1.4723438767234387,
|
|
"grad_norm": 0.5734491348266602,
|
|
"learning_rate": 5.574283284079049e-06,
|
|
"loss": 0.6137959361076355,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 1.4736415247364152,
|
|
"grad_norm": 0.5757655501365662,
|
|
"learning_rate": 5.567180282819201e-06,
|
|
"loss": 0.6633074283599854,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 1.4749391727493917,
|
|
"grad_norm": 0.5958343148231506,
|
|
"learning_rate": 5.560076121819229e-06,
|
|
"loss": 0.6766320466995239,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 1.4762368207623682,
|
|
"grad_norm": 0.5708390474319458,
|
|
"learning_rate": 5.552970815605347e-06,
|
|
"loss": 0.6593270897865295,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 1.4775344687753447,
|
|
"grad_norm": 0.5592367649078369,
|
|
"learning_rate": 5.545864378706106e-06,
|
|
"loss": 0.6107625961303711,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 1.4788321167883212,
|
|
"grad_norm": 0.5908456444740295,
|
|
"learning_rate": 5.53875682565237e-06,
|
|
"loss": 0.612775444984436,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 1.4801297648012977,
|
|
"grad_norm": 0.7283220291137695,
|
|
"learning_rate": 5.5316481709772886e-06,
|
|
"loss": 0.6324783563613892,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 1.4814274128142741,
|
|
"grad_norm": 0.5963947176933289,
|
|
"learning_rate": 5.524538429216258e-06,
|
|
"loss": 0.6906737089157104,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 1.4827250608272506,
|
|
"grad_norm": 0.6059021949768066,
|
|
"learning_rate": 5.517427614906906e-06,
|
|
"loss": 0.6746259331703186,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 1.4840227088402271,
|
|
"grad_norm": 0.5953018069267273,
|
|
"learning_rate": 5.510315742589042e-06,
|
|
"loss": 0.6834631562232971,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 1.4853203568532036,
|
|
"grad_norm": 0.5694923996925354,
|
|
"learning_rate": 5.503202826804647e-06,
|
|
"loss": 0.6960294246673584,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 1.4866180048661801,
|
|
"grad_norm": 0.6007208228111267,
|
|
"learning_rate": 5.496088882097836e-06,
|
|
"loss": 0.657875657081604,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 1.4879156528791566,
|
|
"grad_norm": 0.6081047654151917,
|
|
"learning_rate": 5.488973923014821e-06,
|
|
"loss": 0.6561139225959778,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 1.4892133008921329,
|
|
"grad_norm": 0.5819503664970398,
|
|
"learning_rate": 5.4818579641038974e-06,
|
|
"loss": 0.6176397204399109,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 1.4905109489051096,
|
|
"grad_norm": 0.6077326536178589,
|
|
"learning_rate": 5.474741019915395e-06,
|
|
"loss": 0.6847512722015381,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 1.4918085969180859,
|
|
"grad_norm": 0.6074263453483582,
|
|
"learning_rate": 5.467623105001667e-06,
|
|
"loss": 0.6360629200935364,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 1.4918085969180859,
|
|
"eval_loss": 0.6826658844947815,
|
|
"eval_runtime": 73.0405,
|
|
"eval_samples_per_second": 71.084,
|
|
"eval_steps_per_second": 8.885,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 1.4931062449310626,
|
|
"grad_norm": 0.5855403542518616,
|
|
"learning_rate": 5.460504233917047e-06,
|
|
"loss": 0.6704986095428467,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 1.4944038929440389,
|
|
"grad_norm": 0.6127449870109558,
|
|
"learning_rate": 5.453384421217823e-06,
|
|
"loss": 0.6719274520874023,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 1.4957015409570154,
|
|
"grad_norm": 0.5484548211097717,
|
|
"learning_rate": 5.446263681462213e-06,
|
|
"loss": 0.6012224555015564,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 1.4969991889699918,
|
|
"grad_norm": 0.5728206038475037,
|
|
"learning_rate": 5.439142029210323e-06,
|
|
"loss": 0.6711239218711853,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 1.4982968369829683,
|
|
"grad_norm": 0.5789787769317627,
|
|
"learning_rate": 5.4320194790241335e-06,
|
|
"loss": 0.5949071645736694,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 1.4995944849959448,
|
|
"grad_norm": 0.5778141021728516,
|
|
"learning_rate": 5.424896045467455e-06,
|
|
"loss": 0.6263710260391235,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 1.5008921330089213,
|
|
"grad_norm": 0.5851665139198303,
|
|
"learning_rate": 5.417771743105908e-06,
|
|
"loss": 0.690178632736206,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 1.5021897810218978,
|
|
"grad_norm": 0.620339035987854,
|
|
"learning_rate": 5.4106465865068846e-06,
|
|
"loss": 0.6553722620010376,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 1.5034874290348743,
|
|
"grad_norm": 0.5484940409660339,
|
|
"learning_rate": 5.403520590239527e-06,
|
|
"loss": 0.5462528467178345,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 1.5047850770478508,
|
|
"grad_norm": 0.62648606300354,
|
|
"learning_rate": 5.396393768874696e-06,
|
|
"loss": 0.7103927135467529,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 1.5060827250608273,
|
|
"grad_norm": 0.5696239471435547,
|
|
"learning_rate": 5.389266136984939e-06,
|
|
"loss": 0.6234554648399353,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 1.5073803730738038,
|
|
"grad_norm": 0.6027652025222778,
|
|
"learning_rate": 5.382137709144454e-06,
|
|
"loss": 0.6729198694229126,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 1.50867802108678,
|
|
"grad_norm": 0.5693642497062683,
|
|
"learning_rate": 5.3750084999290755e-06,
|
|
"loss": 0.6457726359367371,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 1.5099756690997568,
|
|
"grad_norm": 1.6674511432647705,
|
|
"learning_rate": 5.3678785239162305e-06,
|
|
"loss": 0.656345009803772,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 1.511273317112733,
|
|
"grad_norm": 0.5577940344810486,
|
|
"learning_rate": 5.360747795684916e-06,
|
|
"loss": 0.5705595016479492,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 1.5125709651257098,
|
|
"grad_norm": 0.5919134616851807,
|
|
"learning_rate": 5.353616329815667e-06,
|
|
"loss": 0.6972566246986389,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 1.513868613138686,
|
|
"grad_norm": 0.6095024347305298,
|
|
"learning_rate": 5.346484140890523e-06,
|
|
"loss": 0.6107922196388245,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 1.5151662611516628,
|
|
"grad_norm": 0.5990864634513855,
|
|
"learning_rate": 5.339351243493008e-06,
|
|
"loss": 0.5962531566619873,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 1.516463909164639,
|
|
"grad_norm": 0.5995983481407166,
|
|
"learning_rate": 5.332217652208093e-06,
|
|
"loss": 0.6228233575820923,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 1.5177615571776155,
|
|
"grad_norm": 0.5965218544006348,
|
|
"learning_rate": 5.325083381622165e-06,
|
|
"loss": 0.6963210105895996,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 1.519059205190592,
|
|
"grad_norm": 0.5758861303329468,
|
|
"learning_rate": 5.317948446322999e-06,
|
|
"loss": 0.58036869764328,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 1.5203568532035685,
|
|
"grad_norm": 0.5857213139533997,
|
|
"learning_rate": 5.310812860899737e-06,
|
|
"loss": 0.6398880481719971,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 1.521654501216545,
|
|
"grad_norm": 0.706536054611206,
|
|
"learning_rate": 5.303676639942841e-06,
|
|
"loss": 0.6162217855453491,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 1.5229521492295215,
|
|
"grad_norm": 0.5781589150428772,
|
|
"learning_rate": 5.296539798044078e-06,
|
|
"loss": 0.6084649562835693,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 1.524249797242498,
|
|
"grad_norm": 0.5943130850791931,
|
|
"learning_rate": 5.289402349796484e-06,
|
|
"loss": 0.6497021913528442,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 1.5255474452554745,
|
|
"grad_norm": 0.5641393065452576,
|
|
"learning_rate": 5.282264309794334e-06,
|
|
"loss": 0.5834084749221802,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 1.526845093268451,
|
|
"grad_norm": 0.5564937591552734,
|
|
"learning_rate": 5.2751256926331115e-06,
|
|
"loss": 0.6279217004776001,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 1.5281427412814275,
|
|
"grad_norm": 0.5945193767547607,
|
|
"learning_rate": 5.267986512909484e-06,
|
|
"loss": 0.6333688497543335,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 1.529440389294404,
|
|
"grad_norm": 0.6081971526145935,
|
|
"learning_rate": 5.2608467852212665e-06,
|
|
"loss": 0.6803103685379028,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 1.5307380373073802,
|
|
"grad_norm": 0.584886908531189,
|
|
"learning_rate": 5.253706524167395e-06,
|
|
"loss": 0.6653470993041992,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 1.532035685320357,
|
|
"grad_norm": 0.8528439998626709,
|
|
"learning_rate": 5.246565744347894e-06,
|
|
"loss": 0.6093430519104004,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 1.5333333333333332,
|
|
"grad_norm": 0.573440432548523,
|
|
"learning_rate": 5.2394244603638536e-06,
|
|
"loss": 0.6251604557037354,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 1.53463098134631,
|
|
"grad_norm": 0.5646257996559143,
|
|
"learning_rate": 5.232282686817392e-06,
|
|
"loss": 0.5792976021766663,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 1.5359286293592862,
|
|
"grad_norm": 0.5741854310035706,
|
|
"learning_rate": 5.2251404383116265e-06,
|
|
"loss": 0.6484105587005615,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 1.537226277372263,
|
|
"grad_norm": 0.5606357455253601,
|
|
"learning_rate": 5.217997729450649e-06,
|
|
"loss": 0.6315451860427856,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 1.5385239253852392,
|
|
"grad_norm": 0.5854267477989197,
|
|
"learning_rate": 5.21085457483949e-06,
|
|
"loss": 0.6010391712188721,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 1.5398215733982157,
|
|
"grad_norm": 0.6120538711547852,
|
|
"learning_rate": 5.203710989084093e-06,
|
|
"loss": 0.6872812509536743,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 1.5411192214111922,
|
|
"grad_norm": 0.6018205881118774,
|
|
"learning_rate": 5.196566986791286e-06,
|
|
"loss": 0.6842239499092102,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 1.5424168694241687,
|
|
"grad_norm": 0.5673507452011108,
|
|
"learning_rate": 5.189422582568742e-06,
|
|
"loss": 0.6135258674621582,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 1.5437145174371452,
|
|
"grad_norm": 0.5736320614814758,
|
|
"learning_rate": 5.182277791024959e-06,
|
|
"loss": 0.6442878246307373,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 1.5450121654501217,
|
|
"grad_norm": 0.5806821584701538,
|
|
"learning_rate": 5.175132626769229e-06,
|
|
"loss": 0.6409611701965332,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 1.5463098134630981,
|
|
"grad_norm": 0.6098542213439941,
|
|
"learning_rate": 5.167987104411605e-06,
|
|
"loss": 0.6895368695259094,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 1.5476074614760746,
|
|
"grad_norm": 0.6138260364532471,
|
|
"learning_rate": 5.160841238562872e-06,
|
|
"loss": 0.6403982043266296,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 1.5489051094890511,
|
|
"grad_norm": 0.5820956826210022,
|
|
"learning_rate": 5.153695043834513e-06,
|
|
"loss": 0.6204026937484741,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 1.5502027575020276,
|
|
"grad_norm": 0.5773366093635559,
|
|
"learning_rate": 5.146548534838691e-06,
|
|
"loss": 0.645720899105072,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 1.5515004055150041,
|
|
"grad_norm": 0.5759880542755127,
|
|
"learning_rate": 5.139401726188208e-06,
|
|
"loss": 0.5854007601737976,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 1.5527980535279804,
|
|
"grad_norm": 0.584076464176178,
|
|
"learning_rate": 5.132254632496477e-06,
|
|
"loss": 0.662139892578125,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 1.554095701540957,
|
|
"grad_norm": 0.6095874905586243,
|
|
"learning_rate": 5.125107268377498e-06,
|
|
"loss": 0.6662768125534058,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 1.5553933495539334,
|
|
"grad_norm": 0.5676849484443665,
|
|
"learning_rate": 5.117959648445821e-06,
|
|
"loss": 0.593256413936615,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 1.55669099756691,
|
|
"grad_norm": 0.9843289852142334,
|
|
"learning_rate": 5.1108117873165175e-06,
|
|
"loss": 0.6919536590576172,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 1.5579886455798864,
|
|
"grad_norm": 0.5795591473579407,
|
|
"learning_rate": 5.1036636996051556e-06,
|
|
"loss": 0.6274605989456177,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 1.559286293592863,
|
|
"grad_norm": 0.5853375196456909,
|
|
"learning_rate": 5.096515399927767e-06,
|
|
"loss": 0.6070197820663452,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 1.5605839416058394,
|
|
"grad_norm": 0.6098043918609619,
|
|
"learning_rate": 5.089366902900813e-06,
|
|
"loss": 0.6619631052017212,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 1.5618815896188158,
|
|
"grad_norm": 0.6142205595970154,
|
|
"learning_rate": 5.082218223141162e-06,
|
|
"loss": 0.6737958192825317,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 1.5631792376317923,
|
|
"grad_norm": 0.5625759363174438,
|
|
"learning_rate": 5.075069375266055e-06,
|
|
"loss": 0.590381383895874,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 1.5644768856447688,
|
|
"grad_norm": 0.8771416544914246,
|
|
"learning_rate": 5.067920373893075e-06,
|
|
"loss": 0.5482794046401978,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 1.5657745336577453,
|
|
"grad_norm": 0.8982309699058533,
|
|
"learning_rate": 5.060771233640122e-06,
|
|
"loss": 0.6464008092880249,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 1.5670721816707218,
|
|
"grad_norm": 0.6009715795516968,
|
|
"learning_rate": 5.0536219691253776e-06,
|
|
"loss": 0.5735194683074951,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 1.5683698296836983,
|
|
"grad_norm": 0.5980544686317444,
|
|
"learning_rate": 5.046472594967279e-06,
|
|
"loss": 0.66939377784729,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 1.5696674776966748,
|
|
"grad_norm": 0.6162261962890625,
|
|
"learning_rate": 5.039323125784485e-06,
|
|
"loss": 0.6994204521179199,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 1.5709651257096513,
|
|
"grad_norm": 0.617485761642456,
|
|
"learning_rate": 5.0321735761958515e-06,
|
|
"loss": 0.633686363697052,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 1.5722627737226276,
|
|
"grad_norm": 0.5924466848373413,
|
|
"learning_rate": 5.025023960820399e-06,
|
|
"loss": 0.6124377250671387,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 1.5735604217356043,
|
|
"grad_norm": 0.6040006279945374,
|
|
"learning_rate": 5.01787429427728e-06,
|
|
"loss": 0.6491550207138062,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 1.5748580697485806,
|
|
"grad_norm": 0.5883519649505615,
|
|
"learning_rate": 5.010724591185752e-06,
|
|
"loss": 0.6150457262992859,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 1.5761557177615573,
|
|
"grad_norm": 0.6369590759277344,
|
|
"learning_rate": 5.003574866165149e-06,
|
|
"loss": 0.6079261898994446,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 1.5774533657745335,
|
|
"grad_norm": 0.6027874946594238,
|
|
"learning_rate": 4.9964251338348515e-06,
|
|
"loss": 0.6851716637611389,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 1.5787510137875103,
|
|
"grad_norm": 0.5862027406692505,
|
|
"learning_rate": 4.989275408814251e-06,
|
|
"loss": 0.5923515558242798,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 1.5800486618004865,
|
|
"grad_norm": 0.6328719854354858,
|
|
"learning_rate": 4.982125705722722e-06,
|
|
"loss": 0.6643452644348145,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 1.5813463098134632,
|
|
"grad_norm": 0.6100243330001831,
|
|
"learning_rate": 4.974976039179604e-06,
|
|
"loss": 0.6416760683059692,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 1.5826439578264395,
|
|
"grad_norm": 0.5908761620521545,
|
|
"learning_rate": 4.967826423804151e-06,
|
|
"loss": 0.643882155418396,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 1.583941605839416,
|
|
"grad_norm": 0.5938880443572998,
|
|
"learning_rate": 4.960676874215518e-06,
|
|
"loss": 0.6157772541046143,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 1.5852392538523925,
|
|
"grad_norm": 0.5930868983268738,
|
|
"learning_rate": 4.953527405032723e-06,
|
|
"loss": 0.5862378478050232,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 1.586536901865369,
|
|
"grad_norm": 0.589255690574646,
|
|
"learning_rate": 4.946378030874625e-06,
|
|
"loss": 0.6135423183441162,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 1.5878345498783455,
|
|
"grad_norm": 0.5754698514938354,
|
|
"learning_rate": 4.9392287663598785e-06,
|
|
"loss": 0.6066054701805115,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 1.589132197891322,
|
|
"grad_norm": 0.6168340444564819,
|
|
"learning_rate": 4.932079626106926e-06,
|
|
"loss": 0.683946967124939,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 1.5904298459042985,
|
|
"grad_norm": 0.5985932350158691,
|
|
"learning_rate": 4.924930624733947e-06,
|
|
"loss": 0.6772314310073853,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 1.591727493917275,
|
|
"grad_norm": 0.6024285554885864,
|
|
"learning_rate": 4.91778177685884e-06,
|
|
"loss": 0.652093768119812,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 1.5930251419302515,
|
|
"grad_norm": 0.6394546627998352,
|
|
"learning_rate": 4.910633097099188e-06,
|
|
"loss": 0.6307955384254456,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 1.5943227899432277,
|
|
"grad_norm": 0.5471766591072083,
|
|
"learning_rate": 4.903484600072236e-06,
|
|
"loss": 0.5805978775024414,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 1.5956204379562045,
|
|
"grad_norm": 0.5722350478172302,
|
|
"learning_rate": 4.896336300394845e-06,
|
|
"loss": 0.6355024576187134,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 1.5969180859691807,
|
|
"grad_norm": 0.6039298176765442,
|
|
"learning_rate": 4.889188212683483e-06,
|
|
"loss": 0.6441288590431213,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 1.5982157339821574,
|
|
"grad_norm": 0.6237229704856873,
|
|
"learning_rate": 4.882040351554181e-06,
|
|
"loss": 0.6681591272354126,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 1.5995133819951337,
|
|
"grad_norm": 0.6051374673843384,
|
|
"learning_rate": 4.874892731622503e-06,
|
|
"loss": 0.6615642309188843,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 1.6008110300081104,
|
|
"grad_norm": 0.5937628746032715,
|
|
"learning_rate": 4.867745367503524e-06,
|
|
"loss": 0.6506084203720093,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 1.6021086780210867,
|
|
"grad_norm": 0.5851325988769531,
|
|
"learning_rate": 4.860598273811793e-06,
|
|
"loss": 0.6443929076194763,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 1.6034063260340634,
|
|
"grad_norm": 0.5777382850646973,
|
|
"learning_rate": 4.8534514651613104e-06,
|
|
"loss": 0.635840892791748,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 1.6047039740470397,
|
|
"grad_norm": 0.5909644365310669,
|
|
"learning_rate": 4.846304956165488e-06,
|
|
"loss": 0.6581849455833435,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 1.6060016220600162,
|
|
"grad_norm": 0.5992142558097839,
|
|
"learning_rate": 4.83915876143713e-06,
|
|
"loss": 0.6690875291824341,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 1.6072992700729927,
|
|
"grad_norm": 1.2001910209655762,
|
|
"learning_rate": 4.832012895588395e-06,
|
|
"loss": 0.6264456510543823,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 1.6085969180859692,
|
|
"grad_norm": 0.6141691207885742,
|
|
"learning_rate": 4.824867373230772e-06,
|
|
"loss": 0.670561671257019,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 1.6098945660989457,
|
|
"grad_norm": 0.5834086537361145,
|
|
"learning_rate": 4.817722208975041e-06,
|
|
"loss": 0.6045785546302795,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 1.6111922141119221,
|
|
"grad_norm": 0.6060406565666199,
|
|
"learning_rate": 4.81057741743126e-06,
|
|
"loss": 0.5803914666175842,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 1.6124898621248986,
|
|
"grad_norm": 0.5703381299972534,
|
|
"learning_rate": 4.8034330132087155e-06,
|
|
"loss": 0.6377118825912476,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 1.6137875101378751,
|
|
"grad_norm": 0.6010227203369141,
|
|
"learning_rate": 4.7962890109159085e-06,
|
|
"loss": 0.6981620788574219,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 1.6150851581508516,
|
|
"grad_norm": 0.6107721924781799,
|
|
"learning_rate": 4.789145425160511e-06,
|
|
"loss": 0.6511063575744629,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 1.616382806163828,
|
|
"grad_norm": 0.5982344150543213,
|
|
"learning_rate": 4.782002270549354e-06,
|
|
"loss": 0.6058223247528076,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 1.6176804541768046,
|
|
"grad_norm": 0.7359511256217957,
|
|
"learning_rate": 4.774859561688374e-06,
|
|
"loss": 0.7255959510803223,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 1.6189781021897809,
|
|
"grad_norm": 0.6240600347518921,
|
|
"learning_rate": 4.767717313182611e-06,
|
|
"loss": 0.695855975151062,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 1.6202757502027576,
|
|
"grad_norm": 0.6217120885848999,
|
|
"learning_rate": 4.760575539636147e-06,
|
|
"loss": 0.7245144844055176,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 1.6215733982157339,
|
|
"grad_norm": 0.6095402240753174,
|
|
"learning_rate": 4.753434255652108e-06,
|
|
"loss": 0.6345319151878357,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 1.6228710462287106,
|
|
"grad_norm": 0.5852973461151123,
|
|
"learning_rate": 4.746293475832607e-06,
|
|
"loss": 0.7055230736732483,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 1.6241686942416869,
|
|
"grad_norm": 0.5857930779457092,
|
|
"learning_rate": 4.739153214778735e-06,
|
|
"loss": 0.611079216003418,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 1.6254663422546636,
|
|
"grad_norm": 0.5896874070167542,
|
|
"learning_rate": 4.732013487090517e-06,
|
|
"loss": 0.6760262250900269,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 1.6267639902676398,
|
|
"grad_norm": 0.5715303421020508,
|
|
"learning_rate": 4.72487430736689e-06,
|
|
"loss": 0.6258687376976013,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 1.6280616382806163,
|
|
"grad_norm": 0.6083521246910095,
|
|
"learning_rate": 4.7177356902056675e-06,
|
|
"loss": 0.6745297908782959,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 1.6293592862935928,
|
|
"grad_norm": 0.5798436403274536,
|
|
"learning_rate": 4.7105976502035175e-06,
|
|
"loss": 0.5955469608306885,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 1.6306569343065693,
|
|
"grad_norm": 0.5836136341094971,
|
|
"learning_rate": 4.703460201955924e-06,
|
|
"loss": 0.6397416591644287,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 1.6319545823195458,
|
|
"grad_norm": 0.5983015894889832,
|
|
"learning_rate": 4.696323360057162e-06,
|
|
"loss": 0.6736359596252441,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 1.6332522303325223,
|
|
"grad_norm": 0.5725530982017517,
|
|
"learning_rate": 4.689187139100265e-06,
|
|
"loss": 0.6878089904785156,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 1.6345498783454988,
|
|
"grad_norm": 0.5805061459541321,
|
|
"learning_rate": 4.682051553677001e-06,
|
|
"loss": 0.6194028854370117,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 1.6358475263584753,
|
|
"grad_norm": 0.6036574840545654,
|
|
"learning_rate": 4.6749166183778375e-06,
|
|
"loss": 0.634255588054657,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 1.6371451743714518,
|
|
"grad_norm": 0.8983334898948669,
|
|
"learning_rate": 4.667782347791908e-06,
|
|
"loss": 0.6297205686569214,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 1.638442822384428,
|
|
"grad_norm": 0.5956529378890991,
|
|
"learning_rate": 4.660648756506993e-06,
|
|
"loss": 0.6427313089370728,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 1.6397404703974048,
|
|
"grad_norm": 0.5881230235099792,
|
|
"learning_rate": 4.653515859109478e-06,
|
|
"loss": 0.6450825929641724,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 1.641038118410381,
|
|
"grad_norm": 0.5867661833763123,
|
|
"learning_rate": 4.646383670184336e-06,
|
|
"loss": 0.6814026832580566,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 1.6423357664233578,
|
|
"grad_norm": 0.6160328388214111,
|
|
"learning_rate": 4.639252204315086e-06,
|
|
"loss": 0.6689074039459229,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 1.643633414436334,
|
|
"grad_norm": 0.582465410232544,
|
|
"learning_rate": 4.632121476083772e-06,
|
|
"loss": 0.6467956304550171,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 1.6449310624493108,
|
|
"grad_norm": 0.5506557822227478,
|
|
"learning_rate": 4.624991500070925e-06,
|
|
"loss": 0.6649973392486572,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 1.646228710462287,
|
|
"grad_norm": 0.600159227848053,
|
|
"learning_rate": 4.617862290855548e-06,
|
|
"loss": 0.6144022345542908,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 1.6475263584752637,
|
|
"grad_norm": 1.0451817512512207,
|
|
"learning_rate": 4.610733863015063e-06,
|
|
"loss": 0.6827117800712585,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 1.64882400648824,
|
|
"grad_norm": 0.5652205944061279,
|
|
"learning_rate": 4.6036062311253055e-06,
|
|
"loss": 0.5971782207489014,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 1.6501216545012165,
|
|
"grad_norm": 0.686071515083313,
|
|
"learning_rate": 4.596479409760474e-06,
|
|
"loss": 0.5615164041519165,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 1.651419302514193,
|
|
"grad_norm": 0.5449540019035339,
|
|
"learning_rate": 4.589353413493118e-06,
|
|
"loss": 0.6300219297409058,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 1.6527169505271695,
|
|
"grad_norm": 0.6144797205924988,
|
|
"learning_rate": 4.582228256894093e-06,
|
|
"loss": 0.6373116970062256,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 1.654014598540146,
|
|
"grad_norm": 0.6170778274536133,
|
|
"learning_rate": 4.575103954532547e-06,
|
|
"loss": 0.6746265888214111,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 1.6553122465531225,
|
|
"grad_norm": 0.5726920366287231,
|
|
"learning_rate": 4.567980520975867e-06,
|
|
"loss": 0.598582923412323,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 1.656609894566099,
|
|
"grad_norm": 0.59462571144104,
|
|
"learning_rate": 4.560857970789679e-06,
|
|
"loss": 0.5969716906547546,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 1.6579075425790755,
|
|
"grad_norm": 0.5755953192710876,
|
|
"learning_rate": 4.553736318537789e-06,
|
|
"loss": 0.6542321443557739,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 1.659205190592052,
|
|
"grad_norm": 0.6138618588447571,
|
|
"learning_rate": 4.546615578782178e-06,
|
|
"loss": 0.6415365934371948,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 1.6605028386050282,
|
|
"grad_norm": 0.5503448247909546,
|
|
"learning_rate": 4.5394957660829554e-06,
|
|
"loss": 0.6184664964675903,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 1.661800486618005,
|
|
"grad_norm": 0.5893129110336304,
|
|
"learning_rate": 4.532376894998335e-06,
|
|
"loss": 0.6324410438537598,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 1.6630981346309812,
|
|
"grad_norm": 0.6124705672264099,
|
|
"learning_rate": 4.5252589800846054e-06,
|
|
"loss": 0.6756390333175659,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 1.664395782643958,
|
|
"grad_norm": 0.598412275314331,
|
|
"learning_rate": 4.518142035896106e-06,
|
|
"loss": 0.7126625776290894,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 1.6656934306569342,
|
|
"grad_norm": 0.599096417427063,
|
|
"learning_rate": 4.5110260769851804e-06,
|
|
"loss": 0.6402862071990967,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 1.666991078669911,
|
|
"grad_norm": 0.5952857136726379,
|
|
"learning_rate": 4.503911117902167e-06,
|
|
"loss": 0.6510819792747498,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 1.6682887266828872,
|
|
"grad_norm": 0.5893689393997192,
|
|
"learning_rate": 4.496797173195354e-06,
|
|
"loss": 0.6236964464187622,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 1.669586374695864,
|
|
"grad_norm": 0.5871599316596985,
|
|
"learning_rate": 4.489684257410959e-06,
|
|
"loss": 0.6143825054168701,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 1.6708840227088402,
|
|
"grad_norm": 0.5756003260612488,
|
|
"learning_rate": 4.482572385093096e-06,
|
|
"loss": 0.6664775609970093,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 1.6721816707218167,
|
|
"grad_norm": 0.6174732446670532,
|
|
"learning_rate": 4.475461570783741e-06,
|
|
"loss": 0.6171724200248718,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 1.6734793187347932,
|
|
"grad_norm": 0.6114921569824219,
|
|
"learning_rate": 4.468351829022713e-06,
|
|
"loss": 0.7615275382995605,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 1.6747769667477697,
|
|
"grad_norm": 0.6558356285095215,
|
|
"learning_rate": 4.46124317434763e-06,
|
|
"loss": 0.6879911422729492,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 1.6760746147607462,
|
|
"grad_norm": 0.5599299669265747,
|
|
"learning_rate": 4.454135621293895e-06,
|
|
"loss": 0.6413300633430481,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 1.6773722627737226,
|
|
"grad_norm": 0.5664532780647278,
|
|
"learning_rate": 4.447029184394654e-06,
|
|
"loss": 0.5328360795974731,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 1.6786699107866991,
|
|
"grad_norm": 0.5689435005187988,
|
|
"learning_rate": 4.439923878180772e-06,
|
|
"loss": 0.6179879903793335,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 1.6799675587996756,
|
|
"grad_norm": 0.7659060955047607,
|
|
"learning_rate": 4.4328197171808e-06,
|
|
"loss": 0.6246920824050903,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 1.6812652068126521,
|
|
"grad_norm": 0.5884883403778076,
|
|
"learning_rate": 4.425716715920952e-06,
|
|
"loss": 0.6561876535415649,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 1.6825628548256284,
|
|
"grad_norm": 0.604040801525116,
|
|
"learning_rate": 4.418614888925064e-06,
|
|
"loss": 0.6797306537628174,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 1.683860502838605,
|
|
"grad_norm": 0.6084474921226501,
|
|
"learning_rate": 4.4115142507145806e-06,
|
|
"loss": 0.6703431606292725,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 1.6851581508515814,
|
|
"grad_norm": 0.5863416790962219,
|
|
"learning_rate": 4.4044148158085046e-06,
|
|
"loss": 0.6162433624267578,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 1.686455798864558,
|
|
"grad_norm": 0.6356022953987122,
|
|
"learning_rate": 4.397316598723385e-06,
|
|
"loss": 0.7044586539268494,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 1.6877534468775344,
|
|
"grad_norm": 0.625541627407074,
|
|
"learning_rate": 4.39021961397328e-06,
|
|
"loss": 0.6772735714912415,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 1.689051094890511,
|
|
"grad_norm": 0.6222056746482849,
|
|
"learning_rate": 4.383123876069726e-06,
|
|
"loss": 0.6994260549545288,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 1.6903487429034874,
|
|
"grad_norm": 0.6140106916427612,
|
|
"learning_rate": 4.376029399521711e-06,
|
|
"loss": 0.6723775863647461,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 1.691646390916464,
|
|
"grad_norm": 0.665780782699585,
|
|
"learning_rate": 4.368936198835646e-06,
|
|
"loss": 0.6295307278633118,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 1.6929440389294403,
|
|
"grad_norm": 0.5935512781143188,
|
|
"learning_rate": 4.361844288515327e-06,
|
|
"loss": 0.6478678584098816,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 1.6942416869424168,
|
|
"grad_norm": 0.6001803874969482,
|
|
"learning_rate": 4.354753683061921e-06,
|
|
"loss": 0.6501032710075378,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 1.6955393349553933,
|
|
"grad_norm": 0.5884422063827515,
|
|
"learning_rate": 4.347664396973917e-06,
|
|
"loss": 0.5854666829109192,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 1.6968369829683698,
|
|
"grad_norm": 0.5774276256561279,
|
|
"learning_rate": 4.340576444747114e-06,
|
|
"loss": 0.6706461906433105,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 1.6981346309813463,
|
|
"grad_norm": 0.6317939162254333,
|
|
"learning_rate": 4.333489840874575e-06,
|
|
"loss": 0.6367801427841187,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 1.6994322789943228,
|
|
"grad_norm": 0.5990278720855713,
|
|
"learning_rate": 4.326404599846618e-06,
|
|
"loss": 0.6113296747207642,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 1.7007299270072993,
|
|
"grad_norm": 0.5930926203727722,
|
|
"learning_rate": 4.319320736150762e-06,
|
|
"loss": 0.658935546875,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 1.7020275750202758,
|
|
"grad_norm": 0.5893100500106812,
|
|
"learning_rate": 4.3122382642717196e-06,
|
|
"loss": 0.6707964539527893,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 1.7033252230332523,
|
|
"grad_norm": 0.6219534277915955,
|
|
"learning_rate": 4.305157198691351e-06,
|
|
"loss": 0.6915128231048584,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 1.7046228710462286,
|
|
"grad_norm": 0.5844510197639465,
|
|
"learning_rate": 4.298077553888644e-06,
|
|
"loss": 0.6463670134544373,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 1.7059205190592053,
|
|
"grad_norm": 0.590699315071106,
|
|
"learning_rate": 4.290999344339678e-06,
|
|
"loss": 0.6447714567184448,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 1.7072181670721815,
|
|
"grad_norm": 0.7812482714653015,
|
|
"learning_rate": 4.283922584517603e-06,
|
|
"loss": 0.6600894927978516,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 1.7085158150851583,
|
|
"grad_norm": 0.5863601565361023,
|
|
"learning_rate": 4.276847288892601e-06,
|
|
"loss": 0.6242765784263611,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 1.7098134630981345,
|
|
"grad_norm": 0.5812450647354126,
|
|
"learning_rate": 4.269773471931858e-06,
|
|
"loss": 0.6475106477737427,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 1.7111111111111112,
|
|
"grad_norm": 0.5987546443939209,
|
|
"learning_rate": 4.262701148099544e-06,
|
|
"loss": 0.6834150552749634,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 1.7124087591240875,
|
|
"grad_norm": 0.5713450312614441,
|
|
"learning_rate": 4.255630331856768e-06,
|
|
"loss": 0.5877612829208374,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 1.7137064071370642,
|
|
"grad_norm": 0.5582994818687439,
|
|
"learning_rate": 4.248561037661561e-06,
|
|
"loss": 0.5848795175552368,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 1.7150040551500405,
|
|
"grad_norm": 0.5713660717010498,
|
|
"learning_rate": 4.241493279968838e-06,
|
|
"loss": 0.6386708617210388,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 1.716301703163017,
|
|
"grad_norm": 0.5673105716705322,
|
|
"learning_rate": 4.234427073230377e-06,
|
|
"loss": 0.6179746389389038,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 1.7175993511759935,
|
|
"grad_norm": 0.5679452419281006,
|
|
"learning_rate": 4.22736243189478e-06,
|
|
"loss": 0.641147255897522,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 1.71889699918897,
|
|
"grad_norm": 0.608302652835846,
|
|
"learning_rate": 4.220299370407454e-06,
|
|
"loss": 0.6888396143913269,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 1.7201946472019465,
|
|
"grad_norm": 0.5650665163993835,
|
|
"learning_rate": 4.2132379032105695e-06,
|
|
"loss": 0.651650607585907,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 1.721492295214923,
|
|
"grad_norm": 0.561650812625885,
|
|
"learning_rate": 4.206178044743041e-06,
|
|
"loss": 0.6115202307701111,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 1.7227899432278995,
|
|
"grad_norm": 0.5860607624053955,
|
|
"learning_rate": 4.19911980944049e-06,
|
|
"loss": 0.6547002792358398,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 1.724087591240876,
|
|
"grad_norm": 0.7003436088562012,
|
|
"learning_rate": 4.1920632117352235e-06,
|
|
"loss": 0.6392462253570557,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 1.7253852392538525,
|
|
"grad_norm": 0.5677862763404846,
|
|
"learning_rate": 4.185008266056195e-06,
|
|
"loss": 0.5821945667266846,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 1.7266828872668287,
|
|
"grad_norm": 0.587795615196228,
|
|
"learning_rate": 4.177954986828987e-06,
|
|
"loss": 0.6519031524658203,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 1.7279805352798054,
|
|
"grad_norm": 0.5895066857337952,
|
|
"learning_rate": 4.170903388475766e-06,
|
|
"loss": 0.6622262597084045,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 1.7292781832927817,
|
|
"grad_norm": 0.5911295413970947,
|
|
"learning_rate": 4.163853485415269e-06,
|
|
"loss": 0.6385645866394043,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 1.7305758313057584,
|
|
"grad_norm": 0.6040472984313965,
|
|
"learning_rate": 4.156805292062762e-06,
|
|
"loss": 0.6997763514518738,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 1.7318734793187347,
|
|
"grad_norm": 0.6030855178833008,
|
|
"learning_rate": 4.1497588228300165e-06,
|
|
"loss": 0.6099704504013062,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 1.7331711273317114,
|
|
"grad_norm": 0.5850874781608582,
|
|
"learning_rate": 4.142714092125277e-06,
|
|
"loss": 0.5748507380485535,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 1.7344687753446877,
|
|
"grad_norm": 0.5881203413009644,
|
|
"learning_rate": 4.135671114353239e-06,
|
|
"loss": 0.6896364688873291,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 1.7357664233576642,
|
|
"grad_norm": 0.5428244471549988,
|
|
"learning_rate": 4.128629903915004e-06,
|
|
"loss": 0.5673160552978516,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 1.7370640713706407,
|
|
"grad_norm": 0.6348845362663269,
|
|
"learning_rate": 4.121590475208071e-06,
|
|
"loss": 0.6452966928482056,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 1.7383617193836172,
|
|
"grad_norm": 0.5799127221107483,
|
|
"learning_rate": 4.114552842626285e-06,
|
|
"loss": 0.626937747001648,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 1.7396593673965937,
|
|
"grad_norm": 0.5999795198440552,
|
|
"learning_rate": 4.107517020559827e-06,
|
|
"loss": 0.6316832900047302,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 1.7409570154095702,
|
|
"grad_norm": 3.404263734817505,
|
|
"learning_rate": 4.1004830233951696e-06,
|
|
"loss": 0.6446040868759155,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 1.7422546634225466,
|
|
"grad_norm": 0.5750575661659241,
|
|
"learning_rate": 4.0934508655150585e-06,
|
|
"loss": 0.6410173177719116,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 1.7435523114355231,
|
|
"grad_norm": 0.612946093082428,
|
|
"learning_rate": 4.086420561298476e-06,
|
|
"loss": 0.7256200313568115,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 1.7448499594484996,
|
|
"grad_norm": 0.5811024904251099,
|
|
"learning_rate": 4.079392125120613e-06,
|
|
"loss": 0.6546262502670288,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 1.7461476074614761,
|
|
"grad_norm": 0.6089962124824524,
|
|
"learning_rate": 4.072365571352847e-06,
|
|
"loss": 0.5643700957298279,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 1.7474452554744526,
|
|
"grad_norm": 0.5598763227462769,
|
|
"learning_rate": 4.065340914362697e-06,
|
|
"loss": 0.6210203170776367,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 1.748742903487429,
|
|
"grad_norm": 0.5718949437141418,
|
|
"learning_rate": 4.058318168513813e-06,
|
|
"loss": 0.6246052980422974,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 1.7500405515004056,
|
|
"grad_norm": 0.5816182494163513,
|
|
"learning_rate": 4.05129734816593e-06,
|
|
"loss": 0.6502724289894104,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 1.7513381995133819,
|
|
"grad_norm": 0.6006066799163818,
|
|
"learning_rate": 4.04427846767485e-06,
|
|
"loss": 0.6196832060813904,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 1.7526358475263586,
|
|
"grad_norm": 0.6209701299667358,
|
|
"learning_rate": 4.037261541392405e-06,
|
|
"loss": 0.6615033149719238,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 1.7539334955393349,
|
|
"grad_norm": 0.5778906345367432,
|
|
"learning_rate": 4.030246583666437e-06,
|
|
"loss": 0.600303053855896,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 1.7552311435523116,
|
|
"grad_norm": 0.5654350519180298,
|
|
"learning_rate": 4.023233608840755e-06,
|
|
"loss": 0.6526889801025391,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 1.7565287915652879,
|
|
"grad_norm": 0.604720413684845,
|
|
"learning_rate": 4.016222631255121e-06,
|
|
"loss": 0.6632093191146851,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 1.7578264395782643,
|
|
"grad_norm": 0.5776406526565552,
|
|
"learning_rate": 4.0092136652452054e-06,
|
|
"loss": 0.5856695175170898,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 1.7591240875912408,
|
|
"grad_norm": 0.5833093523979187,
|
|
"learning_rate": 4.0022067251425736e-06,
|
|
"loss": 0.7012051939964294,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 1.7604217356042173,
|
|
"grad_norm": 0.6321353912353516,
|
|
"learning_rate": 3.9952018252746424e-06,
|
|
"loss": 0.6692728996276855,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 1.7617193836171938,
|
|
"grad_norm": 0.5867600440979004,
|
|
"learning_rate": 3.988198979964662e-06,
|
|
"loss": 0.6333553791046143,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 1.7630170316301703,
|
|
"grad_norm": 0.5640849471092224,
|
|
"learning_rate": 3.981198203531673e-06,
|
|
"loss": 0.6600401401519775,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 1.7643146796431468,
|
|
"grad_norm": 0.5749746561050415,
|
|
"learning_rate": 3.974199510290498e-06,
|
|
"loss": 0.600135087966919,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 1.7656123276561233,
|
|
"grad_norm": 0.6021872162818909,
|
|
"learning_rate": 3.967202914551688e-06,
|
|
"loss": 0.6514877676963806,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 1.7669099756690998,
|
|
"grad_norm": 1.1252561807632446,
|
|
"learning_rate": 3.960208430621514e-06,
|
|
"loss": 0.6247175931930542,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 1.7682076236820763,
|
|
"grad_norm": 0.6089026927947998,
|
|
"learning_rate": 3.953216072801922e-06,
|
|
"loss": 0.6505289077758789,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 1.7695052716950528,
|
|
"grad_norm": 0.613433301448822,
|
|
"learning_rate": 3.946225855390518e-06,
|
|
"loss": 0.6519597768783569,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 1.770802919708029,
|
|
"grad_norm": 0.6230673789978027,
|
|
"learning_rate": 3.9392377926805226e-06,
|
|
"loss": 0.6527152061462402,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 1.7721005677210058,
|
|
"grad_norm": 0.629035472869873,
|
|
"learning_rate": 3.932251898960759e-06,
|
|
"loss": 0.6801344156265259,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 1.773398215733982,
|
|
"grad_norm": 0.586634635925293,
|
|
"learning_rate": 3.925268188515611e-06,
|
|
"loss": 0.6678798794746399,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 1.7746958637469588,
|
|
"grad_norm": 0.691630482673645,
|
|
"learning_rate": 3.918286675624998e-06,
|
|
"loss": 0.6675139665603638,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 1.775993511759935,
|
|
"grad_norm": 0.5624348521232605,
|
|
"learning_rate": 3.911307374564346e-06,
|
|
"loss": 0.5508803129196167,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 1.7772911597729117,
|
|
"grad_norm": 0.9164373874664307,
|
|
"learning_rate": 3.904330299604562e-06,
|
|
"loss": 0.6670984625816345,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 1.778588807785888,
|
|
"grad_norm": 0.620689868927002,
|
|
"learning_rate": 3.897355465011996e-06,
|
|
"loss": 0.6593863368034363,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 1.7798864557988645,
|
|
"grad_norm": 0.5467659831047058,
|
|
"learning_rate": 3.89038288504842e-06,
|
|
"loss": 0.5556522607803345,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 1.781184103811841,
|
|
"grad_norm": 0.5498706698417664,
|
|
"learning_rate": 3.883412573970995e-06,
|
|
"loss": 0.6222935914993286,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 1.7824817518248175,
|
|
"grad_norm": 0.5786144137382507,
|
|
"learning_rate": 3.876444546032242e-06,
|
|
"loss": 0.6003856658935547,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 1.783779399837794,
|
|
"grad_norm": 0.5900736451148987,
|
|
"learning_rate": 3.8694788154800185e-06,
|
|
"loss": 0.6151521801948547,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 1.7850770478507705,
|
|
"grad_norm": 0.5880241394042969,
|
|
"learning_rate": 3.862515396557476e-06,
|
|
"loss": 0.6527180671691895,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 1.786374695863747,
|
|
"grad_norm": 0.6083548069000244,
|
|
"learning_rate": 3.855554303503047e-06,
|
|
"loss": 0.6581445932388306,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 1.7876723438767235,
|
|
"grad_norm": 0.5609106421470642,
|
|
"learning_rate": 3.848595550550401e-06,
|
|
"loss": 0.6590725779533386,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 1.7889699918897,
|
|
"grad_norm": 0.6204782724380493,
|
|
"learning_rate": 3.841639151928431e-06,
|
|
"loss": 0.6809993386268616,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 1.7902676399026762,
|
|
"grad_norm": 0.5831668972969055,
|
|
"learning_rate": 3.834685121861208e-06,
|
|
"loss": 0.6498827934265137,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 1.7902676399026762,
|
|
"eval_loss": 0.6777992248535156,
|
|
"eval_runtime": 73.0192,
|
|
"eval_samples_per_second": 71.105,
|
|
"eval_steps_per_second": 8.888,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 1.791565287915653,
|
|
"grad_norm": 0.5954435467720032,
|
|
"learning_rate": 3.827733474567966e-06,
|
|
"loss": 0.6496779322624207,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 1.7928629359286292,
|
|
"grad_norm": 0.5471308827400208,
|
|
"learning_rate": 3.820784224263061e-06,
|
|
"loss": 0.5941118001937866,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 1.794160583941606,
|
|
"grad_norm": 0.5896412134170532,
|
|
"learning_rate": 3.8138373851559546e-06,
|
|
"loss": 0.6255256533622742,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 1.7954582319545822,
|
|
"grad_norm": 0.9544134736061096,
|
|
"learning_rate": 3.8068929714511716e-06,
|
|
"loss": 0.6434448957443237,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 1.796755879967559,
|
|
"grad_norm": 0.5609217882156372,
|
|
"learning_rate": 3.799950997348283e-06,
|
|
"loss": 0.6087275743484497,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 1.7980535279805352,
|
|
"grad_norm": 4.44458532333374,
|
|
"learning_rate": 3.7930114770418654e-06,
|
|
"loss": 0.5713160037994385,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 1.799351175993512,
|
|
"grad_norm": 0.5974010825157166,
|
|
"learning_rate": 3.7860744247214853e-06,
|
|
"loss": 0.6058465838432312,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 1.8006488240064882,
|
|
"grad_norm": 0.5761491060256958,
|
|
"learning_rate": 3.7791398545716552e-06,
|
|
"loss": 0.619678258895874,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 1.8019464720194647,
|
|
"grad_norm": 1.2458136081695557,
|
|
"learning_rate": 3.7722077807718193e-06,
|
|
"loss": 0.6886736750602722,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 1.8032441200324412,
|
|
"grad_norm": 0.6204317212104797,
|
|
"learning_rate": 3.7652782174963107e-06,
|
|
"loss": 0.6285656690597534,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 1.8045417680454177,
|
|
"grad_norm": 0.5791151523590088,
|
|
"learning_rate": 3.758351178914336e-06,
|
|
"loss": 0.6601356267929077,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 1.8058394160583942,
|
|
"grad_norm": 0.5656175017356873,
|
|
"learning_rate": 3.7514266791899324e-06,
|
|
"loss": 0.5828202962875366,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 1.8071370640713706,
|
|
"grad_norm": 0.6195251941680908,
|
|
"learning_rate": 3.7445047324819517e-06,
|
|
"loss": 0.7079391479492188,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 1.8084347120843471,
|
|
"grad_norm": 0.5826953649520874,
|
|
"learning_rate": 3.737585352944021e-06,
|
|
"loss": 0.6261759996414185,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 1.8097323600973236,
|
|
"grad_norm": 0.6581652760505676,
|
|
"learning_rate": 3.7306685547245225e-06,
|
|
"loss": 0.6573713421821594,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 1.8110300081103001,
|
|
"grad_norm": 0.5666741728782654,
|
|
"learning_rate": 3.7237543519665543e-06,
|
|
"loss": 0.621452808380127,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 1.8123276561232764,
|
|
"grad_norm": 0.5948919057846069,
|
|
"learning_rate": 3.7168427588079153e-06,
|
|
"loss": 0.6522223353385925,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 1.8136253041362531,
|
|
"grad_norm": 0.5332669615745544,
|
|
"learning_rate": 3.7099337893810593e-06,
|
|
"loss": 0.650192141532898,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 1.8149229521492294,
|
|
"grad_norm": 0.599592924118042,
|
|
"learning_rate": 3.703027457813086e-06,
|
|
"loss": 0.6094880700111389,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 1.816220600162206,
|
|
"grad_norm": 0.6047189235687256,
|
|
"learning_rate": 3.696123778225691e-06,
|
|
"loss": 0.6866611838340759,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 1.8175182481751824,
|
|
"grad_norm": 0.7004641890525818,
|
|
"learning_rate": 3.6892227647351515e-06,
|
|
"loss": 0.6755614280700684,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 1.818815896188159,
|
|
"grad_norm": 0.5989522933959961,
|
|
"learning_rate": 3.6823244314522966e-06,
|
|
"loss": 0.6946245431900024,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 1.8201135442011354,
|
|
"grad_norm": 0.579132080078125,
|
|
"learning_rate": 3.67542879248247e-06,
|
|
"loss": 0.6097831726074219,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 1.821411192214112,
|
|
"grad_norm": 0.577029287815094,
|
|
"learning_rate": 3.668535861925509e-06,
|
|
"loss": 0.6218363046646118,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 1.8227088402270883,
|
|
"grad_norm": 0.6415956020355225,
|
|
"learning_rate": 3.661645653875709e-06,
|
|
"loss": 0.6793798208236694,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 1.8240064882400648,
|
|
"grad_norm": 0.603378415107727,
|
|
"learning_rate": 3.6547581824218057e-06,
|
|
"loss": 0.5855191946029663,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 1.8253041362530413,
|
|
"grad_norm": 0.6317605376243591,
|
|
"learning_rate": 3.6478734616469324e-06,
|
|
"loss": 0.6648485660552979,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 1.8266017842660178,
|
|
"grad_norm": 0.5663666725158691,
|
|
"learning_rate": 3.6409915056286017e-06,
|
|
"loss": 0.6257850527763367,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 1.8278994322789943,
|
|
"grad_norm": 0.8109258413314819,
|
|
"learning_rate": 3.6341123284386694e-06,
|
|
"loss": 0.6545461416244507,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 1.8291970802919708,
|
|
"grad_norm": 0.6355454325675964,
|
|
"learning_rate": 3.627235944143315e-06,
|
|
"loss": 0.68341463804245,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 1.8304947283049473,
|
|
"grad_norm": 0.5834214091300964,
|
|
"learning_rate": 3.620362366803001e-06,
|
|
"loss": 0.6818444728851318,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 1.8317923763179238,
|
|
"grad_norm": 0.5867376327514648,
|
|
"learning_rate": 3.6134916104724573e-06,
|
|
"loss": 0.6132810115814209,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 1.8330900243309003,
|
|
"grad_norm": 0.5869424343109131,
|
|
"learning_rate": 3.606623689200637e-06,
|
|
"loss": 0.6913362741470337,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 1.8343876723438766,
|
|
"grad_norm": 0.5870312452316284,
|
|
"learning_rate": 3.599758617030704e-06,
|
|
"loss": 0.6339567303657532,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 1.8356853203568533,
|
|
"grad_norm": 0.6119568943977356,
|
|
"learning_rate": 3.5928964079999907e-06,
|
|
"loss": 0.6378414630889893,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 1.8369829683698295,
|
|
"grad_norm": 0.5717766284942627,
|
|
"learning_rate": 3.5860370761399814e-06,
|
|
"loss": 0.6197869777679443,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 1.8382806163828063,
|
|
"grad_norm": 0.626775860786438,
|
|
"learning_rate": 3.5791806354762702e-06,
|
|
"loss": 0.7052003145217896,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 1.8395782643957825,
|
|
"grad_norm": 0.5812957286834717,
|
|
"learning_rate": 3.572327100028545e-06,
|
|
"loss": 0.66878342628479,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 1.8408759124087593,
|
|
"grad_norm": 0.585649311542511,
|
|
"learning_rate": 3.565476483810548e-06,
|
|
"loss": 0.6272032260894775,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 1.8421735604217355,
|
|
"grad_norm": 0.6118691563606262,
|
|
"learning_rate": 3.55862880083006e-06,
|
|
"loss": 0.6374541521072388,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 1.8434712084347122,
|
|
"grad_norm": 0.5860823392868042,
|
|
"learning_rate": 3.5517840650888564e-06,
|
|
"loss": 0.6104147434234619,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 1.8447688564476885,
|
|
"grad_norm": 0.5618652701377869,
|
|
"learning_rate": 3.544942290582691e-06,
|
|
"loss": 0.5710769891738892,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 1.846066504460665,
|
|
"grad_norm": 0.5879126787185669,
|
|
"learning_rate": 3.538103491301258e-06,
|
|
"loss": 0.6456954479217529,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 1.8473641524736415,
|
|
"grad_norm": 0.6192496418952942,
|
|
"learning_rate": 3.531267681228175e-06,
|
|
"loss": 0.6715401411056519,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 1.848661800486618,
|
|
"grad_norm": 0.6261125802993774,
|
|
"learning_rate": 3.5244348743409394e-06,
|
|
"loss": 0.6905325055122375,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 1.8499594484995945,
|
|
"grad_norm": 0.5808646082878113,
|
|
"learning_rate": 3.517605084610917e-06,
|
|
"loss": 0.6800282001495361,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 1.851257096512571,
|
|
"grad_norm": 0.5866647362709045,
|
|
"learning_rate": 3.510778326003294e-06,
|
|
"loss": 0.6750452518463135,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 1.8525547445255475,
|
|
"grad_norm": 0.5787751078605652,
|
|
"learning_rate": 3.5039546124770675e-06,
|
|
"loss": 0.6570975184440613,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 1.853852392538524,
|
|
"grad_norm": 0.6095142960548401,
|
|
"learning_rate": 3.4971339579850017e-06,
|
|
"loss": 0.6344528198242188,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 1.8551500405515005,
|
|
"grad_norm": 0.5892320871353149,
|
|
"learning_rate": 3.4903163764736104e-06,
|
|
"loss": 0.6722358465194702,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 1.8564476885644767,
|
|
"grad_norm": 0.5868071913719177,
|
|
"learning_rate": 3.4835018818831235e-06,
|
|
"loss": 0.638904333114624,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 1.8577453365774534,
|
|
"grad_norm": 0.6003979444503784,
|
|
"learning_rate": 3.4766904881474535e-06,
|
|
"loss": 0.6853640079498291,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 1.8590429845904297,
|
|
"grad_norm": 0.555009663105011,
|
|
"learning_rate": 3.4698822091941808e-06,
|
|
"loss": 0.6409114599227905,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 1.8603406326034064,
|
|
"grad_norm": 0.5608627796173096,
|
|
"learning_rate": 3.463077058944511e-06,
|
|
"loss": 0.6055079698562622,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 1.8616382806163827,
|
|
"grad_norm": 0.6137329339981079,
|
|
"learning_rate": 3.456275051313255e-06,
|
|
"loss": 0.6407139897346497,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 1.8629359286293594,
|
|
"grad_norm": 0.5606741905212402,
|
|
"learning_rate": 3.4494762002087934e-06,
|
|
"loss": 0.6254716515541077,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 1.8642335766423357,
|
|
"grad_norm": 0.6578085422515869,
|
|
"learning_rate": 3.4426805195330605e-06,
|
|
"loss": 0.7003939151763916,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 1.8655312246553124,
|
|
"grad_norm": 0.6054635047912598,
|
|
"learning_rate": 3.4358880231814983e-06,
|
|
"loss": 0.6616827845573425,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 1.8668288726682887,
|
|
"grad_norm": 0.5833800435066223,
|
|
"learning_rate": 3.4290987250430486e-06,
|
|
"loss": 0.6554232835769653,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 1.8681265206812652,
|
|
"grad_norm": 0.6048437356948853,
|
|
"learning_rate": 3.4223126390001025e-06,
|
|
"loss": 0.6970128417015076,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 1.8694241686942417,
|
|
"grad_norm": 0.5701255202293396,
|
|
"learning_rate": 3.415529778928492e-06,
|
|
"loss": 0.6580668687820435,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 1.8707218167072182,
|
|
"grad_norm": 0.553488552570343,
|
|
"learning_rate": 3.408750158697445e-06,
|
|
"loss": 0.5830860137939453,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 1.8720194647201946,
|
|
"grad_norm": 0.5695835947990417,
|
|
"learning_rate": 3.401973792169574e-06,
|
|
"loss": 0.6223429441452026,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 1.8733171127331711,
|
|
"grad_norm": 0.5780246257781982,
|
|
"learning_rate": 3.39520069320083e-06,
|
|
"loss": 0.6171367168426514,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 1.8746147607461476,
|
|
"grad_norm": 0.5851401686668396,
|
|
"learning_rate": 3.3884308756404873e-06,
|
|
"loss": 0.648118257522583,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 1.8759124087591241,
|
|
"grad_norm": 0.5909201502799988,
|
|
"learning_rate": 3.381664353331107e-06,
|
|
"loss": 0.6370965242385864,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 1.8772100567721006,
|
|
"grad_norm": 0.5840253233909607,
|
|
"learning_rate": 3.3749011401085185e-06,
|
|
"loss": 0.637911856174469,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 1.878507704785077,
|
|
"grad_norm": 0.5772621035575867,
|
|
"learning_rate": 3.3681412498017773e-06,
|
|
"loss": 0.6257845759391785,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 1.8798053527980536,
|
|
"grad_norm": 0.5972771048545837,
|
|
"learning_rate": 3.361384696233152e-06,
|
|
"loss": 0.6612721085548401,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 1.8811030008110299,
|
|
"grad_norm": 0.622917652130127,
|
|
"learning_rate": 3.354631493218081e-06,
|
|
"loss": 0.657785177230835,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 1.8824006488240066,
|
|
"grad_norm": 0.581942081451416,
|
|
"learning_rate": 3.347881654565159e-06,
|
|
"loss": 0.6339654922485352,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 1.8836982968369829,
|
|
"grad_norm": 0.5792364478111267,
|
|
"learning_rate": 3.3411351940760924e-06,
|
|
"loss": 0.606496274471283,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 1.8849959448499596,
|
|
"grad_norm": 0.5994595289230347,
|
|
"learning_rate": 3.3343921255456903e-06,
|
|
"loss": 0.6079939603805542,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 1.8862935928629359,
|
|
"grad_norm": 0.5667769908905029,
|
|
"learning_rate": 3.3276524627618177e-06,
|
|
"loss": 0.5945770740509033,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 1.8875912408759126,
|
|
"grad_norm": 0.591791033744812,
|
|
"learning_rate": 3.3209162195053825e-06,
|
|
"loss": 0.620225727558136,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 1.8888888888888888,
|
|
"grad_norm": 0.5802031755447388,
|
|
"learning_rate": 3.314183409550293e-06,
|
|
"loss": 0.614050567150116,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 1.8901865369018653,
|
|
"grad_norm": 0.6020429134368896,
|
|
"learning_rate": 3.3074540466634454e-06,
|
|
"loss": 0.6691816449165344,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 1.8914841849148418,
|
|
"grad_norm": 0.6074531674385071,
|
|
"learning_rate": 3.300728144604681e-06,
|
|
"loss": 0.6914318799972534,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 1.8927818329278183,
|
|
"grad_norm": 0.5949025750160217,
|
|
"learning_rate": 3.294005717126767e-06,
|
|
"loss": 0.5819941163063049,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 1.8940794809407948,
|
|
"grad_norm": 0.5953806638717651,
|
|
"learning_rate": 3.287286777975369e-06,
|
|
"loss": 0.6016311645507812,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 1.8953771289537713,
|
|
"grad_norm": 0.6012862920761108,
|
|
"learning_rate": 3.2805713408890134e-06,
|
|
"loss": 0.62370765209198,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 1.8966747769667478,
|
|
"grad_norm": 0.5692993402481079,
|
|
"learning_rate": 3.2738594195990725e-06,
|
|
"loss": 0.6124866604804993,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 1.8979724249797243,
|
|
"grad_norm": 0.5979285836219788,
|
|
"learning_rate": 3.267151027829725e-06,
|
|
"loss": 0.6501439213752747,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 1.8992700729927008,
|
|
"grad_norm": 0.579058825969696,
|
|
"learning_rate": 3.2604461792979346e-06,
|
|
"loss": 0.6591506004333496,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 1.900567721005677,
|
|
"grad_norm": 0.5612583756446838,
|
|
"learning_rate": 3.253744887713417e-06,
|
|
"loss": 0.644995927810669,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 1.9018653690186538,
|
|
"grad_norm": 0.5929267406463623,
|
|
"learning_rate": 3.2470471667786217e-06,
|
|
"loss": 0.6369574069976807,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 1.90316301703163,
|
|
"grad_norm": 0.5371314287185669,
|
|
"learning_rate": 3.2403530301886897e-06,
|
|
"loss": 0.6427657604217529,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 1.9044606650446068,
|
|
"grad_norm": 0.5879482626914978,
|
|
"learning_rate": 3.2336624916314385e-06,
|
|
"loss": 0.6144864559173584,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 1.905758313057583,
|
|
"grad_norm": 0.5627234578132629,
|
|
"learning_rate": 3.226975564787322e-06,
|
|
"loss": 0.6070575714111328,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 1.9070559610705597,
|
|
"grad_norm": 0.595919668674469,
|
|
"learning_rate": 3.2202922633294178e-06,
|
|
"loss": 0.6438186764717102,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 1.908353609083536,
|
|
"grad_norm": 0.5860680937767029,
|
|
"learning_rate": 3.2136126009233815e-06,
|
|
"loss": 0.6168484091758728,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 1.9096512570965127,
|
|
"grad_norm": 0.6082072257995605,
|
|
"learning_rate": 3.2069365912274364e-06,
|
|
"loss": 0.6607163548469543,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 1.910948905109489,
|
|
"grad_norm": 0.6000680923461914,
|
|
"learning_rate": 3.2002642478923273e-06,
|
|
"loss": 0.6100636720657349,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 1.9122465531224655,
|
|
"grad_norm": 0.5958935022354126,
|
|
"learning_rate": 3.1935955845613138e-06,
|
|
"loss": 0.6283643245697021,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 1.913544201135442,
|
|
"grad_norm": 0.5999156832695007,
|
|
"learning_rate": 3.1869306148701186e-06,
|
|
"loss": 0.6624071002006531,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 1.9148418491484185,
|
|
"grad_norm": 0.5659943222999573,
|
|
"learning_rate": 3.1802693524469226e-06,
|
|
"loss": 0.5978960990905762,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 1.916139497161395,
|
|
"grad_norm": 0.6041963696479797,
|
|
"learning_rate": 3.1736118109123183e-06,
|
|
"loss": 0.6953626871109009,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 1.9174371451743715,
|
|
"grad_norm": 0.5829861164093018,
|
|
"learning_rate": 3.1669580038792953e-06,
|
|
"loss": 0.6347401142120361,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 1.918734793187348,
|
|
"grad_norm": 0.5910770297050476,
|
|
"learning_rate": 3.1603079449532014e-06,
|
|
"loss": 0.6252144575119019,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 1.9200324412003245,
|
|
"grad_norm": 0.5840498208999634,
|
|
"learning_rate": 3.1536616477317283e-06,
|
|
"loss": 0.6821172833442688,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 1.921330089213301,
|
|
"grad_norm": 0.5815771222114563,
|
|
"learning_rate": 3.147019125804869e-06,
|
|
"loss": 0.627813458442688,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 1.9226277372262772,
|
|
"grad_norm": 0.6089122295379639,
|
|
"learning_rate": 3.140380392754901e-06,
|
|
"loss": 0.5848509073257446,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 1.923925385239254,
|
|
"grad_norm": 0.5963802337646484,
|
|
"learning_rate": 3.13374546215635e-06,
|
|
"loss": 0.6434051990509033,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 1.9252230332522302,
|
|
"grad_norm": 0.5844939351081848,
|
|
"learning_rate": 3.1271143475759745e-06,
|
|
"loss": 0.6818792819976807,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 1.926520681265207,
|
|
"grad_norm": 0.5862755179405212,
|
|
"learning_rate": 3.1204870625727216e-06,
|
|
"loss": 0.6306114196777344,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 1.9278183292781832,
|
|
"grad_norm": 0.5746100544929504,
|
|
"learning_rate": 3.1138636206977147e-06,
|
|
"loss": 0.649817705154419,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 1.92911597729116,
|
|
"grad_norm": 0.7469968199729919,
|
|
"learning_rate": 3.107244035494212e-06,
|
|
"loss": 0.6348094940185547,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 1.9304136253041362,
|
|
"grad_norm": 0.5893679857254028,
|
|
"learning_rate": 3.100628320497592e-06,
|
|
"loss": 0.6067320704460144,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 1.931711273317113,
|
|
"grad_norm": 0.5654053688049316,
|
|
"learning_rate": 3.0940164892353197e-06,
|
|
"loss": 0.6475971937179565,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 1.9330089213300892,
|
|
"grad_norm": 0.5734997987747192,
|
|
"learning_rate": 3.087408555226914e-06,
|
|
"loss": 0.61939537525177,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 1.9343065693430657,
|
|
"grad_norm": 0.5849641561508179,
|
|
"learning_rate": 3.0808045319839285e-06,
|
|
"loss": 0.6628157496452332,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 1.9356042173560422,
|
|
"grad_norm": 0.6002839803695679,
|
|
"learning_rate": 3.0742044330099162e-06,
|
|
"loss": 0.7149718403816223,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 1.9369018653690186,
|
|
"grad_norm": 0.5984014272689819,
|
|
"learning_rate": 3.067608271800414e-06,
|
|
"loss": 0.6320532560348511,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 1.9381995133819951,
|
|
"grad_norm": 0.5990681052207947,
|
|
"learning_rate": 3.0610160618428987e-06,
|
|
"loss": 0.7083904147148132,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 1.9394971613949716,
|
|
"grad_norm": 0.5863717794418335,
|
|
"learning_rate": 3.054427816616773e-06,
|
|
"loss": 0.6290713548660278,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 1.9407948094079481,
|
|
"grad_norm": 0.5699295401573181,
|
|
"learning_rate": 3.0478435495933273e-06,
|
|
"loss": 0.621793270111084,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 1.9420924574209246,
|
|
"grad_norm": 0.5777533054351807,
|
|
"learning_rate": 3.0412632742357263e-06,
|
|
"loss": 0.6173816323280334,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 1.9433901054339011,
|
|
"grad_norm": 0.6047410368919373,
|
|
"learning_rate": 3.0346870039989618e-06,
|
|
"loss": 0.6888694763183594,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 1.9446877534468774,
|
|
"grad_norm": 0.5461248159408569,
|
|
"learning_rate": 3.028114752329848e-06,
|
|
"loss": 0.5872098207473755,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 1.945985401459854,
|
|
"grad_norm": 0.6002129316329956,
|
|
"learning_rate": 3.0215465326669724e-06,
|
|
"loss": 0.6144348382949829,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 1.9472830494728304,
|
|
"grad_norm": 0.5926127433776855,
|
|
"learning_rate": 3.0149823584406834e-06,
|
|
"loss": 0.5981168746948242,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 1.948580697485807,
|
|
"grad_norm": 0.553831160068512,
|
|
"learning_rate": 3.008422243073053e-06,
|
|
"loss": 0.6507419943809509,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 1.9498783454987834,
|
|
"grad_norm": 0.6168836951255798,
|
|
"learning_rate": 3.001866199977861e-06,
|
|
"loss": 0.6085610389709473,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 1.95117599351176,
|
|
"grad_norm": 0.610622227191925,
|
|
"learning_rate": 2.995314242560553e-06,
|
|
"loss": 0.584296703338623,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 1.9524736415247363,
|
|
"grad_norm": 0.598139762878418,
|
|
"learning_rate": 2.988766384218225e-06,
|
|
"loss": 0.6997476816177368,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 1.9537712895377128,
|
|
"grad_norm": 0.5578987002372742,
|
|
"learning_rate": 2.982222638339588e-06,
|
|
"loss": 0.5938620567321777,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 1.9550689375506893,
|
|
"grad_norm": 0.6006044745445251,
|
|
"learning_rate": 2.9756830183049502e-06,
|
|
"loss": 0.6362953186035156,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 1.9563665855636658,
|
|
"grad_norm": 0.6040393710136414,
|
|
"learning_rate": 2.969147537486175e-06,
|
|
"loss": 0.5799316167831421,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 1.9576642335766423,
|
|
"grad_norm": 0.5984890460968018,
|
|
"learning_rate": 2.962616209246669e-06,
|
|
"loss": 0.639271080493927,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 1.9589618815896188,
|
|
"grad_norm": 0.7439842820167542,
|
|
"learning_rate": 2.956089046941344e-06,
|
|
"loss": 0.6323772072792053,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 1.9602595296025953,
|
|
"grad_norm": 0.5876015424728394,
|
|
"learning_rate": 2.9495660639165967e-06,
|
|
"loss": 0.5763074159622192,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 1.9615571776155718,
|
|
"grad_norm": 0.6764865517616272,
|
|
"learning_rate": 2.9430472735102733e-06,
|
|
"loss": 0.7091867923736572,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 1.9628548256285483,
|
|
"grad_norm": 0.6067684292793274,
|
|
"learning_rate": 2.9365326890516543e-06,
|
|
"loss": 0.6496888995170593,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 1.9641524736415248,
|
|
"grad_norm": 0.5764046311378479,
|
|
"learning_rate": 2.9300223238614135e-06,
|
|
"loss": 0.6311619281768799,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 1.9654501216545013,
|
|
"grad_norm": 0.5956159234046936,
|
|
"learning_rate": 2.923516191251601e-06,
|
|
"loss": 0.6114912033081055,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 1.9667477696674776,
|
|
"grad_norm": 0.5818417072296143,
|
|
"learning_rate": 2.917014304525609e-06,
|
|
"loss": 0.6572203636169434,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 1.9680454176804543,
|
|
"grad_norm": 0.6058406233787537,
|
|
"learning_rate": 2.91051667697815e-06,
|
|
"loss": 0.6197275519371033,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 1.9693430656934305,
|
|
"grad_norm": 0.6014067530632019,
|
|
"learning_rate": 2.904023321895234e-06,
|
|
"loss": 0.6693457365036011,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 1.9706407137064073,
|
|
"grad_norm": 0.5446932315826416,
|
|
"learning_rate": 2.8975342525541217e-06,
|
|
"loss": 0.6219191551208496,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 1.9719383617193835,
|
|
"grad_norm": 0.5773969292640686,
|
|
"learning_rate": 2.8910494822233203e-06,
|
|
"loss": 0.6279373168945312,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 1.9732360097323602,
|
|
"grad_norm": 0.553596556186676,
|
|
"learning_rate": 2.8845690241625437e-06,
|
|
"loss": 0.5865894556045532,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 1.9745336577453365,
|
|
"grad_norm": 0.5790948867797852,
|
|
"learning_rate": 2.878092891622688e-06,
|
|
"loss": 0.6192329525947571,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 1.975831305758313,
|
|
"grad_norm": 0.5870917439460754,
|
|
"learning_rate": 2.871621097845806e-06,
|
|
"loss": 0.6201770305633545,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 1.9771289537712895,
|
|
"grad_norm": 0.586599588394165,
|
|
"learning_rate": 2.865153656065076e-06,
|
|
"loss": 0.6979238390922546,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 1.978426601784266,
|
|
"grad_norm": 0.5809787511825562,
|
|
"learning_rate": 2.8586905795047813e-06,
|
|
"loss": 0.6264389753341675,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 1.9797242497972425,
|
|
"grad_norm": 0.5969094038009644,
|
|
"learning_rate": 2.8522318813802796e-06,
|
|
"loss": 0.6544374227523804,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 1.981021897810219,
|
|
"grad_norm": 0.5875753164291382,
|
|
"learning_rate": 2.8457775748979664e-06,
|
|
"loss": 0.7151497006416321,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 1.9823195458231955,
|
|
"grad_norm": 0.5887599587440491,
|
|
"learning_rate": 2.8393276732552745e-06,
|
|
"loss": 0.650242030620575,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 1.983617193836172,
|
|
"grad_norm": 0.5730281472206116,
|
|
"learning_rate": 2.8328821896406132e-06,
|
|
"loss": 0.6076555252075195,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 1.9849148418491485,
|
|
"grad_norm": 0.6394782662391663,
|
|
"learning_rate": 2.826441137233368e-06,
|
|
"loss": 0.6826823949813843,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 1.986212489862125,
|
|
"grad_norm": 0.5790883302688599,
|
|
"learning_rate": 2.8200045292038596e-06,
|
|
"loss": 0.6138323545455933,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 1.9875101378751014,
|
|
"grad_norm": 0.6426994800567627,
|
|
"learning_rate": 2.8135723787133233e-06,
|
|
"loss": 0.7073339223861694,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 1.9888077858880777,
|
|
"grad_norm": 0.6070610880851746,
|
|
"learning_rate": 2.8071446989138786e-06,
|
|
"loss": 0.6867741346359253,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 1.9901054339010544,
|
|
"grad_norm": 0.6205259561538696,
|
|
"learning_rate": 2.800721502948506e-06,
|
|
"loss": 0.6849797368049622,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 1.9914030819140307,
|
|
"grad_norm": 0.6018499135971069,
|
|
"learning_rate": 2.7943028039510085e-06,
|
|
"loss": 0.6437822580337524,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 1.9927007299270074,
|
|
"grad_norm": 0.6043044328689575,
|
|
"learning_rate": 2.78788861504601e-06,
|
|
"loss": 0.6022955775260925,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 1.9939983779399837,
|
|
"grad_norm": 0.5917290449142456,
|
|
"learning_rate": 2.7814789493488947e-06,
|
|
"loss": 0.6646702885627747,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 1.9952960259529604,
|
|
"grad_norm": 0.6160155534744263,
|
|
"learning_rate": 2.7750738199658157e-06,
|
|
"loss": 0.6750048398971558,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 1.9965936739659367,
|
|
"grad_norm": 0.5714327096939087,
|
|
"learning_rate": 2.7686732399936343e-06,
|
|
"loss": 0.6445184946060181,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 1.9978913219789132,
|
|
"grad_norm": 0.5985759496688843,
|
|
"learning_rate": 2.762277222519919e-06,
|
|
"loss": 0.6806057691574097,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 1.9991889699918897,
|
|
"grad_norm": 0.5991272330284119,
|
|
"learning_rate": 2.7558857806229066e-06,
|
|
"loss": 0.6159195899963379,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.7430510520935059,
|
|
"learning_rate": 2.749498927371478e-06,
|
|
"loss": 0.5819271802902222,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 2.0012976480129763,
|
|
"grad_norm": 0.6751839518547058,
|
|
"learning_rate": 2.7431166758251317e-06,
|
|
"loss": 0.5926187634468079,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 2.002595296025953,
|
|
"grad_norm": 0.6562322378158569,
|
|
"learning_rate": 2.7367390390339565e-06,
|
|
"loss": 0.6589317321777344,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 2.0038929440389293,
|
|
"grad_norm": 0.6393698453903198,
|
|
"learning_rate": 2.730366030038606e-06,
|
|
"loss": 0.544275164604187,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 2.005190592051906,
|
|
"grad_norm": 0.5910435318946838,
|
|
"learning_rate": 2.72399766187027e-06,
|
|
"loss": 0.6208810210227966,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 2.0064882400648822,
|
|
"grad_norm": 0.6248382925987244,
|
|
"learning_rate": 2.7176339475506515e-06,
|
|
"loss": 0.5592293739318848,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 2.007785888077859,
|
|
"grad_norm": 0.6031874418258667,
|
|
"learning_rate": 2.7112749000919304e-06,
|
|
"loss": 0.5941007137298584,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 2.0090835360908352,
|
|
"grad_norm": 0.598434567451477,
|
|
"learning_rate": 2.704920532496756e-06,
|
|
"loss": 0.5872475504875183,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 2.010381184103812,
|
|
"grad_norm": 0.606324315071106,
|
|
"learning_rate": 2.698570857758195e-06,
|
|
"loss": 0.5607691407203674,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 2.011678832116788,
|
|
"grad_norm": 0.6241020560264587,
|
|
"learning_rate": 2.692225888859732e-06,
|
|
"loss": 0.6537069082260132,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 2.012976480129765,
|
|
"grad_norm": 0.6302763223648071,
|
|
"learning_rate": 2.685885638775216e-06,
|
|
"loss": 0.6311033964157104,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 2.014274128142741,
|
|
"grad_norm": 0.7878178358078003,
|
|
"learning_rate": 2.6795501204688586e-06,
|
|
"loss": 0.6164021492004395,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 2.015571776155718,
|
|
"grad_norm": 0.6297698616981506,
|
|
"learning_rate": 2.6732193468951882e-06,
|
|
"loss": 0.6132771968841553,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 2.016869424168694,
|
|
"grad_norm": 0.6227217316627502,
|
|
"learning_rate": 2.666893330999035e-06,
|
|
"loss": 0.5441837310791016,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 2.018167072181671,
|
|
"grad_norm": 0.639819860458374,
|
|
"learning_rate": 2.6605720857155017e-06,
|
|
"loss": 0.5625590682029724,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 2.019464720194647,
|
|
"grad_norm": 0.6482471227645874,
|
|
"learning_rate": 2.654255623969936e-06,
|
|
"loss": 0.5997311472892761,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 2.020762368207624,
|
|
"grad_norm": 0.6367791891098022,
|
|
"learning_rate": 2.647943958677897e-06,
|
|
"loss": 0.5890505313873291,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 2.0220600162206,
|
|
"grad_norm": 0.6217620372772217,
|
|
"learning_rate": 2.6416371027451514e-06,
|
|
"loss": 0.5508283376693726,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 2.0233576642335764,
|
|
"grad_norm": 0.651731014251709,
|
|
"learning_rate": 2.635335069067617e-06,
|
|
"loss": 0.6351226568222046,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 2.024655312246553,
|
|
"grad_norm": 0.6955805420875549,
|
|
"learning_rate": 2.62903787053136e-06,
|
|
"loss": 0.6140905022621155,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 2.0259529602595294,
|
|
"grad_norm": 0.5920689105987549,
|
|
"learning_rate": 2.6227455200125575e-06,
|
|
"loss": 0.5677257776260376,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 2.027250608272506,
|
|
"grad_norm": 0.6131844520568848,
|
|
"learning_rate": 2.6164580303774733e-06,
|
|
"loss": 0.5954424142837524,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 2.0285482562854824,
|
|
"grad_norm": 0.6266505122184753,
|
|
"learning_rate": 2.6101754144824327e-06,
|
|
"loss": 0.5571186542510986,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 2.029845904298459,
|
|
"grad_norm": 0.609183669090271,
|
|
"learning_rate": 2.603897685173794e-06,
|
|
"loss": 0.61628657579422,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 2.0311435523114354,
|
|
"grad_norm": 0.6080002784729004,
|
|
"learning_rate": 2.5976248552879264e-06,
|
|
"loss": 0.5877048969268799,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 2.032441200324412,
|
|
"grad_norm": 0.6042158007621765,
|
|
"learning_rate": 2.5913569376511806e-06,
|
|
"loss": 0.5496143102645874,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 2.0337388483373884,
|
|
"grad_norm": 0.6415978670120239,
|
|
"learning_rate": 2.5850939450798553e-06,
|
|
"loss": 0.6424070596694946,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 2.035036496350365,
|
|
"grad_norm": 0.6292750239372253,
|
|
"learning_rate": 2.5788358903801926e-06,
|
|
"loss": 0.5802291631698608,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 2.0363341443633414,
|
|
"grad_norm": 0.5823472738265991,
|
|
"learning_rate": 2.572582786348326e-06,
|
|
"loss": 0.5664765238761902,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 2.037631792376318,
|
|
"grad_norm": 0.6012071371078491,
|
|
"learning_rate": 2.566334645770272e-06,
|
|
"loss": 0.5476250648498535,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 2.0389294403892944,
|
|
"grad_norm": 0.6168148517608643,
|
|
"learning_rate": 2.5600914814218963e-06,
|
|
"loss": 0.5573870539665222,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 2.040227088402271,
|
|
"grad_norm": 0.6200307011604309,
|
|
"learning_rate": 2.553853306068888e-06,
|
|
"loss": 0.5985852479934692,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 2.0415247364152473,
|
|
"grad_norm": 0.5821851491928101,
|
|
"learning_rate": 2.547620132466743e-06,
|
|
"loss": 0.5544208288192749,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 2.042822384428224,
|
|
"grad_norm": 0.5919877886772156,
|
|
"learning_rate": 2.541391973360717e-06,
|
|
"loss": 0.5700052976608276,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 2.0441200324412003,
|
|
"grad_norm": 0.6059973835945129,
|
|
"learning_rate": 2.535168841485821e-06,
|
|
"loss": 0.6292803287506104,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 2.0454176804541766,
|
|
"grad_norm": 0.580622136592865,
|
|
"learning_rate": 2.5289507495667864e-06,
|
|
"loss": 0.5648876428604126,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 2.0467153284671533,
|
|
"grad_norm": 0.6086398363113403,
|
|
"learning_rate": 2.5227377103180353e-06,
|
|
"loss": 0.5471535921096802,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 2.0480129764801296,
|
|
"grad_norm": 0.6052615642547607,
|
|
"learning_rate": 2.516529736443661e-06,
|
|
"loss": 0.5907412767410278,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 2.0493106244931063,
|
|
"grad_norm": 0.6123395562171936,
|
|
"learning_rate": 2.5103268406374002e-06,
|
|
"loss": 0.5662798881530762,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 2.0506082725060826,
|
|
"grad_norm": 0.5917913317680359,
|
|
"learning_rate": 2.504129035582601e-06,
|
|
"loss": 0.5825642943382263,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 2.0519059205190593,
|
|
"grad_norm": 0.645075261592865,
|
|
"learning_rate": 2.497936333952212e-06,
|
|
"loss": 0.6213525533676147,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 2.0532035685320356,
|
|
"grad_norm": 1.3204904794692993,
|
|
"learning_rate": 2.491748748408735e-06,
|
|
"loss": 0.5462846755981445,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 2.0545012165450123,
|
|
"grad_norm": 0.5815834403038025,
|
|
"learning_rate": 2.485566291604219e-06,
|
|
"loss": 0.5608969926834106,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 2.0557988645579885,
|
|
"grad_norm": 0.6155984401702881,
|
|
"learning_rate": 2.4793889761802225e-06,
|
|
"loss": 0.5753802061080933,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 2.0570965125709653,
|
|
"grad_norm": 0.645876407623291,
|
|
"learning_rate": 2.4732168147677927e-06,
|
|
"loss": 0.5655276775360107,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 2.0583941605839415,
|
|
"grad_norm": 0.6139212846755981,
|
|
"learning_rate": 2.467049819987437e-06,
|
|
"loss": 0.5936379432678223,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 2.0596918085969182,
|
|
"grad_norm": 0.5790942311286926,
|
|
"learning_rate": 2.460888004449099e-06,
|
|
"loss": 0.5055116415023804,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 2.0609894566098945,
|
|
"grad_norm": 0.5931289196014404,
|
|
"learning_rate": 2.454731380752132e-06,
|
|
"loss": 0.611015260219574,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 2.0622871046228712,
|
|
"grad_norm": 0.5739728808403015,
|
|
"learning_rate": 2.4485799614852755e-06,
|
|
"loss": 0.5669503211975098,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 2.0635847526358475,
|
|
"grad_norm": 0.6072220802307129,
|
|
"learning_rate": 2.442433759226619e-06,
|
|
"loss": 0.6242780685424805,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 2.0648824006488242,
|
|
"grad_norm": 0.6013473868370056,
|
|
"learning_rate": 2.4362927865435975e-06,
|
|
"loss": 0.6564007997512817,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 2.0661800486618005,
|
|
"grad_norm": 0.6308622360229492,
|
|
"learning_rate": 2.4301570559929405e-06,
|
|
"loss": 0.6350818276405334,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 2.0674776966747768,
|
|
"grad_norm": 0.5770552754402161,
|
|
"learning_rate": 2.4240265801206665e-06,
|
|
"loss": 0.5588065981864929,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 2.0687753446877535,
|
|
"grad_norm": 0.5862566828727722,
|
|
"learning_rate": 2.4179013714620456e-06,
|
|
"loss": 0.564478874206543,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 2.0700729927007298,
|
|
"grad_norm": 0.6063327193260193,
|
|
"learning_rate": 2.4117814425415803e-06,
|
|
"loss": 0.5994401574134827,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 2.0713706407137065,
|
|
"grad_norm": 0.7205548286437988,
|
|
"learning_rate": 2.4056668058729766e-06,
|
|
"loss": 0.5876675248146057,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 2.0726682887266827,
|
|
"grad_norm": 0.6141117811203003,
|
|
"learning_rate": 2.399557473959119e-06,
|
|
"loss": 0.5730265974998474,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 2.0739659367396595,
|
|
"grad_norm": 0.653766393661499,
|
|
"learning_rate": 2.3934534592920416e-06,
|
|
"loss": 0.5947611331939697,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 2.0752635847526357,
|
|
"grad_norm": 0.6264708042144775,
|
|
"learning_rate": 2.3873547743529157e-06,
|
|
"loss": 0.597199559211731,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 2.0765612327656124,
|
|
"grad_norm": 0.6255890727043152,
|
|
"learning_rate": 2.3812614316120003e-06,
|
|
"loss": 0.5970041155815125,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 2.0778588807785887,
|
|
"grad_norm": 0.6051512956619263,
|
|
"learning_rate": 2.375173443528646e-06,
|
|
"loss": 0.5532850027084351,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 2.0791565287915654,
|
|
"grad_norm": 0.6211998462677002,
|
|
"learning_rate": 2.3690908225512464e-06,
|
|
"loss": 0.5505103468894958,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 2.0804541768045417,
|
|
"grad_norm": 0.6291670799255371,
|
|
"learning_rate": 2.363013581117217e-06,
|
|
"loss": 0.6287462711334229,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 2.0817518248175184,
|
|
"grad_norm": 0.6058430075645447,
|
|
"learning_rate": 2.356941731652986e-06,
|
|
"loss": 0.6096627712249756,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 2.0830494728304947,
|
|
"grad_norm": 0.6372430324554443,
|
|
"learning_rate": 2.3508752865739425e-06,
|
|
"loss": 0.6022605895996094,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 2.0843471208434714,
|
|
"grad_norm": 0.6325316429138184,
|
|
"learning_rate": 2.344814258284433e-06,
|
|
"loss": 0.610370397567749,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 2.0856447688564477,
|
|
"grad_norm": 0.6065165996551514,
|
|
"learning_rate": 2.3387586591777274e-06,
|
|
"loss": 0.5800055861473083,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 2.086942416869424,
|
|
"grad_norm": 0.59498131275177,
|
|
"learning_rate": 2.3327085016359912e-06,
|
|
"loss": 0.5574961304664612,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 2.0882400648824007,
|
|
"grad_norm": 0.6029080748558044,
|
|
"learning_rate": 2.3266637980302677e-06,
|
|
"loss": 0.5879454016685486,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 2.0882400648824007,
|
|
"eval_loss": 0.6837871670722961,
|
|
"eval_runtime": 72.9619,
|
|
"eval_samples_per_second": 71.16,
|
|
"eval_steps_per_second": 8.895,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 2.089537712895377,
|
|
"grad_norm": 0.6146489381790161,
|
|
"learning_rate": 2.320624560720446e-06,
|
|
"loss": 0.5897351503372192,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 2.0908353609083536,
|
|
"grad_norm": 0.6313148140907288,
|
|
"learning_rate": 2.314590802055232e-06,
|
|
"loss": 0.5991021990776062,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 2.09213300892133,
|
|
"grad_norm": 0.578288197517395,
|
|
"learning_rate": 2.308562534372144e-06,
|
|
"loss": 0.5127542018890381,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 2.0934306569343066,
|
|
"grad_norm": 0.6262894868850708,
|
|
"learning_rate": 2.3025397699974555e-06,
|
|
"loss": 0.6180716753005981,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 2.094728304947283,
|
|
"grad_norm": 0.6143955588340759,
|
|
"learning_rate": 2.296522521246202e-06,
|
|
"loss": 0.6144124865531921,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 2.0960259529602596,
|
|
"grad_norm": 0.6245327591896057,
|
|
"learning_rate": 2.290510800422129e-06,
|
|
"loss": 0.5791307687759399,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 2.097323600973236,
|
|
"grad_norm": 0.6619604825973511,
|
|
"learning_rate": 2.284504619817687e-06,
|
|
"loss": 0.6063104271888733,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 2.0986212489862126,
|
|
"grad_norm": 0.6063318848609924,
|
|
"learning_rate": 2.2785039917139933e-06,
|
|
"loss": 0.619540810585022,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 2.099918896999189,
|
|
"grad_norm": 0.6290093660354614,
|
|
"learning_rate": 2.272508928380815e-06,
|
|
"loss": 0.5471513271331787,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 2.1012165450121656,
|
|
"grad_norm": 0.6088972091674805,
|
|
"learning_rate": 2.2665194420765386e-06,
|
|
"loss": 0.673788845539093,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 2.102514193025142,
|
|
"grad_norm": 0.6053624153137207,
|
|
"learning_rate": 2.260535545048149e-06,
|
|
"loss": 0.540647029876709,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 2.1038118410381186,
|
|
"grad_norm": 0.6025784015655518,
|
|
"learning_rate": 2.2545572495311966e-06,
|
|
"loss": 0.5704219341278076,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 2.105109489051095,
|
|
"grad_norm": 0.5917617678642273,
|
|
"learning_rate": 2.2485845677497897e-06,
|
|
"loss": 0.5879180431365967,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 2.1064071370640716,
|
|
"grad_norm": 0.6286986470222473,
|
|
"learning_rate": 2.2426175119165435e-06,
|
|
"loss": 0.6564632058143616,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 2.107704785077048,
|
|
"grad_norm": 0.7979365587234497,
|
|
"learning_rate": 2.2366560942325833e-06,
|
|
"loss": 0.5867825746536255,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 2.1090024330900246,
|
|
"grad_norm": 0.6283751130104065,
|
|
"learning_rate": 2.230700326887495e-06,
|
|
"loss": 0.5519679188728333,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 2.110300081103001,
|
|
"grad_norm": 0.6093899011611938,
|
|
"learning_rate": 2.2247502220593164e-06,
|
|
"loss": 0.578905463218689,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 2.111597729115977,
|
|
"grad_norm": 0.694290816783905,
|
|
"learning_rate": 2.218805791914507e-06,
|
|
"loss": 0.5794886350631714,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 2.112895377128954,
|
|
"grad_norm": 0.6268723607063293,
|
|
"learning_rate": 2.21286704860792e-06,
|
|
"loss": 0.5475939512252808,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 2.11419302514193,
|
|
"grad_norm": 0.5893663167953491,
|
|
"learning_rate": 2.2069340042827846e-06,
|
|
"loss": 0.5644780397415161,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 2.115490673154907,
|
|
"grad_norm": 0.6139518022537231,
|
|
"learning_rate": 2.2010066710706734e-06,
|
|
"loss": 0.5307568311691284,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 2.116788321167883,
|
|
"grad_norm": 0.6323785781860352,
|
|
"learning_rate": 2.1950850610914824e-06,
|
|
"loss": 0.5611797571182251,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 2.11808596918086,
|
|
"grad_norm": 0.5823566913604736,
|
|
"learning_rate": 2.1891691864534065e-06,
|
|
"loss": 0.5725387334823608,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 2.119383617193836,
|
|
"grad_norm": 0.6572033762931824,
|
|
"learning_rate": 2.1832590592529128e-06,
|
|
"loss": 0.6158653497695923,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 2.1206812652068128,
|
|
"grad_norm": 1.0890551805496216,
|
|
"learning_rate": 2.1773546915747103e-06,
|
|
"loss": 0.559654951095581,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 2.121978913219789,
|
|
"grad_norm": 0.6277933120727539,
|
|
"learning_rate": 2.1714560954917437e-06,
|
|
"loss": 0.6304750442504883,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 2.1232765612327658,
|
|
"grad_norm": 0.6458949446678162,
|
|
"learning_rate": 2.165563283065142e-06,
|
|
"loss": 0.6345778703689575,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 2.124574209245742,
|
|
"grad_norm": 0.643680214881897,
|
|
"learning_rate": 2.159676266344222e-06,
|
|
"loss": 0.5876523852348328,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 2.1258718572587187,
|
|
"grad_norm": 0.595977783203125,
|
|
"learning_rate": 2.1537950573664372e-06,
|
|
"loss": 0.6067019104957581,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 2.127169505271695,
|
|
"grad_norm": 0.6042376160621643,
|
|
"learning_rate": 2.1479196681573745e-06,
|
|
"loss": 0.5710458159446716,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 2.1284671532846717,
|
|
"grad_norm": 0.6172091960906982,
|
|
"learning_rate": 2.142050110730716e-06,
|
|
"loss": 0.5443819761276245,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 2.129764801297648,
|
|
"grad_norm": 0.6249525547027588,
|
|
"learning_rate": 2.136186397088223e-06,
|
|
"loss": 0.6747730374336243,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 2.1310624493106243,
|
|
"grad_norm": 0.6373762488365173,
|
|
"learning_rate": 2.1303285392197043e-06,
|
|
"loss": 0.6101464033126831,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 2.132360097323601,
|
|
"grad_norm": 0.6049467921257019,
|
|
"learning_rate": 2.1244765491029985e-06,
|
|
"loss": 0.5729132890701294,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 2.1336577453365773,
|
|
"grad_norm": 0.6222594380378723,
|
|
"learning_rate": 2.118630438703939e-06,
|
|
"loss": 0.6150310039520264,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 2.134955393349554,
|
|
"grad_norm": 0.9498931169509888,
|
|
"learning_rate": 2.1127902199763496e-06,
|
|
"loss": 0.6144990921020508,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 2.1362530413625302,
|
|
"grad_norm": 0.6177363991737366,
|
|
"learning_rate": 2.1069559048619937e-06,
|
|
"loss": 0.5762449502944946,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 2.137550689375507,
|
|
"grad_norm": 0.59578537940979,
|
|
"learning_rate": 2.10112750529057e-06,
|
|
"loss": 0.6036182641983032,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 2.1388483373884832,
|
|
"grad_norm": 0.6090502738952637,
|
|
"learning_rate": 2.095305033179682e-06,
|
|
"loss": 0.5963237285614014,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 2.14014598540146,
|
|
"grad_norm": 5.44432258605957,
|
|
"learning_rate": 2.0894885004348102e-06,
|
|
"loss": 0.6094678640365601,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 2.141443633414436,
|
|
"grad_norm": 0.6466519832611084,
|
|
"learning_rate": 2.0836779189492925e-06,
|
|
"loss": 0.6607776880264282,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 2.142741281427413,
|
|
"grad_norm": 0.6259258985519409,
|
|
"learning_rate": 2.077873300604297e-06,
|
|
"loss": 0.6022912859916687,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 2.144038929440389,
|
|
"grad_norm": 0.6033953428268433,
|
|
"learning_rate": 2.0720746572687995e-06,
|
|
"loss": 0.5635781288146973,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 2.145336577453366,
|
|
"grad_norm": 0.5921186208724976,
|
|
"learning_rate": 2.0662820007995592e-06,
|
|
"loss": 0.5796300172805786,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 2.146634225466342,
|
|
"grad_norm": 0.7194099426269531,
|
|
"learning_rate": 2.060495343041087e-06,
|
|
"loss": 0.5955857038497925,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 2.147931873479319,
|
|
"grad_norm": 0.6012006998062134,
|
|
"learning_rate": 2.0547146958256416e-06,
|
|
"loss": 0.531291127204895,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 2.149229521492295,
|
|
"grad_norm": 0.8573319911956787,
|
|
"learning_rate": 2.048940070973177e-06,
|
|
"loss": 0.5659847259521484,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 2.150527169505272,
|
|
"grad_norm": 0.639750599861145,
|
|
"learning_rate": 2.04317148029134e-06,
|
|
"loss": 0.5485103130340576,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 2.151824817518248,
|
|
"grad_norm": 0.6052505970001221,
|
|
"learning_rate": 2.0374089355754434e-06,
|
|
"loss": 0.6026275753974915,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 2.153122465531225,
|
|
"grad_norm": 0.6007844805717468,
|
|
"learning_rate": 2.031652448608428e-06,
|
|
"loss": 0.5721523761749268,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 2.154420113544201,
|
|
"grad_norm": 0.6320387125015259,
|
|
"learning_rate": 2.025902031160853e-06,
|
|
"loss": 0.5851036906242371,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 2.1557177615571774,
|
|
"grad_norm": 0.8335509300231934,
|
|
"learning_rate": 2.020157694990868e-06,
|
|
"loss": 0.631894588470459,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 2.157015409570154,
|
|
"grad_norm": 0.6097387075424194,
|
|
"learning_rate": 2.014419451844186e-06,
|
|
"loss": 0.6118210554122925,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 2.1583130575831304,
|
|
"grad_norm": 0.6130866408348083,
|
|
"learning_rate": 2.0086873134540626e-06,
|
|
"loss": 0.5941121578216553,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 2.159610705596107,
|
|
"grad_norm": 0.6047623753547668,
|
|
"learning_rate": 2.002961291541269e-06,
|
|
"loss": 0.592534065246582,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 2.1609083536090834,
|
|
"grad_norm": 0.6416432857513428,
|
|
"learning_rate": 1.997241397814071e-06,
|
|
"loss": 0.6065758466720581,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 2.16220600162206,
|
|
"grad_norm": 0.6395633816719055,
|
|
"learning_rate": 1.9915276439682056e-06,
|
|
"loss": 0.6400467157363892,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 2.1635036496350364,
|
|
"grad_norm": 0.604591965675354,
|
|
"learning_rate": 1.985820041686848e-06,
|
|
"loss": 0.590105414390564,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 2.164801297648013,
|
|
"grad_norm": 0.6412749886512756,
|
|
"learning_rate": 1.9801186026406066e-06,
|
|
"loss": 0.5925630927085876,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 2.1660989456609894,
|
|
"grad_norm": 0.6263708472251892,
|
|
"learning_rate": 1.9744233384874766e-06,
|
|
"loss": 0.6293658018112183,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 2.167396593673966,
|
|
"grad_norm": 0.6095645427703857,
|
|
"learning_rate": 1.968734260872833e-06,
|
|
"loss": 0.5433490872383118,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 2.1686942416869424,
|
|
"grad_norm": 0.6286778450012207,
|
|
"learning_rate": 1.9630513814294e-06,
|
|
"loss": 0.637223482131958,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 2.169991889699919,
|
|
"grad_norm": 0.6185746788978577,
|
|
"learning_rate": 1.9573747117772272e-06,
|
|
"loss": 0.5756215453147888,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 2.1712895377128953,
|
|
"grad_norm": 0.63084876537323,
|
|
"learning_rate": 1.951704263523668e-06,
|
|
"loss": 0.5794859528541565,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 2.172587185725872,
|
|
"grad_norm": 0.6249853372573853,
|
|
"learning_rate": 1.9460400482633537e-06,
|
|
"loss": 0.5887556672096252,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 2.1738848337388483,
|
|
"grad_norm": 0.6094781756401062,
|
|
"learning_rate": 1.9403820775781696e-06,
|
|
"loss": 0.550574779510498,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 2.1751824817518246,
|
|
"grad_norm": 0.6323735117912292,
|
|
"learning_rate": 1.9347303630372373e-06,
|
|
"loss": 0.6414695978164673,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 2.1764801297648013,
|
|
"grad_norm": 0.6307917237281799,
|
|
"learning_rate": 1.929084916196876e-06,
|
|
"loss": 0.5808806419372559,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 2.1777777777777776,
|
|
"grad_norm": 0.6933386921882629,
|
|
"learning_rate": 1.923445748600603e-06,
|
|
"loss": 0.6602625846862793,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 2.1790754257907543,
|
|
"grad_norm": 0.6270908713340759,
|
|
"learning_rate": 1.917812871779084e-06,
|
|
"loss": 0.6303268074989319,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 2.1803730738037306,
|
|
"grad_norm": 0.640339195728302,
|
|
"learning_rate": 1.912186297250128e-06,
|
|
"loss": 0.6451208591461182,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 2.1816707218167073,
|
|
"grad_norm": 0.632722020149231,
|
|
"learning_rate": 1.9065660365186545e-06,
|
|
"loss": 0.6016892194747925,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 2.1829683698296836,
|
|
"grad_norm": 0.6271699070930481,
|
|
"learning_rate": 1.9009521010766756e-06,
|
|
"loss": 0.5756760835647583,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 2.1842660178426603,
|
|
"grad_norm": 0.6046696305274963,
|
|
"learning_rate": 1.8953445024032679e-06,
|
|
"loss": 0.6025729775428772,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 2.1855636658556366,
|
|
"grad_norm": 0.62837815284729,
|
|
"learning_rate": 1.889743251964553e-06,
|
|
"loss": 0.5909950733184814,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 2.1868613138686133,
|
|
"grad_norm": 0.6451588869094849,
|
|
"learning_rate": 1.8841483612136658e-06,
|
|
"loss": 0.6150632500648499,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 2.1881589618815895,
|
|
"grad_norm": 0.6260155439376831,
|
|
"learning_rate": 1.8785598415907464e-06,
|
|
"loss": 0.5601434707641602,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 2.1894566098945663,
|
|
"grad_norm": 0.6069033741950989,
|
|
"learning_rate": 1.8729777045229009e-06,
|
|
"loss": 0.508891761302948,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 2.1907542579075425,
|
|
"grad_norm": 0.613555371761322,
|
|
"learning_rate": 1.8674019614241879e-06,
|
|
"loss": 0.5379388928413391,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 2.1920519059205192,
|
|
"grad_norm": 0.6071879863739014,
|
|
"learning_rate": 1.8618326236955908e-06,
|
|
"loss": 0.5609877109527588,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 2.1933495539334955,
|
|
"grad_norm": 0.6063624620437622,
|
|
"learning_rate": 1.8562697027249921e-06,
|
|
"loss": 0.5955809950828552,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 2.1946472019464722,
|
|
"grad_norm": 0.6319783926010132,
|
|
"learning_rate": 1.8507132098871633e-06,
|
|
"loss": 0.5856696367263794,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 2.1959448499594485,
|
|
"grad_norm": 0.6493479609489441,
|
|
"learning_rate": 1.8451631565437211e-06,
|
|
"loss": 0.6506030559539795,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 2.197242497972425,
|
|
"grad_norm": 0.6016229391098022,
|
|
"learning_rate": 1.8396195540431205e-06,
|
|
"loss": 0.6117116212844849,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 2.1985401459854015,
|
|
"grad_norm": 0.6247114539146423,
|
|
"learning_rate": 1.834082413720627e-06,
|
|
"loss": 0.6172184348106384,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 2.1998377939983778,
|
|
"grad_norm": 0.6307165026664734,
|
|
"learning_rate": 1.8285517468982905e-06,
|
|
"loss": 0.589012622833252,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 2.2011354420113545,
|
|
"grad_norm": 0.6177083253860474,
|
|
"learning_rate": 1.8230275648849243e-06,
|
|
"loss": 0.5813847780227661,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 2.2024330900243307,
|
|
"grad_norm": 0.6012999415397644,
|
|
"learning_rate": 1.8175098789760848e-06,
|
|
"loss": 0.5948748588562012,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 2.2037307380373075,
|
|
"grad_norm": 0.6464450359344482,
|
|
"learning_rate": 1.8119987004540373e-06,
|
|
"loss": 0.5775672197341919,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 2.2050283860502837,
|
|
"grad_norm": 0.6167866587638855,
|
|
"learning_rate": 1.8064940405877546e-06,
|
|
"loss": 0.6011961698532104,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 2.2063260340632604,
|
|
"grad_norm": 0.6373000741004944,
|
|
"learning_rate": 1.8009959106328655e-06,
|
|
"loss": 0.5679797530174255,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 2.2076236820762367,
|
|
"grad_norm": 0.5966001152992249,
|
|
"learning_rate": 1.7955043218316615e-06,
|
|
"loss": 0.5757954120635986,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 2.2089213300892134,
|
|
"grad_norm": 0.6121652722358704,
|
|
"learning_rate": 1.7900192854130465e-06,
|
|
"loss": 0.5717330574989319,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 2.2102189781021897,
|
|
"grad_norm": 0.6737116575241089,
|
|
"learning_rate": 1.7845408125925328e-06,
|
|
"loss": 0.5650469064712524,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 2.2115166261151664,
|
|
"grad_norm": 0.6384139060974121,
|
|
"learning_rate": 1.7790689145722111e-06,
|
|
"loss": 0.5935101509094238,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 2.2128142741281427,
|
|
"grad_norm": 0.5904914140701294,
|
|
"learning_rate": 1.7736036025407282e-06,
|
|
"loss": 0.5071459412574768,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 2.2141119221411194,
|
|
"grad_norm": 0.6069095730781555,
|
|
"learning_rate": 1.7681448876732632e-06,
|
|
"loss": 0.5586497783660889,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 2.2154095701540957,
|
|
"grad_norm": 0.6180804967880249,
|
|
"learning_rate": 1.7626927811315087e-06,
|
|
"loss": 0.6200004816055298,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 2.2167072181670724,
|
|
"grad_norm": 0.6510108113288879,
|
|
"learning_rate": 1.7572472940636375e-06,
|
|
"loss": 0.6552962064743042,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 2.2180048661800487,
|
|
"grad_norm": 0.6048802137374878,
|
|
"learning_rate": 1.7518084376042988e-06,
|
|
"loss": 0.5669337511062622,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 2.219302514193025,
|
|
"grad_norm": 0.6182539463043213,
|
|
"learning_rate": 1.7463762228745728e-06,
|
|
"loss": 0.5660184621810913,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 2.2206001622060016,
|
|
"grad_norm": 0.8579080700874329,
|
|
"learning_rate": 1.7409506609819648e-06,
|
|
"loss": 0.5399761199951172,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 2.221897810218978,
|
|
"grad_norm": 0.5763684511184692,
|
|
"learning_rate": 1.735531763020376e-06,
|
|
"loss": 0.5553586483001709,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 2.2231954582319546,
|
|
"grad_norm": 0.5882948040962219,
|
|
"learning_rate": 1.7301195400700815e-06,
|
|
"loss": 0.5055762529373169,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 2.224493106244931,
|
|
"grad_norm": 0.6292950510978699,
|
|
"learning_rate": 1.7247140031977073e-06,
|
|
"loss": 0.6296324133872986,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 2.2257907542579076,
|
|
"grad_norm": 0.600114643573761,
|
|
"learning_rate": 1.7193151634562071e-06,
|
|
"loss": 0.5636775493621826,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 2.227088402270884,
|
|
"grad_norm": 0.6101363301277161,
|
|
"learning_rate": 1.7139230318848432e-06,
|
|
"loss": 0.6061251163482666,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 2.2283860502838606,
|
|
"grad_norm": 0.6061044335365295,
|
|
"learning_rate": 1.7085376195091591e-06,
|
|
"loss": 0.6004489660263062,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 2.229683698296837,
|
|
"grad_norm": 0.6100283265113831,
|
|
"learning_rate": 1.7031589373409596e-06,
|
|
"loss": 0.571765661239624,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 2.2309813463098136,
|
|
"grad_norm": 0.6510441303253174,
|
|
"learning_rate": 1.6977869963782895e-06,
|
|
"loss": 0.5853846073150635,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 2.23227899432279,
|
|
"grad_norm": 0.635164201259613,
|
|
"learning_rate": 1.6924218076054095e-06,
|
|
"loss": 0.6079269647598267,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 2.2335766423357666,
|
|
"grad_norm": 0.641042172908783,
|
|
"learning_rate": 1.6870633819927672e-06,
|
|
"loss": 0.7038273811340332,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 2.234874290348743,
|
|
"grad_norm": 0.621701717376709,
|
|
"learning_rate": 1.6817117304969944e-06,
|
|
"loss": 0.5776299238204956,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 2.2361719383617196,
|
|
"grad_norm": 0.5985130071640015,
|
|
"learning_rate": 1.676366864060856e-06,
|
|
"loss": 0.5792907476425171,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 2.237469586374696,
|
|
"grad_norm": 0.6016199588775635,
|
|
"learning_rate": 1.6710287936132592e-06,
|
|
"loss": 0.518044650554657,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 2.238767234387672,
|
|
"grad_norm": 0.5871309041976929,
|
|
"learning_rate": 1.6656975300692008e-06,
|
|
"loss": 0.5443193316459656,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 2.240064882400649,
|
|
"grad_norm": 0.6189736723899841,
|
|
"learning_rate": 1.660373084329767e-06,
|
|
"loss": 0.6327258944511414,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 2.241362530413625,
|
|
"grad_norm": 0.6076374650001526,
|
|
"learning_rate": 1.6550554672821028e-06,
|
|
"loss": 0.5638880729675293,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 2.242660178426602,
|
|
"grad_norm": 0.6918789744377136,
|
|
"learning_rate": 1.6497446897993885e-06,
|
|
"loss": 0.6006242632865906,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 2.243957826439578,
|
|
"grad_norm": 0.636972188949585,
|
|
"learning_rate": 1.6444407627408194e-06,
|
|
"loss": 0.5908925533294678,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 2.245255474452555,
|
|
"grad_norm": 0.6132383942604065,
|
|
"learning_rate": 1.639143696951586e-06,
|
|
"loss": 0.5603156089782715,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 2.246553122465531,
|
|
"grad_norm": 0.6161746382713318,
|
|
"learning_rate": 1.6338535032628427e-06,
|
|
"loss": 0.5923026204109192,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 2.247850770478508,
|
|
"grad_norm": 0.6077067255973816,
|
|
"learning_rate": 1.6285701924917025e-06,
|
|
"loss": 0.5932834148406982,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 2.249148418491484,
|
|
"grad_norm": 0.6137869954109192,
|
|
"learning_rate": 1.6232937754411938e-06,
|
|
"loss": 0.5695690512657166,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 2.2504460665044608,
|
|
"grad_norm": 0.5874996185302734,
|
|
"learning_rate": 1.6180242629002558e-06,
|
|
"loss": 0.5515947341918945,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 2.251743714517437,
|
|
"grad_norm": 0.5972124934196472,
|
|
"learning_rate": 1.6127616656437078e-06,
|
|
"loss": 0.6108847260475159,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 2.2530413625304138,
|
|
"grad_norm": 0.6362358927726746,
|
|
"learning_rate": 1.6075059944322297e-06,
|
|
"loss": 0.5956808924674988,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 2.25433901054339,
|
|
"grad_norm": 0.6626409888267517,
|
|
"learning_rate": 1.6022572600123382e-06,
|
|
"loss": 0.5291856527328491,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 2.2556366585563667,
|
|
"grad_norm": 0.6440781354904175,
|
|
"learning_rate": 1.5970154731163667e-06,
|
|
"loss": 0.6244629621505737,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 2.256934306569343,
|
|
"grad_norm": 0.598318874835968,
|
|
"learning_rate": 1.5917806444624434e-06,
|
|
"loss": 0.5915838479995728,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 2.2582319545823197,
|
|
"grad_norm": 0.6128567457199097,
|
|
"learning_rate": 1.5865527847544692e-06,
|
|
"loss": 0.5356861352920532,
|
|
"step": 1741
|
|
},
|
|
{
|
|
"epoch": 2.259529602595296,
|
|
"grad_norm": 0.6078605651855469,
|
|
"learning_rate": 1.581331904682089e-06,
|
|
"loss": 0.5974371433258057,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 2.2608272506082727,
|
|
"grad_norm": 0.6011683344841003,
|
|
"learning_rate": 1.576118014920688e-06,
|
|
"loss": 0.5702426433563232,
|
|
"step": 1743
|
|
},
|
|
{
|
|
"epoch": 2.262124898621249,
|
|
"grad_norm": 0.6138583421707153,
|
|
"learning_rate": 1.5709111261313454e-06,
|
|
"loss": 0.6526232361793518,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 2.2634225466342253,
|
|
"grad_norm": 0.5757991075515747,
|
|
"learning_rate": 1.5657112489608316e-06,
|
|
"loss": 0.5384607315063477,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 2.264720194647202,
|
|
"grad_norm": 0.5720049142837524,
|
|
"learning_rate": 1.5605183940415842e-06,
|
|
"loss": 0.5239338278770447,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 2.2660178426601782,
|
|
"grad_norm": 0.6321298480033875,
|
|
"learning_rate": 1.5553325719916717e-06,
|
|
"loss": 0.5788372159004211,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 2.267315490673155,
|
|
"grad_norm": 0.6393312215805054,
|
|
"learning_rate": 1.5501537934147897e-06,
|
|
"loss": 0.6262606978416443,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 2.2686131386861312,
|
|
"grad_norm": 0.5900475978851318,
|
|
"learning_rate": 1.5449820689002298e-06,
|
|
"loss": 0.5757325887680054,
|
|
"step": 1749
|
|
},
|
|
{
|
|
"epoch": 2.269910786699108,
|
|
"grad_norm": 0.9620885848999023,
|
|
"learning_rate": 1.5398174090228595e-06,
|
|
"loss": 0.5125218629837036,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 2.2712084347120842,
|
|
"grad_norm": 0.611209511756897,
|
|
"learning_rate": 1.534659824343101e-06,
|
|
"loss": 0.5692592859268188,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 2.272506082725061,
|
|
"grad_norm": 0.5884798169136047,
|
|
"learning_rate": 1.5295093254069093e-06,
|
|
"loss": 0.561367392539978,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 2.273803730738037,
|
|
"grad_norm": 0.9114322662353516,
|
|
"learning_rate": 1.524365922745752e-06,
|
|
"loss": 0.5305287837982178,
|
|
"step": 1753
|
|
},
|
|
{
|
|
"epoch": 2.275101378751014,
|
|
"grad_norm": 0.615672767162323,
|
|
"learning_rate": 1.519229626876586e-06,
|
|
"loss": 0.5678682923316956,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 2.27639902676399,
|
|
"grad_norm": 0.596740186214447,
|
|
"learning_rate": 1.5141004483018323e-06,
|
|
"loss": 0.562171995639801,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 2.277696674776967,
|
|
"grad_norm": 0.5916588306427002,
|
|
"learning_rate": 1.5089783975093698e-06,
|
|
"loss": 0.5581475496292114,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 2.278994322789943,
|
|
"grad_norm": 0.5962932705879211,
|
|
"learning_rate": 1.5038634849724898e-06,
|
|
"loss": 0.5466150045394897,
|
|
"step": 1757
|
|
},
|
|
{
|
|
"epoch": 2.28029197080292,
|
|
"grad_norm": 0.6148486137390137,
|
|
"learning_rate": 1.4987557211498966e-06,
|
|
"loss": 0.562313973903656,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 2.281589618815896,
|
|
"grad_norm": 0.6286764740943909,
|
|
"learning_rate": 1.4936551164856739e-06,
|
|
"loss": 0.585920512676239,
|
|
"step": 1759
|
|
},
|
|
{
|
|
"epoch": 2.2828872668288724,
|
|
"grad_norm": 0.5897656679153442,
|
|
"learning_rate": 1.4885616814092673e-06,
|
|
"loss": 0.5238120555877686,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 2.284184914841849,
|
|
"grad_norm": 0.6284115314483643,
|
|
"learning_rate": 1.4834754263354628e-06,
|
|
"loss": 0.6318528652191162,
|
|
"step": 1761
|
|
},
|
|
{
|
|
"epoch": 2.285482562854826,
|
|
"grad_norm": 0.6478753685951233,
|
|
"learning_rate": 1.4783963616643654e-06,
|
|
"loss": 0.6090703010559082,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 2.286780210867802,
|
|
"grad_norm": 0.6065962314605713,
|
|
"learning_rate": 1.4733244977813726e-06,
|
|
"loss": 0.6407983303070068,
|
|
"step": 1763
|
|
},
|
|
{
|
|
"epoch": 2.2880778588807784,
|
|
"grad_norm": 0.6130876541137695,
|
|
"learning_rate": 1.468259845057169e-06,
|
|
"loss": 0.5580013990402222,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 2.289375506893755,
|
|
"grad_norm": 0.5975884199142456,
|
|
"learning_rate": 1.4632024138476803e-06,
|
|
"loss": 0.5697616338729858,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 2.2906731549067314,
|
|
"grad_norm": 0.6038120985031128,
|
|
"learning_rate": 1.4581522144940802e-06,
|
|
"loss": 0.5938565731048584,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 2.291970802919708,
|
|
"grad_norm": 0.6311531066894531,
|
|
"learning_rate": 1.4531092573227434e-06,
|
|
"loss": 0.5615339875221252,
|
|
"step": 1767
|
|
},
|
|
{
|
|
"epoch": 2.2932684509326844,
|
|
"grad_norm": 0.7556526064872742,
|
|
"learning_rate": 1.4480735526452427e-06,
|
|
"loss": 0.6018495559692383,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 2.294566098945661,
|
|
"grad_norm": 0.5966140627861023,
|
|
"learning_rate": 1.4430451107583187e-06,
|
|
"loss": 0.5482977628707886,
|
|
"step": 1769
|
|
},
|
|
{
|
|
"epoch": 2.2958637469586374,
|
|
"grad_norm": 0.6495786309242249,
|
|
"learning_rate": 1.4380239419438636e-06,
|
|
"loss": 0.6411464810371399,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 2.297161394971614,
|
|
"grad_norm": 0.6380159258842468,
|
|
"learning_rate": 1.433010056468896e-06,
|
|
"loss": 0.585355281829834,
|
|
"step": 1771
|
|
},
|
|
{
|
|
"epoch": 2.2984590429845904,
|
|
"grad_norm": 0.6330418586730957,
|
|
"learning_rate": 1.4280034645855429e-06,
|
|
"loss": 0.6234038472175598,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 2.299756690997567,
|
|
"grad_norm": 0.6164976358413696,
|
|
"learning_rate": 1.4230041765310171e-06,
|
|
"loss": 0.6070310473442078,
|
|
"step": 1773
|
|
},
|
|
{
|
|
"epoch": 2.3010543390105433,
|
|
"grad_norm": 0.5887792706489563,
|
|
"learning_rate": 1.4180122025275972e-06,
|
|
"loss": 0.49864742159843445,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 2.30235198702352,
|
|
"grad_norm": 0.5811118483543396,
|
|
"learning_rate": 1.4130275527826077e-06,
|
|
"loss": 0.6116331815719604,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 2.3036496350364963,
|
|
"grad_norm": 0.6207771897315979,
|
|
"learning_rate": 1.4080502374883947e-06,
|
|
"loss": 0.6092080473899841,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 2.304947283049473,
|
|
"grad_norm": 0.6548723578453064,
|
|
"learning_rate": 1.4030802668223097e-06,
|
|
"loss": 0.5866458415985107,
|
|
"step": 1777
|
|
},
|
|
{
|
|
"epoch": 2.3062449310624493,
|
|
"grad_norm": 0.6081016659736633,
|
|
"learning_rate": 1.398117650946681e-06,
|
|
"loss": 0.5727241039276123,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 2.3075425790754256,
|
|
"grad_norm": 0.6203863024711609,
|
|
"learning_rate": 1.3931624000088073e-06,
|
|
"loss": 0.5507431030273438,
|
|
"step": 1779
|
|
},
|
|
{
|
|
"epoch": 2.3088402270884023,
|
|
"grad_norm": 0.61940598487854,
|
|
"learning_rate": 1.3882145241409184e-06,
|
|
"loss": 0.6124242544174194,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 2.3101378751013786,
|
|
"grad_norm": 0.6263229250907898,
|
|
"learning_rate": 1.3832740334601692e-06,
|
|
"loss": 0.6032424569129944,
|
|
"step": 1781
|
|
},
|
|
{
|
|
"epoch": 2.3114355231143553,
|
|
"grad_norm": 0.5885775685310364,
|
|
"learning_rate": 1.3783409380686135e-06,
|
|
"loss": 0.5357505083084106,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 2.3127331711273316,
|
|
"grad_norm": 0.6165185570716858,
|
|
"learning_rate": 1.3734152480531821e-06,
|
|
"loss": 0.6190866231918335,
|
|
"step": 1783
|
|
},
|
|
{
|
|
"epoch": 2.3140308191403083,
|
|
"grad_norm": 0.6044617295265198,
|
|
"learning_rate": 1.3684969734856646e-06,
|
|
"loss": 0.5655971765518188,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 2.3153284671532846,
|
|
"grad_norm": 0.6254231929779053,
|
|
"learning_rate": 1.363586124422689e-06,
|
|
"loss": 0.5936893224716187,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 2.3166261151662613,
|
|
"grad_norm": 0.6128689646720886,
|
|
"learning_rate": 1.3586827109056944e-06,
|
|
"loss": 0.5749369263648987,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 2.3179237631792375,
|
|
"grad_norm": 0.6421996355056763,
|
|
"learning_rate": 1.3537867429609263e-06,
|
|
"loss": 0.5559523105621338,
|
|
"step": 1787
|
|
},
|
|
{
|
|
"epoch": 2.3192214111922143,
|
|
"grad_norm": 0.6680915951728821,
|
|
"learning_rate": 1.3488982305993942e-06,
|
|
"loss": 0.5511724352836609,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 2.3205190592051905,
|
|
"grad_norm": 0.6443539261817932,
|
|
"learning_rate": 1.3440171838168743e-06,
|
|
"loss": 0.5881825089454651,
|
|
"step": 1789
|
|
},
|
|
{
|
|
"epoch": 2.3218167072181672,
|
|
"grad_norm": 0.612708568572998,
|
|
"learning_rate": 1.3391436125938673e-06,
|
|
"loss": 0.5950250625610352,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 2.3231143552311435,
|
|
"grad_norm": 0.595385730266571,
|
|
"learning_rate": 1.3342775268955943e-06,
|
|
"loss": 0.5954742431640625,
|
|
"step": 1791
|
|
},
|
|
{
|
|
"epoch": 2.3244120032441202,
|
|
"grad_norm": 0.6444376111030579,
|
|
"learning_rate": 1.329418936671969e-06,
|
|
"loss": 0.5775749087333679,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 2.3257096512570965,
|
|
"grad_norm": 0.6064639687538147,
|
|
"learning_rate": 1.3245678518575782e-06,
|
|
"loss": 0.5845799446105957,
|
|
"step": 1793
|
|
},
|
|
{
|
|
"epoch": 2.3270072992700728,
|
|
"grad_norm": 0.6051777601242065,
|
|
"learning_rate": 1.319724282371664e-06,
|
|
"loss": 0.5920668840408325,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 2.3283049472830495,
|
|
"grad_norm": 0.6336135268211365,
|
|
"learning_rate": 1.3148882381181e-06,
|
|
"loss": 0.562667965888977,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 2.329602595296026,
|
|
"grad_norm": 0.6154525876045227,
|
|
"learning_rate": 1.3100597289853689e-06,
|
|
"loss": 0.5847402811050415,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 2.3309002433090025,
|
|
"grad_norm": 0.6278738379478455,
|
|
"learning_rate": 1.3052387648465559e-06,
|
|
"loss": 0.6408085823059082,
|
|
"step": 1797
|
|
},
|
|
{
|
|
"epoch": 2.3321978913219787,
|
|
"grad_norm": 0.6477576494216919,
|
|
"learning_rate": 1.3004253555593071e-06,
|
|
"loss": 0.5616024732589722,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 2.3334955393349555,
|
|
"grad_norm": 0.6108107566833496,
|
|
"learning_rate": 1.2956195109658287e-06,
|
|
"loss": 0.5326311588287354,
|
|
"step": 1799
|
|
},
|
|
{
|
|
"epoch": 2.3347931873479317,
|
|
"grad_norm": 1.3087694644927979,
|
|
"learning_rate": 1.2908212408928561e-06,
|
|
"loss": 0.6685813069343567,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 2.3360908353609084,
|
|
"grad_norm": 0.592055082321167,
|
|
"learning_rate": 1.2860305551516355e-06,
|
|
"loss": 0.6329461932182312,
|
|
"step": 1801
|
|
},
|
|
{
|
|
"epoch": 2.3373884833738847,
|
|
"grad_norm": 0.6386983394622803,
|
|
"learning_rate": 1.281247463537912e-06,
|
|
"loss": 0.5208531618118286,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 2.3386861313868614,
|
|
"grad_norm": 0.6252365112304688,
|
|
"learning_rate": 1.276471975831891e-06,
|
|
"loss": 0.5943001508712769,
|
|
"step": 1803
|
|
},
|
|
{
|
|
"epoch": 2.3399837793998377,
|
|
"grad_norm": 0.6460595726966858,
|
|
"learning_rate": 1.2717041017982396e-06,
|
|
"loss": 0.6217683553695679,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 2.3412814274128144,
|
|
"grad_norm": 0.6099584698677063,
|
|
"learning_rate": 1.2669438511860527e-06,
|
|
"loss": 0.5706977844238281,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 2.3425790754257907,
|
|
"grad_norm": 0.6689403653144836,
|
|
"learning_rate": 1.2621912337288372e-06,
|
|
"loss": 0.551365077495575,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 2.3438767234387674,
|
|
"grad_norm": 0.606182873249054,
|
|
"learning_rate": 1.257446259144494e-06,
|
|
"loss": 0.5419661998748779,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 2.3451743714517437,
|
|
"grad_norm": 0.5901670455932617,
|
|
"learning_rate": 1.2527089371352968e-06,
|
|
"loss": 0.5732494592666626,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 2.34647201946472,
|
|
"grad_norm": 0.6110414862632751,
|
|
"learning_rate": 1.2479792773878647e-06,
|
|
"loss": 0.6051602363586426,
|
|
"step": 1809
|
|
},
|
|
{
|
|
"epoch": 2.3477696674776967,
|
|
"grad_norm": 0.6416681408882141,
|
|
"learning_rate": 1.243257289573161e-06,
|
|
"loss": 0.593826949596405,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 2.3490673154906734,
|
|
"grad_norm": 0.6288197636604309,
|
|
"learning_rate": 1.2385429833464513e-06,
|
|
"loss": 0.5421499609947205,
|
|
"step": 1811
|
|
},
|
|
{
|
|
"epoch": 2.3503649635036497,
|
|
"grad_norm": 0.6199961304664612,
|
|
"learning_rate": 1.2338363683472998e-06,
|
|
"loss": 0.5908663868904114,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 2.351662611516626,
|
|
"grad_norm": 0.6229044198989868,
|
|
"learning_rate": 1.2291374541995437e-06,
|
|
"loss": 0.5933829545974731,
|
|
"step": 1813
|
|
},
|
|
{
|
|
"epoch": 2.3529602595296026,
|
|
"grad_norm": 0.6609744429588318,
|
|
"learning_rate": 1.224446250511272e-06,
|
|
"loss": 0.594125509262085,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 2.354257907542579,
|
|
"grad_norm": 0.6363682150840759,
|
|
"learning_rate": 1.2197627668748101e-06,
|
|
"loss": 0.5930228233337402,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 2.3555555555555556,
|
|
"grad_norm": 0.6157255172729492,
|
|
"learning_rate": 1.2150870128666959e-06,
|
|
"loss": 0.5634854435920715,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 2.356853203568532,
|
|
"grad_norm": 0.6403535604476929,
|
|
"learning_rate": 1.2104189980476627e-06,
|
|
"loss": 0.5946694612503052,
|
|
"step": 1817
|
|
},
|
|
{
|
|
"epoch": 2.3581508515815086,
|
|
"grad_norm": 0.6029789447784424,
|
|
"learning_rate": 1.2057587319626213e-06,
|
|
"loss": 0.5258057713508606,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 2.359448499594485,
|
|
"grad_norm": 0.6252802014350891,
|
|
"learning_rate": 1.2011062241406313e-06,
|
|
"loss": 0.5830211639404297,
|
|
"step": 1819
|
|
},
|
|
{
|
|
"epoch": 2.3607461476074616,
|
|
"grad_norm": 0.608201801776886,
|
|
"learning_rate": 1.1964614840949002e-06,
|
|
"loss": 0.6013060212135315,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 2.362043795620438,
|
|
"grad_norm": 0.6110815405845642,
|
|
"learning_rate": 1.1918245213227408e-06,
|
|
"loss": 0.576073169708252,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 2.3633414436334146,
|
|
"grad_norm": 0.605087161064148,
|
|
"learning_rate": 1.1871953453055707e-06,
|
|
"loss": 0.6136230826377869,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 2.364639091646391,
|
|
"grad_norm": 0.6053324341773987,
|
|
"learning_rate": 1.182573965508882e-06,
|
|
"loss": 0.5785141587257385,
|
|
"step": 1823
|
|
},
|
|
{
|
|
"epoch": 2.3659367396593676,
|
|
"grad_norm": 0.6085898876190186,
|
|
"learning_rate": 1.1779603913822274e-06,
|
|
"loss": 0.5601797103881836,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 2.367234387672344,
|
|
"grad_norm": 0.6608554124832153,
|
|
"learning_rate": 1.1733546323591981e-06,
|
|
"loss": 0.5785682797431946,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 2.3685320356853206,
|
|
"grad_norm": 0.6056334972381592,
|
|
"learning_rate": 1.168756697857406e-06,
|
|
"loss": 0.5939031839370728,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 2.369829683698297,
|
|
"grad_norm": 0.6553589105606079,
|
|
"learning_rate": 1.1641665972784628e-06,
|
|
"loss": 0.6532239317893982,
|
|
"step": 1827
|
|
},
|
|
{
|
|
"epoch": 2.371127331711273,
|
|
"grad_norm": 0.6094745397567749,
|
|
"learning_rate": 1.1595843400079636e-06,
|
|
"loss": 0.5682094097137451,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 2.37242497972425,
|
|
"grad_norm": 0.623717188835144,
|
|
"learning_rate": 1.1550099354154615e-06,
|
|
"loss": 0.6046154499053955,
|
|
"step": 1829
|
|
},
|
|
{
|
|
"epoch": 2.373722627737226,
|
|
"grad_norm": 0.631445050239563,
|
|
"learning_rate": 1.1504433928544594e-06,
|
|
"loss": 0.6053498387336731,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 2.375020275750203,
|
|
"grad_norm": 0.6280617117881775,
|
|
"learning_rate": 1.1458847216623813e-06,
|
|
"loss": 0.5817880630493164,
|
|
"step": 1831
|
|
},
|
|
{
|
|
"epoch": 2.376317923763179,
|
|
"grad_norm": 0.6309313178062439,
|
|
"learning_rate": 1.141333931160552e-06,
|
|
"loss": 0.6206140518188477,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 2.377615571776156,
|
|
"grad_norm": 0.6384704113006592,
|
|
"learning_rate": 1.1367910306541918e-06,
|
|
"loss": 0.6599752306938171,
|
|
"step": 1833
|
|
},
|
|
{
|
|
"epoch": 2.378913219789132,
|
|
"grad_norm": 0.6254469752311707,
|
|
"learning_rate": 1.1322560294323775e-06,
|
|
"loss": 0.5889034271240234,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 2.3802108678021088,
|
|
"grad_norm": 0.6390111446380615,
|
|
"learning_rate": 1.1277289367680411e-06,
|
|
"loss": 0.6020563840866089,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 2.381508515815085,
|
|
"grad_norm": 0.6277632117271423,
|
|
"learning_rate": 1.123209761917941e-06,
|
|
"loss": 0.5417424440383911,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 2.3828061638280618,
|
|
"grad_norm": 0.6135120987892151,
|
|
"learning_rate": 1.1186985141226458e-06,
|
|
"loss": 0.5558514595031738,
|
|
"step": 1837
|
|
},
|
|
{
|
|
"epoch": 2.384103811841038,
|
|
"grad_norm": 0.6234643459320068,
|
|
"learning_rate": 1.1141952026065156e-06,
|
|
"loss": 0.6145384311676025,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 2.3854014598540147,
|
|
"grad_norm": 0.6055371165275574,
|
|
"learning_rate": 1.1096998365776828e-06,
|
|
"loss": 0.5748616456985474,
|
|
"step": 1839
|
|
},
|
|
{
|
|
"epoch": 2.386699107866991,
|
|
"grad_norm": 0.6127825379371643,
|
|
"learning_rate": 1.1052124252280322e-06,
|
|
"loss": 0.5389982461929321,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 2.386699107866991,
|
|
"eval_loss": 0.6825700998306274,
|
|
"eval_runtime": 72.9215,
|
|
"eval_samples_per_second": 71.2,
|
|
"eval_steps_per_second": 8.9,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 2.3879967558799677,
|
|
"grad_norm": 0.6031513214111328,
|
|
"learning_rate": 1.1007329777331866e-06,
|
|
"loss": 0.5840494632720947,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 2.389294403892944,
|
|
"grad_norm": 0.63483726978302,
|
|
"learning_rate": 1.096261503252478e-06,
|
|
"loss": 0.5311962366104126,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 2.3905920519059203,
|
|
"grad_norm": 0.6125195622444153,
|
|
"learning_rate": 1.0917980109289455e-06,
|
|
"loss": 0.5285024046897888,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 2.391889699918897,
|
|
"grad_norm": 0.5990893244743347,
|
|
"learning_rate": 1.0873425098892964e-06,
|
|
"loss": 0.5493112802505493,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 2.3931873479318737,
|
|
"grad_norm": 0.6030960083007812,
|
|
"learning_rate": 1.082895009243905e-06,
|
|
"loss": 0.5796130895614624,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 2.39448499594485,
|
|
"grad_norm": 0.6366276741027832,
|
|
"learning_rate": 1.078455518086784e-06,
|
|
"loss": 0.5433975458145142,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 2.3957826439578263,
|
|
"grad_norm": 0.5901277661323547,
|
|
"learning_rate": 1.0740240454955692e-06,
|
|
"loss": 0.5538575649261475,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 2.397080291970803,
|
|
"grad_norm": 0.6165037155151367,
|
|
"learning_rate": 1.0696006005314996e-06,
|
|
"loss": 0.5998971462249756,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 2.3983779399837792,
|
|
"grad_norm": 0.6113094091415405,
|
|
"learning_rate": 1.0651851922394035e-06,
|
|
"loss": 0.570077121257782,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 2.399675587996756,
|
|
"grad_norm": 0.6432837247848511,
|
|
"learning_rate": 1.0607778296476679e-06,
|
|
"loss": 0.6083425283432007,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 2.4009732360097322,
|
|
"grad_norm": 0.5917057394981384,
|
|
"learning_rate": 1.05637852176824e-06,
|
|
"loss": 0.5251022577285767,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 2.402270884022709,
|
|
"grad_norm": 0.6266626119613647,
|
|
"learning_rate": 1.051987277596585e-06,
|
|
"loss": 0.5856255292892456,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 2.403568532035685,
|
|
"grad_norm": 0.610355019569397,
|
|
"learning_rate": 1.0476041061116915e-06,
|
|
"loss": 0.6004334688186646,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 2.404866180048662,
|
|
"grad_norm": 0.5825424790382385,
|
|
"learning_rate": 1.0432290162760311e-06,
|
|
"loss": 0.5548322796821594,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 2.406163828061638,
|
|
"grad_norm": 0.6335608959197998,
|
|
"learning_rate": 1.038862017035558e-06,
|
|
"loss": 0.5934311747550964,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 2.407461476074615,
|
|
"grad_norm": 0.6018176078796387,
|
|
"learning_rate": 1.0345031173196785e-06,
|
|
"loss": 0.5377739071846008,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 2.408759124087591,
|
|
"grad_norm": 0.6398853659629822,
|
|
"learning_rate": 1.0301523260412405e-06,
|
|
"loss": 0.6047654151916504,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 2.410056772100568,
|
|
"grad_norm": 0.6761499643325806,
|
|
"learning_rate": 1.025809652096511e-06,
|
|
"loss": 0.6525087356567383,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 2.411354420113544,
|
|
"grad_norm": 0.5981181859970093,
|
|
"learning_rate": 1.0214751043651582e-06,
|
|
"loss": 0.5705087184906006,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 2.412652068126521,
|
|
"grad_norm": 0.6022308468818665,
|
|
"learning_rate": 1.0171486917102357e-06,
|
|
"loss": 0.5528420209884644,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 2.413949716139497,
|
|
"grad_norm": 0.576118528842926,
|
|
"learning_rate": 1.0128304229781622e-06,
|
|
"loss": 0.572098970413208,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 2.4152473641524734,
|
|
"grad_norm": 0.6066587567329407,
|
|
"learning_rate": 1.008520306998706e-06,
|
|
"loss": 0.5568013787269592,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 2.41654501216545,
|
|
"grad_norm": 0.7212052345275879,
|
|
"learning_rate": 1.0042183525849586e-06,
|
|
"loss": 0.5123892426490784,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 2.4178426601784264,
|
|
"grad_norm": 0.5919977426528931,
|
|
"learning_rate": 9.999245685333342e-07,
|
|
"loss": 0.5277501344680786,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 2.419140308191403,
|
|
"grad_norm": 0.5896833539009094,
|
|
"learning_rate": 9.95638963623528e-07,
|
|
"loss": 0.5733782649040222,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 2.4204379562043794,
|
|
"grad_norm": 0.6342105269432068,
|
|
"learning_rate": 9.913615466185234e-07,
|
|
"loss": 0.6013584136962891,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 2.421735604217356,
|
|
"grad_norm": 0.5951900482177734,
|
|
"learning_rate": 9.870923262645516e-07,
|
|
"loss": 0.5315797328948975,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 2.4230332522303324,
|
|
"grad_norm": 0.6201072931289673,
|
|
"learning_rate": 9.828313112910887e-07,
|
|
"loss": 0.5741020441055298,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 2.424330900243309,
|
|
"grad_norm": 0.6206340193748474,
|
|
"learning_rate": 9.78578510410832e-07,
|
|
"loss": 0.5911818146705627,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 2.4256285482562854,
|
|
"grad_norm": 0.6191825270652771,
|
|
"learning_rate": 9.743339323196827e-07,
|
|
"loss": 0.5818160772323608,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 2.426926196269262,
|
|
"grad_norm": 0.6224012970924377,
|
|
"learning_rate": 9.700975856967287e-07,
|
|
"loss": 0.5667495727539062,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 2.4282238442822384,
|
|
"grad_norm": 0.622602105140686,
|
|
"learning_rate": 9.658694792042284e-07,
|
|
"loss": 0.5867684483528137,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 2.429521492295215,
|
|
"grad_norm": 0.6468759179115295,
|
|
"learning_rate": 9.616496214875847e-07,
|
|
"loss": 0.5605747699737549,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 2.4308191403081914,
|
|
"grad_norm": 0.6025612950325012,
|
|
"learning_rate": 9.574380211753442e-07,
|
|
"loss": 0.5322221517562866,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 2.432116788321168,
|
|
"grad_norm": 0.601256251335144,
|
|
"learning_rate": 9.532346868791587e-07,
|
|
"loss": 0.6136845350265503,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 2.4334144363341443,
|
|
"grad_norm": 0.6094178557395935,
|
|
"learning_rate": 9.490396271937879e-07,
|
|
"loss": 0.6157099604606628,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 2.4347120843471206,
|
|
"grad_norm": 0.6287171244621277,
|
|
"learning_rate": 9.448528506970628e-07,
|
|
"loss": 0.5530134439468384,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 2.4360097323600973,
|
|
"grad_norm": 0.5963685512542725,
|
|
"learning_rate": 9.406743659498829e-07,
|
|
"loss": 0.5840374827384949,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 2.437307380373074,
|
|
"grad_norm": 0.6349402070045471,
|
|
"learning_rate": 9.365041814961928e-07,
|
|
"loss": 0.5503448843955994,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 2.4386050283860503,
|
|
"grad_norm": 0.6072769165039062,
|
|
"learning_rate": 9.323423058629638e-07,
|
|
"loss": 0.5658475756645203,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 2.4399026763990266,
|
|
"grad_norm": 0.6268115043640137,
|
|
"learning_rate": 9.281887475601775e-07,
|
|
"loss": 0.6097016334533691,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 2.4412003244120033,
|
|
"grad_norm": 0.5882371664047241,
|
|
"learning_rate": 9.240435150808113e-07,
|
|
"loss": 0.5780482292175293,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 2.4424979724249796,
|
|
"grad_norm": 0.6373420357704163,
|
|
"learning_rate": 9.19906616900813e-07,
|
|
"loss": 0.6226140260696411,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 2.4437956204379563,
|
|
"grad_norm": 0.6072852611541748,
|
|
"learning_rate": 9.157780614790963e-07,
|
|
"loss": 0.5743207335472107,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 2.4450932684509326,
|
|
"grad_norm": 0.634705126285553,
|
|
"learning_rate": 9.116578572575091e-07,
|
|
"loss": 0.6267349720001221,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 2.4463909164639093,
|
|
"grad_norm": 0.6120656132698059,
|
|
"learning_rate": 9.075460126608271e-07,
|
|
"loss": 0.6176955699920654,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 2.4476885644768855,
|
|
"grad_norm": 0.5967820882797241,
|
|
"learning_rate": 9.034425360967319e-07,
|
|
"loss": 0.6183077096939087,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 2.4489862124898623,
|
|
"grad_norm": 0.5987744331359863,
|
|
"learning_rate": 8.993474359557936e-07,
|
|
"loss": 0.5591214895248413,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 2.4502838605028385,
|
|
"grad_norm": 0.6169969439506531,
|
|
"learning_rate": 8.952607206114588e-07,
|
|
"loss": 0.5904876589775085,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 2.4515815085158152,
|
|
"grad_norm": 0.6008497476577759,
|
|
"learning_rate": 8.911823984200219e-07,
|
|
"loss": 0.5758087635040283,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 2.4528791565287915,
|
|
"grad_norm": 0.6111242175102234,
|
|
"learning_rate": 8.871124777206213e-07,
|
|
"loss": 0.6324316263198853,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 2.4541768045417682,
|
|
"grad_norm": 0.638118326663971,
|
|
"learning_rate": 8.83050966835215e-07,
|
|
"loss": 0.5944634079933167,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 2.4554744525547445,
|
|
"grad_norm": 0.6154019832611084,
|
|
"learning_rate": 8.789978740685646e-07,
|
|
"loss": 0.5495239496231079,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 2.456772100567721,
|
|
"grad_norm": 0.618356466293335,
|
|
"learning_rate": 8.749532077082179e-07,
|
|
"loss": 0.5651803016662598,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 2.4580697485806975,
|
|
"grad_norm": 0.6217320561408997,
|
|
"learning_rate": 8.709169760244968e-07,
|
|
"loss": 0.6198887825012207,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 2.4593673965936738,
|
|
"grad_norm": 0.6045297384262085,
|
|
"learning_rate": 8.668891872704682e-07,
|
|
"loss": 0.5438726544380188,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 2.4606650446066505,
|
|
"grad_norm": 0.614281952381134,
|
|
"learning_rate": 8.628698496819471e-07,
|
|
"loss": 0.5607205629348755,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 2.4619626926196267,
|
|
"grad_norm": 0.5984881520271301,
|
|
"learning_rate": 8.58858971477457e-07,
|
|
"loss": 0.6331669688224792,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 2.4632603406326035,
|
|
"grad_norm": 0.6256738901138306,
|
|
"learning_rate": 8.548565608582299e-07,
|
|
"loss": 0.5844709873199463,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 2.4645579886455797,
|
|
"grad_norm": 0.5857892036437988,
|
|
"learning_rate": 8.508626260081826e-07,
|
|
"loss": 0.5776396989822388,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 2.4658556366585564,
|
|
"grad_norm": 0.6575695872306824,
|
|
"learning_rate": 8.468771750939009e-07,
|
|
"loss": 0.5862407684326172,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 2.4671532846715327,
|
|
"grad_norm": 0.5867515206336975,
|
|
"learning_rate": 8.429002162646233e-07,
|
|
"loss": 0.5810645222663879,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 2.4684509326845094,
|
|
"grad_norm": 0.6347371935844421,
|
|
"learning_rate": 8.389317576522243e-07,
|
|
"loss": 0.6229629516601562,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 2.4697485806974857,
|
|
"grad_norm": 0.604457676410675,
|
|
"learning_rate": 8.349718073711971e-07,
|
|
"loss": 0.5473800897598267,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 2.4710462287104624,
|
|
"grad_norm": 0.6130659580230713,
|
|
"learning_rate": 8.310203735186384e-07,
|
|
"loss": 0.6687853932380676,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 2.4723438767234387,
|
|
"grad_norm": 0.6164904236793518,
|
|
"learning_rate": 8.270774641742275e-07,
|
|
"loss": 0.6242067217826843,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 2.4736415247364154,
|
|
"grad_norm": 0.64787358045578,
|
|
"learning_rate": 8.231430874002206e-07,
|
|
"loss": 0.5970586538314819,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 2.4749391727493917,
|
|
"grad_norm": 0.6561875939369202,
|
|
"learning_rate": 8.192172512414187e-07,
|
|
"loss": 0.5711146593093872,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 2.4762368207623684,
|
|
"grad_norm": 0.6017801761627197,
|
|
"learning_rate": 8.152999637251641e-07,
|
|
"loss": 0.5429533123970032,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 2.4775344687753447,
|
|
"grad_norm": 0.60152268409729,
|
|
"learning_rate": 8.113912328613183e-07,
|
|
"loss": 0.5184666514396667,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 2.478832116788321,
|
|
"grad_norm": 0.598573625087738,
|
|
"learning_rate": 8.074910666422475e-07,
|
|
"loss": 0.5503566861152649,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 2.4801297648012977,
|
|
"grad_norm": 0.6241352558135986,
|
|
"learning_rate": 8.035994730428031e-07,
|
|
"loss": 0.6021054983139038,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 2.4814274128142744,
|
|
"grad_norm": 0.6195024251937866,
|
|
"learning_rate": 7.997164600203111e-07,
|
|
"loss": 0.5467978715896606,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 2.4827250608272506,
|
|
"grad_norm": 0.6009840369224548,
|
|
"learning_rate": 7.958420355145469e-07,
|
|
"loss": 0.5863580703735352,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 2.484022708840227,
|
|
"grad_norm": 0.6128111481666565,
|
|
"learning_rate": 7.919762074477311e-07,
|
|
"loss": 0.5403767824172974,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 2.4853203568532036,
|
|
"grad_norm": 0.6071099042892456,
|
|
"learning_rate": 7.881189837245024e-07,
|
|
"loss": 0.5299487709999084,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 2.48661800486618,
|
|
"grad_norm": 0.6704837083816528,
|
|
"learning_rate": 7.842703722319073e-07,
|
|
"loss": 0.6165317893028259,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 2.4879156528791566,
|
|
"grad_norm": 0.6277005672454834,
|
|
"learning_rate": 7.804303808393831e-07,
|
|
"loss": 0.5439109206199646,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 2.489213300892133,
|
|
"grad_norm": 0.6348392367362976,
|
|
"learning_rate": 7.76599017398737e-07,
|
|
"loss": 0.6694045662879944,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 2.4905109489051096,
|
|
"grad_norm": 0.6145819425582886,
|
|
"learning_rate": 7.727762897441421e-07,
|
|
"loss": 0.550458550453186,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 2.491808596918086,
|
|
"grad_norm": 0.61981600522995,
|
|
"learning_rate": 7.689622056921053e-07,
|
|
"loss": 0.594965934753418,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 2.4931062449310626,
|
|
"grad_norm": 0.7170799374580383,
|
|
"learning_rate": 7.65156773041465e-07,
|
|
"loss": 0.6357606053352356,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 2.494403892944039,
|
|
"grad_norm": 0.6079750061035156,
|
|
"learning_rate": 7.613599995733667e-07,
|
|
"loss": 0.5912356376647949,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 2.4957015409570156,
|
|
"grad_norm": 0.6176713109016418,
|
|
"learning_rate": 7.575718930512516e-07,
|
|
"loss": 0.5135859847068787,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 2.496999188969992,
|
|
"grad_norm": 0.6063299179077148,
|
|
"learning_rate": 7.537924612208391e-07,
|
|
"loss": 0.5870840549468994,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 2.4982968369829686,
|
|
"grad_norm": 0.6175487041473389,
|
|
"learning_rate": 7.500217118101106e-07,
|
|
"loss": 0.5973732471466064,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 2.499594484995945,
|
|
"grad_norm": 0.6008102893829346,
|
|
"learning_rate": 7.462596525292937e-07,
|
|
"loss": 0.5943004488945007,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 2.5008921330089215,
|
|
"grad_norm": 0.6359487771987915,
|
|
"learning_rate": 7.425062910708492e-07,
|
|
"loss": 0.5653975009918213,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 2.502189781021898,
|
|
"grad_norm": 0.6241583824157715,
|
|
"learning_rate": 7.387616351094473e-07,
|
|
"loss": 0.5532112121582031,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 2.503487429034874,
|
|
"grad_norm": 0.6088744401931763,
|
|
"learning_rate": 7.350256923019666e-07,
|
|
"loss": 0.5315259695053101,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 2.504785077047851,
|
|
"grad_norm": 0.6145752668380737,
|
|
"learning_rate": 7.312984702874609e-07,
|
|
"loss": 0.600688099861145,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 2.5060827250608275,
|
|
"grad_norm": 0.6202653050422668,
|
|
"learning_rate": 7.275799766871577e-07,
|
|
"loss": 0.6020484566688538,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 2.507380373073804,
|
|
"grad_norm": 0.6492214798927307,
|
|
"learning_rate": 7.238702191044344e-07,
|
|
"loss": 0.6212818622589111,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 2.50867802108678,
|
|
"grad_norm": 0.5913106203079224,
|
|
"learning_rate": 7.201692051248066e-07,
|
|
"loss": 0.5435472726821899,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 2.509975669099757,
|
|
"grad_norm": 0.6050302982330322,
|
|
"learning_rate": 7.164769423159113e-07,
|
|
"loss": 0.6042004823684692,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 2.511273317112733,
|
|
"grad_norm": 0.6316038966178894,
|
|
"learning_rate": 7.127934382274926e-07,
|
|
"loss": 0.558472752571106,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 2.5125709651257098,
|
|
"grad_norm": 0.6041384339332581,
|
|
"learning_rate": 7.091187003913802e-07,
|
|
"loss": 0.6053918600082397,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 2.513868613138686,
|
|
"grad_norm": 0.6338528394699097,
|
|
"learning_rate": 7.054527363214875e-07,
|
|
"loss": 0.5851538777351379,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 2.5151662611516628,
|
|
"grad_norm": 0.7164930105209351,
|
|
"learning_rate": 7.017955535137788e-07,
|
|
"loss": 0.5775594115257263,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 2.516463909164639,
|
|
"grad_norm": 0.9809231758117676,
|
|
"learning_rate": 6.981471594462719e-07,
|
|
"loss": 0.6198115348815918,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 2.5177615571776153,
|
|
"grad_norm": 0.6024364829063416,
|
|
"learning_rate": 6.945075615790059e-07,
|
|
"loss": 0.5934704542160034,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 2.519059205190592,
|
|
"grad_norm": 0.6212522387504578,
|
|
"learning_rate": 6.908767673540384e-07,
|
|
"loss": 0.6180324554443359,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 2.5203568532035687,
|
|
"grad_norm": 0.6258326172828674,
|
|
"learning_rate": 6.872547841954241e-07,
|
|
"loss": 0.5982950925827026,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 2.521654501216545,
|
|
"grad_norm": 0.6158891320228577,
|
|
"learning_rate": 6.836416195092021e-07,
|
|
"loss": 0.5860976576805115,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 2.5229521492295213,
|
|
"grad_norm": 0.6238812208175659,
|
|
"learning_rate": 6.800372806833799e-07,
|
|
"loss": 0.5936440229415894,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 2.524249797242498,
|
|
"grad_norm": 0.5862494111061096,
|
|
"learning_rate": 6.764417750879182e-07,
|
|
"loss": 0.5802135467529297,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 2.5255474452554747,
|
|
"grad_norm": 0.6118647456169128,
|
|
"learning_rate": 6.728551100747155e-07,
|
|
"loss": 0.5778954029083252,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 2.526845093268451,
|
|
"grad_norm": 0.6207137703895569,
|
|
"learning_rate": 6.692772929775943e-07,
|
|
"loss": 0.6226284503936768,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 2.5281427412814272,
|
|
"grad_norm": 0.6094867587089539,
|
|
"learning_rate": 6.657083311122858e-07,
|
|
"loss": 0.5938500761985779,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 2.529440389294404,
|
|
"grad_norm": 0.6266283988952637,
|
|
"learning_rate": 6.621482317764105e-07,
|
|
"loss": 0.5501142740249634,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 2.5307380373073802,
|
|
"grad_norm": 0.6360139846801758,
|
|
"learning_rate": 6.585970022494748e-07,
|
|
"loss": 0.6632074117660522,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 2.532035685320357,
|
|
"grad_norm": 0.6052773594856262,
|
|
"learning_rate": 6.550546497928401e-07,
|
|
"loss": 0.5711944103240967,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 2.533333333333333,
|
|
"grad_norm": 0.6809741258621216,
|
|
"learning_rate": 6.515211816497247e-07,
|
|
"loss": 0.5731922388076782,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 2.53463098134631,
|
|
"grad_norm": 0.6013851761817932,
|
|
"learning_rate": 6.479966050451736e-07,
|
|
"loss": 0.572198748588562,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 2.535928629359286,
|
|
"grad_norm": 0.6084575653076172,
|
|
"learning_rate": 6.444809271860547e-07,
|
|
"loss": 0.5986557006835938,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 2.537226277372263,
|
|
"grad_norm": 0.6349742412567139,
|
|
"learning_rate": 6.409741552610399e-07,
|
|
"loss": 0.5914225578308105,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 2.538523925385239,
|
|
"grad_norm": 0.6118656396865845,
|
|
"learning_rate": 6.374762964405895e-07,
|
|
"loss": 0.5655546188354492,
|
|
"step": 1957
|
|
},
|
|
{
|
|
"epoch": 2.539821573398216,
|
|
"grad_norm": 0.6187875270843506,
|
|
"learning_rate": 6.339873578769401e-07,
|
|
"loss": 0.5871388912200928,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 2.541119221411192,
|
|
"grad_norm": 48.24391555786133,
|
|
"learning_rate": 6.305073467040884e-07,
|
|
"loss": 0.5712297558784485,
|
|
"step": 1959
|
|
},
|
|
{
|
|
"epoch": 2.5424168694241684,
|
|
"grad_norm": 0.6253454685211182,
|
|
"learning_rate": 6.270362700377736e-07,
|
|
"loss": 0.6522243022918701,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 2.543714517437145,
|
|
"grad_norm": 0.5885297656059265,
|
|
"learning_rate": 6.235741349754731e-07,
|
|
"loss": 0.6240279078483582,
|
|
"step": 1961
|
|
},
|
|
{
|
|
"epoch": 2.545012165450122,
|
|
"grad_norm": 0.600005030632019,
|
|
"learning_rate": 6.201209485963744e-07,
|
|
"loss": 0.6034828424453735,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 2.546309813463098,
|
|
"grad_norm": 0.677692711353302,
|
|
"learning_rate": 6.166767179613691e-07,
|
|
"loss": 0.5885945558547974,
|
|
"step": 1963
|
|
},
|
|
{
|
|
"epoch": 2.5476074614760744,
|
|
"grad_norm": 0.6142828464508057,
|
|
"learning_rate": 6.132414501130385e-07,
|
|
"loss": 0.5538769960403442,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 2.548905109489051,
|
|
"grad_norm": 0.6016609072685242,
|
|
"learning_rate": 6.098151520756357e-07,
|
|
"loss": 0.5977665185928345,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 2.550202757502028,
|
|
"grad_norm": 2.0037388801574707,
|
|
"learning_rate": 6.063978308550722e-07,
|
|
"loss": 0.612566351890564,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 2.551500405515004,
|
|
"grad_norm": 0.602703869342804,
|
|
"learning_rate": 6.029894934389058e-07,
|
|
"loss": 0.5812326669692993,
|
|
"step": 1967
|
|
},
|
|
{
|
|
"epoch": 2.5527980535279804,
|
|
"grad_norm": 0.5868345499038696,
|
|
"learning_rate": 5.995901467963228e-07,
|
|
"loss": 0.5142446160316467,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 2.554095701540957,
|
|
"grad_norm": 0.625521719455719,
|
|
"learning_rate": 5.961997978781292e-07,
|
|
"loss": 0.5533977746963501,
|
|
"step": 1969
|
|
},
|
|
{
|
|
"epoch": 2.5553933495539334,
|
|
"grad_norm": 0.6117697358131409,
|
|
"learning_rate": 5.928184536167258e-07,
|
|
"loss": 0.6049879789352417,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 2.55669099756691,
|
|
"grad_norm": 0.6366870403289795,
|
|
"learning_rate": 5.89446120926111e-07,
|
|
"loss": 0.5416997671127319,
|
|
"step": 1971
|
|
},
|
|
{
|
|
"epoch": 2.5579886455798864,
|
|
"grad_norm": 0.6090091466903687,
|
|
"learning_rate": 5.860828067018481e-07,
|
|
"loss": 0.5767660737037659,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 2.559286293592863,
|
|
"grad_norm": 0.6263614892959595,
|
|
"learning_rate": 5.82728517821064e-07,
|
|
"loss": 0.5914768576622009,
|
|
"step": 1973
|
|
},
|
|
{
|
|
"epoch": 2.5605839416058394,
|
|
"grad_norm": 0.6438020467758179,
|
|
"learning_rate": 5.793832611424322e-07,
|
|
"loss": 0.5773044228553772,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 2.5618815896188156,
|
|
"grad_norm": 0.6195680499076843,
|
|
"learning_rate": 5.760470435061533e-07,
|
|
"loss": 0.5637648701667786,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 2.5631792376317923,
|
|
"grad_norm": 1.289580225944519,
|
|
"learning_rate": 5.727198717339511e-07,
|
|
"loss": 0.6060294508934021,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 2.564476885644769,
|
|
"grad_norm": 0.6049319505691528,
|
|
"learning_rate": 5.694017526290468e-07,
|
|
"loss": 0.5878962278366089,
|
|
"step": 1977
|
|
},
|
|
{
|
|
"epoch": 2.5657745336577453,
|
|
"grad_norm": 0.6546334028244019,
|
|
"learning_rate": 5.660926929761556e-07,
|
|
"loss": 0.5719892382621765,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 2.5670721816707216,
|
|
"grad_norm": 0.5887362957000732,
|
|
"learning_rate": 5.627926995414662e-07,
|
|
"loss": 0.5226088762283325,
|
|
"step": 1979
|
|
},
|
|
{
|
|
"epoch": 2.5683698296836983,
|
|
"grad_norm": 0.6115890741348267,
|
|
"learning_rate": 5.59501779072631e-07,
|
|
"loss": 0.5784634947776794,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 2.569667477696675,
|
|
"grad_norm": 0.6565897464752197,
|
|
"learning_rate": 5.562199382987488e-07,
|
|
"loss": 0.5947513580322266,
|
|
"step": 1981
|
|
},
|
|
{
|
|
"epoch": 2.5709651257096513,
|
|
"grad_norm": 0.594465970993042,
|
|
"learning_rate": 5.529471839303541e-07,
|
|
"loss": 0.5367786884307861,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 2.5722627737226276,
|
|
"grad_norm": 0.6155304908752441,
|
|
"learning_rate": 5.496835226593983e-07,
|
|
"loss": 0.6144155859947205,
|
|
"step": 1983
|
|
},
|
|
{
|
|
"epoch": 2.5735604217356043,
|
|
"grad_norm": 0.6233793497085571,
|
|
"learning_rate": 5.464289611592472e-07,
|
|
"loss": 0.5667406916618347,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 2.5748580697485806,
|
|
"grad_norm": 0.6025534272193909,
|
|
"learning_rate": 5.431835060846519e-07,
|
|
"loss": 0.5775101184844971,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 2.5761557177615573,
|
|
"grad_norm": 0.6037949323654175,
|
|
"learning_rate": 5.399471640717479e-07,
|
|
"loss": 0.6155390739440918,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 2.5774533657745335,
|
|
"grad_norm": 0.61771160364151,
|
|
"learning_rate": 5.367199417380347e-07,
|
|
"loss": 0.5459461808204651,
|
|
"step": 1987
|
|
},
|
|
{
|
|
"epoch": 2.5787510137875103,
|
|
"grad_norm": 0.6559909582138062,
|
|
"learning_rate": 5.335018456823665e-07,
|
|
"loss": 0.6187810897827148,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 2.5800486618004865,
|
|
"grad_norm": 0.6218096017837524,
|
|
"learning_rate": 5.302928824849335e-07,
|
|
"loss": 0.629378080368042,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 2.5813463098134632,
|
|
"grad_norm": 0.5922935605049133,
|
|
"learning_rate": 5.270930587072548e-07,
|
|
"loss": 0.5435377359390259,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 2.5826439578264395,
|
|
"grad_norm": 0.5918126106262207,
|
|
"learning_rate": 5.239023808921595e-07,
|
|
"loss": 0.5545147657394409,
|
|
"step": 1991
|
|
},
|
|
{
|
|
"epoch": 2.5839416058394162,
|
|
"grad_norm": 0.6067506074905396,
|
|
"learning_rate": 5.207208555637767e-07,
|
|
"loss": 0.6249223351478577,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 2.5852392538523925,
|
|
"grad_norm": 0.6125559210777283,
|
|
"learning_rate": 5.175484892275184e-07,
|
|
"loss": 0.5820242166519165,
|
|
"step": 1993
|
|
},
|
|
{
|
|
"epoch": 2.5865369018653688,
|
|
"grad_norm": 0.5970590114593506,
|
|
"learning_rate": 5.14385288370074e-07,
|
|
"loss": 0.6091808080673218,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 2.5878345498783455,
|
|
"grad_norm": 0.5902854204177856,
|
|
"learning_rate": 5.11231259459386e-07,
|
|
"loss": 0.5224129557609558,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 2.589132197891322,
|
|
"grad_norm": 0.604062020778656,
|
|
"learning_rate": 5.080864089446464e-07,
|
|
"loss": 0.5258910655975342,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 2.5904298459042985,
|
|
"grad_norm": 0.6816832423210144,
|
|
"learning_rate": 5.049507432562778e-07,
|
|
"loss": 0.5509624481201172,
|
|
"step": 1997
|
|
},
|
|
{
|
|
"epoch": 2.5917274939172747,
|
|
"grad_norm": 0.6220773458480835,
|
|
"learning_rate": 5.018242688059238e-07,
|
|
"loss": 0.6509982943534851,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 2.5930251419302515,
|
|
"grad_norm": 0.6238852143287659,
|
|
"learning_rate": 4.987069919864329e-07,
|
|
"loss": 0.6329154968261719,
|
|
"step": 1999
|
|
},
|
|
{
|
|
"epoch": 2.5943227899432277,
|
|
"grad_norm": 0.6279301643371582,
|
|
"learning_rate": 4.95598919171848e-07,
|
|
"loss": 0.624962329864502,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 2.5956204379562045,
|
|
"grad_norm": 0.6066421866416931,
|
|
"learning_rate": 4.925000567173882e-07,
|
|
"loss": 0.6009570360183716,
|
|
"step": 2001
|
|
},
|
|
{
|
|
"epoch": 2.5969180859691807,
|
|
"grad_norm": 0.6097516417503357,
|
|
"learning_rate": 4.894104109594466e-07,
|
|
"loss": 0.5533030033111572,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 2.5982157339821574,
|
|
"grad_norm": 0.60311359167099,
|
|
"learning_rate": 4.863299882155659e-07,
|
|
"loss": 0.5549200177192688,
|
|
"step": 2003
|
|
},
|
|
{
|
|
"epoch": 2.5995133819951337,
|
|
"grad_norm": 0.6075156927108765,
|
|
"learning_rate": 4.832587947844297e-07,
|
|
"loss": 0.5541381239891052,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 2.6008110300081104,
|
|
"grad_norm": 0.6099098324775696,
|
|
"learning_rate": 4.801968369458531e-07,
|
|
"loss": 0.6142464876174927,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 2.6021086780210867,
|
|
"grad_norm": 0.6433584690093994,
|
|
"learning_rate": 4.771441209607625e-07,
|
|
"loss": 0.6120733022689819,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 2.6034063260340634,
|
|
"grad_norm": 1.134731411933899,
|
|
"learning_rate": 4.7410065307119167e-07,
|
|
"loss": 0.6064984798431396,
|
|
"step": 2007
|
|
},
|
|
{
|
|
"epoch": 2.6047039740470397,
|
|
"grad_norm": 0.6147306561470032,
|
|
"learning_rate": 4.7106643950026067e-07,
|
|
"loss": 0.5834633111953735,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 2.606001622060016,
|
|
"grad_norm": 0.610374927520752,
|
|
"learning_rate": 4.6804148645216873e-07,
|
|
"loss": 0.5858355760574341,
|
|
"step": 2009
|
|
},
|
|
{
|
|
"epoch": 2.6072992700729927,
|
|
"grad_norm": 0.6226435899734497,
|
|
"learning_rate": 4.6502580011217934e-07,
|
|
"loss": 0.5983865261077881,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 2.6085969180859694,
|
|
"grad_norm": 0.6833674311637878,
|
|
"learning_rate": 4.6201938664660775e-07,
|
|
"loss": 0.6065071225166321,
|
|
"step": 2011
|
|
},
|
|
{
|
|
"epoch": 2.6098945660989457,
|
|
"grad_norm": 0.6266833543777466,
|
|
"learning_rate": 4.590222522028082e-07,
|
|
"loss": 0.5968768000602722,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 2.611192214111922,
|
|
"grad_norm": 0.6198201179504395,
|
|
"learning_rate": 4.5603440290916347e-07,
|
|
"loss": 0.6149097681045532,
|
|
"step": 2013
|
|
},
|
|
{
|
|
"epoch": 2.6124898621248986,
|
|
"grad_norm": 0.6224921941757202,
|
|
"learning_rate": 4.5305584487506605e-07,
|
|
"loss": 0.6195799708366394,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 2.6137875101378754,
|
|
"grad_norm": 0.5922067165374756,
|
|
"learning_rate": 4.500865841909169e-07,
|
|
"loss": 0.5795333385467529,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 2.6150851581508516,
|
|
"grad_norm": 0.6451519727706909,
|
|
"learning_rate": 4.471266269280994e-07,
|
|
"loss": 0.6512206196784973,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 2.616382806163828,
|
|
"grad_norm": 0.6207348108291626,
|
|
"learning_rate": 4.441759791389799e-07,
|
|
"loss": 0.6410412788391113,
|
|
"step": 2017
|
|
},
|
|
{
|
|
"epoch": 2.6176804541768046,
|
|
"grad_norm": 0.6637576818466187,
|
|
"learning_rate": 4.41234646856884e-07,
|
|
"loss": 0.5507533550262451,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 2.618978102189781,
|
|
"grad_norm": 0.6296217441558838,
|
|
"learning_rate": 4.383026360960929e-07,
|
|
"loss": 0.5853258371353149,
|
|
"step": 2019
|
|
},
|
|
{
|
|
"epoch": 2.6202757502027576,
|
|
"grad_norm": 0.5993384122848511,
|
|
"learning_rate": 4.35379952851826e-07,
|
|
"loss": 0.5613459944725037,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 2.621573398215734,
|
|
"grad_norm": 0.6372536420822144,
|
|
"learning_rate": 4.324666031002311e-07,
|
|
"loss": 0.563460111618042,
|
|
"step": 2021
|
|
},
|
|
{
|
|
"epoch": 2.6228710462287106,
|
|
"grad_norm": 0.6129400134086609,
|
|
"learning_rate": 4.29562592798371e-07,
|
|
"loss": 0.6133362650871277,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 2.624168694241687,
|
|
"grad_norm": 0.6232635974884033,
|
|
"learning_rate": 4.266679278842123e-07,
|
|
"loss": 0.5923752784729004,
|
|
"step": 2023
|
|
},
|
|
{
|
|
"epoch": 2.6254663422546636,
|
|
"grad_norm": 0.6236964464187622,
|
|
"learning_rate": 4.2378261427660994e-07,
|
|
"loss": 0.5925074815750122,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 2.62676399026764,
|
|
"grad_norm": 0.5997064113616943,
|
|
"learning_rate": 4.209066578753035e-07,
|
|
"loss": 0.5586100816726685,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 2.6280616382806166,
|
|
"grad_norm": 0.6276852488517761,
|
|
"learning_rate": 4.1804006456089174e-07,
|
|
"loss": 0.5699270367622375,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 2.629359286293593,
|
|
"grad_norm": 0.5818026065826416,
|
|
"learning_rate": 4.1518284019483655e-07,
|
|
"loss": 0.5539983510971069,
|
|
"step": 2027
|
|
},
|
|
{
|
|
"epoch": 2.630656934306569,
|
|
"grad_norm": 0.6021342277526855,
|
|
"learning_rate": 4.123349906194357e-07,
|
|
"loss": 0.5571432709693909,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 2.631954582319546,
|
|
"grad_norm": 0.6044632196426392,
|
|
"learning_rate": 4.094965216578212e-07,
|
|
"loss": 0.5815938711166382,
|
|
"step": 2029
|
|
},
|
|
{
|
|
"epoch": 2.6332522303325225,
|
|
"grad_norm": 0.6218861937522888,
|
|
"learning_rate": 4.066674391139458e-07,
|
|
"loss": 0.5798450112342834,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 2.634549878345499,
|
|
"grad_norm": 0.6776529550552368,
|
|
"learning_rate": 4.038477487725645e-07,
|
|
"loss": 0.5181751251220703,
|
|
"step": 2031
|
|
},
|
|
{
|
|
"epoch": 2.635847526358475,
|
|
"grad_norm": 0.6296592354774475,
|
|
"learning_rate": 4.0103745639923144e-07,
|
|
"loss": 0.6052215695381165,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 2.637145174371452,
|
|
"grad_norm": 0.6410042643547058,
|
|
"learning_rate": 3.9823656774028386e-07,
|
|
"loss": 0.5471499562263489,
|
|
"step": 2033
|
|
},
|
|
{
|
|
"epoch": 2.638442822384428,
|
|
"grad_norm": 0.6148339509963989,
|
|
"learning_rate": 3.9544508852282895e-07,
|
|
"loss": 0.6046350002288818,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 2.639740470397405,
|
|
"grad_norm": 0.6409063935279846,
|
|
"learning_rate": 3.9266302445473634e-07,
|
|
"loss": 0.5563018918037415,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 2.641038118410381,
|
|
"grad_norm": 0.6377732157707214,
|
|
"learning_rate": 3.89890381224623e-07,
|
|
"loss": 0.5965743064880371,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 2.6423357664233578,
|
|
"grad_norm": 0.6147736310958862,
|
|
"learning_rate": 3.8712716450183985e-07,
|
|
"loss": 0.558821439743042,
|
|
"step": 2037
|
|
},
|
|
{
|
|
"epoch": 2.643633414436334,
|
|
"grad_norm": 0.5959088802337646,
|
|
"learning_rate": 3.8437337993647017e-07,
|
|
"loss": 0.6072096824645996,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 2.6449310624493108,
|
|
"grad_norm": 0.5934545993804932,
|
|
"learning_rate": 3.81629033159302e-07,
|
|
"loss": 0.5585888028144836,
|
|
"step": 2039
|
|
},
|
|
{
|
|
"epoch": 2.646228710462287,
|
|
"grad_norm": 0.6148179173469543,
|
|
"learning_rate": 3.7889412978183324e-07,
|
|
"loss": 0.6224203705787659,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 2.6475263584752637,
|
|
"grad_norm": 0.6041895151138306,
|
|
"learning_rate": 3.7616867539624733e-07,
|
|
"loss": 0.5594790577888489,
|
|
"step": 2041
|
|
},
|
|
{
|
|
"epoch": 2.64882400648824,
|
|
"grad_norm": 0.6036660075187683,
|
|
"learning_rate": 3.734526755754092e-07,
|
|
"loss": 0.5392581820487976,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 2.6501216545012163,
|
|
"grad_norm": 0.6497801542282104,
|
|
"learning_rate": 3.707461358728509e-07,
|
|
"loss": 0.645263135433197,
|
|
"step": 2043
|
|
},
|
|
{
|
|
"epoch": 2.651419302514193,
|
|
"grad_norm": 0.6202139258384705,
|
|
"learning_rate": 3.680490618227611e-07,
|
|
"loss": 0.6205359697341919,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 2.6527169505271697,
|
|
"grad_norm": 0.5867362022399902,
|
|
"learning_rate": 3.6536145893997346e-07,
|
|
"loss": 0.5754397511482239,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 2.654014598540146,
|
|
"grad_norm": 0.6415355205535889,
|
|
"learning_rate": 3.626833327199564e-07,
|
|
"loss": 0.6042582392692566,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 2.6553122465531223,
|
|
"grad_norm": 0.6417367458343506,
|
|
"learning_rate": 3.600146886387984e-07,
|
|
"loss": 0.6140678524971008,
|
|
"step": 2047
|
|
},
|
|
{
|
|
"epoch": 2.656609894566099,
|
|
"grad_norm": 0.6080589890480042,
|
|
"learning_rate": 3.573555321532035e-07,
|
|
"loss": 0.574844241142273,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 2.6579075425790757,
|
|
"grad_norm": 0.6920068264007568,
|
|
"learning_rate": 3.547058687004723e-07,
|
|
"loss": 0.6025684475898743,
|
|
"step": 2049
|
|
},
|
|
{
|
|
"epoch": 2.659205190592052,
|
|
"grad_norm": 0.6130858659744263,
|
|
"learning_rate": 3.520657036984959e-07,
|
|
"loss": 0.5683197379112244,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 2.6605028386050282,
|
|
"grad_norm": 0.6280376315116882,
|
|
"learning_rate": 3.494350425457438e-07,
|
|
"loss": 0.5609173774719238,
|
|
"step": 2051
|
|
},
|
|
{
|
|
"epoch": 2.661800486618005,
|
|
"grad_norm": 0.6326773166656494,
|
|
"learning_rate": 3.46813890621252e-07,
|
|
"loss": 0.5946630239486694,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 2.663098134630981,
|
|
"grad_norm": 0.6118667721748352,
|
|
"learning_rate": 3.4420225328461286e-07,
|
|
"loss": 0.5908790826797485,
|
|
"step": 2053
|
|
},
|
|
{
|
|
"epoch": 2.664395782643958,
|
|
"grad_norm": 0.6427050828933716,
|
|
"learning_rate": 3.416001358759635e-07,
|
|
"loss": 0.6200711727142334,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 2.665693430656934,
|
|
"grad_norm": 0.6258965730667114,
|
|
"learning_rate": 3.390075437159762e-07,
|
|
"loss": 0.6091062426567078,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 2.666991078669911,
|
|
"grad_norm": 0.8197891116142273,
|
|
"learning_rate": 3.36424482105846e-07,
|
|
"loss": 0.6184768676757812,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 2.668288726682887,
|
|
"grad_norm": 0.6219103336334229,
|
|
"learning_rate": 3.338509563272774e-07,
|
|
"loss": 0.5699069499969482,
|
|
"step": 2057
|
|
},
|
|
{
|
|
"epoch": 2.669586374695864,
|
|
"grad_norm": 0.6160385012626648,
|
|
"learning_rate": 3.3128697164248213e-07,
|
|
"loss": 0.6063632369041443,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 2.67088402270884,
|
|
"grad_norm": 0.6377853155136108,
|
|
"learning_rate": 3.2873253329415986e-07,
|
|
"loss": 0.6303044557571411,
|
|
"step": 2059
|
|
},
|
|
{
|
|
"epoch": 2.672181670721817,
|
|
"grad_norm": 0.6218414306640625,
|
|
"learning_rate": 3.2618764650548806e-07,
|
|
"loss": 0.5987715721130371,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 2.673479318734793,
|
|
"grad_norm": 0.6107571125030518,
|
|
"learning_rate": 3.236523164801192e-07,
|
|
"loss": 0.5237259864807129,
|
|
"step": 2061
|
|
},
|
|
{
|
|
"epoch": 2.6747769667477694,
|
|
"grad_norm": 0.6305319666862488,
|
|
"learning_rate": 3.2112654840215863e-07,
|
|
"loss": 0.6254755854606628,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 2.676074614760746,
|
|
"grad_norm": 0.6144214868545532,
|
|
"learning_rate": 3.186103474361646e-07,
|
|
"loss": 0.6048131585121155,
|
|
"step": 2063
|
|
},
|
|
{
|
|
"epoch": 2.677372262773723,
|
|
"grad_norm": 0.6124334335327148,
|
|
"learning_rate": 3.161037187271304e-07,
|
|
"loss": 0.5881555080413818,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 2.678669910786699,
|
|
"grad_norm": 0.6141470670700073,
|
|
"learning_rate": 3.136066674004773e-07,
|
|
"loss": 0.5876516103744507,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 2.6799675587996754,
|
|
"grad_norm": 0.5808926820755005,
|
|
"learning_rate": 3.1111919856204373e-07,
|
|
"loss": 0.5583111047744751,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 2.681265206812652,
|
|
"grad_norm": 0.663599967956543,
|
|
"learning_rate": 3.08641317298074e-07,
|
|
"loss": 0.5772061944007874,
|
|
"step": 2067
|
|
},
|
|
{
|
|
"epoch": 2.6825628548256284,
|
|
"grad_norm": 0.6320760846138,
|
|
"learning_rate": 3.0617302867520736e-07,
|
|
"loss": 0.5595476031303406,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 2.683860502838605,
|
|
"grad_norm": 0.61170494556427,
|
|
"learning_rate": 3.0371433774047056e-07,
|
|
"loss": 0.6012779474258423,
|
|
"step": 2069
|
|
},
|
|
{
|
|
"epoch": 2.6851581508515814,
|
|
"grad_norm": 0.6115148067474365,
|
|
"learning_rate": 3.0126524952126203e-07,
|
|
"loss": 0.6057910919189453,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 2.6851581508515814,
|
|
"eval_loss": 0.6819512844085693,
|
|
"eval_runtime": 72.9512,
|
|
"eval_samples_per_second": 71.171,
|
|
"eval_steps_per_second": 8.896,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 2.686455798864558,
|
|
"grad_norm": 0.6251775026321411,
|
|
"learning_rate": 2.988257690253504e-07,
|
|
"loss": 0.6118081212043762,
|
|
"step": 2071
|
|
},
|
|
{
|
|
"epoch": 2.6877534468775344,
|
|
"grad_norm": 0.6253253221511841,
|
|
"learning_rate": 2.9639590124085296e-07,
|
|
"loss": 0.6572234630584717,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 2.689051094890511,
|
|
"grad_norm": 0.6017980575561523,
|
|
"learning_rate": 2.939756511362357e-07,
|
|
"loss": 0.5534753799438477,
|
|
"step": 2073
|
|
},
|
|
{
|
|
"epoch": 2.6903487429034874,
|
|
"grad_norm": 0.6164457201957703,
|
|
"learning_rate": 2.915650236602974e-07,
|
|
"loss": 0.6046677827835083,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 2.691646390916464,
|
|
"grad_norm": 0.6189885139465332,
|
|
"learning_rate": 2.891640237421611e-07,
|
|
"loss": 0.6001750826835632,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 2.6929440389294403,
|
|
"grad_norm": 0.6118842959403992,
|
|
"learning_rate": 2.8677265629126373e-07,
|
|
"loss": 0.5822157263755798,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 2.6942416869424166,
|
|
"grad_norm": 0.6505289673805237,
|
|
"learning_rate": 2.8439092619734655e-07,
|
|
"loss": 0.6047310829162598,
|
|
"step": 2077
|
|
},
|
|
{
|
|
"epoch": 2.6955393349553933,
|
|
"grad_norm": 0.6261717081069946,
|
|
"learning_rate": 2.820188383304451e-07,
|
|
"loss": 0.5709232687950134,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 2.69683698296837,
|
|
"grad_norm": 0.591399610042572,
|
|
"learning_rate": 2.7965639754087893e-07,
|
|
"loss": 0.5760236382484436,
|
|
"step": 2079
|
|
},
|
|
{
|
|
"epoch": 2.6981346309813463,
|
|
"grad_norm": 0.6267626881599426,
|
|
"learning_rate": 2.7730360865923954e-07,
|
|
"loss": 0.627373218536377,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 2.6994322789943226,
|
|
"grad_norm": 0.5880517959594727,
|
|
"learning_rate": 2.7496047649638757e-07,
|
|
"loss": 0.556127667427063,
|
|
"step": 2081
|
|
},
|
|
{
|
|
"epoch": 2.7007299270072993,
|
|
"grad_norm": 0.6221486926078796,
|
|
"learning_rate": 2.726270058434327e-07,
|
|
"loss": 0.6388289332389832,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 2.702027575020276,
|
|
"grad_norm": 0.6296391487121582,
|
|
"learning_rate": 2.703032014717333e-07,
|
|
"loss": 0.6471085548400879,
|
|
"step": 2083
|
|
},
|
|
{
|
|
"epoch": 2.7033252230332523,
|
|
"grad_norm": 0.6119943261146545,
|
|
"learning_rate": 2.6798906813288117e-07,
|
|
"loss": 0.587184488773346,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 2.7046228710462286,
|
|
"grad_norm": 0.5858760476112366,
|
|
"learning_rate": 2.656846105586919e-07,
|
|
"loss": 0.6001055836677551,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 2.7059205190592053,
|
|
"grad_norm": 0.6214133501052856,
|
|
"learning_rate": 2.633898334611995e-07,
|
|
"loss": 0.6275671720504761,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 2.7072181670721815,
|
|
"grad_norm": 0.5908603668212891,
|
|
"learning_rate": 2.6110474153264176e-07,
|
|
"loss": 0.5731199979782104,
|
|
"step": 2087
|
|
},
|
|
{
|
|
"epoch": 2.7085158150851583,
|
|
"grad_norm": 0.5500771403312683,
|
|
"learning_rate": 2.588293394454533e-07,
|
|
"loss": 0.5535600781440735,
|
|
"step": 2088
|
|
},
|
|
{
|
|
"epoch": 2.7098134630981345,
|
|
"grad_norm": 0.6212435364723206,
|
|
"learning_rate": 2.565636318522552e-07,
|
|
"loss": 0.6325974464416504,
|
|
"step": 2089
|
|
},
|
|
{
|
|
"epoch": 2.7111111111111112,
|
|
"grad_norm": 0.5896530747413635,
|
|
"learning_rate": 2.543076233858466e-07,
|
|
"loss": 0.564407229423523,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 2.7124087591240875,
|
|
"grad_norm": 0.6151485443115234,
|
|
"learning_rate": 2.5206131865919303e-07,
|
|
"loss": 0.5890393257141113,
|
|
"step": 2091
|
|
},
|
|
{
|
|
"epoch": 2.7137064071370642,
|
|
"grad_norm": 0.5984410643577576,
|
|
"learning_rate": 2.4982472226542045e-07,
|
|
"loss": 0.5423193573951721,
|
|
"step": 2092
|
|
},
|
|
{
|
|
"epoch": 2.7150040551500405,
|
|
"grad_norm": 0.6220104694366455,
|
|
"learning_rate": 2.475978387778e-07,
|
|
"loss": 0.5741702318191528,
|
|
"step": 2093
|
|
},
|
|
{
|
|
"epoch": 2.7163017031630172,
|
|
"grad_norm": 0.64532470703125,
|
|
"learning_rate": 2.453806727497482e-07,
|
|
"loss": 0.578140914440155,
|
|
"step": 2094
|
|
},
|
|
{
|
|
"epoch": 2.7175993511759935,
|
|
"grad_norm": 0.6362125277519226,
|
|
"learning_rate": 2.431732287148053e-07,
|
|
"loss": 0.6103841066360474,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 2.7188969991889698,
|
|
"grad_norm": 0.6365206837654114,
|
|
"learning_rate": 2.409755111866369e-07,
|
|
"loss": 0.6380729079246521,
|
|
"step": 2096
|
|
},
|
|
{
|
|
"epoch": 2.7201946472019465,
|
|
"grad_norm": 0.6440710425376892,
|
|
"learning_rate": 2.387875246590193e-07,
|
|
"loss": 0.5572207570075989,
|
|
"step": 2097
|
|
},
|
|
{
|
|
"epoch": 2.721492295214923,
|
|
"grad_norm": 0.6295807361602783,
|
|
"learning_rate": 2.3660927360583064e-07,
|
|
"loss": 0.6024692058563232,
|
|
"step": 2098
|
|
},
|
|
{
|
|
"epoch": 2.7227899432278995,
|
|
"grad_norm": 0.5711405873298645,
|
|
"learning_rate": 2.3444076248104297e-07,
|
|
"loss": 0.5038433074951172,
|
|
"step": 2099
|
|
},
|
|
{
|
|
"epoch": 2.7240875912408757,
|
|
"grad_norm": 0.7995308637619019,
|
|
"learning_rate": 2.322819957187139e-07,
|
|
"loss": 0.6232460737228394,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 2.7253852392538525,
|
|
"grad_norm": 0.5909203886985779,
|
|
"learning_rate": 2.3013297773297306e-07,
|
|
"loss": 0.5349663496017456,
|
|
"step": 2101
|
|
},
|
|
{
|
|
"epoch": 2.7266828872668287,
|
|
"grad_norm": 0.6373469829559326,
|
|
"learning_rate": 2.279937129180204e-07,
|
|
"loss": 0.5974945425987244,
|
|
"step": 2102
|
|
},
|
|
{
|
|
"epoch": 2.7279805352798054,
|
|
"grad_norm": 0.6128799915313721,
|
|
"learning_rate": 2.2586420564810863e-07,
|
|
"loss": 0.5850982069969177,
|
|
"step": 2103
|
|
},
|
|
{
|
|
"epoch": 2.7292781832927817,
|
|
"grad_norm": 0.6667084097862244,
|
|
"learning_rate": 2.2374446027754405e-07,
|
|
"loss": 0.5952577590942383,
|
|
"step": 2104
|
|
},
|
|
{
|
|
"epoch": 2.7305758313057584,
|
|
"grad_norm": 0.6103115081787109,
|
|
"learning_rate": 2.2163448114066677e-07,
|
|
"loss": 0.5764719247817993,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 2.7318734793187347,
|
|
"grad_norm": 0.5843047499656677,
|
|
"learning_rate": 2.1953427255185122e-07,
|
|
"loss": 0.5831491947174072,
|
|
"step": 2106
|
|
},
|
|
{
|
|
"epoch": 2.7331711273317114,
|
|
"grad_norm": 0.6300417184829712,
|
|
"learning_rate": 2.174438388054928e-07,
|
|
"loss": 0.5893597602844238,
|
|
"step": 2107
|
|
},
|
|
{
|
|
"epoch": 2.7344687753446877,
|
|
"grad_norm": 0.601433515548706,
|
|
"learning_rate": 2.1536318417599844e-07,
|
|
"loss": 0.5604301691055298,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 2.7357664233576644,
|
|
"grad_norm": 0.6220826506614685,
|
|
"learning_rate": 2.1329231291778108e-07,
|
|
"loss": 0.6189798712730408,
|
|
"step": 2109
|
|
},
|
|
{
|
|
"epoch": 2.7370640713706407,
|
|
"grad_norm": 0.5895432233810425,
|
|
"learning_rate": 2.1123122926524853e-07,
|
|
"loss": 0.5561822652816772,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 2.738361719383617,
|
|
"grad_norm": 0.8975700736045837,
|
|
"learning_rate": 2.0917993743279297e-07,
|
|
"loss": 0.552111029624939,
|
|
"step": 2111
|
|
},
|
|
{
|
|
"epoch": 2.7396593673965937,
|
|
"grad_norm": 0.5886269211769104,
|
|
"learning_rate": 2.0713844161479035e-07,
|
|
"loss": 0.5910426378250122,
|
|
"step": 2112
|
|
},
|
|
{
|
|
"epoch": 2.7409570154095704,
|
|
"grad_norm": 0.5890198945999146,
|
|
"learning_rate": 2.0510674598558045e-07,
|
|
"loss": 0.5544984936714172,
|
|
"step": 2113
|
|
},
|
|
{
|
|
"epoch": 2.7422546634225466,
|
|
"grad_norm": 0.6140372157096863,
|
|
"learning_rate": 2.0308485469946736e-07,
|
|
"loss": 0.6121523380279541,
|
|
"step": 2114
|
|
},
|
|
{
|
|
"epoch": 2.743552311435523,
|
|
"grad_norm": 0.5979804396629333,
|
|
"learning_rate": 2.010727718907074e-07,
|
|
"loss": 0.5417115688323975,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 2.7448499594484996,
|
|
"grad_norm": 0.6019598841667175,
|
|
"learning_rate": 1.9907050167349894e-07,
|
|
"loss": 0.5624793171882629,
|
|
"step": 2116
|
|
},
|
|
{
|
|
"epoch": 2.7461476074614763,
|
|
"grad_norm": 0.6011685132980347,
|
|
"learning_rate": 1.9707804814198096e-07,
|
|
"loss": 0.5510683655738831,
|
|
"step": 2117
|
|
},
|
|
{
|
|
"epoch": 2.7474452554744526,
|
|
"grad_norm": 0.5924180746078491,
|
|
"learning_rate": 1.9509541537021392e-07,
|
|
"loss": 0.5276060104370117,
|
|
"step": 2118
|
|
},
|
|
{
|
|
"epoch": 2.748742903487429,
|
|
"grad_norm": 0.6053572297096252,
|
|
"learning_rate": 1.9312260741218114e-07,
|
|
"loss": 0.5551567673683167,
|
|
"step": 2119
|
|
},
|
|
{
|
|
"epoch": 2.7500405515004056,
|
|
"grad_norm": 0.620968222618103,
|
|
"learning_rate": 1.911596283017747e-07,
|
|
"loss": 0.5851413011550903,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 2.751338199513382,
|
|
"grad_norm": 0.626413881778717,
|
|
"learning_rate": 1.8920648205279113e-07,
|
|
"loss": 0.5591974258422852,
|
|
"step": 2121
|
|
},
|
|
{
|
|
"epoch": 2.7526358475263586,
|
|
"grad_norm": 0.615846574306488,
|
|
"learning_rate": 1.8726317265891968e-07,
|
|
"loss": 0.5918228626251221,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 2.753933495539335,
|
|
"grad_norm": 0.645077645778656,
|
|
"learning_rate": 1.8532970409373684e-07,
|
|
"loss": 0.5714014172554016,
|
|
"step": 2123
|
|
},
|
|
{
|
|
"epoch": 2.7552311435523116,
|
|
"grad_norm": 0.6882081031799316,
|
|
"learning_rate": 1.8340608031069462e-07,
|
|
"loss": 0.6177914142608643,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 2.756528791565288,
|
|
"grad_norm": 0.6870415806770325,
|
|
"learning_rate": 1.8149230524311944e-07,
|
|
"loss": 0.6026558876037598,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 2.757826439578264,
|
|
"grad_norm": 0.5801068544387817,
|
|
"learning_rate": 1.7958838280419387e-07,
|
|
"loss": 0.5492424964904785,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 2.759124087591241,
|
|
"grad_norm": 0.6277424693107605,
|
|
"learning_rate": 1.7769431688696048e-07,
|
|
"loss": 0.5704351663589478,
|
|
"step": 2127
|
|
},
|
|
{
|
|
"epoch": 2.7604217356042176,
|
|
"grad_norm": 0.6131430864334106,
|
|
"learning_rate": 1.7581011136430238e-07,
|
|
"loss": 0.6227852702140808,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 2.761719383617194,
|
|
"grad_norm": 0.6621940732002258,
|
|
"learning_rate": 1.739357700889438e-07,
|
|
"loss": 0.5971069931983948,
|
|
"step": 2129
|
|
},
|
|
{
|
|
"epoch": 2.76301703163017,
|
|
"grad_norm": 0.6566265225410461,
|
|
"learning_rate": 1.720712968934385e-07,
|
|
"loss": 0.6617914438247681,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 2.764314679643147,
|
|
"grad_norm": 0.6165506839752197,
|
|
"learning_rate": 1.7021669559016184e-07,
|
|
"loss": 0.5680196285247803,
|
|
"step": 2131
|
|
},
|
|
{
|
|
"epoch": 2.7656123276561235,
|
|
"grad_norm": 0.6046646237373352,
|
|
"learning_rate": 1.6837196997130434e-07,
|
|
"loss": 0.605772078037262,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 2.7669099756691,
|
|
"grad_norm": 0.6838919520378113,
|
|
"learning_rate": 1.6653712380886366e-07,
|
|
"loss": 0.5754232406616211,
|
|
"step": 2133
|
|
},
|
|
{
|
|
"epoch": 2.768207623682076,
|
|
"grad_norm": 0.6096740365028381,
|
|
"learning_rate": 1.6471216085463372e-07,
|
|
"loss": 0.5173358917236328,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 2.769505271695053,
|
|
"grad_norm": 0.6066602468490601,
|
|
"learning_rate": 1.6289708484020395e-07,
|
|
"loss": 0.5950397253036499,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 2.770802919708029,
|
|
"grad_norm": 0.609034538269043,
|
|
"learning_rate": 1.6109189947694448e-07,
|
|
"loss": 0.5427603721618652,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 2.7721005677210058,
|
|
"grad_norm": 0.6451703906059265,
|
|
"learning_rate": 1.5929660845600215e-07,
|
|
"loss": 0.6046600341796875,
|
|
"step": 2137
|
|
},
|
|
{
|
|
"epoch": 2.773398215733982,
|
|
"grad_norm": 0.5977014899253845,
|
|
"learning_rate": 1.575112154482933e-07,
|
|
"loss": 0.5849440693855286,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 2.7746958637469588,
|
|
"grad_norm": 0.6242566108703613,
|
|
"learning_rate": 1.557357241044949e-07,
|
|
"loss": 0.6496338844299316,
|
|
"step": 2139
|
|
},
|
|
{
|
|
"epoch": 2.775993511759935,
|
|
"grad_norm": 0.5988749265670776,
|
|
"learning_rate": 1.539701380550368e-07,
|
|
"loss": 0.5334508419036865,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 2.7772911597729117,
|
|
"grad_norm": 0.7054303288459778,
|
|
"learning_rate": 1.5221446091009618e-07,
|
|
"loss": 0.4878901541233063,
|
|
"step": 2141
|
|
},
|
|
{
|
|
"epoch": 2.778588807785888,
|
|
"grad_norm": 0.6645851731300354,
|
|
"learning_rate": 1.504686962595875e-07,
|
|
"loss": 0.6245031356811523,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 2.7798864557988647,
|
|
"grad_norm": 0.6102975606918335,
|
|
"learning_rate": 1.4873284767315864e-07,
|
|
"loss": 0.5180703997612,
|
|
"step": 2143
|
|
},
|
|
{
|
|
"epoch": 2.781184103811841,
|
|
"grad_norm": 0.6466278433799744,
|
|
"learning_rate": 1.4700691870017991e-07,
|
|
"loss": 0.5804831981658936,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 2.7824817518248173,
|
|
"grad_norm": 0.639724612236023,
|
|
"learning_rate": 1.4529091286973994e-07,
|
|
"loss": 0.6196957230567932,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 2.783779399837794,
|
|
"grad_norm": 0.6038338541984558,
|
|
"learning_rate": 1.435848336906359e-07,
|
|
"loss": 0.5739912986755371,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 2.7850770478507707,
|
|
"grad_norm": 0.6094257831573486,
|
|
"learning_rate": 1.418886846513673e-07,
|
|
"loss": 0.6085304021835327,
|
|
"step": 2147
|
|
},
|
|
{
|
|
"epoch": 2.786374695863747,
|
|
"grad_norm": 0.6370331048965454,
|
|
"learning_rate": 1.4020246922013093e-07,
|
|
"loss": 0.572968065738678,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 2.7876723438767232,
|
|
"grad_norm": 0.5946716666221619,
|
|
"learning_rate": 1.3852619084480933e-07,
|
|
"loss": 0.5418939590454102,
|
|
"step": 2149
|
|
},
|
|
{
|
|
"epoch": 2.7889699918897,
|
|
"grad_norm": 0.6075360774993896,
|
|
"learning_rate": 1.3685985295296798e-07,
|
|
"loss": 0.5994930267333984,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 2.7902676399026762,
|
|
"grad_norm": 0.6279016733169556,
|
|
"learning_rate": 1.3520345895184583e-07,
|
|
"loss": 0.5570014715194702,
|
|
"step": 2151
|
|
},
|
|
{
|
|
"epoch": 2.791565287915653,
|
|
"grad_norm": 0.6306800246238708,
|
|
"learning_rate": 1.3355701222835026e-07,
|
|
"loss": 0.5708903074264526,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 2.792862935928629,
|
|
"grad_norm": 0.6170070171356201,
|
|
"learning_rate": 1.3192051614904722e-07,
|
|
"loss": 0.550320029258728,
|
|
"step": 2153
|
|
},
|
|
{
|
|
"epoch": 2.794160583941606,
|
|
"grad_norm": 0.6288532018661499,
|
|
"learning_rate": 1.302939740601572e-07,
|
|
"loss": 0.613933801651001,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 2.795458231954582,
|
|
"grad_norm": 0.6111281514167786,
|
|
"learning_rate": 1.2867738928754703e-07,
|
|
"loss": 0.5617604851722717,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 2.796755879967559,
|
|
"grad_norm": 0.9522826075553894,
|
|
"learning_rate": 1.2707076513672423e-07,
|
|
"loss": 0.5882472395896912,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 2.798053527980535,
|
|
"grad_norm": 0.6296880841255188,
|
|
"learning_rate": 1.2547410489282708e-07,
|
|
"loss": 0.559617280960083,
|
|
"step": 2157
|
|
},
|
|
{
|
|
"epoch": 2.799351175993512,
|
|
"grad_norm": 0.6598941087722778,
|
|
"learning_rate": 1.2388741182062348e-07,
|
|
"loss": 0.5574393272399902,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 2.800648824006488,
|
|
"grad_norm": 0.5911178588867188,
|
|
"learning_rate": 1.2231068916449705e-07,
|
|
"loss": 0.5624610185623169,
|
|
"step": 2159
|
|
},
|
|
{
|
|
"epoch": 2.8019464720194645,
|
|
"grad_norm": 0.6504255533218384,
|
|
"learning_rate": 1.2074394014844782e-07,
|
|
"loss": 0.6260690689086914,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 2.803244120032441,
|
|
"grad_norm": 0.6211426258087158,
|
|
"learning_rate": 1.1918716797608087e-07,
|
|
"loss": 0.6100113391876221,
|
|
"step": 2161
|
|
},
|
|
{
|
|
"epoch": 2.804541768045418,
|
|
"grad_norm": 0.6233659386634827,
|
|
"learning_rate": 1.1764037583060162e-07,
|
|
"loss": 0.5747858285903931,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 2.805839416058394,
|
|
"grad_norm": 0.6078013777732849,
|
|
"learning_rate": 1.1610356687480728e-07,
|
|
"loss": 0.5918527841567993,
|
|
"step": 2163
|
|
},
|
|
{
|
|
"epoch": 2.8071370640713704,
|
|
"grad_norm": 0.6079197525978088,
|
|
"learning_rate": 1.1457674425108478e-07,
|
|
"loss": 0.5714898109436035,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 2.808434712084347,
|
|
"grad_norm": 0.5850006937980652,
|
|
"learning_rate": 1.1305991108139847e-07,
|
|
"loss": 0.5996066927909851,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 2.809732360097324,
|
|
"grad_norm": 0.6206707954406738,
|
|
"learning_rate": 1.1155307046728958e-07,
|
|
"loss": 0.55565345287323,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 2.8110300081103,
|
|
"grad_norm": 0.6294933557510376,
|
|
"learning_rate": 1.1005622548986406e-07,
|
|
"loss": 0.5798709392547607,
|
|
"step": 2167
|
|
},
|
|
{
|
|
"epoch": 2.8123276561232764,
|
|
"grad_norm": 0.6298512816429138,
|
|
"learning_rate": 1.0856937920979305e-07,
|
|
"loss": 0.5979269742965698,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 2.813625304136253,
|
|
"grad_norm": 0.6053575277328491,
|
|
"learning_rate": 1.0709253466729963e-07,
|
|
"loss": 0.5668598413467407,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 2.8149229521492294,
|
|
"grad_norm": 0.6343475580215454,
|
|
"learning_rate": 1.0562569488215712e-07,
|
|
"loss": 0.6248285174369812,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 2.816220600162206,
|
|
"grad_norm": 0.6348695755004883,
|
|
"learning_rate": 1.0416886285368188e-07,
|
|
"loss": 0.5982720851898193,
|
|
"step": 2171
|
|
},
|
|
{
|
|
"epoch": 2.8175182481751824,
|
|
"grad_norm": 0.6075454354286194,
|
|
"learning_rate": 1.0272204156072663e-07,
|
|
"loss": 0.580233097076416,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 2.818815896188159,
|
|
"grad_norm": 0.6037595272064209,
|
|
"learning_rate": 1.012852339616749e-07,
|
|
"loss": 0.549045205116272,
|
|
"step": 2173
|
|
},
|
|
{
|
|
"epoch": 2.8201135442011354,
|
|
"grad_norm": 0.6013658046722412,
|
|
"learning_rate": 9.985844299443437e-08,
|
|
"loss": 0.5709958672523499,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 2.821411192214112,
|
|
"grad_norm": 0.6192932724952698,
|
|
"learning_rate": 9.844167157643191e-08,
|
|
"loss": 0.5936025381088257,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 2.8227088402270883,
|
|
"grad_norm": 0.6013957858085632,
|
|
"learning_rate": 9.703492260460578e-08,
|
|
"loss": 0.5784536600112915,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 2.824006488240065,
|
|
"grad_norm": 0.6452348232269287,
|
|
"learning_rate": 9.563819895540172e-08,
|
|
"loss": 0.6597691774368286,
|
|
"step": 2177
|
|
},
|
|
{
|
|
"epoch": 2.8253041362530413,
|
|
"grad_norm": 0.6121287941932678,
|
|
"learning_rate": 9.42515034847663e-08,
|
|
"loss": 0.6041057705879211,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 2.8266017842660176,
|
|
"grad_norm": 0.6265618801116943,
|
|
"learning_rate": 9.287483902814087e-08,
|
|
"loss": 0.5931543707847595,
|
|
"step": 2179
|
|
},
|
|
{
|
|
"epoch": 2.8278994322789943,
|
|
"grad_norm": 0.6284413933753967,
|
|
"learning_rate": 9.150820840045483e-08,
|
|
"loss": 0.5969519019126892,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 2.829197080291971,
|
|
"grad_norm": 0.6021496057510376,
|
|
"learning_rate": 9.015161439612396e-08,
|
|
"loss": 0.6106084585189819,
|
|
"step": 2181
|
|
},
|
|
{
|
|
"epoch": 2.8304947283049473,
|
|
"grad_norm": 0.6177151203155518,
|
|
"learning_rate": 8.880505978903719e-08,
|
|
"loss": 0.6132292151451111,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 2.8317923763179236,
|
|
"grad_norm": 0.6375380754470825,
|
|
"learning_rate": 8.746854733255982e-08,
|
|
"loss": 0.5775139927864075,
|
|
"step": 2183
|
|
},
|
|
{
|
|
"epoch": 2.8330900243309003,
|
|
"grad_norm": 0.623674750328064,
|
|
"learning_rate": 8.614207975952083e-08,
|
|
"loss": 0.5772640705108643,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 2.8343876723438766,
|
|
"grad_norm": 2.0252397060394287,
|
|
"learning_rate": 8.482565978221002e-08,
|
|
"loss": 0.6038268804550171,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 2.8356853203568533,
|
|
"grad_norm": 0.6209124326705933,
|
|
"learning_rate": 8.351929009237425e-08,
|
|
"loss": 0.5768431425094604,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 2.8369829683698295,
|
|
"grad_norm": 0.6141339540481567,
|
|
"learning_rate": 8.222297336120844e-08,
|
|
"loss": 0.6076856851577759,
|
|
"step": 2187
|
|
},
|
|
{
|
|
"epoch": 2.8382806163828063,
|
|
"grad_norm": 0.615900456905365,
|
|
"learning_rate": 8.093671223935118e-08,
|
|
"loss": 0.5514330267906189,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 2.8395782643957825,
|
|
"grad_norm": 0.6074119210243225,
|
|
"learning_rate": 7.966050935688252e-08,
|
|
"loss": 0.5663487911224365,
|
|
"step": 2189
|
|
},
|
|
{
|
|
"epoch": 2.8408759124087593,
|
|
"grad_norm": 0.6119362711906433,
|
|
"learning_rate": 7.839436732331285e-08,
|
|
"loss": 0.5301929712295532,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 2.8421735604217355,
|
|
"grad_norm": 0.6157346963882446,
|
|
"learning_rate": 7.7138288727584e-08,
|
|
"loss": 0.5847445130348206,
|
|
"step": 2191
|
|
},
|
|
{
|
|
"epoch": 2.8434712084347122,
|
|
"grad_norm": 0.8324165940284729,
|
|
"learning_rate": 7.589227613805705e-08,
|
|
"loss": 0.6258946061134338,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 2.8447688564476885,
|
|
"grad_norm": 0.6051367521286011,
|
|
"learning_rate": 7.465633210251344e-08,
|
|
"loss": 0.6049424409866333,
|
|
"step": 2193
|
|
},
|
|
{
|
|
"epoch": 2.846066504460665,
|
|
"grad_norm": 0.6111598610877991,
|
|
"learning_rate": 7.343045914814495e-08,
|
|
"loss": 0.615462601184845,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 2.8473641524736415,
|
|
"grad_norm": 0.6303842663764954,
|
|
"learning_rate": 7.221465978155262e-08,
|
|
"loss": 0.5582486987113953,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 2.848661800486618,
|
|
"grad_norm": 0.6294355392456055,
|
|
"learning_rate": 7.10089364887373e-08,
|
|
"loss": 0.5927014946937561,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 2.8499594484995945,
|
|
"grad_norm": 0.6469996571540833,
|
|
"learning_rate": 6.981329173509909e-08,
|
|
"loss": 0.639467179775238,
|
|
"step": 2197
|
|
},
|
|
{
|
|
"epoch": 2.8512570965125708,
|
|
"grad_norm": 0.5986980199813843,
|
|
"learning_rate": 6.862772796542794e-08,
|
|
"loss": 0.6210333704948425,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 2.8525547445255475,
|
|
"grad_norm": 0.6324379444122314,
|
|
"learning_rate": 6.745224760390246e-08,
|
|
"loss": 0.5866251587867737,
|
|
"step": 2199
|
|
},
|
|
{
|
|
"epoch": 2.853852392538524,
|
|
"grad_norm": 0.6159996390342712,
|
|
"learning_rate": 6.628685305408166e-08,
|
|
"loss": 0.5464287996292114,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 2.8551500405515005,
|
|
"grad_norm": 0.6313933730125427,
|
|
"learning_rate": 6.513154669890221e-08,
|
|
"loss": 0.5239887237548828,
|
|
"step": 2201
|
|
},
|
|
{
|
|
"epoch": 2.8564476885644767,
|
|
"grad_norm": 0.6151242852210999,
|
|
"learning_rate": 6.398633090067497e-08,
|
|
"loss": 0.5513571500778198,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 2.8577453365774534,
|
|
"grad_norm": 0.6272878646850586,
|
|
"learning_rate": 6.285120800107402e-08,
|
|
"loss": 0.5711073875427246,
|
|
"step": 2203
|
|
},
|
|
{
|
|
"epoch": 2.8590429845904297,
|
|
"grad_norm": 0.6284655928611755,
|
|
"learning_rate": 6.172618032114108e-08,
|
|
"loss": 0.5585539937019348,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 2.8603406326034064,
|
|
"grad_norm": 0.6207369565963745,
|
|
"learning_rate": 6.061125016127045e-08,
|
|
"loss": 0.6215085983276367,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 2.8616382806163827,
|
|
"grad_norm": 0.5943953394889832,
|
|
"learning_rate": 5.950641980121352e-08,
|
|
"loss": 0.5761866569519043,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 2.8629359286293594,
|
|
"grad_norm": 0.6034578680992126,
|
|
"learning_rate": 5.84116915000682e-08,
|
|
"loss": 0.5912197828292847,
|
|
"step": 2207
|
|
},
|
|
{
|
|
"epoch": 2.8642335766423357,
|
|
"grad_norm": 0.612192690372467,
|
|
"learning_rate": 5.732706749627726e-08,
|
|
"loss": 0.5836058855056763,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 2.8655312246553124,
|
|
"grad_norm": 0.5961986780166626,
|
|
"learning_rate": 5.6252550007621645e-08,
|
|
"loss": 0.6387939453125,
|
|
"step": 2209
|
|
},
|
|
{
|
|
"epoch": 2.8668288726682887,
|
|
"grad_norm": 0.6482071876525879,
|
|
"learning_rate": 5.518814123121885e-08,
|
|
"loss": 0.5909046530723572,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 2.8681265206812654,
|
|
"grad_norm": 0.6003962755203247,
|
|
"learning_rate": 5.413384334351346e-08,
|
|
"loss": 0.5739219188690186,
|
|
"step": 2211
|
|
},
|
|
{
|
|
"epoch": 2.8694241686942417,
|
|
"grad_norm": 0.6164109110832214,
|
|
"learning_rate": 5.308965850027992e-08,
|
|
"loss": 0.5988886952400208,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 2.870721816707218,
|
|
"grad_norm": 0.6417747139930725,
|
|
"learning_rate": 5.205558883661033e-08,
|
|
"loss": 0.6298974752426147,
|
|
"step": 2213
|
|
},
|
|
{
|
|
"epoch": 2.8720194647201946,
|
|
"grad_norm": 0.6133490204811096,
|
|
"learning_rate": 5.103163646691611e-08,
|
|
"loss": 0.584977388381958,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 2.8733171127331714,
|
|
"grad_norm": 0.611873984336853,
|
|
"learning_rate": 5.00178034849208e-08,
|
|
"loss": 0.5997759103775024,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 2.8746147607461476,
|
|
"grad_norm": 0.5938137173652649,
|
|
"learning_rate": 4.9014091963655584e-08,
|
|
"loss": 0.5509130954742432,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 2.875912408759124,
|
|
"grad_norm": 0.6484394073486328,
|
|
"learning_rate": 4.802050395545765e-08,
|
|
"loss": 0.6474854946136475,
|
|
"step": 2217
|
|
},
|
|
{
|
|
"epoch": 2.8772100567721006,
|
|
"grad_norm": 0.619995653629303,
|
|
"learning_rate": 4.703704149196187e-08,
|
|
"loss": 0.5942093133926392,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 2.878507704785077,
|
|
"grad_norm": 0.6322592496871948,
|
|
"learning_rate": 4.6063706584100196e-08,
|
|
"loss": 0.5504230856895447,
|
|
"step": 2219
|
|
},
|
|
{
|
|
"epoch": 2.8798053527980536,
|
|
"grad_norm": 0.6172313094139099,
|
|
"learning_rate": 4.5100501222097304e-08,
|
|
"loss": 0.677121639251709,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 2.88110300081103,
|
|
"grad_norm": 0.5940432548522949,
|
|
"learning_rate": 4.414742737546274e-08,
|
|
"loss": 0.593209981918335,
|
|
"step": 2221
|
|
},
|
|
{
|
|
"epoch": 2.8824006488240066,
|
|
"grad_norm": 0.8577704429626465,
|
|
"learning_rate": 4.320448699299262e-08,
|
|
"loss": 0.705782949924469,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 2.883698296836983,
|
|
"grad_norm": 0.6182291507720947,
|
|
"learning_rate": 4.227168200276077e-08,
|
|
"loss": 0.569422721862793,
|
|
"step": 2223
|
|
},
|
|
{
|
|
"epoch": 2.8849959448499596,
|
|
"grad_norm": 0.6797721982002258,
|
|
"learning_rate": 4.134901431211702e-08,
|
|
"loss": 0.6029517650604248,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 2.886293592862936,
|
|
"grad_norm": 0.5963630676269531,
|
|
"learning_rate": 4.043648580768389e-08,
|
|
"loss": 0.5859914422035217,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 2.8875912408759126,
|
|
"grad_norm": 0.5913455486297607,
|
|
"learning_rate": 3.953409835535049e-08,
|
|
"loss": 0.5406662225723267,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 2.888888888888889,
|
|
"grad_norm": 0.6106013655662537,
|
|
"learning_rate": 3.8641853800271414e-08,
|
|
"loss": 0.5755677223205566,
|
|
"step": 2227
|
|
},
|
|
{
|
|
"epoch": 2.890186536901865,
|
|
"grad_norm": 0.6029745936393738,
|
|
"learning_rate": 3.77597539668606e-08,
|
|
"loss": 0.5609725713729858,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 2.891484184914842,
|
|
"grad_norm": 0.6042892336845398,
|
|
"learning_rate": 3.688780065878916e-08,
|
|
"loss": 0.5605003237724304,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 2.8927818329278185,
|
|
"grad_norm": 0.5953066945075989,
|
|
"learning_rate": 3.602599565898091e-08,
|
|
"loss": 0.5514798164367676,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 2.894079480940795,
|
|
"grad_norm": 0.5983406901359558,
|
|
"learning_rate": 3.517434072960901e-08,
|
|
"loss": 0.6183291077613831,
|
|
"step": 2231
|
|
},
|
|
{
|
|
"epoch": 2.895377128953771,
|
|
"grad_norm": 0.5932295918464661,
|
|
"learning_rate": 3.433283761209161e-08,
|
|
"loss": 0.6106539964675903,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 2.896674776966748,
|
|
"grad_norm": 0.6509292721748352,
|
|
"learning_rate": 3.3501488027090635e-08,
|
|
"loss": 0.5615214109420776,
|
|
"step": 2233
|
|
},
|
|
{
|
|
"epoch": 2.8979724249797245,
|
|
"grad_norm": 0.613764762878418,
|
|
"learning_rate": 3.268029367450465e-08,
|
|
"loss": 0.6054869294166565,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 2.899270072992701,
|
|
"grad_norm": 0.6044638156890869,
|
|
"learning_rate": 3.186925623346882e-08,
|
|
"loss": 0.5691530704498291,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 2.900567721005677,
|
|
"grad_norm": 0.6060168147087097,
|
|
"learning_rate": 3.10683773623488e-08,
|
|
"loss": 0.5762636065483093,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 2.9018653690186538,
|
|
"grad_norm": 0.611011803150177,
|
|
"learning_rate": 3.0277658698739665e-08,
|
|
"loss": 0.5851128101348877,
|
|
"step": 2237
|
|
},
|
|
{
|
|
"epoch": 2.90316301703163,
|
|
"grad_norm": 0.6304229497909546,
|
|
"learning_rate": 2.9497101859460865e-08,
|
|
"loss": 0.5497856140136719,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 2.9044606650446068,
|
|
"grad_norm": 0.5783108472824097,
|
|
"learning_rate": 2.872670844055403e-08,
|
|
"loss": 0.5745448470115662,
|
|
"step": 2239
|
|
},
|
|
{
|
|
"epoch": 2.905758313057583,
|
|
"grad_norm": 0.609293520450592,
|
|
"learning_rate": 2.7966480017277974e-08,
|
|
"loss": 0.5522551536560059,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 2.9070559610705597,
|
|
"grad_norm": 0.5960776209831238,
|
|
"learning_rate": 2.7216418144107583e-08,
|
|
"loss": 0.5907799005508423,
|
|
"step": 2241
|
|
},
|
|
{
|
|
"epoch": 2.908353609083536,
|
|
"grad_norm": 0.793721079826355,
|
|
"learning_rate": 2.6476524354729917e-08,
|
|
"loss": 0.55716872215271,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 2.9096512570965127,
|
|
"grad_norm": 0.6245414614677429,
|
|
"learning_rate": 2.5746800162040342e-08,
|
|
"loss": 0.5835314989089966,
|
|
"step": 2243
|
|
},
|
|
{
|
|
"epoch": 2.910948905109489,
|
|
"grad_norm": 0.616008996963501,
|
|
"learning_rate": 2.5027247058139748e-08,
|
|
"loss": 0.594428300857544,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 2.9122465531224657,
|
|
"grad_norm": 0.5911674499511719,
|
|
"learning_rate": 2.4317866514332322e-08,
|
|
"loss": 0.5509923696517944,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 2.913544201135442,
|
|
"grad_norm": 0.6335274577140808,
|
|
"learning_rate": 2.361865998112223e-08,
|
|
"loss": 0.6094061732292175,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 2.9148418491484183,
|
|
"grad_norm": 0.6137773990631104,
|
|
"learning_rate": 2.2929628888209156e-08,
|
|
"loss": 0.6228293180465698,
|
|
"step": 2247
|
|
},
|
|
{
|
|
"epoch": 2.916139497161395,
|
|
"grad_norm": 0.6228021383285522,
|
|
"learning_rate": 2.2250774644487215e-08,
|
|
"loss": 0.5877048969268799,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 2.9174371451743717,
|
|
"grad_norm": 0.6152287125587463,
|
|
"learning_rate": 2.158209863804217e-08,
|
|
"loss": 0.6036567091941833,
|
|
"step": 2249
|
|
},
|
|
{
|
|
"epoch": 2.918734793187348,
|
|
"grad_norm": 0.6172757744789124,
|
|
"learning_rate": 2.0923602236146977e-08,
|
|
"loss": 0.5865423083305359,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 2.9200324412003242,
|
|
"grad_norm": 0.6071073412895203,
|
|
"learning_rate": 2.0275286785260694e-08,
|
|
"loss": 0.583999752998352,
|
|
"step": 2251
|
|
},
|
|
{
|
|
"epoch": 2.921330089213301,
|
|
"grad_norm": 0.6244159936904907,
|
|
"learning_rate": 1.9637153611022365e-08,
|
|
"loss": 0.5794707536697388,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 2.9226277372262772,
|
|
"grad_norm": 0.6465387940406799,
|
|
"learning_rate": 1.9009204018255456e-08,
|
|
"loss": 0.559209942817688,
|
|
"step": 2253
|
|
},
|
|
{
|
|
"epoch": 2.923925385239254,
|
|
"grad_norm": 0.6284136176109314,
|
|
"learning_rate": 1.839143929095566e-08,
|
|
"loss": 0.562762975692749,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 2.92522303325223,
|
|
"grad_norm": 0.6393802762031555,
|
|
"learning_rate": 1.7783860692296982e-08,
|
|
"loss": 0.6002349853515625,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 2.926520681265207,
|
|
"grad_norm": 0.6242037415504456,
|
|
"learning_rate": 1.718646946462288e-08,
|
|
"loss": 0.593687117099762,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 2.927818329278183,
|
|
"grad_norm": 0.6453087329864502,
|
|
"learning_rate": 1.6599266829447902e-08,
|
|
"loss": 0.6138840317726135,
|
|
"step": 2257
|
|
},
|
|
{
|
|
"epoch": 2.92911597729116,
|
|
"grad_norm": 0.632391095161438,
|
|
"learning_rate": 1.6022253987452717e-08,
|
|
"loss": 0.5360509157180786,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 2.930413625304136,
|
|
"grad_norm": 0.625159740447998,
|
|
"learning_rate": 1.5455432118481884e-08,
|
|
"loss": 0.6014057397842407,
|
|
"step": 2259
|
|
},
|
|
{
|
|
"epoch": 2.931711273317113,
|
|
"grad_norm": 0.6160334944725037,
|
|
"learning_rate": 1.4898802381543842e-08,
|
|
"loss": 0.5864812135696411,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 2.933008921330089,
|
|
"grad_norm": 0.6208499073982239,
|
|
"learning_rate": 1.4352365914804822e-08,
|
|
"loss": 0.5853984355926514,
|
|
"step": 2261
|
|
},
|
|
{
|
|
"epoch": 2.9343065693430654,
|
|
"grad_norm": 0.6147589087486267,
|
|
"learning_rate": 1.3816123835588835e-08,
|
|
"loss": 0.6146311163902283,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 2.935604217356042,
|
|
"grad_norm": 0.6171795129776001,
|
|
"learning_rate": 1.3290077240375453e-08,
|
|
"loss": 0.5833883285522461,
|
|
"step": 2263
|
|
},
|
|
{
|
|
"epoch": 2.936901865369019,
|
|
"grad_norm": 0.5844340920448303,
|
|
"learning_rate": 1.277422720479704e-08,
|
|
"loss": 0.6002391576766968,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 2.938199513381995,
|
|
"grad_norm": 0.6268512606620789,
|
|
"learning_rate": 1.2268574783635968e-08,
|
|
"loss": 0.6797309517860413,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 2.9394971613949714,
|
|
"grad_norm": 0.5872271656990051,
|
|
"learning_rate": 1.1773121010824063e-08,
|
|
"loss": 0.5867947936058044,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 2.940794809407948,
|
|
"grad_norm": 0.633165180683136,
|
|
"learning_rate": 1.1287866899438171e-08,
|
|
"loss": 0.6117358207702637,
|
|
"step": 2267
|
|
},
|
|
{
|
|
"epoch": 2.942092457420925,
|
|
"grad_norm": 0.5991867184638977,
|
|
"learning_rate": 1.081281344170071e-08,
|
|
"loss": 0.5292370319366455,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 2.943390105433901,
|
|
"grad_norm": 0.6432121396064758,
|
|
"learning_rate": 1.0347961608975221e-08,
|
|
"loss": 0.5962504148483276,
|
|
"step": 2269
|
|
},
|
|
{
|
|
"epoch": 2.9446877534468774,
|
|
"grad_norm": 0.6073801517486572,
|
|
"learning_rate": 9.893312351766382e-09,
|
|
"loss": 0.6454894542694092,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 2.945985401459854,
|
|
"grad_norm": 0.6156368851661682,
|
|
"learning_rate": 9.448866599717221e-09,
|
|
"loss": 0.5632429718971252,
|
|
"step": 2271
|
|
},
|
|
{
|
|
"epoch": 2.9472830494728304,
|
|
"grad_norm": 0.7083485126495361,
|
|
"learning_rate": 9.014625261605791e-09,
|
|
"loss": 0.5813943147659302,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 2.948580697485807,
|
|
"grad_norm": 0.6162700653076172,
|
|
"learning_rate": 8.590589225346834e-09,
|
|
"loss": 0.5752675533294678,
|
|
"step": 2273
|
|
},
|
|
{
|
|
"epoch": 2.9498783454987834,
|
|
"grad_norm": 0.610639750957489,
|
|
"learning_rate": 8.17675935798623e-09,
|
|
"loss": 0.6433367133140564,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 2.95117599351176,
|
|
"grad_norm": 0.5966771841049194,
|
|
"learning_rate": 7.773136505700995e-09,
|
|
"loss": 0.532874345779419,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 2.9524736415247363,
|
|
"grad_norm": 0.6585695743560791,
|
|
"learning_rate": 7.379721493798176e-09,
|
|
"loss": 0.5892356634140015,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 2.9537712895377126,
|
|
"grad_norm": 0.6081703901290894,
|
|
"learning_rate": 6.996515126711511e-09,
|
|
"loss": 0.5548315048217773,
|
|
"step": 2277
|
|
},
|
|
{
|
|
"epoch": 2.9550689375506893,
|
|
"grad_norm": 0.6258850693702698,
|
|
"learning_rate": 6.623518188001443e-09,
|
|
"loss": 0.5927635431289673,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 2.956366585563666,
|
|
"grad_norm": 0.6431419253349304,
|
|
"learning_rate": 6.260731440351775e-09,
|
|
"loss": 0.6057431101799011,
|
|
"step": 2279
|
|
},
|
|
{
|
|
"epoch": 2.9576642335766423,
|
|
"grad_norm": 0.621634840965271,
|
|
"learning_rate": 5.908155625570233e-09,
|
|
"loss": 0.5803443789482117,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 2.9589618815896186,
|
|
"grad_norm": 0.5794631838798523,
|
|
"learning_rate": 5.56579146458458e-09,
|
|
"loss": 0.5982474088668823,
|
|
"step": 2281
|
|
},
|
|
{
|
|
"epoch": 2.9602595296025953,
|
|
"grad_norm": 0.5987969040870667,
|
|
"learning_rate": 5.233639657443168e-09,
|
|
"loss": 0.6081230640411377,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 2.961557177615572,
|
|
"grad_norm": 0.6121331453323364,
|
|
"learning_rate": 4.911700883312165e-09,
|
|
"loss": 0.5589238405227661,
|
|
"step": 2283
|
|
},
|
|
{
|
|
"epoch": 2.9628548256285483,
|
|
"grad_norm": 0.6170937418937683,
|
|
"learning_rate": 4.599975800475553e-09,
|
|
"loss": 0.575406014919281,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 2.9641524736415246,
|
|
"grad_norm": 0.5928655862808228,
|
|
"learning_rate": 4.298465046331246e-09,
|
|
"loss": 0.588203489780426,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 2.9654501216545013,
|
|
"grad_norm": 0.6178304553031921,
|
|
"learning_rate": 4.007169237392749e-09,
|
|
"loss": 0.5311431288719177,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 2.9667477696674776,
|
|
"grad_norm": 0.6006078124046326,
|
|
"learning_rate": 3.726088969286945e-09,
|
|
"loss": 0.5917048454284668,
|
|
"step": 2287
|
|
},
|
|
{
|
|
"epoch": 2.9680454176804543,
|
|
"grad_norm": 0.6022590398788452,
|
|
"learning_rate": 3.4552248167507576e-09,
|
|
"loss": 0.5889644026756287,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 2.9693430656934305,
|
|
"grad_norm": 0.5813162922859192,
|
|
"learning_rate": 3.1945773336333754e-09,
|
|
"loss": 0.5726138353347778,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 2.9706407137064073,
|
|
"grad_norm": 0.6178452372550964,
|
|
"learning_rate": 2.9441470528929206e-09,
|
|
"loss": 0.6099365949630737,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 2.9719383617193835,
|
|
"grad_norm": 0.6197913289070129,
|
|
"learning_rate": 2.703934486595894e-09,
|
|
"loss": 0.6363242268562317,
|
|
"step": 2291
|
|
},
|
|
{
|
|
"epoch": 2.9732360097323602,
|
|
"grad_norm": 0.6046017408370972,
|
|
"learning_rate": 2.4739401259160635e-09,
|
|
"loss": 0.5827226042747498,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 2.9745336577453365,
|
|
"grad_norm": 0.64341801404953,
|
|
"learning_rate": 2.2541644411344653e-09,
|
|
"loss": 0.5797464847564697,
|
|
"step": 2293
|
|
},
|
|
{
|
|
"epoch": 2.9758313057583132,
|
|
"grad_norm": 0.6010720133781433,
|
|
"learning_rate": 2.0446078816355186e-09,
|
|
"loss": 0.5213384628295898,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 2.9771289537712895,
|
|
"grad_norm": 0.5899950265884399,
|
|
"learning_rate": 1.8452708759097993e-09,
|
|
"loss": 0.5917242765426636,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 2.9784266017842658,
|
|
"grad_norm": 0.5960827469825745,
|
|
"learning_rate": 1.656153831551821e-09,
|
|
"loss": 0.5761323571205139,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 2.9797242497972425,
|
|
"grad_norm": 0.641033411026001,
|
|
"learning_rate": 1.4772571352567044e-09,
|
|
"loss": 0.6058821678161621,
|
|
"step": 2297
|
|
},
|
|
{
|
|
"epoch": 2.981021897810219,
|
|
"grad_norm": 0.6438850164413452,
|
|
"learning_rate": 1.3085811528240622e-09,
|
|
"loss": 0.6135293245315552,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 2.9823195458231955,
|
|
"grad_norm": 0.652836263179779,
|
|
"learning_rate": 1.1501262291530034e-09,
|
|
"loss": 0.6278634667396545,
|
|
"step": 2299
|
|
},
|
|
{
|
|
"epoch": 2.9836171938361717,
|
|
"grad_norm": 0.6056571006774902,
|
|
"learning_rate": 1.0018926882443548e-09,
|
|
"loss": 0.6097397208213806,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 2.9836171938361717,
|
|
"eval_loss": 0.6816402673721313,
|
|
"eval_runtime": 72.9022,
|
|
"eval_samples_per_second": 71.219,
|
|
"eval_steps_per_second": 8.902,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 2.9849148418491485,
|
|
"grad_norm": 0.6095858812332153,
|
|
"learning_rate": 8.638808331973281e-10,
|
|
"loss": 0.5901839733123779,
|
|
"step": 2301
|
|
},
|
|
{
|
|
"epoch": 2.986212489862125,
|
|
"grad_norm": 0.610837996006012,
|
|
"learning_rate": 7.360909462111876e-10,
|
|
"loss": 0.6008099913597107,
|
|
"step": 2302
|
|
},
|
|
{
|
|
"epoch": 2.9875101378751014,
|
|
"grad_norm": 0.6182950735092163,
|
|
"learning_rate": 6.185232885846937e-10,
|
|
"loss": 0.599170446395874,
|
|
"step": 2303
|
|
},
|
|
{
|
|
"epoch": 2.9888077858880777,
|
|
"grad_norm": 0.5876109600067139,
|
|
"learning_rate": 5.111781007138827e-10,
|
|
"loss": 0.5724647045135498,
|
|
"step": 2304
|
|
},
|
|
{
|
|
"epoch": 2.9901054339010544,
|
|
"grad_norm": 0.6355734467506409,
|
|
"learning_rate": 4.1405560209206716e-10,
|
|
"loss": 0.5922134518623352,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 2.9914030819140307,
|
|
"grad_norm": 0.613153338432312,
|
|
"learning_rate": 3.2715599131039053e-10,
|
|
"loss": 0.5836412906646729,
|
|
"step": 2306
|
|
},
|
|
{
|
|
"epoch": 2.9927007299270074,
|
|
"grad_norm": 0.6345803737640381,
|
|
"learning_rate": 2.5047944605616215e-10,
|
|
"loss": 0.5756551623344421,
|
|
"step": 2307
|
|
},
|
|
{
|
|
"epoch": 2.9939983779399837,
|
|
"grad_norm": 0.6199482679367065,
|
|
"learning_rate": 1.840261231139673e-10,
|
|
"loss": 0.5494982004165649,
|
|
"step": 2308
|
|
},
|
|
{
|
|
"epoch": 2.9952960259529604,
|
|
"grad_norm": 0.62641441822052,
|
|
"learning_rate": 1.2779615836455706e-10,
|
|
"loss": 0.6009610295295715,
|
|
"step": 2309
|
|
},
|
|
{
|
|
"epoch": 2.9965936739659367,
|
|
"grad_norm": 0.6289675831794739,
|
|
"learning_rate": 8.17896667826279e-11,
|
|
"loss": 0.6343727111816406,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 2.997891321978913,
|
|
"grad_norm": 0.6318255662918091,
|
|
"learning_rate": 4.600674244070735e-11,
|
|
"loss": 0.5607834458351135,
|
|
"step": 2311
|
|
},
|
|
{
|
|
"epoch": 2.9991889699918897,
|
|
"grad_norm": 0.6189204454421997,
|
|
"learning_rate": 2.04474585052683e-11,
|
|
"loss": 0.5686444044113159,
|
|
"step": 2312
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.7989046573638916,
|
|
"learning_rate": 5.11186723950452e-12,
|
|
"loss": 0.6789172887802124,
|
|
"step": 2313
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"step": 2313,
|
|
"total_flos": 8.852766725217714e+18,
|
|
"train_loss": 0.6584745990833162,
|
|
"train_runtime": 19013.667,
|
|
"train_samples_per_second": 15.562,
|
|
"train_steps_per_second": 0.122
|
|
}
|
|
],
|
|
"logging_steps": 1.0,
|
|
"max_steps": 2313,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 230,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 8.852766725217714e+18,
|
|
"train_batch_size": 2,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|