7475 lines
184 KiB
JSON
7475 lines
184 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.22003725936659077,
|
|
"eval_steps": 500,
|
|
"global_step": 1063,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00020699648105982198,
|
|
"grad_norm": 0.3105248212814331,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.3097,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.00041399296211964395,
|
|
"grad_norm": 0.35843613743782043,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.3333,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.000620989443179466,
|
|
"grad_norm": 0.31169694662094116,
|
|
"learning_rate": 3e-05,
|
|
"loss": 0.3367,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.0008279859242392879,
|
|
"grad_norm": 0.4025513529777527,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.3568,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.0010349824052991099,
|
|
"grad_norm": 0.38164222240448,
|
|
"learning_rate": 5e-05,
|
|
"loss": 0.375,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.001241978886358932,
|
|
"grad_norm": 0.41811275482177734,
|
|
"learning_rate": 6e-05,
|
|
"loss": 0.3421,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.001448975367418754,
|
|
"grad_norm": 0.40563058853149414,
|
|
"learning_rate": 7e-05,
|
|
"loss": 0.3046,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.0016559718484785758,
|
|
"grad_norm": 0.32561907172203064,
|
|
"learning_rate": 8e-05,
|
|
"loss": 0.3298,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.0018629683295383979,
|
|
"grad_norm": 0.2336910218000412,
|
|
"learning_rate": 9e-05,
|
|
"loss": 0.2331,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.0020699648105982197,
|
|
"grad_norm": 0.2214404195547104,
|
|
"learning_rate": 0.0001,
|
|
"loss": 0.2747,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.002276961291658042,
|
|
"grad_norm": 0.1755189299583435,
|
|
"learning_rate": 0.00011000000000000002,
|
|
"loss": 0.215,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.002483957772717864,
|
|
"grad_norm": 0.13464422523975372,
|
|
"learning_rate": 0.00012,
|
|
"loss": 0.1854,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.0026909542537776857,
|
|
"grad_norm": 0.1568724364042282,
|
|
"learning_rate": 0.00013000000000000002,
|
|
"loss": 0.1954,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.002897950734837508,
|
|
"grad_norm": 0.1855469048023224,
|
|
"learning_rate": 0.00014,
|
|
"loss": 0.1788,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.00310494721589733,
|
|
"grad_norm": 0.16363286972045898,
|
|
"learning_rate": 0.00015000000000000001,
|
|
"loss": 0.1816,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.0033119436969571516,
|
|
"grad_norm": 0.14675703644752502,
|
|
"learning_rate": 0.00016,
|
|
"loss": 0.144,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.003518940178016974,
|
|
"grad_norm": 0.13585928082466125,
|
|
"learning_rate": 0.00017,
|
|
"loss": 0.1265,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.0037259366590767957,
|
|
"grad_norm": 0.13742923736572266,
|
|
"learning_rate": 0.00018,
|
|
"loss": 0.1373,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.003932933140136618,
|
|
"grad_norm": 0.09649409353733063,
|
|
"learning_rate": 0.00019,
|
|
"loss": 0.1161,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.004139929621196439,
|
|
"grad_norm": 0.08537352085113525,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.1086,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.004346926102256262,
|
|
"grad_norm": 0.07582477480173111,
|
|
"learning_rate": 0.00019995842860112245,
|
|
"loss": 0.1047,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.004553922583316084,
|
|
"grad_norm": 0.07321921736001968,
|
|
"learning_rate": 0.00019991685720224486,
|
|
"loss": 0.1095,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.004760919064375905,
|
|
"grad_norm": 0.07746334373950958,
|
|
"learning_rate": 0.0001998752858033673,
|
|
"loss": 0.08,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.004967915545435728,
|
|
"grad_norm": 0.06693358719348907,
|
|
"learning_rate": 0.00019983371440448973,
|
|
"loss": 0.0932,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.00517491202649555,
|
|
"grad_norm": 0.09097249805927277,
|
|
"learning_rate": 0.00019979214300561214,
|
|
"loss": 0.095,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.005381908507555371,
|
|
"grad_norm": 0.06965727359056473,
|
|
"learning_rate": 0.00019975057160673458,
|
|
"loss": 0.0847,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.005588904988615194,
|
|
"grad_norm": 0.060345325618982315,
|
|
"learning_rate": 0.00019970900020785701,
|
|
"loss": 0.0609,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.005795901469675016,
|
|
"grad_norm": 0.06169256567955017,
|
|
"learning_rate": 0.00019966742880897945,
|
|
"loss": 0.0651,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.006002897950734837,
|
|
"grad_norm": 0.07536791265010834,
|
|
"learning_rate": 0.00019962585741010186,
|
|
"loss": 0.0614,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.00620989443179466,
|
|
"grad_norm": 0.06803280860185623,
|
|
"learning_rate": 0.0001995842860112243,
|
|
"loss": 0.0589,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.006416890912854482,
|
|
"grad_norm": 0.09668745845556259,
|
|
"learning_rate": 0.0001995427146123467,
|
|
"loss": 0.0511,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.006623887393914303,
|
|
"grad_norm": 0.060853298753499985,
|
|
"learning_rate": 0.00019950114321346915,
|
|
"loss": 0.0428,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.0068308838749741255,
|
|
"grad_norm": 0.04822453856468201,
|
|
"learning_rate": 0.00019945957181459156,
|
|
"loss": 0.0442,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.007037880356033948,
|
|
"grad_norm": 0.07689573615789413,
|
|
"learning_rate": 0.000199418000415714,
|
|
"loss": 0.0448,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.007244876837093769,
|
|
"grad_norm": 0.07483747601509094,
|
|
"learning_rate": 0.00019937642901683643,
|
|
"loss": 0.0449,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.0074518733181535915,
|
|
"grad_norm": 0.06677654385566711,
|
|
"learning_rate": 0.00019933485761795884,
|
|
"loss": 0.0368,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.007658869799213414,
|
|
"grad_norm": 0.06132747232913971,
|
|
"learning_rate": 0.00019929328621908128,
|
|
"loss": 0.0365,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.007865866280273236,
|
|
"grad_norm": 0.04215759411454201,
|
|
"learning_rate": 0.00019925171482020371,
|
|
"loss": 0.0307,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.008072862761333057,
|
|
"grad_norm": 0.05545351654291153,
|
|
"learning_rate": 0.00019921014342132612,
|
|
"loss": 0.0308,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.008279859242392879,
|
|
"grad_norm": 0.06014477089047432,
|
|
"learning_rate": 0.00019916857202244856,
|
|
"loss": 0.0335,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.008486855723452702,
|
|
"grad_norm": 0.06840485334396362,
|
|
"learning_rate": 0.000199127000623571,
|
|
"loss": 0.0287,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.008693852204512523,
|
|
"grad_norm": 0.0705009400844574,
|
|
"learning_rate": 0.0001990854292246934,
|
|
"loss": 0.0271,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.008900848685572345,
|
|
"grad_norm": 0.05818328261375427,
|
|
"learning_rate": 0.00019904385782581585,
|
|
"loss": 0.0253,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.009107845166632168,
|
|
"grad_norm": 0.03936947509646416,
|
|
"learning_rate": 0.00019900228642693828,
|
|
"loss": 0.0216,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.00931484164769199,
|
|
"grad_norm": 0.044559165835380554,
|
|
"learning_rate": 0.00019896071502806072,
|
|
"loss": 0.0212,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.00952183812875181,
|
|
"grad_norm": 0.04012398421764374,
|
|
"learning_rate": 0.00019891914362918313,
|
|
"loss": 0.014,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.009728834609811634,
|
|
"grad_norm": 0.048441193997859955,
|
|
"learning_rate": 0.00019887757223030557,
|
|
"loss": 0.0144,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.009935831090871455,
|
|
"grad_norm": 0.046538762748241425,
|
|
"learning_rate": 0.000198836000831428,
|
|
"loss": 0.0111,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.010142827571931277,
|
|
"grad_norm": 0.04020772501826286,
|
|
"learning_rate": 0.0001987944294325504,
|
|
"loss": 0.0103,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.0103498240529911,
|
|
"grad_norm": 0.030860010534524918,
|
|
"learning_rate": 0.00019875285803367285,
|
|
"loss": 0.0071,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.010556820534050921,
|
|
"grad_norm": 0.0534852109849453,
|
|
"learning_rate": 0.0001987112866347953,
|
|
"loss": 0.0082,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.010763817015110743,
|
|
"grad_norm": 0.061904191970825195,
|
|
"learning_rate": 0.0001986697152359177,
|
|
"loss": 0.0092,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.010970813496170566,
|
|
"grad_norm": 0.06135628744959831,
|
|
"learning_rate": 0.0001986281438370401,
|
|
"loss": 0.0091,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.011177809977230387,
|
|
"grad_norm": 0.060930103063583374,
|
|
"learning_rate": 0.00019858657243816254,
|
|
"loss": 0.0103,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.011384806458290209,
|
|
"grad_norm": 0.038847871124744415,
|
|
"learning_rate": 0.00019854500103928498,
|
|
"loss": 0.0077,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.011591802939350032,
|
|
"grad_norm": 0.01643364317715168,
|
|
"learning_rate": 0.0001985034296404074,
|
|
"loss": 0.0065,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.011798799420409853,
|
|
"grad_norm": 0.03226076811552048,
|
|
"learning_rate": 0.00019846185824152983,
|
|
"loss": 0.0059,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.012005795901469675,
|
|
"grad_norm": 0.045181699097156525,
|
|
"learning_rate": 0.00019842028684265227,
|
|
"loss": 0.0075,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.012212792382529498,
|
|
"grad_norm": 0.0442410409450531,
|
|
"learning_rate": 0.00019837871544377468,
|
|
"loss": 0.007,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.01241978886358932,
|
|
"grad_norm": 0.04646408557891846,
|
|
"learning_rate": 0.0001983371440448971,
|
|
"loss": 0.009,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.01262678534464914,
|
|
"grad_norm": 0.037814535200595856,
|
|
"learning_rate": 0.00019829557264601955,
|
|
"loss": 0.0062,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.012833781825708964,
|
|
"grad_norm": 0.029840698465704918,
|
|
"learning_rate": 0.000198254001247142,
|
|
"loss": 0.0075,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.013040778306768785,
|
|
"grad_norm": 0.009363808669149876,
|
|
"learning_rate": 0.0001982124298482644,
|
|
"loss": 0.0054,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.013247774787828607,
|
|
"grad_norm": 0.02504296600818634,
|
|
"learning_rate": 0.00019817085844938683,
|
|
"loss": 0.0051,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.01345477126888843,
|
|
"grad_norm": 0.03861517831683159,
|
|
"learning_rate": 0.00019812928705050927,
|
|
"loss": 0.0061,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.013661767749948251,
|
|
"grad_norm": 0.04222877696156502,
|
|
"learning_rate": 0.00019808771565163168,
|
|
"loss": 0.0074,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.013868764231008072,
|
|
"grad_norm": 0.03613612800836563,
|
|
"learning_rate": 0.00019804614425275412,
|
|
"loss": 0.0063,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.014075760712067896,
|
|
"grad_norm": 0.02817763015627861,
|
|
"learning_rate": 0.00019800457285387655,
|
|
"loss": 0.0074,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.014282757193127717,
|
|
"grad_norm": 0.01215298566967249,
|
|
"learning_rate": 0.00019796300145499896,
|
|
"loss": 0.0057,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.014489753674187538,
|
|
"grad_norm": 0.01781376637518406,
|
|
"learning_rate": 0.0001979214300561214,
|
|
"loss": 0.0067,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.014696750155247362,
|
|
"grad_norm": 0.02770097553730011,
|
|
"learning_rate": 0.00019787985865724384,
|
|
"loss": 0.0057,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.014903746636307183,
|
|
"grad_norm": 0.029749557375907898,
|
|
"learning_rate": 0.00019783828725836625,
|
|
"loss": 0.0063,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.015110743117367004,
|
|
"grad_norm": 0.02843872457742691,
|
|
"learning_rate": 0.00019779671585948869,
|
|
"loss": 0.0076,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.015317739598426828,
|
|
"grad_norm": 0.019167358055710793,
|
|
"learning_rate": 0.00019775514446061112,
|
|
"loss": 0.0059,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.015524736079486649,
|
|
"grad_norm": 0.01691405475139618,
|
|
"learning_rate": 0.00019771357306173353,
|
|
"loss": 0.0072,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.015731732560546472,
|
|
"grad_norm": 0.00585334375500679,
|
|
"learning_rate": 0.00019767200166285594,
|
|
"loss": 0.0047,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.015938729041606294,
|
|
"grad_norm": 0.019127612933516502,
|
|
"learning_rate": 0.00019763043026397838,
|
|
"loss": 0.0047,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.016145725522666115,
|
|
"grad_norm": 0.026558954268693924,
|
|
"learning_rate": 0.00019758885886510082,
|
|
"loss": 0.0064,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.016352722003725936,
|
|
"grad_norm": 0.0275382362306118,
|
|
"learning_rate": 0.00019754728746622325,
|
|
"loss": 0.006,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.016559718484785758,
|
|
"grad_norm": 0.016686394810676575,
|
|
"learning_rate": 0.00019750571606734566,
|
|
"loss": 0.0069,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.01676671496584558,
|
|
"grad_norm": 0.011664893478155136,
|
|
"learning_rate": 0.0001974641446684681,
|
|
"loss": 0.0055,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.016973711446905404,
|
|
"grad_norm": 0.010350242257118225,
|
|
"learning_rate": 0.00019742257326959054,
|
|
"loss": 0.0045,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.017180707927965225,
|
|
"grad_norm": 0.018541481345891953,
|
|
"learning_rate": 0.00019738100187071295,
|
|
"loss": 0.0056,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.017387704409025047,
|
|
"grad_norm": 0.015899088233709335,
|
|
"learning_rate": 0.00019733943047183539,
|
|
"loss": 0.0047,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.017594700890084868,
|
|
"grad_norm": 0.01706838794052601,
|
|
"learning_rate": 0.00019729785907295782,
|
|
"loss": 0.0059,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.01780169737114469,
|
|
"grad_norm": 0.01610150933265686,
|
|
"learning_rate": 0.00019725628767408023,
|
|
"loss": 0.0065,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.01800869385220451,
|
|
"grad_norm": 0.011388594284653664,
|
|
"learning_rate": 0.00019721471627520267,
|
|
"loss": 0.0054,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.018215690333264336,
|
|
"grad_norm": 0.010438695549964905,
|
|
"learning_rate": 0.0001971731448763251,
|
|
"loss": 0.005,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.018422686814324157,
|
|
"grad_norm": 0.01277916319668293,
|
|
"learning_rate": 0.00019713157347744752,
|
|
"loss": 0.0045,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.01862968329538398,
|
|
"grad_norm": 0.016964582726359367,
|
|
"learning_rate": 0.00019709000207856995,
|
|
"loss": 0.0064,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.0188366797764438,
|
|
"grad_norm": 0.015511998906731606,
|
|
"learning_rate": 0.0001970484306796924,
|
|
"loss": 0.0046,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.01904367625750362,
|
|
"grad_norm": 0.014610686339437962,
|
|
"learning_rate": 0.00019700685928081483,
|
|
"loss": 0.0068,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.019250672738563443,
|
|
"grad_norm": 0.011182552203536034,
|
|
"learning_rate": 0.00019696528788193724,
|
|
"loss": 0.005,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.019457669219623268,
|
|
"grad_norm": 0.00904427282512188,
|
|
"learning_rate": 0.00019692371648305967,
|
|
"loss": 0.0052,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.01966466570068309,
|
|
"grad_norm": 0.013450189493596554,
|
|
"learning_rate": 0.0001968821450841821,
|
|
"loss": 0.0053,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.01987166218174291,
|
|
"grad_norm": 0.011684760451316833,
|
|
"learning_rate": 0.00019684057368530452,
|
|
"loss": 0.0051,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.020078658662802732,
|
|
"grad_norm": 0.009463542141020298,
|
|
"learning_rate": 0.00019679900228642693,
|
|
"loss": 0.0055,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.020285655143862553,
|
|
"grad_norm": 0.007411513477563858,
|
|
"learning_rate": 0.00019675743088754937,
|
|
"loss": 0.005,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.020492651624922375,
|
|
"grad_norm": 0.013031812384724617,
|
|
"learning_rate": 0.0001967158594886718,
|
|
"loss": 0.0063,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.0206996481059822,
|
|
"grad_norm": 0.006821679417043924,
|
|
"learning_rate": 0.00019667428808979422,
|
|
"loss": 0.0058,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.02090664458704202,
|
|
"grad_norm": 0.009770995937287807,
|
|
"learning_rate": 0.00019663271669091665,
|
|
"loss": 0.0045,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.021113641068101843,
|
|
"grad_norm": 0.007675915956497192,
|
|
"learning_rate": 0.0001965911452920391,
|
|
"loss": 0.0049,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.021320637549161664,
|
|
"grad_norm": 0.009076464921236038,
|
|
"learning_rate": 0.0001965495738931615,
|
|
"loss": 0.0048,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.021527634030221485,
|
|
"grad_norm": 0.010678350925445557,
|
|
"learning_rate": 0.00019650800249428394,
|
|
"loss": 0.0056,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.021734630511281307,
|
|
"grad_norm": 0.029563505202531815,
|
|
"learning_rate": 0.00019646643109540637,
|
|
"loss": 0.0049,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.02194162699234113,
|
|
"grad_norm": 0.0072585404850542545,
|
|
"learning_rate": 0.00019642485969652878,
|
|
"loss": 0.005,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.022148623473400953,
|
|
"grad_norm": 0.009124008938670158,
|
|
"learning_rate": 0.00019638328829765122,
|
|
"loss": 0.0045,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.022355619954460774,
|
|
"grad_norm": 0.005743277724832296,
|
|
"learning_rate": 0.00019634171689877366,
|
|
"loss": 0.0051,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.022562616435520596,
|
|
"grad_norm": 0.01300257071852684,
|
|
"learning_rate": 0.0001963001454998961,
|
|
"loss": 0.0049,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.022769612916580417,
|
|
"grad_norm": 0.02877631224691868,
|
|
"learning_rate": 0.0001962585741010185,
|
|
"loss": 0.0053,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.02297660939764024,
|
|
"grad_norm": 0.010237788781523705,
|
|
"learning_rate": 0.00019621700270214094,
|
|
"loss": 0.0046,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.023183605878700064,
|
|
"grad_norm": 0.010189997963607311,
|
|
"learning_rate": 0.00019617543130326338,
|
|
"loss": 0.0056,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.023390602359759885,
|
|
"grad_norm": 0.010226712562143803,
|
|
"learning_rate": 0.0001961338599043858,
|
|
"loss": 0.005,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.023597598840819706,
|
|
"grad_norm": 0.011194237507879734,
|
|
"learning_rate": 0.00019609228850550823,
|
|
"loss": 0.0054,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.023804595321879528,
|
|
"grad_norm": 0.0065891253761947155,
|
|
"learning_rate": 0.00019605071710663066,
|
|
"loss": 0.0057,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.02401159180293935,
|
|
"grad_norm": 0.008131214417517185,
|
|
"learning_rate": 0.00019600914570775307,
|
|
"loss": 0.0056,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.02421858828399917,
|
|
"grad_norm": 0.013916265219449997,
|
|
"learning_rate": 0.0001959675743088755,
|
|
"loss": 0.0051,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.024425584765058996,
|
|
"grad_norm": 0.00658258656039834,
|
|
"learning_rate": 0.00019592600290999795,
|
|
"loss": 0.0048,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.024632581246118817,
|
|
"grad_norm": 0.005407229065895081,
|
|
"learning_rate": 0.00019588443151112036,
|
|
"loss": 0.0053,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.02483957772717864,
|
|
"grad_norm": 0.009581067599356174,
|
|
"learning_rate": 0.00019584286011224277,
|
|
"loss": 0.0048,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.02504657420823846,
|
|
"grad_norm": 0.011583163402974606,
|
|
"learning_rate": 0.0001958012887133652,
|
|
"loss": 0.0049,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.02525357068929828,
|
|
"grad_norm": 0.017572317272424698,
|
|
"learning_rate": 0.00019575971731448764,
|
|
"loss": 0.0049,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.025460567170358103,
|
|
"grad_norm": 0.018644949421286583,
|
|
"learning_rate": 0.00019571814591561005,
|
|
"loss": 0.0068,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.025667563651417927,
|
|
"grad_norm": 0.014408939518034458,
|
|
"learning_rate": 0.0001956765745167325,
|
|
"loss": 0.0056,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.02587456013247775,
|
|
"grad_norm": 0.0033774918410927057,
|
|
"learning_rate": 0.00019563500311785493,
|
|
"loss": 0.0053,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.02608155661353757,
|
|
"grad_norm": 0.005626993719488382,
|
|
"learning_rate": 0.00019559343171897736,
|
|
"loss": 0.0046,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.02628855309459739,
|
|
"grad_norm": 0.018701711669564247,
|
|
"learning_rate": 0.00019555186032009977,
|
|
"loss": 0.0077,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.026495549575657213,
|
|
"grad_norm": 0.016636714339256287,
|
|
"learning_rate": 0.0001955102889212222,
|
|
"loss": 0.005,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.026702546056717034,
|
|
"grad_norm": 0.013526069931685925,
|
|
"learning_rate": 0.00019546871752234465,
|
|
"loss": 0.0047,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.02690954253777686,
|
|
"grad_norm": 0.024728018790483475,
|
|
"learning_rate": 0.00019542714612346706,
|
|
"loss": 0.0045,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.02711653901883668,
|
|
"grad_norm": 0.02217916212975979,
|
|
"learning_rate": 0.0001953855747245895,
|
|
"loss": 0.0045,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.027323535499896502,
|
|
"grad_norm": 0.010518092662096024,
|
|
"learning_rate": 0.00019534400332571193,
|
|
"loss": 0.0065,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.027530531980956324,
|
|
"grad_norm": 0.008342115208506584,
|
|
"learning_rate": 0.00019530243192683434,
|
|
"loss": 0.0063,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.027737528462016145,
|
|
"grad_norm": 0.008312125690281391,
|
|
"learning_rate": 0.00019526086052795678,
|
|
"loss": 0.0047,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.027944524943075966,
|
|
"grad_norm": 0.006928388494998217,
|
|
"learning_rate": 0.00019521928912907921,
|
|
"loss": 0.005,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.02815152142413579,
|
|
"grad_norm": 0.0073064775206148624,
|
|
"learning_rate": 0.00019517771773020162,
|
|
"loss": 0.0061,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.028358517905195613,
|
|
"grad_norm": 0.007849021814763546,
|
|
"learning_rate": 0.00019513614633132406,
|
|
"loss": 0.0052,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.028565514386255434,
|
|
"grad_norm": 0.006330843083560467,
|
|
"learning_rate": 0.0001950945749324465,
|
|
"loss": 0.006,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.028772510867315255,
|
|
"grad_norm": 0.002727820537984371,
|
|
"learning_rate": 0.00019505300353356894,
|
|
"loss": 0.0047,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.028979507348375077,
|
|
"grad_norm": 0.006755925714969635,
|
|
"learning_rate": 0.00019501143213469135,
|
|
"loss": 0.0043,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.0291865038294349,
|
|
"grad_norm": 0.007393544539809227,
|
|
"learning_rate": 0.00019496986073581376,
|
|
"loss": 0.0047,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.029393500310494723,
|
|
"grad_norm": 0.007699685171246529,
|
|
"learning_rate": 0.0001949282893369362,
|
|
"loss": 0.0053,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.029600496791554545,
|
|
"grad_norm": 0.003382055787369609,
|
|
"learning_rate": 0.0001948867179380586,
|
|
"loss": 0.0042,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.029807493272614366,
|
|
"grad_norm": 0.01596757024526596,
|
|
"learning_rate": 0.00019484514653918104,
|
|
"loss": 0.0078,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.030014489753674187,
|
|
"grad_norm": 0.00668082432821393,
|
|
"learning_rate": 0.00019480357514030348,
|
|
"loss": 0.0052,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.03022148623473401,
|
|
"grad_norm": 0.007241010665893555,
|
|
"learning_rate": 0.00019476200374142591,
|
|
"loss": 0.0054,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.03042848271579383,
|
|
"grad_norm": 0.009339329786598682,
|
|
"learning_rate": 0.00019472043234254832,
|
|
"loss": 0.0066,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.030635479196853655,
|
|
"grad_norm": 0.007252382580190897,
|
|
"learning_rate": 0.00019467886094367076,
|
|
"loss": 0.0046,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.030842475677913476,
|
|
"grad_norm": 0.004665658809244633,
|
|
"learning_rate": 0.0001946372895447932,
|
|
"loss": 0.0044,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.031049472158973298,
|
|
"grad_norm": 0.006148173939436674,
|
|
"learning_rate": 0.0001945957181459156,
|
|
"loss": 0.0049,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.03125646864003312,
|
|
"grad_norm": 0.004451967775821686,
|
|
"learning_rate": 0.00019455414674703805,
|
|
"loss": 0.0061,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.031463465121092944,
|
|
"grad_norm": 0.004053746350109577,
|
|
"learning_rate": 0.00019451257534816048,
|
|
"loss": 0.0047,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.03167046160215276,
|
|
"grad_norm": 0.009219355881214142,
|
|
"learning_rate": 0.0001944710039492829,
|
|
"loss": 0.0048,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.03187745808321259,
|
|
"grad_norm": 0.020168175920844078,
|
|
"learning_rate": 0.00019442943255040533,
|
|
"loss": 0.0063,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.032084454564272405,
|
|
"grad_norm": 0.0030542612075805664,
|
|
"learning_rate": 0.00019438786115152777,
|
|
"loss": 0.0048,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.03229145104533223,
|
|
"grad_norm": 0.00561768002808094,
|
|
"learning_rate": 0.0001943462897526502,
|
|
"loss": 0.0047,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.032498447526392055,
|
|
"grad_norm": 0.004265373572707176,
|
|
"learning_rate": 0.0001943047183537726,
|
|
"loss": 0.0062,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.03270544400745187,
|
|
"grad_norm": 0.005523406434804201,
|
|
"learning_rate": 0.00019426314695489505,
|
|
"loss": 0.0044,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.0329124404885117,
|
|
"grad_norm": 0.009324849583208561,
|
|
"learning_rate": 0.0001942215755560175,
|
|
"loss": 0.0055,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.033119436969571515,
|
|
"grad_norm": 0.012468270026147366,
|
|
"learning_rate": 0.0001941800041571399,
|
|
"loss": 0.0045,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.03332643345063134,
|
|
"grad_norm": 0.0018690524157136679,
|
|
"learning_rate": 0.00019413843275826233,
|
|
"loss": 0.0047,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.03353342993169116,
|
|
"grad_norm": 0.009851769544184208,
|
|
"learning_rate": 0.00019409686135938477,
|
|
"loss": 0.0052,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.03374042641275098,
|
|
"grad_norm": 0.0014802832156419754,
|
|
"learning_rate": 0.00019405528996050718,
|
|
"loss": 0.0045,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.03394742289381081,
|
|
"grad_norm": 0.00177583540789783,
|
|
"learning_rate": 0.0001940137185616296,
|
|
"loss": 0.0046,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.034154419374870626,
|
|
"grad_norm": 0.0017039773520082235,
|
|
"learning_rate": 0.00019397214716275203,
|
|
"loss": 0.0044,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.03436141585593045,
|
|
"grad_norm": 0.010193880647420883,
|
|
"learning_rate": 0.00019393057576387447,
|
|
"loss": 0.0054,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.03456841233699027,
|
|
"grad_norm": 0.011056206189095974,
|
|
"learning_rate": 0.00019388900436499688,
|
|
"loss": 0.0051,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.034775408818050094,
|
|
"grad_norm": 0.004655253142118454,
|
|
"learning_rate": 0.0001938474329661193,
|
|
"loss": 0.0046,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.03498240529910992,
|
|
"grad_norm": 0.0035964485723525286,
|
|
"learning_rate": 0.00019380586156724175,
|
|
"loss": 0.0045,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.035189401780169736,
|
|
"grad_norm": 0.0026267431676387787,
|
|
"learning_rate": 0.00019376429016836416,
|
|
"loss": 0.0047,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.03539639826122956,
|
|
"grad_norm": 0.008452721871435642,
|
|
"learning_rate": 0.0001937227187694866,
|
|
"loss": 0.0059,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.03560339474228938,
|
|
"grad_norm": 0.006845233030617237,
|
|
"learning_rate": 0.00019368114737060903,
|
|
"loss": 0.0051,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.035810391223349204,
|
|
"grad_norm": 0.005468891002237797,
|
|
"learning_rate": 0.00019363957597173144,
|
|
"loss": 0.0059,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.03601738770440902,
|
|
"grad_norm": 0.007444227579981089,
|
|
"learning_rate": 0.00019359800457285388,
|
|
"loss": 0.0044,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.03622438418546885,
|
|
"grad_norm": 0.011544904671609402,
|
|
"learning_rate": 0.00019355643317397632,
|
|
"loss": 0.0053,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.03643138066652867,
|
|
"grad_norm": 0.04777868092060089,
|
|
"learning_rate": 0.00019351486177509875,
|
|
"loss": 0.005,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.03663837714758849,
|
|
"grad_norm": 0.004177747759968042,
|
|
"learning_rate": 0.00019347329037622116,
|
|
"loss": 0.0048,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.036845373628648315,
|
|
"grad_norm": 0.0020709133241325617,
|
|
"learning_rate": 0.0001934317189773436,
|
|
"loss": 0.0045,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.03705237010970813,
|
|
"grad_norm": 0.012106567621231079,
|
|
"learning_rate": 0.00019339014757846604,
|
|
"loss": 0.0043,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.03725936659076796,
|
|
"grad_norm": 0.005318734794855118,
|
|
"learning_rate": 0.00019334857617958845,
|
|
"loss": 0.005,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.03746636307182778,
|
|
"grad_norm": 0.04815113916993141,
|
|
"learning_rate": 0.00019330700478071089,
|
|
"loss": 0.0056,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.0376733595528876,
|
|
"grad_norm": 0.029821882024407387,
|
|
"learning_rate": 0.00019326543338183332,
|
|
"loss": 0.0053,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.037880356033947425,
|
|
"grad_norm": 0.010615061037242413,
|
|
"learning_rate": 0.00019322386198295573,
|
|
"loss": 0.0049,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.03808735251500724,
|
|
"grad_norm": 0.00788772851228714,
|
|
"learning_rate": 0.00019318229058407817,
|
|
"loss": 0.0043,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.03829434899606707,
|
|
"grad_norm": 0.0045401486568152905,
|
|
"learning_rate": 0.00019314071918520058,
|
|
"loss": 0.0056,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.038501345477126886,
|
|
"grad_norm": 0.004325198009610176,
|
|
"learning_rate": 0.00019309914778632302,
|
|
"loss": 0.0059,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.03870834195818671,
|
|
"grad_norm": 0.012164851650595665,
|
|
"learning_rate": 0.00019305757638744543,
|
|
"loss": 0.0046,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.038915338439246536,
|
|
"grad_norm": 0.02496037259697914,
|
|
"learning_rate": 0.00019301600498856786,
|
|
"loss": 0.0068,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.039122334920306354,
|
|
"grad_norm": 0.011729522608220577,
|
|
"learning_rate": 0.0001929744335896903,
|
|
"loss": 0.0047,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.03932933140136618,
|
|
"grad_norm": 0.003992550540715456,
|
|
"learning_rate": 0.0001929328621908127,
|
|
"loss": 0.0047,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.039536327882425996,
|
|
"grad_norm": 0.012220533564686775,
|
|
"learning_rate": 0.00019289129079193515,
|
|
"loss": 0.0059,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.03974332436348582,
|
|
"grad_norm": 0.0037318835966289043,
|
|
"learning_rate": 0.00019284971939305759,
|
|
"loss": 0.0049,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.039950320844545646,
|
|
"grad_norm": 0.008259239606559277,
|
|
"learning_rate": 0.00019280814799418002,
|
|
"loss": 0.0047,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.040157317325605464,
|
|
"grad_norm": 0.0029760177712887526,
|
|
"learning_rate": 0.00019276657659530243,
|
|
"loss": 0.0046,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.04036431380666529,
|
|
"grad_norm": 0.007835526019334793,
|
|
"learning_rate": 0.00019272500519642487,
|
|
"loss": 0.0045,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.04057131028772511,
|
|
"grad_norm": 0.019788436591625214,
|
|
"learning_rate": 0.0001926834337975473,
|
|
"loss": 0.0056,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.04077830676878493,
|
|
"grad_norm": 0.0054263221099972725,
|
|
"learning_rate": 0.00019264186239866972,
|
|
"loss": 0.0047,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.04098530324984475,
|
|
"grad_norm": 0.006738686002790928,
|
|
"learning_rate": 0.00019260029099979215,
|
|
"loss": 0.0053,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.041192299730904575,
|
|
"grad_norm": 0.0019211384933441877,
|
|
"learning_rate": 0.0001925587196009146,
|
|
"loss": 0.0046,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.0413992962119644,
|
|
"grad_norm": 0.0064520747400820255,
|
|
"learning_rate": 0.000192517148202037,
|
|
"loss": 0.0049,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.04160629269302422,
|
|
"grad_norm": 0.0034363584127277136,
|
|
"learning_rate": 0.00019247557680315944,
|
|
"loss": 0.0048,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.04181328917408404,
|
|
"grad_norm": 0.021868525072932243,
|
|
"learning_rate": 0.00019243400540428187,
|
|
"loss": 0.0055,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.04202028565514386,
|
|
"grad_norm": 0.011735360138118267,
|
|
"learning_rate": 0.0001923924340054043,
|
|
"loss": 0.0059,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.042227282136203685,
|
|
"grad_norm": 0.008518829010426998,
|
|
"learning_rate": 0.00019235086260652672,
|
|
"loss": 0.0062,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.0424342786172635,
|
|
"grad_norm": 0.007598051335662603,
|
|
"learning_rate": 0.00019230929120764916,
|
|
"loss": 0.005,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.04264127509832333,
|
|
"grad_norm": 0.004902805667370558,
|
|
"learning_rate": 0.0001922677198087716,
|
|
"loss": 0.0047,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.04284827157938315,
|
|
"grad_norm": 0.01092604547739029,
|
|
"learning_rate": 0.00019222614840989398,
|
|
"loss": 0.0062,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.04305526806044297,
|
|
"grad_norm": 0.017829621210694313,
|
|
"learning_rate": 0.00019218457701101642,
|
|
"loss": 0.0069,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.043262264541502796,
|
|
"grad_norm": 0.0056928307749331,
|
|
"learning_rate": 0.00019214300561213885,
|
|
"loss": 0.0052,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.043469261022562614,
|
|
"grad_norm": 0.0032917701173573732,
|
|
"learning_rate": 0.0001921014342132613,
|
|
"loss": 0.0049,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.04367625750362244,
|
|
"grad_norm": 0.00267777475528419,
|
|
"learning_rate": 0.0001920598628143837,
|
|
"loss": 0.0047,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.04388325398468226,
|
|
"grad_norm": 0.005063153337687254,
|
|
"learning_rate": 0.00019201829141550614,
|
|
"loss": 0.0065,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.04409025046574208,
|
|
"grad_norm": 0.007821328938007355,
|
|
"learning_rate": 0.00019197672001662857,
|
|
"loss": 0.0073,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.044297246946801906,
|
|
"grad_norm": 0.0022040351759642363,
|
|
"learning_rate": 0.00019193514861775098,
|
|
"loss": 0.0042,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.044504243427861724,
|
|
"grad_norm": 0.010549996979534626,
|
|
"learning_rate": 0.00019189357721887342,
|
|
"loss": 0.0048,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.04471123990892155,
|
|
"grad_norm": 0.003572909627109766,
|
|
"learning_rate": 0.00019185200581999586,
|
|
"loss": 0.0048,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.04491823638998137,
|
|
"grad_norm": 0.0047572036273777485,
|
|
"learning_rate": 0.00019181043442111827,
|
|
"loss": 0.0046,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.04512523287104119,
|
|
"grad_norm": 0.003330536652356386,
|
|
"learning_rate": 0.0001917688630222407,
|
|
"loss": 0.0043,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.04533222935210102,
|
|
"grad_norm": 0.003489327384158969,
|
|
"learning_rate": 0.00019172729162336314,
|
|
"loss": 0.0044,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.045539225833160835,
|
|
"grad_norm": 0.006631118711084127,
|
|
"learning_rate": 0.00019168572022448555,
|
|
"loss": 0.0046,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.04574622231422066,
|
|
"grad_norm": 0.0016155489720404148,
|
|
"learning_rate": 0.000191644148825608,
|
|
"loss": 0.0045,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.04595321879528048,
|
|
"grad_norm": 0.00484581058844924,
|
|
"learning_rate": 0.00019160257742673043,
|
|
"loss": 0.0049,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.0461602152763403,
|
|
"grad_norm": 0.016010191291570663,
|
|
"learning_rate": 0.00019156100602785286,
|
|
"loss": 0.0052,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.04636721175740013,
|
|
"grad_norm": 0.0038696257397532463,
|
|
"learning_rate": 0.00019151943462897527,
|
|
"loss": 0.0046,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.046574208238459945,
|
|
"grad_norm": 0.003132582874968648,
|
|
"learning_rate": 0.0001914778632300977,
|
|
"loss": 0.0055,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.04678120471951977,
|
|
"grad_norm": 0.003856977680698037,
|
|
"learning_rate": 0.00019143629183122015,
|
|
"loss": 0.0048,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.04698820120057959,
|
|
"grad_norm": 0.0084042027592659,
|
|
"learning_rate": 0.00019139472043234256,
|
|
"loss": 0.005,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.04719519768163941,
|
|
"grad_norm": 0.004033537581562996,
|
|
"learning_rate": 0.000191353149033465,
|
|
"loss": 0.0053,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.04740219416269923,
|
|
"grad_norm": 0.002745938254520297,
|
|
"learning_rate": 0.0001913115776345874,
|
|
"loss": 0.005,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.047609190643759056,
|
|
"grad_norm": 0.0033783107064664364,
|
|
"learning_rate": 0.00019127000623570984,
|
|
"loss": 0.0045,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.04781618712481888,
|
|
"grad_norm": 0.011202207766473293,
|
|
"learning_rate": 0.00019122843483683225,
|
|
"loss": 0.0054,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.0480231836058787,
|
|
"grad_norm": 0.006426738575100899,
|
|
"learning_rate": 0.0001911868634379547,
|
|
"loss": 0.0049,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.04823018008693852,
|
|
"grad_norm": 0.0026592197827994823,
|
|
"learning_rate": 0.00019114529203907713,
|
|
"loss": 0.0045,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.04843717656799834,
|
|
"grad_norm": 0.0044938791543245316,
|
|
"learning_rate": 0.00019110372064019954,
|
|
"loss": 0.0048,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.048644173049058166,
|
|
"grad_norm": 0.004537639208137989,
|
|
"learning_rate": 0.00019106214924132197,
|
|
"loss": 0.0043,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.04885116953011799,
|
|
"grad_norm": 0.007899290882050991,
|
|
"learning_rate": 0.0001910205778424444,
|
|
"loss": 0.0047,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.04905816601117781,
|
|
"grad_norm": 0.003346335142850876,
|
|
"learning_rate": 0.00019097900644356682,
|
|
"loss": 0.0046,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.049265162492237634,
|
|
"grad_norm": 0.012666060589253902,
|
|
"learning_rate": 0.00019093743504468926,
|
|
"loss": 0.0051,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.04947215897329745,
|
|
"grad_norm": 0.005454343743622303,
|
|
"learning_rate": 0.0001908958636458117,
|
|
"loss": 0.0052,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.04967915545435728,
|
|
"grad_norm": 0.0051568858325481415,
|
|
"learning_rate": 0.00019085429224693413,
|
|
"loss": 0.0054,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.049886151935417095,
|
|
"grad_norm": 0.005354621913284063,
|
|
"learning_rate": 0.00019081272084805654,
|
|
"loss": 0.0045,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.05009314841647692,
|
|
"grad_norm": 0.00728578818961978,
|
|
"learning_rate": 0.00019077114944917898,
|
|
"loss": 0.0056,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.050300144897536744,
|
|
"grad_norm": 0.012164080515503883,
|
|
"learning_rate": 0.00019072957805030141,
|
|
"loss": 0.0055,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.05050714137859656,
|
|
"grad_norm": 0.006451157853007317,
|
|
"learning_rate": 0.00019068800665142382,
|
|
"loss": 0.0045,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.05071413785965639,
|
|
"grad_norm": 0.04119205102324486,
|
|
"learning_rate": 0.00019064643525254626,
|
|
"loss": 0.0048,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.050921134340716205,
|
|
"grad_norm": 0.006683278828859329,
|
|
"learning_rate": 0.0001906048638536687,
|
|
"loss": 0.0054,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.05112813082177603,
|
|
"grad_norm": 0.004711155779659748,
|
|
"learning_rate": 0.0001905632924547911,
|
|
"loss": 0.0052,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.051335127302835855,
|
|
"grad_norm": 0.022338053211569786,
|
|
"learning_rate": 0.00019052172105591355,
|
|
"loss": 0.0051,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.05154212378389567,
|
|
"grad_norm": 0.0033723730593919754,
|
|
"learning_rate": 0.00019048014965703598,
|
|
"loss": 0.0041,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.0517491202649555,
|
|
"grad_norm": 0.013185818679630756,
|
|
"learning_rate": 0.00019043857825815842,
|
|
"loss": 0.0071,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.051956116746015316,
|
|
"grad_norm": 0.01329563558101654,
|
|
"learning_rate": 0.0001903970068592808,
|
|
"loss": 0.0047,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.05216311322707514,
|
|
"grad_norm": 0.0038430816493928432,
|
|
"learning_rate": 0.00019035543546040324,
|
|
"loss": 0.0049,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.05237010970813496,
|
|
"grad_norm": 0.006868777330964804,
|
|
"learning_rate": 0.00019031386406152568,
|
|
"loss": 0.0045,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.05257710618919478,
|
|
"grad_norm": 0.002608188660815358,
|
|
"learning_rate": 0.0001902722926626481,
|
|
"loss": 0.005,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.05278410267025461,
|
|
"grad_norm": 0.005354885943233967,
|
|
"learning_rate": 0.00019023072126377052,
|
|
"loss": 0.0058,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.052991099151314426,
|
|
"grad_norm": 0.006729793269187212,
|
|
"learning_rate": 0.00019018914986489296,
|
|
"loss": 0.0056,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.05319809563237425,
|
|
"grad_norm": 0.0042056431993842125,
|
|
"learning_rate": 0.0001901475784660154,
|
|
"loss": 0.0042,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.05340509211343407,
|
|
"grad_norm": 0.006845582276582718,
|
|
"learning_rate": 0.0001901060070671378,
|
|
"loss": 0.0048,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.053612088594493894,
|
|
"grad_norm": 0.004010177683085203,
|
|
"learning_rate": 0.00019006443566826025,
|
|
"loss": 0.005,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.05381908507555372,
|
|
"grad_norm": 0.0019074059091508389,
|
|
"learning_rate": 0.00019002286426938268,
|
|
"loss": 0.005,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.05402608155661354,
|
|
"grad_norm": 0.002619614126160741,
|
|
"learning_rate": 0.0001899812928705051,
|
|
"loss": 0.0043,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.05423307803767336,
|
|
"grad_norm": 0.002715028589591384,
|
|
"learning_rate": 0.00018993972147162753,
|
|
"loss": 0.0046,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.05444007451873318,
|
|
"grad_norm": 0.004556506406515837,
|
|
"learning_rate": 0.00018989815007274997,
|
|
"loss": 0.0046,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.054647070999793004,
|
|
"grad_norm": 0.010189310647547245,
|
|
"learning_rate": 0.00018985657867387238,
|
|
"loss": 0.0047,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.05485406748085282,
|
|
"grad_norm": 0.002120416844263673,
|
|
"learning_rate": 0.0001898150072749948,
|
|
"loss": 0.0051,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.05506106396191265,
|
|
"grad_norm": 0.008936331607401371,
|
|
"learning_rate": 0.00018977343587611725,
|
|
"loss": 0.005,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.05526806044297247,
|
|
"grad_norm": 0.0026866530533879995,
|
|
"learning_rate": 0.00018973186447723966,
|
|
"loss": 0.0047,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.05547505692403229,
|
|
"grad_norm": 0.009859035722911358,
|
|
"learning_rate": 0.0001896902930783621,
|
|
"loss": 0.0052,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.055682053405092115,
|
|
"grad_norm": 0.0024370807223021984,
|
|
"learning_rate": 0.00018964872167948453,
|
|
"loss": 0.0053,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.05588904988615193,
|
|
"grad_norm": 0.006978074554353952,
|
|
"learning_rate": 0.00018960715028060697,
|
|
"loss": 0.0062,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.05609604636721176,
|
|
"grad_norm": 0.0037223445251584053,
|
|
"learning_rate": 0.00018956557888172938,
|
|
"loss": 0.0047,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.05630304284827158,
|
|
"grad_norm": 0.018946697935461998,
|
|
"learning_rate": 0.00018952400748285182,
|
|
"loss": 0.0054,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.0565100393293314,
|
|
"grad_norm": 0.009405361488461494,
|
|
"learning_rate": 0.00018948243608397423,
|
|
"loss": 0.0043,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.056717035810391225,
|
|
"grad_norm": 0.0008082574931904674,
|
|
"learning_rate": 0.00018944086468509667,
|
|
"loss": 0.0045,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.05692403229145104,
|
|
"grad_norm": 0.004950170870870352,
|
|
"learning_rate": 0.00018939929328621908,
|
|
"loss": 0.0044,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.05713102877251087,
|
|
"grad_norm": 0.010343370027840137,
|
|
"learning_rate": 0.0001893577218873415,
|
|
"loss": 0.0046,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.057338025253570686,
|
|
"grad_norm": 0.005096866749227047,
|
|
"learning_rate": 0.00018931615048846395,
|
|
"loss": 0.005,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.05754502173463051,
|
|
"grad_norm": 0.0033187547232955694,
|
|
"learning_rate": 0.00018927457908958636,
|
|
"loss": 0.0041,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.057752018215690336,
|
|
"grad_norm": 0.0031861995812505484,
|
|
"learning_rate": 0.0001892330076907088,
|
|
"loss": 0.0059,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.057959014696750154,
|
|
"grad_norm": 0.0053593809716403484,
|
|
"learning_rate": 0.00018919143629183123,
|
|
"loss": 0.0047,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.05816601117780998,
|
|
"grad_norm": 0.005950809922069311,
|
|
"learning_rate": 0.00018914986489295364,
|
|
"loss": 0.0046,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.0583730076588698,
|
|
"grad_norm": 0.006816082634031773,
|
|
"learning_rate": 0.00018910829349407608,
|
|
"loss": 0.0048,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.05858000413992962,
|
|
"grad_norm": 0.005741049535572529,
|
|
"learning_rate": 0.00018906672209519852,
|
|
"loss": 0.0048,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.058787000620989446,
|
|
"grad_norm": 0.004367714747786522,
|
|
"learning_rate": 0.00018902515069632093,
|
|
"loss": 0.0044,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.058993997102049264,
|
|
"grad_norm": 0.0051370360888540745,
|
|
"learning_rate": 0.00018898357929744336,
|
|
"loss": 0.0049,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.05920099358310909,
|
|
"grad_norm": 0.00557302962988615,
|
|
"learning_rate": 0.0001889420078985658,
|
|
"loss": 0.005,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.05940799006416891,
|
|
"grad_norm": 0.004271439276635647,
|
|
"learning_rate": 0.00018890043649968824,
|
|
"loss": 0.0042,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.05961498654522873,
|
|
"grad_norm": 0.00942255649715662,
|
|
"learning_rate": 0.00018885886510081065,
|
|
"loss": 0.005,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.05982198302628855,
|
|
"grad_norm": 0.001850366359576583,
|
|
"learning_rate": 0.00018881729370193309,
|
|
"loss": 0.0049,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.060028979507348375,
|
|
"grad_norm": 0.005905089899897575,
|
|
"learning_rate": 0.00018877572230305552,
|
|
"loss": 0.0049,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.0602359759884082,
|
|
"grad_norm": 0.006148039363324642,
|
|
"learning_rate": 0.00018873415090417793,
|
|
"loss": 0.0058,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.06044297246946802,
|
|
"grad_norm": 0.004467264749109745,
|
|
"learning_rate": 0.00018869257950530037,
|
|
"loss": 0.0046,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.06064996895052784,
|
|
"grad_norm": 0.0026016500778496265,
|
|
"learning_rate": 0.0001886510081064228,
|
|
"loss": 0.0046,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.06085696543158766,
|
|
"grad_norm": 0.00536306481808424,
|
|
"learning_rate": 0.00018860943670754522,
|
|
"loss": 0.0046,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.061063961912647485,
|
|
"grad_norm": 0.00043015365372411907,
|
|
"learning_rate": 0.00018856786530866763,
|
|
"loss": 0.0051,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.06127095839370731,
|
|
"grad_norm": 0.002035475103184581,
|
|
"learning_rate": 0.00018852629390979006,
|
|
"loss": 0.0044,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.06147795487476713,
|
|
"grad_norm": 0.004757678601890802,
|
|
"learning_rate": 0.0001884847225109125,
|
|
"loss": 0.0055,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.06168495135582695,
|
|
"grad_norm": 0.004153924528509378,
|
|
"learning_rate": 0.0001884431511120349,
|
|
"loss": 0.0047,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.06189194783688677,
|
|
"grad_norm": 0.004693943541496992,
|
|
"learning_rate": 0.00018840157971315735,
|
|
"loss": 0.006,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.062098944317946596,
|
|
"grad_norm": 0.005683131981641054,
|
|
"learning_rate": 0.00018836000831427979,
|
|
"loss": 0.0054,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.062305940799006414,
|
|
"grad_norm": 0.01867171749472618,
|
|
"learning_rate": 0.0001883184369154022,
|
|
"loss": 0.0047,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.06251293728006624,
|
|
"grad_norm": 0.010631178505718708,
|
|
"learning_rate": 0.00018827686551652463,
|
|
"loss": 0.0052,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.06271993376112606,
|
|
"grad_norm": 0.0025544106028974056,
|
|
"learning_rate": 0.00018823529411764707,
|
|
"loss": 0.0044,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.06292693024218589,
|
|
"grad_norm": 0.005773225799202919,
|
|
"learning_rate": 0.0001881937227187695,
|
|
"loss": 0.0047,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.0631339267232457,
|
|
"grad_norm": 0.008623667992651463,
|
|
"learning_rate": 0.00018815215131989192,
|
|
"loss": 0.0041,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.06334092320430552,
|
|
"grad_norm": 0.0023567613679915667,
|
|
"learning_rate": 0.00018811057992101435,
|
|
"loss": 0.0044,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.06354791968536534,
|
|
"grad_norm": 0.005568039603531361,
|
|
"learning_rate": 0.0001880690085221368,
|
|
"loss": 0.0048,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.06375491616642517,
|
|
"grad_norm": 0.006429716479033232,
|
|
"learning_rate": 0.0001880274371232592,
|
|
"loss": 0.0049,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.06396191264748499,
|
|
"grad_norm": 0.0012258924543857574,
|
|
"learning_rate": 0.00018798586572438164,
|
|
"loss": 0.0046,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.06416890912854481,
|
|
"grad_norm": 0.013900945894420147,
|
|
"learning_rate": 0.00018794429432550407,
|
|
"loss": 0.0054,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.06437590560960464,
|
|
"grad_norm": 0.0164532121270895,
|
|
"learning_rate": 0.00018790272292662648,
|
|
"loss": 0.0052,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.06458290209066446,
|
|
"grad_norm": 0.0039049319457262754,
|
|
"learning_rate": 0.00018786115152774892,
|
|
"loss": 0.0047,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.06478989857172428,
|
|
"grad_norm": 0.0025435080751776695,
|
|
"learning_rate": 0.00018781958012887136,
|
|
"loss": 0.0045,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.06499689505278411,
|
|
"grad_norm": 0.0066621373407542706,
|
|
"learning_rate": 0.00018777800872999377,
|
|
"loss": 0.0057,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.06520389153384393,
|
|
"grad_norm": 0.0011938404059037566,
|
|
"learning_rate": 0.0001877364373311162,
|
|
"loss": 0.0045,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.06541088801490375,
|
|
"grad_norm": 0.005898007657378912,
|
|
"learning_rate": 0.00018769486593223864,
|
|
"loss": 0.0047,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.06561788449596356,
|
|
"grad_norm": 0.004224811680614948,
|
|
"learning_rate": 0.00018765329453336105,
|
|
"loss": 0.0059,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.0658248809770234,
|
|
"grad_norm": 0.02106441557407379,
|
|
"learning_rate": 0.00018761172313448346,
|
|
"loss": 0.0051,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.06603187745808321,
|
|
"grad_norm": 0.0010845439974218607,
|
|
"learning_rate": 0.0001875701517356059,
|
|
"loss": 0.0042,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.06623887393914303,
|
|
"grad_norm": 0.007267239037901163,
|
|
"learning_rate": 0.00018752858033672834,
|
|
"loss": 0.0048,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.06644587042020286,
|
|
"grad_norm": 0.0066713071428239346,
|
|
"learning_rate": 0.00018748700893785077,
|
|
"loss": 0.0049,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.06665286690126268,
|
|
"grad_norm": 0.007623916491866112,
|
|
"learning_rate": 0.00018744543753897318,
|
|
"loss": 0.0052,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.0668598633823225,
|
|
"grad_norm": 0.003484464716166258,
|
|
"learning_rate": 0.00018740386614009562,
|
|
"loss": 0.0045,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.06706685986338232,
|
|
"grad_norm": 0.013743946328759193,
|
|
"learning_rate": 0.00018736229474121806,
|
|
"loss": 0.0049,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.06727385634444215,
|
|
"grad_norm": 0.0030819710809737444,
|
|
"learning_rate": 0.00018732072334234047,
|
|
"loss": 0.0052,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.06748085282550197,
|
|
"grad_norm": 0.014786194078624249,
|
|
"learning_rate": 0.0001872791519434629,
|
|
"loss": 0.0058,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.06768784930656178,
|
|
"grad_norm": 0.007248689886182547,
|
|
"learning_rate": 0.00018723758054458534,
|
|
"loss": 0.0046,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.06789484578762162,
|
|
"grad_norm": 0.010181601159274578,
|
|
"learning_rate": 0.00018719600914570775,
|
|
"loss": 0.0053,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.06810184226868143,
|
|
"grad_norm": 0.004160667769610882,
|
|
"learning_rate": 0.0001871544377468302,
|
|
"loss": 0.0056,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.06830883874974125,
|
|
"grad_norm": 0.0021253142040222883,
|
|
"learning_rate": 0.00018711286634795263,
|
|
"loss": 0.0045,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.06851583523080107,
|
|
"grad_norm": 0.0058175004087388515,
|
|
"learning_rate": 0.00018707129494907504,
|
|
"loss": 0.0058,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.0687228317118609,
|
|
"grad_norm": 0.005257429089397192,
|
|
"learning_rate": 0.00018702972355019747,
|
|
"loss": 0.0049,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.06892982819292072,
|
|
"grad_norm": 0.003563474863767624,
|
|
"learning_rate": 0.0001869881521513199,
|
|
"loss": 0.0043,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.06913682467398054,
|
|
"grad_norm": 0.002636804012581706,
|
|
"learning_rate": 0.00018694658075244235,
|
|
"loss": 0.0046,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.06934382115504037,
|
|
"grad_norm": 0.009255892597138882,
|
|
"learning_rate": 0.00018690500935356476,
|
|
"loss": 0.0053,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.06955081763610019,
|
|
"grad_norm": 0.0023986424785107374,
|
|
"learning_rate": 0.0001868634379546872,
|
|
"loss": 0.0045,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.06975781411716,
|
|
"grad_norm": 0.006371774710714817,
|
|
"learning_rate": 0.00018682186655580963,
|
|
"loss": 0.0046,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.06996481059821984,
|
|
"grad_norm": 0.009518152102828026,
|
|
"learning_rate": 0.00018678029515693204,
|
|
"loss": 0.0049,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.07017180707927965,
|
|
"grad_norm": 0.003742037108168006,
|
|
"learning_rate": 0.00018673872375805445,
|
|
"loss": 0.0064,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.07037880356033947,
|
|
"grad_norm": 0.009771923534572124,
|
|
"learning_rate": 0.0001866971523591769,
|
|
"loss": 0.0045,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.07058580004139929,
|
|
"grad_norm": 0.01101437397301197,
|
|
"learning_rate": 0.00018665558096029933,
|
|
"loss": 0.0055,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.07079279652245912,
|
|
"grad_norm": 0.008826150558888912,
|
|
"learning_rate": 0.00018661400956142174,
|
|
"loss": 0.005,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.07099979300351894,
|
|
"grad_norm": 0.006243105512112379,
|
|
"learning_rate": 0.00018657243816254417,
|
|
"loss": 0.0048,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.07120678948457876,
|
|
"grad_norm": 0.0014233957044780254,
|
|
"learning_rate": 0.0001865308667636666,
|
|
"loss": 0.0045,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.07141378596563859,
|
|
"grad_norm": 0.002639338606968522,
|
|
"learning_rate": 0.00018648929536478902,
|
|
"loss": 0.0053,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.07162078244669841,
|
|
"grad_norm": 0.003536937525495887,
|
|
"learning_rate": 0.00018644772396591146,
|
|
"loss": 0.0048,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.07182777892775823,
|
|
"grad_norm": 0.0018274744506925344,
|
|
"learning_rate": 0.0001864061525670339,
|
|
"loss": 0.0043,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.07203477540881804,
|
|
"grad_norm": 0.004306804854422808,
|
|
"learning_rate": 0.0001863645811681563,
|
|
"loss": 0.0058,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.07224177188987788,
|
|
"grad_norm": 0.003877132898196578,
|
|
"learning_rate": 0.00018632300976927874,
|
|
"loss": 0.0058,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.0724487683709377,
|
|
"grad_norm": 0.0018924670293927193,
|
|
"learning_rate": 0.00018628143837040118,
|
|
"loss": 0.0044,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.07265576485199751,
|
|
"grad_norm": 0.005626944359391928,
|
|
"learning_rate": 0.00018623986697152361,
|
|
"loss": 0.0046,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.07286276133305734,
|
|
"grad_norm": 0.006948824506253004,
|
|
"learning_rate": 0.00018619829557264602,
|
|
"loss": 0.0057,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.07306975781411716,
|
|
"grad_norm": 0.002097270218655467,
|
|
"learning_rate": 0.00018615672417376846,
|
|
"loss": 0.0047,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.07327675429517698,
|
|
"grad_norm": 0.0013399182353168726,
|
|
"learning_rate": 0.0001861151527748909,
|
|
"loss": 0.0047,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.0734837507762368,
|
|
"grad_norm": 0.010953530669212341,
|
|
"learning_rate": 0.0001860735813760133,
|
|
"loss": 0.0061,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.07369074725729663,
|
|
"grad_norm": 0.0024627153761684895,
|
|
"learning_rate": 0.00018603200997713575,
|
|
"loss": 0.0061,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.07389774373835645,
|
|
"grad_norm": 0.002271963283419609,
|
|
"learning_rate": 0.00018599043857825818,
|
|
"loss": 0.0043,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.07410474021941627,
|
|
"grad_norm": 0.0036786317359656096,
|
|
"learning_rate": 0.0001859488671793806,
|
|
"loss": 0.0044,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.0743117367004761,
|
|
"grad_norm": 0.006179209798574448,
|
|
"learning_rate": 0.00018590729578050303,
|
|
"loss": 0.0049,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.07451873318153591,
|
|
"grad_norm": 0.0028029060922563076,
|
|
"learning_rate": 0.00018586572438162547,
|
|
"loss": 0.0051,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.07472572966259573,
|
|
"grad_norm": 0.003495444543659687,
|
|
"learning_rate": 0.00018582415298274788,
|
|
"loss": 0.0055,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.07493272614365556,
|
|
"grad_norm": 0.005056194495409727,
|
|
"learning_rate": 0.0001857825815838703,
|
|
"loss": 0.0052,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.07513972262471538,
|
|
"grad_norm": 0.005216763820499182,
|
|
"learning_rate": 0.00018574101018499272,
|
|
"loss": 0.0064,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.0753467191057752,
|
|
"grad_norm": 0.006161578465253115,
|
|
"learning_rate": 0.00018569943878611516,
|
|
"loss": 0.0051,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.07555371558683502,
|
|
"grad_norm": 0.0022837778087705374,
|
|
"learning_rate": 0.00018565786738723757,
|
|
"loss": 0.0046,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.07576071206789485,
|
|
"grad_norm": 0.0017736790468916297,
|
|
"learning_rate": 0.00018561629598836,
|
|
"loss": 0.0043,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.07596770854895467,
|
|
"grad_norm": 0.004538076464086771,
|
|
"learning_rate": 0.00018557472458948245,
|
|
"loss": 0.0048,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.07617470503001449,
|
|
"grad_norm": 0.0018419534899294376,
|
|
"learning_rate": 0.00018553315319060488,
|
|
"loss": 0.0043,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.07638170151107432,
|
|
"grad_norm": 0.004529993515461683,
|
|
"learning_rate": 0.0001854915817917273,
|
|
"loss": 0.0044,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.07658869799213414,
|
|
"grad_norm": 0.0072746858932077885,
|
|
"learning_rate": 0.00018545001039284973,
|
|
"loss": 0.0045,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.07679569447319395,
|
|
"grad_norm": 0.013997476547956467,
|
|
"learning_rate": 0.00018540843899397217,
|
|
"loss": 0.0048,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.07700269095425377,
|
|
"grad_norm": 0.0026135060470551252,
|
|
"learning_rate": 0.00018536686759509458,
|
|
"loss": 0.0043,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.0772096874353136,
|
|
"grad_norm": 0.008647504262626171,
|
|
"learning_rate": 0.000185325296196217,
|
|
"loss": 0.0043,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.07741668391637342,
|
|
"grad_norm": 0.0028080667834728956,
|
|
"learning_rate": 0.00018528372479733945,
|
|
"loss": 0.0045,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.07762368039743324,
|
|
"grad_norm": 0.00593935651704669,
|
|
"learning_rate": 0.00018524215339846186,
|
|
"loss": 0.0043,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.07783067687849307,
|
|
"grad_norm": 0.005520394071936607,
|
|
"learning_rate": 0.0001852005819995843,
|
|
"loss": 0.0044,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.07803767335955289,
|
|
"grad_norm": 0.004040780942887068,
|
|
"learning_rate": 0.00018515901060070673,
|
|
"loss": 0.0044,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.07824466984061271,
|
|
"grad_norm": 0.006418270990252495,
|
|
"learning_rate": 0.00018511743920182914,
|
|
"loss": 0.0044,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.07845166632167253,
|
|
"grad_norm": 0.01350860670208931,
|
|
"learning_rate": 0.00018507586780295158,
|
|
"loss": 0.0044,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.07865866280273236,
|
|
"grad_norm": 0.011060641147196293,
|
|
"learning_rate": 0.00018503429640407402,
|
|
"loss": 0.004,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.07886565928379217,
|
|
"grad_norm": 0.004036550410091877,
|
|
"learning_rate": 0.00018499272500519646,
|
|
"loss": 0.0038,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.07907265576485199,
|
|
"grad_norm": 0.003959359601140022,
|
|
"learning_rate": 0.00018495115360631887,
|
|
"loss": 0.0038,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.07927965224591182,
|
|
"grad_norm": 0.005686459131538868,
|
|
"learning_rate": 0.00018490958220744128,
|
|
"loss": 0.0047,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.07948664872697164,
|
|
"grad_norm": 0.007630357053130865,
|
|
"learning_rate": 0.0001848680108085637,
|
|
"loss": 0.0042,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.07969364520803146,
|
|
"grad_norm": 0.005852843634784222,
|
|
"learning_rate": 0.00018482643940968615,
|
|
"loss": 0.0036,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.07990064168909129,
|
|
"grad_norm": 0.005719276610761881,
|
|
"learning_rate": 0.00018478486801080856,
|
|
"loss": 0.0033,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.08010763817015111,
|
|
"grad_norm": 0.007002281956374645,
|
|
"learning_rate": 0.000184743296611931,
|
|
"loss": 0.0033,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.08031463465121093,
|
|
"grad_norm": 0.007220590952783823,
|
|
"learning_rate": 0.00018470172521305343,
|
|
"loss": 0.0046,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.08052163113227075,
|
|
"grad_norm": 0.005552125629037619,
|
|
"learning_rate": 0.00018466015381417584,
|
|
"loss": 0.0028,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.08072862761333058,
|
|
"grad_norm": 0.011994168162345886,
|
|
"learning_rate": 0.00018461858241529828,
|
|
"loss": 0.0028,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.0809356240943904,
|
|
"grad_norm": 0.011316240765154362,
|
|
"learning_rate": 0.00018457701101642072,
|
|
"loss": 0.0026,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.08114262057545021,
|
|
"grad_norm": 0.006591492332518101,
|
|
"learning_rate": 0.00018453543961754313,
|
|
"loss": 0.0022,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.08134961705651005,
|
|
"grad_norm": 0.007715560495853424,
|
|
"learning_rate": 0.00018449386821866556,
|
|
"loss": 0.0032,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.08155661353756986,
|
|
"grad_norm": 0.009517872706055641,
|
|
"learning_rate": 0.000184452296819788,
|
|
"loss": 0.0016,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.08176361001862968,
|
|
"grad_norm": 0.79290372133255,
|
|
"learning_rate": 0.0001844107254209104,
|
|
"loss": 0.0117,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.0819706064996895,
|
|
"grad_norm": 0.10478183627128601,
|
|
"learning_rate": 0.00018436915402203285,
|
|
"loss": 0.0041,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.08217760298074933,
|
|
"grad_norm": 0.11330251395702362,
|
|
"learning_rate": 0.00018432758262315529,
|
|
"loss": 0.0349,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.08238459946180915,
|
|
"grad_norm": 0.022089608013629913,
|
|
"learning_rate": 0.00018428601122427772,
|
|
"loss": 0.0028,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.08259159594286897,
|
|
"grad_norm": 0.045795392245054245,
|
|
"learning_rate": 0.00018424443982540013,
|
|
"loss": 0.0027,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.0827985924239288,
|
|
"grad_norm": 0.052710726857185364,
|
|
"learning_rate": 0.00018420286842652257,
|
|
"loss": 0.0033,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.08300558890498862,
|
|
"grad_norm": 0.0732388123869896,
|
|
"learning_rate": 0.000184161297027645,
|
|
"loss": 0.0031,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.08321258538604843,
|
|
"grad_norm": 0.03560757264494896,
|
|
"learning_rate": 0.00018411972562876742,
|
|
"loss": 0.0025,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.08341958186710825,
|
|
"grad_norm": 0.039032500237226486,
|
|
"learning_rate": 0.00018407815422988985,
|
|
"loss": 0.0027,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.08362657834816808,
|
|
"grad_norm": 0.018673432990908623,
|
|
"learning_rate": 0.0001840365828310123,
|
|
"loss": 0.0018,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.0838335748292279,
|
|
"grad_norm": 0.06421470642089844,
|
|
"learning_rate": 0.0001839950114321347,
|
|
"loss": 0.0293,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.08404057131028772,
|
|
"grad_norm": 0.05356355383992195,
|
|
"learning_rate": 0.0001839534400332571,
|
|
"loss": 0.0234,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.08424756779134755,
|
|
"grad_norm": 0.047022175043821335,
|
|
"learning_rate": 0.00018391186863437955,
|
|
"loss": 0.0198,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.08445456427240737,
|
|
"grad_norm": 0.017866840586066246,
|
|
"learning_rate": 0.00018387029723550199,
|
|
"loss": 0.0017,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.08466156075346719,
|
|
"grad_norm": 0.010899499990046024,
|
|
"learning_rate": 0.0001838287258366244,
|
|
"loss": 0.0013,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.084868557234527,
|
|
"grad_norm": 0.0167918112128973,
|
|
"learning_rate": 0.00018378715443774683,
|
|
"loss": 0.0024,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.08507555371558684,
|
|
"grad_norm": 0.02067534811794758,
|
|
"learning_rate": 0.00018374558303886927,
|
|
"loss": 0.0015,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.08528255019664666,
|
|
"grad_norm": 0.01670040749013424,
|
|
"learning_rate": 0.00018370401163999168,
|
|
"loss": 0.0015,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.08548954667770647,
|
|
"grad_norm": 0.008924894034862518,
|
|
"learning_rate": 0.00018366244024111412,
|
|
"loss": 0.0029,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.0856965431587663,
|
|
"grad_norm": 0.013602840714156628,
|
|
"learning_rate": 0.00018362086884223655,
|
|
"loss": 0.0009,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.08590353963982612,
|
|
"grad_norm": 0.013082594610750675,
|
|
"learning_rate": 0.000183579297443359,
|
|
"loss": 0.002,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.08611053612088594,
|
|
"grad_norm": 0.012215960770845413,
|
|
"learning_rate": 0.0001835377260444814,
|
|
"loss": 0.0017,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.08631753260194577,
|
|
"grad_norm": 0.16738812625408173,
|
|
"learning_rate": 0.00018349615464560384,
|
|
"loss": 0.0173,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.08652452908300559,
|
|
"grad_norm": 0.006629611365497112,
|
|
"learning_rate": 0.00018345458324672627,
|
|
"loss": 0.0006,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.08673152556406541,
|
|
"grad_norm": 0.006643650587648153,
|
|
"learning_rate": 0.00018341301184784868,
|
|
"loss": 0.0015,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.08693852204512523,
|
|
"grad_norm": 0.0039656031876802444,
|
|
"learning_rate": 0.00018337144044897112,
|
|
"loss": 0.0003,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.08714551852618506,
|
|
"grad_norm": 0.00564931146800518,
|
|
"learning_rate": 0.00018332986905009356,
|
|
"loss": 0.0004,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.08735251500724488,
|
|
"grad_norm": 0.014363352209329605,
|
|
"learning_rate": 0.00018328829765121597,
|
|
"loss": 0.0006,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.0875595114883047,
|
|
"grad_norm": 0.006862149108201265,
|
|
"learning_rate": 0.0001832467262523384,
|
|
"loss": 0.0004,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.08776650796936453,
|
|
"grad_norm": 0.005224172957241535,
|
|
"learning_rate": 0.00018320515485346084,
|
|
"loss": 0.0003,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.08797350445042434,
|
|
"grad_norm": 0.012813829816877842,
|
|
"learning_rate": 0.00018316358345458325,
|
|
"loss": 0.0005,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.08818050093148416,
|
|
"grad_norm": 0.0045601376332342625,
|
|
"learning_rate": 0.0001831220120557057,
|
|
"loss": 0.0003,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.08838749741254398,
|
|
"grad_norm": 0.002229505218565464,
|
|
"learning_rate": 0.0001830804406568281,
|
|
"loss": 0.0002,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.08859449389360381,
|
|
"grad_norm": 0.005202361848205328,
|
|
"learning_rate": 0.00018303886925795054,
|
|
"loss": 0.0003,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.08880149037466363,
|
|
"grad_norm": 0.010837195441126823,
|
|
"learning_rate": 0.00018299729785907295,
|
|
"loss": 0.0016,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.08900848685572345,
|
|
"grad_norm": 0.006401981692761183,
|
|
"learning_rate": 0.00018295572646019538,
|
|
"loss": 0.0002,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.08921548333678328,
|
|
"grad_norm": 0.0025153057649731636,
|
|
"learning_rate": 0.00018291415506131782,
|
|
"loss": 0.0001,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.0894224798178431,
|
|
"grad_norm": 0.009693821892142296,
|
|
"learning_rate": 0.00018287258366244023,
|
|
"loss": 0.0004,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.08962947629890292,
|
|
"grad_norm": 0.0013723783195018768,
|
|
"learning_rate": 0.00018283101226356267,
|
|
"loss": 0.0001,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.08983647277996273,
|
|
"grad_norm": 0.008555575273931026,
|
|
"learning_rate": 0.0001827894408646851,
|
|
"loss": 0.0018,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.09004346926102257,
|
|
"grad_norm": 0.0028277519159018993,
|
|
"learning_rate": 0.00018274786946580754,
|
|
"loss": 0.0002,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.09025046574208238,
|
|
"grad_norm": 0.014325006864964962,
|
|
"learning_rate": 0.00018270629806692995,
|
|
"loss": 0.0009,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.0904574622231422,
|
|
"grad_norm": 0.008406553417444229,
|
|
"learning_rate": 0.0001826647266680524,
|
|
"loss": 0.0012,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.09066445870420203,
|
|
"grad_norm": 0.0018985685892403126,
|
|
"learning_rate": 0.00018262315526917483,
|
|
"loss": 0.0002,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.09087145518526185,
|
|
"grad_norm": 0.008550492115318775,
|
|
"learning_rate": 0.00018258158387029724,
|
|
"loss": 0.0024,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.09107845166632167,
|
|
"grad_norm": 0.0008987212786450982,
|
|
"learning_rate": 0.00018254001247141967,
|
|
"loss": 0.0001,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.0912854481473815,
|
|
"grad_norm": 0.002059886697679758,
|
|
"learning_rate": 0.0001824984410725421,
|
|
"loss": 0.0001,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.09149244462844132,
|
|
"grad_norm": 0.003429644973948598,
|
|
"learning_rate": 0.00018245686967366452,
|
|
"loss": 0.0001,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.09169944110950114,
|
|
"grad_norm": 0.0026945085264742374,
|
|
"learning_rate": 0.00018241529827478696,
|
|
"loss": 0.0011,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.09190643759056095,
|
|
"grad_norm": 0.00299448031000793,
|
|
"learning_rate": 0.0001823737268759094,
|
|
"loss": 0.0001,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.09211343407162079,
|
|
"grad_norm": 0.004376763943582773,
|
|
"learning_rate": 0.00018233215547703183,
|
|
"loss": 0.0011,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.0923204305526806,
|
|
"grad_norm": 0.005078963004052639,
|
|
"learning_rate": 0.00018229058407815424,
|
|
"loss": 0.0002,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.09252742703374042,
|
|
"grad_norm": 0.009306194260716438,
|
|
"learning_rate": 0.00018224901267927668,
|
|
"loss": 0.0003,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.09273442351480025,
|
|
"grad_norm": 0.0030481938738375902,
|
|
"learning_rate": 0.00018220744128039912,
|
|
"loss": 0.0013,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.09294141999586007,
|
|
"grad_norm": 0.013268841430544853,
|
|
"learning_rate": 0.0001821658698815215,
|
|
"loss": 0.0017,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.09314841647691989,
|
|
"grad_norm": 0.006063752807676792,
|
|
"learning_rate": 0.00018212429848264394,
|
|
"loss": 0.0002,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.09335541295797971,
|
|
"grad_norm": 0.00182344822678715,
|
|
"learning_rate": 0.00018208272708376637,
|
|
"loss": 0.0001,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.09356240943903954,
|
|
"grad_norm": 0.009372780099511147,
|
|
"learning_rate": 0.0001820411556848888,
|
|
"loss": 0.0005,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.09376940592009936,
|
|
"grad_norm": 0.003136920742690563,
|
|
"learning_rate": 0.00018199958428601122,
|
|
"loss": 0.0001,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.09397640240115918,
|
|
"grad_norm": 0.030062230303883553,
|
|
"learning_rate": 0.00018195801288713366,
|
|
"loss": 0.0006,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.09418339888221901,
|
|
"grad_norm": 0.004309754353016615,
|
|
"learning_rate": 0.0001819164414882561,
|
|
"loss": 0.0007,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.09439039536327883,
|
|
"grad_norm": 0.010606180876493454,
|
|
"learning_rate": 0.0001818748700893785,
|
|
"loss": 0.0002,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.09459739184433864,
|
|
"grad_norm": 0.0054748812690377235,
|
|
"learning_rate": 0.00018183329869050094,
|
|
"loss": 0.0005,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.09480438832539846,
|
|
"grad_norm": 0.001673020888119936,
|
|
"learning_rate": 0.00018179172729162338,
|
|
"loss": 0.0001,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.0950113848064583,
|
|
"grad_norm": 0.004401384387165308,
|
|
"learning_rate": 0.0001817501558927458,
|
|
"loss": 0.0001,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.09521838128751811,
|
|
"grad_norm": 0.005755012389272451,
|
|
"learning_rate": 0.00018170858449386822,
|
|
"loss": 0.0012,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.09542537776857793,
|
|
"grad_norm": 0.004951901733875275,
|
|
"learning_rate": 0.00018166701309499066,
|
|
"loss": 0.0012,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.09563237424963776,
|
|
"grad_norm": 0.0014370133867487311,
|
|
"learning_rate": 0.0001816254416961131,
|
|
"loss": 0.0001,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.09583937073069758,
|
|
"grad_norm": 0.012078122235834599,
|
|
"learning_rate": 0.0001815838702972355,
|
|
"loss": 0.0013,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.0960463672117574,
|
|
"grad_norm": 0.002765175886452198,
|
|
"learning_rate": 0.00018154229889835795,
|
|
"loss": 0.0009,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.09625336369281723,
|
|
"grad_norm": 0.0017701378092169762,
|
|
"learning_rate": 0.00018150072749948038,
|
|
"loss": 0.0001,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.09646036017387705,
|
|
"grad_norm": 0.003232579445466399,
|
|
"learning_rate": 0.0001814591561006028,
|
|
"loss": 0.0001,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.09666735665493686,
|
|
"grad_norm": 0.004619232844561338,
|
|
"learning_rate": 0.00018141758470172523,
|
|
"loss": 0.0012,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.09687435313599668,
|
|
"grad_norm": 0.008126890286803246,
|
|
"learning_rate": 0.00018137601330284767,
|
|
"loss": 0.0011,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.09708134961705651,
|
|
"grad_norm": 0.004719397984445095,
|
|
"learning_rate": 0.00018133444190397008,
|
|
"loss": 0.0009,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.09728834609811633,
|
|
"grad_norm": 0.005153202451765537,
|
|
"learning_rate": 0.00018129287050509251,
|
|
"loss": 0.0012,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.09749534257917615,
|
|
"grad_norm": 0.01385215763002634,
|
|
"learning_rate": 0.00018125129910621492,
|
|
"loss": 0.0022,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.09770233906023598,
|
|
"grad_norm": 0.004983994178473949,
|
|
"learning_rate": 0.00018120972770733736,
|
|
"loss": 0.0004,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.0979093355412958,
|
|
"grad_norm": 0.007088206708431244,
|
|
"learning_rate": 0.00018116815630845977,
|
|
"loss": 0.0005,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.09811633202235562,
|
|
"grad_norm": 0.004754175432026386,
|
|
"learning_rate": 0.0001811265849095822,
|
|
"loss": 0.0006,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.09832332850341544,
|
|
"grad_norm": 0.004105637315660715,
|
|
"learning_rate": 0.00018108501351070465,
|
|
"loss": 0.0001,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.09853032498447527,
|
|
"grad_norm": 0.002687152475118637,
|
|
"learning_rate": 0.00018104344211182706,
|
|
"loss": 0.0001,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.09873732146553509,
|
|
"grad_norm": 0.0023124783765524626,
|
|
"learning_rate": 0.0001810018707129495,
|
|
"loss": 0.0001,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.0989443179465949,
|
|
"grad_norm": 0.007885076105594635,
|
|
"learning_rate": 0.00018096029931407193,
|
|
"loss": 0.0006,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.09915131442765474,
|
|
"grad_norm": 0.0014087413437664509,
|
|
"learning_rate": 0.00018091872791519434,
|
|
"loss": 0.0007,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.09935831090871455,
|
|
"grad_norm": 0.0055119190365076065,
|
|
"learning_rate": 0.00018087715651631678,
|
|
"loss": 0.0002,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.09956530738977437,
|
|
"grad_norm": 0.0003096537839155644,
|
|
"learning_rate": 0.0001808355851174392,
|
|
"loss": 0.0,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.09977230387083419,
|
|
"grad_norm": 0.001899409806355834,
|
|
"learning_rate": 0.00018079401371856165,
|
|
"loss": 0.0002,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.09997930035189402,
|
|
"grad_norm": 0.005635194014757872,
|
|
"learning_rate": 0.00018075244231968406,
|
|
"loss": 0.0001,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.10018629683295384,
|
|
"grad_norm": 0.005856087896972895,
|
|
"learning_rate": 0.0001807108709208065,
|
|
"loss": 0.001,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.10039329331401366,
|
|
"grad_norm": 0.005273948423564434,
|
|
"learning_rate": 0.00018066929952192893,
|
|
"loss": 0.0013,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.10060028979507349,
|
|
"grad_norm": 0.001652201754041016,
|
|
"learning_rate": 0.00018062772812305134,
|
|
"loss": 0.0001,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.1008072862761333,
|
|
"grad_norm": 0.006849886849522591,
|
|
"learning_rate": 0.00018058615672417378,
|
|
"loss": 0.0001,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.10101428275719312,
|
|
"grad_norm": 0.009754250757396221,
|
|
"learning_rate": 0.00018054458532529622,
|
|
"loss": 0.0023,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.10122127923825296,
|
|
"grad_norm": 0.0038455536123365164,
|
|
"learning_rate": 0.00018050301392641863,
|
|
"loss": 0.0016,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.10142827571931277,
|
|
"grad_norm": 0.0006483698962256312,
|
|
"learning_rate": 0.00018046144252754107,
|
|
"loss": 0.0,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.10163527220037259,
|
|
"grad_norm": 0.008700639940798283,
|
|
"learning_rate": 0.0001804198711286635,
|
|
"loss": 0.0007,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.10184226868143241,
|
|
"grad_norm": 0.004151639994233847,
|
|
"learning_rate": 0.00018037829972978594,
|
|
"loss": 0.0006,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.10204926516249224,
|
|
"grad_norm": 0.003242357401177287,
|
|
"learning_rate": 0.00018033672833090832,
|
|
"loss": 0.0001,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.10225626164355206,
|
|
"grad_norm": 0.0038309101946651936,
|
|
"learning_rate": 0.00018029515693203076,
|
|
"loss": 0.0002,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.10246325812461188,
|
|
"grad_norm": 0.0032492976170033216,
|
|
"learning_rate": 0.0001802535855331532,
|
|
"loss": 0.0001,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.10267025460567171,
|
|
"grad_norm": 0.005621058400720358,
|
|
"learning_rate": 0.0001802120141342756,
|
|
"loss": 0.0002,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.10287725108673153,
|
|
"grad_norm": 0.007613383699208498,
|
|
"learning_rate": 0.00018017044273539804,
|
|
"loss": 0.0013,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.10308424756779135,
|
|
"grad_norm": 0.004469983279705048,
|
|
"learning_rate": 0.00018012887133652048,
|
|
"loss": 0.0001,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.10329124404885116,
|
|
"grad_norm": 0.010518516413867474,
|
|
"learning_rate": 0.00018008729993764292,
|
|
"loss": 0.0002,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.103498240529911,
|
|
"grad_norm": 0.0019439860479906201,
|
|
"learning_rate": 0.00018004572853876533,
|
|
"loss": 0.0001,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.10370523701097081,
|
|
"grad_norm": 0.007315011695027351,
|
|
"learning_rate": 0.00018000415713988776,
|
|
"loss": 0.0002,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.10391223349203063,
|
|
"grad_norm": 0.0027510204818099737,
|
|
"learning_rate": 0.0001799625857410102,
|
|
"loss": 0.0002,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.10411922997309046,
|
|
"grad_norm": 0.007963057607412338,
|
|
"learning_rate": 0.0001799210143421326,
|
|
"loss": 0.0003,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.10432622645415028,
|
|
"grad_norm": 0.004816776607185602,
|
|
"learning_rate": 0.00017987944294325505,
|
|
"loss": 0.001,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.1045332229352101,
|
|
"grad_norm": 0.0010107432026416063,
|
|
"learning_rate": 0.00017983787154437749,
|
|
"loss": 0.0,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.10474021941626992,
|
|
"grad_norm": 0.002849761163815856,
|
|
"learning_rate": 0.0001797963001454999,
|
|
"loss": 0.0001,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.10494721589732975,
|
|
"grad_norm": 0.008465790189802647,
|
|
"learning_rate": 0.00017975472874662233,
|
|
"loss": 0.0017,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.10515421237838957,
|
|
"grad_norm": 0.001012888620607555,
|
|
"learning_rate": 0.00017971315734774477,
|
|
"loss": 0.0,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.10536120885944938,
|
|
"grad_norm": 0.005154603160917759,
|
|
"learning_rate": 0.0001796715859488672,
|
|
"loss": 0.0001,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.10556820534050922,
|
|
"grad_norm": 0.012283824384212494,
|
|
"learning_rate": 0.00017963001454998962,
|
|
"loss": 0.0004,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.10577520182156903,
|
|
"grad_norm": 0.001696570310741663,
|
|
"learning_rate": 0.00017958844315111205,
|
|
"loss": 0.0001,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.10598219830262885,
|
|
"grad_norm": 0.0002515624219086021,
|
|
"learning_rate": 0.0001795468717522345,
|
|
"loss": 0.0,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.10618919478368868,
|
|
"grad_norm": 0.0006335057551041245,
|
|
"learning_rate": 0.0001795053003533569,
|
|
"loss": 0.0,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.1063961912647485,
|
|
"grad_norm": 0.0007280270801857114,
|
|
"learning_rate": 0.00017946372895447934,
|
|
"loss": 0.0,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.10660318774580832,
|
|
"grad_norm": 0.0040188622660934925,
|
|
"learning_rate": 0.00017942215755560175,
|
|
"loss": 0.0002,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.10681018422686814,
|
|
"grad_norm": 0.0054796175099909306,
|
|
"learning_rate": 0.00017938058615672419,
|
|
"loss": 0.0009,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.10701718070792797,
|
|
"grad_norm": 0.0034792469814419746,
|
|
"learning_rate": 0.0001793390147578466,
|
|
"loss": 0.0001,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.10722417718898779,
|
|
"grad_norm": 0.00432013813406229,
|
|
"learning_rate": 0.00017929744335896903,
|
|
"loss": 0.0002,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.1074311736700476,
|
|
"grad_norm": 0.004862105939537287,
|
|
"learning_rate": 0.00017925587196009147,
|
|
"loss": 0.001,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.10763817015110744,
|
|
"grad_norm": 0.002249139128252864,
|
|
"learning_rate": 0.00017921430056121388,
|
|
"loss": 0.0,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.10784516663216726,
|
|
"grad_norm": 0.006818681955337524,
|
|
"learning_rate": 0.00017917272916233632,
|
|
"loss": 0.0004,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.10805216311322707,
|
|
"grad_norm": 0.008624670095741749,
|
|
"learning_rate": 0.00017913115776345875,
|
|
"loss": 0.0004,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.10825915959428689,
|
|
"grad_norm": 0.0007210278417915106,
|
|
"learning_rate": 0.00017908958636458116,
|
|
"loss": 0.0,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.10846615607534672,
|
|
"grad_norm": 0.010820691473782063,
|
|
"learning_rate": 0.0001790480149657036,
|
|
"loss": 0.0003,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.10867315255640654,
|
|
"grad_norm": 0.0023018312640488148,
|
|
"learning_rate": 0.00017900644356682604,
|
|
"loss": 0.0001,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.10888014903746636,
|
|
"grad_norm": 0.00019024198991246521,
|
|
"learning_rate": 0.00017896487216794845,
|
|
"loss": 0.0,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.10908714551852619,
|
|
"grad_norm": 0.005011410918086767,
|
|
"learning_rate": 0.00017892330076907088,
|
|
"loss": 0.0003,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.10929414199958601,
|
|
"grad_norm": 0.007016469724476337,
|
|
"learning_rate": 0.00017888172937019332,
|
|
"loss": 0.0009,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.10950113848064583,
|
|
"grad_norm": 0.008118787780404091,
|
|
"learning_rate": 0.00017884015797131576,
|
|
"loss": 0.0003,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.10970813496170564,
|
|
"grad_norm": 0.005854643415659666,
|
|
"learning_rate": 0.00017879858657243817,
|
|
"loss": 0.0011,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.10991513144276548,
|
|
"grad_norm": 0.0038967933505773544,
|
|
"learning_rate": 0.0001787570151735606,
|
|
"loss": 0.001,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.1101221279238253,
|
|
"grad_norm": 0.003802061313763261,
|
|
"learning_rate": 0.00017871544377468304,
|
|
"loss": 0.0018,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.11032912440488511,
|
|
"grad_norm": 0.004740913398563862,
|
|
"learning_rate": 0.00017867387237580545,
|
|
"loss": 0.0001,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.11053612088594494,
|
|
"grad_norm": 0.008046228438615799,
|
|
"learning_rate": 0.0001786323009769279,
|
|
"loss": 0.0005,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.11074311736700476,
|
|
"grad_norm": 0.0014560514828190207,
|
|
"learning_rate": 0.00017859072957805033,
|
|
"loss": 0.0,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.11095011384806458,
|
|
"grad_norm": 0.009222283028066158,
|
|
"learning_rate": 0.00017854915817917274,
|
|
"loss": 0.0002,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.11115711032912441,
|
|
"grad_norm": 0.005452610552310944,
|
|
"learning_rate": 0.00017850758678029515,
|
|
"loss": 0.0013,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.11136410681018423,
|
|
"grad_norm": 0.0007964425021782517,
|
|
"learning_rate": 0.00017846601538141758,
|
|
"loss": 0.0,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.11157110329124405,
|
|
"grad_norm": 0.007167865987867117,
|
|
"learning_rate": 0.00017842444398254002,
|
|
"loss": 0.0011,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.11177809977230387,
|
|
"grad_norm": 0.01039041206240654,
|
|
"learning_rate": 0.00017838287258366243,
|
|
"loss": 0.0007,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.1119850962533637,
|
|
"grad_norm": 0.0001594142959220335,
|
|
"learning_rate": 0.00017834130118478487,
|
|
"loss": 0.0,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.11219209273442352,
|
|
"grad_norm": 0.0023100003600120544,
|
|
"learning_rate": 0.0001782997297859073,
|
|
"loss": 0.0007,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.11239908921548333,
|
|
"grad_norm": 0.003656044602394104,
|
|
"learning_rate": 0.00017825815838702972,
|
|
"loss": 0.0014,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.11260608569654317,
|
|
"grad_norm": 0.0014201959129422903,
|
|
"learning_rate": 0.00017821658698815215,
|
|
"loss": 0.0001,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.11281308217760298,
|
|
"grad_norm": 0.015617001801729202,
|
|
"learning_rate": 0.0001781750155892746,
|
|
"loss": 0.0002,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.1130200786586628,
|
|
"grad_norm": 0.008762934245169163,
|
|
"learning_rate": 0.00017813344419039703,
|
|
"loss": 0.0003,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.11322707513972262,
|
|
"grad_norm": 0.002274678787216544,
|
|
"learning_rate": 0.00017809187279151944,
|
|
"loss": 0.0001,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.11343407162078245,
|
|
"grad_norm": 0.003864066442474723,
|
|
"learning_rate": 0.00017805030139264187,
|
|
"loss": 0.0005,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.11364106810184227,
|
|
"grad_norm": 0.006032771430909634,
|
|
"learning_rate": 0.0001780087299937643,
|
|
"loss": 0.0003,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.11384806458290209,
|
|
"grad_norm": 0.0011935516959056258,
|
|
"learning_rate": 0.00017796715859488672,
|
|
"loss": 0.0,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.11405506106396192,
|
|
"grad_norm": 0.013128140941262245,
|
|
"learning_rate": 0.00017792558719600916,
|
|
"loss": 0.0006,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.11426205754502174,
|
|
"grad_norm": 0.000645163469016552,
|
|
"learning_rate": 0.0001778840157971316,
|
|
"loss": 0.0,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.11446905402608155,
|
|
"grad_norm": 0.011457535438239574,
|
|
"learning_rate": 0.000177842444398254,
|
|
"loss": 0.0003,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.11467605050714137,
|
|
"grad_norm": 0.003057427005842328,
|
|
"learning_rate": 0.00017780087299937644,
|
|
"loss": 0.0012,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.1148830469882012,
|
|
"grad_norm": 0.0017485780408605933,
|
|
"learning_rate": 0.00017775930160049888,
|
|
"loss": 0.0006,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.11509004346926102,
|
|
"grad_norm": 0.0005046813748776913,
|
|
"learning_rate": 0.0001777177302016213,
|
|
"loss": 0.0,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.11529703995032084,
|
|
"grad_norm": 0.007802332751452923,
|
|
"learning_rate": 0.00017767615880274373,
|
|
"loss": 0.0009,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.11550403643138067,
|
|
"grad_norm": 0.0074394443072378635,
|
|
"learning_rate": 0.00017763458740386616,
|
|
"loss": 0.0021,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.11571103291244049,
|
|
"grad_norm": 0.007590603083372116,
|
|
"learning_rate": 0.00017759301600498857,
|
|
"loss": 0.0004,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.11591802939350031,
|
|
"grad_norm": 0.005805825348943472,
|
|
"learning_rate": 0.00017755144460611098,
|
|
"loss": 0.0019,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.11612502587456014,
|
|
"grad_norm": 0.008781611919403076,
|
|
"learning_rate": 0.00017750987320723342,
|
|
"loss": 0.0017,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.11633202235561996,
|
|
"grad_norm": 0.0010139975929632783,
|
|
"learning_rate": 0.00017746830180835586,
|
|
"loss": 0.0,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.11653901883667978,
|
|
"grad_norm": 0.003940982278436422,
|
|
"learning_rate": 0.0001774267304094783,
|
|
"loss": 0.0001,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.1167460153177396,
|
|
"grad_norm": 0.013145407661795616,
|
|
"learning_rate": 0.0001773851590106007,
|
|
"loss": 0.0012,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.11695301179879942,
|
|
"grad_norm": 0.0011159079149365425,
|
|
"learning_rate": 0.00017734358761172314,
|
|
"loss": 0.0001,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.11716000827985924,
|
|
"grad_norm": 0.01270979829132557,
|
|
"learning_rate": 0.00017730201621284558,
|
|
"loss": 0.0017,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.11736700476091906,
|
|
"grad_norm": 0.004431411158293486,
|
|
"learning_rate": 0.000177260444813968,
|
|
"loss": 0.0001,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.11757400124197889,
|
|
"grad_norm": 0.0041249035857617855,
|
|
"learning_rate": 0.00017721887341509042,
|
|
"loss": 0.0003,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.11778099772303871,
|
|
"grad_norm": 0.0006257002823986113,
|
|
"learning_rate": 0.00017717730201621286,
|
|
"loss": 0.0,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.11798799420409853,
|
|
"grad_norm": 0.004501596093177795,
|
|
"learning_rate": 0.00017713573061733527,
|
|
"loss": 0.0001,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.11819499068515835,
|
|
"grad_norm": 0.00742512010037899,
|
|
"learning_rate": 0.0001770941592184577,
|
|
"loss": 0.0002,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.11840198716621818,
|
|
"grad_norm": 0.010421551764011383,
|
|
"learning_rate": 0.00017705258781958015,
|
|
"loss": 0.0008,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.118608983647278,
|
|
"grad_norm": 0.0010451058624312282,
|
|
"learning_rate": 0.00017701101642070256,
|
|
"loss": 0.0001,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.11881598012833781,
|
|
"grad_norm": 0.0014272347325459123,
|
|
"learning_rate": 0.000176969445021825,
|
|
"loss": 0.0001,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.11902297660939765,
|
|
"grad_norm": 0.007021667901426554,
|
|
"learning_rate": 0.00017692787362294743,
|
|
"loss": 0.0002,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.11922997309045746,
|
|
"grad_norm": 0.0050498368218541145,
|
|
"learning_rate": 0.00017688630222406987,
|
|
"loss": 0.001,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.11943696957151728,
|
|
"grad_norm": 0.0004365240456536412,
|
|
"learning_rate": 0.00017684473082519228,
|
|
"loss": 0.0,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.1196439660525771,
|
|
"grad_norm": 0.007502545602619648,
|
|
"learning_rate": 0.00017680315942631471,
|
|
"loss": 0.0002,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.11985096253363693,
|
|
"grad_norm": 0.00824455451220274,
|
|
"learning_rate": 0.00017676158802743715,
|
|
"loss": 0.0004,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.12005795901469675,
|
|
"grad_norm": 0.003414528677240014,
|
|
"learning_rate": 0.00017672001662855956,
|
|
"loss": 0.0011,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.12026495549575657,
|
|
"grad_norm": 0.004608092829585075,
|
|
"learning_rate": 0.00017667844522968197,
|
|
"loss": 0.0014,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.1204719519768164,
|
|
"grad_norm": 0.006573988124728203,
|
|
"learning_rate": 0.0001766368738308044,
|
|
"loss": 0.0002,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.12067894845787622,
|
|
"grad_norm": 0.006878604646772146,
|
|
"learning_rate": 0.00017659530243192685,
|
|
"loss": 0.0005,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.12088594493893604,
|
|
"grad_norm": 0.0013765916228294373,
|
|
"learning_rate": 0.00017655373103304926,
|
|
"loss": 0.0001,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.12109294141999585,
|
|
"grad_norm": 0.009517376311123371,
|
|
"learning_rate": 0.0001765121596341717,
|
|
"loss": 0.0005,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.12129993790105568,
|
|
"grad_norm": 0.02729586698114872,
|
|
"learning_rate": 0.00017647058823529413,
|
|
"loss": 0.0013,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.1215069343821155,
|
|
"grad_norm": 0.005033944733440876,
|
|
"learning_rate": 0.00017642901683641654,
|
|
"loss": 0.0011,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.12171393086317532,
|
|
"grad_norm": 0.001488934038206935,
|
|
"learning_rate": 0.00017638744543753898,
|
|
"loss": 0.0001,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.12192092734423515,
|
|
"grad_norm": 0.004233523737639189,
|
|
"learning_rate": 0.0001763458740386614,
|
|
"loss": 0.0013,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.12212792382529497,
|
|
"grad_norm": 0.001819688593968749,
|
|
"learning_rate": 0.00017630430263978382,
|
|
"loss": 0.0002,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.12233492030635479,
|
|
"grad_norm": 0.0051133958622813225,
|
|
"learning_rate": 0.00017626273124090626,
|
|
"loss": 0.0003,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.12254191678741462,
|
|
"grad_norm": 0.007632863707840443,
|
|
"learning_rate": 0.0001762211598420287,
|
|
"loss": 0.0005,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.12274891326847444,
|
|
"grad_norm": 0.0009289845474995673,
|
|
"learning_rate": 0.00017617958844315113,
|
|
"loss": 0.0,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.12295590974953426,
|
|
"grad_norm": 0.00543027650564909,
|
|
"learning_rate": 0.00017613801704427354,
|
|
"loss": 0.0001,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.12316290623059407,
|
|
"grad_norm": 0.002607417991384864,
|
|
"learning_rate": 0.00017609644564539598,
|
|
"loss": 0.0002,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.1233699027116539,
|
|
"grad_norm": 0.025557972490787506,
|
|
"learning_rate": 0.00017605487424651842,
|
|
"loss": 0.0002,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.12357689919271372,
|
|
"grad_norm": 0.0016189507441595197,
|
|
"learning_rate": 0.00017601330284764083,
|
|
"loss": 0.0001,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.12378389567377354,
|
|
"grad_norm": 0.004612909164279699,
|
|
"learning_rate": 0.00017597173144876327,
|
|
"loss": 0.0002,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.12399089215483337,
|
|
"grad_norm": 0.00019464526849333197,
|
|
"learning_rate": 0.0001759301600498857,
|
|
"loss": 0.0,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.12419788863589319,
|
|
"grad_norm": 0.0013309603091329336,
|
|
"learning_rate": 0.0001758885886510081,
|
|
"loss": 0.0007,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.12440488511695301,
|
|
"grad_norm": 0.002917417325079441,
|
|
"learning_rate": 0.00017584701725213055,
|
|
"loss": 0.0001,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.12461188159801283,
|
|
"grad_norm": 0.004730269778519869,
|
|
"learning_rate": 0.000175805445853253,
|
|
"loss": 0.0001,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.12481887807907266,
|
|
"grad_norm": 0.0036635478027164936,
|
|
"learning_rate": 0.0001757638744543754,
|
|
"loss": 0.0001,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.12502587456013248,
|
|
"grad_norm": 0.002084661042317748,
|
|
"learning_rate": 0.0001757223030554978,
|
|
"loss": 0.0001,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.1252328710411923,
|
|
"grad_norm": 0.006881284527480602,
|
|
"learning_rate": 0.00017568073165662024,
|
|
"loss": 0.0002,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.1254398675222521,
|
|
"grad_norm": 0.0007496286416426301,
|
|
"learning_rate": 0.00017563916025774268,
|
|
"loss": 0.0,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.12564686400331193,
|
|
"grad_norm": 0.0013991744490340352,
|
|
"learning_rate": 0.0001755975888588651,
|
|
"loss": 0.0,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.12585386048437178,
|
|
"grad_norm": 0.00578208127990365,
|
|
"learning_rate": 0.00017555601745998753,
|
|
"loss": 0.0016,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.1260608569654316,
|
|
"grad_norm": 0.0005476415390148759,
|
|
"learning_rate": 0.00017551444606110996,
|
|
"loss": 0.0,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.1262678534464914,
|
|
"grad_norm": 0.003824407234787941,
|
|
"learning_rate": 0.0001754728746622324,
|
|
"loss": 0.0001,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.12647484992755123,
|
|
"grad_norm": 0.0068860347382724285,
|
|
"learning_rate": 0.0001754313032633548,
|
|
"loss": 0.0003,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.12668184640861105,
|
|
"grad_norm": 0.001763600972481072,
|
|
"learning_rate": 0.00017538973186447725,
|
|
"loss": 0.0002,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.12688884288967087,
|
|
"grad_norm": 0.0029042328242212534,
|
|
"learning_rate": 0.00017534816046559969,
|
|
"loss": 0.0019,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.12709583937073068,
|
|
"grad_norm": 0.026835285127162933,
|
|
"learning_rate": 0.0001753065890667221,
|
|
"loss": 0.0006,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.12730283585179053,
|
|
"grad_norm": 0.0025784820318222046,
|
|
"learning_rate": 0.00017526501766784453,
|
|
"loss": 0.0002,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.12750983233285035,
|
|
"grad_norm": 0.000811999780125916,
|
|
"learning_rate": 0.00017522344626896697,
|
|
"loss": 0.0,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.12771682881391017,
|
|
"grad_norm": 0.0023158304393291473,
|
|
"learning_rate": 0.00017518187487008938,
|
|
"loss": 0.0001,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.12792382529496998,
|
|
"grad_norm": 0.00527742225676775,
|
|
"learning_rate": 0.00017514030347121182,
|
|
"loss": 0.0011,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.1281308217760298,
|
|
"grad_norm": 0.004715193063020706,
|
|
"learning_rate": 0.00017509873207233425,
|
|
"loss": 0.0026,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.12833781825708962,
|
|
"grad_norm": 0.001638007932342589,
|
|
"learning_rate": 0.00017505716067345666,
|
|
"loss": 0.0001,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.12854481473814944,
|
|
"grad_norm": 0.0012813376961275935,
|
|
"learning_rate": 0.0001750155892745791,
|
|
"loss": 0.0001,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.12875181121920928,
|
|
"grad_norm": 0.006484480109065771,
|
|
"learning_rate": 0.00017497401787570154,
|
|
"loss": 0.0004,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.1289588077002691,
|
|
"grad_norm": 0.00035095165367238224,
|
|
"learning_rate": 0.00017493244647682398,
|
|
"loss": 0.0,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.12916580418132892,
|
|
"grad_norm": 0.004927014000713825,
|
|
"learning_rate": 0.00017489087507794639,
|
|
"loss": 0.0012,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.12937280066238874,
|
|
"grad_norm": 0.00287305167876184,
|
|
"learning_rate": 0.0001748493036790688,
|
|
"loss": 0.0001,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.12957979714344856,
|
|
"grad_norm": 0.003079169662669301,
|
|
"learning_rate": 0.00017480773228019123,
|
|
"loss": 0.0001,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.12978679362450837,
|
|
"grad_norm": 0.0018820518162101507,
|
|
"learning_rate": 0.00017476616088131367,
|
|
"loss": 0.0001,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.12999379010556822,
|
|
"grad_norm": 0.004426770843565464,
|
|
"learning_rate": 0.00017472458948243608,
|
|
"loss": 0.0001,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.13020078658662804,
|
|
"grad_norm": 0.008074757643043995,
|
|
"learning_rate": 0.00017468301808355852,
|
|
"loss": 0.0002,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.13040778306768785,
|
|
"grad_norm": 0.004479815252125263,
|
|
"learning_rate": 0.00017464144668468095,
|
|
"loss": 0.0002,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.13061477954874767,
|
|
"grad_norm": 0.0016544251702725887,
|
|
"learning_rate": 0.00017459987528580336,
|
|
"loss": 0.0,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.1308217760298075,
|
|
"grad_norm": 0.0007902836659923196,
|
|
"learning_rate": 0.0001745583038869258,
|
|
"loss": 0.0,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.1310287725108673,
|
|
"grad_norm": 0.011000900529325008,
|
|
"learning_rate": 0.00017451673248804824,
|
|
"loss": 0.0005,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.13123576899192713,
|
|
"grad_norm": 0.00046783004654571414,
|
|
"learning_rate": 0.00017447516108917065,
|
|
"loss": 0.0,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.13144276547298697,
|
|
"grad_norm": 0.003358067711815238,
|
|
"learning_rate": 0.00017443358969029308,
|
|
"loss": 0.0018,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.1316497619540468,
|
|
"grad_norm": 0.0025496368762105703,
|
|
"learning_rate": 0.00017439201829141552,
|
|
"loss": 0.0015,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.1318567584351066,
|
|
"grad_norm": 0.0016015061410143971,
|
|
"learning_rate": 0.00017435044689253793,
|
|
"loss": 0.0002,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.13206375491616643,
|
|
"grad_norm": 0.0038993649650365114,
|
|
"learning_rate": 0.00017430887549366037,
|
|
"loss": 0.0001,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.13227075139722624,
|
|
"grad_norm": 0.0033800469245761633,
|
|
"learning_rate": 0.0001742673040947828,
|
|
"loss": 0.0001,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.13247774787828606,
|
|
"grad_norm": 0.0008187236380763352,
|
|
"learning_rate": 0.00017422573269590524,
|
|
"loss": 0.0,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.13268474435934588,
|
|
"grad_norm": 0.005097914487123489,
|
|
"learning_rate": 0.00017418416129702765,
|
|
"loss": 0.0004,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.13289174084040573,
|
|
"grad_norm": 0.0009978336747735739,
|
|
"learning_rate": 0.0001741425898981501,
|
|
"loss": 0.0,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.13309873732146554,
|
|
"grad_norm": 0.004832749720662832,
|
|
"learning_rate": 0.00017410101849927253,
|
|
"loss": 0.0012,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.13330573380252536,
|
|
"grad_norm": 0.0038694944232702255,
|
|
"learning_rate": 0.00017405944710039494,
|
|
"loss": 0.0001,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.13351273028358518,
|
|
"grad_norm": 0.001419690903276205,
|
|
"learning_rate": 0.00017401787570151737,
|
|
"loss": 0.0,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.133719726764645,
|
|
"grad_norm": 0.006202602293342352,
|
|
"learning_rate": 0.0001739763043026398,
|
|
"loss": 0.0002,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.13392672324570482,
|
|
"grad_norm": 0.0008485604776069522,
|
|
"learning_rate": 0.00017393473290376222,
|
|
"loss": 0.0,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.13413371972676463,
|
|
"grad_norm": 0.0050230189226567745,
|
|
"learning_rate": 0.00017389316150488463,
|
|
"loss": 0.0022,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.13434071620782448,
|
|
"grad_norm": 0.002081549260765314,
|
|
"learning_rate": 0.00017385159010600707,
|
|
"loss": 0.0001,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.1345477126888843,
|
|
"grad_norm": 0.001964141381904483,
|
|
"learning_rate": 0.0001738100187071295,
|
|
"loss": 0.0002,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.13475470916994411,
|
|
"grad_norm": 0.0006888345233164728,
|
|
"learning_rate": 0.00017376844730825192,
|
|
"loss": 0.0,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.13496170565100393,
|
|
"grad_norm": 0.002313450677320361,
|
|
"learning_rate": 0.00017372687590937435,
|
|
"loss": 0.0,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.13516870213206375,
|
|
"grad_norm": 0.007078672293573618,
|
|
"learning_rate": 0.0001736853045104968,
|
|
"loss": 0.0006,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.13537569861312357,
|
|
"grad_norm": 0.005166813265532255,
|
|
"learning_rate": 0.0001736437331116192,
|
|
"loss": 0.0011,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.1355826950941834,
|
|
"grad_norm": 0.007185124326497316,
|
|
"learning_rate": 0.00017360216171274164,
|
|
"loss": 0.0001,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.13578969157524323,
|
|
"grad_norm": 0.005528238136321306,
|
|
"learning_rate": 0.00017356059031386407,
|
|
"loss": 0.0001,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.13599668805630305,
|
|
"grad_norm": 0.0077844299376010895,
|
|
"learning_rate": 0.0001735190189149865,
|
|
"loss": 0.0011,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.13620368453736287,
|
|
"grad_norm": 0.00246329209767282,
|
|
"learning_rate": 0.00017347744751610892,
|
|
"loss": 0.0008,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.13641068101842269,
|
|
"grad_norm": 0.005287639796733856,
|
|
"learning_rate": 0.00017343587611723136,
|
|
"loss": 0.0002,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.1366176774994825,
|
|
"grad_norm": 0.006681959610432386,
|
|
"learning_rate": 0.0001733943047183538,
|
|
"loss": 0.0013,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.13682467398054232,
|
|
"grad_norm": 0.0063599334098398685,
|
|
"learning_rate": 0.0001733527333194762,
|
|
"loss": 0.0003,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.13703167046160214,
|
|
"grad_norm": 0.007015643175691366,
|
|
"learning_rate": 0.00017331116192059864,
|
|
"loss": 0.0003,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.13723866694266199,
|
|
"grad_norm": 0.0003168722032569349,
|
|
"learning_rate": 0.00017326959052172108,
|
|
"loss": 0.0,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.1374456634237218,
|
|
"grad_norm": 0.006562775932252407,
|
|
"learning_rate": 0.0001732280191228435,
|
|
"loss": 0.0008,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.13765265990478162,
|
|
"grad_norm": 0.003267984837293625,
|
|
"learning_rate": 0.00017318644772396593,
|
|
"loss": 0.0001,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.13785965638584144,
|
|
"grad_norm": 0.007215241901576519,
|
|
"learning_rate": 0.00017314487632508836,
|
|
"loss": 0.0021,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.13806665286690126,
|
|
"grad_norm": 0.001962031237781048,
|
|
"learning_rate": 0.00017310330492621077,
|
|
"loss": 0.0,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.13827364934796108,
|
|
"grad_norm": 0.007086516357958317,
|
|
"learning_rate": 0.0001730617335273332,
|
|
"loss": 0.0021,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.1384806458290209,
|
|
"grad_norm": 0.0063016172498464584,
|
|
"learning_rate": 0.00017302016212845562,
|
|
"loss": 0.0001,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.13868764231008074,
|
|
"grad_norm": 0.007975582964718342,
|
|
"learning_rate": 0.00017297859072957806,
|
|
"loss": 0.0002,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.13889463879114056,
|
|
"grad_norm": 0.0030251971911638975,
|
|
"learning_rate": 0.00017293701933070047,
|
|
"loss": 0.0002,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.13910163527220037,
|
|
"grad_norm": 0.00741973053663969,
|
|
"learning_rate": 0.0001728954479318229,
|
|
"loss": 0.0003,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.1393086317532602,
|
|
"grad_norm": 0.002640543272718787,
|
|
"learning_rate": 0.00017285387653294534,
|
|
"loss": 0.0015,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.13951562823432,
|
|
"grad_norm": 0.0004313603858463466,
|
|
"learning_rate": 0.00017281230513406778,
|
|
"loss": 0.0,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.13972262471537983,
|
|
"grad_norm": 0.0020118863321840763,
|
|
"learning_rate": 0.0001727707337351902,
|
|
"loss": 0.0005,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.13992962119643967,
|
|
"grad_norm": 0.003337120870128274,
|
|
"learning_rate": 0.00017272916233631262,
|
|
"loss": 0.0007,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.1401366176774995,
|
|
"grad_norm": 0.014386707916855812,
|
|
"learning_rate": 0.00017268759093743506,
|
|
"loss": 0.0004,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.1403436141585593,
|
|
"grad_norm": 0.006729326210916042,
|
|
"learning_rate": 0.00017264601953855747,
|
|
"loss": 0.001,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.14055061063961913,
|
|
"grad_norm": 0.001671936479397118,
|
|
"learning_rate": 0.0001726044481396799,
|
|
"loss": 0.0002,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.14075760712067895,
|
|
"grad_norm": 0.007516622077673674,
|
|
"learning_rate": 0.00017256287674080235,
|
|
"loss": 0.001,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.14096460360173876,
|
|
"grad_norm": 0.0027280249632894993,
|
|
"learning_rate": 0.00017252130534192476,
|
|
"loss": 0.0006,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.14117160008279858,
|
|
"grad_norm": 0.010556796565651894,
|
|
"learning_rate": 0.0001724797339430472,
|
|
"loss": 0.0003,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.14137859656385843,
|
|
"grad_norm": 0.0027946115005761385,
|
|
"learning_rate": 0.00017243816254416963,
|
|
"loss": 0.0001,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.14158559304491825,
|
|
"grad_norm": 0.00467882351949811,
|
|
"learning_rate": 0.00017239659114529204,
|
|
"loss": 0.0001,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.14179258952597806,
|
|
"grad_norm": 0.004167881328612566,
|
|
"learning_rate": 0.00017235501974641448,
|
|
"loss": 0.0012,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.14199958600703788,
|
|
"grad_norm": 0.0034762704744935036,
|
|
"learning_rate": 0.00017231344834753691,
|
|
"loss": 0.0007,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.1422065824880977,
|
|
"grad_norm": 0.0005650786333717406,
|
|
"learning_rate": 0.00017227187694865935,
|
|
"loss": 0.0,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.14241357896915752,
|
|
"grad_norm": 0.0043237158097326756,
|
|
"learning_rate": 0.00017223030554978176,
|
|
"loss": 0.0001,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.14262057545021734,
|
|
"grad_norm": 0.0071853832341730595,
|
|
"learning_rate": 0.0001721887341509042,
|
|
"loss": 0.0004,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.14282757193127718,
|
|
"grad_norm": 0.01868472993373871,
|
|
"learning_rate": 0.00017214716275202664,
|
|
"loss": 0.0009,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.143034568412337,
|
|
"grad_norm": 0.001339295064099133,
|
|
"learning_rate": 0.00017210559135314902,
|
|
"loss": 0.0009,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.14324156489339682,
|
|
"grad_norm": 0.00664726085960865,
|
|
"learning_rate": 0.00017206401995427146,
|
|
"loss": 0.0002,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.14344856137445663,
|
|
"grad_norm": 0.006592089310288429,
|
|
"learning_rate": 0.0001720224485553939,
|
|
"loss": 0.0001,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.14365555785551645,
|
|
"grad_norm": 0.0005503061693161726,
|
|
"learning_rate": 0.00017198087715651633,
|
|
"loss": 0.0,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 0.14386255433657627,
|
|
"grad_norm": 0.003913522697985172,
|
|
"learning_rate": 0.00017193930575763874,
|
|
"loss": 0.0002,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.1440695508176361,
|
|
"grad_norm": 0.004871245473623276,
|
|
"learning_rate": 0.00017189773435876118,
|
|
"loss": 0.0002,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 0.14427654729869593,
|
|
"grad_norm": 0.007188999559730291,
|
|
"learning_rate": 0.0001718561629598836,
|
|
"loss": 0.0002,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 0.14448354377975575,
|
|
"grad_norm": 0.003864140482619405,
|
|
"learning_rate": 0.00017181459156100602,
|
|
"loss": 0.0013,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 0.14469054026081557,
|
|
"grad_norm": 0.005774588789790869,
|
|
"learning_rate": 0.00017177302016212846,
|
|
"loss": 0.0004,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 0.1448975367418754,
|
|
"grad_norm": 0.002636535558849573,
|
|
"learning_rate": 0.0001717314487632509,
|
|
"loss": 0.0007,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.1451045332229352,
|
|
"grad_norm": 0.03907289355993271,
|
|
"learning_rate": 0.0001716898773643733,
|
|
"loss": 0.0019,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 0.14531152970399502,
|
|
"grad_norm": 0.005653630942106247,
|
|
"learning_rate": 0.00017164830596549574,
|
|
"loss": 0.0003,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 0.14551852618505484,
|
|
"grad_norm": 0.003644258715212345,
|
|
"learning_rate": 0.00017160673456661818,
|
|
"loss": 0.0001,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 0.1457255226661147,
|
|
"grad_norm": 0.0028953952714800835,
|
|
"learning_rate": 0.00017156516316774062,
|
|
"loss": 0.0001,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 0.1459325191471745,
|
|
"grad_norm": 0.005685892421752214,
|
|
"learning_rate": 0.00017152359176886303,
|
|
"loss": 0.0002,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.14613951562823432,
|
|
"grad_norm": 0.00946901086717844,
|
|
"learning_rate": 0.00017148202036998547,
|
|
"loss": 0.0008,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 0.14634651210929414,
|
|
"grad_norm": 0.004027761984616518,
|
|
"learning_rate": 0.0001714404489711079,
|
|
"loss": 0.0001,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 0.14655350859035396,
|
|
"grad_norm": 0.0014218458672985435,
|
|
"learning_rate": 0.0001713988775722303,
|
|
"loss": 0.0,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 0.14676050507141378,
|
|
"grad_norm": 0.0058472915552556515,
|
|
"learning_rate": 0.00017135730617335275,
|
|
"loss": 0.0002,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 0.1469675015524736,
|
|
"grad_norm": 0.004684192128479481,
|
|
"learning_rate": 0.0001713157347744752,
|
|
"loss": 0.0002,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.14717449803353344,
|
|
"grad_norm": 0.002729298546910286,
|
|
"learning_rate": 0.0001712741633755976,
|
|
"loss": 0.0001,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 0.14738149451459326,
|
|
"grad_norm": 0.003782545682042837,
|
|
"learning_rate": 0.00017123259197672,
|
|
"loss": 0.0022,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 0.14758849099565308,
|
|
"grad_norm": 0.004307260736823082,
|
|
"learning_rate": 0.00017119102057784244,
|
|
"loss": 0.0009,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 0.1477954874767129,
|
|
"grad_norm": 0.01339892577379942,
|
|
"learning_rate": 0.00017114944917896488,
|
|
"loss": 0.001,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 0.1480024839577727,
|
|
"grad_norm": 0.0017793363658711314,
|
|
"learning_rate": 0.0001711078777800873,
|
|
"loss": 0.0,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.14820948043883253,
|
|
"grad_norm": 0.0005680687027052045,
|
|
"learning_rate": 0.00017106630638120973,
|
|
"loss": 0.0,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 0.14841647691989235,
|
|
"grad_norm": 0.0010823605116456747,
|
|
"learning_rate": 0.00017102473498233216,
|
|
"loss": 0.0,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 0.1486234734009522,
|
|
"grad_norm": 0.006135303992778063,
|
|
"learning_rate": 0.00017098316358345457,
|
|
"loss": 0.0002,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 0.148830469882012,
|
|
"grad_norm": 0.003215776290744543,
|
|
"learning_rate": 0.000170941592184577,
|
|
"loss": 0.0008,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 0.14903746636307183,
|
|
"grad_norm": 0.00970076397061348,
|
|
"learning_rate": 0.00017090002078569945,
|
|
"loss": 0.0008,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.14924446284413165,
|
|
"grad_norm": 0.0037311650812625885,
|
|
"learning_rate": 0.00017085844938682189,
|
|
"loss": 0.0007,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 0.14945145932519147,
|
|
"grad_norm": 0.0035531132016330957,
|
|
"learning_rate": 0.0001708168779879443,
|
|
"loss": 0.0007,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 0.14965845580625128,
|
|
"grad_norm": 0.0013675455702468753,
|
|
"learning_rate": 0.00017077530658906673,
|
|
"loss": 0.0002,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 0.14986545228731113,
|
|
"grad_norm": 0.003667420009151101,
|
|
"learning_rate": 0.00017073373519018917,
|
|
"loss": 0.0001,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 0.15007244876837095,
|
|
"grad_norm": 0.0006531656836159527,
|
|
"learning_rate": 0.00017069216379131158,
|
|
"loss": 0.0,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.15027944524943077,
|
|
"grad_norm": 0.0029405278619378805,
|
|
"learning_rate": 0.00017065059239243402,
|
|
"loss": 0.0001,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 0.15048644173049058,
|
|
"grad_norm": 0.0020144616719335318,
|
|
"learning_rate": 0.00017060902099355645,
|
|
"loss": 0.0001,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 0.1506934382115504,
|
|
"grad_norm": 0.003123146714642644,
|
|
"learning_rate": 0.00017056744959467886,
|
|
"loss": 0.0007,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 0.15090043469261022,
|
|
"grad_norm": 0.005841000005602837,
|
|
"learning_rate": 0.0001705258781958013,
|
|
"loss": 0.0001,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 0.15110743117367004,
|
|
"grad_norm": 0.001898916088975966,
|
|
"learning_rate": 0.00017048430679692374,
|
|
"loss": 0.0,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.15131442765472988,
|
|
"grad_norm": 0.0005505726439878345,
|
|
"learning_rate": 0.00017044273539804615,
|
|
"loss": 0.0,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 0.1515214241357897,
|
|
"grad_norm": 0.022630905732512474,
|
|
"learning_rate": 0.00017040116399916859,
|
|
"loss": 0.0003,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 0.15172842061684952,
|
|
"grad_norm": 0.0018513459945097566,
|
|
"learning_rate": 0.00017035959260029102,
|
|
"loss": 0.0,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 0.15193541709790934,
|
|
"grad_norm": 0.006640856619924307,
|
|
"learning_rate": 0.00017031802120141343,
|
|
"loss": 0.0015,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 0.15214241357896915,
|
|
"grad_norm": 0.010431594215333462,
|
|
"learning_rate": 0.00017027644980253584,
|
|
"loss": 0.0007,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.15234941006002897,
|
|
"grad_norm": 0.0009595350711606443,
|
|
"learning_rate": 0.00017023487840365828,
|
|
"loss": 0.0001,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 0.1525564065410888,
|
|
"grad_norm": 0.0019930503331124783,
|
|
"learning_rate": 0.00017019330700478072,
|
|
"loss": 0.0001,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 0.15276340302214864,
|
|
"grad_norm": 0.0020235483534634113,
|
|
"learning_rate": 0.00017015173560590313,
|
|
"loss": 0.0012,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 0.15297039950320845,
|
|
"grad_norm": 0.000323964050039649,
|
|
"learning_rate": 0.00017011016420702556,
|
|
"loss": 0.0,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 0.15317739598426827,
|
|
"grad_norm": 0.004805979318916798,
|
|
"learning_rate": 0.000170068592808148,
|
|
"loss": 0.0016,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.1533843924653281,
|
|
"grad_norm": 0.0007103011594153941,
|
|
"learning_rate": 0.00017002702140927044,
|
|
"loss": 0.0,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 0.1535913889463879,
|
|
"grad_norm": 0.00901501253247261,
|
|
"learning_rate": 0.00016998545001039285,
|
|
"loss": 0.0001,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 0.15379838542744773,
|
|
"grad_norm": 0.01626206934452057,
|
|
"learning_rate": 0.00016994387861151528,
|
|
"loss": 0.0001,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 0.15400538190850754,
|
|
"grad_norm": 0.006600509863346815,
|
|
"learning_rate": 0.00016990230721263772,
|
|
"loss": 0.0022,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 0.1542123783895674,
|
|
"grad_norm": 0.0031586415134370327,
|
|
"learning_rate": 0.00016986073581376013,
|
|
"loss": 0.0003,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.1544193748706272,
|
|
"grad_norm": 0.00408458337187767,
|
|
"learning_rate": 0.00016981916441488257,
|
|
"loss": 0.0014,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 0.15462637135168703,
|
|
"grad_norm": 0.006417447701096535,
|
|
"learning_rate": 0.000169777593016005,
|
|
"loss": 0.0013,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 0.15483336783274684,
|
|
"grad_norm": 0.002676580101251602,
|
|
"learning_rate": 0.00016973602161712742,
|
|
"loss": 0.0009,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 0.15504036431380666,
|
|
"grad_norm": 0.003124868730083108,
|
|
"learning_rate": 0.00016969445021824985,
|
|
"loss": 0.0001,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 0.15524736079486648,
|
|
"grad_norm": 0.005617608781903982,
|
|
"learning_rate": 0.0001696528788193723,
|
|
"loss": 0.0009,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.1554543572759263,
|
|
"grad_norm": 0.0029069185256958008,
|
|
"learning_rate": 0.00016961130742049473,
|
|
"loss": 0.0008,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 0.15566135375698614,
|
|
"grad_norm": 0.008944474160671234,
|
|
"learning_rate": 0.00016956973602161714,
|
|
"loss": 0.0017,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 0.15586835023804596,
|
|
"grad_norm": 0.004935794975608587,
|
|
"learning_rate": 0.00016952816462273957,
|
|
"loss": 0.0012,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 0.15607534671910578,
|
|
"grad_norm": 0.0005579759599640965,
|
|
"learning_rate": 0.000169486593223862,
|
|
"loss": 0.0,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 0.1562823432001656,
|
|
"grad_norm": 0.00902874581515789,
|
|
"learning_rate": 0.00016944502182498442,
|
|
"loss": 0.0004,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.15648933968122541,
|
|
"grad_norm": 0.00498725613579154,
|
|
"learning_rate": 0.00016940345042610683,
|
|
"loss": 0.0001,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 0.15669633616228523,
|
|
"grad_norm": 0.0004982489626854658,
|
|
"learning_rate": 0.00016936187902722927,
|
|
"loss": 0.0,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 0.15690333264334505,
|
|
"grad_norm": 0.0011680921306833625,
|
|
"learning_rate": 0.0001693203076283517,
|
|
"loss": 0.0,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 0.1571103291244049,
|
|
"grad_norm": 0.0013553223107010126,
|
|
"learning_rate": 0.00016927873622947412,
|
|
"loss": 0.0,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 0.15731732560546471,
|
|
"grad_norm": 0.00549361202865839,
|
|
"learning_rate": 0.00016923716483059655,
|
|
"loss": 0.0016,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.15752432208652453,
|
|
"grad_norm": 0.004852932877838612,
|
|
"learning_rate": 0.000169195593431719,
|
|
"loss": 0.0002,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 0.15773131856758435,
|
|
"grad_norm": 0.0046032629907131195,
|
|
"learning_rate": 0.0001691540220328414,
|
|
"loss": 0.002,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 0.15793831504864417,
|
|
"grad_norm": 0.009385612793266773,
|
|
"learning_rate": 0.00016911245063396384,
|
|
"loss": 0.0003,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 0.15814531152970399,
|
|
"grad_norm": 0.0024257150944322348,
|
|
"learning_rate": 0.00016907087923508627,
|
|
"loss": 0.0009,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 0.1583523080107638,
|
|
"grad_norm": 0.002726235194131732,
|
|
"learning_rate": 0.00016902930783620868,
|
|
"loss": 0.0012,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.15855930449182365,
|
|
"grad_norm": 0.006497920490801334,
|
|
"learning_rate": 0.00016898773643733112,
|
|
"loss": 0.0005,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 0.15876630097288347,
|
|
"grad_norm": 0.012873928062617779,
|
|
"learning_rate": 0.00016894616503845356,
|
|
"loss": 0.0021,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 0.15897329745394329,
|
|
"grad_norm": 0.009931253269314766,
|
|
"learning_rate": 0.000168904593639576,
|
|
"loss": 0.0004,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 0.1591802939350031,
|
|
"grad_norm": 0.0012783849379047751,
|
|
"learning_rate": 0.0001688630222406984,
|
|
"loss": 0.0,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 0.15938729041606292,
|
|
"grad_norm": 0.0025215751957148314,
|
|
"learning_rate": 0.00016882145084182084,
|
|
"loss": 0.0001,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.15959428689712274,
|
|
"grad_norm": 0.00574857834726572,
|
|
"learning_rate": 0.00016877987944294328,
|
|
"loss": 0.0002,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 0.15980128337818258,
|
|
"grad_norm": 0.0008691879920661449,
|
|
"learning_rate": 0.0001687383080440657,
|
|
"loss": 0.0,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 0.1600082798592424,
|
|
"grad_norm": 0.008225478231906891,
|
|
"learning_rate": 0.00016869673664518813,
|
|
"loss": 0.0002,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 0.16021527634030222,
|
|
"grad_norm": 0.003890304360538721,
|
|
"learning_rate": 0.00016865516524631056,
|
|
"loss": 0.0014,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 0.16042227282136204,
|
|
"grad_norm": 0.0011641031596809626,
|
|
"learning_rate": 0.00016861359384743297,
|
|
"loss": 0.0001,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.16062926930242186,
|
|
"grad_norm": 0.008769871667027473,
|
|
"learning_rate": 0.0001685720224485554,
|
|
"loss": 0.0011,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 0.16083626578348167,
|
|
"grad_norm": 0.005050954408943653,
|
|
"learning_rate": 0.00016853045104967785,
|
|
"loss": 0.0002,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 0.1610432622645415,
|
|
"grad_norm": 0.002180990530177951,
|
|
"learning_rate": 0.00016848887965080026,
|
|
"loss": 0.0008,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 0.16125025874560134,
|
|
"grad_norm": 0.0015876460820436478,
|
|
"learning_rate": 0.00016844730825192267,
|
|
"loss": 0.0,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 0.16145725522666116,
|
|
"grad_norm": 0.004357179626822472,
|
|
"learning_rate": 0.0001684057368530451,
|
|
"loss": 0.0003,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.16166425170772097,
|
|
"grad_norm": 0.0034056720323860645,
|
|
"learning_rate": 0.00016836416545416754,
|
|
"loss": 0.0012,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 0.1618712481887808,
|
|
"grad_norm": 0.005545208230614662,
|
|
"learning_rate": 0.00016832259405528995,
|
|
"loss": 0.0001,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 0.1620782446698406,
|
|
"grad_norm": 0.0002129770437022671,
|
|
"learning_rate": 0.0001682810226564124,
|
|
"loss": 0.0,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 0.16228524115090043,
|
|
"grad_norm": 0.0036753590684384108,
|
|
"learning_rate": 0.00016823945125753482,
|
|
"loss": 0.0001,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 0.16249223763196025,
|
|
"grad_norm": 0.0018491502851247787,
|
|
"learning_rate": 0.00016819787985865723,
|
|
"loss": 0.0008,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.1626992341130201,
|
|
"grad_norm": 0.0006519712042063475,
|
|
"learning_rate": 0.00016815630845977967,
|
|
"loss": 0.0,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 0.1629062305940799,
|
|
"grad_norm": 0.011139947921037674,
|
|
"learning_rate": 0.0001681147370609021,
|
|
"loss": 0.0011,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 0.16311322707513973,
|
|
"grad_norm": 0.0020866713020950556,
|
|
"learning_rate": 0.00016807316566202455,
|
|
"loss": 0.0002,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 0.16332022355619955,
|
|
"grad_norm": 0.0034007905051112175,
|
|
"learning_rate": 0.00016803159426314696,
|
|
"loss": 0.0008,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 0.16352722003725936,
|
|
"grad_norm": 0.0017938032979145646,
|
|
"learning_rate": 0.0001679900228642694,
|
|
"loss": 0.0009,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.16373421651831918,
|
|
"grad_norm": 0.005385685246437788,
|
|
"learning_rate": 0.00016794845146539183,
|
|
"loss": 0.001,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 0.163941212999379,
|
|
"grad_norm": 0.010079730302095413,
|
|
"learning_rate": 0.00016790688006651424,
|
|
"loss": 0.0004,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 0.16414820948043884,
|
|
"grad_norm": 0.005826961249113083,
|
|
"learning_rate": 0.00016786530866763668,
|
|
"loss": 0.0001,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 0.16435520596149866,
|
|
"grad_norm": 0.002885566558688879,
|
|
"learning_rate": 0.00016782373726875911,
|
|
"loss": 0.0001,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 0.16456220244255848,
|
|
"grad_norm": 0.004031067714095116,
|
|
"learning_rate": 0.00016778216586988152,
|
|
"loss": 0.0002,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.1647691989236183,
|
|
"grad_norm": 0.0019721402786672115,
|
|
"learning_rate": 0.00016774059447100396,
|
|
"loss": 0.0001,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 0.16497619540467812,
|
|
"grad_norm": 0.002213244093582034,
|
|
"learning_rate": 0.0001676990230721264,
|
|
"loss": 0.0006,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 0.16518319188573793,
|
|
"grad_norm": 0.001942839939147234,
|
|
"learning_rate": 0.00016765745167324884,
|
|
"loss": 0.0,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 0.16539018836679775,
|
|
"grad_norm": 0.003173516597598791,
|
|
"learning_rate": 0.00016761588027437125,
|
|
"loss": 0.0001,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 0.1655971848478576,
|
|
"grad_norm": 0.004877821542322636,
|
|
"learning_rate": 0.00016757430887549366,
|
|
"loss": 0.0001,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.16580418132891742,
|
|
"grad_norm": 0.006676991004496813,
|
|
"learning_rate": 0.0001675327374766161,
|
|
"loss": 0.0002,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 0.16601117780997723,
|
|
"grad_norm": 0.0022598986979573965,
|
|
"learning_rate": 0.0001674911660777385,
|
|
"loss": 0.0003,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 0.16621817429103705,
|
|
"grad_norm": 0.00012318776862230152,
|
|
"learning_rate": 0.00016744959467886094,
|
|
"loss": 0.0,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 0.16642517077209687,
|
|
"grad_norm": 0.006045771297067404,
|
|
"learning_rate": 0.00016740802327998338,
|
|
"loss": 0.0003,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 0.1666321672531567,
|
|
"grad_norm": 0.004370058421045542,
|
|
"learning_rate": 0.0001673664518811058,
|
|
"loss": 0.0001,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.1668391637342165,
|
|
"grad_norm": 0.005490643437951803,
|
|
"learning_rate": 0.00016732488048222822,
|
|
"loss": 0.0022,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 0.16704616021527635,
|
|
"grad_norm": 0.007493430282920599,
|
|
"learning_rate": 0.00016728330908335066,
|
|
"loss": 0.001,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 0.16725315669633617,
|
|
"grad_norm": 0.0006159085314720869,
|
|
"learning_rate": 0.0001672417376844731,
|
|
"loss": 0.0,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 0.167460153177396,
|
|
"grad_norm": 0.002211883431300521,
|
|
"learning_rate": 0.0001672001662855955,
|
|
"loss": 0.0,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 0.1676671496584558,
|
|
"grad_norm": 0.0028680090326815844,
|
|
"learning_rate": 0.00016715859488671794,
|
|
"loss": 0.0011,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.16787414613951562,
|
|
"grad_norm": 0.004992680158466101,
|
|
"learning_rate": 0.00016711702348784038,
|
|
"loss": 0.0002,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 0.16808114262057544,
|
|
"grad_norm": 0.0024819490499794483,
|
|
"learning_rate": 0.0001670754520889628,
|
|
"loss": 0.0001,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 0.16828813910163526,
|
|
"grad_norm": 0.001662694732658565,
|
|
"learning_rate": 0.00016703388069008523,
|
|
"loss": 0.0001,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 0.1684951355826951,
|
|
"grad_norm": 0.0027136337012052536,
|
|
"learning_rate": 0.00016699230929120767,
|
|
"loss": 0.0015,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 0.16870213206375492,
|
|
"grad_norm": 0.0055983890779316425,
|
|
"learning_rate": 0.00016695073789233008,
|
|
"loss": 0.0006,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.16890912854481474,
|
|
"grad_norm": 0.0005543065490201116,
|
|
"learning_rate": 0.0001669091664934525,
|
|
"loss": 0.0,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 0.16911612502587456,
|
|
"grad_norm": 0.006743449252098799,
|
|
"learning_rate": 0.00016686759509457495,
|
|
"loss": 0.0006,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 0.16932312150693438,
|
|
"grad_norm": 0.005361751653254032,
|
|
"learning_rate": 0.0001668260236956974,
|
|
"loss": 0.0002,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 0.1695301179879942,
|
|
"grad_norm": 0.015542850829660892,
|
|
"learning_rate": 0.0001667844522968198,
|
|
"loss": 0.0001,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 0.169737114469054,
|
|
"grad_norm": 0.006788911763578653,
|
|
"learning_rate": 0.00016674288089794223,
|
|
"loss": 0.0004,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.16994411095011386,
|
|
"grad_norm": 0.006434622220695019,
|
|
"learning_rate": 0.00016670130949906467,
|
|
"loss": 0.0004,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 0.17015110743117368,
|
|
"grad_norm": 0.0024506154004484415,
|
|
"learning_rate": 0.00016665973810018708,
|
|
"loss": 0.0007,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 0.1703581039122335,
|
|
"grad_norm": 0.000382046215236187,
|
|
"learning_rate": 0.0001666181667013095,
|
|
"loss": 0.0,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 0.1705651003932933,
|
|
"grad_norm": 0.00432636309415102,
|
|
"learning_rate": 0.00016657659530243193,
|
|
"loss": 0.0009,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 0.17077209687435313,
|
|
"grad_norm": 0.005686972755938768,
|
|
"learning_rate": 0.00016653502390355436,
|
|
"loss": 0.0002,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.17097909335541295,
|
|
"grad_norm": 0.005743528716266155,
|
|
"learning_rate": 0.00016649345250467677,
|
|
"loss": 0.0003,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 0.1711860898364728,
|
|
"grad_norm": 0.002116352552548051,
|
|
"learning_rate": 0.0001664518811057992,
|
|
"loss": 0.0003,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 0.1713930863175326,
|
|
"grad_norm": 0.002352718496695161,
|
|
"learning_rate": 0.00016641030970692165,
|
|
"loss": 0.0,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 0.17160008279859243,
|
|
"grad_norm": 0.0044693113304674625,
|
|
"learning_rate": 0.00016636873830804406,
|
|
"loss": 0.0004,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 0.17180707927965225,
|
|
"grad_norm": 0.0005167116178199649,
|
|
"learning_rate": 0.0001663271669091665,
|
|
"loss": 0.0,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.17201407576071207,
|
|
"grad_norm": 0.005162122659385204,
|
|
"learning_rate": 0.00016628559551028893,
|
|
"loss": 0.0002,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 0.17222107224177188,
|
|
"grad_norm": 0.00015954635455273092,
|
|
"learning_rate": 0.00016624402411141134,
|
|
"loss": 0.0,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 0.1724280687228317,
|
|
"grad_norm": 0.0030487151816487312,
|
|
"learning_rate": 0.00016620245271253378,
|
|
"loss": 0.0001,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 0.17263506520389155,
|
|
"grad_norm": 0.002151534892618656,
|
|
"learning_rate": 0.00016616088131365622,
|
|
"loss": 0.0002,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 0.17284206168495136,
|
|
"grad_norm": 0.0044494629837572575,
|
|
"learning_rate": 0.00016611930991477865,
|
|
"loss": 0.0005,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.17304905816601118,
|
|
"grad_norm": 0.00033838755916804075,
|
|
"learning_rate": 0.00016607773851590106,
|
|
"loss": 0.0,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 0.173256054647071,
|
|
"grad_norm": 0.0005302856443449855,
|
|
"learning_rate": 0.0001660361671170235,
|
|
"loss": 0.0,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 0.17346305112813082,
|
|
"grad_norm": 0.0013208640739321709,
|
|
"learning_rate": 0.00016599459571814594,
|
|
"loss": 0.0,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 0.17367004760919064,
|
|
"grad_norm": 0.001052051316946745,
|
|
"learning_rate": 0.00016595302431926835,
|
|
"loss": 0.0,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 0.17387704409025045,
|
|
"grad_norm": 0.018445929512381554,
|
|
"learning_rate": 0.00016591145292039079,
|
|
"loss": 0.0004,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.1740840405713103,
|
|
"grad_norm": 0.0025256802327930927,
|
|
"learning_rate": 0.00016586988152151322,
|
|
"loss": 0.0001,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 0.17429103705237012,
|
|
"grad_norm": 0.0014724883949384093,
|
|
"learning_rate": 0.00016582831012263563,
|
|
"loss": 0.0,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 0.17449803353342994,
|
|
"grad_norm": 0.003576815826818347,
|
|
"learning_rate": 0.00016578673872375807,
|
|
"loss": 0.0001,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 0.17470503001448975,
|
|
"grad_norm": 0.0006163385114632547,
|
|
"learning_rate": 0.00016574516732488048,
|
|
"loss": 0.0,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 0.17491202649554957,
|
|
"grad_norm": 0.0011656074784696102,
|
|
"learning_rate": 0.00016570359592600292,
|
|
"loss": 0.0001,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.1751190229766094,
|
|
"grad_norm": 0.0018338944064453244,
|
|
"learning_rate": 0.00016566202452712533,
|
|
"loss": 0.0001,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 0.1753260194576692,
|
|
"grad_norm": 0.005035779904574156,
|
|
"learning_rate": 0.00016562045312824776,
|
|
"loss": 0.0014,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 0.17553301593872905,
|
|
"grad_norm": 0.006770180072635412,
|
|
"learning_rate": 0.0001655788817293702,
|
|
"loss": 0.0012,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 0.17574001241978887,
|
|
"grad_norm": 0.0003650276339612901,
|
|
"learning_rate": 0.0001655373103304926,
|
|
"loss": 0.0,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 0.1759470089008487,
|
|
"grad_norm": 0.00023851868172641844,
|
|
"learning_rate": 0.00016549573893161505,
|
|
"loss": 0.0,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.1761540053819085,
|
|
"grad_norm": 0.014695384539663792,
|
|
"learning_rate": 0.00016545416753273748,
|
|
"loss": 0.0011,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 0.17636100186296833,
|
|
"grad_norm": 0.00036404369166120887,
|
|
"learning_rate": 0.00016541259613385992,
|
|
"loss": 0.0,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 0.17656799834402814,
|
|
"grad_norm": 0.002682497026398778,
|
|
"learning_rate": 0.00016537102473498233,
|
|
"loss": 0.0012,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 0.17677499482508796,
|
|
"grad_norm": 0.007028127089142799,
|
|
"learning_rate": 0.00016532945333610477,
|
|
"loss": 0.0002,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 0.1769819913061478,
|
|
"grad_norm": 0.0012324461713433266,
|
|
"learning_rate": 0.0001652878819372272,
|
|
"loss": 0.0,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.17718898778720762,
|
|
"grad_norm": 0.00292210397310555,
|
|
"learning_rate": 0.00016524631053834962,
|
|
"loss": 0.0,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 0.17739598426826744,
|
|
"grad_norm": 0.0014698312152177095,
|
|
"learning_rate": 0.00016520473913947205,
|
|
"loss": 0.0002,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 0.17760298074932726,
|
|
"grad_norm": 0.0022247894667088985,
|
|
"learning_rate": 0.0001651631677405945,
|
|
"loss": 0.0,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 0.17780997723038708,
|
|
"grad_norm": 0.0006738615338690579,
|
|
"learning_rate": 0.0001651215963417169,
|
|
"loss": 0.0,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 0.1780169737114469,
|
|
"grad_norm": 0.004056425765156746,
|
|
"learning_rate": 0.00016508002494283934,
|
|
"loss": 0.001,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.17822397019250671,
|
|
"grad_norm": 0.006607827264815569,
|
|
"learning_rate": 0.00016503845354396177,
|
|
"loss": 0.0003,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 0.17843096667356656,
|
|
"grad_norm": 0.007498994003981352,
|
|
"learning_rate": 0.00016499688214508418,
|
|
"loss": 0.0009,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 0.17863796315462638,
|
|
"grad_norm": 0.002715140348300338,
|
|
"learning_rate": 0.00016495531074620662,
|
|
"loss": 0.0001,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 0.1788449596356862,
|
|
"grad_norm": 0.006559406872838736,
|
|
"learning_rate": 0.00016491373934732906,
|
|
"loss": 0.0012,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 0.179051956116746,
|
|
"grad_norm": 0.005900564603507519,
|
|
"learning_rate": 0.0001648721679484515,
|
|
"loss": 0.0001,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.17925895259780583,
|
|
"grad_norm": 0.0014680642634630203,
|
|
"learning_rate": 0.00016483059654957388,
|
|
"loss": 0.0004,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 0.17946594907886565,
|
|
"grad_norm": 0.0017862527165561914,
|
|
"learning_rate": 0.00016478902515069632,
|
|
"loss": 0.0001,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 0.17967294555992547,
|
|
"grad_norm": 0.0010660128900781274,
|
|
"learning_rate": 0.00016474745375181875,
|
|
"loss": 0.0001,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 0.1798799420409853,
|
|
"grad_norm": 0.010508016683161259,
|
|
"learning_rate": 0.0001647058823529412,
|
|
"loss": 0.0004,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 0.18008693852204513,
|
|
"grad_norm": 0.0026938568335026503,
|
|
"learning_rate": 0.0001646643109540636,
|
|
"loss": 0.0001,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.18029393500310495,
|
|
"grad_norm": 0.0015470877988263965,
|
|
"learning_rate": 0.00016462273955518604,
|
|
"loss": 0.0,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 0.18050093148416477,
|
|
"grad_norm": 0.003435211256146431,
|
|
"learning_rate": 0.00016458116815630847,
|
|
"loss": 0.0001,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 0.18070792796522459,
|
|
"grad_norm": 0.010342281311750412,
|
|
"learning_rate": 0.00016453959675743088,
|
|
"loss": 0.0007,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 0.1809149244462844,
|
|
"grad_norm": 0.0007751841330900788,
|
|
"learning_rate": 0.00016449802535855332,
|
|
"loss": 0.0,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 0.18112192092734425,
|
|
"grad_norm": 0.0003991715202573687,
|
|
"learning_rate": 0.00016445645395967576,
|
|
"loss": 0.0,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.18132891740840407,
|
|
"grad_norm": 0.004742010496556759,
|
|
"learning_rate": 0.00016441488256079817,
|
|
"loss": 0.0001,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 0.18153591388946388,
|
|
"grad_norm": 0.0010139705846086144,
|
|
"learning_rate": 0.0001643733111619206,
|
|
"loss": 0.0,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 0.1817429103705237,
|
|
"grad_norm": 0.00697368336841464,
|
|
"learning_rate": 0.00016433173976304304,
|
|
"loss": 0.0003,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 0.18194990685158352,
|
|
"grad_norm": 0.0056029148399829865,
|
|
"learning_rate": 0.00016429016836416545,
|
|
"loss": 0.0003,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 0.18215690333264334,
|
|
"grad_norm": 0.0031287583988159895,
|
|
"learning_rate": 0.0001642485969652879,
|
|
"loss": 0.0001,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.18236389981370316,
|
|
"grad_norm": 0.0005836491473019123,
|
|
"learning_rate": 0.00016420702556641033,
|
|
"loss": 0.0,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 0.182570896294763,
|
|
"grad_norm": 0.006221551448106766,
|
|
"learning_rate": 0.00016416545416753276,
|
|
"loss": 0.0014,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 0.18277789277582282,
|
|
"grad_norm": 0.00045936627429910004,
|
|
"learning_rate": 0.00016412388276865517,
|
|
"loss": 0.0,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 0.18298488925688264,
|
|
"grad_norm": 0.0006924067274667323,
|
|
"learning_rate": 0.0001640823113697776,
|
|
"loss": 0.0,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 0.18319188573794246,
|
|
"grad_norm": 0.010869395919144154,
|
|
"learning_rate": 0.00016404073997090005,
|
|
"loss": 0.0013,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.18339888221900227,
|
|
"grad_norm": 0.00480787456035614,
|
|
"learning_rate": 0.00016399916857202246,
|
|
"loss": 0.0009,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 0.1836058787000621,
|
|
"grad_norm": 0.0004685772000811994,
|
|
"learning_rate": 0.0001639575971731449,
|
|
"loss": 0.0,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 0.1838128751811219,
|
|
"grad_norm": 0.00303410436026752,
|
|
"learning_rate": 0.0001639160257742673,
|
|
"loss": 0.0007,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 0.18401987166218176,
|
|
"grad_norm": 0.001141547691076994,
|
|
"learning_rate": 0.00016387445437538974,
|
|
"loss": 0.0001,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 0.18422686814324157,
|
|
"grad_norm": 0.007433968596160412,
|
|
"learning_rate": 0.00016383288297651215,
|
|
"loss": 0.0001,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.1844338646243014,
|
|
"grad_norm": 0.003386344527825713,
|
|
"learning_rate": 0.0001637913115776346,
|
|
"loss": 0.0003,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 0.1846408611053612,
|
|
"grad_norm": 0.0011372484732419252,
|
|
"learning_rate": 0.00016374974017875702,
|
|
"loss": 0.0,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 0.18484785758642103,
|
|
"grad_norm": 0.0039020997937768698,
|
|
"learning_rate": 0.00016370816877987943,
|
|
"loss": 0.0001,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 0.18505485406748085,
|
|
"grad_norm": 0.003088288474828005,
|
|
"learning_rate": 0.00016366659738100187,
|
|
"loss": 0.0009,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 0.18526185054854066,
|
|
"grad_norm": 0.001126794726587832,
|
|
"learning_rate": 0.0001636250259821243,
|
|
"loss": 0.0001,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.1854688470296005,
|
|
"grad_norm": 0.007449139375239611,
|
|
"learning_rate": 0.00016358345458324672,
|
|
"loss": 0.0004,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 0.18567584351066033,
|
|
"grad_norm": 0.005704225040972233,
|
|
"learning_rate": 0.00016354188318436916,
|
|
"loss": 0.0016,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 0.18588283999172014,
|
|
"grad_norm": 0.004983640741556883,
|
|
"learning_rate": 0.0001635003117854916,
|
|
"loss": 0.0016,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 0.18608983647277996,
|
|
"grad_norm": 0.00034120268537662923,
|
|
"learning_rate": 0.00016345874038661403,
|
|
"loss": 0.0,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 0.18629683295383978,
|
|
"grad_norm": 0.007043101824820042,
|
|
"learning_rate": 0.00016341716898773644,
|
|
"loss": 0.001,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.1865038294348996,
|
|
"grad_norm": 0.004050271585583687,
|
|
"learning_rate": 0.00016337559758885888,
|
|
"loss": 0.0001,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 0.18671082591595942,
|
|
"grad_norm": 0.001882696757093072,
|
|
"learning_rate": 0.00016333402618998131,
|
|
"loss": 0.0005,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 0.18691782239701926,
|
|
"grad_norm": 0.002479350659996271,
|
|
"learning_rate": 0.00016329245479110372,
|
|
"loss": 0.0003,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 0.18712481887807908,
|
|
"grad_norm": 0.00246567465364933,
|
|
"learning_rate": 0.00016325088339222616,
|
|
"loss": 0.0001,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 0.1873318153591389,
|
|
"grad_norm": 0.0021426973398774862,
|
|
"learning_rate": 0.0001632093119933486,
|
|
"loss": 0.0006,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.18753881184019872,
|
|
"grad_norm": 0.004363594576716423,
|
|
"learning_rate": 0.000163167740594471,
|
|
"loss": 0.0002,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 0.18774580832125853,
|
|
"grad_norm": 0.004984852857887745,
|
|
"learning_rate": 0.00016312616919559345,
|
|
"loss": 0.0004,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 0.18795280480231835,
|
|
"grad_norm": 0.004489907994866371,
|
|
"learning_rate": 0.00016308459779671588,
|
|
"loss": 0.0001,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 0.18815980128337817,
|
|
"grad_norm": 0.0013233786448836327,
|
|
"learning_rate": 0.0001630430263978383,
|
|
"loss": 0.0,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 0.18836679776443802,
|
|
"grad_norm": 0.00894436426460743,
|
|
"learning_rate": 0.0001630014549989607,
|
|
"loss": 0.0016,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.18857379424549783,
|
|
"grad_norm": 0.001729694427922368,
|
|
"learning_rate": 0.00016295988360008314,
|
|
"loss": 0.001,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 0.18878079072655765,
|
|
"grad_norm": 0.005005873739719391,
|
|
"learning_rate": 0.00016291831220120558,
|
|
"loss": 0.0005,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 0.18898778720761747,
|
|
"grad_norm": 0.0007573101902380586,
|
|
"learning_rate": 0.00016287674080232799,
|
|
"loss": 0.0,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 0.1891947836886773,
|
|
"grad_norm": 0.005315006244927645,
|
|
"learning_rate": 0.00016283516940345042,
|
|
"loss": 0.0001,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 0.1894017801697371,
|
|
"grad_norm": 0.001140634878538549,
|
|
"learning_rate": 0.00016279359800457286,
|
|
"loss": 0.0001,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.18960877665079692,
|
|
"grad_norm": 0.003881396260112524,
|
|
"learning_rate": 0.0001627520266056953,
|
|
"loss": 0.0001,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 0.18981577313185677,
|
|
"grad_norm": 0.001353550935164094,
|
|
"learning_rate": 0.0001627104552068177,
|
|
"loss": 0.0,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 0.1900227696129166,
|
|
"grad_norm": 0.001415180740877986,
|
|
"learning_rate": 0.00016266888380794014,
|
|
"loss": 0.0,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 0.1902297660939764,
|
|
"grad_norm": 0.006398684810847044,
|
|
"learning_rate": 0.00016262731240906258,
|
|
"loss": 0.0006,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 0.19043676257503622,
|
|
"grad_norm": 0.005204816348850727,
|
|
"learning_rate": 0.000162585741010185,
|
|
"loss": 0.0016,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.19064375905609604,
|
|
"grad_norm": 0.0015194268198683858,
|
|
"learning_rate": 0.00016254416961130743,
|
|
"loss": 0.0007,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 0.19085075553715586,
|
|
"grad_norm": 0.002916971454396844,
|
|
"learning_rate": 0.00016250259821242987,
|
|
"loss": 0.0015,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 0.1910577520182157,
|
|
"grad_norm": 0.00017840563668869436,
|
|
"learning_rate": 0.00016246102681355228,
|
|
"loss": 0.0,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 0.19126474849927552,
|
|
"grad_norm": 0.0017515165964141488,
|
|
"learning_rate": 0.0001624194554146747,
|
|
"loss": 0.0005,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 0.19147174498033534,
|
|
"grad_norm": 0.0011207156348973513,
|
|
"learning_rate": 0.00016237788401579715,
|
|
"loss": 0.0,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.19167874146139516,
|
|
"grad_norm": 0.00647772429510951,
|
|
"learning_rate": 0.00016233631261691956,
|
|
"loss": 0.0004,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 0.19188573794245498,
|
|
"grad_norm": 0.0009239514474757016,
|
|
"learning_rate": 0.000162294741218042,
|
|
"loss": 0.0,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 0.1920927344235148,
|
|
"grad_norm": 0.0009718858054839075,
|
|
"learning_rate": 0.00016225316981916443,
|
|
"loss": 0.0,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 0.1922997309045746,
|
|
"grad_norm": 0.0009835285600274801,
|
|
"learning_rate": 0.00016221159842028687,
|
|
"loss": 0.0001,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 0.19250672738563446,
|
|
"grad_norm": 0.003986849449574947,
|
|
"learning_rate": 0.00016217002702140928,
|
|
"loss": 0.0005,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.19271372386669428,
|
|
"grad_norm": 0.0055690668523311615,
|
|
"learning_rate": 0.00016212845562253172,
|
|
"loss": 0.0002,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 0.1929207203477541,
|
|
"grad_norm": 0.006283191032707691,
|
|
"learning_rate": 0.00016208688422365413,
|
|
"loss": 0.0003,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 0.1931277168288139,
|
|
"grad_norm": 0.00035167241003364325,
|
|
"learning_rate": 0.00016204531282477656,
|
|
"loss": 0.0,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 0.19333471330987373,
|
|
"grad_norm": 0.001550202607177198,
|
|
"learning_rate": 0.00016200374142589897,
|
|
"loss": 0.0001,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 0.19354170979093355,
|
|
"grad_norm": 0.0009650330757722259,
|
|
"learning_rate": 0.0001619621700270214,
|
|
"loss": 0.0,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.19374870627199337,
|
|
"grad_norm": 0.006459403783082962,
|
|
"learning_rate": 0.00016192059862814385,
|
|
"loss": 0.0009,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 0.1939557027530532,
|
|
"grad_norm": 0.0006884423783048987,
|
|
"learning_rate": 0.00016187902722926626,
|
|
"loss": 0.0,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 0.19416269923411303,
|
|
"grad_norm": 0.0034009867813438177,
|
|
"learning_rate": 0.0001618374558303887,
|
|
"loss": 0.0001,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 0.19436969571517285,
|
|
"grad_norm": 0.001749175600707531,
|
|
"learning_rate": 0.00016179588443151113,
|
|
"loss": 0.0012,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 0.19457669219623266,
|
|
"grad_norm": 0.005409194156527519,
|
|
"learning_rate": 0.00016175431303263354,
|
|
"loss": 0.0002,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.19478368867729248,
|
|
"grad_norm": 0.0033904362935572863,
|
|
"learning_rate": 0.00016171274163375598,
|
|
"loss": 0.0001,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 0.1949906851583523,
|
|
"grad_norm": 0.005800081882625818,
|
|
"learning_rate": 0.00016167117023487842,
|
|
"loss": 0.0014,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 0.19519768163941212,
|
|
"grad_norm": 0.001085714902728796,
|
|
"learning_rate": 0.00016162959883600083,
|
|
"loss": 0.0001,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 0.19540467812047196,
|
|
"grad_norm": 0.0017082407139241695,
|
|
"learning_rate": 0.00016158802743712326,
|
|
"loss": 0.0001,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 0.19561167460153178,
|
|
"grad_norm": 0.0016056247986853123,
|
|
"learning_rate": 0.0001615464560382457,
|
|
"loss": 0.0009,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.1958186710825916,
|
|
"grad_norm": 0.0005931173800490797,
|
|
"learning_rate": 0.00016150488463936814,
|
|
"loss": 0.0,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 0.19602566756365142,
|
|
"grad_norm": 0.0002708766842260957,
|
|
"learning_rate": 0.00016146331324049055,
|
|
"loss": 0.0,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 0.19623266404471124,
|
|
"grad_norm": 0.003350366372615099,
|
|
"learning_rate": 0.00016142174184161299,
|
|
"loss": 0.0004,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 0.19643966052577105,
|
|
"grad_norm": 0.00030215582228265703,
|
|
"learning_rate": 0.00016138017044273542,
|
|
"loss": 0.0,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 0.19664665700683087,
|
|
"grad_norm": 0.0013855715515092015,
|
|
"learning_rate": 0.00016133859904385783,
|
|
"loss": 0.0009,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.19685365348789072,
|
|
"grad_norm": 0.0005864354898221791,
|
|
"learning_rate": 0.00016129702764498027,
|
|
"loss": 0.0,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 0.19706064996895054,
|
|
"grad_norm": 0.0006372429197654128,
|
|
"learning_rate": 0.0001612554562461027,
|
|
"loss": 0.0,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 0.19726764645001035,
|
|
"grad_norm": 0.0005041586118750274,
|
|
"learning_rate": 0.00016121388484722512,
|
|
"loss": 0.0,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 0.19747464293107017,
|
|
"grad_norm": 0.0023472902830690145,
|
|
"learning_rate": 0.00016117231344834753,
|
|
"loss": 0.001,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 0.19768163941213,
|
|
"grad_norm": 0.00015194782463368028,
|
|
"learning_rate": 0.00016113074204946996,
|
|
"loss": 0.0,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.1978886358931898,
|
|
"grad_norm": 0.001190232578665018,
|
|
"learning_rate": 0.0001610891706505924,
|
|
"loss": 0.0002,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 0.19809563237424963,
|
|
"grad_norm": 0.0018357646185904741,
|
|
"learning_rate": 0.0001610475992517148,
|
|
"loss": 0.0002,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 0.19830262885530947,
|
|
"grad_norm": 0.007886867970228195,
|
|
"learning_rate": 0.00016100602785283725,
|
|
"loss": 0.0015,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 0.1985096253363693,
|
|
"grad_norm": 0.00014407855633180588,
|
|
"learning_rate": 0.00016096445645395968,
|
|
"loss": 0.0,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 0.1987166218174291,
|
|
"grad_norm": 0.0008407345740124583,
|
|
"learning_rate": 0.0001609228850550821,
|
|
"loss": 0.0001,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.19892361829848892,
|
|
"grad_norm": 0.0005690194084309042,
|
|
"learning_rate": 0.00016088131365620453,
|
|
"loss": 0.0,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 0.19913061477954874,
|
|
"grad_norm": 0.0001164446584880352,
|
|
"learning_rate": 0.00016083974225732697,
|
|
"loss": 0.0,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 0.19933761126060856,
|
|
"grad_norm": 0.004800689872354269,
|
|
"learning_rate": 0.0001607981708584494,
|
|
"loss": 0.0009,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 0.19954460774166838,
|
|
"grad_norm": 0.0038551113102585077,
|
|
"learning_rate": 0.00016075659945957182,
|
|
"loss": 0.0002,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 0.19975160422272822,
|
|
"grad_norm": 0.00023845378018449992,
|
|
"learning_rate": 0.00016071502806069425,
|
|
"loss": 0.0,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.19995860070378804,
|
|
"grad_norm": 0.0006543719209730625,
|
|
"learning_rate": 0.0001606734566618167,
|
|
"loss": 0.0,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 0.20016559718484786,
|
|
"grad_norm": 0.0024344087578356266,
|
|
"learning_rate": 0.0001606318852629391,
|
|
"loss": 0.0011,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 0.20037259366590768,
|
|
"grad_norm": 0.0006420607678592205,
|
|
"learning_rate": 0.00016059031386406154,
|
|
"loss": 0.0,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 0.2005795901469675,
|
|
"grad_norm": 0.0016330952057614923,
|
|
"learning_rate": 0.00016054874246518397,
|
|
"loss": 0.0,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 0.2007865866280273,
|
|
"grad_norm": 0.0013299377169460058,
|
|
"learning_rate": 0.00016050717106630638,
|
|
"loss": 0.0001,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.20099358310908713,
|
|
"grad_norm": 0.0023206102196127176,
|
|
"learning_rate": 0.00016046559966742882,
|
|
"loss": 0.0006,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 0.20120057959014698,
|
|
"grad_norm": 0.0030964380130171776,
|
|
"learning_rate": 0.00016042402826855126,
|
|
"loss": 0.0004,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 0.2014075760712068,
|
|
"grad_norm": 0.0015272133750841022,
|
|
"learning_rate": 0.00016038245686967367,
|
|
"loss": 0.0,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 0.2016145725522666,
|
|
"grad_norm": 0.0036174836568534374,
|
|
"learning_rate": 0.0001603408854707961,
|
|
"loss": 0.0002,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 0.20182156903332643,
|
|
"grad_norm": 0.0014752513961866498,
|
|
"learning_rate": 0.00016029931407191854,
|
|
"loss": 0.0,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.20202856551438625,
|
|
"grad_norm": 0.0008594008395448327,
|
|
"learning_rate": 0.00016025774267304095,
|
|
"loss": 0.0001,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 0.20223556199544607,
|
|
"grad_norm": 0.007280942518264055,
|
|
"learning_rate": 0.00016021617127416336,
|
|
"loss": 0.001,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 0.2024425584765059,
|
|
"grad_norm": 0.0013399991439655423,
|
|
"learning_rate": 0.0001601745998752858,
|
|
"loss": 0.0,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 0.20264955495756573,
|
|
"grad_norm": 0.0015200217021629214,
|
|
"learning_rate": 0.00016013302847640824,
|
|
"loss": 0.0003,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 0.20285655143862555,
|
|
"grad_norm": 0.008712020702660084,
|
|
"learning_rate": 0.00016009145707753067,
|
|
"loss": 0.0002,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.20306354791968537,
|
|
"grad_norm": 0.004218498710542917,
|
|
"learning_rate": 0.00016004988567865308,
|
|
"loss": 0.0006,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 0.20327054440074518,
|
|
"grad_norm": 0.0019195530330762267,
|
|
"learning_rate": 0.00016000831427977552,
|
|
"loss": 0.0008,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 0.203477540881805,
|
|
"grad_norm": 0.0017306975787505507,
|
|
"learning_rate": 0.00015996674288089796,
|
|
"loss": 0.001,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 0.20368453736286482,
|
|
"grad_norm": 0.006045056506991386,
|
|
"learning_rate": 0.00015992517148202037,
|
|
"loss": 0.0002,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 0.20389153384392467,
|
|
"grad_norm": 0.002741064177826047,
|
|
"learning_rate": 0.0001598836000831428,
|
|
"loss": 0.0001,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.20409853032498448,
|
|
"grad_norm": 0.0026846020482480526,
|
|
"learning_rate": 0.00015984202868426524,
|
|
"loss": 0.0003,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 0.2043055268060443,
|
|
"grad_norm": 0.009860471822321415,
|
|
"learning_rate": 0.00015980045728538765,
|
|
"loss": 0.0002,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 0.20451252328710412,
|
|
"grad_norm": 0.0001563982223160565,
|
|
"learning_rate": 0.0001597588858865101,
|
|
"loss": 0.0,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 0.20471951976816394,
|
|
"grad_norm": 0.0035680129658430815,
|
|
"learning_rate": 0.00015971731448763253,
|
|
"loss": 0.0009,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 0.20492651624922376,
|
|
"grad_norm": 0.00037079930189065635,
|
|
"learning_rate": 0.00015967574308875494,
|
|
"loss": 0.0,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.20513351273028357,
|
|
"grad_norm": 0.006476435344666243,
|
|
"learning_rate": 0.00015963417168987737,
|
|
"loss": 0.0004,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 0.20534050921134342,
|
|
"grad_norm": 0.0008909539901651442,
|
|
"learning_rate": 0.0001595926002909998,
|
|
"loss": 0.0001,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 0.20554750569240324,
|
|
"grad_norm": 0.005418546497821808,
|
|
"learning_rate": 0.00015955102889212225,
|
|
"loss": 0.0009,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 0.20575450217346306,
|
|
"grad_norm": 0.004925790708512068,
|
|
"learning_rate": 0.00015950945749324466,
|
|
"loss": 0.0009,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 0.20596149865452287,
|
|
"grad_norm": 0.003130319295451045,
|
|
"learning_rate": 0.0001594678860943671,
|
|
"loss": 0.001,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.2061684951355827,
|
|
"grad_norm": 0.0062978435307741165,
|
|
"learning_rate": 0.00015942631469548953,
|
|
"loss": 0.0002,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 0.2063754916166425,
|
|
"grad_norm": 0.006842117290943861,
|
|
"learning_rate": 0.00015938474329661194,
|
|
"loss": 0.0003,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 0.20658248809770233,
|
|
"grad_norm": 0.0006231727893464267,
|
|
"learning_rate": 0.00015934317189773435,
|
|
"loss": 0.0,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 0.20678948457876217,
|
|
"grad_norm": 0.0008572920341975987,
|
|
"learning_rate": 0.0001593016004988568,
|
|
"loss": 0.0,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 0.206996481059822,
|
|
"grad_norm": 0.0018585945945233107,
|
|
"learning_rate": 0.00015926002909997922,
|
|
"loss": 0.0001,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.2072034775408818,
|
|
"grad_norm": 0.002304868074133992,
|
|
"learning_rate": 0.00015921845770110163,
|
|
"loss": 0.0009,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 0.20741047402194163,
|
|
"grad_norm": 0.004541350062936544,
|
|
"learning_rate": 0.00015917688630222407,
|
|
"loss": 0.0,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 0.20761747050300144,
|
|
"grad_norm": 0.003366716904565692,
|
|
"learning_rate": 0.0001591353149033465,
|
|
"loss": 0.0011,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 0.20782446698406126,
|
|
"grad_norm": 0.002917301142588258,
|
|
"learning_rate": 0.00015909374350446892,
|
|
"loss": 0.0017,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 0.20803146346512108,
|
|
"grad_norm": 0.0004168320447206497,
|
|
"learning_rate": 0.00015905217210559136,
|
|
"loss": 0.0,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.20823845994618093,
|
|
"grad_norm": 0.001863375655375421,
|
|
"learning_rate": 0.0001590106007067138,
|
|
"loss": 0.001,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 0.20844545642724074,
|
|
"grad_norm": 0.001271730288863182,
|
|
"learning_rate": 0.0001589690293078362,
|
|
"loss": 0.0001,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 0.20865245290830056,
|
|
"grad_norm": 0.002366506028920412,
|
|
"learning_rate": 0.00015892745790895864,
|
|
"loss": 0.0009,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 0.20885944938936038,
|
|
"grad_norm": 0.0031757184769958258,
|
|
"learning_rate": 0.00015888588651008108,
|
|
"loss": 0.0001,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 0.2090664458704202,
|
|
"grad_norm": 0.0019097458571195602,
|
|
"learning_rate": 0.00015884431511120351,
|
|
"loss": 0.0,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.20927344235148002,
|
|
"grad_norm": 0.0008379930513910949,
|
|
"learning_rate": 0.00015880274371232592,
|
|
"loss": 0.0,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 0.20948043883253983,
|
|
"grad_norm": 0.005206478293985128,
|
|
"learning_rate": 0.00015876117231344836,
|
|
"loss": 0.002,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 0.20968743531359968,
|
|
"grad_norm": 0.0008720169425942004,
|
|
"learning_rate": 0.0001587196009145708,
|
|
"loss": 0.0,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 0.2098944317946595,
|
|
"grad_norm": 0.0041591702029109,
|
|
"learning_rate": 0.0001586780295156932,
|
|
"loss": 0.001,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 0.21010142827571932,
|
|
"grad_norm": 0.0004002148343715817,
|
|
"learning_rate": 0.00015863645811681565,
|
|
"loss": 0.0,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.21030842475677913,
|
|
"grad_norm": 0.00017360522178933024,
|
|
"learning_rate": 0.00015859488671793808,
|
|
"loss": 0.0,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 0.21051542123783895,
|
|
"grad_norm": 0.004276643507182598,
|
|
"learning_rate": 0.0001585533153190605,
|
|
"loss": 0.0,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 0.21072241771889877,
|
|
"grad_norm": 0.0027010890189558268,
|
|
"learning_rate": 0.00015851174392018293,
|
|
"loss": 0.0014,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 0.2109294141999586,
|
|
"grad_norm": 0.0048659988678991795,
|
|
"learning_rate": 0.00015847017252130537,
|
|
"loss": 0.0005,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 0.21113641068101843,
|
|
"grad_norm": 0.0003106594958808273,
|
|
"learning_rate": 0.00015842860112242778,
|
|
"loss": 0.0,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.21134340716207825,
|
|
"grad_norm": 0.0032943710684776306,
|
|
"learning_rate": 0.00015838702972355019,
|
|
"loss": 0.0012,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 0.21155040364313807,
|
|
"grad_norm": 0.0022477346938103437,
|
|
"learning_rate": 0.00015834545832467262,
|
|
"loss": 0.0002,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 0.2117574001241979,
|
|
"grad_norm": 0.0007089116843417287,
|
|
"learning_rate": 0.00015830388692579506,
|
|
"loss": 0.0,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 0.2119643966052577,
|
|
"grad_norm": 0.003983316943049431,
|
|
"learning_rate": 0.00015826231552691747,
|
|
"loss": 0.0002,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 0.21217139308631752,
|
|
"grad_norm": 0.0038651269860565662,
|
|
"learning_rate": 0.0001582207441280399,
|
|
"loss": 0.0017,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.21237838956737737,
|
|
"grad_norm": 0.006369102746248245,
|
|
"learning_rate": 0.00015817917272916234,
|
|
"loss": 0.0002,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 0.21258538604843719,
|
|
"grad_norm": 0.002382291480898857,
|
|
"learning_rate": 0.00015813760133028478,
|
|
"loss": 0.0004,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 0.212792382529497,
|
|
"grad_norm": 0.0016512110596522689,
|
|
"learning_rate": 0.0001580960299314072,
|
|
"loss": 0.0002,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 0.21299937901055682,
|
|
"grad_norm": 0.008482804521918297,
|
|
"learning_rate": 0.00015805445853252963,
|
|
"loss": 0.0002,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 0.21320637549161664,
|
|
"grad_norm": 0.005470529198646545,
|
|
"learning_rate": 0.00015801288713365207,
|
|
"loss": 0.0007,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.21341337197267646,
|
|
"grad_norm": 0.004424599930644035,
|
|
"learning_rate": 0.00015797131573477448,
|
|
"loss": 0.0007,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 0.21362036845373628,
|
|
"grad_norm": 0.0011165774194523692,
|
|
"learning_rate": 0.0001579297443358969,
|
|
"loss": 0.0,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 0.21382736493479612,
|
|
"grad_norm": 0.0008872200851328671,
|
|
"learning_rate": 0.00015788817293701935,
|
|
"loss": 0.0,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 0.21403436141585594,
|
|
"grad_norm": 0.010052971541881561,
|
|
"learning_rate": 0.00015784660153814176,
|
|
"loss": 0.0011,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 0.21424135789691576,
|
|
"grad_norm": 0.002929918933659792,
|
|
"learning_rate": 0.0001578050301392642,
|
|
"loss": 0.0015,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.21444835437797558,
|
|
"grad_norm": 0.008970585651695728,
|
|
"learning_rate": 0.00015776345874038663,
|
|
"loss": 0.0003,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 0.2146553508590354,
|
|
"grad_norm": 0.06671982258558273,
|
|
"learning_rate": 0.00015772188734150904,
|
|
"loss": 0.0008,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 0.2148623473400952,
|
|
"grad_norm": 0.0003781789855565876,
|
|
"learning_rate": 0.00015768031594263148,
|
|
"loss": 0.0,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 0.21506934382115503,
|
|
"grad_norm": 0.0004683547012973577,
|
|
"learning_rate": 0.00015763874454375392,
|
|
"loss": 0.0,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 0.21527634030221487,
|
|
"grad_norm": 0.005777016282081604,
|
|
"learning_rate": 0.00015759717314487635,
|
|
"loss": 0.0015,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.2154833367832747,
|
|
"grad_norm": 0.0068507413379848,
|
|
"learning_rate": 0.00015755560174599876,
|
|
"loss": 0.0002,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 0.2156903332643345,
|
|
"grad_norm": 0.0021482266020029783,
|
|
"learning_rate": 0.00015751403034712117,
|
|
"loss": 0.0004,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 0.21589732974539433,
|
|
"grad_norm": 0.003305132733657956,
|
|
"learning_rate": 0.0001574724589482436,
|
|
"loss": 0.0009,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 0.21610432622645415,
|
|
"grad_norm": 0.005324844736605883,
|
|
"learning_rate": 0.00015743088754936602,
|
|
"loss": 0.0012,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 0.21631132270751396,
|
|
"grad_norm": 0.004400115925818682,
|
|
"learning_rate": 0.00015738931615048846,
|
|
"loss": 0.0002,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.21651831918857378,
|
|
"grad_norm": 0.0030595625285059214,
|
|
"learning_rate": 0.0001573477447516109,
|
|
"loss": 0.0005,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 0.21672531566963363,
|
|
"grad_norm": 0.004036907572299242,
|
|
"learning_rate": 0.00015730617335273333,
|
|
"loss": 0.0,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 0.21693231215069345,
|
|
"grad_norm": 0.005875944159924984,
|
|
"learning_rate": 0.00015726460195385574,
|
|
"loss": 0.0004,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 0.21713930863175326,
|
|
"grad_norm": 0.003494358854368329,
|
|
"learning_rate": 0.00015722303055497818,
|
|
"loss": 0.0015,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 0.21734630511281308,
|
|
"grad_norm": 0.00041328632505610585,
|
|
"learning_rate": 0.00015718145915610062,
|
|
"loss": 0.0,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.2175533015938729,
|
|
"grad_norm": 0.0010599165689200163,
|
|
"learning_rate": 0.00015713988775722303,
|
|
"loss": 0.0,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 0.21776029807493272,
|
|
"grad_norm": 0.00022103896480984986,
|
|
"learning_rate": 0.00015709831635834546,
|
|
"loss": 0.0,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 0.21796729455599254,
|
|
"grad_norm": 0.00018703911337070167,
|
|
"learning_rate": 0.0001570567449594679,
|
|
"loss": 0.0,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 0.21817429103705238,
|
|
"grad_norm": 0.0001905701938085258,
|
|
"learning_rate": 0.0001570151735605903,
|
|
"loss": 0.0,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 0.2183812875181122,
|
|
"grad_norm": 0.01590561680495739,
|
|
"learning_rate": 0.00015697360216171275,
|
|
"loss": 0.0004,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.21858828399917202,
|
|
"grad_norm": 0.007658824324607849,
|
|
"learning_rate": 0.00015693203076283519,
|
|
"loss": 0.0001,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 0.21879528048023184,
|
|
"grad_norm": 0.0036896623205393553,
|
|
"learning_rate": 0.00015689045936395762,
|
|
"loss": 0.0011,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 0.21900227696129165,
|
|
"grad_norm": 0.006060061044991016,
|
|
"learning_rate": 0.00015684888796508003,
|
|
"loss": 0.0001,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 0.21920927344235147,
|
|
"grad_norm": 0.010098116472363472,
|
|
"learning_rate": 0.00015680731656620247,
|
|
"loss": 0.0012,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 0.2194162699234113,
|
|
"grad_norm": 0.0016395826824009418,
|
|
"learning_rate": 0.0001567657451673249,
|
|
"loss": 0.0007,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.21962326640447113,
|
|
"grad_norm": 0.004565931856632233,
|
|
"learning_rate": 0.00015672417376844732,
|
|
"loss": 0.0011,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 0.21983026288553095,
|
|
"grad_norm": 0.004525905009359121,
|
|
"learning_rate": 0.00015668260236956975,
|
|
"loss": 0.0002,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 0.22003725936659077,
|
|
"grad_norm": 0.0008571099024266005,
|
|
"learning_rate": 0.0001566410309706922,
|
|
"loss": 0.0,
|
|
"step": 1063
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 4831,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 5.0739590310395904e+17,
|
|
"train_batch_size": 12,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|