[
|
|
{
|
|
"loss": 0.311,
|
|
"grad_norm": 7.0026350021362305,
|
|
"learning_rate": 0.0,
|
|
"epoch": 0.0008019246190858059,
|
|
"step": 1
|
|
},
|
|
{
|
|
"loss": 0.4209,
|
|
"grad_norm": 7.763463973999023,
|
|
"learning_rate": 3.1746031746031746e-06,
|
|
"epoch": 0.0016038492381716118,
|
|
"step": 2
|
|
},
|
|
{
|
|
"loss": 0.5177,
|
|
"grad_norm": 7.721052646636963,
|
|
"learning_rate": 6.349206349206349e-06,
|
|
"epoch": 0.0024057738572574178,
|
|
"step": 3
|
|
},
|
|
{
|
|
"loss": 0.3841,
|
|
"grad_norm": 5.636883735656738,
|
|
"learning_rate": 9.523809523809523e-06,
|
|
"epoch": 0.0032076984763432237,
|
|
"step": 4
|
|
},
|
|
{
|
|
"loss": 0.3934,
|
|
"grad_norm": 5.894153118133545,
|
|
"learning_rate": 1.2698412698412699e-05,
|
|
"epoch": 0.00400962309542903,
|
|
"step": 5
|
|
},
|
|
{
|
|
"loss": 0.2577,
|
|
"grad_norm": 3.8658251762390137,
|
|
"learning_rate": 1.5873015873015872e-05,
|
|
"epoch": 0.0048115477145148355,
|
|
"step": 6
|
|
},
|
|
{
|
|
"loss": 0.1507,
|
|
"grad_norm": 3.220764398574829,
|
|
"learning_rate": 1.9047619047619046e-05,
|
|
"epoch": 0.0056134723336006415,
|
|
"step": 7
|
|
},
|
|
{
|
|
"loss": 0.1547,
|
|
"grad_norm": 1.97222101688385,
|
|
"learning_rate": 2.2222222222222223e-05,
|
|
"epoch": 0.006415396952686447,
|
|
"step": 8
|
|
},
|
|
{
|
|
"loss": 0.1619,
|
|
"grad_norm": 2.004807472229004,
|
|
"learning_rate": 2.5396825396825397e-05,
|
|
"epoch": 0.007217321571772253,
|
|
"step": 9
|
|
},
|
|
{
|
|
"loss": 0.2521,
|
|
"grad_norm": 2.6470654010772705,
|
|
"learning_rate": 2.857142857142857e-05,
|
|
"epoch": 0.00801924619085806,
|
|
"step": 10
|
|
},
|
|
{
|
|
"loss": 0.3305,
|
|
"grad_norm": 3.026132106781006,
|
|
"learning_rate": 3.1746031746031745e-05,
|
|
"epoch": 0.008821170809943865,
|
|
"step": 11
|
|
},
|
|
{
|
|
"loss": 0.2042,
|
|
"grad_norm": 2.123467206954956,
|
|
"learning_rate": 3.492063492063492e-05,
|
|
"epoch": 0.009623095429029671,
|
|
"step": 12
|
|
},
|
|
{
|
|
"loss": 0.2047,
|
|
"grad_norm": 1.958135962486267,
|
|
"learning_rate": 3.809523809523809e-05,
|
|
"epoch": 0.010425020048115477,
|
|
"step": 13
|
|
},
|
|
{
|
|
"loss": 0.1499,
|
|
"grad_norm": 1.2876746654510498,
|
|
"learning_rate": 4.126984126984127e-05,
|
|
"epoch": 0.011226944667201283,
|
|
"step": 14
|
|
},
|
|
{
|
|
"loss": 0.0589,
|
|
"grad_norm": 0.7209349870681763,
|
|
"learning_rate": 4.4444444444444447e-05,
|
|
"epoch": 0.012028869286287089,
|
|
"step": 15
|
|
},
|
|
{
|
|
"loss": 0.1404,
|
|
"grad_norm": 1.2799328565597534,
|
|
"learning_rate": 4.761904761904762e-05,
|
|
"epoch": 0.012830793905372895,
|
|
"step": 16
|
|
},
|
|
{
|
|
"loss": 0.1594,
|
|
"grad_norm": 1.2897180318832397,
|
|
"learning_rate": 5.0793650793650794e-05,
|
|
"epoch": 0.0136327185244587,
|
|
"step": 17
|
|
},
|
|
{
|
|
"loss": 0.1236,
|
|
"grad_norm": 0.907631516456604,
|
|
"learning_rate": 5.396825396825397e-05,
|
|
"epoch": 0.014434643143544507,
|
|
"step": 18
|
|
},
|
|
{
|
|
"loss": 0.0899,
|
|
"grad_norm": 0.7336040139198303,
|
|
"learning_rate": 5.714285714285714e-05,
|
|
"epoch": 0.015236567762630313,
|
|
"step": 19
|
|
},
|
|
{
|
|
"loss": 0.1434,
|
|
"grad_norm": 1.4779671430587769,
|
|
"learning_rate": 6.0317460317460316e-05,
|
|
"epoch": 0.01603849238171612,
|
|
"step": 20
|
|
},
|
|
{
|
|
"loss": 0.1072,
|
|
"grad_norm": 0.6834859251976013,
|
|
"learning_rate": 6.349206349206349e-05,
|
|
"epoch": 0.016840417000801924,
|
|
"step": 21
|
|
},
|
|
{
|
|
"loss": 0.1077,
|
|
"grad_norm": 0.9278278946876526,
|
|
"learning_rate": 6.666666666666667e-05,
|
|
"epoch": 0.01764234161988773,
|
|
"step": 22
|
|
},
|
|
{
|
|
"loss": 0.1463,
|
|
"grad_norm": 1.041062593460083,
|
|
"learning_rate": 6.984126984126984e-05,
|
|
"epoch": 0.018444266238973536,
|
|
"step": 23
|
|
},
|
|
{
|
|
"loss": 0.1311,
|
|
"grad_norm": 1.007616639137268,
|
|
"learning_rate": 7.301587301587302e-05,
|
|
"epoch": 0.019246190858059342,
|
|
"step": 24
|
|
},
|
|
{
|
|
"loss": 0.1652,
|
|
"grad_norm": 1.5278170108795166,
|
|
"learning_rate": 7.619047619047618e-05,
|
|
"epoch": 0.020048115477145148,
|
|
"step": 25
|
|
},
|
|
{
|
|
"loss": 0.2241,
|
|
"grad_norm": 1.5930604934692383,
|
|
"learning_rate": 7.936507936507937e-05,
|
|
"epoch": 0.020850040096230954,
|
|
"step": 26
|
|
},
|
|
{
|
|
"loss": 0.0831,
|
|
"grad_norm": 0.7199026942253113,
|
|
"learning_rate": 8.253968253968255e-05,
|
|
"epoch": 0.02165196471531676,
|
|
"step": 27
|
|
},
|
|
{
|
|
"loss": 0.1204,
|
|
"grad_norm": 0.986321210861206,
|
|
"learning_rate": 8.571428571428571e-05,
|
|
"epoch": 0.022453889334402566,
|
|
"step": 28
|
|
},
|
|
{
|
|
"loss": 0.1153,
|
|
"grad_norm": 1.234464168548584,
|
|
"learning_rate": 8.888888888888889e-05,
|
|
"epoch": 0.023255813953488372,
|
|
"step": 29
|
|
},
|
|
{
|
|
"loss": 0.081,
|
|
"grad_norm": 1.0115418434143066,
|
|
"learning_rate": 9.206349206349206e-05,
|
|
"epoch": 0.024057738572574178,
|
|
"step": 30
|
|
},
|
|
{
|
|
"loss": 0.103,
|
|
"grad_norm": 1.4726132154464722,
|
|
"learning_rate": 9.523809523809524e-05,
|
|
"epoch": 0.024859663191659984,
|
|
"step": 31
|
|
},
|
|
{
|
|
"loss": 0.0841,
|
|
"grad_norm": 0.7434117197990417,
|
|
"learning_rate": 9.841269841269841e-05,
|
|
"epoch": 0.02566158781074579,
|
|
"step": 32
|
|
},
|
|
{
|
|
"loss": 0.0806,
|
|
"grad_norm": 0.6968424916267395,
|
|
"learning_rate": 0.00010158730158730159,
|
|
"epoch": 0.026463512429831595,
|
|
"step": 33
|
|
},
|
|
{
|
|
"loss": 0.1339,
|
|
"grad_norm": 1.8305174112319946,
|
|
"learning_rate": 0.00010476190476190477,
|
|
"epoch": 0.0272654370489174,
|
|
"step": 34
|
|
},
|
|
{
|
|
"loss": 0.148,
|
|
"grad_norm": 1.3083935976028442,
|
|
"learning_rate": 0.00010793650793650794,
|
|
"epoch": 0.028067361668003207,
|
|
"step": 35
|
|
},
|
|
{
|
|
"loss": 0.0659,
|
|
"grad_norm": 0.5363959074020386,
|
|
"learning_rate": 0.00011111111111111112,
|
|
"epoch": 0.028869286287089013,
|
|
"step": 36
|
|
},
|
|
{
|
|
"loss": 0.0761,
|
|
"grad_norm": 0.7278910875320435,
|
|
"learning_rate": 0.00011428571428571428,
|
|
"epoch": 0.02967121090617482,
|
|
"step": 37
|
|
},
|
|
{
|
|
"loss": 0.0628,
|
|
"grad_norm": 0.5862115621566772,
|
|
"learning_rate": 0.00011746031746031746,
|
|
"epoch": 0.030473135525260625,
|
|
"step": 38
|
|
},
|
|
{
|
|
"loss": 0.0892,
|
|
"grad_norm": 0.8882272243499756,
|
|
"learning_rate": 0.00012063492063492063,
|
|
"epoch": 0.03127506014434643,
|
|
"step": 39
|
|
},
|
|
{
|
|
"loss": 0.0907,
|
|
"grad_norm": 0.8315787315368652,
|
|
"learning_rate": 0.0001238095238095238,
|
|
"epoch": 0.03207698476343224,
|
|
"step": 40
|
|
},
|
|
{
|
|
"loss": 0.048,
|
|
"grad_norm": 0.6063331365585327,
|
|
"learning_rate": 0.00012698412698412698,
|
|
"epoch": 0.03287890938251804,
|
|
"step": 41
|
|
},
|
|
{
|
|
"loss": 0.066,
|
|
"grad_norm": 0.6467223167419434,
|
|
"learning_rate": 0.00013015873015873017,
|
|
"epoch": 0.03368083400160385,
|
|
"step": 42
|
|
},
|
|
{
|
|
"loss": 0.1351,
|
|
"grad_norm": 1.2565680742263794,
|
|
"learning_rate": 0.00013333333333333334,
|
|
"epoch": 0.034482758620689655,
|
|
"step": 43
|
|
},
|
|
{
|
|
"loss": 0.0867,
|
|
"grad_norm": 0.8123145699501038,
|
|
"learning_rate": 0.0001365079365079365,
|
|
"epoch": 0.03528468323977546,
|
|
"step": 44
|
|
},
|
|
{
|
|
"loss": 0.1072,
|
|
"grad_norm": 0.8433717489242554,
|
|
"learning_rate": 0.00013968253968253967,
|
|
"epoch": 0.03608660785886127,
|
|
"step": 45
|
|
},
|
|
{
|
|
"loss": 0.0438,
|
|
"grad_norm": 0.5360523462295532,
|
|
"learning_rate": 0.00014285714285714287,
|
|
"epoch": 0.03688853247794707,
|
|
"step": 46
|
|
},
|
|
{
|
|
"loss": 0.0866,
|
|
"grad_norm": 0.644867479801178,
|
|
"learning_rate": 0.00014603174603174603,
|
|
"epoch": 0.03769045709703288,
|
|
"step": 47
|
|
},
|
|
{
|
|
"loss": 0.0839,
|
|
"grad_norm": 0.8485159873962402,
|
|
"learning_rate": 0.00014920634920634923,
|
|
"epoch": 0.038492381716118684,
|
|
"step": 48
|
|
},
|
|
{
|
|
"loss": 0.0774,
|
|
"grad_norm": 0.5638540387153625,
|
|
"learning_rate": 0.00015238095238095237,
|
|
"epoch": 0.03929430633520449,
|
|
"step": 49
|
|
},
|
|
{
|
|
"loss": 0.0611,
|
|
"grad_norm": 0.7566853761672974,
|
|
"learning_rate": 0.00015555555555555556,
|
|
"epoch": 0.040096230954290296,
|
|
"step": 50
|
|
},
|
|
{
|
|
"loss": 0.1411,
|
|
"grad_norm": 1.0959564447402954,
|
|
"learning_rate": 0.00015873015873015873,
|
|
"epoch": 0.0408981555733761,
|
|
"step": 51
|
|
},
|
|
{
|
|
"loss": 0.0594,
|
|
"grad_norm": 0.6066744923591614,
|
|
"learning_rate": 0.00016190476190476192,
|
|
"epoch": 0.04170008019246191,
|
|
"step": 52
|
|
},
|
|
{
|
|
"loss": 0.0337,
|
|
"grad_norm": 0.5505036115646362,
|
|
"learning_rate": 0.0001650793650793651,
|
|
"epoch": 0.042502004811547714,
|
|
"step": 53
|
|
},
|
|
{
|
|
"loss": 0.0572,
|
|
"grad_norm": 0.6075869798660278,
|
|
"learning_rate": 0.00016825396825396826,
|
|
"epoch": 0.04330392943063352,
|
|
"step": 54
|
|
},
|
|
{
|
|
"loss": 0.0869,
|
|
"grad_norm": 0.9212067723274231,
|
|
"learning_rate": 0.00017142857142857143,
|
|
"epoch": 0.044105854049719326,
|
|
"step": 55
|
|
},
|
|
{
|
|
"loss": 0.033,
|
|
"grad_norm": 0.4611626863479614,
|
|
"learning_rate": 0.00017460317460317462,
|
|
"epoch": 0.04490777866880513,
|
|
"step": 56
|
|
},
|
|
{
|
|
"loss": 0.0711,
|
|
"grad_norm": 0.8158572912216187,
|
|
"learning_rate": 0.00017777777777777779,
|
|
"epoch": 0.04570970328789094,
|
|
"step": 57
|
|
},
|
|
{
|
|
"loss": 0.1482,
|
|
"grad_norm": 1.3836172819137573,
|
|
"learning_rate": 0.00018095238095238095,
|
|
"epoch": 0.046511627906976744,
|
|
"step": 58
|
|
},
|
|
{
|
|
"loss": 0.0953,
|
|
"grad_norm": 0.6279105544090271,
|
|
"learning_rate": 0.00018412698412698412,
|
|
"epoch": 0.04731355252606255,
|
|
"step": 59
|
|
},
|
|
{
|
|
"loss": 0.1133,
|
|
"grad_norm": 1.3958708047866821,
|
|
"learning_rate": 0.00018730158730158731,
|
|
"epoch": 0.048115477145148355,
|
|
"step": 60
|
|
},
|
|
{
|
|
"loss": 0.1674,
|
|
"grad_norm": 1.2703611850738525,
|
|
"learning_rate": 0.00019047619047619048,
|
|
"epoch": 0.04891740176423416,
|
|
"step": 61
|
|
},
|
|
{
|
|
"loss": 0.0756,
|
|
"grad_norm": 0.8350338935852051,
|
|
"learning_rate": 0.00019365079365079365,
|
|
"epoch": 0.04971932638331997,
|
|
"step": 62
|
|
},
|
|
{
|
|
"loss": 0.0776,
|
|
"grad_norm": 0.7750063538551331,
|
|
"learning_rate": 0.00019682539682539682,
|
|
"epoch": 0.05052125100240577,
|
|
"step": 63
|
|
},
|
|
{
|
|
"loss": 0.0258,
|
|
"grad_norm": 0.4177851974964142,
|
|
"learning_rate": 0.0002,
|
|
"epoch": 0.05132317562149158,
|
|
"step": 64
|
|
},
|
|
{
|
|
"loss": 0.0743,
|
|
"grad_norm": 0.9661064743995667,
|
|
"learning_rate": 0.00019999964798101197,
|
|
"epoch": 0.052125100240577385,
|
|
"step": 65
|
|
},
|
|
{
|
|
"loss": 0.1386,
|
|
"grad_norm": 1.2234452962875366,
|
|
"learning_rate": 0.0001999985919265261,
|
|
"epoch": 0.05292702485966319,
|
|
"step": 66
|
|
},
|
|
{
|
|
"loss": 0.0466,
|
|
"grad_norm": 0.3697403073310852,
|
|
"learning_rate": 0.00019999683184397752,
|
|
"epoch": 0.053728949478749,
|
|
"step": 67
|
|
},
|
|
{
|
|
"loss": 0.1913,
|
|
"grad_norm": 1.1906723976135254,
|
|
"learning_rate": 0.0001999943677457578,
|
|
"epoch": 0.0545308740978348,
|
|
"step": 68
|
|
},
|
|
{
|
|
"loss": 0.0784,
|
|
"grad_norm": 0.6538499593734741,
|
|
"learning_rate": 0.0001999911996492152,
|
|
"epoch": 0.05533279871692061,
|
|
"step": 69
|
|
},
|
|
{
|
|
"loss": 0.0477,
|
|
"grad_norm": 0.7172570824623108,
|
|
"learning_rate": 0.00019998732757665427,
|
|
"epoch": 0.056134723336006415,
|
|
"step": 70
|
|
},
|
|
{
|
|
"loss": 0.0691,
|
|
"grad_norm": 0.5724918842315674,
|
|
"learning_rate": 0.00019998275155533587,
|
|
"epoch": 0.05693664795509222,
|
|
"step": 71
|
|
},
|
|
{
|
|
"loss": 0.0746,
|
|
"grad_norm": 0.8900654911994934,
|
|
"learning_rate": 0.00019997747161747695,
|
|
"epoch": 0.057738572574178026,
|
|
"step": 72
|
|
},
|
|
{
|
|
"loss": 0.0356,
|
|
"grad_norm": 0.43228501081466675,
|
|
"learning_rate": 0.00019997148780025027,
|
|
"epoch": 0.05854049719326383,
|
|
"step": 73
|
|
},
|
|
{
|
|
"loss": 0.0341,
|
|
"grad_norm": 0.4748842716217041,
|
|
"learning_rate": 0.0001999648001457842,
|
|
"epoch": 0.05934242181234964,
|
|
"step": 74
|
|
},
|
|
{
|
|
"loss": 0.0751,
|
|
"grad_norm": 0.5790648460388184,
|
|
"learning_rate": 0.00019995740870116233,
|
|
"epoch": 0.060144346431435444,
|
|
"step": 75
|
|
},
|
|
{
|
|
"loss": 0.1419,
|
|
"grad_norm": 0.8083305358886719,
|
|
"learning_rate": 0.00019994931351842327,
|
|
"epoch": 0.06094627105052125,
|
|
"step": 76
|
|
},
|
|
{
|
|
"loss": 0.0738,
|
|
"grad_norm": 1.0509905815124512,
|
|
"learning_rate": 0.00019994051465456014,
|
|
"epoch": 0.061748195669607056,
|
|
"step": 77
|
|
},
|
|
{
|
|
"loss": 0.09,
|
|
"grad_norm": 1.006076693534851,
|
|
"learning_rate": 0.00019993101217152028,
|
|
"epoch": 0.06255012028869286,
|
|
"step": 78
|
|
},
|
|
{
|
|
"loss": 0.0671,
|
|
"grad_norm": 0.7131031155586243,
|
|
"learning_rate": 0.00019992080613620485,
|
|
"epoch": 0.06335204490777867,
|
|
"step": 79
|
|
},
|
|
{
|
|
"loss": 0.0476,
|
|
"grad_norm": 0.6254774332046509,
|
|
"learning_rate": 0.00019990989662046818,
|
|
"epoch": 0.06415396952686447,
|
|
"step": 80
|
|
},
|
|
{
|
|
"loss": 0.0313,
|
|
"grad_norm": 0.5143452882766724,
|
|
"learning_rate": 0.00019989828370111737,
|
|
"epoch": 0.06495589414595028,
|
|
"step": 81
|
|
},
|
|
{
|
|
"loss": 0.0727,
|
|
"grad_norm": 0.616113007068634,
|
|
"learning_rate": 0.00019988596745991179,
|
|
"epoch": 0.06575781876503609,
|
|
"step": 82
|
|
},
|
|
{
|
|
"loss": 0.0693,
|
|
"grad_norm": 1.8450731039047241,
|
|
"learning_rate": 0.00019987294798356247,
|
|
"epoch": 0.06655974338412189,
|
|
"step": 83
|
|
},
|
|
{
|
|
"loss": 0.1301,
|
|
"grad_norm": 0.8279522657394409,
|
|
"learning_rate": 0.00019985922536373146,
|
|
"epoch": 0.0673616680032077,
|
|
"step": 84
|
|
},
|
|
{
|
|
"loss": 0.0457,
|
|
"grad_norm": 0.6411037445068359,
|
|
"learning_rate": 0.00019984479969703127,
|
|
"epoch": 0.0681635926222935,
|
|
"step": 85
|
|
},
|
|
{
|
|
"loss": 0.0636,
|
|
"grad_norm": 0.5541757941246033,
|
|
"learning_rate": 0.000199829671085024,
|
|
"epoch": 0.06896551724137931,
|
|
"step": 86
|
|
},
|
|
{
|
|
"loss": 0.0644,
|
|
"grad_norm": 0.5471921563148499,
|
|
"learning_rate": 0.00019981383963422087,
|
|
"epoch": 0.06976744186046512,
|
|
"step": 87
|
|
},
|
|
{
|
|
"loss": 0.0486,
|
|
"grad_norm": 0.7092999219894409,
|
|
"learning_rate": 0.00019979730545608126,
|
|
"epoch": 0.07056936647955092,
|
|
"step": 88
|
|
},
|
|
{
|
|
"loss": 0.124,
|
|
"grad_norm": 1.2980421781539917,
|
|
"learning_rate": 0.00019978006866701211,
|
|
"epoch": 0.07137129109863673,
|
|
"step": 89
|
|
},
|
|
{
|
|
"loss": 0.1298,
|
|
"grad_norm": 0.778945803642273,
|
|
"learning_rate": 0.0001997621293883669,
|
|
"epoch": 0.07217321571772253,
|
|
"step": 90
|
|
},
|
|
{
|
|
"loss": 0.0542,
|
|
"grad_norm": 0.4509424865245819,
|
|
"learning_rate": 0.00019974348774644501,
|
|
"epoch": 0.07297514033680834,
|
|
"step": 91
|
|
},
|
|
{
|
|
"loss": 0.0468,
|
|
"grad_norm": 0.6056888103485107,
|
|
"learning_rate": 0.00019972414387249072,
|
|
"epoch": 0.07377706495589414,
|
|
"step": 92
|
|
},
|
|
{
|
|
"loss": 0.0815,
|
|
"grad_norm": 0.7726762294769287,
|
|
"learning_rate": 0.00019970409790269215,
|
|
"epoch": 0.07457898957497995,
|
|
"step": 93
|
|
},
|
|
{
|
|
"loss": 0.0668,
|
|
"grad_norm": 0.6297205090522766,
|
|
"learning_rate": 0.00019968334997818064,
|
|
"epoch": 0.07538091419406576,
|
|
"step": 94
|
|
},
|
|
{
|
|
"loss": 0.1778,
|
|
"grad_norm": 1.166032075881958,
|
|
"learning_rate": 0.00019966190024502939,
|
|
"epoch": 0.07618283881315156,
|
|
"step": 95
|
|
},
|
|
{
|
|
"loss": 0.0662,
|
|
"grad_norm": 0.7612900733947754,
|
|
"learning_rate": 0.00019963974885425266,
|
|
"epoch": 0.07698476343223737,
|
|
"step": 96
|
|
},
|
|
{
|
|
"loss": 0.0433,
|
|
"grad_norm": 0.43482959270477295,
|
|
"learning_rate": 0.00019961689596180467,
|
|
"epoch": 0.07778668805132317,
|
|
"step": 97
|
|
},
|
|
{
|
|
"loss": 0.0616,
|
|
"grad_norm": 0.5207836627960205,
|
|
"learning_rate": 0.0001995933417285785,
|
|
"epoch": 0.07858861267040898,
|
|
"step": 98
|
|
},
|
|
{
|
|
"loss": 0.0523,
|
|
"grad_norm": 0.6553881764411926,
|
|
"learning_rate": 0.0001995690863204049,
|
|
"epoch": 0.07939053728949479,
|
|
"step": 99
|
|
},
|
|
{
|
|
"loss": 0.1302,
|
|
"grad_norm": 1.2842791080474854,
|
|
"learning_rate": 0.00019954412990805107,
|
|
"epoch": 0.08019246190858059,
|
|
"step": 100
|
|
},
|
|
{
|
|
"loss": 0.0922,
|
|
"grad_norm": 0.5699795484542847,
|
|
"learning_rate": 0.0001995184726672197,
|
|
"epoch": 0.0809943865276664,
|
|
"step": 101
|
|
},
|
|
{
|
|
"loss": 0.0807,
|
|
"grad_norm": 0.5272155404090881,
|
|
"learning_rate": 0.00019949211477854749,
|
|
"epoch": 0.0817963111467522,
|
|
"step": 102
|
|
},
|
|
{
|
|
"loss": 0.1362,
|
|
"grad_norm": 0.6196130514144897,
|
|
"learning_rate": 0.00019946505642760398,
|
|
"epoch": 0.08259823576583801,
|
|
"step": 103
|
|
},
|
|
{
|
|
"loss": 0.0705,
|
|
"grad_norm": 0.6336621046066284,
|
|
"learning_rate": 0.00019943729780489027,
|
|
"epoch": 0.08340016038492382,
|
|
"step": 104
|
|
},
|
|
{
|
|
"loss": 0.0652,
|
|
"grad_norm": 0.7032070755958557,
|
|
"learning_rate": 0.00019940883910583756,
|
|
"epoch": 0.08420208500400962,
|
|
"step": 105
|
|
},
|
|
{
|
|
"loss": 0.1116,
|
|
"grad_norm": 0.908371090888977,
|
|
"learning_rate": 0.0001993796805308059,
|
|
"epoch": 0.08500400962309543,
|
|
"step": 106
|
|
},
|
|
{
|
|
"loss": 0.0818,
|
|
"grad_norm": 0.7326153516769409,
|
|
"learning_rate": 0.00019934982228508278,
|
|
"epoch": 0.08580593424218123,
|
|
"step": 107
|
|
},
|
|
{
|
|
"loss": 0.1018,
|
|
"grad_norm": 0.8321508169174194,
|
|
"learning_rate": 0.00019931926457888156,
|
|
"epoch": 0.08660785886126704,
|
|
"step": 108
|
|
},
|
|
{
|
|
"loss": 0.0259,
|
|
"grad_norm": 0.2848133146762848,
|
|
"learning_rate": 0.00019928800762734005,
|
|
"epoch": 0.08740978348035285,
|
|
"step": 109
|
|
},
|
|
{
|
|
"loss": 0.0884,
|
|
"grad_norm": 1.1061406135559082,
|
|
"learning_rate": 0.00019925605165051918,
|
|
"epoch": 0.08821170809943865,
|
|
"step": 110
|
|
},
|
|
{
|
|
"loss": 0.0813,
|
|
"grad_norm": 0.5895913243293762,
|
|
"learning_rate": 0.000199223396873401,
|
|
"epoch": 0.08901363271852446,
|
|
"step": 111
|
|
},
|
|
{
|
|
"loss": 0.1933,
|
|
"grad_norm": 1.0626415014266968,
|
|
"learning_rate": 0.00019919004352588767,
|
|
"epoch": 0.08981555733761026,
|
|
"step": 112
|
|
},
|
|
{
|
|
"loss": 0.0619,
|
|
"grad_norm": 0.5373443365097046,
|
|
"learning_rate": 0.00019915599184279942,
|
|
"epoch": 0.09061748195669607,
|
|
"step": 113
|
|
},
|
|
{
|
|
"loss": 0.1071,
|
|
"grad_norm": 0.6781280636787415,
|
|
"learning_rate": 0.00019912124206387295,
|
|
"epoch": 0.09141940657578188,
|
|
"step": 114
|
|
},
|
|
{
|
|
"loss": 0.0754,
|
|
"grad_norm": 0.42521488666534424,
|
|
"learning_rate": 0.00019908579443375996,
|
|
"epoch": 0.09222133119486768,
|
|
"step": 115
|
|
},
|
|
{
|
|
"loss": 0.0705,
|
|
"grad_norm": 0.5241889357566833,
|
|
"learning_rate": 0.0001990496492020252,
|
|
"epoch": 0.09302325581395349,
|
|
"step": 116
|
|
},
|
|
{
|
|
"loss": 0.1076,
|
|
"grad_norm": 0.6329948902130127,
|
|
"learning_rate": 0.00019901280662314484,
|
|
"epoch": 0.09382518043303929,
|
|
"step": 117
|
|
},
|
|
{
|
|
"loss": 0.0346,
|
|
"grad_norm": 0.3218804597854614,
|
|
"learning_rate": 0.0001989752669565046,
|
|
"epoch": 0.0946271050521251,
|
|
"step": 118
|
|
},
|
|
{
|
|
"loss": 0.1206,
|
|
"grad_norm": 0.5836507081985474,
|
|
"learning_rate": 0.00019893703046639804,
|
|
"epoch": 0.0954290296712109,
|
|
"step": 119
|
|
},
|
|
{
|
|
"loss": 0.0955,
|
|
"grad_norm": 0.6629716157913208,
|
|
"learning_rate": 0.00019889809742202455,
|
|
"epoch": 0.09623095429029671,
|
|
"step": 120
|
|
},
|
|
{
|
|
"loss": 0.1089,
|
|
"grad_norm": 0.9768190383911133,
|
|
"learning_rate": 0.00019885846809748753,
|
|
"epoch": 0.09703287890938252,
|
|
"step": 121
|
|
},
|
|
{
|
|
"loss": 0.0323,
|
|
"grad_norm": 0.27991437911987305,
|
|
"learning_rate": 0.00019881814277179248,
|
|
"epoch": 0.09783480352846832,
|
|
"step": 122
|
|
},
|
|
{
|
|
"loss": 0.0466,
|
|
"grad_norm": 0.5002017617225647,
|
|
"learning_rate": 0.00019877712172884502,
|
|
"epoch": 0.09863672814755413,
|
|
"step": 123
|
|
},
|
|
{
|
|
"loss": 0.0311,
|
|
"grad_norm": 0.4994860589504242,
|
|
"learning_rate": 0.00019873540525744887,
|
|
"epoch": 0.09943865276663993,
|
|
"step": 124
|
|
},
|
|
{
|
|
"eval_loss": 0.06782178580760956,
|
|
"eval_runtime": 50.5786,
|
|
"eval_samples_per_second": 20.76,
|
|
"eval_steps_per_second": 5.2,
|
|
"epoch": 0.09943865276663993,
|
|
"step": 124
|
|
},
|
|
{
|
|
"loss": 0.0705,
|
|
"grad_norm": 0.6123189926147461,
|
|
"learning_rate": 0.00019869299365130383,
|
|
"epoch": 0.10024057738572574,
|
|
"step": 125
|
|
},
|
|
{
|
|
"loss": 0.0841,
|
|
"grad_norm": 0.7357730865478516,
|
|
"learning_rate": 0.00019864988720900368,
|
|
"epoch": 0.10104250200481155,
|
|
"step": 126
|
|
},
|
|
{
|
|
"loss": 0.0674,
|
|
"grad_norm": 0.46437254548072815,
|
|
"learning_rate": 0.0001986060862340342,
|
|
"epoch": 0.10184442662389735,
|
|
"step": 127
|
|
},
|
|
{
|
|
"loss": 0.0376,
|
|
"grad_norm": 0.5510705709457397,
|
|
"learning_rate": 0.00019856159103477086,
|
|
"epoch": 0.10264635124298316,
|
|
"step": 128
|
|
},
|
|
{
|
|
"loss": 0.0396,
|
|
"grad_norm": 0.5313072204589844,
|
|
"learning_rate": 0.00019851640192447673,
|
|
"epoch": 0.10344827586206896,
|
|
"step": 129
|
|
},
|
|
{
|
|
"loss": 0.0801,
|
|
"grad_norm": 0.6203364133834839,
|
|
"learning_rate": 0.00019847051922130038,
|
|
"epoch": 0.10425020048115477,
|
|
"step": 130
|
|
},
|
|
{
|
|
"loss": 0.0499,
|
|
"grad_norm": 0.3568151891231537,
|
|
"learning_rate": 0.00019842394324827341,
|
|
"epoch": 0.10505212510024058,
|
|
"step": 131
|
|
},
|
|
{
|
|
"loss": 0.0561,
|
|
"grad_norm": 0.3815423548221588,
|
|
"learning_rate": 0.00019837667433330838,
|
|
"epoch": 0.10585404971932638,
|
|
"step": 132
|
|
},
|
|
{
|
|
"loss": 0.0722,
|
|
"grad_norm": 0.4797166585922241,
|
|
"learning_rate": 0.00019832871280919635,
|
|
"epoch": 0.10665597433841219,
|
|
"step": 133
|
|
},
|
|
{
|
|
"loss": 0.0516,
|
|
"grad_norm": 0.47701454162597656,
|
|
"learning_rate": 0.00019828005901360475,
|
|
"epoch": 0.107457898957498,
|
|
"step": 134
|
|
},
|
|
{
|
|
"loss": 0.0309,
|
|
"grad_norm": 0.37124770879745483,
|
|
"learning_rate": 0.00019823071328907473,
|
|
"epoch": 0.1082598235765838,
|
|
"step": 135
|
|
},
|
|
{
|
|
"loss": 0.0707,
|
|
"grad_norm": 0.6959102749824524,
|
|
"learning_rate": 0.0001981806759830189,
|
|
"epoch": 0.1090617481956696,
|
|
"step": 136
|
|
},
|
|
{
|
|
"loss": 0.1417,
|
|
"grad_norm": 0.7774357795715332,
|
|
"learning_rate": 0.00019812994744771898,
|
|
"epoch": 0.10986367281475541,
|
|
"step": 137
|
|
},
|
|
{
|
|
"loss": 0.0405,
|
|
"grad_norm": 0.4280378818511963,
|
|
"learning_rate": 0.00019807852804032305,
|
|
"epoch": 0.11066559743384122,
|
|
"step": 138
|
|
},
|
|
{
|
|
"loss": 0.0506,
|
|
"grad_norm": 0.5292235016822815,
|
|
"learning_rate": 0.00019802641812284328,
|
|
"epoch": 0.11146752205292702,
|
|
"step": 139
|
|
},
|
|
{
|
|
"loss": 0.0624,
|
|
"grad_norm": 0.5091221332550049,
|
|
"learning_rate": 0.00019797361806215332,
|
|
"epoch": 0.11226944667201283,
|
|
"step": 140
|
|
},
|
|
{
|
|
"loss": 0.0598,
|
|
"grad_norm": 0.5391169786453247,
|
|
"learning_rate": 0.0001979201282299856,
|
|
"epoch": 0.11307137129109864,
|
|
"step": 141
|
|
},
|
|
{
|
|
"loss": 0.0861,
|
|
"grad_norm": 0.7957108616828918,
|
|
"learning_rate": 0.00019786594900292887,
|
|
"epoch": 0.11387329591018444,
|
|
"step": 142
|
|
},
|
|
{
|
|
"loss": 0.0698,
|
|
"grad_norm": 0.5378095507621765,
|
|
"learning_rate": 0.00019781108076242547,
|
|
"epoch": 0.11467522052927025,
|
|
"step": 143
|
|
},
|
|
{
|
|
"loss": 0.0612,
|
|
"grad_norm": 0.5657555460929871,
|
|
"learning_rate": 0.00019775552389476864,
|
|
"epoch": 0.11547714514835605,
|
|
"step": 144
|
|
},
|
|
{
|
|
"loss": 0.1241,
|
|
"grad_norm": 0.8074794411659241,
|
|
"learning_rate": 0.00019769927879109982,
|
|
"epoch": 0.11627906976744186,
|
|
"step": 145
|
|
},
|
|
{
|
|
"loss": 0.0831,
|
|
"grad_norm": 0.5241571068763733,
|
|
"learning_rate": 0.0001976423458474059,
|
|
"epoch": 0.11708099438652766,
|
|
"step": 146
|
|
},
|
|
{
|
|
"loss": 0.0462,
|
|
"grad_norm": 0.3452630043029785,
|
|
"learning_rate": 0.00019758472546451645,
|
|
"epoch": 0.11788291900561347,
|
|
"step": 147
|
|
},
|
|
{
|
|
"loss": 0.0656,
|
|
"grad_norm": 0.38813871145248413,
|
|
"learning_rate": 0.00019752641804810084,
|
|
"epoch": 0.11868484362469928,
|
|
"step": 148
|
|
},
|
|
{
|
|
"loss": 0.0512,
|
|
"grad_norm": 0.5402405261993408,
|
|
"learning_rate": 0.0001974674240086654,
|
|
"epoch": 0.11948676824378508,
|
|
"step": 149
|
|
},
|
|
{
|
|
"loss": 0.039,
|
|
"grad_norm": 0.35998794436454773,
|
|
"learning_rate": 0.00019740774376155061,
|
|
"epoch": 0.12028869286287089,
|
|
"step": 150
|
|
},
|
|
{
|
|
"loss": 0.0275,
|
|
"grad_norm": 0.2361939251422882,
|
|
"learning_rate": 0.000197347377726928,
|
|
"epoch": 0.1210906174819567,
|
|
"step": 151
|
|
},
|
|
{
|
|
"loss": 0.0476,
|
|
"grad_norm": 0.48203134536743164,
|
|
"learning_rate": 0.00019728632632979746,
|
|
"epoch": 0.1218925421010425,
|
|
"step": 152
|
|
},
|
|
{
|
|
"loss": 0.0255,
|
|
"grad_norm": 0.2733021676540375,
|
|
"learning_rate": 0.00019722458999998398,
|
|
"epoch": 0.1226944667201283,
|
|
"step": 153
|
|
},
|
|
{
|
|
"loss": 0.0506,
|
|
"grad_norm": 0.3442985713481903,
|
|
"learning_rate": 0.00019716216917213476,
|
|
"epoch": 0.12349639133921411,
|
|
"step": 154
|
|
},
|
|
{
|
|
"loss": 0.0441,
|
|
"grad_norm": 0.46731194853782654,
|
|
"learning_rate": 0.00019709906428571616,
|
|
"epoch": 0.12429831595829992,
|
|
"step": 155
|
|
},
|
|
{
|
|
"loss": 0.0794,
|
|
"grad_norm": 0.9103071689605713,
|
|
"learning_rate": 0.0001970352757850105,
|
|
"epoch": 0.12510024057738572,
|
|
"step": 156
|
|
},
|
|
{
|
|
"loss": 0.0394,
|
|
"grad_norm": 0.46548521518707275,
|
|
"learning_rate": 0.0001969708041191131,
|
|
"epoch": 0.12590216519647154,
|
|
"step": 157
|
|
},
|
|
{
|
|
"loss": 0.0485,
|
|
"grad_norm": 0.4331950545310974,
|
|
"learning_rate": 0.00019690564974192892,
|
|
"epoch": 0.12670408981555734,
|
|
"step": 158
|
|
},
|
|
{
|
|
"loss": 0.0417,
|
|
"grad_norm": 0.4089224636554718,
|
|
"learning_rate": 0.00019683981311216959,
|
|
"epoch": 0.12750601443464316,
|
|
"step": 159
|
|
},
|
|
{
|
|
"loss": 0.067,
|
|
"grad_norm": 0.7565222978591919,
|
|
"learning_rate": 0.0001967732946933499,
|
|
"epoch": 0.12830793905372895,
|
|
"step": 160
|
|
},
|
|
{
|
|
"loss": 0.1036,
|
|
"grad_norm": 0.5942509174346924,
|
|
"learning_rate": 0.00019670609495378482,
|
|
"epoch": 0.12910986367281477,
|
|
"step": 161
|
|
},
|
|
{
|
|
"loss": 0.079,
|
|
"grad_norm": 0.6143490672111511,
|
|
"learning_rate": 0.00019663821436658604,
|
|
"epoch": 0.12991178829190056,
|
|
"step": 162
|
|
},
|
|
{
|
|
"loss": 0.0825,
|
|
"grad_norm": 0.4321056306362152,
|
|
"learning_rate": 0.0001965696534096587,
|
|
"epoch": 0.13071371291098638,
|
|
"step": 163
|
|
},
|
|
{
|
|
"loss": 0.0583,
|
|
"grad_norm": 0.5038022398948669,
|
|
"learning_rate": 0.00019650041256569792,
|
|
"epoch": 0.13151563753007217,
|
|
"step": 164
|
|
},
|
|
{
|
|
"loss": 0.0422,
|
|
"grad_norm": 0.31969794631004333,
|
|
"learning_rate": 0.00019643049232218553,
|
|
"epoch": 0.132317562149158,
|
|
"step": 165
|
|
},
|
|
{
|
|
"loss": 0.0574,
|
|
"grad_norm": 0.33682748675346375,
|
|
"learning_rate": 0.00019635989317138666,
|
|
"epoch": 0.13311948676824378,
|
|
"step": 166
|
|
},
|
|
{
|
|
"loss": 0.1009,
|
|
"grad_norm": 0.8040818572044373,
|
|
"learning_rate": 0.0001962886156103461,
|
|
"epoch": 0.1339214113873296,
|
|
"step": 167
|
|
},
|
|
{
|
|
"loss": 0.0342,
|
|
"grad_norm": 0.3678284287452698,
|
|
"learning_rate": 0.00019621666014088494,
|
|
"epoch": 0.1347233360064154,
|
|
"step": 168
|
|
},
|
|
{
|
|
"loss": 0.1214,
|
|
"grad_norm": 0.5396429300308228,
|
|
"learning_rate": 0.00019614402726959705,
|
|
"epoch": 0.13552526062550121,
|
|
"step": 169
|
|
},
|
|
{
|
|
"loss": 0.0595,
|
|
"grad_norm": 0.46996235847473145,
|
|
"learning_rate": 0.0001960707175078454,
|
|
"epoch": 0.136327185244587,
|
|
"step": 170
|
|
},
|
|
{
|
|
"loss": 0.0714,
|
|
"grad_norm": 0.5289244055747986,
|
|
"learning_rate": 0.00019599673137175855,
|
|
"epoch": 0.13712910986367283,
|
|
"step": 171
|
|
},
|
|
{
|
|
"loss": 0.0443,
|
|
"grad_norm": 0.45096877217292786,
|
|
"learning_rate": 0.00019592206938222703,
|
|
"epoch": 0.13793103448275862,
|
|
"step": 172
|
|
},
|
|
{
|
|
"loss": 0.0552,
|
|
"grad_norm": 0.451477587223053,
|
|
"learning_rate": 0.00019584673206489954,
|
|
"epoch": 0.13873295910184444,
|
|
"step": 173
|
|
},
|
|
{
|
|
"loss": 0.0539,
|
|
"grad_norm": 0.37521955370903015,
|
|
"learning_rate": 0.00019577071995017945,
|
|
"epoch": 0.13953488372093023,
|
|
"step": 174
|
|
},
|
|
{
|
|
"loss": 0.0612,
|
|
"grad_norm": 0.34417349100112915,
|
|
"learning_rate": 0.0001956940335732209,
|
|
"epoch": 0.14033680834001605,
|
|
"step": 175
|
|
},
|
|
{
|
|
"loss": 0.0488,
|
|
"grad_norm": 0.30773675441741943,
|
|
"learning_rate": 0.00019561667347392508,
|
|
"epoch": 0.14113873295910184,
|
|
"step": 176
|
|
},
|
|
{
|
|
"loss": 0.0713,
|
|
"grad_norm": 0.633940577507019,
|
|
"learning_rate": 0.00019553864019693652,
|
|
"epoch": 0.14194065757818766,
|
|
"step": 177
|
|
},
|
|
{
|
|
"loss": 0.046,
|
|
"grad_norm": 0.5169980525970459,
|
|
"learning_rate": 0.00019545993429163913,
|
|
"epoch": 0.14274258219727345,
|
|
"step": 178
|
|
},
|
|
{
|
|
"loss": 0.1141,
|
|
"grad_norm": 0.6768614053726196,
|
|
"learning_rate": 0.0001953805563121523,
|
|
"epoch": 0.14354450681635927,
|
|
"step": 179
|
|
},
|
|
{
|
|
"loss": 0.0451,
|
|
"grad_norm": 0.4514835476875305,
|
|
"learning_rate": 0.0001953005068173272,
|
|
"epoch": 0.14434643143544507,
|
|
"step": 180
|
|
},
|
|
{
|
|
"loss": 0.0473,
|
|
"grad_norm": 0.4141976237297058,
|
|
"learning_rate": 0.00019521978637074267,
|
|
"epoch": 0.14514835605453089,
|
|
"step": 181
|
|
},
|
|
{
|
|
"loss": 0.0551,
|
|
"grad_norm": 0.5272877812385559,
|
|
"learning_rate": 0.0001951383955407013,
|
|
"epoch": 0.14595028067361668,
|
|
"step": 182
|
|
},
|
|
{
|
|
"loss": 0.0714,
|
|
"grad_norm": 0.9530414342880249,
|
|
"learning_rate": 0.00019505633490022546,
|
|
"epoch": 0.1467522052927025,
|
|
"step": 183
|
|
},
|
|
{
|
|
"loss": 0.1042,
|
|
"grad_norm": 0.8545625805854797,
|
|
"learning_rate": 0.0001949736050270532,
|
|
"epoch": 0.1475541299117883,
|
|
"step": 184
|
|
},
|
|
{
|
|
"loss": 0.0809,
|
|
"grad_norm": 0.4434387683868408,
|
|
"learning_rate": 0.00019489020650363426,
|
|
"epoch": 0.1483560545308741,
|
|
"step": 185
|
|
},
|
|
{
|
|
"loss": 0.0775,
|
|
"grad_norm": 0.6959827542304993,
|
|
"learning_rate": 0.00019480613991712588,
|
|
"epoch": 0.1491579791499599,
|
|
"step": 186
|
|
},
|
|
{
|
|
"loss": 0.0479,
|
|
"grad_norm": 0.6551741361618042,
|
|
"learning_rate": 0.00019472140585938882,
|
|
"epoch": 0.14995990376904572,
|
|
"step": 187
|
|
},
|
|
{
|
|
"loss": 0.0719,
|
|
"grad_norm": 0.6390430331230164,
|
|
"learning_rate": 0.00019463600492698296,
|
|
"epoch": 0.1507618283881315,
|
|
"step": 188
|
|
},
|
|
{
|
|
"loss": 0.0513,
|
|
"grad_norm": 0.4449121356010437,
|
|
"learning_rate": 0.00019454993772116336,
|
|
"epoch": 0.15156375300721733,
|
|
"step": 189
|
|
},
|
|
{
|
|
"loss": 0.0419,
|
|
"grad_norm": 0.3439355492591858,
|
|
"learning_rate": 0.00019446320484787575,
|
|
"epoch": 0.15236567762630313,
|
|
"step": 190
|
|
},
|
|
{
|
|
"loss": 0.0594,
|
|
"grad_norm": 0.3776263892650604,
|
|
"learning_rate": 0.00019437580691775258,
|
|
"epoch": 0.15316760224538895,
|
|
"step": 191
|
|
},
|
|
{
|
|
"loss": 0.0544,
|
|
"grad_norm": 0.5031057000160217,
|
|
"learning_rate": 0.00019428774454610843,
|
|
"epoch": 0.15396952686447474,
|
|
"step": 192
|
|
},
|
|
{
|
|
"loss": 0.0451,
|
|
"grad_norm": 0.33334505558013916,
|
|
"learning_rate": 0.00019419901835293583,
|
|
"epoch": 0.15477145148356056,
|
|
"step": 193
|
|
},
|
|
{
|
|
"loss": 0.0433,
|
|
"grad_norm": 0.28815487027168274,
|
|
"learning_rate": 0.0001941096289629009,
|
|
"epoch": 0.15557337610264635,
|
|
"step": 194
|
|
},
|
|
{
|
|
"loss": 0.0648,
|
|
"grad_norm": 0.5981271266937256,
|
|
"learning_rate": 0.00019401957700533888,
|
|
"epoch": 0.15637530072173217,
|
|
"step": 195
|
|
},
|
|
{
|
|
"loss": 0.0469,
|
|
"grad_norm": 0.3985323905944824,
|
|
"learning_rate": 0.00019392886311424973,
|
|
"epoch": 0.15717722534081796,
|
|
"step": 196
|
|
},
|
|
{
|
|
"loss": 0.0467,
|
|
"grad_norm": 0.3379111886024475,
|
|
"learning_rate": 0.00019383748792829372,
|
|
"epoch": 0.15797914995990378,
|
|
"step": 197
|
|
},
|
|
{
|
|
"loss": 0.0467,
|
|
"grad_norm": 0.4567181468009949,
|
|
"learning_rate": 0.00019374545209078687,
|
|
"epoch": 0.15878107457898957,
|
|
"step": 198
|
|
},
|
|
{
|
|
"loss": 0.0494,
|
|
"grad_norm": 0.38965797424316406,
|
|
"learning_rate": 0.0001936527562496964,
|
|
"epoch": 0.1595829991980754,
|
|
"step": 199
|
|
},
|
|
{
|
|
"loss": 0.0552,
|
|
"grad_norm": 0.5935775637626648,
|
|
"learning_rate": 0.0001935594010576362,
|
|
"epoch": 0.16038492381716118,
|
|
"step": 200
|
|
},
|
|
{
|
|
"loss": 0.0558,
|
|
"grad_norm": 0.4197022318840027,
|
|
"learning_rate": 0.0001934653871718624,
|
|
"epoch": 0.161186848436247,
|
|
"step": 201
|
|
},
|
|
{
|
|
"loss": 0.0582,
|
|
"grad_norm": 0.46609750390052795,
|
|
"learning_rate": 0.0001933707152542683,
|
|
"epoch": 0.1619887730553328,
|
|
"step": 202
|
|
},
|
|
{
|
|
"loss": 0.0686,
|
|
"grad_norm": 0.6024537086486816,
|
|
"learning_rate": 0.00019327538597138029,
|
|
"epoch": 0.16279069767441862,
|
|
"step": 203
|
|
},
|
|
{
|
|
"loss": 0.0394,
|
|
"grad_norm": 0.36054351925849915,
|
|
"learning_rate": 0.0001931793999943526,
|
|
"epoch": 0.1635926222935044,
|
|
"step": 204
|
|
},
|
|
{
|
|
"loss": 0.0756,
|
|
"grad_norm": 0.6744378805160522,
|
|
"learning_rate": 0.0001930827579989631,
|
|
"epoch": 0.16439454691259023,
|
|
"step": 205
|
|
},
|
|
{
|
|
"loss": 0.0492,
|
|
"grad_norm": 0.3806591331958771,
|
|
"learning_rate": 0.00019298546066560802,
|
|
"epoch": 0.16519647153167602,
|
|
"step": 206
|
|
},
|
|
{
|
|
"loss": 0.0189,
|
|
"grad_norm": 0.30359482765197754,
|
|
"learning_rate": 0.00019288750867929756,
|
|
"epoch": 0.16599839615076184,
|
|
"step": 207
|
|
},
|
|
{
|
|
"loss": 0.0396,
|
|
"grad_norm": 0.4475793242454529,
|
|
"learning_rate": 0.00019278890272965096,
|
|
"epoch": 0.16680032076984763,
|
|
"step": 208
|
|
},
|
|
{
|
|
"loss": 0.0935,
|
|
"grad_norm": 0.6485787630081177,
|
|
"learning_rate": 0.00019268964351089148,
|
|
"epoch": 0.16760224538893345,
|
|
"step": 209
|
|
},
|
|
{
|
|
"loss": 0.0615,
|
|
"grad_norm": 0.4646718502044678,
|
|
"learning_rate": 0.00019258973172184174,
|
|
"epoch": 0.16840417000801924,
|
|
"step": 210
|
|
},
|
|
{
|
|
"loss": 0.0414,
|
|
"grad_norm": 0.49095892906188965,
|
|
"learning_rate": 0.0001924891680659187,
|
|
"epoch": 0.16920609462710506,
|
|
"step": 211
|
|
},
|
|
{
|
|
"loss": 0.0265,
|
|
"grad_norm": 0.835785984992981,
|
|
"learning_rate": 0.0001923879532511287,
|
|
"epoch": 0.17000801924619086,
|
|
"step": 212
|
|
},
|
|
{
|
|
"loss": 0.0696,
|
|
"grad_norm": 0.43860942125320435,
|
|
"learning_rate": 0.0001922860879900624,
|
|
"epoch": 0.17080994386527668,
|
|
"step": 213
|
|
},
|
|
{
|
|
"loss": 0.0578,
|
|
"grad_norm": 0.5574386119842529,
|
|
"learning_rate": 0.00019218357299988998,
|
|
"epoch": 0.17161186848436247,
|
|
"step": 214
|
|
},
|
|
{
|
|
"loss": 0.0454,
|
|
"grad_norm": 0.3218346834182739,
|
|
"learning_rate": 0.0001920804090023559,
|
|
"epoch": 0.1724137931034483,
|
|
"step": 215
|
|
},
|
|
{
|
|
"loss": 0.0453,
|
|
"grad_norm": 0.4190017879009247,
|
|
"learning_rate": 0.0001919765967237739,
|
|
"epoch": 0.17321571772253408,
|
|
"step": 216
|
|
},
|
|
{
|
|
"loss": 0.0449,
|
|
"grad_norm": 0.35313868522644043,
|
|
"learning_rate": 0.00019187213689502176,
|
|
"epoch": 0.1740176423416199,
|
|
"step": 217
|
|
},
|
|
{
|
|
"loss": 0.0813,
|
|
"grad_norm": 0.44302183389663696,
|
|
"learning_rate": 0.00019176703025153643,
|
|
"epoch": 0.1748195669607057,
|
|
"step": 218
|
|
},
|
|
{
|
|
"loss": 0.0276,
|
|
"grad_norm": 0.2679917812347412,
|
|
"learning_rate": 0.00019166127753330857,
|
|
"epoch": 0.1756214915797915,
|
|
"step": 219
|
|
},
|
|
{
|
|
"loss": 0.0323,
|
|
"grad_norm": 0.2973562777042389,
|
|
"learning_rate": 0.00019155487948487748,
|
|
"epoch": 0.1764234161988773,
|
|
"step": 220
|
|
},
|
|
{
|
|
"loss": 0.044,
|
|
"grad_norm": 0.3215465247631073,
|
|
"learning_rate": 0.00019144783685532578,
|
|
"epoch": 0.17722534081796312,
|
|
"step": 221
|
|
},
|
|
{
|
|
"loss": 0.0353,
|
|
"grad_norm": 0.3549197018146515,
|
|
"learning_rate": 0.00019134015039827431,
|
|
"epoch": 0.17802726543704891,
|
|
"step": 222
|
|
},
|
|
{
|
|
"loss": 0.051,
|
|
"grad_norm": 0.42532142996788025,
|
|
"learning_rate": 0.00019123182087187656,
|
|
"epoch": 0.17882919005613473,
|
|
"step": 223
|
|
},
|
|
{
|
|
"loss": 0.0479,
|
|
"grad_norm": 0.39455631375312805,
|
|
"learning_rate": 0.0001911228490388136,
|
|
"epoch": 0.17963111467522053,
|
|
"step": 224
|
|
},
|
|
{
|
|
"loss": 0.0573,
|
|
"grad_norm": 0.45477625727653503,
|
|
"learning_rate": 0.00019101323566628843,
|
|
"epoch": 0.18043303929430635,
|
|
"step": 225
|
|
},
|
|
{
|
|
"loss": 0.0342,
|
|
"grad_norm": 0.2890731692314148,
|
|
"learning_rate": 0.0001909029815260209,
|
|
"epoch": 0.18123496391339214,
|
|
"step": 226
|
|
},
|
|
{
|
|
"loss": 0.0361,
|
|
"grad_norm": 0.34329336881637573,
|
|
"learning_rate": 0.00019079208739424197,
|
|
"epoch": 0.18203688853247796,
|
|
"step": 227
|
|
},
|
|
{
|
|
"loss": 0.0714,
|
|
"grad_norm": 0.6790952682495117,
|
|
"learning_rate": 0.0001906805540516885,
|
|
"epoch": 0.18283881315156375,
|
|
"step": 228
|
|
},
|
|
{
|
|
"loss": 0.032,
|
|
"grad_norm": 0.26758837699890137,
|
|
"learning_rate": 0.00019056838228359753,
|
|
"epoch": 0.18364073777064957,
|
|
"step": 229
|
|
},
|
|
{
|
|
"loss": 0.0401,
|
|
"grad_norm": 0.5012450218200684,
|
|
"learning_rate": 0.0001904555728797009,
|
|
"epoch": 0.18444266238973536,
|
|
"step": 230
|
|
},
|
|
{
|
|
"loss": 0.0301,
|
|
"grad_norm": 0.28922465443611145,
|
|
"learning_rate": 0.00019034212663421969,
|
|
"epoch": 0.18524458700882118,
|
|
"step": 231
|
|
},
|
|
{
|
|
"loss": 0.0639,
|
|
"grad_norm": 0.44703420996665955,
|
|
"learning_rate": 0.00019022804434585852,
|
|
"epoch": 0.18604651162790697,
|
|
"step": 232
|
|
},
|
|
{
|
|
"loss": 0.0446,
|
|
"grad_norm": 0.25191769003868103,
|
|
"learning_rate": 0.00019011332681780006,
|
|
"epoch": 0.1868484362469928,
|
|
"step": 233
|
|
},
|
|
{
|
|
"loss": 0.0623,
|
|
"grad_norm": 0.46206915378570557,
|
|
"learning_rate": 0.00018999797485769925,
|
|
"epoch": 0.18765036086607859,
|
|
"step": 234
|
|
},
|
|
{
|
|
"loss": 0.0379,
|
|
"grad_norm": 0.25015994906425476,
|
|
"learning_rate": 0.0001898819892776777,
|
|
"epoch": 0.1884522854851644,
|
|
"step": 235
|
|
},
|
|
{
|
|
"loss": 0.0602,
|
|
"grad_norm": 0.3543962836265564,
|
|
"learning_rate": 0.0001897653708943179,
|
|
"epoch": 0.1892542101042502,
|
|
"step": 236
|
|
},
|
|
{
|
|
"loss": 0.0483,
|
|
"grad_norm": 0.29529276490211487,
|
|
"learning_rate": 0.00018964812052865764,
|
|
"epoch": 0.19005613472333602,
|
|
"step": 237
|
|
},
|
|
{
|
|
"loss": 0.1344,
|
|
"grad_norm": 0.5841286778450012,
|
|
"learning_rate": 0.00018953023900618397,
|
|
"epoch": 0.1908580593424218,
|
|
"step": 238
|
|
},
|
|
{
|
|
"loss": 0.0651,
|
|
"grad_norm": 0.45970141887664795,
|
|
"learning_rate": 0.00018941172715682757,
|
|
"epoch": 0.19165998396150763,
|
|
"step": 239
|
|
},
|
|
{
|
|
"loss": 0.0552,
|
|
"grad_norm": 0.4103776514530182,
|
|
"learning_rate": 0.00018929258581495685,
|
|
"epoch": 0.19246190858059342,
|
|
"step": 240
|
|
},
|
|
{
|
|
"loss": 0.0525,
|
|
"grad_norm": 0.3026215434074402,
|
|
"learning_rate": 0.00018917281581937214,
|
|
"epoch": 0.19326383319967924,
|
|
"step": 241
|
|
},
|
|
{
|
|
"loss": 0.0343,
|
|
"grad_norm": 0.28369593620300293,
|
|
"learning_rate": 0.00018905241801329972,
|
|
"epoch": 0.19406575781876503,
|
|
"step": 242
|
|
},
|
|
{
|
|
"loss": 0.05,
|
|
"grad_norm": 0.36326268315315247,
|
|
"learning_rate": 0.00018893139324438577,
|
|
"epoch": 0.19486768243785085,
|
|
"step": 243
|
|
},
|
|
{
|
|
"loss": 0.0539,
|
|
"grad_norm": 0.38554129004478455,
|
|
"learning_rate": 0.0001888097423646907,
|
|
"epoch": 0.19566960705693665,
|
|
"step": 244
|
|
},
|
|
{
|
|
"loss": 0.0202,
|
|
"grad_norm": 0.16676409542560577,
|
|
"learning_rate": 0.00018868746623068293,
|
|
"epoch": 0.19647153167602247,
|
|
"step": 245
|
|
},
|
|
{
|
|
"loss": 0.0282,
|
|
"grad_norm": 0.37308400869369507,
|
|
"learning_rate": 0.00018856456570323277,
|
|
"epoch": 0.19727345629510826,
|
|
"step": 246
|
|
},
|
|
{
|
|
"loss": 0.0517,
|
|
"grad_norm": 0.38957253098487854,
|
|
"learning_rate": 0.0001884410416476067,
|
|
"epoch": 0.19807538091419408,
|
|
"step": 247
|
|
},
|
|
{
|
|
"loss": 0.0236,
|
|
"grad_norm": 0.33424702286720276,
|
|
"learning_rate": 0.00018831689493346095,
|
|
"epoch": 0.19887730553327987,
|
|
"step": 248
|
|
},
|
|
{
|
|
"eval_loss": 0.056792400777339935,
|
|
"eval_runtime": 32.4803,
|
|
"eval_samples_per_second": 32.327,
|
|
"eval_steps_per_second": 8.097,
|
|
"epoch": 0.19887730553327987,
|
|
"step": 248
|
|
},
|
|
{
|
|
"loss": 0.0303,
|
|
"grad_norm": 0.308700829744339,
|
|
"learning_rate": 0.0001881921264348355,
|
|
"epoch": 0.1996792301523657,
|
|
"step": 249
|
|
},
|
|
{
|
|
"loss": 0.0619,
|
|
"grad_norm": 0.31528714299201965,
|
|
"learning_rate": 0.00018806673703014804,
|
|
"epoch": 0.20048115477145148,
|
|
"step": 250
|
|
},
|
|
{
|
|
"loss": 0.0917,
|
|
"grad_norm": 0.6735418438911438,
|
|
"learning_rate": 0.00018794072760218753,
|
|
"epoch": 0.2012830793905373,
|
|
"step": 251
|
|
},
|
|
{
|
|
"loss": 0.0882,
|
|
"grad_norm": 0.793260931968689,
|
|
"learning_rate": 0.00018781409903810821,
|
|
"epoch": 0.2020850040096231,
|
|
"step": 252
|
|
},
|
|
{
|
|
"loss": 0.0455,
|
|
"grad_norm": 0.39877378940582275,
|
|
"learning_rate": 0.0001876868522294233,
|
|
"epoch": 0.2028869286287089,
|
|
"step": 253
|
|
},
|
|
{
|
|
"loss": 0.0959,
|
|
"grad_norm": 0.931326687335968,
|
|
"learning_rate": 0.00018755898807199856,
|
|
"epoch": 0.2036888532477947,
|
|
"step": 254
|
|
},
|
|
{
|
|
"loss": 0.0983,
|
|
"grad_norm": 0.8125079274177551,
|
|
"learning_rate": 0.00018743050746604633,
|
|
"epoch": 0.20449077786688052,
|
|
"step": 255
|
|
},
|
|
{
|
|
"loss": 0.0523,
|
|
"grad_norm": 0.4794807434082031,
|
|
"learning_rate": 0.00018730141131611882,
|
|
"epoch": 0.20529270248596632,
|
|
"step": 256
|
|
},
|
|
{
|
|
"loss": 0.0311,
|
|
"grad_norm": 0.24229726195335388,
|
|
"learning_rate": 0.00018717170053110196,
|
|
"epoch": 0.20609462710505214,
|
|
"step": 257
|
|
},
|
|
{
|
|
"loss": 0.073,
|
|
"grad_norm": 0.3876228630542755,
|
|
"learning_rate": 0.0001870413760242089,
|
|
"epoch": 0.20689655172413793,
|
|
"step": 258
|
|
},
|
|
{
|
|
"loss": 0.0837,
|
|
"grad_norm": 0.3997587263584137,
|
|
"learning_rate": 0.0001869104387129737,
|
|
"epoch": 0.20769847634322375,
|
|
"step": 259
|
|
},
|
|
{
|
|
"loss": 0.0395,
|
|
"grad_norm": 0.3965926468372345,
|
|
"learning_rate": 0.00018677888951924474,
|
|
"epoch": 0.20850040096230954,
|
|
"step": 260
|
|
},
|
|
{
|
|
"loss": 0.0554,
|
|
"grad_norm": 0.4688607156276703,
|
|
"learning_rate": 0.00018664672936917828,
|
|
"epoch": 0.20930232558139536,
|
|
"step": 261
|
|
},
|
|
{
|
|
"loss": 0.1299,
|
|
"grad_norm": 0.9481903910636902,
|
|
"learning_rate": 0.00018651395919323202,
|
|
"epoch": 0.21010425020048115,
|
|
"step": 262
|
|
},
|
|
{
|
|
"loss": 0.0503,
|
|
"grad_norm": 0.29419174790382385,
|
|
"learning_rate": 0.00018638057992615838,
|
|
"epoch": 0.21090617481956697,
|
|
"step": 263
|
|
},
|
|
{
|
|
"loss": 0.0969,
|
|
"grad_norm": 0.5171178579330444,
|
|
"learning_rate": 0.00018624659250699805,
|
|
"epoch": 0.21170809943865276,
|
|
"step": 264
|
|
},
|
|
{
|
|
"loss": 0.0345,
|
|
"grad_norm": 0.3265765309333801,
|
|
"learning_rate": 0.00018611199787907338,
|
|
"epoch": 0.21251002405773858,
|
|
"step": 265
|
|
},
|
|
{
|
|
"loss": 0.0362,
|
|
"grad_norm": 0.33063560724258423,
|
|
"learning_rate": 0.00018597679698998163,
|
|
"epoch": 0.21331194867682438,
|
|
"step": 266
|
|
},
|
|
{
|
|
"loss": 0.0511,
|
|
"grad_norm": 0.660375714302063,
|
|
"learning_rate": 0.00018584099079158842,
|
|
"epoch": 0.2141138732959102,
|
|
"step": 267
|
|
},
|
|
{
|
|
"loss": 0.0815,
|
|
"grad_norm": 0.580894410610199,
|
|
"learning_rate": 0.00018570458024002093,
|
|
"epoch": 0.214915797914996,
|
|
"step": 268
|
|
},
|
|
{
|
|
"loss": 0.1139,
|
|
"grad_norm": 0.7811892032623291,
|
|
"learning_rate": 0.0001855675662956613,
|
|
"epoch": 0.2157177225340818,
|
|
"step": 269
|
|
},
|
|
{
|
|
"loss": 0.0454,
|
|
"grad_norm": 0.40047529339790344,
|
|
"learning_rate": 0.0001854299499231397,
|
|
"epoch": 0.2165196471531676,
|
|
"step": 270
|
|
},
|
|
{
|
|
"loss": 0.0691,
|
|
"grad_norm": 0.6034380197525024,
|
|
"learning_rate": 0.0001852917320913276,
|
|
"epoch": 0.21732157177225342,
|
|
"step": 271
|
|
},
|
|
{
|
|
"loss": 0.0392,
|
|
"grad_norm": 0.3567689061164856,
|
|
"learning_rate": 0.00018515291377333112,
|
|
"epoch": 0.2181234963913392,
|
|
"step": 272
|
|
},
|
|
{
|
|
"loss": 0.0426,
|
|
"grad_norm": 0.346510648727417,
|
|
"learning_rate": 0.00018501349594648395,
|
|
"epoch": 0.21892542101042503,
|
|
"step": 273
|
|
},
|
|
{
|
|
"loss": 0.0326,
|
|
"grad_norm": 0.5042007565498352,
|
|
"learning_rate": 0.0001848734795923404,
|
|
"epoch": 0.21972734562951082,
|
|
"step": 274
|
|
},
|
|
{
|
|
"loss": 0.0446,
|
|
"grad_norm": 0.5788083076477051,
|
|
"learning_rate": 0.0001847328656966689,
|
|
"epoch": 0.22052927024859664,
|
|
"step": 275
|
|
},
|
|
{
|
|
"loss": 0.0573,
|
|
"grad_norm": 0.3450917601585388,
|
|
"learning_rate": 0.0001845916552494446,
|
|
"epoch": 0.22133119486768243,
|
|
"step": 276
|
|
},
|
|
{
|
|
"loss": 0.0495,
|
|
"grad_norm": 0.36637723445892334,
|
|
"learning_rate": 0.00018444984924484277,
|
|
"epoch": 0.22213311948676825,
|
|
"step": 277
|
|
},
|
|
{
|
|
"loss": 0.0342,
|
|
"grad_norm": 0.24668528139591217,
|
|
"learning_rate": 0.00018430744868123145,
|
|
"epoch": 0.22293504410585405,
|
|
"step": 278
|
|
},
|
|
{
|
|
"loss": 0.0499,
|
|
"grad_norm": 0.49873360991477966,
|
|
"learning_rate": 0.0001841644545611647,
|
|
"epoch": 0.22373696872493987,
|
|
"step": 279
|
|
},
|
|
{
|
|
"loss": 0.1,
|
|
"grad_norm": 1.0184354782104492,
|
|
"learning_rate": 0.00018402086789137546,
|
|
"epoch": 0.22453889334402566,
|
|
"step": 280
|
|
},
|
|
{
|
|
"loss": 0.0401,
|
|
"grad_norm": 0.43986013531684875,
|
|
"learning_rate": 0.00018387668968276836,
|
|
"epoch": 0.22534081796311148,
|
|
"step": 281
|
|
},
|
|
{
|
|
"loss": 0.0451,
|
|
"grad_norm": 0.3782620131969452,
|
|
"learning_rate": 0.0001837319209504128,
|
|
"epoch": 0.22614274258219727,
|
|
"step": 282
|
|
},
|
|
{
|
|
"loss": 0.0494,
|
|
"grad_norm": 0.396990031003952,
|
|
"learning_rate": 0.00018358656271353559,
|
|
"epoch": 0.2269446672012831,
|
|
"step": 283
|
|
},
|
|
{
|
|
"loss": 0.0262,
|
|
"grad_norm": 0.34569329023361206,
|
|
"learning_rate": 0.00018344061599551398,
|
|
"epoch": 0.22774659182036888,
|
|
"step": 284
|
|
},
|
|
{
|
|
"loss": 0.049,
|
|
"grad_norm": 0.36551395058631897,
|
|
"learning_rate": 0.0001832940818238682,
|
|
"epoch": 0.2285485164394547,
|
|
"step": 285
|
|
},
|
|
{
|
|
"loss": 0.0493,
|
|
"grad_norm": 0.3316669166088104,
|
|
"learning_rate": 0.00018314696123025454,
|
|
"epoch": 0.2293504410585405,
|
|
"step": 286
|
|
},
|
|
{
|
|
"loss": 0.0974,
|
|
"grad_norm": 0.9379229545593262,
|
|
"learning_rate": 0.0001829992552504578,
|
|
"epoch": 0.2301523656776263,
|
|
"step": 287
|
|
},
|
|
{
|
|
"loss": 0.035,
|
|
"grad_norm": 0.4309346079826355,
|
|
"learning_rate": 0.00018285096492438424,
|
|
"epoch": 0.2309542902967121,
|
|
"step": 288
|
|
},
|
|
{
|
|
"loss": 0.0573,
|
|
"grad_norm": 0.47338199615478516,
|
|
"learning_rate": 0.00018270209129605397,
|
|
"epoch": 0.23175621491579793,
|
|
"step": 289
|
|
},
|
|
{
|
|
"loss": 0.026,
|
|
"grad_norm": 0.3351285457611084,
|
|
"learning_rate": 0.00018255263541359397,
|
|
"epoch": 0.23255813953488372,
|
|
"step": 290
|
|
},
|
|
{
|
|
"loss": 0.0291,
|
|
"grad_norm": 0.2552240192890167,
|
|
"learning_rate": 0.00018240259832923034,
|
|
"epoch": 0.23336006415396954,
|
|
"step": 291
|
|
},
|
|
{
|
|
"loss": 0.02,
|
|
"grad_norm": 0.23985892534255981,
|
|
"learning_rate": 0.00018225198109928114,
|
|
"epoch": 0.23416198877305533,
|
|
"step": 292
|
|
},
|
|
{
|
|
"loss": 0.0195,
|
|
"grad_norm": 0.26093894243240356,
|
|
"learning_rate": 0.00018210078478414894,
|
|
"epoch": 0.23496391339214115,
|
|
"step": 293
|
|
},
|
|
{
|
|
"loss": 0.0669,
|
|
"grad_norm": 0.5353745222091675,
|
|
"learning_rate": 0.00018194901044831313,
|
|
"epoch": 0.23576583801122694,
|
|
"step": 294
|
|
},
|
|
{
|
|
"loss": 0.0193,
|
|
"grad_norm": 0.25396963953971863,
|
|
"learning_rate": 0.00018179665916032273,
|
|
"epoch": 0.23656776263031276,
|
|
"step": 295
|
|
},
|
|
{
|
|
"loss": 0.0656,
|
|
"grad_norm": 0.3989141881465912,
|
|
"learning_rate": 0.00018164373199278856,
|
|
"epoch": 0.23736968724939855,
|
|
"step": 296
|
|
},
|
|
{
|
|
"loss": 0.0297,
|
|
"grad_norm": 0.31333601474761963,
|
|
"learning_rate": 0.00018149023002237612,
|
|
"epoch": 0.23817161186848437,
|
|
"step": 297
|
|
},
|
|
{
|
|
"loss": 0.0471,
|
|
"grad_norm": 0.8698596954345703,
|
|
"learning_rate": 0.00018133615432979744,
|
|
"epoch": 0.23897353648757017,
|
|
"step": 298
|
|
},
|
|
{
|
|
"loss": 0.081,
|
|
"grad_norm": 0.46993565559387207,
|
|
"learning_rate": 0.00018118150599980397,
|
|
"epoch": 0.23977546110665598,
|
|
"step": 299
|
|
},
|
|
{
|
|
"loss": 0.049,
|
|
"grad_norm": 0.5059134364128113,
|
|
"learning_rate": 0.00018102628612117865,
|
|
"epoch": 0.24057738572574178,
|
|
"step": 300
|
|
},
|
|
{
|
|
"loss": 0.0653,
|
|
"grad_norm": 0.5506439805030823,
|
|
"learning_rate": 0.00018087049578672845,
|
|
"epoch": 0.2413793103448276,
|
|
"step": 301
|
|
},
|
|
{
|
|
"loss": 0.0554,
|
|
"grad_norm": 0.5644898414611816,
|
|
"learning_rate": 0.00018071413609327638,
|
|
"epoch": 0.2421812349639134,
|
|
"step": 302
|
|
},
|
|
{
|
|
"loss": 0.0536,
|
|
"grad_norm": 0.4158555269241333,
|
|
"learning_rate": 0.00018055720814165414,
|
|
"epoch": 0.2429831595829992,
|
|
"step": 303
|
|
},
|
|
{
|
|
"loss": 0.0593,
|
|
"grad_norm": 0.4376695454120636,
|
|
"learning_rate": 0.00018039971303669407,
|
|
"epoch": 0.243785084202085,
|
|
"step": 304
|
|
},
|
|
{
|
|
"loss": 0.0698,
|
|
"grad_norm": 0.5507003664970398,
|
|
"learning_rate": 0.00018024165188722151,
|
|
"epoch": 0.24458700882117082,
|
|
"step": 305
|
|
},
|
|
{
|
|
"loss": 0.0219,
|
|
"grad_norm": 0.25363439321517944,
|
|
"learning_rate": 0.000180083025806047,
|
|
"epoch": 0.2453889334402566,
|
|
"step": 306
|
|
},
|
|
{
|
|
"loss": 0.0134,
|
|
"grad_norm": 0.2080700397491455,
|
|
"learning_rate": 0.00017992383590995838,
|
|
"epoch": 0.24619085805934243,
|
|
"step": 307
|
|
},
|
|
{
|
|
"loss": 0.0656,
|
|
"grad_norm": 0.421975702047348,
|
|
"learning_rate": 0.00017976408331971298,
|
|
"epoch": 0.24699278267842822,
|
|
"step": 308
|
|
},
|
|
{
|
|
"loss": 0.027,
|
|
"grad_norm": 0.3046298921108246,
|
|
"learning_rate": 0.00017960376916002972,
|
|
"epoch": 0.24779470729751404,
|
|
"step": 309
|
|
},
|
|
{
|
|
"loss": 0.0534,
|
|
"grad_norm": 0.3668377995491028,
|
|
"learning_rate": 0.00017944289455958112,
|
|
"epoch": 0.24859663191659984,
|
|
"step": 310
|
|
},
|
|
{
|
|
"loss": 0.0553,
|
|
"grad_norm": 0.4287368059158325,
|
|
"learning_rate": 0.0001792814606509855,
|
|
"epoch": 0.24939855653568566,
|
|
"step": 311
|
|
},
|
|
{
|
|
"loss": 0.0487,
|
|
"grad_norm": 0.370373010635376,
|
|
"learning_rate": 0.00017911946857079888,
|
|
"epoch": 0.25020048115477145,
|
|
"step": 312
|
|
},
|
|
{
|
|
"loss": 0.0422,
|
|
"grad_norm": 0.4405466616153717,
|
|
"learning_rate": 0.00017895691945950696,
|
|
"epoch": 0.25100240577385724,
|
|
"step": 313
|
|
},
|
|
{
|
|
"loss": 0.1081,
|
|
"grad_norm": 0.6124715805053711,
|
|
"learning_rate": 0.0001787938144615173,
|
|
"epoch": 0.2518043303929431,
|
|
"step": 314
|
|
},
|
|
{
|
|
"loss": 0.0403,
|
|
"grad_norm": 0.22574079036712646,
|
|
"learning_rate": 0.000178630154725151,
|
|
"epoch": 0.2526062550120289,
|
|
"step": 315
|
|
},
|
|
{
|
|
"loss": 0.1046,
|
|
"grad_norm": 0.5598015189170837,
|
|
"learning_rate": 0.00017846594140263474,
|
|
"epoch": 0.25340817963111467,
|
|
"step": 316
|
|
},
|
|
{
|
|
"loss": 0.0544,
|
|
"grad_norm": 0.3449535667896271,
|
|
"learning_rate": 0.0001783011756500927,
|
|
"epoch": 0.25421010425020046,
|
|
"step": 317
|
|
},
|
|
{
|
|
"loss": 0.058,
|
|
"grad_norm": 0.40914788842201233,
|
|
"learning_rate": 0.0001781358586275383,
|
|
"epoch": 0.2550120288692863,
|
|
"step": 318
|
|
},
|
|
{
|
|
"loss": 0.1132,
|
|
"grad_norm": 0.7423124313354492,
|
|
"learning_rate": 0.0001779699914988662,
|
|
"epoch": 0.2558139534883721,
|
|
"step": 319
|
|
},
|
|
{
|
|
"loss": 0.0601,
|
|
"grad_norm": 0.6021925210952759,
|
|
"learning_rate": 0.00017780357543184397,
|
|
"epoch": 0.2566158781074579,
|
|
"step": 320
|
|
},
|
|
{
|
|
"loss": 0.1014,
|
|
"grad_norm": 0.48059457540512085,
|
|
"learning_rate": 0.0001776366115981039,
|
|
"epoch": 0.2574178027265437,
|
|
"step": 321
|
|
},
|
|
{
|
|
"loss": 0.0548,
|
|
"grad_norm": 0.5897157788276672,
|
|
"learning_rate": 0.00017746910117313482,
|
|
"epoch": 0.25821972734562953,
|
|
"step": 322
|
|
},
|
|
{
|
|
"loss": 0.0483,
|
|
"grad_norm": 0.36229458451271057,
"learning_rate": 0.0001773010453362737,
"epoch": 0.2590216519647153,
"step": 323
},
{
"loss": 0.0632,
"grad_norm": 0.49136513471603394,
"learning_rate": 0.0001771324452706975,
"epoch": 0.2598235765838011,
"step": 324
},
{
"loss": 0.0324,
"grad_norm": 0.6286053657531738,
"learning_rate": 0.00017696330216341463,
"epoch": 0.2606255012028869,
"step": 325
},
{
"loss": 0.054,
"grad_norm": 0.49283909797668457,
"learning_rate": 0.0001767936172052569,
"epoch": 0.26142742582197276,
"step": 326
},
{
"loss": 0.0256,
"grad_norm": 0.2010183483362198,
"learning_rate": 0.00017662339159087078,
"epoch": 0.26222935044105855,
"step": 327
},
{
"loss": 0.0451,
"grad_norm": 0.39567244052886963,
"learning_rate": 0.00017645262651870926,
"epoch": 0.26303127506014434,
"step": 328
},
{
"loss": 0.1059,
"grad_norm": 0.5877751708030701,
"learning_rate": 0.00017628132319102332,
"epoch": 0.26383319967923013,
"step": 329
},
{
"loss": 0.0314,
"grad_norm": 0.28202834725379944,
"learning_rate": 0.0001761094828138534,
"epoch": 0.264635124298316,
"step": 330
},
{
"loss": 0.0241,
"grad_norm": 0.3100980222225189,
"learning_rate": 0.00017593710659702104,
"epoch": 0.2654370489174018,
"step": 331
},
{
"loss": 0.0515,
"grad_norm": 0.28590792417526245,
"learning_rate": 0.0001757641957541203,
"epoch": 0.26623897353648757,
"step": 332
},
{
"loss": 0.0691,
"grad_norm": 0.7025532126426697,
"learning_rate": 0.0001755907515025091,
"epoch": 0.26704089815557336,
"step": 333
},
{
"loss": 0.0398,
"grad_norm": 0.3652035593986511,
"learning_rate": 0.0001754167750633009,
"epoch": 0.2678428227746592,
"step": 334
},
{
"loss": 0.0597,
"grad_norm": 0.41364148259162903,
"learning_rate": 0.00017524226766135588,
"epoch": 0.268644747393745,
"step": 335
},
{
"loss": 0.0582,
"grad_norm": 0.3338804244995117,
"learning_rate": 0.00017506723052527242,
"epoch": 0.2694466720128308,
"step": 336
},
{
"loss": 0.0833,
"grad_norm": 0.9465529322624207,
"learning_rate": 0.00017489166488737846,
"epoch": 0.2702485966319166,
"step": 337
},
{
"loss": 0.0848,
"grad_norm": 0.44553694128990173,
"learning_rate": 0.00017471557198372274,
"epoch": 0.27105052125100243,
"step": 338
},
{
"loss": 0.1273,
"grad_norm": 1.1346548795700073,
"learning_rate": 0.00017453895305406616,
"epoch": 0.2718524458700882,
"step": 339
},
{
"loss": 0.0736,
"grad_norm": 0.4856693744659424,
"learning_rate": 0.00017436180934187308,
"epoch": 0.272654370489174,
"step": 340
},
{
"loss": 0.0296,
"grad_norm": 0.27393412590026855,
"learning_rate": 0.0001741841420943025,
"epoch": 0.2734562951082598,
"step": 341
},
{
"loss": 0.0504,
"grad_norm": 0.3282850384712219,
"learning_rate": 0.00017400595256219928,
"epoch": 0.27425821972734565,
"step": 342
},
{
"loss": 0.0501,
"grad_norm": 0.3622792363166809,
"learning_rate": 0.00017382724200008546,
"epoch": 0.27506014434643145,
"step": 343
},
{
"loss": 0.0875,
"grad_norm": 0.5967736840248108,
"learning_rate": 0.00017364801166615124,
"epoch": 0.27586206896551724,
"step": 344
},
{
"loss": 0.0797,
"grad_norm": 0.665009617805481,
"learning_rate": 0.0001734682628222462,
"epoch": 0.27666399358460303,
"step": 345
},
{
"loss": 0.0367,
"grad_norm": 0.31664225459098816,
"learning_rate": 0.0001732879967338705,
"epoch": 0.2774659182036889,
"step": 346
},
{
"loss": 0.078,
"grad_norm": 0.613771915435791,
"learning_rate": 0.00017310721467016587,
"epoch": 0.27826784282277467,
"step": 347
},
{
"loss": 0.0309,
"grad_norm": 0.29217079281806946,
"learning_rate": 0.00017292591790390665,
"epoch": 0.27906976744186046,
"step": 348
},
{
"loss": 0.0147,
"grad_norm": 0.1654537171125412,
"learning_rate": 0.00017274410771149094,
"epoch": 0.27987169206094625,
"step": 349
},
{
"loss": 0.0372,
"grad_norm": 0.2641878128051758,
"learning_rate": 0.0001725617853729316,
"epoch": 0.2806736166800321,
"step": 350
},
{
"loss": 0.0438,
"grad_norm": 0.4984488785266876,
"learning_rate": 0.00017237895217184703,
"epoch": 0.2814755412991179,
"step": 351
},
{
"loss": 0.0641,
"grad_norm": 0.4201189875602722,
"learning_rate": 0.00017219560939545246,
"epoch": 0.2822774659182037,
"step": 352
},
{
"loss": 0.0582,
"grad_norm": 0.3273194134235382,
"learning_rate": 0.00017201175833455066,
"epoch": 0.2830793905372895,
"step": 353
},
{
"loss": 0.0358,
"grad_norm": 0.2902083396911621,
"learning_rate": 0.0001718274002835229,
"epoch": 0.2838813151563753,
"step": 354
},
{
"loss": 0.0383,
"grad_norm": 0.1811976581811905,
"learning_rate": 0.00017164253654031986,
"epoch": 0.2846832397754611,
"step": 355
},
{
"loss": 0.0519,
"grad_norm": 0.4728938639163971,
"learning_rate": 0.00017145716840645254,
"epoch": 0.2854851643945469,
"step": 356
},
{
"loss": 0.0437,
"grad_norm": 0.48397713899612427,
"learning_rate": 0.00017127129718698297,
"epoch": 0.2862870890136327,
"step": 357
},
{
"loss": 0.0416,
"grad_norm": 0.3491261303424835,
"learning_rate": 0.0001710849241905151,
"epoch": 0.28708901363271855,
"step": 358
},
{
"loss": 0.0688,
"grad_norm": 0.4765617251396179,
"learning_rate": 0.00017089805072918567,
"epoch": 0.28789093825180434,
"step": 359
},
{
"loss": 0.0959,
"grad_norm": 0.7366757988929749,
"learning_rate": 0.00017071067811865476,
"epoch": 0.28869286287089013,
"step": 360
},
{
"loss": 0.033,
"grad_norm": 0.3149030804634094,
"learning_rate": 0.00017052280767809673,
"epoch": 0.2894947874899759,
"step": 361
},
{
"loss": 0.0458,
"grad_norm": 0.3187673091888428,
"learning_rate": 0.00017033444073019077,
"epoch": 0.29029671210906177,
"step": 362
},
{
"loss": 0.0446,
"grad_norm": 0.3986169099807739,
"learning_rate": 0.0001701455786011118,
"epoch": 0.29109863672814756,
"step": 363
},
{
"loss": 0.0341,
"grad_norm": 0.3107149302959442,
"learning_rate": 0.00016995622262052092,
"epoch": 0.29190056134723336,
"step": 364
},
{
"loss": 0.0392,
"grad_norm": 0.38037049770355225,
"learning_rate": 0.00016976637412155612,
"epoch": 0.29270248596631915,
"step": 365
},
{
"loss": 0.0406,
"grad_norm": 0.35384100675582886,
"learning_rate": 0.00016957603444082295,
"epoch": 0.293504410585405,
"step": 366
},
{
"loss": 0.0676,
"grad_norm": 0.6596208810806274,
"learning_rate": 0.000169385204918385,
"epoch": 0.2943063352044908,
"step": 367
},
{
"loss": 0.0496,
"grad_norm": 0.3856953978538513,
"learning_rate": 0.00016919388689775464,
"epoch": 0.2951082598235766,
"step": 368
},
{
"loss": 0.0541,
"grad_norm": 0.3974038362503052,
"learning_rate": 0.00016900208172588332,
"epoch": 0.29591018444266237,
"step": 369
},
{
"loss": 0.0419,
"grad_norm": 0.40319862961769104,
"learning_rate": 0.00016880979075315237,
"epoch": 0.2967121090617482,
"step": 370
},
{
"loss": 0.0565,
"grad_norm": 0.27359071373939514,
"learning_rate": 0.00016861701533336322,
"epoch": 0.297514033680834,
"step": 371
},
{
"loss": 0.0262,
"grad_norm": 0.351244181394577,
"learning_rate": 0.00016842375682372805,
"epoch": 0.2983159582999198,
"step": 372
},
{
"eval_loss": 0.05102652311325073,
"eval_runtime": 31.7718,
"eval_samples_per_second": 33.048,
"eval_steps_per_second": 8.278,
"epoch": 0.2983159582999198,
"step": 372
},
{
"loss": 0.0428,
"grad_norm": 0.42074060440063477,
"learning_rate": 0.00016823001658486012,
"epoch": 0.2991178829190056,
"step": 373
},
{
"loss": 0.0224,
"grad_norm": 0.2260231077671051,
"learning_rate": 0.00016803579598076432,
"epoch": 0.29991980753809144,
"step": 374
},
{
"loss": 0.0492,
"grad_norm": 0.47774842381477356,
"learning_rate": 0.0001678410963788275,
"epoch": 0.30072173215717724,
"step": 375
},
{
"loss": 0.0638,
"grad_norm": 0.5587054491043091,
"learning_rate": 0.0001676459191498087,
"epoch": 0.301523656776263,
"step": 376
},
{
"loss": 0.0707,
"grad_norm": 0.4895194172859192,
"learning_rate": 0.0001674502656678298,
"epoch": 0.3023255813953488,
"step": 377
},
{
"loss": 0.0279,
"grad_norm": 0.24737556278705597,
"learning_rate": 0.00016725413731036561,
"epoch": 0.30312750601443467,
"step": 378
},
{
"loss": 0.0305,
"grad_norm": 0.35510316491127014,
"learning_rate": 0.00016705753545823423,
"epoch": 0.30392943063352046,
"step": 379
},
{
"loss": 0.0204,
"grad_norm": 0.253121942281723,
"learning_rate": 0.00016686046149558736,
"epoch": 0.30473135525260625,
"step": 380
},
{
"loss": 0.0226,
"grad_norm": 0.25866273045539856,
"learning_rate": 0.00016666291680990055,
"epoch": 0.30553327987169204,
"step": 381
},
{
"loss": 0.0829,
"grad_norm": 0.4675450325012207,
"learning_rate": 0.00016646490279196343,
"epoch": 0.3063352044907779,
"step": 382
},
{
"loss": 0.0203,
"grad_norm": 0.30080100893974304,
"learning_rate": 0.00016626642083586985,
"epoch": 0.3071371291098637,
"step": 383
},
{
"loss": 0.0454,
"grad_norm": 0.5222088694572449,
"learning_rate": 0.00016606747233900815,
"epoch": 0.3079390537289495,
"step": 384
},
{
"loss": 0.0502,
"grad_norm": 0.32578209042549133,
"learning_rate": 0.00016586805870205134,
"epoch": 0.30874097834803527,
"step": 385
},
{
"loss": 0.0567,
"grad_norm": 0.3294476568698883,
"learning_rate": 0.0001656681813289471,
"epoch": 0.3095429029671211,
"step": 386
},
{
"loss": 0.0817,
"grad_norm": 0.7187215685844421,
"learning_rate": 0.0001654678416269081,
"epoch": 0.3103448275862069,
"step": 387
},
{
"loss": 0.0305,
"grad_norm": 0.31030380725860596,
"learning_rate": 0.0001652670410064019,
"epoch": 0.3111467522052927,
"step": 388
},
{
"loss": 0.0614,
"grad_norm": 0.5844921469688416,
"learning_rate": 0.00016506578088114107,
"epoch": 0.3119486768243785,
"step": 389
},
{
"loss": 0.0291,
"grad_norm": 0.2818225622177124,
"learning_rate": 0.00016486406266807345,
"epoch": 0.31275060144346434,
"step": 390
},
{
"loss": 0.1276,
"grad_norm": 0.6056419610977173,
"learning_rate": 0.0001646618877873717,
"epoch": 0.31355252606255013,
"step": 391
},
{
"loss": 0.0534,
"grad_norm": 0.36668699979782104,
"learning_rate": 0.00016445925766242391,
"epoch": 0.3143544506816359,
"step": 392
},
{
"loss": 0.031,
"grad_norm": 0.34223347902297974,
"learning_rate": 0.00016425617371982303,
"epoch": 0.3151563753007217,
"step": 393
},
{
"loss": 0.0862,
"grad_norm": 0.394709050655365,
"learning_rate": 0.00016405263738935718,
"epoch": 0.31595829991980756,
"step": 394
},
{
"loss": 0.0489,
"grad_norm": 0.41530197858810425,
"learning_rate": 0.00016384865010399935,
"epoch": 0.31676022453889335,
"step": 395
},
{
"loss": 0.1056,
"grad_norm": 0.450509637594223,
"learning_rate": 0.00016364421329989755,
"epoch": 0.31756214915797915,
"step": 396
},
{
"loss": 0.0511,
"grad_norm": 0.4890766441822052,
"learning_rate": 0.00016343932841636456,
"epoch": 0.31836407377706494,
"step": 397
},
{
"loss": 0.16,
"grad_norm": 0.6917940974235535,
"learning_rate": 0.00016323399689586768,
"epoch": 0.3191659983961508,
"step": 398
},
{
"loss": 0.0584,
"grad_norm": 0.4217245280742645,
"learning_rate": 0.00016302822018401884,
"epoch": 0.3199679230152366,
"step": 399
},
{
"loss": 0.0471,
"grad_norm": 0.33742472529411316,
"learning_rate": 0.00016282199972956425,
"epoch": 0.32076984763432237,
"step": 400
},
{
"loss": 0.0561,
"grad_norm": 0.30320796370506287,
"learning_rate": 0.00016261533698437418,
"epoch": 0.32157177225340816,
"step": 401
},
{
"loss": 0.0504,
"grad_norm": 0.41129252314567566,
"learning_rate": 0.00016240823340343285,
"epoch": 0.322373696872494,
"step": 402
},
{
"loss": 0.0191,
"grad_norm": 0.21539658308029175,
"learning_rate": 0.00016220069044482814,
"epoch": 0.3231756214915798,
"step": 403
},
{
"loss": 0.0846,
"grad_norm": 0.5003443360328674,
"learning_rate": 0.00016199270956974128,
"epoch": 0.3239775461106656,
"step": 404
},
{
"loss": 0.0821,
"grad_norm": 0.3936382532119751,
"learning_rate": 0.00016178429224243663,
"epoch": 0.3247794707297514,
"step": 405
},
{
"loss": 0.1342,
"grad_norm": 1.055274248123169,
"learning_rate": 0.00016157543993025134,
"epoch": 0.32558139534883723,
"step": 406
},
{
"loss": 0.0784,
"grad_norm": 0.33087801933288574,
"learning_rate": 0.00016136615410358493,
"epoch": 0.326383319967923,
"step": 407
},
{
"loss": 0.0415,
"grad_norm": 0.27356383204460144,
"learning_rate": 0.00016115643623588915,
"epoch": 0.3271852445870088,
"step": 408
},
{
"loss": 0.0449,
"grad_norm": 0.39037784934043884,
"learning_rate": 0.00016094628780365743,
"epoch": 0.3279871692060946,
"step": 409
},
{
"loss": 0.0643,
"grad_norm": 0.3727872967720032,
"learning_rate": 0.00016073571028641452,
"epoch": 0.32878909382518046,
"step": 410
},
{
"loss": 0.0366,
"grad_norm": 0.30508482456207275,
"learning_rate": 0.0001605247051667061,
"epoch": 0.32959101844426625,
"step": 411
},
{
"loss": 0.0384,
"grad_norm": 0.313531756401062,
"learning_rate": 0.00016031327393008845,
"epoch": 0.33039294306335204,
"step": 412
},
{
"loss": 0.034,
"grad_norm": 0.3675989806652069,
"learning_rate": 0.00016010141806511766,
"epoch": 0.33119486768243783,
"step": 413
},
{
"loss": 0.0383,
"grad_norm": 0.2861047685146332,
"learning_rate": 0.00015988913906333946,
"epoch": 0.3319967923015237,
"step": 414
},
{
"loss": 0.0232,
"grad_norm": 0.30425795912742615,
"learning_rate": 0.0001596764384192787,
"epoch": 0.33279871692060947,
"step": 415
},
{
"loss": 0.0582,
"grad_norm": 0.5757021307945251,
"learning_rate": 0.00015946331763042867,
"epoch": 0.33360064153969526,
"step": 416
},
{
"loss": 0.0341,
"grad_norm": 0.2700221538543701,
"learning_rate": 0.00015924977819724068,
"epoch": 0.33440256615878106,
"step": 417
},
{
"loss": 0.0458,
"grad_norm": 0.3216298818588257,
"learning_rate": 0.00015903582162311337,
"epoch": 0.3352044907778669,
"step": 418
},
{
"loss": 0.0497,
"grad_norm": 0.2954160273075104,
"learning_rate": 0.00015882144941438233,
"epoch": 0.3360064153969527,
"step": 419
},
{
"loss": 0.0345,
"grad_norm": 0.30057498812675476,
"learning_rate": 0.00015860666308030932,
"epoch": 0.3368083400160385,
"step": 420
},
{
"loss": 0.046,
"grad_norm": 0.31479984521865845,
"learning_rate": 0.00015839146413307165,
"epoch": 0.3376102646351243,
"step": 421
},
{
"loss": 0.0455,
"grad_norm": 0.31836825609207153,
"learning_rate": 0.00015817585408775168,
"epoch": 0.3384121892542101,
"step": 422
},
{
"loss": 0.021,
"grad_norm": 0.24365834891796112,
"learning_rate": 0.000157959834462326,
"epoch": 0.3392141138732959,
"step": 423
},
{
"loss": 0.0354,
"grad_norm": 0.38124316930770874,
"learning_rate": 0.0001577434067776548,
"epoch": 0.3400160384923817,
"step": 424
},
{
"loss": 0.0853,
"grad_norm": 0.6972952485084534,
"learning_rate": 0.00015752657255747122,
"epoch": 0.3408179631114675,
"step": 425
},
{
"loss": 0.0189,
"grad_norm": 0.2013692706823349,
"learning_rate": 0.00015730933332837045,
"epoch": 0.34161988773055335,
"step": 426
},
{
"loss": 0.0561,
"grad_norm": 0.3334507346153259,
"learning_rate": 0.00015709169061979913,
"epoch": 0.34242181234963914,
"step": 427
},
{
"loss": 0.0652,
"grad_norm": 0.8858683109283447,
"learning_rate": 0.0001568736459640447,
"epoch": 0.34322373696872494,
"step": 428
},
{
"loss": 0.0267,
"grad_norm": 0.2540907561779022,
"learning_rate": 0.00015665520089622423,
"epoch": 0.3440256615878107,
"step": 429
},
{
"loss": 0.0475,
"grad_norm": 0.3518412709236145,
"learning_rate": 0.00015643635695427403,
"epoch": 0.3448275862068966,
"step": 430
},
{
"loss": 0.0311,
"grad_norm": 0.17859573662281036,
"learning_rate": 0.00015621711567893854,
"epoch": 0.34562951082598237,
"step": 431
},
{
"loss": 0.0202,
"grad_norm": 0.3969719409942627,
"learning_rate": 0.00015599747861375955,
"epoch": 0.34643143544506816,
"step": 432
},
{
"loss": 0.0249,
"grad_norm": 0.21450327336788177,
"learning_rate": 0.00015577744730506545,
"epoch": 0.34723336006415395,
"step": 433
},
{
"loss": 0.0416,
"grad_norm": 0.37466296553611755,
"learning_rate": 0.00015555702330196023,
"epoch": 0.3480352846832398,
"step": 434
},
{
"loss": 0.064,
"grad_norm": 0.5470214486122131,
"learning_rate": 0.00015533620815631256,
"epoch": 0.3488372093023256,
"step": 435
},
{
"loss": 0.0988,
"grad_norm": 0.6237538456916809,
"learning_rate": 0.0001551150034227449,
"epoch": 0.3496391339214114,
"step": 436
},
{
"loss": 0.1344,
"grad_norm": 0.5647206902503967,
"learning_rate": 0.0001548934106586226,
"epoch": 0.3504410585404972,
"step": 437
},
{
"loss": 0.0561,
"grad_norm": 0.326889306306839,
"learning_rate": 0.0001546714314240429,
"epoch": 0.351242983159583,
"step": 438
},
{
"loss": 0.0785,
"grad_norm": 0.4708334803581238,
"learning_rate": 0.00015444906728182385,
"epoch": 0.3520449077786688,
"step": 439
},
{
"loss": 0.0392,
"grad_norm": 0.4006723165512085,
"learning_rate": 0.00015422631979749354,
"epoch": 0.3528468323977546,
"step": 440
},
{
"loss": 0.0291,
"grad_norm": 0.25906902551651,
"learning_rate": 0.00015400319053927874,
"epoch": 0.3536487570168404,
"step": 441
},
{
"loss": 0.0242,
"grad_norm": 0.31759947538375854,
"learning_rate": 0.00015377968107809425,
"epoch": 0.35445068163592625,
"step": 442
},
{
"loss": 0.0368,
"grad_norm": 0.2436400055885315,
"learning_rate": 0.00015355579298753153,
"epoch": 0.35525260625501204,
"step": 443
},
{
"loss": 0.0704,
"grad_norm": 0.4932403564453125,
"learning_rate": 0.00015333152784384777,
"epoch": 0.35605453087409783,
"step": 444
},
{
"loss": 0.0529,
"grad_norm": 0.4474373757839203,
"learning_rate": 0.00015310688722595473,
"epoch": 0.3568564554931836,
"step": 445
},
{
"loss": 0.0773,
"grad_norm": 0.5451852679252625,
"learning_rate": 0.00015288187271540767,
"epoch": 0.35765838011226947,
"step": 446
},
{
"loss": 0.0567,
"grad_norm": 0.3486538231372833,
"learning_rate": 0.00015265648589639423,
"epoch": 0.35846030473135526,
"step": 447
},
{
"loss": 0.0445,
"grad_norm": 0.33438971638679504,
"learning_rate": 0.00015243072835572318,
"epoch": 0.35926222935044105,
"step": 448
},
{
"loss": 0.0656,
"grad_norm": 0.6021797060966492,
"learning_rate": 0.00015220460168281335,
"epoch": 0.36006415396952685,
"step": 449
},
{
"loss": 0.0539,
"grad_norm": 0.2629101276397705,
"learning_rate": 0.0001519781074696824,
"epoch": 0.3608660785886127,
"step": 450
},
{
"loss": 0.0559,
"grad_norm": 0.38639259338378906,
"learning_rate": 0.00015175124731093553,
"epoch": 0.3616680032076985,
"step": 451
},
{
"loss": 0.0632,
"grad_norm": 0.40031421184539795,
"learning_rate": 0.00015152402280375454,
"epoch": 0.3624699278267843,
"step": 452
},
{
"loss": 0.0196,
"grad_norm": 0.24561044573783875,
"learning_rate": 0.00015129643554788612,
"epoch": 0.36327185244587007,
"step": 453
},
{
"loss": 0.0563,
"grad_norm": 0.8373734354972839,
"learning_rate": 0.00015106848714563112,
"epoch": 0.3640737770649559,
"step": 454
},
{
"loss": 0.0388,
"grad_norm": 0.38167354464530945,
"learning_rate": 0.00015084017920183272,
"epoch": 0.3648757016840417,
"step": 455
},
{
"loss": 0.0506,
"grad_norm": 0.46959736943244934,
"learning_rate": 0.00015061151332386566,
"epoch": 0.3656776263031275,
"step": 456
},
{
"loss": 0.0314,
"grad_norm": 0.30401480197906494,
"learning_rate": 0.00015038249112162445,
"epoch": 0.3664795509222133,
"step": 457
},
{
"loss": 0.0466,
"grad_norm": 0.2866615355014801,
"learning_rate": 0.00015015311420751244,
"epoch": 0.36728147554129914,
"step": 458
},
{
"loss": 0.0963,
"grad_norm": 0.787212073802948,
"learning_rate": 0.00014992338419643022,
"epoch": 0.36808340016038493,
"step": 459
},
{
"loss": 0.0872,
"grad_norm": 0.49554625153541565,
"learning_rate": 0.00014969330270576427,
"epoch": 0.3688853247794707,
"step": 460
},
{
"loss": 0.0268,
"grad_norm": 0.2807726562023163,
"learning_rate": 0.0001494628713553757,
"epoch": 0.3696872493985565,
"step": 461
},
{
"loss": 0.0488,
"grad_norm": 0.28138288855552673,
"learning_rate": 0.0001492320917675887,
"epoch": 0.37048917401764236,
"step": 462
},
{
"loss": 0.0582,
"grad_norm": 0.3524348735809326,
"learning_rate": 0.0001490009655671792,
"epoch": 0.37129109863672816,
"step": 463
},
{
"loss": 0.0627,
"grad_norm": 0.38492485880851746,
"learning_rate": 0.00014876949438136347,
"epoch": 0.37209302325581395,
"step": 464
},
{
"loss": 0.0581,
"grad_norm": 0.31561005115509033,
"learning_rate": 0.0001485376798397865,
"epoch": 0.37289494787489974,
"step": 465
},
{
"loss": 0.0437,
"grad_norm": 0.30238181352615356,
"learning_rate": 0.00014830552357451076,
"epoch": 0.3736968724939856,
"step": 466
},
{
"loss": 0.0498,
"grad_norm": 0.3918459117412567,
"learning_rate": 0.00014807302722000447,
"epoch": 0.3744987971130714,
"step": 467
},
{
"loss": 0.0245,
"grad_norm": 0.20536094903945923,
"learning_rate": 0.00014784019241313026,
"epoch": 0.37530072173215717,
"step": 468
},
{
"loss": 0.0327,
"grad_norm": 0.2256690412759781,
"learning_rate": 0.0001476070207931336,
"epoch": 0.37610264635124296,
"step": 469
},
{
"loss": 0.0626,
"grad_norm": 0.42872869968414307,
"learning_rate": 0.00014737351400163128,
"epoch": 0.3769045709703288,
"step": 470
},
{
"loss": 0.0555,
"grad_norm": 0.3690952658653259,
"learning_rate": 0.0001471396736825998,
"epoch": 0.3777064955894146,
"step": 471
},
{
"loss": 0.0675,
"grad_norm": 0.4958707094192505,
"learning_rate": 0.0001469055014823637,
"epoch": 0.3785084202085004,
"step": 472
},
{
"loss": 0.0505,
"grad_norm": 0.319414883852005,
"learning_rate": 0.0001466709990495843,
"epoch": 0.3793103448275862,
"step": 473
},
{
"loss": 0.0455,
"grad_norm": 0.34806713461875916,
"learning_rate": 0.00014643616803524778,
"epoch": 0.38011226944667204,
"step": 474
},
{
"loss": 0.0284,
"grad_norm": 0.25858795642852783,
"learning_rate": 0.0001462010100926536,
"epoch": 0.3809141940657578,
"step": 475
},
{
"loss": 0.0587,
"grad_norm": 0.39808589220046997,
"learning_rate": 0.00014596552687740302,
"epoch": 0.3817161186848436,
"step": 476
},
{
"loss": 0.0476,
"grad_norm": 0.4907149374485016,
"learning_rate": 0.00014572972004738732,
"epoch": 0.3825180433039294,
"step": 477
},
{
"loss": 0.0506,
"grad_norm": 0.30614417791366577,
"learning_rate": 0.00014549359126277608,
"epoch": 0.38331996792301526,
"step": 478
},
{
"loss": 0.0259,
"grad_norm": 0.3281151354312897,
"learning_rate": 0.00014525714218600565,
"epoch": 0.38412189254210105,
"step": 479
},
{
"loss": 0.0376,
"grad_norm": 0.34824758768081665,
"learning_rate": 0.00014502037448176734,
"epoch": 0.38492381716118684,
"step": 480
},
{
"loss": 0.0425,
"grad_norm": 0.2705196440219879,
"learning_rate": 0.00014478328981699568,
"epoch": 0.38572574178027264,
"step": 481
},
{
"loss": 0.0466,
"grad_norm": 0.2696325480937958,
"learning_rate": 0.00014454588986085676,
"epoch": 0.3865276663993585,
"step": 482
},
{
"loss": 0.0372,
"grad_norm": 0.3687107264995575,
"learning_rate": 0.00014430817628473638,
"epoch": 0.3873295910184443,
"step": 483
},
{
"loss": 0.0554,
"grad_norm": 0.3724960684776306,
"learning_rate": 0.00014407015076222846,
"epoch": 0.38813151563753007,
"step": 484
},
{
"loss": 0.079,
"grad_norm": 0.5664525032043457,
"learning_rate": 0.000143831814969123,
"epoch": 0.38893344025661586,
"step": 485
},
{
"loss": 0.0318,
"grad_norm": 0.20477205514907837,
"learning_rate": 0.00014359317058339457,
"epoch": 0.3897353648757017,
"step": 486
},
{
"loss": 0.0456,
"grad_norm": 0.3792808949947357,
"learning_rate": 0.0001433542192851902,
"epoch": 0.3905372894947875,
"step": 487
},
{
"loss": 0.0253,
"grad_norm": 0.26179176568984985,
"learning_rate": 0.00014311496275681783,
"epoch": 0.3913392141138733,
"step": 488
},
{
"loss": 0.0398,
"grad_norm": 0.29624319076538086,
"learning_rate": 0.00014287540268273426,
"epoch": 0.3921411387329591,
"step": 489
},
{
"loss": 0.0425,
"grad_norm": 0.3284585773944855,
"learning_rate": 0.00014263554074953337,
"epoch": 0.39294306335204493,
"step": 490
},
{
"loss": 0.0277,
"grad_norm": 0.23194313049316406,
"learning_rate": 0.00014239537864593432,
"epoch": 0.3937449879711307,
"step": 491
},
{
"loss": 0.047,
"grad_norm": 0.557132363319397,
"learning_rate": 0.00014215491806276944,
"epoch": 0.3945469125902165,
"step": 492
},
{
"loss": 0.0495,
"grad_norm": 0.3186132311820984,
"learning_rate": 0.0001419141606929726,
"epoch": 0.3953488372093023,
"step": 493
},
{
"loss": 0.0476,
"grad_norm": 0.4139769375324249,
"learning_rate": 0.0001416731082315671,
"epoch": 0.39615076182838815,
"step": 494
},
{
"loss": 0.0707,
"grad_norm": 0.6908156275749207,
"learning_rate": 0.00014143176237565387,
"epoch": 0.39695268644747395,
"step": 495
},
{
"loss": 0.0328,
"grad_norm": 0.47614389657974243,
"learning_rate": 0.0001411901248243993,
"epoch": 0.39775461106655974,
"step": 496
},
{
"eval_loss": 0.04790589585900307,
"eval_runtime": 31.9045,
"eval_samples_per_second": 32.911,
"eval_steps_per_second": 8.243,
"epoch": 0.39775461106655974,
"step": 496
},
{
"loss": 0.0491,
"grad_norm": 0.4075859487056732,
"learning_rate": 0.00014094819727902353,
"epoch": 0.39855653568564553,
"step": 497
},
{
"loss": 0.0679,
"grad_norm": 0.2855551838874817,
"learning_rate": 0.0001407059814427884,
"epoch": 0.3993584603047314,
"step": 498
},
{
"loss": 0.0366,
"grad_norm": 0.7473935484886169,
"learning_rate": 0.00014046347902098535,
"epoch": 0.40016038492381717,
"step": 499
},
{
"loss": 0.0177,
"grad_norm": 0.16580775380134583,
"learning_rate": 0.00014022069172092352,
"epoch": 0.40096230954290296,
"step": 500
},
{
"loss": 0.04,
"grad_norm": 0.3346802592277527,
"learning_rate": 0.00013997762125191773,
"epoch": 0.40176423416198875,
"step": 501
},
{
"loss": 0.065,
"grad_norm": 0.5194714069366455,
"learning_rate": 0.00013973426932527636,
"epoch": 0.4025661587810746,
"step": 502
},
{
"loss": 0.0412,
"grad_norm": 0.25542134046554565,
"learning_rate": 0.00013949063765428943,
"epoch": 0.4033680834001604,
"step": 503
},
{
"loss": 0.0768,
"grad_norm": 0.46887674927711487,
"learning_rate": 0.00013924672795421637,
"epoch": 0.4041700080192462,
"step": 504
},
{
"loss": 0.0508,
"grad_norm": 0.3275587558746338,
"learning_rate": 0.00013900254194227415,
"epoch": 0.404971932638332,
"step": 505
},
{
"loss": 0.0253,
"grad_norm": 0.2020861655473709,
"learning_rate": 0.000138758081337625,
"epoch": 0.4057738572574178,
"step": 506
},
{
"loss": 0.0322,
"grad_norm": 0.5022090673446655,
"learning_rate": 0.0001385133478613644,
"epoch": 0.4065757818765036,
"step": 507
},
{
"loss": 0.121,
"grad_norm": 1.1316415071487427,
"learning_rate": 0.000138268343236509,
"epoch": 0.4073777064955894,
"step": 508
},
{
"loss": 0.0239,
"grad_norm": 0.29626041650772095,
"learning_rate": 0.00013802306918798437,
"epoch": 0.4081796311146752,
"step": 509
},
{
"loss": 0.0354,
"grad_norm": 0.19514746963977814,
"learning_rate": 0.00013777752744261295,
"epoch": 0.40898155573376105,
"step": 510
},
{
"loss": 0.0692,
"grad_norm": 0.4436163902282715,
"learning_rate": 0.0001375317197291019,
"epoch": 0.40978348035284684,
"step": 511
},
{
"loss": 0.0427,
"grad_norm": 0.36557817459106445,
"learning_rate": 0.00013728564777803088,
"epoch": 0.41058540497193263,
"step": 512
},
{
"loss": 0.0407,
"grad_norm": 0.3514234721660614,
"learning_rate": 0.00013703931332183987,
"epoch": 0.4113873295910184,
"step": 513
},
{
"loss": 0.0235,
"grad_norm": 0.24922512471675873,
"learning_rate": 0.00013679271809481693,
"epoch": 0.41218925421010427,
"step": 514
},
{
"loss": 0.0492,
"grad_norm": 0.4417109787464142,
"learning_rate": 0.00013654586383308619,
"epoch": 0.41299117882919006,
"step": 515
},
{
"loss": 0.0973,
"grad_norm": 0.5984606146812439,
"learning_rate": 0.00013629875227459532,
"epoch": 0.41379310344827586,
"step": 516
},
{
"loss": 0.0597,
"grad_norm": 0.5426322221755981,
"learning_rate": 0.0001360513851591036,
"epoch": 0.41459502806736165,
"step": 517
},
{
"loss": 0.081,
"grad_norm": 0.7733796238899231,
"learning_rate": 0.00013580376422816945,
"epoch": 0.4153969526864475,
"step": 518
},
{
"loss": 0.031,
"grad_norm": 0.33183905482292175,
"learning_rate": 0.00013555589122513827,
"epoch": 0.4161988773055333,
"step": 519
},
{
"loss": 0.0592,
"grad_norm": 0.4072870910167694,
"learning_rate": 0.0001353077678951301,
"epoch": 0.4170008019246191,
"step": 520
},
{
"loss": 0.0523,
"grad_norm": 0.3927518427371979,
"learning_rate": 0.0001350593959850274,
"epoch": 0.41780272654370487,
"step": 521
},
{
"loss": 0.0332,
"grad_norm": 0.3755587637424469,
"learning_rate": 0.00013481077724346278,
"epoch": 0.4186046511627907,
"step": 522
},
{
"loss": 0.1049,
"grad_norm": 0.5004737377166748,
"learning_rate": 0.0001345619134208066,
"epoch": 0.4194065757818765,
"step": 523
},
{
"loss": 0.0878,
"grad_norm": 0.3315165042877197,
"learning_rate": 0.00013431280626915467,
"epoch": 0.4202085004009623,
"step": 524
},
{
"loss": 0.0339,
"grad_norm": 0.27768945693969727,
"learning_rate": 0.00013406345754231588,
"epoch": 0.4210104250200481,
"step": 525
},
{
"loss": 0.0433,
"grad_norm": 0.3195447325706482,
"learning_rate": 0.00013381386899580003,
"epoch": 0.42181234963913394,
"step": 526
},
{
"loss": 0.028,
"grad_norm": 0.2721582055091858,
"learning_rate": 0.00013356404238680527,
"epoch": 0.42261427425821974,
"step": 527
},
{
"loss": 0.0324,
"grad_norm": 0.2353498488664627,
"learning_rate": 0.00013331397947420576,
"epoch": 0.4234161988773055,
"step": 528
},
{
"loss": 0.0572,
"grad_norm": 0.49510321021080017,
"learning_rate": 0.0001330636820185394,
"epoch": 0.4242181234963913,
"step": 529
},
{
"loss": 0.0586,
"grad_norm": 0.5035674571990967,
"learning_rate": 0.00013281315178199536,
"epoch": 0.42502004811547717,
"step": 530
},
{
"loss": 0.0337,
"grad_norm": 0.761020839214325,
"learning_rate": 0.00013256239052840155,
"epoch": 0.42582197273456296,
"step": 531
},
{
"loss": 0.0587,
"grad_norm": 0.2618282735347748,
"learning_rate": 0.00013231140002321253,
"epoch": 0.42662389735364875,
"step": 532
},
{
"loss": 0.0257,
"grad_norm": 0.2896956503391266,
"learning_rate": 0.0001320601820334967,
"epoch": 0.42742582197273454,
"step": 533
},
{
"loss": 0.0461,
"grad_norm": 0.48962509632110596,
"learning_rate": 0.00013180873832792416,
"epoch": 0.4282277465918204,
"step": 534
},
{
"loss": 0.0093,
"grad_norm": 0.13504081964492798,
"learning_rate": 0.00013155707067675406,
"epoch": 0.4290296712109062,
"step": 535
},
{
"loss": 0.0417,
"grad_norm": 0.3743266463279724,
"learning_rate": 0.00013130518085182225,
"epoch": 0.429831595829992,
"step": 536
},
{
"loss": 0.0343,
"grad_norm": 0.29630181193351746,
"learning_rate": 0.00013105307062652872,
"epoch": 0.43063352044907777,
"step": 537
},
{
"loss": 0.0291,
"grad_norm": 0.25488558411598206,
"learning_rate": 0.00013080074177582526,
"epoch": 0.4314354450681636,
"step": 538
},
{
"loss": 0.091,
"grad_norm": 0.4586013853549957,
"learning_rate": 0.00013054819607620274,
"epoch": 0.4322373696872494,
"step": 539
},
{
"loss": 0.1163,
"grad_norm": 0.7305994033813477,
"learning_rate": 0.00013029543530567884,
"epoch": 0.4330392943063352,
"step": 540
},
{
"loss": 0.0339,
"grad_norm": 0.234614759683609,
"learning_rate": 0.00013004246124378535,
"epoch": 0.433841218925421,
"step": 541
},
{
"loss": 0.0321,
"grad_norm": 0.2804659903049469,
"learning_rate": 0.00012978927567155573,
"epoch": 0.43464314354450684,
"step": 542
},
{
"loss": 0.0514,
"grad_norm": 0.5687031745910645,
"learning_rate": 0.0001295358803715126,
"epoch": 0.43544506816359263,
"step": 543
},
{
"loss": 0.0824,
"grad_norm": 0.583227276802063,
"learning_rate": 0.00012928227712765504,
"epoch": 0.4362469927826784,
"step": 544
},
{
"loss": 0.0453,
"grad_norm": 0.31921252608299255,
"learning_rate": 0.00012902846772544624,
"epoch": 0.4370489174017642,
"step": 545
},
{
"loss": 0.0495,
"grad_norm": 0.4188879430294037,
"learning_rate": 0.00012877445395180078,
"epoch": 0.43785084202085006,
"step": 546
},
{
"loss": 0.0393,
"grad_norm": 0.2866995334625244,
"learning_rate": 0.00012852023759507203,
"epoch": 0.43865276663993585,
"step": 547
},
{
"loss": 0.0772,
"grad_norm": 0.48335814476013184,
"learning_rate": 0.00012826582044503978,
"epoch": 0.43945469125902165,
"step": 548
},
{
"loss": 0.0537,
"grad_norm": 0.3400033414363861,
"learning_rate": 0.0001280112042928973,
"epoch": 0.44025661587810744,
"step": 549
},
{
"loss": 0.0503,
"grad_norm": 0.43847382068634033,
"learning_rate": 0.00012775639093123907,
"epoch": 0.4410585404971933,
"step": 550
},
{
"loss": 0.0659,
"grad_norm": 0.3055131137371063,
"learning_rate": 0.00012750138215404782,
"epoch": 0.4418604651162791,
"step": 551
},
{
"loss": 0.0532,
"grad_norm": 0.31449994444847107,
"learning_rate": 0.0001272461797566823,
"epoch": 0.44266238973536487,
"step": 552
},
{
"loss": 0.0273,
"grad_norm": 0.39831122756004333,
"learning_rate": 0.00012699078553586422,
"epoch": 0.44346431435445066,
"step": 553
},
{
"loss": 0.04,
"grad_norm": 0.464834600687027,
"learning_rate": 0.00012673520128966592,
"epoch": 0.4442662389735365,
"step": 554
},
{
"loss": 0.0679,
"grad_norm": 0.3944595158100128,
"learning_rate": 0.00012647942881749755,
"epoch": 0.4450681635926223,
"step": 555
},
{
"loss": 0.0271,
"grad_norm": 0.21679094433784485,
"learning_rate": 0.00012622346992009447,
"epoch": 0.4458700882117081,
"step": 556
},
{
"loss": 0.0349,
"grad_norm": 0.34640711545944214,
"learning_rate": 0.00012596732639950442,
"epoch": 0.4466720128307939,
"step": 557
},
{
"loss": 0.0445,
"grad_norm": 0.5096455216407776,
"learning_rate": 0.00012571100005907523,
"epoch": 0.44747393744987973,
"step": 558
},
{
"loss": 0.0544,
"grad_norm": 0.35034018754959106,
"learning_rate": 0.0001254544927034415,
"epoch": 0.4482758620689655,
"step": 559
},
{
"loss": 0.1161,
"grad_norm": 0.4701795279979706,
"learning_rate": 0.00012519780613851254,
"epoch": 0.4490777866880513,
"step": 560
},
{
"loss": 0.0259,
"grad_norm": 0.25175973773002625,
"learning_rate": 0.00012494094217145918,
"epoch": 0.4498797113071371,
"step": 561
},
{
"loss": 0.0431,
"grad_norm": 0.30269894003868103,
"learning_rate": 0.00012468390261070138,
"epoch": 0.45068163592622296,
"step": 562
},
{
"loss": 0.0234,
"grad_norm": 0.23327726125717163,
"learning_rate": 0.0001244266892658952,
"epoch": 0.45148356054530875,
"step": 563
},
{
"loss": 0.0433,
"grad_norm": 0.26909253001213074,
"learning_rate": 0.00012416930394792026,
"epoch": 0.45228548516439454,
"step": 564
},
{
"loss": 0.0676,
"grad_norm": 0.4461866319179535,
"learning_rate": 0.00012391174846886698,
"epoch": 0.45308740978348033,
"step": 565
},
{
"loss": 0.0461,
"grad_norm": 0.4100785553455353,
"learning_rate": 0.0001236540246420237,
"epoch": 0.4538893344025662,
"step": 566
},
{
"loss": 0.0338,
"grad_norm": 0.35902178287506104,
"learning_rate": 0.00012339613428186407,
"epoch": 0.454691259021652,
"step": 567
},
{
"loss": 0.0544,
"grad_norm": 0.43561217188835144,
"learning_rate": 0.00012313807920403419,
"epoch": 0.45549318364073776,
"step": 568
},
{
"loss": 0.0476,
"grad_norm": 0.34299418330192566,
"learning_rate": 0.0001228798612253397,
"epoch": 0.45629510825982356,
"step": 569
},
{
"loss": 0.1276,
"grad_norm": 0.5789246559143066,
"learning_rate": 0.00012262148216373331,
"epoch": 0.4570970328789094,
"step": 570
},
{
"loss": 0.0243,
"grad_norm": 0.42919760942459106,
"learning_rate": 0.00012236294383830175,
"epoch": 0.4578989574979952,
"step": 571
},
{
"loss": 0.0459,
"grad_norm": 0.24285271763801575,
"learning_rate": 0.00012210424806925301,
"epoch": 0.458700882117081,
"step": 572
},
{
"loss": 0.0573,
"grad_norm": 0.46728515625,
"learning_rate": 0.00012184539667790349,
"epoch": 0.4595028067361668,
"step": 573
},
{
"loss": 0.0543,
"grad_norm": 0.2979477643966675,
"learning_rate": 0.00012158639148666534,
"epoch": 0.4603047313552526,
"step": 574
},
{
"loss": 0.0613,
"grad_norm": 0.35671502351760864,
"learning_rate": 0.00012132723431903341,
"epoch": 0.4611066559743384,
"step": 575
},
{
"loss": 0.0328,
"grad_norm": 0.279118150472641,
"learning_rate": 0.00012106792699957263,
"epoch": 0.4619085805934242,
"step": 576
},
{
"loss": 0.0595,
"grad_norm": 0.6142110824584961,
"learning_rate": 0.000120808471353905,
"epoch": 0.46271050521251,
"step": 577
},
{
"loss": 0.0691,
"grad_norm": 0.7308236956596375,
"learning_rate": 0.00012054886920869681,
"epoch": 0.46351242983159585,
"step": 578
},
{
"loss": 0.0528,
"grad_norm": 0.45223355293273926,
"learning_rate": 0.00012028912239164569,
"epoch": 0.46431435445068164,
"step": 579
},
{
"loss": 0.0373,
"grad_norm": 0.2948494255542755,
"learning_rate": 0.00012002923273146794,
"epoch": 0.46511627906976744,
"step": 580
},
{
"loss": 0.0414,
"grad_norm": 0.27661287784576416,
"learning_rate": 0.00011976920205788542,
"epoch": 0.4659182036888532,
"step": 581
},
{
"loss": 0.0578,
"grad_norm": 0.4644034504890442,
"learning_rate": 0.00011950903220161285,
"epoch": 0.4667201283079391,
"step": 582
},
{
"loss": 0.0565,
"grad_norm": 0.6451210379600525,
"learning_rate": 0.00011924872499434479,
"epoch": 0.46752205292702487,
"step": 583
},
{
"loss": 0.0231,
"grad_norm": 0.21448062360286713,
"learning_rate": 0.00011898828226874284,
"epoch": 0.46832397754611066,
"step": 584
},
{
"loss": 0.0166,
"grad_norm": 0.15424512326717377,
"learning_rate": 0.00011872770585842273,
"epoch": 0.46912590216519645,
"step": 585
},
{
"loss": 0.0473,
"grad_norm": 0.31540054082870483,
"learning_rate": 0.0001184669975979413,
"epoch": 0.4699278267842823,
"step": 586
},
{
"loss": 0.0165,
"grad_norm": 0.13097421824932098,
"learning_rate": 0.00011820615932278374,
"epoch": 0.4707297514033681,
"step": 587
},
{
"loss": 0.0318,
"grad_norm": 0.308799684047699,
"learning_rate": 0.00011794519286935055,
"epoch": 0.4715316760224539,
"step": 588
},
{
"loss": 0.0471,
"grad_norm": 0.2947872579097748,
"learning_rate": 0.00011768410007494466,
"epoch": 0.4723336006415397,
"step": 589
},
{
"loss": 0.0516,
"grad_norm": 0.22661037743091583,
"learning_rate": 0.0001174228827777585,
"epoch": 0.4731355252606255,
"step": 590
},
{
"loss": 0.03,
"grad_norm": 0.24548248946666718,
"learning_rate": 0.00011716154281686105,
"epoch": 0.4739374498797113,
"step": 591
},
{
"loss": 0.0365,
"grad_norm": 0.2837478220462799,
"learning_rate": 0.00011690008203218493,
"epoch": 0.4747393744987971,
"step": 592
},
{
"loss": 0.0538,
"grad_norm": 0.3481287360191345,
"learning_rate": 0.00011663850226451327,
"epoch": 0.4755412991178829,
"step": 593
},
{
"loss": 0.048,
"grad_norm": 0.4488002061843872,
"learning_rate": 0.000116376805355467,
"epoch": 0.47634322373696875,
"step": 594
},
{
"loss": 0.015,
"grad_norm": 0.16303379833698273,
"learning_rate": 0.00011611499314749177,
"epoch": 0.47714514835605454,
"step": 595
},
{
"loss": 0.0246,
"grad_norm": 0.22950126230716705,
"learning_rate": 0.0001158530674838449,
"epoch": 0.47794707297514033,
"step": 596
},
{
"loss": 0.0116,
"grad_norm": 0.1625395268201828,
"learning_rate": 0.0001155910302085826,
"epoch": 0.4787489975942261,
"step": 597
},
{
"loss": 0.0301,
"grad_norm": 0.23239369690418243,
"learning_rate": 0.00011532888316654675,
"epoch": 0.47955092221331197,
"step": 598
},
{
"loss": 0.1168,
"grad_norm": 0.7024423480033875,
"learning_rate": 0.00011506662820335208,
"epoch": 0.48035284683239776,
"step": 599
},
{
"loss": 0.0615,
"grad_norm": 0.31283116340637207,
"learning_rate": 0.00011480426716537315,
"epoch": 0.48115477145148355,
"step": 600
},
{
"loss": 0.0387,
"grad_norm": 0.22865501046180725,
"learning_rate": 0.0001145418018997313,
"epoch": 0.48195669607056935,
"step": 601
},
{
"loss": 0.0189,
"grad_norm": 0.2138299196958542,
"learning_rate": 0.00011427923425428164,
"epoch": 0.4827586206896552,
"step": 602
},
{
"loss": 0.0703,
"grad_norm": 0.3493439257144928,
"learning_rate": 0.00011401656607760015,
"epoch": 0.483560545308741,
"step": 603
},
{
"loss": 0.0178,
"grad_norm": 0.2075956165790558,
"learning_rate": 0.00011375379921897051,
"epoch": 0.4843624699278268,
"step": 604
},
{
"loss": 0.0456,
"grad_norm": 0.4019928276538849,
"learning_rate": 0.0001134909355283712,
"epoch": 0.48516439454691257,
"step": 605
},
{
"loss": 0.0332,
"grad_norm": 0.3662348687648773,
"learning_rate": 0.00011322797685646242,
"epoch": 0.4859663191659984,
"step": 606
},
{
"loss": 0.0289,
"grad_norm": 0.26660025119781494,
"learning_rate": 0.00011296492505457314,
"epoch": 0.4867682437850842,
"step": 607
},
{
"loss": 0.0251,
"grad_norm": 0.1749676614999771,
"learning_rate": 0.00011270178197468789,
"epoch": 0.48757016840417,
"step": 608
},
{
"loss": 0.0338,
"grad_norm": 0.2791067957878113,
"learning_rate": 0.00011243854946943388,
"epoch": 0.4883720930232558,
"step": 609
},
{
"loss": 0.0395,
"grad_norm": 0.21187956631183624,
"learning_rate": 0.00011217522939206795,
"epoch": 0.48917401764234164,
"step": 610
},
{
"loss": 0.0596,
"grad_norm": 0.4193437695503235,
"learning_rate": 0.00011191182359646337,
"epoch": 0.48997594226142743,
"step": 611
},
{
"loss": 0.0437,
"grad_norm": 0.42110878229141235,
"learning_rate": 0.00011164833393709706,
"epoch": 0.4907778668805132,
"step": 612
},
{
"loss": 0.0364,
"grad_norm": 0.3795287013053894,
"learning_rate": 0.00011138476226903625,
"epoch": 0.491579791499599,
"step": 613
},
{
"loss": 0.0537,
"grad_norm": 0.307650089263916,
"learning_rate": 0.00011112111044792557,
"epoch": 0.49238171611868486,
"step": 614
},
{
"loss": 0.044,
"grad_norm": 0.33749890327453613,
"learning_rate": 0.00011085738032997398,
"epoch": 0.49318364073777066,
"step": 615
},
{
"loss": 0.0479,
"grad_norm": 0.3227038085460663,
"learning_rate": 0.00011059357377194161,
"epoch": 0.49398556535685645,
"step": 616
},
{
"loss": 0.0452,
"grad_norm": 0.3482477068901062,
"learning_rate": 0.00011032969263112688,
"epoch": 0.49478748997594224,
"step": 617
},
{
"loss": 0.0315,
"grad_norm": 0.27159547805786133,
"learning_rate": 0.00011006573876535322,
"epoch": 0.4955894145950281,
"step": 618
},
{
"loss": 0.0637,
"grad_norm": 0.40270885825157166,
"learning_rate": 0.0001098017140329561,
"epoch": 0.4963913392141139,
"step": 619
},
{
"loss": 0.022,
"grad_norm": 0.21836791932582855,
"learning_rate": 0.00010953762029276982,
"epoch": 0.4971932638331997,
"step": 620
},
{
"eval_loss": 0.04481621831655502,
"eval_runtime": 32.1222,
"eval_samples_per_second": 32.688,
"eval_steps_per_second": 8.187,
"epoch": 0.4971932638331997,
"step": 620
},
{
"loss": 0.0203,
"grad_norm": 0.1830679029226303,
"learning_rate": 0.00010927345940411467,
"epoch": 0.49799518845228546,
"step": 621
},
{
"loss": 0.0607,
"grad_norm": 0.4090077579021454,
"learning_rate": 0.00010900923322678364,
"epoch": 0.4987971130713713,
"step": 622
},
{
"loss": 0.0342,
"grad_norm": 0.28506171703338623,
"learning_rate": 0.00010874494362102931,
"epoch": 0.4995990376904571,
"step": 623
},
{
"loss": 0.0318,
"grad_norm": 0.31976205110549927,
"learning_rate": 0.00010848059244755093,
"epoch": 0.5004009623095429,
"step": 624
},
{
"loss": 0.0556,
"grad_norm": 0.2998436391353607,
"learning_rate": 0.0001082161815674811,
"epoch": 0.5012028869286287,
"step": 625
},
{
"loss": 0.021,
"grad_norm": 0.22129428386688232,
"learning_rate": 0.00010795171284237284,
"epoch": 0.5020048115477145,
"step": 626
},
{
"loss": 0.0266,
"grad_norm": 0.2941289246082306,
"learning_rate": 0.00010768718813418644,
"epoch": 0.5028067361668003,
"step": 627
},
{
"loss": 0.0568,
"grad_norm": 0.3848710358142853,
"learning_rate": 0.00010742260930527625,
"epoch": 0.5036086607858862,
"step": 628
},
{
"loss": 0.0388,
"grad_norm": 0.33324113488197327,
"learning_rate": 0.00010715797821837776,
"epoch": 0.504410585404972,
"step": 629
},
{
"loss": 0.0357,
"grad_norm": 0.350759893655777,
"learning_rate": 0.00010689329673659429,
"epoch": 0.5052125100240578,
"step": 630
},
{
"loss": 0.0111,
"grad_norm": 0.15862928330898285,
"learning_rate": 0.00010662856672338397,
"epoch": 0.5060144346431436,
"step": 631
},
{
"loss": 0.0382,
"grad_norm": 0.26137423515319824,
"learning_rate": 0.00010636379004254664,
"epoch": 0.5068163592622293,
"step": 632
},
{
"loss": 0.0244,
"grad_norm": 0.28696557879447937,
"learning_rate": 0.00010609896855821068,
"epoch": 0.5076182838813151,
"step": 633
},
{
"loss": 0.0497,
"grad_norm": 0.3603985905647278,
"learning_rate": 0.00010583410413481994,
"epoch": 0.5084202085004009,
"step": 634
},
{
"loss": 0.0837,
"grad_norm": 0.653423011302948,
"learning_rate": 0.00010556919863712054,
"epoch": 0.5092221331194867,
"step": 635
},
{
"loss": 0.0243,
"grad_norm": 0.23948614299297333,
"learning_rate": 0.00010530425393014774,
"epoch": 0.5100240577385726,
"step": 636
},
{
"loss": 0.0271,
"grad_norm": 0.22972430288791656,
"learning_rate": 0.00010503927187921292,
"epoch": 0.5108259823576584,
"step": 637
},
{
"loss": 0.047,
"grad_norm": 0.4855923354625702,
"learning_rate": 0.00010477425434989036,
"epoch": 0.5116279069767442,
"step": 638
},
{
"loss": 0.0319,
"grad_norm": 0.3573042154312134,
"learning_rate": 0.0001045092032080041,
"epoch": 0.51242983159583,
"step": 639
},
{
"loss": 0.0679,
"grad_norm": 0.4812779426574707,
"learning_rate": 0.00010424412031961484,
"epoch": 0.5132317562149158,
"step": 640
},
{
"loss": 0.016,
"grad_norm": 0.21666432917118073,
"learning_rate": 0.00010397900755100678,
"epoch": 0.5140336808340016,
"step": 641
},
{
"loss": 0.018,
"grad_norm": 0.19402359426021576,
"learning_rate": 0.00010371386676867447,
"epoch": 0.5148356054530874,
"step": 642
},
{
"loss": 0.0917,
"grad_norm": 0.5789539217948914,
"learning_rate": 0.00010344869983930974,
"epoch": 0.5156375300721732,
"step": 643
},
{
"loss": 0.02,
"grad_norm": 0.19617126882076263,
"learning_rate": 0.00010318350862978848,
"epoch": 0.5164394546912591,
"step": 644
},
{
"loss": 0.0407,
"grad_norm": 0.33302173018455505,
"learning_rate": 0.00010291829500715744,
"epoch": 0.5172413793103449,
"step": 645
},
{
"loss": 0.0685,
"grad_norm": 0.4327728748321533,
"learning_rate": 0.00010265306083862134,
"epoch": 0.5180433039294307,
"step": 646
},
{
"loss": 0.029,
"grad_norm": 0.3352719843387604,
"learning_rate": 0.00010238780799152938,
"epoch": 0.5188452285485164,
"step": 647
},
{
"loss": 0.0195,
"grad_norm": 0.20400014519691467,
"learning_rate": 0.00010212253833336237,
"epoch": 0.5196471531676022,
"step": 648
},
{
"loss": 0.052,
"grad_norm": 0.5209816098213196,
"learning_rate": 0.00010185725373171942,
"epoch": 0.520449077786688,
"step": 649
},
{
"loss": 0.0123,
"grad_norm": 0.2923823595046997,
"learning_rate": 0.0001015919560543049,
"epoch": 0.5212510024057738,
"step": 650
},
{
"loss": 0.0412,
"grad_norm": 0.3393188714981079,
"learning_rate": 0.0001013266471689152,
"epoch": 0.5220529270248596,
"step": 651
},
{
"loss": 0.0188,
"grad_norm": 0.24097828567028046,
"learning_rate": 0.00010106132894342564,
"epoch": 0.5228548516439455,
"step": 652
},
{
"loss": 0.0686,
"grad_norm": 0.44344210624694824,
"learning_rate": 0.00010079600324577722,
"epoch": 0.5236567762630313,
"step": 653
},
{
"loss": 0.0143,
"grad_norm": 0.2262842059135437,
"learning_rate": 0.0001005306719439637,
"epoch": 0.5244587008821171,
"step": 654
},
{
"loss": 0.0288,
"grad_norm": 0.2735036611557007,
"learning_rate": 0.00010026533690601814,
"epoch": 0.5252606255012029,
"step": 655
},
{
"loss": 0.0554,
"grad_norm": 0.5491762757301331,
"learning_rate": 0.0001,
"epoch": 0.5260625501202887,
"step": 656
},
{
"loss": 0.052,
"grad_norm": 0.3667290508747101,
"learning_rate": 9.973466309398187e-05,
"epoch": 0.5268644747393745,
"step": 657
},
{
"loss": 0.029,
"grad_norm": 0.24463889002799988,
"learning_rate": 9.946932805603635e-05,
"epoch": 0.5276663993584603,
"step": 658
},
{
"loss": 0.0305,
"grad_norm": 0.34307271242141724,
"learning_rate": 9.92039967542228e-05,
"epoch": 0.5284683239775461,
"step": 659
},
{
"loss": 0.0543,
"grad_norm": 0.32049161195755005,
"learning_rate": 9.89386710565744e-05,
"epoch": 0.529270248596632,
"step": 660
},
{
"loss": 0.0745,
"grad_norm": 0.5253795981407166,
"learning_rate": 9.867335283108479e-05,
"epoch": 0.5300721732157178,
"step": 661
},
{
"loss": 0.0205,
"grad_norm": 0.22634099423885345,
"learning_rate": 9.840804394569513e-05,
"epoch": 0.5308740978348035,
"step": 662
},
{
"loss": 0.0478,
"grad_norm": 0.3835356831550598,
"learning_rate": 9.81427462682806e-05,
"epoch": 0.5316760224538893,
"step": 663
},
{
"loss": 0.0198,
"grad_norm": 0.25156858563423157,
"learning_rate": 9.787746166663764e-05,
"epoch": 0.5324779470729751,
"step": 664
},
{
"loss": 0.066,
"grad_norm": 0.577354907989502,
"learning_rate": 9.761219200847065e-05,
"epoch": 0.5332798716920609,
"step": 665
},
{
"loss": 0.0769,
"grad_norm": 0.5115137696266174,
"learning_rate": 9.73469391613787e-05,
"epoch": 0.5340817963111467,
"step": 666
},
{
"loss": 0.0391,
"grad_norm": 0.3202758729457855,
"learning_rate": 9.708170499284256e-05,
"epoch": 0.5348837209302325,
"step": 667
},
{
"loss": 0.0445,
"grad_norm": 0.422722727060318,
"learning_rate": 9.681649137021158e-05,
"epoch": 0.5356856455493184,
"step": 668
},
{
"loss": 0.0446,
"grad_norm": 0.32844579219818115,
"learning_rate": 9.655130016069028e-05,
"epoch": 0.5364875701684042,
"step": 669
},
{
"loss": 0.045,
"grad_norm": 0.3552158772945404,
"learning_rate": 9.628613323132554e-05,
"epoch": 0.53728949478749,
"step": 670
},
{
"loss": 0.0516,
"grad_norm": 0.37886497378349304,
"learning_rate": 9.602099244899323e-05,
"epoch": 0.5380914194065758,
"step": 671
},
{
"loss": 0.0224,
"grad_norm": 0.25544053316116333,
"learning_rate": 9.57558796803852e-05,
"epoch": 0.5388933440256616,
"step": 672
},
{
"loss": 0.0433,
"grad_norm": 0.2606353163719177,
"learning_rate": 9.549079679199592e-05,
"epoch": 0.5396952686447474,
"step": 673
},
{
"loss": 0.053,
"grad_norm": 0.3851439654827118,
"learning_rate": 9.522574565010965e-05,
"epoch": 0.5404971932638332,
"step": 674
},
{
"loss": 0.0314,
"grad_norm": 0.26221150159835815,
"learning_rate": 9.496072812078712e-05,
"epoch": 0.541299117882919,
"step": 675
},
{
"loss": 0.0569,
"grad_norm": 0.5227025747299194,
"learning_rate": 9.46957460698523e-05,
"epoch": 0.5421010425020049,
"step": 676
},
{
"loss": 0.0181,
"grad_norm": 0.20920135080814362,
"learning_rate": 9.44308013628795e-05,
"epoch": 0.5429029671210907,
"step": 677
},
{
"loss": 0.0326,
"grad_norm": 0.2929348647594452,
"learning_rate": 9.416589586518008e-05,
"epoch": 0.5437048917401764,
"step": 678
},
{
"loss": 0.0346,
"grad_norm": 0.38871344923973083,
"learning_rate": 9.390103144178932e-05,
"epoch": 0.5445068163592622,
"step": 679
},
{
"loss": 0.0603,
"grad_norm": 0.392945259809494,
"learning_rate": 9.363620995745337e-05,
"epoch": 0.545308740978348,
"step": 680
},
{
"loss": 0.0579,
"grad_norm": 0.6106362342834473,
"learning_rate": 9.337143327661604e-05,
"epoch": 0.5461106655974338,
"step": 681
},
{
"loss": 0.1305,
"grad_norm": 0.6625472903251648,
"learning_rate": 9.310670326340576e-05,
"epoch": 0.5469125902165196,
"step": 682
},
{
"loss": 0.0954,
"grad_norm": 0.5873953104019165,
"learning_rate": 9.284202178162226e-05,
"epoch": 0.5477145148356054,
"step": 683
},
{
"loss": 0.0214,
"grad_norm": 0.2383047342300415,
"learning_rate": 9.257739069472374e-05,
"epoch": 0.5485164394546913,
"step": 684
},
{
"loss": 0.0395,
"grad_norm": 0.46583423018455505,
"learning_rate": 9.23128118658136e-05,
"epoch": 0.5493183640737771,
"step": 685
},
{
"loss": 0.0847,
"grad_norm": 0.42172953486442566,
"learning_rate": 9.204828715762718e-05,
"epoch": 0.5501202886928629,
"step": 686
},
{
"loss": 0.0182,
"grad_norm": 0.17326125502586365,
"learning_rate": 9.178381843251891e-05,
"epoch": 0.5509222133119487,
"step": 687
},
{
"loss": 0.0706,
"grad_norm": 0.4465944468975067,
"learning_rate": 9.151940755244912e-05,
"epoch": 0.5517241379310345,
"step": 688
},
{
"loss": 0.0361,
"grad_norm": 0.3605600893497467,
"learning_rate": 9.12550563789707e-05,
"epoch": 0.5525260625501203,
"step": 689
},
{
"loss": 0.0637,
"grad_norm": 0.5488521456718445,
"learning_rate": 9.099076677321638e-05,
"epoch": 0.5533279871692061,
"step": 690
},
{
"loss": 0.0413,
"grad_norm": 0.3144517242908478,
"learning_rate": 9.072654059588533e-05,
"epoch": 0.5541299117882919,
"step": 691
},
{
"loss": 0.0469,
"grad_norm": 0.356842041015625,
"learning_rate": 9.04623797072302e-05,
"epoch": 0.5549318364073778,
"step": 692
},
{
"loss": 0.0976,
"grad_norm": 0.5099210143089294,
"learning_rate": 9.019828596704394e-05,
"epoch": 0.5557337610264635,
"step": 693
},
{
"loss": 0.0491,
"grad_norm": 0.5059170126914978,
"learning_rate": 8.99342612346468e-05,
"epoch": 0.5565356856455493,
"step": 694
},
{
"loss": 0.0378,
"grad_norm": 0.29008913040161133,
"learning_rate": 8.967030736887314e-05,
"epoch": 0.5573376102646351,
"step": 695
},
{
"loss": 0.0998,
"grad_norm": 0.6845918297767639,
"learning_rate": 8.94064262280584e-05,
"epoch": 0.5581395348837209,
"step": 696
},
{
"loss": 0.0582,
"grad_norm": 0.3744989335536957,
"learning_rate": 8.914261967002605e-05,
"epoch": 0.5589414595028067,
"step": 697
},
{
"loss": 0.0581,
"grad_norm": 0.467715859413147,
"learning_rate": 8.887888955207444e-05,
"epoch": 0.5597433841218925,
"step": 698
},
{
"loss": 0.0444,
"grad_norm": 0.3465082347393036,
"learning_rate": 8.861523773096378e-05,
"epoch": 0.5605453087409783,
"step": 699
},
{
"loss": 0.0616,
"grad_norm": 0.4096762537956238,
"learning_rate": 8.835166606290295e-05,
"epoch": 0.5613472333600642,
"step": 700
},
{
"loss": 0.0286,
"grad_norm": 0.3438918888568878,
"learning_rate": 8.808817640353661e-05,
"epoch": 0.56214915797915,
"step": 701
},
{
"loss": 0.0796,
"grad_norm": 0.503362774848938,
"learning_rate": 8.782477060793211e-05,
"epoch": 0.5629510825982358,
"step": 702
},
{
"loss": 0.0403,
"grad_norm": 0.36747029423713684,
"learning_rate": 8.756145053056615e-05,
"epoch": 0.5637530072173216,
"step": 703
},
{
"loss": 0.0339,
"grad_norm": 0.2829087972640991,
"learning_rate": 8.729821802531212e-05,
"epoch": 0.5645549318364074,
"step": 704
},
{
"loss": 0.0333,
"grad_norm": 0.3334031105041504,
"learning_rate": 8.703507494542691e-05,
"epoch": 0.5653568564554932,
"step": 705
},
{
"loss": 0.0437,
"grad_norm": 0.38484475016593933,
"learning_rate": 8.67720231435376e-05,
"epoch": 0.566158781074579,
"step": 706
},
{
"loss": 0.0485,
"grad_norm": 0.3287144601345062,
"learning_rate": 8.650906447162884e-05,
"epoch": 0.5669607056936647,
"step": 707
},
{
"loss": 0.0255,
"grad_norm": 0.22345122694969177,
"learning_rate": 8.624620078102951e-05,
"epoch": 0.5677626303127506,
"step": 708
},
{
"loss": 0.1315,
"grad_norm": 0.7388908267021179,
"learning_rate": 8.598343392239989e-05,
"epoch": 0.5685645549318364,
|
|
"step": 709
|
|
},
|
|
{
|
|
"loss": 0.0334,
|
|
"grad_norm": 0.2406347393989563,
|
|
"learning_rate": 8.572076574571838e-05,
|
|
"epoch": 0.5693664795509222,
|
|
"step": 710
|
|
},
|
|
{
|
|
"loss": 0.0225,
|
|
"grad_norm": 0.3055616021156311,
|
|
"learning_rate": 8.545819810026871e-05,
|
|
"epoch": 0.570168404170008,
|
|
"step": 711
|
|
},
|
|
{
|
|
"loss": 0.0139,
|
|
"grad_norm": 0.23999330401420593,
|
|
"learning_rate": 8.519573283462687e-05,
|
|
"epoch": 0.5709703287890938,
|
|
"step": 712
|
|
},
|
|
{
|
|
"loss": 0.0783,
|
|
"grad_norm": 0.6496703624725342,
|
|
"learning_rate": 8.493337179664793e-05,
|
|
"epoch": 0.5717722534081796,
|
|
"step": 713
|
|
},
|
|
{
|
|
"loss": 0.0103,
|
|
"grad_norm": 0.2168056070804596,
|
|
"learning_rate": 8.467111683345326e-05,
|
|
"epoch": 0.5725741780272654,
|
|
"step": 714
|
|
},
|
|
{
|
|
"loss": 0.1149,
|
|
"grad_norm": 0.9031127095222473,
|
|
"learning_rate": 8.440896979141744e-05,
|
|
"epoch": 0.5733761026463512,
|
|
"step": 715
|
|
},
|
|
{
|
|
"loss": 0.0517,
|
|
"grad_norm": 0.3651449680328369,
|
|
"learning_rate": 8.414693251615512e-05,
|
|
"epoch": 0.5741780272654371,
|
|
"step": 716
|
|
},
|
|
{
|
|
"loss": 0.0603,
|
|
"grad_norm": 0.35386982560157776,
|
|
"learning_rate": 8.388500685250827e-05,
|
|
"epoch": 0.5749799518845229,
|
|
"step": 717
|
|
},
|
|
{
|
|
"loss": 0.0563,
|
|
"grad_norm": 0.3657480478286743,
|
|
"learning_rate": 8.3623194644533e-05,
|
|
"epoch": 0.5757818765036087,
|
|
"step": 718
|
|
},
|
|
{
|
|
"loss": 0.0391,
|
|
"grad_norm": 0.2397533357143402,
|
|
"learning_rate": 8.336149773548678e-05,
|
|
"epoch": 0.5765838011226945,
|
|
"step": 719
|
|
},
|
|
{
|
|
"loss": 0.0399,
|
|
"grad_norm": 0.33155348896980286,
|
|
"learning_rate": 8.309991796781511e-05,
|
|
"epoch": 0.5773857257417803,
|
|
"step": 720
|
|
},
|
|
{
|
|
"loss": 0.0514,
|
|
"grad_norm": 0.4915727972984314,
|
|
"learning_rate": 8.283845718313894e-05,
|
|
"epoch": 0.5781876503608661,
|
|
"step": 721
|
|
},
|
|
{
|
|
"loss": 0.026,
|
|
"grad_norm": 0.22791197896003723,
|
|
"learning_rate": 8.257711722224152e-05,
|
|
"epoch": 0.5789895749799518,
|
|
"step": 722
|
|
},
|
|
{
|
|
"loss": 0.0435,
|
|
"grad_norm": 0.40722930431365967,
|
|
"learning_rate": 8.231589992505536e-05,
|
|
"epoch": 0.5797914995990376,
|
|
"step": 723
|
|
},
|
|
{
|
|
"loss": 0.0217,
|
|
"grad_norm": 0.24059796333312988,
|
|
"learning_rate": 8.205480713064946e-05,
|
|
"epoch": 0.5805934242181235,
|
|
"step": 724
|
|
},
|
|
{
|
|
"loss": 0.0339,
|
|
"grad_norm": 0.40672048926353455,
|
|
"learning_rate": 8.179384067721631e-05,
|
|
"epoch": 0.5813953488372093,
|
|
"step": 725
|
|
},
|
|
{
|
|
"loss": 0.0744,
|
|
"grad_norm": 0.3509446680545807,
|
|
"learning_rate": 8.153300240205873e-05,
|
|
"epoch": 0.5821972734562951,
|
|
"step": 726
|
|
},
|
|
{
|
|
"loss": 0.0644,
|
|
"grad_norm": 0.28646859526634216,
|
|
"learning_rate": 8.12722941415773e-05,
|
|
"epoch": 0.5829991980753809,
|
|
"step": 727
|
|
},
|
|
{
|
|
"loss": 0.0163,
|
|
"grad_norm": 0.222028449177742,
|
|
"learning_rate": 8.101171773125716e-05,
|
|
"epoch": 0.5838011226944667,
|
|
"step": 728
|
|
},
|
|
{
|
|
"loss": 0.0215,
|
|
"grad_norm": 0.23310942947864532,
|
|
"learning_rate": 8.075127500565525e-05,
|
|
"epoch": 0.5846030473135525,
|
|
"step": 729
|
|
},
|
|
{
|
|
"loss": 0.0297,
|
|
"grad_norm": 0.2620904743671417,
|
|
"learning_rate": 8.049096779838719e-05,
|
|
"epoch": 0.5854049719326383,
|
|
"step": 730
|
|
},
|
|
{
|
|
"loss": 0.0773,
|
|
"grad_norm": 0.6896341443061829,
|
|
"learning_rate": 8.023079794211459e-05,
|
|
"epoch": 0.5862068965517241,
|
|
"step": 731
|
|
},
|
|
{
|
|
"loss": 0.0654,
|
|
"grad_norm": 0.3588181138038635,
|
|
"learning_rate": 7.99707672685321e-05,
|
|
"epoch": 0.58700882117081,
|
|
"step": 732
|
|
},
|
|
{
|
|
"loss": 0.0348,
|
|
"grad_norm": 0.2889043390750885,
|
|
"learning_rate": 7.971087760835432e-05,
|
|
"epoch": 0.5878107457898958,
|
|
"step": 733
|
|
},
|
|
{
|
|
"loss": 0.0174,
|
|
"grad_norm": 0.5972622632980347,
|
|
"learning_rate": 7.945113079130323e-05,
|
|
"epoch": 0.5886126704089816,
|
|
"step": 734
|
|
},
|
|
{
|
|
"loss": 0.0359,
|
|
"grad_norm": 0.25957322120666504,
|
|
"learning_rate": 7.919152864609499e-05,
|
|
"epoch": 0.5894145950280674,
|
|
"step": 735
|
|
},
|
|
{
|
|
"loss": 0.0433,
|
|
"grad_norm": 0.3756544888019562,
|
|
"learning_rate": 7.89320730004274e-05,
|
|
"epoch": 0.5902165196471532,
|
|
"step": 736
|
|
},
|
|
{
|
|
"loss": 0.0429,
|
|
"grad_norm": 0.25527504086494446,
|
|
"learning_rate": 7.867276568096662e-05,
|
|
"epoch": 0.591018444266239,
|
|
"step": 737
|
|
},
|
|
{
|
|
"loss": 0.0727,
|
|
"grad_norm": 0.49652037024497986,
|
|
"learning_rate": 7.84136085133347e-05,
|
|
"epoch": 0.5918203688853247,
|
|
"step": 738
|
|
},
|
|
{
|
|
"loss": 0.0555,
|
|
"grad_norm": 0.46329352259635925,
|
|
"learning_rate": 7.815460332209656e-05,
|
|
"epoch": 0.5926222935044105,
|
|
"step": 739
|
|
},
|
|
{
|
|
"loss": 0.0231,
|
|
"grad_norm": 0.217621847987175,
|
|
"learning_rate": 7.789575193074704e-05,
|
|
"epoch": 0.5934242181234964,
|
|
"step": 740
|
|
},
|
|
{
|
|
"loss": 0.0689,
|
|
"grad_norm": 0.5665069818496704,
|
|
"learning_rate": 7.763705616169825e-05,
|
|
"epoch": 0.5942261427425822,
|
|
"step": 741
|
|
},
|
|
{
|
|
"loss": 0.0463,
|
|
"grad_norm": 0.42257973551750183,
|
|
"learning_rate": 7.737851783626671e-05,
|
|
"epoch": 0.595028067361668,
|
|
"step": 742
|
|
},
|
|
{
|
|
"loss": 0.0348,
|
|
"grad_norm": 0.34942853450775146,
|
|
"learning_rate": 7.712013877466032e-05,
|
|
"epoch": 0.5958299919807538,
|
|
"step": 743
|
|
},
|
|
{
|
|
"loss": 0.0189,
|
|
"grad_norm": 0.2506210505962372,
|
|
"learning_rate": 7.686192079596586e-05,
|
|
"epoch": 0.5966319165998396,
|
|
"step": 744
|
|
},
|
|
{
|
|
"eval_loss": 0.042267050594091415,
|
|
"eval_runtime": 31.7194,
|
|
"eval_samples_per_second": 33.103,
|
|
"eval_steps_per_second": 8.291,
|
|
"epoch": 0.5966319165998396,
|
|
"step": 744
|
|
},
|
|
{
|
|
"loss": 0.0516,
|
|
"grad_norm": 0.3015764355659485,
|
|
"learning_rate": 7.660386571813593e-05,
|
|
"epoch": 0.5974338412189254,
|
|
"step": 745
|
|
},
|
|
{
|
|
"loss": 0.04,
|
|
"grad_norm": 0.333032488822937,
|
|
"learning_rate": 7.634597535797633e-05,
|
|
"epoch": 0.5982357658380112,
|
|
"step": 746
|
|
},
|
|
{
|
|
"loss": 0.058,
|
|
"grad_norm": 0.34520605206489563,
|
|
"learning_rate": 7.608825153113305e-05,
|
|
"epoch": 0.599037690457097,
|
|
"step": 747
|
|
},
|
|
{
|
|
"loss": 0.0177,
|
|
"grad_norm": 0.24561840295791626,
|
|
"learning_rate": 7.583069605207975e-05,
|
|
"epoch": 0.5998396150761829,
|
|
"step": 748
|
|
},
|
|
{
|
|
"loss": 0.0406,
|
|
"grad_norm": 0.30027586221694946,
|
|
"learning_rate": 7.557331073410485e-05,
|
|
"epoch": 0.6006415396952687,
|
|
"step": 749
|
|
},
|
|
{
|
|
"loss": 0.0641,
|
|
"grad_norm": 0.41032275557518005,
|
|
"learning_rate": 7.531609738929865e-05,
|
|
"epoch": 0.6014434643143545,
|
|
"step": 750
|
|
},
|
|
{
|
|
"loss": 0.0209,
|
|
"grad_norm": 0.20874442160129547,
|
|
"learning_rate": 7.505905782854081e-05,
|
|
"epoch": 0.6022453889334403,
|
|
"step": 751
|
|
},
|
|
{
|
|
"loss": 0.0501,
|
|
"grad_norm": 0.3524108827114105,
|
|
"learning_rate": 7.48021938614875e-05,
|
|
"epoch": 0.603047313552526,
|
|
"step": 752
|
|
},
|
|
{
|
|
"loss": 0.0323,
|
|
"grad_norm": 0.3698127269744873,
|
|
"learning_rate": 7.454550729655852e-05,
|
|
"epoch": 0.6038492381716118,
|
|
"step": 753
|
|
},
|
|
{
|
|
"loss": 0.047,
|
|
"grad_norm": 0.40356680750846863,
|
|
"learning_rate": 7.428899994092483e-05,
|
|
"epoch": 0.6046511627906976,
|
|
"step": 754
|
|
},
|
|
{
|
|
"loss": 0.024,
|
|
"grad_norm": 0.2525324523448944,
|
|
"learning_rate": 7.403267360049556e-05,
|
|
"epoch": 0.6054530874097834,
|
|
"step": 755
|
|
},
|
|
{
|
|
"loss": 0.0143,
|
|
"grad_norm": 0.416182279586792,
|
|
"learning_rate": 7.37765300799056e-05,
|
|
"epoch": 0.6062550120288693,
|
|
"step": 756
|
|
},
|
|
{
|
|
"loss": 0.0715,
|
|
"grad_norm": 0.4480084478855133,
|
|
"learning_rate": 7.352057118250246e-05,
|
|
"epoch": 0.6070569366479551,
|
|
"step": 757
|
|
},
|
|
{
|
|
"loss": 0.01,
|
|
"grad_norm": 0.22036206722259521,
|
|
"learning_rate": 7.326479871033409e-05,
|
|
"epoch": 0.6078588612670409,
|
|
"step": 758
|
|
},
|
|
{
|
|
"loss": 0.0329,
|
|
"grad_norm": 0.2710481882095337,
|
|
"learning_rate": 7.300921446413583e-05,
|
|
"epoch": 0.6086607858861267,
|
|
"step": 759
|
|
},
|
|
{
|
|
"loss": 0.0332,
|
|
"grad_norm": 0.241096630692482,
|
|
"learning_rate": 7.275382024331772e-05,
|
|
"epoch": 0.6094627105052125,
|
|
"step": 760
|
|
},
|
|
{
|
|
"loss": 0.0367,
|
|
"grad_norm": 0.37980324029922485,
|
|
"learning_rate": 7.249861784595217e-05,
|
|
"epoch": 0.6102646351242983,
|
|
"step": 761
|
|
},
|
|
{
|
|
"loss": 0.0499,
|
|
"grad_norm": 0.4780760407447815,
|
|
"learning_rate": 7.2243609068761e-05,
|
|
"epoch": 0.6110665597433841,
|
|
"step": 762
|
|
},
|
|
{
|
|
"loss": 0.0261,
|
|
"grad_norm": 0.21910789608955383,
|
|
"learning_rate": 7.198879570710272e-05,
|
|
"epoch": 0.6118684843624699,
|
|
"step": 763
|
|
},
|
|
{
|
|
"loss": 0.047,
|
|
"grad_norm": 0.30522310733795166,
|
|
"learning_rate": 7.173417955496024e-05,
|
|
"epoch": 0.6126704089815558,
|
|
"step": 764
|
|
},
|
|
{
|
|
"loss": 0.0361,
|
|
"grad_norm": 0.4247373044490814,
|
|
"learning_rate": 7.147976240492795e-05,
|
|
"epoch": 0.6134723336006416,
|
|
"step": 765
|
|
},
|
|
{
|
|
"loss": 0.0543,
|
|
"grad_norm": 0.30531254410743713,
|
|
"learning_rate": 7.122554604819925e-05,
|
|
"epoch": 0.6142742582197274,
|
|
"step": 766
|
|
},
|
|
{
|
|
"loss": 0.0394,
|
|
"grad_norm": 0.3051380515098572,
|
|
"learning_rate": 7.097153227455379e-05,
|
|
"epoch": 0.6150761828388132,
|
|
"step": 767
|
|
},
|
|
{
|
|
"loss": 0.0556,
|
|
"grad_norm": 0.3333624601364136,
|
|
"learning_rate": 7.071772287234497e-05,
|
|
"epoch": 0.615878107457899,
|
|
"step": 768
|
|
},
|
|
{
|
|
"loss": 0.032,
|
|
"grad_norm": 0.2435581535100937,
|
|
"learning_rate": 7.046411962848744e-05,
|
|
"epoch": 0.6166800320769847,
|
|
"step": 769
|
|
},
|
|
{
|
|
"loss": 0.0175,
|
|
"grad_norm": 0.21923010051250458,
|
|
"learning_rate": 7.021072432844426e-05,
|
|
"epoch": 0.6174819566960705,
|
|
"step": 770
|
|
},
|
|
{
|
|
"loss": 0.0308,
|
|
"grad_norm": 0.344446063041687,
|
|
"learning_rate": 6.995753875621464e-05,
|
|
"epoch": 0.6182838813151563,
|
|
"step": 771
|
|
},
|
|
{
|
|
"loss": 0.0579,
|
|
"grad_norm": 0.6894804835319519,
|
|
"learning_rate": 6.970456469432117e-05,
|
|
"epoch": 0.6190858059342422,
|
|
"step": 772
|
|
},
|
|
{
|
|
"loss": 0.0412,
|
|
"grad_norm": 0.9697020053863525,
|
|
"learning_rate": 6.945180392379729e-05,
|
|
"epoch": 0.619887730553328,
|
|
"step": 773
|
|
},
|
|
{
|
|
"loss": 0.0313,
|
|
"grad_norm": 0.30235642194747925,
|
|
"learning_rate": 6.919925822417476e-05,
|
|
"epoch": 0.6206896551724138,
|
|
"step": 774
|
|
},
|
|
{
|
|
"loss": 0.058,
|
|
"grad_norm": 0.42743489146232605,
|
|
"learning_rate": 6.894692937347127e-05,
|
|
"epoch": 0.6214915797914996,
|
|
"step": 775
|
|
},
|
|
{
|
|
"loss": 0.0405,
|
|
"grad_norm": 0.38457682728767395,
|
|
"learning_rate": 6.869481914817779e-05,
|
|
"epoch": 0.6222935044105854,
|
|
"step": 776
|
|
},
|
|
{
|
|
"loss": 0.0319,
|
|
"grad_norm": 0.31749409437179565,
|
|
"learning_rate": 6.844292932324597e-05,
|
|
"epoch": 0.6230954290296712,
|
|
"step": 777
|
|
},
|
|
{
|
|
"loss": 0.0363,
|
|
"grad_norm": 0.4263424575328827,
|
|
"learning_rate": 6.819126167207585e-05,
|
|
"epoch": 0.623897353648757,
|
|
"step": 778
|
|
},
|
|
{
|
|
"loss": 0.0393,
|
|
"grad_norm": 0.25529760122299194,
|
|
"learning_rate": 6.793981796650333e-05,
|
|
"epoch": 0.6246992782678428,
|
|
"step": 779
|
|
},
|
|
{
|
|
"loss": 0.0294,
|
|
"grad_norm": 0.203300341963768,
|
|
"learning_rate": 6.768859997678751e-05,
|
|
"epoch": 0.6255012028869287,
|
|
"step": 780
|
|
},
|
|
{
|
|
"loss": 0.0714,
|
|
"grad_norm": 0.43434929847717285,
|
|
"learning_rate": 6.743760947159846e-05,
|
|
"epoch": 0.6263031275060145,
|
|
"step": 781
|
|
},
|
|
{
|
|
"loss": 0.0236,
|
|
"grad_norm": 0.3486297130584717,
|
|
"learning_rate": 6.718684821800467e-05,
|
|
"epoch": 0.6271050521251003,
|
|
"step": 782
|
|
},
|
|
{
|
|
"loss": 0.0401,
|
|
"grad_norm": 0.36812183260917664,
|
|
"learning_rate": 6.69363179814606e-05,
|
|
"epoch": 0.627906976744186,
|
|
"step": 783
|
|
},
|
|
{
|
|
"loss": 0.0466,
|
|
"grad_norm": 0.40551620721817017,
|
|
"learning_rate": 6.668602052579424e-05,
|
|
"epoch": 0.6287089013632718,
|
|
"step": 784
|
|
},
|
|
{
|
|
"loss": 0.0548,
|
|
"grad_norm": 0.39897987246513367,
|
|
"learning_rate": 6.643595761319474e-05,
|
|
"epoch": 0.6295108259823576,
|
|
"step": 785
|
|
},
|
|
{
|
|
"loss": 0.0287,
|
|
"grad_norm": 0.23864711821079254,
|
|
"learning_rate": 6.61861310042e-05,
|
|
"epoch": 0.6303127506014434,
|
|
"step": 786
|
|
},
|
|
{
|
|
"loss": 0.0342,
|
|
"grad_norm": 0.32459014654159546,
|
|
"learning_rate": 6.593654245768415e-05,
|
|
"epoch": 0.6311146752205292,
|
|
"step": 787
|
|
},
|
|
{
|
|
"loss": 0.103,
|
|
"grad_norm": 0.8521727323532104,
|
|
"learning_rate": 6.568719373084538e-05,
|
|
"epoch": 0.6319165998396151,
|
|
"step": 788
|
|
},
|
|
{
|
|
"loss": 0.0183,
|
|
"grad_norm": 0.20950952172279358,
|
|
"learning_rate": 6.543808657919345e-05,
|
|
"epoch": 0.6327185244587009,
|
|
"step": 789
|
|
},
|
|
{
|
|
"loss": 0.0399,
|
|
"grad_norm": 0.41553550958633423,
|
|
"learning_rate": 6.518922275653724e-05,
|
|
"epoch": 0.6335204490777867,
|
|
"step": 790
|
|
},
|
|
{
|
|
"loss": 0.0347,
|
|
"grad_norm": 0.2640535831451416,
|
|
"learning_rate": 6.494060401497261e-05,
|
|
"epoch": 0.6343223736968725,
|
|
"step": 791
|
|
},
|
|
{
|
|
"loss": 0.0288,
|
|
"grad_norm": 0.2901599407196045,
|
|
"learning_rate": 6.469223210486992e-05,
|
|
"epoch": 0.6351242983159583,
|
|
"step": 792
|
|
},
|
|
{
|
|
"loss": 0.036,
|
|
"grad_norm": 0.30714696645736694,
|
|
"learning_rate": 6.444410877486178e-05,
|
|
"epoch": 0.6359262229350441,
|
|
"step": 793
|
|
},
|
|
{
|
|
"loss": 0.0168,
|
|
"grad_norm": 0.16659529507160187,
|
|
"learning_rate": 6.419623577183056e-05,
|
|
"epoch": 0.6367281475541299,
|
|
"step": 794
|
|
},
|
|
{
|
|
"loss": 0.0391,
|
|
"grad_norm": 0.2610877454280853,
|
|
"learning_rate": 6.394861484089641e-05,
|
|
"epoch": 0.6375300721732157,
|
|
"step": 795
|
|
},
|
|
{
|
|
"loss": 0.0113,
|
|
"grad_norm": 0.14762139320373535,
|
|
"learning_rate": 6.370124772540469e-05,
|
|
"epoch": 0.6383319967923016,
|
|
"step": 796
|
|
},
|
|
{
|
|
"loss": 0.1048,
|
|
"grad_norm": 0.5695735216140747,
|
|
"learning_rate": 6.345413616691385e-05,
|
|
"epoch": 0.6391339214113874,
|
|
"step": 797
|
|
},
|
|
{
|
|
"loss": 0.0379,
|
|
"grad_norm": 0.2888137996196747,
|
|
"learning_rate": 6.320728190518308e-05,
|
|
"epoch": 0.6399358460304732,
|
|
"step": 798
|
|
},
|
|
{
|
|
"loss": 0.0287,
|
|
"grad_norm": 0.3997354805469513,
|
|
"learning_rate": 6.29606866781602e-05,
|
|
"epoch": 0.640737770649559,
|
|
"step": 799
|
|
},
|
|
{
|
|
"loss": 0.0347,
|
|
"grad_norm": 0.23028384149074554,
|
|
"learning_rate": 6.271435222196916e-05,
|
|
"epoch": 0.6415396952686447,
|
|
"step": 800
|
|
},
|
|
{
|
|
"loss": 0.0375,
|
|
"grad_norm": 0.332156240940094,
|
|
"learning_rate": 6.246828027089811e-05,
|
|
"epoch": 0.6423416198877305,
|
|
"step": 801
|
|
},
|
|
{
|
|
"loss": 0.0492,
|
|
"grad_norm": 0.41977575421333313,
|
|
"learning_rate": 6.222247255738706e-05,
|
|
"epoch": 0.6431435445068163,
|
|
"step": 802
|
|
},
|
|
{
|
|
"loss": 0.0199,
|
|
"grad_norm": 0.24224106967449188,
|
|
"learning_rate": 6.197693081201567e-05,
|
|
"epoch": 0.6439454691259021,
|
|
"step": 803
|
|
},
|
|
{
|
|
"loss": 0.0513,
|
|
"grad_norm": 0.46784040331840515,
|
|
"learning_rate": 6.173165676349103e-05,
|
|
"epoch": 0.644747393744988,
|
|
"step": 804
|
|
},
|
|
{
|
|
"loss": 0.0472,
|
|
"grad_norm": 0.38110026717185974,
|
|
"learning_rate": 6.14866521386356e-05,
|
|
"epoch": 0.6455493183640738,
|
|
"step": 805
|
|
},
|
|
{
|
|
"loss": 0.0649,
|
|
"grad_norm": 0.3705803453922272,
|
|
"learning_rate": 6.124191866237504e-05,
|
|
"epoch": 0.6463512429831596,
|
|
"step": 806
|
|
},
|
|
{
|
|
"loss": 0.0437,
|
|
"grad_norm": 0.28756698966026306,
|
|
"learning_rate": 6.0997458057725877e-05,
|
|
"epoch": 0.6471531676022454,
|
|
"step": 807
|
|
},
|
|
{
|
|
"loss": 0.0297,
|
|
"grad_norm": 0.3769364356994629,
|
|
"learning_rate": 6.0753272045783625e-05,
|
|
"epoch": 0.6479550922213312,
|
|
"step": 808
|
|
},
|
|
{
|
|
"loss": 0.0351,
|
|
"grad_norm": 0.2772417962551117,
|
|
"learning_rate": 6.0509362345710585e-05,
|
|
"epoch": 0.648757016840417,
|
|
"step": 809
|
|
},
|
|
{
|
|
"loss": 0.0276,
|
|
"grad_norm": 0.20303967595100403,
|
|
"learning_rate": 6.026573067472366e-05,
|
|
"epoch": 0.6495589414595028,
|
|
"step": 810
|
|
},
|
|
{
|
|
"loss": 0.0409,
|
|
"grad_norm": 0.26352015137672424,
|
|
"learning_rate": 6.00223787480823e-05,
|
|
"epoch": 0.6503608660785886,
|
|
"step": 811
|
|
},
|
|
{
|
|
"loss": 0.0392,
|
|
"grad_norm": 0.4463076591491699,
|
|
"learning_rate": 5.977930827907649e-05,
|
|
"epoch": 0.6511627906976745,
|
|
"step": 812
|
|
},
|
|
{
|
|
"loss": 0.0556,
|
|
"grad_norm": 0.3590082824230194,
|
|
"learning_rate": 5.9536520979014676e-05,
|
|
"epoch": 0.6519647153167603,
|
|
"step": 813
|
|
},
|
|
{
|
|
"loss": 0.0316,
|
|
"grad_norm": 0.38499733805656433,
|
|
"learning_rate": 5.929401855721162e-05,
|
|
"epoch": 0.652766639935846,
|
|
"step": 814
|
|
},
|
|
{
|
|
"loss": 0.0456,
|
|
"grad_norm": 1.1676615476608276,
|
|
"learning_rate": 5.905180272097648e-05,
|
|
"epoch": 0.6535685645549318,
|
|
"step": 815
|
|
},
|
|
{
|
|
"loss": 0.0783,
|
|
"grad_norm": 0.5431867837905884,
|
|
"learning_rate": 5.880987517560075e-05,
|
|
"epoch": 0.6543704891740176,
|
|
"step": 816
|
|
},
|
|
{
|
|
"loss": 0.0501,
|
|
"grad_norm": 0.35964494943618774,
|
|
"learning_rate": 5.856823762434618e-05,
|
|
"epoch": 0.6551724137931034,
|
|
"step": 817
|
|
},
|
|
{
|
|
"loss": 0.0408,
|
|
"grad_norm": 0.323234498500824,
|
|
"learning_rate": 5.832689176843291e-05,
|
|
"epoch": 0.6559743384121892,
|
|
"step": 818
|
|
},
|
|
{
|
|
"loss": 0.0442,
|
|
"grad_norm": 1.2825475931167603,
|
|
"learning_rate": 5.808583930702739e-05,
|
|
"epoch": 0.656776263031275,
|
|
"step": 819
|
|
},
|
|
{
|
|
"loss": 0.0291,
|
|
"grad_norm": 0.19290927052497864,
|
|
"learning_rate": 5.784508193723057e-05,
|
|
"epoch": 0.6575781876503609,
|
|
"step": 820
|
|
},
|
|
{
|
|
"loss": 0.0176,
|
|
"grad_norm": 0.18903003633022308,
|
|
"learning_rate": 5.76046213540657e-05,
|
|
"epoch": 0.6583801122694467,
|
|
"step": 821
|
|
},
|
|
{
|
|
"loss": 0.0336,
|
|
"grad_norm": 0.39771515130996704,
|
|
"learning_rate": 5.7364459250466596e-05,
|
|
"epoch": 0.6591820368885325,
|
|
"step": 822
|
|
},
|
|
{
|
|
"loss": 0.0294,
|
|
"grad_norm": 0.3263964354991913,
|
|
"learning_rate": 5.712459731726577e-05,
|
|
"epoch": 0.6599839615076183,
|
|
"step": 823
|
|
},
|
|
{
|
|
"loss": 0.0371,
|
|
"grad_norm": 0.2918822765350342,
|
|
"learning_rate": 5.688503724318217e-05,
|
|
"epoch": 0.6607858861267041,
|
|
"step": 824
|
|
},
|
|
{
|
|
"loss": 0.0395,
|
|
"grad_norm": 0.24919533729553223,
|
|
"learning_rate": 5.6645780714809814e-05,
|
|
"epoch": 0.6615878107457899,
|
|
"step": 825
|
|
},
|
|
{
|
|
"loss": 0.0374,
|
|
"grad_norm": 0.23720526695251465,
|
|
"learning_rate": 5.640682941660547e-05,
|
|
"epoch": 0.6623897353648757,
|
|
"step": 826
|
|
},
|
|
{
|
|
"loss": 0.0199,
|
|
"grad_norm": 0.1959155648946762,
|
|
"learning_rate": 5.616818503087704e-05,
|
|
"epoch": 0.6631916599839615,
|
|
"step": 827
|
|
},
|
|
{
|
|
"loss": 0.0641,
|
|
"grad_norm": 2.0196421146392822,
|
|
"learning_rate": 5.5929849237771556e-05,
|
|
"epoch": 0.6639935846030474,
|
|
"step": 828
|
|
},
|
|
{
|
|
"loss": 0.0397,
|
|
"grad_norm": 0.24733805656433105,
|
|
"learning_rate": 5.569182371526365e-05,
|
|
"epoch": 0.6647955092221332,
|
|
"step": 829
|
|
},
|
|
{
|
|
"loss": 0.0359,
|
|
"grad_norm": 0.24535015225410461,
|
|
"learning_rate": 5.545411013914329e-05,
|
|
"epoch": 0.6655974338412189,
|
|
"step": 830
|
|
},
|
|
{
|
|
"loss": 0.0384,
|
|
"grad_norm": 0.4017760753631592,
|
|
"learning_rate": 5.521671018300436e-05,
|
|
"epoch": 0.6663993584603047,
|
|
"step": 831
|
|
},
|
|
{
|
|
"loss": 0.0287,
|
|
"grad_norm": 0.2186603546142578,
|
|
"learning_rate": 5.497962551823266e-05,
|
|
"epoch": 0.6672012830793905,
|
|
"step": 832
|
|
},
|
|
{
|
|
"loss": 0.0885,
|
|
"grad_norm": 0.4830259680747986,
|
|
"learning_rate": 5.4742857813994356e-05,
|
|
"epoch": 0.6680032076984763,
|
|
"step": 833
|
|
},
|
|
{
|
|
"loss": 0.0302,
|
|
"grad_norm": 0.3544902205467224,
|
|
"learning_rate": 5.450640873722395e-05,
|
|
"epoch": 0.6688051323175621,
|
|
"step": 834
|
|
},
|
|
{
|
|
"loss": 0.0764,
|
|
"grad_norm": 0.3852503299713135,
|
|
"learning_rate": 5.427027995261269e-05,
|
|
"epoch": 0.6696070569366479,
|
|
"step": 835
|
|
},
|
|
{
|
|
"loss": 0.0466,
|
|
"grad_norm": 0.32173559069633484,
|
|
"learning_rate": 5.403447312259702e-05,
|
|
"epoch": 0.6704089815557338,
|
|
"step": 836
|
|
},
|
|
{
|
|
"loss": 0.0149,
|
|
"grad_norm": 0.19790256023406982,
|
|
"learning_rate": 5.379898990734641e-05,
|
|
"epoch": 0.6712109061748196,
|
|
"step": 837
|
|
},
|
|
{
|
|
"loss": 0.0309,
|
|
"grad_norm": 0.2427252233028412,
|
|
"learning_rate": 5.356383196475225e-05,
|
|
"epoch": 0.6720128307939054,
|
|
"step": 838
|
|
},
|
|
{
|
|
"loss": 0.0637,
|
|
"grad_norm": 0.45702651143074036,
|
|
"learning_rate": 5.332900095041569e-05,
|
|
"epoch": 0.6728147554129912,
|
|
"step": 839
|
|
},
|
|
{
|
|
"loss": 0.0258,
|
|
"grad_norm": 0.21773581206798553,
|
|
"learning_rate": 5.309449851763633e-05,
|
|
"epoch": 0.673616680032077,
|
|
"step": 840
|
|
},
|
|
{
|
|
"loss": 0.0383,
|
|
"grad_norm": 0.34996020793914795,
|
|
"learning_rate": 5.286032631740023e-05,
|
|
"epoch": 0.6744186046511628,
|
|
"step": 841
|
|
},
|
|
{
|
|
"loss": 0.053,
|
|
"grad_norm": 0.3601475656032562,
|
|
"learning_rate": 5.2626485998368726e-05,
|
|
"epoch": 0.6752205292702486,
|
|
"step": 842
|
|
},
|
|
{
|
|
"loss": 0.0334,
|
|
"grad_norm": 0.2879583537578583,
|
|
"learning_rate": 5.239297920686641e-05,
|
|
"epoch": 0.6760224538893344,
|
|
"step": 843
|
|
},
|
|
{
|
|
"loss": 0.025,
|
|
"grad_norm": 0.3214558959007263,
|
|
"learning_rate": 5.215980758686978e-05,
|
|
"epoch": 0.6768243785084203,
|
|
"step": 844
|
|
},
|
|
{
|
|
"loss": 0.038,
|
|
"grad_norm": 0.33000731468200684,
|
|
"learning_rate": 5.1926972779995564e-05,
|
|
"epoch": 0.677626303127506,
|
|
"step": 845
|
|
},
|
|
{
|
|
"loss": 0.0325,
|
|
"grad_norm": 0.45079219341278076,
|
|
"learning_rate": 5.169447642548928e-05,
|
|
"epoch": 0.6784282277465918,
|
|
"step": 846
|
|
},
|
|
{
|
|
"loss": 0.0186,
|
|
"grad_norm": 0.27335742115974426,
|
|
"learning_rate": 5.146232016021353e-05,
|
|
"epoch": 0.6792301523656776,
|
|
"step": 847
|
|
},
|
|
{
|
|
"loss": 0.0674,
|
|
"grad_norm": 0.480881929397583,
|
|
"learning_rate": 5.123050561863657e-05,
|
|
"epoch": 0.6800320769847634,
|
|
"step": 848
|
|
},
|
|
{
|
|
"loss": 0.0358,
|
|
"grad_norm": 0.26150617003440857,
|
|
"learning_rate": 5.099903443282079e-05,
|
|
"epoch": 0.6808340016038492,
|
|
"step": 849
|
|
},
|
|
{
|
|
"loss": 0.0358,
|
|
"grad_norm": 0.4229785203933716,
|
|
"learning_rate": 5.0767908232411306e-05,
|
|
"epoch": 0.681635926222935,
|
|
"step": 850
|
|
},
|
|
{
|
|
"loss": 0.0964,
|
|
"grad_norm": 0.6302306652069092,
|
|
"learning_rate": 5.053712864462432e-05,
|
|
"epoch": 0.6824378508420208,
|
|
"step": 851
|
|
},
|
|
{
|
|
"loss": 0.0529,
|
|
"grad_norm": 0.40250223875045776,
|
|
"learning_rate": 5.0306697294235714e-05,
|
|
"epoch": 0.6832397754611067,
|
|
"step": 852
|
|
},
|
|
{
|
|
"loss": 0.0349,
|
|
"grad_norm": 0.40601104497909546,
|
|
"learning_rate": 5.007661580356982e-05,
|
|
"epoch": 0.6840417000801925,
|
|
"step": 853
|
|
},
|
|
{
|
|
"loss": 0.0264,
|
|
"grad_norm": 0.20523907244205475,
|
|
"learning_rate": 4.984688579248756e-05,
|
|
"epoch": 0.6848436246992783,
|
|
"step": 854
|
|
},
|
|
{
|
|
"loss": 0.0233,
|
|
"grad_norm": 0.2532117962837219,
|
|
"learning_rate": 4.961750887837557e-05,
|
|
"epoch": 0.6856455493183641,
|
|
"step": 855
|
|
},
|
|
{
|
|
"loss": 0.0222,
|
|
"grad_norm": 0.23107284307479858,
|
|
"learning_rate": 4.938848667613436e-05,
|
|
"epoch": 0.6864474739374499,
|
|
"step": 856
|
|
},
|
|
{
|
|
"loss": 0.0243,
|
|
"grad_norm": 0.2529151141643524,
|
|
"learning_rate": 4.915982079816732e-05,
|
|
"epoch": 0.6872493985565357,
|
|
"step": 857
|
|
},
|
|
{
|
|
"loss": 0.0391,
|
|
"grad_norm": 0.2575894892215729,
|
|
"learning_rate": 4.8931512854368913e-05,
|
|
"epoch": 0.6880513231756215,
|
|
"step": 858
|
|
},
|
|
{
|
|
"loss": 0.0773,
|
|
"grad_norm": 0.6415811777114868,
|
|
"learning_rate": 4.870356445211388e-05,
|
|
"epoch": 0.6888532477947072,
|
|
"step": 859
|
|
},
|
|
{
|
|
"loss": 0.0391,
|
|
"grad_norm": 0.4123080372810364,
|
|
"learning_rate": 4.8475977196245504e-05,
|
|
"epoch": 0.6896551724137931,
|
|
"step": 860
|
|
},
|
|
{
|
|
"loss": 0.0457,
|
|
"grad_norm": 0.31931477785110474,
|
|
"learning_rate": 4.8248752689064494e-05,
|
|
"epoch": 0.6904570970328789,
|
|
"step": 861
|
|
},
|
|
{
|
|
"loss": 0.0268,
|
|
"grad_norm": 0.2256850302219391,
|
|
"learning_rate": 4.802189253031764e-05,
|
|
"epoch": 0.6912590216519647,
|
|
"step": 862
|
|
},
|
|
{
|
|
"loss": 0.029,
|
|
"grad_norm": 0.4967258870601654,
|
|
"learning_rate": 4.779539831718668e-05,
|
|
"epoch": 0.6920609462710505,
|
|
"step": 863
|
|
},
|
|
{
|
|
"loss": 0.0175,
|
|
"grad_norm": 0.18003036081790924,
|
|
"learning_rate": 4.756927164427685e-05,
|
|
"epoch": 0.6928628708901363,
|
|
"step": 864
|
|
},
|
|
{
|
|
"loss": 0.0215,
|
|
"grad_norm": 0.2862027585506439,
|
|
"learning_rate": 4.7343514103605767e-05,
|
|
"epoch": 0.6936647955092221,
|
|
"step": 865
|
|
},
|
|
{
|
|
"loss": 0.0455,
|
|
"grad_norm": 0.32969915866851807,
|
|
"learning_rate": 4.711812728459233e-05,
|
|
"epoch": 0.6944667201283079,
|
|
"step": 866
|
|
},
|
|
{
|
|
"loss": 0.0135,
|
|
"grad_norm": 0.16625012457370758,
|
|
"learning_rate": 4.689311277404529e-05,
|
|
"epoch": 0.6952686447473937,
|
|
"step": 867
|
|
},
|
|
{
|
|
"loss": 0.0223,
|
|
"grad_norm": 0.4092782139778137,
|
|
"learning_rate": 4.666847215615226e-05,
|
|
"epoch": 0.6960705693664796,
|
|
"step": 868
|
|
},
|
|
{
|
|
"eval_loss": 0.04034050926566124,
|
|
"eval_runtime": 31.8203,
|
|
"eval_samples_per_second": 32.998,
|
|
"eval_steps_per_second": 8.265,
|
|
"epoch": 0.6960705693664796,
|
|
"step": 868
|
|
},
|
|
{
|
|
"loss": 0.0779,
|
|
"grad_norm": 0.5360198616981506,
|
|
"learning_rate": 4.6444207012468465e-05,
|
|
"epoch": 0.6968724939855654,
|
|
"step": 869
|
|
},
|
|
{
|
|
"loss": 0.0513,
|
|
"grad_norm": 0.49572035670280457,
|
|
"learning_rate": 4.622031892190579e-05,
|
|
"epoch": 0.6976744186046512,
|
|
"step": 870
|
|
},
|
|
{
|
|
"loss": 0.0337,
|
|
"grad_norm": 0.27240556478500366,
|
|
"learning_rate": 4.599680946072127e-05,
|
|
"epoch": 0.698476343223737,
|
|
"step": 871
|
|
},
|
|
{
|
|
"loss": 0.0367,
|
|
"grad_norm": 0.3386692702770233,
|
|
"learning_rate": 4.57736802025065e-05,
|
|
"epoch": 0.6992782678428228,
|
|
"step": 872
|
|
},
|
|
{
|
|
"loss": 0.0502,
|
|
"grad_norm": 0.47822192311286926,
|
|
"learning_rate": 4.555093271817616e-05,
|
|
"epoch": 0.7000801924619086,
|
|
"step": 873
|
|
},
|
|
{
|
|
"loss": 0.0752,
|
|
"grad_norm": 0.6390823125839233,
|
|
"learning_rate": 4.532856857595714e-05,
|
|
"epoch": 0.7008821170809943,
|
|
"step": 874
|
|
},
|
|
{
|
|
"loss": 0.0452,
|
|
"grad_norm": 0.37735825777053833,
|
|
"learning_rate": 4.5106589341377394e-05,
|
|
"epoch": 0.7016840417000801,
|
|
"step": 875
|
|
},
|
|
{
|
|
"loss": 0.021,
|
|
"grad_norm": 0.18500256538391113,
|
|
"learning_rate": 4.488499657725511e-05,
|
|
"epoch": 0.702485966319166,
|
|
"step": 876
|
|
},
|
|
{
|
|
"loss": 0.0621,
|
|
"grad_norm": 0.43604958057403564,
|
|
"learning_rate": 4.466379184368747e-05,
|
|
"epoch": 0.7032878909382518,
|
|
"step": 877
|
|
},
|
|
{
|
|
"loss": 0.0204,
|
|
"grad_norm": 0.5190374851226807,
|
|
"learning_rate": 4.444297669803981e-05,
|
|
"epoch": 0.7040898155573376,
|
|
"step": 878
|
|
},
|
|
{
|
|
"loss": 0.0131,
|
|
"grad_norm": 0.3594497740268707,
|
|
"learning_rate": 4.422255269493455e-05,
|
|
"epoch": 0.7048917401764234,
|
|
"step": 879
|
|
},
|
|
{
|
|
"loss": 0.0234,
|
|
"grad_norm": 0.26429349184036255,
|
|
"learning_rate": 4.4002521386240466e-05,
|
|
"epoch": 0.7056936647955092,
|
|
"step": 880
|
|
},
|
|
{
|
|
"loss": 0.0205,
|
|
"grad_norm": 0.3388163447380066,
|
|
"learning_rate": 4.37828843210615e-05,
|
|
"epoch": 0.706495589414595,
|
|
"step": 881
|
|
},
|
|
{
|
|
"loss": 0.0264,
|
|
"grad_norm": 0.21950915455818176,
|
|
"learning_rate": 4.3563643045725964e-05,
|
|
"epoch": 0.7072975140336808,
|
|
"step": 882
|
|
},
|
|
{
|
|
"loss": 0.0542,
|
|
"grad_norm": 0.40110042691230774,
|
|
"learning_rate": 4.334479910377577e-05,
|
|
"epoch": 0.7080994386527666,
|
|
"step": 883
|
|
},
|
|
{
|
|
"loss": 0.078,
|
|
"grad_norm": 0.4152352511882782,
|
|
"learning_rate": 4.312635403595532e-05,
|
|
"epoch": 0.7089013632718525,
|
|
"step": 884
|
|
},
|
|
{
|
|
"loss": 0.0244,
|
|
"grad_norm": 0.20890304446220398,
|
|
"learning_rate": 4.290830938020087e-05,
|
|
"epoch": 0.7097032878909383,
|
|
"step": 885
|
|
},
|
|
{
|
|
"loss": 0.0318,
|
|
"grad_norm": 0.32398372888565063,
|
|
"learning_rate": 4.269066667162956e-05,
|
|
"epoch": 0.7105052125100241,
|
|
"step": 886
|
|
},
|
|
{
|
|
"loss": 0.0615,
|
|
"grad_norm": 0.3690579831600189,
|
|
"learning_rate": 4.247342744252883e-05,
|
|
"epoch": 0.7113071371291099,
|
|
"step": 887
|
|
},
|
|
{
|
|
"loss": 0.0299,
|
|
"grad_norm": 0.4021519422531128,
|
|
"learning_rate": 4.2256593222345185e-05,
|
|
"epoch": 0.7121090617481957,
|
|
"step": 888
|
|
},
|
|
{
|
|
"loss": 0.0228,
|
|
"grad_norm": 0.24381564557552338,
|
|
"learning_rate": 4.2040165537674006e-05,
|
|
"epoch": 0.7129109863672815,
|
|
"step": 889
|
|
},
|
|
{
|
|
"loss": 0.0786,
|
|
"grad_norm": 0.5315597057342529,
|
|
"learning_rate": 4.182414591224833e-05,
|
|
"epoch": 0.7137129109863672,
|
|
"step": 890
|
|
},
|
|
{
|
|
"loss": 0.0306,
|
|
"grad_norm": 0.29537150263786316,
|
|
"learning_rate": 4.160853586692839e-05,
|
|
"epoch": 0.714514835605453,
|
|
"step": 891
|
|
},
|
|
{
|
|
"loss": 0.029,
|
|
"grad_norm": 0.3600987195968628,
|
|
"learning_rate": 4.139333691969071e-05,
|
|
"epoch": 0.7153167602245389,
|
|
"step": 892
|
|
},
|
|
{
|
|
"loss": 0.0235,
|
|
"grad_norm": 0.23906612396240234,
|
|
"learning_rate": 4.117855058561769e-05,
|
|
"epoch": 0.7161186848436247,
|
|
"step": 893
|
|
},
|
|
{
|
|
"loss": 0.1348,
|
|
"grad_norm": 0.7623001337051392,
|
|
"learning_rate": 4.096417837688666e-05,
|
|
"epoch": 0.7169206094627105,
|
|
"step": 894
|
|
},
|
|
{
|
|
"loss": 0.0216,
|
|
"grad_norm": 0.33109530806541443,
|
|
"learning_rate": 4.075022180275935e-05,
|
|
"epoch": 0.7177225340817963,
|
|
"step": 895
|
|
},
|
|
{
|
|
"loss": 0.0213,
|
|
"grad_norm": 0.172570139169693,
|
|
"learning_rate": 4.053668236957134e-05,
|
|
"epoch": 0.7185244587008821,
|
|
"step": 896
|
|
},
|
|
{
|
|
"loss": 0.0213,
|
|
"grad_norm": 0.27047714591026306,
|
|
"learning_rate": 4.032356158072131e-05,
|
|
"epoch": 0.7193263833199679,
|
|
"step": 897
|
|
},
|
|
{
|
|
"loss": 0.0791,
|
|
"grad_norm": 0.4213772118091583,
|
|
"learning_rate": 4.0110860936660566e-05,
|
|
"epoch": 0.7201283079390537,
|
|
"step": 898
|
|
},
|
|
{
|
|
"loss": 0.0458,
|
|
"grad_norm": 0.38493579626083374,
|
|
"learning_rate": 3.989858193488236e-05,
|
|
"epoch": 0.7209302325581395,
|
|
"step": 899
|
|
},
|
|
{
|
|
"loss": 0.022,
|
|
"grad_norm": 0.23332200944423676,
|
|
"learning_rate": 3.96867260699116e-05,
|
|
"epoch": 0.7217321571772254,
|
|
"step": 900
|
|
},
|
|
{
|
|
"loss": 0.0438,
|
|
"grad_norm": 0.3719151020050049,
|
|
"learning_rate": 3.947529483329387e-05,
|
|
"epoch": 0.7225340817963112,
|
|
"step": 901
|
|
},
|
|
{
|
|
"loss": 0.0294,
|
|
"grad_norm": 0.18766042590141296,
|
|
"learning_rate": 3.92642897135855e-05,
|
|
"epoch": 0.723336006415397,
|
|
"step": 902
|
|
},
|
|
{
|
|
"loss": 0.0163,
|
|
"grad_norm": 0.17008039355278015,
|
|
"learning_rate": 3.905371219634257e-05,
|
|
"epoch": 0.7241379310344828,
|
|
"step": 903
|
|
},
|
|
{
|
|
"loss": 0.0293,
|
|
"grad_norm": 0.2763400673866272,
|
|
"learning_rate": 3.884356376411089e-05,
|
|
"epoch": 0.7249398556535686,
|
|
"step": 904
|
|
},
|
|
{
|
|
"loss": 0.0286,
|
|
"grad_norm": 0.3425106704235077,
|
|
"learning_rate": 3.863384589641509e-05,
|
|
"epoch": 0.7257417802726543,
|
|
"step": 905
|
|
},
|
|
{
|
|
"loss": 0.0221,
|
|
"grad_norm": 0.3655487596988678,
|
|
"learning_rate": 3.8424560069748706e-05,
|
|
"epoch": 0.7265437048917401,
|
|
"step": 906
|
|
},
|
|
{
|
|
"loss": 0.0191,
|
|
"grad_norm": 0.22211404144763947,
|
|
"learning_rate": 3.821570775756339e-05,
|
|
"epoch": 0.7273456295108259,
|
|
"step": 907
|
|
},
|
|
{
|
|
"loss": 0.0415,
|
|
"grad_norm": 0.3968844711780548,
|
|
"learning_rate": 3.800729043025871e-05,
|
|
"epoch": 0.7281475541299118,
|
|
"step": 908
|
|
},
|
|
{
|
|
"loss": 0.0477,
|
|
"grad_norm": 0.30855193734169006,
|
|
"learning_rate": 3.779930955517187e-05,
|
|
"epoch": 0.7289494787489976,
|
|
"step": 909
|
|
},
|
|
{
|
|
"loss": 0.0186,
|
|
"grad_norm": 0.20964409410953522,
|
|
"learning_rate": 3.759176659656717e-05,
|
|
"epoch": 0.7297514033680834,
|
|
"step": 910
|
|
},
|
|
{
|
|
"loss": 0.0169,
|
|
"grad_norm": 0.20416317880153656,
|
|
"learning_rate": 3.7384663015625854e-05,
|
|
"epoch": 0.7305533279871692,
|
|
"step": 911
|
|
},
|
|
{
|
|
"loss": 0.1093,
|
|
"grad_norm": 0.6007756590843201,
|
|
"learning_rate": 3.717800027043576e-05,
|
|
"epoch": 0.731355252606255,
|
|
"step": 912
|
|
},
|
|
{
|
|
"loss": 0.0393,
|
|
"grad_norm": 0.4740281403064728,
|
|
"learning_rate": 3.697177981598115e-05,
|
|
"epoch": 0.7321571772253408,
|
|
"step": 913
|
|
},
|
|
{
|
|
"loss": 0.0507,
|
|
"grad_norm": 0.39300405979156494,
|
|
"learning_rate": 3.676600310413233e-05,
|
|
"epoch": 0.7329591018444266,
|
|
"step": 914
|
|
},
|
|
{
|
|
"loss": 0.0255,
|
|
"grad_norm": 0.29988205432891846,
|
|
"learning_rate": 3.6560671583635467e-05,
|
|
"epoch": 0.7337610264635124,
|
|
"step": 915
|
|
},
|
|
{
|
|
"loss": 0.0219,
|
|
"grad_norm": 0.22536736726760864,
|
|
"learning_rate": 3.635578670010242e-05,
|
|
"epoch": 0.7345629510825983,
|
|
"step": 916
|
|
},
|
|
{
|
|
"loss": 0.0398,
|
|
"grad_norm": 0.29492881894111633,
|
|
"learning_rate": 3.615134989600069e-05,
|
|
"epoch": 0.7353648757016841,
|
|
"step": 917
|
|
},
|
|
{
|
|
"loss": 0.0413,
|
|
"grad_norm": 0.3680134415626526,
|
|
"learning_rate": 3.5947362610642854e-05,
|
|
"epoch": 0.7361668003207699,
|
|
"step": 918
|
|
},
|
|
{
|
|
"loss": 0.0451,
|
|
"grad_norm": 0.2880399525165558,
|
|
"learning_rate": 3.5743826280177e-05,
|
|
"epoch": 0.7369687249398557,
|
|
"step": 919
|
|
},
|
|
{
|
|
"loss": 0.0463,
|
|
"grad_norm": 0.38011434674263,
|
|
"learning_rate": 3.554074233757608e-05,
|
|
"epoch": 0.7377706495589414,
|
|
"step": 920
|
|
},
|
|
{
|
|
"loss": 0.0236,
|
|
"grad_norm": 0.21362242102622986,
|
|
"learning_rate": 3.533811221262833e-05,
|
|
"epoch": 0.7385725741780272,
|
|
"step": 921
|
|
},
|
|
{
|
|
"loss": 0.0611,
|
|
"grad_norm": 0.49550414085388184,
|
|
"learning_rate": 3.5135937331926596e-05,
|
|
"epoch": 0.739374498797113,
|
|
"step": 922
|
|
},
|
|
{
|
|
"loss": 0.0312,
|
|
"grad_norm": 0.2971956133842468,
|
|
"learning_rate": 3.4934219118858936e-05,
|
|
"epoch": 0.7401764234161988,
|
|
"step": 923
|
|
},
|
|
{
|
|
"loss": 0.0168,
|
|
"grad_norm": 0.21751493215560913,
|
|
"learning_rate": 3.4732958993598154e-05,
|
|
"epoch": 0.7409783480352847,
|
|
"step": 924
|
|
},
|
|
{
|
|
"loss": 0.0316,
|
|
"grad_norm": 0.25100478529930115,
|
|
"learning_rate": 3.453215837309192e-05,
|
|
"epoch": 0.7417802726543705,
|
|
"step": 925
|
|
},
|
|
{
|
|
"loss": 0.0355,
|
|
"grad_norm": 0.23625293374061584,
|
|
"learning_rate": 3.4331818671052906e-05,
|
|
"epoch": 0.7425821972734563,
|
|
"step": 926
|
|
},
|
|
{
|
|
"loss": 0.0158,
|
|
"grad_norm": 0.4031226336956024,
|
|
"learning_rate": 3.413194129794869e-05,
|
|
"epoch": 0.7433841218925421,
|
|
"step": 927
|
|
},
|
|
{
|
|
"loss": 0.0269,
|
|
"grad_norm": 0.27065587043762207,
|
|
"learning_rate": 3.393252766099187e-05,
|
|
"epoch": 0.7441860465116279,
|
|
"step": 928
|
|
},
|
|
{
|
|
"loss": 0.0348,
|
|
"grad_norm": 0.2262876033782959,
|
|
"learning_rate": 3.373357916413016e-05,
|
|
"epoch": 0.7449879711307137,
|
|
"step": 929
|
|
},
|
|
{
|
|
"loss": 0.0519,
|
|
"grad_norm": 0.4427652359008789,
|
|
"learning_rate": 3.353509720803658e-05,
|
|
"epoch": 0.7457898957497995,
|
|
"step": 930
|
|
},
|
|
{
|
|
"loss": 0.0686,
|
|
"grad_norm": 0.46217551827430725,
|
|
"learning_rate": 3.333708319009945e-05,
|
|
"epoch": 0.7465918203688853,
|
|
"step": 931
|
|
},
|
|
{
|
|
"loss": 0.0278,
|
|
"grad_norm": 0.3490634262561798,
|
|
"learning_rate": 3.313953850441266e-05,
|
|
"epoch": 0.7473937449879712,
|
|
"step": 932
|
|
},
|
|
{
|
|
"loss": 0.0184,
|
|
"grad_norm": 0.23873376846313477,
|
|
"learning_rate": 3.294246454176577e-05,
|
|
"epoch": 0.748195669607057,
|
|
"step": 933
|
|
},
|
|
{
|
|
"loss": 0.0297,
|
|
"grad_norm": 0.35058480501174927,
|
|
"learning_rate": 3.274586268963443e-05,
|
|
"epoch": 0.7489975942261428,
|
|
"step": 934
|
|
},
|
|
{
|
|
"loss": 0.0616,
|
|
"grad_norm": 0.7911800742149353,
|
|
"learning_rate": 3.254973433217021e-05,
|
|
"epoch": 0.7497995188452286,
|
|
"step": 935
|
|
},
|
|
{
|
|
"loss": 0.0355,
|
|
"grad_norm": 0.2944418489933014,
|
|
"learning_rate": 3.2354080850191324e-05,
|
|
"epoch": 0.7506014434643143,
|
|
"step": 936
|
|
},
|
|
{
|
|
"loss": 0.0705,
|
|
"grad_norm": 0.495128333568573,
|
|
"learning_rate": 3.215890362117256e-05,
|
|
"epoch": 0.7514033680834001,
|
|
"step": 937
|
|
},
|
|
{
|
|
"loss": 0.0297,
|
|
"grad_norm": 0.3445483446121216,
|
|
"learning_rate": 3.196420401923566e-05,
|
|
"epoch": 0.7522052927024859,
|
|
"step": 938
|
|
},
|
|
{
|
|
"loss": 0.025,
|
|
"grad_norm": 0.28738442063331604,
|
|
"learning_rate": 3.176998341513989e-05,
|
|
"epoch": 0.7530072173215717,
|
|
"step": 939
|
|
},
|
|
{
|
|
"loss": 0.0215,
|
|
"grad_norm": 0.24774937331676483,
|
|
"learning_rate": 3.157624317627195e-05,
|
|
"epoch": 0.7538091419406576,
|
|
"step": 940
|
|
},
|
|
{
|
|
"loss": 0.0215,
|
|
"grad_norm": 0.2146318256855011,
|
|
"learning_rate": 3.138298466663681e-05,
|
|
"epoch": 0.7546110665597434,
|
|
"step": 941
|
|
},
|
|
{
|
|
"loss": 0.0433,
|
|
"grad_norm": 0.27725639939308167,
|
|
"learning_rate": 3.119020924684762e-05,
|
|
"epoch": 0.7554129911788292,
|
|
"step": 942
|
|
},
|
|
{
|
|
"loss": 0.0978,
|
|
"grad_norm": 0.4043017029762268,
|
|
"learning_rate": 3.099791827411668e-05,
|
|
"epoch": 0.756214915797915,
|
|
"step": 943
|
|
},
|
|
{
|
|
"loss": 0.0499,
|
|
"grad_norm": 0.33598214387893677,
|
|
"learning_rate": 3.080611310224539e-05,
|
|
"epoch": 0.7570168404170008,
|
|
"step": 944
|
|
},
|
|
{
|
|
"loss": 0.0422,
|
|
"grad_norm": 0.5092307925224304,
|
|
"learning_rate": 3.061479508161502e-05,
|
|
"epoch": 0.7578187650360866,
|
|
"step": 945
|
|
},
|
|
{
|
|
"loss": 0.0192,
|
|
"grad_norm": 0.37134242057800293,
|
|
"learning_rate": 3.042396555917707e-05,
|
|
"epoch": 0.7586206896551724,
|
|
"step": 946
|
|
},
|
|
{
|
|
"loss": 0.0539,
|
|
"grad_norm": 0.36531612277030945,
|
|
"learning_rate": 3.0233625878443927e-05,
|
|
"epoch": 0.7594226142742582,
|
|
"step": 947
|
|
},
|
|
{
|
|
"loss": 0.0405,
|
|
"grad_norm": 0.3020681142807007,
|
|
"learning_rate": 3.0043777379479098e-05,
|
|
"epoch": 0.7602245388933441,
|
|
"step": 948
|
|
},
|
|
{
|
|
"loss": 0.093,
|
|
"grad_norm": 0.45718201994895935,
|
|
"learning_rate": 2.985442139888821e-05,
|
|
"epoch": 0.7610264635124299,
|
|
"step": 949
|
|
},
|
|
{
|
|
"loss": 0.0479,
|
|
"grad_norm": 0.26453983783721924,
|
|
"learning_rate": 2.9665559269809217e-05,
|
|
"epoch": 0.7618283881315157,
|
|
"step": 950
|
|
},
|
|
{
|
|
"loss": 0.06,
|
|
"grad_norm": 0.3121758699417114,
|
|
"learning_rate": 2.9477192321903292e-05,
|
|
"epoch": 0.7626303127506014,
|
|
"step": 951
|
|
},
|
|
{
|
|
"loss": 0.0204,
|
|
"grad_norm": 0.20015697181224823,
|
|
"learning_rate": 2.9289321881345254e-05,
|
|
"epoch": 0.7634322373696872,
|
|
"step": 952
|
|
},
|
|
{
|
|
"loss": 0.0371,
|
|
"grad_norm": 0.36291614174842834,
|
|
"learning_rate": 2.9101949270814344e-05,
|
|
"epoch": 0.764234161988773,
|
|
"step": 953
|
|
},
|
|
{
|
|
"loss": 0.0229,
|
|
"grad_norm": 0.2591784596443176,
|
|
"learning_rate": 2.8915075809484904e-05,
|
|
"epoch": 0.7650360866078588,
|
|
"step": 954
|
|
},
|
|
{
|
|
"loss": 0.0261,
|
|
"grad_norm": 0.2614014744758606,
|
|
"learning_rate": 2.872870281301704e-05,
|
|
"epoch": 0.7658380112269446,
|
|
"step": 955
|
|
},
|
|
{
|
|
"loss": 0.0228,
|
|
"grad_norm": 0.3347266912460327,
|
|
"learning_rate": 2.854283159354748e-05,
|
|
"epoch": 0.7666399358460305,
|
|
"step": 956
|
|
},
|
|
{
|
|
"loss": 0.0368,
|
|
"grad_norm": 0.3613988757133484,
|
|
"learning_rate": 2.835746345968012e-05,
|
|
"epoch": 0.7674418604651163,
|
|
"step": 957
|
|
},
|
|
{
|
|
"loss": 0.0175,
|
|
"grad_norm": 0.2561342716217041,
|
|
"learning_rate": 2.8172599716477143e-05,
|
|
"epoch": 0.7682437850842021,
|
|
"step": 958
|
|
},
|
|
{
|
|
"loss": 0.0396,
|
|
"grad_norm": 0.2864450216293335,
|
|
"learning_rate": 2.7988241665449354e-05,
|
|
"epoch": 0.7690457097032879,
|
|
"step": 959
|
|
},
|
|
{
|
|
"loss": 0.0294,
|
|
"grad_norm": 0.2632593512535095,
|
|
"learning_rate": 2.7804390604547557e-05,
|
|
"epoch": 0.7698476343223737,
|
|
"step": 960
|
|
},
|
|
{
|
|
"loss": 0.0252,
|
|
"grad_norm": 0.2442079782485962,
|
|
"learning_rate": 2.7621047828153e-05,
|
|
"epoch": 0.7706495589414595,
|
|
"step": 961
|
|
},
|
|
{
|
|
"loss": 0.027,
|
|
"grad_norm": 0.30140507221221924,
|
|
"learning_rate": 2.7438214627068448e-05,
|
|
"epoch": 0.7714514835605453,
|
|
"step": 962
|
|
},
|
|
{
|
|
"loss": 0.0602,
|
|
"grad_norm": 0.4731035828590393,
|
|
"learning_rate": 2.7255892288509043e-05,
|
|
"epoch": 0.7722534081796311,
|
|
"step": 963
|
|
},
|
|
{
|
|
"loss": 0.0201,
|
|
"grad_norm": 0.25631648302078247,
|
|
"learning_rate": 2.707408209609339e-05,
|
|
"epoch": 0.773055332798717,
|
|
"step": 964
|
|
},
|
|
{
|
|
"loss": 0.0316,
|
|
"grad_norm": 0.2207869291305542,
|
|
"learning_rate": 2.689278532983416e-05,
|
|
"epoch": 0.7738572574178028,
|
|
"step": 965
|
|
},
|
|
{
|
|
"loss": 0.0997,
|
|
"grad_norm": 0.574215292930603,
|
|
"learning_rate": 2.6712003266129525e-05,
|
|
"epoch": 0.7746591820368885,
|
|
"step": 966
|
|
},
|
|
{
|
|
"loss": 0.107,
|
|
"grad_norm": 0.5775609612464905,
|
|
"learning_rate": 2.65317371777538e-05,
|
|
"epoch": 0.7754611066559743,
|
|
"step": 967
|
|
},
|
|
{
|
|
"loss": 0.028,
|
|
"grad_norm": 0.37875795364379883,
|
|
"learning_rate": 2.6351988333848788e-05,
|
|
"epoch": 0.7762630312750601,
|
|
"step": 968
|
|
},
|
|
{
|
|
"loss": 0.026,
|
|
"grad_norm": 0.2766773998737335,
|
|
"learning_rate": 2.6172757999914554e-05,
|
|
"epoch": 0.7770649558941459,
|
|
"step": 969
|
|
},
|
|
{
|
|
"loss": 0.066,
|
|
"grad_norm": 0.3758871555328369,
|
|
"learning_rate": 2.5994047437800706e-05,
|
|
"epoch": 0.7778668805132317,
|
|
"step": 970
|
|
},
|
|
{
|
|
"loss": 0.019,
|
|
"grad_norm": 0.2001071274280548,
|
|
"learning_rate": 2.5815857905697548e-05,
|
|
"epoch": 0.7786688051323175,
|
|
"step": 971
|
|
},
|
|
{
|
|
"loss": 0.0469,
|
|
"grad_norm": 0.35508066415786743,
|
|
"learning_rate": 2.5638190658126938e-05,
|
|
"epoch": 0.7794707297514034,
|
|
"step": 972
|
|
},
|
|
{
|
|
"loss": 0.05,
|
|
"grad_norm": 0.4158821403980255,
|
|
"learning_rate": 2.5461046945933854e-05,
|
|
"epoch": 0.7802726543704892,
|
|
"step": 973
|
|
},
|
|
{
|
|
"loss": 0.0471,
|
|
"grad_norm": 0.4256257712841034,
|
|
"learning_rate": 2.5284428016277284e-05,
|
|
"epoch": 0.781074578989575,
|
|
"step": 974
|
|
},
|
|
{
|
|
"loss": 0.0345,
|
|
"grad_norm": 0.4111097753047943,
|
|
"learning_rate": 2.5108335112621562e-05,
|
|
"epoch": 0.7818765036086608,
|
|
"step": 975
|
|
},
|
|
{
|
|
"loss": 0.0287,
|
|
"grad_norm": 0.2494903802871704,
|
|
"learning_rate": 2.493276947472756e-05,
|
|
"epoch": 0.7826784282277466,
|
|
"step": 976
|
|
},
|
|
{
|
|
"loss": 0.0141,
|
|
"grad_norm": 0.3150325119495392,
|
|
"learning_rate": 2.4757732338644124e-05,
|
|
"epoch": 0.7834803528468324,
|
|
"step": 977
|
|
},
|
|
{
|
|
"loss": 0.0281,
|
|
"grad_norm": 0.33352166414260864,
|
|
"learning_rate": 2.458322493669911e-05,
|
|
"epoch": 0.7842822774659182,
|
|
"step": 978
|
|
},
|
|
{
|
|
"loss": 0.0404,
|
|
"grad_norm": 0.2655154764652252,
|
|
"learning_rate": 2.4409248497490922e-05,
|
|
"epoch": 0.785084202085004,
|
|
"step": 979
|
|
},
|
|
{
|
|
"loss": 0.0338,
|
|
"grad_norm": 0.4300474524497986,
|
|
"learning_rate": 2.4235804245879723e-05,
|
|
"epoch": 0.7858861267040899,
|
|
"step": 980
|
|
},
|
|
{
|
|
"loss": 0.0173,
|
|
"grad_norm": 0.18177032470703125,
|
|
"learning_rate": 2.4062893402978958e-05,
|
|
"epoch": 0.7866880513231757,
|
|
"step": 981
|
|
},
|
|
{
|
|
"loss": 0.0422,
|
|
"grad_norm": 0.3139914572238922,
|
|
"learning_rate": 2.389051718614662e-05,
|
|
"epoch": 0.7874899759422614,
|
|
"step": 982
|
|
},
|
|
{
|
|
"loss": 0.0139,
|
|
"grad_norm": 0.16391263902187347,
|
|
"learning_rate": 2.371867680897668e-05,
|
|
"epoch": 0.7882919005613472,
|
|
"step": 983
|
|
},
|
|
{
|
|
"loss": 0.0496,
|
|
"grad_norm": 0.3264078199863434,
|
|
"learning_rate": 2.354737348129077e-05,
|
|
"epoch": 0.789093825180433,
|
|
"step": 984
|
|
},
|
|
{
|
|
"loss": 0.0342,
|
|
"grad_norm": 0.3129443824291229,
|
|
"learning_rate": 2.337660840912923e-05,
|
|
"epoch": 0.7898957497995188,
|
|
"step": 985
|
|
},
|
|
{
|
|
"loss": 0.0892,
|
|
"grad_norm": 0.5504446029663086,
|
|
"learning_rate": 2.320638279474312e-05,
|
|
"epoch": 0.7906976744186046,
|
|
"step": 986
|
|
},
|
|
{
|
|
"loss": 0.0212,
|
|
"grad_norm": 0.2645837664604187,
|
|
"learning_rate": 2.3036697836585353e-05,
|
|
"epoch": 0.7914995990376904,
|
|
"step": 987
|
|
},
|
|
{
|
|
"loss": 0.0374,
|
|
"grad_norm": 0.3089625835418701,
|
|
"learning_rate": 2.2867554729302542e-05,
|
|
"epoch": 0.7923015236567763,
|
|
"step": 988
|
|
},
|
|
{
|
|
"loss": 0.068,
|
|
"grad_norm": 0.5308516025543213,
|
|
"learning_rate": 2.26989546637263e-05,
|
|
"epoch": 0.7931034482758621,
|
|
"step": 989
|
|
},
|
|
{
|
|
"loss": 0.0077,
|
|
"grad_norm": 0.20258042216300964,
|
|
"learning_rate": 2.25308988268652e-05,
|
|
"epoch": 0.7939053728949479,
|
|
"step": 990
|
|
},
|
|
{
|
|
"loss": 0.0377,
|
|
"grad_norm": 0.3660711348056793,
|
|
"learning_rate": 2.2363388401896124e-05,
|
|
"epoch": 0.7947072975140337,
|
|
"step": 991
|
|
},
|
|
{
|
|
"loss": 0.037,
|
|
"grad_norm": 0.3499682545661926,
|
|
"learning_rate": 2.2196424568156073e-05,
|
|
"epoch": 0.7955092221331195,
|
|
"step": 992
|
|
},
|
|
{
|
|
"eval_loss": 0.039402980357408524,
|
|
"eval_runtime": 31.7538,
|
|
"eval_samples_per_second": 33.067,
|
|
"eval_steps_per_second": 8.282,
|
|
"epoch": 0.7955092221331195,
|
|
"step": 992
|
|
},
|
|
{
|
|
"loss": 0.0333,
|
|
"grad_norm": 0.259440541267395,
|
|
"learning_rate": 2.2030008501133815e-05,
|
|
"epoch": 0.7963111467522053,
|
|
"step": 993
|
|
},
|
|
{
|
|
"loss": 0.0071,
|
|
"grad_norm": 0.16091641783714294,
|
|
"learning_rate": 2.186414137246172e-05,
|
|
"epoch": 0.7971130713712911,
|
|
"step": 994
|
|
},
|
|
{
|
|
"loss": 0.0539,
|
|
"grad_norm": 0.4330938160419464,
|
|
"learning_rate": 2.1698824349907344e-05,
|
|
"epoch": 0.7979149959903769,
|
|
"step": 995
|
|
},
|
|
{
|
|
"loss": 0.0287,
|
|
"grad_norm": 0.2377873659133911,
|
|
"learning_rate": 2.153405859736528e-05,
|
|
"epoch": 0.7987169206094628,
|
|
"step": 996
|
|
},
|
|
{
|
|
"loss": 0.0286,
|
|
"grad_norm": 0.30016374588012695,
|
|
"learning_rate": 2.136984527484901e-05,
|
|
"epoch": 0.7995188452285485,
|
|
"step": 997
|
|
},
|
|
{
|
|
"loss": 0.0644,
|
|
"grad_norm": 0.3702533543109894,
|
|
"learning_rate": 2.1206185538482703e-05,
|
|
"epoch": 0.8003207698476343,
|
|
"step": 998
|
|
},
|
|
{
|
|
"loss": 0.0425,
|
|
"grad_norm": 0.31753045320510864,
|
|
"learning_rate": 2.1043080540493056e-05,
|
|
"epoch": 0.8011226944667201,
|
|
"step": 999
|
|
},
|
|
{
|
|
"loss": 0.0495,
|
|
"grad_norm": 0.26273587346076965,
|
|
"learning_rate": 2.0880531429201145e-05,
|
|
"epoch": 0.8019246190858059,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"loss": 0.0328,
|
|
"grad_norm": 0.2284964621067047,
|
|
"learning_rate": 2.0718539349014544e-05,
|
|
"epoch": 0.8027265437048917,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"loss": 0.0299,
|
|
"grad_norm": 0.31368839740753174,
|
|
"learning_rate": 2.05571054404189e-05,
|
|
"epoch": 0.8035284683239775,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"loss": 0.0734,
|
|
"grad_norm": 0.5880581736564636,
|
|
"learning_rate": 2.039623083997031e-05,
|
|
"epoch": 0.8043303929430633,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"loss": 0.0626,
|
|
"grad_norm": 0.6813879609107971,
|
|
"learning_rate": 2.0235916680287015e-05,
|
|
"epoch": 0.8051323175621492,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"loss": 0.0493,
|
|
"grad_norm": 0.8034153580665588,
|
|
"learning_rate": 2.007616409004165e-05,
|
|
"epoch": 0.805934242181235,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"loss": 0.0275,
|
|
"grad_norm": 0.2636951208114624,
|
|
"learning_rate": 1.991697419395301e-05,
|
|
"epoch": 0.8067361668003208,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"loss": 0.0725,
|
|
"grad_norm": 0.7305841445922852,
|
|
"learning_rate": 1.97583481127785e-05,
|
|
"epoch": 0.8075380914194066,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"loss": 0.0484,
|
|
"grad_norm": 0.3095945119857788,
|
|
"learning_rate": 1.9600286963305957e-05,
|
|
"epoch": 0.8083400160384924,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"loss": 0.0204,
|
|
"grad_norm": 0.21153652667999268,
|
|
"learning_rate": 1.9442791858345887e-05,
|
|
"epoch": 0.8091419406575782,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"loss": 0.0456,
|
|
"grad_norm": 0.2735914885997772,
|
|
"learning_rate": 1.928586390672361e-05,
|
|
"epoch": 0.809943865276664,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"loss": 0.0288,
|
|
"grad_norm": 0.2760597765445709,
|
|
"learning_rate": 1.9129504213271564e-05,
|
|
"epoch": 0.8107457898957497,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"loss": 0.0254,
|
|
"grad_norm": 0.25427863001823425,
|
|
"learning_rate": 1.897371387882134e-05,
|
|
"epoch": 0.8115477145148356,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"loss": 0.0204,
|
|
"grad_norm": 0.23352181911468506,
|
|
"learning_rate": 1.881849400019602e-05,
|
|
"epoch": 0.8123496391339214,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"loss": 0.0979,
|
|
"grad_norm": 0.5435330867767334,
|
|
"learning_rate": 1.8663845670202563e-05,
|
|
"epoch": 0.8131515637530072,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"loss": 0.025,
|
|
"grad_norm": 0.2618383467197418,
|
|
"learning_rate": 1.85097699776239e-05,
|
|
"epoch": 0.813953488372093,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"loss": 0.024,
|
|
"grad_norm": 0.26069387793540955,
|
|
"learning_rate": 1.835626800721144e-05,
|
|
"epoch": 0.8147554129911788,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"loss": 0.0237,
|
|
"grad_norm": 0.2411407083272934,
|
|
"learning_rate": 1.8203340839677308e-05,
|
|
"epoch": 0.8155573376102646,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"loss": 0.0388,
|
|
"grad_norm": 0.4038242697715759,
|
|
"learning_rate": 1.8050989551686914e-05,
|
|
"epoch": 0.8163592622293504,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"loss": 0.068,
|
|
"grad_norm": 0.5744785070419312,
|
|
"learning_rate": 1.7899215215851084e-05,
|
|
"epoch": 0.8171611868484362,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"loss": 0.0195,
|
|
"grad_norm": 0.23403626680374146,
|
|
"learning_rate": 1.7748018900718854e-05,
|
|
"epoch": 0.8179631114675221,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"loss": 0.0095,
|
|
"grad_norm": 0.11110510677099228,
|
|
"learning_rate": 1.7597401670769685e-05,
|
|
"epoch": 0.8187650360866079,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"loss": 0.0321,
|
|
"grad_norm": 0.38874614238739014,
|
|
"learning_rate": 1.7447364586406066e-05,
|
|
"epoch": 0.8195669607056937,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"loss": 0.083,
|
|
"grad_norm": 0.46676599979400635,
|
|
"learning_rate": 1.729790870394603e-05,
|
|
"epoch": 0.8203688853247795,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"loss": 0.0314,
|
|
"grad_norm": 0.4288753569126129,
|
|
"learning_rate": 1.7149035075615794e-05,
|
|
"epoch": 0.8211708099438653,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"loss": 0.0651,
|
|
"grad_norm": 0.29615238308906555,
|
|
"learning_rate": 1.7000744749542208e-05,
|
|
"epoch": 0.8219727345629511,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"loss": 0.068,
|
|
"grad_norm": 0.4459689259529114,
|
|
"learning_rate": 1.6853038769745467e-05,
|
|
"epoch": 0.8227746591820368,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"loss": 0.0259,
|
|
"grad_norm": 0.22963477671146393,
|
|
"learning_rate": 1.670591817613181e-05,
|
|
"epoch": 0.8235765838011226,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"loss": 0.0222,
|
|
"grad_norm": 0.2704809010028839,
|
|
"learning_rate": 1.6559384004486055e-05,
|
|
"epoch": 0.8243785084202085,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"loss": 0.028,
|
|
"grad_norm": 0.28117361664772034,
|
|
"learning_rate": 1.6413437286464417e-05,
|
|
"epoch": 0.8251804330392943,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"loss": 0.0303,
|
|
"grad_norm": 0.22778946161270142,
|
|
"learning_rate": 1.6268079049587203e-05,
|
|
"epoch": 0.8259823576583801,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"loss": 0.1011,
|
|
"grad_norm": 0.7209060788154602,
|
|
"learning_rate": 1.6123310317231643e-05,
|
|
"epoch": 0.8267842822774659,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"loss": 0.0223,
|
|
"grad_norm": 0.383091002702713,
|
|
"learning_rate": 1.5979132108624574e-05,
|
|
"epoch": 0.8275862068965517,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"loss": 0.0628,
|
|
"grad_norm": 0.5048542618751526,
|
|
"learning_rate": 1.583554543883532e-05,
|
|
"epoch": 0.8283881315156375,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"loss": 0.0255,
|
|
"grad_norm": 0.21313592791557312,
|
|
"learning_rate": 1.5692551318768556e-05,
|
|
"epoch": 0.8291900561347233,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"loss": 0.0251,
|
|
"grad_norm": 0.2026532143354416,
|
|
"learning_rate": 1.5550150755157268e-05,
|
|
"epoch": 0.8299919807538091,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"loss": 0.0779,
|
|
"grad_norm": 0.3825243413448334,
|
|
"learning_rate": 1.5408344750555383e-05,
|
|
"epoch": 0.830793905372895,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"loss": 0.0298,
|
|
"grad_norm": 0.31827080249786377,
|
|
"learning_rate": 1.5267134303331122e-05,
|
|
"epoch": 0.8315958299919808,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"loss": 0.0338,
|
|
"grad_norm": 0.25245165824890137,
|
|
"learning_rate": 1.5126520407659617e-05,
|
|
"epoch": 0.8323977546110666,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"loss": 0.026,
|
|
"grad_norm": 0.22130174934864044,
|
|
"learning_rate": 1.4986504053516105e-05,
|
|
"epoch": 0.8331996792301524,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"loss": 0.0215,
|
|
"grad_norm": 0.22145700454711914,
|
|
"learning_rate": 1.4847086226668872e-05,
|
|
"epoch": 0.8340016038492382,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"loss": 0.0759,
|
|
"grad_norm": 0.42060795426368713,
|
|
"learning_rate": 1.4708267908672401e-05,
|
|
"epoch": 0.834803528468324,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"loss": 0.0294,
|
|
"grad_norm": 0.32257431745529175,
|
|
"learning_rate": 1.4570050076860342e-05,
|
|
"epoch": 0.8356054530874097,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"loss": 0.0242,
|
|
"grad_norm": 0.16260656714439392,
|
|
"learning_rate": 1.4432433704338722e-05,
|
|
"epoch": 0.8364073777064955,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"loss": 0.0232,
|
|
"grad_norm": 0.2972884476184845,
|
|
"learning_rate": 1.429541975997908e-05,
|
|
"epoch": 0.8372093023255814,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"loss": 0.0321,
|
|
"grad_norm": 0.3135718107223511,
|
|
"learning_rate": 1.415900920841161e-05,
|
|
"epoch": 0.8380112269446672,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"loss": 0.0203,
|
|
"grad_norm": 0.21164710819721222,
|
|
"learning_rate": 1.4023203010018394e-05,
|
|
"epoch": 0.838813151563753,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"loss": 0.021,
|
|
"grad_norm": 0.2070372849702835,
|
|
"learning_rate": 1.3888002120926623e-05,
|
|
"epoch": 0.8396150761828388,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"loss": 0.0428,
|
|
"grad_norm": 0.39735156297683716,
|
|
"learning_rate": 1.3753407493001968e-05,
|
|
"epoch": 0.8404170008019246,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"loss": 0.02,
|
|
"grad_norm": 0.23848125338554382,
|
|
"learning_rate": 1.3619420073841637e-05,
|
|
"epoch": 0.8412189254210104,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"loss": 0.056,
|
|
"grad_norm": 0.3926337957382202,
|
|
"learning_rate": 1.3486040806767996e-05,
|
|
"epoch": 0.8420208500400962,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"loss": 0.0319,
|
|
"grad_norm": 0.2710540294647217,
|
|
"learning_rate": 1.3353270630821712e-05,
|
|
"epoch": 0.842822774659182,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"loss": 0.0402,
|
|
"grad_norm": 0.41882696747779846,
|
|
"learning_rate": 1.3221110480755305e-05,
|
|
"epoch": 0.8436246992782679,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"loss": 0.0475,
|
|
"grad_norm": 0.4259793162345886,
|
|
"learning_rate": 1.3089561287026319e-05,
|
|
"epoch": 0.8444266238973537,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"loss": 0.0314,
|
|
"grad_norm": 0.26140275597572327,
|
|
"learning_rate": 1.2958623975791118e-05,
|
|
"epoch": 0.8452285485164395,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"loss": 0.0476,
|
|
"grad_norm": 0.3547419309616089,
|
|
"learning_rate": 1.2828299468898076e-05,
|
|
"epoch": 0.8460304731355253,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"loss": 0.0493,
|
|
"grad_norm": 0.40817224979400635,
|
|
"learning_rate": 1.2698588683881186e-05,
|
|
"epoch": 0.846832397754611,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"loss": 0.0312,
|
|
"grad_norm": 0.2400854080915451,
|
|
"learning_rate": 1.2569492533953665e-05,
|
|
"epoch": 0.8476343223736968,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"loss": 0.0251,
|
|
"grad_norm": 0.3751298785209656,
|
|
"learning_rate": 1.2441011928001433e-05,
|
|
"epoch": 0.8484362469927826,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"loss": 0.0223,
|
|
"grad_norm": 0.2388581931591034,
|
|
"learning_rate": 1.2313147770576749e-05,
|
|
"epoch": 0.8492381716118684,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"loss": 0.0415,
|
|
"grad_norm": 0.3184750974178314,
|
|
"learning_rate": 1.2185900961891794e-05,
|
|
"epoch": 0.8500400962309543,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"loss": 0.0818,
|
|
"grad_norm": 0.5252367258071899,
|
|
"learning_rate": 1.2059272397812493e-05,
|
|
"epoch": 0.8508420208500401,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"loss": 0.0277,
|
|
"grad_norm": 0.3043983578681946,
|
|
"learning_rate": 1.1933262969851988e-05,
|
|
"epoch": 0.8516439454691259,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"loss": 0.0106,
|
|
"grad_norm": 0.09968919306993484,
|
|
"learning_rate": 1.1807873565164506e-05,
|
|
"epoch": 0.8524458700882117,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"loss": 0.0365,
|
|
"grad_norm": 0.27977362275123596,
|
|
"learning_rate": 1.1683105066539068e-05,
|
|
"epoch": 0.8532477947072975,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"loss": 0.0589,
|
|
"grad_norm": 0.3934069275856018,
|
|
"learning_rate": 1.1558958352393334e-05,
|
|
"epoch": 0.8540497193263833,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"loss": 0.0272,
|
|
"grad_norm": 0.24241115152835846,
|
|
"learning_rate": 1.1435434296767233e-05,
|
|
"epoch": 0.8548516439454691,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"loss": 0.0502,
|
|
"grad_norm": 0.31850865483283997,
|
|
"learning_rate": 1.1312533769317103e-05,
|
|
"epoch": 0.8556535685645549,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"loss": 0.0295,
|
|
"grad_norm": 0.2685650885105133,
|
|
"learning_rate": 1.1190257635309275e-05,
|
|
"epoch": 0.8564554931836408,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"loss": 0.0346,
|
|
"grad_norm": 0.20911841094493866,
|
|
"learning_rate": 1.106860675561424e-05,
|
|
"epoch": 0.8572574178027266,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"loss": 0.0187,
|
|
"grad_norm": 0.24455945193767548,
|
|
"learning_rate": 1.0947581986700306e-05,
|
|
"epoch": 0.8580593424218124,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"loss": 0.0396,
|
|
"grad_norm": 0.3328882157802582,
|
|
"learning_rate": 1.0827184180627858e-05,
|
|
"epoch": 0.8588612670408982,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"loss": 0.0463,
|
|
"grad_norm": 0.4071497917175293,
|
|
"learning_rate": 1.0707414185043163e-05,
|
|
"epoch": 0.859663191659984,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"loss": 0.0366,
|
|
"grad_norm": 0.411491334438324,
|
|
"learning_rate": 1.0588272843172454e-05,
|
|
"epoch": 0.8604651162790697,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"loss": 0.0312,
|
|
"grad_norm": 0.4028700888156891,
|
|
"learning_rate": 1.0469760993816057e-05,
|
|
"epoch": 0.8612670408981555,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"loss": 0.0297,
|
|
"grad_norm": 0.17155224084854126,
|
|
"learning_rate": 1.0351879471342374e-05,
|
|
"epoch": 0.8620689655172413,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"loss": 0.047,
|
|
"grad_norm": 0.3104127049446106,
|
|
"learning_rate": 1.0234629105682103e-05,
|
|
"epoch": 0.8628708901363272,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"loss": 0.0207,
|
|
"grad_norm": 0.2133323699235916,
|
|
"learning_rate": 1.0118010722322314e-05,
|
|
"epoch": 0.863672814755413,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"loss": 0.0143,
|
|
"grad_norm": 0.17464157938957214,
|
|
"learning_rate": 1.0002025142300765e-05,
|
|
"epoch": 0.8644747393744988,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"loss": 0.0289,
|
|
"grad_norm": 0.3865419328212738,
|
|
"learning_rate": 9.886673182199957e-06,
|
|
"epoch": 0.8652766639935846,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"loss": 0.0185,
|
|
"grad_norm": 0.2113240659236908,
|
|
"learning_rate": 9.771955654141496e-06,
|
|
"epoch": 0.8660785886126704,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"loss": 0.0135,
|
|
"grad_norm": 0.138031005859375,
|
|
"learning_rate": 9.657873365780323e-06,
|
|
"epoch": 0.8668805132317562,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"loss": 0.0308,
|
|
"grad_norm": 0.3399095833301544,
|
|
"learning_rate": 9.544427120299138e-06,
|
|
"epoch": 0.867682437850842,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"loss": 0.0146,
|
|
"grad_norm": 0.26471027731895447,
|
|
"learning_rate": 9.431617716402507e-06,
|
|
"epoch": 0.8684843624699278,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"loss": 0.0202,
|
|
"grad_norm": 0.19661951065063477,
|
|
"learning_rate": 9.319445948311534e-06,
|
|
"epoch": 0.8692862870890137,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"loss": 0.0636,
|
|
"grad_norm": 0.49121007323265076,
|
|
"learning_rate": 9.207912605758052e-06,
|
|
"epoch": 0.8700882117080995,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"loss": 0.0532,
|
|
"grad_norm": 0.4061635434627533,
|
|
"learning_rate": 9.097018473979124e-06,
|
|
"epoch": 0.8708901363271853,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"loss": 0.0411,
|
|
"grad_norm": 0.29139432311058044,
|
|
"learning_rate": 8.986764333711584e-06,
|
|
"epoch": 0.871692060946271,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"loss": 0.0191,
|
|
"grad_norm": 0.14276857674121857,
|
|
"learning_rate": 8.87715096118642e-06,
|
|
"epoch": 0.8724939855653568,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"loss": 0.0147,
|
|
"grad_norm": 0.19102653861045837,
|
|
"learning_rate": 8.768179128123455e-06,
|
|
"epoch": 0.8732959101844426,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"loss": 0.106,
|
|
"grad_norm": 0.6417858004570007,
|
|
"learning_rate": 8.659849601725701e-06,
|
|
"epoch": 0.8740978348035284,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"loss": 0.0448,
|
|
"grad_norm": 0.3078593313694,
|
|
"learning_rate": 8.55216314467422e-06,
|
|
"epoch": 0.8748997594226142,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"loss": 0.0169,
|
|
"grad_norm": 0.22024403512477875,
|
|
"learning_rate": 8.445120515122551e-06,
|
|
"epoch": 0.8757016840417001,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"loss": 0.0637,
|
|
"grad_norm": 0.4088142514228821,
|
|
"learning_rate": 8.338722466691451e-06,
|
|
"epoch": 0.8765036086607859,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"loss": 0.0353,
|
|
"grad_norm": 0.2129702866077423,
|
|
"learning_rate": 8.23296974846357e-06,
|
|
"epoch": 0.8773055332798717,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"loss": 0.0619,
|
|
"grad_norm": 0.438853919506073,
|
|
"learning_rate": 8.127863104978261e-06,
|
|
"epoch": 0.8781074578989575,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"loss": 0.0339,
|
|
"grad_norm": 0.30360084772109985,
|
|
"learning_rate": 8.023403276226126e-06,
|
|
"epoch": 0.8789093825180433,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"loss": 0.0257,
|
|
"grad_norm": 0.2785508930683136,
|
|
"learning_rate": 7.91959099764411e-06,
|
|
"epoch": 0.8797113071371291,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"loss": 0.0355,
|
|
"grad_norm": 0.2872433066368103,
|
|
"learning_rate": 7.816427000110015e-06,
|
|
"epoch": 0.8805132317562149,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"loss": 0.0103,
|
|
"grad_norm": 0.11933287978172302,
|
|
"learning_rate": 7.713912009937608e-06,
|
|
"epoch": 0.8813151563753007,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"loss": 0.0392,
|
|
"grad_norm": 0.26215773820877075,
|
|
"learning_rate": 7.612046748871327e-06,
|
|
"epoch": 0.8821170809943866,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"loss": 0.0147,
|
|
"grad_norm": 1.0445473194122314,
|
|
"learning_rate": 7.5108319340813085e-06,
|
|
"epoch": 0.8829190056134724,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"loss": 0.0212,
|
|
"grad_norm": 0.18120186030864716,
|
|
"learning_rate": 7.410268278158272e-06,
|
|
"epoch": 0.8837209302325582,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"loss": 0.0471,
|
|
"grad_norm": 0.37671101093292236,
|
|
"learning_rate": 7.310356489108538e-06,
|
|
"epoch": 0.884522854851644,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"loss": 0.0455,
|
|
"grad_norm": 0.38250431418418884,
|
|
"learning_rate": 7.211097270349066e-06,
|
|
"epoch": 0.8853247794707297,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"loss": 0.0233,
|
|
"grad_norm": 0.36999034881591797,
|
|
"learning_rate": 7.112491320702441e-06,
|
|
"epoch": 0.8861267040898155,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"loss": 0.0264,
|
|
"grad_norm": 0.2825574278831482,
|
|
"learning_rate": 7.014539334392012e-06,
|
|
"epoch": 0.8869286287089013,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"loss": 0.064,
|
|
"grad_norm": 0.35153815150260925,
|
|
"learning_rate": 6.917242001036917e-06,
|
|
"epoch": 0.8877305533279871,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"loss": 0.0188,
|
|
"grad_norm": 0.14561624825000763,
|
|
"learning_rate": 6.820600005647382e-06,
|
|
"epoch": 0.888532477947073,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"loss": 0.0561,
|
|
"grad_norm": 0.463888555765152,
|
|
"learning_rate": 6.7246140286197355e-06,
|
|
"epoch": 0.8893344025661588,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"loss": 0.0041,
|
|
"grad_norm": 0.0676012933254242,
|
|
"learning_rate": 6.629284745731701e-06,
|
|
"epoch": 0.8901363271852446,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"loss": 0.0159,
|
|
"grad_norm": 0.18029426038265228,
|
|
"learning_rate": 6.5346128281376204e-06,
|
|
"epoch": 0.8909382518043304,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"loss": 0.063,
|
|
"grad_norm": 0.35441911220550537,
|
|
"learning_rate": 6.440598942363796e-06,
|
|
"epoch": 0.8917401764234162,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"loss": 0.0502,
|
|
"grad_norm": 0.37630465626716614,
|
|
"learning_rate": 6.347243750303622e-06,
|
|
"epoch": 0.892542101042502,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"loss": 0.0604,
|
|
"grad_norm": 0.3076138496398926,
|
|
"learning_rate": 6.254547909213149e-06,
|
|
"epoch": 0.8933440256615878,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"loss": 0.0382,
|
|
"grad_norm": 0.2812439203262329,
|
|
"learning_rate": 6.162512071706272e-06,
|
|
"epoch": 0.8941459502806736,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"loss": 0.0421,
|
|
"grad_norm": 0.26998552680015564,
|
|
"learning_rate": 6.071136885750272e-06,
|
|
"epoch": 0.8949478748997595,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"eval_loss": 0.03864981606602669,
|
|
"eval_runtime": 31.6055,
|
|
"eval_samples_per_second": 33.222,
|
|
"eval_steps_per_second": 8.321,
|
|
"epoch": 0.8949478748997595,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"loss": 0.0632,
|
|
"grad_norm": 0.4778763949871063,
|
|
"learning_rate": 5.980422994661139e-06,
|
|
"epoch": 0.8957497995188453,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"loss": 0.0152,
|
|
"grad_norm": 0.18333400785923004,
|
|
"learning_rate": 5.890371037099107e-06,
|
|
"epoch": 0.896551724137931,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"loss": 0.027,
|
|
"grad_norm": 0.28018802404403687,
|
|
"learning_rate": 5.800981647064186e-06,
|
|
"epoch": 0.8973536487570168,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"loss": 0.0281,
|
|
"grad_norm": 0.2619428336620331,
|
|
"learning_rate": 5.71225545389158e-06,
|
|
"epoch": 0.8981555733761026,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"loss": 0.0274,
|
|
"grad_norm": 0.2673223912715912,
|
|
"learning_rate": 5.624193082247431e-06,
|
|
"epoch": 0.8989574979951884,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"loss": 0.0482,
|
|
"grad_norm": 0.2983281910419464,
|
|
"learning_rate": 5.536795152124252e-06,
|
|
"epoch": 0.8997594226142742,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"loss": 0.0384,
|
|
"grad_norm": 0.30985018610954285,
|
|
"learning_rate": 5.450062278836677e-06,
|
|
"epoch": 0.90056134723336,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"loss": 0.0169,
|
|
"grad_norm": 0.1679602563381195,
|
|
"learning_rate": 5.363995073017047e-06,
|
|
"epoch": 0.9013632718524459,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"loss": 0.0396,
|
|
"grad_norm": 0.29093822836875916,
|
|
"learning_rate": 5.278594140611204e-06,
|
|
"epoch": 0.9021651964715317,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"loss": 0.0218,
|
|
"grad_norm": 0.26348739862442017,
|
|
"learning_rate": 5.193860082874125e-06,
|
|
"epoch": 0.9029671210906175,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"loss": 0.0458,
|
|
"grad_norm": 0.5017328262329102,
|
|
"learning_rate": 5.1097934963657665e-06,
|
|
"epoch": 0.9037690457097033,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"loss": 0.0321,
|
|
"grad_norm": 0.3663092851638794,
|
|
"learning_rate": 5.026394972946813e-06,
|
|
"epoch": 0.9045709703287891,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"loss": 0.0308,
|
|
"grad_norm": 0.28317686915397644,
|
|
"learning_rate": 4.943665099774553e-06,
|
|
"epoch": 0.9053728949478749,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"loss": 0.0492,
|
|
"grad_norm": 0.4701959490776062,
|
|
"learning_rate": 4.861604459298696e-06,
|
|
"epoch": 0.9061748195669607,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"loss": 0.0677,
|
|
"grad_norm": 0.5933964252471924,
|
|
"learning_rate": 4.780213629257324e-06,
|
|
"epoch": 0.9069767441860465,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"loss": 0.0532,
|
|
"grad_norm": 0.35850825905799866,
|
|
"learning_rate": 4.69949318267281e-06,
|
|
"epoch": 0.9077786688051324,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"loss": 0.0429,
|
|
"grad_norm": 0.3367816209793091,
|
|
"learning_rate": 4.619443687847702e-06,
|
|
"epoch": 0.9085805934242182,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"loss": 0.0223,
|
|
"grad_norm": 0.3213984966278076,
|
|
"learning_rate": 4.540065708360886e-06,
|
|
"epoch": 0.909382518043304,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"loss": 0.0452,
|
|
"grad_norm": 0.4436606168746948,
|
|
"learning_rate": 4.461359803063458e-06,
|
|
"epoch": 0.9101844426623897,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"loss": 0.0179,
|
|
"grad_norm": 0.3105975389480591,
|
|
"learning_rate": 4.383326526074916e-06,
|
|
"epoch": 0.9109863672814755,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"loss": 0.0313,
|
|
"grad_norm": 0.3733506202697754,
|
|
"learning_rate": 4.305966426779118e-06,
|
|
"epoch": 0.9117882919005613,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"loss": 0.0142,
|
|
"grad_norm": 0.28305160999298096,
|
|
"learning_rate": 4.229280049820561e-06,
|
|
"epoch": 0.9125902165196471,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"loss": 0.0351,
|
|
"grad_norm": 0.431251585483551,
|
|
"learning_rate": 4.15326793510048e-06,
|
|
"epoch": 0.9133921411387329,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"loss": 0.0231,
|
|
"grad_norm": 0.20681129395961761,
|
|
"learning_rate": 4.077930617773007e-06,
|
|
"epoch": 0.9141940657578188,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"loss": 0.0448,
|
|
"grad_norm": 0.314126193523407,
|
|
"learning_rate": 4.003268628241452e-06,
|
|
"epoch": 0.9149959903769046,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"loss": 0.0495,
|
|
"grad_norm": 0.3883209228515625,
|
|
"learning_rate": 3.929282492154607e-06,
|
|
"epoch": 0.9157979149959904,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"loss": 0.0204,
|
|
"grad_norm": 0.28020593523979187,
|
|
"learning_rate": 3.855972730402968e-06,
|
|
"epoch": 0.9165998396150762,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"loss": 0.0161,
|
|
"grad_norm": 0.23583762347698212,
|
|
"learning_rate": 3.783339859115065e-06,
|
|
"epoch": 0.917401764234162,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"loss": 0.0306,
|
|
"grad_norm": 0.2325585037469864,
|
|
"learning_rate": 3.711384389653916e-06,
|
|
"epoch": 0.9182036888532478,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"loss": 0.0235,
|
|
"grad_norm": 0.24619098007678986,
|
|
"learning_rate": 3.6401068286133542e-06,
|
|
"epoch": 0.9190056134723336,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"loss": 0.033,
|
|
"grad_norm": 0.3259875476360321,
|
|
"learning_rate": 3.5695076778144875e-06,
|
|
"epoch": 0.9198075380914194,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"loss": 0.0304,
|
|
"grad_norm": 0.25106412172317505,
|
|
"learning_rate": 3.4995874343021094e-06,
|
|
"epoch": 0.9206094627105053,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"loss": 0.0469,
|
|
"grad_norm": 0.4743681252002716,
|
|
"learning_rate": 3.430346590341338e-06,
|
|
"epoch": 0.921411387329591,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"loss": 0.0539,
|
|
"grad_norm": 0.34995362162590027,
|
|
"learning_rate": 3.3617856334139607e-06,
|
|
"epoch": 0.9222133119486768,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"loss": 0.0662,
|
|
"grad_norm": 0.7248356342315674,
|
|
"learning_rate": 3.2939050462151953e-06,
|
|
"epoch": 0.9230152365677626,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"loss": 0.014,
|
|
"grad_norm": 0.2265823483467102,
|
|
"learning_rate": 3.226705306650113e-06,
|
|
"epoch": 0.9238171611868484,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"loss": 0.013,
|
|
"grad_norm": 0.20024491846561432,
|
|
"learning_rate": 3.1601868878304406e-06,
|
|
"epoch": 0.9246190858059342,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"loss": 0.0347,
|
|
"grad_norm": 0.25708380341529846,
|
|
"learning_rate": 3.0943502580710772e-06,
|
|
"epoch": 0.92542101042502,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"loss": 0.0149,
|
|
"grad_norm": 0.2992137372493744,
|
|
"learning_rate": 3.0291958808869037e-06,
|
|
"epoch": 0.9262229350441058,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"loss": 0.0162,
|
|
"grad_norm": 0.24897243082523346,
|
|
"learning_rate": 2.9647242149895006e-06,
|
|
"epoch": 0.9270248596631917,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"loss": 0.0193,
|
|
"grad_norm": 0.19664904475212097,
|
|
"learning_rate": 2.9009357142838477e-06,
|
|
"epoch": 0.9278267842822775,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"loss": 0.0288,
|
|
"grad_norm": 0.40675321221351624,
|
|
"learning_rate": 2.8378308278652288e-06,
|
|
"epoch": 0.9286287089013633,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"loss": 0.029,
|
|
"grad_norm": 0.2802353501319885,
|
|
"learning_rate": 2.775410000016021e-06,
|
|
"epoch": 0.9294306335204491,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"loss": 0.0561,
|
|
"grad_norm": 0.9122768044471741,
|
|
"learning_rate": 2.7136736702025433e-06,
|
|
"epoch": 0.9302325581395349,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"loss": 0.032,
|
|
"grad_norm": 0.36887556314468384,
|
|
"learning_rate": 2.652622273072003e-06,
|
|
"epoch": 0.9310344827586207,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"loss": 0.0285,
|
|
"grad_norm": 0.31024497747421265,
|
|
"learning_rate": 2.5922562384494196e-06,
|
|
"epoch": 0.9318364073777065,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"loss": 0.0499,
|
|
"grad_norm": 0.27006012201309204,
|
|
"learning_rate": 2.532575991334618e-06,
|
|
"epoch": 0.9326383319967922,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"loss": 0.0422,
|
|
"grad_norm": 0.3135731816291809,
|
|
"learning_rate": 2.473581951899184e-06,
|
|
"epoch": 0.9334402566158782,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"loss": 0.0249,
|
|
"grad_norm": 0.23915302753448486,
|
|
"learning_rate": 2.415274535483547e-06,
|
|
"epoch": 0.9342421812349639,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"loss": 0.0736,
|
|
"grad_norm": 0.5155062675476074,
|
|
"learning_rate": 2.357654152594113e-06,
|
|
"epoch": 0.9350441058540497,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"loss": 0.0379,
|
|
"grad_norm": 0.28162410855293274,
|
|
"learning_rate": 2.3007212089001916e-06,
|
|
"epoch": 0.9358460304731355,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"loss": 0.0247,
|
|
"grad_norm": 0.2652982771396637,
|
|
"learning_rate": 2.2444761052313856e-06,
|
|
"epoch": 0.9366479550922213,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"loss": 0.0219,
|
|
"grad_norm": 0.1709524244070053,
|
|
"learning_rate": 2.1889192375745494e-06,
|
|
"epoch": 0.9374498797113071,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"loss": 0.0725,
|
|
"grad_norm": 0.5203680396080017,
|
|
"learning_rate": 2.1340509970711466e-06,
|
|
"epoch": 0.9382518043303929,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"loss": 0.0466,
|
|
"grad_norm": 0.5651960372924805,
|
|
"learning_rate": 2.0798717700144077e-06,
|
|
"epoch": 0.9390537289494787,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"loss": 0.0316,
|
|
"grad_norm": 0.219880610704422,
|
|
"learning_rate": 2.0263819378466884e-06,
|
|
"epoch": 0.9398556535685646,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"loss": 0.037,
|
|
"grad_norm": 0.3648744225502014,
|
|
"learning_rate": 1.973581877156716e-06,
|
|
"epoch": 0.9406575781876504,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"loss": 0.0337,
|
|
"grad_norm": 0.35861659049987793,
|
|
"learning_rate": 1.921471959676957e-06,
|
|
"epoch": 0.9414595028067362,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"loss": 0.0255,
|
|
"grad_norm": 0.3175615072250366,
|
|
"learning_rate": 1.870052552281032e-06,
|
|
"epoch": 0.942261427425822,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"loss": 0.0521,
|
|
"grad_norm": 0.41771504282951355,
|
|
"learning_rate": 1.8193240169810943e-06,
|
|
"epoch": 0.9430633520449078,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"loss": 0.113,
|
|
"grad_norm": 0.5672475695610046,
|
|
"learning_rate": 1.7692867109252886e-06,
|
|
"epoch": 0.9438652766639936,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"loss": 0.0231,
|
|
"grad_norm": 0.24217240512371063,
|
|
"learning_rate": 1.7199409863952521e-06,
|
|
"epoch": 0.9446672012830793,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"loss": 0.0286,
|
|
"grad_norm": 0.34596091508865356,
|
|
"learning_rate": 1.6712871908036387e-06,
|
|
"epoch": 0.9454691259021651,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"loss": 0.0096,
|
|
"grad_norm": 0.1532655507326126,
|
|
"learning_rate": 1.623325666691644e-06,
|
|
"epoch": 0.946271050521251,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"loss": 0.0107,
|
|
"grad_norm": 0.14835034310817719,
|
|
"learning_rate": 1.5760567517266066e-06,
|
|
"epoch": 0.9470729751403368,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"loss": 0.0378,
|
|
"grad_norm": 0.31077146530151367,
|
|
"learning_rate": 1.5294807786996213e-06,
|
|
"epoch": 0.9478748997594226,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"loss": 0.0459,
|
|
"grad_norm": 0.33775973320007324,
|
|
"learning_rate": 1.4835980755232626e-06,
|
|
"epoch": 0.9486768243785084,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"loss": 0.0722,
|
|
"grad_norm": 0.3435156047344208,
|
|
"learning_rate": 1.4384089652291543e-06,
|
|
"epoch": 0.9494787489975942,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"loss": 0.0259,
|
|
"grad_norm": 0.29428747296333313,
|
|
"learning_rate": 1.3939137659658153e-06,
|
|
"epoch": 0.95028067361668,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"loss": 0.0189,
|
|
"grad_norm": 0.21608836948871613,
|
|
"learning_rate": 1.3501127909963274e-06,
|
|
"epoch": 0.9510825982357658,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"loss": 0.0352,
|
|
"grad_norm": 0.35777002573013306,
|
|
"learning_rate": 1.3070063486961936e-06,
|
|
"epoch": 0.9518845228548516,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"loss": 0.0284,
|
|
"grad_norm": 0.2871112525463104,
|
|
"learning_rate": 1.2645947425511395e-06,
|
|
"epoch": 0.9526864474739375,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"loss": 0.0225,
|
|
"grad_norm": 0.22989165782928467,
|
|
"learning_rate": 1.2228782711549924e-06,
|
|
"epoch": 0.9534883720930233,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"loss": 0.027,
|
|
"grad_norm": 0.3213796317577362,
|
|
"learning_rate": 1.181857228207539e-06,
|
|
"epoch": 0.9542902967121091,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"loss": 0.0232,
|
|
"grad_norm": 0.27565819025039673,
|
|
"learning_rate": 1.1415319025124938e-06,
|
|
"epoch": 0.9550922213311949,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"loss": 0.0385,
|
|
"grad_norm": 0.5389794111251831,
|
|
"learning_rate": 1.1019025779754666e-06,
|
|
"epoch": 0.9558941459502807,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"loss": 0.0419,
|
|
"grad_norm": 0.27501630783081055,
|
|
"learning_rate": 1.0629695336019763e-06,
|
|
"epoch": 0.9566960705693665,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"loss": 0.0423,
|
|
"grad_norm": 0.2711651027202606,
|
|
"learning_rate": 1.0247330434954071e-06,
|
|
"epoch": 0.9574979951884522,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"loss": 0.0222,
|
|
"grad_norm": 0.20319171249866486,
|
|
"learning_rate": 9.871933768551888e-07,
|
|
"epoch": 0.958299919807538,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"loss": 0.0718,
|
|
"grad_norm": 0.3775697648525238,
|
|
"learning_rate": 9.503507979748305e-07,
|
|
"epoch": 0.9591018444266239,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"loss": 0.0313,
|
|
"grad_norm": 0.27909061312675476,
|
|
"learning_rate": 9.142055662400673e-07,
|
|
"epoch": 0.9599037690457097,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"loss": 0.0754,
|
|
"grad_norm": 0.5719237923622131,
|
|
"learning_rate": 8.787579361270614e-07,
|
|
"epoch": 0.9607056936647955,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"loss": 0.0706,
|
|
"grad_norm": 0.6314537525177002,
|
|
"learning_rate": 8.440081572005931e-07,
|
|
"epoch": 0.9615076182838813,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"loss": 0.038,
|
|
"grad_norm": 0.45612236857414246,
|
|
"learning_rate": 8.099564741123166e-07,
|
|
"epoch": 0.9623095429029671,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"loss": 0.0525,
|
|
"grad_norm": 0.3892579674720764,
|
|
"learning_rate": 7.766031265989849e-07,
|
|
"epoch": 0.9631114675220529,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"loss": 0.0146,
|
|
"grad_norm": 0.20876550674438477,
|
|
"learning_rate": 7.439483494808497e-07,
|
|
"epoch": 0.9639133921411387,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"loss": 0.0245,
|
|
"grad_norm": 0.2424009144306183,
|
|
"learning_rate": 7.11992372659942e-07,
|
|
"epoch": 0.9647153167602245,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"loss": 0.0171,
|
|
"grad_norm": 0.20193764567375183,
|
|
"learning_rate": 6.807354211184613e-07,
|
|
"epoch": 0.9655172413793104,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"loss": 0.0357,
|
|
"grad_norm": 0.3591613471508026,
|
|
"learning_rate": 6.501777149172328e-07,
|
|
"epoch": 0.9663191659983962,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"loss": 0.0481,
|
|
"grad_norm": 0.40879741311073303,
|
|
"learning_rate": 6.203194691940972e-07,
|
|
"epoch": 0.967121090617482,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"loss": 0.0316,
|
|
"grad_norm": 0.2834092080593109,
|
|
"learning_rate": 5.91160894162468e-07,
|
|
"epoch": 0.9679230152365678,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"loss": 0.0357,
|
|
"grad_norm": 0.23049277067184448,
|
|
"learning_rate": 5.627021951097545e-07,
|
|
"epoch": 0.9687249398556536,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"loss": 0.0289,
|
|
"grad_norm": 0.2661738693714142,
|
|
"learning_rate": 5.349435723960183e-07,
|
|
"epoch": 0.9695268644747393,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"loss": 0.0295,
|
|
"grad_norm": 0.2500978708267212,
|
|
"learning_rate": 5.078852214525198e-07,
|
|
"epoch": 0.9703287890938251,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"loss": 0.0253,
|
|
"grad_norm": 0.20364578068256378,
|
|
"learning_rate": 4.815273327803182e-07,
|
|
"epoch": 0.9711307137129109,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"loss": 0.0444,
|
|
"grad_norm": 0.33421790599823,
|
|
"learning_rate": 4.5587009194894004e-07,
|
|
"epoch": 0.9719326383319968,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"loss": 0.0181,
|
|
"grad_norm": 0.2810249626636505,
|
|
"learning_rate": 4.3091367959512407e-07,
|
|
"epoch": 0.9727345629510826,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"loss": 0.0252,
|
|
"grad_norm": 0.24068693816661835,
|
|
"learning_rate": 4.066582714214895e-07,
|
|
"epoch": 0.9735364875701684,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"loss": 0.0529,
|
|
"grad_norm": 0.3940003514289856,
|
|
"learning_rate": 3.831040381953144e-07,
|
|
"epoch": 0.9743384121892542,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"loss": 0.0359,
|
|
"grad_norm": 0.7913379073143005,
|
|
"learning_rate": 3.6025114574734785e-07,
|
|
"epoch": 0.97514033680834,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"loss": 0.1689,
|
|
"grad_norm": 0.7005312442779541,
|
|
"learning_rate": 3.380997549706444e-07,
|
|
"epoch": 0.9759422614274258,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"loss": 0.0493,
|
|
"grad_norm": 0.38180509209632874,
|
|
"learning_rate": 3.166500218193758e-07,
|
|
"epoch": 0.9767441860465116,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"loss": 0.0252,
|
|
"grad_norm": 0.20567728579044342,
|
|
"learning_rate": 2.9590209730784304e-07,
|
|
"epoch": 0.9775461106655974,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"loss": 0.0352,
|
|
"grad_norm": 0.6058043241500854,
|
|
"learning_rate": 2.758561275092886e-07,
|
|
"epoch": 0.9783480352846833,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"loss": 0.0272,
|
|
"grad_norm": 0.32482075691223145,
|
|
"learning_rate": 2.5651225355497464e-07,
|
|
"epoch": 0.9791499599037691,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"loss": 0.031,
|
|
"grad_norm": 0.16501711308956146,
|
|
"learning_rate": 2.378706116330953e-07,
|
|
"epoch": 0.9799518845228549,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"loss": 0.0579,
|
|
"grad_norm": 0.3395942747592926,
|
|
"learning_rate": 2.1993133298791046e-07,
|
|
"epoch": 0.9807538091419407,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"loss": 0.013,
|
|
"grad_norm": 0.18358808755874634,
|
|
"learning_rate": 2.0269454391874666e-07,
|
|
"epoch": 0.9815557337610264,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"loss": 0.0441,
|
|
"grad_norm": 0.4380914866924286,
|
|
"learning_rate": 1.861603657791422e-07,
|
|
"epoch": 0.9823576583801122,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"loss": 0.0216,
|
|
"grad_norm": 0.19042056798934937,
|
|
"learning_rate": 1.7032891497600345e-07,
|
|
"epoch": 0.983159582999198,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"loss": 0.0208,
|
|
"grad_norm": 0.21681898832321167,
|
|
"learning_rate": 1.5520030296873877e-07,
|
|
"epoch": 0.9839615076182838,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"loss": 0.0664,
|
|
"grad_norm": 0.4365503489971161,
|
|
"learning_rate": 1.4077463626852582e-07,
|
|
"epoch": 0.9847634322373697,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"loss": 0.0224,
|
|
"grad_norm": 0.18103234469890594,
|
|
"learning_rate": 1.270520164375344e-07,
|
|
"epoch": 0.9855653568564555,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"loss": 0.029,
|
|
"grad_norm": 0.26580461859703064,
|
|
"learning_rate": 1.1403254008822695e-07,
|
|
"epoch": 0.9863672814755413,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"loss": 0.0414,
|
|
"grad_norm": 0.2631921172142029,
|
|
"learning_rate": 1.0171629888265921e-07,
|
|
"epoch": 0.9871692060946271,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"loss": 0.0267,
|
|
"grad_norm": 0.34322279691696167,
|
|
"learning_rate": 9.010337953185843e-08,
|
|
"epoch": 0.9879711307137129,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"loss": 0.0245,
|
|
"grad_norm": 0.24382710456848145,
|
|
"learning_rate": 7.919386379515726e-08,
|
|
"epoch": 0.9887730553327987,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"loss": 0.0652,
|
|
"grad_norm": 0.5472224354743958,
|
|
"learning_rate": 6.89878284797163e-08,
|
|
"epoch": 0.9895749799518845,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"loss": 0.0412,
|
|
"grad_norm": 0.2964751720428467,
|
|
"learning_rate": 5.948534543988027e-08,
|
|
"epoch": 0.9903769045709703,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"loss": 0.0436,
|
|
"grad_norm": 0.3399409353733063,
|
|
"learning_rate": 5.068648157675604e-08,
|
|
"epoch": 0.9911788291900562,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"loss": 0.0481,
|
|
"grad_norm": 0.346693754196167,
|
|
"learning_rate": 4.259129883767976e-08,
|
|
"epoch": 0.991980753809142,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"loss": 0.098,
|
|
"grad_norm": 0.5714817047119141,
|
|
"learning_rate": 3.5199854215817176e-08,
|
|
"epoch": 0.9927826784282278,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"loss": 0.0611,
|
|
"grad_norm": 0.36500847339630127,
|
|
"learning_rate": 2.8512199749730628e-08,
|
|
"epoch": 0.9935846030473136,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"loss": 0.0238,
|
|
"grad_norm": 0.21105419099330902,
|
|
"learning_rate": 2.2528382523057113e-08,
|
|
"epoch": 0.9943865276663993,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"eval_loss": 0.038508880883455276,
|
|
"eval_runtime": 31.7523,
|
|
"eval_samples_per_second": 33.068,
|
|
"eval_steps_per_second": 8.283,
|
|
"epoch": 0.9943865276663993,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"loss": 0.0441,
|
|
"grad_norm": 0.3713122010231018,
|
|
"learning_rate": 1.7248444664141884e-08,
|
|
"epoch": 0.9951884522854851,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"loss": 0.0319,
|
|
"grad_norm": 0.1977764517068863,
|
|
"learning_rate": 1.2672423345760909e-08,
|
|
"epoch": 0.9959903769045709,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"loss": 0.0478,
|
|
"grad_norm": 0.42995980381965637,
|
|
"learning_rate": 8.80035078482111e-09,
|
|
"epoch": 0.9967923015236567,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"loss": 0.0306,
|
|
"grad_norm": 0.39976274967193604,
|
|
"learning_rate": 5.6322542422049266e-09,
|
|
"epoch": 0.9975942261427426,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"loss": 0.0413,
|
|
"grad_norm": 0.28021112084388733,
|
|
"learning_rate": 3.1681560225038654e-09,
|
|
"epoch": 0.9983961507618284,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"loss": 0.0335,
|
|
"grad_norm": 0.25943997502326965,
|
|
"learning_rate": 1.4080734739074786e-09,
|
|
"epoch": 0.9991980753809142,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"loss": 0.0384,
|
|
"grad_norm": 0.45012426376342773,
|
|
"learning_rate": 3.52018988059033e-10,
|
|
"epoch": 1.0,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"train_runtime": 1545.9245,
|
|
"train_samples_per_second": 12.901,
|
|
"train_steps_per_second": 0.807,
|
|
"total_flos": 1.433925787776e+16,
|
|
"train_loss": 0.05218265543086118,
|
|
"epoch": 1.0,
|
|
"step": 1247
|
|
}
|
|
] |