6575 lines
169 KiB
JSON
6575 lines
169 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 3.0,
|
|
"eval_steps": 500,
|
|
"global_step": 933,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.003215434083601286,
|
|
"grad_norm": 28.06913370740238,
|
|
"learning_rate": 0.0,
|
|
"loss": 2.6452956199645996,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.006430868167202572,
|
|
"grad_norm": 48.39869965639885,
|
|
"learning_rate": 1.0638297872340426e-07,
|
|
"loss": 4.55789852142334,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.00964630225080386,
|
|
"grad_norm": 49.69301124062599,
|
|
"learning_rate": 2.1276595744680852e-07,
|
|
"loss": 4.399077415466309,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.012861736334405145,
|
|
"grad_norm": 41.69483295374549,
|
|
"learning_rate": 3.1914893617021275e-07,
|
|
"loss": 3.8412859439849854,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.01607717041800643,
|
|
"grad_norm": 38.19554788922322,
|
|
"learning_rate": 4.2553191489361704e-07,
|
|
"loss": 4.593955993652344,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.01929260450160772,
|
|
"grad_norm": 35.40565048454064,
|
|
"learning_rate": 5.319148936170213e-07,
|
|
"loss": 4.042428016662598,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.022508038585209004,
|
|
"grad_norm": 33.35178240422259,
|
|
"learning_rate": 6.382978723404255e-07,
|
|
"loss": 4.12535285949707,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.02572347266881029,
|
|
"grad_norm": 30.900117272078198,
|
|
"learning_rate": 7.446808510638298e-07,
|
|
"loss": 3.521498680114746,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.028938906752411574,
|
|
"grad_norm": 30.636501853074883,
|
|
"learning_rate": 8.510638297872341e-07,
|
|
"loss": 3.6315274238586426,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.03215434083601286,
|
|
"grad_norm": 38.64870494238772,
|
|
"learning_rate": 9.574468085106384e-07,
|
|
"loss": 4.302469730377197,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.03536977491961415,
|
|
"grad_norm": 32.91754594782006,
|
|
"learning_rate": 1.0638297872340427e-06,
|
|
"loss": 4.56865119934082,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.03858520900321544,
|
|
"grad_norm": 34.58013362254361,
|
|
"learning_rate": 1.170212765957447e-06,
|
|
"loss": 4.283186912536621,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.04180064308681672,
|
|
"grad_norm": 40.581705970195586,
|
|
"learning_rate": 1.276595744680851e-06,
|
|
"loss": 4.2185187339782715,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.04501607717041801,
|
|
"grad_norm": 29.29283904903904,
|
|
"learning_rate": 1.3829787234042555e-06,
|
|
"loss": 3.8783042430877686,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.04823151125401929,
|
|
"grad_norm": 27.27991833133174,
|
|
"learning_rate": 1.4893617021276596e-06,
|
|
"loss": 4.050146579742432,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.05144694533762058,
|
|
"grad_norm": 27.999481136276096,
|
|
"learning_rate": 1.595744680851064e-06,
|
|
"loss": 3.825010299682617,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.05466237942122187,
|
|
"grad_norm": 22.536982883463256,
|
|
"learning_rate": 1.7021276595744682e-06,
|
|
"loss": 3.562378406524658,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.05787781350482315,
|
|
"grad_norm": 34.480722847374665,
|
|
"learning_rate": 1.8085106382978727e-06,
|
|
"loss": 3.8451032638549805,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.06109324758842444,
|
|
"grad_norm": 18.624534731891554,
|
|
"learning_rate": 1.9148936170212767e-06,
|
|
"loss": 3.538512945175171,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.06430868167202572,
|
|
"grad_norm": 12.727844125034615,
|
|
"learning_rate": 2.021276595744681e-06,
|
|
"loss": 3.4035849571228027,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.06752411575562701,
|
|
"grad_norm": 16.628683791330275,
|
|
"learning_rate": 2.1276595744680853e-06,
|
|
"loss": 4.060901165008545,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.0707395498392283,
|
|
"grad_norm": 15.851111458013285,
|
|
"learning_rate": 2.2340425531914894e-06,
|
|
"loss": 3.4492287635803223,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.07395498392282958,
|
|
"grad_norm": 14.753226693275314,
|
|
"learning_rate": 2.340425531914894e-06,
|
|
"loss": 3.4821548461914062,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.07717041800643087,
|
|
"grad_norm": 13.046614909449563,
|
|
"learning_rate": 2.446808510638298e-06,
|
|
"loss": 2.6914403438568115,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.08038585209003216,
|
|
"grad_norm": 10.833167684511187,
|
|
"learning_rate": 2.553191489361702e-06,
|
|
"loss": 3.381208896636963,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.08360128617363344,
|
|
"grad_norm": 14.663957003362531,
|
|
"learning_rate": 2.6595744680851065e-06,
|
|
"loss": 3.723344564437866,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.08681672025723473,
|
|
"grad_norm": 12.38492722838179,
|
|
"learning_rate": 2.765957446808511e-06,
|
|
"loss": 3.4975712299346924,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.09003215434083602,
|
|
"grad_norm": 11.401609720183584,
|
|
"learning_rate": 2.8723404255319155e-06,
|
|
"loss": 3.742828369140625,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.0932475884244373,
|
|
"grad_norm": 12.794559143366632,
|
|
"learning_rate": 2.978723404255319e-06,
|
|
"loss": 3.7880096435546875,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.09646302250803858,
|
|
"grad_norm": 26.7679703474409,
|
|
"learning_rate": 3.0851063829787237e-06,
|
|
"loss": 3.091320037841797,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.09967845659163987,
|
|
"grad_norm": 11.726188149872904,
|
|
"learning_rate": 3.191489361702128e-06,
|
|
"loss": 3.397942543029785,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.10289389067524116,
|
|
"grad_norm": 11.535658534446908,
|
|
"learning_rate": 3.297872340425532e-06,
|
|
"loss": 3.3407297134399414,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.10610932475884244,
|
|
"grad_norm": 12.230466594907684,
|
|
"learning_rate": 3.4042553191489363e-06,
|
|
"loss": 3.5374701023101807,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.10932475884244373,
|
|
"grad_norm": 17.243624644167863,
|
|
"learning_rate": 3.510638297872341e-06,
|
|
"loss": 3.028085708618164,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.11254019292604502,
|
|
"grad_norm": 17.334038622045792,
|
|
"learning_rate": 3.6170212765957453e-06,
|
|
"loss": 3.191539764404297,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.1157556270096463,
|
|
"grad_norm": 14.86079639907893,
|
|
"learning_rate": 3.723404255319149e-06,
|
|
"loss": 3.315582275390625,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.1189710610932476,
|
|
"grad_norm": 13.609433504610683,
|
|
"learning_rate": 3.8297872340425535e-06,
|
|
"loss": 3.1429853439331055,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.12218649517684887,
|
|
"grad_norm": 12.204605042968506,
|
|
"learning_rate": 3.936170212765958e-06,
|
|
"loss": 3.2580060958862305,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.12540192926045016,
|
|
"grad_norm": 9.319212608299305,
|
|
"learning_rate": 4.042553191489362e-06,
|
|
"loss": 2.8858089447021484,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.12861736334405144,
|
|
"grad_norm": 12.524532210698558,
|
|
"learning_rate": 4.148936170212766e-06,
|
|
"loss": 3.385632038116455,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.13183279742765272,
|
|
"grad_norm": 8.648905095426267,
|
|
"learning_rate": 4.255319148936171e-06,
|
|
"loss": 2.8703691959381104,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.13504823151125403,
|
|
"grad_norm": 13.669866742395188,
|
|
"learning_rate": 4.361702127659575e-06,
|
|
"loss": 3.3186304569244385,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.1382636655948553,
|
|
"grad_norm": 13.513984643274535,
|
|
"learning_rate": 4.468085106382979e-06,
|
|
"loss": 3.2565927505493164,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.1414790996784566,
|
|
"grad_norm": 11.467838472357526,
|
|
"learning_rate": 4.574468085106383e-06,
|
|
"loss": 3.012521982192993,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.14469453376205788,
|
|
"grad_norm": 14.414224190053467,
|
|
"learning_rate": 4.680851063829788e-06,
|
|
"loss": 3.093921184539795,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.14790996784565916,
|
|
"grad_norm": 13.083779631573304,
|
|
"learning_rate": 4.787234042553192e-06,
|
|
"loss": 3.1773462295532227,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.15112540192926044,
|
|
"grad_norm": 10.99722367362911,
|
|
"learning_rate": 4.893617021276596e-06,
|
|
"loss": 2.971510648727417,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.15434083601286175,
|
|
"grad_norm": 11.68667724946745,
|
|
"learning_rate": 5e-06,
|
|
"loss": 3.365861415863037,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.15755627009646303,
|
|
"grad_norm": 12.098124814579858,
|
|
"learning_rate": 5.106382978723404e-06,
|
|
"loss": 3.331052780151367,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.1607717041800643,
|
|
"grad_norm": 15.052059643954104,
|
|
"learning_rate": 5.212765957446809e-06,
|
|
"loss": 3.513488292694092,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.1639871382636656,
|
|
"grad_norm": 11.566325521455326,
|
|
"learning_rate": 5.319148936170213e-06,
|
|
"loss": 3.165461540222168,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.16720257234726688,
|
|
"grad_norm": 9.34326584922921,
|
|
"learning_rate": 5.425531914893617e-06,
|
|
"loss": 3.308755397796631,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.17041800643086816,
|
|
"grad_norm": 8.081169045448624,
|
|
"learning_rate": 5.531914893617022e-06,
|
|
"loss": 2.905186653137207,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.17363344051446947,
|
|
"grad_norm": 7.992815413183043,
|
|
"learning_rate": 5.638297872340426e-06,
|
|
"loss": 3.2160592079162598,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.17684887459807075,
|
|
"grad_norm": 13.368580785667488,
|
|
"learning_rate": 5.744680851063831e-06,
|
|
"loss": 2.809837818145752,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.18006430868167203,
|
|
"grad_norm": 12.22993319530342,
|
|
"learning_rate": 5.851063829787235e-06,
|
|
"loss": 3.355532646179199,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.1832797427652733,
|
|
"grad_norm": 7.221255459808142,
|
|
"learning_rate": 5.957446808510638e-06,
|
|
"loss": 2.912825584411621,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.1864951768488746,
|
|
"grad_norm": 8.99412347483827,
|
|
"learning_rate": 6.063829787234044e-06,
|
|
"loss": 2.738528251647949,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.18971061093247588,
|
|
"grad_norm": 9.344015814293519,
|
|
"learning_rate": 6.170212765957447e-06,
|
|
"loss": 3.5236358642578125,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.19292604501607716,
|
|
"grad_norm": 9.156439246600511,
|
|
"learning_rate": 6.276595744680851e-06,
|
|
"loss": 2.438237190246582,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.19614147909967847,
|
|
"grad_norm": 13.840159967058424,
|
|
"learning_rate": 6.382978723404256e-06,
|
|
"loss": 3.002664804458618,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.19935691318327975,
|
|
"grad_norm": 19.877275319550726,
|
|
"learning_rate": 6.48936170212766e-06,
|
|
"loss": 2.6102824211120605,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.20257234726688103,
|
|
"grad_norm": 12.1602713562138,
|
|
"learning_rate": 6.595744680851064e-06,
|
|
"loss": 2.8119864463806152,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.2057877813504823,
|
|
"grad_norm": 16.720203965795687,
|
|
"learning_rate": 6.702127659574469e-06,
|
|
"loss": 3.0033774375915527,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.2090032154340836,
|
|
"grad_norm": 11.940414659790333,
|
|
"learning_rate": 6.808510638297873e-06,
|
|
"loss": 3.0424952507019043,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.21221864951768488,
|
|
"grad_norm": 13.529947357143289,
|
|
"learning_rate": 6.914893617021278e-06,
|
|
"loss": 3.125572443008423,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.21543408360128619,
|
|
"grad_norm": 14.39004817757797,
|
|
"learning_rate": 7.021276595744682e-06,
|
|
"loss": 2.6417791843414307,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.21864951768488747,
|
|
"grad_norm": 10.746950415723779,
|
|
"learning_rate": 7.127659574468085e-06,
|
|
"loss": 2.795292615890503,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.22186495176848875,
|
|
"grad_norm": 8.800102366100415,
|
|
"learning_rate": 7.234042553191491e-06,
|
|
"loss": 2.806095600128174,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.22508038585209003,
|
|
"grad_norm": 9.675471278291617,
|
|
"learning_rate": 7.340425531914894e-06,
|
|
"loss": 3.1251420974731445,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.2282958199356913,
|
|
"grad_norm": 10.053887771078337,
|
|
"learning_rate": 7.446808510638298e-06,
|
|
"loss": 3.1366963386535645,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.2315112540192926,
|
|
"grad_norm": 9.96554198597255,
|
|
"learning_rate": 7.553191489361703e-06,
|
|
"loss": 2.7942872047424316,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.2347266881028939,
|
|
"grad_norm": 9.128869915479399,
|
|
"learning_rate": 7.659574468085107e-06,
|
|
"loss": 2.7176175117492676,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.2379421221864952,
|
|
"grad_norm": 28.173273740714603,
|
|
"learning_rate": 7.765957446808511e-06,
|
|
"loss": 3.2510626316070557,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.24115755627009647,
|
|
"grad_norm": 15.875524782572642,
|
|
"learning_rate": 7.872340425531916e-06,
|
|
"loss": 3.5180716514587402,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.24437299035369775,
|
|
"grad_norm": 8.411231774581736,
|
|
"learning_rate": 7.97872340425532e-06,
|
|
"loss": 2.6583242416381836,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.24758842443729903,
|
|
"grad_norm": 10.255213693759098,
|
|
"learning_rate": 8.085106382978723e-06,
|
|
"loss": 3.522376537322998,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.2508038585209003,
|
|
"grad_norm": 14.129242001151896,
|
|
"learning_rate": 8.191489361702128e-06,
|
|
"loss": 2.666438102722168,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.2540192926045016,
|
|
"grad_norm": 13.280551504579265,
|
|
"learning_rate": 8.297872340425532e-06,
|
|
"loss": 3.0438873767852783,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.2572347266881029,
|
|
"grad_norm": 9.282380199453716,
|
|
"learning_rate": 8.404255319148937e-06,
|
|
"loss": 2.7416107654571533,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.2604501607717042,
|
|
"grad_norm": 13.819155087109117,
|
|
"learning_rate": 8.510638297872341e-06,
|
|
"loss": 3.040888547897339,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.26366559485530544,
|
|
"grad_norm": 10.236048490196817,
|
|
"learning_rate": 8.617021276595746e-06,
|
|
"loss": 2.8178224563598633,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.26688102893890675,
|
|
"grad_norm": 11.247565366748827,
|
|
"learning_rate": 8.72340425531915e-06,
|
|
"loss": 3.13818097114563,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.27009646302250806,
|
|
"grad_norm": 11.38311149290456,
|
|
"learning_rate": 8.829787234042555e-06,
|
|
"loss": 3.0007710456848145,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.2733118971061093,
|
|
"grad_norm": 15.216874845868524,
|
|
"learning_rate": 8.936170212765958e-06,
|
|
"loss": 2.1240034103393555,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.2765273311897106,
|
|
"grad_norm": 14.569359980649596,
|
|
"learning_rate": 9.042553191489362e-06,
|
|
"loss": 3.0258898735046387,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.2797427652733119,
|
|
"grad_norm": 10.551293545168425,
|
|
"learning_rate": 9.148936170212767e-06,
|
|
"loss": 3.815779685974121,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.2829581993569132,
|
|
"grad_norm": 13.73383748688779,
|
|
"learning_rate": 9.255319148936171e-06,
|
|
"loss": 3.146327018737793,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.2861736334405145,
|
|
"grad_norm": 16.606288521806196,
|
|
"learning_rate": 9.361702127659576e-06,
|
|
"loss": 3.730978488922119,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.28938906752411575,
|
|
"grad_norm": 15.589892740807427,
|
|
"learning_rate": 9.46808510638298e-06,
|
|
"loss": 2.837461471557617,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.29260450160771706,
|
|
"grad_norm": 8.969993651783593,
|
|
"learning_rate": 9.574468085106385e-06,
|
|
"loss": 2.8579788208007812,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.2958199356913183,
|
|
"grad_norm": 7.662931624851478,
|
|
"learning_rate": 9.680851063829787e-06,
|
|
"loss": 3.059731960296631,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.2990353697749196,
|
|
"grad_norm": 12.506461029325436,
|
|
"learning_rate": 9.787234042553192e-06,
|
|
"loss": 2.419557809829712,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.3022508038585209,
|
|
"grad_norm": 11.680821462164978,
|
|
"learning_rate": 9.893617021276596e-06,
|
|
"loss": 2.8333683013916016,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.3054662379421222,
|
|
"grad_norm": 12.174316986589778,
|
|
"learning_rate": 1e-05,
|
|
"loss": 2.74963641166687,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.3086816720257235,
|
|
"grad_norm": 11.199762414211195,
|
|
"learning_rate": 9.999964947796453e-06,
|
|
"loss": 2.873091697692871,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.31189710610932475,
|
|
"grad_norm": 13.974152571943035,
|
|
"learning_rate": 9.999859791677274e-06,
|
|
"loss": 2.7511343955993652,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.31511254019292606,
|
|
"grad_norm": 7.263607356101665,
|
|
"learning_rate": 9.999684533116843e-06,
|
|
"loss": 2.71083927154541,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.3183279742765273,
|
|
"grad_norm": 25.796622294537844,
|
|
"learning_rate": 9.999439174572441e-06,
|
|
"loss": 3.5184683799743652,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.3215434083601286,
|
|
"grad_norm": 17.664679448204584,
|
|
"learning_rate": 9.999123719484209e-06,
|
|
"loss": 3.0679643154144287,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.3247588424437299,
|
|
"grad_norm": 8.788378126441323,
|
|
"learning_rate": 9.99873817227511e-06,
|
|
"loss": 3.4208366870880127,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.3279742765273312,
|
|
"grad_norm": 10.180060028116147,
|
|
"learning_rate": 9.998282538350849e-06,
|
|
"loss": 2.5970406532287598,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.3311897106109325,
|
|
"grad_norm": 8.806422861108551,
|
|
"learning_rate": 9.997756824099822e-06,
|
|
"loss": 2.414546012878418,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.33440514469453375,
|
|
"grad_norm": 7.16593228968872,
|
|
"learning_rate": 9.997161036893001e-06,
|
|
"loss": 2.7526440620422363,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.33762057877813506,
|
|
"grad_norm": 17.516673563240467,
|
|
"learning_rate": 9.996495185083853e-06,
|
|
"loss": 3.5313873291015625,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.3408360128617363,
|
|
"grad_norm": 11.176485057675038,
|
|
"learning_rate": 9.995759278008202e-06,
|
|
"loss": 3.215785026550293,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.3440514469453376,
|
|
"grad_norm": 10.013772937565832,
|
|
"learning_rate": 9.994953325984116e-06,
|
|
"loss": 2.8342652320861816,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.34726688102893893,
|
|
"grad_norm": 16.13725758624622,
|
|
"learning_rate": 9.994077340311751e-06,
|
|
"loss": 3.187843084335327,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.3504823151125402,
|
|
"grad_norm": 13.142839661126178,
|
|
"learning_rate": 9.993131333273203e-06,
|
|
"loss": 3.329102039337158,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.3536977491961415,
|
|
"grad_norm": 13.15304975587744,
|
|
"learning_rate": 9.99211531813232e-06,
|
|
"loss": 2.963022232055664,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.35691318327974275,
|
|
"grad_norm": 20.764351097324777,
|
|
"learning_rate": 9.991029309134533e-06,
|
|
"loss": 3.1603951454162598,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.36012861736334406,
|
|
"grad_norm": 20.990534185345496,
|
|
"learning_rate": 9.989873321506643e-06,
|
|
"loss": 3.063810110092163,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.3633440514469453,
|
|
"grad_norm": 11.652596742212573,
|
|
"learning_rate": 9.988647371456614e-06,
|
|
"loss": 3.0116543769836426,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.3665594855305466,
|
|
"grad_norm": 19.438695012900585,
|
|
"learning_rate": 9.987351476173352e-06,
|
|
"loss": 3.010406255722046,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.36977491961414793,
|
|
"grad_norm": 6.079653142271714,
|
|
"learning_rate": 9.985985653826444e-06,
|
|
"loss": 1.9504810571670532,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.3729903536977492,
|
|
"grad_norm": 13.795243277642982,
|
|
"learning_rate": 9.98454992356593e-06,
|
|
"loss": 2.93680739402771,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.3762057877813505,
|
|
"grad_norm": 13.003967588877996,
|
|
"learning_rate": 9.983044305522007e-06,
|
|
"loss": 2.3677353858947754,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.37942122186495175,
|
|
"grad_norm": 18.380499765458158,
|
|
"learning_rate": 9.981468820804774e-06,
|
|
"loss": 2.847960948944092,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.38263665594855306,
|
|
"grad_norm": 9.899382867106105,
|
|
"learning_rate": 9.979823491503909e-06,
|
|
"loss": 3.0923283100128174,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.3858520900321543,
|
|
"grad_norm": 28.34535742986171,
|
|
"learning_rate": 9.978108340688383e-06,
|
|
"loss": 3.020812511444092,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.3890675241157556,
|
|
"grad_norm": 9.019875841237615,
|
|
"learning_rate": 9.976323392406122e-06,
|
|
"loss": 3.3838634490966797,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.39228295819935693,
|
|
"grad_norm": 23.495664363464023,
|
|
"learning_rate": 9.974468671683673e-06,
|
|
"loss": 3.5906333923339844,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.3954983922829582,
|
|
"grad_norm": 10.641890312294386,
|
|
"learning_rate": 9.972544204525853e-06,
|
|
"loss": 3.000471591949463,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.3987138263665595,
|
|
"grad_norm": 9.496581002852425,
|
|
"learning_rate": 9.970550017915393e-06,
|
|
"loss": 2.9725592136383057,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.40192926045016075,
|
|
"grad_norm": 20.913334058741135,
|
|
"learning_rate": 9.968486139812544e-06,
|
|
"loss": 3.160482406616211,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.40514469453376206,
|
|
"grad_norm": 11.751298309185795,
|
|
"learning_rate": 9.966352599154697e-06,
|
|
"loss": 3.0642364025115967,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.40836012861736337,
|
|
"grad_norm": 20.856740980507603,
|
|
"learning_rate": 9.964149425855971e-06,
|
|
"loss": 3.73250675201416,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.4115755627009646,
|
|
"grad_norm": 9.151325730059728,
|
|
"learning_rate": 9.961876650806799e-06,
|
|
"loss": 2.632124900817871,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.41479099678456594,
|
|
"grad_norm": 11.4247472974135,
|
|
"learning_rate": 9.95953430587349e-06,
|
|
"loss": 2.5810580253601074,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.4180064308681672,
|
|
"grad_norm": 16.431822779962932,
|
|
"learning_rate": 9.957122423897786e-06,
|
|
"loss": 3.170461654663086,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.4212218649517685,
|
|
"grad_norm": 21.943673264965508,
|
|
"learning_rate": 9.954641038696395e-06,
|
|
"loss": 3.044951915740967,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.42443729903536975,
|
|
"grad_norm": 6.71332684009912,
|
|
"learning_rate": 9.952090185060528e-06,
|
|
"loss": 2.9257850646972656,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.42765273311897106,
|
|
"grad_norm": 9.885222299724687,
|
|
"learning_rate": 9.9494698987554e-06,
|
|
"loss": 2.943833827972412,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.43086816720257237,
|
|
"grad_norm": 11.16095425096583,
|
|
"learning_rate": 9.946780216519734e-06,
|
|
"loss": 3.022878646850586,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.4340836012861736,
|
|
"grad_norm": 9.033329511785515,
|
|
"learning_rate": 9.944021176065247e-06,
|
|
"loss": 2.792724847793579,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.43729903536977494,
|
|
"grad_norm": 13.435034889452208,
|
|
"learning_rate": 9.941192816076114e-06,
|
|
"loss": 3.35680890083313,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.4405144694533762,
|
|
"grad_norm": 6.768294617543249,
|
|
"learning_rate": 9.938295176208441e-06,
|
|
"loss": 2.4593820571899414,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.4437299035369775,
|
|
"grad_norm": 14.633684351350666,
|
|
"learning_rate": 9.93532829708969e-06,
|
|
"loss": 2.6255781650543213,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.44694533762057875,
|
|
"grad_norm": 16.68694895086861,
|
|
"learning_rate": 9.932292220318121e-06,
|
|
"loss": 2.6132946014404297,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.45016077170418006,
|
|
"grad_norm": 15.726897720613817,
|
|
"learning_rate": 9.929186988462208e-06,
|
|
"loss": 2.9322824478149414,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.4533762057877814,
|
|
"grad_norm": 15.761243450650786,
|
|
"learning_rate": 9.926012645060037e-06,
|
|
"loss": 2.601005792617798,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.4565916398713826,
|
|
"grad_norm": 36.62478666800511,
|
|
"learning_rate": 9.9227692346187e-06,
|
|
"loss": 2.5554819107055664,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.45980707395498394,
|
|
"grad_norm": 12.187964367204458,
|
|
"learning_rate": 9.919456802613672e-06,
|
|
"loss": 2.5176918506622314,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.4630225080385852,
|
|
"grad_norm": 17.390325809468337,
|
|
"learning_rate": 9.916075395488167e-06,
|
|
"loss": 2.341370105743408,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.4662379421221865,
|
|
"grad_norm": 12.718168513574046,
|
|
"learning_rate": 9.912625060652496e-06,
|
|
"loss": 3.2781105041503906,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.4694533762057878,
|
|
"grad_norm": 13.449199904281418,
|
|
"learning_rate": 9.909105846483394e-06,
|
|
"loss": 3.615126132965088,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.47266881028938906,
|
|
"grad_norm": 8.050446777669299,
|
|
"learning_rate": 9.905517802323345e-06,
|
|
"loss": 2.8603837490081787,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.4758842443729904,
|
|
"grad_norm": 12.943386027137832,
|
|
"learning_rate": 9.901860978479889e-06,
|
|
"loss": 2.465343952178955,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.4790996784565916,
|
|
"grad_norm": 49.15991413291505,
|
|
"learning_rate": 9.898135426224923e-06,
|
|
"loss": 3.241806983947754,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.48231511254019294,
|
|
"grad_norm": 17.678071991265508,
|
|
"learning_rate": 9.89434119779397e-06,
|
|
"loss": 3.292675018310547,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.4855305466237942,
|
|
"grad_norm": 15.424220522820049,
|
|
"learning_rate": 9.89047834638546e-06,
|
|
"loss": 2.6159539222717285,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.4887459807073955,
|
|
"grad_norm": 6.510280246958492,
|
|
"learning_rate": 9.886546926159972e-06,
|
|
"loss": 2.4839179515838623,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.4919614147909968,
|
|
"grad_norm": 11.741972927177837,
|
|
"learning_rate": 9.882546992239483e-06,
|
|
"loss": 2.995469570159912,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.49517684887459806,
|
|
"grad_norm": 13.630320770906891,
|
|
"learning_rate": 9.878478600706595e-06,
|
|
"loss": 3.255875587463379,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.4983922829581994,
|
|
"grad_norm": 36.221839717193376,
|
|
"learning_rate": 9.87434180860374e-06,
|
|
"loss": 3.4806838035583496,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.5016077170418006,
|
|
"grad_norm": 15.136927688596392,
|
|
"learning_rate": 9.87013667393239e-06,
|
|
"loss": 3.202141284942627,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.5048231511254019,
|
|
"grad_norm": 21.095472411241253,
|
|
"learning_rate": 9.865863255652242e-06,
|
|
"loss": 2.8557021617889404,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.5080385852090032,
|
|
"grad_norm": 9.121385453492111,
|
|
"learning_rate": 9.861521613680384e-06,
|
|
"loss": 2.9327592849731445,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.5112540192926045,
|
|
"grad_norm": 8.35474008993999,
|
|
"learning_rate": 9.857111808890465e-06,
|
|
"loss": 3.2285704612731934,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.5144694533762058,
|
|
"grad_norm": 14.002125724500594,
|
|
"learning_rate": 9.852633903111834e-06,
|
|
"loss": 2.7799017429351807,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.5176848874598071,
|
|
"grad_norm": 12.65627846068601,
|
|
"learning_rate": 9.848087959128679e-06,
|
|
"loss": 2.5985677242279053,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.5209003215434084,
|
|
"grad_norm": 14.086117584953953,
|
|
"learning_rate": 9.843474040679137e-06,
|
|
"loss": 3.1218137741088867,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.5241157556270096,
|
|
"grad_norm": 8.527738044773436,
|
|
"learning_rate": 9.838792212454416e-06,
|
|
"loss": 2.367370367050171,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.5273311897106109,
|
|
"grad_norm": 9.981924505220025,
|
|
"learning_rate": 9.834042540097875e-06,
|
|
"loss": 2.8083603382110596,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.5305466237942122,
|
|
"grad_norm": 11.46020527117729,
|
|
"learning_rate": 9.829225090204102e-06,
|
|
"loss": 2.920241117477417,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.5337620578778135,
|
|
"grad_norm": 12.469087819796588,
|
|
"learning_rate": 9.824339930317994e-06,
|
|
"loss": 2.6172094345092773,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.5369774919614148,
|
|
"grad_norm": 20.922191531715136,
|
|
"learning_rate": 9.819387128933799e-06,
|
|
"loss": 3.1109981536865234,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.5401929260450161,
|
|
"grad_norm": 15.632738580412314,
|
|
"learning_rate": 9.814366755494155e-06,
|
|
"loss": 3.153048038482666,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.5434083601286174,
|
|
"grad_norm": 35.82397708735359,
|
|
"learning_rate": 9.809278880389126e-06,
|
|
"loss": 3.2571372985839844,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.5466237942122186,
|
|
"grad_norm": 8.554787244670667,
|
|
"learning_rate": 9.804123574955202e-06,
|
|
"loss": 3.108255624771118,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.5498392282958199,
|
|
"grad_norm": 7.383101110488588,
|
|
"learning_rate": 9.798900911474315e-06,
|
|
"loss": 3.0177969932556152,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.5530546623794212,
|
|
"grad_norm": 11.293545889392764,
|
|
"learning_rate": 9.793610963172802e-06,
|
|
"loss": 2.795715093612671,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.5562700964630225,
|
|
"grad_norm": 21.241938441534675,
|
|
"learning_rate": 9.78825380422041e-06,
|
|
"loss": 3.3412439823150635,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.5594855305466238,
|
|
"grad_norm": 9.87643076192503,
|
|
"learning_rate": 9.78282950972922e-06,
|
|
"loss": 2.7117419242858887,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.5627009646302251,
|
|
"grad_norm": 10.242385683099888,
|
|
"learning_rate": 9.77733815575263e-06,
|
|
"loss": 3.1940979957580566,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.5659163987138264,
|
|
"grad_norm": 13.773999328309266,
|
|
"learning_rate": 9.771779819284257e-06,
|
|
"loss": 2.418674945831299,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.5691318327974276,
|
|
"grad_norm": 18.81855085740899,
|
|
"learning_rate": 9.766154578256883e-06,
|
|
"loss": 2.819211959838867,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.572347266881029,
|
|
"grad_norm": 18.832073296958836,
|
|
"learning_rate": 9.76046251154134e-06,
|
|
"loss": 3.260634183883667,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.5755627009646302,
|
|
"grad_norm": 14.459091987874368,
|
|
"learning_rate": 9.754703698945425e-06,
|
|
"loss": 3.002488136291504,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.5787781350482315,
|
|
"grad_norm": 9.482812229157702,
|
|
"learning_rate": 9.748878221212763e-06,
|
|
"loss": 3.066258668899536,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.5819935691318328,
|
|
"grad_norm": 12.867585721735884,
|
|
"learning_rate": 9.742986160021688e-06,
|
|
"loss": 3.4351935386657715,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.5852090032154341,
|
|
"grad_norm": 10.802517130903386,
|
|
"learning_rate": 9.73702759798409e-06,
|
|
"loss": 3.2599828243255615,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.5884244372990354,
|
|
"grad_norm": 24.934825581625855,
|
|
"learning_rate": 9.731002618644265e-06,
|
|
"loss": 2.5718865394592285,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.5916398713826366,
|
|
"grad_norm": 20.787769842276045,
|
|
"learning_rate": 9.724911306477729e-06,
|
|
"loss": 2.765693426132202,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.594855305466238,
|
|
"grad_norm": 9.109874314786204,
|
|
"learning_rate": 9.71875374689005e-06,
|
|
"loss": 3.426023244857788,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.5980707395498392,
|
|
"grad_norm": 21.279795009176986,
|
|
"learning_rate": 9.71253002621564e-06,
|
|
"loss": 3.001716375350952,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.6012861736334405,
|
|
"grad_norm": 10.787659736048406,
|
|
"learning_rate": 9.706240231716549e-06,
|
|
"loss": 2.950758218765259,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.6045016077170418,
|
|
"grad_norm": 12.733718582846585,
|
|
"learning_rate": 9.699884451581238e-06,
|
|
"loss": 3.2297606468200684,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.6077170418006431,
|
|
"grad_norm": 12.774631438040222,
|
|
"learning_rate": 9.693462774923351e-06,
|
|
"loss": 4.231553077697754,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.6109324758842444,
|
|
"grad_norm": 7.991710258799094,
|
|
"learning_rate": 9.686975291780449e-06,
|
|
"loss": 3.427185535430908,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.6141479099678456,
|
|
"grad_norm": 8.091734891391763,
|
|
"learning_rate": 9.68042209311277e-06,
|
|
"loss": 3.104320526123047,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.617363344051447,
|
|
"grad_norm": 9.939232309092867,
|
|
"learning_rate": 9.67380327080193e-06,
|
|
"loss": 2.5996108055114746,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.6205787781350482,
|
|
"grad_norm": 15.107188854133135,
|
|
"learning_rate": 9.667118917649656e-06,
|
|
"loss": 2.6025047302246094,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.6237942122186495,
|
|
"grad_norm": 13.565146098612988,
|
|
"learning_rate": 9.660369127376469e-06,
|
|
"loss": 2.7667369842529297,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.6270096463022508,
|
|
"grad_norm": 12.27977824054795,
|
|
"learning_rate": 9.653553994620378e-06,
|
|
"loss": 2.9813175201416016,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.6302250803858521,
|
|
"grad_norm": 16.18068581074389,
|
|
"learning_rate": 9.64667361493555e-06,
|
|
"loss": 3.094072103500366,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.6334405144694534,
|
|
"grad_norm": 7.584418927697848,
|
|
"learning_rate": 9.639728084790976e-06,
|
|
"loss": 2.9591763019561768,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.6366559485530546,
|
|
"grad_norm": 6.415026684299775,
|
|
"learning_rate": 9.632717501569106e-06,
|
|
"loss": 1.9626116752624512,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.639871382636656,
|
|
"grad_norm": 21.919505536575453,
|
|
"learning_rate": 9.6256419635645e-06,
|
|
"loss": 3.0262200832366943,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.6430868167202572,
|
|
"grad_norm": 9.532554532436253,
|
|
"learning_rate": 9.618501569982437e-06,
|
|
"loss": 2.373394012451172,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.6463022508038585,
|
|
"grad_norm": 9.33134950216214,
|
|
"learning_rate": 9.611296420937526e-06,
|
|
"loss": 2.601590633392334,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.6495176848874598,
|
|
"grad_norm": 13.972297231734744,
|
|
"learning_rate": 9.60402661745231e-06,
|
|
"loss": 3.3032302856445312,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.6527331189710611,
|
|
"grad_norm": 12.123983986715679,
|
|
"learning_rate": 9.59669226145584e-06,
|
|
"loss": 2.55745267868042,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.6559485530546624,
|
|
"grad_norm": 13.044678435205267,
|
|
"learning_rate": 9.589293455782253e-06,
|
|
"loss": 3.162076950073242,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.6591639871382636,
|
|
"grad_norm": 12.717015520085477,
|
|
"learning_rate": 9.581830304169325e-06,
|
|
"loss": 3.104783535003662,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.662379421221865,
|
|
"grad_norm": 10.909068293638112,
|
|
"learning_rate": 9.574302911257021e-06,
|
|
"loss": 1.5882987976074219,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.6655948553054662,
|
|
"grad_norm": 16.856966099264525,
|
|
"learning_rate": 9.566711382586022e-06,
|
|
"loss": 3.12563157081604,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.6688102893890675,
|
|
"grad_norm": 8.178490711172556,
|
|
"learning_rate": 9.559055824596252e-06,
|
|
"loss": 2.5417189598083496,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.6720257234726688,
|
|
"grad_norm": 24.246438880890675,
|
|
"learning_rate": 9.551336344625387e-06,
|
|
"loss": 3.221043109893799,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.6752411575562701,
|
|
"grad_norm": 8.34315714768651,
|
|
"learning_rate": 9.543553050907332e-06,
|
|
"loss": 2.767820358276367,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.6784565916398714,
|
|
"grad_norm": 28.687273320967922,
|
|
"learning_rate": 9.53570605257073e-06,
|
|
"loss": 4.299283027648926,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.6816720257234726,
|
|
"grad_norm": 9.232214067792807,
|
|
"learning_rate": 9.527795459637413e-06,
|
|
"loss": 2.867112398147583,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.684887459807074,
|
|
"grad_norm": 14.46539808412858,
|
|
"learning_rate": 9.519821383020866e-06,
|
|
"loss": 2.6538944244384766,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.6881028938906752,
|
|
"grad_norm": 16.762492706972427,
|
|
"learning_rate": 9.511783934524674e-06,
|
|
"loss": 3.379420280456543,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.6913183279742765,
|
|
"grad_norm": 11.417167539665027,
|
|
"learning_rate": 9.503683226840948e-06,
|
|
"loss": 2.9065451622009277,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.6945337620578779,
|
|
"grad_norm": 8.688764296327069,
|
|
"learning_rate": 9.495519373548748e-06,
|
|
"loss": 2.9622750282287598,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.6977491961414791,
|
|
"grad_norm": 17.306753656078968,
|
|
"learning_rate": 9.487292489112497e-06,
|
|
"loss": 3.1384975910186768,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.7009646302250804,
|
|
"grad_norm": 9.278446527178623,
|
|
"learning_rate": 9.479002688880362e-06,
|
|
"loss": 3.00459623336792,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.7041800643086816,
|
|
"grad_norm": 8.829166062791868,
|
|
"learning_rate": 9.470650089082649e-06,
|
|
"loss": 3.349134922027588,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.707395498392283,
|
|
"grad_norm": 8.252383756187383,
|
|
"learning_rate": 9.462234806830172e-06,
|
|
"loss": 2.8735666275024414,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.7106109324758842,
|
|
"grad_norm": 10.10461049277595,
|
|
"learning_rate": 9.453756960112605e-06,
|
|
"loss": 2.7908334732055664,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.7138263665594855,
|
|
"grad_norm": 11.263066058506535,
|
|
"learning_rate": 9.445216667796833e-06,
|
|
"loss": 2.5783145427703857,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.7170418006430869,
|
|
"grad_norm": 19.60227612362862,
|
|
"learning_rate": 9.436614049625277e-06,
|
|
"loss": 2.7622079849243164,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.7202572347266881,
|
|
"grad_norm": 20.895857573617988,
|
|
"learning_rate": 9.42794922621423e-06,
|
|
"loss": 3.2321324348449707,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.7234726688102894,
|
|
"grad_norm": 23.420564933331203,
|
|
"learning_rate": 9.419222319052154e-06,
|
|
"loss": 2.5579419136047363,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.7266881028938906,
|
|
"grad_norm": 12.853985003804183,
|
|
"learning_rate": 9.410433450497977e-06,
|
|
"loss": 3.260056495666504,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.729903536977492,
|
|
"grad_norm": 11.182131927651461,
|
|
"learning_rate": 9.401582743779384e-06,
|
|
"loss": 2.7521541118621826,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.7331189710610932,
|
|
"grad_norm": 19.802196143776204,
|
|
"learning_rate": 9.392670322991085e-06,
|
|
"loss": 2.986008644104004,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.7363344051446945,
|
|
"grad_norm": 10.66119164095427,
|
|
"learning_rate": 9.383696313093073e-06,
|
|
"loss": 3.1167283058166504,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.7395498392282959,
|
|
"grad_norm": 8.87831230718112,
|
|
"learning_rate": 9.374660839908881e-06,
|
|
"loss": 3.0863423347473145,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.7427652733118971,
|
|
"grad_norm": 16.804824808938967,
|
|
"learning_rate": 9.365564030123802e-06,
|
|
"loss": 3.310753345489502,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.7459807073954984,
|
|
"grad_norm": 14.686714392325005,
|
|
"learning_rate": 9.356406011283128e-06,
|
|
"loss": 2.494666576385498,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.7491961414790996,
|
|
"grad_norm": 12.638367443704174,
|
|
"learning_rate": 9.34718691179036e-06,
|
|
"loss": 3.037945508956909,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.752411575562701,
|
|
"grad_norm": 26.920374247029958,
|
|
"learning_rate": 9.337906860905394e-06,
|
|
"loss": 3.584845542907715,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.7556270096463023,
|
|
"grad_norm": 6.708394075133507,
|
|
"learning_rate": 9.328565988742723e-06,
|
|
"loss": 2.984691619873047,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.7588424437299035,
|
|
"grad_norm": 11.729278792048037,
|
|
"learning_rate": 9.31916442626961e-06,
|
|
"loss": 2.795103073120117,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.7620578778135049,
|
|
"grad_norm": 7.697881178490154,
|
|
"learning_rate": 9.30970230530425e-06,
|
|
"loss": 2.5230283737182617,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.7652733118971061,
|
|
"grad_norm": 8.848418169366562,
|
|
"learning_rate": 9.300179758513912e-06,
|
|
"loss": 2.849795341491699,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.7684887459807074,
|
|
"grad_norm": 9.64788428575172,
|
|
"learning_rate": 9.290596919413101e-06,
|
|
"loss": 2.4834206104278564,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.7717041800643086,
|
|
"grad_norm": 11.151899506614933,
|
|
"learning_rate": 9.280953922361667e-06,
|
|
"loss": 2.7844467163085938,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.77491961414791,
|
|
"grad_norm": 13.100289974037779,
|
|
"learning_rate": 9.271250902562925e-06,
|
|
"loss": 3.224181890487671,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.7781350482315113,
|
|
"grad_norm": 11.686317061932591,
|
|
"learning_rate": 9.26148799606177e-06,
|
|
"loss": 2.7271580696105957,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.7813504823151125,
|
|
"grad_norm": 17.10133837407015,
|
|
"learning_rate": 9.251665339742751e-06,
|
|
"loss": 2.834172248840332,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.7845659163987139,
|
|
"grad_norm": 9.175423032974624,
|
|
"learning_rate": 9.24178307132817e-06,
|
|
"loss": 2.8642072677612305,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.7877813504823151,
|
|
"grad_norm": 10.563682693957642,
|
|
"learning_rate": 9.231841329376142e-06,
|
|
"loss": 2.961083173751831,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.7909967845659164,
|
|
"grad_norm": 8.694998203842605,
|
|
"learning_rate": 9.22184025327865e-06,
|
|
"loss": 3.1648690700531006,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.7942122186495176,
|
|
"grad_norm": 13.321968130939467,
|
|
"learning_rate": 9.211779983259597e-06,
|
|
"loss": 2.65283203125,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.797427652733119,
|
|
"grad_norm": 8.370015100382718,
|
|
"learning_rate": 9.201660660372835e-06,
|
|
"loss": 2.8571534156799316,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.8006430868167203,
|
|
"grad_norm": 13.1283440171712,
|
|
"learning_rate": 9.191482426500192e-06,
|
|
"loss": 3.076096534729004,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.8038585209003215,
|
|
"grad_norm": 12.58763191241908,
|
|
"learning_rate": 9.181245424349477e-06,
|
|
"loss": 3.0239181518554688,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.8070739549839229,
|
|
"grad_norm": 10.995319596435253,
|
|
"learning_rate": 9.170949797452481e-06,
|
|
"loss": 2.6239326000213623,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.8102893890675241,
|
|
"grad_norm": 9.225322055367544,
|
|
"learning_rate": 9.160595690162974e-06,
|
|
"loss": 2.5555763244628906,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.8135048231511254,
|
|
"grad_norm": 11.507809934321713,
|
|
"learning_rate": 9.15018324765466e-06,
|
|
"loss": 2.371147871017456,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.8167202572347267,
|
|
"grad_norm": 12.469884255360506,
|
|
"learning_rate": 9.139712615919163e-06,
|
|
"loss": 2.494548797607422,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.819935691318328,
|
|
"grad_norm": 10.895498922897257,
|
|
"learning_rate": 9.129183941763971e-06,
|
|
"loss": 2.974158525466919,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.8231511254019293,
|
|
"grad_norm": 9.163142573864613,
|
|
"learning_rate": 9.118597372810374e-06,
|
|
"loss": 2.792419672012329,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.8263665594855305,
|
|
"grad_norm": 7.256984326103208,
|
|
"learning_rate": 9.107953057491399e-06,
|
|
"loss": 2.502934694290161,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.8295819935691319,
|
|
"grad_norm": 9.28895883091526,
|
|
"learning_rate": 9.09725114504973e-06,
|
|
"loss": 3.282193899154663,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.8327974276527331,
|
|
"grad_norm": 12.653451743754152,
|
|
"learning_rate": 9.086491785535613e-06,
|
|
"loss": 2.6221415996551514,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.8360128617363344,
|
|
"grad_norm": 12.157053154731505,
|
|
"learning_rate": 9.07567512980475e-06,
|
|
"loss": 2.594520330429077,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.8392282958199357,
|
|
"grad_norm": 16.45711676935307,
|
|
"learning_rate": 9.064801329516192e-06,
|
|
"loss": 3.279817819595337,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.842443729903537,
|
|
"grad_norm": 13.973378629771124,
|
|
"learning_rate": 9.053870537130198e-06,
|
|
"loss": 3.408698558807373,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.8456591639871383,
|
|
"grad_norm": 16.62805609651466,
|
|
"learning_rate": 9.042882905906118e-06,
|
|
"loss": 2.918642997741699,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.8488745980707395,
|
|
"grad_norm": 9.44966384136106,
|
|
"learning_rate": 9.03183858990022e-06,
|
|
"loss": 2.65049409866333,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.8520900321543409,
|
|
"grad_norm": 13.363066970747031,
|
|
"learning_rate": 9.020737743963555e-06,
|
|
"loss": 3.2480692863464355,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.8553054662379421,
|
|
"grad_norm": 12.666421815128665,
|
|
"learning_rate": 9.009580523739763e-06,
|
|
"loss": 3.1771302223205566,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.8585209003215434,
|
|
"grad_norm": 9.17254667692124,
|
|
"learning_rate": 8.998367085662908e-06,
|
|
"loss": 2.5076744556427,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.8617363344051447,
|
|
"grad_norm": 10.593922036651545,
|
|
"learning_rate": 8.987097586955276e-06,
|
|
"loss": 2.7998993396759033,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.864951768488746,
|
|
"grad_norm": 14.910537978617421,
|
|
"learning_rate": 8.97577218562517e-06,
|
|
"loss": 2.735177755355835,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.8681672025723473,
|
|
"grad_norm": 7.437460777250785,
|
|
"learning_rate": 8.964391040464699e-06,
|
|
"loss": 2.8440442085266113,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.8713826366559485,
|
|
"grad_norm": 9.856724829654736,
|
|
"learning_rate": 8.952954311047554e-06,
|
|
"loss": 3.18597674369812,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.8745980707395499,
|
|
"grad_norm": 12.798619966866845,
|
|
"learning_rate": 8.941462157726757e-06,
|
|
"loss": 2.6696226596832275,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.8778135048231511,
|
|
"grad_norm": 10.558246728168221,
|
|
"learning_rate": 8.92991474163243e-06,
|
|
"loss": 3.1313624382019043,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.8810289389067524,
|
|
"grad_norm": 73.1769401363515,
|
|
"learning_rate": 8.918312224669523e-06,
|
|
"loss": 2.7278852462768555,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.8842443729903537,
|
|
"grad_norm": 14.250721035805938,
|
|
"learning_rate": 8.906654769515551e-06,
|
|
"loss": 3.123018264770508,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.887459807073955,
|
|
"grad_norm": 13.624305251997455,
|
|
"learning_rate": 8.89494253961831e-06,
|
|
"loss": 2.5999131202697754,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.8906752411575563,
|
|
"grad_norm": 7.8025103094357,
|
|
"learning_rate": 8.883175699193589e-06,
|
|
"loss": 3.3754043579101562,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.8938906752411575,
|
|
"grad_norm": 14.813200674717987,
|
|
"learning_rate": 8.871354413222859e-06,
|
|
"loss": 2.477343797683716,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.8971061093247589,
|
|
"grad_norm": 16.927933274503733,
|
|
"learning_rate": 8.85947884745097e-06,
|
|
"loss": 3.132758140563965,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.9003215434083601,
|
|
"grad_norm": 7.191780346342989,
|
|
"learning_rate": 8.847549168383823e-06,
|
|
"loss": 2.820451021194458,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.9035369774919614,
|
|
"grad_norm": 8.389707727309089,
|
|
"learning_rate": 8.835565543286031e-06,
|
|
"loss": 2.6014206409454346,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.9067524115755627,
|
|
"grad_norm": 11.378077478241833,
|
|
"learning_rate": 8.82352814017858e-06,
|
|
"loss": 2.541848659515381,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.909967845659164,
|
|
"grad_norm": 8.237217625528718,
|
|
"learning_rate": 8.811437127836477e-06,
|
|
"loss": 2.961427688598633,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.9131832797427653,
|
|
"grad_norm": 14.816053043362626,
|
|
"learning_rate": 8.799292675786365e-06,
|
|
"loss": 2.8450682163238525,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.9163987138263665,
|
|
"grad_norm": 16.057064979343828,
|
|
"learning_rate": 8.787094954304172e-06,
|
|
"loss": 3.3827338218688965,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.9196141479099679,
|
|
"grad_norm": 10.580135043478649,
|
|
"learning_rate": 8.7748441344127e-06,
|
|
"loss": 2.749037265777588,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.9228295819935691,
|
|
"grad_norm": 13.251352277326397,
|
|
"learning_rate": 8.762540387879245e-06,
|
|
"loss": 2.7875254154205322,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.9260450160771704,
|
|
"grad_norm": 12.609710642983892,
|
|
"learning_rate": 8.75018388721318e-06,
|
|
"loss": 2.7887279987335205,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.9292604501607717,
|
|
"grad_norm": 14.143996593057057,
|
|
"learning_rate": 8.73777480566353e-06,
|
|
"loss": 3.5317232608795166,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.932475884244373,
|
|
"grad_norm": 10.953951580514532,
|
|
"learning_rate": 8.725313317216558e-06,
|
|
"loss": 2.695559024810791,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.9356913183279743,
|
|
"grad_norm": 19.652789221795818,
|
|
"learning_rate": 8.712799596593317e-06,
|
|
"loss": 3.773618698120117,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.9389067524115756,
|
|
"grad_norm": 28.646917195900233,
|
|
"learning_rate": 8.7002338192472e-06,
|
|
"loss": 2.8618407249450684,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.9421221864951769,
|
|
"grad_norm": 14.650325645648199,
|
|
"learning_rate": 8.68761616136148e-06,
|
|
"loss": 2.663022518157959,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.9453376205787781,
|
|
"grad_norm": 14.205451486366611,
|
|
"learning_rate": 8.674946799846844e-06,
|
|
"loss": 2.731468677520752,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.9485530546623794,
|
|
"grad_norm": 12.68853285402269,
|
|
"learning_rate": 8.662225912338906e-06,
|
|
"loss": 3.047337293624878,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.9517684887459807,
|
|
"grad_norm": 9.93366549634143,
|
|
"learning_rate": 8.64945367719572e-06,
|
|
"loss": 2.7521162033081055,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.954983922829582,
|
|
"grad_norm": 9.799333221523396,
|
|
"learning_rate": 8.636630273495284e-06,
|
|
"loss": 2.8862874507904053,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.9581993569131833,
|
|
"grad_norm": 9.853206555821423,
|
|
"learning_rate": 8.623755881033016e-06,
|
|
"loss": 2.4406180381774902,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.9614147909967846,
|
|
"grad_norm": 15.424788267085962,
|
|
"learning_rate": 8.61083068031925e-06,
|
|
"loss": 2.9202933311462402,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.9646302250803859,
|
|
"grad_norm": 8.470590376610176,
|
|
"learning_rate": 8.59785485257669e-06,
|
|
"loss": 2.7283151149749756,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.9678456591639871,
|
|
"grad_norm": 11.638302591399707,
|
|
"learning_rate": 8.58482857973788e-06,
|
|
"loss": 3.007424831390381,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.9710610932475884,
|
|
"grad_norm": 20.741792977624527,
|
|
"learning_rate": 8.571752044442645e-06,
|
|
"loss": 3.5147528648376465,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.9742765273311897,
|
|
"grad_norm": 13.142922472996343,
|
|
"learning_rate": 8.558625430035537e-06,
|
|
"loss": 2.6513309478759766,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.977491961414791,
|
|
"grad_norm": 16.4573242676752,
|
|
"learning_rate": 8.54544892056326e-06,
|
|
"loss": 2.8666458129882812,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.9807073954983923,
|
|
"grad_norm": 11.252470074853187,
|
|
"learning_rate": 8.53222270077209e-06,
|
|
"loss": 3.155954360961914,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.9839228295819936,
|
|
"grad_norm": 9.53791139505571,
|
|
"learning_rate": 8.518946956105288e-06,
|
|
"loss": 2.705427646636963,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.9871382636655949,
|
|
"grad_norm": 8.995699850871455,
|
|
"learning_rate": 8.505621872700493e-06,
|
|
"loss": 2.7899868488311768,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.9903536977491961,
|
|
"grad_norm": 20.466444028142707,
|
|
"learning_rate": 8.492247637387123e-06,
|
|
"loss": 3.3897032737731934,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.9935691318327974,
|
|
"grad_norm": 24.429378092820443,
|
|
"learning_rate": 8.478824437683742e-06,
|
|
"loss": 2.9029016494750977,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.9967845659163987,
|
|
"grad_norm": 14.9356195741505,
|
|
"learning_rate": 8.465352461795443e-06,
|
|
"loss": 2.8247499465942383,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 6.9393031111913785,
|
|
"learning_rate": 8.451831898611202e-06,
|
|
"loss": 1.8072712421417236,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 1.0032154340836013,
|
|
"grad_norm": 15.619600920043508,
|
|
"learning_rate": 8.438262937701232e-06,
|
|
"loss": 1.6938456296920776,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 1.0064308681672025,
|
|
"grad_norm": 11.204028481476685,
|
|
"learning_rate": 8.424645769314324e-06,
|
|
"loss": 1.7548950910568237,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 1.0096463022508038,
|
|
"grad_norm": 11.12284902940604,
|
|
"learning_rate": 8.410980584375184e-06,
|
|
"loss": 1.2501029968261719,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 1.0128617363344052,
|
|
"grad_norm": 15.282718019978093,
|
|
"learning_rate": 8.397267574481746e-06,
|
|
"loss": 1.7188260555267334,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 1.0160771704180065,
|
|
"grad_norm": 21.75639670646837,
|
|
"learning_rate": 8.3835069319025e-06,
|
|
"loss": 1.522129774093628,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 1.0192926045016077,
|
|
"grad_norm": 24.92647139454926,
|
|
"learning_rate": 8.369698849573778e-06,
|
|
"loss": 1.9183681011199951,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 1.022508038585209,
|
|
"grad_norm": 12.732330001189787,
|
|
"learning_rate": 8.355843521097071e-06,
|
|
"loss": 1.1024775505065918,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 1.0257234726688103,
|
|
"grad_norm": 10.813488074904523,
|
|
"learning_rate": 8.341941140736292e-06,
|
|
"loss": 1.8346397876739502,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 1.0289389067524115,
|
|
"grad_norm": 13.745993996373599,
|
|
"learning_rate": 8.327991903415071e-06,
|
|
"loss": 1.5781948566436768,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 1.0321543408360128,
|
|
"grad_norm": 10.618379447829842,
|
|
"learning_rate": 8.313996004714007e-06,
|
|
"loss": 2.0449671745300293,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 1.0353697749196142,
|
|
"grad_norm": 30.15610599886493,
|
|
"learning_rate": 8.29995364086794e-06,
|
|
"loss": 1.843071460723877,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 1.0385852090032155,
|
|
"grad_norm": 11.641323891727778,
|
|
"learning_rate": 8.285865008763185e-06,
|
|
"loss": 1.9475151300430298,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 1.0418006430868167,
|
|
"grad_norm": 10.055339873855122,
|
|
"learning_rate": 8.271730305934781e-06,
|
|
"loss": 1.6771236658096313,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 1.045016077170418,
|
|
"grad_norm": 19.242811918496102,
|
|
"learning_rate": 8.257549730563726e-06,
|
|
"loss": 1.799647331237793,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 1.0482315112540193,
|
|
"grad_norm": 9.640296031149678,
|
|
"learning_rate": 8.24332348147418e-06,
|
|
"loss": 1.5140706300735474,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 1.0514469453376205,
|
|
"grad_norm": 17.52024512085222,
|
|
"learning_rate": 8.229051758130697e-06,
|
|
"loss": 1.7540702819824219,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 1.0546623794212218,
|
|
"grad_norm": 12.09956162787055,
|
|
"learning_rate": 8.214734760635418e-06,
|
|
"loss": 1.0229500532150269,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 1.0578778135048232,
|
|
"grad_norm": 9.27508841346202,
|
|
"learning_rate": 8.200372689725265e-06,
|
|
"loss": 1.9475460052490234,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 1.0610932475884245,
|
|
"grad_norm": 19.21666339915877,
|
|
"learning_rate": 8.185965746769134e-06,
|
|
"loss": 1.4429758787155151,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 1.0643086816720257,
|
|
"grad_norm": 13.399563404218933,
|
|
"learning_rate": 8.171514133765062e-06,
|
|
"loss": 2.7338225841522217,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 1.067524115755627,
|
|
"grad_norm": 13.76033185328455,
|
|
"learning_rate": 8.157018053337401e-06,
|
|
"loss": 2.250379800796509,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 1.0707395498392283,
|
|
"grad_norm": 7.631778726340121,
|
|
"learning_rate": 8.142477708733977e-06,
|
|
"loss": 1.337722659111023,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 1.0739549839228295,
|
|
"grad_norm": 13.880528130342933,
|
|
"learning_rate": 8.127893303823237e-06,
|
|
"loss": 1.0721861124038696,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 1.077170418006431,
|
|
"grad_norm": 12.862974801515321,
|
|
"learning_rate": 8.113265043091393e-06,
|
|
"loss": 1.4026358127593994,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 1.0803858520900322,
|
|
"grad_norm": 9.49382053942382,
|
|
"learning_rate": 8.098593131639555e-06,
|
|
"loss": 1.9554322957992554,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 1.0836012861736335,
|
|
"grad_norm": 9.28245720635116,
|
|
"learning_rate": 8.083877775180851e-06,
|
|
"loss": 1.3829636573791504,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 1.0868167202572347,
|
|
"grad_norm": 9.50422506559844,
|
|
"learning_rate": 8.06911918003755e-06,
|
|
"loss": 1.4771521091461182,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 1.090032154340836,
|
|
"grad_norm": 14.885902266047808,
|
|
"learning_rate": 8.054317553138164e-06,
|
|
"loss": 1.9953043460845947,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 1.0932475884244373,
|
|
"grad_norm": 21.84891512532416,
|
|
"learning_rate": 8.039473102014552e-06,
|
|
"loss": 1.8568346500396729,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 1.0964630225080385,
|
|
"grad_norm": 10.983885337102643,
|
|
"learning_rate": 8.024586034798998e-06,
|
|
"loss": 1.0092703104019165,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 1.09967845659164,
|
|
"grad_norm": 9.853494378778448,
|
|
"learning_rate": 8.00965656022131e-06,
|
|
"loss": 1.3574286699295044,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 1.1028938906752412,
|
|
"grad_norm": 9.677987937855264,
|
|
"learning_rate": 7.994684887605877e-06,
|
|
"loss": 1.5812814235687256,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 1.1061093247588425,
|
|
"grad_norm": 12.943696081924468,
|
|
"learning_rate": 7.97967122686875e-06,
|
|
"loss": 1.5962257385253906,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 1.1093247588424437,
|
|
"grad_norm": 11.101835108804991,
|
|
"learning_rate": 7.964615788514683e-06,
|
|
"loss": 1.9959800243377686,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 1.112540192926045,
|
|
"grad_norm": 9.080973101963357,
|
|
"learning_rate": 7.949518783634191e-06,
|
|
"loss": 1.4967670440673828,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 1.1157556270096463,
|
|
"grad_norm": 13.434521540385626,
|
|
"learning_rate": 7.934380423900591e-06,
|
|
"loss": 1.5423383712768555,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 1.1189710610932475,
|
|
"grad_norm": 10.76431702775392,
|
|
"learning_rate": 7.919200921567029e-06,
|
|
"loss": 0.7710652351379395,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 1.122186495176849,
|
|
"grad_norm": 9.814715538896447,
|
|
"learning_rate": 7.903980489463507e-06,
|
|
"loss": 1.6633532047271729,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 1.1254019292604502,
|
|
"grad_norm": 19.545538078520554,
|
|
"learning_rate": 7.8887193409939e-06,
|
|
"loss": 2.7766261100769043,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 1.1286173633440515,
|
|
"grad_norm": 9.41961139939792,
|
|
"learning_rate": 7.87341769013296e-06,
|
|
"loss": 1.6282963752746582,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 1.1318327974276527,
|
|
"grad_norm": 21.731938311350465,
|
|
"learning_rate": 7.858075751423319e-06,
|
|
"loss": 1.9692919254302979,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 1.135048231511254,
|
|
"grad_norm": 16.44935380857812,
|
|
"learning_rate": 7.84269373997248e-06,
|
|
"loss": 1.912774682044983,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 1.1382636655948553,
|
|
"grad_norm": 13.72564993302605,
|
|
"learning_rate": 7.827271871449803e-06,
|
|
"loss": 1.8513641357421875,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 1.1414790996784565,
|
|
"grad_norm": 14.127395536555673,
|
|
"learning_rate": 7.811810362083476e-06,
|
|
"loss": 1.7648791074752808,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 1.144694533762058,
|
|
"grad_norm": 11.466500120151354,
|
|
"learning_rate": 7.79630942865749e-06,
|
|
"loss": 1.8777326345443726,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 1.1479099678456592,
|
|
"grad_norm": 16.23925105600561,
|
|
"learning_rate": 7.780769288508594e-06,
|
|
"loss": 1.4060571193695068,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 1.1511254019292605,
|
|
"grad_norm": 13.429215394228326,
|
|
"learning_rate": 7.76519015952325e-06,
|
|
"loss": 1.8052624464035034,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 1.1543408360128617,
|
|
"grad_norm": 11.074667268910995,
|
|
"learning_rate": 7.749572260134578e-06,
|
|
"loss": 1.251237392425537,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 1.157556270096463,
|
|
"grad_norm": 9.520190196974967,
|
|
"learning_rate": 7.733915809319295e-06,
|
|
"loss": 1.7426929473876953,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 1.1607717041800643,
|
|
"grad_norm": 13.259610020931154,
|
|
"learning_rate": 7.718221026594638e-06,
|
|
"loss": 1.3306620121002197,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 1.1639871382636655,
|
|
"grad_norm": 11.55865396171068,
|
|
"learning_rate": 7.7024881320153e-06,
|
|
"loss": 1.1201272010803223,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 1.167202572347267,
|
|
"grad_norm": 18.047432249398582,
|
|
"learning_rate": 7.686717346170323e-06,
|
|
"loss": 1.2232224941253662,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 1.1704180064308682,
|
|
"grad_norm": 19.950356361520544,
|
|
"learning_rate": 7.67090889018003e-06,
|
|
"loss": 1.5430934429168701,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 1.1736334405144695,
|
|
"grad_norm": 11.821218809655774,
|
|
"learning_rate": 7.655062985692905e-06,
|
|
"loss": 1.1287882328033447,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 1.1768488745980707,
|
|
"grad_norm": 16.3870537176483,
|
|
"learning_rate": 7.639179854882499e-06,
|
|
"loss": 1.8235803842544556,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 1.180064308681672,
|
|
"grad_norm": 12.215851800288341,
|
|
"learning_rate": 7.623259720444305e-06,
|
|
"loss": 1.5709896087646484,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 1.1832797427652733,
|
|
"grad_norm": 10.179196448113856,
|
|
"learning_rate": 7.6073028055926375e-06,
|
|
"loss": 1.3701753616333008,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 1.1864951768488745,
|
|
"grad_norm": 13.266482945914282,
|
|
"learning_rate": 7.591309334057511e-06,
|
|
"loss": 1.8047711849212646,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 1.189710610932476,
|
|
"grad_norm": 12.558221362384941,
|
|
"learning_rate": 7.5752795300814915e-06,
|
|
"loss": 1.3536028861999512,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 1.1929260450160772,
|
|
"grad_norm": 8.288395487014714,
|
|
"learning_rate": 7.5592136184165586e-06,
|
|
"loss": 1.7526684999465942,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 1.1961414790996785,
|
|
"grad_norm": 32.312646691375775,
|
|
"learning_rate": 7.543111824320956e-06,
|
|
"loss": 1.9506163597106934,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 1.1993569131832797,
|
|
"grad_norm": 18.190358055738578,
|
|
"learning_rate": 7.526974373556031e-06,
|
|
"loss": 1.4558610916137695,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 1.202572347266881,
|
|
"grad_norm": 22.456941575166663,
|
|
"learning_rate": 7.510801492383064e-06,
|
|
"loss": 3.8982784748077393,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 1.2057877813504823,
|
|
"grad_norm": 9.286565530142028,
|
|
"learning_rate": 7.494593407560105e-06,
|
|
"loss": 1.4054985046386719,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 1.2090032154340835,
|
|
"grad_norm": 8.57272869781517,
|
|
"learning_rate": 7.4783503463387915e-06,
|
|
"loss": 1.4290287494659424,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 1.212218649517685,
|
|
"grad_norm": 25.62750132146527,
|
|
"learning_rate": 7.462072536461158e-06,
|
|
"loss": 2.6208114624023438,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 1.2154340836012862,
|
|
"grad_norm": 29.086655268807295,
|
|
"learning_rate": 7.445760206156443e-06,
|
|
"loss": 1.4879982471466064,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 1.2186495176848875,
|
|
"grad_norm": 10.92323075616016,
|
|
"learning_rate": 7.429413584137899e-06,
|
|
"loss": 1.5636539459228516,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 1.2218649517684887,
|
|
"grad_norm": 131.58390317452395,
|
|
"learning_rate": 7.413032899599575e-06,
|
|
"loss": 2.5596606731414795,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 1.22508038585209,
|
|
"grad_norm": 10.911077112816377,
|
|
"learning_rate": 7.3966183822131055e-06,
|
|
"loss": 1.7480653524398804,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 1.2282958199356913,
|
|
"grad_norm": 14.750459586463144,
|
|
"learning_rate": 7.380170262124491e-06,
|
|
"loss": 1.1983137130737305,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 1.2315112540192925,
|
|
"grad_norm": 10.193526385520498,
|
|
"learning_rate": 7.363688769950874e-06,
|
|
"loss": 2.4147493839263916,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 1.234726688102894,
|
|
"grad_norm": 8.110765389360624,
|
|
"learning_rate": 7.347174136777303e-06,
|
|
"loss": 1.5930522680282593,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 1.2379421221864952,
|
|
"grad_norm": 13.379838012098837,
|
|
"learning_rate": 7.33062659415349e-06,
|
|
"loss": 1.545811414718628,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 1.2411575562700965,
|
|
"grad_norm": 12.491186334322332,
|
|
"learning_rate": 7.314046374090569e-06,
|
|
"loss": 1.4453141689300537,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 1.2443729903536977,
|
|
"grad_norm": 11.673353502682332,
|
|
"learning_rate": 7.297433709057837e-06,
|
|
"loss": 1.6127538681030273,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 1.247588424437299,
|
|
"grad_norm": 14.231888382187387,
|
|
"learning_rate": 7.280788831979504e-06,
|
|
"loss": 1.3530693054199219,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 1.2508038585209003,
|
|
"grad_norm": 8.015144095452492,
|
|
"learning_rate": 7.264111976231416e-06,
|
|
"loss": 2.167546510696411,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 1.2540192926045015,
|
|
"grad_norm": 6.8301669716274125,
|
|
"learning_rate": 7.247403375637789e-06,
|
|
"loss": 1.1884214878082275,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 1.257234726688103,
|
|
"grad_norm": 9.740309887687731,
|
|
"learning_rate": 7.230663264467932e-06,
|
|
"loss": 1.8475289344787598,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 1.2604501607717042,
|
|
"grad_norm": 15.839480265530513,
|
|
"learning_rate": 7.213891877432957e-06,
|
|
"loss": 1.3596510887145996,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 1.2636655948553055,
|
|
"grad_norm": 8.14484618288492,
|
|
"learning_rate": 7.197089449682495e-06,
|
|
"loss": 1.1813664436340332,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 1.2668810289389068,
|
|
"grad_norm": 14.070099542557305,
|
|
"learning_rate": 7.180256216801392e-06,
|
|
"loss": 1.5710866451263428,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 1.270096463022508,
|
|
"grad_norm": 11.220505541098138,
|
|
"learning_rate": 7.163392414806409e-06,
|
|
"loss": 1.1523092985153198,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 1.2733118971061093,
|
|
"grad_norm": 9.667370363546029,
|
|
"learning_rate": 7.146498280142917e-06,
|
|
"loss": 1.584272861480713,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 1.2765273311897105,
|
|
"grad_norm": 7.714526098855273,
|
|
"learning_rate": 7.1295740496815715e-06,
|
|
"loss": 1.8063056468963623,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 1.279742765273312,
|
|
"grad_norm": 10.787378083877842,
|
|
"learning_rate": 7.112619960715004e-06,
|
|
"loss": 1.792249083518982,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 1.2829581993569132,
|
|
"grad_norm": 13.233918817558243,
|
|
"learning_rate": 7.095636250954481e-06,
|
|
"loss": 1.5670925378799438,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 1.2861736334405145,
|
|
"grad_norm": 11.79576476009515,
|
|
"learning_rate": 7.078623158526588e-06,
|
|
"loss": 0.8326504230499268,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 1.2893890675241158,
|
|
"grad_norm": 10.491299911652822,
|
|
"learning_rate": 7.061580921969875e-06,
|
|
"loss": 1.3786863088607788,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 1.292604501607717,
|
|
"grad_norm": 11.94691709161536,
|
|
"learning_rate": 7.044509780231517e-06,
|
|
"loss": 1.1054686307907104,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 1.2958199356913183,
|
|
"grad_norm": 12.08443122924739,
|
|
"learning_rate": 7.027409972663972e-06,
|
|
"loss": 1.670744776725769,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 1.2990353697749195,
|
|
"grad_norm": 12.818583379341405,
|
|
"learning_rate": 7.010281739021612e-06,
|
|
"loss": 1.4025758504867554,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 1.302250803858521,
|
|
"grad_norm": 17.63344093685557,
|
|
"learning_rate": 6.993125319457371e-06,
|
|
"loss": 1.3583598136901855,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 1.3054662379421222,
|
|
"grad_norm": 11.894949620821711,
|
|
"learning_rate": 6.975940954519372e-06,
|
|
"loss": 1.1004712581634521,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 1.3086816720257235,
|
|
"grad_norm": 12.892080489602101,
|
|
"learning_rate": 6.958728885147559e-06,
|
|
"loss": 1.2955303192138672,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 1.3118971061093248,
|
|
"grad_norm": 19.36393889877359,
|
|
"learning_rate": 6.941489352670315e-06,
|
|
"loss": 1.0670486688613892,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 1.315112540192926,
|
|
"grad_norm": 8.211056728192768,
|
|
"learning_rate": 6.92422259880108e-06,
|
|
"loss": 1.9638640880584717,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 1.3183279742765273,
|
|
"grad_norm": 10.833159131088287,
|
|
"learning_rate": 6.9069288656349654e-06,
|
|
"loss": 1.5076138973236084,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 1.3215434083601285,
|
|
"grad_norm": 8.58513007343669,
|
|
"learning_rate": 6.8896083956453495e-06,
|
|
"loss": 1.0038764476776123,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 1.32475884244373,
|
|
"grad_norm": 10.831768155951462,
|
|
"learning_rate": 6.87226143168049e-06,
|
|
"loss": 1.8994210958480835,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 1.3279742765273312,
|
|
"grad_norm": 9.967816587412674,
|
|
"learning_rate": 6.8548882169601125e-06,
|
|
"loss": 1.7425453662872314,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 1.3311897106109325,
|
|
"grad_norm": 11.573652006078895,
|
|
"learning_rate": 6.837488995071999e-06,
|
|
"loss": 0.9079417586326599,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 1.3344051446945338,
|
|
"grad_norm": 11.435087576974311,
|
|
"learning_rate": 6.820064009968577e-06,
|
|
"loss": 2.3625776767730713,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 1.337620578778135,
|
|
"grad_norm": 11.687306169218841,
|
|
"learning_rate": 6.802613505963496e-06,
|
|
"loss": 1.4717687368392944,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 1.3408360128617363,
|
|
"grad_norm": 7.907999407954611,
|
|
"learning_rate": 6.7851377277282025e-06,
|
|
"loss": 1.8274226188659668,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 1.3440514469453375,
|
|
"grad_norm": 13.33807600903979,
|
|
"learning_rate": 6.767636920288514e-06,
|
|
"loss": 0.9393669366836548,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 1.347266881028939,
|
|
"grad_norm": 18.07910651280989,
|
|
"learning_rate": 6.7501113290211715e-06,
|
|
"loss": 2.2995893955230713,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 1.3504823151125402,
|
|
"grad_norm": 13.219145457947517,
|
|
"learning_rate": 6.732561199650417e-06,
|
|
"loss": 0.9605876207351685,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 1.3536977491961415,
|
|
"grad_norm": 9.84083040492843,
|
|
"learning_rate": 6.71498677824453e-06,
|
|
"loss": 1.400538682937622,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 1.3569131832797428,
|
|
"grad_norm": 8.770195722914984,
|
|
"learning_rate": 6.69738831121239e-06,
|
|
"loss": 1.5933213233947754,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 1.360128617363344,
|
|
"grad_norm": 13.726188714507023,
|
|
"learning_rate": 6.679766045300017e-06,
|
|
"loss": 1.3214967250823975,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 1.3633440514469453,
|
|
"grad_norm": 11.733403335264603,
|
|
"learning_rate": 6.66212022758711e-06,
|
|
"loss": 1.8377995491027832,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 1.3665594855305465,
|
|
"grad_norm": 10.482810176903879,
|
|
"learning_rate": 6.644451105483588e-06,
|
|
"loss": 2.171910047531128,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 1.369774919614148,
|
|
"grad_norm": 10.272158746941752,
|
|
"learning_rate": 6.626758926726118e-06,
|
|
"loss": 1.01298987865448,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 1.3729903536977492,
|
|
"grad_norm": 10.850492383150184,
|
|
"learning_rate": 6.609043939374638e-06,
|
|
"loss": 1.3563112020492554,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 1.3762057877813505,
|
|
"grad_norm": 9.52772114565326,
|
|
"learning_rate": 6.591306391808886e-06,
|
|
"loss": 1.1524646282196045,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 1.3794212218649518,
|
|
"grad_norm": 9.634527311630647,
|
|
"learning_rate": 6.5735465327249125e-06,
|
|
"loss": 1.288474202156067,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 1.382636655948553,
|
|
"grad_norm": 15.933483184745002,
|
|
"learning_rate": 6.555764611131599e-06,
|
|
"loss": 1.6986860036849976,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 1.3858520900321543,
|
|
"grad_norm": 12.386422766725826,
|
|
"learning_rate": 6.537960876347155e-06,
|
|
"loss": 1.1839923858642578,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 1.3890675241157555,
|
|
"grad_norm": 10.253938139276935,
|
|
"learning_rate": 6.520135577995636e-06,
|
|
"loss": 1.0426067113876343,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 1.392282958199357,
|
|
"grad_norm": 11.04901140382035,
|
|
"learning_rate": 6.502288966003437e-06,
|
|
"loss": 1.5624547004699707,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 1.3954983922829582,
|
|
"grad_norm": 11.25738538359911,
|
|
"learning_rate": 6.48442129059579e-06,
|
|
"loss": 1.6425249576568604,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 1.3987138263665595,
|
|
"grad_norm": 7.369100897838746,
|
|
"learning_rate": 6.4665328022932505e-06,
|
|
"loss": 1.1195060014724731,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 1.4019292604501608,
|
|
"grad_norm": 8.920734482221718,
|
|
"learning_rate": 6.448623751908193e-06,
|
|
"loss": 1.9613807201385498,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 1.405144694533762,
|
|
"grad_norm": 17.2717195962134,
|
|
"learning_rate": 6.43069439054129e-06,
|
|
"loss": 1.9818463325500488,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 1.4083601286173635,
|
|
"grad_norm": 10.291894233810238,
|
|
"learning_rate": 6.4127449695779894e-06,
|
|
"loss": 1.321189284324646,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 1.4115755627009645,
|
|
"grad_norm": 14.652832324526607,
|
|
"learning_rate": 6.394775740684996e-06,
|
|
"loss": 1.653143048286438,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 1.414790996784566,
|
|
"grad_norm": 7.091089961179019,
|
|
"learning_rate": 6.376786955806735e-06,
|
|
"loss": 1.3114051818847656,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 1.4180064308681672,
|
|
"grad_norm": 8.725539866872378,
|
|
"learning_rate": 6.358778867161829e-06,
|
|
"loss": 1.6939886808395386,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 1.4212218649517685,
|
|
"grad_norm": 13.545006306769176,
|
|
"learning_rate": 6.340751727239551e-06,
|
|
"loss": 1.195266842842102,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 1.4244372990353698,
|
|
"grad_norm": 8.297440104157152,
|
|
"learning_rate": 6.322705788796293e-06,
|
|
"loss": 1.6877739429473877,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 1.427652733118971,
|
|
"grad_norm": 9.61755742164904,
|
|
"learning_rate": 6.304641304852017e-06,
|
|
"loss": 1.8234769105911255,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 1.4308681672025725,
|
|
"grad_norm": 8.53099814778349,
|
|
"learning_rate": 6.286558528686713e-06,
|
|
"loss": 1.707655429840088,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 1.4340836012861735,
|
|
"grad_norm": 16.09095041323655,
|
|
"learning_rate": 6.268457713836839e-06,
|
|
"loss": 1.685136318206787,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 1.437299035369775,
|
|
"grad_norm": 9.220851793207403,
|
|
"learning_rate": 6.250339114091775e-06,
|
|
"loss": 1.8174901008605957,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 1.4405144694533762,
|
|
"grad_norm": 16.91990210354561,
|
|
"learning_rate": 6.2322029834902565e-06,
|
|
"loss": 1.2479822635650635,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 1.4437299035369775,
|
|
"grad_norm": 18.222539873072662,
|
|
"learning_rate": 6.214049576316824e-06,
|
|
"loss": 2.1364529132843018,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 1.4469453376205788,
|
|
"grad_norm": 8.829662303717768,
|
|
"learning_rate": 6.195879147098246e-06,
|
|
"loss": 1.1708037853240967,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 1.45016077170418,
|
|
"grad_norm": 9.811681806402762,
|
|
"learning_rate": 6.177691950599953e-06,
|
|
"loss": 1.7463445663452148,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 1.4533762057877815,
|
|
"grad_norm": 15.76003868979169,
|
|
"learning_rate": 6.159488241822473e-06,
|
|
"loss": 1.4239546060562134,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 1.4565916398713825,
|
|
"grad_norm": 13.819031689596322,
|
|
"learning_rate": 6.141268275997848e-06,
|
|
"loss": 1.0984452962875366,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 1.459807073954984,
|
|
"grad_norm": 9.216205799504845,
|
|
"learning_rate": 6.123032308586059e-06,
|
|
"loss": 1.6430256366729736,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 1.4630225080385852,
|
|
"grad_norm": 12.706806062500931,
|
|
"learning_rate": 6.10478059527144e-06,
|
|
"loss": 1.4744967222213745,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 1.4662379421221865,
|
|
"grad_norm": 9.724752942795591,
|
|
"learning_rate": 6.086513391959101e-06,
|
|
"loss": 1.6440513134002686,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 1.4694533762057878,
|
|
"grad_norm": 12.12251316195964,
|
|
"learning_rate": 6.068230954771334e-06,
|
|
"loss": 1.4198493957519531,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 1.472668810289389,
|
|
"grad_norm": 10.329929581969795,
|
|
"learning_rate": 6.0499335400440216e-06,
|
|
"loss": 1.5990746021270752,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 1.4758842443729905,
|
|
"grad_norm": 18.5583606495554,
|
|
"learning_rate": 6.031621404323046e-06,
|
|
"loss": 1.1847198009490967,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 1.4790996784565915,
|
|
"grad_norm": 15.993991523648914,
|
|
"learning_rate": 6.013294804360689e-06,
|
|
"loss": 0.9134633541107178,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 1.482315112540193,
|
|
"grad_norm": 9.490813889864704,
|
|
"learning_rate": 5.9949539971120405e-06,
|
|
"loss": 2.0340170860290527,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 1.4855305466237942,
|
|
"grad_norm": 11.37889692680751,
|
|
"learning_rate": 5.976599239731381e-06,
|
|
"loss": 1.8881299495697021,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 1.4887459807073955,
|
|
"grad_norm": 9.569629046002419,
|
|
"learning_rate": 5.9582307895685876e-06,
|
|
"loss": 1.7137913703918457,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 1.4919614147909968,
|
|
"grad_norm": 12.420204461040186,
|
|
"learning_rate": 5.939848904165519e-06,
|
|
"loss": 1.2723381519317627,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 1.495176848874598,
|
|
"grad_norm": 11.926795629917983,
|
|
"learning_rate": 5.9214538412524155e-06,
|
|
"loss": 1.1736359596252441,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 1.4983922829581995,
|
|
"grad_norm": 12.405298290422278,
|
|
"learning_rate": 5.903045858744271e-06,
|
|
"loss": 1.3090627193450928,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 1.5016077170418005,
|
|
"grad_norm": 15.523116313387339,
|
|
"learning_rate": 5.884625214737224e-06,
|
|
"loss": 1.4756783246994019,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 1.504823151125402,
|
|
"grad_norm": 10.93229883191674,
|
|
"learning_rate": 5.866192167504941e-06,
|
|
"loss": 1.6622803211212158,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 1.5080385852090032,
|
|
"grad_norm": 17.915289620044277,
|
|
"learning_rate": 5.84774697549499e-06,
|
|
"loss": 1.6512175798416138,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 1.5112540192926045,
|
|
"grad_norm": 30.283483282050263,
|
|
"learning_rate": 5.8292898973252246e-06,
|
|
"loss": 1.4214634895324707,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 1.5144694533762058,
|
|
"grad_norm": 15.810670228159147,
|
|
"learning_rate": 5.810821191780146e-06,
|
|
"loss": 1.1714420318603516,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 1.517684887459807,
|
|
"grad_norm": 13.058805569793217,
|
|
"learning_rate": 5.792341117807284e-06,
|
|
"loss": 1.4205889701843262,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 1.5209003215434085,
|
|
"grad_norm": 10.022823791961894,
|
|
"learning_rate": 5.773849934513568e-06,
|
|
"loss": 1.419925570487976,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 1.5241157556270095,
|
|
"grad_norm": 12.48707482451956,
|
|
"learning_rate": 5.755347901161683e-06,
|
|
"loss": 1.8584307432174683,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 1.527331189710611,
|
|
"grad_norm": 12.857583603145283,
|
|
"learning_rate": 5.736835277166446e-06,
|
|
"loss": 2.2646093368530273,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 1.5305466237942122,
|
|
"grad_norm": 10.274067777361985,
|
|
"learning_rate": 5.7183123220911615e-06,
|
|
"loss": 0.9686712622642517,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 1.5337620578778135,
|
|
"grad_norm": 29.493341811852122,
|
|
"learning_rate": 5.699779295643988e-06,
|
|
"loss": 0.964187741279602,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 1.5369774919614148,
|
|
"grad_norm": 13.55003021738102,
|
|
"learning_rate": 5.68123645767429e-06,
|
|
"loss": 1.0403389930725098,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 1.540192926045016,
|
|
"grad_norm": 12.54908295885377,
|
|
"learning_rate": 5.662684068169002e-06,
|
|
"loss": 1.6527446508407593,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 1.5434083601286175,
|
|
"grad_norm": 6.610317425915739,
|
|
"learning_rate": 5.644122387248975e-06,
|
|
"loss": 1.4375754594802856,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 1.5466237942122185,
|
|
"grad_norm": 11.06854659241844,
|
|
"learning_rate": 5.6255516751653376e-06,
|
|
"loss": 1.3001888990402222,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 1.54983922829582,
|
|
"grad_norm": 21.599505542000774,
|
|
"learning_rate": 5.606972192295841e-06,
|
|
"loss": 1.9102635383605957,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 1.5530546623794212,
|
|
"grad_norm": 11.405842693936945,
|
|
"learning_rate": 5.588384199141211e-06,
|
|
"loss": 1.1081337928771973,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 1.5562700964630225,
|
|
"grad_norm": 10.182874123460897,
|
|
"learning_rate": 5.569787956321496e-06,
|
|
"loss": 1.2650421857833862,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 1.5594855305466238,
|
|
"grad_norm": 13.134492774505976,
|
|
"learning_rate": 5.551183724572411e-06,
|
|
"loss": 1.2185710668563843,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 1.562700964630225,
|
|
"grad_norm": 15.44354876414701,
|
|
"learning_rate": 5.532571764741686e-06,
|
|
"loss": 1.2483716011047363,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 1.5659163987138265,
|
|
"grad_norm": 13.467838369490899,
|
|
"learning_rate": 5.513952337785398e-06,
|
|
"loss": 1.7064783573150635,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 1.5691318327974275,
|
|
"grad_norm": 11.973440553832287,
|
|
"learning_rate": 5.4953257047643284e-06,
|
|
"loss": 1.9306564331054688,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 1.572347266881029,
|
|
"grad_norm": 12.222087950483303,
|
|
"learning_rate": 5.476692126840287e-06,
|
|
"loss": 1.796401858329773,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 1.5755627009646302,
|
|
"grad_norm": 14.251036373638994,
|
|
"learning_rate": 5.458051865272462e-06,
|
|
"loss": 1.9565566778182983,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 1.5787781350482315,
|
|
"grad_norm": 12.909098298317854,
|
|
"learning_rate": 5.439405181413752e-06,
|
|
"loss": 1.7262601852416992,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 1.5819935691318328,
|
|
"grad_norm": 9.663223095722376,
|
|
"learning_rate": 5.420752336707098e-06,
|
|
"loss": 1.2656997442245483,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 1.585209003215434,
|
|
"grad_norm": 15.520726456430584,
|
|
"learning_rate": 5.402093592681823e-06,
|
|
"loss": 1.3866935968399048,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 1.5884244372990355,
|
|
"grad_norm": 10.917894975556058,
|
|
"learning_rate": 5.383429210949967e-06,
|
|
"loss": 0.8932974934577942,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 1.5916398713826365,
|
|
"grad_norm": 13.277750253841756,
|
|
"learning_rate": 5.36475945320261e-06,
|
|
"loss": 1.5216851234436035,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 1.594855305466238,
|
|
"grad_norm": 12.30741363554072,
|
|
"learning_rate": 5.346084581206215e-06,
|
|
"loss": 1.0728681087493896,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 1.5980707395498392,
|
|
"grad_norm": 29.029933055790377,
|
|
"learning_rate": 5.327404856798944e-06,
|
|
"loss": 1.913758635520935,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 1.6012861736334405,
|
|
"grad_norm": 12.42781087693241,
|
|
"learning_rate": 5.3087205418870014e-06,
|
|
"loss": 1.6854526996612549,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 1.6045016077170418,
|
|
"grad_norm": 9.018921661705617,
|
|
"learning_rate": 5.29003189844095e-06,
|
|
"loss": 1.147437334060669,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 1.607717041800643,
|
|
"grad_norm": 13.581361587228178,
|
|
"learning_rate": 5.2713391884920415e-06,
|
|
"loss": 2.265592098236084,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 1.6109324758842445,
|
|
"grad_norm": 9.087003042899404,
|
|
"learning_rate": 5.2526426741285465e-06,
|
|
"loss": 1.6772857904434204,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 1.6141479099678455,
|
|
"grad_norm": 12.529074715254756,
|
|
"learning_rate": 5.233942617492077e-06,
|
|
"loss": 1.633618712425232,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 1.617363344051447,
|
|
"grad_norm": 12.64649957772085,
|
|
"learning_rate": 5.215239280773908e-06,
|
|
"loss": 1.5846664905548096,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 1.6205787781350482,
|
|
"grad_norm": 8.149292574594154,
|
|
"learning_rate": 5.196532926211307e-06,
|
|
"loss": 1.4743802547454834,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 1.6237942122186495,
|
|
"grad_norm": 30.942623906686798,
|
|
"learning_rate": 5.177823816083853e-06,
|
|
"loss": 1.7887756824493408,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 1.6270096463022508,
|
|
"grad_norm": 10.656347918532477,
|
|
"learning_rate": 5.15911221270976e-06,
|
|
"loss": 1.8924931287765503,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 1.630225080385852,
|
|
"grad_norm": 11.982264969959546,
|
|
"learning_rate": 5.140398378442201e-06,
|
|
"loss": 1.5837712287902832,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 1.6334405144694535,
|
|
"grad_norm": 17.74630841676715,
|
|
"learning_rate": 5.121682575665625e-06,
|
|
"loss": 1.3969013690948486,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 1.6366559485530545,
|
|
"grad_norm": 13.757379611481515,
|
|
"learning_rate": 5.102965066792085e-06,
|
|
"loss": 1.3699804544448853,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 1.639871382636656,
|
|
"grad_norm": 9.471724169843402,
|
|
"learning_rate": 5.084246114257554e-06,
|
|
"loss": 1.3550987243652344,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 1.6430868167202572,
|
|
"grad_norm": 15.596683071578276,
|
|
"learning_rate": 5.065525980518244e-06,
|
|
"loss": 1.2851953506469727,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 1.6463022508038585,
|
|
"grad_norm": 10.623931822433907,
|
|
"learning_rate": 5.046804928046933e-06,
|
|
"loss": 1.3243783712387085,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 1.6495176848874598,
|
|
"grad_norm": 22.19363105957551,
|
|
"learning_rate": 5.028083219329274e-06,
|
|
"loss": 1.6754820346832275,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 1.652733118971061,
|
|
"grad_norm": 10.186022632613163,
|
|
"learning_rate": 5.009361116860129e-06,
|
|
"loss": 2.1871590614318848,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 1.6559485530546625,
|
|
"grad_norm": 14.398718167356659,
|
|
"learning_rate": 4.990638883139872e-06,
|
|
"loss": 0.9794585108757019,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 1.6591639871382635,
|
|
"grad_norm": 10.294011417163757,
|
|
"learning_rate": 4.9719167806707265e-06,
|
|
"loss": 1.329102635383606,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 1.662379421221865,
|
|
"grad_norm": 10.782193417515709,
|
|
"learning_rate": 4.953195071953069e-06,
|
|
"loss": 1.4713919162750244,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 1.6655948553054662,
|
|
"grad_norm": 20.256749627414443,
|
|
"learning_rate": 4.934474019481755e-06,
|
|
"loss": 1.6331861019134521,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 1.6688102893890675,
|
|
"grad_norm": 10.326694785789037,
|
|
"learning_rate": 4.915753885742446e-06,
|
|
"loss": 0.8953830003738403,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 1.6720257234726688,
|
|
"grad_norm": 34.510915012884716,
|
|
"learning_rate": 4.8970349332079155e-06,
|
|
"loss": 2.0606470108032227,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 1.67524115755627,
|
|
"grad_norm": 22.26005680178925,
|
|
"learning_rate": 4.8783174243343765e-06,
|
|
"loss": 1.9117261171340942,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 1.6784565916398715,
|
|
"grad_norm": 11.981892440704723,
|
|
"learning_rate": 4.8596016215578e-06,
|
|
"loss": 1.107745885848999,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 1.6816720257234725,
|
|
"grad_norm": 7.902869741318056,
|
|
"learning_rate": 4.8408877872902404e-06,
|
|
"loss": 2.056065559387207,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 1.684887459807074,
|
|
"grad_norm": 14.657507529582745,
|
|
"learning_rate": 4.822176183916147e-06,
|
|
"loss": 1.2189289331436157,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 1.6881028938906752,
|
|
"grad_norm": 15.323149691380317,
|
|
"learning_rate": 4.803467073788694e-06,
|
|
"loss": 1.1678478717803955,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 1.6913183279742765,
|
|
"grad_norm": 10.369820938531406,
|
|
"learning_rate": 4.7847607192260916e-06,
|
|
"loss": 1.3617630004882812,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 1.694533762057878,
|
|
"grad_norm": 16.278666347716534,
|
|
"learning_rate": 4.766057382507924e-06,
|
|
"loss": 1.8124133348464966,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 1.697749196141479,
|
|
"grad_norm": 11.617355712233303,
|
|
"learning_rate": 4.747357325871454e-06,
|
|
"loss": 1.7624785900115967,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 1.7009646302250805,
|
|
"grad_norm": 6.976469169369697,
|
|
"learning_rate": 4.72866081150796e-06,
|
|
"loss": 1.804181456565857,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 1.7041800643086815,
|
|
"grad_norm": 8.884998817103593,
|
|
"learning_rate": 4.709968101559051e-06,
|
|
"loss": 1.8973114490509033,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 1.707395498392283,
|
|
"grad_norm": 10.671057224558174,
|
|
"learning_rate": 4.6912794581129985e-06,
|
|
"loss": 2.0870800018310547,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 1.7106109324758842,
|
|
"grad_norm": 10.490926549117916,
|
|
"learning_rate": 4.672595143201056e-06,
|
|
"loss": 1.4478678703308105,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 1.7138263665594855,
|
|
"grad_norm": 10.312387100131009,
|
|
"learning_rate": 4.653915418793786e-06,
|
|
"loss": 1.0462535619735718,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 1.717041800643087,
|
|
"grad_norm": 12.221086012098088,
|
|
"learning_rate": 4.63524054679739e-06,
|
|
"loss": 1.0799921751022339,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 1.720257234726688,
|
|
"grad_norm": 6.827983273847641,
|
|
"learning_rate": 4.616570789050034e-06,
|
|
"loss": 0.9600023031234741,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 1.7234726688102895,
|
|
"grad_norm": 8.94999117796044,
|
|
"learning_rate": 4.5979064073181775e-06,
|
|
"loss": 1.4467713832855225,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 1.7266881028938905,
|
|
"grad_norm": 11.947768679414317,
|
|
"learning_rate": 4.579247663292903e-06,
|
|
"loss": 1.1485623121261597,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 1.729903536977492,
|
|
"grad_norm": 7.738903631330294,
|
|
"learning_rate": 4.560594818586248e-06,
|
|
"loss": 1.881239891052246,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 1.7331189710610932,
|
|
"grad_norm": 14.43039125664941,
|
|
"learning_rate": 4.541948134727538e-06,
|
|
"loss": 1.4121603965759277,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 1.7363344051446945,
|
|
"grad_norm": 13.559187134442164,
|
|
"learning_rate": 4.523307873159714e-06,
|
|
"loss": 1.600027084350586,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 1.739549839228296,
|
|
"grad_norm": 12.924451068775486,
|
|
"learning_rate": 4.504674295235673e-06,
|
|
"loss": 1.9366849660873413,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 1.742765273311897,
|
|
"grad_norm": 10.348503628222995,
|
|
"learning_rate": 4.486047662214602e-06,
|
|
"loss": 1.147647500038147,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 1.7459807073954985,
|
|
"grad_norm": 9.22563519797577,
|
|
"learning_rate": 4.467428235258315e-06,
|
|
"loss": 2.267954111099243,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 1.7491961414790995,
|
|
"grad_norm": 11.921588194327418,
|
|
"learning_rate": 4.448816275427589e-06,
|
|
"loss": 1.3431754112243652,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 1.752411575562701,
|
|
"grad_norm": 13.456836651748802,
|
|
"learning_rate": 4.430212043678506e-06,
|
|
"loss": 1.371949553489685,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 1.7556270096463023,
|
|
"grad_norm": 11.31057602108461,
|
|
"learning_rate": 4.41161580085879e-06,
|
|
"loss": 1.2015950679779053,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 1.7588424437299035,
|
|
"grad_norm": 11.775227684435517,
|
|
"learning_rate": 4.39302780770416e-06,
|
|
"loss": 1.2269978523254395,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 1.762057877813505,
|
|
"grad_norm": 9.277512918356958,
|
|
"learning_rate": 4.374448324834664e-06,
|
|
"loss": 1.1524686813354492,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 1.765273311897106,
|
|
"grad_norm": 13.986012721095971,
|
|
"learning_rate": 4.355877612751027e-06,
|
|
"loss": 0.8838063478469849,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 1.7684887459807075,
|
|
"grad_norm": 9.61624989824511,
|
|
"learning_rate": 4.337315931830999e-06,
|
|
"loss": 1.0831339359283447,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 1.7717041800643085,
|
|
"grad_norm": 10.157468448463078,
|
|
"learning_rate": 4.318763542325711e-06,
|
|
"loss": 1.1000258922576904,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 1.77491961414791,
|
|
"grad_norm": 9.188155688035968,
|
|
"learning_rate": 4.3002207043560135e-06,
|
|
"loss": 1.230742335319519,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 1.7781350482315113,
|
|
"grad_norm": 9.615692013478174,
|
|
"learning_rate": 4.28168767790884e-06,
|
|
"loss": 1.103325605392456,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 1.7813504823151125,
|
|
"grad_norm": 8.302705452574013,
|
|
"learning_rate": 4.263164722833556e-06,
|
|
"loss": 1.3692865371704102,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 1.784565916398714,
|
|
"grad_norm": 14.643567177677879,
|
|
"learning_rate": 4.2446520988383185e-06,
|
|
"loss": 0.6936740875244141,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 1.787781350482315,
|
|
"grad_norm": 9.360894269774661,
|
|
"learning_rate": 4.226150065486434e-06,
|
|
"loss": 0.7901575565338135,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 1.7909967845659165,
|
|
"grad_norm": 12.472712807562056,
|
|
"learning_rate": 4.207658882192717e-06,
|
|
"loss": 1.3338205814361572,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 1.7942122186495175,
|
|
"grad_norm": 11.717589080001146,
|
|
"learning_rate": 4.189178808219856e-06,
|
|
"loss": 1.3363522291183472,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 1.797427652733119,
|
|
"grad_norm": 10.30067614909667,
|
|
"learning_rate": 4.170710102674777e-06,
|
|
"loss": 1.8522659540176392,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 1.8006430868167203,
|
|
"grad_norm": 9.820476935188125,
|
|
"learning_rate": 4.152253024505011e-06,
|
|
"loss": 1.9775140285491943,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 1.8038585209003215,
|
|
"grad_norm": 10.166187506670667,
|
|
"learning_rate": 4.133807832495062e-06,
|
|
"loss": 1.0015840530395508,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 1.807073954983923,
|
|
"grad_norm": 15.340234839194505,
|
|
"learning_rate": 4.1153747852627775e-06,
|
|
"loss": 1.753740906715393,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 1.810289389067524,
|
|
"grad_norm": 8.962676705869463,
|
|
"learning_rate": 4.096954141255731e-06,
|
|
"loss": 1.5827150344848633,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 1.8135048231511255,
|
|
"grad_norm": 15.147384411595603,
|
|
"learning_rate": 4.078546158747586e-06,
|
|
"loss": 0.916832685470581,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 1.8167202572347267,
|
|
"grad_norm": 10.569002234689366,
|
|
"learning_rate": 4.060151095834482e-06,
|
|
"loss": 1.4536582231521606,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 1.819935691318328,
|
|
"grad_norm": 9.415759397401844,
|
|
"learning_rate": 4.041769210431414e-06,
|
|
"loss": 1.4267023801803589,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 1.8231511254019293,
|
|
"grad_norm": 7.078102010311895,
|
|
"learning_rate": 4.02340076026862e-06,
|
|
"loss": 1.1329270601272583,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 1.8263665594855305,
|
|
"grad_norm": 10.876729484364853,
|
|
"learning_rate": 4.00504600288796e-06,
|
|
"loss": 1.7833589315414429,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 1.829581993569132,
|
|
"grad_norm": 13.635055310739693,
|
|
"learning_rate": 3.9867051956393114e-06,
|
|
"loss": 1.2976348400115967,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 1.832797427652733,
|
|
"grad_norm": 10.771584206810726,
|
|
"learning_rate": 3.968378595676956e-06,
|
|
"loss": 1.4629448652267456,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 1.8360128617363345,
|
|
"grad_norm": 11.703783804229115,
|
|
"learning_rate": 3.95006645995598e-06,
|
|
"loss": 2.5722076892852783,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 1.8392282958199357,
|
|
"grad_norm": 12.349504033741153,
|
|
"learning_rate": 3.931769045228668e-06,
|
|
"loss": 2.2091426849365234,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 1.842443729903537,
|
|
"grad_norm": 11.542335230955135,
|
|
"learning_rate": 3.9134866080409e-06,
|
|
"loss": 1.391589641571045,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 1.8456591639871383,
|
|
"grad_norm": 12.806637836059952,
|
|
"learning_rate": 3.895219404728561e-06,
|
|
"loss": 1.1414397954940796,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 1.8488745980707395,
|
|
"grad_norm": 14.264578459530476,
|
|
"learning_rate": 3.8769676914139426e-06,
|
|
"loss": 1.059516191482544,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 1.852090032154341,
|
|
"grad_norm": 19.271915274351503,
|
|
"learning_rate": 3.858731724002153e-06,
|
|
"loss": 2.1879444122314453,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 1.855305466237942,
|
|
"grad_norm": 12.309421348778711,
|
|
"learning_rate": 3.840511758177528e-06,
|
|
"loss": 1.473386526107788,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 1.8585209003215435,
|
|
"grad_norm": 10.878678959452348,
|
|
"learning_rate": 3.822308049400047e-06,
|
|
"loss": 1.4653959274291992,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 1.8617363344051447,
|
|
"grad_norm": 8.040534262037765,
|
|
"learning_rate": 3.804120852901756e-06,
|
|
"loss": 1.6413612365722656,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 1.864951768488746,
|
|
"grad_norm": 9.044916275419325,
|
|
"learning_rate": 3.7859504236831766e-06,
|
|
"loss": 1.316032886505127,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 1.8681672025723473,
|
|
"grad_norm": 14.730705685102624,
|
|
"learning_rate": 3.7677970165097444e-06,
|
|
"loss": 1.7361235618591309,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 1.8713826366559485,
|
|
"grad_norm": 7.6900213009486675,
|
|
"learning_rate": 3.749660885908226e-06,
|
|
"loss": 1.667905569076538,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 1.87459807073955,
|
|
"grad_norm": 11.07535258062379,
|
|
"learning_rate": 3.7315422861631623e-06,
|
|
"loss": 1.065222978591919,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 1.877813504823151,
|
|
"grad_norm": 8.326344701604272,
|
|
"learning_rate": 3.7134414713132883e-06,
|
|
"loss": 1.115635633468628,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 1.8810289389067525,
|
|
"grad_norm": 18.801864494350724,
|
|
"learning_rate": 3.6953586951479834e-06,
|
|
"loss": 2.2017221450805664,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 1.8842443729903537,
|
|
"grad_norm": 25.418633684284842,
|
|
"learning_rate": 3.677294211203708e-06,
|
|
"loss": 2.484564781188965,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 1.887459807073955,
|
|
"grad_norm": 12.423647444445745,
|
|
"learning_rate": 3.6592482727604508e-06,
|
|
"loss": 1.3357205390930176,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 1.8906752411575563,
|
|
"grad_norm": 12.144643827381929,
|
|
"learning_rate": 3.641221132838173e-06,
|
|
"loss": 1.3544979095458984,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 1.8938906752411575,
|
|
"grad_norm": 12.46311005742411,
|
|
"learning_rate": 3.623213044193266e-06,
|
|
"loss": 0.9614198207855225,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 1.897106109324759,
|
|
"grad_norm": 10.364954003308453,
|
|
"learning_rate": 3.605224259315005e-06,
|
|
"loss": 1.2620229721069336,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 1.90032154340836,
|
|
"grad_norm": 9.237871106266182,
|
|
"learning_rate": 3.587255030422011e-06,
|
|
"loss": 1.886639952659607,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 1.9035369774919615,
|
|
"grad_norm": 13.030874547511374,
|
|
"learning_rate": 3.569305609458712e-06,
|
|
"loss": 1.2755482196807861,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 1.9067524115755627,
|
|
"grad_norm": 11.110774284864801,
|
|
"learning_rate": 3.5513762480918084e-06,
|
|
"loss": 1.4138600826263428,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 1.909967845659164,
|
|
"grad_norm": 13.866814610623521,
|
|
"learning_rate": 3.5334671977067504e-06,
|
|
"loss": 1.2061731815338135,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 1.9131832797427653,
|
|
"grad_norm": 11.47086636607418,
|
|
"learning_rate": 3.5155787094042113e-06,
|
|
"loss": 1.641986608505249,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 1.9163987138263665,
|
|
"grad_norm": 12.881675239858552,
|
|
"learning_rate": 3.497711033996564e-06,
|
|
"loss": 1.2158643007278442,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 1.919614147909968,
|
|
"grad_norm": 10.830930409963317,
|
|
"learning_rate": 3.4798644220043663e-06,
|
|
"loss": 1.8701896667480469,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 1.922829581993569,
|
|
"grad_norm": 18.238087057780916,
|
|
"learning_rate": 3.462039123652847e-06,
|
|
"loss": 1.195351243019104,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 1.9260450160771705,
|
|
"grad_norm": 18.418356217668133,
|
|
"learning_rate": 3.444235388868403e-06,
|
|
"loss": 1.2579845190048218,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 1.9292604501607717,
|
|
"grad_norm": 12.58424302992326,
|
|
"learning_rate": 3.4264534672750884e-06,
|
|
"loss": 1.524860143661499,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 1.932475884244373,
|
|
"grad_norm": 8.098300750497785,
|
|
"learning_rate": 3.408693608191116e-06,
|
|
"loss": 1.286231517791748,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 1.9356913183279743,
|
|
"grad_norm": 9.863741692326508,
|
|
"learning_rate": 3.3909560606253632e-06,
|
|
"loss": 1.322242021560669,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 1.9389067524115755,
|
|
"grad_norm": 12.987443308520445,
|
|
"learning_rate": 3.3732410732738843e-06,
|
|
"loss": 1.4537055492401123,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 1.942122186495177,
|
|
"grad_norm": 10.506554610719009,
|
|
"learning_rate": 3.3555488945164127e-06,
|
|
"loss": 1.5410267114639282,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 1.945337620578778,
|
|
"grad_norm": 10.174939767346636,
|
|
"learning_rate": 3.337879772412892e-06,
|
|
"loss": 1.754872441291809,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 1.9485530546623795,
|
|
"grad_norm": 10.59532168962947,
|
|
"learning_rate": 3.320233954699985e-06,
|
|
"loss": 1.4205820560455322,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 1.9517684887459807,
|
|
"grad_norm": 25.622835260786083,
|
|
"learning_rate": 3.302611688787612e-06,
|
|
"loss": 1.73175048828125,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 1.954983922829582,
|
|
"grad_norm": 9.901036226861155,
|
|
"learning_rate": 3.285013221755472e-06,
|
|
"loss": 1.4618065357208252,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 1.9581993569131833,
|
|
"grad_norm": 13.405612432143831,
|
|
"learning_rate": 3.267438800349586e-06,
|
|
"loss": 1.0909953117370605,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 1.9614147909967845,
|
|
"grad_norm": 14.915858656289252,
|
|
"learning_rate": 3.2498886709788298e-06,
|
|
"loss": 1.2120417356491089,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 1.964630225080386,
|
|
"grad_norm": 11.503072772803362,
|
|
"learning_rate": 3.2323630797114892e-06,
|
|
"loss": 1.560947060585022,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 1.967845659163987,
|
|
"grad_norm": 13.493518918254455,
|
|
"learning_rate": 3.214862272271799e-06,
|
|
"loss": 1.1600837707519531,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 1.9710610932475885,
|
|
"grad_norm": 15.01023608928888,
|
|
"learning_rate": 3.1973864940365076e-06,
|
|
"loss": 1.0669816732406616,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 1.9742765273311897,
|
|
"grad_norm": 12.264629967700163,
|
|
"learning_rate": 3.179935990031425e-06,
|
|
"loss": 1.236401081085205,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 1.977491961414791,
|
|
"grad_norm": 12.416946892637084,
|
|
"learning_rate": 3.162511004928003e-06,
|
|
"loss": 0.8553851842880249,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 1.9807073954983923,
|
|
"grad_norm": 10.966787271475475,
|
|
"learning_rate": 3.1451117830398896e-06,
|
|
"loss": 1.6353504657745361,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 1.9839228295819935,
|
|
"grad_norm": 11.04503339220036,
|
|
"learning_rate": 3.1277385683195117e-06,
|
|
"loss": 1.8089317083358765,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 1.987138263665595,
|
|
"grad_norm": 16.458323320048635,
|
|
"learning_rate": 3.110391604354652e-06,
|
|
"loss": 1.1577980518341064,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 1.990353697749196,
|
|
"grad_norm": 16.20975999278611,
|
|
"learning_rate": 3.093071134365037e-06,
|
|
"loss": 2.4027674198150635,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 1.9935691318327975,
|
|
"grad_norm": 23.51251674539541,
|
|
"learning_rate": 3.075777401198922e-06,
|
|
"loss": 1.3853836059570312,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 1.9967845659163987,
|
|
"grad_norm": 8.708072763645559,
|
|
"learning_rate": 3.058510647329688e-06,
|
|
"loss": 2.743666410446167,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 11.979852138626141,
|
|
"learning_rate": 3.041271114852443e-06,
|
|
"loss": 1.1549768447875977,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 2.0032154340836015,
|
|
"grad_norm": 14.314789124914883,
|
|
"learning_rate": 3.02405904548063e-06,
|
|
"loss": 1.0811271667480469,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 2.0064308681672025,
|
|
"grad_norm": 9.87321228153049,
|
|
"learning_rate": 3.0068746805426318e-06,
|
|
"loss": 0.35157322883605957,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 2.009646302250804,
|
|
"grad_norm": 9.320230936525673,
|
|
"learning_rate": 2.9897182609783905e-06,
|
|
"loss": 0.40260130167007446,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 2.012861736334405,
|
|
"grad_norm": 9.052307743678542,
|
|
"learning_rate": 2.97259002733603e-06,
|
|
"loss": 0.870284378528595,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 2.0160771704180065,
|
|
"grad_norm": 8.178905821906115,
|
|
"learning_rate": 2.9554902197684843e-06,
|
|
"loss": 0.4470030963420868,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 2.0192926045016075,
|
|
"grad_norm": 12.906748407694202,
|
|
"learning_rate": 2.938419078030128e-06,
|
|
"loss": 0.6769909858703613,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 2.022508038585209,
|
|
"grad_norm": 15.160992244136171,
|
|
"learning_rate": 2.9213768414734146e-06,
|
|
"loss": 0.5105010271072388,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 2.0257234726688105,
|
|
"grad_norm": 9.593795396914789,
|
|
"learning_rate": 2.90436374904552e-06,
|
|
"loss": 0.5568506121635437,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 2.0289389067524115,
|
|
"grad_norm": 8.019441658658858,
|
|
"learning_rate": 2.887380039284999e-06,
|
|
"loss": 0.21812975406646729,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 2.032154340836013,
|
|
"grad_norm": 7.294457920425058,
|
|
"learning_rate": 2.8704259503184306e-06,
|
|
"loss": 0.5240740776062012,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 2.035369774919614,
|
|
"grad_norm": 8.804193733481519,
|
|
"learning_rate": 2.853501719857086e-06,
|
|
"loss": 0.3466935455799103,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 2.0385852090032155,
|
|
"grad_norm": 10.734383468470607,
|
|
"learning_rate": 2.8366075851935927e-06,
|
|
"loss": 0.6415536403656006,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 2.0418006430868165,
|
|
"grad_norm": 7.475946116985285,
|
|
"learning_rate": 2.8197437831986085e-06,
|
|
"loss": 0.4148802161216736,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 2.045016077170418,
|
|
"grad_norm": 11.258686396792596,
|
|
"learning_rate": 2.802910550317506e-06,
|
|
"loss": 0.7162201404571533,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 2.0482315112540195,
|
|
"grad_norm": 9.209880063404333,
|
|
"learning_rate": 2.786108122567044e-06,
|
|
"loss": 0.5609467029571533,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 2.0514469453376205,
|
|
"grad_norm": 10.662055158233054,
|
|
"learning_rate": 2.769336735532068e-06,
|
|
"loss": 0.4859113395214081,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 2.054662379421222,
|
|
"grad_norm": 8.827327001805601,
|
|
"learning_rate": 2.7525966243622105e-06,
|
|
"loss": 0.35435792803764343,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 2.057877813504823,
|
|
"grad_norm": 8.027492720872965,
|
|
"learning_rate": 2.7358880237685844e-06,
|
|
"loss": 0.2768567204475403,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 2.0610932475884245,
|
|
"grad_norm": 10.207767409910922,
|
|
"learning_rate": 2.7192111680204957e-06,
|
|
"loss": 0.24898266792297363,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 2.0643086816720255,
|
|
"grad_norm": 9.78068569866971,
|
|
"learning_rate": 2.7025662909421625e-06,
|
|
"loss": 0.7269777059555054,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 2.067524115755627,
|
|
"grad_norm": 14.513005174422293,
|
|
"learning_rate": 2.685953625909432e-06,
|
|
"loss": 0.6724145412445068,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 2.0707395498392285,
|
|
"grad_norm": 12.097896199856809,
|
|
"learning_rate": 2.6693734058465105e-06,
|
|
"loss": 0.5692760348320007,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 2.0739549839228295,
|
|
"grad_norm": 13.857845362848131,
|
|
"learning_rate": 2.652825863222698e-06,
|
|
"loss": 0.5785061717033386,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 2.077170418006431,
|
|
"grad_norm": 9.786540455953286,
|
|
"learning_rate": 2.636311230049125e-06,
|
|
"loss": 0.3663535416126251,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 2.080385852090032,
|
|
"grad_norm": 12.912854147301195,
|
|
"learning_rate": 2.619829737875509e-06,
|
|
"loss": 0.4806700050830841,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 2.0836012861736335,
|
|
"grad_norm": 8.68661115707137,
|
|
"learning_rate": 2.6033816177868954e-06,
|
|
"loss": 0.5164961814880371,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 2.0868167202572345,
|
|
"grad_norm": 13.93536389564521,
|
|
"learning_rate": 2.5869671004004256e-06,
|
|
"loss": 0.6894420385360718,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 2.090032154340836,
|
|
"grad_norm": 7.225443037652673,
|
|
"learning_rate": 2.5705864158621008e-06,
|
|
"loss": 0.4011325538158417,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 2.0932475884244375,
|
|
"grad_norm": 23.325451994129086,
|
|
"learning_rate": 2.5542397938435574e-06,
|
|
"loss": 0.745391845703125,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 2.0964630225080385,
|
|
"grad_norm": 9.205824646857016,
|
|
"learning_rate": 2.537927463538844e-06,
|
|
"loss": 0.3058035969734192,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 2.09967845659164,
|
|
"grad_norm": 14.483317104386071,
|
|
"learning_rate": 2.521649653661209e-06,
|
|
"loss": 0.33844730257987976,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 2.102893890675241,
|
|
"grad_norm": 10.748115186443174,
|
|
"learning_rate": 2.5054065924398934e-06,
|
|
"loss": 0.3529083728790283,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 2.1061093247588425,
|
|
"grad_norm": 7.134237797900679,
|
|
"learning_rate": 2.4891985076169356e-06,
|
|
"loss": 0.3711613714694977,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 2.1093247588424435,
|
|
"grad_norm": 9.555120459648053,
|
|
"learning_rate": 2.473025626443969e-06,
|
|
"loss": 0.7479931712150574,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 2.112540192926045,
|
|
"grad_norm": 19.610879263288407,
|
|
"learning_rate": 2.4568881756790436e-06,
|
|
"loss": 1.7793715000152588,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 2.1157556270096465,
|
|
"grad_norm": 15.31483056892348,
|
|
"learning_rate": 2.4407863815834414e-06,
|
|
"loss": 1.3851677179336548,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 2.1189710610932475,
|
|
"grad_norm": 12.915717617411342,
|
|
"learning_rate": 2.42472046991851e-06,
|
|
"loss": 1.4731594324111938,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 2.122186495176849,
|
|
"grad_norm": 7.978020539846432,
|
|
"learning_rate": 2.4086906659424904e-06,
|
|
"loss": 0.5832240581512451,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 2.12540192926045,
|
|
"grad_norm": 8.57893982485209,
|
|
"learning_rate": 2.392697194407363e-06,
|
|
"loss": 0.18587762117385864,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 2.1286173633440515,
|
|
"grad_norm": 16.040640516050228,
|
|
"learning_rate": 2.3767402795556953e-06,
|
|
"loss": 1.4967979192733765,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 2.1318327974276525,
|
|
"grad_norm": 8.800901865323832,
|
|
"learning_rate": 2.3608201451175004e-06,
|
|
"loss": 0.6785554885864258,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 2.135048231511254,
|
|
"grad_norm": 8.369254419222456,
|
|
"learning_rate": 2.3449370143070948e-06,
|
|
"loss": 0.3392030596733093,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 2.1382636655948555,
|
|
"grad_norm": 13.581695588001903,
|
|
"learning_rate": 2.329091109819972e-06,
|
|
"loss": 1.3503499031066895,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 2.1414790996784565,
|
|
"grad_norm": 9.544273690674421,
|
|
"learning_rate": 2.313282653829679e-06,
|
|
"loss": 0.4895798861980438,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 2.144694533762058,
|
|
"grad_norm": 7.497690746653366,
|
|
"learning_rate": 2.297511867984703e-06,
|
|
"loss": 0.5282753109931946,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 2.147909967845659,
|
|
"grad_norm": 15.108645601163957,
|
|
"learning_rate": 2.2817789734053626e-06,
|
|
"loss": 0.44572100043296814,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 2.1511254019292605,
|
|
"grad_norm": 9.522046573188412,
|
|
"learning_rate": 2.266084190680707e-06,
|
|
"loss": 0.3825170397758484,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 2.154340836012862,
|
|
"grad_norm": 11.927142058431379,
|
|
"learning_rate": 2.250427739865421e-06,
|
|
"loss": 0.8217453360557556,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 2.157556270096463,
|
|
"grad_norm": 11.122450888975413,
|
|
"learning_rate": 2.23480984047675e-06,
|
|
"loss": 0.32945936918258667,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 2.1607717041800645,
|
|
"grad_norm": 10.060653422202963,
|
|
"learning_rate": 2.219230711491406e-06,
|
|
"loss": 0.3554953634738922,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 2.1639871382636655,
|
|
"grad_norm": 10.224259390532415,
|
|
"learning_rate": 2.2036905713425104e-06,
|
|
"loss": 0.7275658845901489,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 2.167202572347267,
|
|
"grad_norm": 9.307333936478546,
|
|
"learning_rate": 2.1881896379165253e-06,
|
|
"loss": 0.5208015441894531,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 2.170418006430868,
|
|
"grad_norm": 16.480106084286305,
|
|
"learning_rate": 2.172728128550199e-06,
|
|
"loss": 0.8410961627960205,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 2.1736334405144695,
|
|
"grad_norm": 23.43109332519012,
|
|
"learning_rate": 2.1573062600275217e-06,
|
|
"loss": 0.9196676015853882,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 2.176848874598071,
|
|
"grad_norm": 8.075311133528524,
|
|
"learning_rate": 2.1419242485766834e-06,
|
|
"loss": 0.4561595022678375,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 2.180064308681672,
|
|
"grad_norm": 6.991604551367555,
|
|
"learning_rate": 2.126582309867041e-06,
|
|
"loss": 0.2470824271440506,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 2.1832797427652735,
|
|
"grad_norm": 11.89113431006695,
|
|
"learning_rate": 2.1112806590061006e-06,
|
|
"loss": 0.366765558719635,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 2.1864951768488745,
|
|
"grad_norm": 7.200037824647487,
|
|
"learning_rate": 2.0960195105364935e-06,
|
|
"loss": 0.11232413351535797,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 2.189710610932476,
|
|
"grad_norm": 15.235602952570773,
|
|
"learning_rate": 2.080799078432972e-06,
|
|
"loss": 0.5797847509384155,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 2.192926045016077,
|
|
"grad_norm": 12.082768147438976,
|
|
"learning_rate": 2.0656195760994104e-06,
|
|
"loss": 0.3955646753311157,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 2.1961414790996785,
|
|
"grad_norm": 10.182906221123625,
|
|
"learning_rate": 2.0504812163658104e-06,
|
|
"loss": 0.27037519216537476,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 2.19935691318328,
|
|
"grad_norm": 16.725089320535528,
|
|
"learning_rate": 2.0353842114853194e-06,
|
|
"loss": 1.0738942623138428,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 2.202572347266881,
|
|
"grad_norm": 14.80956824571125,
|
|
"learning_rate": 2.020328773131252e-06,
|
|
"loss": 0.48105546832084656,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 2.2057877813504825,
|
|
"grad_norm": 14.488092854169647,
|
|
"learning_rate": 2.005315112394122e-06,
|
|
"loss": 0.37764203548431396,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 2.2090032154340835,
|
|
"grad_norm": 35.11784903119427,
|
|
"learning_rate": 1.990343439778691e-06,
|
|
"loss": 1.3033102750778198,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 2.212218649517685,
|
|
"grad_norm": 11.32083821956754,
|
|
"learning_rate": 1.9754139652010025e-06,
|
|
"loss": 0.3128071427345276,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 2.215434083601286,
|
|
"grad_norm": 7.739863787904974,
|
|
"learning_rate": 1.9605268979854493e-06,
|
|
"loss": 0.25044938921928406,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 2.2186495176848875,
|
|
"grad_norm": 9.336795568467322,
|
|
"learning_rate": 1.9456824468618365e-06,
|
|
"loss": 0.7681007981300354,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 2.221864951768489,
|
|
"grad_norm": 8.47441592443782,
|
|
"learning_rate": 1.9308808199624518e-06,
|
|
"loss": 0.8675619959831238,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 2.22508038585209,
|
|
"grad_norm": 13.531497358422932,
|
|
"learning_rate": 1.9161222248191515e-06,
|
|
"loss": 0.3523237109184265,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 2.2282958199356915,
|
|
"grad_norm": 7.7686044200353646,
|
|
"learning_rate": 1.9014068683604475e-06,
|
|
"loss": 0.5673654079437256,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 2.2315112540192925,
|
|
"grad_norm": 12.607843632159877,
|
|
"learning_rate": 1.8867349569086064e-06,
|
|
"loss": 0.4956747889518738,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 2.234726688102894,
|
|
"grad_norm": 20.83086061852303,
|
|
"learning_rate": 1.8721066961767626e-06,
|
|
"loss": 1.0982662439346313,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 2.237942122186495,
|
|
"grad_norm": 28.0587449883014,
|
|
"learning_rate": 1.8575222912660224e-06,
|
|
"loss": 0.7824825644493103,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 2.2411575562700965,
|
|
"grad_norm": 13.190430546984835,
|
|
"learning_rate": 1.8429819466625993e-06,
|
|
"loss": 0.9699738025665283,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 2.244372990353698,
|
|
"grad_norm": 9.095739529876795,
|
|
"learning_rate": 1.8284858662349391e-06,
|
|
"loss": 0.2911728620529175,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 2.247588424437299,
|
|
"grad_norm": 10.728396018637426,
|
|
"learning_rate": 1.8140342532308675e-06,
|
|
"loss": 0.21695515513420105,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 2.2508038585209005,
|
|
"grad_norm": 11.998687234131197,
|
|
"learning_rate": 1.7996273102747363e-06,
|
|
"loss": 0.5445791482925415,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 2.2540192926045015,
|
|
"grad_norm": 11.429548764717456,
|
|
"learning_rate": 1.7852652393645842e-06,
|
|
"loss": 0.5194580554962158,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 2.257234726688103,
|
|
"grad_norm": 7.56653505390186,
|
|
"learning_rate": 1.7709482418693036e-06,
|
|
"loss": 0.23896172642707825,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 2.260450160771704,
|
|
"grad_norm": 9.62095075841344,
|
|
"learning_rate": 1.7566765185258205e-06,
|
|
"loss": 0.35497206449508667,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 2.2636655948553055,
|
|
"grad_norm": 8.254578062437453,
|
|
"learning_rate": 1.7424502694362755e-06,
|
|
"loss": 0.5100143551826477,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 2.266881028938907,
|
|
"grad_norm": 9.058208468637773,
|
|
"learning_rate": 1.7282696940652188e-06,
|
|
"loss": 0.3519476652145386,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 2.270096463022508,
|
|
"grad_norm": 10.161898375908322,
|
|
"learning_rate": 1.714134991236817e-06,
|
|
"loss": 0.317427396774292,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 2.2733118971061095,
|
|
"grad_norm": 13.112579685570976,
|
|
"learning_rate": 1.7000463591320621e-06,
|
|
"loss": 0.2504899799823761,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 2.2765273311897105,
|
|
"grad_norm": 7.423757130841184,
|
|
"learning_rate": 1.6860039952859941e-06,
|
|
"loss": 0.6729331612586975,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 2.279742765273312,
|
|
"grad_norm": 10.935414242351301,
|
|
"learning_rate": 1.672008096584931e-06,
|
|
"loss": 0.2858375310897827,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 2.282958199356913,
|
|
"grad_norm": 7.614378612646665,
|
|
"learning_rate": 1.658058859263708e-06,
|
|
"loss": 0.2584810256958008,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 2.2861736334405145,
|
|
"grad_norm": 9.020570310988946,
|
|
"learning_rate": 1.64415647890293e-06,
|
|
"loss": 0.6542217135429382,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 2.289389067524116,
|
|
"grad_norm": 8.199722638862658,
|
|
"learning_rate": 1.6303011504262223e-06,
|
|
"loss": 0.7301985025405884,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 2.292604501607717,
|
|
"grad_norm": 16.272283444338548,
|
|
"learning_rate": 1.6164930680975021e-06,
|
|
"loss": 0.5628085136413574,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 2.2958199356913185,
|
|
"grad_norm": 10.028333472377934,
|
|
"learning_rate": 1.6027324255182547e-06,
|
|
"loss": 0.6142044067382812,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 2.2990353697749195,
|
|
"grad_norm": 8.45235295984565,
|
|
"learning_rate": 1.5890194156248178e-06,
|
|
"loss": 0.1730177402496338,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 2.302250803858521,
|
|
"grad_norm": 19.06820769510073,
|
|
"learning_rate": 1.5753542306856774e-06,
|
|
"loss": 0.44994986057281494,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 2.305466237942122,
|
|
"grad_norm": 13.015925232226362,
|
|
"learning_rate": 1.5617370622987703e-06,
|
|
"loss": 0.4153675436973572,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 2.3086816720257235,
|
|
"grad_norm": 15.815386750824745,
|
|
"learning_rate": 1.548168101388799e-06,
|
|
"loss": 0.7933076620101929,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 2.311897106109325,
|
|
"grad_norm": 6.8033416899165475,
|
|
"learning_rate": 1.5346475382045578e-06,
|
|
"loss": 0.2861023545265198,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 2.315112540192926,
|
|
"grad_norm": 7.586806061312669,
|
|
"learning_rate": 1.5211755623162588e-06,
|
|
"loss": 0.21383363008499146,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 2.3183279742765275,
|
|
"grad_norm": 8.35896871658916,
|
|
"learning_rate": 1.507752362612878e-06,
|
|
"loss": 0.34175100922584534,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 2.3215434083601285,
|
|
"grad_norm": 11.144091740266603,
|
|
"learning_rate": 1.4943781272995073e-06,
|
|
"loss": 0.6520302295684814,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 2.32475884244373,
|
|
"grad_norm": 9.891660748247679,
|
|
"learning_rate": 1.481053043894713e-06,
|
|
"loss": 0.47313952445983887,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 2.327974276527331,
|
|
"grad_norm": 10.01954348978427,
|
|
"learning_rate": 1.467777299227911e-06,
|
|
"loss": 0.43662163615226746,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 2.3311897106109325,
|
|
"grad_norm": 11.031433385009539,
|
|
"learning_rate": 1.4545510794367413e-06,
|
|
"loss": 0.8428820371627808,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 2.334405144694534,
|
|
"grad_norm": 8.663154574713626,
|
|
"learning_rate": 1.4413745699644633e-06,
|
|
"loss": 0.3453049063682556,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 2.337620578778135,
|
|
"grad_norm": 7.803079349836385,
|
|
"learning_rate": 1.4282479555573559e-06,
|
|
"loss": 0.2745356261730194,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 2.3408360128617365,
|
|
"grad_norm": 10.72034771606454,
|
|
"learning_rate": 1.4151714202621214e-06,
|
|
"loss": 0.5829728245735168,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 2.3440514469453375,
|
|
"grad_norm": 14.559259818421047,
|
|
"learning_rate": 1.4021451474233111e-06,
|
|
"loss": 0.599088191986084,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 2.347266881028939,
|
|
"grad_norm": 11.885109180133032,
|
|
"learning_rate": 1.389169319680752e-06,
|
|
"loss": 0.6071078777313232,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 2.35048231511254,
|
|
"grad_norm": 7.512662309336336,
|
|
"learning_rate": 1.3762441189669855e-06,
|
|
"loss": 0.7940813302993774,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 2.3536977491961415,
|
|
"grad_norm": 9.282805302910258,
|
|
"learning_rate": 1.363369726504719e-06,
|
|
"loss": 0.6212410926818848,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 2.356913183279743,
|
|
"grad_norm": 16.59514995951129,
|
|
"learning_rate": 1.3505463228042814e-06,
|
|
"loss": 0.7771339416503906,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 2.360128617363344,
|
|
"grad_norm": 8.019573283607059,
|
|
"learning_rate": 1.337774087661095e-06,
|
|
"loss": 0.3336787819862366,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 2.3633440514469455,
|
|
"grad_norm": 9.3480016042164,
|
|
"learning_rate": 1.3250532001531568e-06,
|
|
"loss": 0.3283434510231018,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 2.3665594855305465,
|
|
"grad_norm": 15.615069116375588,
|
|
"learning_rate": 1.31238383863852e-06,
|
|
"loss": 0.747290849685669,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 2.369774919614148,
|
|
"grad_norm": 11.478964562122078,
|
|
"learning_rate": 1.2997661807528011e-06,
|
|
"loss": 0.4145023226737976,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 2.372990353697749,
|
|
"grad_norm": 9.674102610648761,
|
|
"learning_rate": 1.2872004034066843e-06,
|
|
"loss": 0.6085692644119263,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 2.3762057877813505,
|
|
"grad_norm": 9.158481184369425,
|
|
"learning_rate": 1.2746866827834443e-06,
|
|
"loss": 0.35022085905075073,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 2.379421221864952,
|
|
"grad_norm": 10.878685805468756,
|
|
"learning_rate": 1.2622251943364733e-06,
|
|
"loss": 0.3418879210948944,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 2.382636655948553,
|
|
"grad_norm": 14.543864763023729,
|
|
"learning_rate": 1.2498161127868236e-06,
|
|
"loss": 0.48997265100479126,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 2.3858520900321545,
|
|
"grad_norm": 10.876583627678352,
|
|
"learning_rate": 1.237459612120755e-06,
|
|
"loss": 0.5536065697669983,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 2.3890675241157555,
|
|
"grad_norm": 20.351717332652505,
|
|
"learning_rate": 1.2251558655873003e-06,
|
|
"loss": 0.4042523205280304,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 2.392282958199357,
|
|
"grad_norm": 8.13192706557856,
|
|
"learning_rate": 1.2129050456958296e-06,
|
|
"loss": 0.3217351734638214,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 2.395498392282958,
|
|
"grad_norm": 7.539239428048716,
|
|
"learning_rate": 1.2007073242136358e-06,
|
|
"loss": 0.7380541563034058,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 2.3987138263665595,
|
|
"grad_norm": 10.893419775532118,
|
|
"learning_rate": 1.1885628721635256e-06,
|
|
"loss": 1.3607124090194702,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 2.401929260450161,
|
|
"grad_norm": 8.269742975582927,
|
|
"learning_rate": 1.176471859821421e-06,
|
|
"loss": 0.5945987701416016,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 2.405144694533762,
|
|
"grad_norm": 10.313373636714106,
|
|
"learning_rate": 1.1644344567139716e-06,
|
|
"loss": 0.44915276765823364,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 2.4083601286173635,
|
|
"grad_norm": 9.146019028304934,
|
|
"learning_rate": 1.1524508316161799e-06,
|
|
"loss": 0.38859468698501587,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 2.4115755627009645,
|
|
"grad_norm": 13.294635461287648,
|
|
"learning_rate": 1.1405211525490307e-06,
|
|
"loss": 0.20568466186523438,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 2.414790996784566,
|
|
"grad_norm": 11.70213362190279,
|
|
"learning_rate": 1.1286455867771422e-06,
|
|
"loss": 0.6367174983024597,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 2.418006430868167,
|
|
"grad_norm": 10.461363981706414,
|
|
"learning_rate": 1.1168243008064123e-06,
|
|
"loss": 0.30549776554107666,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 2.4212218649517685,
|
|
"grad_norm": 10.69031162106857,
|
|
"learning_rate": 1.1050574603816905e-06,
|
|
"loss": 0.37557756900787354,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 2.42443729903537,
|
|
"grad_norm": 7.379872280064574,
|
|
"learning_rate": 1.0933452304844505e-06,
|
|
"loss": 0.5297196507453918,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 2.427652733118971,
|
|
"grad_norm": 9.7634723677066,
|
|
"learning_rate": 1.0816877753304777e-06,
|
|
"loss": 0.274686723947525,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 2.4308681672025725,
|
|
"grad_norm": 7.237746575511637,
|
|
"learning_rate": 1.0700852583675708e-06,
|
|
"loss": 0.33781081438064575,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 2.4340836012861735,
|
|
"grad_norm": 10.197522367598072,
|
|
"learning_rate": 1.0585378422732435e-06,
|
|
"loss": 0.3239028751850128,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 2.437299035369775,
|
|
"grad_norm": 9.392166371827608,
|
|
"learning_rate": 1.0470456889524473e-06,
|
|
"loss": 0.34594273567199707,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 2.440514469453376,
|
|
"grad_norm": 6.796254455681624,
|
|
"learning_rate": 1.0356089595353008e-06,
|
|
"loss": 0.21343210339546204,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 2.4437299035369775,
|
|
"grad_norm": 7.480509218493879,
|
|
"learning_rate": 1.0242278143748307e-06,
|
|
"loss": 0.49799197912216187,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 2.446945337620579,
|
|
"grad_norm": 8.95241961534226,
|
|
"learning_rate": 1.012902413044725e-06,
|
|
"loss": 0.3868298828601837,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 2.45016077170418,
|
|
"grad_norm": 10.42651529337544,
|
|
"learning_rate": 1.0016329143370929e-06,
|
|
"loss": 0.264824241399765,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 2.4533762057877815,
|
|
"grad_norm": 9.17833598375061,
|
|
"learning_rate": 9.904194762602382e-07,
|
|
"loss": 0.3985700011253357,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 2.4565916398713825,
|
|
"grad_norm": 9.674121185768858,
|
|
"learning_rate": 9.792622560364467e-07,
|
|
"loss": 0.17972898483276367,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 2.459807073954984,
|
|
"grad_norm": 8.206523942923813,
|
|
"learning_rate": 9.681614100997806e-07,
|
|
"loss": 0.3820546865463257,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 2.463022508038585,
|
|
"grad_norm": 8.778341452335091,
|
|
"learning_rate": 9.57117094093884e-07,
|
|
"loss": 0.13588829338550568,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 2.4662379421221865,
|
|
"grad_norm": 12.703644433268686,
|
|
"learning_rate": 9.46129462869802e-07,
|
|
"loss": 0.2940727472305298,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 2.469453376205788,
|
|
"grad_norm": 10.30882189666681,
|
|
"learning_rate": 9.351986704838084e-07,
|
|
"loss": 0.6208051443099976,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 2.472668810289389,
|
|
"grad_norm": 9.88354985018698,
|
|
"learning_rate": 9.243248701952489e-07,
|
|
"loss": 0.9009281396865845,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 2.4758842443729905,
|
|
"grad_norm": 7.057277121900955,
|
|
"learning_rate": 9.135082144643869e-07,
|
|
"loss": 0.3825221061706543,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 2.4790996784565915,
|
|
"grad_norm": 13.283010516967643,
|
|
"learning_rate": 9.0274885495027e-07,
|
|
"loss": 0.4601898789405823,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 2.482315112540193,
|
|
"grad_norm": 9.034062537915768,
|
|
"learning_rate": 8.92046942508602e-07,
|
|
"loss": 0.31752362847328186,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 2.485530546623794,
|
|
"grad_norm": 9.886229624024415,
|
|
"learning_rate": 8.814026271896275e-07,
|
|
"loss": 0.9294736981391907,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 2.4887459807073955,
|
|
"grad_norm": 13.589832734358895,
|
|
"learning_rate": 8.708160582360303e-07,
|
|
"loss": 0.324219286441803,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 2.491961414790997,
|
|
"grad_norm": 11.552870172052325,
|
|
"learning_rate": 8.602873840808379e-07,
|
|
"loss": 0.9777847528457642,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 2.495176848874598,
|
|
"grad_norm": 14.788050425785567,
|
|
"learning_rate": 8.498167523453404e-07,
|
|
"loss": 0.5360197424888611,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 2.4983922829581995,
|
|
"grad_norm": 13.424208182029288,
|
|
"learning_rate": 8.394043098370275e-07,
|
|
"loss": 0.31830108165740967,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 2.5016077170418005,
|
|
"grad_norm": 20.795163896337453,
|
|
"learning_rate": 8.290502025475183e-07,
|
|
"loss": 0.9196930527687073,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 2.504823151125402,
|
|
"grad_norm": 13.342352710693937,
|
|
"learning_rate": 8.187545756505244e-07,
|
|
"loss": 0.5551115870475769,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 2.508038585209003,
|
|
"grad_norm": 10.381356116441133,
|
|
"learning_rate": 8.085175734998091e-07,
|
|
"loss": 0.6043642163276672,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 2.5112540192926045,
|
|
"grad_norm": 6.062655557368801,
|
|
"learning_rate": 7.98339339627166e-07,
|
|
"loss": 0.22657924890518188,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 2.514469453376206,
|
|
"grad_norm": 10.807014185724375,
|
|
"learning_rate": 7.882200167404047e-07,
|
|
"loss": 0.259809672832489,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 2.517684887459807,
|
|
"grad_norm": 7.872967554652684,
|
|
"learning_rate": 7.781597467213514e-07,
|
|
"loss": 0.4243828058242798,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 2.5209003215434085,
|
|
"grad_norm": 14.382598474640337,
|
|
"learning_rate": 7.681586706238586e-07,
|
|
"loss": 0.5216907858848572,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 2.5241157556270095,
|
|
"grad_norm": 6.741881997512629,
|
|
"learning_rate": 7.582169286718305e-07,
|
|
"loss": 0.260081946849823,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 2.527331189710611,
|
|
"grad_norm": 7.491622137828397,
|
|
"learning_rate": 7.483346602572505e-07,
|
|
"loss": 0.3229532837867737,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 2.530546623794212,
|
|
"grad_norm": 12.51190036363299,
|
|
"learning_rate": 7.385120039382326e-07,
|
|
"loss": 1.6990151405334473,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 2.5337620578778135,
|
|
"grad_norm": 12.102101725870327,
|
|
"learning_rate": 7.287490974370759e-07,
|
|
"loss": 0.46230068802833557,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 2.536977491961415,
|
|
"grad_norm": 7.3789531863557,
|
|
"learning_rate": 7.190460776383351e-07,
|
|
"loss": 0.1410602331161499,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 2.540192926045016,
|
|
"grad_norm": 8.791343590023278,
|
|
"learning_rate": 7.094030805869001e-07,
|
|
"loss": 0.1462668925523758,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 2.5434083601286175,
|
|
"grad_norm": 11.499145842643497,
|
|
"learning_rate": 6.998202414860894e-07,
|
|
"loss": 0.4931030869483948,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 2.5466237942122185,
|
|
"grad_norm": 8.890241677567976,
|
|
"learning_rate": 6.902976946957518e-07,
|
|
"loss": 0.4790411591529846,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 2.54983922829582,
|
|
"grad_norm": 7.357169662653574,
|
|
"learning_rate": 6.808355737303895e-07,
|
|
"loss": 0.25933071970939636,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 2.553054662379421,
|
|
"grad_norm": 18.008951919388966,
|
|
"learning_rate": 6.71434011257277e-07,
|
|
"loss": 0.7769885063171387,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 2.5562700964630225,
|
|
"grad_norm": 15.216768610586033,
|
|
"learning_rate": 6.620931390946078e-07,
|
|
"loss": 0.9100818634033203,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 2.559485530546624,
|
|
"grad_norm": 10.749117948962583,
|
|
"learning_rate": 6.528130882096418e-07,
|
|
"loss": 0.31192898750305176,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 2.562700964630225,
|
|
"grad_norm": 11.371166952776898,
|
|
"learning_rate": 6.435939887168718e-07,
|
|
"loss": 0.29440587759017944,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 2.5659163987138265,
|
|
"grad_norm": 9.195291843958517,
|
|
"learning_rate": 6.344359698761998e-07,
|
|
"loss": 0.3736717402935028,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 2.5691318327974275,
|
|
"grad_norm": 13.644043776235835,
|
|
"learning_rate": 6.253391600911213e-07,
|
|
"loss": 0.6370671391487122,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 2.572347266881029,
|
|
"grad_norm": 17.84760553493272,
|
|
"learning_rate": 6.163036869069267e-07,
|
|
"loss": 1.8662174940109253,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 2.57556270096463,
|
|
"grad_norm": 8.898342133449939,
|
|
"learning_rate": 6.073296770089159e-07,
|
|
"loss": 0.4110758900642395,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 2.5787781350482315,
|
|
"grad_norm": 13.204596578832312,
|
|
"learning_rate": 5.984172562206164e-07,
|
|
"loss": 0.26106947660446167,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 2.581993569131833,
|
|
"grad_norm": 10.376179325495649,
|
|
"learning_rate": 5.895665495020242e-07,
|
|
"loss": 0.25730016827583313,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 2.585209003215434,
|
|
"grad_norm": 13.684680313080825,
|
|
"learning_rate": 5.807776809478472e-07,
|
|
"loss": 0.3436719477176666,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 2.5884244372990355,
|
|
"grad_norm": 27.176558442062554,
|
|
"learning_rate": 5.720507737857706e-07,
|
|
"loss": 1.1672216653823853,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 2.5916398713826365,
|
|
"grad_norm": 12.887853458109294,
|
|
"learning_rate": 5.633859503747241e-07,
|
|
"loss": 0.5256634950637817,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 2.594855305466238,
|
|
"grad_norm": 8.96694135103656,
|
|
"learning_rate": 5.547833322031693e-07,
|
|
"loss": 0.366617888212204,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 2.598070739549839,
|
|
"grad_norm": 9.412357022980187,
|
|
"learning_rate": 5.462430398873947e-07,
|
|
"loss": 0.27508530020713806,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 2.6012861736334405,
|
|
"grad_norm": 9.133709325093466,
|
|
"learning_rate": 5.377651931698275e-07,
|
|
"loss": 0.36251911520957947,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 2.604501607717042,
|
|
"grad_norm": 9.848470581534286,
|
|
"learning_rate": 5.293499109173517e-07,
|
|
"loss": 0.33935314416885376,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 2.607717041800643,
|
|
"grad_norm": 14.662987456896733,
|
|
"learning_rate": 5.209973111196404e-07,
|
|
"loss": 0.4906863570213318,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 2.6109324758842445,
|
|
"grad_norm": 26.400737908261828,
|
|
"learning_rate": 5.127075108875051e-07,
|
|
"loss": 1.5506454706192017,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 2.6141479099678455,
|
|
"grad_norm": 12.023484760258915,
|
|
"learning_rate": 5.044806264512525e-07,
|
|
"loss": 0.2666461765766144,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 2.617363344051447,
|
|
"grad_norm": 12.371598029084595,
|
|
"learning_rate": 4.963167731590535e-07,
|
|
"loss": 0.6199164986610413,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 2.620578778135048,
|
|
"grad_norm": 9.073073795027149,
|
|
"learning_rate": 4.88216065475327e-07,
|
|
"loss": 0.2723952531814575,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 2.6237942122186495,
|
|
"grad_norm": 8.758785690157431,
|
|
"learning_rate": 4.801786169791339e-07,
|
|
"loss": 0.46878230571746826,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 2.627009646302251,
|
|
"grad_norm": 9.483949934382007,
|
|
"learning_rate": 4.7220454036258803e-07,
|
|
"loss": 0.36121273040771484,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 2.630225080385852,
|
|
"grad_norm": 9.850946982616728,
|
|
"learning_rate": 4.642939474292713e-07,
|
|
"loss": 0.38482001423835754,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 2.6334405144694535,
|
|
"grad_norm": 14.43891373480777,
|
|
"learning_rate": 4.5644694909266984e-07,
|
|
"loss": 0.6713676452636719,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 2.6366559485530545,
|
|
"grad_norm": 10.446474380940323,
|
|
"learning_rate": 4.4866365537461543e-07,
|
|
"loss": 0.4145408868789673,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 2.639871382636656,
|
|
"grad_norm": 7.984266402601969,
|
|
"learning_rate": 4.4094417540374745e-07,
|
|
"loss": 0.18469397723674774,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 2.643086816720257,
|
|
"grad_norm": 12.587060023396914,
|
|
"learning_rate": 4.332886174139794e-07,
|
|
"loss": 0.9125012159347534,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 2.6463022508038585,
|
|
"grad_norm": 7.989834221712064,
|
|
"learning_rate": 4.2569708874298123e-07,
|
|
"loss": 0.7010893225669861,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 2.64951768488746,
|
|
"grad_norm": 10.71200284540403,
|
|
"learning_rate": 4.1816969583067526e-07,
|
|
"loss": 0.22042930126190186,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 2.652733118971061,
|
|
"grad_norm": 8.632776088215412,
|
|
"learning_rate": 4.1070654421774767e-07,
|
|
"loss": 0.8344212770462036,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 2.6559485530546625,
|
|
"grad_norm": 16.33719255777251,
|
|
"learning_rate": 4.0330773854416025e-07,
|
|
"loss": 0.3559122681617737,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 2.6591639871382635,
|
|
"grad_norm": 11.020551860792871,
|
|
"learning_rate": 3.959733825476908e-07,
|
|
"loss": 0.30302202701568604,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 2.662379421221865,
|
|
"grad_norm": 11.593833422236477,
|
|
"learning_rate": 3.8870357906247434e-07,
|
|
"loss": 0.37081068754196167,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 2.665594855305466,
|
|
"grad_norm": 10.818672631896604,
|
|
"learning_rate": 3.814984300175645e-07,
|
|
"loss": 0.2318996787071228,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 2.6688102893890675,
|
|
"grad_norm": 10.490748477323145,
|
|
"learning_rate": 3.743580364355004e-07,
|
|
"loss": 0.27689433097839355,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 2.672025723472669,
|
|
"grad_norm": 7.557342296482055,
|
|
"learning_rate": 3.672824984308948e-07,
|
|
"loss": 0.3493078351020813,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 2.67524115755627,
|
|
"grad_norm": 19.55126309686112,
|
|
"learning_rate": 3.602719152090256e-07,
|
|
"loss": 0.7462046146392822,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 2.6784565916398715,
|
|
"grad_norm": 9.221894655944931,
|
|
"learning_rate": 3.533263850644508e-07,
|
|
"loss": 0.6614691615104675,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 2.6816720257234725,
|
|
"grad_norm": 9.211741634507469,
|
|
"learning_rate": 3.464460053796237e-07,
|
|
"loss": 0.29138296842575073,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 2.684887459807074,
|
|
"grad_norm": 9.734125500615347,
|
|
"learning_rate": 3.396308726235326e-07,
|
|
"loss": 0.5526795387268066,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 2.688102893890675,
|
|
"grad_norm": 10.836649082695137,
|
|
"learning_rate": 3.328810823503448e-07,
|
|
"loss": 0.5932884812355042,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 2.6913183279742765,
|
|
"grad_norm": 10.19243735016382,
|
|
"learning_rate": 3.2619672919807054e-07,
|
|
"loss": 0.6854689121246338,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 2.694533762057878,
|
|
"grad_norm": 14.297447936460108,
|
|
"learning_rate": 3.195779068872318e-07,
|
|
"loss": 0.2756684422492981,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 2.697749196141479,
|
|
"grad_norm": 10.266269542405677,
|
|
"learning_rate": 3.1302470821955143e-07,
|
|
"loss": 0.30567488074302673,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 2.7009646302250805,
|
|
"grad_norm": 10.298881489172567,
|
|
"learning_rate": 3.0653722507665016e-07,
|
|
"loss": 0.8650332093238831,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 2.7041800643086815,
|
|
"grad_norm": 12.33005217016299,
|
|
"learning_rate": 3.0011554841876236e-07,
|
|
"loss": 0.7738863229751587,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 2.707395498392283,
|
|
"grad_norm": 13.788201933128668,
|
|
"learning_rate": 2.9375976828345254e-07,
|
|
"loss": 0.4506850838661194,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 2.710610932475884,
|
|
"grad_norm": 23.437927191055728,
|
|
"learning_rate": 2.8746997378436117e-07,
|
|
"loss": 0.9781379699707031,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 2.7138263665594855,
|
|
"grad_norm": 8.156853967408997,
|
|
"learning_rate": 2.8124625310995136e-07,
|
|
"loss": 0.36554017663002014,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 2.717041800643087,
|
|
"grad_norm": 36.113062108363714,
|
|
"learning_rate": 2.750886935222724e-07,
|
|
"loss": 1.2861707210540771,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 2.720257234726688,
|
|
"grad_norm": 9.707027837669324,
|
|
"learning_rate": 2.689973813557367e-07,
|
|
"loss": 0.30182671546936035,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 2.7234726688102895,
|
|
"grad_norm": 8.613290992443563,
|
|
"learning_rate": 2.6297240201591025e-07,
|
|
"loss": 0.3068884015083313,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 2.7266881028938905,
|
|
"grad_norm": 10.585291997817569,
|
|
"learning_rate": 2.5701383997831284e-07,
|
|
"loss": 0.3039591610431671,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 2.729903536977492,
|
|
"grad_norm": 16.036360689461798,
|
|
"learning_rate": 2.5112177878723833e-07,
|
|
"loss": 0.4938279986381531,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 2.733118971061093,
|
|
"grad_norm": 7.7246780557178045,
|
|
"learning_rate": 2.452963010545767e-07,
|
|
"loss": 0.3451383709907532,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 2.7363344051446945,
|
|
"grad_norm": 11.562492840025135,
|
|
"learning_rate": 2.3953748845866096e-07,
|
|
"loss": 0.29905378818511963,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 2.739549839228296,
|
|
"grad_norm": 13.062794280899158,
|
|
"learning_rate": 2.3384542174311908e-07,
|
|
"loss": 0.3758173882961273,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 2.742765273311897,
|
|
"grad_norm": 10.466966734307,
|
|
"learning_rate": 2.282201807157436e-07,
|
|
"loss": 0.7077566981315613,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 2.7459807073954985,
|
|
"grad_norm": 23.68854074954466,
|
|
"learning_rate": 2.2266184424737214e-07,
|
|
"loss": 0.5676212310791016,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 2.7491961414790995,
|
|
"grad_norm": 10.931312601197227,
|
|
"learning_rate": 2.1717049027078106e-07,
|
|
"loss": 0.3098456859588623,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 2.752411575562701,
|
|
"grad_norm": 13.915496014810294,
|
|
"learning_rate": 2.1174619577959355e-07,
|
|
"loss": 0.5395633578300476,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 2.755627009646302,
|
|
"grad_norm": 11.883754001722993,
|
|
"learning_rate": 2.0638903682719814e-07,
|
|
"loss": 0.67840975522995,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 2.7588424437299035,
|
|
"grad_norm": 9.072301219104835,
|
|
"learning_rate": 2.010990885256875e-07,
|
|
"loss": 0.22875866293907166,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 2.762057877813505,
|
|
"grad_norm": 11.388523965575855,
|
|
"learning_rate": 1.958764250447981e-07,
|
|
"loss": 0.28753116726875305,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 2.765273311897106,
|
|
"grad_norm": 10.42746808832713,
|
|
"learning_rate": 1.9072111961087546e-07,
|
|
"loss": 0.49607959389686584,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 2.7684887459807075,
|
|
"grad_norm": 8.55806623128332,
|
|
"learning_rate": 1.856332445058462e-07,
|
|
"loss": 0.503407895565033,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 2.7717041800643085,
|
|
"grad_norm": 12.297262248101239,
|
|
"learning_rate": 1.8061287106620308e-07,
|
|
"loss": 0.37309232354164124,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 2.77491961414791,
|
|
"grad_norm": 6.834698795090784,
|
|
"learning_rate": 1.7566006968200712e-07,
|
|
"loss": 0.15657085180282593,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 2.778135048231511,
|
|
"grad_norm": 11.42404497830935,
|
|
"learning_rate": 1.7077490979589996e-07,
|
|
"loss": 0.9316859245300293,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 2.7813504823151125,
|
|
"grad_norm": 24.35923106389002,
|
|
"learning_rate": 1.6595745990212686e-07,
|
|
"loss": 0.5050212144851685,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 2.784565916398714,
|
|
"grad_norm": 8.48025747992941,
|
|
"learning_rate": 1.6120778754558418e-07,
|
|
"loss": 0.2638796865940094,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 2.787781350482315,
|
|
"grad_norm": 10.634566725803458,
|
|
"learning_rate": 1.5652595932086346e-07,
|
|
"loss": 0.3945295214653015,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 2.7909967845659165,
|
|
"grad_norm": 10.867738538207686,
|
|
"learning_rate": 1.519120408713237e-07,
|
|
"loss": 0.7213743925094604,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 2.7942122186495175,
|
|
"grad_norm": 14.418328198181541,
|
|
"learning_rate": 1.4736609688816738e-07,
|
|
"loss": 0.3248399794101715,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 2.797427652733119,
|
|
"grad_norm": 8.419980675343098,
|
|
"learning_rate": 1.42888191109537e-07,
|
|
"loss": 0.22939413785934448,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 2.80064308681672,
|
|
"grad_norm": 9.19433579405382,
|
|
"learning_rate": 1.3847838631961764e-07,
|
|
"loss": 0.32842016220092773,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 2.8038585209003215,
|
|
"grad_norm": 10.639689045879424,
|
|
"learning_rate": 1.341367443477598e-07,
|
|
"loss": 0.35260647535324097,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 2.807073954983923,
|
|
"grad_norm": 7.268481225335977,
|
|
"learning_rate": 1.2986332606761077e-07,
|
|
"loss": 0.14097937941551208,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 2.810289389067524,
|
|
"grad_norm": 12.654855752821373,
|
|
"learning_rate": 1.2565819139626123e-07,
|
|
"loss": 0.3869823217391968,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 2.8135048231511255,
|
|
"grad_norm": 12.375199769540473,
|
|
"learning_rate": 1.215213992934061e-07,
|
|
"loss": 0.37241262197494507,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 2.816720257234727,
|
|
"grad_norm": 11.840995986503778,
|
|
"learning_rate": 1.1745300776051683e-07,
|
|
"loss": 0.4987008273601532,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 2.819935691318328,
|
|
"grad_norm": 20.150747698775717,
|
|
"learning_rate": 1.1345307384002857e-07,
|
|
"loss": 0.7997194528579712,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 2.823151125401929,
|
|
"grad_norm": 11.222276595989268,
|
|
"learning_rate": 1.0952165361454103e-07,
|
|
"loss": 0.3447033166885376,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 2.8263665594855305,
|
|
"grad_norm": 10.642677007946524,
|
|
"learning_rate": 1.0565880220603009e-07,
|
|
"loss": 0.3858156204223633,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 2.829581993569132,
|
|
"grad_norm": 9.185432192229552,
|
|
"learning_rate": 1.0186457377507786e-07,
|
|
"loss": 0.34079843759536743,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 2.832797427652733,
|
|
"grad_norm": 11.088590201340256,
|
|
"learning_rate": 9.813902152011112e-08,
|
|
"loss": 0.23632663488388062,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 2.8360128617363345,
|
|
"grad_norm": 8.103072227415286,
|
|
"learning_rate": 9.448219767665579e-08,
|
|
"loss": 0.27433305978775024,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 2.839228295819936,
|
|
"grad_norm": 12.74986410220812,
|
|
"learning_rate": 9.089415351660635e-08,
|
|
"loss": 0.23926009237766266,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 2.842443729903537,
|
|
"grad_norm": 9.837558952269603,
|
|
"learning_rate": 8.737493934750374e-08,
|
|
"loss": 0.2923390567302704,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 2.845659163987138,
|
|
"grad_norm": 8.70335385161108,
|
|
"learning_rate": 8.392460451183304e-08,
|
|
"loss": 0.41260838508605957,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 2.8488745980707395,
|
|
"grad_norm": 14.688290879664716,
|
|
"learning_rate": 8.05431973863291e-08,
|
|
"loss": 1.014516830444336,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 2.852090032154341,
|
|
"grad_norm": 9.853101244202293,
|
|
"learning_rate": 7.723076538130093e-08,
|
|
"loss": 0.7642203569412231,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 2.855305466237942,
|
|
"grad_norm": 21.13860057153763,
|
|
"learning_rate": 7.398735493996445e-08,
|
|
"loss": 0.5412633419036865,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 2.8585209003215435,
|
|
"grad_norm": 6.858041915122581,
|
|
"learning_rate": 7.081301153779308e-08,
|
|
"loss": 0.2149556279182434,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 2.861736334405145,
|
|
"grad_norm": 8.692720770718445,
|
|
"learning_rate": 6.77077796818787e-08,
|
|
"loss": 0.2750585675239563,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 2.864951768488746,
|
|
"grad_norm": 11.520297885410352,
|
|
"learning_rate": 6.467170291030999e-08,
|
|
"loss": 0.870136022567749,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 2.868167202572347,
|
|
"grad_norm": 7.431835115778323,
|
|
"learning_rate": 6.170482379155907e-08,
|
|
"loss": 0.6092904806137085,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 2.8713826366559485,
|
|
"grad_norm": 6.6845295035921035,
|
|
"learning_rate": 5.880718392388518e-08,
|
|
"loss": 0.3216220438480377,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 2.87459807073955,
|
|
"grad_norm": 12.92591336560967,
|
|
"learning_rate": 5.597882393475473e-08,
|
|
"loss": 0.9828184247016907,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 2.877813504823151,
|
|
"grad_norm": 12.3805351827977,
|
|
"learning_rate": 5.3219783480266685e-08,
|
|
"loss": 0.3474922776222229,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 2.8810289389067525,
|
|
"grad_norm": 8.250380956070305,
|
|
"learning_rate": 5.053010124460078e-08,
|
|
"loss": 0.6136802434921265,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 2.884244372990354,
|
|
"grad_norm": 10.183182722886206,
|
|
"learning_rate": 4.790981493947244e-08,
|
|
"loss": 0.36145269870758057,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 2.887459807073955,
|
|
"grad_norm": 10.874247692186005,
|
|
"learning_rate": 4.5358961303604845e-08,
|
|
"loss": 0.582977831363678,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 2.890675241157556,
|
|
"grad_norm": 7.351347894780587,
|
|
"learning_rate": 4.287757610221488e-08,
|
|
"loss": 0.26535022258758545,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 2.8938906752411575,
|
|
"grad_norm": 8.630676257377292,
|
|
"learning_rate": 4.046569412651025e-08,
|
|
"loss": 0.36218592524528503,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 2.897106109324759,
|
|
"grad_norm": 16.143200059875355,
|
|
"learning_rate": 3.8123349193201484e-08,
|
|
"loss": 0.6273083686828613,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 2.90032154340836,
|
|
"grad_norm": 13.043811958366279,
|
|
"learning_rate": 3.585057414402959e-08,
|
|
"loss": 0.51048743724823,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 2.9035369774919615,
|
|
"grad_norm": 13.302923874937125,
|
|
"learning_rate": 3.364740084530416e-08,
|
|
"loss": 0.3014843165874481,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 2.906752411575563,
|
|
"grad_norm": 14.89983151718059,
|
|
"learning_rate": 3.1513860187457055e-08,
|
|
"loss": 0.2004130780696869,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 2.909967845659164,
|
|
"grad_norm": 9.790667693555704,
|
|
"learning_rate": 2.9449982084607808e-08,
|
|
"loss": 0.39281973242759705,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 2.913183279742765,
|
|
"grad_norm": 18.644324884396916,
|
|
"learning_rate": 2.7455795474147228e-08,
|
|
"loss": 0.23955759406089783,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 2.9163987138263665,
|
|
"grad_norm": 8.927013412093052,
|
|
"learning_rate": 2.5531328316328875e-08,
|
|
"loss": 0.20128212869167328,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 2.919614147909968,
|
|
"grad_norm": 15.799239685702284,
|
|
"learning_rate": 2.367660759387935e-08,
|
|
"loss": 0.7717773914337158,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 2.922829581993569,
|
|
"grad_norm": 15.772125165413822,
|
|
"learning_rate": 2.189165931161752e-08,
|
|
"loss": 0.36580389738082886,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 2.9260450160771705,
|
|
"grad_norm": 9.375529241824653,
|
|
"learning_rate": 2.017650849609143e-08,
|
|
"loss": 0.20489028096199036,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 2.929260450160772,
|
|
"grad_norm": 9.02286597270734,
|
|
"learning_rate": 1.8531179195227512e-08,
|
|
"loss": 0.4565661549568176,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 2.932475884244373,
|
|
"grad_norm": 8.336347819565956,
|
|
"learning_rate": 1.6955694477993055e-08,
|
|
"loss": 0.1987374871969223,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 2.935691318327974,
|
|
"grad_norm": 11.104808526073217,
|
|
"learning_rate": 1.545007643407148e-08,
|
|
"loss": 0.6139571070671082,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 2.9389067524115755,
|
|
"grad_norm": 19.520039784387993,
|
|
"learning_rate": 1.4014346173555904e-08,
|
|
"loss": 0.3137204647064209,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 2.942122186495177,
|
|
"grad_norm": 11.947768453886892,
|
|
"learning_rate": 1.2648523826649384e-08,
|
|
"loss": 0.2887860834598541,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 2.945337620578778,
|
|
"grad_norm": 12.055012811370135,
|
|
"learning_rate": 1.1352628543385702e-08,
|
|
"loss": 0.20143553614616394,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 2.9485530546623795,
|
|
"grad_norm": 9.518048446591854,
|
|
"learning_rate": 1.0126678493358466e-08,
|
|
"loss": 0.4039468765258789,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 2.951768488745981,
|
|
"grad_norm": 6.847191516049079,
|
|
"learning_rate": 8.97069086546798e-09,
|
|
"loss": 0.22868111729621887,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 2.954983922829582,
|
|
"grad_norm": 8.642631440592353,
|
|
"learning_rate": 7.884681867679766e-09,
|
|
"loss": 0.7044589519500732,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 2.958199356913183,
|
|
"grad_norm": 7.259843508692505,
|
|
"learning_rate": 6.86866672679698e-09,
|
|
"loss": 0.17346805334091187,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 2.9614147909967845,
|
|
"grad_norm": 8.999484586415532,
|
|
"learning_rate": 5.9226596882483445e-09,
|
|
"loss": 0.6509414911270142,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 2.964630225080386,
|
|
"grad_norm": 9.496798526213007,
|
|
"learning_rate": 5.0466740158849895e-09,
|
|
"loss": 0.25824788212776184,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 2.967845659163987,
|
|
"grad_norm": 8.918273839634464,
|
|
"learning_rate": 4.240721991799479e-09,
|
|
"loss": 0.40939438343048096,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 2.9710610932475885,
|
|
"grad_norm": 7.676643145381744,
|
|
"learning_rate": 3.5048149161487356e-09,
|
|
"loss": 0.2906154692173004,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 2.97427652733119,
|
|
"grad_norm": 9.78535730660284,
|
|
"learning_rate": 2.8389631069986044e-09,
|
|
"loss": 0.7982381582260132,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 2.977491961414791,
|
|
"grad_norm": 8.125146432986423,
|
|
"learning_rate": 2.2431759001789734e-09,
|
|
"loss": 0.39818036556243896,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 2.980707395498392,
|
|
"grad_norm": 8.127002382258551,
|
|
"learning_rate": 1.7174616491510975e-09,
|
|
"loss": 0.311463862657547,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 2.9839228295819935,
|
|
"grad_norm": 9.126971708317253,
|
|
"learning_rate": 1.2618277248921397e-09,
|
|
"loss": 0.8928610682487488,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 2.987138263665595,
|
|
"grad_norm": 13.640832208032544,
|
|
"learning_rate": 8.762805157913612e-10,
|
|
"loss": 0.5860022306442261,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 2.990353697749196,
|
|
"grad_norm": 11.44603238864219,
|
|
"learning_rate": 5.608254275607516e-10,
|
|
"loss": 0.244879812002182,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 2.9935691318327975,
|
|
"grad_norm": 11.900949930490297,
|
|
"learning_rate": 3.1546688315842177e-10,
|
|
"loss": 0.37080761790275574,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 2.996784565916399,
|
|
"grad_norm": 11.46303644524341,
|
|
"learning_rate": 1.4020832272754193e-10,
|
|
"loss": 0.3129621148109436,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 7.767345085481768,
|
|
"learning_rate": 3.505220354749206e-11,
|
|
"loss": 0.548541247844696,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"step": 933,
|
|
"total_flos": 2578979020800.0,
|
|
"train_loss": 1.6905082198137384,
|
|
"train_runtime": 1266.0882,
|
|
"train_samples_per_second": 2.945,
|
|
"train_steps_per_second": 0.737
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 933,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2578979020800.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|