5634 lines
139 KiB
JSON
5634 lines
139 KiB
JSON
|
|
{
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 0.008947845247016454,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 800,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 1.1184806558770566e-05,
|
||
|
|
"grad_norm": 9.17889404296875,
|
||
|
|
"learning_rate": 0.00019999999993826567,
|
||
|
|
"loss": 4.6752,
|
||
|
|
"step": 1
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.236961311754113e-05,
|
||
|
|
"grad_norm": 15.023734092712402,
|
||
|
|
"learning_rate": 0.0001999999997530627,
|
||
|
|
"loss": 4.8816,
|
||
|
|
"step": 2
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.35544196763117e-05,
|
||
|
|
"grad_norm": 4.711775302886963,
|
||
|
|
"learning_rate": 0.00019999999944439107,
|
||
|
|
"loss": 4.3122,
|
||
|
|
"step": 3
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.473922623508226e-05,
|
||
|
|
"grad_norm": 5.1041460037231445,
|
||
|
|
"learning_rate": 0.0001999999990122508,
|
||
|
|
"loss": 4.0207,
|
||
|
|
"step": 4
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.5924032793852833e-05,
|
||
|
|
"grad_norm": 11.579492568969727,
|
||
|
|
"learning_rate": 0.0001999999984566419,
|
||
|
|
"loss": 3.656,
|
||
|
|
"step": 5
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.71088393526234e-05,
|
||
|
|
"grad_norm": 4.573488235473633,
|
||
|
|
"learning_rate": 0.00019999999777756431,
|
||
|
|
"loss": 3.3136,
|
||
|
|
"step": 6
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.829364591139397e-05,
|
||
|
|
"grad_norm": 3.6844234466552734,
|
||
|
|
"learning_rate": 0.0001999999969750181,
|
||
|
|
"loss": 3.0363,
|
||
|
|
"step": 7
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 8.947845247016453e-05,
|
||
|
|
"grad_norm": 2.9362566471099854,
|
||
|
|
"learning_rate": 0.00019999999604900323,
|
||
|
|
"loss": 2.7911,
|
||
|
|
"step": 8
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0001006632590289351,
|
||
|
|
"grad_norm": 2.6654202938079834,
|
||
|
|
"learning_rate": 0.0001999999949995197,
|
||
|
|
"loss": 2.6176,
|
||
|
|
"step": 9
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00011184806558770567,
|
||
|
|
"grad_norm": 1.8987364768981934,
|
||
|
|
"learning_rate": 0.00019999999382656758,
|
||
|
|
"loss": 2.5172,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00012303287214647624,
|
||
|
|
"grad_norm": 2.596072196960449,
|
||
|
|
"learning_rate": 0.0001999999925301468,
|
||
|
|
"loss": 2.3978,
|
||
|
|
"step": 11
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0001342176787052468,
|
||
|
|
"grad_norm": 1.6658835411071777,
|
||
|
|
"learning_rate": 0.00019999999111025733,
|
||
|
|
"loss": 2.2888,
|
||
|
|
"step": 12
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00014540248526401735,
|
||
|
|
"grad_norm": 1.5891242027282715,
|
||
|
|
"learning_rate": 0.00019999998956689926,
|
||
|
|
"loss": 2.1966,
|
||
|
|
"step": 13
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00015658729182278794,
|
||
|
|
"grad_norm": 1.7551047801971436,
|
||
|
|
"learning_rate": 0.00019999998790007256,
|
||
|
|
"loss": 2.1286,
|
||
|
|
"step": 14
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0001677720983815585,
|
||
|
|
"grad_norm": 1.515837550163269,
|
||
|
|
"learning_rate": 0.0001999999861097772,
|
||
|
|
"loss": 2.0329,
|
||
|
|
"step": 15
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00017895690494032905,
|
||
|
|
"grad_norm": 2.0875966548919678,
|
||
|
|
"learning_rate": 0.0001999999841960132,
|
||
|
|
"loss": 1.9836,
|
||
|
|
"step": 16
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00019014171149909964,
|
||
|
|
"grad_norm": 1.3364766836166382,
|
||
|
|
"learning_rate": 0.0001999999821587806,
|
||
|
|
"loss": 1.9412,
|
||
|
|
"step": 17
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0002013265180578702,
|
||
|
|
"grad_norm": 0.9586036205291748,
|
||
|
|
"learning_rate": 0.00019999997999807934,
|
||
|
|
"loss": 1.9021,
|
||
|
|
"step": 18
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00021251132461664075,
|
||
|
|
"grad_norm": 0.610419750213623,
|
||
|
|
"learning_rate": 0.00019999997771390947,
|
||
|
|
"loss": 1.9278,
|
||
|
|
"step": 19
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00022369613117541133,
|
||
|
|
"grad_norm": 0.6592239141464233,
|
||
|
|
"learning_rate": 0.000199999975306271,
|
||
|
|
"loss": 1.8956,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0002348809377341819,
|
||
|
|
"grad_norm": 0.7091565132141113,
|
||
|
|
"learning_rate": 0.00019999997277516388,
|
||
|
|
"loss": 1.8629,
|
||
|
|
"step": 21
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0002460657442929525,
|
||
|
|
"grad_norm": 0.6687048077583313,
|
||
|
|
"learning_rate": 0.00019999997012058819,
|
||
|
|
"loss": 1.818,
|
||
|
|
"step": 22
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.000257250550851723,
|
||
|
|
"grad_norm": 0.29321762919425964,
|
||
|
|
"learning_rate": 0.00019999996734254382,
|
||
|
|
"loss": 1.8024,
|
||
|
|
"step": 23
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0002684353574104936,
|
||
|
|
"grad_norm": 0.6186531186103821,
|
||
|
|
"learning_rate": 0.00019999996444103086,
|
||
|
|
"loss": 1.7958,
|
||
|
|
"step": 24
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0002796201639692642,
|
||
|
|
"grad_norm": 0.4960622489452362,
|
||
|
|
"learning_rate": 0.0001999999614160493,
|
||
|
|
"loss": 1.7714,
|
||
|
|
"step": 25
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0002908049705280347,
|
||
|
|
"grad_norm": 0.25318390130996704,
|
||
|
|
"learning_rate": 0.00019999995826759916,
|
||
|
|
"loss": 1.7419,
|
||
|
|
"step": 26
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0003019897770868053,
|
||
|
|
"grad_norm": 0.5521177649497986,
|
||
|
|
"learning_rate": 0.0001999999549956804,
|
||
|
|
"loss": 1.7336,
|
||
|
|
"step": 27
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0003131745836455759,
|
||
|
|
"grad_norm": 0.3085158169269562,
|
||
|
|
"learning_rate": 0.00019999995160029305,
|
||
|
|
"loss": 1.7304,
|
||
|
|
"step": 28
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0003243593902043464,
|
||
|
|
"grad_norm": 0.2978903353214264,
|
||
|
|
"learning_rate": 0.0001999999480814371,
|
||
|
|
"loss": 1.7283,
|
||
|
|
"step": 29
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.000335544196763117,
|
||
|
|
"grad_norm": 0.40339481830596924,
|
||
|
|
"learning_rate": 0.00019999994443911258,
|
||
|
|
"loss": 1.7577,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00034672900332188757,
|
||
|
|
"grad_norm": 0.13451404869556427,
|
||
|
|
"learning_rate": 0.00019999994067331945,
|
||
|
|
"loss": 1.7435,
|
||
|
|
"step": 31
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0003579138098806581,
|
||
|
|
"grad_norm": 0.3141914904117584,
|
||
|
|
"learning_rate": 0.0001999999367840578,
|
||
|
|
"loss": 1.7479,
|
||
|
|
"step": 32
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0003690986164394287,
|
||
|
|
"grad_norm": 0.12182258069515228,
|
||
|
|
"learning_rate": 0.00019999993277132754,
|
||
|
|
"loss": 1.7391,
|
||
|
|
"step": 33
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00038028342299819927,
|
||
|
|
"grad_norm": 0.3160305917263031,
|
||
|
|
"learning_rate": 0.00019999992863512872,
|
||
|
|
"loss": 1.7231,
|
||
|
|
"step": 34
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0003914682295569698,
|
||
|
|
"grad_norm": 0.18215563893318176,
|
||
|
|
"learning_rate": 0.00019999992437546134,
|
||
|
|
"loss": 1.7067,
|
||
|
|
"step": 35
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0004026530361157404,
|
||
|
|
"grad_norm": 0.24103401601314545,
|
||
|
|
"learning_rate": 0.0001999999199923254,
|
||
|
|
"loss": 1.6877,
|
||
|
|
"step": 36
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00041383784267451097,
|
||
|
|
"grad_norm": 0.17353500425815582,
|
||
|
|
"learning_rate": 0.0001999999154857209,
|
||
|
|
"loss": 1.6746,
|
||
|
|
"step": 37
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0004250226492332815,
|
||
|
|
"grad_norm": 0.19149154424667358,
|
||
|
|
"learning_rate": 0.00019999991085564784,
|
||
|
|
"loss": 1.6734,
|
||
|
|
"step": 38
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0004362074557920521,
|
||
|
|
"grad_norm": 0.15810631215572357,
|
||
|
|
"learning_rate": 0.0001999999061021063,
|
||
|
|
"loss": 1.6773,
|
||
|
|
"step": 39
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00044739226235082267,
|
||
|
|
"grad_norm": 0.14770038425922394,
|
||
|
|
"learning_rate": 0.00019999990122509614,
|
||
|
|
"loss": 1.6967,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0004585770689095932,
|
||
|
|
"grad_norm": 0.15101520717144012,
|
||
|
|
"learning_rate": 0.0001999998962246175,
|
||
|
|
"loss": 1.6816,
|
||
|
|
"step": 41
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0004697618754683638,
|
||
|
|
"grad_norm": 0.1085171177983284,
|
||
|
|
"learning_rate": 0.00019999989110067035,
|
||
|
|
"loss": 1.6875,
|
||
|
|
"step": 42
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00048094668202713437,
|
||
|
|
"grad_norm": 0.13066136837005615,
|
||
|
|
"learning_rate": 0.00019999988585325468,
|
||
|
|
"loss": 1.6768,
|
||
|
|
"step": 43
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.000492131488585905,
|
||
|
|
"grad_norm": 0.09783171862363815,
|
||
|
|
"learning_rate": 0.0001999998804823705,
|
||
|
|
"loss": 1.6559,
|
||
|
|
"step": 44
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0005033162951446755,
|
||
|
|
"grad_norm": 0.15001484751701355,
|
||
|
|
"learning_rate": 0.00019999987498801777,
|
||
|
|
"loss": 1.6662,
|
||
|
|
"step": 45
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.000514501101703446,
|
||
|
|
"grad_norm": 0.06484243273735046,
|
||
|
|
"learning_rate": 0.0001999998693701966,
|
||
|
|
"loss": 1.6528,
|
||
|
|
"step": 46
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0005256859082622166,
|
||
|
|
"grad_norm": 0.12908180058002472,
|
||
|
|
"learning_rate": 0.00019999986362890693,
|
||
|
|
"loss": 1.67,
|
||
|
|
"step": 47
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0005368707148209872,
|
||
|
|
"grad_norm": 0.05700545758008957,
|
||
|
|
"learning_rate": 0.00019999985776414877,
|
||
|
|
"loss": 1.6643,
|
||
|
|
"step": 48
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0005480555213797577,
|
||
|
|
"grad_norm": 0.10586538165807724,
|
||
|
|
"learning_rate": 0.00019999985177592211,
|
||
|
|
"loss": 1.6725,
|
||
|
|
"step": 49
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0005592403279385283,
|
||
|
|
"grad_norm": 0.05411362275481224,
|
||
|
|
"learning_rate": 0.00019999984566422703,
|
||
|
|
"loss": 1.6495,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0005704251344972989,
|
||
|
|
"grad_norm": 0.08841974288225174,
|
||
|
|
"learning_rate": 0.00019999983942906347,
|
||
|
|
"loss": 1.6397,
|
||
|
|
"step": 51
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0005816099410560694,
|
||
|
|
"grad_norm": 0.049202822148799896,
|
||
|
|
"learning_rate": 0.00019999983307043145,
|
||
|
|
"loss": 1.6601,
|
||
|
|
"step": 52
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00059279474761484,
|
||
|
|
"grad_norm": 0.06537283957004547,
|
||
|
|
"learning_rate": 0.00019999982658833098,
|
||
|
|
"loss": 1.6405,
|
||
|
|
"step": 53
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0006039795541736106,
|
||
|
|
"grad_norm": 0.04358899965882301,
|
||
|
|
"learning_rate": 0.0001999998199827621,
|
||
|
|
"loss": 1.6586,
|
||
|
|
"step": 54
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0006151643607323811,
|
||
|
|
"grad_norm": 0.05924156308174133,
|
||
|
|
"learning_rate": 0.0001999998132537248,
|
||
|
|
"loss": 1.6609,
|
||
|
|
"step": 55
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0006263491672911517,
|
||
|
|
"grad_norm": 0.047364529222249985,
|
||
|
|
"learning_rate": 0.00019999980640121904,
|
||
|
|
"loss": 1.6561,
|
||
|
|
"step": 56
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0006375339738499223,
|
||
|
|
"grad_norm": 0.05860909819602966,
|
||
|
|
"learning_rate": 0.00019999979942524488,
|
||
|
|
"loss": 1.644,
|
||
|
|
"step": 57
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0006487187804086928,
|
||
|
|
"grad_norm": 0.058639075607061386,
|
||
|
|
"learning_rate": 0.00019999979232580235,
|
||
|
|
"loss": 1.6582,
|
||
|
|
"step": 58
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0006599035869674634,
|
||
|
|
"grad_norm": 0.049288246780633926,
|
||
|
|
"learning_rate": 0.00019999978510289138,
|
||
|
|
"loss": 1.6828,
|
||
|
|
"step": 59
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.000671088393526234,
|
||
|
|
"grad_norm": 0.06293074041604996,
|
||
|
|
"learning_rate": 0.00019999977775651207,
|
||
|
|
"loss": 1.6852,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0006822732000850045,
|
||
|
|
"grad_norm": 0.03848947212100029,
|
||
|
|
"learning_rate": 0.00019999977028666436,
|
||
|
|
"loss": 1.6792,
|
||
|
|
"step": 61
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0006934580066437751,
|
||
|
|
"grad_norm": 0.1217992901802063,
|
||
|
|
"learning_rate": 0.00019999976269334828,
|
||
|
|
"loss": 1.7142,
|
||
|
|
"step": 62
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0007046428132025457,
|
||
|
|
"grad_norm": 0.04006423428654671,
|
||
|
|
"learning_rate": 0.00019999975497656384,
|
||
|
|
"loss": 1.7108,
|
||
|
|
"step": 63
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0007158276197613162,
|
||
|
|
"grad_norm": 0.057927753776311874,
|
||
|
|
"learning_rate": 0.0001999997471363111,
|
||
|
|
"loss": 1.6902,
|
||
|
|
"step": 64
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0007270124263200868,
|
||
|
|
"grad_norm": 0.04537767171859741,
|
||
|
|
"learning_rate": 0.00019999973917258997,
|
||
|
|
"loss": 1.6627,
|
||
|
|
"step": 65
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0007381972328788574,
|
||
|
|
"grad_norm": 0.057307057082653046,
|
||
|
|
"learning_rate": 0.00019999973108540052,
|
||
|
|
"loss": 1.6561,
|
||
|
|
"step": 66
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0007493820394376279,
|
||
|
|
"grad_norm": 0.044002167880535126,
|
||
|
|
"learning_rate": 0.00019999972287474272,
|
||
|
|
"loss": 1.6595,
|
||
|
|
"step": 67
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0007605668459963985,
|
||
|
|
"grad_norm": 0.03991294279694557,
|
||
|
|
"learning_rate": 0.00019999971454061666,
|
||
|
|
"loss": 1.6563,
|
||
|
|
"step": 68
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0007717516525551691,
|
||
|
|
"grad_norm": 0.044279683381319046,
|
||
|
|
"learning_rate": 0.0001999997060830223,
|
||
|
|
"loss": 1.6352,
|
||
|
|
"step": 69
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0007829364591139396,
|
||
|
|
"grad_norm": 0.04319776967167854,
|
||
|
|
"learning_rate": 0.00019999969750195967,
|
||
|
|
"loss": 1.6319,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0007941212656727102,
|
||
|
|
"grad_norm": 0.04436005651950836,
|
||
|
|
"learning_rate": 0.00019999968879742873,
|
||
|
|
"loss": 1.6556,
|
||
|
|
"step": 71
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0008053060722314808,
|
||
|
|
"grad_norm": 0.0416998416185379,
|
||
|
|
"learning_rate": 0.00019999967996942952,
|
||
|
|
"loss": 1.6646,
|
||
|
|
"step": 72
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0008164908787902513,
|
||
|
|
"grad_norm": 0.03601493313908577,
|
||
|
|
"learning_rate": 0.00019999967101796208,
|
||
|
|
"loss": 1.6605,
|
||
|
|
"step": 73
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0008276756853490219,
|
||
|
|
"grad_norm": 0.03957024961709976,
|
||
|
|
"learning_rate": 0.00019999966194302637,
|
||
|
|
"loss": 1.6714,
|
||
|
|
"step": 74
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0008388604919077925,
|
||
|
|
"grad_norm": 0.04401829466223717,
|
||
|
|
"learning_rate": 0.00019999965274462245,
|
||
|
|
"loss": 1.6497,
|
||
|
|
"step": 75
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.000850045298466563,
|
||
|
|
"grad_norm": 0.04251580312848091,
|
||
|
|
"learning_rate": 0.0001999996434227503,
|
||
|
|
"loss": 1.6567,
|
||
|
|
"step": 76
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0008612301050253336,
|
||
|
|
"grad_norm": 0.038915786892175674,
|
||
|
|
"learning_rate": 0.00019999963397740995,
|
||
|
|
"loss": 1.6515,
|
||
|
|
"step": 77
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0008724149115841042,
|
||
|
|
"grad_norm": 0.04063812270760536,
|
||
|
|
"learning_rate": 0.00019999962440860137,
|
||
|
|
"loss": 1.6764,
|
||
|
|
"step": 78
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0008835997181428747,
|
||
|
|
"grad_norm": 0.04114954546093941,
|
||
|
|
"learning_rate": 0.00019999961471632463,
|
||
|
|
"loss": 1.6553,
|
||
|
|
"step": 79
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0008947845247016453,
|
||
|
|
"grad_norm": 0.039563097059726715,
|
||
|
|
"learning_rate": 0.0001999996049005797,
|
||
|
|
"loss": 1.6595,
|
||
|
|
"step": 80
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0009059693312604159,
|
||
|
|
"grad_norm": 0.03477632254362106,
|
||
|
|
"learning_rate": 0.00019999959496136663,
|
||
|
|
"loss": 1.6612,
|
||
|
|
"step": 81
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0009171541378191864,
|
||
|
|
"grad_norm": 0.04104992002248764,
|
||
|
|
"learning_rate": 0.0001999995848986854,
|
||
|
|
"loss": 1.6476,
|
||
|
|
"step": 82
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.000928338944377957,
|
||
|
|
"grad_norm": 0.03909602388739586,
|
||
|
|
"learning_rate": 0.00019999957471253602,
|
||
|
|
"loss": 1.6445,
|
||
|
|
"step": 83
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0009395237509367276,
|
||
|
|
"grad_norm": 0.039677415043115616,
|
||
|
|
"learning_rate": 0.00019999956440291855,
|
||
|
|
"loss": 1.6368,
|
||
|
|
"step": 84
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0009507085574954981,
|
||
|
|
"grad_norm": 0.03493763506412506,
|
||
|
|
"learning_rate": 0.00019999955396983292,
|
||
|
|
"loss": 1.6343,
|
||
|
|
"step": 85
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0009618933640542687,
|
||
|
|
"grad_norm": 0.03530074283480644,
|
||
|
|
"learning_rate": 0.0001999995434132792,
|
||
|
|
"loss": 1.6843,
|
||
|
|
"step": 86
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0009730781706130393,
|
||
|
|
"grad_norm": 0.037156637758016586,
|
||
|
|
"learning_rate": 0.00019999953273325743,
|
||
|
|
"loss": 1.6721,
|
||
|
|
"step": 87
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00098426297717181,
|
||
|
|
"grad_norm": 0.04006032645702362,
|
||
|
|
"learning_rate": 0.00019999952192976755,
|
||
|
|
"loss": 1.6724,
|
||
|
|
"step": 88
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0009954477837305804,
|
||
|
|
"grad_norm": 0.03743763267993927,
|
||
|
|
"learning_rate": 0.0001999995110028096,
|
||
|
|
"loss": 1.6613,
|
||
|
|
"step": 89
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001006632590289351,
|
||
|
|
"grad_norm": 0.04100384563207626,
|
||
|
|
"learning_rate": 0.00019999949995238369,
|
||
|
|
"loss": 1.6444,
|
||
|
|
"step": 90
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0010178173968481215,
|
||
|
|
"grad_norm": 0.03799253702163696,
|
||
|
|
"learning_rate": 0.00019999948877848965,
|
||
|
|
"loss": 1.6641,
|
||
|
|
"step": 91
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001029002203406892,
|
||
|
|
"grad_norm": 0.040163811296224594,
|
||
|
|
"learning_rate": 0.00019999947748112763,
|
||
|
|
"loss": 1.6411,
|
||
|
|
"step": 92
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0010401870099656626,
|
||
|
|
"grad_norm": 0.03576591610908508,
|
||
|
|
"learning_rate": 0.0001999994660602976,
|
||
|
|
"loss": 1.6378,
|
||
|
|
"step": 93
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0010513718165244333,
|
||
|
|
"grad_norm": 0.03735070303082466,
|
||
|
|
"learning_rate": 0.00019999945451599957,
|
||
|
|
"loss": 1.644,
|
||
|
|
"step": 94
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0010625566230832038,
|
||
|
|
"grad_norm": 0.04157353192567825,
|
||
|
|
"learning_rate": 0.00019999944284823358,
|
||
|
|
"loss": 1.6532,
|
||
|
|
"step": 95
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0010737414296419744,
|
||
|
|
"grad_norm": 0.046478450298309326,
|
||
|
|
"learning_rate": 0.0001999994310569996,
|
||
|
|
"loss": 1.6744,
|
||
|
|
"step": 96
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0010849262362007449,
|
||
|
|
"grad_norm": 0.04813043400645256,
|
||
|
|
"learning_rate": 0.0001999994191422977,
|
||
|
|
"loss": 1.6716,
|
||
|
|
"step": 97
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0010961110427595154,
|
||
|
|
"grad_norm": 0.03780042380094528,
|
||
|
|
"learning_rate": 0.00019999940710412788,
|
||
|
|
"loss": 1.6502,
|
||
|
|
"step": 98
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001107295849318286,
|
||
|
|
"grad_norm": 0.03811471536755562,
|
||
|
|
"learning_rate": 0.0001999993949424901,
|
||
|
|
"loss": 1.6405,
|
||
|
|
"step": 99
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0011184806558770567,
|
||
|
|
"grad_norm": 0.03838631138205528,
|
||
|
|
"learning_rate": 0.00019999938265738445,
|
||
|
|
"loss": 1.6202,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0011296654624358272,
|
||
|
|
"grad_norm": 0.04033266752958298,
|
||
|
|
"learning_rate": 0.0001999993702488109,
|
||
|
|
"loss": 1.6305,
|
||
|
|
"step": 101
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0011408502689945978,
|
||
|
|
"grad_norm": 0.038872625678777695,
|
||
|
|
"learning_rate": 0.0001999993577167695,
|
||
|
|
"loss": 1.6243,
|
||
|
|
"step": 102
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0011520350755533683,
|
||
|
|
"grad_norm": 0.047068770974874496,
|
||
|
|
"learning_rate": 0.0001999993450612602,
|
||
|
|
"loss": 1.6126,
|
||
|
|
"step": 103
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0011632198821121388,
|
||
|
|
"grad_norm": 0.038774486631155014,
|
||
|
|
"learning_rate": 0.0001999993322822831,
|
||
|
|
"loss": 1.6125,
|
||
|
|
"step": 104
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0011744046886709093,
|
||
|
|
"grad_norm": 0.046706534922122955,
|
||
|
|
"learning_rate": 0.00019999931937983814,
|
||
|
|
"loss": 1.6062,
|
||
|
|
"step": 105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00118558949522968,
|
||
|
|
"grad_norm": 0.038454532623291016,
|
||
|
|
"learning_rate": 0.00019999930635392538,
|
||
|
|
"loss": 1.6145,
|
||
|
|
"step": 106
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0011967743017884506,
|
||
|
|
"grad_norm": 0.042938027530908585,
|
||
|
|
"learning_rate": 0.0001999992932045448,
|
||
|
|
"loss": 1.6064,
|
||
|
|
"step": 107
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0012079591083472212,
|
||
|
|
"grad_norm": 0.03728878125548363,
|
||
|
|
"learning_rate": 0.00019999927993169652,
|
||
|
|
"loss": 1.6203,
|
||
|
|
"step": 108
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0012191439149059917,
|
||
|
|
"grad_norm": 0.03831510245800018,
|
||
|
|
"learning_rate": 0.00019999926653538043,
|
||
|
|
"loss": 1.6214,
|
||
|
|
"step": 109
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0012303287214647622,
|
||
|
|
"grad_norm": 0.04032002389431,
|
||
|
|
"learning_rate": 0.00019999925301559659,
|
||
|
|
"loss": 1.6116,
|
||
|
|
"step": 110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0012415135280235327,
|
||
|
|
"grad_norm": 0.04648670554161072,
|
||
|
|
"learning_rate": 0.00019999923937234505,
|
||
|
|
"loss": 1.6228,
|
||
|
|
"step": 111
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0012526983345823035,
|
||
|
|
"grad_norm": 0.03572435304522514,
|
||
|
|
"learning_rate": 0.00019999922560562575,
|
||
|
|
"loss": 1.6382,
|
||
|
|
"step": 112
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001263883141141074,
|
||
|
|
"grad_norm": 0.03520497307181358,
|
||
|
|
"learning_rate": 0.0001999992117154388,
|
||
|
|
"loss": 1.642,
|
||
|
|
"step": 113
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0012750679476998446,
|
||
|
|
"grad_norm": 0.037833768874406815,
|
||
|
|
"learning_rate": 0.00019999919770178414,
|
||
|
|
"loss": 1.6809,
|
||
|
|
"step": 114
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001286252754258615,
|
||
|
|
"grad_norm": 0.04116043448448181,
|
||
|
|
"learning_rate": 0.00019999918356466186,
|
||
|
|
"loss": 1.6669,
|
||
|
|
"step": 115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0012974375608173856,
|
||
|
|
"grad_norm": 0.03686891868710518,
|
||
|
|
"learning_rate": 0.00019999916930407192,
|
||
|
|
"loss": 1.6336,
|
||
|
|
"step": 116
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0013086223673761561,
|
||
|
|
"grad_norm": 0.04540928825736046,
|
||
|
|
"learning_rate": 0.00019999915492001434,
|
||
|
|
"loss": 1.636,
|
||
|
|
"step": 117
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0013198071739349269,
|
||
|
|
"grad_norm": 0.03894014656543732,
|
||
|
|
"learning_rate": 0.00019999914041248917,
|
||
|
|
"loss": 1.642,
|
||
|
|
"step": 118
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0013309919804936974,
|
||
|
|
"grad_norm": 0.03868821635842323,
|
||
|
|
"learning_rate": 0.0001999991257814964,
|
||
|
|
"loss": 1.6459,
|
||
|
|
"step": 119
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001342176787052468,
|
||
|
|
"grad_norm": 0.05047065392136574,
|
||
|
|
"learning_rate": 0.00019999911102703606,
|
||
|
|
"loss": 1.6566,
|
||
|
|
"step": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0013533615936112385,
|
||
|
|
"grad_norm": 0.037026163190603256,
|
||
|
|
"learning_rate": 0.0001999990961491082,
|
||
|
|
"loss": 1.644,
|
||
|
|
"step": 121
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001364546400170009,
|
||
|
|
"grad_norm": 0.05025867745280266,
|
||
|
|
"learning_rate": 0.00019999908114771278,
|
||
|
|
"loss": 1.6375,
|
||
|
|
"step": 122
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0013757312067287795,
|
||
|
|
"grad_norm": 0.05618799850344658,
|
||
|
|
"learning_rate": 0.0001999990660228498,
|
||
|
|
"loss": 1.6414,
|
||
|
|
"step": 123
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0013869160132875503,
|
||
|
|
"grad_norm": 0.04012198746204376,
|
||
|
|
"learning_rate": 0.0001999990507745194,
|
||
|
|
"loss": 1.6338,
|
||
|
|
"step": 124
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0013981008198463208,
|
||
|
|
"grad_norm": 0.03692522644996643,
|
||
|
|
"learning_rate": 0.00019999903540272147,
|
||
|
|
"loss": 1.6338,
|
||
|
|
"step": 125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0014092856264050913,
|
||
|
|
"grad_norm": 0.04653295874595642,
|
||
|
|
"learning_rate": 0.0001999990199074561,
|
||
|
|
"loss": 1.6242,
|
||
|
|
"step": 126
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0014204704329638619,
|
||
|
|
"grad_norm": 0.038410138338804245,
|
||
|
|
"learning_rate": 0.0001999990042887233,
|
||
|
|
"loss": 1.6535,
|
||
|
|
"step": 127
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0014316552395226324,
|
||
|
|
"grad_norm": 0.03855321556329727,
|
||
|
|
"learning_rate": 0.00019999898854652307,
|
||
|
|
"loss": 1.6622,
|
||
|
|
"step": 128
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001442840046081403,
|
||
|
|
"grad_norm": 0.039161138236522675,
|
||
|
|
"learning_rate": 0.00019999897268085543,
|
||
|
|
"loss": 1.6596,
|
||
|
|
"step": 129
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0014540248526401737,
|
||
|
|
"grad_norm": 0.04229150339961052,
|
||
|
|
"learning_rate": 0.00019999895669172042,
|
||
|
|
"loss": 1.6044,
|
||
|
|
"step": 130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0014652096591989442,
|
||
|
|
"grad_norm": 0.04695671424269676,
|
||
|
|
"learning_rate": 0.00019999894057911804,
|
||
|
|
"loss": 1.593,
|
||
|
|
"step": 131
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0014763944657577147,
|
||
|
|
"grad_norm": 0.05007043853402138,
|
||
|
|
"learning_rate": 0.00019999892434304832,
|
||
|
|
"loss": 1.6371,
|
||
|
|
"step": 132
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0014875792723164853,
|
||
|
|
"grad_norm": 0.04140834882855415,
|
||
|
|
"learning_rate": 0.00019999890798351127,
|
||
|
|
"loss": 1.6333,
|
||
|
|
"step": 133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0014987640788752558,
|
||
|
|
"grad_norm": 0.03771176561713219,
|
||
|
|
"learning_rate": 0.0001999988915005069,
|
||
|
|
"loss": 1.6253,
|
||
|
|
"step": 134
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0015099488854340263,
|
||
|
|
"grad_norm": 0.03835882246494293,
|
||
|
|
"learning_rate": 0.00019999887489403532,
|
||
|
|
"loss": 1.6096,
|
||
|
|
"step": 135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001521133691992797,
|
||
|
|
"grad_norm": 0.042071614414453506,
|
||
|
|
"learning_rate": 0.00019999885816409643,
|
||
|
|
"loss": 1.6158,
|
||
|
|
"step": 136
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0015323184985515676,
|
||
|
|
"grad_norm": 0.04736172780394554,
|
||
|
|
"learning_rate": 0.0001999988413106903,
|
||
|
|
"loss": 1.6076,
|
||
|
|
"step": 137
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0015435033051103381,
|
||
|
|
"grad_norm": 0.045473724603652954,
|
||
|
|
"learning_rate": 0.00019999882433381695,
|
||
|
|
"loss": 1.6002,
|
||
|
|
"step": 138
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0015546881116691087,
|
||
|
|
"grad_norm": 0.038239121437072754,
|
||
|
|
"learning_rate": 0.0001999988072334764,
|
||
|
|
"loss": 1.6093,
|
||
|
|
"step": 139
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0015658729182278792,
|
||
|
|
"grad_norm": 0.03806670382618904,
|
||
|
|
"learning_rate": 0.0001999987900096687,
|
||
|
|
"loss": 1.5958,
|
||
|
|
"step": 140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0015770577247866497,
|
||
|
|
"grad_norm": 0.034363262355327606,
|
||
|
|
"learning_rate": 0.00019999877266239382,
|
||
|
|
"loss": 1.6182,
|
||
|
|
"step": 141
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0015882425313454205,
|
||
|
|
"grad_norm": 0.03781922906637192,
|
||
|
|
"learning_rate": 0.0001999987551916518,
|
||
|
|
"loss": 1.6041,
|
||
|
|
"step": 142
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001599427337904191,
|
||
|
|
"grad_norm": 0.037946417927742004,
|
||
|
|
"learning_rate": 0.00019999873759744268,
|
||
|
|
"loss": 1.6154,
|
||
|
|
"step": 143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0016106121444629615,
|
||
|
|
"grad_norm": 0.03221859410405159,
|
||
|
|
"learning_rate": 0.00019999871987976645,
|
||
|
|
"loss": 1.6205,
|
||
|
|
"step": 144
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001621796951021732,
|
||
|
|
"grad_norm": 0.034399550408124924,
|
||
|
|
"learning_rate": 0.00019999870203862318,
|
||
|
|
"loss": 1.6077,
|
||
|
|
"step": 145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0016329817575805026,
|
||
|
|
"grad_norm": 0.037965498864650726,
|
||
|
|
"learning_rate": 0.00019999868407401285,
|
||
|
|
"loss": 1.6226,
|
||
|
|
"step": 146
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0016441665641392731,
|
||
|
|
"grad_norm": 0.034158267080783844,
|
||
|
|
"learning_rate": 0.00019999866598593549,
|
||
|
|
"loss": 1.6364,
|
||
|
|
"step": 147
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0016553513706980439,
|
||
|
|
"grad_norm": 0.035179782658815384,
|
||
|
|
"learning_rate": 0.00019999864777439113,
|
||
|
|
"loss": 1.6015,
|
||
|
|
"step": 148
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0016665361772568144,
|
||
|
|
"grad_norm": 0.07380399852991104,
|
||
|
|
"learning_rate": 0.00019999862943937977,
|
||
|
|
"loss": 1.6036,
|
||
|
|
"step": 149
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001677720983815585,
|
||
|
|
"grad_norm": 0.055961351841688156,
|
||
|
|
"learning_rate": 0.00019999861098090146,
|
||
|
|
"loss": 1.6209,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0016889057903743555,
|
||
|
|
"grad_norm": 0.06556452065706253,
|
||
|
|
"learning_rate": 0.00019999859239895623,
|
||
|
|
"loss": 1.6179,
|
||
|
|
"step": 151
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001700090596933126,
|
||
|
|
"grad_norm": 0.0406390018761158,
|
||
|
|
"learning_rate": 0.0001999985736935441,
|
||
|
|
"loss": 1.6251,
|
||
|
|
"step": 152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0017112754034918965,
|
||
|
|
"grad_norm": 0.04337646812200546,
|
||
|
|
"learning_rate": 0.00019999855486466504,
|
||
|
|
"loss": 1.6229,
|
||
|
|
"step": 153
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0017224602100506673,
|
||
|
|
"grad_norm": 0.03773214668035507,
|
||
|
|
"learning_rate": 0.00019999853591231914,
|
||
|
|
"loss": 1.6163,
|
||
|
|
"step": 154
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0017336450166094378,
|
||
|
|
"grad_norm": 0.04170192405581474,
|
||
|
|
"learning_rate": 0.0001999985168365064,
|
||
|
|
"loss": 1.6187,
|
||
|
|
"step": 155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0017448298231682083,
|
||
|
|
"grad_norm": 0.046784352511167526,
|
||
|
|
"learning_rate": 0.00019999849763722684,
|
||
|
|
"loss": 1.6313,
|
||
|
|
"step": 156
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0017560146297269789,
|
||
|
|
"grad_norm": 0.04641957953572273,
|
||
|
|
"learning_rate": 0.00019999847831448048,
|
||
|
|
"loss": 1.6353,
|
||
|
|
"step": 157
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0017671994362857494,
|
||
|
|
"grad_norm": 0.04125691205263138,
|
||
|
|
"learning_rate": 0.00019999845886826736,
|
||
|
|
"loss": 1.6136,
|
||
|
|
"step": 158
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00177838424284452,
|
||
|
|
"grad_norm": 0.03794560208916664,
|
||
|
|
"learning_rate": 0.00019999843929858748,
|
||
|
|
"loss": 1.6124,
|
||
|
|
"step": 159
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0017895690494032907,
|
||
|
|
"grad_norm": 0.04494895413517952,
|
||
|
|
"learning_rate": 0.00019999841960544087,
|
||
|
|
"loss": 1.6038,
|
||
|
|
"step": 160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0018007538559620612,
|
||
|
|
"grad_norm": 0.0605354905128479,
|
||
|
|
"learning_rate": 0.00019999839978882756,
|
||
|
|
"loss": 1.6129,
|
||
|
|
"step": 161
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0018119386625208317,
|
||
|
|
"grad_norm": 0.03629598766565323,
|
||
|
|
"learning_rate": 0.0001999983798487476,
|
||
|
|
"loss": 1.6136,
|
||
|
|
"step": 162
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0018231234690796023,
|
||
|
|
"grad_norm": 0.051288772374391556,
|
||
|
|
"learning_rate": 0.00019999835978520099,
|
||
|
|
"loss": 1.6121,
|
||
|
|
"step": 163
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0018343082756383728,
|
||
|
|
"grad_norm": 0.0646372139453888,
|
||
|
|
"learning_rate": 0.00019999833959818774,
|
||
|
|
"loss": 1.6097,
|
||
|
|
"step": 164
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0018454930821971433,
|
||
|
|
"grad_norm": 0.05430466681718826,
|
||
|
|
"learning_rate": 0.00019999831928770788,
|
||
|
|
"loss": 1.6336,
|
||
|
|
"step": 165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001856677888755914,
|
||
|
|
"grad_norm": 0.04446660354733467,
|
||
|
|
"learning_rate": 0.00019999829885376146,
|
||
|
|
"loss": 1.6446,
|
||
|
|
"step": 166
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0018678626953146846,
|
||
|
|
"grad_norm": 0.05089535191655159,
|
||
|
|
"learning_rate": 0.0001999982782963485,
|
||
|
|
"loss": 1.6295,
|
||
|
|
"step": 167
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0018790475018734551,
|
||
|
|
"grad_norm": 0.04738520458340645,
|
||
|
|
"learning_rate": 0.000199998257615469,
|
||
|
|
"loss": 1.6129,
|
||
|
|
"step": 168
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0018902323084322257,
|
||
|
|
"grad_norm": 0.06705950200557709,
|
||
|
|
"learning_rate": 0.00019999823681112296,
|
||
|
|
"loss": 1.6091,
|
||
|
|
"step": 169
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0019014171149909962,
|
||
|
|
"grad_norm": 0.08080725371837616,
|
||
|
|
"learning_rate": 0.0001999982158833105,
|
||
|
|
"loss": 1.6041,
|
||
|
|
"step": 170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0019126019215497667,
|
||
|
|
"grad_norm": 0.08998562395572662,
|
||
|
|
"learning_rate": 0.00019999819483203162,
|
||
|
|
"loss": 1.61,
|
||
|
|
"step": 171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0019237867281085375,
|
||
|
|
"grad_norm": 0.0561324767768383,
|
||
|
|
"learning_rate": 0.00019999817365728626,
|
||
|
|
"loss": 1.6019,
|
||
|
|
"step": 172
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001934971534667308,
|
||
|
|
"grad_norm": 0.06384976953268051,
|
||
|
|
"learning_rate": 0.00019999815235907453,
|
||
|
|
"loss": 1.6045,
|
||
|
|
"step": 173
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0019461563412260785,
|
||
|
|
"grad_norm": 0.08231502771377563,
|
||
|
|
"learning_rate": 0.00019999813093739643,
|
||
|
|
"loss": 1.5851,
|
||
|
|
"step": 174
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001957341147784849,
|
||
|
|
"grad_norm": 0.06544940173625946,
|
||
|
|
"learning_rate": 0.00019999810939225196,
|
||
|
|
"loss": 1.5902,
|
||
|
|
"step": 175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00196852595434362,
|
||
|
|
"grad_norm": 0.05647379904985428,
|
||
|
|
"learning_rate": 0.0001999980877236412,
|
||
|
|
"loss": 1.5972,
|
||
|
|
"step": 176
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00197971076090239,
|
||
|
|
"grad_norm": 0.04889946058392525,
|
||
|
|
"learning_rate": 0.00019999806593156417,
|
||
|
|
"loss": 1.6039,
|
||
|
|
"step": 177
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.001990895567461161,
|
||
|
|
"grad_norm": 0.04563042148947716,
|
||
|
|
"learning_rate": 0.00019999804401602087,
|
||
|
|
"loss": 1.6137,
|
||
|
|
"step": 178
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002002080374019931,
|
||
|
|
"grad_norm": 0.057878173887729645,
|
||
|
|
"learning_rate": 0.0001999980219770113,
|
||
|
|
"loss": 1.603,
|
||
|
|
"step": 179
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002013265180578702,
|
||
|
|
"grad_norm": 0.04965892806649208,
|
||
|
|
"learning_rate": 0.00019999799981453554,
|
||
|
|
"loss": 1.6016,
|
||
|
|
"step": 180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0020244499871374727,
|
||
|
|
"grad_norm": 0.042930275201797485,
|
||
|
|
"learning_rate": 0.00019999797752859362,
|
||
|
|
"loss": 1.5872,
|
||
|
|
"step": 181
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002035634793696243,
|
||
|
|
"grad_norm": 0.07251135259866714,
|
||
|
|
"learning_rate": 0.00019999795511918553,
|
||
|
|
"loss": 1.5973,
|
||
|
|
"step": 182
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0020468196002550137,
|
||
|
|
"grad_norm": 0.053799793124198914,
|
||
|
|
"learning_rate": 0.00019999793258631133,
|
||
|
|
"loss": 1.6177,
|
||
|
|
"step": 183
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002058004406813784,
|
||
|
|
"grad_norm": 0.05833510681986809,
|
||
|
|
"learning_rate": 0.00019999790992997101,
|
||
|
|
"loss": 1.5898,
|
||
|
|
"step": 184
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002069189213372555,
|
||
|
|
"grad_norm": 0.06137058511376381,
|
||
|
|
"learning_rate": 0.0001999978871501646,
|
||
|
|
"loss": 1.5836,
|
||
|
|
"step": 185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002080374019931325,
|
||
|
|
"grad_norm": 0.04289720579981804,
|
||
|
|
"learning_rate": 0.0001999978642468922,
|
||
|
|
"loss": 1.5893,
|
||
|
|
"step": 186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002091558826490096,
|
||
|
|
"grad_norm": 0.054850902408361435,
|
||
|
|
"learning_rate": 0.00019999784122015375,
|
||
|
|
"loss": 1.5894,
|
||
|
|
"step": 187
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0021027436330488666,
|
||
|
|
"grad_norm": 0.06448347866535187,
|
||
|
|
"learning_rate": 0.00019999781806994934,
|
||
|
|
"loss": 1.5862,
|
||
|
|
"step": 188
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002113928439607637,
|
||
|
|
"grad_norm": 0.06109186261892319,
|
||
|
|
"learning_rate": 0.00019999779479627897,
|
||
|
|
"loss": 1.5951,
|
||
|
|
"step": 189
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0021251132461664077,
|
||
|
|
"grad_norm": 0.05638093128800392,
|
||
|
|
"learning_rate": 0.00019999777139914263,
|
||
|
|
"loss": 1.5929,
|
||
|
|
"step": 190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002136298052725178,
|
||
|
|
"grad_norm": 0.05871524661779404,
|
||
|
|
"learning_rate": 0.00019999774787854047,
|
||
|
|
"loss": 1.5979,
|
||
|
|
"step": 191
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0021474828592839487,
|
||
|
|
"grad_norm": 0.05536816641688347,
|
||
|
|
"learning_rate": 0.00019999772423447238,
|
||
|
|
"loss": 1.598,
|
||
|
|
"step": 192
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0021586676658427195,
|
||
|
|
"grad_norm": 0.04985928162932396,
|
||
|
|
"learning_rate": 0.00019999770046693848,
|
||
|
|
"loss": 1.6067,
|
||
|
|
"step": 193
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0021698524724014898,
|
||
|
|
"grad_norm": 0.03956759348511696,
|
||
|
|
"learning_rate": 0.00019999767657593874,
|
||
|
|
"loss": 1.6101,
|
||
|
|
"step": 194
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0021810372789602605,
|
||
|
|
"grad_norm": 0.043052803725004196,
|
||
|
|
"learning_rate": 0.00019999765256147324,
|
||
|
|
"loss": 1.5925,
|
||
|
|
"step": 195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002192222085519031,
|
||
|
|
"grad_norm": 0.049073830246925354,
|
||
|
|
"learning_rate": 0.000199997628423542,
|
||
|
|
"loss": 1.5997,
|
||
|
|
"step": 196
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0022034068920778016,
|
||
|
|
"grad_norm": 0.0505862683057785,
|
||
|
|
"learning_rate": 0.00019999760416214503,
|
||
|
|
"loss": 1.5976,
|
||
|
|
"step": 197
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002214591698636572,
|
||
|
|
"grad_norm": 0.04482674226164818,
|
||
|
|
"learning_rate": 0.00019999757977728235,
|
||
|
|
"loss": 1.5762,
|
||
|
|
"step": 198
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0022257765051953426,
|
||
|
|
"grad_norm": 0.03707127273082733,
|
||
|
|
"learning_rate": 0.00019999755526895402,
|
||
|
|
"loss": 1.5805,
|
||
|
|
"step": 199
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0022369613117541134,
|
||
|
|
"grad_norm": 0.041122857481241226,
|
||
|
|
"learning_rate": 0.00019999753063716002,
|
||
|
|
"loss": 1.6041,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0022481461183128837,
|
||
|
|
"grad_norm": 0.05945773050189018,
|
||
|
|
"learning_rate": 0.00019999750588190046,
|
||
|
|
"loss": 1.6068,
|
||
|
|
"step": 201
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0022593309248716545,
|
||
|
|
"grad_norm": 0.058612506836652756,
|
||
|
|
"learning_rate": 0.00019999748100317532,
|
||
|
|
"loss": 1.6,
|
||
|
|
"step": 202
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0022705157314304248,
|
||
|
|
"grad_norm": 0.05361739546060562,
|
||
|
|
"learning_rate": 0.00019999745600098466,
|
||
|
|
"loss": 1.5891,
|
||
|
|
"step": 203
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0022817005379891955,
|
||
|
|
"grad_norm": 0.045548051595687866,
|
||
|
|
"learning_rate": 0.00019999743087532846,
|
||
|
|
"loss": 1.6161,
|
||
|
|
"step": 204
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002292885344547966,
|
||
|
|
"grad_norm": 0.04524560272693634,
|
||
|
|
"learning_rate": 0.00019999740562620682,
|
||
|
|
"loss": 1.6092,
|
||
|
|
"step": 205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0023040701511067366,
|
||
|
|
"grad_norm": 0.04346180334687233,
|
||
|
|
"learning_rate": 0.0001999973802536197,
|
||
|
|
"loss": 1.6076,
|
||
|
|
"step": 206
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0023152549576655073,
|
||
|
|
"grad_norm": 0.047505974769592285,
|
||
|
|
"learning_rate": 0.00019999735475756717,
|
||
|
|
"loss": 1.5825,
|
||
|
|
"step": 207
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0023264397642242776,
|
||
|
|
"grad_norm": 0.03851678594946861,
|
||
|
|
"learning_rate": 0.00019999732913804927,
|
||
|
|
"loss": 1.5991,
|
||
|
|
"step": 208
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0023376245707830484,
|
||
|
|
"grad_norm": 0.051913902163505554,
|
||
|
|
"learning_rate": 0.000199997303395066,
|
||
|
|
"loss": 1.6281,
|
||
|
|
"step": 209
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0023488093773418187,
|
||
|
|
"grad_norm": 0.06070960685610771,
|
||
|
|
"learning_rate": 0.0001999972775286174,
|
||
|
|
"loss": 1.641,
|
||
|
|
"step": 210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0023599941839005894,
|
||
|
|
"grad_norm": 0.13532494008541107,
|
||
|
|
"learning_rate": 0.00019999725153870354,
|
||
|
|
"loss": 1.6009,
|
||
|
|
"step": 211
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00237117899045936,
|
||
|
|
"grad_norm": 0.30072659254074097,
|
||
|
|
"learning_rate": 0.00019999722542532442,
|
||
|
|
"loss": 1.6163,
|
||
|
|
"step": 212
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0023823637970181305,
|
||
|
|
"grad_norm": 0.3511681854724884,
|
||
|
|
"learning_rate": 0.00019999719918848004,
|
||
|
|
"loss": 1.6465,
|
||
|
|
"step": 213
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0023935486035769012,
|
||
|
|
"grad_norm": 0.2487097680568695,
|
||
|
|
"learning_rate": 0.00019999717282817052,
|
||
|
|
"loss": 1.6367,
|
||
|
|
"step": 214
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0024047334101356716,
|
||
|
|
"grad_norm": 0.23582719266414642,
|
||
|
|
"learning_rate": 0.00019999714634439582,
|
||
|
|
"loss": 1.6202,
|
||
|
|
"step": 215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0024159182166944423,
|
||
|
|
"grad_norm": 0.17389710247516632,
|
||
|
|
"learning_rate": 0.000199997119737156,
|
||
|
|
"loss": 1.6308,
|
||
|
|
"step": 216
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0024271030232532126,
|
||
|
|
"grad_norm": 0.14822594821453094,
|
||
|
|
"learning_rate": 0.00019999709300645105,
|
||
|
|
"loss": 1.6175,
|
||
|
|
"step": 217
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0024382878298119834,
|
||
|
|
"grad_norm": 0.12898804247379303,
|
||
|
|
"learning_rate": 0.00019999706615228107,
|
||
|
|
"loss": 1.6069,
|
||
|
|
"step": 218
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002449472636370754,
|
||
|
|
"grad_norm": 0.16721111536026,
|
||
|
|
"learning_rate": 0.00019999703917464605,
|
||
|
|
"loss": 1.6206,
|
||
|
|
"step": 219
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0024606574429295244,
|
||
|
|
"grad_norm": 0.08022027462720871,
|
||
|
|
"learning_rate": 0.00019999701207354606,
|
||
|
|
"loss": 1.6082,
|
||
|
|
"step": 220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002471842249488295,
|
||
|
|
"grad_norm": 0.1281793862581253,
|
||
|
|
"learning_rate": 0.0001999969848489811,
|
||
|
|
"loss": 1.5928,
|
||
|
|
"step": 221
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0024830270560470655,
|
||
|
|
"grad_norm": 0.10240975767374039,
|
||
|
|
"learning_rate": 0.00019999695750095117,
|
||
|
|
"loss": 1.6058,
|
||
|
|
"step": 222
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0024942118626058362,
|
||
|
|
"grad_norm": 0.08368588238954544,
|
||
|
|
"learning_rate": 0.00019999693002945642,
|
||
|
|
"loss": 1.6193,
|
||
|
|
"step": 223
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002505396669164607,
|
||
|
|
"grad_norm": 0.09998615831136703,
|
||
|
|
"learning_rate": 0.00019999690243449676,
|
||
|
|
"loss": 1.6096,
|
||
|
|
"step": 224
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0025165814757233773,
|
||
|
|
"grad_norm": 0.08170673996210098,
|
||
|
|
"learning_rate": 0.00019999687471607228,
|
||
|
|
"loss": 1.5874,
|
||
|
|
"step": 225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002527766282282148,
|
||
|
|
"grad_norm": 0.09015469253063202,
|
||
|
|
"learning_rate": 0.000199996846874183,
|
||
|
|
"loss": 1.5903,
|
||
|
|
"step": 226
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0025389510888409184,
|
||
|
|
"grad_norm": 0.07382986694574356,
|
||
|
|
"learning_rate": 0.000199996818908829,
|
||
|
|
"loss": 1.5958,
|
||
|
|
"step": 227
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002550135895399689,
|
||
|
|
"grad_norm": 0.06669515371322632,
|
||
|
|
"learning_rate": 0.00019999679082001023,
|
||
|
|
"loss": 1.5881,
|
||
|
|
"step": 228
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0025613207019584594,
|
||
|
|
"grad_norm": 0.0616774745285511,
|
||
|
|
"learning_rate": 0.0001999967626077268,
|
||
|
|
"loss": 1.6038,
|
||
|
|
"step": 229
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00257250550851723,
|
||
|
|
"grad_norm": 0.05451146885752678,
|
||
|
|
"learning_rate": 0.00019999673427197872,
|
||
|
|
"loss": 1.6021,
|
||
|
|
"step": 230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002583690315076001,
|
||
|
|
"grad_norm": 0.05640149116516113,
|
||
|
|
"learning_rate": 0.000199996705812766,
|
||
|
|
"loss": 1.6022,
|
||
|
|
"step": 231
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0025948751216347712,
|
||
|
|
"grad_norm": 0.06033660098910332,
|
||
|
|
"learning_rate": 0.00019999667723008871,
|
||
|
|
"loss": 1.6002,
|
||
|
|
"step": 232
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002606059928193542,
|
||
|
|
"grad_norm": 0.07876270264387131,
|
||
|
|
"learning_rate": 0.0001999966485239469,
|
||
|
|
"loss": 1.6043,
|
||
|
|
"step": 233
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0026172447347523123,
|
||
|
|
"grad_norm": 0.0829700380563736,
|
||
|
|
"learning_rate": 0.00019999661969434055,
|
||
|
|
"loss": 1.5915,
|
||
|
|
"step": 234
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002628429541311083,
|
||
|
|
"grad_norm": 0.05654975026845932,
|
||
|
|
"learning_rate": 0.0001999965907412697,
|
||
|
|
"loss": 1.5893,
|
||
|
|
"step": 235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0026396143478698538,
|
||
|
|
"grad_norm": 0.06751634925603867,
|
||
|
|
"learning_rate": 0.00019999656166473444,
|
||
|
|
"loss": 1.5757,
|
||
|
|
"step": 236
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002650799154428624,
|
||
|
|
"grad_norm": 0.06081743538379669,
|
||
|
|
"learning_rate": 0.00019999653246473477,
|
||
|
|
"loss": 1.5702,
|
||
|
|
"step": 237
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002661983960987395,
|
||
|
|
"grad_norm": 0.0666998103260994,
|
||
|
|
"learning_rate": 0.00019999650314127075,
|
||
|
|
"loss": 1.568,
|
||
|
|
"step": 238
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002673168767546165,
|
||
|
|
"grad_norm": 0.04934430122375488,
|
||
|
|
"learning_rate": 0.00019999647369434235,
|
||
|
|
"loss": 1.6017,
|
||
|
|
"step": 239
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002684353574104936,
|
||
|
|
"grad_norm": 0.0574209988117218,
|
||
|
|
"learning_rate": 0.00019999644412394972,
|
||
|
|
"loss": 1.5935,
|
||
|
|
"step": 240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002695538380663706,
|
||
|
|
"grad_norm": 0.04870286583900452,
|
||
|
|
"learning_rate": 0.00019999641443009278,
|
||
|
|
"loss": 1.6035,
|
||
|
|
"step": 241
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002706723187222477,
|
||
|
|
"grad_norm": 0.04176439344882965,
|
||
|
|
"learning_rate": 0.00019999638461277162,
|
||
|
|
"loss": 1.598,
|
||
|
|
"step": 242
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0027179079937812477,
|
||
|
|
"grad_norm": 0.05534802004694939,
|
||
|
|
"learning_rate": 0.0001999963546719863,
|
||
|
|
"loss": 1.5945,
|
||
|
|
"step": 243
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002729092800340018,
|
||
|
|
"grad_norm": 0.04214160889387131,
|
||
|
|
"learning_rate": 0.0001999963246077368,
|
||
|
|
"loss": 1.6019,
|
||
|
|
"step": 244
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0027402776068987888,
|
||
|
|
"grad_norm": 0.04326852038502693,
|
||
|
|
"learning_rate": 0.00019999629442002322,
|
||
|
|
"loss": 1.604,
|
||
|
|
"step": 245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002751462413457559,
|
||
|
|
"grad_norm": 0.04295732453465462,
|
||
|
|
"learning_rate": 0.00019999626410884553,
|
||
|
|
"loss": 1.6136,
|
||
|
|
"step": 246
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00276264722001633,
|
||
|
|
"grad_norm": 0.038508690893650055,
|
||
|
|
"learning_rate": 0.00019999623367420385,
|
||
|
|
"loss": 1.5904,
|
||
|
|
"step": 247
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0027738320265751006,
|
||
|
|
"grad_norm": 0.040281713008880615,
|
||
|
|
"learning_rate": 0.0001999962031160981,
|
||
|
|
"loss": 1.5955,
|
||
|
|
"step": 248
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002785016833133871,
|
||
|
|
"grad_norm": 0.041424721479415894,
|
||
|
|
"learning_rate": 0.00019999617243452844,
|
||
|
|
"loss": 1.608,
|
||
|
|
"step": 249
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0027962016396926416,
|
||
|
|
"grad_norm": 0.03804994374513626,
|
||
|
|
"learning_rate": 0.00019999614162949484,
|
||
|
|
"loss": 1.6125,
|
||
|
|
"step": 250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002807386446251412,
|
||
|
|
"grad_norm": 0.04370785504579544,
|
||
|
|
"learning_rate": 0.0001999961107009974,
|
||
|
|
"loss": 1.6125,
|
||
|
|
"step": 251
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0028185712528101827,
|
||
|
|
"grad_norm": 0.047021038830280304,
|
||
|
|
"learning_rate": 0.000199996079649036,
|
||
|
|
"loss": 1.5931,
|
||
|
|
"step": 252
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002829756059368953,
|
||
|
|
"grad_norm": 0.036128588020801544,
|
||
|
|
"learning_rate": 0.0001999960484736109,
|
||
|
|
"loss": 1.5707,
|
||
|
|
"step": 253
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0028409408659277238,
|
||
|
|
"grad_norm": 0.04315561056137085,
|
||
|
|
"learning_rate": 0.00019999601717472199,
|
||
|
|
"loss": 1.5902,
|
||
|
|
"step": 254
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0028521256724864945,
|
||
|
|
"grad_norm": 0.04395722597837448,
|
||
|
|
"learning_rate": 0.00019999598575236934,
|
||
|
|
"loss": 1.5995,
|
||
|
|
"step": 255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002863310479045265,
|
||
|
|
"grad_norm": 0.038929786533117294,
|
||
|
|
"learning_rate": 0.000199995954206553,
|
||
|
|
"loss": 1.5854,
|
||
|
|
"step": 256
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0028744952856040356,
|
||
|
|
"grad_norm": 0.041567280888557434,
|
||
|
|
"learning_rate": 0.00019999592253727299,
|
||
|
|
"loss": 1.5783,
|
||
|
|
"step": 257
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.002885680092162806,
|
||
|
|
"grad_norm": 0.03894374892115593,
|
||
|
|
"learning_rate": 0.0001999958907445294,
|
||
|
|
"loss": 1.5846,
|
||
|
|
"step": 258
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0028968648987215766,
|
||
|
|
"grad_norm": 0.04269428178668022,
|
||
|
|
"learning_rate": 0.00019999585882832222,
|
||
|
|
"loss": 1.6023,
|
||
|
|
"step": 259
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0029080497052803474,
|
||
|
|
"grad_norm": 0.04121831804513931,
|
||
|
|
"learning_rate": 0.00019999582678865147,
|
||
|
|
"loss": 1.6051,
|
||
|
|
"step": 260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0029192345118391177,
|
||
|
|
"grad_norm": 0.038076166063547134,
|
||
|
|
"learning_rate": 0.00019999579462551728,
|
||
|
|
"loss": 1.6136,
|
||
|
|
"step": 261
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0029304193183978884,
|
||
|
|
"grad_norm": 0.042008642107248306,
|
||
|
|
"learning_rate": 0.0001999957623389196,
|
||
|
|
"loss": 1.6063,
|
||
|
|
"step": 262
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0029416041249566587,
|
||
|
|
"grad_norm": 0.042438406497240067,
|
||
|
|
"learning_rate": 0.0001999957299288585,
|
||
|
|
"loss": 1.6033,
|
||
|
|
"step": 263
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0029527889315154295,
|
||
|
|
"grad_norm": 0.041119206696748734,
|
||
|
|
"learning_rate": 0.000199995697395334,
|
||
|
|
"loss": 1.6031,
|
||
|
|
"step": 264
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0029639737380742,
|
||
|
|
"grad_norm": 0.045258279889822006,
|
||
|
|
"learning_rate": 0.00019999566473834622,
|
||
|
|
"loss": 1.5853,
|
||
|
|
"step": 265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0029751585446329705,
|
||
|
|
"grad_norm": 0.04734019562602043,
|
||
|
|
"learning_rate": 0.0001999956319578951,
|
||
|
|
"loss": 1.5921,
|
||
|
|
"step": 266
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0029863433511917413,
|
||
|
|
"grad_norm": 0.04389064759016037,
|
||
|
|
"learning_rate": 0.00019999559905398072,
|
||
|
|
"loss": 1.581,
|
||
|
|
"step": 267
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0029975281577505116,
|
||
|
|
"grad_norm": 0.04582642391324043,
|
||
|
|
"learning_rate": 0.00019999556602660318,
|
||
|
|
"loss": 1.5928,
|
||
|
|
"step": 268
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0030087129643092824,
|
||
|
|
"grad_norm": 0.04518941417336464,
|
||
|
|
"learning_rate": 0.00019999553287576238,
|
||
|
|
"loss": 1.5809,
|
||
|
|
"step": 269
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0030198977708680527,
|
||
|
|
"grad_norm": 0.04687381908297539,
|
||
|
|
"learning_rate": 0.0001999954996014585,
|
||
|
|
"loss": 1.5999,
|
||
|
|
"step": 270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0030310825774268234,
|
||
|
|
"grad_norm": 0.04809357225894928,
|
||
|
|
"learning_rate": 0.00019999546620369152,
|
||
|
|
"loss": 1.5972,
|
||
|
|
"step": 271
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003042267383985594,
|
||
|
|
"grad_norm": 0.05907173454761505,
|
||
|
|
"learning_rate": 0.0001999954326824615,
|
||
|
|
"loss": 1.5862,
|
||
|
|
"step": 272
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0030534521905443645,
|
||
|
|
"grad_norm": 0.06583942472934723,
|
||
|
|
"learning_rate": 0.00019999539903776842,
|
||
|
|
"loss": 1.5846,
|
||
|
|
"step": 273
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0030646369971031352,
|
||
|
|
"grad_norm": 0.07557905465364456,
|
||
|
|
"learning_rate": 0.0001999953652696124,
|
||
|
|
"loss": 1.5962,
|
||
|
|
"step": 274
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0030758218036619055,
|
||
|
|
"grad_norm": 0.07007817178964615,
|
||
|
|
"learning_rate": 0.00019999533137799347,
|
||
|
|
"loss": 1.5951,
|
||
|
|
"step": 275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0030870066102206763,
|
||
|
|
"grad_norm": 0.05711887776851654,
|
||
|
|
"learning_rate": 0.00019999529736291162,
|
||
|
|
"loss": 1.5932,
|
||
|
|
"step": 276
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0030981914167794466,
|
||
|
|
"grad_norm": 0.04450292885303497,
|
||
|
|
"learning_rate": 0.00019999526322436696,
|
||
|
|
"loss": 1.6071,
|
||
|
|
"step": 277
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0031093762233382173,
|
||
|
|
"grad_norm": 0.04260997474193573,
|
||
|
|
"learning_rate": 0.00019999522896235947,
|
||
|
|
"loss": 1.6038,
|
||
|
|
"step": 278
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003120561029896988,
|
||
|
|
"grad_norm": 0.05689796805381775,
|
||
|
|
"learning_rate": 0.00019999519457688925,
|
||
|
|
"loss": 1.5688,
|
||
|
|
"step": 279
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0031317458364557584,
|
||
|
|
"grad_norm": 0.06330379098653793,
|
||
|
|
"learning_rate": 0.0001999951600679563,
|
||
|
|
"loss": 1.5617,
|
||
|
|
"step": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003142930643014529,
|
||
|
|
"grad_norm": 0.06195618584752083,
|
||
|
|
"learning_rate": 0.00019999512543556066,
|
||
|
|
"loss": 1.5602,
|
||
|
|
"step": 281
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0031541154495732995,
|
||
|
|
"grad_norm": 0.06677111238241196,
|
||
|
|
"learning_rate": 0.0001999950906797024,
|
||
|
|
"loss": 1.5659,
|
||
|
|
"step": 282
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00316530025613207,
|
||
|
|
"grad_norm": 0.05750421807169914,
|
||
|
|
"learning_rate": 0.00019999505580038153,
|
||
|
|
"loss": 1.5759,
|
||
|
|
"step": 283
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003176485062690841,
|
||
|
|
"grad_norm": 0.04907039552927017,
|
||
|
|
"learning_rate": 0.00019999502079759817,
|
||
|
|
"loss": 1.5833,
|
||
|
|
"step": 284
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0031876698692496113,
|
||
|
|
"grad_norm": 0.048877034336328506,
|
||
|
|
"learning_rate": 0.00019999498567135223,
|
||
|
|
"loss": 1.5836,
|
||
|
|
"step": 285
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003198854675808382,
|
||
|
|
"grad_norm": 0.05494236946105957,
|
||
|
|
"learning_rate": 0.0001999949504216439,
|
||
|
|
"loss": 1.5837,
|
||
|
|
"step": 286
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0032100394823671523,
|
||
|
|
"grad_norm": 0.04953937977552414,
|
||
|
|
"learning_rate": 0.00019999491504847313,
|
||
|
|
"loss": 1.5794,
|
||
|
|
"step": 287
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003221224288925923,
|
||
|
|
"grad_norm": 0.05240803211927414,
|
||
|
|
"learning_rate": 0.00019999487955184,
|
||
|
|
"loss": 1.5894,
|
||
|
|
"step": 288
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0032324090954846934,
|
||
|
|
"grad_norm": 0.05633338540792465,
|
||
|
|
"learning_rate": 0.0001999948439317445,
|
||
|
|
"loss": 1.5755,
|
||
|
|
"step": 289
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003243593902043464,
|
||
|
|
"grad_norm": 0.06563600897789001,
|
||
|
|
"learning_rate": 0.00019999480818818675,
|
||
|
|
"loss": 1.5912,
|
||
|
|
"step": 290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003254778708602235,
|
||
|
|
"grad_norm": 0.05903002619743347,
|
||
|
|
"learning_rate": 0.00019999477232116676,
|
||
|
|
"loss": 1.5856,
|
||
|
|
"step": 291
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003265963515161005,
|
||
|
|
"grad_norm": 0.03582334890961647,
|
||
|
|
"learning_rate": 0.00019999473633068457,
|
||
|
|
"loss": 1.6079,
|
||
|
|
"step": 292
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003277148321719776,
|
||
|
|
"grad_norm": 0.05011364817619324,
|
||
|
|
"learning_rate": 0.00019999470021674025,
|
||
|
|
"loss": 1.5907,
|
||
|
|
"step": 293
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0032883331282785463,
|
||
|
|
"grad_norm": 0.0577118918299675,
|
||
|
|
"learning_rate": 0.0001999946639793338,
|
||
|
|
"loss": 1.5955,
|
||
|
|
"step": 294
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003299517934837317,
|
||
|
|
"grad_norm": 0.05170518904924393,
|
||
|
|
"learning_rate": 0.00019999462761846528,
|
||
|
|
"loss": 1.5884,
|
||
|
|
"step": 295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0033107027413960878,
|
||
|
|
"grad_norm": 0.05011725425720215,
|
||
|
|
"learning_rate": 0.00019999459113413475,
|
||
|
|
"loss": 1.6046,
|
||
|
|
"step": 296
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003321887547954858,
|
||
|
|
"grad_norm": 0.05645633116364479,
|
||
|
|
"learning_rate": 0.00019999455452634224,
|
||
|
|
"loss": 1.596,
|
||
|
|
"step": 297
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003333072354513629,
|
||
|
|
"grad_norm": 0.05705921724438667,
|
||
|
|
"learning_rate": 0.0001999945177950878,
|
||
|
|
"loss": 1.6136,
|
||
|
|
"step": 298
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003344257161072399,
|
||
|
|
"grad_norm": 0.05761184170842171,
|
||
|
|
"learning_rate": 0.0001999944809403715,
|
||
|
|
"loss": 1.6095,
|
||
|
|
"step": 299
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00335544196763117,
|
||
|
|
"grad_norm": 0.0613851472735405,
|
||
|
|
"learning_rate": 0.00019999444396219337,
|
||
|
|
"loss": 1.6141,
|
||
|
|
"step": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00336662677418994,
|
||
|
|
"grad_norm": 0.06220489367842674,
|
||
|
|
"learning_rate": 0.00019999440686055344,
|
||
|
|
"loss": 1.5988,
|
||
|
|
"step": 301
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003377811580748711,
|
||
|
|
"grad_norm": 0.062393296509981155,
|
||
|
|
"learning_rate": 0.00019999436963545177,
|
||
|
|
"loss": 1.6075,
|
||
|
|
"step": 302
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0033889963873074817,
|
||
|
|
"grad_norm": 0.0625912994146347,
|
||
|
|
"learning_rate": 0.00019999433228688838,
|
||
|
|
"loss": 1.6102,
|
||
|
|
"step": 303
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003400181193866252,
|
||
|
|
"grad_norm": 0.06049802899360657,
|
||
|
|
"learning_rate": 0.00019999429481486335,
|
||
|
|
"loss": 1.5968,
|
||
|
|
"step": 304
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0034113660004250227,
|
||
|
|
"grad_norm": 0.05767315998673439,
|
||
|
|
"learning_rate": 0.00019999425721937674,
|
||
|
|
"loss": 1.5906,
|
||
|
|
"step": 305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003422550806983793,
|
||
|
|
"grad_norm": 0.049920015037059784,
|
||
|
|
"learning_rate": 0.00019999421950042854,
|
||
|
|
"loss": 1.5694,
|
||
|
|
"step": 306
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003433735613542564,
|
||
|
|
"grad_norm": 0.04852724075317383,
|
||
|
|
"learning_rate": 0.0001999941816580188,
|
||
|
|
"loss": 1.5609,
|
||
|
|
"step": 307
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0034449204201013345,
|
||
|
|
"grad_norm": 0.05249037966132164,
|
||
|
|
"learning_rate": 0.00019999414369214767,
|
||
|
|
"loss": 1.5675,
|
||
|
|
"step": 308
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003456105226660105,
|
||
|
|
"grad_norm": 0.05167357623577118,
|
||
|
|
"learning_rate": 0.00019999410560281506,
|
||
|
|
"loss": 1.5645,
|
||
|
|
"step": 309
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0034672900332188756,
|
||
|
|
"grad_norm": 0.05197747051715851,
|
||
|
|
"learning_rate": 0.00019999406739002108,
|
||
|
|
"loss": 1.5707,
|
||
|
|
"step": 310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003478474839777646,
|
||
|
|
"grad_norm": 0.05140923336148262,
|
||
|
|
"learning_rate": 0.00019999402905376582,
|
||
|
|
"loss": 1.5745,
|
||
|
|
"step": 311
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0034896596463364167,
|
||
|
|
"grad_norm": 0.05357779935002327,
|
||
|
|
"learning_rate": 0.00019999399059404923,
|
||
|
|
"loss": 1.5824,
|
||
|
|
"step": 312
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003500844452895187,
|
||
|
|
"grad_norm": 0.04196302220225334,
|
||
|
|
"learning_rate": 0.00019999395201087143,
|
||
|
|
"loss": 1.5685,
|
||
|
|
"step": 313
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0035120292594539577,
|
||
|
|
"grad_norm": 0.04698769748210907,
|
||
|
|
"learning_rate": 0.00019999391330423246,
|
||
|
|
"loss": 1.5645,
|
||
|
|
"step": 314
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0035232140660127285,
|
||
|
|
"grad_norm": 0.055174414068460464,
|
||
|
|
"learning_rate": 0.00019999387447413236,
|
||
|
|
"loss": 1.5702,
|
||
|
|
"step": 315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003534398872571499,
|
||
|
|
"grad_norm": 0.05560048297047615,
|
||
|
|
"learning_rate": 0.00019999383552057114,
|
||
|
|
"loss": 1.5746,
|
||
|
|
"step": 316
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0035455836791302695,
|
||
|
|
"grad_norm": 0.059730809181928635,
|
||
|
|
"learning_rate": 0.0001999937964435489,
|
||
|
|
"loss": 1.5698,
|
||
|
|
"step": 317
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00355676848568904,
|
||
|
|
"grad_norm": 0.06850636750459671,
|
||
|
|
"learning_rate": 0.00019999375724306568,
|
||
|
|
"loss": 1.5837,
|
||
|
|
"step": 318
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0035679532922478106,
|
||
|
|
"grad_norm": 0.0658111497759819,
|
||
|
|
"learning_rate": 0.00019999371791912148,
|
||
|
|
"loss": 1.5759,
|
||
|
|
"step": 319
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0035791380988065813,
|
||
|
|
"grad_norm": 0.05440279841423035,
|
||
|
|
"learning_rate": 0.00019999367847171643,
|
||
|
|
"loss": 1.5873,
|
||
|
|
"step": 320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0035903229053653517,
|
||
|
|
"grad_norm": 0.06169600412249565,
|
||
|
|
"learning_rate": 0.0001999936389008505,
|
||
|
|
"loss": 1.5791,
|
||
|
|
"step": 321
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0036015077119241224,
|
||
|
|
"grad_norm": 0.06897033751010895,
|
||
|
|
"learning_rate": 0.0001999935992065238,
|
||
|
|
"loss": 1.5815,
|
||
|
|
"step": 322
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0036126925184828927,
|
||
|
|
"grad_norm": 0.06641620397567749,
|
||
|
|
"learning_rate": 0.00019999355938873635,
|
||
|
|
"loss": 1.5826,
|
||
|
|
"step": 323
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0036238773250416635,
|
||
|
|
"grad_norm": 0.057002220302820206,
|
||
|
|
"learning_rate": 0.00019999351944748818,
|
||
|
|
"loss": 1.5716,
|
||
|
|
"step": 324
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0036350621316004338,
|
||
|
|
"grad_norm": 0.06431427597999573,
|
||
|
|
"learning_rate": 0.00019999347938277938,
|
||
|
|
"loss": 1.5626,
|
||
|
|
"step": 325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0036462469381592045,
|
||
|
|
"grad_norm": 0.06504250317811966,
|
||
|
|
"learning_rate": 0.00019999343919460997,
|
||
|
|
"loss": 1.574,
|
||
|
|
"step": 326
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0036574317447179753,
|
||
|
|
"grad_norm": 0.06940289586782455,
|
||
|
|
"learning_rate": 0.00019999339888298004,
|
||
|
|
"loss": 1.5676,
|
||
|
|
"step": 327
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0036686165512767456,
|
||
|
|
"grad_norm": 0.06492604315280914,
|
||
|
|
"learning_rate": 0.00019999335844788957,
|
||
|
|
"loss": 1.5676,
|
||
|
|
"step": 328
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0036798013578355163,
|
||
|
|
"grad_norm": 0.07069146633148193,
|
||
|
|
"learning_rate": 0.0001999933178893387,
|
||
|
|
"loss": 1.5601,
|
||
|
|
"step": 329
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0036909861643942866,
|
||
|
|
"grad_norm": 0.07502440363168716,
|
||
|
|
"learning_rate": 0.00019999327720732736,
|
||
|
|
"loss": 1.5651,
|
||
|
|
"step": 330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0037021709709530574,
|
||
|
|
"grad_norm": 0.06407099217176437,
|
||
|
|
"learning_rate": 0.00019999323640185573,
|
||
|
|
"loss": 1.5818,
|
||
|
|
"step": 331
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003713355777511828,
|
||
|
|
"grad_norm": 0.05621904134750366,
|
||
|
|
"learning_rate": 0.00019999319547292377,
|
||
|
|
"loss": 1.5975,
|
||
|
|
"step": 332
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0037245405840705985,
|
||
|
|
"grad_norm": 0.053219810128211975,
|
||
|
|
"learning_rate": 0.00019999315442053157,
|
||
|
|
"loss": 1.5925,
|
||
|
|
"step": 333
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003735725390629369,
|
||
|
|
"grad_norm": 0.0538603812456131,
|
||
|
|
"learning_rate": 0.00019999311324467919,
|
||
|
|
"loss": 1.5784,
|
||
|
|
"step": 334
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0037469101971881395,
|
||
|
|
"grad_norm": 0.05239463597536087,
|
||
|
|
"learning_rate": 0.00019999307194536664,
|
||
|
|
"loss": 1.5782,
|
||
|
|
"step": 335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0037580950037469103,
|
||
|
|
"grad_norm": 0.053746242076158524,
|
||
|
|
"learning_rate": 0.00019999303052259398,
|
||
|
|
"loss": 1.5802,
|
||
|
|
"step": 336
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0037692798103056806,
|
||
|
|
"grad_norm": 0.04721551761031151,
|
||
|
|
"learning_rate": 0.0001999929889763613,
|
||
|
|
"loss": 1.5739,
|
||
|
|
"step": 337
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0037804646168644513,
|
||
|
|
"grad_norm": 0.04483070224523544,
|
||
|
|
"learning_rate": 0.00019999294730666862,
|
||
|
|
"loss": 1.5823,
|
||
|
|
"step": 338
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003791649423423222,
|
||
|
|
"grad_norm": 0.05224015936255455,
|
||
|
|
"learning_rate": 0.000199992905513516,
|
||
|
|
"loss": 1.5559,
|
||
|
|
"step": 339
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0038028342299819924,
|
||
|
|
"grad_norm": 0.05772995948791504,
|
||
|
|
"learning_rate": 0.0001999928635969035,
|
||
|
|
"loss": 1.5574,
|
||
|
|
"step": 340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003814019036540763,
|
||
|
|
"grad_norm": 0.059287503361701965,
|
||
|
|
"learning_rate": 0.00019999282155683116,
|
||
|
|
"loss": 1.5694,
|
||
|
|
"step": 341
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0038252038430995334,
|
||
|
|
"grad_norm": 0.050815433263778687,
|
||
|
|
"learning_rate": 0.00019999277939329902,
|
||
|
|
"loss": 1.5655,
|
||
|
|
"step": 342
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003836388649658304,
|
||
|
|
"grad_norm": 0.047384679317474365,
|
||
|
|
"learning_rate": 0.00019999273710630714,
|
||
|
|
"loss": 1.5704,
|
||
|
|
"step": 343
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003847573456217075,
|
||
|
|
"grad_norm": 0.04918666183948517,
|
||
|
|
"learning_rate": 0.00019999269469585555,
|
||
|
|
"loss": 1.5694,
|
||
|
|
"step": 344
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0038587582627758452,
|
||
|
|
"grad_norm": 0.058182474225759506,
|
||
|
|
"learning_rate": 0.0001999926521619444,
|
||
|
|
"loss": 1.5971,
|
||
|
|
"step": 345
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003869943069334616,
|
||
|
|
"grad_norm": 0.07194909453392029,
|
||
|
|
"learning_rate": 0.00019999260950457362,
|
||
|
|
"loss": 1.6033,
|
||
|
|
"step": 346
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0038811278758933863,
|
||
|
|
"grad_norm": 0.08051355183124542,
|
||
|
|
"learning_rate": 0.0001999925667237433,
|
||
|
|
"loss": 1.5969,
|
||
|
|
"step": 347
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003892312682452157,
|
||
|
|
"grad_norm": 0.09640984982252121,
|
||
|
|
"learning_rate": 0.00019999252381945357,
|
||
|
|
"loss": 1.6414,
|
||
|
|
"step": 348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0039034974890109274,
|
||
|
|
"grad_norm": 0.11357203125953674,
|
||
|
|
"learning_rate": 0.00019999248079170437,
|
||
|
|
"loss": 1.6522,
|
||
|
|
"step": 349
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003914682295569698,
|
||
|
|
"grad_norm": 0.14623580873012543,
|
||
|
|
"learning_rate": 0.00019999243764049586,
|
||
|
|
"loss": 1.6482,
|
||
|
|
"step": 350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003925867102128468,
|
||
|
|
"grad_norm": 0.158810093998909,
|
||
|
|
"learning_rate": 0.00019999239436582796,
|
||
|
|
"loss": 1.5814,
|
||
|
|
"step": 351
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00393705190868724,
|
||
|
|
"grad_norm": 0.11811669170856476,
|
||
|
|
"learning_rate": 0.00019999235096770086,
|
||
|
|
"loss": 1.5782,
|
||
|
|
"step": 352
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00394823671524601,
|
||
|
|
"grad_norm": 0.09518411755561829,
|
||
|
|
"learning_rate": 0.0001999923074461145,
|
||
|
|
"loss": 1.5852,
|
||
|
|
"step": 353
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00395942152180478,
|
||
|
|
"grad_norm": 0.1165471076965332,
|
||
|
|
"learning_rate": 0.00019999226380106906,
|
||
|
|
"loss": 1.5766,
|
||
|
|
"step": 354
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0039706063283635505,
|
||
|
|
"grad_norm": 0.09517768025398254,
|
||
|
|
"learning_rate": 0.00019999222003256448,
|
||
|
|
"loss": 1.5829,
|
||
|
|
"step": 355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003981791134922322,
|
||
|
|
"grad_norm": 0.06006886065006256,
|
||
|
|
"learning_rate": 0.00019999217614060085,
|
||
|
|
"loss": 1.5708,
|
||
|
|
"step": 356
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.003992975941481092,
|
||
|
|
"grad_norm": 0.08933499455451965,
|
||
|
|
"learning_rate": 0.00019999213212517825,
|
||
|
|
"loss": 1.5718,
|
||
|
|
"step": 357
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004004160748039862,
|
||
|
|
"grad_norm": 0.0867113471031189,
|
||
|
|
"learning_rate": 0.0001999920879862967,
|
||
|
|
"loss": 1.5711,
|
||
|
|
"step": 358
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0040153455545986335,
|
||
|
|
"grad_norm": 0.06603030860424042,
|
||
|
|
"learning_rate": 0.00019999204372395628,
|
||
|
|
"loss": 1.5778,
|
||
|
|
"step": 359
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004026530361157404,
|
||
|
|
"grad_norm": 0.07514684647321701,
|
||
|
|
"learning_rate": 0.00019999199933815702,
|
||
|
|
"loss": 1.5738,
|
||
|
|
"step": 360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004037715167716174,
|
||
|
|
"grad_norm": 0.060728590935468674,
|
||
|
|
"learning_rate": 0.00019999195482889897,
|
||
|
|
"loss": 1.5694,
|
||
|
|
"step": 361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004048899974274945,
|
||
|
|
"grad_norm": 0.06927715986967087,
|
||
|
|
"learning_rate": 0.00019999191019618224,
|
||
|
|
"loss": 1.5664,
|
||
|
|
"step": 362
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004060084780833716,
|
||
|
|
"grad_norm": 0.0576176755130291,
|
||
|
|
"learning_rate": 0.00019999186544000685,
|
||
|
|
"loss": 1.5704,
|
||
|
|
"step": 363
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004071269587392486,
|
||
|
|
"grad_norm": 0.047579534351825714,
|
||
|
|
"learning_rate": 0.00019999182056037285,
|
||
|
|
"loss": 1.5732,
|
||
|
|
"step": 364
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004082454393951256,
|
||
|
|
"grad_norm": 0.06114533543586731,
|
||
|
|
"learning_rate": 0.00019999177555728027,
|
||
|
|
"loss": 1.5776,
|
||
|
|
"step": 365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0040936392005100275,
|
||
|
|
"grad_norm": 0.05183887854218483,
|
||
|
|
"learning_rate": 0.0001999917304307292,
|
||
|
|
"loss": 1.5697,
|
||
|
|
"step": 366
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004104824007068798,
|
||
|
|
"grad_norm": 0.05595005676150322,
|
||
|
|
"learning_rate": 0.0001999916851807197,
|
||
|
|
"loss": 1.5611,
|
||
|
|
"step": 367
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004116008813627568,
|
||
|
|
"grad_norm": 0.04884869232773781,
|
||
|
|
"learning_rate": 0.00019999163980725183,
|
||
|
|
"loss": 1.5754,
|
||
|
|
"step": 368
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004127193620186339,
|
||
|
|
"grad_norm": 0.050160013139247894,
|
||
|
|
"learning_rate": 0.00019999159431032562,
|
||
|
|
"loss": 1.5818,
|
||
|
|
"step": 369
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00413837842674511,
|
||
|
|
"grad_norm": 0.0458194725215435,
|
||
|
|
"learning_rate": 0.00019999154868994111,
|
||
|
|
"loss": 1.5821,
|
||
|
|
"step": 370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00414956323330388,
|
||
|
|
"grad_norm": 0.04877634719014168,
|
||
|
|
"learning_rate": 0.00019999150294609845,
|
||
|
|
"loss": 1.5828,
|
||
|
|
"step": 371
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00416074803986265,
|
||
|
|
"grad_norm": 0.045320361852645874,
|
||
|
|
"learning_rate": 0.00019999145707879758,
|
||
|
|
"loss": 1.5946,
|
||
|
|
"step": 372
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004171932846421421,
|
||
|
|
"grad_norm": 0.05053291842341423,
|
||
|
|
"learning_rate": 0.00019999141108803864,
|
||
|
|
"loss": 1.6002,
|
||
|
|
"step": 373
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004183117652980192,
|
||
|
|
"grad_norm": 0.053211431950330734,
|
||
|
|
"learning_rate": 0.0001999913649738216,
|
||
|
|
"loss": 1.5914,
|
||
|
|
"step": 374
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004194302459538962,
|
||
|
|
"grad_norm": 0.045671332627534866,
|
||
|
|
"learning_rate": 0.00019999131873614664,
|
||
|
|
"loss": 1.5806,
|
||
|
|
"step": 375
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004205487266097733,
|
||
|
|
"grad_norm": 0.051272232085466385,
|
||
|
|
"learning_rate": 0.0001999912723750137,
|
||
|
|
"loss": 1.5898,
|
||
|
|
"step": 376
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0042166720726565035,
|
||
|
|
"grad_norm": 0.05297670140862465,
|
||
|
|
"learning_rate": 0.0001999912258904229,
|
||
|
|
"loss": 1.6003,
|
||
|
|
"step": 377
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004227856879215274,
|
||
|
|
"grad_norm": 0.044414643198251724,
|
||
|
|
"learning_rate": 0.00019999117928237427,
|
||
|
|
"loss": 1.6069,
|
||
|
|
"step": 378
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004239041685774044,
|
||
|
|
"grad_norm": 0.04553841054439545,
|
||
|
|
"learning_rate": 0.0001999911325508679,
|
||
|
|
"loss": 1.5798,
|
||
|
|
"step": 379
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004250226492332815,
|
||
|
|
"grad_norm": 0.05364730581641197,
|
||
|
|
"learning_rate": 0.00019999108569590383,
|
||
|
|
"loss": 1.5856,
|
||
|
|
"step": 380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004261411298891586,
|
||
|
|
"grad_norm": 0.05173739790916443,
|
||
|
|
"learning_rate": 0.0001999910387174821,
|
||
|
|
"loss": 1.5764,
|
||
|
|
"step": 381
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004272596105450356,
|
||
|
|
"grad_norm": 0.05577515438199043,
|
||
|
|
"learning_rate": 0.00019999099161560282,
|
||
|
|
"loss": 1.5855,
|
||
|
|
"step": 382
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004283780912009127,
|
||
|
|
"grad_norm": 0.057436104863882065,
|
||
|
|
"learning_rate": 0.00019999094439026598,
|
||
|
|
"loss": 1.5785,
|
||
|
|
"step": 383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0042949657185678974,
|
||
|
|
"grad_norm": 0.03927776962518692,
|
||
|
|
"learning_rate": 0.00019999089704147166,
|
||
|
|
"loss": 1.5735,
|
||
|
|
"step": 384
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004306150525126668,
|
||
|
|
"grad_norm": 0.04739474132657051,
|
||
|
|
"learning_rate": 0.00019999084956921997,
|
||
|
|
"loss": 1.5805,
|
||
|
|
"step": 385
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004317335331685439,
|
||
|
|
"grad_norm": 0.04832485690712929,
|
||
|
|
"learning_rate": 0.0001999908019735109,
|
||
|
|
"loss": 1.6164,
|
||
|
|
"step": 386
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004328520138244209,
|
||
|
|
"grad_norm": 0.049625612795352936,
|
||
|
|
"learning_rate": 0.00019999075425434452,
|
||
|
|
"loss": 1.6468,
|
||
|
|
"step": 387
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0043397049448029796,
|
||
|
|
"grad_norm": 0.04835371673107147,
|
||
|
|
"learning_rate": 0.00019999070641172094,
|
||
|
|
"loss": 1.6438,
|
||
|
|
"step": 388
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00435088975136175,
|
||
|
|
"grad_norm": 0.05029625818133354,
|
||
|
|
"learning_rate": 0.00019999065844564018,
|
||
|
|
"loss": 1.6103,
|
||
|
|
"step": 389
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004362074557920521,
|
||
|
|
"grad_norm": 0.055567558854818344,
|
||
|
|
"learning_rate": 0.0001999906103561023,
|
||
|
|
"loss": 1.602,
|
||
|
|
"step": 390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004373259364479291,
|
||
|
|
"grad_norm": 0.06654093414545059,
|
||
|
|
"learning_rate": 0.00019999056214310733,
|
||
|
|
"loss": 1.5815,
|
||
|
|
"step": 391
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004384444171038062,
|
||
|
|
"grad_norm": 0.06477198004722595,
|
||
|
|
"learning_rate": 0.0001999905138066554,
|
||
|
|
"loss": 1.5883,
|
||
|
|
"step": 392
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004395628977596833,
|
||
|
|
"grad_norm": 0.0623844638466835,
|
||
|
|
"learning_rate": 0.00019999046534674656,
|
||
|
|
"loss": 1.5877,
|
||
|
|
"step": 393
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004406813784155603,
|
||
|
|
"grad_norm": 0.059734586626291275,
|
||
|
|
"learning_rate": 0.00019999041676338077,
|
||
|
|
"loss": 1.5855,
|
||
|
|
"step": 394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0044179985907143735,
|
||
|
|
"grad_norm": 0.05187408998608589,
|
||
|
|
"learning_rate": 0.0001999903680565582,
|
||
|
|
"loss": 1.5827,
|
||
|
|
"step": 395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004429183397273144,
|
||
|
|
"grad_norm": 0.05175703763961792,
|
||
|
|
"learning_rate": 0.00019999031922627886,
|
||
|
|
"loss": 1.5758,
|
||
|
|
"step": 396
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004440368203831915,
|
||
|
|
"grad_norm": 0.05059249326586723,
|
||
|
|
"learning_rate": 0.00019999027027254286,
|
||
|
|
"loss": 1.5758,
|
||
|
|
"step": 397
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004451553010390685,
|
||
|
|
"grad_norm": 0.050220511853694916,
|
||
|
|
"learning_rate": 0.0001999902211953502,
|
||
|
|
"loss": 1.5771,
|
||
|
|
"step": 398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004462737816949456,
|
||
|
|
"grad_norm": 0.057194001972675323,
|
||
|
|
"learning_rate": 0.00019999017199470094,
|
||
|
|
"loss": 1.5828,
|
||
|
|
"step": 399
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004473922623508227,
|
||
|
|
"grad_norm": 0.07026943564414978,
|
||
|
|
"learning_rate": 0.00019999012267059519,
|
||
|
|
"loss": 1.5728,
|
||
|
|
"step": 400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004485107430066997,
|
||
|
|
"grad_norm": 0.08094791322946548,
|
||
|
|
"learning_rate": 0.00019999007322303296,
|
||
|
|
"loss": 1.5634,
|
||
|
|
"step": 401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004496292236625767,
|
||
|
|
"grad_norm": 0.08370808511972427,
|
||
|
|
"learning_rate": 0.0001999900236520144,
|
||
|
|
"loss": 1.5747,
|
||
|
|
"step": 402
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004507477043184538,
|
||
|
|
"grad_norm": 0.09409447014331818,
|
||
|
|
"learning_rate": 0.00019998997395753945,
|
||
|
|
"loss": 1.5703,
|
||
|
|
"step": 403
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004518661849743309,
|
||
|
|
"grad_norm": 0.09207552671432495,
|
||
|
|
"learning_rate": 0.0001999899241396082,
|
||
|
|
"loss": 1.5628,
|
||
|
|
"step": 404
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004529846656302079,
|
||
|
|
"grad_norm": 0.07077619433403015,
|
||
|
|
"learning_rate": 0.0001999898741982208,
|
||
|
|
"loss": 1.5826,
|
||
|
|
"step": 405
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0045410314628608495,
|
||
|
|
"grad_norm": 0.06743451207876205,
|
||
|
|
"learning_rate": 0.00019998982413337724,
|
||
|
|
"loss": 1.6047,
|
||
|
|
"step": 406
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004552216269419621,
|
||
|
|
"grad_norm": 0.10414531826972961,
|
||
|
|
"learning_rate": 0.0001999897739450776,
|
||
|
|
"loss": 1.6005,
|
||
|
|
"step": 407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004563401075978391,
|
||
|
|
"grad_norm": 0.10947459191083908,
|
||
|
|
"learning_rate": 0.0001999897236333219,
|
||
|
|
"loss": 1.6025,
|
||
|
|
"step": 408
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004574585882537161,
|
||
|
|
"grad_norm": 0.06659277528524399,
|
||
|
|
"learning_rate": 0.00019998967319811027,
|
||
|
|
"loss": 1.6059,
|
||
|
|
"step": 409
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004585770689095932,
|
||
|
|
"grad_norm": 0.06038579344749451,
|
||
|
|
"learning_rate": 0.00019998962263944274,
|
||
|
|
"loss": 1.5826,
|
||
|
|
"step": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004596955495654703,
|
||
|
|
"grad_norm": 0.08695773035287857,
|
||
|
|
"learning_rate": 0.00019998957195731934,
|
||
|
|
"loss": 1.5779,
|
||
|
|
"step": 411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004608140302213473,
|
||
|
|
"grad_norm": 0.07446157187223434,
|
||
|
|
"learning_rate": 0.0001999895211517402,
|
||
|
|
"loss": 1.5812,
|
||
|
|
"step": 412
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0046193251087722435,
|
||
|
|
"grad_norm": 0.04878260940313339,
|
||
|
|
"learning_rate": 0.00019998947022270534,
|
||
|
|
"loss": 1.6033,
|
||
|
|
"step": 413
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004630509915331015,
|
||
|
|
"grad_norm": 0.06283459812402725,
|
||
|
|
"learning_rate": 0.00019998941917021484,
|
||
|
|
"loss": 1.5879,
|
||
|
|
"step": 414
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004641694721889785,
|
||
|
|
"grad_norm": 0.05891675129532814,
|
||
|
|
"learning_rate": 0.00019998936799426874,
|
||
|
|
"loss": 1.59,
|
||
|
|
"step": 415
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004652879528448555,
|
||
|
|
"grad_norm": 0.04745139181613922,
|
||
|
|
"learning_rate": 0.0001999893166948671,
|
||
|
|
"loss": 1.5981,
|
||
|
|
"step": 416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0046640643350073265,
|
||
|
|
"grad_norm": 0.05297010764479637,
|
||
|
|
"learning_rate": 0.00019998926527201003,
|
||
|
|
"loss": 1.5937,
|
||
|
|
"step": 417
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004675249141566097,
|
||
|
|
"grad_norm": 0.05115678906440735,
|
||
|
|
"learning_rate": 0.00019998921372569757,
|
||
|
|
"loss": 1.5941,
|
||
|
|
"step": 418
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004686433948124867,
|
||
|
|
"grad_norm": 0.05678752437233925,
|
||
|
|
"learning_rate": 0.00019998916205592974,
|
||
|
|
"loss": 1.5631,
|
||
|
|
"step": 419
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004697618754683637,
|
||
|
|
"grad_norm": 0.05227034166455269,
|
||
|
|
"learning_rate": 0.00019998911026270668,
|
||
|
|
"loss": 1.5755,
|
||
|
|
"step": 420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004708803561242409,
|
||
|
|
"grad_norm": 0.05871938541531563,
|
||
|
|
"learning_rate": 0.0001999890583460284,
|
||
|
|
"loss": 1.5843,
|
||
|
|
"step": 421
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004719988367801179,
|
||
|
|
"grad_norm": 0.06751812249422073,
|
||
|
|
"learning_rate": 0.00019998900630589493,
|
||
|
|
"loss": 1.5922,
|
||
|
|
"step": 422
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004731173174359949,
|
||
|
|
"grad_norm": 0.061179131269454956,
|
||
|
|
"learning_rate": 0.00019998895414230646,
|
||
|
|
"loss": 1.5939,
|
||
|
|
"step": 423
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00474235798091872,
|
||
|
|
"grad_norm": 0.06404510140419006,
|
||
|
|
"learning_rate": 0.00019998890185526292,
|
||
|
|
"loss": 1.5857,
|
||
|
|
"step": 424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004753542787477491,
|
||
|
|
"grad_norm": 0.07117751985788345,
|
||
|
|
"learning_rate": 0.0001999888494447645,
|
||
|
|
"loss": 1.5676,
|
||
|
|
"step": 425
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004764727594036261,
|
||
|
|
"grad_norm": 0.06996233761310577,
|
||
|
|
"learning_rate": 0.00019998879691081114,
|
||
|
|
"loss": 1.5769,
|
||
|
|
"step": 426
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004775912400595031,
|
||
|
|
"grad_norm": 0.0711674690246582,
|
||
|
|
"learning_rate": 0.00019998874425340298,
|
||
|
|
"loss": 1.5622,
|
||
|
|
"step": 427
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0047870972071538025,
|
||
|
|
"grad_norm": 0.079228475689888,
|
||
|
|
"learning_rate": 0.0001999886914725401,
|
||
|
|
"loss": 1.571,
|
||
|
|
"step": 428
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004798282013712573,
|
||
|
|
"grad_norm": 0.08016793429851532,
|
||
|
|
"learning_rate": 0.00019998863856822248,
|
||
|
|
"loss": 1.6029,
|
||
|
|
"step": 429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004809466820271343,
|
||
|
|
"grad_norm": 0.0795593336224556,
|
||
|
|
"learning_rate": 0.00019998858554045026,
|
||
|
|
"loss": 1.6062,
|
||
|
|
"step": 430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004820651626830114,
|
||
|
|
"grad_norm": 0.06341350823640823,
|
||
|
|
"learning_rate": 0.00019998853238922348,
|
||
|
|
"loss": 1.5884,
|
||
|
|
"step": 431
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004831836433388885,
|
||
|
|
"grad_norm": 0.05454142764210701,
|
||
|
|
"learning_rate": 0.00019998847911454219,
|
||
|
|
"loss": 1.5921,
|
||
|
|
"step": 432
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004843021239947655,
|
||
|
|
"grad_norm": 0.0625983327627182,
|
||
|
|
"learning_rate": 0.0001999884257164065,
|
||
|
|
"loss": 1.582,
|
||
|
|
"step": 433
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004854206046506425,
|
||
|
|
"grad_norm": 0.06116988882422447,
|
||
|
|
"learning_rate": 0.00019998837219481645,
|
||
|
|
"loss": 1.5804,
|
||
|
|
"step": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004865390853065196,
|
||
|
|
"grad_norm": 0.057872429490089417,
|
||
|
|
"learning_rate": 0.0001999883185497721,
|
||
|
|
"loss": 1.5721,
|
||
|
|
"step": 435
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004876575659623967,
|
||
|
|
"grad_norm": 0.04977230727672577,
|
||
|
|
"learning_rate": 0.00019998826478127352,
|
||
|
|
"loss": 1.5539,
|
||
|
|
"step": 436
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004887760466182737,
|
||
|
|
"grad_norm": 0.06137122958898544,
|
||
|
|
"learning_rate": 0.00019998821088932077,
|
||
|
|
"loss": 1.5715,
|
||
|
|
"step": 437
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004898945272741508,
|
||
|
|
"grad_norm": 0.05595165491104126,
|
||
|
|
"learning_rate": 0.00019998815687391396,
|
||
|
|
"loss": 1.5841,
|
||
|
|
"step": 438
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0049101300793002785,
|
||
|
|
"grad_norm": 0.049001295119524,
|
||
|
|
"learning_rate": 0.00019998810273505311,
|
||
|
|
"loss": 1.571,
|
||
|
|
"step": 439
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004921314885859049,
|
||
|
|
"grad_norm": 0.04792420566082001,
|
||
|
|
"learning_rate": 0.00019998804847273828,
|
||
|
|
"loss": 1.5695,
|
||
|
|
"step": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00493249969241782,
|
||
|
|
"grad_norm": 0.052222106605768204,
|
||
|
|
"learning_rate": 0.00019998799408696956,
|
||
|
|
"loss": 1.5671,
|
||
|
|
"step": 441
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00494368449897659,
|
||
|
|
"grad_norm": 0.05545085668563843,
|
||
|
|
"learning_rate": 0.00019998793957774703,
|
||
|
|
"loss": 1.5679,
|
||
|
|
"step": 442
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004954869305535361,
|
||
|
|
"grad_norm": 0.06147260218858719,
|
||
|
|
"learning_rate": 0.00019998788494507075,
|
||
|
|
"loss": 1.5746,
|
||
|
|
"step": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004966054112094131,
|
||
|
|
"grad_norm": 0.06299655884504318,
|
||
|
|
"learning_rate": 0.00019998783018894073,
|
||
|
|
"loss": 1.5719,
|
||
|
|
"step": 444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004977238918652902,
|
||
|
|
"grad_norm": 0.05477641522884369,
|
||
|
|
"learning_rate": 0.00019998777530935713,
|
||
|
|
"loss": 1.5933,
|
||
|
|
"step": 445
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0049884237252116725,
|
||
|
|
"grad_norm": 0.054924603551626205,
|
||
|
|
"learning_rate": 0.00019998772030631993,
|
||
|
|
"loss": 1.6381,
|
||
|
|
"step": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.004999608531770443,
|
||
|
|
"grad_norm": 0.05982334539294243,
|
||
|
|
"learning_rate": 0.0001999876651798293,
|
||
|
|
"loss": 1.6049,
|
||
|
|
"step": 447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005010793338329214,
|
||
|
|
"grad_norm": 0.07177302241325378,
|
||
|
|
"learning_rate": 0.0001999876099298852,
|
||
|
|
"loss": 1.5885,
|
||
|
|
"step": 448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005021978144887984,
|
||
|
|
"grad_norm": 0.06020566448569298,
|
||
|
|
"learning_rate": 0.00019998755455648778,
|
||
|
|
"loss": 1.5918,
|
||
|
|
"step": 449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005033162951446755,
|
||
|
|
"grad_norm": 0.0725252702832222,
|
||
|
|
"learning_rate": 0.00019998749905963706,
|
||
|
|
"loss": 1.5948,
|
||
|
|
"step": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005044347758005525,
|
||
|
|
"grad_norm": 0.07799220085144043,
|
||
|
|
"learning_rate": 0.00019998744343933313,
|
||
|
|
"loss": 1.5903,
|
||
|
|
"step": 451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005055532564564296,
|
||
|
|
"grad_norm": 0.06732252240180969,
|
||
|
|
"learning_rate": 0.00019998738769557605,
|
||
|
|
"loss": 1.6123,
|
||
|
|
"step": 452
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005066717371123066,
|
||
|
|
"grad_norm": 0.056653380393981934,
|
||
|
|
"learning_rate": 0.0001999873318283659,
|
||
|
|
"loss": 1.5976,
|
||
|
|
"step": 453
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005077902177681837,
|
||
|
|
"grad_norm": 0.06148442253470421,
|
||
|
|
"learning_rate": 0.00019998727583770274,
|
||
|
|
"loss": 1.5826,
|
||
|
|
"step": 454
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005089086984240608,
|
||
|
|
"grad_norm": 0.0657142624258995,
|
||
|
|
"learning_rate": 0.00019998721972358662,
|
||
|
|
"loss": 1.5504,
|
||
|
|
"step": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005100271790799378,
|
||
|
|
"grad_norm": 0.06259225308895111,
|
||
|
|
"learning_rate": 0.00019998716348601766,
|
||
|
|
"loss": 1.552,
|
||
|
|
"step": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0051114565973581485,
|
||
|
|
"grad_norm": 0.08781653642654419,
|
||
|
|
"learning_rate": 0.00019998710712499585,
|
||
|
|
"loss": 1.5523,
|
||
|
|
"step": 457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005122641403916919,
|
||
|
|
"grad_norm": 0.0888068750500679,
|
||
|
|
"learning_rate": 0.00019998705064052137,
|
||
|
|
"loss": 1.5581,
|
||
|
|
"step": 458
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00513382621047569,
|
||
|
|
"grad_norm": 0.11727220565080643,
|
||
|
|
"learning_rate": 0.0001999869940325942,
|
||
|
|
"loss": 1.5714,
|
||
|
|
"step": 459
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00514501101703446,
|
||
|
|
"grad_norm": 0.10440998524427414,
|
||
|
|
"learning_rate": 0.00019998693730121443,
|
||
|
|
"loss": 1.5567,
|
||
|
|
"step": 460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005156195823593231,
|
||
|
|
"grad_norm": 0.0791282206773758,
|
||
|
|
"learning_rate": 0.00019998688044638215,
|
||
|
|
"loss": 1.5605,
|
||
|
|
"step": 461
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005167380630152002,
|
||
|
|
"grad_norm": 0.07670507580041885,
|
||
|
|
"learning_rate": 0.0001999868234680974,
|
||
|
|
"loss": 1.5676,
|
||
|
|
"step": 462
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005178565436710772,
|
||
|
|
"grad_norm": 0.09401030838489532,
|
||
|
|
"learning_rate": 0.0001999867663663603,
|
||
|
|
"loss": 1.555,
|
||
|
|
"step": 463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0051897502432695424,
|
||
|
|
"grad_norm": 0.08279041945934296,
|
||
|
|
"learning_rate": 0.00019998670914117087,
|
||
|
|
"loss": 1.5436,
|
||
|
|
"step": 464
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005200935049828314,
|
||
|
|
"grad_norm": 0.17070412635803223,
|
||
|
|
"learning_rate": 0.0001999866517925292,
|
||
|
|
"loss": 1.5575,
|
||
|
|
"step": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005212119856387084,
|
||
|
|
"grad_norm": 0.09067776054143906,
|
||
|
|
"learning_rate": 0.00019998659432043537,
|
||
|
|
"loss": 1.5665,
|
||
|
|
"step": 466
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005223304662945854,
|
||
|
|
"grad_norm": 0.08899036049842834,
|
||
|
|
"learning_rate": 0.00019998653672488942,
|
||
|
|
"loss": 1.5678,
|
||
|
|
"step": 467
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005234489469504625,
|
||
|
|
"grad_norm": 0.06300584226846695,
|
||
|
|
"learning_rate": 0.00019998647900589144,
|
||
|
|
"loss": 1.5826,
|
||
|
|
"step": 468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005245674276063396,
|
||
|
|
"grad_norm": 0.07066696137189865,
|
||
|
|
"learning_rate": 0.00019998642116344156,
|
||
|
|
"loss": 1.5696,
|
||
|
|
"step": 469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005256859082622166,
|
||
|
|
"grad_norm": 0.0663958340883255,
|
||
|
|
"learning_rate": 0.00019998636319753973,
|
||
|
|
"loss": 1.5878,
|
||
|
|
"step": 470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005268043889180936,
|
||
|
|
"grad_norm": 0.06905809789896011,
|
||
|
|
"learning_rate": 0.00019998630510818612,
|
||
|
|
"loss": 1.5667,
|
||
|
|
"step": 471
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0052792286957397076,
|
||
|
|
"grad_norm": 0.05643589049577713,
|
||
|
|
"learning_rate": 0.00019998624689538077,
|
||
|
|
"loss": 1.5742,
|
||
|
|
"step": 472
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005290413502298478,
|
||
|
|
"grad_norm": 0.05323821306228638,
|
||
|
|
"learning_rate": 0.00019998618855912375,
|
||
|
|
"loss": 1.5965,
|
||
|
|
"step": 473
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005301598308857248,
|
||
|
|
"grad_norm": 0.07279177010059357,
|
||
|
|
"learning_rate": 0.0001999861300994151,
|
||
|
|
"loss": 1.5954,
|
||
|
|
"step": 474
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0053127831154160185,
|
||
|
|
"grad_norm": 0.06261339038610458,
|
||
|
|
"learning_rate": 0.00019998607151625497,
|
||
|
|
"loss": 1.5771,
|
||
|
|
"step": 475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00532396792197479,
|
||
|
|
"grad_norm": 0.0605684369802475,
|
||
|
|
"learning_rate": 0.00019998601280964335,
|
||
|
|
"loss": 1.5674,
|
||
|
|
"step": 476
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00533515272853356,
|
||
|
|
"grad_norm": 0.05855708196759224,
|
||
|
|
"learning_rate": 0.0001999859539795804,
|
||
|
|
"loss": 1.5641,
|
||
|
|
"step": 477
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00534633753509233,
|
||
|
|
"grad_norm": 0.04459947720170021,
|
||
|
|
"learning_rate": 0.0001999858950260661,
|
||
|
|
"loss": 1.5706,
|
||
|
|
"step": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0053575223416511015,
|
||
|
|
"grad_norm": 0.05174221470952034,
|
||
|
|
"learning_rate": 0.00019998583594910057,
|
||
|
|
"loss": 1.5525,
|
||
|
|
"step": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005368707148209872,
|
||
|
|
"grad_norm": 0.047726552933454514,
|
||
|
|
"learning_rate": 0.0001999857767486839,
|
||
|
|
"loss": 1.5613,
|
||
|
|
"step": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005379891954768642,
|
||
|
|
"grad_norm": 0.056866295635700226,
|
||
|
|
"learning_rate": 0.0001999857174248161,
|
||
|
|
"loss": 1.5851,
|
||
|
|
"step": 481
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005391076761327412,
|
||
|
|
"grad_norm": 0.05624596029520035,
|
||
|
|
"learning_rate": 0.00019998565797749732,
|
||
|
|
"loss": 1.5862,
|
||
|
|
"step": 482
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005402261567886184,
|
||
|
|
"grad_norm": 0.057655058801174164,
|
||
|
|
"learning_rate": 0.0001999855984067276,
|
||
|
|
"loss": 1.5848,
|
||
|
|
"step": 483
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005413446374444954,
|
||
|
|
"grad_norm": 0.06511086970567703,
|
||
|
|
"learning_rate": 0.000199985538712507,
|
||
|
|
"loss": 1.6081,
|
||
|
|
"step": 484
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005424631181003724,
|
||
|
|
"grad_norm": 0.0913616269826889,
|
||
|
|
"learning_rate": 0.0001999854788948356,
|
||
|
|
"loss": 1.5973,
|
||
|
|
"step": 485
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005435815987562495,
|
||
|
|
"grad_norm": 0.11560064554214478,
|
||
|
|
"learning_rate": 0.00019998541895371345,
|
||
|
|
"loss": 1.5767,
|
||
|
|
"step": 486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005447000794121266,
|
||
|
|
"grad_norm": 0.10205356776714325,
|
||
|
|
"learning_rate": 0.00019998535888914073,
|
||
|
|
"loss": 1.5641,
|
||
|
|
"step": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005458185600680036,
|
||
|
|
"grad_norm": 0.06806248426437378,
|
||
|
|
"learning_rate": 0.00019998529870111735,
|
||
|
|
"loss": 1.5586,
|
||
|
|
"step": 488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005469370407238807,
|
||
|
|
"grad_norm": 0.06459362804889679,
|
||
|
|
"learning_rate": 0.00019998523838964355,
|
||
|
|
"loss": 1.5532,
|
||
|
|
"step": 489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0054805552137975775,
|
||
|
|
"grad_norm": 0.07120149582624435,
|
||
|
|
"learning_rate": 0.00019998517795471928,
|
||
|
|
"loss": 1.5515,
|
||
|
|
"step": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005491740020356348,
|
||
|
|
"grad_norm": 0.059738751500844955,
|
||
|
|
"learning_rate": 0.00019998511739634464,
|
||
|
|
"loss": 1.5567,
|
||
|
|
"step": 491
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005502924826915118,
|
||
|
|
"grad_norm": 0.06517786532640457,
|
||
|
|
"learning_rate": 0.00019998505671451976,
|
||
|
|
"loss": 1.595,
|
||
|
|
"step": 492
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005514109633473889,
|
||
|
|
"grad_norm": 0.06841952353715897,
|
||
|
|
"learning_rate": 0.0001999849959092447,
|
||
|
|
"loss": 1.5805,
|
||
|
|
"step": 493
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00552529444003266,
|
||
|
|
"grad_norm": 0.06286690384149551,
|
||
|
|
"learning_rate": 0.0001999849349805195,
|
||
|
|
"loss": 1.571,
|
||
|
|
"step": 494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00553647924659143,
|
||
|
|
"grad_norm": 0.0559217631816864,
|
||
|
|
"learning_rate": 0.00019998487392834422,
|
||
|
|
"loss": 1.5569,
|
||
|
|
"step": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005547664053150201,
|
||
|
|
"grad_norm": 0.0687541738152504,
|
||
|
|
"learning_rate": 0.000199984812752719,
|
||
|
|
"loss": 1.557,
|
||
|
|
"step": 496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0055588488597089715,
|
||
|
|
"grad_norm": 0.06690400838851929,
|
||
|
|
"learning_rate": 0.00019998475145364383,
|
||
|
|
"loss": 1.567,
|
||
|
|
"step": 497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005570033666267742,
|
||
|
|
"grad_norm": 0.06469254940748215,
|
||
|
|
"learning_rate": 0.00019998469003111892,
|
||
|
|
"loss": 1.5783,
|
||
|
|
"step": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005581218472826512,
|
||
|
|
"grad_norm": 0.06279771029949188,
|
||
|
|
"learning_rate": 0.0001999846284851442,
|
||
|
|
"loss": 1.588,
|
||
|
|
"step": 499
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005592403279385283,
|
||
|
|
"grad_norm": 0.05500609427690506,
|
||
|
|
"learning_rate": 0.00019998456681571982,
|
||
|
|
"loss": 1.5775,
|
||
|
|
"step": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005603588085944054,
|
||
|
|
"grad_norm": 0.05660603567957878,
|
||
|
|
"learning_rate": 0.00019998450502284584,
|
||
|
|
"loss": 1.5847,
|
||
|
|
"step": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005614772892502824,
|
||
|
|
"grad_norm": 0.06350179761648178,
|
||
|
|
"learning_rate": 0.00019998444310652237,
|
||
|
|
"loss": 1.5859,
|
||
|
|
"step": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005625957699061595,
|
||
|
|
"grad_norm": 0.06947285681962967,
|
||
|
|
"learning_rate": 0.00019998438106674945,
|
||
|
|
"loss": 1.6142,
|
||
|
|
"step": 503
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005637142505620365,
|
||
|
|
"grad_norm": 0.08556090295314789,
|
||
|
|
"learning_rate": 0.00019998431890352712,
|
||
|
|
"loss": 1.5949,
|
||
|
|
"step": 504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005648327312179136,
|
||
|
|
"grad_norm": 0.10163229703903198,
|
||
|
|
"learning_rate": 0.00019998425661685553,
|
||
|
|
"loss": 1.5819,
|
||
|
|
"step": 505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005659512118737906,
|
||
|
|
"grad_norm": 0.11865612119436264,
|
||
|
|
"learning_rate": 0.00019998419420673476,
|
||
|
|
"loss": 1.5799,
|
||
|
|
"step": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005670696925296677,
|
||
|
|
"grad_norm": 0.12710048258304596,
|
||
|
|
"learning_rate": 0.0001999841316731648,
|
||
|
|
"loss": 1.5837,
|
||
|
|
"step": 507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0056818817318554475,
|
||
|
|
"grad_norm": 0.10893180966377258,
|
||
|
|
"learning_rate": 0.00019998406901614583,
|
||
|
|
"loss": 1.5547,
|
||
|
|
"step": 508
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005693066538414218,
|
||
|
|
"grad_norm": 0.06662983447313309,
|
||
|
|
"learning_rate": 0.00019998400623567788,
|
||
|
|
"loss": 1.5412,
|
||
|
|
"step": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005704251344972989,
|
||
|
|
"grad_norm": 0.06717602163553238,
|
||
|
|
"learning_rate": 0.000199983943331761,
|
||
|
|
"loss": 1.5471,
|
||
|
|
"step": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005715436151531759,
|
||
|
|
"grad_norm": 0.08228597790002823,
|
||
|
|
"learning_rate": 0.0001999838803043953,
|
||
|
|
"loss": 1.5446,
|
||
|
|
"step": 511
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00572662095809053,
|
||
|
|
"grad_norm": 0.07614196836948395,
|
||
|
|
"learning_rate": 0.00019998381715358084,
|
||
|
|
"loss": 1.5574,
|
||
|
|
"step": 512
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005737805764649301,
|
||
|
|
"grad_norm": 0.06075645610690117,
|
||
|
|
"learning_rate": 0.00019998375387931774,
|
||
|
|
"loss": 1.5597,
|
||
|
|
"step": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005748990571208071,
|
||
|
|
"grad_norm": 0.05882800742983818,
|
||
|
|
"learning_rate": 0.00019998369048160604,
|
||
|
|
"loss": 1.5559,
|
||
|
|
"step": 514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0057601753777668414,
|
||
|
|
"grad_norm": 0.07097506523132324,
|
||
|
|
"learning_rate": 0.0001999836269604458,
|
||
|
|
"loss": 1.5648,
|
||
|
|
"step": 515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005771360184325612,
|
||
|
|
"grad_norm": 0.06486310064792633,
|
||
|
|
"learning_rate": 0.00019998356331583716,
|
||
|
|
"loss": 1.5735,
|
||
|
|
"step": 516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005782544990884383,
|
||
|
|
"grad_norm": 0.05333361402153969,
|
||
|
|
"learning_rate": 0.00019998349954778016,
|
||
|
|
"loss": 1.567,
|
||
|
|
"step": 517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005793729797443153,
|
||
|
|
"grad_norm": 0.07817003130912781,
|
||
|
|
"learning_rate": 0.00019998343565627488,
|
||
|
|
"loss": 1.5902,
|
||
|
|
"step": 518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0058049146040019236,
|
||
|
|
"grad_norm": 0.07619974762201309,
|
||
|
|
"learning_rate": 0.00019998337164132138,
|
||
|
|
"loss": 1.5819,
|
||
|
|
"step": 519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005816099410560695,
|
||
|
|
"grad_norm": 0.06044092774391174,
|
||
|
|
"learning_rate": 0.0001999833075029198,
|
||
|
|
"loss": 1.6163,
|
||
|
|
"step": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005827284217119465,
|
||
|
|
"grad_norm": 0.06666608154773712,
|
||
|
|
"learning_rate": 0.00019998324324107015,
|
||
|
|
"loss": 1.5977,
|
||
|
|
"step": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005838469023678235,
|
||
|
|
"grad_norm": 0.06902644038200378,
|
||
|
|
"learning_rate": 0.00019998317885577254,
|
||
|
|
"loss": 1.5818,
|
||
|
|
"step": 522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005849653830237006,
|
||
|
|
"grad_norm": 0.05606195330619812,
|
||
|
|
"learning_rate": 0.00019998311434702703,
|
||
|
|
"loss": 1.5784,
|
||
|
|
"step": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005860838636795777,
|
||
|
|
"grad_norm": 0.08465290814638138,
|
||
|
|
"learning_rate": 0.00019998304971483374,
|
||
|
|
"loss": 1.5653,
|
||
|
|
"step": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005872023443354547,
|
||
|
|
"grad_norm": 0.08544803410768509,
|
||
|
|
"learning_rate": 0.00019998298495919274,
|
||
|
|
"loss": 1.5616,
|
||
|
|
"step": 525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0058832082499133175,
|
||
|
|
"grad_norm": 0.06527858972549438,
|
||
|
|
"learning_rate": 0.0001999829200801041,
|
||
|
|
"loss": 1.5879,
|
||
|
|
"step": 526
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005894393056472089,
|
||
|
|
"grad_norm": 0.07150562107563019,
|
||
|
|
"learning_rate": 0.00019998285507756789,
|
||
|
|
"loss": 1.5642,
|
||
|
|
"step": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005905577863030859,
|
||
|
|
"grad_norm": 0.08152669668197632,
|
||
|
|
"learning_rate": 0.00019998278995158418,
|
||
|
|
"loss": 1.5744,
|
||
|
|
"step": 528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005916762669589629,
|
||
|
|
"grad_norm": 0.09149385243654251,
|
||
|
|
"learning_rate": 0.0001999827247021531,
|
||
|
|
"loss": 1.5715,
|
||
|
|
"step": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0059279474761484,
|
||
|
|
"grad_norm": 0.08523254096508026,
|
||
|
|
"learning_rate": 0.00019998265932927466,
|
||
|
|
"loss": 1.5822,
|
||
|
|
"step": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005939132282707171,
|
||
|
|
"grad_norm": 0.062498655170202255,
|
||
|
|
"learning_rate": 0.000199982593832949,
|
||
|
|
"loss": 1.5862,
|
||
|
|
"step": 531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005950317089265941,
|
||
|
|
"grad_norm": 0.07431355118751526,
|
||
|
|
"learning_rate": 0.0001999825282131762,
|
||
|
|
"loss": 1.574,
|
||
|
|
"step": 532
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005961501895824711,
|
||
|
|
"grad_norm": 0.0720391795039177,
|
||
|
|
"learning_rate": 0.00019998246246995632,
|
||
|
|
"loss": 1.57,
|
||
|
|
"step": 533
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005972686702383483,
|
||
|
|
"grad_norm": 0.06915175914764404,
|
||
|
|
"learning_rate": 0.00019998239660328943,
|
||
|
|
"loss": 1.5721,
|
||
|
|
"step": 534
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005983871508942253,
|
||
|
|
"grad_norm": 0.058932509273290634,
|
||
|
|
"learning_rate": 0.00019998233061317561,
|
||
|
|
"loss": 1.583,
|
||
|
|
"step": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005995056315501023,
|
||
|
|
"grad_norm": 0.05271697789430618,
|
||
|
|
"learning_rate": 0.000199982264499615,
|
||
|
|
"loss": 1.5762,
|
||
|
|
"step": 536
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006006241122059794,
|
||
|
|
"grad_norm": 0.05659927427768707,
|
||
|
|
"learning_rate": 0.0001999821982626076,
|
||
|
|
"loss": 1.5718,
|
||
|
|
"step": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006017425928618565,
|
||
|
|
"grad_norm": 0.05749264359474182,
|
||
|
|
"learning_rate": 0.00019998213190215353,
|
||
|
|
"loss": 1.5897,
|
||
|
|
"step": 538
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006028610735177335,
|
||
|
|
"grad_norm": 0.06748930364847183,
|
||
|
|
"learning_rate": 0.0001999820654182529,
|
||
|
|
"loss": 1.5844,
|
||
|
|
"step": 539
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006039795541736105,
|
||
|
|
"grad_norm": 0.06751269847154617,
|
||
|
|
"learning_rate": 0.0001999819988109057,
|
||
|
|
"loss": 1.5926,
|
||
|
|
"step": 540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0060509803482948765,
|
||
|
|
"grad_norm": 0.06434184312820435,
|
||
|
|
"learning_rate": 0.00019998193208011213,
|
||
|
|
"loss": 1.5742,
|
||
|
|
"step": 541
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006062165154853647,
|
||
|
|
"grad_norm": 0.06833555549383163,
|
||
|
|
"learning_rate": 0.0001999818652258722,
|
||
|
|
"loss": 1.5818,
|
||
|
|
"step": 542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006073349961412417,
|
||
|
|
"grad_norm": 0.06202944368124008,
|
||
|
|
"learning_rate": 0.000199981798248186,
|
||
|
|
"loss": 1.5806,
|
||
|
|
"step": 543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006084534767971188,
|
||
|
|
"grad_norm": 0.06603850424289703,
|
||
|
|
"learning_rate": 0.00019998173114705366,
|
||
|
|
"loss": 1.5892,
|
||
|
|
"step": 544
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006095719574529959,
|
||
|
|
"grad_norm": 0.04424119368195534,
|
||
|
|
"learning_rate": 0.00019998166392247522,
|
||
|
|
"loss": 1.6017,
|
||
|
|
"step": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006106904381088729,
|
||
|
|
"grad_norm": 0.04886182025074959,
|
||
|
|
"learning_rate": 0.00019998159657445074,
|
||
|
|
"loss": 1.5971,
|
||
|
|
"step": 546
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006118089187647499,
|
||
|
|
"grad_norm": 0.055009353905916214,
|
||
|
|
"learning_rate": 0.00019998152910298035,
|
||
|
|
"loss": 1.5942,
|
||
|
|
"step": 547
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0061292739942062704,
|
||
|
|
"grad_norm": 0.04949553683400154,
|
||
|
|
"learning_rate": 0.00019998146150806411,
|
||
|
|
"loss": 1.5851,
|
||
|
|
"step": 548
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006140458800765041,
|
||
|
|
"grad_norm": 0.05669408291578293,
|
||
|
|
"learning_rate": 0.0001999813937897021,
|
||
|
|
"loss": 1.5565,
|
||
|
|
"step": 549
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006151643607323811,
|
||
|
|
"grad_norm": 0.052044518291950226,
|
||
|
|
"learning_rate": 0.00019998132594789444,
|
||
|
|
"loss": 1.5652,
|
||
|
|
"step": 550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006162828413882582,
|
||
|
|
"grad_norm": 0.055031049996614456,
|
||
|
|
"learning_rate": 0.00019998125798264117,
|
||
|
|
"loss": 1.5509,
|
||
|
|
"step": 551
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006174013220441353,
|
||
|
|
"grad_norm": 0.0653780847787857,
|
||
|
|
"learning_rate": 0.0001999811898939424,
|
||
|
|
"loss": 1.5672,
|
||
|
|
"step": 552
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006185198027000123,
|
||
|
|
"grad_norm": 0.06647983938455582,
|
||
|
|
"learning_rate": 0.00019998112168179822,
|
||
|
|
"loss": 1.5744,
|
||
|
|
"step": 553
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006196382833558893,
|
||
|
|
"grad_norm": 0.062013398855924606,
|
||
|
|
"learning_rate": 0.00019998105334620867,
|
||
|
|
"loss": 1.5545,
|
||
|
|
"step": 554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006207567640117664,
|
||
|
|
"grad_norm": 0.04633625969290733,
|
||
|
|
"learning_rate": 0.00019998098488717384,
|
||
|
|
"loss": 1.5643,
|
||
|
|
"step": 555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006218752446676435,
|
||
|
|
"grad_norm": 0.04178265854716301,
|
||
|
|
"learning_rate": 0.00019998091630469387,
|
||
|
|
"loss": 1.5484,
|
||
|
|
"step": 556
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006229937253235205,
|
||
|
|
"grad_norm": 0.04647913947701454,
|
||
|
|
"learning_rate": 0.00019998084759876883,
|
||
|
|
"loss": 1.554,
|
||
|
|
"step": 557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006241122059793976,
|
||
|
|
"grad_norm": 0.05254526063799858,
|
||
|
|
"learning_rate": 0.00019998077876939876,
|
||
|
|
"loss": 1.5671,
|
||
|
|
"step": 558
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0062523068663527465,
|
||
|
|
"grad_norm": 0.054666776210069656,
|
||
|
|
"learning_rate": 0.00019998070981658376,
|
||
|
|
"loss": 1.5709,
|
||
|
|
"step": 559
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006263491672911517,
|
||
|
|
"grad_norm": 0.04982587322592735,
|
||
|
|
"learning_rate": 0.00019998064074032396,
|
||
|
|
"loss": 1.5645,
|
||
|
|
"step": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006274676479470288,
|
||
|
|
"grad_norm": 0.05644576624035835,
|
||
|
|
"learning_rate": 0.00019998057154061938,
|
||
|
|
"loss": 1.5512,
|
||
|
|
"step": 561
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006285861286029058,
|
||
|
|
"grad_norm": 0.05311651527881622,
|
||
|
|
"learning_rate": 0.00019998050221747016,
|
||
|
|
"loss": 1.5399,
|
||
|
|
"step": 562
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006297046092587829,
|
||
|
|
"grad_norm": 0.05626964941620827,
|
||
|
|
"learning_rate": 0.00019998043277087634,
|
||
|
|
"loss": 1.5389,
|
||
|
|
"step": 563
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006308230899146599,
|
||
|
|
"grad_norm": 0.06463497877120972,
|
||
|
|
"learning_rate": 0.00019998036320083808,
|
||
|
|
"loss": 1.549,
|
||
|
|
"step": 564
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00631941570570537,
|
||
|
|
"grad_norm": 0.07779069244861603,
|
||
|
|
"learning_rate": 0.00019998029350735538,
|
||
|
|
"loss": 1.5591,
|
||
|
|
"step": 565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00633060051226414,
|
||
|
|
"grad_norm": 0.10419348627328873,
|
||
|
|
"learning_rate": 0.00019998022369042837,
|
||
|
|
"loss": 1.5669,
|
||
|
|
"step": 566
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006341785318822911,
|
||
|
|
"grad_norm": 0.11543929576873779,
|
||
|
|
"learning_rate": 0.00019998015375005709,
|
||
|
|
"loss": 1.5669,
|
||
|
|
"step": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006352970125381682,
|
||
|
|
"grad_norm": 0.08400971442461014,
|
||
|
|
"learning_rate": 0.0001999800836862417,
|
||
|
|
"loss": 1.589,
|
||
|
|
"step": 568
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006364154931940452,
|
||
|
|
"grad_norm": 0.056216295808553696,
|
||
|
|
"learning_rate": 0.00019998001349898225,
|
||
|
|
"loss": 1.5781,
|
||
|
|
"step": 569
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0063753397384992225,
|
||
|
|
"grad_norm": 0.07171747833490372,
|
||
|
|
"learning_rate": 0.0001999799431882788,
|
||
|
|
"loss": 1.5722,
|
||
|
|
"step": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006386524545057993,
|
||
|
|
"grad_norm": 0.07911943644285202,
|
||
|
|
"learning_rate": 0.0001999798727541315,
|
||
|
|
"loss": 1.5617,
|
||
|
|
"step": 571
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006397709351616764,
|
||
|
|
"grad_norm": 0.07471580803394318,
|
||
|
|
"learning_rate": 0.0001999798021965404,
|
||
|
|
"loss": 1.5674,
|
||
|
|
"step": 572
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006408894158175534,
|
||
|
|
"grad_norm": 0.06016454100608826,
|
||
|
|
"learning_rate": 0.00019997973151550556,
|
||
|
|
"loss": 1.589,
|
||
|
|
"step": 573
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006420078964734305,
|
||
|
|
"grad_norm": 0.06692295521497726,
|
||
|
|
"learning_rate": 0.00019997966071102713,
|
||
|
|
"loss": 1.5721,
|
||
|
|
"step": 574
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006431263771293076,
|
||
|
|
"grad_norm": 0.06640581041574478,
|
||
|
|
"learning_rate": 0.00019997958978310514,
|
||
|
|
"loss": 1.5781,
|
||
|
|
"step": 575
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006442448577851846,
|
||
|
|
"grad_norm": 0.058826372027397156,
|
||
|
|
"learning_rate": 0.0001999795187317397,
|
||
|
|
"loss": 1.5666,
|
||
|
|
"step": 576
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0064536333844106165,
|
||
|
|
"grad_norm": 0.055648185312747955,
|
||
|
|
"learning_rate": 0.0001999794475569309,
|
||
|
|
"loss": 1.5707,
|
||
|
|
"step": 577
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006464818190969387,
|
||
|
|
"grad_norm": 0.058248959481716156,
|
||
|
|
"learning_rate": 0.00019997937625867884,
|
||
|
|
"loss": 1.57,
|
||
|
|
"step": 578
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006476002997528158,
|
||
|
|
"grad_norm": 0.05667665973305702,
|
||
|
|
"learning_rate": 0.00019997930483698357,
|
||
|
|
"loss": 1.5715,
|
||
|
|
"step": 579
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006487187804086928,
|
||
|
|
"grad_norm": 0.051860544830560684,
|
||
|
|
"learning_rate": 0.00019997923329184524,
|
||
|
|
"loss": 1.5875,
|
||
|
|
"step": 580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006498372610645699,
|
||
|
|
"grad_norm": 0.05429021269083023,
|
||
|
|
"learning_rate": 0.00019997916162326385,
|
||
|
|
"loss": 1.606,
|
||
|
|
"step": 581
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00650955741720447,
|
||
|
|
"grad_norm": 0.055650923401117325,
|
||
|
|
"learning_rate": 0.00019997908983123956,
|
||
|
|
"loss": 1.6024,
|
||
|
|
"step": 582
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00652074222376324,
|
||
|
|
"grad_norm": 0.061447255313396454,
|
||
|
|
"learning_rate": 0.00019997901791577244,
|
||
|
|
"loss": 1.5888,
|
||
|
|
"step": 583
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00653192703032201,
|
||
|
|
"grad_norm": 0.06065785884857178,
|
||
|
|
"learning_rate": 0.00019997894587686255,
|
||
|
|
"loss": 1.5739,
|
||
|
|
"step": 584
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006543111836880782,
|
||
|
|
"grad_norm": 0.07358521968126297,
|
||
|
|
"learning_rate": 0.00019997887371451002,
|
||
|
|
"loss": 1.5682,
|
||
|
|
"step": 585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006554296643439552,
|
||
|
|
"grad_norm": 0.08286885917186737,
|
||
|
|
"learning_rate": 0.00019997880142871494,
|
||
|
|
"loss": 1.5702,
|
||
|
|
"step": 586
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006565481449998322,
|
||
|
|
"grad_norm": 0.09056065231561661,
|
||
|
|
"learning_rate": 0.0001999787290194774,
|
||
|
|
"loss": 1.5822,
|
||
|
|
"step": 587
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0065766662565570925,
|
||
|
|
"grad_norm": 0.08298853039741516,
|
||
|
|
"learning_rate": 0.00019997865648679745,
|
||
|
|
"loss": 1.5818,
|
||
|
|
"step": 588
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006587851063115864,
|
||
|
|
"grad_norm": 0.08499585837125778,
|
||
|
|
"learning_rate": 0.00019997858383067517,
|
||
|
|
"loss": 1.5775,
|
||
|
|
"step": 589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006599035869674634,
|
||
|
|
"grad_norm": 0.08271525800228119,
|
||
|
|
"learning_rate": 0.00019997851105111073,
|
||
|
|
"loss": 1.5756,
|
||
|
|
"step": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006610220676233404,
|
||
|
|
"grad_norm": 0.07318850606679916,
|
||
|
|
"learning_rate": 0.00019997843814810416,
|
||
|
|
"loss": 1.5674,
|
||
|
|
"step": 591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0066214054827921755,
|
||
|
|
"grad_norm": 0.07372857630252838,
|
||
|
|
"learning_rate": 0.00019997836512165558,
|
||
|
|
"loss": 1.5589,
|
||
|
|
"step": 592
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006632590289350946,
|
||
|
|
"grad_norm": 0.09608045220375061,
|
||
|
|
"learning_rate": 0.00019997829197176503,
|
||
|
|
"loss": 1.5483,
|
||
|
|
"step": 593
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006643775095909716,
|
||
|
|
"grad_norm": 0.13775509595870972,
|
||
|
|
"learning_rate": 0.00019997821869843264,
|
||
|
|
"loss": 1.5534,
|
||
|
|
"step": 594
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0066549599024684864,
|
||
|
|
"grad_norm": 0.1282949000597,
|
||
|
|
"learning_rate": 0.00019997814530165847,
|
||
|
|
"loss": 1.5707,
|
||
|
|
"step": 595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006666144709027258,
|
||
|
|
"grad_norm": 0.09030576795339584,
|
||
|
|
"learning_rate": 0.00019997807178144268,
|
||
|
|
"loss": 1.5759,
|
||
|
|
"step": 596
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006677329515586028,
|
||
|
|
"grad_norm": 0.08960919827222824,
|
||
|
|
"learning_rate": 0.00019997799813778531,
|
||
|
|
"loss": 1.5747,
|
||
|
|
"step": 597
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006688514322144798,
|
||
|
|
"grad_norm": 0.08592968434095383,
|
||
|
|
"learning_rate": 0.00019997792437068644,
|
||
|
|
"loss": 1.5837,
|
||
|
|
"step": 598
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0066996991287035694,
|
||
|
|
"grad_norm": 0.07291566580533981,
|
||
|
|
"learning_rate": 0.00019997785048014616,
|
||
|
|
"loss": 1.5797,
|
||
|
|
"step": 599
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00671088393526234,
|
||
|
|
"grad_norm": 0.07706471532583237,
|
||
|
|
"learning_rate": 0.0001999777764661646,
|
||
|
|
"loss": 1.5715,
|
||
|
|
"step": 600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00672206874182111,
|
||
|
|
"grad_norm": 0.06954386830329895,
|
||
|
|
"learning_rate": 0.00019997770232874182,
|
||
|
|
"loss": 1.563,
|
||
|
|
"step": 601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00673325354837988,
|
||
|
|
"grad_norm": 0.06999648362398148,
|
||
|
|
"learning_rate": 0.00019997762806787792,
|
||
|
|
"loss": 1.5717,
|
||
|
|
"step": 602
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0067444383549386516,
|
||
|
|
"grad_norm": 0.05400196090340614,
|
||
|
|
"learning_rate": 0.00019997755368357298,
|
||
|
|
"loss": 1.5862,
|
||
|
|
"step": 603
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006755623161497422,
|
||
|
|
"grad_norm": 0.06418072432279587,
|
||
|
|
"learning_rate": 0.00019997747917582714,
|
||
|
|
"loss": 1.5908,
|
||
|
|
"step": 604
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006766807968056192,
|
||
|
|
"grad_norm": 0.05838518589735031,
|
||
|
|
"learning_rate": 0.00019997740454464044,
|
||
|
|
"loss": 1.5718,
|
||
|
|
"step": 605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006777992774614963,
|
||
|
|
"grad_norm": 0.05882187560200691,
|
||
|
|
"learning_rate": 0.00019997732979001298,
|
||
|
|
"loss": 1.5769,
|
||
|
|
"step": 606
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006789177581173734,
|
||
|
|
"grad_norm": 0.05842543765902519,
|
||
|
|
"learning_rate": 0.00019997725491194482,
|
||
|
|
"loss": 1.5746,
|
||
|
|
"step": 607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006800362387732504,
|
||
|
|
"grad_norm": 0.0527958944439888,
|
||
|
|
"learning_rate": 0.00019997717991043616,
|
||
|
|
"loss": 1.5682,
|
||
|
|
"step": 608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006811547194291274,
|
||
|
|
"grad_norm": 0.06266690045595169,
|
||
|
|
"learning_rate": 0.00019997710478548698,
|
||
|
|
"loss": 1.572,
|
||
|
|
"step": 609
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0068227320008500455,
|
||
|
|
"grad_norm": 0.05554317682981491,
|
||
|
|
"learning_rate": 0.00019997702953709746,
|
||
|
|
"loss": 1.583,
|
||
|
|
"step": 610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006833916807408816,
|
||
|
|
"grad_norm": 0.04651861637830734,
|
||
|
|
"learning_rate": 0.00019997695416526761,
|
||
|
|
"loss": 1.5738,
|
||
|
|
"step": 611
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006845101613967586,
|
||
|
|
"grad_norm": 0.053717561066150665,
|
||
|
|
"learning_rate": 0.0001999768786699976,
|
||
|
|
"loss": 1.5766,
|
||
|
|
"step": 612
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006856286420526357,
|
||
|
|
"grad_norm": 0.050605516880750656,
|
||
|
|
"learning_rate": 0.0001999768030512875,
|
||
|
|
"loss": 1.606,
|
||
|
|
"step": 613
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006867471227085128,
|
||
|
|
"grad_norm": 0.054307371377944946,
|
||
|
|
"learning_rate": 0.00019997672730913735,
|
||
|
|
"loss": 1.6187,
|
||
|
|
"step": 614
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006878656033643898,
|
||
|
|
"grad_norm": 0.06506580859422684,
|
||
|
|
"learning_rate": 0.00019997665144354728,
|
||
|
|
"loss": 1.6113,
|
||
|
|
"step": 615
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006889840840202669,
|
||
|
|
"grad_norm": 0.06480210274457932,
|
||
|
|
"learning_rate": 0.00019997657545451744,
|
||
|
|
"loss": 1.594,
|
||
|
|
"step": 616
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006901025646761439,
|
||
|
|
"grad_norm": 0.04906410723924637,
|
||
|
|
"learning_rate": 0.00019997649934204784,
|
||
|
|
"loss": 1.5809,
|
||
|
|
"step": 617
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00691221045332021,
|
||
|
|
"grad_norm": 0.05194586515426636,
|
||
|
|
"learning_rate": 0.00019997642310613857,
|
||
|
|
"loss": 1.5913,
|
||
|
|
"step": 618
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00692339525987898,
|
||
|
|
"grad_norm": 0.05839546024799347,
|
||
|
|
"learning_rate": 0.0001999763467467898,
|
||
|
|
"loss": 1.5746,
|
||
|
|
"step": 619
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006934580066437751,
|
||
|
|
"grad_norm": 0.06750357896089554,
|
||
|
|
"learning_rate": 0.0001999762702640016,
|
||
|
|
"loss": 1.575,
|
||
|
|
"step": 620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0069457648729965215,
|
||
|
|
"grad_norm": 0.07982991635799408,
|
||
|
|
"learning_rate": 0.00019997619365777402,
|
||
|
|
"loss": 1.576,
|
||
|
|
"step": 621
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006956949679555292,
|
||
|
|
"grad_norm": 0.08816216886043549,
|
||
|
|
"learning_rate": 0.00019997611692810718,
|
||
|
|
"loss": 1.5744,
|
||
|
|
"step": 622
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006968134486114063,
|
||
|
|
"grad_norm": 0.09619053453207016,
|
||
|
|
"learning_rate": 0.0001999760400750012,
|
||
|
|
"loss": 1.5791,
|
||
|
|
"step": 623
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006979319292672833,
|
||
|
|
"grad_norm": 0.09412987530231476,
|
||
|
|
"learning_rate": 0.00019997596309845612,
|
||
|
|
"loss": 1.5799,
|
||
|
|
"step": 624
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.006990504099231604,
|
||
|
|
"grad_norm": 0.09227743744850159,
|
||
|
|
"learning_rate": 0.0001999758859984721,
|
||
|
|
"loss": 1.574,
|
||
|
|
"step": 625
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007001688905790374,
|
||
|
|
"grad_norm": 0.07984034717082977,
|
||
|
|
"learning_rate": 0.00019997580877504918,
|
||
|
|
"loss": 1.5612,
|
||
|
|
"step": 626
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007012873712349145,
|
||
|
|
"grad_norm": 0.05941009521484375,
|
||
|
|
"learning_rate": 0.00019997573142818752,
|
||
|
|
"loss": 1.5655,
|
||
|
|
"step": 627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0070240585189079155,
|
||
|
|
"grad_norm": 0.06787893921136856,
|
||
|
|
"learning_rate": 0.0001999756539578871,
|
||
|
|
"loss": 1.5662,
|
||
|
|
"step": 628
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007035243325466686,
|
||
|
|
"grad_norm": 0.07556013017892838,
|
||
|
|
"learning_rate": 0.00019997557636414816,
|
||
|
|
"loss": 1.5877,
|
||
|
|
"step": 629
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007046428132025457,
|
||
|
|
"grad_norm": 0.06565730273723602,
|
||
|
|
"learning_rate": 0.0001999754986469707,
|
||
|
|
"loss": 1.5701,
|
||
|
|
"step": 630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007057612938584227,
|
||
|
|
"grad_norm": 0.05801456421613693,
|
||
|
|
"learning_rate": 0.00019997542080635482,
|
||
|
|
"loss": 1.5604,
|
||
|
|
"step": 631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007068797745142998,
|
||
|
|
"grad_norm": 0.058777451515197754,
|
||
|
|
"learning_rate": 0.00019997534284230066,
|
||
|
|
"loss": 1.5695,
|
||
|
|
"step": 632
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007079982551701768,
|
||
|
|
"grad_norm": 0.0650622621178627,
|
||
|
|
"learning_rate": 0.0001999752647548083,
|
||
|
|
"loss": 1.5612,
|
||
|
|
"step": 633
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007091167358260539,
|
||
|
|
"grad_norm": 0.05834876000881195,
|
||
|
|
"learning_rate": 0.00019997518654387783,
|
||
|
|
"loss": 1.5617,
|
||
|
|
"step": 634
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007102352164819309,
|
||
|
|
"grad_norm": 0.06384813785552979,
|
||
|
|
"learning_rate": 0.00019997510820950933,
|
||
|
|
"loss": 1.5567,
|
||
|
|
"step": 635
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00711353697137808,
|
||
|
|
"grad_norm": 0.05401776731014252,
|
||
|
|
"learning_rate": 0.00019997502975170291,
|
||
|
|
"loss": 1.5558,
|
||
|
|
"step": 636
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007124721777936851,
|
||
|
|
"grad_norm": 0.06590646505355835,
|
||
|
|
"learning_rate": 0.00019997495117045867,
|
||
|
|
"loss": 1.5474,
|
||
|
|
"step": 637
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007135906584495621,
|
||
|
|
"grad_norm": 0.0560823492705822,
|
||
|
|
"learning_rate": 0.00019997487246577674,
|
||
|
|
"loss": 1.5404,
|
||
|
|
"step": 638
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0071470913910543915,
|
||
|
|
"grad_norm": 0.0544624887406826,
|
||
|
|
"learning_rate": 0.00019997479363765717,
|
||
|
|
"loss": 1.5321,
|
||
|
|
"step": 639
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007158276197613163,
|
||
|
|
"grad_norm": 0.04914103075861931,
|
||
|
|
"learning_rate": 0.00019997471468610005,
|
||
|
|
"loss": 1.5395,
|
||
|
|
"step": 640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007169461004171933,
|
||
|
|
"grad_norm": 0.0481346994638443,
|
||
|
|
"learning_rate": 0.00019997463561110553,
|
||
|
|
"loss": 1.5513,
|
||
|
|
"step": 641
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007180645810730703,
|
||
|
|
"grad_norm": 0.04910167306661606,
|
||
|
|
"learning_rate": 0.00019997455641267367,
|
||
|
|
"loss": 1.5436,
|
||
|
|
"step": 642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007191830617289474,
|
||
|
|
"grad_norm": 0.05214869976043701,
|
||
|
|
"learning_rate": 0.00019997447709080456,
|
||
|
|
"loss": 1.5589,
|
||
|
|
"step": 643
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007203015423848245,
|
||
|
|
"grad_norm": 0.06242618337273598,
|
||
|
|
"learning_rate": 0.00019997439764549832,
|
||
|
|
"loss": 1.5395,
|
||
|
|
"step": 644
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007214200230407015,
|
||
|
|
"grad_norm": 0.07024102658033371,
|
||
|
|
"learning_rate": 0.00019997431807675505,
|
||
|
|
"loss": 1.5624,
|
||
|
|
"step": 645
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0072253850369657854,
|
||
|
|
"grad_norm": 0.07082174718379974,
|
||
|
|
"learning_rate": 0.0001999742383845748,
|
||
|
|
"loss": 1.5505,
|
||
|
|
"step": 646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007236569843524557,
|
||
|
|
"grad_norm": 0.06821414083242416,
|
||
|
|
"learning_rate": 0.00019997415856895775,
|
||
|
|
"loss": 1.5489,
|
||
|
|
"step": 647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007247754650083327,
|
||
|
|
"grad_norm": 0.062424443662166595,
|
||
|
|
"learning_rate": 0.00019997407862990395,
|
||
|
|
"loss": 1.5341,
|
||
|
|
"step": 648
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007258939456642097,
|
||
|
|
"grad_norm": 0.05251247063279152,
|
||
|
|
"learning_rate": 0.00019997399856741348,
|
||
|
|
"loss": 1.5257,
|
||
|
|
"step": 649
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0072701242632008676,
|
||
|
|
"grad_norm": 0.04626723378896713,
|
||
|
|
"learning_rate": 0.0001999739183814865,
|
||
|
|
"loss": 1.5387,
|
||
|
|
"step": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007281309069759639,
|
||
|
|
"grad_norm": 0.05231785774230957,
|
||
|
|
"learning_rate": 0.00019997383807212306,
|
||
|
|
"loss": 1.5378,
|
||
|
|
"step": 651
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007292493876318409,
|
||
|
|
"grad_norm": 0.06309188902378082,
|
||
|
|
"learning_rate": 0.00019997375763932323,
|
||
|
|
"loss": 1.5531,
|
||
|
|
"step": 652
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007303678682877179,
|
||
|
|
"grad_norm": 0.05309786647558212,
|
||
|
|
"learning_rate": 0.0001999736770830872,
|
||
|
|
"loss": 1.5694,
|
||
|
|
"step": 653
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0073148634894359505,
|
||
|
|
"grad_norm": 0.04923882707953453,
|
||
|
|
"learning_rate": 0.000199973596403415,
|
||
|
|
"loss": 1.5837,
|
||
|
|
"step": 654
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007326048295994721,
|
||
|
|
"grad_norm": 0.05534524843096733,
|
||
|
|
"learning_rate": 0.00019997351560030677,
|
||
|
|
"loss": 1.5918,
|
||
|
|
"step": 655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007337233102553491,
|
||
|
|
"grad_norm": 0.1240246444940567,
|
||
|
|
"learning_rate": 0.00019997343467376258,
|
||
|
|
"loss": 1.5881,
|
||
|
|
"step": 656
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0073484179091122615,
|
||
|
|
"grad_norm": 0.06112068518996239,
|
||
|
|
"learning_rate": 0.00019997335362378254,
|
||
|
|
"loss": 1.5761,
|
||
|
|
"step": 657
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007359602715671033,
|
||
|
|
"grad_norm": 0.06589160114526749,
|
||
|
|
"learning_rate": 0.00019997327245036673,
|
||
|
|
"loss": 1.5641,
|
||
|
|
"step": 658
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007370787522229803,
|
||
|
|
"grad_norm": 0.060181207954883575,
|
||
|
|
"learning_rate": 0.0001999731911535153,
|
||
|
|
"loss": 1.5573,
|
||
|
|
"step": 659
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007381972328788573,
|
||
|
|
"grad_norm": 0.06380990892648697,
|
||
|
|
"learning_rate": 0.0001999731097332283,
|
||
|
|
"loss": 1.5624,
|
||
|
|
"step": 660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0073931571353473445,
|
||
|
|
"grad_norm": 0.06176357343792915,
|
||
|
|
"learning_rate": 0.00019997302818950584,
|
||
|
|
"loss": 1.5499,
|
||
|
|
"step": 661
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007404341941906115,
|
||
|
|
"grad_norm": 0.055721499025821686,
|
||
|
|
"learning_rate": 0.00019997294652234805,
|
||
|
|
"loss": 1.557,
|
||
|
|
"step": 662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007415526748464885,
|
||
|
|
"grad_norm": 0.051978956907987595,
|
||
|
|
"learning_rate": 0.000199972864731755,
|
||
|
|
"loss": 1.5623,
|
||
|
|
"step": 663
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007426711555023656,
|
||
|
|
"grad_norm": 0.04754827544093132,
|
||
|
|
"learning_rate": 0.00019997278281772682,
|
||
|
|
"loss": 1.5465,
|
||
|
|
"step": 664
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007437896361582427,
|
||
|
|
"grad_norm": 0.0538279265165329,
|
||
|
|
"learning_rate": 0.0001999727007802636,
|
||
|
|
"loss": 1.5326,
|
||
|
|
"step": 665
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007449081168141197,
|
||
|
|
"grad_norm": 0.0629352405667305,
|
||
|
|
"learning_rate": 0.00019997261861936543,
|
||
|
|
"loss": 1.5365,
|
||
|
|
"step": 666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007460265974699967,
|
||
|
|
"grad_norm": 0.06892745941877365,
|
||
|
|
"learning_rate": 0.00019997253633503238,
|
||
|
|
"loss": 1.5607,
|
||
|
|
"step": 667
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007471450781258738,
|
||
|
|
"grad_norm": 0.07525767385959625,
|
||
|
|
"learning_rate": 0.00019997245392726465,
|
||
|
|
"loss": 1.5728,
|
||
|
|
"step": 668
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007482635587817509,
|
||
|
|
"grad_norm": 0.10010071098804474,
|
||
|
|
"learning_rate": 0.00019997237139606224,
|
||
|
|
"loss": 1.559,
|
||
|
|
"step": 669
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007493820394376279,
|
||
|
|
"grad_norm": 0.12011202424764633,
|
||
|
|
"learning_rate": 0.0001999722887414253,
|
||
|
|
"loss": 1.5694,
|
||
|
|
"step": 670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00750500520093505,
|
||
|
|
"grad_norm": 0.12278566509485245,
|
||
|
|
"learning_rate": 0.00019997220596335393,
|
||
|
|
"loss": 1.5939,
|
||
|
|
"step": 671
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0075161900074938205,
|
||
|
|
"grad_norm": 0.10163605213165283,
|
||
|
|
"learning_rate": 0.00019997212306184823,
|
||
|
|
"loss": 1.5722,
|
||
|
|
"step": 672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007527374814052591,
|
||
|
|
"grad_norm": 0.09386469423770905,
|
||
|
|
"learning_rate": 0.00019997204003690828,
|
||
|
|
"loss": 1.5748,
|
||
|
|
"step": 673
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007538559620611361,
|
||
|
|
"grad_norm": 0.1031983494758606,
|
||
|
|
"learning_rate": 0.00019997195688853422,
|
||
|
|
"loss": 1.5778,
|
||
|
|
"step": 674
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007549744427170132,
|
||
|
|
"grad_norm": 0.09082422405481339,
|
||
|
|
"learning_rate": 0.00019997187361672615,
|
||
|
|
"loss": 1.5904,
|
||
|
|
"step": 675
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007560929233728903,
|
||
|
|
"grad_norm": 0.05761239677667618,
|
||
|
|
"learning_rate": 0.0001999717902214841,
|
||
|
|
"loss": 1.5753,
|
||
|
|
"step": 676
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007572114040287673,
|
||
|
|
"grad_norm": 0.0772882029414177,
|
||
|
|
"learning_rate": 0.0001999717067028083,
|
||
|
|
"loss": 1.5631,
|
||
|
|
"step": 677
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007583298846846444,
|
||
|
|
"grad_norm": 0.09266892075538635,
|
||
|
|
"learning_rate": 0.00019997162306069875,
|
||
|
|
"loss": 1.5798,
|
||
|
|
"step": 678
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0075944836534052144,
|
||
|
|
"grad_norm": 0.07755053043365479,
|
||
|
|
"learning_rate": 0.00019997153929515558,
|
||
|
|
"loss": 1.5969,
|
||
|
|
"step": 679
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007605668459963985,
|
||
|
|
"grad_norm": 0.061833951622247696,
|
||
|
|
"learning_rate": 0.0001999714554061789,
|
||
|
|
"loss": 1.5781,
|
||
|
|
"step": 680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007616853266522755,
|
||
|
|
"grad_norm": 0.07911964505910873,
|
||
|
|
"learning_rate": 0.00019997137139376883,
|
||
|
|
"loss": 1.611,
|
||
|
|
"step": 681
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007628038073081526,
|
||
|
|
"grad_norm": 0.07502644509077072,
|
||
|
|
"learning_rate": 0.00019997128725792544,
|
||
|
|
"loss": 1.6201,
|
||
|
|
"step": 682
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007639222879640297,
|
||
|
|
"grad_norm": 0.11084458976984024,
|
||
|
|
"learning_rate": 0.00019997120299864886,
|
||
|
|
"loss": 1.5882,
|
||
|
|
"step": 683
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007650407686199067,
|
||
|
|
"grad_norm": 0.1428053230047226,
|
||
|
|
"learning_rate": 0.00019997111861593921,
|
||
|
|
"loss": 1.5737,
|
||
|
|
"step": 684
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007661592492757838,
|
||
|
|
"grad_norm": 0.1456058770418167,
|
||
|
|
"learning_rate": 0.00019997103410979652,
|
||
|
|
"loss": 1.583,
|
||
|
|
"step": 685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007672777299316608,
|
||
|
|
"grad_norm": 0.10741148889064789,
|
||
|
|
"learning_rate": 0.00019997094948022098,
|
||
|
|
"loss": 1.5736,
|
||
|
|
"step": 686
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007683962105875379,
|
||
|
|
"grad_norm": 0.08248301595449448,
|
||
|
|
"learning_rate": 0.00019997086472721263,
|
||
|
|
"loss": 1.5559,
|
||
|
|
"step": 687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00769514691243415,
|
||
|
|
"grad_norm": 0.09595336019992828,
|
||
|
|
"learning_rate": 0.00019997077985077163,
|
||
|
|
"loss": 1.5513,
|
||
|
|
"step": 688
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00770633171899292,
|
||
|
|
"grad_norm": 0.06806618720293045,
|
||
|
|
"learning_rate": 0.00019997069485089804,
|
||
|
|
"loss": 1.5624,
|
||
|
|
"step": 689
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0077175165255516905,
|
||
|
|
"grad_norm": 0.07510481029748917,
|
||
|
|
"learning_rate": 0.00019997060972759198,
|
||
|
|
"loss": 1.5401,
|
||
|
|
"step": 690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007728701332110461,
|
||
|
|
"grad_norm": 0.06557908654212952,
|
||
|
|
"learning_rate": 0.00019997052448085358,
|
||
|
|
"loss": 1.5507,
|
||
|
|
"step": 691
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007739886138669232,
|
||
|
|
"grad_norm": 0.07286231964826584,
|
||
|
|
"learning_rate": 0.0001999704391106829,
|
||
|
|
"loss": 1.5597,
|
||
|
|
"step": 692
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007751070945228002,
|
||
|
|
"grad_norm": 0.06357460469007492,
|
||
|
|
"learning_rate": 0.0001999703536170801,
|
||
|
|
"loss": 1.5591,
|
||
|
|
"step": 693
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007762255751786773,
|
||
|
|
"grad_norm": 0.06700731813907623,
|
||
|
|
"learning_rate": 0.00019997026800004522,
|
||
|
|
"loss": 1.5521,
|
||
|
|
"step": 694
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007773440558345544,
|
||
|
|
"grad_norm": 0.05840053781867027,
|
||
|
|
"learning_rate": 0.00019997018225957839,
|
||
|
|
"loss": 1.5688,
|
||
|
|
"step": 695
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007784625364904314,
|
||
|
|
"grad_norm": 0.06327050924301147,
|
||
|
|
"learning_rate": 0.00019997009639567974,
|
||
|
|
"loss": 1.5547,
|
||
|
|
"step": 696
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007795810171463084,
|
||
|
|
"grad_norm": 0.06035961955785751,
|
||
|
|
"learning_rate": 0.00019997001040834936,
|
||
|
|
"loss": 1.5701,
|
||
|
|
"step": 697
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007806994978021855,
|
||
|
|
"grad_norm": 0.05573936179280281,
|
||
|
|
"learning_rate": 0.0001999699242975874,
|
||
|
|
"loss": 1.5878,
|
||
|
|
"step": 698
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007818179784580626,
|
||
|
|
"grad_norm": 0.05611170828342438,
|
||
|
|
"learning_rate": 0.00019996983806339387,
|
||
|
|
"loss": 1.5899,
|
||
|
|
"step": 699
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007829364591139396,
|
||
|
|
"grad_norm": 0.05826570466160774,
|
||
|
|
"learning_rate": 0.00019996975170576896,
|
||
|
|
"loss": 1.5533,
|
||
|
|
"step": 700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007840549397698167,
|
||
|
|
"grad_norm": 0.050937049090862274,
|
||
|
|
"learning_rate": 0.00019996966522471273,
|
||
|
|
"loss": 1.5545,
|
||
|
|
"step": 701
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007851734204256937,
|
||
|
|
"grad_norm": 0.06593479216098785,
|
||
|
|
"learning_rate": 0.0001999695786202253,
|
||
|
|
"loss": 1.5685,
|
||
|
|
"step": 702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007862919010815707,
|
||
|
|
"grad_norm": 0.06405465304851532,
|
||
|
|
"learning_rate": 0.0001999694918923068,
|
||
|
|
"loss": 1.6121,
|
||
|
|
"step": 703
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00787410381737448,
|
||
|
|
"grad_norm": 0.052820343524217606,
|
||
|
|
"learning_rate": 0.0001999694050409573,
|
||
|
|
"loss": 1.6007,
|
||
|
|
"step": 704
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00788528862393325,
|
||
|
|
"grad_norm": 0.05512186512351036,
|
||
|
|
"learning_rate": 0.00019996931806617695,
|
||
|
|
"loss": 1.5792,
|
||
|
|
"step": 705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00789647343049202,
|
||
|
|
"grad_norm": 0.0432184673845768,
|
||
|
|
"learning_rate": 0.0001999692309679658,
|
||
|
|
"loss": 1.5852,
|
||
|
|
"step": 706
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00790765823705079,
|
||
|
|
"grad_norm": 0.05248282849788666,
|
||
|
|
"learning_rate": 0.00019996914374632402,
|
||
|
|
"loss": 1.5917,
|
||
|
|
"step": 707
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00791884304360956,
|
||
|
|
"grad_norm": 0.04413476958870888,
|
||
|
|
"learning_rate": 0.00019996905640125165,
|
||
|
|
"loss": 1.5584,
|
||
|
|
"step": 708
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00793002785016833,
|
||
|
|
"grad_norm": 0.04878908023238182,
|
||
|
|
"learning_rate": 0.00019996896893274886,
|
||
|
|
"loss": 1.5506,
|
||
|
|
"step": 709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007941212656727101,
|
||
|
|
"grad_norm": 0.04344234988093376,
|
||
|
|
"learning_rate": 0.00019996888134081575,
|
||
|
|
"loss": 1.5684,
|
||
|
|
"step": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007952397463285873,
|
||
|
|
"grad_norm": 0.047158923000097275,
|
||
|
|
"learning_rate": 0.0001999687936254524,
|
||
|
|
"loss": 1.6133,
|
||
|
|
"step": 711
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007963582269844643,
|
||
|
|
"grad_norm": 0.050282686948776245,
|
||
|
|
"learning_rate": 0.00019996870578665893,
|
||
|
|
"loss": 1.6021,
|
||
|
|
"step": 712
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007974767076403414,
|
||
|
|
"grad_norm": 0.043916840106248856,
|
||
|
|
"learning_rate": 0.0001999686178244354,
|
||
|
|
"loss": 1.5794,
|
||
|
|
"step": 713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007985951882962184,
|
||
|
|
"grad_norm": 0.079423688352108,
|
||
|
|
"learning_rate": 0.00019996852973878205,
|
||
|
|
"loss": 1.5666,
|
||
|
|
"step": 714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.007997136689520954,
|
||
|
|
"grad_norm": 0.047040194272994995,
|
||
|
|
"learning_rate": 0.00019996844152969884,
|
||
|
|
"loss": 1.5643,
|
||
|
|
"step": 715
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008008321496079725,
|
||
|
|
"grad_norm": 0.04954817518591881,
|
||
|
|
"learning_rate": 0.00019996835319718596,
|
||
|
|
"loss": 1.5756,
|
||
|
|
"step": 716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008019506302638495,
|
||
|
|
"grad_norm": 0.0529201366007328,
|
||
|
|
"learning_rate": 0.00019996826474124352,
|
||
|
|
"loss": 1.5693,
|
||
|
|
"step": 717
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008030691109197267,
|
||
|
|
"grad_norm": 0.0555887334048748,
|
||
|
|
"learning_rate": 0.00019996817616187162,
|
||
|
|
"loss": 1.5699,
|
||
|
|
"step": 718
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008041875915756037,
|
||
|
|
"grad_norm": 0.05515376478433609,
|
||
|
|
"learning_rate": 0.00019996808745907036,
|
||
|
|
"loss": 1.5729,
|
||
|
|
"step": 719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008053060722314808,
|
||
|
|
"grad_norm": 0.05125884339213371,
|
||
|
|
"learning_rate": 0.0001999679986328398,
|
||
|
|
"loss": 1.5618,
|
||
|
|
"step": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008064245528873578,
|
||
|
|
"grad_norm": 0.046284329146146774,
|
||
|
|
"learning_rate": 0.0001999679096831802,
|
||
|
|
"loss": 1.5723,
|
||
|
|
"step": 721
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008075430335432348,
|
||
|
|
"grad_norm": 0.07273488491773605,
|
||
|
|
"learning_rate": 0.0001999678206100915,
|
||
|
|
"loss": 1.5701,
|
||
|
|
"step": 722
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008086615141991119,
|
||
|
|
"grad_norm": 0.047560565173625946,
|
||
|
|
"learning_rate": 0.0001999677314135739,
|
||
|
|
"loss": 1.5579,
|
||
|
|
"step": 723
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00809779994854989,
|
||
|
|
"grad_norm": 0.060283761471509933,
|
||
|
|
"learning_rate": 0.00019996764209362749,
|
||
|
|
"loss": 1.5615,
|
||
|
|
"step": 724
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008108984755108661,
|
||
|
|
"grad_norm": 0.0602620430290699,
|
||
|
|
"learning_rate": 0.00019996755265025236,
|
||
|
|
"loss": 1.5602,
|
||
|
|
"step": 725
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008120169561667431,
|
||
|
|
"grad_norm": 0.05383098125457764,
|
||
|
|
"learning_rate": 0.00019996746308344868,
|
||
|
|
"loss": 1.5769,
|
||
|
|
"step": 726
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008131354368226202,
|
||
|
|
"grad_norm": 0.04577267915010452,
|
||
|
|
"learning_rate": 0.0001999673733932165,
|
||
|
|
"loss": 1.619,
|
||
|
|
"step": 727
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008142539174784972,
|
||
|
|
"grad_norm": 0.04550078883767128,
|
||
|
|
"learning_rate": 0.00019996728357955595,
|
||
|
|
"loss": 1.5907,
|
||
|
|
"step": 728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008153723981343742,
|
||
|
|
"grad_norm": 0.050287820398807526,
|
||
|
|
"learning_rate": 0.00019996719364246714,
|
||
|
|
"loss": 1.5809,
|
||
|
|
"step": 729
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008164908787902513,
|
||
|
|
"grad_norm": 0.05841783806681633,
|
||
|
|
"learning_rate": 0.00019996710358195018,
|
||
|
|
"loss": 1.5521,
|
||
|
|
"step": 730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008176093594461285,
|
||
|
|
"grad_norm": 0.07749857753515244,
|
||
|
|
"learning_rate": 0.0001999670133980052,
|
||
|
|
"loss": 1.5848,
|
||
|
|
"step": 731
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008187278401020055,
|
||
|
|
"grad_norm": 0.08802466839551926,
|
||
|
|
"learning_rate": 0.00019996692309063232,
|
||
|
|
"loss": 1.6046,
|
||
|
|
"step": 732
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008198463207578825,
|
||
|
|
"grad_norm": 0.09324830025434494,
|
||
|
|
"learning_rate": 0.00019996683265983162,
|
||
|
|
"loss": 1.5969,
|
||
|
|
"step": 733
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008209648014137596,
|
||
|
|
"grad_norm": 0.07845516502857208,
|
||
|
|
"learning_rate": 0.0001999667421056032,
|
||
|
|
"loss": 1.5831,
|
||
|
|
"step": 734
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008220832820696366,
|
||
|
|
"grad_norm": 0.06912586092948914,
|
||
|
|
"learning_rate": 0.0001999666514279472,
|
||
|
|
"loss": 1.5706,
|
||
|
|
"step": 735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008232017627255136,
|
||
|
|
"grad_norm": 0.0572381317615509,
|
||
|
|
"learning_rate": 0.00019996656062686374,
|
||
|
|
"loss": 1.5609,
|
||
|
|
"step": 736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008243202433813906,
|
||
|
|
"grad_norm": 0.06219245865941048,
|
||
|
|
"learning_rate": 0.00019996646970235287,
|
||
|
|
"loss": 1.5541,
|
||
|
|
"step": 737
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008254387240372679,
|
||
|
|
"grad_norm": 0.0628521591424942,
|
||
|
|
"learning_rate": 0.0001999663786544148,
|
||
|
|
"loss": 1.5556,
|
||
|
|
"step": 738
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008265572046931449,
|
||
|
|
"grad_norm": 0.06389934569597244,
|
||
|
|
"learning_rate": 0.0001999662874830496,
|
||
|
|
"loss": 1.5235,
|
||
|
|
"step": 739
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00827675685349022,
|
||
|
|
"grad_norm": 0.052320901304483414,
|
||
|
|
"learning_rate": 0.00019996619618825733,
|
||
|
|
"loss": 1.539,
|
||
|
|
"step": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00828794166004899,
|
||
|
|
"grad_norm": 0.05470295995473862,
|
||
|
|
"learning_rate": 0.00019996610477003817,
|
||
|
|
"loss": 1.5415,
|
||
|
|
"step": 741
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00829912646660776,
|
||
|
|
"grad_norm": 0.06103771552443504,
|
||
|
|
"learning_rate": 0.00019996601322839222,
|
||
|
|
"loss": 1.5422,
|
||
|
|
"step": 742
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00831031127316653,
|
||
|
|
"grad_norm": 0.06434791535139084,
|
||
|
|
"learning_rate": 0.00019996592156331958,
|
||
|
|
"loss": 1.5527,
|
||
|
|
"step": 743
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0083214960797253,
|
||
|
|
"grad_norm": 0.06087024137377739,
|
||
|
|
"learning_rate": 0.00019996582977482036,
|
||
|
|
"loss": 1.562,
|
||
|
|
"step": 744
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008332680886284072,
|
||
|
|
"grad_norm": 0.060757141560316086,
|
||
|
|
"learning_rate": 0.00019996573786289465,
|
||
|
|
"loss": 1.5641,
|
||
|
|
"step": 745
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008343865692842843,
|
||
|
|
"grad_norm": 0.07097544521093369,
|
||
|
|
"learning_rate": 0.00019996564582754265,
|
||
|
|
"loss": 1.542,
|
||
|
|
"step": 746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008355050499401613,
|
||
|
|
"grad_norm": 0.07591135054826736,
|
||
|
|
"learning_rate": 0.00019996555366876437,
|
||
|
|
"loss": 1.5557,
|
||
|
|
"step": 747
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008366235305960383,
|
||
|
|
"grad_norm": 0.07860101759433746,
|
||
|
|
"learning_rate": 0.00019996546138655998,
|
||
|
|
"loss": 1.5592,
|
||
|
|
"step": 748
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008377420112519154,
|
||
|
|
"grad_norm": 0.08454012125730515,
|
||
|
|
"learning_rate": 0.00019996536898092958,
|
||
|
|
"loss": 1.5428,
|
||
|
|
"step": 749
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008388604919077924,
|
||
|
|
"grad_norm": 0.08686886727809906,
|
||
|
|
"learning_rate": 0.0001999652764518733,
|
||
|
|
"loss": 1.5604,
|
||
|
|
"step": 750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008399789725636694,
|
||
|
|
"grad_norm": 0.07752903550863266,
|
||
|
|
"learning_rate": 0.00019996518379939126,
|
||
|
|
"loss": 1.5663,
|
||
|
|
"step": 751
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008410974532195466,
|
||
|
|
"grad_norm": 0.07272690534591675,
|
||
|
|
"learning_rate": 0.00019996509102348356,
|
||
|
|
"loss": 1.5463,
|
||
|
|
"step": 752
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008422159338754237,
|
||
|
|
"grad_norm": 0.07069668918848038,
|
||
|
|
"learning_rate": 0.00019996499812415026,
|
||
|
|
"loss": 1.5403,
|
||
|
|
"step": 753
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008433344145313007,
|
||
|
|
"grad_norm": 0.06617298722267151,
|
||
|
|
"learning_rate": 0.00019996490510139155,
|
||
|
|
"loss": 1.5452,
|
||
|
|
"step": 754
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008444528951871777,
|
||
|
|
"grad_norm": 0.06795412302017212,
|
||
|
|
"learning_rate": 0.00019996481195520756,
|
||
|
|
"loss": 1.5355,
|
||
|
|
"step": 755
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008455713758430548,
|
||
|
|
"grad_norm": 0.06670048087835312,
|
||
|
|
"learning_rate": 0.00019996471868559832,
|
||
|
|
"loss": 1.5529,
|
||
|
|
"step": 756
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008466898564989318,
|
||
|
|
"grad_norm": 0.06750231981277466,
|
||
|
|
"learning_rate": 0.000199964625292564,
|
||
|
|
"loss": 1.5626,
|
||
|
|
"step": 757
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008478083371548088,
|
||
|
|
"grad_norm": 0.06446841359138489,
|
||
|
|
"learning_rate": 0.0001999645317761047,
|
||
|
|
"loss": 1.5599,
|
||
|
|
"step": 758
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00848926817810686,
|
||
|
|
"grad_norm": 0.0593569353222847,
|
||
|
|
"learning_rate": 0.00019996443813622057,
|
||
|
|
"loss": 1.5538,
|
||
|
|
"step": 759
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00850045298466563,
|
||
|
|
"grad_norm": 0.0496729277074337,
|
||
|
|
"learning_rate": 0.00019996434437291168,
|
||
|
|
"loss": 1.5427,
|
||
|
|
"step": 760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008511637791224401,
|
||
|
|
"grad_norm": 0.04995394125580788,
|
||
|
|
"learning_rate": 0.00019996425048617814,
|
||
|
|
"loss": 1.5326,
|
||
|
|
"step": 761
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008522822597783171,
|
||
|
|
"grad_norm": 0.061305031180381775,
|
||
|
|
"learning_rate": 0.00019996415647602014,
|
||
|
|
"loss": 1.5553,
|
||
|
|
"step": 762
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008534007404341942,
|
||
|
|
"grad_norm": 0.07046514004468918,
|
||
|
|
"learning_rate": 0.0001999640623424377,
|
||
|
|
"loss": 1.5305,
|
||
|
|
"step": 763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008545192210900712,
|
||
|
|
"grad_norm": 0.0729839950799942,
|
||
|
|
"learning_rate": 0.00019996396808543102,
|
||
|
|
"loss": 1.5448,
|
||
|
|
"step": 764
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008556377017459482,
|
||
|
|
"grad_norm": 0.07597866654396057,
|
||
|
|
"learning_rate": 0.00019996387370500016,
|
||
|
|
"loss": 1.5339,
|
||
|
|
"step": 765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008567561824018254,
|
||
|
|
"grad_norm": 0.07808911800384521,
|
||
|
|
"learning_rate": 0.00019996377920114525,
|
||
|
|
"loss": 1.5474,
|
||
|
|
"step": 766
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008578746630577025,
|
||
|
|
"grad_norm": 0.07077853381633759,
|
||
|
|
"learning_rate": 0.0001999636845738664,
|
||
|
|
"loss": 1.5456,
|
||
|
|
"step": 767
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008589931437135795,
|
||
|
|
"grad_norm": 0.05932854115962982,
|
||
|
|
"learning_rate": 0.00019996358982316378,
|
||
|
|
"loss": 1.5581,
|
||
|
|
"step": 768
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008601116243694565,
|
||
|
|
"grad_norm": 0.055467307567596436,
|
||
|
|
"learning_rate": 0.00019996349494903743,
|
||
|
|
"loss": 1.552,
|
||
|
|
"step": 769
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008612301050253336,
|
||
|
|
"grad_norm": 0.0684780701994896,
|
||
|
|
"learning_rate": 0.0001999633999514875,
|
||
|
|
"loss": 1.5691,
|
||
|
|
"step": 770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008623485856812106,
|
||
|
|
"grad_norm": 0.07558051496744156,
|
||
|
|
"learning_rate": 0.0001999633048305141,
|
||
|
|
"loss": 1.5704,
|
||
|
|
"step": 771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008634670663370878,
|
||
|
|
"grad_norm": 0.07451245933771133,
|
||
|
|
"learning_rate": 0.0001999632095861174,
|
||
|
|
"loss": 1.562,
|
||
|
|
"step": 772
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008645855469929648,
|
||
|
|
"grad_norm": 0.06852173060178757,
|
||
|
|
"learning_rate": 0.00019996311421829744,
|
||
|
|
"loss": 1.5582,
|
||
|
|
"step": 773
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008657040276488418,
|
||
|
|
"grad_norm": 0.05201677978038788,
|
||
|
|
"learning_rate": 0.00019996301872705438,
|
||
|
|
"loss": 1.5405,
|
||
|
|
"step": 774
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008668225083047189,
|
||
|
|
"grad_norm": 0.05331201106309891,
|
||
|
|
"learning_rate": 0.00019996292311238832,
|
||
|
|
"loss": 1.5389,
|
||
|
|
"step": 775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008679409889605959,
|
||
|
|
"grad_norm": 0.06298915296792984,
|
||
|
|
"learning_rate": 0.00019996282737429942,
|
||
|
|
"loss": 1.5544,
|
||
|
|
"step": 776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00869059469616473,
|
||
|
|
"grad_norm": 0.06354406476020813,
|
||
|
|
"learning_rate": 0.00019996273151278774,
|
||
|
|
"loss": 1.5654,
|
||
|
|
"step": 777
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0087017795027235,
|
||
|
|
"grad_norm": 0.0683928057551384,
|
||
|
|
"learning_rate": 0.00019996263552785344,
|
||
|
|
"loss": 1.5515,
|
||
|
|
"step": 778
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008712964309282272,
|
||
|
|
"grad_norm": 0.08236062526702881,
|
||
|
|
"learning_rate": 0.0001999625394194966,
|
||
|
|
"loss": 1.5617,
|
||
|
|
"step": 779
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008724149115841042,
|
||
|
|
"grad_norm": 0.07203904539346695,
|
||
|
|
"learning_rate": 0.0001999624431877174,
|
||
|
|
"loss": 1.555,
|
||
|
|
"step": 780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008735333922399812,
|
||
|
|
"grad_norm": 0.06245394051074982,
|
||
|
|
"learning_rate": 0.0001999623468325159,
|
||
|
|
"loss": 1.5696,
|
||
|
|
"step": 781
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008746518728958583,
|
||
|
|
"grad_norm": 0.06451458483934402,
|
||
|
|
"learning_rate": 0.00019996225035389222,
|
||
|
|
"loss": 1.5695,
|
||
|
|
"step": 782
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008757703535517353,
|
||
|
|
"grad_norm": 0.06131128594279289,
|
||
|
|
"learning_rate": 0.00019996215375184652,
|
||
|
|
"loss": 1.5749,
|
||
|
|
"step": 783
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008768888342076123,
|
||
|
|
"grad_norm": 0.07188650965690613,
|
||
|
|
"learning_rate": 0.00019996205702637888,
|
||
|
|
"loss": 1.5647,
|
||
|
|
"step": 784
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008780073148634894,
|
||
|
|
"grad_norm": 0.0647532194852829,
|
||
|
|
"learning_rate": 0.00019996196017748948,
|
||
|
|
"loss": 1.5906,
|
||
|
|
"step": 785
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008791257955193666,
|
||
|
|
"grad_norm": 0.06955672800540924,
|
||
|
|
"learning_rate": 0.00019996186320517836,
|
||
|
|
"loss": 1.5923,
|
||
|
|
"step": 786
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008802442761752436,
|
||
|
|
"grad_norm": 0.08375248312950134,
|
||
|
|
"learning_rate": 0.00019996176610944568,
|
||
|
|
"loss": 1.5949,
|
||
|
|
"step": 787
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008813627568311206,
|
||
|
|
"grad_norm": 0.07389501482248306,
|
||
|
|
"learning_rate": 0.00019996166889029156,
|
||
|
|
"loss": 1.5859,
|
||
|
|
"step": 788
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008824812374869977,
|
||
|
|
"grad_norm": 0.08891887962818146,
|
||
|
|
"learning_rate": 0.0001999615715477161,
|
||
|
|
"loss": 1.5799,
|
||
|
|
"step": 789
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008835997181428747,
|
||
|
|
"grad_norm": 0.08630786836147308,
|
||
|
|
"learning_rate": 0.00019996147408171948,
|
||
|
|
"loss": 1.5648,
|
||
|
|
"step": 790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008847181987987517,
|
||
|
|
"grad_norm": 0.09771011024713516,
|
||
|
|
"learning_rate": 0.00019996137649230176,
|
||
|
|
"loss": 1.5505,
|
||
|
|
"step": 791
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008858366794546288,
|
||
|
|
"grad_norm": 0.09192012995481491,
|
||
|
|
"learning_rate": 0.00019996127877946307,
|
||
|
|
"loss": 1.5704,
|
||
|
|
"step": 792
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00886955160110506,
|
||
|
|
"grad_norm": 0.07876724004745483,
|
||
|
|
"learning_rate": 0.00019996118094320355,
|
||
|
|
"loss": 1.5822,
|
||
|
|
"step": 793
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00888073640766383,
|
||
|
|
"grad_norm": 0.06203979253768921,
|
||
|
|
"learning_rate": 0.00019996108298352328,
|
||
|
|
"loss": 1.5599,
|
||
|
|
"step": 794
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0088919212142226,
|
||
|
|
"grad_norm": 0.06725753843784332,
|
||
|
|
"learning_rate": 0.00019996098490042242,
|
||
|
|
"loss": 1.562,
|
||
|
|
"step": 795
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.00890310602078137,
|
||
|
|
"grad_norm": 0.07860880345106125,
|
||
|
|
"learning_rate": 0.0001999608866939011,
|
||
|
|
"loss": 1.5554,
|
||
|
|
"step": 796
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008914290827340141,
|
||
|
|
"grad_norm": 0.07922618836164474,
|
||
|
|
"learning_rate": 0.0001999607883639594,
|
||
|
|
"loss": 1.564,
|
||
|
|
"step": 797
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008925475633898911,
|
||
|
|
"grad_norm": 0.07509887218475342,
|
||
|
|
"learning_rate": 0.0001999606899105975,
|
||
|
|
"loss": 1.5531,
|
||
|
|
"step": 798
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008936660440457682,
|
||
|
|
"grad_norm": 0.0813961774110794,
|
||
|
|
"learning_rate": 0.00019996059133381547,
|
||
|
|
"loss": 1.5479,
|
||
|
|
"step": 799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008947845247016454,
|
||
|
|
"grad_norm": 0.09687768667936325,
|
||
|
|
"learning_rate": 0.00019996049263361343,
|
||
|
|
"loss": 1.5527,
|
||
|
|
"step": 800
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 1,
|
||
|
|
"max_steps": 89407,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 9223372036854775807,
|
||
|
|
"save_steps": 100,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": false
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 1.6661988212201226e+20,
|
||
|
|
"train_batch_size": 4,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|