26249 lines
634 KiB
JSON
26249 lines
634 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 500,
|
|
"global_step": 3737,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0002675943270002676,
|
|
"grad_norm": 12.188919067382812,
|
|
"learning_rate": 1.0695187165775401e-08,
|
|
"loss": 1.3815,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0005351886540005352,
|
|
"grad_norm": 9.284855842590332,
|
|
"learning_rate": 2.1390374331550803e-08,
|
|
"loss": 1.4795,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0008027829810008028,
|
|
"grad_norm": 10.941431999206543,
|
|
"learning_rate": 3.2085561497326206e-08,
|
|
"loss": 1.3657,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.0010703773080010704,
|
|
"grad_norm": 13.973172187805176,
|
|
"learning_rate": 4.2780748663101606e-08,
|
|
"loss": 1.4645,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.001337971635001338,
|
|
"grad_norm": 10.671640396118164,
|
|
"learning_rate": 5.3475935828877005e-08,
|
|
"loss": 1.3105,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.0016055659620016055,
|
|
"grad_norm": 12.536456108093262,
|
|
"learning_rate": 6.417112299465241e-08,
|
|
"loss": 1.4371,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.0018731602890018732,
|
|
"grad_norm": 12.72536849975586,
|
|
"learning_rate": 7.48663101604278e-08,
|
|
"loss": 1.3752,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.0021407546160021407,
|
|
"grad_norm": 11.476215362548828,
|
|
"learning_rate": 8.556149732620321e-08,
|
|
"loss": 1.3656,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.002408348943002408,
|
|
"grad_norm": 11.222175598144531,
|
|
"learning_rate": 9.625668449197862e-08,
|
|
"loss": 1.2515,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.002675943270002676,
|
|
"grad_norm": 10.473676681518555,
|
|
"learning_rate": 1.0695187165775401e-07,
|
|
"loss": 1.3108,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.0029435375970029436,
|
|
"grad_norm": 13.289176940917969,
|
|
"learning_rate": 1.1764705882352942e-07,
|
|
"loss": 1.4861,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.003211131924003211,
|
|
"grad_norm": 12.152190208435059,
|
|
"learning_rate": 1.2834224598930482e-07,
|
|
"loss": 1.4196,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.0034787262510034785,
|
|
"grad_norm": 12.78124713897705,
|
|
"learning_rate": 1.3903743315508023e-07,
|
|
"loss": 1.3621,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.0037463205780037465,
|
|
"grad_norm": 13.670173645019531,
|
|
"learning_rate": 1.497326203208556e-07,
|
|
"loss": 1.3979,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.004013914905004014,
|
|
"grad_norm": 11.728761672973633,
|
|
"learning_rate": 1.6042780748663104e-07,
|
|
"loss": 1.4335,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.004281509232004281,
|
|
"grad_norm": 12.166805267333984,
|
|
"learning_rate": 1.7112299465240642e-07,
|
|
"loss": 1.2879,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.004549103559004549,
|
|
"grad_norm": 11.862377166748047,
|
|
"learning_rate": 1.8181818181818183e-07,
|
|
"loss": 1.5192,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.004816697886004816,
|
|
"grad_norm": 14.839558601379395,
|
|
"learning_rate": 1.9251336898395724e-07,
|
|
"loss": 1.4282,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.005084292213005084,
|
|
"grad_norm": 9.195609092712402,
|
|
"learning_rate": 2.0320855614973264e-07,
|
|
"loss": 1.3435,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.005351886540005352,
|
|
"grad_norm": 12.21860408782959,
|
|
"learning_rate": 2.1390374331550802e-07,
|
|
"loss": 1.438,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.00561948086700562,
|
|
"grad_norm": 14.291121482849121,
|
|
"learning_rate": 2.2459893048128345e-07,
|
|
"loss": 1.478,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.005887075194005887,
|
|
"grad_norm": 11.789957046508789,
|
|
"learning_rate": 2.3529411764705883e-07,
|
|
"loss": 1.3549,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.006154669521006155,
|
|
"grad_norm": 8.266191482543945,
|
|
"learning_rate": 2.459893048128342e-07,
|
|
"loss": 1.3308,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.006422263848006422,
|
|
"grad_norm": 14.99113941192627,
|
|
"learning_rate": 2.5668449197860965e-07,
|
|
"loss": 1.4145,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.00668985817500669,
|
|
"grad_norm": 12.698420524597168,
|
|
"learning_rate": 2.6737967914438503e-07,
|
|
"loss": 1.3392,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.006957452502006957,
|
|
"grad_norm": 9.676013946533203,
|
|
"learning_rate": 2.7807486631016046e-07,
|
|
"loss": 1.2585,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.0072250468290072254,
|
|
"grad_norm": 10.009092330932617,
|
|
"learning_rate": 2.8877005347593584e-07,
|
|
"loss": 1.4387,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.007492641156007493,
|
|
"grad_norm": 10.498503684997559,
|
|
"learning_rate": 2.994652406417112e-07,
|
|
"loss": 1.2998,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.00776023548300776,
|
|
"grad_norm": 8.508102416992188,
|
|
"learning_rate": 3.1016042780748665e-07,
|
|
"loss": 1.4256,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.008027829810008028,
|
|
"grad_norm": 13.676020622253418,
|
|
"learning_rate": 3.208556149732621e-07,
|
|
"loss": 1.4097,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.008295424137008296,
|
|
"grad_norm": 8.145886421203613,
|
|
"learning_rate": 3.3155080213903747e-07,
|
|
"loss": 1.4282,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.008563018464008563,
|
|
"grad_norm": 9.673611640930176,
|
|
"learning_rate": 3.4224598930481285e-07,
|
|
"loss": 1.4322,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.008830612791008831,
|
|
"grad_norm": 8.00688362121582,
|
|
"learning_rate": 3.529411764705883e-07,
|
|
"loss": 1.2729,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.009098207118009098,
|
|
"grad_norm": 8.261327743530273,
|
|
"learning_rate": 3.6363636363636366e-07,
|
|
"loss": 1.3191,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.009365801445009366,
|
|
"grad_norm": 7.580234527587891,
|
|
"learning_rate": 3.7433155080213904e-07,
|
|
"loss": 1.3552,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.009633395772009633,
|
|
"grad_norm": 6.747824668884277,
|
|
"learning_rate": 3.8502673796791447e-07,
|
|
"loss": 1.3651,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.009900990099009901,
|
|
"grad_norm": 7.468786239624023,
|
|
"learning_rate": 3.957219251336899e-07,
|
|
"loss": 1.4516,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.010168584426010168,
|
|
"grad_norm": 7.431623935699463,
|
|
"learning_rate": 4.064171122994653e-07,
|
|
"loss": 1.3,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.010436178753010436,
|
|
"grad_norm": 7.183818817138672,
|
|
"learning_rate": 4.1711229946524066e-07,
|
|
"loss": 1.279,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.010703773080010704,
|
|
"grad_norm": 7.624693393707275,
|
|
"learning_rate": 4.2780748663101604e-07,
|
|
"loss": 1.2432,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.010971367407010971,
|
|
"grad_norm": 6.89495325088501,
|
|
"learning_rate": 4.3850267379679153e-07,
|
|
"loss": 1.3082,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.01123896173401124,
|
|
"grad_norm": 6.955787181854248,
|
|
"learning_rate": 4.491978609625669e-07,
|
|
"loss": 1.3646,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.011506556061011506,
|
|
"grad_norm": 6.60789155960083,
|
|
"learning_rate": 4.598930481283423e-07,
|
|
"loss": 1.2452,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.011774150388011774,
|
|
"grad_norm": 5.894134998321533,
|
|
"learning_rate": 4.7058823529411767e-07,
|
|
"loss": 1.2042,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.012041744715012041,
|
|
"grad_norm": 6.411407470703125,
|
|
"learning_rate": 4.812834224598931e-07,
|
|
"loss": 1.3002,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.01230933904201231,
|
|
"grad_norm": 6.972503185272217,
|
|
"learning_rate": 4.919786096256684e-07,
|
|
"loss": 1.3314,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.012576933369012578,
|
|
"grad_norm": 6.686539649963379,
|
|
"learning_rate": 5.02673796791444e-07,
|
|
"loss": 1.3888,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.012844527696012844,
|
|
"grad_norm": 5.989624500274658,
|
|
"learning_rate": 5.133689839572193e-07,
|
|
"loss": 1.2706,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.013112122023013113,
|
|
"grad_norm": 5.950018405914307,
|
|
"learning_rate": 5.240641711229947e-07,
|
|
"loss": 1.2127,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.01337971635001338,
|
|
"grad_norm": 6.040707588195801,
|
|
"learning_rate": 5.347593582887701e-07,
|
|
"loss": 1.3227,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.013647310677013648,
|
|
"grad_norm": 5.798983573913574,
|
|
"learning_rate": 5.454545454545455e-07,
|
|
"loss": 1.274,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.013914905004013914,
|
|
"grad_norm": 6.321473598480225,
|
|
"learning_rate": 5.561497326203209e-07,
|
|
"loss": 1.3229,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.014182499331014183,
|
|
"grad_norm": 6.0828447341918945,
|
|
"learning_rate": 5.668449197860964e-07,
|
|
"loss": 1.2053,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.014450093658014451,
|
|
"grad_norm": 6.756501197814941,
|
|
"learning_rate": 5.775401069518717e-07,
|
|
"loss": 1.2211,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.014717687985014717,
|
|
"grad_norm": 6.133362770080566,
|
|
"learning_rate": 5.882352941176471e-07,
|
|
"loss": 1.0539,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.014985282312014986,
|
|
"grad_norm": 6.778918743133545,
|
|
"learning_rate": 5.989304812834224e-07,
|
|
"loss": 1.2877,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.015252876639015252,
|
|
"grad_norm": 6.3190836906433105,
|
|
"learning_rate": 6.096256684491979e-07,
|
|
"loss": 1.2435,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.01552047096601552,
|
|
"grad_norm": 6.347466468811035,
|
|
"learning_rate": 6.203208556149733e-07,
|
|
"loss": 1.2126,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.01578806529301579,
|
|
"grad_norm": 6.51020622253418,
|
|
"learning_rate": 6.310160427807486e-07,
|
|
"loss": 1.2392,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.016055659620016056,
|
|
"grad_norm": 5.295354843139648,
|
|
"learning_rate": 6.417112299465242e-07,
|
|
"loss": 1.2241,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.016323253947016322,
|
|
"grad_norm": 5.243033409118652,
|
|
"learning_rate": 6.524064171122996e-07,
|
|
"loss": 1.1209,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.016590848274016592,
|
|
"grad_norm": 6.233066558837891,
|
|
"learning_rate": 6.631016042780749e-07,
|
|
"loss": 1.3712,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.01685844260101686,
|
|
"grad_norm": 6.268922328948975,
|
|
"learning_rate": 6.737967914438504e-07,
|
|
"loss": 1.2861,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.017126036928017126,
|
|
"grad_norm": 6.287389755249023,
|
|
"learning_rate": 6.844919786096257e-07,
|
|
"loss": 1.3305,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.017393631255017392,
|
|
"grad_norm": 5.882622718811035,
|
|
"learning_rate": 6.951871657754011e-07,
|
|
"loss": 1.2595,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.017661225582017662,
|
|
"grad_norm": 6.735020637512207,
|
|
"learning_rate": 7.058823529411766e-07,
|
|
"loss": 1.2909,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.01792881990901793,
|
|
"grad_norm": 6.339001178741455,
|
|
"learning_rate": 7.165775401069519e-07,
|
|
"loss": 1.4648,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.018196414236018196,
|
|
"grad_norm": 5.405106544494629,
|
|
"learning_rate": 7.272727272727273e-07,
|
|
"loss": 1.1235,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.018464008563018466,
|
|
"grad_norm": 5.867051124572754,
|
|
"learning_rate": 7.379679144385026e-07,
|
|
"loss": 1.3021,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.018731602890018732,
|
|
"grad_norm": 6.250916481018066,
|
|
"learning_rate": 7.486631016042781e-07,
|
|
"loss": 1.2687,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.018999197217019,
|
|
"grad_norm": 5.591010093688965,
|
|
"learning_rate": 7.593582887700536e-07,
|
|
"loss": 1.1995,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.019266791544019266,
|
|
"grad_norm": 5.43657112121582,
|
|
"learning_rate": 7.700534759358289e-07,
|
|
"loss": 1.2271,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.019534385871019536,
|
|
"grad_norm": 5.751687526702881,
|
|
"learning_rate": 7.807486631016044e-07,
|
|
"loss": 1.3095,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.019801980198019802,
|
|
"grad_norm": 5.247724533081055,
|
|
"learning_rate": 7.914438502673798e-07,
|
|
"loss": 1.257,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.02006957452502007,
|
|
"grad_norm": 5.353349208831787,
|
|
"learning_rate": 8.021390374331551e-07,
|
|
"loss": 1.1566,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.020337168852020335,
|
|
"grad_norm": 5.401248455047607,
|
|
"learning_rate": 8.128342245989306e-07,
|
|
"loss": 1.1666,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.020604763179020606,
|
|
"grad_norm": 5.6218953132629395,
|
|
"learning_rate": 8.235294117647059e-07,
|
|
"loss": 1.2186,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.020872357506020872,
|
|
"grad_norm": 4.6730875968933105,
|
|
"learning_rate": 8.342245989304813e-07,
|
|
"loss": 1.0226,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.02113995183302114,
|
|
"grad_norm": 5.531125545501709,
|
|
"learning_rate": 8.449197860962568e-07,
|
|
"loss": 1.1753,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.02140754616002141,
|
|
"grad_norm": 5.861851215362549,
|
|
"learning_rate": 8.556149732620321e-07,
|
|
"loss": 1.21,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.021675140487021675,
|
|
"grad_norm": 5.872004508972168,
|
|
"learning_rate": 8.663101604278075e-07,
|
|
"loss": 1.1952,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.021942734814021942,
|
|
"grad_norm": 5.292346000671387,
|
|
"learning_rate": 8.770053475935831e-07,
|
|
"loss": 1.1094,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.02221032914102221,
|
|
"grad_norm": 5.308231353759766,
|
|
"learning_rate": 8.877005347593584e-07,
|
|
"loss": 1.1175,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.02247792346802248,
|
|
"grad_norm": 5.5853071212768555,
|
|
"learning_rate": 8.983957219251338e-07,
|
|
"loss": 1.2289,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.022745517795022745,
|
|
"grad_norm": 5.129279613494873,
|
|
"learning_rate": 9.090909090909091e-07,
|
|
"loss": 1.1456,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.023013112122023012,
|
|
"grad_norm": 5.8378753662109375,
|
|
"learning_rate": 9.197860962566846e-07,
|
|
"loss": 1.3367,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.023280706449023282,
|
|
"grad_norm": 5.8775715827941895,
|
|
"learning_rate": 9.3048128342246e-07,
|
|
"loss": 1.2341,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.02354830077602355,
|
|
"grad_norm": 5.519059658050537,
|
|
"learning_rate": 9.411764705882353e-07,
|
|
"loss": 1.2504,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.023815895103023815,
|
|
"grad_norm": 5.483979225158691,
|
|
"learning_rate": 9.518716577540108e-07,
|
|
"loss": 1.1266,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.024083489430024082,
|
|
"grad_norm": 6.002791881561279,
|
|
"learning_rate": 9.625668449197862e-07,
|
|
"loss": 1.279,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.024351083757024352,
|
|
"grad_norm": 5.5504021644592285,
|
|
"learning_rate": 9.732620320855615e-07,
|
|
"loss": 1.1381,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.02461867808402462,
|
|
"grad_norm": 5.171264171600342,
|
|
"learning_rate": 9.839572192513369e-07,
|
|
"loss": 1.217,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.024886272411024885,
|
|
"grad_norm": 5.0262370109558105,
|
|
"learning_rate": 9.946524064171124e-07,
|
|
"loss": 1.2421,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.025153866738025155,
|
|
"grad_norm": 6.277072429656982,
|
|
"learning_rate": 1.005347593582888e-06,
|
|
"loss": 1.3543,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.025421461065025422,
|
|
"grad_norm": 5.420050144195557,
|
|
"learning_rate": 1.0160427807486633e-06,
|
|
"loss": 1.2239,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.02568905539202569,
|
|
"grad_norm": 5.405261516571045,
|
|
"learning_rate": 1.0267379679144386e-06,
|
|
"loss": 1.1811,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.025956649719025955,
|
|
"grad_norm": 5.828834533691406,
|
|
"learning_rate": 1.037433155080214e-06,
|
|
"loss": 1.1913,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.026224244046026225,
|
|
"grad_norm": 5.571322441101074,
|
|
"learning_rate": 1.0481283422459895e-06,
|
|
"loss": 1.1917,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.026491838373026492,
|
|
"grad_norm": 5.72471284866333,
|
|
"learning_rate": 1.0588235294117648e-06,
|
|
"loss": 1.3015,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.02675943270002676,
|
|
"grad_norm": 5.436988353729248,
|
|
"learning_rate": 1.0695187165775401e-06,
|
|
"loss": 1.3139,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.02702702702702703,
|
|
"grad_norm": 6.0810394287109375,
|
|
"learning_rate": 1.0802139037433156e-06,
|
|
"loss": 1.1722,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.027294621354027295,
|
|
"grad_norm": 5.316585540771484,
|
|
"learning_rate": 1.090909090909091e-06,
|
|
"loss": 1.1669,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.027562215681027562,
|
|
"grad_norm": 4.8456950187683105,
|
|
"learning_rate": 1.1016042780748663e-06,
|
|
"loss": 1.063,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.02782981000802783,
|
|
"grad_norm": 5.4848952293396,
|
|
"learning_rate": 1.1122994652406418e-06,
|
|
"loss": 1.1014,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.0280974043350281,
|
|
"grad_norm": 5.489200592041016,
|
|
"learning_rate": 1.1229946524064172e-06,
|
|
"loss": 1.1174,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.028364998662028365,
|
|
"grad_norm": 5.760312080383301,
|
|
"learning_rate": 1.1336898395721927e-06,
|
|
"loss": 1.3387,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.02863259298902863,
|
|
"grad_norm": 5.3693413734436035,
|
|
"learning_rate": 1.144385026737968e-06,
|
|
"loss": 1.2021,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.028900187316028902,
|
|
"grad_norm": 5.530979633331299,
|
|
"learning_rate": 1.1550802139037434e-06,
|
|
"loss": 1.3164,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.02916778164302917,
|
|
"grad_norm": 6.090900897979736,
|
|
"learning_rate": 1.165775401069519e-06,
|
|
"loss": 1.3232,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.029435375970029435,
|
|
"grad_norm": 5.393311023712158,
|
|
"learning_rate": 1.1764705882352942e-06,
|
|
"loss": 1.2509,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.0297029702970297,
|
|
"grad_norm": 5.429086685180664,
|
|
"learning_rate": 1.1871657754010696e-06,
|
|
"loss": 1.115,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.02997056462402997,
|
|
"grad_norm": 5.629342079162598,
|
|
"learning_rate": 1.1978609625668449e-06,
|
|
"loss": 1.0911,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.03023815895103024,
|
|
"grad_norm": 5.1444621086120605,
|
|
"learning_rate": 1.2085561497326204e-06,
|
|
"loss": 1.2814,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.030505753278030505,
|
|
"grad_norm": 5.754062652587891,
|
|
"learning_rate": 1.2192513368983957e-06,
|
|
"loss": 1.2656,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.030773347605030775,
|
|
"grad_norm": 5.319810390472412,
|
|
"learning_rate": 1.2299465240641713e-06,
|
|
"loss": 1.1083,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.03104094193203104,
|
|
"grad_norm": 5.084403991699219,
|
|
"learning_rate": 1.2406417112299466e-06,
|
|
"loss": 1.0987,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.03130853625903131,
|
|
"grad_norm": 5.552883625030518,
|
|
"learning_rate": 1.251336898395722e-06,
|
|
"loss": 1.1867,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.03157613058603158,
|
|
"grad_norm": 5.5615410804748535,
|
|
"learning_rate": 1.2620320855614973e-06,
|
|
"loss": 1.3064,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.03184372491303184,
|
|
"grad_norm": 5.348892688751221,
|
|
"learning_rate": 1.2727272727272728e-06,
|
|
"loss": 1.2016,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.03211131924003211,
|
|
"grad_norm": 5.782661437988281,
|
|
"learning_rate": 1.2834224598930483e-06,
|
|
"loss": 1.1626,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.03237891356703238,
|
|
"grad_norm": 4.8546977043151855,
|
|
"learning_rate": 1.2941176470588237e-06,
|
|
"loss": 1.0428,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.032646507894032645,
|
|
"grad_norm": 5.160636901855469,
|
|
"learning_rate": 1.3048128342245992e-06,
|
|
"loss": 1.1558,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.032914102221032915,
|
|
"grad_norm": 5.260807514190674,
|
|
"learning_rate": 1.3155080213903745e-06,
|
|
"loss": 1.1594,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.033181696548033185,
|
|
"grad_norm": 5.723674774169922,
|
|
"learning_rate": 1.3262032085561499e-06,
|
|
"loss": 1.0968,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.03344929087503345,
|
|
"grad_norm": 5.457815170288086,
|
|
"learning_rate": 1.3368983957219254e-06,
|
|
"loss": 1.2186,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.03371688520203372,
|
|
"grad_norm": 5.3614501953125,
|
|
"learning_rate": 1.3475935828877007e-06,
|
|
"loss": 1.2612,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.03398447952903398,
|
|
"grad_norm": 5.161847114562988,
|
|
"learning_rate": 1.358288770053476e-06,
|
|
"loss": 1.1159,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.03425207385603425,
|
|
"grad_norm": 5.856586456298828,
|
|
"learning_rate": 1.3689839572192514e-06,
|
|
"loss": 1.2016,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.03451966818303452,
|
|
"grad_norm": 5.2745490074157715,
|
|
"learning_rate": 1.379679144385027e-06,
|
|
"loss": 1.241,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.034787262510034785,
|
|
"grad_norm": 5.403688907623291,
|
|
"learning_rate": 1.3903743315508022e-06,
|
|
"loss": 1.1044,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.035054856837035055,
|
|
"grad_norm": 5.162592887878418,
|
|
"learning_rate": 1.4010695187165776e-06,
|
|
"loss": 1.0964,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.035322451164035325,
|
|
"grad_norm": 5.914812088012695,
|
|
"learning_rate": 1.4117647058823531e-06,
|
|
"loss": 1.2008,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.03559004549103559,
|
|
"grad_norm": 5.173002243041992,
|
|
"learning_rate": 1.4224598930481284e-06,
|
|
"loss": 1.2276,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.03585763981803586,
|
|
"grad_norm": 5.35471773147583,
|
|
"learning_rate": 1.4331550802139038e-06,
|
|
"loss": 1.2126,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.03612523414503613,
|
|
"grad_norm": 5.432989120483398,
|
|
"learning_rate": 1.4438502673796793e-06,
|
|
"loss": 1.2237,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.03639282847203639,
|
|
"grad_norm": 5.986301898956299,
|
|
"learning_rate": 1.4545454545454546e-06,
|
|
"loss": 1.3529,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.03666042279903666,
|
|
"grad_norm": 4.566310882568359,
|
|
"learning_rate": 1.46524064171123e-06,
|
|
"loss": 1.1274,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.03692801712603693,
|
|
"grad_norm": 4.906930923461914,
|
|
"learning_rate": 1.4759358288770053e-06,
|
|
"loss": 1.077,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.037195611453037195,
|
|
"grad_norm": 5.325079917907715,
|
|
"learning_rate": 1.4866310160427808e-06,
|
|
"loss": 1.1759,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.037463205780037465,
|
|
"grad_norm": 5.840808868408203,
|
|
"learning_rate": 1.4973262032085562e-06,
|
|
"loss": 1.2583,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.03773080010703773,
|
|
"grad_norm": 5.32767915725708,
|
|
"learning_rate": 1.5080213903743315e-06,
|
|
"loss": 1.1345,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.037998394434038,
|
|
"grad_norm": 5.714527130126953,
|
|
"learning_rate": 1.5187165775401072e-06,
|
|
"loss": 1.1113,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.03826598876103827,
|
|
"grad_norm": 5.20102596282959,
|
|
"learning_rate": 1.5294117647058826e-06,
|
|
"loss": 1.1802,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.03853358308803853,
|
|
"grad_norm": 5.638003826141357,
|
|
"learning_rate": 1.5401069518716579e-06,
|
|
"loss": 1.1773,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.0388011774150388,
|
|
"grad_norm": 6.044027328491211,
|
|
"learning_rate": 1.5508021390374334e-06,
|
|
"loss": 1.2413,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.03906877174203907,
|
|
"grad_norm": 5.571484565734863,
|
|
"learning_rate": 1.5614973262032088e-06,
|
|
"loss": 1.2316,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.039336366069039334,
|
|
"grad_norm": 4.9136457443237305,
|
|
"learning_rate": 1.572192513368984e-06,
|
|
"loss": 1.2048,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.039603960396039604,
|
|
"grad_norm": 5.684943675994873,
|
|
"learning_rate": 1.5828877005347596e-06,
|
|
"loss": 1.1958,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.039871554723039875,
|
|
"grad_norm": 5.061483860015869,
|
|
"learning_rate": 1.593582887700535e-06,
|
|
"loss": 1.0256,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.04013914905004014,
|
|
"grad_norm": 5.615631580352783,
|
|
"learning_rate": 1.6042780748663103e-06,
|
|
"loss": 1.1739,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.04040674337704041,
|
|
"grad_norm": 5.1106791496276855,
|
|
"learning_rate": 1.6149732620320858e-06,
|
|
"loss": 1.1252,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.04067433770404067,
|
|
"grad_norm": 5.248224258422852,
|
|
"learning_rate": 1.6256684491978611e-06,
|
|
"loss": 1.1636,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.04094193203104094,
|
|
"grad_norm": 5.454551696777344,
|
|
"learning_rate": 1.6363636363636365e-06,
|
|
"loss": 1.2073,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.04120952635804121,
|
|
"grad_norm": 4.683351039886475,
|
|
"learning_rate": 1.6470588235294118e-06,
|
|
"loss": 1.0651,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.041477120685041474,
|
|
"grad_norm": 5.561789512634277,
|
|
"learning_rate": 1.6577540106951873e-06,
|
|
"loss": 1.1654,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.041744715012041744,
|
|
"grad_norm": 4.899862766265869,
|
|
"learning_rate": 1.6684491978609627e-06,
|
|
"loss": 1.116,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.042012309339042014,
|
|
"grad_norm": 5.253740310668945,
|
|
"learning_rate": 1.679144385026738e-06,
|
|
"loss": 1.1066,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.04227990366604228,
|
|
"grad_norm": 5.0830183029174805,
|
|
"learning_rate": 1.6898395721925135e-06,
|
|
"loss": 1.0951,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.04254749799304255,
|
|
"grad_norm": 5.221453666687012,
|
|
"learning_rate": 1.7005347593582888e-06,
|
|
"loss": 1.1063,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.04281509232004282,
|
|
"grad_norm": 5.58682918548584,
|
|
"learning_rate": 1.7112299465240642e-06,
|
|
"loss": 1.0932,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.04308268664704308,
|
|
"grad_norm": 5.066821098327637,
|
|
"learning_rate": 1.7219251336898395e-06,
|
|
"loss": 1.2249,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.04335028097404335,
|
|
"grad_norm": 4.870170593261719,
|
|
"learning_rate": 1.732620320855615e-06,
|
|
"loss": 1.0948,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.04361787530104362,
|
|
"grad_norm": 5.372590065002441,
|
|
"learning_rate": 1.7433155080213904e-06,
|
|
"loss": 1.1966,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.043885469628043884,
|
|
"grad_norm": 5.09646463394165,
|
|
"learning_rate": 1.7540106951871661e-06,
|
|
"loss": 1.0542,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.044153063955044154,
|
|
"grad_norm": 5.390144348144531,
|
|
"learning_rate": 1.7647058823529414e-06,
|
|
"loss": 1.197,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.04442065828204442,
|
|
"grad_norm": 5.653879642486572,
|
|
"learning_rate": 1.7754010695187168e-06,
|
|
"loss": 1.2669,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.04468825260904469,
|
|
"grad_norm": 5.457263469696045,
|
|
"learning_rate": 1.7860962566844923e-06,
|
|
"loss": 1.1263,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.04495584693604496,
|
|
"grad_norm": 5.214939594268799,
|
|
"learning_rate": 1.7967914438502676e-06,
|
|
"loss": 1.272,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.04522344126304522,
|
|
"grad_norm": 5.01685905456543,
|
|
"learning_rate": 1.807486631016043e-06,
|
|
"loss": 1.1515,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.04549103559004549,
|
|
"grad_norm": 5.313577175140381,
|
|
"learning_rate": 1.8181818181818183e-06,
|
|
"loss": 1.0548,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.04575862991704576,
|
|
"grad_norm": 5.595825672149658,
|
|
"learning_rate": 1.8288770053475938e-06,
|
|
"loss": 1.2787,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.046026224244046024,
|
|
"grad_norm": 5.331969738006592,
|
|
"learning_rate": 1.8395721925133692e-06,
|
|
"loss": 1.2543,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.046293818571046294,
|
|
"grad_norm": 5.564863204956055,
|
|
"learning_rate": 1.8502673796791445e-06,
|
|
"loss": 1.1323,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.046561412898046564,
|
|
"grad_norm": 4.885172367095947,
|
|
"learning_rate": 1.86096256684492e-06,
|
|
"loss": 1.136,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.04682900722504683,
|
|
"grad_norm": 5.40251350402832,
|
|
"learning_rate": 1.8716577540106954e-06,
|
|
"loss": 1.1442,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.0470966015520471,
|
|
"grad_norm": 5.090615749359131,
|
|
"learning_rate": 1.8823529411764707e-06,
|
|
"loss": 1.1505,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.04736419587904737,
|
|
"grad_norm": 5.03092622756958,
|
|
"learning_rate": 1.893048128342246e-06,
|
|
"loss": 1.2347,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.04763179020604763,
|
|
"grad_norm": 5.308589935302734,
|
|
"learning_rate": 1.9037433155080215e-06,
|
|
"loss": 1.1856,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.0478993845330479,
|
|
"grad_norm": 5.609830379486084,
|
|
"learning_rate": 1.914438502673797e-06,
|
|
"loss": 1.208,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.048166978860048164,
|
|
"grad_norm": 4.926831245422363,
|
|
"learning_rate": 1.9251336898395724e-06,
|
|
"loss": 1.1978,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.048434573187048434,
|
|
"grad_norm": 5.084292888641357,
|
|
"learning_rate": 1.9358288770053475e-06,
|
|
"loss": 1.2949,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.048702167514048704,
|
|
"grad_norm": 4.909692764282227,
|
|
"learning_rate": 1.946524064171123e-06,
|
|
"loss": 1.1134,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.04896976184104897,
|
|
"grad_norm": 5.650701522827148,
|
|
"learning_rate": 1.9572192513368986e-06,
|
|
"loss": 1.2427,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.04923735616804924,
|
|
"grad_norm": 5.057121753692627,
|
|
"learning_rate": 1.9679144385026737e-06,
|
|
"loss": 1.1445,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.04950495049504951,
|
|
"grad_norm": 5.226599216461182,
|
|
"learning_rate": 1.9786096256684497e-06,
|
|
"loss": 1.1969,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.04977254482204977,
|
|
"grad_norm": 5.469078540802002,
|
|
"learning_rate": 1.989304812834225e-06,
|
|
"loss": 1.2631,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.05004013914905004,
|
|
"grad_norm": 5.3292460441589355,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": 1.2036,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.05030773347605031,
|
|
"grad_norm": 5.132472991943359,
|
|
"learning_rate": 2.010695187165776e-06,
|
|
"loss": 1.1629,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.050575327803050574,
|
|
"grad_norm": 5.4047369956970215,
|
|
"learning_rate": 2.021390374331551e-06,
|
|
"loss": 1.1455,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.050842922130050844,
|
|
"grad_norm": 5.656977653503418,
|
|
"learning_rate": 2.0320855614973265e-06,
|
|
"loss": 1.205,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.051110516457051114,
|
|
"grad_norm": 5.2581963539123535,
|
|
"learning_rate": 2.0427807486631016e-06,
|
|
"loss": 1.1071,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.05137811078405138,
|
|
"grad_norm": 6.479303359985352,
|
|
"learning_rate": 2.053475935828877e-06,
|
|
"loss": 1.1909,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.05164570511105165,
|
|
"grad_norm": 5.340463638305664,
|
|
"learning_rate": 2.0641711229946527e-06,
|
|
"loss": 1.139,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.05191329943805191,
|
|
"grad_norm": 5.29105281829834,
|
|
"learning_rate": 2.074866310160428e-06,
|
|
"loss": 1.1934,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.05218089376505218,
|
|
"grad_norm": 5.378291606903076,
|
|
"learning_rate": 2.0855614973262034e-06,
|
|
"loss": 1.2956,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.05244848809205245,
|
|
"grad_norm": 5.540526390075684,
|
|
"learning_rate": 2.096256684491979e-06,
|
|
"loss": 1.2126,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.052716082419052714,
|
|
"grad_norm": 4.78275728225708,
|
|
"learning_rate": 2.106951871657754e-06,
|
|
"loss": 1.126,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.052983676746052984,
|
|
"grad_norm": 5.544436931610107,
|
|
"learning_rate": 2.1176470588235296e-06,
|
|
"loss": 1.1113,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.053251271073053254,
|
|
"grad_norm": 5.038266658782959,
|
|
"learning_rate": 2.128342245989305e-06,
|
|
"loss": 1.1551,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.05351886540005352,
|
|
"grad_norm": 5.480011463165283,
|
|
"learning_rate": 2.1390374331550802e-06,
|
|
"loss": 1.1223,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.05378645972705379,
|
|
"grad_norm": 5.360974311828613,
|
|
"learning_rate": 2.1497326203208558e-06,
|
|
"loss": 1.1213,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.05405405405405406,
|
|
"grad_norm": 4.914999008178711,
|
|
"learning_rate": 2.1604278074866313e-06,
|
|
"loss": 1.1067,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.05432164838105432,
|
|
"grad_norm": 5.201199531555176,
|
|
"learning_rate": 2.1711229946524064e-06,
|
|
"loss": 1.0909,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.05458924270805459,
|
|
"grad_norm": 5.064680576324463,
|
|
"learning_rate": 2.181818181818182e-06,
|
|
"loss": 1.1301,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.05485683703505485,
|
|
"grad_norm": 5.714580535888672,
|
|
"learning_rate": 2.1925133689839575e-06,
|
|
"loss": 1.3127,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.055124431362055124,
|
|
"grad_norm": 5.075433731079102,
|
|
"learning_rate": 2.2032085561497326e-06,
|
|
"loss": 1.2102,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.055392025689055394,
|
|
"grad_norm": 5.047552108764648,
|
|
"learning_rate": 2.213903743315508e-06,
|
|
"loss": 1.0519,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.05565962001605566,
|
|
"grad_norm": 5.115383148193359,
|
|
"learning_rate": 2.2245989304812837e-06,
|
|
"loss": 1.0859,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.05592721434305593,
|
|
"grad_norm": 4.877355575561523,
|
|
"learning_rate": 2.2352941176470592e-06,
|
|
"loss": 0.9786,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.0561948086700562,
|
|
"grad_norm": 4.9516921043396,
|
|
"learning_rate": 2.2459893048128343e-06,
|
|
"loss": 1.1192,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.05646240299705646,
|
|
"grad_norm": 4.995131969451904,
|
|
"learning_rate": 2.25668449197861e-06,
|
|
"loss": 1.1766,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.05672999732405673,
|
|
"grad_norm": 5.119645595550537,
|
|
"learning_rate": 2.2673796791443854e-06,
|
|
"loss": 1.0791,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.056997591651057,
|
|
"grad_norm": 5.06790828704834,
|
|
"learning_rate": 2.2780748663101605e-06,
|
|
"loss": 1.2245,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.05726518597805726,
|
|
"grad_norm": 5.313665390014648,
|
|
"learning_rate": 2.288770053475936e-06,
|
|
"loss": 1.1242,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.05753278030505753,
|
|
"grad_norm": 5.126317977905273,
|
|
"learning_rate": 2.2994652406417116e-06,
|
|
"loss": 1.234,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.057800374632057804,
|
|
"grad_norm": 5.17064094543457,
|
|
"learning_rate": 2.3101604278074867e-06,
|
|
"loss": 1.1468,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.05806796895905807,
|
|
"grad_norm": 5.41604471206665,
|
|
"learning_rate": 2.3208556149732623e-06,
|
|
"loss": 1.1766,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.05833556328605834,
|
|
"grad_norm": 5.253145217895508,
|
|
"learning_rate": 2.331550802139038e-06,
|
|
"loss": 1.2682,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.0586031576130586,
|
|
"grad_norm": 5.144425868988037,
|
|
"learning_rate": 2.342245989304813e-06,
|
|
"loss": 1.1138,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.05887075194005887,
|
|
"grad_norm": 5.531948089599609,
|
|
"learning_rate": 2.3529411764705885e-06,
|
|
"loss": 1.2583,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.05913834626705914,
|
|
"grad_norm": 5.511721134185791,
|
|
"learning_rate": 2.363636363636364e-06,
|
|
"loss": 1.2773,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.0594059405940594,
|
|
"grad_norm": 5.116267204284668,
|
|
"learning_rate": 2.374331550802139e-06,
|
|
"loss": 1.2009,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.05967353492105967,
|
|
"grad_norm": 5.6089630126953125,
|
|
"learning_rate": 2.3850267379679146e-06,
|
|
"loss": 1.2062,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.05994112924805994,
|
|
"grad_norm": 4.710421085357666,
|
|
"learning_rate": 2.3957219251336898e-06,
|
|
"loss": 1.1464,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.060208723575060207,
|
|
"grad_norm": 5.434922218322754,
|
|
"learning_rate": 2.4064171122994653e-06,
|
|
"loss": 1.3069,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.06047631790206048,
|
|
"grad_norm": 4.87479305267334,
|
|
"learning_rate": 2.417112299465241e-06,
|
|
"loss": 1.1407,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.06074391222906075,
|
|
"grad_norm": 4.942396640777588,
|
|
"learning_rate": 2.427807486631016e-06,
|
|
"loss": 1.0771,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.06101150655606101,
|
|
"grad_norm": 4.659417629241943,
|
|
"learning_rate": 2.4385026737967915e-06,
|
|
"loss": 1.0804,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.06127910088306128,
|
|
"grad_norm": 4.819082260131836,
|
|
"learning_rate": 2.449197860962567e-06,
|
|
"loss": 1.1493,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.06154669521006155,
|
|
"grad_norm": 5.2966437339782715,
|
|
"learning_rate": 2.4598930481283426e-06,
|
|
"loss": 1.0462,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.06181428953706181,
|
|
"grad_norm": 4.857460021972656,
|
|
"learning_rate": 2.470588235294118e-06,
|
|
"loss": 1.1203,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.06208188386406208,
|
|
"grad_norm": 5.175891399383545,
|
|
"learning_rate": 2.4812834224598932e-06,
|
|
"loss": 1.2988,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.062349478191062346,
|
|
"grad_norm": 5.051662445068359,
|
|
"learning_rate": 2.4919786096256688e-06,
|
|
"loss": 1.1729,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.06261707251806262,
|
|
"grad_norm": 5.011804580688477,
|
|
"learning_rate": 2.502673796791444e-06,
|
|
"loss": 1.1163,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.06288466684506289,
|
|
"grad_norm": 5.881048202514648,
|
|
"learning_rate": 2.5133689839572194e-06,
|
|
"loss": 1.1809,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.06315226117206316,
|
|
"grad_norm": 5.304912567138672,
|
|
"learning_rate": 2.5240641711229945e-06,
|
|
"loss": 1.1589,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.06341985549906343,
|
|
"grad_norm": 4.8746256828308105,
|
|
"learning_rate": 2.5347593582887705e-06,
|
|
"loss": 1.1181,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.06368744982606368,
|
|
"grad_norm": 5.147508144378662,
|
|
"learning_rate": 2.5454545454545456e-06,
|
|
"loss": 1.2074,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.06395504415306395,
|
|
"grad_norm": 5.325894355773926,
|
|
"learning_rate": 2.556149732620321e-06,
|
|
"loss": 1.0711,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.06422263848006422,
|
|
"grad_norm": 5.343993186950684,
|
|
"learning_rate": 2.5668449197860967e-06,
|
|
"loss": 1.1532,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.0644902328070645,
|
|
"grad_norm": 5.7134270668029785,
|
|
"learning_rate": 2.577540106951872e-06,
|
|
"loss": 1.2013,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.06475782713406476,
|
|
"grad_norm": 4.556005001068115,
|
|
"learning_rate": 2.5882352941176473e-06,
|
|
"loss": 0.9915,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.06502542146106502,
|
|
"grad_norm": 5.159268856048584,
|
|
"learning_rate": 2.5989304812834225e-06,
|
|
"loss": 1.1166,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.06529301578806529,
|
|
"grad_norm": 5.182318210601807,
|
|
"learning_rate": 2.6096256684491984e-06,
|
|
"loss": 1.1421,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.06556061011506556,
|
|
"grad_norm": 5.662712574005127,
|
|
"learning_rate": 2.6203208556149735e-06,
|
|
"loss": 1.2302,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.06582820444206583,
|
|
"grad_norm": 5.712917804718018,
|
|
"learning_rate": 2.631016042780749e-06,
|
|
"loss": 1.3106,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.0660957987690661,
|
|
"grad_norm": 5.176303863525391,
|
|
"learning_rate": 2.641711229946524e-06,
|
|
"loss": 1.2056,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.06636339309606637,
|
|
"grad_norm": 5.5327982902526855,
|
|
"learning_rate": 2.6524064171122997e-06,
|
|
"loss": 1.2833,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.06663098742306663,
|
|
"grad_norm": 5.480837345123291,
|
|
"learning_rate": 2.663101604278075e-06,
|
|
"loss": 1.1822,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.0668985817500669,
|
|
"grad_norm": 5.221635818481445,
|
|
"learning_rate": 2.673796791443851e-06,
|
|
"loss": 1.1049,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.06716617607706717,
|
|
"grad_norm": 5.3436689376831055,
|
|
"learning_rate": 2.684491978609626e-06,
|
|
"loss": 1.1557,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.06743377040406744,
|
|
"grad_norm": 5.277728080749512,
|
|
"learning_rate": 2.6951871657754015e-06,
|
|
"loss": 1.1756,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.0677013647310677,
|
|
"grad_norm": 4.869785308837891,
|
|
"learning_rate": 2.7058823529411766e-06,
|
|
"loss": 1.0731,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.06796895905806796,
|
|
"grad_norm": 4.855271339416504,
|
|
"learning_rate": 2.716577540106952e-06,
|
|
"loss": 1.1427,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.06823655338506823,
|
|
"grad_norm": 5.062397003173828,
|
|
"learning_rate": 2.7272727272727272e-06,
|
|
"loss": 1.0324,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.0685041477120685,
|
|
"grad_norm": 5.3740620613098145,
|
|
"learning_rate": 2.7379679144385028e-06,
|
|
"loss": 1.2113,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.06877174203906877,
|
|
"grad_norm": 5.443506717681885,
|
|
"learning_rate": 2.748663101604278e-06,
|
|
"loss": 1.2146,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.06903933636606904,
|
|
"grad_norm": 5.629642009735107,
|
|
"learning_rate": 2.759358288770054e-06,
|
|
"loss": 1.1804,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.06930693069306931,
|
|
"grad_norm": 4.957646369934082,
|
|
"learning_rate": 2.770053475935829e-06,
|
|
"loss": 1.0639,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.06957452502006957,
|
|
"grad_norm": 5.224496841430664,
|
|
"learning_rate": 2.7807486631016045e-06,
|
|
"loss": 1.0334,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.06984211934706984,
|
|
"grad_norm": 4.813484191894531,
|
|
"learning_rate": 2.79144385026738e-06,
|
|
"loss": 1.0519,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.07010971367407011,
|
|
"grad_norm": 5.483233451843262,
|
|
"learning_rate": 2.802139037433155e-06,
|
|
"loss": 1.1352,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.07037730800107038,
|
|
"grad_norm": 5.673671245574951,
|
|
"learning_rate": 2.812834224598931e-06,
|
|
"loss": 1.1306,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.07064490232807065,
|
|
"grad_norm": 5.3372955322265625,
|
|
"learning_rate": 2.8235294117647062e-06,
|
|
"loss": 1.3173,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.0709124966550709,
|
|
"grad_norm": 5.20352840423584,
|
|
"learning_rate": 2.8342245989304818e-06,
|
|
"loss": 1.2713,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.07118009098207118,
|
|
"grad_norm": 5.352963924407959,
|
|
"learning_rate": 2.844919786096257e-06,
|
|
"loss": 1.1604,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.07144768530907145,
|
|
"grad_norm": 5.504600524902344,
|
|
"learning_rate": 2.8556149732620324e-06,
|
|
"loss": 1.1683,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.07171527963607172,
|
|
"grad_norm": 4.989468097686768,
|
|
"learning_rate": 2.8663101604278075e-06,
|
|
"loss": 1.1422,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.07198287396307199,
|
|
"grad_norm": 5.034707069396973,
|
|
"learning_rate": 2.8770053475935835e-06,
|
|
"loss": 1.1341,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.07225046829007226,
|
|
"grad_norm": 5.23364782333374,
|
|
"learning_rate": 2.8877005347593586e-06,
|
|
"loss": 1.2117,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.07251806261707251,
|
|
"grad_norm": 4.977064609527588,
|
|
"learning_rate": 2.898395721925134e-06,
|
|
"loss": 1.1179,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.07278565694407278,
|
|
"grad_norm": 5.462220668792725,
|
|
"learning_rate": 2.9090909090909093e-06,
|
|
"loss": 1.1499,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.07305325127107305,
|
|
"grad_norm": 4.852994918823242,
|
|
"learning_rate": 2.919786096256685e-06,
|
|
"loss": 1.0156,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.07332084559807332,
|
|
"grad_norm": 5.298532485961914,
|
|
"learning_rate": 2.93048128342246e-06,
|
|
"loss": 1.2225,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.07358843992507359,
|
|
"grad_norm": 4.783885955810547,
|
|
"learning_rate": 2.9411764705882355e-06,
|
|
"loss": 1.1382,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.07385603425207386,
|
|
"grad_norm": 5.855717182159424,
|
|
"learning_rate": 2.9518716577540106e-06,
|
|
"loss": 1.2779,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.07412362857907412,
|
|
"grad_norm": 4.655195713043213,
|
|
"learning_rate": 2.9625668449197865e-06,
|
|
"loss": 1.0411,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.07439122290607439,
|
|
"grad_norm": 5.513675689697266,
|
|
"learning_rate": 2.9732620320855617e-06,
|
|
"loss": 1.2022,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.07465881723307466,
|
|
"grad_norm": 4.99501371383667,
|
|
"learning_rate": 2.983957219251337e-06,
|
|
"loss": 1.2175,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.07492641156007493,
|
|
"grad_norm": 5.31617546081543,
|
|
"learning_rate": 2.9946524064171123e-06,
|
|
"loss": 1.2313,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.0751940058870752,
|
|
"grad_norm": 4.424401760101318,
|
|
"learning_rate": 3.005347593582888e-06,
|
|
"loss": 1.0215,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.07546160021407546,
|
|
"grad_norm": 6.041825771331787,
|
|
"learning_rate": 3.016042780748663e-06,
|
|
"loss": 1.1797,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.07572919454107573,
|
|
"grad_norm": 5.0878424644470215,
|
|
"learning_rate": 3.026737967914439e-06,
|
|
"loss": 1.117,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.075996788868076,
|
|
"grad_norm": 5.10042667388916,
|
|
"learning_rate": 3.0374331550802145e-06,
|
|
"loss": 1.1119,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.07626438319507627,
|
|
"grad_norm": 4.863668918609619,
|
|
"learning_rate": 3.0481283422459896e-06,
|
|
"loss": 1.1497,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.07653197752207654,
|
|
"grad_norm": 5.0864739418029785,
|
|
"learning_rate": 3.058823529411765e-06,
|
|
"loss": 1.1288,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.0767995718490768,
|
|
"grad_norm": 5.120104789733887,
|
|
"learning_rate": 3.0695187165775402e-06,
|
|
"loss": 1.111,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.07706716617607706,
|
|
"grad_norm": 4.764978885650635,
|
|
"learning_rate": 3.0802139037433158e-06,
|
|
"loss": 1.1646,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.07733476050307733,
|
|
"grad_norm": 4.789579391479492,
|
|
"learning_rate": 3.090909090909091e-06,
|
|
"loss": 1.1771,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.0776023548300776,
|
|
"grad_norm": 5.163838863372803,
|
|
"learning_rate": 3.101604278074867e-06,
|
|
"loss": 1.1569,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.07786994915707787,
|
|
"grad_norm": 5.361174583435059,
|
|
"learning_rate": 3.112299465240642e-06,
|
|
"loss": 1.0451,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.07813754348407814,
|
|
"grad_norm": 5.318236827850342,
|
|
"learning_rate": 3.1229946524064175e-06,
|
|
"loss": 1.1614,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.0784051378110784,
|
|
"grad_norm": 5.045052528381348,
|
|
"learning_rate": 3.1336898395721926e-06,
|
|
"loss": 1.2178,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.07867273213807867,
|
|
"grad_norm": 5.2366414070129395,
|
|
"learning_rate": 3.144385026737968e-06,
|
|
"loss": 1.1697,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.07894032646507894,
|
|
"grad_norm": 4.973846912384033,
|
|
"learning_rate": 3.1550802139037433e-06,
|
|
"loss": 1.1503,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.07920792079207921,
|
|
"grad_norm": 5.384598731994629,
|
|
"learning_rate": 3.1657754010695192e-06,
|
|
"loss": 1.0959,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.07947551511907948,
|
|
"grad_norm": 5.26463508605957,
|
|
"learning_rate": 3.1764705882352943e-06,
|
|
"loss": 1.122,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.07974310944607975,
|
|
"grad_norm": 5.0575737953186035,
|
|
"learning_rate": 3.18716577540107e-06,
|
|
"loss": 1.226,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.08001070377308,
|
|
"grad_norm": 5.238304615020752,
|
|
"learning_rate": 3.197860962566845e-06,
|
|
"loss": 1.2016,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.08027829810008028,
|
|
"grad_norm": 5.158024787902832,
|
|
"learning_rate": 3.2085561497326205e-06,
|
|
"loss": 1.174,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.08054589242708055,
|
|
"grad_norm": 5.37693452835083,
|
|
"learning_rate": 3.2192513368983957e-06,
|
|
"loss": 1.2009,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.08081348675408082,
|
|
"grad_norm": 5.042771816253662,
|
|
"learning_rate": 3.2299465240641716e-06,
|
|
"loss": 1.0364,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.08108108108108109,
|
|
"grad_norm": 5.316686153411865,
|
|
"learning_rate": 3.2406417112299467e-06,
|
|
"loss": 1.1912,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.08134867540808134,
|
|
"grad_norm": 4.9131011962890625,
|
|
"learning_rate": 3.2513368983957223e-06,
|
|
"loss": 1.1555,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.08161626973508161,
|
|
"grad_norm": 5.539770126342773,
|
|
"learning_rate": 3.262032085561498e-06,
|
|
"loss": 1.3409,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.08188386406208188,
|
|
"grad_norm": 4.658355712890625,
|
|
"learning_rate": 3.272727272727273e-06,
|
|
"loss": 1.2177,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.08215145838908215,
|
|
"grad_norm": 4.792846202850342,
|
|
"learning_rate": 3.2834224598930485e-06,
|
|
"loss": 0.9993,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.08241905271608242,
|
|
"grad_norm": 4.886536598205566,
|
|
"learning_rate": 3.2941176470588236e-06,
|
|
"loss": 1.0884,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.08268664704308269,
|
|
"grad_norm": 4.976652145385742,
|
|
"learning_rate": 3.3048128342245995e-06,
|
|
"loss": 1.1197,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.08295424137008295,
|
|
"grad_norm": 4.819093704223633,
|
|
"learning_rate": 3.3155080213903747e-06,
|
|
"loss": 1.1089,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.08322183569708322,
|
|
"grad_norm": 5.2282843589782715,
|
|
"learning_rate": 3.32620320855615e-06,
|
|
"loss": 1.1593,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.08348943002408349,
|
|
"grad_norm": 5.575779438018799,
|
|
"learning_rate": 3.3368983957219253e-06,
|
|
"loss": 1.1697,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.08375702435108376,
|
|
"grad_norm": 5.0405354499816895,
|
|
"learning_rate": 3.347593582887701e-06,
|
|
"loss": 1.2207,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.08402461867808403,
|
|
"grad_norm": 4.537448406219482,
|
|
"learning_rate": 3.358288770053476e-06,
|
|
"loss": 0.9928,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.0842922130050843,
|
|
"grad_norm": 4.672475337982178,
|
|
"learning_rate": 3.368983957219252e-06,
|
|
"loss": 1.0073,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.08455980733208456,
|
|
"grad_norm": 5.076086044311523,
|
|
"learning_rate": 3.379679144385027e-06,
|
|
"loss": 1.1783,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.08482740165908483,
|
|
"grad_norm": 4.888884544372559,
|
|
"learning_rate": 3.3903743315508026e-06,
|
|
"loss": 1.1204,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.0850949959860851,
|
|
"grad_norm": 5.4211554527282715,
|
|
"learning_rate": 3.4010695187165777e-06,
|
|
"loss": 1.2754,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.08536259031308537,
|
|
"grad_norm": 5.268496513366699,
|
|
"learning_rate": 3.4117647058823532e-06,
|
|
"loss": 1.1334,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.08563018464008564,
|
|
"grad_norm": 4.835329532623291,
|
|
"learning_rate": 3.4224598930481284e-06,
|
|
"loss": 1.1577,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.08589777896708589,
|
|
"grad_norm": 4.869121074676514,
|
|
"learning_rate": 3.433155080213904e-06,
|
|
"loss": 1.173,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.08616537329408616,
|
|
"grad_norm": 5.070735931396484,
|
|
"learning_rate": 3.443850267379679e-06,
|
|
"loss": 1.1476,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.08643296762108643,
|
|
"grad_norm": 5.45928430557251,
|
|
"learning_rate": 3.454545454545455e-06,
|
|
"loss": 1.1205,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.0867005619480867,
|
|
"grad_norm": 5.189511299133301,
|
|
"learning_rate": 3.46524064171123e-06,
|
|
"loss": 1.3158,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.08696815627508697,
|
|
"grad_norm": 4.831698417663574,
|
|
"learning_rate": 3.4759358288770056e-06,
|
|
"loss": 1.0329,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.08723575060208724,
|
|
"grad_norm": 4.864784240722656,
|
|
"learning_rate": 3.4866310160427807e-06,
|
|
"loss": 1.0805,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.0875033449290875,
|
|
"grad_norm": 4.9103240966796875,
|
|
"learning_rate": 3.4973262032085563e-06,
|
|
"loss": 1.1277,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.08777093925608777,
|
|
"grad_norm": 4.773064136505127,
|
|
"learning_rate": 3.5080213903743322e-06,
|
|
"loss": 1.0463,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.08803853358308804,
|
|
"grad_norm": 5.021261692047119,
|
|
"learning_rate": 3.5187165775401074e-06,
|
|
"loss": 1.0795,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.08830612791008831,
|
|
"grad_norm": 5.683427810668945,
|
|
"learning_rate": 3.529411764705883e-06,
|
|
"loss": 1.094,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.08857372223708858,
|
|
"grad_norm": 4.894428253173828,
|
|
"learning_rate": 3.540106951871658e-06,
|
|
"loss": 1.2105,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.08884131656408883,
|
|
"grad_norm": 5.363523006439209,
|
|
"learning_rate": 3.5508021390374335e-06,
|
|
"loss": 1.3021,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.0891089108910891,
|
|
"grad_norm": 4.995115756988525,
|
|
"learning_rate": 3.5614973262032087e-06,
|
|
"loss": 1.2159,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.08937650521808937,
|
|
"grad_norm": 4.57165002822876,
|
|
"learning_rate": 3.5721925133689846e-06,
|
|
"loss": 1.1042,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.08964409954508964,
|
|
"grad_norm": 4.659427165985107,
|
|
"learning_rate": 3.5828877005347597e-06,
|
|
"loss": 1.0984,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.08991169387208992,
|
|
"grad_norm": 4.8430986404418945,
|
|
"learning_rate": 3.5935828877005353e-06,
|
|
"loss": 1.0503,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.09017928819909019,
|
|
"grad_norm": 5.450077056884766,
|
|
"learning_rate": 3.6042780748663104e-06,
|
|
"loss": 1.2578,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.09044688252609044,
|
|
"grad_norm": 5.203562259674072,
|
|
"learning_rate": 3.614973262032086e-06,
|
|
"loss": 1.1925,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.09071447685309071,
|
|
"grad_norm": 5.250705718994141,
|
|
"learning_rate": 3.625668449197861e-06,
|
|
"loss": 1.1434,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.09098207118009098,
|
|
"grad_norm": 5.062129020690918,
|
|
"learning_rate": 3.6363636363636366e-06,
|
|
"loss": 1.1685,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.09124966550709125,
|
|
"grad_norm": 5.255050182342529,
|
|
"learning_rate": 3.6470588235294117e-06,
|
|
"loss": 1.1763,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.09151725983409152,
|
|
"grad_norm": 5.397471904754639,
|
|
"learning_rate": 3.6577540106951877e-06,
|
|
"loss": 1.1878,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.09178485416109179,
|
|
"grad_norm": 4.998739242553711,
|
|
"learning_rate": 3.6684491978609628e-06,
|
|
"loss": 1.2435,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.09205244848809205,
|
|
"grad_norm": 5.0231475830078125,
|
|
"learning_rate": 3.6791443850267383e-06,
|
|
"loss": 1.201,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.09232004281509232,
|
|
"grad_norm": 4.59348201751709,
|
|
"learning_rate": 3.6898395721925134e-06,
|
|
"loss": 1.1119,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.09258763714209259,
|
|
"grad_norm": 5.16015100479126,
|
|
"learning_rate": 3.700534759358289e-06,
|
|
"loss": 1.3093,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.09285523146909286,
|
|
"grad_norm": 4.382453441619873,
|
|
"learning_rate": 3.711229946524064e-06,
|
|
"loss": 1.0883,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.09312282579609313,
|
|
"grad_norm": 4.668209075927734,
|
|
"learning_rate": 3.72192513368984e-06,
|
|
"loss": 1.0241,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.09339042012309338,
|
|
"grad_norm": 4.655612945556641,
|
|
"learning_rate": 3.7326203208556156e-06,
|
|
"loss": 1.0818,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.09365801445009365,
|
|
"grad_norm": 4.565972805023193,
|
|
"learning_rate": 3.7433155080213907e-06,
|
|
"loss": 1.1419,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.09392560877709392,
|
|
"grad_norm": 5.171647548675537,
|
|
"learning_rate": 3.7540106951871662e-06,
|
|
"loss": 1.2268,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.0941932031040942,
|
|
"grad_norm": 4.986495018005371,
|
|
"learning_rate": 3.7647058823529414e-06,
|
|
"loss": 1.1435,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.09446079743109446,
|
|
"grad_norm": 5.132668495178223,
|
|
"learning_rate": 3.775401069518717e-06,
|
|
"loss": 1.1817,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.09472839175809473,
|
|
"grad_norm": 4.863659858703613,
|
|
"learning_rate": 3.786096256684492e-06,
|
|
"loss": 1.1636,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.09499598608509499,
|
|
"grad_norm": 4.890793323516846,
|
|
"learning_rate": 3.796791443850268e-06,
|
|
"loss": 1.099,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.09526358041209526,
|
|
"grad_norm": 4.9269208908081055,
|
|
"learning_rate": 3.807486631016043e-06,
|
|
"loss": 0.9963,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.09553117473909553,
|
|
"grad_norm": 5.276472091674805,
|
|
"learning_rate": 3.818181818181819e-06,
|
|
"loss": 1.1995,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.0957987690660958,
|
|
"grad_norm": 5.187767505645752,
|
|
"learning_rate": 3.828877005347594e-06,
|
|
"loss": 0.9875,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.09606636339309607,
|
|
"grad_norm": 4.884994983673096,
|
|
"learning_rate": 3.839572192513369e-06,
|
|
"loss": 1.1166,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.09633395772009633,
|
|
"grad_norm": 4.8466715812683105,
|
|
"learning_rate": 3.850267379679145e-06,
|
|
"loss": 1.1187,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.0966015520470966,
|
|
"grad_norm": 4.6448655128479,
|
|
"learning_rate": 3.86096256684492e-06,
|
|
"loss": 1.2536,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.09686914637409687,
|
|
"grad_norm": 4.447425365447998,
|
|
"learning_rate": 3.871657754010695e-06,
|
|
"loss": 1.0654,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.09713674070109714,
|
|
"grad_norm": 4.957208633422852,
|
|
"learning_rate": 3.882352941176471e-06,
|
|
"loss": 1.2859,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.09740433502809741,
|
|
"grad_norm": 4.9123735427856445,
|
|
"learning_rate": 3.893048128342246e-06,
|
|
"loss": 1.1159,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.09767192935509768,
|
|
"grad_norm": 5.830307960510254,
|
|
"learning_rate": 3.903743315508022e-06,
|
|
"loss": 1.05,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.09793952368209793,
|
|
"grad_norm": 4.788443088531494,
|
|
"learning_rate": 3.914438502673797e-06,
|
|
"loss": 1.1427,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.0982071180090982,
|
|
"grad_norm": 4.874475479125977,
|
|
"learning_rate": 3.925133689839573e-06,
|
|
"loss": 1.1247,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.09847471233609847,
|
|
"grad_norm": 5.284448623657227,
|
|
"learning_rate": 3.9358288770053474e-06,
|
|
"loss": 1.1494,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.09874230666309874,
|
|
"grad_norm": 5.056131839752197,
|
|
"learning_rate": 3.946524064171123e-06,
|
|
"loss": 1.1843,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.09900990099009901,
|
|
"grad_norm": 4.933049201965332,
|
|
"learning_rate": 3.957219251336899e-06,
|
|
"loss": 1.0876,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.09927749531709927,
|
|
"grad_norm": 5.440591335296631,
|
|
"learning_rate": 3.967914438502674e-06,
|
|
"loss": 1.0903,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.09954508964409954,
|
|
"grad_norm": 5.242448806762695,
|
|
"learning_rate": 3.97860962566845e-06,
|
|
"loss": 1.1539,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.09981268397109981,
|
|
"grad_norm": 5.424898147583008,
|
|
"learning_rate": 3.989304812834225e-06,
|
|
"loss": 1.1798,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.10008027829810008,
|
|
"grad_norm": 5.486216068267822,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 1.195,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.10034787262510035,
|
|
"grad_norm": 4.9611029624938965,
|
|
"learning_rate": 4.010695187165775e-06,
|
|
"loss": 1.1332,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.10061546695210062,
|
|
"grad_norm": 5.002806663513184,
|
|
"learning_rate": 4.021390374331552e-06,
|
|
"loss": 1.0903,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.10088306127910088,
|
|
"grad_norm": 5.656718730926514,
|
|
"learning_rate": 4.0320855614973264e-06,
|
|
"loss": 1.2747,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.10115065560610115,
|
|
"grad_norm": 5.98917293548584,
|
|
"learning_rate": 4.042780748663102e-06,
|
|
"loss": 1.1138,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.10141824993310142,
|
|
"grad_norm": 4.683370590209961,
|
|
"learning_rate": 4.0534759358288775e-06,
|
|
"loss": 1.1109,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.10168584426010169,
|
|
"grad_norm": 5.65017557144165,
|
|
"learning_rate": 4.064171122994653e-06,
|
|
"loss": 1.3569,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.10195343858710196,
|
|
"grad_norm": 5.555070400238037,
|
|
"learning_rate": 4.074866310160428e-06,
|
|
"loss": 1.4389,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.10222103291410223,
|
|
"grad_norm": 4.874694347381592,
|
|
"learning_rate": 4.085561497326203e-06,
|
|
"loss": 1.0894,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.10248862724110248,
|
|
"grad_norm": 4.907220840454102,
|
|
"learning_rate": 4.096256684491979e-06,
|
|
"loss": 1.1923,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.10275622156810275,
|
|
"grad_norm": 4.953684329986572,
|
|
"learning_rate": 4.106951871657754e-06,
|
|
"loss": 1.3313,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.10302381589510302,
|
|
"grad_norm": 4.817149639129639,
|
|
"learning_rate": 4.11764705882353e-06,
|
|
"loss": 1.139,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.1032914102221033,
|
|
"grad_norm": 5.111240386962891,
|
|
"learning_rate": 4.1283422459893054e-06,
|
|
"loss": 1.1787,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.10355900454910356,
|
|
"grad_norm": 4.8276519775390625,
|
|
"learning_rate": 4.13903743315508e-06,
|
|
"loss": 1.1326,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.10382659887610382,
|
|
"grad_norm": 4.992558479309082,
|
|
"learning_rate": 4.149732620320856e-06,
|
|
"loss": 1.2047,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.10409419320310409,
|
|
"grad_norm": 4.973186016082764,
|
|
"learning_rate": 4.160427807486631e-06,
|
|
"loss": 1.2083,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.10436178753010436,
|
|
"grad_norm": 5.174978733062744,
|
|
"learning_rate": 4.171122994652407e-06,
|
|
"loss": 1.1953,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.10462938185710463,
|
|
"grad_norm": 5.181015968322754,
|
|
"learning_rate": 4.181818181818182e-06,
|
|
"loss": 1.1337,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.1048969761841049,
|
|
"grad_norm": 5.914229869842529,
|
|
"learning_rate": 4.192513368983958e-06,
|
|
"loss": 1.3395,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.10516457051110517,
|
|
"grad_norm": 5.254291534423828,
|
|
"learning_rate": 4.203208556149733e-06,
|
|
"loss": 1.2202,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.10543216483810543,
|
|
"grad_norm": 5.055797100067139,
|
|
"learning_rate": 4.213903743315508e-06,
|
|
"loss": 1.1474,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.1056997591651057,
|
|
"grad_norm": 4.354243755340576,
|
|
"learning_rate": 4.224598930481284e-06,
|
|
"loss": 0.958,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.10596735349210597,
|
|
"grad_norm": 4.838346004486084,
|
|
"learning_rate": 4.235294117647059e-06,
|
|
"loss": 1.1825,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.10623494781910624,
|
|
"grad_norm": 4.711790561676025,
|
|
"learning_rate": 4.245989304812835e-06,
|
|
"loss": 1.196,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.10650254214610651,
|
|
"grad_norm": 4.71934175491333,
|
|
"learning_rate": 4.25668449197861e-06,
|
|
"loss": 1.0371,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.10677013647310676,
|
|
"grad_norm": 4.441000938415527,
|
|
"learning_rate": 4.267379679144386e-06,
|
|
"loss": 1.1646,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.10703773080010703,
|
|
"grad_norm": 5.28547477722168,
|
|
"learning_rate": 4.2780748663101604e-06,
|
|
"loss": 1.2089,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.1073053251271073,
|
|
"grad_norm": 4.684313774108887,
|
|
"learning_rate": 4.288770053475936e-06,
|
|
"loss": 1.1529,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.10757291945410757,
|
|
"grad_norm": 4.92221212387085,
|
|
"learning_rate": 4.2994652406417115e-06,
|
|
"loss": 1.1109,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.10784051378110784,
|
|
"grad_norm": 4.630762100219727,
|
|
"learning_rate": 4.310160427807487e-06,
|
|
"loss": 1.2035,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.10810810810810811,
|
|
"grad_norm": 5.12864875793457,
|
|
"learning_rate": 4.320855614973263e-06,
|
|
"loss": 1.2126,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.10837570243510837,
|
|
"grad_norm": 5.339291095733643,
|
|
"learning_rate": 4.331550802139038e-06,
|
|
"loss": 1.2983,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.10864329676210864,
|
|
"grad_norm": 5.000173568725586,
|
|
"learning_rate": 4.342245989304813e-06,
|
|
"loss": 1.2949,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.10891089108910891,
|
|
"grad_norm": 5.139687538146973,
|
|
"learning_rate": 4.352941176470588e-06,
|
|
"loss": 1.1753,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.10917848541610918,
|
|
"grad_norm": 5.229654788970947,
|
|
"learning_rate": 4.363636363636364e-06,
|
|
"loss": 1.1499,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.10944607974310945,
|
|
"grad_norm": 4.853805065155029,
|
|
"learning_rate": 4.3743315508021394e-06,
|
|
"loss": 1.2261,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.1097136740701097,
|
|
"grad_norm": 5.141970157623291,
|
|
"learning_rate": 4.385026737967915e-06,
|
|
"loss": 1.2721,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.10998126839710998,
|
|
"grad_norm": 5.1554436683654785,
|
|
"learning_rate": 4.3957219251336905e-06,
|
|
"loss": 1.2238,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.11024886272411025,
|
|
"grad_norm": 5.058832168579102,
|
|
"learning_rate": 4.406417112299465e-06,
|
|
"loss": 1.2816,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.11051645705111052,
|
|
"grad_norm": 4.609223365783691,
|
|
"learning_rate": 4.417112299465241e-06,
|
|
"loss": 1.1348,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.11078405137811079,
|
|
"grad_norm": 5.325019359588623,
|
|
"learning_rate": 4.427807486631016e-06,
|
|
"loss": 1.1162,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.11105164570511106,
|
|
"grad_norm": 4.600208759307861,
|
|
"learning_rate": 4.438502673796792e-06,
|
|
"loss": 1.0443,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.11131924003211131,
|
|
"grad_norm": 5.451298236846924,
|
|
"learning_rate": 4.449197860962567e-06,
|
|
"loss": 1.197,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.11158683435911158,
|
|
"grad_norm": 5.0797505378723145,
|
|
"learning_rate": 4.459893048128343e-06,
|
|
"loss": 1.2068,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.11185442868611185,
|
|
"grad_norm": 5.17997932434082,
|
|
"learning_rate": 4.4705882352941184e-06,
|
|
"loss": 1.137,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.11212202301311212,
|
|
"grad_norm": 5.312300682067871,
|
|
"learning_rate": 4.481283422459893e-06,
|
|
"loss": 1.176,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.1123896173401124,
|
|
"grad_norm": 4.956272602081299,
|
|
"learning_rate": 4.491978609625669e-06,
|
|
"loss": 1.1349,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.11265721166711266,
|
|
"grad_norm": 4.7235517501831055,
|
|
"learning_rate": 4.502673796791444e-06,
|
|
"loss": 1.1691,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.11292480599411292,
|
|
"grad_norm": 4.887537956237793,
|
|
"learning_rate": 4.51336898395722e-06,
|
|
"loss": 1.1562,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.11319240032111319,
|
|
"grad_norm": 4.688408851623535,
|
|
"learning_rate": 4.524064171122995e-06,
|
|
"loss": 1.1013,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.11345999464811346,
|
|
"grad_norm": 5.215854644775391,
|
|
"learning_rate": 4.534759358288771e-06,
|
|
"loss": 1.1452,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.11372758897511373,
|
|
"grad_norm": 5.092518329620361,
|
|
"learning_rate": 4.5454545454545455e-06,
|
|
"loss": 1.274,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.113995183302114,
|
|
"grad_norm": 4.888270854949951,
|
|
"learning_rate": 4.556149732620321e-06,
|
|
"loss": 1.1974,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.11426277762911426,
|
|
"grad_norm": 5.114696979522705,
|
|
"learning_rate": 4.566844919786097e-06,
|
|
"loss": 1.1434,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.11453037195611453,
|
|
"grad_norm": 5.443094730377197,
|
|
"learning_rate": 4.577540106951872e-06,
|
|
"loss": 1.3022,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.1147979662831148,
|
|
"grad_norm": 4.617439270019531,
|
|
"learning_rate": 4.588235294117647e-06,
|
|
"loss": 1.1046,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.11506556061011507,
|
|
"grad_norm": 5.151831150054932,
|
|
"learning_rate": 4.598930481283423e-06,
|
|
"loss": 1.1525,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.11533315493711534,
|
|
"grad_norm": 4.646505355834961,
|
|
"learning_rate": 4.609625668449198e-06,
|
|
"loss": 1.0613,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.11560074926411561,
|
|
"grad_norm": 4.780506610870361,
|
|
"learning_rate": 4.6203208556149734e-06,
|
|
"loss": 1.0874,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.11586834359111586,
|
|
"grad_norm": 4.499149322509766,
|
|
"learning_rate": 4.631016042780749e-06,
|
|
"loss": 1.0649,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.11613593791811613,
|
|
"grad_norm": 5.027551651000977,
|
|
"learning_rate": 4.6417112299465245e-06,
|
|
"loss": 1.1688,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.1164035322451164,
|
|
"grad_norm": 4.565614223480225,
|
|
"learning_rate": 4.6524064171123e-06,
|
|
"loss": 1.1468,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.11667112657211667,
|
|
"grad_norm": 4.508991241455078,
|
|
"learning_rate": 4.663101604278076e-06,
|
|
"loss": 1.1169,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.11693872089911694,
|
|
"grad_norm": 4.733094692230225,
|
|
"learning_rate": 4.673796791443851e-06,
|
|
"loss": 1.145,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.1172063152261172,
|
|
"grad_norm": 4.995217323303223,
|
|
"learning_rate": 4.684491978609626e-06,
|
|
"loss": 1.1946,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.11747390955311747,
|
|
"grad_norm": 4.931241512298584,
|
|
"learning_rate": 4.695187165775401e-06,
|
|
"loss": 1.0798,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.11774150388011774,
|
|
"grad_norm": 4.939948558807373,
|
|
"learning_rate": 4.705882352941177e-06,
|
|
"loss": 1.3138,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.11800909820711801,
|
|
"grad_norm": 5.553315162658691,
|
|
"learning_rate": 4.7165775401069524e-06,
|
|
"loss": 1.2837,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.11827669253411828,
|
|
"grad_norm": 5.024171829223633,
|
|
"learning_rate": 4.727272727272728e-06,
|
|
"loss": 1.0769,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.11854428686111855,
|
|
"grad_norm": 4.998294353485107,
|
|
"learning_rate": 4.7379679144385035e-06,
|
|
"loss": 1.2191,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.1188118811881188,
|
|
"grad_norm": 5.217951774597168,
|
|
"learning_rate": 4.748663101604278e-06,
|
|
"loss": 1.3233,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.11907947551511908,
|
|
"grad_norm": 4.932075500488281,
|
|
"learning_rate": 4.759358288770054e-06,
|
|
"loss": 1.2598,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.11934706984211935,
|
|
"grad_norm": 4.477123260498047,
|
|
"learning_rate": 4.770053475935829e-06,
|
|
"loss": 1.0329,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.11961466416911962,
|
|
"grad_norm": 4.998135566711426,
|
|
"learning_rate": 4.780748663101605e-06,
|
|
"loss": 1.1807,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.11988225849611989,
|
|
"grad_norm": 5.117345333099365,
|
|
"learning_rate": 4.7914438502673795e-06,
|
|
"loss": 1.1254,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.12014985282312014,
|
|
"grad_norm": 4.747807025909424,
|
|
"learning_rate": 4.802139037433156e-06,
|
|
"loss": 1.0701,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.12041744715012041,
|
|
"grad_norm": 4.674474716186523,
|
|
"learning_rate": 4.812834224598931e-06,
|
|
"loss": 1.2146,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.12068504147712068,
|
|
"grad_norm": 5.200889587402344,
|
|
"learning_rate": 4.823529411764706e-06,
|
|
"loss": 1.1634,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.12095263580412095,
|
|
"grad_norm": 4.857826232910156,
|
|
"learning_rate": 4.834224598930482e-06,
|
|
"loss": 1.13,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.12122023013112122,
|
|
"grad_norm": 4.851617336273193,
|
|
"learning_rate": 4.844919786096257e-06,
|
|
"loss": 1.1596,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.1214878244581215,
|
|
"grad_norm": 4.770223617553711,
|
|
"learning_rate": 4.855614973262032e-06,
|
|
"loss": 1.1326,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.12175541878512175,
|
|
"grad_norm": 5.090690612792969,
|
|
"learning_rate": 4.866310160427808e-06,
|
|
"loss": 1.1485,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.12202301311212202,
|
|
"grad_norm": 4.741364002227783,
|
|
"learning_rate": 4.877005347593583e-06,
|
|
"loss": 1.2239,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.12229060743912229,
|
|
"grad_norm": 4.698870658874512,
|
|
"learning_rate": 4.8877005347593585e-06,
|
|
"loss": 1.1703,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.12255820176612256,
|
|
"grad_norm": 5.272980213165283,
|
|
"learning_rate": 4.898395721925134e-06,
|
|
"loss": 1.1262,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.12282579609312283,
|
|
"grad_norm": 4.762371063232422,
|
|
"learning_rate": 4.90909090909091e-06,
|
|
"loss": 1.1365,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.1230933904201231,
|
|
"grad_norm": 4.594496726989746,
|
|
"learning_rate": 4.919786096256685e-06,
|
|
"loss": 1.1921,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.12336098474712336,
|
|
"grad_norm": 4.638429164886475,
|
|
"learning_rate": 4.93048128342246e-06,
|
|
"loss": 1.0768,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.12362857907412363,
|
|
"grad_norm": 5.253578186035156,
|
|
"learning_rate": 4.941176470588236e-06,
|
|
"loss": 1.2366,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.1238961734011239,
|
|
"grad_norm": 5.03195858001709,
|
|
"learning_rate": 4.951871657754011e-06,
|
|
"loss": 1.1402,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.12416376772812417,
|
|
"grad_norm": 4.741814136505127,
|
|
"learning_rate": 4.9625668449197864e-06,
|
|
"loss": 1.038,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.12443136205512444,
|
|
"grad_norm": 5.368718147277832,
|
|
"learning_rate": 4.973262032085562e-06,
|
|
"loss": 1.2952,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.12469895638212469,
|
|
"grad_norm": 4.668884754180908,
|
|
"learning_rate": 4.9839572192513375e-06,
|
|
"loss": 1.1318,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.12496655070912496,
|
|
"grad_norm": 5.185303688049316,
|
|
"learning_rate": 4.994652406417112e-06,
|
|
"loss": 1.1945,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.12523414503612523,
|
|
"grad_norm": 4.929427623748779,
|
|
"learning_rate": 5.005347593582888e-06,
|
|
"loss": 1.2231,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.1255017393631255,
|
|
"grad_norm": 4.767603397369385,
|
|
"learning_rate": 5.016042780748663e-06,
|
|
"loss": 1.2551,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.12576933369012577,
|
|
"grad_norm": 5.308717250823975,
|
|
"learning_rate": 5.026737967914439e-06,
|
|
"loss": 1.1529,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.12603692801712604,
|
|
"grad_norm": 4.83845329284668,
|
|
"learning_rate": 5.037433155080214e-06,
|
|
"loss": 1.2712,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.1263045223441263,
|
|
"grad_norm": 4.922050476074219,
|
|
"learning_rate": 5.048128342245989e-06,
|
|
"loss": 1.0939,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.12657211667112658,
|
|
"grad_norm": 4.888375282287598,
|
|
"learning_rate": 5.058823529411765e-06,
|
|
"loss": 1.1761,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.12683971099812685,
|
|
"grad_norm": 4.710062026977539,
|
|
"learning_rate": 5.069518716577541e-06,
|
|
"loss": 1.236,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.1271073053251271,
|
|
"grad_norm": 5.260262966156006,
|
|
"learning_rate": 5.0802139037433165e-06,
|
|
"loss": 1.2279,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.12737489965212737,
|
|
"grad_norm": 4.409514904022217,
|
|
"learning_rate": 5.090909090909091e-06,
|
|
"loss": 1.2382,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.12764249397912764,
|
|
"grad_norm": 4.516629695892334,
|
|
"learning_rate": 5.101604278074867e-06,
|
|
"loss": 0.9502,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.1279100883061279,
|
|
"grad_norm": 5.594369888305664,
|
|
"learning_rate": 5.112299465240642e-06,
|
|
"loss": 1.1869,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.12817768263312818,
|
|
"grad_norm": 4.493462562561035,
|
|
"learning_rate": 5.122994652406418e-06,
|
|
"loss": 1.0508,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.12844527696012845,
|
|
"grad_norm": 4.775510787963867,
|
|
"learning_rate": 5.133689839572193e-06,
|
|
"loss": 1.2422,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.12871287128712872,
|
|
"grad_norm": 4.813394546508789,
|
|
"learning_rate": 5.144385026737968e-06,
|
|
"loss": 1.1887,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.128980465614129,
|
|
"grad_norm": 4.693298816680908,
|
|
"learning_rate": 5.155080213903744e-06,
|
|
"loss": 1.0501,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.12924805994112926,
|
|
"grad_norm": 4.395559787750244,
|
|
"learning_rate": 5.165775401069519e-06,
|
|
"loss": 1.1917,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.12951565426812953,
|
|
"grad_norm": 5.032355785369873,
|
|
"learning_rate": 5.176470588235295e-06,
|
|
"loss": 1.2152,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.1297832485951298,
|
|
"grad_norm": 4.638949871063232,
|
|
"learning_rate": 5.187165775401069e-06,
|
|
"loss": 1.0553,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.13005084292213004,
|
|
"grad_norm": 4.831664562225342,
|
|
"learning_rate": 5.197860962566845e-06,
|
|
"loss": 1.0504,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.1303184372491303,
|
|
"grad_norm": 5.181875705718994,
|
|
"learning_rate": 5.208556149732621e-06,
|
|
"loss": 1.2088,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.13058603157613058,
|
|
"grad_norm": 5.028466701507568,
|
|
"learning_rate": 5.219251336898397e-06,
|
|
"loss": 1.0759,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.13085362590313085,
|
|
"grad_norm": 4.613313674926758,
|
|
"learning_rate": 5.2299465240641715e-06,
|
|
"loss": 1.2305,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.13112122023013112,
|
|
"grad_norm": 4.535508155822754,
|
|
"learning_rate": 5.240641711229947e-06,
|
|
"loss": 1.1403,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.1313888145571314,
|
|
"grad_norm": 4.558447360992432,
|
|
"learning_rate": 5.251336898395723e-06,
|
|
"loss": 1.1301,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.13165640888413166,
|
|
"grad_norm": 4.6473588943481445,
|
|
"learning_rate": 5.262032085561498e-06,
|
|
"loss": 1.0308,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.13192400321113193,
|
|
"grad_norm": 4.9026198387146,
|
|
"learning_rate": 5.272727272727273e-06,
|
|
"loss": 1.3224,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.1321915975381322,
|
|
"grad_norm": 4.340352535247803,
|
|
"learning_rate": 5.283422459893048e-06,
|
|
"loss": 1.1216,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.13245919186513247,
|
|
"grad_norm": 4.947085857391357,
|
|
"learning_rate": 5.294117647058824e-06,
|
|
"loss": 1.3081,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.13272678619213274,
|
|
"grad_norm": 5.271705627441406,
|
|
"learning_rate": 5.3048128342245995e-06,
|
|
"loss": 1.2907,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.13299438051913298,
|
|
"grad_norm": 4.9826507568359375,
|
|
"learning_rate": 5.315508021390374e-06,
|
|
"loss": 1.1883,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.13326197484613325,
|
|
"grad_norm": 4.606426239013672,
|
|
"learning_rate": 5.32620320855615e-06,
|
|
"loss": 1.129,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.13352956917313352,
|
|
"grad_norm": 4.6019392013549805,
|
|
"learning_rate": 5.336898395721925e-06,
|
|
"loss": 1.1887,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.1337971635001338,
|
|
"grad_norm": 5.553493976593018,
|
|
"learning_rate": 5.347593582887702e-06,
|
|
"loss": 1.2985,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.1337971635001338,
|
|
"eval_loss": 1.1709299087524414,
|
|
"eval_runtime": 11.4546,
|
|
"eval_samples_per_second": 34.92,
|
|
"eval_steps_per_second": 4.365,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.13406475782713406,
|
|
"grad_norm": 4.656076431274414,
|
|
"learning_rate": 5.358288770053477e-06,
|
|
"loss": 1.1667,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.13433235215413433,
|
|
"grad_norm": 4.8764543533325195,
|
|
"learning_rate": 5.368983957219252e-06,
|
|
"loss": 1.1945,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.1345999464811346,
|
|
"grad_norm": 4.712137699127197,
|
|
"learning_rate": 5.379679144385027e-06,
|
|
"loss": 1.1168,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.13486754080813487,
|
|
"grad_norm": 4.951474666595459,
|
|
"learning_rate": 5.390374331550803e-06,
|
|
"loss": 1.1441,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.13513513513513514,
|
|
"grad_norm": 5.019460678100586,
|
|
"learning_rate": 5.4010695187165785e-06,
|
|
"loss": 1.2449,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.1354027294621354,
|
|
"grad_norm": 4.8025689125061035,
|
|
"learning_rate": 5.411764705882353e-06,
|
|
"loss": 1.062,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.13567032378913568,
|
|
"grad_norm": 4.835244655609131,
|
|
"learning_rate": 5.422459893048129e-06,
|
|
"loss": 1.0729,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.13593791811613593,
|
|
"grad_norm": 5.318262577056885,
|
|
"learning_rate": 5.433155080213904e-06,
|
|
"loss": 1.1688,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.1362055124431362,
|
|
"grad_norm": 4.434688568115234,
|
|
"learning_rate": 5.44385026737968e-06,
|
|
"loss": 1.0925,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.13647310677013647,
|
|
"grad_norm": 4.781643867492676,
|
|
"learning_rate": 5.4545454545454545e-06,
|
|
"loss": 1.192,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.13674070109713674,
|
|
"grad_norm": 4.806861877441406,
|
|
"learning_rate": 5.46524064171123e-06,
|
|
"loss": 1.2121,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.137008295424137,
|
|
"grad_norm": 4.502013206481934,
|
|
"learning_rate": 5.4759358288770055e-06,
|
|
"loss": 1.075,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.13727588975113728,
|
|
"grad_norm": 5.604802131652832,
|
|
"learning_rate": 5.486631016042782e-06,
|
|
"loss": 1.2734,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.13754348407813755,
|
|
"grad_norm": 5.166036128997803,
|
|
"learning_rate": 5.497326203208556e-06,
|
|
"loss": 1.1035,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.13781107840513782,
|
|
"grad_norm": 5.001628875732422,
|
|
"learning_rate": 5.508021390374332e-06,
|
|
"loss": 1.2467,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.13807867273213809,
|
|
"grad_norm": 4.5005693435668945,
|
|
"learning_rate": 5.518716577540108e-06,
|
|
"loss": 1.1308,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.13834626705913836,
|
|
"grad_norm": 5.138829231262207,
|
|
"learning_rate": 5.529411764705883e-06,
|
|
"loss": 1.174,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.13861386138613863,
|
|
"grad_norm": 4.762211322784424,
|
|
"learning_rate": 5.540106951871658e-06,
|
|
"loss": 1.1242,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.13888145571313887,
|
|
"grad_norm": 5.18784761428833,
|
|
"learning_rate": 5.5508021390374335e-06,
|
|
"loss": 1.2918,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.13914905004013914,
|
|
"grad_norm": 4.61662483215332,
|
|
"learning_rate": 5.561497326203209e-06,
|
|
"loss": 1.1913,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.1394166443671394,
|
|
"grad_norm": 4.903599262237549,
|
|
"learning_rate": 5.5721925133689845e-06,
|
|
"loss": 1.3014,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.13968423869413968,
|
|
"grad_norm": 4.205623149871826,
|
|
"learning_rate": 5.58288770053476e-06,
|
|
"loss": 1.0465,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.13995183302113995,
|
|
"grad_norm": 3.9266059398651123,
|
|
"learning_rate": 5.593582887700535e-06,
|
|
"loss": 1.0034,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.14021942734814022,
|
|
"grad_norm": 5.096248626708984,
|
|
"learning_rate": 5.60427807486631e-06,
|
|
"loss": 1.2393,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.1404870216751405,
|
|
"grad_norm": 4.701903820037842,
|
|
"learning_rate": 5.614973262032086e-06,
|
|
"loss": 1.1296,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.14075461600214076,
|
|
"grad_norm": 4.736352443695068,
|
|
"learning_rate": 5.625668449197862e-06,
|
|
"loss": 1.1166,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.14102221032914103,
|
|
"grad_norm": 4.55366325378418,
|
|
"learning_rate": 5.636363636363636e-06,
|
|
"loss": 1.2381,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.1412898046561413,
|
|
"grad_norm": 4.388349533081055,
|
|
"learning_rate": 5.6470588235294125e-06,
|
|
"loss": 1.0277,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.14155739898314157,
|
|
"grad_norm": 4.596952438354492,
|
|
"learning_rate": 5.657754010695188e-06,
|
|
"loss": 0.9623,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.1418249933101418,
|
|
"grad_norm": 4.9525251388549805,
|
|
"learning_rate": 5.6684491978609635e-06,
|
|
"loss": 1.1406,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.14209258763714208,
|
|
"grad_norm": 4.623518466949463,
|
|
"learning_rate": 5.679144385026738e-06,
|
|
"loss": 1.0717,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.14236018196414235,
|
|
"grad_norm": 4.766755104064941,
|
|
"learning_rate": 5.689839572192514e-06,
|
|
"loss": 1.2016,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.14262777629114262,
|
|
"grad_norm": 4.868133068084717,
|
|
"learning_rate": 5.700534759358289e-06,
|
|
"loss": 1.1728,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.1428953706181429,
|
|
"grad_norm": 4.722245216369629,
|
|
"learning_rate": 5.711229946524065e-06,
|
|
"loss": 1.2795,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.14316296494514316,
|
|
"grad_norm": 4.916394233703613,
|
|
"learning_rate": 5.7219251336898395e-06,
|
|
"loss": 1.141,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.14343055927214343,
|
|
"grad_norm": 4.942296028137207,
|
|
"learning_rate": 5.732620320855615e-06,
|
|
"loss": 1.2017,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.1436981535991437,
|
|
"grad_norm": 4.585607051849365,
|
|
"learning_rate": 5.743315508021391e-06,
|
|
"loss": 1.2109,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.14396574792614397,
|
|
"grad_norm": 4.965005874633789,
|
|
"learning_rate": 5.754010695187167e-06,
|
|
"loss": 1.1582,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.14423334225314424,
|
|
"grad_norm": 5.032000541687012,
|
|
"learning_rate": 5.764705882352941e-06,
|
|
"loss": 1.0772,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.1445009365801445,
|
|
"grad_norm": 4.349190711975098,
|
|
"learning_rate": 5.775401069518717e-06,
|
|
"loss": 1.2469,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.14476853090714478,
|
|
"grad_norm": 4.369176387786865,
|
|
"learning_rate": 5.786096256684493e-06,
|
|
"loss": 1.131,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.14503612523414502,
|
|
"grad_norm": 4.241110324859619,
|
|
"learning_rate": 5.796791443850268e-06,
|
|
"loss": 0.9937,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.1453037195611453,
|
|
"grad_norm": 4.846850395202637,
|
|
"learning_rate": 5.807486631016043e-06,
|
|
"loss": 1.2059,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.14557131388814556,
|
|
"grad_norm": 5.102479457855225,
|
|
"learning_rate": 5.8181818181818185e-06,
|
|
"loss": 1.1612,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.14583890821514583,
|
|
"grad_norm": 4.706130027770996,
|
|
"learning_rate": 5.828877005347594e-06,
|
|
"loss": 1.1608,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.1461065025421461,
|
|
"grad_norm": 5.125561237335205,
|
|
"learning_rate": 5.83957219251337e-06,
|
|
"loss": 1.143,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.14637409686914638,
|
|
"grad_norm": 4.503932952880859,
|
|
"learning_rate": 5.850267379679145e-06,
|
|
"loss": 1.127,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.14664169119614665,
|
|
"grad_norm": 4.410585880279541,
|
|
"learning_rate": 5.86096256684492e-06,
|
|
"loss": 1.0207,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.14690928552314692,
|
|
"grad_norm": 4.253677845001221,
|
|
"learning_rate": 5.871657754010695e-06,
|
|
"loss": 1.0741,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.14717687985014719,
|
|
"grad_norm": 4.8487868309021,
|
|
"learning_rate": 5.882352941176471e-06,
|
|
"loss": 1.1049,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.14744447417714746,
|
|
"grad_norm": 5.069744110107422,
|
|
"learning_rate": 5.893048128342247e-06,
|
|
"loss": 1.2483,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.14771206850414773,
|
|
"grad_norm": 4.3907470703125,
|
|
"learning_rate": 5.903743315508021e-06,
|
|
"loss": 1.0139,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.14797966283114797,
|
|
"grad_norm": 5.0639142990112305,
|
|
"learning_rate": 5.9144385026737975e-06,
|
|
"loss": 1.2476,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.14824725715814824,
|
|
"grad_norm": 4.5384016036987305,
|
|
"learning_rate": 5.925133689839573e-06,
|
|
"loss": 1.1994,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.1485148514851485,
|
|
"grad_norm": 4.997219085693359,
|
|
"learning_rate": 5.935828877005349e-06,
|
|
"loss": 1.133,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.14878244581214878,
|
|
"grad_norm": 5.0610551834106445,
|
|
"learning_rate": 5.946524064171123e-06,
|
|
"loss": 1.3546,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.14905004013914905,
|
|
"grad_norm": 4.485021114349365,
|
|
"learning_rate": 5.957219251336899e-06,
|
|
"loss": 1.1425,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.14931763446614932,
|
|
"grad_norm": 4.8803229331970215,
|
|
"learning_rate": 5.967914438502674e-06,
|
|
"loss": 1.2082,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.1495852287931496,
|
|
"grad_norm": 4.79873514175415,
|
|
"learning_rate": 5.97860962566845e-06,
|
|
"loss": 1.225,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.14985282312014986,
|
|
"grad_norm": 4.734536170959473,
|
|
"learning_rate": 5.989304812834225e-06,
|
|
"loss": 1.1506,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.15012041744715013,
|
|
"grad_norm": 4.322850227355957,
|
|
"learning_rate": 6e-06,
|
|
"loss": 1.0387,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.1503880117741504,
|
|
"grad_norm": 4.721519947052002,
|
|
"learning_rate": 6.010695187165776e-06,
|
|
"loss": 1.0448,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.15065560610115067,
|
|
"grad_norm": 4.884403228759766,
|
|
"learning_rate": 6.021390374331551e-06,
|
|
"loss": 1.1416,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.1509232004281509,
|
|
"grad_norm": 5.24191427230835,
|
|
"learning_rate": 6.032085561497326e-06,
|
|
"loss": 1.2222,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.15119079475515118,
|
|
"grad_norm": 4.954929351806641,
|
|
"learning_rate": 6.0427807486631015e-06,
|
|
"loss": 1.2712,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.15145838908215145,
|
|
"grad_norm": 4.613723278045654,
|
|
"learning_rate": 6.053475935828878e-06,
|
|
"loss": 1.1032,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.15172598340915172,
|
|
"grad_norm": 4.400996685028076,
|
|
"learning_rate": 6.064171122994653e-06,
|
|
"loss": 1.091,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.151993577736152,
|
|
"grad_norm": 4.841631889343262,
|
|
"learning_rate": 6.074866310160429e-06,
|
|
"loss": 1.2592,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.15226117206315226,
|
|
"grad_norm": 5.009564399719238,
|
|
"learning_rate": 6.085561497326204e-06,
|
|
"loss": 1.1364,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.15252876639015253,
|
|
"grad_norm": 4.932076930999756,
|
|
"learning_rate": 6.096256684491979e-06,
|
|
"loss": 1.1759,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.1527963607171528,
|
|
"grad_norm": 5.142986297607422,
|
|
"learning_rate": 6.106951871657755e-06,
|
|
"loss": 1.2236,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.15306395504415307,
|
|
"grad_norm": 5.11539363861084,
|
|
"learning_rate": 6.11764705882353e-06,
|
|
"loss": 1.1623,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.15333154937115334,
|
|
"grad_norm": 4.659823417663574,
|
|
"learning_rate": 6.128342245989305e-06,
|
|
"loss": 1.2424,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.1535991436981536,
|
|
"grad_norm": 5.004172325134277,
|
|
"learning_rate": 6.1390374331550805e-06,
|
|
"loss": 1.2536,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.15386673802515385,
|
|
"grad_norm": 4.277651309967041,
|
|
"learning_rate": 6.149732620320856e-06,
|
|
"loss": 1.1239,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.15413433235215412,
|
|
"grad_norm": 4.292529582977295,
|
|
"learning_rate": 6.1604278074866315e-06,
|
|
"loss": 1.1876,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.1544019266791544,
|
|
"grad_norm": 4.799615859985352,
|
|
"learning_rate": 6.171122994652406e-06,
|
|
"loss": 1.1205,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.15466952100615466,
|
|
"grad_norm": 4.678570747375488,
|
|
"learning_rate": 6.181818181818182e-06,
|
|
"loss": 1.1356,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.15493711533315493,
|
|
"grad_norm": 4.6860246658325195,
|
|
"learning_rate": 6.192513368983958e-06,
|
|
"loss": 1.1228,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.1552047096601552,
|
|
"grad_norm": 4.289163112640381,
|
|
"learning_rate": 6.203208556149734e-06,
|
|
"loss": 1.0872,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.15547230398715547,
|
|
"grad_norm": 4.854632377624512,
|
|
"learning_rate": 6.213903743315508e-06,
|
|
"loss": 1.3285,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.15573989831415574,
|
|
"grad_norm": 5.2167253494262695,
|
|
"learning_rate": 6.224598930481284e-06,
|
|
"loss": 1.076,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.15600749264115601,
|
|
"grad_norm": 4.5353264808654785,
|
|
"learning_rate": 6.2352941176470595e-06,
|
|
"loss": 1.1824,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.15627508696815628,
|
|
"grad_norm": 4.770082950592041,
|
|
"learning_rate": 6.245989304812835e-06,
|
|
"loss": 1.0642,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.15654268129515655,
|
|
"grad_norm": 5.027703762054443,
|
|
"learning_rate": 6.25668449197861e-06,
|
|
"loss": 1.2616,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.1568102756221568,
|
|
"grad_norm": 4.812859058380127,
|
|
"learning_rate": 6.267379679144385e-06,
|
|
"loss": 1.2037,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.15707786994915707,
|
|
"grad_norm": 5.672885894775391,
|
|
"learning_rate": 6.278074866310161e-06,
|
|
"loss": 1.3296,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.15734546427615734,
|
|
"grad_norm": 4.318905830383301,
|
|
"learning_rate": 6.288770053475936e-06,
|
|
"loss": 1.0487,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.1576130586031576,
|
|
"grad_norm": 4.390570163726807,
|
|
"learning_rate": 6.299465240641713e-06,
|
|
"loss": 1.1174,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.15788065293015788,
|
|
"grad_norm": 5.302069664001465,
|
|
"learning_rate": 6.3101604278074865e-06,
|
|
"loss": 1.0765,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.15814824725715815,
|
|
"grad_norm": 5.114290237426758,
|
|
"learning_rate": 6.320855614973262e-06,
|
|
"loss": 1.2838,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.15841584158415842,
|
|
"grad_norm": 4.3737335205078125,
|
|
"learning_rate": 6.3315508021390385e-06,
|
|
"loss": 1.072,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.1586834359111587,
|
|
"grad_norm": 4.571005344390869,
|
|
"learning_rate": 6.342245989304814e-06,
|
|
"loss": 1.1507,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.15895103023815896,
|
|
"grad_norm": 4.546551704406738,
|
|
"learning_rate": 6.352941176470589e-06,
|
|
"loss": 1.1058,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.15921862456515923,
|
|
"grad_norm": 4.901880741119385,
|
|
"learning_rate": 6.363636363636364e-06,
|
|
"loss": 1.1524,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.1594862188921595,
|
|
"grad_norm": 4.456069469451904,
|
|
"learning_rate": 6.37433155080214e-06,
|
|
"loss": 1.1826,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.15975381321915974,
|
|
"grad_norm": 4.513467788696289,
|
|
"learning_rate": 6.385026737967915e-06,
|
|
"loss": 1.1069,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.16002140754616,
|
|
"grad_norm": 4.525417804718018,
|
|
"learning_rate": 6.39572192513369e-06,
|
|
"loss": 1.1583,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.16028900187316028,
|
|
"grad_norm": 4.3607177734375,
|
|
"learning_rate": 6.4064171122994655e-06,
|
|
"loss": 1.1446,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.16055659620016055,
|
|
"grad_norm": 4.644144058227539,
|
|
"learning_rate": 6.417112299465241e-06,
|
|
"loss": 1.1466,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.16082419052716082,
|
|
"grad_norm": 4.352504730224609,
|
|
"learning_rate": 6.427807486631017e-06,
|
|
"loss": 1.1164,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.1610917848541611,
|
|
"grad_norm": 5.058422088623047,
|
|
"learning_rate": 6.438502673796791e-06,
|
|
"loss": 1.1716,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.16135937918116136,
|
|
"grad_norm": 4.505871772766113,
|
|
"learning_rate": 6.449197860962567e-06,
|
|
"loss": 1.1306,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.16162697350816163,
|
|
"grad_norm": 4.627199649810791,
|
|
"learning_rate": 6.459893048128343e-06,
|
|
"loss": 1.2105,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.1618945678351619,
|
|
"grad_norm": 5.190435409545898,
|
|
"learning_rate": 6.470588235294119e-06,
|
|
"loss": 1.2797,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.16216216216216217,
|
|
"grad_norm": 4.629772186279297,
|
|
"learning_rate": 6.4812834224598935e-06,
|
|
"loss": 1.0904,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.16242975648916244,
|
|
"grad_norm": 4.735287189483643,
|
|
"learning_rate": 6.491978609625669e-06,
|
|
"loss": 1.1999,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.16269735081616268,
|
|
"grad_norm": 5.2313008308410645,
|
|
"learning_rate": 6.5026737967914445e-06,
|
|
"loss": 1.3026,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.16296494514316295,
|
|
"grad_norm": 4.605459213256836,
|
|
"learning_rate": 6.51336898395722e-06,
|
|
"loss": 1.1164,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.16323253947016322,
|
|
"grad_norm": 4.5824480056762695,
|
|
"learning_rate": 6.524064171122996e-06,
|
|
"loss": 1.0526,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.1635001337971635,
|
|
"grad_norm": 4.864238739013672,
|
|
"learning_rate": 6.53475935828877e-06,
|
|
"loss": 1.1137,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.16376772812416376,
|
|
"grad_norm": 4.430417537689209,
|
|
"learning_rate": 6.545454545454546e-06,
|
|
"loss": 1.1407,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.16403532245116403,
|
|
"grad_norm": 4.643566131591797,
|
|
"learning_rate": 6.556149732620321e-06,
|
|
"loss": 1.115,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.1643029167781643,
|
|
"grad_norm": 5.602782249450684,
|
|
"learning_rate": 6.566844919786097e-06,
|
|
"loss": 1.4065,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.16457051110516457,
|
|
"grad_norm": 4.812868118286133,
|
|
"learning_rate": 6.577540106951872e-06,
|
|
"loss": 1.1505,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.16483810543216484,
|
|
"grad_norm": 4.6687235832214355,
|
|
"learning_rate": 6.588235294117647e-06,
|
|
"loss": 1.1733,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.16510569975916511,
|
|
"grad_norm": 4.8625264167785645,
|
|
"learning_rate": 6.5989304812834235e-06,
|
|
"loss": 1.14,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.16537329408616538,
|
|
"grad_norm": 5.044530868530273,
|
|
"learning_rate": 6.609625668449199e-06,
|
|
"loss": 1.2254,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.16564088841316565,
|
|
"grad_norm": 4.458752632141113,
|
|
"learning_rate": 6.620320855614974e-06,
|
|
"loss": 1.1927,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.1659084827401659,
|
|
"grad_norm": 4.7606377601623535,
|
|
"learning_rate": 6.631016042780749e-06,
|
|
"loss": 1.1916,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.16617607706716617,
|
|
"grad_norm": 5.007805824279785,
|
|
"learning_rate": 6.641711229946525e-06,
|
|
"loss": 1.2655,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.16644367139416644,
|
|
"grad_norm": 4.409674167633057,
|
|
"learning_rate": 6.6524064171123e-06,
|
|
"loss": 1.0725,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.1667112657211667,
|
|
"grad_norm": 4.561901569366455,
|
|
"learning_rate": 6.663101604278075e-06,
|
|
"loss": 1.1336,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.16697886004816698,
|
|
"grad_norm": 5.645256996154785,
|
|
"learning_rate": 6.673796791443851e-06,
|
|
"loss": 1.2585,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.16724645437516725,
|
|
"grad_norm": 5.0422139167785645,
|
|
"learning_rate": 6.684491978609626e-06,
|
|
"loss": 1.2117,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.16751404870216752,
|
|
"grad_norm": 5.541776180267334,
|
|
"learning_rate": 6.695187165775402e-06,
|
|
"loss": 1.2715,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.1677816430291678,
|
|
"grad_norm": 4.81757116317749,
|
|
"learning_rate": 6.705882352941176e-06,
|
|
"loss": 1.1431,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.16804923735616806,
|
|
"grad_norm": 5.481652736663818,
|
|
"learning_rate": 6.716577540106952e-06,
|
|
"loss": 1.174,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.16831683168316833,
|
|
"grad_norm": 4.777329444885254,
|
|
"learning_rate": 6.7272727272727275e-06,
|
|
"loss": 1.1886,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.1685844260101686,
|
|
"grad_norm": 4.763789176940918,
|
|
"learning_rate": 6.737967914438504e-06,
|
|
"loss": 1.1154,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.16885202033716884,
|
|
"grad_norm": 4.949760437011719,
|
|
"learning_rate": 6.748663101604279e-06,
|
|
"loss": 1.1888,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.1691196146641691,
|
|
"grad_norm": 4.344736099243164,
|
|
"learning_rate": 6.759358288770054e-06,
|
|
"loss": 1.2278,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.16938720899116938,
|
|
"grad_norm": 4.495877265930176,
|
|
"learning_rate": 6.77005347593583e-06,
|
|
"loss": 1.1668,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.16965480331816965,
|
|
"grad_norm": 4.895537853240967,
|
|
"learning_rate": 6.780748663101605e-06,
|
|
"loss": 1.3816,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.16992239764516992,
|
|
"grad_norm": 4.664587497711182,
|
|
"learning_rate": 6.791443850267381e-06,
|
|
"loss": 1.2055,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.1701899919721702,
|
|
"grad_norm": 4.564089775085449,
|
|
"learning_rate": 6.802139037433155e-06,
|
|
"loss": 1.1425,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.17045758629917046,
|
|
"grad_norm": 4.690885066986084,
|
|
"learning_rate": 6.812834224598931e-06,
|
|
"loss": 1.0908,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.17072518062617073,
|
|
"grad_norm": 4.54403018951416,
|
|
"learning_rate": 6.8235294117647065e-06,
|
|
"loss": 1.0681,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.170992774953171,
|
|
"grad_norm": 4.301973342895508,
|
|
"learning_rate": 6.834224598930482e-06,
|
|
"loss": 1.1184,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.17126036928017127,
|
|
"grad_norm": 4.822204113006592,
|
|
"learning_rate": 6.844919786096257e-06,
|
|
"loss": 1.1718,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.17152796360717154,
|
|
"grad_norm": 4.2204413414001465,
|
|
"learning_rate": 6.855614973262032e-06,
|
|
"loss": 1.141,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.17179555793417178,
|
|
"grad_norm": 4.727780818939209,
|
|
"learning_rate": 6.866310160427808e-06,
|
|
"loss": 1.2378,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.17206315226117205,
|
|
"grad_norm": 4.156445503234863,
|
|
"learning_rate": 6.877005347593584e-06,
|
|
"loss": 1.1066,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.17233074658817232,
|
|
"grad_norm": 4.479008197784424,
|
|
"learning_rate": 6.887700534759358e-06,
|
|
"loss": 1.171,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.1725983409151726,
|
|
"grad_norm": 4.782415866851807,
|
|
"learning_rate": 6.898395721925134e-06,
|
|
"loss": 1.1557,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.17286593524217286,
|
|
"grad_norm": 4.781481742858887,
|
|
"learning_rate": 6.90909090909091e-06,
|
|
"loss": 1.3044,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.17313352956917313,
|
|
"grad_norm": 4.513900279998779,
|
|
"learning_rate": 6.9197860962566855e-06,
|
|
"loss": 1.189,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.1734011238961734,
|
|
"grad_norm": 5.123539924621582,
|
|
"learning_rate": 6.93048128342246e-06,
|
|
"loss": 1.2388,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.17366871822317367,
|
|
"grad_norm": 5.24996280670166,
|
|
"learning_rate": 6.941176470588236e-06,
|
|
"loss": 1.2528,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.17393631255017394,
|
|
"grad_norm": 4.407766819000244,
|
|
"learning_rate": 6.951871657754011e-06,
|
|
"loss": 1.0828,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.1742039068771742,
|
|
"grad_norm": 4.964326858520508,
|
|
"learning_rate": 6.962566844919787e-06,
|
|
"loss": 1.1248,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.17447150120417448,
|
|
"grad_norm": 4.530794620513916,
|
|
"learning_rate": 6.9732620320855615e-06,
|
|
"loss": 1.1584,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.17473909553117473,
|
|
"grad_norm": 4.297457218170166,
|
|
"learning_rate": 6.983957219251337e-06,
|
|
"loss": 1.1548,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.175006689858175,
|
|
"grad_norm": 4.825823783874512,
|
|
"learning_rate": 6.9946524064171125e-06,
|
|
"loss": 1.2084,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.17527428418517527,
|
|
"grad_norm": 4.5333709716796875,
|
|
"learning_rate": 7.005347593582889e-06,
|
|
"loss": 1.1385,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.17554187851217554,
|
|
"grad_norm": 4.513311386108398,
|
|
"learning_rate": 7.0160427807486645e-06,
|
|
"loss": 1.1604,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.1758094728391758,
|
|
"grad_norm": 4.645889759063721,
|
|
"learning_rate": 7.026737967914438e-06,
|
|
"loss": 1.3132,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.17607706716617608,
|
|
"grad_norm": 4.844141006469727,
|
|
"learning_rate": 7.037433155080215e-06,
|
|
"loss": 1.1617,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.17634466149317635,
|
|
"grad_norm": 4.618659973144531,
|
|
"learning_rate": 7.04812834224599e-06,
|
|
"loss": 1.1599,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.17661225582017662,
|
|
"grad_norm": 4.780247688293457,
|
|
"learning_rate": 7.058823529411766e-06,
|
|
"loss": 1.2249,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.1768798501471769,
|
|
"grad_norm": 4.695610046386719,
|
|
"learning_rate": 7.0695187165775405e-06,
|
|
"loss": 1.2523,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.17714744447417716,
|
|
"grad_norm": 4.643034934997559,
|
|
"learning_rate": 7.080213903743316e-06,
|
|
"loss": 1.2802,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.17741503880117743,
|
|
"grad_norm": 4.363466739654541,
|
|
"learning_rate": 7.0909090909090916e-06,
|
|
"loss": 1.0768,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.17768263312817767,
|
|
"grad_norm": 4.794258117675781,
|
|
"learning_rate": 7.101604278074867e-06,
|
|
"loss": 1.2522,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.17795022745517794,
|
|
"grad_norm": 4.560819149017334,
|
|
"learning_rate": 7.112299465240642e-06,
|
|
"loss": 1.1901,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.1782178217821782,
|
|
"grad_norm": 4.56439733505249,
|
|
"learning_rate": 7.122994652406417e-06,
|
|
"loss": 1.1813,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.17848541610917848,
|
|
"grad_norm": 4.605260848999023,
|
|
"learning_rate": 7.133689839572193e-06,
|
|
"loss": 1.1981,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.17875301043617875,
|
|
"grad_norm": 4.7326483726501465,
|
|
"learning_rate": 7.144385026737969e-06,
|
|
"loss": 1.1832,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.17902060476317902,
|
|
"grad_norm": 4.547402858734131,
|
|
"learning_rate": 7.155080213903743e-06,
|
|
"loss": 1.0722,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.1792881990901793,
|
|
"grad_norm": 4.594086170196533,
|
|
"learning_rate": 7.1657754010695195e-06,
|
|
"loss": 1.1557,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.17955579341717956,
|
|
"grad_norm": 4.440776824951172,
|
|
"learning_rate": 7.176470588235295e-06,
|
|
"loss": 1.1161,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.17982338774417983,
|
|
"grad_norm": 5.013535976409912,
|
|
"learning_rate": 7.1871657754010706e-06,
|
|
"loss": 1.1546,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.1800909820711801,
|
|
"grad_norm": 5.5731000900268555,
|
|
"learning_rate": 7.197860962566845e-06,
|
|
"loss": 1.315,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.18035857639818037,
|
|
"grad_norm": 4.811005592346191,
|
|
"learning_rate": 7.208556149732621e-06,
|
|
"loss": 1.0571,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.1806261707251806,
|
|
"grad_norm": 4.496854782104492,
|
|
"learning_rate": 7.219251336898396e-06,
|
|
"loss": 1.0867,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.18089376505218088,
|
|
"grad_norm": 4.781049728393555,
|
|
"learning_rate": 7.229946524064172e-06,
|
|
"loss": 1.0135,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.18116135937918115,
|
|
"grad_norm": 4.150574684143066,
|
|
"learning_rate": 7.240641711229947e-06,
|
|
"loss": 1.1745,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.18142895370618142,
|
|
"grad_norm": 4.843429088592529,
|
|
"learning_rate": 7.251336898395722e-06,
|
|
"loss": 1.2394,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.1816965480331817,
|
|
"grad_norm": 4.525768280029297,
|
|
"learning_rate": 7.262032085561498e-06,
|
|
"loss": 1.2715,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.18196414236018196,
|
|
"grad_norm": 4.916580677032471,
|
|
"learning_rate": 7.272727272727273e-06,
|
|
"loss": 1.2347,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.18223173668718223,
|
|
"grad_norm": 4.803800106048584,
|
|
"learning_rate": 7.2834224598930496e-06,
|
|
"loss": 1.1586,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.1824993310141825,
|
|
"grad_norm": 4.679764747619629,
|
|
"learning_rate": 7.294117647058823e-06,
|
|
"loss": 1.268,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.18276692534118277,
|
|
"grad_norm": 4.965787410736084,
|
|
"learning_rate": 7.3048128342246e-06,
|
|
"loss": 1.1855,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.18303451966818304,
|
|
"grad_norm": 4.892383575439453,
|
|
"learning_rate": 7.315508021390375e-06,
|
|
"loss": 1.1683,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.1833021139951833,
|
|
"grad_norm": 4.476233005523682,
|
|
"learning_rate": 7.326203208556151e-06,
|
|
"loss": 1.1109,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.18356970832218358,
|
|
"grad_norm": 4.431989669799805,
|
|
"learning_rate": 7.3368983957219256e-06,
|
|
"loss": 1.1575,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.18383730264918383,
|
|
"grad_norm": 4.837761878967285,
|
|
"learning_rate": 7.347593582887701e-06,
|
|
"loss": 1.2535,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.1841048969761841,
|
|
"grad_norm": 4.285210132598877,
|
|
"learning_rate": 7.358288770053477e-06,
|
|
"loss": 1.0863,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.18437249130318437,
|
|
"grad_norm": 4.910134315490723,
|
|
"learning_rate": 7.368983957219252e-06,
|
|
"loss": 1.1904,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.18464008563018464,
|
|
"grad_norm": 4.774014472961426,
|
|
"learning_rate": 7.379679144385027e-06,
|
|
"loss": 1.1826,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.1849076799571849,
|
|
"grad_norm": 5.281838893890381,
|
|
"learning_rate": 7.390374331550802e-06,
|
|
"loss": 1.3556,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.18517527428418518,
|
|
"grad_norm": 4.740875244140625,
|
|
"learning_rate": 7.401069518716578e-06,
|
|
"loss": 1.3182,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.18544286861118545,
|
|
"grad_norm": 4.560650825500488,
|
|
"learning_rate": 7.4117647058823535e-06,
|
|
"loss": 1.2666,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.18571046293818572,
|
|
"grad_norm": 4.770612716674805,
|
|
"learning_rate": 7.422459893048128e-06,
|
|
"loss": 1.0551,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 0.185978057265186,
|
|
"grad_norm": 4.479051113128662,
|
|
"learning_rate": 7.433155080213904e-06,
|
|
"loss": 1.1718,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.18624565159218626,
|
|
"grad_norm": 4.537865161895752,
|
|
"learning_rate": 7.44385026737968e-06,
|
|
"loss": 1.0874,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 0.18651324591918653,
|
|
"grad_norm": 4.282291412353516,
|
|
"learning_rate": 7.454545454545456e-06,
|
|
"loss": 1.2062,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 0.18678084024618677,
|
|
"grad_norm": 4.386539459228516,
|
|
"learning_rate": 7.465240641711231e-06,
|
|
"loss": 1.313,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 0.18704843457318704,
|
|
"grad_norm": 4.664721488952637,
|
|
"learning_rate": 7.475935828877006e-06,
|
|
"loss": 1.2399,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 0.1873160289001873,
|
|
"grad_norm": 5.261703014373779,
|
|
"learning_rate": 7.486631016042781e-06,
|
|
"loss": 1.3199,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.18758362322718758,
|
|
"grad_norm": 4.195591449737549,
|
|
"learning_rate": 7.497326203208557e-06,
|
|
"loss": 1.035,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 0.18785121755418785,
|
|
"grad_norm": 4.815860271453857,
|
|
"learning_rate": 7.5080213903743325e-06,
|
|
"loss": 1.2643,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 0.18811881188118812,
|
|
"grad_norm": 5.00251579284668,
|
|
"learning_rate": 7.518716577540107e-06,
|
|
"loss": 1.1611,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 0.1883864062081884,
|
|
"grad_norm": 4.371436595916748,
|
|
"learning_rate": 7.529411764705883e-06,
|
|
"loss": 1.1734,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 0.18865400053518866,
|
|
"grad_norm": 4.646690368652344,
|
|
"learning_rate": 7.540106951871658e-06,
|
|
"loss": 1.1298,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.18892159486218893,
|
|
"grad_norm": 4.49533748626709,
|
|
"learning_rate": 7.550802139037434e-06,
|
|
"loss": 1.0886,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 0.1891891891891892,
|
|
"grad_norm": 4.740173816680908,
|
|
"learning_rate": 7.5614973262032085e-06,
|
|
"loss": 1.1291,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 0.18945678351618947,
|
|
"grad_norm": 4.919492721557617,
|
|
"learning_rate": 7.572192513368984e-06,
|
|
"loss": 1.1714,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 0.1897243778431897,
|
|
"grad_norm": 4.677563190460205,
|
|
"learning_rate": 7.58288770053476e-06,
|
|
"loss": 1.1417,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 0.18999197217018998,
|
|
"grad_norm": 4.335318088531494,
|
|
"learning_rate": 7.593582887700536e-06,
|
|
"loss": 1.0924,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.19025956649719025,
|
|
"grad_norm": 4.638528347015381,
|
|
"learning_rate": 7.604278074866311e-06,
|
|
"loss": 1.2186,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 0.19052716082419052,
|
|
"grad_norm": 4.537407398223877,
|
|
"learning_rate": 7.614973262032086e-06,
|
|
"loss": 1.1797,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 0.1907947551511908,
|
|
"grad_norm": 4.735195159912109,
|
|
"learning_rate": 7.625668449197862e-06,
|
|
"loss": 1.2728,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 0.19106234947819106,
|
|
"grad_norm": 4.434914588928223,
|
|
"learning_rate": 7.636363636363638e-06,
|
|
"loss": 1.1357,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 0.19132994380519133,
|
|
"grad_norm": 4.431911945343018,
|
|
"learning_rate": 7.647058823529411e-06,
|
|
"loss": 1.2785,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.1915975381321916,
|
|
"grad_norm": 4.211305618286133,
|
|
"learning_rate": 7.657754010695187e-06,
|
|
"loss": 1.1322,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 0.19186513245919187,
|
|
"grad_norm": 4.698652267456055,
|
|
"learning_rate": 7.668449197860964e-06,
|
|
"loss": 1.2302,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 0.19213272678619214,
|
|
"grad_norm": 4.491962909698486,
|
|
"learning_rate": 7.679144385026739e-06,
|
|
"loss": 1.1325,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 0.1924003211131924,
|
|
"grad_norm": 4.714018821716309,
|
|
"learning_rate": 7.689839572192515e-06,
|
|
"loss": 1.152,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 0.19266791544019266,
|
|
"grad_norm": 4.598504066467285,
|
|
"learning_rate": 7.70053475935829e-06,
|
|
"loss": 1.0786,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.19293550976719293,
|
|
"grad_norm": 4.4915008544921875,
|
|
"learning_rate": 7.711229946524064e-06,
|
|
"loss": 1.1606,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 0.1932031040941932,
|
|
"grad_norm": 4.305722236633301,
|
|
"learning_rate": 7.72192513368984e-06,
|
|
"loss": 1.1375,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 0.19347069842119347,
|
|
"grad_norm": 4.845047473907471,
|
|
"learning_rate": 7.732620320855615e-06,
|
|
"loss": 1.0849,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 0.19373829274819374,
|
|
"grad_norm": 4.809256553649902,
|
|
"learning_rate": 7.74331550802139e-06,
|
|
"loss": 1.0855,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 0.194005887075194,
|
|
"grad_norm": 5.05698823928833,
|
|
"learning_rate": 7.754010695187166e-06,
|
|
"loss": 1.215,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.19427348140219428,
|
|
"grad_norm": 4.64973258972168,
|
|
"learning_rate": 7.764705882352941e-06,
|
|
"loss": 1.1856,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 0.19454107572919455,
|
|
"grad_norm": 4.284728050231934,
|
|
"learning_rate": 7.775401069518718e-06,
|
|
"loss": 1.013,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 0.19480867005619482,
|
|
"grad_norm": 4.597956657409668,
|
|
"learning_rate": 7.786096256684492e-06,
|
|
"loss": 1.1597,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 0.19507626438319509,
|
|
"grad_norm": 4.795129299163818,
|
|
"learning_rate": 7.796791443850269e-06,
|
|
"loss": 1.26,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 0.19534385871019536,
|
|
"grad_norm": 4.353721618652344,
|
|
"learning_rate": 7.807486631016043e-06,
|
|
"loss": 1.1202,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.1956114530371956,
|
|
"grad_norm": 4.6432108879089355,
|
|
"learning_rate": 7.81818181818182e-06,
|
|
"loss": 1.1496,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 0.19587904736419587,
|
|
"grad_norm": 4.320937156677246,
|
|
"learning_rate": 7.828877005347594e-06,
|
|
"loss": 1.0743,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 0.19614664169119614,
|
|
"grad_norm": 4.268731594085693,
|
|
"learning_rate": 7.839572192513369e-06,
|
|
"loss": 1.0991,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 0.1964142360181964,
|
|
"grad_norm": 4.839014530181885,
|
|
"learning_rate": 7.850267379679145e-06,
|
|
"loss": 1.142,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 0.19668183034519668,
|
|
"grad_norm": 4.309354305267334,
|
|
"learning_rate": 7.86096256684492e-06,
|
|
"loss": 1.055,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.19694942467219695,
|
|
"grad_norm": 4.399764060974121,
|
|
"learning_rate": 7.871657754010695e-06,
|
|
"loss": 1.2,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 0.19721701899919722,
|
|
"grad_norm": 4.814887523651123,
|
|
"learning_rate": 7.882352941176471e-06,
|
|
"loss": 1.1129,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 0.1974846133261975,
|
|
"grad_norm": 4.662134647369385,
|
|
"learning_rate": 7.893048128342246e-06,
|
|
"loss": 1.3224,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 0.19775220765319776,
|
|
"grad_norm": 4.743928909301758,
|
|
"learning_rate": 7.903743315508022e-06,
|
|
"loss": 1.2364,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 0.19801980198019803,
|
|
"grad_norm": 4.6992716789245605,
|
|
"learning_rate": 7.914438502673799e-06,
|
|
"loss": 1.0913,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.1982873963071983,
|
|
"grad_norm": 4.529000759124756,
|
|
"learning_rate": 7.925133689839572e-06,
|
|
"loss": 1.1083,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 0.19855499063419854,
|
|
"grad_norm": 4.22991418838501,
|
|
"learning_rate": 7.935828877005348e-06,
|
|
"loss": 1.1632,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 0.1988225849611988,
|
|
"grad_norm": 4.685365676879883,
|
|
"learning_rate": 7.946524064171124e-06,
|
|
"loss": 1.1653,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 0.19909017928819908,
|
|
"grad_norm": 5.151124954223633,
|
|
"learning_rate": 7.9572192513369e-06,
|
|
"loss": 1.1468,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 0.19935777361519935,
|
|
"grad_norm": 4.344570636749268,
|
|
"learning_rate": 7.967914438502674e-06,
|
|
"loss": 1.084,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.19962536794219962,
|
|
"grad_norm": 4.775820255279541,
|
|
"learning_rate": 7.97860962566845e-06,
|
|
"loss": 1.0849,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 0.1998929622691999,
|
|
"grad_norm": 4.6123433113098145,
|
|
"learning_rate": 7.989304812834225e-06,
|
|
"loss": 1.1837,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 0.20016055659620016,
|
|
"grad_norm": 4.325228691101074,
|
|
"learning_rate": 8.000000000000001e-06,
|
|
"loss": 1.1404,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 0.20042815092320043,
|
|
"grad_norm": 4.531330108642578,
|
|
"learning_rate": 8.010695187165776e-06,
|
|
"loss": 1.4233,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 0.2006957452502007,
|
|
"grad_norm": 4.567444801330566,
|
|
"learning_rate": 8.02139037433155e-06,
|
|
"loss": 1.1898,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.20096333957720097,
|
|
"grad_norm": 4.629062175750732,
|
|
"learning_rate": 8.032085561497327e-06,
|
|
"loss": 1.2206,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 0.20123093390420124,
|
|
"grad_norm": 4.17169713973999,
|
|
"learning_rate": 8.042780748663103e-06,
|
|
"loss": 1.0784,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 0.20149852823120148,
|
|
"grad_norm": 4.538808345794678,
|
|
"learning_rate": 8.053475935828876e-06,
|
|
"loss": 1.2097,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 0.20176612255820175,
|
|
"grad_norm": 4.794569492340088,
|
|
"learning_rate": 8.064171122994653e-06,
|
|
"loss": 1.2594,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 0.20203371688520202,
|
|
"grad_norm": 4.9203972816467285,
|
|
"learning_rate": 8.07486631016043e-06,
|
|
"loss": 1.4261,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.2023013112122023,
|
|
"grad_norm": 4.924014091491699,
|
|
"learning_rate": 8.085561497326204e-06,
|
|
"loss": 1.1343,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 0.20256890553920257,
|
|
"grad_norm": 4.558595657348633,
|
|
"learning_rate": 8.096256684491979e-06,
|
|
"loss": 1.1778,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 0.20283649986620284,
|
|
"grad_norm": 4.965837478637695,
|
|
"learning_rate": 8.106951871657755e-06,
|
|
"loss": 1.406,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 0.2031040941932031,
|
|
"grad_norm": 4.6557207107543945,
|
|
"learning_rate": 8.11764705882353e-06,
|
|
"loss": 1.2242,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 0.20337168852020338,
|
|
"grad_norm": 4.143162250518799,
|
|
"learning_rate": 8.128342245989306e-06,
|
|
"loss": 1.0762,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.20363928284720365,
|
|
"grad_norm": 4.899580001831055,
|
|
"learning_rate": 8.13903743315508e-06,
|
|
"loss": 1.2824,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 0.20390687717420392,
|
|
"grad_norm": 4.938472270965576,
|
|
"learning_rate": 8.149732620320855e-06,
|
|
"loss": 1.3493,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 0.20417447150120419,
|
|
"grad_norm": 4.2447943687438965,
|
|
"learning_rate": 8.160427807486632e-06,
|
|
"loss": 1.1482,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 0.20444206582820446,
|
|
"grad_norm": 4.182919025421143,
|
|
"learning_rate": 8.171122994652407e-06,
|
|
"loss": 1.0402,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 0.2047096601552047,
|
|
"grad_norm": 4.690080165863037,
|
|
"learning_rate": 8.181818181818183e-06,
|
|
"loss": 1.3051,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.20497725448220497,
|
|
"grad_norm": 4.1133832931518555,
|
|
"learning_rate": 8.192513368983958e-06,
|
|
"loss": 1.0852,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 0.20524484880920524,
|
|
"grad_norm": 4.17720365524292,
|
|
"learning_rate": 8.203208556149734e-06,
|
|
"loss": 1.0509,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 0.2055124431362055,
|
|
"grad_norm": 4.893587589263916,
|
|
"learning_rate": 8.213903743315509e-06,
|
|
"loss": 1.2572,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 0.20578003746320578,
|
|
"grad_norm": 4.39441442489624,
|
|
"learning_rate": 8.224598930481285e-06,
|
|
"loss": 1.0804,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 0.20604763179020605,
|
|
"grad_norm": 4.479729652404785,
|
|
"learning_rate": 8.23529411764706e-06,
|
|
"loss": 1.129,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.20631522611720632,
|
|
"grad_norm": 4.792821407318115,
|
|
"learning_rate": 8.245989304812834e-06,
|
|
"loss": 1.3325,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 0.2065828204442066,
|
|
"grad_norm": 4.284221649169922,
|
|
"learning_rate": 8.256684491978611e-06,
|
|
"loss": 1.2608,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 0.20685041477120686,
|
|
"grad_norm": 4.049210071563721,
|
|
"learning_rate": 8.267379679144386e-06,
|
|
"loss": 1.1589,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 0.20711800909820713,
|
|
"grad_norm": 4.67439079284668,
|
|
"learning_rate": 8.27807486631016e-06,
|
|
"loss": 1.2496,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 0.2073856034252074,
|
|
"grad_norm": 4.5758843421936035,
|
|
"learning_rate": 8.288770053475937e-06,
|
|
"loss": 1.1285,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.20765319775220764,
|
|
"grad_norm": 4.632938861846924,
|
|
"learning_rate": 8.299465240641711e-06,
|
|
"loss": 1.3281,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 0.2079207920792079,
|
|
"grad_norm": 4.83327054977417,
|
|
"learning_rate": 8.310160427807488e-06,
|
|
"loss": 1.2126,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 0.20818838640620818,
|
|
"grad_norm": 4.5972137451171875,
|
|
"learning_rate": 8.320855614973262e-06,
|
|
"loss": 1.1829,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 0.20845598073320845,
|
|
"grad_norm": 4.194045543670654,
|
|
"learning_rate": 8.331550802139037e-06,
|
|
"loss": 1.1207,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 0.20872357506020872,
|
|
"grad_norm": 4.589977264404297,
|
|
"learning_rate": 8.342245989304813e-06,
|
|
"loss": 1.0869,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.208991169387209,
|
|
"grad_norm": 4.733802795410156,
|
|
"learning_rate": 8.35294117647059e-06,
|
|
"loss": 1.2378,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 0.20925876371420926,
|
|
"grad_norm": 4.47822380065918,
|
|
"learning_rate": 8.363636363636365e-06,
|
|
"loss": 1.1433,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 0.20952635804120953,
|
|
"grad_norm": 3.875276803970337,
|
|
"learning_rate": 8.37433155080214e-06,
|
|
"loss": 1.0709,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 0.2097939523682098,
|
|
"grad_norm": 4.252140522003174,
|
|
"learning_rate": 8.385026737967916e-06,
|
|
"loss": 1.1106,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 0.21006154669521007,
|
|
"grad_norm": 4.29549503326416,
|
|
"learning_rate": 8.39572192513369e-06,
|
|
"loss": 1.1556,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.21032914102221034,
|
|
"grad_norm": 4.358144760131836,
|
|
"learning_rate": 8.406417112299467e-06,
|
|
"loss": 1.053,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 0.21059673534921058,
|
|
"grad_norm": 4.602996826171875,
|
|
"learning_rate": 8.417112299465241e-06,
|
|
"loss": 1.2131,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 0.21086432967621085,
|
|
"grad_norm": 4.466192722320557,
|
|
"learning_rate": 8.427807486631016e-06,
|
|
"loss": 1.3099,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 0.21113192400321112,
|
|
"grad_norm": 4.629776954650879,
|
|
"learning_rate": 8.438502673796792e-06,
|
|
"loss": 1.176,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 0.2113995183302114,
|
|
"grad_norm": 4.807766437530518,
|
|
"learning_rate": 8.449197860962567e-06,
|
|
"loss": 1.2082,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.21166711265721166,
|
|
"grad_norm": 4.741950035095215,
|
|
"learning_rate": 8.459893048128342e-06,
|
|
"loss": 1.228,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 0.21193470698421193,
|
|
"grad_norm": 4.33003044128418,
|
|
"learning_rate": 8.470588235294118e-06,
|
|
"loss": 1.1136,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 0.2122023013112122,
|
|
"grad_norm": 4.555398941040039,
|
|
"learning_rate": 8.481283422459895e-06,
|
|
"loss": 1.2896,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 0.21246989563821247,
|
|
"grad_norm": 4.31208610534668,
|
|
"learning_rate": 8.49197860962567e-06,
|
|
"loss": 1.1487,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 0.21273748996521274,
|
|
"grad_norm": 4.462785243988037,
|
|
"learning_rate": 8.502673796791444e-06,
|
|
"loss": 1.1853,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.21300508429221301,
|
|
"grad_norm": 4.457045078277588,
|
|
"learning_rate": 8.51336898395722e-06,
|
|
"loss": 1.1769,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 0.21327267861921329,
|
|
"grad_norm": 4.705628871917725,
|
|
"learning_rate": 8.524064171122995e-06,
|
|
"loss": 1.1599,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 0.21354027294621353,
|
|
"grad_norm": 4.765135288238525,
|
|
"learning_rate": 8.534759358288771e-06,
|
|
"loss": 1.3051,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 0.2138078672732138,
|
|
"grad_norm": 4.394601345062256,
|
|
"learning_rate": 8.545454545454546e-06,
|
|
"loss": 1.1349,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 0.21407546160021407,
|
|
"grad_norm": 4.035240173339844,
|
|
"learning_rate": 8.556149732620321e-06,
|
|
"loss": 1.1051,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.21434305592721434,
|
|
"grad_norm": 4.072005271911621,
|
|
"learning_rate": 8.566844919786097e-06,
|
|
"loss": 1.1045,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 0.2146106502542146,
|
|
"grad_norm": 4.543212413787842,
|
|
"learning_rate": 8.577540106951872e-06,
|
|
"loss": 1.144,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 0.21487824458121488,
|
|
"grad_norm": 4.204556941986084,
|
|
"learning_rate": 8.588235294117647e-06,
|
|
"loss": 1.2146,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 0.21514583890821515,
|
|
"grad_norm": 3.9721314907073975,
|
|
"learning_rate": 8.598930481283423e-06,
|
|
"loss": 1.1586,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 0.21541343323521542,
|
|
"grad_norm": 3.9580788612365723,
|
|
"learning_rate": 8.609625668449198e-06,
|
|
"loss": 1.0575,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.2156810275622157,
|
|
"grad_norm": 4.39721155166626,
|
|
"learning_rate": 8.620320855614974e-06,
|
|
"loss": 1.2462,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 0.21594862188921596,
|
|
"grad_norm": 4.285038471221924,
|
|
"learning_rate": 8.63101604278075e-06,
|
|
"loss": 1.1557,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 0.21621621621621623,
|
|
"grad_norm": 4.489853382110596,
|
|
"learning_rate": 8.641711229946525e-06,
|
|
"loss": 1.3112,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 0.21648381054321647,
|
|
"grad_norm": 4.501437664031982,
|
|
"learning_rate": 8.6524064171123e-06,
|
|
"loss": 1.1479,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 0.21675140487021674,
|
|
"grad_norm": 4.656176567077637,
|
|
"learning_rate": 8.663101604278076e-06,
|
|
"loss": 1.2141,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.217018999197217,
|
|
"grad_norm": 4.57153844833374,
|
|
"learning_rate": 8.673796791443851e-06,
|
|
"loss": 1.1898,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 0.21728659352421728,
|
|
"grad_norm": 4.217146873474121,
|
|
"learning_rate": 8.684491978609626e-06,
|
|
"loss": 1.1277,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 0.21755418785121755,
|
|
"grad_norm": 5.0501227378845215,
|
|
"learning_rate": 8.695187165775402e-06,
|
|
"loss": 1.2793,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 0.21782178217821782,
|
|
"grad_norm": 4.156916618347168,
|
|
"learning_rate": 8.705882352941177e-06,
|
|
"loss": 1.0608,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 0.2180893765052181,
|
|
"grad_norm": 4.722466468811035,
|
|
"learning_rate": 8.716577540106953e-06,
|
|
"loss": 1.2908,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.21835697083221836,
|
|
"grad_norm": 4.382132053375244,
|
|
"learning_rate": 8.727272727272728e-06,
|
|
"loss": 1.1733,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 0.21862456515921863,
|
|
"grad_norm": 4.8200225830078125,
|
|
"learning_rate": 8.737967914438502e-06,
|
|
"loss": 1.3151,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 0.2188921594862189,
|
|
"grad_norm": 4.401098728179932,
|
|
"learning_rate": 8.748663101604279e-06,
|
|
"loss": 1.1091,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 0.21915975381321917,
|
|
"grad_norm": 4.914200305938721,
|
|
"learning_rate": 8.759358288770055e-06,
|
|
"loss": 1.3693,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 0.2194273481402194,
|
|
"grad_norm": 4.085461616516113,
|
|
"learning_rate": 8.77005347593583e-06,
|
|
"loss": 1.1384,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.21969494246721968,
|
|
"grad_norm": 3.852440595626831,
|
|
"learning_rate": 8.780748663101605e-06,
|
|
"loss": 1.1228,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 0.21996253679421995,
|
|
"grad_norm": 4.607455253601074,
|
|
"learning_rate": 8.791443850267381e-06,
|
|
"loss": 1.2419,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 0.22023013112122022,
|
|
"grad_norm": 4.384522438049316,
|
|
"learning_rate": 8.802139037433156e-06,
|
|
"loss": 1.3108,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 0.2204977254482205,
|
|
"grad_norm": 4.342321872711182,
|
|
"learning_rate": 8.81283422459893e-06,
|
|
"loss": 1.1794,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 0.22076531977522076,
|
|
"grad_norm": 4.432126045227051,
|
|
"learning_rate": 8.823529411764707e-06,
|
|
"loss": 1.1947,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.22103291410222103,
|
|
"grad_norm": 4.4877777099609375,
|
|
"learning_rate": 8.834224598930481e-06,
|
|
"loss": 1.2243,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 0.2213005084292213,
|
|
"grad_norm": 4.3614325523376465,
|
|
"learning_rate": 8.844919786096258e-06,
|
|
"loss": 1.2309,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 0.22156810275622157,
|
|
"grad_norm": 4.3788580894470215,
|
|
"learning_rate": 8.855614973262033e-06,
|
|
"loss": 1.1391,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 0.22183569708322184,
|
|
"grad_norm": 4.09984016418457,
|
|
"learning_rate": 8.866310160427807e-06,
|
|
"loss": 1.1843,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 0.22210329141022211,
|
|
"grad_norm": 4.093768119812012,
|
|
"learning_rate": 8.877005347593584e-06,
|
|
"loss": 1.1382,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.22237088573722238,
|
|
"grad_norm": 4.717266082763672,
|
|
"learning_rate": 8.88770053475936e-06,
|
|
"loss": 1.3751,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 0.22263848006422263,
|
|
"grad_norm": 4.382028579711914,
|
|
"learning_rate": 8.898395721925135e-06,
|
|
"loss": 1.2114,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 0.2229060743912229,
|
|
"grad_norm": 4.509121894836426,
|
|
"learning_rate": 8.90909090909091e-06,
|
|
"loss": 1.2096,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 0.22317366871822317,
|
|
"grad_norm": 4.2888078689575195,
|
|
"learning_rate": 8.919786096256686e-06,
|
|
"loss": 1.2023,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 0.22344126304522344,
|
|
"grad_norm": 3.797525405883789,
|
|
"learning_rate": 8.93048128342246e-06,
|
|
"loss": 1.1453,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.2237088573722237,
|
|
"grad_norm": 3.918774127960205,
|
|
"learning_rate": 8.941176470588237e-06,
|
|
"loss": 1.0776,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 0.22397645169922398,
|
|
"grad_norm": 4.301737308502197,
|
|
"learning_rate": 8.951871657754012e-06,
|
|
"loss": 1.1801,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 0.22424404602622425,
|
|
"grad_norm": 4.121411323547363,
|
|
"learning_rate": 8.962566844919786e-06,
|
|
"loss": 1.0812,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 0.22451164035322452,
|
|
"grad_norm": 4.318382740020752,
|
|
"learning_rate": 8.973262032085563e-06,
|
|
"loss": 1.1899,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 0.2247792346802248,
|
|
"grad_norm": 4.362233638763428,
|
|
"learning_rate": 8.983957219251337e-06,
|
|
"loss": 1.137,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.22504682900722506,
|
|
"grad_norm": 4.285608291625977,
|
|
"learning_rate": 8.994652406417112e-06,
|
|
"loss": 1.1922,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 0.22531442333422533,
|
|
"grad_norm": 4.41885232925415,
|
|
"learning_rate": 9.005347593582888e-06,
|
|
"loss": 1.2026,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 0.22558201766122557,
|
|
"grad_norm": 4.712429046630859,
|
|
"learning_rate": 9.016042780748663e-06,
|
|
"loss": 1.2112,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 0.22584961198822584,
|
|
"grad_norm": 3.9474940299987793,
|
|
"learning_rate": 9.02673796791444e-06,
|
|
"loss": 1.0856,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 0.2261172063152261,
|
|
"grad_norm": 4.865321159362793,
|
|
"learning_rate": 9.037433155080214e-06,
|
|
"loss": 1.2806,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.22638480064222638,
|
|
"grad_norm": 4.013378620147705,
|
|
"learning_rate": 9.04812834224599e-06,
|
|
"loss": 1.112,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 0.22665239496922665,
|
|
"grad_norm": 4.2192702293396,
|
|
"learning_rate": 9.058823529411765e-06,
|
|
"loss": 1.2246,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 0.22691998929622692,
|
|
"grad_norm": 4.709174633026123,
|
|
"learning_rate": 9.069518716577542e-06,
|
|
"loss": 1.2746,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 0.2271875836232272,
|
|
"grad_norm": 4.175418376922607,
|
|
"learning_rate": 9.080213903743316e-06,
|
|
"loss": 1.1651,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 0.22745517795022746,
|
|
"grad_norm": 4.398164749145508,
|
|
"learning_rate": 9.090909090909091e-06,
|
|
"loss": 1.2002,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.22772277227722773,
|
|
"grad_norm": 4.27931022644043,
|
|
"learning_rate": 9.101604278074867e-06,
|
|
"loss": 1.1041,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 0.227990366604228,
|
|
"grad_norm": 4.752706050872803,
|
|
"learning_rate": 9.112299465240642e-06,
|
|
"loss": 1.3382,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 0.22825796093122827,
|
|
"grad_norm": 4.658750057220459,
|
|
"learning_rate": 9.122994652406418e-06,
|
|
"loss": 1.2518,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 0.2285255552582285,
|
|
"grad_norm": 4.37801456451416,
|
|
"learning_rate": 9.133689839572193e-06,
|
|
"loss": 1.2284,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 0.22879314958522878,
|
|
"grad_norm": 4.360160827636719,
|
|
"learning_rate": 9.144385026737968e-06,
|
|
"loss": 1.107,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.22906074391222905,
|
|
"grad_norm": 4.552803993225098,
|
|
"learning_rate": 9.155080213903744e-06,
|
|
"loss": 1.2773,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 0.22932833823922932,
|
|
"grad_norm": 4.319884300231934,
|
|
"learning_rate": 9.16577540106952e-06,
|
|
"loss": 1.3304,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 0.2295959325662296,
|
|
"grad_norm": 4.7817840576171875,
|
|
"learning_rate": 9.176470588235294e-06,
|
|
"loss": 1.3394,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 0.22986352689322986,
|
|
"grad_norm": 3.597621202468872,
|
|
"learning_rate": 9.18716577540107e-06,
|
|
"loss": 1.0479,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 0.23013112122023013,
|
|
"grad_norm": 4.924500465393066,
|
|
"learning_rate": 9.197860962566846e-06,
|
|
"loss": 1.2405,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.2303987155472304,
|
|
"grad_norm": 4.659447193145752,
|
|
"learning_rate": 9.208556149732621e-06,
|
|
"loss": 1.2231,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 0.23066630987423067,
|
|
"grad_norm": 4.4317145347595215,
|
|
"learning_rate": 9.219251336898396e-06,
|
|
"loss": 1.2094,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 0.23093390420123094,
|
|
"grad_norm": 3.976191520690918,
|
|
"learning_rate": 9.229946524064172e-06,
|
|
"loss": 1.0144,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 0.23120149852823121,
|
|
"grad_norm": 4.48732852935791,
|
|
"learning_rate": 9.240641711229947e-06,
|
|
"loss": 1.0518,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 0.23146909285523146,
|
|
"grad_norm": 4.009017467498779,
|
|
"learning_rate": 9.251336898395723e-06,
|
|
"loss": 1.1445,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.23173668718223173,
|
|
"grad_norm": 4.176751136779785,
|
|
"learning_rate": 9.262032085561498e-06,
|
|
"loss": 1.1074,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 0.232004281509232,
|
|
"grad_norm": 4.7490763664245605,
|
|
"learning_rate": 9.272727272727273e-06,
|
|
"loss": 1.351,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 0.23227187583623227,
|
|
"grad_norm": 4.492088794708252,
|
|
"learning_rate": 9.283422459893049e-06,
|
|
"loss": 1.2427,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 0.23253947016323254,
|
|
"grad_norm": 3.9468204975128174,
|
|
"learning_rate": 9.294117647058824e-06,
|
|
"loss": 1.0236,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 0.2328070644902328,
|
|
"grad_norm": 4.703409194946289,
|
|
"learning_rate": 9.3048128342246e-06,
|
|
"loss": 1.1154,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.23307465881723308,
|
|
"grad_norm": 4.1995110511779785,
|
|
"learning_rate": 9.315508021390375e-06,
|
|
"loss": 1.1273,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 0.23334225314423335,
|
|
"grad_norm": 4.209486484527588,
|
|
"learning_rate": 9.326203208556151e-06,
|
|
"loss": 1.1375,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 0.23360984747123362,
|
|
"grad_norm": 3.9918205738067627,
|
|
"learning_rate": 9.336898395721926e-06,
|
|
"loss": 1.1212,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 0.2338774417982339,
|
|
"grad_norm": 4.315709114074707,
|
|
"learning_rate": 9.347593582887702e-06,
|
|
"loss": 1.1351,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 0.23414503612523416,
|
|
"grad_norm": 4.223841190338135,
|
|
"learning_rate": 9.358288770053477e-06,
|
|
"loss": 1.224,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.2344126304522344,
|
|
"grad_norm": 4.296685218811035,
|
|
"learning_rate": 9.368983957219252e-06,
|
|
"loss": 1.1524,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 0.23468022477923467,
|
|
"grad_norm": 4.791153430938721,
|
|
"learning_rate": 9.379679144385028e-06,
|
|
"loss": 1.315,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 0.23494781910623494,
|
|
"grad_norm": 4.414406776428223,
|
|
"learning_rate": 9.390374331550803e-06,
|
|
"loss": 1.2116,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 0.2352154134332352,
|
|
"grad_norm": 4.986870288848877,
|
|
"learning_rate": 9.401069518716577e-06,
|
|
"loss": 1.2292,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 0.23548300776023548,
|
|
"grad_norm": 4.407514572143555,
|
|
"learning_rate": 9.411764705882354e-06,
|
|
"loss": 1.3158,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.23575060208723575,
|
|
"grad_norm": 4.413543701171875,
|
|
"learning_rate": 9.422459893048129e-06,
|
|
"loss": 0.968,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 0.23601819641423602,
|
|
"grad_norm": 4.498653411865234,
|
|
"learning_rate": 9.433155080213905e-06,
|
|
"loss": 1.1329,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 0.2362857907412363,
|
|
"grad_norm": 4.2039313316345215,
|
|
"learning_rate": 9.44385026737968e-06,
|
|
"loss": 1.1976,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 0.23655338506823656,
|
|
"grad_norm": 4.075275421142578,
|
|
"learning_rate": 9.454545454545456e-06,
|
|
"loss": 1.1194,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 0.23682097939523683,
|
|
"grad_norm": 4.131809234619141,
|
|
"learning_rate": 9.46524064171123e-06,
|
|
"loss": 1.2058,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.2370885737222371,
|
|
"grad_norm": 4.1411824226379395,
|
|
"learning_rate": 9.475935828877007e-06,
|
|
"loss": 1.1203,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 0.23735616804923734,
|
|
"grad_norm": 5.270638942718506,
|
|
"learning_rate": 9.486631016042782e-06,
|
|
"loss": 1.2022,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 0.2376237623762376,
|
|
"grad_norm": 4.125979900360107,
|
|
"learning_rate": 9.497326203208556e-06,
|
|
"loss": 1.0407,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 0.23789135670323788,
|
|
"grad_norm": 4.254225730895996,
|
|
"learning_rate": 9.508021390374333e-06,
|
|
"loss": 1.1919,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 0.23815895103023815,
|
|
"grad_norm": 4.1460723876953125,
|
|
"learning_rate": 9.518716577540108e-06,
|
|
"loss": 1.1937,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.23842654535723842,
|
|
"grad_norm": 4.267801761627197,
|
|
"learning_rate": 9.529411764705882e-06,
|
|
"loss": 1.2268,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 0.2386941396842387,
|
|
"grad_norm": 4.095164775848389,
|
|
"learning_rate": 9.540106951871659e-06,
|
|
"loss": 1.2323,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 0.23896173401123896,
|
|
"grad_norm": 4.400330066680908,
|
|
"learning_rate": 9.550802139037433e-06,
|
|
"loss": 1.3224,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 0.23922932833823923,
|
|
"grad_norm": 4.906595706939697,
|
|
"learning_rate": 9.56149732620321e-06,
|
|
"loss": 1.3625,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 0.2394969226652395,
|
|
"grad_norm": 4.529881000518799,
|
|
"learning_rate": 9.572192513368986e-06,
|
|
"loss": 1.1608,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.23976451699223977,
|
|
"grad_norm": 4.229710102081299,
|
|
"learning_rate": 9.582887700534759e-06,
|
|
"loss": 1.1961,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 0.24003211131924004,
|
|
"grad_norm": 4.66829776763916,
|
|
"learning_rate": 9.593582887700535e-06,
|
|
"loss": 1.2154,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 0.24029970564624029,
|
|
"grad_norm": 4.366943836212158,
|
|
"learning_rate": 9.604278074866312e-06,
|
|
"loss": 1.1817,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 0.24056729997324056,
|
|
"grad_norm": 4.251003265380859,
|
|
"learning_rate": 9.614973262032087e-06,
|
|
"loss": 1.3212,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 0.24083489430024083,
|
|
"grad_norm": 5.345521450042725,
|
|
"learning_rate": 9.625668449197861e-06,
|
|
"loss": 1.1808,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.2411024886272411,
|
|
"grad_norm": 4.079299449920654,
|
|
"learning_rate": 9.636363636363638e-06,
|
|
"loss": 1.1816,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 0.24137008295424137,
|
|
"grad_norm": 4.181840896606445,
|
|
"learning_rate": 9.647058823529412e-06,
|
|
"loss": 1.141,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 0.24163767728124164,
|
|
"grad_norm": 4.736073017120361,
|
|
"learning_rate": 9.657754010695189e-06,
|
|
"loss": 1.0541,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 0.2419052716082419,
|
|
"grad_norm": 4.228132724761963,
|
|
"learning_rate": 9.668449197860963e-06,
|
|
"loss": 1.0897,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 0.24217286593524218,
|
|
"grad_norm": 4.429383277893066,
|
|
"learning_rate": 9.679144385026738e-06,
|
|
"loss": 1.1879,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.24244046026224245,
|
|
"grad_norm": 4.360840320587158,
|
|
"learning_rate": 9.689839572192514e-06,
|
|
"loss": 1.1828,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 0.24270805458924272,
|
|
"grad_norm": 4.852614879608154,
|
|
"learning_rate": 9.700534759358289e-06,
|
|
"loss": 1.2815,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 0.242975648916243,
|
|
"grad_norm": 4.6722846031188965,
|
|
"learning_rate": 9.711229946524064e-06,
|
|
"loss": 1.3035,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 0.24324324324324326,
|
|
"grad_norm": 4.601790904998779,
|
|
"learning_rate": 9.72192513368984e-06,
|
|
"loss": 1.2348,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 0.2435108375702435,
|
|
"grad_norm": 4.581474781036377,
|
|
"learning_rate": 9.732620320855617e-06,
|
|
"loss": 1.2717,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.24377843189724377,
|
|
"grad_norm": 4.073735237121582,
|
|
"learning_rate": 9.743315508021391e-06,
|
|
"loss": 1.2133,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 0.24404602622424404,
|
|
"grad_norm": 4.351081848144531,
|
|
"learning_rate": 9.754010695187166e-06,
|
|
"loss": 1.1797,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 0.2443136205512443,
|
|
"grad_norm": 3.7765159606933594,
|
|
"learning_rate": 9.764705882352942e-06,
|
|
"loss": 1.163,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 0.24458121487824458,
|
|
"grad_norm": 4.727344512939453,
|
|
"learning_rate": 9.775401069518717e-06,
|
|
"loss": 1.3226,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 0.24484880920524485,
|
|
"grad_norm": 4.661051273345947,
|
|
"learning_rate": 9.786096256684493e-06,
|
|
"loss": 1.1071,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.24511640353224512,
|
|
"grad_norm": 4.205208778381348,
|
|
"learning_rate": 9.796791443850268e-06,
|
|
"loss": 1.1642,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 0.2453839978592454,
|
|
"grad_norm": 4.339627265930176,
|
|
"learning_rate": 9.807486631016043e-06,
|
|
"loss": 1.0605,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 0.24565159218624566,
|
|
"grad_norm": 4.589977741241455,
|
|
"learning_rate": 9.81818181818182e-06,
|
|
"loss": 1.2584,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 0.24591918651324593,
|
|
"grad_norm": 4.377978801727295,
|
|
"learning_rate": 9.828877005347594e-06,
|
|
"loss": 1.303,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 0.2461867808402462,
|
|
"grad_norm": 4.110877513885498,
|
|
"learning_rate": 9.83957219251337e-06,
|
|
"loss": 1.1833,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.24645437516724644,
|
|
"grad_norm": 4.5038743019104,
|
|
"learning_rate": 9.850267379679145e-06,
|
|
"loss": 1.2471,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 0.2467219694942467,
|
|
"grad_norm": 4.485939025878906,
|
|
"learning_rate": 9.86096256684492e-06,
|
|
"loss": 1.1391,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 0.24698956382124698,
|
|
"grad_norm": 4.139279365539551,
|
|
"learning_rate": 9.871657754010696e-06,
|
|
"loss": 1.1146,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 0.24725715814824725,
|
|
"grad_norm": 3.7969651222229004,
|
|
"learning_rate": 9.882352941176472e-06,
|
|
"loss": 1.0658,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 0.24752475247524752,
|
|
"grad_norm": 3.978060483932495,
|
|
"learning_rate": 9.893048128342247e-06,
|
|
"loss": 1.1051,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.2477923468022478,
|
|
"grad_norm": 4.101005554199219,
|
|
"learning_rate": 9.903743315508022e-06,
|
|
"loss": 1.1725,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 0.24805994112924806,
|
|
"grad_norm": 3.89359188079834,
|
|
"learning_rate": 9.914438502673798e-06,
|
|
"loss": 1.1501,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 0.24832753545624833,
|
|
"grad_norm": 4.291905879974365,
|
|
"learning_rate": 9.925133689839573e-06,
|
|
"loss": 1.2188,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 0.2485951297832486,
|
|
"grad_norm": 4.537034034729004,
|
|
"learning_rate": 9.935828877005348e-06,
|
|
"loss": 1.2074,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 0.24886272411024887,
|
|
"grad_norm": 4.324453830718994,
|
|
"learning_rate": 9.946524064171124e-06,
|
|
"loss": 1.1561,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.24913031843724914,
|
|
"grad_norm": 4.010372638702393,
|
|
"learning_rate": 9.957219251336899e-06,
|
|
"loss": 1.156,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 0.24939791276424939,
|
|
"grad_norm": 4.636694431304932,
|
|
"learning_rate": 9.967914438502675e-06,
|
|
"loss": 1.3193,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 0.24966550709124966,
|
|
"grad_norm": 4.060527324676514,
|
|
"learning_rate": 9.97860962566845e-06,
|
|
"loss": 1.2258,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 0.24993310141824993,
|
|
"grad_norm": 4.463012218475342,
|
|
"learning_rate": 9.989304812834224e-06,
|
|
"loss": 1.1717,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 0.2502006957452502,
|
|
"grad_norm": 3.8592917919158936,
|
|
"learning_rate": 1e-05,
|
|
"loss": 1.1587,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.25046829007225047,
|
|
"grad_norm": 4.335379600524902,
|
|
"learning_rate": 9.999999921685345e-06,
|
|
"loss": 1.3002,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 0.25073588439925076,
|
|
"grad_norm": 4.3303680419921875,
|
|
"learning_rate": 9.999999686741384e-06,
|
|
"loss": 1.1696,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 0.251003478726251,
|
|
"grad_norm": 4.4580607414245605,
|
|
"learning_rate": 9.999999295168122e-06,
|
|
"loss": 1.2942,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 0.25127107305325125,
|
|
"grad_norm": 3.974984884262085,
|
|
"learning_rate": 9.999998746965573e-06,
|
|
"loss": 1.1459,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 0.25153866738025155,
|
|
"grad_norm": 4.089166164398193,
|
|
"learning_rate": 9.999998042133754e-06,
|
|
"loss": 1.2405,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.2518062617072518,
|
|
"grad_norm": 4.120057582855225,
|
|
"learning_rate": 9.999997180672684e-06,
|
|
"loss": 1.1968,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 0.2520738560342521,
|
|
"grad_norm": 3.840859889984131,
|
|
"learning_rate": 9.999996162582396e-06,
|
|
"loss": 1.144,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 0.25234145036125233,
|
|
"grad_norm": 4.502830505371094,
|
|
"learning_rate": 9.999994987862916e-06,
|
|
"loss": 1.2564,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 0.2526090446882526,
|
|
"grad_norm": 4.325287818908691,
|
|
"learning_rate": 9.999993656514284e-06,
|
|
"loss": 1.2646,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 0.25287663901525287,
|
|
"grad_norm": 4.583348751068115,
|
|
"learning_rate": 9.999992168536542e-06,
|
|
"loss": 1.1912,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.25314423334225317,
|
|
"grad_norm": 4.394077301025391,
|
|
"learning_rate": 9.999990523929734e-06,
|
|
"loss": 1.242,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 0.2534118276692534,
|
|
"grad_norm": 4.394894599914551,
|
|
"learning_rate": 9.999988722693914e-06,
|
|
"loss": 1.1904,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 0.2536794219962537,
|
|
"grad_norm": 4.380218982696533,
|
|
"learning_rate": 9.999986764829137e-06,
|
|
"loss": 1.3293,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 0.25394701632325395,
|
|
"grad_norm": 4.508794784545898,
|
|
"learning_rate": 9.999984650335468e-06,
|
|
"loss": 1.2141,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 0.2542146106502542,
|
|
"grad_norm": 4.407951831817627,
|
|
"learning_rate": 9.999982379212967e-06,
|
|
"loss": 1.1078,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.2544822049772545,
|
|
"grad_norm": 5.039391040802002,
|
|
"learning_rate": 9.99997995146171e-06,
|
|
"loss": 1.3326,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 0.25474979930425473,
|
|
"grad_norm": 4.499945640563965,
|
|
"learning_rate": 9.99997736708177e-06,
|
|
"loss": 1.3474,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 0.25501739363125503,
|
|
"grad_norm": 4.34948205947876,
|
|
"learning_rate": 9.99997462607323e-06,
|
|
"loss": 1.1468,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 0.25528498795825527,
|
|
"grad_norm": 4.453915596008301,
|
|
"learning_rate": 9.999971728436174e-06,
|
|
"loss": 1.4107,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 0.25555258228525557,
|
|
"grad_norm": 4.159339904785156,
|
|
"learning_rate": 9.999968674170697e-06,
|
|
"loss": 1.2531,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.2558201766122558,
|
|
"grad_norm": 4.395799160003662,
|
|
"learning_rate": 9.999965463276888e-06,
|
|
"loss": 1.372,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 0.2560877709392561,
|
|
"grad_norm": 4.233826637268066,
|
|
"learning_rate": 9.999962095754854e-06,
|
|
"loss": 1.1476,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 0.25635536526625635,
|
|
"grad_norm": 4.740029335021973,
|
|
"learning_rate": 9.999958571604697e-06,
|
|
"loss": 1.2241,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 0.25662295959325665,
|
|
"grad_norm": 4.2474846839904785,
|
|
"learning_rate": 9.999954890826528e-06,
|
|
"loss": 1.1625,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 0.2568905539202569,
|
|
"grad_norm": 4.043703556060791,
|
|
"learning_rate": 9.99995105342046e-06,
|
|
"loss": 1.1664,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.25715814824725713,
|
|
"grad_norm": 4.318393707275391,
|
|
"learning_rate": 9.99994705938662e-06,
|
|
"loss": 1.2221,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 0.25742574257425743,
|
|
"grad_norm": 4.372133731842041,
|
|
"learning_rate": 9.999942908725127e-06,
|
|
"loss": 1.1626,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 0.2576933369012577,
|
|
"grad_norm": 4.380350589752197,
|
|
"learning_rate": 9.999938601436111e-06,
|
|
"loss": 1.1594,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 0.257960931228258,
|
|
"grad_norm": 4.29257345199585,
|
|
"learning_rate": 9.999934137519711e-06,
|
|
"loss": 1.1906,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 0.2582285255552582,
|
|
"grad_norm": 3.729611873626709,
|
|
"learning_rate": 9.999929516976063e-06,
|
|
"loss": 0.9909,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.2584961198822585,
|
|
"grad_norm": 9.831878662109375,
|
|
"learning_rate": 9.999924739805313e-06,
|
|
"loss": 1.2233,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 0.25876371420925875,
|
|
"grad_norm": 4.3758544921875,
|
|
"learning_rate": 9.999919806007612e-06,
|
|
"loss": 1.3428,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 0.25903130853625905,
|
|
"grad_norm": 4.322572708129883,
|
|
"learning_rate": 9.999914715583114e-06,
|
|
"loss": 1.1024,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 0.2592989028632593,
|
|
"grad_norm": 4.245995998382568,
|
|
"learning_rate": 9.999909468531977e-06,
|
|
"loss": 1.1555,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 0.2595664971902596,
|
|
"grad_norm": 3.9678421020507812,
|
|
"learning_rate": 9.999904064854367e-06,
|
|
"loss": 1.0857,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.25983409151725984,
|
|
"grad_norm": 4.337567329406738,
|
|
"learning_rate": 9.999898504550452e-06,
|
|
"loss": 1.2654,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 0.2601016858442601,
|
|
"grad_norm": 4.172070503234863,
|
|
"learning_rate": 9.999892787620407e-06,
|
|
"loss": 1.1528,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 0.2603692801712604,
|
|
"grad_norm": 4.25397253036499,
|
|
"learning_rate": 9.999886914064411e-06,
|
|
"loss": 1.1656,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 0.2606368744982606,
|
|
"grad_norm": 4.268383979797363,
|
|
"learning_rate": 9.999880883882647e-06,
|
|
"loss": 1.1387,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 0.2609044688252609,
|
|
"grad_norm": 4.6072235107421875,
|
|
"learning_rate": 9.999874697075304e-06,
|
|
"loss": 1.1754,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.26117206315226116,
|
|
"grad_norm": 4.203128814697266,
|
|
"learning_rate": 9.999868353642579e-06,
|
|
"loss": 1.2258,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 0.26143965747926146,
|
|
"grad_norm": 3.7577922344207764,
|
|
"learning_rate": 9.999861853584666e-06,
|
|
"loss": 1.0342,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 0.2617072518062617,
|
|
"grad_norm": 4.237786769866943,
|
|
"learning_rate": 9.999855196901773e-06,
|
|
"loss": 1.2248,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 0.261974846133262,
|
|
"grad_norm": 4.332390785217285,
|
|
"learning_rate": 9.999848383594107e-06,
|
|
"loss": 1.2544,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 0.26224244046026224,
|
|
"grad_norm": 4.468963146209717,
|
|
"learning_rate": 9.999841413661878e-06,
|
|
"loss": 1.1949,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.26251003478726254,
|
|
"grad_norm": 4.479465484619141,
|
|
"learning_rate": 9.999834287105307e-06,
|
|
"loss": 1.1904,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 0.2627776291142628,
|
|
"grad_norm": 4.083310127258301,
|
|
"learning_rate": 9.99982700392462e-06,
|
|
"loss": 1.1588,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 0.263045223441263,
|
|
"grad_norm": 4.362917900085449,
|
|
"learning_rate": 9.999819564120042e-06,
|
|
"loss": 1.1189,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 0.2633128177682633,
|
|
"grad_norm": 4.328512191772461,
|
|
"learning_rate": 9.999811967691805e-06,
|
|
"loss": 1.045,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 0.26358041209526356,
|
|
"grad_norm": 4.410714149475098,
|
|
"learning_rate": 9.999804214640151e-06,
|
|
"loss": 1.157,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.26384800642226386,
|
|
"grad_norm": 3.88704776763916,
|
|
"learning_rate": 9.999796304965318e-06,
|
|
"loss": 1.2426,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 0.2641156007492641,
|
|
"grad_norm": 4.107239246368408,
|
|
"learning_rate": 9.999788238667558e-06,
|
|
"loss": 1.1627,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 0.2643831950762644,
|
|
"grad_norm": 4.336009979248047,
|
|
"learning_rate": 9.999780015747122e-06,
|
|
"loss": 1.2247,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 0.26465078940326464,
|
|
"grad_norm": 4.0795464515686035,
|
|
"learning_rate": 9.999771636204267e-06,
|
|
"loss": 1.1928,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 0.26491838373026494,
|
|
"grad_norm": 4.309201717376709,
|
|
"learning_rate": 9.999763100039256e-06,
|
|
"loss": 1.1767,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.2651859780572652,
|
|
"grad_norm": 4.34153938293457,
|
|
"learning_rate": 9.999754407252356e-06,
|
|
"loss": 1.3238,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 0.2654535723842655,
|
|
"grad_norm": 4.0108842849731445,
|
|
"learning_rate": 9.99974555784384e-06,
|
|
"loss": 1.0851,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 0.2657211667112657,
|
|
"grad_norm": 4.6918768882751465,
|
|
"learning_rate": 9.999736551813986e-06,
|
|
"loss": 1.24,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 0.26598876103826596,
|
|
"grad_norm": 4.068446636199951,
|
|
"learning_rate": 9.999727389163074e-06,
|
|
"loss": 1.1031,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 0.26625635536526626,
|
|
"grad_norm": 4.262712478637695,
|
|
"learning_rate": 9.999718069891392e-06,
|
|
"loss": 1.274,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.2665239496922665,
|
|
"grad_norm": 4.3243889808654785,
|
|
"learning_rate": 9.999708593999234e-06,
|
|
"loss": 1.2472,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 0.2667915440192668,
|
|
"grad_norm": 4.188782691955566,
|
|
"learning_rate": 9.999698961486892e-06,
|
|
"loss": 1.2658,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 0.26705913834626704,
|
|
"grad_norm": 3.9021859169006348,
|
|
"learning_rate": 9.999689172354672e-06,
|
|
"loss": 1.0972,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 0.26732673267326734,
|
|
"grad_norm": 4.386773586273193,
|
|
"learning_rate": 9.999679226602878e-06,
|
|
"loss": 1.1707,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 0.2675943270002676,
|
|
"grad_norm": 4.411870956420898,
|
|
"learning_rate": 9.999669124231824e-06,
|
|
"loss": 1.188,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.2675943270002676,
|
|
"eval_loss": 1.216786503791809,
|
|
"eval_runtime": 11.6813,
|
|
"eval_samples_per_second": 34.243,
|
|
"eval_steps_per_second": 4.28,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.2678619213272679,
|
|
"grad_norm": 4.481796741485596,
|
|
"learning_rate": 9.999658865241827e-06,
|
|
"loss": 1.2667,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 0.2681295156542681,
|
|
"grad_norm": 3.979875087738037,
|
|
"learning_rate": 9.999648449633204e-06,
|
|
"loss": 1.1305,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 0.2683971099812684,
|
|
"grad_norm": 3.8728244304656982,
|
|
"learning_rate": 9.999637877406284e-06,
|
|
"loss": 1.1231,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 0.26866470430826866,
|
|
"grad_norm": 4.459341526031494,
|
|
"learning_rate": 9.999627148561399e-06,
|
|
"loss": 1.1543,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 0.2689322986352689,
|
|
"grad_norm": 4.173006534576416,
|
|
"learning_rate": 9.999616263098886e-06,
|
|
"loss": 1.2025,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.2691998929622692,
|
|
"grad_norm": 4.177968978881836,
|
|
"learning_rate": 9.999605221019082e-06,
|
|
"loss": 1.194,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 0.26946748728926945,
|
|
"grad_norm": 4.970066547393799,
|
|
"learning_rate": 9.999594022322334e-06,
|
|
"loss": 1.2869,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 0.26973508161626975,
|
|
"grad_norm": 4.600182056427002,
|
|
"learning_rate": 9.999582667008995e-06,
|
|
"loss": 1.3119,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 0.27000267594327,
|
|
"grad_norm": 4.465086460113525,
|
|
"learning_rate": 9.999571155079422e-06,
|
|
"loss": 1.1683,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 0.2702702702702703,
|
|
"grad_norm": 4.228415012359619,
|
|
"learning_rate": 9.999559486533971e-06,
|
|
"loss": 1.1939,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.2705378645972705,
|
|
"grad_norm": 4.573855400085449,
|
|
"learning_rate": 9.99954766137301e-06,
|
|
"loss": 1.325,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 0.2708054589242708,
|
|
"grad_norm": 4.398594379425049,
|
|
"learning_rate": 9.99953567959691e-06,
|
|
"loss": 1.1402,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 0.27107305325127107,
|
|
"grad_norm": 4.025271892547607,
|
|
"learning_rate": 9.999523541206044e-06,
|
|
"loss": 1.1139,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 0.27134064757827137,
|
|
"grad_norm": 4.192676067352295,
|
|
"learning_rate": 9.999511246200795e-06,
|
|
"loss": 1.1699,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 0.2716082419052716,
|
|
"grad_norm": 4.185833930969238,
|
|
"learning_rate": 9.999498794581548e-06,
|
|
"loss": 1.1549,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.27187583623227185,
|
|
"grad_norm": 4.256872653961182,
|
|
"learning_rate": 9.99948618634869e-06,
|
|
"loss": 1.1454,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 0.27214343055927215,
|
|
"grad_norm": 4.221078395843506,
|
|
"learning_rate": 9.99947342150262e-06,
|
|
"loss": 1.1097,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 0.2724110248862724,
|
|
"grad_norm": 4.532137870788574,
|
|
"learning_rate": 9.999460500043734e-06,
|
|
"loss": 1.341,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 0.2726786192132727,
|
|
"grad_norm": 3.931379795074463,
|
|
"learning_rate": 9.999447421972439e-06,
|
|
"loss": 1.1228,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 0.27294621354027293,
|
|
"grad_norm": 4.363259792327881,
|
|
"learning_rate": 9.999434187289145e-06,
|
|
"loss": 1.1633,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.27321380786727323,
|
|
"grad_norm": 3.8973734378814697,
|
|
"learning_rate": 9.999420795994266e-06,
|
|
"loss": 1.2312,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 0.27348140219427347,
|
|
"grad_norm": 4.490160942077637,
|
|
"learning_rate": 9.99940724808822e-06,
|
|
"loss": 1.2081,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 0.27374899652127377,
|
|
"grad_norm": 4.149991035461426,
|
|
"learning_rate": 9.999393543571434e-06,
|
|
"loss": 1.2413,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 0.274016590848274,
|
|
"grad_norm": 4.234299659729004,
|
|
"learning_rate": 9.999379682444338e-06,
|
|
"loss": 1.2663,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 0.2742841851752743,
|
|
"grad_norm": 4.101381301879883,
|
|
"learning_rate": 9.999365664707361e-06,
|
|
"loss": 1.1646,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.27455177950227455,
|
|
"grad_norm": 4.511719226837158,
|
|
"learning_rate": 9.999351490360947e-06,
|
|
"loss": 1.3628,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 0.2748193738292748,
|
|
"grad_norm": 4.125613212585449,
|
|
"learning_rate": 9.999337159405538e-06,
|
|
"loss": 1.1866,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 0.2750869681562751,
|
|
"grad_norm": 4.274496555328369,
|
|
"learning_rate": 9.999322671841583e-06,
|
|
"loss": 1.1889,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 0.27535456248327533,
|
|
"grad_norm": 4.035276889801025,
|
|
"learning_rate": 9.999308027669537e-06,
|
|
"loss": 1.096,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 0.27562215681027563,
|
|
"grad_norm": 4.041557312011719,
|
|
"learning_rate": 9.999293226889857e-06,
|
|
"loss": 1.1437,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.2758897511372759,
|
|
"grad_norm": 4.458560943603516,
|
|
"learning_rate": 9.999278269503008e-06,
|
|
"loss": 1.1608,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 0.27615734546427617,
|
|
"grad_norm": 3.992985486984253,
|
|
"learning_rate": 9.999263155509459e-06,
|
|
"loss": 1.0251,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 0.2764249397912764,
|
|
"grad_norm": 3.9736506938934326,
|
|
"learning_rate": 9.999247884909682e-06,
|
|
"loss": 1.2267,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 0.2766925341182767,
|
|
"grad_norm": 4.004456996917725,
|
|
"learning_rate": 9.999232457704155e-06,
|
|
"loss": 1.0958,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 0.27696012844527695,
|
|
"grad_norm": 4.022693157196045,
|
|
"learning_rate": 9.999216873893364e-06,
|
|
"loss": 1.1375,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.27722772277227725,
|
|
"grad_norm": 3.8458046913146973,
|
|
"learning_rate": 9.999201133477793e-06,
|
|
"loss": 1.1408,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 0.2774953170992775,
|
|
"grad_norm": 4.127901554107666,
|
|
"learning_rate": 9.999185236457941e-06,
|
|
"loss": 1.3119,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 0.27776291142627774,
|
|
"grad_norm": 4.242637634277344,
|
|
"learning_rate": 9.9991691828343e-06,
|
|
"loss": 1.1941,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 0.27803050575327803,
|
|
"grad_norm": 4.139479160308838,
|
|
"learning_rate": 9.999152972607377e-06,
|
|
"loss": 1.0765,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 0.2782981000802783,
|
|
"grad_norm": 4.560730457305908,
|
|
"learning_rate": 9.999136605777678e-06,
|
|
"loss": 1.4193,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.2785656944072786,
|
|
"grad_norm": 4.292839050292969,
|
|
"learning_rate": 9.999120082345714e-06,
|
|
"loss": 1.2548,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 0.2788332887342788,
|
|
"grad_norm": 4.288617134094238,
|
|
"learning_rate": 9.999103402312005e-06,
|
|
"loss": 1.2433,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 0.2791008830612791,
|
|
"grad_norm": 4.059001445770264,
|
|
"learning_rate": 9.999086565677075e-06,
|
|
"loss": 1.2556,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 0.27936847738827936,
|
|
"grad_norm": 4.238238334655762,
|
|
"learning_rate": 9.999069572441448e-06,
|
|
"loss": 1.2258,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 0.27963607171527965,
|
|
"grad_norm": 3.9122133255004883,
|
|
"learning_rate": 9.999052422605657e-06,
|
|
"loss": 1.1679,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.2799036660422799,
|
|
"grad_norm": 4.169795513153076,
|
|
"learning_rate": 9.999035116170241e-06,
|
|
"loss": 1.19,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 0.2801712603692802,
|
|
"grad_norm": 3.937116861343384,
|
|
"learning_rate": 9.999017653135744e-06,
|
|
"loss": 1.1576,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 0.28043885469628044,
|
|
"grad_norm": 4.014969348907471,
|
|
"learning_rate": 9.999000033502706e-06,
|
|
"loss": 1.2584,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 0.2807064490232807,
|
|
"grad_norm": 3.8742942810058594,
|
|
"learning_rate": 9.998982257271685e-06,
|
|
"loss": 1.0865,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 0.280974043350281,
|
|
"grad_norm": 4.11707878112793,
|
|
"learning_rate": 9.998964324443235e-06,
|
|
"loss": 1.2188,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.2812416376772812,
|
|
"grad_norm": 4.459323406219482,
|
|
"learning_rate": 9.998946235017918e-06,
|
|
"loss": 1.2243,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 0.2815092320042815,
|
|
"grad_norm": 4.1629815101623535,
|
|
"learning_rate": 9.998927988996303e-06,
|
|
"loss": 1.3085,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 0.28177682633128176,
|
|
"grad_norm": 4.150962829589844,
|
|
"learning_rate": 9.998909586378959e-06,
|
|
"loss": 1.1903,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 0.28204442065828206,
|
|
"grad_norm": 3.7340071201324463,
|
|
"learning_rate": 9.998891027166466e-06,
|
|
"loss": 1.0209,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 0.2823120149852823,
|
|
"grad_norm": 4.1432695388793945,
|
|
"learning_rate": 9.9988723113594e-06,
|
|
"loss": 1.1885,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.2825796093122826,
|
|
"grad_norm": 4.373791217803955,
|
|
"learning_rate": 9.998853438958352e-06,
|
|
"loss": 1.2612,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 0.28284720363928284,
|
|
"grad_norm": 4.626842021942139,
|
|
"learning_rate": 9.99883440996391e-06,
|
|
"loss": 1.3311,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 0.28311479796628314,
|
|
"grad_norm": 4.128498077392578,
|
|
"learning_rate": 9.998815224376672e-06,
|
|
"loss": 1.2753,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 0.2833823922932834,
|
|
"grad_norm": 4.729836463928223,
|
|
"learning_rate": 9.998795882197238e-06,
|
|
"loss": 1.3457,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 0.2836499866202836,
|
|
"grad_norm": 4.123654365539551,
|
|
"learning_rate": 9.998776383426217e-06,
|
|
"loss": 1.2035,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.2839175809472839,
|
|
"grad_norm": 3.9608731269836426,
|
|
"learning_rate": 9.998756728064213e-06,
|
|
"loss": 1.1559,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 0.28418517527428416,
|
|
"grad_norm": 3.8772714138031006,
|
|
"learning_rate": 9.998736916111848e-06,
|
|
"loss": 1.1901,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 0.28445276960128446,
|
|
"grad_norm": 4.203121185302734,
|
|
"learning_rate": 9.998716947569741e-06,
|
|
"loss": 1.1789,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 0.2847203639282847,
|
|
"grad_norm": 4.262762069702148,
|
|
"learning_rate": 9.998696822438516e-06,
|
|
"loss": 1.3464,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 0.284987958255285,
|
|
"grad_norm": 4.056782245635986,
|
|
"learning_rate": 9.998676540718805e-06,
|
|
"loss": 1.1512,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.28525555258228524,
|
|
"grad_norm": 4.1949639320373535,
|
|
"learning_rate": 9.998656102411245e-06,
|
|
"loss": 1.2288,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 0.28552314690928554,
|
|
"grad_norm": 4.408857345581055,
|
|
"learning_rate": 9.99863550751647e-06,
|
|
"loss": 1.2456,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 0.2857907412362858,
|
|
"grad_norm": 4.202237606048584,
|
|
"learning_rate": 9.998614756035132e-06,
|
|
"loss": 1.2651,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 0.2860583355632861,
|
|
"grad_norm": 4.139695167541504,
|
|
"learning_rate": 9.998593847967877e-06,
|
|
"loss": 1.1924,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 0.2863259298902863,
|
|
"grad_norm": 4.123232841491699,
|
|
"learning_rate": 9.998572783315361e-06,
|
|
"loss": 1.2642,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.28659352421728657,
|
|
"grad_norm": 4.614407539367676,
|
|
"learning_rate": 9.998551562078245e-06,
|
|
"loss": 1.1987,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 0.28686111854428686,
|
|
"grad_norm": 4.054043769836426,
|
|
"learning_rate": 9.998530184257194e-06,
|
|
"loss": 1.1046,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 0.2871287128712871,
|
|
"grad_norm": 4.0423760414123535,
|
|
"learning_rate": 9.998508649852874e-06,
|
|
"loss": 1.1435,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 0.2873963071982874,
|
|
"grad_norm": 4.187506198883057,
|
|
"learning_rate": 9.998486958865965e-06,
|
|
"loss": 1.1171,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 0.28766390152528765,
|
|
"grad_norm": 4.307306289672852,
|
|
"learning_rate": 9.998465111297141e-06,
|
|
"loss": 1.1656,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.28793149585228794,
|
|
"grad_norm": 4.108502388000488,
|
|
"learning_rate": 9.99844310714709e-06,
|
|
"loss": 1.1522,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 0.2881990901792882,
|
|
"grad_norm": 4.7379069328308105,
|
|
"learning_rate": 9.9984209464165e-06,
|
|
"loss": 1.3547,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 0.2884666845062885,
|
|
"grad_norm": 4.607676029205322,
|
|
"learning_rate": 9.998398629106068e-06,
|
|
"loss": 1.2304,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 0.2887342788332887,
|
|
"grad_norm": 4.571547031402588,
|
|
"learning_rate": 9.998376155216487e-06,
|
|
"loss": 1.2825,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 0.289001873160289,
|
|
"grad_norm": 4.113447189331055,
|
|
"learning_rate": 9.998353524748468e-06,
|
|
"loss": 1.143,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.28926946748728927,
|
|
"grad_norm": 4.24326753616333,
|
|
"learning_rate": 9.998330737702714e-06,
|
|
"loss": 1.2782,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 0.28953706181428956,
|
|
"grad_norm": 3.839808464050293,
|
|
"learning_rate": 9.998307794079942e-06,
|
|
"loss": 1.1638,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 0.2898046561412898,
|
|
"grad_norm": 4.0532355308532715,
|
|
"learning_rate": 9.998284693880871e-06,
|
|
"loss": 1.2157,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 0.29007225046829005,
|
|
"grad_norm": 4.362560272216797,
|
|
"learning_rate": 9.998261437106223e-06,
|
|
"loss": 1.3047,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 0.29033984479529035,
|
|
"grad_norm": 4.096391201019287,
|
|
"learning_rate": 9.998238023756727e-06,
|
|
"loss": 1.3003,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.2906074391222906,
|
|
"grad_norm": 3.965895891189575,
|
|
"learning_rate": 9.998214453833118e-06,
|
|
"loss": 1.2066,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 0.2908750334492909,
|
|
"grad_norm": 4.827084541320801,
|
|
"learning_rate": 9.998190727336133e-06,
|
|
"loss": 1.3066,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 0.29114262777629113,
|
|
"grad_norm": 3.926433563232422,
|
|
"learning_rate": 9.998166844266515e-06,
|
|
"loss": 1.1789,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 0.2914102221032914,
|
|
"grad_norm": 4.2660651206970215,
|
|
"learning_rate": 9.998142804625011e-06,
|
|
"loss": 1.259,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 0.29167781643029167,
|
|
"grad_norm": 4.009738922119141,
|
|
"learning_rate": 9.998118608412378e-06,
|
|
"loss": 1.1624,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.29194541075729197,
|
|
"grad_norm": 4.329594612121582,
|
|
"learning_rate": 9.99809425562937e-06,
|
|
"loss": 1.2244,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 0.2922130050842922,
|
|
"grad_norm": 4.109816551208496,
|
|
"learning_rate": 9.998069746276752e-06,
|
|
"loss": 1.1706,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 0.2924805994112925,
|
|
"grad_norm": 4.28621244430542,
|
|
"learning_rate": 9.998045080355291e-06,
|
|
"loss": 1.2071,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 0.29274819373829275,
|
|
"grad_norm": 4.0972747802734375,
|
|
"learning_rate": 9.99802025786576e-06,
|
|
"loss": 1.1558,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 0.293015788065293,
|
|
"grad_norm": 4.451328277587891,
|
|
"learning_rate": 9.997995278808936e-06,
|
|
"loss": 1.3491,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.2932833823922933,
|
|
"grad_norm": 3.864147663116455,
|
|
"learning_rate": 9.997970143185603e-06,
|
|
"loss": 1.1395,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 0.29355097671929353,
|
|
"grad_norm": 4.177571773529053,
|
|
"learning_rate": 9.997944850996546e-06,
|
|
"loss": 1.319,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 0.29381857104629383,
|
|
"grad_norm": 3.75541615486145,
|
|
"learning_rate": 9.99791940224256e-06,
|
|
"loss": 1.1284,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 0.2940861653732941,
|
|
"grad_norm": 3.947469711303711,
|
|
"learning_rate": 9.99789379692444e-06,
|
|
"loss": 1.0627,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 0.29435375970029437,
|
|
"grad_norm": 4.161018371582031,
|
|
"learning_rate": 9.99786803504299e-06,
|
|
"loss": 1.2537,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.2946213540272946,
|
|
"grad_norm": 4.357724189758301,
|
|
"learning_rate": 9.997842116599014e-06,
|
|
"loss": 1.3133,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 0.2948889483542949,
|
|
"grad_norm": 4.223912715911865,
|
|
"learning_rate": 9.997816041593327e-06,
|
|
"loss": 1.2574,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 0.29515654268129515,
|
|
"grad_norm": 3.6964030265808105,
|
|
"learning_rate": 9.997789810026746e-06,
|
|
"loss": 1.0187,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 0.29542413700829545,
|
|
"grad_norm": 3.9817559719085693,
|
|
"learning_rate": 9.99776342190009e-06,
|
|
"loss": 1.1749,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 0.2956917313352957,
|
|
"grad_norm": 4.123600959777832,
|
|
"learning_rate": 9.997736877214187e-06,
|
|
"loss": 1.192,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.29595932566229594,
|
|
"grad_norm": 4.295464515686035,
|
|
"learning_rate": 9.99771017596987e-06,
|
|
"loss": 1.1886,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 0.29622691998929623,
|
|
"grad_norm": 4.501376628875732,
|
|
"learning_rate": 9.997683318167972e-06,
|
|
"loss": 1.2161,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 0.2964945143162965,
|
|
"grad_norm": 4.243162631988525,
|
|
"learning_rate": 9.997656303809338e-06,
|
|
"loss": 1.2048,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 0.2967621086432968,
|
|
"grad_norm": 4.504419326782227,
|
|
"learning_rate": 9.997629132894812e-06,
|
|
"loss": 1.263,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 0.297029702970297,
|
|
"grad_norm": 4.300513744354248,
|
|
"learning_rate": 9.997601805425246e-06,
|
|
"loss": 1.0954,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.2972972972972973,
|
|
"grad_norm": 4.057127952575684,
|
|
"learning_rate": 9.997574321401495e-06,
|
|
"loss": 1.1716,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 0.29756489162429756,
|
|
"grad_norm": 3.755995035171509,
|
|
"learning_rate": 9.997546680824422e-06,
|
|
"loss": 1.0806,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 0.29783248595129785,
|
|
"grad_norm": 4.294164180755615,
|
|
"learning_rate": 9.99751888369489e-06,
|
|
"loss": 1.358,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 0.2981000802782981,
|
|
"grad_norm": 3.7034502029418945,
|
|
"learning_rate": 9.997490930013773e-06,
|
|
"loss": 1.1258,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 0.2983676746052984,
|
|
"grad_norm": 4.4115071296691895,
|
|
"learning_rate": 9.997462819781944e-06,
|
|
"loss": 1.3023,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 0.29863526893229864,
|
|
"grad_norm": 4.0352678298950195,
|
|
"learning_rate": 9.997434553000286e-06,
|
|
"loss": 1.1215,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 0.2989028632592989,
|
|
"grad_norm": 3.9848623275756836,
|
|
"learning_rate": 9.997406129669682e-06,
|
|
"loss": 1.1101,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 0.2991704575862992,
|
|
"grad_norm": 4.004817962646484,
|
|
"learning_rate": 9.997377549791025e-06,
|
|
"loss": 1.2029,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 0.2994380519132994,
|
|
"grad_norm": 4.244535446166992,
|
|
"learning_rate": 9.997348813365207e-06,
|
|
"loss": 1.1389,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 0.2997056462402997,
|
|
"grad_norm": 4.067032337188721,
|
|
"learning_rate": 9.997319920393131e-06,
|
|
"loss": 1.0913,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.29997324056729996,
|
|
"grad_norm": 3.9365594387054443,
|
|
"learning_rate": 9.997290870875703e-06,
|
|
"loss": 1.1128,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 0.30024083489430026,
|
|
"grad_norm": 3.7220211029052734,
|
|
"learning_rate": 9.997261664813827e-06,
|
|
"loss": 1.0801,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 0.3005084292213005,
|
|
"grad_norm": 4.3840131759643555,
|
|
"learning_rate": 9.997232302208425e-06,
|
|
"loss": 1.2494,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 0.3007760235483008,
|
|
"grad_norm": 3.811455011367798,
|
|
"learning_rate": 9.997202783060413e-06,
|
|
"loss": 1.13,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 0.30104361787530104,
|
|
"grad_norm": 4.011319637298584,
|
|
"learning_rate": 9.997173107370717e-06,
|
|
"loss": 1.216,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.30131121220230134,
|
|
"grad_norm": 3.8559553623199463,
|
|
"learning_rate": 9.997143275140266e-06,
|
|
"loss": 1.1322,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 0.3015788065293016,
|
|
"grad_norm": 3.9331884384155273,
|
|
"learning_rate": 9.997113286369995e-06,
|
|
"loss": 1.132,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 0.3018464008563018,
|
|
"grad_norm": 3.7718307971954346,
|
|
"learning_rate": 9.997083141060842e-06,
|
|
"loss": 1.2221,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 0.3021139951833021,
|
|
"grad_norm": 4.190203666687012,
|
|
"learning_rate": 9.997052839213752e-06,
|
|
"loss": 1.2322,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 0.30238158951030236,
|
|
"grad_norm": 3.806379795074463,
|
|
"learning_rate": 9.997022380829677e-06,
|
|
"loss": 1.1844,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.30264918383730266,
|
|
"grad_norm": 3.8173298835754395,
|
|
"learning_rate": 9.996991765909568e-06,
|
|
"loss": 1.1185,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 0.3029167781643029,
|
|
"grad_norm": 4.177835464477539,
|
|
"learning_rate": 9.996960994454383e-06,
|
|
"loss": 1.2292,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 0.3031843724913032,
|
|
"grad_norm": 4.42379903793335,
|
|
"learning_rate": 9.996930066465091e-06,
|
|
"loss": 1.2661,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 0.30345196681830344,
|
|
"grad_norm": 4.7846455574035645,
|
|
"learning_rate": 9.996898981942655e-06,
|
|
"loss": 1.1785,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 0.30371956114530374,
|
|
"grad_norm": 4.796987533569336,
|
|
"learning_rate": 9.996867740888052e-06,
|
|
"loss": 1.2928,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 0.303987155472304,
|
|
"grad_norm": 4.014819145202637,
|
|
"learning_rate": 9.996836343302261e-06,
|
|
"loss": 1.1626,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 0.3042547497993043,
|
|
"grad_norm": 4.26397180557251,
|
|
"learning_rate": 9.996804789186263e-06,
|
|
"loss": 1.0987,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 0.3045223441263045,
|
|
"grad_norm": 4.477066516876221,
|
|
"learning_rate": 9.99677307854105e-06,
|
|
"loss": 1.28,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 0.30478993845330477,
|
|
"grad_norm": 4.38161039352417,
|
|
"learning_rate": 9.996741211367613e-06,
|
|
"loss": 1.2668,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 0.30505753278030506,
|
|
"grad_norm": 4.141867160797119,
|
|
"learning_rate": 9.996709187666951e-06,
|
|
"loss": 1.2651,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.3053251271073053,
|
|
"grad_norm": 3.891883134841919,
|
|
"learning_rate": 9.996677007440065e-06,
|
|
"loss": 1.2046,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 0.3055927214343056,
|
|
"grad_norm": 4.575502395629883,
|
|
"learning_rate": 9.996644670687966e-06,
|
|
"loss": 1.3873,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 0.30586031576130585,
|
|
"grad_norm": 3.6624574661254883,
|
|
"learning_rate": 9.996612177411667e-06,
|
|
"loss": 1.1507,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 0.30612791008830614,
|
|
"grad_norm": 4.047989845275879,
|
|
"learning_rate": 9.996579527612182e-06,
|
|
"loss": 1.1696,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 0.3063955044153064,
|
|
"grad_norm": 4.16288948059082,
|
|
"learning_rate": 9.99654672129054e-06,
|
|
"loss": 1.191,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 0.3066630987423067,
|
|
"grad_norm": 4.057130813598633,
|
|
"learning_rate": 9.996513758447764e-06,
|
|
"loss": 1.2684,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 0.3069306930693069,
|
|
"grad_norm": 4.151482582092285,
|
|
"learning_rate": 9.996480639084887e-06,
|
|
"loss": 1.3478,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 0.3071982873963072,
|
|
"grad_norm": 3.9425978660583496,
|
|
"learning_rate": 9.996447363202947e-06,
|
|
"loss": 1.1573,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 0.30746588172330747,
|
|
"grad_norm": 4.016078948974609,
|
|
"learning_rate": 9.996413930802988e-06,
|
|
"loss": 1.175,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 0.3077334760503077,
|
|
"grad_norm": 4.067404270172119,
|
|
"learning_rate": 9.996380341886055e-06,
|
|
"loss": 1.2151,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.308001070377308,
|
|
"grad_norm": 4.010457992553711,
|
|
"learning_rate": 9.996346596453202e-06,
|
|
"loss": 1.2175,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 0.30826866470430825,
|
|
"grad_norm": 4.103924751281738,
|
|
"learning_rate": 9.996312694505486e-06,
|
|
"loss": 1.1351,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 0.30853625903130855,
|
|
"grad_norm": 4.0794243812561035,
|
|
"learning_rate": 9.996278636043966e-06,
|
|
"loss": 1.1801,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 0.3088038533583088,
|
|
"grad_norm": 3.910602331161499,
|
|
"learning_rate": 9.996244421069714e-06,
|
|
"loss": 1.2453,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 0.3090714476853091,
|
|
"grad_norm": 4.31195068359375,
|
|
"learning_rate": 9.996210049583796e-06,
|
|
"loss": 1.1257,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 0.30933904201230933,
|
|
"grad_norm": 4.224134922027588,
|
|
"learning_rate": 9.996175521587294e-06,
|
|
"loss": 1.2855,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 0.3096066363393096,
|
|
"grad_norm": 4.1098198890686035,
|
|
"learning_rate": 9.996140837081288e-06,
|
|
"loss": 1.2321,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 0.30987423066630987,
|
|
"grad_norm": 4.49318265914917,
|
|
"learning_rate": 9.996105996066862e-06,
|
|
"loss": 1.2987,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 0.31014182499331017,
|
|
"grad_norm": 4.257841110229492,
|
|
"learning_rate": 9.99607099854511e-06,
|
|
"loss": 1.2069,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 0.3104094193203104,
|
|
"grad_norm": 4.5224385261535645,
|
|
"learning_rate": 9.996035844517129e-06,
|
|
"loss": 1.1976,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.31067701364731065,
|
|
"grad_norm": 4.277895927429199,
|
|
"learning_rate": 9.996000533984017e-06,
|
|
"loss": 1.2005,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 0.31094460797431095,
|
|
"grad_norm": 4.397223472595215,
|
|
"learning_rate": 9.995965066946885e-06,
|
|
"loss": 1.1852,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 0.3112122023013112,
|
|
"grad_norm": 4.2652764320373535,
|
|
"learning_rate": 9.995929443406838e-06,
|
|
"loss": 1.2549,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 0.3114797966283115,
|
|
"grad_norm": 4.212392807006836,
|
|
"learning_rate": 9.995893663364997e-06,
|
|
"loss": 1.1547,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 0.31174739095531173,
|
|
"grad_norm": 3.8345203399658203,
|
|
"learning_rate": 9.99585772682248e-06,
|
|
"loss": 1.0897,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 0.31201498528231203,
|
|
"grad_norm": 4.444775581359863,
|
|
"learning_rate": 9.995821633780413e-06,
|
|
"loss": 1.2715,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 0.31228257960931227,
|
|
"grad_norm": 4.151453971862793,
|
|
"learning_rate": 9.99578538423993e-06,
|
|
"loss": 1.1795,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 0.31255017393631257,
|
|
"grad_norm": 4.223361492156982,
|
|
"learning_rate": 9.99574897820216e-06,
|
|
"loss": 1.2359,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 0.3128177682633128,
|
|
"grad_norm": 4.0662841796875,
|
|
"learning_rate": 9.99571241566825e-06,
|
|
"loss": 1.1072,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 0.3130853625903131,
|
|
"grad_norm": 4.007144451141357,
|
|
"learning_rate": 9.99567569663934e-06,
|
|
"loss": 1.198,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.31335295691731335,
|
|
"grad_norm": 3.9480855464935303,
|
|
"learning_rate": 9.995638821116585e-06,
|
|
"loss": 1.0293,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 0.3136205512443136,
|
|
"grad_norm": 3.7751834392547607,
|
|
"learning_rate": 9.995601789101138e-06,
|
|
"loss": 1.0231,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 0.3138881455713139,
|
|
"grad_norm": 4.609216690063477,
|
|
"learning_rate": 9.995564600594159e-06,
|
|
"loss": 1.1539,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 0.31415573989831413,
|
|
"grad_norm": 4.05670166015625,
|
|
"learning_rate": 9.995527255596812e-06,
|
|
"loss": 1.1977,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 0.31442333422531443,
|
|
"grad_norm": 3.651618242263794,
|
|
"learning_rate": 9.995489754110268e-06,
|
|
"loss": 1.0947,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.3146909285523147,
|
|
"grad_norm": 4.308838844299316,
|
|
"learning_rate": 9.995452096135703e-06,
|
|
"loss": 1.1942,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 0.314958522879315,
|
|
"grad_norm": 3.8746747970581055,
|
|
"learning_rate": 9.995414281674294e-06,
|
|
"loss": 1.1572,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 0.3152261172063152,
|
|
"grad_norm": 4.089914321899414,
|
|
"learning_rate": 9.995376310727227e-06,
|
|
"loss": 1.2842,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 0.3154937115333155,
|
|
"grad_norm": 4.342733860015869,
|
|
"learning_rate": 9.995338183295693e-06,
|
|
"loss": 1.285,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 0.31576130586031576,
|
|
"grad_norm": 3.697603225708008,
|
|
"learning_rate": 9.995299899380884e-06,
|
|
"loss": 1.1125,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.31602890018731605,
|
|
"grad_norm": 4.422861099243164,
|
|
"learning_rate": 9.995261458983999e-06,
|
|
"loss": 1.2552,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 0.3162964945143163,
|
|
"grad_norm": 4.288775444030762,
|
|
"learning_rate": 9.995222862106245e-06,
|
|
"loss": 1.3295,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 0.31656408884131654,
|
|
"grad_norm": 3.8024377822875977,
|
|
"learning_rate": 9.995184108748827e-06,
|
|
"loss": 1.1542,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 0.31683168316831684,
|
|
"grad_norm": 4.05307674407959,
|
|
"learning_rate": 9.995145198912962e-06,
|
|
"loss": 1.1841,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 0.3170992774953171,
|
|
"grad_norm": 4.2776265144348145,
|
|
"learning_rate": 9.995106132599869e-06,
|
|
"loss": 1.1981,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 0.3173668718223174,
|
|
"grad_norm": 3.631357431411743,
|
|
"learning_rate": 9.995066909810771e-06,
|
|
"loss": 1.1741,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 0.3176344661493176,
|
|
"grad_norm": 3.9295742511749268,
|
|
"learning_rate": 9.995027530546895e-06,
|
|
"loss": 1.0733,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 0.3179020604763179,
|
|
"grad_norm": 3.839838743209839,
|
|
"learning_rate": 9.994987994809478e-06,
|
|
"loss": 1.2681,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 0.31816965480331816,
|
|
"grad_norm": 3.7129993438720703,
|
|
"learning_rate": 9.994948302599757e-06,
|
|
"loss": 1.0686,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 0.31843724913031846,
|
|
"grad_norm": 4.252348899841309,
|
|
"learning_rate": 9.994908453918973e-06,
|
|
"loss": 1.2652,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.3187048434573187,
|
|
"grad_norm": 4.2434916496276855,
|
|
"learning_rate": 9.994868448768378e-06,
|
|
"loss": 1.227,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 0.318972437784319,
|
|
"grad_norm": 3.613661766052246,
|
|
"learning_rate": 9.994828287149224e-06,
|
|
"loss": 1.0919,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 0.31924003211131924,
|
|
"grad_norm": 4.0223469734191895,
|
|
"learning_rate": 9.994787969062767e-06,
|
|
"loss": 1.161,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 0.3195076264383195,
|
|
"grad_norm": 3.802426815032959,
|
|
"learning_rate": 9.994747494510274e-06,
|
|
"loss": 1.2561,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 0.3197752207653198,
|
|
"grad_norm": 3.8129377365112305,
|
|
"learning_rate": 9.994706863493007e-06,
|
|
"loss": 1.1638,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 0.32004281509232,
|
|
"grad_norm": 4.0562872886657715,
|
|
"learning_rate": 9.994666076012245e-06,
|
|
"loss": 1.2713,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 0.3203104094193203,
|
|
"grad_norm": 4.090336322784424,
|
|
"learning_rate": 9.994625132069263e-06,
|
|
"loss": 1.1567,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 0.32057800374632056,
|
|
"grad_norm": 4.030067443847656,
|
|
"learning_rate": 9.994584031665345e-06,
|
|
"loss": 1.1686,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 0.32084559807332086,
|
|
"grad_norm": 4.26224422454834,
|
|
"learning_rate": 9.994542774801774e-06,
|
|
"loss": 1.1967,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 0.3211131924003211,
|
|
"grad_norm": 4.3625102043151855,
|
|
"learning_rate": 9.994501361479847e-06,
|
|
"loss": 1.2828,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3213807867273214,
|
|
"grad_norm": 4.201301574707031,
|
|
"learning_rate": 9.99445979170086e-06,
|
|
"loss": 1.2463,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 0.32164838105432164,
|
|
"grad_norm": 3.624171733856201,
|
|
"learning_rate": 9.994418065466116e-06,
|
|
"loss": 1.1004,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 0.32191597538132194,
|
|
"grad_norm": 3.7936317920684814,
|
|
"learning_rate": 9.99437618277692e-06,
|
|
"loss": 1.0925,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 0.3221835697083222,
|
|
"grad_norm": 3.740135431289673,
|
|
"learning_rate": 9.994334143634587e-06,
|
|
"loss": 1.1698,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 0.3224511640353224,
|
|
"grad_norm": 4.0127458572387695,
|
|
"learning_rate": 9.994291948040429e-06,
|
|
"loss": 1.2089,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 0.3227187583623227,
|
|
"grad_norm": 4.131107807159424,
|
|
"learning_rate": 9.994249595995774e-06,
|
|
"loss": 1.1964,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 0.32298635268932296,
|
|
"grad_norm": 3.945056200027466,
|
|
"learning_rate": 9.994207087501945e-06,
|
|
"loss": 1.2649,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 0.32325394701632326,
|
|
"grad_norm": 4.262823581695557,
|
|
"learning_rate": 9.994164422560273e-06,
|
|
"loss": 1.2617,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 0.3235215413433235,
|
|
"grad_norm": 4.310561180114746,
|
|
"learning_rate": 9.994121601172097e-06,
|
|
"loss": 1.2077,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 0.3237891356703238,
|
|
"grad_norm": 4.025747299194336,
|
|
"learning_rate": 9.994078623338757e-06,
|
|
"loss": 1.1637,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.32405672999732404,
|
|
"grad_norm": 3.766697883605957,
|
|
"learning_rate": 9.9940354890616e-06,
|
|
"loss": 1.0568,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 0.32432432432432434,
|
|
"grad_norm": 4.299594402313232,
|
|
"learning_rate": 9.993992198341976e-06,
|
|
"loss": 1.2301,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 0.3245919186513246,
|
|
"grad_norm": 3.945216417312622,
|
|
"learning_rate": 9.993948751181243e-06,
|
|
"loss": 1.2631,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 0.3248595129783249,
|
|
"grad_norm": 4.33341121673584,
|
|
"learning_rate": 9.99390514758076e-06,
|
|
"loss": 1.2464,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 0.3251271073053251,
|
|
"grad_norm": 4.582106590270996,
|
|
"learning_rate": 9.993861387541894e-06,
|
|
"loss": 1.2877,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 0.32539470163232537,
|
|
"grad_norm": 4.363495349884033,
|
|
"learning_rate": 9.993817471066016e-06,
|
|
"loss": 1.1593,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 0.32566229595932566,
|
|
"grad_norm": 4.1914873123168945,
|
|
"learning_rate": 9.9937733981545e-06,
|
|
"loss": 1.2427,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 0.3259298902863259,
|
|
"grad_norm": 4.207976341247559,
|
|
"learning_rate": 9.99372916880873e-06,
|
|
"loss": 1.2129,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 0.3261974846133262,
|
|
"grad_norm": 3.9144184589385986,
|
|
"learning_rate": 9.99368478303009e-06,
|
|
"loss": 1.1604,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 0.32646507894032645,
|
|
"grad_norm": 3.9831881523132324,
|
|
"learning_rate": 9.993640240819966e-06,
|
|
"loss": 1.3163,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.32673267326732675,
|
|
"grad_norm": 4.0437331199646,
|
|
"learning_rate": 9.993595542179762e-06,
|
|
"loss": 1.2957,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 0.327000267594327,
|
|
"grad_norm": 3.9239695072174072,
|
|
"learning_rate": 9.99355068711087e-06,
|
|
"loss": 1.1359,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 0.3272678619213273,
|
|
"grad_norm": 3.7763185501098633,
|
|
"learning_rate": 9.993505675614699e-06,
|
|
"loss": 1.1569,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 0.32753545624832753,
|
|
"grad_norm": 3.6293134689331055,
|
|
"learning_rate": 9.99346050769266e-06,
|
|
"loss": 1.0692,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 0.3278030505753278,
|
|
"grad_norm": 3.8709805011749268,
|
|
"learning_rate": 9.993415183346168e-06,
|
|
"loss": 1.0574,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.32807064490232807,
|
|
"grad_norm": 4.066141605377197,
|
|
"learning_rate": 9.993369702576638e-06,
|
|
"loss": 1.2466,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 0.32833823922932837,
|
|
"grad_norm": 4.21537446975708,
|
|
"learning_rate": 9.993324065385499e-06,
|
|
"loss": 1.2357,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 0.3286058335563286,
|
|
"grad_norm": 3.732475757598877,
|
|
"learning_rate": 9.99327827177418e-06,
|
|
"loss": 1.0866,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 0.32887342788332885,
|
|
"grad_norm": 3.632660150527954,
|
|
"learning_rate": 9.993232321744117e-06,
|
|
"loss": 1.0751,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 0.32914102221032915,
|
|
"grad_norm": 4.376312732696533,
|
|
"learning_rate": 9.993186215296747e-06,
|
|
"loss": 1.1535,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.3294086165373294,
|
|
"grad_norm": 4.29062557220459,
|
|
"learning_rate": 9.993139952433513e-06,
|
|
"loss": 1.298,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 0.3296762108643297,
|
|
"grad_norm": 4.182230472564697,
|
|
"learning_rate": 9.99309353315587e-06,
|
|
"loss": 1.1764,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 0.32994380519132993,
|
|
"grad_norm": 4.025058269500732,
|
|
"learning_rate": 9.993046957465264e-06,
|
|
"loss": 1.0655,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 0.33021139951833023,
|
|
"grad_norm": 4.065793514251709,
|
|
"learning_rate": 9.99300022536316e-06,
|
|
"loss": 1.2838,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 0.33047899384533047,
|
|
"grad_norm": 4.3913397789001465,
|
|
"learning_rate": 9.99295333685102e-06,
|
|
"loss": 1.3101,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 0.33074658817233077,
|
|
"grad_norm": 4.253934860229492,
|
|
"learning_rate": 9.992906291930315e-06,
|
|
"loss": 1.2625,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 0.331014182499331,
|
|
"grad_norm": 4.285333633422852,
|
|
"learning_rate": 9.992859090602515e-06,
|
|
"loss": 1.246,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 0.3312817768263313,
|
|
"grad_norm": 4.150413513183594,
|
|
"learning_rate": 9.992811732869102e-06,
|
|
"loss": 1.1379,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 0.33154937115333155,
|
|
"grad_norm": 4.417994976043701,
|
|
"learning_rate": 9.992764218731556e-06,
|
|
"loss": 1.2882,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 0.3318169654803318,
|
|
"grad_norm": 4.12443733215332,
|
|
"learning_rate": 9.992716548191369e-06,
|
|
"loss": 1.1555,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.3320845598073321,
|
|
"grad_norm": 3.823629140853882,
|
|
"learning_rate": 9.992668721250031e-06,
|
|
"loss": 1.145,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 0.33235215413433233,
|
|
"grad_norm": 3.9831488132476807,
|
|
"learning_rate": 9.992620737909045e-06,
|
|
"loss": 1.1958,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 0.33261974846133263,
|
|
"grad_norm": 4.018994331359863,
|
|
"learning_rate": 9.99257259816991e-06,
|
|
"loss": 1.1069,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 0.3328873427883329,
|
|
"grad_norm": 3.987264394760132,
|
|
"learning_rate": 9.992524302034133e-06,
|
|
"loss": 1.0961,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 0.33315493711533317,
|
|
"grad_norm": 4.08268404006958,
|
|
"learning_rate": 9.992475849503232e-06,
|
|
"loss": 1.2255,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 0.3334225314423334,
|
|
"grad_norm": 4.237321853637695,
|
|
"learning_rate": 9.992427240578719e-06,
|
|
"loss": 1.2254,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 0.3336901257693337,
|
|
"grad_norm": 4.234129905700684,
|
|
"learning_rate": 9.99237847526212e-06,
|
|
"loss": 1.31,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 0.33395772009633395,
|
|
"grad_norm": 3.7745895385742188,
|
|
"learning_rate": 9.992329553554964e-06,
|
|
"loss": 1.2299,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 0.33422531442333425,
|
|
"grad_norm": 3.8982863426208496,
|
|
"learning_rate": 9.99228047545878e-06,
|
|
"loss": 1.1502,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 0.3344929087503345,
|
|
"grad_norm": 3.888578414916992,
|
|
"learning_rate": 9.992231240975107e-06,
|
|
"loss": 1.2675,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.33476050307733474,
|
|
"grad_norm": 3.7839229106903076,
|
|
"learning_rate": 9.992181850105488e-06,
|
|
"loss": 1.1895,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 0.33502809740433503,
|
|
"grad_norm": 3.796337366104126,
|
|
"learning_rate": 9.992132302851471e-06,
|
|
"loss": 1.1802,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 0.3352956917313353,
|
|
"grad_norm": 3.6959662437438965,
|
|
"learning_rate": 9.992082599214605e-06,
|
|
"loss": 1.0366,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 0.3355632860583356,
|
|
"grad_norm": 4.231655120849609,
|
|
"learning_rate": 9.99203273919645e-06,
|
|
"loss": 1.3236,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 0.3358308803853358,
|
|
"grad_norm": 3.774073600769043,
|
|
"learning_rate": 9.991982722798565e-06,
|
|
"loss": 1.2142,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 0.3360984747123361,
|
|
"grad_norm": 3.9392263889312744,
|
|
"learning_rate": 9.99193255002252e-06,
|
|
"loss": 1.1911,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 0.33636606903933636,
|
|
"grad_norm": 3.8191981315612793,
|
|
"learning_rate": 9.991882220869885e-06,
|
|
"loss": 1.1639,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 0.33663366336633666,
|
|
"grad_norm": 3.6881232261657715,
|
|
"learning_rate": 9.991831735342235e-06,
|
|
"loss": 1.179,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 0.3369012576933369,
|
|
"grad_norm": 3.9517464637756348,
|
|
"learning_rate": 9.991781093441156e-06,
|
|
"loss": 1.3195,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 0.3371688520203372,
|
|
"grad_norm": 4.152409076690674,
|
|
"learning_rate": 9.991730295168229e-06,
|
|
"loss": 1.3316,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.33743644634733744,
|
|
"grad_norm": 4.356308937072754,
|
|
"learning_rate": 9.991679340525048e-06,
|
|
"loss": 1.3085,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 0.3377040406743377,
|
|
"grad_norm": 3.8445913791656494,
|
|
"learning_rate": 9.991628229513212e-06,
|
|
"loss": 1.1096,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 0.337971635001338,
|
|
"grad_norm": 4.005192279815674,
|
|
"learning_rate": 9.991576962134317e-06,
|
|
"loss": 1.0536,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 0.3382392293283382,
|
|
"grad_norm": 4.3194355964660645,
|
|
"learning_rate": 9.991525538389971e-06,
|
|
"loss": 1.228,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 0.3385068236553385,
|
|
"grad_norm": 4.254610538482666,
|
|
"learning_rate": 9.991473958281787e-06,
|
|
"loss": 1.2584,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 0.33877441798233876,
|
|
"grad_norm": 4.035154819488525,
|
|
"learning_rate": 9.991422221811377e-06,
|
|
"loss": 1.2187,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 0.33904201230933906,
|
|
"grad_norm": 4.172974109649658,
|
|
"learning_rate": 9.991370328980365e-06,
|
|
"loss": 1.229,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 0.3393096066363393,
|
|
"grad_norm": 4.167996883392334,
|
|
"learning_rate": 9.991318279790376e-06,
|
|
"loss": 1.1717,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 0.3395772009633396,
|
|
"grad_norm": 3.9925200939178467,
|
|
"learning_rate": 9.991266074243038e-06,
|
|
"loss": 1.0795,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 0.33984479529033984,
|
|
"grad_norm": 4.081603050231934,
|
|
"learning_rate": 9.99121371233999e-06,
|
|
"loss": 1.2579,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.34011238961734014,
|
|
"grad_norm": 3.936547040939331,
|
|
"learning_rate": 9.991161194082868e-06,
|
|
"loss": 1.1121,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 0.3403799839443404,
|
|
"grad_norm": 3.9780871868133545,
|
|
"learning_rate": 9.991108519473321e-06,
|
|
"loss": 1.0613,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 0.3406475782713406,
|
|
"grad_norm": 4.46980619430542,
|
|
"learning_rate": 9.991055688512996e-06,
|
|
"loss": 1.3501,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 0.3409151725983409,
|
|
"grad_norm": 4.383254051208496,
|
|
"learning_rate": 9.991002701203552e-06,
|
|
"loss": 1.2561,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 0.34118276692534116,
|
|
"grad_norm": 3.765019178390503,
|
|
"learning_rate": 9.990949557546644e-06,
|
|
"loss": 1.1322,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 0.34145036125234146,
|
|
"grad_norm": 4.063820838928223,
|
|
"learning_rate": 9.99089625754394e-06,
|
|
"loss": 1.194,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 0.3417179555793417,
|
|
"grad_norm": 3.9652814865112305,
|
|
"learning_rate": 9.990842801197109e-06,
|
|
"loss": 1.2013,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 0.341985549906342,
|
|
"grad_norm": 3.9601919651031494,
|
|
"learning_rate": 9.990789188507827e-06,
|
|
"loss": 1.4019,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 0.34225314423334224,
|
|
"grad_norm": 3.9643354415893555,
|
|
"learning_rate": 9.990735419477771e-06,
|
|
"loss": 1.1947,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 0.34252073856034254,
|
|
"grad_norm": 4.578568458557129,
|
|
"learning_rate": 9.990681494108625e-06,
|
|
"loss": 1.2858,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.3427883328873428,
|
|
"grad_norm": 3.9634618759155273,
|
|
"learning_rate": 9.990627412402081e-06,
|
|
"loss": 1.0718,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 0.3430559272143431,
|
|
"grad_norm": 4.035841941833496,
|
|
"learning_rate": 9.990573174359831e-06,
|
|
"loss": 1.0878,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 0.3433235215413433,
|
|
"grad_norm": 3.8855788707733154,
|
|
"learning_rate": 9.990518779983575e-06,
|
|
"loss": 1.08,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 0.34359111586834357,
|
|
"grad_norm": 4.141454219818115,
|
|
"learning_rate": 9.990464229275017e-06,
|
|
"loss": 1.2422,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 0.34385871019534386,
|
|
"grad_norm": 4.580347537994385,
|
|
"learning_rate": 9.990409522235866e-06,
|
|
"loss": 1.2064,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 0.3441263045223441,
|
|
"grad_norm": 4.343654155731201,
|
|
"learning_rate": 9.990354658867833e-06,
|
|
"loss": 1.2125,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 0.3443938988493444,
|
|
"grad_norm": 4.314458847045898,
|
|
"learning_rate": 9.990299639172643e-06,
|
|
"loss": 1.2558,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 0.34466149317634465,
|
|
"grad_norm": 3.7052886486053467,
|
|
"learning_rate": 9.990244463152012e-06,
|
|
"loss": 1.0901,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 0.34492908750334494,
|
|
"grad_norm": 4.012930393218994,
|
|
"learning_rate": 9.990189130807672e-06,
|
|
"loss": 1.2457,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 0.3451966818303452,
|
|
"grad_norm": 4.098269462585449,
|
|
"learning_rate": 9.990133642141359e-06,
|
|
"loss": 1.1806,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.3454642761573455,
|
|
"grad_norm": 3.833272695541382,
|
|
"learning_rate": 9.990077997154807e-06,
|
|
"loss": 1.1566,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 0.3457318704843457,
|
|
"grad_norm": 4.549833297729492,
|
|
"learning_rate": 9.99002219584976e-06,
|
|
"loss": 1.3042,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 0.345999464811346,
|
|
"grad_norm": 3.901430368423462,
|
|
"learning_rate": 9.989966238227967e-06,
|
|
"loss": 1.1204,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 0.34626705913834627,
|
|
"grad_norm": 4.016772270202637,
|
|
"learning_rate": 9.989910124291182e-06,
|
|
"loss": 1.0479,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 0.3465346534653465,
|
|
"grad_norm": 3.785675048828125,
|
|
"learning_rate": 9.989853854041158e-06,
|
|
"loss": 1.0056,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 0.3468022477923468,
|
|
"grad_norm": 3.9900505542755127,
|
|
"learning_rate": 9.989797427479663e-06,
|
|
"loss": 1.1034,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 0.34706984211934705,
|
|
"grad_norm": 3.6167192459106445,
|
|
"learning_rate": 9.989740844608464e-06,
|
|
"loss": 1.1602,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 0.34733743644634735,
|
|
"grad_norm": 4.165998458862305,
|
|
"learning_rate": 9.989684105429332e-06,
|
|
"loss": 1.2538,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 0.3476050307733476,
|
|
"grad_norm": 3.6239192485809326,
|
|
"learning_rate": 9.989627209944044e-06,
|
|
"loss": 1.1523,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 0.3478726251003479,
|
|
"grad_norm": 3.9420888423919678,
|
|
"learning_rate": 9.989570158154383e-06,
|
|
"loss": 1.1796,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.34814021942734813,
|
|
"grad_norm": 3.861833333969116,
|
|
"learning_rate": 9.989512950062135e-06,
|
|
"loss": 1.1694,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 0.3484078137543484,
|
|
"grad_norm": 4.555881023406982,
|
|
"learning_rate": 9.989455585669093e-06,
|
|
"loss": 1.1641,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 0.34867540808134867,
|
|
"grad_norm": 3.9727768898010254,
|
|
"learning_rate": 9.989398064977057e-06,
|
|
"loss": 1.2632,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 0.34894300240834897,
|
|
"grad_norm": 4.150755882263184,
|
|
"learning_rate": 9.989340387987823e-06,
|
|
"loss": 1.288,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 0.3492105967353492,
|
|
"grad_norm": 4.13301944732666,
|
|
"learning_rate": 9.989282554703202e-06,
|
|
"loss": 1.3014,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 0.34947819106234945,
|
|
"grad_norm": 4.18637752532959,
|
|
"learning_rate": 9.989224565125003e-06,
|
|
"loss": 1.2925,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 0.34974578538934975,
|
|
"grad_norm": 4.216982841491699,
|
|
"learning_rate": 9.989166419255047e-06,
|
|
"loss": 1.2506,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 0.35001337971635,
|
|
"grad_norm": 4.059083938598633,
|
|
"learning_rate": 9.989108117095152e-06,
|
|
"loss": 1.3471,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 0.3502809740433503,
|
|
"grad_norm": 4.72033166885376,
|
|
"learning_rate": 9.989049658647146e-06,
|
|
"loss": 1.273,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 0.35054856837035053,
|
|
"grad_norm": 3.916358232498169,
|
|
"learning_rate": 9.988991043912857e-06,
|
|
"loss": 1.2104,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.35081616269735083,
|
|
"grad_norm": 4.03465461730957,
|
|
"learning_rate": 9.988932272894123e-06,
|
|
"loss": 1.2057,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 0.3510837570243511,
|
|
"grad_norm": 3.646699905395508,
|
|
"learning_rate": 9.988873345592786e-06,
|
|
"loss": 1.1352,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 0.35135135135135137,
|
|
"grad_norm": 3.854741334915161,
|
|
"learning_rate": 9.988814262010692e-06,
|
|
"loss": 1.1613,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 0.3516189456783516,
|
|
"grad_norm": 4.387909889221191,
|
|
"learning_rate": 9.988755022149692e-06,
|
|
"loss": 1.3018,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 0.3518865400053519,
|
|
"grad_norm": 3.9308204650878906,
|
|
"learning_rate": 9.988695626011639e-06,
|
|
"loss": 1.045,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 0.35215413433235215,
|
|
"grad_norm": 4.186442852020264,
|
|
"learning_rate": 9.988636073598396e-06,
|
|
"loss": 1.1992,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 0.3524217286593524,
|
|
"grad_norm": 3.7117176055908203,
|
|
"learning_rate": 9.98857636491183e-06,
|
|
"loss": 1.1623,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 0.3526893229863527,
|
|
"grad_norm": 3.9418506622314453,
|
|
"learning_rate": 9.988516499953807e-06,
|
|
"loss": 1.1666,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 0.35295691731335294,
|
|
"grad_norm": 4.194133758544922,
|
|
"learning_rate": 9.988456478726207e-06,
|
|
"loss": 1.3279,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 0.35322451164035323,
|
|
"grad_norm": 3.8580989837646484,
|
|
"learning_rate": 9.988396301230908e-06,
|
|
"loss": 1.2154,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.3534921059673535,
|
|
"grad_norm": 4.107762813568115,
|
|
"learning_rate": 9.988335967469794e-06,
|
|
"loss": 1.3117,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 0.3537597002943538,
|
|
"grad_norm": 3.7596476078033447,
|
|
"learning_rate": 9.988275477444756e-06,
|
|
"loss": 1.1496,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 0.354027294621354,
|
|
"grad_norm": 4.2492289543151855,
|
|
"learning_rate": 9.98821483115769e-06,
|
|
"loss": 1.1693,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 0.3542948889483543,
|
|
"grad_norm": 3.3397791385650635,
|
|
"learning_rate": 9.988154028610496e-06,
|
|
"loss": 0.9744,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 0.35456248327535456,
|
|
"grad_norm": 3.7433207035064697,
|
|
"learning_rate": 9.988093069805074e-06,
|
|
"loss": 1.1848,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 0.35483007760235485,
|
|
"grad_norm": 3.7859861850738525,
|
|
"learning_rate": 9.98803195474334e-06,
|
|
"loss": 1.1676,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 0.3550976719293551,
|
|
"grad_norm": 4.002213954925537,
|
|
"learning_rate": 9.987970683427205e-06,
|
|
"loss": 1.1791,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 0.35536526625635534,
|
|
"grad_norm": 4.300050258636475,
|
|
"learning_rate": 9.987909255858588e-06,
|
|
"loss": 1.1968,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 0.35563286058335564,
|
|
"grad_norm": 3.8620917797088623,
|
|
"learning_rate": 9.987847672039416e-06,
|
|
"loss": 1.101,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 0.3559004549103559,
|
|
"grad_norm": 4.3346757888793945,
|
|
"learning_rate": 9.987785931971616e-06,
|
|
"loss": 1.105,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.3561680492373562,
|
|
"grad_norm": 3.949228048324585,
|
|
"learning_rate": 9.987724035657122e-06,
|
|
"loss": 1.225,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 0.3564356435643564,
|
|
"grad_norm": 4.213968276977539,
|
|
"learning_rate": 9.987661983097875e-06,
|
|
"loss": 1.3405,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 0.3567032378913567,
|
|
"grad_norm": 3.694033622741699,
|
|
"learning_rate": 9.987599774295815e-06,
|
|
"loss": 1.1712,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 0.35697083221835696,
|
|
"grad_norm": 3.9766345024108887,
|
|
"learning_rate": 9.987537409252895e-06,
|
|
"loss": 1.1874,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 0.35723842654535726,
|
|
"grad_norm": 3.906832456588745,
|
|
"learning_rate": 9.987474887971067e-06,
|
|
"loss": 1.1985,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 0.3575060208723575,
|
|
"grad_norm": 3.593148708343506,
|
|
"learning_rate": 9.987412210452288e-06,
|
|
"loss": 1.0753,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 0.3577736151993578,
|
|
"grad_norm": 4.227734088897705,
|
|
"learning_rate": 9.987349376698522e-06,
|
|
"loss": 1.2267,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 0.35804120952635804,
|
|
"grad_norm": 3.786079168319702,
|
|
"learning_rate": 9.98728638671174e-06,
|
|
"loss": 1.113,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 0.3583088038533583,
|
|
"grad_norm": 4.259524345397949,
|
|
"learning_rate": 9.987223240493912e-06,
|
|
"loss": 1.233,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 0.3585763981803586,
|
|
"grad_norm": 4.269252777099609,
|
|
"learning_rate": 9.987159938047018e-06,
|
|
"loss": 1.328,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.3588439925073588,
|
|
"grad_norm": 3.9735991954803467,
|
|
"learning_rate": 9.98709647937304e-06,
|
|
"loss": 1.2262,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 0.3591115868343591,
|
|
"grad_norm": 3.9140255451202393,
|
|
"learning_rate": 9.987032864473966e-06,
|
|
"loss": 1.1495,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 0.35937918116135936,
|
|
"grad_norm": 3.9749045372009277,
|
|
"learning_rate": 9.986969093351789e-06,
|
|
"loss": 1.071,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 0.35964677548835966,
|
|
"grad_norm": 4.553966522216797,
|
|
"learning_rate": 9.986905166008506e-06,
|
|
"loss": 1.2779,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 0.3599143698153599,
|
|
"grad_norm": 4.131070613861084,
|
|
"learning_rate": 9.98684108244612e-06,
|
|
"loss": 1.315,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 0.3601819641423602,
|
|
"grad_norm": 3.922656297683716,
|
|
"learning_rate": 9.986776842666641e-06,
|
|
"loss": 1.1049,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 0.36044955846936044,
|
|
"grad_norm": 4.215112209320068,
|
|
"learning_rate": 9.98671244667208e-06,
|
|
"loss": 1.1876,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 0.36071715279636074,
|
|
"grad_norm": 3.91481614112854,
|
|
"learning_rate": 9.986647894464452e-06,
|
|
"loss": 1.1058,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 0.360984747123361,
|
|
"grad_norm": 4.0664777755737305,
|
|
"learning_rate": 9.98658318604578e-06,
|
|
"loss": 1.205,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 0.3612523414503612,
|
|
"grad_norm": 4.033042907714844,
|
|
"learning_rate": 9.986518321418091e-06,
|
|
"loss": 1.2229,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.3615199357773615,
|
|
"grad_norm": 4.329224109649658,
|
|
"learning_rate": 9.986453300583419e-06,
|
|
"loss": 1.1991,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 0.36178753010436177,
|
|
"grad_norm": 4.066847324371338,
|
|
"learning_rate": 9.986388123543798e-06,
|
|
"loss": 1.0257,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 0.36205512443136206,
|
|
"grad_norm": 4.082132816314697,
|
|
"learning_rate": 9.986322790301272e-06,
|
|
"loss": 1.3073,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 0.3623227187583623,
|
|
"grad_norm": 4.0690765380859375,
|
|
"learning_rate": 9.986257300857885e-06,
|
|
"loss": 1.3173,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 0.3625903130853626,
|
|
"grad_norm": 3.81072735786438,
|
|
"learning_rate": 9.986191655215692e-06,
|
|
"loss": 1.1571,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 0.36285790741236285,
|
|
"grad_norm": 4.073006629943848,
|
|
"learning_rate": 9.986125853376747e-06,
|
|
"loss": 1.2328,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 0.36312550173936314,
|
|
"grad_norm": 4.015659332275391,
|
|
"learning_rate": 9.986059895343113e-06,
|
|
"loss": 1.1722,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 0.3633930960663634,
|
|
"grad_norm": 4.220362663269043,
|
|
"learning_rate": 9.985993781116853e-06,
|
|
"loss": 1.2666,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 0.3636606903933637,
|
|
"grad_norm": 4.876560688018799,
|
|
"learning_rate": 9.985927510700043e-06,
|
|
"loss": 1.3855,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 0.3639282847203639,
|
|
"grad_norm": 3.7819228172302246,
|
|
"learning_rate": 9.985861084094754e-06,
|
|
"loss": 1.2191,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.36419587904736417,
|
|
"grad_norm": 5.581944465637207,
|
|
"learning_rate": 9.98579450130307e-06,
|
|
"loss": 1.3441,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 0.36446347337436447,
|
|
"grad_norm": 4.042576789855957,
|
|
"learning_rate": 9.985727762327075e-06,
|
|
"loss": 1.23,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 0.3647310677013647,
|
|
"grad_norm": 3.75724720954895,
|
|
"learning_rate": 9.985660867168862e-06,
|
|
"loss": 1.1289,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 0.364998662028365,
|
|
"grad_norm": 3.9243931770324707,
|
|
"learning_rate": 9.985593815830524e-06,
|
|
"loss": 1.1703,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 0.36526625635536525,
|
|
"grad_norm": 3.6307761669158936,
|
|
"learning_rate": 9.985526608314162e-06,
|
|
"loss": 1.1219,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 0.36553385068236555,
|
|
"grad_norm": 4.060052394866943,
|
|
"learning_rate": 9.985459244621883e-06,
|
|
"loss": 1.2764,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 0.3658014450093658,
|
|
"grad_norm": 4.747690200805664,
|
|
"learning_rate": 9.985391724755796e-06,
|
|
"loss": 1.2811,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 0.3660690393363661,
|
|
"grad_norm": 3.579979658126831,
|
|
"learning_rate": 9.985324048718014e-06,
|
|
"loss": 1.1203,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 0.36633663366336633,
|
|
"grad_norm": 3.809176206588745,
|
|
"learning_rate": 9.985256216510661e-06,
|
|
"loss": 1.1502,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 0.3666042279903666,
|
|
"grad_norm": 3.7964789867401123,
|
|
"learning_rate": 9.98518822813586e-06,
|
|
"loss": 1.0763,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.36687182231736687,
|
|
"grad_norm": 3.559234619140625,
|
|
"learning_rate": 9.985120083595742e-06,
|
|
"loss": 1.2019,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 0.36713941664436717,
|
|
"grad_norm": 4.064184665679932,
|
|
"learning_rate": 9.985051782892439e-06,
|
|
"loss": 1.3257,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 0.3674070109713674,
|
|
"grad_norm": 3.9166617393493652,
|
|
"learning_rate": 9.984983326028093e-06,
|
|
"loss": 1.0911,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 0.36767460529836765,
|
|
"grad_norm": 3.9536726474761963,
|
|
"learning_rate": 9.984914713004847e-06,
|
|
"loss": 1.1804,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 0.36794219962536795,
|
|
"grad_norm": 4.240631103515625,
|
|
"learning_rate": 9.98484594382485e-06,
|
|
"loss": 1.2855,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 0.3682097939523682,
|
|
"grad_norm": 3.7650909423828125,
|
|
"learning_rate": 9.984777018490258e-06,
|
|
"loss": 1.1098,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 0.3684773882793685,
|
|
"grad_norm": 3.4372477531433105,
|
|
"learning_rate": 9.98470793700323e-06,
|
|
"loss": 1.1356,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 0.36874498260636873,
|
|
"grad_norm": 3.8635547161102295,
|
|
"learning_rate": 9.984638699365928e-06,
|
|
"loss": 1.0865,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 0.36901257693336903,
|
|
"grad_norm": 4.2199554443359375,
|
|
"learning_rate": 9.984569305580523e-06,
|
|
"loss": 1.1303,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 0.36928017126036927,
|
|
"grad_norm": 4.431107044219971,
|
|
"learning_rate": 9.984499755649188e-06,
|
|
"loss": 1.3348,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.36954776558736957,
|
|
"grad_norm": 3.5743043422698975,
|
|
"learning_rate": 9.984430049574103e-06,
|
|
"loss": 1.1717,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 0.3698153599143698,
|
|
"grad_norm": 3.696826934814453,
|
|
"learning_rate": 9.98436018735745e-06,
|
|
"loss": 1.1214,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 0.3700829542413701,
|
|
"grad_norm": 4.062804222106934,
|
|
"learning_rate": 9.984290169001418e-06,
|
|
"loss": 1.182,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 0.37035054856837035,
|
|
"grad_norm": 3.5979297161102295,
|
|
"learning_rate": 9.984219994508199e-06,
|
|
"loss": 1.0661,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 0.3706181428953706,
|
|
"grad_norm": 3.755028247833252,
|
|
"learning_rate": 9.984149663879994e-06,
|
|
"loss": 1.1072,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 0.3708857372223709,
|
|
"grad_norm": 3.7246670722961426,
|
|
"learning_rate": 9.984079177119003e-06,
|
|
"loss": 1.1284,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 0.37115333154937113,
|
|
"grad_norm": 3.9325149059295654,
|
|
"learning_rate": 9.984008534227439e-06,
|
|
"loss": 1.2296,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 0.37142092587637143,
|
|
"grad_norm": 4.277532577514648,
|
|
"learning_rate": 9.983937735207509e-06,
|
|
"loss": 1.2242,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 0.3716885202033717,
|
|
"grad_norm": 3.9127614498138428,
|
|
"learning_rate": 9.983866780061435e-06,
|
|
"loss": 1.1925,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 0.371956114530372,
|
|
"grad_norm": 3.858065366744995,
|
|
"learning_rate": 9.983795668791435e-06,
|
|
"loss": 1.1462,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.3722237088573722,
|
|
"grad_norm": 3.7166876792907715,
|
|
"learning_rate": 9.983724401399745e-06,
|
|
"loss": 1.1978,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 0.3724913031843725,
|
|
"grad_norm": 4.299930572509766,
|
|
"learning_rate": 9.98365297788859e-06,
|
|
"loss": 1.1856,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 0.37275889751137276,
|
|
"grad_norm": 3.803140640258789,
|
|
"learning_rate": 9.983581398260211e-06,
|
|
"loss": 1.1731,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 0.37302649183837305,
|
|
"grad_norm": 3.9758286476135254,
|
|
"learning_rate": 9.983509662516848e-06,
|
|
"loss": 1.2402,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 0.3732940861653733,
|
|
"grad_norm": 3.708829402923584,
|
|
"learning_rate": 9.98343777066075e-06,
|
|
"loss": 1.1367,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 0.37356168049237354,
|
|
"grad_norm": 3.941568613052368,
|
|
"learning_rate": 9.983365722694166e-06,
|
|
"loss": 1.187,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 0.37382927481937384,
|
|
"grad_norm": 3.5624454021453857,
|
|
"learning_rate": 9.983293518619358e-06,
|
|
"loss": 1.0969,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 0.3740968691463741,
|
|
"grad_norm": 4.011288642883301,
|
|
"learning_rate": 9.983221158438585e-06,
|
|
"loss": 1.1643,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 0.3743644634733744,
|
|
"grad_norm": 5.278192520141602,
|
|
"learning_rate": 9.983148642154114e-06,
|
|
"loss": 1.1881,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 0.3746320578003746,
|
|
"grad_norm": 3.4236741065979004,
|
|
"learning_rate": 9.983075969768217e-06,
|
|
"loss": 1.1508,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3748996521273749,
|
|
"grad_norm": 3.687683582305908,
|
|
"learning_rate": 9.98300314128317e-06,
|
|
"loss": 1.1077,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 0.37516724645437516,
|
|
"grad_norm": 3.9771618843078613,
|
|
"learning_rate": 9.982930156701254e-06,
|
|
"loss": 1.1796,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 0.37543484078137546,
|
|
"grad_norm": 3.828674554824829,
|
|
"learning_rate": 9.982857016024757e-06,
|
|
"loss": 1.2694,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 0.3757024351083757,
|
|
"grad_norm": 3.8206582069396973,
|
|
"learning_rate": 9.982783719255968e-06,
|
|
"loss": 1.1139,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 0.375970029435376,
|
|
"grad_norm": 3.9748029708862305,
|
|
"learning_rate": 9.982710266397184e-06,
|
|
"loss": 1.1027,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 0.37623762376237624,
|
|
"grad_norm": 3.5067262649536133,
|
|
"learning_rate": 9.982636657450706e-06,
|
|
"loss": 1.1222,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 0.3765052180893765,
|
|
"grad_norm": 4.0315093994140625,
|
|
"learning_rate": 9.98256289241884e-06,
|
|
"loss": 1.3018,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 0.3767728124163768,
|
|
"grad_norm": 3.5691301822662354,
|
|
"learning_rate": 9.982488971303899e-06,
|
|
"loss": 1.0404,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 0.377040406743377,
|
|
"grad_norm": 4.119424819946289,
|
|
"learning_rate": 9.982414894108194e-06,
|
|
"loss": 1.234,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 0.3773080010703773,
|
|
"grad_norm": 3.957841157913208,
|
|
"learning_rate": 9.982340660834049e-06,
|
|
"loss": 1.4368,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.37757559539737756,
|
|
"grad_norm": 3.7822635173797607,
|
|
"learning_rate": 9.982266271483787e-06,
|
|
"loss": 1.1002,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 0.37784318972437786,
|
|
"grad_norm": 3.610924005508423,
|
|
"learning_rate": 9.982191726059742e-06,
|
|
"loss": 1.1034,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 0.3781107840513781,
|
|
"grad_norm": 3.8048479557037354,
|
|
"learning_rate": 9.982117024564244e-06,
|
|
"loss": 1.1641,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 0.3783783783783784,
|
|
"grad_norm": 3.299978256225586,
|
|
"learning_rate": 9.982042166999639e-06,
|
|
"loss": 1.0626,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 0.37864597270537864,
|
|
"grad_norm": 4.0182061195373535,
|
|
"learning_rate": 9.981967153368266e-06,
|
|
"loss": 1.2374,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 0.37891356703237894,
|
|
"grad_norm": 3.6433804035186768,
|
|
"learning_rate": 9.981891983672481e-06,
|
|
"loss": 1.0366,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 0.3791811613593792,
|
|
"grad_norm": 3.748567581176758,
|
|
"learning_rate": 9.981816657914633e-06,
|
|
"loss": 1.0423,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 0.3794487556863794,
|
|
"grad_norm": 3.7218706607818604,
|
|
"learning_rate": 9.981741176097084e-06,
|
|
"loss": 1.0621,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 0.3797163500133797,
|
|
"grad_norm": 4.026986122131348,
|
|
"learning_rate": 9.981665538222201e-06,
|
|
"loss": 1.1626,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 0.37998394434037996,
|
|
"grad_norm": 4.299746513366699,
|
|
"learning_rate": 9.98158974429235e-06,
|
|
"loss": 1.2972,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.38025153866738026,
|
|
"grad_norm": 3.5890026092529297,
|
|
"learning_rate": 9.981513794309905e-06,
|
|
"loss": 1.1985,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 0.3805191329943805,
|
|
"grad_norm": 3.7244949340820312,
|
|
"learning_rate": 9.981437688277248e-06,
|
|
"loss": 1.0564,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 0.3807867273213808,
|
|
"grad_norm": 4.018440246582031,
|
|
"learning_rate": 9.981361426196763e-06,
|
|
"loss": 1.1399,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 0.38105432164838104,
|
|
"grad_norm": 4.051723003387451,
|
|
"learning_rate": 9.981285008070836e-06,
|
|
"loss": 1.3185,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 0.38132191597538134,
|
|
"grad_norm": 3.7371041774749756,
|
|
"learning_rate": 9.981208433901864e-06,
|
|
"loss": 1.1645,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 0.3815895103023816,
|
|
"grad_norm": 3.600698709487915,
|
|
"learning_rate": 9.981131703692241e-06,
|
|
"loss": 1.1944,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 0.3818571046293819,
|
|
"grad_norm": 3.748783826828003,
|
|
"learning_rate": 9.981054817444378e-06,
|
|
"loss": 1.1702,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 0.3821246989563821,
|
|
"grad_norm": 4.829683303833008,
|
|
"learning_rate": 9.980977775160676e-06,
|
|
"loss": 1.1126,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 0.38239229328338237,
|
|
"grad_norm": 3.9870803356170654,
|
|
"learning_rate": 9.980900576843555e-06,
|
|
"loss": 1.1593,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 0.38265988761038267,
|
|
"grad_norm": 4.103924751281738,
|
|
"learning_rate": 9.980823222495429e-06,
|
|
"loss": 1.3044,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.3829274819373829,
|
|
"grad_norm": 3.9906015396118164,
|
|
"learning_rate": 9.980745712118722e-06,
|
|
"loss": 1.1956,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 0.3831950762643832,
|
|
"grad_norm": 3.7663869857788086,
|
|
"learning_rate": 9.980668045715864e-06,
|
|
"loss": 1.1876,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 0.38346267059138345,
|
|
"grad_norm": 3.752241373062134,
|
|
"learning_rate": 9.980590223289284e-06,
|
|
"loss": 1.2941,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 0.38373026491838375,
|
|
"grad_norm": 3.90246844291687,
|
|
"learning_rate": 9.980512244841424e-06,
|
|
"loss": 1.1741,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 0.383997859245384,
|
|
"grad_norm": 3.9306254386901855,
|
|
"learning_rate": 9.980434110374725e-06,
|
|
"loss": 1.2318,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 0.3842654535723843,
|
|
"grad_norm": 3.9048655033111572,
|
|
"learning_rate": 9.980355819891634e-06,
|
|
"loss": 1.1667,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 0.38453304789938453,
|
|
"grad_norm": 4.364803314208984,
|
|
"learning_rate": 9.980277373394604e-06,
|
|
"loss": 1.3952,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 0.3848006422263848,
|
|
"grad_norm": 4.139352798461914,
|
|
"learning_rate": 9.980198770886094e-06,
|
|
"loss": 1.1513,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 0.38506823655338507,
|
|
"grad_norm": 4.017728805541992,
|
|
"learning_rate": 9.980120012368564e-06,
|
|
"loss": 1.3192,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 0.3853358308803853,
|
|
"grad_norm": 4.370884418487549,
|
|
"learning_rate": 9.980041097844482e-06,
|
|
"loss": 1.2861,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.3856034252073856,
|
|
"grad_norm": 3.9100890159606934,
|
|
"learning_rate": 9.979962027316322e-06,
|
|
"loss": 1.1261,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 0.38587101953438585,
|
|
"grad_norm": 4.041008949279785,
|
|
"learning_rate": 9.979882800786556e-06,
|
|
"loss": 1.223,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 0.38613861386138615,
|
|
"grad_norm": 3.6667256355285645,
|
|
"learning_rate": 9.97980341825767e-06,
|
|
"loss": 1.217,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 0.3864062081883864,
|
|
"grad_norm": 3.8240602016448975,
|
|
"learning_rate": 9.979723879732151e-06,
|
|
"loss": 1.098,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 0.3866738025153867,
|
|
"grad_norm": 3.704866647720337,
|
|
"learning_rate": 9.979644185212489e-06,
|
|
"loss": 1.0308,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 0.38694139684238693,
|
|
"grad_norm": 3.873335123062134,
|
|
"learning_rate": 9.97956433470118e-06,
|
|
"loss": 1.1319,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 0.38720899116938723,
|
|
"grad_norm": 3.9278151988983154,
|
|
"learning_rate": 9.979484328200726e-06,
|
|
"loss": 1.1702,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 0.38747658549638747,
|
|
"grad_norm": 4.0510077476501465,
|
|
"learning_rate": 9.979404165713633e-06,
|
|
"loss": 1.2193,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 0.38774417982338777,
|
|
"grad_norm": 3.8461694717407227,
|
|
"learning_rate": 9.979323847242414e-06,
|
|
"loss": 1.2015,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 0.388011774150388,
|
|
"grad_norm": 3.905766010284424,
|
|
"learning_rate": 9.979243372789583e-06,
|
|
"loss": 1.1764,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.38827936847738825,
|
|
"grad_norm": 3.528315305709839,
|
|
"learning_rate": 9.979162742357661e-06,
|
|
"loss": 1.1254,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 0.38854696280438855,
|
|
"grad_norm": 3.523634433746338,
|
|
"learning_rate": 9.979081955949176e-06,
|
|
"loss": 1.1075,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 0.3888145571313888,
|
|
"grad_norm": 3.9340994358062744,
|
|
"learning_rate": 9.979001013566656e-06,
|
|
"loss": 1.1919,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 0.3890821514583891,
|
|
"grad_norm": 3.8037092685699463,
|
|
"learning_rate": 9.978919915212637e-06,
|
|
"loss": 1.2163,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 0.38934974578538933,
|
|
"grad_norm": 4.22614049911499,
|
|
"learning_rate": 9.978838660889662e-06,
|
|
"loss": 1.3132,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 0.38961734011238963,
|
|
"grad_norm": 3.9353504180908203,
|
|
"learning_rate": 9.978757250600273e-06,
|
|
"loss": 1.0995,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 0.3898849344393899,
|
|
"grad_norm": 3.8454160690307617,
|
|
"learning_rate": 9.978675684347022e-06,
|
|
"loss": 1.1743,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 0.39015252876639017,
|
|
"grad_norm": 3.7511603832244873,
|
|
"learning_rate": 9.978593962132464e-06,
|
|
"loss": 1.0399,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 0.3904201230933904,
|
|
"grad_norm": 3.736814022064209,
|
|
"learning_rate": 9.97851208395916e-06,
|
|
"loss": 1.1429,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 0.3906877174203907,
|
|
"grad_norm": 4.143425464630127,
|
|
"learning_rate": 9.978430049829672e-06,
|
|
"loss": 1.2349,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.39095531174739095,
|
|
"grad_norm": 3.73406720161438,
|
|
"learning_rate": 9.978347859746572e-06,
|
|
"loss": 1.1704,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 0.3912229060743912,
|
|
"grad_norm": 3.936199903488159,
|
|
"learning_rate": 9.978265513712435e-06,
|
|
"loss": 1.0558,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 0.3914905004013915,
|
|
"grad_norm": 3.9773452281951904,
|
|
"learning_rate": 9.97818301172984e-06,
|
|
"loss": 1.0348,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 0.39175809472839174,
|
|
"grad_norm": 4.385336875915527,
|
|
"learning_rate": 9.97810035380137e-06,
|
|
"loss": 1.3109,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 0.39202568905539203,
|
|
"grad_norm": 3.7281599044799805,
|
|
"learning_rate": 9.978017539929617e-06,
|
|
"loss": 1.0922,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 0.3922932833823923,
|
|
"grad_norm": 3.8303089141845703,
|
|
"learning_rate": 9.977934570117173e-06,
|
|
"loss": 1.2507,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 0.3925608777093926,
|
|
"grad_norm": 4.046004295349121,
|
|
"learning_rate": 9.97785144436664e-06,
|
|
"loss": 1.2282,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 0.3928284720363928,
|
|
"grad_norm": 3.8748390674591064,
|
|
"learning_rate": 9.977768162680616e-06,
|
|
"loss": 1.2089,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 0.3930960663633931,
|
|
"grad_norm": 3.917059898376465,
|
|
"learning_rate": 9.977684725061716e-06,
|
|
"loss": 1.3255,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 0.39336366069039336,
|
|
"grad_norm": 3.7279913425445557,
|
|
"learning_rate": 9.977601131512553e-06,
|
|
"loss": 1.326,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.39363125501739366,
|
|
"grad_norm": 3.7477800846099854,
|
|
"learning_rate": 9.977517382035743e-06,
|
|
"loss": 1.066,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 0.3938988493443939,
|
|
"grad_norm": 3.699467420578003,
|
|
"learning_rate": 9.97743347663391e-06,
|
|
"loss": 1.1433,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 0.39416644367139414,
|
|
"grad_norm": 4.19885778427124,
|
|
"learning_rate": 9.977349415309682e-06,
|
|
"loss": 1.2562,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 0.39443403799839444,
|
|
"grad_norm": 4.173817157745361,
|
|
"learning_rate": 9.977265198065696e-06,
|
|
"loss": 1.3787,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 0.3947016323253947,
|
|
"grad_norm": 3.8569087982177734,
|
|
"learning_rate": 9.977180824904586e-06,
|
|
"loss": 1.1537,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 0.394969226652395,
|
|
"grad_norm": 3.5903382301330566,
|
|
"learning_rate": 9.977096295828998e-06,
|
|
"loss": 1.082,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 0.3952368209793952,
|
|
"grad_norm": 4.043254375457764,
|
|
"learning_rate": 9.977011610841579e-06,
|
|
"loss": 1.2256,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 0.3955044153063955,
|
|
"grad_norm": 3.817080020904541,
|
|
"learning_rate": 9.97692676994498e-06,
|
|
"loss": 1.162,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 0.39577200963339576,
|
|
"grad_norm": 3.870049238204956,
|
|
"learning_rate": 9.976841773141862e-06,
|
|
"loss": 1.2824,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 0.39603960396039606,
|
|
"grad_norm": 3.870774984359741,
|
|
"learning_rate": 9.976756620434882e-06,
|
|
"loss": 1.0247,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.3963071982873963,
|
|
"grad_norm": 4.375112056732178,
|
|
"learning_rate": 9.976671311826714e-06,
|
|
"loss": 1.2235,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 0.3965747926143966,
|
|
"grad_norm": 3.724886894226074,
|
|
"learning_rate": 9.976585847320028e-06,
|
|
"loss": 1.1431,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 0.39684238694139684,
|
|
"grad_norm": 3.800994873046875,
|
|
"learning_rate": 9.9765002269175e-06,
|
|
"loss": 1.0945,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 0.3971099812683971,
|
|
"grad_norm": 4.027763843536377,
|
|
"learning_rate": 9.976414450621812e-06,
|
|
"loss": 1.0553,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 0.3973775755953974,
|
|
"grad_norm": 3.982628345489502,
|
|
"learning_rate": 9.976328518435654e-06,
|
|
"loss": 1.2203,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 0.3976451699223976,
|
|
"grad_norm": 3.9969089031219482,
|
|
"learning_rate": 9.976242430361714e-06,
|
|
"loss": 1.2567,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 0.3979127642493979,
|
|
"grad_norm": 3.8449573516845703,
|
|
"learning_rate": 9.976156186402691e-06,
|
|
"loss": 1.2397,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 0.39818035857639816,
|
|
"grad_norm": 4.029581069946289,
|
|
"learning_rate": 9.976069786561286e-06,
|
|
"loss": 1.2279,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 0.39844795290339846,
|
|
"grad_norm": 3.9746222496032715,
|
|
"learning_rate": 9.975983230840208e-06,
|
|
"loss": 1.3537,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 0.3987155472303987,
|
|
"grad_norm": 3.8316519260406494,
|
|
"learning_rate": 9.975896519242165e-06,
|
|
"loss": 1.103,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.398983141557399,
|
|
"grad_norm": 3.714109182357788,
|
|
"learning_rate": 9.975809651769874e-06,
|
|
"loss": 1.0821,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 0.39925073588439924,
|
|
"grad_norm": 3.9708900451660156,
|
|
"learning_rate": 9.97572262842606e-06,
|
|
"loss": 1.1406,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 0.39951833021139954,
|
|
"grad_norm": 3.9574031829833984,
|
|
"learning_rate": 9.975635449213443e-06,
|
|
"loss": 1.2042,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 0.3997859245383998,
|
|
"grad_norm": 3.648296594619751,
|
|
"learning_rate": 9.975548114134756e-06,
|
|
"loss": 1.1087,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 0.4000535188654,
|
|
"grad_norm": 3.9714877605438232,
|
|
"learning_rate": 9.975460623192738e-06,
|
|
"loss": 1.3016,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 0.4003211131924003,
|
|
"grad_norm": 4.270079612731934,
|
|
"learning_rate": 9.975372976390126e-06,
|
|
"loss": 1.2158,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 0.40058870751940057,
|
|
"grad_norm": 3.275508165359497,
|
|
"learning_rate": 9.975285173729668e-06,
|
|
"loss": 1.0631,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 0.40085630184640086,
|
|
"grad_norm": 3.839301824569702,
|
|
"learning_rate": 9.975197215214113e-06,
|
|
"loss": 1.2388,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 0.4011238961734011,
|
|
"grad_norm": 3.7420620918273926,
|
|
"learning_rate": 9.975109100846216e-06,
|
|
"loss": 1.1396,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 0.4013914905004014,
|
|
"grad_norm": 3.785066604614258,
|
|
"learning_rate": 9.975020830628741e-06,
|
|
"loss": 1.2032,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.4013914905004014,
|
|
"eval_loss": 1.2037502527236938,
|
|
"eval_runtime": 11.6819,
|
|
"eval_samples_per_second": 34.241,
|
|
"eval_steps_per_second": 4.28,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.40165908482740165,
|
|
"grad_norm": 3.9376888275146484,
|
|
"learning_rate": 9.974932404564448e-06,
|
|
"loss": 1.1378,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 0.40192667915440194,
|
|
"grad_norm": 4.073405742645264,
|
|
"learning_rate": 9.97484382265611e-06,
|
|
"loss": 1.1687,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 0.4021942734814022,
|
|
"grad_norm": 3.9197230339050293,
|
|
"learning_rate": 9.974755084906503e-06,
|
|
"loss": 1.164,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 0.4024618678084025,
|
|
"grad_norm": 4.129556655883789,
|
|
"learning_rate": 9.974666191318402e-06,
|
|
"loss": 1.2084,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 0.4027294621354027,
|
|
"grad_norm": 4.0111799240112305,
|
|
"learning_rate": 9.974577141894597e-06,
|
|
"loss": 1.2002,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 0.40299705646240297,
|
|
"grad_norm": 4.588496685028076,
|
|
"learning_rate": 9.974487936637873e-06,
|
|
"loss": 1.115,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 0.40326465078940327,
|
|
"grad_norm": 3.992095708847046,
|
|
"learning_rate": 9.974398575551029e-06,
|
|
"loss": 1.2977,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 0.4035322451164035,
|
|
"grad_norm": 4.14756965637207,
|
|
"learning_rate": 9.97430905863686e-06,
|
|
"loss": 1.2135,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 0.4037998394434038,
|
|
"grad_norm": 3.6382899284362793,
|
|
"learning_rate": 9.974219385898174e-06,
|
|
"loss": 1.1663,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 0.40406743377040405,
|
|
"grad_norm": 3.954108953475952,
|
|
"learning_rate": 9.974129557337777e-06,
|
|
"loss": 1.2709,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.40433502809740435,
|
|
"grad_norm": 3.5056028366088867,
|
|
"learning_rate": 9.974039572958486e-06,
|
|
"loss": 1.0011,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 0.4046026224244046,
|
|
"grad_norm": 3.9158694744110107,
|
|
"learning_rate": 9.973949432763117e-06,
|
|
"loss": 1.2319,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 0.4048702167514049,
|
|
"grad_norm": 3.6687309741973877,
|
|
"learning_rate": 9.973859136754495e-06,
|
|
"loss": 1.1885,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 0.40513781107840513,
|
|
"grad_norm": 4.025513172149658,
|
|
"learning_rate": 9.973768684935448e-06,
|
|
"loss": 1.1389,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 0.40540540540540543,
|
|
"grad_norm": 3.8120996952056885,
|
|
"learning_rate": 9.973678077308811e-06,
|
|
"loss": 1.1946,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 0.40567299973240567,
|
|
"grad_norm": 3.88718318939209,
|
|
"learning_rate": 9.97358731387742e-06,
|
|
"loss": 1.2046,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 0.40594059405940597,
|
|
"grad_norm": 4.027118682861328,
|
|
"learning_rate": 9.97349639464412e-06,
|
|
"loss": 1.2053,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 0.4062081883864062,
|
|
"grad_norm": 3.675534963607788,
|
|
"learning_rate": 9.973405319611757e-06,
|
|
"loss": 1.1274,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 0.40647578271340645,
|
|
"grad_norm": 3.914788007736206,
|
|
"learning_rate": 9.973314088783188e-06,
|
|
"loss": 1.2117,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 0.40674337704040675,
|
|
"grad_norm": 3.8196732997894287,
|
|
"learning_rate": 9.973222702161267e-06,
|
|
"loss": 1.1037,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.407010971367407,
|
|
"grad_norm": 3.492936611175537,
|
|
"learning_rate": 9.97313115974886e-06,
|
|
"loss": 1.1087,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 0.4072785656944073,
|
|
"grad_norm": 4.102333068847656,
|
|
"learning_rate": 9.97303946154883e-06,
|
|
"loss": 1.2887,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 0.40754616002140753,
|
|
"grad_norm": 3.951390027999878,
|
|
"learning_rate": 9.972947607564056e-06,
|
|
"loss": 1.2433,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 0.40781375434840783,
|
|
"grad_norm": 3.7122180461883545,
|
|
"learning_rate": 9.972855597797408e-06,
|
|
"loss": 1.0165,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 0.4080813486754081,
|
|
"grad_norm": 3.8031108379364014,
|
|
"learning_rate": 9.972763432251775e-06,
|
|
"loss": 1.1836,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 0.40834894300240837,
|
|
"grad_norm": 3.916783571243286,
|
|
"learning_rate": 9.972671110930041e-06,
|
|
"loss": 1.2287,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 0.4086165373294086,
|
|
"grad_norm": 4.449172019958496,
|
|
"learning_rate": 9.972578633835096e-06,
|
|
"loss": 1.212,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 0.4088841316564089,
|
|
"grad_norm": 3.9276909828186035,
|
|
"learning_rate": 9.972486000969842e-06,
|
|
"loss": 1.2655,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 0.40915172598340915,
|
|
"grad_norm": 4.05131196975708,
|
|
"learning_rate": 9.972393212337178e-06,
|
|
"loss": 1.2497,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 0.4094193203104094,
|
|
"grad_norm": 3.8336915969848633,
|
|
"learning_rate": 9.972300267940009e-06,
|
|
"loss": 1.2201,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.4096869146374097,
|
|
"grad_norm": 3.7255733013153076,
|
|
"learning_rate": 9.97220716778125e-06,
|
|
"loss": 1.2857,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 0.40995450896440994,
|
|
"grad_norm": 3.4183714389801025,
|
|
"learning_rate": 9.972113911863815e-06,
|
|
"loss": 1.0868,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 0.41022210329141023,
|
|
"grad_norm": 3.8064022064208984,
|
|
"learning_rate": 9.972020500190626e-06,
|
|
"loss": 1.2251,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 0.4104896976184105,
|
|
"grad_norm": 4.1284565925598145,
|
|
"learning_rate": 9.971926932764609e-06,
|
|
"loss": 1.246,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 0.4107572919454108,
|
|
"grad_norm": 4.064891338348389,
|
|
"learning_rate": 9.971833209588696e-06,
|
|
"loss": 1.1329,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 0.411024886272411,
|
|
"grad_norm": 3.891404151916504,
|
|
"learning_rate": 9.971739330665821e-06,
|
|
"loss": 1.2359,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 0.4112924805994113,
|
|
"grad_norm": 3.7822113037109375,
|
|
"learning_rate": 9.971645295998929e-06,
|
|
"loss": 1.243,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 0.41156007492641156,
|
|
"grad_norm": 3.785557746887207,
|
|
"learning_rate": 9.97155110559096e-06,
|
|
"loss": 1.2446,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 0.41182766925341185,
|
|
"grad_norm": 3.562366008758545,
|
|
"learning_rate": 9.971456759444869e-06,
|
|
"loss": 1.1905,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 0.4120952635804121,
|
|
"grad_norm": 3.9148495197296143,
|
|
"learning_rate": 9.971362257563609e-06,
|
|
"loss": 1.2355,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.41236285790741234,
|
|
"grad_norm": 4.079963684082031,
|
|
"learning_rate": 9.971267599950142e-06,
|
|
"loss": 1.2154,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 0.41263045223441264,
|
|
"grad_norm": 4.090665340423584,
|
|
"learning_rate": 9.971172786607433e-06,
|
|
"loss": 1.0667,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 0.4128980465614129,
|
|
"grad_norm": 3.9606683254241943,
|
|
"learning_rate": 9.97107781753845e-06,
|
|
"loss": 1.1426,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 0.4131656408884132,
|
|
"grad_norm": 3.7528765201568604,
|
|
"learning_rate": 9.970982692746171e-06,
|
|
"loss": 1.2156,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 0.4134332352154134,
|
|
"grad_norm": 3.5512781143188477,
|
|
"learning_rate": 9.970887412233574e-06,
|
|
"loss": 1.1394,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 0.4137008295424137,
|
|
"grad_norm": 3.8421177864074707,
|
|
"learning_rate": 9.970791976003644e-06,
|
|
"loss": 1.11,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 0.41396842386941396,
|
|
"grad_norm": 3.389683961868286,
|
|
"learning_rate": 9.97069638405937e-06,
|
|
"loss": 1.2226,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 0.41423601819641426,
|
|
"grad_norm": 4.224984645843506,
|
|
"learning_rate": 9.97060063640375e-06,
|
|
"loss": 1.1678,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 0.4145036125234145,
|
|
"grad_norm": 3.8544325828552246,
|
|
"learning_rate": 9.970504733039778e-06,
|
|
"loss": 1.1398,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 0.4147712068504148,
|
|
"grad_norm": 3.8598458766937256,
|
|
"learning_rate": 9.970408673970464e-06,
|
|
"loss": 1.1928,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.41503880117741504,
|
|
"grad_norm": 3.5871057510375977,
|
|
"learning_rate": 9.970312459198812e-06,
|
|
"loss": 1.0778,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 0.4153063955044153,
|
|
"grad_norm": 3.948990821838379,
|
|
"learning_rate": 9.970216088727838e-06,
|
|
"loss": 1.2415,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 0.4155739898314156,
|
|
"grad_norm": 3.909735679626465,
|
|
"learning_rate": 9.970119562560562e-06,
|
|
"loss": 1.1809,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 0.4158415841584158,
|
|
"grad_norm": 3.529320240020752,
|
|
"learning_rate": 9.970022880700006e-06,
|
|
"loss": 1.1612,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 0.4161091784854161,
|
|
"grad_norm": 3.7973170280456543,
|
|
"learning_rate": 9.9699260431492e-06,
|
|
"loss": 1.1705,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 0.41637677281241636,
|
|
"grad_norm": 3.872828483581543,
|
|
"learning_rate": 9.969829049911178e-06,
|
|
"loss": 1.1882,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 0.41664436713941666,
|
|
"grad_norm": 3.9552464485168457,
|
|
"learning_rate": 9.969731900988975e-06,
|
|
"loss": 1.1696,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 0.4169119614664169,
|
|
"grad_norm": 3.7764220237731934,
|
|
"learning_rate": 9.969634596385637e-06,
|
|
"loss": 1.2108,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 0.4171795557934172,
|
|
"grad_norm": 3.64782452583313,
|
|
"learning_rate": 9.969537136104213e-06,
|
|
"loss": 1.2066,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 0.41744715012041744,
|
|
"grad_norm": 3.8925864696502686,
|
|
"learning_rate": 9.969439520147754e-06,
|
|
"loss": 1.1694,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.41771474444741774,
|
|
"grad_norm": 3.523120880126953,
|
|
"learning_rate": 9.969341748519319e-06,
|
|
"loss": 1.1243,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 0.417982338774418,
|
|
"grad_norm": 3.78109073638916,
|
|
"learning_rate": 9.969243821221972e-06,
|
|
"loss": 1.0945,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 0.4182499331014182,
|
|
"grad_norm": 3.5155014991760254,
|
|
"learning_rate": 9.969145738258776e-06,
|
|
"loss": 1.0598,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 0.4185175274284185,
|
|
"grad_norm": 4.167483806610107,
|
|
"learning_rate": 9.969047499632808e-06,
|
|
"loss": 1.2385,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 0.41878512175541877,
|
|
"grad_norm": 3.761597156524658,
|
|
"learning_rate": 9.968949105347146e-06,
|
|
"loss": 1.2115,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 0.41905271608241906,
|
|
"grad_norm": 3.6628715991973877,
|
|
"learning_rate": 9.968850555404867e-06,
|
|
"loss": 1.084,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 0.4193203104094193,
|
|
"grad_norm": 3.8627333641052246,
|
|
"learning_rate": 9.968751849809063e-06,
|
|
"loss": 1.2907,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 0.4195879047364196,
|
|
"grad_norm": 4.0308518409729,
|
|
"learning_rate": 9.968652988562826e-06,
|
|
"loss": 1.2336,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 0.41985549906341985,
|
|
"grad_norm": 3.72426438331604,
|
|
"learning_rate": 9.96855397166925e-06,
|
|
"loss": 1.2231,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 0.42012309339042014,
|
|
"grad_norm": 3.9212002754211426,
|
|
"learning_rate": 9.968454799131439e-06,
|
|
"loss": 1.1774,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.4203906877174204,
|
|
"grad_norm": 3.7344274520874023,
|
|
"learning_rate": 9.968355470952498e-06,
|
|
"loss": 1.1205,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 0.4206582820444207,
|
|
"grad_norm": 3.6700868606567383,
|
|
"learning_rate": 9.96825598713554e-06,
|
|
"loss": 1.201,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 0.4209258763714209,
|
|
"grad_norm": 3.989650249481201,
|
|
"learning_rate": 9.968156347683682e-06,
|
|
"loss": 1.2025,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 0.42119347069842117,
|
|
"grad_norm": 3.851297616958618,
|
|
"learning_rate": 9.968056552600043e-06,
|
|
"loss": 1.2169,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 0.42146106502542147,
|
|
"grad_norm": 3.8957512378692627,
|
|
"learning_rate": 9.967956601887751e-06,
|
|
"loss": 1.3342,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 0.4217286593524217,
|
|
"grad_norm": 3.9603443145751953,
|
|
"learning_rate": 9.967856495549935e-06,
|
|
"loss": 1.2206,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 0.421996253679422,
|
|
"grad_norm": 3.6075241565704346,
|
|
"learning_rate": 9.967756233589734e-06,
|
|
"loss": 1.1752,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 0.42226384800642225,
|
|
"grad_norm": 3.690418243408203,
|
|
"learning_rate": 9.967655816010287e-06,
|
|
"loss": 1.2098,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 0.42253144233342255,
|
|
"grad_norm": 3.748853921890259,
|
|
"learning_rate": 9.967555242814738e-06,
|
|
"loss": 1.1701,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 0.4227990366604228,
|
|
"grad_norm": 3.668382406234741,
|
|
"learning_rate": 9.96745451400624e-06,
|
|
"loss": 1.1536,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.4230666309874231,
|
|
"grad_norm": 4.043965816497803,
|
|
"learning_rate": 9.967353629587948e-06,
|
|
"loss": 1.1062,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 0.42333422531442333,
|
|
"grad_norm": 3.860582113265991,
|
|
"learning_rate": 9.967252589563023e-06,
|
|
"loss": 1.202,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 0.4236018196414236,
|
|
"grad_norm": 3.919570207595825,
|
|
"learning_rate": 9.967151393934628e-06,
|
|
"loss": 1.0513,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 0.42386941396842387,
|
|
"grad_norm": 4.271496772766113,
|
|
"learning_rate": 9.967050042705934e-06,
|
|
"loss": 1.198,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 0.4241370082954241,
|
|
"grad_norm": 3.9681901931762695,
|
|
"learning_rate": 9.966948535880118e-06,
|
|
"loss": 1.0558,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 0.4244046026224244,
|
|
"grad_norm": 3.7213032245635986,
|
|
"learning_rate": 9.966846873460357e-06,
|
|
"loss": 1.1218,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 0.42467219694942465,
|
|
"grad_norm": 3.8692944049835205,
|
|
"learning_rate": 9.966745055449835e-06,
|
|
"loss": 1.1898,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 0.42493979127642495,
|
|
"grad_norm": 3.475710868835449,
|
|
"learning_rate": 9.966643081851746e-06,
|
|
"loss": 1.0925,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 0.4252073856034252,
|
|
"grad_norm": 3.985151767730713,
|
|
"learning_rate": 9.966540952669279e-06,
|
|
"loss": 1.1674,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 0.4254749799304255,
|
|
"grad_norm": 3.913224935531616,
|
|
"learning_rate": 9.966438667905637e-06,
|
|
"loss": 1.2583,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.42574257425742573,
|
|
"grad_norm": 4.151821613311768,
|
|
"learning_rate": 9.966336227564022e-06,
|
|
"loss": 1.1612,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 0.42601016858442603,
|
|
"grad_norm": 4.1624603271484375,
|
|
"learning_rate": 9.966233631647646e-06,
|
|
"loss": 1.2323,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 0.4262777629114263,
|
|
"grad_norm": 3.706627368927002,
|
|
"learning_rate": 9.96613088015972e-06,
|
|
"loss": 1.1456,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 0.42654535723842657,
|
|
"grad_norm": 3.398106336593628,
|
|
"learning_rate": 9.966027973103462e-06,
|
|
"loss": 1.0205,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 0.4268129515654268,
|
|
"grad_norm": 3.6161367893218994,
|
|
"learning_rate": 9.9659249104821e-06,
|
|
"loss": 1.1139,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 0.42708054589242705,
|
|
"grad_norm": 3.886651039123535,
|
|
"learning_rate": 9.965821692298858e-06,
|
|
"loss": 1.1711,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 0.42734814021942735,
|
|
"grad_norm": 4.018932342529297,
|
|
"learning_rate": 9.965718318556971e-06,
|
|
"loss": 1.3092,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 0.4276157345464276,
|
|
"grad_norm": 3.6425957679748535,
|
|
"learning_rate": 9.96561478925968e-06,
|
|
"loss": 1.1201,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 0.4278833288734279,
|
|
"grad_norm": 4.067368030548096,
|
|
"learning_rate": 9.965511104410224e-06,
|
|
"loss": 1.2909,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 0.42815092320042814,
|
|
"grad_norm": 3.597480297088623,
|
|
"learning_rate": 9.965407264011852e-06,
|
|
"loss": 1.264,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.42841851752742843,
|
|
"grad_norm": 3.685746669769287,
|
|
"learning_rate": 9.965303268067819e-06,
|
|
"loss": 1.2146,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 0.4286861118544287,
|
|
"grad_norm": 3.8286211490631104,
|
|
"learning_rate": 9.965199116581381e-06,
|
|
"loss": 1.1627,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 0.428953706181429,
|
|
"grad_norm": 3.950927495956421,
|
|
"learning_rate": 9.9650948095558e-06,
|
|
"loss": 1.1935,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 0.4292213005084292,
|
|
"grad_norm": 3.7244269847869873,
|
|
"learning_rate": 9.964990346994346e-06,
|
|
"loss": 1.1994,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 0.4294888948354295,
|
|
"grad_norm": 3.8138673305511475,
|
|
"learning_rate": 9.96488572890029e-06,
|
|
"loss": 1.0713,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 0.42975648916242976,
|
|
"grad_norm": 3.689394474029541,
|
|
"learning_rate": 9.964780955276909e-06,
|
|
"loss": 1.1475,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 0.43002408348943,
|
|
"grad_norm": 3.922783851623535,
|
|
"learning_rate": 9.964676026127484e-06,
|
|
"loss": 1.2439,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 0.4302916778164303,
|
|
"grad_norm": 4.035757541656494,
|
|
"learning_rate": 9.964570941455304e-06,
|
|
"loss": 1.2235,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 0.43055927214343054,
|
|
"grad_norm": 3.4453186988830566,
|
|
"learning_rate": 9.96446570126366e-06,
|
|
"loss": 1.0857,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 0.43082686647043084,
|
|
"grad_norm": 3.796252727508545,
|
|
"learning_rate": 9.96436030555585e-06,
|
|
"loss": 1.3118,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.4310944607974311,
|
|
"grad_norm": 3.9162800312042236,
|
|
"learning_rate": 9.964254754335172e-06,
|
|
"loss": 1.2191,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 0.4313620551244314,
|
|
"grad_norm": 3.400801658630371,
|
|
"learning_rate": 9.964149047604936e-06,
|
|
"loss": 1.1256,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 0.4316296494514316,
|
|
"grad_norm": 3.6633102893829346,
|
|
"learning_rate": 9.964043185368453e-06,
|
|
"loss": 1.0817,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 0.4318972437784319,
|
|
"grad_norm": 3.536027193069458,
|
|
"learning_rate": 9.963937167629039e-06,
|
|
"loss": 1.0987,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 0.43216483810543216,
|
|
"grad_norm": 3.694162368774414,
|
|
"learning_rate": 9.963830994390014e-06,
|
|
"loss": 1.2215,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 0.43243243243243246,
|
|
"grad_norm": 3.681429862976074,
|
|
"learning_rate": 9.963724665654704e-06,
|
|
"loss": 1.2003,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 0.4327000267594327,
|
|
"grad_norm": 3.9966611862182617,
|
|
"learning_rate": 9.963618181426443e-06,
|
|
"loss": 1.1236,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 0.43296762108643294,
|
|
"grad_norm": 3.875614643096924,
|
|
"learning_rate": 9.96351154170856e-06,
|
|
"loss": 1.1395,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 0.43323521541343324,
|
|
"grad_norm": 3.63798451423645,
|
|
"learning_rate": 9.963404746504403e-06,
|
|
"loss": 1.1578,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 0.4335028097404335,
|
|
"grad_norm": 3.4750473499298096,
|
|
"learning_rate": 9.963297795817312e-06,
|
|
"loss": 1.1385,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.4337704040674338,
|
|
"grad_norm": 3.4065208435058594,
|
|
"learning_rate": 9.963190689650642e-06,
|
|
"loss": 0.9694,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 0.434037998394434,
|
|
"grad_norm": 3.5707483291625977,
|
|
"learning_rate": 9.963083428007744e-06,
|
|
"loss": 1.1541,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 0.4343055927214343,
|
|
"grad_norm": 4.226485252380371,
|
|
"learning_rate": 9.96297601089198e-06,
|
|
"loss": 1.2521,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 0.43457318704843456,
|
|
"grad_norm": 3.8039398193359375,
|
|
"learning_rate": 9.962868438306714e-06,
|
|
"loss": 1.1212,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 0.43484078137543486,
|
|
"grad_norm": 3.8403160572052,
|
|
"learning_rate": 9.962760710255317e-06,
|
|
"loss": 1.1157,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 0.4351083757024351,
|
|
"grad_norm": 3.634899139404297,
|
|
"learning_rate": 9.962652826741164e-06,
|
|
"loss": 1.1387,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 0.4353759700294354,
|
|
"grad_norm": 3.621347188949585,
|
|
"learning_rate": 9.962544787767634e-06,
|
|
"loss": 1.1823,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 0.43564356435643564,
|
|
"grad_norm": 4.029088973999023,
|
|
"learning_rate": 9.962436593338109e-06,
|
|
"loss": 1.0639,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 0.4359111586834359,
|
|
"grad_norm": 3.6505720615386963,
|
|
"learning_rate": 9.962328243455983e-06,
|
|
"loss": 1.1589,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 0.4361787530104362,
|
|
"grad_norm": 3.7965028285980225,
|
|
"learning_rate": 9.962219738124645e-06,
|
|
"loss": 1.1716,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.4364463473374364,
|
|
"grad_norm": 3.631714105606079,
|
|
"learning_rate": 9.962111077347499e-06,
|
|
"loss": 1.1798,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 0.4367139416644367,
|
|
"grad_norm": 3.4737563133239746,
|
|
"learning_rate": 9.962002261127946e-06,
|
|
"loss": 1.224,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 0.43698153599143696,
|
|
"grad_norm": 4.074963092803955,
|
|
"learning_rate": 9.961893289469394e-06,
|
|
"loss": 1.2144,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 0.43724913031843726,
|
|
"grad_norm": 3.729600429534912,
|
|
"learning_rate": 9.961784162375258e-06,
|
|
"loss": 1.2326,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 0.4375167246454375,
|
|
"grad_norm": 3.4806137084960938,
|
|
"learning_rate": 9.961674879848957e-06,
|
|
"loss": 0.9848,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 0.4377843189724378,
|
|
"grad_norm": 3.5938944816589355,
|
|
"learning_rate": 9.961565441893914e-06,
|
|
"loss": 1.0944,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 0.43805191329943804,
|
|
"grad_norm": 3.7552433013916016,
|
|
"learning_rate": 9.961455848513557e-06,
|
|
"loss": 1.1817,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 0.43831950762643834,
|
|
"grad_norm": 3.6959292888641357,
|
|
"learning_rate": 9.961346099711319e-06,
|
|
"loss": 1.1635,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 0.4385871019534386,
|
|
"grad_norm": 4.031107425689697,
|
|
"learning_rate": 9.961236195490638e-06,
|
|
"loss": 1.3005,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 0.4388546962804388,
|
|
"grad_norm": 3.8287787437438965,
|
|
"learning_rate": 9.961126135854957e-06,
|
|
"loss": 1.0702,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.4391222906074391,
|
|
"grad_norm": 3.573241949081421,
|
|
"learning_rate": 9.961015920807722e-06,
|
|
"loss": 1.0902,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 0.43938988493443937,
|
|
"grad_norm": 3.861870050430298,
|
|
"learning_rate": 9.96090555035239e-06,
|
|
"loss": 1.1281,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 0.43965747926143967,
|
|
"grad_norm": 3.52722430229187,
|
|
"learning_rate": 9.960795024492413e-06,
|
|
"loss": 1.0831,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 0.4399250735884399,
|
|
"grad_norm": 3.898618221282959,
|
|
"learning_rate": 9.960684343231258e-06,
|
|
"loss": 1.1533,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 0.4401926679154402,
|
|
"grad_norm": 3.61409854888916,
|
|
"learning_rate": 9.960573506572391e-06,
|
|
"loss": 1.103,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 0.44046026224244045,
|
|
"grad_norm": 3.5603694915771484,
|
|
"learning_rate": 9.96046251451928e-06,
|
|
"loss": 1.0903,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 0.44072785656944075,
|
|
"grad_norm": 3.9450957775115967,
|
|
"learning_rate": 9.960351367075407e-06,
|
|
"loss": 1.316,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 0.440995450896441,
|
|
"grad_norm": 3.022848606109619,
|
|
"learning_rate": 9.960240064244253e-06,
|
|
"loss": 0.9156,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 0.4412630452234413,
|
|
"grad_norm": 3.8123509883880615,
|
|
"learning_rate": 9.960128606029302e-06,
|
|
"loss": 1.2591,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 0.44153063955044153,
|
|
"grad_norm": 3.739405870437622,
|
|
"learning_rate": 9.960016992434047e-06,
|
|
"loss": 1.1892,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.44179823387744177,
|
|
"grad_norm": 4.091071605682373,
|
|
"learning_rate": 9.959905223461985e-06,
|
|
"loss": 1.2138,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 0.44206582820444207,
|
|
"grad_norm": 3.7963550090789795,
|
|
"learning_rate": 9.959793299116617e-06,
|
|
"loss": 1.1637,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 0.4423334225314423,
|
|
"grad_norm": 3.7637977600097656,
|
|
"learning_rate": 9.959681219401449e-06,
|
|
"loss": 1.1168,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 0.4426010168584426,
|
|
"grad_norm": 3.74827241897583,
|
|
"learning_rate": 9.959568984319991e-06,
|
|
"loss": 1.1661,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 0.44286861118544285,
|
|
"grad_norm": 3.401951551437378,
|
|
"learning_rate": 9.95945659387576e-06,
|
|
"loss": 1.1605,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 0.44313620551244315,
|
|
"grad_norm": 3.677436590194702,
|
|
"learning_rate": 9.959344048072278e-06,
|
|
"loss": 1.2423,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 0.4434037998394434,
|
|
"grad_norm": 4.045171737670898,
|
|
"learning_rate": 9.959231346913068e-06,
|
|
"loss": 1.2646,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 0.4436713941664437,
|
|
"grad_norm": 4.872179985046387,
|
|
"learning_rate": 9.95911849040166e-06,
|
|
"loss": 1.2235,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 0.44393898849344393,
|
|
"grad_norm": 3.714308023452759,
|
|
"learning_rate": 9.959005478541592e-06,
|
|
"loss": 1.074,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 0.44420658282044423,
|
|
"grad_norm": 3.844395160675049,
|
|
"learning_rate": 9.958892311336404e-06,
|
|
"loss": 1.1599,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.44447417714744447,
|
|
"grad_norm": 3.9484751224517822,
|
|
"learning_rate": 9.958778988789639e-06,
|
|
"loss": 1.088,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 0.44474177147444477,
|
|
"grad_norm": 4.029232501983643,
|
|
"learning_rate": 9.958665510904849e-06,
|
|
"loss": 1.2989,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 0.445009365801445,
|
|
"grad_norm": 4.975223064422607,
|
|
"learning_rate": 9.958551877685586e-06,
|
|
"loss": 1.104,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 0.44527696012844525,
|
|
"grad_norm": 3.774137496948242,
|
|
"learning_rate": 9.958438089135413e-06,
|
|
"loss": 1.0331,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 0.44554455445544555,
|
|
"grad_norm": 3.653740644454956,
|
|
"learning_rate": 9.958324145257893e-06,
|
|
"loss": 1.1029,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 0.4458121487824458,
|
|
"grad_norm": 4.142993450164795,
|
|
"learning_rate": 9.958210046056596e-06,
|
|
"loss": 1.2692,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 0.4460797431094461,
|
|
"grad_norm": 4.0442609786987305,
|
|
"learning_rate": 9.958095791535095e-06,
|
|
"loss": 1.1862,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 0.44634733743644633,
|
|
"grad_norm": 3.8392021656036377,
|
|
"learning_rate": 9.957981381696971e-06,
|
|
"loss": 1.2256,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 0.44661493176344663,
|
|
"grad_norm": 3.4839348793029785,
|
|
"learning_rate": 9.957866816545804e-06,
|
|
"loss": 1.1202,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 0.4468825260904469,
|
|
"grad_norm": 3.9099719524383545,
|
|
"learning_rate": 9.957752096085187e-06,
|
|
"loss": 1.1985,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.44715012041744717,
|
|
"grad_norm": 3.7614877223968506,
|
|
"learning_rate": 9.957637220318711e-06,
|
|
"loss": 1.2736,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 0.4474177147444474,
|
|
"grad_norm": 4.555272579193115,
|
|
"learning_rate": 9.957522189249979e-06,
|
|
"loss": 1.1661,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 0.4476853090714477,
|
|
"grad_norm": 3.6910009384155273,
|
|
"learning_rate": 9.95740700288259e-06,
|
|
"loss": 1.3053,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 0.44795290339844795,
|
|
"grad_norm": 3.7056405544281006,
|
|
"learning_rate": 9.957291661220154e-06,
|
|
"loss": 1.1668,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 0.4482204977254482,
|
|
"grad_norm": 3.738818645477295,
|
|
"learning_rate": 9.957176164266283e-06,
|
|
"loss": 1.2925,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 0.4484880920524485,
|
|
"grad_norm": 3.5437395572662354,
|
|
"learning_rate": 9.957060512024595e-06,
|
|
"loss": 1.1237,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 0.44875568637944874,
|
|
"grad_norm": 3.508234977722168,
|
|
"learning_rate": 9.956944704498715e-06,
|
|
"loss": 1.1684,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 0.44902328070644904,
|
|
"grad_norm": 3.910888433456421,
|
|
"learning_rate": 9.95682874169227e-06,
|
|
"loss": 1.1898,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 0.4492908750334493,
|
|
"grad_norm": 3.8787474632263184,
|
|
"learning_rate": 9.956712623608892e-06,
|
|
"loss": 1.1573,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 0.4495584693604496,
|
|
"grad_norm": 3.672773838043213,
|
|
"learning_rate": 9.95659635025222e-06,
|
|
"loss": 1.042,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.4498260636874498,
|
|
"grad_norm": 3.811082601547241,
|
|
"learning_rate": 9.956479921625892e-06,
|
|
"loss": 1.2272,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 0.4500936580144501,
|
|
"grad_norm": 4.042838096618652,
|
|
"learning_rate": 9.95636333773356e-06,
|
|
"loss": 1.2527,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 0.45036125234145036,
|
|
"grad_norm": 3.418757915496826,
|
|
"learning_rate": 9.956246598578874e-06,
|
|
"loss": 1.1833,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 0.45062884666845066,
|
|
"grad_norm": 3.8703603744506836,
|
|
"learning_rate": 9.956129704165491e-06,
|
|
"loss": 1.2848,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 0.4508964409954509,
|
|
"grad_norm": 3.5439083576202393,
|
|
"learning_rate": 9.956012654497073e-06,
|
|
"loss": 1.0018,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 0.45116403532245114,
|
|
"grad_norm": 3.858811616897583,
|
|
"learning_rate": 9.955895449577289e-06,
|
|
"loss": 1.204,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 0.45143162964945144,
|
|
"grad_norm": 3.636284351348877,
|
|
"learning_rate": 9.955778089409806e-06,
|
|
"loss": 1.1952,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 0.4516992239764517,
|
|
"grad_norm": 3.6916327476501465,
|
|
"learning_rate": 9.955660573998305e-06,
|
|
"loss": 1.1277,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 0.451966818303452,
|
|
"grad_norm": 3.550443172454834,
|
|
"learning_rate": 9.955542903346462e-06,
|
|
"loss": 1.1669,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 0.4522344126304522,
|
|
"grad_norm": 3.5514254570007324,
|
|
"learning_rate": 9.95542507745797e-06,
|
|
"loss": 1.2038,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.4525020069574525,
|
|
"grad_norm": 3.6606192588806152,
|
|
"learning_rate": 9.955307096336513e-06,
|
|
"loss": 1.0902,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 0.45276960128445276,
|
|
"grad_norm": 3.676407814025879,
|
|
"learning_rate": 9.955188959985792e-06,
|
|
"loss": 1.1543,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 0.45303719561145306,
|
|
"grad_norm": 3.630408763885498,
|
|
"learning_rate": 9.955070668409505e-06,
|
|
"loss": 1.1552,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 0.4533047899384533,
|
|
"grad_norm": 3.992326259613037,
|
|
"learning_rate": 9.954952221611359e-06,
|
|
"loss": 1.2438,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 0.4535723842654536,
|
|
"grad_norm": 3.313997507095337,
|
|
"learning_rate": 9.954833619595062e-06,
|
|
"loss": 1.1001,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 0.45383997859245384,
|
|
"grad_norm": 3.4902310371398926,
|
|
"learning_rate": 9.954714862364331e-06,
|
|
"loss": 1.0505,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 0.4541075729194541,
|
|
"grad_norm": 3.9076476097106934,
|
|
"learning_rate": 9.954595949922889e-06,
|
|
"loss": 1.3215,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 0.4543751672464544,
|
|
"grad_norm": 4.445606708526611,
|
|
"learning_rate": 9.954476882274458e-06,
|
|
"loss": 1.2867,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 0.4546427615734546,
|
|
"grad_norm": 4.114322662353516,
|
|
"learning_rate": 9.954357659422766e-06,
|
|
"loss": 1.2867,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 0.4549103559004549,
|
|
"grad_norm": 4.046489238739014,
|
|
"learning_rate": 9.95423828137155e-06,
|
|
"loss": 1.3018,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.45517795022745516,
|
|
"grad_norm": 4.090691089630127,
|
|
"learning_rate": 9.954118748124552e-06,
|
|
"loss": 1.1618,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 0.45544554455445546,
|
|
"grad_norm": 3.791952133178711,
|
|
"learning_rate": 9.953999059685513e-06,
|
|
"loss": 1.2585,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 0.4557131388814557,
|
|
"grad_norm": 3.9405517578125,
|
|
"learning_rate": 9.953879216058185e-06,
|
|
"loss": 1.2347,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 0.455980733208456,
|
|
"grad_norm": 3.291191816329956,
|
|
"learning_rate": 9.953759217246318e-06,
|
|
"loss": 1.1054,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 0.45624832753545624,
|
|
"grad_norm": 3.942545175552368,
|
|
"learning_rate": 9.953639063253675e-06,
|
|
"loss": 1.1939,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 0.45651592186245654,
|
|
"grad_norm": 3.7756292819976807,
|
|
"learning_rate": 9.953518754084019e-06,
|
|
"loss": 1.248,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 0.4567835161894568,
|
|
"grad_norm": 3.8124051094055176,
|
|
"learning_rate": 9.953398289741116e-06,
|
|
"loss": 1.2876,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 0.457051110516457,
|
|
"grad_norm": 3.8491921424865723,
|
|
"learning_rate": 9.953277670228745e-06,
|
|
"loss": 1.1995,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 0.4573187048434573,
|
|
"grad_norm": 3.814667224884033,
|
|
"learning_rate": 9.95315689555068e-06,
|
|
"loss": 1.0789,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 0.45758629917045757,
|
|
"grad_norm": 3.2791123390197754,
|
|
"learning_rate": 9.953035965710707e-06,
|
|
"loss": 1.0521,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.45785389349745786,
|
|
"grad_norm": 3.559877395629883,
|
|
"learning_rate": 9.952914880712611e-06,
|
|
"loss": 1.0571,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 0.4581214878244581,
|
|
"grad_norm": 3.3893074989318848,
|
|
"learning_rate": 9.952793640560189e-06,
|
|
"loss": 1.0054,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 0.4583890821514584,
|
|
"grad_norm": 3.697608232498169,
|
|
"learning_rate": 9.952672245257238e-06,
|
|
"loss": 1.1619,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 0.45865667647845865,
|
|
"grad_norm": 3.5724332332611084,
|
|
"learning_rate": 9.95255069480756e-06,
|
|
"loss": 1.1086,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 0.45892427080545894,
|
|
"grad_norm": 4.316122055053711,
|
|
"learning_rate": 9.952428989214962e-06,
|
|
"loss": 1.3456,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 0.4591918651324592,
|
|
"grad_norm": 3.5813887119293213,
|
|
"learning_rate": 9.952307128483257e-06,
|
|
"loss": 1.2169,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 0.4594594594594595,
|
|
"grad_norm": 4.544564723968506,
|
|
"learning_rate": 9.952185112616263e-06,
|
|
"loss": 1.3719,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 0.4597270537864597,
|
|
"grad_norm": 3.653928518295288,
|
|
"learning_rate": 9.952062941617801e-06,
|
|
"loss": 1.1425,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 0.45999464811345997,
|
|
"grad_norm": 3.9661028385162354,
|
|
"learning_rate": 9.9519406154917e-06,
|
|
"loss": 1.2444,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 0.46026224244046027,
|
|
"grad_norm": 3.9497625827789307,
|
|
"learning_rate": 9.95181813424179e-06,
|
|
"loss": 1.1364,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.4605298367674605,
|
|
"grad_norm": 3.682626962661743,
|
|
"learning_rate": 9.95169549787191e-06,
|
|
"loss": 1.0874,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 0.4607974310944608,
|
|
"grad_norm": 4.0337233543396,
|
|
"learning_rate": 9.951572706385901e-06,
|
|
"loss": 1.2206,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 0.46106502542146105,
|
|
"grad_norm": 3.902106761932373,
|
|
"learning_rate": 9.951449759787608e-06,
|
|
"loss": 1.2841,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 0.46133261974846135,
|
|
"grad_norm": 3.6979806423187256,
|
|
"learning_rate": 9.951326658080881e-06,
|
|
"loss": 1.1272,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 0.4616002140754616,
|
|
"grad_norm": 3.5009329319000244,
|
|
"learning_rate": 9.951203401269582e-06,
|
|
"loss": 1.0615,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 0.4618678084024619,
|
|
"grad_norm": 3.846033811569214,
|
|
"learning_rate": 9.951079989357569e-06,
|
|
"loss": 1.267,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 0.46213540272946213,
|
|
"grad_norm": 3.7044687271118164,
|
|
"learning_rate": 9.950956422348708e-06,
|
|
"loss": 1.228,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 0.46240299705646243,
|
|
"grad_norm": 3.8444931507110596,
|
|
"learning_rate": 9.950832700246868e-06,
|
|
"loss": 1.1271,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 0.46267059138346267,
|
|
"grad_norm": 3.61540150642395,
|
|
"learning_rate": 9.950708823055926e-06,
|
|
"loss": 1.1431,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 0.4629381857104629,
|
|
"grad_norm": 3.617910623550415,
|
|
"learning_rate": 9.950584790779765e-06,
|
|
"loss": 1.1046,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.4632057800374632,
|
|
"grad_norm": 4.220783233642578,
|
|
"learning_rate": 9.950460603422266e-06,
|
|
"loss": 1.2734,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 0.46347337436446345,
|
|
"grad_norm": 3.5768558979034424,
|
|
"learning_rate": 9.950336260987323e-06,
|
|
"loss": 1.1693,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 0.46374096869146375,
|
|
"grad_norm": 3.8983094692230225,
|
|
"learning_rate": 9.950211763478829e-06,
|
|
"loss": 1.1813,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 0.464008563018464,
|
|
"grad_norm": 3.659959316253662,
|
|
"learning_rate": 9.950087110900686e-06,
|
|
"loss": 1.204,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 0.4642761573454643,
|
|
"grad_norm": 3.40910005569458,
|
|
"learning_rate": 9.949962303256796e-06,
|
|
"loss": 1.1507,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 0.46454375167246453,
|
|
"grad_norm": 3.770167112350464,
|
|
"learning_rate": 9.949837340551072e-06,
|
|
"loss": 1.121,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 0.46481134599946483,
|
|
"grad_norm": 3.7433769702911377,
|
|
"learning_rate": 9.949712222787426e-06,
|
|
"loss": 1.1993,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 0.4650789403264651,
|
|
"grad_norm": 3.9215848445892334,
|
|
"learning_rate": 9.94958694996978e-06,
|
|
"loss": 1.211,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 0.46534653465346537,
|
|
"grad_norm": 3.646552324295044,
|
|
"learning_rate": 9.949461522102056e-06,
|
|
"loss": 1.1609,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 0.4656141289804656,
|
|
"grad_norm": 3.452594757080078,
|
|
"learning_rate": 9.949335939188181e-06,
|
|
"loss": 1.0887,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.46588172330746586,
|
|
"grad_norm": 3.6262283325195312,
|
|
"learning_rate": 9.949210201232094e-06,
|
|
"loss": 1.2381,
|
|
"step": 1741
|
|
},
|
|
{
|
|
"epoch": 0.46614931763446615,
|
|
"grad_norm": 3.7934439182281494,
|
|
"learning_rate": 9.949084308237731e-06,
|
|
"loss": 1.2578,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 0.4664169119614664,
|
|
"grad_norm": 4.024771690368652,
|
|
"learning_rate": 9.948958260209036e-06,
|
|
"loss": 1.2287,
|
|
"step": 1743
|
|
},
|
|
{
|
|
"epoch": 0.4666845062884667,
|
|
"grad_norm": 4.1637115478515625,
|
|
"learning_rate": 9.948832057149958e-06,
|
|
"loss": 1.4259,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 0.46695210061546694,
|
|
"grad_norm": 3.3593788146972656,
|
|
"learning_rate": 9.948705699064452e-06,
|
|
"loss": 1.0906,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 0.46721969494246723,
|
|
"grad_norm": 4.606101989746094,
|
|
"learning_rate": 9.948579185956472e-06,
|
|
"loss": 1.3554,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 0.4674872892694675,
|
|
"grad_norm": 4.054565906524658,
|
|
"learning_rate": 9.948452517829984e-06,
|
|
"loss": 1.316,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 0.4677548835964678,
|
|
"grad_norm": 3.526566982269287,
|
|
"learning_rate": 9.948325694688957e-06,
|
|
"loss": 1.1971,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 0.468022477923468,
|
|
"grad_norm": 3.647592544555664,
|
|
"learning_rate": 9.948198716537361e-06,
|
|
"loss": 1.1751,
|
|
"step": 1749
|
|
},
|
|
{
|
|
"epoch": 0.4682900722504683,
|
|
"grad_norm": 4.159237384796143,
|
|
"learning_rate": 9.948071583379176e-06,
|
|
"loss": 1.1648,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.46855766657746856,
|
|
"grad_norm": 3.9114818572998047,
|
|
"learning_rate": 9.947944295218384e-06,
|
|
"loss": 1.2213,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 0.4688252609044688,
|
|
"grad_norm": 3.539741039276123,
|
|
"learning_rate": 9.947816852058972e-06,
|
|
"loss": 1.1406,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 0.4690928552314691,
|
|
"grad_norm": 3.950688600540161,
|
|
"learning_rate": 9.947689253904932e-06,
|
|
"loss": 1.1622,
|
|
"step": 1753
|
|
},
|
|
{
|
|
"epoch": 0.46936044955846934,
|
|
"grad_norm": 3.5240933895111084,
|
|
"learning_rate": 9.94756150076026e-06,
|
|
"loss": 1.1184,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 0.46962804388546964,
|
|
"grad_norm": 3.455580472946167,
|
|
"learning_rate": 9.947433592628964e-06,
|
|
"loss": 1.1482,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 0.4698956382124699,
|
|
"grad_norm": 3.8551056385040283,
|
|
"learning_rate": 9.947305529515041e-06,
|
|
"loss": 1.2818,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 0.4701632325394702,
|
|
"grad_norm": 3.4578497409820557,
|
|
"learning_rate": 9.947177311422513e-06,
|
|
"loss": 1.1184,
|
|
"step": 1757
|
|
},
|
|
{
|
|
"epoch": 0.4704308268664704,
|
|
"grad_norm": 3.092772960662842,
|
|
"learning_rate": 9.947048938355389e-06,
|
|
"loss": 1.1181,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 0.4706984211934707,
|
|
"grad_norm": 3.7363767623901367,
|
|
"learning_rate": 9.946920410317694e-06,
|
|
"loss": 1.1509,
|
|
"step": 1759
|
|
},
|
|
{
|
|
"epoch": 0.47096601552047096,
|
|
"grad_norm": 3.5836987495422363,
|
|
"learning_rate": 9.946791727313453e-06,
|
|
"loss": 1.2198,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.47123360984747126,
|
|
"grad_norm": 3.2804808616638184,
|
|
"learning_rate": 9.946662889346693e-06,
|
|
"loss": 1.2418,
|
|
"step": 1761
|
|
},
|
|
{
|
|
"epoch": 0.4715012041744715,
|
|
"grad_norm": 3.3668692111968994,
|
|
"learning_rate": 9.94653389642146e-06,
|
|
"loss": 1.0194,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 0.47176879850147174,
|
|
"grad_norm": 3.753690481185913,
|
|
"learning_rate": 9.946404748541787e-06,
|
|
"loss": 1.194,
|
|
"step": 1763
|
|
},
|
|
{
|
|
"epoch": 0.47203639282847204,
|
|
"grad_norm": 3.6186954975128174,
|
|
"learning_rate": 9.946275445711722e-06,
|
|
"loss": 1.2537,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 0.4723039871554723,
|
|
"grad_norm": 3.8216750621795654,
|
|
"learning_rate": 9.946145987935315e-06,
|
|
"loss": 1.0967,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 0.4725715814824726,
|
|
"grad_norm": 3.866121530532837,
|
|
"learning_rate": 9.946016375216624e-06,
|
|
"loss": 1.1481,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 0.4728391758094728,
|
|
"grad_norm": 3.7349894046783447,
|
|
"learning_rate": 9.945886607559703e-06,
|
|
"loss": 1.1937,
|
|
"step": 1767
|
|
},
|
|
{
|
|
"epoch": 0.4731067701364731,
|
|
"grad_norm": 3.244333505630493,
|
|
"learning_rate": 9.945756684968624e-06,
|
|
"loss": 1.062,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 0.47337436446347336,
|
|
"grad_norm": 3.341917037963867,
|
|
"learning_rate": 9.945626607447452e-06,
|
|
"loss": 1.1912,
|
|
"step": 1769
|
|
},
|
|
{
|
|
"epoch": 0.47364195879047366,
|
|
"grad_norm": 3.748084545135498,
|
|
"learning_rate": 9.945496375000265e-06,
|
|
"loss": 1.1499,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.4739095531174739,
|
|
"grad_norm": 4.144589900970459,
|
|
"learning_rate": 9.94536598763114e-06,
|
|
"loss": 1.3394,
|
|
"step": 1771
|
|
},
|
|
{
|
|
"epoch": 0.4741771474444742,
|
|
"grad_norm": 3.9339520931243896,
|
|
"learning_rate": 9.945235445344164e-06,
|
|
"loss": 1.319,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 0.47444474177147444,
|
|
"grad_norm": 3.8457014560699463,
|
|
"learning_rate": 9.945104748143426e-06,
|
|
"loss": 1.1954,
|
|
"step": 1773
|
|
},
|
|
{
|
|
"epoch": 0.4747123360984747,
|
|
"grad_norm": 3.6537280082702637,
|
|
"learning_rate": 9.944973896033017e-06,
|
|
"loss": 1.1368,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 0.474979930425475,
|
|
"grad_norm": 3.5252084732055664,
|
|
"learning_rate": 9.944842889017042e-06,
|
|
"loss": 1.1575,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 0.4752475247524752,
|
|
"grad_norm": 3.692296266555786,
|
|
"learning_rate": 9.944711727099597e-06,
|
|
"loss": 1.1172,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 0.4755151190794755,
|
|
"grad_norm": 3.7176449298858643,
|
|
"learning_rate": 9.944580410284799e-06,
|
|
"loss": 1.3006,
|
|
"step": 1777
|
|
},
|
|
{
|
|
"epoch": 0.47578271340647577,
|
|
"grad_norm": 3.9314935207366943,
|
|
"learning_rate": 9.944448938576755e-06,
|
|
"loss": 1.1859,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 0.47605030773347606,
|
|
"grad_norm": 3.7619473934173584,
|
|
"learning_rate": 9.944317311979587e-06,
|
|
"loss": 1.273,
|
|
"step": 1779
|
|
},
|
|
{
|
|
"epoch": 0.4763179020604763,
|
|
"grad_norm": 3.6345512866973877,
|
|
"learning_rate": 9.944185530497419e-06,
|
|
"loss": 1.1356,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.4765854963874766,
|
|
"grad_norm": 4.064966201782227,
|
|
"learning_rate": 9.944053594134374e-06,
|
|
"loss": 1.3057,
|
|
"step": 1781
|
|
},
|
|
{
|
|
"epoch": 0.47685309071447685,
|
|
"grad_norm": 3.3477392196655273,
|
|
"learning_rate": 9.943921502894593e-06,
|
|
"loss": 1.1066,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 0.47712068504147714,
|
|
"grad_norm": 3.5054497718811035,
|
|
"learning_rate": 9.943789256782208e-06,
|
|
"loss": 1.1497,
|
|
"step": 1783
|
|
},
|
|
{
|
|
"epoch": 0.4773882793684774,
|
|
"grad_norm": 3.596972703933716,
|
|
"learning_rate": 9.943656855801364e-06,
|
|
"loss": 1.195,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 0.47765587369547763,
|
|
"grad_norm": 3.870314598083496,
|
|
"learning_rate": 9.943524299956206e-06,
|
|
"loss": 1.1513,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 0.4779234680224779,
|
|
"grad_norm": 3.6923234462738037,
|
|
"learning_rate": 9.94339158925089e-06,
|
|
"loss": 1.2705,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 0.47819106234947817,
|
|
"grad_norm": 3.6829121112823486,
|
|
"learning_rate": 9.94325872368957e-06,
|
|
"loss": 1.1401,
|
|
"step": 1787
|
|
},
|
|
{
|
|
"epoch": 0.47845865667647847,
|
|
"grad_norm": 3.6094305515289307,
|
|
"learning_rate": 9.943125703276411e-06,
|
|
"loss": 1.2009,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 0.4787262510034787,
|
|
"grad_norm": 3.806605339050293,
|
|
"learning_rate": 9.94299252801558e-06,
|
|
"loss": 1.1255,
|
|
"step": 1789
|
|
},
|
|
{
|
|
"epoch": 0.478993845330479,
|
|
"grad_norm": 3.4413986206054688,
|
|
"learning_rate": 9.942859197911246e-06,
|
|
"loss": 1.099,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.47926143965747925,
|
|
"grad_norm": 3.7464005947113037,
|
|
"learning_rate": 9.942725712967587e-06,
|
|
"loss": 1.1829,
|
|
"step": 1791
|
|
},
|
|
{
|
|
"epoch": 0.47952903398447955,
|
|
"grad_norm": 4.125034809112549,
|
|
"learning_rate": 9.942592073188783e-06,
|
|
"loss": 1.3325,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 0.4797966283114798,
|
|
"grad_norm": 3.741257429122925,
|
|
"learning_rate": 9.942458278579026e-06,
|
|
"loss": 1.1842,
|
|
"step": 1793
|
|
},
|
|
{
|
|
"epoch": 0.4800642226384801,
|
|
"grad_norm": 3.944084405899048,
|
|
"learning_rate": 9.9423243291425e-06,
|
|
"loss": 1.3479,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 0.48033181696548033,
|
|
"grad_norm": 3.8254520893096924,
|
|
"learning_rate": 9.942190224883406e-06,
|
|
"loss": 1.204,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 0.48059941129248057,
|
|
"grad_norm": 3.4253695011138916,
|
|
"learning_rate": 9.942055965805943e-06,
|
|
"loss": 1.0251,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 0.48086700561948087,
|
|
"grad_norm": 3.6683967113494873,
|
|
"learning_rate": 9.941921551914318e-06,
|
|
"loss": 1.1936,
|
|
"step": 1797
|
|
},
|
|
{
|
|
"epoch": 0.4811345999464811,
|
|
"grad_norm": 3.3697001934051514,
|
|
"learning_rate": 9.94178698321274e-06,
|
|
"loss": 1.0839,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 0.4814021942734814,
|
|
"grad_norm": 3.724254846572876,
|
|
"learning_rate": 9.941652259705425e-06,
|
|
"loss": 1.2582,
|
|
"step": 1799
|
|
},
|
|
{
|
|
"epoch": 0.48166978860048165,
|
|
"grad_norm": 3.8191325664520264,
|
|
"learning_rate": 9.941517381396594e-06,
|
|
"loss": 1.1972,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.48193738292748195,
|
|
"grad_norm": 3.812429904937744,
|
|
"learning_rate": 9.941382348290471e-06,
|
|
"loss": 1.1348,
|
|
"step": 1801
|
|
},
|
|
{
|
|
"epoch": 0.4822049772544822,
|
|
"grad_norm": 3.5466363430023193,
|
|
"learning_rate": 9.941247160391288e-06,
|
|
"loss": 1.2157,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 0.4824725715814825,
|
|
"grad_norm": 3.9619693756103516,
|
|
"learning_rate": 9.94111181770328e-06,
|
|
"loss": 1.2637,
|
|
"step": 1803
|
|
},
|
|
{
|
|
"epoch": 0.48274016590848273,
|
|
"grad_norm": 3.724550485610962,
|
|
"learning_rate": 9.940976320230682e-06,
|
|
"loss": 1.1937,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 0.48300776023548303,
|
|
"grad_norm": 3.91395902633667,
|
|
"learning_rate": 9.940840667977745e-06,
|
|
"loss": 1.1857,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 0.4832753545624833,
|
|
"grad_norm": 3.2114241123199463,
|
|
"learning_rate": 9.940704860948713e-06,
|
|
"loss": 1.0522,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 0.48354294888948357,
|
|
"grad_norm": 3.380030870437622,
|
|
"learning_rate": 9.940568899147844e-06,
|
|
"loss": 1.1107,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 0.4838105432164838,
|
|
"grad_norm": 3.728666067123413,
|
|
"learning_rate": 9.940432782579395e-06,
|
|
"loss": 1.2433,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 0.48407813754348405,
|
|
"grad_norm": 3.6429684162139893,
|
|
"learning_rate": 9.940296511247631e-06,
|
|
"loss": 1.0235,
|
|
"step": 1809
|
|
},
|
|
{
|
|
"epoch": 0.48434573187048435,
|
|
"grad_norm": 3.6570608615875244,
|
|
"learning_rate": 9.94016008515682e-06,
|
|
"loss": 1.1839,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.4846133261974846,
|
|
"grad_norm": 3.6205060482025146,
|
|
"learning_rate": 9.940023504311237e-06,
|
|
"loss": 1.0398,
|
|
"step": 1811
|
|
},
|
|
{
|
|
"epoch": 0.4848809205244849,
|
|
"grad_norm": 3.8173046112060547,
|
|
"learning_rate": 9.93988676871516e-06,
|
|
"loss": 1.1421,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 0.48514851485148514,
|
|
"grad_norm": 3.792316198348999,
|
|
"learning_rate": 9.939749878372873e-06,
|
|
"loss": 1.2172,
|
|
"step": 1813
|
|
},
|
|
{
|
|
"epoch": 0.48541610917848543,
|
|
"grad_norm": 3.5528106689453125,
|
|
"learning_rate": 9.939612833288662e-06,
|
|
"loss": 1.0964,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 0.4856837035054857,
|
|
"grad_norm": 3.7796194553375244,
|
|
"learning_rate": 9.939475633466822e-06,
|
|
"loss": 1.1715,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 0.485951297832486,
|
|
"grad_norm": 4.104870319366455,
|
|
"learning_rate": 9.93933827891165e-06,
|
|
"loss": 1.321,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 0.4862188921594862,
|
|
"grad_norm": 3.6252858638763428,
|
|
"learning_rate": 9.93920076962745e-06,
|
|
"loss": 1.1834,
|
|
"step": 1817
|
|
},
|
|
{
|
|
"epoch": 0.4864864864864865,
|
|
"grad_norm": 3.8419394493103027,
|
|
"learning_rate": 9.939063105618525e-06,
|
|
"loss": 1.0172,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 0.48675408081348676,
|
|
"grad_norm": 3.7632715702056885,
|
|
"learning_rate": 9.938925286889194e-06,
|
|
"loss": 1.1501,
|
|
"step": 1819
|
|
},
|
|
{
|
|
"epoch": 0.487021675140487,
|
|
"grad_norm": 3.928379535675049,
|
|
"learning_rate": 9.938787313443771e-06,
|
|
"loss": 1.2283,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.4872892694674873,
|
|
"grad_norm": 3.341074228286743,
|
|
"learning_rate": 9.93864918528658e-06,
|
|
"loss": 1.038,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 0.48755686379448754,
|
|
"grad_norm": 3.8273613452911377,
|
|
"learning_rate": 9.938510902421945e-06,
|
|
"loss": 1.2315,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 0.48782445812148784,
|
|
"grad_norm": 3.6578738689422607,
|
|
"learning_rate": 9.938372464854198e-06,
|
|
"loss": 1.1331,
|
|
"step": 1823
|
|
},
|
|
{
|
|
"epoch": 0.4880920524484881,
|
|
"grad_norm": 3.7590830326080322,
|
|
"learning_rate": 9.93823387258768e-06,
|
|
"loss": 1.0829,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 0.4883596467754884,
|
|
"grad_norm": 3.6043503284454346,
|
|
"learning_rate": 9.938095125626726e-06,
|
|
"loss": 1.0529,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 0.4886272411024886,
|
|
"grad_norm": 3.854071617126465,
|
|
"learning_rate": 9.93795622397569e-06,
|
|
"loss": 1.2383,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 0.4888948354294889,
|
|
"grad_norm": 3.758488416671753,
|
|
"learning_rate": 9.937817167638914e-06,
|
|
"loss": 1.0957,
|
|
"step": 1827
|
|
},
|
|
{
|
|
"epoch": 0.48916242975648916,
|
|
"grad_norm": 3.695533514022827,
|
|
"learning_rate": 9.937677956620764e-06,
|
|
"loss": 1.3151,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 0.48943002408348946,
|
|
"grad_norm": 3.5443248748779297,
|
|
"learning_rate": 9.937538590925593e-06,
|
|
"loss": 1.0494,
|
|
"step": 1829
|
|
},
|
|
{
|
|
"epoch": 0.4896976184104897,
|
|
"grad_norm": 3.6536788940429688,
|
|
"learning_rate": 9.937399070557771e-06,
|
|
"loss": 1.218,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.48996521273748994,
|
|
"grad_norm": 3.929737091064453,
|
|
"learning_rate": 9.937259395521667e-06,
|
|
"loss": 1.1923,
|
|
"step": 1831
|
|
},
|
|
{
|
|
"epoch": 0.49023280706449024,
|
|
"grad_norm": 3.3342623710632324,
|
|
"learning_rate": 9.937119565821658e-06,
|
|
"loss": 1.1186,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 0.4905004013914905,
|
|
"grad_norm": 3.6802546977996826,
|
|
"learning_rate": 9.936979581462122e-06,
|
|
"loss": 1.2171,
|
|
"step": 1833
|
|
},
|
|
{
|
|
"epoch": 0.4907679957184908,
|
|
"grad_norm": 3.4526920318603516,
|
|
"learning_rate": 9.936839442447446e-06,
|
|
"loss": 1.0922,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 0.491035590045491,
|
|
"grad_norm": 3.7504050731658936,
|
|
"learning_rate": 9.936699148782018e-06,
|
|
"loss": 1.0743,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 0.4913031843724913,
|
|
"grad_norm": 3.873074531555176,
|
|
"learning_rate": 9.936558700470234e-06,
|
|
"loss": 1.2213,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 0.49157077869949156,
|
|
"grad_norm": 3.6571013927459717,
|
|
"learning_rate": 9.936418097516495e-06,
|
|
"loss": 1.123,
|
|
"step": 1837
|
|
},
|
|
{
|
|
"epoch": 0.49183837302649186,
|
|
"grad_norm": 3.5951497554779053,
|
|
"learning_rate": 9.936277339925205e-06,
|
|
"loss": 1.1968,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 0.4921059673534921,
|
|
"grad_norm": 3.9791698455810547,
|
|
"learning_rate": 9.93613642770077e-06,
|
|
"loss": 1.1248,
|
|
"step": 1839
|
|
},
|
|
{
|
|
"epoch": 0.4923735616804924,
|
|
"grad_norm": 4.2198100090026855,
|
|
"learning_rate": 9.935995360847608e-06,
|
|
"loss": 1.1946,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.49264115600749264,
|
|
"grad_norm": 3.915623188018799,
|
|
"learning_rate": 9.935854139370139e-06,
|
|
"loss": 1.1836,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 0.4929087503344929,
|
|
"grad_norm": 3.8059470653533936,
|
|
"learning_rate": 9.93571276327278e-06,
|
|
"loss": 1.2146,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 0.4931763446614932,
|
|
"grad_norm": 4.118159770965576,
|
|
"learning_rate": 9.93557123255997e-06,
|
|
"loss": 1.1451,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 0.4934439389884934,
|
|
"grad_norm": 4.63586950302124,
|
|
"learning_rate": 9.935429547236131e-06,
|
|
"loss": 1.4108,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 0.4937115333154937,
|
|
"grad_norm": 3.541332244873047,
|
|
"learning_rate": 9.935287707305712e-06,
|
|
"loss": 1.0874,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 0.49397912764249396,
|
|
"grad_norm": 3.4757399559020996,
|
|
"learning_rate": 9.93514571277315e-06,
|
|
"loss": 1.1712,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 0.49424672196949426,
|
|
"grad_norm": 3.835604190826416,
|
|
"learning_rate": 9.935003563642895e-06,
|
|
"loss": 1.1442,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 0.4945143162964945,
|
|
"grad_norm": 3.701040029525757,
|
|
"learning_rate": 9.934861259919399e-06,
|
|
"loss": 1.0242,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 0.4947819106234948,
|
|
"grad_norm": 3.7247939109802246,
|
|
"learning_rate": 9.934718801607122e-06,
|
|
"loss": 1.2422,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 0.49504950495049505,
|
|
"grad_norm": 4.011390209197998,
|
|
"learning_rate": 9.934576188710524e-06,
|
|
"loss": 1.2711,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.49531709927749534,
|
|
"grad_norm": 3.3684377670288086,
|
|
"learning_rate": 9.934433421234073e-06,
|
|
"loss": 1.1051,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 0.4955846936044956,
|
|
"grad_norm": 3.550625801086426,
|
|
"learning_rate": 9.934290499182244e-06,
|
|
"loss": 1.2236,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 0.4958522879314958,
|
|
"grad_norm": 4.006191253662109,
|
|
"learning_rate": 9.93414742255951e-06,
|
|
"loss": 1.2605,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 0.4961198822584961,
|
|
"grad_norm": 3.7999353408813477,
|
|
"learning_rate": 9.934004191370356e-06,
|
|
"loss": 1.2019,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 0.49638747658549637,
|
|
"grad_norm": 3.6491141319274902,
|
|
"learning_rate": 9.933860805619269e-06,
|
|
"loss": 1.1939,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 0.49665507091249667,
|
|
"grad_norm": 3.60182785987854,
|
|
"learning_rate": 9.933717265310739e-06,
|
|
"loss": 1.185,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 0.4969226652394969,
|
|
"grad_norm": 3.517396926879883,
|
|
"learning_rate": 9.933573570449262e-06,
|
|
"loss": 1.0801,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 0.4971902595664972,
|
|
"grad_norm": 3.847062349319458,
|
|
"learning_rate": 9.93342972103934e-06,
|
|
"loss": 1.1699,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 0.49745785389349745,
|
|
"grad_norm": 3.5466854572296143,
|
|
"learning_rate": 9.933285717085482e-06,
|
|
"loss": 1.088,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 0.49772544822049775,
|
|
"grad_norm": 4.013504981994629,
|
|
"learning_rate": 9.933141558592196e-06,
|
|
"loss": 1.2217,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.497993042547498,
|
|
"grad_norm": 4.0954155921936035,
|
|
"learning_rate": 9.932997245563997e-06,
|
|
"loss": 1.231,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 0.4982606368744983,
|
|
"grad_norm": 3.723498821258545,
|
|
"learning_rate": 9.93285277800541e-06,
|
|
"loss": 1.1645,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 0.49852823120149853,
|
|
"grad_norm": 3.436872720718384,
|
|
"learning_rate": 9.932708155920957e-06,
|
|
"loss": 1.1673,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 0.49879582552849877,
|
|
"grad_norm": 3.8395087718963623,
|
|
"learning_rate": 9.932563379315168e-06,
|
|
"loss": 1.2485,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 0.49906341985549907,
|
|
"grad_norm": 3.937257766723633,
|
|
"learning_rate": 9.93241844819258e-06,
|
|
"loss": 1.2447,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 0.4993310141824993,
|
|
"grad_norm": 3.5979080200195312,
|
|
"learning_rate": 9.932273362557734e-06,
|
|
"loss": 1.153,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 0.4995986085094996,
|
|
"grad_norm": 3.8511085510253906,
|
|
"learning_rate": 9.932128122415173e-06,
|
|
"loss": 1.1053,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 0.49986620283649985,
|
|
"grad_norm": 4.010068893432617,
|
|
"learning_rate": 9.931982727769448e-06,
|
|
"loss": 1.155,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 0.5001337971635001,
|
|
"grad_norm": 3.749917507171631,
|
|
"learning_rate": 9.931837178625111e-06,
|
|
"loss": 1.1328,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 0.5004013914905004,
|
|
"grad_norm": 3.668951988220215,
|
|
"learning_rate": 9.931691474986726e-06,
|
|
"loss": 1.0613,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.5006689858175006,
|
|
"grad_norm": 3.563898801803589,
|
|
"learning_rate": 9.931545616858853e-06,
|
|
"loss": 1.1231,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 0.5009365801445009,
|
|
"grad_norm": 3.758409023284912,
|
|
"learning_rate": 9.931399604246064e-06,
|
|
"loss": 1.2123,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 0.5012041744715012,
|
|
"grad_norm": 3.4294962882995605,
|
|
"learning_rate": 9.93125343715293e-06,
|
|
"loss": 1.0552,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 0.5014717687985015,
|
|
"grad_norm": 3.464952230453491,
|
|
"learning_rate": 9.931107115584034e-06,
|
|
"loss": 1.1708,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 0.5017393631255017,
|
|
"grad_norm": 3.9118897914886475,
|
|
"learning_rate": 9.930960639543956e-06,
|
|
"loss": 1.1202,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 0.502006957452502,
|
|
"grad_norm": 3.2876811027526855,
|
|
"learning_rate": 9.930814009037286e-06,
|
|
"loss": 1.0269,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 0.5022745517795023,
|
|
"grad_norm": 3.571906566619873,
|
|
"learning_rate": 9.930667224068618e-06,
|
|
"loss": 1.1515,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 0.5025421461065025,
|
|
"grad_norm": 3.775341510772705,
|
|
"learning_rate": 9.930520284642548e-06,
|
|
"loss": 1.1708,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 0.5028097404335028,
|
|
"grad_norm": 3.8571135997772217,
|
|
"learning_rate": 9.93037319076368e-06,
|
|
"loss": 1.1639,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 0.5030773347605031,
|
|
"grad_norm": 3.8149497509002686,
|
|
"learning_rate": 9.930225942436623e-06,
|
|
"loss": 1.2267,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.5033449290875034,
|
|
"grad_norm": 3.51364803314209,
|
|
"learning_rate": 9.930078539665988e-06,
|
|
"loss": 1.1698,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 0.5036125234145036,
|
|
"grad_norm": 3.4830048084259033,
|
|
"learning_rate": 9.929930982456395e-06,
|
|
"loss": 1.1729,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 0.5038801177415039,
|
|
"grad_norm": 3.5782647132873535,
|
|
"learning_rate": 9.929783270812464e-06,
|
|
"loss": 1.1596,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 0.5041477120685042,
|
|
"grad_norm": 3.836897373199463,
|
|
"learning_rate": 9.929635404738822e-06,
|
|
"loss": 1.2473,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 0.5044153063955045,
|
|
"grad_norm": 3.5359864234924316,
|
|
"learning_rate": 9.929487384240103e-06,
|
|
"loss": 1.052,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 0.5046829007225047,
|
|
"grad_norm": 3.7053215503692627,
|
|
"learning_rate": 9.929339209320944e-06,
|
|
"loss": 1.1623,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 0.504950495049505,
|
|
"grad_norm": 3.838304042816162,
|
|
"learning_rate": 9.929190879985982e-06,
|
|
"loss": 1.2307,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 0.5052180893765053,
|
|
"grad_norm": 3.681903123855591,
|
|
"learning_rate": 9.929042396239869e-06,
|
|
"loss": 1.3262,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 0.5054856837035054,
|
|
"grad_norm": 3.6203062534332275,
|
|
"learning_rate": 9.928893758087254e-06,
|
|
"loss": 1.1616,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 0.5057532780305057,
|
|
"grad_norm": 3.843017339706421,
|
|
"learning_rate": 9.928744965532795e-06,
|
|
"loss": 1.184,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.506020872357506,
|
|
"grad_norm": 4.003540992736816,
|
|
"learning_rate": 9.928596018581151e-06,
|
|
"loss": 1.0719,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 0.5062884666845063,
|
|
"grad_norm": 3.225344657897949,
|
|
"learning_rate": 9.928446917236988e-06,
|
|
"loss": 0.9902,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 0.5065560610115065,
|
|
"grad_norm": 4.046036720275879,
|
|
"learning_rate": 9.928297661504978e-06,
|
|
"loss": 1.1583,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 0.5068236553385068,
|
|
"grad_norm": 3.522110939025879,
|
|
"learning_rate": 9.928148251389796e-06,
|
|
"loss": 1.0941,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 0.5070912496655071,
|
|
"grad_norm": 3.5445072650909424,
|
|
"learning_rate": 9.92799868689612e-06,
|
|
"loss": 1.1043,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 0.5073588439925074,
|
|
"grad_norm": 3.7460379600524902,
|
|
"learning_rate": 9.927848968028642e-06,
|
|
"loss": 1.1259,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 0.5076264383195076,
|
|
"grad_norm": 3.518141508102417,
|
|
"learning_rate": 9.927699094792045e-06,
|
|
"loss": 1.0938,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 0.5078940326465079,
|
|
"grad_norm": 4.169661521911621,
|
|
"learning_rate": 9.927549067191026e-06,
|
|
"loss": 1.3043,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 0.5081616269735082,
|
|
"grad_norm": 3.7639896869659424,
|
|
"learning_rate": 9.927398885230286e-06,
|
|
"loss": 1.254,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 0.5084292213005084,
|
|
"grad_norm": 3.5845093727111816,
|
|
"learning_rate": 9.927248548914528e-06,
|
|
"loss": 1.0115,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.5086968156275087,
|
|
"grad_norm": 3.618220806121826,
|
|
"learning_rate": 9.927098058248463e-06,
|
|
"loss": 1.1713,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 0.508964409954509,
|
|
"grad_norm": 3.6645729541778564,
|
|
"learning_rate": 9.926947413236806e-06,
|
|
"loss": 1.1468,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 0.5092320042815093,
|
|
"grad_norm": 3.4273576736450195,
|
|
"learning_rate": 9.926796613884271e-06,
|
|
"loss": 1.0282,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 0.5094995986085095,
|
|
"grad_norm": 4.018494606018066,
|
|
"learning_rate": 9.926645660195588e-06,
|
|
"loss": 1.2789,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 0.5097671929355098,
|
|
"grad_norm": 3.431507110595703,
|
|
"learning_rate": 9.926494552175484e-06,
|
|
"loss": 1.1095,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 0.5100347872625101,
|
|
"grad_norm": 3.723026752471924,
|
|
"learning_rate": 9.926343289828689e-06,
|
|
"loss": 1.1774,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 0.5103023815895104,
|
|
"grad_norm": 4.003593921661377,
|
|
"learning_rate": 9.926191873159945e-06,
|
|
"loss": 1.2947,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 0.5105699759165105,
|
|
"grad_norm": 3.923344373703003,
|
|
"learning_rate": 9.926040302173995e-06,
|
|
"loss": 1.3416,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 0.5108375702435108,
|
|
"grad_norm": 4.057835578918457,
|
|
"learning_rate": 9.925888576875588e-06,
|
|
"loss": 1.1635,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 0.5111051645705111,
|
|
"grad_norm": 3.939828395843506,
|
|
"learning_rate": 9.925736697269474e-06,
|
|
"loss": 1.3077,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.5113727588975113,
|
|
"grad_norm": 4.334293365478516,
|
|
"learning_rate": 9.925584663360412e-06,
|
|
"loss": 1.2711,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 0.5116403532245116,
|
|
"grad_norm": 3.6700150966644287,
|
|
"learning_rate": 9.925432475153166e-06,
|
|
"loss": 1.2447,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 0.5119079475515119,
|
|
"grad_norm": 3.7518320083618164,
|
|
"learning_rate": 9.925280132652503e-06,
|
|
"loss": 1.1256,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 0.5121755418785122,
|
|
"grad_norm": 3.581819534301758,
|
|
"learning_rate": 9.925127635863195e-06,
|
|
"loss": 1.0175,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 0.5124431362055124,
|
|
"grad_norm": 3.7574949264526367,
|
|
"learning_rate": 9.924974984790016e-06,
|
|
"loss": 1.2528,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 0.5127107305325127,
|
|
"grad_norm": 3.8194570541381836,
|
|
"learning_rate": 9.924822179437752e-06,
|
|
"loss": 1.2685,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 0.512978324859513,
|
|
"grad_norm": 3.690627336502075,
|
|
"learning_rate": 9.924669219811188e-06,
|
|
"loss": 1.2214,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 0.5132459191865133,
|
|
"grad_norm": 3.204648494720459,
|
|
"learning_rate": 9.924516105915116e-06,
|
|
"loss": 0.9857,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 0.5135135135135135,
|
|
"grad_norm": 3.702674627304077,
|
|
"learning_rate": 9.924362837754334e-06,
|
|
"loss": 1.0301,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 0.5137811078405138,
|
|
"grad_norm": 3.366229295730591,
|
|
"learning_rate": 9.92420941533364e-06,
|
|
"loss": 1.0714,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.5140487021675141,
|
|
"grad_norm": 3.501063823699951,
|
|
"learning_rate": 9.92405583865784e-06,
|
|
"loss": 1.1025,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 0.5143162964945143,
|
|
"grad_norm": 3.3142244815826416,
|
|
"learning_rate": 9.92390210773175e-06,
|
|
"loss": 1.0532,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 0.5145838908215146,
|
|
"grad_norm": 3.998425006866455,
|
|
"learning_rate": 9.923748222560181e-06,
|
|
"loss": 1.1796,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 0.5148514851485149,
|
|
"grad_norm": 3.6948330402374268,
|
|
"learning_rate": 9.923594183147954e-06,
|
|
"loss": 1.0869,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 0.5151190794755152,
|
|
"grad_norm": 3.7560575008392334,
|
|
"learning_rate": 9.923439989499897e-06,
|
|
"loss": 1.1566,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 0.5153866738025153,
|
|
"grad_norm": 3.8775906562805176,
|
|
"learning_rate": 9.923285641620838e-06,
|
|
"loss": 1.1781,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 0.5156542681295156,
|
|
"grad_norm": 3.8323404788970947,
|
|
"learning_rate": 9.923131139515613e-06,
|
|
"loss": 1.1228,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 0.515921862456516,
|
|
"grad_norm": 3.4766688346862793,
|
|
"learning_rate": 9.922976483189061e-06,
|
|
"loss": 1.0528,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 0.5161894567835162,
|
|
"grad_norm": 3.5990777015686035,
|
|
"learning_rate": 9.922821672646028e-06,
|
|
"loss": 1.0601,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 0.5164570511105164,
|
|
"grad_norm": 3.702481269836426,
|
|
"learning_rate": 9.922666707891361e-06,
|
|
"loss": 1.1455,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.5167246454375167,
|
|
"grad_norm": 3.4668917655944824,
|
|
"learning_rate": 9.92251158892992e-06,
|
|
"loss": 1.0795,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 0.516992239764517,
|
|
"grad_norm": 3.769757032394409,
|
|
"learning_rate": 9.922356315766557e-06,
|
|
"loss": 1.1749,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 0.5172598340915172,
|
|
"grad_norm": 3.681917428970337,
|
|
"learning_rate": 9.922200888406142e-06,
|
|
"loss": 1.1752,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 0.5175274284185175,
|
|
"grad_norm": 3.566633701324463,
|
|
"learning_rate": 9.922045306853542e-06,
|
|
"loss": 1.0806,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 0.5177950227455178,
|
|
"grad_norm": 3.5221433639526367,
|
|
"learning_rate": 9.921889571113629e-06,
|
|
"loss": 1.1242,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 0.5180626170725181,
|
|
"grad_norm": 3.574681043624878,
|
|
"learning_rate": 9.921733681191283e-06,
|
|
"loss": 1.117,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 0.5183302113995183,
|
|
"grad_norm": 4.224633693695068,
|
|
"learning_rate": 9.921577637091388e-06,
|
|
"loss": 1.2129,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 0.5185978057265186,
|
|
"grad_norm": 3.799368381500244,
|
|
"learning_rate": 9.92142143881883e-06,
|
|
"loss": 1.0609,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 0.5188654000535189,
|
|
"grad_norm": 3.3646318912506104,
|
|
"learning_rate": 9.921265086378504e-06,
|
|
"loss": 1.1139,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 0.5191329943805192,
|
|
"grad_norm": 4.179952621459961,
|
|
"learning_rate": 9.921108579775307e-06,
|
|
"loss": 1.2536,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.5194005887075194,
|
|
"grad_norm": 3.8131916522979736,
|
|
"learning_rate": 9.920951919014144e-06,
|
|
"loss": 1.1239,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 0.5196681830345197,
|
|
"grad_norm": 3.50144624710083,
|
|
"learning_rate": 9.920795104099919e-06,
|
|
"loss": 1.0744,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 0.51993577736152,
|
|
"grad_norm": 3.8942971229553223,
|
|
"learning_rate": 9.920638135037545e-06,
|
|
"loss": 1.1104,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 0.5202033716885202,
|
|
"grad_norm": 3.6234724521636963,
|
|
"learning_rate": 9.920481011831941e-06,
|
|
"loss": 1.1766,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 0.5204709660155205,
|
|
"grad_norm": 4.271646022796631,
|
|
"learning_rate": 9.92032373448803e-06,
|
|
"loss": 1.3459,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 0.5207385603425208,
|
|
"grad_norm": 3.910745143890381,
|
|
"learning_rate": 9.920166303010737e-06,
|
|
"loss": 1.1466,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 0.521006154669521,
|
|
"grad_norm": 3.472041606903076,
|
|
"learning_rate": 9.92000871740499e-06,
|
|
"loss": 1.2786,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 0.5212737489965212,
|
|
"grad_norm": 3.5486903190612793,
|
|
"learning_rate": 9.919850977675732e-06,
|
|
"loss": 1.1269,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 0.5215413433235215,
|
|
"grad_norm": 3.486093044281006,
|
|
"learning_rate": 9.919693083827902e-06,
|
|
"loss": 1.0447,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 0.5218089376505218,
|
|
"grad_norm": 3.836215019226074,
|
|
"learning_rate": 9.919535035866444e-06,
|
|
"loss": 1.179,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.5220765319775221,
|
|
"grad_norm": 3.5467727184295654,
|
|
"learning_rate": 9.919376833796312e-06,
|
|
"loss": 1.0668,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 0.5223441263045223,
|
|
"grad_norm": 3.5442044734954834,
|
|
"learning_rate": 9.91921847762246e-06,
|
|
"loss": 1.0542,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 0.5226117206315226,
|
|
"grad_norm": 3.7540347576141357,
|
|
"learning_rate": 9.919059967349848e-06,
|
|
"loss": 1.0402,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 0.5228793149585229,
|
|
"grad_norm": 4.026261329650879,
|
|
"learning_rate": 9.918901302983445e-06,
|
|
"loss": 1.2437,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 0.5231469092855231,
|
|
"grad_norm": 3.6572134494781494,
|
|
"learning_rate": 9.918742484528218e-06,
|
|
"loss": 1.1397,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 0.5234145036125234,
|
|
"grad_norm": 3.5838277339935303,
|
|
"learning_rate": 9.918583511989142e-06,
|
|
"loss": 1.0844,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 0.5236820979395237,
|
|
"grad_norm": 3.8754079341888428,
|
|
"learning_rate": 9.918424385371199e-06,
|
|
"loss": 1.2264,
|
|
"step": 1957
|
|
},
|
|
{
|
|
"epoch": 0.523949692266524,
|
|
"grad_norm": 3.196148633956909,
|
|
"learning_rate": 9.918265104679371e-06,
|
|
"loss": 1.0584,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 0.5242172865935242,
|
|
"grad_norm": 4.228190898895264,
|
|
"learning_rate": 9.918105669918652e-06,
|
|
"loss": 1.2559,
|
|
"step": 1959
|
|
},
|
|
{
|
|
"epoch": 0.5244848809205245,
|
|
"grad_norm": 3.834376573562622,
|
|
"learning_rate": 9.917946081094033e-06,
|
|
"loss": 1.0941,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.5247524752475248,
|
|
"grad_norm": 3.5881540775299072,
|
|
"learning_rate": 9.917786338210513e-06,
|
|
"loss": 1.1777,
|
|
"step": 1961
|
|
},
|
|
{
|
|
"epoch": 0.5250200695745251,
|
|
"grad_norm": 3.671957492828369,
|
|
"learning_rate": 9.917626441273099e-06,
|
|
"loss": 1.2193,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 0.5252876639015253,
|
|
"grad_norm": 3.508430242538452,
|
|
"learning_rate": 9.917466390286797e-06,
|
|
"loss": 1.1494,
|
|
"step": 1963
|
|
},
|
|
{
|
|
"epoch": 0.5255552582285256,
|
|
"grad_norm": 4.060336112976074,
|
|
"learning_rate": 9.917306185256621e-06,
|
|
"loss": 1.2024,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 0.5258228525555259,
|
|
"grad_norm": 3.5298852920532227,
|
|
"learning_rate": 9.91714582618759e-06,
|
|
"loss": 1.1166,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 0.526090446882526,
|
|
"grad_norm": 3.5156521797180176,
|
|
"learning_rate": 9.91698531308473e-06,
|
|
"loss": 1.1366,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 0.5263580412095263,
|
|
"grad_norm": 3.63799786567688,
|
|
"learning_rate": 9.916824645953065e-06,
|
|
"loss": 1.2219,
|
|
"step": 1967
|
|
},
|
|
{
|
|
"epoch": 0.5266256355365266,
|
|
"grad_norm": 3.7056069374084473,
|
|
"learning_rate": 9.916663824797633e-06,
|
|
"loss": 1.16,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 0.5268932298635269,
|
|
"grad_norm": 3.2435388565063477,
|
|
"learning_rate": 9.916502849623467e-06,
|
|
"loss": 1.0117,
|
|
"step": 1969
|
|
},
|
|
{
|
|
"epoch": 0.5271608241905271,
|
|
"grad_norm": 3.5529932975769043,
|
|
"learning_rate": 9.916341720435609e-06,
|
|
"loss": 1.0804,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.5274284185175274,
|
|
"grad_norm": 3.3724541664123535,
|
|
"learning_rate": 9.91618043723911e-06,
|
|
"loss": 1.0444,
|
|
"step": 1971
|
|
},
|
|
{
|
|
"epoch": 0.5276960128445277,
|
|
"grad_norm": 3.614671230316162,
|
|
"learning_rate": 9.916019000039024e-06,
|
|
"loss": 1.0751,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 0.527963607171528,
|
|
"grad_norm": 3.8645894527435303,
|
|
"learning_rate": 9.915857408840405e-06,
|
|
"loss": 1.25,
|
|
"step": 1973
|
|
},
|
|
{
|
|
"epoch": 0.5282312014985282,
|
|
"grad_norm": 3.3444855213165283,
|
|
"learning_rate": 9.915695663648315e-06,
|
|
"loss": 1.0344,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 0.5284987958255285,
|
|
"grad_norm": 3.8077821731567383,
|
|
"learning_rate": 9.91553376446782e-06,
|
|
"loss": 1.1244,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 0.5287663901525288,
|
|
"grad_norm": 3.517341375350952,
|
|
"learning_rate": 9.915371711303994e-06,
|
|
"loss": 1.1202,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 0.529033984479529,
|
|
"grad_norm": 4.2117767333984375,
|
|
"learning_rate": 9.915209504161914e-06,
|
|
"loss": 1.2998,
|
|
"step": 1977
|
|
},
|
|
{
|
|
"epoch": 0.5293015788065293,
|
|
"grad_norm": 3.684497117996216,
|
|
"learning_rate": 9.915047143046656e-06,
|
|
"loss": 1.1494,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 0.5295691731335296,
|
|
"grad_norm": 4.2827630043029785,
|
|
"learning_rate": 9.914884627963312e-06,
|
|
"loss": 1.3014,
|
|
"step": 1979
|
|
},
|
|
{
|
|
"epoch": 0.5298367674605299,
|
|
"grad_norm": 3.7791380882263184,
|
|
"learning_rate": 9.914721958916971e-06,
|
|
"loss": 1.2749,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.5301043617875301,
|
|
"grad_norm": 3.7178707122802734,
|
|
"learning_rate": 9.91455913591273e-06,
|
|
"loss": 1.0304,
|
|
"step": 1981
|
|
},
|
|
{
|
|
"epoch": 0.5303719561145304,
|
|
"grad_norm": 3.6490297317504883,
|
|
"learning_rate": 9.914396158955685e-06,
|
|
"loss": 1.0867,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 0.5306395504415307,
|
|
"grad_norm": 4.041894912719727,
|
|
"learning_rate": 9.914233028050945e-06,
|
|
"loss": 1.1857,
|
|
"step": 1983
|
|
},
|
|
{
|
|
"epoch": 0.530907144768531,
|
|
"grad_norm": 3.4716479778289795,
|
|
"learning_rate": 9.91406974320362e-06,
|
|
"loss": 1.0954,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 0.5311747390955311,
|
|
"grad_norm": 3.4045979976654053,
|
|
"learning_rate": 9.913906304418825e-06,
|
|
"loss": 1.2435,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 0.5314423334225314,
|
|
"grad_norm": 3.823096752166748,
|
|
"learning_rate": 9.91374271170168e-06,
|
|
"loss": 1.1779,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 0.5317099277495317,
|
|
"grad_norm": 3.7356925010681152,
|
|
"learning_rate": 9.91357896505731e-06,
|
|
"loss": 1.2326,
|
|
"step": 1987
|
|
},
|
|
{
|
|
"epoch": 0.5319775220765319,
|
|
"grad_norm": 3.4389915466308594,
|
|
"learning_rate": 9.91341506449084e-06,
|
|
"loss": 1.0635,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 0.5322451164035322,
|
|
"grad_norm": 3.3921926021575928,
|
|
"learning_rate": 9.913251010007413e-06,
|
|
"loss": 1.0636,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 0.5325127107305325,
|
|
"grad_norm": 3.5323266983032227,
|
|
"learning_rate": 9.913086801612159e-06,
|
|
"loss": 1.0485,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.5327803050575328,
|
|
"grad_norm": 3.2040328979492188,
|
|
"learning_rate": 9.91292243931023e-06,
|
|
"loss": 1.0664,
|
|
"step": 1991
|
|
},
|
|
{
|
|
"epoch": 0.533047899384533,
|
|
"grad_norm": 3.604896068572998,
|
|
"learning_rate": 9.912757923106769e-06,
|
|
"loss": 1.1632,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 0.5333154937115333,
|
|
"grad_norm": 3.466099262237549,
|
|
"learning_rate": 9.91259325300693e-06,
|
|
"loss": 1.118,
|
|
"step": 1993
|
|
},
|
|
{
|
|
"epoch": 0.5335830880385336,
|
|
"grad_norm": 3.788372039794922,
|
|
"learning_rate": 9.912428429015874e-06,
|
|
"loss": 1.2205,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 0.5338506823655339,
|
|
"grad_norm": 3.699796199798584,
|
|
"learning_rate": 9.912263451138764e-06,
|
|
"loss": 1.0773,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 0.5341182766925341,
|
|
"grad_norm": 3.928880453109741,
|
|
"learning_rate": 9.912098319380767e-06,
|
|
"loss": 1.24,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 0.5343858710195344,
|
|
"grad_norm": 3.5852925777435303,
|
|
"learning_rate": 9.911933033747056e-06,
|
|
"loss": 1.0727,
|
|
"step": 1997
|
|
},
|
|
{
|
|
"epoch": 0.5346534653465347,
|
|
"grad_norm": 4.054876327514648,
|
|
"learning_rate": 9.91176759424281e-06,
|
|
"loss": 1.1812,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 0.5349210596735349,
|
|
"grad_norm": 3.9897444248199463,
|
|
"learning_rate": 9.91160200087321e-06,
|
|
"loss": 1.2996,
|
|
"step": 1999
|
|
},
|
|
{
|
|
"epoch": 0.5351886540005352,
|
|
"grad_norm": 3.893026113510132,
|
|
"learning_rate": 9.911436253643445e-06,
|
|
"loss": 1.1287,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5351886540005352,
|
|
"eval_loss": 1.1917240619659424,
|
|
"eval_runtime": 11.6396,
|
|
"eval_samples_per_second": 34.365,
|
|
"eval_steps_per_second": 4.296,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5354562483275355,
|
|
"grad_norm": 4.189493656158447,
|
|
"learning_rate": 9.911270352558703e-06,
|
|
"loss": 1.2612,
|
|
"step": 2001
|
|
},
|
|
{
|
|
"epoch": 0.5357238426545358,
|
|
"grad_norm": 3.7188894748687744,
|
|
"learning_rate": 9.911104297624186e-06,
|
|
"loss": 1.1238,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 0.535991436981536,
|
|
"grad_norm": 3.495906352996826,
|
|
"learning_rate": 9.910938088845095e-06,
|
|
"loss": 1.0895,
|
|
"step": 2003
|
|
},
|
|
{
|
|
"epoch": 0.5362590313085362,
|
|
"grad_norm": 3.8715004920959473,
|
|
"learning_rate": 9.910771726226634e-06,
|
|
"loss": 1.1578,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 0.5365266256355365,
|
|
"grad_norm": 5.872176170349121,
|
|
"learning_rate": 9.910605209774016e-06,
|
|
"loss": 1.2899,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 0.5367942199625368,
|
|
"grad_norm": 3.8072023391723633,
|
|
"learning_rate": 9.910438539492457e-06,
|
|
"loss": 1.0038,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 0.537061814289537,
|
|
"grad_norm": 3.388889789581299,
|
|
"learning_rate": 9.91027171538718e-06,
|
|
"loss": 1.0829,
|
|
"step": 2007
|
|
},
|
|
{
|
|
"epoch": 0.5373294086165373,
|
|
"grad_norm": 3.782205104827881,
|
|
"learning_rate": 9.910104737463406e-06,
|
|
"loss": 1.1912,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 0.5375970029435376,
|
|
"grad_norm": 3.77671217918396,
|
|
"learning_rate": 9.90993760572637e-06,
|
|
"loss": 1.2044,
|
|
"step": 2009
|
|
},
|
|
{
|
|
"epoch": 0.5378645972705378,
|
|
"grad_norm": 3.633802652359009,
|
|
"learning_rate": 9.909770320181306e-06,
|
|
"loss": 1.3179,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.5381321915975381,
|
|
"grad_norm": 3.744126558303833,
|
|
"learning_rate": 9.909602880833458e-06,
|
|
"loss": 1.1907,
|
|
"step": 2011
|
|
},
|
|
{
|
|
"epoch": 0.5383997859245384,
|
|
"grad_norm": 3.903366804122925,
|
|
"learning_rate": 9.909435287688065e-06,
|
|
"loss": 1.1737,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 0.5386673802515387,
|
|
"grad_norm": 3.7042882442474365,
|
|
"learning_rate": 9.90926754075038e-06,
|
|
"loss": 1.2119,
|
|
"step": 2013
|
|
},
|
|
{
|
|
"epoch": 0.5389349745785389,
|
|
"grad_norm": 3.662655830383301,
|
|
"learning_rate": 9.90909964002566e-06,
|
|
"loss": 1.1722,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 0.5392025689055392,
|
|
"grad_norm": 3.9184234142303467,
|
|
"learning_rate": 9.90893158551916e-06,
|
|
"loss": 1.176,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 0.5394701632325395,
|
|
"grad_norm": 3.6793618202209473,
|
|
"learning_rate": 9.90876337723615e-06,
|
|
"loss": 1.1942,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 0.5397377575595398,
|
|
"grad_norm": 3.438577175140381,
|
|
"learning_rate": 9.908595015181893e-06,
|
|
"loss": 1.0737,
|
|
"step": 2017
|
|
},
|
|
{
|
|
"epoch": 0.54000535188654,
|
|
"grad_norm": 3.8159797191619873,
|
|
"learning_rate": 9.908426499361668e-06,
|
|
"loss": 1.2024,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 0.5402729462135403,
|
|
"grad_norm": 3.6021339893341064,
|
|
"learning_rate": 9.908257829780752e-06,
|
|
"loss": 1.0793,
|
|
"step": 2019
|
|
},
|
|
{
|
|
"epoch": 0.5405405405405406,
|
|
"grad_norm": 3.960874319076538,
|
|
"learning_rate": 9.908089006444427e-06,
|
|
"loss": 1.2732,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.5408081348675408,
|
|
"grad_norm": 3.724120616912842,
|
|
"learning_rate": 9.907920029357986e-06,
|
|
"loss": 1.1938,
|
|
"step": 2021
|
|
},
|
|
{
|
|
"epoch": 0.541075729194541,
|
|
"grad_norm": 3.7229902744293213,
|
|
"learning_rate": 9.90775089852672e-06,
|
|
"loss": 1.124,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 0.5413433235215414,
|
|
"grad_norm": 3.4035604000091553,
|
|
"learning_rate": 9.907581613955924e-06,
|
|
"loss": 1.0212,
|
|
"step": 2023
|
|
},
|
|
{
|
|
"epoch": 0.5416109178485417,
|
|
"grad_norm": 3.6770973205566406,
|
|
"learning_rate": 9.907412175650905e-06,
|
|
"loss": 1.0409,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 0.5418785121755418,
|
|
"grad_norm": 3.5748701095581055,
|
|
"learning_rate": 9.907242583616972e-06,
|
|
"loss": 1.0902,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 0.5421461065025421,
|
|
"grad_norm": 4.297303199768066,
|
|
"learning_rate": 9.907072837859434e-06,
|
|
"loss": 1.1205,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 0.5424137008295424,
|
|
"grad_norm": 3.765982151031494,
|
|
"learning_rate": 9.90690293838361e-06,
|
|
"loss": 1.2402,
|
|
"step": 2027
|
|
},
|
|
{
|
|
"epoch": 0.5426812951565427,
|
|
"grad_norm": 3.948046922683716,
|
|
"learning_rate": 9.906732885194821e-06,
|
|
"loss": 1.2607,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 0.5429488894835429,
|
|
"grad_norm": 3.599590539932251,
|
|
"learning_rate": 9.906562678298394e-06,
|
|
"loss": 1.113,
|
|
"step": 2029
|
|
},
|
|
{
|
|
"epoch": 0.5432164838105432,
|
|
"grad_norm": 3.43281626701355,
|
|
"learning_rate": 9.906392317699665e-06,
|
|
"loss": 1.0782,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.5434840781375435,
|
|
"grad_norm": 3.7561564445495605,
|
|
"learning_rate": 9.906221803403967e-06,
|
|
"loss": 1.2796,
|
|
"step": 2031
|
|
},
|
|
{
|
|
"epoch": 0.5437516724645437,
|
|
"grad_norm": 3.3608622550964355,
|
|
"learning_rate": 9.90605113541664e-06,
|
|
"loss": 1.1263,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 0.544019266791544,
|
|
"grad_norm": 3.4457077980041504,
|
|
"learning_rate": 9.905880313743035e-06,
|
|
"loss": 1.1016,
|
|
"step": 2033
|
|
},
|
|
{
|
|
"epoch": 0.5442868611185443,
|
|
"grad_norm": 3.601628065109253,
|
|
"learning_rate": 9.905709338388499e-06,
|
|
"loss": 1.1218,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 0.5445544554455446,
|
|
"grad_norm": 3.8327248096466064,
|
|
"learning_rate": 9.90553820935839e-06,
|
|
"loss": 1.0964,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 0.5448220497725448,
|
|
"grad_norm": 3.6931264400482178,
|
|
"learning_rate": 9.905366926658068e-06,
|
|
"loss": 1.2855,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 0.5450896440995451,
|
|
"grad_norm": 3.9936089515686035,
|
|
"learning_rate": 9.9051954902929e-06,
|
|
"loss": 1.2546,
|
|
"step": 2037
|
|
},
|
|
{
|
|
"epoch": 0.5453572384265454,
|
|
"grad_norm": 4.2173991203308105,
|
|
"learning_rate": 9.905023900268255e-06,
|
|
"loss": 1.2468,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 0.5456248327535457,
|
|
"grad_norm": 3.5092899799346924,
|
|
"learning_rate": 9.904852156589508e-06,
|
|
"loss": 1.0156,
|
|
"step": 2039
|
|
},
|
|
{
|
|
"epoch": 0.5458924270805459,
|
|
"grad_norm": 3.5375232696533203,
|
|
"learning_rate": 9.90468025926204e-06,
|
|
"loss": 1.1003,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.5461600214075462,
|
|
"grad_norm": 3.232635974884033,
|
|
"learning_rate": 9.904508208291236e-06,
|
|
"loss": 1.1159,
|
|
"step": 2041
|
|
},
|
|
{
|
|
"epoch": 0.5464276157345465,
|
|
"grad_norm": 3.6317005157470703,
|
|
"learning_rate": 9.904336003682484e-06,
|
|
"loss": 1.2561,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 0.5466952100615466,
|
|
"grad_norm": 3.4912993907928467,
|
|
"learning_rate": 9.90416364544118e-06,
|
|
"loss": 1.2616,
|
|
"step": 2043
|
|
},
|
|
{
|
|
"epoch": 0.5469628043885469,
|
|
"grad_norm": 3.744119882583618,
|
|
"learning_rate": 9.903991133572722e-06,
|
|
"loss": 1.1231,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 0.5472303987155472,
|
|
"grad_norm": 3.8304286003112793,
|
|
"learning_rate": 9.903818468082515e-06,
|
|
"loss": 1.1488,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 0.5474979930425475,
|
|
"grad_norm": 4.009277820587158,
|
|
"learning_rate": 9.903645648975967e-06,
|
|
"loss": 1.1767,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 0.5477655873695477,
|
|
"grad_norm": 3.655991315841675,
|
|
"learning_rate": 9.903472676258494e-06,
|
|
"loss": 1.1274,
|
|
"step": 2047
|
|
},
|
|
{
|
|
"epoch": 0.548033181696548,
|
|
"grad_norm": 3.522969961166382,
|
|
"learning_rate": 9.903299549935514e-06,
|
|
"loss": 1.0944,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 0.5483007760235483,
|
|
"grad_norm": 3.9753992557525635,
|
|
"learning_rate": 9.903126270012446e-06,
|
|
"loss": 1.1597,
|
|
"step": 2049
|
|
},
|
|
{
|
|
"epoch": 0.5485683703505486,
|
|
"grad_norm": 3.4897477626800537,
|
|
"learning_rate": 9.902952836494724e-06,
|
|
"loss": 1.213,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.5488359646775488,
|
|
"grad_norm": 3.901291847229004,
|
|
"learning_rate": 9.902779249387777e-06,
|
|
"loss": 1.1803,
|
|
"step": 2051
|
|
},
|
|
{
|
|
"epoch": 0.5491035590045491,
|
|
"grad_norm": 3.8959672451019287,
|
|
"learning_rate": 9.902605508697045e-06,
|
|
"loss": 1.1598,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 0.5493711533315494,
|
|
"grad_norm": 3.5497238636016846,
|
|
"learning_rate": 9.90243161442797e-06,
|
|
"loss": 1.1193,
|
|
"step": 2053
|
|
},
|
|
{
|
|
"epoch": 0.5496387476585496,
|
|
"grad_norm": 3.6735222339630127,
|
|
"learning_rate": 9.902257566585997e-06,
|
|
"loss": 1.2209,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 0.5499063419855499,
|
|
"grad_norm": 3.902233123779297,
|
|
"learning_rate": 9.902083365176583e-06,
|
|
"loss": 1.2137,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 0.5501739363125502,
|
|
"grad_norm": 3.133127212524414,
|
|
"learning_rate": 9.90190901020518e-06,
|
|
"loss": 1.0771,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 0.5504415306395505,
|
|
"grad_norm": 3.489025115966797,
|
|
"learning_rate": 9.901734501677254e-06,
|
|
"loss": 1.0428,
|
|
"step": 2057
|
|
},
|
|
{
|
|
"epoch": 0.5507091249665507,
|
|
"grad_norm": 3.8350815773010254,
|
|
"learning_rate": 9.90155983959827e-06,
|
|
"loss": 1.1333,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 0.550976719293551,
|
|
"grad_norm": 3.393089771270752,
|
|
"learning_rate": 9.901385023973698e-06,
|
|
"loss": 1.1228,
|
|
"step": 2059
|
|
},
|
|
{
|
|
"epoch": 0.5512443136205513,
|
|
"grad_norm": 3.4112391471862793,
|
|
"learning_rate": 9.901210054809015e-06,
|
|
"loss": 1.0732,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.5515119079475516,
|
|
"grad_norm": 3.4298675060272217,
|
|
"learning_rate": 9.901034932109702e-06,
|
|
"loss": 1.1072,
|
|
"step": 2061
|
|
},
|
|
{
|
|
"epoch": 0.5517795022745517,
|
|
"grad_norm": 3.8485376834869385,
|
|
"learning_rate": 9.900859655881248e-06,
|
|
"loss": 1.2126,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 0.552047096601552,
|
|
"grad_norm": 3.713818073272705,
|
|
"learning_rate": 9.90068422612914e-06,
|
|
"loss": 1.0999,
|
|
"step": 2063
|
|
},
|
|
{
|
|
"epoch": 0.5523146909285523,
|
|
"grad_norm": 3.7916266918182373,
|
|
"learning_rate": 9.900508642858874e-06,
|
|
"loss": 1.2815,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 0.5525822852555525,
|
|
"grad_norm": 3.449904203414917,
|
|
"learning_rate": 9.900332906075951e-06,
|
|
"loss": 1.056,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 0.5528498795825528,
|
|
"grad_norm": 3.417433500289917,
|
|
"learning_rate": 9.900157015785876e-06,
|
|
"loss": 0.9831,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 0.5531174739095531,
|
|
"grad_norm": 4.198076248168945,
|
|
"learning_rate": 9.899980971994158e-06,
|
|
"loss": 1.2668,
|
|
"step": 2067
|
|
},
|
|
{
|
|
"epoch": 0.5533850682365534,
|
|
"grad_norm": 3.3924946784973145,
|
|
"learning_rate": 9.899804774706314e-06,
|
|
"loss": 1.1,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 0.5536526625635536,
|
|
"grad_norm": 3.6874961853027344,
|
|
"learning_rate": 9.899628423927861e-06,
|
|
"loss": 1.2336,
|
|
"step": 2069
|
|
},
|
|
{
|
|
"epoch": 0.5539202568905539,
|
|
"grad_norm": 3.614410161972046,
|
|
"learning_rate": 9.899451919664325e-06,
|
|
"loss": 1.1163,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.5541878512175542,
|
|
"grad_norm": 3.503385066986084,
|
|
"learning_rate": 9.899275261921236e-06,
|
|
"loss": 1.1361,
|
|
"step": 2071
|
|
},
|
|
{
|
|
"epoch": 0.5544554455445545,
|
|
"grad_norm": 3.857766628265381,
|
|
"learning_rate": 9.899098450704125e-06,
|
|
"loss": 1.2757,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 0.5547230398715547,
|
|
"grad_norm": 3.7878856658935547,
|
|
"learning_rate": 9.898921486018532e-06,
|
|
"loss": 1.1438,
|
|
"step": 2073
|
|
},
|
|
{
|
|
"epoch": 0.554990634198555,
|
|
"grad_norm": 3.2437705993652344,
|
|
"learning_rate": 9.898744367870001e-06,
|
|
"loss": 1.0622,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 0.5552582285255553,
|
|
"grad_norm": 3.193298816680908,
|
|
"learning_rate": 9.898567096264082e-06,
|
|
"loss": 1.0384,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 0.5555258228525555,
|
|
"grad_norm": 3.3267760276794434,
|
|
"learning_rate": 9.898389671206324e-06,
|
|
"loss": 1.0635,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 0.5557934171795558,
|
|
"grad_norm": 3.255155086517334,
|
|
"learning_rate": 9.898212092702288e-06,
|
|
"loss": 1.0574,
|
|
"step": 2077
|
|
},
|
|
{
|
|
"epoch": 0.5560610115065561,
|
|
"grad_norm": 3.881344795227051,
|
|
"learning_rate": 9.898034360757538e-06,
|
|
"loss": 1.2048,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 0.5563286058335564,
|
|
"grad_norm": 3.6974213123321533,
|
|
"learning_rate": 9.897856475377638e-06,
|
|
"loss": 1.2133,
|
|
"step": 2079
|
|
},
|
|
{
|
|
"epoch": 0.5565962001605566,
|
|
"grad_norm": 3.4741365909576416,
|
|
"learning_rate": 9.897678436568164e-06,
|
|
"loss": 1.1787,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.5568637944875569,
|
|
"grad_norm": 3.6926300525665283,
|
|
"learning_rate": 9.89750024433469e-06,
|
|
"loss": 1.1299,
|
|
"step": 2081
|
|
},
|
|
{
|
|
"epoch": 0.5571313888145571,
|
|
"grad_norm": 3.9215118885040283,
|
|
"learning_rate": 9.8973218986828e-06,
|
|
"loss": 1.2031,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 0.5573989831415574,
|
|
"grad_norm": 3.4052512645721436,
|
|
"learning_rate": 9.897143399618081e-06,
|
|
"loss": 1.1094,
|
|
"step": 2083
|
|
},
|
|
{
|
|
"epoch": 0.5576665774685576,
|
|
"grad_norm": 3.8671302795410156,
|
|
"learning_rate": 9.896964747146125e-06,
|
|
"loss": 1.2339,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 0.5579341717955579,
|
|
"grad_norm": 3.723543643951416,
|
|
"learning_rate": 9.896785941272524e-06,
|
|
"loss": 1.2115,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 0.5582017661225582,
|
|
"grad_norm": 3.7372453212738037,
|
|
"learning_rate": 9.896606982002886e-06,
|
|
"loss": 1.1701,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 0.5584693604495584,
|
|
"grad_norm": 3.7154757976531982,
|
|
"learning_rate": 9.896427869342812e-06,
|
|
"loss": 1.0744,
|
|
"step": 2087
|
|
},
|
|
{
|
|
"epoch": 0.5587369547765587,
|
|
"grad_norm": 3.733175039291382,
|
|
"learning_rate": 9.896248603297915e-06,
|
|
"loss": 1.0824,
|
|
"step": 2088
|
|
},
|
|
{
|
|
"epoch": 0.559004549103559,
|
|
"grad_norm": 3.589911460876465,
|
|
"learning_rate": 9.896069183873809e-06,
|
|
"loss": 1.1208,
|
|
"step": 2089
|
|
},
|
|
{
|
|
"epoch": 0.5592721434305593,
|
|
"grad_norm": 3.778308868408203,
|
|
"learning_rate": 9.895889611076119e-06,
|
|
"loss": 1.2553,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.5595397377575595,
|
|
"grad_norm": 3.737415313720703,
|
|
"learning_rate": 9.895709884910464e-06,
|
|
"loss": 1.1706,
|
|
"step": 2091
|
|
},
|
|
{
|
|
"epoch": 0.5598073320845598,
|
|
"grad_norm": 3.6272811889648438,
|
|
"learning_rate": 9.895530005382478e-06,
|
|
"loss": 1.099,
|
|
"step": 2092
|
|
},
|
|
{
|
|
"epoch": 0.5600749264115601,
|
|
"grad_norm": 3.2790331840515137,
|
|
"learning_rate": 9.895349972497796e-06,
|
|
"loss": 0.9707,
|
|
"step": 2093
|
|
},
|
|
{
|
|
"epoch": 0.5603425207385604,
|
|
"grad_norm": 3.618961811065674,
|
|
"learning_rate": 9.895169786262055e-06,
|
|
"loss": 1.1975,
|
|
"step": 2094
|
|
},
|
|
{
|
|
"epoch": 0.5606101150655606,
|
|
"grad_norm": 3.8534488677978516,
|
|
"learning_rate": 9.894989446680901e-06,
|
|
"loss": 1.2889,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 0.5608777093925609,
|
|
"grad_norm": 3.748040199279785,
|
|
"learning_rate": 9.894808953759984e-06,
|
|
"loss": 1.1573,
|
|
"step": 2096
|
|
},
|
|
{
|
|
"epoch": 0.5611453037195612,
|
|
"grad_norm": 3.8036909103393555,
|
|
"learning_rate": 9.894628307504959e-06,
|
|
"loss": 1.0905,
|
|
"step": 2097
|
|
},
|
|
{
|
|
"epoch": 0.5614128980465614,
|
|
"grad_norm": 3.3763818740844727,
|
|
"learning_rate": 9.894447507921482e-06,
|
|
"loss": 0.9967,
|
|
"step": 2098
|
|
},
|
|
{
|
|
"epoch": 0.5616804923735617,
|
|
"grad_norm": 3.4757957458496094,
|
|
"learning_rate": 9.894266555015218e-06,
|
|
"loss": 1.199,
|
|
"step": 2099
|
|
},
|
|
{
|
|
"epoch": 0.561948086700562,
|
|
"grad_norm": 3.526400089263916,
|
|
"learning_rate": 9.894085448791836e-06,
|
|
"loss": 1.3028,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.5622156810275623,
|
|
"grad_norm": 3.7849979400634766,
|
|
"learning_rate": 9.89390418925701e-06,
|
|
"loss": 1.166,
|
|
"step": 2101
|
|
},
|
|
{
|
|
"epoch": 0.5624832753545624,
|
|
"grad_norm": 3.8639450073242188,
|
|
"learning_rate": 9.893722776416415e-06,
|
|
"loss": 1.1507,
|
|
"step": 2102
|
|
},
|
|
{
|
|
"epoch": 0.5627508696815627,
|
|
"grad_norm": 3.6054041385650635,
|
|
"learning_rate": 9.893541210275736e-06,
|
|
"loss": 1.216,
|
|
"step": 2103
|
|
},
|
|
{
|
|
"epoch": 0.563018464008563,
|
|
"grad_norm": 3.552934408187866,
|
|
"learning_rate": 9.893359490840662e-06,
|
|
"loss": 1.2079,
|
|
"step": 2104
|
|
},
|
|
{
|
|
"epoch": 0.5632860583355633,
|
|
"grad_norm": 3.804652690887451,
|
|
"learning_rate": 9.893177618116885e-06,
|
|
"loss": 1.2398,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 0.5635536526625635,
|
|
"grad_norm": 3.506537675857544,
|
|
"learning_rate": 9.892995592110099e-06,
|
|
"loss": 1.1581,
|
|
"step": 2106
|
|
},
|
|
{
|
|
"epoch": 0.5638212469895638,
|
|
"grad_norm": 3.4469141960144043,
|
|
"learning_rate": 9.89281341282601e-06,
|
|
"loss": 1.1475,
|
|
"step": 2107
|
|
},
|
|
{
|
|
"epoch": 0.5640888413165641,
|
|
"grad_norm": 3.478013753890991,
|
|
"learning_rate": 9.892631080270325e-06,
|
|
"loss": 1.2376,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 0.5643564356435643,
|
|
"grad_norm": 3.774752378463745,
|
|
"learning_rate": 9.89244859444875e-06,
|
|
"loss": 1.1787,
|
|
"step": 2109
|
|
},
|
|
{
|
|
"epoch": 0.5646240299705646,
|
|
"grad_norm": 3.786384344100952,
|
|
"learning_rate": 9.89226595536701e-06,
|
|
"loss": 1.2119,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.5648916242975649,
|
|
"grad_norm": 3.7795796394348145,
|
|
"learning_rate": 9.892083163030822e-06,
|
|
"loss": 1.1884,
|
|
"step": 2111
|
|
},
|
|
{
|
|
"epoch": 0.5651592186245652,
|
|
"grad_norm": 3.500213146209717,
|
|
"learning_rate": 9.89190021744591e-06,
|
|
"loss": 1.01,
|
|
"step": 2112
|
|
},
|
|
{
|
|
"epoch": 0.5654268129515654,
|
|
"grad_norm": 3.490860939025879,
|
|
"learning_rate": 9.891717118618008e-06,
|
|
"loss": 1.1551,
|
|
"step": 2113
|
|
},
|
|
{
|
|
"epoch": 0.5656944072785657,
|
|
"grad_norm": 3.658153772354126,
|
|
"learning_rate": 9.891533866552852e-06,
|
|
"loss": 1.2155,
|
|
"step": 2114
|
|
},
|
|
{
|
|
"epoch": 0.565962001605566,
|
|
"grad_norm": 3.7145233154296875,
|
|
"learning_rate": 9.891350461256179e-06,
|
|
"loss": 1.2243,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 0.5662295959325663,
|
|
"grad_norm": 3.5172886848449707,
|
|
"learning_rate": 9.89116690273374e-06,
|
|
"loss": 1.206,
|
|
"step": 2116
|
|
},
|
|
{
|
|
"epoch": 0.5664971902595665,
|
|
"grad_norm": 3.58321475982666,
|
|
"learning_rate": 9.890983190991278e-06,
|
|
"loss": 1.2536,
|
|
"step": 2117
|
|
},
|
|
{
|
|
"epoch": 0.5667647845865668,
|
|
"grad_norm": 3.534895420074463,
|
|
"learning_rate": 9.890799326034556e-06,
|
|
"loss": 1.1384,
|
|
"step": 2118
|
|
},
|
|
{
|
|
"epoch": 0.5670323789135671,
|
|
"grad_norm": 3.564685583114624,
|
|
"learning_rate": 9.890615307869326e-06,
|
|
"loss": 1.1677,
|
|
"step": 2119
|
|
},
|
|
{
|
|
"epoch": 0.5672999732405672,
|
|
"grad_norm": 4.110241413116455,
|
|
"learning_rate": 9.89043113650136e-06,
|
|
"loss": 1.1706,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.5675675675675675,
|
|
"grad_norm": 3.5671589374542236,
|
|
"learning_rate": 9.890246811936421e-06,
|
|
"loss": 1.1117,
|
|
"step": 2121
|
|
},
|
|
{
|
|
"epoch": 0.5678351618945678,
|
|
"grad_norm": 3.429584264755249,
|
|
"learning_rate": 9.890062334180286e-06,
|
|
"loss": 1.1273,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 0.5681027562215681,
|
|
"grad_norm": 3.8296971321105957,
|
|
"learning_rate": 9.889877703238732e-06,
|
|
"loss": 1.2361,
|
|
"step": 2123
|
|
},
|
|
{
|
|
"epoch": 0.5683703505485683,
|
|
"grad_norm": 3.43332839012146,
|
|
"learning_rate": 9.889692919117546e-06,
|
|
"loss": 1.0847,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 0.5686379448755686,
|
|
"grad_norm": 3.417013168334961,
|
|
"learning_rate": 9.889507981822515e-06,
|
|
"loss": 1.1709,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 0.5689055392025689,
|
|
"grad_norm": 3.507187843322754,
|
|
"learning_rate": 9.88932289135943e-06,
|
|
"loss": 1.1102,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 0.5691731335295692,
|
|
"grad_norm": 3.821469783782959,
|
|
"learning_rate": 9.889137647734094e-06,
|
|
"loss": 1.1736,
|
|
"step": 2127
|
|
},
|
|
{
|
|
"epoch": 0.5694407278565694,
|
|
"grad_norm": 3.63112735748291,
|
|
"learning_rate": 9.888952250952305e-06,
|
|
"loss": 1.1239,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 0.5697083221835697,
|
|
"grad_norm": 4.068948745727539,
|
|
"learning_rate": 9.888766701019873e-06,
|
|
"loss": 1.2714,
|
|
"step": 2129
|
|
},
|
|
{
|
|
"epoch": 0.56997591651057,
|
|
"grad_norm": 3.552907943725586,
|
|
"learning_rate": 9.88858099794261e-06,
|
|
"loss": 1.0754,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.5702435108375702,
|
|
"grad_norm": 4.019528388977051,
|
|
"learning_rate": 9.888395141726335e-06,
|
|
"loss": 1.2183,
|
|
"step": 2131
|
|
},
|
|
{
|
|
"epoch": 0.5705111051645705,
|
|
"grad_norm": 3.55165696144104,
|
|
"learning_rate": 9.888209132376866e-06,
|
|
"loss": 1.0137,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 0.5707786994915708,
|
|
"grad_norm": 3.8330440521240234,
|
|
"learning_rate": 9.888022969900036e-06,
|
|
"loss": 1.2188,
|
|
"step": 2133
|
|
},
|
|
{
|
|
"epoch": 0.5710462938185711,
|
|
"grad_norm": 3.5315418243408203,
|
|
"learning_rate": 9.887836654301671e-06,
|
|
"loss": 1.1769,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 0.5713138881455713,
|
|
"grad_norm": 3.613337755203247,
|
|
"learning_rate": 9.887650185587612e-06,
|
|
"loss": 1.1539,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 0.5715814824725716,
|
|
"grad_norm": 3.3528521060943604,
|
|
"learning_rate": 9.887463563763695e-06,
|
|
"loss": 1.1673,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 0.5718490767995719,
|
|
"grad_norm": 3.672227382659912,
|
|
"learning_rate": 9.887276788835772e-06,
|
|
"loss": 1.3125,
|
|
"step": 2137
|
|
},
|
|
{
|
|
"epoch": 0.5721166711265722,
|
|
"grad_norm": 3.4449851512908936,
|
|
"learning_rate": 9.88708986080969e-06,
|
|
"loss": 1.1545,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 0.5723842654535723,
|
|
"grad_norm": 3.5263442993164062,
|
|
"learning_rate": 9.886902779691306e-06,
|
|
"loss": 1.1188,
|
|
"step": 2139
|
|
},
|
|
{
|
|
"epoch": 0.5726518597805726,
|
|
"grad_norm": 3.499302864074707,
|
|
"learning_rate": 9.88671554548648e-06,
|
|
"loss": 1.2045,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.572919454107573,
|
|
"grad_norm": 3.5615437030792236,
|
|
"learning_rate": 9.886528158201076e-06,
|
|
"loss": 1.1357,
|
|
"step": 2141
|
|
},
|
|
{
|
|
"epoch": 0.5731870484345731,
|
|
"grad_norm": 3.0443129539489746,
|
|
"learning_rate": 9.886340617840968e-06,
|
|
"loss": 0.9957,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 0.5734546427615734,
|
|
"grad_norm": 3.494044542312622,
|
|
"learning_rate": 9.886152924412027e-06,
|
|
"loss": 1.1044,
|
|
"step": 2143
|
|
},
|
|
{
|
|
"epoch": 0.5737222370885737,
|
|
"grad_norm": 3.9444684982299805,
|
|
"learning_rate": 9.885965077920135e-06,
|
|
"loss": 1.2436,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 0.573989831415574,
|
|
"grad_norm": 3.808692455291748,
|
|
"learning_rate": 9.885777078371174e-06,
|
|
"loss": 1.2591,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 0.5742574257425742,
|
|
"grad_norm": 3.586069107055664,
|
|
"learning_rate": 9.885588925771037e-06,
|
|
"loss": 1.1695,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 0.5745250200695745,
|
|
"grad_norm": 3.6232335567474365,
|
|
"learning_rate": 9.885400620125616e-06,
|
|
"loss": 1.2411,
|
|
"step": 2147
|
|
},
|
|
{
|
|
"epoch": 0.5747926143965748,
|
|
"grad_norm": 4.283682346343994,
|
|
"learning_rate": 9.885212161440808e-06,
|
|
"loss": 1.2519,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 0.5750602087235751,
|
|
"grad_norm": 3.391270160675049,
|
|
"learning_rate": 9.885023549722518e-06,
|
|
"loss": 1.1671,
|
|
"step": 2149
|
|
},
|
|
{
|
|
"epoch": 0.5753278030505753,
|
|
"grad_norm": 3.8860385417938232,
|
|
"learning_rate": 9.884834784976658e-06,
|
|
"loss": 1.1987,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.5755953973775756,
|
|
"grad_norm": 3.611828565597534,
|
|
"learning_rate": 9.884645867209133e-06,
|
|
"loss": 1.2138,
|
|
"step": 2151
|
|
},
|
|
{
|
|
"epoch": 0.5758629917045759,
|
|
"grad_norm": 3.7692012786865234,
|
|
"learning_rate": 9.884456796425869e-06,
|
|
"loss": 1.2613,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 0.5761305860315761,
|
|
"grad_norm": 3.578130006790161,
|
|
"learning_rate": 9.884267572632786e-06,
|
|
"loss": 1.2619,
|
|
"step": 2153
|
|
},
|
|
{
|
|
"epoch": 0.5763981803585764,
|
|
"grad_norm": 3.362647771835327,
|
|
"learning_rate": 9.884078195835812e-06,
|
|
"loss": 1.138,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 0.5766657746855767,
|
|
"grad_norm": 3.4358744621276855,
|
|
"learning_rate": 9.883888666040876e-06,
|
|
"loss": 1.1468,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 0.576933369012577,
|
|
"grad_norm": 3.8814890384674072,
|
|
"learning_rate": 9.88369898325392e-06,
|
|
"loss": 1.2645,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 0.5772009633395772,
|
|
"grad_norm": 3.75591778755188,
|
|
"learning_rate": 9.883509147480883e-06,
|
|
"loss": 1.2342,
|
|
"step": 2157
|
|
},
|
|
{
|
|
"epoch": 0.5774685576665775,
|
|
"grad_norm": 3.7901089191436768,
|
|
"learning_rate": 9.883319158727714e-06,
|
|
"loss": 1.2423,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 0.5777361519935778,
|
|
"grad_norm": 3.8552255630493164,
|
|
"learning_rate": 9.88312901700036e-06,
|
|
"loss": 1.2367,
|
|
"step": 2159
|
|
},
|
|
{
|
|
"epoch": 0.578003746320578,
|
|
"grad_norm": 3.6209921836853027,
|
|
"learning_rate": 9.882938722304785e-06,
|
|
"loss": 1.0368,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.5782713406475782,
|
|
"grad_norm": 3.403076171875,
|
|
"learning_rate": 9.882748274646942e-06,
|
|
"loss": 1.122,
|
|
"step": 2161
|
|
},
|
|
{
|
|
"epoch": 0.5785389349745785,
|
|
"grad_norm": 3.6946861743927,
|
|
"learning_rate": 9.882557674032804e-06,
|
|
"loss": 1.2632,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 0.5788065293015788,
|
|
"grad_norm": 3.478731393814087,
|
|
"learning_rate": 9.882366920468336e-06,
|
|
"loss": 1.1385,
|
|
"step": 2163
|
|
},
|
|
{
|
|
"epoch": 0.5790741236285791,
|
|
"grad_norm": 3.849747896194458,
|
|
"learning_rate": 9.882176013959517e-06,
|
|
"loss": 1.1953,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 0.5793417179555793,
|
|
"grad_norm": 3.2899606227874756,
|
|
"learning_rate": 9.881984954512325e-06,
|
|
"loss": 1.1515,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 0.5796093122825796,
|
|
"grad_norm": 3.6500260829925537,
|
|
"learning_rate": 9.881793742132748e-06,
|
|
"loss": 1.0992,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 0.5798769066095799,
|
|
"grad_norm": 3.4262735843658447,
|
|
"learning_rate": 9.881602376826773e-06,
|
|
"loss": 1.202,
|
|
"step": 2167
|
|
},
|
|
{
|
|
"epoch": 0.5801445009365801,
|
|
"grad_norm": 3.7987382411956787,
|
|
"learning_rate": 9.881410858600397e-06,
|
|
"loss": 1.1983,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 0.5804120952635804,
|
|
"grad_norm": 3.716843605041504,
|
|
"learning_rate": 9.88121918745962e-06,
|
|
"loss": 1.2688,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 0.5806796895905807,
|
|
"grad_norm": 3.5449235439300537,
|
|
"learning_rate": 9.881027363410441e-06,
|
|
"loss": 1.1251,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.580947283917581,
|
|
"grad_norm": 3.4094340801239014,
|
|
"learning_rate": 9.880835386458873e-06,
|
|
"loss": 1.1097,
|
|
"step": 2171
|
|
},
|
|
{
|
|
"epoch": 0.5812148782445812,
|
|
"grad_norm": 3.626004934310913,
|
|
"learning_rate": 9.880643256610931e-06,
|
|
"loss": 1.2376,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 0.5814824725715815,
|
|
"grad_norm": 3.6833388805389404,
|
|
"learning_rate": 9.880450973872632e-06,
|
|
"loss": 1.2113,
|
|
"step": 2173
|
|
},
|
|
{
|
|
"epoch": 0.5817500668985818,
|
|
"grad_norm": 3.619957685470581,
|
|
"learning_rate": 9.880258538250001e-06,
|
|
"loss": 1.1476,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 0.5820176612255821,
|
|
"grad_norm": 3.6567726135253906,
|
|
"learning_rate": 9.880065949749063e-06,
|
|
"loss": 1.1462,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 0.5822852555525823,
|
|
"grad_norm": 3.6730329990386963,
|
|
"learning_rate": 9.879873208375854e-06,
|
|
"loss": 1.1644,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 0.5825528498795826,
|
|
"grad_norm": 3.4919209480285645,
|
|
"learning_rate": 9.879680314136409e-06,
|
|
"loss": 1.1,
|
|
"step": 2177
|
|
},
|
|
{
|
|
"epoch": 0.5828204442065829,
|
|
"grad_norm": 3.7565135955810547,
|
|
"learning_rate": 9.879487267036774e-06,
|
|
"loss": 1.176,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 0.583088038533583,
|
|
"grad_norm": 3.6856677532196045,
|
|
"learning_rate": 9.879294067082994e-06,
|
|
"loss": 1.0928,
|
|
"step": 2179
|
|
},
|
|
{
|
|
"epoch": 0.5833556328605833,
|
|
"grad_norm": 3.7307024002075195,
|
|
"learning_rate": 9.87910071428112e-06,
|
|
"loss": 1.2073,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.5836232271875836,
|
|
"grad_norm": 3.657536745071411,
|
|
"learning_rate": 9.878907208637214e-06,
|
|
"loss": 1.1352,
|
|
"step": 2181
|
|
},
|
|
{
|
|
"epoch": 0.5838908215145839,
|
|
"grad_norm": 3.54951810836792,
|
|
"learning_rate": 9.878713550157331e-06,
|
|
"loss": 1.1183,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 0.5841584158415841,
|
|
"grad_norm": 3.6624770164489746,
|
|
"learning_rate": 9.878519738847543e-06,
|
|
"loss": 1.1002,
|
|
"step": 2183
|
|
},
|
|
{
|
|
"epoch": 0.5844260101685844,
|
|
"grad_norm": 3.5097527503967285,
|
|
"learning_rate": 9.87832577471392e-06,
|
|
"loss": 1.2197,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 0.5846936044955847,
|
|
"grad_norm": 3.292865037918091,
|
|
"learning_rate": 9.878131657762535e-06,
|
|
"loss": 1.0721,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 0.584961198822585,
|
|
"grad_norm": 3.937479019165039,
|
|
"learning_rate": 9.877937387999473e-06,
|
|
"loss": 1.3144,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 0.5852287931495852,
|
|
"grad_norm": 3.6731297969818115,
|
|
"learning_rate": 9.877742965430816e-06,
|
|
"loss": 1.0068,
|
|
"step": 2187
|
|
},
|
|
{
|
|
"epoch": 0.5854963874765855,
|
|
"grad_norm": 3.6115329265594482,
|
|
"learning_rate": 9.877548390062656e-06,
|
|
"loss": 1.1998,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 0.5857639818035858,
|
|
"grad_norm": 3.6412646770477295,
|
|
"learning_rate": 9.87735366190109e-06,
|
|
"loss": 1.1148,
|
|
"step": 2189
|
|
},
|
|
{
|
|
"epoch": 0.586031576130586,
|
|
"grad_norm": 3.576279401779175,
|
|
"learning_rate": 9.877158780952218e-06,
|
|
"loss": 1.1437,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.5862991704575863,
|
|
"grad_norm": 3.5560824871063232,
|
|
"learning_rate": 9.876963747222142e-06,
|
|
"loss": 1.1313,
|
|
"step": 2191
|
|
},
|
|
{
|
|
"epoch": 0.5865667647845866,
|
|
"grad_norm": 3.5082075595855713,
|
|
"learning_rate": 9.876768560716972e-06,
|
|
"loss": 1.1694,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 0.5868343591115869,
|
|
"grad_norm": 3.724195718765259,
|
|
"learning_rate": 9.876573221442824e-06,
|
|
"loss": 1.212,
|
|
"step": 2193
|
|
},
|
|
{
|
|
"epoch": 0.5871019534385871,
|
|
"grad_norm": 3.5083227157592773,
|
|
"learning_rate": 9.876377729405817e-06,
|
|
"loss": 1.1469,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 0.5873695477655874,
|
|
"grad_norm": 3.8225934505462646,
|
|
"learning_rate": 9.876182084612076e-06,
|
|
"loss": 1.2833,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 0.5876371420925877,
|
|
"grad_norm": 4.439055919647217,
|
|
"learning_rate": 9.875986287067726e-06,
|
|
"loss": 1.2021,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 0.587904736419588,
|
|
"grad_norm": 3.612614393234253,
|
|
"learning_rate": 9.875790336778903e-06,
|
|
"loss": 1.1595,
|
|
"step": 2197
|
|
},
|
|
{
|
|
"epoch": 0.5881723307465881,
|
|
"grad_norm": 3.598160982131958,
|
|
"learning_rate": 9.875594233751746e-06,
|
|
"loss": 1.1245,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 0.5884399250735884,
|
|
"grad_norm": 3.281412124633789,
|
|
"learning_rate": 9.875397977992397e-06,
|
|
"loss": 1.075,
|
|
"step": 2199
|
|
},
|
|
{
|
|
"epoch": 0.5887075194005887,
|
|
"grad_norm": 3.0353622436523438,
|
|
"learning_rate": 9.875201569507004e-06,
|
|
"loss": 1.0529,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.5889751137275889,
|
|
"grad_norm": 3.3993475437164307,
|
|
"learning_rate": 9.875005008301719e-06,
|
|
"loss": 1.2462,
|
|
"step": 2201
|
|
},
|
|
{
|
|
"epoch": 0.5892427080545892,
|
|
"grad_norm": 3.3722541332244873,
|
|
"learning_rate": 9.8748082943827e-06,
|
|
"loss": 1.1508,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 0.5895103023815895,
|
|
"grad_norm": 3.270134687423706,
|
|
"learning_rate": 9.874611427756111e-06,
|
|
"loss": 1.2163,
|
|
"step": 2203
|
|
},
|
|
{
|
|
"epoch": 0.5897778967085898,
|
|
"grad_norm": 3.6814143657684326,
|
|
"learning_rate": 9.874414408428116e-06,
|
|
"loss": 1.2098,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 0.59004549103559,
|
|
"grad_norm": 3.3593337535858154,
|
|
"learning_rate": 9.874217236404889e-06,
|
|
"loss": 1.1041,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 0.5903130853625903,
|
|
"grad_norm": 3.556748628616333,
|
|
"learning_rate": 9.874019911692606e-06,
|
|
"loss": 1.1655,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 0.5905806796895906,
|
|
"grad_norm": 3.696110486984253,
|
|
"learning_rate": 9.873822434297448e-06,
|
|
"loss": 1.0674,
|
|
"step": 2207
|
|
},
|
|
{
|
|
"epoch": 0.5908482740165909,
|
|
"grad_norm": 3.481388807296753,
|
|
"learning_rate": 9.873624804225602e-06,
|
|
"loss": 1.1462,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 0.5911158683435911,
|
|
"grad_norm": 3.828707695007324,
|
|
"learning_rate": 9.873427021483256e-06,
|
|
"loss": 1.1755,
|
|
"step": 2209
|
|
},
|
|
{
|
|
"epoch": 0.5913834626705914,
|
|
"grad_norm": 3.4342329502105713,
|
|
"learning_rate": 9.87322908607661e-06,
|
|
"loss": 1.1296,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.5916510569975917,
|
|
"grad_norm": 4.072646141052246,
|
|
"learning_rate": 9.873030998011861e-06,
|
|
"loss": 1.4232,
|
|
"step": 2211
|
|
},
|
|
{
|
|
"epoch": 0.5919186513245919,
|
|
"grad_norm": 3.8405468463897705,
|
|
"learning_rate": 9.872832757295216e-06,
|
|
"loss": 1.2178,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 0.5921862456515922,
|
|
"grad_norm": 3.6950206756591797,
|
|
"learning_rate": 9.872634363932887e-06,
|
|
"loss": 1.1332,
|
|
"step": 2213
|
|
},
|
|
{
|
|
"epoch": 0.5924538399785925,
|
|
"grad_norm": 4.053956985473633,
|
|
"learning_rate": 9.872435817931085e-06,
|
|
"loss": 1.3148,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 0.5927214343055928,
|
|
"grad_norm": 3.2921195030212402,
|
|
"learning_rate": 9.87223711929603e-06,
|
|
"loss": 1.0644,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 0.592989028632593,
|
|
"grad_norm": 3.3384501934051514,
|
|
"learning_rate": 9.87203826803395e-06,
|
|
"loss": 1.1453,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 0.5932566229595932,
|
|
"grad_norm": 3.292581558227539,
|
|
"learning_rate": 9.871839264151071e-06,
|
|
"loss": 1.0399,
|
|
"step": 2217
|
|
},
|
|
{
|
|
"epoch": 0.5935242172865935,
|
|
"grad_norm": 3.5483226776123047,
|
|
"learning_rate": 9.871640107653629e-06,
|
|
"loss": 1.2065,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 0.5937918116135938,
|
|
"grad_norm": 3.443068742752075,
|
|
"learning_rate": 9.87144079854786e-06,
|
|
"loss": 1.1305,
|
|
"step": 2219
|
|
},
|
|
{
|
|
"epoch": 0.594059405940594,
|
|
"grad_norm": 3.495704412460327,
|
|
"learning_rate": 9.871241336840009e-06,
|
|
"loss": 1.1877,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.5943270002675943,
|
|
"grad_norm": 3.619189739227295,
|
|
"learning_rate": 9.871041722536326e-06,
|
|
"loss": 1.1417,
|
|
"step": 2221
|
|
},
|
|
{
|
|
"epoch": 0.5945945945945946,
|
|
"grad_norm": 3.7714147567749023,
|
|
"learning_rate": 9.87084195564306e-06,
|
|
"loss": 1.2656,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 0.5948621889215948,
|
|
"grad_norm": 3.2320003509521484,
|
|
"learning_rate": 9.870642036166474e-06,
|
|
"loss": 0.9794,
|
|
"step": 2223
|
|
},
|
|
{
|
|
"epoch": 0.5951297832485951,
|
|
"grad_norm": 3.6784067153930664,
|
|
"learning_rate": 9.870441964112826e-06,
|
|
"loss": 1.149,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 0.5953973775755954,
|
|
"grad_norm": 3.8272829055786133,
|
|
"learning_rate": 9.870241739488387e-06,
|
|
"loss": 1.2293,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 0.5956649719025957,
|
|
"grad_norm": 3.3917317390441895,
|
|
"learning_rate": 9.870041362299428e-06,
|
|
"loss": 1.0405,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 0.5959325662295959,
|
|
"grad_norm": 3.6060194969177246,
|
|
"learning_rate": 9.869840832552224e-06,
|
|
"loss": 1.2424,
|
|
"step": 2227
|
|
},
|
|
{
|
|
"epoch": 0.5962001605565962,
|
|
"grad_norm": 3.5458180904388428,
|
|
"learning_rate": 9.86964015025306e-06,
|
|
"loss": 1.2202,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 0.5964677548835965,
|
|
"grad_norm": 3.5996251106262207,
|
|
"learning_rate": 9.86943931540822e-06,
|
|
"loss": 1.2383,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 0.5967353492105968,
|
|
"grad_norm": 3.6928818225860596,
|
|
"learning_rate": 9.869238328023996e-06,
|
|
"loss": 1.0798,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.597002943537597,
|
|
"grad_norm": 3.3863589763641357,
|
|
"learning_rate": 9.869037188106684e-06,
|
|
"loss": 1.0548,
|
|
"step": 2231
|
|
},
|
|
{
|
|
"epoch": 0.5972705378645973,
|
|
"grad_norm": 3.744899272918701,
|
|
"learning_rate": 9.868835895662588e-06,
|
|
"loss": 1.1532,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 0.5975381321915976,
|
|
"grad_norm": 4.080715656280518,
|
|
"learning_rate": 9.868634450698009e-06,
|
|
"loss": 1.2823,
|
|
"step": 2233
|
|
},
|
|
{
|
|
"epoch": 0.5978057265185978,
|
|
"grad_norm": 4.020185947418213,
|
|
"learning_rate": 9.868432853219259e-06,
|
|
"loss": 1.3154,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 0.598073320845598,
|
|
"grad_norm": 3.684755325317383,
|
|
"learning_rate": 9.868231103232655e-06,
|
|
"loss": 1.1825,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 0.5983409151725984,
|
|
"grad_norm": 3.9021434783935547,
|
|
"learning_rate": 9.868029200744515e-06,
|
|
"loss": 1.3453,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 0.5986085094995987,
|
|
"grad_norm": 3.224306344985962,
|
|
"learning_rate": 9.867827145761164e-06,
|
|
"loss": 1.1202,
|
|
"step": 2237
|
|
},
|
|
{
|
|
"epoch": 0.5988761038265988,
|
|
"grad_norm": 3.318912982940674,
|
|
"learning_rate": 9.86762493828893e-06,
|
|
"loss": 1.0726,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 0.5991436981535991,
|
|
"grad_norm": 3.5440762042999268,
|
|
"learning_rate": 9.867422578334154e-06,
|
|
"loss": 1.1485,
|
|
"step": 2239
|
|
},
|
|
{
|
|
"epoch": 0.5994112924805994,
|
|
"grad_norm": 3.5095126628875732,
|
|
"learning_rate": 9.867220065903167e-06,
|
|
"loss": 1.1142,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.5996788868075997,
|
|
"grad_norm": 3.494436502456665,
|
|
"learning_rate": 9.867017401002316e-06,
|
|
"loss": 1.0809,
|
|
"step": 2241
|
|
},
|
|
{
|
|
"epoch": 0.5999464811345999,
|
|
"grad_norm": 3.985200881958008,
|
|
"learning_rate": 9.86681458363795e-06,
|
|
"loss": 1.1823,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 0.6002140754616002,
|
|
"grad_norm": 3.566523313522339,
|
|
"learning_rate": 9.866611613816425e-06,
|
|
"loss": 1.2669,
|
|
"step": 2243
|
|
},
|
|
{
|
|
"epoch": 0.6004816697886005,
|
|
"grad_norm": 3.53113055229187,
|
|
"learning_rate": 9.866408491544095e-06,
|
|
"loss": 1.0821,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 0.6007492641156007,
|
|
"grad_norm": 3.8554863929748535,
|
|
"learning_rate": 9.866205216827323e-06,
|
|
"loss": 1.3485,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 0.601016858442601,
|
|
"grad_norm": 3.4552130699157715,
|
|
"learning_rate": 9.866001789672479e-06,
|
|
"loss": 1.1277,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 0.6012844527696013,
|
|
"grad_norm": 3.2801413536071777,
|
|
"learning_rate": 9.865798210085935e-06,
|
|
"loss": 1.0526,
|
|
"step": 2247
|
|
},
|
|
{
|
|
"epoch": 0.6015520470966016,
|
|
"grad_norm": 3.6641762256622314,
|
|
"learning_rate": 9.865594478074068e-06,
|
|
"loss": 1.1389,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 0.6018196414236018,
|
|
"grad_norm": 3.1713666915893555,
|
|
"learning_rate": 9.865390593643261e-06,
|
|
"loss": 0.9773,
|
|
"step": 2249
|
|
},
|
|
{
|
|
"epoch": 0.6020872357506021,
|
|
"grad_norm": 3.2642340660095215,
|
|
"learning_rate": 9.8651865567999e-06,
|
|
"loss": 1.1162,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.6023548300776024,
|
|
"grad_norm": 3.8581626415252686,
|
|
"learning_rate": 9.864982367550375e-06,
|
|
"loss": 1.2288,
|
|
"step": 2251
|
|
},
|
|
{
|
|
"epoch": 0.6026224244046027,
|
|
"grad_norm": 3.619734525680542,
|
|
"learning_rate": 9.864778025901086e-06,
|
|
"loss": 1.1009,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 0.6028900187316029,
|
|
"grad_norm": 3.6816861629486084,
|
|
"learning_rate": 9.86457353185843e-06,
|
|
"loss": 1.2656,
|
|
"step": 2253
|
|
},
|
|
{
|
|
"epoch": 0.6031576130586032,
|
|
"grad_norm": 3.9430642127990723,
|
|
"learning_rate": 9.864368885428816e-06,
|
|
"loss": 1.2013,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 0.6034252073856035,
|
|
"grad_norm": 3.3938138484954834,
|
|
"learning_rate": 9.864164086618656e-06,
|
|
"loss": 1.0831,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 0.6036928017126036,
|
|
"grad_norm": 3.3266994953155518,
|
|
"learning_rate": 9.863959135434361e-06,
|
|
"loss": 1.1322,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 0.6039603960396039,
|
|
"grad_norm": 3.3137824535369873,
|
|
"learning_rate": 9.863754031882355e-06,
|
|
"loss": 1.1232,
|
|
"step": 2257
|
|
},
|
|
{
|
|
"epoch": 0.6042279903666042,
|
|
"grad_norm": 3.363191604614258,
|
|
"learning_rate": 9.863548775969061e-06,
|
|
"loss": 1.0118,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 0.6044955846936045,
|
|
"grad_norm": 3.245950222015381,
|
|
"learning_rate": 9.863343367700909e-06,
|
|
"loss": 1.0168,
|
|
"step": 2259
|
|
},
|
|
{
|
|
"epoch": 0.6047631790206047,
|
|
"grad_norm": 3.398611545562744,
|
|
"learning_rate": 9.863137807084336e-06,
|
|
"loss": 1.1561,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.605030773347605,
|
|
"grad_norm": 3.793672800064087,
|
|
"learning_rate": 9.862932094125778e-06,
|
|
"loss": 1.0614,
|
|
"step": 2261
|
|
},
|
|
{
|
|
"epoch": 0.6052983676746053,
|
|
"grad_norm": 3.716275691986084,
|
|
"learning_rate": 9.86272622883168e-06,
|
|
"loss": 1.2033,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 0.6055659620016056,
|
|
"grad_norm": 3.418994903564453,
|
|
"learning_rate": 9.862520211208493e-06,
|
|
"loss": 1.1246,
|
|
"step": 2263
|
|
},
|
|
{
|
|
"epoch": 0.6058335563286058,
|
|
"grad_norm": 3.4987545013427734,
|
|
"learning_rate": 9.862314041262668e-06,
|
|
"loss": 1.1269,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 0.6061011506556061,
|
|
"grad_norm": 3.595693826675415,
|
|
"learning_rate": 9.862107719000667e-06,
|
|
"loss": 1.1729,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 0.6063687449826064,
|
|
"grad_norm": 3.5446066856384277,
|
|
"learning_rate": 9.861901244428949e-06,
|
|
"loss": 1.1141,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 0.6066363393096066,
|
|
"grad_norm": 3.108658790588379,
|
|
"learning_rate": 9.861694617553983e-06,
|
|
"loss": 1.0365,
|
|
"step": 2267
|
|
},
|
|
{
|
|
"epoch": 0.6069039336366069,
|
|
"grad_norm": 3.6176912784576416,
|
|
"learning_rate": 9.861487838382244e-06,
|
|
"loss": 1.182,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 0.6071715279636072,
|
|
"grad_norm": 3.7221384048461914,
|
|
"learning_rate": 9.861280906920208e-06,
|
|
"loss": 1.0479,
|
|
"step": 2269
|
|
},
|
|
{
|
|
"epoch": 0.6074391222906075,
|
|
"grad_norm": 3.526144504547119,
|
|
"learning_rate": 9.861073823174357e-06,
|
|
"loss": 1.0778,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.6077067166176077,
|
|
"grad_norm": 3.49381160736084,
|
|
"learning_rate": 9.86086658715118e-06,
|
|
"loss": 1.0654,
|
|
"step": 2271
|
|
},
|
|
{
|
|
"epoch": 0.607974310944608,
|
|
"grad_norm": 3.485805034637451,
|
|
"learning_rate": 9.860659198857166e-06,
|
|
"loss": 1.138,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 0.6082419052716083,
|
|
"grad_norm": 3.3944783210754395,
|
|
"learning_rate": 9.860451658298813e-06,
|
|
"loss": 1.1153,
|
|
"step": 2273
|
|
},
|
|
{
|
|
"epoch": 0.6085094995986086,
|
|
"grad_norm": 3.5149385929107666,
|
|
"learning_rate": 9.860243965482623e-06,
|
|
"loss": 1.1654,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 0.6087770939256087,
|
|
"grad_norm": 3.7925617694854736,
|
|
"learning_rate": 9.860036120415102e-06,
|
|
"loss": 1.2223,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 0.609044688252609,
|
|
"grad_norm": 3.524855852127075,
|
|
"learning_rate": 9.859828123102759e-06,
|
|
"loss": 1.0867,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 0.6093122825796093,
|
|
"grad_norm": 3.534085750579834,
|
|
"learning_rate": 9.859619973552112e-06,
|
|
"loss": 1.0719,
|
|
"step": 2277
|
|
},
|
|
{
|
|
"epoch": 0.6095798769066095,
|
|
"grad_norm": 3.5767481327056885,
|
|
"learning_rate": 9.859411671769682e-06,
|
|
"loss": 1.2826,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 0.6098474712336098,
|
|
"grad_norm": 3.26108980178833,
|
|
"learning_rate": 9.859203217761993e-06,
|
|
"loss": 1.0839,
|
|
"step": 2279
|
|
},
|
|
{
|
|
"epoch": 0.6101150655606101,
|
|
"grad_norm": 3.6995849609375,
|
|
"learning_rate": 9.858994611535572e-06,
|
|
"loss": 1.2193,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.6103826598876104,
|
|
"grad_norm": 3.7640321254730225,
|
|
"learning_rate": 9.858785853096958e-06,
|
|
"loss": 1.2932,
|
|
"step": 2281
|
|
},
|
|
{
|
|
"epoch": 0.6106502542146106,
|
|
"grad_norm": 3.795732021331787,
|
|
"learning_rate": 9.85857694245269e-06,
|
|
"loss": 1.3135,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 0.6109178485416109,
|
|
"grad_norm": 3.552950620651245,
|
|
"learning_rate": 9.858367879609311e-06,
|
|
"loss": 1.106,
|
|
"step": 2283
|
|
},
|
|
{
|
|
"epoch": 0.6111854428686112,
|
|
"grad_norm": 3.506056547164917,
|
|
"learning_rate": 9.85815866457337e-06,
|
|
"loss": 1.0596,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 0.6114530371956115,
|
|
"grad_norm": 3.822715997695923,
|
|
"learning_rate": 9.857949297351423e-06,
|
|
"loss": 1.1044,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 0.6117206315226117,
|
|
"grad_norm": 3.263763427734375,
|
|
"learning_rate": 9.857739777950026e-06,
|
|
"loss": 1.0387,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 0.611988225849612,
|
|
"grad_norm": 3.378865957260132,
|
|
"learning_rate": 9.857530106375743e-06,
|
|
"loss": 1.0867,
|
|
"step": 2287
|
|
},
|
|
{
|
|
"epoch": 0.6122558201766123,
|
|
"grad_norm": 3.8504269123077393,
|
|
"learning_rate": 9.857320282635143e-06,
|
|
"loss": 1.2017,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 0.6125234145036125,
|
|
"grad_norm": 3.375674247741699,
|
|
"learning_rate": 9.857110306734798e-06,
|
|
"loss": 1.0055,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 0.6127910088306128,
|
|
"grad_norm": 3.5643208026885986,
|
|
"learning_rate": 9.856900178681287e-06,
|
|
"loss": 1.2542,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.6130586031576131,
|
|
"grad_norm": 3.1768534183502197,
|
|
"learning_rate": 9.856689898481191e-06,
|
|
"loss": 0.9672,
|
|
"step": 2291
|
|
},
|
|
{
|
|
"epoch": 0.6133261974846134,
|
|
"grad_norm": 3.2512409687042236,
|
|
"learning_rate": 9.856479466141098e-06,
|
|
"loss": 1.065,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 0.6135937918116136,
|
|
"grad_norm": 3.486975908279419,
|
|
"learning_rate": 9.8562688816676e-06,
|
|
"loss": 1.2013,
|
|
"step": 2293
|
|
},
|
|
{
|
|
"epoch": 0.6138613861386139,
|
|
"grad_norm": 3.7750918865203857,
|
|
"learning_rate": 9.856058145067293e-06,
|
|
"loss": 1.2465,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 0.6141289804656141,
|
|
"grad_norm": 4.267007827758789,
|
|
"learning_rate": 9.85584725634678e-06,
|
|
"loss": 1.2918,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 0.6143965747926144,
|
|
"grad_norm": 3.3109710216522217,
|
|
"learning_rate": 9.855636215512666e-06,
|
|
"loss": 1.0852,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 0.6146641691196146,
|
|
"grad_norm": 3.4727590084075928,
|
|
"learning_rate": 9.85542502257156e-06,
|
|
"loss": 1.1332,
|
|
"step": 2297
|
|
},
|
|
{
|
|
"epoch": 0.6149317634466149,
|
|
"grad_norm": 3.525007724761963,
|
|
"learning_rate": 9.855213677530083e-06,
|
|
"loss": 1.1852,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 0.6151993577736152,
|
|
"grad_norm": 4.421526908874512,
|
|
"learning_rate": 9.85500218039485e-06,
|
|
"loss": 1.3733,
|
|
"step": 2299
|
|
},
|
|
{
|
|
"epoch": 0.6154669521006154,
|
|
"grad_norm": 3.6387100219726562,
|
|
"learning_rate": 9.854790531172491e-06,
|
|
"loss": 1.1027,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.6157345464276157,
|
|
"grad_norm": 3.6601171493530273,
|
|
"learning_rate": 9.854578729869634e-06,
|
|
"loss": 1.1533,
|
|
"step": 2301
|
|
},
|
|
{
|
|
"epoch": 0.616002140754616,
|
|
"grad_norm": 3.661722183227539,
|
|
"learning_rate": 9.854366776492915e-06,
|
|
"loss": 1.0665,
|
|
"step": 2302
|
|
},
|
|
{
|
|
"epoch": 0.6162697350816163,
|
|
"grad_norm": 3.5786993503570557,
|
|
"learning_rate": 9.85415467104897e-06,
|
|
"loss": 1.2227,
|
|
"step": 2303
|
|
},
|
|
{
|
|
"epoch": 0.6165373294086165,
|
|
"grad_norm": 3.527582883834839,
|
|
"learning_rate": 9.853942413544448e-06,
|
|
"loss": 1.2771,
|
|
"step": 2304
|
|
},
|
|
{
|
|
"epoch": 0.6168049237356168,
|
|
"grad_norm": 3.9386675357818604,
|
|
"learning_rate": 9.853730003985995e-06,
|
|
"loss": 1.2679,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 0.6170725180626171,
|
|
"grad_norm": 3.3264570236206055,
|
|
"learning_rate": 9.853517442380266e-06,
|
|
"loss": 1.093,
|
|
"step": 2306
|
|
},
|
|
{
|
|
"epoch": 0.6173401123896174,
|
|
"grad_norm": 3.631671905517578,
|
|
"learning_rate": 9.85330472873392e-06,
|
|
"loss": 1.1611,
|
|
"step": 2307
|
|
},
|
|
{
|
|
"epoch": 0.6176077067166176,
|
|
"grad_norm": 3.9412624835968018,
|
|
"learning_rate": 9.853091863053621e-06,
|
|
"loss": 1.198,
|
|
"step": 2308
|
|
},
|
|
{
|
|
"epoch": 0.6178753010436179,
|
|
"grad_norm": 3.4055187702178955,
|
|
"learning_rate": 9.852878845346035e-06,
|
|
"loss": 1.0783,
|
|
"step": 2309
|
|
},
|
|
{
|
|
"epoch": 0.6181428953706182,
|
|
"grad_norm": 3.639285087585449,
|
|
"learning_rate": 9.852665675617837e-06,
|
|
"loss": 1.2475,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.6184104896976184,
|
|
"grad_norm": 3.5802559852600098,
|
|
"learning_rate": 9.852452353875705e-06,
|
|
"loss": 1.1369,
|
|
"step": 2311
|
|
},
|
|
{
|
|
"epoch": 0.6186780840246187,
|
|
"grad_norm": 3.259661912918091,
|
|
"learning_rate": 9.852238880126319e-06,
|
|
"loss": 1.0025,
|
|
"step": 2312
|
|
},
|
|
{
|
|
"epoch": 0.618945678351619,
|
|
"grad_norm": 3.9171831607818604,
|
|
"learning_rate": 9.852025254376367e-06,
|
|
"loss": 1.2405,
|
|
"step": 2313
|
|
},
|
|
{
|
|
"epoch": 0.6192132726786193,
|
|
"grad_norm": 3.7371790409088135,
|
|
"learning_rate": 9.851811476632544e-06,
|
|
"loss": 1.2399,
|
|
"step": 2314
|
|
},
|
|
{
|
|
"epoch": 0.6194808670056194,
|
|
"grad_norm": 3.9764063358306885,
|
|
"learning_rate": 9.851597546901543e-06,
|
|
"loss": 1.3006,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 0.6197484613326197,
|
|
"grad_norm": 3.6764659881591797,
|
|
"learning_rate": 9.851383465190068e-06,
|
|
"loss": 1.1916,
|
|
"step": 2316
|
|
},
|
|
{
|
|
"epoch": 0.62001605565962,
|
|
"grad_norm": 3.4635825157165527,
|
|
"learning_rate": 9.851169231504825e-06,
|
|
"loss": 1.0243,
|
|
"step": 2317
|
|
},
|
|
{
|
|
"epoch": 0.6202836499866203,
|
|
"grad_norm": 3.5511868000030518,
|
|
"learning_rate": 9.850954845852522e-06,
|
|
"loss": 1.1825,
|
|
"step": 2318
|
|
},
|
|
{
|
|
"epoch": 0.6205512443136205,
|
|
"grad_norm": 3.948732376098633,
|
|
"learning_rate": 9.85074030823988e-06,
|
|
"loss": 1.3428,
|
|
"step": 2319
|
|
},
|
|
{
|
|
"epoch": 0.6208188386406208,
|
|
"grad_norm": 3.748976469039917,
|
|
"learning_rate": 9.850525618673615e-06,
|
|
"loss": 1.2,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.6210864329676211,
|
|
"grad_norm": 3.6761586666107178,
|
|
"learning_rate": 9.850310777160454e-06,
|
|
"loss": 1.2541,
|
|
"step": 2321
|
|
},
|
|
{
|
|
"epoch": 0.6213540272946213,
|
|
"grad_norm": 3.328855514526367,
|
|
"learning_rate": 9.85009578370713e-06,
|
|
"loss": 1.0451,
|
|
"step": 2322
|
|
},
|
|
{
|
|
"epoch": 0.6216216216216216,
|
|
"grad_norm": 3.2399799823760986,
|
|
"learning_rate": 9.849880638320372e-06,
|
|
"loss": 1.0936,
|
|
"step": 2323
|
|
},
|
|
{
|
|
"epoch": 0.6218892159486219,
|
|
"grad_norm": 3.481745481491089,
|
|
"learning_rate": 9.849665341006924e-06,
|
|
"loss": 1.2136,
|
|
"step": 2324
|
|
},
|
|
{
|
|
"epoch": 0.6221568102756222,
|
|
"grad_norm": 3.219832181930542,
|
|
"learning_rate": 9.849449891773529e-06,
|
|
"loss": 1.059,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 0.6224244046026224,
|
|
"grad_norm": 3.4119327068328857,
|
|
"learning_rate": 9.849234290626937e-06,
|
|
"loss": 1.0072,
|
|
"step": 2326
|
|
},
|
|
{
|
|
"epoch": 0.6226919989296227,
|
|
"grad_norm": 3.2931737899780273,
|
|
"learning_rate": 9.8490185375739e-06,
|
|
"loss": 1.169,
|
|
"step": 2327
|
|
},
|
|
{
|
|
"epoch": 0.622959593256623,
|
|
"grad_norm": 4.0458760261535645,
|
|
"learning_rate": 9.848802632621177e-06,
|
|
"loss": 1.2028,
|
|
"step": 2328
|
|
},
|
|
{
|
|
"epoch": 0.6232271875836233,
|
|
"grad_norm": 3.2181153297424316,
|
|
"learning_rate": 9.848586575775534e-06,
|
|
"loss": 0.9779,
|
|
"step": 2329
|
|
},
|
|
{
|
|
"epoch": 0.6234947819106235,
|
|
"grad_norm": 3.359768867492676,
|
|
"learning_rate": 9.848370367043737e-06,
|
|
"loss": 1.0074,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.6237623762376238,
|
|
"grad_norm": 3.5515081882476807,
|
|
"learning_rate": 9.848154006432559e-06,
|
|
"loss": 1.0557,
|
|
"step": 2331
|
|
},
|
|
{
|
|
"epoch": 0.6240299705646241,
|
|
"grad_norm": 4.038802623748779,
|
|
"learning_rate": 9.847937493948778e-06,
|
|
"loss": 1.1691,
|
|
"step": 2332
|
|
},
|
|
{
|
|
"epoch": 0.6242975648916242,
|
|
"grad_norm": 3.4252140522003174,
|
|
"learning_rate": 9.847720829599177e-06,
|
|
"loss": 1.0728,
|
|
"step": 2333
|
|
},
|
|
{
|
|
"epoch": 0.6245651592186245,
|
|
"grad_norm": 3.5178418159484863,
|
|
"learning_rate": 9.847504013390542e-06,
|
|
"loss": 1.0433,
|
|
"step": 2334
|
|
},
|
|
{
|
|
"epoch": 0.6248327535456248,
|
|
"grad_norm": 4.008810043334961,
|
|
"learning_rate": 9.847287045329665e-06,
|
|
"loss": 1.2534,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 0.6251003478726251,
|
|
"grad_norm": 3.4519779682159424,
|
|
"learning_rate": 9.847069925423342e-06,
|
|
"loss": 1.2137,
|
|
"step": 2336
|
|
},
|
|
{
|
|
"epoch": 0.6253679421996253,
|
|
"grad_norm": 3.9247629642486572,
|
|
"learning_rate": 9.846852653678377e-06,
|
|
"loss": 1.0946,
|
|
"step": 2337
|
|
},
|
|
{
|
|
"epoch": 0.6256355365266256,
|
|
"grad_norm": 3.3218302726745605,
|
|
"learning_rate": 9.846635230101578e-06,
|
|
"loss": 0.992,
|
|
"step": 2338
|
|
},
|
|
{
|
|
"epoch": 0.6259031308536259,
|
|
"grad_norm": 3.259517192840576,
|
|
"learning_rate": 9.846417654699748e-06,
|
|
"loss": 1.025,
|
|
"step": 2339
|
|
},
|
|
{
|
|
"epoch": 0.6261707251806262,
|
|
"grad_norm": 3.9205453395843506,
|
|
"learning_rate": 9.846199927479711e-06,
|
|
"loss": 1.2215,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.6264383195076264,
|
|
"grad_norm": 3.4169704914093018,
|
|
"learning_rate": 9.845982048448283e-06,
|
|
"loss": 1.0521,
|
|
"step": 2341
|
|
},
|
|
{
|
|
"epoch": 0.6267059138346267,
|
|
"grad_norm": 3.2617716789245605,
|
|
"learning_rate": 9.845764017612291e-06,
|
|
"loss": 1.0927,
|
|
"step": 2342
|
|
},
|
|
{
|
|
"epoch": 0.626973508161627,
|
|
"grad_norm": 3.432112455368042,
|
|
"learning_rate": 9.845545834978565e-06,
|
|
"loss": 1.0838,
|
|
"step": 2343
|
|
},
|
|
{
|
|
"epoch": 0.6272411024886272,
|
|
"grad_norm": 3.6730408668518066,
|
|
"learning_rate": 9.845327500553938e-06,
|
|
"loss": 1.1048,
|
|
"step": 2344
|
|
},
|
|
{
|
|
"epoch": 0.6275086968156275,
|
|
"grad_norm": 3.4062979221343994,
|
|
"learning_rate": 9.845109014345251e-06,
|
|
"loss": 1.1069,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 0.6277762911426278,
|
|
"grad_norm": 3.237093687057495,
|
|
"learning_rate": 9.844890376359348e-06,
|
|
"loss": 1.1357,
|
|
"step": 2346
|
|
},
|
|
{
|
|
"epoch": 0.6280438854696281,
|
|
"grad_norm": 3.722663640975952,
|
|
"learning_rate": 9.844671586603079e-06,
|
|
"loss": 1.2362,
|
|
"step": 2347
|
|
},
|
|
{
|
|
"epoch": 0.6283114797966283,
|
|
"grad_norm": 3.7158944606781006,
|
|
"learning_rate": 9.844452645083295e-06,
|
|
"loss": 1.2066,
|
|
"step": 2348
|
|
},
|
|
{
|
|
"epoch": 0.6285790741236286,
|
|
"grad_norm": 3.6207492351531982,
|
|
"learning_rate": 9.844233551806857e-06,
|
|
"loss": 1.1971,
|
|
"step": 2349
|
|
},
|
|
{
|
|
"epoch": 0.6288466684506289,
|
|
"grad_norm": 3.799163579940796,
|
|
"learning_rate": 9.844014306780627e-06,
|
|
"loss": 1.1569,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.6291142627776292,
|
|
"grad_norm": 3.326672077178955,
|
|
"learning_rate": 9.843794910011476e-06,
|
|
"loss": 1.0336,
|
|
"step": 2351
|
|
},
|
|
{
|
|
"epoch": 0.6293818571046293,
|
|
"grad_norm": 3.5804383754730225,
|
|
"learning_rate": 9.84357536150627e-06,
|
|
"loss": 1.2486,
|
|
"step": 2352
|
|
},
|
|
{
|
|
"epoch": 0.6296494514316296,
|
|
"grad_norm": 3.147380828857422,
|
|
"learning_rate": 9.843355661271895e-06,
|
|
"loss": 1.0599,
|
|
"step": 2353
|
|
},
|
|
{
|
|
"epoch": 0.62991704575863,
|
|
"grad_norm": 3.6518685817718506,
|
|
"learning_rate": 9.843135809315227e-06,
|
|
"loss": 1.195,
|
|
"step": 2354
|
|
},
|
|
{
|
|
"epoch": 0.6301846400856301,
|
|
"grad_norm": 3.393224000930786,
|
|
"learning_rate": 9.842915805643156e-06,
|
|
"loss": 1.1262,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 0.6304522344126304,
|
|
"grad_norm": 3.6997387409210205,
|
|
"learning_rate": 9.842695650262573e-06,
|
|
"loss": 1.1872,
|
|
"step": 2356
|
|
},
|
|
{
|
|
"epoch": 0.6307198287396307,
|
|
"grad_norm": 3.419063091278076,
|
|
"learning_rate": 9.842475343180375e-06,
|
|
"loss": 1.2947,
|
|
"step": 2357
|
|
},
|
|
{
|
|
"epoch": 0.630987423066631,
|
|
"grad_norm": 3.517101764678955,
|
|
"learning_rate": 9.842254884403463e-06,
|
|
"loss": 1.2461,
|
|
"step": 2358
|
|
},
|
|
{
|
|
"epoch": 0.6312550173936312,
|
|
"grad_norm": 3.4831290245056152,
|
|
"learning_rate": 9.842034273938744e-06,
|
|
"loss": 1.153,
|
|
"step": 2359
|
|
},
|
|
{
|
|
"epoch": 0.6315226117206315,
|
|
"grad_norm": 3.965106248855591,
|
|
"learning_rate": 9.841813511793126e-06,
|
|
"loss": 1.2851,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.6317902060476318,
|
|
"grad_norm": 2.9913620948791504,
|
|
"learning_rate": 9.841592597973528e-06,
|
|
"loss": 1.1356,
|
|
"step": 2361
|
|
},
|
|
{
|
|
"epoch": 0.6320578003746321,
|
|
"grad_norm": 3.26570463180542,
|
|
"learning_rate": 9.841371532486867e-06,
|
|
"loss": 1.1497,
|
|
"step": 2362
|
|
},
|
|
{
|
|
"epoch": 0.6323253947016323,
|
|
"grad_norm": 3.5169339179992676,
|
|
"learning_rate": 9.841150315340071e-06,
|
|
"loss": 1.1598,
|
|
"step": 2363
|
|
},
|
|
{
|
|
"epoch": 0.6325929890286326,
|
|
"grad_norm": 3.4498212337493896,
|
|
"learning_rate": 9.84092894654007e-06,
|
|
"loss": 1.1632,
|
|
"step": 2364
|
|
},
|
|
{
|
|
"epoch": 0.6328605833556329,
|
|
"grad_norm": 4.316896915435791,
|
|
"learning_rate": 9.840707426093795e-06,
|
|
"loss": 1.2331,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 0.6331281776826331,
|
|
"grad_norm": 3.5566680431365967,
|
|
"learning_rate": 9.840485754008188e-06,
|
|
"loss": 1.1958,
|
|
"step": 2366
|
|
},
|
|
{
|
|
"epoch": 0.6333957720096334,
|
|
"grad_norm": 3.55718994140625,
|
|
"learning_rate": 9.840263930290192e-06,
|
|
"loss": 1.1707,
|
|
"step": 2367
|
|
},
|
|
{
|
|
"epoch": 0.6336633663366337,
|
|
"grad_norm": 3.3516623973846436,
|
|
"learning_rate": 9.840041954946757e-06,
|
|
"loss": 1.1279,
|
|
"step": 2368
|
|
},
|
|
{
|
|
"epoch": 0.633930960663634,
|
|
"grad_norm": 3.7608842849731445,
|
|
"learning_rate": 9.839819827984835e-06,
|
|
"loss": 1.0901,
|
|
"step": 2369
|
|
},
|
|
{
|
|
"epoch": 0.6341985549906342,
|
|
"grad_norm": 3.342604637145996,
|
|
"learning_rate": 9.839597549411389e-06,
|
|
"loss": 1.1313,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.6344661493176345,
|
|
"grad_norm": 3.692324161529541,
|
|
"learning_rate": 9.839375119233375e-06,
|
|
"loss": 1.1783,
|
|
"step": 2371
|
|
},
|
|
{
|
|
"epoch": 0.6347337436446348,
|
|
"grad_norm": 3.4589786529541016,
|
|
"learning_rate": 9.839152537457764e-06,
|
|
"loss": 1.0795,
|
|
"step": 2372
|
|
},
|
|
{
|
|
"epoch": 0.635001337971635,
|
|
"grad_norm": 3.76045560836792,
|
|
"learning_rate": 9.83892980409153e-06,
|
|
"loss": 1.2872,
|
|
"step": 2373
|
|
},
|
|
{
|
|
"epoch": 0.6352689322986352,
|
|
"grad_norm": 3.486509323120117,
|
|
"learning_rate": 9.838706919141649e-06,
|
|
"loss": 0.9929,
|
|
"step": 2374
|
|
},
|
|
{
|
|
"epoch": 0.6355365266256355,
|
|
"grad_norm": 3.1999824047088623,
|
|
"learning_rate": 9.838483882615101e-06,
|
|
"loss": 1.1086,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 0.6358041209526358,
|
|
"grad_norm": 3.3866939544677734,
|
|
"learning_rate": 9.838260694518877e-06,
|
|
"loss": 1.1782,
|
|
"step": 2376
|
|
},
|
|
{
|
|
"epoch": 0.636071715279636,
|
|
"grad_norm": 3.8350670337677,
|
|
"learning_rate": 9.838037354859967e-06,
|
|
"loss": 1.2023,
|
|
"step": 2377
|
|
},
|
|
{
|
|
"epoch": 0.6363393096066363,
|
|
"grad_norm": 3.401334762573242,
|
|
"learning_rate": 9.837813863645367e-06,
|
|
"loss": 1.2159,
|
|
"step": 2378
|
|
},
|
|
{
|
|
"epoch": 0.6366069039336366,
|
|
"grad_norm": 3.399458646774292,
|
|
"learning_rate": 9.837590220882076e-06,
|
|
"loss": 1.03,
|
|
"step": 2379
|
|
},
|
|
{
|
|
"epoch": 0.6368744982606369,
|
|
"grad_norm": 3.1889894008636475,
|
|
"learning_rate": 9.837366426577102e-06,
|
|
"loss": 1.0268,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.6371420925876371,
|
|
"grad_norm": 3.411510467529297,
|
|
"learning_rate": 9.837142480737457e-06,
|
|
"loss": 1.1182,
|
|
"step": 2381
|
|
},
|
|
{
|
|
"epoch": 0.6374096869146374,
|
|
"grad_norm": 3.746042251586914,
|
|
"learning_rate": 9.836918383370153e-06,
|
|
"loss": 1.1736,
|
|
"step": 2382
|
|
},
|
|
{
|
|
"epoch": 0.6376772812416377,
|
|
"grad_norm": 3.678807020187378,
|
|
"learning_rate": 9.836694134482212e-06,
|
|
"loss": 1.1744,
|
|
"step": 2383
|
|
},
|
|
{
|
|
"epoch": 0.637944875568638,
|
|
"grad_norm": 3.2534291744232178,
|
|
"learning_rate": 9.836469734080658e-06,
|
|
"loss": 0.9784,
|
|
"step": 2384
|
|
},
|
|
{
|
|
"epoch": 0.6382124698956382,
|
|
"grad_norm": 3.305079221725464,
|
|
"learning_rate": 9.83624518217252e-06,
|
|
"loss": 1.1002,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 0.6384800642226385,
|
|
"grad_norm": 3.6762077808380127,
|
|
"learning_rate": 9.836020478764835e-06,
|
|
"loss": 1.0991,
|
|
"step": 2386
|
|
},
|
|
{
|
|
"epoch": 0.6387476585496388,
|
|
"grad_norm": 3.3923799991607666,
|
|
"learning_rate": 9.83579562386464e-06,
|
|
"loss": 1.123,
|
|
"step": 2387
|
|
},
|
|
{
|
|
"epoch": 0.639015252876639,
|
|
"grad_norm": 3.8256936073303223,
|
|
"learning_rate": 9.835570617478976e-06,
|
|
"loss": 1.1498,
|
|
"step": 2388
|
|
},
|
|
{
|
|
"epoch": 0.6392828472036393,
|
|
"grad_norm": 3.488901138305664,
|
|
"learning_rate": 9.835345459614897e-06,
|
|
"loss": 1.19,
|
|
"step": 2389
|
|
},
|
|
{
|
|
"epoch": 0.6395504415306396,
|
|
"grad_norm": 3.408535957336426,
|
|
"learning_rate": 9.835120150279454e-06,
|
|
"loss": 1.1097,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.6398180358576399,
|
|
"grad_norm": 3.648115634918213,
|
|
"learning_rate": 9.834894689479703e-06,
|
|
"loss": 1.0789,
|
|
"step": 2391
|
|
},
|
|
{
|
|
"epoch": 0.64008563018464,
|
|
"grad_norm": 3.6117544174194336,
|
|
"learning_rate": 9.83466907722271e-06,
|
|
"loss": 1.256,
|
|
"step": 2392
|
|
},
|
|
{
|
|
"epoch": 0.6403532245116403,
|
|
"grad_norm": 3.7180707454681396,
|
|
"learning_rate": 9.834443313515542e-06,
|
|
"loss": 1.1885,
|
|
"step": 2393
|
|
},
|
|
{
|
|
"epoch": 0.6406208188386406,
|
|
"grad_norm": 5.158202648162842,
|
|
"learning_rate": 9.834217398365268e-06,
|
|
"loss": 1.1757,
|
|
"step": 2394
|
|
},
|
|
{
|
|
"epoch": 0.6408884131656409,
|
|
"grad_norm": 3.770582914352417,
|
|
"learning_rate": 9.83399133177897e-06,
|
|
"loss": 1.2923,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 0.6411560074926411,
|
|
"grad_norm": 3.824382781982422,
|
|
"learning_rate": 9.833765113763723e-06,
|
|
"loss": 1.2032,
|
|
"step": 2396
|
|
},
|
|
{
|
|
"epoch": 0.6414236018196414,
|
|
"grad_norm": 3.29740309715271,
|
|
"learning_rate": 9.83353874432662e-06,
|
|
"loss": 1.2306,
|
|
"step": 2397
|
|
},
|
|
{
|
|
"epoch": 0.6416911961466417,
|
|
"grad_norm": 3.3349862098693848,
|
|
"learning_rate": 9.83331222347475e-06,
|
|
"loss": 1.0847,
|
|
"step": 2398
|
|
},
|
|
{
|
|
"epoch": 0.6419587904736419,
|
|
"grad_norm": 3.7271625995635986,
|
|
"learning_rate": 9.833085551215206e-06,
|
|
"loss": 1.252,
|
|
"step": 2399
|
|
},
|
|
{
|
|
"epoch": 0.6422263848006422,
|
|
"grad_norm": 3.7548937797546387,
|
|
"learning_rate": 9.832858727555095e-06,
|
|
"loss": 1.1225,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6424939791276425,
|
|
"grad_norm": 3.6367075443267822,
|
|
"learning_rate": 9.832631752501515e-06,
|
|
"loss": 1.1896,
|
|
"step": 2401
|
|
},
|
|
{
|
|
"epoch": 0.6427615734546428,
|
|
"grad_norm": 3.585908889770508,
|
|
"learning_rate": 9.832404626061582e-06,
|
|
"loss": 1.24,
|
|
"step": 2402
|
|
},
|
|
{
|
|
"epoch": 0.643029167781643,
|
|
"grad_norm": 3.4509429931640625,
|
|
"learning_rate": 9.832177348242408e-06,
|
|
"loss": 1.1011,
|
|
"step": 2403
|
|
},
|
|
{
|
|
"epoch": 0.6432967621086433,
|
|
"grad_norm": 3.6890709400177,
|
|
"learning_rate": 9.831949919051116e-06,
|
|
"loss": 1.1894,
|
|
"step": 2404
|
|
},
|
|
{
|
|
"epoch": 0.6435643564356436,
|
|
"grad_norm": 3.348698139190674,
|
|
"learning_rate": 9.831722338494826e-06,
|
|
"loss": 1.2294,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 0.6438319507626439,
|
|
"grad_norm": 3.424172878265381,
|
|
"learning_rate": 9.831494606580669e-06,
|
|
"loss": 1.0647,
|
|
"step": 2406
|
|
},
|
|
{
|
|
"epoch": 0.6440995450896441,
|
|
"grad_norm": 3.4821624755859375,
|
|
"learning_rate": 9.83126672331578e-06,
|
|
"loss": 1.124,
|
|
"step": 2407
|
|
},
|
|
{
|
|
"epoch": 0.6443671394166444,
|
|
"grad_norm": 3.271749973297119,
|
|
"learning_rate": 9.831038688707296e-06,
|
|
"loss": 1.0989,
|
|
"step": 2408
|
|
},
|
|
{
|
|
"epoch": 0.6446347337436447,
|
|
"grad_norm": 3.6748054027557373,
|
|
"learning_rate": 9.83081050276236e-06,
|
|
"loss": 1.1704,
|
|
"step": 2409
|
|
},
|
|
{
|
|
"epoch": 0.6449023280706448,
|
|
"grad_norm": 3.3000192642211914,
|
|
"learning_rate": 9.830582165488123e-06,
|
|
"loss": 1.1656,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.6451699223976451,
|
|
"grad_norm": 4.096604824066162,
|
|
"learning_rate": 9.830353676891736e-06,
|
|
"loss": 1.1799,
|
|
"step": 2411
|
|
},
|
|
{
|
|
"epoch": 0.6454375167246454,
|
|
"grad_norm": 3.337603807449341,
|
|
"learning_rate": 9.830125036980353e-06,
|
|
"loss": 1.1693,
|
|
"step": 2412
|
|
},
|
|
{
|
|
"epoch": 0.6457051110516457,
|
|
"grad_norm": 3.6105048656463623,
|
|
"learning_rate": 9.829896245761144e-06,
|
|
"loss": 1.2169,
|
|
"step": 2413
|
|
},
|
|
{
|
|
"epoch": 0.6459727053786459,
|
|
"grad_norm": 3.29010272026062,
|
|
"learning_rate": 9.829667303241271e-06,
|
|
"loss": 1.0089,
|
|
"step": 2414
|
|
},
|
|
{
|
|
"epoch": 0.6462402997056462,
|
|
"grad_norm": 3.5054385662078857,
|
|
"learning_rate": 9.829438209427907e-06,
|
|
"loss": 1.105,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 0.6465078940326465,
|
|
"grad_norm": 3.4805397987365723,
|
|
"learning_rate": 9.829208964328228e-06,
|
|
"loss": 1.0914,
|
|
"step": 2416
|
|
},
|
|
{
|
|
"epoch": 0.6467754883596468,
|
|
"grad_norm": 3.1424105167388916,
|
|
"learning_rate": 9.828979567949416e-06,
|
|
"loss": 1.0573,
|
|
"step": 2417
|
|
},
|
|
{
|
|
"epoch": 0.647043082686647,
|
|
"grad_norm": 4.121860980987549,
|
|
"learning_rate": 9.828750020298656e-06,
|
|
"loss": 1.1732,
|
|
"step": 2418
|
|
},
|
|
{
|
|
"epoch": 0.6473106770136473,
|
|
"grad_norm": 3.2964742183685303,
|
|
"learning_rate": 9.828520321383142e-06,
|
|
"loss": 1.1536,
|
|
"step": 2419
|
|
},
|
|
{
|
|
"epoch": 0.6475782713406476,
|
|
"grad_norm": 3.4967031478881836,
|
|
"learning_rate": 9.828290471210064e-06,
|
|
"loss": 1.1049,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.6478458656676478,
|
|
"grad_norm": 3.3950541019439697,
|
|
"learning_rate": 9.828060469786626e-06,
|
|
"loss": 1.151,
|
|
"step": 2421
|
|
},
|
|
{
|
|
"epoch": 0.6481134599946481,
|
|
"grad_norm": 3.585238218307495,
|
|
"learning_rate": 9.827830317120033e-06,
|
|
"loss": 1.1172,
|
|
"step": 2422
|
|
},
|
|
{
|
|
"epoch": 0.6483810543216484,
|
|
"grad_norm": 2.9747002124786377,
|
|
"learning_rate": 9.827600013217496e-06,
|
|
"loss": 0.9499,
|
|
"step": 2423
|
|
},
|
|
{
|
|
"epoch": 0.6486486486486487,
|
|
"grad_norm": 3.2427027225494385,
|
|
"learning_rate": 9.827369558086225e-06,
|
|
"loss": 1.0767,
|
|
"step": 2424
|
|
},
|
|
{
|
|
"epoch": 0.6489162429756489,
|
|
"grad_norm": 3.419710874557495,
|
|
"learning_rate": 9.827138951733441e-06,
|
|
"loss": 1.1198,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 0.6491838373026492,
|
|
"grad_norm": 3.5654327869415283,
|
|
"learning_rate": 9.82690819416637e-06,
|
|
"loss": 1.1684,
|
|
"step": 2426
|
|
},
|
|
{
|
|
"epoch": 0.6494514316296495,
|
|
"grad_norm": 3.414553642272949,
|
|
"learning_rate": 9.826677285392238e-06,
|
|
"loss": 1.1018,
|
|
"step": 2427
|
|
},
|
|
{
|
|
"epoch": 0.6497190259566498,
|
|
"grad_norm": 3.366098642349243,
|
|
"learning_rate": 9.826446225418282e-06,
|
|
"loss": 1.0191,
|
|
"step": 2428
|
|
},
|
|
{
|
|
"epoch": 0.64998662028365,
|
|
"grad_norm": 3.913783311843872,
|
|
"learning_rate": 9.826215014251738e-06,
|
|
"loss": 1.235,
|
|
"step": 2429
|
|
},
|
|
{
|
|
"epoch": 0.6502542146106502,
|
|
"grad_norm": 4.037808418273926,
|
|
"learning_rate": 9.825983651899847e-06,
|
|
"loss": 1.2542,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.6505218089376505,
|
|
"grad_norm": 2.9834325313568115,
|
|
"learning_rate": 9.82575213836986e-06,
|
|
"loss": 0.967,
|
|
"step": 2431
|
|
},
|
|
{
|
|
"epoch": 0.6507894032646507,
|
|
"grad_norm": 3.3896093368530273,
|
|
"learning_rate": 9.825520473669026e-06,
|
|
"loss": 1.1163,
|
|
"step": 2432
|
|
},
|
|
{
|
|
"epoch": 0.651056997591651,
|
|
"grad_norm": 3.965498685836792,
|
|
"learning_rate": 9.825288657804606e-06,
|
|
"loss": 1.2024,
|
|
"step": 2433
|
|
},
|
|
{
|
|
"epoch": 0.6513245919186513,
|
|
"grad_norm": 3.836982011795044,
|
|
"learning_rate": 9.825056690783859e-06,
|
|
"loss": 1.2839,
|
|
"step": 2434
|
|
},
|
|
{
|
|
"epoch": 0.6515921862456516,
|
|
"grad_norm": 4.303612232208252,
|
|
"learning_rate": 9.82482457261405e-06,
|
|
"loss": 1.1528,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 0.6518597805726518,
|
|
"grad_norm": 3.613075017929077,
|
|
"learning_rate": 9.824592303302455e-06,
|
|
"loss": 1.1773,
|
|
"step": 2436
|
|
},
|
|
{
|
|
"epoch": 0.6521273748996521,
|
|
"grad_norm": 3.2512998580932617,
|
|
"learning_rate": 9.824359882856347e-06,
|
|
"loss": 1.0795,
|
|
"step": 2437
|
|
},
|
|
{
|
|
"epoch": 0.6523949692266524,
|
|
"grad_norm": 3.6601617336273193,
|
|
"learning_rate": 9.824127311283007e-06,
|
|
"loss": 1.1032,
|
|
"step": 2438
|
|
},
|
|
{
|
|
"epoch": 0.6526625635536527,
|
|
"grad_norm": 3.5576727390289307,
|
|
"learning_rate": 9.823894588589722e-06,
|
|
"loss": 1.1383,
|
|
"step": 2439
|
|
},
|
|
{
|
|
"epoch": 0.6529301578806529,
|
|
"grad_norm": 3.50748610496521,
|
|
"learning_rate": 9.823661714783781e-06,
|
|
"loss": 1.2066,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.6531977522076532,
|
|
"grad_norm": 3.7736473083496094,
|
|
"learning_rate": 9.823428689872479e-06,
|
|
"loss": 1.2547,
|
|
"step": 2441
|
|
},
|
|
{
|
|
"epoch": 0.6534653465346535,
|
|
"grad_norm": 3.476040840148926,
|
|
"learning_rate": 9.823195513863114e-06,
|
|
"loss": 1.1075,
|
|
"step": 2442
|
|
},
|
|
{
|
|
"epoch": 0.6537329408616537,
|
|
"grad_norm": 3.444315195083618,
|
|
"learning_rate": 9.822962186762994e-06,
|
|
"loss": 1.1135,
|
|
"step": 2443
|
|
},
|
|
{
|
|
"epoch": 0.654000535188654,
|
|
"grad_norm": 3.9835290908813477,
|
|
"learning_rate": 9.822728708579425e-06,
|
|
"loss": 1.1706,
|
|
"step": 2444
|
|
},
|
|
{
|
|
"epoch": 0.6542681295156543,
|
|
"grad_norm": 3.669281482696533,
|
|
"learning_rate": 9.822495079319725e-06,
|
|
"loss": 1.1828,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 0.6545357238426546,
|
|
"grad_norm": 3.555455446243286,
|
|
"learning_rate": 9.822261298991208e-06,
|
|
"loss": 1.1348,
|
|
"step": 2446
|
|
},
|
|
{
|
|
"epoch": 0.6548033181696548,
|
|
"grad_norm": 3.5849578380584717,
|
|
"learning_rate": 9.822027367601199e-06,
|
|
"loss": 1.1241,
|
|
"step": 2447
|
|
},
|
|
{
|
|
"epoch": 0.6550709124966551,
|
|
"grad_norm": 3.71714186668396,
|
|
"learning_rate": 9.821793285157027e-06,
|
|
"loss": 1.255,
|
|
"step": 2448
|
|
},
|
|
{
|
|
"epoch": 0.6553385068236554,
|
|
"grad_norm": 3.6075050830841064,
|
|
"learning_rate": 9.821559051666025e-06,
|
|
"loss": 1.1514,
|
|
"step": 2449
|
|
},
|
|
{
|
|
"epoch": 0.6556061011506557,
|
|
"grad_norm": 3.3877387046813965,
|
|
"learning_rate": 9.82132466713553e-06,
|
|
"loss": 1.1232,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.6558736954776558,
|
|
"grad_norm": 3.499657154083252,
|
|
"learning_rate": 9.821090131572883e-06,
|
|
"loss": 1.1694,
|
|
"step": 2451
|
|
},
|
|
{
|
|
"epoch": 0.6561412898046561,
|
|
"grad_norm": 3.8426098823547363,
|
|
"learning_rate": 9.820855444985433e-06,
|
|
"loss": 1.2109,
|
|
"step": 2452
|
|
},
|
|
{
|
|
"epoch": 0.6564088841316564,
|
|
"grad_norm": 3.5373287200927734,
|
|
"learning_rate": 9.82062060738053e-06,
|
|
"loss": 1.0852,
|
|
"step": 2453
|
|
},
|
|
{
|
|
"epoch": 0.6566764784586567,
|
|
"grad_norm": 3.0332095623016357,
|
|
"learning_rate": 9.820385618765532e-06,
|
|
"loss": 1.0035,
|
|
"step": 2454
|
|
},
|
|
{
|
|
"epoch": 0.6569440727856569,
|
|
"grad_norm": 3.5709455013275146,
|
|
"learning_rate": 9.8201504791478e-06,
|
|
"loss": 1.1012,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 0.6572116671126572,
|
|
"grad_norm": 3.4466726779937744,
|
|
"learning_rate": 9.819915188534699e-06,
|
|
"loss": 1.1192,
|
|
"step": 2456
|
|
},
|
|
{
|
|
"epoch": 0.6574792614396575,
|
|
"grad_norm": 3.5553793907165527,
|
|
"learning_rate": 9.8196797469336e-06,
|
|
"loss": 1.2776,
|
|
"step": 2457
|
|
},
|
|
{
|
|
"epoch": 0.6577468557666577,
|
|
"grad_norm": 3.3105359077453613,
|
|
"learning_rate": 9.81944415435188e-06,
|
|
"loss": 1.1082,
|
|
"step": 2458
|
|
},
|
|
{
|
|
"epoch": 0.658014450093658,
|
|
"grad_norm": 3.7504870891571045,
|
|
"learning_rate": 9.819208410796916e-06,
|
|
"loss": 1.0523,
|
|
"step": 2459
|
|
},
|
|
{
|
|
"epoch": 0.6582820444206583,
|
|
"grad_norm": 3.4031195640563965,
|
|
"learning_rate": 9.818972516276096e-06,
|
|
"loss": 1.1827,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.6585496387476586,
|
|
"grad_norm": 3.87593412399292,
|
|
"learning_rate": 9.818736470796807e-06,
|
|
"loss": 1.1583,
|
|
"step": 2461
|
|
},
|
|
{
|
|
"epoch": 0.6588172330746588,
|
|
"grad_norm": 3.425092935562134,
|
|
"learning_rate": 9.818500274366448e-06,
|
|
"loss": 1.0955,
|
|
"step": 2462
|
|
},
|
|
{
|
|
"epoch": 0.6590848274016591,
|
|
"grad_norm": 3.820794105529785,
|
|
"learning_rate": 9.818263926992411e-06,
|
|
"loss": 1.2023,
|
|
"step": 2463
|
|
},
|
|
{
|
|
"epoch": 0.6593524217286594,
|
|
"grad_norm": 3.5655276775360107,
|
|
"learning_rate": 9.818027428682104e-06,
|
|
"loss": 1.1085,
|
|
"step": 2464
|
|
},
|
|
{
|
|
"epoch": 0.6596200160556597,
|
|
"grad_norm": 3.5070512294769287,
|
|
"learning_rate": 9.817790779442937e-06,
|
|
"loss": 1.3138,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 0.6598876103826599,
|
|
"grad_norm": 4.04046106338501,
|
|
"learning_rate": 9.81755397928232e-06,
|
|
"loss": 1.1665,
|
|
"step": 2466
|
|
},
|
|
{
|
|
"epoch": 0.6601552047096602,
|
|
"grad_norm": 3.2360928058624268,
|
|
"learning_rate": 9.81731702820767e-06,
|
|
"loss": 1.0778,
|
|
"step": 2467
|
|
},
|
|
{
|
|
"epoch": 0.6604227990366605,
|
|
"grad_norm": 3.552029848098755,
|
|
"learning_rate": 9.817079926226417e-06,
|
|
"loss": 1.181,
|
|
"step": 2468
|
|
},
|
|
{
|
|
"epoch": 0.6606903933636606,
|
|
"grad_norm": 3.4324593544006348,
|
|
"learning_rate": 9.816842673345979e-06,
|
|
"loss": 1.153,
|
|
"step": 2469
|
|
},
|
|
{
|
|
"epoch": 0.6609579876906609,
|
|
"grad_norm": 3.6090657711029053,
|
|
"learning_rate": 9.816605269573794e-06,
|
|
"loss": 1.0663,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.6612255820176612,
|
|
"grad_norm": 4.007713794708252,
|
|
"learning_rate": 9.816367714917296e-06,
|
|
"loss": 1.2343,
|
|
"step": 2471
|
|
},
|
|
{
|
|
"epoch": 0.6614931763446615,
|
|
"grad_norm": 3.3371682167053223,
|
|
"learning_rate": 9.81613000938393e-06,
|
|
"loss": 1.1184,
|
|
"step": 2472
|
|
},
|
|
{
|
|
"epoch": 0.6617607706716617,
|
|
"grad_norm": 3.256664752960205,
|
|
"learning_rate": 9.815892152981138e-06,
|
|
"loss": 0.9794,
|
|
"step": 2473
|
|
},
|
|
{
|
|
"epoch": 0.662028364998662,
|
|
"grad_norm": 3.7881510257720947,
|
|
"learning_rate": 9.815654145716376e-06,
|
|
"loss": 1.1446,
|
|
"step": 2474
|
|
},
|
|
{
|
|
"epoch": 0.6622959593256623,
|
|
"grad_norm": 3.406993865966797,
|
|
"learning_rate": 9.815415987597096e-06,
|
|
"loss": 1.2445,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 0.6625635536526626,
|
|
"grad_norm": 3.7865562438964844,
|
|
"learning_rate": 9.81517767863076e-06,
|
|
"loss": 1.3335,
|
|
"step": 2476
|
|
},
|
|
{
|
|
"epoch": 0.6628311479796628,
|
|
"grad_norm": 3.755580425262451,
|
|
"learning_rate": 9.814939218824831e-06,
|
|
"loss": 1.1506,
|
|
"step": 2477
|
|
},
|
|
{
|
|
"epoch": 0.6630987423066631,
|
|
"grad_norm": 3.753258466720581,
|
|
"learning_rate": 9.814700608186783e-06,
|
|
"loss": 1.1372,
|
|
"step": 2478
|
|
},
|
|
{
|
|
"epoch": 0.6633663366336634,
|
|
"grad_norm": 3.5832202434539795,
|
|
"learning_rate": 9.814461846724087e-06,
|
|
"loss": 1.158,
|
|
"step": 2479
|
|
},
|
|
{
|
|
"epoch": 0.6636339309606636,
|
|
"grad_norm": 3.4956367015838623,
|
|
"learning_rate": 9.814222934444223e-06,
|
|
"loss": 1.1532,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.6639015252876639,
|
|
"grad_norm": 3.7934727668762207,
|
|
"learning_rate": 9.81398387135468e-06,
|
|
"loss": 1.1813,
|
|
"step": 2481
|
|
},
|
|
{
|
|
"epoch": 0.6641691196146642,
|
|
"grad_norm": 3.5121653079986572,
|
|
"learning_rate": 9.813744657462941e-06,
|
|
"loss": 1.2199,
|
|
"step": 2482
|
|
},
|
|
{
|
|
"epoch": 0.6644367139416645,
|
|
"grad_norm": 3.3426973819732666,
|
|
"learning_rate": 9.8135052927765e-06,
|
|
"loss": 1.18,
|
|
"step": 2483
|
|
},
|
|
{
|
|
"epoch": 0.6647043082686647,
|
|
"grad_norm": 3.5111615657806396,
|
|
"learning_rate": 9.813265777302858e-06,
|
|
"loss": 1.1257,
|
|
"step": 2484
|
|
},
|
|
{
|
|
"epoch": 0.664971902595665,
|
|
"grad_norm": 3.1432745456695557,
|
|
"learning_rate": 9.813026111049514e-06,
|
|
"loss": 1.0037,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 0.6652394969226653,
|
|
"grad_norm": 3.3801767826080322,
|
|
"learning_rate": 9.812786294023983e-06,
|
|
"loss": 1.1871,
|
|
"step": 2486
|
|
},
|
|
{
|
|
"epoch": 0.6655070912496656,
|
|
"grad_norm": 3.4595744609832764,
|
|
"learning_rate": 9.812546326233771e-06,
|
|
"loss": 1.1732,
|
|
"step": 2487
|
|
},
|
|
{
|
|
"epoch": 0.6657746855766657,
|
|
"grad_norm": 3.835479736328125,
|
|
"learning_rate": 9.812306207686398e-06,
|
|
"loss": 1.1428,
|
|
"step": 2488
|
|
},
|
|
{
|
|
"epoch": 0.666042279903666,
|
|
"grad_norm": 3.608619213104248,
|
|
"learning_rate": 9.812065938389384e-06,
|
|
"loss": 1.2981,
|
|
"step": 2489
|
|
},
|
|
{
|
|
"epoch": 0.6663098742306663,
|
|
"grad_norm": 3.5188703536987305,
|
|
"learning_rate": 9.811825518350257e-06,
|
|
"loss": 1.2452,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.6665774685576665,
|
|
"grad_norm": 3.481654644012451,
|
|
"learning_rate": 9.81158494757655e-06,
|
|
"loss": 1.2473,
|
|
"step": 2491
|
|
},
|
|
{
|
|
"epoch": 0.6668450628846668,
|
|
"grad_norm": 3.2645812034606934,
|
|
"learning_rate": 9.811344226075795e-06,
|
|
"loss": 1.0821,
|
|
"step": 2492
|
|
},
|
|
{
|
|
"epoch": 0.6671126572116671,
|
|
"grad_norm": 3.3354525566101074,
|
|
"learning_rate": 9.811103353855535e-06,
|
|
"loss": 1.08,
|
|
"step": 2493
|
|
},
|
|
{
|
|
"epoch": 0.6673802515386674,
|
|
"grad_norm": 3.778996467590332,
|
|
"learning_rate": 9.810862330923317e-06,
|
|
"loss": 1.3063,
|
|
"step": 2494
|
|
},
|
|
{
|
|
"epoch": 0.6676478458656676,
|
|
"grad_norm": 3.2988641262054443,
|
|
"learning_rate": 9.810621157286688e-06,
|
|
"loss": 1.0607,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 0.6679154401926679,
|
|
"grad_norm": 3.771205425262451,
|
|
"learning_rate": 9.810379832953207e-06,
|
|
"loss": 1.1692,
|
|
"step": 2496
|
|
},
|
|
{
|
|
"epoch": 0.6681830345196682,
|
|
"grad_norm": 3.594296455383301,
|
|
"learning_rate": 9.81013835793043e-06,
|
|
"loss": 1.2804,
|
|
"step": 2497
|
|
},
|
|
{
|
|
"epoch": 0.6684506288466685,
|
|
"grad_norm": 3.33087420463562,
|
|
"learning_rate": 9.809896732225923e-06,
|
|
"loss": 1.0088,
|
|
"step": 2498
|
|
},
|
|
{
|
|
"epoch": 0.6687182231736687,
|
|
"grad_norm": 3.6876564025878906,
|
|
"learning_rate": 9.809654955847256e-06,
|
|
"loss": 1.1182,
|
|
"step": 2499
|
|
},
|
|
{
|
|
"epoch": 0.668985817500669,
|
|
"grad_norm": 3.4345877170562744,
|
|
"learning_rate": 9.809413028802002e-06,
|
|
"loss": 1.1175,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.668985817500669,
|
|
"eval_loss": 1.1746242046356201,
|
|
"eval_runtime": 11.5946,
|
|
"eval_samples_per_second": 34.499,
|
|
"eval_steps_per_second": 4.312,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.6692534118276693,
|
|
"grad_norm": 3.555928945541382,
|
|
"learning_rate": 9.809170951097739e-06,
|
|
"loss": 1.1236,
|
|
"step": 2501
|
|
},
|
|
{
|
|
"epoch": 0.6695210061546695,
|
|
"grad_norm": 3.356553316116333,
|
|
"learning_rate": 9.80892872274205e-06,
|
|
"loss": 1.0821,
|
|
"step": 2502
|
|
},
|
|
{
|
|
"epoch": 0.6697886004816698,
|
|
"grad_norm": 3.5524895191192627,
|
|
"learning_rate": 9.808686343742524e-06,
|
|
"loss": 1.3042,
|
|
"step": 2503
|
|
},
|
|
{
|
|
"epoch": 0.6700561948086701,
|
|
"grad_norm": 3.2740256786346436,
|
|
"learning_rate": 9.808443814106754e-06,
|
|
"loss": 1.091,
|
|
"step": 2504
|
|
},
|
|
{
|
|
"epoch": 0.6703237891356704,
|
|
"grad_norm": 3.2746195793151855,
|
|
"learning_rate": 9.808201133842337e-06,
|
|
"loss": 1.0107,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 0.6705913834626706,
|
|
"grad_norm": 3.7611098289489746,
|
|
"learning_rate": 9.807958302956875e-06,
|
|
"loss": 1.1164,
|
|
"step": 2506
|
|
},
|
|
{
|
|
"epoch": 0.6708589777896709,
|
|
"grad_norm": 3.5693981647491455,
|
|
"learning_rate": 9.807715321457976e-06,
|
|
"loss": 1.1661,
|
|
"step": 2507
|
|
},
|
|
{
|
|
"epoch": 0.6711265721166711,
|
|
"grad_norm": 3.7224698066711426,
|
|
"learning_rate": 9.807472189353249e-06,
|
|
"loss": 1.3212,
|
|
"step": 2508
|
|
},
|
|
{
|
|
"epoch": 0.6713941664436714,
|
|
"grad_norm": 3.767155408859253,
|
|
"learning_rate": 9.807228906650312e-06,
|
|
"loss": 1.294,
|
|
"step": 2509
|
|
},
|
|
{
|
|
"epoch": 0.6716617607706716,
|
|
"grad_norm": 4.016858100891113,
|
|
"learning_rate": 9.806985473356787e-06,
|
|
"loss": 1.1964,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.6719293550976719,
|
|
"grad_norm": 3.684230089187622,
|
|
"learning_rate": 9.806741889480298e-06,
|
|
"loss": 1.1301,
|
|
"step": 2511
|
|
},
|
|
{
|
|
"epoch": 0.6721969494246722,
|
|
"grad_norm": 3.254202365875244,
|
|
"learning_rate": 9.806498155028477e-06,
|
|
"loss": 1.0444,
|
|
"step": 2512
|
|
},
|
|
{
|
|
"epoch": 0.6724645437516724,
|
|
"grad_norm": 3.6285407543182373,
|
|
"learning_rate": 9.806254270008959e-06,
|
|
"loss": 1.1154,
|
|
"step": 2513
|
|
},
|
|
{
|
|
"epoch": 0.6727321380786727,
|
|
"grad_norm": 3.4263675212860107,
|
|
"learning_rate": 9.806010234429382e-06,
|
|
"loss": 1.1158,
|
|
"step": 2514
|
|
},
|
|
{
|
|
"epoch": 0.672999732405673,
|
|
"grad_norm": 3.619586229324341,
|
|
"learning_rate": 9.805766048297392e-06,
|
|
"loss": 1.0731,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 0.6732673267326733,
|
|
"grad_norm": 3.5182855129241943,
|
|
"learning_rate": 9.80552171162064e-06,
|
|
"loss": 1.1724,
|
|
"step": 2516
|
|
},
|
|
{
|
|
"epoch": 0.6735349210596735,
|
|
"grad_norm": 3.505631446838379,
|
|
"learning_rate": 9.805277224406776e-06,
|
|
"loss": 1.1398,
|
|
"step": 2517
|
|
},
|
|
{
|
|
"epoch": 0.6738025153866738,
|
|
"grad_norm": 3.540221929550171,
|
|
"learning_rate": 9.805032586663462e-06,
|
|
"loss": 1.1665,
|
|
"step": 2518
|
|
},
|
|
{
|
|
"epoch": 0.6740701097136741,
|
|
"grad_norm": 3.7385308742523193,
|
|
"learning_rate": 9.804787798398361e-06,
|
|
"loss": 1.1794,
|
|
"step": 2519
|
|
},
|
|
{
|
|
"epoch": 0.6743377040406744,
|
|
"grad_norm": 3.9296083450317383,
|
|
"learning_rate": 9.80454285961914e-06,
|
|
"loss": 1.2484,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.6746052983676746,
|
|
"grad_norm": 3.585625171661377,
|
|
"learning_rate": 9.804297770333472e-06,
|
|
"loss": 1.2348,
|
|
"step": 2521
|
|
},
|
|
{
|
|
"epoch": 0.6748728926946749,
|
|
"grad_norm": 3.768056869506836,
|
|
"learning_rate": 9.804052530549038e-06,
|
|
"loss": 1.0857,
|
|
"step": 2522
|
|
},
|
|
{
|
|
"epoch": 0.6751404870216752,
|
|
"grad_norm": 3.6394028663635254,
|
|
"learning_rate": 9.803807140273516e-06,
|
|
"loss": 1.1641,
|
|
"step": 2523
|
|
},
|
|
{
|
|
"epoch": 0.6754080813486754,
|
|
"grad_norm": 3.505856990814209,
|
|
"learning_rate": 9.803561599514594e-06,
|
|
"loss": 1.0889,
|
|
"step": 2524
|
|
},
|
|
{
|
|
"epoch": 0.6756756756756757,
|
|
"grad_norm": 3.4862112998962402,
|
|
"learning_rate": 9.803315908279966e-06,
|
|
"loss": 1.1436,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 0.675943270002676,
|
|
"grad_norm": 3.91096568107605,
|
|
"learning_rate": 9.803070066577327e-06,
|
|
"loss": 1.1813,
|
|
"step": 2526
|
|
},
|
|
{
|
|
"epoch": 0.6762108643296763,
|
|
"grad_norm": 3.642303228378296,
|
|
"learning_rate": 9.802824074414378e-06,
|
|
"loss": 1.1385,
|
|
"step": 2527
|
|
},
|
|
{
|
|
"epoch": 0.6764784586566764,
|
|
"grad_norm": 3.8517065048217773,
|
|
"learning_rate": 9.802577931798826e-06,
|
|
"loss": 1.1738,
|
|
"step": 2528
|
|
},
|
|
{
|
|
"epoch": 0.6767460529836767,
|
|
"grad_norm": 3.9151949882507324,
|
|
"learning_rate": 9.80233163873838e-06,
|
|
"loss": 1.3214,
|
|
"step": 2529
|
|
},
|
|
{
|
|
"epoch": 0.677013647310677,
|
|
"grad_norm": 3.326645612716675,
|
|
"learning_rate": 9.802085195240755e-06,
|
|
"loss": 1.1715,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.6772812416376773,
|
|
"grad_norm": 3.2645928859710693,
|
|
"learning_rate": 9.801838601313674e-06,
|
|
"loss": 1.0983,
|
|
"step": 2531
|
|
},
|
|
{
|
|
"epoch": 0.6775488359646775,
|
|
"grad_norm": 3.471367835998535,
|
|
"learning_rate": 9.801591856964859e-06,
|
|
"loss": 1.0831,
|
|
"step": 2532
|
|
},
|
|
{
|
|
"epoch": 0.6778164302916778,
|
|
"grad_norm": 3.4886016845703125,
|
|
"learning_rate": 9.80134496220204e-06,
|
|
"loss": 1.1807,
|
|
"step": 2533
|
|
},
|
|
{
|
|
"epoch": 0.6780840246186781,
|
|
"grad_norm": 3.3703372478485107,
|
|
"learning_rate": 9.801097917032951e-06,
|
|
"loss": 1.1011,
|
|
"step": 2534
|
|
},
|
|
{
|
|
"epoch": 0.6783516189456783,
|
|
"grad_norm": 3.6529722213745117,
|
|
"learning_rate": 9.800850721465334e-06,
|
|
"loss": 1.2387,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 0.6786192132726786,
|
|
"grad_norm": 3.183479070663452,
|
|
"learning_rate": 9.800603375506928e-06,
|
|
"loss": 1.0238,
|
|
"step": 2536
|
|
},
|
|
{
|
|
"epoch": 0.6788868075996789,
|
|
"grad_norm": 3.3142735958099365,
|
|
"learning_rate": 9.800355879165485e-06,
|
|
"loss": 1.0489,
|
|
"step": 2537
|
|
},
|
|
{
|
|
"epoch": 0.6791544019266792,
|
|
"grad_norm": 3.7475812435150146,
|
|
"learning_rate": 9.800108232448754e-06,
|
|
"loss": 1.2292,
|
|
"step": 2538
|
|
},
|
|
{
|
|
"epoch": 0.6794219962536794,
|
|
"grad_norm": 3.2578468322753906,
|
|
"learning_rate": 9.7998604353645e-06,
|
|
"loss": 1.1332,
|
|
"step": 2539
|
|
},
|
|
{
|
|
"epoch": 0.6796895905806797,
|
|
"grad_norm": 3.501826286315918,
|
|
"learning_rate": 9.799612487920476e-06,
|
|
"loss": 1.1691,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.67995718490768,
|
|
"grad_norm": 3.8417768478393555,
|
|
"learning_rate": 9.799364390124456e-06,
|
|
"loss": 1.1488,
|
|
"step": 2541
|
|
},
|
|
{
|
|
"epoch": 0.6802247792346803,
|
|
"grad_norm": 4.020801544189453,
|
|
"learning_rate": 9.799116141984209e-06,
|
|
"loss": 1.2232,
|
|
"step": 2542
|
|
},
|
|
{
|
|
"epoch": 0.6804923735616805,
|
|
"grad_norm": 3.748538017272949,
|
|
"learning_rate": 9.798867743507512e-06,
|
|
"loss": 1.22,
|
|
"step": 2543
|
|
},
|
|
{
|
|
"epoch": 0.6807599678886808,
|
|
"grad_norm": 3.597007989883423,
|
|
"learning_rate": 9.798619194702148e-06,
|
|
"loss": 1.1873,
|
|
"step": 2544
|
|
},
|
|
{
|
|
"epoch": 0.6810275622156811,
|
|
"grad_norm": 3.8766472339630127,
|
|
"learning_rate": 9.798370495575901e-06,
|
|
"loss": 1.3015,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 0.6812951565426812,
|
|
"grad_norm": 3.568079948425293,
|
|
"learning_rate": 9.798121646136562e-06,
|
|
"loss": 1.1225,
|
|
"step": 2546
|
|
},
|
|
{
|
|
"epoch": 0.6815627508696815,
|
|
"grad_norm": 3.2755115032196045,
|
|
"learning_rate": 9.797872646391926e-06,
|
|
"loss": 1.0251,
|
|
"step": 2547
|
|
},
|
|
{
|
|
"epoch": 0.6818303451966818,
|
|
"grad_norm": 3.6031720638275146,
|
|
"learning_rate": 9.797623496349795e-06,
|
|
"loss": 1.0804,
|
|
"step": 2548
|
|
},
|
|
{
|
|
"epoch": 0.6820979395236821,
|
|
"grad_norm": 3.285602331161499,
|
|
"learning_rate": 9.797374196017974e-06,
|
|
"loss": 1.0666,
|
|
"step": 2549
|
|
},
|
|
{
|
|
"epoch": 0.6823655338506823,
|
|
"grad_norm": 3.9866554737091064,
|
|
"learning_rate": 9.79712474540427e-06,
|
|
"loss": 1.2911,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.6826331281776826,
|
|
"grad_norm": 3.1174442768096924,
|
|
"learning_rate": 9.796875144516498e-06,
|
|
"loss": 1.0572,
|
|
"step": 2551
|
|
},
|
|
{
|
|
"epoch": 0.6829007225046829,
|
|
"grad_norm": 3.3973238468170166,
|
|
"learning_rate": 9.796625393362477e-06,
|
|
"loss": 1.0371,
|
|
"step": 2552
|
|
},
|
|
{
|
|
"epoch": 0.6831683168316832,
|
|
"grad_norm": 3.1947076320648193,
|
|
"learning_rate": 9.796375491950034e-06,
|
|
"loss": 1.1874,
|
|
"step": 2553
|
|
},
|
|
{
|
|
"epoch": 0.6834359111586834,
|
|
"grad_norm": 3.0682270526885986,
|
|
"learning_rate": 9.796125440286992e-06,
|
|
"loss": 1.1072,
|
|
"step": 2554
|
|
},
|
|
{
|
|
"epoch": 0.6837035054856837,
|
|
"grad_norm": 3.7304959297180176,
|
|
"learning_rate": 9.795875238381188e-06,
|
|
"loss": 1.1563,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 0.683971099812684,
|
|
"grad_norm": 3.118598461151123,
|
|
"learning_rate": 9.795624886240458e-06,
|
|
"loss": 1.133,
|
|
"step": 2556
|
|
},
|
|
{
|
|
"epoch": 0.6842386941396842,
|
|
"grad_norm": 4.15332555770874,
|
|
"learning_rate": 9.795374383872645e-06,
|
|
"loss": 1.2752,
|
|
"step": 2557
|
|
},
|
|
{
|
|
"epoch": 0.6845062884666845,
|
|
"grad_norm": 3.629516124725342,
|
|
"learning_rate": 9.795123731285595e-06,
|
|
"loss": 1.2345,
|
|
"step": 2558
|
|
},
|
|
{
|
|
"epoch": 0.6847738827936848,
|
|
"grad_norm": 3.6850171089172363,
|
|
"learning_rate": 9.794872928487163e-06,
|
|
"loss": 1.2808,
|
|
"step": 2559
|
|
},
|
|
{
|
|
"epoch": 0.6850414771206851,
|
|
"grad_norm": 3.699629306793213,
|
|
"learning_rate": 9.7946219754852e-06,
|
|
"loss": 1.3603,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.6853090714476853,
|
|
"grad_norm": 3.9391591548919678,
|
|
"learning_rate": 9.794370872287575e-06,
|
|
"loss": 1.2984,
|
|
"step": 2561
|
|
},
|
|
{
|
|
"epoch": 0.6855766657746856,
|
|
"grad_norm": 3.434231758117676,
|
|
"learning_rate": 9.79411961890215e-06,
|
|
"loss": 1.2203,
|
|
"step": 2562
|
|
},
|
|
{
|
|
"epoch": 0.6858442601016859,
|
|
"grad_norm": 3.2698097229003906,
|
|
"learning_rate": 9.793868215336792e-06,
|
|
"loss": 1.1053,
|
|
"step": 2563
|
|
},
|
|
{
|
|
"epoch": 0.6861118544286862,
|
|
"grad_norm": 3.335155725479126,
|
|
"learning_rate": 9.793616661599384e-06,
|
|
"loss": 1.2078,
|
|
"step": 2564
|
|
},
|
|
{
|
|
"epoch": 0.6863794487556863,
|
|
"grad_norm": 3.870070695877075,
|
|
"learning_rate": 9.7933649576978e-06,
|
|
"loss": 1.2166,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 0.6866470430826866,
|
|
"grad_norm": 3.4541990756988525,
|
|
"learning_rate": 9.79311310363993e-06,
|
|
"loss": 1.1562,
|
|
"step": 2566
|
|
},
|
|
{
|
|
"epoch": 0.686914637409687,
|
|
"grad_norm": 3.1093223094940186,
|
|
"learning_rate": 9.792861099433657e-06,
|
|
"loss": 1.0556,
|
|
"step": 2567
|
|
},
|
|
{
|
|
"epoch": 0.6871822317366871,
|
|
"grad_norm": 3.6408331394195557,
|
|
"learning_rate": 9.79260894508688e-06,
|
|
"loss": 1.1649,
|
|
"step": 2568
|
|
},
|
|
{
|
|
"epoch": 0.6874498260636874,
|
|
"grad_norm": 3.420346260070801,
|
|
"learning_rate": 9.792356640607497e-06,
|
|
"loss": 1.0884,
|
|
"step": 2569
|
|
},
|
|
{
|
|
"epoch": 0.6877174203906877,
|
|
"grad_norm": 3.3369221687316895,
|
|
"learning_rate": 9.792104186003412e-06,
|
|
"loss": 1.1023,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.687985014717688,
|
|
"grad_norm": 3.251084089279175,
|
|
"learning_rate": 9.791851581282533e-06,
|
|
"loss": 1.0486,
|
|
"step": 2571
|
|
},
|
|
{
|
|
"epoch": 0.6882526090446882,
|
|
"grad_norm": 3.6394076347351074,
|
|
"learning_rate": 9.791598826452773e-06,
|
|
"loss": 1.0097,
|
|
"step": 2572
|
|
},
|
|
{
|
|
"epoch": 0.6885202033716885,
|
|
"grad_norm": 3.8325955867767334,
|
|
"learning_rate": 9.79134592152205e-06,
|
|
"loss": 1.2028,
|
|
"step": 2573
|
|
},
|
|
{
|
|
"epoch": 0.6887877976986888,
|
|
"grad_norm": 3.359297037124634,
|
|
"learning_rate": 9.791092866498286e-06,
|
|
"loss": 1.0754,
|
|
"step": 2574
|
|
},
|
|
{
|
|
"epoch": 0.6890553920256891,
|
|
"grad_norm": 3.287555694580078,
|
|
"learning_rate": 9.790839661389408e-06,
|
|
"loss": 1.0958,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 0.6893229863526893,
|
|
"grad_norm": 3.197094202041626,
|
|
"learning_rate": 9.790586306203348e-06,
|
|
"loss": 1.0084,
|
|
"step": 2576
|
|
},
|
|
{
|
|
"epoch": 0.6895905806796896,
|
|
"grad_norm": 3.1888086795806885,
|
|
"learning_rate": 9.790332800948044e-06,
|
|
"loss": 1.1168,
|
|
"step": 2577
|
|
},
|
|
{
|
|
"epoch": 0.6898581750066899,
|
|
"grad_norm": 3.7352941036224365,
|
|
"learning_rate": 9.790079145631434e-06,
|
|
"loss": 1.1924,
|
|
"step": 2578
|
|
},
|
|
{
|
|
"epoch": 0.6901257693336901,
|
|
"grad_norm": 3.5531890392303467,
|
|
"learning_rate": 9.789825340261467e-06,
|
|
"loss": 1.0547,
|
|
"step": 2579
|
|
},
|
|
{
|
|
"epoch": 0.6903933636606904,
|
|
"grad_norm": 3.3544304370880127,
|
|
"learning_rate": 9.789571384846093e-06,
|
|
"loss": 1.0319,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.6906609579876907,
|
|
"grad_norm": 3.5817840099334717,
|
|
"learning_rate": 9.789317279393267e-06,
|
|
"loss": 1.2264,
|
|
"step": 2581
|
|
},
|
|
{
|
|
"epoch": 0.690928552314691,
|
|
"grad_norm": 3.4651858806610107,
|
|
"learning_rate": 9.78906302391095e-06,
|
|
"loss": 1.0902,
|
|
"step": 2582
|
|
},
|
|
{
|
|
"epoch": 0.6911961466416912,
|
|
"grad_norm": 3.6912760734558105,
|
|
"learning_rate": 9.788808618407103e-06,
|
|
"loss": 1.2353,
|
|
"step": 2583
|
|
},
|
|
{
|
|
"epoch": 0.6914637409686915,
|
|
"grad_norm": 3.6648828983306885,
|
|
"learning_rate": 9.788554062889702e-06,
|
|
"loss": 1.2044,
|
|
"step": 2584
|
|
},
|
|
{
|
|
"epoch": 0.6917313352956918,
|
|
"grad_norm": 3.2582671642303467,
|
|
"learning_rate": 9.788299357366717e-06,
|
|
"loss": 1.0388,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 0.691998929622692,
|
|
"grad_norm": 3.759870767593384,
|
|
"learning_rate": 9.788044501846125e-06,
|
|
"loss": 1.2292,
|
|
"step": 2586
|
|
},
|
|
{
|
|
"epoch": 0.6922665239496922,
|
|
"grad_norm": 3.5340397357940674,
|
|
"learning_rate": 9.787789496335913e-06,
|
|
"loss": 1.2696,
|
|
"step": 2587
|
|
},
|
|
{
|
|
"epoch": 0.6925341182766925,
|
|
"grad_norm": 3.402407169342041,
|
|
"learning_rate": 9.78753434084407e-06,
|
|
"loss": 1.2269,
|
|
"step": 2588
|
|
},
|
|
{
|
|
"epoch": 0.6928017126036928,
|
|
"grad_norm": 3.7191381454467773,
|
|
"learning_rate": 9.787279035378585e-06,
|
|
"loss": 1.1591,
|
|
"step": 2589
|
|
},
|
|
{
|
|
"epoch": 0.693069306930693,
|
|
"grad_norm": 3.3745412826538086,
|
|
"learning_rate": 9.78702357994746e-06,
|
|
"loss": 1.1188,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.6933369012576933,
|
|
"grad_norm": 3.5345706939697266,
|
|
"learning_rate": 9.786767974558693e-06,
|
|
"loss": 1.2377,
|
|
"step": 2591
|
|
},
|
|
{
|
|
"epoch": 0.6936044955846936,
|
|
"grad_norm": 3.54662823677063,
|
|
"learning_rate": 9.786512219220294e-06,
|
|
"loss": 1.2069,
|
|
"step": 2592
|
|
},
|
|
{
|
|
"epoch": 0.6938720899116939,
|
|
"grad_norm": 3.509596109390259,
|
|
"learning_rate": 9.786256313940276e-06,
|
|
"loss": 1.2492,
|
|
"step": 2593
|
|
},
|
|
{
|
|
"epoch": 0.6941396842386941,
|
|
"grad_norm": 3.594794273376465,
|
|
"learning_rate": 9.786000258726652e-06,
|
|
"loss": 1.1751,
|
|
"step": 2594
|
|
},
|
|
{
|
|
"epoch": 0.6944072785656944,
|
|
"grad_norm": 3.4790191650390625,
|
|
"learning_rate": 9.785744053587445e-06,
|
|
"loss": 1.2485,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 0.6946748728926947,
|
|
"grad_norm": 3.57783842086792,
|
|
"learning_rate": 9.78548769853068e-06,
|
|
"loss": 1.1048,
|
|
"step": 2596
|
|
},
|
|
{
|
|
"epoch": 0.694942467219695,
|
|
"grad_norm": 3.960777521133423,
|
|
"learning_rate": 9.785231193564388e-06,
|
|
"loss": 1.2018,
|
|
"step": 2597
|
|
},
|
|
{
|
|
"epoch": 0.6952100615466952,
|
|
"grad_norm": 3.92084002494812,
|
|
"learning_rate": 9.784974538696606e-06,
|
|
"loss": 1.2637,
|
|
"step": 2598
|
|
},
|
|
{
|
|
"epoch": 0.6954776558736955,
|
|
"grad_norm": 3.7744603157043457,
|
|
"learning_rate": 9.78471773393537e-06,
|
|
"loss": 1.2868,
|
|
"step": 2599
|
|
},
|
|
{
|
|
"epoch": 0.6957452502006958,
|
|
"grad_norm": 3.3519065380096436,
|
|
"learning_rate": 9.784460779288727e-06,
|
|
"loss": 1.0901,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.696012844527696,
|
|
"grad_norm": 3.282240390777588,
|
|
"learning_rate": 9.784203674764727e-06,
|
|
"loss": 1.1259,
|
|
"step": 2601
|
|
},
|
|
{
|
|
"epoch": 0.6962804388546963,
|
|
"grad_norm": 3.7202768325805664,
|
|
"learning_rate": 9.783946420371424e-06,
|
|
"loss": 1.1036,
|
|
"step": 2602
|
|
},
|
|
{
|
|
"epoch": 0.6965480331816966,
|
|
"grad_norm": 3.3979485034942627,
|
|
"learning_rate": 9.783689016116874e-06,
|
|
"loss": 1.1188,
|
|
"step": 2603
|
|
},
|
|
{
|
|
"epoch": 0.6968156275086969,
|
|
"grad_norm": 3.3660459518432617,
|
|
"learning_rate": 9.783431462009146e-06,
|
|
"loss": 1.0795,
|
|
"step": 2604
|
|
},
|
|
{
|
|
"epoch": 0.697083221835697,
|
|
"grad_norm": 3.240844964981079,
|
|
"learning_rate": 9.7831737580563e-06,
|
|
"loss": 1.0436,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 0.6973508161626973,
|
|
"grad_norm": 3.5097098350524902,
|
|
"learning_rate": 9.782915904266416e-06,
|
|
"loss": 1.1287,
|
|
"step": 2606
|
|
},
|
|
{
|
|
"epoch": 0.6976184104896976,
|
|
"grad_norm": 3.2211825847625732,
|
|
"learning_rate": 9.782657900647567e-06,
|
|
"loss": 1.0162,
|
|
"step": 2607
|
|
},
|
|
{
|
|
"epoch": 0.6978860048166979,
|
|
"grad_norm": 3.5163321495056152,
|
|
"learning_rate": 9.782399747207838e-06,
|
|
"loss": 1.258,
|
|
"step": 2608
|
|
},
|
|
{
|
|
"epoch": 0.6981535991436981,
|
|
"grad_norm": 3.4427928924560547,
|
|
"learning_rate": 9.782141443955316e-06,
|
|
"loss": 1.1632,
|
|
"step": 2609
|
|
},
|
|
{
|
|
"epoch": 0.6984211934706984,
|
|
"grad_norm": 3.6478707790374756,
|
|
"learning_rate": 9.78188299089809e-06,
|
|
"loss": 1.0287,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.6986887877976987,
|
|
"grad_norm": 3.5365660190582275,
|
|
"learning_rate": 9.781624388044257e-06,
|
|
"loss": 1.1929,
|
|
"step": 2611
|
|
},
|
|
{
|
|
"epoch": 0.6989563821246989,
|
|
"grad_norm": 3.965444803237915,
|
|
"learning_rate": 9.78136563540192e-06,
|
|
"loss": 1.3651,
|
|
"step": 2612
|
|
},
|
|
{
|
|
"epoch": 0.6992239764516992,
|
|
"grad_norm": 3.7215042114257812,
|
|
"learning_rate": 9.781106732979182e-06,
|
|
"loss": 1.1677,
|
|
"step": 2613
|
|
},
|
|
{
|
|
"epoch": 0.6994915707786995,
|
|
"grad_norm": 3.5624494552612305,
|
|
"learning_rate": 9.780847680784156e-06,
|
|
"loss": 1.0269,
|
|
"step": 2614
|
|
},
|
|
{
|
|
"epoch": 0.6997591651056998,
|
|
"grad_norm": 3.623762845993042,
|
|
"learning_rate": 9.780588478824953e-06,
|
|
"loss": 1.1772,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 0.7000267594327,
|
|
"grad_norm": 3.544771194458008,
|
|
"learning_rate": 9.780329127109697e-06,
|
|
"loss": 1.1919,
|
|
"step": 2616
|
|
},
|
|
{
|
|
"epoch": 0.7002943537597003,
|
|
"grad_norm": 3.900216817855835,
|
|
"learning_rate": 9.780069625646512e-06,
|
|
"loss": 1.2399,
|
|
"step": 2617
|
|
},
|
|
{
|
|
"epoch": 0.7005619480867006,
|
|
"grad_norm": 3.4038405418395996,
|
|
"learning_rate": 9.779809974443525e-06,
|
|
"loss": 1.143,
|
|
"step": 2618
|
|
},
|
|
{
|
|
"epoch": 0.7008295424137009,
|
|
"grad_norm": 3.4808125495910645,
|
|
"learning_rate": 9.77955017350887e-06,
|
|
"loss": 1.1755,
|
|
"step": 2619
|
|
},
|
|
{
|
|
"epoch": 0.7010971367407011,
|
|
"grad_norm": 3.106503963470459,
|
|
"learning_rate": 9.779290222850686e-06,
|
|
"loss": 1.0324,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.7013647310677014,
|
|
"grad_norm": 3.0059962272644043,
|
|
"learning_rate": 9.779030122477118e-06,
|
|
"loss": 1.0385,
|
|
"step": 2621
|
|
},
|
|
{
|
|
"epoch": 0.7016323253947017,
|
|
"grad_norm": 3.8533339500427246,
|
|
"learning_rate": 9.778769872396311e-06,
|
|
"loss": 1.2545,
|
|
"step": 2622
|
|
},
|
|
{
|
|
"epoch": 0.7018999197217018,
|
|
"grad_norm": 3.361427068710327,
|
|
"learning_rate": 9.77850947261642e-06,
|
|
"loss": 1.1001,
|
|
"step": 2623
|
|
},
|
|
{
|
|
"epoch": 0.7021675140487021,
|
|
"grad_norm": 3.411195755004883,
|
|
"learning_rate": 9.778248923145599e-06,
|
|
"loss": 0.9854,
|
|
"step": 2624
|
|
},
|
|
{
|
|
"epoch": 0.7024351083757024,
|
|
"grad_norm": 3.7567944526672363,
|
|
"learning_rate": 9.777988223992014e-06,
|
|
"loss": 1.2254,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 0.7027027027027027,
|
|
"grad_norm": 3.7029223442077637,
|
|
"learning_rate": 9.777727375163828e-06,
|
|
"loss": 1.2784,
|
|
"step": 2626
|
|
},
|
|
{
|
|
"epoch": 0.7029702970297029,
|
|
"grad_norm": 3.0535287857055664,
|
|
"learning_rate": 9.777466376669214e-06,
|
|
"loss": 0.9761,
|
|
"step": 2627
|
|
},
|
|
{
|
|
"epoch": 0.7032378913567032,
|
|
"grad_norm": 3.1361937522888184,
|
|
"learning_rate": 9.777205228516349e-06,
|
|
"loss": 1.0701,
|
|
"step": 2628
|
|
},
|
|
{
|
|
"epoch": 0.7035054856837035,
|
|
"grad_norm": 3.5227043628692627,
|
|
"learning_rate": 9.776943930713411e-06,
|
|
"loss": 1.2202,
|
|
"step": 2629
|
|
},
|
|
{
|
|
"epoch": 0.7037730800107038,
|
|
"grad_norm": 3.2027533054351807,
|
|
"learning_rate": 9.776682483268588e-06,
|
|
"loss": 1.1063,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.704040674337704,
|
|
"grad_norm": 3.622596025466919,
|
|
"learning_rate": 9.776420886190069e-06,
|
|
"loss": 1.1919,
|
|
"step": 2631
|
|
},
|
|
{
|
|
"epoch": 0.7043082686647043,
|
|
"grad_norm": 3.527977466583252,
|
|
"learning_rate": 9.776159139486048e-06,
|
|
"loss": 1.1076,
|
|
"step": 2632
|
|
},
|
|
{
|
|
"epoch": 0.7045758629917046,
|
|
"grad_norm": 2.9947965145111084,
|
|
"learning_rate": 9.775897243164727e-06,
|
|
"loss": 1.1083,
|
|
"step": 2633
|
|
},
|
|
{
|
|
"epoch": 0.7048434573187048,
|
|
"grad_norm": 3.344877004623413,
|
|
"learning_rate": 9.775635197234306e-06,
|
|
"loss": 1.1652,
|
|
"step": 2634
|
|
},
|
|
{
|
|
"epoch": 0.7051110516457051,
|
|
"grad_norm": 3.506344795227051,
|
|
"learning_rate": 9.775373001702998e-06,
|
|
"loss": 1.1122,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 0.7053786459727054,
|
|
"grad_norm": 3.5416011810302734,
|
|
"learning_rate": 9.775110656579015e-06,
|
|
"loss": 1.0505,
|
|
"step": 2636
|
|
},
|
|
{
|
|
"epoch": 0.7056462402997057,
|
|
"grad_norm": 3.234518527984619,
|
|
"learning_rate": 9.774848161870574e-06,
|
|
"loss": 0.999,
|
|
"step": 2637
|
|
},
|
|
{
|
|
"epoch": 0.7059138346267059,
|
|
"grad_norm": 4.471454620361328,
|
|
"learning_rate": 9.774585517585898e-06,
|
|
"loss": 1.146,
|
|
"step": 2638
|
|
},
|
|
{
|
|
"epoch": 0.7061814289537062,
|
|
"grad_norm": 3.475337505340576,
|
|
"learning_rate": 9.774322723733216e-06,
|
|
"loss": 0.9791,
|
|
"step": 2639
|
|
},
|
|
{
|
|
"epoch": 0.7064490232807065,
|
|
"grad_norm": 3.684784173965454,
|
|
"learning_rate": 9.774059780320759e-06,
|
|
"loss": 1.1905,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.7067166176077068,
|
|
"grad_norm": 3.747850179672241,
|
|
"learning_rate": 9.773796687356764e-06,
|
|
"loss": 1.2367,
|
|
"step": 2641
|
|
},
|
|
{
|
|
"epoch": 0.706984211934707,
|
|
"grad_norm": 3.962178945541382,
|
|
"learning_rate": 9.773533444849475e-06,
|
|
"loss": 1.1333,
|
|
"step": 2642
|
|
},
|
|
{
|
|
"epoch": 0.7072518062617072,
|
|
"grad_norm": 3.6332039833068848,
|
|
"learning_rate": 9.773270052807135e-06,
|
|
"loss": 1.1832,
|
|
"step": 2643
|
|
},
|
|
{
|
|
"epoch": 0.7075194005887075,
|
|
"grad_norm": 3.565274715423584,
|
|
"learning_rate": 9.773006511237997e-06,
|
|
"loss": 1.2,
|
|
"step": 2644
|
|
},
|
|
{
|
|
"epoch": 0.7077869949157077,
|
|
"grad_norm": 3.718888998031616,
|
|
"learning_rate": 9.772742820150316e-06,
|
|
"loss": 1.18,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 0.708054589242708,
|
|
"grad_norm": 3.387706756591797,
|
|
"learning_rate": 9.772478979552353e-06,
|
|
"loss": 1.0484,
|
|
"step": 2646
|
|
},
|
|
{
|
|
"epoch": 0.7083221835697083,
|
|
"grad_norm": 3.5784285068511963,
|
|
"learning_rate": 9.772214989452372e-06,
|
|
"loss": 1.1859,
|
|
"step": 2647
|
|
},
|
|
{
|
|
"epoch": 0.7085897778967086,
|
|
"grad_norm": 3.052260160446167,
|
|
"learning_rate": 9.771950849858641e-06,
|
|
"loss": 1.0571,
|
|
"step": 2648
|
|
},
|
|
{
|
|
"epoch": 0.7088573722237088,
|
|
"grad_norm": 3.3145363330841064,
|
|
"learning_rate": 9.771686560779438e-06,
|
|
"loss": 1.1643,
|
|
"step": 2649
|
|
},
|
|
{
|
|
"epoch": 0.7091249665507091,
|
|
"grad_norm": 3.7414932250976562,
|
|
"learning_rate": 9.771422122223042e-06,
|
|
"loss": 1.2321,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.7093925608777094,
|
|
"grad_norm": 3.2115883827209473,
|
|
"learning_rate": 9.771157534197733e-06,
|
|
"loss": 1.1001,
|
|
"step": 2651
|
|
},
|
|
{
|
|
"epoch": 0.7096601552047097,
|
|
"grad_norm": 3.6394455432891846,
|
|
"learning_rate": 9.770892796711804e-06,
|
|
"loss": 1.2063,
|
|
"step": 2652
|
|
},
|
|
{
|
|
"epoch": 0.7099277495317099,
|
|
"grad_norm": 3.6082866191864014,
|
|
"learning_rate": 9.770627909773545e-06,
|
|
"loss": 1.1383,
|
|
"step": 2653
|
|
},
|
|
{
|
|
"epoch": 0.7101953438587102,
|
|
"grad_norm": 4.1197919845581055,
|
|
"learning_rate": 9.770362873391256e-06,
|
|
"loss": 1.2361,
|
|
"step": 2654
|
|
},
|
|
{
|
|
"epoch": 0.7104629381857105,
|
|
"grad_norm": 3.101154327392578,
|
|
"learning_rate": 9.770097687573235e-06,
|
|
"loss": 1.0858,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 0.7107305325127107,
|
|
"grad_norm": 3.6352450847625732,
|
|
"learning_rate": 9.769832352327795e-06,
|
|
"loss": 1.0094,
|
|
"step": 2656
|
|
},
|
|
{
|
|
"epoch": 0.710998126839711,
|
|
"grad_norm": 3.195739507675171,
|
|
"learning_rate": 9.769566867663245e-06,
|
|
"loss": 1.0132,
|
|
"step": 2657
|
|
},
|
|
{
|
|
"epoch": 0.7112657211667113,
|
|
"grad_norm": 3.4742939472198486,
|
|
"learning_rate": 9.7693012335879e-06,
|
|
"loss": 1.235,
|
|
"step": 2658
|
|
},
|
|
{
|
|
"epoch": 0.7115333154937116,
|
|
"grad_norm": 3.451916217803955,
|
|
"learning_rate": 9.769035450110084e-06,
|
|
"loss": 1.1433,
|
|
"step": 2659
|
|
},
|
|
{
|
|
"epoch": 0.7118009098207118,
|
|
"grad_norm": 3.5261240005493164,
|
|
"learning_rate": 9.768769517238124e-06,
|
|
"loss": 1.1758,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.7120685041477121,
|
|
"grad_norm": 3.142664670944214,
|
|
"learning_rate": 9.768503434980348e-06,
|
|
"loss": 1.0356,
|
|
"step": 2661
|
|
},
|
|
{
|
|
"epoch": 0.7123360984747124,
|
|
"grad_norm": 3.1803033351898193,
|
|
"learning_rate": 9.76823720334509e-06,
|
|
"loss": 1.0501,
|
|
"step": 2662
|
|
},
|
|
{
|
|
"epoch": 0.7126036928017127,
|
|
"grad_norm": 3.5911192893981934,
|
|
"learning_rate": 9.767970822340692e-06,
|
|
"loss": 1.0931,
|
|
"step": 2663
|
|
},
|
|
{
|
|
"epoch": 0.7128712871287128,
|
|
"grad_norm": 3.599949598312378,
|
|
"learning_rate": 9.7677042919755e-06,
|
|
"loss": 1.1297,
|
|
"step": 2664
|
|
},
|
|
{
|
|
"epoch": 0.7131388814557131,
|
|
"grad_norm": 3.7325220108032227,
|
|
"learning_rate": 9.76743761225786e-06,
|
|
"loss": 1.1475,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 0.7134064757827134,
|
|
"grad_norm": 3.2687487602233887,
|
|
"learning_rate": 9.767170783196128e-06,
|
|
"loss": 1.108,
|
|
"step": 2666
|
|
},
|
|
{
|
|
"epoch": 0.7136740701097136,
|
|
"grad_norm": 3.567669630050659,
|
|
"learning_rate": 9.766903804798663e-06,
|
|
"loss": 1.1965,
|
|
"step": 2667
|
|
},
|
|
{
|
|
"epoch": 0.7139416644367139,
|
|
"grad_norm": 2.9746851921081543,
|
|
"learning_rate": 9.766636677073825e-06,
|
|
"loss": 0.9885,
|
|
"step": 2668
|
|
},
|
|
{
|
|
"epoch": 0.7142092587637142,
|
|
"grad_norm": 3.6333086490631104,
|
|
"learning_rate": 9.766369400029987e-06,
|
|
"loss": 1.1906,
|
|
"step": 2669
|
|
},
|
|
{
|
|
"epoch": 0.7144768530907145,
|
|
"grad_norm": 3.6311559677124023,
|
|
"learning_rate": 9.766101973675519e-06,
|
|
"loss": 1.1869,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.7147444474177147,
|
|
"grad_norm": 3.632929801940918,
|
|
"learning_rate": 9.765834398018797e-06,
|
|
"loss": 1.1423,
|
|
"step": 2671
|
|
},
|
|
{
|
|
"epoch": 0.715012041744715,
|
|
"grad_norm": 3.4885165691375732,
|
|
"learning_rate": 9.765566673068206e-06,
|
|
"loss": 1.1226,
|
|
"step": 2672
|
|
},
|
|
{
|
|
"epoch": 0.7152796360717153,
|
|
"grad_norm": 3.2653591632843018,
|
|
"learning_rate": 9.765298798832132e-06,
|
|
"loss": 1.0427,
|
|
"step": 2673
|
|
},
|
|
{
|
|
"epoch": 0.7155472303987156,
|
|
"grad_norm": 3.1086037158966064,
|
|
"learning_rate": 9.765030775318965e-06,
|
|
"loss": 1.0032,
|
|
"step": 2674
|
|
},
|
|
{
|
|
"epoch": 0.7158148247257158,
|
|
"grad_norm": 3.084402322769165,
|
|
"learning_rate": 9.764762602537102e-06,
|
|
"loss": 1.0019,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 0.7160824190527161,
|
|
"grad_norm": 3.50754714012146,
|
|
"learning_rate": 9.764494280494943e-06,
|
|
"loss": 1.0982,
|
|
"step": 2676
|
|
},
|
|
{
|
|
"epoch": 0.7163500133797164,
|
|
"grad_norm": 3.355750799179077,
|
|
"learning_rate": 9.764225809200894e-06,
|
|
"loss": 1.1512,
|
|
"step": 2677
|
|
},
|
|
{
|
|
"epoch": 0.7166176077067166,
|
|
"grad_norm": 3.4063217639923096,
|
|
"learning_rate": 9.763957188663366e-06,
|
|
"loss": 1.2209,
|
|
"step": 2678
|
|
},
|
|
{
|
|
"epoch": 0.7168852020337169,
|
|
"grad_norm": 3.5086374282836914,
|
|
"learning_rate": 9.76368841889077e-06,
|
|
"loss": 1.2162,
|
|
"step": 2679
|
|
},
|
|
{
|
|
"epoch": 0.7171527963607172,
|
|
"grad_norm": 3.286731719970703,
|
|
"learning_rate": 9.763419499891533e-06,
|
|
"loss": 1.176,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.7174203906877175,
|
|
"grad_norm": 3.622854232788086,
|
|
"learning_rate": 9.763150431674072e-06,
|
|
"loss": 1.1612,
|
|
"step": 2681
|
|
},
|
|
{
|
|
"epoch": 0.7176879850147176,
|
|
"grad_norm": 3.5535635948181152,
|
|
"learning_rate": 9.762881214246817e-06,
|
|
"loss": 1.1771,
|
|
"step": 2682
|
|
},
|
|
{
|
|
"epoch": 0.7179555793417179,
|
|
"grad_norm": 4.360621452331543,
|
|
"learning_rate": 9.762611847618203e-06,
|
|
"loss": 1.0991,
|
|
"step": 2683
|
|
},
|
|
{
|
|
"epoch": 0.7182231736687182,
|
|
"grad_norm": 3.4346253871917725,
|
|
"learning_rate": 9.762342331796671e-06,
|
|
"loss": 1.1323,
|
|
"step": 2684
|
|
},
|
|
{
|
|
"epoch": 0.7184907679957185,
|
|
"grad_norm": 3.3075368404388428,
|
|
"learning_rate": 9.762072666790658e-06,
|
|
"loss": 1.0226,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 0.7187583623227187,
|
|
"grad_norm": 3.4032669067382812,
|
|
"learning_rate": 9.761802852608614e-06,
|
|
"loss": 1.0554,
|
|
"step": 2686
|
|
},
|
|
{
|
|
"epoch": 0.719025956649719,
|
|
"grad_norm": 3.6175873279571533,
|
|
"learning_rate": 9.76153288925899e-06,
|
|
"loss": 1.1181,
|
|
"step": 2687
|
|
},
|
|
{
|
|
"epoch": 0.7192935509767193,
|
|
"grad_norm": 3.678610324859619,
|
|
"learning_rate": 9.761262776750248e-06,
|
|
"loss": 1.2954,
|
|
"step": 2688
|
|
},
|
|
{
|
|
"epoch": 0.7195611453037195,
|
|
"grad_norm": 3.5059852600097656,
|
|
"learning_rate": 9.760992515090844e-06,
|
|
"loss": 1.1544,
|
|
"step": 2689
|
|
},
|
|
{
|
|
"epoch": 0.7198287396307198,
|
|
"grad_norm": 3.412489414215088,
|
|
"learning_rate": 9.760722104289244e-06,
|
|
"loss": 1.2178,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.7200963339577201,
|
|
"grad_norm": 3.746623992919922,
|
|
"learning_rate": 9.760451544353923e-06,
|
|
"loss": 1.2707,
|
|
"step": 2691
|
|
},
|
|
{
|
|
"epoch": 0.7203639282847204,
|
|
"grad_norm": 3.2864015102386475,
|
|
"learning_rate": 9.760180835293352e-06,
|
|
"loss": 1.0242,
|
|
"step": 2692
|
|
},
|
|
{
|
|
"epoch": 0.7206315226117206,
|
|
"grad_norm": 3.267595052719116,
|
|
"learning_rate": 9.759909977116016e-06,
|
|
"loss": 1.0804,
|
|
"step": 2693
|
|
},
|
|
{
|
|
"epoch": 0.7208991169387209,
|
|
"grad_norm": 3.310580015182495,
|
|
"learning_rate": 9.759638969830395e-06,
|
|
"loss": 1.079,
|
|
"step": 2694
|
|
},
|
|
{
|
|
"epoch": 0.7211667112657212,
|
|
"grad_norm": 3.208405017852783,
|
|
"learning_rate": 9.759367813444982e-06,
|
|
"loss": 1.0354,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 0.7214343055927215,
|
|
"grad_norm": 3.2207465171813965,
|
|
"learning_rate": 9.75909650796827e-06,
|
|
"loss": 1.0239,
|
|
"step": 2696
|
|
},
|
|
{
|
|
"epoch": 0.7217018999197217,
|
|
"grad_norm": 3.3544509410858154,
|
|
"learning_rate": 9.758825053408755e-06,
|
|
"loss": 1.1832,
|
|
"step": 2697
|
|
},
|
|
{
|
|
"epoch": 0.721969494246722,
|
|
"grad_norm": 3.2339577674865723,
|
|
"learning_rate": 9.758553449774947e-06,
|
|
"loss": 1.0818,
|
|
"step": 2698
|
|
},
|
|
{
|
|
"epoch": 0.7222370885737223,
|
|
"grad_norm": 3.8596277236938477,
|
|
"learning_rate": 9.75828169707535e-06,
|
|
"loss": 1.1333,
|
|
"step": 2699
|
|
},
|
|
{
|
|
"epoch": 0.7225046829007225,
|
|
"grad_norm": 3.5846714973449707,
|
|
"learning_rate": 9.758009795318477e-06,
|
|
"loss": 1.0485,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.7227722772277227,
|
|
"grad_norm": 3.747907876968384,
|
|
"learning_rate": 9.757737744512846e-06,
|
|
"loss": 1.0817,
|
|
"step": 2701
|
|
},
|
|
{
|
|
"epoch": 0.723039871554723,
|
|
"grad_norm": 3.468989849090576,
|
|
"learning_rate": 9.75746554466698e-06,
|
|
"loss": 1.2511,
|
|
"step": 2702
|
|
},
|
|
{
|
|
"epoch": 0.7233074658817233,
|
|
"grad_norm": 3.4616754055023193,
|
|
"learning_rate": 9.757193195789404e-06,
|
|
"loss": 1.1138,
|
|
"step": 2703
|
|
},
|
|
{
|
|
"epoch": 0.7235750602087235,
|
|
"grad_norm": 3.6678431034088135,
|
|
"learning_rate": 9.75692069788865e-06,
|
|
"loss": 1.0741,
|
|
"step": 2704
|
|
},
|
|
{
|
|
"epoch": 0.7238426545357238,
|
|
"grad_norm": 3.6420814990997314,
|
|
"learning_rate": 9.756648050973257e-06,
|
|
"loss": 1.1498,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 0.7241102488627241,
|
|
"grad_norm": 3.3295350074768066,
|
|
"learning_rate": 9.756375255051765e-06,
|
|
"loss": 1.2033,
|
|
"step": 2706
|
|
},
|
|
{
|
|
"epoch": 0.7243778431897244,
|
|
"grad_norm": 3.3087949752807617,
|
|
"learning_rate": 9.756102310132716e-06,
|
|
"loss": 1.144,
|
|
"step": 2707
|
|
},
|
|
{
|
|
"epoch": 0.7246454375167246,
|
|
"grad_norm": 3.582380771636963,
|
|
"learning_rate": 9.755829216224662e-06,
|
|
"loss": 1.0771,
|
|
"step": 2708
|
|
},
|
|
{
|
|
"epoch": 0.7249130318437249,
|
|
"grad_norm": 3.513324737548828,
|
|
"learning_rate": 9.75555597333616e-06,
|
|
"loss": 1.1307,
|
|
"step": 2709
|
|
},
|
|
{
|
|
"epoch": 0.7251806261707252,
|
|
"grad_norm": 3.110485315322876,
|
|
"learning_rate": 9.755282581475769e-06,
|
|
"loss": 1.0273,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.7254482204977254,
|
|
"grad_norm": 3.4464118480682373,
|
|
"learning_rate": 9.75500904065205e-06,
|
|
"loss": 1.0884,
|
|
"step": 2711
|
|
},
|
|
{
|
|
"epoch": 0.7257158148247257,
|
|
"grad_norm": 3.5363407135009766,
|
|
"learning_rate": 9.754735350873577e-06,
|
|
"loss": 1.1758,
|
|
"step": 2712
|
|
},
|
|
{
|
|
"epoch": 0.725983409151726,
|
|
"grad_norm": 3.9216394424438477,
|
|
"learning_rate": 9.75446151214892e-06,
|
|
"loss": 1.3667,
|
|
"step": 2713
|
|
},
|
|
{
|
|
"epoch": 0.7262510034787263,
|
|
"grad_norm": 4.166318893432617,
|
|
"learning_rate": 9.754187524486658e-06,
|
|
"loss": 1.3686,
|
|
"step": 2714
|
|
},
|
|
{
|
|
"epoch": 0.7265185978057265,
|
|
"grad_norm": 3.6010658740997314,
|
|
"learning_rate": 9.753913387895373e-06,
|
|
"loss": 1.2221,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 0.7267861921327268,
|
|
"grad_norm": 3.6263794898986816,
|
|
"learning_rate": 9.753639102383653e-06,
|
|
"loss": 1.092,
|
|
"step": 2716
|
|
},
|
|
{
|
|
"epoch": 0.7270537864597271,
|
|
"grad_norm": 3.779825210571289,
|
|
"learning_rate": 9.753364667960093e-06,
|
|
"loss": 1.1366,
|
|
"step": 2717
|
|
},
|
|
{
|
|
"epoch": 0.7273213807867274,
|
|
"grad_norm": 3.460033416748047,
|
|
"learning_rate": 9.753090084633288e-06,
|
|
"loss": 1.2492,
|
|
"step": 2718
|
|
},
|
|
{
|
|
"epoch": 0.7275889751137276,
|
|
"grad_norm": 3.507516384124756,
|
|
"learning_rate": 9.752815352411837e-06,
|
|
"loss": 1.2037,
|
|
"step": 2719
|
|
},
|
|
{
|
|
"epoch": 0.7278565694407279,
|
|
"grad_norm": 3.2808637619018555,
|
|
"learning_rate": 9.752540471304351e-06,
|
|
"loss": 1.1144,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.7281241637677281,
|
|
"grad_norm": 3.5148873329162598,
|
|
"learning_rate": 9.752265441319437e-06,
|
|
"loss": 1.1453,
|
|
"step": 2721
|
|
},
|
|
{
|
|
"epoch": 0.7283917580947283,
|
|
"grad_norm": 3.546168327331543,
|
|
"learning_rate": 9.751990262465712e-06,
|
|
"loss": 1.1504,
|
|
"step": 2722
|
|
},
|
|
{
|
|
"epoch": 0.7286593524217286,
|
|
"grad_norm": 3.2594752311706543,
|
|
"learning_rate": 9.751714934751795e-06,
|
|
"loss": 1.1144,
|
|
"step": 2723
|
|
},
|
|
{
|
|
"epoch": 0.7289269467487289,
|
|
"grad_norm": 3.6359105110168457,
|
|
"learning_rate": 9.751439458186314e-06,
|
|
"loss": 1.0574,
|
|
"step": 2724
|
|
},
|
|
{
|
|
"epoch": 0.7291945410757292,
|
|
"grad_norm": 3.5395443439483643,
|
|
"learning_rate": 9.751163832777894e-06,
|
|
"loss": 1.1311,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 0.7294621354027294,
|
|
"grad_norm": 3.6369519233703613,
|
|
"learning_rate": 9.750888058535175e-06,
|
|
"loss": 1.228,
|
|
"step": 2726
|
|
},
|
|
{
|
|
"epoch": 0.7297297297297297,
|
|
"grad_norm": 3.7972755432128906,
|
|
"learning_rate": 9.75061213546679e-06,
|
|
"loss": 1.2122,
|
|
"step": 2727
|
|
},
|
|
{
|
|
"epoch": 0.72999732405673,
|
|
"grad_norm": 3.208137273788452,
|
|
"learning_rate": 9.750336063581385e-06,
|
|
"loss": 1.1357,
|
|
"step": 2728
|
|
},
|
|
{
|
|
"epoch": 0.7302649183837303,
|
|
"grad_norm": 3.6804027557373047,
|
|
"learning_rate": 9.75005984288761e-06,
|
|
"loss": 1.0588,
|
|
"step": 2729
|
|
},
|
|
{
|
|
"epoch": 0.7305325127107305,
|
|
"grad_norm": 3.5338356494903564,
|
|
"learning_rate": 9.749783473394115e-06,
|
|
"loss": 1.1344,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.7308001070377308,
|
|
"grad_norm": 3.542436122894287,
|
|
"learning_rate": 9.74950695510956e-06,
|
|
"loss": 1.0266,
|
|
"step": 2731
|
|
},
|
|
{
|
|
"epoch": 0.7310677013647311,
|
|
"grad_norm": 3.2684695720672607,
|
|
"learning_rate": 9.749230288042605e-06,
|
|
"loss": 1.1069,
|
|
"step": 2732
|
|
},
|
|
{
|
|
"epoch": 0.7313352956917314,
|
|
"grad_norm": 3.5910449028015137,
|
|
"learning_rate": 9.748953472201919e-06,
|
|
"loss": 1.1241,
|
|
"step": 2733
|
|
},
|
|
{
|
|
"epoch": 0.7316028900187316,
|
|
"grad_norm": 3.693363904953003,
|
|
"learning_rate": 9.74867650759617e-06,
|
|
"loss": 1.029,
|
|
"step": 2734
|
|
},
|
|
{
|
|
"epoch": 0.7318704843457319,
|
|
"grad_norm": 3.376753091812134,
|
|
"learning_rate": 9.748399394234038e-06,
|
|
"loss": 1.1953,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 0.7321380786727322,
|
|
"grad_norm": 3.5596413612365723,
|
|
"learning_rate": 9.7481221321242e-06,
|
|
"loss": 1.1171,
|
|
"step": 2736
|
|
},
|
|
{
|
|
"epoch": 0.7324056729997324,
|
|
"grad_norm": 3.588493585586548,
|
|
"learning_rate": 9.747844721275345e-06,
|
|
"loss": 1.3143,
|
|
"step": 2737
|
|
},
|
|
{
|
|
"epoch": 0.7326732673267327,
|
|
"grad_norm": 3.4020540714263916,
|
|
"learning_rate": 9.747567161696163e-06,
|
|
"loss": 1.1001,
|
|
"step": 2738
|
|
},
|
|
{
|
|
"epoch": 0.732940861653733,
|
|
"grad_norm": 3.346292018890381,
|
|
"learning_rate": 9.747289453395348e-06,
|
|
"loss": 1.0981,
|
|
"step": 2739
|
|
},
|
|
{
|
|
"epoch": 0.7332084559807333,
|
|
"grad_norm": 3.401524543762207,
|
|
"learning_rate": 9.747011596381597e-06,
|
|
"loss": 1.0512,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.7334760503077334,
|
|
"grad_norm": 3.25940203666687,
|
|
"learning_rate": 9.746733590663616e-06,
|
|
"loss": 1.1377,
|
|
"step": 2741
|
|
},
|
|
{
|
|
"epoch": 0.7337436446347337,
|
|
"grad_norm": 3.116464376449585,
|
|
"learning_rate": 9.746455436250116e-06,
|
|
"loss": 1.0579,
|
|
"step": 2742
|
|
},
|
|
{
|
|
"epoch": 0.734011238961734,
|
|
"grad_norm": 3.291623592376709,
|
|
"learning_rate": 9.746177133149805e-06,
|
|
"loss": 1.0135,
|
|
"step": 2743
|
|
},
|
|
{
|
|
"epoch": 0.7342788332887343,
|
|
"grad_norm": 4.061689376831055,
|
|
"learning_rate": 9.745898681371408e-06,
|
|
"loss": 1.3905,
|
|
"step": 2744
|
|
},
|
|
{
|
|
"epoch": 0.7345464276157345,
|
|
"grad_norm": 3.388113021850586,
|
|
"learning_rate": 9.74562008092364e-06,
|
|
"loss": 1.1734,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 0.7348140219427348,
|
|
"grad_norm": 3.3899624347686768,
|
|
"learning_rate": 9.745341331815237e-06,
|
|
"loss": 1.1051,
|
|
"step": 2746
|
|
},
|
|
{
|
|
"epoch": 0.7350816162697351,
|
|
"grad_norm": 3.5263469219207764,
|
|
"learning_rate": 9.745062434054924e-06,
|
|
"loss": 1.2386,
|
|
"step": 2747
|
|
},
|
|
{
|
|
"epoch": 0.7353492105967353,
|
|
"grad_norm": 3.7269399166107178,
|
|
"learning_rate": 9.744783387651442e-06,
|
|
"loss": 1.1823,
|
|
"step": 2748
|
|
},
|
|
{
|
|
"epoch": 0.7356168049237356,
|
|
"grad_norm": 3.4878084659576416,
|
|
"learning_rate": 9.74450419261353e-06,
|
|
"loss": 1.0985,
|
|
"step": 2749
|
|
},
|
|
{
|
|
"epoch": 0.7358843992507359,
|
|
"grad_norm": 3.7728066444396973,
|
|
"learning_rate": 9.744224848949935e-06,
|
|
"loss": 1.1252,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.7361519935777362,
|
|
"grad_norm": 3.4812254905700684,
|
|
"learning_rate": 9.743945356669406e-06,
|
|
"loss": 1.1439,
|
|
"step": 2751
|
|
},
|
|
{
|
|
"epoch": 0.7364195879047364,
|
|
"grad_norm": 3.372687816619873,
|
|
"learning_rate": 9.743665715780702e-06,
|
|
"loss": 1.1005,
|
|
"step": 2752
|
|
},
|
|
{
|
|
"epoch": 0.7366871822317367,
|
|
"grad_norm": 3.568819522857666,
|
|
"learning_rate": 9.743385926292578e-06,
|
|
"loss": 1.1,
|
|
"step": 2753
|
|
},
|
|
{
|
|
"epoch": 0.736954776558737,
|
|
"grad_norm": 3.3924365043640137,
|
|
"learning_rate": 9.743105988213802e-06,
|
|
"loss": 1.2092,
|
|
"step": 2754
|
|
},
|
|
{
|
|
"epoch": 0.7372223708857373,
|
|
"grad_norm": 3.7355546951293945,
|
|
"learning_rate": 9.742825901553144e-06,
|
|
"loss": 1.2169,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 0.7374899652127375,
|
|
"grad_norm": 3.2989847660064697,
|
|
"learning_rate": 9.742545666319376e-06,
|
|
"loss": 1.1556,
|
|
"step": 2756
|
|
},
|
|
{
|
|
"epoch": 0.7377575595397378,
|
|
"grad_norm": 3.268017530441284,
|
|
"learning_rate": 9.742265282521278e-06,
|
|
"loss": 1.0398,
|
|
"step": 2757
|
|
},
|
|
{
|
|
"epoch": 0.7380251538667381,
|
|
"grad_norm": 3.880585193634033,
|
|
"learning_rate": 9.741984750167632e-06,
|
|
"loss": 1.3448,
|
|
"step": 2758
|
|
},
|
|
{
|
|
"epoch": 0.7382927481937382,
|
|
"grad_norm": 3.6967694759368896,
|
|
"learning_rate": 9.741704069267227e-06,
|
|
"loss": 1.2818,
|
|
"step": 2759
|
|
},
|
|
{
|
|
"epoch": 0.7385603425207385,
|
|
"grad_norm": 3.3277947902679443,
|
|
"learning_rate": 9.741423239828854e-06,
|
|
"loss": 1.1014,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.7388279368477388,
|
|
"grad_norm": 3.7067902088165283,
|
|
"learning_rate": 9.74114226186131e-06,
|
|
"loss": 1.1701,
|
|
"step": 2761
|
|
},
|
|
{
|
|
"epoch": 0.7390955311747391,
|
|
"grad_norm": 3.5703206062316895,
|
|
"learning_rate": 9.740861135373399e-06,
|
|
"loss": 1.2229,
|
|
"step": 2762
|
|
},
|
|
{
|
|
"epoch": 0.7393631255017393,
|
|
"grad_norm": 3.534301519393921,
|
|
"learning_rate": 9.740579860373928e-06,
|
|
"loss": 1.162,
|
|
"step": 2763
|
|
},
|
|
{
|
|
"epoch": 0.7396307198287396,
|
|
"grad_norm": 3.224804162979126,
|
|
"learning_rate": 9.740298436871705e-06,
|
|
"loss": 1.0507,
|
|
"step": 2764
|
|
},
|
|
{
|
|
"epoch": 0.7398983141557399,
|
|
"grad_norm": 3.5627236366271973,
|
|
"learning_rate": 9.74001686487555e-06,
|
|
"loss": 1.1545,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 0.7401659084827402,
|
|
"grad_norm": 3.293410301208496,
|
|
"learning_rate": 9.73973514439428e-06,
|
|
"loss": 1.0306,
|
|
"step": 2766
|
|
},
|
|
{
|
|
"epoch": 0.7404335028097404,
|
|
"grad_norm": 3.6083991527557373,
|
|
"learning_rate": 9.73945327543672e-06,
|
|
"loss": 1.2526,
|
|
"step": 2767
|
|
},
|
|
{
|
|
"epoch": 0.7407010971367407,
|
|
"grad_norm": 3.2375547885894775,
|
|
"learning_rate": 9.739171258011703e-06,
|
|
"loss": 1.0081,
|
|
"step": 2768
|
|
},
|
|
{
|
|
"epoch": 0.740968691463741,
|
|
"grad_norm": 3.6871652603149414,
|
|
"learning_rate": 9.73888909212806e-06,
|
|
"loss": 1.1781,
|
|
"step": 2769
|
|
},
|
|
{
|
|
"epoch": 0.7412362857907412,
|
|
"grad_norm": 3.2498250007629395,
|
|
"learning_rate": 9.738606777794633e-06,
|
|
"loss": 1.1097,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.7415038801177415,
|
|
"grad_norm": 3.439887046813965,
|
|
"learning_rate": 9.738324315020263e-06,
|
|
"loss": 1.1992,
|
|
"step": 2771
|
|
},
|
|
{
|
|
"epoch": 0.7417714744447418,
|
|
"grad_norm": 3.3322887420654297,
|
|
"learning_rate": 9.7380417038138e-06,
|
|
"loss": 1.2245,
|
|
"step": 2772
|
|
},
|
|
{
|
|
"epoch": 0.7420390687717421,
|
|
"grad_norm": 3.5965330600738525,
|
|
"learning_rate": 9.737758944184096e-06,
|
|
"loss": 1.1906,
|
|
"step": 2773
|
|
},
|
|
{
|
|
"epoch": 0.7423066630987423,
|
|
"grad_norm": 3.299678325653076,
|
|
"learning_rate": 9.737476036140011e-06,
|
|
"loss": 1.2128,
|
|
"step": 2774
|
|
},
|
|
{
|
|
"epoch": 0.7425742574257426,
|
|
"grad_norm": 3.468172073364258,
|
|
"learning_rate": 9.737192979690404e-06,
|
|
"loss": 1.1996,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 0.7428418517527429,
|
|
"grad_norm": 3.4514479637145996,
|
|
"learning_rate": 9.736909774844145e-06,
|
|
"loss": 1.1241,
|
|
"step": 2776
|
|
},
|
|
{
|
|
"epoch": 0.7431094460797432,
|
|
"grad_norm": 3.221329927444458,
|
|
"learning_rate": 9.736626421610104e-06,
|
|
"loss": 1.1235,
|
|
"step": 2777
|
|
},
|
|
{
|
|
"epoch": 0.7433770404067434,
|
|
"grad_norm": 3.6566755771636963,
|
|
"learning_rate": 9.73634291999716e-06,
|
|
"loss": 1.0872,
|
|
"step": 2778
|
|
},
|
|
{
|
|
"epoch": 0.7436446347337436,
|
|
"grad_norm": 3.055006980895996,
|
|
"learning_rate": 9.73605927001419e-06,
|
|
"loss": 1.0536,
|
|
"step": 2779
|
|
},
|
|
{
|
|
"epoch": 0.743912229060744,
|
|
"grad_norm": 3.0918097496032715,
|
|
"learning_rate": 9.735775471670079e-06,
|
|
"loss": 1.0914,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.7441798233877441,
|
|
"grad_norm": 3.5324559211730957,
|
|
"learning_rate": 9.735491524973723e-06,
|
|
"loss": 1.041,
|
|
"step": 2781
|
|
},
|
|
{
|
|
"epoch": 0.7444474177147444,
|
|
"grad_norm": 3.506650447845459,
|
|
"learning_rate": 9.73520742993401e-06,
|
|
"loss": 1.1942,
|
|
"step": 2782
|
|
},
|
|
{
|
|
"epoch": 0.7447150120417447,
|
|
"grad_norm": 3.5160765647888184,
|
|
"learning_rate": 9.734923186559845e-06,
|
|
"loss": 1.1306,
|
|
"step": 2783
|
|
},
|
|
{
|
|
"epoch": 0.744982606368745,
|
|
"grad_norm": 3.377394676208496,
|
|
"learning_rate": 9.73463879486013e-06,
|
|
"loss": 1.1159,
|
|
"step": 2784
|
|
},
|
|
{
|
|
"epoch": 0.7452502006957452,
|
|
"grad_norm": 3.6153159141540527,
|
|
"learning_rate": 9.734354254843773e-06,
|
|
"loss": 1.0963,
|
|
"step": 2785
|
|
},
|
|
{
|
|
"epoch": 0.7455177950227455,
|
|
"grad_norm": 3.4530587196350098,
|
|
"learning_rate": 9.734069566519688e-06,
|
|
"loss": 1.1871,
|
|
"step": 2786
|
|
},
|
|
{
|
|
"epoch": 0.7457853893497458,
|
|
"grad_norm": 3.537059783935547,
|
|
"learning_rate": 9.733784729896794e-06,
|
|
"loss": 1.2016,
|
|
"step": 2787
|
|
},
|
|
{
|
|
"epoch": 0.7460529836767461,
|
|
"grad_norm": 3.379148483276367,
|
|
"learning_rate": 9.733499744984013e-06,
|
|
"loss": 1.1321,
|
|
"step": 2788
|
|
},
|
|
{
|
|
"epoch": 0.7463205780037463,
|
|
"grad_norm": 3.4380931854248047,
|
|
"learning_rate": 9.733214611790273e-06,
|
|
"loss": 1.1662,
|
|
"step": 2789
|
|
},
|
|
{
|
|
"epoch": 0.7465881723307466,
|
|
"grad_norm": 3.5000431537628174,
|
|
"learning_rate": 9.732929330324505e-06,
|
|
"loss": 1.0895,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.7468557666577469,
|
|
"grad_norm": 3.2787697315216064,
|
|
"learning_rate": 9.732643900595646e-06,
|
|
"loss": 1.094,
|
|
"step": 2791
|
|
},
|
|
{
|
|
"epoch": 0.7471233609847471,
|
|
"grad_norm": 3.452360153198242,
|
|
"learning_rate": 9.732358322612639e-06,
|
|
"loss": 1.198,
|
|
"step": 2792
|
|
},
|
|
{
|
|
"epoch": 0.7473909553117474,
|
|
"grad_norm": 3.0884242057800293,
|
|
"learning_rate": 9.732072596384427e-06,
|
|
"loss": 1.1294,
|
|
"step": 2793
|
|
},
|
|
{
|
|
"epoch": 0.7476585496387477,
|
|
"grad_norm": 3.0468862056732178,
|
|
"learning_rate": 9.731786721919963e-06,
|
|
"loss": 1.0767,
|
|
"step": 2794
|
|
},
|
|
{
|
|
"epoch": 0.747926143965748,
|
|
"grad_norm": 3.783818483352661,
|
|
"learning_rate": 9.7315006992282e-06,
|
|
"loss": 1.1782,
|
|
"step": 2795
|
|
},
|
|
{
|
|
"epoch": 0.7481937382927482,
|
|
"grad_norm": 3.290731430053711,
|
|
"learning_rate": 9.731214528318101e-06,
|
|
"loss": 1.1673,
|
|
"step": 2796
|
|
},
|
|
{
|
|
"epoch": 0.7484613326197485,
|
|
"grad_norm": 3.4749789237976074,
|
|
"learning_rate": 9.730928209198629e-06,
|
|
"loss": 1.0845,
|
|
"step": 2797
|
|
},
|
|
{
|
|
"epoch": 0.7487289269467488,
|
|
"grad_norm": 3.39563250541687,
|
|
"learning_rate": 9.730641741878752e-06,
|
|
"loss": 1.1038,
|
|
"step": 2798
|
|
},
|
|
{
|
|
"epoch": 0.748996521273749,
|
|
"grad_norm": 3.1787352561950684,
|
|
"learning_rate": 9.730355126367446e-06,
|
|
"loss": 0.9825,
|
|
"step": 2799
|
|
},
|
|
{
|
|
"epoch": 0.7492641156007492,
|
|
"grad_norm": 3.4629030227661133,
|
|
"learning_rate": 9.730068362673686e-06,
|
|
"loss": 1.1339,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.7495317099277495,
|
|
"grad_norm": 3.959449052810669,
|
|
"learning_rate": 9.72978145080646e-06,
|
|
"loss": 1.3313,
|
|
"step": 2801
|
|
},
|
|
{
|
|
"epoch": 0.7497993042547498,
|
|
"grad_norm": 2.8361458778381348,
|
|
"learning_rate": 9.729494390774753e-06,
|
|
"loss": 1.0094,
|
|
"step": 2802
|
|
},
|
|
{
|
|
"epoch": 0.75006689858175,
|
|
"grad_norm": 3.317673921585083,
|
|
"learning_rate": 9.729207182587556e-06,
|
|
"loss": 1.0883,
|
|
"step": 2803
|
|
},
|
|
{
|
|
"epoch": 0.7503344929087503,
|
|
"grad_norm": 3.1433663368225098,
|
|
"learning_rate": 9.728919826253872e-06,
|
|
"loss": 1.0981,
|
|
"step": 2804
|
|
},
|
|
{
|
|
"epoch": 0.7506020872357506,
|
|
"grad_norm": 3.8277859687805176,
|
|
"learning_rate": 9.728632321782693e-06,
|
|
"loss": 1.1252,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 0.7508696815627509,
|
|
"grad_norm": 3.1304125785827637,
|
|
"learning_rate": 9.728344669183033e-06,
|
|
"loss": 1.0509,
|
|
"step": 2806
|
|
},
|
|
{
|
|
"epoch": 0.7511372758897511,
|
|
"grad_norm": 3.266526937484741,
|
|
"learning_rate": 9.728056868463903e-06,
|
|
"loss": 1.1155,
|
|
"step": 2807
|
|
},
|
|
{
|
|
"epoch": 0.7514048702167514,
|
|
"grad_norm": 3.4399197101593018,
|
|
"learning_rate": 9.727768919634314e-06,
|
|
"loss": 1.2062,
|
|
"step": 2808
|
|
},
|
|
{
|
|
"epoch": 0.7516724645437517,
|
|
"grad_norm": 3.5575180053710938,
|
|
"learning_rate": 9.72748082270329e-06,
|
|
"loss": 1.211,
|
|
"step": 2809
|
|
},
|
|
{
|
|
"epoch": 0.751940058870752,
|
|
"grad_norm": 3.2147161960601807,
|
|
"learning_rate": 9.727192577679852e-06,
|
|
"loss": 1.0478,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.7522076531977522,
|
|
"grad_norm": 3.8405959606170654,
|
|
"learning_rate": 9.726904184573034e-06,
|
|
"loss": 1.2406,
|
|
"step": 2811
|
|
},
|
|
{
|
|
"epoch": 0.7524752475247525,
|
|
"grad_norm": 3.6334824562072754,
|
|
"learning_rate": 9.726615643391868e-06,
|
|
"loss": 1.1493,
|
|
"step": 2812
|
|
},
|
|
{
|
|
"epoch": 0.7527428418517528,
|
|
"grad_norm": 3.3839588165283203,
|
|
"learning_rate": 9.726326954145391e-06,
|
|
"loss": 1.2048,
|
|
"step": 2813
|
|
},
|
|
{
|
|
"epoch": 0.753010436178753,
|
|
"grad_norm": 3.4611270427703857,
|
|
"learning_rate": 9.72603811684265e-06,
|
|
"loss": 1.1183,
|
|
"step": 2814
|
|
},
|
|
{
|
|
"epoch": 0.7532780305057533,
|
|
"grad_norm": 3.1408448219299316,
|
|
"learning_rate": 9.725749131492691e-06,
|
|
"loss": 1.0962,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 0.7535456248327536,
|
|
"grad_norm": 3.3118159770965576,
|
|
"learning_rate": 9.725459998104568e-06,
|
|
"loss": 1.1289,
|
|
"step": 2816
|
|
},
|
|
{
|
|
"epoch": 0.7538132191597539,
|
|
"grad_norm": 3.467696189880371,
|
|
"learning_rate": 9.725170716687337e-06,
|
|
"loss": 1.1242,
|
|
"step": 2817
|
|
},
|
|
{
|
|
"epoch": 0.754080813486754,
|
|
"grad_norm": 3.346605062484741,
|
|
"learning_rate": 9.72488128725006e-06,
|
|
"loss": 1.1256,
|
|
"step": 2818
|
|
},
|
|
{
|
|
"epoch": 0.7543484078137543,
|
|
"grad_norm": 2.9358856678009033,
|
|
"learning_rate": 9.724591709801804e-06,
|
|
"loss": 1.0252,
|
|
"step": 2819
|
|
},
|
|
{
|
|
"epoch": 0.7546160021407546,
|
|
"grad_norm": 3.8935790061950684,
|
|
"learning_rate": 9.724301984351642e-06,
|
|
"loss": 1.2343,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.7548835964677549,
|
|
"grad_norm": 3.3613624572753906,
|
|
"learning_rate": 9.724012110908647e-06,
|
|
"loss": 1.0944,
|
|
"step": 2821
|
|
},
|
|
{
|
|
"epoch": 0.7551511907947551,
|
|
"grad_norm": 3.857342004776001,
|
|
"learning_rate": 9.723722089481902e-06,
|
|
"loss": 1.1819,
|
|
"step": 2822
|
|
},
|
|
{
|
|
"epoch": 0.7554187851217554,
|
|
"grad_norm": 3.4227402210235596,
|
|
"learning_rate": 9.72343192008049e-06,
|
|
"loss": 1.1457,
|
|
"step": 2823
|
|
},
|
|
{
|
|
"epoch": 0.7556863794487557,
|
|
"grad_norm": 3.526207447052002,
|
|
"learning_rate": 9.723141602713502e-06,
|
|
"loss": 1.1525,
|
|
"step": 2824
|
|
},
|
|
{
|
|
"epoch": 0.7559539737757559,
|
|
"grad_norm": 3.2722322940826416,
|
|
"learning_rate": 9.722851137390032e-06,
|
|
"loss": 0.999,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 0.7562215681027562,
|
|
"grad_norm": 3.4464046955108643,
|
|
"learning_rate": 9.72256052411918e-06,
|
|
"loss": 1.1352,
|
|
"step": 2826
|
|
},
|
|
{
|
|
"epoch": 0.7564891624297565,
|
|
"grad_norm": 3.817711353302002,
|
|
"learning_rate": 9.72226976291005e-06,
|
|
"loss": 1.2295,
|
|
"step": 2827
|
|
},
|
|
{
|
|
"epoch": 0.7567567567567568,
|
|
"grad_norm": 3.3932790756225586,
|
|
"learning_rate": 9.721978853771747e-06,
|
|
"loss": 1.1339,
|
|
"step": 2828
|
|
},
|
|
{
|
|
"epoch": 0.757024351083757,
|
|
"grad_norm": 3.173757553100586,
|
|
"learning_rate": 9.721687796713388e-06,
|
|
"loss": 1.0569,
|
|
"step": 2829
|
|
},
|
|
{
|
|
"epoch": 0.7572919454107573,
|
|
"grad_norm": 3.3787193298339844,
|
|
"learning_rate": 9.721396591744089e-06,
|
|
"loss": 1.0959,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.7575595397377576,
|
|
"grad_norm": 3.255352258682251,
|
|
"learning_rate": 9.72110523887297e-06,
|
|
"loss": 1.1342,
|
|
"step": 2831
|
|
},
|
|
{
|
|
"epoch": 0.7578271340647579,
|
|
"grad_norm": 3.6418159008026123,
|
|
"learning_rate": 9.720813738109163e-06,
|
|
"loss": 1.158,
|
|
"step": 2832
|
|
},
|
|
{
|
|
"epoch": 0.7580947283917581,
|
|
"grad_norm": 3.731308937072754,
|
|
"learning_rate": 9.720522089461795e-06,
|
|
"loss": 1.2893,
|
|
"step": 2833
|
|
},
|
|
{
|
|
"epoch": 0.7583623227187584,
|
|
"grad_norm": 3.3955862522125244,
|
|
"learning_rate": 9.720230292940005e-06,
|
|
"loss": 1.2315,
|
|
"step": 2834
|
|
},
|
|
{
|
|
"epoch": 0.7586299170457587,
|
|
"grad_norm": 3.6548402309417725,
|
|
"learning_rate": 9.71993834855293e-06,
|
|
"loss": 1.3398,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 0.7588975113727588,
|
|
"grad_norm": 3.494920015335083,
|
|
"learning_rate": 9.71964625630972e-06,
|
|
"loss": 1.0822,
|
|
"step": 2836
|
|
},
|
|
{
|
|
"epoch": 0.7591651056997591,
|
|
"grad_norm": 3.2807202339172363,
|
|
"learning_rate": 9.719354016219524e-06,
|
|
"loss": 1.0012,
|
|
"step": 2837
|
|
},
|
|
{
|
|
"epoch": 0.7594327000267594,
|
|
"grad_norm": 3.419506549835205,
|
|
"learning_rate": 9.719061628291495e-06,
|
|
"loss": 1.1424,
|
|
"step": 2838
|
|
},
|
|
{
|
|
"epoch": 0.7597002943537597,
|
|
"grad_norm": 3.452536106109619,
|
|
"learning_rate": 9.718769092534791e-06,
|
|
"loss": 1.2047,
|
|
"step": 2839
|
|
},
|
|
{
|
|
"epoch": 0.7599678886807599,
|
|
"grad_norm": 3.1318492889404297,
|
|
"learning_rate": 9.71847640895858e-06,
|
|
"loss": 1.1482,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.7602354830077602,
|
|
"grad_norm": 3.5314605236053467,
|
|
"learning_rate": 9.718183577572027e-06,
|
|
"loss": 1.1353,
|
|
"step": 2841
|
|
},
|
|
{
|
|
"epoch": 0.7605030773347605,
|
|
"grad_norm": 3.302334785461426,
|
|
"learning_rate": 9.717890598384308e-06,
|
|
"loss": 1.0301,
|
|
"step": 2842
|
|
},
|
|
{
|
|
"epoch": 0.7607706716617608,
|
|
"grad_norm": 3.7543177604675293,
|
|
"learning_rate": 9.7175974714046e-06,
|
|
"loss": 1.1595,
|
|
"step": 2843
|
|
},
|
|
{
|
|
"epoch": 0.761038265988761,
|
|
"grad_norm": 2.979762554168701,
|
|
"learning_rate": 9.717304196642084e-06,
|
|
"loss": 1.0898,
|
|
"step": 2844
|
|
},
|
|
{
|
|
"epoch": 0.7613058603157613,
|
|
"grad_norm": 3.3831746578216553,
|
|
"learning_rate": 9.717010774105948e-06,
|
|
"loss": 1.0798,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 0.7615734546427616,
|
|
"grad_norm": 3.5637614727020264,
|
|
"learning_rate": 9.716717203805383e-06,
|
|
"loss": 1.2266,
|
|
"step": 2846
|
|
},
|
|
{
|
|
"epoch": 0.7618410489697618,
|
|
"grad_norm": 3.3989365100860596,
|
|
"learning_rate": 9.716423485749587e-06,
|
|
"loss": 1.0865,
|
|
"step": 2847
|
|
},
|
|
{
|
|
"epoch": 0.7621086432967621,
|
|
"grad_norm": 3.465242385864258,
|
|
"learning_rate": 9.716129619947759e-06,
|
|
"loss": 1.1392,
|
|
"step": 2848
|
|
},
|
|
{
|
|
"epoch": 0.7623762376237624,
|
|
"grad_norm": 3.8061766624450684,
|
|
"learning_rate": 9.715835606409107e-06,
|
|
"loss": 1.1998,
|
|
"step": 2849
|
|
},
|
|
{
|
|
"epoch": 0.7626438319507627,
|
|
"grad_norm": 3.2725119590759277,
|
|
"learning_rate": 9.71554144514284e-06,
|
|
"loss": 1.1301,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.7629114262777629,
|
|
"grad_norm": 3.4146728515625,
|
|
"learning_rate": 9.715247136158173e-06,
|
|
"loss": 1.1111,
|
|
"step": 2851
|
|
},
|
|
{
|
|
"epoch": 0.7631790206047632,
|
|
"grad_norm": 3.5250372886657715,
|
|
"learning_rate": 9.714952679464324e-06,
|
|
"loss": 1.2267,
|
|
"step": 2852
|
|
},
|
|
{
|
|
"epoch": 0.7634466149317635,
|
|
"grad_norm": 3.194732427597046,
|
|
"learning_rate": 9.714658075070518e-06,
|
|
"loss": 1.0405,
|
|
"step": 2853
|
|
},
|
|
{
|
|
"epoch": 0.7637142092587638,
|
|
"grad_norm": 3.0676612854003906,
|
|
"learning_rate": 9.714363322985984e-06,
|
|
"loss": 0.9979,
|
|
"step": 2854
|
|
},
|
|
{
|
|
"epoch": 0.763981803585764,
|
|
"grad_norm": 3.3650014400482178,
|
|
"learning_rate": 9.714068423219958e-06,
|
|
"loss": 1.0496,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 0.7642493979127643,
|
|
"grad_norm": 3.422921657562256,
|
|
"learning_rate": 9.713773375781672e-06,
|
|
"loss": 1.1274,
|
|
"step": 2856
|
|
},
|
|
{
|
|
"epoch": 0.7645169922397645,
|
|
"grad_norm": 3.542006492614746,
|
|
"learning_rate": 9.713478180680375e-06,
|
|
"loss": 1.149,
|
|
"step": 2857
|
|
},
|
|
{
|
|
"epoch": 0.7647845865667647,
|
|
"grad_norm": 3.3169963359832764,
|
|
"learning_rate": 9.71318283792531e-06,
|
|
"loss": 1.0904,
|
|
"step": 2858
|
|
},
|
|
{
|
|
"epoch": 0.765052180893765,
|
|
"grad_norm": 3.1313977241516113,
|
|
"learning_rate": 9.71288734752573e-06,
|
|
"loss": 1.0458,
|
|
"step": 2859
|
|
},
|
|
{
|
|
"epoch": 0.7653197752207653,
|
|
"grad_norm": 3.309631586074829,
|
|
"learning_rate": 9.712591709490891e-06,
|
|
"loss": 1.1167,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.7655873695477656,
|
|
"grad_norm": 3.1519172191619873,
|
|
"learning_rate": 9.712295923830057e-06,
|
|
"loss": 1.0628,
|
|
"step": 2861
|
|
},
|
|
{
|
|
"epoch": 0.7658549638747658,
|
|
"grad_norm": 3.4607601165771484,
|
|
"learning_rate": 9.71199999055249e-06,
|
|
"loss": 1.157,
|
|
"step": 2862
|
|
},
|
|
{
|
|
"epoch": 0.7661225582017661,
|
|
"grad_norm": 3.342031478881836,
|
|
"learning_rate": 9.711703909667461e-06,
|
|
"loss": 1.1209,
|
|
"step": 2863
|
|
},
|
|
{
|
|
"epoch": 0.7663901525287664,
|
|
"grad_norm": 3.9641542434692383,
|
|
"learning_rate": 9.711407681184248e-06,
|
|
"loss": 1.1324,
|
|
"step": 2864
|
|
},
|
|
{
|
|
"epoch": 0.7666577468557667,
|
|
"grad_norm": 3.4209370613098145,
|
|
"learning_rate": 9.711111305112126e-06,
|
|
"loss": 1.0754,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 0.7669253411827669,
|
|
"grad_norm": 3.292510509490967,
|
|
"learning_rate": 9.710814781460383e-06,
|
|
"loss": 1.0765,
|
|
"step": 2866
|
|
},
|
|
{
|
|
"epoch": 0.7671929355097672,
|
|
"grad_norm": 4.118853569030762,
|
|
"learning_rate": 9.710518110238308e-06,
|
|
"loss": 1.2039,
|
|
"step": 2867
|
|
},
|
|
{
|
|
"epoch": 0.7674605298367675,
|
|
"grad_norm": 3.280724287033081,
|
|
"learning_rate": 9.71022129145519e-06,
|
|
"loss": 0.9847,
|
|
"step": 2868
|
|
},
|
|
{
|
|
"epoch": 0.7677281241637677,
|
|
"grad_norm": 3.1965861320495605,
|
|
"learning_rate": 9.709924325120333e-06,
|
|
"loss": 0.9819,
|
|
"step": 2869
|
|
},
|
|
{
|
|
"epoch": 0.767995718490768,
|
|
"grad_norm": 3.7593677043914795,
|
|
"learning_rate": 9.709627211243036e-06,
|
|
"loss": 1.259,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.7682633128177683,
|
|
"grad_norm": 3.556138277053833,
|
|
"learning_rate": 9.709329949832606e-06,
|
|
"loss": 1.0214,
|
|
"step": 2871
|
|
},
|
|
{
|
|
"epoch": 0.7685309071447686,
|
|
"grad_norm": 3.3062312602996826,
|
|
"learning_rate": 9.709032540898356e-06,
|
|
"loss": 1.1449,
|
|
"step": 2872
|
|
},
|
|
{
|
|
"epoch": 0.7687985014717688,
|
|
"grad_norm": 3.483119249343872,
|
|
"learning_rate": 9.708734984449605e-06,
|
|
"loss": 1.2922,
|
|
"step": 2873
|
|
},
|
|
{
|
|
"epoch": 0.7690660957987691,
|
|
"grad_norm": 3.3715760707855225,
|
|
"learning_rate": 9.70843728049567e-06,
|
|
"loss": 1.0213,
|
|
"step": 2874
|
|
},
|
|
{
|
|
"epoch": 0.7693336901257694,
|
|
"grad_norm": 3.3638691902160645,
|
|
"learning_rate": 9.70813942904588e-06,
|
|
"loss": 1.1002,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 0.7696012844527697,
|
|
"grad_norm": 3.502279281616211,
|
|
"learning_rate": 9.707841430109564e-06,
|
|
"loss": 1.1725,
|
|
"step": 2876
|
|
},
|
|
{
|
|
"epoch": 0.7698688787797698,
|
|
"grad_norm": 3.4064905643463135,
|
|
"learning_rate": 9.707543283696056e-06,
|
|
"loss": 1.1057,
|
|
"step": 2877
|
|
},
|
|
{
|
|
"epoch": 0.7701364731067701,
|
|
"grad_norm": 3.243762969970703,
|
|
"learning_rate": 9.707244989814699e-06,
|
|
"loss": 1.039,
|
|
"step": 2878
|
|
},
|
|
{
|
|
"epoch": 0.7704040674337704,
|
|
"grad_norm": 3.5731518268585205,
|
|
"learning_rate": 9.706946548474836e-06,
|
|
"loss": 1.0588,
|
|
"step": 2879
|
|
},
|
|
{
|
|
"epoch": 0.7706716617607706,
|
|
"grad_norm": 3.416506052017212,
|
|
"learning_rate": 9.706647959685813e-06,
|
|
"loss": 1.0165,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.7709392560877709,
|
|
"grad_norm": 3.192201852798462,
|
|
"learning_rate": 9.706349223456988e-06,
|
|
"loss": 1.0624,
|
|
"step": 2881
|
|
},
|
|
{
|
|
"epoch": 0.7712068504147712,
|
|
"grad_norm": 3.571995735168457,
|
|
"learning_rate": 9.706050339797714e-06,
|
|
"loss": 1.1391,
|
|
"step": 2882
|
|
},
|
|
{
|
|
"epoch": 0.7714744447417715,
|
|
"grad_norm": 3.073079824447632,
|
|
"learning_rate": 9.70575130871736e-06,
|
|
"loss": 1.0965,
|
|
"step": 2883
|
|
},
|
|
{
|
|
"epoch": 0.7717420390687717,
|
|
"grad_norm": 3.43789005279541,
|
|
"learning_rate": 9.705452130225287e-06,
|
|
"loss": 1.0569,
|
|
"step": 2884
|
|
},
|
|
{
|
|
"epoch": 0.772009633395772,
|
|
"grad_norm": 3.334461212158203,
|
|
"learning_rate": 9.705152804330872e-06,
|
|
"loss": 1.053,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 0.7722772277227723,
|
|
"grad_norm": 3.743177890777588,
|
|
"learning_rate": 9.70485333104349e-06,
|
|
"loss": 1.2276,
|
|
"step": 2886
|
|
},
|
|
{
|
|
"epoch": 0.7725448220497726,
|
|
"grad_norm": 3.443610191345215,
|
|
"learning_rate": 9.704553710372524e-06,
|
|
"loss": 1.0712,
|
|
"step": 2887
|
|
},
|
|
{
|
|
"epoch": 0.7728124163767728,
|
|
"grad_norm": 3.481642246246338,
|
|
"learning_rate": 9.704253942327357e-06,
|
|
"loss": 1.1042,
|
|
"step": 2888
|
|
},
|
|
{
|
|
"epoch": 0.7730800107037731,
|
|
"grad_norm": 3.3586671352386475,
|
|
"learning_rate": 9.703954026917379e-06,
|
|
"loss": 1.2262,
|
|
"step": 2889
|
|
},
|
|
{
|
|
"epoch": 0.7733476050307734,
|
|
"grad_norm": 3.1895251274108887,
|
|
"learning_rate": 9.703653964151986e-06,
|
|
"loss": 0.9845,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.7736151993577736,
|
|
"grad_norm": 3.624223232269287,
|
|
"learning_rate": 9.70335375404058e-06,
|
|
"loss": 1.3042,
|
|
"step": 2891
|
|
},
|
|
{
|
|
"epoch": 0.7738827936847739,
|
|
"grad_norm": 3.3234941959381104,
|
|
"learning_rate": 9.703053396592562e-06,
|
|
"loss": 1.0994,
|
|
"step": 2892
|
|
},
|
|
{
|
|
"epoch": 0.7741503880117742,
|
|
"grad_norm": 3.3439879417419434,
|
|
"learning_rate": 9.702752891817346e-06,
|
|
"loss": 1.2438,
|
|
"step": 2893
|
|
},
|
|
{
|
|
"epoch": 0.7744179823387745,
|
|
"grad_norm": 3.2826695442199707,
|
|
"learning_rate": 9.70245223972434e-06,
|
|
"loss": 1.0742,
|
|
"step": 2894
|
|
},
|
|
{
|
|
"epoch": 0.7746855766657746,
|
|
"grad_norm": 3.288297414779663,
|
|
"learning_rate": 9.702151440322964e-06,
|
|
"loss": 1.0301,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 0.7749531709927749,
|
|
"grad_norm": 3.380511999130249,
|
|
"learning_rate": 9.701850493622642e-06,
|
|
"loss": 1.1317,
|
|
"step": 2896
|
|
},
|
|
{
|
|
"epoch": 0.7752207653197752,
|
|
"grad_norm": 3.54054856300354,
|
|
"learning_rate": 9.7015493996328e-06,
|
|
"loss": 1.0911,
|
|
"step": 2897
|
|
},
|
|
{
|
|
"epoch": 0.7754883596467755,
|
|
"grad_norm": 3.4729793071746826,
|
|
"learning_rate": 9.701248158362871e-06,
|
|
"loss": 1.1824,
|
|
"step": 2898
|
|
},
|
|
{
|
|
"epoch": 0.7757559539737757,
|
|
"grad_norm": 3.3883352279663086,
|
|
"learning_rate": 9.700946769822292e-06,
|
|
"loss": 1.1696,
|
|
"step": 2899
|
|
},
|
|
{
|
|
"epoch": 0.776023548300776,
|
|
"grad_norm": 3.5626111030578613,
|
|
"learning_rate": 9.700645234020502e-06,
|
|
"loss": 1.2284,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.7762911426277763,
|
|
"grad_norm": 3.4596970081329346,
|
|
"learning_rate": 9.70034355096695e-06,
|
|
"loss": 1.0903,
|
|
"step": 2901
|
|
},
|
|
{
|
|
"epoch": 0.7765587369547765,
|
|
"grad_norm": 3.7619760036468506,
|
|
"learning_rate": 9.700041720671082e-06,
|
|
"loss": 1.2232,
|
|
"step": 2902
|
|
},
|
|
{
|
|
"epoch": 0.7768263312817768,
|
|
"grad_norm": 3.1871516704559326,
|
|
"learning_rate": 9.69973974314236e-06,
|
|
"loss": 1.0154,
|
|
"step": 2903
|
|
},
|
|
{
|
|
"epoch": 0.7770939256087771,
|
|
"grad_norm": 3.5451059341430664,
|
|
"learning_rate": 9.699437618390237e-06,
|
|
"loss": 1.0359,
|
|
"step": 2904
|
|
},
|
|
{
|
|
"epoch": 0.7773615199357774,
|
|
"grad_norm": 3.7833518981933594,
|
|
"learning_rate": 9.69913534642418e-06,
|
|
"loss": 1.1205,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 0.7776291142627776,
|
|
"grad_norm": 3.3426740169525146,
|
|
"learning_rate": 9.69883292725366e-06,
|
|
"loss": 1.146,
|
|
"step": 2906
|
|
},
|
|
{
|
|
"epoch": 0.7778967085897779,
|
|
"grad_norm": 3.564518690109253,
|
|
"learning_rate": 9.698530360888146e-06,
|
|
"loss": 1.1515,
|
|
"step": 2907
|
|
},
|
|
{
|
|
"epoch": 0.7781643029167782,
|
|
"grad_norm": 3.3578410148620605,
|
|
"learning_rate": 9.69822764733712e-06,
|
|
"loss": 1.2046,
|
|
"step": 2908
|
|
},
|
|
{
|
|
"epoch": 0.7784318972437785,
|
|
"grad_norm": 3.161803722381592,
|
|
"learning_rate": 9.697924786610063e-06,
|
|
"loss": 1.1811,
|
|
"step": 2909
|
|
},
|
|
{
|
|
"epoch": 0.7786994915707787,
|
|
"grad_norm": 3.1119868755340576,
|
|
"learning_rate": 9.697621778716465e-06,
|
|
"loss": 1.0896,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.778967085897779,
|
|
"grad_norm": 3.2111477851867676,
|
|
"learning_rate": 9.697318623665813e-06,
|
|
"loss": 1.0613,
|
|
"step": 2911
|
|
},
|
|
{
|
|
"epoch": 0.7792346802247793,
|
|
"grad_norm": 3.4069631099700928,
|
|
"learning_rate": 9.697015321467606e-06,
|
|
"loss": 1.0905,
|
|
"step": 2912
|
|
},
|
|
{
|
|
"epoch": 0.7795022745517795,
|
|
"grad_norm": 3.5640361309051514,
|
|
"learning_rate": 9.696711872131347e-06,
|
|
"loss": 1.2176,
|
|
"step": 2913
|
|
},
|
|
{
|
|
"epoch": 0.7797698688787797,
|
|
"grad_norm": 3.4428586959838867,
|
|
"learning_rate": 9.69640827566654e-06,
|
|
"loss": 1.1433,
|
|
"step": 2914
|
|
},
|
|
{
|
|
"epoch": 0.78003746320578,
|
|
"grad_norm": 3.6529276371002197,
|
|
"learning_rate": 9.696104532082695e-06,
|
|
"loss": 1.2443,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 0.7803050575327803,
|
|
"grad_norm": 3.5144267082214355,
|
|
"learning_rate": 9.695800641389327e-06,
|
|
"loss": 1.1708,
|
|
"step": 2916
|
|
},
|
|
{
|
|
"epoch": 0.7805726518597805,
|
|
"grad_norm": 3.2962844371795654,
|
|
"learning_rate": 9.695496603595959e-06,
|
|
"loss": 1.0612,
|
|
"step": 2917
|
|
},
|
|
{
|
|
"epoch": 0.7808402461867808,
|
|
"grad_norm": 3.4255483150482178,
|
|
"learning_rate": 9.695192418712111e-06,
|
|
"loss": 1.1376,
|
|
"step": 2918
|
|
},
|
|
{
|
|
"epoch": 0.7811078405137811,
|
|
"grad_norm": 3.3936362266540527,
|
|
"learning_rate": 9.694888086747315e-06,
|
|
"loss": 1.0977,
|
|
"step": 2919
|
|
},
|
|
{
|
|
"epoch": 0.7813754348407814,
|
|
"grad_norm": 3.835702896118164,
|
|
"learning_rate": 9.694583607711102e-06,
|
|
"loss": 1.2083,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.7816430291677816,
|
|
"grad_norm": 3.329684257507324,
|
|
"learning_rate": 9.69427898161301e-06,
|
|
"loss": 1.1792,
|
|
"step": 2921
|
|
},
|
|
{
|
|
"epoch": 0.7819106234947819,
|
|
"grad_norm": 3.5898046493530273,
|
|
"learning_rate": 9.693974208462585e-06,
|
|
"loss": 1.1128,
|
|
"step": 2922
|
|
},
|
|
{
|
|
"epoch": 0.7821782178217822,
|
|
"grad_norm": 3.2505688667297363,
|
|
"learning_rate": 9.693669288269371e-06,
|
|
"loss": 1.1022,
|
|
"step": 2923
|
|
},
|
|
{
|
|
"epoch": 0.7824458121487824,
|
|
"grad_norm": 3.629041910171509,
|
|
"learning_rate": 9.693364221042922e-06,
|
|
"loss": 1.0837,
|
|
"step": 2924
|
|
},
|
|
{
|
|
"epoch": 0.7827134064757827,
|
|
"grad_norm": 3.4234085083007812,
|
|
"learning_rate": 9.69305900679279e-06,
|
|
"loss": 1.2019,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 0.782981000802783,
|
|
"grad_norm": 3.353302001953125,
|
|
"learning_rate": 9.692753645528544e-06,
|
|
"loss": 1.133,
|
|
"step": 2926
|
|
},
|
|
{
|
|
"epoch": 0.7832485951297833,
|
|
"grad_norm": 3.490877866744995,
|
|
"learning_rate": 9.692448137259743e-06,
|
|
"loss": 1.2875,
|
|
"step": 2927
|
|
},
|
|
{
|
|
"epoch": 0.7835161894567835,
|
|
"grad_norm": 3.5161383152008057,
|
|
"learning_rate": 9.692142481995958e-06,
|
|
"loss": 1.1722,
|
|
"step": 2928
|
|
},
|
|
{
|
|
"epoch": 0.7837837837837838,
|
|
"grad_norm": 3.030968189239502,
|
|
"learning_rate": 9.691836679746767e-06,
|
|
"loss": 1.0362,
|
|
"step": 2929
|
|
},
|
|
{
|
|
"epoch": 0.7840513781107841,
|
|
"grad_norm": 3.819481611251831,
|
|
"learning_rate": 9.691530730521748e-06,
|
|
"loss": 1.2682,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.7843189724377844,
|
|
"grad_norm": 3.640918731689453,
|
|
"learning_rate": 9.691224634330484e-06,
|
|
"loss": 1.1868,
|
|
"step": 2931
|
|
},
|
|
{
|
|
"epoch": 0.7845865667647846,
|
|
"grad_norm": 3.7881932258605957,
|
|
"learning_rate": 9.690918391182568e-06,
|
|
"loss": 1.2436,
|
|
"step": 2932
|
|
},
|
|
{
|
|
"epoch": 0.7848541610917849,
|
|
"grad_norm": 3.8479201793670654,
|
|
"learning_rate": 9.690612001087586e-06,
|
|
"loss": 1.0979,
|
|
"step": 2933
|
|
},
|
|
{
|
|
"epoch": 0.7851217554187851,
|
|
"grad_norm": 3.2943499088287354,
|
|
"learning_rate": 9.690305464055143e-06,
|
|
"loss": 1.1036,
|
|
"step": 2934
|
|
},
|
|
{
|
|
"epoch": 0.7853893497457853,
|
|
"grad_norm": 3.42976713180542,
|
|
"learning_rate": 9.689998780094839e-06,
|
|
"loss": 1.1348,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 0.7856569440727856,
|
|
"grad_norm": 3.6888561248779297,
|
|
"learning_rate": 9.689691949216278e-06,
|
|
"loss": 1.1974,
|
|
"step": 2936
|
|
},
|
|
{
|
|
"epoch": 0.7859245383997859,
|
|
"grad_norm": 3.266007900238037,
|
|
"learning_rate": 9.689384971429077e-06,
|
|
"loss": 1.1575,
|
|
"step": 2937
|
|
},
|
|
{
|
|
"epoch": 0.7861921327267862,
|
|
"grad_norm": 3.421496629714966,
|
|
"learning_rate": 9.689077846742847e-06,
|
|
"loss": 1.1723,
|
|
"step": 2938
|
|
},
|
|
{
|
|
"epoch": 0.7864597270537864,
|
|
"grad_norm": 3.432095766067505,
|
|
"learning_rate": 9.688770575167215e-06,
|
|
"loss": 1.0722,
|
|
"step": 2939
|
|
},
|
|
{
|
|
"epoch": 0.7867273213807867,
|
|
"grad_norm": 3.453275680541992,
|
|
"learning_rate": 9.688463156711801e-06,
|
|
"loss": 1.145,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.786994915707787,
|
|
"grad_norm": 3.332948923110962,
|
|
"learning_rate": 9.688155591386239e-06,
|
|
"loss": 1.0426,
|
|
"step": 2941
|
|
},
|
|
{
|
|
"epoch": 0.7872625100347873,
|
|
"grad_norm": 3.5865981578826904,
|
|
"learning_rate": 9.687847879200161e-06,
|
|
"loss": 1.2441,
|
|
"step": 2942
|
|
},
|
|
{
|
|
"epoch": 0.7875301043617875,
|
|
"grad_norm": 3.633302688598633,
|
|
"learning_rate": 9.687540020163209e-06,
|
|
"loss": 1.2145,
|
|
"step": 2943
|
|
},
|
|
{
|
|
"epoch": 0.7877976986887878,
|
|
"grad_norm": 3.5107271671295166,
|
|
"learning_rate": 9.687232014285025e-06,
|
|
"loss": 1.1149,
|
|
"step": 2944
|
|
},
|
|
{
|
|
"epoch": 0.7880652930157881,
|
|
"grad_norm": 3.2951745986938477,
|
|
"learning_rate": 9.686923861575258e-06,
|
|
"loss": 1.0549,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 0.7883328873427883,
|
|
"grad_norm": 3.1973979473114014,
|
|
"learning_rate": 9.68661556204356e-06,
|
|
"loss": 1.1486,
|
|
"step": 2946
|
|
},
|
|
{
|
|
"epoch": 0.7886004816697886,
|
|
"grad_norm": 3.6182639598846436,
|
|
"learning_rate": 9.68630711569959e-06,
|
|
"loss": 1.1141,
|
|
"step": 2947
|
|
},
|
|
{
|
|
"epoch": 0.7888680759967889,
|
|
"grad_norm": 3.3912758827209473,
|
|
"learning_rate": 9.685998522553012e-06,
|
|
"loss": 1.1695,
|
|
"step": 2948
|
|
},
|
|
{
|
|
"epoch": 0.7891356703237892,
|
|
"grad_norm": 3.658475637435913,
|
|
"learning_rate": 9.68568978261349e-06,
|
|
"loss": 1.216,
|
|
"step": 2949
|
|
},
|
|
{
|
|
"epoch": 0.7894032646507894,
|
|
"grad_norm": 3.5192058086395264,
|
|
"learning_rate": 9.685380895890698e-06,
|
|
"loss": 1.1805,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.7896708589777897,
|
|
"grad_norm": 3.3814120292663574,
|
|
"learning_rate": 9.68507186239431e-06,
|
|
"loss": 1.1627,
|
|
"step": 2951
|
|
},
|
|
{
|
|
"epoch": 0.78993845330479,
|
|
"grad_norm": 3.405315399169922,
|
|
"learning_rate": 9.684762682134008e-06,
|
|
"loss": 1.2152,
|
|
"step": 2952
|
|
},
|
|
{
|
|
"epoch": 0.7902060476317903,
|
|
"grad_norm": 3.6160085201263428,
|
|
"learning_rate": 9.684453355119476e-06,
|
|
"loss": 1.2863,
|
|
"step": 2953
|
|
},
|
|
{
|
|
"epoch": 0.7904736419587904,
|
|
"grad_norm": 3.364459991455078,
|
|
"learning_rate": 9.684143881360406e-06,
|
|
"loss": 0.9876,
|
|
"step": 2954
|
|
},
|
|
{
|
|
"epoch": 0.7907412362857907,
|
|
"grad_norm": 3.066523551940918,
|
|
"learning_rate": 9.683834260866492e-06,
|
|
"loss": 0.9858,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 0.791008830612791,
|
|
"grad_norm": 3.4516170024871826,
|
|
"learning_rate": 9.68352449364743e-06,
|
|
"loss": 1.3306,
|
|
"step": 2956
|
|
},
|
|
{
|
|
"epoch": 0.7912764249397912,
|
|
"grad_norm": 3.5677990913391113,
|
|
"learning_rate": 9.68321457971293e-06,
|
|
"loss": 1.1928,
|
|
"step": 2957
|
|
},
|
|
{
|
|
"epoch": 0.7915440192667915,
|
|
"grad_norm": 3.59769868850708,
|
|
"learning_rate": 9.682904519072696e-06,
|
|
"loss": 1.2216,
|
|
"step": 2958
|
|
},
|
|
{
|
|
"epoch": 0.7918116135937918,
|
|
"grad_norm": 3.0195512771606445,
|
|
"learning_rate": 9.682594311736439e-06,
|
|
"loss": 0.9886,
|
|
"step": 2959
|
|
},
|
|
{
|
|
"epoch": 0.7920792079207921,
|
|
"grad_norm": 3.224322557449341,
|
|
"learning_rate": 9.68228395771388e-06,
|
|
"loss": 1.0185,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.7923468022477923,
|
|
"grad_norm": 3.414687395095825,
|
|
"learning_rate": 9.681973457014742e-06,
|
|
"loss": 1.2,
|
|
"step": 2961
|
|
},
|
|
{
|
|
"epoch": 0.7926143965747926,
|
|
"grad_norm": 3.225135564804077,
|
|
"learning_rate": 9.681662809648749e-06,
|
|
"loss": 1.1227,
|
|
"step": 2962
|
|
},
|
|
{
|
|
"epoch": 0.7928819909017929,
|
|
"grad_norm": 3.3739073276519775,
|
|
"learning_rate": 9.681352015625634e-06,
|
|
"loss": 1.1265,
|
|
"step": 2963
|
|
},
|
|
{
|
|
"epoch": 0.7931495852287932,
|
|
"grad_norm": 3.418264389038086,
|
|
"learning_rate": 9.681041074955131e-06,
|
|
"loss": 1.2126,
|
|
"step": 2964
|
|
},
|
|
{
|
|
"epoch": 0.7934171795557934,
|
|
"grad_norm": 3.712611198425293,
|
|
"learning_rate": 9.68072998764698e-06,
|
|
"loss": 1.2432,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 0.7936847738827937,
|
|
"grad_norm": 3.4805774688720703,
|
|
"learning_rate": 9.68041875371093e-06,
|
|
"loss": 1.2368,
|
|
"step": 2966
|
|
},
|
|
{
|
|
"epoch": 0.793952368209794,
|
|
"grad_norm": 3.31071400642395,
|
|
"learning_rate": 9.68010737315673e-06,
|
|
"loss": 1.1119,
|
|
"step": 2967
|
|
},
|
|
{
|
|
"epoch": 0.7942199625367942,
|
|
"grad_norm": 3.2610623836517334,
|
|
"learning_rate": 9.679795845994129e-06,
|
|
"loss": 1.0559,
|
|
"step": 2968
|
|
},
|
|
{
|
|
"epoch": 0.7944875568637945,
|
|
"grad_norm": 3.272242546081543,
|
|
"learning_rate": 9.67948417223289e-06,
|
|
"loss": 1.1692,
|
|
"step": 2969
|
|
},
|
|
{
|
|
"epoch": 0.7947551511907948,
|
|
"grad_norm": 3.6862285137176514,
|
|
"learning_rate": 9.679172351882778e-06,
|
|
"loss": 1.2637,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.7950227455177951,
|
|
"grad_norm": 3.0989432334899902,
|
|
"learning_rate": 9.678860384953558e-06,
|
|
"loss": 1.0365,
|
|
"step": 2971
|
|
},
|
|
{
|
|
"epoch": 0.7952903398447952,
|
|
"grad_norm": 3.5036988258361816,
|
|
"learning_rate": 9.678548271455002e-06,
|
|
"loss": 1.1943,
|
|
"step": 2972
|
|
},
|
|
{
|
|
"epoch": 0.7955579341717955,
|
|
"grad_norm": 3.549891233444214,
|
|
"learning_rate": 9.67823601139689e-06,
|
|
"loss": 1.2642,
|
|
"step": 2973
|
|
},
|
|
{
|
|
"epoch": 0.7958255284987958,
|
|
"grad_norm": 2.958547353744507,
|
|
"learning_rate": 9.677923604789002e-06,
|
|
"loss": 1.0623,
|
|
"step": 2974
|
|
},
|
|
{
|
|
"epoch": 0.7960931228257961,
|
|
"grad_norm": 3.7506515979766846,
|
|
"learning_rate": 9.677611051641126e-06,
|
|
"loss": 1.327,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 0.7963607171527963,
|
|
"grad_norm": 3.485591173171997,
|
|
"learning_rate": 9.677298351963051e-06,
|
|
"loss": 1.1078,
|
|
"step": 2976
|
|
},
|
|
{
|
|
"epoch": 0.7966283114797966,
|
|
"grad_norm": 3.605431079864502,
|
|
"learning_rate": 9.676985505764575e-06,
|
|
"loss": 1.1839,
|
|
"step": 2977
|
|
},
|
|
{
|
|
"epoch": 0.7968959058067969,
|
|
"grad_norm": 3.253654956817627,
|
|
"learning_rate": 9.676672513055496e-06,
|
|
"loss": 1.0312,
|
|
"step": 2978
|
|
},
|
|
{
|
|
"epoch": 0.7971635001337971,
|
|
"grad_norm": 3.57499361038208,
|
|
"learning_rate": 9.67635937384562e-06,
|
|
"loss": 1.114,
|
|
"step": 2979
|
|
},
|
|
{
|
|
"epoch": 0.7974310944607974,
|
|
"grad_norm": 3.2494728565216064,
|
|
"learning_rate": 9.676046088144755e-06,
|
|
"loss": 1.0676,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.7976986887877977,
|
|
"grad_norm": 3.5611202716827393,
|
|
"learning_rate": 9.675732655962716e-06,
|
|
"loss": 1.1961,
|
|
"step": 2981
|
|
},
|
|
{
|
|
"epoch": 0.797966283114798,
|
|
"grad_norm": 3.6243984699249268,
|
|
"learning_rate": 9.675419077309323e-06,
|
|
"loss": 1.1931,
|
|
"step": 2982
|
|
},
|
|
{
|
|
"epoch": 0.7982338774417982,
|
|
"grad_norm": 3.6850814819335938,
|
|
"learning_rate": 9.675105352194396e-06,
|
|
"loss": 1.0914,
|
|
"step": 2983
|
|
},
|
|
{
|
|
"epoch": 0.7985014717687985,
|
|
"grad_norm": 3.424598455429077,
|
|
"learning_rate": 9.674791480627763e-06,
|
|
"loss": 1.2153,
|
|
"step": 2984
|
|
},
|
|
{
|
|
"epoch": 0.7987690660957988,
|
|
"grad_norm": 3.0985870361328125,
|
|
"learning_rate": 9.67447746261926e-06,
|
|
"loss": 0.9851,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 0.7990366604227991,
|
|
"grad_norm": 3.504242181777954,
|
|
"learning_rate": 9.67416329817872e-06,
|
|
"loss": 1.1605,
|
|
"step": 2986
|
|
},
|
|
{
|
|
"epoch": 0.7993042547497993,
|
|
"grad_norm": 3.373812198638916,
|
|
"learning_rate": 9.673848987315986e-06,
|
|
"loss": 1.2192,
|
|
"step": 2987
|
|
},
|
|
{
|
|
"epoch": 0.7995718490767996,
|
|
"grad_norm": 3.4131312370300293,
|
|
"learning_rate": 9.673534530040905e-06,
|
|
"loss": 1.0798,
|
|
"step": 2988
|
|
},
|
|
{
|
|
"epoch": 0.7998394434037999,
|
|
"grad_norm": 3.4545650482177734,
|
|
"learning_rate": 9.673219926363325e-06,
|
|
"loss": 1.078,
|
|
"step": 2989
|
|
},
|
|
{
|
|
"epoch": 0.8001070377308,
|
|
"grad_norm": 3.47906231880188,
|
|
"learning_rate": 9.672905176293103e-06,
|
|
"loss": 1.0452,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.8003746320578004,
|
|
"grad_norm": 3.450021505355835,
|
|
"learning_rate": 9.6725902798401e-06,
|
|
"loss": 1.1115,
|
|
"step": 2991
|
|
},
|
|
{
|
|
"epoch": 0.8006422263848006,
|
|
"grad_norm": 3.7371201515197754,
|
|
"learning_rate": 9.672275237014178e-06,
|
|
"loss": 1.2083,
|
|
"step": 2992
|
|
},
|
|
{
|
|
"epoch": 0.800909820711801,
|
|
"grad_norm": 3.5337791442871094,
|
|
"learning_rate": 9.671960047825207e-06,
|
|
"loss": 1.1462,
|
|
"step": 2993
|
|
},
|
|
{
|
|
"epoch": 0.8011774150388011,
|
|
"grad_norm": 3.226942539215088,
|
|
"learning_rate": 9.671644712283061e-06,
|
|
"loss": 1.1274,
|
|
"step": 2994
|
|
},
|
|
{
|
|
"epoch": 0.8014450093658014,
|
|
"grad_norm": 3.127251625061035,
|
|
"learning_rate": 9.671329230397616e-06,
|
|
"loss": 1.0761,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 0.8017126036928017,
|
|
"grad_norm": 3.322313070297241,
|
|
"learning_rate": 9.67101360217876e-06,
|
|
"loss": 1.0466,
|
|
"step": 2996
|
|
},
|
|
{
|
|
"epoch": 0.801980198019802,
|
|
"grad_norm": 3.0869202613830566,
|
|
"learning_rate": 9.670697827636374e-06,
|
|
"loss": 1.0175,
|
|
"step": 2997
|
|
},
|
|
{
|
|
"epoch": 0.8022477923468022,
|
|
"grad_norm": 3.7030136585235596,
|
|
"learning_rate": 9.670381906780354e-06,
|
|
"loss": 1.2653,
|
|
"step": 2998
|
|
},
|
|
{
|
|
"epoch": 0.8025153866738025,
|
|
"grad_norm": 3.163114070892334,
|
|
"learning_rate": 9.670065839620594e-06,
|
|
"loss": 1.0952,
|
|
"step": 2999
|
|
},
|
|
{
|
|
"epoch": 0.8027829810008028,
|
|
"grad_norm": 3.4521522521972656,
|
|
"learning_rate": 9.669749626166998e-06,
|
|
"loss": 1.1834,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.8027829810008028,
|
|
"eval_loss": 1.1542552709579468,
|
|
"eval_runtime": 11.69,
|
|
"eval_samples_per_second": 34.217,
|
|
"eval_steps_per_second": 4.277,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.803050575327803,
|
|
"grad_norm": 3.3275747299194336,
|
|
"learning_rate": 9.669433266429468e-06,
|
|
"loss": 1.127,
|
|
"step": 3001
|
|
},
|
|
{
|
|
"epoch": 0.8033181696548033,
|
|
"grad_norm": 3.7543275356292725,
|
|
"learning_rate": 9.669116760417919e-06,
|
|
"loss": 1.2466,
|
|
"step": 3002
|
|
},
|
|
{
|
|
"epoch": 0.8035857639818036,
|
|
"grad_norm": 3.295300006866455,
|
|
"learning_rate": 9.66880010814226e-06,
|
|
"loss": 1.0984,
|
|
"step": 3003
|
|
},
|
|
{
|
|
"epoch": 0.8038533583088039,
|
|
"grad_norm": 3.4165024757385254,
|
|
"learning_rate": 9.668483309612415e-06,
|
|
"loss": 1.0271,
|
|
"step": 3004
|
|
},
|
|
{
|
|
"epoch": 0.8041209526358041,
|
|
"grad_norm": 3.307145357131958,
|
|
"learning_rate": 9.668166364838306e-06,
|
|
"loss": 1.1855,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 0.8043885469628044,
|
|
"grad_norm": 3.409726858139038,
|
|
"learning_rate": 9.667849273829861e-06,
|
|
"loss": 1.0533,
|
|
"step": 3006
|
|
},
|
|
{
|
|
"epoch": 0.8046561412898047,
|
|
"grad_norm": 3.490656852722168,
|
|
"learning_rate": 9.667532036597017e-06,
|
|
"loss": 1.14,
|
|
"step": 3007
|
|
},
|
|
{
|
|
"epoch": 0.804923735616805,
|
|
"grad_norm": 3.395625352859497,
|
|
"learning_rate": 9.667214653149706e-06,
|
|
"loss": 1.2552,
|
|
"step": 3008
|
|
},
|
|
{
|
|
"epoch": 0.8051913299438052,
|
|
"grad_norm": 3.3872432708740234,
|
|
"learning_rate": 9.666897123497874e-06,
|
|
"loss": 1.1062,
|
|
"step": 3009
|
|
},
|
|
{
|
|
"epoch": 0.8054589242708055,
|
|
"grad_norm": 3.4740712642669678,
|
|
"learning_rate": 9.666579447651467e-06,
|
|
"loss": 1.1967,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.8057265185978058,
|
|
"grad_norm": 3.5477073192596436,
|
|
"learning_rate": 9.666261625620437e-06,
|
|
"loss": 1.0399,
|
|
"step": 3011
|
|
},
|
|
{
|
|
"epoch": 0.8059941129248059,
|
|
"grad_norm": 3.2551109790802,
|
|
"learning_rate": 9.665943657414738e-06,
|
|
"loss": 1.1364,
|
|
"step": 3012
|
|
},
|
|
{
|
|
"epoch": 0.8062617072518062,
|
|
"grad_norm": 3.1551992893218994,
|
|
"learning_rate": 9.665625543044335e-06,
|
|
"loss": 1.1235,
|
|
"step": 3013
|
|
},
|
|
{
|
|
"epoch": 0.8065293015788065,
|
|
"grad_norm": 3.2002670764923096,
|
|
"learning_rate": 9.66530728251919e-06,
|
|
"loss": 1.06,
|
|
"step": 3014
|
|
},
|
|
{
|
|
"epoch": 0.8067968959058068,
|
|
"grad_norm": 3.1332433223724365,
|
|
"learning_rate": 9.664988875849271e-06,
|
|
"loss": 1.0827,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 0.807064490232807,
|
|
"grad_norm": 3.4181861877441406,
|
|
"learning_rate": 9.664670323044555e-06,
|
|
"loss": 1.108,
|
|
"step": 3016
|
|
},
|
|
{
|
|
"epoch": 0.8073320845598073,
|
|
"grad_norm": 3.91221284866333,
|
|
"learning_rate": 9.66435162411502e-06,
|
|
"loss": 1.0166,
|
|
"step": 3017
|
|
},
|
|
{
|
|
"epoch": 0.8075996788868076,
|
|
"grad_norm": 3.2280433177948,
|
|
"learning_rate": 9.664032779070652e-06,
|
|
"loss": 1.1096,
|
|
"step": 3018
|
|
},
|
|
{
|
|
"epoch": 0.8078672732138079,
|
|
"grad_norm": 3.229264259338379,
|
|
"learning_rate": 9.663713787921436e-06,
|
|
"loss": 1.0637,
|
|
"step": 3019
|
|
},
|
|
{
|
|
"epoch": 0.8081348675408081,
|
|
"grad_norm": 3.3362436294555664,
|
|
"learning_rate": 9.663394650677368e-06,
|
|
"loss": 1.0432,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.8084024618678084,
|
|
"grad_norm": 3.3346054553985596,
|
|
"learning_rate": 9.66307536734844e-06,
|
|
"loss": 1.164,
|
|
"step": 3021
|
|
},
|
|
{
|
|
"epoch": 0.8086700561948087,
|
|
"grad_norm": 3.382387399673462,
|
|
"learning_rate": 9.662755937944657e-06,
|
|
"loss": 1.0578,
|
|
"step": 3022
|
|
},
|
|
{
|
|
"epoch": 0.808937650521809,
|
|
"grad_norm": 3.3161141872406006,
|
|
"learning_rate": 9.662436362476026e-06,
|
|
"loss": 0.9829,
|
|
"step": 3023
|
|
},
|
|
{
|
|
"epoch": 0.8092052448488092,
|
|
"grad_norm": 3.457970380783081,
|
|
"learning_rate": 9.662116640952558e-06,
|
|
"loss": 1.2865,
|
|
"step": 3024
|
|
},
|
|
{
|
|
"epoch": 0.8094728391758095,
|
|
"grad_norm": 3.1441056728363037,
|
|
"learning_rate": 9.661796773384266e-06,
|
|
"loss": 1.0722,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 0.8097404335028098,
|
|
"grad_norm": 3.2600796222686768,
|
|
"learning_rate": 9.661476759781174e-06,
|
|
"loss": 1.0949,
|
|
"step": 3026
|
|
},
|
|
{
|
|
"epoch": 0.81000802782981,
|
|
"grad_norm": 3.8801653385162354,
|
|
"learning_rate": 9.661156600153304e-06,
|
|
"loss": 1.2197,
|
|
"step": 3027
|
|
},
|
|
{
|
|
"epoch": 0.8102756221568103,
|
|
"grad_norm": 3.6208014488220215,
|
|
"learning_rate": 9.660836294510685e-06,
|
|
"loss": 1.2421,
|
|
"step": 3028
|
|
},
|
|
{
|
|
"epoch": 0.8105432164838106,
|
|
"grad_norm": 3.5174331665039062,
|
|
"learning_rate": 9.660515842863352e-06,
|
|
"loss": 1.0628,
|
|
"step": 3029
|
|
},
|
|
{
|
|
"epoch": 0.8108108108108109,
|
|
"grad_norm": 3.285752058029175,
|
|
"learning_rate": 9.660195245221345e-06,
|
|
"loss": 1.2271,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.811078405137811,
|
|
"grad_norm": 3.3572685718536377,
|
|
"learning_rate": 9.659874501594705e-06,
|
|
"loss": 1.1451,
|
|
"step": 3031
|
|
},
|
|
{
|
|
"epoch": 0.8113459994648113,
|
|
"grad_norm": 3.5392873287200928,
|
|
"learning_rate": 9.659553611993478e-06,
|
|
"loss": 1.2389,
|
|
"step": 3032
|
|
},
|
|
{
|
|
"epoch": 0.8116135937918116,
|
|
"grad_norm": 3.6059014797210693,
|
|
"learning_rate": 9.659232576427718e-06,
|
|
"loss": 1.2854,
|
|
"step": 3033
|
|
},
|
|
{
|
|
"epoch": 0.8118811881188119,
|
|
"grad_norm": 3.2821319103240967,
|
|
"learning_rate": 9.65891139490748e-06,
|
|
"loss": 1.0982,
|
|
"step": 3034
|
|
},
|
|
{
|
|
"epoch": 0.8121487824458121,
|
|
"grad_norm": 3.2728023529052734,
|
|
"learning_rate": 9.65859006744283e-06,
|
|
"loss": 1.0598,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 0.8124163767728124,
|
|
"grad_norm": 3.7586371898651123,
|
|
"learning_rate": 9.65826859404383e-06,
|
|
"loss": 1.2271,
|
|
"step": 3036
|
|
},
|
|
{
|
|
"epoch": 0.8126839710998127,
|
|
"grad_norm": 3.513029098510742,
|
|
"learning_rate": 9.65794697472055e-06,
|
|
"loss": 1.0671,
|
|
"step": 3037
|
|
},
|
|
{
|
|
"epoch": 0.8129515654268129,
|
|
"grad_norm": 3.1939735412597656,
|
|
"learning_rate": 9.657625209483066e-06,
|
|
"loss": 1.0949,
|
|
"step": 3038
|
|
},
|
|
{
|
|
"epoch": 0.8132191597538132,
|
|
"grad_norm": 3.557431221008301,
|
|
"learning_rate": 9.65730329834146e-06,
|
|
"loss": 1.1211,
|
|
"step": 3039
|
|
},
|
|
{
|
|
"epoch": 0.8134867540808135,
|
|
"grad_norm": 3.6598188877105713,
|
|
"learning_rate": 9.656981241305811e-06,
|
|
"loss": 1.1189,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.8137543484078138,
|
|
"grad_norm": 3.781261444091797,
|
|
"learning_rate": 9.656659038386213e-06,
|
|
"loss": 1.1389,
|
|
"step": 3041
|
|
},
|
|
{
|
|
"epoch": 0.814021942734814,
|
|
"grad_norm": 3.638216733932495,
|
|
"learning_rate": 9.656336689592756e-06,
|
|
"loss": 1.3045,
|
|
"step": 3042
|
|
},
|
|
{
|
|
"epoch": 0.8142895370618143,
|
|
"grad_norm": 3.7151739597320557,
|
|
"learning_rate": 9.65601419493554e-06,
|
|
"loss": 1.3391,
|
|
"step": 3043
|
|
},
|
|
{
|
|
"epoch": 0.8145571313888146,
|
|
"grad_norm": 3.482971429824829,
|
|
"learning_rate": 9.655691554424664e-06,
|
|
"loss": 1.0059,
|
|
"step": 3044
|
|
},
|
|
{
|
|
"epoch": 0.8148247257158149,
|
|
"grad_norm": 3.6908507347106934,
|
|
"learning_rate": 9.655368768070239e-06,
|
|
"loss": 1.33,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 0.8150923200428151,
|
|
"grad_norm": 3.2988486289978027,
|
|
"learning_rate": 9.655045835882373e-06,
|
|
"loss": 1.0606,
|
|
"step": 3046
|
|
},
|
|
{
|
|
"epoch": 0.8153599143698154,
|
|
"grad_norm": 3.3644847869873047,
|
|
"learning_rate": 9.654722757871184e-06,
|
|
"loss": 1.1128,
|
|
"step": 3047
|
|
},
|
|
{
|
|
"epoch": 0.8156275086968157,
|
|
"grad_norm": 3.3999931812286377,
|
|
"learning_rate": 9.654399534046795e-06,
|
|
"loss": 1.1453,
|
|
"step": 3048
|
|
},
|
|
{
|
|
"epoch": 0.8158951030238158,
|
|
"grad_norm": 3.4176931381225586,
|
|
"learning_rate": 9.654076164419326e-06,
|
|
"loss": 1.1062,
|
|
"step": 3049
|
|
},
|
|
{
|
|
"epoch": 0.8161626973508161,
|
|
"grad_norm": 3.199340581893921,
|
|
"learning_rate": 9.65375264899891e-06,
|
|
"loss": 1.1005,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.8164302916778164,
|
|
"grad_norm": 3.3121516704559326,
|
|
"learning_rate": 9.653428987795684e-06,
|
|
"loss": 1.0958,
|
|
"step": 3051
|
|
},
|
|
{
|
|
"epoch": 0.8166978860048167,
|
|
"grad_norm": 3.3458409309387207,
|
|
"learning_rate": 9.65310518081978e-06,
|
|
"loss": 1.1123,
|
|
"step": 3052
|
|
},
|
|
{
|
|
"epoch": 0.8169654803318169,
|
|
"grad_norm": 3.438964366912842,
|
|
"learning_rate": 9.652781228081348e-06,
|
|
"loss": 1.2157,
|
|
"step": 3053
|
|
},
|
|
{
|
|
"epoch": 0.8172330746588172,
|
|
"grad_norm": 3.2331788539886475,
|
|
"learning_rate": 9.652457129590534e-06,
|
|
"loss": 1.0771,
|
|
"step": 3054
|
|
},
|
|
{
|
|
"epoch": 0.8175006689858175,
|
|
"grad_norm": 3.585362434387207,
|
|
"learning_rate": 9.652132885357488e-06,
|
|
"loss": 1.2097,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 0.8177682633128178,
|
|
"grad_norm": 3.1754887104034424,
|
|
"learning_rate": 9.65180849539237e-06,
|
|
"loss": 1.0281,
|
|
"step": 3056
|
|
},
|
|
{
|
|
"epoch": 0.818035857639818,
|
|
"grad_norm": 3.4600307941436768,
|
|
"learning_rate": 9.651483959705344e-06,
|
|
"loss": 1.1359,
|
|
"step": 3057
|
|
},
|
|
{
|
|
"epoch": 0.8183034519668183,
|
|
"grad_norm": 3.3886213302612305,
|
|
"learning_rate": 9.65115927830657e-06,
|
|
"loss": 1.0735,
|
|
"step": 3058
|
|
},
|
|
{
|
|
"epoch": 0.8185710462938186,
|
|
"grad_norm": 3.5005595684051514,
|
|
"learning_rate": 9.650834451206225e-06,
|
|
"loss": 1.0937,
|
|
"step": 3059
|
|
},
|
|
{
|
|
"epoch": 0.8188386406208188,
|
|
"grad_norm": 3.592665672302246,
|
|
"learning_rate": 9.650509478414483e-06,
|
|
"loss": 1.1554,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.8191062349478191,
|
|
"grad_norm": 3.504587173461914,
|
|
"learning_rate": 9.650184359941522e-06,
|
|
"loss": 1.0877,
|
|
"step": 3061
|
|
},
|
|
{
|
|
"epoch": 0.8193738292748194,
|
|
"grad_norm": 3.803943634033203,
|
|
"learning_rate": 9.649859095797526e-06,
|
|
"loss": 1.2895,
|
|
"step": 3062
|
|
},
|
|
{
|
|
"epoch": 0.8196414236018197,
|
|
"grad_norm": 3.929657459259033,
|
|
"learning_rate": 9.649533685992687e-06,
|
|
"loss": 1.2047,
|
|
"step": 3063
|
|
},
|
|
{
|
|
"epoch": 0.8199090179288199,
|
|
"grad_norm": 3.074686288833618,
|
|
"learning_rate": 9.649208130537199e-06,
|
|
"loss": 1.0744,
|
|
"step": 3064
|
|
},
|
|
{
|
|
"epoch": 0.8201766122558202,
|
|
"grad_norm": 3.3522446155548096,
|
|
"learning_rate": 9.648882429441258e-06,
|
|
"loss": 1.1105,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 0.8204442065828205,
|
|
"grad_norm": 3.2287309169769287,
|
|
"learning_rate": 9.648556582715067e-06,
|
|
"loss": 1.1542,
|
|
"step": 3066
|
|
},
|
|
{
|
|
"epoch": 0.8207118009098208,
|
|
"grad_norm": 3.072052478790283,
|
|
"learning_rate": 9.648230590368836e-06,
|
|
"loss": 1.0983,
|
|
"step": 3067
|
|
},
|
|
{
|
|
"epoch": 0.820979395236821,
|
|
"grad_norm": 3.3558244705200195,
|
|
"learning_rate": 9.647904452412774e-06,
|
|
"loss": 1.1362,
|
|
"step": 3068
|
|
},
|
|
{
|
|
"epoch": 0.8212469895638213,
|
|
"grad_norm": 3.917283296585083,
|
|
"learning_rate": 9.647578168857101e-06,
|
|
"loss": 1.1743,
|
|
"step": 3069
|
|
},
|
|
{
|
|
"epoch": 0.8215145838908215,
|
|
"grad_norm": 3.6273481845855713,
|
|
"learning_rate": 9.647251739712034e-06,
|
|
"loss": 1.2516,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.8217821782178217,
|
|
"grad_norm": 3.0678317546844482,
|
|
"learning_rate": 9.646925164987802e-06,
|
|
"loss": 1.0271,
|
|
"step": 3071
|
|
},
|
|
{
|
|
"epoch": 0.822049772544822,
|
|
"grad_norm": 2.849170684814453,
|
|
"learning_rate": 9.646598444694631e-06,
|
|
"loss": 0.9816,
|
|
"step": 3072
|
|
},
|
|
{
|
|
"epoch": 0.8223173668718223,
|
|
"grad_norm": 3.5290982723236084,
|
|
"learning_rate": 9.64627157884276e-06,
|
|
"loss": 1.1911,
|
|
"step": 3073
|
|
},
|
|
{
|
|
"epoch": 0.8225849611988226,
|
|
"grad_norm": 3.403162717819214,
|
|
"learning_rate": 9.645944567442429e-06,
|
|
"loss": 1.1342,
|
|
"step": 3074
|
|
},
|
|
{
|
|
"epoch": 0.8228525555258228,
|
|
"grad_norm": 3.4919958114624023,
|
|
"learning_rate": 9.645617410503879e-06,
|
|
"loss": 1.256,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 0.8231201498528231,
|
|
"grad_norm": 3.1679487228393555,
|
|
"learning_rate": 9.645290108037358e-06,
|
|
"loss": 1.0346,
|
|
"step": 3076
|
|
},
|
|
{
|
|
"epoch": 0.8233877441798234,
|
|
"grad_norm": 3.7232227325439453,
|
|
"learning_rate": 9.644962660053122e-06,
|
|
"loss": 1.2307,
|
|
"step": 3077
|
|
},
|
|
{
|
|
"epoch": 0.8236553385068237,
|
|
"grad_norm": 3.162550449371338,
|
|
"learning_rate": 9.644635066561426e-06,
|
|
"loss": 1.0,
|
|
"step": 3078
|
|
},
|
|
{
|
|
"epoch": 0.8239229328338239,
|
|
"grad_norm": 3.254295825958252,
|
|
"learning_rate": 9.644307327572533e-06,
|
|
"loss": 0.9921,
|
|
"step": 3079
|
|
},
|
|
{
|
|
"epoch": 0.8241905271608242,
|
|
"grad_norm": 3.4591927528381348,
|
|
"learning_rate": 9.643979443096711e-06,
|
|
"loss": 1.149,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.8244581214878245,
|
|
"grad_norm": 3.499791383743286,
|
|
"learning_rate": 9.64365141314423e-06,
|
|
"loss": 1.1439,
|
|
"step": 3081
|
|
},
|
|
{
|
|
"epoch": 0.8247257158148247,
|
|
"grad_norm": 4.038766860961914,
|
|
"learning_rate": 9.643323237725366e-06,
|
|
"loss": 1.2011,
|
|
"step": 3082
|
|
},
|
|
{
|
|
"epoch": 0.824993310141825,
|
|
"grad_norm": 3.279536247253418,
|
|
"learning_rate": 9.6429949168504e-06,
|
|
"loss": 1.1845,
|
|
"step": 3083
|
|
},
|
|
{
|
|
"epoch": 0.8252609044688253,
|
|
"grad_norm": 3.441106081008911,
|
|
"learning_rate": 9.642666450529613e-06,
|
|
"loss": 1.1406,
|
|
"step": 3084
|
|
},
|
|
{
|
|
"epoch": 0.8255284987958256,
|
|
"grad_norm": 3.512998104095459,
|
|
"learning_rate": 9.6423378387733e-06,
|
|
"loss": 1.1178,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 0.8257960931228258,
|
|
"grad_norm": 3.23618483543396,
|
|
"learning_rate": 9.642009081591753e-06,
|
|
"loss": 1.0476,
|
|
"step": 3086
|
|
},
|
|
{
|
|
"epoch": 0.8260636874498261,
|
|
"grad_norm": 3.6994218826293945,
|
|
"learning_rate": 9.641680178995272e-06,
|
|
"loss": 1.3332,
|
|
"step": 3087
|
|
},
|
|
{
|
|
"epoch": 0.8263312817768264,
|
|
"grad_norm": 3.1423802375793457,
|
|
"learning_rate": 9.641351130994155e-06,
|
|
"loss": 1.0252,
|
|
"step": 3088
|
|
},
|
|
{
|
|
"epoch": 0.8265988761038267,
|
|
"grad_norm": 3.7969133853912354,
|
|
"learning_rate": 9.641021937598715e-06,
|
|
"loss": 1.2813,
|
|
"step": 3089
|
|
},
|
|
{
|
|
"epoch": 0.8268664704308268,
|
|
"grad_norm": 3.5946247577667236,
|
|
"learning_rate": 9.640692598819263e-06,
|
|
"loss": 1.2722,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.8271340647578271,
|
|
"grad_norm": 3.4758689403533936,
|
|
"learning_rate": 9.640363114666115e-06,
|
|
"loss": 1.0492,
|
|
"step": 3091
|
|
},
|
|
{
|
|
"epoch": 0.8274016590848274,
|
|
"grad_norm": 3.2242352962493896,
|
|
"learning_rate": 9.640033485149594e-06,
|
|
"loss": 1.0117,
|
|
"step": 3092
|
|
},
|
|
{
|
|
"epoch": 0.8276692534118276,
|
|
"grad_norm": 3.510794162750244,
|
|
"learning_rate": 9.639703710280022e-06,
|
|
"loss": 1.1141,
|
|
"step": 3093
|
|
},
|
|
{
|
|
"epoch": 0.8279368477388279,
|
|
"grad_norm": 3.322143077850342,
|
|
"learning_rate": 9.639373790067734e-06,
|
|
"loss": 1.1013,
|
|
"step": 3094
|
|
},
|
|
{
|
|
"epoch": 0.8282044420658282,
|
|
"grad_norm": 3.6232595443725586,
|
|
"learning_rate": 9.639043724523063e-06,
|
|
"loss": 1.0961,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 0.8284720363928285,
|
|
"grad_norm": 3.486630439758301,
|
|
"learning_rate": 9.638713513656348e-06,
|
|
"loss": 1.2134,
|
|
"step": 3096
|
|
},
|
|
{
|
|
"epoch": 0.8287396307198287,
|
|
"grad_norm": 3.1779515743255615,
|
|
"learning_rate": 9.638383157477935e-06,
|
|
"loss": 1.0994,
|
|
"step": 3097
|
|
},
|
|
{
|
|
"epoch": 0.829007225046829,
|
|
"grad_norm": 3.2651169300079346,
|
|
"learning_rate": 9.638052655998172e-06,
|
|
"loss": 1.0744,
|
|
"step": 3098
|
|
},
|
|
{
|
|
"epoch": 0.8292748193738293,
|
|
"grad_norm": 4.6499152183532715,
|
|
"learning_rate": 9.63772200922741e-06,
|
|
"loss": 1.1698,
|
|
"step": 3099
|
|
},
|
|
{
|
|
"epoch": 0.8295424137008296,
|
|
"grad_norm": 3.5739243030548096,
|
|
"learning_rate": 9.63739121717601e-06,
|
|
"loss": 1.1934,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.8298100080278298,
|
|
"grad_norm": 3.5878965854644775,
|
|
"learning_rate": 9.637060279854331e-06,
|
|
"loss": 1.2052,
|
|
"step": 3101
|
|
},
|
|
{
|
|
"epoch": 0.8300776023548301,
|
|
"grad_norm": 3.2450151443481445,
|
|
"learning_rate": 9.636729197272745e-06,
|
|
"loss": 1.0594,
|
|
"step": 3102
|
|
},
|
|
{
|
|
"epoch": 0.8303451966818304,
|
|
"grad_norm": 3.224311351776123,
|
|
"learning_rate": 9.636397969441617e-06,
|
|
"loss": 1.1101,
|
|
"step": 3103
|
|
},
|
|
{
|
|
"epoch": 0.8306127910088306,
|
|
"grad_norm": 3.1356136798858643,
|
|
"learning_rate": 9.63606659637133e-06,
|
|
"loss": 1.0276,
|
|
"step": 3104
|
|
},
|
|
{
|
|
"epoch": 0.8308803853358309,
|
|
"grad_norm": 3.4842357635498047,
|
|
"learning_rate": 9.635735078072259e-06,
|
|
"loss": 1.2134,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 0.8311479796628312,
|
|
"grad_norm": 3.485252857208252,
|
|
"learning_rate": 9.635403414554791e-06,
|
|
"loss": 1.0805,
|
|
"step": 3106
|
|
},
|
|
{
|
|
"epoch": 0.8314155739898315,
|
|
"grad_norm": 3.630953311920166,
|
|
"learning_rate": 9.635071605829315e-06,
|
|
"loss": 1.1837,
|
|
"step": 3107
|
|
},
|
|
{
|
|
"epoch": 0.8316831683168316,
|
|
"grad_norm": 3.5035400390625,
|
|
"learning_rate": 9.634739651906227e-06,
|
|
"loss": 1.142,
|
|
"step": 3108
|
|
},
|
|
{
|
|
"epoch": 0.8319507626438319,
|
|
"grad_norm": 3.4590489864349365,
|
|
"learning_rate": 9.634407552795924e-06,
|
|
"loss": 1.1785,
|
|
"step": 3109
|
|
},
|
|
{
|
|
"epoch": 0.8322183569708322,
|
|
"grad_norm": 3.730466842651367,
|
|
"learning_rate": 9.63407530850881e-06,
|
|
"loss": 1.1447,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.8324859512978325,
|
|
"grad_norm": 3.295057535171509,
|
|
"learning_rate": 9.633742919055294e-06,
|
|
"loss": 1.0757,
|
|
"step": 3111
|
|
},
|
|
{
|
|
"epoch": 0.8327535456248327,
|
|
"grad_norm": 3.471201181411743,
|
|
"learning_rate": 9.633410384445785e-06,
|
|
"loss": 1.1772,
|
|
"step": 3112
|
|
},
|
|
{
|
|
"epoch": 0.833021139951833,
|
|
"grad_norm": 3.329434633255005,
|
|
"learning_rate": 9.633077704690702e-06,
|
|
"loss": 1.2623,
|
|
"step": 3113
|
|
},
|
|
{
|
|
"epoch": 0.8332887342788333,
|
|
"grad_norm": 3.593980073928833,
|
|
"learning_rate": 9.632744879800468e-06,
|
|
"loss": 1.1858,
|
|
"step": 3114
|
|
},
|
|
{
|
|
"epoch": 0.8335563286058335,
|
|
"grad_norm": 3.156765937805176,
|
|
"learning_rate": 9.632411909785506e-06,
|
|
"loss": 1.1067,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 0.8338239229328338,
|
|
"grad_norm": 3.3649260997772217,
|
|
"learning_rate": 9.632078794656249e-06,
|
|
"loss": 1.1243,
|
|
"step": 3116
|
|
},
|
|
{
|
|
"epoch": 0.8340915172598341,
|
|
"grad_norm": 3.270552635192871,
|
|
"learning_rate": 9.631745534423132e-06,
|
|
"loss": 1.1653,
|
|
"step": 3117
|
|
},
|
|
{
|
|
"epoch": 0.8343591115868344,
|
|
"grad_norm": 2.9742650985717773,
|
|
"learning_rate": 9.631412129096591e-06,
|
|
"loss": 1.0039,
|
|
"step": 3118
|
|
},
|
|
{
|
|
"epoch": 0.8346267059138346,
|
|
"grad_norm": 3.710505247116089,
|
|
"learning_rate": 9.631078578687077e-06,
|
|
"loss": 1.1613,
|
|
"step": 3119
|
|
},
|
|
{
|
|
"epoch": 0.8348943002408349,
|
|
"grad_norm": 3.2119741439819336,
|
|
"learning_rate": 9.630744883205031e-06,
|
|
"loss": 1.1568,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.8351618945678352,
|
|
"grad_norm": 3.492464065551758,
|
|
"learning_rate": 9.630411042660913e-06,
|
|
"loss": 1.3087,
|
|
"step": 3121
|
|
},
|
|
{
|
|
"epoch": 0.8354294888948355,
|
|
"grad_norm": 4.012518405914307,
|
|
"learning_rate": 9.630077057065177e-06,
|
|
"loss": 1.1665,
|
|
"step": 3122
|
|
},
|
|
{
|
|
"epoch": 0.8356970832218357,
|
|
"grad_norm": 3.5707449913024902,
|
|
"learning_rate": 9.629742926428287e-06,
|
|
"loss": 1.1712,
|
|
"step": 3123
|
|
},
|
|
{
|
|
"epoch": 0.835964677548836,
|
|
"grad_norm": 3.179173469543457,
|
|
"learning_rate": 9.629408650760707e-06,
|
|
"loss": 1.0721,
|
|
"step": 3124
|
|
},
|
|
{
|
|
"epoch": 0.8362322718758363,
|
|
"grad_norm": 3.2190163135528564,
|
|
"learning_rate": 9.629074230072913e-06,
|
|
"loss": 1.1279,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 0.8364998662028365,
|
|
"grad_norm": 3.036876916885376,
|
|
"learning_rate": 9.62873966437538e-06,
|
|
"loss": 1.0463,
|
|
"step": 3126
|
|
},
|
|
{
|
|
"epoch": 0.8367674605298367,
|
|
"grad_norm": 3.333547592163086,
|
|
"learning_rate": 9.628404953678585e-06,
|
|
"loss": 1.1396,
|
|
"step": 3127
|
|
},
|
|
{
|
|
"epoch": 0.837035054856837,
|
|
"grad_norm": 3.266360282897949,
|
|
"learning_rate": 9.628070097993016e-06,
|
|
"loss": 1.1264,
|
|
"step": 3128
|
|
},
|
|
{
|
|
"epoch": 0.8373026491838373,
|
|
"grad_norm": 3.3879363536834717,
|
|
"learning_rate": 9.627735097329161e-06,
|
|
"loss": 1.0972,
|
|
"step": 3129
|
|
},
|
|
{
|
|
"epoch": 0.8375702435108375,
|
|
"grad_norm": 3.3518929481506348,
|
|
"learning_rate": 9.627399951697516e-06,
|
|
"loss": 1.1234,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.8378378378378378,
|
|
"grad_norm": 3.3172409534454346,
|
|
"learning_rate": 9.627064661108581e-06,
|
|
"loss": 1.0768,
|
|
"step": 3131
|
|
},
|
|
{
|
|
"epoch": 0.8381054321648381,
|
|
"grad_norm": 3.6157588958740234,
|
|
"learning_rate": 9.626729225572854e-06,
|
|
"loss": 1.2114,
|
|
"step": 3132
|
|
},
|
|
{
|
|
"epoch": 0.8383730264918384,
|
|
"grad_norm": 3.2437682151794434,
|
|
"learning_rate": 9.626393645100849e-06,
|
|
"loss": 1.0175,
|
|
"step": 3133
|
|
},
|
|
{
|
|
"epoch": 0.8386406208188386,
|
|
"grad_norm": 3.443774461746216,
|
|
"learning_rate": 9.626057919703073e-06,
|
|
"loss": 1.1866,
|
|
"step": 3134
|
|
},
|
|
{
|
|
"epoch": 0.8389082151458389,
|
|
"grad_norm": 3.1143884658813477,
|
|
"learning_rate": 9.625722049390048e-06,
|
|
"loss": 0.9715,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 0.8391758094728392,
|
|
"grad_norm": 3.3151462078094482,
|
|
"learning_rate": 9.62538603417229e-06,
|
|
"loss": 1.0459,
|
|
"step": 3136
|
|
},
|
|
{
|
|
"epoch": 0.8394434037998394,
|
|
"grad_norm": 3.691002368927002,
|
|
"learning_rate": 9.625049874060331e-06,
|
|
"loss": 1.1284,
|
|
"step": 3137
|
|
},
|
|
{
|
|
"epoch": 0.8397109981268397,
|
|
"grad_norm": 3.0173420906066895,
|
|
"learning_rate": 9.624713569064695e-06,
|
|
"loss": 0.9815,
|
|
"step": 3138
|
|
},
|
|
{
|
|
"epoch": 0.83997859245384,
|
|
"grad_norm": 3.3124630451202393,
|
|
"learning_rate": 9.624377119195922e-06,
|
|
"loss": 1.1042,
|
|
"step": 3139
|
|
},
|
|
{
|
|
"epoch": 0.8402461867808403,
|
|
"grad_norm": 3.262075424194336,
|
|
"learning_rate": 9.624040524464548e-06,
|
|
"loss": 1.1501,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.8405137811078405,
|
|
"grad_norm": 3.391528367996216,
|
|
"learning_rate": 9.623703784881121e-06,
|
|
"loss": 1.086,
|
|
"step": 3141
|
|
},
|
|
{
|
|
"epoch": 0.8407813754348408,
|
|
"grad_norm": 3.690544843673706,
|
|
"learning_rate": 9.623366900456186e-06,
|
|
"loss": 1.1857,
|
|
"step": 3142
|
|
},
|
|
{
|
|
"epoch": 0.8410489697618411,
|
|
"grad_norm": 3.2583820819854736,
|
|
"learning_rate": 9.6230298712003e-06,
|
|
"loss": 1.063,
|
|
"step": 3143
|
|
},
|
|
{
|
|
"epoch": 0.8413165640888414,
|
|
"grad_norm": 3.278346300125122,
|
|
"learning_rate": 9.622692697124016e-06,
|
|
"loss": 1.1059,
|
|
"step": 3144
|
|
},
|
|
{
|
|
"epoch": 0.8415841584158416,
|
|
"grad_norm": 3.320652484893799,
|
|
"learning_rate": 9.6223553782379e-06,
|
|
"loss": 1.165,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 0.8418517527428419,
|
|
"grad_norm": 3.6142923831939697,
|
|
"learning_rate": 9.622017914552519e-06,
|
|
"loss": 1.1734,
|
|
"step": 3146
|
|
},
|
|
{
|
|
"epoch": 0.8421193470698422,
|
|
"grad_norm": 3.483147382736206,
|
|
"learning_rate": 9.62168030607844e-06,
|
|
"loss": 1.0495,
|
|
"step": 3147
|
|
},
|
|
{
|
|
"epoch": 0.8423869413968423,
|
|
"grad_norm": 3.2388815879821777,
|
|
"learning_rate": 9.621342552826245e-06,
|
|
"loss": 1.0552,
|
|
"step": 3148
|
|
},
|
|
{
|
|
"epoch": 0.8426545357238426,
|
|
"grad_norm": 3.1021432876586914,
|
|
"learning_rate": 9.62100465480651e-06,
|
|
"loss": 0.9876,
|
|
"step": 3149
|
|
},
|
|
{
|
|
"epoch": 0.8429221300508429,
|
|
"grad_norm": 3.7463855743408203,
|
|
"learning_rate": 9.62066661202982e-06,
|
|
"loss": 1.0824,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.8431897243778432,
|
|
"grad_norm": 3.345280170440674,
|
|
"learning_rate": 9.620328424506767e-06,
|
|
"loss": 1.0385,
|
|
"step": 3151
|
|
},
|
|
{
|
|
"epoch": 0.8434573187048434,
|
|
"grad_norm": 3.582469940185547,
|
|
"learning_rate": 9.619990092247943e-06,
|
|
"loss": 1.219,
|
|
"step": 3152
|
|
},
|
|
{
|
|
"epoch": 0.8437249130318437,
|
|
"grad_norm": 3.824211835861206,
|
|
"learning_rate": 9.619651615263948e-06,
|
|
"loss": 1.4056,
|
|
"step": 3153
|
|
},
|
|
{
|
|
"epoch": 0.843992507358844,
|
|
"grad_norm": 3.295612335205078,
|
|
"learning_rate": 9.619312993565382e-06,
|
|
"loss": 1.0493,
|
|
"step": 3154
|
|
},
|
|
{
|
|
"epoch": 0.8442601016858443,
|
|
"grad_norm": 3.390982151031494,
|
|
"learning_rate": 9.618974227162857e-06,
|
|
"loss": 1.136,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 0.8445276960128445,
|
|
"grad_norm": 3.5141913890838623,
|
|
"learning_rate": 9.618635316066984e-06,
|
|
"loss": 1.1228,
|
|
"step": 3156
|
|
},
|
|
{
|
|
"epoch": 0.8447952903398448,
|
|
"grad_norm": 3.6350278854370117,
|
|
"learning_rate": 9.618296260288376e-06,
|
|
"loss": 1.2088,
|
|
"step": 3157
|
|
},
|
|
{
|
|
"epoch": 0.8450628846668451,
|
|
"grad_norm": 3.1930181980133057,
|
|
"learning_rate": 9.617957059837659e-06,
|
|
"loss": 1.1015,
|
|
"step": 3158
|
|
},
|
|
{
|
|
"epoch": 0.8453304789938453,
|
|
"grad_norm": 3.7268929481506348,
|
|
"learning_rate": 9.617617714725456e-06,
|
|
"loss": 1.18,
|
|
"step": 3159
|
|
},
|
|
{
|
|
"epoch": 0.8455980733208456,
|
|
"grad_norm": 3.712311267852783,
|
|
"learning_rate": 9.617278224962398e-06,
|
|
"loss": 1.1109,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.8458656676478459,
|
|
"grad_norm": 3.326599597930908,
|
|
"learning_rate": 9.616938590559121e-06,
|
|
"loss": 1.0733,
|
|
"step": 3161
|
|
},
|
|
{
|
|
"epoch": 0.8461332619748462,
|
|
"grad_norm": 3.1389646530151367,
|
|
"learning_rate": 9.616598811526263e-06,
|
|
"loss": 1.0736,
|
|
"step": 3162
|
|
},
|
|
{
|
|
"epoch": 0.8464008563018464,
|
|
"grad_norm": 3.1288650035858154,
|
|
"learning_rate": 9.616258887874467e-06,
|
|
"loss": 1.0572,
|
|
"step": 3163
|
|
},
|
|
{
|
|
"epoch": 0.8466684506288467,
|
|
"grad_norm": 3.364788055419922,
|
|
"learning_rate": 9.615918819614382e-06,
|
|
"loss": 1.0658,
|
|
"step": 3164
|
|
},
|
|
{
|
|
"epoch": 0.846936044955847,
|
|
"grad_norm": 3.18229341506958,
|
|
"learning_rate": 9.615578606756663e-06,
|
|
"loss": 1.0498,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 0.8472036392828473,
|
|
"grad_norm": 3.276883125305176,
|
|
"learning_rate": 9.615238249311964e-06,
|
|
"loss": 1.0673,
|
|
"step": 3166
|
|
},
|
|
{
|
|
"epoch": 0.8474712336098474,
|
|
"grad_norm": 3.2905640602111816,
|
|
"learning_rate": 9.61489774729095e-06,
|
|
"loss": 1.0059,
|
|
"step": 3167
|
|
},
|
|
{
|
|
"epoch": 0.8477388279368477,
|
|
"grad_norm": 3.756727933883667,
|
|
"learning_rate": 9.614557100704286e-06,
|
|
"loss": 1.2645,
|
|
"step": 3168
|
|
},
|
|
{
|
|
"epoch": 0.848006422263848,
|
|
"grad_norm": 3.3283801078796387,
|
|
"learning_rate": 9.614216309562643e-06,
|
|
"loss": 1.0832,
|
|
"step": 3169
|
|
},
|
|
{
|
|
"epoch": 0.8482740165908482,
|
|
"grad_norm": 3.5391414165496826,
|
|
"learning_rate": 9.613875373876698e-06,
|
|
"loss": 1.0946,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 0.8485416109178485,
|
|
"grad_norm": 3.3885715007781982,
|
|
"learning_rate": 9.61353429365713e-06,
|
|
"loss": 1.1686,
|
|
"step": 3171
|
|
},
|
|
{
|
|
"epoch": 0.8488092052448488,
|
|
"grad_norm": 3.24389386177063,
|
|
"learning_rate": 9.613193068914623e-06,
|
|
"loss": 1.1554,
|
|
"step": 3172
|
|
},
|
|
{
|
|
"epoch": 0.8490767995718491,
|
|
"grad_norm": 3.2578012943267822,
|
|
"learning_rate": 9.612851699659867e-06,
|
|
"loss": 1.1124,
|
|
"step": 3173
|
|
},
|
|
{
|
|
"epoch": 0.8493443938988493,
|
|
"grad_norm": 3.3463966846466064,
|
|
"learning_rate": 9.612510185903554e-06,
|
|
"loss": 1.0264,
|
|
"step": 3174
|
|
},
|
|
{
|
|
"epoch": 0.8496119882258496,
|
|
"grad_norm": 3.320957899093628,
|
|
"learning_rate": 9.612168527656386e-06,
|
|
"loss": 1.1183,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 0.8498795825528499,
|
|
"grad_norm": 3.1176092624664307,
|
|
"learning_rate": 9.611826724929063e-06,
|
|
"loss": 1.1182,
|
|
"step": 3176
|
|
},
|
|
{
|
|
"epoch": 0.8501471768798502,
|
|
"grad_norm": 3.338179349899292,
|
|
"learning_rate": 9.611484777732292e-06,
|
|
"loss": 1.1178,
|
|
"step": 3177
|
|
},
|
|
{
|
|
"epoch": 0.8504147712068504,
|
|
"grad_norm": 3.7851016521453857,
|
|
"learning_rate": 9.611142686076787e-06,
|
|
"loss": 1.2339,
|
|
"step": 3178
|
|
},
|
|
{
|
|
"epoch": 0.8506823655338507,
|
|
"grad_norm": 3.0936129093170166,
|
|
"learning_rate": 9.610800449973261e-06,
|
|
"loss": 1.1433,
|
|
"step": 3179
|
|
},
|
|
{
|
|
"epoch": 0.850949959860851,
|
|
"grad_norm": 3.48248291015625,
|
|
"learning_rate": 9.610458069432438e-06,
|
|
"loss": 1.1971,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.8512175541878512,
|
|
"grad_norm": 3.774419069290161,
|
|
"learning_rate": 9.610115544465042e-06,
|
|
"loss": 1.1778,
|
|
"step": 3181
|
|
},
|
|
{
|
|
"epoch": 0.8514851485148515,
|
|
"grad_norm": 3.461056709289551,
|
|
"learning_rate": 9.609772875081802e-06,
|
|
"loss": 1.1425,
|
|
"step": 3182
|
|
},
|
|
{
|
|
"epoch": 0.8517527428418518,
|
|
"grad_norm": 3.332552671432495,
|
|
"learning_rate": 9.609430061293454e-06,
|
|
"loss": 1.0041,
|
|
"step": 3183
|
|
},
|
|
{
|
|
"epoch": 0.8520203371688521,
|
|
"grad_norm": 3.5970587730407715,
|
|
"learning_rate": 9.609087103110737e-06,
|
|
"loss": 1.2363,
|
|
"step": 3184
|
|
},
|
|
{
|
|
"epoch": 0.8522879314958522,
|
|
"grad_norm": 3.4365155696868896,
|
|
"learning_rate": 9.608744000544392e-06,
|
|
"loss": 1.0534,
|
|
"step": 3185
|
|
},
|
|
{
|
|
"epoch": 0.8525555258228525,
|
|
"grad_norm": 3.2905330657958984,
|
|
"learning_rate": 9.60840075360517e-06,
|
|
"loss": 1.2355,
|
|
"step": 3186
|
|
},
|
|
{
|
|
"epoch": 0.8528231201498528,
|
|
"grad_norm": 3.4680607318878174,
|
|
"learning_rate": 9.608057362303823e-06,
|
|
"loss": 1.0901,
|
|
"step": 3187
|
|
},
|
|
{
|
|
"epoch": 0.8530907144768531,
|
|
"grad_norm": 3.351891279220581,
|
|
"learning_rate": 9.607713826651107e-06,
|
|
"loss": 1.1422,
|
|
"step": 3188
|
|
},
|
|
{
|
|
"epoch": 0.8533583088038533,
|
|
"grad_norm": 3.7744686603546143,
|
|
"learning_rate": 9.607370146657782e-06,
|
|
"loss": 1.1692,
|
|
"step": 3189
|
|
},
|
|
{
|
|
"epoch": 0.8536259031308536,
|
|
"grad_norm": 3.2692463397979736,
|
|
"learning_rate": 9.607026322334618e-06,
|
|
"loss": 1.0488,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 0.8538934974578539,
|
|
"grad_norm": 3.201399564743042,
|
|
"learning_rate": 9.606682353692383e-06,
|
|
"loss": 0.9253,
|
|
"step": 3191
|
|
},
|
|
{
|
|
"epoch": 0.8541610917848541,
|
|
"grad_norm": 3.5281589031219482,
|
|
"learning_rate": 9.606338240741851e-06,
|
|
"loss": 1.1785,
|
|
"step": 3192
|
|
},
|
|
{
|
|
"epoch": 0.8544286861118544,
|
|
"grad_norm": 3.3514602184295654,
|
|
"learning_rate": 9.605993983493804e-06,
|
|
"loss": 1.1364,
|
|
"step": 3193
|
|
},
|
|
{
|
|
"epoch": 0.8546962804388547,
|
|
"grad_norm": 3.6264495849609375,
|
|
"learning_rate": 9.605649581959027e-06,
|
|
"loss": 1.1206,
|
|
"step": 3194
|
|
},
|
|
{
|
|
"epoch": 0.854963874765855,
|
|
"grad_norm": 3.1329174041748047,
|
|
"learning_rate": 9.605305036148306e-06,
|
|
"loss": 1.0666,
|
|
"step": 3195
|
|
},
|
|
{
|
|
"epoch": 0.8552314690928552,
|
|
"grad_norm": 3.255485773086548,
|
|
"learning_rate": 9.604960346072435e-06,
|
|
"loss": 0.9613,
|
|
"step": 3196
|
|
},
|
|
{
|
|
"epoch": 0.8554990634198555,
|
|
"grad_norm": 3.693399429321289,
|
|
"learning_rate": 9.604615511742213e-06,
|
|
"loss": 1.169,
|
|
"step": 3197
|
|
},
|
|
{
|
|
"epoch": 0.8557666577468558,
|
|
"grad_norm": 3.0587754249572754,
|
|
"learning_rate": 9.604270533168441e-06,
|
|
"loss": 1.0926,
|
|
"step": 3198
|
|
},
|
|
{
|
|
"epoch": 0.8560342520738561,
|
|
"grad_norm": 3.428370952606201,
|
|
"learning_rate": 9.603925410361925e-06,
|
|
"loss": 1.111,
|
|
"step": 3199
|
|
},
|
|
{
|
|
"epoch": 0.8563018464008563,
|
|
"grad_norm": 3.2654330730438232,
|
|
"learning_rate": 9.603580143333478e-06,
|
|
"loss": 1.1336,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.8565694407278566,
|
|
"grad_norm": 3.4805808067321777,
|
|
"learning_rate": 9.603234732093913e-06,
|
|
"loss": 1.1853,
|
|
"step": 3201
|
|
},
|
|
{
|
|
"epoch": 0.8568370350548569,
|
|
"grad_norm": 3.3785743713378906,
|
|
"learning_rate": 9.602889176654055e-06,
|
|
"loss": 1.1114,
|
|
"step": 3202
|
|
},
|
|
{
|
|
"epoch": 0.857104629381857,
|
|
"grad_norm": 3.4433510303497314,
|
|
"learning_rate": 9.602543477024725e-06,
|
|
"loss": 1.1787,
|
|
"step": 3203
|
|
},
|
|
{
|
|
"epoch": 0.8573722237088574,
|
|
"grad_norm": 3.078172445297241,
|
|
"learning_rate": 9.602197633216754e-06,
|
|
"loss": 1.0089,
|
|
"step": 3204
|
|
},
|
|
{
|
|
"epoch": 0.8576398180358576,
|
|
"grad_norm": 3.009098768234253,
|
|
"learning_rate": 9.601851645240974e-06,
|
|
"loss": 0.9585,
|
|
"step": 3205
|
|
},
|
|
{
|
|
"epoch": 0.857907412362858,
|
|
"grad_norm": 3.31787109375,
|
|
"learning_rate": 9.601505513108227e-06,
|
|
"loss": 1.097,
|
|
"step": 3206
|
|
},
|
|
{
|
|
"epoch": 0.8581750066898581,
|
|
"grad_norm": 3.802264451980591,
|
|
"learning_rate": 9.601159236829353e-06,
|
|
"loss": 1.2097,
|
|
"step": 3207
|
|
},
|
|
{
|
|
"epoch": 0.8584426010168584,
|
|
"grad_norm": 3.393442392349243,
|
|
"learning_rate": 9.600812816415199e-06,
|
|
"loss": 1.1896,
|
|
"step": 3208
|
|
},
|
|
{
|
|
"epoch": 0.8587101953438587,
|
|
"grad_norm": 3.611478090286255,
|
|
"learning_rate": 9.600466251876618e-06,
|
|
"loss": 1.306,
|
|
"step": 3209
|
|
},
|
|
{
|
|
"epoch": 0.858977789670859,
|
|
"grad_norm": 3.6949093341827393,
|
|
"learning_rate": 9.600119543224467e-06,
|
|
"loss": 1.0832,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 0.8592453839978592,
|
|
"grad_norm": 3.2041354179382324,
|
|
"learning_rate": 9.599772690469606e-06,
|
|
"loss": 1.0338,
|
|
"step": 3211
|
|
},
|
|
{
|
|
"epoch": 0.8595129783248595,
|
|
"grad_norm": 3.4140734672546387,
|
|
"learning_rate": 9.599425693622902e-06,
|
|
"loss": 1.1597,
|
|
"step": 3212
|
|
},
|
|
{
|
|
"epoch": 0.8597805726518598,
|
|
"grad_norm": 2.8042356967926025,
|
|
"learning_rate": 9.599078552695223e-06,
|
|
"loss": 0.9167,
|
|
"step": 3213
|
|
},
|
|
{
|
|
"epoch": 0.86004816697886,
|
|
"grad_norm": 3.645156145095825,
|
|
"learning_rate": 9.598731267697443e-06,
|
|
"loss": 1.3054,
|
|
"step": 3214
|
|
},
|
|
{
|
|
"epoch": 0.8603157613058603,
|
|
"grad_norm": 3.2911882400512695,
|
|
"learning_rate": 9.598383838640443e-06,
|
|
"loss": 1.0813,
|
|
"step": 3215
|
|
},
|
|
{
|
|
"epoch": 0.8605833556328606,
|
|
"grad_norm": 3.168053388595581,
|
|
"learning_rate": 9.598036265535104e-06,
|
|
"loss": 1.0603,
|
|
"step": 3216
|
|
},
|
|
{
|
|
"epoch": 0.8608509499598609,
|
|
"grad_norm": 3.6250712871551514,
|
|
"learning_rate": 9.597688548392319e-06,
|
|
"loss": 1.1671,
|
|
"step": 3217
|
|
},
|
|
{
|
|
"epoch": 0.8611185442868611,
|
|
"grad_norm": 3.570465326309204,
|
|
"learning_rate": 9.597340687222975e-06,
|
|
"loss": 1.1248,
|
|
"step": 3218
|
|
},
|
|
{
|
|
"epoch": 0.8613861386138614,
|
|
"grad_norm": 3.188462495803833,
|
|
"learning_rate": 9.596992682037973e-06,
|
|
"loss": 1.0927,
|
|
"step": 3219
|
|
},
|
|
{
|
|
"epoch": 0.8616537329408617,
|
|
"grad_norm": 3.166240692138672,
|
|
"learning_rate": 9.596644532848211e-06,
|
|
"loss": 1.1719,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.861921327267862,
|
|
"grad_norm": 3.369922399520874,
|
|
"learning_rate": 9.5962962396646e-06,
|
|
"loss": 1.0546,
|
|
"step": 3221
|
|
},
|
|
{
|
|
"epoch": 0.8621889215948622,
|
|
"grad_norm": 3.611721992492676,
|
|
"learning_rate": 9.595947802498046e-06,
|
|
"loss": 1.1727,
|
|
"step": 3222
|
|
},
|
|
{
|
|
"epoch": 0.8624565159218625,
|
|
"grad_norm": 3.5370113849639893,
|
|
"learning_rate": 9.595599221359464e-06,
|
|
"loss": 1.1045,
|
|
"step": 3223
|
|
},
|
|
{
|
|
"epoch": 0.8627241102488628,
|
|
"grad_norm": 3.5464746952056885,
|
|
"learning_rate": 9.595250496259778e-06,
|
|
"loss": 1.1146,
|
|
"step": 3224
|
|
},
|
|
{
|
|
"epoch": 0.8629917045758629,
|
|
"grad_norm": 3.1489906311035156,
|
|
"learning_rate": 9.594901627209908e-06,
|
|
"loss": 1.0356,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 0.8632592989028632,
|
|
"grad_norm": 3.69189190864563,
|
|
"learning_rate": 9.594552614220785e-06,
|
|
"loss": 1.2625,
|
|
"step": 3226
|
|
},
|
|
{
|
|
"epoch": 0.8635268932298635,
|
|
"grad_norm": 3.298753499984741,
|
|
"learning_rate": 9.594203457303339e-06,
|
|
"loss": 1.0553,
|
|
"step": 3227
|
|
},
|
|
{
|
|
"epoch": 0.8637944875568638,
|
|
"grad_norm": 3.2291910648345947,
|
|
"learning_rate": 9.593854156468512e-06,
|
|
"loss": 1.133,
|
|
"step": 3228
|
|
},
|
|
{
|
|
"epoch": 0.864062081883864,
|
|
"grad_norm": 3.1540310382843018,
|
|
"learning_rate": 9.593504711727243e-06,
|
|
"loss": 0.9898,
|
|
"step": 3229
|
|
},
|
|
{
|
|
"epoch": 0.8643296762108643,
|
|
"grad_norm": 3.049051523208618,
|
|
"learning_rate": 9.593155123090479e-06,
|
|
"loss": 0.9765,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 0.8645972705378646,
|
|
"grad_norm": 3.2508833408355713,
|
|
"learning_rate": 9.592805390569173e-06,
|
|
"loss": 1.0901,
|
|
"step": 3231
|
|
},
|
|
{
|
|
"epoch": 0.8648648648648649,
|
|
"grad_norm": 3.9772286415100098,
|
|
"learning_rate": 9.59245551417428e-06,
|
|
"loss": 1.2102,
|
|
"step": 3232
|
|
},
|
|
{
|
|
"epoch": 0.8651324591918651,
|
|
"grad_norm": 3.5919082164764404,
|
|
"learning_rate": 9.592105493916758e-06,
|
|
"loss": 1.1591,
|
|
"step": 3233
|
|
},
|
|
{
|
|
"epoch": 0.8654000535188654,
|
|
"grad_norm": 3.9090003967285156,
|
|
"learning_rate": 9.591755329807574e-06,
|
|
"loss": 1.2031,
|
|
"step": 3234
|
|
},
|
|
{
|
|
"epoch": 0.8656676478458657,
|
|
"grad_norm": 3.496093273162842,
|
|
"learning_rate": 9.591405021857697e-06,
|
|
"loss": 1.109,
|
|
"step": 3235
|
|
},
|
|
{
|
|
"epoch": 0.8659352421728659,
|
|
"grad_norm": 3.8281493186950684,
|
|
"learning_rate": 9.5910545700781e-06,
|
|
"loss": 1.211,
|
|
"step": 3236
|
|
},
|
|
{
|
|
"epoch": 0.8662028364998662,
|
|
"grad_norm": 3.2927358150482178,
|
|
"learning_rate": 9.59070397447976e-06,
|
|
"loss": 1.0816,
|
|
"step": 3237
|
|
},
|
|
{
|
|
"epoch": 0.8664704308268665,
|
|
"grad_norm": 3.4841480255126953,
|
|
"learning_rate": 9.590353235073663e-06,
|
|
"loss": 1.1187,
|
|
"step": 3238
|
|
},
|
|
{
|
|
"epoch": 0.8667380251538668,
|
|
"grad_norm": 3.2719860076904297,
|
|
"learning_rate": 9.590002351870793e-06,
|
|
"loss": 1.0203,
|
|
"step": 3239
|
|
},
|
|
{
|
|
"epoch": 0.867005619480867,
|
|
"grad_norm": 3.0735063552856445,
|
|
"learning_rate": 9.589651324882143e-06,
|
|
"loss": 1.1405,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.8672732138078673,
|
|
"grad_norm": 3.6215524673461914,
|
|
"learning_rate": 9.58930015411871e-06,
|
|
"loss": 1.2612,
|
|
"step": 3241
|
|
},
|
|
{
|
|
"epoch": 0.8675408081348676,
|
|
"grad_norm": 3.0990355014801025,
|
|
"learning_rate": 9.588948839591494e-06,
|
|
"loss": 1.1183,
|
|
"step": 3242
|
|
},
|
|
{
|
|
"epoch": 0.8678084024618679,
|
|
"grad_norm": 2.985930919647217,
|
|
"learning_rate": 9.5885973813115e-06,
|
|
"loss": 1.0287,
|
|
"step": 3243
|
|
},
|
|
{
|
|
"epoch": 0.868075996788868,
|
|
"grad_norm": 3.4825994968414307,
|
|
"learning_rate": 9.588245779289738e-06,
|
|
"loss": 1.0794,
|
|
"step": 3244
|
|
},
|
|
{
|
|
"epoch": 0.8683435911158683,
|
|
"grad_norm": 3.289504289627075,
|
|
"learning_rate": 9.587894033537223e-06,
|
|
"loss": 1.1203,
|
|
"step": 3245
|
|
},
|
|
{
|
|
"epoch": 0.8686111854428686,
|
|
"grad_norm": 3.175842761993408,
|
|
"learning_rate": 9.587542144064972e-06,
|
|
"loss": 1.0932,
|
|
"step": 3246
|
|
},
|
|
{
|
|
"epoch": 0.8688787797698688,
|
|
"grad_norm": 3.251260995864868,
|
|
"learning_rate": 9.587190110884009e-06,
|
|
"loss": 1.0971,
|
|
"step": 3247
|
|
},
|
|
{
|
|
"epoch": 0.8691463740968691,
|
|
"grad_norm": 2.8951752185821533,
|
|
"learning_rate": 9.586837934005363e-06,
|
|
"loss": 0.9605,
|
|
"step": 3248
|
|
},
|
|
{
|
|
"epoch": 0.8694139684238694,
|
|
"grad_norm": 3.264331102371216,
|
|
"learning_rate": 9.586485613440064e-06,
|
|
"loss": 1.1496,
|
|
"step": 3249
|
|
},
|
|
{
|
|
"epoch": 0.8696815627508697,
|
|
"grad_norm": 3.1973655223846436,
|
|
"learning_rate": 9.586133149199151e-06,
|
|
"loss": 1.152,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.8699491570778699,
|
|
"grad_norm": 2.9676499366760254,
|
|
"learning_rate": 9.585780541293663e-06,
|
|
"loss": 1.1106,
|
|
"step": 3251
|
|
},
|
|
{
|
|
"epoch": 0.8702167514048702,
|
|
"grad_norm": 3.147977352142334,
|
|
"learning_rate": 9.585427789734647e-06,
|
|
"loss": 1.2677,
|
|
"step": 3252
|
|
},
|
|
{
|
|
"epoch": 0.8704843457318705,
|
|
"grad_norm": 3.3018641471862793,
|
|
"learning_rate": 9.585074894533154e-06,
|
|
"loss": 1.0454,
|
|
"step": 3253
|
|
},
|
|
{
|
|
"epoch": 0.8707519400588708,
|
|
"grad_norm": 3.2543413639068604,
|
|
"learning_rate": 9.584721855700238e-06,
|
|
"loss": 1.0568,
|
|
"step": 3254
|
|
},
|
|
{
|
|
"epoch": 0.871019534385871,
|
|
"grad_norm": 3.091062545776367,
|
|
"learning_rate": 9.584368673246957e-06,
|
|
"loss": 1.0498,
|
|
"step": 3255
|
|
},
|
|
{
|
|
"epoch": 0.8712871287128713,
|
|
"grad_norm": 3.4238486289978027,
|
|
"learning_rate": 9.584015347184376e-06,
|
|
"loss": 1.0649,
|
|
"step": 3256
|
|
},
|
|
{
|
|
"epoch": 0.8715547230398716,
|
|
"grad_norm": 3.575374126434326,
|
|
"learning_rate": 9.583661877523565e-06,
|
|
"loss": 1.1816,
|
|
"step": 3257
|
|
},
|
|
{
|
|
"epoch": 0.8718223173668718,
|
|
"grad_norm": 3.7600672245025635,
|
|
"learning_rate": 9.583308264275593e-06,
|
|
"loss": 1.2363,
|
|
"step": 3258
|
|
},
|
|
{
|
|
"epoch": 0.8720899116938721,
|
|
"grad_norm": 3.4532859325408936,
|
|
"learning_rate": 9.58295450745154e-06,
|
|
"loss": 1.2357,
|
|
"step": 3259
|
|
},
|
|
{
|
|
"epoch": 0.8723575060208724,
|
|
"grad_norm": 2.9904305934906006,
|
|
"learning_rate": 9.582600607062486e-06,
|
|
"loss": 0.9099,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.8726251003478727,
|
|
"grad_norm": 3.143056631088257,
|
|
"learning_rate": 9.58224656311952e-06,
|
|
"loss": 1.1002,
|
|
"step": 3261
|
|
},
|
|
{
|
|
"epoch": 0.8728926946748728,
|
|
"grad_norm": 3.371873140335083,
|
|
"learning_rate": 9.581892375633729e-06,
|
|
"loss": 1.1547,
|
|
"step": 3262
|
|
},
|
|
{
|
|
"epoch": 0.8731602890018731,
|
|
"grad_norm": 3.277872085571289,
|
|
"learning_rate": 9.58153804461621e-06,
|
|
"loss": 1.0734,
|
|
"step": 3263
|
|
},
|
|
{
|
|
"epoch": 0.8734278833288734,
|
|
"grad_norm": 3.2630152702331543,
|
|
"learning_rate": 9.581183570078064e-06,
|
|
"loss": 1.0204,
|
|
"step": 3264
|
|
},
|
|
{
|
|
"epoch": 0.8736954776558737,
|
|
"grad_norm": 3.5363354682922363,
|
|
"learning_rate": 9.580828952030392e-06,
|
|
"loss": 1.2343,
|
|
"step": 3265
|
|
},
|
|
{
|
|
"epoch": 0.8739630719828739,
|
|
"grad_norm": 3.0592634677886963,
|
|
"learning_rate": 9.580474190484306e-06,
|
|
"loss": 1.0132,
|
|
"step": 3266
|
|
},
|
|
{
|
|
"epoch": 0.8742306663098742,
|
|
"grad_norm": 3.0633037090301514,
|
|
"learning_rate": 9.580119285450917e-06,
|
|
"loss": 1.081,
|
|
"step": 3267
|
|
},
|
|
{
|
|
"epoch": 0.8744982606368745,
|
|
"grad_norm": 3.3034725189208984,
|
|
"learning_rate": 9.579764236941345e-06,
|
|
"loss": 1.1423,
|
|
"step": 3268
|
|
},
|
|
{
|
|
"epoch": 0.8747658549638747,
|
|
"grad_norm": 3.3148138523101807,
|
|
"learning_rate": 9.57940904496671e-06,
|
|
"loss": 1.1116,
|
|
"step": 3269
|
|
},
|
|
{
|
|
"epoch": 0.875033449290875,
|
|
"grad_norm": 3.3860421180725098,
|
|
"learning_rate": 9.57905370953814e-06,
|
|
"loss": 1.0291,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 0.8753010436178753,
|
|
"grad_norm": 3.3635714054107666,
|
|
"learning_rate": 9.578698230666767e-06,
|
|
"loss": 1.117,
|
|
"step": 3271
|
|
},
|
|
{
|
|
"epoch": 0.8755686379448756,
|
|
"grad_norm": 3.6900482177734375,
|
|
"learning_rate": 9.578342608363723e-06,
|
|
"loss": 1.1655,
|
|
"step": 3272
|
|
},
|
|
{
|
|
"epoch": 0.8758362322718758,
|
|
"grad_norm": 3.450373411178589,
|
|
"learning_rate": 9.577986842640152e-06,
|
|
"loss": 1.1539,
|
|
"step": 3273
|
|
},
|
|
{
|
|
"epoch": 0.8761038265988761,
|
|
"grad_norm": 3.40311598777771,
|
|
"learning_rate": 9.577630933507196e-06,
|
|
"loss": 1.1297,
|
|
"step": 3274
|
|
},
|
|
{
|
|
"epoch": 0.8763714209258764,
|
|
"grad_norm": 3.1564464569091797,
|
|
"learning_rate": 9.577274880976007e-06,
|
|
"loss": 1.0134,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 0.8766390152528767,
|
|
"grad_norm": 4.080739498138428,
|
|
"learning_rate": 9.576918685057736e-06,
|
|
"loss": 1.0344,
|
|
"step": 3276
|
|
},
|
|
{
|
|
"epoch": 0.8769066095798769,
|
|
"grad_norm": 3.6657655239105225,
|
|
"learning_rate": 9.576562345763542e-06,
|
|
"loss": 1.1657,
|
|
"step": 3277
|
|
},
|
|
{
|
|
"epoch": 0.8771742039068772,
|
|
"grad_norm": 3.4710497856140137,
|
|
"learning_rate": 9.576205863104588e-06,
|
|
"loss": 1.2043,
|
|
"step": 3278
|
|
},
|
|
{
|
|
"epoch": 0.8774417982338775,
|
|
"grad_norm": 3.297534704208374,
|
|
"learning_rate": 9.575849237092042e-06,
|
|
"loss": 1.0513,
|
|
"step": 3279
|
|
},
|
|
{
|
|
"epoch": 0.8777093925608777,
|
|
"grad_norm": 3.476609230041504,
|
|
"learning_rate": 9.575492467737074e-06,
|
|
"loss": 1.1804,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.877976986887878,
|
|
"grad_norm": 3.289842367172241,
|
|
"learning_rate": 9.575135555050861e-06,
|
|
"loss": 1.1133,
|
|
"step": 3281
|
|
},
|
|
{
|
|
"epoch": 0.8782445812148783,
|
|
"grad_norm": 3.235844135284424,
|
|
"learning_rate": 9.574778499044582e-06,
|
|
"loss": 1.0974,
|
|
"step": 3282
|
|
},
|
|
{
|
|
"epoch": 0.8785121755418785,
|
|
"grad_norm": 3.5740182399749756,
|
|
"learning_rate": 9.574421299729424e-06,
|
|
"loss": 1.2028,
|
|
"step": 3283
|
|
},
|
|
{
|
|
"epoch": 0.8787797698688787,
|
|
"grad_norm": 3.4164657592773438,
|
|
"learning_rate": 9.574063957116575e-06,
|
|
"loss": 1.1063,
|
|
"step": 3284
|
|
},
|
|
{
|
|
"epoch": 0.879047364195879,
|
|
"grad_norm": 3.4936609268188477,
|
|
"learning_rate": 9.573706471217232e-06,
|
|
"loss": 1.1992,
|
|
"step": 3285
|
|
},
|
|
{
|
|
"epoch": 0.8793149585228793,
|
|
"grad_norm": 3.449378252029419,
|
|
"learning_rate": 9.573348842042592e-06,
|
|
"loss": 1.1717,
|
|
"step": 3286
|
|
},
|
|
{
|
|
"epoch": 0.8795825528498796,
|
|
"grad_norm": 3.8629961013793945,
|
|
"learning_rate": 9.572991069603853e-06,
|
|
"loss": 1.2591,
|
|
"step": 3287
|
|
},
|
|
{
|
|
"epoch": 0.8798501471768798,
|
|
"grad_norm": 3.312222957611084,
|
|
"learning_rate": 9.572633153912232e-06,
|
|
"loss": 1.0761,
|
|
"step": 3288
|
|
},
|
|
{
|
|
"epoch": 0.8801177415038801,
|
|
"grad_norm": 3.432467460632324,
|
|
"learning_rate": 9.572275094978934e-06,
|
|
"loss": 1.211,
|
|
"step": 3289
|
|
},
|
|
{
|
|
"epoch": 0.8803853358308804,
|
|
"grad_norm": 3.3937036991119385,
|
|
"learning_rate": 9.571916892815179e-06,
|
|
"loss": 1.2013,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 0.8806529301578806,
|
|
"grad_norm": 3.1374263763427734,
|
|
"learning_rate": 9.571558547432185e-06,
|
|
"loss": 1.1642,
|
|
"step": 3291
|
|
},
|
|
{
|
|
"epoch": 0.8809205244848809,
|
|
"grad_norm": 3.3817193508148193,
|
|
"learning_rate": 9.57120005884118e-06,
|
|
"loss": 1.0251,
|
|
"step": 3292
|
|
},
|
|
{
|
|
"epoch": 0.8811881188118812,
|
|
"grad_norm": 3.5183558464050293,
|
|
"learning_rate": 9.570841427053394e-06,
|
|
"loss": 1.103,
|
|
"step": 3293
|
|
},
|
|
{
|
|
"epoch": 0.8814557131388815,
|
|
"grad_norm": 3.377310037612915,
|
|
"learning_rate": 9.57048265208006e-06,
|
|
"loss": 1.0861,
|
|
"step": 3294
|
|
},
|
|
{
|
|
"epoch": 0.8817233074658817,
|
|
"grad_norm": 3.5976264476776123,
|
|
"learning_rate": 9.570123733932415e-06,
|
|
"loss": 1.2058,
|
|
"step": 3295
|
|
},
|
|
{
|
|
"epoch": 0.881990901792882,
|
|
"grad_norm": 3.367487668991089,
|
|
"learning_rate": 9.569764672621707e-06,
|
|
"loss": 1.042,
|
|
"step": 3296
|
|
},
|
|
{
|
|
"epoch": 0.8822584961198823,
|
|
"grad_norm": 3.392971992492676,
|
|
"learning_rate": 9.569405468159183e-06,
|
|
"loss": 1.1187,
|
|
"step": 3297
|
|
},
|
|
{
|
|
"epoch": 0.8825260904468826,
|
|
"grad_norm": 3.862159013748169,
|
|
"learning_rate": 9.569046120556092e-06,
|
|
"loss": 1.2342,
|
|
"step": 3298
|
|
},
|
|
{
|
|
"epoch": 0.8827936847738828,
|
|
"grad_norm": 2.9790005683898926,
|
|
"learning_rate": 9.568686629823693e-06,
|
|
"loss": 1.0197,
|
|
"step": 3299
|
|
},
|
|
{
|
|
"epoch": 0.8830612791008831,
|
|
"grad_norm": 3.4555444717407227,
|
|
"learning_rate": 9.56832699597325e-06,
|
|
"loss": 1.1651,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.8833288734278834,
|
|
"grad_norm": 4.1415557861328125,
|
|
"learning_rate": 9.567967219016024e-06,
|
|
"loss": 1.2982,
|
|
"step": 3301
|
|
},
|
|
{
|
|
"epoch": 0.8835964677548835,
|
|
"grad_norm": 3.344966173171997,
|
|
"learning_rate": 9.567607298963288e-06,
|
|
"loss": 1.1131,
|
|
"step": 3302
|
|
},
|
|
{
|
|
"epoch": 0.8838640620818838,
|
|
"grad_norm": 3.7621214389801025,
|
|
"learning_rate": 9.567247235826316e-06,
|
|
"loss": 1.1731,
|
|
"step": 3303
|
|
},
|
|
{
|
|
"epoch": 0.8841316564088841,
|
|
"grad_norm": 3.282111167907715,
|
|
"learning_rate": 9.56688702961639e-06,
|
|
"loss": 1.1508,
|
|
"step": 3304
|
|
},
|
|
{
|
|
"epoch": 0.8843992507358844,
|
|
"grad_norm": 3.501091957092285,
|
|
"learning_rate": 9.566526680344788e-06,
|
|
"loss": 1.1034,
|
|
"step": 3305
|
|
},
|
|
{
|
|
"epoch": 0.8846668450628846,
|
|
"grad_norm": 3.3922479152679443,
|
|
"learning_rate": 9.566166188022804e-06,
|
|
"loss": 1.054,
|
|
"step": 3306
|
|
},
|
|
{
|
|
"epoch": 0.8849344393898849,
|
|
"grad_norm": 3.4459426403045654,
|
|
"learning_rate": 9.565805552661728e-06,
|
|
"loss": 1.2095,
|
|
"step": 3307
|
|
},
|
|
{
|
|
"epoch": 0.8852020337168852,
|
|
"grad_norm": 3.219888210296631,
|
|
"learning_rate": 9.565444774272858e-06,
|
|
"loss": 1.106,
|
|
"step": 3308
|
|
},
|
|
{
|
|
"epoch": 0.8854696280438855,
|
|
"grad_norm": 3.378981113433838,
|
|
"learning_rate": 9.565083852867494e-06,
|
|
"loss": 1.1935,
|
|
"step": 3309
|
|
},
|
|
{
|
|
"epoch": 0.8857372223708857,
|
|
"grad_norm": 3.991638660430908,
|
|
"learning_rate": 9.564722788456943e-06,
|
|
"loss": 1.4006,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 0.886004816697886,
|
|
"grad_norm": 3.2675182819366455,
|
|
"learning_rate": 9.564361581052519e-06,
|
|
"loss": 1.109,
|
|
"step": 3311
|
|
},
|
|
{
|
|
"epoch": 0.8862724110248863,
|
|
"grad_norm": 3.029271125793457,
|
|
"learning_rate": 9.564000230665534e-06,
|
|
"loss": 1.0853,
|
|
"step": 3312
|
|
},
|
|
{
|
|
"epoch": 0.8865400053518866,
|
|
"grad_norm": 3.339115619659424,
|
|
"learning_rate": 9.563638737307307e-06,
|
|
"loss": 1.1887,
|
|
"step": 3313
|
|
},
|
|
{
|
|
"epoch": 0.8868075996788868,
|
|
"grad_norm": 3.1957242488861084,
|
|
"learning_rate": 9.56327710098916e-06,
|
|
"loss": 1.1597,
|
|
"step": 3314
|
|
},
|
|
{
|
|
"epoch": 0.8870751940058871,
|
|
"grad_norm": 3.588334798812866,
|
|
"learning_rate": 9.562915321722428e-06,
|
|
"loss": 1.1607,
|
|
"step": 3315
|
|
},
|
|
{
|
|
"epoch": 0.8873427883328874,
|
|
"grad_norm": 3.3101933002471924,
|
|
"learning_rate": 9.56255339951844e-06,
|
|
"loss": 1.1174,
|
|
"step": 3316
|
|
},
|
|
{
|
|
"epoch": 0.8876103826598876,
|
|
"grad_norm": 3.237942934036255,
|
|
"learning_rate": 9.562191334388535e-06,
|
|
"loss": 1.1496,
|
|
"step": 3317
|
|
},
|
|
{
|
|
"epoch": 0.8878779769868879,
|
|
"grad_norm": 3.141970634460449,
|
|
"learning_rate": 9.561829126344053e-06,
|
|
"loss": 1.0682,
|
|
"step": 3318
|
|
},
|
|
{
|
|
"epoch": 0.8881455713138882,
|
|
"grad_norm": 3.4344382286071777,
|
|
"learning_rate": 9.561466775396342e-06,
|
|
"loss": 1.0629,
|
|
"step": 3319
|
|
},
|
|
{
|
|
"epoch": 0.8884131656408885,
|
|
"grad_norm": 3.219492197036743,
|
|
"learning_rate": 9.561104281556752e-06,
|
|
"loss": 1.167,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.8886807599678886,
|
|
"grad_norm": 3.2902910709381104,
|
|
"learning_rate": 9.56074164483664e-06,
|
|
"loss": 1.0776,
|
|
"step": 3321
|
|
},
|
|
{
|
|
"epoch": 0.8889483542948889,
|
|
"grad_norm": 3.4856672286987305,
|
|
"learning_rate": 9.560378865247363e-06,
|
|
"loss": 1.2053,
|
|
"step": 3322
|
|
},
|
|
{
|
|
"epoch": 0.8892159486218892,
|
|
"grad_norm": 3.6184751987457275,
|
|
"learning_rate": 9.560015942800289e-06,
|
|
"loss": 1.1847,
|
|
"step": 3323
|
|
},
|
|
{
|
|
"epoch": 0.8894835429488895,
|
|
"grad_norm": 3.2977712154388428,
|
|
"learning_rate": 9.559652877506785e-06,
|
|
"loss": 1.1069,
|
|
"step": 3324
|
|
},
|
|
{
|
|
"epoch": 0.8897511372758897,
|
|
"grad_norm": 3.0380518436431885,
|
|
"learning_rate": 9.559289669378224e-06,
|
|
"loss": 1.0513,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 0.89001873160289,
|
|
"grad_norm": 3.521289348602295,
|
|
"learning_rate": 9.558926318425986e-06,
|
|
"loss": 1.0462,
|
|
"step": 3326
|
|
},
|
|
{
|
|
"epoch": 0.8902863259298903,
|
|
"grad_norm": 2.921584129333496,
|
|
"learning_rate": 9.558562824661448e-06,
|
|
"loss": 0.9953,
|
|
"step": 3327
|
|
},
|
|
{
|
|
"epoch": 0.8905539202568905,
|
|
"grad_norm": 3.5694329738616943,
|
|
"learning_rate": 9.558199188096004e-06,
|
|
"loss": 1.231,
|
|
"step": 3328
|
|
},
|
|
{
|
|
"epoch": 0.8908215145838908,
|
|
"grad_norm": 3.6749255657196045,
|
|
"learning_rate": 9.557835408741039e-06,
|
|
"loss": 1.1523,
|
|
"step": 3329
|
|
},
|
|
{
|
|
"epoch": 0.8910891089108911,
|
|
"grad_norm": 3.4388997554779053,
|
|
"learning_rate": 9.557471486607952e-06,
|
|
"loss": 1.1669,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 0.8913567032378914,
|
|
"grad_norm": 3.3783130645751953,
|
|
"learning_rate": 9.557107421708142e-06,
|
|
"loss": 1.0796,
|
|
"step": 3331
|
|
},
|
|
{
|
|
"epoch": 0.8916242975648916,
|
|
"grad_norm": 3.5907704830169678,
|
|
"learning_rate": 9.556743214053017e-06,
|
|
"loss": 1.1456,
|
|
"step": 3332
|
|
},
|
|
{
|
|
"epoch": 0.8918918918918919,
|
|
"grad_norm": 3.543071746826172,
|
|
"learning_rate": 9.55637886365398e-06,
|
|
"loss": 1.2526,
|
|
"step": 3333
|
|
},
|
|
{
|
|
"epoch": 0.8921594862188922,
|
|
"grad_norm": 3.632092237472534,
|
|
"learning_rate": 9.55601437052245e-06,
|
|
"loss": 1.2962,
|
|
"step": 3334
|
|
},
|
|
{
|
|
"epoch": 0.8924270805458925,
|
|
"grad_norm": 3.31510066986084,
|
|
"learning_rate": 9.55564973466984e-06,
|
|
"loss": 1.138,
|
|
"step": 3335
|
|
},
|
|
{
|
|
"epoch": 0.8926946748728927,
|
|
"grad_norm": 3.437994956970215,
|
|
"learning_rate": 9.555284956107578e-06,
|
|
"loss": 1.0778,
|
|
"step": 3336
|
|
},
|
|
{
|
|
"epoch": 0.892962269199893,
|
|
"grad_norm": 3.2904369831085205,
|
|
"learning_rate": 9.554920034847088e-06,
|
|
"loss": 1.1556,
|
|
"step": 3337
|
|
},
|
|
{
|
|
"epoch": 0.8932298635268933,
|
|
"grad_norm": 3.6388745307922363,
|
|
"learning_rate": 9.5545549708998e-06,
|
|
"loss": 1.1318,
|
|
"step": 3338
|
|
},
|
|
{
|
|
"epoch": 0.8934974578538935,
|
|
"grad_norm": 3.85868239402771,
|
|
"learning_rate": 9.554189764277155e-06,
|
|
"loss": 1.26,
|
|
"step": 3339
|
|
},
|
|
{
|
|
"epoch": 0.8937650521808937,
|
|
"grad_norm": 3.250420570373535,
|
|
"learning_rate": 9.553824414990588e-06,
|
|
"loss": 1.0647,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.894032646507894,
|
|
"grad_norm": 3.053664445877075,
|
|
"learning_rate": 9.553458923051546e-06,
|
|
"loss": 1.0382,
|
|
"step": 3341
|
|
},
|
|
{
|
|
"epoch": 0.8943002408348943,
|
|
"grad_norm": 3.8642590045928955,
|
|
"learning_rate": 9.553093288471479e-06,
|
|
"loss": 1.1887,
|
|
"step": 3342
|
|
},
|
|
{
|
|
"epoch": 0.8945678351618945,
|
|
"grad_norm": 3.3516335487365723,
|
|
"learning_rate": 9.552727511261841e-06,
|
|
"loss": 1.1131,
|
|
"step": 3343
|
|
},
|
|
{
|
|
"epoch": 0.8948354294888948,
|
|
"grad_norm": 2.911613702774048,
|
|
"learning_rate": 9.55236159143409e-06,
|
|
"loss": 1.0118,
|
|
"step": 3344
|
|
},
|
|
{
|
|
"epoch": 0.8951030238158951,
|
|
"grad_norm": 3.028801441192627,
|
|
"learning_rate": 9.551995528999686e-06,
|
|
"loss": 1.0264,
|
|
"step": 3345
|
|
},
|
|
{
|
|
"epoch": 0.8953706181428954,
|
|
"grad_norm": 3.528012752532959,
|
|
"learning_rate": 9.5516293239701e-06,
|
|
"loss": 1.1946,
|
|
"step": 3346
|
|
},
|
|
{
|
|
"epoch": 0.8956382124698956,
|
|
"grad_norm": 2.96004581451416,
|
|
"learning_rate": 9.551262976356801e-06,
|
|
"loss": 0.9409,
|
|
"step": 3347
|
|
},
|
|
{
|
|
"epoch": 0.8959058067968959,
|
|
"grad_norm": 3.013521194458008,
|
|
"learning_rate": 9.550896486171268e-06,
|
|
"loss": 1.0383,
|
|
"step": 3348
|
|
},
|
|
{
|
|
"epoch": 0.8961734011238962,
|
|
"grad_norm": 3.353602409362793,
|
|
"learning_rate": 9.550529853424979e-06,
|
|
"loss": 1.0802,
|
|
"step": 3349
|
|
},
|
|
{
|
|
"epoch": 0.8964409954508964,
|
|
"grad_norm": 3.60223126411438,
|
|
"learning_rate": 9.55016307812942e-06,
|
|
"loss": 1.1218,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.8967085897778967,
|
|
"grad_norm": 3.689014434814453,
|
|
"learning_rate": 9.549796160296081e-06,
|
|
"loss": 1.2695,
|
|
"step": 3351
|
|
},
|
|
{
|
|
"epoch": 0.896976184104897,
|
|
"grad_norm": 2.9822311401367188,
|
|
"learning_rate": 9.549429099936455e-06,
|
|
"loss": 0.9881,
|
|
"step": 3352
|
|
},
|
|
{
|
|
"epoch": 0.8972437784318973,
|
|
"grad_norm": 3.03279447555542,
|
|
"learning_rate": 9.549061897062043e-06,
|
|
"loss": 1.0138,
|
|
"step": 3353
|
|
},
|
|
{
|
|
"epoch": 0.8975113727588975,
|
|
"grad_norm": 3.1987500190734863,
|
|
"learning_rate": 9.548694551684345e-06,
|
|
"loss": 1.0596,
|
|
"step": 3354
|
|
},
|
|
{
|
|
"epoch": 0.8977789670858978,
|
|
"grad_norm": 3.4811275005340576,
|
|
"learning_rate": 9.548327063814871e-06,
|
|
"loss": 1.1956,
|
|
"step": 3355
|
|
},
|
|
{
|
|
"epoch": 0.8980465614128981,
|
|
"grad_norm": 3.577713966369629,
|
|
"learning_rate": 9.547959433465128e-06,
|
|
"loss": 1.1442,
|
|
"step": 3356
|
|
},
|
|
{
|
|
"epoch": 0.8983141557398984,
|
|
"grad_norm": 3.459491014480591,
|
|
"learning_rate": 9.547591660646637e-06,
|
|
"loss": 1.3063,
|
|
"step": 3357
|
|
},
|
|
{
|
|
"epoch": 0.8985817500668986,
|
|
"grad_norm": 3.2407350540161133,
|
|
"learning_rate": 9.54722374537092e-06,
|
|
"loss": 1.0682,
|
|
"step": 3358
|
|
},
|
|
{
|
|
"epoch": 0.8988493443938989,
|
|
"grad_norm": 3.129257917404175,
|
|
"learning_rate": 9.546855687649497e-06,
|
|
"loss": 1.0516,
|
|
"step": 3359
|
|
},
|
|
{
|
|
"epoch": 0.8991169387208992,
|
|
"grad_norm": 3.4182918071746826,
|
|
"learning_rate": 9.5464874874939e-06,
|
|
"loss": 1.1032,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.8993845330478993,
|
|
"grad_norm": 3.995587110519409,
|
|
"learning_rate": 9.546119144915667e-06,
|
|
"loss": 1.1301,
|
|
"step": 3361
|
|
},
|
|
{
|
|
"epoch": 0.8996521273748996,
|
|
"grad_norm": 3.1512610912323,
|
|
"learning_rate": 9.545750659926331e-06,
|
|
"loss": 1.0217,
|
|
"step": 3362
|
|
},
|
|
{
|
|
"epoch": 0.8999197217018999,
|
|
"grad_norm": 3.4359290599823,
|
|
"learning_rate": 9.545382032537438e-06,
|
|
"loss": 1.2411,
|
|
"step": 3363
|
|
},
|
|
{
|
|
"epoch": 0.9001873160289002,
|
|
"grad_norm": 3.2754461765289307,
|
|
"learning_rate": 9.545013262760535e-06,
|
|
"loss": 1.0496,
|
|
"step": 3364
|
|
},
|
|
{
|
|
"epoch": 0.9004549103559004,
|
|
"grad_norm": 3.3657703399658203,
|
|
"learning_rate": 9.544644350607173e-06,
|
|
"loss": 1.0734,
|
|
"step": 3365
|
|
},
|
|
{
|
|
"epoch": 0.9007225046829007,
|
|
"grad_norm": 3.1346230506896973,
|
|
"learning_rate": 9.54427529608891e-06,
|
|
"loss": 1.1506,
|
|
"step": 3366
|
|
},
|
|
{
|
|
"epoch": 0.900990099009901,
|
|
"grad_norm": 3.4281997680664062,
|
|
"learning_rate": 9.543906099217308e-06,
|
|
"loss": 1.0849,
|
|
"step": 3367
|
|
},
|
|
{
|
|
"epoch": 0.9012576933369013,
|
|
"grad_norm": 3.335317850112915,
|
|
"learning_rate": 9.543536760003928e-06,
|
|
"loss": 1.1822,
|
|
"step": 3368
|
|
},
|
|
{
|
|
"epoch": 0.9015252876639015,
|
|
"grad_norm": 3.0482382774353027,
|
|
"learning_rate": 9.543167278460345e-06,
|
|
"loss": 1.1431,
|
|
"step": 3369
|
|
},
|
|
{
|
|
"epoch": 0.9017928819909018,
|
|
"grad_norm": 3.4252700805664062,
|
|
"learning_rate": 9.54279765459813e-06,
|
|
"loss": 1.0517,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 0.9020604763179021,
|
|
"grad_norm": 3.5490097999572754,
|
|
"learning_rate": 9.542427888428864e-06,
|
|
"loss": 1.1277,
|
|
"step": 3371
|
|
},
|
|
{
|
|
"epoch": 0.9023280706449023,
|
|
"grad_norm": 3.5822768211364746,
|
|
"learning_rate": 9.54205797996413e-06,
|
|
"loss": 1.0832,
|
|
"step": 3372
|
|
},
|
|
{
|
|
"epoch": 0.9025956649719026,
|
|
"grad_norm": 3.2864580154418945,
|
|
"learning_rate": 9.541687929215512e-06,
|
|
"loss": 1.0394,
|
|
"step": 3373
|
|
},
|
|
{
|
|
"epoch": 0.9028632592989029,
|
|
"grad_norm": 3.281869888305664,
|
|
"learning_rate": 9.541317736194608e-06,
|
|
"loss": 1.1343,
|
|
"step": 3374
|
|
},
|
|
{
|
|
"epoch": 0.9031308536259032,
|
|
"grad_norm": 3.3536150455474854,
|
|
"learning_rate": 9.54094740091301e-06,
|
|
"loss": 1.1296,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 0.9033984479529034,
|
|
"grad_norm": 3.730140447616577,
|
|
"learning_rate": 9.54057692338232e-06,
|
|
"loss": 1.1234,
|
|
"step": 3376
|
|
},
|
|
{
|
|
"epoch": 0.9036660422799037,
|
|
"grad_norm": 3.273059129714966,
|
|
"learning_rate": 9.540206303614146e-06,
|
|
"loss": 1.0658,
|
|
"step": 3377
|
|
},
|
|
{
|
|
"epoch": 0.903933636606904,
|
|
"grad_norm": 3.179582357406616,
|
|
"learning_rate": 9.539835541620096e-06,
|
|
"loss": 1.0502,
|
|
"step": 3378
|
|
},
|
|
{
|
|
"epoch": 0.9042012309339043,
|
|
"grad_norm": 3.5026938915252686,
|
|
"learning_rate": 9.539464637411782e-06,
|
|
"loss": 1.1883,
|
|
"step": 3379
|
|
},
|
|
{
|
|
"epoch": 0.9044688252609044,
|
|
"grad_norm": 3.0967392921447754,
|
|
"learning_rate": 9.539093591000828e-06,
|
|
"loss": 1.169,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.9047364195879047,
|
|
"grad_norm": 3.457193613052368,
|
|
"learning_rate": 9.538722402398854e-06,
|
|
"loss": 1.1136,
|
|
"step": 3381
|
|
},
|
|
{
|
|
"epoch": 0.905004013914905,
|
|
"grad_norm": 3.4893715381622314,
|
|
"learning_rate": 9.538351071617489e-06,
|
|
"loss": 1.1458,
|
|
"step": 3382
|
|
},
|
|
{
|
|
"epoch": 0.9052716082419052,
|
|
"grad_norm": 3.3818037509918213,
|
|
"learning_rate": 9.537979598668364e-06,
|
|
"loss": 1.1278,
|
|
"step": 3383
|
|
},
|
|
{
|
|
"epoch": 0.9055392025689055,
|
|
"grad_norm": 3.309565782546997,
|
|
"learning_rate": 9.537607983563117e-06,
|
|
"loss": 1.1216,
|
|
"step": 3384
|
|
},
|
|
{
|
|
"epoch": 0.9058067968959058,
|
|
"grad_norm": 3.428215980529785,
|
|
"learning_rate": 9.53723622631339e-06,
|
|
"loss": 1.1297,
|
|
"step": 3385
|
|
},
|
|
{
|
|
"epoch": 0.9060743912229061,
|
|
"grad_norm": 3.088609457015991,
|
|
"learning_rate": 9.536864326930826e-06,
|
|
"loss": 1.0726,
|
|
"step": 3386
|
|
},
|
|
{
|
|
"epoch": 0.9063419855499063,
|
|
"grad_norm": 3.1660032272338867,
|
|
"learning_rate": 9.536492285427077e-06,
|
|
"loss": 1.1357,
|
|
"step": 3387
|
|
},
|
|
{
|
|
"epoch": 0.9066095798769066,
|
|
"grad_norm": 3.1965155601501465,
|
|
"learning_rate": 9.536120101813797e-06,
|
|
"loss": 1.0836,
|
|
"step": 3388
|
|
},
|
|
{
|
|
"epoch": 0.9068771742039069,
|
|
"grad_norm": 3.284010410308838,
|
|
"learning_rate": 9.535747776102645e-06,
|
|
"loss": 1.1047,
|
|
"step": 3389
|
|
},
|
|
{
|
|
"epoch": 0.9071447685309072,
|
|
"grad_norm": 3.5011777877807617,
|
|
"learning_rate": 9.535375308305283e-06,
|
|
"loss": 1.2027,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 0.9074123628579074,
|
|
"grad_norm": 3.8040499687194824,
|
|
"learning_rate": 9.535002698433383e-06,
|
|
"loss": 1.1663,
|
|
"step": 3391
|
|
},
|
|
{
|
|
"epoch": 0.9076799571849077,
|
|
"grad_norm": 3.3377859592437744,
|
|
"learning_rate": 9.534629946498613e-06,
|
|
"loss": 1.2642,
|
|
"step": 3392
|
|
},
|
|
{
|
|
"epoch": 0.907947551511908,
|
|
"grad_norm": 3.4258129596710205,
|
|
"learning_rate": 9.534257052512651e-06,
|
|
"loss": 1.1166,
|
|
"step": 3393
|
|
},
|
|
{
|
|
"epoch": 0.9082151458389082,
|
|
"grad_norm": 3.600273847579956,
|
|
"learning_rate": 9.533884016487181e-06,
|
|
"loss": 1.0948,
|
|
"step": 3394
|
|
},
|
|
{
|
|
"epoch": 0.9084827401659085,
|
|
"grad_norm": 3.1784534454345703,
|
|
"learning_rate": 9.533510838433884e-06,
|
|
"loss": 1.0578,
|
|
"step": 3395
|
|
},
|
|
{
|
|
"epoch": 0.9087503344929088,
|
|
"grad_norm": 3.3856201171875,
|
|
"learning_rate": 9.533137518364453e-06,
|
|
"loss": 1.1899,
|
|
"step": 3396
|
|
},
|
|
{
|
|
"epoch": 0.9090179288199091,
|
|
"grad_norm": 3.489384412765503,
|
|
"learning_rate": 9.532764056290582e-06,
|
|
"loss": 1.0646,
|
|
"step": 3397
|
|
},
|
|
{
|
|
"epoch": 0.9092855231469092,
|
|
"grad_norm": 3.3582942485809326,
|
|
"learning_rate": 9.53239045222397e-06,
|
|
"loss": 1.1614,
|
|
"step": 3398
|
|
},
|
|
{
|
|
"epoch": 0.9095531174739095,
|
|
"grad_norm": 3.891071319580078,
|
|
"learning_rate": 9.53201670617632e-06,
|
|
"loss": 1.2788,
|
|
"step": 3399
|
|
},
|
|
{
|
|
"epoch": 0.9098207118009098,
|
|
"grad_norm": 4.0248026847839355,
|
|
"learning_rate": 9.531642818159341e-06,
|
|
"loss": 1.2048,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.9100883061279101,
|
|
"grad_norm": 3.6524062156677246,
|
|
"learning_rate": 9.531268788184744e-06,
|
|
"loss": 1.3362,
|
|
"step": 3401
|
|
},
|
|
{
|
|
"epoch": 0.9103559004549103,
|
|
"grad_norm": 3.208773374557495,
|
|
"learning_rate": 9.530894616264248e-06,
|
|
"loss": 1.0624,
|
|
"step": 3402
|
|
},
|
|
{
|
|
"epoch": 0.9106234947819106,
|
|
"grad_norm": 3.280280351638794,
|
|
"learning_rate": 9.530520302409572e-06,
|
|
"loss": 1.138,
|
|
"step": 3403
|
|
},
|
|
{
|
|
"epoch": 0.9108910891089109,
|
|
"grad_norm": 3.034221887588501,
|
|
"learning_rate": 9.530145846632441e-06,
|
|
"loss": 1.1124,
|
|
"step": 3404
|
|
},
|
|
{
|
|
"epoch": 0.9111586834359111,
|
|
"grad_norm": 3.4571194648742676,
|
|
"learning_rate": 9.52977124894459e-06,
|
|
"loss": 1.1556,
|
|
"step": 3405
|
|
},
|
|
{
|
|
"epoch": 0.9114262777629114,
|
|
"grad_norm": 3.2355082035064697,
|
|
"learning_rate": 9.529396509357748e-06,
|
|
"loss": 1.1784,
|
|
"step": 3406
|
|
},
|
|
{
|
|
"epoch": 0.9116938720899117,
|
|
"grad_norm": 3.521646499633789,
|
|
"learning_rate": 9.529021627883657e-06,
|
|
"loss": 1.0774,
|
|
"step": 3407
|
|
},
|
|
{
|
|
"epoch": 0.911961466416912,
|
|
"grad_norm": 3.313163995742798,
|
|
"learning_rate": 9.528646604534058e-06,
|
|
"loss": 1.035,
|
|
"step": 3408
|
|
},
|
|
{
|
|
"epoch": 0.9122290607439122,
|
|
"grad_norm": 3.7362465858459473,
|
|
"learning_rate": 9.528271439320703e-06,
|
|
"loss": 1.2586,
|
|
"step": 3409
|
|
},
|
|
{
|
|
"epoch": 0.9124966550709125,
|
|
"grad_norm": 3.2787117958068848,
|
|
"learning_rate": 9.527896132255341e-06,
|
|
"loss": 1.1981,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 0.9127642493979128,
|
|
"grad_norm": 3.5333304405212402,
|
|
"learning_rate": 9.52752068334973e-06,
|
|
"loss": 1.1986,
|
|
"step": 3411
|
|
},
|
|
{
|
|
"epoch": 0.9130318437249131,
|
|
"grad_norm": 3.5400190353393555,
|
|
"learning_rate": 9.527145092615631e-06,
|
|
"loss": 1.1002,
|
|
"step": 3412
|
|
},
|
|
{
|
|
"epoch": 0.9132994380519133,
|
|
"grad_norm": 3.786625862121582,
|
|
"learning_rate": 9.526769360064812e-06,
|
|
"loss": 1.1783,
|
|
"step": 3413
|
|
},
|
|
{
|
|
"epoch": 0.9135670323789136,
|
|
"grad_norm": 3.3080086708068848,
|
|
"learning_rate": 9.526393485709038e-06,
|
|
"loss": 1.0919,
|
|
"step": 3414
|
|
},
|
|
{
|
|
"epoch": 0.9138346267059139,
|
|
"grad_norm": 3.204632043838501,
|
|
"learning_rate": 9.526017469560088e-06,
|
|
"loss": 1.0737,
|
|
"step": 3415
|
|
},
|
|
{
|
|
"epoch": 0.914102221032914,
|
|
"grad_norm": 3.2012712955474854,
|
|
"learning_rate": 9.52564131162974e-06,
|
|
"loss": 1.1405,
|
|
"step": 3416
|
|
},
|
|
{
|
|
"epoch": 0.9143698153599144,
|
|
"grad_norm": 3.119194746017456,
|
|
"learning_rate": 9.525265011929776e-06,
|
|
"loss": 1.0067,
|
|
"step": 3417
|
|
},
|
|
{
|
|
"epoch": 0.9146374096869146,
|
|
"grad_norm": 3.2325518131256104,
|
|
"learning_rate": 9.524888570471987e-06,
|
|
"loss": 1.2162,
|
|
"step": 3418
|
|
},
|
|
{
|
|
"epoch": 0.914905004013915,
|
|
"grad_norm": 3.490710973739624,
|
|
"learning_rate": 9.524511987268161e-06,
|
|
"loss": 1.0244,
|
|
"step": 3419
|
|
},
|
|
{
|
|
"epoch": 0.9151725983409151,
|
|
"grad_norm": 3.0969996452331543,
|
|
"learning_rate": 9.524135262330098e-06,
|
|
"loss": 1.1009,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.9154401926679154,
|
|
"grad_norm": 3.454273223876953,
|
|
"learning_rate": 9.523758395669598e-06,
|
|
"loss": 1.1822,
|
|
"step": 3421
|
|
},
|
|
{
|
|
"epoch": 0.9157077869949157,
|
|
"grad_norm": 3.2115368843078613,
|
|
"learning_rate": 9.523381387298469e-06,
|
|
"loss": 1.0328,
|
|
"step": 3422
|
|
},
|
|
{
|
|
"epoch": 0.915975381321916,
|
|
"grad_norm": 3.3276708126068115,
|
|
"learning_rate": 9.523004237228517e-06,
|
|
"loss": 1.1133,
|
|
"step": 3423
|
|
},
|
|
{
|
|
"epoch": 0.9162429756489162,
|
|
"grad_norm": 3.601778507232666,
|
|
"learning_rate": 9.522626945471561e-06,
|
|
"loss": 1.2206,
|
|
"step": 3424
|
|
},
|
|
{
|
|
"epoch": 0.9165105699759165,
|
|
"grad_norm": 3.281283378601074,
|
|
"learning_rate": 9.522249512039417e-06,
|
|
"loss": 1.09,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 0.9167781643029168,
|
|
"grad_norm": 3.4089417457580566,
|
|
"learning_rate": 9.521871936943907e-06,
|
|
"loss": 1.2478,
|
|
"step": 3426
|
|
},
|
|
{
|
|
"epoch": 0.917045758629917,
|
|
"grad_norm": 3.5590267181396484,
|
|
"learning_rate": 9.521494220196862e-06,
|
|
"loss": 1.1056,
|
|
"step": 3427
|
|
},
|
|
{
|
|
"epoch": 0.9173133529569173,
|
|
"grad_norm": 3.1895391941070557,
|
|
"learning_rate": 9.521116361810115e-06,
|
|
"loss": 1.1022,
|
|
"step": 3428
|
|
},
|
|
{
|
|
"epoch": 0.9175809472839176,
|
|
"grad_norm": 3.6638033390045166,
|
|
"learning_rate": 9.5207383617955e-06,
|
|
"loss": 1.3269,
|
|
"step": 3429
|
|
},
|
|
{
|
|
"epoch": 0.9178485416109179,
|
|
"grad_norm": 3.7325849533081055,
|
|
"learning_rate": 9.52036022016486e-06,
|
|
"loss": 1.2461,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 0.9181161359379181,
|
|
"grad_norm": 3.56632137298584,
|
|
"learning_rate": 9.519981936930038e-06,
|
|
"loss": 1.2025,
|
|
"step": 3431
|
|
},
|
|
{
|
|
"epoch": 0.9183837302649184,
|
|
"grad_norm": 3.4402332305908203,
|
|
"learning_rate": 9.519603512102887e-06,
|
|
"loss": 1.0808,
|
|
"step": 3432
|
|
},
|
|
{
|
|
"epoch": 0.9186513245919187,
|
|
"grad_norm": 3.298569917678833,
|
|
"learning_rate": 9.51922494569526e-06,
|
|
"loss": 1.0316,
|
|
"step": 3433
|
|
},
|
|
{
|
|
"epoch": 0.918918918918919,
|
|
"grad_norm": 3.712266683578491,
|
|
"learning_rate": 9.518846237719018e-06,
|
|
"loss": 1.2548,
|
|
"step": 3434
|
|
},
|
|
{
|
|
"epoch": 0.9191865132459192,
|
|
"grad_norm": 3.270195722579956,
|
|
"learning_rate": 9.51846738818602e-06,
|
|
"loss": 1.1098,
|
|
"step": 3435
|
|
},
|
|
{
|
|
"epoch": 0.9194541075729195,
|
|
"grad_norm": 3.471754312515259,
|
|
"learning_rate": 9.518088397108138e-06,
|
|
"loss": 1.0728,
|
|
"step": 3436
|
|
},
|
|
{
|
|
"epoch": 0.9197217018999198,
|
|
"grad_norm": 3.256743907928467,
|
|
"learning_rate": 9.517709264497242e-06,
|
|
"loss": 1.1637,
|
|
"step": 3437
|
|
},
|
|
{
|
|
"epoch": 0.9199892962269199,
|
|
"grad_norm": 3.3077757358551025,
|
|
"learning_rate": 9.51732999036521e-06,
|
|
"loss": 1.1242,
|
|
"step": 3438
|
|
},
|
|
{
|
|
"epoch": 0.9202568905539202,
|
|
"grad_norm": 3.492668867111206,
|
|
"learning_rate": 9.516950574723922e-06,
|
|
"loss": 1.0478,
|
|
"step": 3439
|
|
},
|
|
{
|
|
"epoch": 0.9205244848809205,
|
|
"grad_norm": 3.167327880859375,
|
|
"learning_rate": 9.516571017585265e-06,
|
|
"loss": 1.1084,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.9207920792079208,
|
|
"grad_norm": 2.9469475746154785,
|
|
"learning_rate": 9.516191318961126e-06,
|
|
"loss": 1.0578,
|
|
"step": 3441
|
|
},
|
|
{
|
|
"epoch": 0.921059673534921,
|
|
"grad_norm": 3.6381478309631348,
|
|
"learning_rate": 9.515811478863402e-06,
|
|
"loss": 1.2693,
|
|
"step": 3442
|
|
},
|
|
{
|
|
"epoch": 0.9213272678619213,
|
|
"grad_norm": 3.4339804649353027,
|
|
"learning_rate": 9.515431497303992e-06,
|
|
"loss": 1.1058,
|
|
"step": 3443
|
|
},
|
|
{
|
|
"epoch": 0.9215948621889216,
|
|
"grad_norm": 3.2788732051849365,
|
|
"learning_rate": 9.515051374294797e-06,
|
|
"loss": 0.9658,
|
|
"step": 3444
|
|
},
|
|
{
|
|
"epoch": 0.9218624565159219,
|
|
"grad_norm": 3.393667697906494,
|
|
"learning_rate": 9.514671109847727e-06,
|
|
"loss": 1.0991,
|
|
"step": 3445
|
|
},
|
|
{
|
|
"epoch": 0.9221300508429221,
|
|
"grad_norm": 3.7156498432159424,
|
|
"learning_rate": 9.514290703974694e-06,
|
|
"loss": 1.0418,
|
|
"step": 3446
|
|
},
|
|
{
|
|
"epoch": 0.9223976451699224,
|
|
"grad_norm": 3.149561882019043,
|
|
"learning_rate": 9.513910156687612e-06,
|
|
"loss": 1.1174,
|
|
"step": 3447
|
|
},
|
|
{
|
|
"epoch": 0.9226652394969227,
|
|
"grad_norm": 3.51082181930542,
|
|
"learning_rate": 9.513529467998404e-06,
|
|
"loss": 1.1957,
|
|
"step": 3448
|
|
},
|
|
{
|
|
"epoch": 0.9229328338239229,
|
|
"grad_norm": 3.3628318309783936,
|
|
"learning_rate": 9.513148637918995e-06,
|
|
"loss": 1.0921,
|
|
"step": 3449
|
|
},
|
|
{
|
|
"epoch": 0.9232004281509232,
|
|
"grad_norm": 3.5448429584503174,
|
|
"learning_rate": 9.512767666461316e-06,
|
|
"loss": 1.1887,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.9234680224779235,
|
|
"grad_norm": 3.335571050643921,
|
|
"learning_rate": 9.512386553637298e-06,
|
|
"loss": 1.0374,
|
|
"step": 3451
|
|
},
|
|
{
|
|
"epoch": 0.9237356168049238,
|
|
"grad_norm": 3.2040200233459473,
|
|
"learning_rate": 9.512005299458885e-06,
|
|
"loss": 1.1254,
|
|
"step": 3452
|
|
},
|
|
{
|
|
"epoch": 0.924003211131924,
|
|
"grad_norm": 3.4492249488830566,
|
|
"learning_rate": 9.511623903938015e-06,
|
|
"loss": 1.1536,
|
|
"step": 3453
|
|
},
|
|
{
|
|
"epoch": 0.9242708054589243,
|
|
"grad_norm": 3.422445774078369,
|
|
"learning_rate": 9.511242367086637e-06,
|
|
"loss": 1.2253,
|
|
"step": 3454
|
|
},
|
|
{
|
|
"epoch": 0.9245383997859246,
|
|
"grad_norm": 3.5041046142578125,
|
|
"learning_rate": 9.510860688916704e-06,
|
|
"loss": 1.1549,
|
|
"step": 3455
|
|
},
|
|
{
|
|
"epoch": 0.9248059941129249,
|
|
"grad_norm": 3.517303705215454,
|
|
"learning_rate": 9.510478869440172e-06,
|
|
"loss": 1.1445,
|
|
"step": 3456
|
|
},
|
|
{
|
|
"epoch": 0.925073588439925,
|
|
"grad_norm": 3.393651247024536,
|
|
"learning_rate": 9.510096908669e-06,
|
|
"loss": 1.2026,
|
|
"step": 3457
|
|
},
|
|
{
|
|
"epoch": 0.9253411827669253,
|
|
"grad_norm": 3.499711751937866,
|
|
"learning_rate": 9.509714806615157e-06,
|
|
"loss": 1.2276,
|
|
"step": 3458
|
|
},
|
|
{
|
|
"epoch": 0.9256087770939256,
|
|
"grad_norm": 3.527127742767334,
|
|
"learning_rate": 9.509332563290611e-06,
|
|
"loss": 1.1068,
|
|
"step": 3459
|
|
},
|
|
{
|
|
"epoch": 0.9258763714209258,
|
|
"grad_norm": 2.9405460357666016,
|
|
"learning_rate": 9.508950178707335e-06,
|
|
"loss": 0.984,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.9261439657479261,
|
|
"grad_norm": 3.161170482635498,
|
|
"learning_rate": 9.508567652877307e-06,
|
|
"loss": 1.0606,
|
|
"step": 3461
|
|
},
|
|
{
|
|
"epoch": 0.9264115600749264,
|
|
"grad_norm": 2.99855375289917,
|
|
"learning_rate": 9.508184985812514e-06,
|
|
"loss": 0.9767,
|
|
"step": 3462
|
|
},
|
|
{
|
|
"epoch": 0.9266791544019267,
|
|
"grad_norm": 3.1051480770111084,
|
|
"learning_rate": 9.507802177524937e-06,
|
|
"loss": 1.0899,
|
|
"step": 3463
|
|
},
|
|
{
|
|
"epoch": 0.9269467487289269,
|
|
"grad_norm": 3.3303263187408447,
|
|
"learning_rate": 9.507419228026574e-06,
|
|
"loss": 1.2223,
|
|
"step": 3464
|
|
},
|
|
{
|
|
"epoch": 0.9272143430559272,
|
|
"grad_norm": 3.6255226135253906,
|
|
"learning_rate": 9.507036137329417e-06,
|
|
"loss": 1.2277,
|
|
"step": 3465
|
|
},
|
|
{
|
|
"epoch": 0.9274819373829275,
|
|
"grad_norm": 3.4016709327697754,
|
|
"learning_rate": 9.50665290544547e-06,
|
|
"loss": 1.0828,
|
|
"step": 3466
|
|
},
|
|
{
|
|
"epoch": 0.9277495317099278,
|
|
"grad_norm": 3.1820127964019775,
|
|
"learning_rate": 9.506269532386736e-06,
|
|
"loss": 1.0512,
|
|
"step": 3467
|
|
},
|
|
{
|
|
"epoch": 0.928017126036928,
|
|
"grad_norm": 3.8635528087615967,
|
|
"learning_rate": 9.505886018165223e-06,
|
|
"loss": 1.137,
|
|
"step": 3468
|
|
},
|
|
{
|
|
"epoch": 0.9282847203639283,
|
|
"grad_norm": 3.615046977996826,
|
|
"learning_rate": 9.505502362792947e-06,
|
|
"loss": 1.1372,
|
|
"step": 3469
|
|
},
|
|
{
|
|
"epoch": 0.9285523146909286,
|
|
"grad_norm": 3.5528488159179688,
|
|
"learning_rate": 9.505118566281928e-06,
|
|
"loss": 1.145,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 0.9288199090179288,
|
|
"grad_norm": 3.367374897003174,
|
|
"learning_rate": 9.504734628644186e-06,
|
|
"loss": 1.1556,
|
|
"step": 3471
|
|
},
|
|
{
|
|
"epoch": 0.9290875033449291,
|
|
"grad_norm": 3.4801955223083496,
|
|
"learning_rate": 9.504350549891748e-06,
|
|
"loss": 1.1011,
|
|
"step": 3472
|
|
},
|
|
{
|
|
"epoch": 0.9293550976719294,
|
|
"grad_norm": 3.310755729675293,
|
|
"learning_rate": 9.503966330036646e-06,
|
|
"loss": 1.239,
|
|
"step": 3473
|
|
},
|
|
{
|
|
"epoch": 0.9296226919989297,
|
|
"grad_norm": 3.1718642711639404,
|
|
"learning_rate": 9.50358196909092e-06,
|
|
"loss": 1.1341,
|
|
"step": 3474
|
|
},
|
|
{
|
|
"epoch": 0.9298902863259298,
|
|
"grad_norm": 3.5848066806793213,
|
|
"learning_rate": 9.503197467066604e-06,
|
|
"loss": 1.2817,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 0.9301578806529301,
|
|
"grad_norm": 3.2660577297210693,
|
|
"learning_rate": 9.502812823975746e-06,
|
|
"loss": 1.1082,
|
|
"step": 3476
|
|
},
|
|
{
|
|
"epoch": 0.9304254749799304,
|
|
"grad_norm": 3.4839932918548584,
|
|
"learning_rate": 9.502428039830395e-06,
|
|
"loss": 1.0307,
|
|
"step": 3477
|
|
},
|
|
{
|
|
"epoch": 0.9306930693069307,
|
|
"grad_norm": 3.186553478240967,
|
|
"learning_rate": 9.502043114642607e-06,
|
|
"loss": 0.9545,
|
|
"step": 3478
|
|
},
|
|
{
|
|
"epoch": 0.9309606636339309,
|
|
"grad_norm": 3.0523486137390137,
|
|
"learning_rate": 9.501658048424437e-06,
|
|
"loss": 0.99,
|
|
"step": 3479
|
|
},
|
|
{
|
|
"epoch": 0.9312282579609312,
|
|
"grad_norm": 3.2745773792266846,
|
|
"learning_rate": 9.501272841187949e-06,
|
|
"loss": 1.105,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.9314958522879315,
|
|
"grad_norm": 3.30385422706604,
|
|
"learning_rate": 9.500887492945208e-06,
|
|
"loss": 1.03,
|
|
"step": 3481
|
|
},
|
|
{
|
|
"epoch": 0.9317634466149317,
|
|
"grad_norm": 3.7593932151794434,
|
|
"learning_rate": 9.500502003708287e-06,
|
|
"loss": 1.3537,
|
|
"step": 3482
|
|
},
|
|
{
|
|
"epoch": 0.932031040941932,
|
|
"grad_norm": 3.3091962337493896,
|
|
"learning_rate": 9.500116373489264e-06,
|
|
"loss": 1.1339,
|
|
"step": 3483
|
|
},
|
|
{
|
|
"epoch": 0.9322986352689323,
|
|
"grad_norm": 3.1026055812835693,
|
|
"learning_rate": 9.499730602300213e-06,
|
|
"loss": 1.095,
|
|
"step": 3484
|
|
},
|
|
{
|
|
"epoch": 0.9325662295959326,
|
|
"grad_norm": 3.178584337234497,
|
|
"learning_rate": 9.499344690153226e-06,
|
|
"loss": 0.9671,
|
|
"step": 3485
|
|
},
|
|
{
|
|
"epoch": 0.9328338239229328,
|
|
"grad_norm": 3.1470065116882324,
|
|
"learning_rate": 9.498958637060385e-06,
|
|
"loss": 1.1067,
|
|
"step": 3486
|
|
},
|
|
{
|
|
"epoch": 0.9331014182499331,
|
|
"grad_norm": 3.4016363620758057,
|
|
"learning_rate": 9.498572443033789e-06,
|
|
"loss": 1.1828,
|
|
"step": 3487
|
|
},
|
|
{
|
|
"epoch": 0.9333690125769334,
|
|
"grad_norm": 3.14091157913208,
|
|
"learning_rate": 9.498186108085534e-06,
|
|
"loss": 1.0975,
|
|
"step": 3488
|
|
},
|
|
{
|
|
"epoch": 0.9336366069039337,
|
|
"grad_norm": 3.34956431388855,
|
|
"learning_rate": 9.497799632227721e-06,
|
|
"loss": 1.2063,
|
|
"step": 3489
|
|
},
|
|
{
|
|
"epoch": 0.9339042012309339,
|
|
"grad_norm": 3.507667064666748,
|
|
"learning_rate": 9.497413015472458e-06,
|
|
"loss": 1.1349,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 0.9341717955579342,
|
|
"grad_norm": 3.209160566329956,
|
|
"learning_rate": 9.497026257831856e-06,
|
|
"loss": 1.0282,
|
|
"step": 3491
|
|
},
|
|
{
|
|
"epoch": 0.9344393898849345,
|
|
"grad_norm": 3.31990647315979,
|
|
"learning_rate": 9.49663935931803e-06,
|
|
"loss": 1.1131,
|
|
"step": 3492
|
|
},
|
|
{
|
|
"epoch": 0.9347069842119347,
|
|
"grad_norm": 3.19960618019104,
|
|
"learning_rate": 9.4962523199431e-06,
|
|
"loss": 1.1472,
|
|
"step": 3493
|
|
},
|
|
{
|
|
"epoch": 0.934974578538935,
|
|
"grad_norm": 3.4468166828155518,
|
|
"learning_rate": 9.495865139719192e-06,
|
|
"loss": 1.1146,
|
|
"step": 3494
|
|
},
|
|
{
|
|
"epoch": 0.9352421728659353,
|
|
"grad_norm": 3.4722113609313965,
|
|
"learning_rate": 9.495477818658432e-06,
|
|
"loss": 1.0553,
|
|
"step": 3495
|
|
},
|
|
{
|
|
"epoch": 0.9355097671929355,
|
|
"grad_norm": 3.26762318611145,
|
|
"learning_rate": 9.495090356772955e-06,
|
|
"loss": 1.0282,
|
|
"step": 3496
|
|
},
|
|
{
|
|
"epoch": 0.9357773615199357,
|
|
"grad_norm": 3.4020135402679443,
|
|
"learning_rate": 9.494702754074898e-06,
|
|
"loss": 1.1634,
|
|
"step": 3497
|
|
},
|
|
{
|
|
"epoch": 0.936044955846936,
|
|
"grad_norm": 3.114060401916504,
|
|
"learning_rate": 9.494315010576405e-06,
|
|
"loss": 1.0458,
|
|
"step": 3498
|
|
},
|
|
{
|
|
"epoch": 0.9363125501739363,
|
|
"grad_norm": 2.988351345062256,
|
|
"learning_rate": 9.493927126289619e-06,
|
|
"loss": 1.0748,
|
|
"step": 3499
|
|
},
|
|
{
|
|
"epoch": 0.9365801445009366,
|
|
"grad_norm": 3.3398990631103516,
|
|
"learning_rate": 9.493539101226692e-06,
|
|
"loss": 1.0633,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.9365801445009366,
|
|
"eval_loss": 1.1470533609390259,
|
|
"eval_runtime": 11.4325,
|
|
"eval_samples_per_second": 34.988,
|
|
"eval_steps_per_second": 4.373,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.9368477388279368,
|
|
"grad_norm": 3.560861587524414,
|
|
"learning_rate": 9.493150935399779e-06,
|
|
"loss": 0.9958,
|
|
"step": 3501
|
|
},
|
|
{
|
|
"epoch": 0.9371153331549371,
|
|
"grad_norm": 3.2529478073120117,
|
|
"learning_rate": 9.49276262882104e-06,
|
|
"loss": 1.1023,
|
|
"step": 3502
|
|
},
|
|
{
|
|
"epoch": 0.9373829274819374,
|
|
"grad_norm": 3.466764211654663,
|
|
"learning_rate": 9.49237418150264e-06,
|
|
"loss": 1.1395,
|
|
"step": 3503
|
|
},
|
|
{
|
|
"epoch": 0.9376505218089376,
|
|
"grad_norm": 3.3550920486450195,
|
|
"learning_rate": 9.491985593456747e-06,
|
|
"loss": 1.111,
|
|
"step": 3504
|
|
},
|
|
{
|
|
"epoch": 0.9379181161359379,
|
|
"grad_norm": 3.469780683517456,
|
|
"learning_rate": 9.491596864695534e-06,
|
|
"loss": 1.1792,
|
|
"step": 3505
|
|
},
|
|
{
|
|
"epoch": 0.9381857104629382,
|
|
"grad_norm": 3.7144579887390137,
|
|
"learning_rate": 9.491207995231176e-06,
|
|
"loss": 1.2884,
|
|
"step": 3506
|
|
},
|
|
{
|
|
"epoch": 0.9384533047899385,
|
|
"grad_norm": 2.8839621543884277,
|
|
"learning_rate": 9.490818985075856e-06,
|
|
"loss": 1.0105,
|
|
"step": 3507
|
|
},
|
|
{
|
|
"epoch": 0.9387208991169387,
|
|
"grad_norm": 3.0779335498809814,
|
|
"learning_rate": 9.490429834241763e-06,
|
|
"loss": 1.052,
|
|
"step": 3508
|
|
},
|
|
{
|
|
"epoch": 0.938988493443939,
|
|
"grad_norm": 3.260777711868286,
|
|
"learning_rate": 9.490040542741084e-06,
|
|
"loss": 1.0654,
|
|
"step": 3509
|
|
},
|
|
{
|
|
"epoch": 0.9392560877709393,
|
|
"grad_norm": 3.4385604858398438,
|
|
"learning_rate": 9.489651110586014e-06,
|
|
"loss": 1.2247,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 0.9395236820979396,
|
|
"grad_norm": 3.460442066192627,
|
|
"learning_rate": 9.489261537788754e-06,
|
|
"loss": 1.0923,
|
|
"step": 3511
|
|
},
|
|
{
|
|
"epoch": 0.9397912764249398,
|
|
"grad_norm": 3.365694284439087,
|
|
"learning_rate": 9.488871824361508e-06,
|
|
"loss": 1.0738,
|
|
"step": 3512
|
|
},
|
|
{
|
|
"epoch": 0.9400588707519401,
|
|
"grad_norm": 3.1729164123535156,
|
|
"learning_rate": 9.48848197031648e-06,
|
|
"loss": 1.1758,
|
|
"step": 3513
|
|
},
|
|
{
|
|
"epoch": 0.9403264650789404,
|
|
"grad_norm": 3.1198678016662598,
|
|
"learning_rate": 9.488091975665887e-06,
|
|
"loss": 1.0894,
|
|
"step": 3514
|
|
},
|
|
{
|
|
"epoch": 0.9405940594059405,
|
|
"grad_norm": 3.1159262657165527,
|
|
"learning_rate": 9.487701840421945e-06,
|
|
"loss": 1.1764,
|
|
"step": 3515
|
|
},
|
|
{
|
|
"epoch": 0.9408616537329408,
|
|
"grad_norm": 3.0507001876831055,
|
|
"learning_rate": 9.487311564596875e-06,
|
|
"loss": 1.0162,
|
|
"step": 3516
|
|
},
|
|
{
|
|
"epoch": 0.9411292480599411,
|
|
"grad_norm": 3.2293381690979004,
|
|
"learning_rate": 9.486921148202901e-06,
|
|
"loss": 1.0442,
|
|
"step": 3517
|
|
},
|
|
{
|
|
"epoch": 0.9413968423869414,
|
|
"grad_norm": 3.246738910675049,
|
|
"learning_rate": 9.486530591252257e-06,
|
|
"loss": 1.1221,
|
|
"step": 3518
|
|
},
|
|
{
|
|
"epoch": 0.9416644367139416,
|
|
"grad_norm": 3.022080659866333,
|
|
"learning_rate": 9.486139893757172e-06,
|
|
"loss": 0.9833,
|
|
"step": 3519
|
|
},
|
|
{
|
|
"epoch": 0.9419320310409419,
|
|
"grad_norm": 3.232952356338501,
|
|
"learning_rate": 9.485749055729891e-06,
|
|
"loss": 1.058,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.9421996253679422,
|
|
"grad_norm": 3.4002277851104736,
|
|
"learning_rate": 9.485358077182653e-06,
|
|
"loss": 1.1898,
|
|
"step": 3521
|
|
},
|
|
{
|
|
"epoch": 0.9424672196949425,
|
|
"grad_norm": 3.4779772758483887,
|
|
"learning_rate": 9.484966958127707e-06,
|
|
"loss": 1.0439,
|
|
"step": 3522
|
|
},
|
|
{
|
|
"epoch": 0.9427348140219427,
|
|
"grad_norm": 3.6369545459747314,
|
|
"learning_rate": 9.484575698577305e-06,
|
|
"loss": 1.1877,
|
|
"step": 3523
|
|
},
|
|
{
|
|
"epoch": 0.943002408348943,
|
|
"grad_norm": 3.197024345397949,
|
|
"learning_rate": 9.484184298543706e-06,
|
|
"loss": 1.0569,
|
|
"step": 3524
|
|
},
|
|
{
|
|
"epoch": 0.9432700026759433,
|
|
"grad_norm": 3.642747640609741,
|
|
"learning_rate": 9.483792758039165e-06,
|
|
"loss": 1.2194,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 0.9435375970029435,
|
|
"grad_norm": 3.4038000106811523,
|
|
"learning_rate": 9.483401077075954e-06,
|
|
"loss": 1.1591,
|
|
"step": 3526
|
|
},
|
|
{
|
|
"epoch": 0.9438051913299438,
|
|
"grad_norm": 2.978867292404175,
|
|
"learning_rate": 9.48300925566634e-06,
|
|
"loss": 1.0912,
|
|
"step": 3527
|
|
},
|
|
{
|
|
"epoch": 0.9440727856569441,
|
|
"grad_norm": 3.416811943054199,
|
|
"learning_rate": 9.482617293822596e-06,
|
|
"loss": 1.124,
|
|
"step": 3528
|
|
},
|
|
{
|
|
"epoch": 0.9443403799839444,
|
|
"grad_norm": 3.1118931770324707,
|
|
"learning_rate": 9.482225191557001e-06,
|
|
"loss": 1.0341,
|
|
"step": 3529
|
|
},
|
|
{
|
|
"epoch": 0.9446079743109446,
|
|
"grad_norm": 3.495542049407959,
|
|
"learning_rate": 9.48183294888184e-06,
|
|
"loss": 1.3161,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 0.9448755686379449,
|
|
"grad_norm": 3.3605546951293945,
|
|
"learning_rate": 9.481440565809398e-06,
|
|
"loss": 1.0635,
|
|
"step": 3531
|
|
},
|
|
{
|
|
"epoch": 0.9451431629649452,
|
|
"grad_norm": 3.383915901184082,
|
|
"learning_rate": 9.481048042351967e-06,
|
|
"loss": 1.0157,
|
|
"step": 3532
|
|
},
|
|
{
|
|
"epoch": 0.9454107572919455,
|
|
"grad_norm": 2.984562873840332,
|
|
"learning_rate": 9.480655378521845e-06,
|
|
"loss": 1.0117,
|
|
"step": 3533
|
|
},
|
|
{
|
|
"epoch": 0.9456783516189456,
|
|
"grad_norm": 3.205364942550659,
|
|
"learning_rate": 9.480262574331331e-06,
|
|
"loss": 1.0787,
|
|
"step": 3534
|
|
},
|
|
{
|
|
"epoch": 0.9459459459459459,
|
|
"grad_norm": 3.5284080505371094,
|
|
"learning_rate": 9.479869629792729e-06,
|
|
"loss": 1.2231,
|
|
"step": 3535
|
|
},
|
|
{
|
|
"epoch": 0.9462135402729462,
|
|
"grad_norm": 3.833712577819824,
|
|
"learning_rate": 9.47947654491835e-06,
|
|
"loss": 1.1585,
|
|
"step": 3536
|
|
},
|
|
{
|
|
"epoch": 0.9464811345999464,
|
|
"grad_norm": 3.6991257667541504,
|
|
"learning_rate": 9.479083319720508e-06,
|
|
"loss": 1.1624,
|
|
"step": 3537
|
|
},
|
|
{
|
|
"epoch": 0.9467487289269467,
|
|
"grad_norm": 3.437206983566284,
|
|
"learning_rate": 9.47868995421152e-06,
|
|
"loss": 1.1759,
|
|
"step": 3538
|
|
},
|
|
{
|
|
"epoch": 0.947016323253947,
|
|
"grad_norm": 4.02100944519043,
|
|
"learning_rate": 9.478296448403707e-06,
|
|
"loss": 1.3648,
|
|
"step": 3539
|
|
},
|
|
{
|
|
"epoch": 0.9472839175809473,
|
|
"grad_norm": 3.552727699279785,
|
|
"learning_rate": 9.4779028023094e-06,
|
|
"loss": 1.1806,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.9475515119079475,
|
|
"grad_norm": 3.3923287391662598,
|
|
"learning_rate": 9.477509015940928e-06,
|
|
"loss": 1.1629,
|
|
"step": 3541
|
|
},
|
|
{
|
|
"epoch": 0.9478191062349478,
|
|
"grad_norm": 3.3122611045837402,
|
|
"learning_rate": 9.477115089310626e-06,
|
|
"loss": 1.0938,
|
|
"step": 3542
|
|
},
|
|
{
|
|
"epoch": 0.9480867005619481,
|
|
"grad_norm": 3.2239348888397217,
|
|
"learning_rate": 9.476721022430834e-06,
|
|
"loss": 1.2055,
|
|
"step": 3543
|
|
},
|
|
{
|
|
"epoch": 0.9483542948889484,
|
|
"grad_norm": 3.426645278930664,
|
|
"learning_rate": 9.4763268153139e-06,
|
|
"loss": 1.1852,
|
|
"step": 3544
|
|
},
|
|
{
|
|
"epoch": 0.9486218892159486,
|
|
"grad_norm": 3.3436081409454346,
|
|
"learning_rate": 9.475932467972169e-06,
|
|
"loss": 1.0941,
|
|
"step": 3545
|
|
},
|
|
{
|
|
"epoch": 0.9488894835429489,
|
|
"grad_norm": 3.2754054069519043,
|
|
"learning_rate": 9.475537980417994e-06,
|
|
"loss": 1.0063,
|
|
"step": 3546
|
|
},
|
|
{
|
|
"epoch": 0.9491570778699492,
|
|
"grad_norm": 3.0442628860473633,
|
|
"learning_rate": 9.475143352663736e-06,
|
|
"loss": 1.0222,
|
|
"step": 3547
|
|
},
|
|
{
|
|
"epoch": 0.9494246721969494,
|
|
"grad_norm": 3.662236213684082,
|
|
"learning_rate": 9.474748584721755e-06,
|
|
"loss": 1.2637,
|
|
"step": 3548
|
|
},
|
|
{
|
|
"epoch": 0.9496922665239497,
|
|
"grad_norm": 3.3807575702667236,
|
|
"learning_rate": 9.474353676604416e-06,
|
|
"loss": 1.1554,
|
|
"step": 3549
|
|
},
|
|
{
|
|
"epoch": 0.94995986085095,
|
|
"grad_norm": 3.3155808448791504,
|
|
"learning_rate": 9.473958628324093e-06,
|
|
"loss": 1.1654,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.9502274551779503,
|
|
"grad_norm": 3.54040789604187,
|
|
"learning_rate": 9.47356343989316e-06,
|
|
"loss": 1.3299,
|
|
"step": 3551
|
|
},
|
|
{
|
|
"epoch": 0.9504950495049505,
|
|
"grad_norm": 3.640690565109253,
|
|
"learning_rate": 9.473168111323995e-06,
|
|
"loss": 1.2952,
|
|
"step": 3552
|
|
},
|
|
{
|
|
"epoch": 0.9507626438319507,
|
|
"grad_norm": 3.141310214996338,
|
|
"learning_rate": 9.472772642628984e-06,
|
|
"loss": 1.0503,
|
|
"step": 3553
|
|
},
|
|
{
|
|
"epoch": 0.951030238158951,
|
|
"grad_norm": 3.160971164703369,
|
|
"learning_rate": 9.472377033820514e-06,
|
|
"loss": 1.065,
|
|
"step": 3554
|
|
},
|
|
{
|
|
"epoch": 0.9512978324859513,
|
|
"grad_norm": 3.1601452827453613,
|
|
"learning_rate": 9.47198128491098e-06,
|
|
"loss": 1.1415,
|
|
"step": 3555
|
|
},
|
|
{
|
|
"epoch": 0.9515654268129515,
|
|
"grad_norm": 3.017057180404663,
|
|
"learning_rate": 9.471585395912776e-06,
|
|
"loss": 1.1711,
|
|
"step": 3556
|
|
},
|
|
{
|
|
"epoch": 0.9518330211399518,
|
|
"grad_norm": 3.127492666244507,
|
|
"learning_rate": 9.471189366838307e-06,
|
|
"loss": 1.0834,
|
|
"step": 3557
|
|
},
|
|
{
|
|
"epoch": 0.9521006154669521,
|
|
"grad_norm": 3.3001465797424316,
|
|
"learning_rate": 9.470793197699977e-06,
|
|
"loss": 1.1977,
|
|
"step": 3558
|
|
},
|
|
{
|
|
"epoch": 0.9523682097939523,
|
|
"grad_norm": 3.1050307750701904,
|
|
"learning_rate": 9.470396888510196e-06,
|
|
"loss": 1.0702,
|
|
"step": 3559
|
|
},
|
|
{
|
|
"epoch": 0.9526358041209526,
|
|
"grad_norm": 3.070390224456787,
|
|
"learning_rate": 9.470000439281379e-06,
|
|
"loss": 0.9667,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.9529033984479529,
|
|
"grad_norm": 3.1426901817321777,
|
|
"learning_rate": 9.469603850025946e-06,
|
|
"loss": 0.9483,
|
|
"step": 3561
|
|
},
|
|
{
|
|
"epoch": 0.9531709927749532,
|
|
"grad_norm": 3.142430543899536,
|
|
"learning_rate": 9.46920712075632e-06,
|
|
"loss": 0.9968,
|
|
"step": 3562
|
|
},
|
|
{
|
|
"epoch": 0.9534385871019534,
|
|
"grad_norm": 3.0528955459594727,
|
|
"learning_rate": 9.468810251484928e-06,
|
|
"loss": 1.0463,
|
|
"step": 3563
|
|
},
|
|
{
|
|
"epoch": 0.9537061814289537,
|
|
"grad_norm": 3.309941530227661,
|
|
"learning_rate": 9.468413242224204e-06,
|
|
"loss": 1.2411,
|
|
"step": 3564
|
|
},
|
|
{
|
|
"epoch": 0.953973775755954,
|
|
"grad_norm": 3.3797719478607178,
|
|
"learning_rate": 9.468016092986583e-06,
|
|
"loss": 1.1239,
|
|
"step": 3565
|
|
},
|
|
{
|
|
"epoch": 0.9542413700829543,
|
|
"grad_norm": 3.4834790229797363,
|
|
"learning_rate": 9.467618803784507e-06,
|
|
"loss": 1.1477,
|
|
"step": 3566
|
|
},
|
|
{
|
|
"epoch": 0.9545089644099545,
|
|
"grad_norm": 3.8445494174957275,
|
|
"learning_rate": 9.467221374630422e-06,
|
|
"loss": 1.2043,
|
|
"step": 3567
|
|
},
|
|
{
|
|
"epoch": 0.9547765587369548,
|
|
"grad_norm": 3.565916061401367,
|
|
"learning_rate": 9.466823805536776e-06,
|
|
"loss": 1.0226,
|
|
"step": 3568
|
|
},
|
|
{
|
|
"epoch": 0.9550441530639551,
|
|
"grad_norm": 3.3251450061798096,
|
|
"learning_rate": 9.466426096516024e-06,
|
|
"loss": 1.1116,
|
|
"step": 3569
|
|
},
|
|
{
|
|
"epoch": 0.9553117473909553,
|
|
"grad_norm": 3.7028703689575195,
|
|
"learning_rate": 9.466028247580624e-06,
|
|
"loss": 1.1634,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 0.9555793417179556,
|
|
"grad_norm": 3.316803455352783,
|
|
"learning_rate": 9.465630258743041e-06,
|
|
"loss": 1.1836,
|
|
"step": 3571
|
|
},
|
|
{
|
|
"epoch": 0.9558469360449559,
|
|
"grad_norm": 3.284135341644287,
|
|
"learning_rate": 9.46523213001574e-06,
|
|
"loss": 1.077,
|
|
"step": 3572
|
|
},
|
|
{
|
|
"epoch": 0.9561145303719562,
|
|
"grad_norm": 3.4031052589416504,
|
|
"learning_rate": 9.464833861411196e-06,
|
|
"loss": 1.0654,
|
|
"step": 3573
|
|
},
|
|
{
|
|
"epoch": 0.9563821246989563,
|
|
"grad_norm": 3.1017119884490967,
|
|
"learning_rate": 9.464435452941881e-06,
|
|
"loss": 1.055,
|
|
"step": 3574
|
|
},
|
|
{
|
|
"epoch": 0.9566497190259566,
|
|
"grad_norm": 3.4276275634765625,
|
|
"learning_rate": 9.464036904620278e-06,
|
|
"loss": 1.1501,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 0.9569173133529569,
|
|
"grad_norm": 3.362846851348877,
|
|
"learning_rate": 9.46363821645887e-06,
|
|
"loss": 1.1495,
|
|
"step": 3576
|
|
},
|
|
{
|
|
"epoch": 0.9571849076799572,
|
|
"grad_norm": 3.4557695388793945,
|
|
"learning_rate": 9.46323938847015e-06,
|
|
"loss": 1.0333,
|
|
"step": 3577
|
|
},
|
|
{
|
|
"epoch": 0.9574525020069574,
|
|
"grad_norm": 3.3175323009490967,
|
|
"learning_rate": 9.462840420666607e-06,
|
|
"loss": 1.0388,
|
|
"step": 3578
|
|
},
|
|
{
|
|
"epoch": 0.9577200963339577,
|
|
"grad_norm": 3.987826347351074,
|
|
"learning_rate": 9.462441313060741e-06,
|
|
"loss": 1.3387,
|
|
"step": 3579
|
|
},
|
|
{
|
|
"epoch": 0.957987690660958,
|
|
"grad_norm": 3.354966163635254,
|
|
"learning_rate": 9.462042065665057e-06,
|
|
"loss": 1.0009,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.9582552849879582,
|
|
"grad_norm": 3.2124292850494385,
|
|
"learning_rate": 9.461642678492059e-06,
|
|
"loss": 1.0912,
|
|
"step": 3581
|
|
},
|
|
{
|
|
"epoch": 0.9585228793149585,
|
|
"grad_norm": 3.4074666500091553,
|
|
"learning_rate": 9.461243151554257e-06,
|
|
"loss": 1.1189,
|
|
"step": 3582
|
|
},
|
|
{
|
|
"epoch": 0.9587904736419588,
|
|
"grad_norm": 3.342137336730957,
|
|
"learning_rate": 9.460843484864168e-06,
|
|
"loss": 1.1774,
|
|
"step": 3583
|
|
},
|
|
{
|
|
"epoch": 0.9590580679689591,
|
|
"grad_norm": 3.108454704284668,
|
|
"learning_rate": 9.460443678434313e-06,
|
|
"loss": 0.9848,
|
|
"step": 3584
|
|
},
|
|
{
|
|
"epoch": 0.9593256622959593,
|
|
"grad_norm": 2.764338731765747,
|
|
"learning_rate": 9.460043732277213e-06,
|
|
"loss": 0.9759,
|
|
"step": 3585
|
|
},
|
|
{
|
|
"epoch": 0.9595932566229596,
|
|
"grad_norm": 3.277522087097168,
|
|
"learning_rate": 9.4596436464054e-06,
|
|
"loss": 1.0655,
|
|
"step": 3586
|
|
},
|
|
{
|
|
"epoch": 0.9598608509499599,
|
|
"grad_norm": 3.2596328258514404,
|
|
"learning_rate": 9.459243420831406e-06,
|
|
"loss": 1.0931,
|
|
"step": 3587
|
|
},
|
|
{
|
|
"epoch": 0.9601284452769602,
|
|
"grad_norm": 3.3938968181610107,
|
|
"learning_rate": 9.45884305556777e-06,
|
|
"loss": 1.1095,
|
|
"step": 3588
|
|
},
|
|
{
|
|
"epoch": 0.9603960396039604,
|
|
"grad_norm": 3.694939374923706,
|
|
"learning_rate": 9.45844255062703e-06,
|
|
"loss": 1.274,
|
|
"step": 3589
|
|
},
|
|
{
|
|
"epoch": 0.9606636339309607,
|
|
"grad_norm": 3.2319419384002686,
|
|
"learning_rate": 9.458041906021733e-06,
|
|
"loss": 1.1096,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 0.960931228257961,
|
|
"grad_norm": 3.1638870239257812,
|
|
"learning_rate": 9.457641121764433e-06,
|
|
"loss": 1.0875,
|
|
"step": 3591
|
|
},
|
|
{
|
|
"epoch": 0.9611988225849611,
|
|
"grad_norm": 3.1526119709014893,
|
|
"learning_rate": 9.457240197867682e-06,
|
|
"loss": 0.9327,
|
|
"step": 3592
|
|
},
|
|
{
|
|
"epoch": 0.9614664169119614,
|
|
"grad_norm": 3.1602957248687744,
|
|
"learning_rate": 9.45683913434404e-06,
|
|
"loss": 1.0397,
|
|
"step": 3593
|
|
},
|
|
{
|
|
"epoch": 0.9617340112389617,
|
|
"grad_norm": 3.8300065994262695,
|
|
"learning_rate": 9.45643793120607e-06,
|
|
"loss": 1.2436,
|
|
"step": 3594
|
|
},
|
|
{
|
|
"epoch": 0.962001605565962,
|
|
"grad_norm": 3.174600839614868,
|
|
"learning_rate": 9.456036588466342e-06,
|
|
"loss": 1.1446,
|
|
"step": 3595
|
|
},
|
|
{
|
|
"epoch": 0.9622691998929622,
|
|
"grad_norm": 3.3465054035186768,
|
|
"learning_rate": 9.455635106137427e-06,
|
|
"loss": 1.1416,
|
|
"step": 3596
|
|
},
|
|
{
|
|
"epoch": 0.9625367942199625,
|
|
"grad_norm": 2.8999526500701904,
|
|
"learning_rate": 9.455233484231901e-06,
|
|
"loss": 0.9453,
|
|
"step": 3597
|
|
},
|
|
{
|
|
"epoch": 0.9628043885469628,
|
|
"grad_norm": 3.197765350341797,
|
|
"learning_rate": 9.454831722762346e-06,
|
|
"loss": 1.0374,
|
|
"step": 3598
|
|
},
|
|
{
|
|
"epoch": 0.9630719828739631,
|
|
"grad_norm": 3.180861711502075,
|
|
"learning_rate": 9.454429821741346e-06,
|
|
"loss": 1.051,
|
|
"step": 3599
|
|
},
|
|
{
|
|
"epoch": 0.9633395772009633,
|
|
"grad_norm": 3.3026978969573975,
|
|
"learning_rate": 9.454027781181496e-06,
|
|
"loss": 1.0753,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.9636071715279636,
|
|
"grad_norm": 3.2190282344818115,
|
|
"learning_rate": 9.453625601095385e-06,
|
|
"loss": 1.0299,
|
|
"step": 3601
|
|
},
|
|
{
|
|
"epoch": 0.9638747658549639,
|
|
"grad_norm": 3.302915334701538,
|
|
"learning_rate": 9.453223281495612e-06,
|
|
"loss": 1.1025,
|
|
"step": 3602
|
|
},
|
|
{
|
|
"epoch": 0.9641423601819642,
|
|
"grad_norm": 3.600322961807251,
|
|
"learning_rate": 9.452820822394783e-06,
|
|
"loss": 1.1095,
|
|
"step": 3603
|
|
},
|
|
{
|
|
"epoch": 0.9644099545089644,
|
|
"grad_norm": 3.5180773735046387,
|
|
"learning_rate": 9.452418223805502e-06,
|
|
"loss": 1.1045,
|
|
"step": 3604
|
|
},
|
|
{
|
|
"epoch": 0.9646775488359647,
|
|
"grad_norm": 3.113248825073242,
|
|
"learning_rate": 9.452015485740384e-06,
|
|
"loss": 0.9298,
|
|
"step": 3605
|
|
},
|
|
{
|
|
"epoch": 0.964945143162965,
|
|
"grad_norm": 3.2621712684631348,
|
|
"learning_rate": 9.451612608212044e-06,
|
|
"loss": 1.0531,
|
|
"step": 3606
|
|
},
|
|
{
|
|
"epoch": 0.9652127374899652,
|
|
"grad_norm": 2.929264783859253,
|
|
"learning_rate": 9.4512095912331e-06,
|
|
"loss": 1.0108,
|
|
"step": 3607
|
|
},
|
|
{
|
|
"epoch": 0.9654803318169655,
|
|
"grad_norm": 3.159482955932617,
|
|
"learning_rate": 9.45080643481618e-06,
|
|
"loss": 1.0752,
|
|
"step": 3608
|
|
},
|
|
{
|
|
"epoch": 0.9657479261439658,
|
|
"grad_norm": 3.332207202911377,
|
|
"learning_rate": 9.450403138973914e-06,
|
|
"loss": 1.1162,
|
|
"step": 3609
|
|
},
|
|
{
|
|
"epoch": 0.9660155204709661,
|
|
"grad_norm": 3.570805072784424,
|
|
"learning_rate": 9.44999970371893e-06,
|
|
"loss": 1.2211,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 0.9662831147979662,
|
|
"grad_norm": 2.926478624343872,
|
|
"learning_rate": 9.449596129063873e-06,
|
|
"loss": 0.9727,
|
|
"step": 3611
|
|
},
|
|
{
|
|
"epoch": 0.9665507091249665,
|
|
"grad_norm": 3.6152682304382324,
|
|
"learning_rate": 9.44919241502138e-06,
|
|
"loss": 1.0963,
|
|
"step": 3612
|
|
},
|
|
{
|
|
"epoch": 0.9668183034519668,
|
|
"grad_norm": 2.954878807067871,
|
|
"learning_rate": 9.4487885616041e-06,
|
|
"loss": 1.0543,
|
|
"step": 3613
|
|
},
|
|
{
|
|
"epoch": 0.9670858977789671,
|
|
"grad_norm": 3.6669461727142334,
|
|
"learning_rate": 9.448384568824685e-06,
|
|
"loss": 1.2643,
|
|
"step": 3614
|
|
},
|
|
{
|
|
"epoch": 0.9673534921059673,
|
|
"grad_norm": 3.3067846298217773,
|
|
"learning_rate": 9.447980436695787e-06,
|
|
"loss": 1.2128,
|
|
"step": 3615
|
|
},
|
|
{
|
|
"epoch": 0.9676210864329676,
|
|
"grad_norm": 3.863605499267578,
|
|
"learning_rate": 9.44757616523007e-06,
|
|
"loss": 1.2189,
|
|
"step": 3616
|
|
},
|
|
{
|
|
"epoch": 0.9678886807599679,
|
|
"grad_norm": 3.104247570037842,
|
|
"learning_rate": 9.447171754440195e-06,
|
|
"loss": 1.0933,
|
|
"step": 3617
|
|
},
|
|
{
|
|
"epoch": 0.9681562750869681,
|
|
"grad_norm": 4.070940017700195,
|
|
"learning_rate": 9.446767204338832e-06,
|
|
"loss": 1.2085,
|
|
"step": 3618
|
|
},
|
|
{
|
|
"epoch": 0.9684238694139684,
|
|
"grad_norm": 3.5472705364227295,
|
|
"learning_rate": 9.446362514938653e-06,
|
|
"loss": 1.1472,
|
|
"step": 3619
|
|
},
|
|
{
|
|
"epoch": 0.9686914637409687,
|
|
"grad_norm": 3.455854654312134,
|
|
"learning_rate": 9.445957686252336e-06,
|
|
"loss": 1.2072,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.968959058067969,
|
|
"grad_norm": 3.4434685707092285,
|
|
"learning_rate": 9.445552718292564e-06,
|
|
"loss": 1.0107,
|
|
"step": 3621
|
|
},
|
|
{
|
|
"epoch": 0.9692266523949692,
|
|
"grad_norm": 3.8100736141204834,
|
|
"learning_rate": 9.445147611072019e-06,
|
|
"loss": 1.4714,
|
|
"step": 3622
|
|
},
|
|
{
|
|
"epoch": 0.9694942467219695,
|
|
"grad_norm": 3.2713818550109863,
|
|
"learning_rate": 9.444742364603394e-06,
|
|
"loss": 1.1,
|
|
"step": 3623
|
|
},
|
|
{
|
|
"epoch": 0.9697618410489698,
|
|
"grad_norm": 3.6512348651885986,
|
|
"learning_rate": 9.444336978899384e-06,
|
|
"loss": 1.1204,
|
|
"step": 3624
|
|
},
|
|
{
|
|
"epoch": 0.9700294353759701,
|
|
"grad_norm": 3.12085223197937,
|
|
"learning_rate": 9.443931453972688e-06,
|
|
"loss": 1.1352,
|
|
"step": 3625
|
|
},
|
|
{
|
|
"epoch": 0.9702970297029703,
|
|
"grad_norm": 3.4997334480285645,
|
|
"learning_rate": 9.443525789836008e-06,
|
|
"loss": 1.184,
|
|
"step": 3626
|
|
},
|
|
{
|
|
"epoch": 0.9705646240299706,
|
|
"grad_norm": 3.1696505546569824,
|
|
"learning_rate": 9.443119986502053e-06,
|
|
"loss": 1.0827,
|
|
"step": 3627
|
|
},
|
|
{
|
|
"epoch": 0.9708322183569709,
|
|
"grad_norm": 2.911027431488037,
|
|
"learning_rate": 9.442714043983534e-06,
|
|
"loss": 0.9887,
|
|
"step": 3628
|
|
},
|
|
{
|
|
"epoch": 0.971099812683971,
|
|
"grad_norm": 3.3565635681152344,
|
|
"learning_rate": 9.44230796229317e-06,
|
|
"loss": 1.0637,
|
|
"step": 3629
|
|
},
|
|
{
|
|
"epoch": 0.9713674070109714,
|
|
"grad_norm": 3.567798614501953,
|
|
"learning_rate": 9.441901741443678e-06,
|
|
"loss": 1.0772,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.9716350013379716,
|
|
"grad_norm": 3.278237819671631,
|
|
"learning_rate": 9.441495381447787e-06,
|
|
"loss": 1.1284,
|
|
"step": 3631
|
|
},
|
|
{
|
|
"epoch": 0.971902595664972,
|
|
"grad_norm": 3.231175661087036,
|
|
"learning_rate": 9.441088882318223e-06,
|
|
"loss": 1.1555,
|
|
"step": 3632
|
|
},
|
|
{
|
|
"epoch": 0.9721701899919721,
|
|
"grad_norm": 3.619246006011963,
|
|
"learning_rate": 9.440682244067724e-06,
|
|
"loss": 1.2106,
|
|
"step": 3633
|
|
},
|
|
{
|
|
"epoch": 0.9724377843189724,
|
|
"grad_norm": 3.553589344024658,
|
|
"learning_rate": 9.440275466709025e-06,
|
|
"loss": 1.1818,
|
|
"step": 3634
|
|
},
|
|
{
|
|
"epoch": 0.9727053786459727,
|
|
"grad_norm": 3.7136471271514893,
|
|
"learning_rate": 9.43986855025487e-06,
|
|
"loss": 1.2618,
|
|
"step": 3635
|
|
},
|
|
{
|
|
"epoch": 0.972972972972973,
|
|
"grad_norm": 3.361229658126831,
|
|
"learning_rate": 9.439461494718006e-06,
|
|
"loss": 1.1693,
|
|
"step": 3636
|
|
},
|
|
{
|
|
"epoch": 0.9732405672999732,
|
|
"grad_norm": 3.6238696575164795,
|
|
"learning_rate": 9.439054300111183e-06,
|
|
"loss": 1.2334,
|
|
"step": 3637
|
|
},
|
|
{
|
|
"epoch": 0.9735081616269735,
|
|
"grad_norm": 3.4375593662261963,
|
|
"learning_rate": 9.438646966447158e-06,
|
|
"loss": 1.1148,
|
|
"step": 3638
|
|
},
|
|
{
|
|
"epoch": 0.9737757559539738,
|
|
"grad_norm": 3.1718556880950928,
|
|
"learning_rate": 9.438239493738692e-06,
|
|
"loss": 1.13,
|
|
"step": 3639
|
|
},
|
|
{
|
|
"epoch": 0.974043350280974,
|
|
"grad_norm": 3.3983285427093506,
|
|
"learning_rate": 9.437831881998548e-06,
|
|
"loss": 1.2176,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.9743109446079743,
|
|
"grad_norm": 3.254225015640259,
|
|
"learning_rate": 9.437424131239496e-06,
|
|
"loss": 1.2788,
|
|
"step": 3641
|
|
},
|
|
{
|
|
"epoch": 0.9745785389349746,
|
|
"grad_norm": 3.181648015975952,
|
|
"learning_rate": 9.437016241474307e-06,
|
|
"loss": 1.0597,
|
|
"step": 3642
|
|
},
|
|
{
|
|
"epoch": 0.9748461332619749,
|
|
"grad_norm": 3.5744338035583496,
|
|
"learning_rate": 9.43660821271576e-06,
|
|
"loss": 1.1843,
|
|
"step": 3643
|
|
},
|
|
{
|
|
"epoch": 0.9751137275889751,
|
|
"grad_norm": 3.0701401233673096,
|
|
"learning_rate": 9.436200044976638e-06,
|
|
"loss": 0.9703,
|
|
"step": 3644
|
|
},
|
|
{
|
|
"epoch": 0.9753813219159754,
|
|
"grad_norm": 3.134881019592285,
|
|
"learning_rate": 9.435791738269725e-06,
|
|
"loss": 1.1275,
|
|
"step": 3645
|
|
},
|
|
{
|
|
"epoch": 0.9756489162429757,
|
|
"grad_norm": 3.301896810531616,
|
|
"learning_rate": 9.43538329260781e-06,
|
|
"loss": 1.0886,
|
|
"step": 3646
|
|
},
|
|
{
|
|
"epoch": 0.975916510569976,
|
|
"grad_norm": 3.211254358291626,
|
|
"learning_rate": 9.434974708003694e-06,
|
|
"loss": 1.018,
|
|
"step": 3647
|
|
},
|
|
{
|
|
"epoch": 0.9761841048969762,
|
|
"grad_norm": 3.36275577545166,
|
|
"learning_rate": 9.434565984470172e-06,
|
|
"loss": 1.1677,
|
|
"step": 3648
|
|
},
|
|
{
|
|
"epoch": 0.9764516992239765,
|
|
"grad_norm": 3.162914276123047,
|
|
"learning_rate": 9.434157122020047e-06,
|
|
"loss": 1.1324,
|
|
"step": 3649
|
|
},
|
|
{
|
|
"epoch": 0.9767192935509768,
|
|
"grad_norm": 3.0449602603912354,
|
|
"learning_rate": 9.433748120666129e-06,
|
|
"loss": 1.0288,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.9769868878779769,
|
|
"grad_norm": 3.1934330463409424,
|
|
"learning_rate": 9.43333898042123e-06,
|
|
"loss": 1.1246,
|
|
"step": 3651
|
|
},
|
|
{
|
|
"epoch": 0.9772544822049772,
|
|
"grad_norm": 3.4677317142486572,
|
|
"learning_rate": 9.432929701298166e-06,
|
|
"loss": 1.0707,
|
|
"step": 3652
|
|
},
|
|
{
|
|
"epoch": 0.9775220765319775,
|
|
"grad_norm": 3.6359336376190186,
|
|
"learning_rate": 9.432520283309756e-06,
|
|
"loss": 1.1178,
|
|
"step": 3653
|
|
},
|
|
{
|
|
"epoch": 0.9777896708589778,
|
|
"grad_norm": 3.6173250675201416,
|
|
"learning_rate": 9.43211072646883e-06,
|
|
"loss": 1.2067,
|
|
"step": 3654
|
|
},
|
|
{
|
|
"epoch": 0.978057265185978,
|
|
"grad_norm": 3.4153003692626953,
|
|
"learning_rate": 9.431701030788215e-06,
|
|
"loss": 1.1684,
|
|
"step": 3655
|
|
},
|
|
{
|
|
"epoch": 0.9783248595129783,
|
|
"grad_norm": 3.5359673500061035,
|
|
"learning_rate": 9.431291196280745e-06,
|
|
"loss": 1.2438,
|
|
"step": 3656
|
|
},
|
|
{
|
|
"epoch": 0.9785924538399786,
|
|
"grad_norm": 3.2912895679473877,
|
|
"learning_rate": 9.430881222959258e-06,
|
|
"loss": 1.1959,
|
|
"step": 3657
|
|
},
|
|
{
|
|
"epoch": 0.9788600481669789,
|
|
"grad_norm": 3.617729902267456,
|
|
"learning_rate": 9.430471110836599e-06,
|
|
"loss": 1.2038,
|
|
"step": 3658
|
|
},
|
|
{
|
|
"epoch": 0.9791276424939791,
|
|
"grad_norm": 2.9933791160583496,
|
|
"learning_rate": 9.430060859925614e-06,
|
|
"loss": 1.0116,
|
|
"step": 3659
|
|
},
|
|
{
|
|
"epoch": 0.9793952368209794,
|
|
"grad_norm": 3.0839054584503174,
|
|
"learning_rate": 9.429650470239152e-06,
|
|
"loss": 1.0596,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.9796628311479797,
|
|
"grad_norm": 2.8952386379241943,
|
|
"learning_rate": 9.429239941790072e-06,
|
|
"loss": 1.0405,
|
|
"step": 3661
|
|
},
|
|
{
|
|
"epoch": 0.9799304254749799,
|
|
"grad_norm": 3.154313564300537,
|
|
"learning_rate": 9.428829274591234e-06,
|
|
"loss": 0.9956,
|
|
"step": 3662
|
|
},
|
|
{
|
|
"epoch": 0.9801980198019802,
|
|
"grad_norm": 3.2512032985687256,
|
|
"learning_rate": 9.4284184686555e-06,
|
|
"loss": 1.0996,
|
|
"step": 3663
|
|
},
|
|
{
|
|
"epoch": 0.9804656141289805,
|
|
"grad_norm": 3.4779818058013916,
|
|
"learning_rate": 9.428007523995741e-06,
|
|
"loss": 1.1982,
|
|
"step": 3664
|
|
},
|
|
{
|
|
"epoch": 0.9807332084559808,
|
|
"grad_norm": 3.4594082832336426,
|
|
"learning_rate": 9.427596440624832e-06,
|
|
"loss": 1.2461,
|
|
"step": 3665
|
|
},
|
|
{
|
|
"epoch": 0.981000802782981,
|
|
"grad_norm": 3.6881461143493652,
|
|
"learning_rate": 9.427185218555645e-06,
|
|
"loss": 1.1537,
|
|
"step": 3666
|
|
},
|
|
{
|
|
"epoch": 0.9812683971099813,
|
|
"grad_norm": 3.308070182800293,
|
|
"learning_rate": 9.426773857801067e-06,
|
|
"loss": 1.1589,
|
|
"step": 3667
|
|
},
|
|
{
|
|
"epoch": 0.9815359914369816,
|
|
"grad_norm": 3.4280967712402344,
|
|
"learning_rate": 9.426362358373981e-06,
|
|
"loss": 1.0666,
|
|
"step": 3668
|
|
},
|
|
{
|
|
"epoch": 0.9818035857639819,
|
|
"grad_norm": 3.077514886856079,
|
|
"learning_rate": 9.42595072028728e-06,
|
|
"loss": 1.2246,
|
|
"step": 3669
|
|
},
|
|
{
|
|
"epoch": 0.982071180090982,
|
|
"grad_norm": 2.923833131790161,
|
|
"learning_rate": 9.425538943553858e-06,
|
|
"loss": 0.9993,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.9823387744179823,
|
|
"grad_norm": 3.5569820404052734,
|
|
"learning_rate": 9.425127028186613e-06,
|
|
"loss": 1.1705,
|
|
"step": 3671
|
|
},
|
|
{
|
|
"epoch": 0.9826063687449826,
|
|
"grad_norm": 3.547506093978882,
|
|
"learning_rate": 9.42471497419845e-06,
|
|
"loss": 1.1549,
|
|
"step": 3672
|
|
},
|
|
{
|
|
"epoch": 0.9828739630719828,
|
|
"grad_norm": 3.3490121364593506,
|
|
"learning_rate": 9.424302781602277e-06,
|
|
"loss": 1.1942,
|
|
"step": 3673
|
|
},
|
|
{
|
|
"epoch": 0.9831415573989831,
|
|
"grad_norm": 3.038180112838745,
|
|
"learning_rate": 9.423890450411005e-06,
|
|
"loss": 0.9606,
|
|
"step": 3674
|
|
},
|
|
{
|
|
"epoch": 0.9834091517259834,
|
|
"grad_norm": 3.5518031120300293,
|
|
"learning_rate": 9.423477980637552e-06,
|
|
"loss": 1.1898,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 0.9836767460529837,
|
|
"grad_norm": 3.295964002609253,
|
|
"learning_rate": 9.42306537229484e-06,
|
|
"loss": 1.1399,
|
|
"step": 3676
|
|
},
|
|
{
|
|
"epoch": 0.9839443403799839,
|
|
"grad_norm": 3.0206878185272217,
|
|
"learning_rate": 9.422652625395791e-06,
|
|
"loss": 1.0414,
|
|
"step": 3677
|
|
},
|
|
{
|
|
"epoch": 0.9842119347069842,
|
|
"grad_norm": 3.4383180141448975,
|
|
"learning_rate": 9.422239739953337e-06,
|
|
"loss": 1.1399,
|
|
"step": 3678
|
|
},
|
|
{
|
|
"epoch": 0.9844795290339845,
|
|
"grad_norm": 3.2433290481567383,
|
|
"learning_rate": 9.42182671598041e-06,
|
|
"loss": 1.1698,
|
|
"step": 3679
|
|
},
|
|
{
|
|
"epoch": 0.9847471233609848,
|
|
"grad_norm": 3.151118755340576,
|
|
"learning_rate": 9.421413553489952e-06,
|
|
"loss": 1.1247,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.985014717687985,
|
|
"grad_norm": 3.5655605792999268,
|
|
"learning_rate": 9.421000252494902e-06,
|
|
"loss": 1.1653,
|
|
"step": 3681
|
|
},
|
|
{
|
|
"epoch": 0.9852823120149853,
|
|
"grad_norm": 2.9051101207733154,
|
|
"learning_rate": 9.42058681300821e-06,
|
|
"loss": 0.9898,
|
|
"step": 3682
|
|
},
|
|
{
|
|
"epoch": 0.9855499063419856,
|
|
"grad_norm": 3.184781312942505,
|
|
"learning_rate": 9.420173235042825e-06,
|
|
"loss": 1.0923,
|
|
"step": 3683
|
|
},
|
|
{
|
|
"epoch": 0.9858175006689858,
|
|
"grad_norm": 3.448216199874878,
|
|
"learning_rate": 9.419759518611704e-06,
|
|
"loss": 1.1623,
|
|
"step": 3684
|
|
},
|
|
{
|
|
"epoch": 0.9860850949959861,
|
|
"grad_norm": 3.177147626876831,
|
|
"learning_rate": 9.419345663727805e-06,
|
|
"loss": 1.0076,
|
|
"step": 3685
|
|
},
|
|
{
|
|
"epoch": 0.9863526893229864,
|
|
"grad_norm": 3.480036497116089,
|
|
"learning_rate": 9.418931670404096e-06,
|
|
"loss": 1.1148,
|
|
"step": 3686
|
|
},
|
|
{
|
|
"epoch": 0.9866202836499867,
|
|
"grad_norm": 3.5168023109436035,
|
|
"learning_rate": 9.418517538653541e-06,
|
|
"loss": 1.1358,
|
|
"step": 3687
|
|
},
|
|
{
|
|
"epoch": 0.9868878779769868,
|
|
"grad_norm": 3.518444061279297,
|
|
"learning_rate": 9.41810326848912e-06,
|
|
"loss": 1.1001,
|
|
"step": 3688
|
|
},
|
|
{
|
|
"epoch": 0.9871554723039871,
|
|
"grad_norm": 3.6086690425872803,
|
|
"learning_rate": 9.417688859923803e-06,
|
|
"loss": 1.2808,
|
|
"step": 3689
|
|
},
|
|
{
|
|
"epoch": 0.9874230666309874,
|
|
"grad_norm": 3.5940322875976562,
|
|
"learning_rate": 9.417274312970574e-06,
|
|
"loss": 1.1559,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.9876906609579877,
|
|
"grad_norm": 3.6169145107269287,
|
|
"learning_rate": 9.416859627642421e-06,
|
|
"loss": 1.1886,
|
|
"step": 3691
|
|
},
|
|
{
|
|
"epoch": 0.9879582552849879,
|
|
"grad_norm": 3.5202441215515137,
|
|
"learning_rate": 9.416444803952334e-06,
|
|
"loss": 1.2057,
|
|
"step": 3692
|
|
},
|
|
{
|
|
"epoch": 0.9882258496119882,
|
|
"grad_norm": 3.2127108573913574,
|
|
"learning_rate": 9.416029841913306e-06,
|
|
"loss": 0.9903,
|
|
"step": 3693
|
|
},
|
|
{
|
|
"epoch": 0.9884934439389885,
|
|
"grad_norm": 3.4693965911865234,
|
|
"learning_rate": 9.415614741538334e-06,
|
|
"loss": 1.1354,
|
|
"step": 3694
|
|
},
|
|
{
|
|
"epoch": 0.9887610382659887,
|
|
"grad_norm": 3.5482194423675537,
|
|
"learning_rate": 9.415199502840428e-06,
|
|
"loss": 1.2641,
|
|
"step": 3695
|
|
},
|
|
{
|
|
"epoch": 0.989028632592989,
|
|
"grad_norm": 2.904871702194214,
|
|
"learning_rate": 9.414784125832592e-06,
|
|
"loss": 1.0245,
|
|
"step": 3696
|
|
},
|
|
{
|
|
"epoch": 0.9892962269199893,
|
|
"grad_norm": 3.295982599258423,
|
|
"learning_rate": 9.414368610527835e-06,
|
|
"loss": 1.1042,
|
|
"step": 3697
|
|
},
|
|
{
|
|
"epoch": 0.9895638212469896,
|
|
"grad_norm": 3.2028210163116455,
|
|
"learning_rate": 9.413952956939179e-06,
|
|
"loss": 1.1382,
|
|
"step": 3698
|
|
},
|
|
{
|
|
"epoch": 0.9898314155739898,
|
|
"grad_norm": 3.677734136581421,
|
|
"learning_rate": 9.413537165079642e-06,
|
|
"loss": 1.3448,
|
|
"step": 3699
|
|
},
|
|
{
|
|
"epoch": 0.9900990099009901,
|
|
"grad_norm": 3.27496337890625,
|
|
"learning_rate": 9.413121234962248e-06,
|
|
"loss": 1.2098,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.9903666042279904,
|
|
"grad_norm": 3.1553688049316406,
|
|
"learning_rate": 9.412705166600026e-06,
|
|
"loss": 1.1474,
|
|
"step": 3701
|
|
},
|
|
{
|
|
"epoch": 0.9906341985549907,
|
|
"grad_norm": 3.2582755088806152,
|
|
"learning_rate": 9.412288960006016e-06,
|
|
"loss": 1.0499,
|
|
"step": 3702
|
|
},
|
|
{
|
|
"epoch": 0.9909017928819909,
|
|
"grad_norm": 3.174191474914551,
|
|
"learning_rate": 9.411872615193247e-06,
|
|
"loss": 1.1505,
|
|
"step": 3703
|
|
},
|
|
{
|
|
"epoch": 0.9911693872089912,
|
|
"grad_norm": 3.137990951538086,
|
|
"learning_rate": 9.411456132174768e-06,
|
|
"loss": 1.0766,
|
|
"step": 3704
|
|
},
|
|
{
|
|
"epoch": 0.9914369815359915,
|
|
"grad_norm": 3.366492748260498,
|
|
"learning_rate": 9.411039510963622e-06,
|
|
"loss": 1.265,
|
|
"step": 3705
|
|
},
|
|
{
|
|
"epoch": 0.9917045758629917,
|
|
"grad_norm": 3.27744460105896,
|
|
"learning_rate": 9.41062275157286e-06,
|
|
"loss": 1.0141,
|
|
"step": 3706
|
|
},
|
|
{
|
|
"epoch": 0.991972170189992,
|
|
"grad_norm": 3.3313217163085938,
|
|
"learning_rate": 9.410205854015542e-06,
|
|
"loss": 1.1903,
|
|
"step": 3707
|
|
},
|
|
{
|
|
"epoch": 0.9922397645169923,
|
|
"grad_norm": 3.281294584274292,
|
|
"learning_rate": 9.409788818304722e-06,
|
|
"loss": 1.164,
|
|
"step": 3708
|
|
},
|
|
{
|
|
"epoch": 0.9925073588439925,
|
|
"grad_norm": 3.4108831882476807,
|
|
"learning_rate": 9.409371644453467e-06,
|
|
"loss": 1.0759,
|
|
"step": 3709
|
|
},
|
|
{
|
|
"epoch": 0.9927749531709927,
|
|
"grad_norm": 2.919935703277588,
|
|
"learning_rate": 9.408954332474845e-06,
|
|
"loss": 1.0233,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.993042547497993,
|
|
"grad_norm": 3.332352876663208,
|
|
"learning_rate": 9.408536882381928e-06,
|
|
"loss": 1.0486,
|
|
"step": 3711
|
|
},
|
|
{
|
|
"epoch": 0.9933101418249933,
|
|
"grad_norm": 3.033339023590088,
|
|
"learning_rate": 9.408119294187791e-06,
|
|
"loss": 0.9737,
|
|
"step": 3712
|
|
},
|
|
{
|
|
"epoch": 0.9935777361519936,
|
|
"grad_norm": 3.5248947143554688,
|
|
"learning_rate": 9.40770156790552e-06,
|
|
"loss": 1.2238,
|
|
"step": 3713
|
|
},
|
|
{
|
|
"epoch": 0.9938453304789938,
|
|
"grad_norm": 3.687695026397705,
|
|
"learning_rate": 9.407283703548198e-06,
|
|
"loss": 1.1228,
|
|
"step": 3714
|
|
},
|
|
{
|
|
"epoch": 0.9941129248059941,
|
|
"grad_norm": 3.2966973781585693,
|
|
"learning_rate": 9.406865701128916e-06,
|
|
"loss": 1.0991,
|
|
"step": 3715
|
|
},
|
|
{
|
|
"epoch": 0.9943805191329944,
|
|
"grad_norm": 3.3037948608398438,
|
|
"learning_rate": 9.406447560660764e-06,
|
|
"loss": 1.1622,
|
|
"step": 3716
|
|
},
|
|
{
|
|
"epoch": 0.9946481134599946,
|
|
"grad_norm": 3.2481751441955566,
|
|
"learning_rate": 9.40602928215685e-06,
|
|
"loss": 1.0621,
|
|
"step": 3717
|
|
},
|
|
{
|
|
"epoch": 0.9949157077869949,
|
|
"grad_norm": 3.3406388759613037,
|
|
"learning_rate": 9.405610865630266e-06,
|
|
"loss": 1.1471,
|
|
"step": 3718
|
|
},
|
|
{
|
|
"epoch": 0.9951833021139952,
|
|
"grad_norm": 3.2971763610839844,
|
|
"learning_rate": 9.405192311094126e-06,
|
|
"loss": 1.1473,
|
|
"step": 3719
|
|
},
|
|
{
|
|
"epoch": 0.9954508964409955,
|
|
"grad_norm": 3.1722514629364014,
|
|
"learning_rate": 9.40477361856154e-06,
|
|
"loss": 1.0188,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.9957184907679957,
|
|
"grad_norm": 3.5597033500671387,
|
|
"learning_rate": 9.404354788045624e-06,
|
|
"loss": 1.1735,
|
|
"step": 3721
|
|
},
|
|
{
|
|
"epoch": 0.995986085094996,
|
|
"grad_norm": 3.2064480781555176,
|
|
"learning_rate": 9.403935819559497e-06,
|
|
"loss": 1.1026,
|
|
"step": 3722
|
|
},
|
|
{
|
|
"epoch": 0.9962536794219963,
|
|
"grad_norm": 3.40864896774292,
|
|
"learning_rate": 9.403516713116286e-06,
|
|
"loss": 1.2236,
|
|
"step": 3723
|
|
},
|
|
{
|
|
"epoch": 0.9965212737489966,
|
|
"grad_norm": 3.1237740516662598,
|
|
"learning_rate": 9.403097468729117e-06,
|
|
"loss": 1.0975,
|
|
"step": 3724
|
|
},
|
|
{
|
|
"epoch": 0.9967888680759968,
|
|
"grad_norm": 3.1500935554504395,
|
|
"learning_rate": 9.402678086411125e-06,
|
|
"loss": 1.0771,
|
|
"step": 3725
|
|
},
|
|
{
|
|
"epoch": 0.9970564624029971,
|
|
"grad_norm": 3.1867852210998535,
|
|
"learning_rate": 9.402258566175448e-06,
|
|
"loss": 0.9686,
|
|
"step": 3726
|
|
},
|
|
{
|
|
"epoch": 0.9973240567299974,
|
|
"grad_norm": 3.5945894718170166,
|
|
"learning_rate": 9.401838908035226e-06,
|
|
"loss": 1.2021,
|
|
"step": 3727
|
|
},
|
|
{
|
|
"epoch": 0.9975916510569975,
|
|
"grad_norm": 3.2258009910583496,
|
|
"learning_rate": 9.401419112003607e-06,
|
|
"loss": 1.1979,
|
|
"step": 3728
|
|
},
|
|
{
|
|
"epoch": 0.9978592453839978,
|
|
"grad_norm": 3.2605550289154053,
|
|
"learning_rate": 9.40099917809374e-06,
|
|
"loss": 1.1081,
|
|
"step": 3729
|
|
},
|
|
{
|
|
"epoch": 0.9981268397109981,
|
|
"grad_norm": 3.4050374031066895,
|
|
"learning_rate": 9.400579106318781e-06,
|
|
"loss": 1.1763,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.9983944340379984,
|
|
"grad_norm": 3.2462353706359863,
|
|
"learning_rate": 9.400158896691887e-06,
|
|
"loss": 1.0444,
|
|
"step": 3731
|
|
},
|
|
{
|
|
"epoch": 0.9986620283649986,
|
|
"grad_norm": 3.2882609367370605,
|
|
"learning_rate": 9.399738549226226e-06,
|
|
"loss": 1.0605,
|
|
"step": 3732
|
|
},
|
|
{
|
|
"epoch": 0.9989296226919989,
|
|
"grad_norm": 3.3432724475860596,
|
|
"learning_rate": 9.399318063934959e-06,
|
|
"loss": 1.109,
|
|
"step": 3733
|
|
},
|
|
{
|
|
"epoch": 0.9991972170189992,
|
|
"grad_norm": 3.5138442516326904,
|
|
"learning_rate": 9.398897440831263e-06,
|
|
"loss": 1.2697,
|
|
"step": 3734
|
|
},
|
|
{
|
|
"epoch": 0.9994648113459995,
|
|
"grad_norm": 3.237004280090332,
|
|
"learning_rate": 9.398476679928313e-06,
|
|
"loss": 1.0746,
|
|
"step": 3735
|
|
},
|
|
{
|
|
"epoch": 0.9997324056729997,
|
|
"grad_norm": 3.4802517890930176,
|
|
"learning_rate": 9.39805578123929e-06,
|
|
"loss": 1.1824,
|
|
"step": 3736
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 3.3513879776000977,
|
|
"learning_rate": 9.39763474477738e-06,
|
|
"loss": 1.1062,
|
|
"step": 3737
|
|
}
|
|
],
|
|
"logging_steps": 1.0,
|
|
"max_steps": 18685,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 500.0,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.2477154653844275e+18,
|
|
"train_batch_size": 2,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|